refactor(parser): extract split() pipeline into named methods

Extract stripMaidenName, normalizeDotCompressed, stripAnnotation,
stripTitle, and splitByKnownLastNameOrFallback as individually
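testable pipeline steps. stripAnnotation and stripTitle are pass-throughs
until their feature issues fill in the logic; stripMaidenName still only
removes GEB_PATTERN matches and does not yet capture the maiden name.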

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-08 11:48:08 +02:00
parent 1e1921e0fa
commit dea1635d75
2 changed files with 77 additions and 9 deletions

View File

@@ -118,35 +118,80 @@ public class PersonNameParser {
return nameParts;
}
// --- Pipeline result records ---
// MaidenNameResult, AnnotationResult and TitleResult are declared public;
// only NameParts is package-private.
/** Result of stripMaidenName: the remaining text and the maiden-name component. */
public record MaidenNameResult(String cleaned, String maidenName) {}
/** Result of stripAnnotation: the remaining text and the annotation component. */
public record AnnotationResult(String cleaned, String annotation) {}
/** Result of stripTitle: the remaining text and the title component. */
public record TitleResult(String cleaned, String title) {}
/** Result of splitByKnownLastNameOrFallback: the first-name and last-name parts. */
record NameParts(String firstName, String lastName) {}
/**
* Splits a single full name string into a structured SplitName.
* Null or blank input yields SplitName(null, "?", "?", null, null).
* Pipeline: stripMaidenName → normalizeDotCompressed → stripAnnotation → stripTitle → splitByKnownLastNameOrFallback
* The last step uses known last names first and falls back to splitting on the last space.
*/
public static SplitName split(String rawName) {
// Missing input: both name parts become the "?" placeholder.
if (rawName == null || rawName.isBlank()) {
return new SplitName(null, "?", "?", null, null);
}
// NOTE(review): `cleaned` is declared here and again two lines below; this looks like the
// pre-refactor statement listed next to its replacement. Confirm which one is current.
String cleaned = GEB_PATTERN.matcher(rawName).replaceAll("").trim();
MaidenNameResult maiden = stripMaidenName(rawName);
String cleaned = maiden.cleaned();
// Normalize dot-compressed names: "Dr.Fr.Zarncke" -> "Dr. Fr. Zarncke"
// NOTE(review): the inline `if` below performs the same check and replacement as
// normalizeDotCompressed and has no matching closing brace in this listing;
// presumably it is the superseded form. Confirm.
if (!cleaned.contains(" ") && cleaned.contains(".")) {
cleaned = cleaned.replace(".", ". ").trim();
cleaned = normalizeDotCompressed(cleaned);
// Each step receives the text left over by the previous step.
AnnotationResult paren = stripAnnotation(cleaned);
cleaned = paren.cleaned();
TitleResult title = stripTitle(cleaned);
cleaned = title.cleaned();
NameParts parts = splitByKnownLastNameOrFallback(cleaned);
// Arguments passed, in order: title, first name, last name, maiden name, annotation.
return new SplitName(
title.title(), parts.firstName(), parts.lastName(),
maiden.maidenName(), paren.annotation()
);
}
/**
 * Removes every GEB_PATTERN match ("geb. Xxx" maiden-name annotations) from the
 * input and trims the remainder.
 *
 * The removed text is not captured yet: the maidenName component of the result
 * is always null (capturing it is tracked in #209).
 *
 * @param input the raw name string; must not be null
 * @return the trimmed remainder paired with a null maiden name
 */
public static MaidenNameResult stripMaidenName(String input) {
    final String withoutMaidenPart = GEB_PATTERN.matcher(input).replaceAll("");
    return new MaidenNameResult(withoutMaidenPart.trim(), null);
}
/**
 * Expands dot-compressed names such as "Dr.Fr.Zarncke" to "Dr. Fr. Zarncke".
 *
 * Only inputs that contain at least one '.' and no space character are changed:
 * a space is inserted after every dot and the result is trimmed. Any other
 * input is returned unchanged.
 *
 * @param input the name string to normalize; must not be null
 * @return the expanded name, or the input itself when no expansion applies
 */
static String normalizeDotCompressed(String input) {
    final boolean hasSpace = input.indexOf(' ') >= 0;
    final boolean hasDot = input.indexOf('.') >= 0;
    if (hasSpace || !hasDot) {
        return input;
    }
    final String spaced = input.replace(".", ". ");
    return spaced.trim();
}
/**
 * Placeholder for stripping parenthesized annotations (logic tracked in #210).
 *
 * Currently a pass-through: the input is returned unchanged as the cleaned text
 * and the annotation component is always null.
 *
 * @param input the name string to process
 * @return the unchanged input paired with a null annotation
 */
public static AnnotationResult stripAnnotation(String input) {
    final String noAnnotation = null; // nothing is extracted yet
    return new AnnotationResult(input, noAnnotation);
}
/**
 * Placeholder for stripping title prefixes (logic tracked in #212).
 *
 * Currently a pass-through: the input is returned unchanged as the cleaned text
 * and the title component is always null.
 *
 * @param input the name string to process
 * @return the unchanged input paired with a null title
 */
public static TitleResult stripTitle(String input) {
    final String noTitle = null; // nothing is extracted yet
    return new TitleResult(input, noTitle);
}
/**
* Splits a cleaned name into firstName/lastName using known last names or last-space fallback.
*
* If findKnownLastName returns a non-null last name, the first name is the text before it
* (or the whole input when nothing precedes it). Otherwise the name is split at the last
* space; when there is no space after index 0, the whole input becomes the first name and
* the last name is "?".
*/
static NameParts splitByKnownLastNameOrFallback(String cleaned) {
String lastName = findKnownLastName(cleaned);
if (lastName != null) {
// Cuts by length from the end, i.e. relies on `cleaned` ending with lastName.
String firstName = cleaned.substring(0, cleaned.length() - lastName.length()).trim();
// Nothing precedes the last name: keep the full input as the first name.
if (firstName.isBlank()) firstName = cleaned;
// NOTE(review): every `return new SplitName(...)` in this method is directly followed by a
// `return new NameParts(...)` carrying the same values, and the declared return type is
// NameParts; the SplitName returns look like superseded lines. Confirm.
return new SplitName(null, firstName, lastName, null, null);
return new NameParts(firstName, lastName);
}
int lastSpace = cleaned.lastIndexOf(' ');
// `> 0` rather than `>= 0`: a space at index 0 is not used as a separator.
if (lastSpace > 0) {
return new SplitName(null, cleaned.substring(0, lastSpace).trim(), cleaned.substring(lastSpace + 1).trim(), null, null);
return new NameParts(cleaned.substring(0, lastSpace).trim(), cleaned.substring(lastSpace + 1).trim());
}
// No separator found: the whole input is the first name, last name is the "?" placeholder.
return new SplitName(null, cleaned, "?", null, null);
return new NameParts(cleaned, "?");
}
/** Returns the known last name that the given string ends with, or null. */

View File

@@ -256,4 +256,27 @@ class PersonNameParserTest {
List<String> result = PersonNameParser.parseReceivers("Müller und Herbert de Gruyter");
assertThat(result).containsExactlyInAnyOrder("Müller", "Herbert de Gruyter");
}
// --- pipeline pass-through methods ---
@Test
void stripMaidenName_isPassthrough() {
    // An input without a "geb." part must come back unchanged, with no maiden name.
    final String input = "Walter de Gruyter";
    var outcome = PersonNameParser.stripMaidenName(input);
    assertThat(outcome.cleaned()).isEqualTo(input);
    assertThat(outcome.maidenName()).isNull();
}
@Test
void stripAnnotation_isPassthrough() {
    // The step must return its input unchanged and report no annotation.
    final String input = "Walter de Gruyter";
    var outcome = PersonNameParser.stripAnnotation(input);
    assertThat(outcome.cleaned()).isEqualTo(input);
    assertThat(outcome.annotation()).isNull();
}
@Test
void stripTitle_isPassthrough() {
    // The step must return its input unchanged and report no title.
    final String input = "Walter de Gruyter";
    var outcome = PersonNameParser.stripTitle(input);
    assertThat(outcome.cleaned()).isEqualTo(input);
    assertThat(outcome.title()).isNull();
}
}