refactor(parser): extract split() pipeline into named methods

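Extract stripMaidenName, normalizeDotCompressed, stripAnnotation,
stripTitle, and splitByKnownLastNameOrFallback as individually
testable pipeline steps. Existing logic moves into the new methods
unchanged. stripAnnotation and stripTitle are pass-throughs, and
stripMaidenName strips the "geb." text without capturing the maiden
name yet, until their feature issues (#209, #210, #212) fill in the logic.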

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-08 11:48:08 +02:00
parent 1e1921e0fa
commit dea1635d75
2 changed files with 77 additions and 9 deletions

View File

@@ -118,35 +118,80 @@ public class PersonNameParser {
return nameParts; return nameParts;
} }
// --- Pipeline result records ---
// The three strip* results are public because the public strip* methods return them;
// NameParts is package-private and only returned by splitByKnownLastNameOrFallback.

/** Result of stripMaidenName: the remaining name text and the maiden name (currently always null). */
public record MaidenNameResult(String cleaned, String maidenName) {}

/** Result of stripAnnotation: the remaining name text and the annotation (currently always null). */
public record AnnotationResult(String cleaned, String annotation) {}

/** Result of stripTitle: the remaining name text and the title (currently always null). */
public record TitleResult(String cleaned, String title) {}

/** First-name / last-name pair produced by splitByKnownLastNameOrFallback. */
record NameParts(String firstName, String lastName) {}
/** /**
* Splits a single full name string into firstName and lastName. * Splits a single full name string into a structured SplitName.
* Uses known last names first; falls back to splitting on the last space. * Pipeline: stripMaidenName → normalizeDotCompressed → stripAnnotation → stripTitle → splitByKnownLastNameOrFallback
*/ */
public static SplitName split(String rawName) { public static SplitName split(String rawName) {
if (rawName == null || rawName.isBlank()) { if (rawName == null || rawName.isBlank()) {
return new SplitName(null, "?", "?", null, null); return new SplitName(null, "?", "?", null, null);
} }
String cleaned = GEB_PATTERN.matcher(rawName).replaceAll("").trim(); MaidenNameResult maiden = stripMaidenName(rawName);
String cleaned = maiden.cleaned();
// Normalize dot-compressed names: "Dr.Fr.Zarncke" -> "Dr. Fr. Zarncke" cleaned = normalizeDotCompressed(cleaned);
if (!cleaned.contains(" ") && cleaned.contains(".")) {
cleaned = cleaned.replace(".", ". ").trim(); AnnotationResult paren = stripAnnotation(cleaned);
cleaned = paren.cleaned();
TitleResult title = stripTitle(cleaned);
cleaned = title.cleaned();
NameParts parts = splitByKnownLastNameOrFallback(cleaned);
return new SplitName(
title.title(), parts.firstName(), parts.lastName(),
maiden.maidenName(), paren.annotation()
);
}
/**
 * Removes every GEB_PATTERN match ("geb. Xxx" maiden-name annotations) from the input and
 * trims the remainder.
 *
 * <p>The removed maiden name is not captured yet; that is deferred to #209, so the
 * {@code maidenName} component is always null.
 *
 * @param input the name text to clean; must not be null
 * @return the cleaned text together with a null maiden name
 */
public static MaidenNameResult stripMaidenName(String input) {
    var gebMatcher = GEB_PATTERN.matcher(input);
    String withoutGeb = gebMatcher.replaceAll("");
    return new MaidenNameResult(withoutGeb.trim(), null);
}
/** Normalizes dot-compressed names: "Dr.Fr.Zarncke" → "Dr. Fr. Zarncke" */
static String normalizeDotCompressed(String input) {
if (!input.contains(" ") && input.contains(".")) {
return input.replace(".", ". ").trim();
} }
return input;
}
/**
 * Strips parenthesized annotations. Pass-through until #210: the input is returned
 * untouched and no annotation is extracted.
 *
 * @param input the name text to inspect
 * @return the unchanged input together with a null annotation
 */
public static AnnotationResult stripAnnotation(String input) {
    final String annotation = null; // nothing is extracted yet
    return new AnnotationResult(input, annotation);
}
/**
 * Strips title prefixes. Pass-through until #212: the input is returned untouched and
 * no title is extracted.
 *
 * @param input the name text to inspect
 * @return the unchanged input together with a null title
 */
public static TitleResult stripTitle(String input) {
    final String title = null; // nothing is extracted yet
    return new TitleResult(input, title);
}
/** Splits a cleaned name into firstName/lastName using known last names or last-space fallback. */
static NameParts splitByKnownLastNameOrFallback(String cleaned) {
String lastName = findKnownLastName(cleaned); String lastName = findKnownLastName(cleaned);
if (lastName != null) { if (lastName != null) {
String firstName = cleaned.substring(0, cleaned.length() - lastName.length()).trim(); String firstName = cleaned.substring(0, cleaned.length() - lastName.length()).trim();
if (firstName.isBlank()) firstName = cleaned; if (firstName.isBlank()) firstName = cleaned;
return new SplitName(null, firstName, lastName, null, null); return new NameParts(firstName, lastName);
} }
int lastSpace = cleaned.lastIndexOf(' '); int lastSpace = cleaned.lastIndexOf(' ');
if (lastSpace > 0) { if (lastSpace > 0) {
return new SplitName(null, cleaned.substring(0, lastSpace).trim(), cleaned.substring(lastSpace + 1).trim(), null, null); return new NameParts(cleaned.substring(0, lastSpace).trim(), cleaned.substring(lastSpace + 1).trim());
} }
return new SplitName(null, cleaned, "?", null, null); return new NameParts(cleaned, "?");
} }
/** Returns the known last name that the given string ends with, or null. */ /** Returns the known last name that the given string ends with, or null. */

View File

@@ -256,4 +256,27 @@ class PersonNameParserTest {
List<String> result = PersonNameParser.parseReceivers("Müller und Herbert de Gruyter"); List<String> result = PersonNameParser.parseReceivers("Müller und Herbert de Gruyter");
assertThat(result).containsExactlyInAnyOrder("Müller", "Herbert de Gruyter"); assertThat(result).containsExactlyInAnyOrder("Müller", "Herbert de Gruyter");
} }
// --- pipeline pass-through methods ---
/** A name without a maiden-name annotation comes back unchanged and with no maiden name. */
@Test
void stripMaidenName_isPassthrough() {
    var outcome = PersonNameParser.stripMaidenName("Walter de Gruyter");

    assertThat(outcome.cleaned()).isEqualTo("Walter de Gruyter");
    assertThat(outcome.maidenName()).isNull();
}
/** stripAnnotation currently returns its input unchanged and extracts no annotation. */
@Test
void stripAnnotation_isPassthrough() {
    var outcome = PersonNameParser.stripAnnotation("Walter de Gruyter");

    assertThat(outcome.cleaned()).isEqualTo("Walter de Gruyter");
    assertThat(outcome.annotation()).isNull();
}
/** stripTitle currently returns its input unchanged and extracts no title. */
@Test
void stripTitle_isPassthrough() {
    var outcome = PersonNameParser.stripTitle("Walter de Gruyter");

    assertThat(outcome.cleaned()).isEqualTo("Walter de Gruyter");
    assertThat(outcome.title()).isNull();
}
} }