refactor(parser): extract split() pipeline into named methods
Extract stripMaidenName, normalizeDotCompressed, stripAnnotation, stripTitle, and splitByKnownLastNameOrFallback as individually testable pipeline steps. Each extraction method is a pass-through until its feature issue fills in the logic. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -118,35 +118,80 @@ public class PersonNameParser {
|
||||
return nameParts;
|
||||
}
|
||||
|
||||
// --- Pipeline result records (exposed so each pipeline step can be tested individually) ---
|
||||
|
||||
/**
 * Result of {@code stripMaidenName}: {@code cleaned} is the input with all GEB_PATTERN matches
 * removed and trimmed; {@code maidenName} is currently always {@code null}.
 */
public record MaidenNameResult(String cleaned, String maidenName) {}
|
||||
/**
 * Result of {@code stripAnnotation}: {@code cleaned} is the text handed to the next pipeline
 * step; {@code annotation} is currently always {@code null}.
 */
public record AnnotationResult(String cleaned, String annotation) {}
|
||||
/**
 * Result of {@code stripTitle}: {@code cleaned} is the text handed to the next pipeline step;
 * {@code title} is currently always {@code null}.
 */
public record TitleResult(String cleaned, String title) {}
|
||||
/**
 * First/last name pair produced by {@code splitByKnownLastNameOrFallback}; {@code lastName} is
 * {@code "?"} when the input could not be split.
 */
record NameParts(String firstName, String lastName) {}
|
||||
|
||||
/**
|
||||
* Splits a single full name string into firstName and lastName.
|
||||
* Uses known last names first; falls back to splitting on the last space.
|
||||
* Splits a single full name string into a structured SplitName.
|
||||
* Pipeline: stripMaidenName → normalizeDotCompressed → stripAnnotation → stripTitle → splitByKnownLastNameOrFallback
|
||||
*/
|
||||
public static SplitName split(String rawName) {
|
||||
if (rawName == null || rawName.isBlank()) {
|
||||
return new SplitName(null, "?", "?", null, null);
|
||||
}
|
||||
|
||||
String cleaned = GEB_PATTERN.matcher(rawName).replaceAll("").trim();
|
||||
MaidenNameResult maiden = stripMaidenName(rawName);
|
||||
String cleaned = maiden.cleaned();
|
||||
|
||||
// Normalize dot-compressed names: "Dr.Fr.Zarncke" -> "Dr. Fr. Zarncke"
|
||||
if (!cleaned.contains(" ") && cleaned.contains(".")) {
|
||||
cleaned = cleaned.replace(".", ". ").trim();
|
||||
cleaned = normalizeDotCompressed(cleaned);
|
||||
|
||||
AnnotationResult paren = stripAnnotation(cleaned);
|
||||
cleaned = paren.cleaned();
|
||||
|
||||
TitleResult title = stripTitle(cleaned);
|
||||
cleaned = title.cleaned();
|
||||
|
||||
NameParts parts = splitByKnownLastNameOrFallback(cleaned);
|
||||
|
||||
return new SplitName(
|
||||
title.title(), parts.firstName(), parts.lastName(),
|
||||
maiden.maidenName(), paren.annotation()
|
||||
);
|
||||
}
|
||||
|
||||
/** Strips "geb. Xxx" maiden-name annotations. Pass-through until #209. */
|
||||
public static MaidenNameResult stripMaidenName(String input) {
|
||||
String cleaned = GEB_PATTERN.matcher(input).replaceAll("").trim();
|
||||
return new MaidenNameResult(cleaned, null);
|
||||
}
|
||||
|
||||
/** Normalizes dot-compressed names: "Dr.Fr.Zarncke" → "Dr. Fr. Zarncke" */
|
||||
static String normalizeDotCompressed(String input) {
|
||||
if (!input.contains(" ") && input.contains(".")) {
|
||||
return input.replace(".", ". ").trim();
|
||||
}
|
||||
return input;
|
||||
}
|
||||
|
||||
/** Strips parenthesized annotations. Pass-through until #210. */
|
||||
public static AnnotationResult stripAnnotation(String input) {
|
||||
return new AnnotationResult(input, null);
|
||||
}
|
||||
|
||||
/** Strips title prefixes. Pass-through until #212. */
|
||||
public static TitleResult stripTitle(String input) {
|
||||
return new TitleResult(input, null);
|
||||
}
|
||||
|
||||
/** Splits a cleaned name into firstName/lastName using known last names or last-space fallback. */
|
||||
static NameParts splitByKnownLastNameOrFallback(String cleaned) {
|
||||
String lastName = findKnownLastName(cleaned);
|
||||
if (lastName != null) {
|
||||
String firstName = cleaned.substring(0, cleaned.length() - lastName.length()).trim();
|
||||
if (firstName.isBlank()) firstName = cleaned;
|
||||
return new SplitName(null, firstName, lastName, null, null);
|
||||
return new NameParts(firstName, lastName);
|
||||
}
|
||||
|
||||
int lastSpace = cleaned.lastIndexOf(' ');
|
||||
if (lastSpace > 0) {
|
||||
return new SplitName(null, cleaned.substring(0, lastSpace).trim(), cleaned.substring(lastSpace + 1).trim(), null, null);
|
||||
return new NameParts(cleaned.substring(0, lastSpace).trim(), cleaned.substring(lastSpace + 1).trim());
|
||||
}
|
||||
|
||||
return new SplitName(null, cleaned, "?", null, null);
|
||||
return new NameParts(cleaned, "?");
|
||||
}
|
||||
|
||||
/** Returns the known last name that the given string ends with, or null. */
|
||||
|
||||
@@ -256,4 +256,27 @@ class PersonNameParserTest {
|
||||
List<String> result = PersonNameParser.parseReceivers("Müller und Herbert de Gruyter");
|
||||
assertThat(result).containsExactlyInAnyOrder("Müller", "Herbert de Gruyter");
|
||||
}
|
||||
|
||||
// --- pipeline pass-through methods ---
|
||||
|
||||
@Test
|
||||
void stripMaidenName_isPassthrough() {
|
||||
PersonNameParser.MaidenNameResult result = PersonNameParser.stripMaidenName("Walter de Gruyter");
|
||||
assertThat(result.cleaned()).isEqualTo("Walter de Gruyter");
|
||||
assertThat(result.maidenName()).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripAnnotation_isPassthrough() {
|
||||
PersonNameParser.AnnotationResult result = PersonNameParser.stripAnnotation("Walter de Gruyter");
|
||||
assertThat(result.cleaned()).isEqualTo("Walter de Gruyter");
|
||||
assertThat(result.annotation()).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripTitle_isPassthrough() {
|
||||
PersonNameParser.TitleResult result = PersonNameParser.stripTitle("Walter de Gruyter");
|
||||
assertThat(result.cleaned()).isEqualTo("Walter de Gruyter");
|
||||
assertThat(result.title()).isNull();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user