From dea1635d759e855ae012ef33fbc032171d797801 Mon Sep 17 00:00:00 2001 From: Marcel Date: Wed, 8 Apr 2026 11:48:08 +0200 Subject: [PATCH] refactor(parser): extract split() pipeline into named methods Extract stripMaidenName, normalizeDotCompressed, stripAnnotation, stripTitle, and splitByKnownLastNameOrFallback as individually testable pipeline steps. stripAnnotation and stripTitle are pass-throughs, and stripMaidenName does not yet capture the maiden name, until their feature issues fill in the logic. Co-Authored-By: Claude Sonnet 4.6 --- .../service/PersonNameParser.java | 63 ++++++++++++++++--- .../service/PersonNameParserTest.java | 23 +++++++ 2 files changed, 77 insertions(+), 9 deletions(-) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java b/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java index bb07e568..d66db4fd 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java @@ -118,35 +118,80 @@ public class PersonNameParser { return nameParts; } + // --- Pipeline result records (public ones exposed for testing; NameParts is package-private) --- + + public record MaidenNameResult(String cleaned, String maidenName) {} + public record AnnotationResult(String cleaned, String annotation) {} + public record TitleResult(String cleaned, String title) {} + record NameParts(String firstName, String lastName) {} + /** - * Splits a single full name string into firstName and lastName. - * Uses known last names first; falls back to splitting on the last space. + * Splits a single full name string into a structured SplitName. 
+ * Pipeline: stripMaidenName → normalizeDotCompressed → stripAnnotation → stripTitle → splitByKnownLastNameOrFallback */ public static SplitName split(String rawName) { if (rawName == null || rawName.isBlank()) { return new SplitName(null, "?", "?", null, null); } - String cleaned = GEB_PATTERN.matcher(rawName).replaceAll("").trim(); + MaidenNameResult maiden = stripMaidenName(rawName); + String cleaned = maiden.cleaned(); - // Normalize dot-compressed names: "Dr.Fr.Zarncke" -> "Dr. Fr. Zarncke" - if (!cleaned.contains(" ") && cleaned.contains(".")) { - cleaned = cleaned.replace(".", ". ").trim(); + cleaned = normalizeDotCompressed(cleaned); + + AnnotationResult paren = stripAnnotation(cleaned); + cleaned = paren.cleaned(); + + TitleResult title = stripTitle(cleaned); + cleaned = title.cleaned(); + + NameParts parts = splitByKnownLastNameOrFallback(cleaned); + + return new SplitName( + title.title(), parts.firstName(), parts.lastName(), + maiden.maidenName(), paren.annotation() + ); + } + + /** Strips "geb. Xxx" maiden-name annotations. The stripped maiden name is discarded (maidenName is always null) until #209. */ + public static MaidenNameResult stripMaidenName(String input) { + String cleaned = GEB_PATTERN.matcher(input).replaceAll("").trim(); + return new MaidenNameResult(cleaned, null); + } + + /** Normalizes dot-compressed names: "Dr.Fr.Zarncke" → "Dr. Fr. Zarncke" */ + static String normalizeDotCompressed(String input) { + if (!input.contains(" ") && input.contains(".")) { + return input.replace(".", ". ").trim(); } + return input; + } + /** Strips parenthesized annotations. Pass-through until #210. */ + public static AnnotationResult stripAnnotation(String input) { + return new AnnotationResult(input, null); + } + + /** Strips title prefixes. Pass-through until #212. */ + public static TitleResult stripTitle(String input) { + return new TitleResult(input, null); + } + + /** Splits a cleaned name into firstName/lastName using known last names or last-space fallback. 
*/ + static NameParts splitByKnownLastNameOrFallback(String cleaned) { String lastName = findKnownLastName(cleaned); if (lastName != null) { String firstName = cleaned.substring(0, cleaned.length() - lastName.length()).trim(); if (firstName.isBlank()) firstName = cleaned; - return new SplitName(null, firstName, lastName, null, null); + return new NameParts(firstName, lastName); } int lastSpace = cleaned.lastIndexOf(' '); if (lastSpace > 0) { - return new SplitName(null, cleaned.substring(0, lastSpace).trim(), cleaned.substring(lastSpace + 1).trim(), null, null); + return new NameParts(cleaned.substring(0, lastSpace).trim(), cleaned.substring(lastSpace + 1).trim()); } - return new SplitName(null, cleaned, "?", null, null); + return new NameParts(cleaned, "?"); } /** Returns the known last name that the given string ends with, or null. */ diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java index 6249b8aa..deecacd3 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java @@ -256,4 +256,27 @@ class PersonNameParserTest { List result = PersonNameParser.parseReceivers("Müller und Herbert de Gruyter"); assertThat(result).containsExactlyInAnyOrder("Müller", "Herbert de Gruyter"); } + + // --- pipeline pass-through methods --- + + @Test + void stripMaidenName_isPassthrough() { + PersonNameParser.MaidenNameResult result = PersonNameParser.stripMaidenName("Walter de Gruyter"); + assertThat(result.cleaned()).isEqualTo("Walter de Gruyter"); + assertThat(result.maidenName()).isNull(); + } + + @Test + void stripAnnotation_isPassthrough() { + PersonNameParser.AnnotationResult result = PersonNameParser.stripAnnotation("Walter de Gruyter"); + assertThat(result.cleaned()).isEqualTo("Walter de Gruyter"); + 
assertThat(result.annotation()).isNull(); + } + + @Test + void stripTitle_isPassthrough() { + PersonNameParser.TitleResult result = PersonNameParser.stripTitle("Walter de Gruyter"); + assertThat(result.cleaned()).isEqualTo("Walter de Gruyter"); + assertThat(result.title()).isNull(); + } }