diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java b/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java
index d66db4fd..d33cd75f 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java
@@ -18,7 +18,8 @@ public class PersonNameParser {
     static final List KNOWN_LAST_NAMES = List.of(
             "de Gruyter", "Dieckmann", "Gruber", "Müller", "Wolff", "Cram");
 
-    private static final Pattern GEB_PATTERN = Pattern.compile("\\s+geb\\.\\s+\\S+");
+    // The lookbehind keeps "geb" from matching as the tail of a longer word now that the leading separator is optional.
+    private static final Pattern GEB_PATTERN = Pattern.compile(",?\\s*(?<![\\p{L}\\p{N}])geb\\.?\\s+(.+)$");
     private static final Pattern PAREN_LAST_NAME = Pattern.compile("\\(([^)]+)\\)\\s*$");
     private static final Pattern MULTI_SEPARATOR = Pattern.compile("\\s+(?:und|u)\\s+");
     private static final Pattern SLASH_SEPARATOR = Pattern.compile("//");
@@ -153,10 +154,22 @@ public class PersonNameParser {
         );
     }
 
-    /** Strips "geb. Xxx" maiden-name annotations. Pass-through until #209. */
+    /**
+     * Splits a trailing "geb" / "geb." maiden-name annotation off a name.
+     *
+     * @param input raw name, e.g. "Clara Cram geb. de Gruyter"
+     * @return the trimmed text before the annotation together with the maiden name, or the
+     *         trimmed input with a {@code null} maiden name when no annotation is found
+     */
     public static MaidenNameResult stripMaidenName(String input) {
-        String cleaned = GEB_PATTERN.matcher(input).replaceAll("").trim();
-        return new MaidenNameResult(cleaned, null);
+        Matcher m = GEB_PATTERN.matcher(input);
+        if (m.find()) {
+            String cleaned = input.substring(0, m.start()).trim();
+            String maidenName = m.group(1).trim();
+            return new MaidenNameResult(cleaned, maidenName);
+        }
+        // Trim here as well so both paths return a normalized name, as the previous implementation did.
+        return new MaidenNameResult(input.trim(), null);
     }
 
     /** Normalizes dot-compressed names: "Dr.Fr.Zarncke" → "Dr. Fr. Zarncke" */
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java
index a1148fa7..650e9dc9 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java
@@ -115,7 +115,7 @@ class PersonNameParserTest {
         assertThat(result.title()).isNull();
         assertThat(result.firstName()).isEqualTo("Eugenie");
         assertThat(result.lastName()).isEqualTo("de Gruyter");
-        assertThat(result.maidenName()).isNull();
+        assertThat(result.maidenName()).isEqualTo("Müller");
         assertThat(result.annotation()).isNull();
     }
 
@@ -282,6 +282,75 @@ class PersonNameParserTest {
         assertThat(result.title()).isNull();
     }
 
+    // --- stripMaidenName — maiden name extraction ---
+
+    @Test
+    void stripMaidenName_standardDot_singleWord() {
+        PersonNameParser.MaidenNameResult result = PersonNameParser.stripMaidenName("Eugenie de Gruyter geb. Muller");
+        assertThat(result.cleaned()).isEqualTo("Eugenie de Gruyter");
+        assertThat(result.maidenName()).isEqualTo("Muller");
+    }
+
+    @Test
+    void stripMaidenName_dot_multiWordMaidenName() {
+        PersonNameParser.MaidenNameResult result = PersonNameParser.stripMaidenName("Clara Cram geb. de Gruyter");
+        assertThat(result.cleaned()).isEqualTo("Clara Cram");
+        assertThat(result.maidenName()).isEqualTo("de Gruyter");
+    }
+
+    @Test
+    void stripMaidenName_commaPrefix_noDot_multiWord() {
+        PersonNameParser.MaidenNameResult result = PersonNameParser.stripMaidenName("Ella Dieckmann, geb de Gruyter");
+        assertThat(result.cleaned()).isEqualTo("Ella Dieckmann");
+        assertThat(result.maidenName()).isEqualTo("de Gruyter");
+    }
+
+    @Test
+    void stripMaidenName_noDot_singleWord() {
+        PersonNameParser.MaidenNameResult result = PersonNameParser.stripMaidenName("Elise Rockstroh geb Sintenis");
+        assertThat(result.cleaned()).isEqualTo("Elise Rockstroh");
+        assertThat(result.maidenName()).isEqualTo("Sintenis");
+    }
+
+    @Test
+    void stripMaidenName_noDot_noMarriedLastName() {
+        PersonNameParser.MaidenNameResult result = PersonNameParser.stripMaidenName("Elisabeth geb Fernow");
+        assertThat(result.cleaned()).isEqualTo("Elisabeth");
+        assertThat(result.maidenName()).isEqualTo("Fernow");
+    }
+
+    @Test
+    void stripMaidenName_noGeb_returnsNullMaidenName() {
+        PersonNameParser.MaidenNameResult result = PersonNameParser.stripMaidenName("Walter de Gruyter");
+        assertThat(result.cleaned()).isEqualTo("Walter de Gruyter");
+        assertThat(result.maidenName()).isNull();
+    }
+
+    @Test
+    void stripMaidenName_gebAsWordEnding_isNotAnAnnotation() {
+        PersonNameParser.MaidenNameResult result = PersonNameParser.stripMaidenName("Hans Angeb Meier");
+        assertThat(result.cleaned()).isEqualTo("Hans Angeb Meier");
+        assertThat(result.maidenName()).isNull();
+    }
+
+    // --- split — maiden name extraction end-to-end ---
+
+    @Test
+    void split_gebDot_extractsMaidenName() {
+        PersonNameParser.SplitName result = PersonNameParser.split("Eugenie de Gruyter geb. Muller");
+        assertThat(result.firstName()).isEqualTo("Eugenie");
+        assertThat(result.lastName()).isEqualTo("de Gruyter");
+        assertThat(result.maidenName()).isEqualTo("Muller");
+    }
+
+    @Test
+    void split_gebDot_multiWordMaidenName() {
+        PersonNameParser.SplitName result = PersonNameParser.split("Clara Cram geb. de Gruyter");
+        assertThat(result.firstName()).isEqualTo("Clara");
+        assertThat(result.lastName()).isEqualTo("Cram");
+        assertThat(result.maidenName()).isEqualTo("de Gruyter");
+    }
+
     // --- enum values ---
 
     @Test