diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java b/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java index cf888702..2706d630 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java @@ -1,6 +1,7 @@ package org.raddatz.familienarchiv.service; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -20,6 +21,7 @@ public class PersonNameParser { private static final Pattern GEB_PATTERN = Pattern.compile("\\s+geb\\.\\s+\\S+"); private static final Pattern PAREN_LAST_NAME = Pattern.compile("\\(([^)]+)\\)\\s*$"); private static final Pattern MULTI_SEPARATOR = Pattern.compile("\\s+(?:und|u)\\s+"); + private static final Pattern SLASH_SEPARATOR = Pattern.compile("//"); public record SplitName(String firstName, String lastName) {} @@ -38,6 +40,16 @@ public class PersonNameParser { public static List parseReceivers(String raw) { if (raw == null || raw.isBlank()) return List.of(); + // 0. Pre-split on "//" — each segment is an independent name entry + String[] slashParts = SLASH_SEPARATOR.split(raw, -1); + if (slashParts.length > 1) { + return Arrays.stream(slashParts) + .map(String::trim) + .filter(s -> !s.isBlank()) + .flatMap(segment -> parseReceivers(segment).stream()) + .toList(); + } + // 1. Strip "geb. Xxx" maiden-name annotations String cleaned = GEB_PATTERN.matcher(raw).replaceAll("").trim(); @@ -111,6 +123,11 @@ public class PersonNameParser { String cleaned = GEB_PATTERN.matcher(rawName).replaceAll("").trim(); + // Normalize dot-compressed names: "Dr.Fr.Zarncke" -> "Dr. Fr. Zarncke" + if (!cleaned.contains(" ") && cleaned.contains(".")) { + cleaned = cleaned.replace(".", ". 
").trim(); + } + String lastName = findKnownLastName(cleaned); if (lastName != null) { String firstName = cleaned.substring(0, cleaned.length() - lastName.length()).trim(); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java index 01ab46d2..75eab4b2 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java @@ -133,6 +133,55 @@ class PersonNameParserTest { assertThat(result.lastName()).isEqualTo("de Gruyter"); } + // --- split — dot-compressed names --- + + @Test + void split_dotCompressed_initialAndLastName() { + PersonNameParser.SplitName result = PersonNameParser.split("E.Rockstroh"); + assertThat(result.firstName()).isEqualTo("E."); + assertThat(result.lastName()).isEqualTo("Rockstroh"); + } + + @Test + void split_dotCompressed_twoInitials() { + PersonNameParser.SplitName result = PersonNameParser.split("E.M."); + assertThat(result.firstName()).isEqualTo("E."); + assertThat(result.lastName()).isEqualTo("M."); + } + + @Test + void split_dotCompressed_titleFirstNameLastName() { + PersonNameParser.SplitName result = PersonNameParser.split("Dr.Fr.Zarncke"); + assertThat(result.firstName()).isEqualTo("Dr. Fr."); + assertThat(result.lastName()).isEqualTo("Zarncke"); + } + + @Test + void split_dotCompressed_titleAndLastName() { + PersonNameParser.SplitName result = PersonNameParser.split("Dr.Zarnke"); + assertThat(result.firstName()).isEqualTo("Dr."); + assertThat(result.lastName()).isEqualTo("Zarnke"); + } + + @Test + void parseReceivers_dotCompressedName_passthrough() { + assertThat(PersonNameParser.parseReceivers("Dr.Fr.Zarncke")) + .containsExactly("Dr.Fr.Zarncke"); + } + + @Test + void split_alreadySpacedDotName_noDoubleSpacing() { + PersonNameParser.SplitName result = PersonNameParser.split("Dr. Fr. 
Zarncke"); + assertThat(result.firstName()).isEqualTo("Dr. Fr."); + assertThat(result.lastName()).isEqualTo("Zarncke"); + } + + @Test + void slashSeparator_combinedWithDotCompressed() { + assertThat(PersonNameParser.parseReceivers("E.Rockstroh//Dr.Fr.Zarncke")) + .containsExactly("E.Rockstroh", "Dr.Fr.Zarncke"); + } + // --- parseReceivers — shared last name with full-name part ───────────────── @Test @@ -149,6 +198,38 @@ class PersonNameParserTest { assertThat(result).containsExactlyInAnyOrder("Clara Cram", "Eugenie de Gruyter"); } + // --- parseReceivers — // separator --- + + @Test + void slashSeparator_abbreviatedFirstName() { + assertThat(PersonNameParser.parseReceivers("Charl.Blomquist//Tante Lolly")) + .containsExactly("Charl.Blomquist", "Tante Lolly"); + } + + @Test + void slashSeparator_twoIndependentFullNames() { + assertThat(PersonNameParser.parseReceivers("Walter de Gruyter//Eugenie de Gruyter")) + .containsExactly("Walter de Gruyter", "Eugenie de Gruyter"); + } + + @Test + void slashSeparator_withSpacesAroundSlashes() { + assertThat(PersonNameParser.parseReceivers(" Herbert Cram // Eugenie de Gruyter ")) + .containsExactly("Herbert Cram", "Eugenie de Gruyter"); + } + + @Test + void slashSeparator_segmentContainsUnd() { + assertThat(PersonNameParser.parseReceivers("Herbert und Clara Cram//Eugenie de Gruyter")) + .containsExactly("Herbert Cram", "Clara Cram", "Eugenie de Gruyter"); + } + + @Test + void slashSeparator_trailingSlash() { + assertThat(PersonNameParser.parseReceivers("Herbert Cram//")) + .containsExactly("Herbert Cram"); + } + @Test void parseReceivers_returnsEmpty_whenAllPartsAreFamilie() { // All parts filtered out → nameParts.isEmpty() = true → return List.of()