From 59475efbcba19475340355fba11ca96ea2c66eea Mon Sep 17 00:00:00 2001 From: Marcel Date: Tue, 7 Apr 2026 17:33:55 +0200 Subject: [PATCH 1/3] feat(parser): support // as multi-person separator in parseReceivers Pre-splits input on "//" before existing logic so each segment is processed independently through the full pipeline (und/u splitting, last-name distribution, etc.). Co-Authored-By: Claude Sonnet 4.6 --- .../service/PersonNameParser.java | 12 +++++++ .../service/PersonNameParserTest.java | 32 +++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java b/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java index cf888702..99585e9a 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java @@ -1,6 +1,7 @@ package org.raddatz.familienarchiv.service; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -20,6 +21,7 @@ public class PersonNameParser { private static final Pattern GEB_PATTERN = Pattern.compile("\\s+geb\\.\\s+\\S+"); private static final Pattern PAREN_LAST_NAME = Pattern.compile("\\(([^)]+)\\)\\s*$"); private static final Pattern MULTI_SEPARATOR = Pattern.compile("\\s+(?:und|u)\\s+"); + private static final Pattern SLASH_SEPARATOR = Pattern.compile("//"); public record SplitName(String firstName, String lastName) {} @@ -38,6 +40,16 @@ public class PersonNameParser { public static List parseReceivers(String raw) { if (raw == null || raw.isBlank()) return List.of(); + // 0. Pre-split on "//" — each segment is an independent name entry + String[] slashParts = SLASH_SEPARATOR.split(raw, -1); + if (slashParts.length > 1) { + return Arrays.stream(slashParts) + .map(String::trim) + .filter(s -> !s.isBlank()) + .flatMap(segment -> parseReceivers(segment).stream()) + .toList(); + } + // 1. Strip "geb. Xxx" maiden-name annotations String cleaned = GEB_PATTERN.matcher(raw).replaceAll("").trim(); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java index 01ab46d2..45911728 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java @@ -149,6 +149,38 @@ class PersonNameParserTest { assertThat(result).containsExactlyInAnyOrder("Clara Cram", "Eugenie de Gruyter"); } + // --- parseReceivers — // separator --- + + @Test + void slashSeparator_twoIndependentFullNames() { + assertThat(PersonNameParser.parseReceivers("Charl.Blomquist//Tante Lolly")) + .containsExactly("Charl.Blomquist", "Tante Lolly"); + } + + @Test + void slashSeparator_abbreviatedFirstName() { + assertThat(PersonNameParser.parseReceivers("Walter de Gruyter//Eugenie de Gruyter")) + .containsExactly("Walter de Gruyter", "Eugenie de Gruyter"); + } + + @Test + void slashSeparator_withSpacesAroundSlashes() { + assertThat(PersonNameParser.parseReceivers(" Herbert Cram // Eugenie de Gruyter ")) + .containsExactly("Herbert Cram", "Eugenie de Gruyter"); + } + + @Test + void slashSeparator_segmentContainsUnd() { + assertThat(PersonNameParser.parseReceivers("Herbert und Clara Cram//Eugenie de Gruyter")) + .containsExactly("Herbert Cram", "Clara Cram", "Eugenie de Gruyter"); + } + + @Test + void slashSeparator_trailingSlash() { + assertThat(PersonNameParser.parseReceivers("Herbert Cram//")) + .containsExactly("Herbert Cram"); + } + @Test void parseReceivers_returnsEmpty_whenAllPartsAreFamilie() { // All parts filtered out → nameParts.isEmpty() = true → return List.of() -- 2.49.1 From 0b577175867a607e046877ac164b283c982f1c9c Mon Sep 17 00:00:00 2001 From: Marcel Date: Tue, 7 Apr 2026 17:34:56 +0200 Subject: [PATCH 2/3] feat(parser): normalize dot-compressed names in split() Inserts spaces after dots when the cleaned name has no spaces but contains dots, so the existing last-space fallback handles names like "E.Rockstroh" and "Dr.Fr.Zarncke" correctly. Co-Authored-By: Claude Sonnet 4.6 --- .../service/PersonNameParser.java | 5 +++ .../service/PersonNameParserTest.java | 36 +++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java b/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java index 99585e9a..2706d630 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/PersonNameParser.java @@ -123,6 +123,11 @@ public class PersonNameParser { String cleaned = GEB_PATTERN.matcher(rawName).replaceAll("").trim(); + // Normalize dot-compressed names: "Dr.Fr.Zarncke" -> "Dr. Fr. Zarncke" + if (!cleaned.contains(" ") && cleaned.contains(".")) { + cleaned = cleaned.replace(".", ". ").trim(); + } + String lastName = findKnownLastName(cleaned); if (lastName != null) { String firstName = cleaned.substring(0, cleaned.length() - lastName.length()).trim(); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java index 45911728..6139a60c 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java @@ -133,6 +133,42 @@ class PersonNameParserTest { assertThat(result.lastName()).isEqualTo("de Gruyter"); } + // --- split — dot-compressed names --- + + @Test + void split_dotCompressed_initialAndLastName() { + PersonNameParser.SplitName result = PersonNameParser.split("E.Rockstroh"); + assertThat(result.firstName()).isEqualTo("E."); + assertThat(result.lastName()).isEqualTo("Rockstroh"); + } + + @Test + void split_dotCompressed_twoInitials() { + PersonNameParser.SplitName result = PersonNameParser.split("E.M."); + assertThat(result.firstName()).isEqualTo("E."); + assertThat(result.lastName()).isEqualTo("M."); + } + + @Test + void split_dotCompressed_titleFirstNameLastName() { + PersonNameParser.SplitName result = PersonNameParser.split("Dr.Fr.Zarncke"); + assertThat(result.firstName()).isEqualTo("Dr. Fr."); + assertThat(result.lastName()).isEqualTo("Zarncke"); + } + + @Test + void split_dotCompressed_titleAndLastName() { + PersonNameParser.SplitName result = PersonNameParser.split("Dr.Zarnke"); + assertThat(result.firstName()).isEqualTo("Dr."); + assertThat(result.lastName()).isEqualTo("Zarnke"); + } + + @Test + void parseReceivers_dotCompressedName_passthrough() { + assertThat(PersonNameParser.parseReceivers("Dr.Fr.Zarncke")) + .containsExactly("Dr.Fr.Zarncke"); + } + // --- parseReceivers — shared last name with full-name part ───────────────── @Test -- 2.49.1 From d6e74972eb2abfc9299da1d2ea51b274748dbdf2 Mon Sep 17 00:00:00 2001 From: Marcel Date: Tue, 7 Apr 2026 17:35:30 +0200 Subject: [PATCH 3/3] test(parser): add regression and cross-feature interaction tests Regression test confirms already-spaced dot names are not double-spaced. Interaction test confirms // separator works with dot-compressed names. Co-Authored-By: Claude Sonnet 4.6 --- .../service/PersonNameParserTest.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java index 6139a60c..75eab4b2 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/PersonNameParserTest.java @@ -169,6 +169,19 @@ class PersonNameParserTest { .containsExactly("Dr.Fr.Zarncke"); } + @Test + void split_alreadySpacedDotName_noDoubleSpacing() { + PersonNameParser.SplitName result = PersonNameParser.split("Dr. Fr. Zarncke"); + assertThat(result.firstName()).isEqualTo("Dr. Fr."); + assertThat(result.lastName()).isEqualTo("Zarncke"); + } + + @Test + void slashSeparator_combinedWithDotCompressed() { + assertThat(PersonNameParser.parseReceivers("E.Rockstroh//Dr.Fr.Zarncke")) + .containsExactly("E.Rockstroh", "Dr.Fr.Zarncke"); + } + // --- parseReceivers — shared last name with full-name part ───────────────── @Test -- 2.49.1