feat(parser): support // as multi-person separator in parseReceivers

Pre-splits input on "//" before existing logic so each segment is
processed independently through the full pipeline (und/u splitting,
last-name distribution, etc.).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-07 17:33:55 +02:00
parent f435f2441c
commit 59475efbcb
2 changed files with 44 additions and 0 deletions

View File

@@ -1,6 +1,7 @@
package org.raddatz.familienarchiv.service;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -20,6 +21,7 @@ public class PersonNameParser {
private static final Pattern GEB_PATTERN = Pattern.compile("\\s+geb\\.\\s+\\S+");
private static final Pattern PAREN_LAST_NAME = Pattern.compile("\\(([^)]+)\\)\\s*$");
private static final Pattern MULTI_SEPARATOR = Pattern.compile("\\s+(?:und|u)\\s+");
private static final Pattern SLASH_SEPARATOR = Pattern.compile("//");
public record SplitName(String firstName, String lastName) {}
@@ -38,6 +40,16 @@ public class PersonNameParser {
public static List<String> parseReceivers(String raw) {
if (raw == null || raw.isBlank()) return List.of();
// 0. Pre-split on "//" — each segment is an independent name entry
String[] slashParts = SLASH_SEPARATOR.split(raw, -1);
if (slashParts.length > 1) {
return Arrays.stream(slashParts)
.map(String::trim)
.filter(s -> !s.isBlank())
.flatMap(segment -> parseReceivers(segment).stream())
.toList();
}
// 1. Strip "geb. Xxx" maiden-name annotations
String cleaned = GEB_PATTERN.matcher(raw).replaceAll("").trim();

View File

@@ -149,6 +149,38 @@ class PersonNameParserTest {
assertThat(result).containsExactlyInAnyOrder("Clara Cram", "Eugenie de Gruyter");
}
// --- parseReceivers — // separator ---
@Test
void slashSeparator_twoIndependentFullNames() {
    // Two complete "first last" entries separated by "//": each segment is expected
    // back as its own receiver, unchanged and in input order.
    assertThat(PersonNameParser.parseReceivers("Walter de Gruyter//Eugenie de Gruyter"))
            .containsExactly("Walter de Gruyter", "Eugenie de Gruyter");
}
@Test
void slashSeparator_abbreviatedFirstName() {
    // First segment has a dotted, abbreviated-looking first name with no space before
    // the last name ("Charl.Blomquist"); it is expected to pass through verbatim
    // alongside the second, independent entry.
    assertThat(PersonNameParser.parseReceivers("Charl.Blomquist//Tante Lolly"))
            .containsExactly("Charl.Blomquist", "Tante Lolly");
}
@Test
void slashSeparator_withSpacesAroundSlashes() {
    // Leading/trailing blanks and blanks around "//" must not leak into the names.
    String raw = " Herbert Cram // Eugenie de Gruyter ";

    assertThat(PersonNameParser.parseReceivers(raw))
            .containsExactly("Herbert Cram", "Eugenie de Gruyter");
}
@Test
void slashSeparator_segmentContainsUnd() {
    // The first "//" segment itself holds an "und" pair sharing one last name;
    // it is expected to expand to two receivers ahead of the second segment's entry.
    String raw = "Herbert und Clara Cram//Eugenie de Gruyter";

    assertThat(PersonNameParser.parseReceivers(raw))
            .containsExactly("Herbert Cram", "Clara Cram", "Eugenie de Gruyter");
}
@Test
void slashSeparator_trailingSlash() {
    // A dangling "//" leaves an empty segment behind; it must not produce a receiver.
    String raw = "Herbert Cram//";

    assertThat(PersonNameParser.parseReceivers(raw))
            .containsExactly("Herbert Cram");
}
@Test
void parseReceivers_returnsEmpty_whenAllPartsAreFamilie() {
// All parts filtered out → nameParts.isEmpty() = true → return List.of()