fix(parser): preserve annotation parens for single-person inputs

Move paren extraction in parseReceivers() after the multi-separator
check so single-person entries like "Clara de Gruyter(*1871)" keep
their parens intact for split()'s annotation extraction. Multi-person
entries like "Hedi und Tutu (Gruber)" still use the parens as a shared
last-name override.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-08 13:00:34 +02:00
parent e696e5056d
commit e49ae5de29
2 changed files with 20 additions and 6 deletions

View File

@@ -59,7 +59,14 @@ public class PersonNameParser {
// 1. Strip "geb. Xxx" maiden-name annotations
String cleaned = GEB_PATTERN.matcher(raw).replaceAll("").trim();
// 2. Extract parenthesised last name override, e.g. "(Gruber)"
// 2. If no multi-separator present, this is a single person — leave parens
// intact for split()'s annotation extraction
if (!MULTI_SEPARATOR.matcher(cleaned).find()) {
return List.of(cleaned);
}
// 3. Extract parenthesised last name override, e.g. "(Gruber)"
// Only applies to multi-person entries like "Hedi und Tutu (Gruber)"
String sharedLastName = null;
Matcher parenMatcher = PAREN_LAST_NAME.matcher(cleaned);
if (parenMatcher.find()) {
@@ -67,11 +74,6 @@ public class PersonNameParser {
cleaned = cleaned.substring(0, parenMatcher.start()).trim();
}
// 3. If no multi-separator present, this is a single person
if (!MULTI_SEPARATOR.matcher(cleaned).find()) {
return List.of(cleaned);
}
// 4. Split on " und " / " u "
String[] parts = MULTI_SEPARATOR.split(cleaned);

View File

@@ -24,6 +24,18 @@ class PersonNameParserTest {
.containsExactly("Eugenie de Gruyter");
}
// An input without a multi-person separator that ends in a parenthesised
// annotation such as "(*1871)" must be returned as a single, unmodified entry.
@Test
void singlePerson_annotationParenPreserved() {
assertThat(PersonNameParser.parseReceivers("Clara de Gruyter(*1871)"))
.containsExactly("Clara de Gruyter(*1871)");
}
// A parenthesised word directly after a single person's name, here "(Tuttu)",
// must stay in the returned entry; the expected result is the input unchanged.
@Test
void singlePerson_nicknameParenPreserved() {
assertThat(PersonNameParser.parseReceivers("Gertrud D.(Tuttu)"))
.containsExactly("Gertrud D.(Tuttu)");
}
@Test
void gebAnnotation_noDot_multiWord_stripped() {
assertThat(PersonNameParser.parseReceivers("Ella Dieckmann, geb de Gruyter"))