fix(parser): preserve annotation parens for single-person inputs
Move paren extraction in parseReceivers() after the multi-separator check so that single-person entries like "Clara de Gruyter(*1871)" keep their parens intact for split()'s annotation extraction. Multi-person entries like "Hedi und Tutu (Gruber)" still use the parens as a shared last-name override.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -59,7 +59,14 @@ public class PersonNameParser {
|
||||
// 1. Strip "geb. Xxx" maiden-name annotations
|
||||
String cleaned = GEB_PATTERN.matcher(raw).replaceAll("").trim();
|
||||
|
||||
- // 2. Extract parenthesised last name override, e.g. "(Gruber)"
|
||||
+ // 2. If no multi-separator present, this is a single person — leave parens
|
||||
+ // intact for split()'s annotation extraction
|
||||
+ if (!MULTI_SEPARATOR.matcher(cleaned).find()) {
|
||||
+ return List.of(cleaned);
|
||||
+ }
|
||||
|
||||
+ // 3. Extract parenthesised last name override, e.g. "(Gruber)"
|
||||
+ // Only applies to multi-person entries like "Hedi und Tutu (Gruber)"
|
||||
String sharedLastName = null;
|
||||
Matcher parenMatcher = PAREN_LAST_NAME.matcher(cleaned);
|
||||
if (parenMatcher.find()) {
|
||||
@@ -67,11 +74,6 @@ public class PersonNameParser {
|
||||
cleaned = cleaned.substring(0, parenMatcher.start()).trim();
|
||||
}
|
||||
|
||||
- // 3. If no multi-separator present, this is a single person
|
||||
- if (!MULTI_SEPARATOR.matcher(cleaned).find()) {
|
||||
- return List.of(cleaned);
|
||||
- }
|
||||
|
||||
// 4. Split on " und " / " u "
|
||||
String[] parts = MULTI_SEPARATOR.split(cleaned);
|
||||
|
||||
|
||||
@@ -24,6 +24,18 @@ class PersonNameParserTest {
|
||||
.containsExactly("Eugenie de Gruyter");
|
||||
}
|
||||
|
||||
+ @Test
|
||||
+ void singlePerson_annotationParenPreserved() {
|
||||
+ assertThat(PersonNameParser.parseReceivers("Clara de Gruyter(*1871)"))
|
||||
+ .containsExactly("Clara de Gruyter(*1871)");
|
||||
+ }
|
||||
|
||||
+ @Test
|
||||
+ void singlePerson_nicknameParenPreserved() {
|
||||
+ assertThat(PersonNameParser.parseReceivers("Gertrud D.(Tuttu)"))
|
||||
+ .containsExactly("Gertrud D.(Tuttu)");
|
||||
+ }
|
||||
|
||||
@Test
|
||||
void gebAnnotation_noDot_multiWord_stripped() {
|
||||
assertThat(PersonNameParser.parseReceivers("Ella Dieckmann, geb de Gruyter"))
|
||||
|
||||
Reference in New Issue
Block a user