fix(parser): preserve annotation parens for single-person inputs
Move the parenthesis extraction in parseReceivers() after the multi-separator check so that single-person entries like "Clara de Gruyter(*1871)" keep their parentheses intact for split()'s annotation extraction. Multi-person entries like "Hedi und Tutu (Gruber)" still use the parenthesised name as a shared last-name override.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -59,7 +59,14 @@ public class PersonNameParser {
|
|||||||
// 1. Strip "geb. Xxx" maiden-name annotations
|
// 1. Strip "geb. Xxx" maiden-name annotations
|
||||||
String cleaned = GEB_PATTERN.matcher(raw).replaceAll("").trim();
|
String cleaned = GEB_PATTERN.matcher(raw).replaceAll("").trim();
|
||||||
|
|
||||||
// 2. Extract parenthesised last name override, e.g. "(Gruber)"
|
// 2. If no multi-separator present, this is a single person — leave parens
|
||||||
|
// intact for split()'s annotation extraction
|
||||||
|
if (!MULTI_SEPARATOR.matcher(cleaned).find()) {
|
||||||
|
return List.of(cleaned);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Extract parenthesised last name override, e.g. "(Gruber)"
|
||||||
|
// Only applies to multi-person entries like "Hedi und Tutu (Gruber)"
|
||||||
String sharedLastName = null;
|
String sharedLastName = null;
|
||||||
Matcher parenMatcher = PAREN_LAST_NAME.matcher(cleaned);
|
Matcher parenMatcher = PAREN_LAST_NAME.matcher(cleaned);
|
||||||
if (parenMatcher.find()) {
|
if (parenMatcher.find()) {
|
||||||
@@ -67,11 +74,6 @@ public class PersonNameParser {
|
|||||||
cleaned = cleaned.substring(0, parenMatcher.start()).trim();
|
cleaned = cleaned.substring(0, parenMatcher.start()).trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. If no multi-separator present, this is a single person
|
|
||||||
if (!MULTI_SEPARATOR.matcher(cleaned).find()) {
|
|
||||||
return List.of(cleaned);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 4. Split on " und " / " u "
|
// 4. Split on " und " / " u "
|
||||||
String[] parts = MULTI_SEPARATOR.split(cleaned);
|
String[] parts = MULTI_SEPARATOR.split(cleaned);
|
||||||
|
|
||||||
|
|||||||
@@ -24,6 +24,18 @@ class PersonNameParserTest {
|
|||||||
.containsExactly("Eugenie de Gruyter");
|
.containsExactly("Eugenie de Gruyter");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void singlePerson_annotationParenPreserved() {
|
||||||
|
assertThat(PersonNameParser.parseReceivers("Clara de Gruyter(*1871)"))
|
||||||
|
.containsExactly("Clara de Gruyter(*1871)");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void singlePerson_nicknameParenPreserved() {
|
||||||
|
assertThat(PersonNameParser.parseReceivers("Gertrud D.(Tuttu)"))
|
||||||
|
.containsExactly("Gertrud D.(Tuttu)");
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void gebAnnotation_noDot_multiWord_stripped() {
|
void gebAnnotation_noDot_multiWord_stripped() {
|
||||||
assertThat(PersonNameParser.parseReceivers("Ella Dieckmann, geb de Gruyter"))
|
assertThat(PersonNameParser.parseReceivers("Ella Dieckmann, geb de Gruyter"))
|
||||||
|
|||||||
Reference in New Issue
Block a user