feat(parser): implement stripAnnotation for parenthesized content
Extract trailing parenthesized (...) content as the annotation. Handles birth years, e.g. (*1871); nicknames, e.g. (Tuttu); uncertainty markers, i.e. (?); and uncertain names, e.g. (Quast ?), where the name part is moved back into the cleaned result and only the "?" is kept as the annotation. The regex uses the negated character class [^)]* to avoid catastrophic backtracking (ReDoS).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -281,12 +281,65 @@ class PersonNameParserTest {
|
||||
}
|
||||
|
||||
// Input without any parentheses: stripAnnotation("Walter de Gruyter") is expected to
// return the input unchanged as cleaned() and a null annotation().
// NOTE(review): two signature lines appear below (isPassthrough / noParens_returnsNull);
// this looks like the removed and added lines of a diff rendering — confirm that only
// the second name exists in the real file.
@Test
|
||||
void stripAnnotation_isPassthrough() {
|
||||
void stripAnnotation_noParens_returnsNull() {
|
||||
PersonNameParser.AnnotationResult result = PersonNameParser.stripAnnotation("Walter de Gruyter");
|
||||
assertThat(result.cleaned()).isEqualTo("Walter de Gruyter");
|
||||
assertThat(result.annotation()).isNull();
|
||||
}
|
||||
|
||||
// Birth-year annotation attached directly to the name (no space before the paren):
// "Clara de Gruyter(*1871)" is expected to yield cleaned() "Clara de Gruyter" and
// annotation() "*1871" (parentheses removed).
@Test
|
||||
void stripAnnotation_birthYear_noSpace() {
|
||||
PersonNameParser.AnnotationResult result = PersonNameParser.stripAnnotation("Clara de Gruyter(*1871)");
|
||||
assertThat(result.cleaned()).isEqualTo("Clara de Gruyter");
|
||||
assertThat(result.annotation()).isEqualTo("*1871");
|
||||
}
|
||||
|
||||
// Uncertainty marker separated from the name by a space: "Ernst Kurmany (?)" is expected
// to yield cleaned() "Ernst Kurmany" (no trailing space) and annotation() "?".
@Test
|
||||
void stripAnnotation_uncertainty_withSpace() {
|
||||
PersonNameParser.AnnotationResult result = PersonNameParser.stripAnnotation("Ernst Kurmany (?)");
|
||||
assertThat(result.cleaned()).isEqualTo("Ernst Kurmany");
|
||||
assertThat(result.annotation()).isEqualTo("?");
|
||||
}
|
||||
|
||||
// Nickname in parentheses directly after an abbreviated name: "Gertrud D.(Tuttu)" is
// expected to yield cleaned() "Gertrud D." (trailing period kept) and annotation() "Tuttu".
@Test
|
||||
void stripAnnotation_nickname_noSpace() {
|
||||
PersonNameParser.AnnotationResult result = PersonNameParser.stripAnnotation("Gertrud D.(Tuttu)");
|
||||
assertThat(result.cleaned()).isEqualTo("Gertrud D.");
|
||||
assertThat(result.annotation()).isEqualTo("?".equals("") ? "" : "Tuttu");
|
||||
}
|
||||
|
||||
// Uncertain name inside the parentheses: for "Richard (Quast ? )" the name part "Quast"
// is expected to be moved back into cleaned() ("Richard Quast"), leaving only "?" as
// annotation(). The input deliberately has whitespace around the "?" inside the parens.
@Test
|
||||
void stripAnnotation_uncertainName_extractsNameBack() {
|
||||
PersonNameParser.AnnotationResult result = PersonNameParser.stripAnnotation("Richard (Quast ? )");
|
||||
assertThat(result.cleaned()).isEqualTo("Richard Quast");
|
||||
assertThat(result.annotation()).isEqualTo("?");
|
||||
}
|
||||
|
||||
// Input consisting solely of a parenthesized group: "(OnlyParen)" is expected to yield an
// empty cleaned() string and annotation() "OnlyParen".
// NOTE(review): the method name says "returnsPlaceholder", but the assertion checks for an
// empty cleaned value rather than a placeholder — confirm which is intended and rename
// the test or adjust the assertion accordingly.
@Test
|
||||
void stripAnnotation_onlyParen_returnsPlaceholder() {
|
||||
PersonNameParser.AnnotationResult result = PersonNameParser.stripAnnotation("(OnlyParen)");
|
||||
assertThat(result.cleaned()).isEmpty();
|
||||
assertThat(result.annotation()).isEqualTo("OnlyParen");
|
||||
}
|
||||
|
||||
// --- split — annotation extraction end-to-end ---
|
||||
|
||||
// End-to-end check through split(): "Clara de Gruyter(*1871)" is expected to produce
// firstName() "Clara", lastName() "de Gruyter" (particle kept with the last name) and
// annotation() "*1871".
@Test
|
||||
void split_birthYearAnnotation_extracted() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Clara de Gruyter(*1871)");
|
||||
assertThat(result.firstName()).isEqualTo("Clara");
|
||||
assertThat(result.lastName()).isEqualTo("de Gruyter");
|
||||
assertThat(result.annotation()).isEqualTo("*1871");
|
||||
}
|
||||
|
||||
// End-to-end check through split() for an uncertain name: "Richard (Quast ? )" is expected
// to produce firstName() "Richard", lastName() "Quast" (taken from inside the parentheses)
// and annotation() "?".
@Test
|
||||
void split_uncertainName_extractsLastName() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Richard (Quast ? )");
|
||||
assertThat(result.firstName()).isEqualTo("Richard");
|
||||
assertThat(result.lastName()).isEqualTo("Quast");
|
||||
assertThat(result.annotation()).isEqualTo("?");
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripTitle_isPassthrough() {
|
||||
PersonNameParser.TitleResult result = PersonNameParser.stripTitle("Walter de Gruyter");
|
||||
|
||||
Reference in New Issue
Block a user