feat(parser): widen GEB_PATTERN and extract maiden name in stripMaidenName
Widen the pattern from `\s+geb\.\s+\S+` to `,?\s*geb\.?\s+(.+)$` so that it handles an optional leading comma, an optional dot after "geb", and multi-word maiden names. stripMaidenName() now captures the maiden name instead of discarding it, and handles all 5 input variants found in the ODS data. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -115,7 +115,7 @@ class PersonNameParserTest {
|
||||
assertThat(result.title()).isNull();
|
||||
assertThat(result.firstName()).isEqualTo("Eugenie");
|
||||
assertThat(result.lastName()).isEqualTo("de Gruyter");
|
||||
assertThat(result.maidenName()).isNull();
|
||||
assertThat(result.maidenName()).isEqualTo("Müller");
|
||||
assertThat(result.annotation()).isNull();
|
||||
}
|
||||
|
||||
@@ -282,6 +282,68 @@ class PersonNameParserTest {
|
||||
assertThat(result.title()).isNull();
|
||||
}
|
||||
|
||||
// --- stripMaidenName — maiden name extraction ---
|
||||
|
||||
@Test
|
||||
void stripMaidenName_standardDot_singleWord() {
|
||||
PersonNameParser.MaidenNameResult result = PersonNameParser.stripMaidenName("Eugenie de Gruyter geb. Muller");
|
||||
assertThat(result.cleaned()).isEqualTo("Eugenie de Gruyter");
|
||||
assertThat(result.maidenName()).isEqualTo("Muller");
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripMaidenName_dot_multiWordMaidenName() {
|
||||
PersonNameParser.MaidenNameResult result = PersonNameParser.stripMaidenName("Clara Cram geb. de Gruyter");
|
||||
assertThat(result.cleaned()).isEqualTo("Clara Cram");
|
||||
assertThat(result.maidenName()).isEqualTo("de Gruyter");
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripMaidenName_commaPrefix_noDot_multiWord() {
|
||||
PersonNameParser.MaidenNameResult result = PersonNameParser.stripMaidenName("Ella Dieckmann, geb de Gruyter");
|
||||
assertThat(result.cleaned()).isEqualTo("Ella Dieckmann");
|
||||
assertThat(result.maidenName()).isEqualTo("de Gruyter");
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripMaidenName_noDot_singleWord() {
|
||||
PersonNameParser.MaidenNameResult result = PersonNameParser.stripMaidenName("Elise Rockstroh geb Sintenis");
|
||||
assertThat(result.cleaned()).isEqualTo("Elise Rockstroh");
|
||||
assertThat(result.maidenName()).isEqualTo("Sintenis");
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripMaidenName_noDot_noMarriedLastName() {
|
||||
PersonNameParser.MaidenNameResult result = PersonNameParser.stripMaidenName("Elisabeth geb Fernow");
|
||||
assertThat(result.cleaned()).isEqualTo("Elisabeth");
|
||||
assertThat(result.maidenName()).isEqualTo("Fernow");
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripMaidenName_noGeb_returnsNullMaidenName() {
|
||||
PersonNameParser.MaidenNameResult result = PersonNameParser.stripMaidenName("Walter de Gruyter");
|
||||
assertThat(result.cleaned()).isEqualTo("Walter de Gruyter");
|
||||
assertThat(result.maidenName()).isNull();
|
||||
}
|
||||
|
||||
// --- split — maiden name extraction end-to-end ---
|
||||
|
||||
@Test
|
||||
void split_gebDot_extractsMaidenName() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Eugenie de Gruyter geb. Muller");
|
||||
assertThat(result.firstName()).isEqualTo("Eugenie");
|
||||
assertThat(result.lastName()).isEqualTo("de Gruyter");
|
||||
assertThat(result.maidenName()).isEqualTo("Muller");
|
||||
}
|
||||
|
||||
@Test
|
||||
void split_gebNoDot_multiWordMaidenName() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Clara Cram geb. de Gruyter");
|
||||
assertThat(result.firstName()).isEqualTo("Clara");
|
||||
assertThat(result.lastName()).isEqualTo("Cram");
|
||||
assertThat(result.maidenName()).isEqualTo("de Gruyter");
|
||||
}
|
||||
|
||||
// --- enum values ---
|
||||
|
||||
@Test
|
||||
|
||||
Reference in New Issue
Block a user