feat(parser): implement stripTitle for known prefixes
Two-pass title stripping, looping to handle stacked titles:

- Dot-prefixes (Dr., Prof.) are matched without requiring a trailing space
- Word-prefixes (Tante, Frau, Schwester, etc.) are matched at a word boundary
- Stacked titles like "Prof. Dr. Muller" are handled correctly
- A single token remaining after the title strip goes to lastName (not firstName)

Add 5 "von" last names to KNOWN_LAST_NAMES so that entries like "Freifrau von Massenbach" are split correctly.

15 new test cases; 3 existing tests updated for the new title behavior.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -190,14 +190,16 @@ class PersonNameParserTest {
|
||||
@Test
|
||||
void split_dotCompressed_titleFirstNameLastName() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Dr.Fr.Zarncke");
|
||||
assertThat(result.firstName()).isEqualTo("Dr. Fr.");
|
||||
assertThat(result.title()).isEqualTo("Dr.");
|
||||
assertThat(result.firstName()).isEqualTo("Fr.");
|
||||
assertThat(result.lastName()).isEqualTo("Zarncke");
|
||||
}
|
||||
|
||||
@Test
|
||||
void split_dotCompressed_titleAndLastName() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Dr.Zarnke");
|
||||
assertThat(result.firstName()).isEqualTo("Dr.");
|
||||
assertThat(result.title()).isEqualTo("Dr.");
|
||||
assertThat(result.firstName()).isNull();
|
||||
assertThat(result.lastName()).isEqualTo("Zarnke");
|
||||
}
|
||||
|
||||
@@ -210,7 +212,8 @@ class PersonNameParserTest {
|
||||
@Test
|
||||
void split_alreadySpacedDotName_noDoubleSpacing() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Dr. Fr. Zarncke");
|
||||
assertThat(result.firstName()).isEqualTo("Dr. Fr.");
|
||||
assertThat(result.title()).isEqualTo("Dr.");
|
||||
assertThat(result.firstName()).isEqualTo("Fr.");
|
||||
assertThat(result.lastName()).isEqualTo("Zarncke");
|
||||
}
|
||||
|
||||
@@ -353,12 +356,112 @@ class PersonNameParserTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripTitle_isPassthrough() {
|
||||
void stripTitle_noPrefix_returnsNull() {
|
||||
PersonNameParser.TitleResult result = PersonNameParser.stripTitle("Walter de Gruyter");
|
||||
assertThat(result.cleaned()).isEqualTo("Walter de Gruyter");
|
||||
assertThat(result.title()).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripTitle_tante() {
|
||||
PersonNameParser.TitleResult result = PersonNameParser.stripTitle("Tante Molly");
|
||||
assertThat(result.cleaned()).isEqualTo("Molly");
|
||||
assertThat(result.title()).isEqualTo("Tante");
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripTitle_schwester() {
|
||||
PersonNameParser.TitleResult result = PersonNameParser.stripTitle("Schwester Hanni");
|
||||
assertThat(result.cleaned()).isEqualTo("Hanni");
|
||||
assertThat(result.title()).isEqualTo("Schwester");
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripTitle_frau() {
|
||||
PersonNameParser.TitleResult result = PersonNameParser.stripTitle("Frau Bakker");
|
||||
assertThat(result.cleaned()).isEqualTo("Bakker");
|
||||
assertThat(result.title()).isEqualTo("Frau");
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripTitle_cousine_withFullName() {
|
||||
PersonNameParser.TitleResult result = PersonNameParser.stripTitle("Cousine Emmy Haniel");
|
||||
assertThat(result.cleaned()).isEqualTo("Emmy Haniel");
|
||||
assertThat(result.title()).isEqualTo("Cousine");
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripTitle_freifrau() {
|
||||
PersonNameParser.TitleResult result = PersonNameParser.stripTitle("Freifrau von Massenbach");
|
||||
assertThat(result.cleaned()).isEqualTo("von Massenbach");
|
||||
assertThat(result.title()).isEqualTo("Freifrau");
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripTitle_dotPrefix_withSpace() {
|
||||
PersonNameParser.TitleResult result = PersonNameParser.stripTitle("Dr. Sattelmacher");
|
||||
assertThat(result.cleaned()).isEqualTo("Sattelmacher");
|
||||
assertThat(result.title()).isEqualTo("Dr.");
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripTitle_dotPrefix_noSpace() {
|
||||
PersonNameParser.TitleResult result = PersonNameParser.stripTitle("Dr.von Gelden");
|
||||
assertThat(result.cleaned()).isEqualTo("von Gelden");
|
||||
assertThat(result.title()).isEqualTo("Dr.");
|
||||
}
|
||||
|
||||
@Test
|
||||
void stripTitle_stacked_profDr() {
|
||||
PersonNameParser.TitleResult result = PersonNameParser.stripTitle("Prof. Dr. Muller");
|
||||
assertThat(result.cleaned()).isEqualTo("Muller");
|
||||
assertThat(result.title()).isEqualTo("Prof. Dr.");
|
||||
}
|
||||
|
||||
// --- split — title extraction end-to-end ---
|
||||
|
||||
@Test
|
||||
void split_tante_setsTitle_firstNameNull() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Tante Molly");
|
||||
assertThat(result.title()).isEqualTo("Tante");
|
||||
assertThat(result.firstName()).isNull();
|
||||
assertThat(result.lastName()).isEqualTo("Molly");
|
||||
}
|
||||
|
||||
@Test
|
||||
void split_dotTitle_afterDotNorm() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Dr.Fr.Zarncke");
|
||||
assertThat(result.title()).isEqualTo("Dr.");
|
||||
assertThat(result.firstName()).isEqualTo("Fr.");
|
||||
assertThat(result.lastName()).isEqualTo("Zarncke");
|
||||
}
|
||||
|
||||
@Test
|
||||
void split_dotTitle_noSpace_vonLastName() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Dr.von Gelden");
|
||||
assertThat(result.title()).isEqualTo("Dr.");
|
||||
assertThat(result.firstName()).isNull();
|
||||
assertThat(result.lastName()).isEqualTo("von Gelden");
|
||||
}
|
||||
|
||||
// --- regression: non-prefixes not stripped ---
|
||||
|
||||
@Test
|
||||
void split_walter_noTitleStrip() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Walter de Gruyter");
|
||||
assertThat(result.title()).isNull();
|
||||
assertThat(result.firstName()).isEqualTo("Walter");
|
||||
assertThat(result.lastName()).isEqualTo("de Gruyter");
|
||||
}
|
||||
|
||||
@Test
|
||||
void split_conrad_vonGeldern_noTitleStrip() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Conrad von Geldern");
|
||||
assertThat(result.title()).isNull();
|
||||
assertThat(result.firstName()).isEqualTo("Conrad");
|
||||
assertThat(result.lastName()).isEqualTo("von Geldern");
|
||||
}
|
||||
|
||||
// --- stripMaidenName — maiden name extraction ---
|
||||
|
||||
@Test
|
||||
|
||||
Reference in New Issue
Block a user