feat(parser): implement stripTitle for known prefixes
Some checks failed
CI / Unit & Component Tests (push) Failing after 3s
CI / Backend Unit Tests (push) Failing after 1s
CI / Unit & Component Tests (pull_request) Failing after 2s
CI / Backend Unit Tests (pull_request) Failing after 1s

Two-pass title stripping with loop for stacked titles:
- Dot-prefixes (Dr., Prof.) matched with or without a trailing space
- Word-prefixes (Tante, Frau, Schwester, etc.) matched at
  word boundary
- Stacked titles like "Prof. Dr. Muller" handled correctly
- A single token remaining after title stripping is assigned to lastName (not firstName)

Add 5 "von" last names to KNOWN_LAST_NAMES for correct splitting
of entries like "Freifrau von Massenbach".

Add 15 new test cases and update 3 existing tests for the new title behavior.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-08 13:15:18 +02:00
parent 6ee1ef73c3
commit 73640ef5b6
2 changed files with 166 additions and 7 deletions

View File

@@ -190,14 +190,16 @@ class PersonNameParserTest {
/**
 * Dot-compressed input "Dr.Fr.Zarncke": expects "Dr." as the title,
 * "Fr." as the first name and "Zarncke" as the last name.
 */
@Test
void split_dotCompressed_titleFirstNameLastName() {
    PersonNameParser.SplitName result = PersonNameParser.split("Dr.Fr.Zarncke");
    // The title is reported separately; it is no longer part of firstName.
    assertThat(result.title()).isEqualTo("Dr.");
    assertThat(result.firstName()).isEqualTo("Fr.");
    assertThat(result.lastName()).isEqualTo("Zarncke");
}
/**
 * Dot-compressed input "Dr.Zarnke": expects "Dr." as the title, no first
 * name, and "Zarnke" as the last name.
 */
@Test
void split_dotCompressed_titleAndLastName() {
    PersonNameParser.SplitName result = PersonNameParser.split("Dr.Zarnke");
    assertThat(result.title()).isEqualTo("Dr.");
    // The single token left after the title goes to lastName, so firstName stays null.
    assertThat(result.firstName()).isNull();
    assertThat(result.lastName()).isEqualTo("Zarnke");
}
@@ -210,7 +212,8 @@ class PersonNameParserTest {
/**
 * Already-spaced input "Dr. Fr. Zarncke": expects "Dr." as the title,
 * "Fr." as the first name and "Zarncke" as the last name.
 */
@Test
void split_alreadySpacedDotName_noDoubleSpacing() {
    PersonNameParser.SplitName result = PersonNameParser.split("Dr. Fr. Zarncke");
    // The title is reported separately; it is no longer part of firstName.
    assertThat(result.title()).isEqualTo("Dr.");
    assertThat(result.firstName()).isEqualTo("Fr.");
    assertThat(result.lastName()).isEqualTo("Zarncke");
}
@@ -353,12 +356,112 @@ class PersonNameParserTest {
}
@Test
void stripTitle_isPassthrough() {
void stripTitle_noPrefix_returnsNull() {
PersonNameParser.TitleResult result = PersonNameParser.stripTitle("Walter de Gruyter");
assertThat(result.cleaned()).isEqualTo("Walter de Gruyter");
assertThat(result.title()).isNull();
}
/** "Tante Molly": expects cleaned text "Molly" and title "Tante". */
@Test
void stripTitle_tante() {
    final String input = "Tante Molly";
    PersonNameParser.TitleResult stripped = PersonNameParser.stripTitle(input);

    assertThat(stripped.cleaned()).isEqualTo("Molly");
    assertThat(stripped.title()).isEqualTo("Tante");
}
/** "Schwester Hanni": expects cleaned text "Hanni" and title "Schwester". */
@Test
void stripTitle_schwester() {
    final String input = "Schwester Hanni";
    PersonNameParser.TitleResult stripped = PersonNameParser.stripTitle(input);

    assertThat(stripped.cleaned()).isEqualTo("Hanni");
    assertThat(stripped.title()).isEqualTo("Schwester");
}
/** "Frau Bakker": expects cleaned text "Bakker" and title "Frau". */
@Test
void stripTitle_frau() {
    final String input = "Frau Bakker";
    PersonNameParser.TitleResult stripped = PersonNameParser.stripTitle(input);

    assertThat(stripped.cleaned()).isEqualTo("Bakker");
    assertThat(stripped.title()).isEqualTo("Frau");
}
/** "Cousine Emmy Haniel": expects cleaned text "Emmy Haniel" and title "Cousine". */
@Test
void stripTitle_cousine_withFullName() {
    final String input = "Cousine Emmy Haniel";
    PersonNameParser.TitleResult stripped = PersonNameParser.stripTitle(input);

    assertThat(stripped.cleaned()).isEqualTo("Emmy Haniel");
    assertThat(stripped.title()).isEqualTo("Cousine");
}
/** "Freifrau von Massenbach": expects cleaned text "von Massenbach" and title "Freifrau". */
@Test
void stripTitle_freifrau() {
    final String input = "Freifrau von Massenbach";
    PersonNameParser.TitleResult stripped = PersonNameParser.stripTitle(input);

    assertThat(stripped.cleaned()).isEqualTo("von Massenbach");
    assertThat(stripped.title()).isEqualTo("Freifrau");
}
/** "Dr. Sattelmacher" (space after the dot): expects cleaned text "Sattelmacher" and title "Dr.". */
@Test
void stripTitle_dotPrefix_withSpace() {
    final String input = "Dr. Sattelmacher";
    PersonNameParser.TitleResult stripped = PersonNameParser.stripTitle(input);

    assertThat(stripped.cleaned()).isEqualTo("Sattelmacher");
    assertThat(stripped.title()).isEqualTo("Dr.");
}
/** "Dr.von Gelden" (no space after the dot): expects cleaned text "von Gelden" and title "Dr.". */
@Test
void stripTitle_dotPrefix_noSpace() {
    final String input = "Dr.von Gelden";
    PersonNameParser.TitleResult stripped = PersonNameParser.stripTitle(input);

    assertThat(stripped.cleaned()).isEqualTo("von Gelden");
    assertThat(stripped.title()).isEqualTo("Dr.");
}
/** Stacked titles in "Prof. Dr. Muller": expects cleaned text "Muller" and combined title "Prof. Dr.". */
@Test
void stripTitle_stacked_profDr() {
    final String input = "Prof. Dr. Muller";
    PersonNameParser.TitleResult stripped = PersonNameParser.stripTitle(input);

    assertThat(stripped.cleaned()).isEqualTo("Muller");
    assertThat(stripped.title()).isEqualTo("Prof. Dr.");
}
// --- split — title extraction end-to-end ---
/** split("Tante Molly"): expects title "Tante", no first name, and last name "Molly". */
@Test
void split_tante_setsTitle_firstNameNull() {
    final String input = "Tante Molly";
    PersonNameParser.SplitName parsed = PersonNameParser.split(input);

    assertThat(parsed.title()).isEqualTo("Tante");
    assertThat(parsed.firstName()).isNull();
    assertThat(parsed.lastName()).isEqualTo("Molly");
}
/** split("Dr.Fr.Zarncke"): expects title "Dr.", first name "Fr." and last name "Zarncke". */
@Test
void split_dotTitle_afterDotNorm() {
    final String input = "Dr.Fr.Zarncke";
    PersonNameParser.SplitName parsed = PersonNameParser.split(input);

    assertThat(parsed.title()).isEqualTo("Dr.");
    assertThat(parsed.firstName()).isEqualTo("Fr.");
    assertThat(parsed.lastName()).isEqualTo("Zarncke");
}
/** split("Dr.von Gelden"): expects title "Dr.", no first name, and last name "von Gelden". */
@Test
void split_dotTitle_noSpace_vonLastName() {
    final String input = "Dr.von Gelden";
    PersonNameParser.SplitName parsed = PersonNameParser.split(input);

    assertThat(parsed.title()).isEqualTo("Dr.");
    assertThat(parsed.firstName()).isNull();
    assertThat(parsed.lastName()).isEqualTo("von Gelden");
}
// --- regression: non-prefixes not stripped ---
/** Regression: split("Walter de Gruyter") expects no title, first name "Walter", last name "de Gruyter". */
@Test
void split_walter_noTitleStrip() {
    final String input = "Walter de Gruyter";
    PersonNameParser.SplitName parsed = PersonNameParser.split(input);

    assertThat(parsed.title()).isNull();
    assertThat(parsed.firstName()).isEqualTo("Walter");
    assertThat(parsed.lastName()).isEqualTo("de Gruyter");
}
/** Regression: split("Conrad von Geldern") expects no title, first name "Conrad", last name "von Geldern". */
@Test
void split_conrad_vonGeldern_noTitleStrip() {
    final String input = "Conrad von Geldern";
    PersonNameParser.SplitName parsed = PersonNameParser.split(input);

    assertThat(parsed.title()).isNull();
    assertThat(parsed.firstName()).isEqualTo("Conrad");
    assertThat(parsed.lastName()).isEqualTo("von Geldern");
}
// --- stripMaidenName — maiden name extraction ---
@Test