feat(parser): normalize dot-compressed names in split()
Inserts a space after each dot when the cleaned name contains dots but no spaces, so the existing last-space fallback handles names like "E.Rockstroh" and "Dr.Fr.Zarncke" correctly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -123,6 +123,11 @@ public class PersonNameParser {
|
|||||||
|
|
||||||
String cleaned = GEB_PATTERN.matcher(rawName).replaceAll("").trim();
|
String cleaned = GEB_PATTERN.matcher(rawName).replaceAll("").trim();
|
||||||
|
|
||||||
|
// Normalize dot-compressed names ("Dr.Fr.Zarncke" -> "Dr. Fr. Zarncke") so the existing last-space fallback can split them
|
||||||
|
if (!cleaned.contains(" ") && cleaned.contains(".")) {
|
||||||
|
cleaned = cleaned.replace(".", ". ").trim();
|
||||||
|
}
|
||||||
|
|
||||||
String lastName = findKnownLastName(cleaned);
|
String lastName = findKnownLastName(cleaned);
|
||||||
if (lastName != null) {
|
if (lastName != null) {
|
||||||
String firstName = cleaned.substring(0, cleaned.length() - lastName.length()).trim();
|
String firstName = cleaned.substring(0, cleaned.length() - lastName.length()).trim();
|
||||||
|
|||||||
@@ -133,6 +133,42 @@ class PersonNameParserTest {
|
|||||||
assertThat(result.lastName()).isEqualTo("de Gruyter");
|
assertThat(result.lastName()).isEqualTo("de Gruyter");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- split — dot-compressed names ---
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void split_dotCompressed_initialAndLastName() {
|
||||||
|
PersonNameParser.SplitName result = PersonNameParser.split("E.Rockstroh");
|
||||||
|
assertThat(result.firstName()).isEqualTo("E.");
|
||||||
|
assertThat(result.lastName()).isEqualTo("Rockstroh");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void split_dotCompressed_twoInitials() {
|
||||||
|
PersonNameParser.SplitName result = PersonNameParser.split("E.M.");
|
||||||
|
assertThat(result.firstName()).isEqualTo("E.");
|
||||||
|
assertThat(result.lastName()).isEqualTo("M.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void split_dotCompressed_titleFirstNameLastName() {
|
||||||
|
PersonNameParser.SplitName result = PersonNameParser.split("Dr.Fr.Zarncke");
|
||||||
|
assertThat(result.firstName()).isEqualTo("Dr. Fr.");
|
||||||
|
assertThat(result.lastName()).isEqualTo("Zarncke");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void split_dotCompressed_titleAndLastName() {
|
||||||
|
PersonNameParser.SplitName result = PersonNameParser.split("Dr.Zarnke");
|
||||||
|
assertThat(result.firstName()).isEqualTo("Dr.");
|
||||||
|
assertThat(result.lastName()).isEqualTo("Zarnke");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void parseReceivers_dotCompressedName_passthrough() {
|
||||||
|
assertThat(PersonNameParser.parseReceivers("Dr.Fr.Zarncke"))
|
||||||
|
.containsExactly("Dr.Fr.Zarncke");
|
||||||
|
}
|
||||||
|
|
||||||
// --- parseReceivers — shared last name with full-name part ─────────────────
|
// --- parseReceivers — shared last name with full-name part ─────────────────
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|||||||
Reference in New Issue
Block a user