feat: add PersonNameParser utility for ODS name normalisation
Pure static utility that parses raw name strings from the ODS into structured Person data. Handles multi-receiver patterns like "Walter und Eugenie de Gruyter" → [Walter de Gruyter, Eugenie de Gruyter], parenthesised last names, "geb." maiden-name stripping, and "Familie" filtering. Includes unit tests for all patterns found in the data. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,120 @@
|
||||
package org.raddatz.familienarchiv.service;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
class PersonNameParserTest {
|
||||
|
||||
// --- parseReceivers ---
|
||||
|
||||
@Test
|
||||
void singlePerson_noChange() {
|
||||
assertThat(PersonNameParser.parseReceivers("Walter de Gruyter"))
|
||||
.containsExactly("Walter de Gruyter");
|
||||
}
|
||||
|
||||
@Test
|
||||
void gebAnnotation_stripped() {
|
||||
assertThat(PersonNameParser.parseReceivers("Eugenie de Gruyter geb. Müller"))
|
||||
.containsExactly("Eugenie de Gruyter");
|
||||
}
|
||||
|
||||
@Test
|
||||
void twoFirstNames_sharedKnownLastName_und() {
|
||||
assertThat(PersonNameParser.parseReceivers("Walter und Eugenie de Gruyter"))
|
||||
.containsExactly("Walter de Gruyter", "Eugenie de Gruyter");
|
||||
}
|
||||
|
||||
@Test
|
||||
void twoFirstNames_sharedKnownLastName_u() {
|
||||
assertThat(PersonNameParser.parseReceivers("Herbert und Clara Cram"))
|
||||
.containsExactly("Herbert Cram", "Clara Cram");
|
||||
}
|
||||
|
||||
@Test
|
||||
void twoFirstNames_sharedKnownLastName_u_short() {
|
||||
assertThat(PersonNameParser.parseReceivers("Ella u Walter Dieckmann"))
|
||||
.containsExactly("Ella Dieckmann", "Walter Dieckmann");
|
||||
}
|
||||
|
||||
@Test
|
||||
void twoFirstNames_parenthesisedLastName() {
|
||||
assertThat(PersonNameParser.parseReceivers("Hedi und Tutu (Gruber)"))
|
||||
.containsExactly("Hedi Gruber", "Tutu Gruber");
|
||||
}
|
||||
|
||||
@Test
|
||||
void twoPersons_differentLastNames() {
|
||||
assertThat(PersonNameParser.parseReceivers("Clara Cram u Ellen B-M"))
|
||||
.containsExactly("Clara Cram", "Ellen B-M");
|
||||
}
|
||||
|
||||
@Test
|
||||
void familie_filtered_out() {
|
||||
assertThat(PersonNameParser.parseReceivers("Clara u Familie"))
|
||||
.containsExactly("Clara");
|
||||
}
|
||||
|
||||
@Test
|
||||
void twoFirstNames_noLastName() {
|
||||
assertThat(PersonNameParser.parseReceivers("Walter und Eugenie"))
|
||||
.containsExactly("Walter", "Eugenie");
|
||||
}
|
||||
|
||||
@Test
|
||||
void nullInput_returnsEmpty() {
|
||||
assertThat(PersonNameParser.parseReceivers(null)).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void blankInput_returnsEmpty() {
|
||||
assertThat(PersonNameParser.parseReceivers(" ")).isEmpty();
|
||||
}
|
||||
|
||||
// --- split ---
|
||||
|
||||
@Test
|
||||
void split_knownMultiWordLastName() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Walter de Gruyter");
|
||||
assertThat(result.firstName()).isEqualTo("Walter");
|
||||
assertThat(result.lastName()).isEqualTo("de Gruyter");
|
||||
}
|
||||
|
||||
@Test
|
||||
void split_knownSingleWordLastName() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Clara Cram");
|
||||
assertThat(result.firstName()).isEqualTo("Clara");
|
||||
assertThat(result.lastName()).isEqualTo("Cram");
|
||||
}
|
||||
|
||||
@Test
|
||||
void split_unknownLastName_fallsBackToLastSpace() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Ellen Burkhard-Meier");
|
||||
assertThat(result.firstName()).isEqualTo("Ellen");
|
||||
assertThat(result.lastName()).isEqualTo("Burkhard-Meier");
|
||||
}
|
||||
|
||||
@Test
|
||||
void split_singleToken_lastNameIsPlaceholder() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Clara");
|
||||
assertThat(result.firstName()).isEqualTo("Clara");
|
||||
assertThat(result.lastName()).isEqualTo("?");
|
||||
}
|
||||
|
||||
@Test
|
||||
void split_gebAnnotation_stripped() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split("Eugenie de Gruyter geb. Müller");
|
||||
assertThat(result.firstName()).isEqualTo("Eugenie");
|
||||
assertThat(result.lastName()).isEqualTo("de Gruyter");
|
||||
}
|
||||
|
||||
@Test
|
||||
void split_null_returnsPlaceholder() {
|
||||
PersonNameParser.SplitName result = PersonNameParser.split(null);
|
||||
assertThat(result.firstName()).isEqualTo("?");
|
||||
assertThat(result.lastName()).isEqualTo("?");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user