feat: PersonNameParser enhancements and Person model refactor (#209-#213) #215

Merged
marcel merged 25 commits from feat/issues-209-213-person-parser-enhancements into main 2026-04-08 18:48:00 +02:00
2 changed files with 72 additions and 5 deletions
Showing only changes of commit c49cb345ca - Show all commits

View File

@@ -18,7 +18,7 @@ public class PersonNameParser {
/** Known last names, including multi-word ones such as "de Gruyter". */
static final List<String> KNOWN_LAST_NAMES = List.of(
        "de Gruyter", "Dieckmann", "Gruber", "Müller", "Wolff", "Cram");

/**
 * Matches a trailing maiden-name annotation: an optional comma, optional whitespace, "geb" with
 * an optional dot, whitespace, and then the rest of the input captured as group 1.
 *
 * <p>The negative lookbehind rejects a "geb" that is directly preceded by a letter or digit, so
 * the marker cannot match as the tail of a longer word (e.g. "Ingeb Olsen"). It is written with
 * Unicode classes rather than {@code \b} so umlauts count as letters on every JDK version.
 */
private static final Pattern GEB_PATTERN =
        Pattern.compile(",?\\s*(?<![\\p{L}\\p{N}])geb\\.?\\s+(.+)$");

/** Matches a parenthesized group at the end of the input; group 1 is the text inside. */
private static final Pattern PAREN_LAST_NAME = Pattern.compile("\\(([^)]+)\\)\\s*$");

/** Matches a whitespace-delimited "und" or "u" between name parts. */
private static final Pattern MULTI_SEPARATOR = Pattern.compile("\\s+(?:und|u)\\s+");

/** Matches a literal double slash. */
private static final Pattern SLASH_SEPARATOR = Pattern.compile("//");
@@ -153,10 +153,15 @@ public class PersonNameParser {
);
}
/**
 * Splits a trailing "geb" annotation off a person name and extracts the maiden name.
 *
 * <p>The first match of {@code GEB_PATTERN} marks where the annotation begins: everything before
 * it becomes the cleaned name, and capture group 1 becomes the maiden name.
 *
 * @param input raw name text, e.g. {@code "Clara Cram geb. de Gruyter"}; must not be null
 * @return the trimmed name without the annotation together with the trimmed maiden name; the
 *     maiden name is {@code null} when no annotation is found or when nothing but whitespace
 *     follows the marker
 */
public static MaidenNameResult stripMaidenName(String input) {
    Matcher m = GEB_PATTERN.matcher(input);
    if (!m.find()) {
        // Trim here as well so both paths return a normalized name.
        return new MaidenNameResult(input.trim(), null);
    }
    String cleaned = input.substring(0, m.start()).trim();
    String maidenName = m.group(1).trim();
    // Group 1 can consist solely of trailing whitespace; report that as "no maiden name".
    return new MaidenNameResult(cleaned, maidenName.isEmpty() ? null : maidenName);
}
/** Normalizes dot-compressed names: "Dr.Fr.Zarncke" → "Dr. Fr. Zarncke" */

View File

@@ -115,7 +115,7 @@ class PersonNameParserTest {
assertThat(result.title()).isNull();
assertThat(result.firstName()).isEqualTo("Eugenie");
assertThat(result.lastName()).isEqualTo("de Gruyter");
assertThat(result.maidenName()).isNull();
assertThat(result.maidenName()).isEqualTo("Müller");
assertThat(result.annotation()).isNull();
}
@@ -282,6 +282,68 @@ class PersonNameParserTest {
assertThat(result.title()).isNull();
}
// --- stripMaidenName — maiden name extraction ---
// Dotted "geb." marker followed by a single-word maiden name.
@Test
void stripMaidenName_standardDot_singleWord() {
    String raw = "Eugenie de Gruyter geb. Muller";

    PersonNameParser.MaidenNameResult parsed = PersonNameParser.stripMaidenName(raw);

    assertThat(parsed.cleaned()).isEqualTo("Eugenie de Gruyter");
    assertThat(parsed.maidenName()).isEqualTo("Muller");
}
// Dotted "geb." marker followed by a two-word maiden name.
@Test
void stripMaidenName_dot_multiWordMaidenName() {
    String raw = "Clara Cram geb. de Gruyter";

    PersonNameParser.MaidenNameResult parsed = PersonNameParser.stripMaidenName(raw);

    assertThat(parsed.cleaned()).isEqualTo("Clara Cram");
    assertThat(parsed.maidenName()).isEqualTo("de Gruyter");
}
// Comma before the marker, no dot after "geb", two-word maiden name.
@Test
void stripMaidenName_commaPrefix_noDot_multiWord() {
    String raw = "Ella Dieckmann, geb de Gruyter";

    PersonNameParser.MaidenNameResult parsed = PersonNameParser.stripMaidenName(raw);

    assertThat(parsed.cleaned()).isEqualTo("Ella Dieckmann");
    assertThat(parsed.maidenName()).isEqualTo("de Gruyter");
}
// Marker without a dot, single-word maiden name.
@Test
void stripMaidenName_noDot_singleWord() {
    String raw = "Elise Rockstroh geb Sintenis";

    PersonNameParser.MaidenNameResult parsed = PersonNameParser.stripMaidenName(raw);

    assertThat(parsed.cleaned()).isEqualTo("Elise Rockstroh");
    assertThat(parsed.maidenName()).isEqualTo("Sintenis");
}
// Only a first name precedes the marker; there is no married last name.
@Test
void stripMaidenName_noDot_noMarriedLastName() {
    String raw = "Elisabeth geb Fernow";

    PersonNameParser.MaidenNameResult parsed = PersonNameParser.stripMaidenName(raw);

    assertThat(parsed.cleaned()).isEqualTo("Elisabeth");
    assertThat(parsed.maidenName()).isEqualTo("Fernow");
}
// Input without any "geb" marker comes back unchanged, with a null maiden name.
@Test
void stripMaidenName_noGeb_returnsNullMaidenName() {
    String raw = "Walter de Gruyter";

    PersonNameParser.MaidenNameResult parsed = PersonNameParser.stripMaidenName(raw);

    assertThat(parsed.cleaned()).isEqualTo("Walter de Gruyter");
    assertThat(parsed.maidenName()).isNull();
}
// --- split — maiden name extraction end-to-end ---
// split() on a dotted "geb." annotation: first, last and maiden name are all populated.
@Test
void split_gebDot_extractsMaidenName() {
    String raw = "Eugenie de Gruyter geb. Muller";

    PersonNameParser.SplitName parts = PersonNameParser.split(raw);

    assertThat(parts.firstName()).isEqualTo("Eugenie");
    assertThat(parts.lastName()).isEqualTo("de Gruyter");
    assertThat(parts.maidenName()).isEqualTo("Muller");
}
// split() on a "geb" annotation written WITHOUT a dot, with a two-word maiden name.
// The input deliberately omits the dot so this covers the no-dot form end-to-end; the dotted
// form is exercised by split_gebDot_extractsMaidenName.
@Test
void split_gebNoDot_multiWordMaidenName() {
    PersonNameParser.SplitName result = PersonNameParser.split("Clara Cram geb de Gruyter");
    assertThat(result.firstName()).isEqualTo("Clara");
    assertThat(result.lastName()).isEqualTo("Cram");
    assertThat(result.maidenName()).isEqualTo("de Gruyter");
}
// --- enum values ---
@Test