feat(service): add PersonTypeClassifier with keyword heuristics

Static classify() method uses position-aware keyword matching:
- SKIP: Briefumschlag, Kondolenzbriefe, Hochzeitsgedicht (start)
- INSTITUTION: Firma, Architekt (start), GmbH, Co (end)
- GROUP: Familie, Comité, Comite, Geschwister, Gesellschafter,
  Garde, Mitarbeiter (start), Eltern, Kinder,
  Schwiegereltern (word boundary)
- PERSON: default for all other inputs

Case-insensitive. 25 parameterized test cases.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-08 13:03:53 +02:00
parent e49ae5de29
commit 68f0c4c4b9
2 changed files with 149 additions and 0 deletions

View File

@@ -0,0 +1,60 @@
package org.raddatz.familienarchiv.service;
import java.util.List;
import java.util.Locale;
import org.raddatz.familienarchiv.model.PersonType;
/**
 * Classifies a raw name string into a {@link PersonType} using position-aware,
 * case-insensitive keyword heuristics.
 *
 * <p>Rules are evaluated in this order; the first match wins:
 * <ol>
 *   <li>{@code SKIP} - the name starts with one of {@link #SKIP_KEYWORDS}</li>
 *   <li>{@code INSTITUTION} - the name starts with one of {@link #INSTITUTION_START},
 *       ends with one of {@link #INSTITUTION_END}, or ends with {@code " co"} / {@code " co."}</li>
 *   <li>{@code GROUP} - the name starts with one of {@link #GROUP_START}, or contains one of
 *       {@link #GROUP_CONTAINS} as a whole word</li>
 *   <li>{@code PERSON} - everything else, including {@code null} and blank input</li>
 * </ol>
 */
public class PersonTypeClassifier {

    /** Prefixes that mark an entry as {@code SKIP}. */
    private static final List<String> SKIP_KEYWORDS = List.of(
            "Briefumschlag", "Kondolenzbriefe", "Hochzeitsgedicht");

    /** Prefixes that mark an entry as {@code INSTITUTION}. */
    private static final List<String> INSTITUTION_START = List.of(
            "Firma", "Architekt");

    /** Suffixes that mark an entry as {@code INSTITUTION}. */
    private static final List<String> INSTITUTION_END = List.of(
            "GmbH");

    /** Prefixes that mark an entry as {@code GROUP}. */
    private static final List<String> GROUP_START = List.of(
            "Familie", "Comité", "Comite", "Geschwister", "Gesellschafter",
            "Garde", "Mitarbeiter");

    /** Words that mark an entry as {@code GROUP} when they occur delimited by non-letters. */
    private static final List<String> GROUP_CONTAINS = List.of(
            "Eltern", "Kinder", "Schwiegereltern");

    /**
     * Determines the {@link PersonType} of a raw name.
     *
     * @param rawName the name to classify; may be {@code null} or blank
     * @return the first matching type according to the rule order described on this class;
     *         {@code PersonType.PERSON} when nothing matches or the input is {@code null}/blank
     */
    public static PersonType classify(String rawName) {
        if (rawName == null || rawName.isBlank()) {
            return PersonType.PERSON;
        }

        // Locale.ROOT keeps case folding independent of the JVM default locale; with a
        // Turkish default, "FIRMA" would otherwise lower-case to "fırma" and miss "firma".
        String lower = rawName.trim().toLowerCase(Locale.ROOT);

        if (startsWithAny(lower, SKIP_KEYWORDS)) {
            return PersonType.SKIP;
        }
        if (startsWithAny(lower, INSTITUTION_START) || endsWithAny(lower, INSTITUTION_END)) {
            return PersonType.INSTITUTION;
        }
        // "Co" is only accepted as a separate trailing token, hence the leading space.
        if (lower.endsWith(" co") || lower.endsWith(" co.")) {
            return PersonType.INSTITUTION;
        }
        if (startsWithAny(lower, GROUP_START)) {
            return PersonType.GROUP;
        }
        for (String keyword : GROUP_CONTAINS) {
            if (containsWord(lower, keyword.toLowerCase(Locale.ROOT))) {
                return PersonType.GROUP;
            }
        }
        return PersonType.PERSON;
    }

    /** Returns whether {@code lower} starts with any keyword, compared in lower case. */
    private static boolean startsWithAny(String lower, List<String> keywords) {
        for (String keyword : keywords) {
            if (lower.startsWith(keyword.toLowerCase(Locale.ROOT))) {
                return true;
            }
        }
        return false;
    }

    /** Returns whether {@code lower} ends with any keyword, compared in lower case. */
    private static boolean endsWithAny(String lower, List<String> keywords) {
        for (String keyword : keywords) {
            if (lower.endsWith(keyword.toLowerCase(Locale.ROOT))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns whether {@code word} occurs in {@code text} with no letter directly before or
     * after it.
     *
     * <p>Every occurrence is examined, not just the first: in "großeltern und eltern" the
     * first hit for "eltern" is embedded in "großeltern", but the second one stands alone
     * and must still count as a match.
     *
     * @param text the text to search (already lower-cased by the caller)
     * @param word the word to look for (already lower-cased by the caller)
     * @return {@code true} if at least one occurrence is delimited by non-letters or the
     *         text boundaries
     */
    private static boolean containsWord(String text, String word) {
        if (word.isEmpty()) {
            // An empty needle matches at every index; bail out so the scan below terminates.
            return false;
        }
        int idx = text.indexOf(word);
        while (idx >= 0) {
            int end = idx + word.length();
            boolean startOk = idx == 0 || !Character.isLetter(text.charAt(idx - 1));
            boolean endOk = end >= text.length() || !Character.isLetter(text.charAt(end));
            if (startOk && endOk) {
                return true;
            }
            idx = text.indexOf(word, idx + 1);
        }
        return false;
    }
}