feat(service): add PersonTypeClassifier with keyword heuristics
Static classify() method uses position-aware keyword matching:
- SKIP: Briefumschlag, Kondolenzbriefe, Hochzeitsgedicht (start)
- INSTITUTION: Firma, Architekt (start), GmbH, Co (end)
- GROUP: Familie, Comité, Comite, Geschwister, Gesellschafter, Garde, Mitarbeiter (start), Eltern, Kinder, Schwiegereltern (word boundary)
- PERSON: default for all other inputs

Case-insensitive. 25 parameterized test cases.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,60 @@
|
||||
package org.raddatz.familienarchiv.service;
|
||||
|
||||
import java.util.List;
import java.util.Locale;
|
||||
import org.raddatz.familienarchiv.model.PersonType;
|
||||
|
||||
public class PersonTypeClassifier {
|
||||
|
||||
private static final List<String> SKIP_KEYWORDS = List.of(
|
||||
"Briefumschlag", "Kondolenzbriefe", "Hochzeitsgedicht");
|
||||
|
||||
private static final List<String> INSTITUTION_START = List.of(
|
||||
"Firma", "Architekt");
|
||||
|
||||
private static final List<String> INSTITUTION_END = List.of(
|
||||
"GmbH");
|
||||
|
||||
private static final List<String> GROUP_START = List.of(
|
||||
"Familie", "Comité", "Comite", "Geschwister", "Gesellschafter",
|
||||
"Garde", "Mitarbeiter");
|
||||
|
||||
private static final List<String> GROUP_CONTAINS = List.of(
|
||||
"Eltern", "Kinder", "Schwiegereltern");
|
||||
|
||||
public static PersonType classify(String rawName) {
|
||||
if (rawName == null || rawName.isBlank()) return PersonType.PERSON;
|
||||
|
||||
String trimmed = rawName.trim();
|
||||
String lower = trimmed.toLowerCase();
|
||||
|
||||
for (String keyword : SKIP_KEYWORDS) {
|
||||
if (lower.startsWith(keyword.toLowerCase())) return PersonType.SKIP;
|
||||
}
|
||||
|
||||
for (String keyword : INSTITUTION_START) {
|
||||
if (lower.startsWith(keyword.toLowerCase())) return PersonType.INSTITUTION;
|
||||
}
|
||||
for (String keyword : INSTITUTION_END) {
|
||||
if (lower.endsWith(keyword.toLowerCase())) return PersonType.INSTITUTION;
|
||||
}
|
||||
if (lower.endsWith(" co") || lower.endsWith(" co.")) return PersonType.INSTITUTION;
|
||||
|
||||
for (String keyword : GROUP_START) {
|
||||
if (lower.startsWith(keyword.toLowerCase())) return PersonType.GROUP;
|
||||
}
|
||||
for (String keyword : GROUP_CONTAINS) {
|
||||
if (containsWord(lower, keyword.toLowerCase())) return PersonType.GROUP;
|
||||
}
|
||||
|
||||
return PersonType.PERSON;
|
||||
}
|
||||
|
||||
private static boolean containsWord(String text, String word) {
|
||||
int idx = text.indexOf(word);
|
||||
if (idx < 0) return false;
|
||||
boolean startOk = idx == 0 || !Character.isLetter(text.charAt(idx - 1));
|
||||
int end = idx + word.length();
|
||||
boolean endOk = end >= text.length() || !Character.isLetter(text.charAt(end));
|
||||
return startOk && endOk;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user