feat(service): add PersonTypeClassifier with keyword heuristics
Static classify() method uses position-aware keyword matching:
- SKIP: Briefumschlag, Kondolenzbriefe, Hochzeitsgedicht (start)
- INSTITUTION: Firma, Architekt (start), GmbH, Co (end)
- GROUP: Familie, Comité, Comite, Geschwister, Gesellschafter, Garde, Mitarbeiter (start), Eltern, Kinder, Schwiegereltern (word boundary)
- PERSON: default for all other inputs

Case-insensitive. 25 parameterized test cases.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,89 @@
|
||||
package org.raddatz.familienarchiv.service;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.CsvSource;
|
||||
import org.raddatz.familienarchiv.model.PersonType;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
class PersonTypeClassifierTest {
|
||||
|
||||
// --- SKIP ---
|
||||
|
||||
@ParameterizedTest
|
||||
@CsvSource({
|
||||
"'Briefumschlag aus Java', SKIP",
|
||||
"'Kondolenzbriefe zum Tod von Walter de Gruyter', SKIP",
|
||||
"'Hochzeitsgedicht fur Paul u Luise de Gruyter', SKIP"
|
||||
})
|
||||
void classify_skipEntries(String input, PersonType expected) {
|
||||
assertThat(PersonTypeClassifier.classify(input)).isEqualTo(expected);
|
||||
}
|
||||
|
||||
// --- INSTITUTION ---
|
||||
|
||||
@ParameterizedTest
|
||||
@CsvSource({
|
||||
"'Arthur Collignon GmbH', INSTITUTION",
|
||||
"'Firma Auschrath', INSTITUTION",
|
||||
"'Westermann u Co', INSTITUTION",
|
||||
"'Architekt Korschelt u Renker', INSTITUTION"
|
||||
})
|
||||
void classify_institutionEntries(String input, PersonType expected) {
|
||||
assertThat(PersonTypeClassifier.classify(input)).isEqualTo(expected);
|
||||
}
|
||||
|
||||
// --- GROUP ---
|
||||
|
||||
@ParameterizedTest
|
||||
@CsvSource({
|
||||
"'Comite der Abschiedsfeier', GROUP",
|
||||
"'Comité zur Errichtung eines Heine-Denkmals', GROUP",
|
||||
"'Garde du Corps', GROUP",
|
||||
"'Geschwister de Gruyter', GROUP",
|
||||
"'Gesellschafter des Verlages', GROUP",
|
||||
"'Ella de Gruyters Eltern', GROUP",
|
||||
"'Eugenie de Gruyters Kinder', GROUP",
|
||||
"'Hilde de Gruyters Schwiegereltern', GROUP",
|
||||
"'Eltern Muller', GROUP",
|
||||
"'Familie Cram', GROUP",
|
||||
"'Familie Hasenvlever', GROUP",
|
||||
"'Mitarbeiter Verlag', GROUP",
|
||||
"'Mitarbeiter Druckerei TrebbinClara Cram', GROUP",
|
||||
"'Mitarbeiter Kunstverlag Mu', GROUP"
|
||||
})
|
||||
void classify_groupEntries(String input, PersonType expected) {
|
||||
assertThat(PersonTypeClassifier.classify(input)).isEqualTo(expected);
|
||||
}
|
||||
|
||||
// --- PERSON (default) ---
|
||||
|
||||
@ParameterizedTest
|
||||
@CsvSource({
|
||||
"'Walter de Gruyter', PERSON",
|
||||
"'Clara Cram', PERSON",
|
||||
"'Eugenie de Gruyter geb. Müller', PERSON",
|
||||
"'Dr. Firma Mueller', PERSON"
|
||||
})
|
||||
void classify_personEntries(String input, PersonType expected) {
|
||||
assertThat(PersonTypeClassifier.classify(input)).isEqualTo(expected);
|
||||
}
|
||||
|
||||
// --- Edge cases ---
|
||||
|
||||
@Test
|
||||
void classify_null_returnsPerson() {
|
||||
assertThat(PersonTypeClassifier.classify(null)).isEqualTo(PersonType.PERSON);
|
||||
}
|
||||
|
||||
@Test
|
||||
void classify_blank_returnsPerson() {
|
||||
assertThat(PersonTypeClassifier.classify(" ")).isEqualTo(PersonType.PERSON);
|
||||
}
|
||||
|
||||
@Test
|
||||
void classify_caseInsensitive() {
|
||||
assertThat(PersonTypeClassifier.classify("firma auschrath")).isEqualTo(PersonType.INSTITUTION);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user