feat(person): add name-match tokenizer for direct matching (#763)
Lowercase, split on whitespace/hyphen/apostrophe, drop empties. Applied symmetrically to query and candidate name components so "Anna-Maria" and "Anna Maria" tokenize alike. Foundation for resolveByName direct matching. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -909,4 +909,36 @@ class PersonServiceTest {
|
||||
assertThat(result).containsExactly(walter);
|
||||
verify(personRepository).searchByName("Walter");
|
||||
}
|
||||
|
||||
// ─── tokenize (name-match contract) ───────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void tokenize_hyphenatedName_splitsOnHyphen() {
|
||||
assertThat(PersonService.tokenize("Anna-Maria")).containsExactly("anna", "maria");
|
||||
}
|
||||
|
||||
@Test
|
||||
void tokenize_apostropheName_splitsOnApostrophe() {
|
||||
assertThat(PersonService.tokenize("D'Angelo")).containsExactly("d", "angelo");
|
||||
}
|
||||
|
||||
@Test
|
||||
void tokenize_umlautName_lowercasesToSingleToken() {
|
||||
assertThat(PersonService.tokenize("Müller")).containsExactly("müller");
|
||||
}
|
||||
|
||||
@Test
|
||||
void tokenize_doubleSpace_dropsEmptyTokens() {
|
||||
assertThat(PersonService.tokenize("Clara Cram")).containsExactly("clara", "cram");
|
||||
}
|
||||
|
||||
@Test
|
||||
void tokenize_allWhitespace_returnsEmpty() {
|
||||
assertThat(PersonService.tokenize(" ")).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void tokenize_null_returnsEmpty() {
|
||||
assertThat(PersonService.tokenize(null)).isEmpty();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user