feat(person): add name-match tokenizer for direct matching (#763)
Lowercase, split on whitespace/hyphen/apostrophe, drop empties. Applied symmetrically to query and candidate name components so "Anna-Maria" and "Anna Maria" tokenize alike. Foundation for resolveByName direct matching. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -1,8 +1,12 @@
|
|||||||
package org.raddatz.familienarchiv.person;
|
package org.raddatz.familienarchiv.person;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
|
|
||||||
import org.springframework.lang.Nullable;
|
import org.springframework.lang.Nullable;
|
||||||
@@ -24,9 +28,11 @@ import org.springframework.transaction.annotation.Transactional;
|
|||||||
import org.springframework.web.server.ResponseStatusException;
|
import org.springframework.web.server.ResponseStatusException;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
|
@Slf4j
|
||||||
public class PersonService {
|
public class PersonService {
|
||||||
|
|
||||||
private final PersonRepository personRepository;
|
private final PersonRepository personRepository;
|
||||||
@@ -103,6 +109,22 @@ public class PersonService {
|
|||||||
return personRepository.searchByName(fragment);
|
return personRepository.searchByName(fragment);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Name-match tokenizer (issue #763): lowercase, split on whitespace/hyphen/apostrophe,
|
||||||
|
// drop empties. Applied symmetrically to the query and to every candidate name component so
|
||||||
|
// that "Anna-Maria" and "Anna Maria" tokenize alike. Order-preserving for deterministic tests.
|
||||||
|
static Set<String> tokenize(String raw) {
|
||||||
|
if (raw == null || raw.isBlank()) {
|
||||||
|
return Set.of();
|
||||||
|
}
|
||||||
|
LinkedHashSet<String> tokens = new LinkedHashSet<>();
|
||||||
|
for (String part : raw.toLowerCase(Locale.ROOT).split("[\\s\\-']+")) {
|
||||||
|
if (!part.isEmpty()) {
|
||||||
|
tokens.add(part);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return tokens;
|
||||||
|
}
|
||||||
|
|
||||||
public List<Person> findAllFamilyMembers() {
|
public List<Person> findAllFamilyMembers() {
|
||||||
return personRepository.findByFamilyMemberTrueOrderByLastNameAscFirstNameAsc();
|
return personRepository.findByFamilyMemberTrueOrderByLastNameAscFirstNameAsc();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -909,4 +909,36 @@ class PersonServiceTest {
|
|||||||
assertThat(result).containsExactly(walter);
|
assertThat(result).containsExactly(walter);
|
||||||
verify(personRepository).searchByName("Walter");
|
verify(personRepository).searchByName("Walter");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─── tokenize (name-match contract) ───────────────────────────────────────
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void tokenize_hyphenatedName_splitsOnHyphen() {
|
||||||
|
assertThat(PersonService.tokenize("Anna-Maria")).containsExactly("anna", "maria");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void tokenize_apostropheName_splitsOnApostrophe() {
|
||||||
|
assertThat(PersonService.tokenize("D'Angelo")).containsExactly("d", "angelo");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void tokenize_umlautName_lowercasesToSingleToken() {
|
||||||
|
assertThat(PersonService.tokenize("Müller")).containsExactly("müller");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void tokenize_doubleSpace_dropsEmptyTokens() {
|
||||||
|
assertThat(PersonService.tokenize("Clara Cram")).containsExactly("clara", "cram");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void tokenize_allWhitespace_returnsEmpty() {
|
||||||
|
assertThat(PersonService.tokenize(" ")).isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void tokenize_null_returnsEmpty() {
|
||||||
|
assertThat(PersonService.tokenize(null)).isEmpty();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user