feat(person): add resolveByName for direct/partial name matching (#763)
Token-set containment over all of a person's name components (firstName, lastName, alias, each PersonNameAlias first+last, title) decides direct vs partial. Orchestrates tokenize → cap(8) → fetch pool → classify → cap(10) after classification, with an empty-token guard and a PII-free debug log of the outcome bucket. MAX_TOKENS is a DoS control; the after-classify cap keeps a direct match that sorts past position 10 among partials. Read-only transaction keeps lazy nameAliases reachable during classification (ADR-022). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,13 @@
|
|||||||
|
package org.raddatz.familienarchiv.person;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Result of {@link PersonService#resolveByName(String)}: candidate persons split by name-match
|
||||||
|
* strength. {@code direct} = every query token is a whole-token match across the person's name
|
||||||
|
* components (alias/maiden-name aware); {@code partial} = matched the substring fetch but is not
|
||||||
|
* direct. The vocabulary is deliberately name-match strength ({@code direct}/{@code partial}), not
|
||||||
|
* the search layer's resolved/ambiguous buckets — the caller maps these into its own outcome.
|
||||||
|
*/
|
||||||
|
public record NameMatches(List<Person> direct, List<Person> partial) {
|
||||||
|
}
|
||||||
@@ -2,12 +2,14 @@ package org.raddatz.familienarchiv.person;
|
|||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
import java.util.LinkedHashSet;
|
import java.util.LinkedHashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.springframework.lang.Nullable;
|
import org.springframework.lang.Nullable;
|
||||||
|
|
||||||
@@ -35,6 +37,13 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
@Slf4j
|
@Slf4j
|
||||||
public class PersonService {
|
public class PersonService {
|
||||||
|
|
||||||
|
// Co-located with the fetch loop that owns them (issue #763). MAX_TOKENS caps the number of
|
||||||
|
// unindexed leading-wildcard LIKE scans per name — a DoS control, not just perf. MAX_CANDIDATES
|
||||||
|
// bounds each result bucket and is applied AFTER classification so a direct match that sorts
|
||||||
|
// past position 10 among partials is never discarded.
|
||||||
|
private static final int MAX_TOKENS = 8;
|
||||||
|
private static final int MAX_CANDIDATES = 10;
|
||||||
|
|
||||||
private final PersonRepository personRepository;
|
private final PersonRepository personRepository;
|
||||||
private final PersonNameAliasRepository aliasRepository;
|
private final PersonNameAliasRepository aliasRepository;
|
||||||
|
|
||||||
@@ -125,6 +134,76 @@ public class PersonService {
|
|||||||
return tokens;
|
return tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Resolves an extracted person name into {@link NameMatches} by name-match strength.
|
||||||
|
* Orchestrates tokenize → cap → fetch pool → classify → cap-after-classify. Read-only
|
||||||
|
* transaction keeps the Hibernate session open so each candidate's lazy {@code nameAliases}
|
||||||
|
* are reachable during classification (see ADR-022).
|
||||||
|
*/
|
||||||
|
@Transactional(readOnly = true)
|
||||||
|
public NameMatches resolveByName(String name) {
|
||||||
|
Set<String> queryTokens = capTokens(tokenize(name));
|
||||||
|
if (queryTokens.isEmpty()) {
|
||||||
|
log.debug("resolveByName outcome=no-match tokens=0");
|
||||||
|
return new NameMatches(List.of(), List.of());
|
||||||
|
}
|
||||||
|
return classify(fetchPool(queryTokens), queryTokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Set<String> capTokens(Set<String> tokens) {
|
||||||
|
return tokens.stream().limit(MAX_TOKENS).collect(Collectors.toCollection(LinkedHashSet::new));
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<Person> fetchPool(Set<String> queryTokens) {
|
||||||
|
LinkedHashMap<UUID, Person> pool = new LinkedHashMap<>();
|
||||||
|
for (String token : queryTokens) {
|
||||||
|
for (Person candidate : findByDisplayNameContaining(token)) {
|
||||||
|
pool.putIfAbsent(candidate.getId(), candidate);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new ArrayList<>(pool.values());
|
||||||
|
}
|
||||||
|
|
||||||
|
private NameMatches classify(List<Person> pool, Set<String> queryTokens) {
|
||||||
|
List<Person> direct = new ArrayList<>();
|
||||||
|
List<Person> partial = new ArrayList<>();
|
||||||
|
for (Person candidate : pool) {
|
||||||
|
if (personTokens(candidate).containsAll(queryTokens)) {
|
||||||
|
direct.add(candidate);
|
||||||
|
} else {
|
||||||
|
partial.add(candidate);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
List<Person> cappedDirect = cap(direct);
|
||||||
|
List<Person> cappedPartial = cap(partial);
|
||||||
|
log.debug("resolveByName outcome={} tokens={}", outcome(cappedDirect, cappedPartial), queryTokens.size());
|
||||||
|
return new NameMatches(cappedDirect, cappedPartial);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Set<String> personTokens(Person person) {
|
||||||
|
Set<String> tokens = new LinkedHashSet<>();
|
||||||
|
tokens.addAll(tokenize(person.getFirstName()));
|
||||||
|
tokens.addAll(tokenize(person.getLastName()));
|
||||||
|
tokens.addAll(tokenize(person.getAlias()));
|
||||||
|
tokens.addAll(tokenize(person.getTitle()));
|
||||||
|
for (PersonNameAlias alias : person.getNameAliases()) {
|
||||||
|
tokens.addAll(tokenize(alias.getFirstName()));
|
||||||
|
tokens.addAll(tokenize(alias.getLastName()));
|
||||||
|
}
|
||||||
|
return tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Person> cap(List<Person> people) {
|
||||||
|
return people.size() > MAX_CANDIDATES ? people.subList(0, MAX_CANDIDATES) : people;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String outcome(List<Person> direct, List<Person> partial) {
|
||||||
|
if (direct.size() == 1) return "direct=1";
|
||||||
|
if (direct.size() >= 2) return "direct>=2";
|
||||||
|
if (!partial.isEmpty()) return "partial-only";
|
||||||
|
return "no-match";
|
||||||
|
}
|
||||||
|
|
||||||
public List<Person> findAllFamilyMembers() {
|
public List<Person> findAllFamilyMembers() {
|
||||||
return personRepository.findByFamilyMemberTrueOrderByLastNameAscFirstNameAsc();
|
return personRepository.findByFamilyMemberTrueOrderByLastNameAscFirstNameAsc();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -941,4 +941,108 @@ class PersonServiceTest {
|
|||||||
void tokenize_null_returnsEmpty() {
|
void tokenize_null_returnsEmpty() {
|
||||||
assertThat(PersonService.tokenize(null)).isEmpty();
|
assertThat(PersonService.tokenize(null)).isEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─── resolveByName (direct / partial classification) ──────────────────────
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void resolveByName_singleDirectMatch_classifiesAsDirect() {
|
||||||
|
Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram").build();
|
||||||
|
when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
|
||||||
|
when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
|
||||||
|
|
||||||
|
NameMatches result = personService.resolveByName("Clara Cram");
|
||||||
|
|
||||||
|
assertThat(result.direct()).containsExactly(clara);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void resolveByName_maidenAliasToken_classifiesAsDirect() {
|
||||||
|
Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Müller")
|
||||||
|
.nameAliases(List.of(PersonNameAlias.builder().lastName("Cram")
|
||||||
|
.type(PersonNameAliasType.MAIDEN_NAME).build()))
|
||||||
|
.build();
|
||||||
|
when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
|
||||||
|
when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
|
||||||
|
|
||||||
|
NameMatches result = personService.resolveByName("Clara Cram");
|
||||||
|
|
||||||
|
assertThat(result.direct()).containsExactly(clara);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void resolveByName_aliasFirstNameToken_isFetchedAndClassified() {
|
||||||
|
Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram")
|
||||||
|
.nameAliases(List.of(PersonNameAlias.builder().firstName("Wilhelmina").lastName("de Gruyter")
|
||||||
|
.type(PersonNameAliasType.BIRTH).build()))
|
||||||
|
.build();
|
||||||
|
when(personRepository.searchByName("wilhelmina")).thenReturn(List.of(clara));
|
||||||
|
|
||||||
|
NameMatches result = personService.resolveByName("Wilhelmina");
|
||||||
|
|
||||||
|
assertThat(result.direct()).containsExactly(clara);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void resolveByName_middleName_stillDirect() {
|
||||||
|
Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara Maria").lastName("Cram").build();
|
||||||
|
when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
|
||||||
|
when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
|
||||||
|
|
||||||
|
NameMatches result = personService.resolveByName("Clara Cram");
|
||||||
|
|
||||||
|
assertThat(result.direct()).containsExactly(clara);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void resolveByName_reorderedTokens_stillDirect() {
|
||||||
|
Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram").build();
|
||||||
|
when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
|
||||||
|
when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
|
||||||
|
|
||||||
|
NameMatches result = personService.resolveByName("Cram Clara");
|
||||||
|
|
||||||
|
assertThat(result.direct()).containsExactly(clara);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void resolveByName_cramVsCramer_classifiesAsPartial() {
|
||||||
|
Person cramer = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cramer").build();
|
||||||
|
when(personRepository.searchByName("clara")).thenReturn(List.of(cramer));
|
||||||
|
when(personRepository.searchByName("cram")).thenReturn(List.of(cramer));
|
||||||
|
|
||||||
|
NameMatches result = personService.resolveByName("Clara Cram");
|
||||||
|
|
||||||
|
assertThat(result.partial()).containsExactly(cramer);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void resolveByName_emptyAfterTokenizing_returnsNoCandidates() {
|
||||||
|
NameMatches result = personService.resolveByName(" - ");
|
||||||
|
|
||||||
|
assertThat(result.direct()).isEmpty();
|
||||||
|
verify(personRepository, never()).searchByName(any());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void resolveByName_directSortsBeyondCap_stillReturnedAsDirect() {
|
||||||
|
List<Person> pool = new java.util.ArrayList<>();
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
pool.add(Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cramer").build());
|
||||||
|
}
|
||||||
|
Person direct = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram").build();
|
||||||
|
pool.add(direct);
|
||||||
|
when(personRepository.searchByName("clara")).thenReturn(pool);
|
||||||
|
when(personRepository.searchByName("cram")).thenReturn(pool);
|
||||||
|
|
||||||
|
NameMatches result = personService.resolveByName("Clara Cram");
|
||||||
|
|
||||||
|
assertThat(result.direct()).containsExactly(direct);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void resolveByName_over8Tokens_issuesAtMost8Fetches() {
|
||||||
|
personService.resolveByName("a b c d e f g h i j");
|
||||||
|
|
||||||
|
verify(personRepository, org.mockito.Mockito.atMost(8)).searchByName(any());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user