feat(search): auto-select a single direct person match in smart search (#763) #769
@@ -0,0 +1,13 @@
|
||||
package org.raddatz.familienarchiv.person;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Result of {@link PersonService#resolveByName(String)}: candidate persons split by name-match
|
||||
* strength. {@code direct} = every query token is a whole-token match across the person's name
|
||||
* components (alias/maiden-name aware); {@code partial} = matched the substring fetch but is not
|
||||
* direct. The vocabulary is deliberately name-match strength ({@code direct}/{@code partial}), not
|
||||
* the search layer's resolved/ambiguous buckets — the caller maps these into its own outcome.
|
||||
*/
|
||||
public record NameMatches(List<Person> direct, List<Person> partial) {
|
||||
}
|
||||
@@ -2,12 +2,14 @@ package org.raddatz.familienarchiv.person;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.springframework.lang.Nullable;
|
||||
|
||||
@@ -35,6 +37,13 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@Slf4j
|
||||
public class PersonService {
|
||||
|
||||
// Co-located with the fetch loop that owns them (issue #763). MAX_TOKENS caps the number of
|
||||
// unindexed leading-wildcard LIKE scans per name — a DoS control, not just perf. MAX_CANDIDATES
|
||||
// bounds each result bucket and is applied AFTER classification so a direct match that sorts
|
||||
// past position 10 among partials is never discarded.
|
||||
private static final int MAX_TOKENS = 8;
|
||||
private static final int MAX_CANDIDATES = 10;
|
||||
|
||||
private final PersonRepository personRepository;
|
||||
private final PersonNameAliasRepository aliasRepository;
|
||||
|
||||
@@ -125,6 +134,76 @@ public class PersonService {
|
||||
return tokens;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves an extracted person name into {@link NameMatches} by name-match strength.
|
||||
* Orchestrates tokenize → cap → fetch pool → classify → cap-after-classify. Read-only
|
||||
* transaction keeps the Hibernate session open so each candidate's lazy {@code nameAliases}
|
||||
* are reachable during classification (see ADR-022).
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public NameMatches resolveByName(String name) {
|
||||
Set<String> queryTokens = capTokens(tokenize(name));
|
||||
if (queryTokens.isEmpty()) {
|
||||
log.debug("resolveByName outcome=no-match tokens=0");
|
||||
return new NameMatches(List.of(), List.of());
|
||||
}
|
||||
return classify(fetchPool(queryTokens), queryTokens);
|
||||
}
|
||||
|
||||
private Set<String> capTokens(Set<String> tokens) {
|
||||
return tokens.stream().limit(MAX_TOKENS).collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
}
|
||||
|
||||
private List<Person> fetchPool(Set<String> queryTokens) {
|
||||
LinkedHashMap<UUID, Person> pool = new LinkedHashMap<>();
|
||||
for (String token : queryTokens) {
|
||||
for (Person candidate : findByDisplayNameContaining(token)) {
|
||||
pool.putIfAbsent(candidate.getId(), candidate);
|
||||
}
|
||||
}
|
||||
return new ArrayList<>(pool.values());
|
||||
}
|
||||
|
||||
private NameMatches classify(List<Person> pool, Set<String> queryTokens) {
|
||||
List<Person> direct = new ArrayList<>();
|
||||
List<Person> partial = new ArrayList<>();
|
||||
for (Person candidate : pool) {
|
||||
if (personTokens(candidate).containsAll(queryTokens)) {
|
||||
direct.add(candidate);
|
||||
} else {
|
||||
partial.add(candidate);
|
||||
}
|
||||
}
|
||||
List<Person> cappedDirect = cap(direct);
|
||||
List<Person> cappedPartial = cap(partial);
|
||||
log.debug("resolveByName outcome={} tokens={}", outcome(cappedDirect, cappedPartial), queryTokens.size());
|
||||
return new NameMatches(cappedDirect, cappedPartial);
|
||||
}
|
||||
|
||||
private static Set<String> personTokens(Person person) {
|
||||
Set<String> tokens = new LinkedHashSet<>();
|
||||
tokens.addAll(tokenize(person.getFirstName()));
|
||||
tokens.addAll(tokenize(person.getLastName()));
|
||||
tokens.addAll(tokenize(person.getAlias()));
|
||||
tokens.addAll(tokenize(person.getTitle()));
|
||||
for (PersonNameAlias alias : person.getNameAliases()) {
|
||||
tokens.addAll(tokenize(alias.getFirstName()));
|
||||
tokens.addAll(tokenize(alias.getLastName()));
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
private static List<Person> cap(List<Person> people) {
|
||||
return people.size() > MAX_CANDIDATES ? people.subList(0, MAX_CANDIDATES) : people;
|
||||
}
|
||||
|
||||
private static String outcome(List<Person> direct, List<Person> partial) {
|
||||
if (direct.size() == 1) return "direct=1";
|
||||
if (direct.size() >= 2) return "direct>=2";
|
||||
if (!partial.isEmpty()) return "partial-only";
|
||||
return "no-match";
|
||||
}
|
||||
|
||||
public List<Person> findAllFamilyMembers() {
|
||||
return personRepository.findByFamilyMemberTrueOrderByLastNameAscFirstNameAsc();
|
||||
}
|
||||
|
||||
@@ -941,4 +941,108 @@ class PersonServiceTest {
|
||||
void tokenize_null_returnsEmpty() {
|
||||
assertThat(PersonService.tokenize(null)).isEmpty();
|
||||
}
|
||||
|
||||
// ─── resolveByName (direct / partial classification) ──────────────────────
|
||||
|
||||
@Test
|
||||
void resolveByName_singleDirectMatch_classifiesAsDirect() {
|
||||
Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram").build();
|
||||
when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
|
||||
when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
|
||||
|
||||
NameMatches result = personService.resolveByName("Clara Cram");
|
||||
|
||||
assertThat(result.direct()).containsExactly(clara);
|
||||
}
|
||||
|
||||
@Test
|
||||
void resolveByName_maidenAliasToken_classifiesAsDirect() {
|
||||
Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Müller")
|
||||
.nameAliases(List.of(PersonNameAlias.builder().lastName("Cram")
|
||||
.type(PersonNameAliasType.MAIDEN_NAME).build()))
|
||||
.build();
|
||||
when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
|
||||
when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
|
||||
|
||||
NameMatches result = personService.resolveByName("Clara Cram");
|
||||
|
||||
assertThat(result.direct()).containsExactly(clara);
|
||||
}
|
||||
|
||||
@Test
|
||||
void resolveByName_aliasFirstNameToken_isFetchedAndClassified() {
|
||||
Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram")
|
||||
.nameAliases(List.of(PersonNameAlias.builder().firstName("Wilhelmina").lastName("de Gruyter")
|
||||
.type(PersonNameAliasType.BIRTH).build()))
|
||||
.build();
|
||||
when(personRepository.searchByName("wilhelmina")).thenReturn(List.of(clara));
|
||||
|
||||
NameMatches result = personService.resolveByName("Wilhelmina");
|
||||
|
||||
assertThat(result.direct()).containsExactly(clara);
|
||||
}
|
||||
|
||||
@Test
|
||||
void resolveByName_middleName_stillDirect() {
|
||||
Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara Maria").lastName("Cram").build();
|
||||
when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
|
||||
when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
|
||||
|
||||
NameMatches result = personService.resolveByName("Clara Cram");
|
||||
|
||||
assertThat(result.direct()).containsExactly(clara);
|
||||
}
|
||||
|
||||
@Test
|
||||
void resolveByName_reorderedTokens_stillDirect() {
|
||||
Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram").build();
|
||||
when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
|
||||
when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
|
||||
|
||||
NameMatches result = personService.resolveByName("Cram Clara");
|
||||
|
||||
assertThat(result.direct()).containsExactly(clara);
|
||||
}
|
||||
|
||||
@Test
|
||||
void resolveByName_cramVsCramer_classifiesAsPartial() {
|
||||
Person cramer = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cramer").build();
|
||||
when(personRepository.searchByName("clara")).thenReturn(List.of(cramer));
|
||||
when(personRepository.searchByName("cram")).thenReturn(List.of(cramer));
|
||||
|
||||
NameMatches result = personService.resolveByName("Clara Cram");
|
||||
|
||||
assertThat(result.partial()).containsExactly(cramer);
|
||||
}
|
||||
|
||||
@Test
|
||||
void resolveByName_emptyAfterTokenizing_returnsNoCandidates() {
|
||||
NameMatches result = personService.resolveByName(" - ");
|
||||
|
||||
assertThat(result.direct()).isEmpty();
|
||||
verify(personRepository, never()).searchByName(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void resolveByName_directSortsBeyondCap_stillReturnedAsDirect() {
|
||||
List<Person> pool = new java.util.ArrayList<>();
|
||||
for (int i = 0; i < 10; i++) {
|
||||
pool.add(Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cramer").build());
|
||||
}
|
||||
Person direct = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram").build();
|
||||
pool.add(direct);
|
||||
when(personRepository.searchByName("clara")).thenReturn(pool);
|
||||
when(personRepository.searchByName("cram")).thenReturn(pool);
|
||||
|
||||
NameMatches result = personService.resolveByName("Clara Cram");
|
||||
|
||||
assertThat(result.direct()).containsExactly(direct);
|
||||
}
|
||||
|
||||
@Test
|
||||
void resolveByName_over8Tokens_issuesAtMost8Fetches() {
|
||||
personService.resolveByName("a b c d e f g h i j");
|
||||
|
||||
verify(personRepository, org.mockito.Mockito.atMost(8)).searchByName(any());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user