feat(person): add resolveByName for direct/partial name matching (#763)

Token-set containment over all of a person's name components (firstName,
lastName, alias, title, and each PersonNameAlias first+last) decides direct vs
partial. resolveByName orchestrates tokenize → cap(8) → fetch pool → classify →
cap(10), with an empty-token guard and a PII-free debug log of the outcome
bucket. MAX_TOKENS is a DoS control; the result cap is applied after
classification so that a direct match sorting past position 10 among partials
is not discarded. A read-only transaction keeps lazy nameAliases reachable
during classification (ADR-022).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-06-07 00:55:13 +02:00
committed by marcel
parent 9a26bf75b0
commit ca52145556
3 changed files with 196 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
package org.raddatz.familienarchiv.person;
import java.util.List;
/**
 * Result of {@link PersonService#resolveByName(String)}: candidate persons split by name-match
 * strength.
 *
 * <p>{@code direct} = every query token is a whole-token match across the person's name
 * components (alias/maiden-name aware); {@code partial} = matched the substring fetch but is not
 * direct. The vocabulary is deliberately name-match strength ({@code direct}/{@code partial}), not
 * the search layer's resolved/ambiguous buckets — the caller maps these into its own outcome.
 *
 * <p>Both lists are copied into unmodifiable snapshots on construction, so the record is
 * immutable and never holds on to a {@code subList} view (and, through it, the full uncapped
 * backing list). A {@code null} list is normalized to an empty list; list elements must be
 * non-null.
 *
 * @param direct  persons whose name tokens contain every query token
 * @param partial persons returned by the fetch that are not direct matches
 */
public record NameMatches(List<Person> direct, List<Person> partial) {
    public NameMatches {
        direct = direct == null ? List.of() : List.copyOf(direct);
        partial = partial == null ? List.of() : List.copyOf(partial);
    }
}

View File

@@ -2,12 +2,14 @@ package org.raddatz.familienarchiv.person;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Comparator; import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet; import java.util.LinkedHashSet;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Optional; import java.util.Optional;
import java.util.Set; import java.util.Set;
import java.util.UUID; import java.util.UUID;
import java.util.stream.Collectors;
import org.springframework.lang.Nullable; import org.springframework.lang.Nullable;
@@ -35,6 +37,13 @@ import lombok.extern.slf4j.Slf4j;
@Slf4j @Slf4j
public class PersonService { public class PersonService {
// Limits for resolveByName, co-located with the fetch loop that owns them (issue #763).
// MAX_TOKENS caps how many query tokens are kept, and therefore the number of unindexed
// leading-wildcard LIKE scans per name — a DoS control, not just perf.
// MAX_CANDIDATES bounds each result bucket (direct and partial separately) and is applied AFTER
// classification so a direct match that sorts past position 10 among partials is never discarded.
private static final int MAX_TOKENS = 8;
private static final int MAX_CANDIDATES = 10;
private final PersonRepository personRepository; private final PersonRepository personRepository;
private final PersonNameAliasRepository aliasRepository; private final PersonNameAliasRepository aliasRepository;
@@ -125,6 +134,76 @@ public class PersonService {
return tokens; return tokens;
} }
/**
 * Looks up persons for an extracted name and buckets them by name-match strength.
 *
 * <p>Pipeline: tokenize → cap tokens → fetch candidate pool → classify → cap each bucket. The
 * read-only transaction keeps the Hibernate session open so that every candidate's lazy
 * {@code nameAliases} can still be loaded while classifying (see ADR-022).
 *
 * @param name the extracted person name to resolve
 * @return the direct and partial candidates; both lists are empty when the name yields no tokens
 */
@Transactional(readOnly = true)
public NameMatches resolveByName(String name) {
    Set<String> tokens = capTokens(tokenize(name));
    if (!tokens.isEmpty()) {
        List<Person> candidates = fetchPool(tokens);
        return classify(candidates, tokens);
    }
    // Nothing to search for: return early without touching the candidate fetch.
    log.debug("resolveByName outcome=no-match tokens=0");
    return new NameMatches(List.of(), List.of());
}
/**
 * Keeps only the first {@code MAX_TOKENS} tokens, in the encounter order of the given set.
 *
 * @param tokens the tokenized query
 * @return a new insertion-ordered set holding at most {@code MAX_TOKENS} tokens
 */
private Set<String> capTokens(Set<String> tokens) {
    var firstTokens = tokens.stream().limit(MAX_TOKENS);
    return firstTokens.collect(Collectors.toCollection(LinkedHashSet::new));
}
/**
 * Builds the candidate pool: one lookup per query token, merged in encounter order. A person
 * returned for several tokens is kept once (first occurrence wins), keyed by id.
 *
 * @param queryTokens the capped query tokens
 * @return the de-duplicated candidates as a new list
 */
private List<Person> fetchPool(Set<String> queryTokens) {
    LinkedHashMap<UUID, Person> byId = new LinkedHashMap<>();
    for (String queryToken : queryTokens) {
        findByDisplayNameContaining(queryToken)
                .forEach(hit -> byId.putIfAbsent(hit.getId(), hit));
    }
    return new ArrayList<>(byId.values());
}
/**
 * Splits the pool into direct matches (the person's name tokens contain every query token) and
 * partial matches (all other candidates), caps each bucket, and debug-logs the outcome bucket
 * together with the token count.
 *
 * @param pool        the fetched candidates
 * @param queryTokens the capped query tokens
 * @return the capped direct and partial buckets
 */
private NameMatches classify(List<Person> pool, Set<String> queryTokens) {
    List<Person> directHits = new ArrayList<>();
    List<Person> partialHits = new ArrayList<>();
    for (Person person : pool) {
        boolean coversAllTokens = personTokens(person).containsAll(queryTokens);
        (coversAllTokens ? directHits : partialHits).add(person);
    }
    // Cap only now, after classification, so a late-sorting direct match is not cut off.
    NameMatches matches = new NameMatches(cap(directHits), cap(partialHits));
    log.debug("resolveByName outcome={} tokens={}",
            outcome(matches.direct(), matches.partial()), queryTokens.size());
    return matches;
}
/**
 * Gathers every name token of a person: first name, last name, alias, title, and the first and
 * last name of each of the person's name aliases.
 *
 * @param person the candidate whose name components are tokenized
 * @return the union of all tokens, in insertion order
 */
private static Set<String> personTokens(Person person) {
    Set<String> collected = new LinkedHashSet<>(tokenize(person.getFirstName()));
    collected.addAll(tokenize(person.getLastName()));
    collected.addAll(tokenize(person.getAlias()));
    collected.addAll(tokenize(person.getTitle()));
    person.getNameAliases().forEach(nameAlias -> {
        collected.addAll(tokenize(nameAlias.getFirstName()));
        collected.addAll(tokenize(nameAlias.getLastName()));
    });
    return collected;
}
/**
 * Truncates a bucket to its first {@code MAX_CANDIDATES} entries; a list that already fits is
 * returned as-is.
 */
private static List<Person> cap(List<Person> people) {
    if (people.size() <= MAX_CANDIDATES) {
        return people;
    }
    return people.subList(0, MAX_CANDIDATES);
}
/**
 * Names the outcome bucket for the debug log without exposing any person data: exactly one
 * direct match, several direct matches, partial matches only, or nothing at all.
 */
private static String outcome(List<Person> direct, List<Person> partial) {
    return switch (direct.size()) {
        case 0 -> partial.isEmpty() ? "no-match" : "partial-only";
        case 1 -> "direct=1";
        default -> "direct>=2";
    };
}
public List<Person> findAllFamilyMembers() { public List<Person> findAllFamilyMembers() {
return personRepository.findByFamilyMemberTrueOrderByLastNameAscFirstNameAsc(); return personRepository.findByFamilyMemberTrueOrderByLastNameAscFirstNameAsc();
} }

View File

@@ -941,4 +941,108 @@ class PersonServiceTest {
void tokenize_null_returnsEmpty() { void tokenize_null_returnsEmpty() {
assertThat(PersonService.tokenize(null)).isEmpty(); assertThat(PersonService.tokenize(null)).isEmpty();
} }
// ─── resolveByName (direct / partial classification) ──────────────────────
@Test
void resolveByName_singleDirectMatch_classifiesAsDirect() {
    // Both query tokens are whole tokens of the person's first and last name.
    Person person = Person.builder()
            .id(UUID.randomUUID())
            .firstName("Clara")
            .lastName("Cram")
            .build();
    List<Person> hits = List.of(person);
    when(personRepository.searchByName("clara")).thenReturn(hits);
    when(personRepository.searchByName("cram")).thenReturn(hits);

    List<Person> direct = personService.resolveByName("Clara Cram").direct();

    assertThat(direct).containsExactly(person);
}
@Test
void resolveByName_maidenAliasToken_classifiesAsDirect() {
    // "Cram" exists only as the maiden-name alias; the current last name is "Müller".
    PersonNameAlias maidenName = PersonNameAlias.builder()
            .lastName("Cram")
            .type(PersonNameAliasType.MAIDEN_NAME)
            .build();
    Person person = Person.builder()
            .id(UUID.randomUUID())
            .firstName("Clara")
            .lastName("Müller")
            .nameAliases(List.of(maidenName))
            .build();
    List<Person> hits = List.of(person);
    when(personRepository.searchByName("clara")).thenReturn(hits);
    when(personRepository.searchByName("cram")).thenReturn(hits);

    List<Person> direct = personService.resolveByName("Clara Cram").direct();

    assertThat(direct).containsExactly(person);
}
@Test
void resolveByName_aliasFirstNameToken_isFetchedAndClassified() {
    // The query token matches only the first name stored on the birth-name alias.
    PersonNameAlias birthName = PersonNameAlias.builder()
            .firstName("Wilhelmina")
            .lastName("de Gruyter")
            .type(PersonNameAliasType.BIRTH)
            .build();
    Person person = Person.builder()
            .id(UUID.randomUUID())
            .firstName("Clara")
            .lastName("Cram")
            .nameAliases(List.of(birthName))
            .build();
    when(personRepository.searchByName("wilhelmina")).thenReturn(List.of(person));

    List<Person> direct = personService.resolveByName("Wilhelmina").direct();

    assertThat(direct).containsExactly(person);
}
@Test
void resolveByName_middleName_stillDirect() {
    // An extra middle name on the person must not demote the match: containment, not equality.
    Person person = Person.builder()
            .id(UUID.randomUUID())
            .firstName("Clara Maria")
            .lastName("Cram")
            .build();
    List<Person> hits = List.of(person);
    when(personRepository.searchByName("clara")).thenReturn(hits);
    when(personRepository.searchByName("cram")).thenReturn(hits);

    List<Person> direct = personService.resolveByName("Clara Cram").direct();

    assertThat(direct).containsExactly(person);
}
@Test
void resolveByName_reorderedTokens_stillDirect() {
    // Last name first in the query: token order must not matter.
    Person person = Person.builder()
            .id(UUID.randomUUID())
            .firstName("Clara")
            .lastName("Cram")
            .build();
    List<Person> hits = List.of(person);
    when(personRepository.searchByName("cram")).thenReturn(hits);
    when(personRepository.searchByName("clara")).thenReturn(hits);

    List<Person> direct = personService.resolveByName("Cram Clara").direct();

    assertThat(direct).containsExactly(person);
}
@Test
void resolveByName_cramVsCramer_classifiesAsPartial() {
    // "cram" is only a substring of "cramer", so the fetch returns the person but the
    // whole-token check must not treat it as a direct match.
    Person cramer = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cramer").build();
    when(personRepository.searchByName("clara")).thenReturn(List.of(cramer));
    when(personRepository.searchByName("cram")).thenReturn(List.of(cramer));

    NameMatches result = personService.resolveByName("Clara Cram");

    // Pin both buckets: the person is partial AND is not reported as direct.
    assertThat(result.direct()).isEmpty();
    assertThat(result.partial()).containsExactly(cramer);
}
@Test
void resolveByName_emptyAfterTokenizing_returnsNoCandidates() {
    NameMatches result = personService.resolveByName(" - ");

    // "No candidates" means both buckets are empty, not just the direct one.
    assertThat(result.direct()).isEmpty();
    assertThat(result.partial()).isEmpty();
    // The empty-token guard must short-circuit before any repository lookup.
    verify(personRepository, never()).searchByName(any());
}
@Test
void resolveByName_directSortsBeyondCap_stillReturnedAsDirect() {
    // Ten partial candidates come first; the only direct match sits at position 11.
    List<Person> candidates = new java.util.ArrayList<>();
    while (candidates.size() < 10) {
        candidates.add(Person.builder()
                .id(UUID.randomUUID())
                .firstName("Clara")
                .lastName("Cramer")
                .build());
    }
    Person exact = Person.builder()
            .id(UUID.randomUUID())
            .firstName("Clara")
            .lastName("Cram")
            .build();
    candidates.add(exact);
    when(personRepository.searchByName("clara")).thenReturn(candidates);
    when(personRepository.searchByName("cram")).thenReturn(candidates);

    List<Person> direct = personService.resolveByName("Clara Cram").direct();

    assertThat(direct).containsExactly(exact);
}
@Test
void resolveByName_over8Tokens_issuesAtMost8Fetches() {
    // Ten distinct multi-letter tokens. One-letter tokens could be dropped by tokenize, which
    // would let an "at most 8" check pass without a single fetch.
    // NOTE(review): assumes tokenize keeps every plain word below and that each token costs
    // exactly one searchByName call, as the stubbing in the sibling tests suggests — confirm.
    personService.resolveByName("anna bert carl dora emil fritz gerd hans ines jost");

    // Exactly 8: proves the cap is reached and that it does not trim below MAX_TOKENS either.
    verify(personRepository, org.mockito.Mockito.times(8)).searchByName(any());
}
} }