From ef75f7f098cd97c77f65398c0aa47622b3f907aa Mon Sep 17 00:00:00 2001
From: Marcel
Date: Sun, 7 Jun 2026 00:55:13 +0200
Subject: [PATCH] feat(person): add resolveByName for direct/partial name matching (#763)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Token-set containment over all of a person's name components (firstName,
lastName, alias, each PersonNameAlias first+last, title) decides direct
vs partial. Orchestrates tokenize → cap(8) → fetch pool → classify →
cap(10) after classification, with an empty-token guard and a PII-free
debug log of the outcome bucket.

MAX_TOKENS is a DoS control; the after-classify cap keeps a direct match
that sorts past position 10 among partials. Read-only transaction keeps
lazy nameAliases reachable during classification (ADR-022).

Co-Authored-By: Claude Opus 4.8
---
 .../familienarchiv/person/NameMatches.java    |  13 +++
 .../familienarchiv/person/PersonService.java  |  89 +++++++++++++++
 .../person/PersonServiceTest.java             | 104 ++++++++++++++++++
 3 files changed, 206 insertions(+)
 create mode 100644 backend/src/main/java/org/raddatz/familienarchiv/person/NameMatches.java

diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/NameMatches.java b/backend/src/main/java/org/raddatz/familienarchiv/person/NameMatches.java
new file mode 100644
index 00000000..ddc7a1c6
--- /dev/null
+++ b/backend/src/main/java/org/raddatz/familienarchiv/person/NameMatches.java
@@ -0,0 +1,13 @@
+package org.raddatz.familienarchiv.person;
+
+import java.util.List;
+
+/**
+ * Result of {@link PersonService#resolveByName(String)}: candidate persons split by name-match
+ * strength. {@code direct} = every query token is a whole-token match across the person's name
+ * components (alias/maiden-name aware); {@code partial} = matched the substring fetch but is not
+ * direct. The vocabulary is deliberately name-match strength ({@code direct}/{@code partial}), not
+ * the search layer's resolved/ambiguous buckets — the caller maps these into its own outcome.
+ */
+public record NameMatches(List<Person> direct, List<Person> partial) {
+}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonService.java b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonService.java
index c9710c7e..d195d0bb 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonService.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonService.java
@@ -2,12 +2,14 @@ package org.raddatz.familienarchiv.person;
 
 import java.util.ArrayList;
 import java.util.Comparator;
+import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Optional;
 import java.util.Set;
 import java.util.UUID;
+import java.util.stream.Collectors;
 
 import org.springframework.lang.Nullable;
 
@@ -35,6 +37,13 @@ import lombok.extern.slf4j.Slf4j;
 @Slf4j
 public class PersonService {
 
+    // Co-located with the fetch loop that owns them (issue #763). MAX_TOKENS caps the number of
+    // unindexed leading-wildcard LIKE scans per name — a DoS control, not just perf. MAX_CANDIDATES
+    // bounds each result bucket and is applied AFTER classification so a direct match that sorts
+    // past position 10 among partials is never discarded.
+    private static final int MAX_TOKENS = 8;
+    private static final int MAX_CANDIDATES = 10;
+
     private final PersonRepository personRepository;
     private final PersonNameAliasRepository aliasRepository;
 
@@ -125,6 +134,86 @@ public class PersonService {
         return tokens;
     }
 
+    /**
+     * Resolves an extracted person name into {@link NameMatches} by name-match strength.
+     * Orchestrates tokenize → cap → fetch pool → classify → cap-after-classify. Read-only
+     * transaction keeps the Hibernate session open so each candidate's lazy {@code nameAliases}
+     * are reachable during classification (see ADR-022).
+     */
+    @Transactional(readOnly = true)
+    public NameMatches resolveByName(String name) {
+        Set<String> queryTokens = capTokens(tokenize(name));
+        if (queryTokens.isEmpty()) {
+            log.debug("resolveByName outcome=no-match tokens=0");
+            return new NameMatches(List.of(), List.of());
+        }
+        return classify(fetchPool(queryTokens), queryTokens);
+    }
+
+    /** Keeps at most {@link #MAX_TOKENS} tokens (the first ones in the set's iteration order). */
+    private Set<String> capTokens(Set<String> tokens) {
+        return tokens.stream().limit(MAX_TOKENS).collect(Collectors.toCollection(LinkedHashSet::new));
+    }
+
+    /** Runs one name fetch per token and merges the hits, de-duplicated by id in first-seen order. */
+    private List<Person> fetchPool(Set<String> queryTokens) {
+        LinkedHashMap<UUID, Person> pool = new LinkedHashMap<>();
+        for (String token : queryTokens) {
+            for (Person candidate : findByDisplayNameContaining(token)) {
+                pool.putIfAbsent(candidate.getId(), candidate);
+            }
+        }
+        return new ArrayList<>(pool.values());
+    }
+
+    /** Buckets candidates (direct = person's tokens contain every query token), caps both, logs outcome. */
+    private NameMatches classify(List<Person> pool, Set<String> queryTokens) {
+        List<Person> direct = new ArrayList<>();
+        List<Person> partial = new ArrayList<>();
+        for (Person candidate : pool) {
+            if (personTokens(candidate).containsAll(queryTokens)) {
+                direct.add(candidate);
+            } else {
+                partial.add(candidate);
+            }
+        }
+        List<Person> cappedDirect = cap(direct);
+        List<Person> cappedPartial = cap(partial);
+        log.debug("resolveByName outcome={} tokens={}", outcome(cappedDirect, cappedPartial), queryTokens.size());
+        return new NameMatches(cappedDirect, cappedPartial);
+    }
+
+    /** Collects the tokens of first name, last name, alias, title and every name alias (first + last). */
+    private static Set<String> personTokens(Person person) {
+        Set<String> tokens = new LinkedHashSet<>();
+        tokens.addAll(tokenize(person.getFirstName()));
+        tokens.addAll(tokenize(person.getLastName()));
+        tokens.addAll(tokenize(person.getAlias()));
+        tokens.addAll(tokenize(person.getTitle()));
+        for (PersonNameAlias alias : person.getNameAliases()) {
+            tokens.addAll(tokenize(alias.getFirstName()));
+            tokens.addAll(tokenize(alias.getLastName()));
+        }
+        return tokens;
+    }
+
+    /**
+     * Returns an unmodifiable copy holding at most {@link #MAX_CANDIDATES} leading entries. Copying
+     * (rather than returning the {@code subList} view) releases the uncapped backing list and makes
+     * every result immutable, like the {@code List.of()} buckets of the empty-token result.
+     */
+    private static List<Person> cap(List<Person> people) {
+        return List.copyOf(people.size() > MAX_CANDIDATES ? people.subList(0, MAX_CANDIDATES) : people);
+    }
+
+    /** Maps the capped buckets to the outcome label (counts only, no names) written to the debug log. */
+    private static String outcome(List<Person> direct, List<Person> partial) {
+        if (direct.size() == 1) return "direct=1";
+        if (direct.size() >= 2) return "direct>=2";
+        if (!partial.isEmpty()) return "partial-only";
+        return "no-match";
+    }
+
     public List<Person> findAllFamilyMembers() {
         return personRepository.findByFamilyMemberTrueOrderByLastNameAscFirstNameAsc();
     }
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceTest.java
index 248504c5..43afccd7 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceTest.java
@@ -941,4 +941,108 @@ class PersonServiceTest {
     void tokenize_null_returnsEmpty() {
         assertThat(PersonService.tokenize(null)).isEmpty();
     }
+
+    // ─── resolveByName (direct / partial classification) ──────────────────────
+
+    @Test
+    void resolveByName_singleDirectMatch_classifiesAsDirect() {
+        Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram").build();
+        when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
+        when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
+
+        NameMatches result = personService.resolveByName("Clara Cram");
+
+        assertThat(result.direct()).containsExactly(clara);
+    }
+
+    @Test
+    void resolveByName_maidenAliasToken_classifiesAsDirect() {
+        Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Müller")
+                .nameAliases(List.of(PersonNameAlias.builder().lastName("Cram")
+                        .type(PersonNameAliasType.MAIDEN_NAME).build()))
+                .build();
+        when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
+        when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
+
+        NameMatches result = personService.resolveByName("Clara Cram");
+
+        assertThat(result.direct()).containsExactly(clara);
+    }
+
+    @Test
+    void resolveByName_aliasFirstNameToken_isFetchedAndClassified() {
+        Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram")
+                .nameAliases(List.of(PersonNameAlias.builder().firstName("Wilhelmina").lastName("de Gruyter")
+                        .type(PersonNameAliasType.BIRTH).build()))
+                .build();
+        when(personRepository.searchByName("wilhelmina")).thenReturn(List.of(clara));
+
+        NameMatches result = personService.resolveByName("Wilhelmina");
+
+        assertThat(result.direct()).containsExactly(clara);
+    }
+
+    @Test
+    void resolveByName_middleName_stillDirect() {
+        Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara Maria").lastName("Cram").build();
+        when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
+        when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
+
+        NameMatches result = personService.resolveByName("Clara Cram");
+
+        assertThat(result.direct()).containsExactly(clara);
+    }
+
+    @Test
+    void resolveByName_reorderedTokens_stillDirect() {
+        Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram").build();
+        when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
+        when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
+
+        NameMatches result = personService.resolveByName("Cram Clara");
+
+        assertThat(result.direct()).containsExactly(clara);
+    }
+
+    @Test
+    void resolveByName_cramVsCramer_classifiesAsPartial() {
+        Person cramer = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cramer").build();
+        when(personRepository.searchByName("clara")).thenReturn(List.of(cramer));
+        when(personRepository.searchByName("cram")).thenReturn(List.of(cramer));
+
+        NameMatches result = personService.resolveByName("Clara Cram");
+
+        assertThat(result.partial()).containsExactly(cramer);
+    }
+
+    @Test
+    void resolveByName_emptyAfterTokenizing_returnsNoCandidates() {
+        NameMatches result = personService.resolveByName(" - ");
+
+        assertThat(result.direct()).isEmpty();
+        verify(personRepository, never()).searchByName(any());
+    }
+
+    @Test
+    void resolveByName_directSortsBeyondCap_stillReturnedAsDirect() {
+        List<Person> pool = new java.util.ArrayList<>();
+        for (int i = 0; i < 10; i++) {
+            pool.add(Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cramer").build());
+        }
+        Person direct = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram").build();
+        pool.add(direct);
+        when(personRepository.searchByName("clara")).thenReturn(pool);
+        when(personRepository.searchByName("cram")).thenReturn(pool);
+
+        NameMatches result = personService.resolveByName("Clara Cram");
+
+        assertThat(result.direct()).containsExactly(direct);
+    }
+
+    @Test
+    void resolveByName_over8Tokens_issuesAtMost8Fetches() {
+        personService.resolveByName("a b c d e f g h i j");
+
+        verify(personRepository, org.mockito.Mockito.atMost(8)).searchByName(any());
+    }
 }