merge: resolve conflicts with origin/main (#763 person name-match integration)
All checks were successful
CI / Unit & Component Tests (pull_request) Successful in 3m31s
CI / OCR Service Tests (pull_request) Successful in 25s
CI / Backend Unit Tests (pull_request) Successful in 3m48s
CI / fail2ban Regex (pull_request) Successful in 45s
CI / Semgrep Security Scan (pull_request) Successful in 22s
CI / Compose Bucket Idempotency (pull_request) Successful in 1m6s
CI / Unit & Component Tests (push) Successful in 3m20s
CI / OCR Service Tests (push) Successful in 23s
CI / Backend Unit Tests (push) Successful in 3m48s
CI / fail2ban Regex (push) Successful in 46s
CI / Semgrep Security Scan (push) Successful in 23s
CI / Compose Bucket Idempotency (push) Successful in 1m8s
All checks were successful
CI / Unit & Component Tests (pull_request) Successful in 3m31s
CI / OCR Service Tests (pull_request) Successful in 25s
CI / Backend Unit Tests (pull_request) Successful in 3m48s
CI / fail2ban Regex (pull_request) Successful in 45s
CI / Semgrep Security Scan (pull_request) Successful in 22s
CI / Compose Bucket Idempotency (pull_request) Successful in 1m6s
CI / Unit & Component Tests (push) Successful in 3m20s
CI / OCR Service Tests (push) Successful in 23s
CI / Backend Unit Tests (push) Successful in 3m48s
CI / fail2ban Regex (push) Successful in 46s
CI / Semgrep Security Scan (push) Successful in 23s
CI / Compose Bucket Idempotency (push) Successful in 1m8s
- Drop unused MAX_CANDIDATES constant (not referenced in service) - Keep detached-entity safety comment in resolveTags() - Add 3 new partial-name match tests (23a/b/c) from #763 - Use resolveByName() API in test 28 (replaces findByDisplayNameContaining) - Add NameMatches glossary entry from #763 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit was merged in pull request #765.
This commit is contained in:
@@ -0,0 +1,13 @@
|
||||
package org.raddatz.familienarchiv.person;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Result of {@link PersonService#resolveByName(String)}: candidate persons split by name-match
|
||||
* strength. {@code direct} = every query token is a whole-token match across the person's name
|
||||
* components (alias/maiden-name aware); {@code partial} = matched the substring fetch but is not
|
||||
* direct. The vocabulary is deliberately name-match strength ({@code direct}/{@code partial}), not
|
||||
* the search layer's resolved/ambiguous buckets — the caller maps these into its own outcome.
|
||||
*/
|
||||
public record NameMatches(List<Person> direct, List<Person> partial) {
|
||||
}
|
||||
@@ -19,7 +19,8 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
"LOWER(CONCAT(COALESCE(p.firstName, ''),' ',p.lastName)) LIKE LOWER(CONCAT('%', :query, '%')) OR " +
|
||||
"LOWER(CONCAT(p.lastName, ' ', COALESCE(p.firstName, ''))) LIKE LOWER(CONCAT('%', :query, '%')) OR " +
|
||||
"LOWER(p.alias) LIKE LOWER(CONCAT('%', :query, '%')) OR " +
|
||||
"LOWER(a.lastName) LIKE LOWER(CONCAT('%', :query, '%')) " +
|
||||
"LOWER(a.lastName) LIKE LOWER(CONCAT('%', :query, '%')) OR " +
|
||||
"LOWER(a.firstName) LIKE LOWER(CONCAT('%', :query, '%')) " +
|
||||
"ORDER BY p.lastName ASC, p.firstName ASC")
|
||||
List<Person> searchByName(@Param("query") String query);
|
||||
|
||||
|
||||
@@ -1,9 +1,15 @@
|
||||
package org.raddatz.familienarchiv.person;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.springframework.lang.Nullable;
|
||||
|
||||
@@ -24,11 +30,20 @@ import org.springframework.transaction.annotation.Transactional;
|
||||
import org.springframework.web.server.ResponseStatusException;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class PersonService {
|
||||
|
||||
// Co-located with the fetch loop that owns them (issue #763). MAX_TOKENS caps the number of
|
||||
// unindexed leading-wildcard LIKE scans per name — a DoS control, not just perf. MAX_CANDIDATES
|
||||
// bounds each result bucket and is applied AFTER classification so a direct match that sorts
|
||||
// past position 10 among partials is never discarded.
|
||||
private static final int MAX_TOKENS = 8;
|
||||
private static final int MAX_CANDIDATES = 10;
|
||||
|
||||
private final PersonRepository personRepository;
|
||||
private final PersonNameAliasRepository aliasRepository;
|
||||
|
||||
@@ -103,6 +118,92 @@ public class PersonService {
|
||||
return personRepository.searchByName(fragment);
|
||||
}
|
||||
|
||||
// Name-match tokenizer (issue #763): lowercase, split on whitespace/hyphen/apostrophe,
|
||||
// drop empties. Applied symmetrically to the query and to every candidate name component so
|
||||
// that "Anna-Maria" and "Anna Maria" tokenize alike. Order-preserving for deterministic tests.
|
||||
static Set<String> tokenize(String raw) {
|
||||
if (raw == null || raw.isBlank()) {
|
||||
return Set.of();
|
||||
}
|
||||
LinkedHashSet<String> tokens = new LinkedHashSet<>();
|
||||
for (String part : raw.toLowerCase(Locale.ROOT).split("[\\s\\-']+")) {
|
||||
if (!part.isEmpty()) {
|
||||
tokens.add(part);
|
||||
}
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves an extracted person name into {@link NameMatches} by name-match strength.
|
||||
* Orchestrates tokenize → cap → fetch pool → classify → cap-after-classify. Read-only
|
||||
* transaction keeps the Hibernate session open so each candidate's lazy {@code nameAliases}
|
||||
* are reachable during classification (see ADR-022).
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public NameMatches resolveByName(String name) {
|
||||
Set<String> queryTokens = capTokens(tokenize(name));
|
||||
if (queryTokens.isEmpty()) {
|
||||
log.debug("resolveByName outcome=no-match tokens=0");
|
||||
return new NameMatches(List.of(), List.of());
|
||||
}
|
||||
return classify(fetchPool(queryTokens), queryTokens);
|
||||
}
|
||||
|
||||
private Set<String> capTokens(Set<String> tokens) {
|
||||
return tokens.stream().limit(MAX_TOKENS).collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
}
|
||||
|
||||
private List<Person> fetchPool(Set<String> queryTokens) {
|
||||
LinkedHashMap<UUID, Person> pool = new LinkedHashMap<>();
|
||||
for (String token : queryTokens) {
|
||||
for (Person candidate : findByDisplayNameContaining(token)) {
|
||||
pool.putIfAbsent(candidate.getId(), candidate);
|
||||
}
|
||||
}
|
||||
return new ArrayList<>(pool.values());
|
||||
}
|
||||
|
||||
private NameMatches classify(List<Person> pool, Set<String> queryTokens) {
|
||||
List<Person> direct = new ArrayList<>();
|
||||
List<Person> partial = new ArrayList<>();
|
||||
for (Person candidate : pool) {
|
||||
if (personTokens(candidate).containsAll(queryTokens)) {
|
||||
direct.add(candidate);
|
||||
} else {
|
||||
partial.add(candidate);
|
||||
}
|
||||
}
|
||||
List<Person> cappedDirect = cap(direct);
|
||||
List<Person> cappedPartial = cap(partial);
|
||||
log.debug("resolveByName outcome={} tokens={}", outcome(cappedDirect, cappedPartial), queryTokens.size());
|
||||
return new NameMatches(cappedDirect, cappedPartial);
|
||||
}
|
||||
|
||||
private static Set<String> personTokens(Person person) {
|
||||
Set<String> tokens = new LinkedHashSet<>();
|
||||
tokens.addAll(tokenize(person.getFirstName()));
|
||||
tokens.addAll(tokenize(person.getLastName()));
|
||||
tokens.addAll(tokenize(person.getAlias()));
|
||||
tokens.addAll(tokenize(person.getTitle()));
|
||||
for (PersonNameAlias alias : person.getNameAliases()) {
|
||||
tokens.addAll(tokenize(alias.getFirstName()));
|
||||
tokens.addAll(tokenize(alias.getLastName()));
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
private static List<Person> cap(List<Person> people) {
|
||||
return people.size() > MAX_CANDIDATES ? people.subList(0, MAX_CANDIDATES) : people;
|
||||
}
|
||||
|
||||
private static String outcome(List<Person> direct, List<Person> partial) {
|
||||
if (direct.size() == 1) return "direct=1";
|
||||
if (direct.size() >= 2) return "direct>=2";
|
||||
if (!partial.isEmpty()) return "partial-only";
|
||||
return "no-match";
|
||||
}
|
||||
|
||||
public List<Person> findAllFamilyMembers() {
|
||||
return personRepository.findByFamilyMemberTrueOrderByLastNameAscFirstNameAsc();
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ Features: person CRUD, name alias management, person merge (deduplication), fami
|
||||
| `getAllById(List<UUID>)` | document | Bulk fetch for sender/receiver resolution |
|
||||
| `findAll(String q)` | document, dashboard | List all persons |
|
||||
| `findByName(String firstName, String lastName)` | document | Filename-based **sender resolution** in `storeDocument`: exact-case match → single case-insensitive match → else **empty** (ambiguous names leave the sender unset; a null first name never matches). See ADR-033. |
|
||||
| `resolveByName(String name)` | search | NL-search name resolution returning `NameMatches` (direct vs partial). Token/word-boundary, alias-aware matching so a single direct match auto-selects even when looser substring hits coexist ("Clara Cram" vs "Clara Cramer"). See #763. |
|
||||
| `findOrCreateByAlias(String rawName)` | importing | Idempotent create during mass import; type classification happens internally. Resolves exact-case → lowest-id case-insensitive sibling → create — never throws on case-colliding aliases. See ADR-033. |
|
||||
| `findAllFamilyMembers()` | dashboard | Family member list for stats |
|
||||
| `findCorrespondents()` | document | Correspondent list for conversation filter |
|
||||
|
||||
@@ -8,6 +8,7 @@ import org.raddatz.familienarchiv.document.DocumentSort;
|
||||
import org.raddatz.familienarchiv.document.SearchFilters;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.person.NameMatches;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonService;
|
||||
import org.raddatz.familienarchiv.tag.Tag;
|
||||
@@ -30,7 +31,6 @@ public class NlQueryParserService {
|
||||
private static final int MIN_QUERY = 3;
|
||||
private static final int MAX_QUERY = 500;
|
||||
private static final int MAX_NAME_LENGTH = 200;
|
||||
private static final int MAX_CANDIDATES = 10;
|
||||
private static final int MIN_TAG_TERM = 3;
|
||||
private static final int MAX_RESOLVED_TAGS = 10;
|
||||
|
||||
@@ -113,24 +113,24 @@ public class NlQueryParserService {
|
||||
log.debug("Skipping name fragment (too long or null): length={}", name == null ? 0 : name.length());
|
||||
continue;
|
||||
}
|
||||
List<Person> candidates = personService.findByDisplayNameContaining(name);
|
||||
List<Person> capped = candidates.size() > MAX_CANDIDATES
|
||||
? candidates.subList(0, MAX_CANDIDATES)
|
||||
: candidates;
|
||||
NameMatches matches = personService.resolveByName(name);
|
||||
List<Person> direct = matches.direct();
|
||||
List<Person> partial = matches.partial();
|
||||
|
||||
if (capped.isEmpty()) {
|
||||
noMatchFragments.add(name);
|
||||
} else if (capped.size() == 1) {
|
||||
Person p = capped.get(0);
|
||||
PersonHint hint = new PersonHint(p.getId(), p.getDisplayName());
|
||||
if (direct.size() == 1) {
|
||||
Person p = direct.get(0);
|
||||
resolvedIndex++;
|
||||
if (resolvedIndex <= 2) {
|
||||
resolved.add(hint);
|
||||
resolved.add(new PersonHint(p.getId(), p.getDisplayName()));
|
||||
} else {
|
||||
extraFragments.add(name);
|
||||
}
|
||||
} else if (direct.size() >= 2) {
|
||||
direct.forEach(p -> ambiguous.add(new PersonHint(p.getId(), p.getDisplayName())));
|
||||
} else if (!partial.isEmpty()) {
|
||||
partial.forEach(p -> ambiguous.add(new PersonHint(p.getId(), p.getDisplayName())));
|
||||
} else {
|
||||
capped.forEach(p -> ambiguous.add(new PersonHint(p.getId(), p.getDisplayName())));
|
||||
noMatchFragments.add(name);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -133,7 +133,9 @@ app:
|
||||
ollama:
|
||||
base-url: http://ollama:11434
|
||||
model: qwen2.5:7b-instruct-q4_K_M
|
||||
timeout-seconds: 30
|
||||
# CPU inference: ~18s warm. Higher ceiling absorbs the cold model load on the
|
||||
# first query after an Ollama (re)start before OLLAMA_KEEP_ALIVE pins it.
|
||||
timeout-seconds: 60
|
||||
health-check-timeout-seconds: 2
|
||||
|
||||
nl-search:
|
||||
|
||||
Reference in New Issue
Block a user