diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/NameMatches.java b/backend/src/main/java/org/raddatz/familienarchiv/person/NameMatches.java
new file mode 100644
index 00000000..ddc7a1c6
--- /dev/null
+++ b/backend/src/main/java/org/raddatz/familienarchiv/person/NameMatches.java
@@ -0,0 +1,13 @@
+package org.raddatz.familienarchiv.person;
+
+import java.util.List;
+
+/**
+ * Result of {@link PersonService#resolveByName(String)}: candidate persons split by name-match
+ * strength. The vocabulary is deliberately {@code direct}/{@code partial}, not the search layer's
+ * resolved/ambiguous buckets — the caller maps these into its own outcome.
+ * @param direct  persons whose name components (alias/maiden-name aware) contain every query token
+ * @param partial persons that matched the substring fetch but are not direct
+ */
+public record NameMatches(List<Person> direct, List<Person> partial) {
+}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonRepository.java
index fe619e0b..0afc9e66 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonRepository.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonRepository.java
@@ -19,7 +19,8 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
            "LOWER(CONCAT(COALESCE(p.firstName, ''),' ',p.lastName)) LIKE LOWER(CONCAT('%', :query, '%')) OR " +
            "LOWER(CONCAT(p.lastName, ' ', COALESCE(p.firstName, ''))) LIKE LOWER(CONCAT('%', :query, '%')) OR " +
            "LOWER(p.alias) LIKE LOWER(CONCAT('%', :query, '%')) OR " +
-           "LOWER(a.lastName) LIKE LOWER(CONCAT('%', :query, '%')) " +
+           "LOWER(a.lastName) LIKE LOWER(CONCAT('%', :query, '%')) OR " +
+           "LOWER(a.firstName) LIKE LOWER(CONCAT('%', :query, '%')) " +
            "ORDER BY p.lastName ASC, p.firstName ASC")
     List<Person> searchByName(@Param("query") String query);
 
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonService.java b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonService.java
index 23d38baa..d195d0bb 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonService.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonService.java
@@ -1,9 +1,15 @@
 package org.raddatz.familienarchiv.person;
 
+import java.util.ArrayList;
 import java.util.Comparator;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.Optional;
+import java.util.Set;
 import java.util.UUID;
+import java.util.stream.Collectors;
 
 import org.springframework.lang.Nullable;
 
@@ -24,11 +30,20 @@ import org.springframework.transaction.annotation.Transactional;
 import org.springframework.web.server.ResponseStatusException;
 
 import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
 
 @Service
 @RequiredArgsConstructor
+@Slf4j
 public class PersonService {
 
+    // Co-located with the fetch loop that owns them (issue #763). MAX_TOKENS caps the number of
+    // unindexed leading-wildcard LIKE scans per name — a DoS control, not just perf. MAX_CANDIDATES
+    // bounds each result bucket and is applied AFTER classification so a direct match that sorts
+    // past position 10 among partials is never discarded.
+    private static final int MAX_TOKENS = 8;
+    private static final int MAX_CANDIDATES = 10;
+
     private final PersonRepository personRepository;
     private final PersonNameAliasRepository aliasRepository;
 
@@ -103,6 +118,92 @@ public class PersonService {
         return personRepository.searchByName(fragment);
     }
 
+    // Name-match tokenizer (issue #763): lowercase, split on Unicode whitespace/dash/apostrophe
+    // (incl. NBSP, en dash, typographic ’), drop empties. Applied symmetrically to the query and
+    // every candidate name component so "Anna-Maria", "Anna Maria" and "Anna–Maria" tokenize alike.
+    static Set<String> tokenize(String raw) {
+        if (raw == null || raw.isBlank()) {
+            return Set.of();
+        }
+        Set<String> tokens = new LinkedHashSet<>(); // first-seen order keeps tests deterministic
+        for (String part : raw.toLowerCase(Locale.ROOT).split("[\\s\\p{Z}\\p{Pd}'\\u2019]+")) {
+            if (!part.isEmpty()) {
+                tokens.add(part);
+            }
+        }
+        return tokens;
+    }
+
+    /**
+     * Splits the persons matching an extracted name into direct and partial {@link NameMatches}.
+     * Pipeline: tokenize → cap tokens → fetch candidate pool → classify → cap each bucket. The
+     * read-only transaction keeps the Hibernate session open so each candidate's lazy
+     * {@code nameAliases} can still be read during classification (see ADR-022).
+     */
+    @Transactional(readOnly = true)
+    public NameMatches resolveByName(String name) {
+        Set<String> queryTokens = capTokens(tokenize(name));
+        if (!queryTokens.isEmpty()) {
+            return classify(fetchPool(queryTokens), queryTokens);
+        }
+        log.debug("resolveByName outcome=no-match tokens=0");
+        return new NameMatches(List.of(), List.of());
+    }
+
+    private Set<String> capTokens(Set<String> tokens) {
+        return tokens.stream().limit(MAX_TOKENS).collect(Collectors.toCollection(LinkedHashSet::new));
+    }
+
+    private List<Person> fetchPool(Set<String> queryTokens) {
+        // Keyed by id: a person hit by several tokens enters the pool once, at first-seen position.
+        LinkedHashMap<UUID, Person> byId = new LinkedHashMap<>();
+        for (String token : queryTokens) {
+            findByDisplayNameContaining(token)
+                    .forEach(hit -> byId.putIfAbsent(hit.getId(), hit));
+        }
+        return new ArrayList<>(byId.values());
+    }
+
+    private NameMatches classify(List<Person> pool, Set<String> queryTokens) {
+        // Direct = the candidate's own name tokens cover every query token; everything else in
+        // the pool matched only the substring fetch and is partial. Pool order is kept per bucket.
+        List<Person> directHits = new ArrayList<>();
+        List<Person> partialHits = new ArrayList<>();
+        for (Person person : pool) {
+            boolean coversQuery = personTokens(person).containsAll(queryTokens);
+            (coversQuery ? directHits : partialHits).add(person);
+        }
+        // Cap only after classification so a direct match sorting behind many partials survives.
+        List<Person> direct = cap(directHits);
+        List<Person> partial = cap(partialHits);
+        log.debug("resolveByName outcome={} tokens={}", outcome(direct, partial), queryTokens.size());
+        return new NameMatches(direct, partial);
+    }
+
+    private static Set<String> personTokens(Person person) {
+        // Union of the tokens of first name, last name, alias, title and every name alias.
+        Set<String> all = new LinkedHashSet<>(tokenize(person.getFirstName()));
+        all.addAll(tokenize(person.getLastName()));
+        all.addAll(tokenize(person.getAlias()));
+        all.addAll(tokenize(person.getTitle()));
+        person.getNameAliases().forEach(nameAlias -> {
+            all.addAll(tokenize(nameAlias.getFirstName()));
+            all.addAll(tokenize(nameAlias.getLastName()));
+        });
+        return all;
+    }
+
+    private static List<Person> cap(List<Person> people) {
+        return people.size() > MAX_CANDIDATES ? people.subList(0, MAX_CANDIDATES) : people;
+    }
+
+    private static String outcome(List<Person> direct, List<Person> partial) {
+        int directCount = direct.size();
+        if (directCount >= 2) return "direct>=2";
+        if (directCount == 1) return "direct=1";
+        return partial.isEmpty() ? "no-match" : "partial-only";
+    }
+
     public List<Person> findAllFamilyMembers() {
         return personRepository.findByFamilyMemberTrueOrderByLastNameAscFirstNameAsc();
     }
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/README.md b/backend/src/main/java/org/raddatz/familienarchiv/person/README.md
index ffbe6c72..48105b6c 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/person/README.md
+++ b/backend/src/main/java/org/raddatz/familienarchiv/person/README.md
@@ -21,6 +21,7 @@ Features: person CRUD, name alias management, person merge (deduplication), fami
 | `getAllById(List<UUID>)` | document | Bulk fetch for sender/receiver resolution |
 | `findAll(String q)` | document, dashboard | List all persons |
 | `findByName(String firstName, String lastName)` | document | Filename-based **sender resolution** in `storeDocument`: exact-case match → single case-insensitive match → else **empty** (ambiguous names leave the sender unset; a null first name never matches). See ADR-033. |
+| `resolveByName(String name)` | search | NL-search name resolution returning `NameMatches` (direct vs partial). Token/word-boundary, alias-aware matching so a single direct match auto-selects even when looser substring hits coexist ("Clara Cram" vs "Clara Cramer"). See #763. |
 | `findOrCreateByAlias(String rawName)` | importing | Idempotent create during mass import; type classification happens internally. Resolves exact-case → lowest-id case-insensitive sibling → create — never throws on case-colliding aliases. See ADR-033. |
 | `findAllFamilyMembers()` | dashboard | Family member list for stats |
 | `findCorrespondents()` | document | Correspondent list for conversation filter |
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/search/NlQueryParserService.java b/backend/src/main/java/org/raddatz/familienarchiv/search/NlQueryParserService.java
index 35e52994..7f9d1edb 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/search/NlQueryParserService.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/search/NlQueryParserService.java
@@ -8,6 +8,7 @@ import org.raddatz.familienarchiv.document.DocumentSort;
 import org.raddatz.familienarchiv.document.SearchFilters;
 import org.raddatz.familienarchiv.exception.DomainException;
 import org.raddatz.familienarchiv.exception.ErrorCode;
+import org.raddatz.familienarchiv.person.NameMatches;
 import org.raddatz.familienarchiv.person.Person;
 import org.raddatz.familienarchiv.person.PersonService;
 import org.raddatz.familienarchiv.tag.Tag;
@@ -30,7 +31,6 @@ public class NlQueryParserService {
     private static final int MIN_QUERY = 3;
     private static final int MAX_QUERY = 500;
     private static final int MAX_NAME_LENGTH = 200;
-    private static final int MAX_CANDIDATES = 10;
     private static final int MIN_TAG_TERM = 3;
     private static final int MAX_RESOLVED_TAGS = 10;
 
@@ -113,24 +113,24 @@ public class NlQueryParserService {
                 log.debug("Skipping name fragment (too long or null): length={}", name == null ? 0 : name.length());
                 continue;
             }
-            List<Person> candidates = personService.findByDisplayNameContaining(name);
-            List<Person> capped = candidates.size() > MAX_CANDIDATES
-                    ? candidates.subList(0, MAX_CANDIDATES)
-                    : candidates;
+            NameMatches matches = personService.resolveByName(name);
+            List<Person> direct = matches.direct();
+            List<Person> partial = matches.partial();
 
-            if (capped.isEmpty()) {
-                noMatchFragments.add(name);
-            } else if (capped.size() == 1) {
-                Person p = capped.get(0);
-                PersonHint hint = new PersonHint(p.getId(), p.getDisplayName());
+            if (direct.size() == 1) {
+                Person p = direct.get(0);
                 resolvedIndex++;
                 if (resolvedIndex <= 2) {
-                    resolved.add(hint);
+                    resolved.add(new PersonHint(p.getId(), p.getDisplayName()));
                 } else {
                     extraFragments.add(name);
                 }
+            } else if (direct.size() >= 2) {
+                direct.forEach(p -> ambiguous.add(new PersonHint(p.getId(), p.getDisplayName())));
+            } else if (!partial.isEmpty()) {
+                partial.forEach(p -> ambiguous.add(new PersonHint(p.getId(), p.getDisplayName())));
             } else {
-                capped.forEach(p -> ambiguous.add(new PersonHint(p.getId(), p.getDisplayName())));
+                noMatchFragments.add(name);
             }
         }
 
diff --git a/backend/src/main/resources/application.yaml b/backend/src/main/resources/application.yaml
index 36d5298a..ce517f25 100644
--- a/backend/src/main/resources/application.yaml
+++ b/backend/src/main/resources/application.yaml
@@ -133,7 +133,9 @@ app:
   ollama:
     base-url: http://ollama:11434
     model: qwen2.5:7b-instruct-q4_K_M
-    timeout-seconds: 30
+    # CPU inference: ~18s warm. Higher ceiling absorbs the cold model load on the
+    # first query after an Ollama (re)start before OLLAMA_KEEP_ALIVE pins it.
+    timeout-seconds: 60
     health-check-timeout-seconds: 2
 
   nl-search:
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonRepositoryTest.java b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonRepositoryTest.java
index e53366b9..48483476 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonRepositoryTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonRepositoryTest.java
@@ -428,6 +428,67 @@ class PersonRepositoryTest {
         assertThat(results).hasSize(1);
     }
 
+    @Test
+    void searchByName_findsByAliasFirstName() {
+        Person clara = personRepository.save(Person.builder().firstName("Clara").lastName("Cram").build());
+        aliasRepository.save(PersonNameAlias.builder()
+                .person(clara).firstName("Wilhelmina").lastName("de Gruyter")
+                .type(PersonNameAliasType.BIRTH).sortOrder(0).build());
+
+        List<Person> results = personRepository.searchByName("Wilhelmina");
+
+        assertThat(results).hasSize(1);
+        assertThat(results.get(0).getLastName()).isEqualTo("Cram");
+    }
+
+    @Test
+    void searchByName_ordersByLastNameThenFirstName() {
+        personRepository.save(Person.builder().firstName("Clara").lastName("Cram").build());
+        personRepository.save(Person.builder().firstName("Anna").lastName("Cram").build());
+        personRepository.save(Person.builder().firstName("Bernd").lastName("Cram").build());
+
+        List<Person> results = personRepository.searchByName("Cram");
+
+        assertThat(results).extracting(Person::getFirstName)
+                .containsExactly("Anna", "Bernd", "Clara");
+    }
+
+    // ─── resolveByName fetch→classify, end-to-end on real Postgres (#763 review) ───
+    // The classifier unit tests in PersonServiceTest stub searchByName, so they never prove the
+    // fetch query actually finds an alias-only match and feeds it into classification. These walk
+    // the whole searchByName → resolveByName path over the real Postgres slice, closing AC#4/#5.
+
+    @Test
+    void resolveByName_maidenAlias_classifiesAsDirect_endToEnd() {
+        PersonService personService = new PersonService(personRepository, aliasRepository);
+        Person clara = personRepository.save(Person.builder().firstName("Clara").lastName("Müller").build());
+        aliasRepository.save(PersonNameAlias.builder()
+                .person(clara).lastName("Cram").type(PersonNameAliasType.MAIDEN_NAME).sortOrder(0).build());
+        // Detach so resolveByName re-fetches with its lazy nameAliases loaded from the DB —
+        // the fresh-session behaviour the @Transactional(readOnly=true) path has in production.
+        entityManager.flush();
+        entityManager.clear();
+
+        NameMatches matches = personService.resolveByName("Clara Cram");
+
+        assertThat(matches.direct()).extracting(Person::getId).containsExactly(clara.getId());
+    }
+
+    @Test
+    void resolveByName_aliasFirstName_classifiesAsDirect_endToEnd() {
+        PersonService personService = new PersonService(personRepository, aliasRepository);
+        Person clara = personRepository.save(Person.builder().firstName("Clara").lastName("Cram").build());
+        aliasRepository.save(PersonNameAlias.builder()
+                .person(clara).firstName("Wilhelmina").lastName("de Gruyter")
+                .type(PersonNameAliasType.BIRTH).sortOrder(0).build());
+        entityManager.flush();
+        entityManager.clear();
+
+        NameMatches matches = personService.resolveByName("Wilhelmina");
+
+        assertThat(matches.direct()).extracting(Person::getId).containsExactly(clara.getId());
+    }
+
     // ─── searchWithDocumentCount with aliases ────────────────────────────────
 
     @Test
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceTest.java
index 865ae9ad..9cabe1ce 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceTest.java
@@ -909,4 +909,154 @@ class PersonServiceTest {
         assertThat(result).containsExactly(walter);
         verify(personRepository).searchByName("Walter");
     }
+
+    // ─── tokenize (name-match contract) ───────────────────────────────────────
+
+    @Test
+    void tokenize_hyphenatedName_splitsOnHyphen() {
+        assertThat(PersonService.tokenize("Anna-Maria")).containsExactly("anna", "maria");
+    }
+
+    @Test
+    void tokenize_apostropheName_splitsOnApostrophe() {
+        assertThat(PersonService.tokenize("D'Angelo")).containsExactly("d", "angelo");
+    }
+
+    @Test
+    void tokenize_umlautName_lowercasesToSingleToken() {
+        assertThat(PersonService.tokenize("Müller")).containsExactly("müller");
+    }
+
+    @Test
+    void tokenize_doubleSpace_dropsEmptyTokens() {
+        assertThat(PersonService.tokenize("Clara  Cram")).containsExactly("clara", "cram");
+    }
+
+    @Test
+    void tokenize_allWhitespace_returnsEmpty() {
+        assertThat(PersonService.tokenize("   ")).isEmpty();
+    }
+
+    @Test
+    void tokenize_null_returnsEmpty() {
+        assertThat(PersonService.tokenize(null)).isEmpty();
+    }
+
+    // ─── resolveByName (direct / partial classification) ──────────────────────
+
+    @Test
+    void resolveByName_singleDirectMatch_classifiesAsDirect() {
+        Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram").build();
+        when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
+        when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
+
+        NameMatches result = personService.resolveByName("Clara Cram");
+
+        assertThat(result.direct()).containsExactly(clara);
+    }
+
+    @Test
+    void resolveByName_maidenAliasToken_classifiesAsDirect() {
+        Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Müller")
+                .nameAliases(List.of(PersonNameAlias.builder().lastName("Cram")
+                        .type(PersonNameAliasType.MAIDEN_NAME).build()))
+                .build();
+        when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
+        when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
+
+        NameMatches result = personService.resolveByName("Clara Cram");
+
+        assertThat(result.direct()).containsExactly(clara);
+    }
+
+    @Test
+    void resolveByName_aliasFirstNameToken_isFetchedAndClassified() {
+        Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram")
+                .nameAliases(List.of(PersonNameAlias.builder().firstName("Wilhelmina").lastName("de Gruyter")
+                        .type(PersonNameAliasType.BIRTH).build()))
+                .build();
+        when(personRepository.searchByName("wilhelmina")).thenReturn(List.of(clara));
+
+        NameMatches result = personService.resolveByName("Wilhelmina");
+
+        assertThat(result.direct()).containsExactly(clara);
+    }
+
+    @Test
+    void resolveByName_middleName_stillDirect() {
+        Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara Maria").lastName("Cram").build();
+        when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
+        when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
+
+        NameMatches result = personService.resolveByName("Clara Cram");
+
+        assertThat(result.direct()).containsExactly(clara);
+    }
+
+    @Test
+    void resolveByName_reorderedTokens_stillDirect() {
+        Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram").build();
+        when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
+        when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
+
+        NameMatches result = personService.resolveByName("Cram Clara");
+
+        assertThat(result.direct()).containsExactly(clara);
+    }
+
+    @Test
+    void resolveByName_cramVsCramer_classifiesAsPartial() {
+        Person cramer = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cramer").build();
+        when(personRepository.searchByName("clara")).thenReturn(List.of(cramer));
+        when(personRepository.searchByName("cram")).thenReturn(List.of(cramer));
+
+        NameMatches result = personService.resolveByName("Clara Cram");
+
+        assertThat(result.partial()).containsExactly(cramer);
+    }
+
+    @Test
+    void resolveByName_emptyAfterTokenizing_returnsNoCandidates() {
+        NameMatches result = personService.resolveByName("  -  ");
+
+        assertThat(result.direct()).isEmpty();
+        verify(personRepository, never()).searchByName(any());
+    }
+
+    @Test
+    void resolveByName_directSortsBeyondCap_stillReturnedAsDirect() {
+        List<Person> pool = new java.util.ArrayList<>();
+        for (int i = 0; i < 10; i++) {
+            pool.add(Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cramer").build());
+        }
+        Person direct = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram").build();
+        pool.add(direct);
+        when(personRepository.searchByName("clara")).thenReturn(pool);
+        when(personRepository.searchByName("cram")).thenReturn(pool);
+
+        NameMatches result = personService.resolveByName("Clara Cram");
+
+        assertThat(result.direct()).containsExactly(direct);
+    }
+
+    @Test
+    void resolveByName_over8Tokens_issuesAtMost8Fetches() {
+        personService.resolveByName("a b c d e f g h i j");
+
+        verify(personRepository, org.mockito.Mockito.atMost(8)).searchByName(any());
+    }
+
+    @Test
+    void resolveByName_samePersonFromTwoTokens_appearsOnce() {
+        // Both token fetches return the same person id — fetchPool's putIfAbsent must dedup so the
+        // candidate is classified once, not twice.
+        Person clara = Person.builder().id(UUID.randomUUID()).firstName("Clara").lastName("Cram").build();
+        when(personRepository.searchByName("clara")).thenReturn(List.of(clara));
+        when(personRepository.searchByName("cram")).thenReturn(List.of(clara));
+
+        NameMatches result = personService.resolveByName("Clara Cram");
+
+        assertThat(result.direct()).hasSize(1);
+        assertThat(result.partial()).isEmpty();
+    }
 }
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/search/NlQueryParserServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/search/NlQueryParserServiceTest.java
index d1e9c970..61c00b6a 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/search/NlQueryParserServiceTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/search/NlQueryParserServiceTest.java
@@ -11,6 +11,7 @@ import org.raddatz.familienarchiv.document.DocumentSort;
 import org.raddatz.familienarchiv.document.SearchFilters;
 import org.raddatz.familienarchiv.exception.DomainException;
 import org.raddatz.familienarchiv.exception.ErrorCode;
+import org.raddatz.familienarchiv.person.NameMatches;
 import org.raddatz.familienarchiv.person.Person;
 import org.raddatz.familienarchiv.person.PersonService;
 import org.raddatz.familienarchiv.tag.Tag;
@@ -64,6 +65,18 @@ class NlQueryParserServiceTest {
         return Person.builder().id(id).firstName(firstName).lastName(lastName).build();
     }
 
+    private NameMatches makeNameMatches() {
+        return makeNameMatches(List.of(), List.of());
+    }
+
+    private NameMatches makeNameMatches(List<Person> direct) {
+        return makeNameMatches(direct, List.of());
+    }
+
+    private NameMatches makeNameMatches(List<Person> direct, List<Person> partial) {
+        return new NameMatches(direct, partial);
+    }
+
     private static final UUID P1 = UUID.fromString("00000000-0000-0000-0000-000000000001");
     private static final UUID P2 = UUID.fromString("00000000-0000-0000-0000-000000000002");
     private static final UUID P3 = UUID.fromString("00000000-0000-0000-0000-000000000003");
@@ -75,7 +88,7 @@ class NlQueryParserServiceTest {
         Person walter = person(P1, "Walter", "Raddatz");
         when(ollamaClient.parse(anyString()))
                 .thenReturn(extraction(List.of("Walter"), "sender", null, null, List.of()));
-        when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
+        when(personService.resolveByName("Walter")).thenReturn(makeNameMatches(List.of(walter)));
 
         NlSearchResponse resp = service.search("Was hat Walter geschrieben?", PAGE);
 
@@ -96,7 +109,7 @@ class NlQueryParserServiceTest {
         Person b = person(UUID.randomUUID(), "Walter", "Schmidt");
         when(ollamaClient.parse(anyString()))
                 .thenReturn(extraction(List.of("Walter"), "sender", null, null, List.of()));
-        when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(a, b));
+        when(personService.resolveByName("Walter")).thenReturn(makeNameMatches(List.of(a, b)));
 
         NlSearchResponse resp = service.search("Briefe von Walter", PAGE);
 
@@ -114,7 +127,7 @@ class NlQueryParserServiceTest {
         Person b = person(UUID.randomUUID(), "Emma", "Raddatz");
         when(ollamaClient.parse(anyString()))
                 .thenReturn(extraction(List.of("Emma"), "any", null, null, List.of()));
-        when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(a, b));
+        when(personService.resolveByName("Emma")).thenReturn(makeNameMatches(List.of(a, b)));
 
         NlSearchResponse resp = service.search("Briefe an Emma", PAGE);
 
@@ -129,7 +142,7 @@ class NlQueryParserServiceTest {
     void search_noMatchName_isFoldedIntoText() {
         when(ollamaClient.parse(anyString()))
                 .thenReturn(extraction(List.of("Karl"), "any", null, null, List.of()));
-        when(personService.findByDisplayNameContaining("Karl")).thenReturn(List.of());
+        when(personService.resolveByName("Karl")).thenReturn(makeNameMatches());
 
         service.search("Briefe von Karl", PAGE);
 
@@ -147,7 +160,7 @@ class NlQueryParserServiceTest {
         Person walter = person(P1, "Walter", "Raddatz");
         when(ollamaClient.parse(anyString()))
                 .thenReturn(extraction(List.of("Walter"), "any", null, null, List.of()));
-        when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
+        when(personService.resolveByName("Walter")).thenReturn(makeNameMatches(List.of(walter)));
 
         NlSearchResponse resp = service.search("Briefe von Walter", PAGE);
 
@@ -164,8 +177,8 @@ class NlQueryParserServiceTest {
         Person emma = person(P2, "Emma", "Raddatz");
         when(ollamaClient.parse(anyString()))
                 .thenReturn(extraction(List.of("Walter", "Emma"), "any", null, null, List.of()));
-        when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
-        when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(emma));
+        when(personService.resolveByName("Walter")).thenReturn(makeNameMatches(List.of(walter)));
+        when(personService.resolveByName("Emma")).thenReturn(makeNameMatches(List.of(emma)));
 
         NlSearchResponse resp = service.search("Briefe von Walter an Emma", PAGE);
 
@@ -186,8 +199,8 @@ class NlQueryParserServiceTest {
         Person emma2 = person(P3, "Emma", "Schmidt");
         when(ollamaClient.parse(anyString()))
                 .thenReturn(extraction(List.of("Walter", "Emma"), "sender", null, null, List.of()));
-        when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
-        when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(emma1, emma2));
+        when(personService.resolveByName("Walter")).thenReturn(makeNameMatches(List.of(walter)));
+        when(personService.resolveByName("Emma")).thenReturn(makeNameMatches(List.of(emma1, emma2)));
 
         NlSearchResponse resp = service.search("Briefe von Walter an Emma", PAGE);
 
@@ -202,8 +215,8 @@ class NlQueryParserServiceTest {
         Person emma = person(P2, "Emma", "Raddatz");
         when(ollamaClient.parse(anyString()))
                 .thenReturn(extraction(List.of("Karl", "Emma"), "sender", null, null, List.of()));
-        when(personService.findByDisplayNameContaining("Karl")).thenReturn(List.of());
-        when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(emma));
+        when(personService.resolveByName("Karl")).thenReturn(makeNameMatches());
+        when(personService.resolveByName("Emma")).thenReturn(makeNameMatches(List.of(emma)));
 
         service.search("Briefe von Karl an Emma", PAGE);
 
@@ -222,9 +235,9 @@ class NlQueryParserServiceTest {
         Person heinrich = person(P3, "Heinrich", "Braun");
         when(ollamaClient.parse(anyString()))
                 .thenReturn(extraction(List.of("Walter", "Emma", "Heinrich"), "any", null, null, List.of()));
-        when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
-        when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(emma));
-        when(personService.findByDisplayNameContaining("Heinrich")).thenReturn(List.of(heinrich));
+        when(personService.resolveByName("Walter")).thenReturn(makeNameMatches(List.of(walter)));
+        when(personService.resolveByName("Emma")).thenReturn(makeNameMatches(List.of(emma)));
+        when(personService.resolveByName("Heinrich")).thenReturn(makeNameMatches(List.of(heinrich)));
 
         service.search("Briefe von Walter an Emma über Heinrich", PAGE);
 
@@ -343,7 +356,7 @@ class NlQueryParserServiceTest {
         // but NlQueryParserService must also be safe if something unexpected arrives.
         when(ollamaClient.parse(anyString()))
                 .thenReturn(new OllamaExtraction(List.of("Walter"), "unknown_role", null, null, List.of(), "query"));
-        when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
+        when(personService.resolveByName("Walter")).thenReturn(makeNameMatches(List.of(walter)));
 
         NlSearchResponse resp = service.search("Briefe von Walter", PAGE);
 
@@ -374,20 +387,21 @@ class NlQueryParserServiceTest {
 
         service.search("Briefe von sehr langem Namen", PAGE);
 
-        verify(personService, never()).findByDisplayNameContaining(anyString());
+        verify(personService, never()).resolveByName(anyString());
     }
 
-    // --- 20. Max 10 candidates cap: 11 persons returned → only first 10 in ambiguousPersons ---
+    // --- 20. Cap lives in resolveByName (after classification): a pre-capped 10-direct result
+    //         maps straight to ambiguousPersons; the search layer adds no second cap. ---
 
     @Test
-    void search_elevenCandidates_capsAtTen() {
-        List<Person> eleven = new ArrayList<>();
-        for (int i = 0; i < 11; i++) {
-            eleven.add(person(UUID.randomUUID(), "Walter", "Person" + i));
+    void search_tenDirectMatches_allShownAsAmbiguous() {
+        List<Person> ten = new ArrayList<>();
+        for (int i = 0; i < 10; i++) {
+            ten.add(person(UUID.randomUUID(), "Walter", "Person" + i));
         }
         when(ollamaClient.parse(anyString()))
                 .thenReturn(extraction(List.of("Walter"), "sender", null, null, List.of()));
-        when(personService.findByDisplayNameContaining("Walter")).thenReturn(eleven);
+        when(personService.resolveByName("Walter")).thenReturn(makeNameMatches(ten));
 
         NlSearchResponse resp = service.search("Briefe von Walter", PAGE);
 
@@ -421,7 +435,7 @@ class NlQueryParserServiceTest {
         Person emma = person(P2, "Emma", "Raddatz");
         when(ollamaClient.parse(anyString()))
                 .thenReturn(extraction(List.of("Emma"), "receiver", null, null, List.of()));
-        when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(emma));
+        when(personService.resolveByName("Emma")).thenReturn(makeNameMatches(List.of(emma)));
 
         service.search("Briefe an Emma", PAGE);
 
@@ -443,6 +457,52 @@ class NlQueryParserServiceTest {
         assertThat(resp.interpretation().keywordsApplied()).isTrue();
     }
 
+    // --- 23a. Partial-only, one candidate → ambiguous (1-item picker), search skipped ---
+
+    @Test
+    void search_partialOnly_oneCandidate_populatesAmbiguous() {
+        Person cramer = person(P1, "Clara", "Cramer");
+        when(ollamaClient.parse(anyString()))
+                .thenReturn(extraction(List.of("Clara Cram"), "any", null, null, List.of()));
+        when(personService.resolveByName("Clara Cram")).thenReturn(makeNameMatches(List.of(), List.of(cramer)));
+
+        NlSearchResponse resp = service.search("Briefe von Clara Cram", PAGE);
+
+        assertThat(resp.interpretation().ambiguousPersons()).hasSize(1);
+        verify(documentService, never()).searchDocuments(any(), any(), any(), any());
+    }
+
+    // --- 23b. Partial-only, two candidates → ambiguous (multi-item picker), search skipped ---
+
+    @Test
+    void search_partialOnly_twoCandidates_populatesAmbiguous() {
+        List<Person> partials = List.of(person(P1, "Clara", "Cramer"), person(P2, "Clara", "Crammond"));
+        when(ollamaClient.parse(anyString()))
+                .thenReturn(extraction(List.of("Clara Cram"), "any", null, null, List.of()));
+        when(personService.resolveByName("Clara Cram"))
+                .thenReturn(makeNameMatches(List.of(), partials));
+
+        NlSearchResponse resp = service.search("Briefe von Clara Cram", PAGE);
+
+        assertThat(resp.interpretation().ambiguousPersons()).hasSize(2);
+        verify(documentService, never()).searchDocuments(any(), any(), any(), any());
+    }
+
+    // --- 23c. Exactly one direct match → search executes, no picker ---
+
+    @Test
+    void search_oneDirect_executesSearch() {
+        Person clara = person(P1, "Clara", "Cram");
+        when(ollamaClient.parse(anyString()))
+                .thenReturn(extraction(List.of("Clara Cram"), "any", null, null, List.of()));
+        when(personService.resolveByName("Clara Cram")).thenReturn(makeNameMatches(List.of(clara)));
+
+        NlSearchResponse resp = service.search("Briefe von Clara Cram", PAGE);
+
+        verify(documentService).searchDocumentsByPersonId(eq(P1), isNull(), isNull(), eq(PAGE));
+        assertThat(resp.interpretation().ambiguousPersons()).isEmpty();
+    }
+
     // --- Tag resolution helpers ---
 
     private Tag tag(UUID id, String name) {
@@ -546,7 +606,7 @@ class NlQueryParserServiceTest {
         Tag hochzeit = tag(T1, "Hochzeit");
         when(ollamaClient.parse(anyString()))
                 .thenReturn(extraction(List.of("Walter"), "any", null, null, List.of("Hochzeit")));
-        when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
+        when(personService.resolveByName("Walter")).thenReturn(makeNameMatches(List.of(walter)));
         when(tagService.findByNameContaining("Hochzeit")).thenReturn(List.of(hochzeit));
 
         NlSearchResponse resp = service.search("Briefe von Walter über Hochzeit", PAGE);
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index 26e07442..9c60b3bf 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -50,6 +50,7 @@ volumes:
   minio-data:
   ocr-models:
   ocr-cache:
+  ollama-models:
 
 services:
   db:
@@ -200,6 +201,73 @@ services:
     security_opt:
       - no-new-privileges:true
 
+  # --- Ollama: Model init (one-shot pull) ---
+  # Pulls qwen2.5:7b-instruct-q4_K_M (~4.7 GB) into the ollama-models volume on
+  # first start; exits quickly on subsequent starts (model already cached).
+  # The ollama/ollama image's ENTRYPOINT is `ollama` and the image ships WITHOUT
+  # curl, so the entrypoint is overridden to a shell and readiness is probed with
+  # `ollama list` (not curl). The pull is guarded by a `grep` on the cached model
+  # list so a model already on the volume exits clean WITHOUT a registry round-trip
+  # — a host reboot during a registry/network blip can no longer fail init (which
+  # would block the ollama service via service_completed_successfully).
+  # Backend degrades gracefully (503) if Ollama is absent.
+  ollama-model-init:
+    image: ollama/ollama:0.30.6
+    restart: "no"
+    entrypoint: ["/bin/sh", "-c"]
+    command:
+      - "ollama serve & until ollama list >/dev/null 2>&1; do sleep 1; done && (ollama list | grep -q 'qwen2.5:7b-instruct-q4_K_M' || ollama pull qwen2.5:7b-instruct-q4_K_M)"
+    networks:
+      - archiv-net
+    volumes:
+      - ollama-models:/root/.ollama
+    mem_limit: 2g
+    read_only: true
+    tmpfs:
+      - /tmp:size=512m
+    cap_drop:
+      - ALL
+    security_opt:
+      - no-new-privileges:true
+
+  # --- Ollama: LLM inference server ---
+  # Serves the pre-pulled model for NL search inference. Backend reaches it at
+  # http://ollama:11434 (application.yaml default; no env override required).
+  # Healthcheck uses `ollama list` because the image has no curl.
+  ollama:
+    image: ollama/ollama:0.30.6
+    restart: unless-stopped
+    expose:
+      - "11434"
+    networks:
+      - archiv-net
+    volumes:
+      - ollama-models:/root/.ollama
+    environment:
+      # Pin the model in memory (no idle unload). Without this, Ollama evicts
+      # the model after ~5 min idle and the next query pays a cold-load penalty
+      # that exceeds the backend read timeout → NL search 503 after idle.
+      OLLAMA_KEEP_ALIVE: "-1"
+    cpus: "${OLLAMA_CPU_LIMIT:-4.0}"
+    mem_limit: "${OLLAMA_MEM_LIMIT:-8g}"
+    memswap_limit: "${OLLAMA_MEM_LIMIT:-8g}"
+    read_only: true
+    tmpfs:
+      - /tmp:size=512m
+    cap_drop:
+      - ALL
+    security_opt:
+      - no-new-privileges:true
+    healthcheck:
+      test: ["CMD", "ollama", "list"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 60s
+    depends_on:
+      ollama-model-init:
+        condition: service_completed_successfully
+
   backend:
     image: familienarchiv/backend:${TAG:-nightly}
     build:
diff --git a/docker-compose.yml b/docker-compose.yml
index 78ac969a..f9e618ea 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -161,8 +161,13 @@ services:
       - ALL
     security_opt:
       - no-new-privileges:true
-    command: >
-      sh -c "ollama serve & SERVE_PID=$$! && until curl -sf http://localhost:11434/api/tags; do sleep 1; done && ollama pull qwen2.5:7b-instruct-q4_K_M && kill $$SERVE_PID"
+    # The image ENTRYPOINT is `ollama`, so override it to a shell; the image has
+    # no curl, so readiness is probed with `ollama list` instead of a curl loop.
+    # The pull is guarded by a `grep` on the cached model list so an already-cached
+    # model exits clean without a registry round-trip (offline-safe re-up).
+    entrypoint: ["/bin/sh", "-c"]
+    command:
+      - "ollama serve & until ollama list >/dev/null 2>&1; do sleep 1; done && (ollama list | grep -q 'qwen2.5:7b-instruct-q4_K_M' || ollama pull qwen2.5:7b-instruct-q4_K_M)"
 
   # --- Ollama: LLM inference server ---
   # Serves the pre-pulled model for NL search inference.
@@ -180,6 +185,9 @@ services:
       - ollama_models:/root/.ollama
     environment:
       OLLAMA_API_KEY: "${OLLAMA_API_KEY}"
+      # Pin the model in memory (no idle unload) so queries never pay a cold-load
+      # penalty that exceeds the backend read timeout → NL search 503 after idle.
+      OLLAMA_KEEP_ALIVE: "-1"
     cpus: "${OLLAMA_CPU_LIMIT:-4.0}"
     mem_limit: "${OLLAMA_MEM_LIMIT:-8g}"
     memswap_limit: "${OLLAMA_MEM_LIMIT:-8g}"
@@ -191,7 +199,9 @@ services:
     security_opt:
       - no-new-privileges:true
     healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
+      # `ollama list` hits the local API and exits non-zero if the server is
+      # down — used instead of curl, which the image does not ship.
+      test: ["CMD", "ollama", "list"]
       interval: 30s
       timeout: 10s
       retries: 5
diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md
index 5c2580de..f8523515 100644
--- a/docs/DEPLOYMENT.md
+++ b/docs/DEPLOYMENT.md
@@ -613,7 +613,7 @@ Expected output includes `qwen2.5:7b-instruct-q4_K_M`.
 |---|---|---|
 | `app.ollama.base-url` | `http://ollama:11434` | Ollama service URL (dev: `http://localhost:11434`) |
 | `app.ollama.model` | `qwen2.5:7b-instruct-q4_K_M` | Model to use for inference |
-| `app.ollama.timeout-seconds` | `30` | Read timeout for inference calls |
+| `app.ollama.timeout-seconds` | `60` | Read timeout for inference calls (absorbs cold model load on the first query after an Ollama restart) |
 | `app.nl-search.rate-limit.max-requests-per-minute` | `5` | Per-user rate limit |
 
 ### Upgrade the Ollama model
@@ -625,7 +625,7 @@ To switch to a newer model version (e.g. a future release of `qwen2.5`):
    ```bash
    docker volume rm familienarchiv_ollama_models
    ```
-   (In production the volume name is prefixed with the compose project: `archiv-production_ollama_models`.)
+   (In production the volume name is prefixed with the compose project: `archiv-production_ollama-models`.)
 3. Restart the stack:
    ```bash
    docker compose up -d
diff --git a/docs/GLOSSARY.md b/docs/GLOSSARY.md
index 7ee1b565..e30cf875 100644
--- a/docs/GLOSSARY.md
+++ b/docs/GLOSSARY.md
@@ -177,6 +177,8 @@ _See also [Chronik](#chronik-internal)._
 
 **PersonHint** — a lightweight `{id, displayName}` pair used in `NlQueryInterpretation` to describe a resolved or ambiguous person without exposing the full `Person` entity to the frontend.
 
+**NameMatches** — the Person-domain result of `PersonService.resolveByName(name)`: candidate persons split by name-match strength into `direct` and `partial`. A match is **direct** when every query token is a whole-token match (order-independent, alias/maiden-name aware) across all of a person's name components (`firstName`, `lastName`, `alias`, each `PersonNameAlias` first+last, `title`); it is **partial** when the person was returned by the substring fetch but is not a direct match (e.g. "Cram" → "Clara Cramer"). The vocabulary deliberately describes match strength, not the search layer's resolved/ambiguous buckets — `NlQueryParserService` maps one direct → resolved (auto-select), ≥2 direct → ambiguous, partial-only → ambiguous suggestions ("Meintest du …?"), and no candidates → folded into full-text search.
+
 **TagHint** — a lightweight `{id, name, color?}` triple used in `NlQueryInterpretation.resolvedTags` to describe a tag matched by keyword→tag resolution. `color` is the tag's effective color (one-level inheritance from parent when the tag has no own color), or null if neither tag nor parent has a color.
 
 **theme chip** `[frontend]` — a removable chip rendered in `InterpretationChipRow` for each entry in `NlQueryInterpretation.resolvedTags` when `tagsApplied` is `true`. Displays "Thema: {tag.name}" (prefix varies by locale). Clicking × removes the tag from the OR-union filter and navigates to `/documents?tag=…&tagOp=OR` with remaining tag and person parameters preserved.
diff --git a/docs/adr/034-ollama-production-deployment-and-keep-alive.md b/docs/adr/034-ollama-production-deployment-and-keep-alive.md
new file mode 100644
index 00000000..0ff4a790
--- /dev/null
+++ b/docs/adr/034-ollama-production-deployment-and-keep-alive.md
@@ -0,0 +1,125 @@
+# ADR-034: Ollama in production — deployment, keep-alive pinning, and corrected init recipe
+
+**Date:** 2026-06-06
+**Status:** Accepted
+**Deciders:** Marcel Raddatz
+**Relates to:** #758 (bug), #759 (fix), #737 (NL search infrastructure)
+**Corrects:** ADR-028 §10–§11 (init recipe and readiness probe)
+
+---
+
+## Context
+
+ADR-028 introduced Ollama as a Docker Compose service for NL search and documented
+its topology, graceful-degradation contract, and memory budget. Two defects survived
+that work and only surfaced once NL search reached staging (#758):
+
+1. **Ollama was added only to the dev `docker-compose.yml`.** Staging and production
+   deploy from the self-contained `docker-compose.prod.yml`, which had no `ollama`
+   service. The backend defaults to `app.ollama.base-url: http://ollama:11434`, so its
+   client bean was active and resolved to a non-existent host → `ResourceAccessException`
+   → HTTP 503 on every NL search.
+2. **The init recipe documented in ADR-028 §10 never worked.** The `ollama/ollama` image
+   `ENTRYPOINT` is `ollama`, so a bare `command: sh -c "…"` ran as `ollama sh -c "…"`
+   (`unknown command "sh"`), and the image ships **no curl**, so the curl-based readiness
+   loop and the curl healthcheck could never pass.
+
+This ADR records the production deployment decision and the corrected operational
+contract. It is also the durable record of *why* `OLLAMA_KEEP_ALIVE=-1` is set, so a
+future maintainer does not "optimize" it away and reintroduce the cold-load 503.
+
+---
+
+## Decisions
+
+### 1. Ollama is a first-class production service
+
+`docker-compose.prod.yml` now defines `ollama` + `ollama-model-init` + the
+`ollama-models` volume, mirroring the dev stack. The graceful-degradation contract from
+ADR-028 §3 is preserved: `backend` has **no** hard `depends_on` on `ollama`, so an absent
+or unhealthy Ollama still yields a clean 503 rather than blocking backend startup.
+
+### 2. Corrected init recipe (supersedes ADR-028 §10)
+
+The init container overrides the image entrypoint to a shell and probes readiness with
+`ollama list` (not curl, which the image lacks):
+
+```sh
+ollama serve & until ollama list >/dev/null 2>&1; do sleep 1; done && \
+  (ollama list | grep -q 'qwen2.5:7b-instruct-q4_K_M' || ollama pull qwen2.5:7b-instruct-q4_K_M)
+```
+
+```yaml
+entrypoint: ["/bin/sh", "-c"]
+```
+
+The pull is **guarded by a grep on the cached model list**. A model already on the volume
+exits clean without any registry round-trip. This makes re-up offline-safe: a host reboot
+during a registry/network blip can no longer fail init (which, via
+`condition: service_completed_successfully`, would otherwise block the `ollama` service
+and take NL search down until the registry was reachable again). The same recipe is used
+in dev and prod — one mental model.
+
+### 3. Healthcheck uses `ollama list` (supersedes ADR-028 §11 probe)
+
+```yaml
+healthcheck:
+  test: ["CMD", "ollama", "list"]
+```
+
+`ollama list` hits the local API and exits non-zero when the server is down — the correct
+probe for a curl-less image. The `start_period: 60s` rationale from ADR-028 §11 still holds.
+
+### 4. `OLLAMA_KEEP_ALIVE=-1` — pin the model in memory
+
+```yaml
+environment:
+  OLLAMA_KEEP_ALIVE: "-1"
+```
+
+By default Ollama evicts an idle model after ~5 minutes. The next query then pays a
+cold-load penalty that exceeds the backend read timeout, producing an NL search 503 after
+any idle period. Pinning the model (`-1` = never unload) keeps warm-path latency
+predictable (~18 s on CPU). **Do not remove this**: doing so re-introduces the post-idle
+cold-load 503.
+
+### 5. Read timeout raised 30 → 60 s
+
+`app.ollama.timeout-seconds` is raised from 30 to 60 (`application.yaml`, mirrored in
+`DEPLOYMENT.md`). Warm CPU inference is ~18 s; the higher ceiling absorbs the one cold
+model load on the first query after an Ollama (re)start, before §4's pin takes hold.
+
+**Implicit NFR made explicit:** NL search shall return a result or a 503 within 60 s; the
+cold-start path immediately after an Ollama restart is the only path that approaches this
+ceiling.
+
+### 6. Hard-OOM trade-off (refines ADR-028 §2)
+
+`memswap_limit == mem_limit` (both `${OLLAMA_MEM_LIMIT:-8g}`) disables swap for the
+container. Combined with §4's pinned model, a memory-pressure event is a **hard OOM-kill,
+not graceful latency degradation**. This is deliberate — swap-thrashing an LLM is worse
+than a clean restart — but it means the 8 GB envelope is a real ceiling. `qwen2.5-7B-q4`
+plus its KV cache under load sits close enough to 8 GB that this needs a Prometheus
+memory alert on the `ollama` container before it bites in production (tracked as
+observability follow-up, not in this PR).
+
+---
+
+## Consequences
+
+### Positive
+
+- NL search works on staging/production, not just dev — the actual deploy artifact now
+  matches the documented architecture.
+- Re-up is offline-safe: a cached model never depends on registry reachability.
+- The keep-alive pin and timeout ceiling make NL search latency predictable on CPU.
+
+### Risks and operational implications
+
+- **Hard OOM under memory pressure** (§6): a Prometheus alert on `ollama` container memory
+  is required before this is load-bearing in prod. Tracked as an observability follow-up.
+- **Unauthenticated inference** relies entirely on `archiv-net` isolation (ADR-028 §7/§12,
+  unchanged). Sending an `Authorization` header from `RestClientOllamaClient` is a separate
+  durable hardening item, tracked outside this PR.
+- ADR-028 §10–§11 describe a recipe that never functioned; this ADR is the authoritative
+  init/healthcheck contract going forward.
diff --git a/docs/architecture/c4/l2-containers.puml b/docs/architecture/c4/l2-containers.puml
index 2d471dd9..b8630001 100644
--- a/docs/architecture/c4/l2-containers.puml
+++ b/docs/architecture/c4/l2-containers.puml
@@ -17,7 +17,7 @@ System_Boundary(archiv, "Familienarchiv (Docker Compose)") {
     ContainerDb(db, "Relational Database", "PostgreSQL 16", "Stores document metadata, persons, users, permission groups, tags, transcription blocks, audit log, and Spring Session data.")
     ContainerDb(storage, "Object Storage", "MinIO (S3-compatible)", "Stores the actual document files (PDFs, scans). Backend uses a bucket-scoped service account (archiv-app), not MinIO root.")
     Container(mc, "Bucket / Service-Account Init", "MinIO Client (mc)", "One-shot container on startup. Idempotent: creates the archive bucket, the archiv-app service account, and attaches the readwrite policy.")
-    Container(ollama, "Ollama", "Ollama / port 11434", "Local LLM inference server. Hosts qwen2.5:7b-instruct-q4_K_M for natural-language query parsing (NL Search). CPU-only; GPU not required.")
+    Container(ollama, "Ollama", "Ollama / port 11434", "Local LLM inference server. Hosts qwen2.5:7b-instruct-q4_K_M for natural-language query parsing (NL Search). CPU-only; GPU not required. Runs in dev and production; model pinned in memory via OLLAMA_KEEP_ALIVE=-1.")
 }
 
 System_Boundary(observability, "Observability Stack (/opt/familienarchiv/docker-compose.observability.yml)") {
@@ -49,7 +49,7 @@ Rel(promtail, loki, "Pushes log streams", "HTTP/Loki push API")
 Rel(backend, tempo, "Sends distributed traces via OTLP", "HTTP / OTLP / port 4318 (archiv-net)")
 Rel(prometheus, backend, "Scrapes JVM + HTTP metrics", "HTTP 8081 /actuator/prometheus")
 Rel(prometheus, ocr, "Scrapes OCR + http_* metrics", "HTTP 8000 /metrics")
-Rel(backend, ollama, "NL search inference requests", "HTTP / REST / JSON")
+Rel(backend, ollama, "NL search inference requests", "HTTP / REST / JSON / port 11434 (archiv-net)")
 Rel(prometheus, ollama, "Scrapes LLM request metrics", "HTTP 11434 /metrics")
 Rel(grafana, prometheus, "Queries metrics", "HTTP 9090")
 Rel(grafana, loki, "Queries logs", "HTTP 3100")
diff --git a/frontend/messages/de.json b/frontend/messages/de.json
index 5ea1b00a..e53f9583 100644
--- a/frontend/messages/de.json
+++ b/frontend/messages/de.json
@@ -47,6 +47,7 @@
 	"search_disambiguation_trigger_label": "Mehrere Personen gefunden — zum Auswählen klicken",
 	"search_disambiguation_cue": "(auswählen…)",
 	"search_disambiguation_heading": "Person auswählen",
+	"search_disambiguation_did_you_mean": "Meintest du {name}?",
 	"search_disambiguation_select_label": "{name} auswählen",
 	"error_validation_error": "Die Eingabe ist ungültig.",
 	"error_internal_error": "Ein unerwarteter Fehler ist aufgetreten.",
diff --git a/frontend/messages/en.json b/frontend/messages/en.json
index be3af0ce..84be1557 100644
--- a/frontend/messages/en.json
+++ b/frontend/messages/en.json
@@ -47,6 +47,7 @@
 	"search_disambiguation_trigger_label": "Several people found — click to choose",
 	"search_disambiguation_cue": "(choose…)",
 	"search_disambiguation_heading": "Choose a person",
+	"search_disambiguation_did_you_mean": "Did you mean {name}?",
 	"search_disambiguation_select_label": "Select {name}",
 	"error_validation_error": "The input is invalid.",
 	"error_internal_error": "An unexpected error occurred.",
diff --git a/frontend/messages/es.json b/frontend/messages/es.json
index c5fb2fc8..7a40b82c 100644
--- a/frontend/messages/es.json
+++ b/frontend/messages/es.json
@@ -47,6 +47,7 @@
 	"search_disambiguation_trigger_label": "Se encontraron varias personas — haga clic para elegir",
 	"search_disambiguation_cue": "(elegir…)",
 	"search_disambiguation_heading": "Elegir una persona",
+	"search_disambiguation_did_you_mean": "¿Quería decir {name}?",
 	"search_disambiguation_select_label": "Seleccionar {name}",
 	"error_validation_error": "La entrada no es válida.",
 	"error_internal_error": "Se ha producido un error inesperado.",
diff --git a/frontend/src/routes/documents/+page.svelte b/frontend/src/routes/documents/+page.svelte
index 7e836964..7d494b24 100644
--- a/frontend/src/routes/documents/+page.svelte
+++ b/frontend/src/routes/documents/+page.svelte
@@ -57,7 +57,16 @@ let nlResult = $state<DocumentSearchResult | null>(null);
 
 const showNlView = $derived(smartMode && nlSubmitted);
 const nlHasResults = $derived((nlResult?.items.length ?? 0) > 0);
-const nlIsAmbiguous = $derived((nlInterpretation?.ambiguousPersons.length ?? 0) > 0);
+const ambiguousPersons = $derived(nlInterpretation?.ambiguousPersons ?? []);
+const nlIsAmbiguous = $derived(ambiguousPersons.length > 0);
+// A 1-item picker is always a "did you mean …?" suggestion (a single direct match auto-selects
+// and never reaches the picker); ≥2 keeps the "choose a person" framing and the action cue.
+const disambiguationHeading = $derived(
+	ambiguousPersons.length === 1
+		? m.search_disambiguation_did_you_mean({ name: ambiguousPersons[0].displayName })
+		: m.search_disambiguation_heading()
+);
+const showDisambiguationCue = $derived(ambiguousPersons.length >= 2);
 
 function hasAdvancedFilters() {
 	return (
@@ -442,6 +451,8 @@ $effect(() => {
 						{#if nlIsAmbiguous}
 							<DisambiguationPicker
-								persons={nlInterpretation.ambiguousPersons}
+								persons={ambiguousPersons}
+								heading={disambiguationHeading}
+								showCue={showDisambiguationCue}
 								onSelect={selectDisambiguated}
 							/>
 						{:else}
diff --git a/frontend/src/routes/search/DisambiguationPicker.svelte b/frontend/src/routes/search/DisambiguationPicker.svelte
index b99fea10..c99619c9 100644
--- a/frontend/src/routes/search/DisambiguationPicker.svelte
+++ b/frontend/src/routes/search/DisambiguationPicker.svelte
@@ -6,15 +6,28 @@ import type { components } from '$lib/generated/api';
 
 type PersonHint = components['schemas']['PersonHint'];
 
-let { persons, onSelect }: { persons: PersonHint[]; onSelect: (person: PersonHint) => void } =
-	$props();
+let {
+	persons,
+	heading,
+	showCue,
+	onSelect
+}: {
+	persons: PersonHint[];
+	heading: string;
+	showCue: boolean;
+	onSelect: (person: PersonHint) => void;
+} = $props();
 
 let open = $state(false);
 let triggerEl = $state<HTMLButtonElement>();
 let listEl = $state<HTMLUListElement>();
 
 const panelId = 'disambiguation-panel';
+const headingId = 'disambiguation-heading';
 const names = $derived(persons.map((person) => person.displayName).join(', '));
+const triggerLabel = $derived(
+	persons.length === 1 ? heading : m.search_disambiguation_trigger_label()
+);
 
 async function openPicker() {
 	open = true;
@@ -54,33 +67,36 @@ function onKeydown(event: KeyboardEvent) {
 		aria-haspopup="true"
 		aria-expanded={open}
 		aria-controls={panelId}
-		aria-label={m.search_disambiguation_trigger_label()}
+		aria-label={triggerLabel}
 		onclick={toggle}
 		class="inline-flex min-h-[44px] items-center gap-1.5 rounded-full border border-line bg-muted px-3 text-sm text-ink-2 outline-none focus-visible:ring-2 focus-visible:ring-brand-navy"
 	>
 		<span class="max-w-[8rem] truncate sm:max-w-[12rem]">{names}</span>
-		<span class="text-ink-3">{m.search_disambiguation_cue()}</span>
+		{#if showCue}
+			<span class="text-ink-3">{m.search_disambiguation_cue()}</span>
+		{/if}
 	</button>
 
 	{#if open}
-		<ul
-			bind:this={listEl}
+		<div
 			id={panelId}
-			aria-label={m.search_disambiguation_heading()}
 			class="absolute left-0 z-10 mt-1 min-w-[12rem] rounded-sm border border-line bg-surface py-1 shadow-md"
 		>
-			{#each persons as person (person.id)}
-				<li>
-					<button
-						type="button"
-						aria-label={m.search_disambiguation_select_label({ name: person.displayName })}
-						onclick={() => select(person)}
-						class="flex min-h-[44px] w-full items-center px-4 text-left text-sm text-ink outline-none hover:bg-muted focus-visible:bg-muted focus-visible:ring-2 focus-visible:ring-brand-navy"
-					>
-						{person.displayName}
-					</button>
-				</li>
-			{/each}
-		</ul>
+			<p id={headingId} class="px-4 py-1.5 text-sm font-bold text-ink">{heading}</p>
+			<ul bind:this={listEl} aria-labelledby={headingId}>
+				{#each persons as person (person.id)}
+					<li>
+						<button
+							type="button"
+							aria-label={m.search_disambiguation_select_label({ name: person.displayName })}
+							onclick={() => select(person)}
+							class="flex min-h-[44px] w-full items-center px-4 text-left text-sm text-ink outline-none hover:bg-muted focus-visible:bg-muted focus-visible:ring-2 focus-visible:ring-brand-navy"
+						>
+							{person.displayName}
+						</button>
+					</li>
+				{/each}
+			</ul>
+		</div>
 	{/if}
 </div>
diff --git a/frontend/src/routes/search/DisambiguationPicker.svelte.spec.ts b/frontend/src/routes/search/DisambiguationPicker.svelte.spec.ts
index 5b87a996..04eac3dd 100644
--- a/frontend/src/routes/search/DisambiguationPicker.svelte.spec.ts
+++ b/frontend/src/routes/search/DisambiguationPicker.svelte.spec.ts
@@ -13,6 +13,8 @@ const persons: PersonHint[] = [
 	{ id: 'w2', displayName: 'Walter Müller' }
 ];
 
+const multiProps = { persons, heading: 'Person auswählen', showCue: true };
+
 function pressEscape() {
 	(document.activeElement as HTMLElement).dispatchEvent(
 		new KeyboardEvent('keydown', { key: 'Escape', bubbles: true })
@@ -21,7 +23,7 @@ function pressEscape() {
 
 describe('DisambiguationPicker', () => {
 	it('opens the picker and shows a select option per ambiguous person', async () => {
-		render(DisambiguationPicker, { persons, onSelect: vi.fn() });
+		render(DisambiguationPicker, { ...multiProps, onSelect: vi.fn() });
 		await page.getByRole('button', { name: /Mehrere Personen gefunden/ }).click();
 		await expect
 			.element(page.getByRole('button', { name: 'Walter Raddatz auswählen' }))
@@ -32,7 +34,7 @@ describe('DisambiguationPicker', () => {
 	});
 
 	it('moves focus into the picker list on open', async () => {
-		render(DisambiguationPicker, { persons, onSelect: vi.fn() });
+		render(DisambiguationPicker, { ...multiProps, onSelect: vi.fn() });
 		await page.getByRole('button', { name: /Mehrere Personen gefunden/ }).click();
 		await expect
 			.element(page.getByRole('button', { name: 'Walter Raddatz auswählen' }))
@@ -40,7 +42,7 @@ describe('DisambiguationPicker', () => {
 	});
 
 	it('returns focus to the trigger when closed with Escape', async () => {
-		render(DisambiguationPicker, { persons, onSelect: vi.fn() });
+		render(DisambiguationPicker, { ...multiProps, onSelect: vi.fn() });
 		const trigger = page.getByRole('button', { name: /Mehrere Personen gefunden/ });
 		await trigger.click();
 		await expect
@@ -52,7 +54,7 @@ describe('DisambiguationPicker', () => {
 
 	it('does not call onSelect when dismissed without choosing', async () => {
 		const onSelect = vi.fn();
-		render(DisambiguationPicker, { persons, onSelect });
+		render(DisambiguationPicker, { ...multiProps, onSelect });
 		await page.getByRole('button', { name: /Mehrere Personen gefunden/ }).click();
 		await expect
 			.element(page.getByRole('button', { name: 'Walter Raddatz auswählen' }))
@@ -63,9 +65,54 @@ describe('DisambiguationPicker', () => {
 
 	it('calls onSelect with the chosen person', async () => {
 		const onSelect = vi.fn();
-		render(DisambiguationPicker, { persons, onSelect });
+		render(DisambiguationPicker, { ...multiProps, onSelect });
 		await page.getByRole('button', { name: /Mehrere Personen gefunden/ }).click();
 		await page.getByRole('button', { name: 'Walter Müller auswählen' }).click();
 		expect(onSelect).toHaveBeenCalledWith(persons[1]);
 	});
+
+	it('renders the supplied heading as a visible panel heading', async () => {
+		render(DisambiguationPicker, {
+			persons: [{ id: 'c1', displayName: 'Clara Cramer' }],
+			heading: 'Meintest du Clara Cramer?',
+			showCue: false,
+			onSelect: vi.fn()
+		});
+		await page.getByRole('button', { name: 'Meintest du Clara Cramer?' }).click();
+		await expect.element(page.getByText('Meintest du Clara Cramer?')).toBeVisible();
+	});
+
+	it('suppresses the cue when showCue is false', async () => {
+		render(DisambiguationPicker, {
+			persons: [{ id: 'c1', displayName: 'Clara Cramer' }],
+			heading: 'Meintest du Clara Cramer?',
+			showCue: false,
+			onSelect: vi.fn()
+		});
+		await expect.element(page.getByText('(auswählen…)')).not.toBeInTheDocument();
+	});
+
+	it('shows the cue when showCue is true', async () => {
+		render(DisambiguationPicker, { ...multiProps, onSelect: vi.fn() });
+		await expect.element(page.getByText('(auswählen…)')).toBeVisible();
+	});
+
+	it('announces the did-you-mean heading as the trigger accessible name for a single suggestion', async () => {
+		render(DisambiguationPicker, {
+			persons: [{ id: 'c1', displayName: 'Clara Cramer' }],
+			heading: 'Meintest du Clara Cramer?',
+			showCue: false,
+			onSelect: vi.fn()
+		});
+		await expect
+			.element(page.getByRole('button', { name: 'Meintest du Clara Cramer?' }))
+			.toBeInTheDocument();
+	});
+
+	it('keeps the multiple-people trigger accessible name for two or more suggestions', async () => {
+		render(DisambiguationPicker, { ...multiProps, onSelect: vi.fn() });
+		await expect
+			.element(page.getByRole('button', { name: /Mehrere Personen gefunden/ }))
+			.toBeInTheDocument();
+	});
 });