Compare commits

..

36 Commits

Author SHA1 Message Date
Marcel
9a9e1c4c40 merge(search): resolve DEPLOYMENT.md conflict — keep setup + upgrade sections
All checks were successful
CI / Unit & Component Tests (pull_request) Successful in 3m17s
CI / OCR Service Tests (pull_request) Successful in 23s
CI / Backend Unit Tests (pull_request) Successful in 3m45s
CI / fail2ban Regex (pull_request) Successful in 48s
CI / Semgrep Security Scan (pull_request) Successful in 22s
CI / Compose Bucket Idempotency (pull_request) Successful in 1m4s
Both the first-time model pull runbook (from this branch) and the model
upgrade procedure (from main) belong in DEPLOYMENT.md.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 16:47:49 +02:00
Marcel
4c620619d4 fix(search): formal Sie form in German error strings; clean up DocumentService imports
All checks were successful
CI / Unit & Component Tests (pull_request) Successful in 3m19s
CI / OCR Service Tests (pull_request) Successful in 23s
CI / Backend Unit Tests (pull_request) Successful in 3m57s
CI / fail2ban Regex (pull_request) Successful in 45s
CI / Semgrep Security Scan (pull_request) Successful in 21s
CI / Compose Bucket Idempotency (pull_request) Successful in 1m5s
- error_smart_search_unavailable/rate_limited now use "Sie" (formal) to
  match the tone of all existing German error messages
- Replace inline FQNs in DocumentService.buildPersonSpec with proper
  JoinType + Predicate imports

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 16:46:40 +02:00
Marcel
44baff9c9c docs(search): update CLAUDE.md, GLOSSARY, DEPLOYMENT, and C4 diagrams
All checks were successful
CI / Unit & Component Tests (pull_request) Successful in 3m21s
CI / OCR Service Tests (pull_request) Successful in 22s
CI / Backend Unit Tests (pull_request) Successful in 3m52s
CI / fail2ban Regex (pull_request) Successful in 44s
CI / Semgrep Security Scan (pull_request) Successful in 21s
CI / Compose Bucket Idempotency (pull_request) Successful in 1m3s
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 16:16:04 +02:00
Marcel
4634da9865 feat(search): add @Schema annotations and regenerate TypeScript API types
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 16:11:01 +02:00
Marcel
79e4a3f9db feat(search): add searchDocumentsByPersonId with Specification-based sender/receiver query
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 16:04:54 +02:00
Marcel
70e8a6e6ad feat(search): implement NlSearchController with @WebMvcTest tests (7 cases)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 15:58:35 +02:00
Marcel
3af1095d13 feat(search): implement NlQueryParserService with Mockito tests (23 cases)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 15:54:45 +02:00
Marcel
8c835e957a feat(search): implement RestClientOllamaClient with WireMock tests
Switch to wiremock-jetty12 artifact and force ee10 Jetty deps to 12.1.8
to resolve compatibility with Spring Boot 4's Jetty 12.1.8 core.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 15:43:49 +02:00
Marcel
fe8fcba7a7 feat(search): add NlSearchRateLimiter with Bucket4j/Caffeine
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 15:39:06 +02:00
Marcel
e0c80ac193 feat(search): add Ollama and rate-limit config properties
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 15:37:24 +02:00
Marcel
005265b5a8 feat(search): add NL search error codes and i18n strings
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 15:36:13 +02:00
Marcel
684c6e63de feat(search): add NL search domain records and OllamaClient interfaces
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 15:33:56 +02:00
Marcel
e27d52b9ee docs(c4): add L3 backend search component diagram
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 15:32:40 +02:00
Marcel
6f5497c7bf docs(adr): ADR-028 — NL search via Ollama
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 15:31:53 +02:00
Marcel
e0fac783e8 feat(person): add findByDisplayNameContaining service method
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 15:30:30 +02:00
Marcel
202ea85a58 build(deps): add org.wiremock:wiremock 3.9.2 as test dependency
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 15:28:55 +02:00
Marcel
7679596c70 docs(ollama): add model upgrade runbook + post-deploy smoke test to DEPLOYMENT.md
Some checks failed
CI / Unit & Component Tests (pull_request) Has been cancelled
CI / OCR Service Tests (pull_request) Has been cancelled
CI / Backend Unit Tests (pull_request) Has been cancelled
CI / fail2ban Regex (pull_request) Has been cancelled
CI / Semgrep Security Scan (pull_request) Has been cancelled
CI / Compose Bucket Idempotency (pull_request) Has been cancelled
CI / Unit & Component Tests (push) Successful in 3m16s
CI / OCR Service Tests (push) Successful in 23s
CI / Backend Unit Tests (push) Successful in 3m37s
CI / fail2ban Regex (push) Successful in 47s
CI / Semgrep Security Scan (push) Successful in 22s
CI / Compose Bucket Idempotency (push) Successful in 1m4s
Addresses Elicit's and Sara's review concerns on PR #749:
- Expand §6 ollama_models section into a full model upgrade runbook (step-by-step
  docker volume rm + recreate, including production volume name prefix)
- Add re-deploy idempotency note to §3.4 (init container exits quickly when model
  already present on the volume)
- Add NL search smoke test to §3.4 (curl command distinguishing 200 from 503
  NL_SEARCH_UNAVAILABLE)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:59:35 +02:00
Marcel
3d5dcd1f18 docs(deployment): fix OLLAMA_API_KEY version ref and add --wait warning
Updated OLLAMA_API_KEY env vars table from 0.6.5 to 0.6.5 or 0.30.6 to
match both tested versions. Added an explicit warning in §3.4 that
docker compose up -d --wait blocks for 60–90 min on first deploy when the
model pull has not yet completed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:59:35 +02:00
Marcel
52fca38f0f docs(env): correct OLLAMA_API_KEY comment — tested on 0.6.5 and 0.30.6
Both versions were tested and neither enforces the key. Comment updated to
say "0.6.5 or 0.30.6" and surface archiv-net as the sole effective control.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:59:35 +02:00
Marcel
662a8f3e80 fix(infra): interpolate APP_OLLAMA_BASE_URL so .env empty-value disables Ollama
Hardcoded literal overrides any .env setting — setting APP_OLLAMA_BASE_URL=
in .env had no effect on the backend container. Now uses the same pattern
as APP_OCR_TRAINING_TOKEN with a safe default.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:59:35 +02:00
Marcel
cbba95c3f8 docs(c4): fix Ollama container version 0.6.5 → 0.30.6 in l2-containers.puml
Diagram must match the pinned image version in docker-compose.yml.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:59:35 +02:00
Marcel
3536ed884c docs(adr): fix ADR-028 §12 false API-key claim, stale TBD, and §7 title
§12 stated OLLAMA_API_KEY guards against lateral movement — contradicts
§7's empirical finding that it is not enforced. Replaced with an accurate
note referencing §7. Stale pre-merge placeholder in Consequences ("Three
TBD items must be resolved") removed; all three are resolved. §7 section
title updated from "0.6.5" to "0.6.5 and 0.30.6" to match the body text.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:59:35 +02:00
Marcel
5a939d9222 fix(infra): escape \$\$SERVE_PID in compose command to prevent interpolation (#737)
Docker Compose interpolates $VAR in command strings — use $$ to pass a
literal $ to the shell so SERVE_PID=$! and kill $SERVE_PID work correctly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:59:35 +02:00
Marcel
93e90424ab docs(adr): update ADR-028 with 0.30.6 verified findings for API key + read_only (#737)
- OLLAMA_API_KEY: non-enforcement confirmed on both 0.6.5 and 0.30.6
- read_only: true: confirmed working on both 0.6.5 and 0.30.6
- Peak RSS during pull: ~108 MiB (well under 2g limit)
- All TBD placeholders resolved

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:59:35 +02:00
Marcel
e8f3004c4f feat(infra): add Ollama env vars to .env.example (#737)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:59:35 +02:00
Marcel
9637ebbca2 feat(infra): add Ollama Docker Compose services for NL search (#737)
- ollama-model-init: one-shot init container that pulls qwen2.5:7b-instruct-q4_K_M
  into the ollama_models volume on first start
- ollama: main inference service on archiv-net (expose: only, no public port)
- ollama_models named volume for persistent model storage
- APP_OLLAMA_BASE_URL + APP_OLLAMA_API_KEY added to backend env
- Both services: cap_drop ALL, no-new-privileges, read_only+tmpfs (ADR-019 + ADR-028)
- start_period: 60s — model pre-pulled by init container

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:59:35 +02:00
Marcel
df10a42069 docs(deploy): document Ollama hardware requirements, env vars, and ops notes (#737)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:59:35 +02:00
Marcel
64120a30b5 docs(arch): add Ollama container to C4 level-2 container diagram (#737)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:58:49 +02:00
Marcel
25252fc709 feat(observability): add Grafana Ollama inference latency dashboard (#737)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:58:49 +02:00
Marcel
1f379a161d fix(observability): fix OCR target name + add Ollama scrape job (#737)
- prometheus.yml: ocr:8000 → ocr-service:8000 (Docker service name is
  ocr-service, not ocr — current scrape target has never resolved)
- Add Ollama scrape job on ollama:11434 /metrics

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:58:49 +02:00
Marcel
c0d034c85d docs(adr): add ADR-028 — Ollama Docker Compose service for NL search (#737)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:58:49 +02:00
Marcel
ca93cde06e docs(infra): correct server specs — Hetzner Serverbörse i7-6700 64 GB, not CX32
All checks were successful
CI / Unit & Component Tests (push) Successful in 3m18s
CI / OCR Service Tests (push) Successful in 21s
CI / Backend Unit Tests (push) Successful in 3m46s
CI / fail2ban Regex (push) Successful in 48s
CI / Semgrep Security Scan (push) Successful in 23s
CI / Compose Bucket Idempotency (push) Successful in 1m6s
Replace all references to the CX32 VPS (8 GB RAM, Hetzner Cloud) with the
actual production server: a Hetzner Serverbörse dedicated server with an
Intel Core i7-6700 (4C/8T, 3.4 GHz) and 64 GB RAM.

Affected files:
- .claude/personas/devops.md — monthly cost line + upgrade example
- docs/infrastructure/production-compose.md — sizing section + cost table
- docs/DEPLOYMENT.md — OCR memory table + OCR_MEM_LIMIT env var description
- docs/adr/004-pdfbox-thumbnails.md — thumbnailExecutor memory ceiling note
- docs/adr/021-tmpdir-persistent-volume-staging.md — OOMKill rationale in alternatives

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:51:07 +02:00
Marcel
7629e35897 docs(adr): renumber tag case-collision ADR 032 → 033 to resolve number clash (#731)
All checks were successful
CI / Unit & Component Tests (pull_request) Successful in 3m15s
CI / OCR Service Tests (pull_request) Successful in 23s
CI / Backend Unit Tests (pull_request) Successful in 3m40s
CI / fail2ban Regex (pull_request) Successful in 44s
CI / Semgrep Security Scan (pull_request) Successful in 21s
CI / Compose Bucket Idempotency (pull_request) Successful in 1m5s
CI / Unit & Component Tests (push) Successful in 3m13s
CI / OCR Service Tests (push) Successful in 23s
CI / Backend Unit Tests (push) Successful in 3m40s
CI / fail2ban Regex (push) Successful in 46s
CI / Semgrep Security Scan (push) Successful in 21s
CI / Compose Bucket Idempotency (push) Successful in 1m7s
Both #730 (tag case-collision) and #684 (person-delete DB integrity) landed
an ADR-032 on main. Renumber the tag/case-collision one to 033 — it is
referenced only from this PR's person-domain comments and its own file, so the
move is self-contained and touches no Flyway migration. The person-delete
ADR-032 and the V71 migration comment that cites it are deliberately left
untouched (editing an applied migration would drift its Flyway checksum).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-06 13:52:25 +02:00
Marcel
cd741b9f57 docs(person): clarify case-collision scope at the exact-case lookups (#731)
All checks were successful
CI / Unit & Component Tests (pull_request) Successful in 3m15s
CI / OCR Service Tests (pull_request) Successful in 22s
CI / Backend Unit Tests (pull_request) Successful in 3m42s
CI / fail2ban Regex (pull_request) Successful in 46s
CI / Semgrep Security Scan (pull_request) Successful in 21s
CI / Compose Bucket Idempotency (pull_request) Successful in 1m5s
Review noted the "never throws" claim was overstated: the exact-case Optional
lookups still surface a NonUniqueResultException on two byte-identical
same-case rows. That is a true data anomaly out of #731's scope (ambiguous =
case-insensitive) and resolves to the opaque INTERNAL_ERROR, never a wrong
row. Record that boundary at both resolution points and in ADR-032 so the gap
is not silently assumed covered.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-06 13:36:22 +02:00
Marcel
ddf378aaac fix(person): resolve ambiguous sender names to null on upload (#731)
All checks were successful
CI / Unit & Component Tests (pull_request) Successful in 3m18s
CI / OCR Service Tests (pull_request) Successful in 25s
CI / Backend Unit Tests (pull_request) Successful in 3m38s
CI / fail2ban Regex (pull_request) Successful in 43s
CI / Semgrep Security Scan (pull_request) Successful in 22s
CI / Compose Bucket Idempotency (pull_request) Successful in 1m6s
findByName resolved via Optional<Person>
findByFirstNameIgnoreCaseAndLastNameIgnoreCase, which threw
NonUniqueResultException once two people shared a first+last name case-
insensitively (hans müller / Hans Müller) — a 500 on the routine upload path
(DocumentService.storeDocument sender resolution).

findByName now resolves exact-case → single case-insensitive match → else
empty. The sender path deliberately diverges from the alias path: an
ambiguous name leaves the sender UNSET rather than guessing the lowest id,
because correct provenance beats a confidently-wrong pre-fill a reviewer
won't re-check. The two new name queries use explicit HQL equality so a null
first name binds as `= NULL` (no match) instead of the derived-query fold to
`first_name IS NULL`, which would widen a last-name-only row in as a sender.

Pins the opaque error path (IncorrectResultSizeDataAccessException stays
INTERNAL_ERROR with no Hibernate/SQL/row-count leak) and extends ADR-032 with
the Person section.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-06 13:03:04 +02:00
Marcel
20cfe41f21 fix(person): resolve case-colliding aliases without throwing (#731)
findOrCreateByAlias resolved via Optional<Person> findByAliasIgnoreCase,
which throws NonUniqueResultException once two aliases collide only by case
(müller / Müller) — a generic 500 on the importer path. Mirror the #730 tag
fix: resolve exact-case first, then the lowest-id case-insensitive sibling,
then create-when-absent (institution/group and maiden-name alias preserved).
The throwing Optional<…>IgnoreCase variant is deleted so it can't be reused.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-06 12:50:21 +02:00
49 changed files with 2305 additions and 239 deletions

View File

@@ -154,9 +154,9 @@ Schedule monthly automated restore tests. If the restore fails, the backup is wo
```
Every alert needs: description, severity, likely cause, resolution steps, escalation path.
3. **Upgrading VPS tier before profiling**
3. **Upgrading hardware before profiling**
```
# "The app feels slow" → upgrade from CX32 to CX42
# "The app feels slow" → order more RAM / a faster CPU
# Actual cause: unindexed query scanning 100k rows
```
Profile with Grafana dashboards first. Most perceived performance issues are application bugs, not resource constraints.
@@ -404,8 +404,8 @@ Hetzner Object Storage (S3-compatible, replaces MinIO in prod)
Prometheus + Loki + Alertmanager
```
### Monthly Cost: ~23 EUR
CX32 VPS (4 vCPU, 8GB RAM): 17 EUR · Object Storage (~200GB): 5 EUR · SMTP relay: ~1 EUR
### Monthly Cost: ~6 EUR (excl. server)
Hetzner dedicated server (Serverbörse, i7-6700, 64 GB RAM): see invoice · Object Storage (~200GB): 5 EUR · SMTP relay: ~1 EUR
### Reference Documentation
- Full CI workflow, Gitea vs GitHub differences: `docs/infrastructure/ci-gitea.md`

View File

@@ -92,6 +92,7 @@ backend/src/main/java/org/raddatz/familienarchiv/
├── ocr/ OCR domain — OcrService, OcrBatchService, training
├── person/ Person domain
│ └── relationship/ PersonRelationship sub-domain
├── search/ NL search domain — NlSearchController, NlQueryParserService, RestClientOllamaClient, NlSearchRateLimiter
├── security/ SecurityConfig, Permission, @RequirePermission, PermissionAspect
├── tag/ Tag domain
└── user/ User domain — AppUser, UserGroup, UserService
@@ -160,7 +161,7 @@ Input DTOs live flat in the domain package. Response types are the model entitie
→ See [CONTRIBUTING.md §Error handling](./CONTRIBUTING.md#error-handling)
**LLM reminder:** use `DomainException.notFound/forbidden/conflict/internal()` from service methods — never throw raw exceptions. When adding a new `ErrorCode`: (1) add to `ErrorCode.java`, (2) add to `ErrorCode` type in `frontend/src/lib/shared/errors.ts`, (3) add a `case` in `getErrorMessage()`, (4) add i18n keys in `messages/{de,en,es}.json`. Valid error codes include: `TOO_MANY_LOGIN_ATTEMPTS` (returned by `LoginRateLimiter` as HTTP 429 when a brute-force threshold is exceeded).
**LLM reminder:** use `DomainException.notFound/forbidden/conflict/internal()` from service methods — never throw raw exceptions. When adding a new `ErrorCode`: (1) add to `ErrorCode.java`, (2) add to `ErrorCode` type in `frontend/src/lib/shared/errors.ts`, (3) add a `case` in `getErrorMessage()`, (4) add i18n keys in `messages/{de,en,es}.json`. Valid error codes include: `TOO_MANY_LOGIN_ATTEMPTS` (returned by `LoginRateLimiter` as HTTP 429 when a brute-force threshold is exceeded); `SMART_SEARCH_UNAVAILABLE` (HTTP 503 — Ollama inference service offline or timed out); `SMART_SEARCH_RATE_LIMITED` (HTTP 429 — user exceeded 5 NL search requests per minute).
### Security / Permissions
@@ -268,7 +269,7 @@ Back button pattern — use the shared `<BackButton>` component from `$lib/share
→ See [CONTRIBUTING.md §Error handling](./CONTRIBUTING.md#error-handling)
**LLM reminder:** when adding a new `ErrorCode`: (1) add to `ErrorCode.java`, (2) add to `ErrorCode` type in `frontend/src/lib/shared/errors.ts`, (3) add a `case` in `getErrorMessage()`, (4) add i18n keys in `messages/{de,en,es}.json`. Valid error codes include: `TOO_MANY_LOGIN_ATTEMPTS` (returned by `LoginRateLimiter` as HTTP 429 when a brute-force threshold is exceeded).
**LLM reminder:** when adding a new `ErrorCode`: (1) add to `ErrorCode.java`, (2) add to `ErrorCode` type in `frontend/src/lib/shared/errors.ts`, (3) add a `case` in `getErrorMessage()`, (4) add i18n keys in `messages/{de,en,es}.json`. Valid error codes include: `TOO_MANY_LOGIN_ATTEMPTS` (returned by `LoginRateLimiter` as HTTP 429 when a brute-force threshold is exceeded); `SMART_SEARCH_UNAVAILABLE` (HTTP 503 — Ollama inference service offline or timed out); `SMART_SEARCH_RATE_LIMITED` (HTTP 429 — user exceeded 5 NL search requests per minute).
---

View File

@@ -41,6 +41,27 @@
<type>pom</type>
<scope>import</scope>
</dependency>
<!-- Force WireMock's ee10 Jetty transitive deps to match Spring Boot's 12.1.8 core -->
<dependency>
<groupId>org.eclipse.jetty.ee10</groupId>
<artifactId>jetty-ee10-servlet</artifactId>
<version>12.1.8</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty.ee10</groupId>
<artifactId>jetty-ee10-servlets</artifactId>
<version>12.1.8</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty.ee10</groupId>
<artifactId>jetty-ee10-webapp</artifactId>
<version>12.1.8</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-ee</artifactId>
<version>12.1.8</version>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
@@ -137,6 +158,12 @@
<artifactId>archunit-junit5</artifactId>
<version>1.3.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.wiremock</groupId>
<artifactId>wiremock-jetty12</artifactId>
<version>3.9.2</version>
<scope>test</scope>
</dependency>
<!-- Excel Bearbeitung (Apache POI) -->
<dependency>

View File

@@ -57,6 +57,7 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
@EntityGraph("Document.full")
List<Document> findByReceiversId(UUID receiverId);
// Callers access only doc.getTags() to mutate the set — receivers/sender not touched; no graph needed.
List<Document> findByTags_Id(UUID tagId);

View File

@@ -32,6 +32,8 @@ import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.domain.Sort;
import jakarta.persistence.criteria.JoinType;
import jakarta.persistence.criteria.Predicate;
import org.springframework.data.jpa.domain.Specification;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
@@ -1033,6 +1035,28 @@ public class DocumentService {
return documentRepository.findByReceiversId(receiverId);
}
/**
 * Returns one page of documents in which the given person is the sender or one of the
 * receivers, optionally restricted to a document-date window.
 *
 * @param personId id of the person to search for; resolved through {@code personService.getById}
 *                 (NOTE(review): presumably throws when the id is unknown — confirm in PersonService)
 * @param from     inclusive lower bound on the document date, or {@code null} for no lower bound
 * @param to       inclusive upper bound on the document date, or {@code null} for no upper bound
 * @param pageable paging/sorting request passed straight to the repository
 * @return the enriched list items of the page together with the total match count
 */
public DocumentSearchResult searchDocumentsByPersonId(UUID personId, LocalDate from, LocalDate to, Pageable pageable) {
Person person = personService.getById(personId);
Specification<Document> spec = buildPersonSpec(person, from, to);
Page<Document> page = documentRepository.findAll(spec, pageable);
// Second argument is passed as null here — NOTE(review): its meaning is defined by enrichItems, not visible in this hunk.
List<DocumentListItem> items = enrichItems(page.getContent(), null);
return DocumentSearchResult.paged(items, pageable, page.getTotalElements());
}
/**
 * Builds the JPA Specification behind {@code searchDocumentsByPersonId}: the document matches when
 * {@code person} is its sender OR appears among its receivers, AND its document date lies within
 * the optional [from, to] window (both bounds inclusive; a {@code null} bound is not applied).
 */
private Specification<Document> buildPersonSpec(Person person, LocalDate from, LocalDate to) {
return (root, query, cb) -> {
// Presumably collapses the duplicate rows the receivers join can produce for one document.
if (query != null) query.distinct(true);
// LEFT join so documents without any receiver can still match through the sender predicate.
var receiversJoin = root.join("receivers", JoinType.LEFT);
var senderPredicate = cb.equal(root.get("sender"), person);
var receiverPredicate = cb.equal(receiversJoin, person);
var personPredicate = cb.or(senderPredicate, receiverPredicate);
// The person predicate is always present; date bounds are appended only when supplied.
var predicates = new ArrayList<>(List.of(personPredicate));
if (from != null) predicates.add(cb.greaterThanOrEqualTo(root.get("documentDate"), from));
if (to != null) predicates.add(cb.lessThanOrEqualTo(root.get("documentDate"), to));
return cb.and(predicates.toArray(new Predicate[0]));
};
}
public long getIncompleteCount() {
return documentRepository.countByMetadataCompleteFalse();
}

View File

@@ -78,4 +78,8 @@ public class DomainException extends RuntimeException {
public static DomainException tooManyRequests(ErrorCode code, String message, long retryAfterSeconds) {
return new DomainException(code, HttpStatus.TOO_MANY_REQUESTS, message, retryAfterSeconds);
}
/**
 * Creates a {@code DomainException} that maps to HTTP 503 (Service Unavailable).
 *
 * @param code    the error code to attach to the exception
 * @param message the exception message
 * @return a new exception carrying {@code HttpStatus.SERVICE_UNAVAILABLE}
 */
public static DomainException serviceUnavailable(ErrorCode code, String message) {
return new DomainException(code, HttpStatus.SERVICE_UNAVAILABLE, message);
}
}

View File

@@ -135,6 +135,12 @@ public enum ErrorCode {
/** The merge target is a descendant of the source tag. 400 */
TAG_MERGE_INVALID_TARGET,
// --- NL Search ---
/** Ollama is unreachable or timed out. 503 */
SMART_SEARCH_UNAVAILABLE,
/** NL search rate limit exceeded (5 requests per user per minute). 429 */
SMART_SEARCH_RATE_LIMITED,
// --- Generic ---
/** Request validation failed (missing or malformed fields). 400 */
VALIDATION_ERROR,

View File

@@ -29,14 +29,36 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
// Stammbaum-Knoten: alle Personen mit family_member = true.
List<Person> findByFamilyMemberTrueOrderByLastNameAscFirstNameAsc();
// Lookup by full alias string, used during ODS mass import
Optional<Person> findByAliasIgnoreCase(String alias);
// Exact-case alias lookup — the first resolution step in findOrCreateByAlias.
// Case-colliding aliases across persons (müller / Müller) are valid human labels, NOT
// duplicates: source_ref is the stable identity (ADR-025/033), alias is editable. Do NOT
// add a unique(lower(alias)) constraint — see ADR-033.
Optional<Person> findByAlias(String alias);
// Plural case-insensitive alias lookup — the fallback step. Returns ALL case-folding
// siblings so the service can pick a deterministic one (lowest id) instead of letting a
// derived Optional<…>IgnoreCase throw NonUniqueResultException. See ADR-033.
List<Person> findAllByAliasIgnoreCase(String alias);
// Lookup by the normalizer person_id, used for idempotent canonical re-import (Phase 3).
Optional<Person> findBySourceRef(String sourceRef);
// Exact first+last name match, used for filename-based sender lookup
Optional<Person> findByFirstNameIgnoreCaseAndLastNameIgnoreCase(String firstName, String lastName);
// Exact-case first+last name match — the first step of filename-based sender resolution.
// Explicit `=` (HQL, not a derived query) so a null firstName binds as `first_name = NULL`
// — never a match — instead of the derived-query fold to `first_name IS NULL`, which would
// pull a last-name-only row in as a sender (a provenance defect). See ADR-033.
@Query("SELECT p FROM Person p WHERE p.firstName = :firstName AND p.lastName = :lastName")
Optional<Person> findByFirstNameAndLastName(@Param("firstName") String firstName,
@Param("lastName") String lastName);
// Plural case-insensitive first+last name match — lets findByName bail to empty on 2+ matches
// instead of letting a derived Optional<…>IgnoreCase throw NonUniqueResultException. Same
// null fail-closed guarantee as above: LOWER(:firstName) is NULL for a null arg, so a null
// first name resolves to no match (not first_name IS NULL widening). See ADR-033.
@Query("SELECT p FROM Person p WHERE LOWER(p.firstName) = LOWER(:firstName) "
+ "AND LOWER(p.lastName) = LOWER(:lastName)")
List<Person> findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase(@Param("firstName") String firstName,
@Param("lastName") String lastName);
// --- PersonSummaryDTO with document count ---

View File

@@ -1,5 +1,6 @@
package org.raddatz.familienarchiv.person;
import java.util.Comparator;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
@@ -98,6 +99,10 @@ public class PersonService {
return personRepository.findAllById(ids);
}
/**
 * Finds persons whose name matches the given fragment by delegating to
 * {@code personRepository.searchByName}.
 * NOTE(review): the exact matching rules (case sensitivity, which name fields are searched)
 * are defined by that repository query, which is not visible here — confirm before relying on them.
 *
 * @param fragment the name fragment to search for
 * @return the matching persons as returned by the repository
 */
public List<Person> findByDisplayNameContaining(String fragment) {
return personRepository.searchByName(fragment);
}
public List<Person> findAllFamilyMembers() {
return personRepository.findByFamilyMemberTrueOrderByLastNameAscFirstNameAsc();
}
@@ -110,7 +115,19 @@ public class PersonService {
}
public Optional<Person> findByName(String firstName, String lastName) {
return personRepository.findByFirstNameIgnoreCaseAndLastNameIgnoreCase(firstName, lastName);
// Same scope as findOrCreateByAlias (#731): a case-collision resolves without throwing;
// two byte-identical same-case persons are an out-of-scope data anomaly the exact
// Optional below would surface as the opaque INTERNAL_ERROR, not a wrong sender.
Optional<Person> exact = personRepository.findByFirstNameAndLastName(firstName, lastName);
if (exact.isPresent()) return exact;
List<Person> caseInsensitive =
personRepository.findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase(firstName, lastName);
// Deliberate divergence from findOrCreateByAlias: an ambiguous filename leaves the sender
// UNSET rather than picking the lowest id. The archive's value is correct provenance — a
// confidently-wrong pre-filled "Hans Müller" is worse than an empty field, because a
// reviewer won't re-check a pre-filled value. Do NOT "consistency-clean" this into the
// lowest-id fallback. See ADR-033.
return caseInsensitive.size() == 1 ? Optional.of(caseInsensitive.get(0)) : Optional.empty();
}
/** Lookup by the normalizer person_id — used by the canonical importer for register-first matching. */
@@ -125,32 +142,45 @@ public class PersonService {
PersonType type = PersonTypeClassifier.classify(alias);
if (type == PersonType.SKIP) return null;
return personRepository.findByAliasIgnoreCase(alias).orElseGet(() -> {
if (type == PersonType.INSTITUTION || type == PersonType.GROUP) {
return personRepository.save(Person.builder()
.alias(alias)
.lastName(alias)
.personType(type)
.build());
}
// Aliases differing only by case (müller / Müller) are valid distinct persons, not
// duplicates, so a CASE-COLLISION must not throw: exact-case first, then the lowest-id
// case-insensitive sibling, then create. Mirrors the tag path — see ADR-033.
// Scope (#731): "ambiguous" means case-insensitive. Two BYTE-IDENTICAL same-case aliases
// are a true data anomaly out of scope here; the exact Optional below would surface that
// as the opaque INTERNAL_ERROR (never a wrong row), not silently pick one.
Optional<Person> exact = personRepository.findByAlias(alias);
if (exact.isPresent()) return exact.get(); // exact-case wins
List<Person> caseInsensitive = personRepository.findAllByAliasIgnoreCase(alias);
if (!caseInsensitive.isEmpty()) {
return caseInsensitive.stream().min(Comparator.comparing(Person::getId)).orElseThrow(); // deterministic tie-break — list is non-empty, never throws
}
PersonNameParser.SplitName split = PersonNameParser.split(alias);
Person person = personRepository.save(Person.builder()
// Create-when-absent: institution/group keep the full label in lastName; a person name
// is split and a maiden name (geb. …) becomes a MAIDEN_NAME alias.
if (type == PersonType.INSTITUTION || type == PersonType.GROUP) {
return personRepository.save(Person.builder()
.alias(alias)
.firstName(split.firstName())
.lastName(split.lastName())
.lastName(alias)
.personType(type)
.build());
if (split.maidenName() != null) {
int nextSortOrder = aliasRepository.findMaxSortOrder(person.getId()) + 1;
aliasRepository.save(PersonNameAlias.builder()
.person(person)
.lastName(split.maidenName())
.type(PersonNameAliasType.MAIDEN_NAME)
.sortOrder(nextSortOrder)
.build());
}
return person;
});
}
PersonNameParser.SplitName split = PersonNameParser.split(alias);
Person person = personRepository.save(Person.builder()
.alias(alias)
.firstName(split.firstName())
.lastName(split.lastName())
.build());
if (split.maidenName() != null) {
int nextSortOrder = aliasRepository.findMaxSortOrder(person.getId()) + 1;
aliasRepository.save(PersonNameAlias.builder()
.person(person)
.lastName(split.maidenName())
.type(PersonNameAliasType.MAIDEN_NAME)
.sortOrder(nextSortOrder)
.build());
}
return person;
}
/**

View File

@@ -20,8 +20,8 @@ Features: person CRUD, name alias management, person merge (deduplication), fami
| `getById(UUID)` | document, geschichte, ocr | Fetch one person by ID |
| `getAllById(List<UUID>)` | document | Bulk fetch for sender/receiver resolution |
| `findAll(String q)` | document, dashboard | List all persons |
| `findByName(String firstName, String lastName)` | document | Typeahead search |
| `findOrCreateByAlias(String rawName)` | importing | Idempotent create during mass import; type classification happens internally |
| `findByName(String firstName, String lastName)` | document | Filename-based **sender resolution** in `storeDocument`: exact-case match → single case-insensitive match → else **empty** (ambiguous names leave the sender unset; a null first name never matches). See ADR-033. |
| `findOrCreateByAlias(String rawName)` | importing | Idempotent create during mass import; type classification happens internally. Resolves exact-case → lowest-id case-insensitive sibling → create — never throws on case-colliding aliases. See ADR-033. |
| `findAllFamilyMembers()` | dashboard | Family member list for stats |
| `findCorrespondents()` | document | Correspondent list for conversation filter |
| `count()` | dashboard | Total person count for stats |

View File

@@ -0,0 +1,22 @@
package org.raddatz.familienarchiv.search;
import io.swagger.v3.oas.annotations.media.Schema;
import java.time.LocalDate;
import java.util.List;
/**
 * Describes how a natural-language query was interpreted; returned next to the search result
 * in {@link NlSearchResponse}.
 *
 * @param resolvedPersons  persons that name fragments of the query were resolved to unambiguously
 * @param ambiguousPersons candidate persons for name fragments that matched more than one person
 * @param dateFrom         lower bound of the extracted date range; may be null
 * @param dateTo           upper bound of the extracted date range; may be null
 * @param keywords         keywords extracted from the query
 * @param rawQuery         the query text as it was submitted
 * @param keywordsApplied  true only when a non-blank full-text filter was part of the executed search
 */
public record NlQueryInterpretation(
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
List<PersonHint> resolvedPersons,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
List<PersonHint> ambiguousPersons,
// Not marked REQUIRED: both date bounds are optional.
LocalDate dateFrom,
LocalDate dateTo,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
List<String> keywords,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
String rawQuery,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
boolean keywordsApplied
) {
}

View File

@@ -0,0 +1,160 @@
package org.raddatz.familienarchiv.search;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.raddatz.familienarchiv.document.DocumentSearchResult;
import org.raddatz.familienarchiv.document.DocumentService;
import org.raddatz.familienarchiv.document.DocumentSort;
import org.raddatz.familienarchiv.document.SearchFilters;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
import org.raddatz.familienarchiv.person.Person;
import org.raddatz.familienarchiv.person.PersonService;
import org.raddatz.familienarchiv.tag.TagOperator;
import org.springframework.data.domain.Pageable;
import org.springframework.stereotype.Service;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
/**
 * Natural-language document search.
 *
 * <p>Asks {@link OllamaClient} to extract person names, a person role, a date range and keywords
 * from a free-text query, resolves the name fragments against {@link PersonService} and then runs
 * the matching search on {@link DocumentService}.
 */
@Service
@RequiredArgsConstructor
@Slf4j
public class NlQueryParserService {

    /** Inclusive lower bound on the query length, in characters. */
    private static final int MIN_QUERY = 3;
    /** Inclusive upper bound on the query length, in characters. */
    private static final int MAX_QUERY = 500;
    /** Name fragments longer than this are skipped instead of being looked up. */
    private static final int MAX_NAME_LENGTH = 200;
    /** At most this many candidates are considered/reported per name fragment. */
    private static final int MAX_CANDIDATES = 10;
    /** Only the first two uniquely resolved persons are used as person filters. */
    private static final int MAX_RESOLVED_PERSONS = 2;

    private final OllamaClient ollamaClient;
    private final PersonService personService;
    private final DocumentService documentService;

    /**
     * Interprets {@code query} and executes the resulting document search.
     *
     * <ul>
     *   <li>If any name fragment is ambiguous, no search is run: the result is empty and the
     *       interpretation lists the candidates so the caller can disambiguate.</li>
     *   <li>If exactly one person was resolved and the role is neither "sender" nor "receiver",
     *       documents are searched by that person id and date range only (no text filter).</li>
     *   <li>Otherwise a filter search is run with text, date range and sender/receiver.</li>
     * </ul>
     *
     * @param query    the user's free-text query
     * @param pageable paging information passed through to the document search
     * @return the search result together with the interpretation of the query
     * @throws DomainException bad request when the query is null or its length is out of bounds;
     *                         whatever {@link OllamaClient#parse(String)} throws is propagated
     */
    public NlSearchResponse search(String query, Pageable pageable) {
        if (query == null || query.length() < MIN_QUERY || query.length() > MAX_QUERY) {
            throw DomainException.badRequest(ErrorCode.VALIDATION_ERROR,
                    "Query must be between " + MIN_QUERY + " and " + MAX_QUERY + " characters");
        }
        OllamaExtraction ext = ollamaClient.parse(query);
        List<String> personNames = ext.personNames() != null ? ext.personNames() : List.of();
        List<String> keywords = ext.keywords() != null ? ext.keywords() : List.of();
        NameResolution resolution = resolveNames(personNames);

        // Ambiguity wins over everything else: report the candidates, search nothing.
        if (!resolution.ambiguous().isEmpty()) {
            NlQueryInterpretation interpretation = new NlQueryInterpretation(
                    List.of(), resolution.ambiguous(),
                    ext.dateFrom(), ext.dateTo(),
                    keywords, ext.rawQuery(), false);
            return new NlSearchResponse(DocumentSearchResult.of(List.of()), interpretation);
        }

        List<PersonHint> resolved = resolution.resolved();
        List<String> noMatchFragments = resolution.noMatchFragments();
        List<String> extraFragments = resolution.extraFragments();
        String text = buildText(keywords, noMatchFragments, extraFragments, ext.rawQuery());

        // One person in "any" role: person-centric search; the text is not applied here,
        // which is why keywordsApplied is reported as false.
        if (resolved.size() == 1 && isAnyRole(ext.personRole())) {
            UUID personId = resolved.get(0).id();
            DocumentSearchResult docs = documentService.searchDocumentsByPersonId(
                    personId, ext.dateFrom(), ext.dateTo(), pageable);
            NlQueryInterpretation interpretation = new NlQueryInterpretation(
                    resolved, List.of(), ext.dateFrom(), ext.dateTo(), keywords, ext.rawQuery(), false);
            return new NlSearchResponse(docs, interpretation);
        }

        UUID sender = buildSender(resolved, ext.personRole());
        UUID receiver = buildReceiver(resolved, ext.personRole());
        SearchFilters filters = new SearchFilters(
                text.isBlank() ? null : text,
                ext.dateFrom(), ext.dateTo(),
                sender, receiver,
                List.of(), null,
                null, TagOperator.AND, false);
        DocumentSearchResult docs = documentService.searchDocuments(filters, DocumentSort.DATE, "desc", pageable);
        boolean keywordsApplied = !text.isBlank();
        NlQueryInterpretation interpretation = new NlQueryInterpretation(
                resolved, List.of(), ext.dateFrom(), ext.dateTo(), keywords, ext.rawQuery(), keywordsApplied);
        return new NlSearchResponse(docs, interpretation);
    }

    /**
     * Looks up every name fragment and sorts it into one of four buckets: uniquely resolved
     * (first two only), ambiguous (all capped candidates), no match, or surplus unique matches
     * beyond the first two (kept as text fragments).
     *
     * <p>Null, blank and over-long fragments are skipped. Blank fragments in particular must
     * never reach the contains-lookup: an empty search term would presumably match every person
     * and wrongly flag the whole query as ambiguous.
     */
    private NameResolution resolveNames(List<String> personNames) {
        List<PersonHint> resolved = new ArrayList<>();
        List<PersonHint> ambiguous = new ArrayList<>();
        List<String> noMatchFragments = new ArrayList<>();
        List<String> extraFragments = new ArrayList<>();
        for (String name : personNames) {
            if (name == null || name.length() > MAX_NAME_LENGTH) {
                log.debug("Skipping name fragment (too long or null): length={}", name == null ? 0 : name.length());
                continue;
            }
            if (name.isBlank()) {
                log.debug("Skipping blank name fragment");
                continue;
            }
            List<Person> candidates = personService.findByDisplayNameContaining(name);
            List<Person> capped = candidates.size() > MAX_CANDIDATES
                    ? candidates.subList(0, MAX_CANDIDATES)
                    : candidates;
            if (capped.isEmpty()) {
                noMatchFragments.add(name);
            } else if (capped.size() == 1) {
                Person p = capped.get(0);
                if (resolved.size() < MAX_RESOLVED_PERSONS) {
                    resolved.add(new PersonHint(p.getId(), p.getDisplayName()));
                } else {
                    // Third and later unique matches cannot be used as sender/receiver.
                    extraFragments.add(name);
                }
            } else {
                capped.forEach(p -> ambiguous.add(new PersonHint(p.getId(), p.getDisplayName())));
            }
        }
        return new NameResolution(resolved, ambiguous, noMatchFragments, extraFragments);
    }

    /**
     * Joins keywords, unmatched fragments and surplus fragments into the full-text search string.
     * Falls back to the raw query when nothing else is available.
     */
    private String buildText(List<String> keywords, List<String> noMatchFragments,
                             List<String> extraFragments, String rawQuery) {
        List<String> parts = new ArrayList<>();
        parts.addAll(keywords);
        parts.addAll(noMatchFragments);
        parts.addAll(extraFragments);
        String text = String.join(" ", parts).strip();
        if (text.isBlank() && rawQuery != null && !rawQuery.isBlank()) {
            return rawQuery;
        }
        return text;
    }

    /** True for every role except the two explicit ones, including null and unknown values. */
    private boolean isAnyRole(String role) {
        return !"sender".equals(role) && !"receiver".equals(role);
    }

    /** Sender filter: first of two resolved persons, or the single person when the role is "sender". */
    private UUID buildSender(List<PersonHint> resolved, String role) {
        if (resolved.size() >= 2) return resolved.get(0).id();
        if (resolved.size() == 1 && "sender".equals(role)) return resolved.get(0).id();
        return null;
    }

    /** Receiver filter: second of two resolved persons, or the single person when the role is "receiver". */
    private UUID buildReceiver(List<PersonHint> resolved, String role) {
        if (resolved.size() >= 2) return resolved.get(1).id();
        if (resolved.size() == 1 && "receiver".equals(role)) return resolved.get(0).id();
        return null;
    }

    /** Outcome of {@link #resolveNames(List)}. */
    private record NameResolution(
            List<PersonHint> resolved,
            List<PersonHint> ambiguous,
            List<String> noMatchFragments,
            List<String> extraFragments
    ) {}
}

View File

@@ -0,0 +1,28 @@
package org.raddatz.familienarchiv.search;
import jakarta.validation.Valid;
import lombok.RequiredArgsConstructor;
import org.raddatz.familienarchiv.security.Permission;
import org.raddatz.familienarchiv.security.RequirePermission;
import org.springframework.data.domain.Pageable;
import org.springframework.security.core.annotation.AuthenticationPrincipal;
import org.springframework.security.core.userdetails.UserDetails;
import org.springframework.web.bind.annotation.*;
/**
 * HTTP endpoint for natural-language document search ({@code POST /api/search/nl}).
 */
@RestController
@RequestMapping("/api/search/nl")
@RequiredArgsConstructor
public class NlSearchController {

    private final NlQueryParserService nlQueryParserService;
    private final NlSearchRateLimiter rateLimiter;

    /**
     * Consumes one rate-limit token for the calling user, then delegates to the parser service.
     *
     * @param request   validated request body carrying the query text
     * @param pageable  paging information bound from the request
     * @param principal the authenticated user; its username is the rate-limit key
     * @return search result plus the interpretation of the query
     */
    @PostMapping
    @RequirePermission(Permission.READ_ALL)
    public NlSearchResponse search(@Valid @RequestBody NlSearchRequest request,
                                   Pageable pageable,
                                   @AuthenticationPrincipal UserDetails principal) {
        String userKey = principal.getUsername();
        // The limiter throws when the budget is exhausted, so no search work is done past it.
        rateLimiter.checkAndConsume(userKey);
        String query = request.query();
        return nlQueryParserService.search(query, pageable);
    }
}

View File

@@ -0,0 +1,12 @@
package org.raddatz.familienarchiv.search;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
/**
 * Configuration bound from the {@code app.nl-search.rate-limit} prefix; read by
 * {@link NlSearchRateLimiter}.
 */
@Component
@ConfigurationProperties("app.nl-search.rate-limit")
@Data
public class NlSearchRateLimitProperties {
// Per-user request budget per minute; defaults to 5 when the property is not configured.
private int maxRequestsPerMinute = 5;
}

View File

@@ -0,0 +1,46 @@
package org.raddatz.familienarchiv.search;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.LoadingCache;
import io.github.bucket4j.Bandwidth;
import io.github.bucket4j.Bucket;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
import org.springframework.stereotype.Service;
import java.time.Duration;
import java.util.concurrent.TimeUnit;
/**
 * Per-user rate limiter for natural-language search, backed by one Bucket4j bucket per user key
 * held in a Caffeine cache.
 */
@Service
public class NlSearchRateLimiter {

    private final LoadingCache<String, Bucket> byUser;
    private final int maxRequestsPerMinute;

    /**
     * @param props supplies the per-minute request budget applied to every user
     */
    public NlSearchRateLimiter(NlSearchRateLimitProperties props) {
        this.maxRequestsPerMinute = props.getMaxRequestsPerMinute();
        // Buckets are created lazily per user and dropped after one minute without access.
        this.byUser = Caffeine.newBuilder()
                .expireAfterAccess(1, TimeUnit.MINUTES)
                .build(userKey -> newBucket(maxRequestsPerMinute));
    }

    /**
     * Takes one token from the user's bucket.
     *
     * @param userKey identifies the caller whose budget is charged
     * @throws DomainException too-many-requests when the user's bucket has no token left
     */
    public void checkAndConsume(String userKey) {
        Bucket bucket = byUser.get(userKey);
        boolean allowed = bucket.tryConsume(1);
        if (allowed) {
            return;
        }
        throw DomainException.tooManyRequests(ErrorCode.SMART_SEARCH_RATE_LIMITED,
                "NL search rate limit exceeded for user: " + userKey, 60L);
    }

    /** Drops every cached bucket so each test starts with a full budget. */
    void resetForTest() {
        byUser.invalidateAll();
    }

    /** Builds a bucket holding {@code limit} tokens, greedily refilled with {@code limit} tokens per minute. */
    private static Bucket newBucket(int limit) {
        Bandwidth perMinute = Bandwidth.builder()
                .capacity(limit)
                .refillGreedy(limit, Duration.ofMinutes(1))
                .build();
        return Bucket.builder()
                .addLimit(perMinute)
                .build();
    }
}

View File

@@ -0,0 +1,11 @@
package org.raddatz.familienarchiv.search;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.Size;
/**
 * Request body of the natural-language search endpoint.
 *
 * @param query free-text query; must not be blank and must be 3 to 500 characters long
 */
public record NlSearchRequest(
@NotBlank
// Same bounds as MIN_QUERY / MAX_QUERY re-checked in NlQueryParserService.search.
@Size(min = 3, max = 500)
String query
) {
}
}

View File

@@ -0,0 +1,12 @@
package org.raddatz.familienarchiv.search;
import io.swagger.v3.oas.annotations.media.Schema;
import org.raddatz.familienarchiv.document.DocumentSearchResult;
/**
 * Response of the natural-language search endpoint.
 *
 * @param result         the documents found (empty when the query was ambiguous)
 * @param interpretation how the query text was interpreted
 */
public record NlSearchResponse(
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
DocumentSearchResult result,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
NlQueryInterpretation interpretation
) {
}
}

View File

@@ -0,0 +1,5 @@
package org.raddatz.familienarchiv.search;
/**
 * Abstraction over the LLM call that turns a free-text query into structured search hints.
 */
public interface OllamaClient {
/**
 * Extracts person names, role, date range and keywords from {@code query}.
 * The implementation in this package (RestClientOllamaClient) throws a service-unavailable
 * DomainException when the call or the response parsing fails.
 */
OllamaExtraction parse(String query);
}

View File

@@ -0,0 +1,18 @@
package org.raddatz.familienarchiv.search;
import java.time.LocalDate;
import java.util.List;
/**
 * Raw structured output from Ollama after parsing and sanitising.
 * personRole is always one of "sender", "receiver", "any" — defensive parsing ensures this.
 *
 * @param personNames name fragments found in the query; consumers treat null as empty
 * @param personRole  role of the person(s) in the query: "sender", "receiver" or "any"
 * @param dateFrom    start of the requested date range, or null when none was recognised
 * @param dateTo      end of the requested date range, or null when none was recognised
 * @param keywords    free-text keywords; consumers treat null as empty
 * @param rawQuery    the original query text, used as a fallback search text
 */
record OllamaExtraction(
List<String> personNames,
String personRole,
LocalDate dateFrom,
LocalDate dateTo,
List<String> keywords,
String rawQuery
) {
}
}

View File

@@ -0,0 +1,5 @@
package org.raddatz.familienarchiv.search;
/**
 * Liveness probe for the Ollama backend.
 */
public interface OllamaHealthClient {
/** Returns true when Ollama answered the probe; implementations report failures as false. */
boolean isHealthy();
}

View File

@@ -0,0 +1,15 @@
package org.raddatz.familienarchiv.search;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
/**
 * Connection settings for Ollama, bound from the {@code app.ollama} prefix.
 */
@Component
@ConfigurationProperties("app.ollama")
@Data
public class OllamaProperties {
// Base URL of the Ollama HTTP API; no default, must be configured.
private String baseUrl;
// Model name sent with every generate request; no default, must be configured.
private String model;
// Read timeout for inference requests, in seconds.
private int timeoutSeconds = 30;
// Connect and read timeout for the health probe, in seconds.
private int healthCheckTimeoutSeconds = 2;
}

View File

@@ -0,0 +1,13 @@
package org.raddatz.familienarchiv.search;
import io.swagger.v3.oas.annotations.media.Schema;
import java.util.UUID;
/**
 * Minimal person reference reported back in a query interpretation.
 *
 * @param id          the person's id
 * @param displayName the person's display name
 */
public record PersonHint(
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
UUID id,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
String displayName
) {
}
}

View File

@@ -0,0 +1,184 @@
package org.raddatz.familienarchiv.search;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.extern.slf4j.Slf4j;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
import org.springframework.http.client.JdkClientHttpRequestFactory;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestClient;
import org.springframework.web.client.RestClientException;
import java.net.http.HttpClient;
import java.time.Duration;
import java.time.LocalDate;
import java.time.Year;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * {@link OllamaClient} / {@link OllamaHealthClient} implementation that talks to Ollama's HTTP
 * API through Spring's {@link RestClient}.
 *
 * <p>Inference posts the user query as the prompt to {@code /api/generate} (non-streaming) with
 * {@link #JSON_SCHEMA} as the requested output format; the health probe issues a GET on
 * {@code /api/tags}. Separate HTTP clients are used so the probe can have much shorter timeouts.
 */
@Service
@Slf4j
public class RestClientOllamaClient implements OllamaClient, OllamaHealthClient {

    private static final ObjectMapper MAPPER = new ObjectMapper();
    private static final Set<String> VALID_ROLES = Set.of("sender", "receiver", "any");
    private static final int MAX_NAME_LENGTH = 200;
    private static final int MAX_KEYWORD_LENGTH = 100;

    /** JSON schema sent as the {@code format} of the generate request. */
    private static final Map<String, Object> JSON_SCHEMA = Map.of(
            "type", "object",
            "required", List.of("personNames", "personRole", "keywords"),
            "properties", Map.of(
                    "personNames", Map.of("type", "array", "items", Map.of("type", "string", "maxLength", MAX_NAME_LENGTH)),
                    "personRole", Map.of("type", "string", "enum", List.of("sender", "receiver", "any")),
                    "dateFrom", Map.of("type", List.of("string", "null"), "maxLength", 20),
                    "dateTo", Map.of("type", List.of("string", "null"), "maxLength", 20),
                    "keywords", Map.of("type", "array", "items", Map.of("type", "string", "maxLength", MAX_KEYWORD_LENGTH))
            )
    );

    private final RestClient inferenceClient;
    private final RestClient healthClient;
    private final OllamaProperties props;

    /**
     * Builds the two HTTP clients from the configured base URL and timeouts.
     *
     * @param props Ollama connection settings
     */
    public RestClientOllamaClient(OllamaProperties props) {
        this.props = props;

        // Inference: fixed 10 s connect timeout, configurable read timeout.
        HttpClient inferenceHttp = HttpClient.newBuilder()
                .version(HttpClient.Version.HTTP_1_1)
                .connectTimeout(Duration.ofSeconds(10))
                .build();
        JdkClientHttpRequestFactory inferenceFactory = new JdkClientHttpRequestFactory(inferenceHttp);
        inferenceFactory.setReadTimeout(Duration.ofSeconds(props.getTimeoutSeconds()));
        this.inferenceClient = RestClient.builder()
                .baseUrl(props.getBaseUrl())
                .requestFactory(inferenceFactory)
                .build();

        // Health probe: the same short timeout for connect and read.
        HttpClient healthHttp = HttpClient.newBuilder()
                .version(HttpClient.Version.HTTP_1_1)
                .connectTimeout(Duration.ofSeconds(props.getHealthCheckTimeoutSeconds()))
                .build();
        JdkClientHttpRequestFactory healthFactory = new JdkClientHttpRequestFactory(healthHttp);
        healthFactory.setReadTimeout(Duration.ofSeconds(props.getHealthCheckTimeoutSeconds()));
        this.healthClient = RestClient.builder()
                .baseUrl(props.getBaseUrl())
                .requestFactory(healthFactory)
                .build();
    }

    /**
     * Sends {@code query} to Ollama and converts the answer into an {@link OllamaExtraction}.
     *
     * @param query the user's free-text query; also stored as the extraction's raw query
     * @return the sanitised extraction
     * @throws DomainException service unavailable when the HTTP call or the parsing fails;
     *                         only the exception class name is logged and reported
     */
    @Override
    public OllamaExtraction parse(String query) {
        try {
            OllamaGenerateRequest request = new OllamaGenerateRequest(
                    props.getModel(), query, JSON_SCHEMA, false);
            String responseBody = inferenceClient.post()
                    .uri("/api/generate")
                    .contentType(org.springframework.http.MediaType.APPLICATION_JSON)
                    .body(request)
                    .retrieve()
                    .body(String.class);
            return parseOllamaResponse(responseBody, query);
        } catch (DomainException e) {
            // Already mapped by parseOllamaResponse — do not wrap a second time.
            throw e;
        } catch (Exception e) {
            log.warn("Ollama inference failed: {}", e.getClass().getSimpleName());
            throw DomainException.serviceUnavailable(ErrorCode.SMART_SEARCH_UNAVAILABLE,
                    "Ollama unavailable: " + e.getClass().getSimpleName());
        }
    }

    /** Returns true when {@code GET /api/tags} succeeds; any exception is reported as false. */
    @Override
    public boolean isHealthy() {
        try {
            healthClient.get().uri("/api/tags").retrieve().toBodilessEntity();
            return true;
        } catch (Exception e) {
            return false;
        }
    }

    /**
     * Unwraps the generate envelope and parses the model's JSON answer inside it.
     * A missing or blank answer yields the fallback extraction; anything unparsable is mapped
     * to a service-unavailable {@link DomainException}.
     */
    private OllamaExtraction parseOllamaResponse(String responseBody, String rawQuery) {
        try {
            OllamaGenerateResponse response = MAPPER.readValue(responseBody, OllamaGenerateResponse.class);
            String inner = response.response();
            if (inner == null || inner.isBlank()) {
                return fallbackExtraction(rawQuery);
            }
            RawOllamaOutput raw = MAPPER.readValue(inner, RawOllamaOutput.class);
            return toExtraction(raw, rawQuery);
        } catch (Exception e) {
            log.warn("Failed to parse Ollama response: {}", e.getClass().getSimpleName());
            throw DomainException.serviceUnavailable(ErrorCode.SMART_SEARCH_UNAVAILABLE,
                    "Failed to parse Ollama response: " + e.getClass().getSimpleName());
        }
    }

    /**
     * Sanitises the model output: drops null, blank and over-long names/keywords, normalises the
     * role and converts the date strings.
     */
    private OllamaExtraction toExtraction(RawOllamaOutput raw, String rawQuery) {
        List<String> names = raw.personNames() == null ? List.of() : raw.personNames().stream()
                .filter(n -> n != null && !n.isBlank() && n.length() <= MAX_NAME_LENGTH)
                .toList();
        List<String> keywords = raw.keywords() == null ? List.of() : raw.keywords().stream()
                .filter(k -> k != null && !k.isBlank() && k.length() <= MAX_KEYWORD_LENGTH)
                .toList();
        String role = sanitiseRole(raw.personRole());
        LocalDate dateFrom = parseDate(raw.dateFrom(), true);
        LocalDate dateTo = parseDate(raw.dateTo(), false);
        return new OllamaExtraction(names, role, dateFrom, dateTo, keywords, rawQuery);
    }

    /** Extraction used when the model returned no answer: no hints, role "any", raw query kept. */
    private OllamaExtraction fallbackExtraction(String rawQuery) {
        return new OllamaExtraction(List.of(), "any", null, null, List.of(), rawQuery);
    }

    /** Returns {@code role} when it is one of the valid roles, otherwise logs it and returns "any". */
    private String sanitiseRole(String role) {
        if (role != null && VALID_ROLES.contains(role)) {
            return role;
        }
        log.warn("Unexpected personRole from Ollama: {}", role);
        return "any";
    }

    /**
     * Converts a model-supplied date string.
     *
     * <p>Accepts an ISO local date ({@code yyyy-MM-dd}) or a bare year strictly between 1000 and
     * 3000. A bare year becomes January 1st for a range start and December 31st for a range end.
     * Surrounding whitespace is ignored for both forms.
     *
     * @param raw    the date string; may be null
     * @param isFrom true when this is the start of the range
     * @return the parsed date, or null when the value is missing or unrecognised
     */
    private LocalDate parseDate(String raw, boolean isFrom) {
        if (raw == null || raw.isBlank()) return null;
        String value = raw.strip();
        try {
            return LocalDate.parse(value, DateTimeFormatter.ISO_LOCAL_DATE);
        } catch (DateTimeParseException ignored) {
            // Not a full date — try the bare-year form next.
        }
        try {
            int year = Integer.parseInt(value);
            if (year > 1000 && year < 3000) {
                return isFrom ? Year.of(year).atDay(1) : Year.of(year).atMonth(12).atEndOfMonth();
            }
        } catch (NumberFormatException ignored) {
            // Neither a date nor a year — treated as "no date".
        }
        return null;
    }

    /** Envelope of the generate response; only the model's answer text is read. */
    @JsonIgnoreProperties(ignoreUnknown = true)
    private record OllamaGenerateResponse(String response) {
    }

    /** The model's answer before sanitising; dates are still plain strings. */
    @JsonIgnoreProperties(ignoreUnknown = true)
    private record RawOllamaOutput(
            @JsonProperty("personNames") List<String> personNames,
            @JsonProperty("personRole") String personRole,
            @JsonProperty("dateFrom") String dateFrom,
            @JsonProperty("dateTo") String dateTo,
            @JsonProperty("keywords") List<String> keywords
    ) {
    }

    /** Request body for {@code /api/generate}. */
    private record OllamaGenerateRequest(
            String model,
            String prompt,
            Object format,
            boolean stream
    ) {
    }
}

View File

@@ -11,3 +11,7 @@ springdoc:
swagger-ui:
enabled: true
path: /swagger-ui.html
app:
ollama:
base-url: http://localhost:11434

View File

@@ -130,6 +130,16 @@ app:
# The loader maps columns by header name — no positional indices (see ADR-025).
dir: ${IMPORT_DIR:/import}
ollama:
base-url: http://ollama:11434
model: qwen2.5:7b-instruct-q4_K_M
timeout-seconds: 30
health-check-timeout-seconds: 2
nl-search:
rate-limit:
max-requests-per-minute: 5
ocr:
sender-model:
activation-threshold: 100

View File

@@ -624,4 +624,88 @@ class DocumentRepositoryTest {
.reviewed(reviewed)
.build();
}
// ─── searchDocumentsByPersonId (via Specification) ───────────────────────
/**
 * Runs a person-centric search directly against the repository: documents where {@code person}
 * is the sender or one of the receivers, optionally limited to a document-date range.
 * Returns the first page of at most 10 hits.
 *
 * NOTE(review): this builds its own Specification instead of calling production code; presumably
 * it mirrors the spec behind DocumentService.searchDocumentsByPersonId — confirm the two stay in sync.
 */
private Page<Document> searchByPerson(Person person, LocalDate from, LocalDate to) {
Specification<Document> spec = (root, query, cb) -> {
// DISTINCT because the receivers join can yield the same document more than once.
if (query != null) query.distinct(true);
// LEFT join so documents without receivers can still match via the sender.
var receiversJoin = root.join("receivers", jakarta.persistence.criteria.JoinType.LEFT);
var personPredicate = cb.or(
cb.equal(root.get("sender"), person),
cb.equal(receiversJoin, person));
var predicates = new java.util.ArrayList<>(java.util.List.of(personPredicate));
// Both date bounds are inclusive and optional.
if (from != null) predicates.add(cb.greaterThanOrEqualTo(root.get("documentDate"), from));
if (to != null) predicates.add(cb.lessThanOrEqualTo(root.get("documentDate"), to));
return cb.and(predicates.toArray(new jakarta.persistence.criteria.Predicate[0]));
};
return documentRepository.findAll(spec, PageRequest.of(0, 10));
}
// A document whose sender is the person must be found.
@Test
void searchByPersonSpec_returnsDocument_whenPersonIsSender() {
    Person sender = personRepository.save(Person.builder().lastName("Raddatz").build());
    Document letter = Document.builder()
            .title("Senderbrief")
            .originalFilename("sender.pdf")
            .status(DocumentStatus.UPLOADED)
            .sender(sender)
            .build();
    Document stored = documentRepository.save(letter);

    Page<Document> page = searchByPerson(sender, null, null);

    assertThat(page.getContent()).extracting(Document::getId).containsExactly(stored.getId());
}
// A document that only lists the person among its receivers must be found as well.
@Test
void searchByPersonSpec_returnsDocument_whenPersonIsReceiver() {
    Person receiver = personRepository.save(Person.builder().lastName("Raddatz").build());
    Document letter = Document.builder()
            .title("Empfängerbrief")
            .originalFilename("receiver.pdf")
            .status(DocumentStatus.UPLOADED)
            .receivers(new java.util.HashSet<>(List.of(receiver)))
            .build();
    Document stored = documentRepository.save(letter);

    Page<Document> page = searchByPerson(receiver, null, null);

    assertThat(page.getContent()).extracting(Document::getId).containsExactly(stored.getId());
}
// Sender and receiver at once: the join must not duplicate the document in the result.
@Test
void searchByPersonSpec_returnsDocumentOnce_whenPersonIsBothSenderAndReceiver() {
    Person both = personRepository.save(Person.builder().lastName("Raddatz").build());
    Document letter = Document.builder()
            .title("SenderEmpfänger")
            .originalFilename("both.pdf")
            .status(DocumentStatus.UPLOADED)
            .sender(both)
            .receivers(new java.util.HashSet<>(List.of(both)))
            .build();
    Document stored = documentRepository.save(letter);

    List<Document> hits = searchByPerson(both, null, null).getContent();

    assertThat(hits).hasSize(1);
    assertThat(hits.get(0).getId()).isEqualTo(stored.getId());
}
// Only the document dated inside the requested range may be returned.
@Test
void searchByPersonSpec_excludesDocuments_outsideDateRange() {
    Person sender = personRepository.save(Person.builder().lastName("Raddatz").build());
    LocalDate rangeStart = LocalDate.of(1914, 1, 1);
    LocalDate rangeEnd = LocalDate.of(1918, 12, 31);

    Document withinRange = documentRepository.save(Document.builder()
            .title("Innen").originalFilename("inside.pdf").status(DocumentStatus.UPLOADED)
            .sender(sender).documentDate(LocalDate.of(1918, 6, 15)).build());
    // Dated after the range end, so it must be filtered out.
    documentRepository.save(Document.builder()
            .title("Außen").originalFilename("outside.pdf").status(DocumentStatus.UPLOADED)
            .sender(sender).documentDate(LocalDate.of(1920, 1, 1)).build());

    Page<Document> page = searchByPerson(sender, rangeStart, rangeEnd);

    assertThat(page.getContent()).extracting(Document::getId).containsExactly(withinRange.getId());
}
// A person who appears on no document yields an empty page.
@Test
void searchByPersonSpec_returnsEmpty_whenNoMatchingDocuments() {
    Person searched = personRepository.save(Person.builder().lastName("Raddatz").build());
    Person unrelated = personRepository.save(Person.builder().lastName("Braun").build());
    Document foreignLetter = Document.builder()
            .title("Fremder Brief")
            .originalFilename("other.pdf")
            .status(DocumentStatus.UPLOADED)
            .sender(unrelated)
            .build();
    documentRepository.save(foreignLetter);

    Page<Document> page = searchByPerson(searched, null, null);

    assertThat(page.getContent()).isEmpty();
}
}

View File

@@ -12,6 +12,7 @@ import org.mockito.MockedStatic;
import org.mockito.junit.jupiter.MockitoExtension;
import org.slf4j.LoggerFactory;
import org.springframework.dao.DataIntegrityViolationException;
import org.springframework.dao.IncorrectResultSizeDataAccessException;
import org.springframework.http.ResponseEntity;
import static org.assertj.core.api.Assertions.assertThat;
@@ -37,6 +38,30 @@ class GlobalExceptionHandlerTest {
}
}
@Test
void handleGeneric_incorrectResultSize_staysOpaque_noHibernateOrRowCountLeak() {
    // #731: a case-colliding alias/name used to make Hibernate throw NonUniqueResultException,
    // surfacing as IncorrectResultSizeDataAccessException. That type has no dedicated handler
    // and ends up in handleGeneric. The fix removed the throw; this test pins the handler so a
    // stray one stays opaque — no Hibernate class name, no SQL and no row count such as
    // "2 results were returned" may reach the client (CWE-209).
    String leakyMessage = "query did not return a unique result: 2 results were returned";
    IncorrectResultSizeDataAccessException failure =
            new IncorrectResultSizeDataAccessException(leakyMessage, 1, 2);

    try (MockedStatic<Sentry> sentryMock = mockStatic(Sentry.class)) {
        ResponseEntity<GlobalExceptionHandler.ErrorResponse> response = handler.handleGeneric(failure);

        assertThat(response.getStatusCode().value()).isEqualTo(500);
        GlobalExceptionHandler.ErrorResponse body = response.getBody();
        assertThat(body).isNotNull();
        assertThat(body.code()).isEqualTo(ErrorCode.INTERNAL_ERROR);
        assertThat(body.message())
                .isEqualTo("An unexpected error occurred")
                .doesNotContain("results were returned")
                .doesNotContain("NonUnique")
                .doesNotContain("IncorrectResultSize");
    }
}
@Test
void handleDataIntegrityViolation_returns400_withoutLeakingConstraint_orSentry() {
// A DataIntegrityViolationException carries the constraint name + SQL in its message;

View File

@@ -121,37 +121,60 @@ class PersonRepositoryTest {
.containsExactly("Anna", "Clara");
}
// ─── findByAliasIgnoreCase ────────────────────────────────────────────────
// ─── findByAlias (exact) / findAllByAliasIgnoreCase (case-folding siblings) ───
@Test
void findByAliasIgnoreCase_returnsMatchingPerson() {
void findByAlias_returnsExactCaseMatchOnly() {
personRepository.save(Person.builder()
.firstName("Karl").lastName("Brandt").alias("Opa Karl").build());
Optional<Person> found = personRepository.findByAliasIgnoreCase("opa karl");
assertThat(found).isPresent();
assertThat(found.get().getFirstName()).isEqualTo("Karl");
assertThat(personRepository.findByAlias("Opa Karl")).isPresent();
assertThat(personRepository.findByAlias("opa karl")).isEmpty(); // exact-case: a folded form does NOT match
}
@Test
void findByAliasIgnoreCase_returnsEmpty_whenAliasDoesNotMatch() {
Optional<Person> found = personRepository.findByAliasIgnoreCase("nobody");
assertThat(found).isEmpty();
void findAllByAliasIgnoreCase_returnsEmpty_whenAliasDoesNotMatch() {
assertThat(personRepository.findAllByAliasIgnoreCase("nobody")).isEmpty();
}
// ─── findByFirstNameIgnoreCaseAndLastNameIgnoreCase ───────────────────────
@Test
void findAllByAliasIgnoreCase_foldsUmlautCase_inRealPostgres() {
    // An ASCII-only probe would stay green even if umlaut folding regressed; this one proves
    // Postgres LOWER() folds ü identically for both rows, so both colliding aliases match.
    Person capitalised = Person.builder().lastName("Müller").alias("Müller").build();
    Person lowerCased = Person.builder().lastName("müller").alias("müller").build();
    personRepository.save(capitalised);
    personRepository.save(lowerCased);

    assertThat(personRepository.findAllByAliasIgnoreCase("MÜLLER")).hasSize(2);
}
// ─── findByFirstNameAndLastName (exact) / findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase ───
@Test
void findByFirstNameIgnoreCaseAndLastNameIgnoreCase_returnsMatch() {
void findByFirstNameAndLastName_returnsExactCaseMatchOnly() {
personRepository.save(Person.builder().firstName("Maria").lastName("Raddatz").build());
Optional<Person> found = personRepository.findByFirstNameIgnoreCaseAndLastNameIgnoreCase(
"maria", "raddatz");
assertThat(personRepository.findByFirstNameAndLastName("Maria", "Raddatz")).isPresent();
assertThat(personRepository.findByFirstNameAndLastName("maria", "raddatz")).isEmpty(); // exact-case only
}
assertThat(found).isPresent();
assertThat(found.get().getFirstName()).isEqualTo("Maria");
// Both case variants of the same first/last name must be returned for an upper-case probe.
@Test
void findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase_foldsUmlautCase_inRealPostgres() {
    Person capitalised = Person.builder().firstName("Hans").lastName("Müller").build();
    Person lowerCased = Person.builder().firstName("hans").lastName("müller").build();
    personRepository.save(capitalised);
    personRepository.save(lowerCased);

    assertThat(personRepository.findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase("HANS", "MÜLLER"))
            .hasSize(2);
}
@Test
void findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase_nullFirstName_foldsToNoMatch() {
    // Fail-closed: a null first name (last-name-only filename) must NOT widen to
    // "first_name IS NULL" and return the institution/last-name-only row as a "sender".
    // Checked on real Postgres because a mocked unit test cannot tell IS NULL from `= NULL`.
    Person lastNameOnly = Person.builder().lastName("Müller").build(); // first_name NULL
    personRepository.save(lastNameOnly);

    assertThat(personRepository.findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase(null, "Müller"))
            .isEmpty();
}
// ─── findCorrespondents ───────────────────────────────────────────────────

View File

@@ -4,6 +4,7 @@ import org.junit.jupiter.api.Test;
import org.raddatz.familienarchiv.PostgresContainerConfig;
import org.raddatz.familienarchiv.document.Document;
import org.raddatz.familienarchiv.document.DocumentRepository;
import org.raddatz.familienarchiv.document.DocumentService;
import org.raddatz.familienarchiv.document.DocumentStatus;
import org.raddatz.familienarchiv.person.Person;
import org.raddatz.familienarchiv.person.PersonType;
@@ -16,10 +17,13 @@ import org.springframework.test.context.bean.override.mockito.MockitoBean;
import org.springframework.transaction.annotation.Transactional;
import software.amazon.awssdk.services.s3.S3Client;
import org.springframework.mock.web.MockMultipartFile;
import jakarta.persistence.EntityManager;
import jakarta.persistence.PersistenceContext;
import java.util.Set;
import java.util.UUID;
import static org.assertj.core.api.Assertions.assertThat;
@@ -33,6 +37,7 @@ class PersonServiceIntegrationTest {
@Autowired PersonService personService;
@Autowired PersonRepository personRepository;
@Autowired DocumentRepository documentRepository;
@Autowired DocumentService documentService;
@PersistenceContext EntityManager entityManager;
@@ -75,6 +80,93 @@ class PersonServiceIntegrationTest {
assertThat(result.getLastName()).isEqualTo("Cram");
}
// ─── #731: case-colliding alias resolution against real Postgres ───────────
// The umlaut pair is mandatory — only the real DB proves Postgres LOWER() folds ü; a
// plain-ASCII test would stay green while umlaut aliases regressed.
@Test
void findOrCreateByAlias_resolvesUmlautAliasCollision_toLowestId_withoutThrow() {
    Person capitalised = personRepository.save(Person.builder().lastName("Müller").alias("Müller").build());
    Person lowerCased = personRepository.save(Person.builder().lastName("müller").alias("müller").build());

    // NOTE(review): java.util.UUID#compareTo compares the two 64-bit halves as signed longs,
    // which can disagree with a database-side ORDER BY on a uuid column — confirm the service
    // determines "lowest id" with the same ordering this expectation uses.
    UUID lowestId;
    if (capitalised.getId().compareTo(lowerCased.getId()) <= 0) {
        lowestId = capitalised.getId();
    } else {
        lowestId = lowerCased.getId();
    }

    // There is no exact-case "MÜLLER" row, so resolution falls through to the case-insensitive
    // branch with two candidates; it must pick the lowest id and never throw
    // NonUniqueResultException.
    Person winner = personService.findOrCreateByAlias("MÜLLER");

    assertThat(winner.getId()).isEqualTo(lowestId);
}
// Resolving the same colliding alias twice must return the same person both times.
@Test
void findOrCreateByAlias_umlautAliasCollision_isDeterministicAcrossCalls() {
    Person capitalised = Person.builder().lastName("Müller").alias("Müller").build();
    Person lowerCased = Person.builder().lastName("müller").alias("müller").build();
    personRepository.save(capitalised);
    personRepository.save(lowerCased);

    UUID firstCallId = personService.findOrCreateByAlias("MÜLLER").getId();
    UUID secondCallId = personService.findOrCreateByAlias("MÜLLER").getId();

    assertThat(secondCallId).isEqualTo(firstCallId);
}
// ─── #731: filename-based sender resolution against real Postgres ──────────
// The name in the filename matches exactly one stored person, who becomes the sender.
@Test
void storeDocument_resolvesSender_whenFilenameNameIsUnique() throws Exception {
    Person expectedSender = personRepository.save(
            Person.builder().firstName("Hans").lastName("Müller").build());

    DocumentService.StoreResult stored = uploadNamed("1965-03-12_Müller_Hans.pdf");
    Document uploaded = stored.document();

    assertThat(uploaded.getSender()).isNotNull();
    assertThat(uploaded.getSender().getId()).isEqualTo(expectedSender.getId());
}
@Test
void storeDocument_resolvesSender_onSingleCaseInsensitiveMatch() throws Exception {
    Person expectedSender = personRepository.save(
            Person.builder().firstName("Hans").lastName("Müller").build());

    // The filename folds to "hans müller"; "Hans Müller" is the only stored person.
    DocumentService.StoreResult stored = uploadNamed("1965-03-12_müller_hans.pdf");
    Document uploaded = stored.document();

    assertThat(uploaded.getSender()).isNotNull();
    assertThat(uploaded.getSender().getId()).isEqualTo(expectedSender.getId());
}
@Test
void storeDocument_leavesSenderUnset_whenFilenameNameIsAmbiguous() throws Exception {
    // Two persons collide case-insensitively and the filename casing ("HANS"/"MÜLLER") matches
    // neither exactly. With no exact-case winner the sender must stay null — never an arbitrary
    // guess, and no 500.
    Person capitalised = Person.builder().firstName("Hans").lastName("Müller").build();
    Person lowerCased = Person.builder().firstName("hans").lastName("müller").build();
    personRepository.save(capitalised);
    personRepository.save(lowerCased);

    DocumentService.StoreResult stored = uploadNamed("1965-03-12_MÜLLER_HANS.pdf");

    assertThat(stored.document().getSender()).isNull();
}
@Test
void storeDocument_leavesSenderUnset_whenFilenameHasNoFirstName() throws Exception {
    // A filename with only a last name never resolves to a sender (the parser yields no parsed name).
    Person lastNameOnly = Person.builder().lastName("Müller").build();
    personRepository.save(lastNameOnly);

    DocumentService.StoreResult stored = uploadNamed("1965-03-12_Müller.pdf");

    assertThat(stored.document().getSender()).isNull();
}
@Test
void findByName_nullFirstName_resolvesToEmpty_inRealPostgres() {
    // Fail-closed on the real DB: a null first name must NOT widen to "first_name IS NULL"
    // and pick up the last-name-only row.
    Person lastNameOnly = Person.builder().lastName("Müller").build(); // first_name NULL
    personRepository.save(lastNameOnly);

    assertThat(personService.findByName(null, "Müller")).isEmpty();
}
// Stores a tiny dummy PDF under the given original filename and returns the store result.
private DocumentService.StoreResult uploadNamed(String filename) throws Exception {
    byte[] content = new byte[]{1, 2, 3};
    MockMultipartFile upload = new MockMultipartFile("file", filename, "application/pdf", content);
    return documentService.storeDocument(upload, null);
}
// ─── #667: confirm round-trip + reader-default semantics ──────────────────
@Test

View File

@@ -375,14 +375,57 @@ class PersonServiceTest {
// ─── findOrCreateByAlias ─────────────────────────────────────────────────
@Test
void findOrCreateByAlias_returnsExisting_whenAliasFound() {
String alias = "Walter de Gruyter";
Person existing = Person.builder().id(UUID.randomUUID()).alias(alias).build();
when(personRepository.findByAliasIgnoreCase(alias)).thenReturn(Optional.of(existing));
void findOrCreateByAlias_returnsExactCaseMatch_overCaseInsensitiveSibling() {
String alias = "müller";
Person exact = Person.builder().id(UUID.randomUUID()).alias("müller").build();
when(personRepository.findByAlias(alias)).thenReturn(Optional.of(exact));
Person result = personService.findOrCreateByAlias(alias);
assertThat(result).isEqualTo(existing);
assertThat(result).isEqualTo(exact);
verify(personRepository, never()).findAllByAliasIgnoreCase(any());
verify(personRepository, never()).save(any());
}
@Test
void findOrCreateByAlias_returnsExactCaseMatch_evenWhenMultipleSiblingsCollide() {
    String requestedAlias = "Müller";
    Person exactCasePerson = Person.builder().id(UUID.randomUUID()).alias("Müller").build();
    when(personRepository.findByAlias(requestedAlias)).thenReturn(Optional.of(exactCasePerson));

    Person resolved = personService.findOrCreateByAlias(requestedAlias);

    assertThat(resolved).isEqualTo(exactCasePerson);
    // The exact-case hit ends the lookup: the case-insensitive query is never issued.
    verify(personRepository, never()).findAllByAliasIgnoreCase(any());
}
@Test
void findOrCreateByAlias_usesSingleCaseInsensitiveMatch_whenNoExactCase() {
    String requestedAlias = "müller";
    Person soleCandidate = Person.builder().id(UUID.randomUUID()).alias("Müller").build();
    // No exact-case row, but exactly one row matches when case is ignored.
    when(personRepository.findByAlias(requestedAlias)).thenReturn(Optional.empty());
    when(personRepository.findAllByAliasIgnoreCase(requestedAlias)).thenReturn(List.of(soleCandidate));

    Person resolved = personService.findOrCreateByAlias(requestedAlias);

    assertThat(resolved).isEqualTo(soleCandidate);
    // An existing person was found, so nothing may be persisted.
    verify(personRepository, never()).save(any());
}
@Test
void findOrCreateByAlias_returnsLowestIdDeterministically_whenMultipleCaseInsensitiveMatches() {
    String requestedAlias = "müller";
    UUID smallerId = UUID.fromString("00000000-0000-0000-0000-000000000001");
    UUID largerId = UUID.fromString("00000000-0000-0000-0000-000000000002");
    Person withSmallerId = Person.builder().id(smallerId).alias("Müller").build();
    Person withLargerId = Person.builder().id(largerId).alias("müller").build();
    when(personRepository.findByAlias(requestedAlias)).thenReturn(Optional.empty());
    // The repository hands the candidates back with the larger id first.
    when(personRepository.findAllByAliasIgnoreCase(requestedAlias))
            .thenReturn(List.of(withLargerId, withSmallerId));

    Person firstCall = personService.findOrCreateByAlias(requestedAlias);
    Person secondCall = personService.findOrCreateByAlias(requestedAlias);

    // The smallest id is chosen, and repeating the call yields the same person.
    assertThat(firstCall.getId()).isEqualTo(withSmallerId.getId());
    assertThat(secondCall.getId()).isEqualTo(firstCall.getId());
    verify(personRepository, never()).save(any());
}
@@ -390,7 +433,8 @@ class PersonServiceTest {
void findOrCreateByAlias_createsNew_whenAliasNotFound() {
String alias = "Clara Cram";
Person saved = Person.builder().id(UUID.randomUUID()).alias(alias).firstName("Clara").lastName("Cram").build();
when(personRepository.findByAliasIgnoreCase(alias)).thenReturn(Optional.empty());
when(personRepository.findByAlias(alias)).thenReturn(Optional.empty());
when(personRepository.findAllByAliasIgnoreCase(alias)).thenReturn(List.of());
when(personRepository.save(any())).thenReturn(saved);
Person result = personService.findOrCreateByAlias(alias);
@@ -403,7 +447,8 @@ class PersonServiceTest {
void findOrCreateByAlias_createsMaidenNameAlias_whenGebPresent() {
String alias = "Clara Cram geb. de Gruyter";
Person saved = Person.builder().id(UUID.randomUUID()).alias(alias).firstName("Clara").lastName("Cram").build();
when(personRepository.findByAliasIgnoreCase(alias)).thenReturn(Optional.empty());
when(personRepository.findByAlias(alias)).thenReturn(Optional.empty());
when(personRepository.findAllByAliasIgnoreCase(alias)).thenReturn(List.of());
when(personRepository.save(any())).thenReturn(saved);
when(aliasRepository.findMaxSortOrder(saved.getId())).thenReturn(0);
when(aliasRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
@@ -425,7 +470,8 @@ class PersonServiceTest {
@Test
void findOrCreateByAlias_setsInstitutionType_withFullNameInLastName() {
String alias = "Arthur Collignon GmbH";
when(personRepository.findByAliasIgnoreCase(alias)).thenReturn(Optional.empty());
when(personRepository.findByAlias(alias)).thenReturn(Optional.empty());
when(personRepository.findAllByAliasIgnoreCase(alias)).thenReturn(List.of());
when(personRepository.save(any())).thenAnswer(inv -> {
Person p = inv.getArgument(0);
p.setId(UUID.randomUUID());
@@ -442,7 +488,8 @@ class PersonServiceTest {
@Test
void findOrCreateByAlias_setsGroupType_withFullNameInLastName() {
String alias = "Geschwister de Gruyter";
when(personRepository.findByAliasIgnoreCase(alias)).thenReturn(Optional.empty());
when(personRepository.findByAlias(alias)).thenReturn(Optional.empty());
when(personRepository.findAllByAliasIgnoreCase(alias)).thenReturn(List.of());
when(personRepository.save(any())).thenAnswer(inv -> {
Person p = inv.getArgument(0);
p.setId(UUID.randomUUID());
@@ -460,7 +507,8 @@ class PersonServiceTest {
void findOrCreateByAlias_noAlias_whenNoGeb() {
String alias = "Clara Cram";
Person saved = Person.builder().id(UUID.randomUUID()).alias(alias).firstName("Clara").lastName("Cram").build();
when(personRepository.findByAliasIgnoreCase(alias)).thenReturn(Optional.empty());
when(personRepository.findByAlias(alias)).thenReturn(Optional.empty());
when(personRepository.findAllByAliasIgnoreCase(alias)).thenReturn(List.of());
when(personRepository.save(any())).thenReturn(saved);
personService.findOrCreateByAlias(alias);
@@ -472,11 +520,54 @@ class PersonServiceTest {
void findOrCreateByAlias_trimsInput() {
String alias = " Clara Cram ";
Person saved = Person.builder().id(UUID.randomUUID()).alias("Clara Cram").build();
when(personRepository.findByAliasIgnoreCase("Clara Cram")).thenReturn(Optional.of(saved));
when(personRepository.findByAlias("Clara Cram")).thenReturn(Optional.of(saved));
personService.findOrCreateByAlias(alias);
verify(personRepository).findByAliasIgnoreCase("Clara Cram");
verify(personRepository).findByAlias("Clara Cram");
}
// ─── findByName (filename-based sender resolution) ────────────────────────
@Test
void findByName_returnsExactCaseMatch_overCaseInsensitiveSibling() {
    Person exactCasePerson = Person.builder()
            .id(UUID.randomUUID())
            .firstName("Hans")
            .lastName("Müller")
            .build();
    when(personRepository.findByFirstNameAndLastName("Hans", "Müller"))
            .thenReturn(Optional.of(exactCasePerson));

    assertThat(personService.findByName("Hans", "Müller")).contains(exactCasePerson);

    // With an exact-case hit the case-insensitive query must not be issued at all.
    verify(personRepository, never()).findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase(any(), any());
}
@Test
void findByName_usesSingleCaseInsensitiveMatch_whenNoExactCase() {
    Person soleCandidate = Person.builder()
            .id(UUID.randomUUID())
            .firstName("Hans")
            .lastName("Müller")
            .build();
    // Exact-case lookup misses; the case-insensitive lookup yields exactly one row.
    when(personRepository.findByFirstNameAndLastName("hans", "müller")).thenReturn(Optional.empty());
    when(personRepository.findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase("hans", "müller"))
            .thenReturn(List.of(soleCandidate));

    assertThat(personService.findByName("hans", "müller")).contains(soleCandidate);
}
@Test
void findByName_bailsToEmpty_whenTwoOrMoreCaseInsensitiveMatches() {
    Person capitalised = Person.builder()
            .id(UUID.randomUUID()).firstName("Hans").lastName("Müller").build();
    Person lowerCased = Person.builder()
            .id(UUID.randomUUID()).firstName("hans").lastName("müller").build();
    when(personRepository.findByFirstNameAndLastName("hans", "müller")).thenReturn(Optional.empty());
    when(personRepository.findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase("hans", "müller"))
            .thenReturn(List.of(capitalised, lowerCased));

    // Two candidates and no exact-case winner: the result is empty rather than one of the
    // two picked arbitrarily. Unlike the alias lookup, which settles on the lowest id, the
    // sender lookup prefers "unset" over a possibly wrong pre-fill.
    assertThat(personService.findByName("hans", "müller")).isEmpty();
}
@Test
void findByName_returnsEmpty_whenFirstNameNullFoldsToNoMatch() {
    // Both repository lookups are scripted to find nothing for a null first name.
    when(personRepository.findByFirstNameAndLastName(null, "Müller"))
            .thenReturn(Optional.empty());
    when(personRepository.findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase(null, "Müller"))
            .thenReturn(List.of());

    assertThat(personService.findByName(null, "Müller")).isEmpty();
}
// ─── updatePerson (notes) ────────────────────────────────────────────────
@@ -807,4 +898,15 @@ class PersonServiceTest {
.extracting(e -> ((DomainException) e).getStatus().value())
.isEqualTo(403);
}
@Test
void findByDisplayNameContaining_delegatesToSearchByName() {
    Person expected = Person.builder()
            .id(UUID.randomUUID())
            .firstName("Walter")
            .lastName("Müller")
            .build();
    when(personRepository.searchByName("Walter")).thenReturn(List.of(expected));

    List<Person> found = personService.findByDisplayNameContaining("Walter");

    // The service result is exactly what the repository search returned for the same term.
    assertThat(found).containsExactly(expected);
    verify(personRepository).searchByName("Walter");
}
}

View File

@@ -0,0 +1,440 @@
package org.raddatz.familienarchiv.search;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.ArgumentCaptor;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
import org.raddatz.familienarchiv.document.DocumentSearchResult;
import org.raddatz.familienarchiv.document.DocumentService;
import org.raddatz.familienarchiv.document.DocumentSort;
import org.raddatz.familienarchiv.document.SearchFilters;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
import org.raddatz.familienarchiv.person.Person;
import org.raddatz.familienarchiv.person.PersonService;
import org.raddatz.familienarchiv.tag.TagOperator;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.ArgumentMatchers.*;
import static org.mockito.Mockito.*;
class NlQueryParserServiceTest {
@Mock OllamaClient ollamaClient;
@Mock PersonService personService;
@Mock DocumentService documentService;
NlQueryParserService service;
static final Pageable PAGE = PageRequest.of(0, 20);
@BeforeEach
void setUp() {
MockitoAnnotations.openMocks(this);
service = new NlQueryParserService(ollamaClient, personService, documentService);
when(documentService.searchDocuments(any(), any(), any(), any()))
.thenReturn(DocumentSearchResult.of(List.of()));
when(documentService.searchDocumentsByPersonId(any(), any(), any(), any()))
.thenReturn(DocumentSearchResult.of(List.of()));
}
// --- Factory helpers ---
private OllamaExtraction extraction(List<String> names, String role, LocalDate from, LocalDate to,
List<String> keywords) {
String raw = names.isEmpty() ? "test query" : String.join(" ", names);
return new OllamaExtraction(names, role, from, to, keywords, raw);
}
private Person person(UUID id, String firstName, String lastName) {
return Person.builder().id(id).firstName(firstName).lastName(lastName).build();
}
private static final UUID P1 = UUID.fromString("00000000-0000-0000-0000-000000000001");
private static final UUID P2 = UUID.fromString("00000000-0000-0000-0000-000000000002");
private static final UUID P3 = UUID.fromString("00000000-0000-0000-0000-000000000003");
// --- 1. Single resolved name + personRole=sender ---
@Test
void search_resolvesSingleName_asSender() {
Person walter = person(P1, "Walter", "Raddatz");
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of("Walter"), "sender", null, null, List.of()));
when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
NlSearchResponse resp = service.search("Was hat Walter geschrieben?", PAGE);
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
verify(documentService).searchDocuments(cap.capture(), eq(DocumentSort.DATE), eq("desc"), eq(PAGE));
assertThat(cap.getValue().sender()).isEqualTo(P1);
assertThat(cap.getValue().receiver()).isNull();
assertThat(resp.interpretation().resolvedPersons()).hasSize(1);
assertThat(resp.interpretation().resolvedPersons().get(0).id()).isEqualTo(P1);
assertThat(resp.interpretation().ambiguousPersons()).isEmpty();
}
// --- 2. Multi-match name → ambiguous, search NOT executed ---
@Test
void search_multiMatchName_populatesAmbiguous_andSkipsSearch() {
Person a = person(UUID.randomUUID(), "Walter", "Braun");
Person b = person(UUID.randomUUID(), "Walter", "Schmidt");
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of("Walter"), "sender", null, null, List.of()));
when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(a, b));
NlSearchResponse resp = service.search("Briefe von Walter", PAGE);
verify(documentService, never()).searchDocuments(any(), any(), any(), any());
verify(documentService, never()).searchDocumentsByPersonId(any(), any(), any(), any());
assertThat(resp.interpretation().ambiguousPersons()).hasSize(2);
assertThat(resp.interpretation().resolvedPersons()).isEmpty();
}
// --- 3. Multi-match + personRole=any → still ambiguous, search NOT executed ---
@Test
void search_multiMatchName_withPersonRoleAny_stillSkipsSearch() {
Person a = person(UUID.randomUUID(), "Emma", "Braun");
Person b = person(UUID.randomUUID(), "Emma", "Raddatz");
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of("Emma"), "any", null, null, List.of()));
when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(a, b));
NlSearchResponse resp = service.search("Briefe an Emma", PAGE);
verify(documentService, never()).searchDocuments(any(), any(), any(), any());
verify(documentService, never()).searchDocumentsByPersonId(any(), any(), any(), any());
assertThat(resp.interpretation().ambiguousPersons()).hasSize(2);
}
// --- 4. No-match name → folded into text ---
@Test
void search_noMatchName_isFoldedIntoText() {
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of("Karl"), "any", null, null, List.of()));
when(personService.findByDisplayNameContaining("Karl")).thenReturn(List.of());
service.search("Briefe von Karl", PAGE);
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
verify(documentService).searchDocuments(cap.capture(), any(), any(), any());
assertThat(cap.getValue().text()).contains("Karl");
assertThat(cap.getValue().sender()).isNull();
assertThat(cap.getValue().receiver()).isNull();
}
// --- 5. personRole=any + 1 resolved → searchDocumentsByPersonId called ---
@Test
void search_personRoleAny_singleMatch_callsSearchDocumentsByPersonId() {
Person walter = person(P1, "Walter", "Raddatz");
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of("Walter"), "any", null, null, List.of()));
when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
NlSearchResponse resp = service.search("Briefe von Walter", PAGE);
verify(documentService).searchDocumentsByPersonId(eq(P1), isNull(), isNull(), eq(PAGE));
verify(documentService, never()).searchDocuments(any(), any(), any(), any());
assertThat(resp.interpretation().keywordsApplied()).isFalse();
}
// --- 6. 2 names both resolve → sender=person1, receiver=person2 ---
@Test
void search_twoNamesResolve_assignsSenderAndReceiver() {
Person walter = person(P1, "Walter", "Raddatz");
Person emma = person(P2, "Emma", "Raddatz");
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of("Walter", "Emma"), "any", null, null, List.of()));
when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(emma));
NlSearchResponse resp = service.search("Briefe von Walter an Emma", PAGE);
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
verify(documentService).searchDocuments(cap.capture(), eq(DocumentSort.DATE), eq("desc"), eq(PAGE));
assertThat(cap.getValue().sender()).isEqualTo(P1);
assertThat(cap.getValue().receiver()).isEqualTo(P2);
assertThat(resp.interpretation().resolvedPersons().get(0).id()).isEqualTo(P1);
assertThat(resp.interpretation().resolvedPersons().get(1).id()).isEqualTo(P2);
}
// --- 7. 2 names, first resolves, second ambiguous → search NOT executed ---
@Test
void search_twoNames_secondAmbiguous_skipsSearch() {
Person walter = person(P1, "Walter", "Raddatz");
Person emma1 = person(P2, "Emma", "Braun");
Person emma2 = person(P3, "Emma", "Schmidt");
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of("Walter", "Emma"), "sender", null, null, List.of()));
when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(emma1, emma2));
NlSearchResponse resp = service.search("Briefe von Walter an Emma", PAGE);
verify(documentService, never()).searchDocuments(any(), any(), any(), any());
assertThat(resp.interpretation().ambiguousPersons()).hasSize(2);
}
// --- 8. 2 names, first no match → folded into text, second used as single person ---
@Test
void search_twoNames_firstNoMatch_secondResolved_foldFirstIntoText() {
Person emma = person(P2, "Emma", "Raddatz");
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of("Karl", "Emma"), "sender", null, null, List.of()));
when(personService.findByDisplayNameContaining("Karl")).thenReturn(List.of());
when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(emma));
service.search("Briefe von Karl an Emma", PAGE);
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
verify(documentService).searchDocuments(cap.capture(), any(), any(), any());
assertThat(cap.getValue().text()).contains("Karl");
assertThat(cap.getValue().sender()).isEqualTo(P2);
}
// --- 9. 3+ names all resolve → first two as sender/receiver, third folded into text ---
@Test
void search_threeNamesResolve_extraFoldedIntoText() {
Person walter = person(P1, "Walter", "Raddatz");
Person emma = person(P2, "Emma", "Raddatz");
Person heinrich = person(P3, "Heinrich", "Braun");
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of("Walter", "Emma", "Heinrich"), "any", null, null, List.of()));
when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(emma));
when(personService.findByDisplayNameContaining("Heinrich")).thenReturn(List.of(heinrich));
service.search("Briefe von Walter an Emma über Heinrich", PAGE);
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
verify(documentService).searchDocuments(cap.capture(), any(), any(), any());
assertThat(cap.getValue().sender()).isEqualTo(P1);
assertThat(cap.getValue().receiver()).isEqualTo(P2);
assertThat(cap.getValue().text()).contains("Heinrich");
}
// --- 10. Keywords space-joined into text ---
@Test
void search_keywords_areJoinedIntoText() {
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of(), "any", null, null, List.of("Krieg", "Walter")));
service.search("Dokumente über den Krieg Walter", PAGE);
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
verify(documentService).searchDocuments(cap.capture(), any(), any(), any());
assertThat(cap.getValue().text()).isEqualTo("Krieg Walter");
}
// --- 11. Date range passed through ---
@Test
void search_dateRange_passedIntoSearchFilters() {
LocalDate from = LocalDate.of(1914, 1, 1);
LocalDate to = LocalDate.of(1914, 12, 31);
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of(), "any", from, to, List.of()));
service.search("Briefe aus dem Jahr 1914", PAGE);
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
verify(documentService).searchDocuments(cap.capture(), any(), any(), any());
assertThat(cap.getValue().from()).isEqualTo(from);
assertThat(cap.getValue().to()).isEqualTo(to);
}
// --- 12. Null dates → null in SearchFilters (not an error) ---
@Test
void search_nullDates_passedAsNullIntoFilters() {
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of(), "any", null, null, List.of("Hochzeit")));
service.search("Hochzeitsbriefe", PAGE);
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
verify(documentService).searchDocuments(cap.capture(), any(), any(), any());
assertThat(cap.getValue().from()).isNull();
assertThat(cap.getValue().to()).isNull();
}
// --- 13. Query under 3 chars → VALIDATION_ERROR before Ollama call ---
@Test
void search_queryTooShort_throwsValidationError() {
assertThatThrownBy(() -> service.search("ab", PAGE))
.isInstanceOf(DomainException.class)
.extracting(e -> ((DomainException) e).getCode())
.isEqualTo(ErrorCode.VALIDATION_ERROR);
verify(ollamaClient, never()).parse(anyString());
}
// --- 14. Query over 500 chars → VALIDATION_ERROR ---
@Test
void search_queryTooLong_throwsValidationError() {
String longQuery = "a".repeat(501);
assertThatThrownBy(() -> service.search(longQuery, PAGE))
.isInstanceOf(DomainException.class)
.extracting(e -> ((DomainException) e).getCode())
.isEqualTo(ErrorCode.VALIDATION_ERROR);
verify(ollamaClient, never()).parse(anyString());
}
// --- 15. Ollama returns empty names/keywords → raw query used as keyword fallback ---
@Test
void search_ollamaReturnsEmpty_usesRawQueryAsTextFallback() {
String raw = "Briefe aus dem Krieg";
when(ollamaClient.parse(anyString()))
.thenReturn(new OllamaExtraction(List.of(), "any", null, null, List.of(), raw));
service.search(raw, PAGE);
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
verify(documentService).searchDocuments(cap.capture(), any(), any(), any());
assertThat(cap.getValue().text()).isEqualTo(raw);
}
// --- 16. Null personNames/keywords from Ollama → no NPE ---
@Test
void search_nullPersonNamesAndKeywords_handledWithoutNpe() {
OllamaExtraction ext = new OllamaExtraction(null, "any", null, null, null, "test query");
when(ollamaClient.parse(anyString())).thenReturn(ext);
NlSearchResponse resp = service.search("test query", PAGE);
assertThat(resp).isNotNull();
verify(documentService).searchDocuments(any(), any(), any(), any());
}
// --- 17. Unrecognized personRole → defaults to any-like behavior (no crash) ---
@Test
void search_unrecognizedPersonRole_treatedLikeAny_withSingleResolvedPerson() {
Person walter = person(P1, "Walter", "Raddatz");
// OllamaClient defensive parsing returns "any" for unknown roles,
// but NlQueryParserService must also be safe if something unexpected arrives.
when(ollamaClient.parse(anyString()))
.thenReturn(new OllamaExtraction(List.of("Walter"), "unknown_role", null, null, List.of(), "query"));
when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
NlSearchResponse resp = service.search("Briefe von Walter", PAGE);
// Should not crash; "unknown_role" treated as fallback (neither sender nor receiver → any)
assertThat(resp).isNotNull();
}
// --- 18. Ollama throws SMART_SEARCH_UNAVAILABLE → propagates to caller ---
@Test
void search_ollamaThrowsUnavailable_propagates() {
when(ollamaClient.parse(anyString()))
.thenThrow(DomainException.tooManyRequests(ErrorCode.SMART_SEARCH_UNAVAILABLE, "offline"));
assertThatThrownBy(() -> service.search("Was hat Walter geschrieben?", PAGE))
.isInstanceOf(DomainException.class)
.extracting(e -> ((DomainException) e).getCode())
.isEqualTo(ErrorCode.SMART_SEARCH_UNAVAILABLE);
}
// --- 19. LLM-extracted name > 200 chars → skipped, PersonService never called ---
@Test
void search_nameLongerThan200Chars_isSkippedBeforePersonServiceCall() {
String longName = "A".repeat(201);
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of(longName), "sender", null, null, List.of()));
service.search("Briefe von sehr langem Namen", PAGE);
verify(personService, never()).findByDisplayNameContaining(anyString());
}
// --- 20. Max 10 candidates cap: 11 persons returned → only first 10 in ambiguousPersons ---
@Test
void search_elevenCandidates_capsAtTen() {
List<Person> eleven = new ArrayList<>();
for (int i = 0; i < 11; i++) {
eleven.add(person(UUID.randomUUID(), "Walter", "Person" + i));
}
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of("Walter"), "sender", null, null, List.of()));
when(personService.findByDisplayNameContaining("Walter")).thenReturn(eleven);
NlSearchResponse resp = service.search("Briefe von Walter", PAGE);
assertThat(resp.interpretation().ambiguousPersons()).hasSize(10);
verify(documentService, never()).searchDocuments(any(), any(), any(), any());
}
// --- 21. SearchFilters defaults: tagOperator=AND, status=null, undated=false, tags=empty ---
@Test
void search_searchFiltersDefaults_areCorrect() {
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of(), "any", null, null, List.of("Krieg")));
service.search("Dokumente über den Krieg", PAGE);
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
verify(documentService).searchDocuments(cap.capture(), eq(DocumentSort.DATE), eq("desc"), eq(PAGE));
SearchFilters f = cap.getValue();
assertThat(f.tagOperator()).isEqualTo(TagOperator.AND);
assertThat(f.status()).isNull();
assertThat(f.undated()).isFalse();
assertThat(f.tags()).isEmpty();
assertThat(f.tagQ()).isNull();
}
// --- 22. personRole=receiver + 1 resolved → receiver UUID set ---
@Test
void search_personRoleReceiver_singleMatch_setsReceiver() {
Person emma = person(P2, "Emma", "Raddatz");
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of("Emma"), "receiver", null, null, List.of()));
when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(emma));
service.search("Briefe an Emma", PAGE);
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
verify(documentService).searchDocuments(cap.capture(), any(), any(), any());
assertThat(cap.getValue().receiver()).isEqualTo(P2);
assertThat(cap.getValue().sender()).isNull();
}
// --- 23. keywordsApplied=true when text is non-blank ---
@Test
void search_keywordsApplied_trueWhenTextNonBlank() {
when(ollamaClient.parse(anyString()))
.thenReturn(extraction(List.of(), "any", null, null, List.of("Feldpost")));
NlSearchResponse resp = service.search("Feldpost aus dem Krieg", PAGE);
assertThat(resp.interpretation().keywordsApplied()).isTrue();
}
}

View File

@@ -0,0 +1,161 @@
package org.raddatz.familienarchiv.search;
import tools.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.raddatz.familienarchiv.document.DocumentSearchResult;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
import org.raddatz.familienarchiv.security.SecurityConfig;
import org.raddatz.familienarchiv.security.PermissionAspect;
import org.raddatz.familienarchiv.user.CustomUserDetailsService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.aop.AopAutoConfiguration;
import org.springframework.boot.webmvc.test.autoconfigure.WebMvcTest;
import org.springframework.context.annotation.Import;
import org.springframework.http.MediaType;
import org.springframework.security.test.context.support.WithMockUser;
import org.springframework.test.context.bean.override.mockito.MockitoBean;
import org.springframework.test.web.servlet.MockMvc;
import java.util.List;
import java.util.UUID;
import static org.mockito.ArgumentMatchers.*;
import static org.mockito.Mockito.when;
import static org.springframework.security.test.web.servlet.request.SecurityMockMvcRequestPostProcessors.csrf;
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*;
@WebMvcTest(NlSearchController.class)
@Import({SecurityConfig.class, PermissionAspect.class, AopAutoConfiguration.class,
NlSearchRateLimiter.class, NlSearchRateLimitProperties.class})
class NlSearchControllerTest {
@Autowired MockMvc mockMvc;
private final ObjectMapper objectMapper = new ObjectMapper();
@MockitoBean NlQueryParserService nlQueryParserService;
@MockitoBean CustomUserDetailsService customUserDetailsService;
@Autowired NlSearchRateLimiter rateLimiter;
@BeforeEach
void resetRateLimiter() {
rateLimiter.resetForTest();
}
private NlSearchResponse makeResponse() {
PersonHint hint = new PersonHint(UUID.randomUUID(), "Walter Raddatz");
NlQueryInterpretation interp = new NlQueryInterpretation(
List.of(hint), List.of(), null, null,
List.of("Krieg"), "Briefe von Walter im Krieg", true);
return new NlSearchResponse(DocumentSearchResult.of(List.of()), interp);
}
// --- 1. Happy path ---
@Test
@WithMockUser(username = "user@test.com", authorities = {"READ_ALL"})
void search_returns200_withNlSearchResponse() throws Exception {
when(nlQueryParserService.search(anyString(), any())).thenReturn(makeResponse());
mockMvc.perform(post("/api/search/nl").with(csrf())
.contentType(MediaType.APPLICATION_JSON)
.content("{\"query\":\"Briefe von Walter im Krieg\"}"))
.andExpect(status().isOk())
.andExpect(jsonPath("$.interpretation.rawQuery").value("Briefe von Walter im Krieg"))
.andExpect(jsonPath("$.interpretation.resolvedPersons[0].displayName").value("Walter Raddatz"))
.andExpect(jsonPath("$.interpretation.keywordsApplied").value(true));
}
// --- 2. ambiguousPersons in response shape ---
@Test
@WithMockUser(username = "user@test.com", authorities = {"READ_ALL"})
void search_returns200_withAmbiguousPersons() throws Exception {
PersonHint a = new PersonHint(UUID.randomUUID(), "Walter Braun");
PersonHint b = new PersonHint(UUID.randomUUID(), "Walter Schmidt");
NlQueryInterpretation interp = new NlQueryInterpretation(
List.of(), List.of(a, b), null, null,
List.of(), "Briefe von Walter", false);
NlSearchResponse resp = new NlSearchResponse(DocumentSearchResult.of(List.of()), interp);
when(nlQueryParserService.search(anyString(), any())).thenReturn(resp);
mockMvc.perform(post("/api/search/nl").with(csrf())
.contentType(MediaType.APPLICATION_JSON)
.content("{\"query\":\"Briefe von Walter\"}"))
.andExpect(status().isOk())
.andExpect(jsonPath("$.interpretation.ambiguousPersons").isArray())
.andExpect(jsonPath("$.interpretation.ambiguousPersons[0].displayName").value("Walter Braun"))
.andExpect(jsonPath("$.interpretation.ambiguousPersons[1].id").isNotEmpty());
}
// --- 3. Unauthenticated → 401 ---
@Test
void search_returns401_whenUnauthenticated() throws Exception {
mockMvc.perform(post("/api/search/nl").with(csrf())
.contentType(MediaType.APPLICATION_JSON)
.content("{\"query\":\"Briefe von Walter\"}"))
.andExpect(status().isUnauthorized());
}
// --- 4. Query < 3 chars → 400 ---
@Test
@WithMockUser(username = "user@test.com", authorities = {"READ_ALL"})
void search_returns400_whenQueryTooShort() throws Exception {
mockMvc.perform(post("/api/search/nl").with(csrf())
.contentType(MediaType.APPLICATION_JSON)
.content("{\"query\":\"ab\"}"))
.andExpect(status().isBadRequest());
}
// --- 5. Query > 500 chars → 400 ---
@Test
@WithMockUser(username = "user@test.com", authorities = {"READ_ALL"})
void search_returns400_whenQueryTooLong() throws Exception {
String longQuery = "a".repeat(501);
mockMvc.perform(post("/api/search/nl").with(csrf())
.contentType(MediaType.APPLICATION_JSON)
.content("{\"query\":\"" + longQuery + "\"}"))
.andExpect(status().isBadRequest());
}
// --- 6. Ollama unavailable → 503 ---
@Test
@WithMockUser(username = "user@test.com", authorities = {"READ_ALL"})
void search_returns503_whenOllamaUnavailable() throws Exception {
when(nlQueryParserService.search(anyString(), any()))
.thenThrow(DomainException.serviceUnavailable(ErrorCode.SMART_SEARCH_UNAVAILABLE, "Ollama offline"));
mockMvc.perform(post("/api/search/nl").with(csrf())
.contentType(MediaType.APPLICATION_JSON)
.content("{\"query\":\"Briefe von Walter\"}"))
.andExpect(status().isServiceUnavailable())
.andExpect(jsonPath("$.code").value("SMART_SEARCH_UNAVAILABLE"));
}
// --- 7. 6th request in 1 minute → 429 ---
/**
 * Five consecutive requests by the same user must succeed; the sixth must be rejected
 * with HTTP 429 and error code {@code SMART_SEARCH_RATE_LIMITED}.
 */
@Test
@WithMockUser(username = "user@test.com", authorities = {"READ_ALL"})
void search_returns429_onSixthRequestWithinRateLimit() throws Exception {
    when(nlQueryParserService.search(anyString(), any())).thenReturn(makeResponse());

    String payload = "{\"query\":\"Briefe von Walter\"}";

    // Requests 1..5 are expected to pass.
    for (int attempt = 1; attempt <= 5; attempt++) {
        mockMvc.perform(
                        post("/api/search/nl")
                                .with(csrf())
                                .contentType(MediaType.APPLICATION_JSON)
                                .content(payload))
                .andExpect(status().isOk());
    }

    // Request 6 must be refused.
    mockMvc.perform(
                    post("/api/search/nl")
                            .with(csrf())
                            .contentType(MediaType.APPLICATION_JSON)
                            .content(payload))
            .andExpect(status().isTooManyRequests())
            .andExpect(jsonPath("$.code").value("SMART_SEARCH_RATE_LIMITED"));
}
}

View File

@@ -0,0 +1,62 @@
package org.raddatz.familienarchiv.search;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
import static org.assertj.core.api.Assertions.assertThatCode;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
/**
 * Unit tests for {@link NlSearchRateLimiter}, configured with a budget of five requests per minute.
 */
class NlSearchRateLimiterTest {

    /** Requests-per-minute value handed to the limiter under test. */
    private static final int MAX_PER_MINUTE = 5;

    /** Key used by the tests that exercise a single user. */
    private static final String DEFAULT_USER = "user@example.com";

    private NlSearchRateLimiter rateLimiter;

    @BeforeEach
    void setUp() {
        NlSearchRateLimitProperties settings = new NlSearchRateLimitProperties();
        settings.setMaxRequestsPerMinute(MAX_PER_MINUTE);
        rateLimiter = new NlSearchRateLimiter(settings);
    }

    /** Every one of the first five calls for the same user must pass without an exception. */
    @Test
    void checkAndConsume_allowsRequestsWithinLimit() {
        for (int attempt = 1; attempt <= MAX_PER_MINUTE; attempt++) {
            assertThatCode(() -> rateLimiter.checkAndConsume(DEFAULT_USER))
                    .doesNotThrowAnyException();
        }
    }

    /** The sixth call for the same user must fail with SMART_SEARCH_RATE_LIMITED. */
    @Test
    void checkAndConsume_throwsRateLimited_onSixthRequest() {
        consumeWholeBudget(DEFAULT_USER);

        assertThatThrownBy(() -> rateLimiter.checkAndConsume(DEFAULT_USER))
                .isInstanceOf(DomainException.class)
                .extracting(thrown -> ((DomainException) thrown).getCode())
                .isEqualTo(ErrorCode.SMART_SEARCH_RATE_LIMITED);
    }

    /** Five calls made as one user must not prevent a call made as a different user. */
    @Test
    void checkAndConsume_limitsAreIndependentPerUser() {
        consumeWholeBudget("alice@example.com");

        assertThatCode(() -> rateLimiter.checkAndConsume("bob@example.com"))
                .doesNotThrowAnyException();
    }

    /** After resetForTest() a user who already made five calls may call again. */
    @Test
    void resetForTest_clearsAllBuckets() {
        consumeWholeBudget(DEFAULT_USER);

        rateLimiter.resetForTest();

        assertThatCode(() -> rateLimiter.checkAndConsume(DEFAULT_USER))
                .doesNotThrowAnyException();
    }

    /** Calls checkAndConsume five times for the given user key. */
    private void consumeWholeBudget(String userKey) {
        for (int used = 0; used < MAX_PER_MINUTE; used++) {
            rateLimiter.checkAndConsume(userKey);
        }
    }
}

View File

@@ -0,0 +1,113 @@
package org.raddatz.familienarchiv.search;
import com.github.tomakehurst.wiremock.WireMockServer;
import com.github.tomakehurst.wiremock.core.WireMockConfiguration;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
import static com.github.tomakehurst.wiremock.client.WireMock.*;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
/**
 * Tests {@link RestClientOllamaClient} against a WireMock server standing in for Ollama.
 */
class RestClientOllamaClientTest {

    private static final String MODEL = "qwen2.5:7b-instruct-q4_K_M";
    private static final String GENERATE_PATH = "/api/generate";
    private static final String JSON_CONTENT_TYPE = "application/json";

    private WireMockServer wireMock;
    private RestClientOllamaClient client;

    @BeforeEach
    void setUp() {
        // Start a stub server on a free port and point the client at it.
        wireMock = new WireMockServer(WireMockConfiguration.wireMockConfig().dynamicPort());
        wireMock.start();

        OllamaProperties settings = new OllamaProperties();
        settings.setBaseUrl("http://localhost:" + wireMock.port());
        settings.setModel(MODEL);
        settings.setTimeoutSeconds(5);
        settings.setHealthCheckTimeoutSeconds(2);
        client = new RestClientOllamaClient(settings);
    }

    @AfterEach
    void tearDown() {
        wireMock.stop();
    }

    // --- Factory helpers ---

    /**
     * Builds an Ollama-style response body whose "response" field is the extraction
     * object serialised as an escaped JSON string.
     *
     * @param personNamesJson raw JSON for the personNames value (inserted verbatim)
     * @param personRole      role text, emitted as a quoted JSON string
     * @param dateFrom        start date text, or null to emit JSON null
     * @param dateTo          end date text, or null to emit JSON null
     * @param keywordsJson    raw JSON for the keywords value (inserted verbatim)
     */
    private String makeOllamaResponseJson(String personNamesJson, String personRole,
                                          String dateFrom, String dateTo, String keywordsJson) {
        String extraction = "{\"personNames\":" + personNamesJson
                + ",\"personRole\":\"" + personRole + "\""
                + ",\"dateFrom\":" + jsonStringOrNull(dateFrom)
                + ",\"dateTo\":" + jsonStringOrNull(dateTo)
                + ",\"keywords\":" + keywordsJson
                + "}";
        // The extraction travels inside a JSON string, so its quotes must be escaped.
        String escaped = extraction.replace("\"", "\\\"");
        return "{\"model\":\"" + MODEL + "\",\"response\":\"" + escaped + "\",\"done\":true}";
    }

    /** Returns the literal {@code null} for a null value, otherwise the value in double quotes. */
    private static String jsonStringOrNull(String value) {
        if (value == null) {
            return "null";
        }
        return "\"" + value + "\"";
    }

    /** Asserts that parsing "some query" fails with SMART_SEARCH_UNAVAILABLE. */
    private void expectUnavailableOnParse() {
        assertThatThrownBy(() -> client.parse("some query"))
                .isInstanceOf(DomainException.class)
                .extracting(thrown -> ((DomainException) thrown).getCode())
                .isEqualTo(ErrorCode.SMART_SEARCH_UNAVAILABLE);
    }

    // --- Test cases ---

    @Test
    void parse_returnsExtraction_whenOllamaReturnsValidJson() {
        String responseBody = makeOllamaResponseJson(
                "[\"Walter\"]", "sender", "1914-01-01", "1914-12-31", "[\"Krieg\"]");
        wireMock.stubFor(
                post(urlEqualTo(GENERATE_PATH))
                        .willReturn(
                                aResponse()
                                        .withStatus(200)
                                        .withHeader("Content-Type", JSON_CONTENT_TYPE)
                                        .withBody(responseBody)));

        OllamaExtraction extraction = client.parse("Was hat Walter im Krieg geschrieben?");

        assertThat(extraction.personNames()).containsExactly("Walter");
        assertThat(extraction.personRole()).isEqualTo("sender");
        assertThat(extraction.keywords()).containsExactly("Krieg");
        assertThat(extraction.dateFrom()).isNotNull();
        assertThat(extraction.dateTo()).isNotNull();
    }

    @Test
    void parse_throwsSmartSearchUnavailable_whenOllamaReturns500() {
        wireMock.stubFor(
                post(urlEqualTo(GENERATE_PATH))
                        .willReturn(aResponse().withStatus(500)));

        expectUnavailableOnParse();
    }

    @Test
    void parse_throwsSmartSearchUnavailable_whenOllamaExceedsTimeout() {
        // The 6000 ms delay is longer than the 5-second timeout configured in setUp().
        wireMock.stubFor(
                post(urlEqualTo(GENERATE_PATH))
                        .willReturn(
                                aResponse()
                                        .withStatus(200)
                                        .withHeader("Content-Type", JSON_CONTENT_TYPE)
                                        .withFixedDelay(6000)
                                        .withBody("{\"response\":\"{}\",\"done\":true}")));

        expectUnavailableOnParse();
    }

    @Test
    void parse_throwsSmartSearchUnavailable_whenOllamaReturnsMalformedJson() {
        // The envelope is valid JSON, but the "response" payload is not.
        wireMock.stubFor(
                post(urlEqualTo(GENERATE_PATH))
                        .willReturn(
                                aResponse()
                                        .withStatus(200)
                                        .withHeader("Content-Type", JSON_CONTENT_TYPE)
                                        .withBody("{\"response\":\"not-json-at-all\",\"done\":true}")));

        expectUnavailableOnParse();
    }
}

View File

@@ -52,13 +52,14 @@ The OCR service requires significant RAM for model loading. The dev compose sets
| Production target | RAM | Recommended OCR limit | NL Search | Notes |
|---|---|---|---|---|
| Hetzner CX42 | 16 GB | 12 GB | Supported (Ollama 8 GB + OCR 6 GB active ≈ 14 GB) | Recommended for OCR-enabled production |
| Hetzner CX32 | 8 GB | 6 GB | Disabled — set `APP_OLLAMA_BASE_URL=` (empty) | Accept reduced batch sizes and slower throughput |
| Hetzner CX22 | 4 GB | — | Unsupported | Disable the OCR service (`profiles: [ocr]`); run OCR on demand only |
| Current server (Hetzner Serverbörse, i7-6700) | 64 GB | 12 GB | Supported | Default `mem_limit: 12g` works comfortably; plenty of headroom for Ollama |
| ≥ 16 GB RAM | 16+ GB | 12 GB | Supported | Default works |
| 8 GB RAM | 8 GB | 6 GB | Disabled — set `APP_OLLAMA_BASE_URL=` (empty) | Set `OCR_MEM_LIMIT=6g`; accept reduced batch sizes |
| 4 GB RAM | 4 GB | — | Unsupported | Disable OCR service (`profiles: [ocr]`); run OCR on demand only |
A CX32 cannot honour the default `mem_limit: 12g` — set the `OCR_MEM_LIMIT=6g` env var (in `.env.production` / `.env.staging`, or as a Gitea secret consumed by the workflow) before deploying on a CX32. The prod compose interpolates this var with a 12g default.
On servers with less than 16 GB RAM the default `mem_limit: 12g` cannot be honoured — set the `OCR_MEM_LIMIT` env var (in `.env.production` / `.env.staging`, or as a Gitea secret consumed by the workflow). The prod compose interpolates this var with a 12g default.
> **Memory budget (CX42):** OCR (~6 GB active) + Ollama (~8 GB) = ~14 GB. Do not run `docker-compose.observability.yml` continuously alongside both services on a CX42.
> **Memory budget:** OCR (~6 GB active) + Ollama (~8 GB) = ~14 GB. On servers with less than 16 GB RAM, do not run `docker-compose.observability.yml` continuously alongside both OCR and Ollama.
### Dev vs production differences
@@ -142,7 +143,7 @@ All vars are set in `.env` at the repo root (copy from `.env.example`). The back
| `ALLOWED_PDF_HOSTS` | SSRF protection — comma-separated list of allowed PDF source hosts. **Do not widen to `*`** | `minio,localhost,127.0.0.1` | YES | — |
| `KRAKEN_MODEL_PATH` | Directory containing Kraken HTR models (populated by `download-kraken-models.sh`) | `/app/models/` | — | — |
| `BLLA_MODEL_PATH` | Kraken baseline layout analysis model path | `/app/models/blla.mlmodel` | — | — |
| `OCR_MEM_LIMIT` | Container memory cap for ocr-service in `docker-compose.prod.yml`. Set to `6g` on CX32 hosts; leave unset on CX42+ to use the 12g default | `12g` (prod compose default) | — | — |
| `OCR_MEM_LIMIT` | Container memory cap for ocr-service in `docker-compose.prod.yml`. Set to `6g` on servers with 8 GB RAM; leave unset (12g default) on servers with ≥ 16 GB RAM | `12g` (prod compose default) | — | — |
| `XDG_CACHE_HOME` | XDG cache base dir — redirects Matplotlib and other XDG-aware libraries away from the read-only `HOME` (`/home/ocr`) to the writable cache volume | `/app/cache` | — | — |
| `TORCH_HOME` | PyTorch model cache — redirects `~/.cache/torch` to the writable models volume | `/app/models/torch` | — | — |
@@ -584,6 +585,37 @@ bash scripts/download-kraken-models.sh
> Downloads the Kurrent/Sütterlin HTR models. Run once after a fresh clone or when models are updated.
### Ollama — natural-language search (NL Search)
NL search uses a local Ollama instance for query parsing. The `ollama` service is defined in `docker-compose.yml` alongside the main stack.
**First-time model pull** (required before the feature works):
```bash
docker compose exec ollama ollama pull qwen2.5:7b-instruct-q4_K_M
```
This downloads ~4.4 GB. The model is stored in the `ollama_data` Docker volume and persists across container restarts.
**Verify the model is available:**
```bash
docker compose exec ollama ollama list
```
Expected output includes `qwen2.5:7b-instruct-q4_K_M`.
**Health check** — the backend polls `GET /api/tags` on Ollama at startup and before inference. If Ollama is absent, `POST /api/search/nl` returns HTTP 503 with `SMART_SEARCH_UNAVAILABLE`.
**Configuration** (see `application.yaml` under `app.ollama`):
| Property | Default | Description |
|---|---|---|
| `app.ollama.base-url` | `http://ollama:11434` | Ollama service URL (dev: `http://localhost:11434`) |
| `app.ollama.model` | `qwen2.5:7b-instruct-q4_K_M` | Model to use for inference |
| `app.ollama.timeout-seconds` | `30` | Read timeout for inference calls |
| `app.nl-search.rate-limit.max-requests-per-minute` | `5` | Per-user rate limit |
### Upgrade the Ollama model
To switch to a newer model version (e.g. a future release of `qwen2.5`):

View File

@@ -167,6 +167,16 @@ _See also [Chronik](#chronik-internal)._
---
## NL Search Terms
**NlSearch** — the natural-language document search feature. Users type a plain-German query (e.g. "Was hat Walter im Krieg an Emma geschrieben?"); the backend parses it via Ollama, resolves person names to database UUIDs, and delegates to the standard `DocumentService.searchDocuments()` path. Endpoint: `POST /api/search/nl`.
**NlQueryInterpretation** — the structured result of parsing a natural-language query. Contains: `resolvedPersons` (persons whose names unambiguously matched one DB record), `ambiguousPersons` (all candidates when a name matched more than one person), `keywords` (LLM-extracted search terms), `dateFrom`/`dateTo` (extracted date range), `rawQuery` (the original user input), and `keywordsApplied` (whether keyword FTS was used in the search).
**PersonHint** — a lightweight `{id, displayName}` pair used in `NlQueryInterpretation` to describe a resolved or ambiguous person without exposing the full `Person` entity to the frontend.
---
## Infrastructure Terms
**archiv-app** — the bucket-scoped MinIO service account the backend uses to read and write the `familienarchiv` bucket. Distinct from the MinIO root account (`archiv`, used only by the bootstrap container for admin operations). Defined and provisioned in [`infra/minio/bootstrap.sh`](../infra/minio/bootstrap.sh) and consumed by the backend as `S3_ACCESS_KEY` in [`docker-compose.prod.yml`](../docker-compose.prod.yml). The attached `archiv-app-policy` grants `s3:GetObject/PutObject/DeleteObject` on `familienarchiv/*` and `s3:ListBucket/GetBucketLocation` on the bucket only — not the built-in `readwrite` policy which would grant `s3:*` on all buckets.

View File

@@ -35,7 +35,7 @@ Render thumbnails in-process in Spring Boot using **Apache PDFBox 3.0.4** (alrea
**Harder:**
- PDFBox is a parser attack surface. Mitigated by a 30-second watchdog timeout in `ThumbnailAsyncRunner` and by the fire-and-forget contract (failures never break upload).
- Memory ceiling: the `thumbnailExecutor` is capped at 2 threads on the CX32 (8 GB). A busy backfill alongside OCR can approach the 3 GB heap — acceptable but not comfortable. Streaming via `FileService.downloadFileStream` keeps this bounded for PDFs up to 50 MB.
- Memory ceiling: the `thumbnailExecutor` is capped at 2 threads on memory-constrained hosts. A busy backfill alongside OCR can approach the 3 GB heap on an 8 GB server — acceptable but not comfortable. The current production server (64 GB) has ample headroom. Streaming via `FileService.downloadFileStream` keeps this bounded for PDFs up to 50 MB.
### Operational caveats (intentional)

View File

@@ -62,7 +62,7 @@ The `/tmp` tmpfs remains at 512 MB and continues to serve training-ZIP extractio
## Alternatives considered
**Approach B — Enlarge `/tmp` to 4 GB**
One-line change. Discarded because: (1) 4 GB tmpfs counts against the cgroup `mem_limit`; on CX32 hosts with `OCR_MEM_LIMIT=6g` the combined Surya resident set + tmpfs would trigger OOMKill on cold start; (2) staging GB-scale model files through RAM is using the wrong storage tier; (3) any future model larger than 4 GB requires another bump.
One-line change. Discarded because: (1) 4 GB tmpfs counts against the cgroup `mem_limit`; on servers with `OCR_MEM_LIMIT=6g` the combined Surya resident set + tmpfs would trigger OOMKill on cold start; (2) staging GB-scale model files through RAM is using the wrong storage tier; (3) any future model larger than 4 GB requires another bump.
**Approach C — Both TMPDIR redirect and enlarged /tmp**
Belt-and-suspenders: Approach A + 1 GB tmpfs. Discarded in favour of the cleaner Approach A. The defence-in-depth benefit does not outweigh the extra compose churn; the 512 MB cap on `/tmp` is intentional.

View File

@@ -0,0 +1,65 @@
# ADR-028 — Natural language search is powered by Ollama (Qwen 2.5 7B), not a cloud API
**Date:** 2026-06-06
**Status:** Accepted
**Issue:** #738 (NL search backend); part of epic #735
**Milestone:** Archive Intelligence — NL Search
---
## Context
Family members write their search intent in plain German ("Was hat Walter im Krieg an Emma geschrieben?"), not in structured filter forms. Issue #735 defines NL search as a core product goal. Three delivery options were evaluated:
**Option A — extend the OCR service.** The OCR Python microservice already runs on the same host. Adding LLM inference there avoids a new container. Rejected: the OCR service is a single-purpose, CPU-bound pipeline optimised for Kraken; bundling a 4.5 GB LLM weight into the same image would bloat it, complicate model lifecycle management, and create an unrelated failure domain (OOM on large OCR batches vs. LLM load time). ADR-001 was explicit about keeping OCR single-purpose.
**Option B — call an external API (OpenAI, Anthropic, etc.).** Cloud inference is instant and requires no local hardware. Rejected: the archive contains real person names and private family correspondence from 1899–1950 — sending query content to a third party violates the project's data-residency principle (family data stays on the family server). Additionally, API cost and availability are outside the operator's control; the system must work air-gapped.
**Option C — local Ollama service (chosen).** Ollama is a purpose-built LLM runtime with a simple REST API, model lifecycle management (`ollama pull`), and support for grammar-constrained JSON output. It runs entirely on the existing server (i7-6700, 64 GB RAM) with no cloud dependency.
**Model selection:** Qwen 2.5 7B Q4_K_M (`qwen2.5:7b-instruct-q4_K_M`) was chosen over larger models because:
- Quantised weight is ~4.5 GB — fits comfortably in 64 GB RAM alongside PostgreSQL and the JVM.
- Instruction-tuned variant follows the structured JSON schema reliably without fine-tuning.
- CPU-only inference at Q4_K_M takes 2–15 seconds per query, acceptable for a search that replaces a multi-step filter form.
**Prompt injection mitigation:** The backend sends the raw user query to Ollama. To prevent the model from being prompted to return schema-breaking output, the API call uses Ollama's `format` parameter with a grammar-constrained JSON schema. Output length is further bounded by `maxLength` constraints in the schema (names ≤ 200 chars, keywords ≤ 100 chars). `NlQueryParserService` enforces these limits in code before any LLM-extracted fragment is passed to `PersonRepository.searchByName()` — defence in depth.
**DB-blind name resolution:** The Ollama prompt stays small (the raw query only); person database records are never sent to the model. Name resolution happens as a cheap SQL query after the model returns. This keeps the prompt short, avoids data leakage, and means adding 1,000 new persons requires no prompt change.
**Graceful degradation:** `RestClientOllamaClient.isHealthy()` is called inline before each inference request (calls `GET /api/tags` on a 2-second connect-timeout client). If Ollama is absent or times out, `NlQueryParserService` throws `DomainException` with `SMART_SEARCH_UNAVAILABLE` (HTTP 503). The regular structured search (`GET /api/documents/search`) is unaffected — it never calls Ollama.
**Expected inference latency:** 2–15 seconds on the current CPU-only hardware. The frontend issue must show a persistent "Suche läuft…" indicator for the full duration (see `aria-live="polite"` requirement in issue #738 frontend notes). The backend timeout is 30 seconds (`app.ollama.timeout-seconds=30`) — chosen as a safe upper bound for Q4_K_M on the i7-6700 with a realistic 500-character query under modest concurrent load.
**NL query logging policy:** Only metadata is logged — query length, resolved person count, latency in milliseconds. The raw query is never written to the log file. Rationale: queries contain real family names (PII); log files persist to disk and may be shipped to Loki. Structured metadata is sufficient for debugging latency regressions.
**Prompt-amplification abuse:** A malicious user could submit a long or crafted query to cause slow Ollama inference, consuming CPU. Mitigated by `NlSearchRateLimiter` (5 requests per user per minute, Bucket4j + Caffeine) and by `@Size(max=500)` on the request body. The rate limiter is node-local; in multi-replica deployments the effective limit multiplies by replica count — acceptable at the current single-node deployment scale.
**Ollama model pre-pull requirement:** The Docker image contains only the Ollama binary, not the model weights. The operator must run `ollama pull qwen2.5:7b-instruct-q4_K_M` (≈4.5 GB download, 10–30 minutes) before the backend starts inference. If skipped, every NL search request returns 503 until the pull completes. The deployment runbook in `docs/DEPLOYMENT.md` covers this explicitly.
**Startup dependency:** The `backend` Compose service declares `depends_on: ollama: condition: service_healthy`. The Ollama healthcheck polls `GET http://localhost:11434/api/tags`; `start_period: 120s` provides margin for weight loading (20–60 s on SSD). Note: `service_healthy` confirms the API is responding, not that the model is downloaded — if the pull was skipped, inference still returns 404.
**Multi-name resolution heuristic:** For 2-name queries (e.g. "Was hat Walter an Emma geschrieben?"), the first extracted name is treated as sender and the second as receiver. Per-name role annotation (e.g. `{name: "Walter", role: "sender"}`) was rejected because it would require a combinatorially complex Ollama schema and the most natural German phrasing strongly implies sender→receiver order. For single-name queries, a `personRole` field (`sender`/`receiver`/`any`) is returned.
**`personRole: "any"` keyword limitation:** When `personRole` is `"any"` and the name resolves to exactly one person, `DocumentService.searchDocumentsByPersonId()` is called (OR semantics: person as sender or receiver). Keyword filtering is not applied on this path — only person identity and date range. `keywordsApplied = false` is returned in the response. Rationale: the JPQL for OR-semantics person queries has no text predicate; adding FTS would require a native query or a separate pass, adding complexity for a case that is already well-narrowed by person identity.
**`search/` → `person/` + `document/` dependency direction:** `NlQueryParserService` calls `PersonService.findByDisplayNameContaining()` and `DocumentService.searchDocuments()` — both are legitimate cross-domain service calls, not repository leaks. The `search/` package has no JPA entities of its own and never accesses `PersonRepository` or `DocumentRepository` directly.
## Decision
**Introduce a new `search/` domain package** with a local Ollama integration via `RestClientOllamaClient`. The Ollama service runs as a separate Docker container, reachable only on the internal Docker network (`expose: ["11434"]`, not `ports:`). The backend calls Ollama's `/api/generate` endpoint with grammar-constrained JSON output. Name resolution and document search are performed by existing services after the model returns.
Key component structure:
- `OllamaClient` / `OllamaHealthClient` interfaces — mockable for tests, modelled on `OcrClient`/`OcrHealthClient`
- `RestClientOllamaClient` — two `RestClient` instances (30 s inference, 2 s health-check)
- `NlQueryParserService` — orchestrates Ollama → name resolution → document search
- `NlSearchRateLimiter` — Bucket4j + Caffeine, 5 req/min per user
- `NlSearchController` — `POST /api/search/nl`, `@RequirePermission(READ_ALL)`
## Consequences
- Family members can query in natural German without learning filter UI. Expected search satisfaction improvement for the 60+ age cohort (primary transcription audience) is significant.
- NL search is unavailable when Ollama is down or the model pull is not complete. The regular search is unaffected. The 503 response includes a CTA directing users to the regular search.
- Operator responsibility: run `ollama pull` on first deploy and after model updates. The backup runbook must exclude `ollama_models` volume (model weights are re-downloadable, not user data).
- Inference takes 2–15 seconds. The frontend loading indicator is a hard requirement (see issue #738 frontend notes).
- The rate limiter is node-local. At the current single-node deployment scale this is correct. If the service is ever scaled horizontally, the rate limiter must be moved to Redis (same caveat as `LoginRateLimiter`).
- The `search/` package introduces a new cross-domain dependency direction (`search` → `person`, `search` → `document`). This is intentional and documented in `docs/architecture/c4/l3-backend-search.puml`.

View File

@@ -1,4 +1,4 @@
# ADR-032 — Tag-name resolution tolerates case-collisions: exact-case first, then a deterministic lowest-id fallback, and never a `unique(lower(name))` constraint
# ADR-033 — Tag-name resolution tolerates case-collisions: exact-case first, then a deterministic lowest-id fallback, and never a `unique(lower(name))` constraint
**Date:** 2026-06-06
**Status:** Accepted
@@ -82,15 +82,58 @@ added later.
`IncorrectResultSizeDataAccessException`, and `GlobalExceptionHandler`'s generic handler maps
any stray one to `INTERNAL_ERROR` with no Hibernate/SQL leak — so no dedicated handler was
added.
- **The sibling Person path is unfixed but tracked.** `PersonService.findOrCreateByAlias`
(`findByAliasIgnoreCase`) and `findByFirstNameIgnoreCaseAndLastNameIgnoreCase` carry the same
latent `Optional`-non-unique throw on user-influenced names; deferred to #731 rather than
widened into this fix.
- **The sibling Person path is fixed the same way — see the Person extension below (#731).**
- Postgres `LOWER()` folding of umlauts (`ü`/`ä`) is the actual correctness hinge of the
fallback and cannot be proven by a mocked repo, so it is pinned by a Testcontainers
`postgres:16-alpine` test on a `Glückwünsche`/`glückwünsche` pair; a plain-ASCII test would
stay green while the bug reappeared for umlaut tags.
## Person extension (#731)
The Person domain carried the same latent throw on **two** user-influenced lookup surfaces, and
is fixed with the same exact-case-first, non-throwing pattern — but with a deliberately
**different fallback per surface**, because the two paths have different consequences.
- **Alias path — `PersonService.findOrCreateByAlias` — deterministic lowest-id (mirrors tag).**
`findByAliasIgnoreCase` (`Optional`) is replaced by `findByAlias` (exact) → `findAllByAliasIgnoreCase`
(plural, lowest id) → the existing create-when-absent branch (INSTITUTION/GROUP and the
maiden-name alias are preserved verbatim). There is no human in the importer loop and the path
creates-on-absent anyway, so a deterministic guess is the right behaviour — exactly like tags.
- **Name/sender path — `PersonService.findByName` — bail to null on ambiguity (the new wrinkle).**
Used only by `DocumentService.storeDocument` to resolve the upload **sender** from the parsed
filename. `findByFirstNameIgnoreCaseAndLastNameIgnoreCase` (`Optional`) is replaced by
`findByFirstNameAndLastName` (exact) → `findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase`
(plural). Resolution returns the exact-case match, else the single case-insensitive match, else
— on **two or more** matches — **empty**. The sender is left unset rather than guessing.
**Why this diverges from the alias (and tag) decision:** the archive's value is correct
provenance. A confidently-wrong pre-filled `Hans Müller` is worse than an empty field, because a
senior reviewer will not re-check a value that is already filled in, whereas an empty sender
routes the document into the "needs completion" state (`metadataComplete=false`) for a human to
assign. The load-bearing comment at `findByName` records this so a future "consistency cleanup"
does not reintroduce the confidently-wrong-sender bug by switching it to lowest-id.
- **Fail-closed on a null first name.** A parsed filename can lack a first name. The two new name
methods use explicit HQL equality (`= :firstName`) rather than a derived
`…IgnoreCase` query, because Spring Data folds a null derived-query argument to `first_name IS
NULL` — which would silently widen the match and pull a last-name-only / institution row in as a
"sender" (a quiet provenance-integrity defect). With HQL equality a null binds as `= NULL`,
which never matches, so a null first name resolves to **no sender**. This is pinned by a
real-Postgres repository test.
- **Scope — "ambiguous" is case-insensitive only.** Both exact-case lookups (`findByAlias`,
`findByFirstNameAndLastName`) return `Optional`, so two **byte-identical same-case** rows would
still throw `NonUniqueResultException`. That is a true data anomaly, deliberately out of scope
(it is not a case-collision), and it surfaces as the opaque `INTERNAL_ERROR` — never a silently
wrong row — so it is no worse than any other unexpected error and needs no extra handling here.
- **Same stance as tags otherwise:** no `unique(lower(alias))` / `unique(lower(name))` constraint
(collisions are valid human labels; `source_ref` is the stable identity per ADR-025), no
merge/dedupe, code-only and reversible, and no shared `resolveExactThenCi(...)` helper — the
two Person paths have different fallbacks, so the exact→CI→fallback logic is inlined at each
with its load-bearing comment (KISS).
## Alternatives considered
- **A `unique(lower(name))` index** — rejected: the collisions are valid canonical nodes, and

View File

@@ -9,10 +9,12 @@ Person(member, "Family Member", "Access by administrator invite. Searches, brows
System(familienarchiv, "Familienarchiv", "Web application for digitising, organising, and searching family documents")
System_Ext(mail, "Email Service", "SMTP server. Delivers notification emails (mentions, replies) and password-reset links.")
System_Ext(glitchtip, "GlitchTip", "Self-hosted error tracking (Sentry-compatible). Receives frontend and backend error events with stack traces.")
System_Ext(ollama, "Ollama (self-hosted)", "Local LLM inference server (qwen2.5:7b). Parses natural-language search queries into structured filters. Runs in the same Docker Compose stack.")
Rel(admin, familienarchiv, "Manages via browser", "HTTPS")
Rel(member, familienarchiv, "Searches, reads, and transcribes via browser", "HTTPS")
Rel(familienarchiv, mail, "Sends notification and password-reset emails (optional)", "SMTP")
Rel(familienarchiv, glitchtip, "Sends error events with errorId and stack trace", "HTTPS")
Rel(familienarchiv, ollama, "NL query parsing for natural-language search", "HTTP / REST (internal)")
@enduml

View File

@@ -17,6 +17,7 @@ System_Boundary(archiv, "Familienarchiv (Docker Compose)") {
ContainerDb(db, "Relational Database", "PostgreSQL 16", "Stores document metadata, persons, users, permission groups, tags, transcription blocks, audit log, and Spring Session data.")
ContainerDb(storage, "Object Storage", "MinIO (S3-compatible)", "Stores the actual document files (PDFs, scans). Backend uses a bucket-scoped service account (archiv-app), not MinIO root.")
Container(mc, "Bucket / Service-Account Init", "MinIO Client (mc)", "One-shot container on startup. Idempotent: creates the archive bucket, the archiv-app service account, and attaches the readwrite policy.")
Container(ollama, "Ollama", "Ollama / port 11434", "Local LLM inference server. Hosts qwen2.5:7b-instruct-q4_K_M for natural-language query parsing (NL Search). CPU-only; GPU not required.")
}
System_Boundary(observability, "Observability Stack (/opt/familienarchiv/docker-compose.observability.yml)") {
@@ -43,6 +44,7 @@ Rel(backend, ocr, "OCR job requests with presigned MinIO URL", "HTTP / REST / JS
Rel(backend, mail, "Sends notification and password-reset emails (optional)", "SMTP")
Rel(ocr, storage, "Fetches PDF via presigned URL", "HTTP / S3 presigned")
Rel(mc, storage, "Bootstraps bucket + service account on startup", "MinIO Client CLI")
Rel(backend, ollama, "NL query parsing (POST /api/generate)", "HTTP / REST / JSON")
Rel(promtail, loki, "Pushes log streams", "HTTP/Loki push API")
Rel(backend, tempo, "Sends distributed traces via OTLP", "HTTP / OTLP / port 4318 (archiv-net)")
Rel(prometheus, backend, "Scrapes JVM + HTTP metrics", "HTTP 8081 /actuator/prometheus")

View File

@@ -0,0 +1,33 @@
@startuml
!include <C4/C4_Component>
title Component Diagram: API Backend — NL Search
Container(frontend, "Web Frontend", "SvelteKit")
ContainerDb(db, "PostgreSQL", "PostgreSQL 16")
Container(ollama, "Ollama", "ollama/ollama — port 11434 (internal only)")
System_Boundary(backend, "API Backend (Spring Boot)") {
Component(nlCtrl, "NlSearchController", "Spring MVC — POST /api/search/nl", "REST entry point for natural language search. Enforces READ_ALL permission. Uses @AuthenticationPrincipal UserDetails to obtain the caller's email for rate limiting. Delegates to NlQueryParserService and returns NlSearchResponse.")
Component(rateLimiter, "NlSearchRateLimiter", "Spring Service", "Bucket4j + Caffeine LoadingCache keyed on user email. Allows 5 NL search requests per minute per user. Throws DomainException(SMART_SEARCH_RATE_LIMITED / HTTP 429) when the bucket is exhausted. Node-local — same caveat as LoginRateLimiter.")
Component(parserSvc, "NlQueryParserService", "Spring Service", "Orchestrates the full NL search pipeline: (1) validates query length, (2) calls OllamaClient.parse() to extract structured intent, (3) resolves each person name via PersonService.findByDisplayNameContaining(), (4) applies multi-name / personRole heuristics, (5) delegates to DocumentService.searchDocuments() or searchDocumentsByPersonId(). Returns NlSearchResponse. Never logs raw query content (PII).")
Component(ollamaClient, "RestClientOllamaClient", "Spring Service — implements OllamaClient + OllamaHealthClient", "HTTP client for the Ollama API. Uses two separate RestClient instances: inference client (30 s read timeout) and health-check client (2 s connect timeout). Calls POST /api/generate with grammar-constrained JSON schema (personNames, personRole, dateFrom, dateTo, keywords). isHealthy() polls GET /api/tags. Null-coalesces absent personNames/keywords to List.of(). Defaults unknown personRole to 'any' with a warning log. Maps timeout/5xx/parse errors to DomainException(SMART_SEARCH_UNAVAILABLE / HTTP 503).")
Component(ollamaProps, "OllamaProperties", "@ConfigurationProperties(\"app.ollama\")", "Config bean: baseUrl, model (qwen2.5:7b-instruct-q4_K_M), timeoutSeconds (default: 30), healthCheckTimeoutSeconds (default: 2).")
Component(rateLimitProps, "NlSearchRateLimitProperties", "@ConfigurationProperties(\"app.nl-search.rate-limit\")", "Config bean: maxRequestsPerMinute (default: 5).")
}
Component(personSvc, "PersonService", "Spring Service", "See diagram 3e. findByDisplayNameContaining(fragment) delegates to PersonRepository.searchByName() — covers first+last name, alias, and name aliases via LEFT JOIN.")
Component(documentSvc, "DocumentService", "Spring Service", "See diagram 3b. searchDocuments() for keyword/sender/receiver/date queries. searchDocumentsByPersonId() for OR-semantics single-person queries (person as sender OR receiver, no keyword filter).")
Rel(frontend, nlCtrl, "POST /api/search/nl with JSON query", "HTTP / JSON")
Rel(nlCtrl, rateLimiter, "checkAndConsume(userEmail)")
Rel(nlCtrl, parserSvc, "parse(query)")
Rel(parserSvc, ollamaClient, "parse(rawQuery) — extracts intent", "HTTP / JSON")
Rel(ollamaClient, ollama, "POST /api/generate (grammar-constrained JSON schema)", "HTTP / REST")
Rel(ollamaClient, ollama, "GET /api/tags (health check)", "HTTP / REST")
Rel(parserSvc, personSvc, "findByDisplayNameContaining(name) for each extracted name")
Rel(parserSvc, documentSvc, "searchDocuments() or searchDocumentsByPersonId()")
Rel(documentSvc, db, "JPA queries", "JDBC")
Rel(personSvc, db, "JPA queries", "JDBC")
@enduml

View File

@@ -20,24 +20,19 @@ The observability stack (Prometheus, Loki, Grafana, Tempo, GlitchTip) ships as a
---
## VPS Sizing Recommendations
## Server Sizing
### Recommended: Hetzner CX32
### Current Production Server: Hetzner Dedicated (Serverbörse)
**Specs**: 4 vCPU, 8 GB RAM, 80 GB SSD · **Cost**: 17 EUR/mo
**Specs**: Intel Core i7-6700 (4C/8T, 3.4 GHz), 64 GB RAM · acquired via Hetzner server auction
Sufficient for the application stack (Postgres, MinIO, OCR with `mem_limit: 12g`, backend, frontend, Caddy) on a CX32 today. Once the observability stack lands (Prometheus/Loki/Grafana/Alertmanager add ~2 GB) consider a CX42.
Comfortably handles the full application stack (Postgres, MinIO, OCR with `mem_limit: 12g`, backend, frontend, Caddy, full observability stack) with headroom to spare. The 64 GB RAM means OCR, Ollama inference, and the observability stack can all run concurrently without memory pressure.
### When to Upgrade: Hetzner CX42
### When to Reconsider Hardware
**Specs**: 8 vCPU, 16 GB RAM · **Cost**: 29 EUR/mo
Upgrade when:
- Observability stack adds memory pressure (Loki + Grafana with >30 days retention)
- OCR throughput needs scaling beyond a single-node Surya/Kraken setup
- Real user load profiled in Grafana shows response-time degradation
Never upgrade the VPS tier before profiling — most perceived performance issues are application bugs, not resource constraints.
- CPU is Skylake (2015) — single-threaded performance is the likely bottleneck before RAM
- Profile with Grafana dashboards before concluding hardware is the constraint
- Most perceived performance issues are application bugs (unindexed queries, N+1 loads), not resource limits
---
@@ -45,12 +40,11 @@ Never upgrade the VPS tier before profiling — most perceived performance issue
| Service | Cost |
|---|---|
| Hetzner CX32 VPS | 17.00 EUR |
| Hetzner dedicated server (Serverbörse, i7-6700, 64 GB RAM) | see invoice |
| Hetzner DNS | 0.00 EUR |
| Hetzner SMTP relay | ~1.00 EUR |
| **Total** | **~18 EUR/mo** |
MinIO data lives on the VPS disk (no Object Storage line item yet). The Hetzner OBS migration would add ~5 EUR/mo at ~200 GB.
MinIO data lives on the server disk (no Object Storage line item yet). The Hetzner OBS migration would add ~5 EUR/mo at ~200 GB.
Equivalent SaaS stack: 200–300 EUR/mo.

View File

@@ -22,6 +22,9 @@
"error_forbidden": "Sie haben keine Berechtigung für diese Aktion.",
"error_csrf_token_missing": "Sitzungsfehler. Bitte laden Sie die Seite neu.",
"error_too_many_login_attempts": "Zu viele Anmeldeversuche. Bitte versuchen Sie es später erneut.",
"error_smart_search_unavailable": "Die intelligente Suche ist momentan nicht verfügbar. Bitte nutzen Sie die normale Suche.",
"error_smart_search_rate_limited": "Sie haben die Suchfunktion zu häufig genutzt. Bitte warten Sie eine Minute.",
"smart_search_keywords_not_applied": "Schlüsselwörter konnten bei dieser Suche nicht berücksichtigt werden.",
"error_validation_error": "Die Eingabe ist ungültig.",
"error_internal_error": "Ein unerwarteter Fehler ist aufgetreten.",
"nav_documents": "Dokumente",

View File

@@ -22,6 +22,9 @@
"error_forbidden": "You do not have permission for this action.",
"error_csrf_token_missing": "Session error. Please reload the page.",
"error_too_many_login_attempts": "Too many login attempts. Please try again later.",
"error_smart_search_unavailable": "The smart search is currently unavailable. Please use the regular search.",
"error_smart_search_rate_limited": "You have used the search function too frequently. Please wait a minute.",
"smart_search_keywords_not_applied": "Keywords could not be applied to this search.",
"error_validation_error": "The input is invalid.",
"error_internal_error": "An unexpected error occurred.",
"nav_documents": "Documents",

View File

@@ -22,6 +22,9 @@
"error_forbidden": "No tiene permiso para realizar esta acción.",
"error_csrf_token_missing": "Error de sesión. Recargue la página.",
"error_too_many_login_attempts": "Demasiados intentos. Por favor, inténtelo más tarde.",
"error_smart_search_unavailable": "La búsqueda inteligente no está disponible en este momento. Por favor, use la búsqueda normal.",
"error_smart_search_rate_limited": "Ha utilizado la función de búsqueda demasiadas veces. Por favor, espere un minuto.",
"smart_search_keywords_not_applied": "Las palabras clave no pudieron aplicarse a esta búsqueda.",
"error_validation_error": "La entrada no es válida.",
"error_internal_error": "Se ha producido un error inesperado.",
"nav_documents": "Documentos",

View File

@@ -84,22 +84,6 @@ export interface paths {
patch?: never;
trace?: never;
};
"/api/persons/{id}/confirm": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
get?: never;
put?: never;
post?: never;
delete?: never;
options?: never;
head?: never;
patch: operations["confirmPerson"];
trace?: never;
};
"/api/documents/{id}": {
parameters: {
query?: never;
@@ -244,6 +228,22 @@ export interface paths {
patch?: never;
trace?: never;
};
"/api/search/nl": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
get?: never;
put?: never;
post: operations["search"];
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
"/api/persons": {
parameters: {
query?: never;
@@ -708,6 +708,22 @@ export interface paths {
patch?: never;
trace?: never;
};
"/api/admin/backfill-titles": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
get?: never;
put?: never;
post: operations["backfillTitles"];
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
"/api/admin/backfill-file-hashes": {
parameters: {
query?: never;
@@ -740,6 +756,22 @@ export interface paths {
patch: operations["patchFamilyMember"];
trace?: never;
};
"/api/persons/{id}/confirm": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
get?: never;
put?: never;
post?: never;
delete?: never;
options?: never;
head?: never;
patch: operations["confirmPerson"];
trace?: never;
};
"/api/notifications/{id}/read": {
parameters: {
query?: never;
@@ -859,7 +891,7 @@ export interface paths {
path?: never;
cookie?: never;
};
get: operations["search"];
get: operations["search_1"];
put?: never;
post?: never;
delete?: never;
@@ -1323,7 +1355,7 @@ export interface paths {
path?: never;
cookie?: never;
};
get: operations["search_1"];
get: operations["search_2"];
put?: never;
post?: never;
delete?: never;
@@ -1651,7 +1683,7 @@ export interface components {
/** Format: int32 */
deathYear?: number;
/** Format: int32 */
generation?: number | null;
generation?: number;
};
Person: {
/** Format: uuid */
@@ -1668,7 +1700,7 @@ export interface components {
/** Format: int32 */
deathYear?: number;
/** Format: int32 */
generation?: number | null;
generation?: number;
familyMember: boolean;
sourceRef?: string;
provisional: boolean;
@@ -1803,6 +1835,98 @@ export interface components {
/** Format: uuid */
targetId: string;
};
NlSearchRequest: {
query: string;
};
Pageable: {
/** Format: int32 */
page?: number;
/** Format: int32 */
size?: number;
sort?: string[];
};
ActivityActorDTO: {
initials: string;
color: string;
name?: string;
};
DocumentListItem: {
/** Format: uuid */
id: string;
title: string;
originalFilename: string;
thumbnailUrl?: string;
/** Format: date */
documentDate?: string;
/** @enum {string} */
metaDatePrecision: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN";
/** Format: date */
metaDateEnd?: string;
sender?: components["schemas"]["Person"];
receivers: components["schemas"]["Person"][];
tags: components["schemas"]["Tag"][];
archiveBox?: string;
archiveFolder?: string;
location?: string;
summary?: string;
/** Format: int32 */
completionPercentage: number;
contributors: components["schemas"]["ActivityActorDTO"][];
matchData: components["schemas"]["SearchMatchData"];
/** Format: date-time */
createdAt: string;
/** Format: date-time */
updatedAt: string;
};
DocumentSearchResult: {
items: components["schemas"]["DocumentListItem"][];
/** Format: int64 */
totalElements: number;
/** Format: int32 */
pageNumber: number;
/** Format: int32 */
pageSize: number;
/** Format: int32 */
totalPages: number;
/** Format: int64 */
undatedCount: number;
};
MatchOffset: {
/** Format: int32 */
start: number;
/** Format: int32 */
length: number;
};
NlQueryInterpretation: {
resolvedPersons: components["schemas"]["PersonHint"][];
ambiguousPersons: components["schemas"]["PersonHint"][];
/** Format: date */
dateFrom?: string;
/** Format: date */
dateTo?: string;
keywords: string[];
rawQuery: string;
keywordsApplied: boolean;
};
NlSearchResponse: {
result: components["schemas"]["DocumentSearchResult"];
interpretation: components["schemas"]["NlQueryInterpretation"];
};
PersonHint: {
/** Format: uuid */
id: string;
displayName: string;
};
SearchMatchData: {
transcriptionSnippet?: string;
titleOffsets: components["schemas"]["MatchOffset"][];
senderMatched: boolean;
matchedReceiverIds: string[];
matchedTagIds: string[];
snippetOffsets: components["schemas"]["MatchOffset"][];
summarySnippet?: string;
summaryOffsets: components["schemas"]["MatchOffset"][];
};
CreateRelationshipRequest: {
/** Format: uuid */
relatedPersonId: string;
@@ -2188,11 +2312,6 @@ export interface components {
/** Format: int64 */
transcriptionCount: number;
};
ActivityActorDTO: {
initials: string;
color: string;
name?: string;
};
TranscriptionQueueItemDTO: {
/** Format: uuid */
id: string;
@@ -2235,25 +2354,6 @@ export interface components {
/** Format: int64 */
totalStories: number;
};
PersonSummaryDTO: {
title?: string;
/** Format: uuid */
id?: string;
displayName?: string;
firstName?: string;
lastName?: string;
/** Format: int64 */
documentCount?: number;
/** Format: int32 */
birthYear?: number;
/** Format: int32 */
deathYear?: number;
alias?: string;
notes?: string;
personType?: string;
familyMember?: boolean;
provisional?: boolean;
};
PersonSearchResult: {
items: components["schemas"]["PersonSummaryDTO"][];
/** Format: int64 */
@@ -2265,6 +2365,25 @@ export interface components {
/** Format: int32 */
totalPages: number;
};
PersonSummaryDTO: {
title?: string;
/** Format: uuid */
id?: string;
displayName?: string;
firstName?: string;
lastName?: string;
/** Format: int64 */
documentCount?: number;
notes?: string;
/** Format: int32 */
birthYear?: number;
/** Format: int32 */
deathYear?: number;
provisional?: boolean;
alias?: string;
personType?: string;
familyMember?: boolean;
};
InferredRelationshipWithPersonDTO: {
person: components["schemas"]["PersonNodeDTO"];
label: string;
@@ -2280,7 +2399,7 @@ export interface components {
/** Format: int32 */
deathYear?: number;
/** Format: int32 */
generation?: number | null;
generation?: number;
familyMember: boolean;
};
InferredRelationshipDTO: {
@@ -2433,63 +2552,6 @@ export interface components {
/** Format: int32 */
totalPages?: number;
};
DocumentListItem: {
/** Format: uuid */
id: string;
title: string;
originalFilename: string;
thumbnailUrl?: string;
/** Format: date */
documentDate?: string;
/** @enum {string} */
metaDatePrecision: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN";
/** Format: date */
metaDateEnd?: string;
sender?: components["schemas"]["Person"];
receivers: components["schemas"]["Person"][];
tags: components["schemas"]["Tag"][];
archiveBox?: string;
archiveFolder?: string;
location?: string;
summary?: string;
/** Format: int32 */
completionPercentage: number;
contributors: components["schemas"]["ActivityActorDTO"][];
matchData: components["schemas"]["SearchMatchData"];
/** Format: date-time */
createdAt: string;
/** Format: date-time */
updatedAt: string;
};
DocumentSearchResult: {
items: components["schemas"]["DocumentListItem"][];
/** Format: int64 */
totalElements: number;
/** Format: int32 */
pageNumber: number;
/** Format: int32 */
pageSize: number;
/** Format: int32 */
totalPages: number;
/** Format: int64 */
undatedCount: number;
};
MatchOffset: {
/** Format: int32 */
start: number;
/** Format: int32 */
length: number;
};
SearchMatchData: {
transcriptionSnippet?: string;
titleOffsets: components["schemas"]["MatchOffset"][];
senderMatched: boolean;
matchedReceiverIds: string[];
matchedTagIds: string[];
snippetOffsets: components["schemas"]["MatchOffset"][];
summarySnippet?: string;
summaryOffsets: components["schemas"]["MatchOffset"][];
};
IncompleteDocumentDTO: {
/** Format: uuid */
id: string;
@@ -2828,6 +2890,26 @@ export interface operations {
};
};
};
deletePerson: {
parameters: {
query?: never;
header?: never;
path: {
id: string;
};
cookie?: never;
};
requestBody?: never;
responses: {
/** @description No Content */
204: {
headers: {
[name: string]: unknown;
};
content?: never;
};
};
};
getDocument: {
parameters: {
query?: never;
@@ -3154,6 +3236,32 @@ export interface operations {
};
};
};
search: {
parameters: {
query: {
pageable: components["schemas"]["Pageable"];
};
header?: never;
path?: never;
cookie?: never;
};
requestBody: {
content: {
"application/json": components["schemas"]["NlSearchRequest"];
};
};
responses: {
/** @description OK */
200: {
headers: {
[name: string]: unknown;
};
content: {
"*/*": components["schemas"]["NlSearchResponse"];
};
};
};
};
getPersons: {
parameters: {
query?: {
@@ -3184,48 +3292,6 @@ export interface operations {
};
};
};
confirmPerson: {
parameters: {
query?: never;
header?: never;
path: {
id: string;
};
cookie?: never;
};
requestBody?: never;
responses: {
/** @description OK */
200: {
headers: {
[name: string]: unknown;
};
content: {
"*/*": components["schemas"]["Person"];
};
};
};
};
deletePerson: {
parameters: {
query?: never;
header?: never;
path: {
id: string;
};
cookie?: never;
};
requestBody?: never;
responses: {
/** @description No Content */
204: {
headers: {
[name: string]: unknown;
};
content?: never;
};
};
};
createPerson: {
parameters: {
query?: never;
@@ -4117,6 +4183,26 @@ export interface operations {
};
};
};
backfillTitles: {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
requestBody?: never;
responses: {
/** @description OK */
200: {
headers: {
[name: string]: unknown;
};
content: {
"*/*": components["schemas"]["BackfillResult"];
};
};
};
};
backfillFileHashes: {
parameters: {
query?: never;
@@ -4163,6 +4249,28 @@ export interface operations {
};
};
};
confirmPerson: {
parameters: {
query?: never;
header?: never;
path: {
id: string;
};
cookie?: never;
};
requestBody?: never;
responses: {
/** @description OK */
200: {
headers: {
[name: string]: unknown;
};
content: {
"*/*": components["schemas"]["Person"];
};
};
};
};
markOneRead: {
parameters: {
query?: never;
@@ -4443,7 +4551,7 @@ export interface operations {
};
};
};
search: {
search_1: {
parameters: {
query?: {
q?: string;
@@ -5067,7 +5175,7 @@ export interface operations {
};
};
};
search_1: {
search_2: {
parameters: {
query?: {
q?: string;

View File

@@ -53,6 +53,8 @@ export type ErrorCode =
| 'FORBIDDEN'
| 'CSRF_TOKEN_MISSING'
| 'TOO_MANY_LOGIN_ATTEMPTS'
| 'SMART_SEARCH_UNAVAILABLE'
| 'SMART_SEARCH_RATE_LIMITED'
| 'VALIDATION_ERROR'
| 'BATCH_TOO_LARGE'
| 'BULK_EDIT_TOO_MANY_IDS'
@@ -178,6 +180,10 @@ export function getErrorMessage(code: ErrorCode | string | undefined): string {
return m.error_csrf_token_missing();
case 'TOO_MANY_LOGIN_ATTEMPTS':
return m.error_too_many_login_attempts();
case 'SMART_SEARCH_UNAVAILABLE':
return m.error_smart_search_unavailable();
case 'SMART_SEARCH_RATE_LIMITED':
return m.error_smart_search_rate_limited();
case 'VALIDATION_ERROR':
return m.error_validation_error();
case 'BATCH_TOO_LARGE':