Compare commits
221 Commits
944370dcfd
...
worktree-f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9a9e1c4c40 | ||
|
|
4c620619d4 | ||
|
|
44baff9c9c | ||
|
|
4634da9865 | ||
|
|
79e4a3f9db | ||
|
|
70e8a6e6ad | ||
|
|
3af1095d13 | ||
|
|
8c835e957a | ||
|
|
fe8fcba7a7 | ||
|
|
e0c80ac193 | ||
|
|
005265b5a8 | ||
|
|
684c6e63de | ||
|
|
e27d52b9ee | ||
|
|
6f5497c7bf | ||
|
|
e0fac783e8 | ||
|
|
202ea85a58 | ||
|
|
7679596c70 | ||
|
|
3d5dcd1f18 | ||
|
|
52fca38f0f | ||
|
|
662a8f3e80 | ||
|
|
cbba95c3f8 | ||
|
|
3536ed884c | ||
|
|
5a939d9222 | ||
|
|
93e90424ab | ||
|
|
e8f3004c4f | ||
|
|
9637ebbca2 | ||
|
|
df10a42069 | ||
|
|
64120a30b5 | ||
|
|
25252fc709 | ||
|
|
1f379a161d | ||
|
|
c0d034c85d | ||
|
|
ca93cde06e | ||
|
|
7629e35897 | ||
|
|
cd741b9f57 | ||
|
|
ddf378aaac | ||
|
|
20cfe41f21 | ||
|
|
43601a3770 | ||
|
|
6603bc5333 | ||
|
|
6753d115f9 | ||
|
|
73dd6c80fa | ||
|
|
9ade36dd3b | ||
|
|
378da60ae8 | ||
|
|
6d267f2269 | ||
|
|
ff76a3784f | ||
|
|
534665459f | ||
|
|
fd792f6d78 | ||
|
|
bafbf609eb | ||
|
|
2710f2e233 | ||
|
|
80f6468d52 | ||
|
|
a58378e8f0 | ||
|
|
d000170f52 | ||
|
|
d1ed9c022f | ||
|
|
1e5e8e43e8 | ||
|
|
8c198f22be | ||
|
|
6fd05e08d8 | ||
|
|
ab469b744c | ||
|
|
f07527158c | ||
|
|
9f75de0350 | ||
|
|
8a9fbc6aef | ||
|
|
0336d07980 | ||
|
|
61256942e1 | ||
|
|
6aaf8ddb9e | ||
|
|
1b9707c6cd | ||
|
|
8353e71eed | ||
|
|
0693cfddd1 | ||
|
|
f656f7c1ff | ||
|
|
7316c51d4a | ||
|
|
cf457cb96f | ||
|
|
83e0afb466 | ||
|
|
12db7b3596 | ||
|
|
26b45f1c78 | ||
|
|
e6ce00035e | ||
|
|
b1f77bcfb6 | ||
|
|
4d1a5862d0 | ||
|
|
4e8a430dc3 | ||
|
|
e1d404609e | ||
|
|
b36addde22 | ||
|
|
456e019c3d | ||
|
|
d3bb08e7ff | ||
|
|
6703347468 | ||
|
|
1d55901388 | ||
|
|
0cd4882ef4 | ||
|
|
a85b22efcf | ||
|
|
7627589844 | ||
|
|
96a1afe09a | ||
|
|
c1b125bdb2 | ||
|
|
e4a9999f2f | ||
|
|
e48c794c12 | ||
|
|
add619d81d | ||
|
|
a46c3b416b | ||
|
|
7e8b90c8ee | ||
|
|
fc5c837d2c | ||
|
|
4f874bf4e9 | ||
|
|
28997fc391 | ||
|
|
003bc9b8cb | ||
|
|
485e13cfea | ||
|
|
439a386a37 | ||
|
|
23006a6562 | ||
|
|
c35f51d209 | ||
|
|
5297c70453 | ||
|
|
ad820955fd | ||
|
|
27b6d58632 | ||
|
|
4db2e97490 | ||
|
|
25b23843c9 | ||
|
|
ad067d2e0e | ||
|
|
29015ee864 | ||
|
|
b1b8505b93 | ||
|
|
abe860bec7 | ||
|
|
ec9d46da7a | ||
|
|
e562b3bbea | ||
|
|
e725910402 | ||
|
|
782a34e34b | ||
|
|
30f450b0d1 | ||
|
|
d4c0287e92 | ||
|
|
301cfc5c9e | ||
|
|
724c3881e4 | ||
|
|
fab2930ca8 | ||
|
|
d83707ec3b | ||
|
|
caea0d5633 | ||
|
|
2bf14aeab9 | ||
|
|
5b565d5271 | ||
|
|
df0f4879b8 | ||
|
|
98d081397e | ||
|
|
4e68b81bf7 | ||
|
|
985b31f71f | ||
|
|
3fb312b1c6 | ||
|
|
e2ec45f819 | ||
|
|
7d9526440a | ||
|
|
13bbfa7abd | ||
|
|
975223c972 | ||
|
|
403a043d51 | ||
|
|
e259908d6a | ||
|
|
7d37e610da | ||
|
|
9c1eb7608b | ||
|
|
9bba5e4a7a | ||
|
|
751a48b22c | ||
|
|
58a30a6e2e | ||
|
|
2430092e43 | ||
|
|
4a93543645 | ||
|
|
b453c13bae | ||
|
|
599c3977fb | ||
|
|
03e2615fa7 | ||
|
|
3db6a3bf8f | ||
|
|
0e06626eef | ||
|
|
a47564934d | ||
|
|
02fb16a0bd | ||
|
|
4757a174c9 | ||
|
|
75293c6aa8 | ||
|
|
4e9b13c0e4 | ||
|
|
ad27c1f757 | ||
|
|
0e30e5c570 | ||
|
|
a6a8552a48 | ||
|
|
b0d28c1e0b | ||
|
|
420c0e3e10 | ||
|
|
cb61e63b02 | ||
|
|
8eb321ccea | ||
|
|
e16b7402bd | ||
|
|
229c1b0539 | ||
|
|
f24c415b04 | ||
|
|
4c57a2262f | ||
|
|
b8e01f997d | ||
|
|
e8e57d2712 | ||
|
|
817835fd6a | ||
|
|
c361b3cd45 | ||
|
|
5c8034d298 | ||
|
|
8b1b070254 | ||
|
|
4ca1c967d2 | ||
|
|
24d9d975d1 | ||
|
|
8a1cc2d1f0 | ||
|
|
d5bf401085 | ||
|
|
4944918692 | ||
|
|
bf90427bfa | ||
|
|
50f554680c | ||
|
|
1dd162f1be | ||
|
|
ff7cfd4b1a | ||
|
|
88600d54cd | ||
|
|
654ac1478c | ||
|
|
3a4c2c6225 | ||
|
|
73f614bc3a | ||
|
|
6c5e5273bb | ||
|
|
a574d96351 | ||
|
|
246568301a | ||
|
|
aab4fe37ae | ||
|
|
4ebebe1e07 | ||
|
|
81224829a2 | ||
|
|
7cc2ddc6ad | ||
|
|
da3067150d | ||
|
|
10249c33be | ||
|
|
9c12f62345 | ||
|
|
e5784caa9d | ||
|
|
4583ee2c4d | ||
|
|
0a7b4fa265 | ||
|
|
a3858b6c80 | ||
|
|
9f5d7b8570 | ||
|
|
f6da95014e | ||
|
|
7a655ce6f4 | ||
|
|
3b594c0b0b | ||
|
|
2e44cab614 | ||
|
|
4c2f036de0 | ||
|
|
dcb57ffacd | ||
|
|
1c961619f1 | ||
|
|
2cc43c3c44 | ||
|
|
6c4d10d12f | ||
|
|
2cdb48f4a4 | ||
|
|
6be7413ba4 | ||
|
|
33aeefbb5b | ||
|
|
4bbdd33344 | ||
|
|
f4f853be8b | ||
|
|
44b5934fa7 | ||
|
|
78cc537f0e | ||
|
|
fc69758a92 | ||
|
|
f55efda0d2 | ||
|
|
77eddfc599 | ||
|
|
a76999c3d4 | ||
|
|
6d4aa8bd5c | ||
|
|
1fc74f8892 | ||
|
|
29ea27319a | ||
|
|
16f1fe7616 | ||
|
|
5ea47d4ec7 | ||
|
|
2f1538754e | ||
|
|
138bf446e4 |
@@ -154,9 +154,9 @@ Schedule monthly automated restore tests. If the restore fails, the backup is wo
|
||||
```
|
||||
Every alert needs: description, severity, likely cause, resolution steps, escalation path.
|
||||
|
||||
3. **Upgrading VPS tier before profiling**
|
||||
3. **Upgrading hardware before profiling**
|
||||
```
|
||||
# "The app feels slow" → upgrade from CX32 to CX42
|
||||
# "The app feels slow" → order more RAM / a faster CPU
|
||||
# Actual cause: unindexed query scanning 100k rows
|
||||
```
|
||||
Profile with Grafana dashboards first. Most perceived performance issues are application bugs, not resource constraints.
|
||||
@@ -404,8 +404,8 @@ Hetzner Object Storage (S3-compatible, replaces MinIO in prod)
|
||||
Prometheus + Loki + Alertmanager
|
||||
```
|
||||
|
||||
### Monthly Cost: ~23 EUR
|
||||
CX32 VPS (4 vCPU, 8GB RAM): 17 EUR · Object Storage (~200GB): 5 EUR · SMTP relay: ~1 EUR
|
||||
### Monthly Cost: ~6 EUR (excl. server)
|
||||
Hetzner dedicated server (Serverbörse, i7-6700, 64 GB RAM): see invoice · Object Storage (~200GB): 5 EUR · SMTP relay: ~1 EUR
|
||||
|
||||
### Reference Documentation
|
||||
- Full CI workflow, Gitea vs GitHub differences: `docs/infrastructure/ci-gitea.md`
|
||||
|
||||
19
.env.example
19
.env.example
@@ -72,6 +72,25 @@ VITE_SENTRY_DSN=
|
||||
# Sentry/GlitchTip auth token for source map upload at build time (optional)
|
||||
SENTRY_AUTH_TOKEN=
|
||||
|
||||
# NL search — Ollama LLM inference
|
||||
# Leave APP_OLLAMA_BASE_URL empty to disable NL search (the safe choice for CX32 / CI; note that this example ships it enabled below).
|
||||
# Set to http://ollama:11434 to enable. Requires CX42 (16 GB RAM) to run alongside OCR.
|
||||
APP_OLLAMA_BASE_URL=http://ollama:11434
|
||||
|
||||
# CPU limit: 4.0 is safe on both CX32 (4 vCPUs) and CX42 (8 vCPUs).
|
||||
# Raise to 7.5 on CX42 for full throughput.
|
||||
OLLAMA_CPU_LIMIT=4.0
|
||||
|
||||
# Memory limit: requires CX42 (16 GB) to run alongside OCR.
|
||||
# Reduce or set APP_OLLAMA_BASE_URL= on smaller hosts.
|
||||
OLLAMA_MEM_LIMIT=8g
|
||||
|
||||
# Ollama API key — set on the Ollama service to restrict inference API access on archiv-net.
|
||||
# Generate with: openssl rand -hex 32
|
||||
# NOTE: Empirically verified that OLLAMA_API_KEY is NOT enforced in Ollama 0.6.5 or 0.30.6 (ADR-028 §7).
|
||||
# archiv-net network isolation is the only effective access control. Retained for forward compatibility.
|
||||
OLLAMA_API_KEY=
|
||||
|
||||
# Production SMTP — uncomment and fill in to send real emails instead of catching them
|
||||
# APP_BASE_URL=https://your-domain.example.com
|
||||
# MAIL_HOST=smtp.example.com
|
||||
|
||||
127
.gitea/actions/deploy-obs/action.yml
Normal file
127
.gitea/actions/deploy-obs/action.yml
Normal file
@@ -0,0 +1,127 @@
|
||||
name: Deploy observability stack
|
||||
description: >-
|
||||
Deploy observability configs + secrets to /opt/familienarchiv, validate the
|
||||
compose config, start the stack, and assert the five healthchecked services
|
||||
are healthy. Per-environment values arrive as inputs.
|
||||
|
||||
inputs:
|
||||
grafana_admin_password:
|
||||
description: Grafana admin password (secret)
|
||||
required: true
|
||||
grafana_db_password:
|
||||
description: Read-only grafana_reader DB role password (secret, issue #651)
|
||||
required: true
|
||||
glitchtip_secret_key:
|
||||
description: GlitchTip Django secret key (secret)
|
||||
required: true
|
||||
postgres_password:
|
||||
description: PostgreSQL password for the environment (secret)
|
||||
required: true
|
||||
postgres_host:
|
||||
description: >-
|
||||
Compose project + service hostname, e.g. archiv-staging-db-1. Derived
|
||||
from the Compose project name and service name — a project rename
|
||||
requires updating the caller's value. Plain input, not a secret.
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Deploy observability configs
|
||||
shell: bash
|
||||
# Copies the compose file and config tree from the workspace checkout
|
||||
# into /opt/familienarchiv/ — the permanent location that persists
|
||||
# between CI runs. Containers started in the next step bind-mount
|
||||
# from there, so a future workspace wipe cannot corrupt a running
|
||||
# config file.
|
||||
#
|
||||
# obs-secrets.env is written fresh from Gitea secrets on every run so
|
||||
# Gitea is always the single source of truth for secret rotation.
|
||||
# Non-secret config lives in infra/observability/obs.env (tracked in git).
|
||||
#
|
||||
# secrets.* is NOT available inside a composite action, so the values
|
||||
# arrive as inputs mapped to env: below and are referenced as $VAR in
|
||||
# the heredoc. The delimiter MUST stay unquoted (<<EOF, not <<'EOF') so
|
||||
# the shell expands $VAR — a quoted delimiter would write the literal
|
||||
# string "$GRAFANA_ADMIN_PASSWORD" and `config --quiet` would still pass
|
||||
# (the var is present, just wrong). Do not stage these into intermediate
|
||||
# variables either, or Gitea log masking can be lost.
|
||||
env:
|
||||
GRAFANA_ADMIN_PASSWORD: ${{ inputs.grafana_admin_password }}
|
||||
GRAFANA_DB_PASSWORD: ${{ inputs.grafana_db_password }}
|
||||
GLITCHTIP_SECRET_KEY: ${{ inputs.glitchtip_secret_key }}
|
||||
POSTGRES_PASSWORD: ${{ inputs.postgres_password }}
|
||||
POSTGRES_HOST: ${{ inputs.postgres_host }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
rm -rf /opt/familienarchiv/infra/observability
|
||||
mkdir -p /opt/familienarchiv/infra/observability
|
||||
cp -r infra/observability/. /opt/familienarchiv/infra/observability/
|
||||
cp docker-compose.observability.yml /opt/familienarchiv/
|
||||
cat > /opt/familienarchiv/obs-secrets.env <<EOF
|
||||
GRAFANA_ADMIN_PASSWORD=$GRAFANA_ADMIN_PASSWORD
|
||||
GRAFANA_DB_PASSWORD=$GRAFANA_DB_PASSWORD
|
||||
GLITCHTIP_SECRET_KEY=$GLITCHTIP_SECRET_KEY
|
||||
POSTGRES_PASSWORD=$POSTGRES_PASSWORD
|
||||
POSTGRES_HOST=$POSTGRES_HOST
|
||||
EOF
|
||||
# Five-key non-empty guard: a bare presence check matches an empty
|
||||
# `KEY=` line, so assert each key has a value. Fail loudly on any
|
||||
# missing/empty key rather than starting the stack with broken auth.
|
||||
for key in GRAFANA_ADMIN_PASSWORD GRAFANA_DB_PASSWORD GLITCHTIP_SECRET_KEY POSTGRES_PASSWORD POSTGRES_HOST; do
|
||||
grep -Eq "^${key}=.+" /opt/familienarchiv/obs-secrets.env \
|
||||
|| { echo "::error::obs-secrets.env missing or empty: ${key}"; exit 1; }
|
||||
done
|
||||
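# chmod 600 MUST stay the final operation on this file. NOTE(review): the
|
||||
# ordering alone does not close the exposure window — `cat >` creates the file under the default umask, so on a first run it can be world-readable until this line executes; set `umask 077` before the heredoc to close it.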
|
||||
chmod 600 /opt/familienarchiv/obs-secrets.env
|
||||
|
||||
- name: Validate observability compose config
|
||||
shell: bash
|
||||
# Dry-run: resolves all variable substitutions and reports any missing
|
||||
# required keys before containers start. Catches undefined variables and
|
||||
# YAML errors in config files updated by the previous step.
|
||||
# --env-file order: obs.env first (git-tracked defaults), obs-secrets.env
|
||||
# second (CI-written secrets). Later files win on duplicate keys. POSTGRES_HOST
|
||||
# is environment-specific and supplied only by obs-secrets.env — obs.env
|
||||
# documents it but deliberately does not set a value.
|
||||
run: |
|
||||
docker compose \
|
||||
-f /opt/familienarchiv/docker-compose.observability.yml \
|
||||
--env-file /opt/familienarchiv/infra/observability/obs.env \
|
||||
--env-file /opt/familienarchiv/obs-secrets.env \
|
||||
config --quiet
|
||||
|
||||
- name: Start observability stack
|
||||
shell: bash
|
||||
# Runs with absolute paths so bind mounts resolve to stable host paths
|
||||
# that survive workspace wipes between runs (see ADR-016).
|
||||
# Non-secret config from obs.env (git-tracked); secrets from obs-secrets.env
|
||||
# (written fresh from Gitea secrets above). --env-file order: obs.env first,
|
||||
# obs-secrets.env second — later file wins on duplicate keys.
|
||||
run: |
|
||||
docker compose \
|
||||
-f /opt/familienarchiv/docker-compose.observability.yml \
|
||||
--env-file /opt/familienarchiv/infra/observability/obs.env \
|
||||
--env-file /opt/familienarchiv/obs-secrets.env \
|
||||
up -d --wait --remove-orphans
|
||||
|
||||
- name: Assert observability stack health
|
||||
shell: bash
|
||||
# docker compose up --wait covers services WITH healthcheck directives only.
|
||||
# obs-promtail, obs-cadvisor, obs-node-exporter, and obs-glitchtip-worker have
|
||||
# no healthcheck — they are considered "started" as soon as the process runs.
|
||||
# This step explicitly asserts the five healthchecked critical services are
|
||||
# healthy before the smoke test proceeds.
|
||||
run: |
|
||||
set -e
|
||||
unhealthy=""
|
||||
for svc in obs-loki obs-prometheus obs-grafana obs-tempo obs-glitchtip; do
|
||||
status=$(docker inspect "$svc" --format '{{.State.Health.Status}}' 2>/dev/null || echo "missing")
|
||||
if [ "$status" != "healthy" ]; then
|
||||
echo "::error::$svc is not healthy (status: $status)"
|
||||
unhealthy="$unhealthy $svc"
|
||||
fi
|
||||
done
|
||||
[ -z "$unhealthy" ] || exit 1
|
||||
echo "All critical observability services are healthy"
|
||||
41
.gitea/actions/reload-caddy/action.yml
Normal file
41
.gitea/actions/reload-caddy/action.yml
Normal file
@@ -0,0 +1,41 @@
|
||||
name: Reload Caddy
|
||||
description: >-
|
||||
Reload the host Caddy service from a DooD job container via a privileged
|
||||
sibling container and nsenter. No inputs.
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Reload Caddy
|
||||
shell: bash
|
||||
# Apply any committed Caddyfile changes before smoke-testing the
|
||||
# public surface. Without this step, a Caddyfile edit lands in the
|
||||
# repo but Caddy keeps serving the previous config until someone
|
||||
# reloads it manually — the smoke test would then catch a stale
|
||||
# header or a still-proxied /actuator route rather than confirming
|
||||
# the current config is live.
|
||||
#
|
||||
# The runner executes job steps inside Docker containers (DooD).
|
||||
# `systemctl` is not present in container images and cannot reach
|
||||
# the host's systemd directly. We use the Docker socket (mounted
|
||||
# into every job container via runner-config.yaml) to spin up a
|
||||
# privileged sibling container in the host PID namespace; nsenter
|
||||
# then enters the host's namespaces so systemctl talks to the real
|
||||
# host systemd daemon. No sudoers entry is required — the Docker
|
||||
# socket already grants root-equivalent host access.
|
||||
#
|
||||
# Alpine is used: ~5 MB vs ~70 MB for ubuntu, no unnecessary
|
||||
# tooling, and the digest is pinned so any upstream change requires
|
||||
# an explicit bump PR. util-linux (which ships nsenter) is installed
|
||||
# at run time; apk add takes ~1 s on the warm VPS cache.
|
||||
#
|
||||
# `reload` not `restart`: reload runs the unit's ExecReload (`caddy reload` in the stock caddy.service; Caddy 2 does not reload on SIGHUP) so Caddy re-reads its
|
||||
# config in-process without dropping TLS connections. `restart`
|
||||
# would briefly stop the service, losing in-flight requests.
|
||||
#
|
||||
# If Caddy is not running this step fails fast before the smoke test
|
||||
# issues a misleading "port 443 refused" error.
|
||||
run: |
|
||||
docker run --rm --privileged --pid=host \
|
||||
alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d \
|
||||
sh -c 'apk add --no-cache util-linux -q && nsenter -t 1 -m -u -n -p -i -- /bin/systemctl reload caddy'
|
||||
58
.gitea/actions/smoke-test/action.yml
Normal file
58
.gitea/actions/smoke-test/action.yml
Normal file
@@ -0,0 +1,58 @@
|
||||
name: Smoke test
|
||||
description: >-
|
||||
Verify the deployed public surface (login reachable, HSTS pinned,
|
||||
Permissions-Policy present, /actuator blocked) against a given vhost.
|
||||
|
||||
inputs:
|
||||
host:
|
||||
description: Public vhost to smoke-test, e.g. staging.raddatz.cloud
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Smoke test deployed environment
|
||||
shell: bash
|
||||
# Healthchecks confirm containers are healthy; they do NOT confirm the
|
||||
# public surface works. This step catches: Caddy not reloaded, HSTS
|
||||
# header dropped, /actuator block bypassed.
|
||||
#
|
||||
# --resolve pins the public host to the Docker bridge gateway IP
|
||||
# (the host) so we do NOT depend on hairpin NAT on the host router.
|
||||
# 127.0.0.1 cannot be used: job containers run in bridge network mode
|
||||
# (runner-config.yaml), so 127.0.0.1 is the container's loopback, not
|
||||
# the host's. The bridge gateway IS the host; Caddy binds 0.0.0.0:443
|
||||
# and is therefore reachable from the container via that IP.
|
||||
# SNI still uses the public hostname so the TLS cert validates correctly.
|
||||
#
|
||||
# --resolve is stored as a Bash array so "${RESOLVE[@]}" expands to two
|
||||
# separate arguments; a quoted string would pass the flag and its value
|
||||
# as one token and curl would reject it as an unknown option.
|
||||
#
|
||||
# Gateway detection reads /proc/net/route (always present, no package
|
||||
# required) instead of `ip route` to avoid a dependency on iproute2. The decode does rely on gawk: strtonum() is a gawk extension, not POSIX awk.
|
||||
# Field $2=="00000000" is the default route; field $3 is the gateway as
|
||||
# a little-endian 32-bit hex value which awk decodes to dotted-decimal.
|
||||
env:
|
||||
HOST: ${{ inputs.host }}
|
||||
run: |
|
||||
set -e
|
||||
URL="https://$HOST"
|
||||
HOST_IP=$(awk 'NR>1 && $2=="00000000"{h=$3;printf "%d.%d.%d.%d\n",strtonum("0x"substr(h,7,2)),strtonum("0x"substr(h,5,2)),strtonum("0x"substr(h,3,2)),strtonum("0x"substr(h,1,2));exit}' /proc/net/route)
|
||||
[ -n "$HOST_IP" ] || { echo "::error::could not detect Docker bridge gateway via /proc/net/route"; exit 1; }
|
||||
RESOLVE=(--resolve "$HOST:443:$HOST_IP")
|
||||
echo "Smoke test: $URL (pinned to $HOST_IP via bridge gateway)"
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 "$URL/login" -o /dev/null
|
||||
# Pin the preload-list-eligible HSTS value, not just header presence:
|
||||
# a degraded `max-age=1` or a dropped `includeSubDomains; preload` must
|
||||
# fail this check rather than pass it silently.
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
||||
| grep -Eqi 'strict-transport-security:[[:space:]]*max-age=31536000.*includeSubDomains.*preload'
|
||||
# Permissions-Policy denies APIs the app does not use (camera,
|
||||
# microphone, geolocation). A regression that loosens or drops the
|
||||
# header now fails the smoke step.
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
||||
| grep -Eqi 'permissions-policy:[[:space:]]*camera=\(\),[[:space:]]*microphone=\(\),[[:space:]]*geolocation=\(\)'
|
||||
status=$(curl -s "${RESOLVE[@]}" -o /dev/null -w "%{http_code}" --max-time 10 "$URL/actuator/health")
|
||||
[ "$status" = "404" ] || { echo "::error::expected 404 from /actuator/health, got $status"; exit 1; }
|
||||
echo "All smoke checks passed"
|
||||
@@ -108,6 +108,32 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Assert deploy-obs writes obs-secrets.env via an unquoted heredoc (#603)
|
||||
shell: bash
|
||||
run: |
|
||||
# Inside a composite action, secrets arrive as $VAR from env: (secrets.*
|
||||
# is unavailable there), so the obs-secrets.env heredoc MUST use an
|
||||
# unquoted delimiter (<<EOF) for $VAR to expand. A quoted delimiter
|
||||
# (<<'EOF') would write the literal string "$GRAFANA_ADMIN_PASSWORD",
|
||||
# and the action's five-key non-empty guard would STILL pass (the line
|
||||
# is present, just wrong). This guard enforces the invariant in CI so a
|
||||
# future re-quote cannot ship broken obs auth green. See ADR-029 / #603.
|
||||
action='.gitea/actions/deploy-obs/action.yml'
|
||||
quoted='obs-secrets\.env\s*<<-?\s*[\x27\x22]'
|
||||
# Self-test: the regex must catch a quoted delimiter and ignore the unquoted one.
|
||||
printf "obs-secrets.env <<'EOF'\n" | grep -qP "$quoted" \
|
||||
|| { echo "FAIL: guard self-test — regex missed the quoted <<'EOF' form"; exit 1; }
|
||||
printf 'obs-secrets.env <<EOF\n' | grep -qvP "$quoted" \
|
||||
|| { echo "FAIL: guard self-test — regex wrongly flagged the unquoted <<EOF form"; exit 1; }
|
||||
# Positive: the unquoted heredoc must be present at all.
|
||||
grep -qP 'obs-secrets\.env\s*<<-?EOF\b' "$action" \
|
||||
|| { echo "::error::$action no longer writes obs-secrets.env via an unquoted <<EOF heredoc (ADR-029 / #603)"; exit 1; }
|
||||
# Negative: never a quoted delimiter on the obs-secrets.env heredoc.
|
||||
if grep -nP "$quoted" "$action"; then
|
||||
echo "::error::$action writes obs-secrets.env with a quoted heredoc delimiter — secrets would be written as literal \$VAR strings. Use unquoted <<EOF (ADR-029 / #603)."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Run unit and component tests with coverage
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
@@ -23,6 +23,11 @@ name: nightly
|
||||
# - host ports: backend 8081, frontend 3001
|
||||
# - profile: staging (starts mailpit instead of a real SMTP relay)
|
||||
#
|
||||
# The obs-stack deploy, Caddy reload, and smoke test are shared with
|
||||
# release.yml via the composite actions under .gitea/actions/ (ADR-029).
|
||||
# actions/checkout MUST stay the first step: a local `uses: ./…` action
|
||||
# only exists on disk after checkout.
|
||||
#
|
||||
# Required Gitea secrets:
|
||||
# STAGING_POSTGRES_PASSWORD
|
||||
# STAGING_MINIO_PASSWORD
|
||||
@@ -55,6 +60,8 @@ jobs:
|
||||
# for the same repo is within that boundary.
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
# MUST be first: the composite actions below live under .gitea/actions/
|
||||
# and only exist on disk once the repo is checked out (ADR-029).
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Write staging env file
|
||||
@@ -92,6 +99,7 @@ jobs:
|
||||
# `compose config` renders both shorthand and longform mounts as
|
||||
# `target: /import` + `read_only: true`, so we assert against
|
||||
# the rendered form rather than the raw source YAML.
|
||||
# App-compose check (not obs), nightly-only — stays inline.
|
||||
run: |
|
||||
set -e
|
||||
docker compose \
|
||||
@@ -128,150 +136,21 @@ jobs:
|
||||
--profile staging \
|
||||
up -d --wait --remove-orphans
|
||||
|
||||
- name: Deploy observability configs
|
||||
# Copies the compose file and config tree from the workspace checkout
|
||||
# into /opt/familienarchiv/ — the permanent location that persists
|
||||
# between CI runs. Containers started in the next step bind-mount
|
||||
# from there, so a future workspace wipe cannot corrupt a running
|
||||
# config file.
|
||||
#
|
||||
# obs-secrets.env is written fresh from Gitea secrets on every run so
|
||||
# Gitea is always the single source of truth for secret rotation.
|
||||
# Non-secret config lives in infra/observability/obs.env (tracked in git).
|
||||
run: |
|
||||
rm -rf /opt/familienarchiv/infra/observability
|
||||
mkdir -p /opt/familienarchiv/infra/observability
|
||||
cp -r infra/observability/. /opt/familienarchiv/infra/observability/
|
||||
cp docker-compose.observability.yml /opt/familienarchiv/
|
||||
cat > /opt/familienarchiv/obs-secrets.env <<'EOF'
|
||||
GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
||||
GRAFANA_DB_PASSWORD=${{ secrets.GRAFANA_DB_PASSWORD }}
|
||||
GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }}
|
||||
POSTGRES_PASSWORD=${{ secrets.STAGING_POSTGRES_PASSWORD }}
|
||||
POSTGRES_HOST=archiv-staging-db-1
|
||||
EOF
|
||||
# Note: POSTGRES_HOST is derived from the Compose project name (archiv-staging)
|
||||
# and service name (db). A project rename requires updating this value.
|
||||
chmod 600 /opt/familienarchiv/obs-secrets.env
|
||||
# POSTGRES_HOST is derived from the Compose project name (archiv-staging)
|
||||
# and service name (db). A project rename requires updating this value.
|
||||
- uses: ./.gitea/actions/deploy-obs
|
||||
with:
|
||||
grafana_admin_password: ${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
||||
grafana_db_password: ${{ secrets.GRAFANA_DB_PASSWORD }}
|
||||
glitchtip_secret_key: ${{ secrets.GLITCHTIP_SECRET_KEY }}
|
||||
postgres_password: ${{ secrets.STAGING_POSTGRES_PASSWORD }}
|
||||
postgres_host: archiv-staging-db-1
|
||||
|
||||
- name: Validate observability compose config
|
||||
# Dry-run: resolves all variable substitutions and reports any missing
|
||||
# required keys before containers start. Catches undefined variables and
|
||||
# YAML errors in config files updated by the previous step.
|
||||
# --env-file order: obs.env first (git-tracked defaults), obs-secrets.env
|
||||
# second (CI-written secrets). Later files win on duplicate keys, so
|
||||
# obs-secrets.env overrides POSTGRES_HOST set in obs.env.
|
||||
run: |
|
||||
docker compose \
|
||||
-f /opt/familienarchiv/docker-compose.observability.yml \
|
||||
--env-file /opt/familienarchiv/infra/observability/obs.env \
|
||||
--env-file /opt/familienarchiv/obs-secrets.env \
|
||||
config --quiet
|
||||
- uses: ./.gitea/actions/reload-caddy
|
||||
|
||||
- name: Start observability stack
|
||||
# Runs with absolute paths so bind mounts resolve to stable host paths
|
||||
# that survive workspace wipes between nightly runs (see ADR-016).
|
||||
# Non-secret config from obs.env (git-tracked); secrets from obs-secrets.env
|
||||
# (written fresh from Gitea secrets above). --env-file order: obs.env first,
|
||||
# obs-secrets.env second — later file wins on duplicate keys.
|
||||
run: |
|
||||
docker compose \
|
||||
-f /opt/familienarchiv/docker-compose.observability.yml \
|
||||
--env-file /opt/familienarchiv/infra/observability/obs.env \
|
||||
--env-file /opt/familienarchiv/obs-secrets.env \
|
||||
up -d --wait --remove-orphans
|
||||
|
||||
- name: Assert observability stack health
|
||||
# docker compose up --wait covers services WITH healthcheck directives only.
|
||||
# obs-promtail, obs-cadvisor, obs-node-exporter, and obs-glitchtip-worker have
|
||||
# no healthcheck — they are considered "started" as soon as the process runs.
|
||||
# This step explicitly asserts the five healthchecked critical services are
|
||||
# healthy before the smoke test proceeds.
|
||||
run: |
|
||||
set -e
|
||||
unhealthy=""
|
||||
for svc in obs-loki obs-prometheus obs-grafana obs-tempo obs-glitchtip; do
|
||||
status=$(docker inspect "$svc" --format '{{.State.Health.Status}}' 2>/dev/null || echo "missing")
|
||||
if [ "$status" != "healthy" ]; then
|
||||
echo "::error::$svc is not healthy (status: $status)"
|
||||
unhealthy="$unhealthy $svc"
|
||||
fi
|
||||
done
|
||||
[ -z "$unhealthy" ] || exit 1
|
||||
echo "All critical observability services are healthy"
|
||||
|
||||
- name: Reload Caddy
|
||||
# Apply any committed Caddyfile changes before smoke-testing the
|
||||
# public surface. Without this step, a Caddyfile edit lands in the
|
||||
# repo but Caddy keeps serving the previous config until someone
|
||||
# reloads it manually — the smoke test would then catch a stale
|
||||
# header or a still-proxied /actuator route rather than confirming
|
||||
# the current config is live.
|
||||
#
|
||||
# The runner executes job steps inside Docker containers (DooD).
|
||||
# `systemctl` is not present in container images and cannot reach
|
||||
# the host's systemd directly. We use the Docker socket (mounted
|
||||
# into every job container via runner-config.yaml) to spin up a
|
||||
# privileged sibling container in the host PID namespace; nsenter
|
||||
# then enters the host's namespaces so systemctl talks to the real
|
||||
# host systemd daemon. No sudoers entry is required — the Docker
|
||||
# socket already grants root-equivalent host access.
|
||||
#
|
||||
# Alpine is used: ~5 MB vs ~70 MB for ubuntu, no unnecessary
|
||||
# tooling, and the digest is pinned so any upstream change requires
|
||||
# an explicit bump PR. util-linux (which ships nsenter) is installed
|
||||
# at run time; apk add takes ~1 s on the warm VPS cache.
|
||||
#
|
||||
# `reload` not `restart`: reload runs the unit's ExecReload (`caddy reload` in the stock caddy.service; Caddy 2 does not reload on SIGHUP) so Caddy re-reads its
|
||||
# config in-process without dropping TLS connections. `restart`
|
||||
# would briefly stop the service, losing in-flight requests.
|
||||
#
|
||||
# If Caddy is not running this step fails fast before the smoke test
|
||||
# issues a misleading "port 443 refused" error.
|
||||
run: |
|
||||
docker run --rm --privileged --pid=host \
|
||||
alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d \
|
||||
sh -c 'apk add --no-cache util-linux -q && nsenter -t 1 -m -u -n -p -i -- /bin/systemctl reload caddy'
|
||||
|
||||
- name: Smoke test deployed environment
|
||||
# Healthchecks confirm containers are healthy; they do NOT confirm the
|
||||
# public surface works. This step catches: Caddy not reloaded, HSTS
|
||||
# header dropped, /actuator block bypassed.
|
||||
#
|
||||
# --resolve pins staging.raddatz.cloud to the Docker bridge gateway IP
|
||||
# (the host) so we do NOT depend on hairpin NAT on the host router.
|
||||
# 127.0.0.1 cannot be used: job containers run in bridge network mode
|
||||
# (runner-config.yaml), so 127.0.0.1 is the container's loopback, not
|
||||
# the host's. The bridge gateway IS the host; Caddy binds 0.0.0.0:443
|
||||
# and is therefore reachable from the container via that IP.
|
||||
# SNI still uses the public hostname so the TLS cert validates correctly.
|
||||
#
|
||||
# Gateway detection reads /proc/net/route (always present, no package
|
||||
# required) instead of `ip route` to avoid a dependency on iproute2. The decode does rely on gawk: strtonum() is a gawk extension, not POSIX awk.
|
||||
# Field $2=="00000000" is the default route; field $3 is the gateway as
|
||||
# a little-endian 32-bit hex value which awk decodes to dotted-decimal.
|
||||
run: |
|
||||
set -e
|
||||
HOST="staging.raddatz.cloud"
|
||||
URL="https://$HOST"
|
||||
HOST_IP=$(awk 'NR>1 && $2=="00000000"{h=$3;printf "%d.%d.%d.%d\n",strtonum("0x"substr(h,7,2)),strtonum("0x"substr(h,5,2)),strtonum("0x"substr(h,3,2)),strtonum("0x"substr(h,1,2));exit}' /proc/net/route)
|
||||
[ -n "$HOST_IP" ] || { echo "ERROR: could not detect Docker bridge gateway via /proc/net/route"; exit 1; }
|
||||
RESOLVE=(--resolve "$HOST:443:$HOST_IP")
|
||||
echo "Smoke test: $URL (pinned to $HOST_IP via bridge gateway)"
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 "$URL/login" -o /dev/null
|
||||
# Pin the preload-list-eligible HSTS value, not just header presence:
|
||||
# a degraded `max-age=1` or a dropped `includeSubDomains; preload` must
|
||||
# fail this check rather than pass it silently.
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
||||
| grep -Eqi 'strict-transport-security:[[:space:]]*max-age=31536000.*includeSubDomains.*preload'
|
||||
# Permissions-Policy denies APIs the app does not use (camera,
|
||||
# microphone, geolocation). A regression that loosens or drops the
|
||||
# header now fails the smoke step.
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
||||
| grep -Eqi 'permissions-policy:[[:space:]]*camera=\(\),[[:space:]]*microphone=\(\),[[:space:]]*geolocation=\(\)'
|
||||
status=$(curl -s "${RESOLVE[@]}" -o /dev/null -w "%{http_code}" --max-time 10 "$URL/actuator/health")
|
||||
[ "$status" = "404" ] || { echo "expected 404 from /actuator/health, got $status"; exit 1; }
|
||||
echo "All smoke checks passed"
|
||||
- uses: ./.gitea/actions/smoke-test
|
||||
with:
|
||||
host: staging.raddatz.cloud
|
||||
|
||||
- name: Cleanup env file
|
||||
# LOAD-BEARING: `if: always()` is the linchpin of the ADR-011
|
||||
|
||||
@@ -23,6 +23,11 @@ name: release
|
||||
# - host ports: backend 8080, frontend 3000
|
||||
# - profile: (none) — mailpit is excluded; real SMTP relay is used
|
||||
#
|
||||
# The obs-stack deploy, Caddy reload, and smoke test are shared with
|
||||
# nightly.yml via the composite actions under .gitea/actions/ (ADR-029).
|
||||
# actions/checkout MUST stay the first step: a local `uses: ./…` action
|
||||
# only exists on disk after checkout.
|
||||
#
|
||||
# Required Gitea secrets:
|
||||
# PROD_POSTGRES_PASSWORD
|
||||
# PROD_MINIO_PASSWORD
|
||||
@@ -53,6 +58,8 @@ jobs:
|
||||
# advertised label of our single-tenant self-hosted runner.
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
# MUST be first: the composite actions below live under .gitea/actions/
|
||||
# and only exist on disk once the repo is checked out (ADR-029).
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Write production env file
|
||||
@@ -100,117 +107,21 @@ jobs:
|
||||
--env-file .env.production \
|
||||
up -d --wait --remove-orphans
|
||||
|
||||
- name: Deploy observability configs
|
||||
# Mirrors the nightly approach: copies obs compose file and config tree
|
||||
# to /opt/familienarchiv/ (permanent path, survives workspace wipes — ADR-016),
|
||||
# then writes obs-secrets.env fresh from Gitea secrets.
|
||||
# Non-secret config lives in infra/observability/obs.env (tracked in git).
|
||||
run: |
|
||||
rm -rf /opt/familienarchiv/infra/observability
|
||||
mkdir -p /opt/familienarchiv/infra/observability
|
||||
cp -r infra/observability/. /opt/familienarchiv/infra/observability/
|
||||
cp docker-compose.observability.yml /opt/familienarchiv/
|
||||
cat > /opt/familienarchiv/obs-secrets.env <<'EOF'
|
||||
GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
||||
GRAFANA_DB_PASSWORD=${{ secrets.GRAFANA_DB_PASSWORD }}
|
||||
GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }}
|
||||
POSTGRES_PASSWORD=${{ secrets.PROD_POSTGRES_PASSWORD }}
|
||||
POSTGRES_HOST=archiv-production-db-1
|
||||
EOF
|
||||
# Note: POSTGRES_HOST is derived from the Compose project name (archiv-production)
|
||||
# and service name (db). A project rename requires updating this value.
|
||||
chmod 600 /opt/familienarchiv/obs-secrets.env
|
||||
# POSTGRES_HOST is derived from the Compose project name (archiv-production)
|
||||
# and service name (db). A project rename requires updating this value.
|
||||
- uses: ./.gitea/actions/deploy-obs
|
||||
with:
|
||||
grafana_admin_password: ${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
||||
grafana_db_password: ${{ secrets.GRAFANA_DB_PASSWORD }}
|
||||
glitchtip_secret_key: ${{ secrets.GLITCHTIP_SECRET_KEY }}
|
||||
postgres_password: ${{ secrets.PROD_POSTGRES_PASSWORD }}
|
||||
postgres_host: archiv-production-db-1
|
||||
|
||||
- name: Validate observability compose config
|
||||
# Dry-run: resolves all variable substitutions and reports any missing
|
||||
# required keys before containers start. Catches undefined variables and
|
||||
# YAML errors in config files updated by the previous step.
|
||||
# --env-file order: obs.env first (git-tracked defaults), obs-secrets.env
|
||||
# second (CI-written secrets). Later files win on duplicate keys, so
|
||||
# obs-secrets.env overrides POSTGRES_HOST set in obs.env.
|
||||
# Keep in sync with the equivalent step in nightly.yml (#603).
|
||||
run: |
|
||||
docker compose \
|
||||
-f /opt/familienarchiv/docker-compose.observability.yml \
|
||||
--env-file /opt/familienarchiv/infra/observability/obs.env \
|
||||
--env-file /opt/familienarchiv/obs-secrets.env \
|
||||
config --quiet
|
||||
- uses: ./.gitea/actions/reload-caddy
|
||||
|
||||
- name: Start observability stack
|
||||
# Runs with absolute paths so bind mounts resolve to stable host paths
|
||||
# that survive workspace wipes between runs (see ADR-016).
|
||||
# Non-secret config from obs.env (git-tracked); secrets from obs-secrets.env
|
||||
# (written fresh from Gitea secrets above). --env-file order: obs.env first,
|
||||
# obs-secrets.env second — later file wins on duplicate keys.
|
||||
# Keep in sync with the equivalent step in nightly.yml (#603).
|
||||
run: |
|
||||
docker compose \
|
||||
-f /opt/familienarchiv/docker-compose.observability.yml \
|
||||
--env-file /opt/familienarchiv/infra/observability/obs.env \
|
||||
--env-file /opt/familienarchiv/obs-secrets.env \
|
||||
up -d --wait --remove-orphans
|
||||
|
||||
- name: Assert observability stack health
|
||||
# docker compose up --wait covers services WITH healthcheck directives only.
|
||||
# obs-promtail, obs-cadvisor, obs-node-exporter, and obs-glitchtip-worker have
|
||||
# no healthcheck — they are considered "started" as soon as the process runs.
|
||||
# This step explicitly asserts the five healthchecked critical services are
|
||||
# healthy before the smoke test proceeds.
|
||||
# Keep in sync with the equivalent step in nightly.yml (#603).
|
||||
run: |
|
||||
set -e
|
||||
unhealthy=""
|
||||
for svc in obs-loki obs-prometheus obs-grafana obs-tempo obs-glitchtip; do
|
||||
status=$(docker inspect "$svc" --format '{{.State.Health.Status}}' 2>/dev/null || echo "missing")
|
||||
if [ "$status" != "healthy" ]; then
|
||||
echo "::error::$svc is not healthy (status: $status)"
|
||||
unhealthy="$unhealthy $svc"
|
||||
fi
|
||||
done
|
||||
[ -z "$unhealthy" ] || exit 1
|
||||
echo "All critical observability services are healthy"
|
||||
|
||||
- name: Reload Caddy
|
||||
# See nightly.yml — same rationale and mechanism: DooD job containers
|
||||
# cannot call systemctl directly; nsenter via a privileged sibling
|
||||
# container reaches the host systemd. Must run after deploy (so the
|
||||
# latest Caddyfile is on disk) and before the smoke test (so the
|
||||
# public surface reflects the current config). Alpine with pinned
|
||||
# digest; reload not restart — see nightly.yml for full rationale.
|
||||
run: |
|
||||
docker run --rm --privileged --pid=host \
|
||||
alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d \
|
||||
sh -c 'apk add --no-cache util-linux -q && nsenter -t 1 -m -u -n -p -i -- /bin/systemctl reload caddy'
|
||||
|
||||
- name: Smoke test deployed environment
|
||||
# See nightly.yml — same four checks (login page, HSTS, Permissions-Policy, /actuator/health returns 404), against the prod vhost.
|
||||
# --resolve stored as a Bash array so "${RESOLVE[@]}" expands to two
|
||||
# separate arguments; a quoted string would pass the flag and its value
|
||||
# as one token and curl would reject it as an unknown option.
|
||||
# Gateway detection via /proc/net/route — no iproute2 dependency.
|
||||
# See nightly.yml for the full network topology explanation.
|
||||
run: |
|
||||
set -e
|
||||
HOST="archiv.raddatz.cloud"
|
||||
URL="https://$HOST"
|
||||
HOST_IP=$(awk 'NR>1 && $2=="00000000"{h=$3;printf "%d.%d.%d.%d\n",strtonum("0x"substr(h,7,2)),strtonum("0x"substr(h,5,2)),strtonum("0x"substr(h,3,2)),strtonum("0x"substr(h,1,2));exit}' /proc/net/route)
|
||||
[ -n "$HOST_IP" ] || { echo "ERROR: could not detect Docker bridge gateway via /proc/net/route"; exit 1; }
|
||||
RESOLVE=(--resolve "$HOST:443:$HOST_IP")
|
||||
echo "Smoke test: $URL (pinned to $HOST_IP via bridge gateway)"
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 "$URL/login" -o /dev/null
|
||||
# Pin the preload-list-eligible HSTS value, not just header presence:
|
||||
# a degraded `max-age=1` or a dropped `includeSubDomains; preload` must
|
||||
# fail this check rather than pass it silently.
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
||||
| grep -Eqi 'strict-transport-security:[[:space:]]*max-age=31536000.*includeSubDomains.*preload'
|
||||
# Permissions-Policy denies APIs the app does not use (camera,
|
||||
# microphone, geolocation). A regression that loosens or drops the
|
||||
# header now fails the smoke step.
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
||||
| grep -Eqi 'permissions-policy:[[:space:]]*camera=\(\),[[:space:]]*microphone=\(\),[[:space:]]*geolocation=\(\)'
|
||||
status=$(curl -s "${RESOLVE[@]}" -o /dev/null -w "%{http_code}" --max-time 10 "$URL/actuator/health")
|
||||
[ "$status" = "404" ] || { echo "expected 404 from /actuator/health, got $status"; exit 1; }
|
||||
echo "All smoke checks passed"
|
||||
- uses: ./.gitea/actions/smoke-test
|
||||
with:
|
||||
host: archiv.raddatz.cloud
|
||||
|
||||
- name: Cleanup env file
|
||||
# LOAD-BEARING: `if: always()` is the linchpin of the ADR-011
|
||||
|
||||
@@ -92,6 +92,7 @@ backend/src/main/java/org/raddatz/familienarchiv/
|
||||
├── ocr/ OCR domain — OcrService, OcrBatchService, training
|
||||
├── person/ Person domain
|
||||
│ └── relationship/ PersonRelationship sub-domain
|
||||
├── search/ NL search domain — NlSearchController, NlQueryParserService, RestClientOllamaClient, NlSearchRateLimiter
|
||||
├── security/ SecurityConfig, Permission, @RequirePermission, PermissionAspect
|
||||
├── tag/ Tag domain
|
||||
└── user/ User domain — AppUser, UserGroup, UserService
|
||||
@@ -160,7 +161,7 @@ Input DTOs live flat in the domain package. Response types are the model entitie
|
||||
|
||||
→ See [CONTRIBUTING.md §Error handling](./CONTRIBUTING.md#error-handling)
|
||||
|
||||
**LLM reminder:** use `DomainException.notFound/forbidden/conflict/internal()` from service methods — never throw raw exceptions. When adding a new `ErrorCode`: (1) add to `ErrorCode.java`, (2) add to `ErrorCode` type in `frontend/src/lib/shared/errors.ts`, (3) add a `case` in `getErrorMessage()`, (4) add i18n keys in `messages/{de,en,es}.json`. Valid error codes include: `TOO_MANY_LOGIN_ATTEMPTS` (returned by `LoginRateLimiter` as HTTP 429 when a brute-force threshold is exceeded).
|
||||
**LLM reminder:** use `DomainException.notFound/forbidden/conflict/internal()` from service methods — never throw raw exceptions. When adding a new `ErrorCode`: (1) add to `ErrorCode.java`, (2) add to `ErrorCode` type in `frontend/src/lib/shared/errors.ts`, (3) add a `case` in `getErrorMessage()`, (4) add i18n keys in `messages/{de,en,es}.json`. Valid error codes include: `TOO_MANY_LOGIN_ATTEMPTS` (returned by `LoginRateLimiter` as HTTP 429 when a brute-force threshold is exceeded); `SMART_SEARCH_UNAVAILABLE` (HTTP 503 — Ollama inference service offline or timed out); `SMART_SEARCH_RATE_LIMITED` (HTTP 429 — user exceeded 5 NL search requests per minute).
|
||||
|
||||
### Security / Permissions
|
||||
|
||||
@@ -194,7 +195,6 @@ frontend/src/routes/
|
||||
│ ├── [id]/edit/ Person edit form
|
||||
│ ├── new/ Create person form
|
||||
│ └── review/ Triage view — confirm/rename/merge/delete provisional persons
|
||||
├── briefwechsel/ Bilateral conversation timeline (Briefwechsel)
|
||||
├── aktivitaeten/ Unified activity feed (Chronik)
|
||||
├── geschichten/ Stories — list, [id], [id]/edit, new
|
||||
├── stammbaum/ Family tree (Stammbaum)
|
||||
@@ -269,7 +269,7 @@ Back button pattern — use the shared `<BackButton>` component from `$lib/share
|
||||
|
||||
→ See [CONTRIBUTING.md §Error handling](./CONTRIBUTING.md#error-handling)
|
||||
|
||||
**LLM reminder:** when adding a new `ErrorCode`: (1) add to `ErrorCode.java`, (2) add to `ErrorCode` type in `frontend/src/lib/shared/errors.ts`, (3) add a `case` in `getErrorMessage()`, (4) add i18n keys in `messages/{de,en,es}.json`. Valid error codes include: `TOO_MANY_LOGIN_ATTEMPTS` (returned by `LoginRateLimiter` as HTTP 429 when a brute-force threshold is exceeded).
|
||||
**LLM reminder:** when adding a new `ErrorCode`: (1) add to `ErrorCode.java`, (2) add to `ErrorCode` type in `frontend/src/lib/shared/errors.ts`, (3) add a `case` in `getErrorMessage()`, (4) add i18n keys in `messages/{de,en,es}.json`. Valid error codes include: `TOO_MANY_LOGIN_ATTEMPTS` (returned by `LoginRateLimiter` as HTTP 429 when a brute-force threshold is exceeded); `SMART_SEARCH_UNAVAILABLE` (HTTP 503 — Ollama inference service offline or timed out); `SMART_SEARCH_RATE_LIMITED` (HTTP 429 — user exceeded 5 NL search requests per minute).
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -28,4 +28,18 @@ Authorization: Basic Gast_User gast
|
||||
###Groups
|
||||
#GET
|
||||
GET http://localhost:8080/api/admin/tags
|
||||
Authorization: Basic admin admin123
|
||||
Authorization: Basic admin admin123
|
||||
|
||||
### One-time backfill: re-sync already-stale auto-titles (#726)
|
||||
# RUNBOOK: a one-shot ADMIN maintenance call, NOT part of normal operation. Run it ONCE
|
||||
# after deploying #726 to clean the existing backlog of stale titles (e.g. a title still
|
||||
# showing "2028" after the date was corrected to "1928"). It is synchronous and idempotent
|
||||
# — a second run returns {"count": 0} and writes nothing. Hit the backend DIRECTLY on
|
||||
# port 8080 (NOT through the SvelteKit proxy) so the sweep can't trip the proxy timeout.
|
||||
# Returns {"count": <documents rewritten>}.
|
||||
POST http://localhost:8080/api/admin/backfill-titles
|
||||
Authorization: Basic admin admin123
|
||||
|
||||
### NEGATIVE TEST: a non-admin must NOT be able to trigger the backfill -> 403 Forbidden
|
||||
POST http://localhost:8080/api/admin/backfill-titles
|
||||
Authorization: Basic Gast_User gast
|
||||
@@ -41,6 +41,27 @@
|
||||
<type>pom</type>
|
||||
<scope>import</scope>
|
||||
</dependency>
|
||||
<!-- Force WireMock's ee10 Jetty transitive deps to match Spring Boot's 12.1.8 core -->
|
||||
<dependency>
|
||||
<groupId>org.eclipse.jetty.ee10</groupId>
|
||||
<artifactId>jetty-ee10-servlet</artifactId>
|
||||
<version>12.1.8</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.eclipse.jetty.ee10</groupId>
|
||||
<artifactId>jetty-ee10-servlets</artifactId>
|
||||
<version>12.1.8</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.eclipse.jetty.ee10</groupId>
|
||||
<artifactId>jetty-ee10-webapp</artifactId>
|
||||
<version>12.1.8</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
<artifactId>jetty-ee</artifactId>
|
||||
<version>12.1.8</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
<dependencies>
|
||||
@@ -137,6 +158,12 @@
|
||||
<artifactId>archunit-junit5</artifactId>
|
||||
<version>1.3.0</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.wiremock</groupId>
|
||||
<artifactId>wiremock-jetty12</artifactId>
|
||||
<version>3.9.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- Excel Bearbeitung (Apache POI) -->
|
||||
<dependency>
|
||||
|
||||
@@ -177,6 +177,13 @@ public class Document {
|
||||
@Builder.Default
|
||||
private Set<TrainingLabel> trainingLabels = new HashSet<>();
|
||||
|
||||
// Not persisted — computed per detail fetch so read-only users can tell at first
|
||||
// paint whether there is a transcription to read (DocumentService.getDocumentById).
|
||||
@Transient
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
@Builder.Default
|
||||
private boolean hasTranscription = false;
|
||||
|
||||
// The `?v={thumbnailGeneratedAt}` cache-buster is load-bearing: the thumbnail
|
||||
// endpoint sends `Cache-Control: private, max-age=31536000, immutable`
|
||||
// (DocumentController.getDocumentThumbnail). `immutable` is only safe because
|
||||
|
||||
@@ -3,7 +3,6 @@ package org.raddatz.familienarchiv.document;
|
||||
import java.io.IOException;
|
||||
import java.time.LocalDate;
|
||||
import java.util.ArrayList;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@@ -47,9 +46,7 @@ import org.raddatz.familienarchiv.document.DocumentService;
|
||||
import org.raddatz.familienarchiv.document.DocumentVersionService;
|
||||
import org.raddatz.familienarchiv.filestorage.FileService;
|
||||
import org.raddatz.familienarchiv.user.UserService;
|
||||
import org.springframework.data.domain.Sort;
|
||||
import org.springframework.security.core.Authentication;
|
||||
import org.springframework.http.CacheControl;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
@@ -138,7 +135,7 @@ public class DocumentController {
|
||||
// --- METADATA ---
|
||||
@GetMapping("/{id}")
|
||||
public Document getDocument(@PathVariable UUID id) {
|
||||
return documentService.getDocumentById(id);
|
||||
return documentService.getDocumentDetail(id);
|
||||
}
|
||||
|
||||
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
|
||||
@@ -316,7 +313,8 @@ public class DocumentController {
|
||||
@RequestParam(required = false) Boolean undated,
|
||||
Authentication authentication) {
|
||||
TagOperator operator = "OR".equalsIgnoreCase(tagOp) ? TagOperator.OR : TagOperator.AND;
|
||||
List<UUID> ids = documentService.findIdsForFilter(q, from, to, senderId, receiverId, tags, tagQ, status, operator, Boolean.TRUE.equals(undated));
|
||||
SearchFilters filters = new SearchFilters(q, from, to, senderId, receiverId, tags, tagQ, status, operator, Boolean.TRUE.equals(undated));
|
||||
List<UUID> ids = documentService.findIdsForFilter(filters);
|
||||
if (ids.size() > BULK_EDIT_FILTER_MAX_IDS) {
|
||||
throw DomainException.badRequest(ErrorCode.BULK_EDIT_TOO_MANY_IDS,
|
||||
"Filter matches " + ids.size() + " documents — refine filter (max " + BULK_EDIT_FILTER_MAX_IDS + ")");
|
||||
@@ -388,8 +386,9 @@ public class DocumentController {
|
||||
// tagOp is a raw String at the HTTP boundary; any value other than "OR" (case-insensitive)
|
||||
// defaults to AND, which matches the frontend default and keeps old clients working.
|
||||
TagOperator operator = "OR".equalsIgnoreCase(tagOp) ? TagOperator.OR : TagOperator.AND;
|
||||
SearchFilters filters = new SearchFilters(q, from, to, senderId, receiverId, tags, tagQ, status, operator, Boolean.TRUE.equals(undated));
|
||||
Pageable pageable = PageRequest.of(page, size);
|
||||
return ResponseEntity.ok(documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir, operator, Boolean.TRUE.equals(undated), pageable));
|
||||
return ResponseEntity.ok(documentService.searchDocuments(filters, sort, dir, pageable));
|
||||
}
|
||||
|
||||
@GetMapping(value = "/density", produces = MediaType.APPLICATION_JSON_VALUE)
|
||||
@@ -404,9 +403,7 @@ public class DocumentController {
|
||||
TagOperator operator = "OR".equalsIgnoreCase(tagOp) ? TagOperator.OR : TagOperator.AND;
|
||||
DocumentDensityResult result = documentService.getDensity(
|
||||
new DensityFilters(q, senderId, receiverId, tags, tagQ, status, operator));
|
||||
return ResponseEntity.ok()
|
||||
.cacheControl(CacheControl.maxAge(5, TimeUnit.MINUTES).cachePrivate())
|
||||
.body(result);
|
||||
return ResponseEntity.ok(result);
|
||||
}
|
||||
|
||||
// --- TRAINING LABELS ---
|
||||
@@ -445,17 +442,6 @@ public class DocumentController {
|
||||
return documentVersionService.getVersion(id, versionId);
|
||||
}
|
||||
|
||||
@GetMapping("/conversation")
|
||||
public List<Document> getConversation(
|
||||
@RequestParam UUID senderId,
|
||||
@RequestParam(required = false) UUID receiverId,
|
||||
@RequestParam(required = false) LocalDate from,
|
||||
@RequestParam(required = false) LocalDate to,
|
||||
@RequestParam(defaultValue = "DESC") String dir) {
|
||||
Sort sort = Sort.by(Sort.Direction.fromString(dir.toUpperCase()), "documentDate");
|
||||
return documentService.getConversationFiltered(senderId, receiverId, from, to, sort);
|
||||
}
|
||||
|
||||
private UUID requireUserId(Authentication authentication) {
|
||||
return SecurityUtils.requireUserId(authentication, userService);
|
||||
}
|
||||
|
||||
@@ -15,7 +15,6 @@ import org.springframework.data.jpa.repository.Query;
|
||||
import org.springframework.data.repository.query.Param;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@@ -58,6 +57,7 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
|
||||
@EntityGraph("Document.full")
|
||||
List<Document> findByReceiversId(UUID receiverId);
|
||||
|
||||
|
||||
// Callers access only doc.getTags() to mutate the set — receivers/sender not touched; no graph needed.
|
||||
List<Document> findByTags_Id(UUID tagId);
|
||||
|
||||
@@ -81,32 +81,6 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
|
||||
|
||||
Optional<Document> findFirstByMetadataCompleteFalseAndIdNot(UUID id, Sort sort);
|
||||
|
||||
@EntityGraph("Document.full")
|
||||
@Query("SELECT DISTINCT d FROM Document d " +
|
||||
"JOIN d.receivers r " +
|
||||
"WHERE " +
|
||||
"((d.sender.id = :person1 AND r.id = :person2) " +
|
||||
" OR " +
|
||||
" (d.sender.id = :person2 AND r.id = :person1)) " +
|
||||
"AND d.documentDate BETWEEN :from AND :to")
|
||||
List<Document> findConversation(
|
||||
@Param("person1") UUID person1,
|
||||
@Param("person2") UUID person2,
|
||||
@Param("from") LocalDate from,
|
||||
@Param("to") LocalDate to,
|
||||
Sort sort);
|
||||
|
||||
@EntityGraph("Document.full")
|
||||
@Query("SELECT DISTINCT d FROM Document d " +
|
||||
"LEFT JOIN d.receivers r " +
|
||||
"WHERE (d.sender.id = :personId OR r.id = :personId) " +
|
||||
"AND d.documentDate BETWEEN :from AND :to")
|
||||
List<Document> findSinglePersonCorrespondence(
|
||||
@Param("personId") UUID personId,
|
||||
@Param("from") LocalDate from,
|
||||
@Param("to") LocalDate to,
|
||||
Sort sort);
|
||||
|
||||
@Query(nativeQuery = true, value = """
|
||||
SELECT d.id FROM documents d
|
||||
CROSS JOIN LATERAL (
|
||||
|
||||
@@ -32,6 +32,8 @@ import org.springframework.data.domain.Page;
|
||||
import org.springframework.data.domain.PageRequest;
|
||||
import org.springframework.data.domain.Pageable;
|
||||
import org.springframework.data.domain.Sort;
|
||||
import jakarta.persistence.criteria.JoinType;
|
||||
import jakarta.persistence.criteria.Predicate;
|
||||
import org.springframework.data.jpa.domain.Specification;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
@@ -68,6 +70,7 @@ import static org.raddatz.familienarchiv.document.DocumentSpecifications.*;
|
||||
public class DocumentService {
|
||||
|
||||
private final DocumentRepository documentRepository;
|
||||
private final DocumentTitleFactory documentTitleFactory;
|
||||
private final PersonService personService;
|
||||
private final FileService fileService;
|
||||
private final TagService tagService;
|
||||
@@ -137,8 +140,10 @@ public class DocumentService {
|
||||
* <p>Implementation note: groups in memory rather than via SQL GROUP BY
|
||||
* because the existing {@link Specification} predicates compose easily
|
||||
* with {@code findAll(spec)} and the archive size (≈5k docs) keeps this
|
||||
* well under the 200ms p95 target. Cache-Control: max-age=300 on the
|
||||
* controller layer absorbs repeated browse loads.
|
||||
* well under the 200ms p95 target. The controller sets no explicit
|
||||
* Cache-Control, so the response is served fresh on every load (issue
|
||||
* #709) — the recompute is imperceptible and stale month counts after an
|
||||
* edit would be misleading on an interactive chart.
|
||||
*
|
||||
* <p>Tracked in issue #481 for re-evaluation when {@code documents > 50k}
|
||||
* — at that scale move the aggregation into SQL (GROUP BY TO_CHAR(meta_date,
|
||||
@@ -167,11 +172,13 @@ public class DocumentService {
|
||||
/** Loads matching documents and projects to non-null {@link LocalDate}s. */
|
||||
private List<LocalDate> loadFilteredDates(DensityFilters filters, List<UUID> ftsIds) {
|
||||
boolean hasFts = ftsIds != null;
|
||||
Specification<Document> spec = buildSearchSpec(
|
||||
hasFts, ftsIds, null, null,
|
||||
filters.sender(), filters.receiver(),
|
||||
filters.tags(), filters.tagQ(),
|
||||
filters.status(), filters.tagOperator(), false);
|
||||
// Density and search keep separate filter records (DensityFilters has no
|
||||
// date/undated fields); adapt to SearchFilters here to reuse buildSearchSpec.
|
||||
// Date bounds stay null and undated=false — the density path never filters by date.
|
||||
SearchFilters searchFilters = new SearchFilters(
|
||||
filters.text(), null, null, filters.sender(), filters.receiver(),
|
||||
filters.tags(), filters.tagQ(), filters.status(), filters.tagOperator(), false);
|
||||
Specification<Document> spec = buildSearchSpec(hasFts, ftsIds, searchFilters);
|
||||
return documentRepository.findAll(spec).stream()
|
||||
.map(Document::getDocumentDate)
|
||||
.filter(Objects::nonNull)
|
||||
@@ -375,10 +382,17 @@ public class DocumentService {
|
||||
|
||||
DocumentStatus statusBefore = doc.getStatus();
|
||||
|
||||
// Auto-title sync (#726): capture the machine title from the CURRENTLY-persisted state
|
||||
// BEFORE any setter runs — the setters below overwrite date/location and applyDatePrecision
|
||||
// skips nulls, so the old state must be read first. The submitted title is the catalog
|
||||
// auto-title iff it equals this; only then does it follow date/location forward.
|
||||
String autoTitleBefore = documentTitleFactory.build(doc);
|
||||
|
||||
// 1. Einfache Felder Update
|
||||
doc.setTitle(dto.getTitle());
|
||||
doc.setTitle(resolveTitle(dto.getTitle(), autoTitleBefore, doc, dto));
|
||||
doc.setDocumentDate(dto.getDocumentDate());
|
||||
applyDatePrecision(doc, dto);
|
||||
validateDateRange(doc); // guard before any save (updateDocumentTags below persists)
|
||||
doc.setLocation(dto.getLocation());
|
||||
doc.setTranscription(dto.getTranscription());
|
||||
doc.setSummary(dto.getSummary());
|
||||
@@ -419,7 +433,11 @@ public class DocumentService {
|
||||
doc.setScriptType(dto.getScriptType());
|
||||
}
|
||||
|
||||
// 4. Datei austauschen (nur wenn eine neue ausgewählt wurde)
|
||||
// 4. Replace the file (only if a new one was selected).
|
||||
// NB (#726): this reassigns originalFilename to the uploaded file's name. The title's index
|
||||
// segment is originalFilename, so after a replace the stored title no longer matches
|
||||
// build(currentState) and the row is treated as manual — neither save-time nor backfill
|
||||
// rewrites it. Accepted fail-safe (ADR-031), and autoTitleBefore was already captured above.
|
||||
boolean fileReplaced = newFile != null && !newFile.isEmpty();
|
||||
if (fileReplaced) {
|
||||
FileService.UploadResult upload = fileService.uploadFile(newFile, newFile.getOriginalFilename());
|
||||
@@ -448,21 +466,92 @@ public class DocumentService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies the three date-precision fields only when the DTO carries them.
|
||||
* A null field means "not submitted" — overwriting the stored value with null
|
||||
* would fabricate a precision the user never chose, the exact dishonesty #666
|
||||
* exists to prevent. A row with a genuinely-unknown precision must keep it when
|
||||
* an unrelated edit (e.g. a location typo) is saved.
|
||||
* Decides the title to persist on an edit (#726). The submitted title is the catalog
|
||||
* auto-title only when it equals {@code autoBefore} (built from the stored state) — an exact
|
||||
* comparison with no heuristic, relying on the edit form round-tripping the stored title
|
||||
* verbatim when untouched. A machine title is rebuilt from the new state so a corrected
|
||||
* date/location flows into it; a hand-written or freshly-typed title is kept verbatim. A blank
|
||||
* submission is never persisted (title is always present) — it falls back to the rebuilt
|
||||
* auto-title, which always carries at least the index.
|
||||
*/
|
||||
private String resolveTitle(String submitted, String autoBefore, Document doc, DocumentUpdateDTO dto) {
|
||||
if (submitted == null || submitted.isBlank()) {
|
||||
return documentTitleFactory.build(projectedState(doc, dto));
|
||||
}
|
||||
if (!Objects.equals(submitted, autoBefore)) {
|
||||
return submitted;
|
||||
}
|
||||
return documentTitleFactory.build(projectedState(doc, dto));
|
||||
}
|
||||
|
||||
/**
|
||||
* The document state the regenerated title is built from. It is composed from the SAME
|
||||
* resolvers the real setters use — {@code documentDate}/{@code location} overwritten from the
|
||||
* DTO (a null value clears the field), precision/end/raw resolved skip-null via
|
||||
* {@link #effectivePrecision}/{@link #effectiveMetaDateEnd}/{@link #effectiveMetaDateRaw} — so
|
||||
* the projection cannot drift from {@link #updateDocument}. The index ({@code originalFilename})
|
||||
* is never touched by a metadata edit.
|
||||
*/
|
||||
private Document projectedState(Document doc, DocumentUpdateDTO dto) {
|
||||
return Document.builder()
|
||||
.originalFilename(doc.getOriginalFilename())
|
||||
.documentDate(dto.getDocumentDate())
|
||||
.location(dto.getLocation())
|
||||
.metaDatePrecision(effectivePrecision(doc, dto))
|
||||
.metaDateEnd(effectiveMetaDateEnd(doc, dto))
|
||||
.metaDateRaw(effectiveMetaDateRaw(doc, dto))
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies the three date-precision fields skip-null: a null DTO field means "not submitted",
|
||||
* so the stored value is kept rather than overwritten with null — which would fabricate a
|
||||
* precision the user never chose, the exact dishonesty #666 exists to prevent. Expressed via
|
||||
* the shared {@code effective*} resolvers so {@link #projectedState} stays lock-step (writing
|
||||
* the stored value back when the DTO omits a field is a harmless no-op).
|
||||
*/
|
||||
private void applyDatePrecision(Document doc, DocumentUpdateDTO dto) {
|
||||
if (dto.getMetaDatePrecision() != null) {
|
||||
doc.setMetaDatePrecision(dto.getMetaDatePrecision());
|
||||
doc.setMetaDatePrecision(effectivePrecision(doc, dto));
|
||||
doc.setMetaDateEnd(effectiveMetaDateEnd(doc, dto));
|
||||
doc.setMetaDateRaw(effectiveMetaDateRaw(doc, dto));
|
||||
}
|
||||
|
||||
// Skip-null date-field resolution shared by applyDatePrecision (the real setters) and
|
||||
// projectedState (the title projection) — the single rule keeps them from diverging (#726).
|
||||
private static DatePrecision effectivePrecision(Document doc, DocumentUpdateDTO dto) {
|
||||
return dto.getMetaDatePrecision() != null ? dto.getMetaDatePrecision() : doc.getMetaDatePrecision();
|
||||
}
|
||||
|
||||
private static LocalDate effectiveMetaDateEnd(Document doc, DocumentUpdateDTO dto) {
|
||||
return dto.getMetaDateEnd() != null ? dto.getMetaDateEnd() : doc.getMetaDateEnd();
|
||||
}
|
||||
|
||||
private static String effectiveMetaDateRaw(Document doc, DocumentUpdateDTO dto) {
|
||||
return dto.getMetaDateRaw() != null ? dto.getMetaDateRaw() : doc.getMetaDateRaw();
|
||||
}
|
||||
|
||||
/**
|
||||
* Friendly guard for the two V69 date-range CHECK constraints, run before save so a
|
||||
* user date typo returns a clean 400 INVALID_DATE_RANGE instead of falling through to
|
||||
* the generic handler (HTTP 500 + Sentry + ERROR log). Validates the post-apply {@code doc}
|
||||
* state, not the DTO, because precision/end may have been carried over from the stored row
|
||||
* when the DTO field was null. The DB CHECK remains the backstop; this never weakens it.
|
||||
*/
|
||||
private void validateDateRange(Document doc) {
|
||||
// Mirrors chk_meta_date_end_after_start: end >= start, with null start allowed.
|
||||
// Use isBefore (equal dates are valid) — never !isAfter, which would contradict the DB's >=.
|
||||
if (doc.getMetaDatePrecision() == DatePrecision.RANGE
|
||||
&& doc.getDocumentDate() != null
|
||||
&& doc.getMetaDateEnd() != null
|
||||
&& doc.getMetaDateEnd().isBefore(doc.getDocumentDate())) {
|
||||
throw DomainException.badRequest(ErrorCode.INVALID_DATE_RANGE,
|
||||
"meta_date_end must not be before meta_date");
|
||||
}
|
||||
if (dto.getMetaDateEnd() != null) {
|
||||
doc.setMetaDateEnd(dto.getMetaDateEnd());
|
||||
}
|
||||
if (dto.getMetaDateRaw() != null) {
|
||||
doc.setMetaDateRaw(dto.getMetaDateRaw());
|
||||
// Mirrors chk_meta_date_end_only_for_range. API-only: the edit form clears the
|
||||
// end field off-RANGE, so this branch closes the same 500 class for direct clients.
|
||||
if (doc.getMetaDateEnd() != null && doc.getMetaDatePrecision() != DatePrecision.RANGE) {
|
||||
throw DomainException.badRequest(ErrorCode.INVALID_DATE_RANGE,
|
||||
"meta_date_end is only allowed when meta_date_precision is RANGE");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -500,18 +589,15 @@ public class DocumentService {
|
||||
* round-trip.
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public List<UUID> findIdsForFilter(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver,
|
||||
List<String> tags, String tagQ, DocumentStatus status, TagOperator tagOperator,
|
||||
boolean undated) {
|
||||
boolean hasText = StringUtils.hasText(text);
|
||||
public List<UUID> findIdsForFilter(SearchFilters filters) {
|
||||
boolean hasText = StringUtils.hasText(filters.text());
|
||||
List<UUID> rankedIds = null;
|
||||
if (hasText) {
|
||||
rankedIds = documentRepository.findAllMatchingIdsByFts(text);
|
||||
rankedIds = documentRepository.findAllMatchingIdsByFts(filters.text());
|
||||
if (rankedIds.isEmpty()) return List.of();
|
||||
}
|
||||
|
||||
Specification<Document> spec = buildSearchSpec(
|
||||
hasText, rankedIds, from, to, sender, receiver, tags, tagQ, status, tagOperator, undated);
|
||||
Specification<Document> spec = buildSearchSpec(hasText, rankedIds, filters);
|
||||
return documentRepository.findAll(spec).stream().map(Document::getId).toList();
|
||||
}
|
||||
|
||||
@@ -521,23 +607,18 @@ public class DocumentService {
|
||||
* (uncapped, ID-only). Caller does its own FTS short-circuit when the
|
||||
* full-text query returned no rows.
|
||||
*/
|
||||
private Specification<Document> buildSearchSpec(boolean hasText, List<UUID> ftsIds,
|
||||
LocalDate from, LocalDate to,
|
||||
UUID sender, UUID receiver,
|
||||
List<String> tags, String tagQ,
|
||||
DocumentStatus status, TagOperator tagOperator,
|
||||
boolean undated) {
|
||||
boolean useOrLogic = tagOperator == TagOperator.OR;
|
||||
List<Set<UUID>> expandedTagSets = tagService.expandTagNamesToDescendantIdSets(tags);
|
||||
private Specification<Document> buildSearchSpec(boolean hasText, List<UUID> ftsIds, SearchFilters filters) {
|
||||
boolean useOrLogic = filters.tagOperator() == TagOperator.OR;
|
||||
List<Set<UUID>> expandedTagSets = tagService.expandTagNamesToDescendantIdSets(filters.tags());
|
||||
Specification<Document> textSpec = hasText ? hasIds(ftsIds) : (root, query, cb) -> null;
|
||||
return Specification.where(textSpec)
|
||||
.and(isBetween(from, to))
|
||||
.and(hasSender(sender))
|
||||
.and(hasReceiver(receiver))
|
||||
.and(isBetween(filters.from(), filters.to()))
|
||||
.and(hasSender(filters.sender()))
|
||||
.and(hasReceiver(filters.receiver()))
|
||||
.and(hasTags(expandedTagSets, useOrLogic))
|
||||
.and(hasTagPartial(tagQ))
|
||||
.and(hasStatus(status))
|
||||
.and(undatedOnly(undated));
|
||||
.and(hasTagPartial(filters.tagQ()))
|
||||
.and(hasStatus(filters.status()))
|
||||
.and(undatedOnly(filters.undated()));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -666,8 +747,8 @@ public class DocumentService {
|
||||
}
|
||||
|
||||
// 1. General search (for the search field in the frontend)
|
||||
public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir, TagOperator tagOperator, boolean undated, Pageable pageable) {
|
||||
boolean hasText = StringUtils.hasText(text);
|
||||
public DocumentSearchResult searchDocuments(SearchFilters filters, DocumentSort sort, String dir, Pageable pageable) {
|
||||
boolean hasText = StringUtils.hasText(filters.text());
|
||||
|
||||
// Pure-text RELEVANCE: push pagination + ts_rank ordering into SQL — skip
|
||||
// findAllMatchingIdsByFts entirely (ADR-008). This must run BEFORE any
|
||||
@@ -677,13 +758,13 @@ public class DocumentService {
|
||||
// no date/sender/receiver/tag/status filters, and undated documents are valid
|
||||
// FTS hits already folded into the ranked page, so there is no separate undated
|
||||
// count to report here.
|
||||
if (!undated && isPureTextRelevance(hasText, sort, from, to, sender, receiver, tags, tagQ, status)) {
|
||||
return relevanceSortedPageFromSql(text, pageable);
|
||||
if (!filters.undated() && isPureTextRelevance(hasText, sort, filters)) {
|
||||
return relevanceSortedPageFromSql(filters.text(), pageable);
|
||||
}
|
||||
|
||||
List<UUID> rankedIds = null;
|
||||
if (hasText) {
|
||||
rankedIds = documentRepository.findAllMatchingIdsByFts(text);
|
||||
rankedIds = documentRepository.findAllMatchingIdsByFts(filters.text());
|
||||
// FTS matched nothing → no results and, by definition, no undated matches either.
|
||||
if (rankedIds.isEmpty()) return DocumentSearchResult.of(List.of());
|
||||
}
|
||||
@@ -691,37 +772,32 @@ public class DocumentService {
|
||||
// Global undated count for the current filter (q/tags/sender/receiver/status),
|
||||
// forcing undatedOnly(true) and IGNORING the user's "Nur undatierte" toggle so
|
||||
// it never collapses to the page slice and never double-counts (issue #668).
|
||||
long undatedCount = countUndatedForFilter(hasText, rankedIds, from, to, sender, receiver, tags, tagQ, status, tagOperator);
|
||||
long undatedCount = countUndatedForFilter(hasText, rankedIds, filters.withUndated(true));
|
||||
|
||||
return runSearch(text, hasText, rankedIds, from, to, sender, receiver, tags, tagQ, status, sort, dir, tagOperator, undated, pageable)
|
||||
return runSearch(hasText, rankedIds, filters, sort, dir, pageable)
|
||||
.withUndatedCount(undatedCount);
|
||||
}
|
||||
|
||||
/**
|
||||
* Counts every undated document (meta_date IS NULL) matching the active filter,
|
||||
* across all pages, independent of the undated toggle. Reuses {@link #buildSearchSpec}
|
||||
* with {@code undated=true} forced so the count tracks q/tags/sender/receiver/status.
|
||||
* A {@code from}/{@code to} range excludes undated rows by the collision rule (#668),
|
||||
* so the count is legitimately 0 inside a date range.
|
||||
* across all pages, independent of the undated toggle. The caller passes
|
||||
* {@code filters.withUndated(true)} so the count tracks q/tags/sender/receiver/status
|
||||
* regardless of the user's "Nur undatierte" toggle. A {@code from}/{@code to} range
|
||||
* excludes undated rows by the collision rule (#668), so the count is legitimately 0
|
||||
* inside a date range.
|
||||
*/
|
||||
private long countUndatedForFilter(boolean hasText, List<UUID> ftsIds,
|
||||
LocalDate from, LocalDate to, UUID sender, UUID receiver,
|
||||
List<String> tags, String tagQ, DocumentStatus status, TagOperator tagOperator) {
|
||||
Specification<Document> undatedSpec = buildSearchSpec(
|
||||
hasText, ftsIds, from, to, sender, receiver, tags, tagQ, status, tagOperator, true);
|
||||
private long countUndatedForFilter(boolean hasText, List<UUID> ftsIds, SearchFilters filters) {
|
||||
Specification<Document> undatedSpec = buildSearchSpec(hasText, ftsIds, filters);
|
||||
return documentRepository.count(undatedSpec);
|
||||
}
|
||||
|
||||
/** The original search dispatch — produces the page slice + totals, sans undated count. */
|
||||
private DocumentSearchResult runSearch(String text, boolean hasText, List<UUID> rankedIds,
|
||||
LocalDate from, LocalDate to, UUID sender, UUID receiver,
|
||||
List<String> tags, String tagQ, DocumentStatus status,
|
||||
DocumentSort sort, String dir, TagOperator tagOperator,
|
||||
boolean undated, Pageable pageable) {
|
||||
private DocumentSearchResult runSearch(boolean hasText, List<UUID> rankedIds, SearchFilters filters,
|
||||
DocumentSort sort, String dir, Pageable pageable) {
|
||||
// The pure-text RELEVANCE fast path is handled by the caller (searchDocuments)
|
||||
// before findAllMatchingIdsByFts runs, so it never reaches here (ADR-008).
|
||||
Specification<Document> spec = buildSearchSpec(
|
||||
hasText, rankedIds, from, to, sender, receiver, tags, tagQ, status, tagOperator, undated);
|
||||
Specification<Document> spec = buildSearchSpec(hasText, rankedIds, filters);
|
||||
String text = filters.text();
|
||||
|
||||
// SENDER and RECEIVER sorts load the full match set and slice in-memory.
|
||||
// JPA's Sort.by("sender.lastName") generates an INNER JOIN that silently drops
|
||||
@@ -755,12 +831,12 @@ public class DocumentService {
|
||||
return buildResultPaged(page.getContent(), text, pageable, page.getTotalElements());
|
||||
}
|
||||
|
||||
private static boolean isPureTextRelevance(boolean hasText, DocumentSort sort,
|
||||
LocalDate from, LocalDate to, UUID sender, UUID receiver,
|
||||
List<String> tags, String tagQ, DocumentStatus status) {
|
||||
private static boolean isPureTextRelevance(boolean hasText, DocumentSort sort, SearchFilters filters) {
|
||||
return hasText && (sort == null || sort == DocumentSort.RELEVANCE)
|
||||
&& from == null && to == null && sender == null && receiver == null
|
||||
&& (tags == null || tags.isEmpty()) && (tagQ == null || tagQ.isBlank()) && status == null;
|
||||
&& filters.from() == null && filters.to() == null
|
||||
&& filters.sender() == null && filters.receiver() == null
|
||||
&& (filters.tags() == null || filters.tags().isEmpty())
|
||||
&& (filters.tagQ() == null || filters.tagQ().isBlank()) && filters.status() == null;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -901,22 +977,6 @@ public class DocumentService {
|
||||
.orElse("");
|
||||
}
|
||||
|
||||
// 2. SPEZIALITÄT: Der Schriftwechsel
|
||||
// Findet alle Briefe ZWISCHEN zwei Personen (egal wer Sender/Empfänger war)
|
||||
public List<Document> getConversation(UUID personA, UUID personB) {
|
||||
|
||||
// Fall 1: A schreibt an B
|
||||
Specification<Document> aToB = Specification.where(hasSender(personA)).and(hasReceiver(personB));
|
||||
|
||||
// Fall 2: B schreibt an A
|
||||
Specification<Document> bToA = Specification.where(hasSender(personB)).and(hasReceiver(personA));
|
||||
|
||||
// Wir wollen (A->B) ODER (B->A)
|
||||
Specification<Document> conversation = aToB.or(bToA);
|
||||
|
||||
return documentRepository.findAll(conversation, Sort.by(Sort.Direction.ASC, "documentDate"));
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void updateScriptType(UUID documentId, ScriptType scriptType) {
|
||||
Document doc = getDocumentById(documentId);
|
||||
@@ -946,6 +1006,19 @@ public class DocumentService {
|
||||
return doc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a document for the detail view, additionally flagging whether it has any
|
||||
* transcription to read. Kept separate from {@link #getDocumentById} so the cheap
|
||||
* existence query only runs for the single-document detail endpoint, not for the
|
||||
* many internal callers that never read the flag.
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public Document getDocumentDetail(UUID id) {
|
||||
Document doc = getDocumentById(id);
|
||||
doc.setHasTranscription(transcriptionBlockQueryService.hasBlocks(id));
|
||||
return doc;
|
||||
}
|
||||
|
||||
public List<Document> getDocumentsByIds(List<UUID> ids) {
|
||||
return documentRepository.findAllById(ids);
|
||||
}
|
||||
@@ -962,13 +1035,26 @@ public class DocumentService {
|
||||
return documentRepository.findByReceiversId(receiverId);
|
||||
}
|
||||
|
||||
public List<Document> getConversationFiltered(UUID senderId, UUID receiverId, LocalDate from, LocalDate to, Sort sort) {
|
||||
LocalDate dateFrom = (from != null) ? from : LocalDate.parse("0000-01-01");
|
||||
LocalDate dateTo = (to != null) ? to : LocalDate.now();
|
||||
if (receiverId == null) {
|
||||
return documentRepository.findSinglePersonCorrespondence(senderId, dateFrom, dateTo, sort);
|
||||
}
|
||||
return documentRepository.findConversation(senderId, receiverId, dateFrom, dateTo, sort);
|
||||
public DocumentSearchResult searchDocumentsByPersonId(UUID personId, LocalDate from, LocalDate to, Pageable pageable) {
|
||||
Person person = personService.getById(personId);
|
||||
Specification<Document> spec = buildPersonSpec(person, from, to);
|
||||
Page<Document> page = documentRepository.findAll(spec, pageable);
|
||||
List<DocumentListItem> items = enrichItems(page.getContent(), null);
|
||||
return DocumentSearchResult.paged(items, pageable, page.getTotalElements());
|
||||
}
|
||||
|
||||
private Specification<Document> buildPersonSpec(Person person, LocalDate from, LocalDate to) {
|
||||
return (root, query, cb) -> {
|
||||
if (query != null) query.distinct(true);
|
||||
var receiversJoin = root.join("receivers", JoinType.LEFT);
|
||||
var senderPredicate = cb.equal(root.get("sender"), person);
|
||||
var receiverPredicate = cb.equal(receiversJoin, person);
|
||||
var personPredicate = cb.or(senderPredicate, receiverPredicate);
|
||||
var predicates = new ArrayList<>(List.of(personPredicate));
|
||||
if (from != null) predicates.add(cb.greaterThanOrEqualTo(root.get("documentDate"), from));
|
||||
if (to != null) predicates.add(cb.lessThanOrEqualTo(root.get("documentDate"), to));
|
||||
return cb.and(predicates.toArray(new Predicate[0]));
|
||||
};
|
||||
}
|
||||
|
||||
public long getIncompleteCount() {
|
||||
@@ -1005,6 +1091,43 @@ public class DocumentService {
|
||||
tagService.delete(tagId);
|
||||
}
|
||||
|
||||
/**
|
||||
* One-time cleanup of already-stale auto-titles (#726, FR-003). For every document whose
|
||||
* stored title passes the {@link DocumentTitleBackfillMatcher} overwrite heuristic, rebuilds
|
||||
* the title from the row's current state and persists it only when it actually changed.
|
||||
* Idempotent: a second run rebuilds the same value and saves nothing. Hand-written prose is
|
||||
* left untouched.
|
||||
*
|
||||
* <p>Saves via {@code documentRepository.save} directly — it must NOT route through
|
||||
* {@link #updateDocument} (which versions every write), following the {@link #backfillFileHashes}
|
||||
* precedent: a mechanical rename must not snapshot the whole corpus into {@code document_versions}.
|
||||
*
|
||||
* @return the number of documents whose title was rewritten
|
||||
*/
|
||||
@Transactional
|
||||
public int backfillTitles() {
|
||||
List<Document> docs = documentRepository.findAll();
|
||||
int updated = 0;
|
||||
int skipped = 0;
|
||||
for (Document doc : docs) {
|
||||
if (!DocumentTitleBackfillMatcher.isOverwritable(
|
||||
doc.getTitle(), doc.getOriginalFilename(), doc.getLocation())) {
|
||||
skipped++;
|
||||
continue;
|
||||
}
|
||||
String rebuilt = documentTitleFactory.build(doc);
|
||||
if (rebuilt.equals(doc.getTitle())) {
|
||||
skipped++; // already correct — keep idempotent, no write
|
||||
continue;
|
||||
}
|
||||
doc.setTitle(rebuilt);
|
||||
documentRepository.save(doc); // direct save, no recordVersion (mechanical rename)
|
||||
updated++;
|
||||
}
|
||||
log.info("Title backfill complete: scanned={} updated={} skipped={}", docs.size(), updated, skipped);
|
||||
return updated;
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public int backfillFileHashes() {
|
||||
List<Document> docs = documentRepository.findByFileHashIsNullAndFilePathIsNotNull();
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
 * Heuristic overwrite test for the one-time title backfill (#726, FR-004): decides whether a
 * STORED title is a machine-generated auto-title (and so may be rebuilt from the row's current
 * state) versus hand-written prose (left untouched). Used ONLY by the backfill — save-time
 * regeneration uses an exact old-vs-new comparison instead, with no heuristic.
 *
 * <p>A stored title is overwritable iff, after stripping the literal {@code index} prefix:
 * <ol>
 *   <li>it is exactly {@code {index}}, or</li>
 *   <li>{@code {index} – {dateLabel}} with an optional trailing {@code – {location}} segment
 *       (any location, including one that contains line breaks — a present, valid date label
 *       is itself strong evidence of a machine title), or</li>
 *   <li>{@code {index} – {location}} where the segment equals the document's current location
 *       (no date label, so the segment must match the known location to be distinguished from
 *       prose).</li>
 * </ol>
 *
 * <p>Security: the {@code index} is compared <em>literally</em> via {@link String#startsWith}
 * (never compiled into a regex) because {@code originalFilename} is user-controlled and may carry
 * regex metacharacters — an unquoted pattern would be a ReDoS / regex-injection vector
 * (CWE-1333 / CWE-625). The date-label sub-patterns use only bounded, non-nested quantifiers over
 * short tokens, so there is no catastrophic backtracking. Fail-closed: any null/blank index or
 * structural surprise returns {@code false}.
 */
final class DocumentTitleBackfillMatcher {

    private static final String SEPARATOR = " – ";

    // German month tokens derived from Locale.GERMAN formatters rather than a hand-written list,
    // so the accepted spellings follow whatever the runtime's formatter emits (full names via
    // "MMMM", abbreviations via "MMM"). Pattern.quote each so a "." in an abbreviation is
    // literal, never a wildcard.
    private static final String FULL_MONTH = monthAlternation("MMMM");
    private static final String ABBR_MONTH = monthAlternation("MMM");
    private static final String SEASON = "(?:Frühling|Sommer|Herbst|Winter)";
    private static final String YEAR = "\\d{1,4}";
    private static final String DAY_NUM = "\\d{1,2}";

    // One complete date label, anchored, optionally followed by a free-form trailing location
    // segment. Only bounded/non-nested quantifiers over short tokens plus a single trailing
    // ".+" → linear, no catastrophic backtracking (FR-004 ReDoS guard).
    // DOTALL: the only unescaped "." is the trailing-location ".+", and a location is free text
    // that may contain a line break. Without the flag such a title was rejected here, even though
    // the location-only branch (plain equals) accepts the very same location.
    private static final Pattern DATE_LABEL_WITH_OPTIONAL_LOCATION = Pattern.compile(
            "^(?:" + String.join("|",
                    YEAR,                                                        // 1916
                    "ca\\. " + YEAR,                                             // ca. 1920
                    FULL_MONTH + " " + YEAR,                                     // Juni 1916
                    DAY_NUM + "\\. " + FULL_MONTH + " " + YEAR,                  // 24. Dezember 1943
                    SEASON + " " + YEAR,                                         // Sommer 1916
                    "Datum unbekannt",
                    DAY_NUM + "\\.–" + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR, // 10.–11. Jan. 1917
                    DAY_NUM + "\\. " + ABBR_MONTH + " – " + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR, // 30. Jan. – 2. Feb. 1917
                    DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR + " – " + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR, // 30. Dez. 1916 – 2. Jan. 1917
                    DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR,                  // 10. Jan. 1917 (range end == start)
                    "ab " + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR)          // ab 10. Jan. 1917
                    + ")(?: – .+)?$",
            Pattern.DOTALL);

    private DocumentTitleBackfillMatcher() {
    }

    /**
     * Decides whether a stored title looks machine-generated and may therefore be rebuilt.
     *
     * @param title    the stored title; {@code null} is never overwritable
     * @param index    the literal leading index the title must start with; {@code null} or
     *                 blank fails closed
     * @param location the document's current location, used only when the title carries no
     *                 date label; may be {@code null}
     * @return {@code true} if the title matches one of the auto-title shapes described on the class
     */
    static boolean isOverwritable(String title, String index, String location) {
        if (title == null || index == null || index.isBlank()) {
            return false; // fail closed
        }
        if (!title.startsWith(index)) {
            return false; // index is matched LITERALLY, never as a regex
        }
        String tail = title.substring(index.length());
        if (tail.isEmpty()) {
            return true; // exactly {index}
        }
        if (!tail.startsWith(SEPARATOR)) {
            return false; // something other than " – " follows the index (e.g. index "A-1" vs title "A-12 …")
        }
        String body = tail.substring(SEPARATOR.length());
        if (DATE_LABEL_WITH_OPTIONAL_LOCATION.matcher(body).matches()) {
            return true; // {dateLabel} (+ optional trailing location)
        }
        // No date label: the lone segment must equal the document's current location to be
        // distinguished from hand-written prose.
        return location != null && !location.isBlank() && body.equals(location);
    }

    /**
     * Builds a non-capturing alternation of the twelve month names rendered by {@code pattern}
     * under {@link Locale#GERMAN}, each quoted so it is matched literally.
     */
    private static String monthAlternation(String pattern) {
        DateTimeFormatter formatter = DateTimeFormatter.ofPattern(pattern, Locale.GERMAN);
        // LinkedHashSet: keeps calendar order and drops duplicates should two months format alike.
        Set<String> tokens = new LinkedHashSet<>();
        for (int month = 1; month <= 12; month++) {
            tokens.add(formatter.format(LocalDate.of(2000, month, 15)));
        }
        return tokens.stream().map(Pattern::quote).collect(Collectors.joining("|", "(?:", ")"));
    }
}
|
||||
@@ -0,0 +1,39 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* Single source of truth for the auto-generated document title
|
||||
* {@code {index} – {dateLabel} – {location}}.
|
||||
*
|
||||
* <p>The {@code document} package owns this formula; {@code importing} consumes it
|
||||
* (see ADR for issue #726). The leading {@code index} is the document's
|
||||
* {@code originalFilename}; the date label is the honest German label produced by
|
||||
* {@link DocumentTitleFormatter} (the Java half of the #666 date-label split); the
|
||||
* trailing location is the {@code meta_location} verbatim, omitted when blank.
|
||||
*/
|
||||
@Component
|
||||
public class DocumentTitleFactory {
|
||||
|
||||
static final String SEPARATOR = " – ";
|
||||
|
||||
/**
|
||||
* Composes the auto-title from the document's current state. The date segment is
|
||||
* dropped for UNKNOWN precision or a null date (the honest "no date" case); the
|
||||
* location segment is dropped when blank.
|
||||
*/
|
||||
public String build(Document doc) {
|
||||
// originalFilename is NOT NULL in production; guard only so a synthetic/partial entity
|
||||
// never trips StringBuilder(null) with an opaque NPE.
|
||||
StringBuilder title = new StringBuilder(doc.getOriginalFilename() == null ? "" : doc.getOriginalFilename());
|
||||
if (doc.getDocumentDate() != null && doc.getMetaDatePrecision() != DatePrecision.UNKNOWN) {
|
||||
title.append(SEPARATOR).append(DocumentTitleFormatter.formatTitleDate(
|
||||
doc.getDocumentDate(), doc.getMetaDatePrecision(),
|
||||
doc.getMetaDateEnd(), doc.getMetaDateRaw()));
|
||||
}
|
||||
if (doc.getLocation() != null && !doc.getLocation().isBlank()) {
|
||||
title.append(SEPARATOR).append(doc.getLocation());
|
||||
}
|
||||
return title.toString();
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,4 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import org.raddatz.familienarchiv.document.DatePrecision;
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
@@ -0,0 +1,40 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import org.raddatz.familienarchiv.tag.TagOperator;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* The filter predicates honoured by {@link DocumentService#searchDocuments} and
|
||||
* {@link DocumentService#findIdsForFilter}. Sort, direction, and pagination are
|
||||
* deliberately excluded — they are not filter predicates, and {@code findIdsForFilter}
|
||||
* needs none of them; they are passed as separate arguments instead.
|
||||
*
|
||||
* Kept as a record so the ten values are passed as one named bundle instead of a
|
||||
* positional argument list where two UUIDs (sender vs. receiver) or two dates
|
||||
* (from vs. to) can be swapped by accident at the call site — a transposition that
|
||||
* compiles cleanly and silently returns the wrong rows.
|
||||
*
|
||||
* Sibling of {@link DensityFilters} (= these fields minus from/to/undated); kept
|
||||
* separate on purpose, so the density call path never reasons about date/undated
|
||||
* fields it deliberately excludes.
|
||||
*/
|
||||
public record SearchFilters(
|
||||
String text,
|
||||
LocalDate from,
|
||||
LocalDate to,
|
||||
UUID sender,
|
||||
UUID receiver,
|
||||
List<String> tags,
|
||||
String tagQ,
|
||||
DocumentStatus status,
|
||||
TagOperator tagOperator,
|
||||
boolean undated) {
|
||||
|
||||
/** Returns a copy with {@code undated} overridden — used by the undated-count path. */
|
||||
public SearchFilters withUndated(boolean undated) {
|
||||
return new SearchFilters(text, from, to, sender, receiver, tags, tagQ, status, tagOperator, undated);
|
||||
}
|
||||
}
|
||||
@@ -17,6 +17,10 @@ public class TranscriptionBlockQueryService {
|
||||
|
||||
private final TranscriptionBlockRepository blockRepository;
|
||||
|
||||
public boolean hasBlocks(UUID documentId) {
|
||||
return blockRepository.existsByDocumentId(documentId);
|
||||
}
|
||||
|
||||
public Map<UUID, Integer> getCompletionStats(List<UUID> documentIds) {
|
||||
if (documentIds.isEmpty()) return Map.of();
|
||||
Map<UUID, Integer> result = new HashMap<>();
|
||||
|
||||
@@ -43,6 +43,8 @@ public interface TranscriptionBlockRepository extends JpaRepository<Transcriptio
|
||||
|
||||
int countByDocumentId(UUID documentId);
|
||||
|
||||
boolean existsByDocumentId(UUID documentId);
|
||||
|
||||
@Query("""
|
||||
SELECT b FROM TranscriptionBlock b
|
||||
JOIN DocumentAnnotation a ON a.id = b.annotationId
|
||||
|
||||
@@ -78,4 +78,8 @@ public class DomainException extends RuntimeException {
|
||||
public static DomainException tooManyRequests(ErrorCode code, String message, long retryAfterSeconds) {
|
||||
return new DomainException(code, HttpStatus.TOO_MANY_REQUESTS, message, retryAfterSeconds);
|
||||
}
|
||||
|
||||
public static DomainException serviceUnavailable(ErrorCode code, String message) {
|
||||
return new DomainException(code, HttpStatus.SERVICE_UNAVAILABLE, message);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,6 +26,8 @@ public enum ErrorCode {
|
||||
FILE_UPLOAD_FAILED,
|
||||
/** The uploaded file's content type is not supported (PDF/JPEG/PNG/TIFF only). 400 */
|
||||
UNSUPPORTED_FILE_TYPE,
|
||||
/** A RANGE date is invalid: meta_date_end is before meta_date, or an end date is set without RANGE precision. 400 */
|
||||
INVALID_DATE_RANGE,
|
||||
|
||||
// --- Users ---
|
||||
/** A user with the given ID or username does not exist. 404 */
|
||||
@@ -133,6 +135,12 @@ public enum ErrorCode {
|
||||
/** The merge target is a descendant of the source tag. 400 */
|
||||
TAG_MERGE_INVALID_TARGET,
|
||||
|
||||
// --- NL Search ---
|
||||
/** Ollama is unreachable or timed out. 503 */
|
||||
SMART_SEARCH_UNAVAILABLE,
|
||||
/** NL search rate limit exceeded (5 requests per user per minute). 429 */
|
||||
SMART_SEARCH_RATE_LIMITED,
|
||||
|
||||
// --- Generic ---
|
||||
/** Request validation failed (missing or malformed fields). 400 */
|
||||
VALIDATION_ERROR,
|
||||
|
||||
@@ -6,6 +6,7 @@ import io.sentry.Sentry;
|
||||
import jakarta.validation.ConstraintViolationException;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.springframework.dao.DataIntegrityViolationException;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.http.converter.HttpMessageNotReadableException;
|
||||
import org.springframework.web.bind.MethodArgumentNotValidException;
|
||||
@@ -64,6 +65,38 @@ public class GlobalExceptionHandler {
|
||||
.body(new ErrorResponse(ErrorCode.VALIDATION_ERROR, ex.getReason()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Backstop for any database integrity violation that slips past the explicit upstream
|
||||
* guards (e.g. a future constraint, or the import path emitting a bad range). Turns it into
|
||||
* a clean 400 instead of a 500 + Sentry alert. The known date-range cases are caught upstream
|
||||
* and never reach here; this only catches the unanticipated ones — so it logs the constraint
|
||||
* NAME at WARN to stay debuggable, without re-leaking SQL and without branching the response
|
||||
* on it (the response stays generic, which is the non-brittle part).
|
||||
*/
|
||||
@ExceptionHandler(DataIntegrityViolationException.class)
|
||||
public ResponseEntity<ErrorResponse> handleDataIntegrityViolation(DataIntegrityViolationException ex) {
|
||||
// Log the constraint NAME only — schema metadata, safe for Loki, and enough to tell which
|
||||
// constraint fired at 2am. Never pass `ex` / `ex.getMessage()`: those embed the SQL + the
|
||||
// offending values (CWE-209). No Sentry: an integrity violation is a 400, not a system fault.
|
||||
log.warn("Rejected a request that violated a database integrity constraint: {}", constraintNameOf(ex));
|
||||
return ResponseEntity.badRequest()
|
||||
.body(new ErrorResponse(ErrorCode.VALIDATION_ERROR, "The submitted data violated a database constraint"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the offending constraint's name from the cause chain, or {@code "unknown"}.
|
||||
* Reads only the name (a non-sensitive schema identifier) — never the SQL or the values.
|
||||
*/
|
||||
private static String constraintNameOf(Throwable ex) {
|
||||
for (Throwable t = ex; t != null && t != t.getCause(); t = t.getCause()) {
|
||||
if (t instanceof org.hibernate.exception.ConstraintViolationException cve
|
||||
&& cve.getConstraintName() != null) {
|
||||
return cve.getConstraintName();
|
||||
}
|
||||
}
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
@ExceptionHandler(Exception.class)
|
||||
public ResponseEntity<ErrorResponse> handleGeneric(Exception ex) {
|
||||
Sentry.captureException(ex);
|
||||
|
||||
@@ -5,6 +5,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.document.DatePrecision;
|
||||
import org.raddatz.familienarchiv.document.Document;
|
||||
import org.raddatz.familienarchiv.document.DocumentService;
|
||||
import org.raddatz.familienarchiv.document.DocumentTitleFactory;
|
||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
@@ -74,6 +75,7 @@ public class DocumentImporter {
|
||||
Pattern.compile("[A-Za-z\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u00FF]{1,4}-+\\d+x?");
|
||||
|
||||
private final DocumentService documentService;
|
||||
private final DocumentTitleFactory documentTitleFactory;
|
||||
private final PersonService personService;
|
||||
private final TagService tagService;
|
||||
private final S3Client s3Client;
|
||||
@@ -181,7 +183,7 @@ public class DocumentImporter {
|
||||
applyAttribution(doc, row);
|
||||
applyDates(doc, row);
|
||||
applyAuthoritativeAssociations(doc, row);
|
||||
applyFileMetadata(doc, s3Key, contentType, status, index);
|
||||
applyFileMetadata(doc, s3Key, contentType, status);
|
||||
applyComputedFlags(doc);
|
||||
return doc;
|
||||
}
|
||||
@@ -217,14 +219,15 @@ public class DocumentImporter {
|
||||
attachTag(doc, row.get("tags"));
|
||||
}
|
||||
|
||||
// S3 key, content type, status, and the index-derived title.
|
||||
// S3 key, content type, status, and the index-derived title. The title formula lives in
|
||||
// the document package's DocumentTitleFactory (single source of truth, #726); by this point
|
||||
// applyDates has populated the date/location and originalFilename carries the index.
|
||||
private void applyFileMetadata(Document doc, String s3Key, String contentType,
|
||||
DocumentStatus status, String index) {
|
||||
DocumentStatus status) {
|
||||
doc.setStatus(status);
|
||||
doc.setFilePath(s3Key);
|
||||
doc.setContentType(contentType);
|
||||
doc.setTitle(buildTitle(index, doc.getDocumentDate(), doc.getMetaDatePrecision(),
|
||||
doc.getMetaDateEnd(), doc.getMetaDateRaw(), doc.getLocation()));
|
||||
doc.setTitle(documentTitleFactory.build(doc));
|
||||
}
|
||||
|
||||
// metadataComplete: a document counts as fully described if any of the three "who/when"
|
||||
@@ -235,20 +238,6 @@ public class DocumentImporter {
|
||||
|| !doc.getReceivers().isEmpty());
|
||||
}
|
||||
|
||||
// The title carries the date at the HONEST precision (never a fabricated day) via the
|
||||
// shared DocumentTitleFormatter, plus the location — kept under 20 lines by delegating.
|
||||
private static String buildTitle(String index, LocalDate date, DatePrecision precision,
|
||||
LocalDate end, String raw, String location) {
|
||||
StringBuilder title = new StringBuilder(index);
|
||||
if (date != null && precision != DatePrecision.UNKNOWN) {
|
||||
title.append(" – ").append(DocumentTitleFormatter.formatTitleDate(date, precision, end, raw));
|
||||
}
|
||||
if (location != null && !location.isBlank()) {
|
||||
title.append(" – ").append(location);
|
||||
}
|
||||
return title.toString();
|
||||
}
|
||||
|
||||
// ─── attribution routing — register-first, always retain raw ─────────────────────
|
||||
|
||||
private Person resolveSender(String slug, String rawName) {
|
||||
|
||||
@@ -29,14 +29,36 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
// Stammbaum-Knoten: alle Personen mit family_member = true.
|
||||
List<Person> findByFamilyMemberTrueOrderByLastNameAscFirstNameAsc();
|
||||
|
||||
// Lookup by full alias string, used during ODS mass import
|
||||
Optional<Person> findByAliasIgnoreCase(String alias);
|
||||
// Exact-case alias lookup — the first resolution step in findOrCreateByAlias.
|
||||
// Case-colliding aliases across persons (müller / Müller) are valid human labels, NOT
|
||||
// duplicates: source_ref is the stable identity (ADR-025/033), alias is editable. Do NOT
|
||||
// add a unique(lower(alias)) constraint — see ADR-033.
|
||||
Optional<Person> findByAlias(String alias);
|
||||
|
||||
// Plural case-insensitive alias lookup — the fallback step. Returns ALL case-folding
|
||||
// siblings so the service can pick a deterministic one (lowest id) instead of letting a
|
||||
// derived Optional<…>IgnoreCase throw NonUniqueResultException. See ADR-033.
|
||||
List<Person> findAllByAliasIgnoreCase(String alias);
|
||||
|
||||
// Lookup by the normalizer person_id, used for idempotent canonical re-import (Phase 3).
|
||||
Optional<Person> findBySourceRef(String sourceRef);
|
||||
|
||||
// Exact first+last name match, used for filename-based sender lookup
|
||||
Optional<Person> findByFirstNameIgnoreCaseAndLastNameIgnoreCase(String firstName, String lastName);
|
||||
// Exact-case first+last name match — the first step of filename-based sender resolution.
|
||||
// Explicit `=` (HQL, not a derived query) so a null firstName binds as `first_name = NULL`
|
||||
// — never a match — instead of the derived-query fold to `first_name IS NULL`, which would
|
||||
// pull a last-name-only row in as a sender (a provenance defect). See ADR-033.
|
||||
@Query("SELECT p FROM Person p WHERE p.firstName = :firstName AND p.lastName = :lastName")
|
||||
Optional<Person> findByFirstNameAndLastName(@Param("firstName") String firstName,
|
||||
@Param("lastName") String lastName);
|
||||
|
||||
// Plural case-insensitive first+last name match — lets findByName bail to empty on 2+ matches
|
||||
// instead of letting a derived Optional<…>IgnoreCase throw NonUniqueResultException. Same
|
||||
// null fail-closed guarantee as above: LOWER(:firstName) is NULL for a null arg, so a null
|
||||
// first name resolves to no match (not first_name IS NULL widening). See ADR-033.
|
||||
@Query("SELECT p FROM Person p WHERE LOWER(p.firstName) = LOWER(:firstName) "
|
||||
+ "AND LOWER(p.lastName) = LOWER(:lastName)")
|
||||
List<Person> findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase(@Param("firstName") String firstName,
|
||||
@Param("lastName") String lastName);
|
||||
|
||||
// --- PersonSummaryDTO with document count ---
|
||||
|
||||
@@ -189,18 +211,15 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
List<Person> findCorrespondentsWithFilter(@Param("personId") UUID personId, @Param("q") String q);
|
||||
|
||||
// --- Merge helpers (native SQL to bypass JPA entity layer) ---
|
||||
// clearAutomatically + flushAutomatically keep the L1 cache from desyncing: these bulk
|
||||
// updates run beneath Hibernate, and mergePersons follows them with a deleteById whose
|
||||
// ON DELETE CASCADE (V71) also fires beneath the session.
|
||||
|
||||
@Modifying
|
||||
@Modifying(clearAutomatically = true, flushAutomatically = true)
|
||||
@Query(value = "UPDATE documents SET sender_id = :target WHERE sender_id = :source", nativeQuery = true)
|
||||
void reassignSender(@Param("source") UUID source, @Param("target") UUID target);
|
||||
|
||||
// Used by deletePerson: detach a deleted person from documents they sent, so the hard
|
||||
// delete cannot orphan a documents.sender_id FK (the column is nullable).
|
||||
@Modifying
|
||||
@Query(value = "UPDATE documents SET sender_id = NULL WHERE sender_id = :source", nativeQuery = true)
|
||||
void reassignSenderToNull(@Param("source") UUID source);
|
||||
|
||||
@Modifying
|
||||
@Modifying(clearAutomatically = true, flushAutomatically = true)
|
||||
@Query(value = """
|
||||
INSERT INTO document_receivers (document_id, person_id)
|
||||
SELECT document_id, :target FROM document_receivers
|
||||
@@ -210,8 +229,4 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
)
|
||||
""", nativeQuery = true)
|
||||
void insertMissingReceiverReference(@Param("source") UUID source, @Param("target") UUID target);
|
||||
|
||||
@Modifying
|
||||
@Query(value = "DELETE FROM document_receivers WHERE person_id = :source", nativeQuery = true)
|
||||
void deleteReceiverReferences(@Param("source") UUID source);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package org.raddatz.familienarchiv.person;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
@@ -68,15 +69,13 @@ public class PersonService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Hard-deletes a person used by triage. Detaches the person from any documents they
|
||||
* sent (nulls sender_id) and from any received-document references first, so the delete
|
||||
* cannot orphan an FK and fail with a 500.
|
||||
* Hard-deletes a person used by triage. Referential integrity is enforced by the database
|
||||
* (V71's {@code ON DELETE} constraints: sender_id {@code SET NULL}, receiver and @-mention
|
||||
* rows {@code CASCADE}), so the service stays thin — it only verifies existence then deletes.
|
||||
*/
|
||||
@Transactional
|
||||
public void deletePerson(UUID id) {
|
||||
getById(id);
|
||||
personRepository.reassignSenderToNull(id);
|
||||
personRepository.deleteReceiverReferences(id);
|
||||
personRepository.deleteById(id);
|
||||
}
|
||||
|
||||
@@ -100,6 +99,10 @@ public class PersonService {
|
||||
return personRepository.findAllById(ids);
|
||||
}
|
||||
|
||||
public List<Person> findByDisplayNameContaining(String fragment) {
|
||||
return personRepository.searchByName(fragment);
|
||||
}
|
||||
|
||||
public List<Person> findAllFamilyMembers() {
|
||||
return personRepository.findByFamilyMemberTrueOrderByLastNameAscFirstNameAsc();
|
||||
}
|
||||
@@ -112,7 +115,19 @@ public class PersonService {
|
||||
}
|
||||
|
||||
public Optional<Person> findByName(String firstName, String lastName) {
|
||||
return personRepository.findByFirstNameIgnoreCaseAndLastNameIgnoreCase(firstName, lastName);
|
||||
// Same scope as findOrCreateByAlias (#731): a case-collision resolves without throwing;
|
||||
// two byte-identical same-case persons are an out-of-scope data anomaly the exact
|
||||
// Optional below would surface as the opaque INTERNAL_ERROR, not a wrong sender.
|
||||
Optional<Person> exact = personRepository.findByFirstNameAndLastName(firstName, lastName);
|
||||
if (exact.isPresent()) return exact;
|
||||
List<Person> caseInsensitive =
|
||||
personRepository.findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase(firstName, lastName);
|
||||
// Deliberate divergence from findOrCreateByAlias: an ambiguous filename leaves the sender
|
||||
// UNSET rather than picking the lowest id. The archive's value is correct provenance — a
|
||||
// confidently-wrong pre-filled "Hans Müller" is worse than an empty field, because a
|
||||
// reviewer won't re-check a pre-filled value. Do NOT "consistency-clean" this into the
|
||||
// lowest-id fallback. See ADR-033.
|
||||
return caseInsensitive.size() == 1 ? Optional.of(caseInsensitive.get(0)) : Optional.empty();
|
||||
}
|
||||
|
||||
/** Lookup by the normalizer person_id — used by the canonical importer for register-first matching. */
|
||||
@@ -127,32 +142,45 @@ public class PersonService {
|
||||
PersonType type = PersonTypeClassifier.classify(alias);
|
||||
if (type == PersonType.SKIP) return null;
|
||||
|
||||
return personRepository.findByAliasIgnoreCase(alias).orElseGet(() -> {
|
||||
if (type == PersonType.INSTITUTION || type == PersonType.GROUP) {
|
||||
return personRepository.save(Person.builder()
|
||||
.alias(alias)
|
||||
.lastName(alias)
|
||||
.personType(type)
|
||||
.build());
|
||||
}
|
||||
// Aliases differing only by case (müller / Müller) are valid distinct persons, not
|
||||
// duplicates, so a CASE-COLLISION must not throw: exact-case first, then the lowest-id
|
||||
// case-insensitive sibling, then create. Mirrors the tag path — see ADR-033.
|
||||
// Scope (#731): "ambiguous" means case-insensitive. Two BYTE-IDENTICAL same-case aliases
|
||||
// are a true data anomaly out of scope here; the exact Optional below would surface that
|
||||
// as the opaque INTERNAL_ERROR (never a wrong row), not silently pick one.
|
||||
Optional<Person> exact = personRepository.findByAlias(alias);
|
||||
if (exact.isPresent()) return exact.get(); // exact-case wins
|
||||
List<Person> caseInsensitive = personRepository.findAllByAliasIgnoreCase(alias);
|
||||
if (!caseInsensitive.isEmpty()) {
|
||||
return caseInsensitive.stream().min(Comparator.comparing(Person::getId)).orElseThrow(); // deterministic tie-break — list is non-empty, never throws
|
||||
}
|
||||
|
||||
PersonNameParser.SplitName split = PersonNameParser.split(alias);
|
||||
Person person = personRepository.save(Person.builder()
|
||||
// Create-when-absent: institution/group keep the full label in lastName; a person name
|
||||
// is split and a maiden name (geb. …) becomes a MAIDEN_NAME alias.
|
||||
if (type == PersonType.INSTITUTION || type == PersonType.GROUP) {
|
||||
return personRepository.save(Person.builder()
|
||||
.alias(alias)
|
||||
.firstName(split.firstName())
|
||||
.lastName(split.lastName())
|
||||
.lastName(alias)
|
||||
.personType(type)
|
||||
.build());
|
||||
if (split.maidenName() != null) {
|
||||
int nextSortOrder = aliasRepository.findMaxSortOrder(person.getId()) + 1;
|
||||
aliasRepository.save(PersonNameAlias.builder()
|
||||
.person(person)
|
||||
.lastName(split.maidenName())
|
||||
.type(PersonNameAliasType.MAIDEN_NAME)
|
||||
.sortOrder(nextSortOrder)
|
||||
.build());
|
||||
}
|
||||
return person;
|
||||
});
|
||||
}
|
||||
|
||||
PersonNameParser.SplitName split = PersonNameParser.split(alias);
|
||||
Person person = personRepository.save(Person.builder()
|
||||
.alias(alias)
|
||||
.firstName(split.firstName())
|
||||
.lastName(split.lastName())
|
||||
.build());
|
||||
if (split.maidenName() != null) {
|
||||
int nextSortOrder = aliasRepository.findMaxSortOrder(person.getId()) + 1;
|
||||
aliasRepository.save(PersonNameAlias.builder()
|
||||
.person(person)
|
||||
.lastName(split.maidenName())
|
||||
.type(PersonNameAliasType.MAIDEN_NAME)
|
||||
.sortOrder(nextSortOrder)
|
||||
.build());
|
||||
}
|
||||
return person;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -295,6 +323,12 @@ public class PersonService {
|
||||
return personRepository.save(person);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges the source person into the target, then deletes the source. Sender references move
|
||||
* to the target; receiver references the target lacks are inserted. The source's leftover
|
||||
* receiver join rows are not deleted explicitly — they cascade-drop via V71's
|
||||
* {@code ON DELETE CASCADE} on {@code document_receivers.person_id} when the source is deleted.
|
||||
*/
|
||||
@Transactional
|
||||
public void mergePersons(UUID sourceId, UUID targetId) {
|
||||
if (sourceId.equals(targetId)) {
|
||||
@@ -311,9 +345,7 @@ public class PersonService {
|
||||
// Add target as receiver where source is receiver but target is not yet
|
||||
personRepository.insertMissingReceiverReference(sourceId, targetId);
|
||||
|
||||
// Remove all remaining source receiver references (duplicates already handled)
|
||||
personRepository.deleteReceiverReferences(sourceId);
|
||||
|
||||
// Source's remaining receiver rows cascade-drop via V71's ON DELETE CASCADE.
|
||||
personRepository.deleteById(sourceId);
|
||||
}
|
||||
|
||||
|
||||
@@ -20,8 +20,8 @@ Features: person CRUD, name alias management, person merge (deduplication), fami
|
||||
| `getById(UUID)` | document, geschichte, ocr | Fetch one person by ID |
|
||||
| `getAllById(List<UUID>)` | document | Bulk fetch for sender/receiver resolution |
|
||||
| `findAll(String q)` | document, dashboard | List all persons |
|
||||
| `findByName(String firstName, String lastName)` | document | Typeahead search |
|
||||
| `findOrCreateByAlias(String rawName)` | importing | Idempotent create during mass import; type classification happens internally |
|
||||
| `findByName(String firstName, String lastName)` | document | Filename-based **sender resolution** in `storeDocument`: exact-case match → single case-insensitive match → else **empty** (ambiguous names leave the sender unset; a null first name never matches). See ADR-033. |
|
||||
| `findOrCreateByAlias(String rawName)` | importing | Idempotent create during mass import; type classification happens internally. Resolves exact-case → lowest-id case-insensitive sibling → create — never throws on case-colliding aliases. See ADR-033. |
|
||||
| `findAllFamilyMembers()` | dashboard | Family member list for stats |
|
||||
| `findCorrespondents()` | document | Correspondent list for conversation filter |
|
||||
| `count()` | dashboard | Total person count for stats |
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.List;
|
||||
|
||||
public record NlQueryInterpretation(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<PersonHint> resolvedPersons,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<PersonHint> ambiguousPersons,
|
||||
LocalDate dateFrom,
|
||||
LocalDate dateTo,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<String> keywords,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
String rawQuery,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
boolean keywordsApplied
|
||||
) {
|
||||
}
|
||||
@@ -0,0 +1,160 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.document.DocumentSearchResult;
|
||||
import org.raddatz.familienarchiv.document.DocumentService;
|
||||
import org.raddatz.familienarchiv.document.DocumentSort;
|
||||
import org.raddatz.familienarchiv.document.SearchFilters;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonService;
|
||||
import org.raddatz.familienarchiv.tag.TagOperator;
|
||||
import org.springframework.data.domain.Pageable;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class NlQueryParserService {
|
||||
|
||||
private static final int MIN_QUERY = 3;
|
||||
private static final int MAX_QUERY = 500;
|
||||
private static final int MAX_NAME_LENGTH = 200;
|
||||
private static final int MAX_CANDIDATES = 10;
|
||||
|
||||
private final OllamaClient ollamaClient;
|
||||
private final PersonService personService;
|
||||
private final DocumentService documentService;
|
||||
|
||||
public NlSearchResponse search(String query, Pageable pageable) {
|
||||
if (query == null || query.length() < MIN_QUERY || query.length() > MAX_QUERY) {
|
||||
throw DomainException.badRequest(ErrorCode.VALIDATION_ERROR,
|
||||
"Query must be between " + MIN_QUERY + " and " + MAX_QUERY + " characters");
|
||||
}
|
||||
|
||||
OllamaExtraction ext = ollamaClient.parse(query);
|
||||
|
||||
List<String> personNames = ext.personNames() != null ? ext.personNames() : List.of();
|
||||
List<String> keywords = ext.keywords() != null ? ext.keywords() : List.of();
|
||||
|
||||
NameResolution resolution = resolveNames(personNames);
|
||||
|
||||
if (!resolution.ambiguous().isEmpty()) {
|
||||
NlQueryInterpretation interpretation = new NlQueryInterpretation(
|
||||
List.of(), resolution.ambiguous(),
|
||||
ext.dateFrom(), ext.dateTo(),
|
||||
keywords, ext.rawQuery(), false);
|
||||
return new NlSearchResponse(DocumentSearchResult.of(List.of()), interpretation);
|
||||
}
|
||||
|
||||
List<PersonHint> resolved = resolution.resolved();
|
||||
List<String> noMatchFragments = resolution.noMatchFragments();
|
||||
List<String> extraFragments = resolution.extraFragments();
|
||||
|
||||
String text = buildText(keywords, noMatchFragments, extraFragments, ext.rawQuery());
|
||||
|
||||
if (resolved.size() == 1 && isAnyRole(ext.personRole())) {
|
||||
UUID personId = resolved.get(0).id();
|
||||
DocumentSearchResult docs = documentService.searchDocumentsByPersonId(
|
||||
personId, ext.dateFrom(), ext.dateTo(), pageable);
|
||||
NlQueryInterpretation interpretation = new NlQueryInterpretation(
|
||||
resolved, List.of(), ext.dateFrom(), ext.dateTo(), keywords, ext.rawQuery(), false);
|
||||
return new NlSearchResponse(docs, interpretation);
|
||||
}
|
||||
|
||||
UUID sender = buildSender(resolved, ext.personRole());
|
||||
UUID receiver = buildReceiver(resolved, ext.personRole());
|
||||
|
||||
SearchFilters filters = new SearchFilters(
|
||||
text.isBlank() ? null : text,
|
||||
ext.dateFrom(), ext.dateTo(),
|
||||
sender, receiver,
|
||||
List.of(), null,
|
||||
null, TagOperator.AND, false);
|
||||
|
||||
DocumentSearchResult docs = documentService.searchDocuments(filters, DocumentSort.DATE, "desc", pageable);
|
||||
boolean keywordsApplied = !text.isBlank();
|
||||
NlQueryInterpretation interpretation = new NlQueryInterpretation(
|
||||
resolved, List.of(), ext.dateFrom(), ext.dateTo(), keywords, ext.rawQuery(), keywordsApplied);
|
||||
return new NlSearchResponse(docs, interpretation);
|
||||
}
|
||||
|
||||
private NameResolution resolveNames(List<String> personNames) {
|
||||
List<PersonHint> resolved = new ArrayList<>();
|
||||
List<PersonHint> ambiguous = new ArrayList<>();
|
||||
List<String> noMatchFragments = new ArrayList<>();
|
||||
List<String> extraFragments = new ArrayList<>();
|
||||
|
||||
int resolvedIndex = 0;
|
||||
for (String name : personNames) {
|
||||
if (name == null || name.length() > MAX_NAME_LENGTH) {
|
||||
log.debug("Skipping name fragment (too long or null): length={}", name == null ? 0 : name.length());
|
||||
continue;
|
||||
}
|
||||
List<Person> candidates = personService.findByDisplayNameContaining(name);
|
||||
List<Person> capped = candidates.size() > MAX_CANDIDATES
|
||||
? candidates.subList(0, MAX_CANDIDATES)
|
||||
: candidates;
|
||||
|
||||
if (capped.isEmpty()) {
|
||||
noMatchFragments.add(name);
|
||||
} else if (capped.size() == 1) {
|
||||
Person p = capped.get(0);
|
||||
PersonHint hint = new PersonHint(p.getId(), p.getDisplayName());
|
||||
resolvedIndex++;
|
||||
if (resolvedIndex <= 2) {
|
||||
resolved.add(hint);
|
||||
} else {
|
||||
extraFragments.add(name);
|
||||
}
|
||||
} else {
|
||||
capped.forEach(p -> ambiguous.add(new PersonHint(p.getId(), p.getDisplayName())));
|
||||
}
|
||||
}
|
||||
|
||||
return new NameResolution(resolved, ambiguous, noMatchFragments, extraFragments);
|
||||
}
|
||||
|
||||
private String buildText(List<String> keywords, List<String> noMatchFragments,
|
||||
List<String> extraFragments, String rawQuery) {
|
||||
List<String> parts = new ArrayList<>();
|
||||
parts.addAll(keywords);
|
||||
parts.addAll(noMatchFragments);
|
||||
parts.addAll(extraFragments);
|
||||
String text = String.join(" ", parts).strip();
|
||||
if (text.isBlank() && rawQuery != null && !rawQuery.isBlank()) {
|
||||
return rawQuery;
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
private boolean isAnyRole(String role) {
|
||||
return role == null || "any".equals(role) || (!"sender".equals(role) && !"receiver".equals(role));
|
||||
}
|
||||
|
||||
private UUID buildSender(List<PersonHint> resolved, String role) {
|
||||
if (resolved.size() >= 2) return resolved.get(0).id();
|
||||
if (resolved.size() == 1 && "sender".equals(role)) return resolved.get(0).id();
|
||||
return null;
|
||||
}
|
||||
|
||||
private UUID buildReceiver(List<PersonHint> resolved, String role) {
|
||||
if (resolved.size() >= 2) return resolved.get(1).id();
|
||||
if (resolved.size() == 1 && "receiver".equals(role)) return resolved.get(0).id();
|
||||
return null;
|
||||
}
|
||||
|
||||
private record NameResolution(
|
||||
List<PersonHint> resolved,
|
||||
List<PersonHint> ambiguous,
|
||||
List<String> noMatchFragments,
|
||||
List<String> extraFragments
|
||||
) {}
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
import jakarta.validation.Valid;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.raddatz.familienarchiv.security.Permission;
|
||||
import org.raddatz.familienarchiv.security.RequirePermission;
|
||||
import org.springframework.data.domain.Pageable;
|
||||
import org.springframework.security.core.annotation.AuthenticationPrincipal;
|
||||
import org.springframework.security.core.userdetails.UserDetails;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/search/nl")
|
||||
@RequiredArgsConstructor
|
||||
public class NlSearchController {
|
||||
|
||||
private final NlQueryParserService nlQueryParserService;
|
||||
private final NlSearchRateLimiter rateLimiter;
|
||||
|
||||
@PostMapping
|
||||
@RequirePermission(Permission.READ_ALL)
|
||||
public NlSearchResponse search(@Valid @RequestBody NlSearchRequest request,
|
||||
Pageable pageable,
|
||||
@AuthenticationPrincipal UserDetails principal) {
|
||||
rateLimiter.checkAndConsume(principal.getUsername());
|
||||
return nlQueryParserService.search(request.query(), pageable);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
@ConfigurationProperties("app.nl-search.rate-limit")
|
||||
@Data
|
||||
public class NlSearchRateLimitProperties {
|
||||
private int maxRequestsPerMinute = 5;
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
import com.github.benmanes.caffeine.cache.Caffeine;
|
||||
import com.github.benmanes.caffeine.cache.LoadingCache;
|
||||
import io.github.bucket4j.Bandwidth;
|
||||
import io.github.bucket4j.Bucket;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@Service
|
||||
public class NlSearchRateLimiter {
|
||||
|
||||
private final LoadingCache<String, Bucket> byUser;
|
||||
private final int maxRequestsPerMinute;
|
||||
|
||||
public NlSearchRateLimiter(NlSearchRateLimitProperties props) {
|
||||
this.maxRequestsPerMinute = props.getMaxRequestsPerMinute();
|
||||
this.byUser = Caffeine.newBuilder()
|
||||
.expireAfterAccess(1, TimeUnit.MINUTES)
|
||||
.build(key -> newBucket(maxRequestsPerMinute));
|
||||
}
|
||||
|
||||
public void checkAndConsume(String userKey) {
|
||||
if (!byUser.get(userKey).tryConsume(1)) {
|
||||
throw DomainException.tooManyRequests(ErrorCode.SMART_SEARCH_RATE_LIMITED,
|
||||
"NL search rate limit exceeded for user: " + userKey, 60L);
|
||||
}
|
||||
}
|
||||
|
||||
void resetForTest() {
|
||||
byUser.invalidateAll();
|
||||
}
|
||||
|
||||
private static Bucket newBucket(int limit) {
|
||||
return Bucket.builder()
|
||||
.addLimit(Bandwidth.builder()
|
||||
.capacity(limit)
|
||||
.refillGreedy(limit, Duration.ofMinutes(1))
|
||||
.build())
|
||||
.build();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
import jakarta.validation.constraints.NotBlank;
|
||||
import jakarta.validation.constraints.Size;
|
||||
|
||||
public record NlSearchRequest(
|
||||
@NotBlank
|
||||
@Size(min = 3, max = 500)
|
||||
String query
|
||||
) {
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import org.raddatz.familienarchiv.document.DocumentSearchResult;
|
||||
|
||||
public record NlSearchResponse(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
DocumentSearchResult result,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
NlQueryInterpretation interpretation
|
||||
) {
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
public interface OllamaClient {
|
||||
OllamaExtraction parse(String query);
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Structured output from Ollama after parsing and sanitising.
 * {@code personRole} is always one of "sender", "receiver" or "any" — defensive parsing
 * ensures this.
 *
 * @param personNames person name fragments found in the query
 * @param personRole  role the persons play: "sender", "receiver" or "any"
 * @param dateFrom    start of the requested date range, may be {@code null}
 * @param dateTo      end of the requested date range, may be {@code null}
 * @param keywords    remaining search keywords
 * @param rawQuery    the query text the extraction was produced from
 */
record OllamaExtraction(
        List<String> personNames,
        String personRole,
        LocalDate dateFrom,
        LocalDate dateTo,
        List<String> keywords,
        String rawQuery
) {
}
|
||||
@@ -0,0 +1,5 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
/** Abstraction over the Ollama availability probe. */
public interface OllamaHealthClient {

    /**
     * @return {@code true} when the health probe succeeded, {@code false} otherwise
     */
    boolean isHealthy();
}
|
||||
@@ -0,0 +1,15 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
@ConfigurationProperties("app.ollama")
|
||||
@Data
|
||||
public class OllamaProperties {
|
||||
private String baseUrl;
|
||||
private String model;
|
||||
private int timeoutSeconds = 30;
|
||||
private int healthCheckTimeoutSeconds = 2;
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
|
||||
import java.util.UUID;
|
||||
|
||||
public record PersonHint(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
UUID id,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
String displayName
|
||||
) {
|
||||
}
|
||||
@@ -0,0 +1,184 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.springframework.http.client.JdkClientHttpRequestFactory;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.client.RestClient;
|
||||
import org.springframework.web.client.RestClientException;
|
||||
|
||||
import java.net.http.HttpClient;
|
||||
import java.time.Duration;
|
||||
import java.time.LocalDate;
|
||||
import java.time.Year;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@Service
|
||||
@Slf4j
|
||||
public class RestClientOllamaClient implements OllamaClient, OllamaHealthClient {
|
||||
|
||||
private static final ObjectMapper MAPPER = new ObjectMapper();
|
||||
private static final Set<String> VALID_ROLES = Set.of("sender", "receiver", "any");
|
||||
private static final int MAX_NAME_LENGTH = 200;
|
||||
private static final int MAX_KEYWORD_LENGTH = 100;
|
||||
|
||||
private static final Map<String, Object> JSON_SCHEMA = Map.of(
|
||||
"type", "object",
|
||||
"required", List.of("personNames", "personRole", "keywords"),
|
||||
"properties", Map.of(
|
||||
"personNames", Map.of("type", "array", "items", Map.of("type", "string", "maxLength", MAX_NAME_LENGTH)),
|
||||
"personRole", Map.of("type", "string", "enum", List.of("sender", "receiver", "any")),
|
||||
"dateFrom", Map.of("type", List.of("string", "null"), "maxLength", 20),
|
||||
"dateTo", Map.of("type", List.of("string", "null"), "maxLength", 20),
|
||||
"keywords", Map.of("type", "array", "items", Map.of("type", "string", "maxLength", MAX_KEYWORD_LENGTH))
|
||||
)
|
||||
);
|
||||
|
||||
private final RestClient inferenceClient;
|
||||
private final RestClient healthClient;
|
||||
private final OllamaProperties props;
|
||||
|
||||
public RestClientOllamaClient(OllamaProperties props) {
|
||||
this.props = props;
|
||||
|
||||
HttpClient inferenceHttp = HttpClient.newBuilder()
|
||||
.version(HttpClient.Version.HTTP_1_1)
|
||||
.connectTimeout(Duration.ofSeconds(10))
|
||||
.build();
|
||||
JdkClientHttpRequestFactory inferenceFactory = new JdkClientHttpRequestFactory(inferenceHttp);
|
||||
inferenceFactory.setReadTimeout(Duration.ofSeconds(props.getTimeoutSeconds()));
|
||||
this.inferenceClient = RestClient.builder()
|
||||
.baseUrl(props.getBaseUrl())
|
||||
.requestFactory(inferenceFactory)
|
||||
.build();
|
||||
|
||||
HttpClient healthHttp = HttpClient.newBuilder()
|
||||
.version(HttpClient.Version.HTTP_1_1)
|
||||
.connectTimeout(Duration.ofSeconds(props.getHealthCheckTimeoutSeconds()))
|
||||
.build();
|
||||
JdkClientHttpRequestFactory healthFactory = new JdkClientHttpRequestFactory(healthHttp);
|
||||
healthFactory.setReadTimeout(Duration.ofSeconds(props.getHealthCheckTimeoutSeconds()));
|
||||
this.healthClient = RestClient.builder()
|
||||
.baseUrl(props.getBaseUrl())
|
||||
.requestFactory(healthFactory)
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public OllamaExtraction parse(String query) {
|
||||
try {
|
||||
OllamaGenerateRequest request = new OllamaGenerateRequest(
|
||||
props.getModel(), query, JSON_SCHEMA, false);
|
||||
String responseBody = inferenceClient.post()
|
||||
.uri("/api/generate")
|
||||
.contentType(org.springframework.http.MediaType.APPLICATION_JSON)
|
||||
.body(request)
|
||||
.retrieve()
|
||||
.body(String.class);
|
||||
return parseOllamaResponse(responseBody, query);
|
||||
} catch (DomainException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
log.warn("Ollama inference failed: {}", e.getClass().getSimpleName());
|
||||
throw DomainException.serviceUnavailable(ErrorCode.SMART_SEARCH_UNAVAILABLE,
|
||||
"Ollama unavailable: " + e.getClass().getSimpleName());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isHealthy() {
|
||||
try {
|
||||
healthClient.get().uri("/api/tags").retrieve().toBodilessEntity();
|
||||
return true;
|
||||
} catch (Exception e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private OllamaExtraction parseOllamaResponse(String responseBody, String rawQuery) {
|
||||
try {
|
||||
OllamaGenerateResponse response = MAPPER.readValue(responseBody, OllamaGenerateResponse.class);
|
||||
String inner = response.response();
|
||||
if (inner == null || inner.isBlank()) {
|
||||
return fallbackExtraction(rawQuery);
|
||||
}
|
||||
RawOllamaOutput raw = MAPPER.readValue(inner, RawOllamaOutput.class);
|
||||
return toExtraction(raw, rawQuery);
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to parse Ollama response: {}", e.getClass().getSimpleName());
|
||||
throw DomainException.serviceUnavailable(ErrorCode.SMART_SEARCH_UNAVAILABLE,
|
||||
"Failed to parse Ollama response: " + e.getClass().getSimpleName());
|
||||
}
|
||||
}
|
||||
|
||||
private OllamaExtraction toExtraction(RawOllamaOutput raw, String rawQuery) {
|
||||
List<String> names = raw.personNames() == null ? List.of() : raw.personNames().stream()
|
||||
.filter(n -> n != null && n.length() <= MAX_NAME_LENGTH)
|
||||
.toList();
|
||||
List<String> keywords = raw.keywords() == null ? List.of() : raw.keywords().stream()
|
||||
.filter(k -> k != null && k.length() <= MAX_KEYWORD_LENGTH)
|
||||
.toList();
|
||||
String role = sanitiseRole(raw.personRole());
|
||||
LocalDate dateFrom = parseDate(raw.dateFrom(), true);
|
||||
LocalDate dateTo = parseDate(raw.dateTo(), false);
|
||||
return new OllamaExtraction(names, role, dateFrom, dateTo, keywords, rawQuery);
|
||||
}
|
||||
|
||||
private OllamaExtraction fallbackExtraction(String rawQuery) {
|
||||
return new OllamaExtraction(List.of(), "any", null, null, List.of(), rawQuery);
|
||||
}
|
||||
|
||||
private String sanitiseRole(String role) {
|
||||
if (role != null && VALID_ROLES.contains(role)) {
|
||||
return role;
|
||||
}
|
||||
log.warn("Unexpected personRole from Ollama: {}", role);
|
||||
return "any";
|
||||
}
|
||||
|
||||
private LocalDate parseDate(String raw, boolean isFrom) {
|
||||
if (raw == null || raw.isBlank()) return null;
|
||||
try {
|
||||
return LocalDate.parse(raw, DateTimeFormatter.ISO_LOCAL_DATE);
|
||||
} catch (DateTimeParseException ignored) {
|
||||
}
|
||||
try {
|
||||
int year = Integer.parseInt(raw.strip());
|
||||
if (year > 1000 && year < 3000) {
|
||||
return isFrom ? Year.of(year).atDay(1) : Year.of(year).atMonth(12).atEndOfMonth();
|
||||
}
|
||||
} catch (NumberFormatException ignored) {
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
private record OllamaGenerateResponse(String response) {
|
||||
}
|
||||
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
private record RawOllamaOutput(
|
||||
@JsonProperty("personNames") List<String> personNames,
|
||||
@JsonProperty("personRole") String personRole,
|
||||
@JsonProperty("dateFrom") String dateFrom,
|
||||
@JsonProperty("dateTo") String dateTo,
|
||||
@JsonProperty("keywords") List<String> keywords
|
||||
) {
|
||||
}
|
||||
|
||||
private record OllamaGenerateRequest(
|
||||
String model,
|
||||
String prompt,
|
||||
Object format,
|
||||
boolean stream
|
||||
) {
|
||||
}
|
||||
}
|
||||
@@ -7,6 +7,13 @@ Hierarchical document categories. Tags form a tree via a self-referencing `paren
|
||||
Entity: `Tag` (self-referencing `parent_id` tree).
|
||||
Features: tag CRUD, hierarchical deletion (cascade to descendants), tag typeahead, admin tag management (rename, reparent, merge).
|
||||
|
||||
## Tag tree counts (`getTagTree`)
|
||||
|
||||
`GET /api/tags/tree` returns each node with **two** document counts, from two aggregate queries (no N+1):
|
||||
|
||||
- `documentCount` — documents tagged with that **exact** tag (direct). Read by the admin surfaces (sidebar tree, merge preview, delete-impact guard), which describe direct-document operations.
|
||||
- `subtreeDocumentCount` — **distinct** documents tagged with that tag **or any descendant** (subtree rollup, recursive-CTE closure, depth guard ≤50). Read by the reader surfaces (`/themen` page, dashboard `ThemenWidget`) so the box number matches what `/documents?tag=X` actually finds.
|
||||
|
||||
## What this domain does NOT own
|
||||
|
||||
- Documents — the `document_tags` join table is on the document side. `Tag` does not hold document references.
|
||||
|
||||
@@ -20,7 +20,14 @@ public interface TagRepository extends JpaRepository<Tag, UUID> {
|
||||
}
|
||||
|
||||
|
||||
Optional<Tag> findByNameIgnoreCase(String name);
|
||||
// Tag-name resolution (see TagService.findOrCreate). Names that collide case-insensitively across
|
||||
// the canonical tree are VALID — a parent and its same-named lowercase child (e.g. "Geburt" /
|
||||
// "Geburt/geburt") are distinct nodes with their own source_ref and document attachments. So
|
||||
// resolution must be exact-case first, then a non-throwing list for the case-insensitive fallback.
|
||||
// Do NOT add a unique(lower(name)) constraint — it would reject these legitimate rows. See #730.
|
||||
Optional<Tag> findByName(String name);
|
||||
|
||||
List<Tag> findAllByNameIgnoreCase(String name);
|
||||
|
||||
// Lookup by the canonical tag_path, used for idempotent canonical re-import (Phase 3).
|
||||
Optional<Tag> findBySourceRef(String sourceRef);
|
||||
@@ -126,4 +133,31 @@ public interface TagRepository extends JpaRepository<Tag, UUID> {
|
||||
*/
|
||||
@Query(value = "SELECT tag_id AS tagId, COUNT(*) AS count FROM document_tags GROUP BY tag_id", nativeQuery = true)
|
||||
List<TagCount> findDocumentCountsPerTag();
|
||||
|
||||
/**
|
||||
* Returns (tagId, count) pairs where count is the number of <b>distinct</b> documents tagged
|
||||
* with that tag <b>or any of its descendants</b> (full subtree rollup).
|
||||
* <p>
|
||||
* Builds a tag closure of (ancestor_id, descendant_id) pairs via a recursive CTE — each tag is
|
||||
* its own ancestor at depth 0, then descends into children (depth guard of 50 levels prevents a
|
||||
* cycle or pathological depth from running away) — joins it to {@code document_tags} on the
|
||||
* descendant, and counts distinct documents per ancestor. A document tagged with several tags in
|
||||
* the same subtree is therefore counted once. Tags whose entire subtree holds no documents do
|
||||
* not appear in the result (they default to 0 in the tree). One aggregate query for all tags.
|
||||
*/
|
||||
@Query(value = """
|
||||
WITH RECURSIVE closure AS (
|
||||
SELECT id AS ancestor_id, id AS descendant_id, 0 AS depth FROM tag
|
||||
UNION ALL
|
||||
SELECT c.ancestor_id, t.id AS descendant_id, c.depth + 1
|
||||
FROM tag t
|
||||
JOIN closure c ON t.parent_id = c.descendant_id
|
||||
WHERE c.depth < 50
|
||||
)
|
||||
SELECT c.ancestor_id AS tagId, COUNT(DISTINCT dt.document_id) AS count
|
||||
FROM closure c
|
||||
JOIN document_tags dt ON dt.tag_id = c.descendant_id
|
||||
GROUP BY c.ancestor_id
|
||||
""", nativeQuery = true)
|
||||
List<TagCount> findSubtreeDocumentCountsPerTag();
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package org.raddatz.familienarchiv.tag;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
@@ -55,10 +56,21 @@ public class TagService {
|
||||
return tagRepository.findBySourceRef(sourceRef);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves a tag name to a single tag, creating one when absent. Never throws on case-insensitive
|
||||
* collisions: names that differ only by case are valid distinct nodes in the canonical tree (a
|
||||
* parent and its same-named lowercase child), so resolution prefers an exact-case match, then
|
||||
* falls back to the lowest-id case-insensitive match, then creates. See #730.
|
||||
*/
|
||||
public Tag findOrCreate(String name) {
|
||||
String cleanName = name.trim();
|
||||
return tagRepository.findByNameIgnoreCase(cleanName)
|
||||
.orElseGet(() -> tagRepository.save(Tag.builder().name(cleanName).build()));
|
||||
Optional<Tag> exact = tagRepository.findByName(cleanName);
|
||||
if (exact.isPresent()) return exact.get(); // exact-case wins (edit round-trip replays the stored name)
|
||||
List<Tag> caseInsensitive = tagRepository.findAllByNameIgnoreCase(cleanName);
|
||||
if (!caseInsensitive.isEmpty()) {
|
||||
return caseInsensitive.stream().min(Comparator.comparing(Tag::getId)).orElseThrow(); // deterministic tie-break by id — list is non-empty, never throws
|
||||
}
|
||||
return tagRepository.save(Tag.builder().name(cleanName).build()); // create-when-absent (orphan tag: null sourceRef/parentId)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -172,19 +184,27 @@ public class TagService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all tags assembled into a tree with document counts per node.
|
||||
* Uses a single aggregate query to avoid N+1 behaviour.
|
||||
* NOTE: document counts are global per tag, not scoped to any search filter.
|
||||
* The tree endpoint is only used for the admin sidebar, so this is intentional.
|
||||
* Returns all tags assembled into a tree, each node carrying two counts:
|
||||
* {@code documentCount} — documents tagged with that exact tag (direct) — and
|
||||
* {@code subtreeDocumentCount} — distinct documents tagged with that tag or any descendant
|
||||
* (subtree rollup). Each count comes from one aggregate query (no N+1).
|
||||
* NOTE: counts are global per tag, not scoped to any search filter.
|
||||
* Consumed by the reader surfaces (/themen page, dashboard ThemenWidget — which read the
|
||||
* subtree rollup) as well as the admin sidebar and tag operation previews (which read the
|
||||
* direct count).
|
||||
*/
|
||||
public List<TagTreeNodeDTO> getTagTree() {
|
||||
List<Tag> all = tagRepository.findAll();
|
||||
Map<UUID, Long> counts = tagRepository.findDocumentCountsPerTag().stream()
|
||||
.collect(Collectors.toMap(
|
||||
TagRepository.TagCount::getTagId,
|
||||
TagRepository.TagCount::getCount
|
||||
));
|
||||
return buildTree(all, counts);
|
||||
Map<UUID, Long> counts = toCountMap(tagRepository.findDocumentCountsPerTag());
|
||||
Map<UUID, Long> subtreeCounts = toCountMap(tagRepository.findSubtreeDocumentCountsPerTag());
|
||||
return buildTree(all, counts, subtreeCounts);
|
||||
}
|
||||
|
||||
private static Map<UUID, Long> toCountMap(List<TagRepository.TagCount> counts) {
|
||||
return counts.stream().collect(Collectors.toMap(
|
||||
TagRepository.TagCount::getTagId,
|
||||
TagRepository.TagCount::getCount
|
||||
));
|
||||
}
|
||||
|
||||
// ─── private helpers ─────────────────────────────────────────────────────
|
||||
@@ -259,12 +279,14 @@ public class TagService {
|
||||
}
|
||||
}
|
||||
|
||||
private List<TagTreeNodeDTO> buildTree(List<Tag> tags, Map<UUID, Long> counts) {
|
||||
private List<TagTreeNodeDTO> buildTree(List<Tag> tags, Map<UUID, Long> counts,
|
||||
Map<UUID, Long> subtreeCounts) {
|
||||
Map<UUID, TagTreeNodeDTO> nodeById = new LinkedHashMap<>();
|
||||
for (Tag tag : tags) {
|
||||
int documentCount = counts.getOrDefault(tag.getId(), 0L).intValue();
|
||||
int subtreeDocumentCount = subtreeCounts.getOrDefault(tag.getId(), 0L).intValue();
|
||||
nodeById.put(tag.getId(), new TagTreeNodeDTO(
|
||||
tag.getId(), tag.getName(), tag.getColor(), documentCount,
|
||||
tag.getId(), tag.getName(), tag.getColor(), documentCount, subtreeDocumentCount,
|
||||
new ArrayList<>(), tag.getParentId()
|
||||
));
|
||||
}
|
||||
|
||||
@@ -10,5 +10,8 @@ public record TagTreeNodeDTO(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) String name,
|
||||
String color,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) int documentCount,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED,
|
||||
description = "Distinct documents tagged with this tag or any descendant tag (subtree rollup)")
|
||||
int subtreeDocumentCount,
|
||||
List<TagTreeNodeDTO> children,
|
||||
@Schema(description = "Parent tag ID, null for root tags") UUID parentId) {}
|
||||
|
||||
@@ -51,6 +51,12 @@ public class AdminController {
|
||||
return ResponseEntity.ok(new BackfillResult(count));
|
||||
}
|
||||
|
||||
@PostMapping("/backfill-titles")
|
||||
public ResponseEntity<BackfillResult> backfillTitles() {
|
||||
int count = documentService.backfillTitles();
|
||||
return ResponseEntity.ok(new BackfillResult(count));
|
||||
}
|
||||
|
||||
@PostMapping("/generate-thumbnails")
|
||||
public ResponseEntity<ThumbnailBackfillService.BackfillStatus> generateThumbnails() {
|
||||
thumbnailBackfillService.runBackfillAsync();
|
||||
|
||||
@@ -11,3 +11,7 @@ springdoc:
|
||||
swagger-ui:
|
||||
enabled: true
|
||||
path: /swagger-ui.html
|
||||
|
||||
app:
|
||||
ollama:
|
||||
base-url: http://localhost:11434
|
||||
|
||||
@@ -130,6 +130,16 @@ app:
|
||||
# The loader maps columns by header name — no positional indices (see ADR-025).
|
||||
dir: ${IMPORT_DIR:/import}
|
||||
|
||||
ollama:
|
||||
base-url: http://ollama:11434
|
||||
model: qwen2.5:7b-instruct-q4_K_M
|
||||
timeout-seconds: 30
|
||||
health-check-timeout-seconds: 2
|
||||
|
||||
nl-search:
|
||||
rate-limit:
|
||||
max-requests-per-minute: 5
|
||||
|
||||
ocr:
|
||||
sender-model:
|
||||
activation-threshold: 100
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
-- Move person-delete referential integrity from application code into the database (#684).
|
||||
--
|
||||
-- Before this migration, PersonService.deletePerson nulled documents.sender_id and removed
|
||||
-- document_receivers rows in Java before deleting the person, because the two V1 FKs into
|
||||
-- persons had no ON DELETE behaviour. Any other delete path (a future endpoint, a manual
|
||||
-- psql, a batch job) could still orphan rows or 500. This migration makes the database the
|
||||
-- single source of truth so a person delete is safe from every path.
|
||||
--
|
||||
-- Cascade boundary: the cascade stays STRICTLY at the join/reference layer and NEVER reaches
|
||||
-- documents rows — a cascade into documents would destroy historical letters. sender_id is
|
||||
-- SET NULL (documents.senderText preserves the raw textual attribution); the receiver join
|
||||
-- row and the @-mention sidecar row are dropped.
|
||||
--
|
||||
-- No NOT VALID + VALIDATE two-step: these tables are small (thousands of rows → sub-second
|
||||
-- ACCESS EXCLUSIVE lock). Do NOT copy this drop-and-recreate pattern onto a large table.
|
||||
--
|
||||
-- Not audit-logged: a DB ON DELETE cascade runs below AuditService — a known, accepted trade.
|
||||
-- The person-delete action itself is still logged at the service layer.
|
||||
|
||||
-- documents.sender_id → ON DELETE SET NULL (deleted sender clears the link; the document survives).
|
||||
ALTER TABLE public.documents
|
||||
DROP CONSTRAINT fkl5xhww7es3b4um01vmly4y18m,
|
||||
ADD CONSTRAINT fkl5xhww7es3b4um01vmly4y18m
|
||||
FOREIGN KEY (sender_id) REFERENCES public.persons(id) ON DELETE SET NULL;
|
||||
|
||||
-- document_receivers.person_id → ON DELETE CASCADE (drop the join row), the symmetric
|
||||
-- completion of V14, which added the same to the document_id side of this table.
|
||||
ALTER TABLE public.document_receivers
|
||||
DROP CONSTRAINT fkcg7r68qvosqricx1betgrlt7s,
|
||||
ADD CONSTRAINT fkcg7r68qvosqricx1betgrlt7s
|
||||
FOREIGN KEY (person_id) REFERENCES public.persons(id) ON DELETE CASCADE;
|
||||
|
||||
-- Soft reference fix: transcription_block_mentioned_persons.person_id was a UUID with no FK
|
||||
-- (V56), so deleting a person left dangling mention rows. Give it a real FK with CASCADE.
|
||||
-- This reverses V56's deliberate "no FK on person_id" choice — that comment is now historical
|
||||
-- but is intentionally left untouched, because editing an already-applied migration changes its
|
||||
-- Flyway checksum and would fail validateOnMigrate in prod. ADR-032 is the authoritative record.
|
||||
-- Clean up pre-existing orphans first — production likely holds dangling rows because the old
|
||||
-- deletePerson never cleaned mention rows, and the ADD CONSTRAINT validation scan fails on them.
|
||||
-- A DO block with RAISE NOTICE surfaces the purge count: Flyway runs each statement via JDBC
|
||||
-- and discards a trailing SELECT's result set, so a "SELECT count(*)" would log nothing.
|
||||
DO $$
|
||||
DECLARE removed int;
|
||||
BEGIN
|
||||
DELETE FROM transcription_block_mentioned_persons m
|
||||
WHERE NOT EXISTS (SELECT 1 FROM persons p WHERE p.id = m.person_id);
|
||||
GET DIAGNOSTICS removed = ROW_COUNT;
|
||||
RAISE NOTICE 'V71 orphaned_mention_rows_removed=%', removed;
|
||||
END $$;
|
||||
|
||||
ALTER TABLE public.transcription_block_mentioned_persons
|
||||
ADD CONSTRAINT fk_tbmp_person
|
||||
FOREIGN KEY (person_id) REFERENCES public.persons(id) ON DELETE CASCADE;
|
||||
@@ -38,7 +38,6 @@ import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.anyBoolean;
|
||||
import static org.mockito.ArgumentMatchers.anyInt;
|
||||
import static org.mockito.ArgumentMatchers.eq;
|
||||
import static org.mockito.Mockito.verify;
|
||||
@@ -76,7 +75,7 @@ class DocumentControllerTest {
|
||||
@Test
|
||||
@WithMockUser
|
||||
void search_returns200_whenAuthenticated() throws Exception {
|
||||
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
|
||||
when(documentService.searchDocuments(any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.of(List.of()));
|
||||
|
||||
mockMvc.perform(get("/api/documents/search"))
|
||||
@@ -88,7 +87,7 @@ class DocumentControllerTest {
|
||||
void search_undatedTrue_isReachableByAuthenticatedUser() throws Exception {
|
||||
// The read GET must stay reachable for READ_ALL users — guards against a
|
||||
// future refactor accidentally write-guarding the undated triage path (#668).
|
||||
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
|
||||
when(documentService.searchDocuments(any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.of(List.of()));
|
||||
|
||||
mockMvc.perform(get("/api/documents/search").param("undated", "true"))
|
||||
@@ -104,41 +103,43 @@ class DocumentControllerTest {
|
||||
@Test
|
||||
@WithMockUser
|
||||
void search_undatedTrue_isForwardedToServiceAsTrue() throws Exception {
|
||||
ArgumentCaptor<Boolean> undatedCaptor = ArgumentCaptor.forClass(Boolean.class);
|
||||
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
|
||||
ArgumentCaptor<SearchFilters> filtersCaptor = ArgumentCaptor.forClass(SearchFilters.class);
|
||||
when(documentService.searchDocuments(any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.of(List.of()));
|
||||
|
||||
mockMvc.perform(get("/api/documents/search").param("undated", "true"))
|
||||
.andExpect(status().isOk());
|
||||
|
||||
verify(documentService).searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), undatedCaptor.capture(), any());
|
||||
assertThat(undatedCaptor.getValue()).isTrue();
|
||||
verify(documentService).searchDocuments(filtersCaptor.capture(), any(), any(), any());
|
||||
assertThat(filtersCaptor.getValue().undated()).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser
|
||||
void search_withoutUndatedParam_forwardsFalseToService() throws Exception {
|
||||
ArgumentCaptor<Boolean> undatedCaptor = ArgumentCaptor.forClass(Boolean.class);
|
||||
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
|
||||
ArgumentCaptor<SearchFilters> filtersCaptor = ArgumentCaptor.forClass(SearchFilters.class);
|
||||
when(documentService.searchDocuments(any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.of(List.of()));
|
||||
|
||||
mockMvc.perform(get("/api/documents/search"))
|
||||
.andExpect(status().isOk());
|
||||
|
||||
verify(documentService).searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), undatedCaptor.capture(), any());
|
||||
assertThat(undatedCaptor.getValue()).isFalse();
|
||||
verify(documentService).searchDocuments(filtersCaptor.capture(), any(), any(), any());
|
||||
assertThat(filtersCaptor.getValue().undated()).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser
|
||||
void search_withStatusParam_passesItToService() throws Exception {
|
||||
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any(), any(), anyBoolean(), any()))
|
||||
ArgumentCaptor<SearchFilters> filtersCaptor = ArgumentCaptor.forClass(SearchFilters.class);
|
||||
when(documentService.searchDocuments(any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.of(List.of()));
|
||||
|
||||
mockMvc.perform(get("/api/documents/search").param("status", "REVIEWED"))
|
||||
.andExpect(status().isOk());
|
||||
|
||||
verify(documentService).searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any(), any(), anyBoolean(), any());
|
||||
verify(documentService).searchDocuments(filtersCaptor.capture(), any(), any(), any());
|
||||
assertThat(filtersCaptor.getValue().status()).isEqualTo(DocumentStatus.REVIEWED);
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -165,7 +166,7 @@ class DocumentControllerTest {
|
||||
@Test
|
||||
@WithMockUser
|
||||
void search_responseContainsTotalCount() throws Exception {
|
||||
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
|
||||
when(documentService.searchDocuments(any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.of(List.of()));
|
||||
|
||||
mockMvc.perform(get("/api/documents/search"))
|
||||
@@ -180,7 +181,7 @@ class DocumentControllerTest {
|
||||
UUID docId = UUID.randomUUID();
|
||||
var matchData = new SearchMatchData(
|
||||
"Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of(), null, List.of());
|
||||
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
|
||||
when(documentService.searchDocuments(any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem(
|
||||
docId, "Brief an Anna", "brief.pdf", null, null,
|
||||
DatePrecision.UNKNOWN, null, null,
|
||||
@@ -200,7 +201,7 @@ class DocumentControllerTest {
|
||||
void search_returns_flat_item_with_id_and_without_sensitive_fields() throws Exception {
|
||||
UUID docId = UUID.randomUUID();
|
||||
var matchData = new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of());
|
||||
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
|
||||
when(documentService.searchDocuments(any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem(
|
||||
docId, "Brief an Anna", "brief.pdf", null, null,
|
||||
DatePrecision.UNKNOWN, null, null,
|
||||
@@ -223,7 +224,7 @@ class DocumentControllerTest {
|
||||
@Test
|
||||
@WithMockUser
|
||||
void search_responseExposesPagingFields() throws Exception {
|
||||
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
|
||||
when(documentService.searchDocuments(any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.of(List.of()));
|
||||
|
||||
mockMvc.perform(get("/api/documents/search"))
|
||||
@@ -268,7 +269,7 @@ class DocumentControllerTest {
|
||||
@Test
|
||||
@WithMockUser
|
||||
void search_passesPageRequestToService() throws Exception {
|
||||
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
|
||||
when(documentService.searchDocuments(any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.of(List.of()));
|
||||
|
||||
mockMvc.perform(get("/api/documents/search").param("page", "2").param("size", "25"))
|
||||
@@ -276,7 +277,7 @@ class DocumentControllerTest {
|
||||
|
||||
org.mockito.ArgumentCaptor<org.springframework.data.domain.Pageable> captor =
|
||||
org.mockito.ArgumentCaptor.forClass(org.springframework.data.domain.Pageable.class);
|
||||
verify(documentService).searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), captor.capture());
|
||||
verify(documentService).searchDocuments(any(), any(), any(), captor.capture());
|
||||
org.springframework.data.domain.Pageable pageable = captor.getValue();
|
||||
org.assertj.core.api.Assertions.assertThat(pageable.getPageNumber()).isEqualTo(2);
|
||||
org.assertj.core.api.Assertions.assertThat(pageable.getPageSize()).isEqualTo(25);
|
||||
@@ -1208,7 +1209,7 @@ class DocumentControllerTest {
|
||||
void getDocumentIds_returns200_andDelegatesToService() throws Exception {
|
||||
when(userService.findByEmail(any())).thenReturn(AppUser.builder().id(UUID.randomUUID()).build());
|
||||
UUID id = UUID.randomUUID();
|
||||
when(documentService.findIdsForFilter(any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean()))
|
||||
when(documentService.findIdsForFilter(any()))
|
||||
.thenReturn(List.of(id));
|
||||
|
||||
mockMvc.perform(get("/api/documents/ids"))
|
||||
@@ -1221,13 +1222,33 @@ class DocumentControllerTest {
|
||||
void getDocumentIds_passesSenderIdParamToService() throws Exception {
|
||||
when(userService.findByEmail(any())).thenReturn(AppUser.builder().id(UUID.randomUUID()).build());
|
||||
UUID senderId = UUID.randomUUID();
|
||||
when(documentService.findIdsForFilter(any(), any(), any(), eq(senderId), any(), any(), any(), any(), any(), anyBoolean()))
|
||||
ArgumentCaptor<SearchFilters> filtersCaptor = ArgumentCaptor.forClass(SearchFilters.class);
|
||||
when(documentService.findIdsForFilter(any()))
|
||||
.thenReturn(List.of());
|
||||
|
||||
mockMvc.perform(get("/api/documents/ids").param("senderId", senderId.toString()))
|
||||
.andExpect(status().isOk());
|
||||
|
||||
verify(documentService).findIdsForFilter(any(), any(), any(), eq(senderId), any(), any(), any(), any(), any(), anyBoolean());
|
||||
verify(documentService).findIdsForFilter(filtersCaptor.capture());
|
||||
assertThat(filtersCaptor.getValue().sender()).isEqualTo(senderId);
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "WRITE_ALL")
|
||||
void getDocumentIds_withoutUndatedParam_coercesNullToFalse() throws Exception {
|
||||
// The controller coerces a null boxed Boolean to primitive false
|
||||
// (Boolean.TRUE.equals(undated)) so the absent param never NPEs and the
|
||||
// record always holds a concrete boolean.
|
||||
when(userService.findByEmail(any())).thenReturn(AppUser.builder().id(UUID.randomUUID()).build());
|
||||
ArgumentCaptor<SearchFilters> filtersCaptor = ArgumentCaptor.forClass(SearchFilters.class);
|
||||
when(documentService.findIdsForFilter(any()))
|
||||
.thenReturn(List.of());
|
||||
|
||||
mockMvc.perform(get("/api/documents/ids"))
|
||||
.andExpect(status().isOk());
|
||||
|
||||
verify(documentService).findIdsForFilter(filtersCaptor.capture());
|
||||
assertThat(filtersCaptor.getValue().undated()).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -1237,7 +1258,7 @@ class DocumentControllerTest {
|
||||
// Service returns 5001 IDs — one over BULK_EDIT_FILTER_MAX_IDS (5000).
|
||||
java.util.List<UUID> tooMany = new java.util.ArrayList<>(5001);
|
||||
for (int i = 0; i < 5001; i++) tooMany.add(UUID.randomUUID());
|
||||
when(documentService.findIdsForFilter(any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean()))
|
||||
when(documentService.findIdsForFilter(any()))
|
||||
.thenReturn(tooMany);
|
||||
|
||||
mockMvc.perform(get("/api/documents/ids"))
|
||||
@@ -1402,16 +1423,16 @@ class DocumentControllerTest {
|
||||
|
||||
@Test
|
||||
@WithMockUser
|
||||
void density_emitsPrivateCacheControlHeader() throws Exception {
|
||||
void density_isNeverBrowserCached() throws Exception {
|
||||
when(documentService.getDensity(any())).thenReturn(
|
||||
new DocumentDensityResult(List.of(), null, null));
|
||||
|
||||
// The endpoint sets no explicit Cache-Control, so Spring Security's
|
||||
// default no-store directive applies — the density chart is always fresh.
|
||||
mockMvc.perform(get("/api/documents/density"))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(header().string("Cache-Control",
|
||||
org.hamcrest.Matchers.containsString("max-age=300")))
|
||||
.andExpect(header().string("Cache-Control",
|
||||
org.hamcrest.Matchers.containsString("private")));
|
||||
"no-cache, no-store, max-age=0, must-revalidate"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
@@ -24,6 +24,7 @@ import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.raddatz.familienarchiv.document.SearchFiltersFixtures.noFilters;
|
||||
import static org.assertj.core.api.Assertions.assertThatCode;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.Mockito.when;
|
||||
@@ -122,8 +123,8 @@ class DocumentLazyLoadingTest {
|
||||
savedDocument("SrDoc", "sr_doc.pdf", sender, Set.of(receiver), Set.of(tag));
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.RECEIVER, "asc", null, false, PageRequest.of(0, 20));
|
||||
noFilters(),
|
||||
DocumentSort.RECEIVER, "asc", PageRequest.of(0, 20));
|
||||
assertThat(result.totalElements()).isGreaterThan(0);
|
||||
assertThatCode(() ->
|
||||
result.items().forEach(i -> { if (i.sender() != null) i.sender().getLastName(); }))
|
||||
@@ -137,8 +138,8 @@ class DocumentLazyLoadingTest {
|
||||
savedDocument("SsDoc", "ss_doc.pdf", sender, Set.of(), Set.of(tag));
|
||||
|
||||
assertThatCode(() -> documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.SENDER, "asc", null, false, PageRequest.of(0, 20)))
|
||||
noFilters(),
|
||||
DocumentSort.SENDER, "asc", PageRequest.of(0, 20)))
|
||||
.doesNotThrowAnyException();
|
||||
}
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.raddatz.familienarchiv.document.SearchFiltersFixtures.noFilters;
|
||||
import static org.assertj.core.api.Assertions.assertThatCode;
|
||||
|
||||
/**
|
||||
@@ -55,8 +56,8 @@ class DocumentListItemIntegrationTest {
|
||||
.build());
|
||||
|
||||
assertThatCode(() -> documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50)))
|
||||
noFilters(),
|
||||
DocumentSort.DATE, "DESC", PageRequest.of(0, 50)))
|
||||
.doesNotThrowAnyException();
|
||||
}
|
||||
|
||||
@@ -70,8 +71,8 @@ class DocumentListItemIntegrationTest {
|
||||
.build());
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
|
||||
noFilters(),
|
||||
DocumentSort.DATE, "DESC", PageRequest.of(0, 50));
|
||||
|
||||
assertThat(result.totalElements()).isGreaterThan(0);
|
||||
DocumentListItem item = result.items().get(0);
|
||||
@@ -91,8 +92,8 @@ class DocumentListItemIntegrationTest {
|
||||
.build());
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
|
||||
noFilters(),
|
||||
DocumentSort.DATE, "DESC", PageRequest.of(0, 50));
|
||||
|
||||
DocumentListItem item = result.items().stream()
|
||||
.filter(i -> i.title().equals("Range Brief")).findFirst().orElseThrow();
|
||||
|
||||
@@ -38,7 +38,10 @@ import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
import org.springframework.dao.DataIntegrityViolationException;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
|
||||
@DataJpaTest
|
||||
@AutoConfigureTestDatabase(replace = AutoConfigureTestDatabase.Replace.NONE)
|
||||
@@ -259,67 +262,6 @@ class DocumentRepositoryTest {
|
||||
assertThat(result.getContent()).allMatch(d -> !d.isMetadataComplete());
|
||||
}
|
||||
|
||||
// ─── findSinglePersonCorrespondence — DISTINCT / multi-receiver safety ────
|
||||
|
||||
@Test
|
||||
void findSinglePersonCorrespondence_returnsExactlyOneResult_whenDocumentHasThreeReceiversAndOneMatchesPersonId() {
|
||||
Person sender = personRepository.save(Person.builder()
|
||||
.firstName("Hans").lastName("Müller").build());
|
||||
Person receiver1 = personRepository.save(Person.builder()
|
||||
.firstName("Anna").lastName("Schmidt").build());
|
||||
Person receiver2 = personRepository.save(Person.builder()
|
||||
.firstName("Bertha").lastName("Wagner").build());
|
||||
Person receiver3 = personRepository.save(Person.builder()
|
||||
.firstName("Clara").lastName("Koch").build());
|
||||
|
||||
// Document addressed to all three receivers
|
||||
Document doc = documentRepository.save(Document.builder()
|
||||
.title("Rundschreiben")
|
||||
.originalFilename("rundschreiben.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.sender(sender)
|
||||
.receivers(new HashSet<>(Set.of(receiver1, receiver2, receiver3)))
|
||||
.documentDate(LocalDate.of(1950, 6, 1))
|
||||
.build());
|
||||
|
||||
Sort sort = Sort.by(Sort.Direction.DESC, "documentDate");
|
||||
LocalDate from = LocalDate.of(1900, 1, 1);
|
||||
LocalDate to = LocalDate.of(2000, 1, 1);
|
||||
|
||||
// Query for receiver1 — the DISTINCT must collapse the 3 JOIN rows into 1 result
|
||||
List<Document> results = documentRepository.findSinglePersonCorrespondence(
|
||||
receiver1.getId(), from, to, sort);
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
assertThat(results.get(0).getId()).isEqualTo(doc.getId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void findSinglePersonCorrespondence_includesDocumentsWherePerson_isSender() {
|
||||
Person sender = personRepository.save(Person.builder()
|
||||
.firstName("Hans").lastName("Müller").build());
|
||||
Person receiver = personRepository.save(Person.builder()
|
||||
.firstName("Anna").lastName("Schmidt").build());
|
||||
|
||||
documentRepository.save(Document.builder()
|
||||
.title("Brief als Absender")
|
||||
.originalFilename("brief_absender.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.sender(sender)
|
||||
.receivers(new HashSet<>(Set.of(receiver)))
|
||||
.documentDate(LocalDate.of(1950, 6, 1))
|
||||
.build());
|
||||
|
||||
Sort sort = Sort.by(Sort.Direction.DESC, "documentDate");
|
||||
LocalDate from = LocalDate.of(1900, 1, 1);
|
||||
LocalDate to = LocalDate.of(2000, 1, 1);
|
||||
|
||||
List<Document> results = documentRepository.findSinglePersonCorrespondence(
|
||||
sender.getId(), from, to, sort);
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
}
|
||||
|
||||
// ─── findSegmentationQueue ────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
@@ -612,6 +554,48 @@ class DocumentRepositoryTest {
|
||||
.isLessThanOrEqualTo(5);
|
||||
}
|
||||
|
||||
// ─── V69 date-range CHECK constraints (#678) ──────────────────────────────
|
||||
|
||||
@Test
|
||||
void save_acceptsRange_whenEndEqualsStart() {
|
||||
// chk_meta_date_end_after_start is end >= start, so equal dates are valid.
|
||||
// Real Postgres + Flyway here (H2 would not enforce the CHECK) pins the
|
||||
// app guard's isBefore semantics to the actual constraint — guards drift (AC2).
|
||||
LocalDate day = LocalDate.of(1917, 1, 10);
|
||||
Document saved = documentRepository.saveAndFlush(Document.builder()
|
||||
.title("Gleicher Tag")
|
||||
.originalFilename("gleicher_tag.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.documentDate(day)
|
||||
.metaDatePrecision(DatePrecision.RANGE)
|
||||
.metaDateEnd(day)
|
||||
.build());
|
||||
|
||||
Document found = documentRepository.findById(saved.getId()).orElseThrow();
|
||||
assertThat(found.getDocumentDate()).isEqualTo(day);
|
||||
assertThat(found.getMetaDateEnd()).isEqualTo(day);
|
||||
assertThat(found.getMetaDatePrecision()).isEqualTo(DatePrecision.RANGE);
|
||||
}
|
||||
|
||||
@Test
|
||||
void save_rejectsRange_whenEndBeforeStart_atDbLevel() {
|
||||
// The app guard normally intercepts this, so the DB CHECK never fires in practice.
|
||||
// Persisting directly proves chk_meta_date_end_after_start actually rejects end < start
|
||||
// (H2 would not) — if the app guard ever regresses, a bad row still can't reach the table,
|
||||
// and this is exactly the violation the GlobalExceptionHandler backstop turns into a 400.
|
||||
Document doc = Document.builder()
|
||||
.title("Verdrehte Spanne")
|
||||
.originalFilename("verdreht.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.documentDate(LocalDate.of(1917, 1, 11))
|
||||
.metaDatePrecision(DatePrecision.RANGE)
|
||||
.metaDateEnd(LocalDate.of(1917, 1, 10))
|
||||
.build();
|
||||
|
||||
assertThatThrownBy(() -> documentRepository.saveAndFlush(doc))
|
||||
.isInstanceOf(DataIntegrityViolationException.class);
|
||||
}
|
||||
|
||||
// ─── seeding helpers ─────────────────────────────────────────────────────
|
||||
|
||||
private Document uploaded(String title) {
|
||||
@@ -640,4 +624,88 @@ class DocumentRepositoryTest {
|
||||
.reviewed(reviewed)
|
||||
.build();
|
||||
}
|
||||
|
||||
// ─── searchDocumentsByPersonId (via Specification) ───────────────────────
|
||||
|
||||
private Page<Document> searchByPerson(Person person, LocalDate from, LocalDate to) {
|
||||
Specification<Document> spec = (root, query, cb) -> {
|
||||
if (query != null) query.distinct(true);
|
||||
var receiversJoin = root.join("receivers", jakarta.persistence.criteria.JoinType.LEFT);
|
||||
var personPredicate = cb.or(
|
||||
cb.equal(root.get("sender"), person),
|
||||
cb.equal(receiversJoin, person));
|
||||
var predicates = new java.util.ArrayList<>(java.util.List.of(personPredicate));
|
||||
if (from != null) predicates.add(cb.greaterThanOrEqualTo(root.get("documentDate"), from));
|
||||
if (to != null) predicates.add(cb.lessThanOrEqualTo(root.get("documentDate"), to));
|
||||
return cb.and(predicates.toArray(new jakarta.persistence.criteria.Predicate[0]));
|
||||
};
|
||||
return documentRepository.findAll(spec, PageRequest.of(0, 10));
|
||||
}
|
||||
|
||||
@Test
|
||||
void searchByPersonSpec_returnsDocument_whenPersonIsSender() {
|
||||
Person person = personRepository.save(Person.builder().lastName("Raddatz").build());
|
||||
Document doc = documentRepository.save(Document.builder()
|
||||
.title("Senderbrief").originalFilename("sender.pdf")
|
||||
.status(DocumentStatus.UPLOADED).sender(person).build());
|
||||
|
||||
Page<Document> result = searchByPerson(person, null, null);
|
||||
|
||||
assertThat(result.getContent()).extracting(Document::getId).containsExactly(doc.getId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void searchByPersonSpec_returnsDocument_whenPersonIsReceiver() {
|
||||
Person person = personRepository.save(Person.builder().lastName("Raddatz").build());
|
||||
Document doc = documentRepository.save(Document.builder()
|
||||
.title("Empfängerbrief").originalFilename("receiver.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.receivers(new java.util.HashSet<>(List.of(person))).build());
|
||||
|
||||
Page<Document> result = searchByPerson(person, null, null);
|
||||
|
||||
assertThat(result.getContent()).extracting(Document::getId).containsExactly(doc.getId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void searchByPersonSpec_returnsDocumentOnce_whenPersonIsBothSenderAndReceiver() {
|
||||
Person person = personRepository.save(Person.builder().lastName("Raddatz").build());
|
||||
Document doc = documentRepository.save(Document.builder()
|
||||
.title("SenderEmpfänger").originalFilename("both.pdf")
|
||||
.status(DocumentStatus.UPLOADED).sender(person)
|
||||
.receivers(new java.util.HashSet<>(List.of(person))).build());
|
||||
|
||||
Page<Document> result = searchByPerson(person, null, null);
|
||||
|
||||
assertThat(result.getContent()).hasSize(1);
|
||||
assertThat(result.getContent().get(0).getId()).isEqualTo(doc.getId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void searchByPersonSpec_excludesDocuments_outsideDateRange() {
|
||||
Person person = personRepository.save(Person.builder().lastName("Raddatz").build());
|
||||
Document inside = documentRepository.save(Document.builder()
|
||||
.title("Innen").originalFilename("inside.pdf").status(DocumentStatus.UPLOADED)
|
||||
.sender(person).documentDate(LocalDate.of(1918, 6, 15)).build());
|
||||
documentRepository.save(Document.builder()
|
||||
.title("Außen").originalFilename("outside.pdf").status(DocumentStatus.UPLOADED)
|
||||
.sender(person).documentDate(LocalDate.of(1920, 1, 1)).build());
|
||||
|
||||
Page<Document> result = searchByPerson(person, LocalDate.of(1914, 1, 1), LocalDate.of(1918, 12, 31));
|
||||
|
||||
assertThat(result.getContent()).extracting(Document::getId).containsExactly(inside.getId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void searchByPersonSpec_returnsEmpty_whenNoMatchingDocuments() {
|
||||
Person person = personRepository.save(Person.builder().lastName("Raddatz").build());
|
||||
Person other = personRepository.save(Person.builder().lastName("Braun").build());
|
||||
documentRepository.save(Document.builder()
|
||||
.title("Fremder Brief").originalFilename("other.pdf")
|
||||
.status(DocumentStatus.UPLOADED).sender(other).build());
|
||||
|
||||
Page<Document> result = searchByPerson(person, null, null);
|
||||
|
||||
assertThat(result.getContent()).isEmpty();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ import java.time.LocalDate;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.raddatz.familienarchiv.document.SearchFiltersFixtures.noFilters;
|
||||
|
||||
/**
|
||||
* End-to-end paged search test with real PostgreSQL (Testcontainers). Covers the
|
||||
@@ -61,8 +62,8 @@ class DocumentSearchPagedIntegrationTest {
|
||||
@Test
|
||||
void search_firstPage_returnsExactlyPageSizeItems_andCorrectTotalElements() {
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
|
||||
noFilters(),
|
||||
DocumentSort.DATE, "DESC", PageRequest.of(0, 50));
|
||||
|
||||
assertThat(result.items()).hasSize(50);
|
||||
assertThat(result.totalElements()).isEqualTo(FIXTURE_SIZE);
|
||||
@@ -74,8 +75,8 @@ class DocumentSearchPagedIntegrationTest {
|
||||
@Test
|
||||
void search_lastPartialPage_returnsRemainingItems() {
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, false, PageRequest.of(2, 50));
|
||||
noFilters(),
|
||||
DocumentSort.DATE, "DESC", PageRequest.of(2, 50));
|
||||
|
||||
// Page 2 (offset 100) of 120 docs → exactly 20 items on the tail.
|
||||
assertThat(result.items()).hasSize(20);
|
||||
@@ -86,8 +87,8 @@ class DocumentSearchPagedIntegrationTest {
|
||||
@Test
|
||||
void search_pageBeyondLast_returnsEmptyContent_totalElementsStillCorrect() {
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, false, PageRequest.of(99, 50));
|
||||
noFilters(),
|
||||
DocumentSort.DATE, "DESC", PageRequest.of(99, 50));
|
||||
|
||||
assertThat(result.items()).isEmpty();
|
||||
assertThat(result.totalElements()).isEqualTo(FIXTURE_SIZE);
|
||||
@@ -99,8 +100,8 @@ class DocumentSearchPagedIntegrationTest {
|
||||
// comment in DocumentService). Proves that the in-memory slice path
|
||||
// returns the correct total from a real repository fetch.
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.SENDER, "asc", null, false, PageRequest.of(1, 50));
|
||||
noFilters(),
|
||||
DocumentSort.SENDER, "asc", PageRequest.of(1, 50));
|
||||
|
||||
assertThat(result.items()).hasSize(50);
|
||||
assertThat(result.totalElements()).isEqualTo(FIXTURE_SIZE);
|
||||
@@ -125,8 +126,8 @@ class DocumentSearchPagedIntegrationTest {
|
||||
}
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
|
||||
noFilters(),
|
||||
DocumentSort.DATE, "DESC", PageRequest.of(0, 50));
|
||||
|
||||
// Global undated count is the full undated total, independent of page size.
|
||||
assertThat(result.undatedCount()).isEqualTo(undatedTotal);
|
||||
@@ -153,11 +154,11 @@ class DocumentSearchPagedIntegrationTest {
|
||||
}
|
||||
|
||||
DocumentSearchResult unfiltered = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
|
||||
noFilters(),
|
||||
DocumentSort.DATE, "DESC", PageRequest.of(0, 50));
|
||||
DocumentSearchResult undatedOnly = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, true, PageRequest.of(0, 50));
|
||||
noFilters().withUndated(true),
|
||||
DocumentSort.DATE, "DESC", PageRequest.of(0, 50));
|
||||
|
||||
assertThat(unfiltered.undatedCount()).isEqualTo(undatedTotal);
|
||||
assertThat(undatedOnly.undatedCount()).isEqualTo(undatedTotal);
|
||||
@@ -178,9 +179,9 @@ class DocumentSearchPagedIntegrationTest {
|
||||
}
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, LocalDate.of(1900, 1, 1), LocalDate.of(2000, 12, 31),
|
||||
null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
|
||||
new SearchFilters(null, LocalDate.of(1900, 1, 1), LocalDate.of(2000, 12, 31),
|
||||
null, null, null, null, null, null, false),
|
||||
DocumentSort.DATE, "DESC", PageRequest.of(0, 50));
|
||||
|
||||
assertThat(result.undatedCount()).isZero();
|
||||
}
|
||||
@@ -188,11 +189,11 @@ class DocumentSearchPagedIntegrationTest {
|
||||
@Test
|
||||
void search_differentPagesReturnDisjointSlices() {
|
||||
DocumentSearchResult page0 = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
|
||||
noFilters(),
|
||||
DocumentSort.DATE, "DESC", PageRequest.of(0, 50));
|
||||
DocumentSearchResult page1 = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, false, PageRequest.of(1, 50));
|
||||
noFilters(),
|
||||
DocumentSort.DATE, "DESC", PageRequest.of(1, 50));
|
||||
|
||||
// No document id should appear on both pages — slicing must be exclusive.
|
||||
var idsOnPage0 = page0.items().stream()
|
||||
|
||||
@@ -67,7 +67,8 @@ class DocumentServiceSortTest {
|
||||
.thenReturn(new PageImpl<>(List.of(newer, older)));
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
"Brief", null, null, null, null, null, null, null, DocumentSort.DATE, "DESC", null, false, PAGE);
|
||||
new SearchFilters("Brief", null, null, null, null, null, null, null, null, false),
|
||||
DocumentSort.DATE, "DESC", PAGE);
|
||||
|
||||
assertThat(result.items()).hasSize(2);
|
||||
assertThat(result.items().get(0).id()).isEqualTo(id2); // newer first
|
||||
@@ -84,7 +85,8 @@ class DocumentServiceSortTest {
|
||||
.thenReturn(List.of(doc(id1)));
|
||||
|
||||
documentService.searchDocuments(
|
||||
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, false, PAGE);
|
||||
new SearchFilters("Brief", null, null, null, null, null, null, null, null, false),
|
||||
DocumentSort.RELEVANCE, null, PAGE);
|
||||
|
||||
verify(documentRepository).findFtsPageRaw(anyString(), anyInt(), anyInt());
|
||||
verify(documentRepository, never()).findAllMatchingIdsByFts(anyString());
|
||||
@@ -102,7 +104,8 @@ class DocumentServiceSortTest {
|
||||
when(documentRepository.findAllById(any())).thenReturn(List.of(doc(id2), doc(id1))); // unordered from JPA
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, false, PAGE);
|
||||
new SearchFilters("Brief", null, null, null, null, null, null, null, null, false),
|
||||
DocumentSort.RELEVANCE, null, PAGE);
|
||||
|
||||
assertThat(result.items().get(0).id()).isEqualTo(id1);
|
||||
}
|
||||
@@ -119,7 +122,8 @@ class DocumentServiceSortTest {
|
||||
when(documentRepository.findAllById(any())).thenReturn(List.of(doc(id2), doc(id1)));
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
"Brief", null, null, null, null, null, null, null, null, null, null, false, PAGE);
|
||||
new SearchFilters("Brief", null, null, null, null, null, null, null, null, false),
|
||||
null, null, PAGE);
|
||||
|
||||
assertThat(result.items().get(0).id()).isEqualTo(id1);
|
||||
}
|
||||
@@ -132,8 +136,8 @@ class DocumentServiceSortTest {
|
||||
Pageable hugePage = org.springframework.data.domain.PageRequest.of(Integer.MAX_VALUE / 10 + 1, 10);
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
"Brief", null, null, null, null, null, null, null,
|
||||
DocumentSort.RELEVANCE, null, null, false, hugePage);
|
||||
new SearchFilters("Brief", null, null, null, null, null, null, null, null, false),
|
||||
DocumentSort.RELEVANCE, null, hugePage);
|
||||
|
||||
assertThat(result.items()).isEmpty();
|
||||
verify(documentRepository, never()).findFtsPageRaw(anyString(), anyInt(), anyInt());
|
||||
@@ -152,8 +156,8 @@ class DocumentServiceSortTest {
|
||||
when(documentRepository.findAllById(any())).thenReturn(List.of(doc(uuidId)));
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
"Brief", null, null, null, null, null, null, null,
|
||||
DocumentSort.RELEVANCE, null, null, false, PAGE);
|
||||
new SearchFilters("Brief", null, null, null, null, null, null, null, null, false),
|
||||
DocumentSort.RELEVANCE, null, PAGE);
|
||||
|
||||
assertThat(result.items()).hasSize(1);
|
||||
assertThat(result.items().get(0).id()).isEqualTo(uuidId);
|
||||
@@ -173,7 +177,8 @@ class DocumentServiceSortTest {
|
||||
// sender filter is active → triggers in-memory path, not findFtsPageRaw
|
||||
LocalDate from = LocalDate.of(1900, 1, 1);
|
||||
documentService.searchDocuments(
|
||||
"Brief", from, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, false, PAGE);
|
||||
new SearchFilters("Brief", from, null, null, null, null, null, null, null, false),
|
||||
DocumentSort.RELEVANCE, null, PAGE);
|
||||
|
||||
verify(documentRepository, never()).findFtsPageRaw(anyString(), anyInt(), anyInt());
|
||||
verify(documentRepository).findAllMatchingIdsByFts("Brief");
|
||||
|
||||
@@ -5,6 +5,7 @@ import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.ArgumentCaptor;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.Spy;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.raddatz.familienarchiv.audit.AuditKind;
|
||||
import org.raddatz.familienarchiv.audit.AuditLogQueryService;
|
||||
@@ -20,6 +21,7 @@ import org.raddatz.familienarchiv.document.MatchOffset;
|
||||
import org.raddatz.familienarchiv.document.SearchMatchData;
|
||||
import org.raddatz.familienarchiv.tag.TagOperator;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.document.Document;
|
||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
@@ -45,6 +47,7 @@ import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.raddatz.familienarchiv.document.SearchFiltersFixtures.noFilters;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.anyInt;
|
||||
@@ -72,6 +75,9 @@ class DocumentServiceTest {
|
||||
@Mock AuditLogQueryService auditLogQueryService;
|
||||
@Mock TranscriptionBlockQueryService transcriptionBlockQueryService;
|
||||
@Mock ThumbnailAsyncRunner thumbnailAsyncRunner;
|
||||
// Real factory (pure, dependency-free) so save-time title-regeneration tests exercise the
|
||||
// shared composition rather than a stub — the #726 single source of truth.
|
||||
@Spy DocumentTitleFactory documentTitleFactory = new DocumentTitleFactory();
|
||||
@InjectMocks DocumentService documentService;
|
||||
|
||||
// ─── deleteDocument ───────────────────────────────────────────────────────
|
||||
@@ -118,6 +124,37 @@ class DocumentServiceTest {
|
||||
assertThat(documentService.getDocumentById(id)).isEqualTo(doc);
|
||||
}
|
||||
|
||||
@Test
|
||||
void getDocumentById_doesNotQueryTranscription() {
|
||||
UUID id = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(id).title("Test").build();
|
||||
when(documentRepository.findById(id)).thenReturn(Optional.of(doc));
|
||||
|
||||
documentService.getDocumentById(id);
|
||||
|
||||
verifyNoInteractions(transcriptionBlockQueryService);
|
||||
}
|
||||
|
||||
@Test
|
||||
void getDocumentDetail_setsHasTranscriptionTrue_whenBlocksExist() {
|
||||
UUID id = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(id).title("Test").build();
|
||||
when(documentRepository.findById(id)).thenReturn(Optional.of(doc));
|
||||
when(transcriptionBlockQueryService.hasBlocks(id)).thenReturn(true);
|
||||
|
||||
assertThat(documentService.getDocumentDetail(id).isHasTranscription()).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void getDocumentDetail_setsHasTranscriptionFalse_whenNoBlocksExist() {
|
||||
UUID id = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(id).title("Test").build();
|
||||
when(documentRepository.findById(id)).thenReturn(Optional.of(doc));
|
||||
when(transcriptionBlockQueryService.hasBlocks(id)).thenReturn(false);
|
||||
|
||||
assertThat(documentService.getDocumentDetail(id).isHasTranscription()).isFalse();
|
||||
}
|
||||
|
||||
// ─── updateDocument ───────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
@@ -171,10 +208,12 @@ class DocumentServiceTest {
|
||||
// Editing a doc (e.g. fixing a location typo) without touching the precision
|
||||
// controls must NOT fabricate a precision. The form omits the three precision
|
||||
// fields → they arrive null on the DTO → the stored values must be preserved.
|
||||
// Stored combo is RANGE + end: the only DB-valid way to have a non-null end
|
||||
// (chk_meta_date_end_only_for_range), so the carried-over state passes the guard.
|
||||
UUID id = UUID.randomUUID();
|
||||
Document doc = Document.builder()
|
||||
.id(id)
|
||||
.metaDatePrecision(DatePrecision.MONTH)
|
||||
.metaDatePrecision(DatePrecision.RANGE)
|
||||
.metaDateEnd(LocalDate.of(1916, 6, 30))
|
||||
.metaDateRaw("Juni 1916")
|
||||
.receivers(new HashSet<>())
|
||||
@@ -188,11 +227,329 @@ class DocumentServiceTest {
|
||||
|
||||
documentService.updateDocument(id, dto, null, null);
|
||||
|
||||
assertThat(doc.getMetaDatePrecision()).isEqualTo(DatePrecision.MONTH);
|
||||
assertThat(doc.getMetaDatePrecision()).isEqualTo(DatePrecision.RANGE);
|
||||
assertThat(doc.getMetaDateEnd()).isEqualTo(LocalDate.of(1916, 6, 30));
|
||||
assertThat(doc.getMetaDateRaw()).isEqualTo("Juni 1916");
|
||||
}
|
||||
|
||||
// ─── updateDocument save-time auto-title regeneration (#726) ──────────────
|
||||
//
|
||||
// Exact old-vs-new comparison: the title is the catalog auto-title iff the submitted
|
||||
// title equals what the factory builds from the CURRENTLY-persisted state. The edit form
|
||||
// round-trips the stored title verbatim when untouched, so an equal submission means the
|
||||
// user did not type over it. makeStored() seeds index/date/precision/location and sets the
|
||||
// stored title to the matching auto-title, mirroring a freshly-imported row.
|
||||
|
||||
private Document makeStored(String index, LocalDate date, DatePrecision precision, String location) {
|
||||
Document doc = Document.builder()
|
||||
.id(UUID.randomUUID())
|
||||
.originalFilename(index)
|
||||
.documentDate(date)
|
||||
.metaDatePrecision(precision)
|
||||
.location(location)
|
||||
.receivers(new HashSet<>())
|
||||
.tags(new HashSet<>())
|
||||
.build();
|
||||
doc.setTitle(documentTitleFactory.build(doc));
|
||||
return doc;
|
||||
}
|
||||
|
||||
/** A DTO that round-trips the stored auto-title untouched, with new date/precision/location. */
|
||||
private static DocumentUpdateDTO editDto(String submittedTitle, LocalDate date,
|
||||
DatePrecision precision, String location) {
|
||||
DocumentUpdateDTO dto = new DocumentUpdateDTO();
|
||||
dto.setTitle(submittedTitle);
|
||||
dto.setDocumentDate(date);
|
||||
dto.setMetaDatePrecision(precision);
|
||||
dto.setLocation(location);
|
||||
return dto;
|
||||
}
|
||||
|
||||
private Document runUpdate(Document stored, DocumentUpdateDTO dto) throws Exception {
|
||||
when(documentRepository.findById(stored.getId())).thenReturn(Optional.of(stored));
|
||||
when(documentRepository.save(any())).thenReturn(stored);
|
||||
documentService.updateDocument(stored.getId(), dto, null, null);
|
||||
return stored;
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_regeneratesAutoTitle_whenDateChanges() throws Exception {
|
||||
Document stored = makeStored("C-0029", LocalDate.of(2028, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
// title untouched ("C-0029 – 2028 – Berlin"), date corrected to 1928
|
||||
DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
|
||||
runUpdate(stored, dto);
|
||||
|
||||
assertThat(stored.getTitle()).isEqualTo("C-0029 – 1928 – Berlin");
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_keepsHandWrittenTitle_whenDateChanges() throws Exception {
|
||||
Document stored = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null);
|
||||
stored.setTitle("C-0029 – Brief an Mutter"); // hand-written, ≠ auto-title
|
||||
DocumentUpdateDTO dto = editDto("C-0029 – Brief an Mutter", LocalDate.of(1930, 1, 1), DatePrecision.YEAR, null);
|
||||
|
||||
runUpdate(stored, dto);
|
||||
|
||||
assertThat(stored.getTitle()).isEqualTo("C-0029 – Brief an Mutter");
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_freshlyTypedTitleWins_overRegeneration() throws Exception {
|
||||
Document stored = makeStored("C-0029", LocalDate.of(2028, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
// user changed the date AND typed a new title in the same save
|
||||
DocumentUpdateDTO dto = editDto("Geburtsanzeige", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
|
||||
runUpdate(stored, dto);
|
||||
|
||||
assertThat(stored.getTitle()).isEqualTo("Geburtsanzeige");
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_regeneratesWithNewDateAndLocation() throws Exception {
|
||||
Document stored = makeStored("C-0029", LocalDate.of(2028, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "München");
|
||||
|
||||
runUpdate(stored, dto);
|
||||
|
||||
assertThat(stored.getTitle()).isEqualTo("C-0029 – 1928 – München");
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_dropsTrailingLocationSegment_whenLocationCleared() throws Exception {
|
||||
Document stored = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
// location cleared (null), title untouched
|
||||
DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null);
|
||||
|
||||
runUpdate(stored, dto);
|
||||
|
||||
assertThat(stored.getTitle()).isEqualTo("C-0029 – 1928");
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_regeneratedTitle_doesNotContainOldDate() throws Exception {
|
||||
Document stored = makeStored("C-0029", LocalDate.of(2028, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
|
||||
runUpdate(stored, dto);
|
||||
|
||||
assertThat(stored.getTitle()).doesNotContain("2028");
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_relabelsOnPrecisionChange_yearToDay() throws Exception {
|
||||
Document stored = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null);
|
||||
// stored auto-title "C-0029 – 1928"; set a full day at DAY precision
|
||||
DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 15), DatePrecision.DAY, null);
|
||||
|
||||
runUpdate(stored, dto);
|
||||
|
||||
assertThat(stored.getTitle()).isEqualTo("C-0029 – 15. Januar 1928");
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_populatesTitle_whenDateAddedToUnknownRow() throws Exception {
|
||||
Document stored = makeStored("C-0029", null, DatePrecision.UNKNOWN, null);
|
||||
// stored auto-title is just "C-0029"; add a 1928 YEAR date
|
||||
DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null);
|
||||
|
||||
runUpdate(stored, dto);
|
||||
|
||||
assertThat(stored.getTitle()).isEqualTo("C-0029 – 1928");
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_roundTripsSeasonLabel() throws Exception {
|
||||
Document stored = makeStored("C-0029", LocalDate.of(1943, 4, 1), DatePrecision.SEASON, null);
|
||||
stored.setMetaDateRaw("Frühling 1943");
|
||||
stored.setTitle(documentTitleFactory.build(stored)); // "C-0029 – Frühling 1943"
|
||||
DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1943, 4, 1), DatePrecision.SEASON, null);
|
||||
dto.setMetaDateRaw("Frühling 1943");
|
||||
|
||||
runUpdate(stored, dto);
|
||||
|
||||
assertThat(stored.getTitle()).isEqualTo("C-0029 – Frühling 1943");
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_carriesStoredPrecisionAndRaw_whenDtoOmitsThem() throws Exception {
|
||||
// Only the year changes; precision/end/raw are omitted from the DTO, so projectedState
|
||||
// must carry them from the entity (exercises the skip-null effective* resolvers).
|
||||
Document stored = makeStored("C-0029", LocalDate.of(1943, 4, 1), DatePrecision.SEASON, null);
|
||||
stored.setMetaDateRaw("Frühling 1943");
|
||||
stored.setTitle(documentTitleFactory.build(stored)); // "C-0029 – Frühling 1943"
|
||||
DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1944, 4, 1), null, null);
|
||||
|
||||
runUpdate(stored, dto);
|
||||
|
||||
assertThat(stored.getTitle()).isEqualTo("C-0029 – Frühling 1944");
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_roundTripsRangeLabel_atSaveTime() throws Exception {
|
||||
Document stored = Document.builder()
|
||||
.id(UUID.randomUUID())
|
||||
.originalFilename("C-0029")
|
||||
.documentDate(LocalDate.of(1917, 1, 10))
|
||||
.metaDatePrecision(DatePrecision.RANGE)
|
||||
.metaDateEnd(LocalDate.of(1917, 1, 11))
|
||||
.receivers(new HashSet<>())
|
||||
.tags(new HashSet<>())
|
||||
.build();
|
||||
stored.setTitle(documentTitleFactory.build(stored)); // "C-0029 – 10.–11. Jan. 1917"
|
||||
DocumentUpdateDTO dto = new DocumentUpdateDTO();
|
||||
dto.setTitle(stored.getTitle());
|
||||
dto.setDocumentDate(LocalDate.of(1918, 1, 10));
|
||||
dto.setMetaDatePrecision(DatePrecision.RANGE);
|
||||
dto.setMetaDateEnd(LocalDate.of(1918, 1, 11));
|
||||
|
||||
runUpdate(stored, dto);
|
||||
|
||||
assertThat(stored.getTitle()).isEqualTo("C-0029 – 10.–11. Jan. 1918");
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_doesNotRegenerateToBlank_whenSubmittedTitleEmpty() throws Exception {
|
||||
Document stored = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
DocumentUpdateDTO dto = editDto("", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
|
||||
runUpdate(stored, dto);
|
||||
|
||||
assertThat(stored.getTitle()).isNotBlank();
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_treatsFileReplacedDoc_asManual() throws Exception {
|
||||
// originalFilename was reassigned by an earlier file-replace, so the stored title (built
|
||||
// at import from the old index) no longer matches build(currentState) → treated as manual.
|
||||
Document stored = makeStored("scan_2024.pdf", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
stored.setTitle("C-0029 – 1928 – Berlin"); // legacy import title, ≠ build("scan_2024.pdf"…)
|
||||
DocumentUpdateDTO dto = editDto("C-0029 – 1928 – Berlin", LocalDate.of(1930, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
|
||||
runUpdate(stored, dto);
|
||||
|
||||
assertThat(stored.getTitle()).isEqualTo("C-0029 – 1928 – Berlin");
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_idempotent_whenNothingChanges() throws Exception {
|
||||
Document stored = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
String before = stored.getTitle();
|
||||
DocumentUpdateDTO dto = editDto(before, LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
|
||||
runUpdate(stored, dto);
|
||||
|
||||
assertThat(stored.getTitle()).isEqualTo(before);
|
||||
}
|
||||
|
||||
// ─── updateDocument date-range validation (#678) ──────────────────────────
|
||||
|
||||
/** Builds a stored doc ready for an updateDocument call (collections initialised). */
|
||||
private static Document docForRangeUpdate(UUID id) {
|
||||
return Document.builder().id(id).receivers(new HashSet<>()).tags(new HashSet<>()).build();
|
||||
}
|
||||
|
||||
private static DocumentUpdateDTO rangeDto(LocalDate start, LocalDate end) {
|
||||
DocumentUpdateDTO dto = new DocumentUpdateDTO();
|
||||
dto.setDocumentDate(start);
|
||||
dto.setMetaDatePrecision(DatePrecision.RANGE);
|
||||
dto.setMetaDateEnd(end);
|
||||
return dto;
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_rejectsRange_whenEndBeforeStart() {
|
||||
UUID id = UUID.randomUUID();
|
||||
Document doc = docForRangeUpdate(id);
|
||||
when(documentRepository.findById(id)).thenReturn(Optional.of(doc));
|
||||
|
||||
DocumentUpdateDTO dto = rangeDto(LocalDate.of(1917, 1, 11), LocalDate.of(1917, 1, 10));
|
||||
|
||||
assertThatThrownBy(() -> documentService.updateDocument(id, dto, null, null))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.extracting(e -> ((DomainException) e).getCode())
|
||||
.isEqualTo(ErrorCode.INVALID_DATE_RANGE);
|
||||
verify(documentRepository, never()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_acceptsRange_whenEndEqualsStart() throws Exception {
|
||||
// AC2: the DB CHECK is end >= start, so equal dates are valid.
|
||||
UUID id = UUID.randomUUID();
|
||||
Document doc = docForRangeUpdate(id);
|
||||
when(documentRepository.findById(id)).thenReturn(Optional.of(doc));
|
||||
when(documentRepository.save(any())).thenReturn(doc);
|
||||
|
||||
LocalDate same = LocalDate.of(1917, 1, 10);
|
||||
documentService.updateDocument(id, rangeDto(same, same), null, null);
|
||||
|
||||
assertThat(doc.getMetaDateEnd()).isEqualTo(same);
|
||||
verify(documentRepository, atLeastOnce()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_acceptsRange_whenEndAfterStart() throws Exception {
|
||||
UUID id = UUID.randomUUID();
|
||||
Document doc = docForRangeUpdate(id);
|
||||
when(documentRepository.findById(id)).thenReturn(Optional.of(doc));
|
||||
when(documentRepository.save(any())).thenReturn(doc);
|
||||
|
||||
documentService.updateDocument(id,
|
||||
rangeDto(LocalDate.of(1917, 1, 10), LocalDate.of(1917, 1, 11)), null, null);
|
||||
|
||||
assertThat(doc.getMetaDateEnd()).isEqualTo(LocalDate.of(1917, 1, 11));
|
||||
verify(documentRepository, atLeastOnce()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_acceptsRange_whenEndIsNull_openEnded() throws Exception {
|
||||
// AC3: an open-ended range (no end) is valid.
|
||||
UUID id = UUID.randomUUID();
|
||||
Document doc = docForRangeUpdate(id);
|
||||
when(documentRepository.findById(id)).thenReturn(Optional.of(doc));
|
||||
when(documentRepository.save(any())).thenReturn(doc);
|
||||
|
||||
documentService.updateDocument(id,
|
||||
rangeDto(LocalDate.of(1917, 1, 10), null), null, null);
|
||||
|
||||
verify(documentRepository, atLeastOnce()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_acceptsRange_whenStartNullAndEndSet() throws Exception {
|
||||
// AC4: mirrors the DB "meta_date IS NULL" escape — must NOT reject (and must not NPE).
|
||||
UUID id = UUID.randomUUID();
|
||||
Document doc = docForRangeUpdate(id);
|
||||
when(documentRepository.findById(id)).thenReturn(Optional.of(doc));
|
||||
when(documentRepository.save(any())).thenReturn(doc);
|
||||
|
||||
documentService.updateDocument(id,
|
||||
rangeDto(null, LocalDate.of(1917, 1, 11)), null, null);
|
||||
|
||||
assertThat(doc.getMetaDateEnd()).isEqualTo(LocalDate.of(1917, 1, 11));
|
||||
verify(documentRepository, atLeastOnce()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_rejectsEndDate_whenPrecisionNotRange() {
|
||||
// AC6: an end date only makes sense for RANGE (mirrors chk_meta_date_end_only_for_range).
|
||||
// API-only — the edit form clears the end field off-RANGE — so close the 500 class here too.
|
||||
UUID id = UUID.randomUUID();
|
||||
Document doc = docForRangeUpdate(id);
|
||||
when(documentRepository.findById(id)).thenReturn(Optional.of(doc));
|
||||
|
||||
DocumentUpdateDTO dto = new DocumentUpdateDTO();
|
||||
dto.setDocumentDate(LocalDate.of(1917, 1, 10));
|
||||
dto.setMetaDatePrecision(DatePrecision.MONTH);
|
||||
dto.setMetaDateEnd(LocalDate.of(1917, 1, 31));
|
||||
|
||||
assertThatThrownBy(() -> documentService.updateDocument(id, dto, null, null))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.extracting(e -> ((DomainException) e).getCode())
|
||||
.isEqualTo(ErrorCode.INVALID_DATE_RANGE);
|
||||
verify(documentRepository, never()).save(any());
|
||||
}
|
||||
|
||||
// ─── deleteTagCascading ───────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
@@ -338,6 +695,59 @@ class DocumentServiceTest {
|
||||
verify(documentVersionService).recordVersion(any(Document.class));
|
||||
}
|
||||
|
||||
// ─── backfillTitles — one-time stale-title cleanup (#726, FR-003) ─────────
|
||||
|
||||
@Test
|
||||
void backfillTitles_rewritesStaleAutoTitle_andCountsIt() {
|
||||
Document stale = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
stale.setTitle("C-0029 – 2028 – Berlin"); // stale stored title (date typo never fixed)
|
||||
when(documentRepository.findAll()).thenReturn(List.of(stale));
|
||||
when(documentRepository.save(any())).thenReturn(stale);
|
||||
|
||||
int count = documentService.backfillTitles();
|
||||
|
||||
assertThat(count).isEqualTo(1);
|
||||
assertThat(stale.getTitle()).isEqualTo("C-0029 – 1928 – Berlin");
|
||||
verify(documentRepository).save(stale);
|
||||
}
|
||||
|
||||
@Test
|
||||
void backfillTitles_skipsProse() {
|
||||
Document prose = makeStored("C-0030", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null);
|
||||
prose.setTitle("C-0030 – Brief an Mutter");
|
||||
when(documentRepository.findAll()).thenReturn(List.of(prose));
|
||||
|
||||
int count = documentService.backfillTitles();
|
||||
|
||||
assertThat(count).isZero();
|
||||
assertThat(prose.getTitle()).isEqualTo("C-0030 – Brief an Mutter");
|
||||
verify(documentRepository, never()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void backfillTitles_isIdempotent_forAlreadyCorrectTitle() {
|
||||
Document fresh = makeStored("C-0031", LocalDate.of(1940, 1, 1), DatePrecision.YEAR, null);
|
||||
// title already equals build(current state) → nothing to do
|
||||
when(documentRepository.findAll()).thenReturn(List.of(fresh));
|
||||
|
||||
int count = documentService.backfillTitles();
|
||||
|
||||
assertThat(count).isZero();
|
||||
verify(documentRepository, never()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void backfillTitles_neverRecordsVersions() {
|
||||
Document stale = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
stale.setTitle("C-0029 – 2028 – Berlin");
|
||||
when(documentRepository.findAll()).thenReturn(List.of(stale));
|
||||
when(documentRepository.save(any())).thenReturn(stale);
|
||||
|
||||
documentService.backfillTitles();
|
||||
|
||||
verify(documentVersionService, never()).recordVersion(any());
|
||||
}
|
||||
|
||||
// ─── thumbnail dispatch ───────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
@@ -985,53 +1395,6 @@ class DocumentServiceTest {
|
||||
.isEqualTo("19650332_Mueller_Hans");
|
||||
}
|
||||
|
||||
// ─── getConversationFiltered ───────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void getConversationFiltered_passesGivenDates_whenFromAndToAreProvided() {
|
||||
UUID senderId = UUID.randomUUID();
|
||||
UUID receiverId = UUID.randomUUID();
|
||||
LocalDate from = LocalDate.of(1940, 1, 1);
|
||||
LocalDate to = LocalDate.of(1960, 12, 31);
|
||||
Sort sort = Sort.by(Sort.Direction.ASC, "documentDate");
|
||||
when(documentRepository.findConversation(senderId, receiverId, from, to, sort))
|
||||
.thenReturn(List.of());
|
||||
|
||||
documentService.getConversationFiltered(senderId, receiverId, from, to, sort);
|
||||
|
||||
verify(documentRepository).findConversation(senderId, receiverId, from, to, sort);
|
||||
}
|
||||
|
||||
@Test
|
||||
void getConversationFiltered_usesMinDateForFrom_whenFromIsNull() {
|
||||
UUID senderId = UUID.randomUUID();
|
||||
UUID receiverId = UUID.randomUUID();
|
||||
Sort sort = Sort.by(Sort.Direction.ASC, "documentDate");
|
||||
when(documentRepository.findConversation(eq(senderId), eq(receiverId), any(LocalDate.class), any(LocalDate.class), eq(sort)))
|
||||
.thenReturn(List.of());
|
||||
|
||||
documentService.getConversationFiltered(senderId, receiverId, null, null, sort);
|
||||
|
||||
ArgumentCaptor<LocalDate> fromCaptor = ArgumentCaptor.forClass(LocalDate.class);
|
||||
verify(documentRepository).findConversation(eq(senderId), eq(receiverId), fromCaptor.capture(), any(LocalDate.class), eq(sort));
|
||||
assertThat(fromCaptor.getValue()).isEqualTo(LocalDate.parse("0000-01-01"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void getConversationFiltered_usesTodayForTo_whenToIsNull() {
|
||||
UUID senderId = UUID.randomUUID();
|
||||
UUID receiverId = UUID.randomUUID();
|
||||
Sort sort = Sort.by(Sort.Direction.ASC, "documentDate");
|
||||
when(documentRepository.findConversation(eq(senderId), eq(receiverId), any(LocalDate.class), any(LocalDate.class), eq(sort)))
|
||||
.thenReturn(List.of());
|
||||
|
||||
documentService.getConversationFiltered(senderId, receiverId, null, null, sort);
|
||||
|
||||
ArgumentCaptor<LocalDate> toCaptor = ArgumentCaptor.forClass(LocalDate.class);
|
||||
verify(documentRepository).findConversation(eq(senderId), eq(receiverId), any(LocalDate.class), toCaptor.capture(), eq(sort));
|
||||
assertThat(toCaptor.getValue()).isEqualTo(LocalDate.now());
|
||||
}
|
||||
|
||||
// ─── updateDocumentTags — empty tag in list ───────────────────────────────
|
||||
|
||||
@Test
|
||||
@@ -1410,8 +1773,9 @@ class DocumentServiceTest {
|
||||
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class)))
|
||||
.thenReturn(new PageImpl<>(List.of()));
|
||||
|
||||
documentService.searchDocuments(null, null, null, null, null, null, null, null,
|
||||
org.raddatz.familienarchiv.document.DocumentSort.DATE, "DESC", null, false, org.springframework.data.domain.PageRequest.of(1, 50));
|
||||
documentService.searchDocuments(
|
||||
noFilters(),
|
||||
org.raddatz.familienarchiv.document.DocumentSort.DATE, "DESC", org.springframework.data.domain.PageRequest.of(1, 50));
|
||||
|
||||
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class));
|
||||
verify(documentRepository, never()).findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Sort.class));
|
||||
@@ -1423,8 +1787,9 @@ class DocumentServiceTest {
|
||||
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class)))
|
||||
.thenReturn(new PageImpl<>(List.of()));
|
||||
|
||||
documentService.searchDocuments(null, null, null, null, null, null, null, null,
|
||||
org.raddatz.familienarchiv.document.DocumentSort.DATE, "DESC", null, false, org.springframework.data.domain.PageRequest.of(3, 25));
|
||||
documentService.searchDocuments(
|
||||
noFilters(),
|
||||
org.raddatz.familienarchiv.document.DocumentSort.DATE, "DESC", org.springframework.data.domain.PageRequest.of(3, 25));
|
||||
|
||||
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), captor.capture());
|
||||
assertThat(captor.getValue().getPageNumber()).isEqualTo(3);
|
||||
@@ -1439,8 +1804,9 @@ class DocumentServiceTest {
|
||||
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class)))
|
||||
.thenReturn(new PageImpl<>(List.of(d), org.springframework.data.domain.PageRequest.of(0, 50), 120L));
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(null, null, null, null, null, null, null, null,
|
||||
org.raddatz.familienarchiv.document.DocumentSort.DATE, "DESC", null, false, org.springframework.data.domain.PageRequest.of(0, 50));
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
noFilters(),
|
||||
org.raddatz.familienarchiv.document.DocumentSort.DATE, "DESC", org.springframework.data.domain.PageRequest.of(0, 50));
|
||||
|
||||
assertThat(result.totalElements()).isEqualTo(120L);
|
||||
assertThat(result.pageNumber()).isZero();
|
||||
@@ -1455,8 +1821,9 @@ class DocumentServiceTest {
|
||||
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class)))
|
||||
.thenReturn(new PageImpl<>(List.of()));
|
||||
|
||||
documentService.searchDocuments(null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, false, org.springframework.data.domain.PageRequest.of(0, 5));
|
||||
documentService.searchDocuments(
|
||||
noFilters(),
|
||||
DocumentSort.DATE, "DESC", org.springframework.data.domain.PageRequest.of(0, 5));
|
||||
|
||||
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), captor.capture());
|
||||
Sort.Order dateOrder = captor.getValue().getSort().getOrderFor("documentDate");
|
||||
@@ -1478,8 +1845,9 @@ class DocumentServiceTest {
|
||||
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class)))
|
||||
.thenReturn(new PageImpl<>(List.of()));
|
||||
|
||||
documentService.searchDocuments(null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "ASC", null, false, org.springframework.data.domain.PageRequest.of(0, 5));
|
||||
documentService.searchDocuments(
|
||||
noFilters(),
|
||||
DocumentSort.DATE, "ASC", org.springframework.data.domain.PageRequest.of(0, 5));
|
||||
|
||||
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), captor.capture());
|
||||
Sort.Order dateOrder = captor.getValue().getSort().getOrderFor("documentDate");
|
||||
@@ -1499,8 +1867,9 @@ class DocumentServiceTest {
|
||||
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class)))
|
||||
.thenReturn(new PageImpl<>(List.of()));
|
||||
|
||||
documentService.searchDocuments(null, null, null, null, null, null, null, null,
|
||||
DocumentSort.UPDATED_AT, "DESC", null, false, org.springframework.data.domain.PageRequest.of(0, 5));
|
||||
documentService.searchDocuments(
|
||||
noFilters(),
|
||||
DocumentSort.UPDATED_AT, "DESC", org.springframework.data.domain.PageRequest.of(0, 5));
|
||||
|
||||
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), captor.capture());
|
||||
assertThat(captor.getValue().getSort())
|
||||
@@ -1523,8 +1892,9 @@ class DocumentServiceTest {
|
||||
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class)))
|
||||
.thenReturn(all);
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(null, null, null, null, null, null, null, null,
|
||||
org.raddatz.familienarchiv.document.DocumentSort.SENDER, "asc", null, false, org.springframework.data.domain.PageRequest.of(1, 50));
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
noFilters(),
|
||||
org.raddatz.familienarchiv.document.DocumentSort.SENDER, "asc", org.springframework.data.domain.PageRequest.of(1, 50));
|
||||
|
||||
assertThat(result.totalElements()).isEqualTo(120L);
|
||||
assertThat(result.pageNumber()).isEqualTo(1);
|
||||
@@ -1547,8 +1917,9 @@ class DocumentServiceTest {
|
||||
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class)))
|
||||
.thenReturn(all);
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(null, null, null, null, null, null, null, null,
|
||||
org.raddatz.familienarchiv.document.DocumentSort.SENDER, "asc", null, false, org.springframework.data.domain.PageRequest.of(10, 50));
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
noFilters(),
|
||||
org.raddatz.familienarchiv.document.DocumentSort.SENDER, "asc", org.springframework.data.domain.PageRequest.of(10, 50));
|
||||
|
||||
assertThat(result.items()).isEmpty();
|
||||
assertThat(result.totalElements()).isEqualTo(30L);
|
||||
@@ -1561,7 +1932,8 @@ class DocumentServiceTest {
|
||||
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class)))
|
||||
.thenReturn(new PageImpl<>(List.of()));
|
||||
|
||||
documentService.searchDocuments(null, null, null, null, null, null, null, DocumentStatus.REVIEWED, null, null, null, false, UNPAGED);
|
||||
documentService.searchDocuments(
|
||||
new SearchFilters(null, null, null, null, null, null, null, DocumentStatus.REVIEWED, null, false), null, null, UNPAGED);
|
||||
|
||||
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class));
|
||||
}
|
||||
@@ -1571,7 +1943,8 @@ class DocumentServiceTest {
|
||||
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class)))
|
||||
.thenReturn(new PageImpl<>(List.of()));
|
||||
|
||||
documentService.searchDocuments(null, null, null, null, null, null, null, null, null, null, null, false, UNPAGED);
|
||||
documentService.searchDocuments(
|
||||
noFilters(), null, null, UNPAGED);
|
||||
|
||||
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class));
|
||||
}
|
||||
@@ -1607,35 +1980,6 @@ class DocumentServiceTest {
|
||||
.isEqualTo(Sort.by(Sort.Direction.DESC, "updatedAt"));
|
||||
}
|
||||
|
||||
// ─── getConversationFiltered (single-person mode) ─────────────────────────
|
||||
|
||||
@Test
|
||||
void getConversationFiltered_callsSinglePersonQuery_whenReceiverIdIsNull() {
|
||||
UUID personId = UUID.randomUUID();
|
||||
Sort sort = Sort.by(Sort.Direction.DESC, "documentDate");
|
||||
when(documentRepository.findSinglePersonCorrespondence(eq(personId), any(), any(), eq(sort)))
|
||||
.thenReturn(List.of());
|
||||
|
||||
documentService.getConversationFiltered(personId, null, null, null, sort);
|
||||
|
||||
verify(documentRepository).findSinglePersonCorrespondence(eq(personId), any(), any(), eq(sort));
|
||||
verify(documentRepository, never()).findConversation(any(), any(), any(), any(), any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void getConversationFiltered_callsBilateralQuery_whenReceiverIdIsSet() {
|
||||
UUID senderId = UUID.randomUUID();
|
||||
UUID receiverId = UUID.randomUUID();
|
||||
Sort sort = Sort.by(Sort.Direction.DESC, "documentDate");
|
||||
when(documentRepository.findConversation(eq(senderId), eq(receiverId), any(), any(), eq(sort)))
|
||||
.thenReturn(List.of());
|
||||
|
||||
documentService.getConversationFiltered(senderId, receiverId, null, null, sort);
|
||||
|
||||
verify(documentRepository).findConversation(eq(senderId), eq(receiverId), any(), any(), eq(sort));
|
||||
verify(documentRepository, never()).findSinglePersonCorrespondence(any(), any(), any(), any());
|
||||
}
|
||||
|
||||
// ─── searchDocuments — SENDER sort includes documents with null sender ─────
|
||||
|
||||
@Test
|
||||
@@ -1649,7 +1993,8 @@ class DocumentServiceTest {
|
||||
.thenReturn(List.of(withSender, noSender));
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null, false, UNPAGED);
|
||||
noFilters(),
|
||||
DocumentSort.SENDER, "asc", UNPAGED);
|
||||
|
||||
assertThat(result.items()).hasSize(2);
|
||||
assertThat(result.items()).extracting(DocumentListItem::title).containsExactly("Has Sender", "No Sender");
|
||||
@@ -1669,7 +2014,8 @@ class DocumentServiceTest {
|
||||
.thenReturn(List.of(noReceivers, withReceiver));
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null, DocumentSort.RECEIVER, "asc", null, false, UNPAGED);
|
||||
noFilters(),
|
||||
DocumentSort.RECEIVER, "asc", UNPAGED);
|
||||
|
||||
assertThat(result.items()).extracting(DocumentListItem::title)
|
||||
.containsExactly("Has Receiver", "No Receivers");
|
||||
@@ -1702,7 +2048,8 @@ class DocumentServiceTest {
|
||||
.thenReturn(List.of(undatedBob, datedAnna, datedBob, undatedAnna));
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null, false, UNPAGED);
|
||||
noFilters(),
|
||||
DocumentSort.SENDER, "asc", UNPAGED);
|
||||
|
||||
// Bob's group precedes Anna's group (ASC by sender). The sort is stable, so
|
||||
// within each group the input order is preserved (undatedBob, datedBob for Bob;
|
||||
@@ -1733,7 +2080,8 @@ class DocumentServiceTest {
|
||||
.thenReturn(List.of(undatedBob, datedAnna, datedBob, undatedAnna));
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "desc", null, false, UNPAGED);
|
||||
noFilters(),
|
||||
DocumentSort.SENDER, "desc", UNPAGED);
|
||||
|
||||
// Anna's group precedes Bob's (DESC by sender); undated stays inside its group.
|
||||
assertThat(result.items()).extracting(DocumentListItem::title)
|
||||
@@ -1756,7 +2104,8 @@ class DocumentServiceTest {
|
||||
.thenReturn(List.of(undatedFromAlice));
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null, true, UNPAGED);
|
||||
noFilters().withUndated(true),
|
||||
DocumentSort.SENDER, "asc", UNPAGED);
|
||||
|
||||
// The in-memory path queried via a Specification (built by buildSearchSpec with
|
||||
// undatedOnly(true)) rather than skipping straight to a sorted findAll.
|
||||
@@ -1772,8 +2121,9 @@ class DocumentServiceTest {
|
||||
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class)))
|
||||
.thenReturn(List.of());
|
||||
|
||||
documentService.searchDocuments("brief", null, null, null, null, null, null, null,
|
||||
DocumentSort.RELEVANCE, null, null, true, UNPAGED);
|
||||
documentService.searchDocuments(
|
||||
new SearchFilters("brief", null, null, null, null, null, null, null, null, true),
|
||||
DocumentSort.RELEVANCE, null, UNPAGED);
|
||||
|
||||
// The FTS-id path (buildSearchSpec) ran; the raw-page SQL shortcut did not.
|
||||
verify(documentRepository).findAllMatchingIdsByFts("brief");
|
||||
@@ -1796,7 +2146,8 @@ class DocumentServiceTest {
|
||||
.thenReturn(List.of(docNullName, docSmith));
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null, false, UNPAGED);
|
||||
noFilters(),
|
||||
DocumentSort.SENDER, "asc", UNPAGED);
|
||||
|
||||
// null lastName should sort to end (treated as empty), not before "smith" (as "null")
|
||||
assertThat(result.items()).extracting(DocumentListItem::title)
|
||||
@@ -1819,7 +2170,8 @@ class DocumentServiceTest {
|
||||
when(documentRepository.findEnrichmentData(any(), eq("Brief"))).thenReturn(rows);
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, false, UNPAGED);
|
||||
new SearchFilters("Brief", null, null, null, null, null, null, null, null, false),
|
||||
DocumentSort.RELEVANCE, null, UNPAGED);
|
||||
|
||||
assertThat(result.items()).hasSize(1);
|
||||
SearchMatchData md = result.items().get(0).matchData();
|
||||
@@ -1833,7 +2185,8 @@ class DocumentServiceTest {
|
||||
.thenReturn(new PageImpl<>(List.of()));
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null, null, null, null, false, UNPAGED);
|
||||
noFilters(),
|
||||
null, null, UNPAGED);
|
||||
|
||||
assertThat(result.items()).isEmpty();
|
||||
}
|
||||
@@ -1853,7 +2206,8 @@ class DocumentServiceTest {
|
||||
when(documentRepository.findEnrichmentData(any(), eq("Brief"))).thenReturn(rows);
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, false, UNPAGED);
|
||||
new SearchFilters("Brief", null, null, null, null, null, null, null, null, false),
|
||||
DocumentSort.RELEVANCE, null, UNPAGED);
|
||||
|
||||
SearchMatchData md = result.items().get(0).matchData();
|
||||
assertThat(md.transcriptionSnippet()).isEqualTo("Hier ist der Brief aus Berlin");
|
||||
@@ -2370,7 +2724,7 @@ class DocumentServiceTest {
|
||||
.thenReturn(List.of(d1, d2));
|
||||
|
||||
List<UUID> result = documentService.findIdsForFilter(
|
||||
null, null, null, null, null, null, null, null, null, false);
|
||||
noFilters());
|
||||
|
||||
assertThat(result).containsExactly(d1.getId(), d2.getId());
|
||||
}
|
||||
@@ -2385,7 +2739,7 @@ class DocumentServiceTest {
|
||||
when(tagService.expandTagNamesToDescendantIdSets(any())).thenReturn(List.of());
|
||||
|
||||
documentService.findIdsForFilter(
|
||||
null, null, null, null, null, List.of("Brief"), null, null, TagOperator.OR, false);
|
||||
new SearchFilters(null, null, null, null, null, List.of("Brief"), null, null, TagOperator.OR, false));
|
||||
|
||||
// Spec built without throwing → OR branch was exercised. Coverage gain
|
||||
// is in not-throwing on the OR-specific code path; the actual SQL is
|
||||
@@ -2398,7 +2752,7 @@ class DocumentServiceTest {
|
||||
when(documentRepository.findAllMatchingIdsByFts("xyz")).thenReturn(List.of());
|
||||
|
||||
List<UUID> result = documentService.findIdsForFilter(
|
||||
"xyz", null, null, null, null, null, null, null, null, false);
|
||||
new SearchFilters("xyz", null, null, null, null, null, null, null, null, false));
|
||||
|
||||
assertThat(result).isEmpty();
|
||||
verify(documentRepository, never()).findAll(any(org.springframework.data.jpa.domain.Specification.class));
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.PostgresContainerConfig;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
* End-to-end backfill against a real Postgres (#726, FR-003). H2 is unusable here — the
|
||||
* {@code title} column is NOT NULL and the title-sync semantics depend on that — so this pins the
|
||||
* behaviour on {@code postgres:16-alpine}: a stale auto-title is rewritten, the sweep is
|
||||
* idempotent, prose is left alone, and the mechanical rename writes no {@code document_versions}
|
||||
* rows. Permission enforcement (401/403) is covered faster by the {@code @WebMvcTest} slice in
|
||||
* {@code AdminControllerTest}.
|
||||
*/
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
|
||||
@ActiveProfiles("test")
|
||||
@Import(PostgresContainerConfig.class)
|
||||
@Transactional
|
||||
class DocumentTitleBackfillIntegrationTest {
|
||||
|
||||
@MockitoBean S3Client s3Client;
|
||||
@Autowired DocumentService documentService;
|
||||
@Autowired DocumentRepository documentRepository;
|
||||
@Autowired DocumentVersionRepository documentVersionRepository;
|
||||
|
||||
private Document persist(String index, String title, LocalDate date, DatePrecision precision, String location) {
|
||||
return documentRepository.save(Document.builder()
|
||||
.originalFilename(index)
|
||||
.title(title)
|
||||
.documentDate(date)
|
||||
.metaDatePrecision(precision)
|
||||
.location(location)
|
||||
.status(DocumentStatus.PLACEHOLDER)
|
||||
.build());
|
||||
}
|
||||
|
||||
@Test
|
||||
void backfill_rewritesStaleAutoTitle() {
|
||||
Document stale = persist("C-0029", "C-0029 – 2028 – Berlin",
|
||||
LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
|
||||
int count = documentService.backfillTitles();
|
||||
|
||||
assertThat(count).isEqualTo(1); // exactly the one stale row seeded (clean test DB)
|
||||
assertThat(documentRepository.findById(stale.getId()).orElseThrow().getTitle())
|
||||
.isEqualTo("C-0029 – 1928 – Berlin");
|
||||
}
|
||||
|
||||
@Test
|
||||
void backfill_isIdempotent_secondRunChangesNothing() {
|
||||
persist("C-0029", "C-0029 – 2028 – Berlin", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
|
||||
documentService.backfillTitles();
|
||||
int secondRun = documentService.backfillTitles();
|
||||
|
||||
assertThat(secondRun).isZero();
|
||||
}
|
||||
|
||||
@Test
|
||||
void backfill_skipsProse() {
|
||||
Document prose = persist("C-0030", "C-0030 – Brief an Mutter",
|
||||
LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null);
|
||||
|
||||
documentService.backfillTitles();
|
||||
|
||||
assertThat(documentRepository.findById(prose.getId()).orElseThrow().getTitle())
|
||||
.isEqualTo("C-0030 – Brief an Mutter");
|
||||
}
|
||||
|
||||
@Test
|
||||
void backfill_addsNoDocumentVersionRows() {
|
||||
persist("C-0029", "C-0029 – 2028 – Berlin", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
long versionsBefore = documentVersionRepository.count();
|
||||
|
||||
documentService.backfillTitles();
|
||||
|
||||
assertThat(documentVersionRepository.count()).isEqualTo(versionsBefore);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,175 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.Timeout;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
* The backfill overwrite heuristic (FR-004) in isolation — every emittable date-label form is
|
||||
* recognised, prose is left alone, and a regex-metacharacter index is matched literally without
|
||||
* hanging. The exact label spellings mirror {@code docs/date-label-fixtures.json}.
|
||||
*/
|
||||
class DocumentTitleBackfillMatcherTest {
|
||||
|
||||
private static boolean overwritable(String title, String location) {
|
||||
return DocumentTitleBackfillMatcher.isOverwritable(title, "C-0029", location);
|
||||
}
|
||||
|
||||
// ─── each date-label form (index + form) is overwritable ──────────────────
|
||||
|
||||
@Test
|
||||
void year_form() {
|
||||
assertThat(overwritable("C-0029 – 1916", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void approx_form() {
|
||||
assertThat(overwritable("C-0029 – ca. 1920", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void month_form() {
|
||||
assertThat(overwritable("C-0029 – Juni 1916", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void day_form() {
|
||||
assertThat(overwritable("C-0029 – 24. Dezember 1943", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void season_form() {
|
||||
assertThat(overwritable("C-0029 – Sommer 1916", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void unknown_label_form() {
|
||||
assertThat(overwritable("C-0029 – Datum unbekannt", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void range_same_month_form() {
|
||||
assertThat(overwritable("C-0029 – 10.–11. Jan. 1917", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void range_cross_month_form() {
|
||||
assertThat(overwritable("C-0029 – 30. Jan. – 2. Feb. 1917", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void range_cross_year_form() {
|
||||
assertThat(overwritable("C-0029 – 30. Dez. 1916 – 2. Jan. 1917", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void range_single_day_form() {
|
||||
assertThat(overwritable("C-0029 – 10. Jan. 1917", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void range_open_form() {
|
||||
assertThat(overwritable("C-0029 – ab 10. Jan. 1917", null)).isTrue();
|
||||
}
|
||||
|
||||
// ─── date label + trailing location (any location) ────────────────────────
|
||||
|
||||
@Test
|
||||
void date_form_with_trailing_location() {
|
||||
assertThat(overwritable("C-0029 – 1916 – Berlin", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void range_with_internal_separator_plus_trailing_location() {
|
||||
// The range label itself contains " – "; the trailing " – Berlin" must still be peeled.
|
||||
assertThat(overwritable("C-0029 – 30. Jan. – 2. Feb. 1917 – Berlin", null)).isTrue();
|
||||
}
|
||||
|
||||
// ─── index-only and index+location cases ──────────────────────────────────
|
||||
|
||||
@Test
|
||||
void exactly_index() {
|
||||
assertThat(overwritable("C-0029", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void index_plus_location_equal_to_current() {
|
||||
assertThat(overwritable("C-0029 – Berlin", "Berlin")).isTrue();
|
||||
}
|
||||
|
||||
// ─── prose is left untouched ──────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void prose_segment_not_matching_location_is_skipped() {
|
||||
assertThat(overwritable("C-0029 – Brief an Mutter", "Berlin")).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void location_only_segment_is_skipped_when_no_current_location() {
|
||||
// No date label, and the doc has no location to compare against → cannot prove machine.
|
||||
assertThat(overwritable("C-0029 – Berlin", null)).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void title_not_starting_with_index_is_skipped() {
|
||||
assertThat(overwritable("Ganz anderer Titel", null)).isFalse();
|
||||
}
|
||||
|
||||
// ─── near-miss: shapes that look almost machine-built but are not ──────────
|
||||
|
||||
@Test
|
||||
void ascii_hyphen_instead_of_en_dash_separator_is_skipped() {
|
||||
// The separator is " – " (en dash); a plain " - " is not the machine separator.
|
||||
assertThat(overwritable("C-0029 - 1916", null)).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void date_label_without_separator_before_trailing_text_is_skipped() {
|
||||
// "1916 Berlin" is not a date label and is not joined by " – "; prose, not machine.
|
||||
assertThat(overwritable("C-0029 – 1916 Berlin", null)).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void year_with_trailing_letters_is_not_a_year_label() {
|
||||
assertThat(overwritable("C-0029 – 1916er Brief", null)).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void index_immediately_followed_by_text_without_separator_is_skipped() {
|
||||
assertThat(overwritable("C-0029x – 1916", null)).isFalse();
|
||||
}
|
||||
|
||||
// ─── fail-closed guards ───────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void null_title_is_not_overwritable() {
|
||||
assertThat(DocumentTitleBackfillMatcher.isOverwritable(null, "C-0029", null)).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void null_index_is_not_overwritable() {
|
||||
assertThat(DocumentTitleBackfillMatcher.isOverwritable("C-0029 – 1916", null, null)).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void blank_index_is_not_overwritable() {
|
||||
assertThat(DocumentTitleBackfillMatcher.isOverwritable(" – 1916", " ", null)).isFalse();
|
||||
}
|
||||
|
||||
// ─── ReDoS / regex-metacharacter index is matched literally and terminates ─
|
||||
|
||||
@Test
|
||||
@Timeout(value = 5, unit = TimeUnit.SECONDS)
|
||||
void index_with_regex_metacharacters_is_matched_literally_and_terminates() {
|
||||
String hostileIndex = "C-0029(.*).pdf";
|
||||
// Literal prefix → matches; trailing date label → overwritable. Must not hang.
|
||||
assertThat(DocumentTitleBackfillMatcher.isOverwritable(
|
||||
hostileIndex + " – 1916", hostileIndex, null)).isTrue();
|
||||
// A title that does NOT start with the literal hostile index is skipped, also fast.
|
||||
assertThat(DocumentTitleBackfillMatcher.isOverwritable(
|
||||
"C-0029 – 1916", hostileIndex, null)).isFalse();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,89 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.time.LocalDate;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
* The auto-title composition {@code {index} – {dateLabel} – {location}} in isolation.
|
||||
* The honest date-label forms themselves are pinned by {@link DocumentTitleFormatterTest}
|
||||
* against the shared #666 fixture; here we assert only how the factory composes the
|
||||
* three segments and which segments it omits.
|
||||
*/
|
||||
class DocumentTitleFactoryTest {
|
||||
|
||||
private final DocumentTitleFactory factory = new DocumentTitleFactory();
|
||||
|
||||
private static Document.DocumentBuilder doc(String index) {
|
||||
return Document.builder()
|
||||
.originalFilename(index)
|
||||
.metaDatePrecision(DatePrecision.UNKNOWN);
|
||||
}
|
||||
|
||||
@Test
|
||||
void index_only_when_no_date_and_no_location() {
|
||||
assertThat(factory.build(doc("C-0029").build())).isEqualTo("C-0029");
|
||||
}
|
||||
|
||||
@Test
|
||||
void index_and_year_date() {
|
||||
Document d = doc("C-0029")
|
||||
.documentDate(LocalDate.of(1928, 1, 15))
|
||||
.metaDatePrecision(DatePrecision.YEAR)
|
||||
.build();
|
||||
assertThat(factory.build(d)).isEqualTo("C-0029 – 1928");
|
||||
}
|
||||
|
||||
@Test
|
||||
void index_date_and_location() {
|
||||
Document d = doc("C-0029")
|
||||
.documentDate(LocalDate.of(1928, 1, 15))
|
||||
.metaDatePrecision(DatePrecision.YEAR)
|
||||
.location("Berlin")
|
||||
.build();
|
||||
assertThat(factory.build(d)).isEqualTo("C-0029 – 1928 – Berlin");
|
||||
}
|
||||
|
||||
@Test
|
||||
void location_without_date_attaches_directly_to_index() {
|
||||
Document d = doc("C-0029").location("Berlin").build();
|
||||
assertThat(factory.build(d)).isEqualTo("C-0029 – Berlin");
|
||||
}
|
||||
|
||||
@Test
|
||||
void unknown_precision_omits_the_date_segment() {
|
||||
Document d = doc("C-0029")
|
||||
.documentDate(LocalDate.of(1928, 1, 15))
|
||||
.metaDatePrecision(DatePrecision.UNKNOWN)
|
||||
.build();
|
||||
assertThat(factory.build(d)).isEqualTo("C-0029");
|
||||
}
|
||||
|
||||
@Test
|
||||
void blank_location_is_omitted() {
|
||||
Document d = doc("C-0029")
|
||||
.documentDate(LocalDate.of(1928, 1, 15))
|
||||
.metaDatePrecision(DatePrecision.YEAR)
|
||||
.location(" ")
|
||||
.build();
|
||||
assertThat(factory.build(d)).isEqualTo("C-0029 – 1928");
|
||||
}
|
||||
|
||||
@Test
|
||||
void bare_document_with_null_index_builds_empty_string_not_npe() {
|
||||
// originalFilename is NOT NULL in production; the guard keeps a synthetic/partial entity
|
||||
// from tripping StringBuilder(null) with an opaque NPE.
|
||||
assertThat(factory.build(Document.builder().build())).isEqualTo("");
|
||||
}
|
||||
|
||||
@Test
|
||||
void day_precision_renders_the_full_german_label() {
|
||||
Document d = doc("C-0029")
|
||||
.documentDate(LocalDate.of(1928, 1, 15))
|
||||
.metaDatePrecision(DatePrecision.DAY)
|
||||
.build();
|
||||
assertThat(factory.build(d)).isEqualTo("C-0029 – 15. Januar 1928");
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,9 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.junit.jupiter.api.DynamicTest;
|
||||
import org.junit.jupiter.api.TestFactory;
|
||||
import org.raddatz.familienarchiv.document.DatePrecision;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
@@ -0,0 +1,17 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
/** Test fixtures for {@link SearchFilters}. */
|
||||
final class SearchFiltersFixtures {
|
||||
|
||||
private SearchFiltersFixtures() {}
|
||||
|
||||
/**
|
||||
* A {@link SearchFilters} with no predicate active — the common search-test
|
||||
* baseline. Combine with {@code .withUndated(true)} for the undated-only case;
|
||||
* construct {@code new SearchFilters(...)} directly when a test pins a specific
|
||||
* field, so the intent stays visible at the call site.
|
||||
*/
|
||||
static SearchFilters noFilters() {
|
||||
return new SearchFilters(null, null, null, null, null, null, null, null, null, false);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,123 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.PostgresContainerConfig;
|
||||
import org.raddatz.familienarchiv.tag.Tag;
|
||||
import org.raddatz.familienarchiv.tag.TagRepository;
|
||||
import org.raddatz.familienarchiv.tag.TagService;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatCode;
|
||||
|
||||
/**
|
||||
* #730 — tag-name resolution against a real Postgres. A mocked repo can't prove the two things that
|
||||
* actually break: that {@code findAllByNameIgnoreCase} folds case the way Postgres {@code LOWER()}
|
||||
* does (critical for umlauts like {@code ü}), and that saving a document tagged with a case-colliding
|
||||
* tag no longer throws {@code NonUniqueResultException}. H2 folds case differently, so this pins the
|
||||
* behaviour on {@code postgres:16-alpine}. The four-branch resolution logic itself is covered faster
|
||||
* by the mocked {@code TagServiceTest}.
|
||||
*/
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
|
||||
@ActiveProfiles("test")
|
||||
@Import(PostgresContainerConfig.class)
|
||||
@Transactional
|
||||
class TagCaseCollisionIntegrationTest {
|
||||
|
||||
@MockitoBean S3Client s3Client;
|
||||
@Autowired DocumentService documentService;
|
||||
@Autowired DocumentRepository documentRepository;
|
||||
@Autowired TagRepository tagRepository;
|
||||
@Autowired TagService tagService;
|
||||
|
||||
private Tag persistTag(String name, String sourceRef, UUID parentId) {
|
||||
return tagRepository.save(Tag.builder().name(name).sourceRef(sourceRef).parentId(parentId).build());
|
||||
}
|
||||
|
||||
private Document persistDocTaggedWith(Tag tag) {
|
||||
return documentRepository.save(Document.builder()
|
||||
.originalFilename("C-7301")
|
||||
.title("Weihnachtsbrief")
|
||||
.documentDate(LocalDate.of(1928, 1, 1))
|
||||
.metaDatePrecision(DatePrecision.YEAR)
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.tags(new HashSet<>(Set.of(tag)))
|
||||
.build());
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_succeedsAndKeepsExactChildTag_whenTaggedWithCaseCollidingChild() throws Exception {
|
||||
Tag parent = persistTag("Weihnachten", "Weihnachten", null);
|
||||
Tag child = persistTag("weihnachten", "Weihnachten/weihnachten", parent.getId());
|
||||
Document doc = persistDocTaggedWith(child);
|
||||
|
||||
DocumentUpdateDTO dto = new DocumentUpdateDTO();
|
||||
dto.setTitle("Weihnachtsbrief");
|
||||
dto.setDocumentDate(LocalDate.of(1930, 1, 1)); // change the date — the field that 500'd on staging
|
||||
dto.setMetaDatePrecision(DatePrecision.YEAR);
|
||||
dto.setTags("weihnachten"); // the edit form round-trips the stored child name
|
||||
|
||||
assertThatCode(() -> documentService.updateDocument(doc.getId(), dto, null, null))
|
||||
.doesNotThrowAnyException();
|
||||
|
||||
Set<Tag> tags = documentRepository.findById(doc.getId()).orElseThrow().getTags();
|
||||
assertThat(tags).hasSize(1);
|
||||
assertThat(tags.iterator().next().getId()).isEqualTo(child.getId()); // child kept, not the parent
|
||||
}
|
||||
|
||||
@Test
|
||||
void findOrCreate_resolvesUmlautCollisionDeterministically_withoutThrow() {
|
||||
// The regression catcher: a plain-ASCII pair would stay green even if Postgres folded ü wrongly.
|
||||
Tag parent = persistTag("Glückwünsche", "Glückwünsche", null);
|
||||
Tag child = persistTag("glückwünsche", "Glückwünsche/glückwünsche", parent.getId());
|
||||
|
||||
// Proof that real Postgres LOWER() folds the umlaut so both rows match case-insensitively.
|
||||
// Query with the UPPERCASE form findOrCreate actually passes — folding LOWER('GLÜCKWÜNSCHE')
|
||||
// against LOWER(name) is the exact step under test; a lowercase probe wouldn't exercise it.
|
||||
assertThat(tagRepository.findAllByNameIgnoreCase("GLÜCKWÜNSCHE")).hasSize(2);
|
||||
|
||||
// No exact-case "GLÜCKWÜNSCHE" row exists → resolution falls through to the case-insensitive
|
||||
// branch with two candidates and must pick the lowest id deterministically, never throwing.
|
||||
UUID expected = List.of(parent, child).stream().min(Comparator.comparing(Tag::getId)).orElseThrow().getId();
|
||||
Tag first = tagService.findOrCreate("GLÜCKWÜNSCHE");
|
||||
Tag second = tagService.findOrCreate("GLÜCKWÜNSCHE");
|
||||
|
||||
assertThat(first.getId()).isEqualTo(expected);
|
||||
assertThat(second.getId()).isEqualTo(first.getId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void bulkEdit_resolvesCaseCollidingTagThroughFindOrCreate_withoutThrow() {
|
||||
// Bulk-edit shares resolveTags → findOrCreate; this guards a future refactor that bypasses it.
|
||||
Tag parent = persistTag("Weihnachten", "Weihnachten", null);
|
||||
Tag child = persistTag("weihnachten", "Weihnachten/weihnachten", parent.getId());
|
||||
Document doc = documentRepository.save(Document.builder()
|
||||
.originalFilename("C-7302")
|
||||
.title("Brief")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.build());
|
||||
|
||||
DocumentBulkEditDTO dto = new DocumentBulkEditDTO();
|
||||
dto.setTagNames(List.of("weihnachten"));
|
||||
|
||||
assertThatCode(() -> documentService.applyBulkEditToDocument(doc.getId(), dto, null))
|
||||
.doesNotThrowAnyException();
|
||||
|
||||
Set<Tag> tags = documentRepository.findById(doc.getId()).orElseThrow().getTags();
|
||||
assertThat(tags).hasSize(1);
|
||||
assertThat(tags.iterator().next().getId()).isEqualTo(child.getId());
|
||||
}
|
||||
}
|
||||
@@ -83,6 +83,15 @@ class AnnotationControllerTest {
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void createAnnotation_returns403_whenUserHasOnlyReadAllPermission() throws Exception {
|
||||
mockMvc.perform(post("/api/documents/" + UUID.randomUUID() + "/annotations").with(csrf())
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.content(ANNOTATION_JSON))
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "WRITE_ALL")
|
||||
void createAnnotation_returns201_whenHasWriteAllPermission() throws Exception {
|
||||
@@ -190,6 +199,15 @@ class AnnotationControllerTest {
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void patchAnnotation_returns403_whenUserHasOnlyReadAllPermission() throws Exception {
|
||||
mockMvc.perform(patch("/api/documents/" + UUID.randomUUID() + "/annotations/" + UUID.randomUUID()).with(csrf())
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.content(PATCH_JSON))
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "WRITE_ALL")
|
||||
void patchAnnotation_returns200_withWriteAllPermission() throws Exception {
|
||||
|
||||
@@ -94,6 +94,15 @@ class CommentControllerTest {
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void postBlockComment_returns403_whenUserHasOnlyReadAllPermission() throws Exception {
|
||||
UUID blockId = UUID.randomUUID();
|
||||
mockMvc.perform(post("/api/documents/" + DOC_ID + "/transcription-blocks/" + blockId + "/comments").with(csrf())
|
||||
.contentType(MediaType.APPLICATION_JSON).content(COMMENT_JSON))
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "ANNOTATE_ALL")
|
||||
void postBlockComment_returns201_whenHasAnnotatePermission() throws Exception {
|
||||
@@ -142,6 +151,16 @@ class CommentControllerTest {
|
||||
.andExpect(status().isUnauthorized());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void replyToBlockComment_returns403_whenUserHasOnlyReadAllPermission() throws Exception {
|
||||
UUID blockId = UUID.randomUUID();
|
||||
mockMvc.perform(post("/api/documents/" + DOC_ID + "/transcription-blocks/" + blockId
|
||||
+ "/comments/" + COMMENT_ID + "/replies").with(csrf())
|
||||
.contentType(MediaType.APPLICATION_JSON).content(COMMENT_JSON))
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "ANNOTATE_ALL")
|
||||
void replyToBlockComment_returns201_whenHasPermission() throws Exception {
|
||||
@@ -181,6 +200,14 @@ class CommentControllerTest {
|
||||
.andExpect(status().isUnauthorized());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void editComment_returns403_whenUserHasOnlyReadAllPermission() throws Exception {
|
||||
mockMvc.perform(patch("/api/documents/" + DOC_ID + "/comments/" + COMMENT_ID).with(csrf())
|
||||
.contentType(MediaType.APPLICATION_JSON).content(COMMENT_JSON))
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "ANNOTATE_ALL")
|
||||
void editComment_returns200_whenHasPermission() throws Exception {
|
||||
|
||||
@@ -159,6 +159,15 @@ class TranscriptionBlockControllerTest {
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void createBlock_returns403_whenUserHasOnlyReadAllPermission() throws Exception {
|
||||
mockMvc.perform(post(URL_BASE).with(csrf())
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.content(CREATE_JSON))
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "WRITE_ALL")
|
||||
void createBlock_returns201_withSavedBlock_whenAuthorised() throws Exception {
|
||||
@@ -233,6 +242,15 @@ class TranscriptionBlockControllerTest {
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void updateBlock_returns403_whenUserHasOnlyReadAllPermission() throws Exception {
|
||||
mockMvc.perform(put(URL_BLOCK).with(csrf())
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.content(UPDATE_JSON))
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "WRITE_ALL")
|
||||
void updateBlock_returns200_withUpdatedBlock_whenAuthorised() throws Exception {
|
||||
@@ -363,6 +381,15 @@ class TranscriptionBlockControllerTest {
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void reorderBlocks_returns403_whenUserHasOnlyReadAllPermission() throws Exception {
|
||||
mockMvc.perform(put(URL_REORDER).with(csrf())
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.content(REORDER_JSON))
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "WRITE_ALL")
|
||||
void reorderBlocks_returns200_withReorderedBlocks_whenAuthorised() throws Exception {
|
||||
@@ -440,6 +467,14 @@ class TranscriptionBlockControllerTest {
|
||||
.andExpect(jsonPath("$.reviewed").value(true));
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void reviewBlock_returns403_whenUserHasOnlyReadAllPermission() throws Exception {
|
||||
mockMvc.perform(put("/api/documents/{documentId}/transcription-blocks/{blockId}/review",
|
||||
DOC_ID, BLOCK_ID).with(csrf()))
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
// ─── PUT .../review-all ───────────────────────────────────────────────────
|
||||
|
||||
private static final String URL_REVIEW_ALL = URL_BASE + "/review-all";
|
||||
|
||||
@@ -12,6 +12,8 @@ import org.raddatz.familienarchiv.document.annotation.DocumentAnnotation;
|
||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.document.transcription.PersonMention;
|
||||
import org.raddatz.familienarchiv.document.transcription.TranscriptionBlock;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonRepository;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase;
|
||||
import org.springframework.boot.data.jpa.test.autoconfigure.DataJpaTest;
|
||||
@@ -30,6 +32,7 @@ class TranscriptionBlockMentionsRepositoryTest {
|
||||
@Autowired TranscriptionBlockRepository blockRepository;
|
||||
@Autowired DocumentRepository documentRepository;
|
||||
@Autowired AnnotationRepository annotationRepository;
|
||||
@Autowired PersonRepository personRepository;
|
||||
@Autowired EntityManager em;
|
||||
|
||||
private UUID documentId;
|
||||
@@ -55,8 +58,9 @@ class TranscriptionBlockMentionsRepositoryTest {
|
||||
|
||||
@Test
|
||||
void mentionedPersons_roundTripsTwoEntries() {
|
||||
UUID auguste = UUID.randomUUID();
|
||||
UUID hermann = UUID.randomUUID();
|
||||
// person_id is a real FK since V71 — the mentioned persons must exist.
|
||||
UUID auguste = personRepository.save(Person.builder().firstName("Auguste").lastName("Raddatz").build()).getId();
|
||||
UUID hermann = personRepository.save(Person.builder().firstName("Hermann").lastName("Müller").build()).getId();
|
||||
|
||||
TranscriptionBlock saved = blockRepository.saveAndFlush(TranscriptionBlock.builder()
|
||||
.annotationId(annotationId)
|
||||
@@ -97,8 +101,9 @@ class TranscriptionBlockMentionsRepositoryTest {
|
||||
|
||||
@Test
|
||||
void findByPersonIdWithMentionsFetched_returnsOnlyBlocksReferencingPerson_withMentionsLoaded() {
|
||||
UUID augusteId = UUID.randomUUID();
|
||||
UUID hermannId = UUID.randomUUID();
|
||||
// person_id is a real FK since V71 — the mentioned persons must exist.
|
||||
UUID augusteId = personRepository.save(Person.builder().firstName("Auguste").lastName("Raddatz").build()).getId();
|
||||
UUID hermannId = personRepository.save(Person.builder().firstName("Hermann").lastName("Müller").build()).getId();
|
||||
|
||||
blockRepository.saveAndFlush(TranscriptionBlock.builder()
|
||||
.annotationId(annotationId).documentId(documentId)
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
package org.raddatz.familienarchiv.document.transcription;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class TranscriptionBlockQueryServiceTest {
|
||||
|
||||
@Mock TranscriptionBlockRepository blockRepository;
|
||||
@InjectMocks TranscriptionBlockQueryService queryService;
|
||||
|
||||
@Test
|
||||
void hasBlocks_returns_true_when_a_block_exists() {
|
||||
UUID documentId = UUID.randomUUID();
|
||||
when(blockRepository.existsByDocumentId(documentId)).thenReturn(true);
|
||||
|
||||
assertThat(queryService.hasBlocks(documentId)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void hasBlocks_returns_false_when_no_block_exists() {
|
||||
UUID documentId = UUID.randomUUID();
|
||||
when(blockRepository.existsByDocumentId(documentId)).thenReturn(false);
|
||||
|
||||
assertThat(queryService.hasBlocks(documentId)).isFalse();
|
||||
}
|
||||
}
|
||||
@@ -102,4 +102,22 @@ class TranscriptionBlockRepositoryIntegrationTest {
|
||||
assertThat(byDoc).containsEntry(DOC_A, 100);
|
||||
assertThat(byDoc).containsEntry(DOC_B, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Sql(statements = {
|
||||
"INSERT INTO documents (id, title, original_filename, status) VALUES ('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa', 'Doc A', 'a.pdf', 'PLACEHOLDER')",
|
||||
"INSERT INTO document_annotations (id, document_id, page_number, x, y, width, height, color) VALUES ('cccccccc-cccc-cccc-cccc-cccccccccccc', 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa', 1, 0, 0, 1, 1, '#fff')",
|
||||
"INSERT INTO transcription_blocks (annotation_id, document_id, sort_order, reviewed) VALUES ('cccccccc-cccc-cccc-cccc-cccccccccccc', 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa', 0, false)"
|
||||
})
|
||||
void existsByDocumentId_returns_true_when_document_has_a_block() {
|
||||
assertThat(repository.existsByDocumentId(DOC_A)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
@Sql(statements = {
|
||||
"INSERT INTO documents (id, title, original_filename, status) VALUES ('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa', 'Doc A', 'a.pdf', 'PLACEHOLDER')"
|
||||
})
|
||||
void existsByDocumentId_returns_false_when_document_has_no_blocks() {
|
||||
assertThat(repository.existsByDocumentId(DOC_A)).isFalse();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,18 @@
|
||||
package org.raddatz.familienarchiv.exception;
|
||||
|
||||
import ch.qos.logback.classic.Level;
|
||||
import ch.qos.logback.classic.Logger;
|
||||
import ch.qos.logback.classic.spi.ILoggingEvent;
|
||||
import ch.qos.logback.core.read.ListAppender;
|
||||
import io.sentry.Sentry;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.MockedStatic;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.dao.DataIntegrityViolationException;
|
||||
import org.springframework.dao.IncorrectResultSizeDataAccessException;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
@@ -30,4 +37,108 @@ class GlobalExceptionHandlerTest {
|
||||
assertThat(response.getBody().code()).isEqualTo(ErrorCode.INTERNAL_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void handleGeneric_incorrectResultSize_staysOpaque_noHibernateOrRowCountLeak() {
|
||||
// #731: before the fix, a case-colliding alias/name made Hibernate throw
|
||||
// NonUniqueResultException → IncorrectResultSizeDataAccessException, which has no
|
||||
// dedicated handler and falls through to handleGeneric. The fix removes the throw, but
|
||||
// this pins the handler: a stray one must stay opaque — no Hibernate class name, no SQL,
|
||||
// no "2 results were returned" row count reaching the client (CWE-209).
|
||||
IncorrectResultSizeDataAccessException ex = new IncorrectResultSizeDataAccessException(
|
||||
"query did not return a unique result: 2 results were returned", 1, 2);
|
||||
|
||||
try (MockedStatic<Sentry> sentryMock = mockStatic(Sentry.class)) {
|
||||
ResponseEntity<GlobalExceptionHandler.ErrorResponse> response = handler.handleGeneric(ex);
|
||||
|
||||
assertThat(response.getStatusCode().value()).isEqualTo(500);
|
||||
assertThat(response.getBody()).isNotNull();
|
||||
assertThat(response.getBody().code()).isEqualTo(ErrorCode.INTERNAL_ERROR);
|
||||
assertThat(response.getBody().message())
|
||||
.isEqualTo("An unexpected error occurred")
|
||||
.doesNotContain("results were returned")
|
||||
.doesNotContain("NonUnique")
|
||||
.doesNotContain("IncorrectResultSize");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void handleDataIntegrityViolation_returns400_withoutLeakingConstraint_orSentry() {
|
||||
// A DataIntegrityViolationException carries the constraint name + SQL in its message;
|
||||
// the response and logs must never echo it (CWE-209). It must become a clean 400, not a 500.
|
||||
DataIntegrityViolationException ex = new DataIntegrityViolationException(
|
||||
"could not execute statement; constraint [chk_meta_date_end_after_start]; "
|
||||
+ "column meta_date_end of relation documents");
|
||||
|
||||
Logger handlerLogger = (Logger) LoggerFactory.getLogger(GlobalExceptionHandler.class);
|
||||
ListAppender<ILoggingEvent> appender = new ListAppender<>();
|
||||
appender.start();
|
||||
handlerLogger.addAppender(appender);
|
||||
|
||||
try (MockedStatic<Sentry> sentryMock = mockStatic(Sentry.class)) {
|
||||
ResponseEntity<GlobalExceptionHandler.ErrorResponse> response =
|
||||
handler.handleDataIntegrityViolation(ex);
|
||||
|
||||
assertThat(response.getStatusCode().value()).isEqualTo(400);
|
||||
assertThat(response.getBody()).isNotNull();
|
||||
assertThat(response.getBody().code()).isEqualTo(ErrorCode.VALIDATION_ERROR);
|
||||
assertThat(response.getBody().message())
|
||||
.doesNotContain("chk_")
|
||||
.doesNotContain("meta_date");
|
||||
|
||||
// Defense-in-depth: an unanticipated integrity violation is not a system fault,
|
||||
// so it must NOT fabricate a Sentry alert.
|
||||
sentryMock.verifyNoInteractions();
|
||||
} finally {
|
||||
handlerLogger.detachAppender(appender);
|
||||
}
|
||||
|
||||
assertThat(appender.list)
|
||||
.as("logs a WARN line")
|
||||
.anySatisfy(e -> assertThat(e.getLevel()).isEqualTo(Level.WARN));
|
||||
assertThat(appender.list)
|
||||
.as("never logs the SQL statement / values (would re-leak to Loki)")
|
||||
.noneSatisfy(e -> {
|
||||
assertThat(e.getFormattedMessage()).contains("could not execute statement");
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
void handleDataIntegrityViolation_logsConstraintName_butNotTheSql() {
|
||||
// Debuggability (DevOps): the WARN must name *which* constraint fired so an
|
||||
// unanticipated violation isn't a silent mystery — but it must carry the name only,
|
||||
// never the SQL statement or the offending values that the SQLException message holds.
|
||||
java.sql.SQLException sql = new java.sql.SQLException(
|
||||
"ERROR: violates check constraint; could not execute statement; values (1917-01-10)");
|
||||
org.hibernate.exception.ConstraintViolationException cve =
|
||||
new org.hibernate.exception.ConstraintViolationException(
|
||||
"constraint violation", sql, "chk_meta_date_end_after_start");
|
||||
DataIntegrityViolationException ex = new DataIntegrityViolationException("wrapper", cve);
|
||||
|
||||
Logger handlerLogger = (Logger) LoggerFactory.getLogger(GlobalExceptionHandler.class);
|
||||
ListAppender<ILoggingEvent> appender = new ListAppender<>();
|
||||
appender.start();
|
||||
handlerLogger.addAppender(appender);
|
||||
|
||||
try (MockedStatic<Sentry> sentryMock = mockStatic(Sentry.class)) {
|
||||
ResponseEntity<GlobalExceptionHandler.ErrorResponse> response =
|
||||
handler.handleDataIntegrityViolation(ex);
|
||||
|
||||
// Response stays generic and leak-free (CWE-209) regardless of what we log.
|
||||
assertThat(response.getStatusCode().value()).isEqualTo(400);
|
||||
assertThat(response.getBody().message())
|
||||
.doesNotContain("chk_")
|
||||
.doesNotContain("meta_date");
|
||||
sentryMock.verifyNoInteractions();
|
||||
} finally {
|
||||
handlerLogger.detachAppender(appender);
|
||||
}
|
||||
|
||||
assertThat(appender.list)
|
||||
.as("WARN names the constraint for debuggability")
|
||||
.anySatisfy(e -> assertThat(e.getFormattedMessage()).contains("chk_meta_date_end_after_start"));
|
||||
assertThat(appender.list)
|
||||
.as("but never the SQL statement or values")
|
||||
.noneSatisfy(e -> assertThat(e.getFormattedMessage()).contains("could not execute statement"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.raddatz.familienarchiv.document.Document;
|
||||
import org.raddatz.familienarchiv.document.DocumentService;
|
||||
import org.raddatz.familienarchiv.document.DocumentTitleFactory;
|
||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
@@ -59,8 +60,10 @@ class DocumentImporterTest {
|
||||
// override this stub locally (load_skipsFile_whenMagicByteCheckThrowsIoException).
|
||||
lenient().when(fileStreamOpener.open(any(File.class)))
|
||||
.thenAnswer(inv -> new java.io.FileInputStream(inv.getArgument(0, File.class)));
|
||||
importer = new DocumentImporter(documentService, personService, tagService, s3Client,
|
||||
thumbnailAsyncRunner, fileStreamOpener);
|
||||
// Real factory (pure, dependency-free) so the title-content assertions below exercise
|
||||
// the shared composition rather than a stub — the #726 single source of truth.
|
||||
importer = new DocumentImporter(documentService, new DocumentTitleFactory(), personService,
|
||||
tagService, s3Client, thumbnailAsyncRunner, fileStreamOpener);
|
||||
ReflectionTestUtils.setField(importer, "bucketName", "test-bucket");
|
||||
}
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ import jakarta.persistence.PersistenceContext;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@@ -120,37 +121,60 @@ class PersonRepositoryTest {
|
||||
.containsExactly("Anna", "Clara");
|
||||
}
|
||||
|
||||
// ─── findByAliasIgnoreCase ────────────────────────────────────────────────
|
||||
// ─── findByAlias (exact) / findAllByAliasIgnoreCase (case-folding siblings) ───
|
||||
|
||||
@Test
|
||||
void findByAliasIgnoreCase_returnsMatchingPerson() {
|
||||
void findByAlias_returnsExactCaseMatchOnly() {
|
||||
personRepository.save(Person.builder()
|
||||
.firstName("Karl").lastName("Brandt").alias("Opa Karl").build());
|
||||
|
||||
Optional<Person> found = personRepository.findByAliasIgnoreCase("opa karl");
|
||||
|
||||
assertThat(found).isPresent();
|
||||
assertThat(found.get().getFirstName()).isEqualTo("Karl");
|
||||
assertThat(personRepository.findByAlias("Opa Karl")).isPresent();
|
||||
assertThat(personRepository.findByAlias("opa karl")).isEmpty(); // exact-case: a folded form does NOT match
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByAliasIgnoreCase_returnsEmpty_whenAliasDoesNotMatch() {
|
||||
Optional<Person> found = personRepository.findByAliasIgnoreCase("nobody");
|
||||
|
||||
assertThat(found).isEmpty();
|
||||
void findAllByAliasIgnoreCase_returnsEmpty_whenAliasDoesNotMatch() {
|
||||
assertThat(personRepository.findAllByAliasIgnoreCase("nobody")).isEmpty();
|
||||
}
|
||||
|
||||
// ─── findByFirstNameIgnoreCaseAndLastNameIgnoreCase ───────────────────────
|
||||
@Test
|
||||
void findAllByAliasIgnoreCase_foldsUmlautCase_inRealPostgres() {
|
||||
// Proves Postgres LOWER() folds ü the same way for both rows — a plain-ASCII probe would
|
||||
// stay green even if umlaut folding regressed. Both case-colliding aliases must match.
|
||||
personRepository.save(Person.builder().lastName("Müller").alias("Müller").build());
|
||||
personRepository.save(Person.builder().lastName("müller").alias("müller").build());
|
||||
|
||||
assertThat(personRepository.findAllByAliasIgnoreCase("MÜLLER")).hasSize(2);
|
||||
}
|
||||
|
||||
// ─── findByFirstNameAndLastName (exact) / findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase ───
|
||||
|
||||
@Test
|
||||
void findByFirstNameIgnoreCaseAndLastNameIgnoreCase_returnsMatch() {
|
||||
void findByFirstNameAndLastName_returnsExactCaseMatchOnly() {
|
||||
personRepository.save(Person.builder().firstName("Maria").lastName("Raddatz").build());
|
||||
|
||||
Optional<Person> found = personRepository.findByFirstNameIgnoreCaseAndLastNameIgnoreCase(
|
||||
"maria", "raddatz");
|
||||
assertThat(personRepository.findByFirstNameAndLastName("Maria", "Raddatz")).isPresent();
|
||||
assertThat(personRepository.findByFirstNameAndLastName("maria", "raddatz")).isEmpty(); // exact-case only
|
||||
}
|
||||
|
||||
assertThat(found).isPresent();
|
||||
assertThat(found.get().getFirstName()).isEqualTo("Maria");
|
||||
@Test
|
||||
void findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase_foldsUmlautCase_inRealPostgres() {
|
||||
personRepository.save(Person.builder().firstName("Hans").lastName("Müller").build());
|
||||
personRepository.save(Person.builder().firstName("hans").lastName("müller").build());
|
||||
|
||||
assertThat(personRepository.findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase("HANS", "MÜLLER"))
|
||||
.hasSize(2);
|
||||
}
|
||||
|
||||
@Test
|
||||
void findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase_nullFirstName_foldsToNoMatch() {
|
||||
// Fail-closed: a last-name-only filename (null first name) must NOT widen to first_name IS
|
||||
// NULL and pull in the institution/last-name-only row as a "sender". Proven on real
|
||||
// Postgres because a mocked unit test cannot catch the IS NULL vs `= NULL` semantics.
|
||||
personRepository.save(Person.builder().lastName("Müller").build()); // first_name NULL
|
||||
|
||||
assertThat(personRepository.findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase(null, "Müller"))
|
||||
.isEmpty();
|
||||
}
|
||||
|
||||
// ─── findCorrespondents ───────────────────────────────────────────────────
|
||||
@@ -366,30 +390,6 @@ class PersonRepositoryTest {
|
||||
assertThat(result).hasSize(1);
|
||||
}
|
||||
|
||||
// ─── deleteReceiverReferences ─────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void deleteReceiverReferences_removesPersonFromAllDocumentReceivers() {
|
||||
Person toDelete = personRepository.save(Person.builder().firstName("Weg").lastName("Person").build());
|
||||
Person sender = personRepository.save(Person.builder().firstName("Send").lastName("Er").build());
|
||||
|
||||
Document doc1 = documentRepository.save(Document.builder()
|
||||
.title("Brief 1").originalFilename("b1.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.sender(sender).receivers(Set.of(toDelete)).build());
|
||||
Document doc2 = documentRepository.save(Document.builder()
|
||||
.title("Brief 2").originalFilename("b2.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.sender(sender).receivers(Set.of(toDelete)).build());
|
||||
|
||||
personRepository.deleteReceiverReferences(toDelete.getId());
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
|
||||
assertThat(documentRepository.findById(doc1.getId()).orElseThrow().getReceivers()).isEmpty();
|
||||
assertThat(documentRepository.findById(doc2.getId()).orElseThrow().getReceivers()).isEmpty();
|
||||
}
|
||||
|
||||
// ─── searchByName with aliases ───────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
@@ -707,4 +707,146 @@ class PersonRepositoryTest {
|
||||
assertThat(found).isPresent();
|
||||
assertThat(found.get().getGeneration()).isNull();
|
||||
}
|
||||
|
||||
// ─── #684: ON DELETE integrity enforced at the database layer ──────────────
|
||||
// A raw deleteById (bypassing PersonService) must keep referential integrity:
|
||||
// documents.sender_id → SET NULL, document_receivers.person_id → CASCADE, and the
|
||||
// transcription_block_mentioned_persons soft reference → CASCADE. These run against
|
||||
// real Postgres because the FK ON DELETE behaviour never fires on H2.
|
||||
|
||||
@Test
|
||||
void deleteById_personSenderOfAReceiverOfB_nullsSender_dropsReceiverRow_bothDocumentsSurvive() {
|
||||
Person target = personRepository.save(Person.builder().firstName("Weg").lastName("Person").build());
|
||||
Person bystander = personRepository.save(Person.builder().firstName("Bleibt").lastName("Hier").build());
|
||||
|
||||
Document sent = documentRepository.save(Document.builder()
|
||||
.title("Gesendet").originalFilename("sent.pdf")
|
||||
.status(DocumentStatus.UPLOADED).sender(target).build());
|
||||
Document received = documentRepository.save(Document.builder()
|
||||
.title("Empfangen").originalFilename("received.pdf")
|
||||
.status(DocumentStatus.UPLOADED).sender(bystander)
|
||||
.receivers(Set.of(target)).build());
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
|
||||
personRepository.deleteById(target.getId());
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
|
||||
assertThat(personRepository.findById(target.getId())).isEmpty();
|
||||
|
||||
Document reloadedSent = documentRepository.findById(sent.getId()).orElseThrow();
|
||||
assertThat(reloadedSent.getSender()).isNull(); // AC-1: SET NULL
|
||||
|
||||
Document reloadedReceived = documentRepository.findById(received.getId()).orElseThrow();
|
||||
assertThat(reloadedReceived.getReceivers())
|
||||
.noneMatch(p -> p.getId().equals(target.getId())); // AC-2: CASCADE drops the join row
|
||||
|
||||
// Cascade-boundary guard (Nora, non-negotiable): the cascade stops at the join/reference
|
||||
// layer — both documents themselves survive. Guards against a future migration turning
|
||||
// documents.sender_id SET NULL into CASCADE and destroying historical letters.
|
||||
assertThat(documentRepository.findById(sent.getId())).isPresent();
|
||||
assertThat(documentRepository.findById(received.getId())).isPresent();
|
||||
}
|
||||
|
||||
@Test
|
||||
void deleteById_receiverWithCoReceiver_dropsOnlyDeletedPersonsJoinRow() {
|
||||
Person target = personRepository.save(Person.builder().firstName("Weg").lastName("Person").build());
|
||||
Person coReceiver = personRepository.save(Person.builder().firstName("Mit").lastName("Empfänger").build());
|
||||
Person sender = personRepository.save(Person.builder().firstName("Send").lastName("Er").build());
|
||||
|
||||
Document doc = documentRepository.save(Document.builder()
|
||||
.title("Brief").originalFilename("brief.pdf")
|
||||
.status(DocumentStatus.UPLOADED).sender(sender)
|
||||
.receivers(Set.of(target, coReceiver)).build());
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
|
||||
personRepository.deleteById(target.getId());
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
|
||||
Document reloaded = documentRepository.findById(doc.getId()).orElseThrow();
|
||||
assertThat(reloaded.getReceivers()).extracting(Person::getId)
|
||||
.containsExactly(coReceiver.getId()); // co-receiver untouched
|
||||
}
|
||||
|
||||
@Test
|
||||
void deleteById_personIsSenderAndReceiverOfSameDocument_documentSurvives_senderNull_receiverDropped() {
|
||||
// AC-8: the trickier same-document interaction the cross-document cases don't exercise.
|
||||
Person target = personRepository.save(Person.builder().firstName("Beides").lastName("Person").build());
|
||||
Person coReceiver = personRepository.save(Person.builder().firstName("Mit").lastName("Empfänger").build());
|
||||
|
||||
Document doc = documentRepository.save(Document.builder()
|
||||
.title("Selbstbrief").originalFilename("self.pdf")
|
||||
.status(DocumentStatus.UPLOADED).sender(target)
|
||||
.receivers(Set.of(target, coReceiver)).build());
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
|
||||
personRepository.deleteById(target.getId());
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
|
||||
Document reloaded = documentRepository.findById(doc.getId()).orElseThrow();
|
||||
assertThat(reloaded.getSender()).isNull();
|
||||
assertThat(reloaded.getReceivers()).extracting(Person::getId)
|
||||
.containsExactly(coReceiver.getId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void deleteById_mentionedPerson_dropsMentionRow_blockTextSurvives() {
|
||||
// AC-3: the @-mention sidecar is a CASCADE soft reference, but the literal "@Name" lives
|
||||
// in transcription_blocks.text and must stay visible as plain text after the person goes.
|
||||
Person mentioned = personRepository.save(Person.builder().firstName("Auguste").lastName("Raddatz").build());
|
||||
Person survivor = personRepository.save(Person.builder().firstName("Clara").lastName("Cram").build());
|
||||
Document doc = documentRepository.save(Document.builder()
|
||||
.title("Brief").originalFilename("brief.pdf")
|
||||
.status(DocumentStatus.UPLOADED).build());
|
||||
entityManager.flush();
|
||||
|
||||
UUID annotationId = UUID.randomUUID();
|
||||
UUID blockId = UUID.randomUUID();
|
||||
entityManager.createNativeQuery(
|
||||
"INSERT INTO document_annotations (id, document_id, page_number, x, y, width, height, color) "
|
||||
+ "VALUES (?1, ?2, 1, 0.1, 0.2, 0.3, 0.1, '#fff')")
|
||||
.setParameter(1, annotationId).setParameter(2, doc.getId()).executeUpdate();
|
||||
entityManager.createNativeQuery(
|
||||
"INSERT INTO transcription_blocks (id, annotation_id, document_id, text) VALUES (?1, ?2, ?3, ?4)")
|
||||
.setParameter(1, blockId).setParameter(2, annotationId).setParameter(3, doc.getId())
|
||||
.setParameter(4, "Brief an @Auguste Raddatz und @Clara Cram").executeUpdate();
|
||||
// Two mention rows on the same block: the deleted person and an innocent bystander.
|
||||
entityManager.createNativeQuery(
|
||||
"INSERT INTO transcription_block_mentioned_persons (block_id, person_id, display_name) "
|
||||
+ "VALUES (?1, ?2, ?3)")
|
||||
.setParameter(1, blockId).setParameter(2, mentioned.getId())
|
||||
.setParameter(3, "Auguste Raddatz").executeUpdate();
|
||||
entityManager.createNativeQuery(
|
||||
"INSERT INTO transcription_block_mentioned_persons (block_id, person_id, display_name) "
|
||||
+ "VALUES (?1, ?2, ?3)")
|
||||
.setParameter(1, blockId).setParameter(2, survivor.getId())
|
||||
.setParameter(3, "Clara Cram").executeUpdate();
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
|
||||
personRepository.deleteById(mentioned.getId());
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
|
||||
Number mentionRows = (Number) entityManager.createNativeQuery(
|
||||
"SELECT count(*) FROM transcription_block_mentioned_persons WHERE person_id = ?1")
|
||||
.setParameter(1, mentioned.getId()).getSingleResult();
|
||||
assertThat(mentionRows.longValue()).isZero();
|
||||
|
||||
// The cascade is scoped to the deleted person — the bystander's mention row is untouched.
|
||||
Number survivorRows = (Number) entityManager.createNativeQuery(
|
||||
"SELECT count(*) FROM transcription_block_mentioned_persons WHERE person_id = ?1")
|
||||
.setParameter(1, survivor.getId()).getSingleResult();
|
||||
assertThat(survivorRows.longValue()).isEqualTo(1);
|
||||
|
||||
String text = (String) entityManager.createNativeQuery(
|
||||
"SELECT text FROM transcription_blocks WHERE id = ?1")
|
||||
.setParameter(1, blockId).getSingleResult();
|
||||
assertThat(text).isEqualTo("Brief an @Auguste Raddatz und @Clara Cram");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.PostgresContainerConfig;
|
||||
import org.raddatz.familienarchiv.document.Document;
|
||||
import org.raddatz.familienarchiv.document.DocumentRepository;
|
||||
import org.raddatz.familienarchiv.document.DocumentService;
|
||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonType;
|
||||
@@ -16,10 +17,13 @@ import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
|
||||
import org.springframework.mock.web.MockMultipartFile;
|
||||
|
||||
import jakarta.persistence.EntityManager;
|
||||
import jakarta.persistence.PersistenceContext;
|
||||
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@@ -33,6 +37,7 @@ class PersonServiceIntegrationTest {
|
||||
@Autowired PersonService personService;
|
||||
@Autowired PersonRepository personRepository;
|
||||
@Autowired DocumentRepository documentRepository;
|
||||
@Autowired DocumentService documentService;
|
||||
|
||||
@PersistenceContext EntityManager entityManager;
|
||||
|
||||
@@ -75,6 +80,93 @@ class PersonServiceIntegrationTest {
|
||||
assertThat(result.getLastName()).isEqualTo("Cram");
|
||||
}
|
||||
|
||||
// ─── #731: case-colliding alias resolution against real Postgres ───────────
|
||||
// The umlaut pair is mandatory — only the real DB proves Postgres LOWER() folds ü; a
|
||||
// plain-ASCII test would stay green while umlaut aliases regressed.
|
||||
|
||||
@Test
|
||||
void findOrCreateByAlias_resolvesUmlautAliasCollision_toLowestId_withoutThrow() {
|
||||
Person muller = personRepository.save(Person.builder().lastName("Müller").alias("Müller").build());
|
||||
Person mullerLower = personRepository.save(Person.builder().lastName("müller").alias("müller").build());
|
||||
UUID expected = muller.getId().compareTo(mullerLower.getId()) <= 0 ? muller.getId() : mullerLower.getId();
|
||||
|
||||
// No exact-case "MÜLLER" row → falls through to the case-insensitive branch with two
|
||||
// candidates and must pick the lowest id, never throwing NonUniqueResultException.
|
||||
Person resolved = personService.findOrCreateByAlias("MÜLLER");
|
||||
|
||||
assertThat(resolved.getId()).isEqualTo(expected);
|
||||
}
|
||||
|
||||
@Test
|
||||
void findOrCreateByAlias_umlautAliasCollision_isDeterministicAcrossCalls() {
|
||||
personRepository.save(Person.builder().lastName("Müller").alias("Müller").build());
|
||||
personRepository.save(Person.builder().lastName("müller").alias("müller").build());
|
||||
|
||||
Person first = personService.findOrCreateByAlias("MÜLLER");
|
||||
Person second = personService.findOrCreateByAlias("MÜLLER");
|
||||
|
||||
assertThat(second.getId()).isEqualTo(first.getId());
|
||||
}
|
||||
|
||||
// ─── #731: filename-based sender resolution against real Postgres ──────────
|
||||
|
||||
@Test
|
||||
void storeDocument_resolvesSender_whenFilenameNameIsUnique() throws Exception {
|
||||
Person hans = personRepository.save(Person.builder().firstName("Hans").lastName("Müller").build());
|
||||
|
||||
Document doc = uploadNamed("1965-03-12_Müller_Hans.pdf").document();
|
||||
|
||||
assertThat(doc.getSender()).isNotNull();
|
||||
assertThat(doc.getSender().getId()).isEqualTo(hans.getId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void storeDocument_resolvesSender_onSingleCaseInsensitiveMatch() throws Exception {
|
||||
Person hans = personRepository.save(Person.builder().firstName("Hans").lastName("Müller").build());
|
||||
|
||||
// Filename folds to "hans müller"; the only stored person is "Hans Müller".
|
||||
Document doc = uploadNamed("1965-03-12_müller_hans.pdf").document();
|
||||
|
||||
assertThat(doc.getSender()).isNotNull();
|
||||
assertThat(doc.getSender().getId()).isEqualTo(hans.getId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void storeDocument_leavesSenderUnset_whenFilenameNameIsAmbiguous() throws Exception {
|
||||
// Two persons collide case-insensitively; the filename casing ("HANS"/"MÜLLER") matches
|
||||
// neither exactly → no exact-case winner → bail to null (never an arbitrary guess), no 500.
|
||||
personRepository.save(Person.builder().firstName("Hans").lastName("Müller").build());
|
||||
personRepository.save(Person.builder().firstName("hans").lastName("müller").build());
|
||||
|
||||
Document doc = uploadNamed("1965-03-12_MÜLLER_HANS.pdf").document();
|
||||
|
||||
assertThat(doc.getSender()).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void storeDocument_leavesSenderUnset_whenFilenameHasNoFirstName() throws Exception {
|
||||
// A last-name-only filename never resolves to a sender (the parser yields no parsed name).
|
||||
personRepository.save(Person.builder().lastName("Müller").build());
|
||||
|
||||
Document doc = uploadNamed("1965-03-12_Müller.pdf").document();
|
||||
|
||||
assertThat(doc.getSender()).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByName_nullFirstName_resolvesToEmpty_inRealPostgres() {
|
||||
// Fail-closed against the real DB: a null first name must NOT widen to first_name IS NULL
|
||||
// and pick up the last-name-only row.
|
||||
personRepository.save(Person.builder().lastName("Müller").build()); // first_name NULL
|
||||
|
||||
assertThat(personService.findByName(null, "Müller")).isEmpty();
|
||||
}
|
||||
|
||||
private DocumentService.StoreResult uploadNamed(String filename) throws Exception {
|
||||
MockMultipartFile file = new MockMultipartFile("file", filename, "application/pdf", new byte[]{1, 2, 3});
|
||||
return documentService.storeDocument(file, null);
|
||||
}
|
||||
|
||||
// ─── #667: confirm round-trip + reader-default semantics ──────────────────
|
||||
|
||||
@Test
|
||||
@@ -180,9 +272,9 @@ class PersonServiceIntegrationTest {
|
||||
@Test
|
||||
void deletePerson_detachesSentAndReceivedReferences_beforeDelete_noOrphan() {
|
||||
// A person referenced as BOTH a document sender and a document receiver must delete
|
||||
// cleanly: deletePerson nulls the sender_id FK and removes the receiver join row first
|
||||
// (reassignSenderToNull → deleteReceiverReferences → deleteById), so no FK orphan and
|
||||
// the documents themselves survive.
|
||||
// cleanly via the service path: deletePerson just calls deleteById, and V71's ON DELETE
|
||||
// constraints null the sender_id FK and drop the receiver join row, so there is no FK
|
||||
// orphan and the documents themselves survive.
|
||||
Person target = personRepository.save(Person.builder()
|
||||
.firstName("Weg").lastName("Person").provisional(true).build());
|
||||
Person bystander = personRepository.save(Person.builder()
|
||||
@@ -196,16 +288,16 @@ class PersonServiceIntegrationTest {
|
||||
.status(DocumentStatus.UPLOADED).sender(bystander)
|
||||
.receivers(new java.util.HashSet<>(Set.of(target))).build());
|
||||
|
||||
// Persist the fixture and detach everything so the native @Modifying deletes operate on
|
||||
// the database directly without the persistence context holding stale references that
|
||||
// would re-flush a now-deleted person as a transient association.
|
||||
// Persist the fixture and detach everything so the delete operates on the database
|
||||
// directly without the persistence context holding stale references.
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
|
||||
personService.deletePerson(target.getId());
|
||||
|
||||
// Native @Modifying queries bypass the persistence context — clear it so the asserting
|
||||
// reads observe the post-delete database state, not stale managed entities.
|
||||
// The ON DELETE cascade fires beneath Hibernate — flush the delete and clear the L1
|
||||
// cache so the asserting reads observe the post-delete database state, not stale
|
||||
// managed entities still holding the dropped sender/receiver associations.
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
|
||||
@@ -220,4 +312,38 @@ class PersonServiceIntegrationTest {
|
||||
// The other person and the documents themselves survive the delete.
|
||||
assertThat(personRepository.findById(bystander.getId())).isPresent();
|
||||
}
|
||||
|
||||
@Test
|
||||
void mergePersons_targetInheritsReferences_sourceJoinRowCascadeDrops_noFkError() {
|
||||
// AC-7: merging a source who is sender of A and receiver of B into a target leaves the
|
||||
// target as sender of A and receiver of B, drops the source's leftover receiver row via
|
||||
// V71's ON DELETE CASCADE (no explicit delete, no FK error), and co-receivers are intact.
|
||||
Person source = personRepository.save(Person.builder().firstName("Anna").lastName("Alt").build());
|
||||
Person target = personRepository.save(Person.builder().firstName("Anna").lastName("Neu").build());
|
||||
Person coReceiver = personRepository.save(Person.builder().firstName("Mit").lastName("Empfänger").build());
|
||||
Person sender = personRepository.save(Person.builder().firstName("Send").lastName("Er").build());
|
||||
|
||||
Document docA = documentRepository.save(Document.builder()
|
||||
.title("Von Anna").originalFilename("a.pdf")
|
||||
.status(DocumentStatus.UPLOADED).sender(source).build());
|
||||
Document docB = documentRepository.save(Document.builder()
|
||||
.title("An Anna").originalFilename("b.pdf")
|
||||
.status(DocumentStatus.UPLOADED).sender(sender)
|
||||
.receivers(new java.util.HashSet<>(Set.of(source, coReceiver))).build());
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
|
||||
personService.mergePersons(source.getId(), target.getId());
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
|
||||
assertThat(personRepository.findById(source.getId())).isEmpty();
|
||||
|
||||
Document reloadedA = documentRepository.findById(docA.getId()).orElseThrow();
|
||||
assertThat(reloadedA.getSender().getId()).isEqualTo(target.getId());
|
||||
|
||||
Document reloadedB = documentRepository.findById(docB.getId()).orElseThrow();
|
||||
assertThat(reloadedB.getReceivers()).extracting(Person::getId)
|
||||
.containsExactlyInAnyOrder(target.getId(), coReceiver.getId());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@ import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.argThat;
|
||||
import static org.mockito.Mockito.never;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.verifyNoMoreInteractions;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
@@ -147,9 +148,11 @@ class PersonServiceTest {
|
||||
|
||||
personService.deletePerson(id);
|
||||
|
||||
verify(personRepository).reassignSenderToNull(id);
|
||||
verify(personRepository).deleteReceiverReferences(id);
|
||||
// Integrity is enforced by V71's ON DELETE constraints — the service only checks
|
||||
// existence then deletes; it no longer detaches sender/receiver references itself.
|
||||
verify(personRepository).findById(id);
|
||||
verify(personRepository).deleteById(id);
|
||||
verifyNoMoreInteractions(personRepository);
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -372,14 +375,57 @@ class PersonServiceTest {
|
||||
// ─── findOrCreateByAlias ─────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void findOrCreateByAlias_returnsExisting_whenAliasFound() {
|
||||
String alias = "Walter de Gruyter";
|
||||
Person existing = Person.builder().id(UUID.randomUUID()).alias(alias).build();
|
||||
when(personRepository.findByAliasIgnoreCase(alias)).thenReturn(Optional.of(existing));
|
||||
void findOrCreateByAlias_returnsExactCaseMatch_overCaseInsensitiveSibling() {
|
||||
String alias = "müller";
|
||||
Person exact = Person.builder().id(UUID.randomUUID()).alias("müller").build();
|
||||
when(personRepository.findByAlias(alias)).thenReturn(Optional.of(exact));
|
||||
|
||||
Person result = personService.findOrCreateByAlias(alias);
|
||||
|
||||
assertThat(result).isEqualTo(existing);
|
||||
assertThat(result).isEqualTo(exact);
|
||||
verify(personRepository, never()).findAllByAliasIgnoreCase(any());
|
||||
verify(personRepository, never()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void findOrCreateByAlias_returnsExactCaseMatch_evenWhenMultipleSiblingsCollide() {
|
||||
String alias = "Müller";
|
||||
Person exact = Person.builder().id(UUID.randomUUID()).alias("Müller").build();
|
||||
when(personRepository.findByAlias(alias)).thenReturn(Optional.of(exact));
|
||||
|
||||
Person result = personService.findOrCreateByAlias(alias);
|
||||
|
||||
assertThat(result).isEqualTo(exact);
|
||||
// exact-case short-circuits — the case-insensitive siblings are never consulted.
|
||||
verify(personRepository, never()).findAllByAliasIgnoreCase(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void findOrCreateByAlias_usesSingleCaseInsensitiveMatch_whenNoExactCase() {
|
||||
String alias = "müller";
|
||||
Person only = Person.builder().id(UUID.randomUUID()).alias("Müller").build();
|
||||
when(personRepository.findByAlias(alias)).thenReturn(Optional.empty());
|
||||
when(personRepository.findAllByAliasIgnoreCase(alias)).thenReturn(List.of(only));
|
||||
|
||||
Person result = personService.findOrCreateByAlias(alias);
|
||||
|
||||
assertThat(result).isEqualTo(only);
|
||||
verify(personRepository, never()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void findOrCreateByAlias_returnsLowestIdDeterministically_whenMultipleCaseInsensitiveMatches() {
|
||||
String alias = "müller";
|
||||
Person lower = Person.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000001")).alias("Müller").build();
|
||||
Person higher = Person.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000002")).alias("müller").build();
|
||||
when(personRepository.findByAlias(alias)).thenReturn(Optional.empty());
|
||||
when(personRepository.findAllByAliasIgnoreCase(alias)).thenReturn(List.of(higher, lower)); // unordered
|
||||
|
||||
Person first = personService.findOrCreateByAlias(alias);
|
||||
Person second = personService.findOrCreateByAlias(alias);
|
||||
|
||||
assertThat(first.getId()).isEqualTo(lower.getId()); // lowest id wins
|
||||
assertThat(second.getId()).isEqualTo(first.getId()); // same result every call — never throws
|
||||
verify(personRepository, never()).save(any());
|
||||
}
|
||||
|
||||
@@ -387,7 +433,8 @@ class PersonServiceTest {
|
||||
void findOrCreateByAlias_createsNew_whenAliasNotFound() {
|
||||
String alias = "Clara Cram";
|
||||
Person saved = Person.builder().id(UUID.randomUUID()).alias(alias).firstName("Clara").lastName("Cram").build();
|
||||
when(personRepository.findByAliasIgnoreCase(alias)).thenReturn(Optional.empty());
|
||||
when(personRepository.findByAlias(alias)).thenReturn(Optional.empty());
|
||||
when(personRepository.findAllByAliasIgnoreCase(alias)).thenReturn(List.of());
|
||||
when(personRepository.save(any())).thenReturn(saved);
|
||||
|
||||
Person result = personService.findOrCreateByAlias(alias);
|
||||
@@ -400,7 +447,8 @@ class PersonServiceTest {
|
||||
void findOrCreateByAlias_createsMaidenNameAlias_whenGebPresent() {
|
||||
String alias = "Clara Cram geb. de Gruyter";
|
||||
Person saved = Person.builder().id(UUID.randomUUID()).alias(alias).firstName("Clara").lastName("Cram").build();
|
||||
when(personRepository.findByAliasIgnoreCase(alias)).thenReturn(Optional.empty());
|
||||
when(personRepository.findByAlias(alias)).thenReturn(Optional.empty());
|
||||
when(personRepository.findAllByAliasIgnoreCase(alias)).thenReturn(List.of());
|
||||
when(personRepository.save(any())).thenReturn(saved);
|
||||
when(aliasRepository.findMaxSortOrder(saved.getId())).thenReturn(0);
|
||||
when(aliasRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
@@ -422,7 +470,8 @@ class PersonServiceTest {
|
||||
@Test
|
||||
void findOrCreateByAlias_setsInstitutionType_withFullNameInLastName() {
|
||||
String alias = "Arthur Collignon GmbH";
|
||||
when(personRepository.findByAliasIgnoreCase(alias)).thenReturn(Optional.empty());
|
||||
when(personRepository.findByAlias(alias)).thenReturn(Optional.empty());
|
||||
when(personRepository.findAllByAliasIgnoreCase(alias)).thenReturn(List.of());
|
||||
when(personRepository.save(any())).thenAnswer(inv -> {
|
||||
Person p = inv.getArgument(0);
|
||||
p.setId(UUID.randomUUID());
|
||||
@@ -439,7 +488,8 @@ class PersonServiceTest {
|
||||
@Test
|
||||
void findOrCreateByAlias_setsGroupType_withFullNameInLastName() {
|
||||
String alias = "Geschwister de Gruyter";
|
||||
when(personRepository.findByAliasIgnoreCase(alias)).thenReturn(Optional.empty());
|
||||
when(personRepository.findByAlias(alias)).thenReturn(Optional.empty());
|
||||
when(personRepository.findAllByAliasIgnoreCase(alias)).thenReturn(List.of());
|
||||
when(personRepository.save(any())).thenAnswer(inv -> {
|
||||
Person p = inv.getArgument(0);
|
||||
p.setId(UUID.randomUUID());
|
||||
@@ -457,7 +507,8 @@ class PersonServiceTest {
|
||||
void findOrCreateByAlias_noAlias_whenNoGeb() {
|
||||
String alias = "Clara Cram";
|
||||
Person saved = Person.builder().id(UUID.randomUUID()).alias(alias).firstName("Clara").lastName("Cram").build();
|
||||
when(personRepository.findByAliasIgnoreCase(alias)).thenReturn(Optional.empty());
|
||||
when(personRepository.findByAlias(alias)).thenReturn(Optional.empty());
|
||||
when(personRepository.findAllByAliasIgnoreCase(alias)).thenReturn(List.of());
|
||||
when(personRepository.save(any())).thenReturn(saved);
|
||||
|
||||
personService.findOrCreateByAlias(alias);
|
||||
@@ -469,11 +520,54 @@ class PersonServiceTest {
|
||||
void findOrCreateByAlias_trimsInput() {
|
||||
String alias = " Clara Cram ";
|
||||
Person saved = Person.builder().id(UUID.randomUUID()).alias("Clara Cram").build();
|
||||
when(personRepository.findByAliasIgnoreCase("Clara Cram")).thenReturn(Optional.of(saved));
|
||||
when(personRepository.findByAlias("Clara Cram")).thenReturn(Optional.of(saved));
|
||||
|
||||
personService.findOrCreateByAlias(alias);
|
||||
|
||||
verify(personRepository).findByAliasIgnoreCase("Clara Cram");
|
||||
verify(personRepository).findByAlias("Clara Cram");
|
||||
}
|
||||
|
||||
// ─── findByName (filename-based sender resolution) ────────────────────────
|
||||
|
||||
@Test
|
||||
void findByName_returnsExactCaseMatch_overCaseInsensitiveSibling() {
|
||||
Person exact = Person.builder().id(UUID.randomUUID()).firstName("Hans").lastName("Müller").build();
|
||||
when(personRepository.findByFirstNameAndLastName("Hans", "Müller")).thenReturn(Optional.of(exact));
|
||||
|
||||
assertThat(personService.findByName("Hans", "Müller")).contains(exact);
|
||||
verify(personRepository, never()).findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase(any(), any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByName_usesSingleCaseInsensitiveMatch_whenNoExactCase() {
|
||||
Person only = Person.builder().id(UUID.randomUUID()).firstName("Hans").lastName("Müller").build();
|
||||
when(personRepository.findByFirstNameAndLastName("hans", "müller")).thenReturn(Optional.empty());
|
||||
when(personRepository.findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase("hans", "müller"))
|
||||
.thenReturn(List.of(only));
|
||||
|
||||
assertThat(personService.findByName("hans", "müller")).contains(only);
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByName_bailsToEmpty_whenTwoOrMoreCaseInsensitiveMatches() {
|
||||
Person a = Person.builder().id(UUID.randomUUID()).firstName("Hans").lastName("Müller").build();
|
||||
Person b = Person.builder().id(UUID.randomUUID()).firstName("hans").lastName("müller").build();
|
||||
when(personRepository.findByFirstNameAndLastName("hans", "müller")).thenReturn(Optional.empty());
|
||||
when(personRepository.findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase("hans", "müller"))
|
||||
.thenReturn(List.of(a, b));
|
||||
|
||||
// Ambiguous sender → unset, never an arbitrary guess (provenance correctness over a
|
||||
// confidently-wrong pre-fill). This is the deliberate divergence from the alias path.
|
||||
assertThat(personService.findByName("hans", "müller")).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByName_returnsEmpty_whenFirstNameNullFoldsToNoMatch() {
|
||||
when(personRepository.findByFirstNameAndLastName(null, "Müller")).thenReturn(Optional.empty());
|
||||
when(personRepository.findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase(null, "Müller"))
|
||||
.thenReturn(List.of());
|
||||
|
||||
assertThat(personService.findByName(null, "Müller")).isEmpty();
|
||||
}
|
||||
|
||||
// ─── updatePerson (notes) ────────────────────────────────────────────────
|
||||
@@ -700,10 +794,14 @@ class PersonServiceTest {
|
||||
|
||||
personService.mergePersons(sourceId, targetId);
|
||||
|
||||
verify(personRepository).findById(sourceId);
|
||||
verify(personRepository).findById(targetId);
|
||||
verify(personRepository).reassignSender(sourceId, targetId);
|
||||
verify(personRepository).insertMissingReceiverReference(sourceId, targetId);
|
||||
verify(personRepository).deleteReceiverReferences(sourceId);
|
||||
verify(personRepository).deleteById(sourceId);
|
||||
// The source's leftover receiver rows cascade-drop via V71's ON DELETE CASCADE on
|
||||
// deleteById — merge no longer deletes them explicitly.
|
||||
verifyNoMoreInteractions(personRepository);
|
||||
}
|
||||
|
||||
// ─── getAliases ─────────────────────────────────────────────────────────
|
||||
@@ -800,4 +898,15 @@ class PersonServiceTest {
|
||||
.extracting(e -> ((DomainException) e).getStatus().value())
|
||||
.isEqualTo(403);
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByDisplayNameContaining_delegatesToSearchByName() {
|
||||
Person walter = Person.builder().id(UUID.randomUUID()).firstName("Walter").lastName("Müller").build();
|
||||
when(personRepository.searchByName("Walter")).thenReturn(List.of(walter));
|
||||
|
||||
List<Person> result = personService.findByDisplayNameContaining("Walter");
|
||||
|
||||
assertThat(result).containsExactly(walter);
|
||||
verify(personRepository).searchByName("Walter");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,440 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.mockito.ArgumentCaptor;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.MockitoAnnotations;
|
||||
import org.raddatz.familienarchiv.document.DocumentSearchResult;
|
||||
import org.raddatz.familienarchiv.document.DocumentService;
|
||||
import org.raddatz.familienarchiv.document.DocumentSort;
|
||||
import org.raddatz.familienarchiv.document.SearchFilters;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonService;
|
||||
import org.raddatz.familienarchiv.tag.TagOperator;
|
||||
import org.springframework.data.domain.PageRequest;
|
||||
import org.springframework.data.domain.Pageable;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
import static org.mockito.ArgumentMatchers.*;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
class NlQueryParserServiceTest {
|
||||
|
||||
@Mock OllamaClient ollamaClient;
|
||||
@Mock PersonService personService;
|
||||
@Mock DocumentService documentService;
|
||||
|
||||
NlQueryParserService service;
|
||||
|
||||
static final Pageable PAGE = PageRequest.of(0, 20);
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
MockitoAnnotations.openMocks(this);
|
||||
service = new NlQueryParserService(ollamaClient, personService, documentService);
|
||||
when(documentService.searchDocuments(any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.of(List.of()));
|
||||
when(documentService.searchDocumentsByPersonId(any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.of(List.of()));
|
||||
}
|
||||
|
||||
// --- Factory helpers ---
|
||||
|
||||
private OllamaExtraction extraction(List<String> names, String role, LocalDate from, LocalDate to,
|
||||
List<String> keywords) {
|
||||
String raw = names.isEmpty() ? "test query" : String.join(" ", names);
|
||||
return new OllamaExtraction(names, role, from, to, keywords, raw);
|
||||
}
|
||||
|
||||
private Person person(UUID id, String firstName, String lastName) {
|
||||
return Person.builder().id(id).firstName(firstName).lastName(lastName).build();
|
||||
}
|
||||
|
||||
private static final UUID P1 = UUID.fromString("00000000-0000-0000-0000-000000000001");
|
||||
private static final UUID P2 = UUID.fromString("00000000-0000-0000-0000-000000000002");
|
||||
private static final UUID P3 = UUID.fromString("00000000-0000-0000-0000-000000000003");
|
||||
|
||||
// --- 1. Single resolved name + personRole=sender ---
|
||||
|
||||
@Test
|
||||
void search_resolvesSingleName_asSender() {
|
||||
Person walter = person(P1, "Walter", "Raddatz");
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of("Walter"), "sender", null, null, List.of()));
|
||||
when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
|
||||
|
||||
NlSearchResponse resp = service.search("Was hat Walter geschrieben?", PAGE);
|
||||
|
||||
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
|
||||
verify(documentService).searchDocuments(cap.capture(), eq(DocumentSort.DATE), eq("desc"), eq(PAGE));
|
||||
assertThat(cap.getValue().sender()).isEqualTo(P1);
|
||||
assertThat(cap.getValue().receiver()).isNull();
|
||||
assertThat(resp.interpretation().resolvedPersons()).hasSize(1);
|
||||
assertThat(resp.interpretation().resolvedPersons().get(0).id()).isEqualTo(P1);
|
||||
assertThat(resp.interpretation().ambiguousPersons()).isEmpty();
|
||||
}
|
||||
|
||||
// --- 2. Multi-match name → ambiguous, search NOT executed ---
|
||||
|
||||
@Test
|
||||
void search_multiMatchName_populatesAmbiguous_andSkipsSearch() {
|
||||
Person a = person(UUID.randomUUID(), "Walter", "Braun");
|
||||
Person b = person(UUID.randomUUID(), "Walter", "Schmidt");
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of("Walter"), "sender", null, null, List.of()));
|
||||
when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(a, b));
|
||||
|
||||
NlSearchResponse resp = service.search("Briefe von Walter", PAGE);
|
||||
|
||||
verify(documentService, never()).searchDocuments(any(), any(), any(), any());
|
||||
verify(documentService, never()).searchDocumentsByPersonId(any(), any(), any(), any());
|
||||
assertThat(resp.interpretation().ambiguousPersons()).hasSize(2);
|
||||
assertThat(resp.interpretation().resolvedPersons()).isEmpty();
|
||||
}
|
||||
|
||||
// --- 3. Multi-match + personRole=any → still ambiguous, search NOT executed ---
|
||||
|
||||
@Test
|
||||
void search_multiMatchName_withPersonRoleAny_stillSkipsSearch() {
|
||||
Person a = person(UUID.randomUUID(), "Emma", "Braun");
|
||||
Person b = person(UUID.randomUUID(), "Emma", "Raddatz");
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of("Emma"), "any", null, null, List.of()));
|
||||
when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(a, b));
|
||||
|
||||
NlSearchResponse resp = service.search("Briefe an Emma", PAGE);
|
||||
|
||||
verify(documentService, never()).searchDocuments(any(), any(), any(), any());
|
||||
verify(documentService, never()).searchDocumentsByPersonId(any(), any(), any(), any());
|
||||
assertThat(resp.interpretation().ambiguousPersons()).hasSize(2);
|
||||
}
|
||||
|
||||
// --- 4. No-match name → folded into text ---
|
||||
|
||||
@Test
|
||||
void search_noMatchName_isFoldedIntoText() {
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of("Karl"), "any", null, null, List.of()));
|
||||
when(personService.findByDisplayNameContaining("Karl")).thenReturn(List.of());
|
||||
|
||||
service.search("Briefe von Karl", PAGE);
|
||||
|
||||
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
|
||||
verify(documentService).searchDocuments(cap.capture(), any(), any(), any());
|
||||
assertThat(cap.getValue().text()).contains("Karl");
|
||||
assertThat(cap.getValue().sender()).isNull();
|
||||
assertThat(cap.getValue().receiver()).isNull();
|
||||
}
|
||||
|
||||
// --- 5. personRole=any + 1 resolved → searchDocumentsByPersonId called ---
|
||||
|
||||
@Test
|
||||
void search_personRoleAny_singleMatch_callsSearchDocumentsByPersonId() {
|
||||
Person walter = person(P1, "Walter", "Raddatz");
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of("Walter"), "any", null, null, List.of()));
|
||||
when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
|
||||
|
||||
NlSearchResponse resp = service.search("Briefe von Walter", PAGE);
|
||||
|
||||
verify(documentService).searchDocumentsByPersonId(eq(P1), isNull(), isNull(), eq(PAGE));
|
||||
verify(documentService, never()).searchDocuments(any(), any(), any(), any());
|
||||
assertThat(resp.interpretation().keywordsApplied()).isFalse();
|
||||
}
|
||||
|
||||
// --- 6. 2 names both resolve → sender=person1, receiver=person2 ---
|
||||
|
||||
@Test
|
||||
void search_twoNamesResolve_assignsSenderAndReceiver() {
|
||||
Person walter = person(P1, "Walter", "Raddatz");
|
||||
Person emma = person(P2, "Emma", "Raddatz");
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of("Walter", "Emma"), "any", null, null, List.of()));
|
||||
when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
|
||||
when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(emma));
|
||||
|
||||
NlSearchResponse resp = service.search("Briefe von Walter an Emma", PAGE);
|
||||
|
||||
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
|
||||
verify(documentService).searchDocuments(cap.capture(), eq(DocumentSort.DATE), eq("desc"), eq(PAGE));
|
||||
assertThat(cap.getValue().sender()).isEqualTo(P1);
|
||||
assertThat(cap.getValue().receiver()).isEqualTo(P2);
|
||||
assertThat(resp.interpretation().resolvedPersons().get(0).id()).isEqualTo(P1);
|
||||
assertThat(resp.interpretation().resolvedPersons().get(1).id()).isEqualTo(P2);
|
||||
}
|
||||
|
||||
// --- 7. 2 names, first resolves, second ambiguous → search NOT executed ---
|
||||
|
||||
@Test
|
||||
void search_twoNames_secondAmbiguous_skipsSearch() {
|
||||
Person walter = person(P1, "Walter", "Raddatz");
|
||||
Person emma1 = person(P2, "Emma", "Braun");
|
||||
Person emma2 = person(P3, "Emma", "Schmidt");
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of("Walter", "Emma"), "sender", null, null, List.of()));
|
||||
when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
|
||||
when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(emma1, emma2));
|
||||
|
||||
NlSearchResponse resp = service.search("Briefe von Walter an Emma", PAGE);
|
||||
|
||||
verify(documentService, never()).searchDocuments(any(), any(), any(), any());
|
||||
assertThat(resp.interpretation().ambiguousPersons()).hasSize(2);
|
||||
}
|
||||
|
||||
// --- 8. 2 names, first no match → folded into text, second used as single person ---
|
||||
|
||||
@Test
|
||||
void search_twoNames_firstNoMatch_secondResolved_foldFirstIntoText() {
|
||||
Person emma = person(P2, "Emma", "Raddatz");
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of("Karl", "Emma"), "sender", null, null, List.of()));
|
||||
when(personService.findByDisplayNameContaining("Karl")).thenReturn(List.of());
|
||||
when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(emma));
|
||||
|
||||
service.search("Briefe von Karl an Emma", PAGE);
|
||||
|
||||
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
|
||||
verify(documentService).searchDocuments(cap.capture(), any(), any(), any());
|
||||
assertThat(cap.getValue().text()).contains("Karl");
|
||||
assertThat(cap.getValue().sender()).isEqualTo(P2);
|
||||
}
|
||||
|
||||
// --- 9. 3+ names all resolve → first two as sender/receiver, third folded into text ---
|
||||
|
||||
@Test
|
||||
void search_threeNamesResolve_extraFoldedIntoText() {
|
||||
Person walter = person(P1, "Walter", "Raddatz");
|
||||
Person emma = person(P2, "Emma", "Raddatz");
|
||||
Person heinrich = person(P3, "Heinrich", "Braun");
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of("Walter", "Emma", "Heinrich"), "any", null, null, List.of()));
|
||||
when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
|
||||
when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(emma));
|
||||
when(personService.findByDisplayNameContaining("Heinrich")).thenReturn(List.of(heinrich));
|
||||
|
||||
service.search("Briefe von Walter an Emma über Heinrich", PAGE);
|
||||
|
||||
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
|
||||
verify(documentService).searchDocuments(cap.capture(), any(), any(), any());
|
||||
assertThat(cap.getValue().sender()).isEqualTo(P1);
|
||||
assertThat(cap.getValue().receiver()).isEqualTo(P2);
|
||||
assertThat(cap.getValue().text()).contains("Heinrich");
|
||||
}
|
||||
|
||||
// --- 10. Keywords space-joined into text ---
|
||||
|
||||
@Test
|
||||
void search_keywords_areJoinedIntoText() {
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of(), "any", null, null, List.of("Krieg", "Walter")));
|
||||
|
||||
service.search("Dokumente über den Krieg Walter", PAGE);
|
||||
|
||||
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
|
||||
verify(documentService).searchDocuments(cap.capture(), any(), any(), any());
|
||||
assertThat(cap.getValue().text()).isEqualTo("Krieg Walter");
|
||||
}
|
||||
|
||||
// --- 11. Date range passed through ---
|
||||
|
||||
@Test
|
||||
void search_dateRange_passedIntoSearchFilters() {
|
||||
LocalDate from = LocalDate.of(1914, 1, 1);
|
||||
LocalDate to = LocalDate.of(1914, 12, 31);
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of(), "any", from, to, List.of()));
|
||||
|
||||
service.search("Briefe aus dem Jahr 1914", PAGE);
|
||||
|
||||
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
|
||||
verify(documentService).searchDocuments(cap.capture(), any(), any(), any());
|
||||
assertThat(cap.getValue().from()).isEqualTo(from);
|
||||
assertThat(cap.getValue().to()).isEqualTo(to);
|
||||
}
|
||||
|
||||
// --- 12. Null dates → null in SearchFilters (not an error) ---
|
||||
|
||||
@Test
|
||||
void search_nullDates_passedAsNullIntoFilters() {
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of(), "any", null, null, List.of("Hochzeit")));
|
||||
|
||||
service.search("Hochzeitsbriefe", PAGE);
|
||||
|
||||
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
|
||||
verify(documentService).searchDocuments(cap.capture(), any(), any(), any());
|
||||
assertThat(cap.getValue().from()).isNull();
|
||||
assertThat(cap.getValue().to()).isNull();
|
||||
}
|
||||
|
||||
// --- 13. Query under 3 chars → VALIDATION_ERROR before Ollama call ---
|
||||
|
||||
@Test
|
||||
void search_queryTooShort_throwsValidationError() {
|
||||
assertThatThrownBy(() -> service.search("ab", PAGE))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.extracting(e -> ((DomainException) e).getCode())
|
||||
.isEqualTo(ErrorCode.VALIDATION_ERROR);
|
||||
|
||||
verify(ollamaClient, never()).parse(anyString());
|
||||
}
|
||||
|
||||
// --- 14. Query over 500 chars → VALIDATION_ERROR ---
|
||||
|
||||
@Test
|
||||
void search_queryTooLong_throwsValidationError() {
|
||||
String longQuery = "a".repeat(501);
|
||||
assertThatThrownBy(() -> service.search(longQuery, PAGE))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.extracting(e -> ((DomainException) e).getCode())
|
||||
.isEqualTo(ErrorCode.VALIDATION_ERROR);
|
||||
|
||||
verify(ollamaClient, never()).parse(anyString());
|
||||
}
|
||||
|
||||
// --- 15. Ollama returns empty names/keywords → raw query used as keyword fallback ---
|
||||
|
||||
@Test
|
||||
void search_ollamaReturnsEmpty_usesRawQueryAsTextFallback() {
|
||||
String raw = "Briefe aus dem Krieg";
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(new OllamaExtraction(List.of(), "any", null, null, List.of(), raw));
|
||||
|
||||
service.search(raw, PAGE);
|
||||
|
||||
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
|
||||
verify(documentService).searchDocuments(cap.capture(), any(), any(), any());
|
||||
assertThat(cap.getValue().text()).isEqualTo(raw);
|
||||
}
|
||||
|
||||
// --- 16. Null personNames/keywords from Ollama → no NPE ---
|
||||
|
||||
@Test
|
||||
void search_nullPersonNamesAndKeywords_handledWithoutNpe() {
|
||||
OllamaExtraction ext = new OllamaExtraction(null, "any", null, null, null, "test query");
|
||||
when(ollamaClient.parse(anyString())).thenReturn(ext);
|
||||
|
||||
NlSearchResponse resp = service.search("test query", PAGE);
|
||||
|
||||
assertThat(resp).isNotNull();
|
||||
verify(documentService).searchDocuments(any(), any(), any(), any());
|
||||
}
|
||||
|
||||
// --- 17. Unrecognized personRole → defaults to any-like behavior (no crash) ---
|
||||
|
||||
@Test
|
||||
void search_unrecognizedPersonRole_treatedLikeAny_withSingleResolvedPerson() {
|
||||
Person walter = person(P1, "Walter", "Raddatz");
|
||||
// OllamaClient defensive parsing returns "any" for unknown roles,
|
||||
// but NlQueryParserService must also be safe if something unexpected arrives.
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(new OllamaExtraction(List.of("Walter"), "unknown_role", null, null, List.of(), "query"));
|
||||
when(personService.findByDisplayNameContaining("Walter")).thenReturn(List.of(walter));
|
||||
|
||||
NlSearchResponse resp = service.search("Briefe von Walter", PAGE);
|
||||
|
||||
// Should not crash; "unknown_role" treated as fallback (neither sender nor receiver → any)
|
||||
assertThat(resp).isNotNull();
|
||||
}
|
||||
|
||||
// --- 18. Ollama throws SMART_SEARCH_UNAVAILABLE → propagates to caller ---
|
||||
|
||||
@Test
|
||||
void search_ollamaThrowsUnavailable_propagates() {
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenThrow(DomainException.tooManyRequests(ErrorCode.SMART_SEARCH_UNAVAILABLE, "offline"));
|
||||
|
||||
assertThatThrownBy(() -> service.search("Was hat Walter geschrieben?", PAGE))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.extracting(e -> ((DomainException) e).getCode())
|
||||
.isEqualTo(ErrorCode.SMART_SEARCH_UNAVAILABLE);
|
||||
}
|
||||
|
||||
// --- 19. LLM-extracted name > 200 chars → skipped, PersonService never called ---
|
||||
|
||||
@Test
|
||||
void search_nameLongerThan200Chars_isSkippedBeforePersonServiceCall() {
|
||||
String longName = "A".repeat(201);
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of(longName), "sender", null, null, List.of()));
|
||||
|
||||
service.search("Briefe von sehr langem Namen", PAGE);
|
||||
|
||||
verify(personService, never()).findByDisplayNameContaining(anyString());
|
||||
}
|
||||
|
||||
// --- 20. Max 10 candidates cap: 11 persons returned → only first 10 in ambiguousPersons ---
|
||||
|
||||
@Test
|
||||
void search_elevenCandidates_capsAtTen() {
|
||||
List<Person> eleven = new ArrayList<>();
|
||||
for (int i = 0; i < 11; i++) {
|
||||
eleven.add(person(UUID.randomUUID(), "Walter", "Person" + i));
|
||||
}
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of("Walter"), "sender", null, null, List.of()));
|
||||
when(personService.findByDisplayNameContaining("Walter")).thenReturn(eleven);
|
||||
|
||||
NlSearchResponse resp = service.search("Briefe von Walter", PAGE);
|
||||
|
||||
assertThat(resp.interpretation().ambiguousPersons()).hasSize(10);
|
||||
verify(documentService, never()).searchDocuments(any(), any(), any(), any());
|
||||
}
|
||||
|
||||
// --- 21. SearchFilters defaults: tagOperator=AND, status=null, undated=false, tags=empty ---
|
||||
|
||||
@Test
|
||||
void search_searchFiltersDefaults_areCorrect() {
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of(), "any", null, null, List.of("Krieg")));
|
||||
|
||||
service.search("Dokumente über den Krieg", PAGE);
|
||||
|
||||
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
|
||||
verify(documentService).searchDocuments(cap.capture(), eq(DocumentSort.DATE), eq("desc"), eq(PAGE));
|
||||
SearchFilters f = cap.getValue();
|
||||
assertThat(f.tagOperator()).isEqualTo(TagOperator.AND);
|
||||
assertThat(f.status()).isNull();
|
||||
assertThat(f.undated()).isFalse();
|
||||
assertThat(f.tags()).isEmpty();
|
||||
assertThat(f.tagQ()).isNull();
|
||||
}
|
||||
|
||||
// --- 22. personRole=receiver + 1 resolved → receiver UUID set ---
|
||||
|
||||
@Test
|
||||
void search_personRoleReceiver_singleMatch_setsReceiver() {
|
||||
Person emma = person(P2, "Emma", "Raddatz");
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of("Emma"), "receiver", null, null, List.of()));
|
||||
when(personService.findByDisplayNameContaining("Emma")).thenReturn(List.of(emma));
|
||||
|
||||
service.search("Briefe an Emma", PAGE);
|
||||
|
||||
ArgumentCaptor<SearchFilters> cap = ArgumentCaptor.forClass(SearchFilters.class);
|
||||
verify(documentService).searchDocuments(cap.capture(), any(), any(), any());
|
||||
assertThat(cap.getValue().receiver()).isEqualTo(P2);
|
||||
assertThat(cap.getValue().sender()).isNull();
|
||||
}
|
||||
|
||||
// --- 23. keywordsApplied=true when text is non-blank ---
|
||||
|
||||
@Test
|
||||
void search_keywordsApplied_trueWhenTextNonBlank() {
|
||||
when(ollamaClient.parse(anyString()))
|
||||
.thenReturn(extraction(List.of(), "any", null, null, List.of("Feldpost")));
|
||||
|
||||
NlSearchResponse resp = service.search("Feldpost aus dem Krieg", PAGE);
|
||||
|
||||
assertThat(resp.interpretation().keywordsApplied()).isTrue();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,161 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
import tools.jackson.databind.ObjectMapper;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.document.DocumentSearchResult;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.security.SecurityConfig;
|
||||
import org.raddatz.familienarchiv.security.PermissionAspect;
|
||||
import org.raddatz.familienarchiv.user.CustomUserDetailsService;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.aop.AopAutoConfiguration;
|
||||
import org.springframework.boot.webmvc.test.autoconfigure.WebMvcTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.security.test.context.support.WithMockUser;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.springframework.test.web.servlet.MockMvc;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.mockito.ArgumentMatchers.*;
|
||||
import static org.mockito.Mockito.when;
|
||||
import static org.springframework.security.test.web.servlet.request.SecurityMockMvcRequestPostProcessors.csrf;
|
||||
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post;
|
||||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*;
|
||||
|
||||
@WebMvcTest(NlSearchController.class)
|
||||
@Import({SecurityConfig.class, PermissionAspect.class, AopAutoConfiguration.class,
|
||||
NlSearchRateLimiter.class, NlSearchRateLimitProperties.class})
|
||||
class NlSearchControllerTest {
|
||||
|
||||
@Autowired MockMvc mockMvc;
|
||||
private final ObjectMapper objectMapper = new ObjectMapper();
|
||||
|
||||
@MockitoBean NlQueryParserService nlQueryParserService;
|
||||
@MockitoBean CustomUserDetailsService customUserDetailsService;
|
||||
@Autowired NlSearchRateLimiter rateLimiter;
|
||||
|
||||
@BeforeEach
|
||||
void resetRateLimiter() {
|
||||
rateLimiter.resetForTest();
|
||||
}
|
||||
|
||||
private NlSearchResponse makeResponse() {
|
||||
PersonHint hint = new PersonHint(UUID.randomUUID(), "Walter Raddatz");
|
||||
NlQueryInterpretation interp = new NlQueryInterpretation(
|
||||
List.of(hint), List.of(), null, null,
|
||||
List.of("Krieg"), "Briefe von Walter im Krieg", true);
|
||||
return new NlSearchResponse(DocumentSearchResult.of(List.of()), interp);
|
||||
}
|
||||
|
||||
// --- 1. Happy path ---
|
||||
|
||||
@Test
|
||||
@WithMockUser(username = "user@test.com", authorities = {"READ_ALL"})
|
||||
void search_returns200_withNlSearchResponse() throws Exception {
|
||||
when(nlQueryParserService.search(anyString(), any())).thenReturn(makeResponse());
|
||||
|
||||
mockMvc.perform(post("/api/search/nl").with(csrf())
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.content("{\"query\":\"Briefe von Walter im Krieg\"}"))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.interpretation.rawQuery").value("Briefe von Walter im Krieg"))
|
||||
.andExpect(jsonPath("$.interpretation.resolvedPersons[0].displayName").value("Walter Raddatz"))
|
||||
.andExpect(jsonPath("$.interpretation.keywordsApplied").value(true));
|
||||
}
|
||||
|
||||
// --- 2. ambiguousPersons in response shape ---
|
||||
|
||||
@Test
|
||||
@WithMockUser(username = "user@test.com", authorities = {"READ_ALL"})
|
||||
void search_returns200_withAmbiguousPersons() throws Exception {
|
||||
PersonHint a = new PersonHint(UUID.randomUUID(), "Walter Braun");
|
||||
PersonHint b = new PersonHint(UUID.randomUUID(), "Walter Schmidt");
|
||||
NlQueryInterpretation interp = new NlQueryInterpretation(
|
||||
List.of(), List.of(a, b), null, null,
|
||||
List.of(), "Briefe von Walter", false);
|
||||
NlSearchResponse resp = new NlSearchResponse(DocumentSearchResult.of(List.of()), interp);
|
||||
when(nlQueryParserService.search(anyString(), any())).thenReturn(resp);
|
||||
|
||||
mockMvc.perform(post("/api/search/nl").with(csrf())
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.content("{\"query\":\"Briefe von Walter\"}"))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.interpretation.ambiguousPersons").isArray())
|
||||
.andExpect(jsonPath("$.interpretation.ambiguousPersons[0].displayName").value("Walter Braun"))
|
||||
.andExpect(jsonPath("$.interpretation.ambiguousPersons[1].id").isNotEmpty());
|
||||
}
|
||||
|
||||
// --- 3. Unauthenticated → 401 ---
|
||||
|
||||
@Test
|
||||
void search_returns401_whenUnauthenticated() throws Exception {
|
||||
mockMvc.perform(post("/api/search/nl").with(csrf())
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.content("{\"query\":\"Briefe von Walter\"}"))
|
||||
.andExpect(status().isUnauthorized());
|
||||
}
|
||||
|
||||
// --- 4. Query < 3 chars → 400 ---
|
||||
|
||||
@Test
|
||||
@WithMockUser(username = "user@test.com", authorities = {"READ_ALL"})
|
||||
void search_returns400_whenQueryTooShort() throws Exception {
|
||||
mockMvc.perform(post("/api/search/nl").with(csrf())
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.content("{\"query\":\"ab\"}"))
|
||||
.andExpect(status().isBadRequest());
|
||||
}
|
||||
|
||||
// --- 5. Query > 500 chars → 400 ---
|
||||
|
||||
@Test
|
||||
@WithMockUser(username = "user@test.com", authorities = {"READ_ALL"})
|
||||
void search_returns400_whenQueryTooLong() throws Exception {
|
||||
String longQuery = "a".repeat(501);
|
||||
mockMvc.perform(post("/api/search/nl").with(csrf())
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.content("{\"query\":\"" + longQuery + "\"}"))
|
||||
.andExpect(status().isBadRequest());
|
||||
}
|
||||
|
||||
// --- 6. Ollama unavailable → 503 ---
|
||||
|
||||
@Test
|
||||
@WithMockUser(username = "user@test.com", authorities = {"READ_ALL"})
|
||||
void search_returns503_whenOllamaUnavailable() throws Exception {
|
||||
when(nlQueryParserService.search(anyString(), any()))
|
||||
.thenThrow(DomainException.serviceUnavailable(ErrorCode.SMART_SEARCH_UNAVAILABLE, "Ollama offline"));
|
||||
|
||||
mockMvc.perform(post("/api/search/nl").with(csrf())
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.content("{\"query\":\"Briefe von Walter\"}"))
|
||||
.andExpect(status().isServiceUnavailable())
|
||||
.andExpect(jsonPath("$.code").value("SMART_SEARCH_UNAVAILABLE"));
|
||||
}
|
||||
|
||||
// --- 7. 6th request in 1 minute → 429 ---
|
||||
|
||||
@Test
|
||||
@WithMockUser(username = "user@test.com", authorities = {"READ_ALL"})
|
||||
void search_returns429_onSixthRequestWithinRateLimit() throws Exception {
|
||||
when(nlQueryParserService.search(anyString(), any())).thenReturn(makeResponse());
|
||||
|
||||
for (int i = 0; i < 5; i++) {
|
||||
mockMvc.perform(post("/api/search/nl").with(csrf())
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.content("{\"query\":\"Briefe von Walter\"}"))
|
||||
.andExpect(status().isOk());
|
||||
}
|
||||
|
||||
mockMvc.perform(post("/api/search/nl").with(csrf())
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.content("{\"query\":\"Briefe von Walter\"}"))
|
||||
.andExpect(status().isTooManyRequests())
|
||||
.andExpect(jsonPath("$.code").value("SMART_SEARCH_RATE_LIMITED"));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,62 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThatCode;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
|
||||
class NlSearchRateLimiterTest {
|
||||
|
||||
private NlSearchRateLimiter rateLimiter;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
NlSearchRateLimitProperties props = new NlSearchRateLimitProperties();
|
||||
props.setMaxRequestsPerMinute(5);
|
||||
rateLimiter = new NlSearchRateLimiter(props);
|
||||
}
|
||||
|
||||
@Test
|
||||
void checkAndConsume_allowsRequestsWithinLimit() {
|
||||
for (int i = 0; i < 5; i++) {
|
||||
assertThatCode(() -> rateLimiter.checkAndConsume("user@example.com"))
|
||||
.doesNotThrowAnyException();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void checkAndConsume_throwsRateLimited_onSixthRequest() {
|
||||
for (int i = 0; i < 5; i++) {
|
||||
rateLimiter.checkAndConsume("user@example.com");
|
||||
}
|
||||
|
||||
assertThatThrownBy(() -> rateLimiter.checkAndConsume("user@example.com"))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.extracting(e -> ((DomainException) e).getCode())
|
||||
.isEqualTo(ErrorCode.SMART_SEARCH_RATE_LIMITED);
|
||||
}
|
||||
|
||||
@Test
|
||||
void checkAndConsume_limitsAreIndependentPerUser() {
|
||||
for (int i = 0; i < 5; i++) {
|
||||
rateLimiter.checkAndConsume("alice@example.com");
|
||||
}
|
||||
assertThatCode(() -> rateLimiter.checkAndConsume("bob@example.com"))
|
||||
.doesNotThrowAnyException();
|
||||
}
|
||||
|
||||
@Test
|
||||
void resetForTest_clearsAllBuckets() {
|
||||
for (int i = 0; i < 5; i++) {
|
||||
rateLimiter.checkAndConsume("user@example.com");
|
||||
}
|
||||
|
||||
rateLimiter.resetForTest();
|
||||
|
||||
assertThatCode(() -> rateLimiter.checkAndConsume("user@example.com"))
|
||||
.doesNotThrowAnyException();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,113 @@
|
||||
package org.raddatz.familienarchiv.search;
|
||||
|
||||
import com.github.tomakehurst.wiremock.WireMockServer;
|
||||
import com.github.tomakehurst.wiremock.core.WireMockConfiguration;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
|
||||
import static com.github.tomakehurst.wiremock.client.WireMock.*;
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
|
||||
class RestClientOllamaClientTest {
|
||||
|
||||
private WireMockServer wireMock;
|
||||
private RestClientOllamaClient client;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
wireMock = new WireMockServer(WireMockConfiguration.wireMockConfig().dynamicPort());
|
||||
wireMock.start();
|
||||
|
||||
OllamaProperties props = new OllamaProperties();
|
||||
props.setBaseUrl("http://localhost:" + wireMock.port());
|
||||
props.setModel("qwen2.5:7b-instruct-q4_K_M");
|
||||
props.setTimeoutSeconds(5);
|
||||
props.setHealthCheckTimeoutSeconds(2);
|
||||
|
||||
client = new RestClientOllamaClient(props);
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
void tearDown() {
|
||||
wireMock.stop();
|
||||
}
|
||||
|
||||
// --- Factory helpers ---
|
||||
|
||||
private String makeOllamaResponseJson(String personNamesJson, String personRole,
|
||||
String dateFrom, String dateTo, String keywordsJson) {
|
||||
String inner = String.format(
|
||||
"{\"personNames\":%s,\"personRole\":\"%s\",\"dateFrom\":%s,\"dateTo\":%s,\"keywords\":%s}",
|
||||
personNamesJson, personRole,
|
||||
dateFrom == null ? "null" : "\"" + dateFrom + "\"",
|
||||
dateTo == null ? "null" : "\"" + dateTo + "\"",
|
||||
keywordsJson
|
||||
);
|
||||
return String.format("{\"model\":\"qwen2.5:7b-instruct-q4_K_M\",\"response\":\"%s\",\"done\":true}",
|
||||
inner.replace("\"", "\\\""));
|
||||
}
|
||||
|
||||
// --- Test cases ---
|
||||
|
||||
@Test
|
||||
void parse_returnsExtraction_whenOllamaReturnsValidJson() {
|
||||
String body = makeOllamaResponseJson("[\"Walter\"]", "sender", "1914-01-01", "1914-12-31", "[\"Krieg\"]");
|
||||
wireMock.stubFor(post(urlEqualTo("/api/generate"))
|
||||
.willReturn(aResponse()
|
||||
.withStatus(200)
|
||||
.withHeader("Content-Type", "application/json")
|
||||
.withBody(body)));
|
||||
|
||||
OllamaExtraction result = client.parse("Was hat Walter im Krieg geschrieben?");
|
||||
|
||||
assertThat(result.personNames()).containsExactly("Walter");
|
||||
assertThat(result.personRole()).isEqualTo("sender");
|
||||
assertThat(result.keywords()).containsExactly("Krieg");
|
||||
assertThat(result.dateFrom()).isNotNull();
|
||||
assertThat(result.dateTo()).isNotNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void parse_throwsSmartSearchUnavailable_whenOllamaReturns500() {
|
||||
wireMock.stubFor(post(urlEqualTo("/api/generate"))
|
||||
.willReturn(aResponse().withStatus(500)));
|
||||
|
||||
assertThatThrownBy(() -> client.parse("some query"))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.extracting(e -> ((DomainException) e).getCode())
|
||||
.isEqualTo(ErrorCode.SMART_SEARCH_UNAVAILABLE);
|
||||
}
|
||||
|
||||
@Test
|
||||
void parse_throwsSmartSearchUnavailable_whenOllamaExceedsTimeout() {
|
||||
wireMock.stubFor(post(urlEqualTo("/api/generate"))
|
||||
.willReturn(aResponse()
|
||||
.withStatus(200)
|
||||
.withHeader("Content-Type", "application/json")
|
||||
.withFixedDelay(6000)
|
||||
.withBody("{\"response\":\"{}\",\"done\":true}")));
|
||||
|
||||
assertThatThrownBy(() -> client.parse("some query"))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.extracting(e -> ((DomainException) e).getCode())
|
||||
.isEqualTo(ErrorCode.SMART_SEARCH_UNAVAILABLE);
|
||||
}
|
||||
|
||||
@Test
|
||||
void parse_throwsSmartSearchUnavailable_whenOllamaReturnsMalformedJson() {
|
||||
wireMock.stubFor(post(urlEqualTo("/api/generate"))
|
||||
.willReturn(aResponse()
|
||||
.withStatus(200)
|
||||
.withHeader("Content-Type", "application/json")
|
||||
.withBody("{\"response\":\"not-json-at-all\",\"done\":true}")));
|
||||
|
||||
assertThatThrownBy(() -> client.parse("some query"))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.extracting(e -> ((DomainException) e).getCode())
|
||||
.isEqualTo(ErrorCode.SMART_SEARCH_UNAVAILABLE);
|
||||
}
|
||||
}
|
||||
@@ -102,8 +102,8 @@ class TagControllerTest {
|
||||
void getTagTree_returns200_withTreeStructure() throws Exception {
|
||||
UUID parentId = UUID.randomUUID();
|
||||
UUID childId = UUID.randomUUID();
|
||||
TagTreeNodeDTO child = new TagTreeNodeDTO(childId, "Haus", null, 0, List.of(), parentId);
|
||||
TagTreeNodeDTO parent = new TagTreeNodeDTO(parentId, "Immobilie", "teal", 0, List.of(child), null);
|
||||
TagTreeNodeDTO child = new TagTreeNodeDTO(childId, "Haus", null, 0, 0, List.of(), parentId);
|
||||
TagTreeNodeDTO parent = new TagTreeNodeDTO(parentId, "Immobilie", "teal", 0, 0, List.of(child), null);
|
||||
when(tagService.getTagTree()).thenReturn(List.of(parent));
|
||||
|
||||
mockMvc.perform(get("/api/tags/tree"))
|
||||
|
||||
@@ -0,0 +1,179 @@
|
||||
package org.raddatz.familienarchiv.tag;
|
||||
|
||||
import jakarta.persistence.EntityManager;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.PostgresContainerConfig;
|
||||
import org.raddatz.familienarchiv.config.FlywayConfig;
|
||||
import org.raddatz.familienarchiv.document.Document;
|
||||
import org.raddatz.familienarchiv.document.DocumentRepository;
|
||||
import org.raddatz.familienarchiv.document.DocumentSpecifications;
|
||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.data.jpa.test.autoconfigure.DataJpaTest;
|
||||
import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase;
|
||||
import org.springframework.context.annotation.Import;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatCode;
|
||||
|
||||
/**
|
||||
* Real-Postgres validation of the subtree document-count rollup ({@link TagRepository
|
||||
* #findSubtreeDocumentCountsPerTag}). The recursive CTE + COUNT(DISTINCT) cannot be exercised on
|
||||
* H2, so these run against {@code postgres:16-alpine} via Testcontainers. Covers issue #698
|
||||
* AC#1–#4, #6 (REQ-THEMEN-06 cycle guard) and #7 (count↔destination parity).
|
||||
*/
|
||||
@DataJpaTest
|
||||
@AutoConfigureTestDatabase(replace = AutoConfigureTestDatabase.Replace.NONE)
|
||||
@Import({PostgresContainerConfig.class, FlywayConfig.class})
|
||||
class TagRollupRepositoryIntegrationTest {
|
||||
|
||||
@Autowired private TagRepository tagRepository;
|
||||
@Autowired private DocumentRepository documentRepository;
|
||||
@Autowired private EntityManager entityManager;
|
||||
|
||||
// ─── helpers ──────────────────────────────────────────────────────────────
|
||||
|
||||
private Tag tag(String name, UUID parentId) {
|
||||
return tagRepository.save(Tag.builder().name(name).parentId(parentId).build());
|
||||
}
|
||||
|
||||
private Document docWithTags(String title, Tag... tags) {
|
||||
return documentRepository.save(Document.builder()
|
||||
.title(title)
|
||||
.originalFilename(title + ".pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.tags(new HashSet<>(Set.of(tags)))
|
||||
.build());
|
||||
}
|
||||
|
||||
private Map<UUID, Long> rollup() {
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
return tagRepository.findSubtreeDocumentCountsPerTag().stream()
|
||||
.collect(Collectors.toMap(TagRepository.TagCount::getTagId, TagRepository.TagCount::getCount));
|
||||
}
|
||||
|
||||
// ─── AC#4 — rollup of a leaf equals its direct count ────────────────────────
|
||||
|
||||
@Test
|
||||
void leafTag_subtreeCount_equalsItsDirectCount() {
|
||||
Tag leaf = tag("Tagebuch", null);
|
||||
docWithTags("a", leaf);
|
||||
docWithTags("b", leaf);
|
||||
docWithTags("c", leaf);
|
||||
|
||||
assertThat(rollup().get(leaf.getId())).isEqualTo(3L);
|
||||
}
|
||||
|
||||
// ─── AC#1 + AC#2 — parent rolls up children, distinct (shared doc counted once) ──
|
||||
|
||||
@Test
|
||||
void parentTag_rollsUpChildDocuments_countingSharedDocumentOnce() {
|
||||
Tag reisen = tag("Reisen", null);
|
||||
Tag italien = tag("Italien", reisen.getId());
|
||||
|
||||
Document shared = docWithTags("shared", reisen, italien); // tagged with both
|
||||
docWithTags("reisenOnly", reisen);
|
||||
docWithTags("it1", italien);
|
||||
docWithTags("it2", italien);
|
||||
docWithTags("it3", italien);
|
||||
docWithTags("it4", italien);
|
||||
|
||||
Map<UUID, Long> rollup = rollup();
|
||||
|
||||
// Reisen direct {shared, reisenOnly} = 2; Italien {shared, it1..it4} = 5; union distinct = 6
|
||||
assertThat(rollup.get(reisen.getId())).isEqualTo(6L);
|
||||
assertThat(rollup.get(italien.getId())).isEqualTo(5L);
|
||||
assertThat(shared.getId()).isNotNull();
|
||||
}
|
||||
|
||||
// ─── AC#3 — full descendant depth (grandchildren included) ──────────────────
|
||||
|
||||
@Test
|
||||
void rollup_includesGrandchildDocuments_atFullDepth() {
|
||||
Tag reisen = tag("Reisen", null);
|
||||
Tag italien = tag("Italien", reisen.getId());
|
||||
Tag rom = tag("Rom", italien.getId());
|
||||
|
||||
docWithTags("r1", reisen);
|
||||
docWithTags("i1", italien);
|
||||
docWithTags("rom1", rom);
|
||||
docWithTags("rom2", rom);
|
||||
docWithTags("rom3", rom);
|
||||
|
||||
Map<UUID, Long> rollup = rollup();
|
||||
|
||||
assertThat(rollup.get(reisen.getId())).isEqualTo(5L); // 1 + 1 + 3, all distinct
|
||||
assertThat(rollup.get(italien.getId())).isEqualTo(4L); // 1 + 3
|
||||
assertThat(rollup.get(rom.getId())).isEqualTo(3L);
|
||||
}
|
||||
|
||||
// ─── REQ-THEMEN-05 — a tag whose whole subtree is empty is absent (→ 0) ─────
|
||||
|
||||
@Test
|
||||
void tagWithEmptySubtree_isAbsentFromRollup() {
|
||||
Tag empty = tag("Leer", null);
|
||||
Tag emptyChild = tag("LeerKind", empty.getId());
|
||||
|
||||
Map<UUID, Long> rollup = rollup();
|
||||
|
||||
assertThat(rollup).doesNotContainKey(empty.getId());
|
||||
assertThat(rollup).doesNotContainKey(emptyChild.getId());
|
||||
}
|
||||
|
||||
// ─── REQ-THEMEN-06 — a hierarchy cycle terminates safely via the depth guard ──
|
||||
|
||||
@Test
|
||||
void rollup_terminatesSafely_whenHierarchyContainsCycle() {
|
||||
Tag a = tag("CycleA", null);
|
||||
Tag b = tag("CycleB", a.getId());
|
||||
// Close the loop: A.parent = B (DB only forbids parent_id == id, so a 2-node cycle is insertable)
|
||||
a.setParentId(b.getId());
|
||||
tagRepository.save(a);
|
||||
|
||||
docWithTags("ca", a);
|
||||
docWithTags("cb", b);
|
||||
|
||||
assertThatCode(this::rollup).doesNotThrowAnyException(); // depth guard prevents a runaway recursion
|
||||
Map<UUID, Long> rollup = rollup();
|
||||
|
||||
// COUNT(DISTINCT document_id) dedupes documents reached via repeated cycle paths
|
||||
assertThat(rollup.get(a.getId())).isEqualTo(2L);
|
||||
assertThat(rollup.get(b.getId())).isEqualTo(2L);
|
||||
}
|
||||
|
||||
// ─── AC#7 — count↔destination parity with the real search expansion ─────────
|
||||
|
||||
@Test
|
||||
void subtreeCount_equalsDistinctDocumentsFoundByTagSearch_parity() {
|
||||
// Uniquely-named root so name-based search expansion lines up with the per-id rollup.
|
||||
Tag root = tag("ZzzParitaetReise", null);
|
||||
Tag child = tag("ZzzParitaetItalien", root.getId());
|
||||
Tag grandchild = tag("ZzzParitaetRom", child.getId());
|
||||
|
||||
docWithTags("p_shared", root, child); // overlap inside the subtree
|
||||
docWithTags("p_root", root);
|
||||
docWithTags("p_child", child);
|
||||
docWithTags("p_gc1", grandchild);
|
||||
docWithTags("p_gc2", grandchild);
|
||||
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
|
||||
long rollupCount = rollup().get(root.getId());
|
||||
|
||||
List<UUID> searchExpansionIds = tagRepository.findDescendantIdsByName("ZzzParitaetReise");
|
||||
var spec = DocumentSpecifications.hasTags(List.of(new HashSet<>(searchExpansionIds)), true);
|
||||
long distinctSearchResults = documentRepository.findAll(spec).stream()
|
||||
.map(Document::getId).distinct().count();
|
||||
|
||||
assertThat(rollupCount).isEqualTo(distinctSearchResults);
|
||||
}
|
||||
}
|
||||
@@ -53,20 +53,68 @@ class TagServiceTest {
|
||||
// ─── findOrCreate ─────────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void findOrCreate_returnsExisting_whenNameFound() {
|
||||
Tag existing = Tag.builder().id(UUID.randomUUID()).name("Familie").build();
|
||||
when(tagRepository.findByNameIgnoreCase("Familie")).thenReturn(Optional.of(existing));
|
||||
void findOrCreate_exactCaseWins_overCaseInsensitiveSibling() {
|
||||
// "Geburt" (parent) and "geburt" (child) both exist; the edit round-trip replays the stored
|
||||
// name "geburt", which must bind to the exact-case row, not the parent.
|
||||
Tag exact = Tag.builder().id(UUID.randomUUID()).name("geburt").build();
|
||||
when(tagRepository.findByName("geburt")).thenReturn(Optional.of(exact));
|
||||
|
||||
Tag result = tagService.findOrCreate("Familie");
|
||||
Tag result = tagService.findOrCreate("geburt");
|
||||
|
||||
assertThat(result).isEqualTo(existing);
|
||||
assertThat(result).isEqualTo(exact);
|
||||
verify(tagRepository, never()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void findOrCreate_createsNew_whenNameNotFound() {
|
||||
void findOrCreate_exactCaseWins_evenWhenItsIdIsNotTheLowest() {
|
||||
// Adversarial guard: exact-case must short-circuit BEFORE the lowest-id rule. Here the exact row
|
||||
// has the higher id, so a naive "always pick lowest id across all CI matches" would pick wrong.
|
||||
Tag exactHigherId = Tag.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000009")).name("geburt").build();
|
||||
when(tagRepository.findByName("geburt")).thenReturn(Optional.of(exactHigherId));
|
||||
|
||||
Tag result = tagService.findOrCreate("geburt");
|
||||
|
||||
assertThat(result).isEqualTo(exactHigherId);
|
||||
verify(tagRepository, never()).findAllByNameIgnoreCase(any()); // exact-case wins without consulting the CI list
|
||||
verify(tagRepository, never()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void findOrCreate_usesSingleCaseInsensitiveMatch_whenNoExactCase() {
|
||||
// Stored name is "Weihnachten"; a save replays "weihnachten" (no exact-case row) → bind to the
|
||||
// single case-insensitive match rather than creating a duplicate.
|
||||
Tag stored = Tag.builder().id(UUID.randomUUID()).name("Weihnachten").build();
|
||||
when(tagRepository.findByName("weihnachten")).thenReturn(Optional.empty());
|
||||
when(tagRepository.findAllByNameIgnoreCase("weihnachten")).thenReturn(List.of(stored));
|
||||
|
||||
Tag result = tagService.findOrCreate("weihnachten");
|
||||
|
||||
assertThat(result).isEqualTo(stored);
|
||||
verify(tagRepository, never()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void findOrCreate_returnsLowestIdDeterministically_whenMultipleCaseInsensitiveMatches() {
|
||||
// Two rows collide case-insensitively and neither equals the query exactly. Resolution must be
|
||||
// deterministic (lowest id) and never throw — proven by calling twice and getting the same id.
|
||||
Tag lowerId = Tag.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000001")).name("Reisepläne").build();
|
||||
Tag higherId = Tag.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000002")).name("reisepläne").build();
|
||||
when(tagRepository.findByName("REISEPLÄNE")).thenReturn(Optional.empty());
|
||||
when(tagRepository.findAllByNameIgnoreCase("REISEPLÄNE")).thenReturn(List.of(higherId, lowerId));
|
||||
|
||||
Tag first = tagService.findOrCreate("REISEPLÄNE");
|
||||
Tag second = tagService.findOrCreate("REISEPLÄNE");
|
||||
|
||||
assertThat(first.getId()).isEqualTo(lowerId.getId());
|
||||
assertThat(second.getId()).isEqualTo(first.getId());
|
||||
verify(tagRepository, never()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void findOrCreate_createsOrphanTag_whenNameAbsent() {
|
||||
Tag saved = Tag.builder().id(UUID.randomUUID()).name("Krieg").build();
|
||||
when(tagRepository.findByNameIgnoreCase("Krieg")).thenReturn(Optional.empty());
|
||||
when(tagRepository.findByName("Krieg")).thenReturn(Optional.empty());
|
||||
when(tagRepository.findAllByNameIgnoreCase("Krieg")).thenReturn(List.of());
|
||||
when(tagRepository.save(any())).thenReturn(saved);
|
||||
|
||||
Tag result = tagService.findOrCreate("Krieg");
|
||||
@@ -76,13 +124,15 @@ class TagServiceTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
void findOrCreate_trimsWhitespaceBeforeLookup() {
|
||||
Tag existing = Tag.builder().id(UUID.randomUUID()).name("Urlaub").build();
|
||||
when(tagRepository.findByNameIgnoreCase("Urlaub")).thenReturn(Optional.of(existing));
|
||||
void findOrCreate_trimsWhitespace_thenLandsOnCaseInsensitiveChild() {
|
||||
Tag child = Tag.builder().id(UUID.randomUUID()).name("weihnachten").build();
|
||||
when(tagRepository.findByName("weihnachten")).thenReturn(Optional.empty());
|
||||
when(tagRepository.findAllByNameIgnoreCase("weihnachten")).thenReturn(List.of(child));
|
||||
|
||||
tagService.findOrCreate(" Urlaub ");
|
||||
Tag result = tagService.findOrCreate(" weihnachten ");
|
||||
|
||||
verify(tagRepository).findByNameIgnoreCase("Urlaub");
|
||||
assertThat(result).isEqualTo(child);
|
||||
verify(tagRepository).findByName("weihnachten");
|
||||
}
|
||||
|
||||
// ─── update ───────────────────────────────────────────────────────────────
|
||||
@@ -199,6 +249,7 @@ class TagServiceTest {
|
||||
void getTagTree_returnsEmptyList_whenNoTags() {
|
||||
when(tagRepository.findAll()).thenReturn(List.of());
|
||||
when(tagRepository.findDocumentCountsPerTag()).thenReturn(List.of());
|
||||
when(tagRepository.findSubtreeDocumentCountsPerTag()).thenReturn(List.of());
|
||||
|
||||
assertThat(tagService.getTagTree()).isEmpty();
|
||||
}
|
||||
@@ -213,6 +264,7 @@ class TagServiceTest {
|
||||
);
|
||||
when(tagRepository.findAll()).thenReturn(tags);
|
||||
when(tagRepository.findDocumentCountsPerTag()).thenReturn(List.of());
|
||||
when(tagRepository.findSubtreeDocumentCountsPerTag()).thenReturn(List.of());
|
||||
|
||||
var tree = tagService.getTagTree();
|
||||
|
||||
@@ -228,6 +280,7 @@ class TagServiceTest {
|
||||
Tag child = Tag.builder().id(childId).name("Child").parentId(parentId).build();
|
||||
when(tagRepository.findAll()).thenReturn(List.of(parent, child));
|
||||
when(tagRepository.findDocumentCountsPerTag()).thenReturn(List.of());
|
||||
when(tagRepository.findSubtreeDocumentCountsPerTag()).thenReturn(List.of());
|
||||
|
||||
var tree = tagService.getTagTree();
|
||||
|
||||
@@ -247,6 +300,7 @@ class TagServiceTest {
|
||||
Tag child = Tag.builder().id(childId).name("Child").parentId(parentId).build();
|
||||
when(tagRepository.findAll()).thenReturn(List.of(parent, child));
|
||||
when(tagRepository.findDocumentCountsPerTag()).thenReturn(List.of());
|
||||
when(tagRepository.findSubtreeDocumentCountsPerTag()).thenReturn(List.of());
|
||||
|
||||
var tree = tagService.getTagTree();
|
||||
|
||||
@@ -262,6 +316,7 @@ class TagServiceTest {
|
||||
when(countEntry.getCount()).thenReturn(5L);
|
||||
when(tagRepository.findAll()).thenReturn(List.of(tag));
|
||||
when(tagRepository.findDocumentCountsPerTag()).thenReturn(List.of(countEntry));
|
||||
when(tagRepository.findSubtreeDocumentCountsPerTag()).thenReturn(List.of());
|
||||
|
||||
var tree = tagService.getTagTree();
|
||||
|
||||
@@ -272,12 +327,60 @@ class TagServiceTest {
|
||||
void getTagTree_callsFindDocumentCountsPerTag_exactlyOnce() {
|
||||
when(tagRepository.findAll()).thenReturn(List.of());
|
||||
when(tagRepository.findDocumentCountsPerTag()).thenReturn(List.of());
|
||||
when(tagRepository.findSubtreeDocumentCountsPerTag()).thenReturn(List.of());
|
||||
|
||||
tagService.getTagTree();
|
||||
|
||||
verify(tagRepository, times(1)).findDocumentCountsPerTag();
|
||||
}
|
||||
|
||||
@Test
|
||||
void getTagTree_populatesSubtreeDocumentCount_fromRollupQuery() {
|
||||
UUID tagId = UUID.randomUUID();
|
||||
Tag tag = Tag.builder().id(tagId).name("Reisen").build();
|
||||
TagRepository.TagCount subtreeEntry = mock(TagRepository.TagCount.class);
|
||||
when(subtreeEntry.getTagId()).thenReturn(tagId);
|
||||
when(subtreeEntry.getCount()).thenReturn(7L);
|
||||
when(tagRepository.findAll()).thenReturn(List.of(tag));
|
||||
when(tagRepository.findDocumentCountsPerTag()).thenReturn(List.of());
|
||||
when(tagRepository.findSubtreeDocumentCountsPerTag()).thenReturn(List.of(subtreeEntry));
|
||||
|
||||
var tree = tagService.getTagTree();
|
||||
|
||||
assertThat(tree.get(0).subtreeDocumentCount()).isEqualTo(7);
|
||||
}
|
||||
|
||||
@Test
|
||||
void getTagTree_keepsDirectAndSubtreeCountsIndependent() {
|
||||
UUID tagId = UUID.randomUUID();
|
||||
Tag tag = Tag.builder().id(tagId).name("Reisen").build();
|
||||
TagRepository.TagCount directEntry = mock(TagRepository.TagCount.class);
|
||||
when(directEntry.getTagId()).thenReturn(tagId);
|
||||
when(directEntry.getCount()).thenReturn(2L);
|
||||
TagRepository.TagCount subtreeEntry = mock(TagRepository.TagCount.class);
|
||||
when(subtreeEntry.getTagId()).thenReturn(tagId);
|
||||
when(subtreeEntry.getCount()).thenReturn(7L);
|
||||
when(tagRepository.findAll()).thenReturn(List.of(tag));
|
||||
when(tagRepository.findDocumentCountsPerTag()).thenReturn(List.of(directEntry));
|
||||
when(tagRepository.findSubtreeDocumentCountsPerTag()).thenReturn(List.of(subtreeEntry));
|
||||
|
||||
var node = tagService.getTagTree().get(0);
|
||||
|
||||
assertThat(node.documentCount()).isEqualTo(2);
|
||||
assertThat(node.subtreeDocumentCount()).isEqualTo(7);
|
||||
}
|
||||
|
||||
@Test
|
||||
void getTagTree_callsFindSubtreeDocumentCountsPerTag_exactlyOnce() {
|
||||
when(tagRepository.findAll()).thenReturn(List.of());
|
||||
when(tagRepository.findDocumentCountsPerTag()).thenReturn(List.of());
|
||||
when(tagRepository.findSubtreeDocumentCountsPerTag()).thenReturn(List.of());
|
||||
|
||||
tagService.getTagTree();
|
||||
|
||||
verify(tagRepository, times(1)).findSubtreeDocumentCountsPerTag();
|
||||
}
|
||||
|
||||
// ─── resolveEffectiveColors ───────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
|
||||
@@ -132,6 +132,31 @@ class AdminControllerTest {
|
||||
.andExpect(jsonPath("$.count").value(3));
|
||||
}
|
||||
|
||||
// ─── POST /api/admin/backfill-titles (#726) ────────────────────────────────
|
||||
|
||||
@Test
|
||||
void backfillTitles_returns401_whenUnauthenticated() throws Exception {
|
||||
mockMvc.perform(post("/api/admin/backfill-titles").with(csrf()))
|
||||
.andExpect(status().isUnauthorized());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(roles = "USER")
|
||||
void backfillTitles_returns403_whenNotAdmin() throws Exception {
|
||||
mockMvc.perform(post("/api/admin/backfill-titles").with(csrf()))
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "ADMIN")
|
||||
void backfillTitles_returns200_withCount_whenAdmin() throws Exception {
|
||||
when(documentService.backfillTitles()).thenReturn(7);
|
||||
|
||||
mockMvc.perform(post("/api/admin/backfill-titles").with(csrf()))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.count").value(7));
|
||||
}
|
||||
|
||||
// ─── POST /api/admin/generate-thumbnails ───────────────────────────────────
|
||||
|
||||
@Test
|
||||
|
||||
@@ -141,6 +141,65 @@ services:
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
|
||||
# --- Ollama: Model init (one-shot pull) ---
|
||||
# Pulls qwen2.5:7b-instruct-q4_K_M (~4.7 GB) into the ollama_models volume on first start.
|
||||
# On subsequent starts (model already in volume), exits quickly without re-downloading.
|
||||
# Not started in CI — CI uses explicit service selection
|
||||
# (docker-compose.ci.yml: db minio create-buckets)
|
||||
ollama-model-init:
|
||||
image: ollama/ollama:0.30.6
|
||||
restart: "no"
|
||||
networks:
|
||||
- archiv-net
|
||||
volumes:
|
||||
- ollama_models:/root/.ollama
|
||||
mem_limit: 2g
|
||||
read_only: true
|
||||
tmpfs:
|
||||
- /tmp:size=512m
|
||||
cap_drop:
|
||||
- ALL
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
command: >
|
||||
sh -c "ollama serve & SERVE_PID=$$! && until curl -sf http://localhost:11434/api/tags; do sleep 1; done && ollama pull qwen2.5:7b-instruct-q4_K_M && kill $$SERVE_PID"
|
||||
|
||||
# --- Ollama: LLM inference server ---
|
||||
# Serves the pre-pulled model for NL search inference.
|
||||
# Not started in CI — CI uses explicit service selection
|
||||
# (docker-compose.ci.yml: db minio create-buckets)
|
||||
ollama:
|
||||
image: ollama/ollama:0.30.6
|
||||
container_name: archive-ollama
|
||||
restart: unless-stopped
|
||||
expose:
|
||||
- "11434"
|
||||
networks:
|
||||
- archiv-net
|
||||
volumes:
|
||||
- ollama_models:/root/.ollama
|
||||
environment:
|
||||
OLLAMA_API_KEY: "${OLLAMA_API_KEY}"
|
||||
cpus: "${OLLAMA_CPU_LIMIT:-4.0}"
|
||||
mem_limit: "${OLLAMA_MEM_LIMIT:-8g}"
|
||||
memswap_limit: "${OLLAMA_MEM_LIMIT:-8g}"
|
||||
read_only: true
|
||||
tmpfs:
|
||||
- /tmp:size=512m
|
||||
cap_drop:
|
||||
- ALL
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
start_period: 60s # model weights are pre-loaded by ollama-model-init; service only needs to bind port
|
||||
depends_on:
|
||||
ollama-model-init:
|
||||
condition: service_completed_successfully
|
||||
|
||||
# --- Backend: Spring Boot ---
|
||||
backend:
|
||||
build:
|
||||
@@ -184,6 +243,8 @@ services:
|
||||
SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-false}
|
||||
APP_OCR_BASE_URL: http://ocr-service:8000
|
||||
APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
|
||||
APP_OLLAMA_BASE_URL: "${APP_OLLAMA_BASE_URL:-http://ollama:11434}"
|
||||
APP_OLLAMA_API_KEY: "${OLLAMA_API_KEY}"
|
||||
SENTRY_DSN: ${SENTRY_DSN:-}
|
||||
SENTRY_TRACES_SAMPLE_RATE: ${SENTRY_TRACES_SAMPLE_RATE:-1.0}
|
||||
# Observability: send traces to Tempo inside archiv-net (OTLP gRPC port 4317)
|
||||
@@ -247,3 +308,4 @@ volumes:
|
||||
frontend_node_modules:
|
||||
ocr_models:
|
||||
ocr_cache:
|
||||
ollama_models:
|
||||
|
||||
@@ -48,8 +48,6 @@ Both stacks are organised **package-by-domain**: each domain owns its entities,
|
||||
|
||||
A **derived domain** has its own routes and UI but no database tables of its own; it is assembled from data owned by Tier-1 domains.
|
||||
|
||||
**`conversation`** (route: `/briefwechsel`) — bilateral letter timeline between two `Person`s. Derived from `Document` sender/receiver relationships. The `DocumentRepository` bidirectional query is the only data source.
|
||||
|
||||
**`activity`** (route: `/aktivitaeten`) — family activity feed. Derived from `audit_log`, `notifications`, and document events. No aggregation table; computed on-the-fly by `DashboardService` and composed in the SvelteKit load function.
|
||||
|
||||
---
|
||||
|
||||
@@ -50,13 +50,16 @@ graph TD
|
||||
|
||||
The OCR service requires significant RAM for model loading. The dev compose sets `mem_limit: 12g`.
|
||||
|
||||
| Production target | RAM | Recommended OCR limit | Notes |
|
||||
|---|---|---|---|
|
||||
| Hetzner CX42 | 16 GB | 12 GB | Recommended for OCR-enabled production |
|
||||
| Hetzner CX32 | 8 GB | 6 GB | Accept reduced batch sizes and slower throughput |
|
||||
| Hetzner CX22 | 4 GB | — | Disable the OCR service (`profiles: [ocr]`); run OCR on demand only |
|
||||
| Production target | RAM | Recommended OCR limit | NL Search | Notes |
|
||||
|---|---|---|---|---|
|
||||
| Current server (Hetzner Serverbörse, i7-6700) | 64 GB | 12 GB | Supported | Default `mem_limit: 12g` works comfortably; plenty of headroom for Ollama |
|
||||
| ≥ 16 GB RAM | 16+ GB | 12 GB | Supported | Default works |
|
||||
| 8 GB RAM | 8 GB | 6 GB | Disabled — set `APP_OLLAMA_BASE_URL=` (empty) | Set `OCR_MEM_LIMIT=6g`; accept reduced batch sizes |
|
||||
| 4 GB RAM | 4 GB | — | Unsupported | Disable OCR service (`profiles: [ocr]`); run OCR on demand only |
|
||||
|
||||
A CX32 cannot honour the default `mem_limit: 12g` — set the `OCR_MEM_LIMIT=6g` env var (in `.env.production` / `.env.staging`, or as a Gitea secret consumed by the workflow) before deploying on a CX32. The prod compose interpolates this var with a 12g default.
|
||||
On servers with less than 16 GB RAM the default `mem_limit: 12g` cannot be honoured — set the `OCR_MEM_LIMIT` env var (in `.env.production` / `.env.staging`, or as a Gitea secret consumed by the workflow). The prod compose interpolates this var with a 12g default.
|
||||
|
||||
> **Memory budget:** OCR (~6 GB active) + Ollama (~8 GB) = ~14 GB. On servers with less than 16 GB RAM, do not run `docker-compose.observability.yml` continuously alongside both OCR and Ollama.
|
||||
|
||||
### Dev vs production differences
|
||||
|
||||
@@ -140,10 +143,20 @@ All vars are set in `.env` at the repo root (copy from `.env.example`). The back
|
||||
| `ALLOWED_PDF_HOSTS` | SSRF protection — comma-separated list of allowed PDF source hosts. **Do not widen to `*`** | `minio,localhost,127.0.0.1` | YES | — |
|
||||
| `KRAKEN_MODEL_PATH` | Directory containing Kraken HTR models (populated by `download-kraken-models.sh`) | `/app/models/` | — | — |
|
||||
| `BLLA_MODEL_PATH` | Kraken baseline layout analysis model path | `/app/models/blla.mlmodel` | — | — |
|
||||
| `OCR_MEM_LIMIT` | Container memory cap for ocr-service in `docker-compose.prod.yml`. Set to `6g` on CX32 hosts; leave unset on CX42+ to use the 12g default | `12g` (prod compose default) | — | — |
|
||||
| `OCR_MEM_LIMIT` | Container memory cap for ocr-service in `docker-compose.prod.yml`. Set to `6g` on servers with 8 GB RAM; leave unset (12g default) on servers with ≥ 16 GB RAM | `12g` (prod compose default) | — | — |
|
||||
| `XDG_CACHE_HOME` | XDG cache base dir — redirects Matplotlib and other XDG-aware libraries away from the read-only `HOME` (`/home/ocr`) to the writable cache volume | `/app/cache` | — | — |
|
||||
| `TORCH_HOME` | PyTorch model cache — redirects `~/.cache/torch` to the writable models volume | `/app/models/torch` | — | — |
|
||||
|
||||
### Ollama (NL search) service
|
||||
|
||||
| Variable | Purpose | Default | Required? | Sensitive? |
|
||||
|---|---|---|---|---|
|
||||
| `APP_OLLAMA_BASE_URL` | Base URL for the Ollama service. Leave empty to disable NL search. | `http://ollama:11434` | — | — |
|
||||
| `APP_OLLAMA_API_KEY` | API key passed as `Authorization: Bearer` to Ollama. Leave empty for unauthenticated access. Note: `OLLAMA_API_KEY` is not enforced in Ollama 0.6.5 or 0.30.6 (see ADR-028). | — | — | YES |
|
||||
| `OLLAMA_CPU_LIMIT` | Docker CPU quota for the Ollama container. On hosts with 8 vCPUs (e.g. Hetzner CX42) it can be raised to `7.5`. | `4.0` | — | — |
|
||||
| `OLLAMA_MEM_LIMIT` | Memory limit for the Ollama container. The `8g` default requires a host with at least 16 GB RAM (e.g. Hetzner CX42). | `8g` | — | — |
|
||||
| `OLLAMA_API_KEY` | API key set on the Ollama service itself. Same value as `APP_OLLAMA_API_KEY`. Leave empty for unauthenticated. | — | — | YES |
|
||||
|
||||
### Observability stack (`docker-compose.observability.yml`)
|
||||
|
||||
| Variable | Purpose | Default | Required? | Sensitive? |
|
||||
@@ -264,6 +277,19 @@ git.raddatz.cloud A <server IP>
|
||||
|
||||
### 3.4 First deploy
|
||||
|
||||
> **First start — Ollama model pull:** On first `docker compose up -d`, the `ollama-model-init` container pulls `qwen2.5:7b-instruct-q4_K_M` (~4.7 GB). At 10 Mbps this takes approximately 60–90 minutes; at 100 Mbps approximately 6–10 minutes. The pull is a one-time operation — subsequent restarts skip it (model already on the `ollama_models` volume). Monitor progress with `docker logs -f $(docker ps -q --filter name=ollama-model-init)`.
|
||||
>
|
||||
> **Do not use `--wait` on first deploy** — `docker compose up -d --wait` waits for all services to reach their health/completion target, including `ollama-model-init`. On first pull this blocks for 60–90 minutes and will time out any CI/deploy script that uses `--wait`.
|
||||
>
|
||||
> **Re-deploy idempotency:** on subsequent `docker compose up -d` runs (including `--force-recreate`), `ollama-model-init` re-executes but exits in seconds — Ollama's CLI skips the download when the model digest already matches what is on the volume.
|
||||
>
|
||||
> **Verify NL search is active** after enabling Ollama (`APP_OLLAMA_BASE_URL=http://ollama:11434`):
|
||||
> ```bash
|
||||
> curl -s "http://localhost:8080/api/nl-search?q=brief+von+grossmutter"
|
||||
> # Returns 200 with results → NL search is active
|
||||
> # Returns 503 NL_SEARCH_UNAVAILABLE → Ollama is not reachable or APP_OLLAMA_BASE_URL is unset
|
||||
> ```
|
||||
|
||||
```bash
|
||||
# 1. Trigger nightly.yml manually (Repo → Actions → nightly → "Run workflow")
|
||||
# Expected: docker compose up -d --wait succeeds for archiv-staging, then
|
||||
@@ -559,6 +585,55 @@ bash scripts/download-kraken-models.sh
|
||||
|
||||
> Downloads the Kurrent/Sütterlin HTR models. Run once after a fresh clone or when models are updated.
|
||||
|
||||
### Ollama — natural-language search (NL Search)
|
||||
|
||||
NL search uses a local Ollama instance for query parsing. The `ollama` service is defined in `docker-compose.yml` alongside the main stack.
|
||||
|
||||
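**First-time model pull** (required before the feature works) — normally performed automatically by the `ollama-model-init` container on the first `docker compose up -d`; to pull or re-pull the model manually: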
|
||||
|
||||
```bash
|
||||
docker compose exec ollama ollama pull qwen2.5:7b-instruct-q4_K_M
|
||||
```
|
||||
|
||||
This downloads ~4.7 GB. The model is stored in the `ollama_models` Docker volume and persists across container restarts.
|
||||
|
||||
**Verify the model is available:**
|
||||
|
||||
```bash
|
||||
docker compose exec ollama ollama list
|
||||
```
|
||||
|
||||
Expected output includes `qwen2.5:7b-instruct-q4_K_M`.
|
||||
|
||||
**Health check** — the backend polls `GET /api/tags` on Ollama at startup and before inference. If Ollama is absent, `POST /api/search/nl` returns HTTP 503 with `SMART_SEARCH_UNAVAILABLE`.
|
||||
|
||||
**Configuration** (see `application.yaml` under `app.ollama`):
|
||||
|
||||
| Property | Default | Description |
|
||||
|---|---|---|
|
||||
| `app.ollama.base-url` | `http://ollama:11434` | Ollama service URL (dev: `http://localhost:11434`) |
|
||||
| `app.ollama.model` | `qwen2.5:7b-instruct-q4_K_M` | Model to use for inference |
|
||||
| `app.ollama.timeout-seconds` | `30` | Read timeout for inference calls |
|
||||
| `app.nl-search.rate-limit.max-requests-per-minute` | `5` | Per-user rate limit |
|
||||
|
||||
### Upgrade the Ollama model
|
||||
|
||||
To switch to a newer model version (e.g. a future release of `qwen2.5`):
|
||||
|
||||
1. Update the model name in the `ollama-model-init` `command:` in `docker-compose.yml`.
|
||||
2. Remove the existing model volume to free the old weights:
|
||||
```bash
|
||||
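docker compose rm --stop --force ollama ollama-model-init   # a volume cannot be removed while a container still references it
docker volume rm familienarchiv_ollama_models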
|
||||
```
|
||||
(In production the volume name is prefixed with the compose project: `archiv-production_ollama_models`.)
|
||||
3. Restart the stack:
|
||||
```bash
|
||||
docker compose up -d
|
||||
```
|
||||
The `ollama-model-init` container pulls the new model weights on first start (~4–8 GB download depending on the model). The `ollama` inference server will not start until the pull completes (`condition: service_completed_successfully`).
|
||||
|
||||
> **`ollama_models` volume:** holds model weights only — fully reproducible by re-pull, no backup needed.
|
||||
|
||||
### Trigger a canonical import
|
||||
|
||||
The importer no longer parses the raw spreadsheet. It consumes the **canonical artifacts**
|
||||
|
||||
@@ -45,6 +45,9 @@ _See also [TranscriptionBlock](#transcriptionblock-transcriptionblock)._
|
||||
|
||||
**raw attribution** (`Document.senderText`, `Document.receiverText`, `Document.metaDateRaw`) — the original spreadsheet cell text for a document's sender, receiver, and date, preserved verbatim even after a `Person` or normalized date is linked. It keeps provenance intact and enables an "as written in the original" view.
|
||||
|
||||
**auto-generated title** (`DocumentTitleFactory`) — a `Document` title composed by the formula `{index} – {dateLabel} – {location}` (index = `originalFilename`; date label honest at the row's precision; location omitted when blank). On edit, an unchanged auto-title follows a corrected date/location forward (exact old-vs-new match in `DocumentService.updateDocument`); a hand-written title is kept verbatim. `POST /api/admin/backfill-titles` rewrites already-stale ones in one sweep using a grammar heuristic (`DocumentTitleBackfillMatcher`).
|
||||
_Not to be confused with a hand-written title_ — only a title that still equals what the factory builds is treated as machine-generated and rewritten; prose is left untouched.
|
||||
|
||||
**DocumentVersion** (`DocumentVersion`) — an append-only snapshot of a `Document`'s metadata at a point in time. Append-only by convention; no consumer-facing create or update endpoint exists. The entity uses Lombok `@Data` (which generates setters), so immutability is enforced by application convention, not at the Java level.
|
||||
|
||||
**Tag** (`Tag`) — a hierarchical category that can be applied to `Document`s. Tags are self-referencing via a `parent_id` foreign key, forming a tree structure.
|
||||
@@ -111,16 +114,21 @@ _See also [PersonRelationship](#person-person)._
|
||||
|
||||
**seeded rank** (`Person.generation`) — the imported generation index on a `Person` (G 0 = founders, increasing downward), used as a strict row anchor in `buildLayout.ts`. The iterative fallback heuristic never overrides a seeded rank, and spouse-pulldown never pulls a seeded rank — only unseeded nodes (no `generation`) flow through the heuristic.
|
||||
|
||||
**sibling block** — a layout unit holding the children of a single parent-set at one generation, used inside `buildLayout.ts`. Each block has a center computed from the parents' midpoint; blocks are then packed left-to-right within a generation row. Two adjacent sibling blocks at the same rank can be merged if a `SPOUSE_OF` edge crosses them (intra-family marriage, AC2).
|
||||
**family forest** — the model the Stammbaum horizontal layout reasons over (ADR-030, `familyForest.ts`): a forest of **units** rather than per-generation rows. Replaces the old per-generation "sibling block" packer. The canonical fixture is ~24 root units over 62 nodes.
|
||||
|
||||
**loose spouse** — a person at a given generation who is a spouse of someone in a sibling block but is not themselves a parented child of anyone in the graph. Loose spouses are attached adjacent to their parented partner (right side per Leonie's UX rule) so the spouse line stays short.
|
||||
_Not to be confused with [parented](#parented-layout)_ — loose is the absence of parent edges into the graph.
|
||||
**unit** `[layout]` — one bloodline carrier (the **primary**) plus the spouse(s) absorbed into its run, rendered as one adjacent row of cards. `members[0]` is the primary; the rest are spouses in marriage-year order (#361). A lone person is a unit of one. A unit's children are the units anchored by the couple's offspring. The unit — not the individual — is the node the tidy-tree packs.
|
||||
|
||||
**parented** `[layout]` — a layout flag on a sibling-block member indicating that the person has at least one `PARENT_OF` edge incoming from a node already in the graph at the prior generation. Parented members are the layout anchors of their block (the block is centred so the average index of parented members sits under the parents' midpoint); non-parented members (loose spouses) ride along on the side.
|
||||
**tidy tree** — the bottom-up Reingold–Tilford contour packer (`tidyTree.ts`) that assigns each unit's horizontal `x`: lay out child subtrees first, pack them so their contours clear by `COL_GAP` at every level, then centre the unit over the span of its children. Contours are indexed by absolute generation level, so unrelated roots at different generations share x-columns. `x` comes from structure; `y` still comes from rank (`assignRanks`, #689).
|
||||
|
||||
**anchor index** — within a sibling block, the average position of `parented` member indices. The block is shifted horizontally so this index, multiplied by `NODE_W + COL_GAP`, lines up under the midpoint of the block's parents — keeping every parent-child connector orthogonal (90°).
|
||||
**structural owner** — for a couple, the spouse that keeps the bloodline (hierarchy) position: lower `birthYear`, then stable `id` (`pickStructuralOwner` in `familyForest.ts`). The other spouse is absorbed into the owner's run. Reused by the cross-link, cycle, and intra-family paths so the rule is defined once.
|
||||
|
||||
**intra-family marriage** — a `SPOUSE_OF` edge where both endpoints are parented members of _different_ sibling blocks at the same rank (i.e. both have parents in the graph, but the parent sets differ). Layout merges the two blocks so the spouses sit adjacent at the join boundary; latent in current data (0 cases in the May-2026 canonical snapshot) but covered by a synthetic regression test in `buildLayout.test.ts`.
|
||||
**loose spouse** — a person who marries into the graph with no `PARENT_OF` edges of their own. They are absorbed into their partner's unit run (no ancestor subtree), but any children of theirs still anchor through the couple unit.
|
||||
|
||||
**bloodline** — the set of people reachable from a root unit via structural-owner `PARENT_OF` edges; renders as one contiguous horizontal band with no foreign node interleaved (the contiguity invariant that fixed the smeared-bloodline bug, #724).
|
||||
|
||||
**cross-link** `[layout]` — a `PARENT_OF` edge whose child is positioned in a spouse's run elsewhere (a cross-level intra-family marriage). The connector draws it with a distinct `2 6` dash at reduced opacity — never the `4 4` ended-marriage cadence — with geometry still landing on the child (WCAG 1.4.1).
|
||||
|
||||
**intra-family marriage** — a `SPOUSE_OF` edge where both endpoints have parents in the graph. The couple is always exactly adjacent in the owner's run; when the two spouses' parents sit at the same structural level the displaced parent edge stays solid (the adjacency case), otherwise it renders as a cross-link. The canonical fixture has two such marriages (Walter⚭Eugenie, Clara⚭Herbert), covered in `buildLayout.test.ts`.
|
||||
|
||||
**marriage dot** — the SVG circle drawn at the midpoint of a `SPOUSE_OF` connector in the Stammbaum tree (`StammbaumTree.svelte`). Radius is `r=6` (12 px diameter) so the marker meets WCAG 1.4.11 (3:1 non-text contrast) when it stacks to disambiguate multiple marriages on the same focal person.
|
||||
|
||||
@@ -130,6 +138,8 @@ _Not to be confused with [parented](#parented-layout)_ — loose is the absence
|
||||
|
||||
**fit-to-screen** `[user-facing, #692]` — the Stammbaum control (`⤢`) and initial state that frames the whole tree in the viewport. Because the base viewBox already encloses the layout at `z=1`, fit-to-screen is simply the default view `{x:0, y:0, z:1}`.
|
||||
|
||||
**lineage highlight** `[user-facing, #703]` — the focus+dim layer bound to the Stammbaum side panel: while a person is selected, that person, their full pedigree upward, their full descendant tree downward, and the spouses of all those blood people render at full strength while everyone else is dimmed (opacity, not a hue swap). Connectors dim unless both joined people are active. Computed by the pure traversal in `frontend/src/lib/person/genealogy/layout/highlightLineage.ts`.
|
||||
|
||||
---
|
||||
|
||||
## Other Domain Terms
|
||||
@@ -137,9 +147,6 @@ _Not to be confused with [parented](#parented-layout)_ — loose is the absence
|
||||
**Aktivität / Aktivitäten** `[user-facing]` — the family activity feed accessible at `/aktivitaeten`. Shows recent documents, transcriptions, comments, and Geschichten as a chronological timeline.
|
||||
_See also [Chronik](#chronik-internal)._
|
||||
|
||||
**Briefwechsel** `[user-facing]` — the bilateral conversation timeline between two `Person`s, derived from `Document` sender/receiver relationships. Accessible at `/briefwechsel`. Not a persistent entity — data is computed from existing `Document` records.
|
||||
_See also [Derived domain](#derived-domain)._
|
||||
|
||||
**Chronik** `[internal]` — the conceptual and code-level name for the unified activity feed (per ADR-003 `003-chronik-unified-activity-feed.md`). Used in code, architecture documents, and ADRs. The user-facing label for the same concept is [Aktivität](#aktivitat--aktivitaten-user-facing).
|
||||
|
||||
**Geschichte** (`Geschichte`) `[user-facing]` — a narrative story or article published in the archive, linking `Person`s and `Document`s. Lifecycle: `DRAFT → PUBLISHED` (see `GeschichteStatus`). DRAFT stories are hidden from users without the `BLOG_WRITE` permission.
|
||||
@@ -154,13 +161,22 @@ _See also [Derived domain](#derived-domain)._
|
||||
|
||||
**Cross-cutting** — code that lives in `lib/shared/` (frontend) or cross-domain packages (backend) because it has no entity of its own, no user-facing CRUD, AND is used by two or more domains OR is framework infrastructure (error handling, API client, i18n utilities).
|
||||
|
||||
**Derived domain** — a Tier-2 frontend domain that has its own UI but no backend entities of its own. Data is computed from Tier-1 domain records. Current derived domains: `conversation` (from `Document` sender/receivers) and `activity` (from audit, notifications, document events).
|
||||
_See also [Briefwechsel](#briefwechsel-user-facing)._
|
||||
**Derived domain** — a Tier-2 frontend domain that has its own UI but no backend entities of its own. Data is computed from Tier-1 domain records. The current derived domain is `activity` (from audit, notifications, document events).
|
||||
|
||||
**Domain** — a Tier-1 bounded context with its own entities, controller, service, repository, and DTOs. Backend domains: `document`, `person`, `tag`, `user`, `geschichte`, `notification`, `ocr`, `audit`, `dashboard`. Frontend domains mirror this structure under `src/lib/`.
|
||||
|
||||
---
|
||||
|
||||
## NL Search Terms
|
||||
|
||||
**NlSearch** — the natural-language document search feature. Users type a plain-German query (e.g. "Was hat Walter im Krieg an Emma geschrieben?"); the backend parses it via Ollama, resolves person names to database UUIDs, and delegates to the standard `DocumentService.searchDocuments()` path. Endpoint: `POST /api/search/nl`.
|
||||
|
||||
**NlQueryInterpretation** — the structured result of parsing a natural-language query. Contains: `resolvedPersons` (persons whose names unambiguously matched one DB record), `ambiguousPersons` (all candidates when a name matched more than one person), `keywords` (LLM-extracted search terms), `dateFrom`/`dateTo` (extracted date range), `rawQuery` (the original user input), and `keywordsApplied` (whether keyword FTS was used in the search).
|
||||
|
||||
**PersonHint** — a lightweight `{id, displayName}` pair used in `NlQueryInterpretation` to describe a resolved or ambiguous person without exposing the full `Person` entity to the frontend.
|
||||
|
||||
---
|
||||
|
||||
## Infrastructure Terms
|
||||
|
||||
**archiv-app** — the bucket-scoped MinIO service account the backend uses to read and write the `familienarchiv` bucket. Distinct from the MinIO root account (`archiv`, used only by the bootstrap container for admin operations). Defined and provisioned in [`infra/minio/bootstrap.sh`](../infra/minio/bootstrap.sh) and consumed by the backend as `S3_ACCESS_KEY` in [`docker-compose.prod.yml`](../docker-compose.prod.yml). The attached `archiv-app-policy` grants `s3:GetObject/PutObject/DeleteObject` on `familienarchiv/*` and `s3:ListBucket/GetBucketLocation` on the bucket only — not the built-in `readwrite` policy which would grant `s3:*` on all buckets.
|
||||
|
||||
@@ -35,7 +35,7 @@ Render thumbnails in-process in Spring Boot using **Apache PDFBox 3.0.4** (alrea
|
||||
|
||||
**Harder:**
|
||||
- PDFBox is a parser attack surface. Mitigated by a 30-second watchdog timeout in `ThumbnailAsyncRunner` and by the fire-and-forget contract (failures never break upload).
|
||||
- Memory ceiling: the `thumbnailExecutor` is capped at 2 threads on the CX32 (8 GB). A busy backfill alongside OCR can approach the 3 GB heap — acceptable but not comfortable. Streaming via `FileService.downloadFileStream` keeps this bounded for PDFs up to 50 MB.
|
||||
- Memory ceiling: the `thumbnailExecutor` is capped at 2 threads on memory-constrained hosts. A busy backfill alongside OCR can approach the 3 GB heap on an 8 GB server — acceptable but not comfortable. The current production server (64 GB) has ample headroom. Streaming via `FileService.downloadFileStream` keeps this bounded for PDFs up to 50 MB.
|
||||
|
||||
### Operational caveats (intentional)
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# ADR 012 — Browser-Mode Test Mocking Strategy
|
||||
|
||||
**Status:** Accepted
|
||||
**Date:** 2026-05-11 (revised 2026-05-12)
|
||||
**Issues:** [#535 — original incident](https://git.raddatz.cloud/marcel/familienarchiv/issues/535) · [#553 — revision](https://git.raddatz.cloud/marcel/familienarchiv/issues/553)
|
||||
**Date:** 2026-05-11 (revised 2026-05-12, 2026-06-02)
|
||||
**Issues:** [#535 — original incident](https://git.raddatz.cloud/marcel/familienarchiv/issues/535) · [#553 — revision](https://git.raddatz.cloud/marcel/familienarchiv/issues/553) · [#560 — shared-mock-body dedup](https://git.raddatz.cloud/marcel/familienarchiv/issues/560)
|
||||
|
||||
---
|
||||
|
||||
@@ -71,19 +71,19 @@ The original revision of this ADR allowed `vi.mock(virtualModule, factory)` for
|
||||
|
||||
`EnrichmentBlock.svelte.spec.ts` (issue #553) was statically imported and still produced the race: its `vi.mock('$app/stores', async () => { const mod = await import(...); return mod; })` factory performed a dynamic import in its body, and that body was invoked asynchronously when Chromium fetched the manually-mocked module — sometimes after the worker's birpc channel had already closed.
|
||||
|
||||
**Therefore: under `**/*.svelte.{test,spec}.ts`, every `vi.mock` factory body must be synchronous. No `await`, no `import(...)`.**
|
||||
**Therefore: under `**/*.svelte.{test,spec}.ts`, every `vi.mock` factory body must be synchronous. No `await`, no `import(...)`.**
|
||||
|
||||
If a factory needs to share state with the spec (a mutable ref, a `vi.fn`, a writable store), use `vi.hoisted()` to lift the reference above `vi.mock`'s implicit hoist:
|
||||
|
||||
```ts
|
||||
const { mockNavigating } = vi.hoisted(() => ({
|
||||
mockNavigating: { type: null as string | null }
|
||||
mockNavigating: { type: null as string | null },
|
||||
}));
|
||||
|
||||
vi.mock('$app/state', () => ({
|
||||
get navigating() {
|
||||
return mockNavigating;
|
||||
}
|
||||
vi.mock("$app/state", () => ({
|
||||
get navigating() {
|
||||
return mockNavigating;
|
||||
},
|
||||
}));
|
||||
```
|
||||
|
||||
@@ -91,7 +91,7 @@ The getter defers the read until consumption time; `vi.hoisted` guarantees the r
|
||||
|
||||
### Architectural follow-on: prefer `$app/state` over `$app/stores`
|
||||
|
||||
`$app/stores` is the deprecated subscription-based store API; `$app/state` is the modern reactive proxy. New components should import from `$app/state`. As part of #553 we migrated `EnrichmentBlock.svelte` from `$app/stores.navigating` to `$app/state.navigating` with `!!navigating.type` — matching the pattern already established in `routes/aktivitaeten/+page.svelte:117` and `routes/documents/+page.svelte:261`. Migration eliminated the *need* to mock a store at all in that spec.
|
||||
`$app/stores` is the deprecated subscription-based store API; `$app/state` is the modern reactive proxy. New components should import from `$app/state`. As part of #553 we migrated `EnrichmentBlock.svelte` from `$app/stores.navigating` to `$app/state.navigating` with `!!navigating.type` — matching the pattern already established in `routes/aktivitaeten/+page.svelte:117` and `routes/documents/+page.svelte:261`. Migration eliminated the _need_ to mock a store at all in that spec.
|
||||
|
||||
**Pattern note:** When an overlay or dropdown triggers a navigation action, use `<button type="button">` with an `onclick` handler that calls `goto(path)` — do **not** use `<a href="…">` with `e.preventDefault()`. SvelteKit registers its link interceptor as a capture-phase `document` listener, so it fires before the component's bubble-phase `onclick`. By the time `e.preventDefault()` runs the router has already initiated navigation, which tears down the vitest-browser Playwright orchestrator iframe. A `<button>` carries no `href`, so the capture-phase interceptor never fires. See `NotificationDropdown.svelte` for the canonical example.
|
||||
|
||||
@@ -112,9 +112,9 @@ This is fixed upstream in [vitest PR #10267](https://github.com/vitest-dev/vites
|
||||
**Enforcement layers** (added in #553's second cycle, extending the four-layer chain above):
|
||||
|
||||
5. **In-suite meta-test** at `frontend/src/__meta__/no-duplicate-mock-ids.test.ts` globs `src/**/*.svelte.{test,spec}.ts`, extracts every `vi.mock` first-arg string, canonicalises by stripping a trailing `.js`/`.ts` after `.svelte`, and fails if any canonical ID is referenced under two or more distinct spellings. Same shape as `no-async-mock-factories.test.ts`.
|
||||
6. **`patch-package` backport** of PR #10267 at `frontend/patches/@vitest+browser-playwright+4.1.0.patch`. Applied automatically by the `postinstall` hook. Closes the race at the route-handler level — even if a contributor reintroduces a duplicate-ID, the patched `register` handler unroutes the existing predicate before installing the new one.
|
||||
6. **`patch-package` backport** of PR #10267 at `frontend/patches/@vitest+browser-playwright+4.1.6.patch`. Applied automatically by the `postinstall` hook. Closes the race at the route-handler level — even if a contributor reintroduces a duplicate-ID, the patched `register` handler unroutes the existing predicate before installing the new one.
|
||||
|
||||
**When to remove the patch.** Once `@vitest/browser-playwright` ships a release containing PR #10267, delete `patches/@vitest+browser-playwright+4.1.0.patch`. Bump the dependency to the version containing the fix. The in-suite meta-test stays — it's a cheap permanent guard against the contributor-facing pattern, independent of upstream library version.
|
||||
**When to remove the patch.** Once `@vitest/browser-playwright` ships a release containing PR #10267, delete `frontend/patches/@vitest+browser-playwright+4.1.6.patch` and bump the dependency to the version containing the fix. The in-suite meta-test stays — it's a cheap permanent guard against the contributor-facing pattern, independent of upstream library version.
|
||||
|
||||
---
|
||||
|
||||
@@ -129,6 +129,48 @@ This is fixed upstream in [vitest PR #10267](https://github.com/vitest-dev/vites
|
||||
3. **In-suite meta-test** at `frontend/src/__meta__/no-async-mock-factories.test.ts` globs `src/**/*.svelte.{test,spec}.ts` and asserts none match the banned pattern. Catches at every vitest invocation — the layer hardest to disable.
|
||||
4. **CI birpc assert** runs after the coverage step and fails the build if `[birpc] rpc is closed` appears in any log line. Catches the symptom even if all the upstream layers were bypassed.
|
||||
5. **In-suite duplicate-ID meta-test** at `frontend/src/__meta__/no-duplicate-mock-ids.test.ts` enforces the one-canonical-ID-per-module rule from the duplicate-id-hazard section above.
|
||||
6. **`patch-package` backport** at `frontend/patches/@vitest+browser-playwright+4.1.0.patch` closes the upstream race itself, applied via `postinstall`. To be removed when `@vitest/browser-playwright` releases [vitest PR #10267](https://github.com/vitest-dev/vitest/pull/10267).
|
||||
6. **`patch-package` backport** at `frontend/patches/@vitest+browser-playwright+4.1.6.patch` closes the upstream race itself, applied via `postinstall`. To be removed when `@vitest/browser-playwright` releases [vitest PR #10267](https://github.com/vitest-dev/vitest/pull/10267).
|
||||
- **Acceptance verification:** `coverage-flake-probe.yml` is a `workflow_dispatch`-triggered matrix workflow that runs the coverage suite 20× in parallel against a single SHA and asserts zero birpc lines. One fire, parallel cost, deterministic signal — replaces accumulating 20 sequential push events.
|
||||
- **When to revisit the LibLoader home:** If three or more components adopt this pattern, consider extracting a shared `$lib/types/lib-loader.ts` or a generic `DynamicImportLoader<T>` type to avoid parallel type definitions across modules.
|
||||
|
||||
---
|
||||
|
||||
## Revision 2026-06-02 (#560 — shared mock bodies, no-factory ban)
|
||||
|
||||
### No-factory `vi.mock` of a virtual module is forbidden
|
||||
|
||||
PR #657 attempted to delete `vi.mock` factories entirely and rely on Vitest auto-resolving a bare `vi.mock('$app/navigation')` to an adjacent `src/__mocks__/$app/navigation.ts`, the way Jest's `__mocks__/` directory works. **This is empirically false for SvelteKit virtual modules in browser-mode Vitest.** A no-factory `vi.mock(virtualModule)` substitutes _some_ exports (plain function references like `goto`) but leaves others bound to the live implementation — notably `replaceState`, which SvelteKit re-exports through a getter delegating to the live router. CI #1857 failed on `admin/tags/[id]` with `Cannot call replaceState(...) before router is initialized`, raised from a `$effect`. A partial auto-mock is therefore unsafe.
|
||||
|
||||
**Rule:** under `**/*.svelte.{spec,test}.ts`, a `vi.mock` of a virtual module must always pass a factory. The factory body must still be synchronous (the original binding invariant above). Enforced by a seventh layer:
|
||||
|
||||
7. **In-suite no-factory-ban meta-test** at `frontend/src/__meta__/no-factory-ban.test.ts` — same source-scan mechanism as the other meta-tests; fails if any browser spec contains a `vi.mock('mod')` with no second argument.
|
||||
|
||||
### Cross-file sharing of a virtual-module mock body is infeasible (the third false premise)
|
||||
|
||||
The original #560 plan ("Option A") proposed deduplicating the non-trivial interceptor factories by importing a shared body from `src/__mocks__/` into a sync factory:
|
||||
|
||||
```ts
|
||||
import * as formsMock from "$mocks/$app/forms";
|
||||
vi.mock("$app/forms", () => ({ ...formsMock }));
|
||||
```
|
||||
|
||||
**CI proved this does not work in `@vitest/browser-playwright` 4.1.6**, across two runs:
|
||||
|
||||
1. The static-import form above fails at runtime — vitest hoists `vi.mock` _above_ the import, so the factory references an uninitialised binding: `vi.mock factory: make sure there are no top level variables inside, since this call is hoisted`.
|
||||
2. The documented escape, loading the body through an async hoisted import, fails to even parse in browser mode — vitest's hoist transform mangles it: `SyntaxError: Unexpected identifier 'vi'`.
|
||||
|
||||
```ts
|
||||
const formsMock = await vi.hoisted(() => import("$mocks/$app/forms")); // parse error in browser mode
|
||||
```
|
||||
|
||||
`vi.hoisted` has the _same_ constraint as `vi.mock` (its factory can't reference top-level imports either, since it too is hoisted above them), so there is no way to get an external module's body into the hoisted context here. **Therefore: do not share virtual-module mock bodies across spec files. Define each `vi.mock` factory inline, with a synchronous body.** Duplicating the handful of interceptor factories is the accepted cost — it is the only pattern that works. The `src/__mocks__/$app/*` modules and the `$mocks` alias added for Option A were removed. (Revisit on a newer `@vitest/browser-playwright` whose hoist transform handles async `vi.hoisted` imports.)
|
||||
|
||||
The no-factory-ban above still stands: every `vi.mock` of a virtual module must pass an _inline_ sync factory — never no factory, never a spread of an imported binding.
|
||||
|
||||
### Rejected: Option C (config-level auto-resolve)
|
||||
|
||||
Re-enabling implicit `__mocks__/` auto-resolution through a Vitest config flag or a `setupFiles` shim was rejected. It trades auditability for cosmetics: the mock binding becomes a hidden default invisible at the call site, and its failure mode (a partial mock) is the hardest to debug — exactly the PR #657 class. The no-factory-ban meta-test deliberately keeps the door closed.
|
||||
|
||||
### Patch pin
|
||||
|
||||
`@vitest/browser-playwright` is exact-pinned (no caret) to `4.1.6` in `package.json` so `patches/@vitest+browser-playwright+4.1.6.patch` keeps applying; a caret range could float onto a version the patch rejects. Pin and patch are both removed once the library ships a release containing [PR #10267](https://github.com/vitest-dev/vitest/pull/10267).
|
||||
|
||||
@@ -62,7 +62,7 @@ The `/tmp` tmpfs remains at 512 MB and continues to serve training-ZIP extractio
|
||||
## Alternatives considered
|
||||
|
||||
**Approach B — Enlarge `/tmp` to 4 GB**
|
||||
One-line change. Discarded because: (1) 4 GB tmpfs counts against the cgroup `mem_limit`; on CX32 hosts with `OCR_MEM_LIMIT=6g` the combined Surya resident set + tmpfs would trigger OOMKill on cold start; (2) staging GB-scale model files through RAM is using the wrong storage tier; (3) any future model larger than 4 GB requires another bump.
|
||||
One-line change. Discarded because: (1) 4 GB tmpfs counts against the cgroup `mem_limit`; on servers with `OCR_MEM_LIMIT=6g` the combined Surya resident set + tmpfs would trigger OOMKill on cold start; (2) staging GB-scale model files through RAM is using the wrong storage tier; (3) any future model larger than 4 GB requires another bump.
|
||||
|
||||
**Approach C — Both TMPDIR redirect and enlarged /tmp**
|
||||
Belt-and-suspenders: Approach A + 1 GB tmpfs. Discarded in favour of the cleaner Approach A. The defence-in-depth benefit does not outweigh the extra compose churn; the 512 MB cap on `/tmp` is intentional.
|
||||
|
||||
@@ -1,10 +1,14 @@
|
||||
# ADR-026 — In-House Stammbaum Layout, dagre Evaluated and Deferred
|
||||
|
||||
**Date:** 2026-05-28
|
||||
**Status:** Accepted
|
||||
**Status:** Accepted — superseded in part by [ADR-030](./030-stammbaum-bloodline-tidy-tree-layout.md)
|
||||
**Issue:** #361
|
||||
**Supersedes:** _none_
|
||||
**Supersedes-on-trigger:** A future ADR-027 if any acceptance criterion below stops converging in-house.
|
||||
**Superseded-by:** ADR-030 (#724) replaces the **per-generation block packer** below with
|
||||
a bottom-up tidy-tree after its position-within-rank model stranded ancestors and smeared
|
||||
bloodlines across the canvas — the UX stop-trigger named in this ADR. The **in-house /
|
||||
no-dagre** decision and the seeded-rank invariant (#689) are retained.
|
||||
**Supersedes-on-trigger:** _(triggered)_ The UX stop-trigger fired; see ADR-030.
|
||||
|
||||
---
|
||||
|
||||
@@ -117,6 +121,7 @@ threshold, so `packBlocks.ts` is **not** yet warranted.
|
||||
is the source-of-truth probe against live data; the function is the
|
||||
capture-time and fixture-time signal that the predicate's count crossed
|
||||
zero.
|
||||
|
||||
- **AC6 — Bundle-impact gate (≤ 40 kB gzipped on `/stammbaum`).** Moot under
|
||||
this ADR; reactivates only under ADR-027 (dagre adoption).
|
||||
- **AC7 — Visual regression at 320 / 768 / 1440.** `toHaveScreenshot()`
|
||||
|
||||
65
docs/adr/028-nl-search-ollama.md
Normal file
65
docs/adr/028-nl-search-ollama.md
Normal file
@@ -0,0 +1,65 @@
|
||||
# ADR-028 — Natural language search is powered by Ollama (Qwen 2.5 7B), not a cloud API
|
||||
|
||||
**Date:** 2026-06-06
|
||||
**Status:** Accepted
|
||||
**Issue:** #738 (NL search backend); part of epic #735
|
||||
**Milestone:** Archive Intelligence — NL Search
|
||||
|
||||
---
|
||||
|
||||
## Context
|
||||
|
||||
Family members write their search intent in plain German ("Was hat Walter im Krieg an Emma geschrieben?"), not in structured filter forms. Issue #735 defines NL search as a core product goal. Three delivery options were evaluated:
|
||||
|
||||
**Option A — extend the OCR service.** The OCR Python microservice already runs on the same host. Adding LLM inference there avoids a new container. Rejected: the OCR service is a single-purpose, CPU-bound pipeline optimised for Kraken; bundling a 4.5 GB LLM weight into the same image would bloat it, complicate model lifecycle management, and create an unrelated failure domain (OOM on large OCR batches vs. LLM load time). ADR-001 was explicit about keeping OCR single-purpose.
|
||||
|
||||
**Option B — call an external API (OpenAI, Anthropic, etc.).** Cloud inference is instant and requires no local hardware. Rejected: the archive contains real person names and private family correspondence from 1899–1950 — sending query content to a third party violates the project's data-residency principle (family data stays on the family server). Additionally, API cost and availability are outside the operator's control; the system must work air-gapped.
|
||||
|
||||
**Option C — local Ollama service (chosen).** Ollama is a purpose-built LLM runtime with a simple REST API, model lifecycle management (`ollama pull`), and support for grammar-constrained JSON output. It runs entirely on the existing server (i7-6700, 64 GB RAM) with no cloud dependency.
|
||||
|
||||
**Model selection:** Qwen 2.5 7B Q4_K_M (`qwen2.5:7b-instruct-q4_K_M`) was chosen over larger models because:
|
||||
- Quantised weight is ~4.5 GB — fits comfortably in 64 GB RAM alongside PostgreSQL and the JVM.
|
||||
- Instruction-tuned variant follows the structured JSON schema reliably without fine-tuning.
|
||||
- CPU-only inference at Q4_K_M takes 2–15 seconds per query, acceptable for a search that replaces a multi-step filter form.
|
||||
|
||||
**Prompt injection mitigation:** The backend sends the raw user query to Ollama. To prevent the model from being prompted to return schema-breaking output, the API call uses Ollama's `format` parameter with a grammar-constrained JSON schema. Output length is further bounded by `maxLength` constraints in the schema (names ≤ 200 chars, keywords ≤ 100 chars). `NlQueryParserService` enforces these limits in code before any LLM-extracted fragment is passed to `PersonRepository.searchByName()` — defence in depth.
|
||||
|
||||
**DB-blind name resolution:** The Ollama prompt stays small (the raw query only); person database records are never sent to the model. Name resolution happens as a cheap SQL query after the model returns. This keeps the prompt short, avoids data leakage, and means adding 1,000 new persons requires no prompt change.
|
||||
|
||||
**Graceful degradation:** `RestClientOllamaClient.isHealthy()` is called inline before each inference request (calls `GET /api/tags` on a 2-second connect-timeout client). If Ollama is absent or times out, `NlQueryParserService` throws `DomainException` with `SMART_SEARCH_UNAVAILABLE` (HTTP 503). The regular structured search (`GET /api/documents/search`) is unaffected — it never calls Ollama.
|
||||
|
||||
**Expected inference latency:** 2–15 seconds on the current CPU-only hardware. The frontend must show a persistent "Suche läuft…" indicator for the full duration (see the `aria-live="polite"` requirement in the issue #738 frontend notes). The backend timeout is 30 seconds (`app.ollama.timeout-seconds=30`) — chosen as a safe upper bound for Q4_K_M on the i7-6700 with a realistic 500-character query under modest concurrent load.
|
||||
|
||||
**NL query logging policy:** Only metadata is logged — query length, resolved person count, latency in milliseconds. The raw query is never written to the log file. Rationale: queries contain real family names (PII); log files persist to disk and may be shipped to Loki. Structured metadata is sufficient for debugging latency regressions.
|
||||
|
||||
**Prompt-amplification abuse:** A malicious user could submit a long or crafted query to cause slow Ollama inference, consuming CPU. Mitigated by `NlSearchRateLimiter` (5 requests per user per minute, Bucket4j + Caffeine) and by `@Size(max=500)` on the request body. The rate limiter is node-local; in multi-replica deployments the effective limit multiplies by replica count — acceptable at the current single-node deployment scale.
|
||||
|
||||
**Ollama model pre-pull requirement:** The Docker image contains only the Ollama binary, not the model weights. The operator must run `ollama pull qwen2.5:7b-instruct-q4_K_M` (≈4.5 GB download, 10–30 minutes) before the backend starts inference. If skipped, every NL search request returns 503 until the pull completes. The deployment runbook in `docs/DEPLOYMENT.md` covers this explicitly.
|
||||
|
||||
**Startup dependency:** The `backend` Compose service declares `depends_on: ollama: condition: service_healthy`. The Ollama healthcheck polls `GET http://localhost:11434/api/tags`; `start_period: 120s` provides margin for weight loading (20–60 s on SSD). Note: `service_healthy` confirms the API is responding, not that the model is downloaded — if the pull was skipped, Ollama itself still answers inference calls with 404, which the backend surfaces as the 503 described under the pre-pull requirement.
|
||||
|
||||
**Multi-name resolution heuristic:** For 2-name queries (e.g. "Was hat Walter an Emma geschrieben?"), the first extracted name is treated as sender and the second as receiver. Per-name role annotation (e.g. `{name: "Walter", role: "sender"}`) was rejected because it would require a combinatorially complex Ollama schema and the most natural German phrasing strongly implies sender→receiver order. For single-name queries, a `personRole` field (`sender`/`receiver`/`any`) is returned.
|
||||
|
||||
**`personRole: "any"` keyword limitation:** When `personRole` is `"any"` and the name resolves to exactly one person, `DocumentService.searchDocumentsByPersonId()` is called (OR semantics: person as sender or receiver). Keyword filtering is not applied on this path — only person identity and date range. `keywordsApplied = false` is returned in the response. Rationale: the JPQL for OR-semantics person queries has no text predicate; adding FTS would require a native query or a separate pass, adding complexity for a case that is already well-narrowed by person identity.
|
||||
|
||||
**`search/` → `person/` + `document/` dependency direction:** `NlQueryParserService` calls `PersonService.findByDisplayNameContaining()` and `DocumentService.searchDocuments()` — both are legitimate cross-domain service calls, not repository leaks. The `search/` package has no JPA entities of its own and never accesses `PersonRepository` or `DocumentRepository` directly.
|
||||
|
||||
## Decision
|
||||
|
||||
**Introduce a new `search/` domain package** with a local Ollama integration via `RestClientOllamaClient`. The Ollama service runs as a separate Docker container, reachable only on the internal Docker network (`expose: ["11434"]`, not `ports:`). The backend calls Ollama's `/api/generate` endpoint with grammar-constrained JSON output. Name resolution and document search are performed by existing services after the model returns.
|
||||
|
||||
Key component structure:
|
||||
- `OllamaClient` / `OllamaHealthClient` interfaces — mockable for tests, modelled on `OcrClient`/`OcrHealthClient`
|
||||
- `RestClientOllamaClient` — two `RestClient` instances (30 s inference, 2 s health-check)
|
||||
- `NlQueryParserService` — orchestrates Ollama → name resolution → document search
|
||||
- `NlSearchRateLimiter` — Bucket4j + Caffeine, 5 req/min per user
|
||||
- `NlSearchController` — `POST /api/search/nl`, `@RequirePermission(READ_ALL)`
|
||||
|
||||
## Consequences
|
||||
|
||||
- Family members can query in natural German without learning filter UI. Expected search satisfaction improvement for the 60+ age cohort (primary transcription audience) is significant.
|
||||
- NL search is unavailable when Ollama is down or the model pull is not complete. The regular search is unaffected. The 503 response includes a CTA directing users to the regular search.
|
||||
- Operator responsibility: run `ollama pull` on first deploy and after model updates. The backup runbook must exclude the `ollama_models` volume (model weights are re-downloadable, not user data).
|
||||
- Inference takes 2–15 seconds. The frontend loading indicator is a hard requirement (see issue #738 frontend notes).
|
||||
- The rate limiter is node-local. At the current single-node deployment scale this is correct. If the service is ever scaled horizontally, the rate limiter must be moved to Redis (same caveat as `LoginRateLimiter`).
|
||||
- The `search/` package introduces a new cross-domain dependency direction (`search` → `person`, `search` → `document`). This is intentional and documented in `docs/architecture/c4/l3-backend-search.puml`.
|
||||
239
docs/adr/028-ollama-docker-compose-service.md
Normal file
239
docs/adr/028-ollama-docker-compose-service.md
Normal file
@@ -0,0 +1,239 @@
|
||||
# ADR-028: Ollama Docker Compose service for NL search
|
||||
|
||||
**Date:** 2026-06-06
|
||||
**Status:** Accepted
|
||||
**Deciders:** Marcel Raddatz
|
||||
**Relates to:** #737 (infrastructure), #735 (NL search epic)
|
||||
|
||||
---
|
||||
|
||||
## Context
|
||||
|
||||
Issue #735 introduces natural-language document search, requiring a local LLM to generate embeddings and/or run inference at query time. The family archive stores personal family history — data privacy is non-negotiable, so cloud-based inference APIs are excluded. The production target is a Hetzner CX42 (16 GB RAM, 8 vCPUs, CPU-only, ~32 EUR/month).
|
||||
|
||||
Alternatives considered:
|
||||
|
||||
| Option | Assessment (reason rejected, or reason chosen) |
|
||||
|---|---|
|
||||
| **llama.cpp** | No HTTP API out of the box; requires custom wrapper; higher ops burden |
|
||||
| **vLLM** | GPU-first; significant overhead on CPU-only hardware; overkill for this scale |
|
||||
| **Cloud APIs** (OpenAI, Gemini, etc.) | Vendor lock-in; per-token cost at scale; data leaves the server — unacceptable for a private family archive |
|
||||
| **Ollama** | Self-contained Docker image; built-in HTTP REST API; actively maintained; CPU-compatible; zero egress |
|
||||
|
||||
**Decision:** run Ollama as a Docker Compose service alongside the existing stack.
|
||||
|
||||
---
|
||||
|
||||
## Decisions
|
||||
|
||||
### 1. Hardware minimums and CPU-only constraint
|
||||
|
||||
All inference runs on CPU. The target is the Hetzner CX42 (16 GB RAM, 8 vCPUs).
|
||||
|
||||
| Tier | RAM | NL search |
|
||||
|---|---|---|
|
||||
| CX42 | 16 GB | Supported — full stack including Ollama |
|
||||
| CX32 | 8 GB | Disabled — set `APP_OLLAMA_BASE_URL=` (empty) to skip Ollama entirely |
|
||||
| CX22 | 4 GB | Unsupported for NL search |
|
||||
|
||||
### 2. Memory budget on CX42
|
||||
|
||||
| Component | `mem_limit` | Typical active RSS |
|
||||
|---|---|---|
|
||||
| OCR service | 12g (hard ceiling) | ~6 GB |
|
||||
| Ollama | 8g | ~8 GB |
|
||||
| **Total** | | **~14 GB active** |
|
||||
|
||||
`memswap_limit` on the Ollama service is set to `8g` (matching `mem_limit`) to prevent Linux from paging model weights out to swap under OCR memory pressure. Swapped-out model weights do not crash the container but silently degrade inference latency. This mirrors the pattern already applied to the OCR service.
|
||||
|
||||
**Operational constraint:** do NOT run `docker-compose.observability.yml` continuously alongside both OCR and Ollama on a CX42. The observability stack adds ~2 GB, which leaves no headroom.
|
||||
|
||||
### 3. Graceful-degradation contract
|
||||
|
||||
`app.ollama.base-url` absent OR blank → Ollama bean NOT registered → NL search returns HTTP 503 with `ErrorCode: NL_SEARCH_UNAVAILABLE`.
|
||||
|
||||
The same 503 response is used for every unavailability scenario: base-url unset, service unreachable, health check failed, and request timeout.
|
||||
|
||||
#### Why not `@ConditionalOnProperty`
|
||||
|
||||
`@ConditionalOnProperty` registers the bean when the property is present but blank (`APP_OLLAMA_BASE_URL=`). This produces a `RestClient` with an empty base URL that fails at runtime with an opaque error rather than a clean 503.
|
||||
|
||||
#### Correct condition expression
|
||||
|
||||
```java
|
||||
@ConditionalOnExpression("!'${app.ollama.base-url:}'.isBlank()")
|
||||
```
|
||||
|
||||
When the property is absent, the placeholder resolves to `''`; `.isBlank()` returns `true`; negation makes the condition `false`; the bean is not registered. Same result for an explicit empty string (`APP_OLLAMA_BASE_URL=`).
|
||||
|
||||
### 4. Backend configuration pattern
|
||||
|
||||
Use a `@ConfigurationProperties` record, not separate `@Value` injections:
|
||||
|
||||
```java
|
||||
@ConfigurationProperties("app.ollama")
|
||||
record OllamaProperties(String baseUrl, String apiKey) {}
|
||||
```
|
||||
|
||||
`OllamaProperties` is registered unconditionally — it is a plain value holder with no side effects.
|
||||
|
||||
`@ConditionalOnExpression` belongs **only** on `RestClientOllamaClient` (the bean that creates a live network client).
|
||||
|
||||
**Deliberate divergence from the OCR pattern:** the OCR service uses `@Value`-with-default because OCR is always-on and `http://ocr-service:8000` is a safe default. Ollama is truly optional — a missing URL means "feature disabled", not "use this default server". There is no safe default Ollama URL.
|
||||
|
||||
### 5. `Optional<OllamaClient>` injection
|
||||
|
||||
The NL search service uses constructor injection with `Optional<OllamaClient>`:
|
||||
|
||||
```java
|
||||
private final Optional<OllamaClient> ollamaClient;
|
||||
```
|
||||
|
||||
When empty (bean not registered), the service method returns 503 immediately:
|
||||
|
||||
```java
|
||||
var client = ollamaClient.orElseThrow(
|
||||
() -> DomainException.internal(ErrorCode.NL_SEARCH_UNAVAILABLE, "Ollama not configured"));
|
||||
```
|
||||
|
||||
Prefer this over `@Autowired(required = false)` with a null check — the null-check pattern is noisy when the service already uses `@RequiredArgsConstructor`.
|
||||
|
||||
### 6. Empty API key guard
|
||||
|
||||
`RestClientOllamaClient` omits the `Authorization` header entirely when `apiKey` is blank:
|
||||
|
||||
```java
|
||||
if (!apiKey.isBlank()) {
|
||||
request.header("Authorization", "Bearer " + apiKey);
|
||||
}
|
||||
```
|
||||
|
||||
Sending `Authorization: Bearer ` (empty token) has undefined or potentially broken behavior depending on the Ollama version. This mirrors the `trainingToken` guard in `RestClientOcrClient.java:107`.
|
||||
|
||||
### 7. OLLAMA_API_KEY behavior in Ollama 0.6.5 and 0.30.6
|
||||
|
||||
**Empirically verified (2026-06-06) on both `0.6.5` and `0.30.6`:** `OLLAMA_API_KEY` does **not** enforce request authentication in either version.
|
||||
|
||||
Test matrix run against `/api/tags`:
|
||||
|
||||
| Configuration | No auth header | `Authorization: Bearer ` (empty) | `Authorization: Bearer wrongkey` | `Authorization: Bearer correctkey` |
|
||||
|---|---|---|---|---|
|
||||
| `OLLAMA_API_KEY=` (empty) | 200 | 200 | — | — |
|
||||
| `OLLAMA_API_KEY` unset | 200 | — | — | — |
|
||||
| `OLLAMA_API_KEY=testkey99` | 200 | 200 | 200 | 200 |
|
||||
|
||||
**Finding:** The `OLLAMA_API_KEY` environment variable is not listed in Ollama's startup config dump and does not gate any HTTP request in either tested version. All configurations — empty string, fully unset, and a real key — accept all requests without authentication.
|
||||
|
||||
**Practical implication:** `OLLAMA_API_KEY` provides no defense-in-depth in the tested versions. `archiv-net` network isolation is the only effective security control. The env var is retained in the Compose definition and `.env.example` for forward compatibility if Ollama enables enforcement in a future version, but operators must not rely on it for access control.
|
||||
|
||||
**Backend guard still valid:** the `RestClientOllamaClient` code-level guard (omit `Authorization` header when `apiKey.isBlank()`) remains correct behavior regardless — it prevents a malformed `Authorization: Bearer ` header from being sent.
|
||||
|
||||
### 8. read_only: true feasibility
|
||||
|
||||
**Empirically verified (2026-06-06) on both `0.6.5` and `0.30.6`:** `read_only: true` works with Ollama. All three operations — `ollama serve`, `ollama pull qwen2.5:7b-instruct-q4_K_M`, and `ollama list` — succeeded with exit code 0 in both versions.
|
||||
|
||||
Test run:
|
||||
```bash
|
||||
docker run --rm --read-only \
|
||||
-v ollama_models:/root/.ollama \
|
||||
--tmpfs /tmp \
|
||||
--entrypoint sh ollama/ollama:0.30.6 \
|
||||
-c "ollama serve & sleep 5 && ollama pull qwen2.5:7b-instruct-q4_K_M && ollama list"
|
||||
```
|
||||
|
||||
**Note:** the entrypoint must be overridden to `sh` for the test command — the container's default entrypoint is `/bin/ollama` and does not accept `sh` as a subcommand. This is a Docker invocation detail; the Compose service definition uses the image's default entrypoint and `command:` override for the init container, which works correctly.
|
||||
|
||||
**Result:** `read_only: true` and `tmpfs: - /tmp:size=512m` are applied to both `ollama` and `ollama-model-init`. The `ollama_models` volume handles all persistent writes; no other paths require write access during normal operation.
|
||||
|
||||
### 9. Peak RSS of init container during pull
|
||||
|
||||
**Empirically verified (2026-06-06):** Peak RSS during `qwen2.5:7b-instruct-q4_K_M` pull was **~108 MiB**.
|
||||
|
||||
`docker stats` samples during the pull (15-second intervals):
|
||||
|
||||
| Sample | MEM |
|
||||
|---|---|
|
||||
| 1 | 54.89 MiB |
|
||||
| 2 | 66.3 MiB |
|
||||
| 5 | 97.25 MiB |
|
||||
| 9 | **107.8 MiB** (peak) |
|
||||
|
||||
`mem_limit: 2g` is adequate — the model weights stream directly to the named volume; RSS is dominated by the Ollama server process alone (~100 MB), not the model data. No bump to 4 GB needed.
|
||||
|
||||
### 10. Init container pull mechanism
|
||||
|
||||
The `ollama-model-init` container uses a curl-based readiness loop with captured PID:
|
||||
|
||||
```sh
|
||||
ollama serve & SERVE_PID=$!
|
||||
until curl -sf http://localhost:11434/api/tags; do sleep 1; done
|
||||
ollama pull qwen2.5:7b-instruct-q4_K_M
|
||||
kill $SERVE_PID
|
||||
```
|
||||
|
||||
`kill %1` (job-control syntax) is unreliable in non-interactive `sh -c` contexts. Capturing the PID via `SERVE_PID=$!` is reliable.
|
||||
|
||||
The same endpoint (`/api/tags`) is used for both the init container readiness loop and the main service `healthcheck`.
|
||||
|
||||
### 11. start_period: 60s rationale
|
||||
|
||||
The model is pre-pulled by `ollama-model-init` before the main service starts (via `condition: service_completed_successfully`). At main service startup, Ollama only loads model weights from the named volume and binds port 11434.
|
||||
|
||||
60 seconds is appropriate for this cold-start profile. 300 seconds was considered — that would be appropriate if the service pulled the model itself — but overstates actual startup time when the model is already present on the volume.
|
||||
|
||||
### 12. Security threat model
|
||||
|
||||
**Primary control:** `archiv-net` network isolation. Ollama has no externally exposed port (`expose:` only, not `ports:`). The Caddyfile must not route any path to the Ollama service.
|
||||
|
||||
**Note on `OLLAMA_API_KEY`:** Per §7, `OLLAMA_API_KEY` is not enforced in Ollama 0.6.5 or 0.30.6 and provides no authentication barrier against a compromised backend container. `archiv-net` network isolation is the sole effective security control. The env var is retained for forward compatibility only — do not rely on it for access control.
|
||||
|
||||
Both `ollama` and `ollama-model-init` receive the ADR-019 hardening baseline:
|
||||
|
||||
```yaml
|
||||
cap_drop: [ALL]
|
||||
security_opt: [no-new-privileges:true]
|
||||
```
|
||||
|
||||
### 13. CI exclusion strategy
|
||||
|
||||
Docker Compose profiles are not used — they would add developer friction (requiring `--profile ...` for all local dev commands).
|
||||
|
||||
CI uses explicit service selection in `docker-compose.ci.yml`:
|
||||
```bash
|
||||
docker compose -f docker-compose.ci.yml up -d db minio create-buckets
|
||||
```
|
||||
|
||||
Ollama is simply not listed and is never started in CI. A YAML comment on the `ollama` service block documents this:
|
||||
|
||||
```yaml
|
||||
# Not started in CI — CI uses explicit service selection
|
||||
# (docker-compose.ci.yml: db minio create-buckets)
|
||||
```
|
||||
|
||||
### 14. ollama_models volume operational note
|
||||
|
||||
The `ollama_models` named volume holds model weights only — fully reproducible by re-pull. No backup is needed.
|
||||
|
||||
If the volume fills after a model upgrade:
|
||||
```bash
|
||||
docker volume rm ollama_models && docker compose up -d
|
||||
```
|
||||
The init container re-pulls the model on next startup.
|
||||
|
||||
---
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
|
||||
- NL search runs entirely on-premises; no data leaves the server and no per-token cloud cost.
|
||||
- Graceful degradation is a first-class concern: smaller or budget-constrained instances can run the app without Ollama with a single env var change.
|
||||
- The init container pattern keeps model pull out of the critical startup path for the main service, giving accurate healthcheck timings.
|
||||
- `@ConditionalOnExpression` with a blank-check is more correct than `@ConditionalOnProperty` for optional features with no safe default URL.
|
||||
|
||||
### Risks and operational implications
|
||||
|
||||
- **Memory pressure:** OCR + Ollama together consume ~14 GB on a 16 GB host. Running the observability stack simultaneously risks OOM kills. Monitor with `docker stats`.
|
||||
- **CPU inference latency:** `qwen2.5:7b-instruct-q4_K_M` is chosen for CPU viability, but inference on 8 vCPUs will be noticeably slower than GPU-accelerated alternatives. This is acceptable for the family archive use case (low concurrency, not real-time).
|
||||
- All three empirical TBD items from the original issue spec were resolved — see §7 (OLLAMA_API_KEY not enforced), §8 (`read_only: true` works), §9 (peak RSS ~108 MiB).
|
||||
- Model upgrades require a `docker volume rm` to free old weights before pulling the replacement. Document this in runbook/DEPLOYMENT.md.
|
||||
60
docs/adr/028-pdfjs-wasm-decoders-and-csp-constraint.md
Normal file
60
docs/adr/028-pdfjs-wasm-decoders-and-csp-constraint.md
Normal file
@@ -0,0 +1,60 @@
|
||||
# ADR-028 — pdf.js wasm decoders are served same-origin; a future CSP must allow them
|
||||
|
||||
**Date:** 2026-06-01
|
||||
**Status:** Accepted
|
||||
**Issue:** #708 (scanned PDFs with CCITT/JBIG2 images render blank)
|
||||
**Milestone:** Pre-prod read-path hardening
|
||||
|
||||
---
|
||||
|
||||
## Context
|
||||
|
||||
pdf.js 5.x moved the **JBIG2, CCITTFax, and JPEG2000 image decoders into
|
||||
WebAssembly**. A single `jbig2.wasm` module decodes both JBIG2 and CCITTFax;
|
||||
`openjpeg.wasm` decodes JPEG2000. These modules live in
|
||||
`node_modules/pdfjs-dist/wasm/` and are not on the web path by default, and
|
||||
`getDocument` will not load them unless it is given a `wasmUrl`. Without that,
|
||||
bi-level black-and-white scans (CCITT G4 fax — ~16% of the archive) painted a
|
||||
blank canvas in production while JPEG scans rendered fine.
|
||||
|
||||
Two cross-cutting, long-lived constraints fall out of the fix and are not
|
||||
obvious from reading any single file — hence this record.
|
||||
|
||||
## Decision
|
||||
|
||||
1. **Serve the pdf.js wasm from our own origin**, at the unversioned path
|
||||
`/pdfjs-wasm/`, copied from `node_modules/pdfjs-dist/wasm/` into
|
||||
`build/client/` at build time by `vite-plugin-static-copy` (a devDependency;
|
||||
see `frontend/vite.config.ts`). `getDocument` is called with
|
||||
`wasmUrl: '/pdfjs-wasm/'`. **Never point `wasmUrl` at a public CDN** — a
|
||||
decoder on the core read path must not become a supply-chain RCE surface.
|
||||
|
||||
2. **Any future `Content-Security-Policy` MUST include
|
||||
`script-src 'wasm-unsafe-eval'` and `worker-src 'self' blob:`.** pdf.js
|
||||
instantiates WebAssembly and runs its renderer in a worker created from a
|
||||
`blob:` URL. A CSP without these directives silently re-breaks PDF rendering
|
||||
for the exact class of documents #708 fixed. No CSP is set today
|
||||
(`infra/caddy/Caddyfile` `(security_headers)`); the Caddyfile carries a
|
||||
pointer to this ADR so the future CSP author cannot miss it.
|
||||
|
||||
3. **The wasm shipping is guarded at build time.** `frontend/postbuild`
|
||||
(`scripts/assert-pdfjs-wasm.mjs`) fails the build loudly if `jbig2.wasm` or
|
||||
`openjpeg.wasm` is absent from `build/client/pdfjs-wasm/` — so a future
|
||||
`pdfjs-dist` bump that renames or relocates the wasm cannot regress to a
|
||||
blank canvas unnoticed. This runs in CI and in the Docker build stage.
|
||||
|
||||
## Consequences
|
||||
|
||||
- The decoders load from the same origin as the app — no third-party trust, no
|
||||
SRI to manage, correct `Content-Type: application/wasm` served by
|
||||
adapter-node.
|
||||
- `/pdfjs-wasm/` is **not** content-hashed, so it must not be served
|
||||
with `Cache-Control: immutable` — a revalidating cache avoids serving a stale `.wasm` against a
|
||||
newer worker after a pdfjs upgrade.
|
||||
- The CSP constraint is a standing obligation on whoever introduces a CSP. If
|
||||
that work happens, this ADR and the Caddyfile note are the source of truth.
|
||||
- No new container or external system is introduced, so the C4 L1/L2 diagrams
|
||||
are unaffected; `/pdfjs-wasm/` is a static asset served by the existing
|
||||
frontend container.
|
||||
- Render/decode failures are no longer silent: the viewer surfaces a localized
|
||||
message plus a working download link (see #708).
|
||||
@@ -0,0 +1,69 @@
|
||||
# ADR-029: Composite actions for cross-workflow deploy logic
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
The `nightly.yml` (staging) and `release.yml` (production) workflows shared three
|
||||
blocks of deploy logic verbatim: the four observability-stack steps (deploy configs,
|
||||
validate, start, assert health), the Caddy reload step, and the public-surface smoke
|
||||
test. The only per-environment differences were secret names (`STAGING_*` vs `PROD_*`),
|
||||
the `POSTGRES_HOST` value, and the smoke-test hostname.
|
||||
|
||||
This duplication was held together by `# Keep in sync with nightly.yml` comments — an
|
||||
honour-system invariant. Any change (a new healthchecked service, a different rsync flag,
|
||||
a new secret) had to be applied in two places, and nothing enforced that it was. Issue #603
|
||||
documents a real instance: the obs secret set had grown to five keys while a refactor draft
|
||||
listed only four.
|
||||
|
||||
### Decision drivers
|
||||
|
||||
1. Cross-workflow deploy logic must have a single definition, enforced — not a
|
||||
discipline-based "keep in sync" promise.
|
||||
2. Per-environment variation must be expressed as explicit, typed inputs, not by forking
|
||||
the whole step block.
|
||||
3. The mechanism must work on the existing single-tenant self-hosted Gitea runner with no
|
||||
new infrastructure.
|
||||
|
||||
### Alternatives considered
|
||||
|
||||
**A: Reusable workflow (`workflow_call`)** — Gitea supports called workflows. Rejected for
|
||||
this case: reusable workflows run as a separate job with their own runner context, which
|
||||
breaks the in-job, sequential `deploy → reload → smoke` ordering these steps rely on and
|
||||
complicates passing the already-checked-out workspace. Composite actions run inline in the
|
||||
calling job, preserving step order and the workspace.
|
||||
|
||||
**B: Shared shell script invoked from both workflows** — e.g. `scripts/deploy-obs.sh`.
|
||||
Rejected: loses the typed-input contract and per-step CI log sections, and reintroduces
|
||||
manual argument threading that is as error-prone as the duplication it replaces.
|
||||
|
||||
**C: Keep the `# Keep in sync` comments** — status quo. Rejected: unenforced; issue #603
|
||||
is direct evidence it fails.
|
||||
|
||||
## Decision
|
||||
|
||||
Extract the shared logic into three single-responsibility Gitea composite actions under
|
||||
`.gitea/actions/`: `deploy-obs` (five inputs), `reload-caddy` (no inputs), and `smoke-test`
|
||||
(`host` input). Both workflows invoke each via a single `uses: ./.gitea/actions/<name>` call,
|
||||
passing per-environment values as `with:` inputs. This is the repository's first composite
|
||||
action and sets the convention; `docs/infrastructure/ci-gitea.md` documents it.
|
||||
|
||||
## Consequences
|
||||
|
||||
**Positive:**
|
||||
- Shared deploy logic has one enforced definition; a change lands once and both
|
||||
environments get it. The `# Keep in sync` comments are deleted.
|
||||
- Per-environment variation is a typed input contract, not a forked block.
|
||||
- Runs inline on the existing runner — no reusable-workflow job context, no new
|
||||
infrastructure.
|
||||
|
||||
**Negative / constraints:**
|
||||
- Workflows now depend on a checked-out `.gitea/actions/` tree: `actions/checkout` MUST run
|
||||
before the first `uses: ./…` (a local action does not exist on disk until checkout).
|
||||
- Secrets cannot be read from the `secrets.*` context inside a composite action; they must
|
||||
be passed as inputs and mapped to `env:`. The `obs-secrets.env` heredoc therefore uses an
|
||||
unquoted delimiter so `$VAR` expands at the shell layer.
|
||||
- The `reload-caddy` pinned alpine digest now lives in the action, not the workflow file —
|
||||
it must be added to Renovate's watch list so it does not go stale.
|
||||
54
docs/adr/030-briefwechsel-removal-unidirectional-search.md
Normal file
54
docs/adr/030-briefwechsel-removal-unidirectional-search.md
Normal file
@@ -0,0 +1,54 @@
|
||||
# ADR-030 — Removing Briefwechsel trades bidirectional correspondence for a unidirectional search filter
|
||||
|
||||
**Date:** 2026-06-02
|
||||
**Status:** Accepted
|
||||
**Issue:** #716 (remove the Briefwechsel view; retarget its links to document search)
|
||||
**Milestone:** —
|
||||
|
||||
---
|
||||
|
||||
## Context
|
||||
|
||||
The standalone **Briefwechsel** view (`/briefwechsel`) was a bilateral letter timeline
|
||||
between two `Person`s. It was not in the main navigation; its only inbound product link
|
||||
was the "Häufige Korrespondenten" card on a person's detail page. It was backed by a
|
||||
dedicated endpoint (`GET /api/documents/conversation`) and two repository queries
|
||||
(`findConversation`, `findSinglePersonCorrespondence`) that nothing else used.
|
||||
|
||||
The view's data source, `findConversation`, was **bidirectional**: it returned letters
|
||||
where `(sender = A AND receiver = B) OR (sender = B AND receiver = A)` — i.e. the
|
||||
exchange in both directions. We removed the view entirely (frontend and backend) and
|
||||
retargeted the one inbound link into the existing **document search**
|
||||
(`/documents?senderId=A&receiverId=B`).
|
||||
|
||||
Document search composes its `senderId`/`receiverId` filters with AND
|
||||
(`sender.id = A` **AND** `receivers contains B`), so the retargeted link shows **only the
|
||||
A→B direction**. The reverse direction (B's replies to A) is no longer surfaced by
|
||||
clicking a correspondent.
|
||||
|
||||
## Decision
|
||||
|
||||
**Accept the behaviour change: the retargeted card link is unidirectional (A→B only).**
|
||||
The reverse direction is intentionally dropped rather than preserved with a redirect
|
||||
shim or a new bidirectional search filter.
|
||||
|
||||
- The card link sets both params (`senderId=A&receiverId=B`); the destination is the
|
||||
consistent, already-tested document search rather than a separate dedicated view.
|
||||
- The "×N" badge on each correspondent chip remains **bilateral** — it counts shared
|
||||
letters in both directions as a relationship-strength signal — so the badge may exceed
|
||||
the unidirectional search result count. This is surfaced in the badge's title, not
|
||||
recomputed.
|
||||
|
||||
## Consequences
|
||||
|
||||
- **Regression:** a reader can no longer reach B→A replies in one click from the
|
||||
correspondents card. They must run a second search with sender/receiver swapped.
|
||||
- The bilateral query code (`findConversation`, `findSinglePersonCorrespondence`, the
|
||||
`/api/documents/conversation` endpoint, and the `getConversationFiltered` service
|
||||
method) is fully removed — no dormant dead code.
|
||||
- No data migration and no schema change: only query/endpoint code was removed; the
|
||||
`documents`, `persons` and join tables are untouched. The `hasSender`/`hasReceiver`
|
||||
specifications stay — document search still uses them.
|
||||
- **Future enhancement (out of scope here):** a bidirectional "between these two people,
|
||||
either direction" document-search filter would restore the dropped direction without
|
||||
reviving the standalone view. If built, it supersedes the unidirectional link.
|
||||
110
docs/adr/030-stammbaum-bloodline-tidy-tree-layout.md
Normal file
110
docs/adr/030-stammbaum-bloodline-tidy-tree-layout.md
Normal file
@@ -0,0 +1,110 @@
|
||||
# ADR-030 — Stammbaum Bloodline-Contiguous Tidy-Tree Layout
|
||||
|
||||
**Date:** 2026-06-04
|
||||
**Status:** Accepted
|
||||
**Issue:** #724
|
||||
**Supersedes:** ADR-026 (in part — the per-generation block-packer decision and its
|
||||
position-within-rank fix path; the in-house / no-dagre decision and the seeded-rank
|
||||
invariant from #689 are retained)
|
||||
|
||||
---
|
||||
|
||||
## Context
|
||||
|
||||
ADR-026 kept Stammbaum horizontal placement in-house with a **per-generation block
|
||||
packer** and pre-committed a successor ADR "if any acceptance criterion stops
|
||||
converging in-house." Its single UX stop-trigger was Albert de Gruyter's marriages
|
||||
failing the read test.
|
||||
|
||||
The block packer hit a worse, structural failure: it placed each generation
|
||||
**independently**, centring sibling blocks under already-placed parents and only ever
|
||||
**shoving right** on collision. Two consequences followed — a deep branch that could
|
||||
not fit at its ideal centre dragged everything downstream rightward and stranded the
|
||||
ancestor at the **left edge** of its own descendants; and a parent placed before its
|
||||
descendants existed could never be re-centred over them. Extreme symptom: Albert de
|
||||
Gruyter (G0) far left, a great-great-grandchild far right — one bloodline smeared
|
||||
across the full canvas. That is exactly the "UX failure against the canonical fixture"
|
||||
ADR-026 named as the trigger to revisit the layout.
|
||||
|
||||
## Decision
|
||||
|
||||
**Replace the per-generation block packer with a bottom-up "tidy tree"
|
||||
(Reingold–Tilford / Walker contour pack), still in-house, no new dependency.**
|
||||
|
||||
The horizontal `x` rewrite is split into three reviewable, unit-tested modules
|
||||
(mirroring the `panZoom.ts` / `panZoomGestures.ts` / `animateView.ts` split):
|
||||
|
||||
- **`layout/tidyTree.ts`** — domain-agnostic contour packer over abstract
|
||||
`{ id, width, children, level? }` nodes, zero generated-API imports. Contours are
|
||||
indexed by **absolute generation level**, not tree depth, so unrelated roots at
|
||||
different generations share x-columns instead of smearing the forest wide.
|
||||
- **`layout/familyForest.ts`** — all genealogy semantics: the **unit** model (a
|
||||
bloodline-carrying primary plus the spouse(s) absorbed into its run),
|
||||
`pickStructuralOwner` (lower birthYear, then stable id), loose-spouse absorption,
|
||||
multi-spouse runs (#361), sibling/branch order (birthYear ASC NULLS LAST →
|
||||
displayName → id), intra-family resolution, and cross-link classification.
|
||||
- **`layout/buildLayout.ts`** — orchestrates forest → tidy-pack → per-person
|
||||
positions. `assignRanks` (y from rank, #689 seeding), the `generations` map, and
|
||||
`computeViewBox` are reused **unchanged**; `x` comes from structure, `y` from rank.
|
||||
|
||||
Two decisions taken during implementation and confirmed with the maintainer:
|
||||
|
||||
1. **Intra-family marriage = hybrid.** A couple is always exactly adjacent in the
|
||||
owner's run. When the two spouses' parents sit at the **same structural level** the
|
||||
displaced parent edge renders as a normal solid connector (the "adjacency" case);
|
||||
when they are **cross-level** (e.g. the canonical Clara⚭Herbert, where one parent
|
||||
is nested under Albert and the other hangs off a separate root), the structural
|
||||
owner keeps the hierarchy edge and the other parent→spouse edge renders as a
|
||||
distinct cross-link.
|
||||
2. **Cross-link is rendered with a distinct `2 6` dash at 0.7 opacity** in
|
||||
`StammbaumConnectors.svelte` — never the `4 4` ended-marriage cadence. Geometry
|
||||
still lands on the correct child top, so meaning is carried redundantly (WCAG
|
||||
1.4.1); the 0.7 opacity clears the WCAG 1.4.11 3:1 non-text floor.
|
||||
|
||||
## Consequences
|
||||
|
||||
### Accepted
|
||||
|
||||
- **Ancestor centring** — every unit is centred over its child-units' span (named-bug
|
||||
guard `great_great_grandparent_is_not_stranded_left_of_descendants` + a fixture-wide
|
||||
loop over canonical and synthetic trees).
|
||||
- **Bloodline contiguity** — each bloodline is one band with no foreign node
|
||||
interleaved. Albert de Gruyter's bloodline shrank from a full ~4860px smear to
|
||||
~960px.
|
||||
- **#361 / #689 preserved** — multi-spouse runs in marriage-year order, seeded ranks,
|
||||
spouse pull-down; the existing `buildLayout.test.ts` cases stay green.
|
||||
- **Determinism** — every comparator ends in a stable id; a seeded permutation of
|
||||
nodes/edges yields byte-identical positions.
|
||||
- **Fail-closed on cycles** — `assignRanks`' iteration ceiling plus a forest structure
|
||||
(each unit has ≤1 hierarchy parent, cycles are unreachable from roots) guarantee a
|
||||
finite layout with every node placed exactly once.
|
||||
|
||||
### Trade-off — total canvas width replaces the ADR-026 width assumption
|
||||
|
||||
Centring every ancestor inherently makes a forest of ~24 root-bands **wider** overall
|
||||
than under the old per-generation left-packer, which interleaved everyone into compact shared
|
||||
rows (canonical: ~7960px vs the old ~4860px). Total canvas width is therefore the
|
||||
wrong success metric; **per-bloodline span** is. The width regression test asserts
|
||||
each contiguous bloodline stays far under the old full-canvas smear. The wider canvas
|
||||
is navigated by the pan/zoom from #692 (ADR-027) and is an accepted trade-off for
|
||||
readability.
|
||||
|
||||
### Operational
|
||||
|
||||
- **No CI, image, compose, or dependency change.** Pure frontend layout. The
|
||||
`d3-flextree` escape hatch from #724 was not needed.
|
||||
|
||||
### Deferred (follow-ups, per #724)
|
||||
|
||||
- Connector legibility at 320px — the issue's "open verification"; a manual
|
||||
`/stammbaum` pass, with a connector-clarity issue spun only if drops/cross-links
|
||||
tangle.
|
||||
- Polished cross-link routing + a relationship tooltip.
|
||||
|
||||
## Notes
|
||||
|
||||
- ADR-026's retained parts: the **no-dagre / in-house** decision and the seeded-rank
|
||||
invariant (#689) still hold — this ADR changes only _position-within / across_ rank,
|
||||
not rank assignment, and adds no dependency.
|
||||
- The `validateFixture` sanity gates and the AC3 revisit probe from ADR-026 are
|
||||
unchanged.
|
||||
@@ -0,0 +1,112 @@
|
||||
# ADR-031 — The document title is a shared `document`-package factory, re-synced by an exact match on save and a grammar heuristic on a one-time backfill
|
||||
|
||||
**Date:** 2026-06-04
|
||||
**Status:** Accepted
|
||||
**Issue:** #726 (auto-sync document titles with date/location: save-time + one-time backfill)
|
||||
**Milestone:** —
|
||||
|
||||
---
|
||||
|
||||
## Context
|
||||
|
||||
A document title was a string built **once**, at import time, by a private
|
||||
`DocumentImporter.buildTitle()` composing `{index} – {dateLabel} – {location}` (index =
|
||||
`originalFilename`, date label honest at the row's precision via `DocumentTitleFormatter`,
|
||||
location verbatim). Nothing rebuilt it afterwards. When an archivist later corrected a date
|
||||
or location in the edit form, the title kept its stale value (e.g. it still read `2028`
|
||||
after the date was fixed to `1928`), because the edit form round-trips the stored title
|
||||
verbatim and `updateDocument` simply re-persisted it.
|
||||
|
||||
Two distinct problems live here:
|
||||
|
||||
1. **Going forward**, an edit to date/location must flow into a title that was machine-built
|
||||
— but must never overwrite a title a human wrote.
|
||||
2. **The existing backlog** of already-stale titles must be cleaned once. For these rows the
|
||||
pre-edit state is gone, so there is no exact value to compare against.
|
||||
|
||||
The composition formula also existed only inside `importing`, which is the wrong owner: a
|
||||
title is a `document` concern, and three call sites (import, save-time, backfill) must share
|
||||
one rule or they will drift.
|
||||
|
||||
## Decision
|
||||
|
||||
### 1. One formula, owned by the `document` package (`DocumentTitleFactory`)
|
||||
|
||||
Extract the composition into `DocumentTitleFactory` (a `@Component` in the `document`
|
||||
package) with `build(Document)`. `DocumentImporter` (package `importing`) now consumes it.
|
||||
`DocumentTitleFormatter` moves into `document` alongside the factory (it stays
|
||||
package-private; `importing` reaches the formula only through the factory). The direction is
|
||||
deliberate: `document` owns the rule, `importing` depends on it — not the reverse. The
|
||||
German date *label* remains the deliberate Java/TS dual implementation pinned by
|
||||
`docs/date-label-fixtures.json` (#666); this ADR touches the **composition** only and does
|
||||
not collapse the frontend `formatDocumentDate`.
|
||||
|
||||
### 2. Save-time regeneration is an EXACT match, not a heuristic
|
||||
|
||||
In `DocumentService.updateDocument` only (bulk edit is out of scope), capture
|
||||
`autoTitleBefore = titleFactory.build(doc)` from the **currently-persisted** state *before*
|
||||
any setter runs. Then:
|
||||
|
||||
- if the **submitted** title equals `autoTitleBefore`, it was the machine value → rebuild
|
||||
from the new state;
|
||||
- otherwise keep the submitted title verbatim (hand-written or freshly typed).
|
||||
|
||||
This is an exact old-vs-new comparison — no false positives, no false negatives — relying on
|
||||
the edit form round-tripping an untouched title verbatim. `projectedState` mirrors the
|
||||
existing setter asymmetry exactly: `documentDate`/`location` overwrite unconditionally (a
|
||||
null clears them), while precision/end/raw are taken from the DTO only when non-null and
|
||||
otherwise kept from the entity. A blank submission is never persisted (the title is always
|
||||
present) — it falls back to the rebuilt auto-title, which always carries at least the index.
|
||||
|
||||
### 3. The one-time backlog cleanup is a grammar heuristic, behind an ADMIN endpoint
|
||||
|
||||
`POST /api/admin/backfill-titles` (synchronous, under `AdminController`'s class-level
|
||||
`@RequirePermission(Permission.ADMIN)`) sweeps every document and, for each whose stored
|
||||
title passes the overwrite test, rebuilds it via the factory. Because the pre-edit state is
|
||||
gone, the test (`DocumentTitleBackfillMatcher`, used **only** here) is a grammar heuristic:
|
||||
the title must be exactly the index or, after stripping the **literal** index prefix, the remainder must be a
|
||||
known date-label form (+ an optional trailing location), or a lone segment equal to the
|
||||
document's current location. Prose is left untouched; anything malformed fails closed.
|
||||
|
||||
The backfill saves via `documentRepository.save` directly and **never** routes through
|
||||
`updateDocument` — following the `backfillFileHashes` precedent — so a mechanical rename does
|
||||
not snapshot the whole corpus into `document_versions`. It is idempotent (a second run
|
||||
rewrites nothing) and logs one SLF4J-parameterized `scanned/updated/skipped` line; the
|
||||
response is `BackfillResult(count)`.
|
||||
|
||||
### 4. Edit-form feedback (FR-005)
|
||||
|
||||
A localized helper line (de/en/es) under the title input explains that the title is built
|
||||
from date/place and that a hand-edit is preserved, wired via `aria-describedby` and shown
|
||||
only on the single-document edit form. A live preview was considered and declined.
|
||||
|
||||
## Consequences
|
||||
|
||||
- The three call sites can never diverge — there is exactly one formula
|
||||
(`NFR-MAINT-001`). Save-time cost is a string build + compare; the backfill is one
|
||||
synchronous transactional sweep over a low-thousands corpus.
|
||||
- Security: the index is compared **literally** (`String.startsWith` / `Pattern.quote`)
|
||||
because `originalFilename` is user-controlled and may carry regex metacharacters — an
|
||||
unquoted pattern would be a ReDoS / regex-injection vector (CWE-1333 / CWE-625). The
|
||||
date-label sub-patterns use only bounded, non-nested quantifiers.
|
||||
- **File-replaced documents are treated as manual, by design.** The index is
|
||||
`originalFilename`, which `updateDocument` reassigns to the uploaded file's name on a
|
||||
file-replace. After a replace the stored title no longer matches `build(currentState)`, so
|
||||
neither save-time nor backfill rewrites it. This is the accepted fail-safe of overloading
|
||||
`originalFilename` rather than adding a dedicated `catalogIndex` column.
|
||||
- The save-time path carries no misclassification risk (exact match); the backfill heuristic can, by its
|
||||
documented FR-004 rule, treat `{index} – {valid date label} – {anything}` as machine-built
|
||||
and rewrite the trailing segment. This is the accepted trade for cleaning the backlog
|
||||
without the lost pre-edit state.
|
||||
|
||||
## Alternatives considered
|
||||
|
||||
- **A dedicated `catalogIndex` column** instead of overloading `originalFilename` — rejected;
|
||||
it adds a migration and a second source of truth for the index for no current benefit, and
|
||||
the file-replace fail-safe is acceptable.
|
||||
- **A heuristic at save-time too** (instead of the exact match) — rejected; the stored title
|
||||
is available pre-edit, so an exact comparison is strictly better (no false positives).
|
||||
- **A live title preview in the edit form** — rejected (FR-005); a static helper line is
|
||||
calmer for the 60+ audience and avoids a second client-side mirror of the formula.
|
||||
- **Collapsing the frontend `formatDocumentDate` into the backend** — out of scope; the
|
||||
Java/TS date-label split is the deliberate #666 design, pinned by a shared fixture.
|
||||
64
docs/adr/032-person-delete-db-level-integrity.md
Normal file
64
docs/adr/032-person-delete-db-level-integrity.md
Normal file
@@ -0,0 +1,64 @@
|
||||
# ADR-032 — Person-delete referential integrity lives in the database, and the cascade never reaches `documents`
|
||||
|
||||
**Date:** 2026-06-06
|
||||
**Status:** Accepted
|
||||
**Issue:** #684 (move person-delete FK detach to database-level `ON DELETE`)
|
||||
**Milestone:** —
|
||||
|
||||
---
|
||||
|
||||
## Context
|
||||
|
||||
Deleting a `Person` had to detach the two FKs into `persons` that lacked any `ON DELETE`
|
||||
behaviour: `documents.sender_id` and `document_receivers.person_id` (both from V1).
|
||||
`PersonService.deletePerson` and `mergePersons` did this in Java — nulling the sender and
|
||||
deleting receiver join rows before `deleteById` — so the integrity guarantee lived in
|
||||
application code. Any other delete path (a future endpoint, a manual `psql`, a batch job)
|
||||
could still orphan rows or fail with an FK-violation 500.
|
||||
|
||||
A related soft reference made it worse: `transcription_block_mentioned_persons.person_id`
|
||||
was a UUID column with **no FK** (V56, a deliberate "no FK" choice), so a person delete left
|
||||
dangling `@`-mention rows. The literal `@DisplayName` lives in `transcription_blocks.text`,
|
||||
so only the *link* was ever at stake — not the visible name.
|
||||
|
||||
## Decision
|
||||
|
||||
Move person-delete integrity into the database (migration V71) and thin the service to a
|
||||
plain `deleteById`:
|
||||
|
||||
- `documents.sender_id` → `ON DELETE SET NULL` (`documents.senderText` preserves the raw
|
||||
textual attribution, so nulling the link loses no historical record).
|
||||
- `document_receivers.person_id` → `ON DELETE CASCADE` (the symmetric completion of V14,
|
||||
which gave the `document_id` side the same).
|
||||
- `transcription_block_mentioned_persons.person_id` → a real FK with `ON DELETE CASCADE`,
|
||||
reversing V56's "no FK" decision. The read renderer already degrades a `@DisplayName` with
|
||||
no sidecar row to plain escaped text, so removing the link is invisible to the reader.
|
||||
|
||||
**Cascade-boundary invariant:** the cascade stays strictly at the join/reference layer and
|
||||
**never reaches `documents` rows** — a cascade into `documents` would destroy historical
|
||||
letters. This is pinned by a non-negotiable document-survival assertion in
|
||||
`PersonRepositoryTest`.
|
||||
|
||||
## Consequences
|
||||
|
||||
- A person delete is safe from every path, not just `PersonService`. The service and merge
|
||||
stay thin (`deleteById` + the cascade); `reassignSenderToNull` and `deleteReceiverReferences`
|
||||
are deleted.
|
||||
- This *fixes* the pre-existing dead-link-on-deleted-person case — it is not a purely
|
||||
invisible refactor. Note the read renderer strips the `@` prefix when it emits a live
|
||||
mention link, but the degraded (deleted-person) path leaves the literal `@Name` in the
|
||||
block text as-is — the reader sees `@Auguste Raddatz` as plain text, never a dead link.
|
||||
- DB cascades run below `AuditService`, so the row-level cleanup is intentionally not
|
||||
audit-logged; the person-delete action itself is still logged at the service layer.
|
||||
- The V71 FK validation requires cleaning pre-existing orphan mention rows first; the
|
||||
migration does this in a `DO` block that logs the purge count via `RAISE NOTICE`.
|
||||
|
||||
## Alternatives considered
|
||||
|
||||
- **Keep integrity in Java** — rejected; it only protects the one code path and re-breaks the
|
||||
moment a second delete path appears.
|
||||
- **Cascade `documents.sender_id`** — rejected; it would delete historical letters when a
|
||||
sender is removed. `SET NULL` keeps the letter and its `senderText`.
|
||||
- **Leave the mention sidecar FK-less (honour V56)** — rejected; the "no FK" rationale was
|
||||
stale, the name survives in the block text regardless, and the FK removes the orphan-row
|
||||
class of bug.
|
||||
148
docs/adr/033-tag-name-resolution-tolerates-case-collisions.md
Normal file
148
docs/adr/033-tag-name-resolution-tolerates-case-collisions.md
Normal file
@@ -0,0 +1,148 @@
|
||||
# ADR-033 — Tag-name resolution tolerates case-collisions: exact-case first, then a deterministic lowest-id fallback, and never a `unique(lower(name))` constraint
|
||||
|
||||
**Date:** 2026-06-06
|
||||
**Status:** Accepted
|
||||
**Issue:** #730 (document with a case-colliding tag cannot be saved — `findByNameIgnoreCase` `NonUniqueResultException`)
|
||||
**Milestone:** —
|
||||
|
||||
---
|
||||
|
||||
## Context
|
||||
|
||||
`TagService.findOrCreate(name)` is the single point that turns a tag **name** into a `Tag`
|
||||
row. The document edit form, bulk-edit, and the upload batch all round-trip tag **names**
|
||||
(the edit form sends `tags.map(t => t.name).join(',')`) and re-resolve them on **every**
|
||||
save through `resolveTags → findOrCreate`. The old implementation resolved with
|
||||
`tagRepository.findByNameIgnoreCase(name)`, a derived query returning `Optional<Tag>`.
|
||||
|
||||
That signature encodes an invariant the data does **not** hold: that a name is globally
|
||||
unique case-insensitively. The canonical tag tree legitimately contains names that differ
|
||||
only by case — a parent container and its same-named lowercase **child** (`Geburt` /
|
||||
`Geburt/geburt`, `Weihnachten` / `Weihnachten/weihnachten`, …), or two siblings
|
||||
(`Reise/Reisepläne` / `Reise/reisepläne`). Each is a distinct node with its own
|
||||
`source_ref` (the stable identity, per ADR-025) and its own document attachments — **not** an
|
||||
accidental duplicate. When two rows matched case-insensitively, Hibernate threw
|
||||
`NonUniqueResultException` → `IncorrectResultSizeDataAccessException` → a generic HTTP 500.
|
||||
|
||||
The effect was severe and opaque: every document carrying one of ~10 colliding tags (≈180
|
||||
document-tag attachments on staging) became **un-editable** — any field change failed on save
|
||||
because the whole tag set is re-resolved — and the user saw only "an unexpected error", with
|
||||
no hint that a tag was the cause.
|
||||
|
||||
This is a **lookup** problem, not a data problem: the collisions are valid canonical nodes
|
||||
and must be preserved.
|
||||
|
||||
## Decision
|
||||
|
||||
### 1. Resolution is exact-case first, then a non-throwing deterministic fallback
|
||||
|
||||
`findOrCreate` resolves in three ordered steps and never throws on a collision:
|
||||
|
||||
1. `findByName(cleanName)` — **exact-case** derived query. If present, return it. The edit
|
||||
round-trip replays the stored name verbatim, so the exact-case row is the right binding
|
||||
(typing the bare child name `weihnachten` binds to the child; `Weihnachten` binds to the
|
||||
parent container).
|
||||
2. else `findAllByNameIgnoreCase(cleanName)` — the **plural** case-insensitive list. If
|
||||
non-empty, return the element with the **lowest `id`** (`min(comparing(Tag::getId))`).
|
||||
3. else create the tag (an orphan: null `sourceRef`/`parentId`).
|
||||
|
||||
The two repository methods are deliberately **two distinctly-named methods** — Java cannot
|
||||
overload on return type alone, so an `Optional<Tag>` and a `List<Tag>` derived query need distinct names.
|
||||
The throwing `Optional<Tag> findByNameIgnoreCase` is **deleted** so the non-unique-throwing
|
||||
call cannot be reintroduced; `findOrCreate` was its only production caller.
|
||||
|
||||
### 2. The tie-break is `id`, and it is load-bearing
|
||||
|
||||
`id` is a stable, always-present, unique column, so "lowest id" is a total, deterministic
|
||||
order over the candidates: the same name resolves to the same row on every call, forever,
|
||||
without throwing. This matters only in the free-text authoring path (step 2), where no
|
||||
exact-case row exists, yet two or more rows match case-insensitively.
|
||||
|
||||
### 3. No `unique(lower(name))` constraint — and a load-bearing comment says so
|
||||
|
||||
A global case-insensitive uniqueness constraint is **wrong**: it would reject the legitimate
|
||||
parent/child canonical nodes. It would also **fail to apply** against the existing rows,
|
||||
turning a code-only deploy into a failed Flyway migration that blocks startup. A comment at
|
||||
both `findOrCreate` and the repository methods records this so the constraint is not "helpfully"
|
||||
added later.
|
||||
|
||||
## Consequences
|
||||
|
||||
- **Code-only, zero migration, fully reversible** (roll back the JAR). No tag data is touched;
|
||||
the colliding rows stay exactly as the canonical importer produced them.
|
||||
- One change fixes all three write paths — single-document edit, bulk-edit, and upload batch —
|
||||
because they all funnel through `resolveTags → findOrCreate`, which stays the single source
|
||||
of truth (resolution logic is **not** hoisted into `DocumentService`).
|
||||
- **Free-text tag semantics under collision are accepted as-is** (issue #730, option A): the
|
||||
bare word `weihnachten` binds to the deep child and `Weihnachten` to the parent container.
|
||||
Correct for the edit round-trip and acceptable for authoring; making the typeahead show the
|
||||
tree path so an author can tell a container from its same-named child is a separate
|
||||
follow-up.
|
||||
- The wire response stays opaque: after the fix this path no longer throws
|
||||
`IncorrectResultSizeDataAccessException`, and `GlobalExceptionHandler`'s generic handler maps
|
||||
any stray one to `INTERNAL_ERROR` with no Hibernate/SQL leak — so no dedicated handler was
|
||||
added.
|
||||
- **The sibling Person path is fixed the same way — see the Person extension below (#731).**
|
||||
- Postgres `LOWER()` folding of umlauts (`ü`/`ä`) is the actual correctness hinge of the
|
||||
fallback and cannot be proven by a mocked repo, so it is pinned by a Testcontainers
|
||||
`postgres:16-alpine` test on a `Glückwünsche`/`glückwünsche` pair; a plain-ASCII test would
|
||||
stay green while the bug reappeared for umlaut tags.
|
||||
|
||||
## Person extension (#731)
|
||||
|
||||
The Person domain carried the same latent throw on **two** user-influenced lookup surfaces, and
|
||||
is fixed with the same exact-case-first, non-throwing pattern — but with a deliberately
|
||||
**different fallback per surface**, because the two paths have different consequences.
|
||||
|
||||
- **Alias path — `PersonService.findOrCreateByAlias` — deterministic lowest-id (mirrors tag).**
|
||||
`findByAliasIgnoreCase` (`Optional`) is replaced by `findByAlias` (exact) → `findAllByAliasIgnoreCase`
|
||||
(plural, lowest id) → the existing create-when-absent branch (INSTITUTION/GROUP and the
|
||||
maiden-name alias are preserved verbatim). There is no human in the importer loop and the path
|
||||
creates-on-absent anyway, so a deterministic guess is the right behaviour — exactly like tags.
|
||||
|
||||
- **Name/sender path — `PersonService.findByName` — bail to null on ambiguity (the new wrinkle).**
|
||||
Used only by `DocumentService.storeDocument` to resolve the upload **sender** from the parsed
|
||||
filename. `findByFirstNameIgnoreCaseAndLastNameIgnoreCase` (`Optional`) is replaced by
|
||||
`findByFirstNameAndLastName` (exact) → `findAllByFirstNameIgnoreCaseAndLastNameIgnoreCase`
|
||||
(plural). Resolution returns the exact-case match, else the single case-insensitive match, else
|
||||
— on **two or more** matches — **empty**. The sender is left unset rather than guessed.
|
||||
|
||||
**Why this diverges from the alias (and tag) decision:** the archive's value is correct
|
||||
provenance. A confidently-wrong pre-filled `Hans Müller` is worse than an empty field, because a
|
||||
senior reviewer will not re-check a value that is already filled in, whereas an empty sender
|
||||
routes the document into the "needs completion" state (`metadataComplete=false`) for a human to
|
||||
assign. The load-bearing comment at `findByName` records this so a future "consistency cleanup"
|
||||
does not reintroduce the confidently-wrong-sender bug by switching it to lowest-id.
|
||||
|
||||
- **Fail-closed on a null first name.** A parsed filename can lack a first name. The two new name
|
||||
methods use explicit HQL equality (`= :firstName`) rather than a derived
|
||||
`…IgnoreCase` query, because Spring Data folds a null derived-query argument to `first_name IS
|
||||
NULL` — which would silently widen the match and pull a last-name-only / institution row in as a
|
||||
"sender" (a quiet provenance-integrity defect). With HQL equality a null binds as `= NULL`,
|
||||
which SQL never evaluates to true (a comparison with NULL yields `UNKNOWN`), so a null first name resolves to **no sender**. This is pinned by a
|
||||
real-Postgres repository test.
|
||||
|
||||
- **Scope — "ambiguous" is case-insensitive only.** Both exact-case lookups (`findByAlias`,
|
||||
`findByFirstNameAndLastName`) return `Optional`, so two **byte-identical same-case** rows would
|
||||
still throw `NonUniqueResultException`. That is a true data anomaly, deliberately out of scope
|
||||
(it is not a case-collision), and it surfaces as the opaque `INTERNAL_ERROR` — never a silently
|
||||
wrong row — so it is no worse than any other unexpected error and needs no extra handling here.
|
||||
|
||||
- **Same stance as tags otherwise:** no `unique(lower(alias))` / `unique(lower(name))` constraint
|
||||
(collisions are valid human labels; `source_ref` is the stable identity per ADR-025), no
|
||||
merge/dedupe, code-only and reversible, and no shared `resolveExactThenCi(...)` helper — the
|
||||
two Person paths have different fallbacks, so the exact→CI→fallback logic is inlined at each
|
||||
with its load-bearing comment (KISS).
|
||||
|
||||
## Alternatives considered
|
||||
|
||||
- **A `unique(lower(name))` index** — rejected: the collisions are valid canonical nodes, and
|
||||
the migration would fail against the existing data and block startup.
|
||||
- **Merging/deduping the colliding tags** — rejected: each has a distinct `source_ref`, tree
|
||||
position, and real document attachments; they are not duplicates.
|
||||
- **Round-tripping tag IDs instead of names** so resolution can't be ambiguous at all — the
|
||||
cleaner long-term shape (removes the name-as-key smell), but a larger change with frontend
|
||||
surface; deferred to #732. The lookup fix here is the minimal correct unblock.
|
||||
- **Hoisting resolution into `DocumentService.resolveTags`** — rejected: it would duplicate the
|
||||
rule across the edit, bulk-edit, and import paths and let them drift; `findOrCreate` stays
|
||||
the one owner.
|
||||
@@ -104,7 +104,7 @@ C4Component
|
||||
ContainerDb(minio, "MinIO")
|
||||
|
||||
System_Boundary(backend, "API Backend (Spring Boot)") {
|
||||
Component(docCtrl, "DocumentController", "Spring MVC — /api/documents", "CRUD for documents: search, get by ID, update metadata, upload/download file, conversation thread, and batch metadata updates.")
|
||||
Component(docCtrl, "DocumentController", "Spring MVC — /api/documents", "CRUD for documents: search, get by ID, update metadata, upload/download file, and batch metadata updates.")
|
||||
Component(adminCtrl, "AdminController", "Spring MVC — /api/admin", "Triggers the asynchronous canonical import (requires ADMIN permission). Reports import state via GET /api/admin/import-status (IDLE/RUNNING/DONE/FAILED).")
|
||||
|
||||
Component(docSvc, "DocumentService", "Spring Service", "Core document business logic: store, update, search. Resolves persons and tags, delegates file I/O to FileService, builds dynamic JPA Specifications, and integrates with audit logging.")
|
||||
@@ -112,7 +112,7 @@ C4Component
|
||||
Component(importOrch, "CanonicalImportOrchestrator", "Spring Service — @Async", "Runs four idempotent loaders (TagTree → PersonRegister → PersonTree → Document) in a fixed DAG over the normalizer's committed canonical artifacts (canonical-*.xlsx + canonical-persons-tree.json) from /import — see diagram 3b. Owns the IDLE/RUNNING/DONE/FAILED state machine.")
|
||||
Component(minioConf, "MinioConfig", "Spring @Configuration", "Creates the S3Client and S3Presigner beans with path-style access for MinIO. Validates MinIO connectivity on startup.")
|
||||
|
||||
Component(docRepo, "DocumentRepository", "Spring Data JPA", "Queries documents with Specification-based dynamic search, bidirectional conversation thread queries, full-text search with ranking and match highlighting, and transcription pipeline queue projections.")
|
||||
Component(docRepo, "DocumentRepository", "Spring Data JPA", "Queries documents with Specification-based dynamic search, full-text search with ranking and match highlighting, and transcription pipeline queue projections.")
|
||||
Component(docSpec, "DocumentSpecifications", "JPA Criteria API", "Factory for composable predicates: hasText (full-text), hasSender, hasReceiver, isBetween (date range), hasTags (subquery AND/OR logic).")
|
||||
}
|
||||
|
||||
@@ -442,7 +442,7 @@ C4Component
|
||||
|
||||
### 3c — People, Stories & Discovery
|
||||
|
||||
Person directory, bilateral conversations, activity feed, stories, family tree, and user profiles.
|
||||
Person directory, activity feed, stories, family tree, and user profiles.
|
||||
|
||||
```mermaid
|
||||
C4Component
|
||||
@@ -454,7 +454,6 @@ C4Component
|
||||
System_Boundary(frontend, "Web Frontend (SvelteKit / SSR)") {
|
||||
Component(personsPage, "/persons and /persons/[id]", "SvelteKit Routes", "Person directory and detail. Detail: metadata, document list sent/received, correspondents, explicit and inferred family relationships.")
|
||||
Component(personEdit, "/persons/[id]/edit and /persons/new", "SvelteKit Routes", "Create and edit person forms. Edit: metadata, aliases, explicit relationships. Actions: PUT/POST /api/persons.")
|
||||
Component(briefwechsel, "/briefwechsel", "SvelteKit Route", "Bilateral conversation timeline. Selects two persons via PersonTypeahead, fetches GET /api/documents/conversation, displays chronological exchange.")
|
||||
Component(aktivitaeten, "/aktivitaeten", "SvelteKit Route", "Unified activity feed (Chronik). Loader: GET /api/dashboard/activity and GET /api/notifications?read=false.")
|
||||
Component(geschichten, "/geschichten and /geschichten/[id]", "SvelteKit Routes", "Story list and detail pages. Loader: GET /api/geschichten?status=PUBLISHED.")
|
||||
Component(geschichtenEdit, "/geschichten/[id]/edit and /geschichten/new", "SvelteKit Routes", "Story editor with rich text, person and document linking. Actions: PUT/POST /api/geschichten. Requires BLOG_WRITE permission.")
|
||||
@@ -466,7 +465,6 @@ C4Component
|
||||
Rel(user, personsPage, "Browses family members", "HTTPS / Browser")
|
||||
Rel(personsPage, backend, "GET /api/persons, GET /api/persons/{id}", "HTTP / JSON")
|
||||
Rel(personEdit, backend, "GET /api/persons/{id}, PUT /api/persons/{id}, POST /api/persons", "HTTP / JSON")
|
||||
Rel(briefwechsel, backend, "GET /api/documents/conversation", "HTTP / JSON")
|
||||
Rel(aktivitaeten, backend, "GET /api/dashboard/activity, GET /api/notifications", "HTTP / JSON")
|
||||
Rel(geschichten, backend, "GET /api/geschichten", "HTTP / JSON")
|
||||
Rel(geschichtenEdit, backend, "GET/PUT/POST /api/geschichten", "HTTP / JSON")
|
||||
|
||||
@@ -9,10 +9,12 @@ Person(member, "Family Member", "Access by administrator invite. Searches, brows
|
||||
System(familienarchiv, "Familienarchiv", "Web application for digitising, organising, and searching family documents")
|
||||
System_Ext(mail, "Email Service", "SMTP server. Delivers notification emails (mentions, replies) and password-reset links.")
|
||||
System_Ext(glitchtip, "GlitchTip", "Self-hosted error tracking (Sentry-compatible). Receives frontend and backend error events with stack traces.")
|
||||
System_Ext(ollama, "Ollama (self-hosted)", "Local LLM inference server (qwen2.5:7b). Parses natural-language search queries into structured filters. Runs in the same Docker Compose stack.")
|
||||
|
||||
Rel(admin, familienarchiv, "Manages via browser", "HTTPS")
|
||||
Rel(member, familienarchiv, "Searches, reads, and transcribes via browser", "HTTPS")
|
||||
Rel(familienarchiv, mail, "Sends notification and password-reset emails (optional)", "SMTP")
|
||||
Rel(familienarchiv, glitchtip, "Sends error events with errorId and stack trace", "HTTPS")
|
||||
Rel(familienarchiv, ollama, "NL query parsing for natural-language search", "HTTP / REST (internal)")
|
||||
|
||||
@enduml
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user