Compare commits
1 Commits
27bef28c0e
...
worktree-f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
76db26d68b |
@@ -414,7 +414,7 @@ Never Kafka for teams under 10 or <100k events/day. Never gRPC inside a monolith
|
||||
|
||||
| PR contains | Required doc update |
|
||||
|---|---|
|
||||
| New Flyway migration adding/removing/renaming a table or column | `docs/architecture/db/db-orm.puml` and `docs/architecture/db/db-relationships.puml` — **except** framework-owned tables (e.g. Spring Session JDBC's `spring_session*`, Flyway's `flyway_schema_history`), which are opaque to app code; reference the relevant ADR if an exclusion is load-bearing |
|
||||
| New Flyway migration adding/removing/renaming a table or column | `docs/architecture/db/db-orm.puml` and `docs/architecture/db/db-relationships.puml` |
|
||||
| New `@ManyToMany` join table or FK | Both DB diagrams |
|
||||
| New backend package or domain module | `CLAUDE.md` package table + matching `docs/architecture/c4/l3-backend-*.puml` |
|
||||
| New controller or service in an existing backend domain | Matching `docs/architecture/c4/l3-backend-*.puml` |
|
||||
|
||||
@@ -984,7 +984,7 @@ Mark with `@pytest.mark.asyncio` so pytest runs the coroutine. Without it, the t
|
||||
|
||||
| What changed in code | Doc(s) to update |
|
||||
|---|---|
|
||||
| New Flyway migration adds/removes/renames a table or column | `docs/architecture/db/db-orm.puml` (add/remove entity or attribute) **and** `docs/architecture/db/db-relationships.puml` (add/remove relationship line) — **except** framework-owned tables (e.g. Spring Session JDBC's `spring_session*`, Flyway's `flyway_schema_history`), which are opaque to app code; reference the relevant ADR if an exclusion is load-bearing |
|
||||
| New Flyway migration adds/removes/renames a table or column | `docs/architecture/db/db-orm.puml` (add/remove entity or attribute) **and** `docs/architecture/db/db-relationships.puml` (add/remove relationship line) |
|
||||
| New `@ManyToMany` join table or FK relationship | Both DB diagrams above |
|
||||
| New backend package / domain module | `CLAUDE.md` (package structure table) **and** the matching `docs/architecture/c4/l3-backend-*.puml` diagram for that domain |
|
||||
| New Spring Boot controller or service in an existing domain | The matching `docs/architecture/c4/l3-backend-*.puml` for that domain |
|
||||
|
||||
65
.env.example
65
.env.example
@@ -26,71 +26,6 @@ PORT_MAILPIT_SMTP=1025
|
||||
# Generate with: python3 -c "import secrets; print(secrets.token_hex(32))"
|
||||
OCR_TRAINING_TOKEN=change-me-in-production
|
||||
|
||||
# --- Observability ---
|
||||
# Optional stack — start with: docker compose -f docker-compose.observability.yml up -d
|
||||
# Requires the main stack to already be running (docker compose up -d creates archiv-net).
|
||||
# In production the stack is managed from /opt/familienarchiv/ (see docs/DEPLOYMENT.md §4).
|
||||
|
||||
# Ports for host access
|
||||
PORT_GRAFANA=3003
|
||||
PORT_GLITCHTIP=3002
|
||||
PORT_PROMETHEUS=9090
|
||||
|
||||
# Grafana admin password — change this before exposing Grafana beyond localhost
|
||||
GRAFANA_ADMIN_PASSWORD=changeme
|
||||
|
||||
# Password for the read-only grafana_reader PostgreSQL role used by the PO
|
||||
# Overview dashboard. Consumed by Flyway V68 (to set the role's password) and
|
||||
# by Grafana's PostgreSQL datasource (to connect). REQUIRED in production —
|
||||
# generate with: openssl rand -hex 32
|
||||
GRAFANA_DB_PASSWORD=changeme-generate-with-openssl-rand-hex-32
|
||||
|
||||
# GlitchTip domain — production: use https://glitchtip.archiv.raddatz.cloud (must match Caddy vhost)
|
||||
GLITCHTIP_DOMAIN=http://localhost:3002
|
||||
|
||||
# GlitchTip secret key — Django SECRET_KEY equivalent, used to sign sessions and tokens.
|
||||
# REQUIRED in production — must not be empty or 'changeme'. Fail-closed: GlitchTip will
|
||||
# refuse to start with an invalid key.
|
||||
# Generate with: python3 -c "import secrets; print(secrets.token_hex(50))"
|
||||
GLITCHTIP_SECRET_KEY=changeme-generate-a-real-secret
|
||||
|
||||
# PostgreSQL hostname for GlitchTip's db-init job and workers.
|
||||
# Override when only the staging stack is running (container name differs from archive-db).
|
||||
# Default (archive-db) is correct for production with the full stack up.
|
||||
POSTGRES_HOST=archive-db
|
||||
|
||||
# $$ escaping note: passwords in /opt/familienarchiv/.env that contain a literal '$' must
|
||||
# use '$$' so Docker Compose does not expand them as variable references.
|
||||
# Example: a password 'p@$$word' should be written as 'p@$$$$word' in the .env file.
|
||||
|
||||
# Error reporting DSNs — leave empty to disable the SDK (safe default).
|
||||
# SENTRY_DSN: backend (Spring Boot) — used by the GlitchTip/Sentry Java SDK
|
||||
SENTRY_DSN=
|
||||
SENTRY_TRACES_SAMPLE_RATE=
|
||||
# VITE_SENTRY_DSN: frontend (SvelteKit) — injected at build time via Vite
|
||||
VITE_SENTRY_DSN=
|
||||
# Sentry/GlitchTip auth token for source map upload at build time (optional)
|
||||
SENTRY_AUTH_TOKEN=
|
||||
|
||||
# NL search — Ollama LLM inference
|
||||
# Leave APP_OLLAMA_BASE_URL empty to disable NL search (safe default for CX32 / CI).
|
||||
# Set to http://ollama:11434 to enable. Requires CX42 (16 GB RAM) to run alongside OCR.
|
||||
APP_OLLAMA_BASE_URL=http://ollama:11434
|
||||
|
||||
# CPU limit: 4.0 is safe on both CX32 (4 vCPUs) and CX42 (8 vCPUs).
|
||||
# Raise to 7.5 on CX42 for full throughput.
|
||||
OLLAMA_CPU_LIMIT=4.0
|
||||
|
||||
# Memory limit: requires CX42 (16 GB) to run alongside OCR.
|
||||
# Reduce or set APP_OLLAMA_BASE_URL= on smaller hosts.
|
||||
OLLAMA_MEM_LIMIT=8g
|
||||
|
||||
# Ollama API key — set on the Ollama service to restrict inference API access on archiv-net.
|
||||
# Generate with: openssl rand -hex 32
|
||||
# NOTE: Empirically verified that OLLAMA_API_KEY is NOT enforced in Ollama 0.6.5 or 0.30.6 (ADR-028 §7).
|
||||
# archiv-net network isolation is the only effective access control. Retained for forward compatibility.
|
||||
OLLAMA_API_KEY=
|
||||
|
||||
# Production SMTP — uncomment and fill in to send real emails instead of catching them
|
||||
# APP_BASE_URL=https://your-domain.example.com
|
||||
# MAIL_HOST=smtp.example.com
|
||||
|
||||
@@ -1,127 +0,0 @@
|
||||
name: Deploy observability stack
|
||||
description: >-
|
||||
Deploy observability configs + secrets to /opt/familienarchiv, validate the
|
||||
compose config, start the stack, and assert the five healthchecked services
|
||||
are healthy. Per-environment values arrive as inputs.
|
||||
|
||||
inputs:
|
||||
grafana_admin_password:
|
||||
description: Grafana admin password (secret)
|
||||
required: true
|
||||
grafana_db_password:
|
||||
description: Read-only grafana_reader DB role password (secret, issue #651)
|
||||
required: true
|
||||
glitchtip_secret_key:
|
||||
description: GlitchTip Django secret key (secret)
|
||||
required: true
|
||||
postgres_password:
|
||||
description: PostgreSQL password for the environment (secret)
|
||||
required: true
|
||||
postgres_host:
|
||||
description: >-
|
||||
Compose project + service hostname, e.g. archiv-staging-db-1. Derived
|
||||
from the Compose project name and service name — a project rename
|
||||
requires updating the caller's value. Plain input, not a secret.
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Deploy observability configs
  shell: bash
  # Copies the compose file and config tree from the workspace checkout
  # into /opt/familienarchiv/ — the permanent location that persists
  # between CI runs. Containers started in the next step bind-mount
  # from there, so a future workspace wipe cannot corrupt a running
  # config file.
  #
  # obs-secrets.env is written fresh from Gitea secrets on every run so
  # Gitea is always the single source of truth for secret rotation.
  # Non-secret config lives in infra/observability/obs.env (tracked in git).
  #
  # secrets.* is NOT available inside a composite action, so the values
  # arrive as inputs mapped to env: below and are referenced as $VAR in
  # the heredoc. The delimiter MUST stay unquoted (<<EOF, not <<'EOF') so
  # the shell expands $VAR — a quoted delimiter would write the literal
  # string "$GRAFANA_ADMIN_PASSWORD" and `config --quiet` would still pass
  # (the var is present, just wrong). Do not stage these into intermediate
  # variables either, or Gitea log masking can be lost.
  env:
    GRAFANA_ADMIN_PASSWORD: ${{ inputs.grafana_admin_password }}
    GRAFANA_DB_PASSWORD: ${{ inputs.grafana_db_password }}
    GLITCHTIP_SECRET_KEY: ${{ inputs.glitchtip_secret_key }}
    POSTGRES_PASSWORD: ${{ inputs.postgres_password }}
    POSTGRES_HOST: ${{ inputs.postgres_host }}
  run: |
    set -euo pipefail
    rm -rf /opt/familienarchiv/infra/observability
    mkdir -p /opt/familienarchiv/infra/observability
    cp -r infra/observability/. /opt/familienarchiv/infra/observability/
    cp docker-compose.observability.yml /opt/familienarchiv/
    # Restrict the secrets file BEFORE any secret is written: create or
    # truncate it empty, set 0600, and only then fill it. Writing it with
    # `cat >` first would create it under the default umask and leave the
    # secrets readable at that mode until a trailing chmod ran. The umask
    # itself is left untouched so the config tree copied above keeps the
    # permissions the bind-mounting containers rely on.
    : > /opt/familienarchiv/obs-secrets.env
    chmod 600 /opt/familienarchiv/obs-secrets.env
    cat > /opt/familienarchiv/obs-secrets.env <<EOF
    GRAFANA_ADMIN_PASSWORD=$GRAFANA_ADMIN_PASSWORD
    GRAFANA_DB_PASSWORD=$GRAFANA_DB_PASSWORD
    GLITCHTIP_SECRET_KEY=$GLITCHTIP_SECRET_KEY
    POSTGRES_PASSWORD=$POSTGRES_PASSWORD
    POSTGRES_HOST=$POSTGRES_HOST
    EOF
    # Five-key non-empty guard: a bare presence check matches an empty
    # `KEY=` line, so assert each key has a value. Fail loudly on any
    # missing/empty key rather than starting the stack with broken auth.
    for key in GRAFANA_ADMIN_PASSWORD GRAFANA_DB_PASSWORD GLITCHTIP_SECRET_KEY POSTGRES_PASSWORD POSTGRES_HOST; do
      grep -Eq "^${key}=.+" /opt/familienarchiv/obs-secrets.env \
        || { echo "::error::obs-secrets.env missing or empty: ${key}"; exit 1; }
    done
    # Re-assert 0600 as the final operation (belt and braces); the file was
    # already restricted before the secrets were written above.
    chmod 600 /opt/familienarchiv/obs-secrets.env
|
||||
|
||||
- name: Validate observability compose config
|
||||
shell: bash
|
||||
# Dry-run: resolves all variable substitutions and reports any missing
|
||||
# required keys before containers start. Catches undefined variables and
|
||||
# YAML errors in config files updated by the previous step.
|
||||
# --env-file order: obs.env first (git-tracked defaults), obs-secrets.env
|
||||
# second (CI-written secrets). Later files win on duplicate keys. POSTGRES_HOST
|
||||
# is environment-specific and supplied only by obs-secrets.env — obs.env
|
||||
# documents it but deliberately does not set a value.
|
||||
run: |
|
||||
docker compose \
|
||||
-f /opt/familienarchiv/docker-compose.observability.yml \
|
||||
--env-file /opt/familienarchiv/infra/observability/obs.env \
|
||||
--env-file /opt/familienarchiv/obs-secrets.env \
|
||||
config --quiet
|
||||
|
||||
- name: Start observability stack
|
||||
shell: bash
|
||||
# Runs with absolute paths so bind mounts resolve to stable host paths
|
||||
# that survive workspace wipes between runs (see ADR-016).
|
||||
# Non-secret config from obs.env (git-tracked); secrets from obs-secrets.env
|
||||
# (written fresh from Gitea secrets above). --env-file order: obs.env first,
|
||||
# obs-secrets.env second — later file wins on duplicate keys.
|
||||
run: |
|
||||
docker compose \
|
||||
-f /opt/familienarchiv/docker-compose.observability.yml \
|
||||
--env-file /opt/familienarchiv/infra/observability/obs.env \
|
||||
--env-file /opt/familienarchiv/obs-secrets.env \
|
||||
up -d --wait --remove-orphans
|
||||
|
||||
- name: Assert observability stack health
|
||||
shell: bash
|
||||
# docker compose up --wait covers services WITH healthcheck directives only.
|
||||
# obs-promtail, obs-cadvisor, obs-node-exporter, and obs-glitchtip-worker have
|
||||
# no healthcheck — they are considered "started" as soon as the process runs.
|
||||
# This step explicitly asserts the five healthchecked critical services are
|
||||
# healthy before the smoke test proceeds.
|
||||
run: |
|
||||
set -e
|
||||
unhealthy=""
|
||||
for svc in obs-loki obs-prometheus obs-grafana obs-tempo obs-glitchtip; do
|
||||
status=$(docker inspect "$svc" --format '{{.State.Health.Status}}' 2>/dev/null || echo "missing")
|
||||
if [ "$status" != "healthy" ]; then
|
||||
echo "::error::$svc is not healthy (status: $status)"
|
||||
unhealthy="$unhealthy $svc"
|
||||
fi
|
||||
done
|
||||
[ -z "$unhealthy" ] || exit 1
|
||||
echo "All critical observability services are healthy"
|
||||
@@ -1,41 +0,0 @@
|
||||
name: Reload Caddy
|
||||
description: >-
|
||||
Reload the host Caddy service from a DooD job container via a privileged
|
||||
sibling container and nsenter. No inputs.
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Reload Caddy
|
||||
shell: bash
|
||||
# Apply any committed Caddyfile changes before smoke-testing the
|
||||
# public surface. Without this step, a Caddyfile edit lands in the
|
||||
# repo but Caddy keeps serving the previous config until someone
|
||||
# reloads it manually — the smoke test would then catch a stale
|
||||
# header or a still-proxied /actuator route rather than confirming
|
||||
# the current config is live.
|
||||
#
|
||||
# The runner executes job steps inside Docker containers (DooD).
|
||||
# `systemctl` is not present in container images and cannot reach
|
||||
# the host's systemd directly. We use the Docker socket (mounted
|
||||
# into every job container via runner-config.yaml) to spin up a
|
||||
# privileged sibling container in the host PID namespace; nsenter
|
||||
# then enters the host's namespaces so systemctl talks to the real
|
||||
# host systemd daemon. No sudoers entry is required — the Docker
|
||||
# socket already grants root-equivalent host access.
|
||||
#
|
||||
# Alpine is used: ~5 MB vs ~70 MB for ubuntu, no unnecessary
|
||||
# tooling, and the digest is pinned so any upstream change requires
|
||||
# an explicit bump PR. util-linux (which ships nsenter) is installed
|
||||
# at run time; apk add takes ~1 s on the warm VPS cache.
|
||||
#
|
||||
# `reload` not `restart`: reload runs the unit's ExecReload (`caddy reload` in the stock unit — Caddy 2 ignores SIGHUP) so Caddy re-reads its
|
||||
# config in-process without dropping TLS connections. `restart`
|
||||
# would briefly stop the service, losing in-flight requests.
|
||||
#
|
||||
# If Caddy is not running this step fails fast before the smoke test
|
||||
# issues a misleading "port 443 refused" error.
|
||||
run: |
|
||||
docker run --rm --privileged --pid=host \
|
||||
alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d \
|
||||
sh -c 'apk add --no-cache util-linux -q && nsenter -t 1 -m -u -n -p -i -- /bin/systemctl reload caddy'
|
||||
@@ -1,58 +0,0 @@
|
||||
name: Smoke test
|
||||
description: >-
|
||||
Verify the deployed public surface (login reachable, HSTS pinned,
|
||||
Permissions-Policy present, /actuator blocked) against a given vhost.
|
||||
|
||||
inputs:
|
||||
host:
|
||||
description: Public vhost to smoke-test, e.g. staging.raddatz.cloud
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Smoke test deployed environment
|
||||
shell: bash
|
||||
# Healthchecks confirm containers are healthy; they do NOT confirm the
|
||||
# public surface works. This step catches: Caddy not reloaded, HSTS
|
||||
# header dropped, /actuator block bypassed.
|
||||
#
|
||||
# --resolve pins the public host to the Docker bridge gateway IP
|
||||
# (the host) so we do NOT depend on hairpin NAT on the host router.
|
||||
# 127.0.0.1 cannot be used: job containers run in bridge network mode
|
||||
# (runner-config.yaml), so 127.0.0.1 is the container's loopback, not
|
||||
# the host's. The bridge gateway IS the host; Caddy binds 0.0.0.0:443
|
||||
# and is therefore reachable from the container via that IP.
|
||||
# SNI still uses the public hostname so the TLS cert validates correctly.
|
||||
#
|
||||
# --resolve is stored as a Bash array so "${RESOLVE[@]}" expands to two
|
||||
# separate arguments; a quoted string would pass the flag and its value
|
||||
# as one token and curl would reject it as an unknown option.
|
||||
#
|
||||
# Gateway detection reads /proc/net/route (always present, no package
|
||||
# required) instead of `ip route` to avoid a dependency on iproute2.
|
||||
# Field $2=="00000000" is the default route; field $3 is the gateway as
|
||||
# a little-endian 32-bit hex value which awk decodes to dotted-decimal.
|
||||
env:
|
||||
HOST: ${{ inputs.host }}
|
||||
run: |
|
||||
set -e
|
||||
URL="https://$HOST"
|
||||
HOST_IP=$(awk 'NR>1 && $2=="00000000"{h=$3;printf "%d.%d.%d.%d\n",strtonum("0x"substr(h,7,2)),strtonum("0x"substr(h,5,2)),strtonum("0x"substr(h,3,2)),strtonum("0x"substr(h,1,2));exit}' /proc/net/route)
|
||||
[ -n "$HOST_IP" ] || { echo "::error::could not detect Docker bridge gateway via /proc/net/route"; exit 1; }
|
||||
RESOLVE=(--resolve "$HOST:443:$HOST_IP")
|
||||
echo "Smoke test: $URL (pinned to $HOST_IP via bridge gateway)"
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 "$URL/login" -o /dev/null
|
||||
# Pin the preload-list-eligible HSTS value, not just header presence:
|
||||
# a degraded `max-age=1` or a dropped `includeSubDomains; preload` must
|
||||
# fail this check rather than pass it silently.
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
||||
| grep -Eqi 'strict-transport-security:[[:space:]]*max-age=31536000.*includeSubDomains.*preload'
|
||||
# Permissions-Policy denies APIs the app does not use (camera,
|
||||
# microphone, geolocation). A regression that loosens or drops the
|
||||
# header now fails the smoke step.
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
||||
| grep -Eqi 'permissions-policy:[[:space:]]*camera=\(\),[[:space:]]*microphone=\(\),[[:space:]]*geolocation=\(\)'
|
||||
status=$(curl -s "${RESOLVE[@]}" -o /dev/null -w "%{http_code}" --max-time 10 "$URL/actuator/health")
|
||||
[ "$status" = "404" ] || { echo "::error::expected 404 from /actuator/health, got $status"; exit 1; }
|
||||
echo "All smoke checks passed"
|
||||
@@ -2,7 +2,6 @@ name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
@@ -13,7 +12,7 @@ jobs:
|
||||
name: Unit & Component Tests
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: mcr.microsoft.com/playwright:v1.60.0-noble
|
||||
image: mcr.microsoft.com/playwright:v1.58.2-noble
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
@@ -29,18 +28,10 @@ jobs:
|
||||
run: npm ci
|
||||
working-directory: frontend
|
||||
|
||||
- name: Security audit (no dev deps)
|
||||
run: npm audit --audit-level=high --omit=dev
|
||||
working-directory: frontend
|
||||
|
||||
- name: Compile Paraglide i18n
|
||||
run: npx @inlang/paraglide-js compile --project ./project.inlang --outdir ./src/lib/paraglide
|
||||
working-directory: frontend
|
||||
|
||||
- name: Sync SvelteKit
|
||||
run: npx svelte-kit sync
|
||||
working-directory: frontend
|
||||
|
||||
- name: Lint
|
||||
run: npm run lint
|
||||
working-directory: frontend
|
||||
@@ -65,29 +56,6 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Assert no raw document date rendered via {@html} (CWE-79 — #666)
|
||||
shell: bash
|
||||
run: |
|
||||
# meta_date_raw is untrusted verbatim spreadsheet text — it must render via
|
||||
# Svelte default escaping, never {@html}. This guard flags any {@html ...}
|
||||
# whose expression references a raw-date variable. A comment mentioning
|
||||
# "{@html}" without a raw token inside the braces does NOT match.
|
||||
# The token list MUST cover every variable that carries the raw value:
|
||||
# DocumentDate.svelte exposes it via the `raw` prop, so `\braw\b` is included.
|
||||
# Grow this list whenever a new raw-bearing variable name is introduced.
|
||||
pattern='\{@html[^}]*(metaDateRaw|documentDateRaw|rawDate|\braw\b)'
|
||||
# Self-test: the regex must catch the dangerous forms and ignore the comment form.
|
||||
printf '{@html doc.metaDateRaw}\n' | grep -qP "$pattern" \
|
||||
|| { echo "FAIL: guard self-test — regex missed the unsafe {@html metaDateRaw} form"; exit 1; }
|
||||
printf '{@html raw}\n' | grep -qP "$pattern" \
|
||||
|| { echo "FAIL: guard self-test — regex missed the unsafe {@html raw} form (DocumentDate prop)"; exit 1; }
|
||||
printf 'never use {@html} for this\n' | grep -qvP "$pattern" \
|
||||
|| { echo "FAIL: guard self-test — regex wrongly flagged a {@html} comment"; exit 1; }
|
||||
if grep -rPln "$pattern" --include='*.svelte' frontend/src/; then
|
||||
echo "FAIL: meta_date_raw rendered via {@html} — use default {…} escaping (CWE-79, #666)."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Assert no (upload|download)-artifact past v3
|
||||
shell: bash
|
||||
run: |
|
||||
@@ -108,32 +76,6 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Assert deploy-obs writes obs-secrets.env via an unquoted heredoc (#603)
|
||||
shell: bash
|
||||
run: |
|
||||
# Inside a composite action, secrets arrive as $VAR from env: (secrets.*
|
||||
# is unavailable there), so the obs-secrets.env heredoc MUST use an
|
||||
# unquoted delimiter (<<EOF) for $VAR to expand. A quoted delimiter
|
||||
# (<<'EOF') would write the literal string "$GRAFANA_ADMIN_PASSWORD",
|
||||
# and the action's five-key non-empty guard would STILL pass (the line
|
||||
# is present, just wrong). This guard enforces the invariant in CI so a
|
||||
# future re-quote cannot ship broken obs auth green. See ADR-029 / #603.
|
||||
action='.gitea/actions/deploy-obs/action.yml'
|
||||
quoted='obs-secrets\.env\s*<<-?\s*[\x27\x22]'
|
||||
# Self-test: the regex must catch a quoted delimiter and ignore the unquoted one.
|
||||
printf "obs-secrets.env <<'EOF'\n" | grep -qP "$quoted" \
|
||||
|| { echo "FAIL: guard self-test — regex missed the quoted <<'EOF' form"; exit 1; }
|
||||
printf 'obs-secrets.env <<EOF\n' | grep -qvP "$quoted" \
|
||||
|| { echo "FAIL: guard self-test — regex wrongly flagged the unquoted <<EOF form"; exit 1; }
|
||||
# Positive: the unquoted heredoc must be present at all.
|
||||
grep -qP 'obs-secrets\.env\s*<<-?EOF\b' "$action" \
|
||||
|| { echo "::error::$action no longer writes obs-secrets.env via an unquoted <<EOF heredoc (ADR-029 / #603)"; exit 1; }
|
||||
# Negative: never a quoted delimiter on the obs-secrets.env heredoc.
|
||||
if grep -nP "$quoted" "$action"; then
|
||||
echo "::error::$action writes obs-secrets.env with a quoted heredoc delimiter — secrets would be written as literal \$VAR strings. Use unquoted <<EOF (ADR-029 / #603)."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Run unit and component tests with coverage
|
||||
shell: bash
|
||||
run: |
|
||||
@@ -201,10 +143,7 @@ jobs:
|
||||
path: frontend/test-results/screenshots/
|
||||
|
||||
# ─── OCR Service Unit Tests ───────────────────────────────────────────────────
|
||||
# Only stdlib/lightweight tests — no ML stack (PyTorch/Surya/Kraken) required.
|
||||
# test_tmpdir.py covers the TMPDIR env var and entrypoint mkdir behaviour (ADR-021).
|
||||
# test_tmpdir_is_inside_persistent_cache_volume is skipped in CI (TMPDIR not
|
||||
# set to /app/cache here); it runs inside the deployed Docker container.
|
||||
# Only test_spell_check.py, test_confidence.py, test_sender_registry.py — no ML stack required.
|
||||
ocr-tests:
|
||||
name: OCR Service Tests
|
||||
runs-on: ubuntu-latest
|
||||
@@ -216,11 +155,11 @@ jobs:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install test dependencies
|
||||
run: pip install "pyspellchecker==0.9.0" "fastapi==0.115.6" pytest pytest-asyncio
|
||||
run: pip install "pyspellchecker==0.9.0" pytest pytest-asyncio
|
||||
working-directory: ocr-service
|
||||
|
||||
- name: Run OCR unit tests (no ML stack required)
|
||||
run: python -m pytest test_spell_check.py test_confidence.py test_sender_registry.py test_tmpdir.py -v
|
||||
run: python -m pytest test_spell_check.py test_confidence.py test_sender_registry.py -v
|
||||
working-directory: ocr-service
|
||||
|
||||
# ─── Backend Unit & Slice Tests ───────────────────────────────────────────────
|
||||
@@ -250,17 +189,9 @@ jobs:
|
||||
- name: Run backend tests
|
||||
run: |
|
||||
chmod +x mvnw
|
||||
./mvnw clean verify
|
||||
./mvnw clean test
|
||||
working-directory: backend
|
||||
|
||||
- name: Upload surefire reports
|
||||
if: always()
|
||||
# Gitea Actions (act_runner) does not implement upload-artifact v4 protocol — pinned per ADR-014. Do NOT upgrade. See #557.
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: surefire-reports
|
||||
path: backend/target/surefire-reports/
|
||||
|
||||
# ─── fail2ban Regex Regression ────────────────────────────────────────────────
|
||||
# The filter parses Caddy's JSON access log; a Caddy upgrade that reorders
|
||||
# the JSON keys would silently break it (fail2ban-regex would return
|
||||
@@ -332,27 +263,6 @@ jobs:
|
||||
echo "$dump" | grep -qE "\['add', 'familienarchiv-auth', 'polling'\]" \
|
||||
|| { echo "FAIL: familienarchiv-auth jail did not resolve to 'polling' backend"; exit 1; }
|
||||
|
||||
# ─── Semgrep Security Scan ───────────────────────────────────────────────────
|
||||
# Catches XXE-unprotected XML parser factories and similar patterns defined in
|
||||
# .semgrep/security.yml. Runs in parallel with backend-unit-tests for fast feedback.
|
||||
# Uses local rules only (no SEMGREP_APP_TOKEN / OIDC — act_runner does not support it).
|
||||
semgrep-scan:
|
||||
name: Semgrep Security Scan
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install Semgrep
|
||||
run: pip install semgrep==1.163.0
|
||||
|
||||
- name: Run security rules
|
||||
run: semgrep --config .semgrep/security.yml --error --metrics=off backend/src/
|
||||
|
||||
# ─── Compose Bucket-Bootstrap Idempotency ─────────────────────────────────────
|
||||
# docker-compose.prod.yml's create-buckets service runs on every
|
||||
# `docker compose up` (one-shot, no restart). Must be idempotent — a
|
||||
@@ -382,7 +292,6 @@ jobs:
|
||||
MAIL_PORT=1025
|
||||
APP_MAIL_FROM=noreply@local
|
||||
IMPORT_HOST_DIR=/tmp/dummy-import
|
||||
COMPOSE_NETWORK_NAME=test-idem-archiv-net
|
||||
EOF
|
||||
|
||||
- name: Bring up minio
|
||||
|
||||
@@ -23,11 +23,6 @@ name: nightly
|
||||
# - host ports: backend 8081, frontend 3001
|
||||
# - profile: staging (starts mailpit instead of a real SMTP relay)
|
||||
#
|
||||
# The obs-stack deploy, Caddy reload, and smoke test are shared with
|
||||
# release.yml via the composite actions under .gitea/actions/ (ADR-029).
|
||||
# actions/checkout MUST stay the first step: a local `uses: ./…` action
|
||||
# only exists on disk after checkout.
|
||||
#
|
||||
# Required Gitea secrets:
|
||||
# STAGING_POSTGRES_PASSWORD
|
||||
# STAGING_MINIO_PASSWORD
|
||||
@@ -35,10 +30,6 @@ name: nightly
|
||||
# STAGING_OCR_TRAINING_TOKEN
|
||||
# STAGING_APP_ADMIN_USERNAME
|
||||
# STAGING_APP_ADMIN_PASSWORD
|
||||
# GRAFANA_ADMIN_PASSWORD
|
||||
# GRAFANA_DB_PASSWORD (read-only grafana_reader DB role, issue #651)
|
||||
# GLITCHTIP_SECRET_KEY
|
||||
# SENTRY_DSN (set after GlitchTip first-run; empty = Sentry disabled)
|
||||
|
||||
on:
|
||||
schedule:
|
||||
@@ -60,8 +51,6 @@ jobs:
|
||||
# for the same repo is within that boundary.
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
# MUST be first: the composite actions below live under .gitea/actions/
|
||||
# and only exist on disk once the repo is checked out (ADR-029).
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Write staging env file
|
||||
@@ -85,10 +74,6 @@ jobs:
|
||||
MAIL_STARTTLS_ENABLE=false
|
||||
APP_MAIL_FROM=noreply@staging.raddatz.cloud
|
||||
IMPORT_HOST_DIR=/srv/familienarchiv-staging/import
|
||||
POSTGRES_USER=archiv
|
||||
SENTRY_DSN=${{ secrets.SENTRY_DSN }}
|
||||
VITE_SENTRY_DSN=${{ secrets.VITE_SENTRY_DSN }}
|
||||
GRAFANA_DB_PASSWORD=${{ secrets.GRAFANA_DB_PASSWORD }}
|
||||
EOF
|
||||
|
||||
- name: Verify backend /import:ro mount is wired
|
||||
@@ -99,7 +84,6 @@ jobs:
|
||||
# `compose config` renders both shorthand and longform mounts as
|
||||
# `target: /import` + `read_only: true`, so we assert against
|
||||
# the rendered form rather than the raw source YAML.
|
||||
# App-compose check (not obs), nightly-only — stays inline.
|
||||
run: |
|
||||
set -e
|
||||
docker compose \
|
||||
@@ -136,21 +120,78 @@ jobs:
|
||||
--profile staging \
|
||||
up -d --wait --remove-orphans
|
||||
|
||||
# POSTGRES_HOST is derived from the Compose project name (archiv-staging)
|
||||
# and service name (db). A project rename requires updating this value.
|
||||
- uses: ./.gitea/actions/deploy-obs
|
||||
with:
|
||||
grafana_admin_password: ${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
||||
grafana_db_password: ${{ secrets.GRAFANA_DB_PASSWORD }}
|
||||
glitchtip_secret_key: ${{ secrets.GLITCHTIP_SECRET_KEY }}
|
||||
postgres_password: ${{ secrets.STAGING_POSTGRES_PASSWORD }}
|
||||
postgres_host: archiv-staging-db-1
|
||||
- name: Reload Caddy
|
||||
# Apply any committed Caddyfile changes before smoke-testing the
|
||||
# public surface. Without this step, a Caddyfile edit lands in the
|
||||
# repo but Caddy keeps serving the previous config until someone
|
||||
# reloads it manually — the smoke test would then catch a stale
|
||||
# header or a still-proxied /actuator route rather than confirming
|
||||
# the current config is live.
|
||||
#
|
||||
# The runner executes job steps inside Docker containers (DooD).
|
||||
# `systemctl` is not present in container images and cannot reach
|
||||
# the host's systemd directly. We use the Docker socket (mounted
|
||||
# into every job container via runner-config.yaml) to spin up a
|
||||
# privileged sibling container in the host PID namespace; nsenter
|
||||
# then enters the host's namespaces so systemctl talks to the real
|
||||
# host systemd daemon. No sudoers entry is required — the Docker
|
||||
# socket already grants root-equivalent host access.
|
||||
#
|
||||
# Alpine is used: ~5 MB vs ~70 MB for ubuntu, no unnecessary
|
||||
# tooling, and the digest is pinned so any upstream change requires
|
||||
# an explicit bump PR. util-linux (which ships nsenter) is installed
|
||||
# at run time; apk add takes ~1 s on the warm VPS cache.
|
||||
#
|
||||
# `reload` not `restart`: reload runs the unit's ExecReload (`caddy reload` in the stock unit — Caddy 2 ignores SIGHUP) so Caddy re-reads its
|
||||
# config in-process without dropping TLS connections. `restart`
|
||||
# would briefly stop the service, losing in-flight requests.
|
||||
#
|
||||
# If Caddy is not running this step fails fast before the smoke test
|
||||
# issues a misleading "port 443 refused" error.
|
||||
run: |
|
||||
docker run --rm --privileged --pid=host \
|
||||
alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d \
|
||||
sh -c 'apk add --no-cache util-linux -q && nsenter -t 1 -m -u -n -p -i -- /bin/systemctl reload caddy'
|
||||
|
||||
- uses: ./.gitea/actions/reload-caddy
|
||||
|
||||
- uses: ./.gitea/actions/smoke-test
|
||||
with:
|
||||
host: staging.raddatz.cloud
|
||||
- name: Smoke test deployed environment
  # Bash is pinned explicitly: the script below uses a Bash array, which
  # a POSIX sh fallback would not parse.
  shell: bash
  # Healthchecks confirm containers are healthy; they do NOT confirm the
  # public surface works. This step catches: Caddy not reloaded, HSTS
  # header dropped, /actuator block bypassed.
  #
  # --resolve pins staging.raddatz.cloud to the Docker bridge gateway IP
  # (the host) so we do NOT depend on hairpin NAT on the host router.
  # 127.0.0.1 cannot be used: job containers run in bridge network mode
  # (runner-config.yaml), so 127.0.0.1 is the container's loopback, not
  # the host's. The bridge gateway IS the host; Caddy binds 0.0.0.0:443
  # and is therefore reachable from the container via that IP.
  # SNI still uses the public hostname so the TLS cert validates correctly.
  #
  # --resolve is stored as a Bash array so "${RESOLVE[@]}" expands to two
  # separate arguments. A quoted scalar ("$RESOLVE") would hand curl the
  # flag and its value as ONE token, which curl rejects as an unknown
  # option — every check below would then fail before reaching the host.
  #
  # Gateway detection reads /proc/net/route (always present, no package
  # required) instead of `ip route` to avoid a dependency on iproute2.
  # Field $2=="00000000" is the default route; field $3 is the gateway as
  # a little-endian 32-bit hex value which awk decodes to dotted-decimal.
  run: |
    set -e
    HOST="staging.raddatz.cloud"
    URL="https://$HOST"
    HOST_IP=$(awk 'NR>1 && $2=="00000000"{h=$3;printf "%d.%d.%d.%d\n",strtonum("0x"substr(h,7,2)),strtonum("0x"substr(h,5,2)),strtonum("0x"substr(h,3,2)),strtonum("0x"substr(h,1,2));exit}' /proc/net/route)
    [ -n "$HOST_IP" ] || { echo "ERROR: could not detect Docker bridge gateway via /proc/net/route"; exit 1; }
    RESOLVE=(--resolve "$HOST:443:$HOST_IP")
    echo "Smoke test: $URL (pinned to $HOST_IP via bridge gateway)"
    curl -fsS "${RESOLVE[@]}" --max-time 10 "$URL/login" -o /dev/null
    # Pin the preload-list-eligible HSTS value, not just header presence:
    # a degraded `max-age=1` or a dropped `includeSubDomains; preload` must
    # fail this check rather than pass it silently.
    curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
      | grep -Eqi 'strict-transport-security:[[:space:]]*max-age=31536000.*includeSubDomains.*preload'
    # Permissions-Policy denies APIs the app does not use (camera,
    # microphone, geolocation). A regression that loosens or drops the
    # header now fails the smoke step.
    curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
      | grep -Eqi 'permissions-policy:[[:space:]]*camera=\(\),[[:space:]]*microphone=\(\),[[:space:]]*geolocation=\(\)'
    status=$(curl -s "${RESOLVE[@]}" -o /dev/null -w "%{http_code}" --max-time 10 "$URL/actuator/health")
    [ "$status" = "404" ] || { echo "expected 404 from /actuator/health, got $status"; exit 1; }
    echo "All smoke checks passed"
|
||||
|
||||
- name: Cleanup env file
|
||||
# LOAD-BEARING: `if: always()` is the linchpin of the ADR-011
|
||||
|
||||
@@ -23,11 +23,6 @@ name: release
|
||||
# - host ports: backend 8080, frontend 3000
|
||||
# - profile: (none) — mailpit is excluded; real SMTP relay is used
|
||||
#
|
||||
# The obs-stack deploy, Caddy reload, and smoke test are shared with
|
||||
# nightly.yml via the composite actions under .gitea/actions/ (ADR-029).
|
||||
# actions/checkout MUST stay the first step: a local `uses: ./…` action
|
||||
# only exists on disk after checkout.
|
||||
#
|
||||
# Required Gitea secrets:
|
||||
# PROD_POSTGRES_PASSWORD
|
||||
# PROD_MINIO_PASSWORD
|
||||
@@ -39,10 +34,6 @@ name: release
|
||||
# MAIL_PORT
|
||||
# MAIL_USERNAME
|
||||
# MAIL_PASSWORD
|
||||
# GRAFANA_ADMIN_PASSWORD
|
||||
# GRAFANA_DB_PASSWORD (read-only grafana_reader DB role, issue #651)
|
||||
# GLITCHTIP_SECRET_KEY
|
||||
# SENTRY_DSN (set after GlitchTip first-run; empty = Sentry disabled)
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -58,8 +49,6 @@ jobs:
|
||||
# advertised label of our single-tenant self-hosted runner.
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
# MUST be first: the composite actions below live under .gitea/actions/
|
||||
# and only exist on disk once the repo is checked out (ADR-029).
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Write production env file
|
||||
@@ -83,9 +72,6 @@ jobs:
|
||||
MAIL_STARTTLS_ENABLE=true
|
||||
APP_MAIL_FROM=noreply@raddatz.cloud
|
||||
IMPORT_HOST_DIR=/srv/familienarchiv-production/import
|
||||
POSTGRES_USER=archiv
|
||||
SENTRY_DSN=${{ secrets.SENTRY_DSN }}
|
||||
GRAFANA_DB_PASSWORD=${{ secrets.GRAFANA_DB_PASSWORD }}
|
||||
EOF
|
||||
|
||||
- name: Build images
|
||||
@@ -107,21 +93,44 @@ jobs:
|
||||
--env-file .env.production \
|
||||
up -d --wait --remove-orphans
|
||||
|
||||
# POSTGRES_HOST is derived from the Compose project name (archiv-production)
|
||||
# and service name (db). A project rename requires updating this value.
|
||||
- uses: ./.gitea/actions/deploy-obs
|
||||
with:
|
||||
grafana_admin_password: ${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
||||
grafana_db_password: ${{ secrets.GRAFANA_DB_PASSWORD }}
|
||||
glitchtip_secret_key: ${{ secrets.GLITCHTIP_SECRET_KEY }}
|
||||
postgres_password: ${{ secrets.PROD_POSTGRES_PASSWORD }}
|
||||
postgres_host: archiv-production-db-1
|
||||
- name: Reload Caddy
|
||||
# See nightly.yml — same rationale and mechanism: DooD job containers
|
||||
# cannot call systemctl directly; nsenter via a privileged sibling
|
||||
# container reaches the host systemd. Must run after deploy (so the
|
||||
# latest Caddyfile is on disk) and before the smoke test (so the
|
||||
# public surface reflects the current config). Alpine with pinned
|
||||
# digest; reload not restart — see nightly.yml for full rationale.
|
||||
run: |
|
||||
docker run --rm --privileged --pid=host \
|
||||
alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d \
|
||||
sh -c 'apk add --no-cache util-linux -q && nsenter -t 1 -m -u -n -p -i -- /bin/systemctl reload caddy'
|
||||
|
||||
- uses: ./.gitea/actions/reload-caddy
|
||||
|
||||
- uses: ./.gitea/actions/smoke-test
|
||||
with:
|
||||
host: archiv.raddatz.cloud
|
||||
- name: Smoke test deployed environment
|
||||
# See nightly.yml — same smoke checks (login page, HSTS, Permissions-Policy, actuator 404), against the prod vhost.
|
||||
# --resolve pins to the bridge gateway IP (the host), not 127.0.0.1
|
||||
# — see nightly.yml for the full network topology explanation.
|
||||
run: |
|
||||
set -e
|
||||
HOST="archiv.raddatz.cloud"
|
||||
URL="https://$HOST"
|
||||
HOST_IP=$(ip route show default | awk '/default/ {print $3}')
|
||||
[ -n "$HOST_IP" ] || { echo "ERROR: could not detect Docker bridge gateway via 'ip route'"; exit 1; }
|
||||
RESOLVE="--resolve $HOST:443:$HOST_IP"
|
||||
echo "Smoke test: $URL (pinned to $HOST_IP via bridge gateway)"
|
||||
curl -fsS "$RESOLVE" --max-time 10 "$URL/login" -o /dev/null
|
||||
# Pin the preload-list-eligible HSTS value, not just header presence:
|
||||
# a degraded `max-age=1` or a dropped `includeSubDomains; preload` must
|
||||
# fail this check rather than pass it silently.
|
||||
curl -fsS "$RESOLVE" --max-time 10 -I "$URL/" \
|
||||
| grep -Eqi 'strict-transport-security:[[:space:]]*max-age=31536000.*includeSubDomains.*preload'
|
||||
# Permissions-Policy denies APIs the app does not use (camera,
|
||||
# microphone, geolocation). A regression that loosens or drops the
|
||||
# header now fails the smoke step.
|
||||
curl -fsS "$RESOLVE" --max-time 10 -I "$URL/" \
|
||||
| grep -Eqi 'permissions-policy:[[:space:]]*camera=\(\),[[:space:]]*microphone=\(\),[[:space:]]*geolocation=\(\)'
|
||||
status=$(curl -s "$RESOLVE" -o /dev/null -w "%{http_code}" --max-time 10 "$URL/actuator/health")
|
||||
[ "$status" = "404" ] || { echo "expected 404 from /actuator/health, got $status"; exit 1; }
|
||||
echo "All smoke checks passed"
|
||||
|
||||
- name: Cleanup env file
|
||||
# LOAD-BEARING: `if: always()` is the linchpin of the ADR-011
|
||||
|
||||
7
.gitignore
vendored
7
.gitignore
vendored
@@ -26,10 +26,3 @@ node_modules/
|
||||
|
||||
# Repo uses npm; yarn.lock is ignored to avoid double-lockfile drift.
|
||||
frontend/yarn.lock
|
||||
|
||||
**/.venv/
|
||||
**/__pycache__/
|
||||
*.pyc
|
||||
|
||||
# Canonical import artifacts live only on the ops host (PII).
|
||||
# See tools/import-normalizer/.gitignore — load-bearing for that policy.
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
# Semgrep security rules for Familienarchiv backend.
|
||||
# These rules catch the absence of XXE protection on XML parser factories.
|
||||
# CWE-611: Improper Restriction of XML External Entity Reference.
|
||||
# Run: semgrep --config .semgrep/security.yml --error backend/src/
|
||||
|
||||
rules:
|
||||
|
||||
# DocumentBuilderFactory without XXE hardening.
|
||||
# All call sites must call setFeature("…disallow-doctype-decl", true) before use.
|
||||
- id: dbf-xxe-default
|
||||
patterns:
|
||||
- pattern: $X = DocumentBuilderFactory.newInstance();
|
||||
- pattern-not-inside: |
|
||||
...
|
||||
$X.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
|
||||
...
|
||||
message: >
|
||||
DocumentBuilderFactory without XXE protection (CWE-611).
|
||||
Call XxeSafeXmlParser.hardenedFactory() instead of DocumentBuilderFactory.newInstance().
|
||||
See: https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html
|
||||
languages: [java]
|
||||
severity: ERROR
|
||||
|
||||
# SAXParserFactory without XXE hardening.
|
||||
- id: sax-xxe-default
|
||||
patterns:
|
||||
- pattern: $X = SAXParserFactory.newInstance();
|
||||
- pattern-not-inside: |
|
||||
...
|
||||
$X.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
|
||||
...
|
||||
message: >
|
||||
SAXParserFactory without XXE protection (CWE-611).
|
||||
Set disallow-doctype-decl=true, external-general-entities=false, external-parameter-entities=false,
|
||||
and load-external-dtd=false before use. Follow the pattern in XxeSafeXmlParser.hardenedFactory().
|
||||
See: https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html
|
||||
languages: [java]
|
||||
severity: ERROR
|
||||
|
||||
# XMLInputFactory without XXE hardening (StAX parser).
|
||||
- id: stax-xxe-default
|
||||
patterns:
|
||||
- pattern: $X = XMLInputFactory.newInstance();
|
||||
- pattern-not-inside: |
|
||||
...
|
||||
$X.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
|
||||
...
|
||||
message: >
|
||||
XMLInputFactory without XXE protection (CWE-611).
|
||||
Set IS_SUPPORTING_EXTERNAL_ENTITIES=false and SUPPORT_DTD=false before use.
|
||||
Follow the pattern in XxeSafeXmlParser.hardenedFactory().
|
||||
See: https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html
|
||||
languages: [java]
|
||||
severity: ERROR
|
||||
43
CLAUDE.md
43
CLAUDE.md
@@ -77,7 +77,6 @@ npm run generate:api # Regenerate TypeScript API types from OpenAPI spec
|
||||
```
|
||||
backend/src/main/java/org/raddatz/familienarchiv/
|
||||
├── audit/ Audit logging
|
||||
├── auth/ AuthService, AuthSessionController, LoginRequest, LoginRateLimiter, RateLimitProperties (Spring Session JDBC)
|
||||
├── config/ Infrastructure config (Minio, Async, Web)
|
||||
├── dashboard/ Dashboard analytics + StatsController/StatsService
|
||||
├── document/ Document domain (entities, controller, service, repository, DTOs)
|
||||
@@ -87,14 +86,14 @@ backend/src/main/java/org/raddatz/familienarchiv/
|
||||
├── exception/ DomainException, ErrorCode, GlobalExceptionHandler
|
||||
├── filestorage/ FileService (S3/MinIO)
|
||||
├── geschichte/ Geschichte (story) domain
|
||||
├── importing/ CanonicalImportOrchestrator + four loaders (TagTree/PersonRegister/PersonTree/Document) + CanonicalSheetReader
|
||||
├── importing/ MassImportService
|
||||
├── notification/ Notification domain + SseEmitterRegistry
|
||||
├── ocr/ OCR domain — OcrService, OcrBatchService, training
|
||||
├── person/ Person domain
|
||||
│ └── relationship/ PersonRelationship sub-domain
|
||||
├── security/ SecurityConfig, Permission, @RequirePermission, PermissionAspect
|
||||
├── tag/ Tag domain
|
||||
└── user/ User domain — AppUser, UserGroup, UserService
|
||||
└── user/ User domain — AppUser, UserGroup, UserService, auth controllers
|
||||
```
|
||||
|
||||
### Layering Rules
|
||||
@@ -160,7 +159,7 @@ Input DTOs live flat in the domain package. Response types are the model entitie
|
||||
|
||||
→ See [CONTRIBUTING.md §Error handling](./CONTRIBUTING.md#error-handling)
|
||||
|
||||
**LLM reminder:** use `DomainException.notFound/forbidden/conflict/internal()` from service methods — never throw raw exceptions. When adding a new `ErrorCode`: (1) add to `ErrorCode.java`, (2) add to `ErrorCode` type in `frontend/src/lib/shared/errors.ts`, (3) add a `case` in `getErrorMessage()`, (4) add i18n keys in `messages/{de,en,es}.json`. Valid error codes include: `TOO_MANY_LOGIN_ATTEMPTS` (returned by `LoginRateLimiter` as HTTP 429 when a brute-force threshold is exceeded).
|
||||
**LLM reminder:** use `DomainException.notFound/forbidden/conflict/internal()` from service methods — never throw raw exceptions. When adding a new `ErrorCode`: (1) add to `ErrorCode.java`, (2) mirror in `frontend/src/lib/shared/errors.ts`, (3) add i18n keys in `messages/{de,en,es}.json`.
|
||||
|
||||
### Security / Permissions
|
||||
|
||||
@@ -192,12 +191,11 @@ frontend/src/routes/
|
||||
├── persons/
|
||||
│ ├── [id]/ Person detail
|
||||
│ ├── [id]/edit/ Person edit form
|
||||
│ ├── new/ Create person form
|
||||
│ └── review/ Triage view — confirm/rename/merge/delete provisional persons
|
||||
│ └── new/ Create person form
|
||||
├── briefwechsel/ Bilateral conversation timeline (Briefwechsel)
|
||||
├── aktivitaeten/ Unified activity feed (Chronik)
|
||||
├── geschichten/ Stories — list, [id], [id]/edit, new
|
||||
├── stammbaum/ Family tree (Stammbaum)
|
||||
├── themen/ Topics directory — browsable tag index
|
||||
├── enrich/ Enrichment workflow — [id], done
|
||||
├── admin/ User, group, tag, OCR, system management
|
||||
├── hilfe/transkription/ Transcription help page
|
||||
@@ -268,7 +266,7 @@ Back button pattern — use the shared `<BackButton>` component from `$lib/share
|
||||
|
||||
→ See [CONTRIBUTING.md §Error handling](./CONTRIBUTING.md#error-handling)
|
||||
|
||||
**LLM reminder:** when adding a new `ErrorCode`: (1) add to `ErrorCode.java`, (2) add to `ErrorCode` type in `frontend/src/lib/shared/errors.ts`, (3) add a `case` in `getErrorMessage()`, (4) add i18n keys in `messages/{de,en,es}.json`. Valid error codes include: `TOO_MANY_LOGIN_ATTEMPTS` (returned by `LoginRateLimiter` as HTTP 429 when a brute-force threshold is exceeded).
|
||||
**LLM reminder:** when adding a new `ErrorCode`: (1) add to `ErrorCode.java`, (2) add to `ErrorCode` type in `frontend/src/lib/shared/errors.ts`, (3) add a `case` in `getErrorMessage()`, (4) add i18n keys in `messages/{de,en,es}.json`.
|
||||
|
||||
---
|
||||
|
||||
@@ -276,35 +274,6 @@ Back button pattern — use the shared `<BackButton>` component from `$lib/share
|
||||
|
||||
→ See [docs/DEPLOYMENT.md](./docs/DEPLOYMENT.md)
|
||||
|
||||
### Observability stack (separate compose file)
|
||||
|
||||
Run via `docker-compose.observability.yml` — requires the main stack to be running first. Full setup procedure: [docs/DEPLOYMENT.md §4](./docs/DEPLOYMENT.md#4-logs--observability).
|
||||
|
||||
| Service | Container | Default Port | Purpose |
|
||||
|---------|-----------|-------------|---------|
|
||||
| Grafana | `obs-grafana` | 3003 | Metrics / logs / traces dashboard |
|
||||
| Prometheus | `obs-prometheus` | 9090 (dev only — `127.0.0.1` bound) | Metrics store |
|
||||
| Loki | `obs-loki` | — (internal) | Log store |
|
||||
| Tempo | `obs-tempo` | — (internal) | Trace store |
|
||||
| GlitchTip | `obs-glitchtip` | 3002 | Error tracking (Sentry-compatible) |
|
||||
|
||||
### Observability env vars
|
||||
|
||||
| Variable | Purpose |
|
||||
|----------|---------|
|
||||
| `PORT_GRAFANA` | Host port for Grafana UI (default: `3003`) |
|
||||
| `PORT_GLITCHTIP` | Host port for GlitchTip UI (default: `3002`) |
|
||||
| `PORT_PROMETHEUS` | Host port for Prometheus UI (default: `9090`) |
|
||||
| `GRAFANA_ADMIN_PASSWORD` | Grafana `admin` login password — generate with `openssl rand -hex 32` |
|
||||
| `GLITCHTIP_SECRET_KEY` | Django secret key for GlitchTip — generate with `python3 -c "import secrets; print(secrets.token_hex(32))"` |
|
||||
| `GLITCHTIP_DOMAIN` | Public-facing base URL for GlitchTip (email links, CORS), e.g. `https://glitchtip.example.com` |
|
||||
| `SENTRY_DSN` | GlitchTip/Sentry DSN for the backend (Spring Boot) — leave empty to disable |
|
||||
| `VITE_SENTRY_DSN` | GlitchTip/Sentry DSN for the frontend (SvelteKit) — injected at build time via Vite |
|
||||
|
||||
## Observability
|
||||
|
||||
→ See [docs/OBSERVABILITY.md](./docs/OBSERVABILITY.md) — where to look for logs, traces, metrics, and errors.
|
||||
|
||||
## API Testing
|
||||
|
||||
HTTP test files are in `backend/api_tests/` for use with the VS Code REST Client extension.
|
||||
|
||||
@@ -263,7 +263,7 @@ if (!result.response.ok) {
|
||||
return { person: result.data! }; // non-null assertion is safe after the ok check
|
||||
```
|
||||
|
||||
For multipart/form-data (file uploads): bypass the typed client and use `event.fetch` directly — never global `fetch`. The typed client cannot handle multipart bodies, but `event.fetch` is still required so that `handleFetch` injects the session cookie.
|
||||
For multipart/form-data (file uploads): bypass the typed client and use raw `fetch` — the client cannot handle it.
|
||||
|
||||
### Date handling
|
||||
|
||||
@@ -272,7 +272,6 @@ For multipart/form-data (file uploads): bypass the typed client and use `event.f
|
||||
| Form display | German `dd.mm.yyyy` with auto-dot insertion via `handleDateInput()` |
|
||||
| Wire format | ISO 8601 via a hidden `<input type="hidden" name="documentDate" value={dateIso}>` |
|
||||
| Display | `new Intl.DateTimeFormat('de-DE', …).format(new Date(val + 'T12:00:00'))` |
|
||||
| Honest precision display | `formatDocumentDate(iso, precision, end?, raw?, locale?)` (`$lib/shared/utils/documentDate.ts`) or the `<DocumentDate>` component — renders a document date at exactly its `meta_date_precision` (MONTH → "Juni 1916", never a fabricated day). It mirrors the Java `DocumentTitleFormatter`; both are pinned to `docs/date-label-fixtures.json` so the title and UI labels can't drift. `meta_date_raw` is untrusted — render it via default escaping, never `{@html}` (a CI guard enforces this). |
|
||||
|
||||
### Security checklist (new endpoint)
|
||||
|
||||
|
||||
@@ -24,7 +24,6 @@ Spring Boot 4.0 monolith serving the Familienarchiv REST API. Handles document m
|
||||
```
|
||||
src/main/java/org/raddatz/familienarchiv/
|
||||
├── audit/ # Audit logging (AuditService, AuditLogQueryService)
|
||||
├── auth/ # AuthService, AuthSessionController, LoginRequest (Spring Session JDBC — ADR-020)
|
||||
├── config/ # Infrastructure config (MinioConfig, AsyncConfig, WebConfig)
|
||||
├── dashboard/ # Dashboard analytics + StatsController/StatsService
|
||||
├── document/ # Document domain — entities, controller, service, repository, DTOs
|
||||
@@ -34,14 +33,14 @@ src/main/java/org/raddatz/familienarchiv/
|
||||
├── exception/ # DomainException, ErrorCode, GlobalExceptionHandler
|
||||
├── filestorage/ # FileService (S3/MinIO)
|
||||
├── geschichte/ # Geschichte (story) domain
|
||||
├── importing/ # CanonicalImportOrchestrator + 4 loaders + CanonicalSheetReader
|
||||
├── importing/ # MassImportService
|
||||
├── notification/ # Notification domain + SseEmitterRegistry
|
||||
├── ocr/ # OCR domain — OcrService, OcrBatchService, training
|
||||
├── person/ # Person domain — Person, PersonService, PersonController
|
||||
│ └── relationship/ # PersonRelationship sub-domain
|
||||
├── security/ # SecurityConfig, Permission, @RequirePermission, PermissionAspect
|
||||
├── tag/ # Tag domain — Tag, TagService, TagController
|
||||
└── user/ # User domain — AppUser, UserGroup, UserService
|
||||
└── user/ # User domain — AppUser, UserGroup, UserService, auth controllers
|
||||
```
|
||||
|
||||
For per-domain ownership and public surface, see each domain's `README.md`.
|
||||
@@ -97,10 +96,7 @@ public class MyEntity {
|
||||
|
||||
- Annotated with `@Service`, `@RequiredArgsConstructor`, optionally `@Slf4j`.
|
||||
- Write methods: `@Transactional`.
|
||||
- Read methods: no annotation (default non-transactional) — **except** when the method returns
|
||||
an entity whose lazy associations must remain accessible to the caller after the method
|
||||
returns. In that case, use `@Transactional(readOnly = true)` to keep the Hibernate session
|
||||
open. Removing this annotation causes `LazyInitializationException` in production. See ADR-022.
|
||||
- Read methods: no annotation (default non-transactional).
|
||||
- Cross-domain access goes through the other domain's service, never its repository.
|
||||
|
||||
## Error Handling
|
||||
|
||||
@@ -28,18 +28,4 @@ Authorization: Basic Gast_User gast
|
||||
###Groups
|
||||
#GET
|
||||
GET http://localhost:8080/api/admin/tags
|
||||
Authorization: Basic admin admin123
|
||||
|
||||
### One-time backfill: re-sync already-stale auto-titles (#726)
|
||||
# RUNBOOK: a one-shot ADMIN maintenance call, NOT part of normal operation. Run it ONCE
|
||||
# after deploying #726 to clean the existing backlog of stale titles (e.g. a title still
|
||||
# showing "2028" after the date was corrected to "1928"). It is synchronous and idempotent
|
||||
# — a second run returns {"count": 0} and writes nothing. Hit the backend DIRECTLY on
|
||||
# port 8080 (NOT through the SvelteKit proxy) so the sweep can't trip the proxy timeout.
|
||||
# Returns {"count": <documents rewritten>}.
|
||||
POST http://localhost:8080/api/admin/backfill-titles
|
||||
Authorization: Basic admin admin123
|
||||
|
||||
### NEGATIV-TEST: ein Nicht-Admin darf den Backfill NICHT auslösen -> 403 Forbidden
|
||||
POST http://localhost:8080/api/admin/backfill-titles
|
||||
Authorization: Basic Gast_User gast
|
||||
Authorization: Basic admin admin123
|
||||
@@ -5,7 +5,7 @@
|
||||
<parent>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-parent</artifactId>
|
||||
<version>4.0.6</version>
|
||||
<version>4.0.0</version>
|
||||
<relativePath/> <!-- lookup parent from repository -->
|
||||
</parent>
|
||||
<groupId>org.raddatz</groupId>
|
||||
@@ -29,30 +29,11 @@
|
||||
<properties>
|
||||
<java.version>21</java.version>
|
||||
</properties>
|
||||
<dependencyManagement>
|
||||
<dependencies>
|
||||
<!-- opentelemetry-spring-boot-starter:2.27.0 was built against opentelemetry-api:1.61.0,
|
||||
but Spring Boot 4.0.0 BOM only manages 1.55.0 (missing GlobalOpenTelemetry.getOrNoop()).
|
||||
Import the core OTel BOM here to override it before the Spring Boot BOM applies. -->
|
||||
<dependency>
|
||||
<groupId>io.opentelemetry</groupId>
|
||||
<artifactId>opentelemetry-bom</artifactId>
|
||||
<version>1.61.0</version>
|
||||
<type>pom</type>
|
||||
<scope>import</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-actuator</artifactId>
|
||||
</dependency>
|
||||
<!-- Spring Boot 4.0 splits Micrometer metrics export (incl. Prometheus scrape endpoint) into its own starter -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-micrometer-metrics</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-validation</artifactId>
|
||||
@@ -69,10 +50,6 @@
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-security</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-session-jdbc</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-webmvc</artifactId>
|
||||
@@ -180,16 +157,11 @@
|
||||
<artifactId>flyway-database-postgresql</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Caffeine cache + Bucket4j for in-memory rate limiting -->
|
||||
<!-- Caffeine cache for in-memory rate limiting -->
|
||||
<dependency>
|
||||
<groupId>com.github.ben-manes.caffeine</groupId>
|
||||
<artifactId>caffeine</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.bucket4j</groupId>
|
||||
<artifactId>bucket4j-core</artifactId>
|
||||
<version>8.10.1</version>
|
||||
</dependency>
|
||||
|
||||
<!-- OpenAPI / Swagger UI — enabled only in the dev Spring profile -->
|
||||
<dependency>
|
||||
@@ -216,7 +188,7 @@
|
||||
<dependency>
|
||||
<groupId>com.googlecode.owasp-java-html-sanitizer</groupId>
|
||||
<artifactId>owasp-java-html-sanitizer</artifactId>
|
||||
<version>20260101.1</version>
|
||||
<version>20240325.1</version>
|
||||
</dependency>
|
||||
|
||||
<!-- HTML → plain-text extraction for comment previews -->
|
||||
@@ -225,42 +197,6 @@
|
||||
<artifactId>jsoup</artifactId>
|
||||
<version>1.18.1</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Observability: Prometheus metrics scrape endpoint (version managed by Spring Boot BOM) -->
|
||||
<dependency>
|
||||
<groupId>io.micrometer</groupId>
|
||||
<artifactId>micrometer-registry-prometheus</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Observability: Micrometer → OpenTelemetry tracing bridge (version managed by Spring Boot BOM) -->
|
||||
<dependency>
|
||||
<groupId>io.micrometer</groupId>
|
||||
<artifactId>micrometer-tracing-bridge-otel</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Observability: OTel Spring Boot auto-instrumentation — NOT in Spring Boot BOM, pinned explicitly -->
|
||||
<dependency>
|
||||
<groupId>io.opentelemetry.instrumentation</groupId>
|
||||
<artifactId>opentelemetry-spring-boot-starter</artifactId>
|
||||
<version>2.27.0</version>
|
||||
<exclusions>
|
||||
<!-- Excludes AzureAppServiceResourceProvider which references ServiceAttributes.SERVICE_INSTANCE_ID
|
||||
that does not exist in the semconv version pulled by this project. -->
|
||||
<exclusion>
|
||||
<groupId>io.opentelemetry.contrib</groupId>
|
||||
<artifactId>opentelemetry-azure-resources</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Sentry error reporting (GlitchTip-compatible) — sentry-spring-boot-4 is the
|
||||
Spring Boot 4 / Spring Framework 7 compatible module (replaces the jakarta starter
|
||||
which crashes with SF7 due to bean-name generation for triply-nested @Import classes) -->
|
||||
<dependency>
|
||||
<groupId>io.sentry</groupId>
|
||||
<artifactId>sentry-spring-boot-4</artifactId>
|
||||
<version>8.41.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
||||
@@ -306,7 +242,7 @@
|
||||
<phase>verify</phase>
|
||||
<goals><goal>report</goal></goals>
|
||||
</execution>
|
||||
<!-- Gate: ratchet at 0.77 — actual measured coverage after drift; raise via #496 -->
|
||||
<!-- Gate: baseline 89.4% overall / service 90.2% / controller 80.0% -->
|
||||
<execution>
|
||||
<id>check</id>
|
||||
<phase>verify</phase>
|
||||
@@ -319,7 +255,7 @@
|
||||
<limit>
|
||||
<counter>BRANCH</counter>
|
||||
<value>COVEREDRATIO</value>
|
||||
<minimum>0.77</minimum>
|
||||
<minimum>0.88</minimum>
|
||||
</limit>
|
||||
</limits>
|
||||
</rule>
|
||||
@@ -337,16 +273,6 @@
|
||||
</profiles>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<configuration>
|
||||
<forkedProcessTimeoutInSeconds>600</forkedProcessTimeoutInSeconds>
|
||||
<systemPropertyVariables>
|
||||
<junit.jupiter.execution.timeout.default>90 s</junit.jupiter.execution.timeout.default>
|
||||
</systemPropertyVariables>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
|
||||
@@ -35,22 +35,7 @@ public enum AuditKind {
|
||||
USER_DELETED,
|
||||
|
||||
/** Payload: {@code {"userId": "uuid", "email": "addr", "addedGroups": ["Admin"], "removedGroups": []}} */
|
||||
GROUP_MEMBERSHIP_CHANGED,
|
||||
|
||||
/** Payload: {@code {"userId": "uuid", "ip": "1.2.3.4", "ua": "Mozilla/5.0..."}} */
|
||||
LOGIN_SUCCESS,
|
||||
|
||||
/** Payload: {@code {"email": "addr", "ip": "1.2.3.4", "ua": "Mozilla/5.0..."}} — password NEVER included */
|
||||
LOGIN_FAILED,
|
||||
|
||||
/** Payload: {@code {"userId": "uuid", "ip": "1.2.3.4", "ua": "Mozilla/5.0...", "reason": "password_change|password_reset|admin_force_logout", "revokedCount": 3}} */
|
||||
LOGOUT,
|
||||
|
||||
/** Payload: {@code {"actorId": "uuid", "targetUserId": "uuid", "revokedCount": 3}} */
|
||||
ADMIN_FORCE_LOGOUT,
|
||||
|
||||
/** Payload: {@code {"ip": "1.2.3.4", "email": "addr"}} — password NEVER included */
|
||||
LOGIN_RATE_LIMITED;
|
||||
GROUP_MEMBERSHIP_CHANGED;
|
||||
|
||||
public static final Set<AuditKind> ROLLUP_ELIGIBLE = Set.of(
|
||||
TEXT_SAVED, FILE_UPLOADED, ANNOTATION_CREATED,
|
||||
|
||||
@@ -1,84 +0,0 @@
|
||||
package org.raddatz.familienarchiv.auth;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.audit.AuditKind;
|
||||
import org.raddatz.familienarchiv.audit.AuditService;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.user.AppUser;
|
||||
import org.raddatz.familienarchiv.user.UserService;
|
||||
import org.springframework.security.authentication.AuthenticationManager;
|
||||
import org.springframework.security.authentication.UsernamePasswordAuthenticationToken;
|
||||
import org.springframework.security.core.Authentication;
|
||||
import org.springframework.security.core.AuthenticationException;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
 * Authentication service: verifies credentials through the injected
 * {@link AuthenticationManager}, writes audit events for login attempts and
 * logouts, and delegates session revocation to {@code SessionRevocationPort}.
 *
 * <p>This class does not create or invalidate HTTP sessions itself; it only
 * returns the {@link Authentication} so the caller can attach it to a session.
 */
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class AuthService {
|
||||
|
||||
private final AuthenticationManager authenticationManager;
|
||||
private final UserService userService;
|
||||
private final AuditService auditService;
|
||||
private final LoginRateLimiter loginRateLimiter;
|
||||
private final SessionRevocationPort sessionRevocationPort;
|
||||
|
||||
/**
 * Attempts a login for the given credentials.
 *
 * <p>Order of operations: (1) the rate limiter is consulted first; if it throws a
 * {@code DomainException}, a {@code LOGIN_RATE_LIMITED} audit event is written and the
 * exception is rethrown unchanged; (2) the credentials are authenticated; (3) on success
 * the user is loaded by email, a {@code LOGIN_SUCCESS} audit event is written and
 * {@code loginRateLimiter.invalidateOnSuccess} is called.
 *
 * @param email    login identifier, passed as the principal of the authentication token
 * @param password credential to verify; never written to the audit log
 * @param ip       client IP recorded in the audit payload
 * @param ua       client User-Agent; truncated to 200 chars before auditing, may be null
 * @return the authenticated user together with the resulting {@link Authentication}
 * @throws DomainException rethrown from the rate limiter, or
 *         {@code DomainException.invalidCredentials()} when authentication fails
 */
public LoginResult login(String email, String password, String ip, String ua) {
|
||||
try {
|
||||
loginRateLimiter.checkAndConsume(ip, email);
|
||||
} catch (DomainException ex) {
|
||||
// NOTE(review): Map.of rejects null keys and values with NullPointerException, so a
// null ip or email would replace the DomainException with an NPE here — confirm
// callers always pass non-null values. The same applies to the Map.of calls below.
auditService.log(AuditKind.LOGIN_RATE_LIMITED, null, null, Map.of(
|
||||
"ip", ip,
|
||||
"email", email));
|
||||
throw ex;
|
||||
}
|
||||
try {
|
||||
Authentication auth = authenticationManager.authenticate(
|
||||
new UsernamePasswordAuthenticationToken(email, password));
|
||||
|
||||
AppUser user = userService.findByEmail(email);
|
||||
auditService.log(AuditKind.LOGIN_SUCCESS, user.getId(), null, Map.of(
|
||||
"userId", user.getId().toString(),
|
||||
"ip", ip,
|
||||
"ua", truncateUa(ua)));
|
||||
loginRateLimiter.invalidateOnSuccess(ip, email);
|
||||
return new LoginResult(user, auth);
|
||||
} catch (AuthenticationException ex) {
|
||||
// Audit login failure — intentionally does NOT log the attempted password.
|
||||
// DaoAuthenticationProvider already runs a dummy BCrypt on unknown users to
|
||||
// equalise timing between "user not found" and "wrong password" paths.
|
||||
auditService.log(AuditKind.LOGIN_FAILED, null, null, Map.of(
|
||||
"email", email,
|
||||
"ip", ip,
|
||||
"ua", truncateUa(ua)));
|
||||
// The original AuthenticationException is not passed on as a cause.
throw DomainException.invalidCredentials();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Delegates to {@code SessionRevocationPort.revokeOtherSessions}.
 *
 * @param currentSessionId session id forwarded to the port (presumably the one to keep — verify against the port)
 * @param principalName    principal whose sessions are targeted
 * @return the int returned by the port (presumably the number of revoked sessions — verify)
 */
public int revokeOtherSessions(String currentSessionId, String principalName) {
|
||||
return sessionRevocationPort.revokeOtherSessions(currentSessionId, principalName);
|
||||
}
|
||||
|
||||
/**
 * Delegates to {@code SessionRevocationPort.revokeAllSessions}.
 *
 * @param principalName principal whose sessions are targeted
 * @return the int returned by the port (presumably the number of revoked sessions — verify)
 */
public int revokeAllSessions(String principalName) {
|
||||
return sessionRevocationPort.revokeAllSessions(principalName);
|
||||
}
|
||||
|
||||
/**
 * Writes a {@code LOGOUT} audit event for the user with the given email.
 * Only audits; the HTTP session is not touched here.
 *
 * @param email email used to look up the user via {@code userService.findByEmail}
 * @param ip    client IP recorded in the audit payload
 * @param ua    client User-Agent; truncated to 200 chars before auditing, may be null
 */
public void logout(String email, String ip, String ua) {
|
||||
AppUser user = userService.findByEmail(email);
|
||||
auditService.log(AuditKind.LOGOUT, user.getId(), null, Map.of(
|
||||
"userId", user.getId().toString(),
|
||||
"ip", ip,
|
||||
"ua", truncateUa(ua)));
|
||||
}
|
||||
|
||||
/**
 * Normalises a User-Agent value for the audit payload: null becomes the empty
 * string, anything longer than 200 chars is cut to its first 200 chars.
 */
private static String truncateUa(String ua) {
|
||||
if (ua == null) return "";
|
||||
return ua.length() > 200 ? ua.substring(0, 200) : ua;
|
||||
}
|
||||
|
||||
/** Result of a successful login: the loaded user and the {@link Authentication} produced by the manager. */
public record LoginResult(AppUser user, Authentication authentication) {}
|
||||
}
|
||||
@@ -1,102 +0,0 @@
|
||||
package org.raddatz.familienarchiv.auth;
|
||||
|
||||
import jakarta.servlet.http.HttpServletRequest;
|
||||
import jakarta.servlet.http.HttpServletResponse;
|
||||
import jakarta.servlet.http.HttpSession;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.user.AppUser;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.security.core.Authentication;
|
||||
import org.springframework.security.core.context.SecurityContext;
|
||||
import org.springframework.security.core.context.SecurityContextHolder;
|
||||
import org.springframework.security.web.authentication.session.SessionAuthenticationStrategy;
|
||||
import org.springframework.security.web.context.HttpSessionSecurityContextRepository;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
|
||||
// @RequirePermission is intentionally absent: login is unauthenticated by design;
|
||||
// logout requires an authenticated session (enforced by SecurityConfig), not a specific permission.
|
||||
@RestController
|
||||
@RequestMapping("/api/auth")
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class AuthSessionController {
|
||||
|
||||
private final AuthService authService;
|
||||
private final SessionAuthenticationStrategy sessionAuthenticationStrategy;
|
||||
|
||||
/**
 * Handles {@code POST /api/auth/login}.
 *
 * <p>Resolves the client IP and User-Agent, delegates the credential check to
 * {@code authService.login}, then ensures a session exists, runs the configured
 * {@link SessionAuthenticationStrategy}, and stores a fresh {@link SecurityContext}
 * holding the authentication both in {@link SecurityContextHolder} and in the session
 * under {@code SPRING_SECURITY_CONTEXT_KEY}.
 *
 * <p>Nothing is caught here: any exception thrown by {@code authService.login}
 * propagates to the caller before a session is created.
 *
 * @param request      request body supplying {@code email()} and {@code password()}
 * @param httpRequest  current servlet request (source of IP, User-Agent and session)
 * @param httpResponse current servlet response, handed to the session strategy
 * @return 200 OK with the authenticated user as body
 */
@PostMapping("/login")
|
||||
public ResponseEntity<AppUser> login(
|
||||
@RequestBody LoginRequest request,
|
||||
HttpServletRequest httpRequest,
|
||||
HttpServletResponse httpResponse) {
|
||||
|
||||
String ip = resolveClientIp(httpRequest);
|
||||
String ua = resolveUserAgent(httpRequest);
|
||||
|
||||
AuthService.LoginResult result = authService.login(request.email(), request.password(), ip, ua);
|
||||
|
||||
// Session-fixation defense (CWE-384): rotate the session ID at the authentication
|
||||
// boundary. ChangeSessionIdAuthenticationStrategy invalidates any pre-auth session ID
|
||||
// an attacker may have planted and mints a fresh one before we attach the SecurityContext.
|
||||
// getSession(true) creates a session if the request does not have one yet, so the
// strategy below always has a session to operate on.
httpRequest.getSession(true);
|
||||
sessionAuthenticationStrategy.onAuthentication(result.authentication(), httpRequest, httpResponse);
|
||||
|
||||
// Spring Session JDBC intercepts setAttribute() and persists the record under the
|
||||
// (now rotated) opaque ID; the Set-Cookie: fa_session=<opaque-id> is added automatically.
|
||||
SecurityContext context = SecurityContextHolder.createEmptyContext();
|
||||
context.setAuthentication(result.authentication());
|
||||
SecurityContextHolder.setContext(context);
|
||||
httpRequest.getSession()
|
||||
.setAttribute(HttpSessionSecurityContextRepository.SPRING_SECURITY_CONTEXT_KEY, context);
|
||||
|
||||
return ResponseEntity.ok(result.user());
|
||||
}
|
||||
|
||||
@PostMapping("/logout")
|
||||
public ResponseEntity<Void> logout(Authentication authentication, HttpServletRequest httpRequest) {
|
||||
String email = authentication.getName();
|
||||
String ip = resolveClientIp(httpRequest);
|
||||
String ua = resolveUserAgent(httpRequest);
|
||||
|
||||
// CWE-613 defense: invalidate the session first — that is the contract the user
|
||||
// is relying on when they click "Log out." Audit is best-effort and must not
|
||||
// bubble up: if the user record was deleted while the session was live, the
|
||||
// audit lookup throws, but the session row in spring_session must still die.
|
||||
HttpSession session = httpRequest.getSession(false);
|
||||
if (session != null) {
|
||||
session.invalidate();
|
||||
}
|
||||
SecurityContextHolder.clearContext();
|
||||
|
||||
try {
|
||||
authService.logout(email, ip, ua);
|
||||
} catch (Exception ex) {
|
||||
log.warn("Audit logout failed for {}; session was already invalidated", email, ex);
|
||||
}
|
||||
|
||||
return ResponseEntity.noContent().build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves the client IP for audit-log purposes.
|
||||
*
|
||||
* <p>Trust model: the leftmost {@code X-Forwarded-For} value is taken at face value.
|
||||
* This is correct <em>only</em> if the ingress (Caddy in production) strips any
|
||||
* client-supplied XFF before forwarding — otherwise an attacker can pin audit-log
|
||||
* IPs to whatever they want. Verify the reverse-proxy config before exposing this
|
||||
* service behind a different ingress.
|
||||
*/
|
||||
private static String resolveClientIp(HttpServletRequest request) {
|
||||
String forwarded = request.getHeader("X-Forwarded-For");
|
||||
if (forwarded != null && !forwarded.isBlank()) {
|
||||
return forwarded.split(",")[0].trim();
|
||||
}
|
||||
return request.getRemoteAddr();
|
||||
}
|
||||
|
||||
private static String resolveUserAgent(HttpServletRequest request) {
|
||||
String ua = request.getHeader("User-Agent");
|
||||
return ua != null ? ua : "";
|
||||
}
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
package org.raddatz.familienarchiv.auth;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.session.jdbc.JdbcIndexedSessionRepository;
|
||||
|
||||
@RequiredArgsConstructor
|
||||
class JdbcSessionRevocationAdapter implements SessionRevocationPort {
|
||||
|
||||
private final JdbcIndexedSessionRepository sessionRepository;
|
||||
|
||||
@Override
|
||||
public int revokeOtherSessions(String currentSessionId, String principalName) {
|
||||
int count = 0;
|
||||
for (String id : sessionRepository.findByPrincipalName(principalName).keySet()) {
|
||||
if (!id.equals(currentSessionId)) {
|
||||
sessionRepository.deleteById(id);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int revokeAllSessions(String principalName) {
|
||||
var sessions = sessionRepository.findByPrincipalName(principalName);
|
||||
sessions.keySet().forEach(sessionRepository::deleteById);
|
||||
return sessions.size();
|
||||
}
|
||||
}
|
||||
@@ -1,72 +0,0 @@
|
||||
package org.raddatz.familienarchiv.auth;
|
||||
|
||||
import com.github.benmanes.caffeine.cache.Caffeine;
|
||||
import com.github.benmanes.caffeine.cache.LoadingCache;
|
||||
import io.github.bucket4j.Bandwidth;
|
||||
import io.github.bucket4j.Bucket;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.Locale;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@Service
|
||||
@Slf4j
|
||||
public class LoginRateLimiter {
|
||||
|
||||
private final LoadingCache<String, Bucket> byIpEmail;
|
||||
private final LoadingCache<String, Bucket> byIp;
|
||||
private final int maxPerIpEmail;
|
||||
private final int maxPerIp;
|
||||
private final int windowMinutes;
|
||||
|
||||
public LoginRateLimiter(RateLimitProperties props) {
|
||||
this.maxPerIpEmail = props.getMaxAttemptsPerIpEmail();
|
||||
this.maxPerIp = props.getMaxAttemptsPerIp();
|
||||
this.windowMinutes = props.getWindowMinutes();
|
||||
|
||||
this.byIpEmail = Caffeine.newBuilder()
|
||||
.expireAfterAccess(windowMinutes, TimeUnit.MINUTES)
|
||||
.build(key -> newBucket(maxPerIpEmail, windowMinutes));
|
||||
|
||||
this.byIp = Caffeine.newBuilder()
|
||||
.expireAfterAccess(windowMinutes, TimeUnit.MINUTES)
|
||||
.build(key -> newBucket(maxPerIp, windowMinutes));
|
||||
}
|
||||
|
||||
// NOTE: This cache is node-local (in-memory). In a multi-replica deployment,
|
||||
// effective limits would be multiplied by replica count.
|
||||
// For the current single-VPS setup this is the correct, simplest implementation.
|
||||
|
||||
public void checkAndConsume(String ip, String email) {
|
||||
long retryAfterSeconds = windowMinutes * 60L;
|
||||
String key = ip + ":" + email.toLowerCase(Locale.ROOT);
|
||||
if (!byIpEmail.get(key).tryConsume(1)) {
|
||||
throw DomainException.tooManyRequests(ErrorCode.TOO_MANY_LOGIN_ATTEMPTS,
|
||||
"Too many login attempts from " + ip, retryAfterSeconds);
|
||||
}
|
||||
if (!byIp.get(ip).tryConsume(1)) {
|
||||
// Refund the ipEmail token so IP-level blocking does not erode the per-email quota.
|
||||
byIpEmail.get(key).addTokens(1);
|
||||
throw DomainException.tooManyRequests(ErrorCode.TOO_MANY_LOGIN_ATTEMPTS,
|
||||
"Too many login attempts from " + ip, retryAfterSeconds);
|
||||
}
|
||||
}
|
||||
|
||||
public void invalidateOnSuccess(String ip, String email) {
|
||||
byIpEmail.invalidate(ip + ":" + email.toLowerCase(Locale.ROOT));
|
||||
byIp.invalidate(ip);
|
||||
}
|
||||
|
||||
private static Bucket newBucket(int limit, int minutes) {
|
||||
return Bucket.builder()
|
||||
.addLimit(Bandwidth.builder()
|
||||
.capacity(limit)
|
||||
.refillGreedy(limit, Duration.ofMinutes(minutes))
|
||||
.build())
|
||||
.build();
|
||||
}
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
package org.raddatz.familienarchiv.auth;
|
||||
|
||||
/**
 * Credentials posted as the JSON body of {@code POST /api/auth/login}.
 *
 * @param email    e-mail address identifying the account
 * @param password plaintext password as typed by the user; must never be logged
 */
public record LoginRequest(String email, String password) {

    /**
     * Replaces the record's generated {@code toString()}, which prints every component, so
     * that the plaintext password cannot leak through log statements or exception messages.
     */
    @Override
    public String toString() {
        return "LoginRequest[email=" + email + ", password=***]";
    }
}
|
||||
@@ -1,14 +0,0 @@
|
||||
package org.raddatz.familienarchiv.auth;
|
||||
|
||||
class NoOpSessionRevocationAdapter implements SessionRevocationPort {
|
||||
|
||||
@Override
|
||||
public int revokeOtherSessions(String currentSessionId, String principalName) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int revokeAllSessions(String principalName) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
package org.raddatz.familienarchiv.auth;
|
||||
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
@ConfigurationProperties("rate-limit.login")
|
||||
@Data
|
||||
public class RateLimitProperties {
|
||||
private int maxAttemptsPerIpEmail = 10;
|
||||
private int maxAttemptsPerIp = 20;
|
||||
private int windowMinutes = 15;
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
package org.raddatz.familienarchiv.auth;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.session.jdbc.JdbcIndexedSessionRepository;
|
||||
|
||||
@Configuration
|
||||
class SessionRevocationConfig {
|
||||
|
||||
@Bean
|
||||
SessionRevocationPort sessionRevocationPort(
|
||||
@Autowired(required = false) JdbcIndexedSessionRepository sessionRepository) {
|
||||
if (sessionRepository != null) {
|
||||
return new JdbcSessionRevocationAdapter(sessionRepository);
|
||||
}
|
||||
return new NoOpSessionRevocationAdapter();
|
||||
}
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
package org.raddatz.familienarchiv.auth;
|
||||
|
||||
/**
 * Port for terminating the server-side sessions of a principal.
 */
public interface SessionRevocationPort {

    /**
     * Revokes the principal's sessions other than the current one.
     *
     * @param currentSessionId ID of the session that must stay alive
     * @param principalName    name of the principal whose sessions are revoked
     * @return number of sessions revoked
     */
    int revokeOtherSessions(String currentSessionId, String principalName);

    /**
     * Revokes every session of the principal.
     *
     * @param principalName name of the principal whose sessions are revoked
     * @return number of sessions revoked
     */
    int revokeAllSessions(String principalName);
}
|
||||
@@ -5,10 +5,8 @@ import lombok.extern.slf4j.Slf4j;
|
||||
import org.flywaydb.core.Flyway;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.core.env.Environment;
|
||||
|
||||
import javax.sql.DataSource;
|
||||
import java.util.Map;
|
||||
|
||||
@Configuration
|
||||
@RequiredArgsConstructor
|
||||
@@ -16,7 +14,6 @@ import java.util.Map;
|
||||
public class FlywayConfig {
|
||||
|
||||
private final DataSource dataSource;
|
||||
private final Environment environment;
|
||||
|
||||
@Bean(name = "flyway")
|
||||
public Flyway flyway() {
|
||||
@@ -24,7 +21,6 @@ public class FlywayConfig {
|
||||
Flyway flyway = Flyway.configure()
|
||||
.dataSource(dataSource)
|
||||
.locations("classpath:db/migration")
|
||||
.placeholders(Map.of("grafanaDbPassword", resolveGrafanaDbPassword()))
|
||||
.baselineOnMigrate(true)
|
||||
.baselineVersion("4")
|
||||
.load();
|
||||
@@ -32,22 +28,4 @@ public class FlywayConfig {
|
||||
log.info("Flyway: {} migration(s) applied.", result.migrationsExecuted);
|
||||
return flyway;
|
||||
}
|
||||
|
||||
// Fail-closed: refuse to boot when GRAFANA_DB_PASSWORD is unset. The
|
||||
// grafana_reader role's password is (re)set on every boot by
|
||||
// R__grafana_reader_password.sql, so a missing env var means we'd either
|
||||
// skip the rotation silently or — with a hardcoded fallback — publish a
|
||||
// well-known credential for a role with SELECT on audit_log, documents,
|
||||
// and transcription_blocks. Same shape as UserDataInitializer's refusal
|
||||
// to seed default admin credentials outside dev/test/e2e.
|
||||
String resolveGrafanaDbPassword() {
|
||||
String value = environment.getProperty("GRAFANA_DB_PASSWORD");
|
||||
if (value == null || value.isBlank()) {
|
||||
throw new IllegalStateException(
|
||||
"GRAFANA_DB_PASSWORD is required: it is consumed by "
|
||||
+ "R__grafana_reader_password.sql to (re)set the grafana_reader "
|
||||
+ "role's password on every boot. Generate with: openssl rand -hex 32");
|
||||
}
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,7 +28,6 @@ public class RateLimitInterceptor implements HandlerInterceptor {
|
||||
AtomicInteger count = requestCounts.get(ip, k -> new AtomicInteger(0));
|
||||
if (count.incrementAndGet() > MAX_REQUESTS_PER_MINUTE) {
|
||||
response.setStatus(HttpStatus.TOO_MANY_REQUESTS.value());
|
||||
response.setHeader("Retry-After", "60");
|
||||
response.getWriter().write("{\"code\":\"RATE_LIMIT_EXCEEDED\",\"message\":\"Too many requests\"}");
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
package org.raddatz.familienarchiv.config;
|
||||
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.session.web.http.CookieSerializer;
|
||||
import org.springframework.session.web.http.DefaultCookieSerializer;
|
||||
|
||||
@Configuration
|
||||
public class SpringSessionConfig {
|
||||
|
||||
@Bean
|
||||
public CookieSerializer cookieSerializer() {
|
||||
DefaultCookieSerializer serializer = new DefaultCookieSerializer();
|
||||
serializer.setCookieName("fa_session");
|
||||
serializer.setSameSite("Strict");
|
||||
// cookieHttpOnly: true is the DefaultCookieSerializer default
|
||||
// useSecureCookie not set: auto-detects from request.isSecure().
|
||||
// With forward-headers-strategy: native, Caddy's X-Forwarded-Proto: https
|
||||
// causes isSecure() → true in production; direct HTTP in dev/tests → false.
|
||||
return serializer;
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
/**
 * How precisely a document's date is known.
 *
 * <p>This enum mirrors the import normalizer's {@code Precision} enum
 * (tools/import-normalizer/dates.py) value for value — the canonical output is the contract,
 * so there is no translation layer. Do not add, remove, or rename values without also
 * changing the normalizer; a mismatch silently breaks import idempotency (see ADR-025).
 */
public enum DatePrecision {
    DAY,
    MONTH,
    SEASON,
    YEAR,
    RANGE,
    APPROX,
    UNKNOWN
}
|
||||
@@ -2,7 +2,6 @@ package org.raddatz.familienarchiv.document;
|
||||
|
||||
import jakarta.persistence.*;
|
||||
import lombok.*;
|
||||
import org.hibernate.annotations.BatchSize;
|
||||
import org.hibernate.annotations.CreationTimestamp;
|
||||
import org.hibernate.annotations.UpdateTimestamp;
|
||||
|
||||
@@ -22,17 +21,6 @@ import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
@NamedEntityGraph(name = "Document.full", attributeNodes = {
|
||||
@NamedAttributeNode("sender"),
|
||||
@NamedAttributeNode("receivers"),
|
||||
@NamedAttributeNode("tags"),
|
||||
@NamedAttributeNode("trainingLabels")
|
||||
})
|
||||
@NamedEntityGraph(name = "Document.list", attributeNodes = {
|
||||
@NamedAttributeNode("sender"),
|
||||
@NamedAttributeNode("receivers"),
|
||||
@NamedAttributeNode("tags")
|
||||
})
|
||||
@Entity
|
||||
@Table(name = "documents")
|
||||
@Data // Lombok: Generiert Getter, Setter, ToString, etc.
|
||||
@@ -91,29 +79,6 @@ public class Document {
|
||||
@Column(name = "meta_date")
|
||||
private LocalDate documentDate; // Wann wurde der Brief geschrieben?
|
||||
|
||||
// Precision of documentDate — drives honest rendering ("ca. 1943", "Frühjahr 1943").
|
||||
// Verbatim mirror of the normalizer's Precision enum (see ADR-025).
|
||||
@Enumerated(EnumType.STRING)
|
||||
@Column(name = "meta_date_precision", nullable = false, length = 16)
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
@Builder.Default
|
||||
private DatePrecision metaDatePrecision = DatePrecision.UNKNOWN;
|
||||
|
||||
// Range end — only set when metaDatePrecision is RANGE (open-ended ranges allowed → may be null).
|
||||
@Column(name = "meta_date_end")
|
||||
private LocalDate metaDateEnd;
|
||||
|
||||
// Original date cell, verbatim, preserved for provenance and "as written" display.
|
||||
@Column(name = "meta_date_raw", columnDefinition = "TEXT")
|
||||
private String metaDateRaw;
|
||||
|
||||
// Raw attribution preserved even when a person is linked via sender/receivers.
|
||||
@Column(name = "sender_text", columnDefinition = "TEXT")
|
||||
private String senderText;
|
||||
|
||||
@Column(name = "receiver_text", columnDefinition = "TEXT")
|
||||
private String receiverText;
|
||||
|
||||
@Column(name = "meta_location")
|
||||
private String location;
|
||||
|
||||
@@ -153,37 +118,27 @@ public class Document {
|
||||
@Builder.Default
|
||||
private ScriptType scriptType = ScriptType.UNKNOWN;
|
||||
|
||||
@ManyToMany(fetch = FetchType.LAZY)
|
||||
@ManyToMany(fetch = FetchType.EAGER)
|
||||
@JoinTable(name = "document_receivers", joinColumns = @JoinColumn(name = "document_id"), inverseJoinColumns = @JoinColumn(name = "person_id"))
|
||||
@BatchSize(size = 50)
|
||||
@Builder.Default
|
||||
private Set<Person> receivers = new HashSet<>();
|
||||
|
||||
@ManyToOne(fetch = FetchType.LAZY)
|
||||
@ManyToOne
|
||||
@JoinColumn(name = "sender_id")
|
||||
private Person sender;
|
||||
|
||||
@ManyToMany(fetch = FetchType.LAZY)
|
||||
@ManyToMany(fetch = FetchType.EAGER)
|
||||
@JoinTable(name = "document_tags", joinColumns = @JoinColumn(name = "document_id"), inverseJoinColumns = @JoinColumn(name = "tag_id"))
|
||||
@BatchSize(size = 50)
|
||||
@Builder.Default
|
||||
private Set<Tag> tags = new HashSet<>();
|
||||
|
||||
@ElementCollection(fetch = FetchType.LAZY)
|
||||
@ElementCollection(fetch = FetchType.EAGER)
|
||||
@CollectionTable(name = "document_training_labels", joinColumns = @JoinColumn(name = "document_id"))
|
||||
@Column(name = "label")
|
||||
@Enumerated(EnumType.STRING)
|
||||
@BatchSize(size = 50)
|
||||
@Builder.Default
|
||||
private Set<TrainingLabel> trainingLabels = new HashSet<>();
|
||||
|
||||
// Not persisted — computed per detail fetch so read-only users can tell at first
|
||||
// paint whether there is a transcription to read (DocumentService.getDocumentById).
|
||||
@Transient
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
@Builder.Default
|
||||
private boolean hasTranscription = false;
|
||||
|
||||
// The `?v={thumbnailGeneratedAt}` cache-buster is load-bearing: the thumbnail
|
||||
// endpoint sends `Cache-Control: private, max-age=31536000, immutable`
|
||||
// (DocumentController.getDocumentThumbnail). `immutable` is only safe because
|
||||
|
||||
@@ -12,8 +12,6 @@ public class DocumentBatchMetadataDTO {
|
||||
private UUID senderId;
|
||||
private List<UUID> receiverIds;
|
||||
private LocalDate documentDate;
|
||||
private DatePrecision metaDatePrecision;
|
||||
private LocalDate metaDateEnd;
|
||||
private String location;
|
||||
private List<String> tagNames;
|
||||
private Boolean metadataComplete;
|
||||
|
||||
@@ -3,6 +3,7 @@ package org.raddatz.familienarchiv.document;
|
||||
import java.io.IOException;
|
||||
import java.time.LocalDate;
|
||||
import java.util.ArrayList;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@@ -46,7 +47,9 @@ import org.raddatz.familienarchiv.document.DocumentService;
|
||||
import org.raddatz.familienarchiv.document.DocumentVersionService;
|
||||
import org.raddatz.familienarchiv.filestorage.FileService;
|
||||
import org.raddatz.familienarchiv.user.UserService;
|
||||
import org.springframework.data.domain.Sort;
|
||||
import org.springframework.security.core.Authentication;
|
||||
import org.springframework.http.CacheControl;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
@@ -135,7 +138,7 @@ public class DocumentController {
|
||||
// --- METADATA ---
|
||||
@GetMapping("/{id}")
|
||||
public Document getDocument(@PathVariable UUID id) {
|
||||
return documentService.getDocumentDetail(id);
|
||||
return documentService.getDocumentById(id);
|
||||
}
|
||||
|
||||
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
|
||||
@@ -310,11 +313,9 @@ public class DocumentController {
|
||||
@RequestParam(required = false) String tagQ,
|
||||
@RequestParam(required = false) DocumentStatus status,
|
||||
@RequestParam(required = false) String tagOp,
|
||||
@RequestParam(required = false) Boolean undated,
|
||||
Authentication authentication) {
|
||||
TagOperator operator = "OR".equalsIgnoreCase(tagOp) ? TagOperator.OR : TagOperator.AND;
|
||||
SearchFilters filters = new SearchFilters(q, from, to, senderId, receiverId, tags, tagQ, status, operator, Boolean.TRUE.equals(undated));
|
||||
List<UUID> ids = documentService.findIdsForFilter(filters);
|
||||
List<UUID> ids = documentService.findIdsForFilter(q, from, to, senderId, receiverId, tags, tagQ, status, operator);
|
||||
if (ids.size() > BULK_EDIT_FILTER_MAX_IDS) {
|
||||
throw DomainException.badRequest(ErrorCode.BULK_EDIT_TOO_MANY_IDS,
|
||||
"Filter matches " + ids.size() + " documents — refine filter (max " + BULK_EDIT_FILTER_MAX_IDS + ")");
|
||||
@@ -374,7 +375,6 @@ public class DocumentController {
|
||||
@Parameter(description = "Sort field") @RequestParam(required = false) DocumentSort sort,
|
||||
@Parameter(description = "Sort direction: ASC or DESC") @RequestParam(required = false, defaultValue = "DESC") String dir,
|
||||
@Parameter(description = "Tag operator: AND (default) or OR") @RequestParam(required = false) String tagOp,
|
||||
@Parameter(description = "Restrict to undated documents (meta_date IS NULL)") @RequestParam(required = false) Boolean undated,
|
||||
// @Max on page guards against overflow when pageable.getOffset() is computed
|
||||
// as page * size — Integer.MAX_VALUE * 50 would wrap to a negative long, which
|
||||
// Hibernate cheerfully turns into an invalid SQL OFFSET.
|
||||
@@ -386,9 +386,8 @@ public class DocumentController {
|
||||
// tagOp is a raw String at the HTTP boundary; any value other than "OR" (case-insensitive)
|
||||
// defaults to AND, which matches the frontend default and keeps old clients working.
|
||||
TagOperator operator = "OR".equalsIgnoreCase(tagOp) ? TagOperator.OR : TagOperator.AND;
|
||||
SearchFilters filters = new SearchFilters(q, from, to, senderId, receiverId, tags, tagQ, status, operator, Boolean.TRUE.equals(undated));
|
||||
Pageable pageable = PageRequest.of(page, size);
|
||||
return ResponseEntity.ok(documentService.searchDocuments(filters, sort, dir, pageable));
|
||||
return ResponseEntity.ok(documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir, operator, pageable));
|
||||
}
|
||||
|
||||
@GetMapping(value = "/density", produces = MediaType.APPLICATION_JSON_VALUE)
|
||||
@@ -403,7 +402,9 @@ public class DocumentController {
|
||||
TagOperator operator = "OR".equalsIgnoreCase(tagOp) ? TagOperator.OR : TagOperator.AND;
|
||||
DocumentDensityResult result = documentService.getDensity(
|
||||
new DensityFilters(q, senderId, receiverId, tags, tagQ, status, operator));
|
||||
return ResponseEntity.ok(result);
|
||||
return ResponseEntity.ok()
|
||||
.cacheControl(CacheControl.maxAge(5, TimeUnit.MINUTES).cachePrivate())
|
||||
.body(result);
|
||||
}
|
||||
|
||||
// --- TRAINING LABELS ---
|
||||
@@ -442,6 +443,17 @@ public class DocumentController {
|
||||
return documentVersionService.getVersion(id, versionId);
|
||||
}
|
||||
|
||||
@GetMapping("/conversation")
|
||||
public List<Document> getConversation(
|
||||
@RequestParam UUID senderId,
|
||||
@RequestParam(required = false) UUID receiverId,
|
||||
@RequestParam(required = false) LocalDate from,
|
||||
@RequestParam(required = false) LocalDate to,
|
||||
@RequestParam(defaultValue = "DESC") String dir) {
|
||||
Sort sort = Sort.by(Sort.Direction.fromString(dir.toUpperCase()), "documentDate");
|
||||
return documentService.getConversationFiltered(senderId, receiverId, from, to, sort);
|
||||
}
|
||||
|
||||
private UUID requireUserId(Authentication authentication) {
|
||||
return SecurityUtils.requireUserId(authentication, userService);
|
||||
}
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import org.raddatz.familienarchiv.audit.ActivityActorDTO;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.tag.Tag;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
public record DocumentListItem(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
UUID id,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
String title,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
String originalFilename,
|
||||
String thumbnailUrl,
|
||||
LocalDate documentDate,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
DatePrecision metaDatePrecision,
|
||||
LocalDate metaDateEnd,
|
||||
Person sender,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<Person> receivers,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<Tag> tags,
|
||||
String archiveBox,
|
||||
String archiveFolder,
|
||||
String location,
|
||||
String summary,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
int completionPercentage,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<ActivityActorDTO> contributors,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
SearchMatchData matchData,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
LocalDateTime createdAt,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
LocalDateTime updatedAt
|
||||
) {}
|
||||
@@ -7,14 +7,13 @@ import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.springframework.data.domain.Page;
|
||||
import org.springframework.data.domain.Pageable;
|
||||
import org.springframework.data.domain.Sort;
|
||||
import org.springframework.data.jpa.domain.Specification;
|
||||
import org.springframework.data.jpa.repository.EntityGraph;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.data.jpa.repository.JpaSpecificationExecutor;
|
||||
import org.springframework.data.jpa.repository.Query;
|
||||
import org.springframework.data.repository.query.Param;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@@ -24,18 +23,6 @@ import java.util.UUID;
|
||||
@Repository
|
||||
public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSpecificationExecutor<Document> {
|
||||
|
||||
@EntityGraph("Document.full")
|
||||
Optional<Document> findById(UUID id);
|
||||
|
||||
@EntityGraph("Document.list")
|
||||
Page<Document> findAll(Specification<Document> spec, Pageable pageable);
|
||||
|
||||
@EntityGraph("Document.list")
|
||||
List<Document> findAll(Specification<Document> spec);
|
||||
|
||||
@EntityGraph("Document.list")
|
||||
Page<Document> findAll(Pageable pageable);
|
||||
|
||||
// Findet ein Dokument anhand des ursprünglichen Dateinamens
|
||||
// Wichtig für den Abgleich beim Excel-Import & Datei-Upload
|
||||
Optional<Document> findByOriginalFilename(String originalFilename);
|
||||
@@ -43,21 +30,17 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
|
||||
// Wie oben, gibt aber nur das erste Ergebnis zurück — sicher wenn doppelte Dateinamen existieren
|
||||
Optional<Document> findFirstByOriginalFilename(String originalFilename);
|
||||
|
||||
// Callers access only status/id scalar fields — no graph needed.
|
||||
// Findet alle Dokumente mit einem bestimmten Status
|
||||
// z.B. um alle offenen "PLACEHOLDER" zu finden
|
||||
List<Document> findByStatus(DocumentStatus status);
|
||||
|
||||
// Prüft effizient, ob ein Dateiname schon existiert (gibt true/false zurück)
|
||||
boolean existsByOriginalFilename(String originalFilename);
|
||||
|
||||
// lazy – @BatchSize(50) fallback active; see ADR-022
|
||||
@EntityGraph("Document.full")
|
||||
List<Document> findBySenderId(UUID senderId);
|
||||
|
||||
// lazy – @BatchSize(50) fallback active; see ADR-022
|
||||
@EntityGraph("Document.full")
|
||||
List<Document> findByReceiversId(UUID receiverId);
|
||||
|
||||
// Callers access only doc.getTags() to mutate the set — receivers/sender not touched; no graph needed.
|
||||
List<Document> findByTags_Id(UUID tagId);
|
||||
|
||||
@Query("SELECT d FROM Document d WHERE d.id NOT IN (SELECT DISTINCT dv.documentId FROM DocumentVersion dv)")
|
||||
@@ -72,14 +55,36 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
|
||||
|
||||
long countByMetadataCompleteFalse();
|
||||
|
||||
// No production callers — only used if a future export path iterates the full list; no graph needed.
|
||||
List<Document> findByMetadataCompleteFalse(Sort sort);
|
||||
|
||||
// Callers map to IncompleteDocumentDTO using only scalar fields (id, title, createdAt) — no graph needed.
|
||||
Page<Document> findByMetadataCompleteFalse(Pageable pageable);
|
||||
|
||||
Optional<Document> findFirstByMetadataCompleteFalseAndIdNot(UUID id, Sort sort);
|
||||
|
||||
@Query("SELECT DISTINCT d FROM Document d " +
|
||||
"JOIN d.receivers r " +
|
||||
"WHERE " +
|
||||
"((d.sender.id = :person1 AND r.id = :person2) " +
|
||||
" OR " +
|
||||
" (d.sender.id = :person2 AND r.id = :person1)) " +
|
||||
"AND d.documentDate BETWEEN :from AND :to")
|
||||
List<Document> findConversation(
|
||||
@Param("person1") UUID person1,
|
||||
@Param("person2") UUID person2,
|
||||
@Param("from") LocalDate from,
|
||||
@Param("to") LocalDate to,
|
||||
Sort sort);
|
||||
|
||||
@Query("SELECT DISTINCT d FROM Document d " +
|
||||
"LEFT JOIN d.receivers r " +
|
||||
"WHERE (d.sender.id = :personId OR r.id = :personId) " +
|
||||
"AND d.documentDate BETWEEN :from AND :to")
|
||||
List<Document> findSinglePersonCorrespondence(
|
||||
@Param("personId") UUID personId,
|
||||
@Param("from") LocalDate from,
|
||||
@Param("to") LocalDate to,
|
||||
Sort sort);
|
||||
|
||||
@Query(nativeQuery = true, value = """
|
||||
SELECT d.id FROM documents d
|
||||
CROSS JOIN LATERAL (
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import org.raddatz.familienarchiv.audit.ActivityActorDTO;
|
||||
import org.raddatz.familienarchiv.document.Document;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public record DocumentSearchItem(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
Document document,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
SearchMatchData matchData,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
int completionPercentage,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<ActivityActorDTO> contributors
|
||||
) {}
|
||||
@@ -7,7 +7,7 @@ import java.util.List;
|
||||
|
||||
public record DocumentSearchResult(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<DocumentListItem> items,
|
||||
List<DocumentSearchItem> items,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
long totalElements,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
@@ -15,45 +15,24 @@ public record DocumentSearchResult(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
int pageSize,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
int totalPages,
|
||||
/**
|
||||
* Total number of undated documents (meta_date IS NULL) matching the current
|
||||
* filter context (q/tags/sender/receiver/status) across ALL pages — not the
|
||||
* undated rows on the current page. Computed independently of the "Nur
|
||||
* undatierte" toggle so it never collapses to the page slice (issue #668).
|
||||
*/
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
long undatedCount
|
||||
int totalPages
|
||||
) {
|
||||
/**
|
||||
* Single-page convenience factory used by empty-result shortcuts and by tests that
|
||||
* don't care about paging. Treats the whole list as page 0 of itself. The undated
|
||||
* count defaults to 0 — the service overlays the real global count via
|
||||
* {@link #withUndatedCount(long)} before returning.
|
||||
* don't care about paging. Treats the whole list as page 0 of itself.
|
||||
*/
|
||||
public static DocumentSearchResult of(List<DocumentListItem> items) {
|
||||
public static DocumentSearchResult of(List<DocumentSearchItem> items) {
|
||||
int size = items.size();
|
||||
return new DocumentSearchResult(items, size, 0, size, size == 0 ? 0 : 1, 0L);
|
||||
return new DocumentSearchResult(items, size, 0, size, size == 0 ? 0 : 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Paged factory used by the service when it has a real Pageable + full match count
|
||||
* (e.g. from Spring's Page<T> or from an in-memory sort-then-slice). The undated
|
||||
* count defaults to 0 — the service overlays the real global count via
|
||||
* {@link #withUndatedCount(long)} before returning.
|
||||
* (e.g. from Spring's Page<T> or from an in-memory sort-then-slice).
|
||||
*/
|
||||
public static DocumentSearchResult paged(List<DocumentListItem> slice, Pageable pageable, long totalElements) {
|
||||
public static DocumentSearchResult paged(List<DocumentSearchItem> slice, Pageable pageable, long totalElements) {
|
||||
int pageSize = pageable.getPageSize();
|
||||
int totalPages = pageSize == 0 ? 0 : (int) ((totalElements + pageSize - 1) / pageSize);
|
||||
return new DocumentSearchResult(slice, totalElements, pageable.getPageNumber(), pageSize, totalPages, 0L);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a copy with the global undated count overlaid, leaving every other
|
||||
* field untouched. Lets the service compute the count once and attach it to
|
||||
* whichever result shape the search path produced.
|
||||
*/
|
||||
public DocumentSearchResult withUndatedCount(long undatedCount) {
|
||||
return new DocumentSearchResult(items, totalElements, pageNumber, pageSize, totalPages, undatedCount);
|
||||
return new DocumentSearchResult(slice, totalElements, pageable.getPageNumber(), pageSize, totalPages);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ import org.raddatz.familienarchiv.audit.AuditService;
|
||||
import org.raddatz.familienarchiv.document.DocumentBatchMetadataDTO;
|
||||
import org.raddatz.familienarchiv.document.DocumentBatchSummary;
|
||||
import org.raddatz.familienarchiv.document.DocumentBulkEditDTO;
|
||||
import org.raddatz.familienarchiv.document.DocumentSearchItem;
|
||||
import org.raddatz.familienarchiv.document.DocumentSearchResult;
|
||||
import org.raddatz.familienarchiv.document.DocumentSort;
|
||||
import org.raddatz.familienarchiv.document.DocumentUpdateDTO;
|
||||
@@ -68,7 +69,6 @@ import static org.raddatz.familienarchiv.document.DocumentSpecifications.*;
|
||||
public class DocumentService {
|
||||
|
||||
private final DocumentRepository documentRepository;
|
||||
private final DocumentTitleFactory documentTitleFactory;
|
||||
private final PersonService personService;
|
||||
private final FileService fileService;
|
||||
private final TagService tagService;
|
||||
@@ -138,10 +138,8 @@ public class DocumentService {
|
||||
* <p>Implementation note: groups in memory rather than via SQL GROUP BY
|
||||
* because the existing {@link Specification} predicates compose easily
|
||||
* with {@code findAll(spec)} and the archive size (≈5k docs) keeps this
|
||||
* well under the 200ms p95 target. The controller sets no explicit
|
||||
* Cache-Control, so the response is served fresh on every load (issue
|
||||
* #709) — the recompute is imperceptible and stale month counts after an
|
||||
* edit would be misleading on an interactive chart.
|
||||
* well under the 200ms p95 target. Cache-Control: max-age=300 on the
|
||||
* controller layer absorbs repeated browse loads.
|
||||
*
|
||||
* <p>Tracked in issue #481 for re-evaluation when {@code documents > 50k}
|
||||
* — at that scale move the aggregation into SQL (GROUP BY TO_CHAR(meta_date,
|
||||
@@ -170,13 +168,11 @@ public class DocumentService {
|
||||
/** Loads matching documents and projects to non-null {@link LocalDate}s. */
|
||||
private List<LocalDate> loadFilteredDates(DensityFilters filters, List<UUID> ftsIds) {
|
||||
boolean hasFts = ftsIds != null;
|
||||
// Density and search keep separate filter records (DensityFilters has no
|
||||
// date/undated fields); adapt to SearchFilters here to reuse buildSearchSpec.
|
||||
// Date bounds stay null and undated=false — the density path never filters by date.
|
||||
SearchFilters searchFilters = new SearchFilters(
|
||||
filters.text(), null, null, filters.sender(), filters.receiver(),
|
||||
filters.tags(), filters.tagQ(), filters.status(), filters.tagOperator(), false);
|
||||
Specification<Document> spec = buildSearchSpec(hasFts, ftsIds, searchFilters);
|
||||
Specification<Document> spec = buildSearchSpec(
|
||||
hasFts, ftsIds, null, null,
|
||||
filters.sender(), filters.receiver(),
|
||||
filters.tags(), filters.tagQ(),
|
||||
filters.status(), filters.tagOperator());
|
||||
return documentRepository.findAll(spec).stream()
|
||||
.map(Document::getDocumentDate)
|
||||
.filter(Objects::nonNull)
|
||||
@@ -380,17 +376,9 @@ public class DocumentService {
|
||||
|
||||
DocumentStatus statusBefore = doc.getStatus();
|
||||
|
||||
// Auto-title sync (#726): capture the machine title from the CURRENTLY-persisted state
|
||||
// BEFORE any setter runs — the setters below overwrite date/location and applyDatePrecision
|
||||
// skips nulls, so the old state must be read first. The submitted title is the catalog
|
||||
// auto-title iff it equals this; only then does it follow date/location forward.
|
||||
String autoTitleBefore = documentTitleFactory.build(doc);
|
||||
|
||||
// 1. Einfache Felder Update
|
||||
doc.setTitle(resolveTitle(dto.getTitle(), autoTitleBefore, doc, dto));
|
||||
doc.setTitle(dto.getTitle());
|
||||
doc.setDocumentDate(dto.getDocumentDate());
|
||||
applyDatePrecision(doc, dto);
|
||||
validateDateRange(doc); // guard before any save (updateDocumentTags below persists)
|
||||
doc.setLocation(dto.getLocation());
|
||||
doc.setTranscription(dto.getTranscription());
|
||||
doc.setSummary(dto.getSummary());
|
||||
@@ -431,11 +419,7 @@ public class DocumentService {
|
||||
doc.setScriptType(dto.getScriptType());
|
||||
}
|
||||
|
||||
// 4. Datei austauschen (nur wenn eine neue ausgewählt wurde).
|
||||
// NB (#726): this reassigns originalFilename to the uploaded file's name. The title's index
|
||||
// segment is originalFilename, so after a replace the stored title no longer matches
|
||||
// build(currentState) and the row is treated as manual — neither save-time nor backfill
|
||||
// rewrites it. Accepted fail-safe (ADR-031), and autoTitleBefore was already captured above.
|
||||
// 4. Datei austauschen (nur wenn eine neue ausgewählt wurde)
|
||||
boolean fileReplaced = newFile != null && !newFile.isEmpty();
|
||||
if (fileReplaced) {
|
||||
FileService.UploadResult upload = fileService.uploadFile(newFile, newFile.getOriginalFilename());
|
||||
@@ -463,97 +447,6 @@ public class DocumentService {
|
||||
return saved;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decides the title to persist on an edit (#726). The submitted title is the catalog
|
||||
* auto-title only when it equals {@code autoBefore} (built from the stored state) — an exact
|
||||
* comparison with no heuristic, relying on the edit form round-tripping the stored title
|
||||
* verbatim when untouched. A machine title is rebuilt from the new state so a corrected
|
||||
* date/location flows into it; a hand-written or freshly-typed title is kept verbatim. A blank
|
||||
* submission is never persisted (title is always present) — it falls back to the rebuilt
|
||||
* auto-title, which always carries at least the index.
|
||||
*/
|
||||
private String resolveTitle(String submitted, String autoBefore, Document doc, DocumentUpdateDTO dto) {
|
||||
if (submitted == null || submitted.isBlank()) {
|
||||
return documentTitleFactory.build(projectedState(doc, dto));
|
||||
}
|
||||
if (!Objects.equals(submitted, autoBefore)) {
|
||||
return submitted;
|
||||
}
|
||||
return documentTitleFactory.build(projectedState(doc, dto));
|
||||
}
|
||||
|
||||
/**
|
||||
* The document state the regenerated title is built from. It is composed from the SAME
|
||||
* resolvers the real setters use — {@code documentDate}/{@code location} overwritten from the
|
||||
* DTO (a null value clears the field), precision/end/raw resolved skip-null via
|
||||
* {@link #effectivePrecision}/{@link #effectiveMetaDateEnd}/{@link #effectiveMetaDateRaw} — so
|
||||
* the projection cannot drift from {@link #updateDocument}. The index ({@code originalFilename})
|
||||
* is never touched by a metadata edit.
|
||||
*/
|
||||
private Document projectedState(Document doc, DocumentUpdateDTO dto) {
|
||||
return Document.builder()
|
||||
.originalFilename(doc.getOriginalFilename())
|
||||
.documentDate(dto.getDocumentDate())
|
||||
.location(dto.getLocation())
|
||||
.metaDatePrecision(effectivePrecision(doc, dto))
|
||||
.metaDateEnd(effectiveMetaDateEnd(doc, dto))
|
||||
.metaDateRaw(effectiveMetaDateRaw(doc, dto))
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies the three date-precision fields skip-null: a null DTO field means "not submitted",
|
||||
* so the stored value is kept rather than overwritten with null — which would fabricate a
|
||||
* precision the user never chose, the exact dishonesty #666 exists to prevent. Expressed via
|
||||
* the shared {@code effective*} resolvers so {@link #projectedState} stays lock-step (writing
|
||||
* the stored value back when the DTO omits a field is a harmless no-op).
|
||||
*/
|
||||
private void applyDatePrecision(Document doc, DocumentUpdateDTO dto) {
|
||||
doc.setMetaDatePrecision(effectivePrecision(doc, dto));
|
||||
doc.setMetaDateEnd(effectiveMetaDateEnd(doc, dto));
|
||||
doc.setMetaDateRaw(effectiveMetaDateRaw(doc, dto));
|
||||
}
|
||||
|
||||
// Skip-null date-field resolution shared by applyDatePrecision (the real setters) and
|
||||
// projectedState (the title projection) — the single rule keeps them from diverging (#726).
|
||||
private static DatePrecision effectivePrecision(Document doc, DocumentUpdateDTO dto) {
|
||||
return dto.getMetaDatePrecision() != null ? dto.getMetaDatePrecision() : doc.getMetaDatePrecision();
|
||||
}
|
||||
|
||||
private static LocalDate effectiveMetaDateEnd(Document doc, DocumentUpdateDTO dto) {
|
||||
return dto.getMetaDateEnd() != null ? dto.getMetaDateEnd() : doc.getMetaDateEnd();
|
||||
}
|
||||
|
||||
private static String effectiveMetaDateRaw(Document doc, DocumentUpdateDTO dto) {
|
||||
return dto.getMetaDateRaw() != null ? dto.getMetaDateRaw() : doc.getMetaDateRaw();
|
||||
}
|
||||
|
||||
/**
|
||||
* Friendly guard for the two V69 date-range CHECK constraints, run before save so a
|
||||
* user date typo returns a clean 400 INVALID_DATE_RANGE instead of falling through to
|
||||
* the generic handler (HTTP 500 + Sentry + ERROR log). Validates the post-apply {@code doc}
|
||||
* state, not the DTO, because precision/end may have been carried over from the stored row
|
||||
* when the DTO field was null. The DB CHECK remains the backstop; this never weakens it.
|
||||
*/
|
||||
private void validateDateRange(Document doc) {
|
||||
// Mirrors chk_meta_date_end_after_start: end >= start, with null start allowed.
|
||||
// Use isBefore (equal dates are valid) — never !isAfter, which would contradict the DB's >=.
|
||||
if (doc.getMetaDatePrecision() == DatePrecision.RANGE
|
||||
&& doc.getDocumentDate() != null
|
||||
&& doc.getMetaDateEnd() != null
|
||||
&& doc.getMetaDateEnd().isBefore(doc.getDocumentDate())) {
|
||||
throw DomainException.badRequest(ErrorCode.INVALID_DATE_RANGE,
|
||||
"meta_date_end must not be before meta_date");
|
||||
}
|
||||
// Mirrors chk_meta_date_end_only_for_range. API-only: the edit form clears the
|
||||
// end field off-RANGE, so this branch closes the same 500 class for direct clients.
|
||||
if (doc.getMetaDateEnd() != null && doc.getMetaDatePrecision() != DatePrecision.RANGE) {
|
||||
throw DomainException.badRequest(ErrorCode.INVALID_DATE_RANGE,
|
||||
"meta_date_end is only allowed when meta_date_precision is RANGE");
|
||||
}
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public Document updateDocumentTags(UUID docId, List<String> tagNames) {
|
||||
Document doc = documentRepository.findById(docId)
|
||||
.orElseThrow(() -> DomainException.notFound(ErrorCode.DOCUMENT_NOT_FOUND, "Document not found: " + docId));
|
||||
@@ -587,15 +480,17 @@ public class DocumentService {
|
||||
* round-trip.
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public List<UUID> findIdsForFilter(SearchFilters filters) {
|
||||
boolean hasText = StringUtils.hasText(filters.text());
|
||||
public List<UUID> findIdsForFilter(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver,
|
||||
List<String> tags, String tagQ, DocumentStatus status, TagOperator tagOperator) {
|
||||
boolean hasText = StringUtils.hasText(text);
|
||||
List<UUID> rankedIds = null;
|
||||
if (hasText) {
|
||||
rankedIds = documentRepository.findAllMatchingIdsByFts(filters.text());
|
||||
rankedIds = documentRepository.findAllMatchingIdsByFts(text);
|
||||
if (rankedIds.isEmpty()) return List.of();
|
||||
}
|
||||
|
||||
Specification<Document> spec = buildSearchSpec(hasText, rankedIds, filters);
|
||||
Specification<Document> spec = buildSearchSpec(
|
||||
hasText, rankedIds, from, to, sender, receiver, tags, tagQ, status, tagOperator);
|
||||
return documentRepository.findAll(spec).stream().map(Document::getId).toList();
|
||||
}
|
||||
|
||||
@@ -605,18 +500,21 @@ public class DocumentService {
|
||||
* (uncapped, ID-only). Caller does its own FTS short-circuit when the
|
||||
* full-text query returned no rows.
|
||||
*/
|
||||
private Specification<Document> buildSearchSpec(boolean hasText, List<UUID> ftsIds, SearchFilters filters) {
|
||||
boolean useOrLogic = filters.tagOperator() == TagOperator.OR;
|
||||
List<Set<UUID>> expandedTagSets = tagService.expandTagNamesToDescendantIdSets(filters.tags());
|
||||
private Specification<Document> buildSearchSpec(boolean hasText, List<UUID> ftsIds,
|
||||
LocalDate from, LocalDate to,
|
||||
UUID sender, UUID receiver,
|
||||
List<String> tags, String tagQ,
|
||||
DocumentStatus status, TagOperator tagOperator) {
|
||||
boolean useOrLogic = tagOperator == TagOperator.OR;
|
||||
List<Set<UUID>> expandedTagSets = tagService.expandTagNamesToDescendantIdSets(tags);
|
||||
Specification<Document> textSpec = hasText ? hasIds(ftsIds) : (root, query, cb) -> null;
|
||||
return Specification.where(textSpec)
|
||||
.and(isBetween(filters.from(), filters.to()))
|
||||
.and(hasSender(filters.sender()))
|
||||
.and(hasReceiver(filters.receiver()))
|
||||
.and(isBetween(from, to))
|
||||
.and(hasSender(sender))
|
||||
.and(hasReceiver(receiver))
|
||||
.and(hasTags(expandedTagSets, useOrLogic))
|
||||
.and(hasTagPartial(filters.tagQ()))
|
||||
.and(hasStatus(filters.status()))
|
||||
.and(undatedOnly(filters.undated()));
|
||||
.and(hasTagPartial(tagQ))
|
||||
.and(hasStatus(status));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -737,7 +635,7 @@ public class DocumentService {
|
||||
return saved;
|
||||
}
|
||||
|
||||
@Transactional(readOnly = true)
|
||||
// 0. Zuletzt aktive Dokumente (sortiert nach updatedAt DESC)
|
||||
public List<Document> getRecentActivity(int size) {
|
||||
return documentRepository.findAll(
|
||||
PageRequest.of(0, size, Sort.by(Sort.Direction.DESC, "updatedAt"))
|
||||
@@ -745,57 +643,22 @@ public class DocumentService {
|
||||
}
|
||||
|
||||
// 1. Allgemeine Suche (für das Suchfeld im Frontend)
|
||||
public DocumentSearchResult searchDocuments(SearchFilters filters, DocumentSort sort, String dir, Pageable pageable) {
|
||||
boolean hasText = StringUtils.hasText(filters.text());
|
||||
public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir, TagOperator tagOperator, Pageable pageable) {
|
||||
boolean hasText = StringUtils.hasText(text);
|
||||
|
||||
// Pure-text RELEVANCE: push pagination + ts_rank ordering into SQL — skip
|
||||
// findAllMatchingIdsByFts entirely (ADR-008). This must run BEFORE any
|
||||
// findAllMatchingIdsByFts call so the fast path is preserved. An active undated
|
||||
// filter must NOT take this path: it bypasses buildSearchSpec, so the
|
||||
// undatedOnly predicate would be silently dropped. By definition this path has
|
||||
// no date/sender/receiver/tag/status filters, and undated documents are valid
|
||||
// FTS hits already folded into the ranked page, so there is no separate undated
|
||||
// count to report here.
|
||||
if (!filters.undated() && isPureTextRelevance(hasText, sort, filters)) {
|
||||
return relevanceSortedPageFromSql(filters.text(), pageable);
|
||||
// Pure-text RELEVANCE: push pagination into SQL — skip findAllMatchingIdsByFts entirely (ADR-008).
|
||||
if (isPureTextRelevance(hasText, sort, from, to, sender, receiver, tags, tagQ, status)) {
|
||||
return relevanceSortedPageFromSql(text, pageable);
|
||||
}
|
||||
|
||||
List<UUID> rankedIds = null;
|
||||
if (hasText) {
|
||||
rankedIds = documentRepository.findAllMatchingIdsByFts(filters.text());
|
||||
// FTS matched nothing → no results and, by definition, no undated matches either.
|
||||
rankedIds = documentRepository.findAllMatchingIdsByFts(text);
|
||||
if (rankedIds.isEmpty()) return DocumentSearchResult.of(List.of());
|
||||
}
|
||||
|
||||
// Global undated count for the current filter (q/tags/sender/receiver/status),
|
||||
// forcing undatedOnly(true) and IGNORING the user's "Nur undatierte" toggle so
|
||||
// it never collapses to the page slice and never double-counts (issue #668).
|
||||
long undatedCount = countUndatedForFilter(hasText, rankedIds, filters.withUndated(true));
|
||||
|
||||
return runSearch(hasText, rankedIds, filters, sort, dir, pageable)
|
||||
.withUndatedCount(undatedCount);
|
||||
}
|
||||
|
||||
/**
|
||||
* Counts every undated document (meta_date IS NULL) matching the active filter,
|
||||
* across all pages, independent of the undated toggle. The caller passes
|
||||
* {@code filters.withUndated(true)} so the count tracks q/tags/sender/receiver/status
|
||||
* regardless of the user's "Nur undatierte" toggle. A {@code from}/{@code to} range
|
||||
* excludes undated rows by the collision rule (#668), so the count is legitimately 0
|
||||
* inside a date range.
|
||||
*/
|
||||
private long countUndatedForFilter(boolean hasText, List<UUID> ftsIds, SearchFilters filters) {
|
||||
Specification<Document> undatedSpec = buildSearchSpec(hasText, ftsIds, filters);
|
||||
return documentRepository.count(undatedSpec);
|
||||
}
|
||||
|
||||
/** The original search dispatch — produces the page slice + totals, sans undated count. */
|
||||
private DocumentSearchResult runSearch(boolean hasText, List<UUID> rankedIds, SearchFilters filters,
|
||||
DocumentSort sort, String dir, Pageable pageable) {
|
||||
// The pure-text RELEVANCE fast path is handled by the caller (searchDocuments)
|
||||
// before findAllMatchingIdsByFts runs, so it never reaches here (ADR-008).
|
||||
Specification<Document> spec = buildSearchSpec(hasText, rankedIds, filters);
|
||||
String text = filters.text();
|
||||
Specification<Document> spec = buildSearchSpec(
|
||||
hasText, rankedIds, from, to, sender, receiver, tags, tagQ, status, tagOperator);
|
||||
|
||||
// SENDER and RECEIVER sorts load the full match set and slice in-memory.
|
||||
// JPA's Sort.by("sender.lastName") generates an INNER JOIN that silently drops
|
||||
@@ -829,12 +692,12 @@ public class DocumentService {
|
||||
return buildResultPaged(page.getContent(), text, pageable, page.getTotalElements());
|
||||
}
|
||||
|
||||
private static boolean isPureTextRelevance(boolean hasText, DocumentSort sort, SearchFilters filters) {
|
||||
private static boolean isPureTextRelevance(boolean hasText, DocumentSort sort,
|
||||
LocalDate from, LocalDate to, UUID sender, UUID receiver,
|
||||
List<String> tags, String tagQ, DocumentStatus status) {
|
||||
return hasText && (sort == null || sort == DocumentSort.RELEVANCE)
|
||||
&& filters.from() == null && filters.to() == null
|
||||
&& filters.sender() == null && filters.receiver() == null
|
||||
&& (filters.tags() == null || filters.tags().isEmpty())
|
||||
&& (filters.tagQ() == null || filters.tagQ().isBlank()) && filters.status() == null;
|
||||
&& from == null && to == null && sender == null && receiver == null
|
||||
&& (tags == null || tags.isEmpty()) && (tagQ == null || tagQ.isBlank()) && status == null;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -872,7 +735,7 @@ public class DocumentService {
|
||||
return DocumentSearchResult.paged(enrichItems(slice, text), pageable, totalElements);
|
||||
}
|
||||
|
||||
private List<DocumentListItem> enrichItems(List<Document> documents, String text) {
|
||||
private List<DocumentSearchItem> enrichItems(List<Document> documents, String text) {
|
||||
List<Document> colorResolved = resolveDocumentTagColors(documents);
|
||||
Map<UUID, SearchMatchData> matchData = enrichWithMatchData(colorResolved, text);
|
||||
|
||||
@@ -880,7 +743,7 @@ public class DocumentService {
|
||||
Map<UUID, Integer> completionByDoc = fetchCompletionPercentages(docIds);
|
||||
Map<UUID, List<ActivityActorDTO>> contributorsByDoc = auditLogQueryService.findRecentContributorsPerDocument(docIds);
|
||||
|
||||
return colorResolved.stream().map(doc -> toListItem(
|
||||
return colorResolved.stream().map(doc -> new DocumentSearchItem(
|
||||
doc,
|
||||
matchData.getOrDefault(doc.getId(), SearchMatchData.empty()),
|
||||
completionByDoc.getOrDefault(doc.getId(), 0),
|
||||
@@ -888,30 +751,6 @@ public class DocumentService {
|
||||
)).toList();
|
||||
}
|
||||
|
||||
private DocumentListItem toListItem(Document doc, SearchMatchData match, int completionPct, List<ActivityActorDTO> contributors) {
|
||||
return new DocumentListItem(
|
||||
doc.getId(),
|
||||
doc.getTitle(),
|
||||
doc.getOriginalFilename(),
|
||||
doc.getThumbnailUrl(),
|
||||
doc.getDocumentDate(),
|
||||
doc.getMetaDatePrecision(),
|
||||
doc.getMetaDateEnd(),
|
||||
doc.getSender(),
|
||||
List.copyOf(doc.getReceivers()),
|
||||
List.copyOf(doc.getTags()),
|
||||
doc.getArchiveBox(),
|
||||
doc.getArchiveFolder(),
|
||||
doc.getLocation(),
|
||||
doc.getSummary(),
|
||||
completionPct,
|
||||
contributors,
|
||||
match,
|
||||
doc.getCreatedAt(),
|
||||
doc.getUpdatedAt()
|
||||
);
|
||||
}
|
||||
|
||||
private Map<UUID, Integer> fetchCompletionPercentages(List<UUID> docIds) {
|
||||
return transcriptionBlockQueryService.getCompletionStats(docIds);
|
||||
}
|
||||
@@ -919,15 +758,7 @@ public class DocumentService {
|
||||
private Sort resolveSort(DocumentSort sort, String dir) {
|
||||
Sort.Direction direction = "ASC".equalsIgnoreCase(dir) ? Sort.Direction.ASC : Sort.Direction.DESC;
|
||||
if (sort == null || sort == DocumentSort.DATE || sort == DocumentSort.RELEVANCE) {
|
||||
// Undated documents (null documentDate) must order last regardless of
|
||||
// direction — Postgres puts NULLs FIRST on ASC by default, which would
|
||||
// surface the undated pile at the top with no explanation (issue #668).
|
||||
// The title tiebreaker gives a stable total order when every row is
|
||||
// null-dated (the "Nur undatierte" filter), so pagination is deterministic.
|
||||
// title is @Column(nullable=false), so it is always present.
|
||||
return Sort.by(
|
||||
new Sort.Order(direction, "documentDate").nullsLast(),
|
||||
Sort.Order.asc("title"));
|
||||
return Sort.by(direction, "documentDate");
|
||||
}
|
||||
// SENDER and RECEIVER are sorted in-memory before this method is called
|
||||
return switch (sort) {
|
||||
@@ -975,6 +806,22 @@ public class DocumentService {
|
||||
.orElse("");
|
||||
}
|
||||
|
||||
// 2. SPEZIALITÄT: Der Schriftwechsel
|
||||
// Findet alle Briefe ZWISCHEN zwei Personen (egal wer Sender/Empfänger war)
|
||||
public List<Document> getConversation(UUID personA, UUID personB) {
|
||||
|
||||
// Fall 1: A schreibt an B
|
||||
Specification<Document> aToB = Specification.where(hasSender(personA)).and(hasReceiver(personB));
|
||||
|
||||
// Fall 2: B schreibt an A
|
||||
Specification<Document> bToA = Specification.where(hasSender(personB)).and(hasReceiver(personA));
|
||||
|
||||
// Wir wollen (A->B) ODER (B->A)
|
||||
Specification<Document> conversation = aToB.or(bToA);
|
||||
|
||||
return documentRepository.findAll(conversation, Sort.by(Sort.Direction.ASC, "documentDate"));
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void updateScriptType(UUID documentId, ScriptType scriptType) {
|
||||
Document doc = getDocumentById(documentId);
|
||||
@@ -996,7 +843,6 @@ public class DocumentService {
|
||||
documentRepository.save(doc);
|
||||
}
|
||||
|
||||
@Transactional(readOnly = true)
|
||||
public Document getDocumentById(UUID id) {
|
||||
Document doc = documentRepository.findById(id)
|
||||
.orElseThrow(() -> DomainException.notFound(ErrorCode.DOCUMENT_NOT_FOUND, "Document not found: " + id));
|
||||
@@ -1004,19 +850,6 @@ public class DocumentService {
|
||||
return doc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a document for the detail view, additionally flagging whether it has any
|
||||
* transcription to read. Kept separate from {@link #getDocumentById} so the cheap
|
||||
* existence query only runs for the single-document detail endpoint, not for the
|
||||
* many internal callers that never read the flag.
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public Document getDocumentDetail(UUID id) {
|
||||
Document doc = getDocumentById(id);
|
||||
doc.setHasTranscription(transcriptionBlockQueryService.hasBlocks(id));
|
||||
return doc;
|
||||
}
|
||||
|
||||
public List<Document> getDocumentsByIds(List<UUID> ids) {
|
||||
return documentRepository.findAllById(ids);
|
||||
}
|
||||
@@ -1033,6 +866,15 @@ public class DocumentService {
|
||||
return documentRepository.findByReceiversId(receiverId);
|
||||
}
|
||||
|
||||
public List<Document> getConversationFiltered(UUID senderId, UUID receiverId, LocalDate from, LocalDate to, Sort sort) {
|
||||
LocalDate dateFrom = (from != null) ? from : LocalDate.parse("0000-01-01");
|
||||
LocalDate dateTo = (to != null) ? to : LocalDate.now();
|
||||
if (receiverId == null) {
|
||||
return documentRepository.findSinglePersonCorrespondence(senderId, dateFrom, dateTo, sort);
|
||||
}
|
||||
return documentRepository.findConversation(senderId, receiverId, dateFrom, dateTo, sort);
|
||||
}
|
||||
|
||||
public long getIncompleteCount() {
|
||||
return documentRepository.countByMetadataCompleteFalse();
|
||||
}
|
||||
@@ -1067,43 +909,6 @@ public class DocumentService {
|
||||
tagService.delete(tagId);
|
||||
}
|
||||
|
||||
/**
|
||||
* One-time cleanup of already-stale auto-titles (#726, FR-003). For every document whose
|
||||
* stored title passes the {@link DocumentTitleBackfillMatcher} overwrite heuristic, rebuilds
|
||||
* the title from the row's current state and persists it only when it actually changed.
|
||||
* Idempotent: a second run rebuilds the same value and saves nothing. Hand-written prose is
|
||||
* left untouched.
|
||||
*
|
||||
* <p>Saves via {@code documentRepository.save} directly — it must NOT route through
|
||||
* {@link #updateDocument} (which versions every write), following the {@link #backfillFileHashes}
|
||||
* precedent: a mechanical rename must not snapshot the whole corpus into {@code document_versions}.
|
||||
*
|
||||
* @return the number of documents whose title was rewritten
|
||||
*/
|
||||
@Transactional
|
||||
public int backfillTitles() {
|
||||
List<Document> docs = documentRepository.findAll();
|
||||
int updated = 0;
|
||||
int skipped = 0;
|
||||
for (Document doc : docs) {
|
||||
if (!DocumentTitleBackfillMatcher.isOverwritable(
|
||||
doc.getTitle(), doc.getOriginalFilename(), doc.getLocation())) {
|
||||
skipped++;
|
||||
continue;
|
||||
}
|
||||
String rebuilt = documentTitleFactory.build(doc);
|
||||
if (rebuilt.equals(doc.getTitle())) {
|
||||
skipped++; // already correct — keep idempotent, no write
|
||||
continue;
|
||||
}
|
||||
doc.setTitle(rebuilt);
|
||||
documentRepository.save(doc); // direct save, no recordVersion (mechanical rename)
|
||||
updated++;
|
||||
}
|
||||
log.info("Title backfill complete: scanned={} updated={} skipped={}", docs.size(), updated, skipped);
|
||||
return updated;
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public int backfillFileHashes() {
|
||||
List<Document> docs = documentRepository.findByFileHashIsNullAndFilePathIsNotNull();
|
||||
|
||||
@@ -55,12 +55,6 @@ public class DocumentSpecifications {
|
||||
return (root, query, cb) -> status == null ? null : cb.equal(root.get("status"), status);
|
||||
}
|
||||
|
||||
// Filtert auf undatierte Dokumente (meta_date IS NULL) — für die "Nur undatierte"-Triage.
|
||||
// false → kein Prädikat (no-op), true → documentDate IS NULL (issue #668).
|
||||
public static Specification<Document> undatedOnly(boolean undated) {
|
||||
return (root, query, cb) -> undated ? cb.isNull(root.get("documentDate")) : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Filtert nach vorausgeweiteten Tag-ID-Sets mit AND- oder OR-Logik.
|
||||
*
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Heuristic overwrite test for the one-time title backfill (#726, FR-004): decides whether a
|
||||
* STORED title is a machine-generated auto-title (and so may be rebuilt from the row's current
|
||||
* state) versus hand-written prose (left untouched). Used ONLY by the backfill — save-time
|
||||
* regeneration uses an exact old-vs-new comparison instead, with no heuristic.
|
||||
*
|
||||
* <p>A stored title is overwritable iff, after stripping the literal {@code index} prefix:
|
||||
* <ol>
|
||||
* <li>it is exactly {@code {index}}, or</li>
|
||||
* <li>{@code {index} – {dateLabel}} with an optional trailing {@code – {location}} segment
|
||||
* (any location — a present, valid date label is itself strong evidence of a machine
|
||||
* title), or</li>
|
||||
* <li>{@code {index} – {location}} where the segment equals the document's current location
|
||||
* (no date label, so the segment must match the known location to be distinguished from
|
||||
* prose).</li>
|
||||
* </ol>
|
||||
*
|
||||
* <p>Security: the {@code index} is compared <em>literally</em> via {@link String#startsWith}
|
||||
* (never compiled into a regex) because {@code originalFilename} is user-controlled and may carry
|
||||
* regex metacharacters — an unquoted pattern would be a ReDoS / regex-injection vector
|
||||
* (CWE-1333 / CWE-625). The date-label sub-patterns use only bounded, non-nested quantifiers over
|
||||
* short tokens, so there is no catastrophic backtracking. Fail-closed: any null/blank index or
|
||||
* structural surprise returns {@code false}.
|
||||
*/
|
||||
final class DocumentTitleBackfillMatcher {

    private static final String SEPARATOR = " – ";

    // Month tokens are produced by Locale.GERMAN formatters rather than typed out by hand, so the
    // spellings accepted here follow whatever the JDK emits for "MMMM" (full names) and "MMM"
    // (abbreviations, some of which carry a trailing period). Every token is Pattern.quote'd so
    // that a "." inside an abbreviation is matched literally instead of acting as a wildcard.
    private static final String FULL_MONTH = monthAlternation("MMMM");
    private static final String ABBR_MONTH = monthAlternation("MMM");
    private static final String SEASON = "(?:Frühling|Sommer|Herbst|Winter)";
    private static final String YEAR = "\\d{1,4}";
    private static final String DAY_NUM = "\\d{1,2}";

    // Reusable fragments for the abbreviated-month range labels.
    private static final String ABBR_DAY_MONTH = DAY_NUM + "\\. " + ABBR_MONTH;   // 30. Jan.
    private static final String ABBR_DATE = ABBR_DAY_MONTH + " " + YEAR;          // 30. Jan. 1917

    // Exactly one date label, anchored at both ends, optionally followed by " – " and a free-form
    // location segment. The alternatives use bounded quantifiers over short tokens; the only
    // unbounded part is the single trailing ".+" (FR-004 ReDoS guard).
    private static final Pattern DATE_LABEL_WITH_OPTIONAL_LOCATION = Pattern.compile(
            "^(?:" + String.join("|",
                    YEAR,                                        // 1916
                    "ca\\. " + YEAR,                             // ca. 1920
                    FULL_MONTH + " " + YEAR,                     // Juni 1916
                    DAY_NUM + "\\. " + FULL_MONTH + " " + YEAR,  // 24. Dezember 1943
                    SEASON + " " + YEAR,                         // Sommer 1916
                    "Datum unbekannt",
                    DAY_NUM + "\\.–" + ABBR_DATE,                // 10.–11. Jan. 1917
                    ABBR_DAY_MONTH + " – " + ABBR_DATE,          // 30. Jan. – 2. Feb. 1917
                    ABBR_DATE + " – " + ABBR_DATE,               // 30. Dez. 1916 – 2. Jan. 1917
                    ABBR_DATE,                                   // 10. Jan. 1917 (range end == start)
                    "ab " + ABBR_DATE)                           // ab 10. Jan. 1917
                    + ")(?: – .+)?$");

    private DocumentTitleBackfillMatcher() {
    }

    /**
     * Decides whether a stored title has one of the machine-generated shapes and may therefore be
     * overwritten.
     *
     * @param title    the stored title; {@code null} is never overwritable
     * @param index    the literal prefix the title must start with; {@code null} or blank fails closed
     * @param location the document's current location, consulted only when the title carries no
     *                 date label; may be {@code null}
     * @return {@code true} for {@code index}, {@code index – dateLabel[ – anything]}, or
     *         {@code index – location}; {@code false} for everything else
     */
    static boolean isOverwritable(String title, String index, String location) {
        boolean inputsUsable = title != null && index != null && !index.isBlank();
        if (!inputsUsable || !title.startsWith(index)) {
            return false; // fail closed; the index is only ever compared literally
        }
        if (title.length() == index.length()) {
            return true; // the title is exactly the index
        }

        String prefix = index + SEPARATOR;
        if (!title.startsWith(prefix)) {
            return false; // something other than the separator follows the index
        }

        String remainder = title.substring(prefix.length());
        if (DATE_LABEL_WITH_OPTIONAL_LOCATION.matcher(remainder).matches()) {
            return true; // date label, with or without a trailing location
        }

        // Without a date label, the single segment only counts as generated when it is exactly
        // the document's current location; anything else is treated as hand-written text.
        if (location == null || location.isBlank()) {
            return false;
        }
        return location.equals(remainder);
    }

    /**
     * Builds a non-capturing alternation of the twelve month tokens the given
     * {@link DateTimeFormatter} pattern yields under {@link Locale#GERMAN}, each quoted literally.
     */
    private static String monthAlternation(String pattern) {
        DateTimeFormatter monthFormat = DateTimeFormatter.ofPattern(pattern, Locale.GERMAN);
        LocalDate january = LocalDate.of(2000, 1, 15);
        Set<String> quotedTokens = new LinkedHashSet<>();
        for (int offset = 0; offset < 12; offset++) {
            quotedTokens.add(Pattern.quote(monthFormat.format(january.plusMonths(offset))));
        }
        return "(?:" + String.join("|", quotedTokens) + ")";
    }
}
|
||||
@@ -1,39 +0,0 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* Single source of truth for the auto-generated document title
|
||||
* {@code {index} – {dateLabel} – {location}}.
|
||||
*
|
||||
* <p>The {@code document} package owns this formula; {@code importing} consumes it
|
||||
* (see ADR for issue #726). The leading {@code index} is the document's
|
||||
* {@code originalFilename}; the date label is the honest German label produced by
|
||||
* {@link DocumentTitleFormatter} (the Java half of the #666 date-label split); the
|
||||
* trailing location is the {@code meta_location} verbatim, omitted when blank.
|
||||
*/
|
||||
@Component
|
||||
public class DocumentTitleFactory {
|
||||
|
||||
static final String SEPARATOR = " – ";
|
||||
|
||||
/**
|
||||
* Composes the auto-title from the document's current state. The date segment is
|
||||
* dropped for UNKNOWN precision or a null date (the honest "no date" case); the
|
||||
* location segment is dropped when blank.
|
||||
*/
|
||||
public String build(Document doc) {
|
||||
// originalFilename is NOT NULL in production; guard only so a synthetic/partial entity
|
||||
// never trips StringBuilder(null) with an opaque NPE.
|
||||
StringBuilder title = new StringBuilder(doc.getOriginalFilename() == null ? "" : doc.getOriginalFilename());
|
||||
if (doc.getDocumentDate() != null && doc.getMetaDatePrecision() != DatePrecision.UNKNOWN) {
|
||||
title.append(SEPARATOR).append(DocumentTitleFormatter.formatTitleDate(
|
||||
doc.getDocumentDate(), doc.getMetaDatePrecision(),
|
||||
doc.getMetaDateEnd(), doc.getMetaDateRaw()));
|
||||
}
|
||||
if (doc.getLocation() != null && !doc.getLocation().isBlank()) {
|
||||
title.append(SEPARATOR).append(doc.getLocation());
|
||||
}
|
||||
return title.toString();
|
||||
}
|
||||
}
|
||||
@@ -1,110 +0,0 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
* Produces the honest German date label baked into an import title — at exactly
|
||||
* the precision the data claims, never finer. This is the Java half of the
|
||||
* single source of truth shared with the frontend {@code formatDocumentDate}
|
||||
* (TypeScript): both are asserted against {@code docs/date-label-fixtures.json}
|
||||
* so the two implementations cannot drift (see #666).
|
||||
*
|
||||
* <p>Import titles are always German, so the labels here are the German
|
||||
* canonical form (mirroring the {@code de} Paraglide messages used by the UI).
|
||||
*/
|
||||
final class DocumentTitleFormatter {
|
||||
|
||||
private static final DateTimeFormatter LONG = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN);
|
||||
private static final DateTimeFormatter MONTH_YEAR = DateTimeFormatter.ofPattern("MMMM yyyy", Locale.GERMAN);
|
||||
private static final DateTimeFormatter MEDIUM = DateTimeFormatter.ofPattern("d. MMM yyyy", Locale.GERMAN);
|
||||
private static final DateTimeFormatter DAY_MONTH = DateTimeFormatter.ofPattern("d. MMM", Locale.GERMAN);
|
||||
|
||||
private static final String UNKNOWN = "Datum unbekannt";
|
||||
private static final String APPROX_PREFIX = "ca.";
|
||||
private static final String OPEN_RANGE_PREFIX = "ab";
|
||||
|
||||
private DocumentTitleFormatter() {
|
||||
}
|
||||
|
||||
/**
|
||||
* @param date the sort/filter anchor day; null for UNKNOWN rows
|
||||
* @param precision descriptive precision metadata
|
||||
* @param end the RANGE end day; null means an open-ended range
|
||||
* @param raw the verbatim spreadsheet cell, used only to pick a season word
|
||||
* @return the honest German label
|
||||
*/
|
||||
static String formatTitleDate(LocalDate date, DatePrecision precision, LocalDate end, String raw) {
|
||||
if (precision == DatePrecision.UNKNOWN || date == null) {
|
||||
return UNKNOWN;
|
||||
}
|
||||
return switch (precision) {
|
||||
case DAY -> LONG.format(date);
|
||||
case MONTH -> MONTH_YEAR.format(date);
|
||||
case SEASON -> seasonLabel(date, raw);
|
||||
case YEAR -> String.valueOf(date.getYear());
|
||||
case APPROX -> APPROX_PREFIX + " " + date.getYear();
|
||||
case RANGE -> rangeLabel(date, end);
|
||||
case UNKNOWN -> UNKNOWN;
|
||||
};
|
||||
}
|
||||
|
||||
private static String seasonLabel(LocalDate date, String raw) {
|
||||
Season season = seasonFromRaw(raw);
|
||||
if (season == null) {
|
||||
season = seasonOfMonth(date.getMonthValue());
|
||||
}
|
||||
return season.german + " " + date.getYear();
|
||||
}
|
||||
|
||||
private static String rangeLabel(LocalDate start, LocalDate end) {
|
||||
if (end == null) {
|
||||
return OPEN_RANGE_PREFIX + " " + MEDIUM.format(start);
|
||||
}
|
||||
if (end.equals(start)) {
|
||||
return MEDIUM.format(start);
|
||||
}
|
||||
if (start.getYear() != end.getYear()) {
|
||||
return MEDIUM.format(start) + " – " + MEDIUM.format(end);
|
||||
}
|
||||
if (start.getMonthValue() == end.getMonthValue()) {
|
||||
return start.getDayOfMonth() + ".–" + MEDIUM.format(end);
|
||||
}
|
||||
return DAY_MONTH.format(start) + " – " + MEDIUM.format(end);
|
||||
}
|
||||
|
||||
// ─── season mapping — mirrors the normalizer's representative months ─────────────
|
||||
|
||||
private enum Season {
|
||||
SPRING("Frühling"),
|
||||
SUMMER("Sommer"),
|
||||
AUTUMN("Herbst"),
|
||||
WINTER("Winter");
|
||||
|
||||
private final String german;
|
||||
|
||||
Season(String german) {
|
||||
this.german = german;
|
||||
}
|
||||
}
|
||||
|
||||
private static Season seasonOfMonth(int month) {
|
||||
if (month >= 3 && month <= 5) return Season.SPRING;
|
||||
if (month >= 6 && month <= 8) return Season.SUMMER;
|
||||
if (month >= 9 && month <= 11) return Season.AUTUMN;
|
||||
return Season.WINTER;
|
||||
}
|
||||
|
||||
private static Season seasonFromRaw(String raw) {
|
||||
if (raw == null || raw.isBlank()) return null;
|
||||
String token = raw.trim().split("\\s+")[0].toLowerCase(Locale.GERMAN);
|
||||
return switch (token) {
|
||||
case "frühling", "frühjahr" -> Season.SPRING;
|
||||
case "sommer" -> Season.SUMMER;
|
||||
case "herbst" -> Season.AUTUMN;
|
||||
case "winter" -> Season.WINTER;
|
||||
default -> null;
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -11,11 +11,6 @@ import org.raddatz.familienarchiv.ocr.ScriptType;
|
||||
public class DocumentUpdateDTO {
|
||||
private String title;
|
||||
private LocalDate documentDate;
|
||||
private DatePrecision metaDatePrecision;
|
||||
private LocalDate metaDateEnd;
|
||||
private String metaDateRaw;
|
||||
private String senderText;
|
||||
private String receiverText;
|
||||
private String location;
|
||||
private String documentLocation;
|
||||
private String archiveBox;
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import org.raddatz.familienarchiv.tag.TagOperator;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
 * The filter predicates honoured by {@link DocumentService#searchDocuments} and
 * {@link DocumentService#findIdsForFilter}. Sort, direction, and pagination are
 * deliberately excluded — they are not filter predicates, and {@code findIdsForFilter}
 * needs none of them; they are passed as separate arguments instead.
 *
 * <p>Kept as a record so the ten values travel as one named bundle instead of a
 * positional argument list where two UUIDs (sender vs. receiver) or two dates
 * (from vs. to) can be swapped by accident at the call site — a transposition that
 * compiles cleanly and silently returns the wrong rows.
 *
 * <p>Sibling of {@link DensityFilters} (= these fields minus from/to/undated); kept
 * separate on purpose, so the density call path never reasons about date/undated
 * fields it deliberately excludes.
 */
public record SearchFilters(
        String text,
        LocalDate from,
        LocalDate to,
        UUID sender,
        UUID receiver,
        List<String> tags,
        String tagQ,
        DocumentStatus status,
        TagOperator tagOperator,
        boolean undated) {

    /**
     * Returns a copy with {@code undated} overridden — used by the undated-count path.
     * All other components are carried over unchanged.
     *
     * @param undated the value the copy's {@code undated} component takes
     * @return a new {@code SearchFilters}; this instance is not modified
     */
    public SearchFilters withUndated(boolean undated) {
        return new SearchFilters(text, from, to, sender, receiver, tags, tagQ, status, tagOperator, undated);
    }
}
|
||||
@@ -43,7 +43,7 @@ public class TranscriptionBlockController {
|
||||
|
||||
@PostMapping
|
||||
@ResponseStatus(HttpStatus.CREATED)
|
||||
@RequirePermission({Permission.ANNOTATE_ALL, Permission.WRITE_ALL})
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public TranscriptionBlock createBlock(
|
||||
@PathVariable UUID documentId,
|
||||
@Valid @RequestBody CreateTranscriptionBlockDTO dto,
|
||||
@@ -53,7 +53,7 @@ public class TranscriptionBlockController {
|
||||
}
|
||||
|
||||
@PutMapping("/{blockId}")
|
||||
@RequirePermission({Permission.ANNOTATE_ALL, Permission.WRITE_ALL})
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public TranscriptionBlock updateBlock(
|
||||
@PathVariable UUID documentId,
|
||||
@PathVariable UUID blockId,
|
||||
@@ -65,7 +65,7 @@ public class TranscriptionBlockController {
|
||||
|
||||
@DeleteMapping("/{blockId}")
|
||||
@ResponseStatus(HttpStatus.NO_CONTENT)
|
||||
@RequirePermission({Permission.ANNOTATE_ALL, Permission.WRITE_ALL})
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public void deleteBlock(
|
||||
@PathVariable UUID documentId,
|
||||
@PathVariable UUID blockId) {
|
||||
@@ -73,7 +73,7 @@ public class TranscriptionBlockController {
|
||||
}
|
||||
|
||||
@PutMapping("/reorder")
|
||||
@RequirePermission({Permission.ANNOTATE_ALL, Permission.WRITE_ALL})
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public List<TranscriptionBlock> reorderBlocks(
|
||||
@PathVariable UUID documentId,
|
||||
@RequestBody ReorderTranscriptionBlocksDTO dto) {
|
||||
@@ -82,7 +82,7 @@ public class TranscriptionBlockController {
|
||||
}
|
||||
|
||||
@PutMapping("/{blockId}/review")
|
||||
@RequirePermission({Permission.ANNOTATE_ALL, Permission.WRITE_ALL})
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public TranscriptionBlock reviewBlock(
|
||||
@PathVariable UUID documentId,
|
||||
@PathVariable UUID blockId,
|
||||
@@ -92,7 +92,7 @@ public class TranscriptionBlockController {
|
||||
}
|
||||
|
||||
@PutMapping("/review-all")
|
||||
@RequirePermission({Permission.ANNOTATE_ALL, Permission.WRITE_ALL})
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public List<TranscriptionBlock> markAllBlocksReviewed(
|
||||
@PathVariable UUID documentId,
|
||||
Authentication authentication) {
|
||||
|
||||
@@ -17,10 +17,6 @@ public class TranscriptionBlockQueryService {
|
||||
|
||||
private final TranscriptionBlockRepository blockRepository;
|
||||
|
||||
public boolean hasBlocks(UUID documentId) {
|
||||
return blockRepository.existsByDocumentId(documentId);
|
||||
}
|
||||
|
||||
public Map<UUID, Integer> getCompletionStats(List<UUID> documentIds) {
|
||||
if (documentIds.isEmpty()) return Map.of();
|
||||
Map<UUID, Integer> result = new HashMap<>();
|
||||
|
||||
@@ -43,8 +43,6 @@ public interface TranscriptionBlockRepository extends JpaRepository<Transcriptio
|
||||
|
||||
int countByDocumentId(UUID documentId);
|
||||
|
||||
boolean existsByDocumentId(UUID documentId);
|
||||
|
||||
@Query("""
|
||||
SELECT b FROM TranscriptionBlock b
|
||||
JOIN DocumentAnnotation a ON a.id = b.annotationId
|
||||
|
||||
@@ -10,21 +10,11 @@ public class DomainException extends RuntimeException {
|
||||
|
||||
private final ErrorCode code;
|
||||
private final HttpStatus status;
|
||||
/** Seconds until the rate-limit window resets; {@code null} when not applicable. */
|
||||
private final Long retryAfterSeconds;
|
||||
|
||||
public DomainException(ErrorCode code, HttpStatus status, String developerMessage) {
|
||||
super(developerMessage);
|
||||
this.code = code;
|
||||
this.status = status;
|
||||
this.retryAfterSeconds = null;
|
||||
}
|
||||
|
||||
private DomainException(ErrorCode code, HttpStatus status, String developerMessage, Long retryAfterSeconds) {
|
||||
super(developerMessage);
|
||||
this.code = code;
|
||||
this.status = status;
|
||||
this.retryAfterSeconds = retryAfterSeconds;
|
||||
}
|
||||
|
||||
public ErrorCode getCode() {
|
||||
@@ -35,11 +25,6 @@ public class DomainException extends RuntimeException {
|
||||
return status;
|
||||
}
|
||||
|
||||
/** Returns the {@code Retry-After} value in seconds, or {@code null} if not set. */
|
||||
public Long getRetryAfterSeconds() {
|
||||
return retryAfterSeconds;
|
||||
}
|
||||
|
||||
// --- Static factories for common cases ---
|
||||
|
||||
public static DomainException notFound(ErrorCode code, String message) {
|
||||
@@ -54,11 +39,6 @@ public class DomainException extends RuntimeException {
|
||||
return new DomainException(ErrorCode.UNAUTHORIZED, HttpStatus.UNAUTHORIZED, message);
|
||||
}
|
||||
|
||||
public static DomainException invalidCredentials() {
|
||||
return new DomainException(ErrorCode.INVALID_CREDENTIALS, HttpStatus.UNAUTHORIZED,
|
||||
"Invalid email or password");
|
||||
}
|
||||
|
||||
public static DomainException conflict(ErrorCode code, String message) {
|
||||
return new DomainException(code, HttpStatus.CONFLICT, message);
|
||||
}
|
||||
@@ -70,12 +50,4 @@ public class DomainException extends RuntimeException {
|
||||
public static DomainException internal(ErrorCode code, String message) {
|
||||
return new DomainException(code, HttpStatus.INTERNAL_SERVER_ERROR, message);
|
||||
}
|
||||
|
||||
public static DomainException tooManyRequests(ErrorCode code, String message) {
|
||||
return new DomainException(code, HttpStatus.TOO_MANY_REQUESTS, message);
|
||||
}
|
||||
|
||||
public static DomainException tooManyRequests(ErrorCode code, String message, long retryAfterSeconds) {
|
||||
return new DomainException(code, HttpStatus.TOO_MANY_REQUESTS, message, retryAfterSeconds);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,14 +26,10 @@ public enum ErrorCode {
|
||||
FILE_UPLOAD_FAILED,
|
||||
/** The uploaded file's content type is not supported (PDF/JPEG/PNG/TIFF only). 400 */
|
||||
UNSUPPORTED_FILE_TYPE,
|
||||
/** A RANGE date is invalid: meta_date_end is before meta_date, or an end date is set without RANGE precision. 400 */
|
||||
INVALID_DATE_RANGE,
|
||||
|
||||
// --- Users ---
|
||||
/** A user with the given ID or username does not exist. 404 */
|
||||
USER_NOT_FOUND,
|
||||
/** A group with the given ID does not exist. 404 */
|
||||
GROUP_NOT_FOUND,
|
||||
/** The supplied email address is already used by another account. 409 */
|
||||
EMAIL_ALREADY_IN_USE,
|
||||
/** The supplied current password does not match the stored hash. 400 */
|
||||
@@ -42,8 +38,6 @@ public enum ErrorCode {
|
||||
// --- Import ---
|
||||
/** A mass import is already in progress; only one can run at a time. 409 */
|
||||
IMPORT_ALREADY_RUNNING,
|
||||
/** A canonical import artifact is missing, unreadable, or missing a required header. 400 */
|
||||
IMPORT_ARTIFACT_INVALID,
|
||||
|
||||
// --- Thumbnails ---
|
||||
/** A thumbnail backfill is already in progress; only one can run at a time. 409 */
|
||||
@@ -58,24 +52,14 @@ public enum ErrorCode {
|
||||
INVITE_REVOKED,
|
||||
/** The invite has passed its expiry date. 410 */
|
||||
INVITE_EXPIRED,
|
||||
/** A group cannot be deleted because one or more active invites reference it. 409 */
|
||||
GROUP_HAS_ACTIVE_INVITES,
|
||||
|
||||
// --- Auth ---
|
||||
/** The request is not authenticated. 401 */
|
||||
UNAUTHORIZED,
|
||||
/** The authenticated user lacks the required permission. 403 */
|
||||
FORBIDDEN,
|
||||
/** The supplied email/password combination does not match any active account. 401 */
|
||||
INVALID_CREDENTIALS,
|
||||
/** The session has expired or been invalidated. 401 */
|
||||
SESSION_EXPIRED,
|
||||
/** The password-reset token is missing, expired, or already used. 400 */
|
||||
INVALID_RESET_TOKEN,
|
||||
/** CSRF token is missing or does not match the expected value. 403 */
|
||||
CSRF_TOKEN_MISSING,
|
||||
/** The login rate limit has been exceeded for this IP/email combination. 429 */
|
||||
TOO_MANY_LOGIN_ATTEMPTS,
|
||||
|
||||
// --- Annotations ---
|
||||
/** The annotation with the given ID does not exist. 404 */
|
||||
|
||||
@@ -2,11 +2,9 @@ package org.raddatz.familienarchiv.exception;
|
||||
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import io.sentry.Sentry;
|
||||
import jakarta.validation.ConstraintViolationException;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.springframework.dao.DataIntegrityViolationException;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.http.converter.HttpMessageNotReadableException;
|
||||
import org.springframework.web.bind.MethodArgumentNotValidException;
|
||||
@@ -24,11 +22,9 @@ public class GlobalExceptionHandler {
|
||||
|
||||
@ExceptionHandler(DomainException.class)
|
||||
public ResponseEntity<ErrorResponse> handleDomain(DomainException ex) {
|
||||
var builder = ResponseEntity.status(ex.getStatus());
|
||||
if (ex.getRetryAfterSeconds() != null) {
|
||||
builder = builder.header("Retry-After", String.valueOf(ex.getRetryAfterSeconds()));
|
||||
}
|
||||
return builder.body(new ErrorResponse(ex.getCode(), ex.getMessage()));
|
||||
return ResponseEntity
|
||||
.status(ex.getStatus())
|
||||
.body(new ErrorResponse(ex.getCode(), ex.getMessage()));
|
||||
}
|
||||
|
||||
@ExceptionHandler(MethodArgumentNotValidException.class)
|
||||
@@ -65,41 +61,8 @@ public class GlobalExceptionHandler {
|
||||
.body(new ErrorResponse(ErrorCode.VALIDATION_ERROR, ex.getReason()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Backstop for any database integrity violation that slips past the explicit upstream
|
||||
* guards (e.g. a future constraint, or the import path emitting a bad range). Turns it into
|
||||
* a clean 400 instead of a 500 + Sentry alert. The known date-range cases are caught upstream
|
||||
* and never reach here; this only catches the unanticipated ones — so it logs the constraint
|
||||
* NAME at WARN to stay debuggable, without re-leaking SQL and without branching the response
|
||||
* on it (the response stays generic, which is the non-brittle part).
|
||||
*/
|
||||
@ExceptionHandler(DataIntegrityViolationException.class)
|
||||
public ResponseEntity<ErrorResponse> handleDataIntegrityViolation(DataIntegrityViolationException ex) {
|
||||
// Log the constraint NAME only — schema metadata, safe for Loki, and enough to tell which
|
||||
// constraint fired at 2am. Never pass `ex` / `ex.getMessage()`: those embed the SQL + the
|
||||
// offending values (CWE-209). No Sentry: an integrity violation is a 400, not a system fault.
|
||||
log.warn("Rejected a request that violated a database integrity constraint: {}", constraintNameOf(ex));
|
||||
return ResponseEntity.badRequest()
|
||||
.body(new ErrorResponse(ErrorCode.VALIDATION_ERROR, "The submitted data violated a database constraint"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the offending constraint's name from the cause chain, or {@code "unknown"}.
|
||||
* Reads only the name (a non-sensitive schema identifier) — never the SQL or the values.
|
||||
*/
|
||||
private static String constraintNameOf(Throwable ex) {
|
||||
for (Throwable t = ex; t != null && t != t.getCause(); t = t.getCause()) {
|
||||
if (t instanceof org.hibernate.exception.ConstraintViolationException cve
|
||||
&& cve.getConstraintName() != null) {
|
||||
return cve.getConstraintName();
|
||||
}
|
||||
}
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
@ExceptionHandler(Exception.class)
|
||||
public ResponseEntity<ErrorResponse> handleGeneric(Exception ex) {
|
||||
Sentry.captureException(ex);
|
||||
log.error("Unhandled exception", ex);
|
||||
return ResponseEntity.internalServerError()
|
||||
.body(new ErrorResponse(ErrorCode.INTERNAL_ERROR, "An unexpected error occurred"));
|
||||
|
||||
@@ -1,131 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.person.relationship.RelationType;
|
||||
import org.raddatz.familienarchiv.person.relationship.RelationshipService;
|
||||
import org.raddatz.familienarchiv.person.relationship.dto.NetworkDTO;
|
||||
import org.raddatz.familienarchiv.person.relationship.dto.PersonNodeDTO;
|
||||
import org.raddatz.familienarchiv.person.relationship.dto.RelationshipDTO;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.io.File;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* Runs the four canonical loaders in their real dependency order — encoded explicitly
|
||||
* here, not implied by call order — and owns the async runner plus the {@link ImportStatus}
|
||||
* state machine the admin UI consumes. The orchestrator smoke-checks that all four
|
||||
* artifacts are present before starting, failing fast rather than half-loading tags but no
|
||||
* documents. A malformed artifact (a loader throwing) sets {@code FAILED}; an individual
|
||||
* bad file is surfaced through the {@link ImportStatus.SkippedFile} mechanism instead.
|
||||
*/
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class CanonicalImportOrchestrator {
|
||||
|
||||
private static final String TAG_TREE_ARTIFACT = "canonical-tag-tree.xlsx";
|
||||
private static final String PERSONS_ARTIFACT = "canonical-persons.xlsx";
|
||||
private static final String PERSONS_TREE_ARTIFACT = "canonical-persons-tree.json";
|
||||
private static final String DOCUMENTS_ARTIFACT = "canonical-documents.xlsx";
|
||||
|
||||
private final TagTreeImporter tagTreeImporter;
|
||||
private final PersonRegisterImporter personRegisterImporter;
|
||||
private final PersonTreeImporter personTreeImporter;
|
||||
private final DocumentImporter documentImporter;
|
||||
private final RelationshipService relationshipService;
|
||||
|
||||
@Value("${app.import.dir:/import}")
|
||||
private String canonicalDir;
|
||||
|
||||
private volatile ImportStatus currentStatus = new ImportStatus(
|
||||
ImportStatus.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
|
||||
|
||||
public ImportStatus getStatus() {
|
||||
return currentStatus;
|
||||
}
|
||||
|
||||
@Async
|
||||
public void runImportAsync() {
|
||||
if (currentStatus.state() == ImportStatus.State.RUNNING) {
|
||||
throw DomainException.conflict(ErrorCode.IMPORT_ALREADY_RUNNING, "A mass import is already in progress");
|
||||
}
|
||||
runImport();
|
||||
}
|
||||
|
||||
/** Synchronous entry point — wrapped by {@link #runImportAsync()} and called directly in tests. */
|
||||
void runImport() {
|
||||
currentStatus = new ImportStatus(ImportStatus.State.RUNNING, "IMPORT_RUNNING",
|
||||
"Import läuft...", 0, List.of(), LocalDateTime.now());
|
||||
try {
|
||||
File tagTree = requireArtifact(TAG_TREE_ARTIFACT);
|
||||
File persons = requireArtifact(PERSONS_ARTIFACT);
|
||||
File personsTree = requireArtifact(PERSONS_TREE_ARTIFACT);
|
||||
File documents = requireArtifact(DOCUMENTS_ARTIFACT);
|
||||
|
||||
// Dependency DAG: documents need persons + tags; the tree needs persons.
|
||||
tagTreeImporter.load(tagTree);
|
||||
personRegisterImporter.load(persons);
|
||||
personTreeImporter.load(personsTree);
|
||||
warnOnGenerationMonotonicityViolations();
|
||||
DocumentImporter.LoadResult result = documentImporter.load(documents);
|
||||
|
||||
currentStatus = new ImportStatus(ImportStatus.State.DONE, "IMPORT_DONE",
|
||||
"Import abgeschlossen. " + result.processed() + " Dokumente verarbeitet.",
|
||||
result.processed(), result.skippedFiles(), currentStatus.startedAt());
|
||||
} catch (DomainException e) {
|
||||
log.error("Canonical import failed: {}", e.getMessage());
|
||||
currentStatus = new ImportStatus(ImportStatus.State.FAILED, "IMPORT_FAILED_ARTIFACT",
|
||||
"Fehler: " + e.getMessage(), 0, List.of(), currentStatus.startedAt());
|
||||
} catch (Exception e) {
|
||||
log.error("Canonical import failed", e);
|
||||
currentStatus = new ImportStatus(ImportStatus.State.FAILED, "IMPORT_FAILED_INTERNAL",
|
||||
"Fehler: " + e.getMessage(), 0, List.of(), currentStatus.startedAt());
|
||||
}
|
||||
}
|
||||
|
||||
private File requireArtifact(String name) {
|
||||
File artifact = new File(canonicalDir, name);
|
||||
if (!artifact.isFile()) {
|
||||
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
|
||||
"Missing canonical artifact: " + name);
|
||||
}
|
||||
return artifact;
|
||||
}
|
||||
|
||||
/**
|
||||
* Walks every PARENT_OF edge in the family graph and logs a WARN whenever a child's
|
||||
* generation is not strictly deeper than its parent's. Soft check only — the import
|
||||
* is never aborted; the warning is a forensic signal for the curator. Reads through
|
||||
* {@link RelationshipService} so the orchestrator stays within the layering rule
|
||||
* (no direct repository access).
|
||||
*/
|
||||
private void warnOnGenerationMonotonicityViolations() {
|
||||
NetworkDTO network = relationshipService.getFamilyNetwork();
|
||||
Map<UUID, PersonNodeDTO> byId = new HashMap<>(network.nodes().size());
|
||||
for (PersonNodeDTO node : network.nodes()) {
|
||||
byId.put(node.id(), node);
|
||||
}
|
||||
for (RelationshipDTO edge : network.edges()) {
|
||||
if (edge.relationType() != RelationType.PARENT_OF) continue;
|
||||
PersonNodeDTO parent = byId.get(edge.personId());
|
||||
PersonNodeDTO child = byId.get(edge.relatedPersonId());
|
||||
if (parent == null || child == null) continue;
|
||||
Integer pg = parent.generation();
|
||||
Integer cg = child.generation();
|
||||
if (pg != null && cg != null && cg <= pg) {
|
||||
log.warn("Generation monotonicity violation: parent {} (G{}) -> child {} (G{})",
|
||||
parent.displayName(), pg, child.displayName(), cg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,133 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.DateUtil;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.ss.usermodel.WorkbookFactory;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Value-level POI helper for the canonical import artifacts. No Spring, no domain
|
||||
* knowledge: it opens a workbook, maps the header row to column indices by name, and
|
||||
* yields typed rows whose cells are looked up by header name — the seam that replaces
|
||||
* the old positional {@code @Value app.import.col.*} indices. List columns are split on
|
||||
* the pipe delimiter the normalizer emits.
|
||||
*/
|
||||
public final class CanonicalSheetReader {
|
||||
|
||||
private CanonicalSheetReader() {
|
||||
}
|
||||
|
||||
/** A single data row, addressable by canonical header name (never by index). */
|
||||
public static final class Row {
|
||||
|
||||
private final Map<String, Integer> headerIndex;
|
||||
private final List<String> cells;
|
||||
|
||||
private Row(Map<String, Integer> headerIndex, List<String> cells) {
|
||||
this.headerIndex = headerIndex;
|
||||
this.cells = cells;
|
||||
}
|
||||
|
||||
/** Trimmed cell value for the named header, or "" when absent/blank. */
|
||||
public String get(String header) {
|
||||
Integer index = headerIndex.get(header);
|
||||
if (index == null || index >= cells.size()) return "";
|
||||
String value = cells.get(index);
|
||||
return value == null ? "" : value.trim();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads all data rows from the first sheet, validating that every required header is
|
||||
* present. Throws a fail-closed {@link DomainException} on a missing header so a
|
||||
* loader never silently maps the wrong column.
|
||||
*/
|
||||
public static List<Row> readRows(File file, List<String> requiredHeaders) {
|
||||
try (FileInputStream fis = new FileInputStream(file);
|
||||
Workbook workbook = WorkbookFactory.create(fis)) {
|
||||
|
||||
Sheet sheet = workbook.getSheetAt(0);
|
||||
org.apache.poi.ss.usermodel.Row headerRow = sheet.getRow(sheet.getFirstRowNum());
|
||||
Map<String, Integer> headerIndex = mapHeaders(headerRow);
|
||||
requireHeaders(file, headerIndex, requiredHeaders);
|
||||
|
||||
List<Row> rows = new ArrayList<>();
|
||||
for (int i = sheet.getFirstRowNum() + 1; i <= sheet.getLastRowNum(); i++) {
|
||||
org.apache.poi.ss.usermodel.Row poiRow = sheet.getRow(i);
|
||||
if (poiRow == null) continue;
|
||||
rows.add(new Row(headerIndex, readCells(poiRow, headerIndex.size())));
|
||||
}
|
||||
return rows;
|
||||
} catch (DomainException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
|
||||
"Unreadable canonical artifact: " + file.getName());
|
||||
}
|
||||
}
|
||||
|
||||
/** Splits a pipe-delimited list column into trimmed, non-empty segments. */
|
||||
public static List<String> splitList(String raw) {
|
||||
if (raw == null || raw.isBlank()) return List.of();
|
||||
return Arrays.stream(raw.split("\\|"))
|
||||
.map(String::trim)
|
||||
.filter(s -> !s.isEmpty())
|
||||
.toList();
|
||||
}
|
||||
|
||||
private static Map<String, Integer> mapHeaders(org.apache.poi.ss.usermodel.Row headerRow) {
|
||||
if (headerRow == null) {
|
||||
return Map.of();
|
||||
}
|
||||
Map<String, Integer> headerIndex = new HashMap<>();
|
||||
for (int c = 0; c < headerRow.getLastCellNum(); c++) {
|
||||
String name = cellToString(headerRow.getCell(c)).trim();
|
||||
if (!name.isEmpty()) headerIndex.putIfAbsent(name, c);
|
||||
}
|
||||
return headerIndex;
|
||||
}
|
||||
|
||||
private static void requireHeaders(File file, Map<String, Integer> headerIndex, List<String> requiredHeaders) {
|
||||
for (String header : requiredHeaders) {
|
||||
if (!headerIndex.containsKey(header)) {
|
||||
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
|
||||
"Missing required header '" + header + "' in artifact " + file.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static List<String> readCells(org.apache.poi.ss.usermodel.Row poiRow, int columnCount) {
|
||||
int width = Math.max(columnCount, poiRow.getLastCellNum());
|
||||
List<String> cells = new ArrayList<>(width);
|
||||
for (int c = 0; c < width; c++) {
|
||||
cells.add(cellToString(poiRow.getCell(c)));
|
||||
}
|
||||
return cells;
|
||||
}
|
||||
|
||||
private static String cellToString(Cell cell) {
|
||||
if (cell == null) return "";
|
||||
return switch (cell.getCellType()) {
|
||||
case STRING -> cell.getStringCellValue();
|
||||
case NUMERIC -> {
|
||||
if (DateUtil.isCellDateFormatted(cell)) {
|
||||
yield cell.getLocalDateTimeCellValue().toLocalDate().toString();
|
||||
}
|
||||
yield String.valueOf((long) cell.getNumericCellValue());
|
||||
}
|
||||
case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
|
||||
default -> "";
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -1,380 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.document.DatePrecision;
|
||||
import org.raddatz.familienarchiv.document.Document;
|
||||
import org.raddatz.familienarchiv.document.DocumentService;
|
||||
import org.raddatz.familienarchiv.document.DocumentTitleFactory;
|
||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonService;
|
||||
import org.raddatz.familienarchiv.person.PersonType;
|
||||
import org.raddatz.familienarchiv.person.PersonUpsertCommand;
|
||||
import org.raddatz.familienarchiv.tag.Tag;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import software.amazon.awssdk.core.sync.RequestBody;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
|
||||
|
||||
import org.raddatz.familienarchiv.tag.TagService;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Loads {@code canonical-documents.xlsx} into the document domain. Java performs no
|
||||
* semantic transformation: the normalizer already resolved people to slugs and dates to
|
||||
* ISO values. This loader maps columns by header name, routes each attribution
|
||||
* register-first (always retaining the raw cell in {@code sender_text}/{@code receiver_text}),
|
||||
* parses clean dates, and keeps the S3/thumbnail plumbing.
|
||||
*
|
||||
* <p>The import corpus is uniform — every PDF is named {@code <index>.pdf} flat in the import
|
||||
* dir — so a document's PDF is resolved <em>directly by its index</em>:
|
||||
* {@code importDir.resolve(index + ".pdf")}. The {@code index} is still hostile input
|
||||
* regardless of upstream trust (CWE-22 does not care it came from our Python tool): it is
|
||||
* validated against a strict catalog pattern with {@link #isValidImportIndex} (no path
|
||||
* separators, no {@code .}/{@code ..}, no absolute path, no slash homoglyphs) and the
|
||||
* resolved path is asserted to stay inside the import dir in {@link #resolvePdfByIndex} as
|
||||
* defense-in-depth. The {@code %PDF} magic-byte check still gates upload.
|
||||
*/
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class DocumentImporter {
|
||||
|
||||
static final List<String> REQUIRED_HEADERS = List.of(
|
||||
"index", "sender_person_id", "sender_name",
|
||||
"receiver_person_ids", "receiver_names", "date_iso", "date_raw", "date_precision");
|
||||
|
||||
// Catalog index shape: 1–4 letters (ASCII + Latin-1 letters, e.g. the German "ü" in
|
||||
// "Mü-0001"), one or more hyphens (the corpus has a few "C--0029" data-entry artefacts),
|
||||
// digits, and an optional trailing "x" the normalizer recognises. Anchored, with no
|
||||
// separator / dot / slash characters in the class, so "<index>.pdf" can never traverse.
|
||||
// NOTE: `\d` here is intentionally ASCII-only ([0-9]). Java's java.util.regex matches `\d`
|
||||
// against [0-9] unless Pattern.UNICODE_CHARACTER_CLASS is set — do NOT add that flag, or
|
||||
// Arabic-Indic / fullwidth digits would silently widen the accepted set.
|
||||
private static final Pattern INDEX_PATTERN =
|
||||
Pattern.compile("[A-Za-z\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u00FF]{1,4}-+\\d+x?");
|
||||
|
||||
private final DocumentService documentService;
|
||||
private final DocumentTitleFactory documentTitleFactory;
|
||||
private final PersonService personService;
|
||||
private final TagService tagService;
|
||||
private final S3Client s3Client;
|
||||
private final ThumbnailAsyncRunner thumbnailAsyncRunner;
|
||||
private final FileStreamOpener fileStreamOpener;
|
||||
|
||||
@Value("${app.s3.bucket:familienarchiv}")
|
||||
private String bucketName;
|
||||
|
||||
@Value("${app.import.dir:/import}")
|
||||
private String importDir;
|
||||
|
||||
/** Outcome of loading the document sheet: processed count + per-file skips. */
|
||||
public record LoadResult(int processed, List<ImportStatus.SkippedFile> skippedFiles) {}
|
||||
|
||||
// One transaction for the whole sheet keeps the Hibernate session open so an existing
|
||||
// document's lazy receivers collection initialises during an idempotent re-import.
|
||||
// Invoked cross-bean from the orchestrator, so the @Transactional proxy applies.
|
||||
@Transactional
|
||||
public LoadResult load(File artifact) {
|
||||
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(artifact, REQUIRED_HEADERS);
|
||||
int processed = 0;
|
||||
List<ImportStatus.SkippedFile> skipped = new ArrayList<>();
|
||||
// 1-based source row number for ops triage breadcrumbs (the spreadsheet header is row 1,
|
||||
// so the first data row is row 2 — matches what an operator sees in the .xlsx).
|
||||
int rowNumber = 1;
|
||||
for (CanonicalSheetReader.Row row : rows) {
|
||||
rowNumber++;
|
||||
String index = row.get("index");
|
||||
if (index.isBlank()) continue;
|
||||
Optional<ImportStatus.SkipReason> skipReason = importRow(row, index, rowNumber);
|
||||
if (skipReason.isPresent()) {
|
||||
skipped.add(new ImportStatus.SkippedFile(index, skipReason.get()));
|
||||
} else {
|
||||
processed++;
|
||||
}
|
||||
}
|
||||
log.info("Imported {} documents from {} ({} skipped)", processed, artifact.getName(), skipped.size());
|
||||
return new LoadResult(processed, skipped);
|
||||
}
|
||||
|
||||
private Optional<ImportStatus.SkipReason> importRow(CanonicalSheetReader.Row row, String index, int rowNumber) {
|
||||
if (!isValidImportIndex(index)) {
|
||||
// Breadcrumb is the source row number, NOT the raw (possibly-hostile) index — an
|
||||
// operator triaging the import can find the offending row in the .xlsx without us
|
||||
// echoing attacker-controlled input into the log.
|
||||
log.warn("Skipping import row {}: index rejected (fails catalog-shape validation)", rowNumber);
|
||||
return Optional.of(ImportStatus.SkipReason.INVALID_FILENAME_PATH_TRAVERSAL);
|
||||
}
|
||||
Optional<File> resolved = resolvePdfByIndex(index, rowNumber);
|
||||
if (resolved.isEmpty()) {
|
||||
// Distinct from the "index rejected" skip above: the index is VALID but no
|
||||
// <index>.pdf is on disk, so the row becomes a normal PLACEHOLDER (not skipped). The
|
||||
// index is a validated catalog id (no hostile content), so it is safe to log here —
|
||||
// this surfaces a corpus that drifts from the "<index>.pdf" assumption (e.g. a file
|
||||
// that arrived under a different name) rather than dropping it silently.
|
||||
log.info("Import row {}: index {} is valid but {}.pdf is absent — creating PLACEHOLDER",
|
||||
rowNumber, index, index);
|
||||
} else {
|
||||
try {
|
||||
if (!isPdfMagicBytes(resolved.get())) {
|
||||
return Optional.of(ImportStatus.SkipReason.INVALID_PDF_SIGNATURE);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error("Magic-byte check failed for row {}", index, e);
|
||||
return Optional.of(ImportStatus.SkipReason.FILE_READ_ERROR);
|
||||
}
|
||||
}
|
||||
return persist(row, index, resolved);
|
||||
}
|
||||
|
||||
private Optional<ImportStatus.SkipReason> persist(CanonicalSheetReader.Row row, String index, Optional<File> file) {
|
||||
Document existing = documentService.findByOriginalFilename(index).orElse(null);
|
||||
if (existing != null && existing.getStatus() != DocumentStatus.PLACEHOLDER) {
|
||||
return Optional.of(ImportStatus.SkipReason.ALREADY_EXISTS);
|
||||
}
|
||||
|
||||
String s3Key = null;
|
||||
String contentType = null;
|
||||
DocumentStatus status = DocumentStatus.PLACEHOLDER;
|
||||
if (file.isPresent()) {
|
||||
contentType = probeContentType(file.get());
|
||||
s3Key = "documents/" + UUID.randomUUID() + "_" + file.get().getName();
|
||||
try {
|
||||
uploadToS3(file.get(), s3Key, contentType);
|
||||
status = DocumentStatus.UPLOADED;
|
||||
} catch (Exception e) {
|
||||
log.error("S3 upload failed for {}", file.get().getName(), e);
|
||||
return Optional.of(ImportStatus.SkipReason.S3_UPLOAD_FAILED);
|
||||
}
|
||||
}
|
||||
|
||||
Document doc = buildDocument(row, index, existing, s3Key, contentType, status);
|
||||
Document saved = documentService.save(doc);
|
||||
if (file.isPresent()) {
|
||||
thumbnailAsyncRunner.dispatchAfterCommit(saved.getId());
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
private Document buildDocument(CanonicalSheetReader.Row row, String index, Document existing,
|
||||
String s3Key, String contentType, DocumentStatus status) {
|
||||
Document doc = existing != null ? existing
|
||||
: Document.builder().originalFilename(index).build();
|
||||
applyAttribution(doc, row);
|
||||
applyDates(doc, row);
|
||||
applyAuthoritativeAssociations(doc, row);
|
||||
applyFileMetadata(doc, s3Key, contentType, status);
|
||||
applyComputedFlags(doc);
|
||||
return doc;
|
||||
}
|
||||
|
||||
// Sender + raw sender/receiver text. The raw cells are always retained verbatim, even
|
||||
// when a person is linked — the load-bearing invariant behind the merge story (ADR-025).
|
||||
private void applyAttribution(Document doc, CanonicalSheetReader.Row row) {
|
||||
String senderName = row.get("sender_name");
|
||||
String receiverNames = row.get("receiver_names");
|
||||
Person sender = resolveSender(row.get("sender_person_id"), senderName);
|
||||
doc.setSender(sender);
|
||||
doc.setSenderText(blankToNull(senderName));
|
||||
doc.setReceiverText(blankToNull(receiverNames));
|
||||
}
|
||||
|
||||
// Date triplet + raw + location. Pure value parsing, no semantic logic.
|
||||
private void applyDates(Document doc, CanonicalSheetReader.Row row) {
|
||||
doc.setDocumentDate(parseIsoDate(row.get("date_iso")));
|
||||
doc.setMetaDatePrecision(parsePrecision(row.get("date_precision")));
|
||||
doc.setMetaDateEnd(parseIsoDate(row.get("date_end")));
|
||||
doc.setMetaDateRaw(blankToNull(row.get("date_raw")));
|
||||
doc.setLocation(blankToNull(row.get("location")));
|
||||
doc.setSummary(blankToNull(row.get("summary")));
|
||||
}
|
||||
|
||||
// Receivers and tags are owned by the canonical row (ADR-025): clear then re-populate so a
|
||||
// shrunk set on re-import prunes stale links rather than accumulating them. The
|
||||
// "preserve human edits" rule does NOT extend to these collections.
|
||||
private void applyAuthoritativeAssociations(Document doc, CanonicalSheetReader.Row row) {
|
||||
Set<Person> receivers = resolveReceivers(row.get("receiver_person_ids"), row.get("receiver_names"));
|
||||
doc.getReceivers().clear();
|
||||
doc.getReceivers().addAll(receivers);
|
||||
attachTag(doc, row.get("tags"));
|
||||
}
|
||||
|
||||
// S3 key, content type, status, and the index-derived title. The title formula lives in
|
||||
// the document package's DocumentTitleFactory (single source of truth, #726); by this point
|
||||
// applyDates has populated the date/location and originalFilename carries the index.
|
||||
private void applyFileMetadata(Document doc, String s3Key, String contentType,
|
||||
DocumentStatus status) {
|
||||
doc.setStatus(status);
|
||||
doc.setFilePath(s3Key);
|
||||
doc.setContentType(contentType);
|
||||
doc.setTitle(documentTitleFactory.build(doc));
|
||||
}
|
||||
|
||||
// metadataComplete: a document counts as fully described if any of the three "who/when"
|
||||
// pieces is filled. Called last so the upstream setters have already populated the doc.
|
||||
private void applyComputedFlags(Document doc) {
|
||||
doc.setMetadataComplete(doc.getDocumentDate() != null
|
||||
|| doc.getSender() != null
|
||||
|| !doc.getReceivers().isEmpty());
|
||||
}
|
||||
|
||||
// ─── attribution routing — register-first, always retain raw ─────────────────────
|
||||
|
||||
private Person resolveSender(String slug, String rawName) {
|
||||
if (slug.isBlank()) return null;
|
||||
return resolvePerson(slug, rawName);
|
||||
}
|
||||
|
||||
// Zips the parallel `receiver_person_ids` and `receiver_names` columns by position so an
|
||||
// unresolved receiver becomes a provisional Person whose lastName is the human name from
|
||||
// `receiver_names`, not the slug. If the names list is shorter than the slugs list (rare —
|
||||
// canonical data zips them 1:1), missing entries fall back to slug-as-name.
|
||||
private Set<Person> resolveReceivers(String slugs, String names) {
|
||||
List<String> slugList = CanonicalSheetReader.splitList(slugs);
|
||||
List<String> nameList = CanonicalSheetReader.splitList(names);
|
||||
Set<Person> receivers = new LinkedHashSet<>();
|
||||
for (int i = 0; i < slugList.size(); i++) {
|
||||
String slug = slugList.get(i);
|
||||
String name = i < nameList.size() ? nameList.get(i) : slug;
|
||||
receivers.add(resolvePerson(slug, name));
|
||||
}
|
||||
return receivers;
|
||||
}
|
||||
|
||||
private Person resolvePerson(String slug, String rawName) {
|
||||
return personService.findBySourceRef(slug)
|
||||
.orElseGet(() -> personService.upsertBySourceRef(PersonUpsertCommand.builder()
|
||||
.sourceRef(slug)
|
||||
.lastName(blankToNull(rawName) == null ? slug : rawName)
|
||||
.personType(PersonType.PERSON)
|
||||
.provisional(true)
|
||||
.build()));
|
||||
}
|
||||
|
||||
// Authoritative: the canonical row defines the document's tags exactly. Clearing first
|
||||
// means a tag removed from the row is pruned on re-import (ADR-025).
|
||||
private void attachTag(Document doc, String tagPath) {
|
||||
doc.getTags().clear();
|
||||
if (tagPath.isBlank()) return;
|
||||
tagService.findBySourceRef(tagPath).ifPresent(tag -> doc.getTags().add(tag));
|
||||
}
|
||||
|
||||
// ─── clean-value parsing (no semantic logic) ─────────────────────────────────────
|
||||
|
||||
private static LocalDate parseIsoDate(String value) {
|
||||
if (value == null || value.isBlank()) return null;
|
||||
try {
|
||||
return LocalDate.parse(value.trim());
|
||||
} catch (DateTimeParseException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static DatePrecision parsePrecision(String value) {
|
||||
if (value == null || value.isBlank()) return DatePrecision.UNKNOWN;
|
||||
try {
|
||||
return DatePrecision.valueOf(value.trim());
|
||||
} catch (IllegalArgumentException e) {
|
||||
return DatePrecision.UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
||||
// ─── file handling + S3 (small ≤20-line methods) ─────────────────────────────────
|
||||
|
||||
private String probeContentType(File file) {
|
||||
try {
|
||||
String probed = Files.probeContentType(file.toPath());
|
||||
return probed != null ? probed : "application/octet-stream";
|
||||
} catch (IOException e) {
|
||||
return "application/octet-stream";
|
||||
}
|
||||
}
|
||||
|
||||
private void uploadToS3(File file, String s3Key, String contentType) {
|
||||
s3Client.putObject(PutObjectRequest.builder()
|
||||
.bucket(bucketName)
|
||||
.key(s3Key)
|
||||
.contentType(contentType)
|
||||
.build(),
|
||||
RequestBody.fromFile(file));
|
||||
}
|
||||
|
||||
// ─── index validation + containment — defense-in-depth, do not weaken ────────────
|
||||
|
||||
// The index is the only thing that drives the on-disk lookup, so it must never contain a
|
||||
// path separator, traversal token, slash homoglyph, null byte, or absolute-path marker —
|
||||
// each guard mirrors the filename guards ported from MassImportService — and it must match
|
||||
// the strict catalog shape so anything unexpected is skipped loudly rather than read.
|
||||
private boolean isValidImportIndex(String index) {
|
||||
if (index == null || index.isBlank()) return false;
|
||||
if (index.contains("/")) return false;
|
||||
if (index.contains("\\")) return false;
|
||||
if (index.contains("∕")) return false; // U+2215 DIVISION SLASH
|
||||
if (index.contains("/")) return false; // U+FF0F FULLWIDTH SOLIDUS
|
||||
if (index.contains("⧵")) return false; // U+29F5 REVERSE SOLIDUS OPERATOR
|
||||
if (index.contains(".")) return false; // no dots — "<index>.pdf" is the only extension
|
||||
if (index.contains("\0")) return false;
|
||||
if (Paths.get(index).isAbsolute()) return false;
|
||||
return INDEX_PATTERN.matcher(index).matches();
|
||||
}
|
||||
|
||||
private boolean isPdfMagicBytes(File file) throws IOException {
|
||||
// FileStreamOpener is injected so tests can stub a throwing implementation for the
|
||||
// IO-error branch without spying on the importer itself.
|
||||
try (InputStream is = fileStreamOpener.open(file)) {
|
||||
byte[] header = is.readNBytes(4);
|
||||
return header.length == 4
|
||||
&& header[0] == 0x25 // %
|
||||
&& header[1] == 0x50 // P
|
||||
&& header[2] == 0x44 // D
|
||||
&& header[3] == 0x46; // F
|
||||
}
|
||||
}
|
||||
|
||||
// O(1) direct lookup: the PDF is exactly importDir/<index>.pdf. The caller has already
|
||||
// validated the index shape; the canonical-path containment assertion below is
|
||||
// defense-in-depth so even a symlinked <index>.pdf cannot read outside importDir.
|
||||
private Optional<File> resolvePdfByIndex(String index, int rowNumber) {
|
||||
File baseDir = new File(importDir);
|
||||
File candidate = baseDir.toPath().resolve(index + ".pdf").toFile();
|
||||
try {
|
||||
if (!candidate.isFile()) return Optional.empty();
|
||||
String baseDirCanonical = baseDir.getCanonicalPath();
|
||||
if (!candidate.getCanonicalPath().startsWith(baseDirCanonical + File.separator)) {
|
||||
throw DomainException.internal(ErrorCode.INTERNAL_ERROR, "Path escape detected: " + candidate);
|
||||
}
|
||||
return Optional.of(candidate);
|
||||
} catch (IOException e) {
|
||||
// Distinct from the deliberate symlink-escape abort above (which throws): canonical
|
||||
// resolution itself failed (e.g. the OS rejected the path mid-resolution). We fail
|
||||
// safe to a PLACEHOLDER, but never silently — log it so the asymmetry surfaces in ops.
|
||||
log.warn("Canonical path resolution failed for import row {}: treating {}.pdf as absent",
|
||||
rowNumber, index, e);
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
||||
private static String blankToNull(String s) {
|
||||
return (s == null || s.isBlank()) ? null : s;
|
||||
}
|
||||
}
|
||||
@@ -1,33 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
|
||||
* Test seam for opening a {@link File} as an {@link InputStream}. Extracted so the magic-byte
|
||||
* check in {@link DocumentImporter} can be unit-tested for the IO-error branch by injecting a
|
||||
* mock that throws, without needing a Mockito spy on the importer itself.
|
||||
*
|
||||
* <p>Production uses {@link DefaultFileStreamOpener}, a one-line delegate to
|
||||
* {@code new FileInputStream(file)}.
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface FileStreamOpener {
|
||||
|
||||
/** Opens {@code file} for sequential reads. Caller closes the returned stream. */
|
||||
InputStream open(File file) throws IOException;
|
||||
|
||||
/** Default production implementation: plain {@code FileInputStream}. */
|
||||
@Component
|
||||
final class DefaultFileStreamOpener implements FileStreamOpener {
|
||||
|
||||
@Override
|
||||
public InputStream open(File file) throws IOException {
|
||||
return new FileInputStream(file);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,50 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Async import state surfaced to {@code admin/system/ImportStatusCard.svelte} via the
|
||||
* generated types. The shape ({@code state, statusCode, processed, skippedFiles, skipped})
|
||||
* is kept verbatim from the retired MassImportService so the admin UI keeps working.
|
||||
*/
|
||||
public record ImportStatus(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) State state,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) String statusCode,
|
||||
@JsonIgnore String message,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) int processed,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) List<SkippedFile> skippedFiles,
|
||||
LocalDateTime startedAt
|
||||
) {
|
||||
|
||||
public enum State { IDLE, RUNNING, DONE, FAILED }
|
||||
|
||||
public enum SkipReason {
|
||||
INVALID_FILENAME_PATH_TRAVERSAL,
|
||||
INVALID_PDF_SIGNATURE,
|
||||
FILE_READ_ERROR,
|
||||
ALREADY_EXISTS,
|
||||
S3_UPLOAD_FAILED
|
||||
}
|
||||
|
||||
public record SkippedFile(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) String filename,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) SkipReason reason
|
||||
) {}
|
||||
|
||||
// Note: @Schema on a record accessor method is not picked up by SpringDoc; the
|
||||
// "skipped" count is a computed convenience field derived from skippedFiles.size().
|
||||
@JsonProperty("skipped")
|
||||
public int skipped() {
|
||||
return skippedFiles.size();
|
||||
}
|
||||
|
||||
/** Defensive-copy constructor — callers cannot mutate the stored list after construction. */
|
||||
public ImportStatus {
|
||||
skippedFiles = List.copyOf(skippedFiles);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,392 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.poi.ss.usermodel.*;
|
||||
import java.util.Objects;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.document.Document;
|
||||
import org.raddatz.familienarchiv.document.DocumentService;
|
||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.tag.Tag;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonNameParser;
|
||||
import org.raddatz.familienarchiv.person.PersonService;
|
||||
import org.raddatz.familienarchiv.tag.TagService;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.NodeList;
|
||||
import software.amazon.awssdk.core.sync.RequestBody;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.zip.ZipFile;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class MassImportService {
|
||||
|
||||
public enum State { IDLE, RUNNING, DONE, FAILED }
|
||||
|
||||
public record ImportStatus(State state, String message, int processed, LocalDateTime startedAt) {}
|
||||
|
||||
private volatile ImportStatus currentStatus = new ImportStatus(State.IDLE, "Kein Import gestartet.", 0, null);
|
||||
|
||||
public ImportStatus getStatus() {
|
||||
return currentStatus;
|
||||
}
|
||||
|
||||
private final DocumentService documentService;
|
||||
private final PersonService personService;
|
||||
private final TagService tagService;
|
||||
private final S3Client s3Client;
|
||||
private final ThumbnailAsyncRunner thumbnailAsyncRunner;
|
||||
|
||||
@Value("${app.s3.bucket}")
|
||||
private String bucketName;
|
||||
|
||||
@Value("${app.import.col.index:0}")
|
||||
private int colIndex;
|
||||
|
||||
@Value("${app.import.col.box:1}")
|
||||
private int colBox;
|
||||
|
||||
@Value("${app.import.col.folder:2}")
|
||||
private int colFolder;
|
||||
|
||||
@Value("${app.import.col.sender:3}")
|
||||
private int colSender;
|
||||
|
||||
@Value("${app.import.col.receivers:5}")
|
||||
private int colReceivers;
|
||||
|
||||
@Value("${app.import.col.date:7}")
|
||||
private int colDate;
|
||||
|
||||
@Value("${app.import.col.location:9}")
|
||||
private int colLocation;
|
||||
|
||||
@Value("${app.import.col.tags:10}")
|
||||
private int colTags;
|
||||
|
||||
@Value("${app.import.col.summary:11}")
|
||||
private int colSummary;
|
||||
|
||||
@Value("${app.import.col.transcription:13}")
|
||||
private int colTranscription;
|
||||
|
||||
@Value("${app.import.dir:/import}")
|
||||
private String importDir;
|
||||
|
||||
private static final DateTimeFormatter GERMAN_DATE = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN);
|
||||
|
||||
// ODS XML namespaces
|
||||
private static final String NS_TABLE = "urn:oasis:names:tc:opendocument:xmlns:table:1.0";
|
||||
private static final String NS_TEXT = "urn:oasis:names:tc:opendocument:xmlns:text:1.0";
|
||||
|
||||
// We only need up to this many columns; caps repeated-empty-cell expansion
|
||||
private static final int MAX_COLS = 20;
|
||||
|
||||
@Async
|
||||
public void runImportAsync() {
|
||||
if (currentStatus.state() == State.RUNNING) {
|
||||
throw DomainException.conflict(ErrorCode.IMPORT_ALREADY_RUNNING, "A mass import is already in progress");
|
||||
}
|
||||
currentStatus = new ImportStatus(State.RUNNING, "Import läuft...", 0, LocalDateTime.now());
|
||||
try {
|
||||
File spreadsheet = findSpreadsheetFile();
|
||||
log.info("Starte Massenimport aus: {}", spreadsheet.getAbsolutePath());
|
||||
int processed = processRows(readSpreadsheet(spreadsheet));
|
||||
currentStatus = new ImportStatus(State.DONE,
|
||||
"Import abgeschlossen. " + processed + " Dokumente verarbeitet.",
|
||||
processed, currentStatus.startedAt());
|
||||
} catch (Exception e) {
|
||||
log.error("Massenimport fehlgeschlagen", e);
|
||||
currentStatus = new ImportStatus(State.FAILED, "Fehler: " + e.getMessage(), 0, currentStatus.startedAt());
|
||||
}
|
||||
}
|
||||
|
||||
private File findSpreadsheetFile() throws IOException {
|
||||
try (Stream<Path> files = Files.list(Paths.get(importDir))) {
|
||||
return files
|
||||
.filter(p -> {
|
||||
String name = p.toString().toLowerCase();
|
||||
return name.endsWith(".ods") || name.endsWith(".xlsx") || name.endsWith(".xls");
|
||||
})
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new RuntimeException(
|
||||
"Keine Tabellendatei (.ods/.xlsx/.xls) in " + importDir + " gefunden!"))
|
||||
.toFile();
|
||||
}
|
||||
}
|
||||
|
||||
// --- Spreadsheet reading (format-specific, produces neutral List<List<String>>) ---
|
||||
|
||||
private List<List<String>> readSpreadsheet(File file) throws Exception {
|
||||
String name = file.getName().toLowerCase();
|
||||
if (name.endsWith(".ods")) {
|
||||
return readOds(file);
|
||||
}
|
||||
return readXlsx(file);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads an ODS file by parsing its content.xml directly (no extra library needed).
|
||||
* ODS is a ZIP archive; content.xml holds the spreadsheet data as XML.
|
||||
*/
|
||||
private List<List<String>> readOds(File file) throws Exception {
|
||||
List<List<String>> result = new ArrayList<>();
|
||||
|
||||
try (ZipFile zip = new ZipFile(file)) {
|
||||
var entry = zip.getEntry("content.xml");
|
||||
if (entry == null) throw new RuntimeException("Ungültige ODS-Datei: content.xml fehlt");
|
||||
|
||||
var factory = DocumentBuilderFactory.newInstance();
|
||||
factory.setNamespaceAware(true);
|
||||
var builder = factory.newDocumentBuilder();
|
||||
var doc = builder.parse(zip.getInputStream(entry));
|
||||
|
||||
NodeList tables = doc.getElementsByTagNameNS(NS_TABLE, "table");
|
||||
if (tables.getLength() == 0) return result;
|
||||
|
||||
var table = (Element) tables.item(0);
|
||||
NodeList rows = table.getElementsByTagNameNS(NS_TABLE, "table-row");
|
||||
|
||||
for (int i = 0; i < rows.getLength(); i++) {
|
||||
var row = (Element) rows.item(i);
|
||||
List<String> rowData = new ArrayList<>();
|
||||
NodeList cells = row.getElementsByTagNameNS(NS_TABLE, "table-cell");
|
||||
|
||||
for (int j = 0; j < cells.getLength() && rowData.size() < MAX_COLS; j++) {
|
||||
var cell = (Element) cells.item(j);
|
||||
|
||||
// Read the display text (first <text:p>)
|
||||
String value = "";
|
||||
NodeList textNodes = cell.getElementsByTagNameNS(NS_TEXT, "p");
|
||||
if (textNodes.getLength() > 0) {
|
||||
value = textNodes.item(0).getTextContent().trim();
|
||||
}
|
||||
|
||||
// Expand number-columns-repeated (capped at MAX_COLS)
|
||||
String repeatAttr = cell.getAttributeNS(NS_TABLE, "number-columns-repeated");
|
||||
int repeat = repeatAttr.isEmpty() ? 1 : Integer.parseInt(repeatAttr);
|
||||
repeat = Math.min(repeat, MAX_COLS - rowData.size());
|
||||
|
||||
for (int r = 0; r < repeat; r++) {
|
||||
rowData.add(value);
|
||||
}
|
||||
}
|
||||
result.add(rowData);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/** Reads an XLSX/XLS file using Apache POI. Converts all cells to strings. */
|
||||
private List<List<String>> readXlsx(File file) throws Exception {
|
||||
List<List<String>> result = new ArrayList<>();
|
||||
try (FileInputStream fis = new FileInputStream(file);
|
||||
Workbook workbook = WorkbookFactory.create(fis)) {
|
||||
|
||||
Sheet sheet = workbook.getSheetAt(0);
|
||||
for (int i = 0; i <= sheet.getLastRowNum(); i++) {
|
||||
Row row = sheet.getRow(i);
|
||||
List<String> rowData = new ArrayList<>();
|
||||
if (row != null) {
|
||||
for (int j = 0; j < MAX_COLS; j++) {
|
||||
rowData.add(xlsxCellToString(row.getCell(j)));
|
||||
}
|
||||
}
|
||||
result.add(rowData);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private String xlsxCellToString(Cell cell) {
|
||||
if (cell == null) return "";
|
||||
return switch (cell.getCellType()) {
|
||||
case STRING -> cell.getStringCellValue();
|
||||
case NUMERIC -> {
|
||||
if (DateUtil.isCellDateFormatted(cell)) {
|
||||
yield cell.getLocalDateTimeCellValue().toLocalDate().toString(); // ISO
|
||||
}
|
||||
yield String.valueOf((int) cell.getNumericCellValue());
|
||||
}
|
||||
case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
|
||||
default -> "";
|
||||
};
|
||||
}
|
||||
|
||||
// --- Import logic (works on neutral List<String> rows) ---
|
||||
|
||||
private int processRows(List<List<String>> rows) {
|
||||
int count = 0;
|
||||
for (int i = 1; i < rows.size(); i++) { // skip header row
|
||||
List<String> cells = rows.get(i);
|
||||
String index = getCell(cells, colIndex);
|
||||
if (index.isBlank()) continue;
|
||||
|
||||
String filename = index.contains(".") ? index : index + ".pdf";
|
||||
Optional<File> fileOnDisk = findFileRecursive(filename);
|
||||
if (fileOnDisk.isEmpty()) {
|
||||
log.warn("Datei nicht gefunden, importiere nur Metadaten: {}", filename);
|
||||
}
|
||||
importSingleDocument(cells, fileOnDisk, filename, index);
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
@Transactional
|
||||
protected void importSingleDocument(List<String> cells, Optional<File> file, String originalFilename, String index) {
|
||||
Optional<Document> existing = documentService.findByOriginalFilename(originalFilename);
|
||||
if (existing.isPresent() && existing.get().getStatus() != DocumentStatus.PLACEHOLDER) {
|
||||
log.info("Dokument {} existiert bereits, überspringe.", originalFilename);
|
||||
return;
|
||||
}
|
||||
|
||||
String archiveBox = getCell(cells, colBox);
|
||||
String archiveFolder = getCell(cells, colFolder);
|
||||
String senderRaw = getCell(cells, colSender);
|
||||
String receiversRaw = getCell(cells, colReceivers);
|
||||
LocalDate date = parseDate(getCell(cells, colDate));
|
||||
String location = getCell(cells, colLocation);
|
||||
String tagRaw = getCell(cells, colTags);
|
||||
String summary = getCell(cells, colSummary);
|
||||
String transcription = getCell(cells, colTranscription);
|
||||
|
||||
String s3Key = null;
|
||||
String contentType = null;
|
||||
DocumentStatus status = DocumentStatus.PLACEHOLDER;
|
||||
|
||||
if (file.isPresent()) {
|
||||
try {
|
||||
contentType = Files.probeContentType(file.get().toPath());
|
||||
} catch (IOException e) {
|
||||
contentType = null;
|
||||
}
|
||||
if (contentType == null) contentType = "application/octet-stream";
|
||||
|
||||
s3Key = "documents/" + UUID.randomUUID() + "_" + file.get().getName();
|
||||
try {
|
||||
s3Client.putObject(PutObjectRequest.builder()
|
||||
.bucket(bucketName)
|
||||
.key(s3Key)
|
||||
.contentType(contentType)
|
||||
.build(),
|
||||
RequestBody.fromFile(file.get()));
|
||||
status = DocumentStatus.UPLOADED;
|
||||
} catch (Exception e) {
|
||||
log.error("S3 Upload Fehler für {}", file.get().getName(), e);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
Person sender = senderRaw.isBlank() ? null : findOrCreatePerson(senderRaw);
|
||||
List<Person> receivers = PersonNameParser.parseReceivers(receiversRaw).stream()
|
||||
.map(this::findOrCreatePerson)
|
||||
.filter(Objects::nonNull)
|
||||
.toList();
|
||||
|
||||
Tag tag = null;
|
||||
if (!tagRaw.isBlank()) {
|
||||
tag = tagService.findOrCreate(tagRaw);
|
||||
}
|
||||
|
||||
Document doc = existing.orElse(Document.builder()
|
||||
.originalFilename(originalFilename)
|
||||
.build());
|
||||
|
||||
// Heuristic: mark as complete if at least one key field is present in the spreadsheet row
|
||||
boolean metadataComplete = date != null || !senderRaw.isBlank() || !receiversRaw.isBlank();
|
||||
|
||||
doc.setTitle(buildTitle(index, date, location));
|
||||
doc.setFilePath(s3Key);
|
||||
doc.setContentType(contentType);
|
||||
doc.setStatus(status);
|
||||
doc.setArchiveBox(archiveBox.isBlank() ? null : archiveBox);
|
||||
doc.setArchiveFolder(archiveFolder.isBlank() ? null : archiveFolder);
|
||||
doc.setDocumentDate(date);
|
||||
doc.setLocation(location.isBlank() ? null : location);
|
||||
doc.setSummary(summary.isBlank() ? null : summary);
|
||||
doc.setTranscription(transcription.isBlank() ? null : transcription);
|
||||
doc.setSender(sender);
|
||||
doc.getReceivers().addAll(receivers);
|
||||
if (tag != null) doc.getTags().add(tag);
|
||||
doc.setMetadataComplete(metadataComplete);
|
||||
|
||||
Document saved = documentService.save(doc);
|
||||
if (file.isPresent()) {
|
||||
thumbnailAsyncRunner.dispatchAfterCommit(saved.getId());
|
||||
}
|
||||
log.info("Importiert{}: {}", file.isEmpty() ? " (nur Metadaten)" : "", originalFilename);
|
||||
}
|
||||
|
||||
// --- Helpers ---
|
||||
|
||||
private String getCell(List<String> cells, int col) {
|
||||
if (col >= cells.size()) return "";
|
||||
String val = cells.get(col);
|
||||
return val == null ? "" : val.trim();
|
||||
}
|
||||
|
||||
private LocalDate parseDate(String value) {
|
||||
if (value == null || value.isBlank()) return null;
|
||||
try {
|
||||
return LocalDate.parse(value.trim());
|
||||
} catch (DateTimeParseException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private String buildTitle(String index, LocalDate date, String location) {
|
||||
StringBuilder sb = new StringBuilder(index);
|
||||
if (date != null) {
|
||||
sb.append(" \u2013 ").append(date.format(GERMAN_DATE));
|
||||
}
|
||||
if (location != null && !location.isBlank()) {
|
||||
sb.append(" \u2013 ").append(location);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private Person findOrCreatePerson(String rawName) {
|
||||
return personService.findOrCreateByAlias(rawName);
|
||||
}
|
||||
|
||||
private Optional<File> findFileRecursive(String filename) {
|
||||
try (Stream<Path> walk = Files.walk(Paths.get(importDir))) {
|
||||
return walk.filter(p -> !Files.isDirectory(p))
|
||||
.filter(p -> p.getFileName().toString().equals(filename))
|
||||
.map(Path::toFile)
|
||||
.findFirst();
|
||||
} catch (IOException e) {
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,99 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.person.PersonGeneration;
|
||||
import org.raddatz.familienarchiv.person.PersonService;
|
||||
import org.raddatz.familienarchiv.person.PersonType;
|
||||
import org.raddatz.familienarchiv.person.PersonUpsertCommand;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.File;
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Loads {@code canonical-persons.xlsx} (the register) into the person domain via
|
||||
* {@link PersonService}, upserting each person by the normalizer {@code person_id}
|
||||
* (source_ref). Register persons are confident identities, so {@code provisional} is
|
||||
* driven by the sheet's already-clean value (normally {@code False}).
|
||||
*/
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class PersonRegisterImporter {
|
||||
|
||||
static final List<String> REQUIRED_HEADERS = List.of("person_id", "last_name", "first_name", "provisional");
|
||||
|
||||
// Matches a leading optional G then a signed integer. Anchored at the
|
||||
// start so noise can't slip in before the number, but tolerant of trailing
|
||||
// commentary cells (e.g. "G 2 de Gruyter") since curated rows sometimes
|
||||
// carry an inline note. Out-of-range values are caught by the post-parse
|
||||
// range guard, not by the regex.
|
||||
private static final Pattern GENERATION_PATTERN = Pattern.compile("^\\s*G?\\s*(-?\\d+)");
|
||||
|
||||
private final PersonService personService;
|
||||
|
||||
public int load(File artifact) {
|
||||
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(artifact, REQUIRED_HEADERS);
|
||||
int processed = 0;
|
||||
for (CanonicalSheetReader.Row row : rows) {
|
||||
String personId = row.get("person_id");
|
||||
if (personId.isBlank()) continue;
|
||||
personService.upsertBySourceRef(toCommand(row, personId));
|
||||
processed++;
|
||||
}
|
||||
log.info("Imported {} register persons from {}", processed, artifact.getName());
|
||||
return processed;
|
||||
}
|
||||
|
||||
private PersonUpsertCommand toCommand(CanonicalSheetReader.Row row, String personId) {
|
||||
return PersonUpsertCommand.builder()
|
||||
.sourceRef(personId)
|
||||
.lastName(blankToNull(row.get("last_name")))
|
||||
.firstName(blankToNull(row.get("first_name")))
|
||||
.maidenName(blankToNull(row.get("maiden_name")))
|
||||
.notes(blankToNull(row.get("notes")))
|
||||
.birthYear(yearOf(row.get("birth_date")))
|
||||
.deathYear(yearOf(row.get("death_date")))
|
||||
.generation(parseGeneration(row.get("generation"), personId))
|
||||
.personType(PersonType.PERSON)
|
||||
.provisional(Boolean.parseBoolean(row.get("provisional")))
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses an optional {@code G n} generation cell. Returns null for blanks,
|
||||
* non-matching strings, and any value outside the {@link PersonGeneration}
|
||||
* bounds (mirroring the V70 CHECK). Out-of-range values log a WARN but
|
||||
* never abort the batch — REQ-IMP-001.
|
||||
*/
|
||||
static Integer parseGeneration(String raw, String personId) {
|
||||
if (raw == null || raw.isBlank()) return null;
|
||||
Matcher m = GENERATION_PATTERN.matcher(raw);
|
||||
if (!m.find()) return null;
|
||||
int parsed = Integer.parseInt(m.group(1));
|
||||
if (parsed < PersonGeneration.MIN_GENERATION || parsed > PersonGeneration.MAX_GENERATION) {
|
||||
log.warn("Skipping out-of-range generation '{}' for row {}", raw, personId);
|
||||
return null;
|
||||
}
|
||||
log.debug("Parsed generation '{}' for person {}", raw, personId);
|
||||
return parsed;
|
||||
}
|
||||
|
||||
private static Integer yearOf(String isoDate) {
|
||||
if (isoDate == null || isoDate.isBlank()) return null;
|
||||
try {
|
||||
return LocalDate.parse(isoDate.trim()).getYear();
|
||||
} catch (DateTimeParseException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static String blankToNull(String s) {
|
||||
return (s == null || s.isBlank()) ? null : s;
|
||||
}
|
||||
}
|
||||
@@ -1,153 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonGeneration;
|
||||
import org.raddatz.familienarchiv.person.PersonService;
|
||||
import org.raddatz.familienarchiv.person.PersonType;
|
||||
import org.raddatz.familienarchiv.person.PersonUpsertCommand;
|
||||
import org.raddatz.familienarchiv.person.relationship.RelationType;
|
||||
import org.raddatz.familienarchiv.person.relationship.RelationshipService;
|
||||
import org.raddatz.familienarchiv.person.relationship.dto.CreateRelationshipRequest;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* Loads {@code canonical-persons-tree.json} into the person + relationship domains.
|
||||
* Tree persons are upserted via {@link PersonService} keyed on the shared
|
||||
* {@code personId} slug (which Phase 1 #670 now emits into the tree), so they reconcile
|
||||
* with the register rather than duplicating it. Relationships reference persons by the
|
||||
* tree's local {@code rowId}; each side is mapped to the upserted person's UUID and
|
||||
* created through {@link RelationshipService} (never the relationship repository —
|
||||
* layering rule). A duplicate relationship on re-import is swallowed for idempotency.
|
||||
*/
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class PersonTreeImporter {
|
||||
|
||||
// The tree JSON is a local implementation detail, not a shared API payload, so the
|
||||
// importer owns its own mapper rather than depending on the web ObjectMapper bean.
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private final PersonService personService;
|
||||
private final RelationshipService relationshipService;
|
||||
|
||||
public int load(File artifact) {
|
||||
JsonNode root = readTree(artifact);
|
||||
Map<String, UUID> idByRowId = upsertPersons(root.path("persons"));
|
||||
int relationships = createRelationships(root.path("relationships"), idByRowId);
|
||||
log.info("Imported {} tree persons and {} relationships from {}",
|
||||
idByRowId.size(), relationships, artifact.getName());
|
||||
return idByRowId.size();
|
||||
}
|
||||
|
||||
private JsonNode readTree(File artifact) {
|
||||
try {
|
||||
return OBJECT_MAPPER.readTree(artifact);
|
||||
} catch (Exception e) {
|
||||
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
|
||||
"Unreadable canonical artifact: " + artifact.getName());
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String, UUID> upsertPersons(JsonNode persons) {
|
||||
Map<String, UUID> idByRowId = new HashMap<>();
|
||||
for (JsonNode node : persons) {
|
||||
String personId = text(node, "personId");
|
||||
if (personId.isBlank()) continue;
|
||||
Person person = personService.upsertBySourceRef(toCommand(node, personId));
|
||||
idByRowId.put(text(node, "rowId"), person.getId());
|
||||
}
|
||||
return idByRowId;
|
||||
}
|
||||
|
||||
private PersonUpsertCommand toCommand(JsonNode node, String personId) {
|
||||
return PersonUpsertCommand.builder()
|
||||
.sourceRef(personId)
|
||||
.lastName(blankToNull(text(node, "lastName")))
|
||||
.firstName(blankToNull(text(node, "firstName")))
|
||||
.maidenName(blankToNull(text(node, "maidenName")))
|
||||
.notes(blankToNull(text(node, "notes")))
|
||||
.birthYear(intOrNull(node, "birthYear"))
|
||||
.deathYear(intOrNull(node, "deathYear"))
|
||||
.generation(generationOrNull(node, personId))
|
||||
.familyMember(node.path("familyMember").asBoolean(false))
|
||||
.personType(PersonType.PERSON)
|
||||
.provisional(false)
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the JSON {@code generation} value if present and within the
|
||||
* {@link PersonGeneration} bounds; null otherwise. Out-of-range values
|
||||
* log a WARN but never abort the batch — mirrors the register-importer
|
||||
* skip-and-warn policy.
|
||||
*/
|
||||
private static Integer generationOrNull(JsonNode node, String personId) {
|
||||
Integer raw = intOrNull(node, "generation");
|
||||
if (raw == null) return null;
|
||||
if (raw < PersonGeneration.MIN_GENERATION || raw > PersonGeneration.MAX_GENERATION) {
|
||||
log.warn("Skipping out-of-range generation '{}' for person {}", raw, personId);
|
||||
return null;
|
||||
}
|
||||
return raw;
|
||||
}
|
||||
|
||||
private int createRelationships(JsonNode relationships, Map<String, UUID> idByRowId) {
|
||||
int created = 0;
|
||||
for (JsonNode node : relationships) {
|
||||
// Trap: a relationship node's personId / relatedPersonId fields carry the tree's
|
||||
// local rowId (e.g. "row_a"), NOT a person slug. They are resolved through
|
||||
// idByRowId to the upserted person's UUID.
|
||||
UUID person = idByRowId.get(text(node, "personId"));
|
||||
UUID related = idByRowId.get(text(node, "relatedPersonId"));
|
||||
if (person == null || related == null) {
|
||||
log.warn("Skipping tree relationship with unresolved rowId: {} -> {}",
|
||||
text(node, "personId"), text(node, "relatedPersonId"));
|
||||
continue;
|
||||
}
|
||||
if (addRelationshipIdempotently(person, related, text(node, "type"))) {
|
||||
created++;
|
||||
}
|
||||
}
|
||||
return created;
|
||||
}
|
||||
|
||||
private boolean addRelationshipIdempotently(UUID person, UUID related, String type) {
|
||||
try {
|
||||
relationshipService.addRelationship(person,
|
||||
new CreateRelationshipRequest(related, RelationType.valueOf(type), null, null, null));
|
||||
return true;
|
||||
} catch (DomainException e) {
|
||||
if (e.getCode() == ErrorCode.DUPLICATE_RELATIONSHIP
|
||||
|| e.getCode() == ErrorCode.CIRCULAR_RELATIONSHIP) {
|
||||
return false;
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
private static String text(JsonNode node, String field) {
|
||||
JsonNode value = node.get(field);
|
||||
return value == null || value.isNull() ? "" : value.asText();
|
||||
}
|
||||
|
||||
private static Integer intOrNull(JsonNode node, String field) {
|
||||
JsonNode value = node.get(field);
|
||||
return value == null || value.isNull() ? null : value.asInt();
|
||||
}
|
||||
|
||||
private static String blankToNull(String s) {
|
||||
return (s == null || s.isBlank()) ? null : s;
|
||||
}
|
||||
}
|
||||
@@ -1,54 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.tag.Tag;
|
||||
import org.raddatz.familienarchiv.tag.TagService;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* Loads {@code canonical-tag-tree.xlsx} into the tag domain via {@link TagService},
|
||||
* upserting each tag by its canonical {@code tag_path} (the source_ref). Parent links are
|
||||
* resolved by the parent's path, which is the child path with its last {@code /segment}
|
||||
* stripped. Rows are emitted parents-first by the normalizer, so a parent is always
|
||||
* resolved before any child references it.
|
||||
*/
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class TagTreeImporter {
|
||||
|
||||
static final List<String> REQUIRED_HEADERS = List.of("tag_path", "parent_name", "tag_name");
|
||||
private static final String PATH_SEPARATOR = "/";
|
||||
|
||||
private final TagService tagService;
|
||||
|
||||
public int load(File artifact) {
|
||||
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(artifact, REQUIRED_HEADERS);
|
||||
Map<String, UUID> idByPath = new HashMap<>();
|
||||
int processed = 0;
|
||||
for (CanonicalSheetReader.Row row : rows) {
|
||||
String path = row.get("tag_path");
|
||||
if (path.isBlank()) continue;
|
||||
UUID parentId = resolveParentId(path, idByPath);
|
||||
Tag tag = tagService.upsertBySourceRef(path, row.get("tag_name"), parentId);
|
||||
idByPath.put(path, tag.getId());
|
||||
processed++;
|
||||
}
|
||||
log.info("Imported {} tags from {}", processed, artifact.getName());
|
||||
return processed;
|
||||
}
|
||||
|
||||
private UUID resolveParentId(String path, Map<String, UUID> idByPath) {
|
||||
int lastSeparator = path.lastIndexOf(PATH_SEPARATOR);
|
||||
if (lastSeparator < 0) return null;
|
||||
String parentPath = path.substring(0, lastSeparator);
|
||||
return idByPath.get(parentPath);
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,6 @@
|
||||
package org.raddatz.familienarchiv.person;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import jakarta.persistence.*;
|
||||
import lombok.*;
|
||||
@@ -10,9 +9,6 @@ import org.raddatz.familienarchiv.user.DisplayNameFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
// prevents infinite recursion in JSON serialization; see ADR-022 for lazy-fetch context
|
||||
@JsonIgnoreProperties({"hibernateLazyInitializer", "handler"})
|
||||
@Entity
|
||||
@Table(name = "persons")
|
||||
@Data
|
||||
@@ -52,30 +48,11 @@ public class Person {
|
||||
private Integer birthYear;
|
||||
private Integer deathYear;
|
||||
|
||||
// Hand-curated generation index from canonical-persons.xlsx (G 0 = oldest).
|
||||
// Nullable for persons outside the curated family graph. Drives the
|
||||
// Stammbaum strict-rank seed (see #689) and re-import preserves human
|
||||
// edits via PersonService.preferHuman (ADR-025).
|
||||
@Column(name = "generation")
|
||||
private Integer generation;
|
||||
|
||||
@Column(name = "family_member", nullable = false)
|
||||
@Builder.Default
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private boolean familyMember = false;
|
||||
|
||||
// The normalizer person_id — join key and re-import idempotency key. Null for manually
|
||||
// created persons; unique among non-null values (see ADR-025).
|
||||
@Column(name = "source_ref")
|
||||
private String sourceRef;
|
||||
|
||||
// A provisional person is one the importer inferred but could not confidently identify.
|
||||
// Distinct from familyMember (a genealogical fact); set true only by the importer (Phase 3).
|
||||
@Column(name = "provisional", nullable = false)
|
||||
@Builder.Default
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private boolean provisional = false;
|
||||
|
||||
// Entity-graph navigation for JPA JOIN queries (e.g. DocumentSpecifications.hasText).
|
||||
// Uses entity relationship rather than cross-domain repository access, avoiding a
|
||||
// separate DB roundtrip while respecting domain boundaries.
|
||||
|
||||
@@ -22,15 +22,12 @@ import org.springframework.web.bind.annotation.*;
|
||||
import org.springframework.web.server.ResponseStatusException;
|
||||
|
||||
import jakarta.validation.Valid;
|
||||
import jakarta.validation.constraints.Max;
|
||||
import jakarta.validation.constraints.Min;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/persons")
|
||||
@RequiredArgsConstructor
|
||||
@Validated
|
||||
public class PersonController {
|
||||
|
||||
private final PersonService personService;
|
||||
@@ -38,37 +35,15 @@ public class PersonController {
|
||||
|
||||
@GetMapping
|
||||
@RequirePermission(Permission.READ_ALL)
|
||||
public ResponseEntity<PersonSearchResult> getPersons(
|
||||
public ResponseEntity<List<PersonSummaryDTO>> getPersons(
|
||||
@RequestParam(required = false) String q,
|
||||
@RequestParam(required = false) PersonType type,
|
||||
@RequestParam(required = false) Boolean familyOnly,
|
||||
@RequestParam(required = false) Boolean hasDocuments,
|
||||
@RequestParam(required = false) Boolean provisional,
|
||||
// review=true reveals the import noise (transcriber view); absent/false keeps the
|
||||
// clean reader default (familyMember OR documentCount > 0). The explicit filters AND
|
||||
// within whichever base the review flag selects.
|
||||
@RequestParam(required = false, defaultValue = "false") boolean review,
|
||||
@RequestParam(required = false) String sort,
|
||||
@RequestParam(defaultValue = "0") @Min(0) int page,
|
||||
@RequestParam(defaultValue = "50") @Min(1) @Max(100) int size) {
|
||||
// Legacy top-N-by-document-count path (reader dashboard): preserved, wrapped in the
|
||||
// same envelope so /api/persons always returns one shape. It is explicitly NON-paged —
|
||||
// the top-N query returns the complete result, so PersonSearchResult.topN reports an
|
||||
// honest totalElements (= returned count) instead of pretending to be a page slice.
|
||||
if ("documentCount".equals(sort) && q == null) {
|
||||
@RequestParam(required = false, defaultValue = "0") int size,
|
||||
@RequestParam(required = false) String sort) {
|
||||
if ("documentCount".equals(sort) && size > 0 && q == null) {
|
||||
int safeSize = Math.min(size, 50);
|
||||
List<PersonSummaryDTO> top = personService.findTopByDocumentCount(safeSize);
|
||||
return ResponseEntity.ok(PersonSearchResult.topN(top));
|
||||
return ResponseEntity.ok(personService.findTopByDocumentCount(safeSize));
|
||||
}
|
||||
|
||||
PersonFilter filter = PersonFilter.builder()
|
||||
.type(type)
|
||||
.familyOnly(familyOnly)
|
||||
.hasDocuments(hasDocuments)
|
||||
.provisional(provisional)
|
||||
.readerDefault(!review)
|
||||
.build();
|
||||
return ResponseEntity.ok(personService.search(filter, page, size, q));
|
||||
return ResponseEntity.ok(personService.findAll(q));
|
||||
}
|
||||
|
||||
@GetMapping("/{id}")
|
||||
@@ -135,21 +110,6 @@ public class PersonController {
|
||||
personService.mergePersons(id, UUID.fromString(targetIdStr));
|
||||
}
|
||||
|
||||
// Dedicated state transition that clears the provisional flag. A separate verb (not a
|
||||
// mass-assignable DTO field) so provisional can never be smuggled in via create/update.
|
||||
@PatchMapping("/{id}/confirm")
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public ResponseEntity<Person> confirmPerson(@PathVariable UUID id) {
|
||||
return ResponseEntity.ok(personService.confirmPerson(id));
|
||||
}
|
||||
|
||||
@DeleteMapping("/{id}")
|
||||
@ResponseStatus(HttpStatus.NO_CONTENT)
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public void deletePerson(@PathVariable UUID id) {
|
||||
personService.deletePerson(id);
|
||||
}
|
||||
|
||||
// ─── Alias endpoints ────────────────────────────────────────────────────
|
||||
|
||||
@GetMapping("/{id}/aliases")
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
package org.raddatz.familienarchiv.person;
|
||||
|
||||
import lombok.Builder;
|
||||
|
||||
/**
|
||||
* The reader/triage filter set for the persons directory, threaded as one value through
|
||||
* {@code PersonController -> PersonService -> PersonRepository}. Each field is nullable:
|
||||
* null means "do not constrain on this dimension".
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@code type} — restrict to a single {@link PersonType}.</li>
|
||||
* <li>{@code familyOnly} — when true, only {@code familyMember} persons.</li>
|
||||
* <li>{@code hasDocuments} — when true, only persons with documentCount > 0.</li>
|
||||
* <li>{@code provisional} — match the {@code Person.provisional} flag exactly.</li>
|
||||
* <li>{@code readerDefault} — when true, restrict to {@code familyMember OR documentCount > 0}
|
||||
* (the clean reader view). The explicit filters above AND with this restriction.</li>
|
||||
* </ul>
|
||||
*/
|
||||
@Builder
|
||||
public record PersonFilter(
|
||||
PersonType type,
|
||||
Boolean familyOnly,
|
||||
Boolean hasDocuments,
|
||||
Boolean provisional,
|
||||
boolean readerDefault
|
||||
) {
|
||||
/** The unconstrained "show all" filter (transcriber view, no reader restriction). */
|
||||
public static PersonFilter showAll() {
|
||||
return PersonFilter.builder().readerDefault(false).build();
|
||||
}
|
||||
|
||||
/** The clean reader default: familyMember OR documentCount > 0, no other constraints. */
|
||||
public static PersonFilter cleanDefault() {
|
||||
return PersonFilter.builder().readerDefault(true).build();
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
package org.raddatz.familienarchiv.person;
|
||||
|
||||
/**
 * Central definition of the allowed {@code persons.generation} range.
 *
 * <p>The V70 database CHECK, the Bean Validation annotations on {@code PersonUpdateDTO}
 * and the canonical importers all refer to these two constants, so widening the range
 * (for example to admit {@code G -1} ancestors) is a single-place change. The V70
 * migration comment has to be updated by hand whenever the bounds move.
 */
public final class PersonGeneration {

    /** Lowest permitted generation index (inclusive). */
    public static final int MIN_GENERATION = 0;

    /** Highest permitted generation index (inclusive). */
    public static final int MAX_GENERATION = 10;

    // Constants holder: not meant to be instantiated.
    private PersonGeneration() {}
}
|
||||
@@ -32,9 +32,6 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
// Lookup by full alias string, used during ODS mass import
|
||||
Optional<Person> findByAliasIgnoreCase(String alias);
|
||||
|
||||
// Lookup by the normalizer person_id, used for idempotent canonical re-import (Phase 3).
|
||||
Optional<Person> findBySourceRef(String sourceRef);
|
||||
|
||||
// Exact first+last name match, used for filename-based sender lookup
|
||||
Optional<Person> findByFirstNameIgnoreCaseAndLastNameIgnoreCase(String firstName, String lastName);
|
||||
|
||||
@@ -44,7 +41,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
|
||||
p.person_type AS personType,
|
||||
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
|
||||
p.family_member AS familyMember, p.provisional AS provisional,
|
||||
p.family_member AS familyMember,
|
||||
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
|
||||
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
|
||||
FROM persons p
|
||||
@@ -57,7 +54,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
|
||||
p.person_type AS personType,
|
||||
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
|
||||
p.family_member AS familyMember, p.provisional AS provisional,
|
||||
p.family_member AS familyMember,
|
||||
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
|
||||
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
|
||||
FROM persons p
|
||||
@@ -66,7 +63,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
OR LOWER(CONCAT(p.last_name,' ',COALESCE(p.first_name,''))) LIKE LOWER(CONCAT('%',:query,'%'))
|
||||
OR LOWER(p.alias) LIKE LOWER(CONCAT('%',:query,'%'))
|
||||
OR LOWER(a.last_name) LIKE LOWER(CONCAT('%',:query,'%'))
|
||||
GROUP BY p.id, p.title, p.first_name, p.last_name, p.person_type, p.alias, p.birth_year, p.death_year, p.notes, p.family_member, p.provisional
|
||||
GROUP BY p.id, p.title, p.first_name, p.last_name, p.person_type, p.alias, p.birth_year, p.death_year, p.notes, p.family_member
|
||||
ORDER BY p.last_name ASC, p.first_name ASC
|
||||
""",
|
||||
nativeQuery = true)
|
||||
@@ -78,7 +75,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
|
||||
p.person_type AS personType,
|
||||
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
|
||||
p.family_member AS familyMember, p.provisional AS provisional,
|
||||
p.family_member AS familyMember,
|
||||
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
|
||||
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
|
||||
FROM persons p
|
||||
@@ -88,61 +85,6 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
nativeQuery = true)
|
||||
List<PersonSummaryDTO> findTopByDocumentCount(@Param("limit") int limit);
|
||||
|
||||
// --- #667: filter-aware paged directory ---
|
||||
//
|
||||
// The slice query and the count query below MUST keep an IDENTICAL WHERE clause so the
|
||||
// rendered page and totalElements can never drift. Every filter is nullable: a null param
|
||||
// disables that predicate via the `:param IS NULL OR …` idiom. `readerDefault` (a plain
|
||||
// boolean) restricts to "familyMember OR has documents"; the explicit filters AND on top.
|
||||
// documentCount is recomputed inline (not via the SELECT alias) because WHERE cannot
|
||||
// reference a computed alias. All params are named — no string concatenation, no injection.
|
||||
String FILTER_WHERE = """
|
||||
WHERE (CAST(:type AS text) IS NULL OR p.person_type = CAST(:type AS text))
|
||||
AND (:familyOnly = FALSE OR :familyOnly IS NULL OR p.family_member = TRUE)
|
||||
AND (:hasDocuments = FALSE OR :hasDocuments IS NULL OR (
|
||||
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
|
||||
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id)) > 0)
|
||||
AND (:provisional IS NULL OR p.provisional = :provisional)
|
||||
AND (:readerDefault = FALSE OR (
|
||||
p.family_member = TRUE OR (
|
||||
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
|
||||
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id)) > 0))
|
||||
AND (CAST(:query AS text) IS NULL OR
|
||||
LOWER(CONCAT(COALESCE(p.first_name,''),' ',p.last_name)) LIKE LOWER(CONCAT('%',CAST(:query AS text),'%'))
|
||||
OR LOWER(CONCAT(p.last_name,' ',COALESCE(p.first_name,''))) LIKE LOWER(CONCAT('%',CAST(:query AS text),'%'))
|
||||
OR LOWER(p.alias) LIKE LOWER(CONCAT('%',CAST(:query AS text),'%')))
|
||||
""";
|
||||
|
||||
/**
 * Returns one slice of the filtered person directory, ordered by last name and then
 * first name. The WHERE clause comes from {@link #FILTER_WHERE}.
 *
 * @param type          value compared against {@code persons.person_type}; {@code null} disables the predicate
 * @param familyOnly    {@code TRUE} restricts to {@code family_member = TRUE}; {@code FALSE} or {@code null} disables it
 * @param hasDocuments  {@code TRUE} restricts to persons with at least one sender or receiver reference;
 *                      {@code FALSE} or {@code null} disables it
 * @param provisional   when non-null, must equal {@code persons.provisional}; {@code null} disables the predicate
 * @param readerDefault when {@code true}, restricts to family members or persons with at least one document
 * @param query         case-insensitive substring matched against "first last", "last first" and alias;
 *                      {@code null} disables the predicate
 * @param limit         maximum number of rows to return (SQL {@code LIMIT})
 * @param offset        number of rows to skip (SQL {@code OFFSET})
 * @return the matching summaries for the requested slice
 */
@Query(value = """
        SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
        p.person_type AS personType,
        p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
        p.family_member AS familyMember, p.provisional AS provisional,
        (SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
        + (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
        FROM persons p
        """ + FILTER_WHERE + """
        ORDER BY p.last_name ASC, p.first_name ASC
        LIMIT :limit OFFSET :offset
        """,
        nativeQuery = true)
List<PersonSummaryDTO> findByFilter(@Param("type") String type,
        @Param("familyOnly") Boolean familyOnly,
        @Param("hasDocuments") Boolean hasDocuments,
        @Param("provisional") Boolean provisional,
        @Param("readerDefault") boolean readerDefault,
        @Param("query") String query,
        @Param("limit") int limit,
        @Param("offset") int offset);
|
||||
|
||||
/**
 * Counts all persons matching the filter. It appends the same {@link #FILTER_WHERE}
 * fragment as {@code findByFilter}, without ordering or paging, and takes the same
 * six filter parameters.
 *
 * @return the total number of matching persons across all pages
 */
@Query(value = "SELECT COUNT(*) FROM persons p " + FILTER_WHERE, nativeQuery = true)
long countByFilter(@Param("type") String type,
        @Param("familyOnly") Boolean familyOnly,
        @Param("hasDocuments") Boolean hasDocuments,
        @Param("provisional") Boolean provisional,
        @Param("readerDefault") boolean readerDefault,
        @Param("query") String query);
|
||||
|
||||
// --- Correspondent queries ---
|
||||
|
||||
@Query(value = """
|
||||
@@ -189,15 +131,12 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
List<Person> findCorrespondentsWithFilter(@Param("personId") UUID personId, @Param("q") String q);
|
||||
|
||||
// --- Merge helpers (native SQL to bypass JPA entity layer) ---
|
||||
// clearAutomatically + flushAutomatically keep the L1 cache from desyncing: these bulk
|
||||
// updates run beneath Hibernate, and mergePersons follows them with a deleteById whose
|
||||
// ON DELETE CASCADE (V71) also fires beneath the session.
|
||||
|
||||
@Modifying(clearAutomatically = true, flushAutomatically = true)
|
||||
@Modifying
|
||||
@Query(value = "UPDATE documents SET sender_id = :target WHERE sender_id = :source", nativeQuery = true)
|
||||
void reassignSender(@Param("source") UUID source, @Param("target") UUID target);
|
||||
|
||||
@Modifying(clearAutomatically = true, flushAutomatically = true)
|
||||
@Modifying
|
||||
@Query(value = """
|
||||
INSERT INTO document_receivers (document_id, person_id)
|
||||
SELECT document_id, :target FROM document_receivers
|
||||
@@ -207,4 +146,8 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
)
|
||||
""", nativeQuery = true)
|
||||
void insertMissingReceiverReference(@Param("source") UUID source, @Param("target") UUID target);
|
||||
}
|
||||
|
||||
/**
 * Deletes every {@code document_receivers} row whose {@code person_id} is the given person.
 * Runs as native SQL.
 *
 * @param source id of the person whose receiver references are removed
 */
@Modifying
@Query(value = "DELETE FROM document_receivers WHERE person_id = :source", nativeQuery = true)
void deleteReceiverReferences(@Param("source") UUID source);
|
||||
}
|
||||
@@ -1,50 +0,0 @@
|
||||
package org.raddatz.familienarchiv.person;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Paged result for the /api/persons list endpoint.
|
||||
*
|
||||
* <p>Hand-written to mirror {@code document/DocumentSearchResult} field-for-field so the
|
||||
* frontend sees one paged shape across the app. Deliberately NOT Spring {@code Page<T>}
|
||||
* (unstable serialized shape across Spring versions, noisy in OpenAPI) and deliberately
|
||||
* NOT a reuse of the document DTO (would couple two feature modules — duplication beats
|
||||
* coupling here).
|
||||
*/
|
||||
public record PersonSearchResult(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<PersonSummaryDTO> items,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
long totalElements,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
int pageNumber,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
int pageSize,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
int totalPages
|
||||
) {
|
||||
/**
|
||||
* Paged factory: derives {@code totalPages} from the full match count and the page size.
|
||||
* A zero count yields zero pages so the frontend hides the pagination control.
|
||||
*/
|
||||
public static PersonSearchResult paged(List<PersonSummaryDTO> slice, int pageNumber, int pageSize, long totalElements) {
|
||||
int totalPages = pageSize == 0 ? 0 : (int) ((totalElements + pageSize - 1) / pageSize);
|
||||
return new PersonSearchResult(slice, totalElements, pageNumber, pageSize, totalPages);
|
||||
}
|
||||
|
||||
/**
|
||||
* Non-paged factory for the legacy {@code sort=documentCount} top-N dashboard path.
|
||||
* That query returns the <em>complete</em> result in one shot — there is no further page
|
||||
* to fetch — so the envelope reports reality rather than pretending to be a slice of a
|
||||
* larger set: {@code totalElements} equals the number of rows actually returned,
|
||||
* {@code pageSize} equals that same count, and {@code totalPages} is 1 (or 0 when empty).
|
||||
* This avoids the earlier ambiguity where {@code totalElements} looked like a paged total.
|
||||
*/
|
||||
public static PersonSearchResult topN(List<PersonSummaryDTO> all) {
|
||||
int count = all.size();
|
||||
int totalPages = count == 0 ? 0 : 1;
|
||||
return new PersonSearchResult(all, count, 0, count, totalPages);
|
||||
}
|
||||
}
|
||||
@@ -31,53 +31,20 @@ public class PersonService {
|
||||
private final PersonRepository personRepository;
|
||||
private final PersonNameAliasRepository aliasRepository;
|
||||
|
||||
public List<PersonSummaryDTO> findAll(String q) {
|
||||
if (q == null) {
|
||||
return personRepository.findAllWithDocumentCount();
|
||||
}
|
||||
if (q.isBlank()) {
|
||||
return List.of();
|
||||
}
|
||||
return personRepository.searchWithDocumentCount(q.trim());
|
||||
}
|
||||
|
||||
/**
 * Delegates to {@code PersonRepository#findTopByDocumentCount}; ordering and cut-off are
 * defined by that repository query.
 *
 * @param limit passed through unchanged as the query's {@code limit} parameter
 * @return the summaries returned by the repository
 */
public List<PersonSummaryDTO> findTopByDocumentCount(int limit) {
    return personRepository.findTopByDocumentCount(limit);
}
|
||||
|
||||
/**
|
||||
* Filtered, paginated directory query. The slice and the total are derived from one
|
||||
* shared WHERE clause (see {@link PersonRepository#FILTER_WHERE}) so totalElements can
|
||||
* never drift from the rendered page. {@code type} is passed as the enum name because the
|
||||
* native query compares against the string column.
|
||||
*/
|
||||
public PersonSearchResult search(PersonFilter filter, int page, int size, String q) {
|
||||
String type = filter.type() == null ? null : filter.type().name();
|
||||
String query = (q == null || q.isBlank()) ? null : q.trim();
|
||||
int offset = page * size;
|
||||
|
||||
List<PersonSummaryDTO> items = personRepository.findByFilter(
|
||||
type, filter.familyOnly(), filter.hasDocuments(), filter.provisional(),
|
||||
filter.readerDefault(), query, size, offset);
|
||||
long total = personRepository.countByFilter(
|
||||
type, filter.familyOnly(), filter.hasDocuments(), filter.provisional(),
|
||||
filter.readerDefault(), query);
|
||||
|
||||
return PersonSearchResult.paged(items, page, size, total);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears the {@code provisional} flag — a deliberate state transition exposed as
|
||||
* {@code PATCH /api/persons/{id}/confirm}, never as a mass-assignable DTO field (CWE-915).
|
||||
*/
|
||||
@Transactional
|
||||
public Person confirmPerson(UUID id) {
|
||||
Person person = getById(id);
|
||||
person.setProvisional(false);
|
||||
return personRepository.save(person);
|
||||
}
|
||||
|
||||
/**
 * Hard-deletes a person used by triage. Referential integrity is enforced by the database
 * (V71's {@code ON DELETE} constraints: sender_id {@code SET NULL}, receiver and @-mention
 * rows {@code CASCADE}), so the service stays thin: it only verifies existence then deletes.
 *
 * @param id id of the person to delete
 */
@Transactional
public void deletePerson(UUID id) {
    // Called only for its existence check; the returned entity is not needed.
    getById(id);
    personRepository.deleteById(id);
}
|
||||
|
||||
public Person getById(UUID id) {
|
||||
return personRepository.findById(id)
|
||||
.orElseThrow(() -> DomainException.notFound(ErrorCode.PERSON_NOT_FOUND, "Person not found: " + id));
|
||||
@@ -113,11 +80,6 @@ public class PersonService {
|
||||
return personRepository.findByFirstNameIgnoreCaseAndLastNameIgnoreCase(firstName, lastName);
|
||||
}
|
||||
|
||||
/**
 * Lookup by the normalizer person_id, used by the canonical importer for register-first
 * matching. Delegates directly to the repository.
 *
 * @param sourceRef the normalizer person_id to look up
 * @return the matching person, or an empty Optional when none carries that reference
 */
public Optional<Person> findBySourceRef(String sourceRef) {
    return personRepository.findBySourceRef(sourceRef);
}
|
||||
|
||||
@Nullable
|
||||
@Transactional
|
||||
public Person findOrCreateByAlias(String rawName) {
|
||||
@@ -153,82 +115,6 @@ public class PersonService {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Idempotent upsert keyed on {@code sourceRef} (the normalizer person_id) for the
|
||||
* canonical importer (Phase 3, ADR-025). On first import the canonical fields are
|
||||
* written verbatim. On re-import the human-edit-preserve precedence applies:
|
||||
* a non-blank existing field is never overwritten, and {@code provisional} never
|
||||
* flips back to true once a human has confirmed the person.
|
||||
*/
|
||||
@Transactional
|
||||
public Person upsertBySourceRef(PersonUpsertCommand cmd) {
|
||||
return personRepository.findBySourceRef(cmd.sourceRef())
|
||||
.map(existing -> personRepository.save(mergeCanonical(existing, cmd)))
|
||||
.orElseGet(() -> fromCanonical(cmd));
|
||||
}
|
||||
|
||||
private Person fromCanonical(PersonUpsertCommand cmd) {
|
||||
Person person = personRepository.save(Person.builder()
|
||||
.sourceRef(cmd.sourceRef())
|
||||
.firstName(blankToNull(cmd.firstName()))
|
||||
.lastName(cmd.lastName())
|
||||
.notes(blankToNull(cmd.notes()))
|
||||
.birthYear(cmd.birthYear())
|
||||
.deathYear(cmd.deathYear())
|
||||
.generation(cmd.generation())
|
||||
.familyMember(cmd.familyMember())
|
||||
.personType(cmd.personType() == null ? PersonType.PERSON : cmd.personType())
|
||||
.provisional(cmd.provisional())
|
||||
.build());
|
||||
String maiden = blankToNull(cmd.maidenName());
|
||||
if (maiden != null) {
|
||||
int nextSortOrder = aliasRepository.findMaxSortOrder(person.getId()) + 1;
|
||||
aliasRepository.save(PersonNameAlias.builder()
|
||||
.person(person)
|
||||
.lastName(maiden)
|
||||
.type(PersonNameAliasType.MAIDEN_NAME)
|
||||
.sortOrder(nextSortOrder)
|
||||
.build());
|
||||
}
|
||||
return person;
|
||||
}
|
||||
|
||||
private Person mergeCanonical(Person existing, PersonUpsertCommand cmd) {
|
||||
existing.setFirstName(preferHuman(existing.getFirstName(), cmd.firstName()));
|
||||
existing.setLastName(preferHuman(existing.getLastName(), cmd.lastName()));
|
||||
existing.setNotes(preferHuman(existing.getNotes(), cmd.notes()));
|
||||
existing.setBirthYear(preferHuman(existing.getBirthYear(), cmd.birthYear()));
|
||||
existing.setDeathYear(preferHuman(existing.getDeathYear(), cmd.deathYear()));
|
||||
existing.setGeneration(preferHuman(existing.getGeneration(), cmd.generation()));
|
||||
if (cmd.personType() != null && existing.getPersonType() == PersonType.PERSON) {
|
||||
existing.setPersonType(cmd.personType());
|
||||
}
|
||||
// provisional is monotonic-downward: once it is false it never reverts to true.
|
||||
// This also pins the cross-loader precedence (ADR-025): a register/tree person is
|
||||
// loaded before documents and already false, so a later document row that references
|
||||
// the same source_ref (provisional=true) can never flip it provisional — the guard
|
||||
// below only fires while existing is still provisional. Order of document rows is
|
||||
// therefore irrelevant.
|
||||
if (existing.isProvisional()) {
|
||||
existing.setProvisional(cmd.provisional());
|
||||
}
|
||||
return existing;
|
||||
}
|
||||
|
||||
// preferHuman keeps an existing human-entered value and only falls back to the canonical
|
||||
// value when the existing one is absent — the single idiom for every fill-blank field.
|
||||
private static String preferHuman(String existing, String canonical) {
|
||||
return (existing == null || existing.isBlank()) ? blankToNull(canonical) : existing;
|
||||
}
|
||||
|
||||
private static Integer preferHuman(Integer existing, Integer canonical) {
|
||||
return existing != null ? existing : canonical;
|
||||
}
|
||||
|
||||
private static String blankToNull(String s) {
|
||||
return (s == null || s.isBlank()) ? null : s.trim();
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public Person createPerson(String firstName, String lastName, String alias) {
|
||||
Person person = Person.builder()
|
||||
@@ -254,7 +140,6 @@ public class PersonService {
|
||||
.notes(dto.getNotes() == null || dto.getNotes().isBlank() ? null : dto.getNotes().trim())
|
||||
.birthYear(dto.getBirthYear())
|
||||
.deathYear(dto.getDeathYear())
|
||||
.generation(dto.getGeneration())
|
||||
.build();
|
||||
return personRepository.save(person);
|
||||
}
|
||||
@@ -287,18 +172,9 @@ public class PersonService {
|
||||
person.setNotes(dto.getNotes() == null || dto.getNotes().isBlank() ? null : dto.getNotes().trim());
|
||||
person.setBirthYear(dto.getBirthYear());
|
||||
person.setDeathYear(dto.getDeathYear());
|
||||
// Form path: a human can clear generation back to null. Unlike the importer
|
||||
// which routes through preferHuman, we write the DTO value verbatim.
|
||||
person.setGeneration(dto.getGeneration());
|
||||
return personRepository.save(person);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges the source person into the target, then deletes the source. Sender references move
|
||||
* to the target; receiver references the target lacks are inserted. The source's leftover
|
||||
* receiver join rows are not deleted explicitly — they cascade-drop via V71's
|
||||
* {@code ON DELETE CASCADE} on {@code document_receivers.person_id} when the source is deleted.
|
||||
*/
|
||||
@Transactional
|
||||
public void mergePersons(UUID sourceId, UUID targetId) {
|
||||
if (sourceId.equals(targetId)) {
|
||||
@@ -315,7 +191,9 @@ public class PersonService {
|
||||
// Add target as receiver where source is receiver but target is not yet
|
||||
personRepository.insertMissingReceiverReference(sourceId, targetId);
|
||||
|
||||
// Source's remaining receiver rows cascade-drop via V71's ON DELETE CASCADE.
|
||||
// Remove all remaining source receiver references (duplicates already handled)
|
||||
personRepository.deleteReceiverReferences(sourceId);
|
||||
|
||||
personRepository.deleteById(sourceId);
|
||||
}
|
||||
|
||||
|
||||
@@ -18,7 +18,6 @@ public interface PersonSummaryDTO {
|
||||
Integer getDeathYear();
|
||||
String getNotes();
|
||||
boolean isFamilyMember();
|
||||
boolean isProvisional();
|
||||
long getDocumentCount();
|
||||
|
||||
default String getDisplayName() {
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
package org.raddatz.familienarchiv.person;
|
||||
|
||||
import jakarta.validation.constraints.Max;
|
||||
import jakarta.validation.constraints.Min;
|
||||
import jakarta.validation.constraints.NotNull;
|
||||
import jakarta.validation.constraints.Size;
|
||||
import lombok.Data;
|
||||
@@ -23,9 +21,4 @@ public class PersonUpdateDTO {
|
||||
private String notes;
|
||||
private Integer birthYear;
|
||||
private Integer deathYear;
|
||||
// Mirror of the persons.generation CHECK constraint (V70). Bounds live in
|
||||
// PersonGeneration so DB, DTO, and importer all read from one place.
|
||||
@Min(PersonGeneration.MIN_GENERATION)
|
||||
@Max(PersonGeneration.MAX_GENERATION)
|
||||
private Integer generation;
|
||||
}
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
package org.raddatz.familienarchiv.person;
|
||||
|
||||
import lombok.Builder;
|
||||
|
||||
/**
|
||||
* Importer → {@link PersonService} command for an idempotent upsert keyed on
|
||||
* {@code sourceRef} (the normalizer's stable person_id). Carries only the canonical
|
||||
* fields the importer owns; the service applies the human-edit-preserve precedence
|
||||
* (see ADR-025): non-blank existing fields are never overwritten, and {@code provisional}
|
||||
* never flips back to true once a human has confirmed a person.
|
||||
*/
|
||||
@Builder
public record PersonUpsertCommand(
        // Upsert key: the normalizer's stable person_id.
        String sourceRef,
        String firstName,
        String lastName,
        // Not stored on the person itself; on first import a non-blank value is saved
        // by the service as a MAIDEN_NAME alias.
        String maidenName,
        String notes,
        Integer birthYear,
        Integer deathYear,
        Integer generation,
        boolean familyMember,
        // May be null; the service falls back to PersonType.PERSON on first import.
        PersonType personType,
        boolean provisional
) {}
|
||||
@@ -96,8 +96,7 @@ public class RelationshipInferenceService {
|
||||
if (p == null) continue;
|
||||
List<RelationToken> path = shortestPaths.get(id);
|
||||
PersonNodeDTO node = new PersonNodeDTO(
|
||||
p.getId(), p.getDisplayName(), p.getBirthYear(), p.getDeathYear(),
|
||||
p.getGeneration(), p.isFamilyMember());
|
||||
p.getId(), p.getDisplayName(), p.getBirthYear(), p.getDeathYear(), p.isFamilyMember());
|
||||
out.add(new InferredRelationshipWithPersonDTO(node, labelFor(path), path.size()));
|
||||
}
|
||||
out.sort(Comparator.comparingInt(InferredRelationshipWithPersonDTO::hops)
|
||||
|
||||
@@ -31,12 +31,6 @@ import java.util.UUID;
|
||||
@RequiredArgsConstructor
|
||||
public class RelationshipService {
|
||||
|
||||
// Single source of truth for which relationship types are part of the family graph.
|
||||
// Consulted by addRelationship (to set family_member on both endpoints) and by
|
||||
// getFamilyNetwork (to filter the edges returned). FRIEND/COLLEAGUE/etc. are excluded.
|
||||
private static final List<RelationType> FAMILY_RELATION_TYPES =
|
||||
List.of(RelationType.PARENT_OF, RelationType.SPOUSE_OF, RelationType.SIBLING_OF);
|
||||
|
||||
private final PersonRelationshipRepository relationshipRepository;
|
||||
private final PersonService personService;
|
||||
private final RelationshipInferenceService inferenceService;
|
||||
@@ -66,12 +60,11 @@ public class RelationshipService {
|
||||
for (Person p : familyMembers) {
|
||||
familyIds.add(p.getId());
|
||||
nodes.add(new PersonNodeDTO(
|
||||
p.getId(), p.getDisplayName(), p.getBirthYear(), p.getDeathYear(),
|
||||
p.getGeneration(), true));
|
||||
p.getId(), p.getDisplayName(), p.getBirthYear(), p.getDeathYear(), true));
|
||||
}
|
||||
|
||||
List<PersonRelationship> familyEdges = relationshipRepository.findAllByRelationTypeIn(
|
||||
FAMILY_RELATION_TYPES);
|
||||
List.of(RelationType.PARENT_OF, RelationType.SPOUSE_OF, RelationType.SIBLING_OF));
|
||||
|
||||
List<RelationshipDTO> edges = new ArrayList<>();
|
||||
for (PersonRelationship r : familyEdges) {
|
||||
@@ -112,23 +105,15 @@ public class RelationshipService {
|
||||
.notes(blankToNull(dto.notes()))
|
||||
.build();
|
||||
|
||||
PersonRelationship saved;
|
||||
try {
|
||||
// saveAndFlush so the unique_rel constraint violates synchronously and is
|
||||
// caught here, not at commit time outside the @Transactional boundary.
|
||||
saved = relationshipRepository.saveAndFlush(rel);
|
||||
return toDTO(relationshipRepository.saveAndFlush(rel));
|
||||
} catch (DataIntegrityViolationException e) {
|
||||
throw DomainException.conflict(
|
||||
ErrorCode.DUPLICATE_RELATIONSHIP,
|
||||
"Relationship already exists for (" + personId + ", " + relatedPerson.getId() + ", " + dto.relationType() + ")");
|
||||
}
|
||||
// Family-graph edges imply both endpoints are family members. Idempotent: the
|
||||
// setter is a no-op when the person is already flagged, so re-imports stay clean.
|
||||
if (FAMILY_RELATION_TYPES.contains(dto.relationType())) {
|
||||
personService.setFamilyMember(person.getId(), true);
|
||||
personService.setFamilyMember(relatedPerson.getId(), true);
|
||||
}
|
||||
return toDTO(saved);
|
||||
}
|
||||
|
||||
@Transactional
|
||||
|
||||
@@ -10,6 +10,5 @@ public record PersonNodeDTO(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) String displayName,
|
||||
Integer birthYear,
|
||||
Integer deathYear,
|
||||
Integer generation,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) boolean familyMember
|
||||
) {}
|
||||
|
||||
@@ -0,0 +1,137 @@
|
||||
package org.raddatz.familienarchiv.security;
|
||||
|
||||
import jakarta.servlet.FilterChain;
|
||||
import jakarta.servlet.ServletException;
|
||||
import jakarta.servlet.http.Cookie;
|
||||
import jakarta.servlet.http.HttpServletRequest;
|
||||
import jakarta.servlet.http.HttpServletRequestWrapper;
|
||||
import jakarta.servlet.http.HttpServletResponse;
|
||||
import org.springframework.core.annotation.Order;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.web.filter.OncePerRequestFilter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URLDecoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Collections;
|
||||
import java.util.Enumeration;
|
||||
|
||||
/**
|
||||
* Promotes the {@code auth_token} cookie to an {@code Authorization} header
|
||||
* so that browser-side requests to {@code /api/*} authenticate the same way
|
||||
* SSR fetches do.
|
||||
*
|
||||
* <p>The SvelteKit login action stores the full HTTP Basic header value
|
||||
* ({@code "Basic <base64>"}) in an HttpOnly cookie. SSR fetches from
|
||||
* {@code hooks.server.ts} read the cookie and pass it explicitly as the
|
||||
* {@code Authorization} header. In the dev environment, Vite's proxy does
|
||||
* the same on every {@code /api/*} request (see {@code vite.config.ts}).
|
||||
* In production, Caddy proxies {@code /api/*} straight to the backend and
|
||||
* does NOT translate the cookie — so client-side {@code fetch} and
|
||||
* {@code EventSource} calls reach the backend without auth, get
|
||||
* {@code 401 WWW-Authenticate: Basic}, and the browser pops a native dialog.
|
||||
*
|
||||
* <p>This filter closes that gap: if a request has an {@code auth_token}
|
||||
* cookie but no explicit {@code Authorization} header, promote the cookie
|
||||
* value (URL-decoded) into the header before Spring Security inspects it.
|
||||
* Explicit {@code Authorization} headers are preserved unchanged.
|
||||
*
|
||||
* <p>See #520. Filter runs at {@code Ordered.HIGHEST_PRECEDENCE} so it
|
||||
* mutates the request before any Spring Security filter sees it.
|
||||
*
|
||||
* <p><b>Scope:</b> only {@code /api/*} requests are touched. The
|
||||
* {@code /actuator/*} block in Caddy plus the open auth/reset paths in
|
||||
* {@link SecurityConfig} must NOT receive a promoted Authorization.
|
||||
*
|
||||
* <p><b>⚠ Log-leakage warning:</b> the wrapped request exposes the
|
||||
* Authorization header via {@code getHeaderNames}/{@code getHeaders}. Any
|
||||
* filter or interceptor that iterates request headers will see the live
|
||||
* Basic credential. Do NOT add a request-header logger downstream of this
|
||||
* filter without explicitly scrubbing the {@code Authorization} field.
|
||||
*/
|
||||
@Component
|
||||
@Order(org.springframework.core.Ordered.HIGHEST_PRECEDENCE)
|
||||
public class AuthTokenCookieFilter extends OncePerRequestFilter {
|
||||
|
||||
static final String COOKIE_NAME = "auth_token";
|
||||
static final String SCOPE_PREFIX = "/api/";
|
||||
|
||||
@Override
|
||||
protected void doFilterInternal(HttpServletRequest request,
|
||||
HttpServletResponse response,
|
||||
FilterChain chain) throws ServletException, IOException {
|
||||
// Scope: only /api/* needs cookie promotion. /actuator/health (open),
|
||||
// /api/auth/forgot-password (open), /login etc. don't.
|
||||
if (!request.getRequestURI().startsWith(SCOPE_PREFIX)) {
|
||||
chain.doFilter(request, response);
|
||||
return;
|
||||
}
|
||||
// An explicit Authorization header wins — this is the SSR fetch path
|
||||
// (hooks.server.ts builds the header itself).
|
||||
if (request.getHeader(HttpHeaders.AUTHORIZATION) != null) {
|
||||
chain.doFilter(request, response);
|
||||
return;
|
||||
}
|
||||
Cookie[] cookies = request.getCookies();
|
||||
if (cookies == null) {
|
||||
chain.doFilter(request, response);
|
||||
return;
|
||||
}
|
||||
for (Cookie c : cookies) {
|
||||
if (COOKIE_NAME.equals(c.getName()) && c.getValue() != null && !c.getValue().isBlank()) {
|
||||
String decoded;
|
||||
try {
|
||||
decoded = URLDecoder.decode(c.getValue(), StandardCharsets.UTF_8);
|
||||
} catch (IllegalArgumentException malformed) {
|
||||
// Malformed percent-encoding — refuse to forward a bogus
|
||||
// Authorization header. Spring Security will treat the
|
||||
// request as unauthenticated.
|
||||
chain.doFilter(request, response);
|
||||
return;
|
||||
}
|
||||
chain.doFilter(new AuthHeaderRequest(request, decoded), response);
|
||||
return;
|
||||
}
|
||||
}
|
||||
chain.doFilter(request, response);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds (or overrides) the {@code Authorization} header on a wrapped request.
|
||||
* All other headers pass through unchanged.
|
||||
*/
|
||||
static final class AuthHeaderRequest extends HttpServletRequestWrapper {
|
||||
private final String authorization;
|
||||
|
||||
AuthHeaderRequest(HttpServletRequest request, String authorization) {
|
||||
super(request);
|
||||
this.authorization = authorization;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHeader(String name) {
|
||||
if (HttpHeaders.AUTHORIZATION.equalsIgnoreCase(name)) {
|
||||
return authorization;
|
||||
}
|
||||
return super.getHeader(name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Enumeration<String> getHeaders(String name) {
|
||||
if (HttpHeaders.AUTHORIZATION.equalsIgnoreCase(name)) {
|
||||
return Collections.enumeration(Collections.singletonList(authorization));
|
||||
}
|
||||
return super.getHeaders(name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Enumeration<String> getHeaderNames() {
|
||||
Enumeration<String> base = super.getHeaderNames();
|
||||
java.util.Set<String> names = new java.util.LinkedHashSet<>();
|
||||
while (base.hasMoreElements()) names.add(base.nextElement());
|
||||
names.add(HttpHeaders.AUTHORIZATION);
|
||||
return Collections.enumeration(names);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,42 +1,24 @@
|
||||
package org.raddatz.familienarchiv.security;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.user.CustomUserDetailsService;
|
||||
import jakarta.servlet.http.HttpServletResponse;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.core.annotation.Order;
|
||||
import org.springframework.core.env.Environment;
|
||||
import org.springframework.security.authentication.AuthenticationManager;
|
||||
import org.springframework.security.authentication.dao.DaoAuthenticationProvider;
|
||||
import org.springframework.security.config.annotation.authentication.configuration.AuthenticationConfiguration;
|
||||
import org.springframework.security.config.Customizer;
|
||||
import org.springframework.security.config.annotation.web.builders.HttpSecurity;
|
||||
import org.springframework.security.config.annotation.web.configuration.EnableWebSecurity;
|
||||
import org.springframework.security.config.annotation.web.configurers.AbstractHttpConfigurer;
|
||||
import org.springframework.security.crypto.bcrypt.BCryptPasswordEncoder;
|
||||
import org.springframework.security.crypto.password.PasswordEncoder;
|
||||
import org.springframework.security.web.SecurityFilterChain;
|
||||
import org.springframework.security.web.authentication.session.ChangeSessionIdAuthenticationStrategy;
|
||||
import org.springframework.security.web.authentication.session.SessionAuthenticationStrategy;
|
||||
import org.springframework.security.web.csrf.CookieCsrfTokenRepository;
|
||||
import org.springframework.security.web.csrf.CsrfException;
|
||||
import org.springframework.security.web.csrf.CsrfTokenRequestAttributeHandler;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
@Configuration
|
||||
@EnableWebSecurity
|
||||
@RequiredArgsConstructor
|
||||
public class SecurityConfig {
|
||||
|
||||
// @WebMvcTest slices do not include JacksonAutoConfiguration, so ObjectMapper
|
||||
// cannot be injected here. A static instance is safe because the response
|
||||
// only serializes fixed String keys — no custom naming strategy or module needed.
|
||||
private static final ObjectMapper ERROR_WRITER = new ObjectMapper();
|
||||
|
||||
private final CustomUserDetailsService userDetailsService;
|
||||
private final Environment environment;
|
||||
|
||||
@@ -52,57 +34,28 @@ public class SecurityConfig {
|
||||
return authProvider;
|
||||
}
|
||||
|
||||
@Bean
|
||||
public AuthenticationManager authenticationManager(AuthenticationConfiguration config) throws Exception {
|
||||
return config.getAuthenticationManager();
|
||||
}
|
||||
|
||||
@Bean
|
||||
public SessionAuthenticationStrategy sessionAuthenticationStrategy() {
|
||||
// ChangeSessionIdAuthenticationStrategy rotates the session ID via the Servlet 3.1+
|
||||
// HttpServletRequest.changeSessionId() — preserves attributes, mints a fresh ID.
|
||||
// Used by AuthSessionController.login to defend against session fixation (CWE-384).
|
||||
return new ChangeSessionIdAuthenticationStrategy();
|
||||
}
|
||||
|
||||
@Bean
|
||||
@Order(1)
|
||||
public SecurityFilterChain managementFilterChain(HttpSecurity http) throws Exception {
|
||||
http
|
||||
.securityMatcher("/actuator/**")
|
||||
.authorizeHttpRequests(auth -> {
|
||||
// Health and Prometheus are open — Docker health checks and Prometheus scraping need no credentials.
|
||||
auth.requestMatchers("/actuator/health", "/actuator/prometheus").permitAll();
|
||||
// All other actuator endpoints (metrics, info, env, heapdump…) require authentication.
|
||||
auth.anyRequest().authenticated();
|
||||
})
|
||||
// Explicitly return 401 for any unauthenticated actuator request.
|
||||
// Without this override, Spring Security's DelegatingAuthenticationEntryPoint
|
||||
// would redirect browser-like clients to the form-login page (302 → /login),
|
||||
// making it impossible to distinguish "not authenticated" from "not found" in tests.
|
||||
.exceptionHandling(ex -> ex.authenticationEntryPoint(
|
||||
(req, res, e) -> res.setStatus(HttpServletResponse.SC_UNAUTHORIZED)))
|
||||
.formLogin(AbstractHttpConfigurer::disable)
|
||||
.csrf(AbstractHttpConfigurer::disable);
|
||||
return http.build();
|
||||
}
|
||||
|
||||
@Bean
|
||||
public SecurityFilterChain securityFilterChain(HttpSecurity http) throws Exception {
|
||||
http
|
||||
// CSRF protection via CookieCsrfTokenRepository (NFR-SEC-103).
|
||||
// The backend sets an XSRF-TOKEN cookie (not HttpOnly so JS can read it).
|
||||
// All state-changing requests must include X-XSRF-TOKEN matching the cookie.
|
||||
// See ADR-022 and issue #524 for the full security rationale.
|
||||
.csrf(csrf -> csrf
|
||||
.csrfTokenRepository(CookieCsrfTokenRepository.withHttpOnlyFalse())
|
||||
.csrfTokenRequestHandler(new CsrfTokenRequestAttributeHandler()))
|
||||
// CSRF is intentionally disabled. With the cookie-promotion model
|
||||
// (auth_token cookie → Authorization header via AuthTokenCookieFilter,
|
||||
// see #520), every authenticated request to /api/* now carries the
|
||||
// credential automatically once the cookie is set. The CSRF defence
|
||||
// for state-changing endpoints is therefore LOAD-BEARING on:
|
||||
//
|
||||
// 1. SameSite=strict on the auth_token cookie (login/+page.server.ts).
|
||||
// A cross-site POST from evil.com cannot include the cookie.
|
||||
// 2. CORS — Spring's default rejects cross-origin requests with
|
||||
// credentials unless explicitly allowed (no allowedOrigins config).
|
||||
//
|
||||
// If either of those is ever weakened (e.g. cookie flipped to
|
||||
// SameSite=lax, CORS allowedOrigins expanded), CSRF protection
|
||||
// MUST be re-enabled here.
|
||||
.csrf(csrf -> csrf.disable())
|
||||
|
||||
.authorizeHttpRequests(auth -> {
|
||||
// Actuator endpoints are governed by managementFilterChain (@Order(1)) above.
|
||||
auth.requestMatchers("/actuator/health", "/actuator/prometheus").permitAll();
|
||||
// Login is unauthenticated by definition
|
||||
auth.requestMatchers("/api/auth/login").permitAll();
|
||||
// Health endpoint must be open so CI/Docker health checks work without credentials
|
||||
auth.requestMatchers("/actuator/health").permitAll();
|
||||
// Password reset endpoints are unauthenticated by nature
|
||||
auth.requestMatchers("/api/auth/forgot-password", "/api/auth/reset-password").permitAll();
|
||||
// Invite-based registration endpoints are public
|
||||
@@ -122,18 +75,9 @@ public class SecurityConfig {
|
||||
// Allow PDFs to be displayed inside a same-origin iframe
|
||||
.headers(headers -> headers
|
||||
.frameOptions(frameOptions -> frameOptions.sameOrigin()))
|
||||
// Return 401 for unauthenticated requests; 403+CSRF_TOKEN_MISSING for CSRF failures.
|
||||
.exceptionHandling(ex -> ex
|
||||
.authenticationEntryPoint(
|
||||
(req, res, e) -> res.setStatus(HttpServletResponse.SC_UNAUTHORIZED))
|
||||
.accessDeniedHandler((req, res, e) -> {
|
||||
res.setStatus(HttpServletResponse.SC_FORBIDDEN);
|
||||
res.setContentType("application/json;charset=UTF-8");
|
||||
ErrorCode code = (e instanceof CsrfException)
|
||||
? ErrorCode.CSRF_TOKEN_MISSING
|
||||
: ErrorCode.FORBIDDEN;
|
||||
res.getWriter().write(ERROR_WRITER.writeValueAsString(Map.of("code", code.name())));
|
||||
}));
|
||||
// Allows login via the browser's Basic-auth popup or a REST header (Authorization: Basic ...)
|
||||
.httpBasic(Customizer.withDefaults())
|
||||
.formLogin(form -> form.usernameParameter("email"));
|
||||
|
||||
return http.build();
|
||||
}
|
||||
|
||||
@@ -7,13 +7,6 @@ Hierarchical document categories. Tags form a tree via a self-referencing `paren
|
||||
Entity: `Tag` (self-referencing `parent_id` tree).
|
||||
Features: tag CRUD, hierarchical deletion (cascade to descendants), tag typeahead, admin tag management (rename, reparent, merge).
|
||||
|
||||
## Tag tree counts (`getTagTree`)
|
||||
|
||||
`GET /api/tags/tree` returns each node with **two** document counts, from two aggregate queries (no N+1):
|
||||
|
||||
- `documentCount` — documents tagged with that **exact** tag (direct). Read by the admin surfaces (sidebar tree, merge preview, delete-impact guard), which describe direct-document operations.
|
||||
- `subtreeDocumentCount` — **distinct** documents tagged with that tag **or any descendant** (subtree rollup, recursive-CTE closure, depth guard ≤50). Read by the reader surfaces (`/themen` page, dashboard `ThemenWidget`) so the box number matches what `/documents?tag=X` actually finds.
|
||||
|
||||
## What this domain does NOT own
|
||||
|
||||
- Documents — the `document_tags` join table is on the document side. `Tag` does not hold document references.
|
||||
|
||||
@@ -2,13 +2,10 @@ package org.raddatz.familienarchiv.tag;
|
||||
|
||||
import java.util.UUID;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import jakarta.persistence.*;
|
||||
import lombok.*;
|
||||
|
||||
// excludes the Hibernate proxy properties (hibernateLazyInitializer, handler) from JSON serialization; see ADR-022 for lazy-fetch context
|
||||
@JsonIgnoreProperties({"hibernateLazyInitializer", "handler"})
|
||||
@Entity
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
@@ -30,11 +27,4 @@ public class Tag {
|
||||
|
||||
/** Color token name (e.g. "sage"), only set on root-level tags. Null means no color. */
|
||||
private String color;
|
||||
|
||||
/**
|
||||
* Import identity key, keyed on the canonical tag_path. Null for manually created tags;
|
||||
* unique among non-null values. The importer (Phase 3) uses it for idempotent re-import.
|
||||
*/
|
||||
@Column(name = "source_ref")
|
||||
private String sourceRef;
|
||||
}
|
||||
|
||||
@@ -20,17 +20,7 @@ public interface TagRepository extends JpaRepository<Tag, UUID> {
|
||||
}
|
||||
|
||||
|
||||
// Tag-name resolution (see TagService.findOrCreate). Names that collide case-insensitively across
|
||||
// the canonical tree are VALID — a parent and its same-named lowercase child (e.g. "Geburt" /
|
||||
// "Geburt/geburt") are distinct nodes with their own source_ref and document attachments. So
|
||||
// resolution must be exact-case first, then a non-throwing list for the case-insensitive fallback.
|
||||
// Do NOT add a unique(lower(name)) constraint — it would reject these legitimate rows. See #730.
|
||||
Optional<Tag> findByName(String name);
|
||||
|
||||
List<Tag> findAllByNameIgnoreCase(String name);
|
||||
|
||||
// Lookup by the canonical tag_path, used for idempotent canonical re-import (Phase 3).
|
||||
Optional<Tag> findBySourceRef(String sourceRef);
|
||||
Optional<Tag> findByNameIgnoreCase(String name);
|
||||
|
||||
List<Tag> findByNameContainingIgnoreCase(String name);
|
||||
|
||||
@@ -133,31 +123,4 @@ public interface TagRepository extends JpaRepository<Tag, UUID> {
|
||||
*/
|
||||
@Query(value = "SELECT tag_id AS tagId, COUNT(*) AS count FROM document_tags GROUP BY tag_id", nativeQuery = true)
|
||||
List<TagCount> findDocumentCountsPerTag();
|
||||
|
||||
/**
|
||||
* Returns (tagId, count) pairs where count is the number of <b>distinct</b> documents tagged
|
||||
* with that tag <b>or any of its descendants</b> (full subtree rollup).
|
||||
* <p>
|
||||
* Builds a tag closure of (ancestor_id, descendant_id) pairs via a recursive CTE — each tag is
|
||||
* its own ancestor at depth 0, then descends into children (depth guard of 50 levels prevents a
|
||||
* cycle or pathological depth from running away) — joins it to {@code document_tags} on the
|
||||
* descendant, and counts distinct documents per ancestor. A document tagged with several tags in
|
||||
* the same subtree is therefore counted once. Tags whose entire subtree holds no documents do
|
||||
* not appear in the result (they default to 0 in the tree). One aggregate query for all tags.
|
||||
*/
|
||||
@Query(value = """
|
||||
WITH RECURSIVE closure AS (
|
||||
SELECT id AS ancestor_id, id AS descendant_id, 0 AS depth FROM tag
|
||||
UNION ALL
|
||||
SELECT c.ancestor_id, t.id AS descendant_id, c.depth + 1
|
||||
FROM tag t
|
||||
JOIN closure c ON t.parent_id = c.descendant_id
|
||||
WHERE c.depth < 50
|
||||
)
|
||||
SELECT c.ancestor_id AS tagId, COUNT(DISTINCT dt.document_id) AS count
|
||||
FROM closure c
|
||||
JOIN document_tags dt ON dt.tag_id = c.descendant_id
|
||||
GROUP BY c.ancestor_id
|
||||
""", nativeQuery = true)
|
||||
List<TagCount> findSubtreeDocumentCountsPerTag();
|
||||
}
|
||||
|
||||
@@ -2,13 +2,11 @@ package org.raddatz.familienarchiv.tag;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
@@ -51,46 +49,10 @@ public class TagService {
|
||||
.orElseThrow(() -> DomainException.notFound(ErrorCode.TAG_NOT_FOUND, "Tag not found: " + id));
|
||||
}
|
||||
|
||||
/** Lookup by the canonical tag_path — used by the canonical importer to attach a document's tag. */
|
||||
public Optional<Tag> findBySourceRef(String sourceRef) {
|
||||
return tagRepository.findBySourceRef(sourceRef);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves a tag name to a single tag, creating one when absent. Never throws on case-insensitive
|
||||
* collisions: names that differ only by case are valid distinct nodes in the canonical tree (a
|
||||
* parent and its same-named lowercase child), so resolution prefers an exact-case match, then
|
||||
* falls back to the lowest-id case-insensitive match, then creates. See #730.
|
||||
*/
|
||||
public Tag findOrCreate(String name) {
|
||||
String cleanName = name.trim();
|
||||
Optional<Tag> exact = tagRepository.findByName(cleanName);
|
||||
if (exact.isPresent()) return exact.get(); // exact-case wins (edit round-trip replays the stored name)
|
||||
List<Tag> caseInsensitive = tagRepository.findAllByNameIgnoreCase(cleanName);
|
||||
if (!caseInsensitive.isEmpty()) {
|
||||
return caseInsensitive.stream().min(Comparator.comparing(Tag::getId)).orElseThrow(); // deterministic tie-break by id — list is non-empty, never throws
|
||||
}
|
||||
return tagRepository.save(Tag.builder().name(cleanName).build()); // create-when-absent (orphan tag: null sourceRef/parentId)
|
||||
}
|
||||
|
||||
/**
|
||||
* Idempotent upsert keyed on {@code sourceRef} (the canonical tag_path) for the
|
||||
* Phase-3 importer (ADR-025). On first import the canonical name and parent are
|
||||
* written; on re-import a human-renamed tag name is preserved (the source_ref is the
|
||||
* stable identity, the name is a human-editable label).
|
||||
*/
|
||||
@Transactional
|
||||
public Tag upsertBySourceRef(String sourceRef, String name, UUID parentId) {
|
||||
return tagRepository.findBySourceRef(sourceRef)
|
||||
.map(existing -> {
|
||||
existing.setParentId(parentId);
|
||||
return tagRepository.save(existing);
|
||||
})
|
||||
.orElseGet(() -> tagRepository.save(Tag.builder()
|
||||
.sourceRef(sourceRef)
|
||||
.name(name)
|
||||
.parentId(parentId)
|
||||
.build()));
|
||||
return tagRepository.findByNameIgnoreCase(cleanName)
|
||||
.orElseGet(() -> tagRepository.save(Tag.builder().name(cleanName).build()));
|
||||
}
|
||||
|
||||
@Transactional
|
||||
@@ -184,27 +146,19 @@ public class TagService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all tags assembled into a tree, each node carrying two counts:
|
||||
* {@code documentCount} — documents tagged with that exact tag (direct) — and
|
||||
* {@code subtreeDocumentCount} — distinct documents tagged with that tag or any descendant
|
||||
* (subtree rollup). Each count comes from one aggregate query (no N+1).
|
||||
* NOTE: counts are global per tag, not scoped to any search filter.
|
||||
* Consumed by the reader surfaces (/themen page, dashboard ThemenWidget — which read the
|
||||
* subtree rollup) as well as the admin sidebar and tag operation previews (which read the
|
||||
* direct count).
|
||||
* Returns all tags assembled into a tree with document counts per node.
|
||||
* Uses a single aggregate query to avoid N+1 behaviour.
|
||||
* NOTE: document counts are global per tag, not scoped to any search filter.
|
||||
* The tree endpoint is only used for the admin sidebar, so this is intentional.
|
||||
*/
|
||||
public List<TagTreeNodeDTO> getTagTree() {
|
||||
List<Tag> all = tagRepository.findAll();
|
||||
Map<UUID, Long> counts = toCountMap(tagRepository.findDocumentCountsPerTag());
|
||||
Map<UUID, Long> subtreeCounts = toCountMap(tagRepository.findSubtreeDocumentCountsPerTag());
|
||||
return buildTree(all, counts, subtreeCounts);
|
||||
}
|
||||
|
||||
private static Map<UUID, Long> toCountMap(List<TagRepository.TagCount> counts) {
|
||||
return counts.stream().collect(Collectors.toMap(
|
||||
TagRepository.TagCount::getTagId,
|
||||
TagRepository.TagCount::getCount
|
||||
));
|
||||
Map<UUID, Long> counts = tagRepository.findDocumentCountsPerTag().stream()
|
||||
.collect(Collectors.toMap(
|
||||
TagRepository.TagCount::getTagId,
|
||||
TagRepository.TagCount::getCount
|
||||
));
|
||||
return buildTree(all, counts);
|
||||
}
|
||||
|
||||
// ─── private helpers ─────────────────────────────────────────────────────
|
||||
@@ -279,14 +233,12 @@ public class TagService {
|
||||
}
|
||||
}
|
||||
|
||||
private List<TagTreeNodeDTO> buildTree(List<Tag> tags, Map<UUID, Long> counts,
|
||||
Map<UUID, Long> subtreeCounts) {
|
||||
private List<TagTreeNodeDTO> buildTree(List<Tag> tags, Map<UUID, Long> counts) {
|
||||
Map<UUID, TagTreeNodeDTO> nodeById = new LinkedHashMap<>();
|
||||
for (Tag tag : tags) {
|
||||
int documentCount = counts.getOrDefault(tag.getId(), 0L).intValue();
|
||||
int subtreeDocumentCount = subtreeCounts.getOrDefault(tag.getId(), 0L).intValue();
|
||||
nodeById.put(tag.getId(), new TagTreeNodeDTO(
|
||||
tag.getId(), tag.getName(), tag.getColor(), documentCount, subtreeDocumentCount,
|
||||
tag.getId(), tag.getName(), tag.getColor(), documentCount,
|
||||
new ArrayList<>(), tag.getParentId()
|
||||
));
|
||||
}
|
||||
|
||||
@@ -10,8 +10,5 @@ public record TagTreeNodeDTO(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) String name,
|
||||
String color,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) int documentCount,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED,
|
||||
description = "Distinct documents tagged with this tag or any descendant tag (subtree rollup)")
|
||||
int subtreeDocumentCount,
|
||||
List<TagTreeNodeDTO> children,
|
||||
@Schema(description = "Parent tag ID, null for root tags") UUID parentId) {}
|
||||
|
||||
@@ -5,8 +5,7 @@ import org.raddatz.familienarchiv.security.Permission;
|
||||
import org.raddatz.familienarchiv.security.RequirePermission;
|
||||
import org.raddatz.familienarchiv.document.DocumentService;
|
||||
import org.raddatz.familienarchiv.document.DocumentVersionService;
|
||||
import org.raddatz.familienarchiv.importing.CanonicalImportOrchestrator;
|
||||
import org.raddatz.familienarchiv.importing.ImportStatus;
|
||||
import org.raddatz.familienarchiv.importing.MassImportService;
|
||||
import org.raddatz.familienarchiv.document.ThumbnailBackfillService;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
@@ -22,20 +21,20 @@ import lombok.RequiredArgsConstructor;
|
||||
@RequiredArgsConstructor
|
||||
public class AdminController {
|
||||
|
||||
private final CanonicalImportOrchestrator importOrchestrator;
|
||||
private final MassImportService massImportService;
|
||||
private final DocumentService documentService;
|
||||
private final DocumentVersionService documentVersionService;
|
||||
private final ThumbnailBackfillService thumbnailBackfillService;
|
||||
|
||||
@PostMapping("/trigger-import")
|
||||
public ResponseEntity<ImportStatus> triggerMassImport() {
|
||||
importOrchestrator.runImportAsync();
|
||||
return ResponseEntity.accepted().body(importOrchestrator.getStatus());
|
||||
public ResponseEntity<MassImportService.ImportStatus> triggerMassImport() {
|
||||
massImportService.runImportAsync();
|
||||
return ResponseEntity.accepted().body(massImportService.getStatus());
|
||||
}
|
||||
|
||||
@GetMapping("/import-status")
|
||||
public ResponseEntity<ImportStatus> importStatus() {
|
||||
return ResponseEntity.ok(importOrchestrator.getStatus());
|
||||
public ResponseEntity<MassImportService.ImportStatus> importStatus() {
|
||||
return ResponseEntity.ok(massImportService.getStatus());
|
||||
}
|
||||
|
||||
@PostMapping("/backfill-versions")
|
||||
@@ -51,12 +50,6 @@ public class AdminController {
|
||||
return ResponseEntity.ok(new BackfillResult(count));
|
||||
}
|
||||
|
||||
@PostMapping("/backfill-titles")
|
||||
public ResponseEntity<BackfillResult> backfillTitles() {
|
||||
int count = documentService.backfillTitles();
|
||||
return ResponseEntity.ok(new BackfillResult(count));
|
||||
}
|
||||
|
||||
@PostMapping("/generate-thumbnails")
|
||||
public ResponseEntity<ThumbnailBackfillService.BackfillStatus> generateThumbnails() {
|
||||
thumbnailBackfillService.runBackfillAsync();
|
||||
|
||||
@@ -31,6 +31,5 @@ public class InviteListItemDTO {
|
||||
private String status;
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private LocalDateTime createdAt;
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private String shareableUrl;
|
||||
}
|
||||
|
||||
@@ -52,11 +52,7 @@ public class InviteService {
|
||||
public InviteToken createInvite(CreateInviteRequest dto, AppUser creator) {
|
||||
Set<UUID> groupIds = new HashSet<>();
|
||||
if (dto.getGroupIds() != null && !dto.getGroupIds().isEmpty()) {
|
||||
Set<UUID> uniqueIds = new HashSet<>(dto.getGroupIds());
|
||||
List<UserGroup> groups = userService.findGroupsByIds(new ArrayList<>(uniqueIds));
|
||||
if (groups.size() != uniqueIds.size()) {
|
||||
throw DomainException.notFound(ErrorCode.GROUP_NOT_FOUND, "One or more group IDs do not exist");
|
||||
}
|
||||
List<UserGroup> groups = userService.findGroupsByIds(dto.getGroupIds());
|
||||
groups.forEach(g -> groupIds.add(g.getId()));
|
||||
}
|
||||
|
||||
|
||||
@@ -24,7 +24,4 @@ public interface InviteTokenRepository extends JpaRepository<InviteToken, UUID>
|
||||
|
||||
@Query("SELECT t FROM InviteToken t ORDER BY t.createdAt DESC")
|
||||
List<InviteToken> findAllOrderedByCreatedAt();
|
||||
|
||||
@Query("SELECT CASE WHEN COUNT(t) > 0 THEN true ELSE false END FROM InviteToken t JOIN t.groupIds g WHERE g = :groupId AND t.revoked = false AND (t.expiresAt IS NULL OR t.expiresAt > CURRENT_TIMESTAMP) AND (t.maxUses IS NULL OR t.useCount < t.maxUses)")
|
||||
boolean existsActiveWithGroupId(@Param("groupId") UUID groupId);
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ import java.time.LocalDateTime;
|
||||
import java.util.HexFormat;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.raddatz.familienarchiv.auth.AuthService;
|
||||
import org.raddatz.familienarchiv.user.ResetPasswordRequest;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
@@ -33,7 +32,6 @@ public class PasswordResetService {
|
||||
private final UserService userService;
|
||||
private final PasswordResetTokenRepository tokenRepository;
|
||||
private final PasswordEncoder passwordEncoder;
|
||||
private final AuthService authService;
|
||||
|
||||
@Autowired(required = false)
|
||||
private JavaMailSender mailSender;
|
||||
@@ -87,8 +85,6 @@ public class PasswordResetService {
|
||||
|
||||
resetToken.setUsed(true);
|
||||
tokenRepository.save(resetToken);
|
||||
|
||||
authService.revokeAllSessions(user.getEmail());
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -4,11 +4,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
import jakarta.servlet.http.HttpSession;
|
||||
import jakarta.validation.Valid;
|
||||
import org.raddatz.familienarchiv.audit.AuditKind;
|
||||
import org.raddatz.familienarchiv.audit.AuditService;
|
||||
import org.raddatz.familienarchiv.auth.AuthService;
|
||||
import org.raddatz.familienarchiv.user.AdminUpdateUserRequest;
|
||||
import org.raddatz.familienarchiv.user.ChangePasswordDTO;
|
||||
import org.raddatz.familienarchiv.user.CreateUserRequest;
|
||||
@@ -30,15 +26,13 @@ import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.ResponseStatus;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.AllArgsConstructor;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/")
|
||||
@RequiredArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class UserController {
|
||||
private final UserService userService;
|
||||
private final AuthService authService;
|
||||
private final AuditService auditService;
|
||||
private UserService userService;
|
||||
|
||||
@GetMapping("users/me")
|
||||
public ResponseEntity<AppUser> getCurrentUser(Authentication authentication) {
|
||||
@@ -62,14 +56,9 @@ public class UserController {
|
||||
@PostMapping("users/me/password")
|
||||
@ResponseStatus(HttpStatus.NO_CONTENT)
|
||||
public void changePassword(Authentication authentication,
|
||||
HttpSession session,
|
||||
@RequestBody ChangePasswordDTO dto) {
|
||||
AppUser current = userService.findByEmail(authentication.getName());
|
||||
userService.changePassword(current.getId(), dto);
|
||||
int revoked = authService.revokeOtherSessions(session.getId(), authentication.getName());
|
||||
auditService.log(AuditKind.LOGOUT, current.getId(), null, Map.of(
|
||||
"reason", "password_change",
|
||||
"revokedCount", revoked));
|
||||
}
|
||||
|
||||
@GetMapping("users/{id}")
|
||||
@@ -112,18 +101,6 @@ public class UserController {
|
||||
return ResponseEntity.ok().build();
|
||||
}
|
||||
|
||||
@PostMapping("/users/{id}/force-logout")
|
||||
@RequirePermission(Permission.ADMIN_USER)
|
||||
public ResponseEntity<Map<String, Object>> forceLogout(Authentication authentication,
|
||||
@PathVariable UUID id) {
|
||||
AppUser target = userService.getById(id);
|
||||
int revoked = authService.revokeAllSessions(target.getEmail());
|
||||
auditService.log(AuditKind.ADMIN_FORCE_LOGOUT, actorId(authentication), null, Map.of(
|
||||
"targetUserId", target.getId().toString(),
|
||||
"revokedCount", revoked));
|
||||
return ResponseEntity.ok(Map.of("revokedCount", revoked));
|
||||
}
|
||||
|
||||
private UUID actorId(Authentication auth) {
|
||||
return userService.findByEmail(auth.getName()).getId();
|
||||
}
|
||||
|
||||
@@ -37,9 +37,6 @@ public class UserService {
|
||||
|
||||
private final AppUserRepository userRepository;
|
||||
private final UserGroupRepository groupRepository;
|
||||
// Injected directly (not via InviteService) to avoid a constructor injection cycle:
|
||||
// InviteService → UserService → InviteService. Spring Framework 7 forbids such cycles.
|
||||
private final InviteTokenRepository inviteTokenRepository;
|
||||
private final PasswordEncoder passwordEncoder;
|
||||
private final AuditService auditService;
|
||||
|
||||
@@ -291,10 +288,6 @@ public class UserService {
|
||||
|
||||
@Transactional
|
||||
public void deleteGroup(UUID id) {
|
||||
if (inviteTokenRepository.existsActiveWithGroupId(id)) {
|
||||
throw DomainException.conflict(ErrorCode.GROUP_HAS_ACTIVE_INVITES,
|
||||
"Cannot delete group " + id + " — referenced by one or more active invites");
|
||||
}
|
||||
groupRepository.deleteById(id);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
spring:
|
||||
jpa:
|
||||
show-sql: true
|
||||
# spring.session.cookie.secure is no longer a supported Boot 4.x property.
|
||||
# DefaultCookieSerializer auto-detects Secure from request.isSecure().
|
||||
# Direct HTTP in dev → isSecure()=false → cookie sent without Secure attribute.
|
||||
|
||||
springdoc:
|
||||
api-docs:
|
||||
|
||||
@@ -38,13 +38,6 @@ spring:
|
||||
starttls:
|
||||
enable: true
|
||||
|
||||
session:
|
||||
timeout: 28800s # 8 h idle timeout (MaxInactiveIntervalInSeconds)
|
||||
jdbc:
|
||||
initialize-schema: never # Flyway owns schema creation (V67)
|
||||
# Cookie name, SameSite, and Secure are configured via SpringSessionConfig#cookieSerializer
|
||||
# (spring.session.cookie.* is not supported in Spring Boot 4.x).
|
||||
|
||||
server:
|
||||
# Behind Caddy/reverse proxy: trust X-Forwarded-{Proto,For,Host} so that
|
||||
# request.getScheme(), redirect URLs, and Spring Session "Secure" cookies
|
||||
@@ -52,50 +45,9 @@ server:
|
||||
forward-headers-strategy: native
|
||||
|
||||
management:
|
||||
server:
|
||||
# Management port is separate from the app port so that:
|
||||
# (a) Caddy never proxies /actuator/* (it only routes :8080 → the app port)
|
||||
# (b) Prometheus scrapes backend:8081 directly inside archiv-net, not via Caddy
|
||||
# Note: in Spring Boot 4.0 the management port shares the security filter chain; /actuator/health
|
||||
# and /actuator/prometheus must be explicitly permitted in SecurityConfig — see SecurityConfig.java.
|
||||
port: 8081
|
||||
endpoints:
|
||||
web:
|
||||
exposure:
|
||||
include: health,info,prometheus,metrics
|
||||
endpoint:
|
||||
prometheus:
|
||||
enabled: true
|
||||
# Spring Boot 4.0: metrics export is disabled by default — explicitly opt in for Prometheus
|
||||
prometheus:
|
||||
metrics:
|
||||
export:
|
||||
enabled: true
|
||||
metrics:
|
||||
tags:
|
||||
# Common tag applied to every metric so Grafana's Spring Boot dashboard can filter by application name.
|
||||
# Override via MANAGEMENT_METRICS_TAGS_APPLICATION env var.
|
||||
application: ${spring.application.name}
|
||||
health:
|
||||
mail:
|
||||
enabled: false
|
||||
tracing:
|
||||
sampling:
|
||||
probability: 1.0 # 100% in dev; override via MANAGEMENT_TRACING_SAMPLING_PROBABILITY in prod compose
|
||||
|
||||
# OpenTelemetry trace export — failures are non-fatal (app starts cleanly without Tempo running)
|
||||
# Port 4318 = OTLP HTTP (the default transport for Spring Boot's HttpExporter).
|
||||
# Port 4317 is gRPC-only; sending HTTP/1.1 to it produces "Connection reset".
|
||||
otel:
|
||||
service:
|
||||
name: familienarchiv-backend
|
||||
exporter:
|
||||
otlp:
|
||||
endpoint: ${OTEL_EXPORTER_OTLP_ENDPOINT:http://localhost:4318}
|
||||
logs:
|
||||
exporter: none # Promtail captures Docker logs; disable OTLP log export (Tempo only accepts traces)
|
||||
metrics:
|
||||
exporter: none # Prometheus scrapes /actuator/prometheus; disable OTLP metric export to Tempo
|
||||
|
||||
springdoc:
|
||||
api-docs:
|
||||
@@ -125,27 +77,19 @@ app:
|
||||
password: ${APP_ADMIN_PASSWORD:admin123}
|
||||
|
||||
import:
|
||||
# Directory holding the normalizer's committed canonical artifacts
|
||||
# (canonical-{documents,persons,tag-tree}.xlsx + canonical-persons-tree.json).
|
||||
# The loader maps columns by header name — no positional indices (see ADR-025).
|
||||
dir: ${IMPORT_DIR:/import}
|
||||
col:
|
||||
index: 0
|
||||
box: 1
|
||||
folder: 2
|
||||
sender: 3
|
||||
receivers: 5
|
||||
date: 7
|
||||
location: 9
|
||||
tags: 10
|
||||
summary: 11
|
||||
transcription: 13
|
||||
|
||||
ocr:
|
||||
sender-model:
|
||||
activation-threshold: 100
|
||||
retrain-delta: 50
|
||||
|
||||
sentry:
|
||||
dsn: ${SENTRY_DSN:}
|
||||
environment: ${SPRING_PROFILES_ACTIVE:dev}
|
||||
traces-sample-rate: ${SENTRY_TRACES_SAMPLE_RATE:1.0}
|
||||
send-default-pii: false
|
||||
enable-tracing: true
|
||||
ignored-exceptions-for-type:
|
||||
- org.raddatz.familienarchiv.exception.DomainException
|
||||
|
||||
rate-limit:
|
||||
login:
|
||||
max-attempts-per-ip-email: 10
|
||||
max-attempts-per-ip: 20
|
||||
window-minutes: 15
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
-- Repeatable migration: sets the grafana_reader role's password from the
|
||||
-- ${grafanaDbPassword} placeholder (resolved by FlywayConfig from the
|
||||
-- GRAFANA_DB_PASSWORD environment variable). Flyway computes the checksum on
|
||||
-- the resolved migration content, so any change to GRAFANA_DB_PASSWORD changes
|
||||
-- the checksum and re-applies this migration on the next boot. That makes
|
||||
-- password rotation a "change env var + restart" operation — no manual psql.
|
||||
--
|
||||
-- V68 created the role itself (without a usable password). This file owns the
|
||||
-- password lifecycle; nothing else writes it.
|
||||
DO $$
|
||||
BEGIN
|
||||
EXECUTE format('ALTER ROLE grafana_reader WITH PASSWORD %L', '${grafanaDbPassword}');
|
||||
END
|
||||
$$;
|
||||
@@ -1,3 +0,0 @@
|
||||
-- The composite PK (invite_token_id, group_id) does not support efficient lookups by group_id alone.
|
||||
-- Add a dedicated index to support existsActiveWithGroupId queries.
|
||||
CREATE INDEX idx_itg_group_id ON invite_token_group_ids (group_id);
|
||||
@@ -1,27 +0,0 @@
|
||||
-- Re-introduces the Spring Session JDBC tables that were dropped by V2 as unused.
|
||||
-- DDL copied verbatim from Spring Session 3.x schema-postgresql.sql.
|
||||
-- See ADR-020 and issue #523.
|
||||
|
||||
CREATE TABLE spring_session (
|
||||
PRIMARY_ID CHAR(36) NOT NULL,
|
||||
SESSION_ID CHAR(36) NOT NULL,
|
||||
CREATION_TIME BIGINT NOT NULL,
|
||||
LAST_ACCESS_TIME BIGINT NOT NULL,
|
||||
MAX_INACTIVE_INTERVAL INT NOT NULL,
|
||||
EXPIRY_TIME BIGINT NOT NULL,
|
||||
PRINCIPAL_NAME VARCHAR(100),
|
||||
CONSTRAINT spring_session_pk PRIMARY KEY (PRIMARY_ID)
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX spring_session_ix1 ON spring_session (SESSION_ID);
|
||||
CREATE INDEX spring_session_ix2 ON spring_session (EXPIRY_TIME);
|
||||
CREATE INDEX spring_session_ix3 ON spring_session (PRINCIPAL_NAME);
|
||||
|
||||
CREATE TABLE spring_session_attributes (
|
||||
SESSION_PRIMARY_ID CHAR(36) NOT NULL,
|
||||
ATTRIBUTE_NAME VARCHAR(200) NOT NULL,
|
||||
ATTRIBUTE_BYTES BYTEA NOT NULL,
|
||||
CONSTRAINT spring_session_attributes_pk PRIMARY KEY (SESSION_PRIMARY_ID, ATTRIBUTE_NAME),
|
||||
CONSTRAINT spring_session_attributes_fk FOREIGN KEY (SESSION_PRIMARY_ID)
|
||||
REFERENCES spring_session (PRIMARY_ID) ON DELETE CASCADE
|
||||
);
|
||||
@@ -1,17 +0,0 @@
|
||||
-- Read-only role used by the Grafana PostgreSQL datasource for the PO Overview
|
||||
-- dashboard (issue #651). The role is created here without a usable password
|
||||
-- (LOGIN-capable but no password set); R__grafana_reader_password.sql sets the
|
||||
-- password from GRAFANA_DB_PASSWORD on every boot, so rotation is just "bump
|
||||
-- the env var and restart the backend" — see docs/adr/024-* and the rotation
|
||||
-- runbook in docs/DEPLOYMENT.md.
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = 'grafana_reader') THEN
|
||||
CREATE ROLE grafana_reader WITH LOGIN;
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
|
||||
GRANT CONNECT ON DATABASE ${flyway:database} TO grafana_reader;
|
||||
GRANT USAGE ON SCHEMA public TO grafana_reader;
|
||||
GRANT SELECT ON audit_log, documents, transcription_blocks TO grafana_reader;
|
||||
@@ -1,67 +0,0 @@
|
||||
-- Phase 2 of "Handling the Unknowns": the schema foundation.
|
||||
-- Consolidates every new import/precision/attribution/identity column into ONE
|
||||
-- migration with a single owner so downstream phases (importer, rendering, persons
|
||||
-- directory) compile against a finished, collision-free schema. See ADR-025.
|
||||
--
|
||||
-- This file is forward-only and immutable once shipped (Flyway checksum model):
|
||||
-- any fix goes in a later version, never an edit here.
|
||||
|
||||
-- ─── documents: date precision, range end, raw date, raw attribution ──────────
|
||||
|
||||
-- Range end is only set for RANGE precision (open-ended ranges allowed → end may be null).
|
||||
ALTER TABLE documents ADD COLUMN meta_date_end date;
|
||||
|
||||
-- Original date cell, verbatim, for provenance and "as written" display (Phase 4).
|
||||
ALTER TABLE documents ADD COLUMN meta_date_raw text;
|
||||
|
||||
-- Raw attribution preserved even when a person is linked.
|
||||
ALTER TABLE documents ADD COLUMN sender_text text;
|
||||
ALTER TABLE documents ADD COLUMN receiver_text text;
|
||||
|
||||
-- Bound user-influenced spreadsheet text at the DB layer (mirrors transcription_blocks
|
||||
-- length cap in V18). Defense in depth against malformed/huge import cells.
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_raw_length CHECK (length(meta_date_raw) <= 10000);
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_sender_text_length CHECK (length(sender_text) <= 10000);
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_receiver_text_length CHECK (length(receiver_text) <= 10000);
|
||||
|
||||
-- Precision enum — added with a DB default of 'UNKNOWN', backfilled, then made NOT NULL.
|
||||
-- The DEFAULT serves two purposes: (1) existing rows get 'UNKNOWN' immediately, and
|
||||
-- (2) raw-SQL inserts that omit the column (test fixtures, ad-hoc data loads) get a sane,
|
||||
-- CHECK-valid value instead of violating the NOT NULL constraint. JPA saves still set it
|
||||
-- explicitly via the entity's @Builder.Default = DatePrecision.UNKNOWN.
|
||||
ALTER TABLE documents ADD COLUMN meta_date_precision varchar(16) DEFAULT 'UNKNOWN';
|
||||
|
||||
UPDATE documents
|
||||
SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END;
|
||||
|
||||
ALTER TABLE documents ALTER COLUMN meta_date_precision SET NOT NULL;
|
||||
|
||||
-- Fail-closed allowlist of the seven precision values (verbatim mirror of the
|
||||
-- normalizer's Precision enum). The DB enforces validity independent of the Java enum.
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_precision
|
||||
CHECK (meta_date_precision IN ('DAY', 'MONTH', 'SEASON', 'YEAR', 'RANGE', 'APPROX', 'UNKNOWN'));
|
||||
|
||||
-- A non-null range end is permitted only when precision = RANGE. A RANGE row MAY have a
|
||||
-- null end (open-ended range), so the rule is one-directional, not biconditional.
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_only_for_range
|
||||
CHECK (meta_date_end IS NULL OR meta_date_precision = 'RANGE');
|
||||
|
||||
-- For ranges with both endpoints, the end must not precede the start.
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_after_start
|
||||
CHECK (meta_date_end IS NULL OR meta_date IS NULL OR meta_date_end >= meta_date);
|
||||
|
||||
-- ─── persons: source_ref (import identity) + provisional flag ─────────────────
|
||||
|
||||
-- The normalizer person_id: join key for documents → persons and idempotency key for
|
||||
-- re-import. Nullable (manually created persons never have one); unique among non-nulls.
|
||||
ALTER TABLE persons ADD COLUMN source_ref varchar(255);
|
||||
CREATE UNIQUE INDEX idx_persons_source_ref ON persons (source_ref);
|
||||
|
||||
-- A provisional person is one the importer inferred but could not confidently identify.
|
||||
-- Stays false until Phase 3 (importer) sets it; no code path writes true in this phase.
|
||||
ALTER TABLE persons ADD COLUMN provisional boolean NOT NULL DEFAULT false;
|
||||
|
||||
-- ─── tag: source_ref (import identity, keyed on canonical tag_path) ───────────
|
||||
|
||||
ALTER TABLE tag ADD COLUMN source_ref varchar(255);
|
||||
CREATE UNIQUE INDEX idx_tag_source_ref ON tag (source_ref);
|
||||
@@ -1,26 +0,0 @@
|
||||
-- #689: persist the hand-curated "G 0…G 5" generation index from
|
||||
-- canonical-persons.xlsx so the Stammbaum layout can use it as a strict
|
||||
-- rank anchor (replacing the current iterative longest-path heuristic that
|
||||
-- silently misplaces loose spouses with their own parents in the graph).
|
||||
--
|
||||
-- Nullable: pre-import rows and persons outside the curated family graph
|
||||
-- legitimately have no generation. The canonical importer back-fills via
|
||||
-- preferHuman on the next run; a human-edited value is never overwritten
|
||||
-- (see ADR-025).
|
||||
|
||||
ALTER TABLE persons ADD COLUMN generation SMALLINT;
|
||||
|
||||
-- Allowlist of valid generation indices. The 0..10 bounds mirror
|
||||
-- PersonGeneration.MIN_GENERATION / MAX_GENERATION in Java — keep the
|
||||
-- two in sync (the DTO @Min/@Max and both importer range guards read from
|
||||
-- those Java constants). Current data tops out at G 5, but a future G 6 →
|
||||
-- G 10 widening needs no migration. A G −1 ancestor would require a
|
||||
-- separate one-shot shift migration (out of scope here; the layout's
|
||||
-- normalise step already handles negative seeds at render time).
|
||||
ALTER TABLE persons ADD CONSTRAINT chk_generation_range
|
||||
CHECK (generation IS NULL OR generation BETWEEN 0 AND 10);
|
||||
|
||||
-- Partial index: only the curated rows (≈ 163 of 1,105) ever get a value,
|
||||
-- and the layout only ever queries for non-null rows.
|
||||
CREATE INDEX idx_persons_generation ON persons (generation)
|
||||
WHERE generation IS NOT NULL;
|
||||
@@ -1,53 +0,0 @@
|
||||
-- Move person-delete referential integrity from application code into the database (#684).
|
||||
--
|
||||
-- Before this migration, PersonService.deletePerson nulled documents.sender_id and removed
|
||||
-- document_receivers rows in Java before deleting the person, because the two V1 FKs into
|
||||
-- persons had no ON DELETE behaviour. Any other delete path (a future endpoint, a manual
|
||||
-- psql, a batch job) could still orphan rows or 500. This migration makes the database the
|
||||
-- single source of truth so a person delete is safe from every path.
|
||||
--
|
||||
-- Cascade boundary: the cascade stays STRICTLY at the join/reference layer and NEVER reaches
|
||||
-- documents rows — a cascade into documents would destroy historical letters. sender_id is
|
||||
-- SET NULL (documents.senderText preserves the raw textual attribution); the receiver join
|
||||
-- row and the @-mention sidecar row are dropped.
|
||||
--
|
||||
-- No NOT VALID + VALIDATE two-step: these tables are small (thousands of rows → sub-second
|
||||
-- ACCESS EXCLUSIVE lock). Do NOT copy this drop-and-recreate pattern onto a large table.
|
||||
--
|
||||
-- Not audit-logged: a DB ON DELETE cascade runs below AuditService — a known, accepted trade.
|
||||
-- The person-delete action itself is still logged at the service layer.
|
||||
|
||||
-- documents.sender_id → ON DELETE SET NULL (deleted sender clears the link; the document survives).
|
||||
ALTER TABLE public.documents
|
||||
DROP CONSTRAINT fkl5xhww7es3b4um01vmly4y18m,
|
||||
ADD CONSTRAINT fkl5xhww7es3b4um01vmly4y18m
|
||||
FOREIGN KEY (sender_id) REFERENCES public.persons(id) ON DELETE SET NULL;
|
||||
|
||||
-- document_receivers.person_id → ON DELETE CASCADE (drop the join row), the symmetric
|
||||
-- completion of V14, which added the same to the document_id side of this table.
|
||||
ALTER TABLE public.document_receivers
|
||||
DROP CONSTRAINT fkcg7r68qvosqricx1betgrlt7s,
|
||||
ADD CONSTRAINT fkcg7r68qvosqricx1betgrlt7s
|
||||
FOREIGN KEY (person_id) REFERENCES public.persons(id) ON DELETE CASCADE;
|
||||
|
||||
-- Soft reference fix: transcription_block_mentioned_persons.person_id was a UUID with no FK
|
||||
-- (V56), so deleting a person left dangling mention rows. Give it a real FK with CASCADE.
|
||||
-- This reverses V56's deliberate "no FK on person_id" choice — that comment is now historical
|
||||
-- but is intentionally left untouched, because editing an already-applied migration changes its
|
||||
-- Flyway checksum and would fail validateOnMigrate in prod. ADR-032 is the authoritative record.
|
||||
-- Clean up pre-existing orphans first — production likely holds dangling rows because the old
|
||||
-- deletePerson never cleaned mention rows, and the ADD CONSTRAINT validation scan fails on them.
|
||||
-- A DO block with RAISE NOTICE surfaces the purge count: Flyway runs each statement via JDBC
|
||||
-- and discards a trailing SELECT's result set, so a "SELECT count(*)" would log nothing.
|
||||
DO $$
|
||||
DECLARE removed int;
|
||||
BEGIN
|
||||
DELETE FROM transcription_block_mentioned_persons m
|
||||
WHERE NOT EXISTS (SELECT 1 FROM persons p WHERE p.id = m.person_id);
|
||||
GET DIAGNOSTICS removed = ROW_COUNT;
|
||||
RAISE NOTICE 'V71 orphaned_mention_rows_removed=%', removed;
|
||||
END $$;
|
||||
|
||||
ALTER TABLE public.transcription_block_mentioned_persons
|
||||
ADD CONSTRAINT fk_tbmp_person
|
||||
FOREIGN KEY (person_id) REFERENCES public.persons(id) ON DELETE CASCADE;
|
||||
@@ -1,63 +0,0 @@
|
||||
package org.raddatz.familienarchiv;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.web.server.LocalManagementPort;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.springframework.web.client.DefaultResponseErrorHandler;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@ActiveProfiles("test")
|
||||
@Import(PostgresContainerConfig.class)
|
||||
class ActuatorPrometheusIT {
|
||||
|
||||
@LocalManagementPort
|
||||
private int managementPort;
|
||||
|
||||
@MockitoBean
|
||||
S3Client s3Client;
|
||||
|
||||
@Test
|
||||
void prometheus_endpoint_returns_200_without_credentials() {
|
||||
ResponseEntity<String> response = noThrowTemplate().getForEntity(
|
||||
"http://localhost:" + managementPort + "/actuator/prometheus", String.class);
|
||||
|
||||
assertThat(response.getStatusCode().value()).isEqualTo(200);
|
||||
}
|
||||
|
||||
@Test
|
||||
void prometheus_endpoint_returns_jvm_metrics() {
|
||||
ResponseEntity<String> response = noThrowTemplate().getForEntity(
|
||||
"http://localhost:" + managementPort + "/actuator/prometheus", String.class);
|
||||
|
||||
assertThat(response.getBody()).contains("jvm_memory_used_bytes");
|
||||
}
|
||||
|
||||
@Test
|
||||
void actuator_metrics_requires_authentication() {
|
||||
ResponseEntity<String> response = noThrowTemplate().getForEntity(
|
||||
"http://localhost:" + managementPort + "/actuator/metrics", String.class);
|
||||
|
||||
assertThat(response.getStatusCode().value()).isEqualTo(401);
|
||||
}
|
||||
|
||||
private RestTemplate noThrowTemplate() {
|
||||
RestTemplate template = new RestTemplate();
|
||||
template.setErrorHandler(new DefaultResponseErrorHandler() {
|
||||
@Override
|
||||
public boolean hasError(org.springframework.http.client.ClientHttpResponse response) throws IOException {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
return template;
|
||||
}
|
||||
}
|
||||
@@ -1,55 +0,0 @@
|
||||
package org.raddatz.familienarchiv;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.web.server.LocalManagementPort;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.springframework.web.client.DefaultResponseErrorHandler;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@ActiveProfiles("test")
|
||||
@Import(PostgresContainerConfig.class)
|
||||
class ActuatorSecurityTest {
|
||||
|
||||
@LocalManagementPort
|
||||
private int managementPort;
|
||||
|
||||
@MockitoBean
|
||||
S3Client s3Client;
|
||||
|
||||
@Test
|
||||
void actuator_health_is_accessible_without_authentication() {
|
||||
ResponseEntity<String> response = noThrowTemplate().getForEntity(
|
||||
"http://localhost:" + managementPort + "/actuator/health", String.class);
|
||||
|
||||
assertThat(response.getStatusCode().value()).isEqualTo(200);
|
||||
}
|
||||
|
||||
@Test
|
||||
void actuator_env_requires_authentication() {
|
||||
ResponseEntity<String> response = noThrowTemplate().getForEntity(
|
||||
"http://localhost:" + managementPort + "/actuator/env", String.class);
|
||||
|
||||
assertThat(response.getStatusCode().value()).isEqualTo(401);
|
||||
}
|
||||
|
||||
private RestTemplate noThrowTemplate() {
|
||||
RestTemplate template = new RestTemplate();
|
||||
template.setErrorHandler(new DefaultResponseErrorHandler() {
|
||||
@Override
|
||||
public boolean hasError(org.springframework.http.client.ClientHttpResponse response) throws IOException {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
return template;
|
||||
}
|
||||
}
|
||||
@@ -1,18 +1,14 @@
|
||||
package org.raddatz.familienarchiv;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.testcontainers.service.connection.ServiceConnection;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.testcontainers.containers.PostgreSQLContainer;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
|
||||
@ActiveProfiles("test")
|
||||
@Import(PostgresContainerConfig.class)
|
||||
@@ -21,18 +17,9 @@ class ApplicationContextTest {
|
||||
@MockitoBean
|
||||
S3Client s3Client;
|
||||
|
||||
@Autowired
|
||||
ApplicationContext ctx;
|
||||
|
||||
@Test
|
||||
void contextLoads() {
|
||||
// verifies that the Spring context starts successfully with all beans wired,
|
||||
// Flyway migrations applied, and no configuration errors
|
||||
}
|
||||
|
||||
@Test
|
||||
void sentry_is_disabled_when_no_dsn_is_configured() {
|
||||
// application-test.yaml has no sentry.dsn — SDK must stay inactive so tests are clean
|
||||
assertThat(io.sentry.Sentry.isEnabled()).isFalse();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -479,191 +479,6 @@ class MigrationIntegrationTest {
|
||||
assertThat(count).isEqualTo(1);
|
||||
}
|
||||
|
||||
// ─── V69: import/precision/attribution/identity schema foundation ────────
|
||||
|
||||
@Test
|
||||
void v69_metaDatePrecisionColumn_isNotNull() {
|
||||
Integer count = jdbc.queryForObject(
|
||||
"""
|
||||
SELECT COUNT(*) FROM information_schema.columns
|
||||
WHERE table_schema = 'public'
|
||||
AND table_name = 'documents'
|
||||
AND column_name = 'meta_date_precision'
|
||||
AND is_nullable = 'NO'
|
||||
""",
|
||||
Integer.class);
|
||||
assertThat(count).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_backfillSql_setsDatedRowsToDayPrecision() {
|
||||
// Re-run the migration's backfill UPDATE on a freshly dated row to prove the rule.
|
||||
UUID docId = createDocumentWithDate("1943-05-12");
|
||||
|
||||
jdbc.update(V69_BACKFILL_PRECISION_SQL);
|
||||
|
||||
String precision = jdbc.queryForObject(
|
||||
"SELECT meta_date_precision FROM documents WHERE id = ?", String.class, docId);
|
||||
assertThat(precision).isEqualTo("DAY");
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_backfillSql_setsUndatedRowsToUnknownPrecision() {
|
||||
UUID docId = createDocument(); // no meta_date
|
||||
|
||||
jdbc.update(V69_BACKFILL_PRECISION_SQL);
|
||||
|
||||
String precision = jdbc.queryForObject(
|
||||
"SELECT meta_date_precision FROM documents WHERE id = ?", String.class, docId);
|
||||
assertThat(precision).isEqualTo("UNKNOWN");
|
||||
}
|
||||
|
||||
// Mirrors the backfill UPDATE shipped in V69; idempotent for verification.
|
||||
private static final String V69_BACKFILL_PRECISION_SQL = """
|
||||
UPDATE documents
|
||||
SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END
|
||||
""";
|
||||
|
||||
@Test
|
||||
void v69_precisionCheck_rejectsValueOutsideEnum() {
|
||||
UUID docId = createDocument();
|
||||
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update("UPDATE documents SET meta_date_precision = 'BOGUS' WHERE id = ?", docId)
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_metaDateEndCheck_rejectsNonNullEndWhenPrecisionNotRange() {
|
||||
UUID docId = createDocumentWithDate("1943-05-12"); // precision DAY
|
||||
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update("UPDATE documents SET meta_date_end = '1943-06-01' WHERE id = ?", docId)
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_metaDateEndCheck_allowsNonNullEndWhenPrecisionRange() {
|
||||
UUID docId = createDocumentWithDate("1943-05-12");
|
||||
|
||||
int rows = jdbc.update(
|
||||
"UPDATE documents SET meta_date_precision = 'RANGE', meta_date_end = '1943-06-01' WHERE id = ?",
|
||||
docId);
|
||||
assertThat(rows).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_metaDateEndCheck_allowsRangeWithNullEnd() {
|
||||
// Loose semantics: the normalizer may emit an open-ended RANGE (start only).
|
||||
UUID docId = createDocumentWithDate("1943-05-12");
|
||||
|
||||
int rows = jdbc.update(
|
||||
"UPDATE documents SET meta_date_precision = 'RANGE' WHERE id = ?", docId);
|
||||
assertThat(rows).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_metaDateEndCheck_allowsRangeWithBothEndpointsNull() {
|
||||
// Fully-open RANGE: neither start (meta_date) nor end (meta_date_end) is set.
|
||||
// Both CHECKs hold (end IS NULL passes chk_meta_date_end_only_for_range; both-null
|
||||
// passes chk_meta_date_end_after_start), so the row survives. This locks the actual
|
||||
// DB behavior so a future tightening to a biconditional rule is a deliberate change.
|
||||
UUID docId = createDocument(); // null meta_date
|
||||
|
||||
int rows = jdbc.update(
|
||||
"UPDATE documents SET meta_date_precision = 'RANGE' WHERE id = ?", docId);
|
||||
assertThat(rows).isEqualTo(1);
|
||||
|
||||
Object metaDate = jdbc.queryForObject("SELECT meta_date FROM documents WHERE id = ?", Object.class, docId);
|
||||
Object metaDateEnd = jdbc.queryForObject(
|
||||
"SELECT meta_date_end FROM documents WHERE id = ?", Object.class, docId);
|
||||
assertThat(metaDate).isNull();
|
||||
assertThat(metaDateEnd).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_rangeOrderCheck_rejectsEndBeforeStart() {
|
||||
UUID docId = createDocumentWithDate("1943-05-12");
|
||||
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update(
|
||||
"UPDATE documents SET meta_date_precision = 'RANGE', meta_date_end = '1943-01-01' WHERE id = ?",
|
||||
docId)
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_metaDateRawCheck_rejectsOverlongText() {
|
||||
UUID docId = createDocument();
|
||||
String tooLong = "x".repeat(10001);
|
||||
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update("UPDATE documents SET meta_date_raw = ? WHERE id = ?", tooLong, docId)
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_senderTextAndReceiverText_storeRawAttribution() {
|
||||
UUID docId = createDocument();
|
||||
|
||||
int rows = jdbc.update(
|
||||
"UPDATE documents SET sender_text = 'Oma Anna', receiver_text = 'Tante Grete' WHERE id = ?",
|
||||
docId);
|
||||
assertThat(rows).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Transactional(propagation = Propagation.NOT_SUPPORTED)
|
||||
void v69_personsSourceRef_uniqueIndexRejectsDuplicate() {
|
||||
jdbc.update(
|
||||
"INSERT INTO persons (id, last_name, source_ref) VALUES (gen_random_uuid(), 'A', 'person:dup')");
|
||||
try {
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update(
|
||||
"INSERT INTO persons (id, last_name, source_ref) VALUES (gen_random_uuid(), 'B', 'person:dup')")
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
} finally {
|
||||
jdbc.update("DELETE FROM persons WHERE source_ref = 'person:dup'");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@Transactional(propagation = Propagation.NOT_SUPPORTED)
|
||||
void v69_personsSourceRef_allowsMultipleNulls() {
|
||||
UUID a = createPerson("Null", "RefA");
|
||||
UUID b = createPerson("Null", "RefB");
|
||||
try {
|
||||
String refA = jdbc.queryForObject("SELECT source_ref FROM persons WHERE id = ?", String.class, a);
|
||||
String refB = jdbc.queryForObject("SELECT source_ref FROM persons WHERE id = ?", String.class, b);
|
||||
assertThat(refA).isNull();
|
||||
assertThat(refB).isNull();
|
||||
} finally {
|
||||
jdbc.update("DELETE FROM persons WHERE id IN (?, ?)", a, b);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_personsProvisional_defaultsToFalse() {
|
||||
UUID id = createPerson("Provisional", "Default");
|
||||
|
||||
Boolean provisional = jdbc.queryForObject(
|
||||
"SELECT provisional FROM persons WHERE id = ?", Boolean.class, id);
|
||||
assertThat(provisional).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
@Transactional(propagation = Propagation.NOT_SUPPORTED)
|
||||
void v69_tagSourceRef_uniqueIndexRejectsDuplicate() {
|
||||
jdbc.update("INSERT INTO tag (id, name, source_ref) VALUES (gen_random_uuid(), 'TagDupA', 'tag:dup')");
|
||||
try {
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update("INSERT INTO tag (id, name, source_ref) VALUES (gen_random_uuid(), 'TagDupB', 'tag:dup')")
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
} finally {
|
||||
jdbc.update("DELETE FROM tag WHERE source_ref = 'tag:dup'");
|
||||
}
|
||||
}
|
||||
|
||||
// ─── helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
private UUID createPerson(String firstName, String lastName) {
|
||||
@@ -689,12 +504,6 @@ class MigrationIntegrationTest {
|
||||
return doc.getId();
|
||||
}
|
||||
|
||||
private UUID createDocumentWithDate(String isoDate) {
|
||||
UUID id = createDocument();
|
||||
jdbc.update("UPDATE documents SET meta_date = ?::date WHERE id = ?", isoDate, id);
|
||||
return id;
|
||||
}
|
||||
|
||||
private UUID insertAnnotation(UUID docId) {
|
||||
UUID id = UUID.randomUUID();
|
||||
jdbc.update("""
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
package org.raddatz.familienarchiv.audit;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.PostgresContainerConfig;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.test.annotation.DirtiesContext;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.springframework.transaction.support.TransactionTemplate;
|
||||
@@ -18,6 +18,7 @@ import static org.awaitility.Awaitility.await;
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
|
||||
@ActiveProfiles("test")
|
||||
@Import(PostgresContainerConfig.class)
|
||||
@DirtiesContext(classMode = DirtiesContext.ClassMode.AFTER_EACH_TEST_METHOD)
|
||||
class AuditServiceIntegrationTest {
|
||||
|
||||
@MockitoBean S3Client s3Client;
|
||||
@@ -25,11 +26,6 @@ class AuditServiceIntegrationTest {
|
||||
@Autowired AuditLogRepository auditLogRepository;
|
||||
@Autowired TransactionTemplate transactionTemplate;
|
||||
|
||||
@BeforeEach
|
||||
void resetAuditLog() {
|
||||
auditLogRepository.deleteAll();
|
||||
}
|
||||
|
||||
@Test
|
||||
void logAfterCommit_writes_ANNOTATION_CREATED_row_after_transaction_commits() {
|
||||
transactionTemplate.execute(status -> {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user