Compare commits
22 Commits
feature/66
...
778402fec7
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
778402fec7 | ||
|
|
6db5c2d1c4 | ||
|
|
2f981ef69d | ||
|
|
7074c9e4ad | ||
|
|
8eced9c9da | ||
|
|
28de7da9a6 | ||
|
|
8189e14a4b | ||
|
|
bdc37b1156 | ||
|
|
314f686963 | ||
|
|
a23fa4c668 | ||
|
|
05ab8b13a0 | ||
|
|
1052295a6e | ||
|
|
c3d1bea623 | ||
|
|
97585a9cd4 | ||
|
|
c32607e133 | ||
|
|
d7eca25eb7 | ||
|
|
fdb9ae31ae | ||
|
|
14deae962a | ||
|
|
924c76f99f | ||
|
|
99a4230bb9 | ||
|
|
38818998e5 | ||
|
|
9b4da70f52 |
@@ -39,12 +39,6 @@ PORT_PROMETHEUS=9090
|
||||
# Grafana admin password — change this before exposing Grafana beyond localhost
|
||||
GRAFANA_ADMIN_PASSWORD=changeme
|
||||
|
||||
# Password for the read-only grafana_reader PostgreSQL role used by the PO
|
||||
# Overview dashboard. Consumed by Flyway V68 (to set the role's password) and
|
||||
# by Grafana's PostgreSQL datasource (to connect). REQUIRED in production —
|
||||
# generate with: openssl rand -hex 32
|
||||
GRAFANA_DB_PASSWORD=changeme-generate-with-openssl-rand-hex-32
|
||||
|
||||
# GlitchTip domain — production: use https://glitchtip.archiv.raddatz.cloud (must match Caddy vhost)
|
||||
GLITCHTIP_DOMAIN=http://localhost:3002
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ jobs:
|
||||
name: Unit & Component Tests
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: mcr.microsoft.com/playwright:v1.60.0-noble
|
||||
image: mcr.microsoft.com/playwright:v1.58.2-noble
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
@@ -29,10 +29,6 @@ jobs:
|
||||
run: npm ci
|
||||
working-directory: frontend
|
||||
|
||||
- name: Security audit (no dev deps)
|
||||
run: npm audit --audit-level=high --omit=dev
|
||||
working-directory: frontend
|
||||
|
||||
- name: Compile Paraglide i18n
|
||||
run: npx @inlang/paraglide-js compile --project ./project.inlang --outdir ./src/lib/paraglide
|
||||
working-directory: frontend
|
||||
@@ -65,29 +61,6 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Assert no raw document date rendered via {@html} (CWE-79 — #666)
|
||||
shell: bash
|
||||
run: |
|
||||
# meta_date_raw is untrusted verbatim spreadsheet text — it must render via
|
||||
# Svelte default escaping, never {@html}. This guard flags any {@html ...}
|
||||
# whose expression references a raw-date variable. A comment mentioning
|
||||
# "{@html}" without a raw token inside the braces does NOT match.
|
||||
# The token list MUST cover every variable that carries the raw value:
|
||||
# DocumentDate.svelte exposes it via the `raw` prop, so `\braw\b` is included.
|
||||
# Grow this list whenever a new raw-bearing variable name is introduced.
|
||||
pattern='\{@html[^}]*(metaDateRaw|documentDateRaw|rawDate|\braw\b)'
|
||||
# Self-test: the regex must catch the dangerous forms and ignore the comment form.
|
||||
printf '{@html doc.metaDateRaw}\n' | grep -qP "$pattern" \
|
||||
|| { echo "FAIL: guard self-test — regex missed the unsafe {@html metaDateRaw} form"; exit 1; }
|
||||
printf '{@html raw}\n' | grep -qP "$pattern" \
|
||||
|| { echo "FAIL: guard self-test — regex missed the unsafe {@html raw} form (DocumentDate prop)"; exit 1; }
|
||||
printf 'never use {@html} for this\n' | grep -qvP "$pattern" \
|
||||
|| { echo "FAIL: guard self-test — regex wrongly flagged a {@html} comment"; exit 1; }
|
||||
if grep -rPln "$pattern" --include='*.svelte' frontend/src/; then
|
||||
echo "FAIL: meta_date_raw rendered via {@html} — use default {…} escaping (CWE-79, #666)."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Assert no (upload|download)-artifact past v3
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
@@ -31,7 +31,6 @@ name: nightly
|
||||
# STAGING_APP_ADMIN_USERNAME
|
||||
# STAGING_APP_ADMIN_PASSWORD
|
||||
# GRAFANA_ADMIN_PASSWORD
|
||||
# GRAFANA_DB_PASSWORD (read-only grafana_reader DB role, issue #651)
|
||||
# GLITCHTIP_SECRET_KEY
|
||||
# SENTRY_DSN (set after GlitchTip first-run; empty = Sentry disabled)
|
||||
|
||||
@@ -80,8 +79,6 @@ jobs:
|
||||
IMPORT_HOST_DIR=/srv/familienarchiv-staging/import
|
||||
POSTGRES_USER=archiv
|
||||
SENTRY_DSN=${{ secrets.SENTRY_DSN }}
|
||||
VITE_SENTRY_DSN=${{ secrets.VITE_SENTRY_DSN }}
|
||||
GRAFANA_DB_PASSWORD=${{ secrets.GRAFANA_DB_PASSWORD }}
|
||||
EOF
|
||||
|
||||
- name: Verify backend /import:ro mount is wired
|
||||
@@ -145,7 +142,6 @@ jobs:
|
||||
cp docker-compose.observability.yml /opt/familienarchiv/
|
||||
cat > /opt/familienarchiv/obs-secrets.env <<'EOF'
|
||||
GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
||||
GRAFANA_DB_PASSWORD=${{ secrets.GRAFANA_DB_PASSWORD }}
|
||||
GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }}
|
||||
POSTGRES_PASSWORD=${{ secrets.STAGING_POSTGRES_PASSWORD }}
|
||||
POSTGRES_HOST=archiv-staging-db-1
|
||||
@@ -256,20 +252,20 @@ jobs:
|
||||
URL="https://$HOST"
|
||||
HOST_IP=$(awk 'NR>1 && $2=="00000000"{h=$3;printf "%d.%d.%d.%d\n",strtonum("0x"substr(h,7,2)),strtonum("0x"substr(h,5,2)),strtonum("0x"substr(h,3,2)),strtonum("0x"substr(h,1,2));exit}' /proc/net/route)
|
||||
[ -n "$HOST_IP" ] || { echo "ERROR: could not detect Docker bridge gateway via /proc/net/route"; exit 1; }
|
||||
RESOLVE=(--resolve "$HOST:443:$HOST_IP")
|
||||
RESOLVE="--resolve $HOST:443:$HOST_IP"
|
||||
echo "Smoke test: $URL (pinned to $HOST_IP via bridge gateway)"
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 "$URL/login" -o /dev/null
|
||||
curl -fsS "$RESOLVE" --max-time 10 "$URL/login" -o /dev/null
|
||||
# Pin the preload-list-eligible HSTS value, not just header presence:
|
||||
# a degraded `max-age=1` or a dropped `includeSubDomains; preload` must
|
||||
# fail this check rather than pass it silently.
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
||||
curl -fsS "$RESOLVE" --max-time 10 -I "$URL/" \
|
||||
| grep -Eqi 'strict-transport-security:[[:space:]]*max-age=31536000.*includeSubDomains.*preload'
|
||||
# Permissions-Policy denies APIs the app does not use (camera,
|
||||
# microphone, geolocation). A regression that loosens or drops the
|
||||
# header now fails the smoke step.
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
||||
curl -fsS "$RESOLVE" --max-time 10 -I "$URL/" \
|
||||
| grep -Eqi 'permissions-policy:[[:space:]]*camera=\(\),[[:space:]]*microphone=\(\),[[:space:]]*geolocation=\(\)'
|
||||
status=$(curl -s "${RESOLVE[@]}" -o /dev/null -w "%{http_code}" --max-time 10 "$URL/actuator/health")
|
||||
status=$(curl -s "$RESOLVE" -o /dev/null -w "%{http_code}" --max-time 10 "$URL/actuator/health")
|
||||
[ "$status" = "404" ] || { echo "expected 404 from /actuator/health, got $status"; exit 1; }
|
||||
echo "All smoke checks passed"
|
||||
|
||||
|
||||
@@ -35,7 +35,6 @@ name: release
|
||||
# MAIL_USERNAME
|
||||
# MAIL_PASSWORD
|
||||
# GRAFANA_ADMIN_PASSWORD
|
||||
# GRAFANA_DB_PASSWORD (read-only grafana_reader DB role, issue #651)
|
||||
# GLITCHTIP_SECRET_KEY
|
||||
# SENTRY_DSN (set after GlitchTip first-run; empty = Sentry disabled)
|
||||
|
||||
@@ -78,7 +77,6 @@ jobs:
|
||||
IMPORT_HOST_DIR=/srv/familienarchiv-production/import
|
||||
POSTGRES_USER=archiv
|
||||
SENTRY_DSN=${{ secrets.SENTRY_DSN }}
|
||||
GRAFANA_DB_PASSWORD=${{ secrets.GRAFANA_DB_PASSWORD }}
|
||||
EOF
|
||||
|
||||
- name: Build images
|
||||
@@ -112,7 +110,6 @@ jobs:
|
||||
cp docker-compose.observability.yml /opt/familienarchiv/
|
||||
cat > /opt/familienarchiv/obs-secrets.env <<'EOF'
|
||||
GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
||||
GRAFANA_DB_PASSWORD=${{ secrets.GRAFANA_DB_PASSWORD }}
|
||||
GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }}
|
||||
POSTGRES_PASSWORD=${{ secrets.PROD_POSTGRES_PASSWORD }}
|
||||
POSTGRES_HOST=archiv-production-db-1
|
||||
@@ -184,31 +181,28 @@ jobs:
|
||||
|
||||
- name: Smoke test deployed environment
|
||||
# See nightly.yml — same three checks, against the prod vhost.
|
||||
# --resolve stored as a Bash array so "${RESOLVE[@]}" expands to two
|
||||
# separate arguments; a quoted string would pass the flag and its value
|
||||
# as one token and curl would reject it as an unknown option.
|
||||
# Gateway detection via /proc/net/route — no iproute2 dependency.
|
||||
# See nightly.yml for the full network topology explanation.
|
||||
# --resolve pins to the bridge gateway IP (the host), not 127.0.0.1
|
||||
# — see nightly.yml for the full network topology explanation.
|
||||
run: |
|
||||
set -e
|
||||
HOST="archiv.raddatz.cloud"
|
||||
URL="https://$HOST"
|
||||
HOST_IP=$(awk 'NR>1 && $2=="00000000"{h=$3;printf "%d.%d.%d.%d\n",strtonum("0x"substr(h,7,2)),strtonum("0x"substr(h,5,2)),strtonum("0x"substr(h,3,2)),strtonum("0x"substr(h,1,2));exit}' /proc/net/route)
|
||||
[ -n "$HOST_IP" ] || { echo "ERROR: could not detect Docker bridge gateway via /proc/net/route"; exit 1; }
|
||||
RESOLVE=(--resolve "$HOST:443:$HOST_IP")
|
||||
HOST_IP=$(ip route show default | awk '/default/ {print $3}')
|
||||
[ -n "$HOST_IP" ] || { echo "ERROR: could not detect Docker bridge gateway via 'ip route'"; exit 1; }
|
||||
RESOLVE="--resolve $HOST:443:$HOST_IP"
|
||||
echo "Smoke test: $URL (pinned to $HOST_IP via bridge gateway)"
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 "$URL/login" -o /dev/null
|
||||
curl -fsS "$RESOLVE" --max-time 10 "$URL/login" -o /dev/null
|
||||
# Pin the preload-list-eligible HSTS value, not just header presence:
|
||||
# a degraded `max-age=1` or a dropped `includeSubDomains; preload` must
|
||||
# fail this check rather than pass it silently.
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
||||
curl -fsS "$RESOLVE" --max-time 10 -I "$URL/" \
|
||||
| grep -Eqi 'strict-transport-security:[[:space:]]*max-age=31536000.*includeSubDomains.*preload'
|
||||
# Permissions-Policy denies APIs the app does not use (camera,
|
||||
# microphone, geolocation). A regression that loosens or drops the
|
||||
# header now fails the smoke step.
|
||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
||||
curl -fsS "$RESOLVE" --max-time 10 -I "$URL/" \
|
||||
| grep -Eqi 'permissions-policy:[[:space:]]*camera=\(\),[[:space:]]*microphone=\(\),[[:space:]]*geolocation=\(\)'
|
||||
status=$(curl -s "${RESOLVE[@]}" -o /dev/null -w "%{http_code}" --max-time 10 "$URL/actuator/health")
|
||||
status=$(curl -s "$RESOLVE" -o /dev/null -w "%{http_code}" --max-time 10 "$URL/actuator/health")
|
||||
[ "$status" = "404" ] || { echo "expected 404 from /actuator/health, got $status"; exit 1; }
|
||||
echo "All smoke checks passed"
|
||||
|
||||
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -26,7 +26,3 @@ node_modules/
|
||||
|
||||
# Repo uses npm; yarn.lock is ignored to avoid double-lockfile drift.
|
||||
frontend/yarn.lock
|
||||
|
||||
**/.venv/
|
||||
**/__pycache__/
|
||||
*.pyc
|
||||
|
||||
@@ -87,7 +87,7 @@ backend/src/main/java/org/raddatz/familienarchiv/
|
||||
├── exception/ DomainException, ErrorCode, GlobalExceptionHandler
|
||||
├── filestorage/ FileService (S3/MinIO)
|
||||
├── geschichte/ Geschichte (story) domain
|
||||
├── importing/ CanonicalImportOrchestrator + four loaders (TagTree/PersonRegister/PersonTree/Document) + CanonicalSheetReader
|
||||
├── importing/ MassImportService
|
||||
├── notification/ Notification domain + SseEmitterRegistry
|
||||
├── ocr/ OCR domain — OcrService, OcrBatchService, training
|
||||
├── person/ Person domain
|
||||
@@ -160,7 +160,7 @@ Input DTOs live flat in the domain package. Response types are the model entitie
|
||||
|
||||
→ See [CONTRIBUTING.md §Error handling](./CONTRIBUTING.md#error-handling)
|
||||
|
||||
**LLM reminder:** use `DomainException.notFound/forbidden/conflict/internal()` from service methods — never throw raw exceptions. When adding a new `ErrorCode`: (1) add to `ErrorCode.java`, (2) add to `ErrorCode` type in `frontend/src/lib/shared/errors.ts`, (3) add a `case` in `getErrorMessage()`, (4) add i18n keys in `messages/{de,en,es}.json`. Valid error codes include: `TOO_MANY_LOGIN_ATTEMPTS` (returned by `LoginRateLimiter` as HTTP 429 when a brute-force threshold is exceeded).
|
||||
**LLM reminder:** use `DomainException.notFound/forbidden/conflict/internal()` from service methods — never throw raw exceptions. When adding a new `ErrorCode`: (1) add to `ErrorCode.java`, (2) add to `ErrorCode` type in `frontend/src/lib/shared/errors.ts`, (3) add a `case` in `getErrorMessage()`, (4) add i18n keys in `messages/{de,en,es}.json`.
|
||||
|
||||
### Security / Permissions
|
||||
|
||||
@@ -192,8 +192,7 @@ frontend/src/routes/
|
||||
├── persons/
|
||||
│ ├── [id]/ Person detail
|
||||
│ ├── [id]/edit/ Person edit form
|
||||
│ ├── new/ Create person form
|
||||
│ └── review/ Triage view — confirm/rename/merge/delete provisional persons
|
||||
│ └── new/ Create person form
|
||||
├── briefwechsel/ Bilateral conversation timeline (Briefwechsel)
|
||||
├── aktivitaeten/ Unified activity feed (Chronik)
|
||||
├── geschichten/ Stories — list, [id], [id]/edit, new
|
||||
@@ -268,7 +267,7 @@ Back button pattern — use the shared `<BackButton>` component from `$lib/share
|
||||
|
||||
→ See [CONTRIBUTING.md §Error handling](./CONTRIBUTING.md#error-handling)
|
||||
|
||||
**LLM reminder:** when adding a new `ErrorCode`: (1) add to `ErrorCode.java`, (2) add to `ErrorCode` type in `frontend/src/lib/shared/errors.ts`, (3) add a `case` in `getErrorMessage()`, (4) add i18n keys in `messages/{de,en,es}.json`. Valid error codes include: `TOO_MANY_LOGIN_ATTEMPTS` (returned by `LoginRateLimiter` as HTTP 429 when a brute-force threshold is exceeded).
|
||||
**LLM reminder:** when adding a new `ErrorCode`: (1) add to `ErrorCode.java`, (2) add to `ErrorCode` type in `frontend/src/lib/shared/errors.ts`, (3) add a `case` in `getErrorMessage()`, (4) add i18n keys in `messages/{de,en,es}.json`.
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -263,7 +263,7 @@ if (!result.response.ok) {
|
||||
return { person: result.data! }; // non-null assertion is safe after the ok check
|
||||
```
|
||||
|
||||
For multipart/form-data (file uploads): bypass the typed client and use `event.fetch` directly — never global `fetch`. The typed client cannot handle multipart bodies, but `event.fetch` is still required so that `handleFetch` injects the session cookie.
|
||||
For multipart/form-data (file uploads): bypass the typed client and use raw `fetch` — the client cannot handle it.
|
||||
|
||||
### Date handling
|
||||
|
||||
@@ -272,7 +272,6 @@ For multipart/form-data (file uploads): bypass the typed client and use `event.f
|
||||
| Form display | German `dd.mm.yyyy` with auto-dot insertion via `handleDateInput()` |
|
||||
| Wire format | ISO 8601 via a hidden `<input type="hidden" name="documentDate" value={dateIso}>` |
|
||||
| Display | `new Intl.DateTimeFormat('de-DE', …).format(new Date(val + 'T12:00:00'))` |
|
||||
| Honest precision display | `formatDocumentDate(iso, precision, end?, raw?, locale?)` (`$lib/shared/utils/documentDate.ts`) or the `<DocumentDate>` component — renders a document date at exactly its `meta_date_precision` (MONTH → "Juni 1916", never a fabricated day). It mirrors the Java `DocumentTitleFormatter`; both are pinned to `docs/date-label-fixtures.json` so the title and UI labels can't drift. `meta_date_raw` is untrusted — render it via default escaping, never `{@html}` (a CI guard enforces this). |
|
||||
|
||||
### Security checklist (new endpoint)
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ src/main/java/org/raddatz/familienarchiv/
|
||||
├── exception/ # DomainException, ErrorCode, GlobalExceptionHandler
|
||||
├── filestorage/ # FileService (S3/MinIO)
|
||||
├── geschichte/ # Geschichte (story) domain
|
||||
├── importing/ # CanonicalImportOrchestrator + 4 loaders + CanonicalSheetReader
|
||||
├── importing/ # MassImportService
|
||||
├── notification/ # Notification domain + SseEmitterRegistry
|
||||
├── ocr/ # OCR domain — OcrService, OcrBatchService, training
|
||||
├── person/ # Person domain — Person, PersonService, PersonController
|
||||
@@ -97,10 +97,7 @@ public class MyEntity {
|
||||
|
||||
- Annotated with `@Service`, `@RequiredArgsConstructor`, optionally `@Slf4j`.
|
||||
- Write methods: `@Transactional`.
|
||||
- Read methods: no annotation (default non-transactional) — **except** when the method returns
|
||||
an entity whose lazy associations must remain accessible to the caller after the method
|
||||
returns. In that case, use `@Transactional(readOnly = true)` to keep the Hibernate session
|
||||
open. Removing this annotation causes `LazyInitializationException` in production. See ADR-022.
|
||||
- Read methods: no annotation (default non-transactional).
|
||||
- Cross-domain access goes through the other domain's service, never its repository.
|
||||
|
||||
## Error Handling
|
||||
|
||||
@@ -42,17 +42,16 @@ public class LoginRateLimiter {
|
||||
// For the current single-VPS setup this is the correct, simplest implementation.
|
||||
|
||||
public void checkAndConsume(String ip, String email) {
|
||||
long retryAfterSeconds = windowMinutes * 60L;
|
||||
String key = ip + ":" + email.toLowerCase(Locale.ROOT);
|
||||
if (!byIpEmail.get(key).tryConsume(1)) {
|
||||
throw DomainException.tooManyRequests(ErrorCode.TOO_MANY_LOGIN_ATTEMPTS,
|
||||
"Too many login attempts from " + ip, retryAfterSeconds);
|
||||
"Too many login attempts from " + ip);
|
||||
}
|
||||
if (!byIp.get(ip).tryConsume(1)) {
|
||||
// Refund the ipEmail token so IP-level blocking does not erode the per-email quota.
|
||||
byIpEmail.get(key).addTokens(1);
|
||||
throw DomainException.tooManyRequests(ErrorCode.TOO_MANY_LOGIN_ATTEMPTS,
|
||||
"Too many login attempts from " + ip, retryAfterSeconds);
|
||||
"Too many login attempts from " + ip);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,10 +5,8 @@ import lombok.extern.slf4j.Slf4j;
|
||||
import org.flywaydb.core.Flyway;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.core.env.Environment;
|
||||
|
||||
import javax.sql.DataSource;
|
||||
import java.util.Map;
|
||||
|
||||
@Configuration
|
||||
@RequiredArgsConstructor
|
||||
@@ -16,7 +14,6 @@ import java.util.Map;
|
||||
public class FlywayConfig {
|
||||
|
||||
private final DataSource dataSource;
|
||||
private final Environment environment;
|
||||
|
||||
@Bean(name = "flyway")
|
||||
public Flyway flyway() {
|
||||
@@ -24,7 +21,6 @@ public class FlywayConfig {
|
||||
Flyway flyway = Flyway.configure()
|
||||
.dataSource(dataSource)
|
||||
.locations("classpath:db/migration")
|
||||
.placeholders(Map.of("grafanaDbPassword", resolveGrafanaDbPassword()))
|
||||
.baselineOnMigrate(true)
|
||||
.baselineVersion("4")
|
||||
.load();
|
||||
@@ -32,22 +28,4 @@ public class FlywayConfig {
|
||||
log.info("Flyway: {} migration(s) applied.", result.migrationsExecuted);
|
||||
return flyway;
|
||||
}
|
||||
|
||||
// Fail-closed: refuse to boot when GRAFANA_DB_PASSWORD is unset. The
|
||||
// grafana_reader role's password is (re)set on every boot by
|
||||
// R__grafana_reader_password.sql, so a missing env var means we'd either
|
||||
// skip the rotation silently or — with a hardcoded fallback — publish a
|
||||
// well-known credential for a role with SELECT on audit_log, documents,
|
||||
// and transcription_blocks. Same shape as UserDataInitializer's refusal
|
||||
// to seed default admin credentials outside dev/test/e2e.
|
||||
String resolveGrafanaDbPassword() {
|
||||
String value = environment.getProperty("GRAFANA_DB_PASSWORD");
|
||||
if (value == null || value.isBlank()) {
|
||||
throw new IllegalStateException(
|
||||
"GRAFANA_DB_PASSWORD is required: it is consumed by "
|
||||
+ "R__grafana_reader_password.sql to (re)set the grafana_reader "
|
||||
+ "role's password on every boot. Generate with: openssl rand -hex 32");
|
||||
}
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,7 +28,6 @@ public class RateLimitInterceptor implements HandlerInterceptor {
|
||||
AtomicInteger count = requestCounts.get(ip, k -> new AtomicInteger(0));
|
||||
if (count.incrementAndGet() > MAX_REQUESTS_PER_MINUTE) {
|
||||
response.setStatus(HttpStatus.TOO_MANY_REQUESTS.value());
|
||||
response.setHeader("Retry-After", "60");
|
||||
response.getWriter().write("{\"code\":\"RATE_LIMIT_EXCEEDED\",\"message\":\"Too many requests\"}");
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
/**
|
||||
* Precision of a document's date. Verbatim mirror of the import normalizer's
|
||||
* {@code Precision} enum (tools/import-normalizer/dates.py) — the canonical output is the
|
||||
* contract, so there is no translation layer. Do not add, remove, or rename values without
|
||||
* also changing the normalizer; a mismatch silently breaks import idempotency (see ADR-025).
|
||||
*/
|
||||
public enum DatePrecision {
|
||||
DAY,
|
||||
MONTH,
|
||||
SEASON,
|
||||
YEAR,
|
||||
RANGE,
|
||||
APPROX,
|
||||
UNKNOWN
|
||||
}
|
||||
@@ -2,7 +2,6 @@ package org.raddatz.familienarchiv.document;
|
||||
|
||||
import jakarta.persistence.*;
|
||||
import lombok.*;
|
||||
import org.hibernate.annotations.BatchSize;
|
||||
import org.hibernate.annotations.CreationTimestamp;
|
||||
import org.hibernate.annotations.UpdateTimestamp;
|
||||
|
||||
@@ -22,17 +21,6 @@ import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
@NamedEntityGraph(name = "Document.full", attributeNodes = {
|
||||
@NamedAttributeNode("sender"),
|
||||
@NamedAttributeNode("receivers"),
|
||||
@NamedAttributeNode("tags"),
|
||||
@NamedAttributeNode("trainingLabels")
|
||||
})
|
||||
@NamedEntityGraph(name = "Document.list", attributeNodes = {
|
||||
@NamedAttributeNode("sender"),
|
||||
@NamedAttributeNode("receivers"),
|
||||
@NamedAttributeNode("tags")
|
||||
})
|
||||
@Entity
|
||||
@Table(name = "documents")
|
||||
@Data // Lombok: Generiert Getter, Setter, ToString, etc.
|
||||
@@ -91,29 +79,6 @@ public class Document {
|
||||
@Column(name = "meta_date")
|
||||
private LocalDate documentDate; // Wann wurde der Brief geschrieben?
|
||||
|
||||
// Precision of documentDate — drives honest rendering ("ca. 1943", "Frühjahr 1943").
|
||||
// Verbatim mirror of the normalizer's Precision enum (see ADR-025).
|
||||
@Enumerated(EnumType.STRING)
|
||||
@Column(name = "meta_date_precision", nullable = false, length = 16)
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
@Builder.Default
|
||||
private DatePrecision metaDatePrecision = DatePrecision.UNKNOWN;
|
||||
|
||||
// Range end — only set when metaDatePrecision is RANGE (open-ended ranges allowed → may be null).
|
||||
@Column(name = "meta_date_end")
|
||||
private LocalDate metaDateEnd;
|
||||
|
||||
// Original date cell, verbatim, preserved for provenance and "as written" display.
|
||||
@Column(name = "meta_date_raw", columnDefinition = "TEXT")
|
||||
private String metaDateRaw;
|
||||
|
||||
// Raw attribution preserved even when a person is linked via sender/receivers.
|
||||
@Column(name = "sender_text", columnDefinition = "TEXT")
|
||||
private String senderText;
|
||||
|
||||
@Column(name = "receiver_text", columnDefinition = "TEXT")
|
||||
private String receiverText;
|
||||
|
||||
@Column(name = "meta_location")
|
||||
private String location;
|
||||
|
||||
@@ -153,27 +118,24 @@ public class Document {
|
||||
@Builder.Default
|
||||
private ScriptType scriptType = ScriptType.UNKNOWN;
|
||||
|
||||
@ManyToMany(fetch = FetchType.LAZY)
|
||||
@ManyToMany(fetch = FetchType.EAGER)
|
||||
@JoinTable(name = "document_receivers", joinColumns = @JoinColumn(name = "document_id"), inverseJoinColumns = @JoinColumn(name = "person_id"))
|
||||
@BatchSize(size = 50)
|
||||
@Builder.Default
|
||||
private Set<Person> receivers = new HashSet<>();
|
||||
|
||||
@ManyToOne(fetch = FetchType.LAZY)
|
||||
@ManyToOne
|
||||
@JoinColumn(name = "sender_id")
|
||||
private Person sender;
|
||||
|
||||
@ManyToMany(fetch = FetchType.LAZY)
|
||||
@ManyToMany(fetch = FetchType.EAGER)
|
||||
@JoinTable(name = "document_tags", joinColumns = @JoinColumn(name = "document_id"), inverseJoinColumns = @JoinColumn(name = "tag_id"))
|
||||
@BatchSize(size = 50)
|
||||
@Builder.Default
|
||||
private Set<Tag> tags = new HashSet<>();
|
||||
|
||||
@ElementCollection(fetch = FetchType.LAZY)
|
||||
@ElementCollection(fetch = FetchType.EAGER)
|
||||
@CollectionTable(name = "document_training_labels", joinColumns = @JoinColumn(name = "document_id"))
|
||||
@Column(name = "label")
|
||||
@Enumerated(EnumType.STRING)
|
||||
@BatchSize(size = 50)
|
||||
@Builder.Default
|
||||
private Set<TrainingLabel> trainingLabels = new HashSet<>();
|
||||
|
||||
|
||||
@@ -12,8 +12,6 @@ public class DocumentBatchMetadataDTO {
|
||||
private UUID senderId;
|
||||
private List<UUID> receiverIds;
|
||||
private LocalDate documentDate;
|
||||
private DatePrecision metaDatePrecision;
|
||||
private LocalDate metaDateEnd;
|
||||
private String location;
|
||||
private List<String> tagNames;
|
||||
private Boolean metadataComplete;
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import org.raddatz.familienarchiv.audit.ActivityActorDTO;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.tag.Tag;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
public record DocumentListItem(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
UUID id,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
String title,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
String originalFilename,
|
||||
String thumbnailUrl,
|
||||
LocalDate documentDate,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
DatePrecision metaDatePrecision,
|
||||
LocalDate metaDateEnd,
|
||||
Person sender,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<Person> receivers,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<Tag> tags,
|
||||
String archiveBox,
|
||||
String archiveFolder,
|
||||
String location,
|
||||
String summary,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
int completionPercentage,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<ActivityActorDTO> contributors,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
SearchMatchData matchData
|
||||
) {}
|
||||
@@ -7,8 +7,6 @@ import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.springframework.data.domain.Page;
|
||||
import org.springframework.data.domain.Pageable;
|
||||
import org.springframework.data.domain.Sort;
|
||||
import org.springframework.data.jpa.domain.Specification;
|
||||
import org.springframework.data.jpa.repository.EntityGraph;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.data.jpa.repository.JpaSpecificationExecutor;
|
||||
import org.springframework.data.jpa.repository.Query;
|
||||
@@ -25,18 +23,6 @@ import java.util.UUID;
|
||||
@Repository
|
||||
public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSpecificationExecutor<Document> {
|
||||
|
||||
@EntityGraph("Document.full")
|
||||
Optional<Document> findById(UUID id);
|
||||
|
||||
@EntityGraph("Document.list")
|
||||
Page<Document> findAll(Specification<Document> spec, Pageable pageable);
|
||||
|
||||
@EntityGraph("Document.list")
|
||||
List<Document> findAll(Specification<Document> spec);
|
||||
|
||||
@EntityGraph("Document.list")
|
||||
Page<Document> findAll(Pageable pageable);
|
||||
|
||||
// Findet ein Dokument anhand des ursprünglichen Dateinamens
|
||||
// Wichtig für den Abgleich beim Excel-Import & Datei-Upload
|
||||
Optional<Document> findByOriginalFilename(String originalFilename);
|
||||
@@ -44,21 +30,17 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
|
||||
// Wie oben, gibt aber nur das erste Ergebnis zurück — sicher wenn doppelte Dateinamen existieren
|
||||
Optional<Document> findFirstByOriginalFilename(String originalFilename);
|
||||
|
||||
// Callers access only status/id scalar fields — no graph needed.
|
||||
// Findet alle Dokumente mit einem bestimmten Status
|
||||
// z.B. um alle offenen "PLACEHOLDER" zu finden
|
||||
List<Document> findByStatus(DocumentStatus status);
|
||||
|
||||
// Prüft effizient, ob ein Dateiname schon existiert (gibt true/false zurück)
|
||||
boolean existsByOriginalFilename(String originalFilename);
|
||||
|
||||
// lazy – @BatchSize(50) fallback active; see ADR-022
|
||||
@EntityGraph("Document.full")
|
||||
List<Document> findBySenderId(UUID senderId);
|
||||
|
||||
// lazy – @BatchSize(50) fallback active; see ADR-022
|
||||
@EntityGraph("Document.full")
|
||||
List<Document> findByReceiversId(UUID receiverId);
|
||||
|
||||
// Callers access only doc.getTags() to mutate the set — receivers/sender not touched; no graph needed.
|
||||
List<Document> findByTags_Id(UUID tagId);
|
||||
|
||||
@Query("SELECT d FROM Document d WHERE d.id NOT IN (SELECT DISTINCT dv.documentId FROM DocumentVersion dv)")
|
||||
@@ -73,15 +55,12 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
|
||||
|
||||
long countByMetadataCompleteFalse();
|
||||
|
||||
// No production callers — only used if a future export path iterates the full list; no graph needed.
|
||||
List<Document> findByMetadataCompleteFalse(Sort sort);
|
||||
|
||||
// Callers map to IncompleteDocumentDTO using only scalar fields (id, title, createdAt) — no graph needed.
|
||||
Page<Document> findByMetadataCompleteFalse(Pageable pageable);
|
||||
|
||||
Optional<Document> findFirstByMetadataCompleteFalseAndIdNot(UUID id, Sort sort);
|
||||
|
||||
@EntityGraph("Document.full")
|
||||
@Query("SELECT DISTINCT d FROM Document d " +
|
||||
"JOIN d.receivers r " +
|
||||
"WHERE " +
|
||||
@@ -96,7 +75,6 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
|
||||
@Param("to") LocalDate to,
|
||||
Sort sort);
|
||||
|
||||
@EntityGraph("Document.full")
|
||||
@Query("SELECT DISTINCT d FROM Document d " +
|
||||
"LEFT JOIN d.receivers r " +
|
||||
"WHERE (d.sender.id = :personId OR r.id = :personId) " +
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import org.raddatz.familienarchiv.audit.ActivityActorDTO;
|
||||
import org.raddatz.familienarchiv.document.Document;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public record DocumentSearchItem(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
Document document,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
SearchMatchData matchData,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
int completionPercentage,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<ActivityActorDTO> contributors
|
||||
) {}
|
||||
@@ -7,7 +7,7 @@ import java.util.List;
|
||||
|
||||
public record DocumentSearchResult(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<DocumentListItem> items,
|
||||
List<DocumentSearchItem> items,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
long totalElements,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
@@ -21,16 +21,16 @@ public record DocumentSearchResult(
|
||||
* Single-page convenience factory used by empty-result shortcuts and by tests that
|
||||
* don't care about paging. Treats the whole list as page 0 of itself.
|
||||
*/
|
||||
public static DocumentSearchResult of(List<DocumentListItem> items) {
|
||||
public static DocumentSearchResult of(List<DocumentSearchItem> items) {
|
||||
int size = items.size();
|
||||
return new DocumentSearchResult(items, size, 0, size, size == 0 ? 0 : 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Paged factory used by the service when it has a real Pageable + full match count
|
||||
* (e.g. from Spring's Page<T> or from an in-memory sort-then-slice).
|
||||
* (e.g. from Spring's Page<T> or from an in-memory sort-then-slice).
|
||||
*/
|
||||
public static DocumentSearchResult paged(List<DocumentListItem> slice, Pageable pageable, long totalElements) {
|
||||
public static DocumentSearchResult paged(List<DocumentSearchItem> slice, Pageable pageable, long totalElements) {
|
||||
int pageSize = pageable.getPageSize();
|
||||
int totalPages = pageSize == 0 ? 0 : (int) ((totalElements + pageSize - 1) / pageSize);
|
||||
return new DocumentSearchResult(slice, totalElements, pageable.getPageNumber(), pageSize, totalPages);
|
||||
|
||||
@@ -10,6 +10,7 @@ import org.raddatz.familienarchiv.audit.AuditService;
|
||||
import org.raddatz.familienarchiv.document.DocumentBatchMetadataDTO;
|
||||
import org.raddatz.familienarchiv.document.DocumentBatchSummary;
|
||||
import org.raddatz.familienarchiv.document.DocumentBulkEditDTO;
|
||||
import org.raddatz.familienarchiv.document.DocumentSearchItem;
|
||||
import org.raddatz.familienarchiv.document.DocumentSearchResult;
|
||||
import org.raddatz.familienarchiv.document.DocumentSort;
|
||||
import org.raddatz.familienarchiv.document.DocumentUpdateDTO;
|
||||
@@ -378,7 +379,6 @@ public class DocumentService {
|
||||
// 1. Einfache Felder Update
|
||||
doc.setTitle(dto.getTitle());
|
||||
doc.setDocumentDate(dto.getDocumentDate());
|
||||
applyDatePrecision(doc, dto);
|
||||
doc.setLocation(dto.getLocation());
|
||||
doc.setTranscription(dto.getTranscription());
|
||||
doc.setSummary(dto.getSummary());
|
||||
@@ -447,26 +447,6 @@ public class DocumentService {
|
||||
return saved;
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies the three date-precision fields only when the DTO carries them.
|
||||
* A null field means "not submitted" — overwriting the stored value with null
|
||||
* would fabricate a precision the user never chose, the exact dishonesty #666
|
||||
* exists to prevent. A row with a genuinely-unknown precision must keep it when
|
||||
* an unrelated edit (e.g. a location typo) is saved.
|
||||
*/
|
||||
private void applyDatePrecision(Document doc, DocumentUpdateDTO dto) {
|
||||
if (dto.getMetaDatePrecision() != null) {
|
||||
doc.setMetaDatePrecision(dto.getMetaDatePrecision());
|
||||
}
|
||||
if (dto.getMetaDateEnd() != null) {
|
||||
doc.setMetaDateEnd(dto.getMetaDateEnd());
|
||||
}
|
||||
if (dto.getMetaDateRaw() != null) {
|
||||
doc.setMetaDateRaw(dto.getMetaDateRaw());
|
||||
}
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public Document updateDocumentTags(UUID docId, List<String> tagNames) {
|
||||
Document doc = documentRepository.findById(docId)
|
||||
.orElseThrow(() -> DomainException.notFound(ErrorCode.DOCUMENT_NOT_FOUND, "Document not found: " + docId));
|
||||
@@ -655,7 +635,7 @@ public class DocumentService {
|
||||
return saved;
|
||||
}
|
||||
|
||||
@Transactional(readOnly = true)
|
||||
// 0. Zuletzt aktive Dokumente (sortiert nach updatedAt DESC)
|
||||
public List<Document> getRecentActivity(int size) {
|
||||
return documentRepository.findAll(
|
||||
PageRequest.of(0, size, Sort.by(Sort.Direction.DESC, "updatedAt"))
|
||||
@@ -755,7 +735,7 @@ public class DocumentService {
|
||||
return DocumentSearchResult.paged(enrichItems(slice, text), pageable, totalElements);
|
||||
}
|
||||
|
||||
private List<DocumentListItem> enrichItems(List<Document> documents, String text) {
|
||||
private List<DocumentSearchItem> enrichItems(List<Document> documents, String text) {
|
||||
List<Document> colorResolved = resolveDocumentTagColors(documents);
|
||||
Map<UUID, SearchMatchData> matchData = enrichWithMatchData(colorResolved, text);
|
||||
|
||||
@@ -763,7 +743,7 @@ public class DocumentService {
|
||||
Map<UUID, Integer> completionByDoc = fetchCompletionPercentages(docIds);
|
||||
Map<UUID, List<ActivityActorDTO>> contributorsByDoc = auditLogQueryService.findRecentContributorsPerDocument(docIds);
|
||||
|
||||
return colorResolved.stream().map(doc -> toListItem(
|
||||
return colorResolved.stream().map(doc -> new DocumentSearchItem(
|
||||
doc,
|
||||
matchData.getOrDefault(doc.getId(), SearchMatchData.empty()),
|
||||
completionByDoc.getOrDefault(doc.getId(), 0),
|
||||
@@ -771,28 +751,6 @@ public class DocumentService {
|
||||
)).toList();
|
||||
}
|
||||
|
||||
private DocumentListItem toListItem(Document doc, SearchMatchData match, int completionPct, List<ActivityActorDTO> contributors) {
|
||||
return new DocumentListItem(
|
||||
doc.getId(),
|
||||
doc.getTitle(),
|
||||
doc.getOriginalFilename(),
|
||||
doc.getThumbnailUrl(),
|
||||
doc.getDocumentDate(),
|
||||
doc.getMetaDatePrecision(),
|
||||
doc.getMetaDateEnd(),
|
||||
doc.getSender(),
|
||||
List.copyOf(doc.getReceivers()),
|
||||
List.copyOf(doc.getTags()),
|
||||
doc.getArchiveBox(),
|
||||
doc.getArchiveFolder(),
|
||||
doc.getLocation(),
|
||||
doc.getSummary(),
|
||||
completionPct,
|
||||
contributors,
|
||||
match
|
||||
);
|
||||
}
|
||||
|
||||
private Map<UUID, Integer> fetchCompletionPercentages(List<UUID> docIds) {
|
||||
return transcriptionBlockQueryService.getCompletionStats(docIds);
|
||||
}
|
||||
@@ -885,7 +843,6 @@ public class DocumentService {
|
||||
documentRepository.save(doc);
|
||||
}
|
||||
|
||||
@Transactional(readOnly = true)
|
||||
public Document getDocumentById(UUID id) {
|
||||
Document doc = documentRepository.findById(id)
|
||||
.orElseThrow(() -> DomainException.notFound(ErrorCode.DOCUMENT_NOT_FOUND, "Document not found: " + id));
|
||||
|
||||
@@ -11,11 +11,6 @@ import org.raddatz.familienarchiv.ocr.ScriptType;
|
||||
public class DocumentUpdateDTO {
|
||||
private String title;
|
||||
private LocalDate documentDate;
|
||||
private DatePrecision metaDatePrecision;
|
||||
private LocalDate metaDateEnd;
|
||||
private String metaDateRaw;
|
||||
private String senderText;
|
||||
private String receiverText;
|
||||
private String location;
|
||||
private String documentLocation;
|
||||
private String archiveBox;
|
||||
|
||||
@@ -43,7 +43,7 @@ public class TranscriptionBlockController {
|
||||
|
||||
@PostMapping
|
||||
@ResponseStatus(HttpStatus.CREATED)
|
||||
@RequirePermission({Permission.ANNOTATE_ALL, Permission.WRITE_ALL})
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public TranscriptionBlock createBlock(
|
||||
@PathVariable UUID documentId,
|
||||
@Valid @RequestBody CreateTranscriptionBlockDTO dto,
|
||||
@@ -53,7 +53,7 @@ public class TranscriptionBlockController {
|
||||
}
|
||||
|
||||
@PutMapping("/{blockId}")
|
||||
@RequirePermission({Permission.ANNOTATE_ALL, Permission.WRITE_ALL})
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public TranscriptionBlock updateBlock(
|
||||
@PathVariable UUID documentId,
|
||||
@PathVariable UUID blockId,
|
||||
@@ -65,7 +65,7 @@ public class TranscriptionBlockController {
|
||||
|
||||
@DeleteMapping("/{blockId}")
|
||||
@ResponseStatus(HttpStatus.NO_CONTENT)
|
||||
@RequirePermission({Permission.ANNOTATE_ALL, Permission.WRITE_ALL})
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public void deleteBlock(
|
||||
@PathVariable UUID documentId,
|
||||
@PathVariable UUID blockId) {
|
||||
@@ -73,7 +73,7 @@ public class TranscriptionBlockController {
|
||||
}
|
||||
|
||||
@PutMapping("/reorder")
|
||||
@RequirePermission({Permission.ANNOTATE_ALL, Permission.WRITE_ALL})
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public List<TranscriptionBlock> reorderBlocks(
|
||||
@PathVariable UUID documentId,
|
||||
@RequestBody ReorderTranscriptionBlocksDTO dto) {
|
||||
@@ -82,7 +82,7 @@ public class TranscriptionBlockController {
|
||||
}
|
||||
|
||||
@PutMapping("/{blockId}/review")
|
||||
@RequirePermission({Permission.ANNOTATE_ALL, Permission.WRITE_ALL})
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public TranscriptionBlock reviewBlock(
|
||||
@PathVariable UUID documentId,
|
||||
@PathVariable UUID blockId,
|
||||
@@ -92,7 +92,7 @@ public class TranscriptionBlockController {
|
||||
}
|
||||
|
||||
@PutMapping("/review-all")
|
||||
@RequirePermission({Permission.ANNOTATE_ALL, Permission.WRITE_ALL})
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public List<TranscriptionBlock> markAllBlocksReviewed(
|
||||
@PathVariable UUID documentId,
|
||||
Authentication authentication) {
|
||||
|
||||
@@ -10,21 +10,11 @@ public class DomainException extends RuntimeException {
|
||||
|
||||
private final ErrorCode code;
|
||||
private final HttpStatus status;
|
||||
/** Seconds until the rate-limit window resets; {@code null} when not applicable. */
|
||||
private final Long retryAfterSeconds;
|
||||
|
||||
public DomainException(ErrorCode code, HttpStatus status, String developerMessage) {
|
||||
super(developerMessage);
|
||||
this.code = code;
|
||||
this.status = status;
|
||||
this.retryAfterSeconds = null;
|
||||
}
|
||||
|
||||
private DomainException(ErrorCode code, HttpStatus status, String developerMessage, Long retryAfterSeconds) {
|
||||
super(developerMessage);
|
||||
this.code = code;
|
||||
this.status = status;
|
||||
this.retryAfterSeconds = retryAfterSeconds;
|
||||
}
|
||||
|
||||
public ErrorCode getCode() {
|
||||
@@ -35,11 +25,6 @@ public class DomainException extends RuntimeException {
|
||||
return status;
|
||||
}
|
||||
|
||||
/** Returns the {@code Retry-After} value in seconds, or {@code null} if not set. */
|
||||
public Long getRetryAfterSeconds() {
|
||||
return retryAfterSeconds;
|
||||
}
|
||||
|
||||
// --- Static factories for common cases ---
|
||||
|
||||
public static DomainException notFound(ErrorCode code, String message) {
|
||||
@@ -74,8 +59,4 @@ public class DomainException extends RuntimeException {
|
||||
public static DomainException tooManyRequests(ErrorCode code, String message) {
|
||||
return new DomainException(code, HttpStatus.TOO_MANY_REQUESTS, message);
|
||||
}
|
||||
|
||||
public static DomainException tooManyRequests(ErrorCode code, String message, long retryAfterSeconds) {
|
||||
return new DomainException(code, HttpStatus.TOO_MANY_REQUESTS, message, retryAfterSeconds);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,8 +40,6 @@ public enum ErrorCode {
|
||||
// --- Import ---
|
||||
/** A mass import is already in progress; only one can run at a time. 409 */
|
||||
IMPORT_ALREADY_RUNNING,
|
||||
/** A canonical import artifact is missing, unreadable, or missing a required header. 400 */
|
||||
IMPORT_ARTIFACT_INVALID,
|
||||
|
||||
// --- Thumbnails ---
|
||||
/** A thumbnail backfill is already in progress; only one can run at a time. 409 */
|
||||
|
||||
@@ -23,11 +23,9 @@ public class GlobalExceptionHandler {
|
||||
|
||||
@ExceptionHandler(DomainException.class)
|
||||
public ResponseEntity<ErrorResponse> handleDomain(DomainException ex) {
|
||||
var builder = ResponseEntity.status(ex.getStatus());
|
||||
if (ex.getRetryAfterSeconds() != null) {
|
||||
builder = builder.header("Retry-After", String.valueOf(ex.getRetryAfterSeconds()));
|
||||
}
|
||||
return builder.body(new ErrorResponse(ex.getCode(), ex.getMessage()));
|
||||
return ResponseEntity
|
||||
.status(ex.getStatus())
|
||||
.body(new ErrorResponse(ex.getCode(), ex.getMessage()));
|
||||
}
|
||||
|
||||
@ExceptionHandler(MethodArgumentNotValidException.class)
|
||||
|
||||
@@ -1,94 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.io.File;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Runs the four canonical loaders in their real dependency order — encoded explicitly
|
||||
* here, not implied by call order — and owns the async runner plus the {@link ImportStatus}
|
||||
* state machine the admin UI consumes. The orchestrator smoke-checks that all four
|
||||
* artifacts are present before starting, failing fast rather than half-loading tags but no
|
||||
* documents. A malformed artifact (a loader throwing) sets {@code FAILED}; an individual
|
||||
* bad file is surfaced through the {@link ImportStatus.SkippedFile} mechanism instead.
|
||||
*/
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class CanonicalImportOrchestrator {
|
||||
|
||||
private static final String TAG_TREE_ARTIFACT = "canonical-tag-tree.xlsx";
|
||||
private static final String PERSONS_ARTIFACT = "canonical-persons.xlsx";
|
||||
private static final String PERSONS_TREE_ARTIFACT = "canonical-persons-tree.json";
|
||||
private static final String DOCUMENTS_ARTIFACT = "canonical-documents.xlsx";
|
||||
|
||||
private final TagTreeImporter tagTreeImporter;
|
||||
private final PersonRegisterImporter personRegisterImporter;
|
||||
private final PersonTreeImporter personTreeImporter;
|
||||
private final DocumentImporter documentImporter;
|
||||
|
||||
@Value("${app.import.dir:/import}")
|
||||
private String canonicalDir;
|
||||
|
||||
private volatile ImportStatus currentStatus = new ImportStatus(
|
||||
ImportStatus.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
|
||||
|
||||
public ImportStatus getStatus() {
|
||||
return currentStatus;
|
||||
}
|
||||
|
||||
@Async
|
||||
public void runImportAsync() {
|
||||
if (currentStatus.state() == ImportStatus.State.RUNNING) {
|
||||
throw DomainException.conflict(ErrorCode.IMPORT_ALREADY_RUNNING, "A mass import is already in progress");
|
||||
}
|
||||
runImport();
|
||||
}
|
||||
|
||||
/** Synchronous entry point — wrapped by {@link #runImportAsync()} and called directly in tests. */
|
||||
void runImport() {
|
||||
currentStatus = new ImportStatus(ImportStatus.State.RUNNING, "IMPORT_RUNNING",
|
||||
"Import läuft...", 0, List.of(), LocalDateTime.now());
|
||||
try {
|
||||
File tagTree = requireArtifact(TAG_TREE_ARTIFACT);
|
||||
File persons = requireArtifact(PERSONS_ARTIFACT);
|
||||
File personsTree = requireArtifact(PERSONS_TREE_ARTIFACT);
|
||||
File documents = requireArtifact(DOCUMENTS_ARTIFACT);
|
||||
|
||||
// Dependency DAG: documents need persons + tags; the tree needs persons.
|
||||
tagTreeImporter.load(tagTree);
|
||||
personRegisterImporter.load(persons);
|
||||
personTreeImporter.load(personsTree);
|
||||
DocumentImporter.LoadResult result = documentImporter.load(documents);
|
||||
|
||||
currentStatus = new ImportStatus(ImportStatus.State.DONE, "IMPORT_DONE",
|
||||
"Import abgeschlossen. " + result.processed() + " Dokumente verarbeitet.",
|
||||
result.processed(), result.skippedFiles(), currentStatus.startedAt());
|
||||
} catch (DomainException e) {
|
||||
log.error("Canonical import failed: {}", e.getMessage());
|
||||
currentStatus = new ImportStatus(ImportStatus.State.FAILED, "IMPORT_FAILED_ARTIFACT",
|
||||
"Fehler: " + e.getMessage(), 0, List.of(), currentStatus.startedAt());
|
||||
} catch (Exception e) {
|
||||
log.error("Canonical import failed", e);
|
||||
currentStatus = new ImportStatus(ImportStatus.State.FAILED, "IMPORT_FAILED_INTERNAL",
|
||||
"Fehler: " + e.getMessage(), 0, List.of(), currentStatus.startedAt());
|
||||
}
|
||||
}
|
||||
|
||||
private File requireArtifact(String name) {
|
||||
File artifact = new File(canonicalDir, name);
|
||||
if (!artifact.isFile()) {
|
||||
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
|
||||
"Missing canonical artifact: " + name);
|
||||
}
|
||||
return artifact;
|
||||
}
|
||||
}
|
||||
@@ -1,133 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.DateUtil;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.ss.usermodel.WorkbookFactory;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Value-level POI helper for the canonical import artifacts. No Spring, no domain
|
||||
* knowledge: it opens a workbook, maps the header row to column indices by name, and
|
||||
* yields typed rows whose cells are looked up by header name — the seam that replaces
|
||||
* the old positional {@code @Value app.import.col.*} indices. List columns are split on
|
||||
* the pipe delimiter the normalizer emits.
|
||||
*/
|
||||
public final class CanonicalSheetReader {
|
||||
|
||||
private CanonicalSheetReader() {
|
||||
}
|
||||
|
||||
/** A single data row, addressable by canonical header name (never by index). */
|
||||
public static final class Row {
|
||||
|
||||
private final Map<String, Integer> headerIndex;
|
||||
private final List<String> cells;
|
||||
|
||||
private Row(Map<String, Integer> headerIndex, List<String> cells) {
|
||||
this.headerIndex = headerIndex;
|
||||
this.cells = cells;
|
||||
}
|
||||
|
||||
/** Trimmed cell value for the named header, or "" when absent/blank. */
|
||||
public String get(String header) {
|
||||
Integer index = headerIndex.get(header);
|
||||
if (index == null || index >= cells.size()) return "";
|
||||
String value = cells.get(index);
|
||||
return value == null ? "" : value.trim();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads all data rows from the first sheet, validating that every required header is
|
||||
* present. Throws a fail-closed {@link DomainException} on a missing header so a
|
||||
* loader never silently maps the wrong column.
|
||||
*/
|
||||
public static List<Row> readRows(File file, List<String> requiredHeaders) {
|
||||
try (FileInputStream fis = new FileInputStream(file);
|
||||
Workbook workbook = WorkbookFactory.create(fis)) {
|
||||
|
||||
Sheet sheet = workbook.getSheetAt(0);
|
||||
org.apache.poi.ss.usermodel.Row headerRow = sheet.getRow(sheet.getFirstRowNum());
|
||||
Map<String, Integer> headerIndex = mapHeaders(headerRow);
|
||||
requireHeaders(file, headerIndex, requiredHeaders);
|
||||
|
||||
List<Row> rows = new ArrayList<>();
|
||||
for (int i = sheet.getFirstRowNum() + 1; i <= sheet.getLastRowNum(); i++) {
|
||||
org.apache.poi.ss.usermodel.Row poiRow = sheet.getRow(i);
|
||||
if (poiRow == null) continue;
|
||||
rows.add(new Row(headerIndex, readCells(poiRow, headerIndex.size())));
|
||||
}
|
||||
return rows;
|
||||
} catch (DomainException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
|
||||
"Unreadable canonical artifact: " + file.getName());
|
||||
}
|
||||
}
|
||||
|
||||
/** Splits a pipe-delimited list column into trimmed, non-empty segments. */
|
||||
public static List<String> splitList(String raw) {
|
||||
if (raw == null || raw.isBlank()) return List.of();
|
||||
return Arrays.stream(raw.split("\\|"))
|
||||
.map(String::trim)
|
||||
.filter(s -> !s.isEmpty())
|
||||
.toList();
|
||||
}
|
||||
|
||||
private static Map<String, Integer> mapHeaders(org.apache.poi.ss.usermodel.Row headerRow) {
|
||||
if (headerRow == null) {
|
||||
return Map.of();
|
||||
}
|
||||
Map<String, Integer> headerIndex = new HashMap<>();
|
||||
for (int c = 0; c < headerRow.getLastCellNum(); c++) {
|
||||
String name = cellToString(headerRow.getCell(c)).trim();
|
||||
if (!name.isEmpty()) headerIndex.putIfAbsent(name, c);
|
||||
}
|
||||
return headerIndex;
|
||||
}
|
||||
|
||||
private static void requireHeaders(File file, Map<String, Integer> headerIndex, List<String> requiredHeaders) {
|
||||
for (String header : requiredHeaders) {
|
||||
if (!headerIndex.containsKey(header)) {
|
||||
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
|
||||
"Missing required header '" + header + "' in artifact " + file.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static List<String> readCells(org.apache.poi.ss.usermodel.Row poiRow, int columnCount) {
|
||||
int width = Math.max(columnCount, poiRow.getLastCellNum());
|
||||
List<String> cells = new ArrayList<>(width);
|
||||
for (int c = 0; c < width; c++) {
|
||||
cells.add(cellToString(poiRow.getCell(c)));
|
||||
}
|
||||
return cells;
|
||||
}
|
||||
|
||||
private static String cellToString(Cell cell) {
|
||||
if (cell == null) return "";
|
||||
return switch (cell.getCellType()) {
|
||||
case STRING -> cell.getStringCellValue();
|
||||
case NUMERIC -> {
|
||||
if (DateUtil.isCellDateFormatted(cell)) {
|
||||
yield cell.getLocalDateTimeCellValue().toLocalDate().toString();
|
||||
}
|
||||
yield String.valueOf((long) cell.getNumericCellValue());
|
||||
}
|
||||
case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
|
||||
default -> "";
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -1,354 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.document.DatePrecision;
|
||||
import org.raddatz.familienarchiv.document.Document;
|
||||
import org.raddatz.familienarchiv.document.DocumentService;
|
||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonService;
|
||||
import org.raddatz.familienarchiv.person.PersonType;
|
||||
import org.raddatz.familienarchiv.person.PersonUpsertCommand;
|
||||
import org.raddatz.familienarchiv.tag.Tag;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import software.amazon.awssdk.core.sync.RequestBody;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
|
||||
|
||||
import org.raddatz.familienarchiv.tag.TagService;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* Loads {@code canonical-documents.xlsx} into the document domain. Java performs no
|
||||
* semantic transformation: the normalizer already resolved people to slugs and dates to
|
||||
* ISO values. This loader maps columns by header name, routes each attribution
|
||||
* register-first (always retaining the raw cell in {@code sender_text}/{@code receiver_text}),
|
||||
* parses clean dates, and keeps the file/S3/thumbnail plumbing.
|
||||
*
|
||||
* <p>The {@code file} value is hostile input regardless of upstream trust (CWE-22 does not
|
||||
* care that it came from our Python tool): its basename is validated with
|
||||
* {@link #isValidImportFilename} and then resolved with canonical-path containment in
|
||||
* {@link #findFileRecursive}.
|
||||
*/
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class DocumentImporter {
|
||||
|
||||
static final List<String> REQUIRED_HEADERS = List.of(
|
||||
"index", "file", "sender_person_id", "sender_name",
|
||||
"receiver_person_ids", "receiver_names", "date_iso", "date_raw", "date_precision");
|
||||
|
||||
private final DocumentService documentService;
|
||||
private final PersonService personService;
|
||||
private final TagService tagService;
|
||||
private final S3Client s3Client;
|
||||
private final ThumbnailAsyncRunner thumbnailAsyncRunner;
|
||||
|
||||
@Value("${app.s3.bucket:familienarchiv}")
|
||||
private String bucketName;
|
||||
|
||||
@Value("${app.import.dir:/import}")
|
||||
private String importDir;
|
||||
|
||||
/** Outcome of loading the document sheet: processed count + per-file skips. */
|
||||
public record LoadResult(int processed, List<ImportStatus.SkippedFile> skippedFiles) {}
|
||||
|
||||
// One transaction for the whole sheet keeps the Hibernate session open so an existing
|
||||
// document's lazy receivers collection initialises during an idempotent re-import.
|
||||
// Invoked cross-bean from the orchestrator, so the @Transactional proxy applies.
|
||||
@Transactional
|
||||
public LoadResult load(File artifact) {
|
||||
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(artifact, REQUIRED_HEADERS);
|
||||
int processed = 0;
|
||||
List<ImportStatus.SkippedFile> skipped = new ArrayList<>();
|
||||
for (CanonicalSheetReader.Row row : rows) {
|
||||
String index = row.get("index");
|
||||
if (index.isBlank()) continue;
|
||||
Optional<ImportStatus.SkipReason> skipReason = importRow(row, index, skipped);
|
||||
if (skipReason.isPresent()) {
|
||||
skipped.add(new ImportStatus.SkippedFile(displayName(row, index), skipReason.get()));
|
||||
} else {
|
||||
processed++;
|
||||
}
|
||||
}
|
||||
log.info("Imported {} documents from {} ({} skipped)", processed, artifact.getName(), skipped.size());
|
||||
return new LoadResult(processed, skipped);
|
||||
}
|
||||
|
||||
private Optional<ImportStatus.SkipReason> importRow(CanonicalSheetReader.Row row, String index,
|
||||
List<ImportStatus.SkippedFile> skipped) {
|
||||
Optional<File> resolved;
|
||||
try {
|
||||
resolved = resolveFile(row.get("file"));
|
||||
} catch (InvalidImportFilenameException e) {
|
||||
log.warn("Skipping import row {}: filename rejected", index);
|
||||
return Optional.of(ImportStatus.SkipReason.INVALID_FILENAME_PATH_TRAVERSAL);
|
||||
}
|
||||
if (resolved.isPresent()) {
|
||||
try {
|
||||
if (!isPdfMagicBytes(resolved.get())) {
|
||||
return Optional.of(ImportStatus.SkipReason.INVALID_PDF_SIGNATURE);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error("Magic-byte check failed for row {}", index, e);
|
||||
return Optional.of(ImportStatus.SkipReason.FILE_READ_ERROR);
|
||||
}
|
||||
}
|
||||
return persist(row, index, resolved);
|
||||
}
|
||||
|
||||
private Optional<ImportStatus.SkipReason> persist(CanonicalSheetReader.Row row, String index, Optional<File> file) {
|
||||
Document existing = documentService.findByOriginalFilename(index).orElse(null);
|
||||
if (existing != null && existing.getStatus() != DocumentStatus.PLACEHOLDER) {
|
||||
return Optional.of(ImportStatus.SkipReason.ALREADY_EXISTS);
|
||||
}
|
||||
|
||||
String s3Key = null;
|
||||
String contentType = null;
|
||||
DocumentStatus status = DocumentStatus.PLACEHOLDER;
|
||||
if (file.isPresent()) {
|
||||
contentType = probeContentType(file.get());
|
||||
s3Key = "documents/" + UUID.randomUUID() + "_" + file.get().getName();
|
||||
try {
|
||||
uploadToS3(file.get(), s3Key, contentType);
|
||||
status = DocumentStatus.UPLOADED;
|
||||
} catch (Exception e) {
|
||||
log.error("S3 upload failed for {}", file.get().getName(), e);
|
||||
return Optional.of(ImportStatus.SkipReason.S3_UPLOAD_FAILED);
|
||||
}
|
||||
}
|
||||
|
||||
Document doc = buildDocument(row, index, existing, s3Key, contentType, status);
|
||||
Document saved = documentService.save(doc);
|
||||
if (file.isPresent()) {
|
||||
thumbnailAsyncRunner.dispatchAfterCommit(saved.getId());
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
private Document buildDocument(CanonicalSheetReader.Row row, String index, Document existing,
|
||||
String s3Key, String contentType, DocumentStatus status) {
|
||||
Document doc = existing != null ? existing
|
||||
: Document.builder().originalFilename(index).build();
|
||||
|
||||
String senderName = row.get("sender_name");
|
||||
String receiverNames = row.get("receiver_names");
|
||||
Person sender = resolveSender(row.get("sender_person_id"), senderName);
|
||||
Set<Person> receivers = resolveReceivers(row.get("receiver_person_ids"));
|
||||
|
||||
LocalDate date = parseIsoDate(row.get("date_iso"));
|
||||
DatePrecision precision = parsePrecision(row.get("date_precision"));
|
||||
LocalDate dateEnd = parseIsoDate(row.get("date_end"));
|
||||
String dateRaw = blankToNull(row.get("date_raw"));
|
||||
String location = blankToNull(row.get("location"));
|
||||
|
||||
doc.setTitle(buildTitle(index, date, precision, dateEnd, dateRaw, location));
|
||||
doc.setStatus(status);
|
||||
doc.setFilePath(s3Key);
|
||||
doc.setContentType(contentType);
|
||||
doc.setSender(sender);
|
||||
doc.setSenderText(blankToNull(senderName));
|
||||
// The canonical row is authoritative for receivers/tags (ADR-025): clear then
|
||||
// re-populate so a shrunk set on re-import prunes stale links rather than
|
||||
// accumulating them. The raw sender_text/receiver_text retention is separate.
|
||||
doc.getReceivers().clear();
|
||||
doc.getReceivers().addAll(receivers);
|
||||
doc.setReceiverText(blankToNull(receiverNames));
|
||||
doc.setDocumentDate(date);
|
||||
doc.setMetaDatePrecision(precision);
|
||||
doc.setMetaDateEnd(dateEnd);
|
||||
doc.setMetaDateRaw(dateRaw);
|
||||
doc.setLocation(location);
|
||||
doc.setSummary(blankToNull(row.get("summary")));
|
||||
attachTag(doc, row.get("tags"));
|
||||
doc.setMetadataComplete(doc.getDocumentDate() != null || sender != null || !receivers.isEmpty());
|
||||
return doc;
|
||||
}
|
||||
|
||||
// The title carries the date at the HONEST precision (never a fabricated day) via the
|
||||
// shared DocumentTitleFormatter, plus the location — kept under 20 lines by delegating.
|
||||
private static String buildTitle(String index, LocalDate date, DatePrecision precision,
|
||||
LocalDate end, String raw, String location) {
|
||||
StringBuilder title = new StringBuilder(index);
|
||||
if (date != null && precision != DatePrecision.UNKNOWN) {
|
||||
title.append(" – ").append(DocumentTitleFormatter.formatTitleDate(date, precision, end, raw));
|
||||
}
|
||||
if (location != null && !location.isBlank()) {
|
||||
title.append(" – ").append(location);
|
||||
}
|
||||
return title.toString();
|
||||
}
|
||||
|
||||
// ─── attribution routing — register-first, always retain raw ─────────────────────
|
||||
|
||||
private Person resolveSender(String slug, String rawName) {
|
||||
if (slug.isBlank()) return null;
|
||||
return resolvePerson(slug, rawName);
|
||||
}
|
||||
|
||||
private Set<Person> resolveReceivers(String slugs) {
|
||||
Set<Person> receivers = new LinkedHashSet<>();
|
||||
for (String slug : CanonicalSheetReader.splitList(slugs)) {
|
||||
receivers.add(resolvePerson(slug, slug));
|
||||
}
|
||||
return receivers;
|
||||
}
|
||||
|
||||
private Person resolvePerson(String slug, String rawName) {
|
||||
return personService.findBySourceRef(slug)
|
||||
.orElseGet(() -> personService.upsertBySourceRef(PersonUpsertCommand.builder()
|
||||
.sourceRef(slug)
|
||||
.lastName(blankToNull(rawName) == null ? slug : rawName)
|
||||
.personType(PersonType.PERSON)
|
||||
.provisional(true)
|
||||
.build()));
|
||||
}
|
||||
|
||||
// Authoritative: the canonical row defines the document's tags exactly. Clearing first
|
||||
// means a tag removed from the row is pruned on re-import (ADR-025).
|
||||
private void attachTag(Document doc, String tagPath) {
|
||||
doc.getTags().clear();
|
||||
if (tagPath.isBlank()) return;
|
||||
tagService.findBySourceRef(tagPath).ifPresent(tag -> doc.getTags().add(tag));
|
||||
}
|
||||
|
||||
// ─── clean-value parsing (no semantic logic) ─────────────────────────────────────
|
||||
|
||||
private static LocalDate parseIsoDate(String value) {
|
||||
if (value == null || value.isBlank()) return null;
|
||||
try {
|
||||
return LocalDate.parse(value.trim());
|
||||
} catch (DateTimeParseException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static DatePrecision parsePrecision(String value) {
|
||||
if (value == null || value.isBlank()) return DatePrecision.UNKNOWN;
|
||||
try {
|
||||
return DatePrecision.valueOf(value.trim());
|
||||
} catch (IllegalArgumentException e) {
|
||||
return DatePrecision.UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
||||
// ─── file handling + S3 (small ≤20-line methods) ─────────────────────────────────
|
||||
|
||||
private Optional<File> resolveFile(String fileColumn) {
|
||||
if (fileColumn == null || fileColumn.isBlank()) return Optional.empty();
|
||||
String basename = basenameOf(fileColumn);
|
||||
if (!isValidImportFilename(basename)) {
|
||||
throw new InvalidImportFilenameException();
|
||||
}
|
||||
return findFileRecursive(basename);
|
||||
}
|
||||
|
||||
private static String basenameOf(String fileColumn) {
|
||||
String normalized = fileColumn.replace('\\', '/');
|
||||
int lastSlash = normalized.lastIndexOf('/');
|
||||
return lastSlash < 0 ? normalized.trim() : normalized.substring(lastSlash + 1).trim();
|
||||
}
|
||||
|
||||
private String probeContentType(File file) {
|
||||
try {
|
||||
String probed = Files.probeContentType(file.toPath());
|
||||
return probed != null ? probed : "application/octet-stream";
|
||||
} catch (IOException e) {
|
||||
return "application/octet-stream";
|
||||
}
|
||||
}
|
||||
|
||||
private void uploadToS3(File file, String s3Key, String contentType) {
|
||||
s3Client.putObject(PutObjectRequest.builder()
|
||||
.bucket(bucketName)
|
||||
.key(s3Key)
|
||||
.contentType(contentType)
|
||||
.build(),
|
||||
RequestBody.fromFile(file));
|
||||
}
|
||||
|
||||
// ─── security guards — ported verbatim from MassImportService — do not weaken ────
|
||||
|
||||
private boolean isValidImportFilename(String filename) {
|
||||
if (filename == null || filename.isBlank()) return false;
|
||||
if (filename.contains("/")) return false;
|
||||
if (filename.contains("\\")) return false;
|
||||
if (filename.contains("∕")) return false; // U+2215 DIVISION SLASH
|
||||
if (filename.contains("/")) return false; // U+FF0F FULLWIDTH SOLIDUS
|
||||
if (filename.contains("⧵")) return false; // U+29F5 REVERSE SOLIDUS OPERATOR
|
||||
if (filename.contains("..")) return false;
|
||||
if (filename.equals(".")) return false;
|
||||
if (filename.contains("\0")) return false;
|
||||
if (Paths.get(filename).isAbsolute()) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// package-private: a Mockito spy in tests can override to inject IOException
|
||||
InputStream openFileStream(File file) throws IOException {
|
||||
return new FileInputStream(file);
|
||||
}
|
||||
|
||||
private boolean isPdfMagicBytes(File file) throws IOException {
|
||||
try (InputStream is = openFileStream(file)) {
|
||||
byte[] header = is.readNBytes(4);
|
||||
return header.length == 4
|
||||
&& header[0] == 0x25 // %
|
||||
&& header[1] == 0x50 // P
|
||||
&& header[2] == 0x44 // D
|
||||
&& header[3] == 0x46; // F
|
||||
}
|
||||
}
|
||||
|
||||
private Optional<File> findFileRecursive(String filename) {
|
||||
File baseDir = new File(importDir);
|
||||
try (Stream<Path> walk = Files.walk(baseDir.toPath())) {
|
||||
Optional<Path> match = walk.filter(p -> !Files.isDirectory(p))
|
||||
.filter(p -> p.getFileName().toString().equals(filename))
|
||||
.findFirst();
|
||||
if (match.isEmpty()) return Optional.empty();
|
||||
File candidate = match.get().toFile();
|
||||
String baseDirCanonical = baseDir.getCanonicalPath();
|
||||
if (!candidate.getCanonicalPath().startsWith(baseDirCanonical + File.separator)) {
|
||||
throw DomainException.internal(ErrorCode.INTERNAL_ERROR, "Path escape detected: " + candidate);
|
||||
}
|
||||
return Optional.of(candidate);
|
||||
} catch (IOException e) {
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
||||
private static String displayName(CanonicalSheetReader.Row row, String index) {
|
||||
String file = row.get("file");
|
||||
return file.isBlank() ? index : basenameOf(file);
|
||||
}
|
||||
|
||||
private static String blankToNull(String s) {
|
||||
return (s == null || s.isBlank()) ? null : s;
|
||||
}
|
||||
|
||||
private static final class InvalidImportFilenameException extends RuntimeException {
|
||||
}
|
||||
}
|
||||
@@ -1,112 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import org.raddatz.familienarchiv.document.DatePrecision;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
* Produces the honest German date label baked into an import title — at exactly
|
||||
* the precision the data claims, never finer. This is the Java half of the
|
||||
* single source of truth shared with the frontend {@code formatDocumentDate}
|
||||
* (TypeScript): both are asserted against {@code docs/date-label-fixtures.json}
|
||||
* so the two implementations cannot drift (see #666).
|
||||
*
|
||||
* <p>Import titles are always German, so the labels here are the German
|
||||
* canonical form (mirroring the {@code de} Paraglide messages used by the UI).
|
||||
*/
|
||||
final class DocumentTitleFormatter {
|
||||
|
||||
private static final DateTimeFormatter LONG = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN);
|
||||
private static final DateTimeFormatter MONTH_YEAR = DateTimeFormatter.ofPattern("MMMM yyyy", Locale.GERMAN);
|
||||
private static final DateTimeFormatter MEDIUM = DateTimeFormatter.ofPattern("d. MMM yyyy", Locale.GERMAN);
|
||||
private static final DateTimeFormatter DAY_MONTH = DateTimeFormatter.ofPattern("d. MMM", Locale.GERMAN);
|
||||
|
||||
private static final String UNKNOWN = "Datum unbekannt";
|
||||
private static final String APPROX_PREFIX = "ca.";
|
||||
private static final String OPEN_RANGE_PREFIX = "ab";
|
||||
|
||||
private DocumentTitleFormatter() {
|
||||
}
|
||||
|
||||
/**
|
||||
* @param date the sort/filter anchor day; null for UNKNOWN rows
|
||||
* @param precision descriptive precision metadata
|
||||
* @param end the RANGE end day; null means an open-ended range
|
||||
* @param raw the verbatim spreadsheet cell, used only to pick a season word
|
||||
* @return the honest German label
|
||||
*/
|
||||
static String formatTitleDate(LocalDate date, DatePrecision precision, LocalDate end, String raw) {
|
||||
if (precision == DatePrecision.UNKNOWN || date == null) {
|
||||
return UNKNOWN;
|
||||
}
|
||||
return switch (precision) {
|
||||
case DAY -> LONG.format(date);
|
||||
case MONTH -> MONTH_YEAR.format(date);
|
||||
case SEASON -> seasonLabel(date, raw);
|
||||
case YEAR -> String.valueOf(date.getYear());
|
||||
case APPROX -> APPROX_PREFIX + " " + date.getYear();
|
||||
case RANGE -> rangeLabel(date, end);
|
||||
case UNKNOWN -> UNKNOWN;
|
||||
};
|
||||
}
|
||||
|
||||
private static String seasonLabel(LocalDate date, String raw) {
|
||||
Season season = seasonFromRaw(raw);
|
||||
if (season == null) {
|
||||
season = seasonOfMonth(date.getMonthValue());
|
||||
}
|
||||
return season.german + " " + date.getYear();
|
||||
}
|
||||
|
||||
private static String rangeLabel(LocalDate start, LocalDate end) {
|
||||
if (end == null) {
|
||||
return OPEN_RANGE_PREFIX + " " + MEDIUM.format(start);
|
||||
}
|
||||
if (end.equals(start)) {
|
||||
return MEDIUM.format(start);
|
||||
}
|
||||
if (start.getYear() != end.getYear()) {
|
||||
return MEDIUM.format(start) + " – " + MEDIUM.format(end);
|
||||
}
|
||||
if (start.getMonthValue() == end.getMonthValue()) {
|
||||
return start.getDayOfMonth() + ".–" + MEDIUM.format(end);
|
||||
}
|
||||
return DAY_MONTH.format(start) + " – " + MEDIUM.format(end);
|
||||
}
|
||||
|
||||
// ─── season mapping — mirrors the normalizer's representative months ─────────────
|
||||
|
||||
private enum Season {
|
||||
SPRING("Frühling"),
|
||||
SUMMER("Sommer"),
|
||||
AUTUMN("Herbst"),
|
||||
WINTER("Winter");
|
||||
|
||||
private final String german;
|
||||
|
||||
Season(String german) {
|
||||
this.german = german;
|
||||
}
|
||||
}
|
||||
|
||||
private static Season seasonOfMonth(int month) {
|
||||
if (month >= 3 && month <= 5) return Season.SPRING;
|
||||
if (month >= 6 && month <= 8) return Season.SUMMER;
|
||||
if (month >= 9 && month <= 11) return Season.AUTUMN;
|
||||
return Season.WINTER;
|
||||
}
|
||||
|
||||
private static Season seasonFromRaw(String raw) {
|
||||
if (raw == null || raw.isBlank()) return null;
|
||||
String token = raw.trim().split("\\s+")[0].toLowerCase(Locale.GERMAN);
|
||||
return switch (token) {
|
||||
case "frühling", "frühjahr" -> Season.SPRING;
|
||||
case "sommer" -> Season.SUMMER;
|
||||
case "herbst" -> Season.AUTUMN;
|
||||
case "winter" -> Season.WINTER;
|
||||
default -> null;
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -1,50 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Async import state surfaced to {@code admin/system/ImportStatusCard.svelte} via the
|
||||
* generated types. The shape ({@code state, statusCode, processed, skippedFiles, skipped})
|
||||
* is kept verbatim from the retired MassImportService so the admin UI keeps working.
|
||||
*/
|
||||
public record ImportStatus(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) State state,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) String statusCode,
|
||||
@JsonIgnore String message,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) int processed,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) List<SkippedFile> skippedFiles,
|
||||
LocalDateTime startedAt
|
||||
) {
|
||||
|
||||
public enum State { IDLE, RUNNING, DONE, FAILED }
|
||||
|
||||
public enum SkipReason {
|
||||
INVALID_FILENAME_PATH_TRAVERSAL,
|
||||
INVALID_PDF_SIGNATURE,
|
||||
FILE_READ_ERROR,
|
||||
ALREADY_EXISTS,
|
||||
S3_UPLOAD_FAILED
|
||||
}
|
||||
|
||||
public record SkippedFile(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) String filename,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) SkipReason reason
|
||||
) {}
|
||||
|
||||
// Note: @Schema on a record accessor method is not picked up by SpringDoc; the
|
||||
// "skipped" count is a computed convenience field derived from skippedFiles.size().
|
||||
@JsonProperty("skipped")
|
||||
public int skipped() {
|
||||
return skippedFiles.size();
|
||||
}
|
||||
|
||||
/** Defensive-copy constructor — callers cannot mutate the stored list after construction. */
|
||||
public ImportStatus {
|
||||
skippedFiles = List.copyOf(skippedFiles);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,402 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.poi.ss.usermodel.*;
|
||||
import java.util.Objects;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.document.Document;
|
||||
import org.raddatz.familienarchiv.document.DocumentService;
|
||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.tag.Tag;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonNameParser;
|
||||
import org.raddatz.familienarchiv.person.PersonService;
|
||||
import org.raddatz.familienarchiv.tag.TagService;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.NodeList;
|
||||
import software.amazon.awssdk.core.sync.RequestBody;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.zip.ZipFile;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class MassImportService {
|
||||
|
||||
public enum State { IDLE, RUNNING, DONE, FAILED }
|
||||
|
||||
public record ImportStatus(State state, String statusCode, @JsonIgnore String message, int processed, LocalDateTime startedAt) {}
|
||||
|
||||
private volatile ImportStatus currentStatus = new ImportStatus(State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, null);
|
||||
|
||||
public ImportStatus getStatus() {
|
||||
return currentStatus;
|
||||
}
|
||||
|
||||
private final DocumentService documentService;
|
||||
private final PersonService personService;
|
||||
private final TagService tagService;
|
||||
private final S3Client s3Client;
|
||||
private final ThumbnailAsyncRunner thumbnailAsyncRunner;
|
||||
|
||||
@Value("${app.s3.bucket}")
|
||||
private String bucketName;
|
||||
|
||||
@Value("${app.import.col.index:0}")
|
||||
private int colIndex;
|
||||
|
||||
@Value("${app.import.col.box:1}")
|
||||
private int colBox;
|
||||
|
||||
@Value("${app.import.col.folder:2}")
|
||||
private int colFolder;
|
||||
|
||||
@Value("${app.import.col.sender:3}")
|
||||
private int colSender;
|
||||
|
||||
@Value("${app.import.col.receivers:5}")
|
||||
private int colReceivers;
|
||||
|
||||
@Value("${app.import.col.date:7}")
|
||||
private int colDate;
|
||||
|
||||
@Value("${app.import.col.location:9}")
|
||||
private int colLocation;
|
||||
|
||||
@Value("${app.import.col.tags:10}")
|
||||
private int colTags;
|
||||
|
||||
@Value("${app.import.col.summary:11}")
|
||||
private int colSummary;
|
||||
|
||||
@Value("${app.import.col.transcription:13}")
|
||||
private int colTranscription;
|
||||
|
||||
@Value("${app.import.dir:/import}")
|
||||
private String importDir;
|
||||
|
||||
private static final DateTimeFormatter GERMAN_DATE = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN);
|
||||
|
||||
// ODS XML namespaces
|
||||
private static final String NS_TABLE = "urn:oasis:names:tc:opendocument:xmlns:table:1.0";
|
||||
private static final String NS_TEXT = "urn:oasis:names:tc:opendocument:xmlns:text:1.0";
|
||||
|
||||
// We only need up to this many columns; caps repeated-empty-cell expansion
|
||||
private static final int MAX_COLS = 20;
|
||||
|
||||
@Async
|
||||
public void runImportAsync() {
|
||||
if (currentStatus.state() == State.RUNNING) {
|
||||
throw DomainException.conflict(ErrorCode.IMPORT_ALREADY_RUNNING, "A mass import is already in progress");
|
||||
}
|
||||
currentStatus = new ImportStatus(State.RUNNING, "IMPORT_RUNNING", "Import läuft...", 0, LocalDateTime.now());
|
||||
try {
|
||||
File spreadsheet = findSpreadsheetFile();
|
||||
log.info("Starte Massenimport aus: {}", spreadsheet.getAbsolutePath());
|
||||
int processed = processRows(readSpreadsheet(spreadsheet));
|
||||
currentStatus = new ImportStatus(State.DONE, "IMPORT_DONE",
|
||||
"Import abgeschlossen. " + processed + " Dokumente verarbeitet.",
|
||||
processed, currentStatus.startedAt());
|
||||
} catch (NoSpreadsheetException e) {
|
||||
log.error("Massenimport fehlgeschlagen: keine Tabellendatei", e);
|
||||
currentStatus = new ImportStatus(State.FAILED, "IMPORT_FAILED_NO_SPREADSHEET",
|
||||
"Fehler: " + e.getMessage(), 0, currentStatus.startedAt());
|
||||
} catch (Exception e) {
|
||||
log.error("Massenimport fehlgeschlagen", e);
|
||||
currentStatus = new ImportStatus(State.FAILED, "IMPORT_FAILED_INTERNAL",
|
||||
"Fehler: " + e.getMessage(), 0, currentStatus.startedAt());
|
||||
}
|
||||
}
|
||||
|
||||
private static class NoSpreadsheetException extends RuntimeException {
|
||||
NoSpreadsheetException(String message) { super(message); }
|
||||
}
|
||||
|
||||
private File findSpreadsheetFile() throws IOException {
|
||||
try (Stream<Path> files = Files.list(Paths.get(importDir))) {
|
||||
return files
|
||||
.filter(p -> {
|
||||
String name = p.toString().toLowerCase();
|
||||
return name.endsWith(".ods") || name.endsWith(".xlsx") || name.endsWith(".xls");
|
||||
})
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new NoSpreadsheetException(
|
||||
"Keine Tabellendatei (.ods/.xlsx/.xls) in " + importDir + " gefunden!"))
|
||||
.toFile();
|
||||
}
|
||||
}
|
||||
|
||||
// --- Spreadsheet reading (format-specific, produces neutral List<List<String>>) ---
|
||||
|
||||
private List<List<String>> readSpreadsheet(File file) throws Exception {
|
||||
String name = file.getName().toLowerCase();
|
||||
if (name.endsWith(".ods")) {
|
||||
return readOds(file);
|
||||
}
|
||||
return readXlsx(file);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads an ODS file by parsing its content.xml directly (no extra library needed).
|
||||
* ODS is a ZIP archive; content.xml holds the spreadsheet data as XML.
|
||||
*/
|
||||
List<List<String>> readOds(File file) throws Exception {
|
||||
List<List<String>> result = new ArrayList<>();
|
||||
|
||||
try (ZipFile zip = new ZipFile(file)) {
|
||||
var entry = zip.getEntry("content.xml");
|
||||
if (entry == null) throw new RuntimeException("Ungültige ODS-Datei: content.xml fehlt");
|
||||
|
||||
var factory = XxeSafeXmlParser.hardenedFactory();
|
||||
factory.setNamespaceAware(true);
|
||||
var builder = factory.newDocumentBuilder();
|
||||
var doc = builder.parse(zip.getInputStream(entry));
|
||||
|
||||
NodeList tables = doc.getElementsByTagNameNS(NS_TABLE, "table");
|
||||
if (tables.getLength() == 0) return result;
|
||||
|
||||
var table = (Element) tables.item(0);
|
||||
NodeList rows = table.getElementsByTagNameNS(NS_TABLE, "table-row");
|
||||
|
||||
for (int i = 0; i < rows.getLength(); i++) {
|
||||
var row = (Element) rows.item(i);
|
||||
List<String> rowData = new ArrayList<>();
|
||||
NodeList cells = row.getElementsByTagNameNS(NS_TABLE, "table-cell");
|
||||
|
||||
for (int j = 0; j < cells.getLength() && rowData.size() < MAX_COLS; j++) {
|
||||
var cell = (Element) cells.item(j);
|
||||
|
||||
// Read the display text (first <text:p>)
|
||||
String value = "";
|
||||
NodeList textNodes = cell.getElementsByTagNameNS(NS_TEXT, "p");
|
||||
if (textNodes.getLength() > 0) {
|
||||
value = textNodes.item(0).getTextContent().trim();
|
||||
}
|
||||
|
||||
// Expand number-columns-repeated (capped at MAX_COLS)
|
||||
String repeatAttr = cell.getAttributeNS(NS_TABLE, "number-columns-repeated");
|
||||
int repeat = repeatAttr.isEmpty() ? 1 : Integer.parseInt(repeatAttr);
|
||||
repeat = Math.min(repeat, MAX_COLS - rowData.size());
|
||||
|
||||
for (int r = 0; r < repeat; r++) {
|
||||
rowData.add(value);
|
||||
}
|
||||
}
|
||||
result.add(rowData);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/** Reads an XLSX/XLS file using Apache POI. Converts all cells to strings. */
|
||||
private List<List<String>> readXlsx(File file) throws Exception {
|
||||
List<List<String>> result = new ArrayList<>();
|
||||
try (FileInputStream fis = new FileInputStream(file);
|
||||
Workbook workbook = WorkbookFactory.create(fis)) {
|
||||
|
||||
Sheet sheet = workbook.getSheetAt(0);
|
||||
for (int i = 0; i <= sheet.getLastRowNum(); i++) {
|
||||
Row row = sheet.getRow(i);
|
||||
List<String> rowData = new ArrayList<>();
|
||||
if (row != null) {
|
||||
for (int j = 0; j < MAX_COLS; j++) {
|
||||
rowData.add(xlsxCellToString(row.getCell(j)));
|
||||
}
|
||||
}
|
||||
result.add(rowData);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private String xlsxCellToString(Cell cell) {
|
||||
if (cell == null) return "";
|
||||
return switch (cell.getCellType()) {
|
||||
case STRING -> cell.getStringCellValue();
|
||||
case NUMERIC -> {
|
||||
if (DateUtil.isCellDateFormatted(cell)) {
|
||||
yield cell.getLocalDateTimeCellValue().toLocalDate().toString(); // ISO
|
||||
}
|
||||
yield String.valueOf((int) cell.getNumericCellValue());
|
||||
}
|
||||
case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
|
||||
default -> "";
|
||||
};
|
||||
}
|
||||
|
||||
// --- Import logic (works on neutral List<String> rows) ---
|
||||
|
||||
private int processRows(List<List<String>> rows) {
|
||||
int count = 0;
|
||||
for (int i = 1; i < rows.size(); i++) { // skip header row
|
||||
List<String> cells = rows.get(i);
|
||||
String index = getCell(cells, colIndex);
|
||||
if (index.isBlank()) continue;
|
||||
|
||||
String filename = index.contains(".") ? index : index + ".pdf";
|
||||
Optional<File> fileOnDisk = findFileRecursive(filename);
|
||||
if (fileOnDisk.isEmpty()) {
|
||||
log.warn("Datei nicht gefunden, importiere nur Metadaten: {}", filename);
|
||||
}
|
||||
importSingleDocument(cells, fileOnDisk, filename, index);
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
@Transactional
|
||||
protected void importSingleDocument(List<String> cells, Optional<File> file, String originalFilename, String index) {
|
||||
Optional<Document> existing = documentService.findByOriginalFilename(originalFilename);
|
||||
if (existing.isPresent() && existing.get().getStatus() != DocumentStatus.PLACEHOLDER) {
|
||||
log.info("Dokument {} existiert bereits, überspringe.", originalFilename);
|
||||
return;
|
||||
}
|
||||
|
||||
String archiveBox = getCell(cells, colBox);
|
||||
String archiveFolder = getCell(cells, colFolder);
|
||||
String senderRaw = getCell(cells, colSender);
|
||||
String receiversRaw = getCell(cells, colReceivers);
|
||||
LocalDate date = parseDate(getCell(cells, colDate));
|
||||
String location = getCell(cells, colLocation);
|
||||
String tagRaw = getCell(cells, colTags);
|
||||
String summary = getCell(cells, colSummary);
|
||||
String transcription = getCell(cells, colTranscription);
|
||||
|
||||
String s3Key = null;
|
||||
String contentType = null;
|
||||
DocumentStatus status = DocumentStatus.PLACEHOLDER;
|
||||
|
||||
if (file.isPresent()) {
|
||||
try {
|
||||
contentType = Files.probeContentType(file.get().toPath());
|
||||
} catch (IOException e) {
|
||||
contentType = null;
|
||||
}
|
||||
if (contentType == null) contentType = "application/octet-stream";
|
||||
|
||||
s3Key = "documents/" + UUID.randomUUID() + "_" + file.get().getName();
|
||||
try {
|
||||
s3Client.putObject(PutObjectRequest.builder()
|
||||
.bucket(bucketName)
|
||||
.key(s3Key)
|
||||
.contentType(contentType)
|
||||
.build(),
|
||||
RequestBody.fromFile(file.get()));
|
||||
status = DocumentStatus.UPLOADED;
|
||||
} catch (Exception e) {
|
||||
log.error("S3 Upload Fehler für {}", file.get().getName(), e);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
Person sender = senderRaw.isBlank() ? null : findOrCreatePerson(senderRaw);
|
||||
List<Person> receivers = PersonNameParser.parseReceivers(receiversRaw).stream()
|
||||
.map(this::findOrCreatePerson)
|
||||
.filter(Objects::nonNull)
|
||||
.toList();
|
||||
|
||||
Tag tag = null;
|
||||
if (!tagRaw.isBlank()) {
|
||||
tag = tagService.findOrCreate(tagRaw);
|
||||
}
|
||||
|
||||
Document doc = existing.orElse(Document.builder()
|
||||
.originalFilename(originalFilename)
|
||||
.build());
|
||||
|
||||
// Heuristic: mark as complete if at least one key field is present in the spreadsheet row
|
||||
boolean metadataComplete = date != null || !senderRaw.isBlank() || !receiversRaw.isBlank();
|
||||
|
||||
doc.setTitle(buildTitle(index, date, location));
|
||||
doc.setFilePath(s3Key);
|
||||
doc.setContentType(contentType);
|
||||
doc.setStatus(status);
|
||||
doc.setArchiveBox(archiveBox.isBlank() ? null : archiveBox);
|
||||
doc.setArchiveFolder(archiveFolder.isBlank() ? null : archiveFolder);
|
||||
doc.setDocumentDate(date);
|
||||
doc.setLocation(location.isBlank() ? null : location);
|
||||
doc.setSummary(summary.isBlank() ? null : summary);
|
||||
doc.setTranscription(transcription.isBlank() ? null : transcription);
|
||||
doc.setSender(sender);
|
||||
doc.getReceivers().addAll(receivers);
|
||||
if (tag != null) doc.getTags().add(tag);
|
||||
doc.setMetadataComplete(metadataComplete);
|
||||
|
||||
Document saved = documentService.save(doc);
|
||||
if (file.isPresent()) {
|
||||
thumbnailAsyncRunner.dispatchAfterCommit(saved.getId());
|
||||
}
|
||||
log.info("Importiert{}: {}", file.isEmpty() ? " (nur Metadaten)" : "", originalFilename);
|
||||
}
|
||||
|
||||
// --- Helpers ---
|
||||
|
||||
private String getCell(List<String> cells, int col) {
|
||||
if (col >= cells.size()) return "";
|
||||
String val = cells.get(col);
|
||||
return val == null ? "" : val.trim();
|
||||
}
|
||||
|
||||
private LocalDate parseDate(String value) {
|
||||
if (value == null || value.isBlank()) return null;
|
||||
try {
|
||||
return LocalDate.parse(value.trim());
|
||||
} catch (DateTimeParseException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private String buildTitle(String index, LocalDate date, String location) {
|
||||
StringBuilder sb = new StringBuilder(index);
|
||||
if (date != null) {
|
||||
sb.append(" \u2013 ").append(date.format(GERMAN_DATE));
|
||||
}
|
||||
if (location != null && !location.isBlank()) {
|
||||
sb.append(" \u2013 ").append(location);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private Person findOrCreatePerson(String rawName) {
|
||||
return personService.findOrCreateByAlias(rawName);
|
||||
}
|
||||
|
||||
private Optional<File> findFileRecursive(String filename) {
|
||||
try (Stream<Path> walk = Files.walk(Paths.get(importDir))) {
|
||||
return walk.filter(p -> !Files.isDirectory(p))
|
||||
.filter(p -> p.getFileName().toString().equals(filename))
|
||||
.map(Path::toFile)
|
||||
.findFirst();
|
||||
} catch (IOException e) {
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,69 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.person.PersonService;
|
||||
import org.raddatz.familienarchiv.person.PersonType;
|
||||
import org.raddatz.familienarchiv.person.PersonUpsertCommand;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.File;
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Loads {@code canonical-persons.xlsx} (the register) into the person domain via
|
||||
* {@link PersonService}, upserting each person by the normalizer {@code person_id}
|
||||
* (source_ref). Register persons are confident identities, so {@code provisional} is
|
||||
* driven by the sheet's already-clean value (normally {@code False}).
|
||||
*/
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class PersonRegisterImporter {
|
||||
|
||||
static final List<String> REQUIRED_HEADERS = List.of("person_id", "last_name", "first_name", "provisional");
|
||||
|
||||
private final PersonService personService;
|
||||
|
||||
public int load(File artifact) {
|
||||
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(artifact, REQUIRED_HEADERS);
|
||||
int processed = 0;
|
||||
for (CanonicalSheetReader.Row row : rows) {
|
||||
String personId = row.get("person_id");
|
||||
if (personId.isBlank()) continue;
|
||||
personService.upsertBySourceRef(toCommand(row, personId));
|
||||
processed++;
|
||||
}
|
||||
log.info("Imported {} register persons from {}", processed, artifact.getName());
|
||||
return processed;
|
||||
}
|
||||
|
||||
private PersonUpsertCommand toCommand(CanonicalSheetReader.Row row, String personId) {
|
||||
return PersonUpsertCommand.builder()
|
||||
.sourceRef(personId)
|
||||
.lastName(blankToNull(row.get("last_name")))
|
||||
.firstName(blankToNull(row.get("first_name")))
|
||||
.maidenName(blankToNull(row.get("maiden_name")))
|
||||
.notes(blankToNull(row.get("notes")))
|
||||
.birthYear(yearOf(row.get("birth_date")))
|
||||
.deathYear(yearOf(row.get("death_date")))
|
||||
.personType(PersonType.PERSON)
|
||||
.provisional(Boolean.parseBoolean(row.get("provisional")))
|
||||
.build();
|
||||
}
|
||||
|
||||
private static Integer yearOf(String isoDate) {
|
||||
if (isoDate == null || isoDate.isBlank()) return null;
|
||||
try {
|
||||
return LocalDate.parse(isoDate.trim()).getYear();
|
||||
} catch (DateTimeParseException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static String blankToNull(String s) {
|
||||
return (s == null || s.isBlank()) ? null : s;
|
||||
}
|
||||
}
|
||||
@@ -1,135 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonService;
|
||||
import org.raddatz.familienarchiv.person.PersonType;
|
||||
import org.raddatz.familienarchiv.person.PersonUpsertCommand;
|
||||
import org.raddatz.familienarchiv.person.relationship.RelationType;
|
||||
import org.raddatz.familienarchiv.person.relationship.RelationshipService;
|
||||
import org.raddatz.familienarchiv.person.relationship.dto.CreateRelationshipRequest;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* Loads {@code canonical-persons-tree.json} into the person + relationship domains.
|
||||
* Tree persons are upserted via {@link PersonService} keyed on the shared
|
||||
* {@code personId} slug (which Phase 1 #670 now emits into the tree), so they reconcile
|
||||
* with the register rather than duplicating it. Relationships reference persons by the
|
||||
* tree's local {@code rowId}; each side is mapped to the upserted person's UUID and
|
||||
* created through {@link RelationshipService} (never the relationship repository —
|
||||
* layering rule). A duplicate relationship on re-import is swallowed for idempotency.
|
||||
*/
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class PersonTreeImporter {
|
||||
|
||||
// The tree JSON is a local implementation detail, not a shared API payload, so the
|
||||
// importer owns its own mapper rather than depending on the web ObjectMapper bean.
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private final PersonService personService;
|
||||
private final RelationshipService relationshipService;
|
||||
|
||||
public int load(File artifact) {
|
||||
JsonNode root = readTree(artifact);
|
||||
Map<String, UUID> idByRowId = upsertPersons(root.path("persons"));
|
||||
int relationships = createRelationships(root.path("relationships"), idByRowId);
|
||||
log.info("Imported {} tree persons and {} relationships from {}",
|
||||
idByRowId.size(), relationships, artifact.getName());
|
||||
return idByRowId.size();
|
||||
}
|
||||
|
||||
private JsonNode readTree(File artifact) {
|
||||
try {
|
||||
return OBJECT_MAPPER.readTree(artifact);
|
||||
} catch (Exception e) {
|
||||
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
|
||||
"Unreadable canonical artifact: " + artifact.getName());
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String, UUID> upsertPersons(JsonNode persons) {
|
||||
Map<String, UUID> idByRowId = new HashMap<>();
|
||||
for (JsonNode node : persons) {
|
||||
String personId = text(node, "personId");
|
||||
if (personId.isBlank()) continue;
|
||||
Person person = personService.upsertBySourceRef(toCommand(node, personId));
|
||||
idByRowId.put(text(node, "rowId"), person.getId());
|
||||
}
|
||||
return idByRowId;
|
||||
}
|
||||
|
||||
private PersonUpsertCommand toCommand(JsonNode node, String personId) {
|
||||
return PersonUpsertCommand.builder()
|
||||
.sourceRef(personId)
|
||||
.lastName(blankToNull(text(node, "lastName")))
|
||||
.firstName(blankToNull(text(node, "firstName")))
|
||||
.maidenName(blankToNull(text(node, "maidenName")))
|
||||
.notes(blankToNull(text(node, "notes")))
|
||||
.birthYear(intOrNull(node, "birthYear"))
|
||||
.deathYear(intOrNull(node, "deathYear"))
|
||||
.familyMember(node.path("familyMember").asBoolean(false))
|
||||
.personType(PersonType.PERSON)
|
||||
.provisional(false)
|
||||
.build();
|
||||
}
|
||||
|
||||
private int createRelationships(JsonNode relationships, Map<String, UUID> idByRowId) {
|
||||
int created = 0;
|
||||
for (JsonNode node : relationships) {
|
||||
// Trap: a relationship node's personId / relatedPersonId fields carry the tree's
|
||||
// local rowId (e.g. "row_a"), NOT a person slug. They are resolved through
|
||||
// idByRowId to the upserted person's UUID.
|
||||
UUID person = idByRowId.get(text(node, "personId"));
|
||||
UUID related = idByRowId.get(text(node, "relatedPersonId"));
|
||||
if (person == null || related == null) {
|
||||
log.warn("Skipping tree relationship with unresolved rowId: {} -> {}",
|
||||
text(node, "personId"), text(node, "relatedPersonId"));
|
||||
continue;
|
||||
}
|
||||
if (addRelationshipIdempotently(person, related, text(node, "type"))) {
|
||||
created++;
|
||||
}
|
||||
}
|
||||
return created;
|
||||
}
|
||||
|
||||
private boolean addRelationshipIdempotently(UUID person, UUID related, String type) {
|
||||
try {
|
||||
relationshipService.addRelationship(person,
|
||||
new CreateRelationshipRequest(related, RelationType.valueOf(type), null, null, null));
|
||||
return true;
|
||||
} catch (DomainException e) {
|
||||
if (e.getCode() == ErrorCode.DUPLICATE_RELATIONSHIP
|
||||
|| e.getCode() == ErrorCode.CIRCULAR_RELATIONSHIP) {
|
||||
return false;
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
private static String text(JsonNode node, String field) {
|
||||
JsonNode value = node.get(field);
|
||||
return value == null || value.isNull() ? "" : value.asText();
|
||||
}
|
||||
|
||||
private static Integer intOrNull(JsonNode node, String field) {
|
||||
JsonNode value = node.get(field);
|
||||
return value == null || value.isNull() ? null : value.asInt();
|
||||
}
|
||||
|
||||
private static String blankToNull(String s) {
|
||||
return (s == null || s.isBlank()) ? null : s;
|
||||
}
|
||||
}
|
||||
@@ -1,54 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.tag.Tag;
|
||||
import org.raddatz.familienarchiv.tag.TagService;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* Loads {@code canonical-tag-tree.xlsx} into the tag domain via {@link TagService},
|
||||
* upserting each tag by its canonical {@code tag_path} (the source_ref). Parent links are
|
||||
* resolved by the parent's path, which is the child path with its last {@code /segment}
|
||||
* stripped. Rows are emitted parents-first by the normalizer, so a parent is always
|
||||
* resolved before any child references it.
|
||||
*/
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class TagTreeImporter {
|
||||
|
||||
static final List<String> REQUIRED_HEADERS = List.of("tag_path", "parent_name", "tag_name");
|
||||
private static final String PATH_SEPARATOR = "/";
|
||||
|
||||
private final TagService tagService;
|
||||
|
||||
public int load(File artifact) {
|
||||
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(artifact, REQUIRED_HEADERS);
|
||||
Map<String, UUID> idByPath = new HashMap<>();
|
||||
int processed = 0;
|
||||
for (CanonicalSheetReader.Row row : rows) {
|
||||
String path = row.get("tag_path");
|
||||
if (path.isBlank()) continue;
|
||||
UUID parentId = resolveParentId(path, idByPath);
|
||||
Tag tag = tagService.upsertBySourceRef(path, row.get("tag_name"), parentId);
|
||||
idByPath.put(path, tag.getId());
|
||||
processed++;
|
||||
}
|
||||
log.info("Imported {} tags from {}", processed, artifact.getName());
|
||||
return processed;
|
||||
}
|
||||
|
||||
private UUID resolveParentId(String path, Map<String, UUID> idByPath) {
|
||||
int lastSeparator = path.lastIndexOf(PATH_SEPARATOR);
|
||||
if (lastSeparator < 0) return null;
|
||||
String parentPath = path.substring(0, lastSeparator);
|
||||
return idByPath.get(parentPath);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
|
||||
/**
 * Factory for a {@link DocumentBuilderFactory} configured against XML external entity (XXE)
 * processing. Static-only utility; not instantiable.
 */
class XxeSafeXmlParser {

    /** Feature that rejects any document containing a DOCTYPE declaration. */
    private static final String DISALLOW_DOCTYPE =
            "http://apache.org/xml/features/disallow-doctype-decl";

    /** Features that are switched off, in the order they are applied. */
    private static final String[] DISABLED_FEATURES = {
            "http://xml.org/sax/features/external-general-entities",
            "http://xml.org/sax/features/external-parameter-entities",
            "http://apache.org/xml/features/nonvalidating/load-external-dtd",
    };

    private XxeSafeXmlParser() {}

    /**
     * Creates a new factory with DOCTYPE declarations disallowed, external entities and
     * external DTD loading disabled, and XInclude / entity-reference expansion turned off.
     *
     * @return the configured factory
     * @throws ParserConfigurationException if the underlying parser rejects one of the features
     */
    static DocumentBuilderFactory hardenedFactory() throws ParserConfigurationException {
        DocumentBuilderFactory hardened = DocumentBuilderFactory.newInstance();
        hardened.setFeature(DISALLOW_DOCTYPE, true);
        for (String feature : DISABLED_FEATURES) {
            hardened.setFeature(feature, false);
        }
        hardened.setXIncludeAware(false);
        hardened.setExpandEntityReferences(false);
        return hardened;
    }
}
|
||||
@@ -1,7 +1,6 @@
|
||||
package org.raddatz.familienarchiv.person;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import jakarta.persistence.*;
|
||||
import lombok.*;
|
||||
@@ -10,9 +9,6 @@ import org.raddatz.familienarchiv.user.DisplayNameFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
// prevents infinite recursion in JSON serialization; see ADR-022 for lazy-fetch context
|
||||
@JsonIgnoreProperties({"hibernateLazyInitializer", "handler"})
|
||||
@Entity
|
||||
@Table(name = "persons")
|
||||
@Data
|
||||
@@ -57,18 +53,6 @@ public class Person {
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private boolean familyMember = false;
|
||||
|
||||
// The normalizer person_id — join key and re-import idempotency key. Null for manually
|
||||
// created persons; unique among non-null values (see ADR-025).
|
||||
@Column(name = "source_ref")
|
||||
private String sourceRef;
|
||||
|
||||
// A provisional person is one the importer inferred but could not confidently identify.
|
||||
// Distinct from familyMember (a genealogical fact); set true only by the importer (Phase 3).
|
||||
@Column(name = "provisional", nullable = false)
|
||||
@Builder.Default
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private boolean provisional = false;
|
||||
|
||||
// Entity-graph navigation for JPA JOIN queries (e.g. DocumentSpecifications.hasText).
|
||||
// Uses entity relationship rather than cross-domain repository access, avoiding a
|
||||
// separate DB roundtrip while respecting domain boundaries.
|
||||
|
||||
@@ -22,15 +22,12 @@ import org.springframework.web.bind.annotation.*;
|
||||
import org.springframework.web.server.ResponseStatusException;
|
||||
|
||||
import jakarta.validation.Valid;
|
||||
import jakarta.validation.constraints.Max;
|
||||
import jakarta.validation.constraints.Min;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/persons")
|
||||
@RequiredArgsConstructor
|
||||
@Validated
|
||||
public class PersonController {
|
||||
|
||||
private final PersonService personService;
|
||||
@@ -38,37 +35,15 @@ public class PersonController {
|
||||
|
||||
@GetMapping
|
||||
@RequirePermission(Permission.READ_ALL)
|
||||
public ResponseEntity<PersonSearchResult> getPersons(
|
||||
public ResponseEntity<List<PersonSummaryDTO>> getPersons(
|
||||
@RequestParam(required = false) String q,
|
||||
@RequestParam(required = false) PersonType type,
|
||||
@RequestParam(required = false) Boolean familyOnly,
|
||||
@RequestParam(required = false) Boolean hasDocuments,
|
||||
@RequestParam(required = false) Boolean provisional,
|
||||
// review=true reveals the import noise (transcriber view); absent/false keeps the
|
||||
// clean reader default (familyMember OR documentCount > 0). The explicit filters AND
|
||||
// within whichever base the review flag selects.
|
||||
@RequestParam(required = false, defaultValue = "false") boolean review,
|
||||
@RequestParam(required = false) String sort,
|
||||
@RequestParam(defaultValue = "0") @Min(0) int page,
|
||||
@RequestParam(defaultValue = "50") @Min(1) @Max(100) int size) {
|
||||
// Legacy top-N-by-document-count path (reader dashboard): preserved, wrapped in the
|
||||
// same envelope so /api/persons always returns one shape. It is explicitly NON-paged —
|
||||
// the top-N query returns the complete result, so PersonSearchResult.topN reports an
|
||||
// honest totalElements (= returned count) instead of pretending to be a page slice.
|
||||
if ("documentCount".equals(sort) && q == null) {
|
||||
@RequestParam(required = false, defaultValue = "0") int size,
|
||||
@RequestParam(required = false) String sort) {
|
||||
if ("documentCount".equals(sort) && size > 0 && q == null) {
|
||||
int safeSize = Math.min(size, 50);
|
||||
List<PersonSummaryDTO> top = personService.findTopByDocumentCount(safeSize);
|
||||
return ResponseEntity.ok(PersonSearchResult.topN(top));
|
||||
return ResponseEntity.ok(personService.findTopByDocumentCount(safeSize));
|
||||
}
|
||||
|
||||
PersonFilter filter = PersonFilter.builder()
|
||||
.type(type)
|
||||
.familyOnly(familyOnly)
|
||||
.hasDocuments(hasDocuments)
|
||||
.provisional(provisional)
|
||||
.readerDefault(!review)
|
||||
.build();
|
||||
return ResponseEntity.ok(personService.search(filter, page, size, q));
|
||||
return ResponseEntity.ok(personService.findAll(q));
|
||||
}
|
||||
|
||||
@GetMapping("/{id}")
|
||||
@@ -135,21 +110,6 @@ public class PersonController {
|
||||
personService.mergePersons(id, UUID.fromString(targetIdStr));
|
||||
}
|
||||
|
||||
// Dedicated state transition that clears the provisional flag. A separate verb (not a
|
||||
// mass-assignable DTO field) so provisional can never be smuggled in via create/update.
|
||||
@PatchMapping("/{id}/confirm")
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public ResponseEntity<Person> confirmPerson(@PathVariable UUID id) {
|
||||
return ResponseEntity.ok(personService.confirmPerson(id));
|
||||
}
|
||||
|
||||
@DeleteMapping("/{id}")
|
||||
@ResponseStatus(HttpStatus.NO_CONTENT)
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public void deletePerson(@PathVariable UUID id) {
|
||||
personService.deletePerson(id);
|
||||
}
|
||||
|
||||
// ─── Alias endpoints ────────────────────────────────────────────────────
|
||||
|
||||
@GetMapping("/{id}/aliases")
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
package org.raddatz.familienarchiv.person;
|
||||
|
||||
import lombok.Builder;
|
||||
|
||||
/**
|
||||
* The reader/triage filter set for the persons directory, threaded as one value through
|
||||
* {@code PersonController -> PersonService -> PersonRepository}. Each field is nullable:
|
||||
* null means "do not constrain on this dimension".
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@code type} — restrict to a single {@link PersonType}.</li>
|
||||
* <li>{@code familyOnly} — when true, only {@code familyMember} persons.</li>
|
||||
* <li>{@code hasDocuments} — when true, only persons with documentCount > 0.</li>
|
||||
* <li>{@code provisional} — match the {@code Person.provisional} flag exactly.</li>
|
||||
* <li>{@code readerDefault} — when true, restrict to {@code familyMember OR documentCount > 0}
|
||||
* (the clean reader view). The explicit filters above AND with this restriction.</li>
|
||||
* </ul>
|
||||
*/
|
||||
@Builder
|
||||
public record PersonFilter(
|
||||
PersonType type,
|
||||
Boolean familyOnly,
|
||||
Boolean hasDocuments,
|
||||
Boolean provisional,
|
||||
boolean readerDefault
|
||||
) {
|
||||
/** The unconstrained "show all" filter (transcriber view, no reader restriction). */
|
||||
public static PersonFilter showAll() {
|
||||
return PersonFilter.builder().readerDefault(false).build();
|
||||
}
|
||||
|
||||
/** The clean reader default: familyMember OR documentCount > 0, no other constraints. */
|
||||
public static PersonFilter cleanDefault() {
|
||||
return PersonFilter.builder().readerDefault(true).build();
|
||||
}
|
||||
}
|
||||
@@ -32,9 +32,6 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
// Lookup by full alias string, used during ODS mass import
|
||||
Optional<Person> findByAliasIgnoreCase(String alias);
|
||||
|
||||
// Lookup by the normalizer person_id, used for idempotent canonical re-import (Phase 3).
|
||||
Optional<Person> findBySourceRef(String sourceRef);
|
||||
|
||||
// Exact first+last name match, used for filename-based sender lookup
|
||||
Optional<Person> findByFirstNameIgnoreCaseAndLastNameIgnoreCase(String firstName, String lastName);
|
||||
|
||||
@@ -44,7 +41,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
|
||||
p.person_type AS personType,
|
||||
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
|
||||
p.family_member AS familyMember, p.provisional AS provisional,
|
||||
p.family_member AS familyMember,
|
||||
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
|
||||
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
|
||||
FROM persons p
|
||||
@@ -57,7 +54,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
|
||||
p.person_type AS personType,
|
||||
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
|
||||
p.family_member AS familyMember, p.provisional AS provisional,
|
||||
p.family_member AS familyMember,
|
||||
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
|
||||
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
|
||||
FROM persons p
|
||||
@@ -66,7 +63,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
OR LOWER(CONCAT(p.last_name,' ',COALESCE(p.first_name,''))) LIKE LOWER(CONCAT('%',:query,'%'))
|
||||
OR LOWER(p.alias) LIKE LOWER(CONCAT('%',:query,'%'))
|
||||
OR LOWER(a.last_name) LIKE LOWER(CONCAT('%',:query,'%'))
|
||||
GROUP BY p.id, p.title, p.first_name, p.last_name, p.person_type, p.alias, p.birth_year, p.death_year, p.notes, p.family_member, p.provisional
|
||||
GROUP BY p.id, p.title, p.first_name, p.last_name, p.person_type, p.alias, p.birth_year, p.death_year, p.notes, p.family_member
|
||||
ORDER BY p.last_name ASC, p.first_name ASC
|
||||
""",
|
||||
nativeQuery = true)
|
||||
@@ -78,7 +75,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
|
||||
p.person_type AS personType,
|
||||
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
|
||||
p.family_member AS familyMember, p.provisional AS provisional,
|
||||
p.family_member AS familyMember,
|
||||
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
|
||||
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
|
||||
FROM persons p
|
||||
@@ -88,61 +85,6 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
nativeQuery = true)
|
||||
List<PersonSummaryDTO> findTopByDocumentCount(@Param("limit") int limit);
|
||||
|
||||
// --- #667: filter-aware paged directory ---
|
||||
//
|
||||
// The slice query and the count query below MUST keep an IDENTICAL WHERE clause so the
|
||||
// rendered page and totalElements can never drift. Every filter is nullable: a null param
|
||||
// disables that predicate via the `:param IS NULL OR …` idiom. `readerDefault` (a plain
|
||||
// boolean) restricts to "familyMember OR has documents"; the explicit filters AND on top.
|
||||
// documentCount is recomputed inline (not via the SELECT alias) because WHERE cannot
|
||||
// reference a computed alias. All params are named — no string concatenation, no injection.
|
||||
String FILTER_WHERE = """
|
||||
WHERE (CAST(:type AS text) IS NULL OR p.person_type = CAST(:type AS text))
|
||||
AND (:familyOnly = FALSE OR :familyOnly IS NULL OR p.family_member = TRUE)
|
||||
AND (:hasDocuments = FALSE OR :hasDocuments IS NULL OR (
|
||||
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
|
||||
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id)) > 0)
|
||||
AND (:provisional IS NULL OR p.provisional = :provisional)
|
||||
AND (:readerDefault = FALSE OR (
|
||||
p.family_member = TRUE OR (
|
||||
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
|
||||
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id)) > 0))
|
||||
AND (CAST(:query AS text) IS NULL OR
|
||||
LOWER(CONCAT(COALESCE(p.first_name,''),' ',p.last_name)) LIKE LOWER(CONCAT('%',CAST(:query AS text),'%'))
|
||||
OR LOWER(CONCAT(p.last_name,' ',COALESCE(p.first_name,''))) LIKE LOWER(CONCAT('%',CAST(:query AS text),'%'))
|
||||
OR LOWER(p.alias) LIKE LOWER(CONCAT('%',CAST(:query AS text),'%')))
|
||||
""";
|
||||
|
||||
@Query(value = """
|
||||
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
|
||||
p.person_type AS personType,
|
||||
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
|
||||
p.family_member AS familyMember, p.provisional AS provisional,
|
||||
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
|
||||
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
|
||||
FROM persons p
|
||||
""" + FILTER_WHERE + """
|
||||
ORDER BY p.last_name ASC, p.first_name ASC
|
||||
LIMIT :limit OFFSET :offset
|
||||
""",
|
||||
nativeQuery = true)
|
||||
List<PersonSummaryDTO> findByFilter(@Param("type") String type,
|
||||
@Param("familyOnly") Boolean familyOnly,
|
||||
@Param("hasDocuments") Boolean hasDocuments,
|
||||
@Param("provisional") Boolean provisional,
|
||||
@Param("readerDefault") boolean readerDefault,
|
||||
@Param("query") String query,
|
||||
@Param("limit") int limit,
|
||||
@Param("offset") int offset);
|
||||
|
||||
@Query(value = "SELECT COUNT(*) FROM persons p " + FILTER_WHERE, nativeQuery = true)
|
||||
long countByFilter(@Param("type") String type,
|
||||
@Param("familyOnly") Boolean familyOnly,
|
||||
@Param("hasDocuments") Boolean hasDocuments,
|
||||
@Param("provisional") Boolean provisional,
|
||||
@Param("readerDefault") boolean readerDefault,
|
||||
@Param("query") String query);
|
||||
|
||||
// --- Correspondent queries ---
|
||||
|
||||
@Query(value = """
|
||||
@@ -194,12 +136,6 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
@Query(value = "UPDATE documents SET sender_id = :target WHERE sender_id = :source", nativeQuery = true)
|
||||
void reassignSender(@Param("source") UUID source, @Param("target") UUID target);
|
||||
|
||||
// Used by deletePerson: detach a deleted person from documents they sent, so the hard
|
||||
// delete cannot orphan a documents.sender_id FK (the column is nullable).
|
||||
@Modifying
|
||||
@Query(value = "UPDATE documents SET sender_id = NULL WHERE sender_id = :source", nativeQuery = true)
|
||||
void reassignSenderToNull(@Param("source") UUID source);
|
||||
|
||||
@Modifying
|
||||
@Query(value = """
|
||||
INSERT INTO document_receivers (document_id, person_id)
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
package org.raddatz.familienarchiv.person;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Paged result for the /api/persons list endpoint.
|
||||
*
|
||||
* <p>Hand-written to mirror {@code document/DocumentSearchResult} field-for-field so the
|
||||
* frontend sees one paged shape across the app. Deliberately NOT Spring {@code Page<T>}
|
||||
* (unstable serialized shape across Spring versions, noisy in OpenAPI) and deliberately
|
||||
* NOT a reuse of the document DTO (would couple two feature modules — duplication beats
|
||||
* coupling here).
|
||||
*/
|
||||
public record PersonSearchResult(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<PersonSummaryDTO> items,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
long totalElements,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
int pageNumber,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
int pageSize,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
int totalPages
|
||||
) {
|
||||
/**
|
||||
* Paged factory: derives {@code totalPages} from the full match count and the page size.
|
||||
* A zero count yields zero pages so the frontend hides the pagination control.
|
||||
*/
|
||||
public static PersonSearchResult paged(List<PersonSummaryDTO> slice, int pageNumber, int pageSize, long totalElements) {
|
||||
int totalPages = pageSize == 0 ? 0 : (int) ((totalElements + pageSize - 1) / pageSize);
|
||||
return new PersonSearchResult(slice, totalElements, pageNumber, pageSize, totalPages);
|
||||
}
|
||||
|
||||
/**
|
||||
* Non-paged factory for the legacy {@code sort=documentCount} top-N dashboard path.
|
||||
* That query returns the <em>complete</em> result in one shot — there is no further page
|
||||
* to fetch — so the envelope reports reality rather than pretending to be a slice of a
|
||||
* larger set: {@code totalElements} equals the number of rows actually returned,
|
||||
* {@code pageSize} equals that same count, and {@code totalPages} is 1 (or 0 when empty).
|
||||
* This avoids the earlier ambiguity where {@code totalElements} looked like a paged total.
|
||||
*/
|
||||
public static PersonSearchResult topN(List<PersonSummaryDTO> all) {
|
||||
int count = all.size();
|
||||
int totalPages = count == 0 ? 0 : 1;
|
||||
return new PersonSearchResult(all, count, 0, count, totalPages);
|
||||
}
|
||||
}
|
||||
@@ -31,55 +31,20 @@ public class PersonService {
|
||||
private final PersonRepository personRepository;
|
||||
private final PersonNameAliasRepository aliasRepository;
|
||||
|
||||
public List<PersonSummaryDTO> findAll(String q) {
|
||||
if (q == null) {
|
||||
return personRepository.findAllWithDocumentCount();
|
||||
}
|
||||
if (q.isBlank()) {
|
||||
return List.of();
|
||||
}
|
||||
return personRepository.searchWithDocumentCount(q.trim());
|
||||
}
|
||||
|
||||
public List<PersonSummaryDTO> findTopByDocumentCount(int limit) {
|
||||
return personRepository.findTopByDocumentCount(limit);
|
||||
}
|
||||
|
||||
/**
|
||||
* Filtered, paginated directory query. The slice and the total are derived from one
|
||||
* shared WHERE clause (see {@link PersonRepository#FILTER_WHERE}) so totalElements can
|
||||
* never drift from the rendered page. {@code type} is passed as the enum name because the
|
||||
* native query compares against the string column.
|
||||
*/
|
||||
public PersonSearchResult search(PersonFilter filter, int page, int size, String q) {
|
||||
String type = filter.type() == null ? null : filter.type().name();
|
||||
String query = (q == null || q.isBlank()) ? null : q.trim();
|
||||
int offset = page * size;
|
||||
|
||||
List<PersonSummaryDTO> items = personRepository.findByFilter(
|
||||
type, filter.familyOnly(), filter.hasDocuments(), filter.provisional(),
|
||||
filter.readerDefault(), query, size, offset);
|
||||
long total = personRepository.countByFilter(
|
||||
type, filter.familyOnly(), filter.hasDocuments(), filter.provisional(),
|
||||
filter.readerDefault(), query);
|
||||
|
||||
return PersonSearchResult.paged(items, page, size, total);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears the {@code provisional} flag — a deliberate state transition exposed as
|
||||
* {@code PATCH /api/persons/{id}/confirm}, never as a mass-assignable DTO field (CWE-915).
|
||||
*/
|
||||
@Transactional
|
||||
public Person confirmPerson(UUID id) {
|
||||
Person person = getById(id);
|
||||
person.setProvisional(false);
|
||||
return personRepository.save(person);
|
||||
}
|
||||
|
||||
/**
|
||||
* Hard-deletes a person used by triage. Detaches the person from any documents they
|
||||
* sent (nulls sender_id) and from any received-document references first, so the delete
|
||||
* cannot orphan an FK and fail with a 500.
|
||||
*/
|
||||
@Transactional
|
||||
public void deletePerson(UUID id) {
|
||||
getById(id);
|
||||
personRepository.reassignSenderToNull(id);
|
||||
personRepository.deleteReceiverReferences(id);
|
||||
personRepository.deleteById(id);
|
||||
}
|
||||
|
||||
public Person getById(UUID id) {
|
||||
return personRepository.findById(id)
|
||||
.orElseThrow(() -> DomainException.notFound(ErrorCode.PERSON_NOT_FOUND, "Person not found: " + id));
|
||||
@@ -115,11 +80,6 @@ public class PersonService {
|
||||
return personRepository.findByFirstNameIgnoreCaseAndLastNameIgnoreCase(firstName, lastName);
|
||||
}
|
||||
|
||||
/** Lookup by the normalizer person_id — used by the canonical importer for register-first matching. */
|
||||
public Optional<Person> findBySourceRef(String sourceRef) {
|
||||
return personRepository.findBySourceRef(sourceRef);
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Transactional
|
||||
public Person findOrCreateByAlias(String rawName) {
|
||||
@@ -155,80 +115,6 @@ public class PersonService {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Idempotent upsert keyed on {@code sourceRef} (the normalizer person_id) for the
|
||||
* canonical importer (Phase 3, ADR-025). On first import the canonical fields are
|
||||
* written verbatim. On re-import the human-edit-preserve precedence applies:
|
||||
* a non-blank existing field is never overwritten, and {@code provisional} never
|
||||
* flips back to true once a human has confirmed the person.
|
||||
*/
|
||||
@Transactional
|
||||
public Person upsertBySourceRef(PersonUpsertCommand cmd) {
|
||||
return personRepository.findBySourceRef(cmd.sourceRef())
|
||||
.map(existing -> personRepository.save(mergeCanonical(existing, cmd)))
|
||||
.orElseGet(() -> fromCanonical(cmd));
|
||||
}
|
||||
|
||||
private Person fromCanonical(PersonUpsertCommand cmd) {
|
||||
Person person = personRepository.save(Person.builder()
|
||||
.sourceRef(cmd.sourceRef())
|
||||
.firstName(blankToNull(cmd.firstName()))
|
||||
.lastName(cmd.lastName())
|
||||
.notes(blankToNull(cmd.notes()))
|
||||
.birthYear(cmd.birthYear())
|
||||
.deathYear(cmd.deathYear())
|
||||
.familyMember(cmd.familyMember())
|
||||
.personType(cmd.personType() == null ? PersonType.PERSON : cmd.personType())
|
||||
.provisional(cmd.provisional())
|
||||
.build());
|
||||
String maiden = blankToNull(cmd.maidenName());
|
||||
if (maiden != null) {
|
||||
int nextSortOrder = aliasRepository.findMaxSortOrder(person.getId()) + 1;
|
||||
aliasRepository.save(PersonNameAlias.builder()
|
||||
.person(person)
|
||||
.lastName(maiden)
|
||||
.type(PersonNameAliasType.MAIDEN_NAME)
|
||||
.sortOrder(nextSortOrder)
|
||||
.build());
|
||||
}
|
||||
return person;
|
||||
}
|
||||
|
||||
private Person mergeCanonical(Person existing, PersonUpsertCommand cmd) {
|
||||
existing.setFirstName(preferHuman(existing.getFirstName(), cmd.firstName()));
|
||||
existing.setLastName(preferHuman(existing.getLastName(), cmd.lastName()));
|
||||
existing.setNotes(preferHuman(existing.getNotes(), cmd.notes()));
|
||||
existing.setBirthYear(preferHuman(existing.getBirthYear(), cmd.birthYear()));
|
||||
existing.setDeathYear(preferHuman(existing.getDeathYear(), cmd.deathYear()));
|
||||
if (cmd.personType() != null && existing.getPersonType() == PersonType.PERSON) {
|
||||
existing.setPersonType(cmd.personType());
|
||||
}
|
||||
// provisional is monotonic-downward: once it is false it never reverts to true.
|
||||
// This also pins the cross-loader precedence (ADR-025): a register/tree person is
|
||||
// loaded before documents and already false, so a later document row that references
|
||||
// the same source_ref (provisional=true) can never flip it provisional — the guard
|
||||
// below only fires while existing is still provisional. Order of document rows is
|
||||
// therefore irrelevant.
|
||||
if (existing.isProvisional()) {
|
||||
existing.setProvisional(cmd.provisional());
|
||||
}
|
||||
return existing;
|
||||
}
|
||||
|
||||
// preferHuman keeps an existing human-entered value and only falls back to the canonical
|
||||
// value when the existing one is absent — the single idiom for every fill-blank field.
|
||||
private static String preferHuman(String existing, String canonical) {
|
||||
return (existing == null || existing.isBlank()) ? blankToNull(canonical) : existing;
|
||||
}
|
||||
|
||||
private static Integer preferHuman(Integer existing, Integer canonical) {
|
||||
return existing != null ? existing : canonical;
|
||||
}
|
||||
|
||||
private static String blankToNull(String s) {
|
||||
return (s == null || s.isBlank()) ? null : s.trim();
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public Person createPerson(String firstName, String lastName, String alias) {
|
||||
Person person = Person.builder()
|
||||
|
||||
@@ -18,7 +18,6 @@ public interface PersonSummaryDTO {
|
||||
Integer getDeathYear();
|
||||
String getNotes();
|
||||
boolean isFamilyMember();
|
||||
boolean isProvisional();
|
||||
long getDocumentCount();
|
||||
|
||||
default String getDisplayName() {
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
package org.raddatz.familienarchiv.person;
|
||||
|
||||
import lombok.Builder;
|
||||
|
||||
/**
 * Importer → {@link PersonService} command for an idempotent upsert keyed on
 * {@code sourceRef} (the normalizer's stable person_id). Carries only the canonical
 * fields the importer owns; the service applies the human-edit-preserve precedence
 * (see ADR-025): non-blank existing fields are never overwritten, and {@code provisional}
 * never flips back to true once a human has confirmed a person.
 *
 * @param sourceRef    the normalizer's stable person_id; the upsert key
 * @param firstName    canonical first name; used only to fill a blank existing value
 * @param lastName     canonical last name; used only to fill a blank existing value
 * @param maidenName   canonical maiden name; used only to fill a blank existing value
 * @param notes        canonical notes; used only to fill a blank existing value
 * @param birthYear    canonical birth year; presumably merged like {@code deathYear} — confirm
 *                     against the service
 * @param deathYear    canonical death year; an existing non-null value is kept
 * @param familyMember canonical family-member flag (NOTE(review): how the service merges it
 *                     is not visible from this record — confirm)
 * @param personType   canonical person type; applied by the service only when non-null and
 *                     the existing person's type is still {@code PersonType.PERSON}
 * @param provisional  whether the importer considers the person provisional; the service
 *                     writes it only while the existing person is still provisional
 */
|
||||
@Builder
|
||||
public record PersonUpsertCommand(
|
||||
String sourceRef,
|
||||
String firstName,
|
||||
String lastName,
|
||||
String maidenName,
|
||||
String notes,
|
||||
Integer birthYear,
|
||||
Integer deathYear,
|
||||
boolean familyMember,
|
||||
PersonType personType,
|
||||
boolean provisional
|
||||
) {}
|
||||
@@ -2,13 +2,10 @@ package org.raddatz.familienarchiv.tag;
|
||||
|
||||
import java.util.UUID;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import jakarta.persistence.*;
|
||||
import lombok.*;
|
||||
|
||||
// prevents infinite recursion in JSON serialization; see ADR-022 for lazy-fetch context
|
||||
@JsonIgnoreProperties({"hibernateLazyInitializer", "handler"})
|
||||
@Entity
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
@@ -30,11 +27,4 @@ public class Tag {
|
||||
|
||||
/** Color token name (e.g. "sage"), only set on root-level tags. Null means no color. */
|
||||
private String color;
|
||||
|
||||
/**
|
||||
* Import identity key, keyed on the canonical tag_path. Null for manually created tags;
|
||||
* unique among non-null values. The importer (Phase 3) uses it for idempotent re-import.
|
||||
*/
|
||||
@Column(name = "source_ref")
|
||||
private String sourceRef;
|
||||
}
|
||||
|
||||
@@ -22,9 +22,6 @@ public interface TagRepository extends JpaRepository<Tag, UUID> {
|
||||
|
||||
Optional<Tag> findByNameIgnoreCase(String name);
|
||||
|
||||
// Lookup by the canonical tag_path, used for idempotent canonical re-import (Phase 3).
|
||||
Optional<Tag> findBySourceRef(String sourceRef);
|
||||
|
||||
List<Tag> findByNameContainingIgnoreCase(String name);
|
||||
|
||||
/**
|
||||
|
||||
@@ -7,7 +7,6 @@ import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
@@ -50,37 +49,12 @@ public class TagService {
|
||||
.orElseThrow(() -> DomainException.notFound(ErrorCode.TAG_NOT_FOUND, "Tag not found: " + id));
|
||||
}
|
||||
|
||||
/** Lookup by the canonical tag_path — used by the canonical importer to attach a document's tag. */
|
||||
public Optional<Tag> findBySourceRef(String sourceRef) {
|
||||
return tagRepository.findBySourceRef(sourceRef);
|
||||
}
|
||||
|
||||
public Tag findOrCreate(String name) {
|
||||
String cleanName = name.trim();
|
||||
return tagRepository.findByNameIgnoreCase(cleanName)
|
||||
.orElseGet(() -> tagRepository.save(Tag.builder().name(cleanName).build()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Idempotent upsert keyed on {@code sourceRef} (the canonical tag_path) for the
|
||||
* Phase-3 importer (ADR-025). On first import the canonical name and parent are
|
||||
* written; on re-import a human-renamed tag name is preserved (the source_ref is the
|
||||
* stable identity, the name is a human-editable label).
|
||||
*/
|
||||
@Transactional
|
||||
public Tag upsertBySourceRef(String sourceRef, String name, UUID parentId) {
|
||||
return tagRepository.findBySourceRef(sourceRef)
|
||||
.map(existing -> {
|
||||
existing.setParentId(parentId);
|
||||
return tagRepository.save(existing);
|
||||
})
|
||||
.orElseGet(() -> tagRepository.save(Tag.builder()
|
||||
.sourceRef(sourceRef)
|
||||
.name(name)
|
||||
.parentId(parentId)
|
||||
.build()));
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public Tag update(UUID id, TagUpdateDTO dto) {
|
||||
Tag tag = getById(id);
|
||||
|
||||
@@ -5,8 +5,7 @@ import org.raddatz.familienarchiv.security.Permission;
|
||||
import org.raddatz.familienarchiv.security.RequirePermission;
|
||||
import org.raddatz.familienarchiv.document.DocumentService;
|
||||
import org.raddatz.familienarchiv.document.DocumentVersionService;
|
||||
import org.raddatz.familienarchiv.importing.CanonicalImportOrchestrator;
|
||||
import org.raddatz.familienarchiv.importing.ImportStatus;
|
||||
import org.raddatz.familienarchiv.importing.MassImportService;
|
||||
import org.raddatz.familienarchiv.document.ThumbnailBackfillService;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
@@ -22,20 +21,20 @@ import lombok.RequiredArgsConstructor;
|
||||
@RequiredArgsConstructor
|
||||
public class AdminController {
|
||||
|
||||
private final CanonicalImportOrchestrator importOrchestrator;
|
||||
private final MassImportService massImportService;
|
||||
private final DocumentService documentService;
|
||||
private final DocumentVersionService documentVersionService;
|
||||
private final ThumbnailBackfillService thumbnailBackfillService;
|
||||
|
||||
@PostMapping("/trigger-import")
|
||||
public ResponseEntity<ImportStatus> triggerMassImport() {
|
||||
importOrchestrator.runImportAsync();
|
||||
return ResponseEntity.accepted().body(importOrchestrator.getStatus());
|
||||
public ResponseEntity<MassImportService.ImportStatus> triggerMassImport() {
|
||||
massImportService.runImportAsync();
|
||||
return ResponseEntity.accepted().body(massImportService.getStatus());
|
||||
}
|
||||
|
||||
@GetMapping("/import-status")
|
||||
public ResponseEntity<ImportStatus> importStatus() {
|
||||
return ResponseEntity.ok(importOrchestrator.getStatus());
|
||||
public ResponseEntity<MassImportService.ImportStatus> importStatus() {
|
||||
return ResponseEntity.ok(massImportService.getStatus());
|
||||
}
|
||||
|
||||
@PostMapping("/backfill-versions")
|
||||
|
||||
@@ -31,6 +31,5 @@ public class InviteListItemDTO {
|
||||
private String status;
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private LocalDateTime createdAt;
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private String shareableUrl;
|
||||
}
|
||||
|
||||
@@ -125,10 +125,17 @@ app:
|
||||
password: ${APP_ADMIN_PASSWORD:admin123}
|
||||
|
||||
import:
|
||||
# Directory holding the normalizer's committed canonical artifacts
|
||||
# (canonical-{documents,persons,tag-tree}.xlsx + canonical-persons-tree.json).
|
||||
# The loader maps columns by header name — no positional indices (see ADR-025).
|
||||
dir: ${IMPORT_DIR:/import}
|
||||
col:
|
||||
index: 0
|
||||
box: 1
|
||||
folder: 2
|
||||
sender: 3
|
||||
receivers: 5
|
||||
date: 7
|
||||
location: 9
|
||||
tags: 10
|
||||
summary: 11
|
||||
transcription: 13
|
||||
|
||||
ocr:
|
||||
sender-model:
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
-- Repeatable migration: sets the grafana_reader role's password from the
|
||||
-- ${grafanaDbPassword} placeholder (resolved by FlywayConfig from the
|
||||
-- GRAFANA_DB_PASSWORD environment variable). Flyway computes the checksum on
|
||||
-- the resolved migration content, so any change to GRAFANA_DB_PASSWORD changes
|
||||
-- the checksum and re-applies this migration on the next boot. That makes
|
||||
-- password rotation a "change env var + restart" operation — no manual psql.
|
||||
--
|
||||
-- V68 created the role itself (without a usable password). This file owns the
|
||||
-- password lifecycle; nothing else writes it.
|
||||
DO $$
|
||||
BEGIN
|
||||
-- ALTER ROLE takes no bind parameters, hence the dynamic statement built with format().
-- NOTE(review): the placeholder value is spliced into the single-quoted literal below
-- before this block is parsed, so the %L quoting only covers the EXECUTE step. A password
-- containing a single quote would presumably break (or alter) this statement — confirm
-- that only quote-free values such as hex strings are ever configured.
EXECUTE format('ALTER ROLE grafana_reader WITH PASSWORD %L', '${grafanaDbPassword}');
|
||||
END
|
||||
$$;
|
||||
@@ -1,17 +0,0 @@
|
||||
-- Read-only role used by the Grafana PostgreSQL datasource for the PO Overview
|
||||
-- dashboard (issue #651). The role is created here without a usable password
|
||||
-- (LOGIN-capable but no password set); R__grafana_reader_password.sql sets the
|
||||
-- password from GRAFANA_DB_PASSWORD on every boot, so rotation is just "bump
|
||||
-- the env var and restart the backend" — see docs/adr/024-* and the rotation
|
||||
-- runbook in docs/DEPLOYMENT.md.
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = 'grafana_reader') THEN
|
||||
CREATE ROLE grafana_reader WITH LOGIN;
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
|
||||
GRANT CONNECT ON DATABASE ${flyway:database} TO grafana_reader;
|
||||
GRANT USAGE ON SCHEMA public TO grafana_reader;
|
||||
GRANT SELECT ON audit_log, documents, transcription_blocks TO grafana_reader;
|
||||
@@ -1,67 +0,0 @@
|
||||
-- Phase 2 of "Handling the Unknowns": the schema foundation.
|
||||
-- Consolidates every new import/precision/attribution/identity column into ONE
|
||||
-- migration with a single owner so downstream phases (importer, rendering, persons
|
||||
-- directory) compile against a finished, collision-free schema. See ADR-025.
|
||||
--
|
||||
-- This file is forward-only and immutable once shipped (Flyway checksum model):
|
||||
-- any fix goes in a later version, never an edit here.
|
||||
|
||||
-- ─── documents: date precision, range end, raw date, raw attribution ──────────
|
||||
|
||||
-- Range end is only set for RANGE precision (open-ended ranges allowed → end may be null).
|
||||
ALTER TABLE documents ADD COLUMN meta_date_end date;
|
||||
|
||||
-- Original date cell, verbatim, for provenance and "as written" display (Phase 4).
|
||||
ALTER TABLE documents ADD COLUMN meta_date_raw text;
|
||||
|
||||
-- Raw attribution preserved even when a person is linked.
|
||||
ALTER TABLE documents ADD COLUMN sender_text text;
|
||||
ALTER TABLE documents ADD COLUMN receiver_text text;
|
||||
|
||||
-- Bound user-influenced spreadsheet text at the DB layer (mirrors transcription_blocks
|
||||
-- length cap in V18). Defense in depth against malformed/huge import cells.
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_raw_length CHECK (length(meta_date_raw) <= 10000);
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_sender_text_length CHECK (length(sender_text) <= 10000);
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_receiver_text_length CHECK (length(receiver_text) <= 10000);
|
||||
|
||||
-- Precision enum — added with a DB default of 'UNKNOWN', backfilled, then made NOT NULL.
|
||||
-- The DEFAULT serves two purposes: (1) existing rows get 'UNKNOWN' immediately, and
|
||||
-- (2) raw-SQL inserts that omit the column (test fixtures, ad-hoc data loads) get a sane,
|
||||
-- CHECK-valid value instead of violating the NOT NULL constraint. JPA saves still set it
|
||||
-- explicitly via the entity's @Builder.Default = DatePrecision.UNKNOWN.
|
||||
ALTER TABLE documents ADD COLUMN meta_date_precision varchar(16) DEFAULT 'UNKNOWN';
|
||||
|
||||
UPDATE documents
|
||||
SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END;
|
||||
|
||||
ALTER TABLE documents ALTER COLUMN meta_date_precision SET NOT NULL;
|
||||
|
||||
-- Fail-closed allowlist of the seven precision values (verbatim mirror of the
|
||||
-- normalizer's Precision enum). The DB enforces validity independent of the Java enum.
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_precision
|
||||
CHECK (meta_date_precision IN ('DAY', 'MONTH', 'SEASON', 'YEAR', 'RANGE', 'APPROX', 'UNKNOWN'));
|
||||
|
||||
-- A non-null range end is permitted only when precision = RANGE. A RANGE row MAY have a
|
||||
-- null end (open-ended range), so the rule is one-directional, not biconditional.
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_only_for_range
|
||||
CHECK (meta_date_end IS NULL OR meta_date_precision = 'RANGE');
|
||||
|
||||
-- For ranges with both endpoints, the end must not precede the start.
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_after_start
|
||||
CHECK (meta_date_end IS NULL OR meta_date IS NULL OR meta_date_end >= meta_date);
|
||||
|
||||
-- ─── persons: source_ref (import identity) + provisional flag ─────────────────
|
||||
|
||||
-- The normalizer person_id: join key for documents → persons and idempotency key for
|
||||
-- re-import. Nullable (manually created persons never have one); unique among non-nulls.
|
||||
ALTER TABLE persons ADD COLUMN source_ref varchar(255);
|
||||
CREATE UNIQUE INDEX idx_persons_source_ref ON persons (source_ref);
|
||||
|
||||
-- A provisional person is one the importer inferred but could not confidently identify.
|
||||
-- Stays false until Phase 3 (importer) sets it; no code path writes true in this phase.
|
||||
ALTER TABLE persons ADD COLUMN provisional boolean NOT NULL DEFAULT false;
|
||||
|
||||
-- ─── tag: source_ref (import identity, keyed on canonical tag_path) ───────────
|
||||
|
||||
ALTER TABLE tag ADD COLUMN source_ref varchar(255);
|
||||
CREATE UNIQUE INDEX idx_tag_source_ref ON tag (source_ref);
|
||||
@@ -479,191 +479,6 @@ class MigrationIntegrationTest {
|
||||
assertThat(count).isEqualTo(1);
|
||||
}
|
||||
|
||||
// ─── V69: import/precision/attribution/identity schema foundation ────────
|
||||
|
||||
@Test
|
||||
void v69_metaDatePrecisionColumn_isNotNull() {
|
||||
Integer count = jdbc.queryForObject(
|
||||
"""
|
||||
SELECT COUNT(*) FROM information_schema.columns
|
||||
WHERE table_schema = 'public'
|
||||
AND table_name = 'documents'
|
||||
AND column_name = 'meta_date_precision'
|
||||
AND is_nullable = 'NO'
|
||||
""",
|
||||
Integer.class);
|
||||
assertThat(count).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_backfillSql_setsDatedRowsToDayPrecision() {
|
||||
// Re-run the migration's backfill UPDATE on a freshly dated row to prove the rule.
|
||||
UUID docId = createDocumentWithDate("1943-05-12");
|
||||
|
||||
jdbc.update(V69_BACKFILL_PRECISION_SQL);
|
||||
|
||||
String precision = jdbc.queryForObject(
|
||||
"SELECT meta_date_precision FROM documents WHERE id = ?", String.class, docId);
|
||||
assertThat(precision).isEqualTo("DAY");
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_backfillSql_setsUndatedRowsToUnknownPrecision() {
|
||||
UUID docId = createDocument(); // no meta_date
|
||||
|
||||
jdbc.update(V69_BACKFILL_PRECISION_SQL);
|
||||
|
||||
String precision = jdbc.queryForObject(
|
||||
"SELECT meta_date_precision FROM documents WHERE id = ?", String.class, docId);
|
||||
assertThat(precision).isEqualTo("UNKNOWN");
|
||||
}
|
||||
|
||||
// Mirrors the backfill UPDATE shipped in V69; idempotent for verification.
|
||||
private static final String V69_BACKFILL_PRECISION_SQL = """
|
||||
UPDATE documents
|
||||
SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END
|
||||
""";
|
||||
|
||||
@Test
|
||||
void v69_precisionCheck_rejectsValueOutsideEnum() {
|
||||
UUID docId = createDocument();
|
||||
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update("UPDATE documents SET meta_date_precision = 'BOGUS' WHERE id = ?", docId)
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_metaDateEndCheck_rejectsNonNullEndWhenPrecisionNotRange() {
|
||||
UUID docId = createDocumentWithDate("1943-05-12"); // precision DAY
|
||||
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update("UPDATE documents SET meta_date_end = '1943-06-01' WHERE id = ?", docId)
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_metaDateEndCheck_allowsNonNullEndWhenPrecisionRange() {
|
||||
UUID docId = createDocumentWithDate("1943-05-12");
|
||||
|
||||
int rows = jdbc.update(
|
||||
"UPDATE documents SET meta_date_precision = 'RANGE', meta_date_end = '1943-06-01' WHERE id = ?",
|
||||
docId);
|
||||
assertThat(rows).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_metaDateEndCheck_allowsRangeWithNullEnd() {
|
||||
// Loose semantics: the normalizer may emit an open-ended RANGE (start only).
|
||||
UUID docId = createDocumentWithDate("1943-05-12");
|
||||
|
||||
int rows = jdbc.update(
|
||||
"UPDATE documents SET meta_date_precision = 'RANGE' WHERE id = ?", docId);
|
||||
assertThat(rows).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_metaDateEndCheck_allowsRangeWithBothEndpointsNull() {
|
||||
// Fully-open RANGE: neither start (meta_date) nor end (meta_date_end) is set.
|
||||
// Both CHECKs hold (end IS NULL passes chk_meta_date_end_only_for_range; both-null
|
||||
// passes chk_meta_date_end_after_start), so the row survives. This locks the actual
|
||||
// DB behavior so a future tightening to a biconditional rule is a deliberate change.
|
||||
UUID docId = createDocument(); // null meta_date
|
||||
|
||||
int rows = jdbc.update(
|
||||
"UPDATE documents SET meta_date_precision = 'RANGE' WHERE id = ?", docId);
|
||||
assertThat(rows).isEqualTo(1);
|
||||
|
||||
Object metaDate = jdbc.queryForObject("SELECT meta_date FROM documents WHERE id = ?", Object.class, docId);
|
||||
Object metaDateEnd = jdbc.queryForObject(
|
||||
"SELECT meta_date_end FROM documents WHERE id = ?", Object.class, docId);
|
||||
assertThat(metaDate).isNull();
|
||||
assertThat(metaDateEnd).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_rangeOrderCheck_rejectsEndBeforeStart() {
|
||||
UUID docId = createDocumentWithDate("1943-05-12");
|
||||
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update(
|
||||
"UPDATE documents SET meta_date_precision = 'RANGE', meta_date_end = '1943-01-01' WHERE id = ?",
|
||||
docId)
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_metaDateRawCheck_rejectsOverlongText() {
|
||||
UUID docId = createDocument();
|
||||
String tooLong = "x".repeat(10001);
|
||||
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update("UPDATE documents SET meta_date_raw = ? WHERE id = ?", tooLong, docId)
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_senderTextAndReceiverText_storeRawAttribution() {
|
||||
UUID docId = createDocument();
|
||||
|
||||
int rows = jdbc.update(
|
||||
"UPDATE documents SET sender_text = 'Oma Anna', receiver_text = 'Tante Grete' WHERE id = ?",
|
||||
docId);
|
||||
assertThat(rows).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Transactional(propagation = Propagation.NOT_SUPPORTED)
|
||||
void v69_personsSourceRef_uniqueIndexRejectsDuplicate() {
|
||||
jdbc.update(
|
||||
"INSERT INTO persons (id, last_name, source_ref) VALUES (gen_random_uuid(), 'A', 'person:dup')");
|
||||
try {
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update(
|
||||
"INSERT INTO persons (id, last_name, source_ref) VALUES (gen_random_uuid(), 'B', 'person:dup')")
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
} finally {
|
||||
jdbc.update("DELETE FROM persons WHERE source_ref = 'person:dup'");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@Transactional(propagation = Propagation.NOT_SUPPORTED)
|
||||
void v69_personsSourceRef_allowsMultipleNulls() {
|
||||
UUID a = createPerson("Null", "RefA");
|
||||
UUID b = createPerson("Null", "RefB");
|
||||
try {
|
||||
String refA = jdbc.queryForObject("SELECT source_ref FROM persons WHERE id = ?", String.class, a);
|
||||
String refB = jdbc.queryForObject("SELECT source_ref FROM persons WHERE id = ?", String.class, b);
|
||||
assertThat(refA).isNull();
|
||||
assertThat(refB).isNull();
|
||||
} finally {
|
||||
jdbc.update("DELETE FROM persons WHERE id IN (?, ?)", a, b);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_personsProvisional_defaultsToFalse() {
|
||||
UUID id = createPerson("Provisional", "Default");
|
||||
|
||||
Boolean provisional = jdbc.queryForObject(
|
||||
"SELECT provisional FROM persons WHERE id = ?", Boolean.class, id);
|
||||
assertThat(provisional).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
@Transactional(propagation = Propagation.NOT_SUPPORTED)
|
||||
void v69_tagSourceRef_uniqueIndexRejectsDuplicate() {
|
||||
jdbc.update("INSERT INTO tag (id, name, source_ref) VALUES (gen_random_uuid(), 'TagDupA', 'tag:dup')");
|
||||
try {
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update("INSERT INTO tag (id, name, source_ref) VALUES (gen_random_uuid(), 'TagDupB', 'tag:dup')")
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
} finally {
|
||||
jdbc.update("DELETE FROM tag WHERE source_ref = 'tag:dup'");
|
||||
}
|
||||
}
|
||||
|
||||
// ─── helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
private UUID createPerson(String firstName, String lastName) {
|
||||
@@ -689,12 +504,6 @@ class MigrationIntegrationTest {
|
||||
return doc.getId();
|
||||
}
|
||||
|
||||
private UUID createDocumentWithDate(String isoDate) {
|
||||
UUID id = createDocument();
|
||||
jdbc.update("UPDATE documents SET meta_date = ?::date WHERE id = ?", isoDate, id);
|
||||
return id;
|
||||
}
|
||||
|
||||
private UUID insertAnnotation(UUID docId) {
|
||||
UUID id = UUID.randomUUID();
|
||||
jdbc.update("""
|
||||
|
||||
@@ -1,136 +0,0 @@
|
||||
package org.raddatz.familienarchiv.auth;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.PostgresContainerConfig;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.springframework.transaction.support.TransactionTemplate;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
* Integration test for {@link JdbcSessionRevocationAdapter} that verifies
|
||||
* session rows are actually written to / removed from the {@code spring_session}
|
||||
* table backed by a real PostgreSQL container.
|
||||
*
|
||||
* <p>Sessions are inserted via raw JDBC to avoid the module-access restriction on
|
||||
* {@code JdbcIndexedSessionRepository.JdbcSession}. The {@link SessionRevocationPort}
|
||||
* bean injected here is the real {@link JdbcSessionRevocationAdapter} wired by Spring.
|
||||
*/
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@ActiveProfiles("test")
|
||||
@Import(PostgresContainerConfig.class)
|
||||
class JdbcSessionRevocationAdapterIntegrationTest {
|
||||
|
||||
@MockitoBean S3Client s3Client;
|
||||
|
||||
@Autowired SessionRevocationPort adapter;
|
||||
@Autowired JdbcTemplate jdbcTemplate;
|
||||
@Autowired TransactionTemplate transactionTemplate;
|
||||
|
||||
private static final String PRINCIPAL = "revocation-it@test.de";
|
||||
|
||||
@BeforeEach
|
||||
void clearSessions() {
|
||||
// spring_session_attributes cascades on delete
|
||||
transactionTemplate.execute(status -> {
|
||||
jdbcTemplate.update("DELETE FROM spring_session");
|
||||
return null;
|
||||
});
|
||||
}
|
||||
|
||||
// ── helper ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Inserts a minimal {@code spring_session} row attributed to {@value #PRINCIPAL}
|
||||
* and returns its opaque primary-key ID (the value the repository uses as the
|
||||
* session identifier, not the {@code SESSION_ID} column which holds the public token).
|
||||
*
|
||||
* <p>Column layout mirrors the Flyway-managed schema shipped with the app:
|
||||
* PRIMARY_ID, SESSION_ID, CREATION_TIME, LAST_ACCESS_TIME, MAX_INACTIVE_INTERVAL,
|
||||
* EXPIRY_TIME, PRINCIPAL_NAME.
|
||||
*/
|
||||
/**
|
||||
* Inserts a persisted session row for {@value #PRINCIPAL} and returns the
|
||||
* {@code SESSION_ID} column value — this is the opaque identifier that
|
||||
* {@link JdbcIndexedSessionRepository} uses as the session's public key
|
||||
* (returned by {@code JdbcSession.getId()} and expected by
|
||||
* {@link JdbcIndexedSessionRepository#deleteById}).
|
||||
*
|
||||
* <p>The inserts run inside a {@link TransactionTemplate} so the rows are
|
||||
* committed before {@code findByPrincipalName} opens its own transaction and
|
||||
* can see the data via Read Committed isolation.
|
||||
*/
|
||||
private String insertSession() {
|
||||
String primaryId = UUID.randomUUID().toString();
|
||||
// SESSION_ID is the value used by JdbcSession.getId() and findByPrincipalName map keys.
|
||||
String sessionId = UUID.randomUUID().toString();
|
||||
long now = Instant.now().toEpochMilli();
|
||||
long expiry = now + 8L * 3600 * 1000; // 8-hour TTL
|
||||
transactionTemplate.execute(status -> {
|
||||
jdbcTemplate.update("""
|
||||
INSERT INTO spring_session
|
||||
(PRIMARY_ID, SESSION_ID, CREATION_TIME, LAST_ACCESS_TIME,
|
||||
MAX_INACTIVE_INTERVAL, EXPIRY_TIME, PRINCIPAL_NAME)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
primaryId, sessionId, now, now, 28800, expiry, PRINCIPAL);
|
||||
// Spring Session's listSessionsByPrincipalName query joins spring_session_attributes;
|
||||
// insert a minimal attribute row so the session appears in the result set.
|
||||
jdbcTemplate.update("""
|
||||
INSERT INTO spring_session_attributes
|
||||
(SESSION_PRIMARY_ID, ATTRIBUTE_NAME, ATTRIBUTE_BYTES)
|
||||
VALUES (?, ?, ?)
|
||||
""",
|
||||
primaryId, "test_attr", new byte[]{0});
|
||||
return null;
|
||||
});
|
||||
return sessionId; // the public key used by JdbcSession.getId() and deleteById()
|
||||
}
|
||||
|
||||
// ── tests ──────────────────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void revokeAllSessions_removes_every_row_from_spring_session_table() {
|
||||
insertSession();
|
||||
insertSession();
|
||||
|
||||
int count = adapter.revokeAllSessions(PRINCIPAL);
|
||||
|
||||
assertThat(count).isEqualTo(2);
|
||||
assertThat(jdbcTemplate.queryForObject(
|
||||
"SELECT COUNT(*) FROM spring_session WHERE PRINCIPAL_NAME = ?",
|
||||
Long.class, PRINCIPAL))
|
||||
.isZero();
|
||||
}
|
||||
|
||||
@Test
|
||||
void revokeOtherSessions_deletes_non_current_rows_and_keeps_current_session() {
|
||||
String keepId = insertSession();
|
||||
insertSession();
|
||||
insertSession();
|
||||
|
||||
int count = adapter.revokeOtherSessions(keepId, PRINCIPAL);
|
||||
|
||||
assertThat(count).isEqualTo(2);
|
||||
// The current session row must still be present (keyed by SESSION_ID)
|
||||
assertThat(jdbcTemplate.queryForObject(
|
||||
"SELECT COUNT(*) FROM spring_session WHERE SESSION_ID = ?",
|
||||
Long.class, keepId))
|
||||
.isEqualTo(1L);
|
||||
// The total for this principal is now exactly 1
|
||||
assertThat(jdbcTemplate.queryForObject(
|
||||
"SELECT COUNT(*) FROM spring_session WHERE PRINCIPAL_NAME = ?",
|
||||
Long.class, PRINCIPAL))
|
||||
.isEqualTo(1L);
|
||||
}
|
||||
}
|
||||
@@ -42,18 +42,6 @@ class LoginRateLimiterTest {
|
||||
.isEqualTo(ErrorCode.TOO_MANY_LOGIN_ATTEMPTS));
|
||||
}
|
||||
|
||||
@Test
|
||||
void blocked_attempt_carries_retry_after_seconds_equal_to_window_duration() {
|
||||
for (int i = 0; i < 10; i++) {
|
||||
rateLimiter.checkAndConsume("1.2.3.4", "user@example.com");
|
||||
}
|
||||
|
||||
assertThatThrownBy(() -> rateLimiter.checkAndConsume("1.2.3.4", "user@example.com"))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.satisfies(ex -> assertThat(((DomainException) ex).getRetryAfterSeconds())
|
||||
.isEqualTo(15 * 60L)); // windowMinutes=15 → 900 seconds
|
||||
}
|
||||
|
||||
@Test
|
||||
void success_after_10_failures_resets_ip_email_bucket() {
|
||||
for (int i = 0; i < 10; i++) {
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
package org.raddatz.familienarchiv.config;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.mock.env.MockEnvironment;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
|
||||
class FlywayConfigTest {
|
||||
|
||||
@Test
|
||||
void resolveGrafanaDbPassword_throws_when_env_unset() {
|
||||
FlywayConfig config = new FlywayConfig(null, new MockEnvironment());
|
||||
|
||||
assertThatThrownBy(config::resolveGrafanaDbPassword)
|
||||
.isInstanceOf(IllegalStateException.class)
|
||||
.hasMessageContaining("GRAFANA_DB_PASSWORD is required");
|
||||
}
|
||||
|
||||
@Test
|
||||
void resolveGrafanaDbPassword_throws_when_env_blank() {
|
||||
MockEnvironment env = new MockEnvironment().withProperty("GRAFANA_DB_PASSWORD", " ");
|
||||
FlywayConfig config = new FlywayConfig(null, env);
|
||||
|
||||
assertThatThrownBy(config::resolveGrafanaDbPassword)
|
||||
.isInstanceOf(IllegalStateException.class)
|
||||
.hasMessageContaining("GRAFANA_DB_PASSWORD is required");
|
||||
}
|
||||
|
||||
@Test
|
||||
void resolveGrafanaDbPassword_returns_value_when_env_set() {
|
||||
MockEnvironment env = new MockEnvironment().withProperty("GRAFANA_DB_PASSWORD", "abc");
|
||||
FlywayConfig config = new FlywayConfig(null, env);
|
||||
|
||||
assertThat(config.resolveGrafanaDbPassword()).isEqualTo("abc");
|
||||
}
|
||||
}
|
||||
@@ -1,89 +0,0 @@
|
||||
package org.raddatz.familienarchiv.config;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.ValueSource;
|
||||
import org.raddatz.familienarchiv.PostgresContainerConfig;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.data.jpa.test.autoconfigure.DataJpaTest;
|
||||
import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
// GRAFANA_DB_PASSWORD is supplied via the global test default in
|
||||
// src/test/resources/application.properties — FlywayConfig fails closed
|
||||
// when it is unset, so all tests that load the migration path need it.
|
||||
@DataJpaTest
|
||||
@AutoConfigureTestDatabase(replace = AutoConfigureTestDatabase.Replace.NONE)
|
||||
@Import({PostgresContainerConfig.class, FlywayConfig.class})
|
||||
class GrafanaReaderRoleIntegrationTest {
|
||||
|
||||
@Autowired JdbcTemplate jdbc;
|
||||
|
||||
// --- positive grants (SELECT on the three explicitly granted tables) ---
|
||||
|
||||
@Test
|
||||
void grafana_reader_has_select_on_audit_log() {
|
||||
assertThat(hasPrivilege("audit_log", "SELECT")).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void grafana_reader_has_select_on_documents() {
|
||||
assertThat(hasPrivilege("documents", "SELECT")).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void grafana_reader_has_select_on_transcription_blocks() {
|
||||
assertThat(hasPrivilege("transcription_blocks", "SELECT")).isTrue();
|
||||
}
|
||||
|
||||
// --- write-deny on the granted tables: SELECT-only means SELECT-only.
|
||||
// A future migration that GRANTs INSERT/UPDATE/DELETE on any of these
|
||||
// would fail these tests, even though the original positive grants still
|
||||
// pass. Locks the boundary in both directions.
|
||||
|
||||
@Test
|
||||
void grafana_reader_has_no_INSERT_on_documents() {
|
||||
assertThat(hasPrivilege("documents", "INSERT")).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void grafana_reader_has_no_UPDATE_on_audit_log() {
|
||||
assertThat(hasPrivilege("audit_log", "UPDATE")).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void grafana_reader_has_no_DELETE_on_transcription_blocks() {
|
||||
assertThat(hasPrivilege("transcription_blocks", "DELETE")).isFalse();
|
||||
}
|
||||
|
||||
// --- negative grants: PII / sensitive tables MUST NOT be readable.
|
||||
// The parameterized form catches the "someone widened the grant to
|
||||
// ALL TABLES IN SCHEMA public" footgun — three specific positive grants
|
||||
// would still pass while this sweep turns red.
|
||||
|
||||
@ParameterizedTest
|
||||
@ValueSource(strings = {
|
||||
"app_users",
|
||||
"user_groups",
|
||||
"persons",
|
||||
"notifications",
|
||||
"document_comments",
|
||||
"document_annotations",
|
||||
"geschichten"
|
||||
})
|
||||
void grafana_reader_has_no_SELECT_on_protected_table(String table) {
|
||||
assertThat(hasPrivilege(table, "SELECT")).isFalse();
|
||||
}
|
||||
|
||||
private boolean hasPrivilege(String table, String privilege) {
|
||||
Boolean result = jdbc.queryForObject(
|
||||
"SELECT has_table_privilege('grafana_reader', ?, ?)",
|
||||
Boolean.class,
|
||||
table,
|
||||
privilege);
|
||||
return Boolean.TRUE.equals(result);
|
||||
}
|
||||
}
|
||||
@@ -45,15 +45,6 @@ class RateLimitInterceptorTest {
|
||||
verify(response).setStatus(HttpStatus.TOO_MANY_REQUESTS.value());
|
||||
}
|
||||
|
||||
@Test
|
||||
void blocked_response_includes_retry_after_header() throws Exception {
|
||||
for (int i = 0; i < 10; i++) {
|
||||
interceptor.preHandle(request, response, null);
|
||||
}
|
||||
interceptor.preHandle(request, response, null);
|
||||
verify(response).setHeader("Retry-After", "60");
|
||||
}
|
||||
|
||||
@Test
|
||||
void different_ips_have_independent_limits() throws Exception {
|
||||
HttpServletRequest other = mock(HttpServletRequest.class);
|
||||
|
||||
@@ -27,6 +27,7 @@ import org.springframework.security.test.context.support.WithMockUser;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.springframework.test.web.servlet.MockMvc;
|
||||
|
||||
import org.raddatz.familienarchiv.document.DocumentSearchItem;
|
||||
import org.raddatz.familienarchiv.document.SearchMatchData;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
@@ -129,14 +130,16 @@ class DocumentControllerTest {
|
||||
@WithMockUser
|
||||
void search_responseBodyItemsContainMatchData() throws Exception {
|
||||
UUID docId = UUID.randomUUID();
|
||||
Document doc = Document.builder()
|
||||
.id(docId)
|
||||
.title("Brief an Anna")
|
||||
.originalFilename("brief.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.build();
|
||||
var matchData = new SearchMatchData(
|
||||
"Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of(), null, List.of());
|
||||
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem(
|
||||
docId, "Brief an Anna", "brief.pdf", null, null,
|
||||
DatePrecision.UNKNOWN, null, null,
|
||||
List.of(), List.of(), null, null, null, null,
|
||||
0, List.of(), matchData))));
|
||||
.thenReturn(DocumentSearchResult.of(List.of(new DocumentSearchItem(doc, matchData, 0, List.of()))));
|
||||
|
||||
mockMvc.perform(get("/api/documents/search").param("q", "Brief"))
|
||||
.andExpect(status().isOk())
|
||||
@@ -145,28 +148,6 @@ class DocumentControllerTest {
|
||||
.value("Er schrieb einen langen Brief"));
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser
|
||||
void search_returns_flat_item_with_id_and_without_sensitive_fields() throws Exception {
|
||||
UUID docId = UUID.randomUUID();
|
||||
var matchData = new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of());
|
||||
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem(
|
||||
docId, "Brief an Anna", "brief.pdf", null, null,
|
||||
DatePrecision.UNKNOWN, null, null,
|
||||
List.of(), List.of(), null, null, null, null,
|
||||
0, List.of(), matchData))));
|
||||
|
||||
mockMvc.perform(get("/api/documents/search"))
|
||||
.andExpect(status().isOk())
|
||||
// flat id field present at top of item (not nested under $.items[0].document.id)
|
||||
.andExpect(jsonPath("$.items[0].id").value(docId.toString()))
|
||||
// sensitive storage fields must never appear in list response
|
||||
.andExpect(jsonPath("$.items[0].transcription").doesNotExist())
|
||||
.andExpect(jsonPath("$.items[0].filePath").doesNotExist())
|
||||
.andExpect(jsonPath("$.items[0].fileHash").doesNotExist());
|
||||
}
|
||||
|
||||
// ─── /api/documents/search pagination ─────────────────────────────────────
|
||||
|
||||
@Test
|
||||
@@ -294,34 +275,6 @@ class DocumentControllerTest {
|
||||
.andExpect(status().isOk());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "WRITE_ALL")
|
||||
void updateDocument_bindsPrecisionFormFields_toDTO() throws Exception {
|
||||
// Pins the wire contract: the edit form's metaDatePrecision / metaDateEnd /
|
||||
// metaDateRaw multipart field names must bind to DocumentUpdateDTO. A rename
|
||||
// on either side silently drops the precision edit; this captures the DTO.
|
||||
UUID id = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(id).title("Brief").originalFilename("brief.pdf").build();
|
||||
when(userService.findByEmail(any())).thenReturn(AppUser.builder().id(UUID.randomUUID()).build());
|
||||
|
||||
org.mockito.ArgumentCaptor<DocumentUpdateDTO> captor =
|
||||
org.mockito.ArgumentCaptor.forClass(DocumentUpdateDTO.class);
|
||||
when(documentService.updateDocument(eq(id), captor.capture(), any(), any())).thenReturn(doc);
|
||||
|
||||
mockMvc.perform(multipart("/api/documents/" + id)
|
||||
.param("metaDatePrecision", "RANGE")
|
||||
.param("metaDateEnd", "1917-01-11")
|
||||
.param("metaDateRaw", "10.–11. Januar 1917")
|
||||
.with(req -> { req.setMethod("PUT"); return req; }).with(csrf()))
|
||||
.andExpect(status().isOk());
|
||||
|
||||
DocumentUpdateDTO bound = captor.getValue();
|
||||
org.assertj.core.api.Assertions.assertThat(bound.getMetaDatePrecision()).isEqualTo(DatePrecision.RANGE);
|
||||
org.assertj.core.api.Assertions.assertThat(bound.getMetaDateEnd())
|
||||
.isEqualTo(java.time.LocalDate.of(1917, 1, 11));
|
||||
org.assertj.core.api.Assertions.assertThat(bound.getMetaDateRaw()).isEqualTo("10.–11. Januar 1917");
|
||||
}
|
||||
|
||||
// ─── DELETE /api/documents/{id} ──────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
|
||||
@@ -1,178 +0,0 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.PostgresContainerConfig;
|
||||
import org.raddatz.familienarchiv.audit.AuditLogQueryService;
|
||||
import org.raddatz.familienarchiv.dashboard.DashboardService;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonRepository;
|
||||
import org.raddatz.familienarchiv.tag.Tag;
|
||||
import org.raddatz.familienarchiv.tag.TagRepository;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.data.domain.PageRequest;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatCode;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
/**
|
||||
* Verifies that lazy-loaded associations on {@link Document} are accessible after a service
|
||||
* method returns — i.e. no {@link org.hibernate.LazyInitializationException} is thrown outside
|
||||
* the Hibernate session that loaded the entity.
|
||||
*
|
||||
* <p><b>Known limitation:</b> calling {@code getDocumentById} (or any other service method) from
|
||||
* within an already-open transaction is not covered here. When an outer transaction is active,
|
||||
* the service's own {@code @Transactional} merges into it and Hibernate keeps the same session
|
||||
* open, so the lazy-init guard behaves differently than in a non-transactional caller. This is a
|
||||
* known constraint of the test setup, not a bug in the production code.
|
||||
*/
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
|
||||
@ActiveProfiles("test")
|
||||
@Import(PostgresContainerConfig.class)
|
||||
class DocumentLazyLoadingTest {
|
||||
|
||||
@MockitoBean
|
||||
S3Client s3Client;
|
||||
|
||||
@Autowired
|
||||
DocumentRepository documentRepository;
|
||||
|
||||
@Autowired
|
||||
PersonRepository personRepository;
|
||||
|
||||
@Autowired
|
||||
TagRepository tagRepository;
|
||||
|
||||
@Autowired
|
||||
DocumentService documentService;
|
||||
|
||||
@Autowired
|
||||
DashboardService dashboardService;
|
||||
|
||||
@MockitoBean
|
||||
AuditLogQueryService auditLogQueryService;
|
||||
|
||||
@AfterEach
|
||||
void cleanup() {
|
||||
documentRepository.deleteAll();
|
||||
tagRepository.deleteAll();
|
||||
personRepository.deleteAll();
|
||||
}
|
||||
|
||||
@Test
|
||||
void getDocumentById_tagsAndReceiversAccessible_afterReturnFromService() {
|
||||
Person sender = savedPerson("Max", "LzSender");
|
||||
Person receiver = savedPerson("Anna", "LzReceiver");
|
||||
Tag tag = savedTag("LzTag");
|
||||
Document doc = savedDocument("LazyTest", "lazy_test.pdf", sender, Set.of(receiver), Set.of(tag));
|
||||
|
||||
Document result = documentService.getDocumentById(doc.getId());
|
||||
|
||||
// Only the collection access itself is in assertThatCode — guards against LazyInitializationException.
|
||||
// Value assertions live outside so failures surface as AssertionError, not as unexpected exception.
|
||||
assertThatCode(() -> {
|
||||
result.getTags().size();
|
||||
result.getReceivers().size();
|
||||
}).doesNotThrowAnyException();
|
||||
assertThat(result.getTags()).isNotEmpty();
|
||||
result.getTags().forEach(t -> assertThat(t.getName()).isNotNull());
|
||||
assertThat(result.getReceivers()).isNotEmpty();
|
||||
result.getReceivers().forEach(r -> assertThat(r.getLastName()).isNotNull());
|
||||
}
|
||||
|
||||
@Test
|
||||
void getRecentActivity_collectionsAccessibleAfterReturn() {
|
||||
Person sender = savedPerson("Hans", "RaSender");
|
||||
Tag tag = savedTag("RaTag");
|
||||
for (int i = 0; i < 3; i++) {
|
||||
savedDocument("RaDoc " + i, "ra_doc" + i + ".pdf", sender, Set.of(), Set.of(tag));
|
||||
}
|
||||
|
||||
List<Document> results = documentService.getRecentActivity(3);
|
||||
|
||||
// Access lazy fields inside assertThatCode — guards against LazyInitializationException.
|
||||
// Value assertions live outside so failures surface as AssertionError, not as unexpected exception.
|
||||
assertThatCode(() -> {
|
||||
results.forEach(d -> d.getSender().getLastName());
|
||||
results.forEach(d -> d.getTags().size());
|
||||
}).doesNotThrowAnyException();
|
||||
results.forEach(d -> assertThat(d.getSender()).isNotNull());
|
||||
results.forEach(d -> assertThat(d.getSender().getLastName()).isNotNull());
|
||||
results.forEach(d -> assertThat(d.getTags()).isNotEmpty());
|
||||
}
|
||||
|
||||
@Test
|
||||
void searchDocuments_receiverSort_doesNotThrowLazyInitializationException() {
|
||||
Person sender = savedPerson("Hans", "SrSender");
|
||||
Person receiver = savedPerson("Anna", "SrReceiver");
|
||||
Tag tag = savedTag("SrTag");
|
||||
savedDocument("SrDoc", "sr_doc.pdf", sender, Set.of(receiver), Set.of(tag));
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.RECEIVER, "asc", null,
|
||||
PageRequest.of(0, 20));
|
||||
assertThat(result.totalElements()).isGreaterThan(0);
|
||||
assertThatCode(() ->
|
||||
result.items().forEach(i -> { if (i.sender() != null) i.sender().getLastName(); }))
|
||||
.doesNotThrowAnyException();
|
||||
}
|
||||
|
||||
@Test
|
||||
void searchDocuments_senderSort_doesNotThrowLazyInitializationException() {
|
||||
Person sender = savedPerson("Hans", "SsSender");
|
||||
Tag tag = savedTag("SsTag");
|
||||
savedDocument("SsDoc", "ss_doc.pdf", sender, Set.of(), Set.of(tag));
|
||||
|
||||
assertThatCode(() -> documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.SENDER, "asc", null,
|
||||
PageRequest.of(0, 20)))
|
||||
.doesNotThrowAnyException();
|
||||
}
|
||||
|
||||
@Test
|
||||
void dashboardService_getResume_accessesReceiversViaGetDocumentById_withoutException() {
|
||||
Person sender = savedPerson("Max", "DsSender");
|
||||
Person receiver = savedPerson("Anna", "DsReceiver");
|
||||
Document doc = savedDocument("DashboardTest", "dashboard_test.pdf", sender, Set.of(receiver), Set.of());
|
||||
UUID fakeUserId = UUID.randomUUID();
|
||||
when(auditLogQueryService.findMostRecentDocumentForUser(any())).thenReturn(Optional.of(doc.getId()));
|
||||
when(auditLogQueryService.findRecentContributorsPerDocument(any())).thenReturn(java.util.Map.of());
|
||||
|
||||
assertThatCode(() -> dashboardService.getResume(fakeUserId))
|
||||
.doesNotThrowAnyException();
|
||||
}
|
||||
|
||||
private Person savedPerson(String firstName, String lastName) {
|
||||
return personRepository.save(Person.builder().firstName(firstName).lastName(lastName).build());
|
||||
}
|
||||
|
||||
private Tag savedTag(String name) {
|
||||
return tagRepository.save(Tag.builder().name(name).build());
|
||||
}
|
||||
|
||||
private Document savedDocument(String title, String filename, Person sender,
|
||||
Set<Person> receivers, Set<Tag> tags) {
|
||||
return documentRepository.save(Document.builder()
|
||||
.title(title).originalFilename(filename)
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.sender(sender)
|
||||
.receivers(new HashSet<>(receivers))
|
||||
.tags(new HashSet<>(tags))
|
||||
.build());
|
||||
}
|
||||
}
|
||||
@@ -1,120 +0,0 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.PostgresContainerConfig;
|
||||
import org.raddatz.familienarchiv.audit.AuditLogQueryService;
|
||||
import org.raddatz.familienarchiv.ocr.TrainingLabel;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.data.domain.PageRequest;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatCode;
|
||||
|
||||
/**
|
||||
* AC #2: Document with trainingLabels does not cause LazyInitializationException in search.
|
||||
* AC #3: Detail API still returns trainingLabels after the Document.list graph change.
|
||||
*/
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
|
||||
@ActiveProfiles("test")
|
||||
@Import(PostgresContainerConfig.class)
|
||||
class DocumentListItemIntegrationTest {
|
||||
|
||||
@MockitoBean
|
||||
S3Client s3Client;
|
||||
|
||||
@MockitoBean
|
||||
AuditLogQueryService auditLogQueryService;
|
||||
|
||||
@Autowired
|
||||
DocumentRepository documentRepository;
|
||||
|
||||
@Autowired
|
||||
DocumentService documentService;
|
||||
|
||||
@AfterEach
|
||||
void cleanup() {
|
||||
documentRepository.deleteAll();
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_doesNotThrow_whenDocumentHasTrainingLabels() {
|
||||
documentRepository.save(Document.builder()
|
||||
.title("Kurrent Brief")
|
||||
.originalFilename("kurrent.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.trainingLabels(new HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION)))
|
||||
.build());
|
||||
|
||||
assertThatCode(() -> documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null,
|
||||
PageRequest.of(0, 50)))
|
||||
.doesNotThrowAnyException();
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_returns_list_item_without_sensitive_fields_when_document_has_training_labels() {
|
||||
documentRepository.save(Document.builder()
|
||||
.title("Kurrent Brief")
|
||||
.originalFilename("kurrent2.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.trainingLabels(new HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION)))
|
||||
.build());
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null,
|
||||
PageRequest.of(0, 50));
|
||||
|
||||
assertThat(result.totalElements()).isGreaterThan(0);
|
||||
DocumentListItem item = result.items().get(0);
|
||||
assertThat(item.id()).isNotNull();
|
||||
assertThat(item.title()).isEqualTo("Kurrent Brief");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_listItem_carriesMetaDatePrecisionAndEnd() {
|
||||
documentRepository.save(Document.builder()
|
||||
.title("Range Brief")
|
||||
.originalFilename("range.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.documentDate(java.time.LocalDate.of(1943, 1, 1))
|
||||
.metaDatePrecision(DatePrecision.RANGE)
|
||||
.metaDateEnd(java.time.LocalDate.of(1943, 12, 31))
|
||||
.build());
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null,
|
||||
PageRequest.of(0, 50));
|
||||
|
||||
DocumentListItem item = result.items().stream()
|
||||
.filter(i -> i.title().equals("Range Brief")).findFirst().orElseThrow();
|
||||
assertThat(item.metaDatePrecision()).isEqualTo(DatePrecision.RANGE);
|
||||
assertThat(item.metaDateEnd()).isEqualTo(java.time.LocalDate.of(1943, 12, 31));
|
||||
}
|
||||
|
||||
@Test
|
||||
void detail_stillReturnsTrainingLabels() {
|
||||
Document saved = documentRepository.save(Document.builder()
|
||||
.title("Detail Test")
|
||||
.originalFilename("detail_test.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.trainingLabels(new HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION)))
|
||||
.build());
|
||||
|
||||
// Document.full entity graph (used by getDocumentById) must still load trainingLabels
|
||||
Document loaded = documentService.getDocumentById(saved.getId());
|
||||
|
||||
assertThat(loaded.getTrainingLabels()).containsExactly(TrainingLabel.KURRENT_RECOGNITION);
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,5 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import jakarta.persistence.EntityManager;
|
||||
import jakarta.persistence.EntityManagerFactory;
|
||||
import org.hibernate.SessionFactory;
|
||||
import org.hibernate.stat.Statistics;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.PostgresContainerConfig;
|
||||
import org.raddatz.familienarchiv.config.FlywayConfig;
|
||||
@@ -25,7 +21,6 @@ import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase;
|
||||
import org.springframework.boot.data.jpa.test.autoconfigure.DataJpaTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.data.jpa.domain.Specification;
|
||||
|
||||
import org.springframework.data.domain.Page;
|
||||
import org.springframework.data.domain.PageRequest;
|
||||
@@ -60,12 +55,6 @@ class DocumentRepositoryTest {
|
||||
@Autowired
|
||||
private TranscriptionBlockRepository transcriptionBlockRepository;
|
||||
|
||||
@Autowired
|
||||
private EntityManagerFactory entityManagerFactory;
|
||||
|
||||
@Autowired
|
||||
private EntityManager entityManager;
|
||||
|
||||
// ─── save and findById ────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
@@ -501,117 +490,6 @@ class DocumentRepositoryTest {
|
||||
assertThat(ids).containsExactlyInAnyOrder(grandparent.getId(), parent2.getId(), child2.getId());
|
||||
}
|
||||
|
||||
// ─── query-count — entity-graph assertions ────────────────────────────────
|
||||
|
||||
@Test
|
||||
void findAll_withSpecAndPageable_loadsDocumentsInAtMostFiveStatements() {
|
||||
Person sender = personRepository.save(Person.builder().firstName("Hans").lastName("QcSender").build());
|
||||
Person receiver = personRepository.save(Person.builder().firstName("Anna").lastName("QcReceiver").build());
|
||||
Tag tag = tagRepository.save(Tag.builder().name("QcTag").build());
|
||||
for (int i = 0; i < 10; i++) {
|
||||
documentRepository.save(Document.builder()
|
||||
.title("QcDoc " + i).originalFilename("qcdoc" + i + ".pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.sender(sender)
|
||||
.receivers(new HashSet<>(Set.of(receiver)))
|
||||
.tags(new HashSet<>(Set.of(tag)))
|
||||
.build());
|
||||
}
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
Statistics stats = entityManagerFactory.unwrap(SessionFactory.class).getStatistics();
|
||||
stats.setStatisticsEnabled(true);
|
||||
stats.clear();
|
||||
|
||||
Specification<Document> allDocs = (root, query, cb) -> null;
|
||||
documentRepository.findAll(allDocs, PageRequest.of(0, 10));
|
||||
|
||||
assertThat(stats.getPrepareStatementCount())
|
||||
.as("@EntityGraph(Document.list) must load 10 docs in ≤5 statements, not N+1")
|
||||
.isLessThanOrEqualTo(5);
|
||||
}
|
||||
|
||||
@Test
|
||||
void findById_loadsSenderReceiversAndTagsInAtMostTwoStatements() {
|
||||
Person sender = personRepository.save(Person.builder().firstName("Max").lastName("FbSender").build());
|
||||
Set<Person> receivers = new HashSet<>();
|
||||
for (int i = 0; i < 3; i++) {
|
||||
receivers.add(personRepository.save(
|
||||
Person.builder().firstName("R" + i).lastName("FbReceiver").build()));
|
||||
}
|
||||
Set<Tag> tags = new HashSet<>();
|
||||
for (int i = 0; i < 5; i++) {
|
||||
tags.add(tagRepository.save(Tag.builder().name("FbTag" + i).build()));
|
||||
}
|
||||
Document doc = documentRepository.save(Document.builder()
|
||||
.title("FindByIdQc").originalFilename("findbyid_qc.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.sender(sender).receivers(receivers).tags(tags)
|
||||
.build());
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
Statistics stats = entityManagerFactory.unwrap(SessionFactory.class).getStatistics();
|
||||
stats.setStatisticsEnabled(true);
|
||||
stats.clear();
|
||||
|
||||
documentRepository.findById(doc.getId());
|
||||
|
||||
assertThat(stats.getPrepareStatementCount())
|
||||
.as("@EntityGraph(Document.full) must load sender+receivers+tags in ≤2 statements, not 4")
|
||||
.isLessThanOrEqualTo(2);
|
||||
}
|
||||
|
||||
@Test
|
||||
void findAll_withPageable_loadsSenderWithoutNPlusOne() {
|
||||
Person sender = personRepository.save(Person.builder().firstName("Maria").lastName("RaSender").build());
|
||||
Tag tag = tagRepository.save(Tag.builder().name("RaTag2").build());
|
||||
for (int i = 0; i < 5; i++) {
|
||||
documentRepository.save(Document.builder()
|
||||
.title("RaDoc2 " + i).originalFilename("radoc2_" + i + ".pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.sender(sender)
|
||||
.tags(new HashSet<>(Set.of(tag)))
|
||||
.build());
|
||||
}
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
Statistics stats = entityManagerFactory.unwrap(SessionFactory.class).getStatistics();
|
||||
stats.setStatisticsEnabled(true);
|
||||
stats.clear();
|
||||
|
||||
documentRepository.findAll(PageRequest.of(0, 5, Sort.by(Sort.Direction.DESC, "updatedAt")));
|
||||
|
||||
assertThat(stats.getPrepareStatementCount())
|
||||
.as("@EntityGraph(Document.list) via findAll(Pageable) must not N+1 sender for 5 docs")
|
||||
.isLessThanOrEqualTo(5);
|
||||
}
|
||||
|
||||
@Test
|
||||
void findAll_withSpecOnly_appliesEntityGraphInAtMostFiveStatements() {
|
||||
Person sender = personRepository.save(Person.builder().firstName("Otto").lastName("SoSender").build());
|
||||
Tag tag = tagRepository.save(Tag.builder().name("SoTag").build());
|
||||
for (int i = 0; i < 5; i++) {
|
||||
documentRepository.save(Document.builder()
|
||||
.title("SoDoc " + i).originalFilename("sodoc_" + i + ".pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.sender(sender)
|
||||
.tags(new HashSet<>(Set.of(tag)))
|
||||
.build());
|
||||
}
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
Statistics stats = entityManagerFactory.unwrap(SessionFactory.class).getStatistics();
|
||||
stats.setStatisticsEnabled(true);
|
||||
stats.clear();
|
||||
|
||||
Specification<Document> allDocs = (root, query, cb) -> null;
|
||||
documentRepository.findAll(allDocs);
|
||||
|
||||
assertThat(stats.getPrepareStatementCount())
|
||||
.as("@EntityGraph(Document.list) via findAll(Spec) must not N+1 sender for 5 docs")
|
||||
.isLessThanOrEqualTo(5);
|
||||
}
|
||||
|
||||
// ─── seeding helpers ─────────────────────────────────────────────────────
|
||||
|
||||
private Document uploaded(String title) {
|
||||
|
||||
@@ -125,10 +125,10 @@ class DocumentSearchPagedIntegrationTest {
|
||||
|
||||
// No document id should appear on both pages — slicing must be exclusive.
|
||||
var idsOnPage0 = page0.items().stream()
|
||||
.map(item -> item.id())
|
||||
.map(item -> item.document().getId())
|
||||
.toList();
|
||||
var idsOnPage1 = page1.items().stream()
|
||||
.map(item -> item.id())
|
||||
.map(item -> item.document().getId())
|
||||
.toList();
|
||||
for (UUID id : idsOnPage0) {
|
||||
assertThat(idsOnPage1).doesNotContain(id);
|
||||
|
||||
@@ -3,6 +3,8 @@ package org.raddatz.familienarchiv.document;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.audit.ActivityActorDTO;
|
||||
import org.raddatz.familienarchiv.document.Document;
|
||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.springframework.data.domain.PageRequest;
|
||||
|
||||
import java.util.List;
|
||||
@@ -12,12 +14,14 @@ import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
class DocumentSearchResultTest {
|
||||
|
||||
private DocumentListItem item(UUID docId) {
|
||||
return new DocumentListItem(
|
||||
docId, "Test", "test.pdf", null, null,
|
||||
DatePrecision.UNKNOWN, null, null,
|
||||
List.of(), List.of(), null, null, null, null,
|
||||
0, List.of(), SearchMatchData.empty());
|
||||
private DocumentSearchItem item(UUID docId) {
|
||||
Document doc = Document.builder()
|
||||
.id(docId)
|
||||
.title("Test")
|
||||
.originalFilename("test.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.build();
|
||||
return new DocumentSearchItem(doc, SearchMatchData.empty(), 0, List.of());
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -41,7 +45,7 @@ class DocumentSearchResultTest {
|
||||
|
||||
@Test
|
||||
void paged_factory_populates_paging_fields_from_pageable_and_total() {
|
||||
List<DocumentListItem> slice = List.of(item(UUID.randomUUID()), item(UUID.randomUUID()));
|
||||
List<DocumentSearchItem> slice = List.of(item(UUID.randomUUID()), item(UUID.randomUUID()));
|
||||
|
||||
DocumentSearchResult result = DocumentSearchResult.paged(slice, PageRequest.of(1, 50), 120L);
|
||||
|
||||
@@ -64,11 +68,9 @@ class DocumentSearchResultTest {
|
||||
void of_exposes_items_with_completion_and_contributors() {
|
||||
UUID id = UUID.randomUUID();
|
||||
ActivityActorDTO actor = new ActivityActorDTO("AB", "#f00", "Anna Braun");
|
||||
DocumentListItem item = new DocumentListItem(
|
||||
id, "T", "t.pdf", null, null,
|
||||
DatePrecision.UNKNOWN, null, null,
|
||||
List.of(), List.of(), null, null, null, null,
|
||||
75, List.of(actor), SearchMatchData.empty());
|
||||
Document doc = Document.builder().id(id).title("T").originalFilename("t.pdf")
|
||||
.status(DocumentStatus.UPLOADED).build();
|
||||
DocumentSearchItem item = new DocumentSearchItem(doc, SearchMatchData.empty(), 75, List.of(actor));
|
||||
|
||||
DocumentSearchResult result = DocumentSearchResult.of(List.of(item));
|
||||
|
||||
|
||||
@@ -70,7 +70,7 @@ class DocumentServiceSortTest {
|
||||
"Brief", null, null, null, null, null, null, null, DocumentSort.DATE, "DESC", null, PAGE);
|
||||
|
||||
assertThat(result.items()).hasSize(2);
|
||||
assertThat(result.items().get(0).id()).isEqualTo(id2); // newer first
|
||||
assertThat(result.items().get(0).document().getId()).isEqualTo(id2); // newer first
|
||||
}
|
||||
|
||||
// ─── RELEVANCE sort — pure text (no filters) ──────────────────────────────
|
||||
@@ -104,7 +104,7 @@ class DocumentServiceSortTest {
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, PAGE);
|
||||
|
||||
assertThat(result.items().get(0).id()).isEqualTo(id1);
|
||||
assertThat(result.items().get(0).document().getId()).isEqualTo(id1);
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -121,7 +121,7 @@ class DocumentServiceSortTest {
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
"Brief", null, null, null, null, null, null, null, null, null, null, PAGE);
|
||||
|
||||
assertThat(result.items().get(0).id()).isEqualTo(id1);
|
||||
assertThat(result.items().get(0).document().getId()).isEqualTo(id1);
|
||||
}
|
||||
|
||||
// ─── RELEVANCE sort — overflow guard ─────────────────────────────────────
|
||||
@@ -156,7 +156,7 @@ class DocumentServiceSortTest {
|
||||
DocumentSort.RELEVANCE, null, null, PAGE);
|
||||
|
||||
assertThat(result.items()).hasSize(1);
|
||||
assertThat(result.items().get(0).id()).isEqualTo(uuidId);
|
||||
assertThat(result.items().get(0).document().getId()).isEqualTo(uuidId);
|
||||
}
|
||||
|
||||
// ─── RELEVANCE sort — text + active filter ────────────────────────────────
|
||||
|
||||
@@ -11,7 +11,7 @@ import org.raddatz.familienarchiv.audit.AuditLogQueryService;
|
||||
import org.raddatz.familienarchiv.audit.AuditService;
|
||||
import org.raddatz.familienarchiv.document.annotation.AnnotationService;
|
||||
import org.raddatz.familienarchiv.document.transcription.TranscriptionBlockQueryService;
|
||||
import org.raddatz.familienarchiv.document.DocumentListItem;
|
||||
import org.raddatz.familienarchiv.document.DocumentSearchItem;
|
||||
import org.raddatz.familienarchiv.document.DocumentSearchResult;
|
||||
import org.raddatz.familienarchiv.document.DocumentSort;
|
||||
import org.raddatz.familienarchiv.document.DocumentUpdateDTO;
|
||||
@@ -144,53 +144,6 @@ class DocumentServiceTest {
|
||||
assertThat(doc.getArchiveFolder()).isEqualTo("Mappe B");
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_persistsDatePrecisionEndAndRaw() throws Exception {
|
||||
UUID id = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(id).receivers(new HashSet<>()).tags(new HashSet<>()).build();
|
||||
when(documentRepository.findById(id)).thenReturn(Optional.of(doc));
|
||||
when(documentRepository.save(any())).thenReturn(doc);
|
||||
|
||||
DocumentUpdateDTO dto = new DocumentUpdateDTO();
|
||||
dto.setDocumentDate(LocalDate.of(1917, 1, 10));
|
||||
dto.setMetaDatePrecision(DatePrecision.RANGE);
|
||||
dto.setMetaDateEnd(LocalDate.of(1917, 1, 11));
|
||||
dto.setMetaDateRaw("10.–11. Januar 1917");
|
||||
|
||||
documentService.updateDocument(id, dto, null, null);
|
||||
|
||||
assertThat(doc.getMetaDatePrecision()).isEqualTo(DatePrecision.RANGE);
|
||||
assertThat(doc.getMetaDateEnd()).isEqualTo(LocalDate.of(1917, 1, 11));
|
||||
assertThat(doc.getMetaDateRaw()).isEqualTo("10.–11. Januar 1917");
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_preservesStoredPrecision_whenDtoOmitsIt() throws Exception {
|
||||
// Editing a doc (e.g. fixing a location typo) without touching the precision
|
||||
// controls must NOT fabricate a precision. The form omits the three precision
|
||||
// fields → they arrive null on the DTO → the stored values must be preserved.
|
||||
UUID id = UUID.randomUUID();
|
||||
Document doc = Document.builder()
|
||||
.id(id)
|
||||
.metaDatePrecision(DatePrecision.MONTH)
|
||||
.metaDateEnd(LocalDate.of(1916, 6, 30))
|
||||
.metaDateRaw("Juni 1916")
|
||||
.receivers(new HashSet<>())
|
||||
.tags(new HashSet<>())
|
||||
.build();
|
||||
when(documentRepository.findById(id)).thenReturn(Optional.of(doc));
|
||||
when(documentRepository.save(any())).thenReturn(doc);
|
||||
|
||||
DocumentUpdateDTO dto = new DocumentUpdateDTO();
|
||||
dto.setLocation("Berlin"); // unrelated edit; precision fields left null
|
||||
|
||||
documentService.updateDocument(id, dto, null, null);
|
||||
|
||||
assertThat(doc.getMetaDatePrecision()).isEqualTo(DatePrecision.MONTH);
|
||||
assertThat(doc.getMetaDateEnd()).isEqualTo(LocalDate.of(1916, 6, 30));
|
||||
assertThat(doc.getMetaDateRaw()).isEqualTo("Juni 1916");
|
||||
}
|
||||
|
||||
// ─── deleteTagCascading ───────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
@@ -1491,7 +1444,7 @@ class DocumentServiceTest {
|
||||
assertThat(result.totalPages()).isEqualTo(3);
|
||||
assertThat(result.items()).hasSize(50);
|
||||
// Page 1 (offset 50) under ascending sender sort should start at L050
|
||||
assertThat(result.items().get(0).sender().getLastName()).isEqualTo("L050");
|
||||
assertThat(result.items().get(0).document().getSender().getLastName()).isEqualTo("L050");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -1612,7 +1565,7 @@ class DocumentServiceTest {
|
||||
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null, UNPAGED);
|
||||
|
||||
assertThat(result.items()).hasSize(2);
|
||||
assertThat(result.items()).extracting(DocumentListItem::title).containsExactly("Has Sender", "No Sender");
|
||||
assertThat(result.items()).extracting(item -> item.document().getTitle()).containsExactly("Has Sender", "No Sender");
|
||||
}
|
||||
|
||||
// ─── searchDocuments — RECEIVER sort, empty receivers ───────────────────────
|
||||
@@ -1631,7 +1584,7 @@ class DocumentServiceTest {
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null, DocumentSort.RECEIVER, "asc", null, UNPAGED);
|
||||
|
||||
assertThat(result.items()).extracting(DocumentListItem::title)
|
||||
assertThat(result.items()).extracting(item -> item.document().getTitle())
|
||||
.containsExactly("Has Receiver", "No Receivers");
|
||||
}
|
||||
|
||||
@@ -1654,7 +1607,7 @@ class DocumentServiceTest {
|
||||
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null, UNPAGED);
|
||||
|
||||
// null lastName should sort to end (treated as empty), not before "smith" (as "null")
|
||||
assertThat(result.items()).extracting(DocumentListItem::title)
|
||||
assertThat(result.items()).extracting(item -> item.document().getTitle())
|
||||
.containsExactly("smith doc", "Null lastname doc");
|
||||
}
|
||||
|
||||
|
||||
@@ -1,229 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.PostgresContainerConfig;
|
||||
import org.raddatz.familienarchiv.document.Document;
|
||||
import org.raddatz.familienarchiv.document.DocumentRepository;
|
||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonRepository;
|
||||
import org.raddatz.familienarchiv.tag.TagRepository;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.springframework.test.util.ReflectionTestUtils;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
* Real Postgres (Testcontainers) integration test for the canonical importer. The
|
||||
* {@code UNIQUE(source_ref)} constraint and the upsert-on-conflict behaviour only exist
|
||||
* in real Postgres (never H2), so idempotency is verified here. S3 is mocked — the
|
||||
* synthetic document rows carry no on-disk files, so every document is a PLACEHOLDER and
|
||||
* no upload is attempted.
|
||||
*/
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
|
||||
@ActiveProfiles("test")
|
||||
@Import(PostgresContainerConfig.class)
|
||||
class CanonicalImportIntegrationTest {
|
||||
|
||||
@MockitoBean S3Client s3Client;
|
||||
|
||||
@Autowired CanonicalImportOrchestrator orchestrator;
|
||||
@Autowired PersonRepository personRepository;
|
||||
@Autowired TagRepository tagRepository;
|
||||
@Autowired DocumentRepository documentRepository;
|
||||
|
||||
Path artifactDir;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() throws Exception {
|
||||
documentRepository.deleteAll();
|
||||
personRepository.deleteAll();
|
||||
tagRepository.deleteAll();
|
||||
artifactDir = Files.createTempDirectory("canonical-import-it");
|
||||
writeArtifacts(artifactDir);
|
||||
ReflectionTestUtils.setField(orchestrator, "canonicalDir", artifactDir.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* The import commits through its own transactions (the orchestrator is not transactional),
|
||||
* so this test cannot rely on {@code @Transactional} rollback for isolation. Delete the
|
||||
* committed rows after each test — otherwise the last test's documents (dated 1888-02) and
|
||||
* persons/tags leak into the shared Testcontainers Postgres and pollute other integration
|
||||
* tests that assume a known seed (e.g. DocumentDensityIntegrationTest,
|
||||
* DocumentSearchPagedIntegrationTest). Mirrors the @AfterEach deleteAll convention used by
|
||||
* DocumentListItemIntegrationTest.
|
||||
*/
|
||||
@AfterEach
|
||||
void cleanup() {
|
||||
documentRepository.deleteAll();
|
||||
personRepository.deleteAll();
|
||||
tagRepository.deleteAll();
|
||||
}
|
||||
|
||||
@Test
|
||||
void reimport_isIdempotent_noDuplicatePersonsTagsOrDocuments() {
|
||||
orchestrator.runImport();
|
||||
long personsAfterFirst = personRepository.count();
|
||||
long tagsAfterFirst = tagRepository.count();
|
||||
long documentsAfterFirst = documentRepository.count();
|
||||
assertThat(orchestrator.getStatus().state()).isEqualTo(ImportStatus.State.DONE);
|
||||
assertThat(personsAfterFirst).isPositive();
|
||||
assertThat(tagsAfterFirst).isPositive();
|
||||
assertThat(documentsAfterFirst).isPositive();
|
||||
|
||||
orchestrator.runImport();
|
||||
|
||||
assertThat(personRepository.count()).isEqualTo(personsAfterFirst);
|
||||
assertThat(tagRepository.count()).isEqualTo(tagsAfterFirst);
|
||||
assertThat(documentRepository.count()).isEqualTo(documentsAfterFirst);
|
||||
}
|
||||
|
||||
@Test
|
||||
void reimport_preservesHumanEditedPersonField() {
|
||||
orchestrator.runImport();
|
||||
Person walter = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
|
||||
walter.setNotes("Verified by archivist");
|
||||
walter.setFirstName("Walther");
|
||||
personRepository.save(walter);
|
||||
|
||||
orchestrator.runImport();
|
||||
|
||||
Person reimported = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
|
||||
assertThat(reimported.getNotes()).isEqualTo("Verified by archivist");
|
||||
assertThat(reimported.getFirstName()).isEqualTo("Walther");
|
||||
}
|
||||
|
||||
@Test
|
||||
void import_linksDocumentSenderToRegisterPerson_andRetainsRawText() {
|
||||
orchestrator.runImport();
|
||||
|
||||
Person walter = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
|
||||
Document doc = documentRepository.findByOriginalFilename("W-0001").orElseThrow();
|
||||
assertThat(doc.getSender()).isNotNull();
|
||||
assertThat(doc.getSender().getId()).isEqualTo(walter.getId());
|
||||
assertThat(doc.getSenderText()).isEqualTo("Walter de Gruyter");
|
||||
assertThat(doc.getStatus()).isEqualTo(DocumentStatus.PLACEHOLDER);
|
||||
}
|
||||
|
||||
@Test
|
||||
void import_provisionalFlag_trueForImporterCreated_falseForRegister() {
|
||||
orchestrator.runImport();
|
||||
|
||||
Optional<Person> register = personRepository.findBySourceRef("de-gruyter-walter");
|
||||
assertThat(register).get().extracting(Person::isProvisional).isEqualTo(false);
|
||||
}
|
||||
|
||||
@Test
|
||||
void reimport_prunesRemovedReceiverAndTag_whenCanonicalRowShrinks() throws Exception {
|
||||
orchestrator.runImport();
|
||||
// findById uses the Document.full entity graph so receivers/tags initialise eagerly.
|
||||
Document before = documentRepository.findById(
|
||||
documentRepository.findByOriginalFilename("W-0001").orElseThrow().getId()).orElseThrow();
|
||||
assertThat(before.getReceivers()).isNotEmpty();
|
||||
assertThat(before.getTags()).isNotEmpty();
|
||||
|
||||
// Re-stage the document sheet with W-0001's receiver and tag removed.
|
||||
writeSheet(artifactDir.resolve("canonical-documents.xlsx"),
|
||||
List.of("index", "file", "sender_person_id", "sender_name", "receiver_person_ids",
|
||||
"receiver_names", "date_iso", "date_raw", "date_precision", "date_end", "location", "tags", "summary"),
|
||||
List.of(
|
||||
List.of("W-0001", "", "de-gruyter-walter", "Walter de Gruyter",
|
||||
"", "", "1888-02-15", "15.2.1888", "DAY", "", "Rotterdam", "", "Geschäftsreise"),
|
||||
List.of("W-0002", "", "de-gruyter-eugenie", "Eugenie de Gruyter",
|
||||
"de-gruyter-walter", "Walter de Gruyter", "1888-02-16", "16.2.1888", "DAY", "",
|
||||
"Middelburg", "Themen/Brautbriefe", "Reisepläne")));
|
||||
|
||||
orchestrator.runImport();
|
||||
|
||||
Document after = documentRepository.findById(before.getId()).orElseThrow();
|
||||
assertThat(after.getReceivers()).isEmpty();
|
||||
assertThat(after.getTags()).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void import_neverFlipsRegisterPersonToProvisional_whenReferencedByDocumentRow() {
|
||||
// de-gruyter-walter is a register person (provisional=false) AND the sender of W-0001.
|
||||
// The orchestrator loads the register before documents, so the document loader's
|
||||
// register-first match links the existing person and never mints a provisional one.
|
||||
// A second run (documents reference the same person again) must not flip it true.
|
||||
orchestrator.runImport();
|
||||
orchestrator.runImport();
|
||||
|
||||
Person walter = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
|
||||
assertThat(walter.isProvisional()).isFalse();
|
||||
Person eugenie = personRepository.findBySourceRef("de-gruyter-eugenie").orElseThrow();
|
||||
assertThat(eugenie.isProvisional()).isFalse();
|
||||
}
|
||||
|
||||
// ─── synthetic-but-real artifact set ─────────────────────────────────────────────
|
||||
|
||||
private void writeArtifacts(Path dir) throws Exception {
|
||||
writeSheet(dir.resolve("canonical-tag-tree.xlsx"),
|
||||
List.of("tag_path", "parent_name", "tag_name"),
|
||||
List.of(
|
||||
List.of("Themen", "", "Themen"),
|
||||
List.of("Themen/Brautbriefe", "Themen", "Brautbriefe")));
|
||||
|
||||
writeSheet(dir.resolve("canonical-persons.xlsx"),
|
||||
List.of("person_id", "last_name", "first_name", "maiden_name", "notes", "birth_date", "death_date", "provisional"),
|
||||
List.of(
|
||||
List.of("de-gruyter-walter", "de Gruyter", "Walter", "", "", "1865-01-01", "", "False"),
|
||||
List.of("de-gruyter-eugenie", "de Gruyter", "Eugenie", "Wöhler", "", "", "", "False")));
|
||||
|
||||
Files.writeString(dir.resolve("canonical-persons-tree.json"), """
|
||||
{"persons":[
|
||||
{"rowId":"row_1","firstName":"Walter","lastName":"de Gruyter","familyMember":true,"personId":"de-gruyter-walter"},
|
||||
{"rowId":"row_2","firstName":"Eugenie","lastName":"de Gruyter","maidenName":"Wöhler","familyMember":true,"personId":"de-gruyter-eugenie"}
|
||||
],"relationships":[
|
||||
{"personId":"row_1","relatedPersonId":"row_2","type":"SPOUSE_OF","source":"verheiratet_mit"}
|
||||
]}
|
||||
""");
|
||||
|
||||
writeSheet(dir.resolve("canonical-documents.xlsx"),
|
||||
List.of("index", "file", "sender_person_id", "sender_name", "receiver_person_ids",
|
||||
"receiver_names", "date_iso", "date_raw", "date_precision", "date_end", "location", "tags", "summary"),
|
||||
List.of(
|
||||
List.of("W-0001", "", "de-gruyter-walter", "Walter de Gruyter",
|
||||
"de-gruyter-eugenie", "Eugenie de Gruyter", "1888-02-15", "15.2.1888", "DAY", "",
|
||||
"Rotterdam", "Themen/Brautbriefe", "Geschäftsreise"),
|
||||
List.of("W-0002", "", "de-gruyter-eugenie", "Eugenie de Gruyter",
|
||||
"de-gruyter-walter", "Walter de Gruyter", "1888-02-16", "16.2.1888", "DAY", "",
|
||||
"Middelburg", "Themen/Brautbriefe", "Reisepläne")));
|
||||
}
|
||||
|
||||
private void writeSheet(Path file, List<String> headers, List<List<String>> rows) throws Exception {
|
||||
try (XSSFWorkbook wb = new XSSFWorkbook()) {
|
||||
Sheet sheet = wb.createSheet("Sheet1");
|
||||
Row header = sheet.createRow(0);
|
||||
for (int i = 0; i < headers.size(); i++) {
|
||||
header.createCell(i).setCellValue(headers.get(i));
|
||||
}
|
||||
for (int r = 0; r < rows.size(); r++) {
|
||||
Row row = sheet.createRow(r + 1);
|
||||
List<String> values = rows.get(r);
|
||||
for (int c = 0; c < values.size(); c++) {
|
||||
row.createCell(c).setCellValue(values.get(c));
|
||||
}
|
||||
}
|
||||
try (OutputStream out = Files.newOutputStream(file)) {
|
||||
wb.write(out);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,130 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.mockito.InOrder;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.springframework.test.util.ReflectionTestUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.Mockito.inOrder;
|
||||
import static org.mockito.Mockito.never;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class CanonicalImportOrchestratorTest {
|
||||
|
||||
@Mock TagTreeImporter tagTreeImporter;
|
||||
@Mock PersonRegisterImporter personRegisterImporter;
|
||||
@Mock PersonTreeImporter personTreeImporter;
|
||||
@Mock DocumentImporter documentImporter;
|
||||
|
||||
private CanonicalImportOrchestrator orchestrator(Path dir) {
|
||||
CanonicalImportOrchestrator o = new CanonicalImportOrchestrator(
|
||||
tagTreeImporter, personRegisterImporter, personTreeImporter, documentImporter);
|
||||
ReflectionTestUtils.setField(o, "canonicalDir", dir.toString());
|
||||
return o;
|
||||
}
|
||||
|
||||
private void writeAllArtifacts(Path dir) throws Exception {
|
||||
Files.writeString(dir.resolve("canonical-tag-tree.xlsx"), "x");
|
||||
Files.writeString(dir.resolve("canonical-persons.xlsx"), "x");
|
||||
Files.writeString(dir.resolve("canonical-persons-tree.json"), "x");
|
||||
Files.writeString(dir.resolve("canonical-documents.xlsx"), "x");
|
||||
}
|
||||
|
||||
@Test
|
||||
void getStatus_isIdleByDefault(@TempDir Path dir) {
|
||||
assertThat(orchestrator(dir).getStatus().state()).isEqualTo(ImportStatus.State.IDLE);
|
||||
}
|
||||
|
||||
@Test
|
||||
void runImport_loadsTagsAndPersonsBeforeDocuments(@TempDir Path dir) throws Exception {
|
||||
writeAllArtifacts(dir);
|
||||
when(documentImporter.load(any())).thenReturn(new DocumentImporter.LoadResult(0, List.of()));
|
||||
CanonicalImportOrchestrator o = orchestrator(dir);
|
||||
|
||||
o.runImport();
|
||||
|
||||
InOrder order = inOrder(tagTreeImporter, personRegisterImporter, personTreeImporter, documentImporter);
|
||||
order.verify(tagTreeImporter).load(any());
|
||||
order.verify(personRegisterImporter).load(any());
|
||||
order.verify(personTreeImporter).load(any());
|
||||
order.verify(documentImporter).load(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void runImport_setsStatusDone_onSuccess(@TempDir Path dir) throws Exception {
|
||||
writeAllArtifacts(dir);
|
||||
when(documentImporter.load(any())).thenReturn(new DocumentImporter.LoadResult(3, List.of()));
|
||||
CanonicalImportOrchestrator o = orchestrator(dir);
|
||||
|
||||
o.runImport();
|
||||
|
||||
assertThat(o.getStatus().state()).isEqualTo(ImportStatus.State.DONE);
|
||||
assertThat(o.getStatus().processed()).isEqualTo(3);
|
||||
}
|
||||
|
||||
@Test
|
||||
void runImport_failsClosed_whenAnArtifactIsMissing(@TempDir Path dir) throws Exception {
|
||||
Files.writeString(dir.resolve("canonical-tag-tree.xlsx"), "x");
|
||||
// the other three artifacts are absent
|
||||
CanonicalImportOrchestrator o = orchestrator(dir);
|
||||
|
||||
o.runImport();
|
||||
|
||||
assertThat(o.getStatus().state()).isEqualTo(ImportStatus.State.FAILED);
|
||||
verify(tagTreeImporter, never()).load(any());
|
||||
verify(documentImporter, never()).load(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void runImport_setsStatusFailed_whenLoaderThrows(@TempDir Path dir) throws Exception {
|
||||
writeAllArtifacts(dir);
|
||||
when(tagTreeImporter.load(any())).thenThrow(DomainException.badRequest(
|
||||
org.raddatz.familienarchiv.exception.ErrorCode.IMPORT_ARTIFACT_INVALID, "bad"));
|
||||
CanonicalImportOrchestrator o = orchestrator(dir);
|
||||
|
||||
o.runImport();
|
||||
|
||||
assertThat(o.getStatus().state()).isEqualTo(ImportStatus.State.FAILED);
|
||||
verify(documentImporter, never()).load(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void runImportAsync_throwsConflict_whenAlreadyRunning(@TempDir Path dir) {
|
||||
CanonicalImportOrchestrator o = orchestrator(dir);
|
||||
ReflectionTestUtils.setField(o, "currentStatus", new ImportStatus(
|
||||
ImportStatus.State.RUNNING, "IMPORT_RUNNING", "running", 0, List.of(), null));
|
||||
|
||||
assertThatThrownBy(o::runImportAsync)
|
||||
.isInstanceOf(DomainException.class)
|
||||
.hasMessageContaining("already in progress");
|
||||
}
|
||||
|
||||
@Test
|
||||
void runImport_aggregatesDocumentSkips(@TempDir Path dir) throws Exception {
|
||||
writeAllArtifacts(dir);
|
||||
when(documentImporter.load(any())).thenReturn(new DocumentImporter.LoadResult(1,
|
||||
List.of(new ImportStatus.SkippedFile("fake.pdf", ImportStatus.SkipReason.INVALID_PDF_SIGNATURE))));
|
||||
CanonicalImportOrchestrator o = orchestrator(dir);
|
||||
|
||||
o.runImport();
|
||||
|
||||
assertThat(o.getStatus().skipped()).isEqualTo(1);
|
||||
assertThat(o.getStatus().skippedFiles())
|
||||
.extracting(ImportStatus.SkippedFile::filename)
|
||||
.containsExactly("fake.pdf");
|
||||
}
|
||||
}
|
||||
@@ -1,115 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
|
||||
class CanonicalSheetReaderTest {
|
||||
|
||||
@Test
|
||||
void readRows_mapsCellsByHeaderName(@TempDir Path tempDir) throws Exception {
|
||||
Path xlsx = write(tempDir, List.of("index", "file"), List.of(List.of("W-0001", "scan.pdf")));
|
||||
|
||||
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file"));
|
||||
|
||||
assertThat(rows).hasSize(1);
|
||||
assertThat(rows.get(0).get("index")).isEqualTo("W-0001");
|
||||
assertThat(rows.get(0).get("file")).isEqualTo("scan.pdf");
|
||||
}
|
||||
|
||||
@Test
|
||||
void readRows_throwsBadRequest_whenRequiredHeaderMissing(@TempDir Path tempDir) throws Exception {
|
||||
Path xlsx = write(tempDir, List.of("index"), List.of(List.of("W-0001")));
|
||||
|
||||
assertThatThrownBy(() -> CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file")))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.hasMessageContaining("file");
|
||||
}
|
||||
|
||||
@Test
|
||||
void get_returnsEmptyString_forBlankCell(@TempDir Path tempDir) throws Exception {
|
||||
Path xlsx = write(tempDir, List.of("index", "file"), List.of(List.of("W-0001", "")));
|
||||
|
||||
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file"));
|
||||
|
||||
assertThat(rows.get(0).get("file")).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void get_returnsEmptyString_forUnknownColumn(@TempDir Path tempDir) throws Exception {
|
||||
Path xlsx = write(tempDir, List.of("index"), List.of(List.of("W-0001")));
|
||||
|
||||
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index"));
|
||||
|
||||
assertThat(rows.get(0).get("does_not_exist")).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void get_returnsEmptyString_forTrailingColumns_whenRowShorterThanHeader(@TempDir Path tempDir) throws Exception {
|
||||
// POI omits trailing empty cells, so a real-world artifact row can be narrower than
|
||||
// the header. The missing columns must read as "" rather than throwing.
|
||||
Path xlsx = write(tempDir,
|
||||
List.of("index", "file", "summary"),
|
||||
List.of(List.of("W-0001")));
|
||||
|
||||
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file", "summary"));
|
||||
|
||||
assertThat(rows.get(0).get("index")).isEqualTo("W-0001");
|
||||
assertThat(rows.get(0).get("file")).isEmpty();
|
||||
assertThat(rows.get(0).get("summary")).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void splitList_splitsOnPipe() {
|
||||
assertThat(CanonicalSheetReader.splitList("a|b|c")).containsExactly("a", "b", "c");
|
||||
}
|
||||
|
||||
@Test
|
||||
void splitList_returnsEmptyList_forBlank() {
|
||||
assertThat(CanonicalSheetReader.splitList("")).isEmpty();
|
||||
assertThat(CanonicalSheetReader.splitList(" ")).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void splitList_returnsSingleElement_whenNoPipe() {
|
||||
assertThat(CanonicalSheetReader.splitList("solo")).containsExactly("solo");
|
||||
}
|
||||
|
||||
@Test
|
||||
void splitList_trimsAndDropsEmptySegments() {
|
||||
assertThat(CanonicalSheetReader.splitList("a| |b")).containsExactly("a", "b");
|
||||
}
|
||||
|
||||
private Path write(Path dir, List<String> headers, List<List<String>> dataRows) throws Exception {
|
||||
Path xlsx = dir.resolve("sheet.xlsx");
|
||||
try (XSSFWorkbook wb = new XSSFWorkbook()) {
|
||||
Sheet sheet = wb.createSheet("Sheet1");
|
||||
Row header = sheet.createRow(0);
|
||||
for (int i = 0; i < headers.size(); i++) {
|
||||
header.createCell(i).setCellValue(headers.get(i));
|
||||
}
|
||||
for (int r = 0; r < dataRows.size(); r++) {
|
||||
Row row = sheet.createRow(r + 1);
|
||||
List<String> values = dataRows.get(r);
|
||||
for (int c = 0; c < values.size(); c++) {
|
||||
row.createCell(c).setCellValue(values.get(c));
|
||||
}
|
||||
}
|
||||
try (OutputStream out = Files.newOutputStream(xlsx)) {
|
||||
wb.write(out);
|
||||
}
|
||||
}
|
||||
return xlsx;
|
||||
}
|
||||
}
|
||||
@@ -1,503 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.raddatz.familienarchiv.document.Document;
|
||||
import org.raddatz.familienarchiv.document.DocumentService;
|
||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonService;
|
||||
import org.raddatz.familienarchiv.person.PersonUpsertCommand;
|
||||
import org.raddatz.familienarchiv.tag.Tag;
|
||||
import org.raddatz.familienarchiv.tag.TagService;
|
||||
import org.springframework.test.util.ReflectionTestUtils;
|
||||
import software.amazon.awssdk.core.sync.RequestBody;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.time.LocalDate;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.Mockito.lenient;
|
||||
import static org.mockito.Mockito.never;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class DocumentImporterTest {
|
||||
|
||||
@Mock DocumentService documentService;
|
||||
@Mock PersonService personService;
|
||||
@Mock TagService tagService;
|
||||
@Mock S3Client s3Client;
|
||||
@Mock ThumbnailAsyncRunner thumbnailAsyncRunner;
|
||||
|
||||
DocumentImporter importer;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
importer = new DocumentImporter(documentService, personService, tagService, s3Client, thumbnailAsyncRunner);
|
||||
ReflectionTestUtils.setField(importer, "bucketName", "test-bucket");
|
||||
}
|
||||
|
||||
// ─── security regression — ported from MassImportServiceTest — do not remove ─────
|
||||
|
||||
@Test
|
||||
void isValidImportFilename_returnsFalse_whenNull() {
|
||||
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", (String) null)).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void isValidImportFilename_returnsFalse_whenBlank() {
|
||||
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", " ")).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void isValidImportFilename_returnsFalse_whenForwardSlash() {
|
||||
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "etc/passwd")).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void isValidImportFilename_returnsFalse_whenBackslash() {
|
||||
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "..\\etc\\passwd")).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void isValidImportFilename_returnsFalse_whenDotDot() {
|
||||
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "doc..evil.pdf")).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void isValidImportFilename_returnsFalse_whenIsDotDot() {
|
||||
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "..")).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void isValidImportFilename_returnsFalse_whenAbsolutePath() {
|
||||
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "/etc/passwd")).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void isValidImportFilename_returnsFalse_whenNullByte() {
|
||||
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "file\0.pdf")).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void isValidImportFilename_returnsFalse_whenUnicodeDivisionSlash() {
|
||||
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "foo∕bar.pdf")).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void isValidImportFilename_returnsFalse_whenFullwidthSlash() {
|
||||
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "foo/bar.pdf")).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void isValidImportFilename_returnsFalse_whenReverseSolidusOperator() {
|
||||
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "foo⧵bar.pdf")).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void isValidImportFilename_returnsTrue_whenPlainBasename() {
|
||||
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "document.pdf")).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void isValidImportFilename_returnsTrue_whenLeadingDot() {
|
||||
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", ".hidden.pdf")).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void isValidImportFilename_returnsTrue_whenHasSpaces() {
|
||||
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "Brief an Oma.pdf")).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void findFileRecursive_throwsDomainException_whenSymlinkEscapesImportDir(
|
||||
@TempDir Path importDirPath, @TempDir Path outsideDir) throws Exception {
|
||||
Path outsideFile = outsideDir.resolve("secret.pdf");
|
||||
Files.writeString(outsideFile, "sensitive");
|
||||
Files.createSymbolicLink(importDirPath.resolve("secret.pdf"), outsideFile);
|
||||
ReflectionTestUtils.setField(importer, "importDir", importDirPath.toString());
|
||||
|
||||
org.assertj.core.api.Assertions.assertThatThrownBy(
|
||||
() -> ReflectionTestUtils.invokeMethod(importer, "findFileRecursive", "secret.pdf"))
|
||||
.isInstanceOf(org.raddatz.familienarchiv.exception.DomainException.class);
|
||||
}
|
||||
|
||||
// ─── path traversal in the file column cannot escape importDir ───────────────────
|
||||
|
||||
@Test
|
||||
void load_rejectsFileColumn_whenBasenameIsTraversalToken(@TempDir Path tempDir) throws Exception {
|
||||
// A file column whose basename is itself a traversal token must be rejected
|
||||
// outright, never used for disk I/O.
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0001", "evil/..", "", "", "", "", "", "", "", ""));
|
||||
|
||||
DocumentImporter.LoadResult result = importer.load(xlsx.toFile());
|
||||
|
||||
assertThat(result.skippedFiles())
|
||||
.extracting(ImportStatus.SkippedFile::reason)
|
||||
.containsExactly(ImportStatus.SkipReason.INVALID_FILENAME_PATH_TRAVERSAL);
|
||||
verify(documentService, never()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_traversalFileColumn_cannotEscapeImportDir_yieldsPlaceholder(@TempDir Path tempDir) throws Exception {
|
||||
// ../../etc/cron.d/x reduces to basename "x"; the disk lookup is confined to
|
||||
// importDir, so no file is found, nothing is uploaded, and the row becomes a
|
||||
// metadata-only PLACEHOLDER — the file outside importDir is never read.
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
when(documentService.findByOriginalFilename("W-0001")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0001", "../../etc/cron.d/x", "", "", "", "", "", "", "", ""));
|
||||
|
||||
importer.load(xlsx.toFile());
|
||||
|
||||
verify(s3Client, never()).putObject(any(PutObjectRequest.class), any(RequestBody.class));
|
||||
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d -> d.getStatus() == DocumentStatus.PLACEHOLDER));
|
||||
}
|
||||
|
||||
// ─── PDF magic-byte guard — ported — do not remove ──────────────────────────────
|
||||
|
||||
@Test
|
||||
void load_skipsFile_whenNotPdfMagicBytes(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
Files.writeString(tempDir.resolve("W-0001.pdf"), "not a pdf");
|
||||
lenient().when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0001", "..\\__scan\\W-0001.pdf", "", "", "", "", "", "", "", ""));
|
||||
|
||||
DocumentImporter.LoadResult result = importer.load(xlsx.toFile());
|
||||
|
||||
assertThat(result.skippedFiles())
|
||||
.extracting(ImportStatus.SkippedFile::reason)
|
||||
.containsExactly(ImportStatus.SkipReason.INVALID_PDF_SIGNATURE);
|
||||
verify(s3Client, never()).putObject(any(PutObjectRequest.class), any(RequestBody.class));
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_skipsFile_whenMagicByteCheckThrowsIoException(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
Files.writeString(tempDir.resolve("W-0001.pdf"), "content");
|
||||
lenient().when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0001", "..\\__scan\\W-0001.pdf", "", "", "", "", "", "", "", ""));
|
||||
|
||||
DocumentImporter spyImporter = org.mockito.Mockito.spy(importer);
|
||||
org.mockito.Mockito.doThrow(new java.io.IOException("read error"))
|
||||
.when(spyImporter).openFileStream(any(File.class));
|
||||
|
||||
DocumentImporter.LoadResult result = spyImporter.load(xlsx.toFile());
|
||||
|
||||
assertThat(result.skippedFiles())
|
||||
.extracting(ImportStatus.SkippedFile::reason)
|
||||
.containsExactly(ImportStatus.SkipReason.FILE_READ_ERROR);
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_skipsAlreadyExists_whenDocumentUploadedNotPlaceholder(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
Document existing = Document.builder().id(UUID.randomUUID())
|
||||
.originalFilename("W-0001").status(DocumentStatus.UPLOADED).build();
|
||||
when(documentService.findByOriginalFilename("W-0001")).thenReturn(Optional.of(existing));
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0001", "", "", "", "", "", "", "", "", ""));
|
||||
|
||||
DocumentImporter.LoadResult result = importer.load(xlsx.toFile());
|
||||
|
||||
assertThat(result.skippedFiles())
|
||||
.extracting(ImportStatus.SkippedFile::reason)
|
||||
.containsExactly(ImportStatus.SkipReason.ALREADY_EXISTS);
|
||||
verify(documentService, never()).save(any());
|
||||
}
|
||||
|
||||
// ─── file column drives status: present → UPLOADED, empty → PLACEHOLDER ───────────
|
||||
|
||||
@Test
|
||||
void load_uploadsToS3_andSetsStatusUploaded_whenFilePresent(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
byte[] pdf = {0x25, 0x50, 0x44, 0x46, 0x2D};
|
||||
Files.write(tempDir.resolve("W-0001.pdf"), pdf);
|
||||
when(documentService.findByOriginalFilename("W-0001")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0001", "..\\__scan\\W-0001.pdf", "", "", "", "", "", "", "", ""));
|
||||
|
||||
importer.load(xlsx.toFile());
|
||||
|
||||
verify(s3Client).putObject(any(PutObjectRequest.class), any(RequestBody.class));
|
||||
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d -> d.getStatus() == DocumentStatus.UPLOADED));
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_setsStatusPlaceholder_whenFileColumnEmpty(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
when(documentService.findByOriginalFilename("W-0099")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0099", "", "", "", "", "", "", "", "", ""));
|
||||
|
||||
importer.load(xlsx.toFile());
|
||||
|
||||
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d -> d.getStatus() == DocumentStatus.PLACEHOLDER));
|
||||
verify(s3Client, never()).putObject(any(PutObjectRequest.class), any(RequestBody.class));
|
||||
}
|
||||
|
||||
// ─── attribution routing — register-first + always retain raw ────────────────────
|
||||
|
||||
@Test
|
||||
void load_linksRegisterSender_andRetainsRawSenderText(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
Person walter = Person.builder().id(UUID.randomUUID()).sourceRef("de-gruyter-walter")
|
||||
.firstName("Walter").lastName("de Gruyter").build();
|
||||
when(documentService.findByOriginalFilename("W-0001")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
when(personService.findBySourceRef("de-gruyter-walter")).thenReturn(Optional.of(walter));
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0001", "", "de-gruyter-walter", "Walter de Gruyter",
|
||||
"", "", "", "", "", ""));
|
||||
|
||||
importer.load(xlsx.toFile());
|
||||
|
||||
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d ->
|
||||
d.getSender() == walter && "Walter de Gruyter".equals(d.getSenderText())));
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_createsProvisionalSender_whenSlugUnmatchedInRegister(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
Person provisional = Person.builder().id(UUID.randomUUID()).sourceRef("schwester-hanni")
|
||||
.lastName("Schwester Hanni").provisional(true).build();
|
||||
when(documentService.findByOriginalFilename("W-0002")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
when(personService.findBySourceRef("schwester-hanni")).thenReturn(Optional.empty());
|
||||
when(personService.upsertBySourceRef(any())).thenReturn(provisional);
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0002", "", "schwester-hanni", "Schwester Hanni",
|
||||
"", "", "", "", "", ""));
|
||||
|
||||
importer.load(xlsx.toFile());
|
||||
|
||||
org.mockito.ArgumentCaptor<PersonUpsertCommand> captor =
|
||||
org.mockito.ArgumentCaptor.forClass(PersonUpsertCommand.class);
|
||||
verify(personService).upsertBySourceRef(captor.capture());
|
||||
assertThat(captor.getValue().provisional()).isTrue();
|
||||
assertThat(captor.getValue().lastName()).isEqualTo("Schwester Hanni");
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_createsNoSenderPerson_whenSlugEmptyButRawPresent(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
when(documentService.findByOriginalFilename("W-0003")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0003", "", "", "?",
|
||||
"", "", "", "", "", ""));
|
||||
|
||||
importer.load(xlsx.toFile());
|
||||
|
||||
verify(personService, never()).findBySourceRef(any());
|
||||
verify(personService, never()).upsertBySourceRef(any());
|
||||
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d ->
|
||||
d.getSender() == null && "?".equals(d.getSenderText())));
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_splitsMultipleReceivers_andRetainsRawReceiverText(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
Person herbert = Person.builder().id(UUID.randomUUID()).sourceRef("cram-herbert").lastName("Cram").build();
|
||||
Person clara = Person.builder().id(UUID.randomUUID()).sourceRef("clara").lastName("Clara").build();
|
||||
when(documentService.findByOriginalFilename("W-0004")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
when(personService.findBySourceRef("cram-herbert")).thenReturn(Optional.of(herbert));
|
||||
when(personService.findBySourceRef("clara")).thenReturn(Optional.of(clara));
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0004", "", "", "",
|
||||
"cram-herbert|clara", "Herbert Cram|Clara", "", "", "", ""));
|
||||
|
||||
importer.load(xlsx.toFile());
|
||||
|
||||
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d ->
|
||||
d.getReceivers().size() == 2
|
||||
&& d.getReceivers().contains(herbert)
|
||||
&& d.getReceivers().contains(clara)
|
||||
&& "Herbert Cram|Clara".equals(d.getReceiverText())));
|
||||
}
|
||||
|
||||
// ─── clean date values parse without semantic logic ──────────────────────────────
|
||||
|
||||
@Test
|
||||
void load_parsesCleanDateAndPrecision(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
when(documentService.findByOriginalFilename("W-0005")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0005", "", "", "",
|
||||
"", "", "1916-06-01", "1.6.1916", "MONTH", ""));
|
||||
|
||||
importer.load(xlsx.toFile());
|
||||
|
||||
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d ->
|
||||
LocalDate.of(1916, 6, 1).equals(d.getDocumentDate())
|
||||
&& d.getMetaDatePrecision() == org.raddatz.familienarchiv.document.DatePrecision.MONTH
|
||||
&& "1.6.1916".equals(d.getMetaDateRaw())));
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_attachesTagBySourceRef(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
Tag tag = Tag.builder().id(UUID.randomUUID()).name("Brautbriefe").sourceRef("Themen/Brautbriefe").build();
|
||||
when(documentService.findByOriginalFilename("W-0006")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
when(tagService.findBySourceRef("Themen/Brautbriefe")).thenReturn(Optional.of(tag));
|
||||
Path xlsx = writeDocs(tempDir, docRowWithTag("W-0006", "Themen/Brautbriefe"));
|
||||
|
||||
importer.load(xlsx.toFile());
|
||||
|
||||
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d -> d.getTags().contains(tag)));
|
||||
}
|
||||
|
||||
// ─── idempotency — update existing document in place by index ─────────────────────
|
||||
|
||||
@Test
|
||||
void load_updatesExistingDocumentInPlace_whenIndexExists(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
Document existing = Document.builder().id(UUID.randomUUID())
|
||||
.originalFilename("W-0007").status(DocumentStatus.PLACEHOLDER).build();
|
||||
when(documentService.findByOriginalFilename("W-0007")).thenReturn(Optional.of(existing));
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0007", "", "", "", "", "", "", "", "", ""));
|
||||
|
||||
importer.load(xlsx.toFile());
|
||||
|
||||
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d -> d.getId().equals(existing.getId())));
|
||||
}
|
||||
|
||||
// ─── canonical collections are authoritative — re-import prunes removed links ──────
|
||||
|
||||
@Test
|
||||
void load_prunesReceiversAndTags_whenCanonicalRowShrinks(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
Person staleReceiver = Person.builder().id(UUID.randomUUID()).sourceRef("stale-receiver").lastName("Stale").build();
|
||||
Tag staleTag = Tag.builder().id(UUID.randomUUID()).name("Stale").sourceRef("Themen/Stale").build();
|
||||
Document existing = Document.builder().id(UUID.randomUUID())
|
||||
.originalFilename("W-0008").status(DocumentStatus.PLACEHOLDER).build();
|
||||
existing.getReceivers().add(staleReceiver);
|
||||
existing.getTags().add(staleTag);
|
||||
when(documentService.findByOriginalFilename("W-0008")).thenReturn(Optional.of(existing));
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
// The canonical row now carries no receiver and no tag: both stale links must go.
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0008", "", "", "", "", "", "", "", "", ""));
|
||||
|
||||
importer.load(xlsx.toFile());
|
||||
|
||||
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d ->
|
||||
d.getReceivers().isEmpty() && d.getTags().isEmpty()));
|
||||
}
|
||||
|
||||
// ─── title carries the honest date label — never a precision the data lacks ───────
|
||||
|
||||
@Test
|
||||
void load_buildsTitleWithMonthLabel_whenPrecisionIsMonth(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
when(documentService.findByOriginalFilename("W-0100")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0100", "", "", "", "", "",
|
||||
"1916-06-01", "Juni 1916", "MONTH", ""));
|
||||
|
||||
importer.load(xlsx.toFile());
|
||||
|
||||
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d ->
|
||||
d.getTitle().contains("Juni 1916") && !d.getTitle().contains("1. Juni")));
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_buildsTitleWithFullDate_whenPrecisionIsDay(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
when(documentService.findByOriginalFilename("W-0101")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0101", "", "", "", "", "",
|
||||
"1943-12-24", "24.12.1943", "DAY", ""));
|
||||
|
||||
importer.load(xlsx.toFile());
|
||||
|
||||
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d ->
|
||||
d.getTitle().contains("24. Dezember 1943")));
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_buildsTitleFromIndexOnly_whenDateUnknown(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
when(documentService.findByOriginalFilename("W-0102")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0102", "", "", "", "", "",
|
||||
"", "?", "UNKNOWN", ""));
|
||||
|
||||
importer.load(xlsx.toFile());
|
||||
|
||||
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d ->
|
||||
d.getTitle().equals("W-0102")));
|
||||
}
|
||||
|
||||
// ─── helpers ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
private Map<String, String> docRow(String index, String file, String senderId, String senderName,
|
||||
String receiverIds, String receiverNames, String dateIso,
|
||||
String dateRaw, String datePrecision, String dateEnd) {
|
||||
Map<String, String> r = new LinkedHashMap<>();
|
||||
r.put("index", index);
|
||||
r.put("file", file);
|
||||
r.put("sender_person_id", senderId);
|
||||
r.put("sender_name", senderName);
|
||||
r.put("receiver_person_ids", receiverIds);
|
||||
r.put("receiver_names", receiverNames);
|
||||
r.put("date_iso", dateIso);
|
||||
r.put("date_raw", dateRaw);
|
||||
r.put("date_precision", datePrecision);
|
||||
r.put("date_end", dateEnd);
|
||||
r.put("location", "");
|
||||
r.put("tags", "");
|
||||
r.put("summary", "");
|
||||
return r;
|
||||
}
|
||||
|
||||
private Map<String, String> docRowWithTag(String index, String tagPath) {
|
||||
Map<String, String> r = docRow(index, "", "", "", "", "", "", "", "", "");
|
||||
r.put("tags", tagPath);
|
||||
return r;
|
||||
}
|
||||
|
||||
@SafeVarargs
|
||||
private Path writeDocs(Path dir, Map<String, String>... rows) throws Exception {
|
||||
Path xlsx = dir.resolve("canonical-documents.xlsx");
|
||||
List<String> headers = List.of("index", "file", "sender_person_id", "sender_name",
|
||||
"receiver_person_ids", "receiver_names", "date_iso", "date_raw", "date_precision",
|
||||
"date_end", "location", "tags", "summary");
|
||||
try (XSSFWorkbook wb = new XSSFWorkbook()) {
|
||||
Sheet sheet = wb.createSheet("Sheet1");
|
||||
Row header = sheet.createRow(0);
|
||||
for (int i = 0; i < headers.size(); i++) {
|
||||
header.createCell(i).setCellValue(headers.get(i));
|
||||
}
|
||||
for (int r = 0; r < rows.length; r++) {
|
||||
Row row = sheet.createRow(r + 1);
|
||||
for (int c = 0; c < headers.size(); c++) {
|
||||
row.createCell(c).setCellValue(rows[r].getOrDefault(headers.get(c), ""));
|
||||
}
|
||||
}
|
||||
try (OutputStream out = Files.newOutputStream(xlsx)) {
|
||||
wb.write(out);
|
||||
}
|
||||
}
|
||||
return xlsx;
|
||||
}
|
||||
}
|
||||
@@ -1,49 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.junit.jupiter.api.DynamicTest;
|
||||
import org.junit.jupiter.api.TestFactory;
|
||||
import org.raddatz.familienarchiv.document.DatePrecision;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.time.LocalDate;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
* Asserts the Java title label against the SAME shared fixture table the TS
|
||||
* formatter spec uses ({@code docs/date-label-fixtures.json}). This is the
|
||||
* drift guard requested in #666 review: the two label implementations cannot
|
||||
* silently diverge (en-dash vs hyphen, "ca." vs "circa", season words, range
|
||||
* collapse) because both are pinned to one committed rule set.
|
||||
*/
|
||||
class DocumentTitleFormatterTest {
|
||||
|
||||
@TestFactory
|
||||
List<DynamicTest> matchesSharedFixtureTable() throws Exception {
|
||||
// Maven runs tests from the backend/ module dir; the fixture lives at repo-root docs/.
|
||||
Path fixture = Path.of("..", "docs", "date-label-fixtures.json");
|
||||
JsonNode root = new ObjectMapper().readTree(Files.readString(fixture));
|
||||
List<DynamicTest> tests = new ArrayList<>();
|
||||
for (JsonNode c : root.get("cases")) {
|
||||
String name = c.get("name").asText();
|
||||
LocalDate anchor = parseDate(c.get("anchor"));
|
||||
DatePrecision precision = DatePrecision.valueOf(c.get("precision").asText());
|
||||
LocalDate end = parseDate(c.get("end"));
|
||||
String raw = c.get("raw").isNull() ? null : c.get("raw").asText();
|
||||
String expected = c.get("expected").asText();
|
||||
tests.add(DynamicTest.dynamicTest(name, () ->
|
||||
assertThat(DocumentTitleFormatter.formatTitleDate(anchor, precision, end, raw))
|
||||
.isEqualTo(expected)));
|
||||
}
|
||||
return tests;
|
||||
}
|
||||
|
||||
private static LocalDate parseDate(JsonNode node) {
|
||||
return node == null || node.isNull() ? null : LocalDate.parse(node.asText());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,624 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.document.Document;
|
||||
import org.raddatz.familienarchiv.document.DocumentService;
|
||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.tag.Tag;
|
||||
import org.raddatz.familienarchiv.tag.TagService;
|
||||
import org.raddatz.familienarchiv.person.PersonService;
|
||||
import org.springframework.test.util.ReflectionTestUtils;
|
||||
import software.amazon.awssdk.core.sync.RequestBody;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
|
||||
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.xml.sax.SAXParseException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class MassImportServiceTest {
|
||||
|
||||
@Mock DocumentService documentService;
|
||||
@Mock PersonService personService;
|
||||
@Mock TagService tagService;
|
||||
@Mock S3Client s3Client;
|
||||
@Mock ThumbnailAsyncRunner thumbnailAsyncRunner;
|
||||
|
||||
MassImportService service;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
service = new MassImportService(documentService, personService, tagService, s3Client, thumbnailAsyncRunner);
|
||||
ReflectionTestUtils.setField(service, "bucketName", "test-bucket");
|
||||
ReflectionTestUtils.setField(service, "importDir", "/import");
|
||||
ReflectionTestUtils.setField(service, "colIndex", 0);
|
||||
ReflectionTestUtils.setField(service, "colBox", 1);
|
||||
ReflectionTestUtils.setField(service, "colFolder", 2);
|
||||
ReflectionTestUtils.setField(service, "colSender", 3);
|
||||
ReflectionTestUtils.setField(service, "colReceivers", 5);
|
||||
ReflectionTestUtils.setField(service, "colDate", 7);
|
||||
ReflectionTestUtils.setField(service, "colLocation", 9);
|
||||
ReflectionTestUtils.setField(service, "colTags", 10);
|
||||
ReflectionTestUtils.setField(service, "colSummary", 11);
|
||||
ReflectionTestUtils.setField(service, "colTranscription", 13);
|
||||
}
|
||||
|
||||
// ─── getStatus ────────────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void getStatus_returnsIdleByDefault() {
|
||||
assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.IDLE);
|
||||
}
|
||||
|
||||
@Test
|
||||
void getStatus_hasStatusCode_IMPORT_IDLE_byDefault() {
|
||||
assertThat(service.getStatus().statusCode()).isEqualTo("IMPORT_IDLE");
|
||||
}
|
||||
|
||||
// ─── runImportAsync ───────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void runImportAsync_setsFailedStatus_whenImportDirectoryDoesNotExist() {
|
||||
// /import directory doesn't exist in test environment → IOException → IMPORT_FAILED_INTERNAL
|
||||
service.runImportAsync();
|
||||
|
||||
assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED);
|
||||
assertThat(service.getStatus().statusCode()).isEqualTo("IMPORT_FAILED_INTERNAL");
|
||||
}
|
||||
|
||||
@Test
|
||||
void runImportAsync_readsFromConfiguredImportDir(@TempDir Path tempDir) {
|
||||
// Empty temp dir → findSpreadsheetFile throws "no spreadsheet" with the
|
||||
// configured path in the message. Proves the field, not a constant,
|
||||
// drives the lookup.
|
||||
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
|
||||
|
||||
service.runImportAsync();
|
||||
|
||||
assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED);
|
||||
assertThat(service.getStatus().message()).contains(tempDir.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
void runImportAsync_setsStatusCode_IMPORT_FAILED_NO_SPREADSHEET_whenDirIsEmpty(@TempDir Path tempDir) {
|
||||
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
|
||||
|
||||
service.runImportAsync();
|
||||
|
||||
assertThat(service.getStatus().statusCode()).isEqualTo("IMPORT_FAILED_NO_SPREADSHEET");
|
||||
}
|
||||
|
||||
@Test
|
||||
void runImportAsync_setsStatusCode_IMPORT_DONE_whenSpreadsheetHasNoDataRows(@TempDir Path tempDir) throws Exception {
|
||||
Path xlsx = tempDir.resolve("import.xlsx");
|
||||
try (XSSFWorkbook wb = new XSSFWorkbook()) {
|
||||
wb.createSheet("Sheet1");
|
||||
try (OutputStream out = Files.newOutputStream(xlsx)) {
|
||||
wb.write(out);
|
||||
}
|
||||
}
|
||||
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
|
||||
|
||||
service.runImportAsync();
|
||||
|
||||
assertThat(service.getStatus().statusCode()).isEqualTo("IMPORT_DONE");
|
||||
}
|
||||
|
||||
@Test
|
||||
void runImportAsync_throwsConflict_whenAlreadyRunning() {
|
||||
MassImportService.ImportStatus running = new MassImportService.ImportStatus(
|
||||
MassImportService.State.RUNNING, "IMPORT_RUNNING", "Running...", 0, LocalDateTime.now());
|
||||
ReflectionTestUtils.setField(service, "currentStatus", running);
|
||||
|
||||
assertThatThrownBy(() -> service.runImportAsync())
|
||||
.isInstanceOf(DomainException.class)
|
||||
.hasMessageContaining("already in progress");
|
||||
}
|
||||
|
||||
// ─── importSingleDocument — skip already uploaded ─────────────────────────
|
||||
|
||||
@Test
|
||||
void importSingleDocument_skips_whenDocumentAlreadyUploadedNotPlaceholder() {
|
||||
Document existing = Document.builder()
|
||||
.id(UUID.randomUUID())
|
||||
.originalFilename("doc001.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.build();
|
||||
when(documentService.findByOriginalFilename("doc001.pdf")).thenReturn(Optional.of(existing));
|
||||
|
||||
service.importSingleDocument(minimalCells("doc001.pdf"), Optional.empty(), "doc001.pdf", "doc001");
|
||||
|
||||
verify(documentService, never()).save(any());
|
||||
}
|
||||
|
||||
// ─── importSingleDocument — create new document (metadata only) ───────────
|
||||
|
||||
@Test
|
||||
void importSingleDocument_createsNewDocument_whenNotExists() {
|
||||
when(documentService.findByOriginalFilename("doc002.pdf")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
service.importSingleDocument(minimalCells("doc002.pdf"), Optional.empty(), "doc002.pdf", "doc002");
|
||||
|
||||
verify(documentService).save(argThat(d ->
|
||||
d.getOriginalFilename().equals("doc002.pdf")
|
||||
&& d.getStatus() == DocumentStatus.PLACEHOLDER));
|
||||
}
|
||||
|
||||
// ─── importSingleDocument — update existing placeholder ──────────────────
|
||||
|
||||
@Test
|
||||
void importSingleDocument_updatesExistingPlaceholder() {
|
||||
Document placeholder = Document.builder()
|
||||
.id(UUID.randomUUID())
|
||||
.originalFilename("existing.pdf")
|
||||
.status(DocumentStatus.PLACEHOLDER)
|
||||
.build();
|
||||
when(documentService.findByOriginalFilename("existing.pdf")).thenReturn(Optional.of(placeholder));
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
service.importSingleDocument(minimalCells("existing.pdf"), Optional.empty(), "existing.pdf", "existing");
|
||||
|
||||
verify(documentService).save(same(placeholder));
|
||||
}
|
||||
|
||||
// ─── importSingleDocument — with file (S3 upload) ─────────────────────────
|
||||
|
||||
@Test
|
||||
void importSingleDocument_uploadsFileToS3_andSetsStatusUploaded(@TempDir Path tempDir) throws Exception {
|
||||
Path tempFile = tempDir.resolve("doc003.pdf");
|
||||
Files.write(tempFile, "PDF content".getBytes());
|
||||
|
||||
when(documentService.findByOriginalFilename("doc003.pdf")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
service.importSingleDocument(
|
||||
minimalCells("doc003.pdf"), Optional.of(tempFile.toFile()), "doc003.pdf", "doc003");
|
||||
|
||||
verify(s3Client).putObject(any(PutObjectRequest.class), any(RequestBody.class));
|
||||
verify(documentService).save(argThat(d -> d.getStatus() == DocumentStatus.UPLOADED));
|
||||
}
|
||||
|
||||
@Test
|
||||
void importSingleDocument_returnsEarly_whenS3UploadFails(@TempDir Path tempDir) throws Exception {
|
||||
Path tempFile = tempDir.resolve("fail.pdf");
|
||||
Files.write(tempFile, "data".getBytes());
|
||||
|
||||
when(documentService.findByOriginalFilename("fail.pdf")).thenReturn(Optional.empty());
|
||||
doThrow(new RuntimeException("S3 error"))
|
||||
.when(s3Client).putObject(any(PutObjectRequest.class), any(RequestBody.class));
|
||||
|
||||
service.importSingleDocument(
|
||||
minimalCells("fail.pdf"), Optional.of(tempFile.toFile()), "fail.pdf", "fail");
|
||||
|
||||
verify(documentService, never()).save(any());
|
||||
}
|
||||
|
||||
// ─── importSingleDocument — sender handling ───────────────────────────────
|
||||
|
||||
@Test
|
||||
void importSingleDocument_setsNullSender_whenSenderCellIsBlank() {
|
||||
when(documentService.findByOriginalFilename("nosender.pdf")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
List<String> cells = buildCells("nosender.pdf", "", "", "");
|
||||
service.importSingleDocument(cells, Optional.empty(), "nosender.pdf", "nosender");
|
||||
|
||||
verify(documentService).save(argThat(d -> d.getSender() == null));
|
||||
verify(personService, never()).findOrCreateByAlias(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void importSingleDocument_createsSender_whenSenderCellIsNonBlank() {
|
||||
Person sender = Person.builder().id(UUID.randomUUID()).firstName("Walter").lastName("Müller").build();
|
||||
when(documentService.findByOriginalFilename("withsender.pdf")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
when(personService.findOrCreateByAlias("Walter Müller")).thenReturn(sender);
|
||||
|
||||
List<String> cells = buildCells("withsender.pdf", "Walter Müller", "", "");
|
||||
service.importSingleDocument(cells, Optional.empty(), "withsender.pdf", "withsender");
|
||||
|
||||
verify(personService).findOrCreateByAlias("Walter Müller");
|
||||
verify(documentService).save(argThat(d -> d.getSender() == sender));
|
||||
}
|
||||
|
||||
// ─── importSingleDocument — tag handling ─────────────────────────────────
|
||||
|
||||
@Test
|
||||
void importSingleDocument_createsTag_whenTagCellIsNonBlank() {
|
||||
Tag tag = Tag.builder().id(UUID.randomUUID()).name("Familie").build();
|
||||
when(documentService.findByOriginalFilename("tagged.pdf")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
when(tagService.findOrCreate("Familie")).thenReturn(tag);
|
||||
|
||||
List<String> cells = buildCells("tagged.pdf", "", "", "Familie");
|
||||
service.importSingleDocument(cells, Optional.empty(), "tagged.pdf", "tagged");
|
||||
|
||||
verify(tagService).findOrCreate("Familie");
|
||||
}
|
||||
|
||||
@Test
|
||||
void importSingleDocument_doesNotCreateTag_whenTagCellIsBlank() {
|
||||
when(documentService.findByOriginalFilename("notag.pdf")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
List<String> cells = buildCells("notag.pdf", "", "", "");
|
||||
service.importSingleDocument(cells, Optional.empty(), "notag.pdf", "notag");
|
||||
|
||||
verify(tagService, never()).findOrCreate(any());
|
||||
}
|
||||
|
||||
// ─── importSingleDocument — metadataComplete heuristic ───────────────────
|
||||
|
||||
@Test
|
||||
void importSingleDocument_metadataComplete_whenSenderPresent() {
|
||||
Person sender = Person.builder().id(UUID.randomUUID()).firstName("A").lastName("B").build();
|
||||
when(documentService.findByOriginalFilename("meta.pdf")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
when(personService.findOrCreateByAlias("A B")).thenReturn(sender);
|
||||
|
||||
List<String> cells = buildCells("meta.pdf", "A B", "", "");
|
||||
service.importSingleDocument(cells, Optional.empty(), "meta.pdf", "meta");
|
||||
|
||||
verify(documentService).save(argThat(Document::isMetadataComplete));
|
||||
}
|
||||
|
||||
@Test
|
||||
void importSingleDocument_metadataIncomplete_whenNoKeyFieldsPresent() {
|
||||
when(documentService.findByOriginalFilename("nometa.pdf")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
List<String> cells = buildCells("nometa.pdf", "", "", "");
|
||||
service.importSingleDocument(cells, Optional.empty(), "nometa.pdf", "nometa");
|
||||
|
||||
verify(documentService).save(argThat(d -> !d.isMetadataComplete()));
|
||||
}
|
||||
|
||||
// ─── importSingleDocument — blank fields set to null ─────────────────────
|
||||
|
||||
@Test
|
||||
void importSingleDocument_setsBlankFieldsToNull() {
|
||||
when(documentService.findByOriginalFilename("blank.pdf")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
List<String> cells = buildCells("blank.pdf", "", "", "");
|
||||
service.importSingleDocument(cells, Optional.empty(), "blank.pdf", "blank");
|
||||
|
||||
verify(documentService).save(argThat(d ->
|
||||
d.getLocation() == null &&
|
||||
d.getSummary() == null &&
|
||||
d.getTranscription() == null &&
|
||||
d.getArchiveBox() == null &&
|
||||
d.getArchiveFolder() == null));
|
||||
}
|
||||
|
||||
// ─── processRows — via ReflectionTestUtils ────────────────────────────────
|
||||
|
||||
@Test
|
||||
void processRows_returnsZero_whenOnlyHeaderRow() {
|
||||
List<List<String>> rows = List.of(List.of("header", "col1"));
|
||||
Integer result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
||||
assertThat(result).isEqualTo(0);
|
||||
}
|
||||
|
||||
@Test
|
||||
void processRows_skipsRowWithBlankIndex() {
|
||||
List<List<String>> rows = List.of(
|
||||
List.of("header"),
|
||||
minimalCells("") // blank index
|
||||
);
|
||||
Integer result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
||||
assertThat(result).isEqualTo(0);
|
||||
verify(documentService, never()).findByOriginalFilename(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void processRows_addsExtension_whenIndexHasNoDot() {
|
||||
when(documentService.findByOriginalFilename("doc001.pdf")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
List<List<String>> rows = List.of(
|
||||
List.of("header"),
|
||||
minimalCells("doc001") // no dot → appends ".pdf"
|
||||
);
|
||||
Integer result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
||||
|
||||
assertThat(result).isEqualTo(1);
|
||||
verify(documentService).findByOriginalFilename("doc001.pdf");
|
||||
}
|
||||
|
||||
@Test
|
||||
void processRows_usesFilenameAsIs_whenIndexHasDot() {
|
||||
when(documentService.findByOriginalFilename("doc002.pdf")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
List<List<String>> rows = List.of(
|
||||
List.of("header"),
|
||||
minimalCells("doc002.pdf") // has dot → used as-is
|
||||
);
|
||||
Integer result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
||||
|
||||
assertThat(result).isEqualTo(1);
|
||||
verify(documentService).findByOriginalFilename("doc002.pdf");
|
||||
}
|
||||
|
||||
// ─── importSingleDocument — non-blank optional fields ────────────────────
|
||||
|
||||
@Test
|
||||
void importSingleDocument_setsNonNullOptionalFields_whenPresent() {
|
||||
when(documentService.findByOriginalFilename("rich.pdf")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
// box=1, folder=2, location=9, summary=11, transcription=13
|
||||
List<String> cells = List.of(
|
||||
"rich.pdf", // 0: index
|
||||
"Box A", // 1: box
|
||||
"Folder B", // 2: folder
|
||||
"", // 3: sender
|
||||
"", // 4: unused
|
||||
"", // 5: receivers
|
||||
"", // 6: unused
|
||||
"", // 7: date
|
||||
"", // 8: unused
|
||||
"Hamburg", // 9: location
|
||||
"", // 10: tags
|
||||
"A summary", // 11: summary
|
||||
"", // 12: unused
|
||||
"A transcript" // 13: transcription
|
||||
);
|
||||
|
||||
service.importSingleDocument(cells, Optional.empty(), "rich.pdf", "rich");
|
||||
|
||||
verify(documentService).save(argThat(d ->
|
||||
"Box A".equals(d.getArchiveBox()) &&
|
||||
"Folder B".equals(d.getArchiveFolder()) &&
|
||||
"Hamburg".equals(d.getLocation()) &&
|
||||
"A summary".equals(d.getSummary()) &&
|
||||
"A transcript".equals(d.getTranscription())));
|
||||
}
|
||||
|
||||
@Test
|
||||
void importSingleDocument_setsMetadataComplete_whenReceiversArePresent() {
|
||||
Person receiver = Person.builder().id(UUID.randomUUID()).firstName("Walter").lastName("Müller").build();
|
||||
when(documentService.findByOriginalFilename("rcv.pdf")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
when(personService.findOrCreateByAlias("Walter Müller")).thenReturn(receiver);
|
||||
|
||||
List<String> cells = List.of(
|
||||
"rcv.pdf", "", "", "", "", "Walter Müller", "", "", "", "", "", "", "", "");
|
||||
service.importSingleDocument(cells, Optional.empty(), "rcv.pdf", "rcv");
|
||||
|
||||
verify(documentService).save(argThat(Document::isMetadataComplete));
|
||||
}
|
||||
|
||||
@Test
|
||||
void importSingleDocument_setsMetadataComplete_whenDateIsPresent() {
|
||||
when(documentService.findByOriginalFilename("dated.pdf")).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
List<String> cells = List.of(
|
||||
"dated.pdf", "", "", "", "", "", "", "2024-03-15", "", "", "", "", "", "");
|
||||
service.importSingleDocument(cells, Optional.empty(), "dated.pdf", "dated");
|
||||
|
||||
verify(documentService).save(argThat(Document::isMetadataComplete));
|
||||
}
|
||||
|
||||
// ─── buildTitle — null location ───────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void buildTitle_withNullLocation_skipsLocationPart() {
|
||||
String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
|
||||
"doc005", LocalDate.of(1940, 5, 1), (String) null);
|
||||
assertThat(result).contains("doc005").contains("1940");
|
||||
assertThat(result).doesNotContain("Berlin");
|
||||
}
|
||||
|
||||
// ─── parseDate — via ReflectionTestUtils ─────────────────────────────────
|
||||
|
||||
@Test
|
||||
void parseDate_returnsNull_whenValueIsNull() {
|
||||
LocalDate result = ReflectionTestUtils.invokeMethod(service, "parseDate", (String) null);
|
||||
assertThat(result).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void parseDate_returnsNull_whenValueIsBlank() {
|
||||
LocalDate result = ReflectionTestUtils.invokeMethod(service, "parseDate", " ");
|
||||
assertThat(result).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void parseDate_returnsDate_whenValidIsoFormat() {
|
||||
LocalDate result = ReflectionTestUtils.invokeMethod(service, "parseDate", "2024-03-15");
|
||||
assertThat(result).isEqualTo(LocalDate.of(2024, 3, 15));
|
||||
}
|
||||
|
||||
@Test
|
||||
void parseDate_returnsNull_whenInvalidDateString() {
|
||||
LocalDate result = ReflectionTestUtils.invokeMethod(service, "parseDate", "15.03.2024");
|
||||
assertThat(result).isNull();
|
||||
}
|
||||
|
||||
// ─── buildTitle — via ReflectionTestUtils ────────────────────────────────
|
||||
|
||||
@Test
|
||||
void buildTitle_withDateAndLocation() {
|
||||
String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
|
||||
"doc001", LocalDate.of(1940, 5, 1), "Berlin");
|
||||
assertThat(result).contains("doc001").contains("Berlin").contains("1940");
|
||||
}
|
||||
|
||||
@Test
|
||||
void buildTitle_withDateOnly() {
|
||||
String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
|
||||
"doc002", LocalDate.of(1960, 8, 15), "");
|
||||
assertThat(result).contains("doc002").contains("1960");
|
||||
assertThat(result).doesNotContain("Berlin");
|
||||
}
|
||||
|
||||
@Test
|
||||
void buildTitle_withIndexOnly_whenDateAndLocationAreNull() {
|
||||
String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
|
||||
"doc003", null, "");
|
||||
assertThat(result).isEqualTo("doc003");
|
||||
}
|
||||
|
||||
@Test
|
||||
void buildTitle_withLocationOnly_whenDateIsNull() {
|
||||
// date=null, location present → date part skipped, location appended
|
||||
String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
|
||||
"doc004", null, "Berlin");
|
||||
assertThat(result).contains("doc004").contains("Berlin");
|
||||
assertThat(result).doesNotContain("("); // no date part
|
||||
}
|
||||
|
||||
// ─── getCell — via ReflectionTestUtils ───────────────────────────────────
|
||||
|
||||
@Test
|
||||
void getCell_returnsEmptyString_whenColBeyondListSize() {
|
||||
List<String> cells = List.of("a", "b");
|
||||
String result = ReflectionTestUtils.invokeMethod(service, "getCell", cells, 5);
|
||||
assertThat(result).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void getCell_returnsEmptyString_whenValueIsNull() {
|
||||
List<String> cells = new ArrayList<>();
|
||||
cells.add(null);
|
||||
cells.add("b");
|
||||
String result = ReflectionTestUtils.invokeMethod(service, "getCell", cells, 0);
|
||||
assertThat(result).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void getCell_returnsTrimmedValue() {
|
||||
List<String> cells = List.of(" hello ", "world");
|
||||
String result = ReflectionTestUtils.invokeMethod(service, "getCell", cells, 0);
|
||||
assertThat(result).isEqualTo("hello");
|
||||
}
|
||||
|
||||
// ─── readOds — XXE security regression ───────────────────────────────────
|
||||
|
||||
// Security regression — do not remove.
|
||||
@Test
|
||||
void readOds_rejects_xxe_doctype_payload(@TempDir Path tempDir) throws Exception {
|
||||
File malicious = buildXxeOds(tempDir, "file:///etc/hostname");
|
||||
assertThatThrownBy(() -> service.readOds(malicious))
|
||||
.isInstanceOf(SAXParseException.class)
|
||||
.hasMessageContaining("DOCTYPE is disallowed");
|
||||
}
|
||||
|
||||
@Test
|
||||
void readOds_parses_valid_ods_correctly(@TempDir Path tempDir) throws Exception {
|
||||
File valid = buildValidOds(tempDir, "Mustermann");
|
||||
List<List<String>> rows = service.readOds(valid);
|
||||
assertThat(rows).isNotEmpty();
|
||||
assertThat(rows.get(0)).contains("Mustermann");
|
||||
}
|
||||
|
||||
// ─── helpers ──────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Builds a minimal 14-element cell row with the given filename at index 0
|
||||
* and blanks for all optional fields.
|
||||
*/
|
||||
private List<String> minimalCells(String filename) {
|
||||
return buildCells(filename, "", "", "");
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a cell row with sender, receiver, and tag controls.
|
||||
* Layout matches the default column indices set in setUp().
|
||||
*/
|
||||
private List<String> buildCells(String filename, String sender, String receivers, String tag) {
|
||||
// 14 elements: index=0,box=1,folder=2,sender=3,[4],receivers=5,[6],date=7,[8],location=9,tag=10,summary=11,[12],transcription=13
|
||||
return List.of(
|
||||
filename, // 0: index
|
||||
"", // 1: box
|
||||
"", // 2: folder
|
||||
sender, // 3: sender
|
||||
"", // 4: (unused)
|
||||
receivers, // 5: receivers
|
||||
"", // 6: (unused)
|
||||
"", // 7: date
|
||||
"", // 8: (unused)
|
||||
"", // 9: location
|
||||
tag, // 10: tags
|
||||
"", // 11: summary
|
||||
"", // 12: (unused)
|
||||
"" // 13: transcription
|
||||
);
|
||||
}
|
||||
|
||||
/** Creates a minimal ODS ZIP containing a content.xml with an XXE payload. */
|
||||
private File buildXxeOds(Path dir, String entityTarget) throws Exception {
|
||||
String xml = "<?xml version=\"1.0\"?>"
|
||||
+ "<!DOCTYPE foo [<!ENTITY xxe SYSTEM \"" + entityTarget + "\">]>"
|
||||
+ "<office:document-content"
|
||||
+ " xmlns:office=\"urn:oasis:names:tc:opendocument:xmlns:office:1.0\""
|
||||
+ " xmlns:table=\"urn:oasis:names:tc:opendocument:xmlns:table:1.0\""
|
||||
+ " xmlns:text=\"urn:oasis:names:tc:opendocument:xmlns:text:1.0\">"
|
||||
+ "<office:body><office:spreadsheet>"
|
||||
+ "<table:table><table:table-row><table:table-cell>"
|
||||
+ "<text:p>&xxe;</text:p>"
|
||||
+ "</table:table-cell></table:table-row></table:table>"
|
||||
+ "</office:spreadsheet></office:body>"
|
||||
+ "</office:document-content>";
|
||||
return writeOdsZip(dir.resolve("malicious.ods"), xml);
|
||||
}
|
||||
|
||||
/** Creates a minimal valid ODS ZIP containing a content.xml with the given cell value.
|
||||
* cellValue must not contain XML metacharacters ({@code < > &}). */
|
||||
private File buildValidOds(Path dir, String cellValue) throws Exception {
|
||||
String xml = "<?xml version=\"1.0\"?>"
|
||||
+ "<office:document-content"
|
||||
+ " xmlns:office=\"urn:oasis:names:tc:opendocument:xmlns:office:1.0\""
|
||||
+ " xmlns:table=\"urn:oasis:names:tc:opendocument:xmlns:table:1.0\""
|
||||
+ " xmlns:text=\"urn:oasis:names:tc:opendocument:xmlns:text:1.0\">"
|
||||
+ "<office:body><office:spreadsheet>"
|
||||
+ "<table:table><table:table-row><table:table-cell>"
|
||||
+ "<text:p>" + cellValue + "</text:p>"
|
||||
+ "</table:table-cell></table:table-row></table:table>"
|
||||
+ "</office:spreadsheet></office:body>"
|
||||
+ "</office:document-content>";
|
||||
return writeOdsZip(dir.resolve("valid.ods"), xml);
|
||||
}
|
||||
|
||||
private File writeOdsZip(Path destination, String contentXml) throws Exception {
|
||||
try (OutputStream fos = Files.newOutputStream(destination);
|
||||
ZipOutputStream zip = new ZipOutputStream(fos)) {
|
||||
zip.putNextEntry(new ZipEntry("content.xml"));
|
||||
zip.write(contentXml.getBytes(StandardCharsets.UTF_8));
|
||||
zip.closeEntry();
|
||||
}
|
||||
return destination.toFile();
|
||||
}
|
||||
}
|
||||
@@ -1,130 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.mockito.ArgumentCaptor;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonService;
|
||||
import org.raddatz.familienarchiv.person.PersonUpsertCommand;
|
||||
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.times;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class PersonRegisterImporterTest {
|
||||
|
||||
@Test
|
||||
void load_upsertsPersonBySourceRef_withProvisionalFalse(@TempDir Path tempDir) throws Exception {
|
||||
PersonService personService = mock(PersonService.class);
|
||||
when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
|
||||
Path xlsx = writePersons(tempDir, row(
|
||||
"allemeyer-elsgard", "Allemeyer", "Elsgard", "Wöhler", "Nichte von Herbert", "False"));
|
||||
|
||||
new PersonRegisterImporter(personService).load(xlsx.toFile());
|
||||
|
||||
ArgumentCaptor<PersonUpsertCommand> captor = ArgumentCaptor.forClass(PersonUpsertCommand.class);
|
||||
verify(personService).upsertBySourceRef(captor.capture());
|
||||
PersonUpsertCommand cmd = captor.getValue();
|
||||
assertThat(cmd.sourceRef()).isEqualTo("allemeyer-elsgard");
|
||||
assertThat(cmd.lastName()).isEqualTo("Allemeyer");
|
||||
assertThat(cmd.firstName()).isEqualTo("Elsgard");
|
||||
assertThat(cmd.maidenName()).isEqualTo("Wöhler");
|
||||
assertThat(cmd.notes()).isEqualTo("Nichte von Herbert");
|
||||
assertThat(cmd.provisional()).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_parsesCapitalisedPythonBool_True(@TempDir Path tempDir) throws Exception {
|
||||
PersonService personService = mock(PersonService.class);
|
||||
when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
|
||||
Path xlsx = writePersons(tempDir, row(
|
||||
"noise-geschirr", "Geschirr", "", "", "", "True"));
|
||||
|
||||
new PersonRegisterImporter(personService).load(xlsx.toFile());
|
||||
|
||||
ArgumentCaptor<PersonUpsertCommand> captor = ArgumentCaptor.forClass(PersonUpsertCommand.class);
|
||||
verify(personService).upsertBySourceRef(captor.capture());
|
||||
assertThat(captor.getValue().provisional()).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_skipsRowWithBlankPersonId(@TempDir Path tempDir) throws Exception {
|
||||
PersonService personService = mock(PersonService.class);
|
||||
Path xlsx = writePersons(tempDir, row("", "NoId", "", "", "", "False"));
|
||||
|
||||
new PersonRegisterImporter(personService).load(xlsx.toFile());
|
||||
|
||||
verify(personService, times(0)).upsertBySourceRef(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_returnsCountOfProcessedRows(@TempDir Path tempDir) throws Exception {
|
||||
PersonService personService = mock(PersonService.class);
|
||||
when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
|
||||
Path xlsx = writePersons(tempDir,
|
||||
row("a-one", "One", "A", "", "", "False"),
|
||||
row("a-two", "Two", "B", "", "", "False"));
|
||||
|
||||
int processed = new PersonRegisterImporter(personService).load(xlsx.toFile());
|
||||
|
||||
assertThat(processed).isEqualTo(2);
|
||||
}
|
||||
|
||||
private static Person personOf(PersonUpsertCommand cmd) {
|
||||
return Person.builder().id(UUID.randomUUID()).sourceRef(cmd.sourceRef())
|
||||
.firstName(cmd.firstName()).lastName(cmd.lastName())
|
||||
.provisional(cmd.provisional()).build();
|
||||
}
|
||||
|
||||
private Map<String, String> row(String personId, String lastName, String firstName,
|
||||
String maidenName, String notes, String provisional) {
|
||||
Map<String, String> r = new LinkedHashMap<>();
|
||||
r.put("person_id", personId);
|
||||
r.put("last_name", lastName);
|
||||
r.put("first_name", firstName);
|
||||
r.put("maiden_name", maidenName);
|
||||
r.put("notes", notes);
|
||||
r.put("provisional", provisional);
|
||||
return r;
|
||||
}
|
||||
|
||||
@SafeVarargs
|
||||
private Path writePersons(Path dir, Map<String, String>... rows) throws Exception {
|
||||
Path xlsx = dir.resolve("canonical-persons.xlsx");
|
||||
List<String> headers = List.of("person_id", "last_name", "first_name", "maiden_name", "notes", "provisional");
|
||||
try (XSSFWorkbook wb = new XSSFWorkbook()) {
|
||||
Sheet sheet = wb.createSheet("Sheet1");
|
||||
Row header = sheet.createRow(0);
|
||||
for (int i = 0; i < headers.size(); i++) {
|
||||
header.createCell(i).setCellValue(headers.get(i));
|
||||
}
|
||||
for (int r = 0; r < rows.length; r++) {
|
||||
Row row = sheet.createRow(r + 1);
|
||||
for (int c = 0; c < headers.size(); c++) {
|
||||
row.createCell(c).setCellValue(rows[r].getOrDefault(headers.get(c), ""));
|
||||
}
|
||||
}
|
||||
try (OutputStream out = Files.newOutputStream(xlsx)) {
|
||||
wb.write(out);
|
||||
}
|
||||
}
|
||||
return xlsx;
|
||||
}
|
||||
}
|
||||
@@ -1,163 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.mockito.ArgumentCaptor;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonService;
|
||||
import org.raddatz.familienarchiv.person.PersonUpsertCommand;
|
||||
import org.raddatz.familienarchiv.person.relationship.RelationType;
|
||||
import org.raddatz.familienarchiv.person.relationship.RelationshipService;
|
||||
import org.raddatz.familienarchiv.person.relationship.dto.CreateRelationshipRequest;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.eq;
|
||||
import static org.mockito.Mockito.doThrow;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class PersonTreeImporterTest {
|
||||
|
||||
@Test
|
||||
void load_upsertsTreePersonBySourceRef_withFamilyMemberFlag(@TempDir Path tempDir) throws Exception {
|
||||
PersonService personService = mock(PersonService.class);
|
||||
RelationshipService relationshipService = mock(RelationshipService.class);
|
||||
when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
|
||||
Path json = write(tempDir, """
|
||||
{"persons":[
|
||||
{"rowId":"row_002","firstName":"Elsgard","lastName":"Allemeyer","maidenName":"Wöhler",
|
||||
"notes":"Nichte","birthYear":1920,"deathYear":1999,"familyMember":true,"personId":"allemeyer-elsgard"}
|
||||
],"relationships":[]}
|
||||
""");
|
||||
|
||||
new PersonTreeImporter(personService, relationshipService)
|
||||
.load(json.toFile());
|
||||
|
||||
ArgumentCaptor<PersonUpsertCommand> captor = ArgumentCaptor.forClass(PersonUpsertCommand.class);
|
||||
verify(personService).upsertBySourceRef(captor.capture());
|
||||
PersonUpsertCommand cmd = captor.getValue();
|
||||
assertThat(cmd.sourceRef()).isEqualTo("allemeyer-elsgard");
|
||||
assertThat(cmd.familyMember()).isTrue();
|
||||
assertThat(cmd.provisional()).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_createsRelationship_resolvingRowIdsToUpsertedPersons(@TempDir Path tempDir) throws Exception {
|
||||
PersonService personService = mock(PersonService.class);
|
||||
RelationshipService relationshipService = mock(RelationshipService.class);
|
||||
UUID idA = UUID.randomUUID();
|
||||
UUID idB = UUID.randomUUID();
|
||||
when(personService.upsertBySourceRef(any())).thenAnswer(inv -> {
|
||||
PersonUpsertCommand c = inv.getArgument(0);
|
||||
return Person.builder().id(c.sourceRef().equals("a") ? idA : idB)
|
||||
.sourceRef(c.sourceRef()).lastName(c.lastName()).build();
|
||||
});
|
||||
Path json = write(tempDir, """
|
||||
{"persons":[
|
||||
{"rowId":"row_a","lastName":"A","familyMember":true,"personId":"a"},
|
||||
{"rowId":"row_b","lastName":"B","familyMember":true,"personId":"b"}
|
||||
],"relationships":[
|
||||
{"personId":"row_a","relatedPersonId":"row_b","type":"SPOUSE_OF","source":"verheiratet_mit"}
|
||||
]}
|
||||
""");
|
||||
|
||||
new PersonTreeImporter(personService, relationshipService)
|
||||
.load(json.toFile());
|
||||
|
||||
ArgumentCaptor<CreateRelationshipRequest> captor = ArgumentCaptor.forClass(CreateRelationshipRequest.class);
|
||||
verify(relationshipService).addRelationship(eq(idA), captor.capture());
|
||||
assertThat(captor.getValue().relatedPersonId()).isEqualTo(idB);
|
||||
assertThat(captor.getValue().relationType()).isEqualTo(RelationType.SPOUSE_OF);
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_swallowsDuplicateRelationship_forIdempotentReimport(@TempDir Path tempDir) throws Exception {
|
||||
PersonService personService = mock(PersonService.class);
|
||||
RelationshipService relationshipService = mock(RelationshipService.class);
|
||||
when(personService.upsertBySourceRef(any()))
|
||||
.thenAnswer(inv -> personOf(inv.getArgument(0)));
|
||||
doThrow(DomainException.conflict(ErrorCode.DUPLICATE_RELATIONSHIP, "exists"))
|
||||
.when(relationshipService).addRelationship(any(), any());
|
||||
Path json = write(tempDir, """
|
||||
{"persons":[
|
||||
{"rowId":"row_a","lastName":"A","familyMember":true,"personId":"a"},
|
||||
{"rowId":"row_b","lastName":"B","familyMember":true,"personId":"b"}
|
||||
],"relationships":[
|
||||
{"personId":"row_a","relatedPersonId":"row_b","type":"SPOUSE_OF","source":"verheiratet_mit"}
|
||||
]}
|
||||
""");
|
||||
|
||||
PersonTreeImporter importer = new PersonTreeImporter(personService, relationshipService);
|
||||
|
||||
// Must not propagate the conflict — re-import is idempotent.
|
||||
importer.load(json.toFile());
|
||||
|
||||
verify(relationshipService).addRelationship(any(), any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_propagatesUnexpectedDomainException_fromAddRelationship(@TempDir Path tempDir) throws Exception {
|
||||
PersonService personService = mock(PersonService.class);
|
||||
RelationshipService relationshipService = mock(RelationshipService.class);
|
||||
when(personService.upsertBySourceRef(any()))
|
||||
.thenAnswer(inv -> personOf(inv.getArgument(0)));
|
||||
// An unexpected ErrorCode (not DUPLICATE/CIRCULAR) must NOT be swallowed.
|
||||
doThrow(DomainException.internal(ErrorCode.INTERNAL_ERROR, "boom"))
|
||||
.when(relationshipService).addRelationship(any(), any());
|
||||
Path json = write(tempDir, """
|
||||
{"persons":[
|
||||
{"rowId":"row_a","lastName":"A","familyMember":true,"personId":"a"},
|
||||
{"rowId":"row_b","lastName":"B","familyMember":true,"personId":"b"}
|
||||
],"relationships":[
|
||||
{"personId":"row_a","relatedPersonId":"row_b","type":"SPOUSE_OF","source":"verheiratet_mit"}
|
||||
]}
|
||||
""");
|
||||
|
||||
PersonTreeImporter importer = new PersonTreeImporter(personService, relationshipService);
|
||||
|
||||
assertThatThrownBy(() -> importer.load(json.toFile()))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.extracting("code").isEqualTo(ErrorCode.INTERNAL_ERROR);
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_skipsRelationship_whenRowIdUnresolved(@TempDir Path tempDir) throws Exception {
|
||||
PersonService personService = mock(PersonService.class);
|
||||
RelationshipService relationshipService = mock(RelationshipService.class);
|
||||
when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
|
||||
Path json = write(tempDir, """
|
||||
{"persons":[
|
||||
{"rowId":"row_a","lastName":"A","familyMember":true,"personId":"a"}
|
||||
],"relationships":[
|
||||
{"personId":"row_a","relatedPersonId":"row_ghost","type":"SPOUSE_OF","source":"x"}
|
||||
]}
|
||||
""");
|
||||
|
||||
new PersonTreeImporter(personService, relationshipService)
|
||||
.load(json.toFile());
|
||||
|
||||
verify(relationshipService, org.mockito.Mockito.never()).addRelationship(any(), any());
|
||||
}
|
||||
|
||||
private static Person personOf(PersonUpsertCommand cmd) {
|
||||
return Person.builder().id(UUID.randomUUID()).sourceRef(cmd.sourceRef()).lastName(cmd.lastName()).build();
|
||||
}
|
||||
|
||||
private Path write(Path dir, String json) throws Exception {
|
||||
Path file = dir.resolve("canonical-persons-tree.json");
|
||||
Files.writeString(file, json);
|
||||
return file;
|
||||
}
|
||||
}
|
||||
@@ -1,103 +0,0 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.raddatz.familienarchiv.tag.Tag;
|
||||
import org.raddatz.familienarchiv.tag.TagService;
|
||||
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.eq;
|
||||
import static org.mockito.ArgumentMatchers.isNull;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class TagTreeImporterTest {
|
||||
|
||||
@Test
|
||||
void load_upsertsRootTagWithNullParent(@TempDir Path tempDir) throws Exception {
|
||||
TagService tagService = mock(TagService.class);
|
||||
when(tagService.upsertBySourceRef(any(), any(), any()))
|
||||
.thenAnswer(inv -> tagOf(inv.getArgument(0), inv.getArgument(1), inv.getArgument(2)));
|
||||
Path xlsx = writeTagTree(tempDir, List.<String[]>of(
|
||||
new String[]{"Themen", "", "Themen"}));
|
||||
|
||||
new TagTreeImporter(tagService).load(xlsx.toFile());
|
||||
|
||||
verify(tagService).upsertBySourceRef("Themen", "Themen", null);
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_resolvesParentByPath_forChildTag(@TempDir Path tempDir) throws Exception {
|
||||
TagService tagService = mock(TagService.class);
|
||||
UUID rootId = UUID.randomUUID();
|
||||
when(tagService.upsertBySourceRef(eq("Themen"), eq("Themen"), isNull()))
|
||||
.thenReturn(tagOf("Themen", "Themen", null, rootId));
|
||||
when(tagService.upsertBySourceRef(eq("Themen/Brautbriefe"), eq("Brautbriefe"), eq(rootId)))
|
||||
.thenReturn(tagOf("Themen/Brautbriefe", "Brautbriefe", rootId));
|
||||
Path xlsx = writeTagTree(tempDir, List.<String[]>of(
|
||||
new String[]{"Themen", "", "Themen"},
|
||||
new String[]{"Themen/Brautbriefe", "Themen", "Brautbriefe"}));
|
||||
|
||||
new TagTreeImporter(tagService).load(xlsx.toFile());
|
||||
|
||||
verify(tagService).upsertBySourceRef("Themen/Brautbriefe", "Brautbriefe", rootId);
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_returnsCountOfProcessedRows(@TempDir Path tempDir) throws Exception {
|
||||
TagService tagService = mock(TagService.class);
|
||||
when(tagService.upsertBySourceRef(any(), any(), any()))
|
||||
.thenAnswer(inv -> tagOf(inv.getArgument(0), inv.getArgument(1), inv.getArgument(2)));
|
||||
Path xlsx = writeTagTree(tempDir, List.<String[]>of(
|
||||
new String[]{"Themen", "", "Themen"},
|
||||
new String[]{"Themen/Brautbriefe", "Themen", "Brautbriefe"}));
|
||||
|
||||
int processed = new TagTreeImporter(tagService).load(xlsx.toFile());
|
||||
|
||||
assertThat(processed).isEqualTo(2);
|
||||
}
|
||||
|
||||
private static Tag tagOf(String sourceRef, String name, UUID parentId) {
|
||||
return tagOf(sourceRef, name, parentId, UUID.randomUUID());
|
||||
}
|
||||
|
||||
private static Tag tagOf(String sourceRef, String name, UUID parentId, UUID id) {
|
||||
return Tag.builder().id(id).sourceRef(sourceRef).name(name).parentId(parentId).build();
|
||||
}
|
||||
|
||||
private Path writeTagTree(Path dir, List<String[]> rows) throws Exception {
|
||||
Path xlsx = dir.resolve("canonical-tag-tree.xlsx");
|
||||
try (XSSFWorkbook wb = new XSSFWorkbook()) {
|
||||
Sheet sheet = wb.createSheet("Sheet1");
|
||||
Row header = sheet.createRow(0);
|
||||
header.createCell(0).setCellValue("tag_path");
|
||||
header.createCell(1).setCellValue("parent_name");
|
||||
header.createCell(2).setCellValue("tag_name");
|
||||
for (int r = 0; r < rows.size(); r++) {
|
||||
Row row = sheet.createRow(r + 1);
|
||||
String[] values = rows.get(r);
|
||||
for (int c = 0; c < values.length; c++) {
|
||||
row.createCell(c).setCellValue(values[c]);
|
||||
}
|
||||
}
|
||||
try (OutputStream out = Files.newOutputStream(xlsx)) {
|
||||
wb.write(out);
|
||||
}
|
||||
}
|
||||
return xlsx;
|
||||
}
|
||||
}
|
||||
@@ -65,144 +65,44 @@ class PersonControllerTest {
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void getPersons_returns200_withEmptyPagedResult() throws Exception {
|
||||
when(personService.search(any(), eq(0), eq(50), eq(null)))
|
||||
.thenReturn(PersonSearchResult.paged(Collections.emptyList(), 0, 50, 0));
|
||||
void getPersons_returns200_withEmptyList() throws Exception {
|
||||
when(personService.findAll(null)).thenReturn(Collections.emptyList());
|
||||
mockMvc.perform(get("/api/persons"))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.items").isArray())
|
||||
.andExpect(jsonPath("$.totalElements").value(0));
|
||||
.andExpect(status().isOk());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void getPersons_delegatesQueryParam_toService() throws Exception {
|
||||
PersonSummaryDTO dto = mockPersonSummary("Hans", "Müller");
|
||||
when(personService.search(any(), eq(0), eq(50), eq("Hans")))
|
||||
.thenReturn(PersonSearchResult.paged(List.of(dto), 0, 50, 1));
|
||||
when(personService.findAll("Hans")).thenReturn(List.of(dto));
|
||||
|
||||
mockMvc.perform(get("/api/persons").param("q", "Hans"))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.items[0].firstName").value("Hans"));
|
||||
.andExpect(jsonPath("$[0].firstName").value("Hans"));
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void getPersons_passesFilterParams_toService() throws Exception {
|
||||
ArgumentCaptor<PersonFilter> filterCaptor = ArgumentCaptor.forClass(PersonFilter.class);
|
||||
when(personService.search(filterCaptor.capture(), eq(0), eq(50), eq(null)))
|
||||
.thenReturn(PersonSearchResult.paged(Collections.emptyList(), 0, 50, 0));
|
||||
|
||||
mockMvc.perform(get("/api/persons")
|
||||
.param("type", "INSTITUTION")
|
||||
.param("familyOnly", "true")
|
||||
.param("hasDocuments", "true")
|
||||
.param("provisional", "false"))
|
||||
.andExpect(status().isOk());
|
||||
|
||||
PersonFilter captured = filterCaptor.getValue();
|
||||
assertThat(captured.type()).isEqualTo(PersonType.INSTITUTION);
|
||||
assertThat(captured.familyOnly()).isTrue();
|
||||
assertThat(captured.hasDocuments()).isTrue();
|
||||
assertThat(captured.provisional()).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void getPersons_defaultsToReaderDefault_whenNoReviewFlag() throws Exception {
|
||||
ArgumentCaptor<PersonFilter> filterCaptor = ArgumentCaptor.forClass(PersonFilter.class);
|
||||
when(personService.search(filterCaptor.capture(), eq(0), eq(50), eq(null)))
|
||||
.thenReturn(PersonSearchResult.paged(Collections.emptyList(), 0, 50, 0));
|
||||
|
||||
mockMvc.perform(get("/api/persons")).andExpect(status().isOk());
|
||||
|
||||
assertThat(filterCaptor.getValue().readerDefault()).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void getPersons_dropsReaderDefault_whenReviewFlagSet() throws Exception {
|
||||
ArgumentCaptor<PersonFilter> filterCaptor = ArgumentCaptor.forClass(PersonFilter.class);
|
||||
when(personService.search(filterCaptor.capture(), eq(0), eq(50), eq(null)))
|
||||
.thenReturn(PersonSearchResult.paged(Collections.emptyList(), 0, 50, 0));
|
||||
|
||||
mockMvc.perform(get("/api/persons").param("review", "true")).andExpect(status().isOk());
|
||||
|
||||
assertThat(filterCaptor.getValue().readerDefault()).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void getPersons_passesPageAndSize_toService() throws Exception {
|
||||
when(personService.search(any(), eq(2), eq(25), eq(null)))
|
||||
.thenReturn(PersonSearchResult.paged(Collections.emptyList(), 2, 25, 0));
|
||||
|
||||
mockMvc.perform(get("/api/persons").param("page", "2").param("size", "25"))
|
||||
.andExpect(status().isOk());
|
||||
|
||||
verify(personService).search(any(), eq(2), eq(25), eq(null));
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void getPersons_returns400_whenSizeIsZero() throws Exception {
|
||||
mockMvc.perform(get("/api/persons").param("size", "0"))
|
||||
.andExpect(status().isBadRequest());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void getPersons_returns400_whenSizeExceeds100() throws Exception {
|
||||
mockMvc.perform(get("/api/persons").param("size", "101"))
|
||||
.andExpect(status().isBadRequest());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void getPersons_returns400_whenPageIsNegative() throws Exception {
|
||||
mockMvc.perform(get("/api/persons").param("page", "-1"))
|
||||
.andExpect(status().isBadRequest());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void getPersons_delegatesTopByDocumentCount_whenSortGiven() throws Exception {
|
||||
void getPersons_delegatesTopByDocumentCount_whenSortAndSizeGiven() throws Exception {
|
||||
PersonSummaryDTO top = mockPersonSummary("Käthe", "Raddatz");
|
||||
when(personService.findTopByDocumentCount(4)).thenReturn(List.of(top));
|
||||
|
||||
mockMvc.perform(get("/api/persons").param("sort", "documentCount").param("size", "4"))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.items[0].firstName").value("Käthe"));
|
||||
.andExpect(jsonPath("$[0].firstName").value("Käthe"));
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void getPersons_topByDocumentCount_isNonPaged_totalElementsEqualsReturnedCount() throws Exception {
|
||||
// The top-N dashboard path is deliberately NON-paged: it returns the complete result
|
||||
// (no further page exists), so totalElements equals the number of rows returned and
|
||||
// totalPages is 1. Pinned so nobody "fixes" it into a misleading paged total.
|
||||
when(personService.findTopByDocumentCount(50))
|
||||
.thenReturn(List.of(mockPersonSummary("Käthe", "Raddatz"),
|
||||
mockPersonSummary("Hans", "Müller")));
|
||||
void getPersons_capsTopByDocumentCount_atFifty() throws Exception {
|
||||
ArgumentCaptor<Integer> sizeCaptor = ArgumentCaptor.forClass(Integer.class);
|
||||
when(personService.findTopByDocumentCount(sizeCaptor.capture())).thenReturn(Collections.emptyList());
|
||||
|
||||
mockMvc.perform(get("/api/persons").param("sort", "documentCount"))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.items.length()").value(2))
|
||||
.andExpect(jsonPath("$.totalElements").value(2))
|
||||
.andExpect(jsonPath("$.pageNumber").value(0))
|
||||
.andExpect(jsonPath("$.pageSize").value(2))
|
||||
.andExpect(jsonPath("$.totalPages").value(1));
|
||||
}
|
||||
mockMvc.perform(get("/api/persons").param("sort", "documentCount").param("size", "999"))
|
||||
.andExpect(status().isOk());
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void getPersons_topByDocumentCount_emptyResult_reportsZeroPages() throws Exception {
|
||||
when(personService.findTopByDocumentCount(50)).thenReturn(Collections.emptyList());
|
||||
|
||||
mockMvc.perform(get("/api/persons").param("sort", "documentCount"))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.totalElements").value(0))
|
||||
.andExpect(jsonPath("$.totalPages").value(0));
|
||||
assertThat(sizeCaptor.getValue()).isEqualTo(50);
|
||||
}
|
||||
|
||||
private PersonSummaryDTO mockPersonSummary(String firstName, String lastName) {
|
||||
@@ -217,7 +117,6 @@ class PersonControllerTest {
|
||||
public Integer getDeathYear() { return null; }
|
||||
public String getNotes() { return null; }
|
||||
public boolean isFamilyMember() { return false; }
|
||||
public boolean isProvisional() { return false; }
|
||||
public long getDocumentCount() { return 0; }
|
||||
};
|
||||
}
|
||||
@@ -498,61 +397,6 @@ class PersonControllerTest {
|
||||
.andExpect(status().isNoContent());
|
||||
}
|
||||
|
||||
// ─── PATCH /api/persons/{id}/confirm ──────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void confirmPerson_returns401_whenUnauthenticated() throws Exception {
|
||||
mockMvc.perform(patch("/api/persons/{id}/confirm", UUID.randomUUID()).with(csrf()))
|
||||
.andExpect(status().isUnauthorized());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void confirmPerson_returns403_whenUserHasOnlyReadPermission() throws Exception {
|
||||
mockMvc.perform(patch("/api/persons/{id}/confirm", UUID.randomUUID()).with(csrf()))
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "WRITE_ALL")
|
||||
void confirmPerson_returns200_andClearsProvisional() throws Exception {
|
||||
UUID id = UUID.randomUUID();
|
||||
Person confirmed = Person.builder().id(id).firstName("Bald").lastName("Bestaetigt").provisional(false).build();
|
||||
when(personService.confirmPerson(id)).thenReturn(confirmed);
|
||||
|
||||
mockMvc.perform(patch("/api/persons/{id}/confirm", id).with(csrf()))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.provisional").value(false));
|
||||
|
||||
verify(personService).confirmPerson(id);
|
||||
}
|
||||
|
||||
// ─── DELETE /api/persons/{id} ──────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void deletePerson_returns401_whenUnauthenticated() throws Exception {
|
||||
mockMvc.perform(delete("/api/persons/{id}", UUID.randomUUID()).with(csrf()))
|
||||
.andExpect(status().isUnauthorized());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "READ_ALL")
|
||||
void deletePerson_returns403_whenUserHasOnlyReadPermission() throws Exception {
|
||||
mockMvc.perform(delete("/api/persons/{id}", UUID.randomUUID()).with(csrf()))
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "WRITE_ALL")
|
||||
void deletePerson_returns204_whenValid() throws Exception {
|
||||
UUID id = UUID.randomUUID();
|
||||
|
||||
mockMvc.perform(delete("/api/persons/{id}", id).with(csrf()))
|
||||
.andExpect(status().isNoContent());
|
||||
|
||||
verify(personService).deletePerson(id);
|
||||
}
|
||||
|
||||
// ─── PUT /api/persons/{id} — lastName blank branch ────────────────────────
|
||||
|
||||
@Test
|
||||
|
||||
@@ -1,151 +0,0 @@
|
||||
package org.raddatz.familienarchiv.person;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.argThat;
|
||||
import static org.mockito.Mockito.never;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class PersonImportUpsertTest {
|
||||
|
||||
@Mock PersonRepository personRepository;
|
||||
@Mock PersonNameAliasRepository aliasRepository;
|
||||
@InjectMocks PersonService personService;
|
||||
|
||||
@Test
|
||||
void upsertBySourceRef_insertsNewPerson_whenSourceRefUnknown() {
|
||||
when(personRepository.findBySourceRef("clara-cram")).thenReturn(Optional.empty());
|
||||
when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
PersonUpsertCommand cmd = PersonUpsertCommand.builder()
|
||||
.sourceRef("clara-cram").firstName("Clara").lastName("Cram")
|
||||
.personType(PersonType.PERSON).provisional(false).build();
|
||||
|
||||
Person result = personService.upsertBySourceRef(cmd);
|
||||
|
||||
assertThat(result.getSourceRef()).isEqualTo("clara-cram");
|
||||
assertThat(result.getFirstName()).isEqualTo("Clara");
|
||||
assertThat(result.getLastName()).isEqualTo("Cram");
|
||||
assertThat(result.isProvisional()).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void upsertBySourceRef_updatesInPlace_whenSourceRefExists() {
|
||||
Person existing = Person.builder()
|
||||
.id(UUID.randomUUID()).sourceRef("clara-cram")
|
||||
.firstName("Clara").lastName("Cram").build();
|
||||
when(personRepository.findBySourceRef("clara-cram")).thenReturn(Optional.of(existing));
|
||||
when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
PersonUpsertCommand cmd = PersonUpsertCommand.builder()
|
||||
.sourceRef("clara-cram").firstName("Clara").lastName("Cram")
|
||||
.notes("Updated note").personType(PersonType.PERSON).provisional(false).build();
|
||||
|
||||
personService.upsertBySourceRef(cmd);
|
||||
|
||||
verify(personRepository).save(argThat(p -> p.getId().equals(existing.getId())));
|
||||
verify(personRepository, never()).save(argThat(p -> p.getId() == null));
|
||||
}
|
||||
|
||||
@Test
|
||||
void upsertBySourceRef_preservesHumanEditedNonBlankFields() {
|
||||
// A human renamed the maiden-name register person and added notes in-app.
|
||||
Person humanEdited = Person.builder()
|
||||
.id(UUID.randomUUID()).sourceRef("clara-cram")
|
||||
.firstName("Klara").lastName("Cram-Müller").notes("Verified by Marcel").build();
|
||||
when(personRepository.findBySourceRef("clara-cram")).thenReturn(Optional.of(humanEdited));
|
||||
when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
PersonUpsertCommand cmd = PersonUpsertCommand.builder()
|
||||
.sourceRef("clara-cram").firstName("Clara").lastName("Cram")
|
||||
.notes("Auto note").personType(PersonType.PERSON).provisional(false).build();
|
||||
|
||||
Person result = personService.upsertBySourceRef(cmd);
|
||||
|
||||
// Human edits survive the re-import.
|
||||
assertThat(result.getFirstName()).isEqualTo("Klara");
|
||||
assertThat(result.getLastName()).isEqualTo("Cram-Müller");
|
||||
assertThat(result.getNotes()).isEqualTo("Verified by Marcel");
|
||||
}
|
||||
|
||||
@Test
|
||||
void upsertBySourceRef_fillsOnlyBlankFields_onReimport() {
|
||||
Person existing = Person.builder()
|
||||
.id(UUID.randomUUID()).sourceRef("clara-cram")
|
||||
.firstName("Clara").lastName("Cram").notes(null).build();
|
||||
when(personRepository.findBySourceRef("clara-cram")).thenReturn(Optional.of(existing));
|
||||
when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
PersonUpsertCommand cmd = PersonUpsertCommand.builder()
|
||||
.sourceRef("clara-cram").firstName("Clara").lastName("Cram")
|
||||
.notes("Nichte von Herbert").personType(PersonType.PERSON).provisional(false).build();
|
||||
|
||||
Person result = personService.upsertBySourceRef(cmd);
|
||||
|
||||
// Blank field gets filled by canonical value.
|
||||
assertThat(result.getNotes()).isEqualTo("Nichte von Herbert");
|
||||
}
|
||||
|
||||
@Test
|
||||
void upsertBySourceRef_fillsBlankYears_butPreservesHumanEditedYears_onReimport() {
|
||||
// Existing has a human-set birthYear and a blank deathYear.
|
||||
Person existing = Person.builder()
|
||||
.id(UUID.randomUUID()).sourceRef("clara-cram")
|
||||
.lastName("Cram").birthYear(1890).deathYear(null).build();
|
||||
when(personRepository.findBySourceRef("clara-cram")).thenReturn(Optional.of(existing));
|
||||
when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
PersonUpsertCommand cmd = PersonUpsertCommand.builder()
|
||||
.sourceRef("clara-cram").lastName("Cram")
|
||||
.birthYear(1888).deathYear(1965)
|
||||
.personType(PersonType.PERSON).provisional(false).build();
|
||||
|
||||
Person result = personService.upsertBySourceRef(cmd);
|
||||
|
||||
assertThat(result.getBirthYear()).isEqualTo(1890); // human value kept
|
||||
assertThat(result.getDeathYear()).isEqualTo(1965); // blank filled from canonical
|
||||
}
|
||||
|
||||
@Test
|
||||
void upsertBySourceRef_neverFlipsProvisionalBackToTrue_onceHumanConfirmed() {
|
||||
// A human confirmed this provisional importer-created person (provisional -> false).
|
||||
Person confirmed = Person.builder()
|
||||
.id(UUID.randomUUID()).sourceRef("schwester-hanni")
|
||||
.firstName(null).lastName("Schwester Hanni").provisional(false).build();
|
||||
when(personRepository.findBySourceRef("schwester-hanni")).thenReturn(Optional.of(confirmed));
|
||||
when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
PersonUpsertCommand cmd = PersonUpsertCommand.builder()
|
||||
.sourceRef("schwester-hanni").lastName("Schwester Hanni")
|
||||
.personType(PersonType.PERSON).provisional(true).build();
|
||||
|
||||
Person result = personService.upsertBySourceRef(cmd);
|
||||
|
||||
assertThat(result.isProvisional()).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void upsertBySourceRef_setsProvisionalTrue_forNewProvisionalPerson() {
|
||||
when(personRepository.findBySourceRef("noise-geschirr")).thenReturn(Optional.empty());
|
||||
when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
PersonUpsertCommand cmd = PersonUpsertCommand.builder()
|
||||
.sourceRef("noise-geschirr").lastName("Tante Tüten")
|
||||
.personType(PersonType.PERSON).provisional(true).build();
|
||||
|
||||
Person result = personService.upsertBySourceRef(cmd);
|
||||
|
||||
assertThat(result.isProvisional()).isTrue();
|
||||
}
|
||||
}
|
||||
@@ -463,213 +463,4 @@ class PersonRepositoryTest {
|
||||
assertThat(result).hasSize(1);
|
||||
assertThat(result.get(0).getLastName()).isEqualTo("Gesellschafter des Verlages");
|
||||
}
|
||||
|
||||
// ─── #671: provisional must be SELECTed in all three native projections ───
|
||||
// Adding isProvisional() to the interface compiles even if a native query forgets
|
||||
// to SELECT p.provisional — it then silently returns false. These tests are the only
|
||||
// guard against that trap, so they must run against real Postgres.
|
||||
|
||||
@Test
|
||||
void findAllWithDocumentCount_projectsProvisionalTrue() {
|
||||
personRepository.save(Person.builder()
|
||||
.firstName("Inferred").lastName("Person").provisional(true).build());
|
||||
|
||||
List<PersonSummaryDTO> result = personRepository.findAllWithDocumentCount();
|
||||
|
||||
assertThat(result).anyMatch(PersonSummaryDTO::isProvisional);
|
||||
}
|
||||
|
||||
@Test
|
||||
void searchWithDocumentCount_projectsProvisionalTrue() {
|
||||
personRepository.save(Person.builder()
|
||||
.firstName("Provisorisch").lastName("Müller").provisional(true).build());
|
||||
|
||||
List<PersonSummaryDTO> result = personRepository.searchWithDocumentCount("Provisorisch");
|
||||
|
||||
assertThat(result).hasSize(1);
|
||||
assertThat(result.get(0).isProvisional()).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void findTopByDocumentCount_projectsProvisionalTrue() {
|
||||
Person provisional = personRepository.save(Person.builder()
|
||||
.firstName("Top").lastName("Provisional").provisional(true).build());
|
||||
documentRepository.save(Document.builder()
|
||||
.title("Brief").originalFilename("b.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.sender(provisional).build());
|
||||
|
||||
List<PersonSummaryDTO> result = personRepository.findTopByDocumentCount(10);
|
||||
|
||||
PersonSummaryDTO summary = result.stream()
|
||||
.filter(p -> p.getId().equals(provisional.getId())).findFirst().orElseThrow();
|
||||
assertThat(summary.isProvisional()).isTrue();
|
||||
}
|
||||
|
||||
// ─── #667: filter-aware paged slice + paired COUNT (Postgres-only) ────────
|
||||
// The slice query (findByFilter) and the count query (countByFilter) MUST share one
|
||||
// WHERE clause so totalElements can never drift from the rendered page. These tests run
|
||||
// against real Postgres because the slice ORDER BY uses a computed alias that fails on H2.
|
||||
|
||||
private void seedDirectoryFixture() {
|
||||
// Register family member, no documents — visible by reader default (familyMember)
|
||||
personRepository.save(Person.builder().firstName("Karl").lastName("Register").familyMember(true).build());
|
||||
// Person with one document — visible by reader default (documentCount > 0)
|
||||
Person hasDoc = personRepository.save(Person.builder().firstName("Doku").lastName("Person").build());
|
||||
documentRepository.save(Document.builder().title("B").originalFilename("b.pdf")
|
||||
.status(DocumentStatus.UPLOADED).sender(hasDoc).build());
|
||||
// Provisional, zero-document, non-family — hidden by reader default
|
||||
personRepository.save(Person.builder().firstName("Unbe").lastName("Staetigt").provisional(true).build());
|
||||
// An institution with no documents, non-family, non-provisional
|
||||
personRepository.save(Person.builder().lastName("Verlag GmbH").personType(PersonType.INSTITUTION).build());
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByFilter_readerDefault_returnsOnlyFamilyOrWithDocuments() {
|
||||
seedDirectoryFixture();
|
||||
|
||||
List<PersonSummaryDTO> slice = personRepository.findByFilter(
|
||||
null, null, null, null, true, null, 50, 0);
|
||||
|
||||
assertThat(slice).extracting(PersonSummaryDTO::getLastName)
|
||||
.containsExactlyInAnyOrder("Register", "Person");
|
||||
}
|
||||
|
||||
@Test
|
||||
void countByFilter_readerDefault_matchesSliceSize() {
|
||||
seedDirectoryFixture();
|
||||
|
||||
long count = personRepository.countByFilter(null, null, null, null, true, null);
|
||||
|
||||
assertThat(count).isEqualTo(2);
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByFilter_showAll_returnsEveryone() {
|
||||
seedDirectoryFixture();
|
||||
|
||||
List<PersonSummaryDTO> slice = personRepository.findByFilter(
|
||||
null, null, null, null, false, null, 50, 0);
|
||||
|
||||
assertThat(slice).hasSize(4);
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByFilter_typeInstitution_returnsOnlyInstitutions() {
|
||||
seedDirectoryFixture();
|
||||
|
||||
List<PersonSummaryDTO> slice = personRepository.findByFilter(
|
||||
"INSTITUTION", null, null, null, false, null, 50, 0);
|
||||
|
||||
assertThat(slice).extracting(PersonSummaryDTO::getLastName).containsExactly("Verlag GmbH");
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByFilter_familyOnly_returnsOnlyFamilyMembers() {
|
||||
seedDirectoryFixture();
|
||||
|
||||
List<PersonSummaryDTO> slice = personRepository.findByFilter(
|
||||
null, true, null, null, false, null, 50, 0);
|
||||
|
||||
assertThat(slice).extracting(PersonSummaryDTO::getLastName).containsExactly("Register");
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByFilter_hasDocuments_returnsOnlyPersonsWithDocuments() {
|
||||
seedDirectoryFixture();
|
||||
|
||||
List<PersonSummaryDTO> slice = personRepository.findByFilter(
|
||||
null, null, true, null, false, null, 50, 0);
|
||||
|
||||
assertThat(slice).extracting(PersonSummaryDTO::getLastName).containsExactly("Person");
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByFilter_provisionalTrue_returnsOnlyProvisional() {
|
||||
seedDirectoryFixture();
|
||||
|
||||
List<PersonSummaryDTO> slice = personRepository.findByFilter(
|
||||
null, null, null, true, false, null, 50, 0);
|
||||
|
||||
assertThat(slice).extracting(PersonSummaryDTO::getLastName).containsExactly("Staetigt");
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByFilter_combinedFilters_andTogether() {
|
||||
seedDirectoryFixture();
|
||||
// family + has-documents → intersection is empty (Register has no docs, Doku is not family)
|
||||
List<PersonSummaryDTO> slice = personRepository.findByFilter(
|
||||
null, true, true, null, false, null, 50, 0);
|
||||
|
||||
assertThat(slice).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByFilter_query_combinesWithFilters() {
|
||||
seedDirectoryFixture();
|
||||
|
||||
List<PersonSummaryDTO> slice = personRepository.findByFilter(
|
||||
null, null, null, null, false, "Verlag", 50, 0);
|
||||
|
||||
assertThat(slice).extracting(PersonSummaryDTO::getLastName).containsExactly("Verlag GmbH");
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByFilter_pageBeyondRange_returnsEmptySlice() {
|
||||
seedDirectoryFixture();
|
||||
|
||||
List<PersonSummaryDTO> slice = personRepository.findByFilter(
|
||||
null, null, null, null, false, null, 50, 999 * 50);
|
||||
|
||||
assertThat(slice).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByFilter_respectsPageSize() {
|
||||
seedDirectoryFixture();
|
||||
|
||||
List<PersonSummaryDTO> firstPage = personRepository.findByFilter(
|
||||
null, null, null, null, false, null, 2, 0);
|
||||
List<PersonSummaryDTO> secondPage = personRepository.findByFilter(
|
||||
null, null, null, null, false, null, 2, 2);
|
||||
|
||||
assertThat(firstPage).hasSize(2);
|
||||
assertThat(secondPage).hasSize(2);
|
||||
assertThat(firstPage).extracting(PersonSummaryDTO::getId)
|
||||
.doesNotContainAnyElementsOf(secondPage.stream().map(PersonSummaryDTO::getId).toList());
|
||||
}
|
||||
|
||||
@Test
|
||||
void countByFilter_typeInstitution_matchesSlice() {
|
||||
seedDirectoryFixture();
|
||||
|
||||
long count = personRepository.countByFilter("INSTITUTION", null, null, null, false, null);
|
||||
|
||||
assertThat(count).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
void countByFilter_query_matchesSliceSize() {
|
||||
// The whole point of the shared FILTER_WHERE is that the slice and the count can never
|
||||
// drift. Pin the query (LIKE) path explicitly: countByFilter must equal the slice size
|
||||
// so a future edit to one query's LIKE clause is caught.
|
||||
seedDirectoryFixture();
|
||||
|
||||
List<PersonSummaryDTO> slice = personRepository.findByFilter(
|
||||
null, null, null, null, false, "Verlag", 50, 0);
|
||||
long count = personRepository.countByFilter(null, null, null, null, false, "Verlag");
|
||||
|
||||
assertThat(count).isEqualTo(slice.size());
|
||||
assertThat(count).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
void findByFilter_projectsDocumentCount() {
|
||||
seedDirectoryFixture();
|
||||
|
||||
List<PersonSummaryDTO> slice = personRepository.findByFilter(
|
||||
null, null, true, null, false, null, 50, 0);
|
||||
|
||||
assertThat(slice.get(0).getDocumentCount()).isEqualTo(1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,9 +2,6 @@ package org.raddatz.familienarchiv.person;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.PostgresContainerConfig;
|
||||
import org.raddatz.familienarchiv.document.Document;
|
||||
import org.raddatz.familienarchiv.document.DocumentRepository;
|
||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
import org.raddatz.familienarchiv.person.PersonType;
|
||||
import org.raddatz.familienarchiv.person.PersonRepository;
|
||||
@@ -16,11 +13,6 @@ import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
|
||||
import jakarta.persistence.EntityManager;
|
||||
import jakarta.persistence.PersistenceContext;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
|
||||
@@ -32,9 +24,6 @@ class PersonServiceIntegrationTest {
|
||||
@MockitoBean S3Client s3Client;
|
||||
@Autowired PersonService personService;
|
||||
@Autowired PersonRepository personRepository;
|
||||
@Autowired DocumentRepository documentRepository;
|
||||
|
||||
@PersistenceContext EntityManager entityManager;
|
||||
|
||||
@Test
|
||||
void findOrCreateByAlias_skipReturnsNull_noRecordCreated() {
|
||||
@@ -74,97 +63,4 @@ class PersonServiceIntegrationTest {
|
||||
assertThat(result.getFirstName()).isEqualTo("Clara");
|
||||
assertThat(result.getLastName()).isEqualTo("Cram");
|
||||
}
|
||||
|
||||
// ─── #667: confirm round-trip + reader-default semantics ──────────────────
|
||||
|
||||
@Test
|
||||
void search_readerDefault_hidesProvisionalZeroDocumentPerson() {
|
||||
personRepository.save(Person.builder()
|
||||
.firstName("Unbe").lastName("Staetigt").provisional(true).build());
|
||||
|
||||
PersonSearchResult result = personService.search(PersonFilter.cleanDefault(), 0, 50, null);
|
||||
|
||||
assertThat(result.items()).noneMatch(p -> p.getLastName().equals("Staetigt"));
|
||||
assertThat(result.totalElements()).isEqualTo(result.items().size());
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_showAll_includesProvisionalZeroDocumentPerson() {
|
||||
personRepository.save(Person.builder()
|
||||
.firstName("Unbe").lastName("Staetigt").provisional(true).build());
|
||||
|
||||
PersonSearchResult result = personService.search(PersonFilter.showAll(), 0, 50, null);
|
||||
|
||||
assertThat(result.items()).anyMatch(p -> p.getLastName().equals("Staetigt"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void confirmPerson_clearsProvisional_andShowAllTreatsItAsConfirmed() {
|
||||
Person provisional = personRepository.save(Person.builder()
|
||||
.firstName("Bald").lastName("Bestaetigt").provisional(true).build());
|
||||
|
||||
personService.confirmPerson(provisional.getId());
|
||||
|
||||
Person reloaded = personRepository.findById(provisional.getId()).orElseThrow();
|
||||
assertThat(reloaded.isProvisional()).isFalse();
|
||||
|
||||
PersonSearchResult showAll = personService.search(PersonFilter.showAll(), 0, 50, null);
|
||||
assertThat(showAll.items())
|
||||
.filteredOn(p -> p.getId().equals(provisional.getId()))
|
||||
.allMatch(p -> !p.isProvisional());
|
||||
}
|
||||
|
||||
@Test
|
||||
void deletePerson_removesPerson() {
|
||||
Person target = personRepository.save(Person.builder()
|
||||
.firstName("Weg").lastName("Person").provisional(true).build());
|
||||
|
||||
personService.deletePerson(target.getId());
|
||||
|
||||
assertThat(personRepository.findById(target.getId())).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void deletePerson_detachesSentAndReceivedReferences_beforeDelete_noOrphan() {
|
||||
// A person referenced as BOTH a document sender and a document receiver must delete
|
||||
// cleanly: deletePerson nulls the sender_id FK and removes the receiver join row first
|
||||
// (reassignSenderToNull → deleteReceiverReferences → deleteById), so no FK orphan and
|
||||
// the documents themselves survive.
|
||||
Person target = personRepository.save(Person.builder()
|
||||
.firstName("Weg").lastName("Person").provisional(true).build());
|
||||
Person bystander = personRepository.save(Person.builder()
|
||||
.firstName("Bleibt").lastName("Hier").build());
|
||||
|
||||
Document sent = documentRepository.save(Document.builder()
|
||||
.title("Sent letter").originalFilename("sent.pdf")
|
||||
.status(DocumentStatus.UPLOADED).sender(target).build());
|
||||
Document received = documentRepository.save(Document.builder()
|
||||
.title("Received letter").originalFilename("received.pdf")
|
||||
.status(DocumentStatus.UPLOADED).sender(bystander)
|
||||
.receivers(new java.util.HashSet<>(Set.of(target))).build());
|
||||
|
||||
// Persist the fixture and detach everything so the native @Modifying deletes operate on
|
||||
// the database directly without the persistence context holding stale references that
|
||||
// would re-flush a now-deleted person as a transient association.
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
|
||||
personService.deletePerson(target.getId());
|
||||
|
||||
// Native @Modifying queries bypass the persistence context — clear it so the asserting
|
||||
// reads observe the post-delete database state, not stale managed entities.
|
||||
entityManager.flush();
|
||||
entityManager.clear();
|
||||
|
||||
assertThat(personRepository.findById(target.getId())).isEmpty();
|
||||
|
||||
Document reloadedSent = documentRepository.findById(sent.getId()).orElseThrow();
|
||||
assertThat(reloadedSent.getSender()).isNull();
|
||||
|
||||
Document reloadedReceived = documentRepository.findById(received.getId()).orElseThrow();
|
||||
assertThat(reloadedReceived.getReceivers())
|
||||
.noneMatch(p -> p.getId().equals(target.getId()));
|
||||
// The other person and the documents themselves survive the delete.
|
||||
assertThat(personRepository.findById(bystander.getId())).isPresent();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,109 +58,33 @@ class PersonServiceTest {
|
||||
assertThat(personService.getById(id)).isEqualTo(person);
|
||||
}
|
||||
|
||||
// ─── #667: search (filter + pagination) ──────────────────────────────────
|
||||
// ─── findAll ─────────────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void search_returnsPagedResult_withTotalsFromCountQuery() {
|
||||
PersonFilter filter = PersonFilter.cleanDefault();
|
||||
when(personRepository.countByFilter(null, null, null, null, true, null)).thenReturn(120L);
|
||||
when(personRepository.findByFilter(null, null, null, null, true, null, 50, 0))
|
||||
.thenReturn(List.of());
|
||||
void findAll_returnsAll_whenQueryIsNull() {
|
||||
List<PersonSummaryDTO> expected = List.of();
|
||||
when(personRepository.findAllWithDocumentCount()).thenReturn(expected);
|
||||
|
||||
PersonSearchResult result = personService.search(filter, 0, 50, null);
|
||||
|
||||
assertThat(result.totalElements()).isEqualTo(120L);
|
||||
assertThat(result.pageNumber()).isEqualTo(0);
|
||||
assertThat(result.pageSize()).isEqualTo(50);
|
||||
assertThat(result.totalPages()).isEqualTo(3); // ceil(120 / 50)
|
||||
assertThat(personService.findAll(null)).isEqualTo(expected);
|
||||
verify(personRepository).findAllWithDocumentCount();
|
||||
verify(personRepository, never()).searchWithDocumentCount(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_passesTypeAsEnumName_toRepository() {
|
||||
PersonFilter filter = PersonFilter.builder().type(PersonType.INSTITUTION).build();
|
||||
when(personRepository.countByFilter("INSTITUTION", null, null, null, false, null)).thenReturn(0L);
|
||||
when(personRepository.findByFilter("INSTITUTION", null, null, null, false, null, 50, 0))
|
||||
.thenReturn(List.of());
|
||||
|
||||
personService.search(filter, 0, 50, null);
|
||||
|
||||
verify(personRepository).findByFilter("INSTITUTION", null, null, null, false, null, 50, 0);
|
||||
void findAll_returnsEmpty_whenQueryIsWhitespaceOnly() {
|
||||
assertThat(personService.findAll(" ")).isEmpty();
|
||||
verify(personRepository, never()).findAllWithDocumentCount();
|
||||
verify(personRepository, never()).searchWithDocumentCount(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_computesOffset_fromPageAndSize() {
|
||||
PersonFilter filter = PersonFilter.showAll();
|
||||
when(personRepository.countByFilter(null, null, null, null, false, null)).thenReturn(0L);
|
||||
when(personRepository.findByFilter(null, null, null, null, false, null, 20, 40))
|
||||
.thenReturn(List.of());
|
||||
void findAll_searchesByName_whenQueryIsNonBlank() {
|
||||
List<PersonSummaryDTO> expected = List.of();
|
||||
when(personRepository.searchWithDocumentCount("Anna")).thenReturn(expected);
|
||||
|
||||
personService.search(filter, 2, 20, null); // offset = page * size = 40
|
||||
|
||||
verify(personRepository).findByFilter(null, null, null, null, false, null, 20, 40);
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_trimsBlankQueryToNull() {
|
||||
PersonFilter filter = PersonFilter.showAll();
|
||||
when(personRepository.countByFilter(null, null, null, null, false, null)).thenReturn(0L);
|
||||
when(personRepository.findByFilter(null, null, null, null, false, null, 50, 0))
|
||||
.thenReturn(List.of());
|
||||
|
||||
personService.search(filter, 0, 50, " ");
|
||||
|
||||
verify(personRepository).findByFilter(null, null, null, null, false, null, 50, 0);
|
||||
}
|
||||
|
||||
// ─── #667: confirmPerson ──────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void confirmPerson_clearsProvisionalFlag() {
|
||||
UUID id = UUID.randomUUID();
|
||||
Person provisional = Person.builder().id(id).firstName("Inferred").lastName("Person").provisional(true).build();
|
||||
when(personRepository.findById(id)).thenReturn(Optional.of(provisional));
|
||||
when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
Person result = personService.confirmPerson(id);
|
||||
|
||||
assertThat(result.isProvisional()).isFalse();
|
||||
verify(personRepository).save(argThat(p -> !p.isProvisional()));
|
||||
}
|
||||
|
||||
@Test
|
||||
void confirmPerson_throwsNotFound_whenMissing() {
|
||||
UUID id = UUID.randomUUID();
|
||||
when(personRepository.findById(id)).thenReturn(Optional.empty());
|
||||
|
||||
assertThatThrownBy(() -> personService.confirmPerson(id))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.extracting(e -> ((DomainException) e).getStatus().value())
|
||||
.isEqualTo(404);
|
||||
}
|
||||
|
||||
// ─── #667: deletePerson ───────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void deletePerson_deletes_whenPersonExists() {
|
||||
UUID id = UUID.randomUUID();
|
||||
Person person = Person.builder().id(id).firstName("Weg").lastName("Person").build();
|
||||
when(personRepository.findById(id)).thenReturn(Optional.of(person));
|
||||
|
||||
personService.deletePerson(id);
|
||||
|
||||
verify(personRepository).reassignSenderToNull(id);
|
||||
verify(personRepository).deleteReceiverReferences(id);
|
||||
verify(personRepository).deleteById(id);
|
||||
}
|
||||
|
||||
@Test
|
||||
void deletePerson_throwsNotFound_whenMissing() {
|
||||
UUID id = UUID.randomUUID();
|
||||
when(personRepository.findById(id)).thenReturn(Optional.empty());
|
||||
|
||||
assertThatThrownBy(() -> personService.deletePerson(id))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.extracting(e -> ((DomainException) e).getStatus().value())
|
||||
.isEqualTo(404);
|
||||
assertThat(personService.findAll("Anna")).isEqualTo(expected);
|
||||
verify(personRepository).searchWithDocumentCount("Anna");
|
||||
verify(personRepository, never()).findAllWithDocumentCount();
|
||||
}
|
||||
|
||||
// ─── createPerson ─────────────────────────────────────────────────────────
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
package org.raddatz.familienarchiv.tag;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.argThat;
|
||||
import static org.mockito.Mockito.never;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class TagImportUpsertTest {
|
||||
|
||||
@Mock TagRepository tagRepository;
|
||||
@InjectMocks TagService tagService;
|
||||
|
||||
@Test
|
||||
void upsertBySourceRef_insertsNewTag_whenSourceRefUnknown() {
|
||||
when(tagRepository.findBySourceRef("Themen/Brautbriefe")).thenReturn(Optional.empty());
|
||||
when(tagRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
UUID parentId = UUID.randomUUID();
|
||||
Tag result = tagService.upsertBySourceRef("Themen/Brautbriefe", "Brautbriefe", parentId);
|
||||
|
||||
assertThat(result.getSourceRef()).isEqualTo("Themen/Brautbriefe");
|
||||
assertThat(result.getName()).isEqualTo("Brautbriefe");
|
||||
assertThat(result.getParentId()).isEqualTo(parentId);
|
||||
}
|
||||
|
||||
@Test
|
||||
void upsertBySourceRef_updatesInPlace_whenSourceRefExists() {
|
||||
Tag existing = Tag.builder().id(UUID.randomUUID()).name("Brautbriefe")
|
||||
.sourceRef("Themen/Brautbriefe").build();
|
||||
when(tagRepository.findBySourceRef("Themen/Brautbriefe")).thenReturn(Optional.of(existing));
|
||||
when(tagRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
tagService.upsertBySourceRef("Themen/Brautbriefe", "Brautbriefe", null);
|
||||
|
||||
verify(tagRepository).save(argThat(t -> t.getId().equals(existing.getId())));
|
||||
verify(tagRepository, never()).save(argThat(t -> t.getId() == null));
|
||||
}
|
||||
|
||||
@Test
|
||||
void upsertBySourceRef_preservesHumanRenamedTag_onReimport() {
|
||||
Tag humanRenamed = Tag.builder().id(UUID.randomUUID()).name("Verlobungsbriefe")
|
||||
.sourceRef("Themen/Brautbriefe").build();
|
||||
when(tagRepository.findBySourceRef("Themen/Brautbriefe")).thenReturn(Optional.of(humanRenamed));
|
||||
when(tagRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
Tag result = tagService.upsertBySourceRef("Themen/Brautbriefe", "Brautbriefe", null);
|
||||
|
||||
assertThat(result.getName()).isEqualTo("Verlobungsbriefe");
|
||||
}
|
||||
}
|
||||
@@ -7,8 +7,7 @@ import org.raddatz.familienarchiv.security.PermissionAspect;
|
||||
import org.raddatz.familienarchiv.user.CustomUserDetailsService;
|
||||
import org.raddatz.familienarchiv.document.DocumentService;
|
||||
import org.raddatz.familienarchiv.document.DocumentVersionService;
|
||||
import org.raddatz.familienarchiv.importing.CanonicalImportOrchestrator;
|
||||
import org.raddatz.familienarchiv.importing.ImportStatus;
|
||||
import org.raddatz.familienarchiv.importing.MassImportService;
|
||||
import org.raddatz.familienarchiv.document.ThumbnailBackfillService;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.aop.AopAutoConfiguration;
|
||||
@@ -36,7 +35,7 @@ class AdminControllerTest {
|
||||
|
||||
@Autowired MockMvc mockMvc;
|
||||
|
||||
@MockitoBean CanonicalImportOrchestrator importOrchestrator;
|
||||
@MockitoBean MassImportService massImportService;
|
||||
@MockitoBean DocumentService documentService;
|
||||
@MockitoBean DocumentVersionService documentVersionService;
|
||||
@MockitoBean ThumbnailBackfillService thumbnailBackfillService;
|
||||
@@ -47,9 +46,9 @@ class AdminControllerTest {
|
||||
@Test
|
||||
@WithMockUser(authorities = "ADMIN")
|
||||
void importStatus_returns200_withStatusCode_whenAdmin() throws Exception {
|
||||
ImportStatus status = new ImportStatus(
|
||||
ImportStatus.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
|
||||
when(importOrchestrator.getStatus()).thenReturn(status);
|
||||
MassImportService.ImportStatus status = new MassImportService.ImportStatus(
|
||||
MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, null);
|
||||
when(massImportService.getStatus()).thenReturn(status);
|
||||
|
||||
mockMvc.perform(get("/api/admin/import-status"))
|
||||
.andExpect(status().isOk())
|
||||
@@ -61,9 +60,9 @@ class AdminControllerTest {
|
||||
@Test
|
||||
@WithMockUser(authorities = "ADMIN")
|
||||
void importStatus_messageField_notPresentInApiResponse() throws Exception {
|
||||
ImportStatus status = new ImportStatus(
|
||||
ImportStatus.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
|
||||
when(importOrchestrator.getStatus()).thenReturn(status);
|
||||
MassImportService.ImportStatus status = new MassImportService.ImportStatus(
|
||||
MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, null);
|
||||
when(massImportService.getStatus()).thenReturn(status);
|
||||
|
||||
mockMvc.perform(get("/api/admin/import-status"))
|
||||
.andExpect(status().isOk())
|
||||
|
||||
@@ -1,8 +1,2 @@
|
||||
logging.level.root=WARN
|
||||
logging.level.org.raddatz=INFO
|
||||
|
||||
# Default test value so FlywayConfig's fail-closed check passes without each
|
||||
# test having to set GRAFANA_DB_PASSWORD explicitly. The actual value is
|
||||
# irrelevant in tests — Flyway only uses it to set the grafana_reader role's
|
||||
# password, which no test connects with.
|
||||
GRAFANA_DB_PASSWORD=test-grafana-reader-password
|
||||
|
||||
@@ -147,9 +147,6 @@ services:
|
||||
GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-changeme}
|
||||
GF_USERS_ALLOW_SIGN_UP: "false"
|
||||
GF_SERVER_ROOT_URL: ${GF_SERVER_ROOT_URL:-http://localhost:3003}
|
||||
# Read-only password for the grafana_reader PostgreSQL role; interpolated
|
||||
# into the provisioned PostgreSQL datasource (see datasources.yml).
|
||||
GRAFANA_DB_PASSWORD: ${GRAFANA_DB_PASSWORD}
|
||||
volumes:
|
||||
- grafana_data:/var/lib/grafana
|
||||
- ./infra/observability/grafana/provisioning:/etc/grafana/provisioning:ro
|
||||
@@ -168,7 +165,6 @@ services:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- obs-net
|
||||
- archiv-net # PO Overview dashboard queries archive-db via the grafana_reader role
|
||||
|
||||
# --- Error Tracking: GlitchTip ---
|
||||
|
||||
|
||||
@@ -227,9 +227,6 @@ services:
|
||||
SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/archiv
|
||||
SPRING_DATASOURCE_USERNAME: archiv
|
||||
SPRING_DATASOURCE_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
# Consumed by Flyway V68 via the ${grafanaDbPassword} placeholder to set
|
||||
# the read-only grafana_reader role's password.
|
||||
GRAFANA_DB_PASSWORD: ${GRAFANA_DB_PASSWORD}
|
||||
# Application uses the bucket-scoped service account, not MinIO root.
|
||||
S3_ENDPOINT: http://minio:9000
|
||||
S3_ACCESS_KEY: archiv-app
|
||||
@@ -255,8 +252,6 @@ services:
|
||||
OTEL_METRICS_EXPORTER: none
|
||||
MANAGEMENT_METRICS_TAGS_APPLICATION: Familienarchiv
|
||||
MANAGEMENT_TRACING_SAMPLING_PROBABILITY: ${MANAGEMENT_TRACING_SAMPLING_PROBABILITY:-0.1}
|
||||
SENTRY_DSN: ${SENTRY_DSN:-}
|
||||
LOGGING_STRUCTURED_FORMAT_CONSOLE: ecs
|
||||
networks:
|
||||
- archiv-net
|
||||
healthcheck:
|
||||
@@ -271,10 +266,6 @@ services:
|
||||
build:
|
||||
context: ./frontend
|
||||
target: production
|
||||
args:
|
||||
# Vite build-time variable — baked into the JS bundle at build time.
|
||||
# Empty default so deploys succeed before the secret is configured.
|
||||
VITE_SENTRY_DSN: ${VITE_SENTRY_DSN:-}
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
backend:
|
||||
@@ -285,9 +276,6 @@ services:
|
||||
# SSR fetches go inside the docker network; clients hit https://${APP_DOMAIN}
|
||||
API_INTERNAL_URL: http://backend:8080
|
||||
ORIGIN: https://${APP_DOMAIN}
|
||||
# Enforce upload size limit in the adapter-node layer (fixes GHSA-2crg-3p73-43xp bypass).
|
||||
# Must be ≤ the request body limit (`request_body` → `max_size`) set in the Caddy reverse proxy to avoid 413 mismatches.
|
||||
BODY_SIZE_LIMIT: 50M
|
||||
networks:
|
||||
- archiv-net
|
||||
healthcheck:
|
||||
|
||||
@@ -163,9 +163,6 @@ services:
|
||||
SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/${POSTGRES_DB}
|
||||
SPRING_DATASOURCE_USERNAME: ${POSTGRES_USER}
|
||||
SPRING_DATASOURCE_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
# Consumed by Flyway V68 via the ${grafanaDbPassword} placeholder to set
|
||||
# the read-only grafana_reader role's password.
|
||||
GRAFANA_DB_PASSWORD: ${GRAFANA_DB_PASSWORD}
|
||||
S3_ENDPOINT: http://minio:9000
|
||||
S3_ACCESS_KEY: ${MINIO_ROOT_USER}
|
||||
S3_SECRET_KEY: ${MINIO_ROOT_PASSWORD}
|
||||
@@ -231,9 +228,6 @@ services:
|
||||
API_INTERNAL_URL: http://backend:8080
|
||||
# Vite dev proxy forwards /api from browser to the backend container
|
||||
API_PROXY_TARGET: http://backend:8080
|
||||
# Upload size limit for adapter-node (production target). Not enforced by the Vite dev server
|
||||
# but kept here to match docker-compose.prod.yml and prevent config drift.
|
||||
BODY_SIZE_LIMIT: 50M
|
||||
ports:
|
||||
- "${PORT_FRONTEND}:5173"
|
||||
networks:
|
||||
|
||||
@@ -152,7 +152,6 @@ All vars are set in `.env` at the repo root (copy from `.env.example`). The back
|
||||
| `PORT_GRAFANA` | Host port for the Grafana UI (bound to `127.0.0.1` only) | `3003` | — | — |
|
||||
| `POSTGRES_HOST` | PostgreSQL hostname for GlitchTip's db-init job and workers. Override when only the staging stack is running and `archive-db` is not resolvable by that name. | `archive-db` | — | — |
|
||||
| `GRAFANA_ADMIN_PASSWORD` | Grafana `admin` user password | `changeme` | YES (prod) | YES |
|
||||
| `GRAFANA_DB_PASSWORD` | Password for the read-only `grafana_reader` PostgreSQL role used by the PO Overview dashboard (issue #651). Consumed by Flyway V68 and the Grafana PostgreSQL datasource. Generate with `openssl rand -hex 32`. | — | YES (prod) | YES |
|
||||
| `PORT_GLITCHTIP` | Host port for the GlitchTip UI (bound to `127.0.0.1` only) | `3002` | — | — |
|
||||
| `GLITCHTIP_DOMAIN` | Public-facing base URL for GlitchTip (used in email links and CORS) | `http://localhost:3002` | YES (prod) | — |
|
||||
| `GLITCHTIP_SECRET_KEY` | Django secret key for GlitchTip — generate with `python3 -c "import secrets; print(secrets.token_hex(32))"` | — | YES | YES |
|
||||
@@ -257,7 +256,6 @@ git.raddatz.cloud A <server IP>
|
||||
| `MAIL_USERNAME` | release.yml | SMTP user |
|
||||
| `MAIL_PASSWORD` | release.yml | SMTP password |
|
||||
| `GRAFANA_ADMIN_PASSWORD` | both | Grafana `admin` login — generate a strong password |
|
||||
| `GRAFANA_DB_PASSWORD` | both | Read-only `grafana_reader` role password — `openssl rand -hex 32` |
|
||||
| `GLITCHTIP_SECRET_KEY` | both | Django secret key — `openssl rand -hex 32` |
|
||||
| `SENTRY_DSN` | both | GlitchTip project DSN — set after first-run (§4); leave empty to keep Sentry disabled |
|
||||
| `VITE_SENTRY_DSN` | both | GlitchTip frontend project DSN — set after first-run (§4); leave empty to keep Sentry disabled |
|
||||
@@ -359,7 +357,6 @@ Both files are passed explicitly via `--env-file` to the compose command, so the
|
||||
| Gitea secret | Notes |
|
||||
|---|---|
|
||||
| `GRAFANA_ADMIN_PASSWORD` | Strong unique password; shared by nightly and release |
|
||||
| `GRAFANA_DB_PASSWORD` | `openssl rand -hex 32`; shared by nightly and release — read-only DB role for the PO Overview dashboard |
|
||||
| `GLITCHTIP_SECRET_KEY` | `openssl rand -hex 32`; shared by nightly and release |
|
||||
| `STAGING_POSTGRES_PASSWORD` / `PROD_POSTGRES_PASSWORD` | Must match the running PostgreSQL container |
|
||||
|
||||
@@ -430,31 +427,6 @@ docker exec obs-loki wget -qO- \
|
||||
|
||||
Prometheus port `9090` and Grafana port `3003` (default; configurable via `PORT_GRAFANA`) are bound to `127.0.0.1` on the host. No other observability ports are host-bound.
|
||||
|
||||
##### Rotate the `grafana_reader` DB password
|
||||
|
||||
The PO Overview dashboard reads `audit_log`, `documents`, and `transcription_blocks` through the SELECT-only `grafana_reader` PostgreSQL role (issue #651, ADR-024). The role's password is owned by `R__grafana_reader_password.sql` — a Flyway *repeatable* migration that re-runs whenever the resolved `${grafanaDbPassword}` placeholder changes. That makes rotation a two-restart operation, no manual `psql` required.
|
||||
|
||||
```bash
|
||||
# 1. Generate a new value
|
||||
openssl rand -hex 32
|
||||
|
||||
# 2. Update both sides:
|
||||
# - Gitea secret GRAFANA_DB_PASSWORD (nightly + release workflows pick it up)
|
||||
# - Local .env on the server / dev machine
|
||||
|
||||
# 3. Restart the backend. Flyway sees that R__'s resolved checksum changed and
|
||||
# re-applies it, issuing ALTER ROLE grafana_reader WITH PASSWORD '<new>'.
|
||||
docker compose restart backend
|
||||
|
||||
# 4. Restart obs-grafana so the provisioned datasource picks up the new env value.
|
||||
docker compose -f docker-compose.observability.yml restart obs-grafana
|
||||
|
||||
# 5. Verify the dashboard loads — PO Overview's Postgres panels should populate
|
||||
# instead of "Data source error".
|
||||
```
|
||||
|
||||
If `GRAFANA_DB_PASSWORD` is unset, the backend **refuses to start** (`IllegalStateException`). That is deliberate — see `FlywayConfig.resolveGrafanaDbPassword()` and the rationale in ADR-024.
|
||||
|
||||
#### GlitchTip
|
||||
|
||||
| Item | Value |
|
||||
@@ -559,40 +531,20 @@ bash scripts/download-kraken-models.sh
|
||||
|
||||
> Downloads the Kurrent/Sütterlin HTR models. Run once after a fresh clone or when models are updated.
|
||||
|
||||
### Trigger a canonical import
|
||||
### Trigger a mass import (Excel/ODS)
|
||||
|
||||
The importer no longer parses the raw spreadsheet. It consumes the **canonical artifacts**
|
||||
produced by the normalizer (`tools/import-normalizer/`) — `canonical-tag-tree.xlsx`,
|
||||
`canonical-persons.xlsx`, `canonical-persons-tree.json`, `canonical-documents.xlsx` — which
|
||||
are committed under `tools/import-normalizer/out/`. The semantic transformation
|
||||
(German-date parsing, name classification) lives entirely in the normalizer; the backend
|
||||
maps the clean columns by header name. See [ADR-025](adr/025-canonical-import-and-single-migration-schema-foundation.md).
|
||||
|
||||
**Prerequisite — regenerate the artifacts when the source data changes:**
|
||||
|
||||
```bash
|
||||
cd tools/import-normalizer
|
||||
python3 -m venv .venv && .venv/bin/pip install -r requirements.txt # once, on a fresh clone
|
||||
.venv/bin/python normalize.py
|
||||
# writes the four canonical artifacts into ./out/
|
||||
```
|
||||
|
||||
**Dev:** place all four canonical artifacts **plus** the referenced PDFs into `./import/`
|
||||
at the repo root (the dev compose bind-mounts it to `/import`, which is `app.import.dir`).
|
||||
The orchestrator smoke-checks that all four artifacts are present before starting and fails
|
||||
closed (`IMPORT_ARTIFACT_INVALID`) if any is missing.
|
||||
**Dev:** drop the ODS spreadsheet + PDFs into `./import/` at the repo root — the dev compose bind-mounts it to `/import` automatically.
|
||||
|
||||
**Staging/production:**
|
||||
|
||||
1. Pre-stage the four canonical artifacts + PDFs on the host. Convention:
|
||||
`/srv/familienarchiv-staging/import/` or `/srv/familienarchiv-production/import/`.
|
||||
1. Pre-stage the payload on the host. Convention: `/srv/familienarchiv-staging/import/` or `/srv/familienarchiv-production/import/`.
|
||||
```bash
|
||||
rsync -avh --progress ./import/ user@host:/srv/familienarchiv-staging/import/
|
||||
```
|
||||
2. Make sure `IMPORT_HOST_DIR=<host-path>` is set in `.env.staging` / `.env.production` (the nightly/release workflows already write this — see §3). Compose refuses to start without it.
|
||||
3. Redeploy the stack so the bind mount takes effect — or, if the mount is already in place, skip to step 4.
|
||||
4. Call `POST /api/admin/trigger-import` (requires `ADMIN` permission), or click the "Import starten" button on `/admin/system`.
|
||||
5. The import runs asynchronously — poll `GET /api/admin/import-status`, watch `/admin/system`, or tail the backend logs. Re-running is safe and idempotent (upsert by `source_ref` / document `index`). Person and tag scalar fields you edited in the app are preserved on re-import; a document's sender/receivers/tags are **canonical-authoritative** — a re-import re-applies them to exactly match the export, so a link removed from the export is removed from the document (the raw sender/receiver cell text is always kept).
|
||||
5. The import runs asynchronously — poll `GET /api/admin/import-status`, watch `/admin/system`, or tail the backend logs.
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -25,11 +25,6 @@ _Not to be confused with [AppUser](#appuser-appuser)_ — `Person` is a historic
|
||||
|
||||
**UserGroup** (`UserGroup`) — a named permission bundle assigned to one or more `AppUser`s. A user's effective permissions are the union of all permissions across all groups they belong to.
|
||||
|
||||
**source_ref** (`Person.sourceRef`, `Tag.sourceRef`) — the import normalizer's stable identity for a `Person` (its `person_id`) or `Tag` (its canonical `tag_path`). It is the join key linking normalized records to documents and the idempotency key for re-import; null for manually created records and unique among non-null values.
|
||||
|
||||
**provisional person** (`Person.provisional`) — a `Person` the importer inferred from raw attribution text but could not confidently match to a known individual. The flag lets the persons directory surface uncertainty honestly rather than fabricate a confident identity; it defaults to `false` and is set `true` only by the importer.
|
||||
_Not to be confused with `family_member`_ — `provisional` expresses import confidence, while `family_member` is a genealogical fact about whether the person belongs to the family tree.
|
||||
|
||||
---
|
||||
|
||||
## Document-Related Terms
|
||||
@@ -41,10 +36,6 @@ _See also [TranscriptionBlock](#transcriptionblock-transcriptionblock)._
|
||||
|
||||
**Document** (`Document`) — a single archival item (letter, postcard, photograph) with a file stored in MinIO/S3 and associated metadata (sender, receivers, date, tags, transcription blocks).
|
||||
|
||||
**date precision** (`Document.metaDatePrecision`, enum `DatePrecision`) — how exactly a document's date is known, one of `DAY, MONTH, SEASON, YEAR, RANGE, APPROX, UNKNOWN`. A verbatim mirror of the import normalizer's `Precision` enum so honest dates can be rendered (`APPROX` → "ca.", `RANGE` uses `meta_date_end`) instead of fabricating a false `DAY`-level date. `UNKNOWN` is the explicit value for undated documents.
|
||||
|
||||
**raw attribution** (`Document.senderText`, `Document.receiverText`, `Document.metaDateRaw`) — the original spreadsheet cell text for a document's sender, receiver, and date, preserved verbatim even after a `Person` or normalized date is linked. It keeps provenance intact and enables an "as written in the original" view.
|
||||
|
||||
**DocumentVersion** (`DocumentVersion`) — an append-only snapshot of a `Document`'s metadata at a point in time. Append-only by convention; no consumer-facing create or update endpoint exists. The entity uses Lombok `@Data` (which generates setters), so immutability is enforced by application convention, not at the Java level.
|
||||
|
||||
**Tag** (`Tag`) — a hierarchical category that can be applied to `Document`s. Tags are self-referencing via a `parent_id` foreign key, forming a tree structure.
|
||||
@@ -64,15 +55,7 @@ _See also [Annotation](#annotation-documentannotation)._
|
||||
- `REVIEWED`: a reviewer has approved the transcription.
|
||||
- `ARCHIVED`: the document is finalized and read-only.
|
||||
|
||||
**Canonical import** — an asynchronous batch process (`CanonicalImportOrchestrator`) that consumes the normalizer's committed canonical artifacts and creates `Tag`s, `Person`s (register + tree), family relationships, and `Document`s. Four idempotent loaders run in a fixed dependency order — `TagTreeImporter` → `PersonRegisterImporter` → `PersonTreeImporter` → `DocumentImporter` — each calling the owning domain's service. Re-running it never duplicates rows (upsert by `source_ref` / document `index`) and never overwrites a human-edited field. Only one import can run at a time (`IMPORT_ALREADY_RUNNING` error if attempted concurrently); a missing or malformed artifact fails closed (`IMPORT_ARTIFACT_INVALID`). Replaced the legacy raw-spreadsheet `MassImportService` (see ADR-025).
|
||||
|
||||
**canonical artifact** — one of the four files the normalizer (`tools/import-normalizer/`) emits and commits to `tools/import-normalizer/out/`: `canonical-tag-tree.xlsx`, `canonical-persons.xlsx`, `canonical-persons-tree.json`, `canonical-documents.xlsx`. They are the contract the backend importer reads (mapped by header name); the semantic transformation (German-date parsing, name classification) lives only in the normalizer, never in Java.
|
||||
|
||||
**CanonicalSheetReader** — the value-level POI helper that opens a canonical `.xlsx`, maps the header row to column indices by name (replacing the brittle positional column config), splits pipe-delimited list columns, and throws `IMPORT_ARTIFACT_INVALID` on a missing required header rather than NPE-ing on a null index.
|
||||
|
||||
**SkippedFile** (`ImportStatus.SkippedFile`) — a file that was presented for import but not processed, recorded with a `filename` and a `reason` code. Possible reasons: `INVALID_FILENAME_PATH_TRAVERSAL` (the file-column basename failed the path-traversal guard), `INVALID_PDF_SIGNATURE` (magic-byte validation failed), `S3_UPLOAD_FAILED` (file upload to MinIO/S3 threw an exception), `FILE_READ_ERROR` (the file could not be opened for reading), or `ALREADY_EXISTS` (a document with the same `index` already exists in the archive with a status other than `PLACEHOLDER`).
|
||||
|
||||
**skipped count** — the total number of `SkippedFile` entries accumulated during a single import run (`ImportStatus.skipped()`). Shown in the amber warning section of the Import Status Card in the admin UI; a value of zero suppresses the section entirely.
|
||||
**Mass import** — an asynchronous batch process (`MassImportService`) that reads an Excel or ODS file and creates `Person`s, `Tag`s, and `PLACEHOLDER` `Document`s in one shot. Only one import can run at a time (`IMPORT_ALREADY_RUNNING` error if attempted concurrently).
|
||||
|
||||
**Transcription queue** — the set of `Document`s and `TranscriptionBlock`s awaiting work, computed on-the-fly from `Document`/`Block` status. Three views: segmentation queue, transcription queue, ready-to-read queue. NOT a persistent entity — no `transcription_queues` table exists.
|
||||
_See also [DocumentStatus lifecycle](#documentstatus-lifecycle)._
|
||||
@@ -93,14 +76,6 @@ _See also [DocumentStatus lifecycle](#documentstatus-lifecycle)._
|
||||
|
||||
**Sütterlin** — A specific standardized style of Kurrent taught in German schools from 1915 to 1941.
|
||||
|
||||
**Illegible word** — a word whose recognition confidence falls below the configured threshold; replaced with the literal token `[unleserlich]` in the rendered block text and counted in the `ocr_illegible_words_total` Prometheus counter.
|
||||
|
||||
**Models-ready gauge** — the `ocr_models_ready` Prometheus gauge, flipped from `0` to `1` once the FastAPI lifespan startup has finished loading the Kraken model and the spell-checker. Used both by the `/health` endpoint and as the signal behind the `ocr_models_ready < 1 for 2m` alert.
|
||||
|
||||
**Recognition model accuracy** — the accuracy reported by `ketos train` for the recognition (text-line) model, exposed as `ocr_model_accuracy{kind="recognition"}`. Sourced from `_parse_best_checkpoint` on the highest-scoring checkpoint after training.
|
||||
|
||||
**Segmentation model accuracy** — the accuracy reported by `ketos segtrain` for the baseline layout analysis (`blla`) model, exposed as `ocr_model_accuracy{kind="segmentation"}`. Distinct from recognition accuracy because the two models are trained and improved independently.
|
||||
|
||||
---
|
||||
|
||||
## Other Domain Terms
|
||||
|
||||
@@ -118,14 +118,11 @@ To find a trace for a specific request in staging/production, either increase th
|
||||
|
||||
## Metrics (Prometheus → Grafana)
|
||||
|
||||
Prometheus scrapes two targets every 15 s:
|
||||
Prometheus scrapes the backend management endpoint every 15 s:
|
||||
|
||||
```
|
||||
Target: backend:8081/actuator/prometheus
|
||||
Labels: job="spring-boot", application="Familienarchiv"
|
||||
|
||||
Target: ocr:8000/metrics
|
||||
Labels: job="ocr-service"
|
||||
```
|
||||
|
||||
All Spring Boot metrics carry the `application="Familienarchiv"` tag, which is how the Grafana Spring Boot Observability dashboard (ID 17175) filters to this service.
|
||||
@@ -149,70 +146,6 @@ jvm_memory_used_bytes{area="heap", application="Familienarchiv"}
|
||||
hikaricp_connections_active
|
||||
```
|
||||
|
||||
### OCR-service custom metrics
|
||||
|
||||
Exposed at `ocr:8000/metrics` by `prometheus-fastapi-instrumentator`. The
|
||||
`http_*` metrics describe the FastAPI request layer; the `ocr_*` series are
|
||||
domain-specific. **Never label these with PII or document content** — labels
|
||||
built from such values risk unbounded cardinality and are visible to anyone with Grafana access.
|
||||
|
||||
| Metric | Type | Labels | Unit | What it tracks |
|
||||
|---|---|---|---|---|
|
||||
| `ocr_jobs_total` | Counter | `engine` (`surya`/`kraken`), `script_type` | jobs | OCR jobs that started after a successful PDF download |
|
||||
| `ocr_pages_total` | Counter | `engine` | pages | Successfully OCR'd pages in the streaming generator |
|
||||
| `ocr_skipped_pages_total` | Counter | — | pages | Pages skipped because the engine raised on them |
|
||||
| `ocr_words_total` | Counter | — | words | Recognized words summed across every block |
|
||||
| `ocr_illegible_words_total` | Counter | — | words | Words below the confidence threshold (rendered as `[unleserlich]`) |
|
||||
| `ocr_processing_seconds` | Histogram | `engine` | seconds | Per-page (stream) or per-document (`/ocr`) engine time, excluding preprocessing |
|
||||
| `ocr_training_runs_total` | Counter | `kind` (`recognition`/`segmentation`), `outcome` (`success`/`error`) | runs | Completed training runs |
|
||||
| `ocr_model_accuracy` | Gauge | `kind` | ratio (0–1) | Latest accuracy reported by a successful training run |
|
||||
| `ocr_models_ready` | Gauge | — | 0\|1 | 1 once the lifespan startup has finished loading models |
|
||||
|
||||
Canonical example queries (the same ones referenced in issue #652):
|
||||
|
||||
```promql
|
||||
# OCR throughput by engine
|
||||
sum by (engine) (rate(ocr_pages_total[5m]))
|
||||
|
||||
# Share of words rendered as [unleserlich]
|
||||
sum(rate(ocr_illegible_words_total[5m]))
|
||||
/ sum(rate(ocr_words_total[5m]))
|
||||
|
||||
# p95 page processing time per engine
|
||||
histogram_quantile(0.95, sum by (engine, le) (
|
||||
rate(ocr_processing_seconds_bucket[5m])
|
||||
))
|
||||
|
||||
# Training error rate
|
||||
sum(rate(ocr_training_runs_total{outcome="error"}[1h]))
|
||||
/ sum(rate(ocr_training_runs_total[1h]))
|
||||
|
||||
# Latest recognition vs segmentation accuracy
|
||||
ocr_model_accuracy
|
||||
```
|
||||
|
||||
### Internal-only endpoints
|
||||
|
||||
`/metrics` is exposed by the OCR service over plain HTTP without
|
||||
authentication. The container is reachable only on the internal Docker
|
||||
network — Caddy never proxies to it directly. If the service is ever
|
||||
exposed (e.g. a `ports:` mapping is added), block the endpoint at the
|
||||
reverse proxy:
|
||||
|
||||
```caddy
|
||||
ocr.example.com {
|
||||
@internal_only path /metrics /health
|
||||
respond @internal_only 404
|
||||
reverse_proxy ocr:8000
|
||||
}
|
||||
```
|
||||
|
||||
The `MetricsPathFilter` in `ocr-service/main.py` suppresses uvicorn's
|
||||
**stdout** access log lines for `/metrics` and `/health` so the container
|
||||
console stays focused on real OCR traffic. Promtail/Loki still receive
|
||||
access lines from any other source. Treat the filter as console
|
||||
noise-control, not an audit-suppression mechanism.
|
||||
|
||||
## Errors (GlitchTip)
|
||||
|
||||
GlitchTip receives errors from both the backend (via Sentry Java SDK) and the frontend (via Sentry JavaScript SDK). It groups events by fingerprint, tracks first/last seen times, and links to the release that introduced the error.
|
||||
|
||||
@@ -104,12 +104,3 @@ source.
|
||||
because `@WebMvcTest` slices exclude `JacksonAutoConfiguration`. The response
|
||||
only serialises a fixed String key (`"code"`) so naming strategy and custom
|
||||
modules are irrelevant.
|
||||
- IP extraction uses `HttpServletRequest.getRemoteAddr()`. In deployments behind
|
||||
a reverse proxy the `X-Forwarded-For` header is not trusted — trusting it would
|
||||
let clients spoof their IP and trivially bypass the per-IP limit. Trusting
|
||||
proxy headers requires separate work (e.g. Spring's `ForwardedHeaderFilter`
|
||||
with an allowlist of trusted proxy addresses).
|
||||
- IPv6 and IPv4-mapped addresses (e.g. `::ffff:1.2.3.4`) are not normalised to
|
||||
a canonical form. An attacker with access to multiple IPv6 addresses could
|
||||
rotate addresses to bypass the per-IP bucket. This is a known limitation of
|
||||
address-based rate limiting and is acceptable for the current deployment.
|
||||
|
||||
@@ -1,110 +0,0 @@
|
||||
# ADR-022 — EAGER→LAZY Fetch Strategy for Document Collections
|
||||
|
||||
**Date:** 2026-05-18
|
||||
**Status:** Accepted
|
||||
**Issue:** #467
|
||||
**PR:** #622
|
||||
|
||||
---
|
||||
|
||||
## Context
|
||||
|
||||
A pre-production query audit of 24 HTTP requests to the document list and detail endpoints
|
||||
produced **2,733 SQL statements** — primarily N+1 queries caused by `FetchType.EAGER` on
|
||||
`Document.receivers`, `Document.tags`, `Document.trainingLabels`, and `Document.sender`.
|
||||
|
||||
With EAGER fetch, every `Document` loaded by any repository method immediately triggers
|
||||
additional `SELECT` statements for each associated collection, regardless of whether the
|
||||
caller needs those associations. For a list of 100 documents, this means up to 400 extra
|
||||
queries across the four associations.
|
||||
|
||||
---
|
||||
|
||||
## Decision
|
||||
|
||||
Switch all four associations to `FetchType.LAZY` and use a two-tier strategy to load exactly
|
||||
what each code path needs:
|
||||
|
||||
**Tier 1 — Named entity graphs on `Document` + `@EntityGraph` overrides on `DocumentRepository`:**
|
||||
|
||||
- `Document.full` — loads `sender`, `receivers`, `tags` — used by `findById` (detail view)
|
||||
- `Document.list` — loads `sender`, `tags` — used by `findAll(Spec, Pageable)`,
|
||||
`findAll(Spec)`, and `findAll(Pageable)` (list/search/dashboard paths)
|
||||
|
||||
Each repository method that is called from a hot code path has an `@EntityGraph` override
|
||||
that declares exactly which associations to JOIN-fetch, collapsing N+1 into 1–2 queries.
|
||||
|
||||
**Tier 2 — `@BatchSize(50)` fallback on all four associations:**
|
||||
|
||||
For any lazy access path not covered by an entity graph (e.g., a future ad-hoc query or an
|
||||
in-memory sort that touches `trainingLabels`), Hibernate batches the secondary `SELECT` to
|
||||
at most one statement per 50 entities instead of one per entity.
|
||||
|
||||
**Session lifetime for post-return lazy access:**
|
||||
|
||||
`getDocumentById` and `getRecentActivity` return entities to callers that may access lazy
|
||||
associations after the repository call returns. Both methods are annotated
|
||||
`@Transactional(readOnly = true)` to keep the Hibernate session open until the service method
|
||||
returns, making those post-return accesses safe.
|
||||
|
||||
This is an intentional exception to the project convention that read methods are not annotated
|
||||
(see `CLAUDE.md §Services`). The convention remains correct for all other read methods; this
|
||||
exception applies only to methods that serve lazy-initialized associations to their callers.
|
||||
|
||||
---
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
### `@BatchSize`-only (no entity graphs)
|
||||
|
||||
`@BatchSize(50)` on all associations would eliminate the worst N+1 cases (100 documents → 2
|
||||
batch queries instead of 100 individual queries) without requiring repository overrides. Simpler
|
||||
to maintain — no named graph definitions, no per-method overrides.
|
||||
|
||||
Rejected because batch loading is best-effort: it depends on what Hibernate happens to find in
|
||||
the first-level cache and produces a variable number of statements. Entity graphs produce a
|
||||
deterministic, verifiable statement count that can be asserted in tests. The query-count test
|
||||
suite (`DocumentRepositoryTest`) validates the exact statement bounds on every CI run.
|
||||
|
||||
### Single unified entity graph (`Document.full` everywhere)
|
||||
|
||||
Loading `receivers` on every list query is wasteful — the document list view only needs
|
||||
`sender` and `tags`. `receivers` is a `@ManyToMany` collection that, when JOIN-fetched together
|
||||
with `tags`, forces Hibernate to split into two queries anyway (to avoid Cartesian product).
|
||||
Using a single graph on list paths would load data the UI does not display.
|
||||
|
||||
Rejected in favour of two graphs with distinct scopes: `Document.list` for list paths
|
||||
(sender + tags), `Document.full` for detail paths (sender + receivers + tags).
|
||||
|
||||
### `@Transactional` on the Spring Data repository methods
|
||||
|
||||
Spring Data allows `@Transactional` on repository interfaces directly. This would keep the
|
||||
session open for all calls to those methods without touching the service layer.
|
||||
|
||||
Rejected because the transaction boundary belongs at the service layer — repositories should
|
||||
not own transaction lifecycle. The service methods are the natural scope for "keep the session
|
||||
open long enough for the caller to use the result."
|
||||
|
||||
---
|
||||
|
||||
## Consequences
|
||||
|
||||
- **Query count reduced from 2,733 to ≤10 statements per 24 HTTP requests** — verified by
|
||||
`DocumentRepositoryTest` query-count assertions and `DocumentLazyLoadingTest` smoke tests.
|
||||
- **Read methods that return lazily-initialized entities must carry `@Transactional(readOnly = true)`.**
|
||||
Any future service method that loads a `Document` and returns it to a caller that accesses
|
||||
lazy associations must follow this pattern. Removing the annotation causes
|
||||
`LazyInitializationException` in production.
|
||||
- **New lazy code paths need an entity graph or `@BatchSize` review.** Any new
|
||||
`DocumentRepository` method added to a hot code path should be assessed for N+1 risk and
|
||||
given an `@EntityGraph` override if warranted.
|
||||
- **`@JsonIgnoreProperties({"hibernateLazyInitializer", "handler"})` required on serialized lazy-proxy entities.**
|
||||
`Person` and `Tag` carry this annotation to prevent Jackson from attempting to serialize
|
||||
Hibernate proxy internals when the association is not initialized. Any new entity that is
|
||||
used as a lazy association and serialized directly (without a DTO) needs the same annotation.
|
||||
- **Named graph strings in `Document.java` and `DocumentRepository.java` must stay in sync.**
|
||||
The `@NamedEntityGraph(name = "Document.full")` / `@NamedEntityGraph(name = "Document.list")`
|
||||
definitions on `Document` are referenced by string in every `@EntityGraph(value = "...")` on
|
||||
`DocumentRepository`. If the names diverge (e.g. a graph is renamed in one place but not the
|
||||
other), Spring Data throws at application startup. Always update both files together when
|
||||
renaming or restructuring a named graph.
|
||||
@@ -1,94 +0,0 @@
|
||||
# ADR-023: Prometheus Instrumentator and Metrics Registry Injection
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
Until issue #652 the OCR service exposed no `/metrics` endpoint. The
|
||||
observability stack already scrapes the Spring Boot backend's actuator
|
||||
endpoint, but it had nothing to scrape on the Python side. Without HTTP-
|
||||
and domain-level metrics from `ocr-service` we cannot answer questions
|
||||
like "what is the share of words rendered as `[unleserlich]`" or
|
||||
"is the training error rate above its budget" from Grafana.
|
||||
|
||||
Two implementation requirements influenced the design:
|
||||
|
||||
1. **Counter / gauge isolation in tests.** `prometheus_client` collectors
|
||||
are module-level singletons keyed by name on the global `REGISTRY`.
|
||||
Re-importing or naively re-instantiating them raises a duplicated-
|
||||
collector error, and state leaks across tests (a `.inc()` in test A is
|
||||
still readable by test B). A test harness needs a way to swap the
|
||||
active container for a fresh per-test instance.
|
||||
|
||||
2. **Minimal blast radius on the request path.** We did not want to
|
||||
hand-instrument every endpoint with FastAPI middleware. The
|
||||
`prometheus-fastapi-instrumentator` library already provides
|
||||
`http_requests_total`, `http_request_duration_seconds`, and the
|
||||
`/metrics` exposition route, all idiomatic Prometheus names.
|
||||
|
||||
## Decision
|
||||
|
||||
- Add `prometheus-fastapi-instrumentator==7.0.0` and pin its transitive
|
||||
dependency `prometheus-client==0.25.0` explicitly in
|
||||
`ocr-service/requirements.txt`.
|
||||
- Mount the instrumentator once at module load:
|
||||
`Instrumentator(excluded_handlers=["/health", "/metrics"]).instrument(app).expose(app)`.
|
||||
This adds `/metrics` and an HTTP-level dashboard surface without
|
||||
changing any endpoint code.
|
||||
- Define every domain metric (`ocr_jobs_total`, `ocr_pages_total`,
|
||||
`ocr_processing_seconds`, …) inside a `build_metrics(registry)`
|
||||
factory in `ocr-service/metrics.py` that returns a frozen `OcrMetrics`
|
||||
dataclass. Production code binds the container to the default
|
||||
`REGISTRY` once: `metrics: OcrMetrics = build_metrics(REGISTRY)`.
|
||||
- Tests use a `fresh_metrics` fixture that builds a new
|
||||
`CollectorRegistry()` per test and monkeypatches `main.metrics` with
|
||||
a container bound to it. The endpoint code keeps reading
|
||||
`metrics.<name>` without knowing whether it is talking to the global
|
||||
registry or a per-test one.
|
||||
|
||||
## Consequences
|
||||
|
||||
**Positive**
|
||||
|
||||
- One reusable factory captures the metric definitions; future metrics
|
||||
go in one place.
|
||||
- Tests run with full counter isolation. Cross-test state leakage is
|
||||
impossible because each test sees its own dataclass instance.
|
||||
- The instrumentator gives us `http_*` metrics for free, including a
|
||||
Grafana-ready histogram that pairs with the Spring Boot one.
|
||||
|
||||
**Negative**
|
||||
|
||||
- One extra level of indirection: any test that asserts on metric
|
||||
values must remember to monkeypatch `main.metrics`, not the registry
|
||||
directly. Rebinding through the registry is harmless but useless —
|
||||
the dataclass holds references to the original collectors.
|
||||
- `prometheus-client` is now pinned. Upgrading it requires an explicit
|
||||
bump and re-checking the instrumentator's compatibility range.
|
||||
- `/metrics` is exposed unauthenticated and relies on the Docker
|
||||
internal network for confidentiality. See
|
||||
[docs/OBSERVABILITY.md §Internal-only endpoints](../OBSERVABILITY.md)
|
||||
for the Caddy snippet that must be added if the service ever gets a
|
||||
host-side port mapping.
|
||||
|
||||
## Alternatives considered
|
||||
|
||||
- **Hand-roll the `/metrics` endpoint.** Rejected: would have meant
|
||||
duplicating what `prometheus-fastapi-instrumentator` ships, plus
|
||||
middleware for the HTTP histograms.
|
||||
- **Skip the factory; pass `registry` as a function argument
|
||||
everywhere.** Rejected: clutters every endpoint signature and breaks
|
||||
the symmetry with the Spring Boot side, which also relies on a
|
||||
process-global Micrometer registry.
|
||||
- **Use a `pytest` autouse fixture that resets `REGISTRY` between
|
||||
tests.** Rejected: `prometheus_client` does not expose a clean
|
||||
"unregister all" hook, and we would be relying on private APIs.
|
||||
|
||||
## References
|
||||
|
||||
- Issue: [#652](https://git.raddatz.cloud/marcel/familienarchiv/issues/652)
|
||||
- Library: <https://github.com/trallnag/prometheus-fastapi-instrumentator>
|
||||
- Code: `ocr-service/metrics.py`, `ocr-service/main.py`,
|
||||
`ocr-service/test_metrics.py`
|
||||
@@ -1,123 +0,0 @@
|
||||
# ADR-024: Grafana reads archive-db via a bridged network and a SELECT-only role
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
Issue #651 (the PO Overview Grafana dashboard) needs aggregates over three
|
||||
tables in the main application database — `audit_log`, `documents`, and
|
||||
`transcription_blocks` — to answer the operator's four weekly questions: is
|
||||
everything working, are people using it, is the archive making progress, is
|
||||
OCR working well.
|
||||
|
||||
Until now, `obs-grafana` and the rest of the observability stack lived on
|
||||
their own Docker network (`obs-net`) and never touched `archiv-net`, where
|
||||
`archive-db` runs. The two were intentionally isolated: a compromise of any
|
||||
observability container could not pivot to the application database.
|
||||
|
||||
The PO Overview's archive-progress and user-activity panels need rolling
|
||||
7-day SQL aggregates that cannot be served by Prometheus or Loki. That
|
||||
forces a connection from `obs-grafana` to `archive-db` for the first time.
|
||||
|
||||
Two implementation requirements shaped the design:
|
||||
|
||||
1. **Least privilege on the database side.** The Spring Boot application
|
||||
role (`archiv`) has full read/write on every table. Letting Grafana
|
||||
connect with that role would mean a Grafana compromise becomes an
|
||||
application compromise. The dashboard only needs SELECT on three
|
||||
tables; the role must reflect that and nothing more.
|
||||
|
||||
2. **Operational simplicity of secret rotation.** The role's password is
|
||||
shared between the migration that sets it and the Grafana datasource
|
||||
that uses it. A first version of this work put the password in a
|
||||
versioned Flyway migration (V68), which Flyway only applies once —
|
||||
leaving rotation as an out-of-band `psql ALTER ROLE` step that no
|
||||
runbook documented. The shape must support rotation without manual
|
||||
SQL.
|
||||
|
||||
## Decision
|
||||
|
||||
- Provision a dedicated PostgreSQL role `grafana_reader` with `LOGIN` plus
|
||||
`GRANT SELECT` on `audit_log`, `documents`, `transcription_blocks` only.
|
||||
No INSERT/UPDATE/DELETE on any table, no access to any other table —
|
||||
enforced by the database, locked in by both positive and parameterized
|
||||
negative tests in `GrafanaReaderRoleIntegrationTest`.
|
||||
- Split the role's lifecycle across two migrations:
|
||||
- `V68__add_grafana_reader_role.sql` — versioned, immutable, idempotent.
|
||||
Creates the role and applies the grants. Runs exactly once per
|
||||
database, like every other versioned migration.
|
||||
- `R__grafana_reader_password.sql` — Flyway *repeatable* migration that
|
||||
issues `ALTER ROLE grafana_reader WITH PASSWORD '${grafanaDbPassword}'`.
|
||||
Flyway computes the checksum on the resolved content, so any change
|
||||
to `GRAFANA_DB_PASSWORD` flips the checksum and re-applies the
|
||||
migration on the next boot. Rotation becomes "bump env var, restart
|
||||
backend, restart obs-grafana" — see the runbook in
|
||||
`docs/DEPLOYMENT.md §4 → Rotate the grafana_reader DB password`.
|
||||
- Resolve the password through Spring's `Environment` rather than a raw
|
||||
`System.getenv()` call, so tests inject via `application.properties`
|
||||
and the resolver is unit-testable with `MockEnvironment`. Fail closed
|
||||
with `IllegalStateException` when the variable is unset — no fallback
|
||||
string. Same shape as `UserDataInitializer`'s refusal to seed default
|
||||
admin credentials outside dev/test/e2e.
|
||||
- Join `obs-grafana` to `archiv-net` in addition to `obs-net`. Only the
|
||||
Grafana container crosses the boundary; Loki, Tempo, Prometheus,
|
||||
GlitchTip, and the worker containers remain `obs-net`-only.
|
||||
|
||||
## Consequences
|
||||
|
||||
**Positive**
|
||||
|
||||
- Database-level least privilege: a Grafana compromise gains SELECT on
|
||||
three tables. Cannot write, cannot read PII tables like `app_users`,
|
||||
`persons`, `notifications`, `document_comments`, `geschichten`. The
|
||||
parameterized PII negative sweep in `GrafanaReaderRoleIntegrationTest`
|
||||
is the regression gate; new sensitive tables get added to that list.
|
||||
- Rotation is documented, idempotent, and survives operator turnover.
|
||||
No "the password set on day 1 is the password forever" failure mode.
|
||||
- Tests pin down both sides of the boundary: positive grants must hold,
|
||||
write-deny must hold, and every table on the PII negative list must stay unreadable.
|
||||
|
||||
**Negative / trade-offs**
|
||||
|
||||
- `obs-net` is no longer fully isolated from `archiv-net`. A Grafana RCE
|
||||
(e.g. via a future Grafana CVE) gains a TCP path to `archive-db` —
|
||||
a pivot that is contained, but no longer impossible. The least-privilege role is the
|
||||
mitigation; we accept that mitigation as sufficient for a single
|
||||
bridged container.
|
||||
- The backend must hold `GRAFANA_DB_PASSWORD` in its environment forever,
|
||||
so Flyway can resolve the placeholder on every boot. A backend RCE
|
||||
therefore also leaks the Grafana datasource password. Acceptable
|
||||
because that password's blast radius is itself bounded by the
|
||||
least-privilege grants on `grafana_reader`.
|
||||
|
||||
## Alternatives considered
|
||||
|
||||
- **Prometheus PostgreSQL exporter, no direct connection.** Loses ad-hoc
|
||||
SQL aggregates — the dashboard would need every metric pre-defined as
|
||||
an exporter query, with a redeploy to add a new one. The PO Overview
|
||||
is the type of dashboard that grows panels over time; pre-defining
|
||||
every aggregate is the wrong shape.
|
||||
- **Read replica or logical-replication slot dedicated to Grafana.**
|
||||
Real operational cost (extra Postgres instance, replication monitoring,
|
||||
storage doubled) disproportionate to a weekly PO glance.
|
||||
- **Versioned migration with `flyway repair` for rotation.** Rejected:
|
||||
conflates schema lifecycle with credential lifecycle, requires manual
|
||||
intervention to rotate, and the repair command's semantics are
|
||||
surprising to operators unfamiliar with Flyway internals.
|
||||
- **Hardcoded fallback password when env var is unset.** Rejected as a
|
||||
security blocker: publishes a known credential for a role with read
|
||||
access to user activity and full letter text. The fail-closed
|
||||
behavior is the explicit defense.
|
||||
|
||||
## References
|
||||
|
||||
- Issue #651 — PO Overview Grafana dashboard
|
||||
- `backend/src/main/resources/db/migration/V68__add_grafana_reader_role.sql`
|
||||
- `backend/src/main/resources/db/migration/R__grafana_reader_password.sql`
|
||||
- `backend/src/main/java/org/raddatz/familienarchiv/config/FlywayConfig.java`
|
||||
- `backend/src/test/java/org/raddatz/familienarchiv/config/GrafanaReaderRoleIntegrationTest.java`
|
||||
- `infra/observability/grafana/provisioning/datasources/datasources.yml`
|
||||
- `docker-compose.observability.yml` — `archiv-net` bridge on `obs-grafana`
|
||||
- `docs/DEPLOYMENT.md §4` — rotation runbook
|
||||
@@ -1,150 +0,0 @@
|
||||
# ADR-025 — Canonical Import Output as Contract & Single-Migration Schema Foundation
|
||||
|
||||
**Date:** 2026-05-27
|
||||
**Status:** Accepted
|
||||
**Issue:** #671 (schema, decisions 1–2); #669 (importer architecture, decision 3)
|
||||
**Milestone:** Handling the Unknowns — honest uncertainty in dates & people
|
||||
|
||||
---
|
||||
|
||||
## Context
|
||||
|
||||
The "Handling the Unknowns" milestone introduces honest uncertainty into the archive:
|
||||
documents whose dates are known only approximately or as a range, and people the importer
|
||||
infers from raw attribution text but cannot confidently identify. Three sibling issues —
|
||||
date precision (#666), name triage (#665), and the importer (#669) — each independently
|
||||
planned a Flyway `V69` migration that altered `persons`. Three `V69`s is a boot failure
|
||||
(Flyway versions must be unique), and `persons.provisional` was at risk of being defined
|
||||
twice.
|
||||
|
||||
Two durable decisions had to be made before any application code in Phases 3–6 could
|
||||
compile against the new schema.
|
||||
|
||||
---
|
||||
|
||||
## Decision
|
||||
|
||||
### 1. All import/precision/attribution/identity schema lives in ONE migration with a single owner
|
||||
|
||||
`V69__import_precision_attribution_identity_schema.sql` adds every new column for this
|
||||
milestone in a single, atomic, forward-only migration:
|
||||
|
||||
- `documents`: `meta_date_precision` (backfilled `DAY` where dated / `UNKNOWN` where not,
|
||||
then `NOT NULL`), `meta_date_end`, `meta_date_raw`, `sender_text`, `receiver_text`.
|
||||
- `persons`: `source_ref` (unique index, nullable), `provisional` (`NOT NULL DEFAULT false`).
|
||||
- `tag`: `source_ref` (unique index, nullable).
|
||||
|
||||
Integrity is pushed to the database as fail-closed `CHECK` constraints (the precedent is
|
||||
`V22`'s `person_type` allowlist):
|
||||
|
||||
- `meta_date_precision` must be one of the seven enum values.
|
||||
- `meta_date_end` may be non-null **only** when precision = `RANGE` (one-directional, not
|
||||
biconditional — see Consequences).
|
||||
- `meta_date_end >= meta_date` for ranges with both endpoints (a `CHECK`, not a trigger).
|
||||
- `meta_date_raw`, `sender_text`, `receiver_text` are length-capped at 10 000 (mirrors the
|
||||
`transcription_blocks` cap in `V18`).
|
||||
|
||||
No sibling issue adds another migration that alters `persons` or `documents` in this
|
||||
milestone.
|
||||
|
||||
### 2. The backend `DatePrecision` enum is a verbatim mirror of the normalizer's `Precision`; the canonical output is the contract
|
||||
|
||||
The importer reads the Python normalizer's canonical output
|
||||
(`tools/import-normalizer/`). The backend `DatePrecision` enum
|
||||
(`DAY, MONTH, SEASON, YEAR, RANGE, APPROX, UNKNOWN`) is a verbatim copy of the normalizer's
|
||||
`Precision(StrEnum)` (`dates.py`). There is **no translation layer**: the normalizer's
|
||||
output strings are persisted as-is. The same applies to `source_ref`, which carries the
|
||||
normalizer's `person_id` / canonical `tag_path` unchanged as the re-import idempotency key.
|
||||
|
||||
### 3. The importer is four idempotent loaders over the canonical artifacts; Java no longer parses the raw spreadsheet (Phase 3, #669)
|
||||
|
||||
The legacy `MassImportService` read the *raw* original spreadsheet by positional column
|
||||
index (`@Value app.import.col.*`) and re-derived everything in Java (ISO-only date parsing,
|
||||
name classification via `findOrCreateByAlias`, an ODS/XXE XML path). It is **deleted**.
|
||||
|
||||
The rebuild is a `CanonicalImportOrchestrator` driving four single-responsibility loaders in
|
||||
an explicit dependency DAG — `TagTreeImporter` → `PersonRegisterImporter` →
|
||||
`PersonTreeImporter` → `DocumentImporter` — that **consume the committed canonical artifacts**
|
||||
(`tools/import-normalizer/out/`). A shared `CanonicalSheetReader` maps columns **by header
|
||||
name** (not by index) and fails closed (`IMPORT_ARTIFACT_INVALID`) on a missing header. Each
|
||||
loader calls the **owning domain's service**, never a repository (layering rule); the tree
|
||||
loader uses `RelationshipService`, never the relationship repository.
|
||||
|
||||
Settled sub-decisions:
|
||||
|
||||
- **Idempotency precedence is domain-specific.** Persons/tags upsert by `source_ref`,
|
||||
documents by `index`. Two distinct rules apply:
|
||||
- **Person/Tag scalar fields = preserve human edits.** On re-import a non-blank field a human
|
||||
changed in-app is never overwritten (blank fields are filled from canonical via the single
|
||||
`preferHuman` idiom), and `provisional` is monotonic-downward — once a human confirms a
|
||||
person (`false`) it never reverts to `true`. Because the orchestrator loads the register and
|
||||
tree *before* documents, a person already `false` can never be flipped provisional by a
|
||||
later document row that references the same `source_ref`, regardless of document-row order.
|
||||
- **Document sender/receivers/tags = canonical-authoritative.** A document's sender, receiver
|
||||
set, and tag set are owned by the canonical row, not the archivist. On re-import of a
|
||||
PLACEHOLDER document `DocumentImporter` clears and re-populates `receivers`/`tags` so a row
|
||||
whose set *shrinks* prunes the removed links rather than accumulating stale ones. The
|
||||
"preserve human edits" rule above does **not** extend to these collections. The raw
|
||||
`sender_text`/`receiver_text` cells are always retained verbatim (a separate invariant).
|
||||
Note that non-PLACEHOLDER documents are skipped entirely (`ALREADY_EXISTS`), so once a document
|
||||
has a file the importer never touches it again — this bounds the authoritative-overwrite
|
||||
blast radius to placeholder rows.
|
||||
Verified against real Postgres in `CanonicalImportIntegrationTest`
|
||||
(`reimport_preservesHumanEditedPersonField`, `reimport_prunesRemovedReceiverAndTag…`,
|
||||
`import_neverFlipsRegisterPersonToProvisional…`).
|
||||
- **Name policy = Option A.** The normalizer resolved attribution upstream: the document sheet
|
||||
carries the resolved slug in `sender_person_id` / `receiver_person_ids` and the raw cell in
|
||||
`sender_name` / `receiver_names`. The importer routes register-first by `source_ref`
|
||||
(provisional `Person` when a slug is unmatched), and **always retains the raw cell** in
|
||||
`sender_text` / `receiver_text` even when a person is linked — the load-bearing invariant
|
||||
behind the merge story. A row with no slug but raw text (prose / `?` / object-noise) links
|
||||
no person and keeps only the raw text.
|
||||
- **`provisional` is now populated.** Importer-minted persons are `provisional = true`;
|
||||
register and tree persons stay `false`. This is the Phase-3 contract the schema (decision 1)
|
||||
left at default-`false`.
|
||||
- **Security guards are defense-in-depth, not upstream-trust.** The `file` column is treated as
|
||||
hostile (CWE-22 does not care that it came from our tool): its basename is validated
|
||||
(`isValidImportFilename` — slash/backslash, three Unicode slash homoglyphs, `..`, null byte,
|
||||
absolute path) and resolved only inside the import dir with canonical-path containment, so a
|
||||
traversal value can never escape. The `%PDF` magic-byte check gates upload. These guards and
|
||||
their tests were ported from `MassImportService` **before** it was deleted.
|
||||
|
||||
---
|
||||
|
||||
## Consequences
|
||||
|
||||
- **RANGE is one-directional, not biconditional.** A `RANGE` row may have a null
|
||||
`meta_date_end` (an open-ended range with only a start), because the normalizer can emit
|
||||
start-only ranges. A biconditional `RANGE ⟺ end IS NOT NULL` rule would reject valid
|
||||
normalizer output, so it was rejected. Phase 4 rendering must handle a `RANGE` with no end
|
||||
gracefully.
|
||||
- **`provisional` stays `false` throughout the schema phase (decision 1, #671).** The column and flag exist, but no
|
||||
code path sets it `true`; the importer (Phase 3) is the only writer. This is intentional,
|
||||
not a half-built feature.
|
||||
- **A future dev must not "improve" the enum.** Renaming or dropping a `DatePrecision` value
|
||||
without changing the normalizer silently breaks import idempotency and date rendering. The
|
||||
enum's Javadoc states this; the DB `CHECK` enforces validity independent of the Java enum.
|
||||
- **`source_ref` is unique + nullable.** Manually created persons/tags have `source_ref =
|
||||
NULL`; Postgres allows multiple NULLs under a plain unique index, so no backfill is needed.
|
||||
- **Forward-only.** The migration is immutable once shipped (Flyway checksum model); any fix
|
||||
goes in a later version. There is no down-migration — rollback means restoring from the
|
||||
nightly `pg_dump`, the standard procedure.
|
||||
- **`runImport()` is non-transactional — per-loader transactions only.** The orchestrator
|
||||
does not wrap the four loaders in a single transaction; each loader (or the per-call
|
||||
`upsertBySourceRef` / `DocumentImporter.load`) carries its own `@Transactional` boundary. A
|
||||
partial failure mid-run (e.g. the document loader throws after tags + persons committed)
|
||||
leaves the earlier loaders' data committed and the `ImportStatus` set to `FAILED`. This is
|
||||
acceptable precisely because the import is idempotent: re-running is safe and converges to
|
||||
the same state, so the operational recovery for a partial failure is simply to fix the
|
||||
offending artifact and re-trigger the import — no manual cleanup of half-written data is
|
||||
required. A future maintainer must not assume all-or-nothing semantics.
|
||||
- **Path-escape aborts the whole import (fail-closed), by design.** A path-traversal or
|
||||
symlink-escape in a row's file path is treated as an attack signal: the import aborts rather
|
||||
than recording the row as a `SkippedFile` and continuing. This is a deliberate owner decision
|
||||
(2026-05-27) over a per-file skip — a malicious path must surface loudly, not be silently
|
||||
tolerated.
|
||||
- **`PersonSummaryDTO` coupling.** `provisional` was added to the `PersonSummaryDTO` native
|
||||
interface projection; because the projection is backed by native SQL, the column had to be
|
||||
added to all three native `SELECT`s (`findAllWithDocumentCount`, `searchWithDocumentCount`,
|
||||
`findTopByDocumentCount`) or it would silently return `false`. Guarded by integration tests
|
||||
against real Postgres.
|
||||
@@ -43,12 +43,9 @@ Rel(ocr, storage, "Fetches PDF via presigned URL", "HTTP / S3 presigned")
|
||||
Rel(mc, storage, "Bootstraps bucket + service account on startup", "MinIO Client CLI")
|
||||
Rel(promtail, loki, "Pushes log streams", "HTTP/Loki push API")
|
||||
Rel(backend, tempo, "Sends distributed traces via OTLP", "HTTP / OTLP / port 4318 (archiv-net)")
|
||||
Rel(prometheus, backend, "Scrapes JVM + HTTP metrics", "HTTP 8081 /actuator/prometheus")
|
||||
Rel(prometheus, ocr, "Scrapes OCR + http_* metrics", "HTTP 8000 /metrics")
|
||||
Rel(grafana, prometheus, "Queries metrics", "HTTP 9090")
|
||||
Rel(grafana, loki, "Queries logs", "HTTP 3100")
|
||||
Rel(grafana, tempo, "Queries traces", "HTTP 3200")
|
||||
Rel(grafana, db, "Read-only dashboard queries via grafana_reader role", "PostgreSQL / archiv-net")
|
||||
Rel(glitchtip, db, "Stores error events in glitchtip DB", "PostgreSQL / archiv-net")
|
||||
Rel(obs_glitchtip_worker, obs_redis, "Processes Celery tasks", "Redis / obs-net")
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
@startuml
|
||||
!include <C4/C4_Component>
|
||||
|
||||
title Component Diagram: API Backend — Document Management & Canonical Import
|
||||
title Component Diagram: API Backend — Document Management & Import
|
||||
|
||||
Container(frontend, "Web Frontend", "SvelteKit")
|
||||
ContainerDb(db, "PostgreSQL", "PostgreSQL 16")
|
||||
@@ -9,50 +9,30 @@ ContainerDb(minio, "Object Storage", "MinIO (S3-compatible)")
|
||||
|
||||
System_Boundary(backend, "API Backend (Spring Boot)") {
|
||||
Component(docCtrl, "DocumentController", "Spring MVC — /api/documents", "CRUD for documents: search, get by ID, update metadata, upload/download file, conversation thread, batch metadata updates, and per-month density aggregation for the timeline filter widget.")
|
||||
Component(adminCtrl, "AdminController", "Spring MVC — /api/admin", "Triggers the asynchronous canonical import (requires ADMIN permission). Reports import state (IDLE/RUNNING/DONE/FAILED).")
|
||||
Component(adminCtrl, "AdminController", "Spring MVC — /api/admin", "Triggers asynchronous Excel/ODS mass import (requires ADMIN permission). Reports import state (IDLE/RUNNING/DONE/FAILED).")
|
||||
Component(docSvc, "DocumentService", "Spring Service", "Core document business logic: store, update, search. Resolves persons and tags, delegates file I/O to FileService, builds dynamic JPA Specifications, and integrates with audit logging.")
|
||||
Component(fileSvc, "FileService", "Spring Service", "Wraps AWS SDK v2 S3Client. Uploads files with UUID-keyed paths, computes SHA-256 hash, downloads with content-type detection, and generates presigned URLs for OCR access.")
|
||||
Component(importOrch, "CanonicalImportOrchestrator", "Spring Service — @Async", "Runs the four canonical loaders in an explicit dependency DAG (TagTree → PersonRegister → PersonTree → Document). Smoke-checks all four artifacts before starting, owns the IDLE/RUNNING/DONE/FAILED state machine, fails closed on a malformed artifact.")
|
||||
Component(tagTreeLoader, "TagTreeImporter", "Spring Component", "Upserts the tag hierarchy from canonical-tag-tree.xlsx via TagService (by canonical tag_path).")
|
||||
Component(personRegLoader, "PersonRegisterImporter", "Spring Component", "Upserts register persons from canonical-persons.xlsx via PersonService (by normalizer person_id).")
|
||||
Component(personTreeLoader, "PersonTreeImporter", "Spring Component", "Upserts tree persons + relationships from canonical-persons-tree.json via PersonService and RelationshipService.")
|
||||
Component(docLoader, "DocumentImporter", "Spring Component", "Loads canonical-documents.xlsx: routes attribution register-first (raw cell always retained in sender_text/receiver_text), parses clean dates, builds an honest precision-aware title via DocumentTitleFormatter, keeps the S3 upload + thumbnail plumbing, and ports the path-traversal / homoglyph / absolute-path / %PDF magic-byte security guards.")
|
||||
Component(titleFmt, "DocumentTitleFormatter", "Pure helper", "Formats the date label baked into an import title at exactly the data's precision (MONTH -> 'Juni 1916', never a fabricated day). Mirrors the frontend formatDocumentDate; both are pinned to docs/date-label-fixtures.json (#666).")
|
||||
Component(sheetReader, "CanonicalSheetReader", "POI helper", "Maps a canonical .xlsx by header name (no positional indices), splits pipe-delimited list columns, fails closed (IMPORT_ARTIFACT_INVALID) on a missing required header.")
|
||||
Component(massImport, "MassImportService", "Spring Service — @Async", "Reads Excel/ODS files from /import mount. Tracks import state (IDLE/RUNNING/DONE/FAILED) and delegates to ExcelService. Returns immediately; processing runs asynchronously.")
|
||||
Component(excelSvc, "ExcelService", "Spring Service", "Parses Excel/ODS workbooks (Apache POI). Column indices configurable via application.properties. Creates/updates document records per row.")
|
||||
Component(minioConf, "MinioConfig", "Spring @Configuration", "Creates the S3Client and S3Presigner beans with path-style access for MinIO. Validates MinIO connectivity on startup.")
|
||||
Component(docRepo, "DocumentRepository", "Spring Data JPA", "Queries documents with Specification-based dynamic search, bidirectional conversation thread queries, full-text search with ranking and match highlighting, and transcription pipeline queue projections.")
|
||||
Component(docSpec, "DocumentSpecifications", "JPA Criteria API", "Factory for composable predicates: hasText (full-text), hasSender, hasReceiver, isBetween (date range), hasTags (subquery AND/OR logic).")
|
||||
}
|
||||
|
||||
Component(personSvc, "PersonService", "Spring Service", "See diagram 3e. Resolves sender / receiver persons by ID; upserts persons by source_ref for the importer.")
|
||||
Component(tagSvc, "TagService", "Spring Service", "See diagram 3d. Finds or creates tags by name; upserts tags by source_ref for the importer.")
|
||||
Component(relSvc, "RelationshipService", "Spring Service", "See diagram 3e. Creates family relationships from the person tree during import.")
|
||||
Component(personSvc, "PersonService", "Spring Service", "See diagram 3e. Called by DocumentService to resolve sender / receiver persons by ID.")
|
||||
Component(tagSvc, "TagService", "Spring Service", "See diagram 3d. Called by DocumentService to find or create tags by name.")
|
||||
|
||||
Rel(frontend, docCtrl, "Document requests", "HTTP / JSON")
|
||||
Rel(frontend, adminCtrl, "Trigger import", "HTTP / JSON")
|
||||
Rel(docCtrl, docSvc, "Delegates to")
|
||||
Rel(adminCtrl, importOrch, "Triggers")
|
||||
Rel(adminCtrl, massImport, "Triggers")
|
||||
Rel(docSvc, fileSvc, "Upload / download files")
|
||||
Rel(docSvc, docRepo, "Reads / writes documents")
|
||||
Rel(docSvc, docSpec, "Builds search predicates")
|
||||
Rel(docSvc, personSvc, "Resolves sender / receivers")
|
||||
Rel(docSvc, tagSvc, "Finds or creates tags")
|
||||
Rel(importOrch, tagTreeLoader, "1. Loads tags")
|
||||
Rel(importOrch, personRegLoader, "2. Loads register persons")
|
||||
Rel(importOrch, personTreeLoader, "3. Loads tree persons + relationships")
|
||||
Rel(importOrch, docLoader, "4. Loads documents")
|
||||
Rel(tagTreeLoader, sheetReader, "Reads canonical .xlsx")
|
||||
Rel(personRegLoader, sheetReader, "Reads canonical .xlsx")
|
||||
Rel(docLoader, sheetReader, "Reads canonical .xlsx")
|
||||
Rel(docLoader, titleFmt, "Builds honest title date")
|
||||
Rel(tagTreeLoader, tagSvc, "Upserts tags by source_ref")
|
||||
Rel(personRegLoader, personSvc, "Upserts persons by source_ref")
|
||||
Rel(personTreeLoader, personSvc, "Upserts persons by source_ref")
|
||||
Rel(personTreeLoader, relSvc, "Creates relationships")
|
||||
Rel(docLoader, docSvc, "Upserts documents by index")
|
||||
Rel(docLoader, personSvc, "Register-first match / provisional person")
|
||||
Rel(docLoader, tagSvc, "Attaches tag by source_ref")
|
||||
Rel(docLoader, fileSvc, "Uploads resolved file")
|
||||
Rel(massImport, excelSvc, "Parses Excel/ODS file")
|
||||
Rel(excelSvc, docSvc, "Creates / updates documents")
|
||||
Rel(minioConf, fileSvc, "Provides S3Client and S3Presigner beans")
|
||||
Rel(fileSvc, minio, "PUT / GET / presigned URL objects", "S3 API / HTTP")
|
||||
Rel(docRepo, db, "SQL queries", "JDBC")
|
||||
|
||||
@@ -7,12 +7,12 @@ Container(frontend, "Web Frontend", "SvelteKit")
|
||||
ContainerDb(db, "PostgreSQL", "PostgreSQL 16")
|
||||
|
||||
System_Boundary(backend, "API Backend (Spring Boot)") {
|
||||
Component(personCtrl, "PersonController", "Spring MVC — /api/persons", "Filtered, paginated directory (type/familyOnly/hasDocuments/provisional + page/size -> PersonSearchResult). Returns documents sent/received, correspondent suggestions, person summaries with counts. PATCH /{id}/confirm clears provisional; DELETE /{id} removes a person (both WRITE_ALL).")
|
||||
Component(personCtrl, "PersonController", "Spring MVC — /api/persons", "Lists and searches family members. Returns documents sent by or received by a person, correspondent suggestions, and person summary with document counts.")
|
||||
Component(relCtrl, "RelationshipController", "Spring MVC — /api/network, /api/persons/{id}/relationships", "CRUD for explicit person relationships and the full family network graph (nodes + edges) used by the Stammbaum view.")
|
||||
Component(personSvc, "PersonService", "Spring Service", "Person CRUD, alias management, filtered paged search (PersonFilter -> paired slice/count), confirm (clears provisional), delete (detaches document refs first), and merge operations (reassigns all document sender/receiver references before deleting duplicate persons).")
|
||||
Component(personSvc, "PersonService", "Spring Service", "Person CRUD, alias management, and merge operations (reassigns all document sender/receiver references before deleting duplicate persons).")
|
||||
Component(relSvc, "RelationshipService", "Spring Service", "Manages explicit directional family relationships (PARENT_OF, SPOUSE_OF, SIBLING_OF, etc.) with optional date ranges and notes.")
|
||||
Component(relInference, "RelationshipInferenceService", "Spring Service", "Computes transitive family relationships from explicit edges to infer grandparent/grandchild, aunt/uncle, and other extended-family links for the network graph.")
|
||||
Component(personRepo, "PersonRepository", "Spring Data JPA", "Queries persons with name search (including aliases), correspondent discovery, person summaries with document counts, paired filter-aware slice + COUNT queries (one shared WHERE clause), and merge/reassignment helpers.")
|
||||
Component(personRepo, "PersonRepository", "Spring Data JPA", "Queries persons with name search (including aliases), correspondent discovery, person summaries with document counts, and merge/reassignment helpers.")
|
||||
Component(relRepo, "PersonRelationshipRepository", "Spring Data JPA", "Reads and writes PersonRelationship records. Supports lookup by person ID, by relation type, and existence checks for deduplication.")
|
||||
}
|
||||
|
||||
|
||||
@@ -7,9 +7,8 @@ Person(user, "User")
|
||||
Container(backend, "API Backend", "Spring Boot")
|
||||
|
||||
System_Boundary(frontend, "Web Frontend (SvelteKit / SSR)") {
|
||||
Component(personsPage, "/persons and /persons/[id]", "SvelteKit Routes", "Person directory (server-side filtered + paginated) and detail. Directory: type/family/has-documents chips, reader default (familyMember OR documentCount > 0), writer-only show-all toggle. Detail: metadata, document list sent/received, correspondents, family relationships.")
|
||||
Component(personsPage, "/persons and /persons/[id]", "SvelteKit Routes", "Person directory and detail. Detail: metadata, document list sent/received, correspondents, explicit and inferred family relationships.")
|
||||
Component(personEdit, "/persons/[id]/edit and /persons/new", "SvelteKit Routes", "Create and edit person forms. Edit: metadata, aliases, explicit relationships. Actions: PUT/POST /api/persons.")
|
||||
Component(personReview, "/persons/review", "SvelteKit Route", "Transcriber triage view (WRITE-gated link). Lists provisional persons; per-row Merge / Umbenennen / Bestätigen / Löschen. Actions: POST /{id}/merge, PUT /{id}, PATCH /{id}/confirm, DELETE /{id}.")
|
||||
Component(briefwechsel, "/briefwechsel", "SvelteKit Route", "Bilateral conversation timeline. Selects two persons via PersonTypeahead, fetches GET /api/documents/conversation, displays chronological exchange.")
|
||||
Component(aktivitaeten, "/aktivitaeten", "SvelteKit Route", "Unified activity feed (Chronik). Loader: GET /api/dashboard/activity and GET /api/notifications?read=false.")
|
||||
Component(geschichten, "/geschichten and /geschichten/[id]", "SvelteKit Routes", "Story list and detail pages. Loader: GET /api/geschichten?status=PUBLISHED.")
|
||||
@@ -20,9 +19,8 @@ System_Boundary(frontend, "Web Frontend (SvelteKit / SSR)") {
|
||||
}
|
||||
|
||||
Rel(user, personsPage, "Browses family members", "HTTPS / Browser")
|
||||
Rel(personsPage, backend, "GET /api/persons (filter + page params -> PersonSearchResult), GET /api/persons/{id}", "HTTP / JSON")
|
||||
Rel(personsPage, backend, "GET /api/persons, GET /api/persons/{id}", "HTTP / JSON")
|
||||
Rel(personEdit, backend, "GET /api/persons/{id}, PUT /api/persons/{id}, POST /api/persons", "HTTP / JSON")
|
||||
Rel(personReview, backend, "GET /api/persons?provisional=true, PATCH /api/persons/{id}/confirm, DELETE /api/persons/{id}, POST /api/persons/{id}/merge", "HTTP / JSON")
|
||||
Rel(briefwechsel, backend, "GET /api/documents/conversation", "HTTP / JSON")
|
||||
Rel(aktivitaeten, backend, "GET /api/dashboard/activity, GET /api/notifications", "HTTP / JSON")
|
||||
Rel(geschichten, backend, "GET /api/geschichten", "HTTP / JSON")
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
@startuml db-orm
|
||||
' Schema source: Flyway V1–V69 (excl. V37, V43 — intentionally removed)
|
||||
' Schema as of: V69 (2026-05-27)
|
||||
' Schema source: Flyway V1–V60 (excl. V37, V43 — intentionally removed)
|
||||
' Schema as of: V60 (2026-05-06)
|
||||
' ⚠ This is a versioned snapshot. Update when the schema changes significantly.
|
||||
|
||||
hide circle
|
||||
@@ -88,11 +88,6 @@ package "Documents" {
|
||||
summary : TEXT
|
||||
transcription : TEXT
|
||||
meta_date : DATE
|
||||
meta_date_precision : VARCHAR(16) NOT NULL
|
||||
meta_date_end : DATE
|
||||
meta_date_raw : TEXT
|
||||
sender_text : TEXT
|
||||
receiver_text : TEXT
|
||||
meta_location : VARCHAR(255)
|
||||
meta_document_location : VARCHAR(255)
|
||||
archive_box : VARCHAR(255)
|
||||
@@ -187,8 +182,6 @@ package "Persons" {
|
||||
birth_year : INTEGER
|
||||
death_year : INTEGER
|
||||
family_member : BOOLEAN NOT NULL
|
||||
source_ref : VARCHAR(255) UNIQUE
|
||||
provisional : BOOLEAN NOT NULL
|
||||
}
|
||||
|
||||
entity person_name_aliases {
|
||||
@@ -224,7 +217,6 @@ package "Tags" {
|
||||
name : VARCHAR(255) NOT NULL UNIQUE
|
||||
parent_id : UUID <<FK>>
|
||||
color : VARCHAR(20)
|
||||
source_ref : VARCHAR(255) UNIQUE
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
@startuml db-relationships
|
||||
' Schema source: Flyway V1–V69 (excl. V37, V43 — intentionally removed)
|
||||
' Schema as of: V69 (2026-05-27)
|
||||
' Schema source: Flyway V1–V60 (excl. V37, V43 — intentionally removed)
|
||||
' Schema as of: V60 (2026-05-06)
|
||||
' ⚠ This is a versioned snapshot. Update when the schema changes significantly.
|
||||
' Note: V69 adds columns only (persons.source_ref, tag.source_ref, document
|
||||
' precision/attribution fields); no new FK relationships, so this diagram is unchanged.
|
||||
|
||||
hide circle
|
||||
skinparam linetype ortho
|
||||
|
||||
@@ -1,140 +0,0 @@
|
||||
{
|
||||
"_comment": "Single source of truth for the honest date-label rule set shared by the TS formatDocumentDate (frontend/src/lib/shared/utils/documentDate.ts) and the Java formatTitleDate (backend importing/DocumentTitleFormatter.java). The 'cases' array holds the GERMAN (de) canonical form and is asserted by BOTH suites — that is the Java<->TS drift guard (en-dash vs hyphen, 'ca.' vs 'circa', season words, range collapse). The Java title formatter intentionally renders German server-side (import titles are always German); only the TS UI formatter is locale-aware, so 'localeCases' (en/es month-name output) is asserted by the TS spec ONLY and must NOT be fed to the Java test. Do not edit one side's expectation without editing this file and the relevant test(s). Season->month mapping note: the Python import normalizer (tools/import-normalizer) is the UPSTREAM authority for which representative month a season maps to (4/7/10/1); both formatters mirror it but it sits OUTSIDE this Java<->TS guard, so a normalizer change is not caught here. See issue #666 and the Markus/Sara drift-guard decision.",
|
||||
"cases": [
|
||||
{
|
||||
"name": "DAY renders a full long date",
|
||||
"precision": "DAY",
|
||||
"anchor": "1943-12-24",
|
||||
"end": null,
|
||||
"raw": null,
|
||||
"expected": "24. Dezember 1943"
|
||||
},
|
||||
{
|
||||
"name": "MONTH renders month and year only — never a fabricated day",
|
||||
"precision": "MONTH",
|
||||
"anchor": "1916-06-01",
|
||||
"end": null,
|
||||
"raw": "Juni 1916",
|
||||
"expected": "Juni 1916"
|
||||
},
|
||||
{
|
||||
"name": "SEASON renders the season word from raw",
|
||||
"precision": "SEASON",
|
||||
"anchor": "1916-06-01",
|
||||
"end": null,
|
||||
"raw": "Sommer 1916",
|
||||
"expected": "Sommer 1916"
|
||||
},
|
||||
{
|
||||
"name": "SEASON with null raw derives the season from the anchor month",
|
||||
"precision": "SEASON",
|
||||
"anchor": "1916-04-01",
|
||||
"end": null,
|
||||
"raw": null,
|
||||
"expected": "Frühling 1916"
|
||||
},
|
||||
{
|
||||
"name": "YEAR renders the year only — suppresses month and day",
|
||||
"precision": "YEAR",
|
||||
"anchor": "1916-06-15",
|
||||
"end": null,
|
||||
"raw": null,
|
||||
"expected": "1916"
|
||||
},
|
||||
{
|
||||
"name": "APPROX renders a ca. prefix before the year",
|
||||
"precision": "APPROX",
|
||||
"anchor": "1920-01-01",
|
||||
"end": null,
|
||||
"raw": null,
|
||||
"expected": "ca. 1920"
|
||||
},
|
||||
{
|
||||
"name": "RANGE in the same month collapses the shared month and year",
|
||||
"precision": "RANGE",
|
||||
"anchor": "1917-01-10",
|
||||
"end": "1917-01-11",
|
||||
"raw": null,
|
||||
"expected": "10.–11. Jan. 1917"
|
||||
},
|
||||
{
|
||||
"name": "RANGE across months expands both months, sharing the year",
|
||||
"precision": "RANGE",
|
||||
"anchor": "1917-01-30",
|
||||
"end": "1917-02-02",
|
||||
"raw": null,
|
||||
"expected": "30. Jan. – 2. Feb. 1917"
|
||||
},
|
||||
{
|
||||
"name": "RANGE across a year boundary expands both full dates",
|
||||
"precision": "RANGE",
|
||||
"anchor": "1916-12-30",
|
||||
"end": "1917-01-02",
|
||||
"raw": null,
|
||||
"expected": "30. Dez. 1916 – 2. Jan. 1917"
|
||||
},
|
||||
{
|
||||
"name": "RANGE where end equals start collapses to a single day",
|
||||
"precision": "RANGE",
|
||||
"anchor": "1917-01-10",
|
||||
"end": "1917-01-10",
|
||||
"raw": null,
|
||||
"expected": "10. Jan. 1917"
|
||||
},
|
||||
{
|
||||
"name": "RANGE with a null end renders an open-range indicator, never a fabricated end",
|
||||
"precision": "RANGE",
|
||||
"anchor": "1917-01-10",
|
||||
"end": null,
|
||||
"raw": null,
|
||||
"expected": "ab 10. Jan. 1917"
|
||||
},
|
||||
{
|
||||
"name": "UNKNOWN renders the unknown label regardless of anchor",
|
||||
"precision": "UNKNOWN",
|
||||
"anchor": null,
|
||||
"end": null,
|
||||
"raw": "?",
|
||||
"expected": "Datum unbekannt"
|
||||
}
|
||||
],
|
||||
"localeComment": "TS-only locale parity for the read path (the younger phone audience may use en/es). Asserted ONLY by documentDate.spec.ts — the Java title formatter is German-only by design, so these MUST NOT be fed to DocumentTitleFormatterTest. Each case pins the localized month-name output for DAY and MONTH so a locale regression (e.g. a future de-DE hard-coding) is caught by the drift table, not just by ad-hoc tests.",
|
||||
"localeCases": [
|
||||
{
|
||||
"name": "DAY in English renders the English month name",
|
||||
"precision": "DAY",
|
||||
"anchor": "1943-12-24",
|
||||
"end": null,
|
||||
"raw": null,
|
||||
"locale": "en",
|
||||
"expected": "December 24, 1943"
|
||||
},
|
||||
{
|
||||
"name": "DAY in Spanish renders the Spanish month name",
|
||||
"precision": "DAY",
|
||||
"anchor": "1943-12-24",
|
||||
"end": null,
|
||||
"raw": null,
|
||||
"locale": "es",
|
||||
"expected": "24 de diciembre de 1943"
|
||||
},
|
||||
{
|
||||
"name": "MONTH in English renders the English month name, never a day",
|
||||
"precision": "MONTH",
|
||||
"anchor": "1916-06-01",
|
||||
"end": null,
|
||||
"raw": "Juni 1916",
|
||||
"locale": "en",
|
||||
"expected": "June 1916"
|
||||
},
|
||||
{
|
||||
"name": "MONTH in Spanish renders the Spanish month name, never a day",
|
||||
"precision": "MONTH",
|
||||
"anchor": "1916-06-01",
|
||||
"end": null,
|
||||
"raw": "Juni 1916",
|
||||
"locale": "es",
|
||||
"expected": "junio de 1916"
|
||||
}
|
||||
]
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user