Files
familienarchiv/docs/architecture/c4/l3-backend-3b-document-management.puml
Marcel cf457cb96f
Some checks failed
CI / Unit & Component Tests (pull_request) Failing after 2m32s
CI / OCR Service Tests (pull_request) Successful in 26s
CI / Backend Unit Tests (pull_request) Successful in 3m35s
CI / fail2ban Regex (pull_request) Successful in 44s
CI / Semgrep Security Scan (pull_request) Successful in 22s
CI / Compose Bucket Idempotency (pull_request) Successful in 1m6s
docs(document): ADR-031 + glossary/c4/api_tests for auto-title sync (#726)
ADR-031 records the shared document-package title factory, the exact-match save-time
regeneration, and the grammar-heuristic one-time backfill (with the ReDoS / no-version-spam
/ file-replace-is-manual decisions). Adds an "auto-generated title" glossary entry, extends
the document-management c4 diagram with DocumentTitleFactory / DocumentTitleBackfillMatcher
and the backfill flows, and documents POST /api/admin/backfill-titles in Admin-Auth.http as
a one-shot ADMIN call hitting port 8080 directly.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-04 16:44:56 +02:00

67 lines
6.7 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
@startuml
!include <C4/C4_Component>
title Component Diagram: API Backend — Document Management & Canonical Import
Container(frontend, "Web Frontend", "SvelteKit")
ContainerDb(db, "PostgreSQL", "PostgreSQL 16")
ContainerDb(minio, "Object Storage", "MinIO (S3-compatible)")
System_Boundary(backend, "API Backend (Spring Boot)") {
Component(docCtrl, "DocumentController", "Spring MVC — /api/documents", "CRUD for documents: search, get by ID, update metadata, upload/download file, batch metadata updates, and per-month density aggregation for the timeline filter widget.")
Component(adminCtrl, "AdminController", "Spring MVC — /api/admin", "Triggers the asynchronous canonical import (requires ADMIN permission). Reports import state (IDLE/RUNNING/DONE/FAILED). Hosts the one-shot maintenance backfills (versions, file-hashes, titles) — synchronous, ADMIN-only.")
Component(docSvc, "DocumentService", "Spring Service", "Core document business logic: store, update, search. On update, regenerates an unchanged auto-title from the new date/location (exact old-vs-new match, #726); exposes backfillTitles() to clean already-stale titles in one sweep. Resolves persons and tags, delegates file I/O to FileService, builds dynamic JPA Specifications, and integrates with audit logging.")
Component(fileSvc, "FileService", "Spring Service", "Wraps AWS SDK v2 S3Client. Uploads files with UUID-keyed paths, computes SHA-256 hash, downloads with content-type detection, and generates presigned URLs for OCR access.")
Component(importOrch, "CanonicalImportOrchestrator", "Spring Service — @Async", "Runs the four canonical loaders in an explicit dependency DAG (TagTree → PersonRegister → PersonTree → Document). Smoke-checks all four artifacts before starting, owns the IDLE/RUNNING/DONE/FAILED state machine, fails closed on a malformed artifact.")
Component(tagTreeLoader, "TagTreeImporter", "Spring Component", "Upserts the tag hierarchy from canonical-tag-tree.xlsx via TagService (by canonical tag_path).")
Component(personRegLoader, "PersonRegisterImporter", "Spring Component", "Upserts register persons from canonical-persons.xlsx via PersonService (by normalizer person_id).")
Component(personTreeLoader, "PersonTreeImporter", "Spring Component", "Upserts tree persons + relationships from canonical-persons-tree.json via PersonService and RelationshipService.")
Component(docLoader, "DocumentImporter", "Spring Component", "Loads canonical-documents.xlsx: routes attribution register-first (raw cell always retained in sender_text/receiver_text), parses clean dates, builds the title via DocumentTitleFactory, keeps the S3 upload + thumbnail plumbing, and resolves each PDF by index (importDir/<index>.pdf) guarded by strict index validation + canonical-path containment + %PDF magic-byte check (no recursive walk).")
Component(titleFactory, "DocumentTitleFactory", "Spring Component", "Single source of truth for the auto-title {index} {dateLabel} {location} (#726). The document package owns this formula; importer, save-time regeneration, and the backfill all build through it so they never diverge.")
Component(titleFmt, "DocumentTitleFormatter", "Pure helper (document pkg)", "Formats the date label at exactly the data's precision (MONTH -> 'Juni 1916', never a fabricated day). Mirrors the frontend formatDocumentDate; both are pinned to docs/date-label-fixtures.json (#666).")
Component(titleMatcher, "DocumentTitleBackfillMatcher", "Pure helper", "Backfill-only heuristic deciding whether a STORED title is machine-generated (overwritable) vs hand-written prose. Index matched literally (no regex injection / ReDoS); fail-closed.")
Component(sheetReader, "CanonicalSheetReader", "POI helper", "Maps a canonical .xlsx by header name (no positional indices), splits pipe-delimited list columns, fails closed (IMPORT_ARTIFACT_INVALID) on a missing required header.")
Component(minioConf, "MinioConfig", "Spring @Configuration", "Creates the S3Client and S3Presigner beans with path-style access for MinIO. Validates MinIO connectivity on startup.")
Component(docRepo, "DocumentRepository", "Spring Data JPA", "Queries documents with Specification-based dynamic search, full-text search with ranking and match highlighting, and transcription pipeline queue projections.")
Component(docSpec, "DocumentSpecifications", "JPA Criteria API", "Factory for composable predicates: hasText (full-text), hasSender, hasReceiver, isBetween (date range), hasTags (subquery AND/OR logic).")
}
Component(personSvc, "PersonService", "Spring Service", "See diagram 3e. Resolves sender / receiver persons by ID; upserts persons by source_ref for the importer.")
Component(tagSvc, "TagService", "Spring Service", "See diagram 3d. Finds or creates tags by name; upserts tags by source_ref for the importer.")
Component(relSvc, "RelationshipService", "Spring Service", "See diagram 3e. Creates family relationships from the person tree during import.")
Rel(frontend, docCtrl, "Document requests", "HTTP / JSON")
Rel(frontend, adminCtrl, "Trigger import", "HTTP / JSON")
Rel(docCtrl, docSvc, "Delegates to")
Rel(adminCtrl, importOrch, "Triggers")
Rel(docSvc, fileSvc, "Upload / download files")
Rel(docSvc, docRepo, "Reads / writes documents")
Rel(docSvc, docSpec, "Builds search predicates")
Rel(docSvc, personSvc, "Resolves sender / receivers")
Rel(docSvc, tagSvc, "Finds or creates tags")
Rel(importOrch, tagTreeLoader, "1. Loads tags")
Rel(importOrch, personRegLoader, "2. Loads register persons")
Rel(importOrch, personTreeLoader, "3. Loads tree persons + relationships")
Rel(importOrch, docLoader, "4. Loads documents")
Rel(tagTreeLoader, sheetReader, "Reads canonical .xlsx")
Rel(personRegLoader, sheetReader, "Reads canonical .xlsx")
Rel(docLoader, sheetReader, "Reads canonical .xlsx")
Rel(docLoader, titleFactory, "Builds the auto-title")
Rel(docSvc, titleFactory, "Regenerates auto-title (save-time + backfill)")
Rel(docSvc, titleMatcher, "Backfill overwrite test")
Rel(titleFactory, titleFmt, "Formats the honest date label")
Rel(adminCtrl, docSvc, "backfillTitles() / backfillFileHashes()")
Rel(tagTreeLoader, tagSvc, "Upserts tags by source_ref")
Rel(personRegLoader, personSvc, "Upserts persons by source_ref")
Rel(personTreeLoader, personSvc, "Upserts persons by source_ref")
Rel(personTreeLoader, relSvc, "Creates relationships")
Rel(docLoader, docSvc, "Upserts documents by index")
Rel(docLoader, personSvc, "Register-first match / provisional person")
Rel(docLoader, tagSvc, "Attaches tag by source_ref")
Rel(docLoader, fileSvc, "Uploads resolved file")
Rel(minioConf, fileSvc, "Provides S3Client and S3Presigner beans")
Rel(fileSvc, minio, "PUT / GET / presigned URL objects", "S3 API / HTTP")
Rel(docRepo, db, "SQL queries", "JDBC")
@enduml