Some checks failed
CI / Unit & Component Tests (pull_request) Failing after 2m32s
CI / OCR Service Tests (pull_request) Successful in 26s
CI / Backend Unit Tests (pull_request) Successful in 3m35s
CI / fail2ban Regex (pull_request) Successful in 44s
CI / Semgrep Security Scan (pull_request) Successful in 22s
CI / Compose Bucket Idempotency (pull_request) Successful in 1m6s
ADR-031 records the shared document-package title factory, the exact-match save-time regeneration, and the grammar-heuristic one-time backfill (with the ReDoS / no-version-spam / file-replace-is-manual decisions). Adds an "auto-generated title" glossary entry, extends the document-management C4 diagram with DocumentTitleFactory / DocumentTitleBackfillMatcher and the backfill flows, and documents POST /api/admin/backfill-titles in Admin-Auth.http as a one-shot ADMIN call hitting port 8080 directly. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
67 lines
6.7 KiB
Plaintext
67 lines
6.7 KiB
Plaintext
@startuml
|
||
!include <C4/C4_Component>
|
||
|
||
title Component Diagram: API Backend — Document Management & Canonical Import
|
||
|
||
Container(frontend, "Web Frontend", "SvelteKit")
|
||
ContainerDb(db, "PostgreSQL", "PostgreSQL 16")
|
||
ContainerDb(minio, "Object Storage", "MinIO (S3-compatible)")
|
||
|
||
System_Boundary(backend, "API Backend (Spring Boot)") {
|
||
Component(docCtrl, "DocumentController", "Spring MVC — /api/documents", "CRUD for documents: search, get by ID, update metadata, upload/download file, batch metadata updates, and per-month density aggregation for the timeline filter widget.")
|
||
Component(adminCtrl, "AdminController", "Spring MVC — /api/admin", "Triggers the asynchronous canonical import (requires ADMIN permission). Reports import state (IDLE/RUNNING/DONE/FAILED). Hosts the one-shot maintenance backfills (versions, file-hashes, titles) — synchronous, ADMIN-only.")
|
||
Component(docSvc, "DocumentService", "Spring Service", "Core document business logic: store, update, search. On update, regenerates an unchanged auto-title from the new date/location (exact old-vs-new match, #726); exposes backfillTitles() to clean already-stale titles in one sweep. Resolves persons and tags, delegates file I/O to FileService, builds dynamic JPA Specifications, and integrates with audit logging.")
|
||
Component(fileSvc, "FileService", "Spring Service", "Wraps AWS SDK v2 S3Client. Uploads files with UUID-keyed paths, computes SHA-256 hash, downloads with content-type detection, and generates presigned URLs for OCR access.")
|
||
Component(importOrch, "CanonicalImportOrchestrator", "Spring Service — @Async", "Runs the four canonical loaders in an explicit dependency DAG (TagTree → PersonRegister → PersonTree → Document). Smoke-checks all four artifacts before starting, owns the IDLE/RUNNING/DONE/FAILED state machine, fails closed on a malformed artifact.")
|
||
Component(tagTreeLoader, "TagTreeImporter", "Spring Component", "Upserts the tag hierarchy from canonical-tag-tree.xlsx via TagService (by canonical tag_path).")
|
||
Component(personRegLoader, "PersonRegisterImporter", "Spring Component", "Upserts register persons from canonical-persons.xlsx via PersonService (by normalizer person_id).")
|
||
Component(personTreeLoader, "PersonTreeImporter", "Spring Component", "Upserts tree persons + relationships from canonical-persons-tree.json via PersonService and RelationshipService.")
|
||
Component(docLoader, "DocumentImporter", "Spring Component", "Loads canonical-documents.xlsx: routes attribution register-first (raw cell always retained in sender_text/receiver_text), parses clean dates, builds the title via DocumentTitleFactory, keeps the S3 upload + thumbnail plumbing, and resolves each PDF by index (importDir/<index>.pdf) guarded by strict index validation + canonical-path containment + %PDF magic-byte check (no recursive walk).")
|
||
Component(titleFactory, "DocumentTitleFactory", "Spring Component", "Single source of truth for the auto-title {index} – {dateLabel} – {location} (#726). The document package owns this formula; importer, save-time regeneration, and the backfill all build through it so they never diverge.")
|
||
Component(titleFmt, "DocumentTitleFormatter", "Pure helper (document pkg)", "Formats the date label at exactly the data's precision (MONTH -> 'Juni 1916', never a fabricated day). Mirrors the frontend formatDocumentDate; both are pinned to docs/date-label-fixtures.json (#666).")
|
||
Component(titleMatcher, "DocumentTitleBackfillMatcher", "Pure helper", "Backfill-only heuristic deciding whether a STORED title is machine-generated (overwritable) vs hand-written prose. Index matched literally (no regex injection / ReDoS); fail-closed.")
|
||
Component(sheetReader, "CanonicalSheetReader", "POI helper", "Maps a canonical .xlsx by header name (no positional indices), splits pipe-delimited list columns, fails closed (IMPORT_ARTIFACT_INVALID) on a missing required header.")
|
||
Component(minioConf, "MinioConfig", "Spring @Configuration", "Creates the S3Client and S3Presigner beans with path-style access for MinIO. Validates MinIO connectivity on startup.")
|
||
Component(docRepo, "DocumentRepository", "Spring Data JPA", "Queries documents with Specification-based dynamic search, full-text search with ranking and match highlighting, and transcription pipeline queue projections.")
|
||
Component(docSpec, "DocumentSpecifications", "JPA Criteria API", "Factory for composable predicates: hasText (full-text), hasSender, hasReceiver, isBetween (date range), hasTags (subquery AND/OR logic).")
|
||
}
|
||
|
||
Component(personSvc, "PersonService", "Spring Service", "See diagram 3e. Resolves sender / receiver persons by ID; upserts persons by source_ref for the importer.")
|
||
Component(tagSvc, "TagService", "Spring Service", "See diagram 3d. Finds or creates tags by name; upserts tags by source_ref for the importer.")
|
||
Component(relSvc, "RelationshipService", "Spring Service", "See diagram 3e. Creates family relationships from the person tree during import.")
|
||
|
||
Rel(frontend, docCtrl, "Document requests", "HTTP / JSON")
|
||
Rel(frontend, adminCtrl, "Trigger import", "HTTP / JSON")
|
||
Rel(docCtrl, docSvc, "Delegates to")
|
||
Rel(adminCtrl, importOrch, "Triggers")
|
||
Rel(docSvc, fileSvc, "Upload / download files")
|
||
Rel(docSvc, docRepo, "Reads / writes documents")
|
||
Rel(docSvc, docSpec, "Builds search predicates")
|
||
Rel(docSvc, personSvc, "Resolves sender / receivers")
|
||
Rel(docSvc, tagSvc, "Finds or creates tags")
|
||
Rel(importOrch, tagTreeLoader, "1. Loads tags")
|
||
Rel(importOrch, personRegLoader, "2. Loads register persons")
|
||
Rel(importOrch, personTreeLoader, "3. Loads tree persons + relationships")
|
||
Rel(importOrch, docLoader, "4. Loads documents")
|
||
Rel(tagTreeLoader, sheetReader, "Reads canonical .xlsx")
|
||
Rel(personRegLoader, sheetReader, "Reads canonical .xlsx")
|
||
Rel(docLoader, sheetReader, "Reads canonical .xlsx")
|
||
Rel(docLoader, titleFactory, "Builds the auto-title")
|
||
Rel(docSvc, titleFactory, "Regenerates auto-title (save-time + backfill)")
|
||
Rel(docSvc, titleMatcher, "Checks whether a stored title is machine-generated (backfill only)")
|
||
Rel(titleFactory, titleFmt, "Formats the honest date label")
|
||
Rel(adminCtrl, docSvc, "Invokes backfillTitles() / backfillFileHashes()")
|
||
Rel(tagTreeLoader, tagSvc, "Upserts tags by source_ref")
|
||
Rel(personRegLoader, personSvc, "Upserts persons by source_ref")
|
||
Rel(personTreeLoader, personSvc, "Upserts persons by source_ref")
|
||
Rel(personTreeLoader, relSvc, "Creates relationships")
|
||
Rel(docLoader, docSvc, "Upserts documents by index")
|
||
Rel(docLoader, personSvc, "Register-first match / provisional person")
|
||
Rel(docLoader, tagSvc, "Attaches tag by source_ref")
|
||
Rel(docLoader, fileSvc, "Uploads resolved file")
|
||
Rel(minioConf, fileSvc, "Provides S3Client and S3Presigner beans")
|
||
Rel(fileSvc, minio, "PUT / GET / presigned URL objects", "S3 API / HTTP")
|
||
Rel(docRepo, db, "SQL queries", "JDBC")
|
||
|
||
@enduml
|