docs(importing): document the canonical importer rebuild
- ADR-025: add decision 3 (four idempotent loaders over canonical artifacts; raw spreadsheet no longer parsed by Java) with the settled Option-A name policy, human-edit-preserve precedence, provisional contract, and ported security guards. - l3-backend-3b diagram: replace MassImportService/ExcelService with the orchestrator, the four loaders, and CanonicalSheetReader, with the loader dependency edges. - GLOSSARY: Canonical import / canonical artifact / CanonicalSheetReader terms; refresh SkippedFile (new INVALID_FILENAME_PATH_TRAVERSAL reason, index key). - DEPLOYMENT §6: canonical-artifact prerequisite runbook (run normalizer → place four artifacts → trigger import); note idempotent re-run. - CLAUDE.md (root + backend): importing/ package now lists the orchestrator + loaders + CanonicalSheetReader. OpenAPI: no generate:api needed — the ImportStatus/SkippedFile generated schemas already match the new types byte-for-byte (same fields + SkipReason enum), so the API surface is unchanged. Closes #669 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
@startuml
|
||||
!include <C4/C4_Component>
|
||||
|
||||
title Component Diagram: API Backend — Document Management & Import
|
||||
title Component Diagram: API Backend — Document Management & Canonical Import
|
||||
|
||||
Container(frontend, "Web Frontend", "SvelteKit")
|
||||
ContainerDb(db, "PostgreSQL", "PostgreSQL 16")
|
||||
@@ -9,30 +9,48 @@ ContainerDb(minio, "Object Storage", "MinIO (S3-compatible)")
|
||||
|
||||
System_Boundary(backend, "API Backend (Spring Boot)") {
|
||||
Component(docCtrl, "DocumentController", "Spring MVC — /api/documents", "CRUD for documents: search, get by ID, update metadata, upload/download file, conversation thread, batch metadata updates, and per-month density aggregation for the timeline filter widget.")
|
||||
Component(adminCtrl, "AdminController", "Spring MVC — /api/admin", "Triggers asynchronous Excel/ODS mass import (requires ADMIN permission). Reports import state (IDLE/RUNNING/DONE/FAILED).")
|
||||
Component(adminCtrl, "AdminController", "Spring MVC — /api/admin", "Triggers the asynchronous canonical import (requires ADMIN permission). Reports import state (IDLE/RUNNING/DONE/FAILED).")
|
||||
Component(docSvc, "DocumentService", "Spring Service", "Core document business logic: store, update, search. Resolves persons and tags, delegates file I/O to FileService, builds dynamic JPA Specifications, and integrates with audit logging.")
|
||||
Component(fileSvc, "FileService", "Spring Service", "Wraps AWS SDK v2 S3Client. Uploads files with UUID-keyed paths, computes SHA-256 hash, downloads with content-type detection, and generates presigned URLs for OCR access.")
|
||||
Component(massImport, "MassImportService", "Spring Service — @Async", "Reads Excel/ODS files from /import mount. Tracks import state (IDLE/RUNNING/DONE/FAILED) and delegates to ExcelService. Returns immediately; processing runs asynchronously.")
|
||||
Component(excelSvc, "ExcelService", "Spring Service", "Parses Excel/ODS workbooks (Apache POI). Column indices configurable via application.properties. Creates/updates document records per row.")
|
||||
Component(importOrch, "CanonicalImportOrchestrator", "Spring Service — @Async", "Runs the four canonical loaders in an explicit dependency DAG (TagTree → PersonRegister → PersonTree → Document). Smoke-checks all four artifacts before starting, owns the IDLE/RUNNING/DONE/FAILED state machine, fails closed on a malformed artifact.")
|
||||
Component(tagTreeLoader, "TagTreeImporter", "Spring Component", "Upserts the tag hierarchy from canonical-tag-tree.xlsx via TagService (by canonical tag_path).")
|
||||
Component(personRegLoader, "PersonRegisterImporter", "Spring Component", "Upserts register persons from canonical-persons.xlsx via PersonService (by normalizer person_id).")
|
||||
Component(personTreeLoader, "PersonTreeImporter", "Spring Component", "Upserts tree persons + relationships from canonical-persons-tree.json via PersonService and RelationshipService.")
|
||||
Component(docLoader, "DocumentImporter", "Spring Component", "Loads canonical-documents.xlsx: routes attribution register-first (raw cell always retained in sender_text/receiver_text), parses clean dates, keeps the S3 upload + thumbnail plumbing, and ports the path-traversal / homoglyph / absolute-path / %PDF magic-byte security guards.")
|
||||
Component(sheetReader, "CanonicalSheetReader", "POI helper", "Maps a canonical .xlsx by header name (no positional indices), splits pipe-delimited list columns, fails closed (IMPORT_ARTIFACT_INVALID) on a missing required header.")
|
||||
Component(minioConf, "MinioConfig", "Spring @Configuration", "Creates the S3Client and S3Presigner beans with path-style access for MinIO. Validates MinIO connectivity on startup.")
|
||||
Component(docRepo, "DocumentRepository", "Spring Data JPA", "Queries documents with Specification-based dynamic search, bidirectional conversation thread queries, full-text search with ranking and match highlighting, and transcription pipeline queue projections.")
|
||||
Component(docSpec, "DocumentSpecifications", "JPA Criteria API", "Factory for composable predicates: hasText (full-text), hasSender, hasReceiver, isBetween (date range), hasTags (subquery AND/OR logic).")
|
||||
}
|
||||
|
||||
Component(personSvc, "PersonService", "Spring Service", "See diagram 3e. Called by DocumentService to resolve sender / receiver persons by ID.")
|
||||
Component(tagSvc, "TagService", "Spring Service", "See diagram 3d. Called by DocumentService to find or create tags by name.")
|
||||
Component(personSvc, "PersonService", "Spring Service", "See diagram 3e. Resolves sender / receiver persons by ID; upserts persons by source_ref for the importer.")
|
||||
Component(tagSvc, "TagService", "Spring Service", "See diagram 3d. Finds or creates tags by name; upserts tags by source_ref for the importer.")
|
||||
Component(relSvc, "RelationshipService", "Spring Service", "See diagram 3e. Creates family relationships from the person tree during import.")
|
||||
|
||||
Rel(frontend, docCtrl, "Document requests", "HTTP / JSON")
|
||||
Rel(frontend, adminCtrl, "Trigger import", "HTTP / JSON")
|
||||
Rel(docCtrl, docSvc, "Delegates to")
|
||||
Rel(adminCtrl, massImport, "Triggers")
|
||||
Rel(adminCtrl, importOrch, "Triggers")
|
||||
Rel(docSvc, fileSvc, "Upload / download files")
|
||||
Rel(docSvc, docRepo, "Reads / writes documents")
|
||||
Rel(docSvc, docSpec, "Builds search predicates")
|
||||
Rel(docSvc, personSvc, "Resolves sender / receivers")
|
||||
Rel(docSvc, tagSvc, "Finds or creates tags")
|
||||
Rel(massImport, excelSvc, "Parses Excel/ODS file")
|
||||
Rel(excelSvc, docSvc, "Creates / updates documents")
|
||||
Rel(importOrch, tagTreeLoader, "1. Loads tags")
|
||||
Rel(importOrch, personRegLoader, "2. Loads register persons")
|
||||
Rel(importOrch, personTreeLoader, "3. Loads tree persons + relationships")
|
||||
Rel(importOrch, docLoader, "4. Loads documents")
|
||||
Rel(tagTreeLoader, sheetReader, "Reads canonical .xlsx")
|
||||
Rel(personRegLoader, sheetReader, "Reads canonical .xlsx")
|
||||
Rel(docLoader, sheetReader, "Reads canonical .xlsx")
|
||||
Rel(tagTreeLoader, tagSvc, "Upserts tags by source_ref")
|
||||
Rel(personRegLoader, personSvc, "Upserts persons by source_ref")
|
||||
Rel(personTreeLoader, personSvc, "Upserts persons by source_ref")
|
||||
Rel(personTreeLoader, relSvc, "Creates relationships")
|
||||
Rel(docLoader, docSvc, "Upserts documents by index")
|
||||
Rel(docLoader, personSvc, "Register-first match / provisional person")
|
||||
Rel(docLoader, tagSvc, "Attaches tag by source_ref")
|
||||
Rel(docLoader, fileSvc, "Uploads resolved file")
|
||||
Rel(minioConf, fileSvc, "Provides S3Client and S3Presigner beans")
|
||||
Rel(fileSvc, minio, "PUT / GET / presigned URL objects", "S3 API / HTTP")
|
||||
Rel(docRepo, db, "SQL queries", "JDBC")
|
||||
|
||||
Reference in New Issue
Block a user