From f744e8c59daeb72d868416b709c098a9bc34f83c Mon Sep 17 00:00:00 2001
From: Marcel
Date: Wed, 6 May 2026 09:58:55 +0200
Subject: [PATCH] docs(c4): add 3d OCR orchestration and 3e supporting domains

---
 docs/architecture/c4-diagrams.md | 79 ++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/docs/architecture/c4-diagrams.md b/docs/architecture/c4-diagrams.md
index 6161aa01..ea7483db 100644
--- a/docs/architecture/c4-diagrams.md
+++ b/docs/architecture/c4-diagrams.md
@@ -244,6 +244,85 @@ C4Component
     Rel(personRepo, db, "SQL queries", "JDBC")
 ```
 
+### 3d — OCR Orchestration
+
+How the Spring Boot backend manages OCR jobs, streams results, and trains recognition models.
+
+```mermaid
+C4Component
+    title Component Diagram: API Backend — OCR Orchestration
+
+    Container(frontend, "Web Frontend", "SvelteKit")
+    ContainerDb(db, "PostgreSQL")
+    ContainerDb(minio, "MinIO")
+    Container(ocrPy, "OCR Service", "Python FastAPI")
+
+    System_Boundary(backend, "API Backend (Spring Boot)") {
+        Component(ocrCtrl, "OcrController", "Spring MVC — /api/ocr", "REST entry point: trigger single or batch OCR jobs, stream progress via SSE, query job status, and manage training runs and per-sender models.")
+        Component(ocrSvc, "OcrService", "Spring Service", "Creates OcrJob and OcrJobDocument records, checks Python service health, and delegates async execution to OcrAsyncRunner.")
+        Component(ocrBatch, "OcrBatchService", "Spring Service", "Orchestrates multi-document OCR jobs, iterating documents and delegating each to OcrAsyncRunner.")
+        Component(ocrAsync, "OcrAsyncRunner", "Spring Component — @Async", "Async worker that streams OCR results from Python page by page, persists transcription blocks and annotations via domain services, and emits progress via SSE.")
+        Component(ocrClient, "RestClientOcrClient", "Spring Component", "HTTP client wrapping the Python service: POST /ocr/stream (NDJSON), /train, /segtrain, and /train-sender. Falls back from streaming to batch on 404.")
+        Component(ocrTraining, "OcrTrainingService", "Spring Service", "Orchestrates model training: exports training data as ZIP, calls Python /train or /segtrain, persists training metrics in OcrTrainingRunRepository.")
+    }
+
+    Rel(frontend, ocrCtrl, "OCR trigger, status, and progress requests", "HTTP / JSON / SSE")
+    Rel(ocrCtrl, ocrSvc, "Single-document jobs", "")
+    Rel(ocrCtrl, ocrBatch, "Batch jobs", "")
+    Rel(ocrCtrl, ocrTraining, "Training runs", "")
+    Rel(ocrSvc, ocrAsync, "Delegates async execution", "")
+    Rel(ocrBatch, ocrAsync, "Delegates async execution", "")
+    Rel(ocrAsync, ocrClient, "Streams OCR results page by page", "HTTP / NDJSON")
+    Rel(ocrTraining, ocrClient, "Sends training data ZIP", "HTTP / multipart")
+    Rel(ocrClient, ocrPy, "POST /ocr/stream, /train, /segtrain, /train-sender", "HTTP / REST")
+    Rel(ocrAsync, db, "Reads / writes job state, transcription blocks, annotations", "JDBC")
+    Rel(ocrAsync, minio, "Generates presigned URLs for PDF fetch", "S3 API")
+    Rel(ocrPy, minio, "Fetches PDF via presigned URL", "HTTP / S3 presigned")
+    Rel(ocrTraining, db, "Persists training run metrics", "JDBC")
+```
+
+### 3e — Supporting Domains
+
+Audit logging, dashboard stats, SSE notifications, stories (Geschichten), and cross-cutting exception handling.
+
+```mermaid
+C4Component
+    title Component Diagram: API Backend — Supporting Domains
+
+    Container(frontend, "Web Frontend", "SvelteKit")
+    ContainerDb(db, "PostgreSQL")
+
+    System_Boundary(backend, "API Backend (Spring Boot)") {
+        Component(auditSvc, "AuditService", "Spring Service — @Async", "Writes audit log entries asynchronously via a dedicated TaskExecutor, with transaction-aware logging to prevent deadlocks on concurrent saves.")
+        Component(auditQuery, "AuditLogQueryService", "Spring Service", "Queries audit logs for activity feeds, pulse stats, recent contributors, and per-document history. Facade over AuditLogRepository.")
+
+        Component(statsCtrl, "StatsController", "Spring MVC — /api/stats", "Returns aggregate counts (total persons, total documents) for the UI stats bar.")
+        Component(dashSvc, "DashboardService", "Spring Service", "Assembles the user dashboard: recent document resume, weekly transcription pulse stats, and activity feed with contributor avatars.")
+
+        Component(notifCtrl, "NotificationController", "Spring MVC — /api/notifications", "REST and SSE endpoints for notification stream, history with filtering, read/unread state, and per-user preference management.")
+        Component(notifSvc, "NotificationService", "Spring Service", "Creates REPLY and MENTION notifications, optionally sends email, marks as read, and pushes events to connected clients via SseEmitterRegistry.")
+        Component(sseRegistry, "SseEmitterRegistry", "Spring Component", "In-memory ConcurrentHashMap of Spring SseEmitter instances per user. Handles registration, deregistration, and JSON event broadcasts.")
+
+        Component(geschCtrl, "GeschichteController", "Spring MVC — /api/geschichten", "CRUD for publishable stories that link persons and documents. Requires BLOG_WRITE permission for write operations.")
+        Component(geschSvc, "GeschichteService", "Spring Service", "Manages story lifecycle (DRAFT → PUBLISHED with timestamp). Sanitizes HTML body with an allowlist policy.")
+
+        Component(exHandler, "GlobalExceptionHandler", "Spring @RestControllerAdvice", "Converts DomainException, validation errors, and generic exceptions to ErrorResponse JSON with machine-readable ErrorCode and HTTP status.")
+    }
+
+    Rel(frontend, statsCtrl, "GET /api/stats", "HTTP / JSON")
+    Rel(frontend, notifCtrl, "Notification stream and history", "HTTP / JSON / SSE")
+    Rel(frontend, geschCtrl, "Story requests", "HTTP / JSON")
+    Rel(dashSvc, auditQuery, "Fetches activity feed and pulse stats", "")
+    Rel(notifCtrl, notifSvc, "Delegates to", "")
+    Rel(notifCtrl, sseRegistry, "Registers client SSE connection", "")
+    Rel(notifSvc, sseRegistry, "Broadcasts events to connected clients", "")
+    Rel(geschCtrl, geschSvc, "Delegates to", "")
+    Rel(auditSvc, db, "Writes audit_log", "JDBC")
+    Rel(auditQuery, db, "Reads audit_log", "JDBC")
+    Rel(notifSvc, db, "Reads / writes notifications", "JDBC")
+    Rel(geschSvc, db, "Reads / writes geschichten", "JDBC")
+```
+
 ---
 
 ## Level 3 — Components: Web Frontend