docs(c4): add 3d OCR orchestration and 3e supporting domains

This commit is contained in:
Marcel
2026-05-06 09:58:55 +02:00
parent dbbb989aa6
commit f744e8c59d

View File

@@ -244,6 +244,85 @@ C4Component
Rel(personRepo, db, "SQL queries", "JDBC")
```
### 3d — OCR Orchestration
How the Spring Boot backend manages OCR jobs, streams results, and trains recognition models.
```mermaid
C4Component
title Component Diagram: API Backend — OCR Orchestration
Container(frontend, "Web Frontend", "SvelteKit")
ContainerDb(db, "PostgreSQL")
ContainerDb(minio, "MinIO")
Container(ocrPy, "OCR Service", "Python FastAPI")
System_Boundary(backend, "API Backend (Spring Boot)") {
Component(ocrCtrl, "OcrController", "Spring MVC — /api/ocr", "REST entry point: trigger single or batch OCR jobs, stream progress via SSE, query job status, and manage training runs and per-sender models.")
Component(ocrSvc, "OcrService", "Spring Service", "Creates OcrJob and OcrJobDocument records, checks Python service health, and delegates async execution to OcrAsyncRunner.")
Component(ocrBatch, "OcrBatchService", "Spring Service", "Orchestrates multi-document OCR jobs, iterating documents and delegating each to OcrAsyncRunner.")
Component(ocrAsync, "OcrAsyncRunner", "Spring Component — @Async", "Async worker that streams OCR results from Python page by page, persists transcription blocks and annotations via domain services, and emits progress via SSE.")
Component(ocrClient, "RestClientOcrClient", "Spring Component", "HTTP client wrapping the Python service: POST /ocr/stream (NDJSON), /train, /segtrain, and /train-sender. Falls back from streaming to batch on 404.")
Component(ocrTraining, "OcrTrainingService", "Spring Service", "Orchestrates model training: exports training data as ZIP, calls Python /train or /segtrain, and persists training metrics via OcrTrainingRunRepository.")
}
Rel(frontend, ocrCtrl, "OCR trigger, status, and progress requests", "HTTP / JSON / SSE")
Rel(ocrCtrl, ocrSvc, "Single-document jobs", "")
Rel(ocrCtrl, ocrBatch, "Batch jobs", "")
Rel(ocrCtrl, ocrTraining, "Training runs", "")
Rel(ocrSvc, ocrAsync, "Delegates async execution", "")
Rel(ocrBatch, ocrAsync, "Delegates async execution", "")
Rel(ocrAsync, ocrClient, "Streams OCR results page by page", "HTTP / NDJSON")
Rel(ocrTraining, ocrClient, "Sends training data ZIP", "HTTP / multipart")
Rel(ocrClient, ocrPy, "POST /ocr/stream, /train, /segtrain, /train-sender", "HTTP / REST")
Rel(ocrAsync, db, "Reads / writes job state; persists transcription blocks and annotations via domain services", "JDBC")
Rel(ocrAsync, minio, "Generates presigned URLs for PDF fetch", "S3 API")
Rel(ocrPy, minio, "Fetches PDF via presigned URL", "HTTP / S3 presigned")
Rel(ocrTraining, db, "Persists training run metrics", "JDBC")
```
### 3e — Supporting Domains
Audit logging, dashboard stats, SSE notifications, stories (Geschichten), and cross-cutting exception handling.
```mermaid
C4Component
title Component Diagram: API Backend — Supporting Domains
Container(frontend, "Web Frontend", "SvelteKit")
ContainerDb(db, "PostgreSQL")
System_Boundary(backend, "API Backend (Spring Boot)") {
Component(auditSvc, "AuditService", "Spring Service — @Async", "Writes audit log entries asynchronously via a dedicated TaskExecutor, with transaction-aware logging to prevent deadlocks on concurrent saves.")
Component(auditQuery, "AuditLogQueryService", "Spring Service", "Queries audit logs for activity feeds, pulse stats, recent contributors, and per-document history. Facade over AuditLogRepository.")
Component(statsCtrl, "StatsController", "Spring MVC — /api/stats", "Returns aggregate counts (total persons, total documents) for the UI stats bar.")
Component(dashSvc, "DashboardService", "Spring Service", "Assembles the user dashboard: recent document resume, weekly transcription pulse stats, and activity feed with contributor avatars.")
Component(notifCtrl, "NotificationController", "Spring MVC — /api/notifications", "REST and SSE endpoints for notification stream, history with filtering, read/unread state, and per-user preference management.")
Component(notifSvc, "NotificationService", "Spring Service", "Creates REPLY and MENTION notifications, optionally sends email, marks notifications as read, and pushes events to connected clients via SseEmitterRegistry.")
Component(sseRegistry, "SseEmitterRegistry", "Spring Component", "In-memory ConcurrentHashMap of Spring SseEmitter instances per user. Handles registration, deregistration, and JSON event broadcasts.")
Component(geschCtrl, "GeschichteController", "Spring MVC — /api/geschichten", "CRUD for publishable stories that link persons and documents. Requires BLOG_WRITE permission for write operations.")
Component(geschSvc, "GeschichteService", "Spring Service", "Manages story lifecycle (DRAFT → PUBLISHED with timestamp). Sanitizes HTML body with an allowlist policy.")
Component(exHandler, "GlobalExceptionHandler", "Spring @RestControllerAdvice", "Converts DomainException, validation errors, and generic exceptions to ErrorResponse JSON with machine-readable ErrorCode and HTTP status.")
}
Rel(frontend, statsCtrl, "GET /api/stats", "HTTP / JSON")
Rel(frontend, notifCtrl, "Notification stream and history", "HTTP / JSON / SSE")
Rel(frontend, geschCtrl, "Story requests", "HTTP / JSON")
Rel(dashSvc, auditQuery, "Fetches activity feed and pulse stats", "")
Rel(notifCtrl, notifSvc, "Delegates to", "")
Rel(notifCtrl, sseRegistry, "Registers client SSE connection", "")
Rel(notifSvc, sseRegistry, "Broadcasts events to connected clients", "")
Rel(geschCtrl, geschSvc, "Delegates to", "")
Rel(auditSvc, db, "Writes audit_log", "JDBC")
Rel(auditQuery, db, "Reads audit_log", "JDBC")
Rel(notifSvc, db, "Reads / writes notifications", "JDBC")
Rel(geschSvc, db, "Reads / writes geschichten", "JDBC")
```
---
## Level 3 — Components: Web Frontend