docs(c4): add C4-PlantUML diagram files for all architecture views #450

Merged
marcel merged 34 commits from docs/post-refactor-accuracy-audit into main 2026-05-06 22:52:39 +02:00
Showing only changes of commit 83f4f8352c - Show all commits

View File

@@ -277,6 +277,7 @@ C4Component
Component(ocrAsync, "OcrAsyncRunner", "Spring Component — @Async", "Async worker that streams OCR results from Python page by page, persists transcription blocks and annotations via domain services, and emits progress via SSE.")
Component(ocrClient, "RestClientOcrClient", "Spring Component", "HTTP client wrapping the Python service: POST /ocr/stream (NDJSON), /train, /segtrain, and /train-sender. Falls back from streaming to batch on 404.")
Component(ocrTraining, "OcrTrainingService", "Spring Service", "Orchestrates model training: exports training data as ZIP, calls Python /train or /segtrain, persists training metrics via OcrTrainingRunRepository.")
Component(ocrJobRepo, "OcrJobRepository, OcrJobDocumentRepository", "Spring Data JPA", "Reads and writes OcrJob and OcrJobDocument records. Tracks job status (RUNNING/DONE/FAILED), per-document progress, page counts, and error messages.")
}
Component(transcriptionSvc, "TranscriptionService", "Spring Service", "See diagram 3b.2. Called by OcrAsyncRunner to persist transcription blocks per page.")
@@ -293,7 +294,8 @@ C4Component
Rel(ocrClient, ocrPy, "POST /ocr/stream, /train, /segtrain, /train-sender", "HTTP / REST")
Rel(ocrAsync, transcriptionSvc, "Saves transcription blocks per page", "")
Rel(ocrAsync, annotationSvc, "Saves annotation regions per page", "")
Rel(ocrAsync, db, "Reads / writes OCR job state", "JDBC")
Rel(ocrAsync, ocrJobRepo, "Reads / writes OCR job state", "")
Rel(ocrJobRepo, db, "SQL queries", "JDBC")
Rel(ocrAsync, minio, "Generates presigned URLs for PDF fetch", "S3 API")
Rel(ocrPy, minio, "Fetches PDF via presigned URL", "HTTP / S3 presigned")
Rel(ocrTraining, db, "Persists training run metrics", "JDBC")