docs(c4): accuracy audit — split L3 diagrams, add 6 new sub-diagrams, fix all stale content #448

Merged
marcel merged 27 commits from docs/post-refactor-accuracy-audit into main 2026-05-06 20:00:16 +02:00
Showing only changes of commit 35d82814a6 - Show all commits

View File

@@ -277,6 +277,9 @@ C4Component
Component(ocrTraining, "OcrTrainingService", "Spring Service", "Orchestrates model training: exports training data as ZIP, calls Python /train or /segtrain, persists training metrics in OcrTrainingRunRepository.") Component(ocrTraining, "OcrTrainingService", "Spring Service", "Orchestrates model training: exports training data as ZIP, calls Python /train or /segtrain, persists training metrics in OcrTrainingRunRepository.")
} }
Component(transcriptionSvc, "TranscriptionService", "Spring Service", "See diagram 3b.2. Called by OcrAsyncRunner to persist transcription blocks per page.")
Component(annotationSvc, "AnnotationService", "Spring Service", "See diagram 3b.2. Called by OcrAsyncRunner to persist OCR-generated annotation regions per page.")
Rel(frontend, ocrCtrl, "OCR trigger, status, and progress requests", "HTTP / JSON / SSE") Rel(frontend, ocrCtrl, "OCR trigger, status, and progress requests", "HTTP / JSON / SSE")
Rel(ocrCtrl, ocrSvc, "Single-document jobs", "") Rel(ocrCtrl, ocrSvc, "Single-document jobs", "")
Rel(ocrCtrl, ocrBatch, "Batch jobs", "") Rel(ocrCtrl, ocrBatch, "Batch jobs", "")
@@ -286,7 +289,9 @@ C4Component
Rel(ocrAsync, ocrClient, "Streams OCR results page by page", "HTTP / NDJSON") Rel(ocrAsync, ocrClient, "Streams OCR results page by page", "HTTP / NDJSON")
Rel(ocrTraining, ocrClient, "Sends training data ZIP", "HTTP / multipart") Rel(ocrTraining, ocrClient, "Sends training data ZIP", "HTTP / multipart")
Rel(ocrClient, ocrPy, "POST /ocr/stream, /train, /segtrain, /train-sender", "HTTP / REST") Rel(ocrClient, ocrPy, "POST /ocr/stream, /train, /segtrain, /train-sender", "HTTP / REST")
Rel(ocrAsync, db, "Reads / writes job state, transcription blocks, annotations", "JDBC") Rel(ocrAsync, transcriptionSvc, "Saves transcription blocks per page", "")
Rel(ocrAsync, annotationSvc, "Saves annotation regions per page", "")
Rel(ocrAsync, db, "Reads / writes OCR job state", "JDBC")
Rel(ocrAsync, minio, "Generates presigned URLs for PDF fetch", "S3 API") Rel(ocrAsync, minio, "Generates presigned URLs for PDF fetch", "S3 API")
Rel(ocrPy, minio, "Fetches PDF via presigned URL", "HTTP / S3 presigned") Rel(ocrPy, minio, "Fetches PDF via presigned URL", "HTTP / S3 presigned")
Rel(ocrTraining, db, "Persists training run metrics", "JDBC") Rel(ocrTraining, db, "Persists training run metrics", "JDBC")