docs(c4): add OcrJobRepository intermediary in 3d — route ocrAsync through repo, not bare db

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-06 12:16:54 +02:00
committed by marcel
parent f2a901eabf
commit b1f9f1603c

View File

@@ -277,6 +277,7 @@ C4Component
Component(ocrAsync, "OcrAsyncRunner", "Spring Component — @Async", "Async worker that streams OCR results from Python page by page, persists transcription blocks and annotations via domain services, and emits progress via SSE.")
Component(ocrClient, "RestClientOcrClient", "Spring Component", "HTTP client wrapping the Python service: POST /ocr/stream (NDJSON), /train, /segtrain, and /train-sender. Falls back from streaming to batch on 404.")
Component(ocrTraining, "OcrTrainingService", "Spring Service", "Orchestrates model training: exports training data as ZIP, calls Python /train or /segtrain, persists training metrics in OcrTrainingRunRepository.")
+Component(ocrJobRepo, "OcrJobRepository, OcrJobDocumentRepository", "Spring Data JPA", "Reads and writes OcrJob and OcrJobDocument records. Tracks job status (RUNNING/DONE/FAILED), per-document progress, page counts, and error messages.")
}
Component(transcriptionSvc, "TranscriptionService", "Spring Service", "See diagram 3b.2. Called by OcrAsyncRunner to persist transcription blocks per page.")
@@ -293,7 +294,8 @@ C4Component
Rel(ocrClient, ocrPy, "POST /ocr/stream, /train, /segtrain, /train-sender", "HTTP / REST")
Rel(ocrAsync, transcriptionSvc, "Saves transcription blocks per page", "")
Rel(ocrAsync, annotationSvc, "Saves annotation regions per page", "")
-Rel(ocrAsync, db, "Reads / writes OCR job state", "JDBC")
+Rel(ocrAsync, ocrJobRepo, "Reads / writes OCR job state", "")
+Rel(ocrJobRepo, db, "SQL queries", "JDBC")
Rel(ocrAsync, minio, "Generates presigned URLs for PDF fetch", "S3 API")
Rel(ocrPy, minio, "Fetches PDF via presigned URL", "HTTP / S3 presigned")
Rel(ocrTraining, db, "Persists training run metrics", "JDBC")