diff --git a/docs/architecture/c4-diagrams.md b/docs/architecture/c4-diagrams.md
index 032897b0..4645083c 100644
--- a/docs/architecture/c4-diagrams.md
+++ b/docs/architecture/c4-diagrams.md
@@ -277,6 +277,7 @@ C4Component
 Component(ocrAsync, "OcrAsyncRunner", "Spring Component — @Async", "Async worker that streams OCR results from Python page by page, persists transcription blocks and annotations via domain services, and emits progress via SSE.")
 Component(ocrClient, "RestClientOcrClient", "Spring Component", "HTTP client wrapping the Python service: POST /ocr/stream (NDJSON), /train, /segtrain, and /train-sender. Falls back from streaming to batch on 404.")
 Component(ocrTraining, "OcrTrainingService", "Spring Service", "Orchestrates model training: exports training data as ZIP, calls Python /train or /segtrain, persists training metrics in OcrTrainingRunRepository.")
+ Component(ocrJobRepo, "OcrJobRepository, OcrJobDocumentRepository", "Spring Data JPA", "Reads and writes OcrJob and OcrJobDocument records. Tracks job status (RUNNING/DONE/FAILED), per-document progress, page counts, and error messages.")
 }
 Component(transcriptionSvc, "TranscriptionService", "Spring Service", "See diagram 3b.2. Called by OcrAsyncRunner to persist transcription blocks per page.")
@@ -293,7 +294,8 @@ C4Component
 Rel(ocrClient, ocrPy, "POST /ocr/stream, /train, /segtrain, /train-sender", "HTTP / REST")
 Rel(ocrAsync, transcriptionSvc, "Saves transcription blocks per page", "")
 Rel(ocrAsync, annotationSvc, "Saves annotation regions per page", "")
- Rel(ocrAsync, db, "Reads / writes OCR job state", "JDBC")
+ Rel(ocrAsync, ocrJobRepo, "Reads / writes OCR job state", "")
+ Rel(ocrJobRepo, db, "SQL queries", "JDBC")
 Rel(ocrAsync, minio, "Generates presigned URLs for PDF fetch", "S3 API")
 Rel(ocrPy, minio, "Fetches PDF via presigned URL", "HTTP / S3 presigned")
 Rel(ocrTraining, db, "Persists training run metrics", "JDBC")