42 lines
3.1 KiB
Plaintext
42 lines
3.1 KiB
Plaintext
@startuml
' C4 component diagram for the OCR orchestration slice of the API backend.
' Shows how OCR and training requests enter through OcrController, fan out
' to the services and the async runner, and reach the Python OCR service,
' PostgreSQL and MinIO.
!include <C4/C4_Component>

title Component Diagram: API Backend — OCR Orchestration

' --- Neighbouring containers (outside the API backend) ---
Container(frontend, "Web Frontend", "SvelteKit")
ContainerDb(db, "PostgreSQL", "PostgreSQL 16")
ContainerDb(minio, "Object Storage", "MinIO (S3-compatible)")
Container(ocrPy, "OCR Service", "Python FastAPI")

' --- Components inside the API backend container ---
' Container_Boundary (not System_Boundary): the Spring Boot backend is a
' single container that this diagram decomposes into components.
Container_Boundary(backend, "API Backend (Spring Boot)") {
    Component(ocrCtrl, "OcrController", "Spring MVC — /api/ocr", "REST entry point: trigger single or batch OCR jobs, stream progress via SSE, query job status, and manage training runs and per-sender models.")
    Component(ocrSvc, "OcrService", "Spring Service", "Creates OcrJob and OcrJobDocument records, checks Python service health, and delegates async execution to OcrAsyncRunner.")
    Component(ocrBatch, "OcrBatchService", "Spring Service", "Orchestrates multi-document OCR jobs, iterating documents and delegating each to OcrAsyncRunner.")
    Component(ocrAsync, "OcrAsyncRunner", "Spring Component — @Async", "Async worker that streams OCR results from Python page by page, persists transcription blocks and annotations via domain services, and emits progress via SSE.")
    Component(ocrClient, "RestClientOcrClient", "Spring Component", "HTTP client wrapping the Python service: POST /ocr/stream (NDJSON), /train, /segtrain, and /train-sender. Falls back from streaming to batch on 404.")
    ' NOTE(review): the client exposes /train-sender and the controller manages
    ' per-sender models, but this description only lists /train and /segtrain —
    ' confirm which component drives per-sender training.
    Component(ocrTraining, "OcrTrainingService", "Spring Service", "Orchestrates model training: exports training data as ZIP, calls Python /train or /segtrain, persists training metrics in OcrTrainingRunRepository.")
    Component(ocrJobRepo, "OcrJobRepository, OcrJobDocumentRepository", "Spring Data JPA", "Reads and writes OcrJob and OcrJobDocument records. Tracks job status (RUNNING/DONE/FAILED), per-document progress, page counts, and error messages.")
    ' Named in the OcrTrainingService description; modelled explicitly so the
    ' persistence path matches the OcrJobRepository pattern.
    ' NOTE(review): technology assumed to be Spring Data JPA like the job
    ' repositories — confirm.
    Component(ocrTrainingRunRepo, "OcrTrainingRunRepository", "Spring Data JPA", "Reads and writes training run records and their metrics.")
}

' --- Domain services detailed in another diagram ---
Component(transcriptionSvc, "TranscriptionService", "Spring Service", "See diagram 3c. Called by OcrAsyncRunner to persist transcription blocks per page.")
Component(annotationSvc, "AnnotationService", "Spring Service", "See diagram 3c. Called by OcrAsyncRunner to persist OCR-generated annotation regions per page.")

' --- Inbound requests and controller fan-out ---
Rel(frontend, ocrCtrl, "OCR trigger, status, and progress requests", "HTTP / JSON / SSE")
Rel(ocrCtrl, ocrSvc, "Single-document jobs")
Rel(ocrCtrl, ocrBatch, "Batch jobs")
Rel(ocrCtrl, ocrTraining, "Training runs")

' --- Job creation and async execution ---
Rel(ocrSvc, ocrJobRepo, "Creates OcrJob and OcrJobDocument records")
Rel(ocrSvc, ocrAsync, "Delegates async execution")
Rel(ocrBatch, ocrAsync, "Delegates async execution")
' NOTE(review): OcrService is described as checking Python service health, but
' the call path is not shown here — confirm whether it goes through
' RestClientOcrClient and add the relationship if so.

' --- Calls to the Python OCR service ---
Rel(ocrAsync, ocrClient, "Streams OCR results page by page", "HTTP / NDJSON")
Rel(ocrTraining, ocrClient, "Sends training data ZIP", "HTTP / multipart")
Rel(ocrClient, ocrPy, "POST /ocr/stream, /train, /segtrain, /train-sender", "HTTP / REST")

' --- Persistence of OCR results and job state ---
Rel(ocrAsync, transcriptionSvc, "Saves transcription blocks per page")
Rel(ocrAsync, annotationSvc, "Saves annotation regions per page")
Rel(ocrAsync, ocrJobRepo, "Reads / writes OCR job state")
Rel(ocrJobRepo, db, "SQL queries", "JDBC")
Rel(ocrTraining, ocrTrainingRunRepo, "Persists training run metrics")
Rel(ocrTrainingRunRepo, db, "SQL queries", "JDBC")

' --- Object storage access for source PDFs ---
Rel(ocrAsync, minio, "Generates presigned URLs for PDF fetch", "S3 API")
Rel(ocrPy, minio, "Fetches PDF via presigned URL", "HTTP / S3 presigned")

@enduml
|