feat(ocr): full OCR pipeline with polygon annotations, training, and guided mode #232

Merged
marcel merged 40 commits from feat/issue-226-227-ocr-pipeline-polygon into main 2026-04-14 10:31:35 +02:00
2 changed files with 7 additions and 0 deletions
Showing only changes of commit 287920a982 - Show all commits

View File

@@ -42,6 +42,10 @@ public class OcrTrainingService {
List<OcrTrainingRun> runs
) {}
// Not safe for horizontal scaling: training reloads the Kraken model in-process on the
// Python OCR service after each run. The DB-level RUNNING constraint (V30 partial unique
// index) prevents concurrent training API calls, but cannot prevent two OCR service replicas
// from diverging on model state. Deploy as a single instance only. See ADR-001.
@Transactional
public OcrTrainingRun triggerTraining(UUID triggeredBy) {
if (trainingRunRepository.findFirstByStatus(TrainingStatus.RUNNING).isPresent()) {

View File

@@ -72,6 +72,9 @@ services:
- archive-net
# --- OCR: Python microservice (Surya + Kraken) ---
# Single-node only: OCR training reloads the model in-process after each run.
# Running multiple replicas would cause training conflicts and model-state divergence.
# See ADR-001 for the architectural rationale.
ocr-service:
build:
context: ./ocr-service