diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/OcrTrainingService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/OcrTrainingService.java index 4b688119..e241aaa3 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/OcrTrainingService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/OcrTrainingService.java @@ -42,6 +42,10 @@ public class OcrTrainingService { List runs ) {} + // Not safe for horizontal scaling: training reloads the Kraken model in-process on the + // Python OCR service after each run. The DB-level RUNNING constraint (V30 partial unique + // index) prevents concurrent training API calls, but cannot prevent two OCR service replicas + // from diverging on model state. Deploy as a single instance only. See ADR-001. @Transactional public OcrTrainingRun triggerTraining(UUID triggeredBy) { if (trainingRunRepository.findFirstByStatus(TrainingStatus.RUNNING).isPresent()) { diff --git a/docker-compose.yml b/docker-compose.yml index 5e06094b..bf57501a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -72,6 +72,9 @@ services: - archive-net # --- OCR: Python microservice (Surya + Kraken) --- + # Single-node only: OCR training reloads the model in-process after each run. + # Running multiple replicas would cause training conflicts and model-state divergence. + # See ADR-001 for the architectural rationale. ocr-service: build: context: ./ocr-service