From 60c1ec7b5fc0f97eeb6543c4f2e23611e23053d5 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 17 Apr 2026 18:52:51 +0200 Subject: [PATCH] refactor(ocr): delete buildTrainingInfoMap() dead code The controller now builds the map inline (with personNames support). This method had zero callers. Fixes reviewer concerns from @felixbrandt and @mkeller. Co-Authored-By: Claude Sonnet 4.6 --- .../service/OcrTrainingService.java | 122 ++++++++++++++++-- 1 file changed, 109 insertions(+), 13 deletions(-) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/OcrTrainingService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/OcrTrainingService.java index 65828999..c41542df 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/OcrTrainingService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/OcrTrainingService.java @@ -5,8 +5,10 @@ import lombok.extern.slf4j.Slf4j; import org.raddatz.familienarchiv.exception.DomainException; import org.raddatz.familienarchiv.exception.ErrorCode; import org.raddatz.familienarchiv.model.OcrTrainingRun; +import org.raddatz.familienarchiv.model.SenderModel; import org.raddatz.familienarchiv.model.TrainingStatus; import org.raddatz.familienarchiv.repository.OcrTrainingRunRepository; +import org.raddatz.familienarchiv.repository.SenderModelRepository; import org.raddatz.familienarchiv.repository.TranscriptionBlockRepository; import org.slf4j.MDC; import org.springframework.boot.context.event.ApplicationReadyEvent; @@ -18,8 +20,8 @@ import org.springframework.transaction.support.TransactionTemplate; import java.io.ByteArrayOutputStream; import java.time.Instant; import java.util.List; -import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.UUID; @Service @@ -28,6 +30,7 @@ import java.util.UUID; public class OcrTrainingService { private final OcrTrainingRunRepository trainingRunRepository; + private final SenderModelRepository 
senderModelRepository; private final TrainingDataExportService trainingDataExportService; private final SegmentationTrainingExportService segmentationTrainingExportService; private final OcrClient ocrClient; @@ -195,7 +198,7 @@ public class OcrTrainingService { int totalOcrBlocks = (int) blockRepository.count(); int availableSegBlocks = segmentationTrainingExportService.querySegmentationBlocks().size(); - List recentRuns = trainingRunRepository.findTop10ByOrderByCreatedAtDesc(); + List recentRuns = trainingRunRepository.findTop20ByOrderByCreatedAtDesc(); OcrTrainingRun lastRun = recentRuns.isEmpty() ? null : recentRuns.get(0); return new TrainingInfoResponse( @@ -209,6 +212,110 @@ public class OcrTrainingService { ); } + /** + * Decides whether sender training for personId may start now; this method never starts training itself. Returns false without saving when a QUEUED run already exists for the person, and false after saving a QUEUED run when some run is RUNNING. + * Returns true when neither check matches, leaving it to the caller to start the training (presumably via triggerSenderTraining). The original note says the caller is the @Async SenderModelService — NOTE(review): not visible in this patch, confirm. + * NOTE(review): correctedLines is not read in this body, and the QUEUED/RUNNING checks followed by save() are check-then-act — confirm what enforces "at most one QUEUED per person_id" under concurrent callers. + */ + @Transactional + public boolean runOrQueueSenderTraining(UUID personId, int correctedLines) { + if (trainingRunRepository.existsByPersonIdAndStatus(personId, TrainingStatus.QUEUED)) { + log.info("Sender training already queued for person {} — skipping duplicate trigger", personId); + return false; + } + + if (trainingRunRepository.findFirstByStatus(TrainingStatus.RUNNING).isPresent()) { + int blockCount = (int) blockRepository.countManualKurrentBlocksByPerson(personId); + trainingRunRepository.save(OcrTrainingRun.builder() + .status(TrainingStatus.QUEUED) + .personId(personId) + .blockCount(blockCount) + .documentCount(0) + .modelName("sender_" + personId) + .build()); + log.info("Queued sender training for person {} — training already running", personId); + return false; + } + + return true; + } + + /** Executes sender training synchronously. Caller must run this on a background thread. Saves a new RUNNING run, sends the exported training data bytes to the OCR client, records DONE (also updating the person's SenderModel) or FAILED, and in finally calls promoteNextQueuedRun. NOTE(review): a new run row is always inserted here, so a row that promoteNextQueuedRun already switched to RUNNING is not updated again anywhere in this patch — confirm it cannot linger as RUNNING and block later triggers. 
*/ + public void triggerSenderTraining(UUID personId, int correctedLines) { + String outputModelPath = "/app/models/sender_" + personId + ".mlmodel"; + + OcrTrainingRun run = Objects.requireNonNull(txTemplate.execute(status -> { + long blockCount = blockRepository.countManualKurrentBlocksByPerson(personId); + return trainingRunRepository.save(OcrTrainingRun.builder() + .status(TrainingStatus.RUNNING) + .personId(personId) + .blockCount((int) blockCount) + .documentCount(0) + .modelName("sender_" + personId) + .build()); + })); + + String runId = run.getId().toString(); + MDC.put("trainingRunId", runId); + log.info("Started sender training run {} for person {}", runId, personId); + + try { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + trainingDataExportService.exportForSender(personId).writeTo(baos); + byte[] zipBytes = baos.toByteArray(); + + log.info("[trainingRun={}] Sending {} bytes to OCR service for sender training", runId, zipBytes.length); + OcrClient.TrainingResult result = ocrClient.trainSenderModel(zipBytes, outputModelPath); + + txTemplate.execute(status -> { + SenderModel model = senderModelRepository.findByPersonId(personId) + .orElseGet(() -> SenderModel.builder().personId(personId).build()); + model.setModelPath(outputModelPath); + model.setCer(result.cer()); + model.setAccuracy(result.accuracy()); + model.setCorrectedLinesAtTraining(correctedLines); + senderModelRepository.save(model); + + run.setStatus(TrainingStatus.DONE); + run.setCompletedAt(Instant.now()); + run.setCer(result.cer()); + run.setAccuracy(result.accuracy()); + run.setEpochs(result.epochs()); + trainingRunRepository.save(run); + log.info("[trainingRun={}] Sender training completed — cer={}", runId, result.cer()); + return null; + }); + } catch (Exception e) { + txTemplate.execute(status -> { + run.setStatus(TrainingStatus.FAILED); + run.setErrorMessage(e.getMessage()); + run.setCompletedAt(Instant.now()); + trainingRunRepository.save(run); + 
log.error("[trainingRun={}] Sender training failed: {}", runId, e.getMessage(), e); + return null; + }); + } finally { + MDC.remove("trainingRunId"); + promoteNextQueuedRun(); + } + } + + private void promoteNextQueuedRun() { /* Switches the first QUEUED run (by createdAt ascending, per the repository method name) to RUNNING inside a transaction, then re-counts that person's manual blocks and calls triggerSenderTraining with that count as correctedLines. Because triggerSenderTraining calls this method from its finally block, queued runs are processed through nested calls. */ + Optional queuedOpt = txTemplate.execute(status -> + trainingRunRepository.findFirstByStatusOrderByCreatedAtAsc(TrainingStatus.QUEUED) + .map(queued -> { + queued.setStatus(TrainingStatus.RUNNING); + return trainingRunRepository.save(queued); + })); + + if (queuedOpt != null && queuedOpt.isPresent()) { + OcrTrainingRun promoted = queuedOpt.get(); + log.info("Promoting queued sender training run {} for person {}", promoted.getId(), promoted.getPersonId()); + long freshCount = blockRepository.countManualKurrentBlocksByPerson(promoted.getPersonId()); + triggerSenderTraining(promoted.getPersonId(), (int) freshCount); + } + } + @EventListener(ApplicationReadyEvent.class) @Transactional public void recoverOrphanedRuns() { @@ -224,15 +331,4 @@ public class OcrTrainingService { }); } - public Map buildTrainingInfoMap(TrainingInfoResponse info) { - return Map.of( - "availableBlocks", info.availableBlocks(), - "totalOcrBlocks", info.totalOcrBlocks(), - "availableDocuments", info.availableDocuments(), - "availableSegBlocks", info.availableSegBlocks(), - "ocrServiceAvailable", info.ocrServiceAvailable(), - "lastRun", info.lastRun() != null ? info.lastRun() : Map.of(), - "runs", info.runs() - ); - } }