refactor(ocr): delete buildTrainingInfoMap() dead code
The controller now builds the map inline (with personNames support). This method had zero callers. Fixes reviewer concerns from @felixbrandt and @mkeller. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,8 +5,10 @@ import lombok.extern.slf4j.Slf4j;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.model.OcrTrainingRun;
|
||||
import org.raddatz.familienarchiv.model.SenderModel;
|
||||
import org.raddatz.familienarchiv.model.TrainingStatus;
|
||||
import org.raddatz.familienarchiv.repository.OcrTrainingRunRepository;
|
||||
import org.raddatz.familienarchiv.repository.SenderModelRepository;
|
||||
import org.raddatz.familienarchiv.repository.TranscriptionBlockRepository;
|
||||
import org.slf4j.MDC;
|
||||
import org.springframework.boot.context.event.ApplicationReadyEvent;
|
||||
@@ -18,8 +20,8 @@ import org.springframework.transaction.support.TransactionTemplate;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
|
||||
@Service
|
||||
@@ -28,6 +30,7 @@ import java.util.UUID;
|
||||
public class OcrTrainingService {
|
||||
|
||||
private final OcrTrainingRunRepository trainingRunRepository;
|
||||
private final SenderModelRepository senderModelRepository;
|
||||
private final TrainingDataExportService trainingDataExportService;
|
||||
private final SegmentationTrainingExportService segmentationTrainingExportService;
|
||||
private final OcrClient ocrClient;
|
||||
@@ -195,7 +198,7 @@ public class OcrTrainingService {
|
||||
int totalOcrBlocks = (int) blockRepository.count();
|
||||
int availableSegBlocks = segmentationTrainingExportService.querySegmentationBlocks().size();
|
||||
|
||||
List<OcrTrainingRun> recentRuns = trainingRunRepository.findTop10ByOrderByCreatedAtDesc();
|
||||
List<OcrTrainingRun> recentRuns = trainingRunRepository.findTop20ByOrderByCreatedAtDesc();
|
||||
OcrTrainingRun lastRun = recentRuns.isEmpty() ? null : recentRuns.get(0);
|
||||
|
||||
return new TrainingInfoResponse(
|
||||
@@ -209,6 +212,110 @@ public class OcrTrainingService {
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Called from SenderModelService (which is @Async). Inserts a QUEUED row when training
|
||||
* is already running (coalescing: at most one QUEUED per person_id). Runs immediately if idle.
|
||||
* Returns true if training was started, false if queued or skipped.
|
||||
*/
|
||||
@Transactional
|
||||
public boolean runOrQueueSenderTraining(UUID personId, int correctedLines) {
|
||||
if (trainingRunRepository.existsByPersonIdAndStatus(personId, TrainingStatus.QUEUED)) {
|
||||
log.info("Sender training already queued for person {} — skipping duplicate trigger", personId);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (trainingRunRepository.findFirstByStatus(TrainingStatus.RUNNING).isPresent()) {
|
||||
int blockCount = (int) blockRepository.countManualKurrentBlocksByPerson(personId);
|
||||
trainingRunRepository.save(OcrTrainingRun.builder()
|
||||
.status(TrainingStatus.QUEUED)
|
||||
.personId(personId)
|
||||
.blockCount(blockCount)
|
||||
.documentCount(0)
|
||||
.modelName("sender_" + personId)
|
||||
.build());
|
||||
log.info("Queued sender training for person {} — training already running", personId);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Executes sender training synchronously. Caller must run this on a background thread. */
|
||||
public void triggerSenderTraining(UUID personId, int correctedLines) {
|
||||
String outputModelPath = "/app/models/sender_" + personId + ".mlmodel";
|
||||
|
||||
OcrTrainingRun run = Objects.requireNonNull(txTemplate.execute(status -> {
|
||||
long blockCount = blockRepository.countManualKurrentBlocksByPerson(personId);
|
||||
return trainingRunRepository.save(OcrTrainingRun.builder()
|
||||
.status(TrainingStatus.RUNNING)
|
||||
.personId(personId)
|
||||
.blockCount((int) blockCount)
|
||||
.documentCount(0)
|
||||
.modelName("sender_" + personId)
|
||||
.build());
|
||||
}));
|
||||
|
||||
String runId = run.getId().toString();
|
||||
MDC.put("trainingRunId", runId);
|
||||
log.info("Started sender training run {} for person {}", runId, personId);
|
||||
|
||||
try {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
trainingDataExportService.exportForSender(personId).writeTo(baos);
|
||||
byte[] zipBytes = baos.toByteArray();
|
||||
|
||||
log.info("[trainingRun={}] Sending {} bytes to OCR service for sender training", runId, zipBytes.length);
|
||||
OcrClient.TrainingResult result = ocrClient.trainSenderModel(zipBytes, outputModelPath);
|
||||
|
||||
txTemplate.execute(status -> {
|
||||
SenderModel model = senderModelRepository.findByPersonId(personId)
|
||||
.orElseGet(() -> SenderModel.builder().personId(personId).build());
|
||||
model.setModelPath(outputModelPath);
|
||||
model.setCer(result.cer());
|
||||
model.setAccuracy(result.accuracy());
|
||||
model.setCorrectedLinesAtTraining(correctedLines);
|
||||
senderModelRepository.save(model);
|
||||
|
||||
run.setStatus(TrainingStatus.DONE);
|
||||
run.setCompletedAt(Instant.now());
|
||||
run.setCer(result.cer());
|
||||
run.setAccuracy(result.accuracy());
|
||||
run.setEpochs(result.epochs());
|
||||
trainingRunRepository.save(run);
|
||||
log.info("[trainingRun={}] Sender training completed — cer={}", runId, result.cer());
|
||||
return null;
|
||||
});
|
||||
} catch (Exception e) {
|
||||
txTemplate.execute(status -> {
|
||||
run.setStatus(TrainingStatus.FAILED);
|
||||
run.setErrorMessage(e.getMessage());
|
||||
run.setCompletedAt(Instant.now());
|
||||
trainingRunRepository.save(run);
|
||||
log.error("[trainingRun={}] Sender training failed: {}", runId, e.getMessage(), e);
|
||||
return null;
|
||||
});
|
||||
} finally {
|
||||
MDC.remove("trainingRunId");
|
||||
promoteNextQueuedRun();
|
||||
}
|
||||
}
|
||||
|
||||
private void promoteNextQueuedRun() {
|
||||
Optional<OcrTrainingRun> queuedOpt = txTemplate.execute(status ->
|
||||
trainingRunRepository.findFirstByStatusOrderByCreatedAtAsc(TrainingStatus.QUEUED)
|
||||
.map(queued -> {
|
||||
queued.setStatus(TrainingStatus.RUNNING);
|
||||
return trainingRunRepository.save(queued);
|
||||
}));
|
||||
|
||||
if (queuedOpt != null && queuedOpt.isPresent()) {
|
||||
OcrTrainingRun promoted = queuedOpt.get();
|
||||
log.info("Promoting queued sender training run {} for person {}", promoted.getId(), promoted.getPersonId());
|
||||
long freshCount = blockRepository.countManualKurrentBlocksByPerson(promoted.getPersonId());
|
||||
triggerSenderTraining(promoted.getPersonId(), (int) freshCount);
|
||||
}
|
||||
}
|
||||
|
||||
@EventListener(ApplicationReadyEvent.class)
|
||||
@Transactional
|
||||
public void recoverOrphanedRuns() {
|
||||
@@ -224,15 +331,4 @@ public class OcrTrainingService {
|
||||
});
|
||||
}
|
||||
|
||||
public Map<String, Object> buildTrainingInfoMap(TrainingInfoResponse info) {
|
||||
return Map.of(
|
||||
"availableBlocks", info.availableBlocks(),
|
||||
"totalOcrBlocks", info.totalOcrBlocks(),
|
||||
"availableDocuments", info.availableDocuments(),
|
||||
"availableSegBlocks", info.availableSegBlocks(),
|
||||
"ocrServiceAvailable", info.ocrServiceAvailable(),
|
||||
"lastRun", info.lastRun() != null ? info.lastRun() : Map.of(),
|
||||
"runs", info.runs()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user