fix(ocr): eliminate race window in runOrQueueSenderTraining by creating RUNNING row atomically
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -14,6 +14,8 @@ public interface OcrTrainingRunRepository extends JpaRepository<OcrTrainingRun,
|
||||
|
||||
Optional<OcrTrainingRun> findFirstByStatusOrderByCreatedAtAsc(TrainingStatus status);
|
||||
|
||||
Optional<OcrTrainingRun> findFirstByPersonIdAndStatus(UUID personId, TrainingStatus status);
|
||||
|
||||
boolean existsByPersonIdAndStatus(UUID personId, TrainingStatus status);
|
||||
|
||||
List<OcrTrainingRun> findTop20ByOrderByCreatedAtDesc();
|
||||
|
||||
@@ -72,9 +72,10 @@ public class SenderModelService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Inserts a QUEUED row when training is already running (coalescing: at most one QUEUED per
|
||||
* person_id). Runs immediately if idle. Returns true if training should start now, false if
|
||||
* queued or skipped.
|
||||
* Atomically checks the queue state and either creates a RUNNING row (returns true) or queues
|
||||
* the request (returns false; a person who already has a QUEUED row is skipped, not queued twice). All three operations — idle check, duplicate-queue guard, and
|
||||
* RUNNING row creation — happen in one transaction, eliminating the race window that would
|
||||
* otherwise exist between the check and a separate RUNNING row creation.
|
||||
*/
|
||||
@Transactional
|
||||
public boolean runOrQueueSenderTraining(UUID personId, int correctedLines) {
|
||||
@@ -96,23 +97,29 @@ public class SenderModelService {
|
||||
return false;
|
||||
}
|
||||
|
||||
long blockCount = blockRepository.countManualKurrentBlocksByPerson(personId);
|
||||
trainingRunRepository.save(OcrTrainingRun.builder()
|
||||
.status(TrainingStatus.RUNNING)
|
||||
.personId(personId)
|
||||
.blockCount((int) blockCount)
|
||||
.documentCount(0)
|
||||
.modelName("sender_" + personId)
|
||||
.build());
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Executes sender training synchronously. Caller must run this on a background thread. */
|
||||
/**
|
||||
* Executes sender training synchronously. Caller must run this on a background thread.
|
||||
* The RUNNING row is expected to already exist — created atomically by
|
||||
* runOrQueueSenderTraining (for new runs) or by promoteNextQueuedRun (for promoted runs).
|
||||
*/
|
||||
public void triggerSenderTraining(UUID personId, int correctedLines) {
|
||||
String outputModelPath = "/app/models/sender_" + personId + ".mlmodel";
|
||||
|
||||
OcrTrainingRun run = Objects.requireNonNull(txTemplate.execute(status -> {
|
||||
long blockCount = blockRepository.countManualKurrentBlocksByPerson(personId);
|
||||
return trainingRunRepository.save(OcrTrainingRun.builder()
|
||||
.status(TrainingStatus.RUNNING)
|
||||
.personId(personId)
|
||||
.blockCount((int) blockCount)
|
||||
.documentCount(0)
|
||||
.modelName("sender_" + personId)
|
||||
.build());
|
||||
}));
|
||||
OcrTrainingRun run = Objects.requireNonNull(txTemplate.execute(status ->
|
||||
trainingRunRepository.findFirstByPersonIdAndStatus(personId, TrainingStatus.RUNNING)
|
||||
.orElseThrow(() -> new IllegalStateException(
|
||||
"Expected RUNNING row for person " + personId + " but none found"))));
|
||||
|
||||
String runId = run.getId().toString();
|
||||
MDC.put("trainingRunId", runId);
|
||||
|
||||
Reference in New Issue
Block a user