feat(ocr): per-sender specialized Kurrent models with automatic active-learning retraining #263

Merged
marcel merged 32 commits from feat/issue-253-sender-models into main 2026-04-18 12:30:57 +02:00
2 changed files with 9 additions and 3 deletions
Showing only changes of commit 92f3c04d54 - Show all commits

View File

@@ -0,0 +1,3 @@
CREATE UNIQUE INDEX idx_training_runs_queued_per_person
ON ocr_training_runs(person_id)
WHERE status = 'QUEUED';

View File

@@ -2,6 +2,8 @@ package org.raddatz.familienarchiv.service;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;
import org.raddatz.familienarchiv.model.OcrTrainingRun;
import org.raddatz.familienarchiv.model.SenderModel;
import org.raddatz.familienarchiv.model.TrainingStatus;
@@ -19,6 +21,7 @@ import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.*;
import static org.mockito.Mockito.*;
@ExtendWith(MockitoExtension.class)
class SenderModelServiceTest {
SenderModelRepository senderModelRepository;
@@ -40,8 +43,9 @@ class SenderModelServiceTest {
txTemplate = mock(TransactionTemplate.class);
trainingDataExportService = mock(TrainingDataExportService.class);
// Execute transaction callbacks inline so unit tests run without a real DataSource
when(txTemplate.execute(any())).thenAnswer(inv -> {
// Execute transaction callbacks inline so unit tests run without a real DataSource.
// lenient: not every test hits the txTemplate path, but the setup is shared.
lenient().when(txTemplate.execute(any())).thenAnswer(inv -> {
TransactionCallback<?> callback = inv.getArgument(0);
return callback.doInTransaction(null);
});
@@ -265,7 +269,6 @@ class SenderModelServiceTest {
.modelName("sender_" + personId).build();
when(trainingRunRepository.findFirstByPersonIdAndStatus(personId, TrainingStatus.RUNNING))
.thenReturn(Optional.of(runningRun));
when(blockRepository.countManualKurrentBlocksByPerson(personId)).thenReturn(10L);
when(trainingRunRepository.save(any())).thenAnswer(inv -> {
OcrTrainingRun r = inv.getArgument(0);
if (r.getId() == null) r.setId(UUID.randomUUID());