fix(training): use KURRENT_RECOGNITION label for sender-based block queries

scriptType is only set after OCR runs, which can't happen before we have
a trained model. Both sender-based queries now filter on the training label
instead, consistent with findEligibleKurrentBlocks.

Also adds missing test coverage for findManualKurrentBlocksByPerson and
countManualKurrentBlocksByPerson (4 cases + count parity check).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-30 10:15:25 +02:00
committed by marcel
parent aae005d5e6
commit dd9c4d57ee
2 changed files with 96 additions and 2 deletions

View File

@@ -65,7 +65,7 @@ public interface TranscriptionBlockRepository extends JpaRepository<Transcriptio
JOIN Document d ON d.id = b.documentId
WHERE b.source = 'MANUAL'
AND d.sender.id = :personId
AND d.scriptType = 'HANDWRITING_KURRENT'
AND 'KURRENT_RECOGNITION' MEMBER OF d.trainingLabels
""")
long countManualKurrentBlocksByPerson(@Param("personId") UUID personId);
@@ -74,7 +74,7 @@ public interface TranscriptionBlockRepository extends JpaRepository<Transcriptio
JOIN Document d ON d.id = b.documentId
WHERE b.source = 'MANUAL'
AND d.sender.id = :personId
AND d.scriptType = 'HANDWRITING_KURRENT'
AND 'KURRENT_RECOGNITION' MEMBER OF d.trainingLabels
""")
List<TranscriptionBlock> findManualKurrentBlocksByPerson(@Param("personId") UUID personId);
}

View File

@@ -24,6 +24,7 @@ class TrainingBlockQueryTest {
@Autowired TranscriptionBlockRepository blockRepository;
@Autowired DocumentRepository documentRepository;
@Autowired AnnotationRepository annotationRepository;
@Autowired PersonRepository personRepository;
private UUID kurrentDocId;
private UUID typewriterDocId;
@@ -111,6 +112,99 @@ class TrainingBlockQueryTest {
assertThat(result).hasSize(2);
}
// ─── sender-based queries ─────────────────────────────────────────────────
@Test
void findManualKurrentBlocksByPerson_includesBlockFromKurrentLabelledDocument() {
    // Arrange: one sender, one document labelled KURRENT_RECOGNITION, one MANUAL block on it.
    Person karl = personRepository.save(
            Person.builder().firstName("Karl").lastName("Test").build());
    Document letter = documentRepository.save(
            Document.builder()
                    .title("Brief von Karl")
                    .originalFilename("karl.pdf")
                    .status(DocumentStatus.UPLOADED)
                    .sender(karl)
                    .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION)))
                    .build());
    UUID letterId = letter.getId();
    UUID annotationId = annotationRepository.save(annotation(letterId)).getId();
    blockRepository.save(block(letterId, annotationId, BlockSource.MANUAL, false));

    // Act + assert: the sender-based query returns exactly that block.
    assertThat(blockRepository.findManualKurrentBlocksByPerson(karl.getId())).hasSize(1);
}
@Test
void findManualKurrentBlocksByPerson_excludesDocumentWithoutKurrentLabel() {
    // Arrange: same sender/document/block setup, but the builder is never given training labels.
    Person karl = personRepository.save(
            Person.builder().firstName("Karl").lastName("Test").build());
    Document unlabelledLetter = documentRepository.save(
            Document.builder()
                    .title("Brief von Karl")
                    .originalFilename("karl.pdf")
                    .status(DocumentStatus.UPLOADED)
                    .sender(karl)
                    .build());
    UUID letterId = unlabelledLetter.getId();
    UUID annotationId = annotationRepository.save(annotation(letterId)).getId();
    blockRepository.save(block(letterId, annotationId, BlockSource.MANUAL, false));

    // Act + assert: the MANUAL block must not be returned for this sender.
    assertThat(blockRepository.findManualKurrentBlocksByPerson(karl.getId())).isEmpty();
}
@Test
void findManualKurrentBlocksByPerson_excludesOcrBlocks() {
    // Arrange: a KURRENT_RECOGNITION-labelled document whose only block has source OCR.
    Person karl = personRepository.save(
            Person.builder().firstName("Karl").lastName("Test").build());
    Document letter = documentRepository.save(
            Document.builder()
                    .title("Brief von Karl")
                    .originalFilename("karl.pdf")
                    .status(DocumentStatus.UPLOADED)
                    .sender(karl)
                    .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION)))
                    .build());
    UUID letterId = letter.getId();
    UUID annotationId = annotationRepository.save(annotation(letterId)).getId();
    blockRepository.save(block(letterId, annotationId, BlockSource.OCR, false));

    // Act + assert: only MANUAL blocks qualify, so nothing comes back.
    assertThat(blockRepository.findManualKurrentBlocksByPerson(karl.getId())).isEmpty();
}
@Test
void findManualKurrentBlocksByPerson_excludesOtherSender() {
    // Arrange: two persons; only Karl is the sender of the labelled document with a MANUAL block.
    Person documentSender = personRepository.save(
            Person.builder().firstName("Karl").lastName("Test").build());
    Person unrelatedPerson = personRepository.save(
            Person.builder().firstName("Anna").lastName("Test").build());
    Document letter = documentRepository.save(
            Document.builder()
                    .title("Brief von Karl")
                    .originalFilename("karl.pdf")
                    .status(DocumentStatus.UPLOADED)
                    .sender(documentSender)
                    .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION)))
                    .build());
    UUID letterId = letter.getId();
    UUID annotationId = annotationRepository.save(annotation(letterId)).getId();
    blockRepository.save(block(letterId, annotationId, BlockSource.MANUAL, false));

    // Act + assert: querying by the other person's id yields no blocks.
    assertThat(blockRepository.findManualKurrentBlocksByPerson(unrelatedPerson.getId())).isEmpty();
}
@Test
void countManualKurrentBlocksByPerson_matchesFindResult() {
    // Arrange: one KURRENT_RECOGNITION-labelled document carrying two MANUAL blocks.
    Person sender = personRepository.save(Person.builder().firstName("Karl").lastName("Test").build());
    Document doc = documentRepository.save(Document.builder()
            .title("Brief von Karl")
            .originalFilename("karl.pdf")
            .status(DocumentStatus.UPLOADED)
            .sender(sender)
            .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION)))
            .build());
    UUID annId = annotationRepository.save(annotation(doc.getId())).getId();
    blockRepository.save(block(doc.getId(), annId, BlockSource.MANUAL, false));
    blockRepository.save(block(doc.getId(), annId, BlockSource.MANUAL, true));

    // Act: run BOTH sender-based queries so the parity promised by the test name is really checked.
    long count = blockRepository.countManualKurrentBlocksByPerson(sender.getId());
    List<TranscriptionBlock> found = blockRepository.findManualKurrentBlocksByPerson(sender.getId());

    // Assert: the find query sees both blocks, and the count query agrees with it.
    assertThat(found).hasSize(2);
    assertThat(count).isEqualTo(found.size());
}
// ─── helpers ─────────────────────────────────────────────────────────────
private DocumentAnnotation annotation(UUID docId) {