From dd9c4d57eecfa7b0657bef13d3d0588670c8e8e0 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 30 Apr 2026 10:15:25 +0200 Subject: [PATCH] fix(training): use KURRENT_RECOGNITION label for sender-based block queries scriptType is only set after OCR runs, which can't happen before we have a trained model. Both sender-based queries now filter on the training label instead, consistent with findEligibleKurrentBlocks. Also adds missing test coverage for findManualKurrentBlocksByPerson and countManualKurrentBlocksByPerson (4 cases + count parity check). Co-Authored-By: Claude Sonnet 4.6 --- .../TranscriptionBlockRepository.java | 4 +- .../repository/TrainingBlockQueryTest.java | 94 +++++++++++++++++++ 2 files changed, 96 insertions(+), 2 deletions(-) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/repository/TranscriptionBlockRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/repository/TranscriptionBlockRepository.java index e138cbe7..8e61e09d 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/repository/TranscriptionBlockRepository.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/repository/TranscriptionBlockRepository.java @@ -65,7 +65,7 @@ public interface TranscriptionBlockRepository extends JpaRepository findManualKurrentBlocksByPerson(@Param("personId") UUID personId); } diff --git a/backend/src/test/java/org/raddatz/familienarchiv/repository/TrainingBlockQueryTest.java b/backend/src/test/java/org/raddatz/familienarchiv/repository/TrainingBlockQueryTest.java index 57e9f3e1..c81186d7 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/repository/TrainingBlockQueryTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/repository/TrainingBlockQueryTest.java @@ -24,6 +24,7 @@ class TrainingBlockQueryTest { @Autowired TranscriptionBlockRepository blockRepository; @Autowired DocumentRepository documentRepository; @Autowired AnnotationRepository annotationRepository; + @Autowired PersonRepository personRepository; private UUID kurrentDocId; private UUID typewriterDocId; @@ -111,6 +112,99 @@ class TrainingBlockQueryTest { assertThat(result).hasSize(2); } + // ─── sender-based queries ───────────────────────────────────────────────── + + @Test + void findManualKurrentBlocksByPerson_includesBlockFromKurrentLabelledDocument() { + Person sender = personRepository.save(Person.builder().firstName("Karl").lastName("Test").build()); + Document doc = documentRepository.save(Document.builder() + .title("Brief von Karl") + .originalFilename("karl.pdf") + .status(DocumentStatus.UPLOADED) + .sender(sender) + .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) + .build()); + UUID annId = annotationRepository.save(annotation(doc.getId())).getId(); + blockRepository.save(block(doc.getId(), annId, BlockSource.MANUAL, false)); + + List result = blockRepository.findManualKurrentBlocksByPerson(sender.getId()); + + assertThat(result).hasSize(1); + } + + @Test + void findManualKurrentBlocksByPerson_excludesDocumentWithoutKurrentLabel() { + Person sender = personRepository.save(Person.builder().firstName("Karl").lastName("Test").build()); + Document doc = documentRepository.save(Document.builder() + .title("Brief von Karl") + .originalFilename("karl.pdf") + .status(DocumentStatus.UPLOADED) + .sender(sender) + .build()); + UUID annId = annotationRepository.save(annotation(doc.getId())).getId(); + blockRepository.save(block(doc.getId(), annId, BlockSource.MANUAL, false)); + + List result = blockRepository.findManualKurrentBlocksByPerson(sender.getId()); + + assertThat(result).isEmpty(); + } + + @Test + void findManualKurrentBlocksByPerson_excludesOcrBlocks() { + Person sender = personRepository.save(Person.builder().firstName("Karl").lastName("Test").build()); + Document doc = documentRepository.save(Document.builder() + .title("Brief von Karl") + .originalFilename("karl.pdf") + .status(DocumentStatus.UPLOADED) + .sender(sender) + .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) + .build()); + UUID annId = annotationRepository.save(annotation(doc.getId())).getId(); + blockRepository.save(block(doc.getId(), annId, BlockSource.OCR, false)); + + List result = blockRepository.findManualKurrentBlocksByPerson(sender.getId()); + + assertThat(result).isEmpty(); + } + + @Test + void findManualKurrentBlocksByPerson_excludesOtherSender() { + Person karl = personRepository.save(Person.builder().firstName("Karl").lastName("Test").build()); + Person anna = personRepository.save(Person.builder().firstName("Anna").lastName("Test").build()); + Document doc = documentRepository.save(Document.builder() + .title("Brief von Karl") + .originalFilename("karl.pdf") + .status(DocumentStatus.UPLOADED) + .sender(karl) + .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) + .build()); + UUID annId = annotationRepository.save(annotation(doc.getId())).getId(); + blockRepository.save(block(doc.getId(), annId, BlockSource.MANUAL, false)); + + List result = blockRepository.findManualKurrentBlocksByPerson(anna.getId()); + + assertThat(result).isEmpty(); + } + + @Test + void countManualKurrentBlocksByPerson_matchesFindResult() { + Person sender = personRepository.save(Person.builder().firstName("Karl").lastName("Test").build()); + Document doc = documentRepository.save(Document.builder() + .title("Brief von Karl") + .originalFilename("karl.pdf") + .status(DocumentStatus.UPLOADED) + .sender(sender) + .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) + .build()); + UUID annId = annotationRepository.save(annotation(doc.getId())).getId(); + blockRepository.save(block(doc.getId(), annId, BlockSource.MANUAL, false)); + blockRepository.save(block(doc.getId(), annId, BlockSource.MANUAL, true)); + + long count = blockRepository.countManualKurrentBlocksByPerson(sender.getId()); + + assertThat(count).isEqualTo(2); + } + // ─── helpers ───────────────────────────────────────────────────────────── private DocumentAnnotation annotation(UUID docId) {