From c905b81fd3fbcca66fd713e5cf34887be07a2e12 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 30 Apr 2026 10:15:25 +0200 Subject: [PATCH 1/2] fix(training): use KURRENT_RECOGNITION label for sender-based block queries scriptType is only set after OCR runs, which can't happen before we have a trained model. Both sender-based queries now filter on the training label instead, consistent with findEligibleKurrentBlocks. Also adds missing test coverage for findManualKurrentBlocksByPerson and countManualKurrentBlocksByPerson (4 cases + count parity check). Co-Authored-By: Claude Sonnet 4.6 --- .../TranscriptionBlockRepository.java | 4 +- .../repository/TrainingBlockQueryTest.java | 94 +++++++++++++++++++ 2 files changed, 96 insertions(+), 2 deletions(-) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/repository/TranscriptionBlockRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/repository/TranscriptionBlockRepository.java index e138cbe7..8e61e09d 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/repository/TranscriptionBlockRepository.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/repository/TranscriptionBlockRepository.java @@ -65,7 +65,7 @@ public interface TranscriptionBlockRepository extends JpaRepository findManualKurrentBlocksByPerson(@Param("personId") UUID personId); } diff --git a/backend/src/test/java/org/raddatz/familienarchiv/repository/TrainingBlockQueryTest.java b/backend/src/test/java/org/raddatz/familienarchiv/repository/TrainingBlockQueryTest.java index 57e9f3e1..c81186d7 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/repository/TrainingBlockQueryTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/repository/TrainingBlockQueryTest.java @@ -24,6 +24,7 @@ class TrainingBlockQueryTest { @Autowired TranscriptionBlockRepository blockRepository; @Autowired DocumentRepository documentRepository; @Autowired AnnotationRepository annotationRepository; + @Autowired PersonRepository personRepository; private UUID kurrentDocId; private UUID typewriterDocId; @@ -111,6 +112,99 @@ class TrainingBlockQueryTest { assertThat(result).hasSize(2); } + // ─── sender-based queries ───────────────────────────────────────────────── + + @Test + void findManualKurrentBlocksByPerson_includesBlockFromKurrentLabelledDocument() { + Person sender = personRepository.save(Person.builder().firstName("Karl").lastName("Test").build()); + Document doc = documentRepository.save(Document.builder() + .title("Brief von Karl") + .originalFilename("karl.pdf") + .status(DocumentStatus.UPLOADED) + .sender(sender) + .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) + .build()); + UUID annId = annotationRepository.save(annotation(doc.getId())).getId(); + blockRepository.save(block(doc.getId(), annId, BlockSource.MANUAL, false)); + + List result = blockRepository.findManualKurrentBlocksByPerson(sender.getId()); + + assertThat(result).hasSize(1); + } + + @Test + void findManualKurrentBlocksByPerson_excludesDocumentWithoutKurrentLabel() { + Person sender = personRepository.save(Person.builder().firstName("Karl").lastName("Test").build()); + Document doc = documentRepository.save(Document.builder() + .title("Brief von Karl") + .originalFilename("karl.pdf") + .status(DocumentStatus.UPLOADED) + .sender(sender) + .build()); + UUID annId = annotationRepository.save(annotation(doc.getId())).getId(); + blockRepository.save(block(doc.getId(), annId, BlockSource.MANUAL, false)); + + List result = blockRepository.findManualKurrentBlocksByPerson(sender.getId()); + + assertThat(result).isEmpty(); + } + + @Test + void findManualKurrentBlocksByPerson_excludesOcrBlocks() { + Person sender = personRepository.save(Person.builder().firstName("Karl").lastName("Test").build()); + Document doc = documentRepository.save(Document.builder() + .title("Brief von Karl") + .originalFilename("karl.pdf") + .status(DocumentStatus.UPLOADED) + .sender(sender) + .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) + .build()); + UUID annId = annotationRepository.save(annotation(doc.getId())).getId(); + blockRepository.save(block(doc.getId(), annId, BlockSource.OCR, false)); + + List result = blockRepository.findManualKurrentBlocksByPerson(sender.getId()); + + assertThat(result).isEmpty(); + } + + @Test + void findManualKurrentBlocksByPerson_excludesOtherSender() { + Person karl = personRepository.save(Person.builder().firstName("Karl").lastName("Test").build()); + Person anna = personRepository.save(Person.builder().firstName("Anna").lastName("Test").build()); + Document doc = documentRepository.save(Document.builder() + .title("Brief von Karl") + .originalFilename("karl.pdf") + .status(DocumentStatus.UPLOADED) + .sender(karl) + .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) + .build()); + UUID annId = annotationRepository.save(annotation(doc.getId())).getId(); + blockRepository.save(block(doc.getId(), annId, BlockSource.MANUAL, false)); + + List result = blockRepository.findManualKurrentBlocksByPerson(anna.getId()); + + assertThat(result).isEmpty(); + } + + @Test + void countManualKurrentBlocksByPerson_matchesFindResult() { + Person sender = personRepository.save(Person.builder().firstName("Karl").lastName("Test").build()); + Document doc = documentRepository.save(Document.builder() + .title("Brief von Karl") + .originalFilename("karl.pdf") + .status(DocumentStatus.UPLOADED) + .sender(sender) + .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) + .build()); + UUID annId = annotationRepository.save(annotation(doc.getId())).getId(); + blockRepository.save(block(doc.getId(), annId, BlockSource.MANUAL, false)); + blockRepository.save(block(doc.getId(), annId, BlockSource.MANUAL, true)); + + long count = blockRepository.countManualKurrentBlocksByPerson(sender.getId()); + + assertThat(count).isEqualTo(2); + } + // ─── helpers ───────────────────────────────────────────────────────────── private DocumentAnnotation annotation(UUID docId) { -- 2.49.1 From 6399321d0e9920f0b2dbed55285c6d6547a80d62 Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 4 May 2026 15:12:38 +0200 Subject: [PATCH 2/2] test(training): strengthen TrainingBlockQueryTest assertions Co-Authored-By: Claude Sonnet 4.6 --- .../repository/TrainingBlockQueryTest.java | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/backend/src/test/java/org/raddatz/familienarchiv/repository/TrainingBlockQueryTest.java b/backend/src/test/java/org/raddatz/familienarchiv/repository/TrainingBlockQueryTest.java index c81186d7..8d7fc344 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/repository/TrainingBlockQueryTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/repository/TrainingBlockQueryTest.java @@ -11,6 +11,7 @@ import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabas import org.springframework.boot.data.jpa.test.autoconfigure.DataJpaTest; import org.springframework.context.annotation.Import; +import java.util.HashSet; import java.util.List; import java.util.UUID; @@ -37,7 +38,7 @@ class TrainingBlockQueryTest { .title("Kurrent Brief") .originalFilename("kurrent.pdf") .status(DocumentStatus.UPLOADED) - .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) + .trainingLabels(new HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) .build()); kurrentDocId = kurrentDoc.getId(); @@ -122,7 +123,7 @@ class TrainingBlockQueryTest { .originalFilename("karl.pdf") .status(DocumentStatus.UPLOADED) .sender(sender) - .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) + .trainingLabels(new HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) .build()); UUID annId = annotationRepository.save(annotation(doc.getId())).getId(); blockRepository.save(block(doc.getId(), annId, BlockSource.MANUAL, false)); @@ -157,7 +158,7 @@ class TrainingBlockQueryTest { .originalFilename("karl.pdf") .status(DocumentStatus.UPLOADED) .sender(sender) - .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) + .trainingLabels(new HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) .build()); UUID annId = annotationRepository.save(annotation(doc.getId())).getId(); blockRepository.save(block(doc.getId(), annId, BlockSource.OCR, false)); @@ -176,7 +177,7 @@ class TrainingBlockQueryTest { .originalFilename("karl.pdf") .status(DocumentStatus.UPLOADED) .sender(karl) - .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) + .trainingLabels(new HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) .build()); UUID annId = annotationRepository.save(annotation(doc.getId())).getId(); blockRepository.save(block(doc.getId(), annId, BlockSource.MANUAL, false)); @@ -194,15 +195,25 @@ class TrainingBlockQueryTest { .originalFilename("karl.pdf") .status(DocumentStatus.UPLOADED) .sender(sender) - .trainingLabels(new java.util.HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) + .trainingLabels(new HashSet<>(Set.of(TrainingLabel.KURRENT_RECOGNITION))) .build()); UUID annId = annotationRepository.save(annotation(doc.getId())).getId(); blockRepository.save(block(doc.getId(), annId, BlockSource.MANUAL, false)); blockRepository.save(block(doc.getId(), annId, BlockSource.MANUAL, true)); long count = blockRepository.countManualKurrentBlocksByPerson(sender.getId()); + List found = blockRepository.findManualKurrentBlocksByPerson(sender.getId()); - assertThat(count).isEqualTo(2); + assertThat(count).isEqualTo(found.size()); + } + + @Test + void countManualKurrentBlocksByPerson_returnsZeroWhenNoBlocksMatch() { + Person sender = personRepository.save(Person.builder().firstName("Karl").lastName("Test").build()); + + long count = blockRepository.countManualKurrentBlocksByPerson(sender.getId()); + + assertThat(count).isZero(); } // ─── helpers ───────────────────────────────────────────────────────────── -- 2.49.1