test(search): add DocumentSearchEnrichmentTest for findEnrichmentData native query

Tests lateral join best-block selection, chr(1)/chr(2) headline delimiters, sender/receiver/tag match flags, and null cases for missing relations.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-15 17:40:47 +02:00
parent 741eebc276
commit c235151075
2 changed files with 354 additions and 0 deletions
--- a/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentSearchEnrichmentTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentSearchEnrichmentTest.java
@@ -0,0 +1,303 @@
+package org.raddatz.familienarchiv.repository;
+
+import jakarta.persistence.EntityManager;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.raddatz.familienarchiv.PostgresContainerConfig;
+import org.raddatz.familienarchiv.config.FlywayConfig;
+import org.raddatz.familienarchiv.model.Document;
+import org.raddatz.familienarchiv.model.DocumentAnnotation;
+import org.raddatz.familienarchiv.model.DocumentStatus;
+import org.raddatz.familienarchiv.model.Person;
+import org.raddatz.familienarchiv.model.Tag;
+import org.raddatz.familienarchiv.model.TranscriptionBlock;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.data.jpa.test.autoconfigure.DataJpaTest;
+import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase;
+import org.springframework.context.annotation.Import;
+
+import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+@DataJpaTest
+@AutoConfigureTestDatabase(replace = AutoConfigureTestDatabase.Replace.NONE)
+@Import({PostgresContainerConfig.class, FlywayConfig.class})
+class DocumentSearchEnrichmentTest {
+
+    @Autowired DocumentRepository documentRepository;
+    @Autowired PersonRepository personRepository;
+    @Autowired TagRepository tagRepository;
+    @Autowired AnnotationRepository annotationRepository;
+    @Autowired TranscriptionBlockRepository blockRepository;
+    @Autowired EntityManager em;
+
+    @BeforeEach
+    void setUp() { // empties the tables before each test; NOTE(review): annotationRepository is never cleaned here — confirm that is intended
+        blockRepository.deleteAll();    // blocks carry a documentId and an annotationId; presumably removed first for FK order — TODO confirm
+        documentRepository.deleteAll(); // documents may point at a sender, receivers and tags; presumably why they go before both
+        personRepository.deleteAll();
+        tagRepository.deleteAll();
+    }
+
+    // ─── Lateral join: best transcription snippet ──────────────────────────────
+
+    @Test
+    void lateral_join_returns_highest_ranked_transcription_block() {
+        // One document with three blocks; only the middle block repeats the query term three times.
+        UUID documentId = documentRepository.saveAndFlush(document("Brief an Anna")).getId();
+        UUID annotationId = annotation(documentId);
+        blockRepository.saveAndFlush(block(documentId, annotationId, "Das Wetter war schön", 0));  // no occurrence
+        blockRepository.saveAndFlush(block(documentId, annotationId, "Brief Brief Brief", 1));     // three occurrences
+        blockRepository.saveAndFlush(block(documentId, annotationId, "Ein Brief liegt vor", 2));   // one occurrence
+        em.flush();
+        em.clear();
+
+        List<Object[]> result = documentRepository.findEnrichmentData(List.of(documentId), "Brief");
+
+        assertThat(result).hasSize(1);
+        // Column 2 is the transcription snippet; the block with the most occurrences is expected.
+        assertThat((String) result.get(0)[2]).isEqualTo("Brief Brief Brief");
+    }
+
+    @Test
+    void document_with_no_transcription_blocks_has_null_snippet() {
+        // The document is stored without a single transcription block.
+        UUID documentId = documentRepository.saveAndFlush(document("Foto ohne Text")).getId();
+        em.flush();
+        em.clear();
+
+        List<Object[]> result = documentRepository.findEnrichmentData(List.of(documentId), "Foto");
+
+        assertThat(result).hasSize(1);
+        assertThat(result.get(0)[2]).isNull();  // column 2: transcription snippet
+    }
+
+    @Test
+    void document_with_non_matching_blocks_has_null_snippet() {
+        UUID documentId = documentRepository.saveAndFlush(document("Dok")).getId();
+        // The only block of this document does not contain the query term "Brief".
+        TranscriptionBlock unrelated = block(documentId, annotation(documentId), "Kein Match hier", 0);
+        blockRepository.saveAndFlush(unrelated);
+        em.flush();
+        em.clear();
+
+        List<Object[]> result = documentRepository.findEnrichmentData(List.of(documentId), "Brief");
+        assertThat(result).hasSize(1);
+        assertThat(result.get(0)[2]).isNull();  // column 2: transcription snippet
+    }
+
+    // ─── Title headline: delimiter-based offset detection ─────────────────────
+
+    @Test
+    void title_headline_contains_delimiters_when_title_matches() {
+        // The stored title starts with the query term itself.
+        UUID documentId = documentRepository.saveAndFlush(document("Brief an die Familie")).getId();
+        em.flush();
+        em.clear();
+
+        List<Object[]> result = documentRepository.findEnrichmentData(List.of(documentId), "Brief");
+
+        assertThat(result).hasSize(1);
+        // Column 1 is the title headline: chr(1) opens the highlighted term, chr(2) presumably closes it.
+        String titleHeadline = (String) result.get(0)[1];
+        assertThat(titleHeadline).contains("\u0001").contains("\u0002");
+    }
+
+    @Test
+    void title_headline_has_no_delimiters_when_title_does_not_match() {
+        // "Familienfoto" does not contain the query term, so no delimiter may show up in the headline.
+        UUID documentId = documentRepository.saveAndFlush(document("Familienfoto")).getId();
+        em.flush();
+        em.clear();
+
+        List<Object[]> result = documentRepository.findEnrichmentData(List.of(documentId), "Brief");
+
+        assertThat(result).hasSize(1);
+        String titleHeadline = (String) result.get(0)[1];
+        assertThat(titleHeadline).doesNotContain("\u0001").doesNotContain("\u0002");
+    }
+
+    @Test
+    void title_headline_matches_stemmed_form() {
+        // "Brief" (query, singular) must hit "Briefe" (title, plural) through German FTS stemming; the
+        // original author notes DocumentFtsTest.should_find_document_by_stemmed_inflected_form covers the stem.
+        Document doc = documentRepository.saveAndFlush(document("Alte Briefe aus Berlin"));
+        em.flush();
+        em.clear();
+
+        List<Object[]> rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Brief");
+
+        assertThat(rows).hasSize(1);
+        String headline = (String) rows.get(0)[1];
+        // Require both delimiters, as the exact-match test does: a start marker alone is a broken highlight.
+        assertThat(headline).contains("\u0001").contains("\u0002");
+    }
+
+    // ─── Sender match ──────────────────────────────────────────────────────────
+
+    @Test
+    void sender_matched_is_true_when_sender_last_name_matches_query() {
+        // The query used below is exactly the sender's last name.
+        Person walter = Person.builder().firstName("Walter").lastName("Raddatz").build();
+        Document letter = Document.builder()
+                .title("Brief")
+                .originalFilename("brief.pdf")
+                .status(DocumentStatus.UPLOADED)
+                .sender(personRepository.saveAndFlush(walter))
+                .build();
+        UUID documentId = documentRepository.saveAndFlush(letter).getId();
+        em.flush();
+        em.clear();
+
+        List<Object[]> result = documentRepository.findEnrichmentData(List.of(documentId), "Raddatz");
+
+        assertThat(result).hasSize(1);
+        assertThat((Boolean) result.get(0)[3]).isTrue();  // column 3: sender-match flag
+    }
+
+    @Test
+    void sender_matched_is_false_when_sender_name_does_not_match() {
+        // "Schmidt" appears in neither the first nor the last name of the sender.
+        Person walter = Person.builder().firstName("Walter").lastName("Raddatz").build();
+        Document letter = Document.builder()
+                .title("Brief")
+                .originalFilename("brief.pdf")
+                .status(DocumentStatus.UPLOADED)
+                .sender(personRepository.saveAndFlush(walter))
+                .build();
+        UUID documentId = documentRepository.saveAndFlush(letter).getId();
+        em.flush();
+        em.clear();
+
+        List<Object[]> result = documentRepository.findEnrichmentData(List.of(documentId), "Schmidt");
+
+        assertThat(result).hasSize(1);
+        assertThat((Boolean) result.get(0)[3]).isFalse();  // column 3: sender-match flag
+    }
+
+    @Test
+    void sender_matched_is_false_when_document_has_no_sender() {
+        // The document(...) helper never sets a sender on the document it builds.
+        UUID documentId = documentRepository.saveAndFlush(document("Brief von unbekannt")).getId();
+        em.flush();
+        em.clear();
+
+        List<Object[]> result = documentRepository.findEnrichmentData(List.of(documentId), "Brief");
+
+        assertThat(result).hasSize(1);
+        assertThat((Boolean) result.get(0)[3]).isFalse();  // column 3: sender-match flag
+    }
+
+    // ─── Receiver match ────────────────────────────────────────────────────────
+
+    @Test
+    void matched_receiver_ids_contains_uuid_of_matching_receiver() {
+        Person anna = personRepository.saveAndFlush(Person.builder().firstName("Anna").lastName("Schmidt").build());
+        Document letter = Document.builder()
+                .title("Brief")
+                .originalFilename("brief.pdf")
+                .status(DocumentStatus.UPLOADED)
+                .receivers(Set.of(anna))
+                .build();
+        UUID documentId = documentRepository.saveAndFlush(letter).getId();
+        em.flush();
+        em.clear();
+
+        List<Object[]> result = documentRepository.findEnrichmentData(List.of(documentId), "Schmidt");
+
+        assertThat(result).hasSize(1);
+        // Column 4 holds the matched receiver ids as text; the receiver's UUID must appear in it.
+        assertThat((String) result.get(0)[4]).contains(anna.getId().toString());
+    }
+
+    @Test
+    void matched_receiver_ids_is_null_when_no_receiver_matches() {
+        Person anna = personRepository.saveAndFlush(Person.builder().firstName("Anna").lastName("Schmidt").build());
+        Document letter = Document.builder()
+                .title("Brief")
+                .originalFilename("brief.pdf")
+                .status(DocumentStatus.UPLOADED)
+                .receivers(Set.of(anna))
+                .build();
+        UUID documentId = documentRepository.saveAndFlush(letter).getId();
+        em.flush();
+        em.clear();
+
+        List<Object[]> result = documentRepository.findEnrichmentData(List.of(documentId), "Raddatz");
+
+        assertThat(result).hasSize(1);
+        assertThat(result.get(0)[4]).isNull();  // column 4: matched receiver ids; the query names nobody on this letter
+    }
+
+    // ─── Tag match ─────────────────────────────────────────────────────────────
+
+    @Test
+    void matched_tag_ids_contains_uuid_of_matching_tag() {
+        Tag familyHistory = tagRepository.saveAndFlush(Tag.builder().name("Familiengeschichte").build());
+        Document tagged = Document.builder()
+                .title("Dokument")
+                .originalFilename("dok.pdf")
+                .status(DocumentStatus.UPLOADED)
+                .tags(Set.of(familyHistory))
+                .build();
+        UUID documentId = documentRepository.saveAndFlush(tagged).getId();
+        em.flush();
+        em.clear();
+
+        List<Object[]> result = documentRepository.findEnrichmentData(List.of(documentId), "Familiengeschichte");
+
+        assertThat(result).hasSize(1);
+        assertThat((String) result.get(0)[5]).contains(familyHistory.getId().toString());  // column 5: matched tag ids
+    }
+
+    @Test
+    void matched_tag_ids_is_null_when_no_tag_matches() {
+        Document tagged = Document.builder()
+                .title("Dokument")
+                .originalFilename("dok.pdf")
+                .status(DocumentStatus.UPLOADED)
+                .tags(Set.of(tagRepository.saveAndFlush(Tag.builder().name("Familiengeschichte").build())))
+                .build();
+        UUID documentId = documentRepository.saveAndFlush(tagged).getId();
+        em.flush();
+        em.clear();
+
+        List<Object[]> result = documentRepository.findEnrichmentData(List.of(documentId), "Brief");
+
+        assertThat(result).hasSize(1);
+        assertThat(result.get(0)[5]).isNull();  // column 5: matched tag ids; the only tag does not match "Brief"
+    }
+
+    // ─── Helpers ───────────────────────────────────────────────────────────────
+
+    /** Builds an unsaved UPLOADED document whose file name is derived from {@code title}. */
+    private Document document(String title) {
+        String fileName = title.replace(" ", "_") + ".pdf";  // blanks become underscores, ".pdf" is appended
+        return Document.builder()
+                .title(title).originalFilename(fileName).status(DocumentStatus.UPLOADED)
+                .build();
+    }
+
+    /** Saves an annotation with fixed geometry on page 1 of the given document and returns its id. */
+    private UUID annotation(UUID documentId) {
+        DocumentAnnotation unsaved = DocumentAnnotation.builder()
+                .documentId(documentId).pageNumber(1)
+                .x(0.1).y(0.2).width(0.3).height(0.4)
+                .color("#00C7B1").build();
+        DocumentAnnotation saved = annotationRepository.save(unsaved);
+        em.flush();  // save() is followed by an explicit flush before the id is returned
+        return saved.getId();
+    }
+
+    /** Builds an unsaved transcription block for the given document and annotation. */
+    private TranscriptionBlock block(UUID documentId, UUID annotationId, String text, int order) {
+        TranscriptionBlock built = TranscriptionBlock.builder()
+                .documentId(documentId).annotationId(annotationId)
+                .text(text).sortOrder(order)
+                .build();
+        return built;
+    }
+}