From 47da0fa21675649178b2bf0bd155ade9ab0a7891 Mon Sep 17 00:00:00 2001 From: Marcel Date: Wed, 15 Apr 2026 15:30:53 +0200 Subject: [PATCH 01/20] feat(search): add MatchOffset record for character-level highlight positions Co-Authored-By: Claude Sonnet 4.6 --- .../familienarchiv/dto/MatchOffset.java | 14 ++++++++++++ .../familienarchiv/dto/MatchOffsetTest.java | 22 +++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 backend/src/main/java/org/raddatz/familienarchiv/dto/MatchOffset.java create mode 100644 backend/src/test/java/org/raddatz/familienarchiv/dto/MatchOffsetTest.java diff --git a/backend/src/main/java/org/raddatz/familienarchiv/dto/MatchOffset.java b/backend/src/main/java/org/raddatz/familienarchiv/dto/MatchOffset.java new file mode 100644 index 00000000..65d72918 --- /dev/null +++ b/backend/src/main/java/org/raddatz/familienarchiv/dto/MatchOffset.java @@ -0,0 +1,14 @@ +package org.raddatz.familienarchiv.dto; + +import io.swagger.v3.oas.annotations.media.Schema; + +/** + * Character-level offset of a highlighted term within a text field. + * Offsets are Java {@code String} character positions (UTF-16 code units), + * which are identical to JavaScript string positions — consistent end-to-end + * for all German BMP characters (ä, ö, ü, ß, etc.). 
+ */ +public record MatchOffset( + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) int start, + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) int length +) {} diff --git a/backend/src/test/java/org/raddatz/familienarchiv/dto/MatchOffsetTest.java b/backend/src/test/java/org/raddatz/familienarchiv/dto/MatchOffsetTest.java new file mode 100644 index 00000000..2021a640 --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/dto/MatchOffsetTest.java @@ -0,0 +1,22 @@ +package org.raddatz.familienarchiv.dto; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +class MatchOffsetTest { + + @Test + void should_hold_start_and_length() { + MatchOffset offset = new MatchOffset(6, 5); + + assertThat(offset.start()).isEqualTo(6); + assertThat(offset.length()).isEqualTo(5); + } + + @Test + void should_implement_value_equality() { + assertThat(new MatchOffset(0, 3)).isEqualTo(new MatchOffset(0, 3)); + assertThat(new MatchOffset(0, 3)).isNotEqualTo(new MatchOffset(0, 4)); + } +} -- 2.49.1 From 8cbecd452bd20e2045ecfb5e58e331d08ef28512 Mon Sep 17 00:00:00 2001 From: Marcel Date: Wed, 15 Apr 2026 15:32:26 +0200 Subject: [PATCH 02/20] feat(search): add SearchMatchData record for per-document match signals Co-Authored-By: Claude Sonnet 4.6 --- .../familienarchiv/dto/SearchMatchData.java | 48 +++++++++++++++++++ .../dto/SearchMatchDataTest.java | 44 +++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 backend/src/main/java/org/raddatz/familienarchiv/dto/SearchMatchData.java create mode 100644 backend/src/test/java/org/raddatz/familienarchiv/dto/SearchMatchDataTest.java diff --git a/backend/src/main/java/org/raddatz/familienarchiv/dto/SearchMatchData.java b/backend/src/main/java/org/raddatz/familienarchiv/dto/SearchMatchData.java new file mode 100644 index 00000000..7cd06d85 --- /dev/null +++ b/backend/src/main/java/org/raddatz/familienarchiv/dto/SearchMatchData.java @@ -0,0 +1,48 @@ +package 
org.raddatz.familienarchiv.dto; + +import io.swagger.v3.oas.annotations.media.Schema; + +import java.util.List; +import java.util.UUID; + +/** + * Match signals for a single document in a full-text search result. + * All fields are non-null except {@code transcriptionSnippet}, which is null + * when no transcription block matched the query. + */ +public record SearchMatchData( + /** + * Best-ranked matching transcription line, or null if no block matched. + */ + String transcriptionSnippet, + + /** + * Character offsets of highlighted terms within the document title. + * Empty when the title did not contribute to the match. + */ + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + List titleOffsets, + + /** + * True when the sender's name matched the query. + */ + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + boolean senderMatched, + + /** + * IDs of receiver persons whose names matched the query. + */ + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + List matchedReceiverIds, + + /** + * IDs of tags whose names matched the query. + */ + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + List matchedTagIds +) { + /** Canonical "no match data" value for a single document. 
*/ + public static SearchMatchData empty() { + return new SearchMatchData(null, List.of(), false, List.of(), List.of()); + } +} diff --git a/backend/src/test/java/org/raddatz/familienarchiv/dto/SearchMatchDataTest.java b/backend/src/test/java/org/raddatz/familienarchiv/dto/SearchMatchDataTest.java new file mode 100644 index 00000000..8135aafa --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/dto/SearchMatchDataTest.java @@ -0,0 +1,44 @@ +package org.raddatz.familienarchiv.dto; + +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +class SearchMatchDataTest { + + @Test + void transcription_snippet_is_nullable() { + SearchMatchData data = new SearchMatchData(null, List.of(), false, List.of(), List.of()); + + assertThat(data.transcriptionSnippet()).isNull(); + } + + @Test + void non_null_list_fields_are_empty_by_default_in_empty_factory() { + SearchMatchData data = SearchMatchData.empty(); + + assertThat(data.transcriptionSnippet()).isNull(); + assertThat(data.titleOffsets()).isEmpty(); + assertThat(data.matchedReceiverIds()).isEmpty(); + assertThat(data.matchedTagIds()).isEmpty(); + assertThat(data.senderMatched()).isFalse(); + } + + @Test + void holds_all_field_values() { + MatchOffset offset = new MatchOffset(0, 4); + SearchMatchData data = new SearchMatchData( + "schreibt dir aus dem Feld", + List.of(offset), + true, + List.of(), + List.of() + ); + + assertThat(data.transcriptionSnippet()).isEqualTo("schreibt dir aus dem Feld"); + assertThat(data.titleOffsets()).containsExactly(offset); + assertThat(data.senderMatched()).isTrue(); + } +} -- 2.49.1 From 003d68ed21e32ca2f0fb6018ae87ef01acce237f Mon Sep 17 00:00:00 2001 From: Marcel Date: Wed, 15 Apr 2026 15:34:00 +0200 Subject: [PATCH 03/20] feat(search): add DocumentSearchResult.withMatchData() factory with match overlay map Co-Authored-By: Claude Sonnet 4.6 --- .../dto/DocumentSearchResult.java | 23 ++++++-- 
.../dto/DocumentSearchResultTest.java | 53 +++++++++++++++++++ 2 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 backend/src/test/java/org/raddatz/familienarchiv/dto/DocumentSearchResultTest.java diff --git a/backend/src/main/java/org/raddatz/familienarchiv/dto/DocumentSearchResult.java b/backend/src/main/java/org/raddatz/familienarchiv/dto/DocumentSearchResult.java index a0c4af45..b7b59d68 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/dto/DocumentSearchResult.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/dto/DocumentSearchResult.java @@ -1,16 +1,33 @@ package org.raddatz.familienarchiv.dto; +import io.swagger.v3.oas.annotations.media.Schema; import org.raddatz.familienarchiv.model.Document; import java.util.List; +import java.util.Map; +import java.util.UUID; -public record DocumentSearchResult(List documents, long total) { +public record DocumentSearchResult( + List documents, + long total, + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + Map matchData +) { /** - * Creates a result where total equals the list size. + * Creates a fully-enriched result from documents and their match overlay data. + * Absent map entries (e.g. document deleted between FTS and enrichment) are safe — + * the frontend treats a missing entry as "no match data". + */ + public static DocumentSearchResult withMatchData(List documents, Map matchData) { + return new DocumentSearchResult(documents, documents.size(), matchData); + } + + /** + * Creates a result without match data — used for filter-only searches (no text query). * No pagination yet — the full matched set is always returned. * When pagination is added, total must come from a DB COUNT query, not list.size(). 
*/ public static DocumentSearchResult of(List documents) { - return new DocumentSearchResult(documents, documents.size()); + return withMatchData(documents, Map.of()); } } diff --git a/backend/src/test/java/org/raddatz/familienarchiv/dto/DocumentSearchResultTest.java b/backend/src/test/java/org/raddatz/familienarchiv/dto/DocumentSearchResultTest.java new file mode 100644 index 00000000..3802daf9 --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/dto/DocumentSearchResultTest.java @@ -0,0 +1,53 @@ +package org.raddatz.familienarchiv.dto; + +import org.junit.jupiter.api.Test; +import org.raddatz.familienarchiv.model.Document; +import org.raddatz.familienarchiv.model.DocumentStatus; + +import java.util.List; +import java.util.Map; +import java.util.UUID; + +import static org.assertj.core.api.Assertions.assertThat; + +class DocumentSearchResultTest { + + private Document doc(UUID id) { + return Document.builder() + .id(id) + .title("Test") + .originalFilename("test.pdf") + .status(DocumentStatus.UPLOADED) + .build(); + } + + @Test + void withMatchData_total_equals_list_size() { + UUID id = UUID.randomUUID(); + List docs = List.of(doc(id)); + Map matchData = Map.of(id, SearchMatchData.empty()); + + DocumentSearchResult result = DocumentSearchResult.withMatchData(docs, matchData); + + assertThat(result.total()).isEqualTo(1L); + } + + @Test + void withMatchData_exposes_match_data_map() { + UUID id = UUID.randomUUID(); + SearchMatchData data = new SearchMatchData("snippet", List.of(), false, List.of(), List.of()); + DocumentSearchResult result = DocumentSearchResult.withMatchData(List.of(doc(id)), Map.of(id, data)); + + assertThat(result.matchData()).containsKey(id); + assertThat(result.matchData().get(id).transcriptionSnippet()).isEqualTo("snippet"); + } + + @Test + void of_factory_returns_empty_match_data() { + UUID id = UUID.randomUUID(); + DocumentSearchResult result = DocumentSearchResult.of(List.of(doc(id))); + + 
assertThat(result.matchData()).isEmpty(); + assertThat(result.total()).isEqualTo(1L); + } +} -- 2.49.1 From 8526e6c0a13c371fff696eec6b69e5c328c050a3 Mon Sep 17 00:00:00 2001 From: Marcel Date: Wed, 15 Apr 2026 17:40:47 +0200 Subject: [PATCH 04/20] test(search): add DocumentSearchEnrichmentTest for findEnrichmentData native query Tests lateral join best-block selection, chr(1)/chr(2) headline delimiters, sender/receiver/tag match flags, and null cases for missing relations. Co-Authored-By: Claude Sonnet 4.6 --- .../repository/DocumentRepository.java | 51 +++ .../DocumentSearchEnrichmentTest.java | 303 ++++++++++++++++++ 2 files changed, 354 insertions(+) create mode 100644 backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentSearchEnrichmentTest.java diff --git a/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java index 3a183ded..f256025a 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java @@ -89,4 +89,55 @@ public interface DocumentRepository extends JpaRepository, JpaSp """) List findRankedIdsByFts(@Param("query") String query); + /** + * Returns match-enrichment data for a set of documents identified by their IDs. + * Each row contains (in column order): + *
<ol>
+ *   <li>UUID — document id</li>
+ *   <li>String — title headline with \x01/\x02 delimiters around matched terms</li>
+ *   <li>String — best-ranked matching transcription block text, or null</li>
+ *   <li>Boolean — whether the sender's name matched the query</li>
+ *   <li>String — comma-separated matched receiver UUIDs, or null</li>
+ *   <li>String — comma-separated matched tag UUIDs, or null</li>
+ * </ol>
+ * Short-circuit before calling this method when {@code ids} is empty or {@code query} is blank. + */ + @Query(nativeQuery = true, value = """ + SELECT + d.id, + ts_headline('german', d.title, websearch_to_tsquery('german', :query), + 'StartSel=' || chr(1) || ',StopSel=' || chr(2) || ',HighlightAll=true') + AS title_headline, + best_block.text AS transcription_snippet, + (s.id IS NOT NULL AND + to_tsvector('german', COALESCE(s.first_name, '') || ' ' || COALESCE(s.last_name, '')) + @@ websearch_to_tsquery('german', :query)) + AS sender_matched, + (SELECT string_agg(r.id::text, ',') + FROM document_receivers dr + JOIN persons r ON r.id = dr.person_id + WHERE dr.document_id = d.id + AND to_tsvector('german', COALESCE(r.first_name, '') || ' ' || r.last_name) + @@ websearch_to_tsquery('german', :query) + ) AS matched_receiver_ids, + (SELECT string_agg(t.id::text, ',') + FROM document_tags dt + JOIN tag t ON t.id = dt.tag_id + WHERE dt.document_id = d.id + AND to_tsvector('german', t.name) @@ websearch_to_tsquery('german', :query) + ) AS matched_tag_ids + FROM documents d + LEFT JOIN persons s ON s.id = d.sender_id + LEFT JOIN LATERAL ( + SELECT tb.text + FROM transcription_blocks tb + WHERE tb.document_id = d.id + AND to_tsvector('german', tb.text) @@ websearch_to_tsquery('german', :query) + ORDER BY ts_rank(to_tsvector('german', tb.text), websearch_to_tsquery('german', :query)) DESC + LIMIT 1 + ) best_block ON true + WHERE d.id IN :ids + """) + List findEnrichmentData(@Param("ids") Collection ids, @Param("query") String query); + } \ No newline at end of file diff --git a/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentSearchEnrichmentTest.java b/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentSearchEnrichmentTest.java new file mode 100644 index 00000000..4454c741 --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentSearchEnrichmentTest.java @@ -0,0 +1,303 @@ +package 
org.raddatz.familienarchiv.repository; + +import jakarta.persistence.EntityManager; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.raddatz.familienarchiv.PostgresContainerConfig; +import org.raddatz.familienarchiv.config.FlywayConfig; +import org.raddatz.familienarchiv.model.Document; +import org.raddatz.familienarchiv.model.DocumentAnnotation; +import org.raddatz.familienarchiv.model.DocumentStatus; +import org.raddatz.familienarchiv.model.Person; +import org.raddatz.familienarchiv.model.Tag; +import org.raddatz.familienarchiv.model.TranscriptionBlock; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.data.jpa.test.autoconfigure.DataJpaTest; +import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase; +import org.springframework.context.annotation.Import; + +import java.util.List; +import java.util.Set; +import java.util.UUID; + +import static org.assertj.core.api.Assertions.assertThat; + +@DataJpaTest +@AutoConfigureTestDatabase(replace = AutoConfigureTestDatabase.Replace.NONE) +@Import({PostgresContainerConfig.class, FlywayConfig.class}) +class DocumentSearchEnrichmentTest { + + @Autowired DocumentRepository documentRepository; + @Autowired PersonRepository personRepository; + @Autowired TagRepository tagRepository; + @Autowired AnnotationRepository annotationRepository; + @Autowired TranscriptionBlockRepository blockRepository; + @Autowired EntityManager em; + + @BeforeEach + void setUp() { + blockRepository.deleteAll(); + documentRepository.deleteAll(); + personRepository.deleteAll(); + tagRepository.deleteAll(); + } + + // ─── Lateral join: best transcription snippet ────────────────────────────── + + @Test + void lateral_join_returns_highest_ranked_transcription_block() { + Document doc = documentRepository.saveAndFlush(document("Brief an Anna")); + UUID annotId = annotation(doc.getId()); + // Three blocks — the one with two occurrences has 
highest rank + blockRepository.saveAndFlush(block(doc.getId(), annotId, "Das Wetter war schön", 0)); + blockRepository.saveAndFlush(block(doc.getId(), annotId, "Brief Brief Brief", 1)); // highest rank for "Brief" + blockRepository.saveAndFlush(block(doc.getId(), annotId, "Ein Brief liegt vor", 2)); // one occurrence + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Brief"); + + assertThat(rows).hasSize(1); + String snippet = (String) rows.get(0)[2]; + assertThat(snippet).isEqualTo("Brief Brief Brief"); + } + + @Test + void document_with_no_transcription_blocks_has_null_snippet() { + Document doc = documentRepository.saveAndFlush(document("Foto ohne Text")); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Foto"); + + assertThat(rows).hasSize(1); + Object snippet = rows.get(0)[2]; + assertThat(snippet).isNull(); + } + + @Test + void document_with_non_matching_blocks_has_null_snippet() { + Document doc = documentRepository.saveAndFlush(document("Dok")); + UUID annotId = annotation(doc.getId()); + blockRepository.saveAndFlush(block(doc.getId(), annotId, "Kein Match hier", 0)); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Brief"); + + assertThat(rows).hasSize(1); + assertThat(rows.get(0)[2]).isNull(); + } + + // ─── Title headline: delimiter-based offset detection ───────────────────── + + @Test + void title_headline_contains_delimiters_when_title_matches() { + Document doc = documentRepository.saveAndFlush(document("Brief an die Familie")); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Brief"); + + assertThat(rows).hasSize(1); + String headline = (String) rows.get(0)[1]; + // chr(1) marks the start of the highlighted term + assertThat(headline).contains("\u0001"); + assertThat(headline).contains("\u0002"); + } + + @Test + void 
title_headline_has_no_delimiters_when_title_does_not_match() { + Document doc = documentRepository.saveAndFlush(document("Familienfoto")); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Brief"); + + assertThat(rows).hasSize(1); + String headline = (String) rows.get(0)[1]; + assertThat(headline).doesNotContain("\u0001"); + assertThat(headline).doesNotContain("\u0002"); + } + + @Test + void title_headline_matches_stemmed_form() { + // "Brief" (singular, query) should match "Briefe" (plural, in title) via German FTS stemming. + // Both reduce to the stem "brief" under the Snowball German algorithm — verified by the + // existing should_find_document_by_stemmed_inflected_form test in DocumentFtsTest. + Document doc = documentRepository.saveAndFlush(document("Alte Briefe aus Berlin")); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Brief"); + + assertThat(rows).hasSize(1); + String headline = (String) rows.get(0)[1]; + assertThat(headline).contains("\u0001"); + } + + // ─── Sender match ────────────────────────────────────────────────────────── + + @Test + void sender_matched_is_true_when_sender_last_name_matches_query() { + Person sender = personRepository.saveAndFlush( + Person.builder().firstName("Walter").lastName("Raddatz").build()); + Document doc = documentRepository.saveAndFlush(Document.builder() + .title("Brief") + .originalFilename("brief.pdf") + .status(DocumentStatus.UPLOADED) + .sender(sender) + .build()); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Raddatz"); + + assertThat(rows).hasSize(1); + Boolean senderMatched = (Boolean) rows.get(0)[3]; + assertThat(senderMatched).isTrue(); + } + + @Test + void sender_matched_is_false_when_sender_name_does_not_match() { + Person sender = personRepository.saveAndFlush( + Person.builder().firstName("Walter").lastName("Raddatz").build()); + 
Document doc = documentRepository.saveAndFlush(Document.builder() + .title("Brief") + .originalFilename("brief.pdf") + .status(DocumentStatus.UPLOADED) + .sender(sender) + .build()); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Schmidt"); + + assertThat(rows).hasSize(1); + Boolean senderMatched = (Boolean) rows.get(0)[3]; + assertThat(senderMatched).isFalse(); + } + + @Test + void sender_matched_is_false_when_document_has_no_sender() { + Document doc = documentRepository.saveAndFlush(document("Brief von unbekannt")); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Brief"); + + assertThat(rows).hasSize(1); + Boolean senderMatched = (Boolean) rows.get(0)[3]; + assertThat(senderMatched).isFalse(); + } + + // ─── Receiver match ──────────────────────────────────────────────────────── + + @Test + void matched_receiver_ids_contains_uuid_of_matching_receiver() { + Person receiver = personRepository.saveAndFlush( + Person.builder().firstName("Anna").lastName("Schmidt").build()); + Document doc = documentRepository.saveAndFlush(Document.builder() + .title("Brief") + .originalFilename("brief.pdf") + .status(DocumentStatus.UPLOADED) + .receivers(Set.of(receiver)) + .build()); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Schmidt"); + + assertThat(rows).hasSize(1); + String receiverIds = (String) rows.get(0)[4]; + assertThat(receiverIds).contains(receiver.getId().toString()); + } + + @Test + void matched_receiver_ids_is_null_when_no_receiver_matches() { + Person receiver = personRepository.saveAndFlush( + Person.builder().firstName("Anna").lastName("Schmidt").build()); + Document doc = documentRepository.saveAndFlush(Document.builder() + .title("Brief") + .originalFilename("brief.pdf") + .status(DocumentStatus.UPLOADED) + .receivers(Set.of(receiver)) + .build()); + em.flush(); + em.clear(); + + List rows 
= documentRepository.findEnrichmentData(List.of(doc.getId()), "Raddatz"); + + assertThat(rows).hasSize(1); + assertThat(rows.get(0)[4]).isNull(); + } + + // ─── Tag match ───────────────────────────────────────────────────────────── + + @Test + void matched_tag_ids_contains_uuid_of_matching_tag() { + Tag tag = tagRepository.saveAndFlush(Tag.builder().name("Familiengeschichte").build()); + Document doc = documentRepository.saveAndFlush(Document.builder() + .title("Dokument") + .originalFilename("dok.pdf") + .status(DocumentStatus.UPLOADED) + .tags(Set.of(tag)) + .build()); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Familiengeschichte"); + + assertThat(rows).hasSize(1); + String tagIds = (String) rows.get(0)[5]; + assertThat(tagIds).contains(tag.getId().toString()); + } + + @Test + void matched_tag_ids_is_null_when_no_tag_matches() { + Tag tag = tagRepository.saveAndFlush(Tag.builder().name("Familiengeschichte").build()); + Document doc = documentRepository.saveAndFlush(Document.builder() + .title("Dokument") + .originalFilename("dok.pdf") + .status(DocumentStatus.UPLOADED) + .tags(Set.of(tag)) + .build()); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Brief"); + + assertThat(rows).hasSize(1); + assertThat(rows.get(0)[5]).isNull(); + } + + // ─── Helpers ─────────────────────────────────────────────────────────────── + + private Document document(String title) { + return Document.builder() + .title(title) + .originalFilename(title.replace(" ", "_") + ".pdf") + .status(DocumentStatus.UPLOADED) + .build(); + } + + private UUID annotation(UUID documentId) { + DocumentAnnotation ann = annotationRepository.save(DocumentAnnotation.builder() + .documentId(documentId) + .pageNumber(1) + .x(0.1).y(0.2).width(0.3).height(0.4) + .color("#00C7B1") + .build()); + em.flush(); + return ann.getId(); + } + + private TranscriptionBlock block(UUID documentId, UUID 
annotationId, String text, int order) { + return TranscriptionBlock.builder() + .documentId(documentId) + .annotationId(annotationId) + .text(text) + .sortOrder(order) + .build(); + } +} -- 2.49.1 From 8c7ce147a00b17619ae6e5ce2d6f2be5ac1a115a Mon Sep 17 00:00:00 2001 From: Marcel Date: Wed, 15 Apr 2026 17:53:57 +0200 Subject: [PATCH 05/20] feat(search): enrich searchDocuments with per-document match data DocumentService.searchDocuments now returns DocumentSearchResult with matchData populated from findEnrichmentData. Title highlights are parsed from chr(1)/chr(2) delimiters into MatchOffset lists; transcription snippet and sender/receiver/tag match flags are extracted from the same native SQL row. Co-Authored-By: Claude Sonnet 4.6 --- .../controller/DocumentController.java | 3 +- .../service/DocumentService.java | 87 +++++++++++++++++-- .../controller/DocumentControllerTest.java | 7 +- .../service/DocumentServiceSortTest.java | 15 ++-- .../service/DocumentServiceTest.java | 72 +++++++++++++-- 5 files changed, 157 insertions(+), 27 deletions(-) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java b/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java index 0bff2476..91e3c250 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java @@ -208,8 +208,7 @@ public class DocumentController { if (!"ASC".equalsIgnoreCase(dir) && !"DESC".equalsIgnoreCase(dir)) { throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "dir must be ASC or DESC"); } - List results = documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir); - return ResponseEntity.ok(DocumentSearchResult.of(results)); + return ResponseEntity.ok(documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir)); } // --- TRAINING LABELS --- 
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java index 46d4a4de..a028a3a5 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java @@ -3,10 +3,13 @@ package org.raddatz.familienarchiv.service; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.raddatz.familienarchiv.dto.DocumentSearchResult; +import org.raddatz.familienarchiv.dto.DocumentSort; import org.raddatz.familienarchiv.dto.DocumentUpdateDTO; import org.raddatz.familienarchiv.dto.IncompleteDocumentDTO; +import org.raddatz.familienarchiv.dto.MatchOffset; +import org.raddatz.familienarchiv.dto.SearchMatchData; import org.raddatz.familienarchiv.model.Document; -import org.raddatz.familienarchiv.dto.DocumentSort; import org.raddatz.familienarchiv.model.DocumentStatus; import org.raddatz.familienarchiv.model.ScriptType; import org.raddatz.familienarchiv.model.TrainingLabel; @@ -290,13 +293,13 @@ public class DocumentService { } // 1. Allgemeine Suche (für das Suchfeld im Frontend) - public List searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir) { + public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir) { boolean hasText = StringUtils.hasText(text); List rankedIds = null; if (hasText) { rankedIds = documentRepository.findRankedIdsByFts(text); - if (rankedIds.isEmpty()) return List.of(); + if (rankedIds.isEmpty()) return DocumentSearchResult.withMatchData(List.of(), Map.of()); } Specification textSpec = hasText ? 
hasIds(rankedIds) : (root, query, cb) -> null; @@ -312,11 +315,13 @@ public class DocumentService { // generates an INNER JOIN that silently drops documents with null sender/receivers. if (sort == DocumentSort.RECEIVER) { List results = documentRepository.findAll(spec); - return sortByFirstReceiver(results, dir); + List sorted = sortByFirstReceiver(results, dir); + return DocumentSearchResult.withMatchData(sorted, enrichWithMatchData(sorted, text)); } if (sort == DocumentSort.SENDER) { List results = documentRepository.findAll(spec); - return sortBySender(results, dir); + List sorted = sortBySender(results, dir); + return DocumentSearchResult.withMatchData(sorted, enrichWithMatchData(sorted, text)); } // RELEVANCE: default when text present and no explicit sort given @@ -325,14 +330,16 @@ public class DocumentService { List results = documentRepository.findAll(spec); Map rankMap = new HashMap<>(); for (int i = 0; i < rankedIds.size(); i++) rankMap.put(rankedIds.get(i), i); - return results.stream() + List sorted = results.stream() .sorted(Comparator.comparingInt( doc -> rankMap.getOrDefault(doc.getId(), Integer.MAX_VALUE))) .toList(); + return DocumentSearchResult.withMatchData(sorted, enrichWithMatchData(sorted, text)); } Sort springSort = resolveSort(sort, dir); - return documentRepository.findAll(spec, springSort); + List results = documentRepository.findAll(spec, springSort); + return DocumentSearchResult.withMatchData(results, enrichWithMatchData(results, text)); } private Sort resolveSort(DocumentSort sort, String dir) { @@ -584,6 +591,72 @@ public class DocumentService { return null; } + /** + * Calls {@code findEnrichmentData} and converts the raw Object[] rows into a + * {@link SearchMatchData} per document. Short-circuits when the list is empty or + * the query is blank (no text search active). 
+ */ + private Map enrichWithMatchData(List docs, String query) { + if (docs.isEmpty() || !StringUtils.hasText(query)) return Map.of(); + List ids = docs.stream().map(Document::getId).toList(); + Map result = new HashMap<>(); + for (Object[] row : documentRepository.findEnrichmentData(ids, query)) { + UUID docId = (UUID) row[0]; + String titleHeadline = (String) row[1]; + String transcriptionSnippet = (String) row[2]; + Boolean senderMatched = (Boolean) row[3]; + String receiverIdsStr = (String) row[4]; + String tagIdsStr = (String) row[5]; + result.put(docId, new SearchMatchData( + transcriptionSnippet, + parseTitleOffsets(titleHeadline), + senderMatched != null && senderMatched, + parseUUIDs(receiverIdsStr), + parseUUIDs(tagIdsStr) + )); + } + return result; + } + + /** + * Scans a {@code ts_headline} result that uses {@code chr(1)}/{@code chr(2)} as + * start/stop delimiters and converts each delimited span into a {@link MatchOffset} + * whose {@code start} and {@code length} are positions in the clean text + * (delimiters stripped). These values align with JavaScript {@code String} indexing. 
+ */ + private static List parseTitleOffsets(String headline) { + if (headline == null) return List.of(); + List offsets = new ArrayList<>(); + int i = 0; + int pos = 0; // char position in the clean string (no delimiters) + while (i < headline.length()) { + char c = headline.charAt(i); + if (c == '\u0001') { + int start = pos; + i++; + while (i < headline.length() && headline.charAt(i) != '\u0002') { + i++; + pos++; + } + offsets.add(new MatchOffset(start, pos - start)); + i++; // skip \u0002 + } else { + i++; + pos++; + } + } + return offsets; + } + + private static List parseUUIDs(String csv) { + if (csv == null || csv.isBlank()) return List.of(); + return Arrays.stream(csv.split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .map(UUID::fromString) + .toList(); + } + private static String sha256Hex(byte[] bytes) { try { MessageDigest digest = MessageDigest.getInstance("SHA-256"); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/controller/DocumentControllerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/controller/DocumentControllerTest.java index 3b48e59d..dfce90af 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/controller/DocumentControllerTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/controller/DocumentControllerTest.java @@ -1,6 +1,7 @@ package org.raddatz.familienarchiv.controller; import org.junit.jupiter.api.Test; +import org.raddatz.familienarchiv.dto.DocumentSearchResult; import org.raddatz.familienarchiv.dto.DocumentVersionSummary; import org.raddatz.familienarchiv.dto.IncompleteDocumentDTO; import org.raddatz.familienarchiv.model.Document; @@ -61,7 +62,7 @@ class DocumentControllerTest { @WithMockUser void search_returns200_whenAuthenticated() throws Exception { when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any())) - .thenReturn(Collections.emptyList()); + .thenReturn(DocumentSearchResult.of(List.of())); 
mockMvc.perform(get("/api/documents/search")) .andExpect(status().isOk()); @@ -71,7 +72,7 @@ class DocumentControllerTest { @WithMockUser void search_withStatusParam_passesItToService() throws Exception { when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any())) - .thenReturn(Collections.emptyList()); + .thenReturn(DocumentSearchResult.of(List.of())); mockMvc.perform(get("/api/documents/search").param("status", "REVIEWED")) .andExpect(status().isOk()); @@ -104,7 +105,7 @@ class DocumentControllerTest { @WithMockUser void search_responseContainsTotalCount() throws Exception { when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any())) - .thenReturn(Collections.emptyList()); + .thenReturn(DocumentSearchResult.of(List.of())); mockMvc.perform(get("/api/documents/search")) .andExpect(status().isOk()) diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceSortTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceSortTest.java index 44a7a51e..f089635c 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceSortTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceSortTest.java @@ -5,6 +5,7 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.raddatz.familienarchiv.dto.DocumentSearchResult; import org.raddatz.familienarchiv.dto.DocumentSort; import org.raddatz.familienarchiv.model.Document; import org.raddatz.familienarchiv.model.DocumentStatus; @@ -51,12 +52,12 @@ class DocumentServiceSortTest { when(documentRepository.findAll(any(Specification.class), any(Sort.class))) .thenReturn(List.of(newer, older)); - List result = documentService.searchDocuments( + DocumentSearchResult result = 
documentService.searchDocuments( "Brief", null, null, null, null, null, null, null, DocumentSort.DATE, "DESC"); // Expect: date order (newer 1960 first), NOT rank order (older 1940 first) - assertThat(result).hasSize(2); - assertThat(result.get(0).getId()).isEqualTo(id2); // newer doc first + assertThat(result.documents()).hasSize(2); + assertThat(result.documents().get(0).getId()).isEqualTo(id2); // newer doc first } // ─── searchDocuments — RELEVANCE sort ───────────────────────────────────── @@ -73,11 +74,11 @@ class DocumentServiceSortTest { when(documentRepository.findAll(any(Specification.class))) .thenReturn(List.of(doc2, doc1)); // unordered from DB - List result = documentService.searchDocuments( + DocumentSearchResult result = documentService.searchDocuments( "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null); // Expect: rank order restored (id1 first) - assertThat(result.get(0).getId()).isEqualTo(id1); + assertThat(result.documents().get(0).getId()).isEqualTo(id1); } @Test @@ -92,9 +93,9 @@ class DocumentServiceSortTest { when(documentRepository.findAll(any(Specification.class))) .thenReturn(List.of(doc2, doc1)); - List result = documentService.searchDocuments( + DocumentSearchResult result = documentService.searchDocuments( "Brief", null, null, null, null, null, null, null, null, null); - assertThat(result.get(0).getId()).isEqualTo(id1); + assertThat(result.documents().get(0).getId()).isEqualTo(id1); } } diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java index 67db2519..9d6d666b 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java @@ -6,11 +6,14 @@ import org.mockito.ArgumentCaptor; import org.mockito.InjectMocks; import org.mockito.Mock; import 
org.mockito.junit.jupiter.MockitoExtension; +import org.raddatz.familienarchiv.dto.DocumentSearchResult; +import org.raddatz.familienarchiv.dto.DocumentSort; import org.raddatz.familienarchiv.dto.DocumentUpdateDTO; import org.raddatz.familienarchiv.dto.IncompleteDocumentDTO; +import org.raddatz.familienarchiv.dto.MatchOffset; +import org.raddatz.familienarchiv.dto.SearchMatchData; import org.raddatz.familienarchiv.exception.DomainException; import org.raddatz.familienarchiv.model.Document; -import org.raddatz.familienarchiv.dto.DocumentSort; import org.raddatz.familienarchiv.model.DocumentStatus; import org.raddatz.familienarchiv.model.Person; import org.raddatz.familienarchiv.model.Tag; @@ -22,6 +25,7 @@ import org.springframework.data.domain.Sort; import org.springframework.mock.web.MockMultipartFile; import java.time.LocalDate; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Optional; @@ -1287,11 +1291,11 @@ class DocumentServiceTest { when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) .thenReturn(List.of(withSender, noSender)); - List result = documentService.searchDocuments( + DocumentSearchResult result = documentService.searchDocuments( null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc"); - assertThat(result).hasSize(2); - assertThat(result).extracting(Document::getTitle).containsExactly("Has Sender", "No Sender"); + assertThat(result.documents()).hasSize(2); + assertThat(result.documents()).extracting(Document::getTitle).containsExactly("Has Sender", "No Sender"); } // ─── searchDocuments — RECEIVER sort, empty receivers ─────────────────────── @@ -1307,10 +1311,10 @@ class DocumentServiceTest { when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) .thenReturn(List.of(noReceivers, withReceiver)); - List result = documentService.searchDocuments( + DocumentSearchResult result = 
documentService.searchDocuments( null, null, null, null, null, null, null, null, DocumentSort.RECEIVER, "asc"); - assertThat(result).extracting(Document::getTitle) + assertThat(result.documents()).extracting(Document::getTitle) .containsExactly("Has Receiver", "No Receivers"); } @@ -1329,11 +1333,63 @@ class DocumentServiceTest { when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) .thenReturn(List.of(docNullName, docSmith)); - List result = documentService.searchDocuments( + DocumentSearchResult result = documentService.searchDocuments( null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc"); // null lastName should sort to end (treated as empty), not before "smith" (as "null") - assertThat(result).extracting(Document::getTitle) + assertThat(result.documents()).extracting(Document::getTitle) .containsExactly("smith doc", "Null lastname doc"); } + + // ─── searchDocuments — match data enrichment ────────────────────────────── + + @Test + void searchDocuments_withTextQuery_includesMatchDataWithTitleOffsets() { + UUID docId = UUID.randomUUID(); + Document doc = Document.builder().id(docId).title("Brief an Anna").build(); + // chr(1)=\u0001 marks start, chr(2)=\u0002 marks end of highlighted term + List rows = Collections.singletonList(new Object[]{docId, "\u0001Brief\u0002 an Anna", null, false, null, null}); + + when(documentRepository.findRankedIdsByFts("Brief")).thenReturn(List.of(docId)); + when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) + .thenReturn(List.of(doc)); + when(documentRepository.findEnrichmentData(any(), eq("Brief"))).thenReturn(rows); + + DocumentSearchResult result = documentService.searchDocuments( + "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null); + + assertThat(result.matchData()).containsKey(docId); + SearchMatchData md = result.matchData().get(docId); + assertThat(md.titleOffsets()).hasSize(1); + 
assertThat(md.titleOffsets().get(0)).isEqualTo(new MatchOffset(0, 5)); // "Brief" = 5 chars at pos 0 + } + + @Test + void searchDocuments_withoutTextQuery_returnsEmptyMatchData() { + when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Sort.class))) + .thenReturn(List.of()); + + DocumentSearchResult result = documentService.searchDocuments( + null, null, null, null, null, null, null, null, null, null); + + assertThat(result.matchData()).isEmpty(); + } + + @Test + void searchDocuments_withTextQuery_includesTranscriptionSnippetWhenPresent() { + UUID docId = UUID.randomUUID(); + Document doc = Document.builder().id(docId).title("Dok").build(); + List rows = Collections.singletonList(new Object[]{docId, "Dok", "Hier ist der Brief aus Berlin", false, null, null}); + + when(documentRepository.findRankedIdsByFts("Brief")).thenReturn(List.of(docId)); + when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) + .thenReturn(List.of(doc)); + when(documentRepository.findEnrichmentData(any(), eq("Brief"))).thenReturn(rows); + + DocumentSearchResult result = documentService.searchDocuments( + "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null); + + SearchMatchData md = result.matchData().get(docId); + assertThat(md.transcriptionSnippet()).isEqualTo("Hier ist der Brief aus Berlin"); + } } -- 2.49.1 From 9673cefe44b27099b01fe6662d4bacc96c3038ce Mon Sep 17 00:00:00 2001 From: Marcel Date: Wed, 15 Apr 2026 18:05:07 +0200 Subject: [PATCH 06/20] feat(search): add applyOffsets utility and regenerate API types with MatchOffset/SearchMatchData Co-Authored-By: Claude Sonnet 4.6 --- frontend/src/lib/generated/api.ts | 143 ++++++++++++++++++++---------- frontend/src/lib/search.spec.ts | 95 ++++++++++++++++++++ frontend/src/lib/search.ts | 46 ++++++++++ 3 files changed, 239 insertions(+), 45 deletions(-) create mode 100644 frontend/src/lib/search.spec.ts create mode 100644 
frontend/src/lib/search.ts diff --git a/frontend/src/lib/generated/api.ts b/frontend/src/lib/generated/api.ts index fba18932..a111e919 100644 --- a/frontend/src/lib/generated/api.ts +++ b/frontend/src/lib/generated/api.ts @@ -628,6 +628,22 @@ export interface paths { patch: operations["editComment"]; trace?: never; }; + "/api/documents/{documentId}/annotations/{annotationId}": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + post?: never; + delete: operations["deleteAnnotation"]; + options?: never; + head?: never; + patch: operations["updateAnnotation"]; + trace?: never; + }; "/api/users/search": { parameters: { query?: never; @@ -1060,22 +1076,6 @@ export interface paths { patch?: never; trace?: never; }; - "/api/documents/{documentId}/annotations/{annotationId}": { - parameters: { - query?: never; - header?: never; - path?: never; - cookie?: never; - }; - get?: never; - put?: never; - post?: never; - delete: operations["deleteAnnotation"]; - options?: never; - head?: never; - patch?: never; - trace?: never; - }; } export type webhooks = Record; export interface components { @@ -1440,6 +1440,16 @@ export interface components { label?: string; enrolled?: boolean; }; + UpdateAnnotationDTO: { + /** Format: double */ + x?: number; + /** Format: double */ + y?: number; + /** Format: double */ + width?: number; + /** Format: double */ + height?: number; + }; StatsDTO: { /** Format: int64 */ totalPersons?: number; @@ -1451,17 +1461,17 @@ export interface components { /** Format: uuid */ id?: string; displayName?: string; + personType?: string; firstName?: string; lastName?: string; + /** Format: int64 */ + documentCount?: number; /** Format: int32 */ birthYear?: number; /** Format: int32 */ deathYear?: number; alias?: string; notes?: string; - /** Format: int64 */ - documentCount?: number; - personType?: string; }; TrainingInfoResponse: { /** Format: int32 */ @@ -1508,6 +1518,8 @@ export interface 
components { /** Format: int64 */ totalElements?: number; pageable?: components["schemas"]["PageableObject"]; + first?: boolean; + last?: boolean; /** Format: int32 */ size?: number; content?: components["schemas"]["NotificationDTO"][]; @@ -1516,8 +1528,6 @@ export interface components { sort?: components["schemas"]["SortObject"]; /** Format: int32 */ numberOfElements?: number; - first?: boolean; - last?: boolean; empty?: boolean; }; PageableObject: { @@ -1581,6 +1591,22 @@ export interface components { documents?: components["schemas"]["Document"][]; /** Format: int64 */ total?: number; + matchData: { + [key: string]: components["schemas"]["SearchMatchData"]; + }; + }; + MatchOffset: { + /** Format: int32 */ + start: number; + /** Format: int32 */ + length: number; + }; + SearchMatchData: { + transcriptionSnippet?: string; + titleOffsets: components["schemas"]["MatchOffset"][]; + senderMatched: boolean; + matchedReceiverIds: string[]; + matchedTagIds: string[]; }; IncompleteDocumentDTO: { /** Format: uuid */ @@ -2938,8 +2964,8 @@ export interface operations { }; }; responses: { - /** @description OK */ - 200: { + /** @description No Content */ + 204: { headers: { [name: string]: unknown; }; @@ -2995,6 +3021,54 @@ export interface operations { }; }; }; + deleteAnnotation: { + parameters: { + query?: never; + header?: never; + path: { + documentId: string; + annotationId: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description No Content */ + 204: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + }; + }; + updateAnnotation: { + parameters: { + query?: never; + header?: never; + path: { + documentId: string; + annotationId: string; + }; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["UpdateAnnotationDTO"]; + }; + }; + responses: { + /** @description OK */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "*/*": 
components["schemas"]["DocumentAnnotation"]; + }; + }; + }; + }; search: { parameters: { query?: { @@ -3425,7 +3499,7 @@ export interface operations { /** @description Filter by document status */ status?: "PLACEHOLDER" | "UPLOADED" | "TRANSCRIBED" | "REVIEWED" | "ARCHIVED"; /** @description Sort field */ - sort?: "DATE" | "TITLE" | "SENDER" | "RECEIVER" | "UPLOAD_DATE"; + sort?: "DATE" | "TITLE" | "SENDER" | "RECEIVER" | "UPLOAD_DATE" | "RELEVANCE"; /** @description Sort direction: ASC or DESC */ dir?: string; }; @@ -3602,25 +3676,4 @@ export interface operations { }; }; }; - deleteAnnotation: { - parameters: { - query?: never; - header?: never; - path: { - documentId: string; - annotationId: string; - }; - cookie?: never; - }; - requestBody?: never; - responses: { - /** @description No Content */ - 204: { - headers: { - [name: string]: unknown; - }; - content?: never; - }; - }; - }; } diff --git a/frontend/src/lib/search.spec.ts b/frontend/src/lib/search.spec.ts new file mode 100644 index 00000000..d6bd1397 --- /dev/null +++ b/frontend/src/lib/search.spec.ts @@ -0,0 +1,95 @@ +import { describe, expect, it } from 'vitest'; +import { applyOffsets } from './search'; + +describe('applyOffsets', () => { + it('returns single plain segment when offsets is empty', () => { + expect(applyOffsets('Hallo Welt', [])).toEqual([{ text: 'Hallo Welt', highlight: false }]); + }); + + it('highlights a single term at the start', () => { + expect(applyOffsets('Brief an Anna', [{ start: 0, length: 5 }])).toEqual([ + { text: 'Brief', highlight: true }, + { text: ' an Anna', highlight: false } + ]); + }); + + it('highlights a term in the middle', () => { + expect(applyOffsets('Der Brief von Anna', [{ start: 4, length: 5 }])).toEqual([ + { text: 'Der ', highlight: false }, + { text: 'Brief', highlight: true }, + { text: ' von Anna', highlight: false } + ]); + }); + + it('highlights a term at the end', () => { + expect(applyOffsets('Brief an Anna', [{ start: 9, length: 4 }])).toEqual([ + 
{ text: 'Brief an ', highlight: false }, + { text: 'Anna', highlight: true } + ]); + }); + + it('handles two non-overlapping offsets in order', () => { + expect( + applyOffsets('Anna und Brief', [ + { start: 0, length: 4 }, + { start: 9, length: 5 } + ]) + ).toEqual([ + { text: 'Anna', highlight: true }, + { text: ' und ', highlight: false }, + { text: 'Brief', highlight: true } + ]); + }); + + it('merges overlapping offsets into the longest span', () => { + // [0,7) and [3,9) overlap → merged [0,max(7,9)) = [0,9) = "Hello wor" + expect( + applyOffsets('Hello world', [ + { start: 0, length: 7 }, + { start: 3, length: 6 } + ]) + ).toEqual([ + { text: 'Hello wor', highlight: true }, + { text: 'ld', highlight: false } + ]); + }); + + it('merges adjacent (touching) offsets', () => { + // [0,3) and [3,6) are adjacent → merged [0,6) + expect( + applyOffsets('Hallo Welt', [ + { start: 0, length: 3 }, + { start: 3, length: 3 } + ]) + ).toEqual([ + { text: 'Hallo ', highlight: true }, + { text: 'Welt', highlight: false } + ]); + }); + + it('clamps offset that extends beyond text length', () => { + expect(applyOffsets('Hi', [{ start: 0, length: 100 }])).toEqual([ + { text: 'Hi', highlight: true } + ]); + }); + + it('ignores a completely out-of-bounds offset', () => { + expect(applyOffsets('Hi', [{ start: 10, length: 5 }])).toEqual([ + { text: 'Hi', highlight: false } + ]); + }); + + it('sorts unsorted offsets correctly', () => { + // Offsets provided in reverse order: second term first + expect( + applyOffsets('Anna und Brief', [ + { start: 9, length: 5 }, + { start: 0, length: 4 } + ]) + ).toEqual([ + { text: 'Anna', highlight: true }, + { text: ' und ', highlight: false }, + { text: 'Brief', highlight: true } + ]); + }); +}); diff --git a/frontend/src/lib/search.ts b/frontend/src/lib/search.ts new file mode 100644 index 00000000..51b28b86 --- /dev/null +++ b/frontend/src/lib/search.ts @@ -0,0 +1,46 @@ +export type TextSegment = { text: string; highlight: boolean }; + 
+export type MatchOffset = { start: number; length: number }; + +/** + * Converts a flat string and a list of character-level highlight offsets into + * an array of text segments that can be rendered without {@html}. + * + * Offsets are sorted and merged (overlapping spans become the longest enclosing + * span) before processing. Out-of-bounds offsets are clamped or dropped. + * + * @param text The display text (no delimiter characters). + * @param offsets Character offsets produced by the backend (Java char positions, + * compatible with JavaScript String indexing). + */ +export function applyOffsets(text: string, offsets: MatchOffset[]): TextSegment[] { + if (!offsets.length) return [{ text, highlight: false }]; + + // Sort by start position and merge overlapping / adjacent spans + const sorted = [...offsets].sort((a, b) => a.start - b.start); + const merged: { start: number; end: number }[] = []; + for (const { start, length } of sorted) { + const end = start + length; + if (end <= 0 || start >= text.length) continue; // completely out of bounds + const clampedStart = Math.max(0, start); + const clampedEnd = Math.min(text.length, end); + const last = merged[merged.length - 1]; + if (!last || clampedStart > last.end) { + merged.push({ start: clampedStart, end: clampedEnd }); + } else { + last.end = Math.max(last.end, clampedEnd); + } + } + + if (!merged.length) return [{ text, highlight: false }]; + + const segments: TextSegment[] = []; + let pos = 0; + for (const { start, end } of merged) { + if (pos < start) segments.push({ text: text.slice(pos, start), highlight: false }); + segments.push({ text: text.slice(start, end), highlight: true }); + pos = end; + } + if (pos < text.length) segments.push({ text: text.slice(pos), highlight: false }); + return segments; +} -- 2.49.1 From 93c78433cf445367ed9b4ce6d2267228061e1625 Mon Sep 17 00:00:00 2001 From: Marcel Date: Wed, 15 Apr 2026 18:34:14 +0200 Subject: [PATCH 07/20] feat(search): render title highlights and 
transcription snippets in DocumentList Co-Authored-By: Claude Sonnet 4.6 --- frontend/src/routes/DocumentList.svelte | 25 +++++- .../src/routes/DocumentList.svelte.spec.ts | 78 ++++++++++++++++++- 2 files changed, 100 insertions(+), 3 deletions(-) diff --git a/frontend/src/routes/DocumentList.svelte b/frontend/src/routes/DocumentList.svelte index 78aaa09e..06dc446b 100644 --- a/frontend/src/routes/DocumentList.svelte +++ b/frontend/src/routes/DocumentList.svelte @@ -4,6 +4,8 @@ import { m } from '$lib/paraglide/messages.js'; import { formatDate } from '$lib/utils/date'; import { groupDocuments } from '$lib/utils/groupDocuments'; import GroupDivider from '$lib/components/GroupDivider.svelte'; +import { applyOffsets } from '$lib/search'; +import type { components } from '$lib/generated/api'; let { documents, @@ -11,7 +13,8 @@ let { error, total = 0, q = '', - sort + sort, + matchData = {} }: { documents: { id: string; @@ -28,6 +31,7 @@ let { total?: number; q?: string; sort?: string; + matchData?: Record; } = $props(); const fallbackLabel = $derived( @@ -75,6 +79,10 @@ const showDividers = $derived(groupedDocuments.length >= 2); {/if}