diff --git a/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java b/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java index 0bff2476..91e3c250 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java @@ -208,8 +208,7 @@ public class DocumentController { if (!"ASC".equalsIgnoreCase(dir) && !"DESC".equalsIgnoreCase(dir)) { throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "dir must be ASC or DESC"); } - List results = documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir); - return ResponseEntity.ok(DocumentSearchResult.of(results)); + return ResponseEntity.ok(documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir)); } // --- TRAINING LABELS --- diff --git a/backend/src/main/java/org/raddatz/familienarchiv/dto/DocumentSearchResult.java b/backend/src/main/java/org/raddatz/familienarchiv/dto/DocumentSearchResult.java index a0c4af45..525dec84 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/dto/DocumentSearchResult.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/dto/DocumentSearchResult.java @@ -1,16 +1,35 @@ package org.raddatz.familienarchiv.dto; +import io.swagger.v3.oas.annotations.media.Schema; import org.raddatz.familienarchiv.model.Document; import java.util.List; +import java.util.Map; +import java.util.UUID; -public record DocumentSearchResult(List documents, long total) { +public record DocumentSearchResult( + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + List documents, + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + long total, + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + Map matchData +) { /** - * Creates a result where total equals the list size. 
+ * Creates a fully-enriched result from documents and their match overlay data. + * Absent map entries (e.g. document deleted between FTS and enrichment) are safe — + * the frontend treats a missing entry as "no match data". + */ + public static DocumentSearchResult withMatchData(List documents, Map matchData) { + return new DocumentSearchResult(documents, documents.size(), matchData); + } + + /** + * Creates a result without match data — used for filter-only searches (no text query). * No pagination yet — the full matched set is always returned. * When pagination is added, total must come from a DB COUNT query, not list.size(). */ public static DocumentSearchResult of(List documents) { - return new DocumentSearchResult(documents, documents.size()); + return withMatchData(documents, Map.of()); } } diff --git a/backend/src/main/java/org/raddatz/familienarchiv/dto/MatchOffset.java b/backend/src/main/java/org/raddatz/familienarchiv/dto/MatchOffset.java new file mode 100644 index 00000000..65d72918 --- /dev/null +++ b/backend/src/main/java/org/raddatz/familienarchiv/dto/MatchOffset.java @@ -0,0 +1,14 @@ +package org.raddatz.familienarchiv.dto; + +import io.swagger.v3.oas.annotations.media.Schema; + +/** + * Character-level offset of a highlighted term within a text field. + * Offsets are Java {@code String} character positions (UTF-16 code units), + * which are identical to JavaScript string positions — consistent end-to-end + * for all German BMP characters (ä, ö, ü, ß, etc.). 
+ */ +public record MatchOffset( + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) int start, + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) int length +) {} diff --git a/backend/src/main/java/org/raddatz/familienarchiv/dto/SearchMatchData.java b/backend/src/main/java/org/raddatz/familienarchiv/dto/SearchMatchData.java new file mode 100644 index 00000000..a1cc142c --- /dev/null +++ b/backend/src/main/java/org/raddatz/familienarchiv/dto/SearchMatchData.java @@ -0,0 +1,67 @@ +package org.raddatz.familienarchiv.dto; + +import io.swagger.v3.oas.annotations.media.Schema; + +import java.util.List; +import java.util.UUID; + +/** + * Match signals for a single document in a full-text search result. + * All fields are non-null except {@code transcriptionSnippet} and {@code summarySnippet}, + * which are null when the respective field did not match the query. + */ +public record SearchMatchData( + /** + * Best-ranked matching transcription line, or null if no block matched. + */ + String transcriptionSnippet, + + /** + * Character offsets of highlighted terms within the document title. + * Empty when the title did not contribute to the match. + */ + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + List titleOffsets, + + /** + * True when the sender's name matched the query. + */ + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + boolean senderMatched, + + /** + * IDs of receiver persons whose names matched the query. + */ + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + List matchedReceiverIds, + + /** + * IDs of tags whose names matched the query. + */ + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + List matchedTagIds, + + /** + * Character offsets of highlighted terms within the transcription snippet. + * Empty when no transcription block matched or the snippet has no highlights. 
+ */ + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + List snippetOffsets, + + /** + * Highlighted summary excerpt, or null if the summary did not match the query. + */ + String summarySnippet, + + /** + * Character offsets of highlighted terms within the summary snippet. + * Empty when the summary did not match or has no highlights. + */ + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + List summaryOffsets +) { + /** Canonical "no match data" value for a single document. */ + public static SearchMatchData empty() { + return new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of()); + } +} diff --git a/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java index 3a183ded..022a2ebb 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java @@ -83,10 +83,88 @@ public interface DocumentRepository extends JpaRepository, JpaSp @Query(nativeQuery = true, value = """ SELECT d.id FROM documents d - WHERE d.search_vector @@ websearch_to_tsquery('german', :query) - ORDER BY ts_rank(d.search_vector, websearch_to_tsquery('german', :query)) DESC, + CROSS JOIN LATERAL ( + SELECT CASE WHEN websearch_to_tsquery('german', :query)::text <> '' + THEN to_tsquery('german', regexp_replace( + websearch_to_tsquery('german', :query)::text, + '''([^'']+)''', + '''\\1'':*', + 'g')) + END AS pq + ) q + WHERE d.search_vector @@ q.pq + ORDER BY ts_rank(d.search_vector, q.pq) DESC, d.meta_date DESC NULLS LAST """) List findRankedIdsByFts(@Param("query") String query); + /** + * Returns match-enrichment data for a set of documents identified by their IDs. + * Each row contains (in column order): + *
     + * <ol>
     + *   <li>UUID — document id</li>
     + *   <li>String — title headline with \x01/\x02 delimiters around matched terms</li>
     + *   <li>String — best-ranked transcription snippet with \x01/\x02 delimiters, or null</li>
     + *   <li>Boolean — whether the sender's name matched the query</li>
     + *   <li>String — comma-separated matched receiver UUIDs, or null</li>
     + *   <li>String — comma-separated matched tag UUIDs, or null</li>
     + *   <li>String — summary snippet with \x01/\x02 delimiters, or null if summary didn't match</li>
     + * </ol>
+ * Short-circuit before calling this method when {@code ids} is empty or {@code query} is blank. + */ + @Query(nativeQuery = true, value = """ + SELECT + d.id, + ts_headline('german', d.title, q.pq, + 'StartSel=' || chr(1) || ',StopSel=' || chr(2) || ',HighlightAll=true') + AS title_headline, + CASE WHEN best_block.text IS NOT NULL THEN + ts_headline('german', best_block.text, q.pq, + 'StartSel=' || chr(1) || ',StopSel=' || chr(2) || ',MaxWords=50,MinWords=20') + END AS transcription_snippet, + (s.id IS NOT NULL AND + to_tsvector('german', COALESCE(s.first_name, '') || ' ' || COALESCE(s.last_name, '')) + @@ q.pq) + AS sender_matched, + (SELECT string_agg(r.id::text, ',') + FROM document_receivers dr + JOIN persons r ON r.id = dr.person_id + WHERE dr.document_id = d.id + AND to_tsvector('german', COALESCE(r.first_name, '') || ' ' || r.last_name) + @@ q.pq + ) AS matched_receiver_ids, + (SELECT string_agg(t.id::text, ',') + FROM document_tags dt + JOIN tag t ON t.id = dt.tag_id + WHERE dt.document_id = d.id + AND to_tsvector('german', t.name) @@ q.pq + ) AS matched_tag_ids, + CASE WHEN d.summary IS NOT NULL AND d.summary <> '' + AND to_tsvector('german', d.summary) @@ q.pq + THEN ts_headline('german', d.summary, q.pq, + 'StartSel=' || chr(1) || ',StopSel=' || chr(2) || ',MaxWords=50,MinWords=20') + END AS summary_snippet + FROM documents d + CROSS JOIN LATERAL ( + SELECT CASE WHEN websearch_to_tsquery('german', :query)::text <> '' + THEN to_tsquery('german', regexp_replace( + websearch_to_tsquery('german', :query)::text, + '''([^'']+)''', + '''\\1'':*', + 'g')) + END AS pq + ) q + LEFT JOIN persons s ON s.id = d.sender_id + LEFT JOIN LATERAL ( + SELECT tb.text + FROM transcription_blocks tb + WHERE tb.document_id = d.id + AND to_tsvector('german', tb.text) @@ q.pq + ORDER BY ts_rank(to_tsvector('german', tb.text), q.pq) DESC + LIMIT 1 + ) best_block ON true + WHERE d.id IN :ids + """) + List findEnrichmentData(@Param("ids") Collection ids, @Param("query") String 
query); + } \ No newline at end of file diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java index 46d4a4de..ab50ab22 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java @@ -3,10 +3,13 @@ package org.raddatz.familienarchiv.service; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.raddatz.familienarchiv.dto.DocumentSearchResult; +import org.raddatz.familienarchiv.dto.DocumentSort; import org.raddatz.familienarchiv.dto.DocumentUpdateDTO; import org.raddatz.familienarchiv.dto.IncompleteDocumentDTO; +import org.raddatz.familienarchiv.dto.MatchOffset; +import org.raddatz.familienarchiv.dto.SearchMatchData; import org.raddatz.familienarchiv.model.Document; -import org.raddatz.familienarchiv.dto.DocumentSort; import org.raddatz.familienarchiv.model.DocumentStatus; import org.raddatz.familienarchiv.model.ScriptType; import org.raddatz.familienarchiv.model.TrainingLabel; @@ -290,13 +293,13 @@ public class DocumentService { } // 1. Allgemeine Suche (für das Suchfeld im Frontend) - public List searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir) { + public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir) { boolean hasText = StringUtils.hasText(text); List rankedIds = null; if (hasText) { rankedIds = documentRepository.findRankedIdsByFts(text); - if (rankedIds.isEmpty()) return List.of(); + if (rankedIds.isEmpty()) return DocumentSearchResult.withMatchData(List.of(), Map.of()); } Specification textSpec = hasText ? 
hasIds(rankedIds) : (root, query, cb) -> null; @@ -312,11 +315,13 @@ public class DocumentService { // generates an INNER JOIN that silently drops documents with null sender/receivers. if (sort == DocumentSort.RECEIVER) { List results = documentRepository.findAll(spec); - return sortByFirstReceiver(results, dir); + List sorted = sortByFirstReceiver(results, dir); + return DocumentSearchResult.withMatchData(sorted, enrichWithMatchData(sorted, text)); } if (sort == DocumentSort.SENDER) { List results = documentRepository.findAll(spec); - return sortBySender(results, dir); + List sorted = sortBySender(results, dir); + return DocumentSearchResult.withMatchData(sorted, enrichWithMatchData(sorted, text)); } // RELEVANCE: default when text present and no explicit sort given @@ -325,14 +330,16 @@ public class DocumentService { List results = documentRepository.findAll(spec); Map rankMap = new HashMap<>(); for (int i = 0; i < rankedIds.size(); i++) rankMap.put(rankedIds.get(i), i); - return results.stream() + List sorted = results.stream() .sorted(Comparator.comparingInt( doc -> rankMap.getOrDefault(doc.getId(), Integer.MAX_VALUE))) .toList(); + return DocumentSearchResult.withMatchData(sorted, enrichWithMatchData(sorted, text)); } Sort springSort = resolveSort(sort, dir); - return documentRepository.findAll(spec, springSort); + List results = documentRepository.findAll(spec, springSort); + return DocumentSearchResult.withMatchData(results, enrichWithMatchData(results, text)); } private Sort resolveSort(DocumentSort sort, String dir) { @@ -584,6 +591,93 @@ public class DocumentService { return null; } + /** + * Calls {@code findEnrichmentData} and converts the raw Object[] rows into a + * {@link SearchMatchData} per document. Short-circuits when the list is empty or + * the query is blank (no text search active). 
+ */ + private Map enrichWithMatchData(List docs, String query) { + if (docs.isEmpty() || !StringUtils.hasText(query)) return Map.of(); + List ids = docs.stream().map(Document::getId).toList(); + Map result = new HashMap<>(); + for (Object[] row : documentRepository.findEnrichmentData(ids, query)) { + UUID docId = (UUID) row[0]; + String titleHeadline = (String) row[1]; + String snippetHeadline = (String) row[2]; + Boolean senderMatched = (Boolean) row[3]; + String receiverIdsStr = (String) row[4]; + String tagIdsStr = (String) row[5]; + String summaryHeadline = (String) row[6]; + ParsedHighlight snippet = parseHighlight(snippetHeadline); + ParsedHighlight summary = parseHighlight(summaryHeadline); + result.put(docId, new SearchMatchData( + snippet != null ? snippet.cleanText() : null, + parseTitleOffsets(titleHeadline), + senderMatched != null && senderMatched, + parseUUIDs(receiverIdsStr), + parseUUIDs(tagIdsStr), + snippet != null ? snippet.offsets() : List.of(), + summary != null ? summary.cleanText() : null, + summary != null ? summary.offsets() : List.of() + )); + } + return result; + } + + /** Clean text + highlight offsets parsed from a {@code ts_headline} sentinel-delimited string. */ + public record ParsedHighlight(String cleanText, List offsets) {} + + /** + * Parses a {@code ts_headline} result that uses {@code chr(1)}/{@code chr(2)} as + * start/stop delimiters. Returns the clean text (delimiters stripped) together with + * the character offsets of each highlighted span. Returns {@code null} when + * {@code headline} is {@code null}. 
+ */ + public static ParsedHighlight parseHighlight(String headline) { + if (headline == null) return null; + StringBuilder clean = new StringBuilder(headline.length()); + List offsets = new ArrayList<>(); + int i = 0; + int pos = 0; // position in the clean string (no delimiters) + while (i < headline.length()) { + char c = headline.charAt(i); + if (c == '\u0001') { + int start = pos; + i++; + while (i < headline.length() && headline.charAt(i) != '\u0002') { + clean.append(headline.charAt(i)); + i++; + pos++; + } + offsets.add(new MatchOffset(start, pos - start)); + i++; // skip \u0002 + } else { + clean.append(c); + i++; + pos++; + } + } + return new ParsedHighlight(clean.toString(), offsets); + } + + /** + * Extracts only the {@link MatchOffset} list from a title headline. + * The clean title text comes from the {@link Document} entity itself. + */ + private static List parseTitleOffsets(String headline) { + ParsedHighlight parsed = parseHighlight(headline); + return parsed != null ? parsed.offsets() : List.of(); + } + + private static List parseUUIDs(String csv) { + if (csv == null || csv.isBlank()) return List.of(); + return Arrays.stream(csv.split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .map(UUID::fromString) + .toList(); + } + private static String sha256Hex(byte[] bytes) { try { MessageDigest digest = MessageDigest.getInstance("SHA-256"); diff --git a/backend/src/main/resources/db/migration/V36__add_index_transcription_blocks_document_id.sql b/backend/src/main/resources/db/migration/V36__add_index_transcription_blocks_document_id.sql new file mode 100644 index 00000000..2c109324 --- /dev/null +++ b/backend/src/main/resources/db/migration/V36__add_index_transcription_blocks_document_id.sql @@ -0,0 +1,4 @@ +-- Index on transcription_blocks.document_id to speed up the LATERAL join +-- used in DocumentService.findEnrichmentData (FTS match enrichment). 
+CREATE INDEX IF NOT EXISTS idx_transcription_blocks_document_id + ON transcription_blocks (document_id); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/controller/DocumentControllerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/controller/DocumentControllerTest.java index 3b48e59d..c7e2f279 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/controller/DocumentControllerTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/controller/DocumentControllerTest.java @@ -1,6 +1,7 @@ package org.raddatz.familienarchiv.controller; import org.junit.jupiter.api.Test; +import org.raddatz.familienarchiv.dto.DocumentSearchResult; import org.raddatz.familienarchiv.dto.DocumentVersionSummary; import org.raddatz.familienarchiv.dto.IncompleteDocumentDTO; import org.raddatz.familienarchiv.model.Document; @@ -24,6 +25,7 @@ import org.springframework.test.web.servlet.MockMvc; import java.time.LocalDateTime; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.UUID; @@ -61,7 +63,7 @@ class DocumentControllerTest { @WithMockUser void search_returns200_whenAuthenticated() throws Exception { when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any())) - .thenReturn(Collections.emptyList()); + .thenReturn(DocumentSearchResult.of(List.of())); mockMvc.perform(get("/api/documents/search")) .andExpect(status().isOk()); @@ -71,7 +73,7 @@ class DocumentControllerTest { @WithMockUser void search_withStatusParam_passesItToService() throws Exception { when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any())) - .thenReturn(Collections.emptyList()); + .thenReturn(DocumentSearchResult.of(List.of())); mockMvc.perform(get("/api/documents/search").param("status", "REVIEWED")) .andExpect(status().isOk()); @@ -104,7 +106,7 @@ class DocumentControllerTest { @WithMockUser 
void search_responseContainsTotalCount() throws Exception { when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any())) - .thenReturn(Collections.emptyList()); + .thenReturn(DocumentSearchResult.of(List.of())); mockMvc.perform(get("/api/documents/search")) .andExpect(status().isOk()) @@ -112,6 +114,28 @@ class DocumentControllerTest { .andExpect(jsonPath("$.documents").isArray()); } + @Test + @WithMockUser + void search_responseBodyContainsMatchDataKey() throws Exception { + UUID docId = UUID.randomUUID(); + Document doc = Document.builder() + .id(docId) + .title("Brief an Anna") + .originalFilename("brief.pdf") + .status(DocumentStatus.UPLOADED) + .build(); + var matchData = new org.raddatz.familienarchiv.dto.SearchMatchData( + "Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of(), null, List.of()); + when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any())) + .thenReturn(DocumentSearchResult.withMatchData(List.of(doc), Map.of(docId, matchData))); + + mockMvc.perform(get("/api/documents/search").param("q", "Brief")) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.matchData").isMap()) + .andExpect(jsonPath("$.matchData." 
+ docId + ".transcriptionSnippet") + .value("Er schrieb einen langen Brief")); + } + // ─── POST /api/documents ───────────────────────────────────────────────── @Test diff --git a/backend/src/test/java/org/raddatz/familienarchiv/dto/DocumentSearchResultTest.java b/backend/src/test/java/org/raddatz/familienarchiv/dto/DocumentSearchResultTest.java new file mode 100644 index 00000000..36c50a23 --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/dto/DocumentSearchResultTest.java @@ -0,0 +1,68 @@ +package org.raddatz.familienarchiv.dto; + +import io.swagger.v3.oas.annotations.media.Schema; +import org.junit.jupiter.api.Test; +import org.raddatz.familienarchiv.model.Document; +import org.raddatz.familienarchiv.model.DocumentStatus; + +import java.util.List; +import java.util.Map; +import java.util.UUID; + +import static org.assertj.core.api.Assertions.assertThat; + +class DocumentSearchResultTest { + + private Document doc(UUID id) { + return Document.builder() + .id(id) + .title("Test") + .originalFilename("test.pdf") + .status(DocumentStatus.UPLOADED) + .build(); + } + + @Test + void withMatchData_total_equals_list_size() { + UUID id = UUID.randomUUID(); + List docs = List.of(doc(id)); + Map matchData = Map.of(id, SearchMatchData.empty()); + + DocumentSearchResult result = DocumentSearchResult.withMatchData(docs, matchData); + + assertThat(result.total()).isEqualTo(1L); + } + + @Test + void withMatchData_exposes_match_data_map() { + UUID id = UUID.randomUUID(); + SearchMatchData data = new SearchMatchData("snippet", List.of(), false, List.of(), List.of(), List.of(), null, List.of()); + DocumentSearchResult result = DocumentSearchResult.withMatchData(List.of(doc(id)), Map.of(id, data)); + + assertThat(result.matchData()).containsKey(id); + assertThat(result.matchData().get(id).transcriptionSnippet()).isEqualTo("snippet"); + } + + @Test + void of_factory_returns_empty_match_data() { + UUID id = UUID.randomUUID(); + DocumentSearchResult result = 
DocumentSearchResult.of(List.of(doc(id))); + + assertThat(result.matchData()).isEmpty(); + assertThat(result.total()).isEqualTo(1L); + } + + @Test + void documents_component_is_annotated_as_required_in_openapi_schema() throws NoSuchFieldException { + Schema schema = DocumentSearchResult.class.getDeclaredField("documents").getAnnotation(Schema.class); + assertThat(schema).isNotNull(); + assertThat(schema.requiredMode()).isEqualTo(Schema.RequiredMode.REQUIRED); + } + + @Test + void total_component_is_annotated_as_required_in_openapi_schema() throws NoSuchFieldException { + Schema schema = DocumentSearchResult.class.getDeclaredField("total").getAnnotation(Schema.class); + assertThat(schema).isNotNull(); + assertThat(schema.requiredMode()).isEqualTo(Schema.RequiredMode.REQUIRED); + } +} diff --git a/backend/src/test/java/org/raddatz/familienarchiv/dto/MatchOffsetTest.java b/backend/src/test/java/org/raddatz/familienarchiv/dto/MatchOffsetTest.java new file mode 100644 index 00000000..2021a640 --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/dto/MatchOffsetTest.java @@ -0,0 +1,22 @@ +package org.raddatz.familienarchiv.dto; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +class MatchOffsetTest { + + @Test + void should_hold_start_and_length() { + MatchOffset offset = new MatchOffset(6, 5); + + assertThat(offset.start()).isEqualTo(6); + assertThat(offset.length()).isEqualTo(5); + } + + @Test + void should_implement_value_equality() { + assertThat(new MatchOffset(0, 3)).isEqualTo(new MatchOffset(0, 3)); + assertThat(new MatchOffset(0, 3)).isNotEqualTo(new MatchOffset(0, 4)); + } +} diff --git a/backend/src/test/java/org/raddatz/familienarchiv/dto/SearchMatchDataTest.java b/backend/src/test/java/org/raddatz/familienarchiv/dto/SearchMatchDataTest.java new file mode 100644 index 00000000..a761fd7b --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/dto/SearchMatchDataTest.java @@ -0,0 
+1,69 @@ +package org.raddatz.familienarchiv.dto; + +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +class SearchMatchDataTest { + + @Test + void transcription_snippet_is_nullable() { + SearchMatchData data = new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of()); + + assertThat(data.transcriptionSnippet()).isNull(); + } + + @Test + void non_null_list_fields_are_empty_by_default_in_empty_factory() { + SearchMatchData data = SearchMatchData.empty(); + + assertThat(data.transcriptionSnippet()).isNull(); + assertThat(data.titleOffsets()).isEmpty(); + assertThat(data.matchedReceiverIds()).isEmpty(); + assertThat(data.matchedTagIds()).isEmpty(); + assertThat(data.senderMatched()).isFalse(); + } + + @Test + void holds_all_field_values() { + MatchOffset offset = new MatchOffset(0, 4); + SearchMatchData data = new SearchMatchData( + "schreibt dir aus dem Feld", + List.of(offset), + true, + List.of(), + List.of(), + List.of(), + null, + List.of() + ); + + assertThat(data.transcriptionSnippet()).isEqualTo("schreibt dir aus dem Feld"); + assertThat(data.titleOffsets()).containsExactly(offset); + assertThat(data.senderMatched()).isTrue(); + } + + @Test + void snippet_offsets_are_empty_in_empty_factory() { + SearchMatchData data = SearchMatchData.empty(); + assertThat(data.snippetOffsets()).isEmpty(); + } + + @Test + void snippet_offsets_carry_through_constructor() { + MatchOffset offset = new MatchOffset(5, 3); + SearchMatchData data = new SearchMatchData( + "Das ist ein furchtbares Bild", + List.of(), + false, + List.of(), + List.of(), + List.of(offset), + null, + List.of() + ); + assertThat(data.snippetOffsets()).containsExactly(offset); + } +} diff --git a/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentFtsTest.java b/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentFtsTest.java index 581cb063..38f64bc9 100644 --- 
a/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentFtsTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentFtsTest.java @@ -79,6 +79,16 @@ class DocumentFtsTest { assertThat(ids).hasSize(1); } + @Test + void should_find_document_by_partial_word_prefix() { + documentRepository.saveAndFlush(document("Ein furchtbarer Brief")); + em.clear(); + + List ids = documentRepository.findRankedIdsByFts("furchtb"); + + assertThat(ids).hasSize(1); + } + @Test void should_not_find_document_when_term_absent() { documentRepository.saveAndFlush(document("Familienfoto")); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentSearchEnrichmentTest.java b/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentSearchEnrichmentTest.java new file mode 100644 index 00000000..b0b69ad7 --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentSearchEnrichmentTest.java @@ -0,0 +1,307 @@ +package org.raddatz.familienarchiv.repository; + +import jakarta.persistence.EntityManager; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.raddatz.familienarchiv.PostgresContainerConfig; +import org.raddatz.familienarchiv.service.DocumentService; +import org.raddatz.familienarchiv.config.FlywayConfig; +import org.raddatz.familienarchiv.model.Document; +import org.raddatz.familienarchiv.model.DocumentAnnotation; +import org.raddatz.familienarchiv.model.DocumentStatus; +import org.raddatz.familienarchiv.model.Person; +import org.raddatz.familienarchiv.model.Tag; +import org.raddatz.familienarchiv.model.TranscriptionBlock; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.data.jpa.test.autoconfigure.DataJpaTest; +import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase; +import org.springframework.context.annotation.Import; + +import java.util.List; +import java.util.Set; +import 
java.util.UUID; + +import static org.assertj.core.api.Assertions.assertThat; + +@DataJpaTest +@AutoConfigureTestDatabase(replace = AutoConfigureTestDatabase.Replace.NONE) +@Import({PostgresContainerConfig.class, FlywayConfig.class}) +class DocumentSearchEnrichmentTest { + + @Autowired DocumentRepository documentRepository; + @Autowired PersonRepository personRepository; + @Autowired TagRepository tagRepository; + @Autowired AnnotationRepository annotationRepository; + @Autowired TranscriptionBlockRepository blockRepository; + @Autowired EntityManager em; + + @BeforeEach + void setUp() { + blockRepository.deleteAll(); + documentRepository.deleteAll(); + personRepository.deleteAll(); + tagRepository.deleteAll(); + } + + // ─── Lateral join: best transcription snippet ────────────────────────────── + + @Test + void lateral_join_returns_highest_ranked_transcription_block() { + Document doc = documentRepository.saveAndFlush(document("Brief an Anna")); + UUID annotId = annotation(doc.getId()); + // Three blocks — the one with three occurrences has highest rank + blockRepository.saveAndFlush(block(doc.getId(), annotId, "Das Wetter war schön", 0)); + blockRepository.saveAndFlush(block(doc.getId(), annotId, "Brief Brief Brief", 1)); // highest rank for "Brief" + blockRepository.saveAndFlush(block(doc.getId(), annotId, "Ein Brief liegt vor", 2)); // one occurrence + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Brief"); + + assertThat(rows).hasSize(1); + // row[2] is now a ts_headline result with sentinel chars — parse it for clean text + DocumentService.ParsedHighlight parsed = DocumentService.parseHighlight((String) rows.get(0)[2]); + assertThat(parsed).isNotNull(); + assertThat(parsed.cleanText()).isEqualTo("Brief Brief Brief"); + assertThat(parsed.offsets()).isNotEmpty(); // at least one "Brief" is highlighted + } + + @Test + void document_with_no_transcription_blocks_has_null_snippet() { + Document doc = 
documentRepository.saveAndFlush(document("Foto ohne Text")); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Foto"); + + assertThat(rows).hasSize(1); + Object snippet = rows.get(0)[2]; + assertThat(snippet).isNull(); + } + + @Test + void document_with_non_matching_blocks_has_null_snippet() { + Document doc = documentRepository.saveAndFlush(document("Dok")); + UUID annotId = annotation(doc.getId()); + blockRepository.saveAndFlush(block(doc.getId(), annotId, "Kein Match hier", 0)); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Brief"); + + assertThat(rows).hasSize(1); + assertThat(rows.get(0)[2]).isNull(); + } + + // ─── Title headline: delimiter-based offset detection ───────────────────── + + @Test + void title_headline_contains_delimiters_when_title_matches() { + Document doc = documentRepository.saveAndFlush(document("Brief an die Familie")); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Brief"); + + assertThat(rows).hasSize(1); + String headline = (String) rows.get(0)[1]; + // chr(1) marks the start of the highlighted term + assertThat(headline).contains("\u0001"); + assertThat(headline).contains("\u0002"); + } + + @Test + void title_headline_has_no_delimiters_when_title_does_not_match() { + Document doc = documentRepository.saveAndFlush(document("Familienfoto")); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Brief"); + + assertThat(rows).hasSize(1); + String headline = (String) rows.get(0)[1]; + assertThat(headline).doesNotContain("\u0001"); + assertThat(headline).doesNotContain("\u0002"); + } + + @Test + void title_headline_matches_stemmed_form() { + // "Brief" (singular, query) should match "Briefe" (plural, in title) via German FTS stemming. 
+ // Both reduce to the stem "brief" under the Snowball German algorithm — verified by the + // existing should_find_document_by_stemmed_inflected_form test in DocumentFtsTest. + Document doc = documentRepository.saveAndFlush(document("Alte Briefe aus Berlin")); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Brief"); + + assertThat(rows).hasSize(1); + String headline = (String) rows.get(0)[1]; + assertThat(headline).contains("\u0001"); + } + + // ─── Sender match ────────────────────────────────────────────────────────── + + @Test + void sender_matched_is_true_when_sender_last_name_matches_query() { + Person sender = personRepository.saveAndFlush( + Person.builder().firstName("Walter").lastName("Raddatz").build()); + Document doc = documentRepository.saveAndFlush(Document.builder() + .title("Brief") + .originalFilename("brief.pdf") + .status(DocumentStatus.UPLOADED) + .sender(sender) + .build()); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Raddatz"); + + assertThat(rows).hasSize(1); + Boolean senderMatched = (Boolean) rows.get(0)[3]; + assertThat(senderMatched).isTrue(); + } + + @Test + void sender_matched_is_false_when_sender_name_does_not_match() { + Person sender = personRepository.saveAndFlush( + Person.builder().firstName("Walter").lastName("Raddatz").build()); + Document doc = documentRepository.saveAndFlush(Document.builder() + .title("Brief") + .originalFilename("brief.pdf") + .status(DocumentStatus.UPLOADED) + .sender(sender) + .build()); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Schmidt"); + + assertThat(rows).hasSize(1); + Boolean senderMatched = (Boolean) rows.get(0)[3]; + assertThat(senderMatched).isFalse(); + } + + @Test + void sender_matched_is_false_when_document_has_no_sender() { + Document doc = documentRepository.saveAndFlush(document("Brief von unbekannt")); + 
em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Brief"); + + assertThat(rows).hasSize(1); + Boolean senderMatched = (Boolean) rows.get(0)[3]; + assertThat(senderMatched).isFalse(); + } + + // ─── Receiver match ──────────────────────────────────────────────────────── + + @Test + void matched_receiver_ids_contains_uuid_of_matching_receiver() { + Person receiver = personRepository.saveAndFlush( + Person.builder().firstName("Anna").lastName("Schmidt").build()); + Document doc = documentRepository.saveAndFlush(Document.builder() + .title("Brief") + .originalFilename("brief.pdf") + .status(DocumentStatus.UPLOADED) + .receivers(Set.of(receiver)) + .build()); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Schmidt"); + + assertThat(rows).hasSize(1); + String receiverIds = (String) rows.get(0)[4]; + assertThat(receiverIds).contains(receiver.getId().toString()); + } + + @Test + void matched_receiver_ids_is_null_when_no_receiver_matches() { + Person receiver = personRepository.saveAndFlush( + Person.builder().firstName("Anna").lastName("Schmidt").build()); + Document doc = documentRepository.saveAndFlush(Document.builder() + .title("Brief") + .originalFilename("brief.pdf") + .status(DocumentStatus.UPLOADED) + .receivers(Set.of(receiver)) + .build()); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Raddatz"); + + assertThat(rows).hasSize(1); + assertThat(rows.get(0)[4]).isNull(); + } + + // ─── Tag match ───────────────────────────────────────────────────────────── + + @Test + void matched_tag_ids_contains_uuid_of_matching_tag() { + Tag tag = tagRepository.saveAndFlush(Tag.builder().name("Familiengeschichte").build()); + Document doc = documentRepository.saveAndFlush(Document.builder() + .title("Dokument") + .originalFilename("dok.pdf") + .status(DocumentStatus.UPLOADED) + .tags(Set.of(tag)) + .build()); + 
em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Familiengeschichte"); + + assertThat(rows).hasSize(1); + String tagIds = (String) rows.get(0)[5]; + assertThat(tagIds).contains(tag.getId().toString()); + } + + @Test + void matched_tag_ids_is_null_when_no_tag_matches() { + Tag tag = tagRepository.saveAndFlush(Tag.builder().name("Familiengeschichte").build()); + Document doc = documentRepository.saveAndFlush(Document.builder() + .title("Dokument") + .originalFilename("dok.pdf") + .status(DocumentStatus.UPLOADED) + .tags(Set.of(tag)) + .build()); + em.flush(); + em.clear(); + + List rows = documentRepository.findEnrichmentData(List.of(doc.getId()), "Brief"); + + assertThat(rows).hasSize(1); + assertThat(rows.get(0)[5]).isNull(); + } + + // ─── Helpers ─────────────────────────────────────────────────────────────── + + private Document document(String title) { + return Document.builder() + .title(title) + .originalFilename(title.replace(" ", "_") + ".pdf") + .status(DocumentStatus.UPLOADED) + .build(); + } + + private UUID annotation(UUID documentId) { + DocumentAnnotation ann = annotationRepository.save(DocumentAnnotation.builder() + .documentId(documentId) + .pageNumber(1) + .x(0.1).y(0.2).width(0.3).height(0.4) + .color("#00C7B1") + .build()); + em.flush(); + return ann.getId(); + } + + private TranscriptionBlock block(UUID documentId, UUID annotationId, String text, int order) { + return TranscriptionBlock.builder() + .documentId(documentId) + .annotationId(annotationId) + .text(text) + .sortOrder(order) + .build(); + } +} diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceSortTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceSortTest.java index 44a7a51e..f089635c 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceSortTest.java +++ 
b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceSortTest.java @@ -5,6 +5,7 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.raddatz.familienarchiv.dto.DocumentSearchResult; import org.raddatz.familienarchiv.dto.DocumentSort; import org.raddatz.familienarchiv.model.Document; import org.raddatz.familienarchiv.model.DocumentStatus; @@ -51,12 +52,12 @@ class DocumentServiceSortTest { when(documentRepository.findAll(any(Specification.class), any(Sort.class))) .thenReturn(List.of(newer, older)); - List result = documentService.searchDocuments( + DocumentSearchResult result = documentService.searchDocuments( "Brief", null, null, null, null, null, null, null, DocumentSort.DATE, "DESC"); // Expect: date order (newer 1960 first), NOT rank order (older 1940 first) - assertThat(result).hasSize(2); - assertThat(result.get(0).getId()).isEqualTo(id2); // newer doc first + assertThat(result.documents()).hasSize(2); + assertThat(result.documents().get(0).getId()).isEqualTo(id2); // newer doc first } // ─── searchDocuments — RELEVANCE sort ───────────────────────────────────── @@ -73,11 +74,11 @@ class DocumentServiceSortTest { when(documentRepository.findAll(any(Specification.class))) .thenReturn(List.of(doc2, doc1)); // unordered from DB - List result = documentService.searchDocuments( + DocumentSearchResult result = documentService.searchDocuments( "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null); // Expect: rank order restored (id1 first) - assertThat(result.get(0).getId()).isEqualTo(id1); + assertThat(result.documents().get(0).getId()).isEqualTo(id1); } @Test @@ -92,9 +93,9 @@ class DocumentServiceSortTest { when(documentRepository.findAll(any(Specification.class))) .thenReturn(List.of(doc2, doc1)); - List result = documentService.searchDocuments( + DocumentSearchResult result = 
documentService.searchDocuments( "Brief", null, null, null, null, null, null, null, null, null); - assertThat(result.get(0).getId()).isEqualTo(id1); + assertThat(result.documents().get(0).getId()).isEqualTo(id1); } } diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java index 67db2519..e4baab52 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java @@ -6,11 +6,14 @@ import org.mockito.ArgumentCaptor; import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.raddatz.familienarchiv.dto.DocumentSearchResult; +import org.raddatz.familienarchiv.dto.DocumentSort; import org.raddatz.familienarchiv.dto.DocumentUpdateDTO; import org.raddatz.familienarchiv.dto.IncompleteDocumentDTO; +import org.raddatz.familienarchiv.dto.MatchOffset; +import org.raddatz.familienarchiv.dto.SearchMatchData; import org.raddatz.familienarchiv.exception.DomainException; import org.raddatz.familienarchiv.model.Document; -import org.raddatz.familienarchiv.dto.DocumentSort; import org.raddatz.familienarchiv.model.DocumentStatus; import org.raddatz.familienarchiv.model.Person; import org.raddatz.familienarchiv.model.Tag; @@ -22,6 +25,7 @@ import org.springframework.data.domain.Sort; import org.springframework.mock.web.MockMultipartFile; import java.time.LocalDate; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Optional; @@ -1287,11 +1291,11 @@ class DocumentServiceTest { when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) .thenReturn(List.of(withSender, noSender)); - List result = documentService.searchDocuments( + DocumentSearchResult result = documentService.searchDocuments( null, null, null, null, 
null, null, null, null, DocumentSort.SENDER, "asc"); - assertThat(result).hasSize(2); - assertThat(result).extracting(Document::getTitle).containsExactly("Has Sender", "No Sender"); + assertThat(result.documents()).hasSize(2); + assertThat(result.documents()).extracting(Document::getTitle).containsExactly("Has Sender", "No Sender"); } // ─── searchDocuments — RECEIVER sort, empty receivers ─────────────────────── @@ -1307,10 +1311,10 @@ class DocumentServiceTest { when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) .thenReturn(List.of(noReceivers, withReceiver)); - List result = documentService.searchDocuments( + DocumentSearchResult result = documentService.searchDocuments( null, null, null, null, null, null, null, null, DocumentSort.RECEIVER, "asc"); - assertThat(result).extracting(Document::getTitle) + assertThat(result.documents()).extracting(Document::getTitle) .containsExactly("Has Receiver", "No Receivers"); } @@ -1329,11 +1333,99 @@ class DocumentServiceTest { when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) .thenReturn(List.of(docNullName, docSmith)); - List result = documentService.searchDocuments( + DocumentSearchResult result = documentService.searchDocuments( null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc"); // null lastName should sort to end (treated as empty), not before "smith" (as "null") - assertThat(result).extracting(Document::getTitle) + assertThat(result.documents()).extracting(Document::getTitle) .containsExactly("smith doc", "Null lastname doc"); } + + // ─── searchDocuments — match data enrichment ────────────────────────────── + + @Test + void searchDocuments_withTextQuery_includesMatchDataWithTitleOffsets() { + UUID docId = UUID.randomUUID(); + Document doc = Document.builder().id(docId).title("Brief an Anna").build(); + // chr(1)=\u0001 marks start, chr(2)=\u0002 marks end of highlighted term + List rows = 
Collections.singletonList(new Object[]{docId, "\u0001Brief\u0002 an Anna", null, false, null, null}); + + when(documentRepository.findRankedIdsByFts("Brief")).thenReturn(List.of(docId)); + when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) + .thenReturn(List.of(doc)); + when(documentRepository.findEnrichmentData(any(), eq("Brief"))).thenReturn(rows); + + DocumentSearchResult result = documentService.searchDocuments( + "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null); + + assertThat(result.matchData()).containsKey(docId); + SearchMatchData md = result.matchData().get(docId); + assertThat(md.titleOffsets()).hasSize(1); + assertThat(md.titleOffsets().get(0)).isEqualTo(new MatchOffset(0, 5)); // "Brief" = 5 chars at pos 0 + } + + @Test + void searchDocuments_withoutTextQuery_returnsEmptyMatchData() { + when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Sort.class))) + .thenReturn(List.of()); + + DocumentSearchResult result = documentService.searchDocuments( + null, null, null, null, null, null, null, null, null, null); + + assertThat(result.matchData()).isEmpty(); + } + + @Test + void searchDocuments_withTextQuery_includesTranscriptionSnippetWhenPresent() { + UUID docId = UUID.randomUUID(); + Document doc = Document.builder().id(docId).title("Dok").build(); + // Simulate ts_headline output with sentinel markers around the matched word + String snippetHeadline = "Hier ist der \u0001Brief\u0002 aus Berlin"; + List rows = Collections.singletonList(new Object[]{docId, "Dok", snippetHeadline, false, null, null}); + + when(documentRepository.findRankedIdsByFts("Brief")).thenReturn(List.of(docId)); + when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) + .thenReturn(List.of(doc)); + when(documentRepository.findEnrichmentData(any(), eq("Brief"))).thenReturn(rows); + + DocumentSearchResult result = 
documentService.searchDocuments( + "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null); + + SearchMatchData md = result.matchData().get(docId); + assertThat(md.transcriptionSnippet()).isEqualTo("Hier ist der Brief aus Berlin"); + assertThat(md.snippetOffsets()).containsExactly(new MatchOffset(13, 5)); // "Brief" at pos 13 + } + + // ─── parseHighlight unit tests ──────────────────────────────────────────── + + @Test + void parseHighlight_returnsNull_whenInputIsNull() { + assertThat(DocumentService.parseHighlight(null)).isNull(); + } + + @Test + void parseHighlight_returnsCleanTextAndEmptyOffsets_whenNoSentinels() { + DocumentService.ParsedHighlight result = DocumentService.parseHighlight("plain text"); + assertThat(result.cleanText()).isEqualTo("plain text"); + assertThat(result.offsets()).isEmpty(); + } + + @Test + void parseHighlight_extractsOffsetAndStripsDelimiters() { + // \u0001 = start sentinel, \u0002 = stop sentinel + DocumentService.ParsedHighlight result = DocumentService.parseHighlight("Das \u0001furchtbare\u0002 Wort"); + assertThat(result.cleanText()).isEqualTo("Das furchtbare Wort"); + assertThat(result.offsets()).containsExactly(new MatchOffset(4, 10)); // "furchtbare" at pos 4, len 10 + } + + @Test + void parseHighlight_handlesMultipleHighlightedTerms() { + DocumentService.ParsedHighlight result = + DocumentService.parseHighlight("\u0001Hallo\u0002 und \u0001Welt\u0002"); + assertThat(result.cleanText()).isEqualTo("Hallo und Welt"); + assertThat(result.offsets()).containsExactly( + new MatchOffset(0, 5), // "Hallo" + new MatchOffset(10, 4) // "Welt" + ); + } } diff --git a/frontend/messages/de.json b/frontend/messages/de.json index c2f33689..81350a62 100644 --- a/frontend/messages/de.json +++ b/frontend/messages/de.json @@ -78,6 +78,8 @@ "docs_empty_btn_clear": "Alle Filter löschen", "docs_list_from": "Von", "docs_list_to": "An", + "docs_list_content": "Inhalt", + "docs_list_summary": "Zusammenfassung", 
"docs_list_unknown": "Unbekannt", "docs_group_undated": "Undatiert", "docs_group_unknown": "Unbekannt", diff --git a/frontend/messages/en.json b/frontend/messages/en.json index c7f784a5..bbbd0f07 100644 --- a/frontend/messages/en.json +++ b/frontend/messages/en.json @@ -78,6 +78,8 @@ "docs_empty_btn_clear": "Clear all filters", "docs_list_from": "From", "docs_list_to": "To", + "docs_list_content": "Content", + "docs_list_summary": "Summary", "docs_list_unknown": "Unknown", "docs_group_undated": "Undated", "docs_group_unknown": "Unknown", diff --git a/frontend/messages/es.json b/frontend/messages/es.json index 2e06e421..2d7aba00 100644 --- a/frontend/messages/es.json +++ b/frontend/messages/es.json @@ -78,6 +78,8 @@ "docs_empty_btn_clear": "Borrar todos los filtros", "docs_list_from": "De", "docs_list_to": "Para", + "docs_list_content": "Contenido", + "docs_list_summary": "Resumen", "docs_list_unknown": "Desconocido", "docs_group_undated": "Sin fecha", "docs_group_unknown": "Desconocido", diff --git a/frontend/src/lib/components/PdfViewer.svelte b/frontend/src/lib/components/PdfViewer.svelte index 87a651f3..844aa338 100644 --- a/frontend/src/lib/components/PdfViewer.svelte +++ b/frontend/src/lib/components/PdfViewer.svelte @@ -56,21 +56,20 @@ onMount(async () => { await renderer.init(); }); -// Wire DOM elements to the renderer after they mount -$effect(() => { - if (canvasEl && textLayerEl) { - renderer.setElements(canvasEl, textLayerEl); - } -}); - $effect(() => { if (renderer.pdfjsReady && url) { renderer.loadDocument(url); } }); +// Wire DOM elements to the renderer and trigger rendering. +// canvasEl is read synchronously so Svelte tracks it as a dependency: +// when the canvas reappears after the loading spinner (loading → false), +// this effect re-fires and renders the already-loaded PDF. $effect(() => { - // Read scale and currentPage synchronously so Svelte tracks them as dependencies. 
+ if (!canvasEl || !textLayerEl) return; + renderer.setElements(canvasEl, textLayerEl); + // Also track currentPage and scale so page-nav / zoom re-renders work. if (renderer.isLoaded && renderer.currentPage && renderer.scale > 0) { renderer.renderCurrentPage().then(() => renderer.prerender()); } diff --git a/frontend/src/lib/generated/api.ts b/frontend/src/lib/generated/api.ts index fba18932..5d17827c 100644 --- a/frontend/src/lib/generated/api.ts +++ b/frontend/src/lib/generated/api.ts @@ -628,6 +628,22 @@ export interface paths { patch: operations["editComment"]; trace?: never; }; + "/api/documents/{documentId}/annotations/{annotationId}": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + post?: never; + delete: operations["deleteAnnotation"]; + options?: never; + head?: never; + patch: operations["updateAnnotation"]; + trace?: never; + }; "/api/users/search": { parameters: { query?: never; @@ -1060,22 +1076,6 @@ export interface paths { patch?: never; trace?: never; }; - "/api/documents/{documentId}/annotations/{annotationId}": { - parameters: { - query?: never; - header?: never; - path?: never; - cookie?: never; - }; - get?: never; - put?: never; - post?: never; - delete: operations["deleteAnnotation"]; - options?: never; - head?: never; - patch?: never; - trace?: never; - }; } export type webhooks = Record; export interface components { @@ -1440,6 +1440,16 @@ export interface components { label?: string; enrolled?: boolean; }; + UpdateAnnotationDTO: { + /** Format: double */ + x?: number; + /** Format: double */ + y?: number; + /** Format: double */ + width?: number; + /** Format: double */ + height?: number; + }; StatsDTO: { /** Format: int64 */ totalPersons?: number; @@ -1451,17 +1461,17 @@ export interface components { /** Format: uuid */ id?: string; displayName?: string; + personType?: string; firstName?: string; lastName?: string; + /** Format: int64 */ + documentCount?: number; 
/** Format: int32 */ birthYear?: number; /** Format: int32 */ deathYear?: number; alias?: string; notes?: string; - /** Format: int64 */ - documentCount?: number; - personType?: string; }; TrainingInfoResponse: { /** Format: int32 */ @@ -1508,6 +1518,8 @@ export interface components { /** Format: int64 */ totalElements?: number; pageable?: components["schemas"]["PageableObject"]; + first?: boolean; + last?: boolean; /** Format: int32 */ size?: number; content?: components["schemas"]["NotificationDTO"][]; @@ -1516,8 +1528,6 @@ export interface components { sort?: components["schemas"]["SortObject"]; /** Format: int32 */ numberOfElements?: number; - first?: boolean; - last?: boolean; empty?: boolean; }; PageableObject: { @@ -1578,9 +1588,28 @@ export interface components { totalPages?: number; }; DocumentSearchResult: { - documents?: components["schemas"]["Document"][]; + documents: components["schemas"]["Document"][]; /** Format: int64 */ - total?: number; + total: number; + matchData: { + [key: string]: components["schemas"]["SearchMatchData"]; + }; + }; + MatchOffset: { + /** Format: int32 */ + start: number; + /** Format: int32 */ + length: number; + }; + SearchMatchData: { + transcriptionSnippet?: string; + titleOffsets: components["schemas"]["MatchOffset"][]; + senderMatched: boolean; + matchedReceiverIds: string[]; + matchedTagIds: string[]; + snippetOffsets: components["schemas"]["MatchOffset"][]; + summarySnippet?: string; + summaryOffsets: components["schemas"]["MatchOffset"][]; }; IncompleteDocumentDTO: { /** Format: uuid */ @@ -2938,8 +2967,8 @@ export interface operations { }; }; responses: { - /** @description OK */ - 200: { + /** @description No Content */ + 204: { headers: { [name: string]: unknown; }; @@ -2995,6 +3024,54 @@ export interface operations { }; }; }; + deleteAnnotation: { + parameters: { + query?: never; + header?: never; + path: { + documentId: string; + annotationId: string; + }; + cookie?: never; + }; + requestBody?: never; + 
responses: { + /** @description No Content */ + 204: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + }; + }; + updateAnnotation: { + parameters: { + query?: never; + header?: never; + path: { + documentId: string; + annotationId: string; + }; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["UpdateAnnotationDTO"]; + }; + }; + responses: { + /** @description OK */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "*/*": components["schemas"]["DocumentAnnotation"]; + }; + }; + }; + }; search: { parameters: { query?: { @@ -3425,7 +3502,7 @@ export interface operations { /** @description Filter by document status */ status?: "PLACEHOLDER" | "UPLOADED" | "TRANSCRIBED" | "REVIEWED" | "ARCHIVED"; /** @description Sort field */ - sort?: "DATE" | "TITLE" | "SENDER" | "RECEIVER" | "UPLOAD_DATE"; + sort?: "DATE" | "TITLE" | "SENDER" | "RECEIVER" | "UPLOAD_DATE" | "RELEVANCE"; /** @description Sort direction: ASC or DESC */ dir?: string; }; @@ -3602,25 +3679,4 @@ export interface operations { }; }; }; - deleteAnnotation: { - parameters: { - query?: never; - header?: never; - path: { - documentId: string; - annotationId: string; - }; - cookie?: never; - }; - requestBody?: never; - responses: { - /** @description No Content */ - 204: { - headers: { - [name: string]: unknown; - }; - content?: never; - }; - }; - }; } diff --git a/frontend/src/lib/search.spec.ts b/frontend/src/lib/search.spec.ts new file mode 100644 index 00000000..512235c9 --- /dev/null +++ b/frontend/src/lib/search.spec.ts @@ -0,0 +1,110 @@ +import { describe, expect, it } from 'vitest'; +import { applyOffsets } from './search'; + +describe('applyOffsets', () => { + it('returns single plain segment when offsets is empty', () => { + expect(applyOffsets('Hallo Welt', [])).toEqual([{ text: 'Hallo Welt', highlight: false }]); + }); + + it('highlights a single term at the start', () => { + expect(applyOffsets('Brief an Anna', 
[{ start: 0, length: 5 }])).toEqual([ + { text: 'Brief', highlight: true }, + { text: ' an Anna', highlight: false } + ]); + }); + + it('highlights a term in the middle', () => { + expect(applyOffsets('Der Brief von Anna', [{ start: 4, length: 5 }])).toEqual([ + { text: 'Der ', highlight: false }, + { text: 'Brief', highlight: true }, + { text: ' von Anna', highlight: false } + ]); + }); + + it('highlights a term at the end', () => { + expect(applyOffsets('Brief an Anna', [{ start: 9, length: 4 }])).toEqual([ + { text: 'Brief an ', highlight: false }, + { text: 'Anna', highlight: true } + ]); + }); + + it('handles two non-overlapping offsets in order', () => { + expect( + applyOffsets('Anna und Brief', [ + { start: 0, length: 4 }, + { start: 9, length: 5 } + ]) + ).toEqual([ + { text: 'Anna', highlight: true }, + { text: ' und ', highlight: false }, + { text: 'Brief', highlight: true } + ]); + }); + + it('merges overlapping offsets into the longest span', () => { + // [0,7) and [3,9) overlap → merged [0,max(7,9)) = [0,9) = "Hello wor" + expect( + applyOffsets('Hello world', [ + { start: 0, length: 7 }, + { start: 3, length: 6 } + ]) + ).toEqual([ + { text: 'Hello wor', highlight: true }, + { text: 'ld', highlight: false } + ]); + }); + + it('merges adjacent (touching) offsets', () => { + // [0,3) and [3,6) are adjacent → merged [0,6) + expect( + applyOffsets('Hallo Welt', [ + { start: 0, length: 3 }, + { start: 3, length: 3 } + ]) + ).toEqual([ + { text: 'Hallo ', highlight: true }, + { text: 'Welt', highlight: false } + ]); + }); + + it('clamps offset that extends beyond text length', () => { + expect(applyOffsets('Hi', [{ start: 0, length: 100 }])).toEqual([ + { text: 'Hi', highlight: true } + ]); + }); + + it('ignores a completely out-of-bounds offset', () => { + expect(applyOffsets('Hi', [{ start: 10, length: 5 }])).toEqual([ + { text: 'Hi', highlight: false } + ]); + }); + + it('sorts unsorted offsets correctly', () => { + // Offsets provided in reverse order: 
second term first + expect( + applyOffsets('Anna und Brief', [ + { start: 9, length: 5 }, + { start: 0, length: 4 } + ]) + ).toEqual([ + { text: 'Anna', highlight: true }, + { text: ' und ', highlight: false }, + { text: 'Brief', highlight: true } + ]); + }); + + it('clamps negative start to 0 and highlights from the beginning', () => { + // start = -2, length = 5 → effective range [-2, 3) → clamped to [0, 3) + expect(applyOffsets('Hello', [{ start: -2, length: 5 }])).toEqual([ + { text: 'Hel', highlight: true }, + { text: 'lo', highlight: false } + ]); + }); + + it('ignores offset whose end is also negative', () => { + // start = -5, length = 2 → end = -3, completely before text + expect(applyOffsets('Hi', [{ start: -5, length: 2 }])).toEqual([ + { text: 'Hi', highlight: false } + ]); + }); +}); diff --git a/frontend/src/lib/search.ts b/frontend/src/lib/search.ts new file mode 100644 index 00000000..51b28b86 --- /dev/null +++ b/frontend/src/lib/search.ts @@ -0,0 +1,46 @@ +export type TextSegment = { text: string; highlight: boolean }; + +export type MatchOffset = { start: number; length: number }; + +/** + * Converts a flat string and a list of character-level highlight offsets into + * an array of text segments that can be rendered without {@html}. + * + * Offsets are sorted and merged (overlapping spans become the longest enclosing + * span) before processing. Out-of-bounds offsets are clamped or dropped. + * + * @param text The display text (no delimiter characters). + * @param offsets Character offsets produced by the backend (Java char positions, + * compatible with JavaScript String indexing). 
+ */ +export function applyOffsets(text: string, offsets: MatchOffset[]): TextSegment[] { + if (!offsets.length) return [{ text, highlight: false }]; + + // Sort by start position and merge overlapping / adjacent spans + const sorted = [...offsets].sort((a, b) => a.start - b.start); + const merged: { start: number; end: number }[] = []; + for (const { start, length } of sorted) { + const end = start + length; + if (end <= 0 || start >= text.length) continue; // completely out of bounds + const clampedStart = Math.max(0, start); + const clampedEnd = Math.min(text.length, end); + const last = merged[merged.length - 1]; + if (!last || clampedStart > last.end) { + merged.push({ start: clampedStart, end: clampedEnd }); + } else { + last.end = Math.max(last.end, clampedEnd); + } + } + + if (!merged.length) return [{ text, highlight: false }]; + + const segments: TextSegment[] = []; + let pos = 0; + for (const { start, end } of merged) { + if (pos < start) segments.push({ text: text.slice(pos, start), highlight: false }); + segments.push({ text: text.slice(start, end), highlight: true }); + pos = end; + } + if (pos < text.length) segments.push({ text: text.slice(pos), highlight: false }); + return segments; +} diff --git a/frontend/src/routes/+page.server.ts b/frontend/src/routes/+page.server.ts index da2a12e2..e1386db8 100644 --- a/frontend/src/routes/+page.server.ts +++ b/frontend/src/routes/+page.server.ts @@ -5,6 +5,7 @@ import type { components } from '$lib/generated/api'; type IncompleteDocumentDTO = components['schemas']['IncompleteDocumentDTO']; type StatsDTO = components['schemas']['StatsDTO']; type Document = components['schemas']['Document']; +type SearchMatchData = components['schemas']['SearchMatchData']; export async function load({ url, fetch }) { const q = url.searchParams.get('q') || ''; @@ -60,9 +61,14 @@ export async function load({ url, fetch }) { throw redirect(302, '/login'); } - const searchResult = docsResult?.data as { documents?: Document[]; 
total?: number } | null; + const searchResult = docsResult?.data as { + documents?: Document[]; + total?: number; + matchData?: Record; + } | null; const documents: Document[] = searchResult?.documents ?? []; const total: number = searchResult?.total ?? 0; + const matchData: Record = searchResult?.matchData ?? {}; const allPersons = (personsResult.data ?? []) as { id: string; firstName: string; @@ -99,6 +105,7 @@ export async function load({ url, fetch }) { isDashboard, documents, total, + matchData, stats, incompleteDocs, recentDocs, @@ -116,6 +123,7 @@ export async function load({ url, fetch }) { isDashboard, documents: [], total: 0, + matchData: {} as Record, stats: null, incompleteDocs: [], recentDocs: [], diff --git a/frontend/src/routes/+page.svelte b/frontend/src/routes/+page.svelte index ff5c7ebd..bbdfae44 100644 --- a/frontend/src/routes/+page.svelte +++ b/frontend/src/routes/+page.svelte @@ -140,6 +140,7 @@ const showRightColumn = $derived(data.canWrite || (data.incompleteDocs?.length ? total={data.total ?? 0} q={q} sort={sort} + matchData={data.matchData ?? 
{}} /> {/if} diff --git a/frontend/src/routes/DocumentList.svelte b/frontend/src/routes/DocumentList.svelte index 78aaa09e..bbcd609f 100644 --- a/frontend/src/routes/DocumentList.svelte +++ b/frontend/src/routes/DocumentList.svelte @@ -4,6 +4,8 @@ import { m } from '$lib/paraglide/messages.js'; import { formatDate } from '$lib/utils/date'; import { groupDocuments } from '$lib/utils/groupDocuments'; import GroupDivider from '$lib/components/GroupDivider.svelte'; +import { applyOffsets } from '$lib/search'; +import type { components } from '$lib/generated/api'; let { documents, @@ -11,7 +13,8 @@ let { error, total = 0, q = '', - sort + sort, + matchData = {} }: { documents: { id: string; @@ -19,8 +22,13 @@ let { originalFilename: string; documentDate?: string | null; location?: string | null; - sender?: { firstName?: string | null; lastName: string; displayName: string } | null; - receivers?: { firstName?: string | null; lastName: string; displayName: string }[]; + sender?: { + id?: string; + firstName?: string | null; + lastName: string; + displayName: string; + } | null; + receivers?: { id?: string; firstName?: string | null; lastName: string; displayName: string }[]; tags?: { id: string; name: string }[]; }[]; canWrite: boolean; @@ -28,6 +36,7 @@ let { total?: number; q?: string; sort?: string; + matchData?: Record; } = $props(); const fallbackLabel = $derived( @@ -75,6 +84,17 @@ const showDividers = $derived(groupedDocuments.length >= 2); {/if}