diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index 416b8597..f9553ab2 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -65,6 +65,29 @@ jobs: exit 1 fi + - name: Assert no raw document date rendered via {@html} (CWE-79 — #666) + shell: bash + run: | + # meta_date_raw is untrusted verbatim spreadsheet text — it must render via + # Svelte default escaping, never {@html}. This guard flags any {@html ...} + # whose expression references a raw-date variable. A comment mentioning + # "{@html}" without a raw token inside the braces does NOT match. + # The token list MUST cover every variable that carries the raw value: + # DocumentDate.svelte exposes it via the `raw` prop, so `\braw\b` is included. + # Grow this list whenever a new raw-bearing variable name is introduced. + pattern='\{@html[^}]*(metaDateRaw|documentDateRaw|rawDate|\braw\b)' + # Self-test: the regex must catch the dangerous forms and ignore the comment form. + printf '{@html doc.metaDateRaw}\n' | grep -qP "$pattern" \ + || { echo "FAIL: guard self-test — regex missed the unsafe {@html metaDateRaw} form"; exit 1; } + printf '{@html raw}\n' | grep -qP "$pattern" \ + || { echo "FAIL: guard self-test — regex missed the unsafe {@html raw} form (DocumentDate prop)"; exit 1; } + printf 'never use {@html} for this\n' | grep -qvP "$pattern" \ + || { echo "FAIL: guard self-test — regex wrongly flagged a {@html} comment"; exit 1; } + if grep -rPln "$pattern" --include='*.svelte' frontend/src/; then + echo "FAIL: meta_date_raw rendered via {@html} — use default {…} escaping (CWE-79, #666)." + exit 1 + fi + - name: Assert no (upload|download)-artifact past v3 shell: bash run: | diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bace28d0..7b7ba8a0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -272,6 +272,7 @@ For multipart/form-data (file uploads): bypass the typed client and use `event.f | Form display | German `dd.mm.yyyy` with auto-dot insertion via `handleDateInput()` | | Wire format | ISO 8601 via a hidden `` | | Display | `new Intl.DateTimeFormat('de-DE', …).format(new Date(val + 'T12:00:00'))` | +| Honest precision display | `formatDocumentDate(iso, precision, end?, raw?, locale?)` (`$lib/shared/utils/documentDate.ts`) or the `` component — renders a document date at exactly its `meta_date_precision` (MONTH → "Juni 1916", never a fabricated day). It mirrors the Java `DocumentTitleFormatter`; both are pinned to `docs/date-label-fixtures.json` so the title and UI labels can't drift. `meta_date_raw` is untrusted — render it via default escaping, never `{@html}` (a CI guard enforces this). | ### Security checklist (new endpoint) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java index edeedee6..4106332a 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java @@ -378,6 +378,7 @@ public class DocumentService { // 1. Einfache Felder Update doc.setTitle(dto.getTitle()); doc.setDocumentDate(dto.getDocumentDate()); + applyDatePrecision(doc, dto); doc.setLocation(dto.getLocation()); doc.setTranscription(dto.getTranscription()); doc.setSummary(dto.getSummary()); @@ -446,6 +447,25 @@ public class DocumentService { return saved; } + /** + * Applies the three date-precision fields only when the DTO carries them. + * A null field means "not submitted" — overwriting the stored value with null + * would fabricate a precision the user never chose, the exact dishonesty #666 + * exists to prevent. A row with a genuinely-unknown precision must keep it when + * an unrelated edit (e.g. a location typo) is saved. + */ + private void applyDatePrecision(Document doc, DocumentUpdateDTO dto) { + if (dto.getMetaDatePrecision() != null) { + doc.setMetaDatePrecision(dto.getMetaDatePrecision()); + } + if (dto.getMetaDateEnd() != null) { + doc.setMetaDateEnd(dto.getMetaDateEnd()); + } + if (dto.getMetaDateRaw() != null) { + doc.setMetaDateRaw(dto.getMetaDateRaw()); + } + } + @Transactional public Document updateDocumentTags(UUID docId, List tagNames) { Document doc = documentRepository.findById(docId) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java index 6be566ab..085a4ff4 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java @@ -159,7 +159,13 @@ public class DocumentImporter { Person sender = resolveSender(row.get("sender_person_id"), senderName); Set receivers = resolveReceivers(row.get("receiver_person_ids")); - doc.setTitle(index); + LocalDate date = parseIsoDate(row.get("date_iso")); + DatePrecision precision = parsePrecision(row.get("date_precision")); + LocalDate dateEnd = parseIsoDate(row.get("date_end")); + String dateRaw = blankToNull(row.get("date_raw")); + String location = blankToNull(row.get("location")); + + doc.setTitle(buildTitle(index, date, precision, dateEnd, dateRaw, location)); doc.setStatus(status); doc.setFilePath(s3Key); doc.setContentType(contentType); @@ -171,17 +177,31 @@ public class DocumentImporter { doc.getReceivers().clear(); doc.getReceivers().addAll(receivers); doc.setReceiverText(blankToNull(receiverNames)); - doc.setDocumentDate(parseIsoDate(row.get("date_iso"))); - doc.setMetaDatePrecision(parsePrecision(row.get("date_precision"))); - doc.setMetaDateEnd(parseIsoDate(row.get("date_end"))); - doc.setMetaDateRaw(blankToNull(row.get("date_raw"))); - doc.setLocation(blankToNull(row.get("location"))); + doc.setDocumentDate(date); + doc.setMetaDatePrecision(precision); + doc.setMetaDateEnd(dateEnd); + doc.setMetaDateRaw(dateRaw); + doc.setLocation(location); doc.setSummary(blankToNull(row.get("summary"))); attachTag(doc, row.get("tags")); doc.setMetadataComplete(doc.getDocumentDate() != null || sender != null || !receivers.isEmpty()); return doc; } + // The title carries the date at the HONEST precision (never a fabricated day) via the + // shared DocumentTitleFormatter, plus the location — kept under 20 lines by delegating. + private static String buildTitle(String index, LocalDate date, DatePrecision precision, + LocalDate end, String raw, String location) { + StringBuilder title = new StringBuilder(index); + if (date != null && precision != DatePrecision.UNKNOWN) { + title.append(" – ").append(DocumentTitleFormatter.formatTitleDate(date, precision, end, raw)); + } + if (location != null && !location.isBlank()) { + title.append(" – ").append(location); + } + return title.toString(); + } + // ─── attribution routing — register-first, always retain raw ───────────────────── private Person resolveSender(String slug, String rawName) { diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatter.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatter.java new file mode 100644 index 00000000..65120004 --- /dev/null +++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatter.java @@ -0,0 +1,112 @@ +package org.raddatz.familienarchiv.importing; + +import org.raddatz.familienarchiv.document.DatePrecision; + +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.util.Locale; + +/** + * Produces the honest German date label baked into an import title — at exactly + * the precision the data claims, never finer. This is the Java half of the + * single source of truth shared with the frontend {@code formatDocumentDate} + * (TypeScript): both are asserted against {@code docs/date-label-fixtures.json} + * so the two implementations cannot drift (see #666). + * + *

Import titles are always German, so the labels here are the German + * canonical form (mirroring the {@code de} Paraglide messages used by the UI). + */ +final class DocumentTitleFormatter { + + private static final DateTimeFormatter LONG = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN); + private static final DateTimeFormatter MONTH_YEAR = DateTimeFormatter.ofPattern("MMMM yyyy", Locale.GERMAN); + private static final DateTimeFormatter MEDIUM = DateTimeFormatter.ofPattern("d. MMM yyyy", Locale.GERMAN); + private static final DateTimeFormatter DAY_MONTH = DateTimeFormatter.ofPattern("d. MMM", Locale.GERMAN); + + private static final String UNKNOWN = "Datum unbekannt"; + private static final String APPROX_PREFIX = "ca."; + private static final String OPEN_RANGE_PREFIX = "ab"; + + private DocumentTitleFormatter() { + } + + /** + * @param date the sort/filter anchor day; null for UNKNOWN rows + * @param precision descriptive precision metadata + * @param end the RANGE end day; null means an open-ended range + * @param raw the verbatim spreadsheet cell, used only to pick a season word + * @return the honest German label + */ + static String formatTitleDate(LocalDate date, DatePrecision precision, LocalDate end, String raw) { + if (precision == DatePrecision.UNKNOWN || date == null) { + return UNKNOWN; + } + return switch (precision) { + case DAY -> LONG.format(date); + case MONTH -> MONTH_YEAR.format(date); + case SEASON -> seasonLabel(date, raw); + case YEAR -> String.valueOf(date.getYear()); + case APPROX -> APPROX_PREFIX + " " + date.getYear(); + case RANGE -> rangeLabel(date, end); + case UNKNOWN -> UNKNOWN; + }; + } + + private static String seasonLabel(LocalDate date, String raw) { + Season season = seasonFromRaw(raw); + if (season == null) { + season = seasonOfMonth(date.getMonthValue()); + } + return season.german + " " + date.getYear(); + } + + private static String rangeLabel(LocalDate start, LocalDate end) { + if (end == null) { + return OPEN_RANGE_PREFIX + " " + MEDIUM.format(start); + } + if (end.equals(start)) { + return MEDIUM.format(start); + } + if (start.getYear() != end.getYear()) { + return MEDIUM.format(start) + " – " + MEDIUM.format(end); + } + if (start.getMonthValue() == end.getMonthValue()) { + return start.getDayOfMonth() + ".–" + MEDIUM.format(end); + } + return DAY_MONTH.format(start) + " – " + MEDIUM.format(end); + } + + // ─── season mapping — mirrors the normalizer's representative months ───────────── + + private enum Season { + SPRING("Frühling"), + SUMMER("Sommer"), + AUTUMN("Herbst"), + WINTER("Winter"); + + private final String german; + + Season(String german) { + this.german = german; + } + } + + private static Season seasonOfMonth(int month) { + if (month >= 3 && month <= 5) return Season.SPRING; + if (month >= 6 && month <= 8) return Season.SUMMER; + if (month >= 9 && month <= 11) return Season.AUTUMN; + return Season.WINTER; + } + + private static Season seasonFromRaw(String raw) { + if (raw == null || raw.isBlank()) return null; + String token = raw.trim().split("\\s+")[0].toLowerCase(Locale.GERMAN); + return switch (token) { + case "frühling", "frühjahr" -> Season.SPRING; + case "sommer" -> Season.SUMMER; + case "herbst" -> Season.AUTUMN; + case "winter" -> Season.WINTER; + default -> null; + }; + } +} diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentControllerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentControllerTest.java index d2f91d91..00b69ce5 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentControllerTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentControllerTest.java @@ -294,6 +294,34 @@ class DocumentControllerTest { .andExpect(status().isOk()); } + @Test + @WithMockUser(authorities = "WRITE_ALL") + void updateDocument_bindsPrecisionFormFields_toDTO() throws Exception { + // Pins the wire contract: the edit form's metaDatePrecision / metaDateEnd / + // metaDateRaw multipart field names must bind to DocumentUpdateDTO. A rename + // on either side silently drops the precision edit; this captures the DTO. + UUID id = UUID.randomUUID(); + Document doc = Document.builder().id(id).title("Brief").originalFilename("brief.pdf").build(); + when(userService.findByEmail(any())).thenReturn(AppUser.builder().id(UUID.randomUUID()).build()); + + org.mockito.ArgumentCaptor captor = + org.mockito.ArgumentCaptor.forClass(DocumentUpdateDTO.class); + when(documentService.updateDocument(eq(id), captor.capture(), any(), any())).thenReturn(doc); + + mockMvc.perform(multipart("/api/documents/" + id) + .param("metaDatePrecision", "RANGE") + .param("metaDateEnd", "1917-01-11") + .param("metaDateRaw", "10.–11. Januar 1917") + .with(req -> { req.setMethod("PUT"); return req; }).with(csrf())) + .andExpect(status().isOk()); + + DocumentUpdateDTO bound = captor.getValue(); + org.assertj.core.api.Assertions.assertThat(bound.getMetaDatePrecision()).isEqualTo(DatePrecision.RANGE); + org.assertj.core.api.Assertions.assertThat(bound.getMetaDateEnd()) + .isEqualTo(java.time.LocalDate.of(1917, 1, 11)); + org.assertj.core.api.Assertions.assertThat(bound.getMetaDateRaw()).isEqualTo("10.–11. Januar 1917"); + } + // ─── DELETE /api/documents/{id} ────────────────────────────────────────── @Test diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java index 8ef7a6f2..658d4c31 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java @@ -144,6 +144,53 @@ class DocumentServiceTest { assertThat(doc.getArchiveFolder()).isEqualTo("Mappe B"); } + @Test + void updateDocument_persistsDatePrecisionEndAndRaw() throws Exception { + UUID id = UUID.randomUUID(); + Document doc = Document.builder().id(id).receivers(new HashSet<>()).tags(new HashSet<>()).build(); + when(documentRepository.findById(id)).thenReturn(Optional.of(doc)); + when(documentRepository.save(any())).thenReturn(doc); + + DocumentUpdateDTO dto = new DocumentUpdateDTO(); + dto.setDocumentDate(LocalDate.of(1917, 1, 10)); + dto.setMetaDatePrecision(DatePrecision.RANGE); + dto.setMetaDateEnd(LocalDate.of(1917, 1, 11)); + dto.setMetaDateRaw("10.–11. Januar 1917"); + + documentService.updateDocument(id, dto, null, null); + + assertThat(doc.getMetaDatePrecision()).isEqualTo(DatePrecision.RANGE); + assertThat(doc.getMetaDateEnd()).isEqualTo(LocalDate.of(1917, 1, 11)); + assertThat(doc.getMetaDateRaw()).isEqualTo("10.–11. Januar 1917"); + } + + @Test + void updateDocument_preservesStoredPrecision_whenDtoOmitsIt() throws Exception { + // Editing a doc (e.g. fixing a location typo) without touching the precision + // controls must NOT fabricate a precision. The form omits the three precision + // fields → they arrive null on the DTO → the stored values must be preserved. + UUID id = UUID.randomUUID(); + Document doc = Document.builder() + .id(id) + .metaDatePrecision(DatePrecision.MONTH) + .metaDateEnd(LocalDate.of(1916, 6, 30)) + .metaDateRaw("Juni 1916") + .receivers(new HashSet<>()) + .tags(new HashSet<>()) + .build(); + when(documentRepository.findById(id)).thenReturn(Optional.of(doc)); + when(documentRepository.save(any())).thenReturn(doc); + + DocumentUpdateDTO dto = new DocumentUpdateDTO(); + dto.setLocation("Berlin"); // unrelated edit; precision fields left null + + documentService.updateDocument(id, dto, null, null); + + assertThat(doc.getMetaDatePrecision()).isEqualTo(DatePrecision.MONTH); + assertThat(doc.getMetaDateEnd()).isEqualTo(LocalDate.of(1916, 6, 30)); + assertThat(doc.getMetaDateRaw()).isEqualTo("Juni 1916"); + } + // ─── deleteTagCascading ─────────────────────────────────────────────────── @Test diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentImporterTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentImporterTest.java index f0b2263b..99d7bd5c 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentImporterTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentImporterTest.java @@ -404,6 +404,50 @@ class DocumentImporterTest { d.getReceivers().isEmpty() && d.getTags().isEmpty())); } + // ─── title carries the honest date label — never a precision the data lacks ─────── + + @Test + void load_buildsTitleWithMonthLabel_whenPrecisionIsMonth(@TempDir Path tempDir) throws Exception { + ReflectionTestUtils.setField(importer, "importDir", tempDir.toString()); + when(documentService.findByOriginalFilename("W-0100")).thenReturn(Optional.empty()); + when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0)); + Path xlsx = writeDocs(tempDir, docRow("W-0100", "", "", "", "", "", + "1916-06-01", "Juni 1916", "MONTH", "")); + + importer.load(xlsx.toFile()); + + verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d -> + d.getTitle().contains("Juni 1916") && !d.getTitle().contains("1. Juni"))); + } + + @Test + void load_buildsTitleWithFullDate_whenPrecisionIsDay(@TempDir Path tempDir) throws Exception { + ReflectionTestUtils.setField(importer, "importDir", tempDir.toString()); + when(documentService.findByOriginalFilename("W-0101")).thenReturn(Optional.empty()); + when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0)); + Path xlsx = writeDocs(tempDir, docRow("W-0101", "", "", "", "", "", + "1943-12-24", "24.12.1943", "DAY", "")); + + importer.load(xlsx.toFile()); + + verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d -> + d.getTitle().contains("24. Dezember 1943"))); + } + + @Test + void load_buildsTitleFromIndexOnly_whenDateUnknown(@TempDir Path tempDir) throws Exception { + ReflectionTestUtils.setField(importer, "importDir", tempDir.toString()); + when(documentService.findByOriginalFilename("W-0102")).thenReturn(Optional.empty()); + when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0)); + Path xlsx = writeDocs(tempDir, docRow("W-0102", "", "", "", "", "", + "", "?", "UNKNOWN", "")); + + importer.load(xlsx.toFile()); + + verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d -> + d.getTitle().equals("W-0102"))); + } + // ─── helpers ───────────────────────────────────────────────────────────────────── private Map docRow(String index, String file, String senderId, String senderName, diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatterTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatterTest.java new file mode 100644 index 00000000..d8f66b6e --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatterTest.java @@ -0,0 +1,49 @@ +package org.raddatz.familienarchiv.importing; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.DynamicTest; +import org.junit.jupiter.api.TestFactory; +import org.raddatz.familienarchiv.document.DatePrecision; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.LocalDate; +import java.util.ArrayList; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Asserts the Java title label against the SAME shared fixture table the TS + * formatter spec uses ({@code docs/date-label-fixtures.json}). This is the + * drift guard requested in #666 review: the two label implementations cannot + * silently diverge (en-dash vs hyphen, "ca." vs "circa", season words, range + * collapse) because both are pinned to one committed rule set. + */ +class DocumentTitleFormatterTest { + + @TestFactory + List matchesSharedFixtureTable() throws Exception { + // Maven runs tests from the backend/ module dir; the fixture lives at repo-root docs/. + Path fixture = Path.of("..", "docs", "date-label-fixtures.json"); + JsonNode root = new ObjectMapper().readTree(Files.readString(fixture)); + List tests = new ArrayList<>(); + for (JsonNode c : root.get("cases")) { + String name = c.get("name").asText(); + LocalDate anchor = parseDate(c.get("anchor")); + DatePrecision precision = DatePrecision.valueOf(c.get("precision").asText()); + LocalDate end = parseDate(c.get("end")); + String raw = c.get("raw").isNull() ? null : c.get("raw").asText(); + String expected = c.get("expected").asText(); + tests.add(DynamicTest.dynamicTest(name, () -> + assertThat(DocumentTitleFormatter.formatTitleDate(anchor, precision, end, raw)) + .isEqualTo(expected))); + } + return tests; + } + + private static LocalDate parseDate(JsonNode node) { + return node == null || node.isNull() ? null : LocalDate.parse(node.asText()); + } +} diff --git a/docs/architecture/c4/l3-backend-3b-document-management.puml b/docs/architecture/c4/l3-backend-3b-document-management.puml index 89d4a68b..cca25d75 100644 --- a/docs/architecture/c4/l3-backend-3b-document-management.puml +++ b/docs/architecture/c4/l3-backend-3b-document-management.puml @@ -16,7 +16,8 @@ System_Boundary(backend, "API Backend (Spring Boot)") { Component(tagTreeLoader, "TagTreeImporter", "Spring Component", "Upserts the tag hierarchy from canonical-tag-tree.xlsx via TagService (by canonical tag_path).") Component(personRegLoader, "PersonRegisterImporter", "Spring Component", "Upserts register persons from canonical-persons.xlsx via PersonService (by normalizer person_id).") Component(personTreeLoader, "PersonTreeImporter", "Spring Component", "Upserts tree persons + relationships from canonical-persons-tree.json via PersonService and RelationshipService.") - Component(docLoader, "DocumentImporter", "Spring Component", "Loads canonical-documents.xlsx: routes attribution register-first (raw cell always retained in sender_text/receiver_text), parses clean dates, keeps the S3 upload + thumbnail plumbing, and ports the path-traversal / homoglyph / absolute-path / %PDF magic-byte security guards.") + Component(docLoader, "DocumentImporter", "Spring Component", "Loads canonical-documents.xlsx: routes attribution register-first (raw cell always retained in sender_text/receiver_text), parses clean dates, builds an honest precision-aware title via DocumentTitleFormatter, keeps the S3 upload + thumbnail plumbing, and ports the path-traversal / homoglyph / absolute-path / %PDF magic-byte security guards.") + Component(titleFmt, "DocumentTitleFormatter", "Pure helper", "Formats the date label baked into an import title at exactly the data's precision (MONTH -> 'Juni 1916', never a fabricated day). Mirrors the frontend formatDocumentDate; both are pinned to docs/date-label-fixtures.json (#666).") Component(sheetReader, "CanonicalSheetReader", "POI helper", "Maps a canonical .xlsx by header name (no positional indices), splits pipe-delimited list columns, fails closed (IMPORT_ARTIFACT_INVALID) on a missing required header.") Component(minioConf, "MinioConfig", "Spring @Configuration", "Creates the S3Client and S3Presigner beans with path-style access for MinIO. Validates MinIO connectivity on startup.") Component(docRepo, "DocumentRepository", "Spring Data JPA", "Queries documents with Specification-based dynamic search, bidirectional conversation thread queries, full-text search with ranking and match highlighting, and transcription pipeline queue projections.") @@ -43,6 +44,7 @@ Rel(importOrch, docLoader, "4. Loads documents") Rel(tagTreeLoader, sheetReader, "Reads canonical .xlsx") Rel(personRegLoader, sheetReader, "Reads canonical .xlsx") Rel(docLoader, sheetReader, "Reads canonical .xlsx") +Rel(docLoader, titleFmt, "Builds honest title date") Rel(tagTreeLoader, tagSvc, "Upserts tags by source_ref") Rel(personRegLoader, personSvc, "Upserts persons by source_ref") Rel(personTreeLoader, personSvc, "Upserts persons by source_ref") diff --git a/docs/date-label-fixtures.json b/docs/date-label-fixtures.json new file mode 100644 index 00000000..c6aed293 --- /dev/null +++ b/docs/date-label-fixtures.json @@ -0,0 +1,140 @@ +{ + "_comment": "Single source of truth for the honest date-label rule set shared by the TS formatDocumentDate (frontend/src/lib/shared/utils/documentDate.ts) and the Java formatTitleDate (backend importing/DocumentTitleFormatter.java). The 'cases' array holds the GERMAN (de) canonical form and is asserted by BOTH suites — that is the Java<->TS drift guard (en-dash vs hyphen, 'ca.' vs 'circa', season words, range collapse). The Java title formatter intentionally renders German server-side (import titles are always German); only the TS UI formatter is locale-aware, so 'localeCases' (en/es month-name output) is asserted by the TS spec ONLY and must NOT be fed to the Java test. Do not edit one side's expectation without editing this file and the relevant test(s). Season->month mapping note: the Python import normalizer (tools/import-normalizer) is the UPSTREAM authority for which representative month a season maps to (4/7/10/1); both formatters mirror it but it sits OUTSIDE this Java<->TS guard, so a normalizer change is not caught here. See issue #666 and the Markus/Sara drift-guard decision.", + "cases": [ + { + "name": "DAY renders a full long date", + "precision": "DAY", + "anchor": "1943-12-24", + "end": null, + "raw": null, + "expected": "24. Dezember 1943" + }, + { + "name": "MONTH renders month and year only — never a fabricated day", + "precision": "MONTH", + "anchor": "1916-06-01", + "end": null, + "raw": "Juni 1916", + "expected": "Juni 1916" + }, + { + "name": "SEASON renders the season word from raw", + "precision": "SEASON", + "anchor": "1916-06-01", + "end": null, + "raw": "Sommer 1916", + "expected": "Sommer 1916" + }, + { + "name": "SEASON with null raw derives the season from the anchor month", + "precision": "SEASON", + "anchor": "1916-04-01", + "end": null, + "raw": null, + "expected": "Frühling 1916" + }, + { + "name": "YEAR renders the year only — suppresses month and day", + "precision": "YEAR", + "anchor": "1916-06-15", + "end": null, + "raw": null, + "expected": "1916" + }, + { + "name": "APPROX renders a ca. prefix before the year", + "precision": "APPROX", + "anchor": "1920-01-01", + "end": null, + "raw": null, + "expected": "ca. 1920" + }, + { + "name": "RANGE in the same month collapses the shared month and year", + "precision": "RANGE", + "anchor": "1917-01-10", + "end": "1917-01-11", + "raw": null, + "expected": "10.–11. Jan. 1917" + }, + { + "name": "RANGE across months expands both months, sharing the year", + "precision": "RANGE", + "anchor": "1917-01-30", + "end": "1917-02-02", + "raw": null, + "expected": "30. Jan. – 2. Feb. 1917" + }, + { + "name": "RANGE across a year boundary expands both full dates", + "precision": "RANGE", + "anchor": "1916-12-30", + "end": "1917-01-02", + "raw": null, + "expected": "30. Dez. 1916 – 2. Jan. 1917" + }, + { + "name": "RANGE where end equals start collapses to a single day", + "precision": "RANGE", + "anchor": "1917-01-10", + "end": "1917-01-10", + "raw": null, + "expected": "10. Jan. 1917" + }, + { + "name": "RANGE with a null end renders an open-range indicator, never a fabricated end", + "precision": "RANGE", + "anchor": "1917-01-10", + "end": null, + "raw": null, + "expected": "ab 10. Jan. 1917" + }, + { + "name": "UNKNOWN renders the unknown label regardless of anchor", + "precision": "UNKNOWN", + "anchor": null, + "end": null, + "raw": "?", + "expected": "Datum unbekannt" + } + ], + "localeComment": "TS-only locale parity for the read path (the younger phone audience may use en/es). Asserted ONLY by documentDate.spec.ts — the Java title formatter is German-only by design, so these MUST NOT be fed to DocumentTitleFormatterTest. Each case pins the localized month-name output for DAY and MONTH so a locale regression (e.g. a future de-DE hard-coding) is caught by the drift table, not just by ad-hoc tests.", + "localeCases": [ + { + "name": "DAY in English renders the English month name", + "precision": "DAY", + "anchor": "1943-12-24", + "end": null, + "raw": null, + "locale": "en", + "expected": "December 24, 1943" + }, + { + "name": "DAY in Spanish renders the Spanish month name", + "precision": "DAY", + "anchor": "1943-12-24", + "end": null, + "raw": null, + "locale": "es", + "expected": "24 de diciembre de 1943" + }, + { + "name": "MONTH in English renders the English month name, never a day", + "precision": "MONTH", + "anchor": "1916-06-01", + "end": null, + "raw": "Juni 1916", + "locale": "en", + "expected": "June 1916" + }, + { + "name": "MONTH in Spanish renders the Spanish month name, never a day", + "precision": "MONTH", + "anchor": "1916-06-01", + "end": null, + "raw": "Juni 1916", + "locale": "es", + "expected": "junio de 1916" + } + ] +} diff --git a/frontend/messages/de.json b/frontend/messages/de.json index a54ab59e..0ac0a807 100644 --- a/frontend/messages/de.json +++ b/frontend/messages/de.json @@ -261,6 +261,24 @@ "doc_preview_iframe_title": "Dokumentvorschau", "doc_image_alt": "Original-Scan", "doc_no_date": "Kein Datum", + "date_precision_unknown": "Datum unbekannt", + "date_precision_approx_prefix": "ca.", + "date_range_open_prefix": "ab", + "date_season_spring": "Frühling", + "date_season_summer": "Sommer", + "date_season_autumn": "Herbst", + "date_season_winter": "Winter", + "date_original_label": "Originaltext:", + "date_unknown_icon_label": "Datum unbekannt", + "form_label_date_precision": "Datumsgenauigkeit", + "form_label_date_end": "Enddatum", + "date_precision_option_day": "Genauer Tag", + "date_precision_option_month": "Monat", + "date_precision_option_season": "Jahreszeit", + "date_precision_option_year": "Jahr", + "date_precision_option_range": "Zeitraum", + "date_precision_option_approx": "Ungefähr", + "date_precision_option_unknown": "Unbekannt", "person_merge_will_be_deleted": "wird gelöscht.", "comp_typeahead_placeholder": "Namen tippen...", "comp_typeahead_loading": "Suche...", diff --git a/frontend/messages/en.json b/frontend/messages/en.json index 5c6ca80a..269e95d3 100644 --- a/frontend/messages/en.json +++ b/frontend/messages/en.json @@ -261,6 +261,24 @@ "doc_preview_iframe_title": "Document Preview", "doc_image_alt": "Original scan", "doc_no_date": "No date", + "date_precision_unknown": "Date unknown", + "date_precision_approx_prefix": "c.", + "date_range_open_prefix": "from", + "date_season_spring": "Spring", + "date_season_summer": "Summer", + "date_season_autumn": "Autumn", + "date_season_winter": "Winter", + "date_original_label": "Original:", + "date_unknown_icon_label": "Date unknown", + "form_label_date_precision": "Date precision", + "form_label_date_end": "End date", + "date_precision_option_day": "Exact day", + "date_precision_option_month": "Month", + "date_precision_option_season": "Season", + "date_precision_option_year": "Year", + "date_precision_option_range": "Range", + "date_precision_option_approx": "Approximate", + "date_precision_option_unknown": "Unknown", "person_merge_will_be_deleted": "will be deleted.", "comp_typeahead_placeholder": "Type a name...", "comp_typeahead_loading": "Searching...", diff --git a/frontend/messages/es.json b/frontend/messages/es.json index cbda7fab..1cbd3eda 100644 --- a/frontend/messages/es.json +++ b/frontend/messages/es.json @@ -261,6 +261,24 @@ "doc_preview_iframe_title": "Vista previa del documento", "doc_image_alt": "Escaneado original", "doc_no_date": "Sin fecha", + "date_precision_unknown": "Fecha desconocida", + "date_precision_approx_prefix": "ca.", + "date_range_open_prefix": "desde", + "date_season_spring": "Primavera", + "date_season_summer": "Verano", + "date_season_autumn": "Otoño", + "date_season_winter": "Invierno", + "date_original_label": "Texto original:", + "date_unknown_icon_label": "Fecha desconocida", + "form_label_date_precision": "Precisión de la fecha", + "form_label_date_end": "Fecha final", + "date_precision_option_day": "Día exacto", + "date_precision_option_month": "Mes", + "date_precision_option_season": "Estación", + "date_precision_option_year": "Año", + "date_precision_option_range": "Periodo", + "date_precision_option_approx": "Aproximada", + "date_precision_option_unknown": "Desconocida", "person_merge_will_be_deleted": "será eliminado.", "comp_typeahead_placeholder": "Escriba un nombre...", "comp_typeahead_loading": "Buscando...", diff --git a/frontend/src/lib/document/DocumentDate.svelte b/frontend/src/lib/document/DocumentDate.svelte new file mode 100644 index 00000000..a3539c31 --- /dev/null +++ b/frontend/src/lib/document/DocumentDate.svelte @@ -0,0 +1,60 @@ + + + + + {#if isUnknown} + + + {/if} + {label} + + {#if showRawLine} + + {m.date_original_label()} {raw} + {/if} + diff --git a/frontend/src/lib/document/DocumentDate.svelte.test.ts b/frontend/src/lib/document/DocumentDate.svelte.test.ts new file mode 100644 index 00000000..fa842b7b --- /dev/null +++ b/frontend/src/lib/document/DocumentDate.svelte.test.ts @@ -0,0 +1,35 @@ +import { describe, it, expect, afterEach } from 'vitest'; +import { cleanup, render } from 'vitest-browser-svelte'; +import { page } from 'vitest/browser'; +import DocumentDate from './DocumentDate.svelte'; + +// Browser-project (Playwright) tests — CI only. + +afterEach(cleanup); + +describe('DocumentDate', () => { + it('renders a DAY date as a full long date', async () => { + render(DocumentDate, { props: { iso: '1943-12-24', precision: 'DAY' } }); + await expect.element(page.getByText('24. Dezember 1943')).toBeInTheDocument(); + }); + + it('renders MONTH precision as month + year, never a day', async () => { + render(DocumentDate, { props: { iso: '1916-06-01', precision: 'MONTH', raw: 'Juni 1916' } }); + await expect.element(page.getByText('Juni 1916')).toBeInTheDocument(); + }); + + it('shows the verbatim raw cell as a visible secondary line for UNKNOWN (not tooltip-only)', async () => { + render(DocumentDate, { props: { iso: null, precision: 'UNKNOWN', raw: 'Sommer?' } }); + // Real, visible text — not hidden behind a title attribute. + await expect.element(page.getByText('Datum unbekannt')).toBeInTheDocument(); + await expect.element(page.getByText(/Sommer\?/)).toBeVisible(); + }); + + it('renders a malicious raw value as inert escaped text (no element injected)', async () => { + const malicious = ''; + render(DocumentDate, { props: { iso: null, precision: 'UNKNOWN', raw: malicious } }); + // The payload appears as literal text, and no is created in the DOM. + await expect.element(page.getByText(/([]), dateIso = $bindable(''), + datePrecision = $bindable('DAY'), + dateEndIso = $bindable(''), currentTitle = $bindable(''), topbar, actionbar @@ -38,6 +41,8 @@ let { senderId?: string; selectedReceivers?: Person[]; dateIso?: string; + datePrecision?: DatePrecision; + dateEndIso?: string; currentTitle?: string; topbar: Snippet; actionbar: Snippet; @@ -47,6 +52,8 @@ tags = untrack(() => (doc.tags as Tag[]) ?? []); senderId = untrack(() => doc.sender?.id ?? ''); selectedReceivers = untrack(() => (doc.receivers as Person[]) ?? []); dateIso = untrack(() => doc.documentDate ?? ''); +datePrecision = untrack(() => doc.metaDatePrecision ?? (doc.documentDate ? 'DAY' : 'UNKNOWN')); +dateEndIso = untrack(() => doc.metaDateEnd ?? ''); currentTitle = untrack(() => doc.title ?? ''); const fileLoader = createFileLoader(); @@ -199,6 +206,9 @@ async function handleReplaceFile(e: Event) { bind:senderId={senderId} bind:selectedReceivers={selectedReceivers} bind:dateIso={dateIso} + bind:precision={datePrecision} + bind:endDateIso={dateEndIso} + rawDate={doc.metaDateRaw ?? ''} initialDateIso={doc.documentDate ?? ''} initialLocation={doc.location ?? ''} initialSenderName={doc.sender?.displayName ?? ''} diff --git a/frontend/src/lib/document/DocumentMetadataDrawer.svelte b/frontend/src/lib/document/DocumentMetadataDrawer.svelte index 01ecc62a..4b8081e9 100644 --- a/frontend/src/lib/document/DocumentMetadataDrawer.svelte +++ b/frontend/src/lib/document/DocumentMetadataDrawer.svelte @@ -4,6 +4,8 @@ import { formatDate } from '$lib/shared/utils/date'; import { formatDocumentStatus } from '$lib/document/documentStatusLabel'; import { getInitials, personAvatarColor } from '$lib/person/personFormat'; import RelationshipPill from '$lib/person/relationship/RelationshipPill.svelte'; +import DocumentDate from './DocumentDate.svelte'; +import type { DatePrecision } from '$lib/shared/utils/documentDate'; type Person = { id: string; firstName?: string | null; lastName: string; displayName: string }; type Tag = { id: string; name: string }; @@ -16,6 +18,9 @@ type GeschichteSummary = { type Props = { documentDate: string | null; + metaDatePrecision?: DatePrecision | null; + metaDateEnd?: string | null; + metaDateRaw?: string | null; location: string | null; status: string; sender: Person | null; @@ -29,6 +34,9 @@ type Props = { let { documentDate, + metaDatePrecision = null, + metaDateEnd = null, + metaDateRaw = null, location, status, sender, @@ -59,7 +67,6 @@ function formatGeschichteDate(g: GeschichteSummary): string { return formatDate(g.publishedAt.slice(0, 10), 'short'); } -const formattedDate = $derived(documentDate ? formatDate(documentDate) : '—'); const displayLocation = $derived(location ?? '—'); const statusLabel = $derived(formatDocumentStatus(status)); const visibleReceivers = $derived(receivers.slice(0, VISIBLE_RECEIVER_LIMIT)); @@ -105,7 +112,18 @@ function getFullName(person: Person): string {

{m.doc_details_field_date()}
-
{formattedDate}
+
+ {#if documentDate || metaDateRaw} + + {:else} + — + {/if} +
{m.form_label_location()}
diff --git a/frontend/src/lib/document/DocumentMultiSelect.svelte b/frontend/src/lib/document/DocumentMultiSelect.svelte index 0196544b..fbfd59a9 100644 --- a/frontend/src/lib/document/DocumentMultiSelect.svelte +++ b/frontend/src/lib/document/DocumentMultiSelect.svelte @@ -2,13 +2,23 @@ import type { components } from '$lib/generated/api'; import { m } from '$lib/paraglide/messages.js'; import { clickOutside } from '$lib/shared/actions/clickOutside'; -import { formatDate } from '$lib/shared/utils/date'; +import { formatDocumentDate, type DatePrecision } from '$lib/shared/utils/documentDate'; +import { getLocale } from '$lib/paraglide/runtime.js'; -type Document = components['schemas']['Document']; type DocumentListItem = components['schemas']['DocumentListItem']; +/** + * Exactly the fields this picker reads — id for selection/dedup, the rest for + * the honest date label. A full `Document` and a `DocumentListItem` are both + * structurally assignable, so the search results need no cast. + */ +type DocumentOption = Pick< + DocumentListItem, + 'id' | 'title' | 'documentDate' | 'metaDatePrecision' | 'metaDateEnd' +>; + interface Props { - selectedDocuments?: Document[]; + selectedDocuments?: DocumentOption[]; placeholder?: string; hiddenInputName?: string; } @@ -20,7 +30,7 @@ let { }: Props = $props(); let searchTerm = $state(''); -let results: Document[] = $state([]); +let results: DocumentOption[] = $state([]); let showDropdown = $state(false); let loading = $state(false); let debounceTimer: ReturnType; @@ -46,11 +56,13 @@ function handleInput() { const res = await fetch(`/api/documents/search?q=${encodeURIComponent(searchTerm)}&size=10`); if (res.ok) { const body: { items: DocumentListItem[] } = await res.json(); - const docs = body.items.map((it) => ({ + const docs: DocumentOption[] = body.items.map((it) => ({ id: it.id, title: it.title, - documentDate: it.documentDate - })) as unknown as Document[]; + documentDate: it.documentDate, + metaDatePrecision: it.metaDatePrecision, + metaDateEnd: it.metaDateEnd + })); results = docs.filter((d) => !selectedDocuments.some((s) => s.id === d.id)); } } catch { @@ -61,7 +73,7 @@ function handleInput() { }, 300); } -function selectDocument(doc: Document) { +function selectDocument(doc: DocumentOption) { selectedDocuments = [...selectedDocuments, doc]; searchTerm = ''; showDropdown = false; @@ -72,9 +84,16 @@ function removeDocument(id: string | undefined) { selectedDocuments = selectedDocuments.filter((d) => d.id !== id); } -function formatDocLabel(doc: Document): string { - if (doc.documentDate) return `${doc.title} · ${formatDate(doc.documentDate, 'short')}`; - return doc.title; +function formatDocLabel(doc: DocumentOption): string { + if (!doc.documentDate) return doc.title; + const label = formatDocumentDate( + doc.documentDate, + doc.metaDatePrecision as DatePrecision, + doc.metaDateEnd, + null, + getLocale() + ); + return `${doc.title} · ${label}`; } diff --git a/frontend/src/lib/document/DocumentMultiSelect.svelte.spec.ts b/frontend/src/lib/document/DocumentMultiSelect.svelte.spec.ts index 6514ab55..d348026c 100644 --- a/frontend/src/lib/document/DocumentMultiSelect.svelte.spec.ts +++ b/frontend/src/lib/document/DocumentMultiSelect.svelte.spec.ts @@ -9,6 +9,7 @@ const docFactory = (id: string, title: string, date = '1880-01-01') => ({ id, title, documentDate: date, + metaDatePrecision: 'DAY' as const, originalFilename: `${title}.pdf`, receivers: [], tags: [], @@ -55,7 +56,8 @@ describe('DocumentMultiSelect — rendering', () => { selectedDocuments: [docFactory('d1', 'Brief vom 1. Mai', '1882-05-01')] }); await expect.element(page.getByText(/Brief vom 1\. Mai/)).toBeInTheDocument(); - await expect.element(page.getByText(/01\.05\.1882/)).toBeInTheDocument(); + // DAY precision renders the honest long date (formatDocumentDate), not 01.05.1882. + await expect.element(page.getByText(/1\. Mai 1882/)).toBeInTheDocument(); }); it('emits a hidden documentIds input for each pre-selected document', async () => { diff --git a/frontend/src/lib/document/DocumentRow.svelte b/frontend/src/lib/document/DocumentRow.svelte index 903ed727..076ccc14 100644 --- a/frontend/src/lib/document/DocumentRow.svelte +++ b/frontend/src/lib/document/DocumentRow.svelte @@ -2,7 +2,7 @@ import { goto } from '$app/navigation'; import type { components } from '$lib/generated/api'; import { applyOffsets } from '$lib/document/search'; -import { formatDate } from '$lib/shared/utils/date'; +import DocumentDate from './DocumentDate.svelte'; import * as m from '$lib/paraglide/messages.js'; import { bulkSelectionStore } from '$lib/document/bulkSelection.svelte'; import ProgressRing from '$lib/shared/primitives/ProgressRing.svelte'; @@ -164,7 +164,20 @@ function safeTagColor(color: string | null | undefined): string {
- {doc.documentDate ? formatDate(doc.documentDate) : '—'} + + {#if doc.documentDate} + + {:else} + — + {/if}
@@ -178,7 +191,16 @@ function safeTagColor(color: string | null | undefined): string {