diff --git a/docs/date-label-fixtures.json b/docs/date-label-fixtures.json new file mode 100644 index 00000000..c1508829 --- /dev/null +++ b/docs/date-label-fixtures.json @@ -0,0 +1,101 @@ +{ + "_comment": "Single source of truth for the honest date-label rule set shared by the TS formatDocumentDate (frontend/src/lib/shared/utils/documentDate.ts) and the Java formatTitleDate (backend importing/DocumentTitleFormatter.java). Both test suites assert against THIS table so the two implementations cannot drift (en-dash vs hyphen, 'ca.' vs 'circa', season words, range collapse). Expected labels are the GERMAN (de) canonical form: import titles are always German, and the TS formatter defaults to the de locale. Do not edit one side's expectation without editing this file and both tests. See issue #666 and the Markus/Sara drift-guard decision.", + "cases": [ + { + "name": "DAY renders a full long date", + "precision": "DAY", + "anchor": "1943-12-24", + "end": null, + "raw": null, + "expected": "24. Dezember 1943" + }, + { + "name": "MONTH renders month and year only — never a fabricated day", + "precision": "MONTH", + "anchor": "1916-06-01", + "end": null, + "raw": "Juni 1916", + "expected": "Juni 1916" + }, + { + "name": "SEASON renders the season word from raw", + "precision": "SEASON", + "anchor": "1916-06-01", + "end": null, + "raw": "Sommer 1916", + "expected": "Sommer 1916" + }, + { + "name": "SEASON with null raw derives the season from the anchor month", + "precision": "SEASON", + "anchor": "1916-04-01", + "end": null, + "raw": null, + "expected": "Frühling 1916" + }, + { + "name": "YEAR renders the year only — suppresses month and day", + "precision": "YEAR", + "anchor": "1916-06-15", + "end": null, + "raw": null, + "expected": "1916" + }, + { + "name": "APPROX renders a ca. prefix before the year", + "precision": "APPROX", + "anchor": "1920-01-01", + "end": null, + "raw": null, + "expected": "ca. 
1920" + }, + { + "name": "RANGE in the same month collapses the shared month and year", + "precision": "RANGE", + "anchor": "1917-01-10", + "end": "1917-01-11", + "raw": null, + "expected": "10.–11. Jan. 1917" + }, + { + "name": "RANGE across months expands both months, sharing the year", + "precision": "RANGE", + "anchor": "1917-01-30", + "end": "1917-02-02", + "raw": null, + "expected": "30. Jan. – 2. Feb. 1917" + }, + { + "name": "RANGE across a year boundary expands both full dates", + "precision": "RANGE", + "anchor": "1916-12-30", + "end": "1917-01-02", + "raw": null, + "expected": "30. Dez. 1916 – 2. Jan. 1917" + }, + { + "name": "RANGE where end equals start collapses to a single day", + "precision": "RANGE", + "anchor": "1917-01-10", + "end": "1917-01-10", + "raw": null, + "expected": "10. Jan. 1917" + }, + { + "name": "RANGE with a null end renders an open-range indicator, never a fabricated end", + "precision": "RANGE", + "anchor": "1917-01-10", + "end": null, + "raw": null, + "expected": "ab 10. Jan. 
1917" + }, + { + "name": "UNKNOWN renders the unknown label regardless of anchor", + "precision": "UNKNOWN", + "anchor": null, + "end": null, + "raw": "?", + "expected": "Datum unbekannt" + } + ] +} diff --git a/frontend/messages/de.json b/frontend/messages/de.json index a54ab59e..0ac0a807 100644 --- a/frontend/messages/de.json +++ b/frontend/messages/de.json @@ -261,6 +261,24 @@ "doc_preview_iframe_title": "Dokumentvorschau", "doc_image_alt": "Original-Scan", "doc_no_date": "Kein Datum", + "date_precision_unknown": "Datum unbekannt", + "date_precision_approx_prefix": "ca.", + "date_range_open_prefix": "ab", + "date_season_spring": "Frühling", + "date_season_summer": "Sommer", + "date_season_autumn": "Herbst", + "date_season_winter": "Winter", + "date_original_label": "Originaltext:", + "date_unknown_icon_label": "Datum unbekannt", + "form_label_date_precision": "Datumsgenauigkeit", + "form_label_date_end": "Enddatum", + "date_precision_option_day": "Genauer Tag", + "date_precision_option_month": "Monat", + "date_precision_option_season": "Jahreszeit", + "date_precision_option_year": "Jahr", + "date_precision_option_range": "Zeitraum", + "date_precision_option_approx": "Ungefähr", + "date_precision_option_unknown": "Unbekannt", "person_merge_will_be_deleted": "wird gelöscht.", "comp_typeahead_placeholder": "Namen tippen...", "comp_typeahead_loading": "Suche...", diff --git a/frontend/messages/en.json b/frontend/messages/en.json index 5c6ca80a..269e95d3 100644 --- a/frontend/messages/en.json +++ b/frontend/messages/en.json @@ -261,6 +261,24 @@ "doc_preview_iframe_title": "Document Preview", "doc_image_alt": "Original scan", "doc_no_date": "No date", + "date_precision_unknown": "Date unknown", + "date_precision_approx_prefix": "c.", + "date_range_open_prefix": "from", + "date_season_spring": "Spring", + "date_season_summer": "Summer", + "date_season_autumn": "Autumn", + "date_season_winter": "Winter", + "date_original_label": "Original:", + "date_unknown_icon_label": 
"Date unknown", + "form_label_date_precision": "Date precision", + "form_label_date_end": "End date", + "date_precision_option_day": "Exact day", + "date_precision_option_month": "Month", + "date_precision_option_season": "Season", + "date_precision_option_year": "Year", + "date_precision_option_range": "Range", + "date_precision_option_approx": "Approximate", + "date_precision_option_unknown": "Unknown", "person_merge_will_be_deleted": "will be deleted.", "comp_typeahead_placeholder": "Type a name...", "comp_typeahead_loading": "Searching...", diff --git a/frontend/messages/es.json b/frontend/messages/es.json index cbda7fab..1cbd3eda 100644 --- a/frontend/messages/es.json +++ b/frontend/messages/es.json @@ -261,6 +261,24 @@ "doc_preview_iframe_title": "Vista previa del documento", "doc_image_alt": "Escaneado original", "doc_no_date": "Sin fecha", + "date_precision_unknown": "Fecha desconocida", + "date_precision_approx_prefix": "ca.", + "date_range_open_prefix": "desde", + "date_season_spring": "Primavera", + "date_season_summer": "Verano", + "date_season_autumn": "Otoño", + "date_season_winter": "Invierno", + "date_original_label": "Texto original:", + "date_unknown_icon_label": "Fecha desconocida", + "form_label_date_precision": "Precisión de la fecha", + "form_label_date_end": "Fecha final", + "date_precision_option_day": "Día exacto", + "date_precision_option_month": "Mes", + "date_precision_option_season": "Estación", + "date_precision_option_year": "Año", + "date_precision_option_range": "Periodo", + "date_precision_option_approx": "Aproximada", + "date_precision_option_unknown": "Desconocida", "person_merge_will_be_deleted": "será eliminado.", "comp_typeahead_placeholder": "Escriba un nombre...", "comp_typeahead_loading": "Buscando...", diff --git a/frontend/src/lib/shared/utils/documentDate.spec.ts b/frontend/src/lib/shared/utils/documentDate.spec.ts new file mode 100644 index 00000000..3b2d13c2 --- /dev/null +++ 
b/frontend/src/lib/shared/utils/documentDate.spec.ts
@@ -0,0 +1,105 @@
+import { describe, expect, it } from 'vitest';
+import { readFileSync } from 'node:fs';
+import { resolve } from 'node:path';
+import { formatDocumentDate } from './documentDate';
+import { m } from '$lib/paraglide/messages.js';
+
+// ─── Shared drift-guard fixture ─────────────────────────────────────────────
+// The same table is asserted by the Java DocumentTitleFormatter test so the two
+// label implementations cannot drift. Expected values are the German canonical
+// form (see docs/date-label-fixtures.json).
+type FixtureCase = {
+  name: string;
+  precision: string;
+  anchor: string | null;
+  end: string | null;
+  raw: string | null;
+  expected: string;
+};
+
+const fixtures = JSON.parse(
+  readFileSync(resolve(process.cwd(), '../docs/date-label-fixtures.json'), 'utf-8')
+) as { cases: FixtureCase[] };
+
+describe('formatDocumentDate – shared fixture table (de)', () => {
+  for (const c of fixtures.cases) {
+    it(c.name, () => {
+      expect(
+        formatDocumentDate(
+          c.anchor,
+          c.precision as Parameters<typeof formatDocumentDate>[1],
+          c.end,
+          c.raw,
+          'de'
+        )
+      ).toBe(c.expected);
+    });
+  }
+});
+
+// ─── Anti-fabrication: suppressed components never leak ──────────────────────
+
+describe('formatDocumentDate – suppressed precision components', () => {
+  it('YEAR of a June date renders the year only, never the month', () => {
+    const label = formatDocumentDate('1916-06-15', 'YEAR');
+    expect(label).toBe('1916');
+    expect(label).not.toContain('Juni');
+    expect(label).not.toContain('15');
+  });
+
+  it('MONTH never renders the day-of-month', () => {
+    const label = formatDocumentDate('1916-06-01', 'MONTH', null, 'Juni 1916');
+    expect(label).toBe('Juni 1916');
+    expect(label).not.toMatch(/\b1\.\s/);
+  });
+});
+
+// ─── i18n: localized structured label ───────────────────────────────────────
+
+describe('formatDocumentDate – localization', () => {
+  it('localizes the UNKNOWN label per locale', () => {
+    expect(formatDocumentDate(null, 'UNKNOWN', null, '?', 'en')).toBe(
+      m.date_precision_unknown(undefined, { locale: 'en' })
+    );
+  });
+
+  it('localizes the APPROX prefix per locale', () => {
+    expect(formatDocumentDate('1920-01-01', 'APPROX', null, null, 'en')).toBe(
+      `${m.date_precision_approx_prefix(undefined, { locale: 'en' })} 1920`
+    );
+  });
+
+  it('localizes the SEASON word per locale when raw is absent', () => {
+    expect(formatDocumentDate('1916-07-01', 'SEASON', null, null, 'en')).toBe(
+      `${m.date_season_summer(undefined, { locale: 'en' })} 1916`
+    );
+  });
+
+  it('localizes the SEASON word even when the raw cell is verbatim German (Decision 4)', () => {
+    expect(formatDocumentDate('1916-06-01', 'SEASON', null, 'Sommer 1916', 'en')).toBe(
+      `${m.date_season_summer(undefined, { locale: 'en' })} 1916`
+    );
+  });
+});
+
+// ─── Security: untrusted raw must never influence the structured label ───────
+
+describe('formatDocumentDate – security', () => {
+  it('ignores a malicious raw value for the structured label (raw is rendered separately, escaped)', () => {
+    const label = formatDocumentDate(null, 'UNKNOWN', null, '<script>alert(1)</script>'); // NOTE(review): payload reconstructed (markup was stripped) — confirm
+    expect(label).toBe('Datum unbekannt');
+    expect(label).not.toContain('<script');
+  });
+});
+
+// ─── Fallbacks (NOTE(review): section header and describe title were lost in extraction; reconstructed — confirm) ───
+
+describe('formatDocumentDate – fallbacks', () => {
+  it('renders the unknown label when the anchor is null but precision is not UNKNOWN', () => {
+    expect(formatDocumentDate(null, 'DAY')).toBe('Datum unbekannt');
+  });
+
+  it('falls back to start-day only for a RANGE whose end is null', () => {
+    expect(formatDocumentDate('1917-01-10', 'RANGE', null)).toBe('ab 10. Jan. 1917');
+  });
+});
diff --git a/frontend/src/lib/shared/utils/documentDate.ts b/frontend/src/lib/shared/utils/documentDate.ts
new file mode 100644
index 00000000..7f401974
--- /dev/null
+++ b/frontend/src/lib/shared/utils/documentDate.ts
@@ -0,0 +1,159 @@
+import { formatDate, formatMCDate } from './date';
+import { m } from '$lib/paraglide/messages.js';
+
+/**
+ * Precision of a document's date — mirrors the backend {@code DatePrecision} enum
+ * and the import normalizer's seven values verbatim.
+ */
+export type DatePrecision = 'DAY' | 'MONTH' | 'SEASON' | 'YEAR' | 'RANGE' | 'APPROX' | 'UNKNOWN';
+
+/**
+ * Renders a document date at exactly the precision the data claims — never finer.
+ *
+ * Delegates to the {@link formatDate}/{@link formatMCDate} helpers (so the
+ * `T12:00:00` UTC-safety convention and the German Intl formatting are shared,
+ * not reimplemented) and routes every localized word through Paraglide.
+ *
+ * The label is the SINGLE SOURCE OF TRUTH shared with the Java
+ * {@code DocumentTitleFormatter}: both are asserted against
+ * `docs/date-label-fixtures.json` so they cannot drift. The untrusted `raw`
+ * cell is only used to derive a season word (a known German season token) — it
+ * is otherwise rendered separately by the caller via Svelte default escaping,
+ * never interpolated into HTML here.
+ *
+ * @param iso the sort/filter anchor day (`YYYY-MM-DD`), nullable for UNKNOWN rows
+ * @param precision descriptive precision metadata
+ * @param end the RANGE end day; null means an open-ended range
+ * @param raw the verbatim spreadsheet cell, used only for the SEASON word
+ * @param locale BCP 47 tag for the localized structured parts (default `de-DE`)
+ * @returns the display label; the localized "unknown" label when `precision`
+ *   is `UNKNOWN`, when `iso` is null/undefined/empty, or when `precision` is
+ *   not one of the known values
+ */
+export function formatDocumentDate(
+  iso: string | null | undefined,
+  precision: DatePrecision,
+  end?: string | null,
+  raw?: string | null,
+  locale: string = 'de-DE'
+): string {
+  // Without an anchor there is nothing honest to render, whatever the precision says.
+  if (precision === 'UNKNOWN' || !iso) {
+    return m.date_precision_unknown(undefined, { locale: messageLocale(locale) });
+  }
+
+  const year = iso.slice(0, 4);
+
+  switch (precision) {
+    case 'DAY':
+      // NOTE(review): `locale` is not forwarded here; formatDate's signature is
+      // not visible from this file — confirm whether DAY should honour `locale`.
+      return formatDate(iso, 'long');
+    case 'MONTH':
+      return monthYear(iso, locale);
+    case 'SEASON':
+      return seasonLabel(iso, raw, locale, year);
+    case 'YEAR':
+      return year;
+    case 'APPROX':
+      return `${m.date_precision_approx_prefix(undefined, { locale: messageLocale(locale) })} ${year}`;
+    case 'RANGE':
+      return rangeLabel(iso, end, locale);
+    default:
+      // Values outside the union can still arrive at runtime (e.g. via a cast).
+      return m.date_precision_unknown(undefined, { locale: messageLocale(locale) });
+  }
+}
+
+// ─── precision branches ──────────────────────────────────────────────────────
+
+/** Formats the anchor as long month name plus numeric year in `locale`. */
+function monthYear(iso: string, locale: string): string {
+  return new Intl.DateTimeFormat(locale, { month: 'long', year: 'numeric' }).format(noon(iso));
+}
+
+/** Builds `<localized season word> <year>` for a SEASON-precision date. */
+function seasonLabel(
+  iso: string,
+  raw: string | null | undefined,
+  locale: string,
+  year: string
+): string {
+  const month = Number(iso.slice(5, 7));
+  // Prefer the season named in the raw cell; fall back to deriving it from the
+  // anchor month. Either way the WORD is localized (Decision 4) — the verbatim
+  // German raw cell is preserved separately as the visible secondary line.
+  const season = seasonFromRaw(raw) ?? seasonOfMonth(month);
+  return `${seasonWord(season, locale)} ${year}`;
+}
+
+/**
+ * Renders a RANGE label.
+ *
+ * - no `end`: open range, localized "from" prefix plus the start day
+ * - `end` equal to `iso`: a single day
+ * - same calendar year: delegated to {@link sameYearRange}
+ * - otherwise: both dates in full, joined by a spaced en dash
+ */
+function rangeLabel(iso: string, end: string | null | undefined, locale: string): string {
+  if (!end) {
+    return `${m.date_range_open_prefix(undefined, { locale: messageLocale(locale) })} ${formatMCDate(iso, locale)}`;
+  }
+  if (end === iso) {
+    return formatMCDate(iso, locale);
+  }
+  const start = noon(iso);
+  const finish = noon(end);
+  if (start.getFullYear() === finish.getFullYear()) {
+    return sameYearRange(end, start, finish, locale);
+  }
+  return `${formatMCDate(iso, locale)} – ${formatMCDate(end, locale)}`;
+}
+
+/** Renders a range whose start and end share a year, dropping the repeated parts. */
+function sameYearRange(end: string, start: Date, finish: Date, locale: string): string {
+  if (start.getMonth() === finish.getMonth()) {
+    // Collapse the shared month/year: only the end carries "DD. Mon. YYYY".
+    // NOTE(review): the "D.–" prefix is German punctuation and is emitted for
+    // every locale — confirm this is intended for non-German callers.
+    return `${start.getDate()}.–${formatMCDate(end, locale)}`;
+  }
+  const startNoYear = new Intl.DateTimeFormat(locale, { day: 'numeric', month: 'short' }).format(
+    start
+  );
+  return `${startNoYear} – ${formatMCDate(end, locale)}`;
+}
+
+// ─── season helpers ──────────────────────────────────────────────────────────
+
+type Season = 'spring' | 'summer' | 'autumn' | 'winter';
+
+/** Quarter buckets; matches the normalizer's representative months (4/7/10/1). */
+function seasonOfMonth(month: number): Season {
+  if (month >= 3 && month <= 5) return 'spring';
+  if (month >= 6 && month <= 8) return 'summer';
+  if (month >= 9 && month <= 11) return 'autumn';
+  return 'winter';
+}
+
+/** Returns the Paraglide season word for `season` in the message locale derived from `locale`. */
+function seasonWord(season: Season, locale: string): string {
+  const opts = { locale: messageLocale(locale) };
+  switch (season) {
+    case 'spring':
+      return m.date_season_spring(undefined, opts);
+    case 'summer':
+      return m.date_season_summer(undefined, opts);
+    case 'autumn':
+      return m.date_season_autumn(undefined, opts);
+    case 'winter':
+      return m.date_season_winter(undefined, opts);
+  }
+}
+
+/**
+ * Lower-cased German season tokens recognised at the start of a raw cell.
+ *
+ * A Map rather than an object literal: the key comes from the untrusted raw
+ * cell, and a plain-object lookup would also resolve inherited members such as
+ * `constructor` or `__proto__`, yielding a non-Season value.
+ */
+const GERMAN_SEASON_TOKENS: ReadonlyMap<string, Season> = new Map<string, Season>([
+  ['frühling', 'spring'],
+  ['frühjahr', 'spring'],
+  ['sommer', 'summer'],
+  ['herbst', 'autumn'],
+  ['winter', 'winter']
+]);
+
+/** Maps a German season token at the start of the raw cell to a Season, else null. */
+function seasonFromRaw(raw: string | null | undefined): Season | null {
+  if (!raw) return null;
+  // The first whitespace-delimited word decides; matching is case-insensitive.
+  const [first = ''] = raw.trim().split(/\s+/);
+  return GERMAN_SEASON_TOKENS.get(first.toLowerCase()) ?? null;
+}
+
+// ─── shared utilities ────────────────────────────────────────────────────────
+
+/** Parses a `YYYY-MM-DD` day at 12:00 local time (no zone suffix is appended). */
+function noon(iso: string): Date {
+  return new Date(iso + 'T12:00:00');
+}
+
+/**
+ * Paraglide expects a registered locale tag; map `de-DE` → `de` etc.
+ * The language subtag is compared case-insensitively (BCP 47 tags are
+ * case-insensitive), and anything other than `en`/`es` falls back to `de`.
+ */
+function messageLocale(locale: string): 'de' | 'en' | 'es' {
+  const base = locale.slice(0, 2).toLowerCase();
+  if (base === 'en') return 'en';
+  if (base === 'es') return 'es';
+  return 'de';
+}