From b1f77bcfb6594e78f46b4a7a569f11afde02bed8 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 4 Jun 2026 16:15:00 +0200 Subject: [PATCH 1/9] refactor(document): extract title composition into shared DocumentTitleFactory (#726) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move DocumentTitleFormatter from importing into the document package and introduce DocumentTitleFactory there as the single source of truth for the {index} – {dateLabel} – {location} formula. DocumentImporter now consumes the factory instead of owning the composition; the document package owns the rule, importing depends on it (not the reverse). No behavioral change — importer title assertions and the #666 fixture parity test stay green. Co-Authored-By: Claude Opus 4.8 --- .../document/DocumentTitleFactory.java | 37 +++++++++ .../DocumentTitleFormatter.java | 4 +- .../importing/DocumentImporter.java | 27 ++---- .../document/DocumentTitleFactoryTest.java | 82 +++++++++++++++++++ .../DocumentTitleFormatterTest.java | 3 +- .../importing/DocumentImporterTest.java | 7 +- 6 files changed, 134 insertions(+), 26 deletions(-) create mode 100644 backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleFactory.java rename backend/src/main/java/org/raddatz/familienarchiv/{importing => document}/DocumentTitleFormatter.java (97%) create mode 100644 backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleFactoryTest.java rename backend/src/test/java/org/raddatz/familienarchiv/{importing => document}/DocumentTitleFormatterTest.java (95%) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleFactory.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleFactory.java new file mode 100644 index 00000000..e25b3ce8 --- /dev/null +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleFactory.java @@ -0,0 +1,37 @@ +package org.raddatz.familienarchiv.document; + +import org.springframework.stereotype.Component; + +/** + * Single source of truth for the auto-generated document title + * {@code {index} – {dateLabel} – {location}}. + * + *

The {@code document} package owns this formula; {@code importing} consumes it + * (see ADR for issue #726). The leading {@code index} is the document's + * {@code originalFilename}; the date label is the honest German label produced by + * {@link DocumentTitleFormatter} (the Java half of the #666 date-label split); the + * trailing location is the {@code meta_location} verbatim, omitted when blank. + */ +@Component +public class DocumentTitleFactory { + + static final String SEPARATOR = " – "; + + /** + * Composes the auto-title from the document's current state. The date segment is + * dropped for UNKNOWN precision or a null date (the honest "no date" case); the + * location segment is dropped when blank. + */ + public String build(Document doc) { + StringBuilder title = new StringBuilder(doc.getOriginalFilename()); + if (doc.getDocumentDate() != null && doc.getMetaDatePrecision() != DatePrecision.UNKNOWN) { + title.append(SEPARATOR).append(DocumentTitleFormatter.formatTitleDate( + doc.getDocumentDate(), doc.getMetaDatePrecision(), + doc.getMetaDateEnd(), doc.getMetaDateRaw())); + } + if (doc.getLocation() != null && !doc.getLocation().isBlank()) { + title.append(SEPARATOR).append(doc.getLocation()); + } + return title.toString(); + } +} diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatter.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleFormatter.java similarity index 97% rename from backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatter.java rename to backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleFormatter.java index 65120004..89885666 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatter.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleFormatter.java @@ -1,6 +1,4 @@ -package org.raddatz.familienarchiv.importing; - -import org.raddatz.familienarchiv.document.DatePrecision; +package org.raddatz.familienarchiv.document; import java.time.LocalDate; import java.time.format.DateTimeFormatter; diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java index b85a8cc6..91b11d31 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java @@ -5,6 +5,7 @@ import lombok.extern.slf4j.Slf4j; import org.raddatz.familienarchiv.document.DatePrecision; import org.raddatz.familienarchiv.document.Document; import org.raddatz.familienarchiv.document.DocumentService; +import org.raddatz.familienarchiv.document.DocumentTitleFactory; import org.raddatz.familienarchiv.document.DocumentStatus; import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner; import org.raddatz.familienarchiv.exception.DomainException; @@ -74,6 +75,7 @@ public class DocumentImporter { Pattern.compile("[A-Za-z\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u00FF]{1,4}-+\\d+x?"); private final DocumentService documentService; + private final DocumentTitleFactory documentTitleFactory; private final PersonService personService; private final TagService tagService; private final S3Client s3Client; @@ -181,7 +183,7 @@ public class DocumentImporter { applyAttribution(doc, row); applyDates(doc, row); applyAuthoritativeAssociations(doc, row); - applyFileMetadata(doc, s3Key, contentType, status, index); + applyFileMetadata(doc, s3Key, contentType, status); applyComputedFlags(doc); return doc; } @@ -217,14 +219,15 @@ public class DocumentImporter { attachTag(doc, row.get("tags")); } - // S3 key, content type, status, and the index-derived title. + // S3 key, content type, status, and the index-derived title. The title formula lives in + // the document package's DocumentTitleFactory (single source of truth, #726); by this point + // applyDates has populated the date/location and originalFilename carries the index. private void applyFileMetadata(Document doc, String s3Key, String contentType, - DocumentStatus status, String index) { + DocumentStatus status) { doc.setStatus(status); doc.setFilePath(s3Key); doc.setContentType(contentType); - doc.setTitle(buildTitle(index, doc.getDocumentDate(), doc.getMetaDatePrecision(), - doc.getMetaDateEnd(), doc.getMetaDateRaw(), doc.getLocation())); + doc.setTitle(documentTitleFactory.build(doc)); } // metadataComplete: a document counts as fully described if any of the three "who/when" @@ -235,20 +238,6 @@ public class DocumentImporter { || !doc.getReceivers().isEmpty()); } - // The title carries the date at the HONEST precision (never a fabricated day) via the - // shared DocumentTitleFormatter, plus the location — kept under 20 lines by delegating. - private static String buildTitle(String index, LocalDate date, DatePrecision precision, - LocalDate end, String raw, String location) { - StringBuilder title = new StringBuilder(index); - if (date != null && precision != DatePrecision.UNKNOWN) { - title.append(" – ").append(DocumentTitleFormatter.formatTitleDate(date, precision, end, raw)); - } - if (location != null && !location.isBlank()) { - title.append(" – ").append(location); - } - return title.toString(); - } - // ─── attribution routing — register-first, always retain raw ───────────────────── private Person resolveSender(String slug, String rawName) { diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleFactoryTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleFactoryTest.java new file mode 100644 index 00000000..d01801c9 --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleFactoryTest.java @@ -0,0 +1,82 @@ +package org.raddatz.familienarchiv.document; + +import org.junit.jupiter.api.Test; + +import java.time.LocalDate; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * The auto-title composition {@code {index} – {dateLabel} – {location}} in isolation. + * The honest date-label forms themselves are pinned by {@link DocumentTitleFormatterTest} + * against the shared #666 fixture; here we assert only how the factory composes the + * three segments and which segments it omits. + */ +class DocumentTitleFactoryTest { + + private final DocumentTitleFactory factory = new DocumentTitleFactory(); + + private static Document.DocumentBuilder doc(String index) { + return Document.builder() + .originalFilename(index) + .metaDatePrecision(DatePrecision.UNKNOWN); + } + + @Test + void index_only_when_no_date_and_no_location() { + assertThat(factory.build(doc("C-0029").build())).isEqualTo("C-0029"); + } + + @Test + void index_and_year_date() { + Document d = doc("C-0029") + .documentDate(LocalDate.of(1928, 1, 15)) + .metaDatePrecision(DatePrecision.YEAR) + .build(); + assertThat(factory.build(d)).isEqualTo("C-0029 – 1928"); + } + + @Test + void index_date_and_location() { + Document d = doc("C-0029") + .documentDate(LocalDate.of(1928, 1, 15)) + .metaDatePrecision(DatePrecision.YEAR) + .location("Berlin") + .build(); + assertThat(factory.build(d)).isEqualTo("C-0029 – 1928 – Berlin"); + } + + @Test + void location_without_date_attaches_directly_to_index() { + Document d = doc("C-0029").location("Berlin").build(); + assertThat(factory.build(d)).isEqualTo("C-0029 – Berlin"); + } + + @Test + void unknown_precision_omits_the_date_segment() { + Document d = doc("C-0029") + .documentDate(LocalDate.of(1928, 1, 15)) + .metaDatePrecision(DatePrecision.UNKNOWN) + .build(); + assertThat(factory.build(d)).isEqualTo("C-0029"); + } + + @Test + void blank_location_is_omitted() { + Document d = doc("C-0029") + .documentDate(LocalDate.of(1928, 1, 15)) + .metaDatePrecision(DatePrecision.YEAR) + .location(" ") + .build(); + assertThat(factory.build(d)).isEqualTo("C-0029 – 1928"); + } + + @Test + void day_precision_renders_the_full_german_label() { + Document d = doc("C-0029") + .documentDate(LocalDate.of(1928, 1, 15)) + .metaDatePrecision(DatePrecision.DAY) + .build(); + assertThat(factory.build(d)).isEqualTo("C-0029 – 15. Januar 1928"); + } +} diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatterTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleFormatterTest.java similarity index 95% rename from backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatterTest.java rename to backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleFormatterTest.java index d8f66b6e..ddc39b12 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatterTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleFormatterTest.java @@ -1,10 +1,9 @@ -package org.raddatz.familienarchiv.importing; +package org.raddatz.familienarchiv.document; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import org.junit.jupiter.api.DynamicTest; import org.junit.jupiter.api.TestFactory; -import org.raddatz.familienarchiv.document.DatePrecision; import java.nio.file.Files; import java.nio.file.Path; diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentImporterTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentImporterTest.java index c97de87b..06504cbd 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentImporterTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentImporterTest.java @@ -11,6 +11,7 @@ import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.raddatz.familienarchiv.document.Document; import org.raddatz.familienarchiv.document.DocumentService; +import org.raddatz.familienarchiv.document.DocumentTitleFactory; import org.raddatz.familienarchiv.document.DocumentStatus; import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner; import org.raddatz.familienarchiv.person.Person; @@ -59,8 +60,10 @@ class DocumentImporterTest { // override this stub locally (load_skipsFile_whenMagicByteCheckThrowsIoException). lenient().when(fileStreamOpener.open(any(File.class))) .thenAnswer(inv -> new java.io.FileInputStream(inv.getArgument(0, File.class))); - importer = new DocumentImporter(documentService, personService, tagService, s3Client, - thumbnailAsyncRunner, fileStreamOpener); + // Real factory (pure, dependency-free) so the title-content assertions below exercise + // the shared composition rather than a stub — the #726 single source of truth. + importer = new DocumentImporter(documentService, new DocumentTitleFactory(), personService, + tagService, s3Client, thumbnailAsyncRunner, fileStreamOpener); ReflectionTestUtils.setField(importer, "bucketName", "test-bucket"); } -- 2.49.1 From e6ce00035e33786ceed64c76a46234d1c541c04a Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 4 Jun 2026 16:20:46 +0200 Subject: [PATCH 2/9] feat(document): regenerate auto-title on save when date/location change (#726) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updateDocument now captures the machine title from the persisted state before any setter runs, and rebuilds it from the new state only when the submitted title still equals that machine value — an exact comparison that relies on the edit form round-tripping an untouched title verbatim. A hand-written or freshly-typed title is kept; a blank submission falls back to the rebuilt auto-title (title is always present); a file-replaced document no longer matches its import-time title and is treated as manual. projectedState mirrors the setter asymmetry exactly (date/location overwrite incl. null-clear; precision/end/raw skip-null from the entity). Co-Authored-By: Claude Opus 4.8 --- .../document/DocumentService.java | 50 ++++- .../document/DocumentTitleFactory.java | 4 +- .../document/DocumentServiceTest.java | 177 ++++++++++++++++++ 3 files changed, 229 insertions(+), 2 deletions(-) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java index 66b10da0..9db6e5d2 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java @@ -68,6 +68,7 @@ import static org.raddatz.familienarchiv.document.DocumentSpecifications.*; public class DocumentService { private final DocumentRepository documentRepository; + private final DocumentTitleFactory documentTitleFactory; private final PersonService personService; private final FileService fileService; private final TagService tagService; @@ -379,8 +380,14 @@ public class DocumentService { DocumentStatus statusBefore = doc.getStatus(); + // Auto-title sync (#726): capture the machine title from the CURRENTLY-persisted state + // BEFORE any setter runs — the setters below overwrite date/location and applyDatePrecision + // skips nulls, so the old state must be read first. The submitted title is the catalog + // auto-title iff it equals this; only then does it follow date/location forward. + String autoTitleBefore = documentTitleFactory.build(doc); + // 1. Einfache Felder Update - doc.setTitle(dto.getTitle()); + doc.setTitle(resolveTitle(dto.getTitle(), autoTitleBefore, doc, dto)); doc.setDocumentDate(dto.getDocumentDate()); applyDatePrecision(doc, dto); validateDateRange(doc); // guard before any save (updateDocumentTags below persists) @@ -452,6 +459,47 @@ public class DocumentService { return saved; } + /** + * Decides the title to persist on an edit (#726). The submitted title is the catalog + * auto-title only when it equals {@code autoBefore} (built from the stored state) — an exact + * comparison with no heuristic, relying on the edit form round-tripping the stored title + * verbatim when untouched. A machine title is rebuilt from the new state so a corrected + * date/location flows into it; a hand-written or freshly-typed title is kept verbatim. A blank + * submission is never persisted (title is always present) — it falls back to the rebuilt + * auto-title, which always carries at least the index. + */ + private String resolveTitle(String submitted, String autoBefore, Document doc, DocumentUpdateDTO dto) { + if (submitted == null || submitted.isBlank()) { + return documentTitleFactory.build(projectedState(doc, dto)); + } + if (!Objects.equals(submitted, autoBefore)) { + return submitted; + } + return documentTitleFactory.build(projectedState(doc, dto)); + } + + /** + * The document state the regenerated title is built from, mirroring {@code updateDocument}'s + * setter asymmetry exactly: {@code documentDate}/{@code location} are overwritten from the DTO + * (a null value clears the field), while precision/end/raw are taken from the DTO only when + * non-null and otherwise kept from the stored entity (see {@link #applyDatePrecision}). The + * index ({@code originalFilename}) is never touched by a metadata edit. A mismatch here would + * silently produce a wrong label, so it is kept lock-step with the real setters. + */ + private Document projectedState(Document doc, DocumentUpdateDTO dto) { + return Document.builder() + .originalFilename(doc.getOriginalFilename()) + .documentDate(dto.getDocumentDate()) + .location(dto.getLocation()) + .metaDatePrecision(dto.getMetaDatePrecision() != null + ? dto.getMetaDatePrecision() : doc.getMetaDatePrecision()) + .metaDateEnd(dto.getMetaDateEnd() != null + ? dto.getMetaDateEnd() : doc.getMetaDateEnd()) + .metaDateRaw(dto.getMetaDateRaw() != null + ? dto.getMetaDateRaw() : doc.getMetaDateRaw()) + .build(); + } + /** * Applies the three date-precision fields only when the DTO carries them. * A null field means "not submitted" — overwriting the stored value with null diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleFactory.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleFactory.java index e25b3ce8..e2f514cf 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleFactory.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleFactory.java @@ -23,7 +23,9 @@ public class DocumentTitleFactory { * location segment is dropped when blank. */ public String build(Document doc) { - StringBuilder title = new StringBuilder(doc.getOriginalFilename()); + // originalFilename is NOT NULL in production; guard only so a synthetic/partial entity + // never trips StringBuilder(null) with an opaque NPE. + StringBuilder title = new StringBuilder(doc.getOriginalFilename() == null ? "" : doc.getOriginalFilename()); if (doc.getDocumentDate() != null && doc.getMetaDatePrecision() != DatePrecision.UNKNOWN) { title.append(SEPARATOR).append(DocumentTitleFormatter.formatTitleDate( doc.getDocumentDate(), doc.getMetaDatePrecision(), diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java index a1ff86bf..76c95dca 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java @@ -5,6 +5,7 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.ArgumentCaptor; import org.mockito.InjectMocks; import org.mockito.Mock; +import org.mockito.Spy; import org.mockito.junit.jupiter.MockitoExtension; import org.raddatz.familienarchiv.audit.AuditKind; import org.raddatz.familienarchiv.audit.AuditLogQueryService; @@ -74,6 +75,9 @@ class DocumentServiceTest { @Mock AuditLogQueryService auditLogQueryService; @Mock TranscriptionBlockQueryService transcriptionBlockQueryService; @Mock ThumbnailAsyncRunner thumbnailAsyncRunner; + // Real factory (pure, dependency-free) so save-time title-regeneration tests exercise the + // shared composition rather than a stub — the #726 single source of truth. + @Spy DocumentTitleFactory documentTitleFactory = new DocumentTitleFactory(); @InjectMocks DocumentService documentService; // ─── deleteDocument ─────────────────────────────────────────────────────── @@ -228,6 +232,179 @@ class DocumentServiceTest { assertThat(doc.getMetaDateRaw()).isEqualTo("Juni 1916"); } + // ─── updateDocument save-time auto-title regeneration (#726) ────────────── + // + // Exact old-vs-new comparison: the title is the catalog auto-title iff the submitted + // title equals what the factory builds from the CURRENTLY-persisted state. The edit form + // round-trips the stored title verbatim when untouched, so an equal submission means the + // user did not type over it. makeStored() seeds index/date/precision/location and sets the + // stored title to the matching auto-title, mirroring a freshly-imported row. + + private Document makeStored(String index, LocalDate date, DatePrecision precision, String location) { + Document doc = Document.builder() + .id(UUID.randomUUID()) + .originalFilename(index) + .documentDate(date) + .metaDatePrecision(precision) + .location(location) + .receivers(new HashSet<>()) + .tags(new HashSet<>()) + .build(); + doc.setTitle(documentTitleFactory.build(doc)); + return doc; + } + + /** A DTO that round-trips the stored auto-title untouched, with new date/precision/location. */ + private static DocumentUpdateDTO editDto(String submittedTitle, LocalDate date, + DatePrecision precision, String location) { + DocumentUpdateDTO dto = new DocumentUpdateDTO(); + dto.setTitle(submittedTitle); + dto.setDocumentDate(date); + dto.setMetaDatePrecision(precision); + dto.setLocation(location); + return dto; + } + + private Document runUpdate(Document stored, DocumentUpdateDTO dto) throws Exception { + when(documentRepository.findById(stored.getId())).thenReturn(Optional.of(stored)); + when(documentRepository.save(any())).thenReturn(stored); + documentService.updateDocument(stored.getId(), dto, null, null); + return stored; + } + + @Test + void updateDocument_regeneratesAutoTitle_whenDateChanges() throws Exception { + Document stored = makeStored("C-0029", LocalDate.of(2028, 1, 1), DatePrecision.YEAR, "Berlin"); + // title untouched ("C-0029 – 2028 – Berlin"), date corrected to 1928 + DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin"); + + runUpdate(stored, dto); + + assertThat(stored.getTitle()).isEqualTo("C-0029 – 1928 – Berlin"); + } + + @Test + void updateDocument_keepsHandWrittenTitle_whenDateChanges() throws Exception { + Document stored = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null); + stored.setTitle("C-0029 – Brief an Mutter"); // hand-written, ≠ auto-title + DocumentUpdateDTO dto = editDto("C-0029 – Brief an Mutter", LocalDate.of(1930, 1, 1), DatePrecision.YEAR, null); + + runUpdate(stored, dto); + + assertThat(stored.getTitle()).isEqualTo("C-0029 – Brief an Mutter"); + } + + @Test + void updateDocument_freshlyTypedTitleWins_overRegeneration() throws Exception { + Document stored = makeStored("C-0029", LocalDate.of(2028, 1, 1), DatePrecision.YEAR, "Berlin"); + // user changed the date AND typed a new title in the same save + DocumentUpdateDTO dto = editDto("Geburtsanzeige", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin"); + + runUpdate(stored, dto); + + assertThat(stored.getTitle()).isEqualTo("Geburtsanzeige"); + } + + @Test + void updateDocument_regeneratesWithNewDateAndLocation() throws Exception { + Document stored = makeStored("C-0029", LocalDate.of(2028, 1, 1), DatePrecision.YEAR, "Berlin"); + DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "München"); + + runUpdate(stored, dto); + + assertThat(stored.getTitle()).isEqualTo("C-0029 – 1928 – München"); + } + + @Test + void updateDocument_dropsTrailingLocationSegment_whenLocationCleared() throws Exception { + Document stored = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin"); + // location cleared (null), title untouched + DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null); + + runUpdate(stored, dto); + + assertThat(stored.getTitle()).isEqualTo("C-0029 – 1928"); + } + + @Test + void updateDocument_regeneratedTitle_doesNotContainOldDate() throws Exception { + Document stored = makeStored("C-0029", LocalDate.of(2028, 1, 1), DatePrecision.YEAR, "Berlin"); + DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin"); + + runUpdate(stored, dto); + + assertThat(stored.getTitle()).doesNotContain("2028"); + } + + @Test + void updateDocument_relabelsOnPrecisionChange_yearToDay() throws Exception { + Document stored = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null); + // stored auto-title "C-0029 – 1928"; set a full day at DAY precision + DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 15), DatePrecision.DAY, null); + + runUpdate(stored, dto); + + assertThat(stored.getTitle()).isEqualTo("C-0029 – 15. Januar 1928"); + } + + @Test + void updateDocument_populatesTitle_whenDateAddedToUnknownRow() throws Exception { + Document stored = makeStored("C-0029", null, DatePrecision.UNKNOWN, null); + // stored auto-title is just "C-0029"; add a 1928 YEAR date + DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null); + + runUpdate(stored, dto); + + assertThat(stored.getTitle()).isEqualTo("C-0029 – 1928"); + } + + @Test + void updateDocument_roundTripsSeasonLabel() throws Exception { + Document stored = makeStored("C-0029", LocalDate.of(1943, 4, 1), DatePrecision.SEASON, null); + stored.setMetaDateRaw("Frühling 1943"); + stored.setTitle(documentTitleFactory.build(stored)); // "C-0029 – Frühling 1943" + DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1943, 4, 1), DatePrecision.SEASON, null); + dto.setMetaDateRaw("Frühling 1943"); + + runUpdate(stored, dto); + + assertThat(stored.getTitle()).isEqualTo("C-0029 – Frühling 1943"); + } + + @Test + void updateDocument_doesNotRegenerateToBlank_whenSubmittedTitleEmpty() throws Exception { + Document stored = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin"); + DocumentUpdateDTO dto = editDto("", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin"); + + runUpdate(stored, dto); + + assertThat(stored.getTitle()).isNotBlank(); + } + + @Test + void updateDocument_treatsFileReplacedDoc_asManual() throws Exception { + // originalFilename was reassigned by an earlier file-replace, so the stored title (built + // at import from the old index) no longer matches build(currentState) → treated as manual. + Document stored = makeStored("scan_2024.pdf", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin"); + stored.setTitle("C-0029 – 1928 – Berlin"); // legacy import title, ≠ build("scan_2024.pdf"…) + DocumentUpdateDTO dto = editDto("C-0029 – 1928 – Berlin", LocalDate.of(1930, 1, 1), DatePrecision.YEAR, "Berlin"); + + runUpdate(stored, dto); + + assertThat(stored.getTitle()).isEqualTo("C-0029 – 1928 – Berlin"); + } + + @Test + void updateDocument_idempotent_whenNothingChanges() throws Exception { + Document stored = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin"); + String before = stored.getTitle(); + DocumentUpdateDTO dto = editDto(before, LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin"); + + runUpdate(stored, dto); + + assertThat(stored.getTitle()).isEqualTo(before); + } + // ─── updateDocument date-range validation (#678) ────────────────────────── /** Builds a stored doc ready for an updateDocument call (collections initialised). */ -- 2.49.1 From 26b45f1c780b7ac44efe763b8f0e5832ddc88cd2 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 4 Jun 2026 16:29:57 +0200 Subject: [PATCH 3/9] feat(document): one-time backfill endpoint for stale auto-titles (#726) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds POST /api/admin/backfill-titles (ADMIN-only, synchronous) which rebuilds every machine-generated title from the row's current state. A grammar heuristic (DocumentTitleBackfillMatcher) decides overwritability: index matched literally via startsWith (originalFilename is user-controlled — no regex injection / ReDoS, CWE-1333), date-label forms derived from the same Locale.GERMAN formatters as the factory so they cannot drift, prose left untouched, fail-closed on any surprise. Saves via the repository directly (no recordVersion — follows backfillFileHashes), so the mechanical rename never version-spams document_versions. Idempotent: a second run rewrites nothing. Emits one SLF4J-parameterized scanned/updated/skipped line. Co-Authored-By: Claude Opus 4.8 --- .../document/DocumentService.java | 37 +++++ .../DocumentTitleBackfillMatcher.java | 101 ++++++++++++ .../familienarchiv/user/AdminController.java | 6 + .../document/DocumentServiceTest.java | 53 ++++++ .../DocumentTitleBackfillMatcherTest.java | 151 ++++++++++++++++++ .../user/AdminControllerTest.java | 25 +++ 6 files changed, 373 insertions(+) create mode 100644 backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcher.java create mode 100644 backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcherTest.java diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java index 9db6e5d2..44b0a0d9 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java @@ -1058,6 +1058,43 @@ public class DocumentService { tagService.delete(tagId); } + /** + * One-time cleanup of already-stale auto-titles (#726, FR-003). For every document whose + * stored title passes the {@link DocumentTitleBackfillMatcher} overwrite heuristic, rebuilds + * the title from the row's current state and persists it only when it actually changed. + * Idempotent: a second run rebuilds the same value and saves nothing. Hand-written prose is + * left untouched. + * + *

Saves via {@code documentRepository.save} directly — it must NOT route through + * {@link #updateDocument} (which versions every write), following the {@link #backfillFileHashes} + * precedent: a mechanical rename must not snapshot the whole corpus into {@code document_versions}. + * + * @return the number of documents whose title was rewritten + */ + @Transactional + public int backfillTitles() { + List docs = documentRepository.findAll(); + int updated = 0; + int skipped = 0; + for (Document doc : docs) { + if (!DocumentTitleBackfillMatcher.isOverwritable( + doc.getTitle(), doc.getOriginalFilename(), doc.getLocation())) { + skipped++; + continue; + } + String rebuilt = documentTitleFactory.build(doc); + if (rebuilt.equals(doc.getTitle())) { + skipped++; // already correct — keep idempotent, no write + continue; + } + doc.setTitle(rebuilt); + documentRepository.save(doc); // direct save, no recordVersion (mechanical rename) + updated++; + } + log.info("Title backfill complete: scanned={} updated={} skipped={}", docs.size(), updated, skipped); + return updated; + } + @Transactional public int backfillFileHashes() { List docs = documentRepository.findByFileHashIsNullAndFilePathIsNotNull(); diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcher.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcher.java new file mode 100644 index 00000000..ee6efc2c --- /dev/null +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcher.java @@ -0,0 +1,101 @@ +package org.raddatz.familienarchiv.document; + +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.util.LinkedHashSet; +import java.util.Locale; +import java.util.Set; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * Heuristic overwrite test for the one-time title backfill (#726, FR-004): decides whether a + * STORED title is a machine-generated auto-title (and so may be rebuilt from the row's current + * state) versus hand-written prose (left untouched). Used ONLY by the backfill — save-time + * regeneration uses an exact old-vs-new comparison instead, with no heuristic. + * + *

A stored title is overwritable iff, after stripping the literal {@code index} prefix: + *

    + *
  1. it is exactly {@code {index}}, or
  2. + *
  3. {@code {index} – {dateLabel}} with an optional trailing {@code – {location}} segment + * (any location — a present, valid date label is itself strong evidence of a machine + * title), or
  4. + *
  5. {@code {index} – {location}} where the segment equals the document's current location + * (no date label, so the segment must match the known location to be distinguished from + * prose).
  6. + *
+ * + *

Security: the {@code index} is compared literally via {@link String#startsWith} + * (never compiled into a regex) because {@code originalFilename} is user-controlled and may carry + * regex metacharacters — an unquoted pattern would be a ReDoS / regex-injection vector + * (CWE-1333 / CWE-625). The date-label sub-patterns use only bounded, non-nested quantifiers over + * short tokens, so there is no catastrophic backtracking. Fail-closed: any null/blank index or + * structural surprise returns {@code false}. + */ +final class DocumentTitleBackfillMatcher { + + private static final String SEPARATOR = " – "; + + // German month tokens derived from the SAME Locale.GERMAN formatters DocumentTitleFormatter + // uses, so the matcher's accepted spellings cannot drift from what the factory emits (full + // names "Januar"…"Dezember"; abbreviations "Jan."…"Dez." — note May/June/July/März carry no + // period). Pattern.quote each so a "." in an abbreviation is literal, never a wildcard. + private static final String FULL_MONTH = monthAlternation("MMMM"); + private static final String ABBR_MONTH = monthAlternation("MMM"); + private static final String SEASON = "(?:Frühling|Sommer|Herbst|Winter)"; + private static final String YEAR = "\\d{1,4}"; + private static final String DAY_NUM = "\\d{1,2}"; + + // One complete date label, anchored, optionally followed by a free-form trailing location + // segment. Only bounded/non-nested quantifiers over short tokens plus a single trailing + // ".+" → linear, no catastrophic backtracking (FR-004 ReDoS guard). + private static final Pattern DATE_LABEL_WITH_OPTIONAL_LOCATION = Pattern.compile( + "^(?:" + String.join("|", + YEAR, // 1916 + "ca\\. " + YEAR, // ca. 1920 + FULL_MONTH + " " + YEAR, // Juni 1916 + DAY_NUM + "\\. " + FULL_MONTH + " " + YEAR, // 24. Dezember 1943 + SEASON + " " + YEAR, // Sommer 1916 + "Datum unbekannt", + DAY_NUM + "\\.–" + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR, // 10.–11. Jan. 1917 + DAY_NUM + "\\. " + ABBR_MONTH + " – " + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR, // 30. Jan. – 2. Feb. 1917 + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR + " – " + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR, // 30. Dez. 1916 – 2. Jan. 1917 + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR, // 10. Jan. 1917 (range end == start) + "ab " + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR) // ab 10. Jan. 1917 + + ")(?: – .+)?$"); + + private DocumentTitleBackfillMatcher() { + } + + static boolean isOverwritable(String title, String index, String location) { + if (title == null || index == null || index.isBlank()) { + return false; // fail closed + } + if (!title.startsWith(index)) { + return false; // index is matched LITERALLY, never as a regex + } + String tail = title.substring(index.length()); + if (tail.isEmpty()) { + return true; // exactly {index} + } + if (!tail.startsWith(SEPARATOR)) { + return false; + } + String body = tail.substring(SEPARATOR.length()); + if (DATE_LABEL_WITH_OPTIONAL_LOCATION.matcher(body).matches()) { + return true; // {dateLabel} (+ optional trailing location) + } + // No date label: the lone segment must equal the document's current location to be + // distinguished from hand-written prose. + return location != null && !location.isBlank() && body.equals(location); + } + + private static String monthAlternation(String pattern) { + DateTimeFormatter formatter = DateTimeFormatter.ofPattern(pattern, Locale.GERMAN); + Set tokens = new LinkedHashSet<>(); + for (int month = 1; month <= 12; month++) { + tokens.add(formatter.format(LocalDate.of(2000, month, 15))); + } + return tokens.stream().map(Pattern::quote).collect(Collectors.joining("|", "(?:", ")")); + } +} diff --git a/backend/src/main/java/org/raddatz/familienarchiv/user/AdminController.java b/backend/src/main/java/org/raddatz/familienarchiv/user/AdminController.java index 74b5d643..1c5f66b0 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/user/AdminController.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/user/AdminController.java @@ -51,6 +51,12 @@ public class AdminController { return ResponseEntity.ok(new BackfillResult(count)); } + @PostMapping("/backfill-titles") + public ResponseEntity backfillTitles() { + int count = documentService.backfillTitles(); + return ResponseEntity.ok(new BackfillResult(count)); + } + @PostMapping("/generate-thumbnails") public ResponseEntity generateThumbnails() { thumbnailBackfillService.runBackfillAsync(); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java index 76c95dca..fced773a 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java @@ -658,6 +658,59 @@ class DocumentServiceTest { verify(documentVersionService).recordVersion(any(Document.class)); } + // ─── backfillTitles — one-time stale-title cleanup (#726, FR-003) ───────── + + @Test + void backfillTitles_rewritesStaleAutoTitle_andCountsIt() { + Document stale = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin"); + stale.setTitle("C-0029 – 2028 – Berlin"); // stale stored title (date typo never fixed) + when(documentRepository.findAll()).thenReturn(List.of(stale)); + when(documentRepository.save(any())).thenReturn(stale); + + int count = documentService.backfillTitles(); + + assertThat(count).isEqualTo(1); + assertThat(stale.getTitle()).isEqualTo("C-0029 – 1928 – Berlin"); + verify(documentRepository).save(stale); + } + + @Test + void backfillTitles_skipsProse() { + Document prose = makeStored("C-0030", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null); + prose.setTitle("C-0030 – Brief an Mutter"); + when(documentRepository.findAll()).thenReturn(List.of(prose)); + + int count = documentService.backfillTitles(); + + assertThat(count).isZero(); + assertThat(prose.getTitle()).isEqualTo("C-0030 – Brief an Mutter"); + verify(documentRepository, never()).save(any()); + } + + @Test + void backfillTitles_isIdempotent_forAlreadyCorrectTitle() { + Document fresh = makeStored("C-0031", LocalDate.of(1940, 1, 1), DatePrecision.YEAR, null); + // title already equals build(current state) → nothing to do + when(documentRepository.findAll()).thenReturn(List.of(fresh)); + + int count = documentService.backfillTitles(); + + assertThat(count).isZero(); + verify(documentRepository, never()).save(any()); + } + + @Test + void backfillTitles_neverRecordsVersions() { + Document stale = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin"); + stale.setTitle("C-0029 – 2028 – Berlin"); + when(documentRepository.findAll()).thenReturn(List.of(stale)); + when(documentRepository.save(any())).thenReturn(stale); + + documentService.backfillTitles(); + + verify(documentVersionService, never()).recordVersion(any()); + } + // ─── thumbnail dispatch ─────────────────────────────────────────────────── @Test diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcherTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcherTest.java new file mode 100644 index 00000000..be5e02be --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcherTest.java @@ -0,0 +1,151 @@ +package org.raddatz.familienarchiv.document; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; + +import java.util.concurrent.TimeUnit; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * The backfill overwrite heuristic (FR-004) in isolation — every emittable date-label form is + * recognised, prose is left alone, and a regex-metacharacter index is matched literally without + * hanging. The exact label spellings mirror {@code docs/date-label-fixtures.json}. + */ +class DocumentTitleBackfillMatcherTest { + + private static boolean overwritable(String title, String location) { + return DocumentTitleBackfillMatcher.isOverwritable(title, "C-0029", location); + } + + // ─── each date-label form (index + form) is overwritable ────────────────── + + @Test + void year_form() { + assertThat(overwritable("C-0029 – 1916", null)).isTrue(); + } + + @Test + void approx_form() { + assertThat(overwritable("C-0029 – ca. 1920", null)).isTrue(); + } + + @Test + void month_form() { + assertThat(overwritable("C-0029 – Juni 1916", null)).isTrue(); + } + + @Test + void day_form() { + assertThat(overwritable("C-0029 – 24. Dezember 1943", null)).isTrue(); + } + + @Test + void season_form() { + assertThat(overwritable("C-0029 – Sommer 1916", null)).isTrue(); + } + + @Test + void unknown_label_form() { + assertThat(overwritable("C-0029 – Datum unbekannt", null)).isTrue(); + } + + @Test + void range_same_month_form() { + assertThat(overwritable("C-0029 – 10.–11. Jan. 1917", null)).isTrue(); + } + + @Test + void range_cross_month_form() { + assertThat(overwritable("C-0029 – 30. Jan. – 2. Feb. 1917", null)).isTrue(); + } + + @Test + void range_cross_year_form() { + assertThat(overwritable("C-0029 – 30. Dez. 1916 – 2. Jan. 1917", null)).isTrue(); + } + + @Test + void range_single_day_form() { + assertThat(overwritable("C-0029 – 10. Jan. 1917", null)).isTrue(); + } + + @Test + void range_open_form() { + assertThat(overwritable("C-0029 – ab 10. Jan. 1917", null)).isTrue(); + } + + // ─── date label + trailing location (any location) ──────────────────────── + + @Test + void date_form_with_trailing_location() { + assertThat(overwritable("C-0029 – 1916 – Berlin", null)).isTrue(); + } + + @Test + void range_with_internal_separator_plus_trailing_location() { + // The range label itself contains " – "; the trailing " – Berlin" must still be peeled. + assertThat(overwritable("C-0029 – 30. Jan. – 2. Feb. 1917 – Berlin", null)).isTrue(); + } + + // ─── index-only and index+location cases ────────────────────────────────── + + @Test + void exactly_index() { + assertThat(overwritable("C-0029", null)).isTrue(); + } + + @Test + void index_plus_location_equal_to_current() { + assertThat(overwritable("C-0029 – Berlin", "Berlin")).isTrue(); + } + + // ─── prose is left untouched ────────────────────────────────────────────── + + @Test + void prose_segment_not_matching_location_is_skipped() { + assertThat(overwritable("C-0029 – Brief an Mutter", "Berlin")).isFalse(); + } + + @Test + void location_only_segment_is_skipped_when_no_current_location() { + // No date label, and the doc has no location to compare against → cannot prove machine. + assertThat(overwritable("C-0029 – Berlin", null)).isFalse(); + } + + @Test + void title_not_starting_with_index_is_skipped() { + assertThat(overwritable("Ganz anderer Titel", null)).isFalse(); + } + + // ─── fail-closed guards ─────────────────────────────────────────────────── + + @Test + void null_title_is_not_overwritable() { + assertThat(DocumentTitleBackfillMatcher.isOverwritable(null, "C-0029", null)).isFalse(); + } + + @Test + void null_index_is_not_overwritable() { + assertThat(DocumentTitleBackfillMatcher.isOverwritable("C-0029 – 1916", null, null)).isFalse(); + } + + @Test + void blank_index_is_not_overwritable() { + assertThat(DocumentTitleBackfillMatcher.isOverwritable(" – 1916", " ", null)).isFalse(); + } + + // ─── ReDoS / regex-metacharacter index is matched literally and terminates ─ + + @Test + @Timeout(value = 5, unit = TimeUnit.SECONDS) + void index_with_regex_metacharacters_is_matched_literally_and_terminates() { + String hostileIndex = "C-0029(.*).pdf"; + // Literal prefix → matches; trailing date label → overwritable. Must not hang. + assertThat(DocumentTitleBackfillMatcher.isOverwritable( + hostileIndex + " – 1916", hostileIndex, null)).isTrue(); + // A title that does NOT start with the literal hostile index is skipped, also fast. + assertThat(DocumentTitleBackfillMatcher.isOverwritable( + "C-0029 – 1916", hostileIndex, null)).isFalse(); + } +} diff --git a/backend/src/test/java/org/raddatz/familienarchiv/user/AdminControllerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/user/AdminControllerTest.java index 8e51fad7..6429d3dc 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/user/AdminControllerTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/user/AdminControllerTest.java @@ -132,6 +132,31 @@ class AdminControllerTest { .andExpect(jsonPath("$.count").value(3)); } + // ─── POST /api/admin/backfill-titles (#726) ──────────────────────────────── + + @Test + void backfillTitles_returns401_whenUnauthenticated() throws Exception { + mockMvc.perform(post("/api/admin/backfill-titles").with(csrf())) + .andExpect(status().isUnauthorized()); + } + + @Test + @WithMockUser(roles = "USER") + void backfillTitles_returns403_whenNotAdmin() throws Exception { + mockMvc.perform(post("/api/admin/backfill-titles").with(csrf())) + .andExpect(status().isForbidden()); + } + + @Test + @WithMockUser(authorities = "ADMIN") + void backfillTitles_returns200_withCount_whenAdmin() throws Exception { + when(documentService.backfillTitles()).thenReturn(7); + + mockMvc.perform(post("/api/admin/backfill-titles").with(csrf())) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.count").value(7)); + } + // ─── POST /api/admin/generate-thumbnails ─────────────────────────────────── @Test -- 2.49.1 From 12db7b3596a3c00439ec662b5505c2d4ea4147f6 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 4 Jun 2026 16:32:07 +0200 Subject: [PATCH 4/9] test(document): integration-test title backfill against real Postgres (#726) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pins backfill behaviour on postgres:16-alpine (H2 unusable — title is NOT NULL): a stale auto-title is rewritten, the sweep is idempotent (second run touches nothing), prose is left alone, and the mechanical rename adds no document_versions rows. Permission (401/403) stays in the faster @WebMvcTest slice. Co-Authored-By: Claude Opus 4.8 --- .../DocumentTitleBackfillIntegrationTest.java | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillIntegrationTest.java diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillIntegrationTest.java new file mode 100644 index 00000000..a209e1a2 --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillIntegrationTest.java @@ -0,0 +1,90 @@ +package org.raddatz.familienarchiv.document; + +import org.junit.jupiter.api.Test; +import org.raddatz.familienarchiv.PostgresContainerConfig; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.annotation.Import; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.context.bean.override.mockito.MockitoBean; +import org.springframework.transaction.annotation.Transactional; +import software.amazon.awssdk.services.s3.S3Client; + +import java.time.LocalDate; +import java.util.UUID; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * End-to-end backfill against a real Postgres (#726, FR-003). H2 is unusable here — the + * {@code title} column is NOT NULL and the title-sync semantics depend on that — so this pins the + * behaviour on {@code postgres:16-alpine}: a stale auto-title is rewritten, the sweep is + * idempotent, prose is left alone, and the mechanical rename writes no {@code document_versions} + * rows. Permission enforcement (401/403) is covered faster by the {@code @WebMvcTest} slice in + * {@code AdminControllerTest}. + */ +@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE) +@ActiveProfiles("test") +@Import(PostgresContainerConfig.class) +@Transactional +class DocumentTitleBackfillIntegrationTest { + + @MockitoBean S3Client s3Client; + @Autowired DocumentService documentService; + @Autowired DocumentRepository documentRepository; + @Autowired DocumentVersionRepository documentVersionRepository; + + private Document persist(String index, String title, LocalDate date, DatePrecision precision, String location) { + return documentRepository.save(Document.builder() + .originalFilename(index) + .title(title) + .documentDate(date) + .metaDatePrecision(precision) + .location(location) + .status(DocumentStatus.PLACEHOLDER) + .build()); + } + + @Test + void backfill_rewritesStaleAutoTitle() { + Document stale = persist("C-0029", "C-0029 – 2028 – Berlin", + LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin"); + + int count = documentService.backfillTitles(); + + assertThat(count).isGreaterThanOrEqualTo(1); + assertThat(documentRepository.findById(stale.getId()).orElseThrow().getTitle()) + .isEqualTo("C-0029 – 1928 – Berlin"); + } + + @Test + void backfill_isIdempotent_secondRunChangesNothing() { + persist("C-0029", "C-0029 – 2028 – Berlin", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin"); + + documentService.backfillTitles(); + int secondRun = documentService.backfillTitles(); + + assertThat(secondRun).isZero(); + } + + @Test + void backfill_skipsProse() { + Document prose = persist("C-0030", "C-0030 – Brief an Mutter", + LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null); + + documentService.backfillTitles(); + + assertThat(documentRepository.findById(prose.getId()).orElseThrow().getTitle()) + .isEqualTo("C-0030 – Brief an Mutter"); + } + + @Test + void backfill_addsNoDocumentVersionRows() { + persist("C-0029", "C-0029 – 2028 – Berlin", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin"); + long versionsBefore = documentVersionRepository.count(); + + documentService.backfillTitles(); + + assertThat(documentVersionRepository.count()).isEqualTo(versionsBefore); + } +} -- 2.49.1 From 83e0afb466242039778ac070de5459a62bca2614 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 4 Jun 2026 16:41:52 +0200 Subject: [PATCH 5/9] feat(document): explain auto-generated title under the edit title field (#726) Adds the FR-TITLE-005 helper line under the title input in DescriptionSection, shown only on the single-document edit form via a new showTitleHelp prop (off for the new-document and bulk-edit forms). It is wired to the input with aria-describedby and uses text-ink-3 (WCAG AA on bg-surface). New Paraglide key form_helper_title_autogenerated in de/en/es. Adds a component test for the helper + aria wiring and an end-to-end pass: create an auto-titled doc, edit its date, and see the title follow on the detail page. Co-Authored-By: Claude Opus 4.8 --- frontend/e2e/document-title-autosync.spec.ts | 36 +++++++++++++++++++ frontend/messages/de.json | 1 + frontend/messages/en.json | 1 + frontend/messages/es.json | 1 + .../lib/document/DescriptionSection.svelte | 8 +++++ .../DescriptionSection.svelte.spec.ts | 23 ++++++++++++ .../lib/document/DocumentEditLayout.svelte | 1 + 7 files changed, 71 insertions(+) create mode 100644 frontend/e2e/document-title-autosync.spec.ts diff --git a/frontend/e2e/document-title-autosync.spec.ts b/frontend/e2e/document-title-autosync.spec.ts new file mode 100644 index 00000000..2f1bcb91 --- /dev/null +++ b/frontend/e2e/document-title-autosync.spec.ts @@ -0,0 +1,36 @@ +import { expect, test } from '@playwright/test'; + +/** + * Auto-title sync, full-stack happy path (#726). A document whose stored title equals its + * machine-generated auto-title must follow a date correction forward on save; a hand-edit would + * be kept. The exhaustive permutations live in the backend unit/integration suites — this is the + * single end-to-end pass, and it also asserts the FR-005 helper line is present on the edit form. + */ +test.describe('Document auto-title sync (#726)', () => { + test('editing the date rebuilds the auto-title, and the edit form explains it', async ({ + page + }) => { + // 1. Create a document with no date/location, so its stored title == its auto-title + // (originalFilename only). createDocument derives originalFilename from the title. + await page.goto('/documents/new'); + await page.waitForSelector('[data-hydrated]'); + await page.getByLabel('Titel').fill('E2E Auto-Titel Sync'); + await page.getByRole('button', { name: 'Speichern', exact: true }).click(); + await expect(page).toHaveURL(/\/documents\/[^/]+$/); + const detailUrl = page.url(); + + // 2. The edit form carries the FR-005 helper explaining the auto-generated title. + await page.goto(`${detailUrl}/edit`); + await page.waitForSelector('[data-hydrated]'); + await expect(page.locator('#title-help')).toBeVisible(); + + // 3. Add a YEAR-precision date WITHOUT touching the title, then save. + await page.locator('#documentDate').fill('15.01.1928'); + await page.locator('#metaDatePrecision').selectOption('YEAR'); + await page.getByRole('button', { name: 'Speichern', exact: true }).click(); + + // 4. The detail page shows the regenerated title carrying the new year. + await expect(page).toHaveURL(/\/documents\/[^/]+$/); + await expect(page.getByRole('heading', { name: /E2E Auto-Titel Sync.*1928/ })).toBeVisible(); + }); +}); diff --git a/frontend/messages/de.json b/frontend/messages/de.json index 7601d996..25bc0fa0 100644 --- a/frontend/messages/de.json +++ b/frontend/messages/de.json @@ -56,6 +56,7 @@ "form_label_sender": "Absender", "form_label_receivers": "Empfänger", "form_label_title": "Titel", + "form_helper_title_autogenerated": "Wird automatisch aus Datum und Ort gebildet — sobald du den Titel änderst, bleibt deine Version erhalten.", "form_label_tags": "Schlagworte", "form_label_content": "Inhalt", "form_placeholder_content": "Kurze Beschreibung des Inhalts…", diff --git a/frontend/messages/en.json b/frontend/messages/en.json index 722bac6b..bd71f0ba 100644 --- a/frontend/messages/en.json +++ b/frontend/messages/en.json @@ -56,6 +56,7 @@ "form_label_sender": "Sender", "form_label_receivers": "Recipients", "form_label_title": "Title", + "form_helper_title_autogenerated": "Generated automatically from the date and place — as soon as you edit the title, your version is kept.", "form_label_tags": "Tags", "form_label_content": "Content", "form_placeholder_content": "Brief description of the content…", diff --git a/frontend/messages/es.json b/frontend/messages/es.json index 746f4fc7..07cd1087 100644 --- a/frontend/messages/es.json +++ b/frontend/messages/es.json @@ -56,6 +56,7 @@ "form_label_sender": "Remitente", "form_label_receivers": "Destinatarios", "form_label_title": "Título", + "form_helper_title_autogenerated": "Se genera automáticamente a partir de la fecha y el lugar; en cuanto edites el título, se conservará tu versión.", "form_label_tags": "Etiquetas", "form_label_content": "Contenido", "form_placeholder_content": "Breve descripción del contenido…", diff --git a/frontend/src/lib/document/DescriptionSection.svelte b/frontend/src/lib/document/DescriptionSection.svelte index b18cc16a..3e4f12b3 100644 --- a/frontend/src/lib/document/DescriptionSection.svelte +++ b/frontend/src/lib/document/DescriptionSection.svelte @@ -17,6 +17,7 @@ let { titleRequired = false, suggestedTitle = '', hideTitle = false, + showTitleHelp = false, editMode = false }: { tags?: Tag[]; @@ -31,6 +32,7 @@ let { titleRequired?: boolean; suggestedTitle?: string; hideTitle?: boolean; + showTitleHelp?: boolean; editMode?: boolean; } = $props(); @@ -72,8 +74,14 @@ const titleValue = $derived(titleDirty ? currentTitle : suggestedTitle || curren titleDirty = true; }} required={titleRequired} + aria-describedby={showTitleHelp ? 'title-help' : undefined} class="block w-full rounded border border-line p-2 text-sm shadow-sm focus:outline-none focus-visible:ring-2 focus-visible:ring-focus-ring" /> + {#if showTitleHelp} +

+ {m.form_helper_title_autogenerated()} +

+ {/if} {/if} diff --git a/frontend/src/lib/document/DescriptionSection.svelte.spec.ts b/frontend/src/lib/document/DescriptionSection.svelte.spec.ts index f0c05599..bc47a98a 100644 --- a/frontend/src/lib/document/DescriptionSection.svelte.spec.ts +++ b/frontend/src/lib/document/DescriptionSection.svelte.spec.ts @@ -55,3 +55,26 @@ describe('DescriptionSection — onMount seeding (Felix B1/B2 fix regression fen expect(input.value).toBe('Parent Value'); }); }); + +describe('DescriptionSection — auto-generated title helper (FR-TITLE-005)', () => { + it('shows the helper and wires aria-describedby when showTitleHelp is set', async () => { + render(DescriptionSection, { showTitleHelp: true }); + const help = document.querySelector('#title-help') as HTMLElement; + expect(help).not.toBeNull(); + expect(help.textContent?.trim().length ?? 0).toBeGreaterThan(0); + const titleInput = document.querySelector('input#title') as HTMLInputElement; + expect(titleInput.getAttribute('aria-describedby')).toBe('title-help'); + }); + + it('omits the helper by default (e.g. the new-document form)', async () => { + render(DescriptionSection, {}); + expect(document.querySelector('#title-help')).toBeNull(); + const titleInput = document.querySelector('input#title') as HTMLInputElement; + expect(titleInput.getAttribute('aria-describedby')).toBeNull(); + }); + + it('omits the helper when the title field is hidden (bulk edit)', async () => { + render(DescriptionSection, { showTitleHelp: true, hideTitle: true }); + expect(document.querySelector('#title-help')).toBeNull(); + }); +}); diff --git a/frontend/src/lib/document/DocumentEditLayout.svelte b/frontend/src/lib/document/DocumentEditLayout.svelte index 2b72e58c..e0867517 100644 --- a/frontend/src/lib/document/DocumentEditLayout.svelte +++ b/frontend/src/lib/document/DocumentEditLayout.svelte @@ -221,6 +221,7 @@ async function handleReplaceFile(e: Event) { initialArchiveFolder={doc.archiveFolder ?? ''} initialSummary={doc.summary ?? ''} titleRequired={true} + showTitleHelp={true} /> -- 2.49.1 From cf457cb96fbc88f86add2ca26d4dcbac731d1581 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 4 Jun 2026 16:44:56 +0200 Subject: [PATCH 6/9] docs(document): ADR-031 + glossary/c4/api_tests for auto-title sync (#726) ADR-031 records the shared document-package title factory, the exact-match save-time regeneration, and the grammar-heuristic one-time backfill (with the ReDoS / no-version-spam / file-replace-is-manual decisions). Adds an "auto-generated title" glossary entry, extends the document-management c4 diagram with DocumentTitleFactory / DocumentTitleBackfillMatcher and the backfill flows, and documents POST /api/admin/backfill-titles in Admin-Auth.http as a one-shot ADMIN call hitting port 8080 directly. Co-Authored-By: Claude Opus 4.8 --- backend/api_tests/Admin-Auth.http | 16 ++- docs/GLOSSARY.md | 3 + ...ared-factory-and-save-time-regeneration.md | 112 ++++++++++++++++++ .../c4/l3-backend-3b-document-management.puml | 16 ++- 4 files changed, 141 insertions(+), 6 deletions(-) create mode 100644 docs/adr/031-document-title-shared-factory-and-save-time-regeneration.md diff --git a/backend/api_tests/Admin-Auth.http b/backend/api_tests/Admin-Auth.http index 7bf2f8de..1beb6aeb 100644 --- a/backend/api_tests/Admin-Auth.http +++ b/backend/api_tests/Admin-Auth.http @@ -28,4 +28,18 @@ Authorization: Basic Gast_User gast ###Groups #GET GET http://localhost:8080/api/admin/tags -Authorization: Basic admin admin123 \ No newline at end of file +Authorization: Basic admin admin123 + +### One-time backfill: re-sync already-stale auto-titles (#726) +# RUNBOOK: a one-shot ADMIN maintenance call, NOT part of normal operation. Run it ONCE +# after deploying #726 to clean the existing backlog of stale titles (e.g. a title still +# showing "2028" after the date was corrected to "1928"). It is synchronous and idempotent +# — a second run returns {"count": 0} and writes nothing. Hit the backend DIRECTLY on +# port 8080 (NOT through the SvelteKit proxy) so the sweep can't trip the proxy timeout. +# Returns {"count": }. +POST http://localhost:8080/api/admin/backfill-titles +Authorization: Basic admin admin123 + +### NEGATIV-TEST: ein Nicht-Admin darf den Backfill NICHT auslösen -> 403 Forbidden +POST http://localhost:8080/api/admin/backfill-titles +Authorization: Basic Gast_User gast \ No newline at end of file diff --git a/docs/GLOSSARY.md b/docs/GLOSSARY.md index 200ecabb..8bb508ab 100644 --- a/docs/GLOSSARY.md +++ b/docs/GLOSSARY.md @@ -45,6 +45,9 @@ _See also [TranscriptionBlock](#transcriptionblock-transcriptionblock)._ **raw attribution** (`Document.senderText`, `Document.receiverText`, `Document.metaDateRaw`) — the original spreadsheet cell text for a document's sender, receiver, and date, preserved verbatim even after a `Person` or normalized date is linked. It keeps provenance intact and enables an "as written in the original" view. +**auto-generated title** (`DocumentTitleFactory`) — a `Document` title composed by the formula `{index} – {dateLabel} – {location}` (index = `originalFilename`; date label honest at the row's precision; location omitted when blank). On edit, an unchanged auto-title follows a corrected date/location forward (exact old-vs-new match in `DocumentService.updateDocument`); a hand-written title is kept verbatim. `POST /api/admin/backfill-titles` rewrites already-stale ones in one sweep using a grammar heuristic (`DocumentTitleBackfillMatcher`). +_Not to be confused with a hand-written title_ — only a title that still equals what the factory builds is treated as machine-generated and rewritten; prose is left untouched. + **DocumentVersion** (`DocumentVersion`) — an append-only snapshot of a `Document`'s metadata at a point in time. Append-only by convention; no consumer-facing create or update endpoint exists. The entity uses Lombok `@Data` (which generates setters), so immutability is enforced by application convention, not at the Java level. **Tag** (`Tag`) — a hierarchical category that can be applied to `Document`s. Tags are self-referencing via a `parent_id` foreign key, forming a tree structure. diff --git a/docs/adr/031-document-title-shared-factory-and-save-time-regeneration.md b/docs/adr/031-document-title-shared-factory-and-save-time-regeneration.md new file mode 100644 index 00000000..e3e97392 --- /dev/null +++ b/docs/adr/031-document-title-shared-factory-and-save-time-regeneration.md @@ -0,0 +1,112 @@ +# ADR-031 — The document title is a shared `document`-package factory, re-synced by an exact match on save and a grammar heuristic on a one-time backfill + +**Date:** 2026-06-04 +**Status:** Accepted +**Issue:** #726 (auto-sync document titles with date/location: save-time + one-time backfill) +**Milestone:** — + +--- + +## Context + +A document title was a string built **once**, at import time, by a private +`DocumentImporter.buildTitle()` composing `{index} – {dateLabel} – {location}` (index = +`originalFilename`, date label honest at the row's precision via `DocumentTitleFormatter`, +location verbatim). Nothing rebuilt it afterwards. When an archivist later corrected a date +or location in the edit form, the title kept its stale value (e.g. it still read `2028` +after the date was fixed to `1928`), because the edit form round-trips the stored title +verbatim and `updateDocument` simply re-persisted it. + +Two distinct problems live here: + +1. **Going forward**, an edit to date/location must flow into a title that was machine-built + — but must never overwrite a title a human wrote. +2. **The existing backlog** of already-stale titles must be cleaned once. For these rows the + pre-edit state is gone, so there is no exact value to compare against. + +The composition formula also existed only inside `importing`, which is the wrong owner: a +title is a `document` concern, and three call sites (import, save-time, backfill) must share +one rule or they will drift. + +## Decision + +### 1. One formula, owned by the `document` package (`DocumentTitleFactory`) + +Extract the composition into `DocumentTitleFactory` (a `@Component` in the `document` +package) with `build(Document)`. `DocumentImporter` (package `importing`) now consumes it. +`DocumentTitleFormatter` moves into `document` alongside the factory (it stays +package-private; `importing` reaches the formula only through the factory). The direction is +deliberate: `document` owns the rule, `importing` depends on it — not the reverse. The +German date *label* remains the deliberate Java/TS dual implementation pinned by +`docs/date-label-fixtures.json` (#666); this ADR touches the **composition** only and does +not collapse the frontend `formatDocumentDate`. + +### 2. Save-time regeneration is an EXACT match, not a heuristic + +In `DocumentService.updateDocument` only (bulk edit is out of scope), capture +`autoTitleBefore = titleFactory.build(doc)` from the **currently-persisted** state *before* +any setter runs. Then: + +- if the **submitted** title equals `autoTitleBefore`, it was the machine value → rebuild + from the new state; +- otherwise keep the submitted title verbatim (hand-written or freshly typed). + +This is an exact old-vs-new comparison — no false positives, no false negatives — relying on +the edit form round-tripping an untouched title verbatim. `projectedState` mirrors the +existing setter asymmetry exactly: `documentDate`/`location` overwrite unconditionally (a +null clears them), while precision/end/raw are taken from the DTO only when non-null and +otherwise kept from the entity. A blank submission is never persisted (the title is always +present) — it falls back to the rebuilt auto-title, which always carries at least the index. + +### 3. The one-time backlog cleanup is a grammar heuristic, behind an ADMIN endpoint + +`POST /api/admin/backfill-titles` (synchronous, under `AdminController`'s class-level +`@RequirePermission(Permission.ADMIN)`) sweeps every document and, for each whose stored +title passes the overwrite test, rebuilds it via the factory. Because the pre-edit state is +gone, the test (`DocumentTitleBackfillMatcher`, used **only** here) is a grammar heuristic: +after stripping the **literal** index prefix, the remainder must be exactly the index, a +known date-label form (+ an optional trailing location), or a lone segment equal to the +document's current location. Prose is left untouched; anything malformed fails closed. + +The backfill saves via `documentRepository.save` directly and **never** routes through +`updateDocument` — following the `backfillFileHashes` precedent — so a mechanical rename does +not snapshot the whole corpus into `document_versions`. It is idempotent (a second run +rewrites nothing) and logs one SLF4J-parameterized `scanned/updated/skipped` line; the +response is `BackfillResult(count)`. + +### 4. Edit-form feedback (FR-005) + +A localized helper line (de/en/es) under the title input explains that the title is built +from date/place and that a hand-edit is preserved, wired via `aria-describedby` and shown +only on the single-document edit form. A live preview was considered and declined. + +## Consequences + +- The three call sites can never diverge — there is exactly one formula + (`NFR-MAINT-001`). Save-time cost is a string build + compare; the backfill is one + synchronous transactional sweep over a low-thousands corpus. +- Security: the index is compared **literally** (`String.startsWith` / `Pattern.quote`) + because `originalFilename` is user-controlled and may carry regex metacharacters — an + unquoted pattern would be a ReDoS / regex-injection vector (CWE-1333 / CWE-625). The + date-label sub-patterns use only bounded, non-nested quantifiers. +- **File-replaced documents are treated as manual, by design.** The index is + `originalFilename`, which `updateDocument` reassigns to the uploaded file's name on a + file-replace. After a replace the stored title no longer matches `build(currentState)`, so + neither save-time nor backfill rewrites it. This is the accepted fail-safe of overloading + `originalFilename` rather than adding a dedicated `catalogIndex` column. +- The save-time heuristic risk is zero (exact match); the backfill heuristic can, by its + documented FR-004 rule, treat `{index} – {valid date label} – {anything}` as machine-built + and rewrite the trailing segment. This is the accepted trade for cleaning the backlog + without the lost pre-edit state. + +## Alternatives considered + +- **A dedicated `catalogIndex` column** instead of overloading `originalFilename` — rejected; + it adds a migration and a second source of truth for the index for no current benefit, and + the file-replace fail-safe is acceptable. +- **A heuristic at save-time too** (instead of the exact match) — rejected; the stored title + is available pre-edit, so an exact comparison is strictly better (no false positives). +- **A live title preview in the edit form** — rejected (FR-005); a static helper line is + calmer for the 60+ audience and avoids a second client-side mirror of the formula. +- **Collapsing the frontend `formatDocumentDate` into the backend** — out of scope; the + Java/TS date-label split is the deliberate #666 design, pinned by a shared fixture. diff --git a/docs/architecture/c4/l3-backend-3b-document-management.puml b/docs/architecture/c4/l3-backend-3b-document-management.puml index 65049e7e..4db3c41f 100644 --- a/docs/architecture/c4/l3-backend-3b-document-management.puml +++ b/docs/architecture/c4/l3-backend-3b-document-management.puml @@ -9,15 +9,17 @@ ContainerDb(minio, "Object Storage", "MinIO (S3-compatible)") System_Boundary(backend, "API Backend (Spring Boot)") { Component(docCtrl, "DocumentController", "Spring MVC — /api/documents", "CRUD for documents: search, get by ID, update metadata, upload/download file, batch metadata updates, and per-month density aggregation for the timeline filter widget.") - Component(adminCtrl, "AdminController", "Spring MVC — /api/admin", "Triggers the asynchronous canonical import (requires ADMIN permission). Reports import state (IDLE/RUNNING/DONE/FAILED).") - Component(docSvc, "DocumentService", "Spring Service", "Core document business logic: store, update, search. Resolves persons and tags, delegates file I/O to FileService, builds dynamic JPA Specifications, and integrates with audit logging.") + Component(adminCtrl, "AdminController", "Spring MVC — /api/admin", "Triggers the asynchronous canonical import (requires ADMIN permission). Reports import state (IDLE/RUNNING/DONE/FAILED). Hosts the one-shot maintenance backfills (versions, file-hashes, titles) — synchronous, ADMIN-only.") + Component(docSvc, "DocumentService", "Spring Service", "Core document business logic: store, update, search. On update, regenerates an unchanged auto-title from the new date/location (exact old-vs-new match, #726); exposes backfillTitles() to clean already-stale titles in one sweep. Resolves persons and tags, delegates file I/O to FileService, builds dynamic JPA Specifications, and integrates with audit logging.") Component(fileSvc, "FileService", "Spring Service", "Wraps AWS SDK v2 S3Client. Uploads files with UUID-keyed paths, computes SHA-256 hash, downloads with content-type detection, and generates presigned URLs for OCR access.") Component(importOrch, "CanonicalImportOrchestrator", "Spring Service — @Async", "Runs the four canonical loaders in an explicit dependency DAG (TagTree → PersonRegister → PersonTree → Document). Smoke-checks all four artifacts before starting, owns the IDLE/RUNNING/DONE/FAILED state machine, fails closed on a malformed artifact.") Component(tagTreeLoader, "TagTreeImporter", "Spring Component", "Upserts the tag hierarchy from canonical-tag-tree.xlsx via TagService (by canonical tag_path).") Component(personRegLoader, "PersonRegisterImporter", "Spring Component", "Upserts register persons from canonical-persons.xlsx via PersonService (by normalizer person_id).") Component(personTreeLoader, "PersonTreeImporter", "Spring Component", "Upserts tree persons + relationships from canonical-persons-tree.json via PersonService and RelationshipService.") - Component(docLoader, "DocumentImporter", "Spring Component", "Loads canonical-documents.xlsx: routes attribution register-first (raw cell always retained in sender_text/receiver_text), parses clean dates, builds an honest precision-aware title via DocumentTitleFormatter, keeps the S3 upload + thumbnail plumbing, and resolves each PDF by index (importDir/.pdf) guarded by strict index validation + canonical-path containment + %PDF magic-byte check (no recursive walk).") - Component(titleFmt, "DocumentTitleFormatter", "Pure helper", "Formats the date label baked into an import title at exactly the data's precision (MONTH -> 'Juni 1916', never a fabricated day). Mirrors the frontend formatDocumentDate; both are pinned to docs/date-label-fixtures.json (#666).") + Component(docLoader, "DocumentImporter", "Spring Component", "Loads canonical-documents.xlsx: routes attribution register-first (raw cell always retained in sender_text/receiver_text), parses clean dates, builds the title via DocumentTitleFactory, keeps the S3 upload + thumbnail plumbing, and resolves each PDF by index (importDir/.pdf) guarded by strict index validation + canonical-path containment + %PDF magic-byte check (no recursive walk).") + Component(titleFactory, "DocumentTitleFactory", "Spring Component", "Single source of truth for the auto-title {index} – {dateLabel} – {location} (#726). The document package owns this formula; importer, save-time regeneration, and the backfill all build through it so they never diverge.") + Component(titleFmt, "DocumentTitleFormatter", "Pure helper (document pkg)", "Formats the date label at exactly the data's precision (MONTH -> 'Juni 1916', never a fabricated day). Mirrors the frontend formatDocumentDate; both are pinned to docs/date-label-fixtures.json (#666).") + Component(titleMatcher, "DocumentTitleBackfillMatcher", "Pure helper", "Backfill-only heuristic deciding whether a STORED title is machine-generated (overwritable) vs hand-written prose. Index matched literally (no regex injection / ReDoS); fail-closed.") Component(sheetReader, "CanonicalSheetReader", "POI helper", "Maps a canonical .xlsx by header name (no positional indices), splits pipe-delimited list columns, fails closed (IMPORT_ARTIFACT_INVALID) on a missing required header.") Component(minioConf, "MinioConfig", "Spring @Configuration", "Creates the S3Client and S3Presigner beans with path-style access for MinIO. Validates MinIO connectivity on startup.") Component(docRepo, "DocumentRepository", "Spring Data JPA", "Queries documents with Specification-based dynamic search, full-text search with ranking and match highlighting, and transcription pipeline queue projections.") @@ -44,7 +46,11 @@ Rel(importOrch, docLoader, "4. Loads documents") Rel(tagTreeLoader, sheetReader, "Reads canonical .xlsx") Rel(personRegLoader, sheetReader, "Reads canonical .xlsx") Rel(docLoader, sheetReader, "Reads canonical .xlsx") -Rel(docLoader, titleFmt, "Builds honest title date") +Rel(docLoader, titleFactory, "Builds the auto-title") +Rel(docSvc, titleFactory, "Regenerates auto-title (save-time + backfill)") +Rel(docSvc, titleMatcher, "Backfill overwrite test") +Rel(titleFactory, titleFmt, "Formats the honest date label") +Rel(adminCtrl, docSvc, "backfillTitles() / backfillFileHashes()") Rel(tagTreeLoader, tagSvc, "Upserts tags by source_ref") Rel(personRegLoader, personSvc, "Upserts persons by source_ref") Rel(personTreeLoader, personSvc, "Upserts persons by source_ref") -- 2.49.1 From 7316c51d4a78378cdeb0edaf858ca042062d79f0 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 4 Jun 2026 17:08:51 +0200 Subject: [PATCH 7/9] refactor(document): share skip-null date-field resolution between save and projection (#726) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract effectivePrecision/effectiveMetaDateEnd/effectiveMetaDateRaw, used by both applyDatePrecision (the real setters) and projectedState (the title projection), so the two can no longer drift — addresses review feedback (Markus/Felix/Sara). Writing a stored value back when the DTO omits a field is a harmless no-op, so behaviour is unchanged (185 existing DocumentServiceTest cases stay green). Also documents the file-replace "treat as manual" path inline at the reassignment site. Co-Authored-By: Claude Opus 4.8 --- .../document/DocumentService.java | 63 +++++++++++-------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java index 44b0a0d9..ed37a2c2 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java @@ -431,7 +431,11 @@ public class DocumentService { doc.setScriptType(dto.getScriptType()); } - // 4. Datei austauschen (nur wenn eine neue ausgewählt wurde) + // 4. Datei austauschen (nur wenn eine neue ausgewählt wurde). + // NB (#726): this reassigns originalFilename to the uploaded file's name. The title's index + // segment is originalFilename, so after a replace the stored title no longer matches + // build(currentState) and the row is treated as manual — neither save-time nor backfill + // rewrites it. Accepted fail-safe (ADR-031), and autoTitleBefore was already captured above. boolean fileReplaced = newFile != null && !newFile.isEmpty(); if (fileReplaced) { FileService.UploadResult upload = fileService.uploadFile(newFile, newFile.getOriginalFilename()); @@ -479,44 +483,49 @@ public class DocumentService { } /** - * The document state the regenerated title is built from, mirroring {@code updateDocument}'s - * setter asymmetry exactly: {@code documentDate}/{@code location} are overwritten from the DTO - * (a null value clears the field), while precision/end/raw are taken from the DTO only when - * non-null and otherwise kept from the stored entity (see {@link #applyDatePrecision}). The - * index ({@code originalFilename}) is never touched by a metadata edit. A mismatch here would - * silently produce a wrong label, so it is kept lock-step with the real setters. + * The document state the regenerated title is built from. It is composed from the SAME + * resolvers the real setters use — {@code documentDate}/{@code location} overwritten from the + * DTO (a null value clears the field), precision/end/raw resolved skip-null via + * {@link #effectivePrecision}/{@link #effectiveMetaDateEnd}/{@link #effectiveMetaDateRaw} — so + * the projection cannot drift from {@link #updateDocument}. The index ({@code originalFilename}) + * is never touched by a metadata edit. */ private Document projectedState(Document doc, DocumentUpdateDTO dto) { return Document.builder() .originalFilename(doc.getOriginalFilename()) .documentDate(dto.getDocumentDate()) .location(dto.getLocation()) - .metaDatePrecision(dto.getMetaDatePrecision() != null - ? dto.getMetaDatePrecision() : doc.getMetaDatePrecision()) - .metaDateEnd(dto.getMetaDateEnd() != null - ? dto.getMetaDateEnd() : doc.getMetaDateEnd()) - .metaDateRaw(dto.getMetaDateRaw() != null - ? dto.getMetaDateRaw() : doc.getMetaDateRaw()) + .metaDatePrecision(effectivePrecision(doc, dto)) + .metaDateEnd(effectiveMetaDateEnd(doc, dto)) + .metaDateRaw(effectiveMetaDateRaw(doc, dto)) .build(); } /** - * Applies the three date-precision fields only when the DTO carries them. - * A null field means "not submitted" — overwriting the stored value with null - * would fabricate a precision the user never chose, the exact dishonesty #666 - * exists to prevent. A row with a genuinely-unknown precision must keep it when - * an unrelated edit (e.g. a location typo) is saved. + * Applies the three date-precision fields skip-null: a null DTO field means "not submitted", + * so the stored value is kept rather than overwritten with null — which would fabricate a + * precision the user never chose, the exact dishonesty #666 exists to prevent. Expressed via + * the shared {@code effective*} resolvers so {@link #projectedState} stays lock-step (writing + * the stored value back when the DTO omits a field is a harmless no-op). */ private void applyDatePrecision(Document doc, DocumentUpdateDTO dto) { - if (dto.getMetaDatePrecision() != null) { - doc.setMetaDatePrecision(dto.getMetaDatePrecision()); - } - if (dto.getMetaDateEnd() != null) { - doc.setMetaDateEnd(dto.getMetaDateEnd()); - } - if (dto.getMetaDateRaw() != null) { - doc.setMetaDateRaw(dto.getMetaDateRaw()); - } + doc.setMetaDatePrecision(effectivePrecision(doc, dto)); + doc.setMetaDateEnd(effectiveMetaDateEnd(doc, dto)); + doc.setMetaDateRaw(effectiveMetaDateRaw(doc, dto)); + } + + // Skip-null date-field resolution shared by applyDatePrecision (the real setters) and + // projectedState (the title projection) — the single rule keeps them from diverging (#726). + private static DatePrecision effectivePrecision(Document doc, DocumentUpdateDTO dto) { + return dto.getMetaDatePrecision() != null ? dto.getMetaDatePrecision() : doc.getMetaDatePrecision(); + } + + private static LocalDate effectiveMetaDateEnd(Document doc, DocumentUpdateDTO dto) { + return dto.getMetaDateEnd() != null ? dto.getMetaDateEnd() : doc.getMetaDateEnd(); + } + + private static String effectiveMetaDateRaw(Document doc, DocumentUpdateDTO dto) { + return dto.getMetaDateRaw() != null ? dto.getMetaDateRaw() : doc.getMetaDateRaw(); } /** -- 2.49.1 From f656f7c1ffdc7aaf0cf62b7c45506436f3a87b40 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 4 Jun 2026 17:10:50 +0200 Subject: [PATCH 8/9] test(document): close review-flagged coverage gaps for auto-title sync (#726) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - save-time: precision+raw carry-over when the DTO omits them (exercises the shared skip-null resolvers), and a RANGE label round-trip (Sara/Elicit) - factory: a bare Document with a null index builds "" rather than NPE-ing (Felix) - backfill matcher: negative near-misses — ASCII hyphen vs en dash, missing separator before trailing text, year-with-trailing-letters, index followed by text without a separator (Sara) - backfill integration: tighten the count assertion to exactly 1 on the clean test DB (Sara) Co-Authored-By: Claude Opus 4.8 --- .../document/DocumentServiceTest.java | 37 +++++++++++++++++++ .../DocumentTitleBackfillIntegrationTest.java | 2 +- .../DocumentTitleBackfillMatcherTest.java | 24 ++++++++++++ .../document/DocumentTitleFactoryTest.java | 7 ++++ 4 files changed, 69 insertions(+), 1 deletion(-) diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java index fced773a..023b2003 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java @@ -371,6 +371,43 @@ class DocumentServiceTest { assertThat(stored.getTitle()).isEqualTo("C-0029 – Frühling 1943"); } + @Test + void updateDocument_carriesStoredPrecisionAndRaw_whenDtoOmitsThem() throws Exception { + // Only the year changes; precision/end/raw are omitted from the DTO, so projectedState + // must carry them from the entity (exercises the skip-null effective* resolvers). + Document stored = makeStored("C-0029", LocalDate.of(1943, 4, 1), DatePrecision.SEASON, null); + stored.setMetaDateRaw("Frühling 1943"); + stored.setTitle(documentTitleFactory.build(stored)); // "C-0029 – Frühling 1943" + DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1944, 4, 1), null, null); + + runUpdate(stored, dto); + + assertThat(stored.getTitle()).isEqualTo("C-0029 – Frühling 1944"); + } + + @Test + void updateDocument_roundTripsRangeLabel_atSaveTime() throws Exception { + Document stored = Document.builder() + .id(UUID.randomUUID()) + .originalFilename("C-0029") + .documentDate(LocalDate.of(1917, 1, 10)) + .metaDatePrecision(DatePrecision.RANGE) + .metaDateEnd(LocalDate.of(1917, 1, 11)) + .receivers(new HashSet<>()) + .tags(new HashSet<>()) + .build(); + stored.setTitle(documentTitleFactory.build(stored)); // "C-0029 – 10.–11. Jan. 1917" + DocumentUpdateDTO dto = new DocumentUpdateDTO(); + dto.setTitle(stored.getTitle()); + dto.setDocumentDate(LocalDate.of(1918, 1, 10)); + dto.setMetaDatePrecision(DatePrecision.RANGE); + dto.setMetaDateEnd(LocalDate.of(1918, 1, 11)); + + runUpdate(stored, dto); + + assertThat(stored.getTitle()).isEqualTo("C-0029 – 10.–11. Jan. 1918"); + } + @Test void updateDocument_doesNotRegenerateToBlank_whenSubmittedTitleEmpty() throws Exception { Document stored = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin"); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillIntegrationTest.java index a209e1a2..4dd556f8 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillIntegrationTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillIntegrationTest.java @@ -52,7 +52,7 @@ class DocumentTitleBackfillIntegrationTest { int count = documentService.backfillTitles(); - assertThat(count).isGreaterThanOrEqualTo(1); + assertThat(count).isEqualTo(1); // exactly the one stale row seeded (clean test DB) assertThat(documentRepository.findById(stale.getId()).orElseThrow().getTitle()) .isEqualTo("C-0029 – 1928 – Berlin"); } diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcherTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcherTest.java index be5e02be..1a87f2e2 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcherTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcherTest.java @@ -118,6 +118,30 @@ class DocumentTitleBackfillMatcherTest { assertThat(overwritable("Ganz anderer Titel", null)).isFalse(); } + // ─── near-miss: shapes that look almost machine-built but are not ────────── + + @Test + void ascii_hyphen_instead_of_en_dash_separator_is_skipped() { + // The separator is " – " (en dash); a plain " - " is not the machine separator. + assertThat(overwritable("C-0029 - 1916", null)).isFalse(); + } + + @Test + void date_label_without_separator_before_trailing_text_is_skipped() { + // "1916 Berlin" is not a date label and is not joined by " – "; prose, not machine. + assertThat(overwritable("C-0029 – 1916 Berlin", null)).isFalse(); + } + + @Test + void year_with_trailing_letters_is_not_a_year_label() { + assertThat(overwritable("C-0029 – 1916er Brief", null)).isFalse(); + } + + @Test + void index_immediately_followed_by_text_without_separator_is_skipped() { + assertThat(overwritable("C-0029x – 1916", null)).isFalse(); + } + // ─── fail-closed guards ─────────────────────────────────────────────────── @Test diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleFactoryTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleFactoryTest.java index d01801c9..525b791f 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleFactoryTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleFactoryTest.java @@ -71,6 +71,13 @@ class DocumentTitleFactoryTest { assertThat(factory.build(d)).isEqualTo("C-0029 – 1928"); } + @Test + void bare_document_with_null_index_builds_empty_string_not_npe() { + // originalFilename is NOT NULL in production; the guard keeps a synthetic/partial entity + // from tripping StringBuilder(null) with an opaque NPE. + assertThat(factory.build(Document.builder().build())).isEqualTo(""); + } + @Test void day_precision_renders_the_full_german_label() { Document d = doc("C-0029") -- 2.49.1 From 0693cfddd1a140386fb72aa3cffebe2257d7406a Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 4 Jun 2026 17:15:46 +0200 Subject: [PATCH 9/9] fix(document): enlarge auto-title helper to 14px and assert its localized text (#726) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps the title helper from text-xs (12px) to text-sm (14px) for the 60+ audience (FR-005 prefers a larger size than the field hints) and tightens the component test to assert the actual localized string and the 14px class — addresses Leonie's and Sara's review notes. Co-Authored-By: Claude Opus 4.8 --- frontend/src/lib/document/DescriptionSection.svelte | 2 +- .../src/lib/document/DescriptionSection.svelte.spec.ts | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/frontend/src/lib/document/DescriptionSection.svelte b/frontend/src/lib/document/DescriptionSection.svelte index 3e4f12b3..59584959 100644 --- a/frontend/src/lib/document/DescriptionSection.svelte +++ b/frontend/src/lib/document/DescriptionSection.svelte @@ -78,7 +78,7 @@ const titleValue = $derived(titleDirty ? currentTitle : suggestedTitle || curren class="block w-full rounded border border-line p-2 text-sm shadow-sm focus:outline-none focus-visible:ring-2 focus-visible:ring-focus-ring" /> {#if showTitleHelp} -

+

{m.form_helper_title_autogenerated()}

{/if} diff --git a/frontend/src/lib/document/DescriptionSection.svelte.spec.ts b/frontend/src/lib/document/DescriptionSection.svelte.spec.ts index bc47a98a..a2754a76 100644 --- a/frontend/src/lib/document/DescriptionSection.svelte.spec.ts +++ b/frontend/src/lib/document/DescriptionSection.svelte.spec.ts @@ -1,6 +1,7 @@ import { afterEach, describe, expect, it } from 'vitest'; import { cleanup, render } from 'vitest-browser-svelte'; import DescriptionSection from './DescriptionSection.svelte'; +import { m } from '$lib/paraglide/messages.js'; afterEach(() => cleanup()); @@ -57,11 +58,13 @@ describe('DescriptionSection — onMount seeding (Felix B1/B2 fix regression fen }); describe('DescriptionSection — auto-generated title helper (FR-TITLE-005)', () => { - it('shows the helper and wires aria-describedby when showTitleHelp is set', async () => { + it('shows the helper with the localized text and wires aria-describedby when showTitleHelp is set', async () => { render(DescriptionSection, { showTitleHelp: true }); const help = document.querySelector('#title-help') as HTMLElement; expect(help).not.toBeNull(); - expect(help.textContent?.trim().length ?? 0).toBeGreaterThan(0); + expect(help.textContent?.trim()).toBe(m.form_helper_title_autogenerated()); + // ≥14px for the 60+ audience (FR-005 prefers a larger size than the 12px field hints). + expect(help.classList.contains('text-sm')).toBe(true); const titleInput = document.querySelector('input#title') as HTMLInputElement; expect(titleInput.getAttribute('aria-describedby')).toBe('title-help'); }); -- 2.49.1