Dokumenttitel automatisch mit Datum/Ort synchronisieren (Save-time + Backfill) (#726) #727

Merged
marcel merged 9 commits from feat/issue-726-auto-title-sync into main 2026-06-04 17:32:50 +02:00
24 changed files with 1132 additions and 48 deletions

View File

@@ -28,4 +28,18 @@ Authorization: Basic Gast_User gast
###Groups
#GET
GET http://localhost:8080/api/admin/tags
Authorization: Basic admin admin123
Authorization: Basic admin admin123
### One-time backfill: re-sync already-stale auto-titles (#726)
# RUNBOOK: a one-shot ADMIN maintenance call, NOT part of normal operation. Run it ONCE
# after deploying #726 to clean the existing backlog of stale titles (e.g. a title still
# showing "2028" after the date was corrected to "1928"). It is synchronous and idempotent
# — a second run returns {"count": 0} and writes nothing. Hit the backend DIRECTLY on
# port 8080 (NOT through the SvelteKit proxy) so the sweep can't trip the proxy timeout.
# Returns {"count": <documents rewritten>}.
POST http://localhost:8080/api/admin/backfill-titles
Authorization: Basic admin admin123
### NEGATIV-TEST: ein Nicht-Admin darf den Backfill NICHT auslösen -> 403 Forbidden
POST http://localhost:8080/api/admin/backfill-titles
Authorization: Basic Gast_User gast

View File

@@ -68,6 +68,7 @@ import static org.raddatz.familienarchiv.document.DocumentSpecifications.*;
public class DocumentService {
private final DocumentRepository documentRepository;
private final DocumentTitleFactory documentTitleFactory;
private final PersonService personService;
private final FileService fileService;
private final TagService tagService;
@@ -379,8 +380,14 @@ public class DocumentService {
DocumentStatus statusBefore = doc.getStatus();
// Auto-title sync (#726): capture the machine title from the CURRENTLY-persisted state
// BEFORE any setter runs — the setters below overwrite date/location and applyDatePrecision
// skips nulls, so the old state must be read first. The submitted title is the catalog
// auto-title iff it equals this; only then does it follow date/location forward.
String autoTitleBefore = documentTitleFactory.build(doc);
// 1. Einfache Felder Update
doc.setTitle(dto.getTitle());
doc.setTitle(resolveTitle(dto.getTitle(), autoTitleBefore, doc, dto));
doc.setDocumentDate(dto.getDocumentDate());
applyDatePrecision(doc, dto);
validateDateRange(doc); // guard before any save (updateDocumentTags below persists)
@@ -424,7 +431,11 @@ public class DocumentService {
doc.setScriptType(dto.getScriptType());
}
// 4. Datei austauschen (nur wenn eine neue ausgewählt wurde)
// 4. Datei austauschen (nur wenn eine neue ausgewählt wurde).
// NB (#726): this reassigns originalFilename to the uploaded file's name. The title's index
// segment is originalFilename, so after a replace the stored title no longer matches
// build(currentState) and the row is treated as manual — neither save-time nor backfill
// rewrites it. Accepted fail-safe (ADR-031), and autoTitleBefore was already captured above.
boolean fileReplaced = newFile != null && !newFile.isEmpty();
if (fileReplaced) {
FileService.UploadResult upload = fileService.uploadFile(newFile, newFile.getOriginalFilename());
@@ -453,22 +464,68 @@ public class DocumentService {
}
/**
* Applies the three date-precision fields only when the DTO carries them.
* A null field means "not submitted" — overwriting the stored value with null
* would fabricate a precision the user never chose, the exact dishonesty #666
* exists to prevent. A row with a genuinely-unknown precision must keep it when
* an unrelated edit (e.g. a location typo) is saved.
* Decides the title to persist on an edit (#726). The submitted title is the catalog
* auto-title only when it equals {@code autoBefore} (built from the stored state) — an exact
* comparison with no heuristic, relying on the edit form round-tripping the stored title
* verbatim when untouched. A machine title is rebuilt from the new state so a corrected
* date/location flows into it; a hand-written or freshly-typed title is kept verbatim. A blank
* submission is never persisted (title is always present) — it falls back to the rebuilt
* auto-title, which always carries at least the index.
*/
private String resolveTitle(String submitted, String autoBefore, Document doc, DocumentUpdateDTO dto) {
if (submitted == null || submitted.isBlank()) {
return documentTitleFactory.build(projectedState(doc, dto));
}
if (!Objects.equals(submitted, autoBefore)) {
return submitted;
}
return documentTitleFactory.build(projectedState(doc, dto));
}
/**
* The document state the regenerated title is built from. It is composed from the SAME
* resolvers the real setters use — {@code documentDate}/{@code location} overwritten from the
* DTO (a null value clears the field), precision/end/raw resolved skip-null via
* {@link #effectivePrecision}/{@link #effectiveMetaDateEnd}/{@link #effectiveMetaDateRaw} — so
* the projection cannot drift from {@link #updateDocument}. The index ({@code originalFilename})
* is never touched by a metadata edit.
*/
private Document projectedState(Document doc, DocumentUpdateDTO dto) {
return Document.builder()
.originalFilename(doc.getOriginalFilename())
.documentDate(dto.getDocumentDate())
.location(dto.getLocation())
.metaDatePrecision(effectivePrecision(doc, dto))
.metaDateEnd(effectiveMetaDateEnd(doc, dto))
.metaDateRaw(effectiveMetaDateRaw(doc, dto))
.build();
}
/**
* Applies the three date-precision fields skip-null: a null DTO field means "not submitted",
* so the stored value is kept rather than overwritten with null — which would fabricate a
* precision the user never chose, the exact dishonesty #666 exists to prevent. Expressed via
* the shared {@code effective*} resolvers so {@link #projectedState} stays lock-step (writing
* the stored value back when the DTO omits a field is a harmless no-op).
*/
private void applyDatePrecision(Document doc, DocumentUpdateDTO dto) {
if (dto.getMetaDatePrecision() != null) {
doc.setMetaDatePrecision(dto.getMetaDatePrecision());
}
if (dto.getMetaDateEnd() != null) {
doc.setMetaDateEnd(dto.getMetaDateEnd());
}
if (dto.getMetaDateRaw() != null) {
doc.setMetaDateRaw(dto.getMetaDateRaw());
}
doc.setMetaDatePrecision(effectivePrecision(doc, dto));
doc.setMetaDateEnd(effectiveMetaDateEnd(doc, dto));
doc.setMetaDateRaw(effectiveMetaDateRaw(doc, dto));
}
// Skip-null date-field resolution shared by applyDatePrecision (the real setters) and
// projectedState (the title projection) — the single rule keeps them from diverging (#726).
private static DatePrecision effectivePrecision(Document doc, DocumentUpdateDTO dto) {
return dto.getMetaDatePrecision() != null ? dto.getMetaDatePrecision() : doc.getMetaDatePrecision();
}
private static LocalDate effectiveMetaDateEnd(Document doc, DocumentUpdateDTO dto) {
return dto.getMetaDateEnd() != null ? dto.getMetaDateEnd() : doc.getMetaDateEnd();
}
private static String effectiveMetaDateRaw(Document doc, DocumentUpdateDTO dto) {
return dto.getMetaDateRaw() != null ? dto.getMetaDateRaw() : doc.getMetaDateRaw();
}
/**
@@ -1010,6 +1067,43 @@ public class DocumentService {
tagService.delete(tagId);
}
/**
* One-time cleanup of already-stale auto-titles (#726, FR-003). For every document whose
* stored title passes the {@link DocumentTitleBackfillMatcher} overwrite heuristic, rebuilds
* the title from the row's current state and persists it only when it actually changed.
* Idempotent: a second run rebuilds the same value and saves nothing. Hand-written prose is
* left untouched.
*
* <p>Saves via {@code documentRepository.save} directly — it must NOT route through
* {@link #updateDocument} (which versions every write), following the {@link #backfillFileHashes}
* precedent: a mechanical rename must not snapshot the whole corpus into {@code document_versions}.
*
* @return the number of documents whose title was rewritten
*/
@Transactional
public int backfillTitles() {
List<Document> docs = documentRepository.findAll();
int updated = 0;
int skipped = 0;
for (Document doc : docs) {
if (!DocumentTitleBackfillMatcher.isOverwritable(
doc.getTitle(), doc.getOriginalFilename(), doc.getLocation())) {
skipped++;
continue;
}
String rebuilt = documentTitleFactory.build(doc);
if (rebuilt.equals(doc.getTitle())) {
skipped++; // already correct — keep idempotent, no write
continue;
}
doc.setTitle(rebuilt);
documentRepository.save(doc); // direct save, no recordVersion (mechanical rename)
updated++;
}
log.info("Title backfill complete: scanned={} updated={} skipped={}", docs.size(), updated, skipped);
return updated;
}
@Transactional
public int backfillFileHashes() {
List<Document> docs = documentRepository.findByFileHashIsNullAndFilePathIsNotNull();

View File

@@ -0,0 +1,101 @@
package org.raddatz.familienarchiv.document;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
* Heuristic overwrite test for the one-time title backfill (#726, FR-004): decides whether a
* STORED title is a machine-generated auto-title (and so may be rebuilt from the row's current
* state) versus hand-written prose (left untouched). Used ONLY by the backfill — save-time
* regeneration uses an exact old-vs-new comparison instead, with no heuristic.
*
* <p>A stored title is overwritable iff, after stripping the literal {@code index} prefix:
* <ol>
* <li>it is exactly {@code {index}}, or</li>
* <li>{@code {index} {dateLabel}} with an optional trailing {@code {location}} segment
* (any location — a present, valid date label is itself strong evidence of a machine
* title), or</li>
* <li>{@code {index} {location}} where the segment equals the document's current location
* (no date label, so the segment must match the known location to be distinguished from
* prose).</li>
* </ol>
*
* <p>Security: the {@code index} is compared <em>literally</em> via {@link String#startsWith}
* (never compiled into a regex) because {@code originalFilename} is user-controlled and may carry
* regex metacharacters — an unquoted pattern would be a ReDoS / regex-injection vector
* (CWE-1333 / CWE-625). The date-label sub-patterns use only bounded, non-nested quantifiers over
* short tokens, so there is no catastrophic backtracking. Fail-closed: any null/blank index or
* structural surprise returns {@code false}.
*/
final class DocumentTitleBackfillMatcher {
private static final String SEPARATOR = " ";
// German month tokens derived from the SAME Locale.GERMAN formatters DocumentTitleFormatter
// uses, so the matcher's accepted spellings cannot drift from what the factory emits (full
// names "Januar"…"Dezember"; abbreviations "Jan."…"Dez." — note May/June/July/März carry no
// period). Pattern.quote each so a "." in an abbreviation is literal, never a wildcard.
private static final String FULL_MONTH = monthAlternation("MMMM");
private static final String ABBR_MONTH = monthAlternation("MMM");
private static final String SEASON = "(?:Frühling|Sommer|Herbst|Winter)";
private static final String YEAR = "\\d{1,4}";
private static final String DAY_NUM = "\\d{1,2}";
// One complete date label, anchored, optionally followed by a free-form trailing location
// segment. Only bounded/non-nested quantifiers over short tokens plus a single trailing
// ".+" → linear, no catastrophic backtracking (FR-004 ReDoS guard).
private static final Pattern DATE_LABEL_WITH_OPTIONAL_LOCATION = Pattern.compile(
"^(?:" + String.join("|",
YEAR, // 1916
"ca\\. " + YEAR, // ca. 1920
FULL_MONTH + " " + YEAR, // Juni 1916
DAY_NUM + "\\. " + FULL_MONTH + " " + YEAR, // 24. Dezember 1943
SEASON + " " + YEAR, // Sommer 1916
"Datum unbekannt",
DAY_NUM + "\\." + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR, // 10.11. Jan. 1917
DAY_NUM + "\\. " + ABBR_MONTH + " " + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR, // 30. Jan. 2. Feb. 1917
DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR + " " + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR, // 30. Dez. 1916 2. Jan. 1917
DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR, // 10. Jan. 1917 (range end == start)
"ab " + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR) // ab 10. Jan. 1917
+ ")(?: .+)?$");
private DocumentTitleBackfillMatcher() {
}
static boolean isOverwritable(String title, String index, String location) {
if (title == null || index == null || index.isBlank()) {
return false; // fail closed
}
if (!title.startsWith(index)) {
return false; // index is matched LITERALLY, never as a regex
}
String tail = title.substring(index.length());
if (tail.isEmpty()) {
return true; // exactly {index}
}
if (!tail.startsWith(SEPARATOR)) {
return false;
}
String body = tail.substring(SEPARATOR.length());
if (DATE_LABEL_WITH_OPTIONAL_LOCATION.matcher(body).matches()) {
return true; // {dateLabel} (+ optional trailing location)
}
// No date label: the lone segment must equal the document's current location to be
// distinguished from hand-written prose.
return location != null && !location.isBlank() && body.equals(location);
}
private static String monthAlternation(String pattern) {
DateTimeFormatter formatter = DateTimeFormatter.ofPattern(pattern, Locale.GERMAN);
Set<String> tokens = new LinkedHashSet<>();
for (int month = 1; month <= 12; month++) {
tokens.add(formatter.format(LocalDate.of(2000, month, 15)));
}
return tokens.stream().map(Pattern::quote).collect(Collectors.joining("|", "(?:", ")"));
}
}

View File

@@ -0,0 +1,39 @@
package org.raddatz.familienarchiv.document;
import org.springframework.stereotype.Component;
/**
* Single source of truth for the auto-generated document title
* {@code {index} {dateLabel} {location}}.
*
* <p>The {@code document} package owns this formula; {@code importing} consumes it
* (see ADR for issue #726). The leading {@code index} is the document's
* {@code originalFilename}; the date label is the honest German label produced by
* {@link DocumentTitleFormatter} (the Java half of the #666 date-label split); the
* trailing location is the {@code meta_location} verbatim, omitted when blank.
*/
@Component
public class DocumentTitleFactory {
static final String SEPARATOR = " ";
/**
* Composes the auto-title from the document's current state. The date segment is
* dropped for UNKNOWN precision or a null date (the honest "no date" case); the
* location segment is dropped when blank.
*/
public String build(Document doc) {
// originalFilename is NOT NULL in production; guard only so a synthetic/partial entity
// never trips StringBuilder(null) with an opaque NPE.
StringBuilder title = new StringBuilder(doc.getOriginalFilename() == null ? "" : doc.getOriginalFilename());
if (doc.getDocumentDate() != null && doc.getMetaDatePrecision() != DatePrecision.UNKNOWN) {
title.append(SEPARATOR).append(DocumentTitleFormatter.formatTitleDate(
doc.getDocumentDate(), doc.getMetaDatePrecision(),
doc.getMetaDateEnd(), doc.getMetaDateRaw()));
}
if (doc.getLocation() != null && !doc.getLocation().isBlank()) {
title.append(SEPARATOR).append(doc.getLocation());
}
return title.toString();
}
}

View File

@@ -1,6 +1,4 @@
package org.raddatz.familienarchiv.importing;
import org.raddatz.familienarchiv.document.DatePrecision;
package org.raddatz.familienarchiv.document;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;

View File

@@ -5,6 +5,7 @@ import lombok.extern.slf4j.Slf4j;
import org.raddatz.familienarchiv.document.DatePrecision;
import org.raddatz.familienarchiv.document.Document;
import org.raddatz.familienarchiv.document.DocumentService;
import org.raddatz.familienarchiv.document.DocumentTitleFactory;
import org.raddatz.familienarchiv.document.DocumentStatus;
import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
import org.raddatz.familienarchiv.exception.DomainException;
@@ -74,6 +75,7 @@ public class DocumentImporter {
Pattern.compile("[A-Za-z\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u00FF]{1,4}-+\\d+x?");
private final DocumentService documentService;
private final DocumentTitleFactory documentTitleFactory;
private final PersonService personService;
private final TagService tagService;
private final S3Client s3Client;
@@ -181,7 +183,7 @@ public class DocumentImporter {
applyAttribution(doc, row);
applyDates(doc, row);
applyAuthoritativeAssociations(doc, row);
applyFileMetadata(doc, s3Key, contentType, status, index);
applyFileMetadata(doc, s3Key, contentType, status);
applyComputedFlags(doc);
return doc;
}
@@ -217,14 +219,15 @@ public class DocumentImporter {
attachTag(doc, row.get("tags"));
}
// S3 key, content type, status, and the index-derived title.
// S3 key, content type, status, and the index-derived title. The title formula lives in
// the document package's DocumentTitleFactory (single source of truth, #726); by this point
// applyDates has populated the date/location and originalFilename carries the index.
private void applyFileMetadata(Document doc, String s3Key, String contentType,
DocumentStatus status, String index) {
DocumentStatus status) {
doc.setStatus(status);
doc.setFilePath(s3Key);
doc.setContentType(contentType);
doc.setTitle(buildTitle(index, doc.getDocumentDate(), doc.getMetaDatePrecision(),
doc.getMetaDateEnd(), doc.getMetaDateRaw(), doc.getLocation()));
doc.setTitle(documentTitleFactory.build(doc));
}
// metadataComplete: a document counts as fully described if any of the three "who/when"
@@ -235,20 +238,6 @@ public class DocumentImporter {
|| !doc.getReceivers().isEmpty());
}
// The title carries the date at the HONEST precision (never a fabricated day) via the
// shared DocumentTitleFormatter, plus the location — kept under 20 lines by delegating.
private static String buildTitle(String index, LocalDate date, DatePrecision precision,
LocalDate end, String raw, String location) {
StringBuilder title = new StringBuilder(index);
if (date != null && precision != DatePrecision.UNKNOWN) {
title.append(" ").append(DocumentTitleFormatter.formatTitleDate(date, precision, end, raw));
}
if (location != null && !location.isBlank()) {
title.append(" ").append(location);
}
return title.toString();
}
// ─── attribution routing — register-first, always retain raw ─────────────────────
private Person resolveSender(String slug, String rawName) {

View File

@@ -51,6 +51,12 @@ public class AdminController {
return ResponseEntity.ok(new BackfillResult(count));
}
@PostMapping("/backfill-titles")
public ResponseEntity<BackfillResult> backfillTitles() {
int count = documentService.backfillTitles();
return ResponseEntity.ok(new BackfillResult(count));
}
@PostMapping("/generate-thumbnails")
public ResponseEntity<ThumbnailBackfillService.BackfillStatus> generateThumbnails() {
thumbnailBackfillService.runBackfillAsync();

View File

@@ -5,6 +5,7 @@ import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.ArgumentCaptor;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.Spy;
import org.mockito.junit.jupiter.MockitoExtension;
import org.raddatz.familienarchiv.audit.AuditKind;
import org.raddatz.familienarchiv.audit.AuditLogQueryService;
@@ -74,6 +75,9 @@ class DocumentServiceTest {
@Mock AuditLogQueryService auditLogQueryService;
@Mock TranscriptionBlockQueryService transcriptionBlockQueryService;
@Mock ThumbnailAsyncRunner thumbnailAsyncRunner;
// Real factory (pure, dependency-free) so save-time title-regeneration tests exercise the
// shared composition rather than a stub — the #726 single source of truth.
@Spy DocumentTitleFactory documentTitleFactory = new DocumentTitleFactory();
@InjectMocks DocumentService documentService;
// ─── deleteDocument ───────────────────────────────────────────────────────
@@ -228,6 +232,216 @@ class DocumentServiceTest {
assertThat(doc.getMetaDateRaw()).isEqualTo("Juni 1916");
}
// ─── updateDocument save-time auto-title regeneration (#726) ──────────────
//
// Exact old-vs-new comparison: the title is the catalog auto-title iff the submitted
// title equals what the factory builds from the CURRENTLY-persisted state. The edit form
// round-trips the stored title verbatim when untouched, so an equal submission means the
// user did not type over it. makeStored() seeds index/date/precision/location and sets the
// stored title to the matching auto-title, mirroring a freshly-imported row.
private Document makeStored(String index, LocalDate date, DatePrecision precision, String location) {
Document doc = Document.builder()
.id(UUID.randomUUID())
.originalFilename(index)
.documentDate(date)
.metaDatePrecision(precision)
.location(location)
.receivers(new HashSet<>())
.tags(new HashSet<>())
.build();
doc.setTitle(documentTitleFactory.build(doc));
return doc;
}
/** A DTO that round-trips the stored auto-title untouched, with new date/precision/location. */
private static DocumentUpdateDTO editDto(String submittedTitle, LocalDate date,
DatePrecision precision, String location) {
DocumentUpdateDTO dto = new DocumentUpdateDTO();
dto.setTitle(submittedTitle);
dto.setDocumentDate(date);
dto.setMetaDatePrecision(precision);
dto.setLocation(location);
return dto;
}
private Document runUpdate(Document stored, DocumentUpdateDTO dto) throws Exception {
when(documentRepository.findById(stored.getId())).thenReturn(Optional.of(stored));
when(documentRepository.save(any())).thenReturn(stored);
documentService.updateDocument(stored.getId(), dto, null, null);
return stored;
}
@Test
void updateDocument_regeneratesAutoTitle_whenDateChanges() throws Exception {
Document stored = makeStored("C-0029", LocalDate.of(2028, 1, 1), DatePrecision.YEAR, "Berlin");
// title untouched ("C-0029 2028 Berlin"), date corrected to 1928
DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
runUpdate(stored, dto);
assertThat(stored.getTitle()).isEqualTo("C-0029 1928 Berlin");
}
@Test
void updateDocument_keepsHandWrittenTitle_whenDateChanges() throws Exception {
Document stored = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null);
stored.setTitle("C-0029 Brief an Mutter"); // hand-written, ≠ auto-title
DocumentUpdateDTO dto = editDto("C-0029 Brief an Mutter", LocalDate.of(1930, 1, 1), DatePrecision.YEAR, null);
runUpdate(stored, dto);
assertThat(stored.getTitle()).isEqualTo("C-0029 Brief an Mutter");
}
@Test
void updateDocument_freshlyTypedTitleWins_overRegeneration() throws Exception {
Document stored = makeStored("C-0029", LocalDate.of(2028, 1, 1), DatePrecision.YEAR, "Berlin");
// user changed the date AND typed a new title in the same save
DocumentUpdateDTO dto = editDto("Geburtsanzeige", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
runUpdate(stored, dto);
assertThat(stored.getTitle()).isEqualTo("Geburtsanzeige");
}
@Test
void updateDocument_regeneratesWithNewDateAndLocation() throws Exception {
Document stored = makeStored("C-0029", LocalDate.of(2028, 1, 1), DatePrecision.YEAR, "Berlin");
DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "München");
runUpdate(stored, dto);
assertThat(stored.getTitle()).isEqualTo("C-0029 1928 München");
}
@Test
void updateDocument_dropsTrailingLocationSegment_whenLocationCleared() throws Exception {
Document stored = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
// location cleared (null), title untouched
DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null);
runUpdate(stored, dto);
assertThat(stored.getTitle()).isEqualTo("C-0029 1928");
}
@Test
void updateDocument_regeneratedTitle_doesNotContainOldDate() throws Exception {
Document stored = makeStored("C-0029", LocalDate.of(2028, 1, 1), DatePrecision.YEAR, "Berlin");
DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
runUpdate(stored, dto);
assertThat(stored.getTitle()).doesNotContain("2028");
}
@Test
void updateDocument_relabelsOnPrecisionChange_yearToDay() throws Exception {
Document stored = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null);
// stored auto-title "C-0029 1928"; set a full day at DAY precision
DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 15), DatePrecision.DAY, null);
runUpdate(stored, dto);
assertThat(stored.getTitle()).isEqualTo("C-0029 15. Januar 1928");
}
@Test
void updateDocument_populatesTitle_whenDateAddedToUnknownRow() throws Exception {
Document stored = makeStored("C-0029", null, DatePrecision.UNKNOWN, null);
// stored auto-title is just "C-0029"; add a 1928 YEAR date
DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null);
runUpdate(stored, dto);
assertThat(stored.getTitle()).isEqualTo("C-0029 1928");
}
@Test
void updateDocument_roundTripsSeasonLabel() throws Exception {
Document stored = makeStored("C-0029", LocalDate.of(1943, 4, 1), DatePrecision.SEASON, null);
stored.setMetaDateRaw("Frühling 1943");
stored.setTitle(documentTitleFactory.build(stored)); // "C-0029 Frühling 1943"
DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1943, 4, 1), DatePrecision.SEASON, null);
dto.setMetaDateRaw("Frühling 1943");
runUpdate(stored, dto);
assertThat(stored.getTitle()).isEqualTo("C-0029 Frühling 1943");
}
@Test
void updateDocument_carriesStoredPrecisionAndRaw_whenDtoOmitsThem() throws Exception {
// Only the year changes; precision/end/raw are omitted from the DTO, so projectedState
// must carry them from the entity (exercises the skip-null effective* resolvers).
Document stored = makeStored("C-0029", LocalDate.of(1943, 4, 1), DatePrecision.SEASON, null);
stored.setMetaDateRaw("Frühling 1943");
stored.setTitle(documentTitleFactory.build(stored)); // "C-0029 Frühling 1943"
DocumentUpdateDTO dto = editDto(stored.getTitle(), LocalDate.of(1944, 4, 1), null, null);
runUpdate(stored, dto);
assertThat(stored.getTitle()).isEqualTo("C-0029 Frühling 1944");
}
@Test
void updateDocument_roundTripsRangeLabel_atSaveTime() throws Exception {
Document stored = Document.builder()
.id(UUID.randomUUID())
.originalFilename("C-0029")
.documentDate(LocalDate.of(1917, 1, 10))
.metaDatePrecision(DatePrecision.RANGE)
.metaDateEnd(LocalDate.of(1917, 1, 11))
.receivers(new HashSet<>())
.tags(new HashSet<>())
.build();
stored.setTitle(documentTitleFactory.build(stored)); // "C-0029 10.11. Jan. 1917"
DocumentUpdateDTO dto = new DocumentUpdateDTO();
dto.setTitle(stored.getTitle());
dto.setDocumentDate(LocalDate.of(1918, 1, 10));
dto.setMetaDatePrecision(DatePrecision.RANGE);
dto.setMetaDateEnd(LocalDate.of(1918, 1, 11));
runUpdate(stored, dto);
assertThat(stored.getTitle()).isEqualTo("C-0029 10.11. Jan. 1918");
}
@Test
void updateDocument_doesNotRegenerateToBlank_whenSubmittedTitleEmpty() throws Exception {
Document stored = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
DocumentUpdateDTO dto = editDto("", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
runUpdate(stored, dto);
assertThat(stored.getTitle()).isNotBlank();
}
@Test
void updateDocument_treatsFileReplacedDoc_asManual() throws Exception {
// originalFilename was reassigned by an earlier file-replace, so the stored title (built
// at import from the old index) no longer matches build(currentState) → treated as manual.
Document stored = makeStored("scan_2024.pdf", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
stored.setTitle("C-0029 1928 Berlin"); // legacy import title, ≠ build("scan_2024.pdf"…)
DocumentUpdateDTO dto = editDto("C-0029 1928 Berlin", LocalDate.of(1930, 1, 1), DatePrecision.YEAR, "Berlin");
runUpdate(stored, dto);
assertThat(stored.getTitle()).isEqualTo("C-0029 1928 Berlin");
}
@Test
void updateDocument_idempotent_whenNothingChanges() throws Exception {
Document stored = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
String before = stored.getTitle();
DocumentUpdateDTO dto = editDto(before, LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
runUpdate(stored, dto);
assertThat(stored.getTitle()).isEqualTo(before);
}
// ─── updateDocument date-range validation (#678) ──────────────────────────
/** Builds a stored doc ready for an updateDocument call (collections initialised). */
@@ -481,6 +695,59 @@ class DocumentServiceTest {
verify(documentVersionService).recordVersion(any(Document.class));
}
// ─── backfillTitles — one-time stale-title cleanup (#726, FR-003) ─────────
@Test
void backfillTitles_rewritesStaleAutoTitle_andCountsIt() {
Document stale = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
stale.setTitle("C-0029 2028 Berlin"); // stale stored title (date typo never fixed)
when(documentRepository.findAll()).thenReturn(List.of(stale));
when(documentRepository.save(any())).thenReturn(stale);
int count = documentService.backfillTitles();
assertThat(count).isEqualTo(1);
assertThat(stale.getTitle()).isEqualTo("C-0029 1928 Berlin");
verify(documentRepository).save(stale);
}
@Test
void backfillTitles_skipsProse() {
Document prose = makeStored("C-0030", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null);
prose.setTitle("C-0030 Brief an Mutter");
when(documentRepository.findAll()).thenReturn(List.of(prose));
int count = documentService.backfillTitles();
assertThat(count).isZero();
assertThat(prose.getTitle()).isEqualTo("C-0030 Brief an Mutter");
verify(documentRepository, never()).save(any());
}
@Test
void backfillTitles_isIdempotent_forAlreadyCorrectTitle() {
Document fresh = makeStored("C-0031", LocalDate.of(1940, 1, 1), DatePrecision.YEAR, null);
// title already equals build(current state) → nothing to do
when(documentRepository.findAll()).thenReturn(List.of(fresh));
int count = documentService.backfillTitles();
assertThat(count).isZero();
verify(documentRepository, never()).save(any());
}
@Test
void backfillTitles_neverRecordsVersions() {
Document stale = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
stale.setTitle("C-0029 2028 Berlin");
when(documentRepository.findAll()).thenReturn(List.of(stale));
when(documentRepository.save(any())).thenReturn(stale);
documentService.backfillTitles();
verify(documentVersionService, never()).recordVersion(any());
}
// ─── thumbnail dispatch ───────────────────────────────────────────────────
@Test

View File

@@ -0,0 +1,90 @@
package org.raddatz.familienarchiv.document;
import org.junit.jupiter.api.Test;
import org.raddatz.familienarchiv.PostgresContainerConfig;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.context.annotation.Import;
import org.springframework.test.context.ActiveProfiles;
import org.springframework.test.context.bean.override.mockito.MockitoBean;
import org.springframework.transaction.annotation.Transactional;
import software.amazon.awssdk.services.s3.S3Client;
import java.time.LocalDate;
import java.util.UUID;
import static org.assertj.core.api.Assertions.assertThat;
/**
* End-to-end backfill against a real Postgres (#726, FR-003). H2 is unusable here — the
* {@code title} column is NOT NULL and the title-sync semantics depend on that — so this pins the
* behaviour on {@code postgres:16-alpine}: a stale auto-title is rewritten, the sweep is
* idempotent, prose is left alone, and the mechanical rename writes no {@code document_versions}
* rows. Permission enforcement (401/403) is covered faster by the {@code @WebMvcTest} slice in
* {@code AdminControllerTest}.
*/
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
@ActiveProfiles("test")
@Import(PostgresContainerConfig.class)
@Transactional
class DocumentTitleBackfillIntegrationTest {
@MockitoBean S3Client s3Client;
@Autowired DocumentService documentService;
@Autowired DocumentRepository documentRepository;
@Autowired DocumentVersionRepository documentVersionRepository;
private Document persist(String index, String title, LocalDate date, DatePrecision precision, String location) {
return documentRepository.save(Document.builder()
.originalFilename(index)
.title(title)
.documentDate(date)
.metaDatePrecision(precision)
.location(location)
.status(DocumentStatus.PLACEHOLDER)
.build());
}
@Test
void backfill_rewritesStaleAutoTitle() {
Document stale = persist("C-0029", "C-0029 2028 Berlin",
LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
int count = documentService.backfillTitles();
assertThat(count).isEqualTo(1); // exactly the one stale row seeded (clean test DB)
assertThat(documentRepository.findById(stale.getId()).orElseThrow().getTitle())
.isEqualTo("C-0029 1928 Berlin");
}
@Test
void backfill_isIdempotent_secondRunChangesNothing() {
persist("C-0029", "C-0029 2028 Berlin", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
documentService.backfillTitles();
int secondRun = documentService.backfillTitles();
assertThat(secondRun).isZero();
}
@Test
void backfill_skipsProse() {
Document prose = persist("C-0030", "C-0030 Brief an Mutter",
LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null);
documentService.backfillTitles();
assertThat(documentRepository.findById(prose.getId()).orElseThrow().getTitle())
.isEqualTo("C-0030 Brief an Mutter");
}
@Test
void backfill_addsNoDocumentVersionRows() {
persist("C-0029", "C-0029 2028 Berlin", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
long versionsBefore = documentVersionRepository.count();
documentService.backfillTitles();
assertThat(documentVersionRepository.count()).isEqualTo(versionsBefore);
}
}

View File

@@ -0,0 +1,175 @@
package org.raddatz.familienarchiv.document;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
import java.util.concurrent.TimeUnit;
import static org.assertj.core.api.Assertions.assertThat;
/**
* The backfill overwrite heuristic (FR-004) in isolation — every emittable date-label form is
* recognised, prose is left alone, and a regex-metacharacter index is matched literally without
* hanging. The exact label spellings mirror {@code docs/date-label-fixtures.json}.
*/
class DocumentTitleBackfillMatcherTest {
private static boolean overwritable(String title, String location) {
return DocumentTitleBackfillMatcher.isOverwritable(title, "C-0029", location);
}
// ─── each date-label form (index + form) is overwritable ──────────────────
@Test
void year_form() {
assertThat(overwritable("C-0029 1916", null)).isTrue();
}
@Test
void approx_form() {
assertThat(overwritable("C-0029 ca. 1920", null)).isTrue();
}
@Test
void month_form() {
assertThat(overwritable("C-0029 Juni 1916", null)).isTrue();
}
@Test
void day_form() {
assertThat(overwritable("C-0029 24. Dezember 1943", null)).isTrue();
}
@Test
void season_form() {
assertThat(overwritable("C-0029 Sommer 1916", null)).isTrue();
}
@Test
void unknown_label_form() {
assertThat(overwritable("C-0029 Datum unbekannt", null)).isTrue();
}
@Test
void range_same_month_form() {
assertThat(overwritable("C-0029 10.11. Jan. 1917", null)).isTrue();
}
@Test
void range_cross_month_form() {
assertThat(overwritable("C-0029 30. Jan. 2. Feb. 1917", null)).isTrue();
}
@Test
void range_cross_year_form() {
assertThat(overwritable("C-0029 30. Dez. 1916 2. Jan. 1917", null)).isTrue();
}
@Test
void range_single_day_form() {
assertThat(overwritable("C-0029 10. Jan. 1917", null)).isTrue();
}
@Test
void range_open_form() {
assertThat(overwritable("C-0029 ab 10. Jan. 1917", null)).isTrue();
}
// ─── date label + trailing location (any location) ────────────────────────
@Test
void date_form_with_trailing_location() {
assertThat(overwritable("C-0029 1916 Berlin", null)).isTrue();
}
@Test
void range_with_internal_separator_plus_trailing_location() {
// The range label itself contains " "; the trailing " Berlin" must still be peeled.
assertThat(overwritable("C-0029 30. Jan. 2. Feb. 1917 Berlin", null)).isTrue();
}
// ─── index-only and index+location cases ──────────────────────────────────
@Test
void exactly_index() {
assertThat(overwritable("C-0029", null)).isTrue();
}
@Test
void index_plus_location_equal_to_current() {
assertThat(overwritable("C-0029 Berlin", "Berlin")).isTrue();
}
// ─── prose is left untouched ──────────────────────────────────────────────
@Test
void prose_segment_not_matching_location_is_skipped() {
assertThat(overwritable("C-0029 Brief an Mutter", "Berlin")).isFalse();
}
@Test
void location_only_segment_is_skipped_when_no_current_location() {
// No date label, and the doc has no location to compare against → cannot prove machine.
assertThat(overwritable("C-0029 Berlin", null)).isFalse();
}
@Test
void title_not_starting_with_index_is_skipped() {
assertThat(overwritable("Ganz anderer Titel", null)).isFalse();
}
// ─── near-miss: shapes that look almost machine-built but are not ──────────
@Test
void ascii_hyphen_instead_of_en_dash_separator_is_skipped() {
// The separator is " " (en dash); a plain " - " is not the machine separator.
assertThat(overwritable("C-0029 - 1916", null)).isFalse();
}
@Test
void date_label_without_separator_before_trailing_text_is_skipped() {
// "1916 Berlin" is not a date label and is not joined by " "; prose, not machine.
assertThat(overwritable("C-0029 1916 Berlin", null)).isFalse();
}
@Test
void year_with_trailing_letters_is_not_a_year_label() {
assertThat(overwritable("C-0029 1916er Brief", null)).isFalse();
}
@Test
void index_immediately_followed_by_text_without_separator_is_skipped() {
assertThat(overwritable("C-0029x 1916", null)).isFalse();
}
// ─── fail-closed guards ───────────────────────────────────────────────────
@Test
void null_title_is_not_overwritable() {
assertThat(DocumentTitleBackfillMatcher.isOverwritable(null, "C-0029", null)).isFalse();
}
@Test
void null_index_is_not_overwritable() {
assertThat(DocumentTitleBackfillMatcher.isOverwritable("C-0029 1916", null, null)).isFalse();
}
@Test
void blank_index_is_not_overwritable() {
assertThat(DocumentTitleBackfillMatcher.isOverwritable(" 1916", " ", null)).isFalse();
}
// ─── ReDoS / regex-metacharacter index is matched literally and terminates ─
@Test
@Timeout(value = 5, unit = TimeUnit.SECONDS)
void index_with_regex_metacharacters_is_matched_literally_and_terminates() {
String hostileIndex = "C-0029(.*).pdf";
// Literal prefix → matches; trailing date label → overwritable. Must not hang.
assertThat(DocumentTitleBackfillMatcher.isOverwritable(
hostileIndex + " 1916", hostileIndex, null)).isTrue();
// A title that does NOT start with the literal hostile index is skipped, also fast.
assertThat(DocumentTitleBackfillMatcher.isOverwritable(
"C-0029 1916", hostileIndex, null)).isFalse();
}
}

View File

@@ -0,0 +1,89 @@
package org.raddatz.familienarchiv.document;
import org.junit.jupiter.api.Test;
import java.time.LocalDate;
import static org.assertj.core.api.Assertions.assertThat;
/**
* The auto-title composition {@code {index} {dateLabel} {location}} in isolation.
* The honest date-label forms themselves are pinned by {@link DocumentTitleFormatterTest}
* against the shared #666 fixture; here we assert only how the factory composes the
* three segments and which segments it omits.
*/
class DocumentTitleFactoryTest {
private final DocumentTitleFactory factory = new DocumentTitleFactory();
private static Document.DocumentBuilder doc(String index) {
return Document.builder()
.originalFilename(index)
.metaDatePrecision(DatePrecision.UNKNOWN);
}
@Test
void index_only_when_no_date_and_no_location() {
assertThat(factory.build(doc("C-0029").build())).isEqualTo("C-0029");
}
@Test
void index_and_year_date() {
Document d = doc("C-0029")
.documentDate(LocalDate.of(1928, 1, 15))
.metaDatePrecision(DatePrecision.YEAR)
.build();
assertThat(factory.build(d)).isEqualTo("C-0029 1928");
}
@Test
void index_date_and_location() {
Document d = doc("C-0029")
.documentDate(LocalDate.of(1928, 1, 15))
.metaDatePrecision(DatePrecision.YEAR)
.location("Berlin")
.build();
assertThat(factory.build(d)).isEqualTo("C-0029 1928 Berlin");
}
@Test
void location_without_date_attaches_directly_to_index() {
Document d = doc("C-0029").location("Berlin").build();
assertThat(factory.build(d)).isEqualTo("C-0029 Berlin");
}
@Test
void unknown_precision_omits_the_date_segment() {
Document d = doc("C-0029")
.documentDate(LocalDate.of(1928, 1, 15))
.metaDatePrecision(DatePrecision.UNKNOWN)
.build();
assertThat(factory.build(d)).isEqualTo("C-0029");
}
@Test
void blank_location_is_omitted() {
Document d = doc("C-0029")
.documentDate(LocalDate.of(1928, 1, 15))
.metaDatePrecision(DatePrecision.YEAR)
.location(" ")
.build();
assertThat(factory.build(d)).isEqualTo("C-0029 1928");
}
@Test
void bare_document_with_null_index_builds_empty_string_not_npe() {
// originalFilename is NOT NULL in production; the guard keeps a synthetic/partial entity
// from tripping StringBuilder(null) with an opaque NPE.
assertThat(factory.build(Document.builder().build())).isEqualTo("");
}
@Test
void day_precision_renders_the_full_german_label() {
Document d = doc("C-0029")
.documentDate(LocalDate.of(1928, 1, 15))
.metaDatePrecision(DatePrecision.DAY)
.build();
assertThat(factory.build(d)).isEqualTo("C-0029 15. Januar 1928");
}
}

View File

@@ -1,10 +1,9 @@
package org.raddatz.familienarchiv.importing;
package org.raddatz.familienarchiv.document;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.DynamicTest;
import org.junit.jupiter.api.TestFactory;
import org.raddatz.familienarchiv.document.DatePrecision;
import java.nio.file.Files;
import java.nio.file.Path;

View File

@@ -11,6 +11,7 @@ import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.raddatz.familienarchiv.document.Document;
import org.raddatz.familienarchiv.document.DocumentService;
import org.raddatz.familienarchiv.document.DocumentTitleFactory;
import org.raddatz.familienarchiv.document.DocumentStatus;
import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
import org.raddatz.familienarchiv.person.Person;
@@ -59,8 +60,10 @@ class DocumentImporterTest {
// override this stub locally (load_skipsFile_whenMagicByteCheckThrowsIoException).
lenient().when(fileStreamOpener.open(any(File.class)))
.thenAnswer(inv -> new java.io.FileInputStream(inv.getArgument(0, File.class)));
importer = new DocumentImporter(documentService, personService, tagService, s3Client,
thumbnailAsyncRunner, fileStreamOpener);
// Real factory (pure, dependency-free) so the title-content assertions below exercise
// the shared composition rather than a stub — the #726 single source of truth.
importer = new DocumentImporter(documentService, new DocumentTitleFactory(), personService,
tagService, s3Client, thumbnailAsyncRunner, fileStreamOpener);
ReflectionTestUtils.setField(importer, "bucketName", "test-bucket");
}

View File

@@ -132,6 +132,31 @@ class AdminControllerTest {
.andExpect(jsonPath("$.count").value(3));
}
// ─── POST /api/admin/backfill-titles (#726) ────────────────────────────────
@Test
void backfillTitles_returns401_whenUnauthenticated() throws Exception {
mockMvc.perform(post("/api/admin/backfill-titles").with(csrf()))
.andExpect(status().isUnauthorized());
}
@Test
@WithMockUser(roles = "USER")
void backfillTitles_returns403_whenNotAdmin() throws Exception {
mockMvc.perform(post("/api/admin/backfill-titles").with(csrf()))
.andExpect(status().isForbidden());
}
@Test
@WithMockUser(authorities = "ADMIN")
void backfillTitles_returns200_withCount_whenAdmin() throws Exception {
when(documentService.backfillTitles()).thenReturn(7);
mockMvc.perform(post("/api/admin/backfill-titles").with(csrf()))
.andExpect(status().isOk())
.andExpect(jsonPath("$.count").value(7));
}
// ─── POST /api/admin/generate-thumbnails ───────────────────────────────────
@Test

View File

@@ -45,6 +45,9 @@ _See also [TranscriptionBlock](#transcriptionblock-transcriptionblock)._
**raw attribution** (`Document.senderText`, `Document.receiverText`, `Document.metaDateRaw`) — the original spreadsheet cell text for a document's sender, receiver, and date, preserved verbatim even after a `Person` or normalized date is linked. It keeps provenance intact and enables an "as written in the original" view.
**auto-generated title** (`DocumentTitleFactory`) — a `Document` title composed by the formula `{index} {dateLabel} {location}` (index = `originalFilename`; date label honest at the row's precision; location omitted when blank). On edit, an unchanged auto-title follows a corrected date/location forward (exact old-vs-new match in `DocumentService.updateDocument`); a hand-written title is kept verbatim. `POST /api/admin/backfill-titles` rewrites already-stale ones in one sweep using a grammar heuristic (`DocumentTitleBackfillMatcher`).
_Not to be confused with a hand-written title_ — only a title that still equals what the factory builds is treated as machine-generated and rewritten; prose is left untouched.
**DocumentVersion** (`DocumentVersion`) — an append-only snapshot of a `Document`'s metadata at a point in time. Append-only by convention; no consumer-facing create or update endpoint exists. The entity uses Lombok `@Data` (which generates setters), so immutability is enforced by application convention, not at the Java level.
**Tag** (`Tag`) — a hierarchical category that can be applied to `Document`s. Tags are self-referencing via a `parent_id` foreign key, forming a tree structure.

View File

@@ -0,0 +1,112 @@
# ADR-031 — The document title is a shared `document`-package factory, re-synced by an exact match on save and a grammar heuristic on a one-time backfill
**Date:** 2026-06-04
**Status:** Accepted
**Issue:** #726 (auto-sync document titles with date/location: save-time + one-time backfill)
**Milestone:**
---
## Context
A document title was a string built **once**, at import time, by a private
`DocumentImporter.buildTitle()` composing `{index} {dateLabel} {location}` (index =
`originalFilename`, date label honest at the row's precision via `DocumentTitleFormatter`,
location verbatim). Nothing rebuilt it afterwards. When an archivist later corrected a date
or location in the edit form, the title kept its stale value (e.g. it still read `2028`
after the date was fixed to `1928`), because the edit form round-trips the stored title
verbatim and `updateDocument` simply re-persisted it.
Two distinct problems live here:
1. **Going forward**, an edit to date/location must flow into a title that was machine-built
— but must never overwrite a title a human wrote.
2. **The existing backlog** of already-stale titles must be cleaned once. For these rows the
pre-edit state is gone, so there is no exact value to compare against.
The composition formula also existed only inside `importing`, which is the wrong owner: a
title is a `document` concern, and three call sites (import, save-time, backfill) must share
one rule or they will drift.
## Decision
### 1. One formula, owned by the `document` package (`DocumentTitleFactory`)
Extract the composition into `DocumentTitleFactory` (a `@Component` in the `document`
package) with `build(Document)`. `DocumentImporter` (package `importing`) now consumes it.
`DocumentTitleFormatter` moves into `document` alongside the factory (it stays
package-private; `importing` reaches the formula only through the factory). The direction is
deliberate: `document` owns the rule, `importing` depends on it — not the reverse. The
German date *label* remains the deliberate Java/TS dual implementation pinned by
`docs/date-label-fixtures.json` (#666); this ADR touches the **composition** only and does
not collapse the frontend `formatDocumentDate`.
### 2. Save-time regeneration is an EXACT match, not a heuristic
In `DocumentService.updateDocument` only (bulk edit is out of scope), capture
`autoTitleBefore = titleFactory.build(doc)` from the **currently-persisted** state *before*
any setter runs. Then:
- if the **submitted** title equals `autoTitleBefore`, it was the machine value → rebuild
from the new state;
- otherwise keep the submitted title verbatim (hand-written or freshly typed).
This is an exact old-vs-new comparison — no false positives, no false negatives — relying on
the edit form round-tripping an untouched title verbatim. `projectedState` mirrors the
existing setter asymmetry exactly: `documentDate`/`location` overwrite unconditionally (a
null clears them), while precision/end/raw are taken from the DTO only when non-null and
otherwise kept from the entity. A blank submission is never persisted (the title is always
present) — it falls back to the rebuilt auto-title, which always carries at least the index.
### 3. The one-time backlog cleanup is a grammar heuristic, behind an ADMIN endpoint
`POST /api/admin/backfill-titles` (synchronous, under `AdminController`'s class-level
`@RequirePermission(Permission.ADMIN)`) sweeps every document and, for each whose stored
title passes the overwrite test, rebuilds it via the factory. Because the pre-edit state is
gone, the test (`DocumentTitleBackfillMatcher`, used **only** here) is a grammar heuristic:
after stripping the **literal** index prefix, the remainder must be exactly the index, a
known date-label form (+ an optional trailing location), or a lone segment equal to the
document's current location. Prose is left untouched; anything malformed fails closed.
The backfill saves via `documentRepository.save` directly and **never** routes through
`updateDocument` — following the `backfillFileHashes` precedent — so a mechanical rename does
not snapshot the whole corpus into `document_versions`. It is idempotent (a second run
rewrites nothing) and logs one SLF4J-parameterized `scanned/updated/skipped` line; the
response is `BackfillResult(count)`.
### 4. Edit-form feedback (FR-005)
A localized helper line (de/en/es) under the title input explains that the title is built
from date/place and that a hand-edit is preserved, wired via `aria-describedby` and shown
only on the single-document edit form. A live preview was considered and declined.
## Consequences
- The three call sites can never diverge — there is exactly one formula
(`NFR-MAINT-001`). Save-time cost is a string build + compare; the backfill is one
synchronous transactional sweep over a low-thousands corpus.
- Security: the index is compared **literally** (`String.startsWith` / `Pattern.quote`)
because `originalFilename` is user-controlled and may carry regex metacharacters — an
unquoted pattern would be a ReDoS / regex-injection vector (CWE-1333 / CWE-625). The
date-label sub-patterns use only bounded, non-nested quantifiers.
- **File-replaced documents are treated as manual, by design.** The index is
`originalFilename`, which `updateDocument` reassigns to the uploaded file's name on a
file-replace. After a replace the stored title no longer matches `build(currentState)`, so
neither save-time nor backfill rewrites it. This is the accepted fail-safe of overloading
`originalFilename` rather than adding a dedicated `catalogIndex` column.
- The save-time heuristic risk is zero (exact match); the backfill heuristic can, by its
documented FR-004 rule, treat `{index} {valid date label} {anything}` as machine-built
and rewrite the trailing segment. This is the accepted trade for cleaning the backlog
without the lost pre-edit state.
## Alternatives considered
- **A dedicated `catalogIndex` column** instead of overloading `originalFilename` — rejected;
it adds a migration and a second source of truth for the index for no current benefit, and
the file-replace fail-safe is acceptable.
- **A heuristic at save-time too** (instead of the exact match) — rejected; the stored title
is available pre-edit, so an exact comparison is strictly better (no false positives).
- **A live title preview in the edit form** — rejected (FR-005); a static helper line is
calmer for the 60+ audience and avoids a second client-side mirror of the formula.
- **Collapsing the frontend `formatDocumentDate` into the backend** — out of scope; the
Java/TS date-label split is the deliberate #666 design, pinned by a shared fixture.

View File

@@ -9,15 +9,17 @@ ContainerDb(minio, "Object Storage", "MinIO (S3-compatible)")
System_Boundary(backend, "API Backend (Spring Boot)") {
Component(docCtrl, "DocumentController", "Spring MVC — /api/documents", "CRUD for documents: search, get by ID, update metadata, upload/download file, batch metadata updates, and per-month density aggregation for the timeline filter widget.")
Component(adminCtrl, "AdminController", "Spring MVC — /api/admin", "Triggers the asynchronous canonical import (requires ADMIN permission). Reports import state (IDLE/RUNNING/DONE/FAILED).")
Component(docSvc, "DocumentService", "Spring Service", "Core document business logic: store, update, search. Resolves persons and tags, delegates file I/O to FileService, builds dynamic JPA Specifications, and integrates with audit logging.")
Component(adminCtrl, "AdminController", "Spring MVC — /api/admin", "Triggers the asynchronous canonical import (requires ADMIN permission). Reports import state (IDLE/RUNNING/DONE/FAILED). Hosts the one-shot maintenance backfills (versions, file-hashes, titles) — synchronous, ADMIN-only.")
Component(docSvc, "DocumentService", "Spring Service", "Core document business logic: store, update, search. On update, regenerates an unchanged auto-title from the new date/location (exact old-vs-new match, #726); exposes backfillTitles() to clean already-stale titles in one sweep. Resolves persons and tags, delegates file I/O to FileService, builds dynamic JPA Specifications, and integrates with audit logging.")
Component(fileSvc, "FileService", "Spring Service", "Wraps AWS SDK v2 S3Client. Uploads files with UUID-keyed paths, computes SHA-256 hash, downloads with content-type detection, and generates presigned URLs for OCR access.")
Component(importOrch, "CanonicalImportOrchestrator", "Spring Service — @Async", "Runs the four canonical loaders in an explicit dependency DAG (TagTree → PersonRegister → PersonTree → Document). Smoke-checks all four artifacts before starting, owns the IDLE/RUNNING/DONE/FAILED state machine, fails closed on a malformed artifact.")
Component(tagTreeLoader, "TagTreeImporter", "Spring Component", "Upserts the tag hierarchy from canonical-tag-tree.xlsx via TagService (by canonical tag_path).")
Component(personRegLoader, "PersonRegisterImporter", "Spring Component", "Upserts register persons from canonical-persons.xlsx via PersonService (by normalizer person_id).")
Component(personTreeLoader, "PersonTreeImporter", "Spring Component", "Upserts tree persons + relationships from canonical-persons-tree.json via PersonService and RelationshipService.")
Component(docLoader, "DocumentImporter", "Spring Component", "Loads canonical-documents.xlsx: routes attribution register-first (raw cell always retained in sender_text/receiver_text), parses clean dates, builds an honest precision-aware title via DocumentTitleFormatter, keeps the S3 upload + thumbnail plumbing, and resolves each PDF by index (importDir/<index>.pdf) guarded by strict index validation + canonical-path containment + %PDF magic-byte check (no recursive walk).")
Component(titleFmt, "DocumentTitleFormatter", "Pure helper", "Formats the date label baked into an import title at exactly the data's precision (MONTH -> 'Juni 1916', never a fabricated day). Mirrors the frontend formatDocumentDate; both are pinned to docs/date-label-fixtures.json (#666).")
Component(docLoader, "DocumentImporter", "Spring Component", "Loads canonical-documents.xlsx: routes attribution register-first (raw cell always retained in sender_text/receiver_text), parses clean dates, builds the title via DocumentTitleFactory, keeps the S3 upload + thumbnail plumbing, and resolves each PDF by index (importDir/<index>.pdf) guarded by strict index validation + canonical-path containment + %PDF magic-byte check (no recursive walk).")
Component(titleFactory, "DocumentTitleFactory", "Spring Component", "Single source of truth for the auto-title {index} {dateLabel} {location} (#726). The document package owns this formula; importer, save-time regeneration, and the backfill all build through it so they never diverge.")
Component(titleFmt, "DocumentTitleFormatter", "Pure helper (document pkg)", "Formats the date label at exactly the data's precision (MONTH -> 'Juni 1916', never a fabricated day). Mirrors the frontend formatDocumentDate; both are pinned to docs/date-label-fixtures.json (#666).")
Component(titleMatcher, "DocumentTitleBackfillMatcher", "Pure helper", "Backfill-only heuristic deciding whether a STORED title is machine-generated (overwritable) vs hand-written prose. Index matched literally (no regex injection / ReDoS); fail-closed.")
Component(sheetReader, "CanonicalSheetReader", "POI helper", "Maps a canonical .xlsx by header name (no positional indices), splits pipe-delimited list columns, fails closed (IMPORT_ARTIFACT_INVALID) on a missing required header.")
Component(minioConf, "MinioConfig", "Spring @Configuration", "Creates the S3Client and S3Presigner beans with path-style access for MinIO. Validates MinIO connectivity on startup.")
Component(docRepo, "DocumentRepository", "Spring Data JPA", "Queries documents with Specification-based dynamic search, full-text search with ranking and match highlighting, and transcription pipeline queue projections.")
@@ -44,7 +46,11 @@ Rel(importOrch, docLoader, "4. Loads documents")
Rel(tagTreeLoader, sheetReader, "Reads canonical .xlsx")
Rel(personRegLoader, sheetReader, "Reads canonical .xlsx")
Rel(docLoader, sheetReader, "Reads canonical .xlsx")
Rel(docLoader, titleFmt, "Builds honest title date")
Rel(docLoader, titleFactory, "Builds the auto-title")
Rel(docSvc, titleFactory, "Regenerates auto-title (save-time + backfill)")
Rel(docSvc, titleMatcher, "Backfill overwrite test")
Rel(titleFactory, titleFmt, "Formats the honest date label")
Rel(adminCtrl, docSvc, "backfillTitles() / backfillFileHashes()")
Rel(tagTreeLoader, tagSvc, "Upserts tags by source_ref")
Rel(personRegLoader, personSvc, "Upserts persons by source_ref")
Rel(personTreeLoader, personSvc, "Upserts persons by source_ref")

View File

@@ -0,0 +1,36 @@
import { expect, test } from '@playwright/test';
/**
* Auto-title sync, full-stack happy path (#726). A document whose stored title equals its
* machine-generated auto-title must follow a date correction forward on save; a hand-edit would
* be kept. The exhaustive permutations live in the backend unit/integration suites — this is the
* single end-to-end pass, and it also asserts the FR-005 helper line is present on the edit form.
*/
test.describe('Document auto-title sync (#726)', () => {
test('editing the date rebuilds the auto-title, and the edit form explains it', async ({
page
}) => {
// 1. Create a document with no date/location, so its stored title == its auto-title
// (originalFilename only). createDocument derives originalFilename from the title.
await page.goto('/documents/new');
await page.waitForSelector('[data-hydrated]');
await page.getByLabel('Titel').fill('E2E Auto-Titel Sync');
await page.getByRole('button', { name: 'Speichern', exact: true }).click();
await expect(page).toHaveURL(/\/documents\/[^/]+$/);
const detailUrl = page.url();
// 2. The edit form carries the FR-005 helper explaining the auto-generated title.
await page.goto(`${detailUrl}/edit`);
await page.waitForSelector('[data-hydrated]');
await expect(page.locator('#title-help')).toBeVisible();
// 3. Add a YEAR-precision date WITHOUT touching the title, then save.
await page.locator('#documentDate').fill('15.01.1928');
await page.locator('#metaDatePrecision').selectOption('YEAR');
await page.getByRole('button', { name: 'Speichern', exact: true }).click();
// 4. The detail page shows the regenerated title carrying the new year.
await expect(page).toHaveURL(/\/documents\/[^/]+$/);
await expect(page.getByRole('heading', { name: /E2E Auto-Titel Sync.*1928/ })).toBeVisible();
});
});

View File

@@ -56,6 +56,7 @@
"form_label_sender": "Absender",
"form_label_receivers": "Empfänger",
"form_label_title": "Titel",
"form_helper_title_autogenerated": "Wird automatisch aus Datum und Ort gebildet — sobald du den Titel änderst, bleibt deine Version erhalten.",
"form_label_tags": "Schlagworte",
"form_label_content": "Inhalt",
"form_placeholder_content": "Kurze Beschreibung des Inhalts…",

View File

@@ -56,6 +56,7 @@
"form_label_sender": "Sender",
"form_label_receivers": "Recipients",
"form_label_title": "Title",
"form_helper_title_autogenerated": "Generated automatically from the date and place — as soon as you edit the title, your version is kept.",
"form_label_tags": "Tags",
"form_label_content": "Content",
"form_placeholder_content": "Brief description of the content…",

View File

@@ -56,6 +56,7 @@
"form_label_sender": "Remitente",
"form_label_receivers": "Destinatarios",
"form_label_title": "Título",
"form_helper_title_autogenerated": "Se genera automáticamente a partir de la fecha y el lugar; en cuanto edites el título, se conservará tu versión.",
"form_label_tags": "Etiquetas",
"form_label_content": "Contenido",
"form_placeholder_content": "Breve descripción del contenido…",

View File

@@ -17,6 +17,7 @@ let {
titleRequired = false,
suggestedTitle = '',
hideTitle = false,
showTitleHelp = false,
editMode = false
}: {
tags?: Tag[];
@@ -31,6 +32,7 @@ let {
titleRequired?: boolean;
suggestedTitle?: string;
hideTitle?: boolean;
showTitleHelp?: boolean;
editMode?: boolean;
} = $props();
@@ -72,8 +74,14 @@ const titleValue = $derived(titleDirty ? currentTitle : suggestedTitle || curren
titleDirty = true;
}}
required={titleRequired}
aria-describedby={showTitleHelp ? 'title-help' : undefined}
class="block w-full rounded border border-line p-2 text-sm shadow-sm focus:outline-none focus-visible:ring-2 focus-visible:ring-focus-ring"
/>
{#if showTitleHelp}
<p id="title-help" class="mt-1 text-sm text-ink-3">
{m.form_helper_title_autogenerated()}
</p>
{/if}
</div>
{/if}

View File

@@ -1,6 +1,7 @@
import { afterEach, describe, expect, it } from 'vitest';
import { cleanup, render } from 'vitest-browser-svelte';
import DescriptionSection from './DescriptionSection.svelte';
import { m } from '$lib/paraglide/messages.js';
afterEach(() => cleanup());
@@ -55,3 +56,28 @@ describe('DescriptionSection — onMount seeding (Felix B1/B2 fix regression fen
expect(input.value).toBe('Parent Value');
});
});
describe('DescriptionSection — auto-generated title helper (FR-TITLE-005)', () => {
it('shows the helper with the localized text and wires aria-describedby when showTitleHelp is set', async () => {
render(DescriptionSection, { showTitleHelp: true });
const help = document.querySelector('#title-help') as HTMLElement;
expect(help).not.toBeNull();
expect(help.textContent?.trim()).toBe(m.form_helper_title_autogenerated());
// ≥14px for the 60+ audience (FR-005 prefers a larger size than the 12px field hints).
expect(help.classList.contains('text-sm')).toBe(true);
const titleInput = document.querySelector('input#title') as HTMLInputElement;
expect(titleInput.getAttribute('aria-describedby')).toBe('title-help');
});
it('omits the helper by default (e.g. the new-document form)', async () => {
render(DescriptionSection, {});
expect(document.querySelector('#title-help')).toBeNull();
const titleInput = document.querySelector('input#title') as HTMLInputElement;
expect(titleInput.getAttribute('aria-describedby')).toBeNull();
});
it('omits the helper when the title field is hidden (bulk edit)', async () => {
render(DescriptionSection, { showTitleHelp: true, hideTitle: true });
expect(document.querySelector('#title-help')).toBeNull();
});
});

View File

@@ -221,6 +221,7 @@ async function handleReplaceFile(e: Event) {
initialArchiveFolder={doc.archiveFolder ?? ''}
initialSummary={doc.summary ?? ''}
titleRequired={true}
showTitleHelp={true}
/>
</form>