diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java
index 9db6e5d2..44b0a0d9 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java
@@ -1058,6 +1058,43 @@ public class DocumentService {
tagService.delete(tagId);
}
+ /**
+ * One-time cleanup of already-stale auto-titles (#726, FR-003). For every document whose
+ * stored title passes the {@link DocumentTitleBackfillMatcher} overwrite heuristic, rebuilds
+ * the title from the row's current state and persists it only when it actually changed.
+ * Idempotent: a second run rebuilds the same value and saves nothing. Hand-written prose is
+ * left untouched.
+ *
+ *
Saves via {@code documentRepository.save} directly — it must NOT route through
+ * {@link #updateDocument} (which versions every write), following the {@link #backfillFileHashes}
+ * precedent: a mechanical rename must not snapshot the whole corpus into {@code document_versions}.
+ *
+ * @return the number of documents whose title was rewritten
+ */
+ @Transactional
+ public int backfillTitles() {
+ List docs = documentRepository.findAll();
+ int updated = 0;
+ int skipped = 0;
+ for (Document doc : docs) {
+ if (!DocumentTitleBackfillMatcher.isOverwritable(
+ doc.getTitle(), doc.getOriginalFilename(), doc.getLocation())) {
+ skipped++;
+ continue;
+ }
+ String rebuilt = documentTitleFactory.build(doc);
+ if (rebuilt.equals(doc.getTitle())) {
+ skipped++; // already correct — keep idempotent, no write
+ continue;
+ }
+ doc.setTitle(rebuilt);
+ documentRepository.save(doc); // direct save, no recordVersion (mechanical rename)
+ updated++;
+ }
+ log.info("Title backfill complete: scanned={} updated={} skipped={}", docs.size(), updated, skipped);
+ return updated;
+ }
+
@Transactional
public int backfillFileHashes() {
List docs = documentRepository.findByFileHashIsNullAndFilePathIsNotNull();
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcher.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcher.java
new file mode 100644
index 00000000..ee6efc2c
--- /dev/null
+++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcher.java
@@ -0,0 +1,101 @@
+package org.raddatz.familienarchiv.document;
+
+import java.time.LocalDate;
+import java.time.format.DateTimeFormatter;
+import java.util.LinkedHashSet;
+import java.util.Locale;
+import java.util.Set;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * Heuristic overwrite test for the one-time title backfill (#726, FR-004): decides whether a
+ * STORED title is a machine-generated auto-title (and so may be rebuilt from the row's current
+ * state) versus hand-written prose (left untouched). Used ONLY by the backfill — save-time
+ * regeneration uses an exact old-vs-new comparison instead, with no heuristic.
+ *
+ * A stored title is overwritable iff, after stripping the literal {@code index} prefix:
+ *
+ * - it is exactly {@code {index}}, or
+ * - {@code {index} – {dateLabel}} with an optional trailing {@code – {location}} segment
+ * (any location — a present, valid date label is itself strong evidence of a machine
+ * title), or
+ * - {@code {index} – {location}} where the segment equals the document's current location
+ * (no date label, so the segment must match the known location to be distinguished from
+ * prose).
+ *
+ *
+ * Security: the {@code index} is compared literally via {@link String#startsWith}
+ * (never compiled into a regex) because {@code originalFilename} is user-controlled and may carry
+ * regex metacharacters — an unquoted pattern would be a ReDoS / regex-injection vector
+ * (CWE-1333 / CWE-625). The date-label sub-patterns use only bounded, non-nested quantifiers over
+ * short tokens, so there is no catastrophic backtracking. Fail-closed: any null/blank index or
+ * structural surprise returns {@code false}.
+ */
+final class DocumentTitleBackfillMatcher {
+
+ private static final String SEPARATOR = " – ";
+
+ // German month tokens derived from the SAME Locale.GERMAN formatters DocumentTitleFormatter
+ // uses, so the matcher's accepted spellings cannot drift from what the factory emits (full
+ // names "Januar"…"Dezember"; abbreviations "Jan."…"Dez." — note May/June/July/März carry no
+ // period). Pattern.quote each so a "." in an abbreviation is literal, never a wildcard.
+ private static final String FULL_MONTH = monthAlternation("MMMM");
+ private static final String ABBR_MONTH = monthAlternation("MMM");
+ private static final String SEASON = "(?:Frühling|Sommer|Herbst|Winter)";
+ private static final String YEAR = "\\d{1,4}";
+ private static final String DAY_NUM = "\\d{1,2}";
+
+ // One complete date label, anchored, optionally followed by a free-form trailing location
+ // segment. Only bounded/non-nested quantifiers over short tokens plus a single trailing
+ // ".+" → linear, no catastrophic backtracking (FR-004 ReDoS guard).
+ private static final Pattern DATE_LABEL_WITH_OPTIONAL_LOCATION = Pattern.compile(
+ "^(?:" + String.join("|",
+ YEAR, // 1916
+ "ca\\. " + YEAR, // ca. 1920
+ FULL_MONTH + " " + YEAR, // Juni 1916
+ DAY_NUM + "\\. " + FULL_MONTH + " " + YEAR, // 24. Dezember 1943
+ SEASON + " " + YEAR, // Sommer 1916
+ "Datum unbekannt",
+ DAY_NUM + "\\.–" + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR, // 10.–11. Jan. 1917
+ DAY_NUM + "\\. " + ABBR_MONTH + " – " + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR, // 30. Jan. – 2. Feb. 1917
+ DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR + " – " + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR, // 30. Dez. 1916 – 2. Jan. 1917
+ DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR, // 10. Jan. 1917 (range end == start)
+ "ab " + DAY_NUM + "\\. " + ABBR_MONTH + " " + YEAR) // ab 10. Jan. 1917
+ + ")(?: – .+)?$");
+
+ private DocumentTitleBackfillMatcher() {
+ }
+
+ static boolean isOverwritable(String title, String index, String location) {
+ if (title == null || index == null || index.isBlank()) {
+ return false; // fail closed
+ }
+ if (!title.startsWith(index)) {
+ return false; // index is matched LITERALLY, never as a regex
+ }
+ String tail = title.substring(index.length());
+ if (tail.isEmpty()) {
+ return true; // exactly {index}
+ }
+ if (!tail.startsWith(SEPARATOR)) {
+ return false;
+ }
+ String body = tail.substring(SEPARATOR.length());
+ if (DATE_LABEL_WITH_OPTIONAL_LOCATION.matcher(body).matches()) {
+ return true; // {dateLabel} (+ optional trailing location)
+ }
+ // No date label: the lone segment must equal the document's current location to be
+ // distinguished from hand-written prose.
+ return location != null && !location.isBlank() && body.equals(location);
+ }
+
+ private static String monthAlternation(String pattern) {
+ DateTimeFormatter formatter = DateTimeFormatter.ofPattern(pattern, Locale.GERMAN);
+ Set tokens = new LinkedHashSet<>();
+ for (int month = 1; month <= 12; month++) {
+ tokens.add(formatter.format(LocalDate.of(2000, month, 15)));
+ }
+ return tokens.stream().map(Pattern::quote).collect(Collectors.joining("|", "(?:", ")"));
+ }
+}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/user/AdminController.java b/backend/src/main/java/org/raddatz/familienarchiv/user/AdminController.java
index 74b5d643..1c5f66b0 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/user/AdminController.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/user/AdminController.java
@@ -51,6 +51,12 @@ public class AdminController {
return ResponseEntity.ok(new BackfillResult(count));
}
+ @PostMapping("/backfill-titles")
+ public ResponseEntity backfillTitles() {
+ int count = documentService.backfillTitles();
+ return ResponseEntity.ok(new BackfillResult(count));
+ }
+
@PostMapping("/generate-thumbnails")
public ResponseEntity generateThumbnails() {
thumbnailBackfillService.runBackfillAsync();
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java
index 76c95dca..fced773a 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java
@@ -658,6 +658,59 @@ class DocumentServiceTest {
verify(documentVersionService).recordVersion(any(Document.class));
}
+ // ─── backfillTitles — one-time stale-title cleanup (#726, FR-003) ─────────
+
+ @Test
+ void backfillTitles_rewritesStaleAutoTitle_andCountsIt() {
+ Document stale = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
+ stale.setTitle("C-0029 – 2028 – Berlin"); // stale stored title (date typo never fixed)
+ when(documentRepository.findAll()).thenReturn(List.of(stale));
+ when(documentRepository.save(any())).thenReturn(stale);
+
+ int count = documentService.backfillTitles();
+
+ assertThat(count).isEqualTo(1);
+ assertThat(stale.getTitle()).isEqualTo("C-0029 – 1928 – Berlin");
+ verify(documentRepository).save(stale);
+ }
+
+ @Test
+ void backfillTitles_skipsProse() {
+ Document prose = makeStored("C-0030", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null);
+ prose.setTitle("C-0030 – Brief an Mutter");
+ when(documentRepository.findAll()).thenReturn(List.of(prose));
+
+ int count = documentService.backfillTitles();
+
+ assertThat(count).isZero();
+ assertThat(prose.getTitle()).isEqualTo("C-0030 – Brief an Mutter");
+ verify(documentRepository, never()).save(any());
+ }
+
+ @Test
+ void backfillTitles_isIdempotent_forAlreadyCorrectTitle() {
+ Document fresh = makeStored("C-0031", LocalDate.of(1940, 1, 1), DatePrecision.YEAR, null);
+ // title already equals build(current state) → nothing to do
+ when(documentRepository.findAll()).thenReturn(List.of(fresh));
+
+ int count = documentService.backfillTitles();
+
+ assertThat(count).isZero();
+ verify(documentRepository, never()).save(any());
+ }
+
+ @Test
+ void backfillTitles_neverRecordsVersions() {
+ Document stale = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
+ stale.setTitle("C-0029 – 2028 – Berlin");
+ when(documentRepository.findAll()).thenReturn(List.of(stale));
+ when(documentRepository.save(any())).thenReturn(stale);
+
+ documentService.backfillTitles();
+
+ verify(documentVersionService, never()).recordVersion(any());
+ }
+
// ─── thumbnail dispatch ───────────────────────────────────────────────────
@Test
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcherTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcherTest.java
new file mode 100644
index 00000000..be5e02be
--- /dev/null
+++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentTitleBackfillMatcherTest.java
@@ -0,0 +1,151 @@
+package org.raddatz.familienarchiv.document;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
+
+import java.util.concurrent.TimeUnit;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * The backfill overwrite heuristic (FR-004) in isolation — every emittable date-label form is
+ * recognised, prose is left alone, and a regex-metacharacter index is matched literally without
+ * hanging. The exact label spellings mirror {@code docs/date-label-fixtures.json}.
+ */
+class DocumentTitleBackfillMatcherTest {
+
+ private static boolean overwritable(String title, String location) {
+ return DocumentTitleBackfillMatcher.isOverwritable(title, "C-0029", location);
+ }
+
+ // ─── each date-label form (index + form) is overwritable ──────────────────
+
+ @Test
+ void year_form() {
+ assertThat(overwritable("C-0029 – 1916", null)).isTrue();
+ }
+
+ @Test
+ void approx_form() {
+ assertThat(overwritable("C-0029 – ca. 1920", null)).isTrue();
+ }
+
+ @Test
+ void month_form() {
+ assertThat(overwritable("C-0029 – Juni 1916", null)).isTrue();
+ }
+
+ @Test
+ void day_form() {
+ assertThat(overwritable("C-0029 – 24. Dezember 1943", null)).isTrue();
+ }
+
+ @Test
+ void season_form() {
+ assertThat(overwritable("C-0029 – Sommer 1916", null)).isTrue();
+ }
+
+ @Test
+ void unknown_label_form() {
+ assertThat(overwritable("C-0029 – Datum unbekannt", null)).isTrue();
+ }
+
+ @Test
+ void range_same_month_form() {
+ assertThat(overwritable("C-0029 – 10.–11. Jan. 1917", null)).isTrue();
+ }
+
+ @Test
+ void range_cross_month_form() {
+ assertThat(overwritable("C-0029 – 30. Jan. – 2. Feb. 1917", null)).isTrue();
+ }
+
+ @Test
+ void range_cross_year_form() {
+ assertThat(overwritable("C-0029 – 30. Dez. 1916 – 2. Jan. 1917", null)).isTrue();
+ }
+
+ @Test
+ void range_single_day_form() {
+ assertThat(overwritable("C-0029 – 10. Jan. 1917", null)).isTrue();
+ }
+
+ @Test
+ void range_open_form() {
+ assertThat(overwritable("C-0029 – ab 10. Jan. 1917", null)).isTrue();
+ }
+
+ // ─── date label + trailing location (any location) ────────────────────────
+
+ @Test
+ void date_form_with_trailing_location() {
+ assertThat(overwritable("C-0029 – 1916 – Berlin", null)).isTrue();
+ }
+
+ @Test
+ void range_with_internal_separator_plus_trailing_location() {
+ // The range label itself contains " – "; the trailing " – Berlin" must still be peeled.
+ assertThat(overwritable("C-0029 – 30. Jan. – 2. Feb. 1917 – Berlin", null)).isTrue();
+ }
+
+ // ─── index-only and index+location cases ──────────────────────────────────
+
+ @Test
+ void exactly_index() {
+ assertThat(overwritable("C-0029", null)).isTrue();
+ }
+
+ @Test
+ void index_plus_location_equal_to_current() {
+ assertThat(overwritable("C-0029 – Berlin", "Berlin")).isTrue();
+ }
+
+ // ─── prose is left untouched ──────────────────────────────────────────────
+
+ @Test
+ void prose_segment_not_matching_location_is_skipped() {
+ assertThat(overwritable("C-0029 – Brief an Mutter", "Berlin")).isFalse();
+ }
+
+ @Test
+ void location_only_segment_is_skipped_when_no_current_location() {
+ // No date label, and the doc has no location to compare against → cannot prove machine.
+ assertThat(overwritable("C-0029 – Berlin", null)).isFalse();
+ }
+
+ @Test
+ void title_not_starting_with_index_is_skipped() {
+ assertThat(overwritable("Ganz anderer Titel", null)).isFalse();
+ }
+
+ // ─── fail-closed guards ───────────────────────────────────────────────────
+
+ @Test
+ void null_title_is_not_overwritable() {
+ assertThat(DocumentTitleBackfillMatcher.isOverwritable(null, "C-0029", null)).isFalse();
+ }
+
+ @Test
+ void null_index_is_not_overwritable() {
+ assertThat(DocumentTitleBackfillMatcher.isOverwritable("C-0029 – 1916", null, null)).isFalse();
+ }
+
+ @Test
+ void blank_index_is_not_overwritable() {
+ assertThat(DocumentTitleBackfillMatcher.isOverwritable(" – 1916", " ", null)).isFalse();
+ }
+
+ // ─── ReDoS / regex-metacharacter index is matched literally and terminates ─
+
+ @Test
+ @Timeout(value = 5, unit = TimeUnit.SECONDS)
+ void index_with_regex_metacharacters_is_matched_literally_and_terminates() {
+ String hostileIndex = "C-0029(.*).pdf";
+ // Literal prefix → matches; trailing date label → overwritable. Must not hang.
+ assertThat(DocumentTitleBackfillMatcher.isOverwritable(
+ hostileIndex + " – 1916", hostileIndex, null)).isTrue();
+ // A title that does NOT start with the literal hostile index is skipped, also fast.
+ assertThat(DocumentTitleBackfillMatcher.isOverwritable(
+ "C-0029 – 1916", hostileIndex, null)).isFalse();
+ }
+}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/user/AdminControllerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/user/AdminControllerTest.java
index 8e51fad7..6429d3dc 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/user/AdminControllerTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/user/AdminControllerTest.java
@@ -132,6 +132,31 @@ class AdminControllerTest {
.andExpect(jsonPath("$.count").value(3));
}
+ // ─── POST /api/admin/backfill-titles (#726) ────────────────────────────────
+
+ @Test
+ void backfillTitles_returns401_whenUnauthenticated() throws Exception {
+ mockMvc.perform(post("/api/admin/backfill-titles").with(csrf()))
+ .andExpect(status().isUnauthorized());
+ }
+
+ @Test
+ @WithMockUser(roles = "USER")
+ void backfillTitles_returns403_whenNotAdmin() throws Exception {
+ mockMvc.perform(post("/api/admin/backfill-titles").with(csrf()))
+ .andExpect(status().isForbidden());
+ }
+
+ @Test
+ @WithMockUser(authorities = "ADMIN")
+ void backfillTitles_returns200_withCount_whenAdmin() throws Exception {
+ when(documentService.backfillTitles()).thenReturn(7);
+
+ mockMvc.perform(post("/api/admin/backfill-titles").with(csrf()))
+ .andExpect(status().isOk())
+ .andExpect(jsonPath("$.count").value(7));
+ }
+
// ─── POST /api/admin/generate-thumbnails ───────────────────────────────────
@Test