feat(document): one-time backfill endpoint for stale auto-titles (#726)
Adds POST /api/admin/backfill-titles (ADMIN-only, synchronous), which rebuilds every machine-generated title from the row's current state. A grammar heuristic (DocumentTitleBackfillMatcher) decides whether a stored title may be overwritten: the index is matched literally via startsWith (originalFilename is user-controlled, so this avoids regex injection / ReDoS, CWE-1333); the date-label forms are derived from the same Locale.GERMAN formatters as the factory, so the two cannot drift; prose is left untouched; and the matcher fails closed on any surprise. Titles are saved via the repository directly (no recordVersion — this follows backfillFileHashes), so the mechanical rename never version-spams document_versions. The operation is idempotent: a second run rewrites nothing. It emits one SLF4J-parameterized log line with the scanned/updated/skipped counts. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -658,6 +658,59 @@ class DocumentServiceTest {
|
||||
verify(documentVersionService).recordVersion(any(Document.class));
|
||||
}
|
||||
|
||||
// ─── backfillTitles — one-time stale-title cleanup (#726, FR-003) ─────────
|
||||
|
||||
@Test
|
||||
void backfillTitles_rewritesStaleAutoTitle_andCountsIt() {
|
||||
Document stale = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
stale.setTitle("C-0029 – 2028 – Berlin"); // stale stored title (date typo never fixed)
|
||||
when(documentRepository.findAll()).thenReturn(List.of(stale));
|
||||
when(documentRepository.save(any())).thenReturn(stale);
|
||||
|
||||
int count = documentService.backfillTitles();
|
||||
|
||||
assertThat(count).isEqualTo(1);
|
||||
assertThat(stale.getTitle()).isEqualTo("C-0029 – 1928 – Berlin");
|
||||
verify(documentRepository).save(stale);
|
||||
}
|
||||
|
||||
@Test
|
||||
void backfillTitles_skipsProse() {
|
||||
Document prose = makeStored("C-0030", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, null);
|
||||
prose.setTitle("C-0030 – Brief an Mutter");
|
||||
when(documentRepository.findAll()).thenReturn(List.of(prose));
|
||||
|
||||
int count = documentService.backfillTitles();
|
||||
|
||||
assertThat(count).isZero();
|
||||
assertThat(prose.getTitle()).isEqualTo("C-0030 – Brief an Mutter");
|
||||
verify(documentRepository, never()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void backfillTitles_isIdempotent_forAlreadyCorrectTitle() {
|
||||
Document fresh = makeStored("C-0031", LocalDate.of(1940, 1, 1), DatePrecision.YEAR, null);
|
||||
// title already equals build(current state) → nothing to do
|
||||
when(documentRepository.findAll()).thenReturn(List.of(fresh));
|
||||
|
||||
int count = documentService.backfillTitles();
|
||||
|
||||
assertThat(count).isZero();
|
||||
verify(documentRepository, never()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void backfillTitles_neverRecordsVersions() {
|
||||
Document stale = makeStored("C-0029", LocalDate.of(1928, 1, 1), DatePrecision.YEAR, "Berlin");
|
||||
stale.setTitle("C-0029 – 2028 – Berlin");
|
||||
when(documentRepository.findAll()).thenReturn(List.of(stale));
|
||||
when(documentRepository.save(any())).thenReturn(stale);
|
||||
|
||||
documentService.backfillTitles();
|
||||
|
||||
verify(documentVersionService, never()).recordVersion(any());
|
||||
}
|
||||
|
||||
// ─── thumbnail dispatch ───────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
|
||||
@@ -0,0 +1,151 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.Timeout;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
* The backfill overwrite heuristic (FR-004) in isolation — every emittable date-label form is
|
||||
* recognised, prose is left alone, and a regex-metacharacter index is matched literally without
|
||||
* hanging. The exact label spellings mirror {@code docs/date-label-fixtures.json}.
|
||||
*/
|
||||
class DocumentTitleBackfillMatcherTest {
|
||||
|
||||
private static boolean overwritable(String title, String location) {
|
||||
return DocumentTitleBackfillMatcher.isOverwritable(title, "C-0029", location);
|
||||
}
|
||||
|
||||
// ─── each date-label form (index + form) is overwritable ──────────────────
|
||||
|
||||
@Test
|
||||
void year_form() {
|
||||
assertThat(overwritable("C-0029 – 1916", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void approx_form() {
|
||||
assertThat(overwritable("C-0029 – ca. 1920", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void month_form() {
|
||||
assertThat(overwritable("C-0029 – Juni 1916", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void day_form() {
|
||||
assertThat(overwritable("C-0029 – 24. Dezember 1943", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void season_form() {
|
||||
assertThat(overwritable("C-0029 – Sommer 1916", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void unknown_label_form() {
|
||||
assertThat(overwritable("C-0029 – Datum unbekannt", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void range_same_month_form() {
|
||||
assertThat(overwritable("C-0029 – 10.–11. Jan. 1917", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void range_cross_month_form() {
|
||||
assertThat(overwritable("C-0029 – 30. Jan. – 2. Feb. 1917", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void range_cross_year_form() {
|
||||
assertThat(overwritable("C-0029 – 30. Dez. 1916 – 2. Jan. 1917", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void range_single_day_form() {
|
||||
assertThat(overwritable("C-0029 – 10. Jan. 1917", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void range_open_form() {
|
||||
assertThat(overwritable("C-0029 – ab 10. Jan. 1917", null)).isTrue();
|
||||
}
|
||||
|
||||
// ─── date label + trailing location (any location) ────────────────────────
|
||||
|
||||
@Test
|
||||
void date_form_with_trailing_location() {
|
||||
assertThat(overwritable("C-0029 – 1916 – Berlin", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void range_with_internal_separator_plus_trailing_location() {
|
||||
// The range label itself contains " – "; the trailing " – Berlin" must still be peeled.
|
||||
assertThat(overwritable("C-0029 – 30. Jan. – 2. Feb. 1917 – Berlin", null)).isTrue();
|
||||
}
|
||||
|
||||
// ─── index-only and index+location cases ──────────────────────────────────
|
||||
|
||||
@Test
|
||||
void exactly_index() {
|
||||
assertThat(overwritable("C-0029", null)).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void index_plus_location_equal_to_current() {
|
||||
assertThat(overwritable("C-0029 – Berlin", "Berlin")).isTrue();
|
||||
}
|
||||
|
||||
// ─── prose is left untouched ──────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void prose_segment_not_matching_location_is_skipped() {
|
||||
assertThat(overwritable("C-0029 – Brief an Mutter", "Berlin")).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void location_only_segment_is_skipped_when_no_current_location() {
|
||||
// No date label, and the doc has no location to compare against → cannot prove machine.
|
||||
assertThat(overwritable("C-0029 – Berlin", null)).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void title_not_starting_with_index_is_skipped() {
|
||||
assertThat(overwritable("Ganz anderer Titel", null)).isFalse();
|
||||
}
|
||||
|
||||
// ─── fail-closed guards ───────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void null_title_is_not_overwritable() {
|
||||
assertThat(DocumentTitleBackfillMatcher.isOverwritable(null, "C-0029", null)).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void null_index_is_not_overwritable() {
|
||||
assertThat(DocumentTitleBackfillMatcher.isOverwritable("C-0029 – 1916", null, null)).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void blank_index_is_not_overwritable() {
|
||||
assertThat(DocumentTitleBackfillMatcher.isOverwritable(" – 1916", " ", null)).isFalse();
|
||||
}
|
||||
|
||||
// ─── ReDoS / regex-metacharacter index is matched literally and terminates ─
|
||||
|
||||
@Test
|
||||
@Timeout(value = 5, unit = TimeUnit.SECONDS)
|
||||
void index_with_regex_metacharacters_is_matched_literally_and_terminates() {
|
||||
String hostileIndex = "C-0029(.*).pdf";
|
||||
// Literal prefix → matches; trailing date label → overwritable. Must not hang.
|
||||
assertThat(DocumentTitleBackfillMatcher.isOverwritable(
|
||||
hostileIndex + " – 1916", hostileIndex, null)).isTrue();
|
||||
// A title that does NOT start with the literal hostile index is skipped, also fast.
|
||||
assertThat(DocumentTitleBackfillMatcher.isOverwritable(
|
||||
"C-0029 – 1916", hostileIndex, null)).isFalse();
|
||||
}
|
||||
}
|
||||
@@ -132,6 +132,31 @@ class AdminControllerTest {
|
||||
.andExpect(jsonPath("$.count").value(3));
|
||||
}
|
||||
|
||||
// ─── POST /api/admin/backfill-titles (#726) ────────────────────────────────
|
||||
|
||||
@Test
|
||||
void backfillTitles_returns401_whenUnauthenticated() throws Exception {
|
||||
mockMvc.perform(post("/api/admin/backfill-titles").with(csrf()))
|
||||
.andExpect(status().isUnauthorized());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(roles = "USER")
|
||||
void backfillTitles_returns403_whenNotAdmin() throws Exception {
|
||||
mockMvc.perform(post("/api/admin/backfill-titles").with(csrf()))
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "ADMIN")
|
||||
void backfillTitles_returns200_withCount_whenAdmin() throws Exception {
|
||||
when(documentService.backfillTitles()).thenReturn(7);
|
||||
|
||||
mockMvc.perform(post("/api/admin/backfill-titles").with(csrf()))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.count").value(7));
|
||||
}
|
||||
|
||||
// ─── POST /api/admin/generate-thumbnails ───────────────────────────────────
|
||||
|
||||
@Test
|
||||
|
||||
Reference in New Issue
Block a user