Import normalizer: offline tool to normalize the raw archive spreadsheets #663

Merged
marcel merged 172 commits from docs/import-migration into main 2026-05-28 15:05:51 +02:00
2 changed files with 70 additions and 6 deletions
Showing only changes of commit c816934391 - Show all commits

View File

@@ -159,7 +159,13 @@ public class DocumentImporter {
Person sender = resolveSender(row.get("sender_person_id"), senderName); Person sender = resolveSender(row.get("sender_person_id"), senderName);
Set<Person> receivers = resolveReceivers(row.get("receiver_person_ids")); Set<Person> receivers = resolveReceivers(row.get("receiver_person_ids"));
doc.setTitle(index); LocalDate date = parseIsoDate(row.get("date_iso"));
DatePrecision precision = parsePrecision(row.get("date_precision"));
LocalDate dateEnd = parseIsoDate(row.get("date_end"));
String dateRaw = blankToNull(row.get("date_raw"));
String location = blankToNull(row.get("location"));
doc.setTitle(buildTitle(index, date, precision, dateEnd, dateRaw, location));
doc.setStatus(status); doc.setStatus(status);
doc.setFilePath(s3Key); doc.setFilePath(s3Key);
doc.setContentType(contentType); doc.setContentType(contentType);
@@ -171,17 +177,31 @@ public class DocumentImporter {
doc.getReceivers().clear(); doc.getReceivers().clear();
doc.getReceivers().addAll(receivers); doc.getReceivers().addAll(receivers);
doc.setReceiverText(blankToNull(receiverNames)); doc.setReceiverText(blankToNull(receiverNames));
doc.setDocumentDate(parseIsoDate(row.get("date_iso"))); doc.setDocumentDate(date);
doc.setMetaDatePrecision(parsePrecision(row.get("date_precision"))); doc.setMetaDatePrecision(precision);
doc.setMetaDateEnd(parseIsoDate(row.get("date_end"))); doc.setMetaDateEnd(dateEnd);
doc.setMetaDateRaw(blankToNull(row.get("date_raw"))); doc.setMetaDateRaw(dateRaw);
doc.setLocation(blankToNull(row.get("location"))); doc.setLocation(location);
doc.setSummary(blankToNull(row.get("summary"))); doc.setSummary(blankToNull(row.get("summary")));
attachTag(doc, row.get("tags")); attachTag(doc, row.get("tags"));
doc.setMetadataComplete(doc.getDocumentDate() != null || sender != null || !receivers.isEmpty()); doc.setMetadataComplete(doc.getDocumentDate() != null || sender != null || !receivers.isEmpty());
return doc; return doc;
} }
// Assembles the document title: the index, then (optionally) a date label, then
// (optionally) the location, each separated by a single space.
// The date label is produced by the shared DocumentTitleFormatter so the title shows
// the date at the HONEST precision (never a fabricated day). It is omitted entirely
// when there is no date or the precision is UNKNOWN.
private static String buildTitle(String index, LocalDate date, DatePrecision precision,
        LocalDate end, String raw, String location) {
    StringBuilder sb = new StringBuilder(index);

    // Only a present date with a known precision contributes a label.
    boolean dateIsUsable = !(date == null || precision == DatePrecision.UNKNOWN);
    if (dateIsUsable) {
        sb.append(" ");
        sb.append(DocumentTitleFormatter.formatTitleDate(date, precision, end, raw));
    }

    // Blank or missing locations are left out rather than adding a dangling separator.
    boolean locationIsUsable = !(location == null || location.isBlank());
    if (locationIsUsable) {
        sb.append(" ");
        sb.append(location);
    }

    return sb.toString();
}
// ─── attribution routing — register-first, always retain raw ───────────────────── // ─── attribution routing — register-first, always retain raw ─────────────────────
private Person resolveSender(String slug, String rawName) { private Person resolveSender(String slug, String rawName) {

View File

@@ -404,6 +404,50 @@ class DocumentImporterTest {
d.getReceivers().isEmpty() && d.getTags().isEmpty())); d.getReceivers().isEmpty() && d.getTags().isEmpty()));
} }
// ─── title carries the honest date label — never a precision the data lacks ───────
@Test
void load_buildsTitleWithMonthLabel_whenPrecisionIsMonth(@TempDir Path tempDir) throws Exception {
    // Arrange: point the importer at the temp dir; W-0100 does not exist yet and
    // save(...) hands back whatever document it is given.
    ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
    when(documentService.findByOriginalFilename("W-0100")).thenReturn(Optional.empty());
    when(documentService.save(any())).thenAnswer(call -> call.getArgument(0));
    Map<String, String> monthRow = docRow("W-0100", "", "", "", "", "",
            "1916-06-01", "Juni 1916", "MONTH", "");
    Path workbook = writeDocs(tempDir, monthRow);

    // Act
    importer.load(workbook.toFile());

    // Assert: the saved title carries the month label but no day-level date.
    verify(documentService).save(org.mockito.ArgumentMatchers.argThat(saved -> {
        String title = saved.getTitle();
        return title.contains("Juni 1916") && !title.contains("1. Juni");
    }));
}
@Test
void load_buildsTitleWithFullDate_whenPrecisionIsDay(@TempDir Path tempDir) throws Exception {
    // Arrange: point the importer at the temp dir; W-0101 does not exist yet and
    // save(...) hands back whatever document it is given.
    ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
    when(documentService.findByOriginalFilename("W-0101")).thenReturn(Optional.empty());
    when(documentService.save(any())).thenAnswer(call -> call.getArgument(0));
    Map<String, String> dayRow = docRow("W-0101", "", "", "", "", "",
            "1943-12-24", "24.12.1943", "DAY", "");
    Path workbook = writeDocs(tempDir, dayRow);

    // Act
    importer.load(workbook.toFile());

    // Assert: with DAY precision the saved title spells out the full date.
    verify(documentService).save(org.mockito.ArgumentMatchers.argThat(saved -> {
        String title = saved.getTitle();
        return title.contains("24. Dezember 1943");
    }));
}
@Test
void load_buildsTitleFromIndexOnly_whenDateUnknown(@TempDir Path tempDir) throws Exception {
    // Arrange: point the importer at the temp dir; W-0102 does not exist yet and
    // save(...) hands back whatever document it is given.
    ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
    when(documentService.findByOriginalFilename("W-0102")).thenReturn(Optional.empty());
    when(documentService.save(any())).thenAnswer(call -> call.getArgument(0));
    Map<String, String> undatedRow = docRow("W-0102", "", "", "", "", "",
            "", "?", "UNKNOWN", "");
    Path workbook = writeDocs(tempDir, undatedRow);

    // Act
    importer.load(workbook.toFile());

    // Assert: with no date and UNKNOWN precision the title is exactly the index.
    verify(documentService).save(org.mockito.ArgumentMatchers.argThat(saved -> {
        String title = saved.getTitle();
        return title.equals("W-0102");
    }));
}
// ─── helpers ───────────────────────────────────────────────────────────────────── // ─── helpers ─────────────────────────────────────────────────────────────────────
private Map<String, String> docRow(String index, String file, String senderId, String senderName, private Map<String, String> docRow(String index, String file, String senderId, String senderName,