Import normalizer: offline tool to normalize the raw archive spreadsheets #663

Merged
marcel merged 172 commits from docs/import-migration into main 2026-05-28 15:05:51 +02:00
3 changed files with 58 additions and 2 deletions
Showing only changes of commit b52bf60913 - Show all commits

View File

@@ -839,11 +839,12 @@ public class DocumentService {
// Undated documents (null documentDate) must order last regardless of // Undated documents (null documentDate) must order last regardless of
// direction — Postgres puts NULLs FIRST on ASC by default, which would // direction — Postgres puts NULLs FIRST on ASC by default, which would
// surface the undated pile at the top with no explanation (issue #668). // surface the undated pile at the top with no explanation (issue #668).
// The createdAt tiebreaker gives a stable total order when every row is // The title tiebreaker gives a stable total order when every row is
// null-dated (the "Nur undatierte" filter), so pagination is deterministic. // null-dated (the "Nur undatierte" filter), so pagination is deterministic.
// title is @Column(nullable=false), so it is always present.
return Sort.by( return Sort.by(
new Sort.Order(direction, "documentDate").nullsLast(), new Sort.Order(direction, "documentDate").nullsLast(),
Sort.Order.asc("createdAt")); Sort.Order.asc("title"));
} }
// SENDER and RECEIVER are sorted in-memory before this method is called // SENDER and RECEIVER are sorted in-memory before this method is called
return switch (sort) { return switch (sort) {

View File

@@ -1463,6 +1463,11 @@ class DocumentServiceTest {
assertThat(dateOrder).isNotNull(); assertThat(dateOrder).isNotNull();
assertThat(dateOrder.getDirection()).isEqualTo(Sort.Direction.DESC); assertThat(dateOrder.getDirection()).isEqualTo(Sort.Direction.DESC);
assertThat(dateOrder.getNullHandling()).isEqualTo(Sort.NullHandling.NULLS_LAST); assertThat(dateOrder.getNullHandling()).isEqualTo(Sort.NullHandling.NULLS_LAST);
// Owner-decided tiebreaker (#668): title ASC, not createdAt.
Sort.Order tiebreak = captor.getValue().getSort().getOrderFor("title");
assertThat(tiebreak).isNotNull();
assertThat(tiebreak.getDirection()).isEqualTo(Sort.Direction.ASC);
assertThat(captor.getValue().getSort().getOrderFor("createdAt")).isNull();
} }
@Test @Test
@@ -1481,6 +1486,11 @@ class DocumentServiceTest {
assertThat(dateOrder).isNotNull(); assertThat(dateOrder).isNotNull();
assertThat(dateOrder.getDirection()).isEqualTo(Sort.Direction.ASC); assertThat(dateOrder.getDirection()).isEqualTo(Sort.Direction.ASC);
assertThat(dateOrder.getNullHandling()).isEqualTo(Sort.NullHandling.NULLS_LAST); assertThat(dateOrder.getNullHandling()).isEqualTo(Sort.NullHandling.NULLS_LAST);
// Owner-decided tiebreaker (#668): title ASC, not createdAt.
Sort.Order tiebreak = captor.getValue().getSort().getOrderFor("title");
assertThat(tiebreak).isNotNull();
assertThat(tiebreak.getDirection()).isEqualTo(Sort.Direction.ASC);
assertThat(captor.getValue().getSort().getOrderFor("createdAt")).isNull();
} }
@Test @Test

View File

@@ -63,6 +63,51 @@ class UndatedDocumentOrderingIntegrationTest {
assertThat(result.get(3).getDocumentDate()).isNull(); assertThat(result.get(3).getDocumentDate()).isNull();
} }
@Test
void sameDate_tiebreaksByTitleAsc_notCreatedAt_forBothDirections() throws Exception {
// Owner decision (#668): equal-date rows tie-break by title ASC, NOT
// createdAt. Insert two same-date docs so that createdAt order (insertion
// order) is the OPPOSITE of title order: the first-saved doc gets the later
// title ("zzz-first"), the second-saved doc gets the earlier title
// ("aaa-second"). If the tiebreaker were still createdAt-asc the first-saved
// row would lead; because it is title-asc the "aaa-second" row must lead —
// and it must lead in BOTH ASC and DESC date directions, since the date is
// equal so only the title tiebreaker decides.
//
// The Sort under test is built by the PRODUCTION resolveSort(DATE, dir) (via
// reflection — it is private), not hand-rolled here, so this test proves the
// real Postgres ordering that production emits, on real same-date rows.
documentRepository.deleteAll();
LocalDate sameDate = LocalDate.of(1920, 3, 3);
save("zzz-first", sameDate); // saved first → earlier createdAt
save("aaa-second", sameDate); // saved second → later createdAt
List<Document> asc = documentRepository.findAll(resolveProductionSort("ASC"));
assertThat(asc).extracting(Document::getTitle)
.containsExactly("aaa-second", "zzz-first");
List<Document> desc = documentRepository.findAll(resolveProductionSort("DESC"));
assertThat(desc).extracting(Document::getTitle)
.containsExactly("aaa-second", "zzz-first");
}
/**
* Invokes the production {@link DocumentService#resolveSort(DocumentSort, String)}
* for the DATE sort so the integration assertions exercise the real tiebreaker
* choice rather than a sort hand-built in the test.
*/
private Sort resolveProductionSort(String dir) throws Exception {
// resolveSort is a pure function of its arguments (uses no instance state), so a
// bean instance with null collaborators is sufficient to exercise it.
var ctor = DocumentService.class.getDeclaredConstructors()[0];
ctor.setAccessible(true);
Object[] args = new Object[ctor.getParameterCount()];
DocumentService service = (DocumentService) ctor.newInstance(args);
var m = DocumentService.class.getDeclaredMethod("resolveSort", DocumentSort.class, String.class);
m.setAccessible(true);
return (Sort) m.invoke(service, DocumentSort.DATE, dir);
}
@Test @Test
void undatedOnly_returnsExactlyTheNullDatedRows() { void undatedOnly_returnsExactlyTheNullDatedRows() {
List<Document> result = documentRepository.findAll(undatedOnly(true)); List<Document> result = documentRepository.findAll(undatedOnly(true));