Import normalizer: offline tool to normalize the raw archive spreadsheets #663
@@ -15,24 +15,45 @@ public record DocumentSearchResult(
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
int pageSize,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
int totalPages
|
||||
int totalPages,
|
||||
/**
|
||||
* Total number of undated documents (meta_date IS NULL) matching the current
|
||||
* filter context (q/tags/sender/receiver/status) across ALL pages — not the
|
||||
* undated rows on the current page. Computed independently of the "Nur
|
||||
* undatierte" toggle so it never collapses to the page slice (issue #668).
|
||||
*/
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
long undatedCount
|
||||
) {
|
||||
/**
|
||||
* Single-page convenience factory used by empty-result shortcuts and by tests that
|
||||
* don't care about paging. Treats the whole list as page 0 of itself.
|
||||
* don't care about paging. Treats the whole list as page 0 of itself. The undated
|
||||
* count defaults to 0 — the service overlays the real global count via
|
||||
* {@link #withUndatedCount(long)} before returning.
|
||||
*/
|
||||
public static DocumentSearchResult of(List<DocumentListItem> items) {
|
||||
int size = items.size();
|
||||
return new DocumentSearchResult(items, size, 0, size, size == 0 ? 0 : 1);
|
||||
return new DocumentSearchResult(items, size, 0, size, size == 0 ? 0 : 1, 0L);
|
||||
}
|
||||
|
||||
/**
|
||||
* Paged factory used by the service when it has a real Pageable + full match count
|
||||
* (e.g. from Spring's Page<T> or from an in-memory sort-then-slice).
|
||||
* (e.g. from Spring's Page<T> or from an in-memory sort-then-slice). The undated
|
||||
* count defaults to 0 — the service overlays the real global count via
|
||||
* {@link #withUndatedCount(long)} before returning.
|
||||
*/
|
||||
public static DocumentSearchResult paged(List<DocumentListItem> slice, Pageable pageable, long totalElements) {
|
||||
int pageSize = pageable.getPageSize();
|
||||
int totalPages = pageSize == 0 ? 0 : (int) ((totalElements + pageSize - 1) / pageSize);
|
||||
return new DocumentSearchResult(slice, totalElements, pageable.getPageNumber(), pageSize, totalPages);
|
||||
return new DocumentSearchResult(slice, totalElements, pageable.getPageNumber(), pageSize, totalPages, 0L);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a copy with the global undated count overlaid, leaving every other
|
||||
* field untouched. Lets the service compute the count once and attach it to
|
||||
* whichever result shape the search path produced.
|
||||
*/
|
||||
public DocumentSearchResult withUndatedCount(long undatedCount) {
|
||||
return new DocumentSearchResult(items, totalElements, pageNumber, pageSize, totalPages, undatedCount);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -669,6 +669,43 @@ public class DocumentService {
|
||||
public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir, TagOperator tagOperator, boolean undated, Pageable pageable) {
|
||||
boolean hasText = StringUtils.hasText(text);
|
||||
|
||||
List<UUID> rankedIds = null;
|
||||
if (hasText) {
|
||||
rankedIds = documentRepository.findAllMatchingIdsByFts(text);
|
||||
// FTS matched nothing → no results and, by definition, no undated matches either.
|
||||
if (rankedIds.isEmpty()) return DocumentSearchResult.of(List.of());
|
||||
}
|
||||
|
||||
// Global undated count for the current filter (q/tags/sender/receiver/status),
|
||||
// forcing undatedOnly(true) and IGNORING the user's "Nur undatierte" toggle so
|
||||
// it never collapses to the page slice and never double-counts (issue #668).
|
||||
long undatedCount = countUndatedForFilter(hasText, rankedIds, from, to, sender, receiver, tags, tagQ, status, tagOperator);
|
||||
|
||||
return runSearch(text, hasText, rankedIds, from, to, sender, receiver, tags, tagQ, status, sort, dir, tagOperator, undated, pageable)
|
||||
.withUndatedCount(undatedCount);
|
||||
}
|
||||
|
||||
/**
|
||||
* Counts every undated document (meta_date IS NULL) matching the active filter,
|
||||
* across all pages, independent of the undated toggle. Reuses {@link #buildSearchSpec}
|
||||
* with {@code undated=true} forced so the count tracks q/tags/sender/receiver/status.
|
||||
* A {@code from}/{@code to} range excludes undated rows by the collision rule (#668),
|
||||
* so the count is legitimately 0 inside a date range.
|
||||
*/
|
||||
private long countUndatedForFilter(boolean hasText, List<UUID> ftsIds,
|
||||
LocalDate from, LocalDate to, UUID sender, UUID receiver,
|
||||
List<String> tags, String tagQ, DocumentStatus status, TagOperator tagOperator) {
|
||||
Specification<Document> undatedSpec = buildSearchSpec(
|
||||
hasText, ftsIds, from, to, sender, receiver, tags, tagQ, status, tagOperator, true);
|
||||
return documentRepository.count(undatedSpec);
|
||||
}
|
||||
|
||||
/** The original search dispatch — produces the page slice + totals, sans undated count. */
|
||||
private DocumentSearchResult runSearch(String text, boolean hasText, List<UUID> rankedIds,
|
||||
LocalDate from, LocalDate to, UUID sender, UUID receiver,
|
||||
List<String> tags, String tagQ, DocumentStatus status,
|
||||
DocumentSort sort, String dir, TagOperator tagOperator,
|
||||
boolean undated, Pageable pageable) {
|
||||
// Pure-text RELEVANCE: push pagination into SQL — skip findAllMatchingIdsByFts entirely (ADR-008).
|
||||
// An active undated filter must NOT take this path: it bypasses buildSearchSpec, so the
|
||||
// undatedOnly predicate would be silently dropped.
|
||||
@@ -676,12 +713,6 @@ public class DocumentService {
|
||||
return relevanceSortedPageFromSql(text, pageable);
|
||||
}
|
||||
|
||||
List<UUID> rankedIds = null;
|
||||
if (hasText) {
|
||||
rankedIds = documentRepository.findAllMatchingIdsByFts(text);
|
||||
if (rankedIds.isEmpty()) return DocumentSearchResult.of(List.of());
|
||||
}
|
||||
|
||||
Specification<Document> spec = buildSearchSpec(
|
||||
hasText, rankedIds, from, to, sender, receiver, tags, tagQ, status, tagOperator, undated);
|
||||
|
||||
|
||||
@@ -108,6 +108,83 @@ class DocumentSearchPagedIntegrationTest {
|
||||
assertThat(result.totalPages()).isEqualTo(3);
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_undatedCount_isGlobalFilteredTotal_notPageSlice() {
|
||||
// Seed 70 undated docs on top of the 120 dated ones. With a 50-per-page
|
||||
// window the undated rows span multiple pages, so a page-local count could
|
||||
// never exceed 50 — the global count must be the full 70 (issue #668).
|
||||
int undatedTotal = 70;
|
||||
for (int i = 0; i < undatedTotal; i++) {
|
||||
documentRepository.save(Document.builder()
|
||||
.title("Undatiert-" + String.format("%03d", i))
|
||||
.originalFilename("undatiert-" + i + ".pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.metaDatePrecision(DatePrecision.UNKNOWN)
|
||||
.documentDate(null)
|
||||
.build());
|
||||
}
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
|
||||
|
||||
// Global undated count is the full undated total, independent of page size.
|
||||
assertThat(result.undatedCount()).isEqualTo(undatedTotal);
|
||||
// Total matches both dated + undated (no undated-only filter applied).
|
||||
assertThat(result.totalElements()).isEqualTo(FIXTURE_SIZE + undatedTotal);
|
||||
// The first DATE-DESC page is all dated rows (nulls last), so a page-local
|
||||
// tally would report 0 undated — proving the count is not page-derived.
|
||||
assertThat(result.items()).allMatch(item -> item.documentDate() != null);
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_undatedCount_ignoresUndatedOnlyToggle() {
|
||||
// The "Nur undatierte" toggle must not skew the count: whether undated=true or
|
||||
// false, the global undated count for the same filter is identical (issue #668).
|
||||
int undatedTotal = 12;
|
||||
for (int i = 0; i < undatedTotal; i++) {
|
||||
documentRepository.save(Document.builder()
|
||||
.title("U-" + i)
|
||||
.originalFilename("u-" + i + ".pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.metaDatePrecision(DatePrecision.UNKNOWN)
|
||||
.documentDate(null)
|
||||
.build());
|
||||
}
|
||||
|
||||
DocumentSearchResult unfiltered = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
|
||||
DocumentSearchResult undatedOnly = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, true, PageRequest.of(0, 50));
|
||||
|
||||
assertThat(unfiltered.undatedCount()).isEqualTo(undatedTotal);
|
||||
assertThat(undatedOnly.undatedCount()).isEqualTo(undatedTotal);
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_undatedCount_isZero_insideDateRange() {
|
||||
// A from/to range excludes undated rows by the collision rule (#668), so the
|
||||
// global undated count inside a range is legitimately 0 even when undated docs exist.
|
||||
for (int i = 0; i < 5; i++) {
|
||||
documentRepository.save(Document.builder()
|
||||
.title("U-range-" + i)
|
||||
.originalFilename("u-range-" + i + ".pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.metaDatePrecision(DatePrecision.UNKNOWN)
|
||||
.documentDate(null)
|
||||
.build());
|
||||
}
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, LocalDate.of(1900, 1, 1), LocalDate.of(2000, 12, 31),
|
||||
null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
|
||||
|
||||
assertThat(result.undatedCount()).isZero();
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_differentPagesReturnDisjointSlices() {
|
||||
DocumentSearchResult page0 = documentService.searchDocuments(
|
||||
|
||||
@@ -99,4 +99,32 @@ class DocumentSearchResultTest {
|
||||
assertThat(schema.requiredMode()).isEqualTo(Schema.RequiredMode.REQUIRED);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void undatedCount_component_is_annotated_as_required_in_openapi_schema() throws NoSuchFieldException {
|
||||
Schema schema = DocumentSearchResult.class.getDeclaredField("undatedCount").getAnnotation(Schema.class);
|
||||
assertThat(schema).isNotNull();
|
||||
assertThat(schema.requiredMode()).isEqualTo(Schema.RequiredMode.REQUIRED);
|
||||
}
|
||||
|
||||
@Test
|
||||
void factories_default_undatedCount_to_zero() {
|
||||
assertThat(DocumentSearchResult.of(List.of()).undatedCount()).isZero();
|
||||
assertThat(DocumentSearchResult.paged(List.of(), PageRequest.of(0, 50), 0L).undatedCount()).isZero();
|
||||
}
|
||||
|
||||
@Test
|
||||
void withUndatedCount_overlays_count_and_preserves_other_fields() {
|
||||
DocumentSearchResult base = DocumentSearchResult.paged(
|
||||
List.of(item(UUID.randomUUID())), PageRequest.of(1, 50), 120L);
|
||||
|
||||
DocumentSearchResult withCount = base.withUndatedCount(7L);
|
||||
|
||||
assertThat(withCount.undatedCount()).isEqualTo(7L);
|
||||
assertThat(withCount.items()).isEqualTo(base.items());
|
||||
assertThat(withCount.totalElements()).isEqualTo(120L);
|
||||
assertThat(withCount.pageNumber()).isEqualTo(1);
|
||||
assertThat(withCount.pageSize()).isEqualTo(50);
|
||||
assertThat(withCount.totalPages()).isEqualTo(3);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user