Import normalizer: offline tool to normalize the raw archive spreadsheets #663

Merged
marcel merged 172 commits from docs/import-migration into main 2026-05-28 15:05:51 +02:00
Showing only changes of commit 7c017eca2a - Show all commits

View File

@@ -38,11 +38,11 @@ def test_run_end_to_end(tmp_path):
assert stats["skipped_x_suffix"] == 1
assert stats["blank_index_rows"] == 1
assert stats["duplicate_index_rows"] == 2
assert stats["unresolved_unknown"] >= 1 # the "?" receiver is an UNKNOWN-class name
assert (review_dir / "skipped-x-suffix.csv").exists()
assert (review_dir / "unparsed-dates.csv").exists()
# C-0001's "Freitag 1919" is unparseable -> must appear in the review file (NFR-DATA-01)
assert "Freitag 1919" in (review_dir / "unparsed-dates.csv").read_text(encoding="utf-8")
assert (out_dir / "canonical-documents.xlsx").exists() # (keep existing asserts above)
assert (review_dir / "unresolved-names.csv").exists()
unresolved_text = (review_dir / "unresolved-names.csv").read_text(encoding="utf-8")
assert "unknown" in unresolved_text and "?" in unresolved_text # the "?" receiver