feat(normalizer): overrides loader + xlsx/csv writers
Recovered from an entangled commit: these files were correct but had been bundled into an unrelated reader-dashboard commit by a concurrent session. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
52
tools/import-normalizer/tests/test_writers.py
Normal file
52
tools/import-normalizer/tests/test_writers.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import csv
|
||||
import openpyxl
|
||||
import overrides
|
||||
import writers
|
||||
import documents
|
||||
|
||||
def test_load_overrides_missing_files(tmp_path):
    """Loading overrides from two non-existent CSV paths yields two empty dicts."""
    # Neither file is created, so both paths point at nothing on disk.
    missing_dates = tmp_path / "dates.csv"
    missing_names = tmp_path / "names.csv"

    date_map, name_map = overrides.load_overrides(missing_dates, missing_names)

    assert date_map == {}
    assert name_map == {}
|
||||
|
||||
def test_load_overrides_parsed(tmp_path):
    """Override CSVs are parsed into lookup dicts keyed by the ``raw`` column.

    The dates file maps a raw string to an ``(iso, precision)`` tuple; the
    names file maps a raw name to its ``person_id``.
    """
    dates_csv = tmp_path / "dates.csv"
    names_csv = tmp_path / "names.csv"
    dates_csv.write_text("raw,iso,precision\n13.5.65,1965-05-13,DAY\n", encoding="utf-8")
    # The non-ASCII name checks that the UTF-8 content survives the round trip.
    names_csv.write_text("raw,person_id\nEugenie Müller,de-gruyter-eugenie\n", encoding="utf-8")

    date_map, name_map = overrides.load_overrides(dates_csv, names_csv)

    assert date_map["13.5.65"] == ("1965-05-13", "DAY")
    assert name_map["Eugenie Müller"] == "de-gruyter-eugenie"
|
||||
|
||||
def test_write_documents_xlsx_joins_lists(tmp_path):
    """List-valued document fields are written to the sheet as ``|``-joined text."""
    document = documents.CanonicalDocument(
        index="W-0001",
        receiver_person_ids=["a", "b"],
        receiver_names=["A", "B"],
        tags=["Brautbriefe"],
        date_precision="DAY",
        needs_review=["unparsed_date"],
    )
    xlsx_path = tmp_path / "docs.xlsx"

    writers.write_documents_xlsx([document], xlsx_path)

    sheet = openpyxl.load_workbook(xlsx_path).active
    # Row 1 holds the column names, row 2 the single document written above.
    header = [cell.value for cell in sheet[1]]
    assert "receiver_person_ids" in header
    assert "needs_review" in header

    first_row = dict(zip(header, (cell.value for cell in sheet[2])))
    assert first_row["receiver_person_ids"] == "a|b"
    assert first_row["needs_review"] == "unparsed_date"
|
||||
|
||||
def test_write_review_csv(tmp_path):
    """The review CSV starts with the header row, followed by the data rows.

    Values are compared as strings because ``csv.reader`` returns every
    field as text, so the integer ``3`` reads back as ``"3"``.
    """
    out = tmp_path / "r.csv"
    writers.write_review_csv(out, ["raw", "count"], [["?", 3], ["x", 1]])

    # Read inside a ``with`` block so the handle is closed deterministically
    # (the previous bare ``out.open()`` leaked it), and pass ``newline=""``
    # as the csv module requires for file objects.
    with out.open(encoding="utf-8", newline="") as fh:
        rows = list(csv.reader(fh))

    assert rows[0] == ["raw", "count"]
    assert rows[1] == ["?", "3"]
|
||||
|
||||
def test_write_review_csv_defangs_formula_injection(tmp_path):
    """Cells beginning with ``=`` or ``-`` are written with a leading apostrophe.

    Pins the spreadsheet formula-injection guard: the dangerous first
    character must still be present, but preceded by ``'``.
    """
    out = tmp_path / "r.csv"
    writers.write_review_csv(out, ["raw", "count"], [["=cmd|'/C calc'!A0", 1], ["-2+3", 2]])

    # Read inside a ``with`` block so the handle is closed deterministically
    # (the previous bare ``out.open()`` leaked it), and pass ``newline=""``
    # as the csv module requires for file objects.
    with out.open(encoding="utf-8", newline="") as fh:
        rows = list(csv.reader(fh))

    assert rows[1][0].startswith("'=")  # leading '=' neutralised
    assert rows[2][0].startswith("'-")  # leading '-' neutralised
|
||||
|
||||
def test_write_summary_sections(tmp_path):
    """Keys starting with ``# `` become ``NAME:`` section headers in the summary.

    Ordinary keys are expected to appear as ``key: value`` lines.
    """
    summary_path = tmp_path / "s.txt"
    entries = {"# INPUTS": "", "rows": 10, "# DATES": "", "unknown_date_rate": "3.2%"}

    writers.write_summary(summary_path, entries)

    text = summary_path.read_text(encoding="utf-8")
    assert "INPUTS:" in text
    assert "DATES:" in text
    assert " rows: 10" in text
|
||||
Reference in New Issue
Block a user