"""Tests for override loading and the XLSX / review-CSV / summary writers."""

import csv

import openpyxl

import documents
import overrides
import writers


def _read_csv_rows(path):
    """Return every row of the UTF-8 CSV at *path* as a list of string lists."""
    # newline="" is what the csv module asks for; the context manager
    # guarantees the handle is closed instead of leaking until GC.
    with path.open(encoding="utf-8", newline="") as handle:
        return list(csv.reader(handle))


def _load_header_and_first_row(path):
    """Return ``(header, row)`` read from the active sheet of the workbook.

    ``header`` is the list of cell values in sheet row 1; ``row`` maps each
    header value to the value in the same column of sheet row 2.
    """
    sheet = openpyxl.load_workbook(path).active
    header = [cell.value for cell in sheet[1]]
    row = {name: cell.value for name, cell in zip(header, sheet[2])}
    return header, row


def test_load_overrides_missing_files(tmp_path):
    """Both override mappings are empty when neither CSV file exists."""
    date_overrides, name_overrides = overrides.load_overrides(
        tmp_path / "dates.csv", tmp_path / "names.csv"
    )
    assert date_overrides == {}
    assert name_overrides == {}


def test_load_overrides_parsed(tmp_path):
    """Rows of the date and name CSVs are keyed by their ``raw`` column."""
    dates_path = tmp_path / "dates.csv"
    dates_path.write_text(
        "raw,iso,precision\n13.5.65,1965-05-13,DAY\n", encoding="utf-8"
    )
    names_path = tmp_path / "names.csv"
    names_path.write_text(
        "raw,person_id\nEugenie Müller,de-gruyter-eugenie\n", encoding="utf-8"
    )

    date_overrides, name_overrides = overrides.load_overrides(
        dates_path, names_path
    )

    assert date_overrides["13.5.65"] == ("1965-05-13", "DAY")
    assert name_overrides["Eugenie Müller"] == "de-gruyter-eugenie"


def test_write_documents_xlsx_joins_lists(tmp_path):
    """List-valued fields are written as a single ``|``-joined cell."""
    doc = documents.CanonicalDocument(
        index="W-0001",
        receiver_person_ids=["a", "b"],
        receiver_names=["A", "B"],
        tags=["Brautbriefe"],
        date_precision="DAY",
        needs_review=["unparsed_date"],
    )
    out = tmp_path / "docs.xlsx"
    writers.write_documents_xlsx([doc], out)

    header, row = _load_header_and_first_row(out)

    assert "receiver_person_ids" in header
    assert "needs_review" in header
    assert row["receiver_person_ids"] == "a|b"
    assert row["needs_review"] == "unparsed_date"


def test_write_documents_xlsx_carries_file_and_date_end(tmp_path):
    """The ``file`` and ``date_end`` fields appear as columns with their values."""
    doc = documents.CanonicalDocument(
        index="H-0730",
        file="H-0730.pdf",
        date_iso="1917-01-10",
        date_precision="RANGE",
        date_end="1917-01-11",
    )
    out = tmp_path / "docs.xlsx"
    writers.write_documents_xlsx([doc], out)

    header, row = _load_header_and_first_row(out)

    assert "file" in header
    assert "date_end" in header
    assert row["file"] == "H-0730.pdf"
    assert row["date_end"] == "1917-01-11"


def test_write_documents_xlsx_pins_timestamp(tmp_path):
    """The workbook's ``created`` property is the fixed date 2020-01-01."""
    # determinism (NFR-IDEM-01): workbook created/modified are pinned,
    # not the current time
    doc = documents.CanonicalDocument(index="W-0001")
    out = tmp_path / "d.xlsx"
    writers.write_documents_xlsx([doc], out)

    created = openpyxl.load_workbook(out).properties.created

    assert (created.year, created.month, created.day) == (2020, 1, 1)


def test_write_review_csv(tmp_path):
    """The header comes first and data values are read back as strings."""
    out = tmp_path / "r.csv"
    writers.write_review_csv(out, ["raw", "count"], [["?", 3], ["x", 1]])

    rows = _read_csv_rows(out)

    assert rows[0] == ["raw", "count"]
    assert rows[1] == ["?", "3"]


def test_write_review_csv_defangs_formula_injection(tmp_path):
    """Cells starting with ``=`` or ``-`` are written with a leading ``'``."""
    out = tmp_path / "r.csv"
    writers.write_review_csv(
        out, ["raw", "count"], [["=cmd|'/C calc'!A0", 1], ["-2+3", 2]]
    )

    rows = _read_csv_rows(out)

    assert rows[1][0].startswith("'=")  # leading '=' neutralised
    assert rows[2][0].startswith("'-")


def test_write_summary_sections(tmp_path):
    """``# NAME`` keys show up as ``NAME:`` and other keys as ``key: value``."""
    out = tmp_path / "s.txt"
    writers.write_summary(
        out,
        {"# INPUTS": "", "rows": 10, "# DATES": "", "unknown_date_rate": "3.2%"},
    )

    text = out.read_text(encoding="utf-8")

    assert "INPUTS:" in text
    assert "DATES:" in text
    assert " rows: 10" in text