From df00ea42385d128c3fb2d1077b94a6bb6b33ae1b Mon Sep 17 00:00:00 2001
From: Marcel
Date: Mon, 25 May 2026 14:43:45 +0200
Subject: [PATCH] fix(normalizer): defang leading LF in CSV + assert pinned workbook timestamp

Co-Authored-By: Claude Opus 4.7
---
Review notes (not part of the commit message):
 * writers.py: _csv_safe now also prefixes values whose first character is LF
   with a single quote, alongside the existing "=", "+", "-", "@", TAB and CR.
 * tests/test_writers.py: the new test asserts only the year/month/day of
   wb.properties.created; the "modified" property mentioned in the test's
   comment is not asserted by this patch.

 tools/import-normalizer/tests/test_writers.py | 8 ++++++++
 tools/import-normalizer/writers.py            | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/tools/import-normalizer/tests/test_writers.py b/tools/import-normalizer/tests/test_writers.py
index 97bd7ce8..37c4e199 100644
--- a/tools/import-normalizer/tests/test_writers.py
+++ b/tools/import-normalizer/tests/test_writers.py
@@ -31,6 +31,14 @@ def test_write_documents_xlsx_joins_lists(tmp_path):
     assert row["receiver_person_ids"] == "a|b"
     assert row["needs_review"] == "unparsed_date"
 
+def test_write_documents_xlsx_pins_timestamp(tmp_path):
+    # determinism (NFR-IDEM-01): workbook created/modified are pinned, not the current time
+    doc = documents.CanonicalDocument(index="W-0001")
+    out = tmp_path / "d.xlsx"
+    writers.write_documents_xlsx([doc], out)
+    wb = openpyxl.load_workbook(out)
+    assert (wb.properties.created.year, wb.properties.created.month, wb.properties.created.day) == (2020, 1, 1)
+
 def test_write_review_csv(tmp_path):
     out = tmp_path / "r.csv"
     writers.write_review_csv(out, ["raw", "count"], [["?", 3], ["x", 1]])
diff --git a/tools/import-normalizer/writers.py b/tools/import-normalizer/writers.py
index ff24b055..700179f3 100644
--- a/tools/import-normalizer/writers.py
+++ b/tools/import-normalizer/writers.py
@@ -19,7 +19,7 @@ def _join(value):
 def _csv_safe(value):
     """Neutralise spreadsheet formula injection (CWE-1236) in human-opened review CSVs."""
     s = "" if value is None else str(value)
-    return "'" + s if s[:1] in ("=", "+", "-", "@", "\t", "\r") else s
+    return "'" + s if s[:1] in ("=", "+", "-", "@", "\t", "\r", "\n") else s
 
 
 DOC_COLUMNS = ["index", "box", "folder", "sender_person_id", "sender_name",