fix(normalizer): defang leading LF in CSV + assert pinned workbook timestamp

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-25 14:43:45 +02:00
parent ff1a7c07f1
commit df00ea4238
2 changed files with 9 additions and 1 deletion

View File

@@ -31,6 +31,14 @@ def test_write_documents_xlsx_joins_lists(tmp_path):
assert row["receiver_person_ids"] == "a|b"
assert row["needs_review"] == "unparsed_date"
def test_write_documents_xlsx_pins_timestamp(tmp_path):
    """Determinism (NFR-IDEM-01): the written workbook carries a pinned date.

    Writes a single document to an ``.xlsx`` file, reloads it with openpyxl and
    checks that the workbook's ``created`` property is 2020-01-01 rather than
    the time of the run. Only ``created`` is asserted here.
    """
    target = tmp_path / "d.xlsx"
    writers.write_documents_xlsx([documents.CanonicalDocument(index="W-0001")], target)

    # Reload from disk so the assertion sees what was actually serialised.
    created = openpyxl.load_workbook(target).properties.created
    assert (created.year, created.month, created.day) == (2020, 1, 1)
def test_write_review_csv(tmp_path):
out = tmp_path / "r.csv"
writers.write_review_csv(out, ["raw", "count"], [["?", 3], ["x", 1]])

View File

@@ -19,7 +19,7 @@ def _join(value):
def _csv_safe(value):
"""Neutralise spreadsheet formula injection (CWE-1236) in human-opened review CSVs."""
s = "" if value is None else str(value)
return "'" + s if s[:1] in ("=", "+", "-", "@", "\t", "\r") else s
return "'" + s if s[:1] in ("=", "+", "-", "@", "\t", "\r", "\n") else s
DOC_COLUMNS = ["index", "box", "folder", "sender_person_id", "sender_name",