"""Write canonical .xlsx outputs and review .csv files."""
import csv
import datetime
from pathlib import Path

import openpyxl

_PIPE = "|"

# Pinned workbook metadata so reruns are content-deterministic (NFR-IDEM-01); openpyxl
# otherwise stamps docProps with the current time on every save.
_FIXED_TS = datetime.datetime(2020, 1, 1, 0, 0, 0)


def _join(value) -> str:
    """Flatten a record attribute into the text stored in a cell.

    Lists are joined with ``_PIPE`` (each element passed through ``str``),
    ``None`` becomes the empty string, and anything else is ``str(value)``.
    """
    if isinstance(value, list):
        return _PIPE.join(str(v) for v in value)
    return "" if value is None else str(value)


def _csv_safe(value) -> str:
    """Neutralise spreadsheet formula injection (CWE-1236) in human-opened review CSVs.

    The value is stringified (``None`` becomes ``""``). If its first character
    is one of ``=``, ``+``, ``-``, ``@``, tab or carriage return, a single
    quote is prepended; otherwise the text is returned unchanged.
    """
    s = "" if value is None else str(value)
    return "'" + s if s[:1] in ("=", "+", "-", "@", "\t", "\r") else s


DOC_COLUMNS = [
    "index", "box", "folder",
    "sender_person_id", "sender_name",
    "receiver_person_ids", "receiver_names",
    "date_iso", "date_raw", "date_precision",
    "location", "tags", "summary",
    "source_row", "needs_review",
]

PERSON_COLUMNS = [
    "person_id", "last_name", "first_name", "maiden_name", "title", "nickname",
    "birth_date", "birth_date_raw", "birth_place",
    "death_date", "death_date_raw", "death_place",
    "spouse", "generation", "notes", "aliases", "provisional",
]


def _write_xlsx(records, columns, path: Path) -> None:
    """Write ``records`` to a single-sheet workbook at ``path``.

    Row 1 holds ``columns``; each following row holds, for one record, the
    ``_join``-ed value of the attribute named by each column. Missing parent
    directories of ``path`` are created.

    Raises:
        AttributeError: if a record lacks one of the ``columns`` attributes.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.append(columns)
    for row_idx, rec in enumerate(records, start=2):
        for col_idx, col in enumerate(columns, start=1):
            cell = ws.cell(row=row_idx, column=col_idx, value=_join(getattr(rec, col)))
            # openpyxl types any str that starts with "=" as a formula, so a
            # free-text value such as "=> see box 3" would be saved as a live
            # formula cell instead of text. Force such cells back to string
            # type; the stored text itself is left untouched.
            if cell.data_type == "f":
                cell.data_type = "s"
    wb.properties.created = _FIXED_TS
    wb.properties.modified = _FIXED_TS
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    wb.save(path)


def write_documents_xlsx(docs, path: Path) -> None:
    """Write document records to ``path`` using ``DOC_COLUMNS``."""
    _write_xlsx(docs, DOC_COLUMNS, path)


def write_persons_xlsx(people, path: Path) -> None:
    """Write person records to ``path`` using ``PERSON_COLUMNS``."""
    _write_xlsx(people, PERSON_COLUMNS, path)


def write_review_csv(path: Path, header: list[str], rows: list[list]) -> None:
    """Write a UTF-8 review CSV: ``header`` first, then one line per row.

    The header is written as given; every data cell is passed through
    ``_csv_safe`` first. Missing parent directories of ``path`` are created.
    """
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    # newline="" lets the csv module control line endings itself.
    with open(path, "w", encoding="utf-8", newline="") as f:
        w = csv.writer(f)
        w.writerow(header)
        for row in rows:
            w.writerow([_csv_safe(c) for c in row])


def write_summary(path: Path, stats: dict) -> None:
    """Render a grouped, scannable summary as UTF-8 text at ``path``.

    Keys beginning with '#' are section headers: each emits a blank line
    followed by the key text (without the '#', stripped) and a colon; the
    mapped value is ignored. Every other key emits an indented ``key: value``
    line. Entries are written in the dict's iteration order. Leading and
    trailing whitespace of the whole text is stripped and a single final
    newline is added. Missing parent directories of ``path`` are created.
    """
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    lines = []
    for k, v in stats.items():
        if k.startswith("#"):
            lines.append("")
            lines.append(k[1:].strip() + ":")
        else:
            lines.append(f" {k}: {v}")
    Path(path).write_text("\n".join(lines).strip() + "\n", encoding="utf-8")