"""Read .xlsx sheets into neutral list[list[str]] and map headers to fields.""" import datetime from pathlib import Path import openpyxl def _cell_to_str(value) -> str: if value is None: return "" if isinstance(value, datetime.datetime): return value.date().isoformat() if isinstance(value, datetime.date): return value.isoformat() if isinstance(value, float) and value.is_integer(): return str(int(value)) if isinstance(value, int): return str(value) return str(value).strip() def read_sheet(path: Path, sheet_name: str) -> list[list[str]]: wb = openpyxl.load_workbook(path, read_only=True, data_only=True) if sheet_name not in wb.sheetnames: raise ValueError(f"Sheet '{sheet_name}' not found in {path.name}; sheets: {wb.sheetnames}") ws = wb[sheet_name] rows = [[_cell_to_str(v) for v in row] for row in ws.iter_rows(values_only=True)] wb.close() return rows def _norm_header(text: str) -> str: return " ".join(text.lower().split()) def build_header_map(header_row: list[str], field_map: dict[str, str], required: set[str]): """Return (field->col_index, unknown_headers). Raise ValueError if a required field is missing.""" fields: dict[str, int] = {} unknown: list[str] = [] for idx, raw in enumerate(header_row): key = _norm_header(raw) if key in field_map: fields[field_map[key]] = idx elif raw.strip(): unknown.append(raw) missing = required - set(fields) if missing: raise ValueError(f"Required header(s) missing: {sorted(missing)} (found headers: {header_row})") return fields, unknown