Add bool guard before the int branch in _cell_to_str so True/False cells are preserved as "True"/"False" instead of "1"/"0". Add two regression tests covering the fix and missing-sheet error. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
51 lines
1.8 KiB
Python
51 lines
1.8 KiB
Python
"""Read .xlsx sheets into neutral list[list[str]] and map headers to fields."""
|
|
import datetime
|
|
from pathlib import Path
|
|
import openpyxl
|
|
|
|
|
|
def _cell_to_str(value) -> str:
    """Convert one raw cell value into its string form.

    Conversion rules:
      * ``None`` becomes the empty string.
      * ``bool`` and ``int`` values are rendered with ``str()``, so
        ``True``/``False`` come out as ``"True"``/``"False"``.
      * ``datetime.datetime`` values are reduced to their date and rendered
        in ISO format; plain ``datetime.date`` values are rendered in ISO
        format as-is.
      * Floats holding a whole number lose the trailing ``.0``.
      * Anything else is passed through ``str()`` and stripped of
        surrounding whitespace.
    """
    if value is None:
        return ""

    # bool is a subclass of int, so this single check covers both; str()
    # keeps True/False readable rather than collapsing them to 1/0.
    if isinstance(value, int):
        return str(value)

    # datetime.datetime is a subclass of datetime.date: one isinstance test
    # catches both, and the time-of-day part is dropped for datetimes.
    if isinstance(value, datetime.date):
        day = value.date() if isinstance(value, datetime.datetime) else value
        return day.isoformat()

    # Whole-number floats (e.g. 3.0) are rendered as integers ("3").
    if isinstance(value, float) and value.is_integer():
        return str(int(value))

    return str(value).strip()
|
|
|
|
|
|
def read_sheet(path: Path, sheet_name: str) -> list[list[str]]:
    """Read one worksheet of an .xlsx workbook as rows of strings.

    The workbook is opened with ``read_only=True, data_only=True`` and every
    cell value is converted with ``_cell_to_str``.

    Args:
        path: Location of the .xlsx file.
        sheet_name: Exact name of the worksheet to read.

    Returns:
        One list per worksheet row, each holding the string form of that
        row's cells.

    Raises:
        ValueError: If ``sheet_name`` is not one of the workbook's sheets.
    """
    wb = openpyxl.load_workbook(path, read_only=True, data_only=True)
    try:
        if sheet_name not in wb.sheetnames:
            raise ValueError(f"Sheet '{sheet_name}' not found in {path.name}; sheets: {wb.sheetnames}")
        ws = wb[sheet_name]
        return [[_cell_to_str(v) for v in row] for row in ws.iter_rows(values_only=True)]
    finally:
        # Always release the workbook, including on the missing-sheet error
        # path and when reading fails part-way: a read-only workbook keeps
        # the underlying file open until close() is called.
        wb.close()
|
|
|
|
|
|
def _norm_header(text: str) -> str:
    """Return *text* lower-cased with whitespace runs collapsed to one space.

    Leading and trailing whitespace is removed as a side effect of the
    split/join round trip.
    """
    words = text.lower().split()
    return " ".join(words)
|
|
|
|
|
|
def build_header_map(header_row: list[str], field_map: dict[str, str], required: set[str]):
    """Match header cells against *field_map* and locate each field's column.

    Every cell of *header_row* is normalized with ``_norm_header`` and looked
    up in *field_map* (normalized header text -> field name).

    Returns:
        A ``(fields, unknown)`` tuple. ``fields`` maps field name to column
        index; when several columns resolve to the same field, the last one
        wins. ``unknown`` holds the raw text of the non-blank header cells
        that are not in *field_map*, in column order.

    Raises:
        ValueError: If any name in *required* did not get a column.
    """
    column_of: dict[str, int] = {}
    unrecognized: list[str] = []

    for position, cell in enumerate(header_row):
        normalized = _norm_header(cell)
        if normalized not in field_map:
            # Blank header cells are ignored rather than reported as unknown.
            if cell.strip():
                unrecognized.append(cell)
            continue
        column_of[field_map[normalized]] = position

    absent = required - set(column_of)
    if absent:
        raise ValueError(f"Required header(s) missing: {sorted(absent)} (found headers: {header_row})")

    return column_of, unrecognized
|