Files
familienarchiv/tools/import-normalizer/tests/test_ingest.py
2026-05-25 14:08:30 +02:00

37 lines
1.3 KiB
Python

import datetime
import openpyxl
import pytest
import ingest
def _make_workbook(tmp_path, sheet_name, rows):
    """Write *rows* into a one-sheet workbook file and return the file's path.

    A fresh workbook is created, its active sheet is renamed to
    *sheet_name*, every item of *rows* is appended as one row, and the
    workbook is saved as ``wb.xlsx`` inside *tmp_path*.
    """
    target = tmp_path / "wb.xlsx"

    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = sheet_name
    for row in rows:
        sheet.append(row)

    workbook.save(target)
    return target
def test_read_sheet_converts_cells(tmp_path):
    """Check that ``ingest.read_sheet`` hands back every cell as a string.

    The fixture sheet holds a header row, a row with a ``datetime`` cell and
    a row with an integer cell; the expected rows below spell out the string
    form each of them must come back as.
    """
    sheet_rows = [
        ["Index", "Datum"],
        ["W-0001", datetime.datetime(1888, 2, 15)],
        ["W-0002", 1],
    ]
    workbook_path = _make_workbook(tmp_path, "S", sheet_rows)

    result = ingest.read_sheet(workbook_path, "S")

    expected_header = ["Index", "Datum"]
    expected_date_row = ["W-0001", "1888-02-15"]  # datetime cell -> ISO date string
    expected_int_row = ["W-0002", "1"]  # integer cell -> plain digit string
    assert result[0] == expected_header
    assert result[1] == expected_date_row
    assert result[2] == expected_int_row
def test_build_header_map_collapses_whitespace_and_case():
    """Check header-to-field mapping when header case differs from the map keys.

    Mixed-case headers are expected to resolve through the lower-case keys
    of the field map to their column positions, and a header with no entry
    in the map is expected to be reported back as unknown.
    """
    # NOTE(review): the test name also promises whitespace collapsing, but no
    # header below contains irregular whitespace -- confirm whether the
    # fixture was meant to include padded or multi-space headers.
    known_fields = {
        "index": "index",
        "datum des briefes": "date",
        "empfängerin": "receivers",
    }
    header_row = ["Index", "Datum des Briefes", "EmpfängerIn", "Mystery"]

    column_by_field, unmapped = ingest.build_header_map(
        header_row, known_fields, required={"index"}
    )

    assert column_by_field == {"index": 0, "date": 1, "receivers": 2}
    assert unmapped == ["Mystery"]
def test_build_header_map_missing_required_raises():
    """Check that a header lacking the required ``index`` field is rejected.

    Neither header maps to ``index``, so ``build_header_map`` must raise a
    ``ValueError`` whose message matches ``index``.
    """
    header_row = ["Box", "Ort"]
    known_fields = {"box": "box", "ort": "location"}

    with pytest.raises(ValueError, match="index"):
        ingest.build_header_map(header_row, known_fields, required={"index"})