Files
familienarchiv/tools/import-normalizer/tests/test_documents.py
2026-05-25 14:12:48 +02:00

32 lines
1.5 KiB
Python

import documents
from documents import Triage
def test_extract_row():
    """Check that extract_row exposes selected cells as named attributes.

    The header maps each column name to its position in ``cells``; the
    returned object is expected to carry the matching cell values and the
    ``source_row`` passed in.
    """
    columns = (
        "index", "file", "box", "folder", "sender",
        "receivers", "date", "location", "tags", "summary",
    )
    header = {name: position for position, name in enumerate(columns)}
    cells = [
        "W-0001",
        r"..\__scan\W-0001.pdf",
        "V",
        "1",
        "Walter de Gruyter",
        "Eugenie Müller",
        "15.2.1888",
        "Rotterdam",
        "Brautbriefe",
        "Geschäftsreise",
    ]

    raw = documents.extract_row(cells, header, source_row=3)

    # Checked in the same order as listed: index, sender, date, source_row.
    expectations = (
        ("index", "W-0001"),
        ("sender", "Walter de Gruyter"),
        ("date", "15.2.1888"),
        ("source_row", 3),
    )
    for attribute, wanted in expectations:
        assert getattr(raw, attribute) == wanted
def test_triage():
    """Check that triage maps representative cell rows to Triage members."""
    scenarios = [
        (["", "", ""], Triage.EMPTY),
        (["", "", "Walter"], Triage.BLANK_INDEX),  # data but no index
        (["W-0001x", "x"], Triage.X_SUFFIX),
        (["W-0001", "x"], Triage.OK),
    ]
    for row, outcome in scenarios:
        assert documents.triage(row) == outcome
def test_classify_blank_index():
    """Check how rows with a blank index are told apart.

    A row whose only text sits in the sender column is expected to be
    reported as ``"section_banner"``; a row with values in other columns is
    expected to be reported as ``"data_no_index"``.
    """
    header = {"sender": 4, "receivers": 5}

    banner_row = ["", "", "", "", "Brautbriefe von Walter an Eugenie", ""]
    banner_kind = documents.classify_blank_index(banner_row, header)
    assert banner_kind == "section_banner"

    data_row = ["", "", "V", "1", "", "Eugenie"]
    data_kind = documents.classify_blank_index(data_row, header)
    assert data_kind == "data_no_index"
def test_index_file_mismatch():
    """Check index_file_mismatch for differing, matching and empty file paths."""
    scenarios = (
        ("W-0010x", r"..\__scan\W-0011x.pdf", True),
        ("W-0001", r"..\__scan\W-0001.pdf", False),
        ("W-0001", "", False),
    )
    for index, file_path, wanted in scenarios:
        # Identity check on purpose: the result must be the bool itself.
        assert documents.index_file_mismatch(index, file_path) is wanted