import documents
from documents import Triage


def test_extract_row():
    """Check the fields of the record built by ``documents.extract_row``.

    A ten-column sample row is passed together with a header mapping of
    column name -> cell position and ``source_row=3``; the resulting object
    must expose the expected ``index``, ``sender``, ``date`` and
    ``source_row`` values.
    """
    column_names = [
        "index",
        "file",
        "box",
        "folder",
        "sender",
        "receivers",
        "date",
        "location",
        "tags",
        "summary",
    ]
    # Column name -> position, in the same order as the sample cells below.
    header = {name: position for position, name in enumerate(column_names)}
    cells = [
        "W-0001",
        r"..\__scan\W-0001.pdf",
        "V",
        "1",
        "Walter de Gruyter",
        "Eugenie Müller",
        "15.2.1888",
        "Rotterdam",
        "Brautbriefe",
        "Geschäftsreise",
    ]

    raw = documents.extract_row(cells, header, source_row=3)

    expected_fields = {
        "index": "W-0001",
        "sender": "Walter de Gruyter",
        "date": "15.2.1888",
        "source_row": 3,
    }
    for field_name, expected_value in expected_fields.items():
        assert getattr(raw, field_name) == expected_value


def test_triage():
    """Check the ``Triage`` member returned by ``documents.triage`` per row."""
    cases = [
        (["", "", ""], Triage.EMPTY),
        (["", "", "Walter"], Triage.BLANK_INDEX),  # data but no index
        (["W-0001x", "x"], Triage.X_SUFFIX),
        (["W-0001", "x"], Triage.OK),
    ]
    for row_cells, expected_verdict in cases:
        assert documents.triage(row_cells) == expected_verdict


def test_classify_blank_index():
    """Check the labels ``documents.classify_blank_index`` gives two rows.

    Both sample rows have an empty first cell. The row whose only non-empty
    cell is at the ``sender`` position must be labelled ``"section_banner"``;
    the row with several filled cells must be labelled ``"data_no_index"``.
    """
    header = {"sender": 4, "receivers": 5}
    banner_row = ["", "", "", "", "Brautbriefe von Walter an Eugenie", ""]
    data_row = ["", "", "V", "1", "", "Eugenie"]

    banner_label = documents.classify_blank_index(banner_row, header)
    assert banner_label == "section_banner"

    data_label = documents.classify_blank_index(data_row, header)
    assert data_label == "data_no_index"


def test_index_file_mismatch():
    """Check ``documents.index_file_mismatch`` against index/path pairs.

    Each case is ``(index, file path, expected result)``; the result is
    compared by identity, so the function must return the actual booleans
    ``True`` / ``False``.
    """
    cases = [
        ("W-0010x", r"..\__scan\W-0011x.pdf", True),
        ("W-0001", r"..\__scan\W-0001.pdf", False),
        ("W-0001", "", False),
        ("W-0001", "scans/W-0001.pdf", False),  # unix path
        ("W-0001", "W-0001.pdf", False),  # no dir
    ]
    for index_value, file_path, expected in cases:
        assert documents.index_file_mismatch(index_value, file_path) is expected