34 lines
1.7 KiB
Python
34 lines
1.7 KiB
Python
import documents
|
|
from documents import Triage
|
|
|
|
def test_extract_row():
    """Check that documents.extract_row exposes selected cells as attributes.

    A ten-column header map and a matching row of cells are passed in with
    ``source_row=3``; the returned object must expose the expected ``index``,
    ``sender``, ``date`` and ``source_row`` values.
    """
    column_names = ("index", "file", "box", "folder", "sender",
                    "receivers", "date", "location", "tags", "summary")
    # Column name -> position in the row, in the same order as the names above.
    column_of = {name: position for position, name in enumerate(column_names)}
    row = [
        "W-0001",
        r"..\__scan\W-0001.pdf",
        "V",
        "1",
        "Walter de Gruyter",
        "Eugenie Müller",
        "15.2.1888",
        "Rotterdam",
        "Brautbriefe",
        "Geschäftsreise",
    ]

    record = documents.extract_row(row, column_of, source_row=3)

    # Checked in this order so the first failing attribute is reported first.
    expected_attributes = (
        ("index", "W-0001"),
        ("sender", "Walter de Gruyter"),
        ("date", "15.2.1888"),
        ("source_row", 3),
    )
    for attribute, wanted in expected_attributes:
        assert getattr(record, attribute) == wanted
|
|
|
|
def test_triage():
    """Check documents.triage against the expected Triage member per sample row."""
    samples = (
        (["", "", ""], Triage.EMPTY),
        (["", "", "Walter"], Triage.BLANK_INDEX),  # data but no index
        (["W-0001x", "x"], Triage.X_SUFFIX),
        (["W-0001", "x"], Triage.OK),
    )
    for cells, verdict in samples:
        assert documents.triage(cells) == verdict
|
|
|
|
def test_classify_blank_index():
    """Check the labels documents.classify_blank_index returns for two rows.

    Both rows have an empty first cell. The first carries text only in the
    ``sender`` column (position 4); the second has values in several columns,
    including ``receivers`` (position 5).
    """
    columns = {"sender": 4, "receivers": 5}
    banner_row = ["", "", "", "", "Brautbriefe von Walter an Eugenie", ""]
    data_row = ["", "", "V", "1", "", "Eugenie"]

    labelled_rows = (
        (banner_row, "section_banner"),
        (data_row, "data_no_index"),
    )
    for row, label in labelled_rows:
        assert documents.classify_blank_index(row, columns) == label
|
|
|
|
def test_index_file_mismatch():
    """Check documents.index_file_mismatch for index / file-path pairs.

    Each case is ``(index, file path, expected result)``. The result is
    compared by identity, so the function must return the actual ``True`` /
    ``False`` singletons rather than merely truthy or falsy values.
    """
    cases = (
        ("W-0010x", r"..\__scan\W-0011x.pdf", True),
        ("W-0001", r"..\__scan\W-0001.pdf", False),
        ("W-0001", "", False),
        ("W-0001", "scans/W-0001.pdf", False),  # unix path
        ("W-0001", "W-0001.pdf", False),  # no dir
    )
    for index, file_path, expected in cases:
        assert documents.index_file_mismatch(index, file_path) is expected
|