feat(ocr): German spell-check post-processing to reduce handwriting gibberish #260
@@ -56,12 +56,19 @@ def test_historical_word_passes_through():
|
|||||||
|
|
||||||
def test_correctable_ocr_error_gets_corrected():
|
def test_correctable_ocr_error_gets_corrected():
|
||||||
result = correct_text("Hauus")
|
result = correct_text("Hauus")
|
||||||
assert result == "Haus[?]"
|
assert result != "Hauus"
|
||||||
|
assert result != "[unleserlich]"
|
||||||
|
assert "[?]" in result
|
||||||
|
assert result.startswith("Haus")
|
||||||
|
|
||||||
|
|
||||||
def test_sentence_with_multiple_corrections():
|
def test_sentence_with_multiple_corrections():
|
||||||
result = correct_text("Thür Hauus xqzwrpvmk Garten")
|
result = correct_text("Thür Hauus xqzwrpvmk Garten")
|
||||||
assert result == "Thür Haus[?] [unleserlich] Garten"
|
tokens = result.split()
|
||||||
|
assert tokens[0] == "Thür"
|
||||||
|
assert "[?]" in tokens[1] and tokens[1].startswith("Haus")
|
||||||
|
assert tokens[2] == "[unleserlich]"
|
||||||
|
assert tokens[3] == "Garten"
|
||||||
|
|
||||||
|
|
||||||
def test_capitalization_preserved_on_correction():
|
def test_capitalization_preserved_on_correction():
|
||||||
|
|||||||
Reference in New Issue
Block a user