Kurrent OCR produces much lower confidence than typewriter/Latin OCR. Separate thresholds allow aggressive filtering for Kurrent (0.5) while keeping typewriter lenient (0.3).
- OCR_CONFIDENCE_THRESHOLD: default for Surya paths (0.3)
- OCR_CONFIDENCE_THRESHOLD_KURRENT: Kraken Kurrent path (0.5)
- apply_confidence_markers() now accepts a threshold parameter
- get_threshold(script_type) selects the right threshold
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
161 lines
5.0 KiB
Python
161 lines
5.0 KiB
Python
"""Tests for confidence-based [unleserlich] marker insertion."""
|
|
|
|
import os
|
|
import pytest
|
|
from confidence import apply_confidence_markers, words_from_characters, get_threshold
|
|
|
|
|
|
# ─── apply_confidence_markers ─────────────────────────────────────────────────
|
|
|
|
|
|
def test_all_words_above_threshold_passes_through():
    """High-confidence words come back space-joined with no marker."""
    pairs = (("Lieber", 0.95), ("Freund", 0.88))
    confident_words = [
        {"text": text, "confidence": score} for text, score in pairs
    ]
    rendered = apply_confidence_markers(confident_words)
    assert rendered == "Lieber Freund"
|
|
|
|
|
|
def test_single_low_confidence_word_replaced():
    """One low-confidence word between good ones becomes [unleserlich]."""
    pairs = (("Lieber", 0.95), ("xkqz", 0.1), ("Freund", 0.88))
    sample = [{"text": text, "confidence": score} for text, score in pairs]
    expected = "Lieber [unleserlich] Freund"
    assert apply_confidence_markers(sample) == expected
|
|
|
|
|
|
def test_adjacent_low_confidence_words_collapsed():
    """A run of consecutive low-confidence words yields a single marker."""
    texts = ["Lieber", "xkqz", "abc", "yyy", "Freund"]
    scores = [0.95, 0.1, 0.05, 0.2, 0.88]
    sample = [
        {"text": text, "confidence": score}
        for text, score in zip(texts, scores)
    ]
    rendered = apply_confidence_markers(sample)
    assert rendered == "Lieber [unleserlich] Freund"
|
|
|
|
|
|
def test_mixed_high_low_each_group_gets_marker():
    """Separate low-confidence groups each get their own marker."""
    pairs = (
        ("Lieber", 0.95),
        ("xkqz", 0.1),
        ("wie", 0.9),
        ("abc", 0.05),
        ("dir", 0.88),
    )
    sample = [{"text": text, "confidence": score} for text, score in pairs]
    expected = "Lieber [unleserlich] wie [unleserlich] dir"
    assert apply_confidence_markers(sample) == expected
|
|
|
|
|
|
def test_all_below_threshold_returns_single_marker():
    """When every word is low-confidence the output is one marker only."""
    unreadable = [
        {"text": text, "confidence": score}
        for text, score in (("xkq", 0.1), ("zzz", 0.05))
    ]
    rendered = apply_confidence_markers(unreadable)
    assert rendered == "[unleserlich]"
|
|
|
|
|
|
def test_empty_list_returns_empty_string():
    """An empty word list renders as the empty string."""
    no_words = []
    rendered = apply_confidence_markers(no_words)
    assert rendered == ""
|
|
|
|
|
|
def test_single_word_above_threshold():
    """A single high-confidence word is returned unchanged."""
    greeting = {"text": "Hallo", "confidence": 0.9}
    rendered = apply_confidence_markers([greeting])
    assert rendered == "Hallo"
|
|
|
|
|
|
def test_exact_threshold_passes_through():
    """A confidence equal to the threshold keeps the word.

    The word is scored 0.3, the value this test expects to be the default
    threshold; only values strictly below the threshold should be replaced.
    """
    borderline = {"text": "Wort", "confidence": 0.3}
    rendered = apply_confidence_markers([borderline])
    assert rendered == "Wort"
|
|
|
|
|
|
def test_just_below_threshold_replaced():
    """A confidence of 0.29 is expected to be replaced by the marker."""
    nearly_readable = {"text": "Wort", "confidence": 0.29}
    rendered = apply_confidence_markers([nearly_readable])
    assert rendered == "[unleserlich]"
|
|
|
|
|
|
def test_custom_threshold_via_parameter():
    """The ``threshold`` keyword decides which words get masked."""
    sample = [
        {"text": text, "confidence": score}
        for text, score in (("Lieber", 0.95), ("Freund", 0.5))
    ]

    # 0.5 is below a strict 0.8 cut-off, so the second word is masked.
    strict = apply_confidence_markers(sample, threshold=0.8)
    assert strict == "Lieber [unleserlich]"

    # With a lenient 0.3 cut-off both words survive.
    lenient = apply_confidence_markers(sample, threshold=0.3)
    assert lenient == "Lieber Freund"
|
|
|
|
|
|
def test_kurrent_threshold_is_higher_than_default():
    """The Kurrent threshold must exceed the typewriter threshold."""
    typewriter_threshold = get_threshold("TYPEWRITER")
    kurrent_threshold = get_threshold("HANDWRITING_KURRENT")
    assert typewriter_threshold < kurrent_threshold
|
|
|
|
|
|
def test_get_threshold_kurrent():
    """``HANDWRITING_KURRENT`` maps to a threshold of 0.5."""
    kurrent_threshold = get_threshold("HANDWRITING_KURRENT")
    assert kurrent_threshold == 0.5
|
|
|
|
|
|
def test_get_threshold_default():
    """Non-Kurrent script types, including unknown ones, map to 0.3."""
    for script_type in ("TYPEWRITER", "HANDWRITING_LATIN", "UNKNOWN"):
        assert get_threshold(script_type) == 0.3
|
|
|
|
|
|
def test_low_confidence_at_start():
    """A low-confidence first word is masked; the following word is kept."""
    leading_noise = {"text": "xkq", "confidence": 0.1}
    readable = {"text": "Freund", "confidence": 0.88}
    rendered = apply_confidence_markers([leading_noise, readable])
    assert rendered == "[unleserlich] Freund"
|
|
|
|
|
|
def test_low_confidence_at_end():
    """A low-confidence last word is masked; the preceding word is kept."""
    readable = {"text": "Lieber", "confidence": 0.95}
    trailing_noise = {"text": "xkq", "confidence": 0.1}
    rendered = apply_confidence_markers([readable, trailing_noise])
    assert rendered == "Lieber [unleserlich]"
|
|
|
|
|
|
# ─── words_from_characters ────────────────────────────────────────────────────
|
|
|
|
|
|
def test_single_word_matching_confidences():
    """One word with per-character scores yields one entry near 0.84."""
    char_scores = [0.9, 0.8, 0.85, 0.7, 0.95]
    result = words_from_characters("Hallo", char_scores)
    assert len(result) == 1

    only_word = result[0]
    assert only_word["text"] == "Hallo"

    # The expected word confidence is 0.84, within a 0.01 tolerance.
    deviation = abs(only_word["confidence"] - 0.84)
    assert deviation < 0.01
|
|
|
|
|
|
def test_multi_word_with_spaces():
    """A space in the prediction splits the result into two words."""
    text = "Sehr geehrter"
    # One confidence per character of ``text``, including the space.
    char_scores = [
        0.9, 0.8, 0.7, 0.6,                       # "Sehr"
        0.5,                                      # " "
        0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2,   # "geehrter"
    ]
    result = words_from_characters(text, char_scores)
    assert len(result) == 2
    assert result[0]["text"] == "Sehr"
    assert result[1]["text"] == "geehrter"
|
|
|
|
|
|
def test_length_mismatch_falls_back_safely():
    """Too few confidences give one word spanning the text at 1.0."""
    too_few_scores = [0.9, 0.8]
    result = words_from_characters("Hallo Welt", too_few_scores)
    assert len(result) == 1

    fallback = result[0]
    assert fallback["text"] == "Hallo Welt"
    assert fallback["confidence"] == 1.0
|
|
|
|
|
|
def test_empty_prediction_returns_empty():
    """An empty prediction with no confidences produces no words."""
    result = words_from_characters("", [])
    assert result == []
|
|
|
|
|
|
def test_single_character_word():
    """One-letter words keep their own character's confidence."""
    # Scores line up with "A", " " and "B" respectively.
    result = words_from_characters("A B", [0.9, 0.5, 0.3])
    assert len(result) == 2

    first_word = result[0]
    assert first_word["text"] == "A"
    assert first_word["confidence"] == 0.9

    second_word = result[1]
    assert second_word["text"] == "B"
    assert second_word["confidence"] == 0.3
|
|
|
|
|
|
def test_whitespace_only_prediction():
    """A prediction consisting only of spaces yields no words.

    The prediction is built as ``" " * len(confidences)`` so that it has
    exactly one character per confidence value. A prediction shorter than
    the confidence list would exercise the length-mismatch handling rather
    than the whitespace-only case this test is named for.
    """
    confidences = [0.5, 0.5, 0.5]
    prediction = " " * len(confidences)
    assert len(prediction) == len(confidences)
    assert words_from_characters(prediction, confidences) == []
|