Files
familienarchiv/ocr-service/test_confidence.py
Marcel f064b27439
Some checks failed
CI / Unit & Component Tests (push) Failing after 2s
CI / Backend Unit Tests (push) Failing after 1s
CI / Unit & Component Tests (pull_request) Failing after 1s
CI / Backend Unit Tests (pull_request) Failing after 1s
feat(ocr): per-script-type confidence thresholds
Kurrent OCR produces much lower confidence than typewriter/Latin.
Separate thresholds allow aggressive filtering for Kurrent (0.5)
while keeping typewriter lenient (0.3).

- OCR_CONFIDENCE_THRESHOLD: default for Surya paths (0.3)
- OCR_CONFIDENCE_THRESHOLD_KURRENT: Kraken Kurrent path (0.5)
- apply_confidence_markers() now accepts threshold parameter
- get_threshold(script_type) selects the right threshold

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-12 20:50:59 +02:00

161 lines
5.0 KiB
Python

"""Tests for confidence-based [unleserlich] marker insertion."""
import os
import pytest
from confidence import apply_confidence_markers, words_from_characters, get_threshold
# ─── apply_confidence_markers ─────────────────────────────────────────────────
def test_all_words_above_threshold_passes_through():
    """Expect two high-confidence words to be rendered unchanged, space-separated."""
    tokens = [
        {"text": text, "confidence": score}
        for text, score in (("Lieber", 0.95), ("Freund", 0.88))
    ]
    rendered = apply_confidence_markers(tokens)
    assert rendered == "Lieber Freund"
def test_single_low_confidence_word_replaced():
    """Expect one low-confidence word between two good ones to become a marker."""
    first = dict(text="Lieber", confidence=0.95)
    garbled = dict(text="xkqz", confidence=0.1)
    last = dict(text="Freund", confidence=0.88)
    expected = "Lieber [unleserlich] Freund"
    assert apply_confidence_markers([first, garbled, last]) == expected
def test_adjacent_low_confidence_words_collapsed():
    """Expect a run of three low-confidence words to collapse into a single marker."""
    scored = (
        ("Lieber", 0.95),
        ("xkqz", 0.1),
        ("abc", 0.05),
        ("yyy", 0.2),
        ("Freund", 0.88),
    )
    tokens = [{"text": text, "confidence": score} for text, score in scored]
    rendered = apply_confidence_markers(tokens)
    assert rendered == "Lieber [unleserlich] Freund"
def test_mixed_high_low_each_group_gets_marker():
    """Expect each separate low-confidence group to receive its own marker."""
    tokens = [
        dict(text="Lieber", confidence=0.95),
        dict(text="xkqz", confidence=0.1),
        dict(text="wie", confidence=0.9),
        dict(text="abc", confidence=0.05),
        dict(text="dir", confidence=0.88),
    ]
    expected = "Lieber [unleserlich] wie [unleserlich] dir"
    assert apply_confidence_markers(tokens) == expected
def test_all_below_threshold_returns_single_marker():
    """Expect input consisting only of low-confidence words to yield one marker."""
    tokens = [
        {"text": text, "confidence": score}
        for text, score in (("xkq", 0.1), ("zzz", 0.05))
    ]
    rendered = apply_confidence_markers(tokens)
    assert rendered == "[unleserlich]"
def test_empty_list_returns_empty_string():
    """Expect an empty word list to render as the empty string."""
    no_words = []
    rendered = apply_confidence_markers(no_words)
    assert rendered == ""
def test_single_word_above_threshold():
    """Expect a lone high-confidence word to be returned as-is."""
    only_word = dict(text="Hallo", confidence=0.9)
    rendered = apply_confidence_markers([only_word])
    assert rendered == "Hallo"
def test_exact_threshold_passes_through():
    """Expect a word at exactly the threshold to be kept (comparison is strict <).

    The test uses a confidence of 0.3 with the default call, i.e. it assumes
    0.3 is the threshold in effect when none is passed.
    """
    boundary_word = dict(text="Wort", confidence=0.3)
    rendered = apply_confidence_markers([boundary_word])
    assert rendered == "Wort"
def test_just_below_threshold_replaced():
    """Expect a word with confidence 0.29 to be replaced by the marker by default."""
    barely_low = dict(text="Wort", confidence=0.29)
    rendered = apply_confidence_markers([barely_low])
    assert rendered == "[unleserlich]"
def test_custom_threshold_via_parameter():
    """Expect the ``threshold`` keyword to decide whether a 0.5 word is masked."""
    tokens = [
        dict(text="Lieber", confidence=0.95),
        dict(text="Freund", confidence=0.5),
    ]
    # Same input, two thresholds: 0.8 masks the 0.5 word, 0.3 keeps it.
    cases = (
        (0.8, "Lieber [unleserlich]"),
        (0.3, "Lieber Freund"),
    )
    for limit, expected in cases:
        assert apply_confidence_markers(tokens, threshold=limit) == expected
def test_kurrent_threshold_is_higher_than_default():
    """Expect the Kurrent threshold to be strictly greater than the typewriter one."""
    typewriter_threshold = get_threshold("TYPEWRITER")
    kurrent_threshold = get_threshold("HANDWRITING_KURRENT")
    assert typewriter_threshold < kurrent_threshold
def test_get_threshold_kurrent():
    """Expect the Kurrent handwriting script type to map to a threshold of 0.5."""
    kurrent_threshold = get_threshold("HANDWRITING_KURRENT")
    assert kurrent_threshold == 0.5
def test_get_threshold_default():
    """Expect every non-Kurrent script type checked here to map to 0.3."""
    for script_type in ("TYPEWRITER", "HANDWRITING_LATIN", "UNKNOWN"):
        assert get_threshold(script_type) == 0.3
def test_low_confidence_at_start():
    """Expect a leading low-confidence word to become a marker before the text."""
    tokens = [
        {"text": text, "confidence": score}
        for text, score in (("xkq", 0.1), ("Freund", 0.88))
    ]
    rendered = apply_confidence_markers(tokens)
    assert rendered == "[unleserlich] Freund"
def test_low_confidence_at_end():
    """Expect a trailing low-confidence word to become a marker after the text."""
    readable = dict(text="Lieber", confidence=0.95)
    garbled = dict(text="xkq", confidence=0.1)
    expected = "Lieber [unleserlich]"
    assert apply_confidence_markers([readable, garbled]) == expected
# ─── words_from_characters ────────────────────────────────────────────────────
def test_single_word_matching_confidences():
    """Expect one word whose confidence is within 0.01 of 0.84.

    0.84 is the arithmetic mean of the five per-character scores supplied.
    """
    char_scores = [0.9, 0.8, 0.85, 0.7, 0.95]
    result = words_from_characters("Hallo", char_scores)
    assert len(result) == 1
    only = result[0]
    assert only["text"] == "Hallo"
    assert abs(only["confidence"] - 0.84) < 0.01
def test_multi_word_with_spaces():
    """Expect a two-word prediction to be split at the space into two entries."""
    line = "Sehr geehrter"
    # One score per character of ``line``, including the separating space.
    char_scores = [
        0.9, 0.8, 0.7, 0.6,
        0.5,
        0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2,
    ]
    result = words_from_characters(line, char_scores)
    assert len(result) == 2
    first, second = result[0], result[1]
    assert first["text"] == "Sehr"
    assert second["text"] == "geehrter"
def test_length_mismatch_falls_back_safely():
    """Expect a score list of the wrong length to trigger the fallback result.

    The fallback asserted here is a single entry holding the whole prediction
    with a confidence of 1.0.
    """
    too_few_scores = [0.9, 0.8]
    result = words_from_characters("Hallo Welt", too_few_scores)
    assert len(result) == 1
    fallback = result[0]
    assert fallback["text"] == "Hallo Welt"
    assert fallback["confidence"] == 1.0
def test_empty_prediction_returns_empty():
    """Expect an empty prediction with no scores to produce an empty list."""
    result = words_from_characters("", [])
    assert result == []
def test_single_character_word():
    """Expect one-character words to carry exactly their own character's score.

    The score given for the separating space (0.5) must not show up in either
    word's confidence.
    """
    result = words_from_characters("A B", [0.9, 0.5, 0.3])
    assert len(result) == 2
    left, right = result[0], result[1]
    assert left["text"] == "A"
    assert left["confidence"] == 0.9
    assert right["text"] == "B"
    assert right["confidence"] == 0.3
def test_whitespace_only_prediction():
    """Expect a prediction made only of spaces to produce no words.

    The prediction must contain exactly one character per confidence value.
    With a length mismatch the call would fall into the mismatch fallback
    (whole prediction returned as one word) instead of exercising the
    whitespace-only case this test is named for.
    """
    char_scores = [0.5, 0.5, 0.5]
    # Build the string from the score count so the lengths cannot drift apart
    # (and so collapsed whitespace in a literal cannot silently break the test).
    blank_line = " " * len(char_scores)
    words = words_from_characters(blank_line, char_scores)
    assert words == []