New confidence.py module with two functions:
- apply_confidence_markers(): replaces words below the threshold with [unleserlich] and collapses adjacent markers into one
- words_from_characters(): reconstructs word-level confidence from Kraken's character-level data

Surya 0.17 provides native word-level confidence via line.words. Kraken 7.0 provides per-character confidences via record.confidences. Both engines now pass word+confidence data through main.py, which applies the marker post-processing before returning the API response.

Threshold configurable via OCR_CONFIDENCE_THRESHOLD env var (default 0.3). Frontend already renders [unleserlich] markers via transcriptionMarkers.ts.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
154 lines
4.8 KiB
Python
154 lines
4.8 KiB
Python
"""Tests for confidence-based [unleserlich] marker insertion."""
|
|
|
|
import os
|
|
import pytest
|
|
from confidence import apply_confidence_markers, words_from_characters
|
|
|
|
|
|
# ─── apply_confidence_markers ─────────────────────────────────────────────────
|
|
|
|
|
|
def test_all_words_above_threshold_passes_through():
    """Two high-confidence words come back unchanged, joined by one space."""
    scored = (("Lieber", 0.95), ("Freund", 0.88))
    tokens = [{"text": text, "confidence": score} for text, score in scored]

    rendered = apply_confidence_markers(tokens)

    assert rendered == "Lieber Freund"
|
|
|
|
|
|
def test_single_low_confidence_word_replaced():
    """A lone low-confidence word between two good ones becomes the marker."""
    scored = (("Lieber", 0.95), ("xkqz", 0.1), ("Freund", 0.88))
    tokens = [{"text": text, "confidence": score} for text, score in scored]

    rendered = apply_confidence_markers(tokens)

    assert rendered == "Lieber [unleserlich] Freund"
|
|
|
|
|
|
def test_adjacent_low_confidence_words_collapsed():
    """Three consecutive low-confidence words yield a single marker."""
    scored = (
        ("Lieber", 0.95),
        ("xkqz", 0.1),
        ("abc", 0.05),
        ("yyy", 0.2),
        ("Freund", 0.88),
    )
    tokens = [{"text": text, "confidence": score} for text, score in scored]

    rendered = apply_confidence_markers(tokens)

    assert rendered == "Lieber [unleserlich] Freund"
|
|
|
|
|
|
def test_mixed_high_low_each_group_gets_marker():
    """Low-confidence words separated by a good word each get their own marker."""
    scored = (
        ("Lieber", 0.95),
        ("xkqz", 0.1),
        ("wie", 0.9),
        ("abc", 0.05),
        ("dir", 0.88),
    )
    tokens = [{"text": text, "confidence": score} for text, score in scored]

    rendered = apply_confidence_markers(tokens)

    assert rendered == "Lieber [unleserlich] wie [unleserlich] dir"
|
|
|
|
|
|
def test_all_below_threshold_returns_single_marker():
    """When every word is low-confidence the whole output is one marker."""
    scored = (("xkq", 0.1), ("zzz", 0.05))
    tokens = [{"text": text, "confidence": score} for text, score in scored]

    rendered = apply_confidence_markers(tokens)

    assert rendered == "[unleserlich]"
|
|
|
|
|
|
def test_empty_list_returns_empty_string():
    """An empty word list renders as the empty string."""
    rendered = apply_confidence_markers([])

    assert rendered == ""
|
|
|
|
|
|
def test_single_word_above_threshold():
    """A single high-confidence word is returned as-is."""
    token = {"text": "Hallo", "confidence": 0.9}

    rendered = apply_confidence_markers([token])

    assert rendered == "Hallo"
|
|
|
|
|
|
def test_exact_threshold_passes_through():
    """A word whose confidence equals the threshold is kept (comparison is strict <)."""
    token = {"text": "Wort", "confidence": 0.3}

    rendered = apply_confidence_markers([token])

    assert rendered == "Wort"
|
|
|
|
|
|
def test_just_below_threshold_replaced():
    """A word with confidence 0.29 is replaced by the marker."""
    token = {"text": "Wort", "confidence": 0.29}

    rendered = apply_confidence_markers([token])

    assert rendered == "[unleserlich]"
|
|
|
|
|
|
def test_custom_threshold_via_env(monkeypatch):
    """A threshold of 0.8 set via OCR_CONFIDENCE_THRESHOLD replaces a 0.5 word.

    The ``confidence`` module has to be reloaded for the new environment value
    to take effect, so the reload is undone in a ``finally`` block: without
    it, a failing assertion would leave the module reloaded with the 0.8
    threshold and break every test that runs afterwards.
    """
    monkeypatch.setenv("OCR_CONFIDENCE_THRESHOLD", "0.8")
    import importlib

    import confidence

    try:
        # Need to reload the module to pick up the new env var.
        importlib.reload(confidence)

        words = [
            {"text": "Lieber", "confidence": 0.95},
            {"text": "Freund", "confidence": 0.5},
        ]
        assert confidence.apply_confidence_markers(words) == "Lieber [unleserlich]"
    finally:
        # Reset unconditionally so the module state never leaks into other
        # tests, even when the assertion (or the reload itself) fails.
        monkeypatch.setenv("OCR_CONFIDENCE_THRESHOLD", "0.3")
        importlib.reload(confidence)
|
|
|
|
|
|
def test_low_confidence_at_start():
    """A low-confidence first word is replaced; the following word is kept."""
    scored = (("xkq", 0.1), ("Freund", 0.88))
    tokens = [{"text": text, "confidence": score} for text, score in scored]

    rendered = apply_confidence_markers(tokens)

    assert rendered == "[unleserlich] Freund"
|
|
|
|
|
|
def test_low_confidence_at_end():
    """A low-confidence last word is replaced; the preceding word is kept."""
    scored = (("Lieber", 0.95), ("xkq", 0.1))
    tokens = [{"text": text, "confidence": score} for text, score in scored]

    rendered = apply_confidence_markers(tokens)

    assert rendered == "Lieber [unleserlich]"
|
|
|
|
|
|
# ─── words_from_characters ────────────────────────────────────────────────────
|
|
|
|
|
|
def test_single_word_matching_confidences():
    """Five characters with five confidences form one word scored about 0.84."""
    char_scores = [0.9, 0.8, 0.85, 0.7, 0.95]

    result = words_from_characters("Hallo", char_scores)

    assert len(result) == 1
    only_word = result[0]
    assert only_word["text"] == "Hallo"
    deviation = abs(only_word["confidence"] - 0.84)
    assert deviation < 0.01
|
|
|
|
|
|
def test_multi_word_with_spaces():
    """A prediction containing one space is split into two words."""
    line_text = "Sehr geehrter"
    # One confidence per character of the prediction, including the space.
    char_scores = [0.9, 0.8, 0.7, 0.6, 0.5, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2]

    result = words_from_characters(line_text, char_scores)

    assert len(result) == 2
    first, second = result[0], result[1]
    assert first["text"] == "Sehr"
    assert second["text"] == "geehrter"
|
|
|
|
|
|
def test_length_mismatch_falls_back_safely():
    """Too few confidences yield the whole prediction as one word at 1.0."""
    too_few_scores = [0.9, 0.8]

    result = words_from_characters("Hallo Welt", too_few_scores)

    assert len(result) == 1
    fallback = result[0]
    assert fallback["text"] == "Hallo Welt"
    assert fallback["confidence"] == 1.0
|
|
|
|
|
|
def test_empty_prediction_returns_empty():
    """An empty prediction with no confidences produces no words."""
    result = words_from_characters("", [])

    assert result == []
|
|
|
|
|
|
def test_single_character_word():
    """One-character words keep their own confidence; the space's score is not used."""
    char_scores = [0.9, 0.5, 0.3]

    result = words_from_characters("A B", char_scores)

    assert len(result) == 2
    left, right = result[0], result[1]
    assert left["text"] == "A"
    assert left["confidence"] == 0.9
    assert right["text"] == "B"
    assert right["confidence"] == 0.3
|
|
|
|
|
|
def test_whitespace_only_prediction():
    """A prediction made only of whitespace produces no words."""
    # NOTE(review): the literal below is one character long while three
    # confidences are supplied — confirm whether it was meant to be three
    # spaces (whitespace may have been collapsed when this file was copied).
    blank_scores = [0.5, 0.5, 0.5]

    result = words_from_characters(" ", blank_scores)

    assert result == []
|