Files
familienarchiv/ocr-service/test_confidence.py
Marcel c74539b04b
Some checks failed
CI / Unit & Component Tests (push) Failing after 2s
CI / Backend Unit Tests (push) Failing after 2s
CI / Unit & Component Tests (pull_request) Failing after 2s
CI / Backend Unit Tests (pull_request) Failing after 1s
feat(ocr): auto-insert [unleserlich] markers for low-confidence words
New confidence.py module with two functions:
- apply_confidence_markers(): replaces words below threshold with
  [unleserlich], collapses adjacent markers into one
- words_from_characters(): reconstructs word-level confidence from
  Kraken's character-level data

Surya 0.17 provides native word-level confidence via line.words.
Kraken 7.0 provides per-character confidences via record.confidences.
Both engines now pass word+confidence data through main.py, which
applies the marker post-processing before returning the API response.

Threshold configurable via OCR_CONFIDENCE_THRESHOLD env var (default 0.3).
Frontend already renders [unleserlich] markers via transcriptionMarkers.ts.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-12 19:16:17 +02:00

154 lines
4.8 KiB
Python

"""Tests for confidence-based [unleserlich] marker insertion."""
import os
import pytest
from confidence import apply_confidence_markers, words_from_characters
# ─── apply_confidence_markers ─────────────────────────────────────────────────
def test_all_words_above_threshold_passes_through():
    """High-confidence words are joined with spaces and left untouched."""
    scored = (("Lieber", 0.95), ("Freund", 0.88))
    confident_words = [
        {"text": text, "confidence": score} for text, score in scored
    ]
    assert apply_confidence_markers(confident_words) == "Lieber Freund"
def test_single_low_confidence_word_replaced():
    """A lone low-confidence word between two good ones becomes a marker."""
    texts = ["Lieber", "xkqz", "Freund"]
    scores = [0.95, 0.1, 0.88]
    line = [
        {"text": text, "confidence": score}
        for text, score in zip(texts, scores)
    ]
    rendered = apply_confidence_markers(line)
    assert rendered == "Lieber [unleserlich] Freund"
def test_adjacent_low_confidence_words_collapsed():
    """Consecutive low-confidence words collapse into a single marker."""
    garbled_run = [
        dict(text="xkqz", confidence=0.1),
        dict(text="abc", confidence=0.05),
        dict(text="yyy", confidence=0.2),
    ]
    line = [
        dict(text="Lieber", confidence=0.95),
        *garbled_run,
        dict(text="Freund", confidence=0.88),
    ]
    assert apply_confidence_markers(line) == "Lieber [unleserlich] Freund"
def test_mixed_high_low_each_group_gets_marker():
    """Low-confidence words separated by good words each get their own marker."""
    scored = [
        ("Lieber", 0.95),
        ("xkqz", 0.1),
        ("wie", 0.9),
        ("abc", 0.05),
        ("dir", 0.88),
    ]
    line = [{"text": text, "confidence": score} for text, score in scored]
    expected = "Lieber [unleserlich] wie [unleserlich] dir"
    assert apply_confidence_markers(line) == expected
def test_all_below_threshold_returns_single_marker():
    """A line made up only of low-confidence words yields exactly one marker."""
    unreadable = [
        dict(text="xkq", confidence=0.1),
        dict(text="zzz", confidence=0.05),
    ]
    rendered = apply_confidence_markers(unreadable)
    assert rendered == "[unleserlich]"
def test_empty_list_returns_empty_string():
    """No words in means an empty string out."""
    no_words = []
    rendered = apply_confidence_markers(no_words)
    assert rendered == ""
def test_single_word_above_threshold():
    """A single high-confidence word is returned as-is."""
    only_word = {"text": "Hallo", "confidence": 0.9}
    rendered = apply_confidence_markers([only_word])
    assert rendered == "Hallo"
def test_exact_threshold_passes_through():
    """A confidence equal to the threshold is kept; only strictly lower is replaced."""
    borderline = dict(text="Wort", confidence=0.3)
    rendered = apply_confidence_markers([borderline])
    assert rendered == "Wort"
def test_just_below_threshold_replaced():
    """A confidence of 0.29 is low enough for the word to be replaced."""
    barely_low = dict(text="Wort", confidence=0.29)
    rendered = apply_confidence_markers([barely_low])
    assert rendered == "[unleserlich]"
def test_custom_threshold_via_env(monkeypatch):
    """A threshold set via OCR_CONFIDENCE_THRESHOLD takes effect after a reload.

    The module is reloaded so that it re-reads the environment variable
    (presumably it is read at import time -- confirm against confidence.py).

    The override is scoped with ``monkeypatch.context()`` and the restoring
    reload sits in ``finally``. This way the module is put back in its
    ambient configuration even when the assertion fails; otherwise the 0.8
    threshold would leak into every test that runs afterwards.
    """
    import importlib

    import confidence

    try:
        with monkeypatch.context() as patched_env:
            patched_env.setenv("OCR_CONFIDENCE_THRESHOLD", "0.8")
            # Reload so the module picks up the overridden env var.
            importlib.reload(confidence)
            words = [
                {"text": "Lieber", "confidence": 0.95},
                {"text": "Freund", "confidence": 0.5},
            ]
            assert (
                confidence.apply_confidence_markers(words)
                == "Lieber [unleserlich]"
            )
    finally:
        # The context manager has already restored the environment, so this
        # reload returns the module to whatever threshold was configured
        # before the test ran.
        importlib.reload(confidence)
def test_low_confidence_at_start():
    """A low-confidence word at the start of the line is replaced by a marker."""
    scored = (("xkq", 0.1), ("Freund", 0.88))
    line = [{"text": text, "confidence": score} for text, score in scored]
    rendered = apply_confidence_markers(line)
    assert rendered == "[unleserlich] Freund"
def test_low_confidence_at_end():
    """A low-confidence word at the end of the line is replaced by a marker."""
    scored = (("Lieber", 0.95), ("xkq", 0.1))
    line = [{"text": text, "confidence": score} for text, score in scored]
    rendered = apply_confidence_markers(line)
    assert rendered == "Lieber [unleserlich]"
# ─── words_from_characters ────────────────────────────────────────────────────
def test_single_word_matching_confidences():
    """One word with one confidence per character gets a confidence near 0.84."""
    char_scores = [0.9, 0.8, 0.85, 0.7, 0.95]
    result = words_from_characters("Hallo", char_scores)
    assert len(result) == 1
    word = result[0]
    assert word["text"] == "Hallo"
    # 0.84 is the arithmetic mean of the five character scores above.
    deviation = abs(word["confidence"] - 0.84)
    assert deviation < 0.01
def test_multi_word_with_spaces():
    """A space-separated prediction is split into one entry per word."""
    # One score per character of "Sehr geehrter", the space included.
    sehr_scores = [0.9, 0.8, 0.7, 0.6]
    space_score = [0.5]
    geehrter_scores = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2]
    char_scores = sehr_scores + space_score + geehrter_scores

    result = words_from_characters("Sehr geehrter", char_scores)

    assert len(result) == 2
    assert [entry["text"] for entry in result] == ["Sehr", "geehrter"]
def test_length_mismatch_falls_back_safely():
    """Too few confidences: the whole prediction comes back as one word at 1.0."""
    too_few_scores = [0.9, 0.8]
    result = words_from_characters("Hallo Welt", too_few_scores)
    assert len(result) == 1
    fallback = result[0]
    assert fallback["text"] == "Hallo Welt"
    assert fallback["confidence"] == 1.0
def test_empty_prediction_returns_empty():
    """An empty prediction with no confidences produces no words."""
    result = words_from_characters("", [])
    assert result == []
def test_single_character_word():
    """One-letter words keep their own character score; the space's is unused."""
    char_scores = [0.9, 0.5, 0.3]  # "A", " ", "B"
    result = words_from_characters("A B", char_scores)
    assert len(result) == 2
    first, second = result
    assert first["text"] == "A"
    assert first["confidence"] == 0.9
    assert second["text"] == "B"
    assert second["confidence"] == 0.3
def test_whitespace_only_prediction():
    """A prediction consisting solely of spaces yields no words.

    The prediction is built with ``" " * 3`` and the confidences are derived
    from its length, so there is exactly one score per character. This keeps
    the call out of the length-mismatch case covered by
    ``test_length_mismatch_falls_back_safely`` and makes the whitespace count
    robust against editors or tools that collapse runs of spaces.
    """
    blank_prediction = " " * 3
    char_scores = [0.5] * len(blank_prediction)
    words = words_from_characters(blank_prediction, char_scores)
    assert words == []