diff --git a/ocr-service/confidence.py b/ocr-service/confidence.py index e4835db6..1c59a963 100644 --- a/ocr-service/confidence.py +++ b/ocr-service/confidence.py @@ -9,7 +9,7 @@ ILLEGIBLE_MARKER = "[unleserlich]" CORRECTION_MARKER = "[?]" -def _collapse_adjacent_markers(tokens: list[str]) -> list[str]: +def collapse_adjacent_markers(tokens: list[str]) -> list[str]: collapsed: list[str] = [] prev_was_marker = False for token in tokens: @@ -52,7 +52,7 @@ def apply_confidence_markers(words: list[dict], threshold: float | None = None) else: tokens.append(word["text"]) - return " ".join(_collapse_adjacent_markers(tokens)) + return " ".join(collapse_adjacent_markers(tokens)) def words_from_characters(prediction: str, confidences: list[float]) -> list[dict]: diff --git a/ocr-service/spell_check.py b/ocr-service/spell_check.py index 03692ad6..2088cd41 100644 --- a/ocr-service/spell_check.py +++ b/ocr-service/spell_check.py @@ -5,7 +5,7 @@ import os from spellchecker import SpellChecker -from confidence import CORRECTION_MARKER, ILLEGIBLE_MARKER, _collapse_adjacent_markers +from confidence import CORRECTION_MARKER, ILLEGIBLE_MARKER, collapse_adjacent_markers logger = logging.getLogger(__name__) @@ -114,4 +114,4 @@ def correct_text(text: str) -> str: else: checked.append(ILLEGIBLE_MARKER) - return " ".join(_collapse_adjacent_markers(checked)) + return " ".join(collapse_adjacent_markers(checked))