From fea24aee254cfdbc868fab9a94ba779da554b040 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 17 Apr 2026 17:20:31 +0200 Subject: [PATCH] refactor(ocr): make collapse_adjacent_markers a public function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop underscore prefix — the helper is part of confidence.py's effective public API since spell_check.py imports and calls it directly. Fixes reviewer concern: importing a _-prefixed name across module boundaries contradicts Python's private-by-convention signal. Co-Authored-By: Claude Sonnet 4.6 --- ocr-service/confidence.py | 4 ++-- ocr-service/spell_check.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ocr-service/confidence.py b/ocr-service/confidence.py index e4835db6..1c59a963 100644 --- a/ocr-service/confidence.py +++ b/ocr-service/confidence.py @@ -9,7 +9,7 @@ ILLEGIBLE_MARKER = "[unleserlich]" CORRECTION_MARKER = "[?]" -def _collapse_adjacent_markers(tokens: list[str]) -> list[str]: +def collapse_adjacent_markers(tokens: list[str]) -> list[str]: collapsed: list[str] = [] prev_was_marker = False for token in tokens: @@ -52,7 +52,7 @@ def apply_confidence_markers(words: list[dict], threshold: float | None = None) else: tokens.append(word["text"]) - return " ".join(_collapse_adjacent_markers(tokens)) + return " ".join(collapse_adjacent_markers(tokens)) def words_from_characters(prediction: str, confidences: list[float]) -> list[dict]: diff --git a/ocr-service/spell_check.py b/ocr-service/spell_check.py index 03692ad6..2088cd41 100644 --- a/ocr-service/spell_check.py +++ b/ocr-service/spell_check.py @@ -5,7 +5,7 @@ import os from spellchecker import SpellChecker -from confidence import CORRECTION_MARKER, ILLEGIBLE_MARKER, _collapse_adjacent_markers +from confidence import CORRECTION_MARKER, ILLEGIBLE_MARKER, collapse_adjacent_markers logger = logging.getLogger(__name__) @@ -114,4 +114,4 @@ def correct_text(text: str) -> str: else: checked.append(ILLEGIBLE_MARKER) - return " ".join(_collapse_adjacent_markers(checked)) + return " ".join(collapse_adjacent_markers(checked))