refactor(ocr): make collapse_adjacent_markers a public function

Drop the underscore prefix — the helper is effectively part of confidence.py's
public API, since spell_check.py imports and calls it directly.

Addresses a reviewer concern: importing an underscore-prefixed name across
module boundaries contradicts Python's private-by-convention signal.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-17 17:20:31 +02:00
parent 68b57918eb
commit fea24aee25
2 changed files with 4 additions and 4 deletions

View File

@@ -9,7 +9,7 @@ ILLEGIBLE_MARKER = "[unleserlich]"
CORRECTION_MARKER = "[?]" CORRECTION_MARKER = "[?]"
def _collapse_adjacent_markers(tokens: list[str]) -> list[str]: def collapse_adjacent_markers(tokens: list[str]) -> list[str]:
collapsed: list[str] = [] collapsed: list[str] = []
prev_was_marker = False prev_was_marker = False
for token in tokens: for token in tokens:
@@ -52,7 +52,7 @@ def apply_confidence_markers(words: list[dict], threshold: float | None = None)
else: else:
tokens.append(word["text"]) tokens.append(word["text"])
return " ".join(_collapse_adjacent_markers(tokens)) return " ".join(collapse_adjacent_markers(tokens))
def words_from_characters(prediction: str, confidences: list[float]) -> list[dict]: def words_from_characters(prediction: str, confidences: list[float]) -> list[dict]:

View File

@@ -5,7 +5,7 @@ import os
from spellchecker import SpellChecker from spellchecker import SpellChecker
from confidence import CORRECTION_MARKER, ILLEGIBLE_MARKER, _collapse_adjacent_markers from confidence import CORRECTION_MARKER, ILLEGIBLE_MARKER, collapse_adjacent_markers
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -114,4 +114,4 @@ def correct_text(text: str) -> str:
else: else:
checked.append(ILLEGIBLE_MARKER) checked.append(ILLEGIBLE_MARKER)
return " ".join(_collapse_adjacent_markers(checked)) return " ".join(collapse_adjacent_markers(checked))