From ec85f228c1dee4796b984d9fbe8e399b7fcc7bd6 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 17 Apr 2026 17:21:37 +0200 Subject: [PATCH] refactor(ocr): document > 50 frequency threshold rationale Strict greater-than avoids non-determinism: if multiple candidates share the minimum frequency value, pyspellchecker's ranking is undefined. Co-Authored-By: Claude Sonnet 4.6 --- ocr-service/spell_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocr-service/spell_check.py b/ocr-service/spell_check.py index 2088cd41..8b0bf4d7 100644 --- a/ocr-service/spell_check.py +++ b/ocr-service/spell_check.py @@ -107,7 +107,7 @@ def correct_text(text: str) -> str: continue correction = _spell.correction(word) - if correction and _spell.word_frequency[correction] > 50: + if correction and _spell.word_frequency[correction] > 50: # strict > avoids non-determinism when candidates tie at the frequency floor if word[0].isupper() and not correction[0].isupper(): correction = correction.capitalize() checked.append(leading + correction + CORRECTION_MARKER + trailing)