diff --git a/ocr-service/engines/kraken.py b/ocr-service/engines/kraken.py index a8c09f12..8deae3ef 100644 --- a/ocr-service/engines/kraken.py +++ b/ocr-service/engines/kraken.py @@ -104,7 +104,9 @@ def extract_region_text(image, x: float, y: float, w: float, h: float) -> str: if cw == 0 or ch == 0: return "" - # Single synthetic baseline at vertical midpoint, spanning full crop width + # Single synthetic baseline at vertical midpoint, spanning full crop width. + # Kraken's bounds check is >= (not >), so all coordinates must be < image + # dimension — use cw-1 / ch-1 to stay strictly inside. mid_y = ch // 2 synthetic_seg = Segmentation( type="baselines", @@ -114,8 +116,8 @@ def extract_region_text(image, x: float, y: float, w: float, h: float) -> str: lines=[ BaselineLine( id="line0", - baseline=[(0, mid_y), (cw, mid_y)], - boundary=[(0, 0), (cw, 0), (cw, ch), (0, ch)], + baseline=[(0, mid_y), (cw - 1, mid_y)], + boundary=[(0, 0), (cw - 1, 0), (cw - 1, ch - 1), (0, ch - 1)], ) ], regions={},