From 3e34366702a2e3abd2d309c0fce981a5e5d10237 Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 13 Apr 2026 16:21:00 +0200 Subject: [PATCH] fix(ocr): use cw-1/ch-1 for synthetic baseline bounds to pass Kraken's >= check Kraken's segmentation bounds check rejects coordinates where any point satisfies x >= im.width or y >= im.height (the comparison is >=, not >, so a coordinate equal to the image dimension is rejected). Using (cw, ch) as the boundary corner was triggering this for every crop. Changed to (cw-1, ch-1) so all coordinates are strictly inside the image. Co-Authored-By: Claude Sonnet 4.6 --- ocr-service/engines/kraken.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ocr-service/engines/kraken.py b/ocr-service/engines/kraken.py index a8c09f12..8deae3ef 100644 --- a/ocr-service/engines/kraken.py +++ b/ocr-service/engines/kraken.py @@ -104,7 +104,9 @@ def extract_region_text(image, x: float, y: float, w: float, h: float) -> str: if cw == 0 or ch == 0: return "" - # Single synthetic baseline at vertical midpoint, spanning full crop width + # Single synthetic baseline at vertical midpoint, spanning full crop width. + # Kraken's bounds check is >= (not >), so all coordinates must be < image + # dimension — use cw-1 / ch-1 to stay strictly inside. mid_y = ch // 2 synthetic_seg = Segmentation( type="baselines", @@ -114,8 +116,8 @@ def extract_region_text(image, x: float, y: float, w: float, h: float) -> str: lines=[ BaselineLine( id="line0", - baseline=[(0, mid_y), (cw, mid_y)], - boundary=[(0, 0), (cw, 0), (cw, ch), (0, ch)], + baseline=[(0, mid_y), (cw - 1, mid_y)], + boundary=[(0, 0), (cw - 1, 0), (cw - 1, ch - 1), (0, ch - 1)], ) ], regions={},