diff --git a/ocr-service/engines/kraken.py b/ocr-service/engines/kraken.py index 4f5c30e1..a8c09f12 100644 --- a/ocr-service/engines/kraken.py +++ b/ocr-service/engines/kraken.py @@ -82,10 +82,13 @@ def extract_page_blocks(image, page_idx: int, language: str = "de") -> list[dict def extract_region_text(image, x: float, y: float, w: float, h: float) -> str: """Crop image to a normalized region and run Kraken recognition on the crop. - Used for guided OCR — skips full-page layout detection and only processes - the given bounding box. Coordinates are normalized to [0, 1]. + Used for guided OCR — skips full-page layout detection entirely. + A single synthetic baseline spanning the full crop width is used so that + blla.segment() (which crashes on small crops) is never called. + Coordinates are normalized to [0, 1]. """ - from kraken import blla, rpred + from kraken import rpred + from kraken.containers import Segmentation, BaselineLine if _model is None: raise RuntimeError("Kraken model is not loaded") @@ -97,8 +100,28 @@ def extract_region_text(image, x: float, y: float, w: float, h: float) -> str: y2 = min(ph, int((y + h) * ph)) crop = image.crop((x1, y1, x2, y2)) - baseline_seg = blla.segment(crop) - pred_it = rpred.rpred(_model, crop, baseline_seg) + cw, ch = crop.size + if cw == 0 or ch == 0: + return "" + + # Single synthetic baseline at vertical midpoint, spanning full crop width + mid_y = ch // 2 + synthetic_seg = Segmentation( + type="baselines", + imagename="", + text_direction="horizontal-lr", + script_detection=False, + lines=[ + BaselineLine( + id="line0", + baseline=[(0, mid_y), (cw, mid_y)], + boundary=[(0, 0), (cw, 0), (cw, ch), (0, ch)], + ) + ], + regions={}, + line_orders=[], + ) + pred_it = rpred.rpred(_model, crop, synthetic_seg) return " ".join(r.prediction for r in pred_it)