2026-04-14 10:31:35 +02:00
1 changed files with 28 additions and 5 deletions
--- a/ocr-service/engines/kraken.py
+++ b/ocr-service/engines/kraken.py
@@ -82,10 +82,13 @@ def extract_page_blocks(image, page_idx: int, language: str = "de") -> list[dict
 def extract_region_text(image, x: float, y: float, w: float, h: float) -> str:
    """Crop image to a normalized region and run Kraken recognition on the crop.

-    Used for guided OCR — skips full-page layout detection and only processes
-    the given bounding box. Coordinates are normalized to [0, 1].
+    Used for guided OCR — skips full-page layout detection entirely.
+    Instead of calling blla.segment() (which crashes on small crops), the
+    whole crop is treated as one line with a single synthetic baseline.
+    The region arguments x, y, w and h are normalized to [0, 1].
    """
-    from kraken import blla, rpred
+    from kraken import rpred
+    from kraken.containers import Segmentation, BaselineLine

    if _model is None:
        raise RuntimeError("Kraken model is not loaded")
@@ -97,8 +100,28 @@ def extract_region_text(image, x: float, y: float, w: float, h: float) -> str:
    y2 = min(ph, int((y + h) * ph))
    crop = image.crop((x1, y1, x2, y2))

-    baseline_seg = blla.segment(crop)
-    pred_it = rpred.rpred(_model, crop, baseline_seg)
+    cw, ch = crop.size
+    if cw == 0 or ch == 0:
+        return ""
+
+    # One synthetic baseline at mid-height, full crop width. NOTE(review): x == cw / y == ch lie one past the last pixel — confirm kraken accepts this.
+    mid_y = ch // 2
+    synthetic_seg = Segmentation(
+        type="baselines",
+        imagename="",
+        text_direction="horizontal-lr",
+        script_detection=False,
+        lines=[
+            BaselineLine(
+                id="line0",
+                baseline=[(0, mid_y), (cw, mid_y)],
+                boundary=[(0, 0), (cw, 0), (cw, ch), (0, ch)],
+            )
+        ],
+        regions={},
+        line_orders=[],
+    )
+    pred_it = rpred.rpred(_model, crop, synthetic_seg)
    return " ".join(r.prediction for r in pred_it)