fix(ocr): use synthetic baseline in guided OCR to avoid blla crash on small crops
blla.segment() runs a full-page layout-detection model, and it kills the worker process when called on tiny annotation crops (e.g. 597x89 px). For guided OCR the annotation region already IS the text line, so segmentation is unnecessary. Replace the blla call with a single synthetic BaselineLine that spans the full crop width — rpred then runs recognition on the whole crop. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -82,10 +82,13 @@ def extract_page_blocks(image, page_idx: int, language: str = "de") -> list[dict
|
||||
def extract_region_text(image, x: float, y: float, w: float, h: float) -> str:
|
||||
"""Crop image to a normalized region and run Kraken recognition on the crop.
|
||||
|
||||
Used for guided OCR — skips full-page layout detection and only processes
|
||||
the given bounding box. Coordinates are normalized to [0, 1].
|
||||
Used for guided OCR — skips full-page layout detection entirely.
|
||||
A single synthetic baseline spanning the full crop width is used so that
|
||||
blla.segment() (which crashes on small crops) is never called.
|
||||
Coordinates are normalized to [0, 1].
|
||||
"""
|
||||
from kraken import blla, rpred
|
||||
from kraken import rpred
|
||||
from kraken.containers import Segmentation, BaselineLine
|
||||
|
||||
if _model is None:
|
||||
raise RuntimeError("Kraken model is not loaded")
|
||||
@@ -97,8 +100,28 @@ def extract_region_text(image, x: float, y: float, w: float, h: float) -> str:
|
||||
y2 = min(ph, int((y + h) * ph))
|
||||
crop = image.crop((x1, y1, x2, y2))
|
||||
|
||||
baseline_seg = blla.segment(crop)
|
||||
pred_it = rpred.rpred(_model, crop, baseline_seg)
|
||||
cw, ch = crop.size
|
||||
if cw == 0 or ch == 0:
|
||||
return ""
|
||||
|
||||
# Single synthetic baseline at vertical midpoint, spanning full crop width
|
||||
mid_y = ch // 2
|
||||
synthetic_seg = Segmentation(
|
||||
type="baselines",
|
||||
imagename="",
|
||||
text_direction="horizontal-lr",
|
||||
script_detection=False,
|
||||
lines=[
|
||||
BaselineLine(
|
||||
id="line0",
|
||||
baseline=[(0, mid_y), (cw, mid_y)],
|
||||
boundary=[(0, 0), (cw, 0), (cw, ch), (0, ch)],
|
||||
)
|
||||
],
|
||||
regions={},
|
||||
line_orders=[],
|
||||
)
|
||||
pred_it = rpred.rpred(_model, crop, synthetic_seg)
|
||||
return " ".join(r.prediction for r in pred_it)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user