feat(ocr): full OCR pipeline with polygon annotations, training, and guided mode #232

Merged
marcel merged 40 commits from feat/issue-226-227-ocr-pipeline-polygon into main 2026-04-14 10:31:35 +02:00
Showing only changes of commit 3e34366702 - Show all commits

View File

@@ -104,7 +104,9 @@ def extract_region_text(image, x: float, y: float, w: float, h: float) -> str:
if cw == 0 or ch == 0:
return ""
- # Single synthetic baseline at vertical midpoint, spanning full crop width
+ # Single synthetic baseline at vertical midpoint, spanning full crop width.
+ # Kraken's bounds check is >= (not >), so all coordinates must be < image
+ # dimension — use cw-1 / ch-1 to stay strictly inside.
mid_y = ch // 2
synthetic_seg = Segmentation(
type="baselines",
@@ -114,8 +116,8 @@ def extract_region_text(image, x: float, y: float, w: float, h: float) -> str:
lines=[
BaselineLine(
id="line0",
- baseline=[(0, mid_y), (cw, mid_y)],
- boundary=[(0, 0), (cw, 0), (cw, ch), (0, ch)],
+ baseline=[(0, mid_y), (cw - 1, mid_y)],
+ boundary=[(0, 0), (cw - 1, 0), (cw - 1, ch - 1), (0, ch - 1)],
)
],
regions={},