feat(ocr): full OCR pipeline with polygon annotations, training, and guided mode #232

Merged
marcel merged 40 commits from feat/issue-226-227-ocr-pipeline-polygon into main 2026-04-14 10:31:35 +02:00
Showing only changes of commit 33dc4654e5 - Show all commits

View File

@@ -47,7 +47,7 @@ def extract_page_blocks(image, page_idx: int, language: str = "de") -> list[dict
pred_it = rpred.rpred(_model, image, baseline_seg)
for record in pred_it:
-polygon_pts = record.cuts if hasattr(record, "cuts") else []
+polygon_pts = record.boundary if hasattr(record, "boundary") and record.boundary else []
if polygon_pts:
xs = [p[0] for p in polygon_pts]
@@ -55,8 +55,8 @@ def extract_page_blocks(image, page_idx: int, language: str = "de") -> list[dict
x1, y1 = min(xs), min(ys)
x2, y2 = max(xs), max(ys)
else:
-xs = [p[0] for p in record.line]
-ys = [p[1] for p in record.line]
+xs = [p[0] for p in record.baseline]
+ys = [p[1] for p in record.baseline]
x1, y1 = min(xs), min(ys) - 5
x2, y2 = max(xs), max(ys) + 5