fix(ocr): use cw-1/ch-1 for synthetic baseline bounds to pass Kraken's >= check
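Kraken's segmentation bounds check rejects a line if any of its points satisfies x >= im.width or y >= im.height (the comparison is >=, not >, so a coordinate equal to the image dimension is already out of bounds). Using (cw, 0), (cw, ch) and (0, ch) as boundary corners, and (cw, mid_y) as the baseline endpoint, was triggering this for every crop. Changed the maximum coordinates to cw-1 / ch-1 so all baseline and boundary points are strictly inside the image.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>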
This commit is contained in:
@@ -104,7 +104,9 @@ def extract_region_text(image, x: float, y: float, w: float, h: float) -> str:
|
|||||||
if cw == 0 or ch == 0:
|
if cw == 0 or ch == 0:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
# Single synthetic baseline at vertical midpoint, spanning full crop width
|
# Single synthetic baseline at vertical midpoint, spanning full crop width.
|
||||||
|
# Kraken's bounds check is >= (not >), so all coordinates must be < image
|
||||||
|
# dimension — use cw-1 / ch-1 to stay strictly inside.
|
||||||
mid_y = ch // 2
|
mid_y = ch // 2
|
||||||
synthetic_seg = Segmentation(
|
synthetic_seg = Segmentation(
|
||||||
type="baselines",
|
type="baselines",
|
||||||
@@ -114,8 +116,8 @@ def extract_region_text(image, x: float, y: float, w: float, h: float) -> str:
|
|||||||
lines=[
|
lines=[
|
||||||
BaselineLine(
|
BaselineLine(
|
||||||
id="line0",
|
id="line0",
|
||||||
baseline=[(0, mid_y), (cw, mid_y)],
|
baseline=[(0, mid_y), (cw - 1, mid_y)],
|
||||||
boundary=[(0, 0), (cw, 0), (cw, ch), (0, ch)],
|
boundary=[(0, 0), (cw - 1, 0), (cw - 1, ch - 1), (0, ch - 1)],
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
regions={},
|
regions={},
|
||||||
|
|||||||
Reference in New Issue
Block a user