feat(ocr): per-sender specialized Kurrent models with automatic active-learning retraining #263

Merged
marcel merged 32 commits from feat/issue-253-sender-models into main 2026-04-18 12:30:57 +02:00
Showing only changes of commit 07035b9fa9 - Show all commits

View File

@@ -1,9 +1,15 @@
"""Kraken OCR engine wrapper — historical HTR model support for Kurrent/Suetterlin."""
from __future__ import annotations
import collections
import logging
import os
import threading
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from PIL.Image import Image
logger = logging.getLogger(__name__)
@@ -94,7 +100,7 @@ def is_available() -> bool:
return _model is not None
def extract_page_blocks(image, page_idx: int, language: str = "de",
def extract_page_blocks(image: Image, page_idx: int, language: str = "de",
sender_model_path: str | None = None) -> list[dict]:
"""Run Kraken segmentation + recognition on a single PIL image.
@@ -148,7 +154,7 @@ def extract_page_blocks(image, page_idx: int, language: str = "de",
return blocks
def extract_region_text(image, x: float, y: float, w: float, h: float,
def extract_region_text(image: Image, x: float, y: float, w: float, h: float,
sender_model_path: str | None = None) -> str:
"""Crop image to a normalized region and run Kraken recognition on the crop.