test(ocr): add resilience tests for tiny image and unexpected exception propagation
Some checks failed
CI / Unit & Component Tests (pull_request) Failing after 2m27s
CI / Backend Unit Tests (pull_request) Failing after 2m37s
CI / Unit & Component Tests (push) Failing after 3m14s
CI / Backend Unit Tests (push) Has been cancelled

Add test for 1×1 image (sub-tile-size) resilience and narrow preprocess_page
fallback from except Exception to (cv2.error, ValueError, MemoryError) so
programming errors propagate instead of being silently swallowed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit was merged in pull request #255.
This commit is contained in:
Marcel
2026-04-17 15:16:17 +02:00
parent 97c94c91f8
commit 4cb7c975f5
2 changed files with 25 additions and 6 deletions

View File

@@ -16,7 +16,8 @@ CLAHE_TILE_SIZE = int(os.environ.get("OCR_CLAHE_TILE_SIZE", "8"))
def preprocess_page(image: Image.Image) -> Image.Image:
"""Apply CLAHE + grayscale + Gaussian blur to improve OCR quality on aged documents.
Falls back silently to the original image if any step fails.
Falls back silently to the original image if cv2, numpy, or memory errors occur.
Unexpected exceptions (programming errors) are allowed to propagate.
"""
try:
img_array = np.array(image)
@@ -40,7 +41,7 @@ def preprocess_page(image: Image.Image) -> Image.Image:
del blurred
return result
except Exception as e:
except (cv2.error, ValueError, MemoryError) as e:
logger.warning(
"preprocess_page failed (falling back to original): %s: %s",
type(e).__name__,

View File

@@ -48,17 +48,35 @@ class TestPreprocessPage:
assert l_mean_after > l_mean_before
def test_does_not_crash_on_sub_tile_size_image(self):
    """preprocess_page must cope with a 1×1 image without raising.

    A single-pixel input is smaller than the default CLAHE tile setting (8)
    in both axes. The test accepts either outcome — a processed image or the
    fallback — as long as a PIL image comes back.
    """
    from preprocessing import preprocess_page

    single_pixel = Image.new("RGB", (1, 1), color=(128, 100, 80))

    assert isinstance(preprocess_page(single_pixel), Image.Image)
def test_falls_back_to_pixel_identical_original_on_cv2_error(self):
"""When cv2 raises, preprocess_page must return the unmodified original image."""
"""When cv2 raises a known error, preprocess_page returns the unmodified original image."""
from preprocessing import preprocess_page
img = Image.new("RGB", (80, 60), color=(123, 45, 67))
original_pixels = list(img.getdata())
with patch("preprocessing.cv2") as mock_cv2:
mock_cv2.cvtColor.side_effect = RuntimeError("cv2 exploded")
with patch("preprocessing.cv2.cvtColor", side_effect=ValueError("bad input")):
result = preprocess_page(img)
result_pixels = list(result.getdata())
assert result_pixels == original_pixels
def test_unexpected_exception_propagates(self):
    """A RuntimeError raised inside preprocessing must reach the caller.

    RuntimeError stands in for a programming error here: the test patches
    cvtColor to raise it and expects preprocess_page to let it escape
    instead of returning a fallback image.
    """
    from preprocessing import preprocess_page

    page = Image.new("RGB", (80, 60))
    failure = RuntimeError("unexpected")

    with patch("preprocessing.cv2.cvtColor", side_effect=failure), pytest.raises(
        RuntimeError, match="unexpected"
    ):
        preprocess_page(page)