Files
familienarchiv/ocr-service/test_preprocessing.py
Marcel 4cb7c975f5
Some checks failed
CI / Unit & Component Tests (pull_request) Failing after 2m27s
CI / Backend Unit Tests (pull_request) Failing after 2m37s
CI / Unit & Component Tests (push) Failing after 3m14s
CI / Backend Unit Tests (push) Has been cancelled
test(ocr): add resilience tests for tiny image and unexpected exception propagation
Add test for 1×1 image (sub-tile-size) resilience and narrow preprocess_page
fallback from except Exception to (cv2.error, ValueError, MemoryError) so
programming errors propagate instead of being silently swallowed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-17 15:16:17 +02:00

83 lines
3.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests for the image preprocessing pipeline."""
import numpy as np
import pytest
from PIL import Image
from unittest.mock import patch
def _make_yellowed_image(width=100, height=100):
    """Build a dark, low-contrast RGB test page with seeded per-pixel noise.

    Each channel is a constant base value plus uniform integer noise:
    R = 30 ± 8, G = 20 ± 5, B = 10 ± 3. The generator is seeded with 42, so
    the image is identical on every call for a given size.

    The noise gives every local region a non-trivial histogram, which the
    CLAHE-oriented tests in this file rely on to see a measurable change.

    NOTE(review): an earlier version of this docstring quoted L ≈ 80-100 in
    OpenCV's uint8 LAB space for these colours; a rough sRGB → LAB conversion
    suggests a much lower value (roughly 10-25) — confirm before relying on
    either figure.

    Args:
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        A PIL image in mode "RGB" with size ``(width, height)``.
    """
    rng = np.random.default_rng(42)
    plane_shape = (height, width)
    # (base value, noise half-width) per channel. The order must stay R, G, B
    # so the seeded random stream produces the same pixels as before.
    channel_specs = ((30, 8), (20, 5), (10, 3))

    arr = np.zeros((height, width, 3), dtype=np.uint8)
    for channel, (base, spread) in enumerate(channel_specs):
        # The upper bound of rng.integers is exclusive, hence spread + 1.
        noise = rng.integers(-spread, spread + 1, plane_shape)
        arr[:, :, channel] = np.clip(base + noise, 0, 255)

    # arr is already uint8 with shape (H, W, 3), so Pillow infers mode "RGB";
    # no extra astype copy or explicit (deprecated) mode argument is needed.
    return Image.fromarray(arr)
class TestPreprocessPage:
    """Behavioural tests for ``preprocessing.preprocess_page``.

    ``preprocess_page`` is imported inside each test rather than at module
    level, matching the original layout of this file.
    """

    def test_output_has_same_dimensions_as_input(self):
        """The returned image has the same (width, height) as the input."""
        from preprocessing import preprocess_page

        source = Image.new("RGB", (150, 200))
        processed = preprocess_page(source)

        assert processed.size == source.size

    def test_l_channel_mean_increases_on_yellowed_image(self):
        """Mean lightness of the dark synthetic page goes up after preprocessing.

        The baseline is the mean of the L channel of the input converted to
        LAB with OpenCV; it is compared against the mean of the output pixels.
        """
        from preprocessing import preprocess_page
        import cv2

        page = _make_yellowed_image()
        lab_input = cv2.cvtColor(np.array(page), cv2.COLOR_RGB2LAB)
        mean_l_input = float(lab_input[:, :, 0].mean())

        enhanced = preprocess_page(page)
        # The test treats the output as a grayscale (mode "L") image whose
        # values are the CLAHE-enhanced L channel, so a plain mean suffices.
        mean_l_output = float(np.array(enhanced).mean())

        assert mean_l_output > mean_l_input

    def test_does_not_crash_on_sub_tile_size_image(self):
        """A 1×1 image is smaller than the CLAHE tile (8×8) in both axes.

        preprocess_page must not raise for it: either the enhancement works
        or the function silently falls back. Only the return type is checked.
        """
        from preprocessing import preprocess_page

        tiny = Image.new("RGB", (1, 1), color=(128, 100, 80))
        outcome = preprocess_page(tiny)

        assert isinstance(outcome, Image.Image)

    def test_falls_back_to_pixel_identical_original_on_cv2_error(self):
        """If cvtColor raises ValueError, the original pixels are returned unchanged."""
        from preprocessing import preprocess_page

        source = Image.new("RGB", (80, 60), color=(123, 45, 67))
        expected_pixels = list(source.getdata())

        # Force the colour conversion inside preprocess_page to fail.
        simulated_failure = ValueError("bad input")
        with patch("preprocessing.cv2.cvtColor", side_effect=simulated_failure):
            returned = preprocess_page(source)

        assert list(returned.getdata()) == expected_pixels

    def test_unexpected_exception_propagates(self):
        """A RuntimeError (programming error) must escape preprocess_page.

        The fallback path is meant for known failures only; anything else
        should surface to the caller instead of being swallowed.
        """
        from preprocessing import preprocess_page

        source = Image.new("RGB", (80, 60))
        cvt_patch = patch(
            "preprocessing.cv2.cvtColor",
            side_effect=RuntimeError("unexpected"),
        )
        with cvt_patch, pytest.raises(RuntimeError, match="unexpected"):
            preprocess_page(source)