test(ocr): add resilience tests for tiny image and unexpected exception propagation
Add a test for 1×1 image (sub-tile-size) resilience, and narrow the preprocess_page fallback from `except Exception` to `except (cv2.error, ValueError, MemoryError)` so that programming errors propagate instead of being silently swallowed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit was merged in pull request #255.
This commit is contained in:
@@ -16,7 +16,8 @@ CLAHE_TILE_SIZE = int(os.environ.get("OCR_CLAHE_TILE_SIZE", "8"))
|
|||||||
def preprocess_page(image: Image.Image) -> Image.Image:
|
def preprocess_page(image: Image.Image) -> Image.Image:
|
||||||
"""Apply CLAHE + grayscale + Gaussian blur to improve OCR quality on aged documents.
|
"""Apply CLAHE + grayscale + Gaussian blur to improve OCR quality on aged documents.
|
||||||
|
|
||||||
Falls back silently to the original image if any step fails.
|
Falls back silently to the original image if cv2, numpy, or memory errors occur.
|
||||||
|
Unexpected exceptions (programming errors) are allowed to propagate.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
img_array = np.array(image)
|
img_array = np.array(image)
|
||||||
@@ -40,7 +41,7 @@ def preprocess_page(image: Image.Image) -> Image.Image:
|
|||||||
del blurred
|
del blurred
|
||||||
|
|
||||||
return result
|
return result
|
||||||
except Exception as e:
|
except (cv2.error, ValueError, MemoryError) as e:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"preprocess_page failed (falling back to original): %s: %s",
|
"preprocess_page failed (falling back to original): %s: %s",
|
||||||
type(e).__name__,
|
type(e).__name__,
|
||||||
|
|||||||
@@ -48,17 +48,35 @@ class TestPreprocessPage:
|
|||||||
|
|
||||||
assert l_mean_after > l_mean_before
|
assert l_mean_after > l_mean_before
|
||||||
|
|
||||||
|
def test_does_not_crash_on_sub_tile_size_image(self):
|
||||||
|
"""A 1×1 image is smaller than the CLAHE tile (8×8) in both axes.
|
||||||
|
preprocess_page must not raise — it either succeeds or falls back silently."""
|
||||||
|
from preprocessing import preprocess_page
|
||||||
|
|
||||||
|
img = Image.new("RGB", (1, 1), color=(128, 100, 80))
|
||||||
|
result = preprocess_page(img)
|
||||||
|
|
||||||
|
assert isinstance(result, Image.Image)
|
||||||
|
|
||||||
def test_falls_back_to_pixel_identical_original_on_cv2_error(self):
|
def test_falls_back_to_pixel_identical_original_on_cv2_error(self):
|
||||||
"""When cv2 raises, preprocess_page must return the unmodified original image."""
|
"""When cv2 raises a known error, preprocess_page returns the unmodified original image."""
|
||||||
from preprocessing import preprocess_page
|
from preprocessing import preprocess_page
|
||||||
|
|
||||||
img = Image.new("RGB", (80, 60), color=(123, 45, 67))
|
img = Image.new("RGB", (80, 60), color=(123, 45, 67))
|
||||||
original_pixels = list(img.getdata())
|
original_pixels = list(img.getdata())
|
||||||
|
|
||||||
with patch("preprocessing.cv2") as mock_cv2:
|
with patch("preprocessing.cv2.cvtColor", side_effect=ValueError("bad input")):
|
||||||
mock_cv2.cvtColor.side_effect = RuntimeError("cv2 exploded")
|
|
||||||
|
|
||||||
result = preprocess_page(img)
|
result = preprocess_page(img)
|
||||||
|
|
||||||
result_pixels = list(result.getdata())
|
result_pixels = list(result.getdata())
|
||||||
assert result_pixels == original_pixels
|
assert result_pixels == original_pixels
|
||||||
|
|
||||||
|
def test_unexpected_exception_propagates(self):
|
||||||
|
"""A RuntimeError (programming error) must propagate — not be swallowed by the cv2 fallback."""
|
||||||
|
from preprocessing import preprocess_page
|
||||||
|
|
||||||
|
img = Image.new("RGB", (80, 60))
|
||||||
|
|
||||||
|
with patch("preprocessing.cv2.cvtColor", side_effect=RuntimeError("unexpected")):
|
||||||
|
with pytest.raises(RuntimeError, match="unexpected"):
|
||||||
|
preprocess_page(img)
|
||||||
|
|||||||
Reference in New Issue
Block a user