diff --git a/ocr-service/preprocessing.py b/ocr-service/preprocessing.py index c6f77b87..791d666e 100644 --- a/ocr-service/preprocessing.py +++ b/ocr-service/preprocessing.py @@ -16,7 +16,8 @@ CLAHE_TILE_SIZE = int(os.environ.get("OCR_CLAHE_TILE_SIZE", "8")) def preprocess_page(image: Image.Image) -> Image.Image: """Apply CLAHE + grayscale + Gaussian blur to improve OCR quality on aged documents. - Falls back silently to the original image if any step fails. + Falls back silently to the original image if cv2, numpy, or memory errors occur. + Unexpected exceptions (programming errors) are allowed to propagate. """ try: img_array = np.array(image) @@ -40,7 +41,7 @@ def preprocess_page(image: Image.Image) -> Image.Image: del blurred return result - except Exception as e: + except (cv2.error, ValueError, MemoryError) as e: logger.warning( "preprocess_page failed (falling back to original): %s: %s", type(e).__name__, diff --git a/ocr-service/test_preprocessing.py b/ocr-service/test_preprocessing.py index 41ec09bd..6bb0e83f 100644 --- a/ocr-service/test_preprocessing.py +++ b/ocr-service/test_preprocessing.py @@ -48,17 +48,35 @@ class TestPreprocessPage: assert l_mean_after > l_mean_before + def test_does_not_crash_on_sub_tile_size_image(self): + """A 1×1 image is smaller than the CLAHE tile (8×8) in both axes. + preprocess_page must not raise — it either succeeds or falls back silently.""" + from preprocessing import preprocess_page + + img = Image.new("RGB", (1, 1), color=(128, 100, 80)) + result = preprocess_page(img) + + assert isinstance(result, Image.Image) + def test_falls_back_to_pixel_identical_original_on_cv2_error(self): - """When cv2 raises, preprocess_page must return the unmodified original image.""" + """When cv2 raises a known error, preprocess_page returns the unmodified original image.""" from preprocessing import preprocess_page img = Image.new("RGB", (80, 60), color=(123, 45, 67)) original_pixels = list(img.getdata()) - with patch("preprocessing.cv2") as mock_cv2: - mock_cv2.cvtColor.side_effect = RuntimeError("cv2 exploded") - + with patch("preprocessing.cv2.cvtColor", side_effect=ValueError("bad input")): result = preprocess_page(img) result_pixels = list(result.getdata()) assert result_pixels == original_pixels + + def test_unexpected_exception_propagates(self): + """A RuntimeError (programming error) must propagate — not be swallowed by the cv2 fallback.""" + from preprocessing import preprocess_page + + img = Image.new("RGB", (80, 60)) + + with patch("preprocessing.cv2.cvtColor", side_effect=RuntimeError("unexpected")): + with pytest.raises(RuntimeError, match="unexpected"): + preprocess_page(img)