feat(ocr): increment ocr_skipped_pages_total on per-page engine failure
Bumps the counter in both /ocr/stream except blocks (standard and guided generators) so the existing skipped_pages local variable now also flows into Prometheus.

Refs #652 (AC3b)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -187,3 +187,46 @@ async def test_ocr_pages_total_incremented_once_per_page_in_stream(fresh_metrics
|
||||
|
||||
value = fresh_metrics.ocr_pages_total.labels(engine="surya")._value.get()
|
||||
assert value == 3.0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ocr_skipped_pages_total_incremented_when_engine_raises_for_a_page(fresh_metrics):
    """A page whose engine call raises is counted as skipped; the other page completes.

    The patched surya extractor raises on its first call and returns blocks on every
    later call. For the two-page document used here the test expects an error event
    in the stream, ocr_skipped_pages_total == 1 and ocr_pages_total{engine="surya"} == 1.
    """
    pages = [Image.new("RGB", (100, 100)) for _ in range(2)]
    page_blocks = [
        {
            "pageNumber": 1,
            "x": 0.0,
            "y": 0.0,
            "width": 1.0,
            "height": 1.0,
            "polygon": None,
            "text": "ok",
            "words": [],
        }
    ]
    request_body = {
        "pdfUrl": "http://minio/doc.pdf",
        "scriptType": "TYPEWRITER",
        "language": "de",
    }
    calls_seen = 0

    def fail_first_call_only(*args, **kwargs):
        # Only the very first extraction raises; every later call yields blocks.
        nonlocal calls_seen
        calls_seen += 1
        if calls_seen > 1:
            return page_blocks
        raise RuntimeError("synthetic engine failure")

    with patch("main.kraken_engine.load_models"), \
         patch("main.load_spell_checker"), \
         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=pages), \
         patch("main.preprocess_page", side_effect=lambda img: img), \
         patch("main.surya_engine.extract_page_blocks", side_effect=fail_first_call_only):
        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
            import main as main_module

            # Flip the module-level readiness flag for the duration of the request.
            main_module._models_ready = True
            try:
                async with client.stream("POST", "/ocr/stream", json=request_body) as response:
                    assert response.status_code == 200
                    # Drain the whole stream, keeping only lines that carry an error event.
                    error_lines = [
                        ln
                        async for ln in response.aiter_lines()
                        if ln and '"type": "error"' in ln
                    ]
                    assert error_lines
            finally:
                main_module._models_ready = False

    skipped_total = fresh_metrics.ocr_skipped_pages_total._value.get()
    assert skipped_total == 1.0
    # The second page still succeeds.
    surya_pages_total = fresh_metrics.ocr_pages_total.labels(engine="surya")._value.get()
    assert surya_pages_total == 1.0
|
||||
|
||||
Reference in New Issue
Block a user