feat(ocr): add SSRF protection for PDF URL downloads
Validates PDF download URLs against an ALLOWED_PDF_HOSTS allowlist (default: minio,localhost,127.0.0.1) and disables redirect following to prevent redirect-based SSRF.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit was merged in pull request #229.
This commit is contained in:
@@ -239,6 +239,38 @@ async def test_ocr_stream_returns_400_when_kraken_unavailable_for_kurrent(mock_i
|
||||
assert response.status_code == 400
|
||||
|
||||
|
||||
# ─── SSRF protection ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ocr_stream_rejects_disallowed_host():
    """POST /ocr/stream with a PDF URL on evil.example.com must yield HTTP 400.

    The error payload's ``detail`` field is expected to contain the phrase
    "not allowed".
    """
    payload = {
        "pdfUrl": "http://evil.example.com/malicious.pdf",
        "scriptType": "TYPEWRITER",
    }

    # Mark the models as ready so the request gets past any readiness gate
    # (presumably checked before URL validation; confirm against main.py).
    with patch("main._models_ready", True):
        transport = ASGITransport(app=app)
        async with AsyncClient(transport=transport, base_url="http://test") as http:
            reply = await http.post("/ocr/stream", json=payload)

        assert reply.status_code == 400
        assert "not allowed" in reply.json()["detail"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ocr_stream_allows_minio_host(mock_images):
    """POST /ocr/stream with a PDF URL on the ``minio`` host must yield HTTP 200.

    The PDF download/convert step is replaced by an AsyncMock returning
    ``mock_images`` and the Surya engine is stubbed, so no real download or
    OCR takes place.
    """
    request_body = {
        "pdfUrl": "http://minio/test.pdf",
        "scriptType": "TYPEWRITER",
    }
    download_stub = patch(
        "main._download_and_convert_pdf",
        new_callable=AsyncMock,
        return_value=mock_images,
    )
    readiness_stub = patch("main._models_ready", True)

    with download_stub, readiness_stub, patch("main.surya_engine") as surya_stub:
        # One fake block per page keeps the stream non-empty.
        surya_stub.extract_page_blocks.return_value = [_make_block(0)]

        transport = ASGITransport(app=app)
        async with AsyncClient(transport=transport, base_url="http://test") as http:
            reply = await http.post("/ocr/stream", json=request_body)

        assert reply.status_code == 200
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_ocr_stream_applies_confidence_markers(mock_images):
|
||||
"""Low-confidence words should be replaced with [unleserlich] in the stream output."""
|
||||
|
||||
Reference in New Issue
Block a user