fix(ocr): unblock event loop during OCR and show errors in UI
OCR engines are CPU-bound and were blocking Uvicorn's single async event loop, which made /health unresponsive for as long as a document was being processed. As a result, new OCR requests failed silently (health check failure → no DB record → UI shows NONE).

Wrap the engine calls in asyncio.to_thread() to keep the event loop free. Also surface OCR trigger errors in the frontend instead of silently resetting the spinner.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -7,6 +7,7 @@ import TranscriptionEditView from '$lib/components/TranscriptionEditView.svelte'
|
|||||||
import TranscriptionReadView from '$lib/components/TranscriptionReadView.svelte';
|
import TranscriptionReadView from '$lib/components/TranscriptionReadView.svelte';
|
||||||
import TranscriptionPanelHeader from '$lib/components/TranscriptionPanelHeader.svelte';
|
import TranscriptionPanelHeader from '$lib/components/TranscriptionPanelHeader.svelte';
|
||||||
import type { TranscriptionBlockData } from '$lib/types';
|
import type { TranscriptionBlockData } from '$lib/types';
|
||||||
|
import { getErrorMessage } from '$lib/errors';
|
||||||
|
|
||||||
let { data } = $props();
|
let { data } = $props();
|
||||||
|
|
||||||
@@ -129,6 +130,7 @@ async function reviewToggle(blockId: string) {
|
|||||||
|
|
||||||
let ocrRunning = $state(false);
|
let ocrRunning = $state(false);
|
||||||
let ocrProgressMessage = $state('');
|
let ocrProgressMessage = $state('');
|
||||||
|
let ocrErrorMessage = $state('');
|
||||||
let ocrPollTimer = $state<ReturnType<typeof setInterval> | null>(null);
|
let ocrPollTimer = $state<ReturnType<typeof setInterval> | null>(null);
|
||||||
|
|
||||||
function translateOcrProgress(code: string): string {
|
function translateOcrProgress(code: string): string {
|
||||||
@@ -154,6 +156,7 @@ function translateOcrProgress(code: string): string {
|
|||||||
|
|
||||||
async function triggerOcr(scriptType: string) {
|
async function triggerOcr(scriptType: string) {
|
||||||
ocrRunning = true;
|
ocrRunning = true;
|
||||||
|
ocrErrorMessage = '';
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`/api/documents/${doc.id}/ocr`, {
|
const res = await fetch(`/api/documents/${doc.id}/ocr`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
@@ -165,10 +168,14 @@ async function triggerOcr(scriptType: string) {
|
|||||||
pollOcrJob(data.jobId);
|
pollOcrJob(data.jobId);
|
||||||
} else {
|
} else {
|
||||||
ocrRunning = false;
|
ocrRunning = false;
|
||||||
|
const body = await res.json().catch(() => null);
|
||||||
|
const code = (body as { code?: string } | null)?.code;
|
||||||
|
ocrErrorMessage = code ? getErrorMessage(code) : m.ocr_status_error();
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error('Failed to trigger OCR:', e);
|
console.error('Failed to trigger OCR:', e);
|
||||||
ocrRunning = false;
|
ocrRunning = false;
|
||||||
|
ocrErrorMessage = m.ocr_status_error();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -185,6 +192,9 @@ function pollOcrJob(jobId: string) {
|
|||||||
ocrPollTimer = null;
|
ocrPollTimer = null;
|
||||||
ocrRunning = false;
|
ocrRunning = false;
|
||||||
ocrProgressMessage = '';
|
ocrProgressMessage = '';
|
||||||
|
if (job.status === 'FAILED') {
|
||||||
|
ocrErrorMessage = m.ocr_status_error();
|
||||||
|
}
|
||||||
await loadTranscriptionBlocks();
|
await loadTranscriptionBlocks();
|
||||||
annotationReloadKey++;
|
annotationReloadKey++;
|
||||||
panelMode = transcriptionBlocks.length > 0 ? 'read' : 'edit';
|
panelMode = transcriptionBlocks.length > 0 ? 'read' : 'edit';
|
||||||
@@ -399,6 +409,11 @@ onMount(() => {
|
|||||||
onClose={() => (transcribeMode = false)}
|
onClose={() => (transcribeMode = false)}
|
||||||
/>
|
/>
|
||||||
<div class="flex-1 overflow-y-auto">
|
<div class="flex-1 overflow-y-auto">
|
||||||
|
{#if ocrErrorMessage}
|
||||||
|
<div class="mx-4 mt-4 rounded-sm border border-red-200 bg-red-50 px-4 py-3">
|
||||||
|
<p class="text-sm text-red-700">{ocrErrorMessage}</p>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
{#if ocrRunning}
|
{#if ocrRunning}
|
||||||
<div class="flex flex-1 flex-col items-center justify-center px-6 py-12 text-center">
|
<div class="flex flex-1 flex-col items-center justify-center px-6 py-12 text-center">
|
||||||
<svg
|
<svg
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
"""OCR microservice — FastAPI app with Surya and Kraken engine support."""
|
"""OCR microservice — FastAPI app with Surya and Kraken engine support."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
@@ -52,6 +53,7 @@ async def run_ocr(request: OcrRequest):
|
|||||||
|
|
||||||
Downloads the PDF from the provided URL, converts pages to images,
|
Downloads the PDF from the provided URL, converts pages to images,
|
||||||
and runs the appropriate OCR engine based on scriptType.
|
and runs the appropriate OCR engine based on scriptType.
|
||||||
|
OCR engines run in a thread pool so the event loop stays free for /health.
|
||||||
"""
|
"""
|
||||||
if not _models_ready:
|
if not _models_ready:
|
||||||
raise HTTPException(status_code=503, detail="Models not loaded yet")
|
raise HTTPException(status_code=503, detail="Models not loaded yet")
|
||||||
@@ -66,10 +68,10 @@ async def run_ocr(request: OcrRequest):
|
|||||||
status_code=400,
|
status_code=400,
|
||||||
detail="Kraken model not available — cannot process Kurrent script",
|
detail="Kraken model not available — cannot process Kurrent script",
|
||||||
)
|
)
|
||||||
blocks = kraken_engine.extract_blocks(images, request.language)
|
blocks = await asyncio.to_thread(kraken_engine.extract_blocks, images, request.language)
|
||||||
else:
|
else:
|
||||||
# TYPEWRITER, HANDWRITING_LATIN, UNKNOWN — all use Surya
|
# TYPEWRITER, HANDWRITING_LATIN, UNKNOWN — all use Surya
|
||||||
blocks = surya_engine.extract_blocks(images, request.language)
|
blocks = await asyncio.to_thread(surya_engine.extract_blocks, images, request.language)
|
||||||
|
|
||||||
threshold = get_threshold(script_type)
|
threshold = get_threshold(script_type)
|
||||||
for block in blocks:
|
for block in blocks:
|
||||||
|
|||||||
Reference in New Issue
Block a user