refactor(ocr): extract OCR job state machine into createOcrJob hook

Pulls the trigger/poll/check-status state out of documents/[id]/+page.svelte
into a pure factory in lib/ocr/useOcrJob.svelte.ts that takes documentId,
fetchImpl, and an onJobFinished callback as injected dependencies.

The page now delegates to ocrJob.triggerOcr / ocrJob.checkStatus /
ocrJob.destroy and reads ocrJob.running / .progressMessage / .errorMessage /
.skippedPages reactively.

Test discipline reset: 22 unit tests cover initial state; triggerOcr
200 / 4xx-with-code / 4xx-without-code / 5xx / network-error paths; the
useExistingAnnotations flag round-trip; checkStatus PENDING / RUNNING /
DONE / no-jobId / empty-id / 5xx / network-error paths; polling updates
to progressMessage and skippedPages; the DONE/FAILED → onJobFinished
callback; swallowed polling errors; and cleanup when destroy is called
mid-poll.

Refs #496.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-10 10:25:26 +02:00
committed by marcel
parent dd54ba9e74
commit 878bb3843b
3 changed files with 607 additions and 85 deletions

View File

@@ -9,8 +9,7 @@ import TranscriptionEditView from '$lib/document/transcription/TranscriptionEdit
import TranscriptionReadView from '$lib/document/transcription/TranscriptionReadView.svelte';
import TranscriptionPanelHeader from '$lib/document/transcription/TranscriptionPanelHeader.svelte';
import type { TranscriptionBlockData } from '$lib/shared/types';
import { getErrorMessage } from '$lib/shared/errors';
import { translateOcrProgress } from '$lib/ocr/translateOcrProgress';
import { createOcrJob } from '$lib/ocr/useOcrJob.svelte';
import { createFileLoader } from '$lib/document/viewer/useFileLoader.svelte';
import { scrollToCommentFromQuery } from '$lib/shared/utils/deepLinkScroll';
import { getConfirmService } from '$lib/shared/services/confirm.svelte.js';
@@ -173,67 +172,17 @@ async function toggleTrainingLabel(label: string, enrolled: boolean) {
if (!res.ok) throw new Error('Failed to update training label');
}
let ocrRunning = $state(false);
let ocrProgressMessage = $state('');
let ocrErrorMessage = $state('');
let ocrPollTimer = $state<ReturnType<typeof setInterval> | null>(null);
let ocrSkippedPages = $state(0);
const ocrJob = createOcrJob({
documentId: () => doc?.id ?? '',
onJobFinished: async () => {
await loadTranscriptionBlocks();
annotationReloadKey++;
panelMode = transcriptionBlocks.length > 0 ? 'read' : 'edit';
}
});
async function triggerOcr(scriptType: string, useExistingAnnotations: boolean = false) {
ocrRunning = true;
ocrErrorMessage = '';
try {
const res = await fetch(`/api/documents/${doc.id}/ocr`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ scriptType, useExistingAnnotations })
});
if (res.ok) {
const data = await res.json();
pollOcrJob(data.jobId);
} else {
ocrRunning = false;
const body = await res.json().catch(() => null);
const code = (body as { code?: string } | null)?.code;
ocrErrorMessage = code ? getErrorMessage(code) : m.ocr_status_error();
}
} catch (e) {
console.error('Failed to trigger OCR:', e);
ocrRunning = false;
ocrErrorMessage = m.ocr_status_error();
}
}
function pollOcrJob(jobId: string) {
if (ocrPollTimer) clearInterval(ocrPollTimer);
ocrPollTimer = setInterval(async () => {
try {
const res = await fetch(`/api/ocr/jobs/${jobId}`);
if (!res.ok) return;
const job = await res.json();
const rawCode = job.progressMessage ?? '';
const progress = translateOcrProgress(rawCode);
ocrProgressMessage = progress.message;
if (progress.skippedPages !== undefined) ocrSkippedPages = progress.skippedPages;
if (job.status === 'DONE' || job.status === 'FAILED') {
if (ocrPollTimer) clearInterval(ocrPollTimer);
ocrPollTimer = null;
setTimeout(() => {
ocrRunning = false;
ocrProgressMessage = '';
ocrSkippedPages = 0;
}, 1000);
if (job.status === 'FAILED') {
ocrErrorMessage = m.ocr_status_error();
}
await loadTranscriptionBlocks();
annotationReloadKey++;
panelMode = transcriptionBlocks.length > 0 ? 'read' : 'edit';
}
} catch {
// polling is best-effort
}
}, 2000);
await ocrJob.triggerOcr(scriptType, useExistingAnnotations);
}
async function createBlockFromDraw(rect: {
@@ -316,21 +265,6 @@ function handleParagraphClick(annotationId: string) {
);
}
async function checkOcrStatus() {
if (!doc?.id) return;
try {
const res = await fetch(`/api/documents/${doc.id}/ocr-status`);
if (!res.ok) return;
const status = await res.json();
if ((status.status === 'PENDING' || status.status === 'RUNNING') && status.jobId) {
ocrRunning = true;
pollOcrJob(status.jobId);
}
} catch {
// best-effort
}
}
// Load blocks and check OCR status when transcribe mode is entered
$effect(() => {
if (transcribeMode) {
@@ -341,7 +275,7 @@ $effect(() => {
panelMode = transcriptionBlocks.length > 0 ? 'read' : 'edit';
}
});
checkOcrStatus();
ocrJob.checkStatus();
}
});
@@ -406,7 +340,7 @@ onMount(() => {
document.addEventListener('keydown', onKeyDown);
return () => {
document.removeEventListener('keydown', onKeyDown);
if (ocrPollTimer) clearInterval(ocrPollTimer);
ocrJob.destroy();
};
});
</script>
@@ -441,7 +375,7 @@ onMount(() => {
fileUrl={fileLoader.fileUrl}
isLoading={fileLoader.isLoading}
error={fileLoader.fileError}
transcribeMode={transcribeMode && !ocrRunning}
transcribeMode={transcribeMode && !ocrJob.running}
blockNumbers={blockNumbers}
annotationReloadKey={annotationReloadKey}
annotationsDimmed={transcribeMode && panelMode === 'read'}
@@ -487,12 +421,12 @@ onMount(() => {
onClose={() => (transcribeMode = false)}
/>
<div class="flex-1 overflow-y-auto">
{#if ocrErrorMessage}
{#if ocrJob.errorMessage}
<div class="mx-4 mt-4 rounded-sm border border-red-200 bg-red-50 px-4 py-3">
<p class="text-sm text-red-700">{ocrErrorMessage}</p>
<p class="text-sm text-red-700">{ocrJob.errorMessage}</p>
</div>
{/if}
{#if ocrRunning}
{#if ocrJob.running}
<div class="flex flex-1 flex-col items-center justify-center px-6 py-12 text-center">
<svg
class="mb-4 h-8 w-8 animate-spin text-brand-mint"
@@ -517,11 +451,11 @@ onMount(() => {
{m.ocr_progress_heading()}
</p>
<p class="mt-2 text-sm text-ink-2">
{ocrProgressMessage}
{ocrJob.progressMessage}
</p>
{#if ocrSkippedPages > 0}
{#if ocrJob.skippedPages > 0}
<p class="mt-1 text-xs text-amber-600">
{ocrSkippedPages} Seiten übersprungen
{ocrJob.skippedPages} Seiten übersprungen
</p>
{/if}
</div>