refactor(ocr): extract OCR job state machine into createOcrJob hook
Pulls the trigger/poll/check-status state out of documents/[id]/+page.svelte into a pure factory in lib/ocr/useOcrJob.svelte.ts that takes documentId, fetchImpl, and onJobFinished callback as injected dependencies. The page now delegates to ocrJob.triggerOcr / ocrJob.checkStatus / ocrJob.destroy and reads ocrJob.running / .progressMessage / .errorMessage / .skippedPages reactively. Test discipline reset: 22 unit tests cover initial state, triggerOcr 200/4xx-with-code/4xx-without-code/5xx/network-error paths, useExistingAnnotations flag round-trip, checkStatus PENDING/RUNNING/DONE/no-jobId/empty-id/5xx/network paths, polling progressMessage / skippedPages updates, DONE/FAILED → onJobFinished callback, polling-error swallow, and destroy mid-poll cleanup. Refs #496. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
144
frontend/src/lib/ocr/useOcrJob.svelte.ts
Normal file
144
frontend/src/lib/ocr/useOcrJob.svelte.ts
Normal file
@@ -0,0 +1,144 @@
|
||||
import { m } from '$lib/paraglide/messages.js';
|
||||
import { getErrorMessage } from '$lib/shared/errors';
|
||||
import { translateOcrProgress } from '$lib/ocr/translateOcrProgress';
|
||||
|
||||
/**
 * Dependencies and tuning knobs accepted by `createOcrJob`.
 */
export interface OcrJobOptions {
  /**
   * Returns the id of the document to operate on. It is a getter rather than
   * a plain string because it is re-read on every `triggerOcr` / `checkStatus`
   * call.
   */
  documentId: () => string;
  /** Fetch implementation to use; falls back to the global `fetch` when omitted. */
  fetchImpl?: typeof fetch;
  /** Invoked once a polled job reports `DONE` or `FAILED`. May be async. */
  onJobFinished?: (status: 'DONE' | 'FAILED') => void | Promise<void>;
  /** Polling interval in ms — defaults to 2000. Tests pass a small value. */
  pollIntervalMs?: number;
  /** Reset delay in ms after DONE/FAILED before clearing UI state. Defaults to 1000. */
  resetDelayMs?: number;
}
|
||||
|
||||
/**
 * Handle returned by `createOcrJob`: read-only state plus the actions that
 * drive an OCR job.
 */
export interface OcrJobController {
  /** `true` while an OCR run is being started or polled. */
  readonly running: boolean;
  /** Progress text derived from the most recent poll; empty when there is none. */
  readonly progressMessage: string;
  /** Error text from the last failed trigger or failed job; empty when there is none. */
  readonly errorMessage: string;
  /** Skipped-page count reported by the most recent poll that carried one. */
  readonly skippedPages: number;
  /** Requests a new OCR run for the current document and starts polling it on success. */
  triggerOcr(scriptType: string, useExistingAnnotations: boolean): Promise<void>;
  /** Looks up the document's OCR status and resumes polling if a job is pending or running. */
  checkStatus(): Promise<void>;
  /** Stops polling. Call when the owning component is torn down. */
  destroy(): void;
}
|
||||
|
||||
/** Fallback polling interval in milliseconds, used when `pollIntervalMs` is not supplied. */
const DEFAULT_POLL_INTERVAL_MS = 2000;

/** Fallback post-finish reset delay in milliseconds, used when `resetDelayMs` is not supplied. */
const DEFAULT_RESET_DELAY_MS = 1000;
|
||||
|
||||
/**
 * Creates a controller that triggers an OCR run for a document and polls the
 * resulting job until it reports `DONE` or `FAILED`.
 *
 * State is held in `$state` runes and exposed through getters so consumers
 * read it reactively. All network access goes through `options.fetchImpl`
 * (global `fetch` by default).
 *
 * Lifecycle guarantees:
 * - Only one polling interval is active at a time; starting a new one
 *   replaces the previous one.
 * - Poll responses that arrive after polling was stopped or restarted are
 *   discarded, so `onJobFinished` runs at most once per polling run and never
 *   after `destroy()`.
 * - The delayed UI reset scheduled after a job finishes is applied
 *   immediately when a new run starts or the controller is destroyed, so it
 *   can never clobber the state of a newer run or outlive the controller.
 *
 * @param options Injected dependencies and timing overrides.
 * @returns The controller; call `destroy()` when the owner is torn down.
 */
export function createOcrJob(options: OcrJobOptions): OcrJobController {
  const { documentId, onJobFinished } = options;
  const fetchImpl = options.fetchImpl ?? fetch;
  const pollIntervalMs = options.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS;
  const resetDelayMs = options.resetDelayMs ?? DEFAULT_RESET_DELAY_MS;

  let running = $state(false);
  let progressMessage = $state('');
  let errorMessage = $state('');
  let skippedPages = $state(0);

  let pollTimer: ReturnType<typeof setInterval> | null = null;
  let resetTimer: ReturnType<typeof setTimeout> | null = null;
  // Bumped every time polling is stopped or restarted; an in-flight poll
  // compares against it to find out whether its result is still wanted.
  let pollGeneration = 0;
  // Bumped by destroy(); lets an in-flight triggerOcr/checkStatus notice that
  // the controller was torn down while it was waiting on the network.
  let teardownCount = 0;

  /** Stops the polling interval (if any) and invalidates in-flight polls. */
  function clearPolling(): void {
    pollGeneration += 1;
    if (pollTimer !== null) {
      clearInterval(pollTimer);
      pollTimer = null;
    }
  }

  /** Clears the transient run state shown while a job is active. */
  function applyReset(): void {
    running = false;
    progressMessage = '';
    skippedPages = 0;
  }

  /** Schedules the delayed reset that follows a finished job. */
  function scheduleReset(): void {
    if (resetTimer !== null) {
      clearTimeout(resetTimer);
    }
    resetTimer = setTimeout(() => {
      resetTimer = null;
      applyReset();
    }, resetDelayMs);
  }

  /** Applies a pending delayed reset right now instead of waiting for its timer. */
  function flushPendingReset(): void {
    if (resetTimer === null) return;
    clearTimeout(resetTimer);
    resetTimer = null;
    applyReset();
  }

  /** Starts polling `jobId`, replacing any polling already in progress. */
  function startPolling(jobId: string): void {
    clearPolling();
    const generation = pollGeneration;
    pollTimer = setInterval(() => {
      void pollOnce(jobId, generation);
    }, pollIntervalMs);
  }

  /**
   * Fetches the job once and folds the result into the controller state.
   * Non-OK responses and thrown errors are ignored; the next tick retries.
   */
  async function pollOnce(jobId: string, generation: number): Promise<void> {
    try {
      const res = await fetchImpl(`/api/ocr/jobs/${jobId}`);
      if (!res.ok) return;
      const job = (await res.json()) as { status: string; progressMessage?: string };

      // Polling was stopped or restarted while this request was in flight
      // (destroy, a newer run, or a sibling poll that already saw the final
      // status) — drop the stale result.
      if (generation !== pollGeneration) return;

      const progress = translateOcrProgress(job.progressMessage ?? '');
      progressMessage = progress.message;
      if (progress.skippedPages !== undefined) {
        skippedPages = progress.skippedPages;
      }

      const status = job.status;
      if (status !== 'DONE' && status !== 'FAILED') return;

      clearPolling();
      // Keep the final progress visible for resetDelayMs before clearing it.
      scheduleReset();
      if (status === 'FAILED') {
        errorMessage = m.ocr_status_error();
      }
      await onJobFinished?.(status);
    } catch {
      // Polling is best-effort: network/parse failures are retried on the
      // next tick. Note this also swallows errors thrown by onJobFinished.
    }
  }

  /**
   * POSTs a new OCR request for the current document. On success, polling of
   * the returned job starts; on any failure `running` is cleared and
   * `errorMessage` is set.
   */
  async function triggerOcr(scriptType: string, useExistingAnnotations: boolean): Promise<void> {
    const teardownsAtStart = teardownCount;
    // A previous job may still be inside its reset delay; settle it now so the
    // delayed reset cannot clear the state of the run we are about to start.
    flushPendingReset();
    running = true;
    errorMessage = '';
    try {
      const res = await fetchImpl(`/api/documents/${documentId()}/ocr`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ scriptType, useExistingAnnotations })
      });

      if (!res.ok) {
        running = false;
        const body = (await res.json().catch(() => null)) as { code?: unknown } | null;
        const code = typeof body?.code === 'string' ? body.code : '';
        errorMessage = code ? getErrorMessage(code) : m.ocr_status_error();
        return;
      }

      const data = (await res.json()) as { jobId?: unknown } | null;
      const jobId = typeof data?.jobId === 'string' ? data.jobId : '';
      if (!jobId) {
        // A success response without a job id gives us nothing to poll.
        running = false;
        errorMessage = m.ocr_status_error();
        return;
      }

      if (teardownsAtStart !== teardownCount) {
        // destroy() ran while the request was in flight; do not start an
        // interval that nothing would ever clear.
        running = false;
        return;
      }

      startPolling(jobId);
    } catch {
      running = false;
      errorMessage = m.ocr_status_error();
    }
  }

  /**
   * Asks the server for the document's OCR status and, when a job is
   * `PENDING` or `RUNNING` with a job id, marks the controller as running and
   * resumes polling. Does nothing for an empty document id; failures are
   * ignored.
   */
  async function checkStatus(): Promise<void> {
    const id = documentId();
    if (!id) return;
    const teardownsAtStart = teardownCount;
    try {
      const res = await fetchImpl(`/api/documents/${id}/ocr-status`);
      if (!res.ok) return;
      const status = (await res.json()) as { status: string; jobId: string | null };

      // destroy() ran while the request was in flight.
      if (teardownsAtStart !== teardownCount) return;

      const active = status.status === 'PENDING' || status.status === 'RUNNING';
      if (active && status.jobId) {
        // Settle any pending reset first so it cannot undo `running = true`.
        flushPendingReset();
        running = true;
        startPolling(status.jobId);
      }
    } catch {
      // best-effort
    }
  }

  /** Stops polling, invalidates in-flight requests and settles any pending reset. */
  function destroy(): void {
    teardownCount += 1;
    clearPolling();
    flushPendingReset();
  }

  return {
    get running() {
      return running;
    },
    get progressMessage() {
      return progressMessage;
    },
    get errorMessage() {
      return errorMessage;
    },
    get skippedPages() {
      return skippedPages;
    },
    triggerOcr,
    checkStatus,
    destroy
  };
}
|
||||
Reference in New Issue
Block a user