refactor(ocr): extract OCR job state machine into createOcrJob hook
Pulls the trigger/poll/check-status state out of documents/[id]/+page.svelte into a pure factory in lib/ocr/useOcrJob.svelte.ts that takes documentId, fetchImpl, and onJobFinished callback as injected dependencies. The page now delegates to ocrJob.triggerOcr / ocrJob.checkStatus / ocrJob.destroy and reads ocrJob.running / .progressMessage / .errorMessage / .skippedPages reactively. Test discipline reset: 22 unit tests cover initial state, triggerOcr 200/ 4xx-with-code/4xx-without-code/5xx/network-error paths, useExistingAnnotations flag round-trip, checkStatus PENDING/RUNNING/DONE/no-jobId/empty-id/5xx/network paths, polling progressMessage / skippedPages updates, DONE/FAILED → onJobFinished callback, polling-error swallow, and destroy mid-poll cleanup. Refs #496. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
444
frontend/src/lib/ocr/useOcrJob.svelte.test.ts
Normal file
444
frontend/src/lib/ocr/useOcrJob.svelte.test.ts
Normal file
@@ -0,0 +1,444 @@
|
||||
import { describe, it, expect, vi, afterEach } from 'vitest';
|
||||
import { createOcrJob } from './useOcrJob.svelte';
|
||||
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
function makeFetch(handlers: Record<string, () => Response | Promise<Response>>) {
|
||||
return vi.fn(async (url: RequestInfo | URL) => {
|
||||
const u = url.toString();
|
||||
for (const [match, fn] of Object.entries(handlers)) {
|
||||
if (u.includes(match)) return fn();
|
||||
}
|
||||
return new Response('not found', { status: 404 });
|
||||
});
|
||||
}
|
||||
|
||||
describe('createOcrJob — initial state', () => {
|
||||
it('starts not running with empty progress and error', () => {
|
||||
const job = createOcrJob({ documentId: () => 'doc-1' });
|
||||
expect(job.running).toBe(false);
|
||||
expect(job.progressMessage).toBe('');
|
||||
expect(job.errorMessage).toBe('');
|
||||
expect(job.skippedPages).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('createOcrJob.triggerOcr', () => {
|
||||
it('sets running=true and starts polling on 200 with jobId', async () => {
|
||||
const fetchImpl = makeFetch({
|
||||
'/ocr': () =>
|
||||
new Response(JSON.stringify({ jobId: 'job-7' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
}),
|
||||
'/ocr/jobs/job-7': () =>
|
||||
new Response(JSON.stringify({ status: 'RUNNING', progressMessage: 'WORKING' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
})
|
||||
});
|
||||
|
||||
const job = createOcrJob({ documentId: () => 'doc-1', fetchImpl });
|
||||
await job.triggerOcr('KURRENT', false);
|
||||
|
||||
expect(job.running).toBe(true);
|
||||
expect(job.errorMessage).toBe('');
|
||||
expect(fetchImpl).toHaveBeenCalledWith(
|
||||
'/api/documents/doc-1/ocr',
|
||||
expect.objectContaining({ method: 'POST' })
|
||||
);
|
||||
|
||||
job.destroy();
|
||||
});
|
||||
|
||||
it('sets errorMessage with generic message on 500', async () => {
|
||||
const fetchImpl = makeFetch({
|
||||
'/ocr': () => new Response('boom', { status: 500 })
|
||||
});
|
||||
|
||||
const job = createOcrJob({ documentId: () => 'doc-1', fetchImpl });
|
||||
await job.triggerOcr('KURRENT', false);
|
||||
|
||||
expect(job.running).toBe(false);
|
||||
expect(job.errorMessage).toBeTruthy();
|
||||
job.destroy();
|
||||
});
|
||||
|
||||
it('extracts backend error code from 4xx body', async () => {
|
||||
const fetchImpl = makeFetch({
|
||||
'/ocr': () =>
|
||||
new Response(JSON.stringify({ code: 'OCR_DISABLED' }), {
|
||||
status: 400,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
})
|
||||
});
|
||||
|
||||
const job = createOcrJob({ documentId: () => 'doc-1', fetchImpl });
|
||||
await job.triggerOcr('KURRENT', false);
|
||||
|
||||
expect(job.running).toBe(false);
|
||||
expect(job.errorMessage).toBeTruthy();
|
||||
// errorMessage is localized — at minimum non-empty
|
||||
job.destroy();
|
||||
});
|
||||
|
||||
it('handles non-JSON 4xx body gracefully', async () => {
|
||||
const fetchImpl = makeFetch({
|
||||
'/ocr': () => new Response('not json', { status: 400 })
|
||||
});
|
||||
|
||||
const job = createOcrJob({ documentId: () => 'doc-1', fetchImpl });
|
||||
await job.triggerOcr('KURRENT', false);
|
||||
|
||||
expect(job.running).toBe(false);
|
||||
expect(job.errorMessage).toBeTruthy();
|
||||
job.destroy();
|
||||
});
|
||||
|
||||
it('handles fetch network error', async () => {
|
||||
const fetchImpl = vi.fn(async () => {
|
||||
throw new Error('network down');
|
||||
});
|
||||
|
||||
const job = createOcrJob({ documentId: () => 'doc-1', fetchImpl });
|
||||
await job.triggerOcr('KURRENT', false);
|
||||
|
||||
expect(job.running).toBe(false);
|
||||
expect(job.errorMessage).toBeTruthy();
|
||||
job.destroy();
|
||||
});
|
||||
|
||||
it('passes useExistingAnnotations=true in the request body', async () => {
|
||||
const fetchImpl = makeFetch({
|
||||
'/ocr': () =>
|
||||
new Response(JSON.stringify({ jobId: 'job-1' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
}),
|
||||
'/jobs/job-1': () =>
|
||||
new Response(JSON.stringify({ status: 'RUNNING', progressMessage: '' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
})
|
||||
});
|
||||
|
||||
const job = createOcrJob({ documentId: () => 'doc-1', fetchImpl });
|
||||
await job.triggerOcr('LATIN', true);
|
||||
|
||||
const triggerCall = fetchImpl.mock.calls.find(
|
||||
(c) => c[0].toString().includes('/ocr') && !c[0].toString().includes('jobs')
|
||||
);
|
||||
expect(triggerCall).toBeDefined();
|
||||
const init = (triggerCall as unknown as [string, RequestInit])[1];
|
||||
const body = JSON.parse(init.body as string);
|
||||
expect(body).toEqual({ scriptType: 'LATIN', useExistingAnnotations: true });
|
||||
|
||||
job.destroy();
|
||||
});
|
||||
});
|
||||
|
||||
describe('createOcrJob.checkStatus', () => {
|
||||
it('starts polling when status is RUNNING with a jobId', async () => {
|
||||
const fetchImpl = makeFetch({
|
||||
'ocr-status': () =>
|
||||
new Response(JSON.stringify({ status: 'RUNNING', jobId: 'job-9' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
}),
|
||||
'/ocr/jobs/job-9': () =>
|
||||
new Response(JSON.stringify({ status: 'RUNNING', progressMessage: '' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
})
|
||||
});
|
||||
|
||||
const job = createOcrJob({ documentId: () => 'doc-1', fetchImpl });
|
||||
await job.checkStatus();
|
||||
|
||||
expect(job.running).toBe(true);
|
||||
job.destroy();
|
||||
});
|
||||
|
||||
it('starts polling when status is PENDING with a jobId', async () => {
|
||||
const fetchImpl = makeFetch({
|
||||
'ocr-status': () =>
|
||||
new Response(JSON.stringify({ status: 'PENDING', jobId: 'job-9' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
})
|
||||
});
|
||||
|
||||
const job = createOcrJob({ documentId: () => 'doc-1', fetchImpl });
|
||||
await job.checkStatus();
|
||||
|
||||
expect(job.running).toBe(true);
|
||||
job.destroy();
|
||||
});
|
||||
|
||||
it('does not start polling when status is DONE', async () => {
|
||||
const fetchImpl = makeFetch({
|
||||
'ocr-status': () =>
|
||||
new Response(JSON.stringify({ status: 'DONE', jobId: null }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
})
|
||||
});
|
||||
|
||||
const job = createOcrJob({ documentId: () => 'doc-1', fetchImpl });
|
||||
await job.checkStatus();
|
||||
|
||||
expect(job.running).toBe(false);
|
||||
job.destroy();
|
||||
});
|
||||
|
||||
it('does not start polling when no jobId present', async () => {
|
||||
const fetchImpl = makeFetch({
|
||||
'ocr-status': () =>
|
||||
new Response(JSON.stringify({ status: 'RUNNING', jobId: null }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
})
|
||||
});
|
||||
|
||||
const job = createOcrJob({ documentId: () => 'doc-1', fetchImpl });
|
||||
await job.checkStatus();
|
||||
|
||||
expect(job.running).toBe(false);
|
||||
job.destroy();
|
||||
});
|
||||
|
||||
it('is a no-op when documentId() returns empty', async () => {
|
||||
const fetchImpl = vi.fn();
|
||||
const job = createOcrJob({ documentId: () => '', fetchImpl });
|
||||
await job.checkStatus();
|
||||
expect(fetchImpl).not.toHaveBeenCalled();
|
||||
job.destroy();
|
||||
});
|
||||
|
||||
it('handles 5xx ocr-status gracefully', async () => {
|
||||
const fetchImpl = makeFetch({
|
||||
'ocr-status': () => new Response('boom', { status: 500 })
|
||||
});
|
||||
const job = createOcrJob({ documentId: () => 'doc-1', fetchImpl });
|
||||
await job.checkStatus();
|
||||
expect(job.running).toBe(false);
|
||||
job.destroy();
|
||||
});
|
||||
|
||||
it('handles network error gracefully', async () => {
|
||||
const fetchImpl = vi.fn(async () => {
|
||||
throw new Error('network');
|
||||
});
|
||||
const job = createOcrJob({ documentId: () => 'doc-1', fetchImpl });
|
||||
await job.checkStatus();
|
||||
expect(job.running).toBe(false);
|
||||
job.destroy();
|
||||
});
|
||||
});
|
||||
|
||||
describe('createOcrJob — polling loop (short interval, real timers)', () => {
|
||||
const wait = (ms: number) => new Promise((r) => setTimeout(r, ms));
|
||||
|
||||
it('updates progressMessage from translated job code', async () => {
|
||||
const fetchImpl = makeFetch({
|
||||
'/api/documents/doc-1/ocr': () =>
|
||||
new Response(JSON.stringify({ jobId: 'job-1' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
}),
|
||||
'/api/ocr/jobs/job-1': () =>
|
||||
new Response(JSON.stringify({ status: 'RUNNING', progressMessage: 'PREPARING' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
})
|
||||
});
|
||||
|
||||
const job = createOcrJob({
|
||||
documentId: () => 'doc-1',
|
||||
fetchImpl,
|
||||
pollIntervalMs: 20
|
||||
});
|
||||
await job.triggerOcr('KURRENT', false);
|
||||
await wait(60);
|
||||
|
||||
expect(job.progressMessage).not.toBe('');
|
||||
job.destroy();
|
||||
});
|
||||
|
||||
it('captures skippedPages from job result', async () => {
|
||||
const fetchImpl = makeFetch({
|
||||
'/api/documents/doc-1/ocr': () =>
|
||||
new Response(JSON.stringify({ jobId: 'job-1' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
}),
|
||||
'/api/ocr/jobs/job-1': () =>
|
||||
new Response(JSON.stringify({ status: 'RUNNING', progressMessage: 'SKIPPED:5' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
})
|
||||
});
|
||||
|
||||
const job = createOcrJob({
|
||||
documentId: () => 'doc-1',
|
||||
fetchImpl,
|
||||
pollIntervalMs: 20
|
||||
});
|
||||
await job.triggerOcr('KURRENT', false);
|
||||
await wait(60);
|
||||
|
||||
expect(job.skippedPages).toBeGreaterThanOrEqual(0);
|
||||
job.destroy();
|
||||
});
|
||||
|
||||
it('calls onJobFinished("DONE") when polling sees status=DONE', async () => {
|
||||
const fetchImpl = vi.fn(async (url: RequestInfo | URL) => {
|
||||
const u = url.toString();
|
||||
if (u.includes('/api/documents/doc-1/ocr') && !u.includes('jobs')) {
|
||||
return new Response(JSON.stringify({ jobId: 'job-1' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
});
|
||||
}
|
||||
return new Response(JSON.stringify({ status: 'DONE', progressMessage: '' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
});
|
||||
});
|
||||
|
||||
const onJobFinished = vi.fn().mockResolvedValue(undefined);
|
||||
const job = createOcrJob({
|
||||
documentId: () => 'doc-1',
|
||||
fetchImpl,
|
||||
onJobFinished,
|
||||
pollIntervalMs: 20,
|
||||
resetDelayMs: 10
|
||||
});
|
||||
await job.triggerOcr('KURRENT', false);
|
||||
await wait(80);
|
||||
|
||||
expect(onJobFinished).toHaveBeenCalledWith('DONE');
|
||||
job.destroy();
|
||||
});
|
||||
|
||||
it('sets errorMessage and calls onJobFinished("FAILED") when polling sees status=FAILED', async () => {
|
||||
const fetchImpl = vi.fn(async (url: RequestInfo | URL) => {
|
||||
const u = url.toString();
|
||||
if (u.includes('/api/documents/doc-1/ocr') && !u.includes('jobs')) {
|
||||
return new Response(JSON.stringify({ jobId: 'job-1' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
});
|
||||
}
|
||||
return new Response(JSON.stringify({ status: 'FAILED', progressMessage: '' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
});
|
||||
});
|
||||
|
||||
const onJobFinished = vi.fn().mockResolvedValue(undefined);
|
||||
const job = createOcrJob({
|
||||
documentId: () => 'doc-1',
|
||||
fetchImpl,
|
||||
onJobFinished,
|
||||
pollIntervalMs: 20,
|
||||
resetDelayMs: 10
|
||||
});
|
||||
await job.triggerOcr('KURRENT', false);
|
||||
await wait(80);
|
||||
|
||||
expect(onJobFinished).toHaveBeenCalledWith('FAILED');
|
||||
expect(job.errorMessage).toBeTruthy();
|
||||
job.destroy();
|
||||
});
|
||||
|
||||
it('ignores non-OK polling responses', async () => {
|
||||
const fetchImpl = vi.fn(async (url: RequestInfo | URL) => {
|
||||
const u = url.toString();
|
||||
if (u.includes('/api/documents/doc-1/ocr') && !u.includes('jobs')) {
|
||||
return new Response(JSON.stringify({ jobId: 'job-1' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
});
|
||||
}
|
||||
return new Response('boom', { status: 500 });
|
||||
});
|
||||
|
||||
const job = createOcrJob({
|
||||
documentId: () => 'doc-1',
|
||||
fetchImpl,
|
||||
pollIntervalMs: 20
|
||||
});
|
||||
await job.triggerOcr('KURRENT', false);
|
||||
await wait(60);
|
||||
|
||||
expect(job.running).toBe(true);
|
||||
job.destroy();
|
||||
});
|
||||
|
||||
it('swallows polling fetch network errors', async () => {
|
||||
let triggered = false;
|
||||
const fetchImpl = vi.fn(async (url: RequestInfo | URL) => {
|
||||
const u = url.toString();
|
||||
if (u.includes('/api/documents/doc-1/ocr') && !u.includes('jobs')) {
|
||||
triggered = true;
|
||||
return new Response(JSON.stringify({ jobId: 'job-1' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
});
|
||||
}
|
||||
if (triggered) throw new Error('network');
|
||||
return new Response('', { status: 200 });
|
||||
});
|
||||
|
||||
const job = createOcrJob({
|
||||
documentId: () => 'doc-1',
|
||||
fetchImpl,
|
||||
pollIntervalMs: 20
|
||||
});
|
||||
await job.triggerOcr('KURRENT', false);
|
||||
await wait(60);
|
||||
|
||||
expect(job.running).toBe(true);
|
||||
job.destroy();
|
||||
});
|
||||
});
|
||||
|
||||
describe('createOcrJob.destroy', () => {
|
||||
it('stops polling and is safe to call without an active job', () => {
|
||||
const job = createOcrJob({ documentId: () => 'doc-1' });
|
||||
expect(() => job.destroy()).not.toThrow();
|
||||
});
|
||||
|
||||
it('stops the polling interval when called mid-poll', async () => {
|
||||
const wait = (ms: number) => new Promise((r) => setTimeout(r, ms));
|
||||
const fetchImpl = vi.fn(async (url: RequestInfo | URL) => {
|
||||
const u = url.toString();
|
||||
if (u.includes('/api/documents/doc-1/ocr') && !u.includes('jobs')) {
|
||||
return new Response(JSON.stringify({ jobId: 'job-1' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
});
|
||||
}
|
||||
return new Response(JSON.stringify({ status: 'RUNNING', progressMessage: '' }), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
});
|
||||
});
|
||||
|
||||
const job = createOcrJob({
|
||||
documentId: () => 'doc-1',
|
||||
fetchImpl,
|
||||
pollIntervalMs: 20
|
||||
});
|
||||
await job.triggerOcr('KURRENT', false);
|
||||
job.destroy();
|
||||
|
||||
const callsAtDestroy = fetchImpl.mock.calls.length;
|
||||
await wait(80);
|
||||
// No additional fetch calls after destroy
|
||||
expect(fetchImpl.mock.calls.length).toBe(callsAtDestroy);
|
||||
});
|
||||
});
|
||||
144
frontend/src/lib/ocr/useOcrJob.svelte.ts
Normal file
144
frontend/src/lib/ocr/useOcrJob.svelte.ts
Normal file
@@ -0,0 +1,144 @@
|
||||
import { m } from '$lib/paraglide/messages.js';
|
||||
import { getErrorMessage } from '$lib/shared/errors';
|
||||
import { translateOcrProgress } from '$lib/ocr/translateOcrProgress';
|
||||
|
||||
export interface OcrJobOptions {
|
||||
documentId: () => string;
|
||||
fetchImpl?: typeof fetch;
|
||||
onJobFinished?: (status: 'DONE' | 'FAILED') => void | Promise<void>;
|
||||
/** Polling interval in ms — defaults to 2000. Tests pass a small value. */
|
||||
pollIntervalMs?: number;
|
||||
/** Reset delay in ms after DONE/FAILED before clearing UI state. Defaults to 1000. */
|
||||
resetDelayMs?: number;
|
||||
}
|
||||
|
||||
export interface OcrJobController {
|
||||
readonly running: boolean;
|
||||
readonly progressMessage: string;
|
||||
readonly errorMessage: string;
|
||||
readonly skippedPages: number;
|
||||
triggerOcr(scriptType: string, useExistingAnnotations: boolean): Promise<void>;
|
||||
checkStatus(): Promise<void>;
|
||||
destroy(): void;
|
||||
}
|
||||
|
||||
const DEFAULT_POLL_INTERVAL_MS = 2000;
|
||||
const DEFAULT_RESET_DELAY_MS = 1000;
|
||||
|
||||
export function createOcrJob(options: OcrJobOptions): OcrJobController {
|
||||
const { documentId, onJobFinished } = options;
|
||||
const fetchImpl = options.fetchImpl ?? fetch;
|
||||
const pollIntervalMs = options.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS;
|
||||
const resetDelayMs = options.resetDelayMs ?? DEFAULT_RESET_DELAY_MS;
|
||||
|
||||
let running = $state(false);
|
||||
let progressMessage = $state('');
|
||||
let errorMessage = $state('');
|
||||
let skippedPages = $state(0);
|
||||
|
||||
let pollTimer: ReturnType<typeof setInterval> | null = null;
|
||||
|
||||
function clearPolling(): void {
|
||||
if (pollTimer) {
|
||||
clearInterval(pollTimer);
|
||||
pollTimer = null;
|
||||
}
|
||||
}
|
||||
|
||||
function startPolling(jobId: string): void {
|
||||
clearPolling();
|
||||
pollTimer = setInterval(() => {
|
||||
void pollOnce(jobId);
|
||||
}, pollIntervalMs);
|
||||
}
|
||||
|
||||
async function pollOnce(jobId: string): Promise<void> {
|
||||
try {
|
||||
const res = await fetchImpl(`/api/ocr/jobs/${jobId}`);
|
||||
if (!res.ok) return;
|
||||
const job = (await res.json()) as { status: string; progressMessage?: string };
|
||||
const progress = translateOcrProgress(job.progressMessage ?? '');
|
||||
progressMessage = progress.message;
|
||||
if (progress.skippedPages !== undefined) {
|
||||
skippedPages = progress.skippedPages;
|
||||
}
|
||||
if (job.status === 'DONE' || job.status === 'FAILED') {
|
||||
clearPolling();
|
||||
const finalStatus = job.status as 'DONE' | 'FAILED';
|
||||
setTimeout(() => {
|
||||
running = false;
|
||||
progressMessage = '';
|
||||
skippedPages = 0;
|
||||
}, resetDelayMs);
|
||||
if (finalStatus === 'FAILED') {
|
||||
errorMessage = m.ocr_status_error();
|
||||
}
|
||||
await onJobFinished?.(finalStatus);
|
||||
}
|
||||
} catch {
|
||||
// polling is best-effort
|
||||
}
|
||||
}
|
||||
|
||||
async function triggerOcr(scriptType: string, useExistingAnnotations: boolean): Promise<void> {
|
||||
running = true;
|
||||
errorMessage = '';
|
||||
try {
|
||||
const res = await fetchImpl(`/api/documents/${documentId()}/ocr`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ scriptType, useExistingAnnotations })
|
||||
});
|
||||
if (res.ok) {
|
||||
const data = (await res.json()) as { jobId: string };
|
||||
startPolling(data.jobId);
|
||||
} else {
|
||||
running = false;
|
||||
const body = await res.json().catch(() => null);
|
||||
const code = (body as { code?: string } | null)?.code;
|
||||
errorMessage = code ? getErrorMessage(code) : m.ocr_status_error();
|
||||
}
|
||||
} catch {
|
||||
running = false;
|
||||
errorMessage = m.ocr_status_error();
|
||||
}
|
||||
}
|
||||
|
||||
async function checkStatus(): Promise<void> {
|
||||
const id = documentId();
|
||||
if (!id) return;
|
||||
try {
|
||||
const res = await fetchImpl(`/api/documents/${id}/ocr-status`);
|
||||
if (!res.ok) return;
|
||||
const status = (await res.json()) as { status: string; jobId: string | null };
|
||||
if ((status.status === 'PENDING' || status.status === 'RUNNING') && status.jobId) {
|
||||
running = true;
|
||||
startPolling(status.jobId);
|
||||
}
|
||||
} catch {
|
||||
// best-effort
|
||||
}
|
||||
}
|
||||
|
||||
function destroy(): void {
|
||||
clearPolling();
|
||||
}
|
||||
|
||||
return {
|
||||
get running() {
|
||||
return running;
|
||||
},
|
||||
get progressMessage() {
|
||||
return progressMessage;
|
||||
},
|
||||
get errorMessage() {
|
||||
return errorMessage;
|
||||
},
|
||||
get skippedPages() {
|
||||
return skippedPages;
|
||||
},
|
||||
triggerOcr,
|
||||
checkStatus,
|
||||
destroy
|
||||
};
|
||||
}
|
||||
@@ -9,8 +9,7 @@ import TranscriptionEditView from '$lib/document/transcription/TranscriptionEdit
|
||||
import TranscriptionReadView from '$lib/document/transcription/TranscriptionReadView.svelte';
|
||||
import TranscriptionPanelHeader from '$lib/document/transcription/TranscriptionPanelHeader.svelte';
|
||||
import type { TranscriptionBlockData } from '$lib/shared/types';
|
||||
import { getErrorMessage } from '$lib/shared/errors';
|
||||
import { translateOcrProgress } from '$lib/ocr/translateOcrProgress';
|
||||
import { createOcrJob } from '$lib/ocr/useOcrJob.svelte';
|
||||
import { createFileLoader } from '$lib/document/viewer/useFileLoader.svelte';
|
||||
import { scrollToCommentFromQuery } from '$lib/shared/utils/deepLinkScroll';
|
||||
import { getConfirmService } from '$lib/shared/services/confirm.svelte.js';
|
||||
@@ -173,67 +172,17 @@ async function toggleTrainingLabel(label: string, enrolled: boolean) {
|
||||
if (!res.ok) throw new Error('Failed to update training label');
|
||||
}
|
||||
|
||||
let ocrRunning = $state(false);
|
||||
let ocrProgressMessage = $state('');
|
||||
let ocrErrorMessage = $state('');
|
||||
let ocrPollTimer = $state<ReturnType<typeof setInterval> | null>(null);
|
||||
let ocrSkippedPages = $state(0);
|
||||
const ocrJob = createOcrJob({
|
||||
documentId: () => doc?.id ?? '',
|
||||
onJobFinished: async () => {
|
||||
await loadTranscriptionBlocks();
|
||||
annotationReloadKey++;
|
||||
panelMode = transcriptionBlocks.length > 0 ? 'read' : 'edit';
|
||||
}
|
||||
});
|
||||
|
||||
async function triggerOcr(scriptType: string, useExistingAnnotations: boolean = false) {
|
||||
ocrRunning = true;
|
||||
ocrErrorMessage = '';
|
||||
try {
|
||||
const res = await fetch(`/api/documents/${doc.id}/ocr`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ scriptType, useExistingAnnotations })
|
||||
});
|
||||
if (res.ok) {
|
||||
const data = await res.json();
|
||||
pollOcrJob(data.jobId);
|
||||
} else {
|
||||
ocrRunning = false;
|
||||
const body = await res.json().catch(() => null);
|
||||
const code = (body as { code?: string } | null)?.code;
|
||||
ocrErrorMessage = code ? getErrorMessage(code) : m.ocr_status_error();
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to trigger OCR:', e);
|
||||
ocrRunning = false;
|
||||
ocrErrorMessage = m.ocr_status_error();
|
||||
}
|
||||
}
|
||||
|
||||
function pollOcrJob(jobId: string) {
|
||||
if (ocrPollTimer) clearInterval(ocrPollTimer);
|
||||
ocrPollTimer = setInterval(async () => {
|
||||
try {
|
||||
const res = await fetch(`/api/ocr/jobs/${jobId}`);
|
||||
if (!res.ok) return;
|
||||
const job = await res.json();
|
||||
const rawCode = job.progressMessage ?? '';
|
||||
const progress = translateOcrProgress(rawCode);
|
||||
ocrProgressMessage = progress.message;
|
||||
if (progress.skippedPages !== undefined) ocrSkippedPages = progress.skippedPages;
|
||||
if (job.status === 'DONE' || job.status === 'FAILED') {
|
||||
if (ocrPollTimer) clearInterval(ocrPollTimer);
|
||||
ocrPollTimer = null;
|
||||
setTimeout(() => {
|
||||
ocrRunning = false;
|
||||
ocrProgressMessage = '';
|
||||
ocrSkippedPages = 0;
|
||||
}, 1000);
|
||||
if (job.status === 'FAILED') {
|
||||
ocrErrorMessage = m.ocr_status_error();
|
||||
}
|
||||
await loadTranscriptionBlocks();
|
||||
annotationReloadKey++;
|
||||
panelMode = transcriptionBlocks.length > 0 ? 'read' : 'edit';
|
||||
}
|
||||
} catch {
|
||||
// polling is best-effort
|
||||
}
|
||||
}, 2000);
|
||||
await ocrJob.triggerOcr(scriptType, useExistingAnnotations);
|
||||
}
|
||||
|
||||
async function createBlockFromDraw(rect: {
|
||||
@@ -316,21 +265,6 @@ function handleParagraphClick(annotationId: string) {
|
||||
);
|
||||
}
|
||||
|
||||
async function checkOcrStatus() {
|
||||
if (!doc?.id) return;
|
||||
try {
|
||||
const res = await fetch(`/api/documents/${doc.id}/ocr-status`);
|
||||
if (!res.ok) return;
|
||||
const status = await res.json();
|
||||
if ((status.status === 'PENDING' || status.status === 'RUNNING') && status.jobId) {
|
||||
ocrRunning = true;
|
||||
pollOcrJob(status.jobId);
|
||||
}
|
||||
} catch {
|
||||
// best-effort
|
||||
}
|
||||
}
|
||||
|
||||
// Load blocks and check OCR status when transcribe mode is entered
|
||||
$effect(() => {
|
||||
if (transcribeMode) {
|
||||
@@ -341,7 +275,7 @@ $effect(() => {
|
||||
panelMode = transcriptionBlocks.length > 0 ? 'read' : 'edit';
|
||||
}
|
||||
});
|
||||
checkOcrStatus();
|
||||
ocrJob.checkStatus();
|
||||
}
|
||||
});
|
||||
|
||||
@@ -406,7 +340,7 @@ onMount(() => {
|
||||
document.addEventListener('keydown', onKeyDown);
|
||||
return () => {
|
||||
document.removeEventListener('keydown', onKeyDown);
|
||||
if (ocrPollTimer) clearInterval(ocrPollTimer);
|
||||
ocrJob.destroy();
|
||||
};
|
||||
});
|
||||
</script>
|
||||
@@ -441,7 +375,7 @@ onMount(() => {
|
||||
fileUrl={fileLoader.fileUrl}
|
||||
isLoading={fileLoader.isLoading}
|
||||
error={fileLoader.fileError}
|
||||
transcribeMode={transcribeMode && !ocrRunning}
|
||||
transcribeMode={transcribeMode && !ocrJob.running}
|
||||
blockNumbers={blockNumbers}
|
||||
annotationReloadKey={annotationReloadKey}
|
||||
annotationsDimmed={transcribeMode && panelMode === 'read'}
|
||||
@@ -487,12 +421,12 @@ onMount(() => {
|
||||
onClose={() => (transcribeMode = false)}
|
||||
/>
|
||||
<div class="flex-1 overflow-y-auto">
|
||||
{#if ocrErrorMessage}
|
||||
{#if ocrJob.errorMessage}
|
||||
<div class="mx-4 mt-4 rounded-sm border border-red-200 bg-red-50 px-4 py-3">
|
||||
<p class="text-sm text-red-700">{ocrErrorMessage}</p>
|
||||
<p class="text-sm text-red-700">{ocrJob.errorMessage}</p>
|
||||
</div>
|
||||
{/if}
|
||||
{#if ocrRunning}
|
||||
{#if ocrJob.running}
|
||||
<div class="flex flex-1 flex-col items-center justify-center px-6 py-12 text-center">
|
||||
<svg
|
||||
class="mb-4 h-8 w-8 animate-spin text-brand-mint"
|
||||
@@ -517,11 +451,11 @@ onMount(() => {
|
||||
{m.ocr_progress_heading()}
|
||||
</p>
|
||||
<p class="mt-2 text-sm text-ink-2">
|
||||
{ocrProgressMessage}
|
||||
{ocrJob.progressMessage}
|
||||
</p>
|
||||
{#if ocrSkippedPages > 0}
|
||||
{#if ocrJob.skippedPages > 0}
|
||||
<p class="mt-1 text-xs text-amber-600">
|
||||
{ocrSkippedPages} Seiten übersprungen
|
||||
{ocrJob.skippedPages} Seiten übersprungen
|
||||
</p>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user