feat(ocr): full OCR pipeline with polygon annotations, training, and guided mode #232

Merged
marcel merged 40 commits from feat/issue-226-227-ocr-pipeline-polygon into main 2026-04-14 10:31:35 +02:00
10 changed files with 473 additions and 4 deletions
Showing only changes of commit 4e08d31e01 - Show all commits

View File

@@ -505,6 +505,7 @@
"error_ocr_job_not_found": "Der OCR-Auftrag wurde nicht gefunden.",
"error_ocr_document_not_uploaded": "Das Dokument hat keine Datei — OCR ist nicht möglich.",
"error_ocr_processing_failed": "Die OCR-Verarbeitung ist fehlgeschlagen.",
"error_training_already_running": "Es läuft bereits ein Trainings-Vorgang.",
"ocr_script_type_typewriter": "Schreibmaschine",
"ocr_script_type_handwriting_latin": "Handschrift (lateinisch)",
"ocr_script_type_handwriting_kurrent": "Handschrift (Kurrent/Sütterlin)",

View File

@@ -505,6 +505,7 @@
"error_ocr_job_not_found": "The OCR job was not found.",
"error_ocr_document_not_uploaded": "The document has no file — OCR is not possible.",
"error_ocr_processing_failed": "OCR processing failed.",
"error_training_already_running": "A training run is already in progress.",
"ocr_script_type_typewriter": "Typewriter",
"ocr_script_type_handwriting_latin": "Handwriting (Latin)",
"ocr_script_type_handwriting_kurrent": "Handwriting (Kurrent/Sütterlin)",

View File

@@ -505,6 +505,7 @@
"error_ocr_job_not_found": "No se encontró el trabajo OCR.",
"error_ocr_document_not_uploaded": "El documento no tiene archivo — OCR no es posible.",
"error_ocr_processing_failed": "El procesamiento OCR ha fallado.",
"error_training_already_running": "Ya hay un proceso de entrenamiento en curso.",
"ocr_script_type_typewriter": "Máquina de escribir",
"ocr_script_type_handwriting_latin": "Escritura manuscrita (latina)",
"ocr_script_type_handwriting_kurrent": "Escritura manuscrita (Kurrent/Sütterlin)",

View File

@@ -0,0 +1,91 @@
<script lang="ts">
	import TrainingHistory from './TrainingHistory.svelte';

	/** One OCR training run, as handed on to the history table. */
	interface Run {
		id: string;
		status: 'RUNNING' | 'DONE' | 'FAILED';
		blockCount: number;
		documentCount: number;
		modelName: string;
		errorMessage?: string;
		triggeredBy?: string;
		createdAt: string;
		completedAt?: string;
	}

	/** Training overview supplied by the parent; every field may be absent. */
	interface TrainingInfo {
		availableBlocks?: number;
		totalOcrBlocks?: number;
		availableDocuments?: number;
		ocrServiceAvailable?: boolean;
		lastRun?: Run | null;
		runs?: Run[];
	}

	interface Props {
		/** Training overview, or null when none is available (counts then render as 0). */
		trainingInfo: TrainingInfo | null;
	}

	/** Minimum number of reviewed blocks required before training may be started. */
	const MIN_TRAINING_BLOCKS = 5;
	/** How long the success message stays visible, in milliseconds. */
	const SUCCESS_MESSAGE_MS = 5000;

	let { trainingInfo }: Props = $props();

	let training = $state(false);
	let successMessage = $state<string | null>(null);
	let errorMessage = $state<string | null>(null);

	// Handle of the pending "hide success message" timer, so that a newer run
	// can cancel it instead of having its own message cleared early.
	let successTimer: ReturnType<typeof setTimeout> | undefined;

	const available = $derived(trainingInfo?.availableBlocks ?? 0);
	const tooFewBlocks = $derived(available < MIN_TRAINING_BLOCKS);
	// Only an explicit `false` counts as "down"; a missing flag leaves the button usable.
	const serviceDown = $derived(trainingInfo?.ocrServiceAvailable === false);
	const disabled = $derived(training || tooFewBlocks || serviceDown);

	/**
	 * Sends the POST request that triggers a training run and reports the outcome.
	 *
	 * While the request is pending `training` is true (button disabled, label "…").
	 * A 2xx response shows a success message for a few seconds; a non-OK response
	 * or a rejected request shows an error message instead of failing silently.
	 */
	async function startTraining() {
		if (training) return;

		training = true;
		successMessage = null;
		errorMessage = null;
		clearTimeout(successTimer);

		try {
			const res = await fetch('/api/ocr/train', { method: 'POST' });
			if (res.ok) {
				successMessage = 'Training wurde gestartet und abgeschlossen.';
				successTimer = setTimeout(() => {
					successMessage = null;
				}, SUCCESS_MESSAGE_MS);
			} else {
				errorMessage = 'Das Training ist fehlgeschlagen.';
			}
		} catch {
			// Network failure / aborted request: surface it rather than leaving an
			// unhandled promise rejection from the click handler.
			errorMessage = 'Das Training ist fehlgeschlagen.';
		} finally {
			training = false;
		}
	}
</script>

<div class="rounded-sm border border-line bg-surface p-6 shadow-sm">
	<h2 class="mb-1 font-sans text-sm font-bold text-ink">Kurrent-Erkennung trainieren</h2>
	<p class="mb-4 text-sm text-ink-2">
		Starte ein neues Training mit den bisher geprüften OCR-Blöcken, um die Erkennungsgenauigkeit für
		Kurrentschrift zu verbessern.
	</p>
	<p class="mb-3 text-sm text-ink">
		<strong>{available}</strong> geprüfte Blöcke bereit /
		<strong>{trainingInfo?.availableDocuments ?? 0}</strong> Dokumente
		<span class="text-ink-2">(von {trainingInfo?.totalOcrBlocks ?? 0} OCR-Blöcken gesamt)</span>
	</p>
	<button
		type="button"
		onclick={startTraining}
		disabled={disabled}
		class="rounded-sm bg-primary px-5 py-2 font-sans text-xs font-bold tracking-widest text-primary-fg uppercase transition-opacity hover:opacity-80 focus-visible:ring-2 focus-visible:ring-brand-navy disabled:cursor-not-allowed disabled:opacity-50"
	>
		{training ? '…' : 'Training starten'}
	</button>
	<!-- The "too few blocks" hint takes priority over the service-down warning. -->
	{#if tooFewBlocks}
		<p class="mt-2 text-xs text-ink-3">
			Mindestens {MIN_TRAINING_BLOCKS} geprüfte Blöcke erforderlich (aktuell: {available}).
		</p>
	{:else if serviceDown}
		<p class="mt-2 text-xs text-orange-600">OCR-Dienst ist nicht erreichbar.</p>
	{/if}
	{#if successMessage}
		<p class="mt-2 text-xs text-green-700">{successMessage}</p>
	{/if}
	{#if errorMessage}
		<p class="mt-2 text-xs text-red-700" role="alert">{errorMessage}</p>
	{/if}
	<h3 class="mt-6 mb-3 text-xs font-bold tracking-widest text-ink-3 uppercase">Verlauf</h3>
	<TrainingHistory runs={trainingInfo?.runs ?? []} />
</div>

View File

@@ -0,0 +1,96 @@
import { afterEach, describe, expect, it, vi } from 'vitest';
import { cleanup, render } from 'vitest-browser-svelte';
import { page } from 'vitest/browser';
import OcrTrainingCard from './OcrTrainingCard.svelte';
// Unmount anything rendered by the previous test.
afterEach(cleanup);
// Reset spies AND globals: vi.restoreAllMocks() alone does not undo
// vi.stubGlobal('fetch', …), so the stubbed fetch would otherwise leak
// past the test that installed it.
afterEach(() => {
	vi.restoreAllMocks();
	vi.unstubAllGlobals();
});
// Baseline `trainingInfo` prop shared by the tests below; individual tests
// spread it and override single fields. With these values the
// "enabled state" suite expects the start button to be enabled.
const baseInfo = {
availableBlocks: 10,
totalOcrBlocks: 20,
availableDocuments: 3,
ocrServiceAvailable: true,
lastRun: null,
runs: []
};
// Covers the cases in which the start button must be disabled and checks
// which explanatory hint is shown for each of them.
describe('OcrTrainingCard — disabled states', () => {
it('disables button and shows hint when availableBlocks is 0', async () => {
render(OcrTrainingCard, { trainingInfo: { ...baseInfo, availableBlocks: 0 } });
const btn = page.getByRole('button', { name: /Training starten/i });
await expect.element(btn).toBeDisabled();
await expect
.element(page.getByText(/Mindestens 5 geprüfte Blöcke erforderlich/i))
.toBeInTheDocument();
});
it('disables button and shows hint when availableBlocks is less than 5', async () => {
render(OcrTrainingCard, { trainingInfo: { ...baseInfo, availableBlocks: 3 } });
const btn = page.getByRole('button', { name: /Training starten/i });
await expect.element(btn).toBeDisabled();
await expect.element(page.getByText(/Mindestens 5/i)).toBeInTheDocument();
});
it('disables button and shows service-down warning when ocrServiceAvailable is false', async () => {
render(OcrTrainingCard, { trainingInfo: { ...baseInfo, ocrServiceAvailable: false } });
const btn = page.getByRole('button', { name: /Training starten/i });
await expect.element(btn).toBeDisabled();
await expect.element(page.getByText(/OCR-Dienst ist nicht erreichbar/i)).toBeInTheDocument();
});
it('does not show service-down warning when blocks are insufficient', async () => {
// tooFewBlocks hint takes priority over serviceDown hint
render(OcrTrainingCard, {
trainingInfo: { ...baseInfo, availableBlocks: 2, ocrServiceAvailable: false }
});
await expect.element(page.getByText(/Mindestens 5/i)).toBeInTheDocument();
// serviceDown text should NOT appear because tooFewBlocks branch hides it
// NOTE(review): the absence check relies on the warning's CSS class
// (.text-orange-600), so restyling the warning would silently weaken it.
const serviceMsg = document.querySelector('.text-orange-600');
expect(serviceMsg).toBeNull();
});
});
// Covers the happy path: enough blocks and a reachable service enable the
// button, and the block counts from the prop are rendered.
describe('OcrTrainingCard — enabled state', () => {
it('enables button when availableBlocks >= 5 and service is up', async () => {
render(OcrTrainingCard, { trainingInfo: baseInfo });
const btn = page.getByRole('button', { name: /Training starten/i });
await expect.element(btn).not.toBeDisabled();
});
it('shows block count info text', async () => {
render(OcrTrainingCard, {
trainingInfo: { ...baseInfo, availableBlocks: 7, totalOcrBlocks: 15 }
});
// The other numbers in this fixture (15 total blocks, 3 documents) contain no "7".
await expect.element(page.getByText(/7/)).toBeInTheDocument();
await expect.element(page.getByText(/von 15 OCR-Blöcken/i)).toBeInTheDocument();
});
});
// Covers the button label while the training request has not settled yet.
describe('OcrTrainingCard — in-flight state', () => {
it('shows "…" while POST is in-flight', async () => {
// Manually controlled promise: fetch stays pending until resolveFetch is called.
let resolveFetch!: (v: unknown) => void;
const pendingFetch = new Promise((resolve) => {
resolveFetch = resolve;
});
vi.stubGlobal('fetch', vi.fn().mockReturnValue(pendingFetch));
render(OcrTrainingCard, { trainingInfo: baseInfo });
const btn = page.getByRole('button', { name: /Training starten/i });
await btn.click();
// While fetch is still pending the button label becomes "…"
await expect.element(page.getByRole('button', { name: '…' })).toBeInTheDocument();
// Cleanup: resolve the pending promise
resolveFetch({ ok: false });
});
});

View File

@@ -0,0 +1,76 @@
<script lang="ts">
/** A single training run, rendered as one row of the history table. */
interface Run {
/** Used as the key of the keyed `each` block. */
id: string;
/** 'DONE' and 'FAILED' get their own badges; any other value renders the "Läuft…" badge. */
status: 'RUNNING' | 'DONE' | 'FAILED';
/** Shown in the "Blöcke" column. */
blockCount: number;
/** Shown in the "Dokumente" column (hidden below the `md` breakpoint). */
documentCount: number;
modelName: string;
/** Exposed as the `title` attribute of the "Fehler" badge for failed runs. */
errorMessage?: string;
triggeredBy?: string;
/** Timestamp string passed to `formatDate` for the "Datum" column. */
createdAt: string;
completedAt?: string;
}
/** Props accepted by the training history table. */
interface Props {
/** Runs to list; an empty array renders the "Noch keine Trainings-Läufe." placeholder row. */
runs: Run[];
}
let { runs }: Props = $props();
// Shared formatter: German locale, numeric day, abbreviated month, numeric year.
const dateFormatter = new Intl.DateTimeFormat('de-DE', {
	day: 'numeric',
	month: 'short',
	year: 'numeric'
});

/**
 * Formats a timestamp string for the "Datum" column.
 *
 * @param iso - Date string understood by the `Date` constructor.
 * @returns The formatted date, or '–' when the string cannot be parsed.
 *   `Intl.DateTimeFormat.prototype.format` throws a RangeError for an invalid
 *   date, which would otherwise break rendering of the whole table.
 */
function formatDate(iso: string): string {
	const date = new Date(iso);
	if (Number.isNaN(date.getTime())) {
		return '–';
	}
	return dateFormatter.format(date);
}
</script>
<table class="w-full text-sm">
<thead>
<tr class="border-b border-line text-xs font-bold tracking-widest text-ink-3 uppercase">
<th class="pb-2 text-left">Datum</th>
<th class="pb-2 text-left">Status</th>
<th class="pb-2 text-right">Blöcke</th>
<th class="hidden pb-2 text-right md:table-cell">Dokumente</th>
</tr>
</thead>
<tbody>
{#if runs.length === 0}
<tr>
<td colspan="4" class="py-4 text-center text-sm text-ink-2">
Noch keine Trainings-Läufe.
</td>
</tr>
{:else}
{#each runs as run (run.id)}
<tr class="border-b border-line/50 last:border-0">
<td class="py-2 text-ink-2">{formatDate(run.createdAt)}</td>
<td class="py-2">
{#if run.status === 'DONE'}
<span
class="inline-flex items-center gap-1 rounded-sm bg-green-100 px-1.5 py-0.5 text-xs font-medium text-green-700"
>✓ Fertig</span
>
{:else if run.status === 'FAILED'}
<span
class="inline-flex items-center gap-1 rounded-sm bg-red-100 px-1.5 py-0.5 text-xs font-medium text-red-700"
title={run.errorMessage}> Fehler</span
>
{:else}
<span
class="inline-flex items-center gap-1 rounded-sm bg-yellow-100 px-1.5 py-0.5 text-xs font-medium text-yellow-700"
><span class="h-1.5 w-1.5 animate-pulse rounded-full bg-yellow-500"
></span>Läuft…</span
>
{/if}
</td>
<td class="py-2 text-right text-ink-2">{run.blockCount}</td>
<td class="hidden py-2 text-right text-ink-2 md:table-cell">{run.documentCount}</td>
</tr>
{/each}
{/if}
</tbody>
</table>

View File

@@ -26,6 +26,7 @@ export type ErrorCode =
| 'OCR_JOB_NOT_FOUND'
| 'OCR_DOCUMENT_NOT_UPLOADED'
| 'OCR_PROCESSING_FAILED'
| 'TRAINING_ALREADY_RUNNING'
| 'UNAUTHORIZED'
| 'FORBIDDEN'
| 'VALIDATION_ERROR'
@@ -97,6 +98,8 @@ export function getErrorMessage(code: ErrorCode | string | undefined): string {
return m.error_ocr_document_not_uploaded();
case 'OCR_PROCESSING_FAILED':
return m.error_ocr_processing_failed();
case 'TRAINING_ALREADY_RUNNING':
return m.error_training_already_running();
case 'UNAUTHORIZED':
return m.error_unauthorized();
case 'FORBIDDEN':

View File

@@ -228,6 +228,22 @@ export interface paths {
patch?: never;
trace?: never;
};
"/api/ocr/train": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
get?: never;
put?: never;
post: operations["triggerTraining"];
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
"/api/ocr/batch": {
parameters: {
query?: never;
@@ -564,6 +580,22 @@ export interface paths {
patch: operations["updateGroup"];
trace?: never;
};
"/api/documents/{id}/training-labels": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
get?: never;
put?: never;
post?: never;
delete?: never;
options?: never;
head?: never;
patch: operations["patchTrainingLabel"];
trace?: never;
};
"/api/documents/{documentId}/comments/{commentId}": {
parameters: {
query?: never;
@@ -676,6 +708,38 @@ export interface paths {
patch?: never;
trace?: never;
};
"/api/ocr/training-info": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
get: operations["getTrainingInfo"];
put?: never;
post?: never;
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
"/api/ocr/training-data/export": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
get: operations["exportTrainingData"];
put?: never;
post?: never;
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
"/api/ocr/jobs/{jobId}": {
parameters: {
query?: never;
@@ -1106,7 +1170,6 @@ export interface components {
receivers?: components["schemas"]["Person"][];
sender?: components["schemas"]["Person"];
tags?: components["schemas"]["Tag"][];
/** @enum {string} */
trainingLabels?: ("KURRENT_RECOGNITION" | "KURRENT_SEGMENTATION")[];
};
UpdateTranscriptionBlockDTO: {
@@ -1174,6 +1237,24 @@ export interface components {
/** Format: date-time */
createdAt: string;
};
OcrTrainingRun: {
/** Format: uuid */
id: string;
/** @enum {string} */
status: "RUNNING" | "DONE" | "FAILED";
/** Format: int32 */
blockCount: number;
/** Format: int32 */
documentCount: number;
modelName: string;
errorMessage?: string;
/** Format: uuid */
triggeredBy?: string;
/** Format: date-time */
createdAt: string;
/** Format: date-time */
completedAt?: string;
};
BatchOcrDTO: {
documentIds: string[];
};
@@ -1314,6 +1395,10 @@ export interface components {
actorName?: string;
documentTitle?: string;
};
TrainingLabelRequest: {
label?: string;
enrolled?: boolean;
};
StatsDTO: {
/** Format: int64 */
totalPersons?: number;
@@ -1325,8 +1410,6 @@ export interface components {
/** Format: uuid */
id?: string;
displayName?: string;
/** Format: int64 */
documentCount?: number;
firstName?: string;
lastName?: string;
/** Format: int32 */
@@ -1335,8 +1418,22 @@ export interface components {
deathYear?: number;
alias?: string;
notes?: string;
/** Format: int64 */
documentCount?: number;
personType?: string;
};
TrainingInfoResponse: {
/** Format: int32 */
availableBlocks?: number;
/** Format: int32 */
totalOcrBlocks?: number;
/** Format: int32 */
availableDocuments?: number;
ocrServiceAvailable?: boolean;
lastRun?: components["schemas"]["OcrTrainingRun"];
runs?: components["schemas"]["OcrTrainingRun"][];
};
StreamingResponseBody: unknown;
OcrJob: {
/** Format: uuid */
id: string;
@@ -1381,11 +1478,11 @@ export interface components {
empty?: boolean;
};
PageableObject: {
paged?: boolean;
/** Format: int32 */
pageNumber?: number;
/** Format: int32 */
pageSize?: number;
paged?: boolean;
/** Format: int64 */
offset?: number;
sort?: components["schemas"]["SortObject"];
@@ -2082,6 +2179,26 @@ export interface operations {
};
};
};
triggerTraining: {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
requestBody?: never;
responses: {
/** @description Created */
201: {
headers: {
[name: string]: unknown;
};
content: {
"*/*": components["schemas"]["OcrTrainingRun"];
};
};
};
};
triggerBatch: {
parameters: {
query?: never;
@@ -2743,6 +2860,30 @@ export interface operations {
};
};
};
patchTrainingLabel: {
parameters: {
query?: never;
header?: never;
path: {
id: string;
};
cookie?: never;
};
requestBody: {
content: {
"application/json": components["schemas"]["TrainingLabelRequest"];
};
};
responses: {
/** @description OK */
200: {
headers: {
[name: string]: unknown;
};
content?: never;
};
};
};
deleteComment: {
parameters: {
query?: never;
@@ -2923,6 +3064,46 @@ export interface operations {
};
};
};
getTrainingInfo: {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
requestBody?: never;
responses: {
/** @description OK */
200: {
headers: {
[name: string]: unknown;
};
content: {
"*/*": components["schemas"]["TrainingInfoResponse"];
};
};
};
};
exportTrainingData: {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
requestBody?: never;
responses: {
/** @description OK */
200: {
headers: {
[name: string]: unknown;
};
content: {
"*/*": components["schemas"]["StreamingResponseBody"];
};
};
};
};
getJobStatus: {
parameters: {
query?: never;

View File

@@ -1,6 +1,12 @@
<script lang="ts">
import { onDestroy } from 'svelte';
import { m } from '$lib/paraglide/messages.js';
import OcrTrainingCard from '$lib/components/OcrTrainingCard.svelte';
import type { components } from '$lib/generated/api.js';
type TrainingInfo = components['schemas']['TrainingInfoResponse'];
let trainingInfo: TrainingInfo | null = $state(null);
let backfillResult: number | null = $state(null);
let backfillLoading = $state(false);
@@ -51,8 +57,16 @@ async function triggerImport() {
}
}
/**
 * Requests the OCR training overview and stores it in `trainingInfo`.
 *
 * A non-OK response leaves `trainingInfo` unchanged. A rejected request or an
 * unparsable body is logged and otherwise ignored, because this function is
 * called without `await` and a thrown error would surface as an unhandled
 * promise rejection.
 */
async function fetchTrainingInfo() {
	try {
		const res = await fetch('/api/ocr/training-info');
		if (res.ok) {
			trainingInfo = await res.json();
		}
	} catch (err: unknown) {
		console.error('Failed to load OCR training info', err);
	}
}
// Calls fetchImportStatus() and fetchTrainingInfo() whenever the effect runs;
// their return values are not awaited here.
$effect(() => {
fetchImportStatus();
fetchTrainingInfo();
});
onDestroy(() => stopPolling());
@@ -88,6 +102,9 @@ async function backfillFileHashes() {
<div class="flex-1 overflow-y-auto p-6">
<div class="mx-auto max-w-2xl space-y-5">
<!-- OCR Training -->
<OcrTrainingCard trainingInfo={trainingInfo} />
<!-- Backfill versions -->
<div class="rounded-sm border border-line bg-surface p-6 shadow-sm">
<h2 class="mb-1 font-sans text-sm font-bold text-ink">{m.admin_system_backfill_heading()}</h2>

View File

@@ -78,6 +78,8 @@ describe('Admin system page — mass import card', () => {
startedAt: null
})
})
// training info fetch → empty
.mockResolvedValueOnce({ ok: true, json: async () => ({}) })
// trigger POST → returns RUNNING immediately
.mockResolvedValueOnce({
ok: true,