feat(ocr): add guided OCR mode using existing annotation regions
When a document has manually drawn annotation boxes, the user can now enable "Nur annotierte Bereiche" in the OCR trigger panel. The engine skips layout detection entirely and runs recognition only within the pre-drawn bounding boxes, preserving manual transcription blocks.

- Python: adds the OcrRegion model and extends OcrRequest/OcrBlock; the guided branch in /ocr/stream groups regions by page and crops each region
- Engines: adds extract_region_text() to both Kraken and Surya
- Java: adds OcrBlockResult.annotationId, OcrClient.OcrRegion, and TriggerOcrDTO.useExistingAnnotations; OcrAsyncRunner dispatches to upsertGuidedBlock when annotationId is present; OcrService threads the flag through to runSingleDocument
- TranscriptionService: adds upsertGuidedBlock (creates a new block, updates an existing OCR block, or preserves a MANUAL block)
- Frontend: adds a guided OCR toggle in OcrTrigger, shown when blocks exist; skips the destructive-replace confirmation in guided mode

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -531,6 +531,8 @@
|
||||
"ocr_status_analyzing_page": "Seite {current} von {total} wird analysiert…",
|
||||
"ocr_status_done_skipped": "{count} Blöcke erstellt, {skipped} Seite(n) übersprungen",
|
||||
"ocr_status_error": "OCR fehlgeschlagen",
|
||||
"ocr_use_existing_annotations": "Nur annotierte Bereiche",
|
||||
"ocr_use_existing_annotations_hint": "OCR wird nur innerhalb der bereits markierten Bereiche ausgeführt — keine neue Layout-Erkennung.",
|
||||
"transcription_block_review": "Als geprüft markieren",
|
||||
"transcription_block_unreview": "Markierung aufheben",
|
||||
"transcription_reviewed_count": "{reviewed} von {total} geprüft",
|
||||
|
||||
@@ -531,6 +531,8 @@
|
||||
"ocr_status_analyzing_page": "Analyzing page {current} of {total}…",
|
||||
"ocr_status_done_skipped": "{count} blocks created, {skipped} page(s) skipped",
|
||||
"ocr_status_error": "OCR failed",
|
||||
"ocr_use_existing_annotations": "Annotated regions only",
|
||||
"ocr_use_existing_annotations_hint": "OCR runs only within the already marked regions — no new layout detection.",
|
||||
"transcription_block_review": "Mark as reviewed",
|
||||
"transcription_block_unreview": "Unmark as reviewed",
|
||||
"transcription_reviewed_count": "{reviewed} of {total} reviewed",
|
||||
|
||||
@@ -531,6 +531,8 @@
|
||||
"ocr_status_analyzing_page": "Analizando página {current} de {total}…",
|
||||
"ocr_status_done_skipped": "{count} bloques creados, {skipped} página(s) omitida(s)",
|
||||
"ocr_status_error": "OCR fallido",
|
||||
"ocr_use_existing_annotations": "Solo regiones anotadas",
|
||||
"ocr_use_existing_annotations_hint": "El OCR se ejecuta solo dentro de las regiones ya marcadas — sin nueva detección de diseño.",
|
||||
"transcription_block_review": "Marcar como revisado",
|
||||
"transcription_block_unreview": "Desmarcar como revisado",
|
||||
"transcription_reviewed_count": "{reviewed} de {total} revisados",
|
||||
|
||||
@@ -7,10 +7,11 @@ import ScriptTypeSelect from './ScriptTypeSelect.svelte';
|
||||
interface Props {
|
||||
existingBlockCount: number;
|
||||
storedScriptType: string;
|
||||
onTrigger: (scriptType: string) => void;
|
||||
annotationCount?: number;
|
||||
onTrigger: (scriptType: string, useExistingAnnotations: boolean) => void;
|
||||
}
|
||||
|
||||
let { existingBlockCount, storedScriptType, onTrigger }: Props = $props();
|
||||
let { existingBlockCount, storedScriptType, annotationCount = 0, onTrigger }: Props = $props();
|
||||
|
||||
const { confirm } = getConfirmService();
|
||||
|
||||
@@ -18,10 +19,12 @@ let selectedScriptType: string = $state(
|
||||
untrack(() => (storedScriptType && storedScriptType !== 'UNKNOWN' ? storedScriptType : ''))
|
||||
);
|
||||
|
||||
let useExistingAnnotations: boolean = $state(false);
|
||||
|
||||
async function handleClick() {
|
||||
if (!selectedScriptType) return;
|
||||
|
||||
if (existingBlockCount > 0) {
|
||||
if (!useExistingAnnotations && existingBlockCount > 0) {
|
||||
const confirmed = await confirm({
|
||||
title: m.ocr_confirm_title(),
|
||||
body: m.ocr_confirm_body({ count: String(existingBlockCount) }),
|
||||
@@ -31,12 +34,27 @@ async function handleClick() {
|
||||
if (!confirmed) return;
|
||||
}
|
||||
|
||||
onTrigger(selectedScriptType);
|
||||
onTrigger(selectedScriptType, useExistingAnnotations);
|
||||
}
|
||||
</script>
|
||||
|
||||
<div class="flex flex-col gap-3">
|
||||
<ScriptTypeSelect bind:value={selectedScriptType} />
|
||||
{#if annotationCount > 0}
|
||||
<div class="flex flex-col gap-1">
|
||||
<label class="flex cursor-pointer items-center gap-2">
|
||||
<input
|
||||
type="checkbox"
|
||||
bind:checked={useExistingAnnotations}
|
||||
class="h-4 w-4 cursor-pointer rounded-sm border-brand-navy/30 accent-brand-navy"
|
||||
/>
|
||||
<span class="font-sans text-sm font-medium text-brand-navy">
|
||||
{m.ocr_use_existing_annotations()}
|
||||
</span>
|
||||
</label>
|
||||
<p class="pl-6 text-xs text-ink-3">{m.ocr_use_existing_annotations_hint()}</p>
|
||||
</div>
|
||||
{/if}
|
||||
<button
|
||||
type="button"
|
||||
disabled={!selectedScriptType}
|
||||
|
||||
@@ -19,7 +19,7 @@ type Props = {
|
||||
onSaveBlock: (blockId: string, text: string) => Promise<void>;
|
||||
onDeleteBlock: (blockId: string) => Promise<void>;
|
||||
onReviewToggle: (blockId: string) => Promise<void>;
|
||||
onTriggerOcr?: (scriptType: string) => void;
|
||||
onTriggerOcr?: (scriptType: string, useExistingAnnotations: boolean) => void;
|
||||
canWrite?: boolean;
|
||||
trainingLabels?: string[];
|
||||
onToggleTrainingLabel?: (label: string, enrolled: boolean) => Promise<void>;
|
||||
@@ -370,6 +370,7 @@ $effect(() => {
|
||||
<div class="mt-3 max-w-xs">
|
||||
<OcrTrigger
|
||||
existingBlockCount={blocks.length}
|
||||
annotationCount={blocks.length}
|
||||
storedScriptType={storedScriptType}
|
||||
onTrigger={onTriggerOcr}
|
||||
/>
|
||||
|
||||
@@ -144,14 +144,14 @@ let ocrErrorMessage = $state('');
|
||||
let ocrPollTimer = $state<ReturnType<typeof setInterval> | null>(null);
|
||||
let ocrSkippedPages = $state(0);
|
||||
|
||||
async function triggerOcr(scriptType: string) {
|
||||
async function triggerOcr(scriptType: string, useExistingAnnotations: boolean = false) {
|
||||
ocrRunning = true;
|
||||
ocrErrorMessage = '';
|
||||
try {
|
||||
const res = await fetch(`/api/documents/${doc.id}/ocr`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ scriptType })
|
||||
body: JSON.stringify({ scriptType, useExistingAnnotations })
|
||||
});
|
||||
if (res.ok) {
|
||||
const data = await res.json();
|
||||
|
||||
Reference in New Issue
Block a user