feat(training): add document-level training enrollment

- V29 migration: document_training_labels join table
- TrainingLabel enum: KURRENT_RECOGNITION, KURRENT_SEGMENTATION
- Document.trainingLabels @ElementCollection
- DocumentService.addTrainingLabel / removeTrainingLabel
- PATCH /api/documents/{id}/training-labels (WRITE_ALL)
- Auto-enroll on Kurrent OCR trigger (OcrService.startOcr)
- TranscriptionEditView: enrollment chips in panel footer
- JPQL queries updated to use MEMBER OF trainingLabels

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-13 14:30:51 +02:00
parent 73229077be
commit fdf1eb92ad
12 changed files with 614 additions and 11 deletions

View File

@@ -20,6 +20,9 @@ type Props = {
onDeleteBlock: (blockId: string) => Promise<void>;
onReviewToggle: (blockId: string) => Promise<void>;
onTriggerOcr?: (scriptType: string) => void;
canWrite?: boolean;
trainingLabels?: string[];
onToggleTrainingLabel?: (label: string, enrolled: boolean) => Promise<void>;
};
let {
@@ -34,10 +37,14 @@ let {
onSaveBlock,
onDeleteBlock,
onReviewToggle,
onTriggerOcr
onTriggerOcr,
canWrite = false,
trainingLabels = [],
onToggleTrainingLabel
}: Props = $props();
let activeBlockId: string | null = $state(null);
// Local copy of the `trainingLabels` prop that drives the enrollment chip state;
// handleLabelToggle reassigns it to apply (and, on failure, undo) optimistic updates.
let localLabels: string[] = $derived.by(() => [...trainingLabels]);
// Sync: when an annotation is clicked on the PDF, activate the corresponding block
$effect(() => {
@@ -188,7 +195,7 @@ let dropTargetIdx: number | null = $state(null);
let dragOffsetY: number = $state(0);
let dragStartY = 0;
let capturedEl: HTMLElement | null = null;
let listEl: HTMLElement | null = null;
let listEl: HTMLElement | null = $state(null);
function handleGripDown(e: PointerEvent, blockId: string) {
if (!(e.target as HTMLElement).closest('[data-drag-handle]')) return;
@@ -240,6 +247,23 @@ function handlePointerUp() {
capturedEl = null;
}
/**
 * Toggles enrollment of the document for the given training label.
 *
 * The change is applied to `localLabels` immediately (optimistic update) and
 * then confirmed through `onToggleTrainingLabel`. If that callback rejects,
 * only this label's change is undone. Does nothing when no callback was passed.
 *
 * @param label - Training label identifier to toggle, e.g. 'KURRENT_RECOGNITION'.
 */
async function handleLabelToggle(label: string) {
	if (!onToggleTrainingLabel) return;
	const enrolled = !localLabels.includes(label);
	// Optimistic update
	localLabels = enrolled ? [...localLabels, label] : localLabels.filter((l) => l !== label);
	try {
		await onToggleTrainingLabel(label, enrolled);
	} catch {
		// Undo only this label. Resetting to `trainingLabels` would also discard
		// earlier successful toggles whenever the prop has not been refreshed
		// since those requests completed.
		if (enrolled) {
			localLabels = localLabels.filter((l) => l !== label);
		} else if (!localLabels.includes(label)) {
			localLabels = [...localLabels, label];
		}
	}
}
function flushViaBeacon() {
for (const [blockId, text] of pendingTexts) {
clearDebounce(blockId);
@@ -390,4 +414,23 @@ $effect(() => {
{/if}
</div>
{/if}
{#if canWrite}
<div class="border-t border-line px-4 py-3">
<p class="mb-2 font-sans text-xs font-medium text-ink-2">Für Training vormerken</p>
<div class="flex flex-wrap gap-2">
{#each [{ label: 'KURRENT_RECOGNITION', display: 'Kurrent-Erkennung' }, { label: 'KURRENT_SEGMENTATION', display: 'Segmentierung' }] as chip (chip.label)}
<button
type="button"
onclick={() => handleLabelToggle(chip.label)}
class="rounded-full border px-3 py-1 font-sans text-xs font-medium transition-colors {localLabels.includes(chip.label)
? 'border-brand-mint bg-brand-mint text-brand-navy'
: 'border-line bg-surface text-ink-3 hover:border-brand-mint hover:text-brand-navy'}"
>
{chip.display}
</button>
{/each}
</div>
</div>
{/if}
</div>

View File

@@ -116,6 +116,22 @@ export interface paths {
patch?: never;
trace?: never;
};
"/api/documents/{documentId}/transcription-blocks/{blockId}/review": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
get?: never;
put: operations["reviewBlock"];
post?: never;
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
"/api/documents/{documentId}/transcription-blocks/reorder": {
parameters: {
query?: never;
@@ -212,6 +228,22 @@ export interface paths {
patch?: never;
trace?: never;
};
"/api/ocr/batch": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
get?: never;
put?: never;
post: operations["triggerBatch"];
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
"/api/notifications/read-all": {
parameters: {
query?: never;
@@ -308,6 +340,22 @@ export interface paths {
patch?: never;
trace?: never;
};
"/api/documents/{documentId}/ocr": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
get?: never;
put?: never;
post: operations["triggerOcr"];
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
"/api/documents/{documentId}/comments": {
parameters: {
query?: never;
@@ -628,6 +676,38 @@ export interface paths {
patch?: never;
trace?: never;
};
"/api/ocr/jobs/{jobId}": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
get: operations["getJobStatus"];
put?: never;
post?: never;
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
"/api/ocr/jobs/{jobId}/progress": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
get: operations["streamProgress"];
put?: never;
post?: never;
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
"/api/notifications": {
parameters: {
query?: never;
@@ -740,6 +820,22 @@ export interface paths {
patch?: never;
trace?: never;
};
"/api/documents/{documentId}/ocr-status": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
get: operations["getDocumentOcrStatus"];
put?: never;
post?: never;
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
"/api/documents/search": {
parameters: {
query?: never;
@@ -940,6 +1036,7 @@ export interface components {
name: string;
};
PersonUpdateDTO: {
title?: string;
firstName?: string;
lastName?: string;
alias?: string;
@@ -978,6 +1075,8 @@ export interface components {
receiverIds?: string[];
tags?: string;
metadataComplete?: boolean;
/** @enum {string} */
scriptType?: "UNKNOWN" | "TYPEWRITER" | "HANDWRITING_LATIN" | "HANDWRITING_KURRENT";
};
Document: {
/** Format: uuid */
@@ -1002,9 +1101,13 @@ export interface components {
/** Format: date-time */
updatedAt: string;
metadataComplete: boolean;
/** @enum {string} */
scriptType: "UNKNOWN" | "TYPEWRITER" | "HANDWRITING_LATIN" | "HANDWRITING_KURRENT";
receivers?: components["schemas"]["Person"][];
sender?: components["schemas"]["Person"];
tags?: components["schemas"]["Tag"][];
/** @enum {string} */
trainingLabels?: ("KURRENT_RECOGNITION" | "KURRENT_SEGMENTATION")[];
};
UpdateTranscriptionBlockDTO: {
text?: string;
@@ -1021,6 +1124,9 @@ export interface components {
label?: string;
/** Format: int32 */
sortOrder: number;
/** @enum {string} */
source: "MANUAL" | "OCR";
reviewed: boolean;
/** Format: int32 */
version: number;
/** Format: uuid */
@@ -1068,6 +1174,9 @@ export interface components {
/** Format: date-time */
createdAt: string;
};
BatchOcrDTO: {
documentIds: string[];
};
GroupDTO: {
name?: string;
permissions?: string[];
@@ -1118,6 +1227,10 @@ export interface components {
firstName: string;
lastName: string;
};
TriggerOcrDTO: {
/** @enum {string} */
scriptType?: "UNKNOWN" | "TYPEWRITER" | "HANDWRITING_LATIN" | "HANDWRITING_KURRENT";
};
CreateAnnotationDTO: {
/** Format: int32 */
pageNumber?: number;
@@ -1130,6 +1243,7 @@ export interface components {
/** Format: double */
height?: number;
color?: string;
polygon?: number[][];
};
DocumentAnnotation: {
/** Format: uuid */
@@ -1148,6 +1262,7 @@ export interface components {
height: number;
color: string;
fileHash?: string;
polygon?: number[][];
/** Format: uuid */
createdBy?: string;
/** Format: date-time */
@@ -1210,6 +1325,8 @@ export interface components {
/** Format: uuid */
id?: string;
displayName?: string;
/** Format: int64 */
documentCount?: number;
firstName?: string;
lastName?: string;
/** Format: int32 */
@@ -1219,14 +1336,37 @@ export interface components {
alias?: string;
notes?: string;
personType?: string;
};
OcrJob: {
/** Format: uuid */
id: string;
/** @enum {string} */
status: "PENDING" | "RUNNING" | "DONE" | "FAILED";
/** Format: int32 */
totalDocuments: number;
/** Format: int32 */
processedDocuments: number;
/** Format: int32 */
errorCount: number;
/** Format: int32 */
skippedCount: number;
progressMessage?: string;
/** Format: uuid */
createdBy?: string;
/** Format: date-time */
createdAt: string;
/** Format: date-time */
updatedAt: string;
};
SseEmitter: {
/** Format: int64 */
documentCount?: number;
timeout?: number;
};
PageNotificationDTO: {
/** Format: int64 */
totalElements?: number;
/** Format: int32 */
totalPages?: number;
/** Format: int64 */
totalElements?: number;
pageable?: components["schemas"]["PageableObject"];
/** Format: int32 */
size?: number;
@@ -1234,10 +1374,10 @@ export interface components {
/** Format: int32 */
number?: number;
sort?: components["schemas"]["SortObject"];
/** Format: int32 */
numberOfElements?: number;
first?: boolean;
last?: boolean;
/** Format: int32 */
numberOfElements?: number;
empty?: boolean;
};
PageableObject: {
@@ -1256,10 +1396,6 @@ export interface components {
empty?: boolean;
unsorted?: boolean;
};
SseEmitter: {
/** Format: int64 */
timeout?: number;
};
DocumentVersionSummary: {
/** Format: uuid */
id: string;
@@ -1292,6 +1428,15 @@ export interface components {
/** Format: date-time */
changedAt: string;
};
OcrStatusDTO: {
status?: string;
/** Format: uuid */
jobId?: string;
/** Format: int32 */
currentPage?: number;
/** Format: int32 */
totalPages?: number;
};
DocumentSearchResult: {
documents?: components["schemas"]["Document"][];
/** Format: int64 */
@@ -1702,6 +1847,29 @@ export interface operations {
};
};
};
reviewBlock: {
parameters: {
query?: never;
header?: never;
path: {
documentId: string;
blockId: string;
};
cookie?: never;
};
requestBody?: never;
responses: {
/** @description OK */
200: {
headers: {
[name: string]: unknown;
};
content: {
"*/*": components["schemas"]["TranscriptionBlock"];
};
};
};
};
reorderBlocks: {
parameters: {
query?: never;
@@ -1914,6 +2082,32 @@ export interface operations {
};
};
};
triggerBatch: {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
requestBody: {
content: {
"application/json": components["schemas"]["BatchOcrDTO"];
};
};
responses: {
/** @description Accepted */
202: {
headers: {
[name: string]: unknown;
};
content: {
"*/*": {
[key: string]: string;
};
};
};
};
};
markAllRead: {
parameters: {
query?: never;
@@ -2124,6 +2318,34 @@ export interface operations {
};
};
};
triggerOcr: {
parameters: {
query?: never;
header?: never;
path: {
documentId: string;
};
cookie?: never;
};
requestBody: {
content: {
"application/json": components["schemas"]["TriggerOcrDTO"];
};
};
responses: {
/** @description Accepted */
202: {
headers: {
[name: string]: unknown;
};
content: {
"*/*": {
[key: string]: string;
};
};
};
};
};
getDocumentComments: {
parameters: {
query?: never;
@@ -2701,6 +2923,50 @@ export interface operations {
};
};
};
getJobStatus: {
parameters: {
query?: never;
header?: never;
path: {
jobId: string;
};
cookie?: never;
};
requestBody?: never;
responses: {
/** @description OK */
200: {
headers: {
[name: string]: unknown;
};
content: {
"*/*": components["schemas"]["OcrJob"];
};
};
};
};
streamProgress: {
parameters: {
query?: never;
header?: never;
path: {
jobId: string;
};
cookie?: never;
};
requestBody?: never;
responses: {
/** @description OK */
200: {
headers: {
[name: string]: unknown;
};
content: {
"text/event-stream": components["schemas"]["SseEmitter"];
};
};
};
};
getNotifications: {
parameters: {
query?: {
@@ -2860,6 +3126,28 @@ export interface operations {
};
};
};
getDocumentOcrStatus: {
parameters: {
query?: never;
header?: never;
path: {
documentId: string;
};
cookie?: never;
};
requestBody?: never;
responses: {
/** @description OK */
200: {
headers: {
[name: string]: unknown;
};
content: {
"*/*": components["schemas"]["OcrStatusDTO"];
};
};
};
};
search_1: {
parameters: {
query?: {

View File

@@ -129,6 +129,15 @@ async function reviewToggle(blockId: string) {
transcriptionBlocks = transcriptionBlocks.map((b) => (b.id === blockId ? updated : b));
}
/**
 * Sends a PATCH request that sets or clears a training label on the current document.
 *
 * @param label - Training label identifier sent in the request body.
 * @param enrolled - Desired enrollment state for that label.
 * @throws Error when the response status is not OK.
 */
async function toggleTrainingLabel(label: string, enrolled: boolean) {
	const endpoint = `/api/documents/${doc.id}/training-labels`;
	const payload = JSON.stringify({ label, enrolled });
	const response = await fetch(endpoint, {
		method: 'PATCH',
		headers: { 'Content-Type': 'application/json' },
		body: payload
	});
	if (response.ok) return;
	throw new Error('Failed to update training label');
}
let ocrRunning = $state(false);
let ocrProgressMessage = $state('');
let ocrErrorMessage = $state('');
@@ -449,11 +458,14 @@ onMount(() => {
activeAnnotationId={activeAnnotationId}
storedScriptType={doc.scriptType ?? ''}
canRunOcr={canWrite && !!doc.filePath}
canWrite={canWrite}
trainingLabels={doc.trainingLabels ?? []}
onBlockFocus={handleBlockFocus}
onSaveBlock={saveBlock}
onDeleteBlock={deleteBlock}
onReviewToggle={reviewToggle}
onTriggerOcr={triggerOcr}
onToggleTrainingLabel={toggleTrainingLabel}
/>
{/if}
</div>