feat(training): add segmentation training pipeline and complete Part 6
- Add /segtrain endpoint to OCR service (ZIP upload, ketos.segtrain, backup rotation, in-process model reload)
- Add segtrainModel() to OcrClient and RestClientOcrClient (10-min timeout, X-Training-Token header)
- Add SegmentationTrainingExportService: PAGE XML export with polygon de-normalization and per-page PNG rendering via PDFBox
- Add GET /api/ocr/segmentation-training-data/export endpoint
- Make TranscriptionBlock.text nullable for segmentation-only blocks (V31 migration)
- Add Paraglide i18n translation keys for all training UI strings (de/en/es)
- Pass source prop from TranscriptionEditView to TranscriptionBlock

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -533,5 +533,28 @@
|
||||
"ocr_status_error": "OCR fehlgeschlagen",
|
||||
"transcription_block_review": "Als geprüft markieren",
|
||||
"transcription_block_unreview": "Markierung aufheben",
|
||||
"transcription_reviewed_count": "{reviewed} von {total} geprüft"
|
||||
"transcription_reviewed_count": "{reviewed} von {total} geprüft",
|
||||
"training_ocr_heading": "Kurrent-Erkennung trainieren",
|
||||
"training_ocr_description": "Starte ein neues Training mit den bisher geprüften OCR-Blöcken, um die Erkennungsgenauigkeit für Kurrentschrift zu verbessern.",
|
||||
"training_ocr_blocks_ready": "{blocks} geprüfte Blöcke bereit / {docs} Dokumente",
|
||||
"training_ocr_blocks_total": "(von {total} OCR-Blöcken gesamt)",
|
||||
"training_start_btn": "Training starten",
|
||||
"training_in_progress": "…",
|
||||
"training_success": "Training wurde gestartet und abgeschlossen.",
|
||||
"training_too_few_blocks": "Mindestens 5 geprüfte Blöcke erforderlich (aktuell: {available}).",
|
||||
"training_service_down": "OCR-Dienst ist nicht erreichbar.",
|
||||
"training_history_heading": "Verlauf",
|
||||
"training_history_empty": "Noch keine Trainings-Läufe.",
|
||||
"training_history_col_date": "Datum",
|
||||
"training_history_col_status": "Status",
|
||||
"training_history_col_blocks": "Blöcke",
|
||||
"training_history_col_docs": "Dokumente",
|
||||
"training_status_done": "Fertig",
|
||||
"training_status_failed": "Fehler",
|
||||
"training_status_running": "Läuft…",
|
||||
"training_seg_heading": "Segmentierung trainieren",
|
||||
"training_seg_description": "Starte ein neues Training mit annotierten Segmentierungsbereichen, um die Texterkennung zu verbessern.",
|
||||
"training_seg_blocks_ready": "{blocks} Segmentierungsblöcke bereit",
|
||||
"training_seg_too_few_blocks": "Mindestens 5 Segmentierungsblöcke erforderlich (aktuell: {available}).",
|
||||
"transcription_block_segmentation_only": "Nur Segmentierung"
|
||||
}
|
||||
|
||||
@@ -533,5 +533,28 @@
|
||||
"ocr_status_error": "OCR failed",
|
||||
"transcription_block_review": "Mark as reviewed",
|
||||
"transcription_block_unreview": "Unmark as reviewed",
|
||||
"transcription_reviewed_count": "{reviewed} of {total} reviewed"
|
||||
"transcription_reviewed_count": "{reviewed} of {total} reviewed",
|
||||
"training_ocr_heading": "Train Kurrent recognition",
|
||||
"training_ocr_description": "Start a new training run using the reviewed OCR blocks to improve recognition accuracy for Kurrent script.",
|
||||
"training_ocr_blocks_ready": "{blocks} reviewed blocks ready / {docs} documents",
|
||||
"training_ocr_blocks_total": "(of {total} OCR blocks total)",
|
||||
"training_start_btn": "Start training",
|
||||
"training_in_progress": "…",
|
||||
"training_success": "Training started and completed.",
|
||||
"training_too_few_blocks": "At least 5 reviewed blocks required (currently: {available}).",
|
||||
"training_service_down": "OCR service is unavailable.",
|
||||
"training_history_heading": "History",
|
||||
"training_history_empty": "No training runs yet.",
|
||||
"training_history_col_date": "Date",
|
||||
"training_history_col_status": "Status",
|
||||
"training_history_col_blocks": "Blocks",
|
||||
"training_history_col_docs": "Documents",
|
||||
"training_status_done": "Done",
|
||||
"training_status_failed": "Failed",
|
||||
"training_status_running": "Running…",
|
||||
"training_seg_heading": "Train segmentation",
|
||||
"training_seg_description": "Start a new training run using annotated segmentation regions to improve text detection.",
|
||||
"training_seg_blocks_ready": "{blocks} segmentation blocks ready",
|
||||
"training_seg_too_few_blocks": "At least 5 segmentation blocks required (currently: {available}).",
|
||||
"transcription_block_segmentation_only": "Segmentation only"
|
||||
}
|
||||
|
||||
@@ -533,5 +533,28 @@
|
||||
"ocr_status_error": "OCR fallido",
|
||||
"transcription_block_review": "Marcar como revisado",
|
||||
"transcription_block_unreview": "Desmarcar como revisado",
|
||||
"transcription_reviewed_count": "{reviewed} de {total} revisados"
|
||||
"transcription_reviewed_count": "{reviewed} de {total} revisados",
|
||||
"training_ocr_heading": "Entrenar reconocimiento Kurrent",
|
||||
"training_ocr_description": "Inicia un nuevo entrenamiento con los bloques OCR revisados para mejorar la precisión de reconocimiento del script Kurrent.",
|
||||
"training_ocr_blocks_ready": "{blocks} bloques revisados listos / {docs} documentos",
|
||||
"training_ocr_blocks_total": "(de {total} bloques OCR en total)",
|
||||
"training_start_btn": "Iniciar entrenamiento",
|
||||
"training_in_progress": "…",
|
||||
"training_success": "Entrenamiento iniciado y completado.",
|
||||
"training_too_few_blocks": "Se requieren al menos 5 bloques revisados (actualmente: {available}).",
|
||||
"training_service_down": "El servicio OCR no está disponible.",
|
||||
"training_history_heading": "Historial",
|
||||
"training_history_empty": "Todavía no hay ejecuciones de entrenamiento.",
|
||||
"training_history_col_date": "Fecha",
|
||||
"training_history_col_status": "Estado",
|
||||
"training_history_col_blocks": "Bloques",
|
||||
"training_history_col_docs": "Documentos",
|
||||
"training_status_done": "Listo",
|
||||
"training_status_failed": "Error",
|
||||
"training_status_running": "Ejecutando…",
|
||||
"training_seg_heading": "Entrenar segmentación",
|
||||
"training_seg_description": "Inicia un nuevo entrenamiento con regiones de segmentación anotadas para mejorar la detección de texto.",
|
||||
"training_seg_blocks_ready": "{blocks} bloques de segmentación listos",
|
||||
"training_seg_too_few_blocks": "Se requieren al menos 5 bloques de segmentación (actualmente: {available}).",
|
||||
"transcription_block_segmentation_only": "Solo segmentación"
|
||||
}
|
||||
|
||||
@@ -344,6 +344,7 @@ $effect(() => {
|
||||
onMoveDown={() => handleMoveDown(block.id)}
|
||||
isFirst={i === 0}
|
||||
isLast={i === sortedBlocks.length - 1}
|
||||
source={block.source}
|
||||
/>
|
||||
</div>
|
||||
{/each}
|
||||
|
||||
Reference in New Issue
Block a user