feat(training): add segmentation training pipeline and complete Part 6

- Add /segtrain endpoint to OCR service (ZIP upload, ketos.segtrain,
  backup rotation, in-process model reload)
- Add segtrainModel() to OcrClient and RestClientOcrClient (10-min timeout,
  X-Training-Token header)
- Add SegmentationTrainingExportService: PAGE XML export with polygon
  de-normalization and per-page PNG rendering via PDFBox
- Add GET /api/ocr/segmentation-training-data/export endpoint
- Make TranscriptionBlock.text nullable for segmentation-only blocks
  (V31 migration)
- Add Paraglide i18n translation keys for all training UI strings (de/en/es)
- Pass source prop from TranscriptionEditView to TranscriptionBlock

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-13 15:15:17 +02:00
parent 86e9c05aaf
commit 9b2f91ee59
13 changed files with 383 additions and 5 deletions

View File

@@ -533,5 +533,28 @@
"ocr_status_error": "OCR fallido",
"transcription_block_review": "Marcar como revisado",
"transcription_block_unreview": "Desmarcar como revisado",
"transcription_reviewed_count": "{reviewed} de {total} revisados"
"transcription_reviewed_count": "{reviewed} de {total} revisados",
"training_ocr_heading": "Entrenar reconocimiento Kurrent",
"training_ocr_description": "Inicia un nuevo entrenamiento con los bloques OCR revisados para mejorar la precisión de reconocimiento del script Kurrent.",
"training_ocr_blocks_ready": "{blocks} bloques revisados listos / {docs} documentos",
"training_ocr_blocks_total": "(de {total} bloques OCR en total)",
"training_start_btn": "Iniciar entrenamiento",
"training_in_progress": "…",
"training_success": "Entrenamiento iniciado y completado.",
"training_too_few_blocks": "Se requieren al menos 5 bloques revisados (actualmente: {available}).",
"training_service_down": "El servicio OCR no está disponible.",
"training_history_heading": "Historial",
"training_history_empty": "Todavía no hay ejecuciones de entrenamiento.",
"training_history_col_date": "Fecha",
"training_history_col_status": "Estado",
"training_history_col_blocks": "Bloques",
"training_history_col_docs": "Documentos",
"training_status_done": "Listo",
"training_status_failed": "Error",
"training_status_running": "Ejecutando…",
"training_seg_heading": "Entrenar segmentación",
"training_seg_description": "Inicia un nuevo entrenamiento con regiones de segmentación anotadas para mejorar la detección de texto.",
"training_seg_blocks_ready": "{blocks} bloques de segmentación listos",
"training_seg_too_few_blocks": "Se requieren al menos 5 bloques de segmentación (actualmente: {available}).",
"transcription_block_segmentation_only": "Solo segmentación"
}