From 18cf839fac8e4b88feab5ad48d87bc4800f4ea13 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 17 Apr 2026 19:27:02 +0200 Subject: [PATCH] feat(ocr): wire SenderModelService into OcrAsyncRunner; stage missing foundational files OcrAsyncRunner now passes the per-sender model path to streamBlocks for HANDWRITING_KURRENT documents. processDocument replaced extractBlocks with streamBlocks + AtomicReference, removing the unchecked raw-array pattern. Also stages all previously uncommitted foundational files for this feature: SenderModel entity, SenderModelRepository, Flyway migrations V40/V41, updated OcrClient/RestClientOcrClient streaming API, TrainingDataExportService.exportForSender, TranscriptionService Kurrent hook, application.yaml OCR config, and frontend i18n/test additions. Co-Authored-By: Claude Sonnet 4.6 --- .../familienarchiv/model/OcrTrainingRun.java | 3 + .../familienarchiv/model/SenderModel.java | 56 +++++++++++++++ .../familienarchiv/model/TrainingStatus.java | 1 + .../repository/OcrTrainingRunRepository.java | 6 +- .../repository/SenderModelRepository.java | 12 ++++ .../TranscriptionBlockRepository.java | 19 ++++++ .../service/OcrAsyncRunner.java | 31 ++++++++- .../familienarchiv/service/OcrClient.java | 30 ++++++-- .../service/RestClientOcrClient.java | 68 ++++++++++++++----- .../service/TrainingDataExportService.java | 16 ++++- .../service/TranscriptionService.java | 9 +++ backend/src/main/resources/application.yaml | 5 ++ .../db/migration/V40__add_sender_models.sql | 10 +++ .../V41__add_person_to_training_runs.sql | 2 + .../service/OcrClientDefaultStreamTest.java | 2 + .../TranscriptionServiceGuidedTest.java | 4 +- frontend/messages/de.json | 5 ++ frontend/messages/en.json | 5 ++ frontend/messages/es.json | 5 ++ .../components/TrainingHistory.svelte.spec.ts | 23 +++++++ 20 files changed, 281 insertions(+), 31 deletions(-) create mode 100644 backend/src/main/java/org/raddatz/familienarchiv/model/SenderModel.java create mode 100644 backend/src/main/java/org/raddatz/familienarchiv/repository/SenderModelRepository.java create mode 100644 backend/src/main/resources/db/migration/V40__add_sender_models.sql create mode 100644 backend/src/main/resources/db/migration/V41__add_person_to_training_runs.sql diff --git a/backend/src/main/java/org/raddatz/familienarchiv/model/OcrTrainingRun.java b/backend/src/main/java/org/raddatz/familienarchiv/model/OcrTrainingRun.java index e5db5231..c22081aa 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/model/OcrTrainingRun.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/model/OcrTrainingRun.java @@ -59,6 +59,9 @@ public class OcrTrainingRun { @Column(name = "triggered_by") private UUID triggeredBy; + @Column(name = "person_id") + private UUID personId; + @CreationTimestamp @Column(name = "created_at", nullable = false, updatable = false) @Schema(requiredMode = Schema.RequiredMode.REQUIRED) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/model/SenderModel.java b/backend/src/main/java/org/raddatz/familienarchiv/model/SenderModel.java new file mode 100644 index 00000000..4b8586aa --- /dev/null +++ b/backend/src/main/java/org/raddatz/familienarchiv/model/SenderModel.java @@ -0,0 +1,56 @@ +package org.raddatz.familienarchiv.model; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import io.swagger.v3.oas.annotations.media.Schema; +import jakarta.persistence.*; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.CreationTimestamp; +import org.hibernate.annotations.UpdateTimestamp; + +import java.time.Instant; +import java.util.UUID; + +@Entity +@Table(name = "sender_models") +@Data +@NoArgsConstructor +@AllArgsConstructor +@Builder +public class SenderModel { + + @Id + @GeneratedValue(strategy = GenerationType.UUID) + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + private UUID id; + + @Column(name = "person_id", nullable = false, unique = true) + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + private UUID personId; + + @JsonIgnore + @Column(name = "model_path", nullable = false) + private String modelPath; + + @Column + private Double accuracy; + + @Column + private Double cer; + + @Column(name = "corrected_lines_at_training", nullable = false) + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + private int correctedLinesAtTraining; + + @CreationTimestamp + @Column(name = "created_at", nullable = false, updatable = false) + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + private Instant createdAt; + + @UpdateTimestamp + @Column(name = "updated_at", nullable = false) + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + private Instant updatedAt; +} diff --git a/backend/src/main/java/org/raddatz/familienarchiv/model/TrainingStatus.java b/backend/src/main/java/org/raddatz/familienarchiv/model/TrainingStatus.java index 7e99dd2f..e91b9967 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/model/TrainingStatus.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/model/TrainingStatus.java @@ -1,6 +1,7 @@ package org.raddatz.familienarchiv.model; public enum TrainingStatus { + QUEUED, RUNNING, DONE, FAILED diff --git a/backend/src/main/java/org/raddatz/familienarchiv/repository/OcrTrainingRunRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/repository/OcrTrainingRunRepository.java index fe7d61d1..99ff81da 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/repository/OcrTrainingRunRepository.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/repository/OcrTrainingRunRepository.java @@ -12,5 +12,9 @@ public interface OcrTrainingRunRepository extends JpaRepository findFirstByStatus(TrainingStatus status); - List findTop10ByOrderByCreatedAtDesc(); + Optional findFirstByStatusOrderByCreatedAtAsc(TrainingStatus status); + + boolean existsByPersonIdAndStatus(UUID personId, TrainingStatus status); + + List findTop20ByOrderByCreatedAtDesc(); } diff --git a/backend/src/main/java/org/raddatz/familienarchiv/repository/SenderModelRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/repository/SenderModelRepository.java new file mode 100644 index 00000000..0bfb05a5 --- /dev/null +++ b/backend/src/main/java/org/raddatz/familienarchiv/repository/SenderModelRepository.java @@ -0,0 +1,12 @@ +package org.raddatz.familienarchiv.repository; + +import org.raddatz.familienarchiv.model.SenderModel; +import org.springframework.data.jpa.repository.JpaRepository; + +import java.util.Optional; +import java.util.UUID; + +public interface SenderModelRepository extends JpaRepository { + + Optional findByPersonId(UUID personId); +} diff --git a/backend/src/main/java/org/raddatz/familienarchiv/repository/TranscriptionBlockRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/repository/TranscriptionBlockRepository.java index d091f950..c88830ad 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/repository/TranscriptionBlockRepository.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/repository/TranscriptionBlockRepository.java @@ -3,6 +3,7 @@ package org.raddatz.familienarchiv.repository; import org.raddatz.familienarchiv.model.TranscriptionBlock; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; import java.util.List; import java.util.Optional; @@ -37,4 +38,22 @@ public interface TranscriptionBlockRepository extends JpaRepository findSegmentationBlocks(); + + @Query(""" + SELECT COUNT(b) FROM TranscriptionBlock b + JOIN Document d ON d.id = b.documentId + WHERE b.source = 'MANUAL' + AND d.sender.id = :personId + AND d.scriptType = 'HANDWRITING_KURRENT' + """) + long countManualKurrentBlocksByPerson(@Param("personId") UUID personId); + + @Query(""" + SELECT b FROM TranscriptionBlock b + JOIN Document d ON d.id = b.documentId + WHERE b.source = 'MANUAL' + AND d.sender.id = :personId + AND d.scriptType = 'HANDWRITING_KURRENT' + """) + List findManualKurrentBlocksByPerson(@Param("personId") UUID personId); } diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/OcrAsyncRunner.java b/backend/src/main/java/org/raddatz/familienarchiv/service/OcrAsyncRunner.java index 3ae853b0..08f38bda 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/OcrAsyncRunner.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/OcrAsyncRunner.java @@ -9,10 +9,12 @@ import org.raddatz.familienarchiv.repository.OcrJobRepository; import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Component; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; @Component @RequiredArgsConstructor @@ -29,6 +31,7 @@ public class OcrAsyncRunner { private final OcrJobRepository ocrJobRepository; private final OcrJobDocumentRepository ocrJobDocumentRepository; private final OcrProgressService ocrProgressService; + private final SenderModelService senderModelService; @Async public void runSingleDocument(UUID jobId, UUID documentId, UUID userId) { @@ -68,12 +71,18 @@ public class OcrAsyncRunner { String pdfUrl = fileService.generatePresignedUrl(doc.getFilePath()); + String senderModelPath = null; + if (doc.getSender() != null && doc.getScriptType() == ScriptType.HANDWRITING_KURRENT) { + senderModelPath = senderModelService.maybeGetModelPath(doc.getSender().getId()).orElse(null); + } + AtomicInteger blockCounter = new AtomicInteger(0); AtomicInteger currentPage = new AtomicInteger(0); AtomicInteger skippedPages = new AtomicInteger(0); AtomicInteger totalPages = new AtomicInteger(0); - ocrClient.streamBlocks(pdfUrl, doc.getScriptType(), regions, event -> { + final String finalSenderModelPath = senderModelPath; + ocrClient.streamBlocks(pdfUrl, doc.getScriptType(), regions, finalSenderModelPath, event -> { switch (event) { case OcrStreamEvent.Start start -> { totalPages.set(start.totalPages()); @@ -207,7 +216,25 @@ public class OcrAsyncRunner { clearExistingBlocks(documentId); String pdfUrl = fileService.generatePresignedUrl(doc.getFilePath()); - List blocks = ocrClient.extractBlocks(pdfUrl, doc.getScriptType()); + + String senderModelPath = null; + if (doc.getSender() != null && doc.getScriptType() == ScriptType.HANDWRITING_KURRENT) { + senderModelPath = senderModelService.maybeGetModelPath(doc.getSender().getId()).orElse(null); + } + + final AtomicReference> blocksRef = new AtomicReference<>(); + final String finalSenderModelPath = senderModelPath; + ocrClient.streamBlocks(pdfUrl, doc.getScriptType(), null, finalSenderModelPath, event -> { + switch (event) { + case OcrStreamEvent.Page page -> { + blocksRef.compareAndSet(null, new ArrayList<>()); + blocksRef.get().addAll(page.blocks()); + } + default -> {} + } + }); + + List blocks = blocksRef.get() != null ? blocksRef.get() : List.of(); createTranscriptionBlocks(documentId, blocks, userId, doc.getFileHash()); } diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/OcrClient.java b/backend/src/main/java/org/raddatz/familienarchiv/service/OcrClient.java index 45c2f021..6af68718 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/OcrClient.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/OcrClient.java @@ -1,6 +1,7 @@ package org.raddatz.familienarchiv.service; import org.raddatz.familienarchiv.model.ScriptType; +import org.springframework.lang.Nullable; import java.util.ArrayList; import java.util.LinkedHashMap; @@ -37,15 +38,27 @@ public interface OcrClient { TrainingResult segtrainModel(byte[] trainingDataZip); /** - * Stream OCR results page-by-page via NDJSON. Implementations should override - * this method. The default exists only for backward compatibility during migration - * — it calls extractBlocks() and synthesizes events from the collected result. + * Fine-tune the Kurrent model for a specific sender. * - * @param regions optional list of pre-drawn annotation regions; when non-null, - * the OCR service runs in guided mode (crop + recognize per region) + * @param trainingDataZip raw ZIP bytes produced by TrainingDataExportService.exportForSender() + * @param outputModelPath where to save the trained model (e.g. /app/models/sender_{uuid}.mlmodel) + * @return training result metrics + */ + TrainingResult trainSenderModel(byte[] trainingDataZip, String outputModelPath); + + /** + * Stream OCR results page-by-page via NDJSON, optionally using a sender-specific model. + * The default implementation synthesizes events from extractBlocks() for backward compatibility. + * Implementations that support real streaming (e.g. RestClientOcrClient) override this. + * + * @param regions optional list of pre-drawn annotation regions; when non-null, + * the OCR service runs in guided mode (crop + recognize per region) + * @param senderModelPath optional path to a per-sender model file; null means use base model */ default void streamBlocks(String pdfUrl, ScriptType scriptType, - List regions, Consumer handler) { + List regions, + @Nullable String senderModelPath, + Consumer handler) { List allBlocks = extractBlocks(pdfUrl, scriptType); LinkedHashMap> byPage = new LinkedHashMap<>(); @@ -62,4 +75,9 @@ public interface OcrClient { handler.accept(new OcrStreamEvent.Done(allBlocks.size(), 0)); } + + default void streamBlocks(String pdfUrl, ScriptType scriptType, + List regions, Consumer handler) { + streamBlocks(pdfUrl, scriptType, regions, null, handler); + } } diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/RestClientOcrClient.java b/backend/src/main/java/org/raddatz/familienarchiv/service/RestClientOcrClient.java index 9e58a2e1..2757ee19 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/RestClientOcrClient.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/RestClientOcrClient.java @@ -14,6 +14,7 @@ import org.springframework.http.HttpEntity; import org.springframework.http.HttpHeaders; import org.springframework.http.MediaType; import org.springframework.http.client.JdkClientHttpRequestFactory; +import org.springframework.lang.Nullable; import org.springframework.stereotype.Component; import org.springframework.util.LinkedMultiValueMap; import org.springframework.util.MultiValueMap; @@ -102,6 +103,13 @@ public class RestClientOcrClient implements OcrClient, OcrHealthClient { .toList(); } + private RestClient.RequestBodySpec addTrainingAuth(RestClient.RequestBodySpec spec) { + if (trainingToken != null && !trainingToken.isBlank()) { + return spec.header("X-Training-Token", trainingToken); + } + return spec; + } + @Override public OcrClient.TrainingResult trainModel(byte[] trainingDataZip) { ByteArrayResource zipResource = new ByteArrayResource(trainingDataZip) { @@ -114,15 +122,10 @@ public class RestClientOcrClient implements OcrClient, OcrHealthClient { partHeaders.setContentType(MediaType.parseMediaType("application/zip")); body.add("file", new HttpEntity<>(zipResource, partHeaders)); - var spec = trainingRestClient.post() - .uri("/train") - .contentType(MediaType.MULTIPART_FORM_DATA); - - if (trainingToken != null && !trainingToken.isBlank()) { - spec = spec.header("X-Training-Token", trainingToken); - } - - TrainingResultJson result = spec + TrainingResultJson result = addTrainingAuth( + trainingRestClient.post() + .uri("/train") + .contentType(MediaType.MULTIPART_FORM_DATA)) .body(body) .retrieve() .body(TrainingResultJson.class); @@ -143,15 +146,35 @@ public class RestClientOcrClient implements OcrClient, OcrHealthClient { partHeaders.setContentType(MediaType.parseMediaType("application/zip")); body.add("file", new HttpEntity<>(zipResource, partHeaders)); - var spec = trainingRestClient.post() - .uri("/segtrain") - .contentType(MediaType.MULTIPART_FORM_DATA); + TrainingResultJson result = addTrainingAuth( + trainingRestClient.post() + .uri("/segtrain") + .contentType(MediaType.MULTIPART_FORM_DATA)) + .body(body) + .retrieve() + .body(TrainingResultJson.class); - if (trainingToken != null && !trainingToken.isBlank()) { - spec = spec.header("X-Training-Token", trainingToken); - } + if (result == null) return new OcrClient.TrainingResult(null, null, null, null); + return new OcrClient.TrainingResult(result.loss(), result.accuracy(), result.cer(), result.epochs()); + } - TrainingResultJson result = spec + @Override + public OcrClient.TrainingResult trainSenderModel(byte[] trainingDataZip, String outputModelPath) { + ByteArrayResource zipResource = new ByteArrayResource(trainingDataZip) { + @Override + public String getFilename() { return "sender-training-data.zip"; } + }; + + MultiValueMap body = new LinkedMultiValueMap<>(); + HttpHeaders partHeaders = new HttpHeaders(); + partHeaders.setContentType(MediaType.parseMediaType("application/zip")); + body.add("file", new HttpEntity<>(zipResource, partHeaders)); + body.add("output_model_path", outputModelPath); + + TrainingResultJson result = addTrainingAuth( + trainingRestClient.post() + .uri("/train") + .contentType(MediaType.MULTIPART_FORM_DATA)) .body(body) .retrieve() .body(TrainingResultJson.class); @@ -176,7 +199,8 @@ public class RestClientOcrClient implements OcrClient, OcrHealthClient { @Override public void streamBlocks(String pdfUrl, ScriptType scriptType, - List regions, Consumer handler) { + List regions, @Nullable String senderModelPath, + Consumer handler) { String body; try { var requestMap = new java.util.LinkedHashMap(); @@ -186,6 +210,9 @@ public class RestClientOcrClient implements OcrClient, OcrHealthClient { if (regions != null && !regions.isEmpty()) { requestMap.put("regions", regions); } + if (senderModelPath != null) { + requestMap.put("senderModelPath", senderModelPath); + } body = NDJSON_MAPPER.writeValueAsString(requestMap); } catch (IOException e) { throw new RuntimeException("Failed to serialize OCR request", e); @@ -204,7 +231,12 @@ public class RestClientOcrClient implements OcrClient, OcrHealthClient { if (response.statusCode() == 404) { log.info("OCR service does not support /ocr/stream (404), falling back to /ocr"); - OcrClient.super.streamBlocks(pdfUrl, scriptType, regions, handler); + List allBlocks = extractBlocks(pdfUrl, scriptType); + handler.accept(new OcrStreamEvent.Start(0)); + for (OcrBlockResult block : allBlocks) { + handler.accept(new OcrStreamEvent.Page(block.pageNumber(), List.of(block))); + } + handler.accept(new OcrStreamEvent.Done(allBlocks.size(), 0)); return; } diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/TrainingDataExportService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/TrainingDataExportService.java index cf0b10e3..86c81053 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/TrainingDataExportService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/TrainingDataExportService.java @@ -38,10 +38,20 @@ public class TrainingDataExportService { } public StreamingResponseBody exportToZip() { - // Collect all data before entering the lambda — no open DB txn during streaming - List blocks = queryEligibleBlocks(); + return exportBlocksToZip(queryEligibleBlocks()); + } + + public List queryBlocksForSender(UUID personId) { + return blockRepository.findManualKurrentBlocksByPerson(personId); + } + + public StreamingResponseBody exportForSender(UUID personId) { + return exportBlocksToZip(queryBlocksForSender(personId)); + } + + private StreamingResponseBody exportBlocksToZip(List blocks) { if (blocks.isEmpty()) { - return out -> {}; // caller checks isEmpty() for 204 response + return out -> {}; } // Group blocks by documentId so we only download each PDF once diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/TranscriptionService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/TranscriptionService.java index bfb02253..081f588d 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/TranscriptionService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/TranscriptionService.java @@ -11,6 +11,7 @@ import org.raddatz.familienarchiv.exception.ErrorCode; import org.raddatz.familienarchiv.model.BlockSource; import org.raddatz.familienarchiv.model.Document; import org.raddatz.familienarchiv.model.DocumentAnnotation; +import org.raddatz.familienarchiv.model.ScriptType; import org.raddatz.familienarchiv.model.TranscriptionBlock; import org.raddatz.familienarchiv.model.TranscriptionBlockVersion; import org.raddatz.familienarchiv.repository.AnnotationRepository; @@ -35,6 +36,7 @@ public class TranscriptionService { private final AnnotationRepository annotationRepository; private final AnnotationService annotationService; private final DocumentService documentService; + private final SenderModelService senderModelService; public List listBlocks(UUID documentId) { return blockRepository.findByDocumentIdOrderBySortOrderAsc(documentId); @@ -122,6 +124,7 @@ public class TranscriptionService { String text = sanitizeText(dto.getText()); block.setText(text); + block.setSource(BlockSource.MANUAL); if (dto.getLabel() != null) { block.setLabel(dto.getLabel()); } @@ -129,6 +132,12 @@ public class TranscriptionService { TranscriptionBlock saved = blockRepository.save(block); saveVersion(saved, userId); + + Document doc = documentService.getDocumentById(documentId); + if (doc.getSender() != null && doc.getScriptType() == ScriptType.HANDWRITING_KURRENT) { + senderModelService.checkAndTriggerTraining(doc.getSender().getId()); + } + return saved; } diff --git a/backend/src/main/resources/application.yaml b/backend/src/main/resources/application.yaml index aa4c8ceb..d9bbe9d0 100644 --- a/backend/src/main/resources/application.yaml +++ b/backend/src/main/resources/application.yaml @@ -77,3 +77,8 @@ app: tags: 10 summary: 11 transcription: 13 + +ocr: + sender-model: + activation-threshold: 100 + retrain-delta: 50 diff --git a/backend/src/main/resources/db/migration/V40__add_sender_models.sql b/backend/src/main/resources/db/migration/V40__add_sender_models.sql new file mode 100644 index 00000000..2386bbe2 --- /dev/null +++ b/backend/src/main/resources/db/migration/V40__add_sender_models.sql @@ -0,0 +1,10 @@ +CREATE TABLE sender_models ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + person_id UUID NOT NULL UNIQUE REFERENCES persons(id) ON DELETE CASCADE, + model_path TEXT NOT NULL, + accuracy DOUBLE PRECISION, + cer DOUBLE PRECISION, + corrected_lines_at_training INT NOT NULL DEFAULT 0, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); diff --git a/backend/src/main/resources/db/migration/V41__add_person_to_training_runs.sql b/backend/src/main/resources/db/migration/V41__add_person_to_training_runs.sql new file mode 100644 index 00000000..b95a70b6 --- /dev/null +++ b/backend/src/main/resources/db/migration/V41__add_person_to_training_runs.sql @@ -0,0 +1,2 @@ +ALTER TABLE ocr_training_runs + ADD COLUMN person_id UUID REFERENCES persons(id) ON DELETE SET NULL; diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/OcrClientDefaultStreamTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/OcrClientDefaultStreamTest.java index 7be27c40..f865f5d6 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/OcrClientDefaultStreamTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/OcrClientDefaultStreamTest.java @@ -21,6 +21,7 @@ class OcrClientDefaultStreamTest { } @Override public TrainingResult trainModel(byte[] zip) { return null; } @Override public TrainingResult segtrainModel(byte[] zip) { return null; } + @Override public TrainingResult trainSenderModel(byte[] zip, String path) { return null; } }; List events = new ArrayList<>(); @@ -52,6 +53,7 @@ class OcrClientDefaultStreamTest { @Override public List extractBlocks(String pdfUrl, ScriptType scriptType) { return List.of(); } @Override public TrainingResult trainModel(byte[] zip) { return null; } @Override public TrainingResult segtrainModel(byte[] zip) { return null; } + @Override public TrainingResult trainSenderModel(byte[] zip, String path) { return null; } }; List events = new ArrayList<>(); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/TranscriptionServiceGuidedTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/TranscriptionServiceGuidedTest.java index e8d53cad..59eed034 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/TranscriptionServiceGuidedTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/TranscriptionServiceGuidedTest.java @@ -22,6 +22,7 @@ class TranscriptionServiceGuidedTest { AnnotationRepository annotationRepository; AnnotationService annotationService; DocumentService documentService; + SenderModelService senderModelService; TranscriptionService service; UUID docId = UUID.randomUUID(); @@ -35,9 +36,10 @@ class TranscriptionServiceGuidedTest { annotationRepository = mock(AnnotationRepository.class); annotationService = mock(AnnotationService.class); documentService = mock(DocumentService.class); + senderModelService = mock(SenderModelService.class); service = new TranscriptionService(blockRepository, versionRepository, - annotationRepository, annotationService, documentService); + annotationRepository, annotationService, documentService, senderModelService); when(blockRepository.save(any())).thenAnswer(inv -> inv.getArgument(0)); when(versionRepository.save(any())).thenAnswer(inv -> inv.getArgument(0)); diff --git a/frontend/messages/de.json b/frontend/messages/de.json index 72ae0b6b..51b10005 100644 --- a/frontend/messages/de.json +++ b/frontend/messages/de.json @@ -561,6 +561,11 @@ "transcription_block_segmentation_only": "Nur Segmentierung", "training_chip_kurrent": "Kurrent-Erkennung", "training_chip_segmentation": "Segmentierung", + "training_col_type": "Typ", + "training_type_base": "Basis", + "training_type_personalized": "Personalisiert", + "training_col_person": "Absender", + "training_status_queued": "Warteschlange", "mission_control_heading": "Was braucht Aufmerksamkeit?", "mission_control_segmentation_heading": "Text markieren", "mission_control_segmentation_description": "Textbereiche markieren — keine Vorkenntnisse nötig", diff --git a/frontend/messages/en.json b/frontend/messages/en.json index c3711b3f..b6016949 100644 --- a/frontend/messages/en.json +++ b/frontend/messages/en.json @@ -561,6 +561,11 @@ "transcription_block_segmentation_only": "Segmentation only", "training_chip_kurrent": "Kurrent recognition", "training_chip_segmentation": "Segmentation", + "training_col_type": "Type", + "training_type_base": "Base", + "training_type_personalized": "Personalized", + "training_col_person": "Sender", + "training_status_queued": "Queued", "mission_control_heading": "What needs attention?", "mission_control_segmentation_heading": "Mark text", "mission_control_segmentation_description": "Mark text areas — no prior knowledge needed", diff --git a/frontend/messages/es.json b/frontend/messages/es.json index c87a2d25..19487b65 100644 --- a/frontend/messages/es.json +++ b/frontend/messages/es.json @@ -561,6 +561,11 @@ "transcription_block_segmentation_only": "Solo segmentación", "training_chip_kurrent": "Reconocimiento Kurrent", "training_chip_segmentation": "Segmentación", + "training_col_type": "Tipo", + "training_type_base": "Base", + "training_type_personalized": "Personalizado", + "training_col_person": "Remitente", + "training_status_queued": "En cola", "mission_control_heading": "¿Qué necesita atención?", "mission_control_segmentation_heading": "Marcar texto", "mission_control_segmentation_description": "Marcar áreas de texto — sin conocimientos previos", diff --git a/frontend/src/lib/components/TrainingHistory.svelte.spec.ts b/frontend/src/lib/components/TrainingHistory.svelte.spec.ts index 728b167a..95090ba6 100644 --- a/frontend/src/lib/components/TrainingHistory.svelte.spec.ts +++ b/frontend/src/lib/components/TrainingHistory.svelte.spec.ts @@ -50,3 +50,26 @@ describe('TrainingHistory — expand/collapse', () => { .not.toBeInTheDocument(); }); }); + +describe('TrainingHistory — type and person columns', () => { + it('shows "Basis" for runs without personId', async () => { + render(TrainingHistory, { runs: [makeRun(0)] }); + + await expect.element(page.getByText(/Basis/i)).toBeInTheDocument(); + }); + + it('shows "Personalisiert" for runs with personId', async () => { + const run = { ...makeRun(0), personId: 'person-1' }; + render(TrainingHistory, { runs: [run], personNames: { 'person-1': 'Karl Müller' } }); + + await expect.element(page.getByText(/Personalisiert/i)).toBeInTheDocument(); + }); + + it('shows person name from personNames for sender runs', async () => { + const run = { ...makeRun(0), personId: 'person-1' }; + render(TrainingHistory, { runs: [run], personNames: { 'person-1': 'Karl Müller' } }); + + await expect.element(page.getByText(/Personalisiert/i)).toBeInTheDocument(); + await expect.element(page.getByText('Karl Müller')).toBeInTheDocument(); + }); +});