feat(training): add document-level training enrollment

- V29 migration: document_training_labels join table
- TrainingLabel enum: KURRENT_RECOGNITION, KURRENT_SEGMENTATION
- Document.trainingLabels @ElementCollection
- DocumentService.addTrainingLabel / removeTrainingLabel
- PATCH /api/documents/{id}/training-labels (WRITE_ALL)
- Auto-enroll on Kurrent OCR trigger (OcrService.startOcr)
- TranscriptionEditView: enrollment chips in panel footer
- JPQL queries updated to use MEMBER OF trainingLabels

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-13 14:30:51 +02:00
parent 73229077be
commit fdf1eb92ad
12 changed files with 614 additions and 11 deletions

View File

@@ -21,6 +21,7 @@ import org.raddatz.familienarchiv.exception.ErrorCode;
import org.raddatz.familienarchiv.model.Document;
import org.raddatz.familienarchiv.dto.DocumentSort;
import org.raddatz.familienarchiv.model.DocumentStatus;
import org.raddatz.familienarchiv.model.TrainingLabel;
import org.raddatz.familienarchiv.model.DocumentVersion;
import org.raddatz.familienarchiv.security.Permission;
import org.raddatz.familienarchiv.security.RequirePermission;
@@ -35,9 +36,11 @@ import org.springframework.core.io.InputStreamResource;
import org.springframework.web.bind.annotation.DeleteMapping;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PatchMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.PutMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RequestPart;
@@ -208,6 +211,29 @@ public class DocumentController {
return ResponseEntity.ok(DocumentSearchResult.of(results));
}
// --- TRAINING LABELS ---
public record TrainingLabelRequest(String label, boolean enrolled) {}
@PatchMapping("/{id}/training-labels")
@RequirePermission(Permission.WRITE_ALL)
public ResponseEntity<Void> patchTrainingLabel(
@PathVariable UUID id,
@RequestBody TrainingLabelRequest req) {
TrainingLabel label;
try {
label = TrainingLabel.valueOf(req.label());
} catch (IllegalArgumentException e) {
throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "Unknown training label: " + req.label());
}
if (req.enrolled()) {
documentService.addTrainingLabel(id, label);
} else {
documentService.removeTrainingLabel(id, label);
}
return ResponseEntity.noContent().build();
}
// --- VERSIONS ---
@GetMapping("/{id}/versions")

View File

@@ -110,4 +110,11 @@ public class Document {
@JoinTable(name = "document_tags", joinColumns = @JoinColumn(name = "document_id"), inverseJoinColumns = @JoinColumn(name = "tag_id"))
@Builder.Default
private Set<Tag> tags = new HashSet<>();
@ElementCollection(fetch = FetchType.EAGER)
@CollectionTable(name = "document_training_labels", joinColumns = @JoinColumn(name = "document_id"))
@Column(name = "label")
@Enumerated(EnumType.STRING)
@Builder.Default
private Set<TrainingLabel> trainingLabels = new HashSet<>();
}

View File

@@ -0,0 +1,6 @@
package org.raddatz.familienarchiv.model;
public enum TrainingLabel {
KURRENT_RECOGNITION,
KURRENT_SEGMENTATION
}

View File

@@ -2,6 +2,7 @@ package org.raddatz.familienarchiv.repository;
import org.raddatz.familienarchiv.model.TranscriptionBlock;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import java.util.List;
import java.util.Optional;
@@ -14,4 +15,22 @@ public interface TranscriptionBlockRepository extends JpaRepository<Transcriptio
Optional<TranscriptionBlock> findByIdAndDocumentId(UUID id, UUID documentId);
int countByDocumentId(UUID documentId);
@Query("""
SELECT b FROM TranscriptionBlock b
JOIN DocumentAnnotation a ON a.id = b.annotationId
JOIN Document d ON d.id = b.documentId
WHERE (b.source = 'MANUAL' OR (b.source = 'OCR' AND b.reviewed = true))
AND 'KURRENT_RECOGNITION' MEMBER OF d.trainingLabels
""")
List<TranscriptionBlock> findEligibleKurrentBlocks();
@Query("""
SELECT b FROM TranscriptionBlock b
JOIN DocumentAnnotation a ON a.id = b.annotationId
JOIN Document d ON d.id = b.documentId
WHERE b.source = 'MANUAL' AND (b.text IS NULL OR b.text = '')
AND 'KURRENT_SEGMENTATION' MEMBER OF d.trainingLabels
""")
List<TranscriptionBlock> findSegmentationBlocks();
}

View File

@@ -9,6 +9,7 @@ import org.raddatz.familienarchiv.model.Document;
import org.raddatz.familienarchiv.dto.DocumentSort;
import org.raddatz.familienarchiv.model.DocumentStatus;
import org.raddatz.familienarchiv.model.ScriptType;
import org.raddatz.familienarchiv.model.TrainingLabel;
import org.raddatz.familienarchiv.model.Person;
import org.raddatz.familienarchiv.model.Tag;
import org.raddatz.familienarchiv.repository.DocumentRepository;
@@ -385,6 +386,20 @@ public class DocumentService {
documentRepository.save(doc);
}
@Transactional
public void addTrainingLabel(UUID documentId, TrainingLabel label) {
Document doc = getDocumentById(documentId);
doc.getTrainingLabels().add(label);
documentRepository.save(doc);
}
@Transactional
public void removeTrainingLabel(UUID documentId, TrainingLabel label) {
Document doc = getDocumentById(documentId);
doc.getTrainingLabels().remove(label);
documentRepository.save(doc);
}
public Document getDocumentById(UUID id) {
return documentRepository.findById(id)
.orElseThrow(() -> DomainException.notFound(ErrorCode.DOCUMENT_NOT_FOUND, "Document not found: " + id));

View File

@@ -66,6 +66,9 @@ public class OcrService {
if (scriptTypeOverride != null) {
documentService.updateScriptType(documentId, scriptTypeOverride);
if (scriptTypeOverride == ScriptType.HANDWRITING_KURRENT) {
documentService.addTrainingLabel(documentId, TrainingLabel.KURRENT_RECOGNITION);
}
}
OcrJob job = OcrJob.builder()

View File

@@ -0,0 +1,5 @@
CREATE TABLE document_training_labels (
document_id UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
label VARCHAR(50) NOT NULL,
PRIMARY KEY (document_id, label)
);