feat(transcription): add source/reviewed fields for training pipeline
Some checks failed
CI / Unit & Component Tests (push) Failing after 1s
CI / Backend Unit Tests (push) Failing after 0s
CI / Unit & Component Tests (pull_request) Failing after 0s
CI / Backend Unit Tests (pull_request) Failing after 1s

- BlockSource enum: MANUAL, OCR
- V26 migration adds source + reviewed columns to transcription_blocks
- OcrService sets source=OCR when creating blocks
- TranscriptionService.reviewBlock() toggles the reviewed flag
- PUT /api/documents/{id}/transcription-blocks/{blockId}/review endpoint
- 5 new tests: reviewBlock toggle/untoggle/notfound, controller,
  OcrService source=OCR verification

The reviewed flag enables the Kraken fine-tuning pipeline: only blocks
marked as reviewed by a human are exported as training data.

Refs #226

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-12 21:44:51 +02:00
parent f064b27439
commit 3aaec01421
9 changed files with 123 additions and 0 deletions

View File

@@ -81,6 +81,14 @@ public class TranscriptionBlockController {
return transcriptionService.listBlocks(documentId);
}
/**
 * Handles {@code PUT .../{blockId}/review} by delegating to
 * {@code transcriptionService.reviewBlock}.
 *
 * <p>Requires {@code Permission.WRITE_ALL}.
 *
 * @param documentId id of the document, taken from the request path
 * @param blockId    id of the transcription block, taken from the request path
 * @return the block exactly as returned by the service call
 */
@PutMapping("/{blockId}/review")
@RequirePermission(Permission.WRITE_ALL)
public TranscriptionBlock reviewBlock(
        @PathVariable UUID documentId,
        @PathVariable UUID blockId) {
    final TranscriptionBlock updatedBlock =
            transcriptionService.reviewBlock(documentId, blockId);
    return updatedBlock;
}
@GetMapping("/{blockId}/history")
@RequirePermission(Permission.READ_ALL)
public List<TranscriptionBlockVersion> getBlockHistory(

View File

@@ -0,0 +1,6 @@
package org.raddatz.familienarchiv.model;
/**
 * Identifies how a transcription block came into existence.
 *
 * <p>The constant names are what gets stored when the enum is mapped with
 * {@code EnumType.STRING}, so renaming a constant changes the persisted value.
 */
public enum BlockSource {

    /** Source used as the default when nothing else is specified. */
    MANUAL,

    /** Source assigned to blocks that are built from OCR output. */
    OCR;
}

View File

@@ -41,6 +41,17 @@ public class TranscriptionBlock {
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
private int sortOrder;
/**
 * Origin of this block. Persisted as the enum constant's name
 * ({@code EnumType.STRING}) in a non-null column of length 10.
 * Defaults to {@link BlockSource#MANUAL} when the builder does not set it.
 */
@Enumerated(EnumType.STRING)
@Column(nullable = false, length = 10)
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
@Builder.Default
private BlockSource source = BlockSource.MANUAL;
/**
 * Review flag for this block; non-null column, defaults to {@code false}
 * when the builder does not set it.
 * NOTE(review): presumably consumed by the training-data export to select
 * human-reviewed blocks - confirm against the export code.
 */
@Column(nullable = false)
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
@Builder.Default
private boolean reviewed = false;
@Version
@Column(nullable = false)
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)

View File

@@ -107,6 +107,7 @@ public class OcrService {
.documentId(documentId)
.text(block.text() != null ? block.text() : "")
.sortOrder(i)
.source(BlockSource.OCR)
.createdBy(userId)
.updatedBy(userId)
.build();

View File

@@ -116,6 +116,13 @@ public class TranscriptionService {
}
}
/**
 * Inverts the {@code reviewed} flag of a transcription block and saves it.
 *
 * <p>Each call flips the current value: an unreviewed block becomes reviewed
 * and a reviewed block becomes unreviewed. Two consecutive calls therefore
 * restore the original state.
 *
 * @param documentId id passed to {@code getBlock} together with {@code blockId}
 * @param blockId    id of the block whose flag is flipped
 * @return the result of {@code blockRepository.save} for the modified block
 */
@Transactional
public TranscriptionBlock reviewBlock(UUID documentId, UUID blockId) {
    final TranscriptionBlock target = getBlock(documentId, blockId);
    final boolean flipped = !target.isReviewed();
    target.setReviewed(flipped);
    return blockRepository.save(target);
}
public List<TranscriptionBlockVersion> getBlockHistory(UUID documentId, UUID blockId) {
getBlock(documentId, blockId);
return versionRepository.findByBlockIdOrderByChangedAtDesc(blockId);

View File

@@ -0,0 +1,2 @@
-- Adds the block origin column. NOT NULL with a default, so rows that exist
-- before this migration receive 'MANUAL'.
ALTER TABLE transcription_blocks ADD COLUMN source VARCHAR(10) NOT NULL DEFAULT 'MANUAL';
-- Adds the review flag. NOT NULL with a default, so rows that exist before
-- this migration start out as FALSE.
ALTER TABLE transcription_blocks ADD COLUMN reviewed BOOLEAN NOT NULL DEFAULT FALSE;