feat(transcription): add source/reviewed fields for training pipeline
- BlockSource enum: MANUAL, OCR
- V26 migration adds source + reviewed columns to transcription_blocks
- OcrService sets source=OCR when creating blocks
- TranscriptionService.reviewBlock() toggles the reviewed flag
- PUT /api/documents/{id}/transcription-blocks/{blockId}/review endpoint
- 5 new tests: reviewBlock toggle/untoggle/notfound, controller,
OcrService source=OCR verification
The reviewed flag enables the Kraken fine-tuning pipeline: only blocks
marked as reviewed by a human are exported as training data.
Refs #226
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -81,6 +81,14 @@ public class TranscriptionBlockController {
|
||||
return transcriptionService.listBlocks(documentId);
|
||||
}
|
||||
|
||||
@PutMapping("/{blockId}/review")
|
||||
@RequirePermission(Permission.WRITE_ALL)
|
||||
public TranscriptionBlock reviewBlock(
|
||||
@PathVariable UUID documentId,
|
||||
@PathVariable UUID blockId) {
|
||||
return transcriptionService.reviewBlock(documentId, blockId);
|
||||
}
|
||||
|
||||
@GetMapping("/{blockId}/history")
|
||||
@RequirePermission(Permission.READ_ALL)
|
||||
public List<TranscriptionBlockVersion> getBlockHistory(
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
package org.raddatz.familienarchiv.model;
|
||||
|
||||
public enum BlockSource {
|
||||
MANUAL,
|
||||
OCR
|
||||
}
|
||||
@@ -41,6 +41,17 @@ public class TranscriptionBlock {
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private int sortOrder;
|
||||
|
||||
@Enumerated(EnumType.STRING)
|
||||
@Column(nullable = false, length = 10)
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
@Builder.Default
|
||||
private BlockSource source = BlockSource.MANUAL;
|
||||
|
||||
@Column(nullable = false)
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
@Builder.Default
|
||||
private boolean reviewed = false;
|
||||
|
||||
@Version
|
||||
@Column(nullable = false)
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
|
||||
@@ -107,6 +107,7 @@ public class OcrService {
|
||||
.documentId(documentId)
|
||||
.text(block.text() != null ? block.text() : "")
|
||||
.sortOrder(i)
|
||||
.source(BlockSource.OCR)
|
||||
.createdBy(userId)
|
||||
.updatedBy(userId)
|
||||
.build();
|
||||
|
||||
@@ -116,6 +116,13 @@ public class TranscriptionService {
|
||||
}
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public TranscriptionBlock reviewBlock(UUID documentId, UUID blockId) {
|
||||
TranscriptionBlock block = getBlock(documentId, blockId);
|
||||
block.setReviewed(!block.isReviewed());
|
||||
return blockRepository.save(block);
|
||||
}
|
||||
|
||||
public List<TranscriptionBlockVersion> getBlockHistory(UUID documentId, UUID blockId) {
|
||||
getBlock(documentId, blockId);
|
||||
return versionRepository.findByBlockIdOrderByChangedAtDesc(blockId);
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
ALTER TABLE transcription_blocks ADD COLUMN source VARCHAR(10) NOT NULL DEFAULT 'MANUAL';
|
||||
ALTER TABLE transcription_blocks ADD COLUMN reviewed BOOLEAN NOT NULL DEFAULT FALSE;
|
||||
Reference in New Issue
Block a user