fix(training): count segmentation blocks regardless of text content
The findSegmentationBlocks query was filtering out blocks with non-empty text: its WHERE clause required `b.text IS NULL OR b.text = ''`. Segmentation training only needs annotation geometry (polygon/bbox), not transcription text, so any MANUAL block on a KURRENT_SEGMENTATION document should count, regardless of whether it has text.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -31,7 +31,7 @@ public interface TranscriptionBlockRepository extends JpaRepository<Transcriptio
|
|||||||
SELECT b FROM TranscriptionBlock b
|
SELECT b FROM TranscriptionBlock b
|
||||||
JOIN DocumentAnnotation a ON a.id = b.annotationId
|
JOIN DocumentAnnotation a ON a.id = b.annotationId
|
||||||
JOIN Document d ON d.id = b.documentId
|
JOIN Document d ON d.id = b.documentId
|
||||||
WHERE b.source = 'MANUAL' AND (b.text IS NULL OR b.text = '')
|
WHERE b.source = 'MANUAL'
|
||||||
AND 'KURRENT_SEGMENTATION' MEMBER OF d.trainingLabels
|
AND 'KURRENT_SEGMENTATION' MEMBER OF d.trainingLabels
|
||||||
""")
|
""")
|
||||||
List<TranscriptionBlock> findSegmentationBlocks();
|
List<TranscriptionBlock> findSegmentationBlocks();
|
||||||
|
|||||||
Reference in New Issue
Block a user