feat(ocr): add OCR infrastructure (interfaces, entities, migrations, DTOs)

- OcrClient + OcrHealthClient interfaces for testable OCR integration
- OcrBlockResult record for OCR engine response mapping
- OcrJob + OcrJobDocument entities with status enums
- V25 migration creates ocr_jobs and ocr_job_documents tables
- Repositories for job and job-document queries
- TriggerOcrDTO, BatchOcrDTO (@Size max=500), OcrStatusDTO
- ErrorCodes: OCR_SERVICE_UNAVAILABLE, OCR_JOB_NOT_FOUND,
  OCR_DOCUMENT_NOT_UPLOADED, OCR_PROCESSING_FAILED

Refs #226

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-12 15:15:16 +02:00
parent d194b6b225
commit ff3990710e
14 changed files with 281 additions and 0 deletions

View File

@@ -0,0 +1,26 @@
-- One row per OCR job, carrying the job status and per-job document counters.
CREATE TABLE ocr_jobs (
    id                  UUID        PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Stored as free text; no CHECK constraint restricts the values at the
    -- database level. New rows start as 'PENDING'.
    status              VARCHAR(20) NOT NULL DEFAULT 'PENDING',

    -- Must be supplied by the writer (no default).
    total_documents     INTEGER     NOT NULL,

    -- Counters start at zero.
    processed_documents INTEGER     NOT NULL DEFAULT 0,
    error_count         INTEGER     NOT NULL DEFAULT 0,
    skipped_count       INTEGER     NOT NULL DEFAULT 0,

    -- Nullable; no foreign key is declared on this column here.
    created_by          UUID,

    -- Both timestamps default to now() on insert. No trigger is defined in
    -- this script, so updated_at is not refreshed automatically.
    created_at          TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at          TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- Links a document to an OCR job and carries the per-document status,
-- optional error text, and page counters.
CREATE TABLE ocr_job_documents (
    id            UUID        PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Rows are removed automatically when the parent job or the referenced
    -- document is deleted (ON DELETE CASCADE on both foreign keys).
    job_id        UUID        NOT NULL REFERENCES ocr_jobs(id) ON DELETE CASCADE,
    document_id   UUID        NOT NULL REFERENCES documents(id) ON DELETE CASCADE,

    -- Stored as free text; no CHECK constraint restricts the values at the
    -- database level. New rows start as 'PENDING'.
    status        VARCHAR(20) NOT NULL DEFAULT 'PENDING',

    -- Nullable.
    error_message TEXT,

    -- Both page columns are nullable: DEFAULT 0 only applies when the column
    -- is omitted from the INSERT, so an explicit NULL is still accepted.
    current_page  INTEGER     DEFAULT 0,
    total_pages   INTEGER     DEFAULT 0,

    -- Both timestamps default to now() on insert. No trigger is defined in
    -- this script, so updated_at is not refreshed automatically.
    created_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at    TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- Index the two foreign-key columns of ocr_job_documents. PostgreSQL does not
-- index referencing columns automatically, so these cover lookups by job or
-- by document as well as the ON DELETE CASCADE scans from the parent tables.
CREATE INDEX idx_ocr_job_documents_job_id
    ON ocr_job_documents (job_id);

CREATE INDEX idx_ocr_job_documents_document_id
    ON ocr_job_documents (document_id);