feat(documents): add metadataComplete flag and enrichment queue endpoints

Adds a metadata_complete column (default true for existing rows) to drive
the enrichment queue. New drop-zone uploads always start as false; createDocument
uses an explicit DTO flag or a heuristic (any of date/sender/receivers present →
true); the mass importer applies the same heuristic per row.

New endpoints: GET /api/documents/incomplete-count, /incomplete, /incomplete/next.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-03-26 13:25:57 +01:00
parent 2bfbf45eba
commit 0ce18e1eed
9 changed files with 306 additions and 0 deletions

View File

@@ -4,6 +4,8 @@ import java.io.IOException;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
@@ -156,6 +158,23 @@ public class DocumentController {
return new QuickUploadResult(created, updated, errors);
}
@GetMapping("/incomplete-count")
public Map<String, Long> getIncompleteCount() {
return Map.of("count", documentService.getIncompleteCount());
}
@GetMapping("/incomplete")
public List<Document> getIncomplete() {
return documentService.findIncompleteDocuments();
}
@GetMapping("/incomplete/next")
public ResponseEntity<Document> getNextIncomplete(@RequestParam UUID excludeId) {
return documentService.findNextIncompleteDocument(excludeId)
.map(ResponseEntity::ok)
.orElse(ResponseEntity.noContent().build());
}
@GetMapping("/search")
public ResponseEntity<List<Document>> search(
@RequestParam(required = false) String q,

View File

@@ -17,4 +17,5 @@ public class DocumentUpdateDTO {
private UUID senderId;
private List<UUID> receiverIds;
private String tags;
private Boolean metadataComplete;
}

View File

@@ -86,6 +86,11 @@ public class Document {
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
private LocalDateTime updatedAt;
@Column(name = "metadata_complete", nullable = false)
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
@Builder.Default
private boolean metadataComplete = false;
@ManyToMany(fetch = FetchType.EAGER)
@JoinTable(name = "document_receivers", joinColumns = @JoinColumn(name = "document_id"), inverseJoinColumns = @JoinColumn(name = "person_id"))
@Builder.Default

View File

@@ -42,6 +42,12 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
List<Document> findByFileHashIsNullAndFilePathIsNotNull();
long countByMetadataCompleteFalse();
List<Document> findByMetadataCompleteFalse(Sort sort);
Optional<Document> findFirstByMetadataCompleteFalseAndIdNot(UUID id, Sort sort);
@Query("SELECT DISTINCT d FROM Document d " +
"JOIN d.receivers r " +
"WHERE " +

View File

@@ -62,10 +62,12 @@ public class DocumentService {
if (existingDoc.isPresent()) {
document = existingDoc.get();
} else {
// New uploads from the drop zone always start as incomplete
document = Document.builder()
.originalFilename(originalFilename)
.title(stripExtension(originalFilename))
.status(DocumentStatus.UPLOADED)
.metadataComplete(false)
.build();
}
@@ -89,6 +91,17 @@ public class DocumentService {
? file.getOriginalFilename()
: (dto.getTitle() != null ? dto.getTitle() : "Unbenanntes Dokument");
// If the caller explicitly sets metadataComplete, use it.
// Otherwise apply heuristic: complete if at least one key field is present.
boolean metadataComplete;
if (dto.getMetadataComplete() != null) {
metadataComplete = dto.getMetadataComplete();
} else {
metadataComplete = dto.getDocumentDate() != null
|| dto.getSenderId() != null
|| (dto.getReceiverIds() != null && !dto.getReceiverIds().isEmpty());
}
Document doc = Document.builder()
.originalFilename(filename)
.title(dto.getTitle())
@@ -98,6 +111,7 @@ public class DocumentService {
.transcription(dto.getTranscription())
.summary(dto.getSummary())
.status(DocumentStatus.PLACEHOLDER)
.metadataComplete(metadataComplete)
.build();
doc = documentRepository.save(doc);
@@ -176,6 +190,11 @@ public class DocumentService {
doc.getReceivers().clear(); // Alle entfernen
}
// 3b. metadataComplete — only update when explicitly set in the DTO
if (dto.getMetadataComplete() != null) {
doc.setMetadataComplete(dto.getMetadataComplete());
}
// 4. Datei austauschen (nur wenn eine neue ausgewählt wurde)
if (newFile != null && !newFile.isEmpty()) {
FileService.UploadResult upload = fileService.uploadFile(newFile, newFile.getOriginalFilename());
@@ -280,6 +299,19 @@ public class DocumentService {
return documentRepository.findConversation(senderId, receiverId, dateFrom, dateTo, sort);
}
public long getIncompleteCount() {
return documentRepository.countByMetadataCompleteFalse();
}
public List<Document> findIncompleteDocuments() {
return documentRepository.findByMetadataCompleteFalse(Sort.by(Sort.Direction.DESC, "createdAt"));
}
public Optional<Document> findNextIncompleteDocument(UUID currentId) {
return documentRepository.findFirstByMetadataCompleteFalseAndIdNot(
currentId, Sort.by(Sort.Direction.DESC, "createdAt"));
}
@Transactional
public void deleteDocument(UUID id) {
if (!documentRepository.existsById(id)) {

View File

@@ -312,6 +312,9 @@ public class MassImportService {
.originalFilename(originalFilename)
.build());
// Heuristic: mark as complete if at least one key field is present in the spreadsheet row
boolean metadataComplete = date != null || !senderRaw.isBlank() || !receiversRaw.isBlank();
doc.setTitle(buildTitle(index, date, location));
doc.setFilePath(s3Key);
doc.setContentType(contentType);
@@ -325,6 +328,7 @@ public class MassImportService {
doc.setSender(sender);
doc.getReceivers().addAll(receivers);
if (tag != null) doc.getTags().add(tag);
doc.setMetadataComplete(metadataComplete);
documentRepository.save(doc);
log.info("Importiert{}: {}", file.isEmpty() ? " (nur Metadaten)" : "", originalFilename);

View File

@@ -0,0 +1,6 @@
-- Add metadata_complete flag to documents.
-- Existing rows default to true (already reviewed before this feature existed).
-- New documents created via Java will receive false from the entity default.
ALTER TABLE documents
ADD COLUMN metadata_complete BOOLEAN NOT NULL DEFAULT TRUE;