feat(documents): add metadataComplete flag and enrichment queue endpoints
Adds a metadata_complete column (default true for existing rows) to drive the enrichment queue. New drop-zone uploads always start as false; createDocument uses an explicit DTO flag or a heuristic (any of date/sender/receivers present → true); the mass importer applies the same heuristic per row. New endpoints: GET /api/documents/incomplete-count, /incomplete, /incomplete/next. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,8 @@ import java.io.IOException;
|
||||
import java.time.LocalDate;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
@@ -156,6 +158,23 @@ public class DocumentController {
|
||||
return new QuickUploadResult(created, updated, errors);
|
||||
}
|
||||
|
||||
@GetMapping("/incomplete-count")
|
||||
public Map<String, Long> getIncompleteCount() {
|
||||
return Map.of("count", documentService.getIncompleteCount());
|
||||
}
|
||||
|
||||
@GetMapping("/incomplete")
|
||||
public List<Document> getIncomplete() {
|
||||
return documentService.findIncompleteDocuments();
|
||||
}
|
||||
|
||||
@GetMapping("/incomplete/next")
|
||||
public ResponseEntity<Document> getNextIncomplete(@RequestParam UUID excludeId) {
|
||||
return documentService.findNextIncompleteDocument(excludeId)
|
||||
.map(ResponseEntity::ok)
|
||||
.orElse(ResponseEntity.noContent().build());
|
||||
}
|
||||
|
||||
@GetMapping("/search")
|
||||
public ResponseEntity<List<Document>> search(
|
||||
@RequestParam(required = false) String q,
|
||||
|
||||
@@ -17,4 +17,5 @@ public class DocumentUpdateDTO {
|
||||
private UUID senderId;
|
||||
private List<UUID> receiverIds;
|
||||
private String tags;
|
||||
private Boolean metadataComplete;
|
||||
}
|
||||
|
||||
@@ -86,6 +86,11 @@ public class Document {
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private LocalDateTime updatedAt;
|
||||
|
||||
@Column(name = "metadata_complete", nullable = false)
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
@Builder.Default
|
||||
private boolean metadataComplete = false;
|
||||
|
||||
@ManyToMany(fetch = FetchType.EAGER)
|
||||
@JoinTable(name = "document_receivers", joinColumns = @JoinColumn(name = "document_id"), inverseJoinColumns = @JoinColumn(name = "person_id"))
|
||||
@Builder.Default
|
||||
|
||||
@@ -42,6 +42,12 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
|
||||
|
||||
List<Document> findByFileHashIsNullAndFilePathIsNotNull();
|
||||
|
||||
long countByMetadataCompleteFalse();
|
||||
|
||||
List<Document> findByMetadataCompleteFalse(Sort sort);
|
||||
|
||||
Optional<Document> findFirstByMetadataCompleteFalseAndIdNot(UUID id, Sort sort);
|
||||
|
||||
@Query("SELECT DISTINCT d FROM Document d " +
|
||||
"JOIN d.receivers r " +
|
||||
"WHERE " +
|
||||
|
||||
@@ -62,10 +62,12 @@ public class DocumentService {
|
||||
if (existingDoc.isPresent()) {
|
||||
document = existingDoc.get();
|
||||
} else {
|
||||
// New uploads from the drop zone always start as incomplete
|
||||
document = Document.builder()
|
||||
.originalFilename(originalFilename)
|
||||
.title(stripExtension(originalFilename))
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.metadataComplete(false)
|
||||
.build();
|
||||
}
|
||||
|
||||
@@ -89,6 +91,17 @@ public class DocumentService {
|
||||
? file.getOriginalFilename()
|
||||
: (dto.getTitle() != null ? dto.getTitle() : "Unbenanntes Dokument");
|
||||
|
||||
// If the caller explicitly sets metadataComplete, use it.
|
||||
// Otherwise apply heuristic: complete if at least one key field is present.
|
||||
boolean metadataComplete;
|
||||
if (dto.getMetadataComplete() != null) {
|
||||
metadataComplete = dto.getMetadataComplete();
|
||||
} else {
|
||||
metadataComplete = dto.getDocumentDate() != null
|
||||
|| dto.getSenderId() != null
|
||||
|| (dto.getReceiverIds() != null && !dto.getReceiverIds().isEmpty());
|
||||
}
|
||||
|
||||
Document doc = Document.builder()
|
||||
.originalFilename(filename)
|
||||
.title(dto.getTitle())
|
||||
@@ -98,6 +111,7 @@ public class DocumentService {
|
||||
.transcription(dto.getTranscription())
|
||||
.summary(dto.getSummary())
|
||||
.status(DocumentStatus.PLACEHOLDER)
|
||||
.metadataComplete(metadataComplete)
|
||||
.build();
|
||||
|
||||
doc = documentRepository.save(doc);
|
||||
@@ -176,6 +190,11 @@ public class DocumentService {
|
||||
doc.getReceivers().clear(); // Alle entfernen
|
||||
}
|
||||
|
||||
// 3b. metadataComplete — only update when explicitly set in the DTO
|
||||
if (dto.getMetadataComplete() != null) {
|
||||
doc.setMetadataComplete(dto.getMetadataComplete());
|
||||
}
|
||||
|
||||
// 4. Datei austauschen (nur wenn eine neue ausgewählt wurde)
|
||||
if (newFile != null && !newFile.isEmpty()) {
|
||||
FileService.UploadResult upload = fileService.uploadFile(newFile, newFile.getOriginalFilename());
|
||||
@@ -280,6 +299,19 @@ public class DocumentService {
|
||||
return documentRepository.findConversation(senderId, receiverId, dateFrom, dateTo, sort);
|
||||
}
|
||||
|
||||
public long getIncompleteCount() {
|
||||
return documentRepository.countByMetadataCompleteFalse();
|
||||
}
|
||||
|
||||
public List<Document> findIncompleteDocuments() {
|
||||
return documentRepository.findByMetadataCompleteFalse(Sort.by(Sort.Direction.DESC, "createdAt"));
|
||||
}
|
||||
|
||||
public Optional<Document> findNextIncompleteDocument(UUID currentId) {
|
||||
return documentRepository.findFirstByMetadataCompleteFalseAndIdNot(
|
||||
currentId, Sort.by(Sort.Direction.DESC, "createdAt"));
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void deleteDocument(UUID id) {
|
||||
if (!documentRepository.existsById(id)) {
|
||||
|
||||
@@ -312,6 +312,9 @@ public class MassImportService {
|
||||
.originalFilename(originalFilename)
|
||||
.build());
|
||||
|
||||
// Heuristic: mark as complete if at least one key field is present in the spreadsheet row
|
||||
boolean metadataComplete = date != null || !senderRaw.isBlank() || !receiversRaw.isBlank();
|
||||
|
||||
doc.setTitle(buildTitle(index, date, location));
|
||||
doc.setFilePath(s3Key);
|
||||
doc.setContentType(contentType);
|
||||
@@ -325,6 +328,7 @@ public class MassImportService {
|
||||
doc.setSender(sender);
|
||||
doc.getReceivers().addAll(receivers);
|
||||
if (tag != null) doc.getTags().add(tag);
|
||||
doc.setMetadataComplete(metadataComplete);
|
||||
|
||||
documentRepository.save(doc);
|
||||
log.info("Importiert{}: {}", file.isEmpty() ? " (nur Metadaten)" : "", originalFilename);
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
-- Add metadata_complete flag to documents.
|
||||
-- Existing rows default to true (already reviewed before this feature existed).
|
||||
-- New documents created via Java will receive false from the entity default.
|
||||
|
||||
ALTER TABLE documents
|
||||
ADD COLUMN metadata_complete BOOLEAN NOT NULL DEFAULT TRUE;
|
||||
Reference in New Issue
Block a user