feat(documents): add metadataComplete flag and enrichment queue endpoints
Adds a metadata_complete column (defaulting to true for existing rows) to drive the enrichment queue. New drop-zone uploads always start as false; createDocument uses the explicit DTO flag when one is provided and otherwise falls back to a heuristic (true if any of date, sender, or receivers is present); the mass importer applies the same heuristic per row. New endpoints: GET /api/documents/incomplete-count, GET /api/documents/incomplete, GET /api/documents/incomplete/next.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,8 @@ import java.io.IOException;
|
||||
import java.time.LocalDate;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
@@ -156,6 +158,23 @@ public class DocumentController {
|
||||
return new QuickUploadResult(created, updated, errors);
|
||||
}
|
||||
|
||||
@GetMapping("/incomplete-count")
|
||||
public Map<String, Long> getIncompleteCount() {
|
||||
return Map.of("count", documentService.getIncompleteCount());
|
||||
}
|
||||
|
||||
@GetMapping("/incomplete")
|
||||
public List<Document> getIncomplete() {
|
||||
return documentService.findIncompleteDocuments();
|
||||
}
|
||||
|
||||
@GetMapping("/incomplete/next")
|
||||
public ResponseEntity<Document> getNextIncomplete(@RequestParam UUID excludeId) {
|
||||
return documentService.findNextIncompleteDocument(excludeId)
|
||||
.map(ResponseEntity::ok)
|
||||
.orElse(ResponseEntity.noContent().build());
|
||||
}
|
||||
|
||||
@GetMapping("/search")
|
||||
public ResponseEntity<List<Document>> search(
|
||||
@RequestParam(required = false) String q,
|
||||
|
||||
@@ -17,4 +17,5 @@ public class DocumentUpdateDTO {
|
||||
private UUID senderId;
|
||||
private List<UUID> receiverIds;
|
||||
private String tags;
|
||||
private Boolean metadataComplete;
|
||||
}
|
||||
|
||||
@@ -86,6 +86,11 @@ public class Document {
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private LocalDateTime updatedAt;
|
||||
|
||||
@Column(name = "metadata_complete", nullable = false)
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
@Builder.Default
|
||||
private boolean metadataComplete = false;
|
||||
|
||||
@ManyToMany(fetch = FetchType.EAGER)
|
||||
@JoinTable(name = "document_receivers", joinColumns = @JoinColumn(name = "document_id"), inverseJoinColumns = @JoinColumn(name = "person_id"))
|
||||
@Builder.Default
|
||||
|
||||
@@ -42,6 +42,12 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
|
||||
|
||||
List<Document> findByFileHashIsNullAndFilePathIsNotNull();
|
||||
|
||||
/**
 * Counts the documents whose {@code metadataComplete} flag is {@code false}.
 * Declared without an explicit query; presumably derived by Spring Data from the method name.
 *
 * @return number of matching documents
 */
long countByMetadataCompleteFalse();
|
||||
|
||||
/**
 * Returns the documents whose {@code metadataComplete} flag is {@code false}.
 * Declared without an explicit query; presumably derived by Spring Data from the method name.
 *
 * @param sort ordering applied to the result
 * @return matching documents in the requested order
 */
List<Document> findByMetadataCompleteFalse(Sort sort);
|
||||
|
||||
/**
 * Returns the first document (according to {@code sort}) whose {@code metadataComplete} flag is
 * {@code false} and whose id differs from {@code id}.
 * Declared without an explicit query; presumably derived by Spring Data from the method name.
 *
 * @param id   id of the document to exclude from the result
 * @param sort ordering that decides which match counts as "first"
 * @return the first match, or an empty Optional when there is none
 */
Optional<Document> findFirstByMetadataCompleteFalseAndIdNot(UUID id, Sort sort);
|
||||
|
||||
@Query("SELECT DISTINCT d FROM Document d " +
|
||||
"JOIN d.receivers r " +
|
||||
"WHERE " +
|
||||
|
||||
@@ -62,10 +62,12 @@ public class DocumentService {
|
||||
if (existingDoc.isPresent()) {
|
||||
document = existingDoc.get();
|
||||
} else {
|
||||
// New uploads from the drop zone always start as incomplete
|
||||
document = Document.builder()
|
||||
.originalFilename(originalFilename)
|
||||
.title(stripExtension(originalFilename))
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.metadataComplete(false)
|
||||
.build();
|
||||
}
|
||||
|
||||
@@ -89,6 +91,17 @@ public class DocumentService {
|
||||
? file.getOriginalFilename()
|
||||
: (dto.getTitle() != null ? dto.getTitle() : "Unbenanntes Dokument");
|
||||
|
||||
// If the caller explicitly sets metadataComplete, use it.
|
||||
// Otherwise apply heuristic: complete if at least one key field is present.
|
||||
boolean metadataComplete;
|
||||
if (dto.getMetadataComplete() != null) {
|
||||
metadataComplete = dto.getMetadataComplete();
|
||||
} else {
|
||||
metadataComplete = dto.getDocumentDate() != null
|
||||
|| dto.getSenderId() != null
|
||||
|| (dto.getReceiverIds() != null && !dto.getReceiverIds().isEmpty());
|
||||
}
|
||||
|
||||
Document doc = Document.builder()
|
||||
.originalFilename(filename)
|
||||
.title(dto.getTitle())
|
||||
@@ -98,6 +111,7 @@ public class DocumentService {
|
||||
.transcription(dto.getTranscription())
|
||||
.summary(dto.getSummary())
|
||||
.status(DocumentStatus.PLACEHOLDER)
|
||||
.metadataComplete(metadataComplete)
|
||||
.build();
|
||||
|
||||
doc = documentRepository.save(doc);
|
||||
@@ -176,6 +190,11 @@ public class DocumentService {
|
||||
doc.getReceivers().clear(); // Alle entfernen
|
||||
}
|
||||
|
||||
// 3b. metadataComplete — only update when explicitly set in the DTO
|
||||
if (dto.getMetadataComplete() != null) {
|
||||
doc.setMetadataComplete(dto.getMetadataComplete());
|
||||
}
|
||||
|
||||
// 4. Datei austauschen (nur wenn eine neue ausgewählt wurde)
|
||||
if (newFile != null && !newFile.isEmpty()) {
|
||||
FileService.UploadResult upload = fileService.uploadFile(newFile, newFile.getOriginalFilename());
|
||||
@@ -280,6 +299,19 @@ public class DocumentService {
|
||||
return documentRepository.findConversation(senderId, receiverId, dateFrom, dateTo, sort);
|
||||
}
|
||||
|
||||
public long getIncompleteCount() {
|
||||
return documentRepository.countByMetadataCompleteFalse();
|
||||
}
|
||||
|
||||
public List<Document> findIncompleteDocuments() {
|
||||
return documentRepository.findByMetadataCompleteFalse(Sort.by(Sort.Direction.DESC, "createdAt"));
|
||||
}
|
||||
|
||||
public Optional<Document> findNextIncompleteDocument(UUID currentId) {
|
||||
return documentRepository.findFirstByMetadataCompleteFalseAndIdNot(
|
||||
currentId, Sort.by(Sort.Direction.DESC, "createdAt"));
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void deleteDocument(UUID id) {
|
||||
if (!documentRepository.existsById(id)) {
|
||||
|
||||
@@ -312,6 +312,9 @@ public class MassImportService {
|
||||
.originalFilename(originalFilename)
|
||||
.build());
|
||||
|
||||
// Heuristic: mark as complete if at least one key field is present in the spreadsheet row
|
||||
boolean metadataComplete = date != null || !senderRaw.isBlank() || !receiversRaw.isBlank();
|
||||
|
||||
doc.setTitle(buildTitle(index, date, location));
|
||||
doc.setFilePath(s3Key);
|
||||
doc.setContentType(contentType);
|
||||
@@ -325,6 +328,7 @@ public class MassImportService {
|
||||
doc.setSender(sender);
|
||||
doc.getReceivers().addAll(receivers);
|
||||
if (tag != null) doc.getTags().add(tag);
|
||||
doc.setMetadataComplete(metadataComplete);
|
||||
|
||||
documentRepository.save(doc);
|
||||
log.info("Importiert{}: {}", file.isEmpty() ? " (nur Metadaten)" : "", originalFilename);
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
-- Introduces the metadata_complete flag on the documents table.
-- The column default is TRUE, so rows that already exist when this migration runs
-- are treated as complete (they were reviewed before this feature existed).
-- Documents created through the Java entity receive FALSE from the entity's field default.
ALTER TABLE documents ADD COLUMN metadata_complete BOOLEAN NOT NULL DEFAULT TRUE;
|
||||
@@ -21,6 +21,7 @@ import org.springframework.test.web.servlet.MockMvc;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
@@ -212,6 +213,78 @@ class DocumentControllerTest {
|
||||
.andExpect(jsonPath("$.errors[0].code").value("UNSUPPORTED_FILE_TYPE"));
|
||||
}
|
||||
|
||||
// ─── GET /api/documents/incomplete-count ─────────────────────────────────
|
||||
|
||||
@Test
|
||||
void getIncompleteCount_returns401_whenUnauthenticated() throws Exception {
|
||||
mockMvc.perform(get("/api/documents/incomplete-count"))
|
||||
.andExpect(status().isUnauthorized());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser
|
||||
void getIncompleteCount_returns200_withCount() throws Exception {
|
||||
when(documentService.getIncompleteCount()).thenReturn(3L);
|
||||
|
||||
mockMvc.perform(get("/api/documents/incomplete-count"))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.count").value(3));
|
||||
}
|
||||
|
||||
// ─── GET /api/documents/incomplete ───────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void getIncomplete_returns401_whenUnauthenticated() throws Exception {
|
||||
mockMvc.perform(get("/api/documents/incomplete"))
|
||||
.andExpect(status().isUnauthorized());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser
|
||||
void getIncomplete_returns200_withList() throws Exception {
|
||||
Document doc = Document.builder()
|
||||
.id(UUID.randomUUID()).title("Unvollständig").originalFilename("scan.pdf").build();
|
||||
when(documentService.findIncompleteDocuments()).thenReturn(List.of(doc));
|
||||
|
||||
mockMvc.perform(get("/api/documents/incomplete"))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$[0].title").value("Unvollständig"));
|
||||
}
|
||||
|
||||
// ─── GET /api/documents/incomplete/next ──────────────────────────────────
|
||||
|
||||
@Test
|
||||
void getNextIncomplete_returns401_whenUnauthenticated() throws Exception {
|
||||
mockMvc.perform(get("/api/documents/incomplete/next")
|
||||
.param("excludeId", UUID.randomUUID().toString()))
|
||||
.andExpect(status().isUnauthorized());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser
|
||||
void getNextIncomplete_returns200_whenNextExists() throws Exception {
|
||||
UUID excludeId = UUID.randomUUID();
|
||||
Document next = Document.builder()
|
||||
.id(UUID.randomUUID()).title("Nächster").originalFilename("next.pdf").build();
|
||||
when(documentService.findNextIncompleteDocument(excludeId)).thenReturn(Optional.of(next));
|
||||
|
||||
mockMvc.perform(get("/api/documents/incomplete/next")
|
||||
.param("excludeId", excludeId.toString()))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.title").value("Nächster"));
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser
|
||||
void getNextIncomplete_returns204_whenNoneRemain() throws Exception {
|
||||
UUID excludeId = UUID.randomUUID();
|
||||
when(documentService.findNextIncompleteDocument(excludeId)).thenReturn(Optional.empty());
|
||||
|
||||
mockMvc.perform(get("/api/documents/incomplete/next")
|
||||
.param("excludeId", excludeId.toString()))
|
||||
.andExpect(status().isNoContent());
|
||||
}
|
||||
|
||||
// ─── GET /api/documents/{id}/versions ────────────────────────────────────
|
||||
|
||||
@Test
|
||||
|
||||
@@ -2,6 +2,7 @@ package org.raddatz.familienarchiv.service;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.ArgumentCaptor;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
@@ -11,7 +12,10 @@ import org.raddatz.familienarchiv.model.Document;
|
||||
import org.raddatz.familienarchiv.model.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.model.Tag;
|
||||
import org.raddatz.familienarchiv.repository.DocumentRepository;
|
||||
import org.springframework.data.domain.Sort;
|
||||
import org.springframework.mock.web.MockMultipartFile;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
@@ -344,6 +348,162 @@ class DocumentServiceTest {
|
||||
verify(annotationService).backfillAnnotationFileHashForDocument(eq(docId), any());
|
||||
}
|
||||
|
||||
// ─── getIncompleteCount ───────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void getIncompleteCount_delegatesToRepository() {
|
||||
when(documentRepository.countByMetadataCompleteFalse()).thenReturn(5L);
|
||||
assertThat(documentService.getIncompleteCount()).isEqualTo(5L);
|
||||
}
|
||||
|
||||
// ─── findIncompleteDocuments ──────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void findIncompleteDocuments_returnsDocumentsOrderedByCreatedAtDesc() {
|
||||
Document doc = Document.builder().id(UUID.randomUUID()).title("Test").build();
|
||||
when(documentRepository.findByMetadataCompleteFalse(any(Sort.class))).thenReturn(List.of(doc));
|
||||
|
||||
assertThat(documentService.findIncompleteDocuments()).containsExactly(doc);
|
||||
verify(documentRepository).findByMetadataCompleteFalse(Sort.by(Sort.Direction.DESC, "createdAt"));
|
||||
}
|
||||
|
||||
// ─── findNextIncompleteDocument ───────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void findNextIncompleteDocument_returnsNext_whenAnotherIncompleteExists() {
|
||||
UUID currentId = UUID.randomUUID();
|
||||
Document next = Document.builder().id(UUID.randomUUID()).title("Next").build();
|
||||
when(documentRepository.findFirstByMetadataCompleteFalseAndIdNot(eq(currentId), any(Sort.class)))
|
||||
.thenReturn(Optional.of(next));
|
||||
|
||||
assertThat(documentService.findNextIncompleteDocument(currentId)).contains(next);
|
||||
}
|
||||
|
||||
@Test
|
||||
void findNextIncompleteDocument_returnsEmpty_whenNoMoreIncomplete() {
|
||||
UUID currentId = UUID.randomUUID();
|
||||
when(documentRepository.findFirstByMetadataCompleteFalseAndIdNot(eq(currentId), any(Sort.class)))
|
||||
.thenReturn(Optional.empty());
|
||||
|
||||
assertThat(documentService.findNextIncompleteDocument(currentId)).isEmpty();
|
||||
}
|
||||
|
||||
// ─── storeDocument metadataComplete ──────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void storeDocument_setsMetadataCompleteFalse_forNewDocument() throws Exception {
|
||||
MockMultipartFile file = new MockMultipartFile("file", "scan.pdf", "application/pdf", new byte[]{1});
|
||||
Document saved = Document.builder().id(UUID.randomUUID()).originalFilename("scan.pdf").build();
|
||||
when(documentRepository.findFirstByOriginalFilename("scan.pdf")).thenReturn(Optional.empty());
|
||||
when(documentRepository.save(any())).thenReturn(saved);
|
||||
when(fileService.uploadFile(any(), any())).thenReturn(new FileService.UploadResult("path", "hash"));
|
||||
|
||||
ArgumentCaptor<Document> captor = ArgumentCaptor.forClass(Document.class);
|
||||
documentService.storeDocument(file);
|
||||
|
||||
verify(documentRepository).save(captor.capture());
|
||||
assertThat(captor.getValue().isMetadataComplete()).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void storeDocument_doesNotChangeMetadataComplete_forExistingDocument() throws Exception {
|
||||
MockMultipartFile file = new MockMultipartFile("file", "scan.pdf", "application/pdf", new byte[]{1});
|
||||
Document existing = Document.builder().id(UUID.randomUUID()).originalFilename("scan.pdf")
|
||||
.status(DocumentStatus.PLACEHOLDER).metadataComplete(true).build();
|
||||
when(documentRepository.findFirstByOriginalFilename("scan.pdf")).thenReturn(Optional.of(existing));
|
||||
when(documentRepository.save(any())).thenReturn(existing);
|
||||
when(fileService.uploadFile(any(), any())).thenReturn(new FileService.UploadResult("path", "hash"));
|
||||
|
||||
documentService.storeDocument(file);
|
||||
|
||||
assertThat(existing.isMetadataComplete()).isTrue();
|
||||
}
|
||||
|
||||
// ─── createDocument metadataComplete ─────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void createDocument_setsMetadataCompleteFromDto_whenExplicitlyProvided() throws Exception {
|
||||
DocumentUpdateDTO dto = new DocumentUpdateDTO();
|
||||
dto.setTitle("Doc");
|
||||
dto.setMetadataComplete(true);
|
||||
Document saved = Document.builder().id(UUID.randomUUID()).title("Doc")
|
||||
.originalFilename("Doc").status(DocumentStatus.PLACEHOLDER).build();
|
||||
when(documentRepository.save(any())).thenReturn(saved);
|
||||
when(documentRepository.findById(any())).thenReturn(Optional.of(saved));
|
||||
|
||||
ArgumentCaptor<Document> captor = ArgumentCaptor.forClass(Document.class);
|
||||
documentService.createDocument(dto, null);
|
||||
|
||||
verify(documentRepository, atLeastOnce()).save(captor.capture());
|
||||
assertThat(captor.getAllValues().get(0).isMetadataComplete()).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void createDocument_setsMetadataCompleteFalse_whenAllKeyFieldsMissingAndNoExplicitFlag() throws Exception {
|
||||
DocumentUpdateDTO dto = new DocumentUpdateDTO();
|
||||
dto.setTitle("Doc");
|
||||
// no documentDate, no senderId, no receiverIds, no metadataComplete flag
|
||||
Document saved = Document.builder().id(UUID.randomUUID()).title("Doc")
|
||||
.originalFilename("Doc").status(DocumentStatus.PLACEHOLDER).build();
|
||||
when(documentRepository.save(any())).thenReturn(saved);
|
||||
when(documentRepository.findById(any())).thenReturn(Optional.of(saved));
|
||||
|
||||
ArgumentCaptor<Document> captor = ArgumentCaptor.forClass(Document.class);
|
||||
documentService.createDocument(dto, null);
|
||||
|
||||
verify(documentRepository, atLeastOnce()).save(captor.capture());
|
||||
assertThat(captor.getAllValues().get(0).isMetadataComplete()).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void createDocument_setsMetadataCompleteTrue_whenDatePresentAndNoExplicitFlag() throws Exception {
|
||||
DocumentUpdateDTO dto = new DocumentUpdateDTO();
|
||||
dto.setTitle("Doc");
|
||||
dto.setDocumentDate(LocalDate.of(2020, 1, 1));
|
||||
Document saved = Document.builder().id(UUID.randomUUID()).title("Doc")
|
||||
.originalFilename("Doc").status(DocumentStatus.PLACEHOLDER).build();
|
||||
when(documentRepository.save(any())).thenReturn(saved);
|
||||
when(documentRepository.findById(any())).thenReturn(Optional.of(saved));
|
||||
|
||||
ArgumentCaptor<Document> captor = ArgumentCaptor.forClass(Document.class);
|
||||
documentService.createDocument(dto, null);
|
||||
|
||||
verify(documentRepository, atLeastOnce()).save(captor.capture());
|
||||
assertThat(captor.getAllValues().get(0).isMetadataComplete()).isTrue();
|
||||
}
|
||||
|
||||
// ─── updateDocument metadataComplete ─────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void updateDocument_setsMetadataComplete_whenDtoHasValue() throws Exception {
|
||||
UUID id = UUID.randomUUID();
|
||||
Document existing = Document.builder().id(id).title("Doc").originalFilename("doc.pdf")
|
||||
.status(DocumentStatus.PLACEHOLDER).metadataComplete(false).build();
|
||||
when(documentRepository.findById(id)).thenReturn(Optional.of(existing));
|
||||
when(documentRepository.save(any())).thenReturn(existing);
|
||||
|
||||
DocumentUpdateDTO dto = new DocumentUpdateDTO();
|
||||
dto.setMetadataComplete(true);
|
||||
documentService.updateDocument(id, dto, null);
|
||||
|
||||
assertThat(existing.isMetadataComplete()).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_doesNotChangeMetadataComplete_whenDtoHasNull() throws Exception {
|
||||
UUID id = UUID.randomUUID();
|
||||
Document existing = Document.builder().id(id).title("Doc").originalFilename("doc.pdf")
|
||||
.status(DocumentStatus.PLACEHOLDER).metadataComplete(false).build();
|
||||
when(documentRepository.findById(id)).thenReturn(Optional.of(existing));
|
||||
when(documentRepository.save(any())).thenReturn(existing);
|
||||
|
||||
DocumentUpdateDTO dto = new DocumentUpdateDTO();
|
||||
// metadataComplete not set → null
|
||||
documentService.updateDocument(id, dto, null);
|
||||
|
||||
assertThat(existing.isMetadataComplete()).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void backfillFileHashes_returnsCountOfUpdatedDocuments() throws Exception {
|
||||
UUID id1 = UUID.randomUUID();
|
||||
|
||||
Reference in New Issue
Block a user