From 0ce18e1eedc3b7c3167a27273abec86d356d6340 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 26 Mar 2026 13:25:57 +0100 Subject: [PATCH] feat(documents): add metadataComplete flag and enrichment queue endpoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a metadata_complete column (default true for existing rows) to drive the enrichment queue. New drop-zone uploads always start as false; createDocument uses an explicit DTO flag or a heuristic (any of date/sender/receivers present → true); the mass importer applies the same heuristic per row. New endpoints: GET /api/documents/incomplete-count, /incomplete, /incomplete/next. Co-Authored-By: Claude Sonnet 4.6 --- .../controller/DocumentController.java | 19 +++ .../familienarchiv/dto/DocumentUpdateDTO.java | 1 + .../familienarchiv/model/Document.java | 5 + .../repository/DocumentRepository.java | 6 + .../service/DocumentService.java | 32 ++++ .../service/MassImportService.java | 4 + ...15__add_metadata_complete_to_documents.sql | 6 + .../controller/DocumentControllerTest.java | 73 ++++++++ .../service/DocumentServiceTest.java | 160 ++++++++++++++++++ 9 files changed, 306 insertions(+) create mode 100644 backend/src/main/resources/db/migration/V15__add_metadata_complete_to_documents.sql diff --git a/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java b/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java index 8cafba7a..d66e935a 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java @@ -4,6 +4,8 @@ import java.io.IOException; import java.time.LocalDate; import java.util.ArrayList; import java.util.List; +import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.UUID; @@ -156,6 +158,23 @@ public class DocumentController { return new 
QuickUploadResult(created, updated, errors); } + @GetMapping("/incomplete-count") + public Map getIncompleteCount() { + return Map.of("count", documentService.getIncompleteCount()); + } + + @GetMapping("/incomplete") + public List getIncomplete() { + return documentService.findIncompleteDocuments(); + } + + @GetMapping("/incomplete/next") + public ResponseEntity getNextIncomplete(@RequestParam UUID excludeId) { + return documentService.findNextIncompleteDocument(excludeId) + .map(ResponseEntity::ok) + .orElse(ResponseEntity.noContent().build()); + } + @GetMapping("/search") public ResponseEntity> search( @RequestParam(required = false) String q, diff --git a/backend/src/main/java/org/raddatz/familienarchiv/dto/DocumentUpdateDTO.java b/backend/src/main/java/org/raddatz/familienarchiv/dto/DocumentUpdateDTO.java index 3649ddea..79789f24 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/dto/DocumentUpdateDTO.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/dto/DocumentUpdateDTO.java @@ -17,4 +17,5 @@ public class DocumentUpdateDTO { private UUID senderId; private List receiverIds; private String tags; + private Boolean metadataComplete; } diff --git a/backend/src/main/java/org/raddatz/familienarchiv/model/Document.java b/backend/src/main/java/org/raddatz/familienarchiv/model/Document.java index 5fa21c57..f72e3f5e 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/model/Document.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/model/Document.java @@ -86,6 +86,11 @@ public class Document { @Schema(requiredMode = Schema.RequiredMode.REQUIRED) private LocalDateTime updatedAt; + @Column(name = "metadata_complete", nullable = false) + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + @Builder.Default + private boolean metadataComplete = false; + @ManyToMany(fetch = FetchType.EAGER) @JoinTable(name = "document_receivers", joinColumns = @JoinColumn(name = "document_id"), inverseJoinColumns = @JoinColumn(name = "person_id")) 
@Builder.Default diff --git a/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java index 25da2dcd..a878b67a 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java @@ -42,6 +42,12 @@ public interface DocumentRepository extends JpaRepository, JpaSp List findByFileHashIsNullAndFilePathIsNotNull(); + long countByMetadataCompleteFalse(); + + List findByMetadataCompleteFalse(Sort sort); + + Optional findFirstByMetadataCompleteFalseAndIdNot(UUID id, Sort sort); + @Query("SELECT DISTINCT d FROM Document d " + "JOIN d.receivers r " + "WHERE " + diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java index cd878814..95cd0921 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java @@ -62,10 +62,12 @@ public class DocumentService { if (existingDoc.isPresent()) { document = existingDoc.get(); } else { + // New uploads from the drop zone always start as incomplete document = Document.builder() .originalFilename(originalFilename) .title(stripExtension(originalFilename)) .status(DocumentStatus.UPLOADED) + .metadataComplete(false) .build(); } @@ -89,6 +91,17 @@ public class DocumentService { ? file.getOriginalFilename() : (dto.getTitle() != null ? dto.getTitle() : "Unbenanntes Dokument"); + // If the caller explicitly sets metadataComplete, use it. + // Otherwise apply heuristic: complete if at least one key field is present. 
+ boolean metadataComplete; + if (dto.getMetadataComplete() != null) { + metadataComplete = dto.getMetadataComplete(); + } else { + metadataComplete = dto.getDocumentDate() != null + || dto.getSenderId() != null + || (dto.getReceiverIds() != null && !dto.getReceiverIds().isEmpty()); + } + Document doc = Document.builder() .originalFilename(filename) .title(dto.getTitle()) @@ -98,6 +111,7 @@ public class DocumentService { .transcription(dto.getTranscription()) .summary(dto.getSummary()) .status(DocumentStatus.PLACEHOLDER) + .metadataComplete(metadataComplete) .build(); doc = documentRepository.save(doc); @@ -176,6 +190,11 @@ public class DocumentService { doc.getReceivers().clear(); // Alle entfernen } + // 3b. metadataComplete — only update when explicitly set in the DTO + if (dto.getMetadataComplete() != null) { + doc.setMetadataComplete(dto.getMetadataComplete()); + } + // 4. Datei austauschen (nur wenn eine neue ausgewählt wurde) if (newFile != null && !newFile.isEmpty()) { FileService.UploadResult upload = fileService.uploadFile(newFile, newFile.getOriginalFilename()); @@ -280,6 +299,19 @@ public class DocumentService { return documentRepository.findConversation(senderId, receiverId, dateFrom, dateTo, sort); } + public long getIncompleteCount() { + return documentRepository.countByMetadataCompleteFalse(); + } + + public List findIncompleteDocuments() { + return documentRepository.findByMetadataCompleteFalse(Sort.by(Sort.Direction.DESC, "createdAt")); + } + + public Optional findNextIncompleteDocument(UUID currentId) { + return documentRepository.findFirstByMetadataCompleteFalseAndIdNot( + currentId, Sort.by(Sort.Direction.DESC, "createdAt")); + } + @Transactional public void deleteDocument(UUID id) { if (!documentRepository.existsById(id)) { diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/MassImportService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/MassImportService.java index 522bcfd7..9f35733b 100644 --- 
a/backend/src/main/java/org/raddatz/familienarchiv/service/MassImportService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/MassImportService.java @@ -312,6 +312,9 @@ public class MassImportService { .originalFilename(originalFilename) .build()); + // Heuristic: mark as complete if at least one key field is present in the spreadsheet row + boolean metadataComplete = date != null || !senderRaw.isBlank() || !receiversRaw.isBlank(); + doc.setTitle(buildTitle(index, date, location)); doc.setFilePath(s3Key); doc.setContentType(contentType); @@ -325,6 +328,7 @@ public class MassImportService { doc.setSender(sender); doc.getReceivers().addAll(receivers); if (tag != null) doc.getTags().add(tag); + doc.setMetadataComplete(metadataComplete); documentRepository.save(doc); log.info("Importiert{}: {}", file.isEmpty() ? " (nur Metadaten)" : "", originalFilename); diff --git a/backend/src/main/resources/db/migration/V15__add_metadata_complete_to_documents.sql b/backend/src/main/resources/db/migration/V15__add_metadata_complete_to_documents.sql new file mode 100644 index 00000000..0d170d86 --- /dev/null +++ b/backend/src/main/resources/db/migration/V15__add_metadata_complete_to_documents.sql @@ -0,0 +1,6 @@ +-- Add metadata_complete flag to documents. +-- Existing rows default to true (already reviewed before this feature existed). +-- New documents are set by application code: drop-zone uploads start as false, createDocument uses the DTO flag or a date/sender/receivers heuristic, and the mass importer applies the same heuristic; the entity default (false) is only a fallback.
+ +ALTER TABLE documents + ADD COLUMN metadata_complete BOOLEAN NOT NULL DEFAULT TRUE; diff --git a/backend/src/test/java/org/raddatz/familienarchiv/controller/DocumentControllerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/controller/DocumentControllerTest.java index f4f6c439..749abd88 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/controller/DocumentControllerTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/controller/DocumentControllerTest.java @@ -21,6 +21,7 @@ import org.springframework.test.web.servlet.MockMvc; import java.time.LocalDateTime; import java.util.Collections; import java.util.List; +import java.util.Optional; import java.util.UUID; import static org.mockito.ArgumentMatchers.any; @@ -212,6 +213,78 @@ class DocumentControllerTest { .andExpect(jsonPath("$.errors[0].code").value("UNSUPPORTED_FILE_TYPE")); } + // ─── GET /api/documents/incomplete-count ───────────────────────────────── + + @Test + void getIncompleteCount_returns401_whenUnauthenticated() throws Exception { + mockMvc.perform(get("/api/documents/incomplete-count")) + .andExpect(status().isUnauthorized()); + } + + @Test + @WithMockUser + void getIncompleteCount_returns200_withCount() throws Exception { + when(documentService.getIncompleteCount()).thenReturn(3L); + + mockMvc.perform(get("/api/documents/incomplete-count")) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.count").value(3)); + } + + // ─── GET /api/documents/incomplete ─────────────────────────────────────── + + @Test + void getIncomplete_returns401_whenUnauthenticated() throws Exception { + mockMvc.perform(get("/api/documents/incomplete")) + .andExpect(status().isUnauthorized()); + } + + @Test + @WithMockUser + void getIncomplete_returns200_withList() throws Exception { + Document doc = Document.builder() + .id(UUID.randomUUID()).title("Unvollständig").originalFilename("scan.pdf").build(); + when(documentService.findIncompleteDocuments()).thenReturn(List.of(doc)); + + 
mockMvc.perform(get("/api/documents/incomplete")) + .andExpect(status().isOk()) + .andExpect(jsonPath("$[0].title").value("Unvollständig")); + } + + // ─── GET /api/documents/incomplete/next ────────────────────────────────── + + @Test + void getNextIncomplete_returns401_whenUnauthenticated() throws Exception { + mockMvc.perform(get("/api/documents/incomplete/next") + .param("excludeId", UUID.randomUUID().toString())) + .andExpect(status().isUnauthorized()); + } + + @Test + @WithMockUser + void getNextIncomplete_returns200_whenNextExists() throws Exception { + UUID excludeId = UUID.randomUUID(); + Document next = Document.builder() + .id(UUID.randomUUID()).title("Nächster").originalFilename("next.pdf").build(); + when(documentService.findNextIncompleteDocument(excludeId)).thenReturn(Optional.of(next)); + + mockMvc.perform(get("/api/documents/incomplete/next") + .param("excludeId", excludeId.toString())) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.title").value("Nächster")); + } + + @Test + @WithMockUser + void getNextIncomplete_returns204_whenNoneRemain() throws Exception { + UUID excludeId = UUID.randomUUID(); + when(documentService.findNextIncompleteDocument(excludeId)).thenReturn(Optional.empty()); + + mockMvc.perform(get("/api/documents/incomplete/next") + .param("excludeId", excludeId.toString())) + .andExpect(status().isNoContent()); + } + // ─── GET /api/documents/{id}/versions ──────────────────────────────────── @Test diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java index 984be862..195810ab 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java @@ -2,6 +2,7 @@ package org.raddatz.familienarchiv.service; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; 
+import org.mockito.ArgumentCaptor; import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; @@ -11,7 +12,10 @@ import org.raddatz.familienarchiv.model.Document; import org.raddatz.familienarchiv.model.DocumentStatus; import org.raddatz.familienarchiv.model.Tag; import org.raddatz.familienarchiv.repository.DocumentRepository; +import org.springframework.data.domain.Sort; +import org.springframework.mock.web.MockMultipartFile; +import java.time.LocalDate; import java.util.HashSet; import java.util.List; import java.util.Optional; @@ -344,6 +348,162 @@ class DocumentServiceTest { verify(annotationService).backfillAnnotationFileHashForDocument(eq(docId), any()); } + // ─── getIncompleteCount ─────────────────────────────────────────────────── + + @Test + void getIncompleteCount_delegatesToRepository() { + when(documentRepository.countByMetadataCompleteFalse()).thenReturn(5L); + assertThat(documentService.getIncompleteCount()).isEqualTo(5L); + } + + // ─── findIncompleteDocuments ────────────────────────────────────────────── + + @Test + void findIncompleteDocuments_returnsDocumentsOrderedByCreatedAtDesc() { + Document doc = Document.builder().id(UUID.randomUUID()).title("Test").build(); + when(documentRepository.findByMetadataCompleteFalse(any(Sort.class))).thenReturn(List.of(doc)); + + assertThat(documentService.findIncompleteDocuments()).containsExactly(doc); + verify(documentRepository).findByMetadataCompleteFalse(Sort.by(Sort.Direction.DESC, "createdAt")); + } + + // ─── findNextIncompleteDocument ─────────────────────────────────────────── + + @Test + void findNextIncompleteDocument_returnsNext_whenAnotherIncompleteExists() { + UUID currentId = UUID.randomUUID(); + Document next = Document.builder().id(UUID.randomUUID()).title("Next").build(); + when(documentRepository.findFirstByMetadataCompleteFalseAndIdNot(eq(currentId), any(Sort.class))) + .thenReturn(Optional.of(next)); + + 
assertThat(documentService.findNextIncompleteDocument(currentId)).contains(next); + } + + @Test + void findNextIncompleteDocument_returnsEmpty_whenNoMoreIncomplete() { + UUID currentId = UUID.randomUUID(); + when(documentRepository.findFirstByMetadataCompleteFalseAndIdNot(eq(currentId), any(Sort.class))) + .thenReturn(Optional.empty()); + + assertThat(documentService.findNextIncompleteDocument(currentId)).isEmpty(); + } + + // ─── storeDocument metadataComplete ────────────────────────────────────── + + @Test + void storeDocument_setsMetadataCompleteFalse_forNewDocument() throws Exception { + MockMultipartFile file = new MockMultipartFile("file", "scan.pdf", "application/pdf", new byte[]{1}); + Document saved = Document.builder().id(UUID.randomUUID()).originalFilename("scan.pdf").build(); + when(documentRepository.findFirstByOriginalFilename("scan.pdf")).thenReturn(Optional.empty()); + when(documentRepository.save(any())).thenReturn(saved); + when(fileService.uploadFile(any(), any())).thenReturn(new FileService.UploadResult("path", "hash")); + + ArgumentCaptor captor = ArgumentCaptor.forClass(Document.class); + documentService.storeDocument(file); + + verify(documentRepository).save(captor.capture()); + assertThat(captor.getValue().isMetadataComplete()).isFalse(); + } + + @Test + void storeDocument_doesNotChangeMetadataComplete_forExistingDocument() throws Exception { + MockMultipartFile file = new MockMultipartFile("file", "scan.pdf", "application/pdf", new byte[]{1}); + Document existing = Document.builder().id(UUID.randomUUID()).originalFilename("scan.pdf") + .status(DocumentStatus.PLACEHOLDER).metadataComplete(true).build(); + when(documentRepository.findFirstByOriginalFilename("scan.pdf")).thenReturn(Optional.of(existing)); + when(documentRepository.save(any())).thenReturn(existing); + when(fileService.uploadFile(any(), any())).thenReturn(new FileService.UploadResult("path", "hash")); + + documentService.storeDocument(file); + + 
assertThat(existing.isMetadataComplete()).isTrue(); + } + + // ─── createDocument metadataComplete ───────────────────────────────────── + + @Test + void createDocument_setsMetadataCompleteFromDto_whenExplicitlyProvided() throws Exception { + DocumentUpdateDTO dto = new DocumentUpdateDTO(); + dto.setTitle("Doc"); + dto.setMetadataComplete(true); + Document saved = Document.builder().id(UUID.randomUUID()).title("Doc") + .originalFilename("Doc").status(DocumentStatus.PLACEHOLDER).build(); + when(documentRepository.save(any())).thenReturn(saved); + when(documentRepository.findById(any())).thenReturn(Optional.of(saved)); + + ArgumentCaptor captor = ArgumentCaptor.forClass(Document.class); + documentService.createDocument(dto, null); + + verify(documentRepository, atLeastOnce()).save(captor.capture()); + assertThat(captor.getAllValues().get(0).isMetadataComplete()).isTrue(); + } + + @Test + void createDocument_setsMetadataCompleteFalse_whenAllKeyFieldsMissingAndNoExplicitFlag() throws Exception { + DocumentUpdateDTO dto = new DocumentUpdateDTO(); + dto.setTitle("Doc"); + // no documentDate, no senderId, no receiverIds, no metadataComplete flag + Document saved = Document.builder().id(UUID.randomUUID()).title("Doc") + .originalFilename("Doc").status(DocumentStatus.PLACEHOLDER).build(); + when(documentRepository.save(any())).thenReturn(saved); + when(documentRepository.findById(any())).thenReturn(Optional.of(saved)); + + ArgumentCaptor captor = ArgumentCaptor.forClass(Document.class); + documentService.createDocument(dto, null); + + verify(documentRepository, atLeastOnce()).save(captor.capture()); + assertThat(captor.getAllValues().get(0).isMetadataComplete()).isFalse(); + } + + @Test + void createDocument_setsMetadataCompleteTrue_whenDatePresentAndNoExplicitFlag() throws Exception { + DocumentUpdateDTO dto = new DocumentUpdateDTO(); + dto.setTitle("Doc"); + dto.setDocumentDate(LocalDate.of(2020, 1, 1)); + Document saved = 
Document.builder().id(UUID.randomUUID()).title("Doc") + .originalFilename("Doc").status(DocumentStatus.PLACEHOLDER).build(); + when(documentRepository.save(any())).thenReturn(saved); + when(documentRepository.findById(any())).thenReturn(Optional.of(saved)); + + ArgumentCaptor captor = ArgumentCaptor.forClass(Document.class); + documentService.createDocument(dto, null); + + verify(documentRepository, atLeastOnce()).save(captor.capture()); + assertThat(captor.getAllValues().get(0).isMetadataComplete()).isTrue(); + } + + // ─── updateDocument metadataComplete ───────────────────────────────────── + + @Test + void updateDocument_setsMetadataComplete_whenDtoHasValue() throws Exception { + UUID id = UUID.randomUUID(); + Document existing = Document.builder().id(id).title("Doc").originalFilename("doc.pdf") + .status(DocumentStatus.PLACEHOLDER).metadataComplete(false).build(); + when(documentRepository.findById(id)).thenReturn(Optional.of(existing)); + when(documentRepository.save(any())).thenReturn(existing); + + DocumentUpdateDTO dto = new DocumentUpdateDTO(); + dto.setMetadataComplete(true); + documentService.updateDocument(id, dto, null); + + assertThat(existing.isMetadataComplete()).isTrue(); + } + + @Test + void updateDocument_doesNotChangeMetadataComplete_whenDtoHasNull() throws Exception { + UUID id = UUID.randomUUID(); + Document existing = Document.builder().id(id).title("Doc").originalFilename("doc.pdf") + .status(DocumentStatus.PLACEHOLDER).metadataComplete(false).build(); + when(documentRepository.findById(id)).thenReturn(Optional.of(existing)); + when(documentRepository.save(any())).thenReturn(existing); + + DocumentUpdateDTO dto = new DocumentUpdateDTO(); + // metadataComplete not set → null + documentService.updateDocument(id, dto, null); + + assertThat(existing.isMetadataComplete()).isFalse(); + } + @Test void backfillFileHashes_returnsCountOfUpdatedDocuments() throws Exception { UUID id1 = UUID.randomUUID();