diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java index d6453e19..d2f1e276 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java @@ -31,6 +31,7 @@ import javax.xml.parsers.DocumentBuilderFactory; import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -53,9 +54,13 @@ public class MassImportService { public enum State { IDLE, RUNNING, DONE, FAILED } - public record ImportStatus(State state, String statusCode, @JsonIgnore String message, int processed, LocalDateTime startedAt) {} + public record SkippedFile(String filename, String reason) {} - private volatile ImportStatus currentStatus = new ImportStatus(State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, null); + public record ImportStatus(State state, String statusCode, @JsonIgnore String message, int processed, int skipped, List skippedFiles, LocalDateTime startedAt) {} + + record ProcessResult(int processed, int skipped, List skippedFiles) {} + + private volatile ImportStatus currentStatus = new ImportStatus(State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, 0, List.of(), null); public ImportStatus getStatus() { return currentStatus; @@ -117,22 +122,22 @@ public class MassImportService { if (currentStatus.state() == State.RUNNING) { throw DomainException.conflict(ErrorCode.IMPORT_ALREADY_RUNNING, "A mass import is already in progress"); } - currentStatus = new ImportStatus(State.RUNNING, "IMPORT_RUNNING", "Import läuft...", 0, LocalDateTime.now()); + currentStatus = new ImportStatus(State.RUNNING, "IMPORT_RUNNING", "Import läuft...", 0, 0, List.of(), LocalDateTime.now()); try { File spreadsheet = findSpreadsheetFile(); log.info("Starte Massenimport aus: {}", spreadsheet.getAbsolutePath()); - int processed = processRows(readSpreadsheet(spreadsheet)); + ProcessResult result = processRows(readSpreadsheet(spreadsheet)); currentStatus = new ImportStatus(State.DONE, "IMPORT_DONE", - "Import abgeschlossen. " + processed + " Dokumente verarbeitet.", - processed, currentStatus.startedAt()); + "Import abgeschlossen. " + result.processed() + " Dokumente verarbeitet.", + result.processed(), result.skipped(), result.skippedFiles(), currentStatus.startedAt()); } catch (NoSpreadsheetException e) { log.error("Massenimport fehlgeschlagen: keine Tabellendatei", e); currentStatus = new ImportStatus(State.FAILED, "IMPORT_FAILED_NO_SPREADSHEET", - "Fehler: " + e.getMessage(), 0, currentStatus.startedAt()); + "Fehler: " + e.getMessage(), 0, 0, List.of(), currentStatus.startedAt()); } catch (Exception e) { log.error("Massenimport fehlgeschlagen", e); currentStatus = new ImportStatus(State.FAILED, "IMPORT_FAILED_INTERNAL", - "Fehler: " + e.getMessage(), 0, currentStatus.startedAt()); + "Fehler: " + e.getMessage(), 0, 0, List.of(), currentStatus.startedAt()); } } @@ -254,8 +259,10 @@ public class MassImportService { // --- Import logic (works on neutral List rows) --- - private int processRows(List> rows) { - int count = 0; + private ProcessResult processRows(List> rows) { + int processed = 0; + List skippedFiles = new ArrayList<>(); + for (int i = 1; i < rows.size(); i++) { // skip header row List cells = rows.get(i); String index = getCell(cells, colIndex); @@ -266,18 +273,46 @@ public class MassImportService { if (fileOnDisk.isEmpty()) { log.warn("Datei nicht gefunden, importiere nur Metadaten: {}", filename); } - importSingleDocument(cells, fileOnDisk, filename, index); - count++; + + if (fileOnDisk.isPresent()) { + try { + if (!isPdfMagicBytes(fileOnDisk.get())) { + log.warn("Überspringe {}: Datei beginnt nicht mit %PDF-Signatur", filename); + skippedFiles.add(new SkippedFile(filename, "Keine gültige PDF-Signatur")); + continue; + } + } catch (IOException e) { + log.error("Fehler beim Prüfen der Magic-Bytes für {}", filename, e); + skippedFiles.add(new SkippedFile(filename, "Fehler beim Lesen der Datei")); + continue; + } + } + + boolean imported = importSingleDocument(cells, fileOnDisk, filename, index); + if (imported) { + processed++; + } + } + return new ProcessResult(processed, skippedFiles.size(), skippedFiles); + } + + private boolean isPdfMagicBytes(File file) throws IOException { + try (InputStream is = new FileInputStream(file)) { + byte[] header = is.readNBytes(4); + return header.length == 4 + && header[0] == 0x25 // % + && header[1] == 0x50 // P + && header[2] == 0x44 // D + && header[3] == 0x46; // F } - return count; } @Transactional - protected void importSingleDocument(List cells, Optional file, String originalFilename, String index) { + protected boolean importSingleDocument(List cells, Optional file, String originalFilename, String index) { Optional existing = documentService.findByOriginalFilename(originalFilename); if (existing.isPresent() && existing.get().getStatus() != DocumentStatus.PLACEHOLDER) { log.info("Dokument {} existiert bereits, überspringe.", originalFilename); - return; + return false; } String archiveBox = getCell(cells, colBox); @@ -313,7 +348,7 @@ public class MassImportService { status = DocumentStatus.UPLOADED; } catch (Exception e) { log.error("S3 Upload Fehler für {}", file.get().getName(), e); - return; + return false; } } @@ -355,6 +390,7 @@ public class MassImportService { thumbnailAsyncRunner.dispatchAfterCommit(saved.getId()); } log.info("Importiert{}: {}", file.isEmpty() ? " (nur Metadaten)" : "", originalFilename); + return true; } // --- Helpers --- diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java index dcd7c707..eef66a71 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java @@ -135,7 +135,7 @@ class MassImportServiceTest { @Test void runImportAsync_throwsConflict_whenAlreadyRunning() { MassImportService.ImportStatus running = new MassImportService.ImportStatus( - MassImportService.State.RUNNING, "IMPORT_RUNNING", "Running...", 0, LocalDateTime.now()); + MassImportService.State.RUNNING, "IMPORT_RUNNING", "Running...", 0, 0, List.of(), LocalDateTime.now()); ReflectionTestUtils.setField(service, "currentStatus", running); assertThatThrownBy(() -> service.runImportAsync()) @@ -325,8 +325,8 @@ class MassImportServiceTest { @Test void processRows_returnsZero_whenOnlyHeaderRow() { List> rows = List.of(List.of("header", "col1")); - Integer result = ReflectionTestUtils.invokeMethod(service, "processRows", rows); - assertThat(result).isEqualTo(0); + MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows); + assertThat(result.processed()).isEqualTo(0); } @Test @@ -335,8 +335,8 @@ class MassImportServiceTest { List.of("header"), minimalCells("") // blank index ); - Integer result = ReflectionTestUtils.invokeMethod(service, "processRows", rows); - assertThat(result).isEqualTo(0); + MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows); + assertThat(result.processed()).isEqualTo(0); verify(documentService, never()).findByOriginalFilename(any()); } @@ -349,9 +349,9 @@ class MassImportServiceTest { List.of("header"), minimalCells("doc001") // no dot → appends ".pdf" ); - Integer result = ReflectionTestUtils.invokeMethod(service, "processRows", rows); + MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows); - assertThat(result).isEqualTo(1); + assertThat(result.processed()).isEqualTo(1); verify(documentService).findByOriginalFilename("doc001.pdf"); } @@ -364,9 +364,9 @@ class MassImportServiceTest { List.of("header"), minimalCells("doc002.pdf") // has dot → used as-is ); - Integer result = ReflectionTestUtils.invokeMethod(service, "processRows", rows); + MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows); - assertThat(result).isEqualTo(1); + assertThat(result.processed()).isEqualTo(1); verify(documentService).findByOriginalFilename("doc002.pdf"); } @@ -525,6 +525,69 @@ class MassImportServiceTest { assertThat(result).isEqualTo("hello"); } + // ─── PDF magic byte validation regression ───────────────────────────────── + + @Test + void runImportAsync_uploadsValidPdf_andSkipsFakeOne(@TempDir Path tempDir) throws Exception { + byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF- + Files.write(tempDir.resolve("real.pdf"), pdfHeader); + Files.writeString(tempDir.resolve("fake.pdf"), "not a pdf"); + buildMinimalImportXlsx(tempDir, "real.pdf", "fake.pdf"); + ReflectionTestUtils.setField(service, "importDir", tempDir.toString()); + + when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty()); + when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0)); + + service.runImportAsync(); + + verify(s3Client, times(1)).putObject(any(PutObjectRequest.class), any(RequestBody.class)); + } + + @Test + void runImportAsync_setsSkippedCount_toOne_whenOneFakeFile(@TempDir Path tempDir) throws Exception { + byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF- + Files.write(tempDir.resolve("real.pdf"), pdfHeader); + Files.writeString(tempDir.resolve("fake.pdf"), "not a pdf"); + buildMinimalImportXlsx(tempDir, "real.pdf", "fake.pdf"); + ReflectionTestUtils.setField(service, "importDir", tempDir.toString()); + + when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty()); + when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0)); + + service.runImportAsync(); + + assertThat(service.getStatus().skipped()).isEqualTo(1); + } + + @Test + void runImportAsync_includesRejectedFilename_inSkippedFiles(@TempDir Path tempDir) throws Exception { + byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF- + Files.write(tempDir.resolve("real.pdf"), pdfHeader); + Files.writeString(tempDir.resolve("fake.pdf"), "not a pdf"); + buildMinimalImportXlsx(tempDir, "real.pdf", "fake.pdf"); + ReflectionTestUtils.setField(service, "importDir", tempDir.toString()); + + when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty()); + when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0)); + + service.runImportAsync(); + + assertThat(service.getStatus().skippedFiles()) + .extracting(MassImportService.SkippedFile::filename) + .contains("fake.pdf"); + } + + @Test + void runImportAsync_skipsFile_whenShorterThanFourBytes(@TempDir Path tempDir) throws Exception { + Files.write(tempDir.resolve("tiny.pdf"), new byte[]{0x25, 0x50, 0x44}); // only 3 bytes + buildMinimalImportXlsx(tempDir, "tiny.pdf"); + ReflectionTestUtils.setField(service, "importDir", tempDir.toString()); + + service.runImportAsync(); + + assertThat(service.getStatus().skipped()).isEqualTo(1); + } + // ─── readOds — XXE security regression ─────────────────────────────────── // Security regression — do not remove. @@ -621,4 +684,18 @@ class MassImportServiceTest { } return destination.toFile(); } + + private void buildMinimalImportXlsx(Path dir, String... filenames) throws Exception { + Path xlsx = dir.resolve("import.xlsx"); + try (XSSFWorkbook wb = new XSSFWorkbook()) { + org.apache.poi.ss.usermodel.Sheet sheet = wb.createSheet("Sheet1"); + sheet.createRow(0).createCell(0).setCellValue("Index"); + for (int i = 0; i < filenames.length; i++) { + sheet.createRow(i + 1).createCell(0).setCellValue(filenames[i]); + } + try (OutputStream out = Files.newOutputStream(xlsx)) { + wb.write(out); + } + } + } } diff --git a/backend/src/test/java/org/raddatz/familienarchiv/user/AdminControllerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/user/AdminControllerTest.java index 533aa7b3..cc2b885a 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/user/AdminControllerTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/user/AdminControllerTest.java @@ -19,6 +19,7 @@ import org.springframework.test.web.servlet.MockMvc; import java.time.LocalDateTime; import java.util.List; +import java.util.List; import static org.mockito.ArgumentMatchers.anyList; import static org.mockito.Mockito.verify; @@ -46,7 +47,7 @@ class AdminControllerTest { @WithMockUser(authorities = "ADMIN") void importStatus_returns200_withStatusCode_whenAdmin() throws Exception { MassImportService.ImportStatus status = new MassImportService.ImportStatus( - MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, null); + MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, 0, List.of(), null); when(massImportService.getStatus()).thenReturn(status); mockMvc.perform(get("/api/admin/import-status")) @@ -60,7 +61,7 @@ class AdminControllerTest { @WithMockUser(authorities = "ADMIN") void importStatus_messageField_notPresentInApiResponse() throws Exception { MassImportService.ImportStatus status = new MassImportService.ImportStatus( - MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, null); + MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, 0, List.of(), null); when(massImportService.getStatus()).thenReturn(status); mockMvc.perform(get("/api/admin/import-status"))