diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java index e09aaa76..975517e7 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java @@ -56,9 +56,17 @@ public class MassImportService { public enum State { IDLE, RUNNING, DONE, FAILED } + public enum SkipReason { + INVALID_FILENAME_PATH_TRAVERSAL, + INVALID_PDF_SIGNATURE, + FILE_READ_ERROR, + ALREADY_EXISTS, + S3_UPLOAD_FAILED + } + public record SkippedFile( @Schema(requiredMode = Schema.RequiredMode.REQUIRED) String filename, - @Schema(requiredMode = Schema.RequiredMode.REQUIRED) String reason + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) SkipReason reason ) {} public record ImportStatus( @@ -291,6 +299,11 @@ public class MassImportService { if (index.isBlank()) continue; String filename = index.contains(".") ? index : index + ".pdf"; + if (!isValidImportFilename(filename)) { + log.warn("Skipping import row {}: filename rejected — {}", i, filename); + skippedFiles.add(new SkippedFile(filename, SkipReason.INVALID_FILENAME_PATH_TRAVERSAL)); + continue; + } Optional fileOnDisk = findFileRecursive(filename); if (fileOnDisk.isEmpty()) { log.warn("Datei nicht gefunden, importiere nur Metadaten: {}", filename); @@ -300,17 +313,17 @@ public class MassImportService { try { if (!isPdfMagicBytes(fileOnDisk.get())) { log.warn("Überspringe {}: Datei beginnt nicht mit %PDF-Signatur", filename); - skippedFiles.add(new SkippedFile(filename, "INVALID_PDF_SIGNATURE")); + skippedFiles.add(new SkippedFile(filename, SkipReason.INVALID_PDF_SIGNATURE)); continue; } } catch (IOException e) { log.error("Fehler beim Prüfen der Magic-Bytes für {}", filename, e); - skippedFiles.add(new SkippedFile(filename, "FILE_READ_ERROR")); + skippedFiles.add(new SkippedFile(filename, SkipReason.FILE_READ_ERROR)); continue; } } - Optional skipReason = importSingleDocument(cells, fileOnDisk, filename, index); + Optional skipReason = importSingleDocument(cells, fileOnDisk, filename, index); if (skipReason.isPresent()) { skippedFiles.add(new SkippedFile(filename, skipReason.get())); } else { @@ -320,6 +333,23 @@ public class MassImportService { return new ProcessResult(processed, skippedFiles); } + private boolean isValidImportFilename(String filename) { + if (filename == null || filename.isBlank()) return false; + if (filename.contains("/")) return false; + if (filename.contains("\\")) return false; + if (filename.contains("∕")) return false; // U+2215 DIVISION SLASH + if (filename.contains("/")) return false; // U+FF0F FULLWIDTH SOLIDUS + if (filename.contains("⧵")) return false; // U+29F5 REVERSE SOLIDUS OPERATOR + if (filename.contains("..")) return false; + if (filename.equals(".")) return false; + if (filename.contains("\0")) return false; + // Paths.get() is safe here on Linux for all inputs that passed the checks above; + // it may throw InvalidPathException for OS-specific illegal chars on Windows, + // but those are not reachable in production. + if (Paths.get(filename).isAbsolute()) return false; + return true; + } + // package-private: Mockito spy in tests can override to inject IOException InputStream openFileStream(File file) throws IOException { return new FileInputStream(file); @@ -342,11 +372,11 @@ public class MassImportService { * @return empty Optional on success; an Optional containing the skip reason on failure/skip. */ @Transactional - protected Optional importSingleDocument(List cells, Optional file, String originalFilename, String index) { + protected Optional importSingleDocument(List cells, Optional file, String originalFilename, String index) { Optional existing = documentService.findByOriginalFilename(originalFilename); if (existing.isPresent() && existing.get().getStatus() != DocumentStatus.PLACEHOLDER) { log.info("Dokument {} existiert bereits, überspringe.", originalFilename); - return Optional.of("ALREADY_EXISTS"); + return Optional.of(SkipReason.ALREADY_EXISTS); } String archiveBox = getCell(cells, colBox); @@ -382,7 +412,7 @@ public class MassImportService { status = DocumentStatus.UPLOADED; } catch (Exception e) { log.error("S3 Upload Fehler für {}", file.get().getName(), e); - return Optional.of("S3_UPLOAD_FAILED"); + return Optional.of(SkipReason.S3_UPLOAD_FAILED); } } @@ -460,11 +490,18 @@ public class MassImportService { } private Optional findFileRecursive(String filename) { - try (Stream walk = Files.walk(Paths.get(importDir))) { - return walk.filter(p -> !Files.isDirectory(p)) + File baseDir = new File(importDir); + try (Stream walk = Files.walk(baseDir.toPath())) { + Optional match = walk.filter(p -> !Files.isDirectory(p)) .filter(p -> p.getFileName().toString().equals(filename)) - .map(Path::toFile) .findFirst(); + if (match.isEmpty()) return Optional.empty(); + File candidate = match.get().toFile(); + String baseDirCanonical = baseDir.getCanonicalPath(); + if (!candidate.getCanonicalPath().startsWith(baseDirCanonical + File.separator)) { + throw DomainException.internal(ErrorCode.INTERNAL_ERROR, "Path escape detected: " + candidate); + } + return Optional.of(candidate); } catch (IOException e) { return Optional.empty(); } diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java index 126a6d74..d87d28c1 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java @@ -154,10 +154,10 @@ class MassImportServiceTest { .build(); when(documentService.findByOriginalFilename("doc001.pdf")).thenReturn(Optional.of(existing)); - Optional result = service.importSingleDocument(minimalCells("doc001.pdf"), Optional.empty(), "doc001.pdf", "doc001"); + Optional result = service.importSingleDocument(minimalCells("doc001.pdf"), Optional.empty(), "doc001.pdf", "doc001"); verify(documentService, never()).save(any()); - assertThat(result).isPresent().contains("ALREADY_EXISTS"); + assertThat(result).isPresent().contains(MassImportService.SkipReason.ALREADY_EXISTS); } // ─── importSingleDocument — already-exists guard fires before file I/O ───── @@ -179,10 +179,10 @@ class MassImportServiceTest { byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF- Files.write(physicalFile, pdfHeader); - Optional result = service.importSingleDocument( + Optional result = service.importSingleDocument( minimalCells("present.pdf"), Optional.of(physicalFile.toFile()), "present.pdf", "present"); - assertThat(result).isPresent().contains("ALREADY_EXISTS"); + assertThat(result).isPresent().contains(MassImportService.SkipReason.ALREADY_EXISTS); verify(s3Client, never()).putObject(any(PutObjectRequest.class), any(RequestBody.class)); verify(documentService, never()).save(any()); } @@ -204,7 +204,7 @@ class MassImportServiceTest { assertThat(service.getStatus().skipped()).isEqualTo(1); assertThat(service.getStatus().skippedFiles()) .extracting(MassImportService.SkippedFile::filename, MassImportService.SkippedFile::reason) - .containsExactly(org.assertj.core.groups.Tuple.tuple("upload_fail.pdf", "S3_UPLOAD_FAILED")); + .containsExactly(org.assertj.core.groups.Tuple.tuple("upload_fail.pdf", MassImportService.SkipReason.S3_UPLOAD_FAILED)); } @Test @@ -223,7 +223,7 @@ class MassImportServiceTest { assertThat(service.getStatus().skipped()).isEqualTo(1); assertThat(service.getStatus().skippedFiles()) .extracting(MassImportService.SkippedFile::reason) - .containsExactly("ALREADY_EXISTS"); + .containsExactly(MassImportService.SkipReason.ALREADY_EXISTS); } // ─── importSingleDocument — create new document (metadata only) ─────────── @@ -283,11 +283,11 @@ class MassImportServiceTest { doThrow(new RuntimeException("S3 error")) .when(s3Client).putObject(any(PutObjectRequest.class), any(RequestBody.class)); - Optional result = service.importSingleDocument( + Optional result = service.importSingleDocument( minimalCells("fail.pdf"), Optional.of(tempFile.toFile()), "fail.pdf", "fail"); verify(documentService, never()).save(any()); - assertThat(result).isPresent().contains("S3_UPLOAD_FAILED"); + assertThat(result).isPresent().contains(MassImportService.SkipReason.S3_UPLOAD_FAILED); } // ─── importSingleDocument — sender handling ─────────────────────────────── @@ -438,6 +438,110 @@ class MassImportServiceTest { verify(documentService).findByOriginalFilename("doc002.pdf"); } + // ─── isValidImportFilename — security regression — do not remove ───────── + + @Test + void isValidImportFilename_returnsFalse_whenFilenameIsNull() { + boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", (String) null); + assertThat(result).isFalse(); + } + + @Test + void isValidImportFilename_returnsFalse_whenFilenameIsBlank() { + boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", " "); + assertThat(result).isFalse(); + } + + @Test + void isValidImportFilename_returnsFalse_whenFilenameContainsForwardSlash() { + boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "etc/passwd"); + assertThat(result).isFalse(); + } + + @Test + void isValidImportFilename_returnsFalse_whenFilenameContainsBackslash() { + boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "..\\etc\\passwd"); + assertThat(result).isFalse(); + } + + @Test + void isValidImportFilename_returnsFalse_whenFilenameContainsDotDot() { + boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "doc..evil.pdf"); + assertThat(result).isFalse(); + } + + @Test + void isValidImportFilename_returnsFalse_whenFilenameIsDotDot() { + boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", ".."); + assertThat(result).isFalse(); + } + + @Test + void isValidImportFilename_returnsFalse_whenFilenameIsAbsolutePath() { + boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "/etc/passwd"); + assertThat(result).isFalse(); + } + + @Test + void isValidImportFilename_returnsFalse_whenFilenameContainsNullByte() { + boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "file\0.pdf"); + assertThat(result).isFalse(); + } + + @Test + void isValidImportFilename_returnsTrue_whenFilenameIsPlainBasename() { + boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "document.pdf"); + assertThat(result).isTrue(); + } + + @Test + void isValidImportFilename_returnsFalse_whenFilenameContainsUnicodeDivisionSlash() { + boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "foo∕bar.pdf"); + assertThat(result).isFalse(); + } + + @Test + void isValidImportFilename_returnsFalse_whenFilenameContainsFullwidthSlash() { + boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "foo/bar.pdf"); + assertThat(result).isFalse(); + } + + @Test + void isValidImportFilename_returnsFalse_whenFilenameContainsUnicodeReverseSolidus() { + boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "foo⧵bar.pdf"); + assertThat(result).isFalse(); + } + + @Test + void isValidImportFilename_returnsTrue_whenFilenameHasLeadingDot() { + boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", ".hidden.pdf"); + assertThat(result).isTrue(); + } + + @Test + void isValidImportFilename_returnsTrue_whenFilenameHasSpaces() { + boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "Brief an Oma.pdf"); + assertThat(result).isTrue(); + } + + @Test + void processRows_skipsRowAndContinues_whenFilenameIsPathTraversal() { + when(documentService.findByOriginalFilename("legitimate.pdf")).thenReturn(Optional.empty()); + when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0)); + + List> rows = List.of( + List.of("header"), + minimalCells("../evil"), // row 1: path traversal — should be skipped + minimalCells("legitimate.pdf") // row 2: valid — should be processed + ); + MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows); + + assertThat(result.processed()).isEqualTo(1); + assertThat(result.skippedFiles()) + .extracting(MassImportService.SkippedFile::reason) + .containsExactly(MassImportService.SkipReason.INVALID_FILENAME_PATH_TRAVERSAL); + } + // ─── importSingleDocument — non-blank optional fields ──────────────────── @Test @@ -651,7 +755,22 @@ class MassImportServiceTest { assertThat(spyService.getStatus().skipped()).isEqualTo(1); assertThat(spyService.getStatus().skippedFiles()) .extracting(MassImportService.SkippedFile::reason) - .containsExactly("FILE_READ_ERROR"); + .containsExactly(MassImportService.SkipReason.FILE_READ_ERROR); + } + + // ─── findFileRecursive — symlink escape security regression — do not remove ─ + + @Test + void findFileRecursive_throwsDomainException_whenSymlinkEscapesImportDir( + @TempDir Path importDirPath, @TempDir Path outsideDir) throws Exception { + Path outsideFile = outsideDir.resolve("secret.pdf"); + Files.writeString(outsideFile, "sensitive content"); + Files.createSymbolicLink(importDirPath.resolve("secret.pdf"), outsideFile); + + ReflectionTestUtils.setField(service, "importDir", importDirPath.toString()); + + assertThatThrownBy(() -> ReflectionTestUtils.invokeMethod(service, "findFileRecursive", "secret.pdf")) + .isInstanceOf(DomainException.class); } // ─── readOds — XXE security regression ───────────────────────────────────