feat(import): validate PDF magic bytes before S3 upload

Reads first 4 bytes of each candidate file before upload; rejects any
file whose header does not match %PDF (0x25 0x50 0x44 0x46). Skipped
files are counted and collected in ImportStatus.skippedFiles so
operators can see what was rejected without querying Loki.

Breaking: ImportStatus record gains skipped + skippedFiles fields.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-18 12:51:29 +02:00
committed by marcel
parent 1247b51d9e
commit f77fb79cd2
3 changed files with 141 additions and 27 deletions

View File

@@ -135,7 +135,7 @@ class MassImportServiceTest {
@Test
void runImportAsync_throwsConflict_whenAlreadyRunning() {
MassImportService.ImportStatus running = new MassImportService.ImportStatus(
MassImportService.State.RUNNING, "IMPORT_RUNNING", "Running...", 0, LocalDateTime.now());
MassImportService.State.RUNNING, "IMPORT_RUNNING", "Running...", 0, 0, List.of(), LocalDateTime.now());
ReflectionTestUtils.setField(service, "currentStatus", running);
assertThatThrownBy(() -> service.runImportAsync())
@@ -325,8 +325,8 @@ class MassImportServiceTest {
@Test
void processRows_returnsZero_whenOnlyHeaderRow() {
List<List<String>> rows = List.of(List.of("header", "col1"));
Integer result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
assertThat(result).isEqualTo(0);
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
assertThat(result.processed()).isEqualTo(0);
}
@Test
@@ -335,8 +335,8 @@ class MassImportServiceTest {
List.of("header"),
minimalCells("") // blank index
);
Integer result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
assertThat(result).isEqualTo(0);
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
assertThat(result.processed()).isEqualTo(0);
verify(documentService, never()).findByOriginalFilename(any());
}
@@ -349,9 +349,9 @@ class MassImportServiceTest {
List.of("header"),
minimalCells("doc001") // no dot → appends ".pdf"
);
Integer result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
assertThat(result).isEqualTo(1);
assertThat(result.processed()).isEqualTo(1);
verify(documentService).findByOriginalFilename("doc001.pdf");
}
@@ -364,9 +364,9 @@ class MassImportServiceTest {
List.of("header"),
minimalCells("doc002.pdf") // has dot → used as-is
);
Integer result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
assertThat(result).isEqualTo(1);
assertThat(result.processed()).isEqualTo(1);
verify(documentService).findByOriginalFilename("doc002.pdf");
}
@@ -525,6 +525,69 @@ class MassImportServiceTest {
assertThat(result).isEqualTo("hello");
}
// ─── PDF magic byte validation regression ─────────────────────────────────
@Test
void runImportAsync_uploadsValidPdf_andSkipsFakeOne(@TempDir Path tempDir) throws Exception {
byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF-
Files.write(tempDir.resolve("real.pdf"), pdfHeader);
Files.writeString(tempDir.resolve("fake.pdf"), "not a pdf");
buildMinimalImportXlsx(tempDir, "real.pdf", "fake.pdf");
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
service.runImportAsync();
verify(s3Client, times(1)).putObject(any(PutObjectRequest.class), any(RequestBody.class));
}
@Test
void runImportAsync_setsSkippedCount_toOne_whenOneFakeFile(@TempDir Path tempDir) throws Exception {
byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF-
Files.write(tempDir.resolve("real.pdf"), pdfHeader);
Files.writeString(tempDir.resolve("fake.pdf"), "not a pdf");
buildMinimalImportXlsx(tempDir, "real.pdf", "fake.pdf");
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
service.runImportAsync();
assertThat(service.getStatus().skipped()).isEqualTo(1);
}
@Test
void runImportAsync_includesRejectedFilename_inSkippedFiles(@TempDir Path tempDir) throws Exception {
byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF-
Files.write(tempDir.resolve("real.pdf"), pdfHeader);
Files.writeString(tempDir.resolve("fake.pdf"), "not a pdf");
buildMinimalImportXlsx(tempDir, "real.pdf", "fake.pdf");
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
service.runImportAsync();
assertThat(service.getStatus().skippedFiles())
.extracting(MassImportService.SkippedFile::filename)
.contains("fake.pdf");
}
@Test
void runImportAsync_skipsFile_whenShorterThanFourBytes(@TempDir Path tempDir) throws Exception {
Files.write(tempDir.resolve("tiny.pdf"), new byte[]{0x25, 0x50, 0x44}); // only 3 bytes
buildMinimalImportXlsx(tempDir, "tiny.pdf");
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
service.runImportAsync();
assertThat(service.getStatus().skipped()).isEqualTo(1);
}
// ─── readOds — XXE security regression ───────────────────────────────────
// Security regression — do not remove.
@@ -621,4 +684,18 @@ class MassImportServiceTest {
}
return destination.toFile();
}
private void buildMinimalImportXlsx(Path dir, String... filenames) throws Exception {
Path xlsx = dir.resolve("import.xlsx");
try (XSSFWorkbook wb = new XSSFWorkbook()) {
org.apache.poi.ss.usermodel.Sheet sheet = wb.createSheet("Sheet1");
sheet.createRow(0).createCell(0).setCellValue("Index");
for (int i = 0; i < filenames.length; i++) {
sheet.createRow(i + 1).createCell(0).setCellValue(filenames[i]);
}
try (OutputStream out = Files.newOutputStream(xlsx)) {
wb.write(out);
}
}
}
}

View File

@@ -19,6 +19,7 @@ import org.springframework.test.web.servlet.MockMvc;
import java.time.LocalDateTime;
import java.util.List;
import java.util.List;
import static org.mockito.ArgumentMatchers.anyList;
import static org.mockito.Mockito.verify;
@@ -47,7 +48,7 @@ class AdminControllerTest {
@WithMockUser(authorities = "ADMIN")
void importStatus_returns200_withStatusCode_whenAdmin() throws Exception {
MassImportService.ImportStatus status = new MassImportService.ImportStatus(
MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, null);
MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, 0, List.of(), null);
when(massImportService.getStatus()).thenReturn(status);
mockMvc.perform(get("/api/admin/import-status"))
@@ -61,7 +62,7 @@ class AdminControllerTest {
@WithMockUser(authorities = "ADMIN")
void importStatus_messageField_notPresentInApiResponse() throws Exception {
MassImportService.ImportStatus status = new MassImportService.ImportStatus(
MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, null);
MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, 0, List.of(), null);
when(massImportService.getStatus()).thenReturn(status);
mockMvc.perform(get("/api/admin/import-status"))