feat(import): validate PDF magic bytes before S3 upload
Reads first 4 bytes of each candidate file before upload; rejects any file whose header does not match %PDF (0x25 0x50 0x44 0x46). Skipped files are counted and collected in ImportStatus.skippedFiles so operators can see what was rejected without querying Loki. Breaking: ImportStatus record gains skipped + skippedFiles fields. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -31,6 +31,7 @@ import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
@@ -53,9 +54,13 @@ public class MassImportService {
|
||||
|
||||
public enum State { IDLE, RUNNING, DONE, FAILED }
|
||||
|
||||
public record ImportStatus(State state, String statusCode, @JsonIgnore String message, int processed, LocalDateTime startedAt) {}
|
||||
public record SkippedFile(String filename, String reason) {}
|
||||
|
||||
private volatile ImportStatus currentStatus = new ImportStatus(State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, null);
|
||||
public record ImportStatus(State state, String statusCode, @JsonIgnore String message, int processed, int skipped, List<SkippedFile> skippedFiles, LocalDateTime startedAt) {}
|
||||
|
||||
record ProcessResult(int processed, int skipped, List<SkippedFile> skippedFiles) {}
|
||||
|
||||
private volatile ImportStatus currentStatus = new ImportStatus(State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, 0, List.of(), null);
|
||||
|
||||
public ImportStatus getStatus() {
|
||||
return currentStatus;
|
||||
@@ -117,22 +122,22 @@ public class MassImportService {
|
||||
if (currentStatus.state() == State.RUNNING) {
|
||||
throw DomainException.conflict(ErrorCode.IMPORT_ALREADY_RUNNING, "A mass import is already in progress");
|
||||
}
|
||||
currentStatus = new ImportStatus(State.RUNNING, "IMPORT_RUNNING", "Import läuft...", 0, LocalDateTime.now());
|
||||
currentStatus = new ImportStatus(State.RUNNING, "IMPORT_RUNNING", "Import läuft...", 0, 0, List.of(), LocalDateTime.now());
|
||||
try {
|
||||
File spreadsheet = findSpreadsheetFile();
|
||||
log.info("Starte Massenimport aus: {}", spreadsheet.getAbsolutePath());
|
||||
int processed = processRows(readSpreadsheet(spreadsheet));
|
||||
ProcessResult result = processRows(readSpreadsheet(spreadsheet));
|
||||
currentStatus = new ImportStatus(State.DONE, "IMPORT_DONE",
|
||||
"Import abgeschlossen. " + processed + " Dokumente verarbeitet.",
|
||||
processed, currentStatus.startedAt());
|
||||
"Import abgeschlossen. " + result.processed() + " Dokumente verarbeitet.",
|
||||
result.processed(), result.skipped(), result.skippedFiles(), currentStatus.startedAt());
|
||||
} catch (NoSpreadsheetException e) {
|
||||
log.error("Massenimport fehlgeschlagen: keine Tabellendatei", e);
|
||||
currentStatus = new ImportStatus(State.FAILED, "IMPORT_FAILED_NO_SPREADSHEET",
|
||||
"Fehler: " + e.getMessage(), 0, currentStatus.startedAt());
|
||||
"Fehler: " + e.getMessage(), 0, 0, List.of(), currentStatus.startedAt());
|
||||
} catch (Exception e) {
|
||||
log.error("Massenimport fehlgeschlagen", e);
|
||||
currentStatus = new ImportStatus(State.FAILED, "IMPORT_FAILED_INTERNAL",
|
||||
"Fehler: " + e.getMessage(), 0, currentStatus.startedAt());
|
||||
"Fehler: " + e.getMessage(), 0, 0, List.of(), currentStatus.startedAt());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -254,8 +259,10 @@ public class MassImportService {
|
||||
|
||||
// --- Import logic (works on neutral List<String> rows) ---
|
||||
|
||||
private int processRows(List<List<String>> rows) {
|
||||
int count = 0;
|
||||
private ProcessResult processRows(List<List<String>> rows) {
|
||||
int processed = 0;
|
||||
List<SkippedFile> skippedFiles = new ArrayList<>();
|
||||
|
||||
for (int i = 1; i < rows.size(); i++) { // skip header row
|
||||
List<String> cells = rows.get(i);
|
||||
String index = getCell(cells, colIndex);
|
||||
@@ -266,18 +273,46 @@ public class MassImportService {
|
||||
if (fileOnDisk.isEmpty()) {
|
||||
log.warn("Datei nicht gefunden, importiere nur Metadaten: {}", filename);
|
||||
}
|
||||
importSingleDocument(cells, fileOnDisk, filename, index);
|
||||
count++;
|
||||
|
||||
if (fileOnDisk.isPresent()) {
|
||||
try {
|
||||
if (!isPdfMagicBytes(fileOnDisk.get())) {
|
||||
log.warn("Überspringe {}: Datei beginnt nicht mit %PDF-Signatur", filename);
|
||||
skippedFiles.add(new SkippedFile(filename, "Keine gültige PDF-Signatur"));
|
||||
continue;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error("Fehler beim Prüfen der Magic-Bytes für {}", filename, e);
|
||||
skippedFiles.add(new SkippedFile(filename, "Fehler beim Lesen der Datei"));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
boolean imported = importSingleDocument(cells, fileOnDisk, filename, index);
|
||||
if (imported) {
|
||||
processed++;
|
||||
}
|
||||
}
|
||||
return new ProcessResult(processed, skippedFiles.size(), skippedFiles);
|
||||
}
|
||||
|
||||
private boolean isPdfMagicBytes(File file) throws IOException {
|
||||
try (InputStream is = new FileInputStream(file)) {
|
||||
byte[] header = is.readNBytes(4);
|
||||
return header.length == 4
|
||||
&& header[0] == 0x25 // %
|
||||
&& header[1] == 0x50 // P
|
||||
&& header[2] == 0x44 // D
|
||||
&& header[3] == 0x46; // F
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
@Transactional
|
||||
protected void importSingleDocument(List<String> cells, Optional<File> file, String originalFilename, String index) {
|
||||
protected boolean importSingleDocument(List<String> cells, Optional<File> file, String originalFilename, String index) {
|
||||
Optional<Document> existing = documentService.findByOriginalFilename(originalFilename);
|
||||
if (existing.isPresent() && existing.get().getStatus() != DocumentStatus.PLACEHOLDER) {
|
||||
log.info("Dokument {} existiert bereits, überspringe.", originalFilename);
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
String archiveBox = getCell(cells, colBox);
|
||||
@@ -313,7 +348,7 @@ public class MassImportService {
|
||||
status = DocumentStatus.UPLOADED;
|
||||
} catch (Exception e) {
|
||||
log.error("S3 Upload Fehler für {}", file.get().getName(), e);
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -355,6 +390,7 @@ public class MassImportService {
|
||||
thumbnailAsyncRunner.dispatchAfterCommit(saved.getId());
|
||||
}
|
||||
log.info("Importiert{}: {}", file.isEmpty() ? " (nur Metadaten)" : "", originalFilename);
|
||||
return true;
|
||||
}
|
||||
|
||||
// --- Helpers ---
|
||||
|
||||
@@ -135,7 +135,7 @@ class MassImportServiceTest {
|
||||
@Test
|
||||
void runImportAsync_throwsConflict_whenAlreadyRunning() {
|
||||
MassImportService.ImportStatus running = new MassImportService.ImportStatus(
|
||||
MassImportService.State.RUNNING, "IMPORT_RUNNING", "Running...", 0, LocalDateTime.now());
|
||||
MassImportService.State.RUNNING, "IMPORT_RUNNING", "Running...", 0, 0, List.of(), LocalDateTime.now());
|
||||
ReflectionTestUtils.setField(service, "currentStatus", running);
|
||||
|
||||
assertThatThrownBy(() -> service.runImportAsync())
|
||||
@@ -325,8 +325,8 @@ class MassImportServiceTest {
|
||||
@Test
|
||||
void processRows_returnsZero_whenOnlyHeaderRow() {
|
||||
List<List<String>> rows = List.of(List.of("header", "col1"));
|
||||
Integer result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
||||
assertThat(result).isEqualTo(0);
|
||||
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
||||
assertThat(result.processed()).isEqualTo(0);
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -335,8 +335,8 @@ class MassImportServiceTest {
|
||||
List.of("header"),
|
||||
minimalCells("") // blank index
|
||||
);
|
||||
Integer result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
||||
assertThat(result).isEqualTo(0);
|
||||
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
||||
assertThat(result.processed()).isEqualTo(0);
|
||||
verify(documentService, never()).findByOriginalFilename(any());
|
||||
}
|
||||
|
||||
@@ -349,9 +349,9 @@ class MassImportServiceTest {
|
||||
List.of("header"),
|
||||
minimalCells("doc001") // no dot → appends ".pdf"
|
||||
);
|
||||
Integer result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
||||
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
||||
|
||||
assertThat(result).isEqualTo(1);
|
||||
assertThat(result.processed()).isEqualTo(1);
|
||||
verify(documentService).findByOriginalFilename("doc001.pdf");
|
||||
}
|
||||
|
||||
@@ -364,9 +364,9 @@ class MassImportServiceTest {
|
||||
List.of("header"),
|
||||
minimalCells("doc002.pdf") // has dot → used as-is
|
||||
);
|
||||
Integer result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
||||
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
||||
|
||||
assertThat(result).isEqualTo(1);
|
||||
assertThat(result.processed()).isEqualTo(1);
|
||||
verify(documentService).findByOriginalFilename("doc002.pdf");
|
||||
}
|
||||
|
||||
@@ -525,6 +525,69 @@ class MassImportServiceTest {
|
||||
assertThat(result).isEqualTo("hello");
|
||||
}
|
||||
|
||||
// ─── PDF magic byte validation regression ─────────────────────────────────
|
||||
|
||||
@Test
|
||||
void runImportAsync_uploadsValidPdf_andSkipsFakeOne(@TempDir Path tempDir) throws Exception {
|
||||
byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF-
|
||||
Files.write(tempDir.resolve("real.pdf"), pdfHeader);
|
||||
Files.writeString(tempDir.resolve("fake.pdf"), "not a pdf");
|
||||
buildMinimalImportXlsx(tempDir, "real.pdf", "fake.pdf");
|
||||
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
|
||||
|
||||
when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
service.runImportAsync();
|
||||
|
||||
verify(s3Client, times(1)).putObject(any(PutObjectRequest.class), any(RequestBody.class));
|
||||
}
|
||||
|
||||
@Test
|
||||
void runImportAsync_setsSkippedCount_toOne_whenOneFakeFile(@TempDir Path tempDir) throws Exception {
|
||||
byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF-
|
||||
Files.write(tempDir.resolve("real.pdf"), pdfHeader);
|
||||
Files.writeString(tempDir.resolve("fake.pdf"), "not a pdf");
|
||||
buildMinimalImportXlsx(tempDir, "real.pdf", "fake.pdf");
|
||||
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
|
||||
|
||||
when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
service.runImportAsync();
|
||||
|
||||
assertThat(service.getStatus().skipped()).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
void runImportAsync_includesRejectedFilename_inSkippedFiles(@TempDir Path tempDir) throws Exception {
|
||||
byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF-
|
||||
Files.write(tempDir.resolve("real.pdf"), pdfHeader);
|
||||
Files.writeString(tempDir.resolve("fake.pdf"), "not a pdf");
|
||||
buildMinimalImportXlsx(tempDir, "real.pdf", "fake.pdf");
|
||||
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
|
||||
|
||||
when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
service.runImportAsync();
|
||||
|
||||
assertThat(service.getStatus().skippedFiles())
|
||||
.extracting(MassImportService.SkippedFile::filename)
|
||||
.contains("fake.pdf");
|
||||
}
|
||||
|
||||
@Test
|
||||
void runImportAsync_skipsFile_whenShorterThanFourBytes(@TempDir Path tempDir) throws Exception {
|
||||
Files.write(tempDir.resolve("tiny.pdf"), new byte[]{0x25, 0x50, 0x44}); // only 3 bytes
|
||||
buildMinimalImportXlsx(tempDir, "tiny.pdf");
|
||||
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
|
||||
|
||||
service.runImportAsync();
|
||||
|
||||
assertThat(service.getStatus().skipped()).isEqualTo(1);
|
||||
}
|
||||
|
||||
// ─── readOds — XXE security regression ───────────────────────────────────
|
||||
|
||||
// Security regression — do not remove.
|
||||
@@ -621,4 +684,18 @@ class MassImportServiceTest {
|
||||
}
|
||||
return destination.toFile();
|
||||
}
|
||||
|
||||
private void buildMinimalImportXlsx(Path dir, String... filenames) throws Exception {
|
||||
Path xlsx = dir.resolve("import.xlsx");
|
||||
try (XSSFWorkbook wb = new XSSFWorkbook()) {
|
||||
org.apache.poi.ss.usermodel.Sheet sheet = wb.createSheet("Sheet1");
|
||||
sheet.createRow(0).createCell(0).setCellValue("Index");
|
||||
for (int i = 0; i < filenames.length; i++) {
|
||||
sheet.createRow(i + 1).createCell(0).setCellValue(filenames[i]);
|
||||
}
|
||||
try (OutputStream out = Files.newOutputStream(xlsx)) {
|
||||
wb.write(out);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ import org.springframework.test.web.servlet.MockMvc;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.List;
|
||||
import java.util.List;
|
||||
|
||||
import static org.mockito.ArgumentMatchers.anyList;
|
||||
import static org.mockito.Mockito.verify;
|
||||
@@ -46,7 +47,7 @@ class AdminControllerTest {
|
||||
@WithMockUser(authorities = "ADMIN")
|
||||
void importStatus_returns200_withStatusCode_whenAdmin() throws Exception {
|
||||
MassImportService.ImportStatus status = new MassImportService.ImportStatus(
|
||||
MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, null);
|
||||
MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, 0, List.of(), null);
|
||||
when(massImportService.getStatus()).thenReturn(status);
|
||||
|
||||
mockMvc.perform(get("/api/admin/import-status"))
|
||||
@@ -60,7 +61,7 @@ class AdminControllerTest {
|
||||
@WithMockUser(authorities = "ADMIN")
|
||||
void importStatus_messageField_notPresentInApiResponse() throws Exception {
|
||||
MassImportService.ImportStatus status = new MassImportService.ImportStatus(
|
||||
MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, null);
|
||||
MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, 0, List.of(), null);
|
||||
when(massImportService.getStatus()).thenReturn(status);
|
||||
|
||||
mockMvc.perform(get("/api/admin/import-status"))
|
||||
|
||||
Reference in New Issue
Block a user