security(import): validate PDF magic bytes before S3 upload #618

Merged
marcel merged 10 commits from worktree-feat+issue-529-pdf-magic-bytes into main 2026-05-19 09:45:04 +02:00
2 changed files with 30 additions and 16 deletions
Showing only changes of commit 53dba772ae - Show all commits

View File

@@ -2,6 +2,7 @@ package org.raddatz.familienarchiv.importing;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.ss.usermodel.*;
@@ -55,9 +56,20 @@ public class MassImportService {
public enum State { IDLE, RUNNING, DONE, FAILED }
public record SkippedFile(String filename, String reason) {}
public record SkippedFile(
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) String filename,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) String reason
) {}
public record ImportStatus(State state, String statusCode, @JsonIgnore String message, int processed, List<SkippedFile> skippedFiles, LocalDateTime startedAt) {
public record ImportStatus(
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) State state,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) String statusCode,
@JsonIgnore String message,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) int processed,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) List<SkippedFile> skippedFiles,
LocalDateTime startedAt
) {
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
@JsonProperty("skipped")
public int skipped() { return skippedFiles.size(); }
}
@@ -300,8 +312,12 @@ public class MassImportService {
return new ProcessResult(processed, skippedFiles);
}
InputStream openFileStream(File file) throws IOException {
return new FileInputStream(file);
}
private boolean isPdfMagicBytes(File file) throws IOException {
try (InputStream is = new FileInputStream(file)) {
try (InputStream is = openFileStream(file)) {
byte[] header = is.readNBytes(4);
return header.length == 4
&& header[0] == 0x25 // %

View File

@@ -40,7 +40,6 @@ import java.util.zip.ZipOutputStream;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.junit.jupiter.api.Assumptions.assumeTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.*;
@@ -572,20 +571,19 @@ class MassImportServiceTest {
@Test
void runImportAsync_skipsFile_whenMagicBytesCheckThrowsIOException(@TempDir Path tempDir) throws Exception {
Files.writeString(tempDir.resolve("unreadable.pdf"), "some content");
File unreadable = tempDir.resolve("unreadable.pdf").toFile();
buildMinimalImportXlsx(tempDir, "unreadable.pdf");
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
unreadable.setReadable(false);
assumeTrue(!unreadable.canRead(), "Requires non-root file permissions");
try {
service.runImportAsync();
assertThat(service.getStatus().skipped()).isEqualTo(1);
assertThat(service.getStatus().skippedFiles())
.extracting(MassImportService.SkippedFile::reason)
.containsExactly("FILE_READ_ERROR");
} finally {
unreadable.setReadable(true);
}
lenient().when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
MassImportService spyService = spy(service);
doThrow(new java.io.IOException("simulated read error")).when(spyService).openFileStream(any(File.class));
spyService.runImportAsync();
assertThat(spyService.getStatus().skipped()).isEqualTo(1);
assertThat(spyService.getStatus().skippedFiles())
.extracting(MassImportService.SkippedFile::reason)
.containsExactly("FILE_READ_ERROR");
}
// ─── readOds — XXE security regression ───────────────────────────────────