feat(importing): add orchestrator, wire admin, retire raw-spreadsheet path
CanonicalImportOrchestrator runs the four loaders in an explicit dependency
DAG (TagTree -> PersonRegister -> PersonTree -> Document), owns the async
runner + ImportStatus state machine the admin UI consumes, smoke-checks all
four artifacts are present before starting (fail-fast IMPORT_FAILED_ARTIFACT
rather than a half-run), and fails closed on a malformed artifact.
AdminController now depends on the orchestrator; the {state, statusCode,
processed, skippedFiles, skipped} response shape is unchanged so
ImportStatusCard.svelte keeps working.
Deletes the legacy MassImportService (positional @Value app.import.col.*,
ISO-only parseDate, Java name classification) and the ODS/XXE
XxeSafeXmlParser path now that the loaders cover them — the security guards
were ported to DocumentImporter first (previous commit). Replaces the
positional column config in application.yaml with the canonical artifact
directory.
Refs #669
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,94 @@
|
|||||||
|
package org.raddatz.familienarchiv.importing;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.raddatz.familienarchiv.exception.DomainException;
|
||||||
|
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.scheduling.annotation.Async;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Runs the four canonical loaders in their real dependency order — encoded explicitly
|
||||||
|
* here, not implied by call order — and owns the async runner plus the {@link ImportStatus}
|
||||||
|
* state machine the admin UI consumes. The orchestrator smoke-checks that all four
|
||||||
|
* artifacts are present before starting, failing fast rather than half-loading tags but no
|
||||||
|
* documents. A malformed artifact (a loader throwing) sets {@code FAILED}; an individual
|
||||||
|
* bad file is surfaced through the {@link ImportStatus.SkippedFile} mechanism instead.
|
||||||
|
*/
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Slf4j
|
||||||
|
public class CanonicalImportOrchestrator {
|
||||||
|
|
||||||
|
private static final String TAG_TREE_ARTIFACT = "canonical-tag-tree.xlsx";
|
||||||
|
private static final String PERSONS_ARTIFACT = "canonical-persons.xlsx";
|
||||||
|
private static final String PERSONS_TREE_ARTIFACT = "canonical-persons-tree.json";
|
||||||
|
private static final String DOCUMENTS_ARTIFACT = "canonical-documents.xlsx";
|
||||||
|
|
||||||
|
private final TagTreeImporter tagTreeImporter;
|
||||||
|
private final PersonRegisterImporter personRegisterImporter;
|
||||||
|
private final PersonTreeImporter personTreeImporter;
|
||||||
|
private final DocumentImporter documentImporter;
|
||||||
|
|
||||||
|
@Value("${app.import.dir:/import}")
|
||||||
|
private String canonicalDir;
|
||||||
|
|
||||||
|
private volatile ImportStatus currentStatus = new ImportStatus(
|
||||||
|
ImportStatus.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
|
||||||
|
|
||||||
|
public ImportStatus getStatus() {
|
||||||
|
return currentStatus;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Async
|
||||||
|
public void runImportAsync() {
|
||||||
|
if (currentStatus.state() == ImportStatus.State.RUNNING) {
|
||||||
|
throw DomainException.conflict(ErrorCode.IMPORT_ALREADY_RUNNING, "A mass import is already in progress");
|
||||||
|
}
|
||||||
|
runImport();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Synchronous entry point — wrapped by {@link #runImportAsync()} and called directly in tests. */
|
||||||
|
void runImport() {
|
||||||
|
currentStatus = new ImportStatus(ImportStatus.State.RUNNING, "IMPORT_RUNNING",
|
||||||
|
"Import läuft...", 0, List.of(), LocalDateTime.now());
|
||||||
|
try {
|
||||||
|
File tagTree = requireArtifact(TAG_TREE_ARTIFACT);
|
||||||
|
File persons = requireArtifact(PERSONS_ARTIFACT);
|
||||||
|
File personsTree = requireArtifact(PERSONS_TREE_ARTIFACT);
|
||||||
|
File documents = requireArtifact(DOCUMENTS_ARTIFACT);
|
||||||
|
|
||||||
|
// Dependency DAG: documents need persons + tags; the tree needs persons.
|
||||||
|
tagTreeImporter.load(tagTree);
|
||||||
|
personRegisterImporter.load(persons);
|
||||||
|
personTreeImporter.load(personsTree);
|
||||||
|
DocumentImporter.LoadResult result = documentImporter.load(documents);
|
||||||
|
|
||||||
|
currentStatus = new ImportStatus(ImportStatus.State.DONE, "IMPORT_DONE",
|
||||||
|
"Import abgeschlossen. " + result.processed() + " Dokumente verarbeitet.",
|
||||||
|
result.processed(), result.skippedFiles(), currentStatus.startedAt());
|
||||||
|
} catch (DomainException e) {
|
||||||
|
log.error("Canonical import failed: {}", e.getMessage());
|
||||||
|
currentStatus = new ImportStatus(ImportStatus.State.FAILED, "IMPORT_FAILED_ARTIFACT",
|
||||||
|
"Fehler: " + e.getMessage(), 0, List.of(), currentStatus.startedAt());
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Canonical import failed", e);
|
||||||
|
currentStatus = new ImportStatus(ImportStatus.State.FAILED, "IMPORT_FAILED_INTERNAL",
|
||||||
|
"Fehler: " + e.getMessage(), 0, List.of(), currentStatus.startedAt());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private File requireArtifact(String name) {
|
||||||
|
File artifact = new File(canonicalDir, name);
|
||||||
|
if (!artifact.isFile()) {
|
||||||
|
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
|
||||||
|
"Missing canonical artifact: " + name);
|
||||||
|
}
|
||||||
|
return artifact;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,509 +0,0 @@
|
|||||||
package org.raddatz.familienarchiv.importing;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
|
||||||
import io.swagger.v3.oas.annotations.media.Schema;
|
|
||||||
import lombok.RequiredArgsConstructor;
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
import org.apache.poi.ss.usermodel.*;
|
|
||||||
import java.util.Objects;
|
|
||||||
import org.raddatz.familienarchiv.exception.DomainException;
|
|
||||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
|
||||||
import org.raddatz.familienarchiv.document.Document;
|
|
||||||
import org.raddatz.familienarchiv.document.DocumentService;
|
|
||||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
|
||||||
import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
|
|
||||||
import org.raddatz.familienarchiv.person.Person;
|
|
||||||
import org.raddatz.familienarchiv.tag.Tag;
|
|
||||||
import org.raddatz.familienarchiv.person.Person;
|
|
||||||
import org.raddatz.familienarchiv.person.PersonNameParser;
|
|
||||||
import org.raddatz.familienarchiv.person.PersonService;
|
|
||||||
import org.raddatz.familienarchiv.tag.TagService;
|
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
|
||||||
import org.springframework.scheduling.annotation.Async;
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
|
||||||
import org.w3c.dom.Element;
|
|
||||||
import org.w3c.dom.NodeList;
|
|
||||||
import software.amazon.awssdk.core.sync.RequestBody;
|
|
||||||
import software.amazon.awssdk.services.s3.S3Client;
|
|
||||||
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
|
|
||||||
|
|
||||||
import javax.xml.parsers.DocumentBuilderFactory;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.nio.file.Paths;
|
|
||||||
import java.time.LocalDate;
|
|
||||||
import java.time.LocalDateTime;
|
|
||||||
import java.time.format.DateTimeFormatter;
|
|
||||||
import java.time.format.DateTimeParseException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.UUID;
|
|
||||||
import java.util.stream.Stream;
|
|
||||||
import java.util.zip.ZipFile;
|
|
||||||
|
|
||||||
@Service
|
|
||||||
@RequiredArgsConstructor
|
|
||||||
@Slf4j
|
|
||||||
public class MassImportService {
|
|
||||||
|
|
||||||
public enum State { IDLE, RUNNING, DONE, FAILED }
|
|
||||||
|
|
||||||
public enum SkipReason {
|
|
||||||
INVALID_FILENAME_PATH_TRAVERSAL,
|
|
||||||
INVALID_PDF_SIGNATURE,
|
|
||||||
FILE_READ_ERROR,
|
|
||||||
ALREADY_EXISTS,
|
|
||||||
S3_UPLOAD_FAILED
|
|
||||||
}
|
|
||||||
|
|
||||||
public record SkippedFile(
|
|
||||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) String filename,
|
|
||||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) SkipReason reason
|
|
||||||
) {}
|
|
||||||
|
|
||||||
public record ImportStatus(
|
|
||||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) State state,
|
|
||||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) String statusCode,
|
|
||||||
@JsonIgnore String message,
|
|
||||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) int processed,
|
|
||||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) List<SkippedFile> skippedFiles,
|
|
||||||
LocalDateTime startedAt
|
|
||||||
) {
|
|
||||||
// Note: @Schema on a record accessor method is not picked up by SpringDoc; the
|
|
||||||
// "skipped" count is a computed convenience field derived from skippedFiles.size().
|
|
||||||
@JsonProperty("skipped")
|
|
||||||
public int skipped() { return skippedFiles.size(); }
|
|
||||||
|
|
||||||
/** Defensive-copy constructor — callers cannot mutate the stored list after construction. */
|
|
||||||
public ImportStatus {
|
|
||||||
skippedFiles = List.copyOf(skippedFiles);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
record ProcessResult(int processed, List<SkippedFile> skippedFiles) {}
|
|
||||||
|
|
||||||
private volatile ImportStatus currentStatus = new ImportStatus(State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
|
|
||||||
|
|
||||||
public ImportStatus getStatus() {
|
|
||||||
return currentStatus;
|
|
||||||
}
|
|
||||||
|
|
||||||
private final DocumentService documentService;
|
|
||||||
private final PersonService personService;
|
|
||||||
private final TagService tagService;
|
|
||||||
private final S3Client s3Client;
|
|
||||||
private final ThumbnailAsyncRunner thumbnailAsyncRunner;
|
|
||||||
|
|
||||||
@Value("${app.s3.bucket}")
|
|
||||||
private String bucketName;
|
|
||||||
|
|
||||||
@Value("${app.import.col.index:0}")
|
|
||||||
private int colIndex;
|
|
||||||
|
|
||||||
@Value("${app.import.col.box:1}")
|
|
||||||
private int colBox;
|
|
||||||
|
|
||||||
@Value("${app.import.col.folder:2}")
|
|
||||||
private int colFolder;
|
|
||||||
|
|
||||||
@Value("${app.import.col.sender:3}")
|
|
||||||
private int colSender;
|
|
||||||
|
|
||||||
@Value("${app.import.col.receivers:5}")
|
|
||||||
private int colReceivers;
|
|
||||||
|
|
||||||
@Value("${app.import.col.date:7}")
|
|
||||||
private int colDate;
|
|
||||||
|
|
||||||
@Value("${app.import.col.location:9}")
|
|
||||||
private int colLocation;
|
|
||||||
|
|
||||||
@Value("${app.import.col.tags:10}")
|
|
||||||
private int colTags;
|
|
||||||
|
|
||||||
@Value("${app.import.col.summary:11}")
|
|
||||||
private int colSummary;
|
|
||||||
|
|
||||||
@Value("${app.import.col.transcription:13}")
|
|
||||||
private int colTranscription;
|
|
||||||
|
|
||||||
@Value("${app.import.dir:/import}")
|
|
||||||
private String importDir;
|
|
||||||
|
|
||||||
private static final DateTimeFormatter GERMAN_DATE = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN);
|
|
||||||
|
|
||||||
// ODS XML namespaces
|
|
||||||
private static final String NS_TABLE = "urn:oasis:names:tc:opendocument:xmlns:table:1.0";
|
|
||||||
private static final String NS_TEXT = "urn:oasis:names:tc:opendocument:xmlns:text:1.0";
|
|
||||||
|
|
||||||
// We only need up to this many columns; caps repeated-empty-cell expansion
|
|
||||||
private static final int MAX_COLS = 20;
|
|
||||||
|
|
||||||
@Async
|
|
||||||
public void runImportAsync() {
|
|
||||||
if (currentStatus.state() == State.RUNNING) {
|
|
||||||
throw DomainException.conflict(ErrorCode.IMPORT_ALREADY_RUNNING, "A mass import is already in progress");
|
|
||||||
}
|
|
||||||
currentStatus = new ImportStatus(State.RUNNING, "IMPORT_RUNNING", "Import läuft...", 0, List.of(), LocalDateTime.now());
|
|
||||||
try {
|
|
||||||
File spreadsheet = findSpreadsheetFile();
|
|
||||||
log.info("Starte Massenimport aus: {}", spreadsheet.getAbsolutePath());
|
|
||||||
ProcessResult result = processRows(readSpreadsheet(spreadsheet));
|
|
||||||
currentStatus = new ImportStatus(State.DONE, "IMPORT_DONE",
|
|
||||||
"Import abgeschlossen. " + result.processed() + " Dokumente verarbeitet.",
|
|
||||||
result.processed(), result.skippedFiles(), currentStatus.startedAt());
|
|
||||||
} catch (NoSpreadsheetException e) {
|
|
||||||
log.error("Massenimport fehlgeschlagen: keine Tabellendatei", e);
|
|
||||||
currentStatus = new ImportStatus(State.FAILED, "IMPORT_FAILED_NO_SPREADSHEET",
|
|
||||||
"Fehler: " + e.getMessage(), 0, List.of(), currentStatus.startedAt());
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.error("Massenimport fehlgeschlagen", e);
|
|
||||||
currentStatus = new ImportStatus(State.FAILED, "IMPORT_FAILED_INTERNAL",
|
|
||||||
"Fehler: " + e.getMessage(), 0, List.of(), currentStatus.startedAt());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class NoSpreadsheetException extends RuntimeException {
|
|
||||||
NoSpreadsheetException(String message) { super(message); }
|
|
||||||
}
|
|
||||||
|
|
||||||
private File findSpreadsheetFile() throws IOException {
|
|
||||||
try (Stream<Path> files = Files.list(Paths.get(importDir))) {
|
|
||||||
return files
|
|
||||||
.filter(p -> {
|
|
||||||
String name = p.toString().toLowerCase();
|
|
||||||
return name.endsWith(".ods") || name.endsWith(".xlsx") || name.endsWith(".xls");
|
|
||||||
})
|
|
||||||
.findFirst()
|
|
||||||
.orElseThrow(() -> new NoSpreadsheetException(
|
|
||||||
"Keine Tabellendatei (.ods/.xlsx/.xls) in " + importDir + " gefunden!"))
|
|
||||||
.toFile();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- Spreadsheet reading (format-specific, produces neutral List<List<String>>) ---
|
|
||||||
|
|
||||||
private List<List<String>> readSpreadsheet(File file) throws Exception {
|
|
||||||
String name = file.getName().toLowerCase();
|
|
||||||
if (name.endsWith(".ods")) {
|
|
||||||
return readOds(file);
|
|
||||||
}
|
|
||||||
return readXlsx(file);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads an ODS file by parsing its content.xml directly (no extra library needed).
|
|
||||||
* ODS is a ZIP archive; content.xml holds the spreadsheet data as XML.
|
|
||||||
*/
|
|
||||||
List<List<String>> readOds(File file) throws Exception {
|
|
||||||
List<List<String>> result = new ArrayList<>();
|
|
||||||
|
|
||||||
try (ZipFile zip = new ZipFile(file)) {
|
|
||||||
var entry = zip.getEntry("content.xml");
|
|
||||||
if (entry == null) throw new RuntimeException("Ungültige ODS-Datei: content.xml fehlt");
|
|
||||||
|
|
||||||
var factory = XxeSafeXmlParser.hardenedFactory();
|
|
||||||
factory.setNamespaceAware(true);
|
|
||||||
var builder = factory.newDocumentBuilder();
|
|
||||||
var doc = builder.parse(zip.getInputStream(entry));
|
|
||||||
|
|
||||||
NodeList tables = doc.getElementsByTagNameNS(NS_TABLE, "table");
|
|
||||||
if (tables.getLength() == 0) return result;
|
|
||||||
|
|
||||||
var table = (Element) tables.item(0);
|
|
||||||
NodeList rows = table.getElementsByTagNameNS(NS_TABLE, "table-row");
|
|
||||||
|
|
||||||
for (int i = 0; i < rows.getLength(); i++) {
|
|
||||||
var row = (Element) rows.item(i);
|
|
||||||
List<String> rowData = new ArrayList<>();
|
|
||||||
NodeList cells = row.getElementsByTagNameNS(NS_TABLE, "table-cell");
|
|
||||||
|
|
||||||
for (int j = 0; j < cells.getLength() && rowData.size() < MAX_COLS; j++) {
|
|
||||||
var cell = (Element) cells.item(j);
|
|
||||||
|
|
||||||
// Read the display text (first <text:p>)
|
|
||||||
String value = "";
|
|
||||||
NodeList textNodes = cell.getElementsByTagNameNS(NS_TEXT, "p");
|
|
||||||
if (textNodes.getLength() > 0) {
|
|
||||||
value = textNodes.item(0).getTextContent().trim();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Expand number-columns-repeated (capped at MAX_COLS)
|
|
||||||
String repeatAttr = cell.getAttributeNS(NS_TABLE, "number-columns-repeated");
|
|
||||||
int repeat = repeatAttr.isEmpty() ? 1 : Integer.parseInt(repeatAttr);
|
|
||||||
repeat = Math.min(repeat, MAX_COLS - rowData.size());
|
|
||||||
|
|
||||||
for (int r = 0; r < repeat; r++) {
|
|
||||||
rowData.add(value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
result.add(rowData);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Reads an XLSX/XLS file using Apache POI. Converts all cells to strings. */
|
|
||||||
private List<List<String>> readXlsx(File file) throws Exception {
|
|
||||||
List<List<String>> result = new ArrayList<>();
|
|
||||||
try (FileInputStream fis = new FileInputStream(file);
|
|
||||||
Workbook workbook = WorkbookFactory.create(fis)) {
|
|
||||||
|
|
||||||
Sheet sheet = workbook.getSheetAt(0);
|
|
||||||
for (int i = 0; i <= sheet.getLastRowNum(); i++) {
|
|
||||||
Row row = sheet.getRow(i);
|
|
||||||
List<String> rowData = new ArrayList<>();
|
|
||||||
if (row != null) {
|
|
||||||
for (int j = 0; j < MAX_COLS; j++) {
|
|
||||||
rowData.add(xlsxCellToString(row.getCell(j)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
result.add(rowData);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
private String xlsxCellToString(Cell cell) {
|
|
||||||
if (cell == null) return "";
|
|
||||||
return switch (cell.getCellType()) {
|
|
||||||
case STRING -> cell.getStringCellValue();
|
|
||||||
case NUMERIC -> {
|
|
||||||
if (DateUtil.isCellDateFormatted(cell)) {
|
|
||||||
yield cell.getLocalDateTimeCellValue().toLocalDate().toString(); // ISO
|
|
||||||
}
|
|
||||||
yield String.valueOf((int) cell.getNumericCellValue());
|
|
||||||
}
|
|
||||||
case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
|
|
||||||
default -> "";
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- Import logic (works on neutral List<String> rows) ---
|
|
||||||
|
|
||||||
private ProcessResult processRows(List<List<String>> rows) {
|
|
||||||
int processed = 0;
|
|
||||||
List<SkippedFile> skippedFiles = new ArrayList<>();
|
|
||||||
|
|
||||||
for (int i = 1; i < rows.size(); i++) { // skip header row
|
|
||||||
List<String> cells = rows.get(i);
|
|
||||||
String index = getCell(cells, colIndex);
|
|
||||||
if (index.isBlank()) continue;
|
|
||||||
|
|
||||||
String filename = index.contains(".") ? index : index + ".pdf";
|
|
||||||
if (!isValidImportFilename(filename)) {
|
|
||||||
log.warn("Skipping import row {}: filename rejected — {}", i, filename);
|
|
||||||
skippedFiles.add(new SkippedFile(filename, SkipReason.INVALID_FILENAME_PATH_TRAVERSAL));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
Optional<File> fileOnDisk = findFileRecursive(filename);
|
|
||||||
if (fileOnDisk.isEmpty()) {
|
|
||||||
log.warn("Datei nicht gefunden, importiere nur Metadaten: {}", filename);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fileOnDisk.isPresent()) {
|
|
||||||
try {
|
|
||||||
if (!isPdfMagicBytes(fileOnDisk.get())) {
|
|
||||||
log.warn("Überspringe {}: Datei beginnt nicht mit %PDF-Signatur", filename);
|
|
||||||
skippedFiles.add(new SkippedFile(filename, SkipReason.INVALID_PDF_SIGNATURE));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
log.error("Fehler beim Prüfen der Magic-Bytes für {}", filename, e);
|
|
||||||
skippedFiles.add(new SkippedFile(filename, SkipReason.FILE_READ_ERROR));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Optional<SkipReason> skipReason = importSingleDocument(cells, fileOnDisk, filename, index);
|
|
||||||
if (skipReason.isPresent()) {
|
|
||||||
skippedFiles.add(new SkippedFile(filename, skipReason.get()));
|
|
||||||
} else {
|
|
||||||
processed++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return new ProcessResult(processed, skippedFiles);
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean isValidImportFilename(String filename) {
|
|
||||||
if (filename == null || filename.isBlank()) return false;
|
|
||||||
if (filename.contains("/")) return false;
|
|
||||||
if (filename.contains("\\")) return false;
|
|
||||||
if (filename.contains("∕")) return false; // U+2215 DIVISION SLASH
|
|
||||||
if (filename.contains("/")) return false; // U+FF0F FULLWIDTH SOLIDUS
|
|
||||||
if (filename.contains("⧵")) return false; // U+29F5 REVERSE SOLIDUS OPERATOR
|
|
||||||
if (filename.contains("..")) return false;
|
|
||||||
if (filename.equals(".")) return false;
|
|
||||||
if (filename.contains("\0")) return false;
|
|
||||||
// Paths.get() is safe here on Linux for all inputs that passed the checks above;
|
|
||||||
// it may throw InvalidPathException for OS-specific illegal chars on Windows,
|
|
||||||
// but those are not reachable in production.
|
|
||||||
if (Paths.get(filename).isAbsolute()) return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// package-private: Mockito spy in tests can override to inject IOException
|
|
||||||
InputStream openFileStream(File file) throws IOException {
|
|
||||||
return new FileInputStream(file);
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean isPdfMagicBytes(File file) throws IOException {
|
|
||||||
try (InputStream is = openFileStream(file)) {
|
|
||||||
byte[] header = is.readNBytes(4);
|
|
||||||
return header.length == 4
|
|
||||||
&& header[0] == 0x25 // %
|
|
||||||
&& header[1] == 0x50 // P
|
|
||||||
&& header[2] == 0x44 // D
|
|
||||||
&& header[3] == 0x46; // F
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Imports a single document row.
|
|
||||||
*
|
|
||||||
* @return empty Optional on success; an Optional containing the skip reason on failure/skip.
|
|
||||||
*/
|
|
||||||
@Transactional
|
|
||||||
protected Optional<SkipReason> importSingleDocument(List<String> cells, Optional<File> file, String originalFilename, String index) {
|
|
||||||
Optional<Document> existing = documentService.findByOriginalFilename(originalFilename);
|
|
||||||
if (existing.isPresent() && existing.get().getStatus() != DocumentStatus.PLACEHOLDER) {
|
|
||||||
log.info("Dokument {} existiert bereits, überspringe.", originalFilename);
|
|
||||||
return Optional.of(SkipReason.ALREADY_EXISTS);
|
|
||||||
}
|
|
||||||
|
|
||||||
String archiveBox = getCell(cells, colBox);
|
|
||||||
String archiveFolder = getCell(cells, colFolder);
|
|
||||||
String senderRaw = getCell(cells, colSender);
|
|
||||||
String receiversRaw = getCell(cells, colReceivers);
|
|
||||||
LocalDate date = parseDate(getCell(cells, colDate));
|
|
||||||
String location = getCell(cells, colLocation);
|
|
||||||
String tagRaw = getCell(cells, colTags);
|
|
||||||
String summary = getCell(cells, colSummary);
|
|
||||||
String transcription = getCell(cells, colTranscription);
|
|
||||||
|
|
||||||
String s3Key = null;
|
|
||||||
String contentType = null;
|
|
||||||
DocumentStatus status = DocumentStatus.PLACEHOLDER;
|
|
||||||
|
|
||||||
if (file.isPresent()) {
|
|
||||||
try {
|
|
||||||
contentType = Files.probeContentType(file.get().toPath());
|
|
||||||
} catch (IOException e) {
|
|
||||||
contentType = null;
|
|
||||||
}
|
|
||||||
if (contentType == null) contentType = "application/octet-stream";
|
|
||||||
|
|
||||||
s3Key = "documents/" + UUID.randomUUID() + "_" + file.get().getName();
|
|
||||||
try {
|
|
||||||
s3Client.putObject(PutObjectRequest.builder()
|
|
||||||
.bucket(bucketName)
|
|
||||||
.key(s3Key)
|
|
||||||
.contentType(contentType)
|
|
||||||
.build(),
|
|
||||||
RequestBody.fromFile(file.get()));
|
|
||||||
status = DocumentStatus.UPLOADED;
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.error("S3 Upload Fehler für {}", file.get().getName(), e);
|
|
||||||
return Optional.of(SkipReason.S3_UPLOAD_FAILED);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Person sender = senderRaw.isBlank() ? null : findOrCreatePerson(senderRaw);
|
|
||||||
List<Person> receivers = PersonNameParser.parseReceivers(receiversRaw).stream()
|
|
||||||
.map(this::findOrCreatePerson)
|
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.toList();
|
|
||||||
|
|
||||||
Tag tag = null;
|
|
||||||
if (!tagRaw.isBlank()) {
|
|
||||||
tag = tagService.findOrCreate(tagRaw);
|
|
||||||
}
|
|
||||||
|
|
||||||
Document doc = existing.orElse(Document.builder()
|
|
||||||
.originalFilename(originalFilename)
|
|
||||||
.build());
|
|
||||||
|
|
||||||
// Heuristic: mark as complete if at least one key field is present in the spreadsheet row
|
|
||||||
boolean metadataComplete = date != null || !senderRaw.isBlank() || !receiversRaw.isBlank();
|
|
||||||
|
|
||||||
doc.setTitle(buildTitle(index, date, location));
|
|
||||||
doc.setFilePath(s3Key);
|
|
||||||
doc.setContentType(contentType);
|
|
||||||
doc.setStatus(status);
|
|
||||||
doc.setArchiveBox(archiveBox.isBlank() ? null : archiveBox);
|
|
||||||
doc.setArchiveFolder(archiveFolder.isBlank() ? null : archiveFolder);
|
|
||||||
doc.setDocumentDate(date);
|
|
||||||
doc.setLocation(location.isBlank() ? null : location);
|
|
||||||
doc.setSummary(summary.isBlank() ? null : summary);
|
|
||||||
doc.setTranscription(transcription.isBlank() ? null : transcription);
|
|
||||||
doc.setSender(sender);
|
|
||||||
doc.getReceivers().addAll(receivers);
|
|
||||||
if (tag != null) doc.getTags().add(tag);
|
|
||||||
doc.setMetadataComplete(metadataComplete);
|
|
||||||
|
|
||||||
Document saved = documentService.save(doc);
|
|
||||||
if (file.isPresent()) {
|
|
||||||
thumbnailAsyncRunner.dispatchAfterCommit(saved.getId());
|
|
||||||
}
|
|
||||||
log.info("Importiert{}: {}", file.isEmpty() ? " (nur Metadaten)" : "", originalFilename);
|
|
||||||
return Optional.empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- Helpers ---
|
|
||||||
|
|
||||||
private String getCell(List<String> cells, int col) {
|
|
||||||
if (col >= cells.size()) return "";
|
|
||||||
String val = cells.get(col);
|
|
||||||
return val == null ? "" : val.trim();
|
|
||||||
}
|
|
||||||
|
|
||||||
private LocalDate parseDate(String value) {
|
|
||||||
if (value == null || value.isBlank()) return null;
|
|
||||||
try {
|
|
||||||
return LocalDate.parse(value.trim());
|
|
||||||
} catch (DateTimeParseException e) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private String buildTitle(String index, LocalDate date, String location) {
|
|
||||||
StringBuilder sb = new StringBuilder(index);
|
|
||||||
if (date != null) {
|
|
||||||
sb.append(" \u2013 ").append(date.format(GERMAN_DATE));
|
|
||||||
}
|
|
||||||
if (location != null && !location.isBlank()) {
|
|
||||||
sb.append(" \u2013 ").append(location);
|
|
||||||
}
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
private Person findOrCreatePerson(String rawName) {
|
|
||||||
return personService.findOrCreateByAlias(rawName);
|
|
||||||
}
|
|
||||||
|
|
||||||
private Optional<File> findFileRecursive(String filename) {
|
|
||||||
File baseDir = new File(importDir);
|
|
||||||
try (Stream<Path> walk = Files.walk(baseDir.toPath())) {
|
|
||||||
Optional<Path> match = walk.filter(p -> !Files.isDirectory(p))
|
|
||||||
.filter(p -> p.getFileName().toString().equals(filename))
|
|
||||||
.findFirst();
|
|
||||||
if (match.isEmpty()) return Optional.empty();
|
|
||||||
File candidate = match.get().toFile();
|
|
||||||
String baseDirCanonical = baseDir.getCanonicalPath();
|
|
||||||
if (!candidate.getCanonicalPath().startsWith(baseDirCanonical + File.separator)) {
|
|
||||||
throw DomainException.internal(ErrorCode.INTERNAL_ERROR, "Path escape detected: " + candidate);
|
|
||||||
}
|
|
||||||
return Optional.of(candidate);
|
|
||||||
} catch (IOException e) {
|
|
||||||
return Optional.empty();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
package org.raddatz.familienarchiv.importing;
|
|
||||||
|
|
||||||
import javax.xml.parsers.DocumentBuilderFactory;
|
|
||||||
import javax.xml.parsers.ParserConfigurationException;
|
|
||||||
|
|
||||||
/** Factory for XML parsers with DOCTYPE declarations, external entities, external DTDs and XInclude turned off. */
class XxeSafeXmlParser {

    /** Feature that is switched on: reject any DOCTYPE declaration. */
    private static final String DISALLOW_DOCTYPE = "http://apache.org/xml/features/disallow-doctype-decl";

    /** Features that are switched off, in the order they are applied. */
    private static final String[] DISABLED_FEATURES = {
            "http://xml.org/sax/features/external-general-entities",
            "http://xml.org/sax/features/external-parameter-entities",
            "http://apache.org/xml/features/nonvalidating/load-external-dtd"
    };

    private XxeSafeXmlParser() {}

    /**
     * Creates a new factory with the hardening settings applied.
     *
     * @throws ParserConfigurationException if the underlying implementation rejects a feature
     */
    static DocumentBuilderFactory hardenedFactory() throws ParserConfigurationException {
        final DocumentBuilderFactory hardened = DocumentBuilderFactory.newInstance();
        hardened.setFeature(DISALLOW_DOCTYPE, true);
        for (String feature : DISABLED_FEATURES) {
            hardened.setFeature(feature, false);
        }
        hardened.setXIncludeAware(false);
        hardened.setExpandEntityReferences(false);
        return hardened;
    }
}
|
|
||||||
@@ -5,7 +5,8 @@ import org.raddatz.familienarchiv.security.Permission;
|
|||||||
import org.raddatz.familienarchiv.security.RequirePermission;
|
import org.raddatz.familienarchiv.security.RequirePermission;
|
||||||
import org.raddatz.familienarchiv.document.DocumentService;
|
import org.raddatz.familienarchiv.document.DocumentService;
|
||||||
import org.raddatz.familienarchiv.document.DocumentVersionService;
|
import org.raddatz.familienarchiv.document.DocumentVersionService;
|
||||||
import org.raddatz.familienarchiv.importing.MassImportService;
|
import org.raddatz.familienarchiv.importing.CanonicalImportOrchestrator;
|
||||||
|
import org.raddatz.familienarchiv.importing.ImportStatus;
|
||||||
import org.raddatz.familienarchiv.document.ThumbnailBackfillService;
|
import org.raddatz.familienarchiv.document.ThumbnailBackfillService;
|
||||||
import org.springframework.http.ResponseEntity;
|
import org.springframework.http.ResponseEntity;
|
||||||
import org.springframework.web.bind.annotation.GetMapping;
|
import org.springframework.web.bind.annotation.GetMapping;
|
||||||
@@ -21,20 +22,20 @@ import lombok.RequiredArgsConstructor;
|
|||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
public class AdminController {
|
public class AdminController {
|
||||||
|
|
||||||
private final MassImportService massImportService;
|
private final CanonicalImportOrchestrator importOrchestrator;
|
||||||
private final DocumentService documentService;
|
private final DocumentService documentService;
|
||||||
private final DocumentVersionService documentVersionService;
|
private final DocumentVersionService documentVersionService;
|
||||||
private final ThumbnailBackfillService thumbnailBackfillService;
|
private final ThumbnailBackfillService thumbnailBackfillService;
|
||||||
|
|
||||||
@PostMapping("/trigger-import")
|
@PostMapping("/trigger-import")
|
||||||
public ResponseEntity<MassImportService.ImportStatus> triggerMassImport() {
|
public ResponseEntity<ImportStatus> triggerMassImport() {
|
||||||
massImportService.runImportAsync();
|
importOrchestrator.runImportAsync();
|
||||||
return ResponseEntity.accepted().body(massImportService.getStatus());
|
return ResponseEntity.accepted().body(importOrchestrator.getStatus());
|
||||||
}
|
}
|
||||||
|
|
||||||
@GetMapping("/import-status")
|
@GetMapping("/import-status")
|
||||||
public ResponseEntity<MassImportService.ImportStatus> importStatus() {
|
public ResponseEntity<ImportStatus> importStatus() {
|
||||||
return ResponseEntity.ok(massImportService.getStatus());
|
return ResponseEntity.ok(importOrchestrator.getStatus());
|
||||||
}
|
}
|
||||||
|
|
||||||
@PostMapping("/backfill-versions")
|
@PostMapping("/backfill-versions")
|
||||||
|
|||||||
@@ -125,17 +125,10 @@ app:
|
|||||||
password: ${APP_ADMIN_PASSWORD:admin123}
|
password: ${APP_ADMIN_PASSWORD:admin123}
|
||||||
|
|
||||||
import:
|
import:
|
||||||
col:
|
# Directory holding the normalizer's committed canonical artifacts
|
||||||
index: 0
|
# (canonical-{documents,persons,tag-tree}.xlsx + canonical-persons-tree.json).
|
||||||
box: 1
|
# The loader maps columns by header name — no positional indices (see ADR-025).
|
||||||
folder: 2
|
dir: ${IMPORT_DIR:/import}
|
||||||
sender: 3
|
|
||||||
receivers: 5
|
|
||||||
date: 7
|
|
||||||
location: 9
|
|
||||||
tags: 10
|
|
||||||
summary: 11
|
|
||||||
transcription: 13
|
|
||||||
|
|
||||||
ocr:
|
ocr:
|
||||||
sender-model:
|
sender-model:
|
||||||
|
|||||||
@@ -0,0 +1,130 @@
|
|||||||
|
package org.raddatz.familienarchiv.importing;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
|
import org.junit.jupiter.api.io.TempDir;
|
||||||
|
import org.mockito.InOrder;
|
||||||
|
import org.mockito.Mock;
|
||||||
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
import org.raddatz.familienarchiv.exception.DomainException;
|
||||||
|
import org.springframework.test.util.ReflectionTestUtils;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||||
|
import static org.mockito.ArgumentMatchers.any;
|
||||||
|
import static org.mockito.Mockito.inOrder;
|
||||||
|
import static org.mockito.Mockito.never;
|
||||||
|
import static org.mockito.Mockito.verify;
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
@ExtendWith(MockitoExtension.class)
|
||||||
|
class CanonicalImportOrchestratorTest {
|
||||||
|
|
||||||
|
@Mock TagTreeImporter tagTreeImporter;
|
||||||
|
@Mock PersonRegisterImporter personRegisterImporter;
|
||||||
|
@Mock PersonTreeImporter personTreeImporter;
|
||||||
|
@Mock DocumentImporter documentImporter;
|
||||||
|
|
||||||
|
private CanonicalImportOrchestrator orchestrator(Path dir) {
|
||||||
|
CanonicalImportOrchestrator o = new CanonicalImportOrchestrator(
|
||||||
|
tagTreeImporter, personRegisterImporter, personTreeImporter, documentImporter);
|
||||||
|
ReflectionTestUtils.setField(o, "canonicalDir", dir.toString());
|
||||||
|
return o;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void writeAllArtifacts(Path dir) throws Exception {
|
||||||
|
Files.writeString(dir.resolve("canonical-tag-tree.xlsx"), "x");
|
||||||
|
Files.writeString(dir.resolve("canonical-persons.xlsx"), "x");
|
||||||
|
Files.writeString(dir.resolve("canonical-persons-tree.json"), "x");
|
||||||
|
Files.writeString(dir.resolve("canonical-documents.xlsx"), "x");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void getStatus_isIdleByDefault(@TempDir Path dir) {
|
||||||
|
assertThat(orchestrator(dir).getStatus().state()).isEqualTo(ImportStatus.State.IDLE);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void runImport_loadsTagsAndPersonsBeforeDocuments(@TempDir Path dir) throws Exception {
|
||||||
|
writeAllArtifacts(dir);
|
||||||
|
when(documentImporter.load(any())).thenReturn(new DocumentImporter.LoadResult(0, List.of()));
|
||||||
|
CanonicalImportOrchestrator o = orchestrator(dir);
|
||||||
|
|
||||||
|
o.runImport();
|
||||||
|
|
||||||
|
InOrder order = inOrder(tagTreeImporter, personRegisterImporter, personTreeImporter, documentImporter);
|
||||||
|
order.verify(tagTreeImporter).load(any());
|
||||||
|
order.verify(personRegisterImporter).load(any());
|
||||||
|
order.verify(personTreeImporter).load(any());
|
||||||
|
order.verify(documentImporter).load(any());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void runImport_setsStatusDone_onSuccess(@TempDir Path dir) throws Exception {
|
||||||
|
writeAllArtifacts(dir);
|
||||||
|
when(documentImporter.load(any())).thenReturn(new DocumentImporter.LoadResult(3, List.of()));
|
||||||
|
CanonicalImportOrchestrator o = orchestrator(dir);
|
||||||
|
|
||||||
|
o.runImport();
|
||||||
|
|
||||||
|
assertThat(o.getStatus().state()).isEqualTo(ImportStatus.State.DONE);
|
||||||
|
assertThat(o.getStatus().processed()).isEqualTo(3);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void runImport_failsClosed_whenAnArtifactIsMissing(@TempDir Path dir) throws Exception {
|
||||||
|
Files.writeString(dir.resolve("canonical-tag-tree.xlsx"), "x");
|
||||||
|
// the other three artifacts are absent
|
||||||
|
CanonicalImportOrchestrator o = orchestrator(dir);
|
||||||
|
|
||||||
|
o.runImport();
|
||||||
|
|
||||||
|
assertThat(o.getStatus().state()).isEqualTo(ImportStatus.State.FAILED);
|
||||||
|
verify(tagTreeImporter, never()).load(any());
|
||||||
|
verify(documentImporter, never()).load(any());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void runImport_setsStatusFailed_whenLoaderThrows(@TempDir Path dir) throws Exception {
|
||||||
|
writeAllArtifacts(dir);
|
||||||
|
when(tagTreeImporter.load(any())).thenThrow(DomainException.badRequest(
|
||||||
|
org.raddatz.familienarchiv.exception.ErrorCode.IMPORT_ARTIFACT_INVALID, "bad"));
|
||||||
|
CanonicalImportOrchestrator o = orchestrator(dir);
|
||||||
|
|
||||||
|
o.runImport();
|
||||||
|
|
||||||
|
assertThat(o.getStatus().state()).isEqualTo(ImportStatus.State.FAILED);
|
||||||
|
verify(documentImporter, never()).load(any());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void runImportAsync_throwsConflict_whenAlreadyRunning(@TempDir Path dir) {
|
||||||
|
CanonicalImportOrchestrator o = orchestrator(dir);
|
||||||
|
ReflectionTestUtils.setField(o, "currentStatus", new ImportStatus(
|
||||||
|
ImportStatus.State.RUNNING, "IMPORT_RUNNING", "running", 0, List.of(), null));
|
||||||
|
|
||||||
|
assertThatThrownBy(o::runImportAsync)
|
||||||
|
.isInstanceOf(DomainException.class)
|
||||||
|
.hasMessageContaining("already in progress");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void runImport_aggregatesDocumentSkips(@TempDir Path dir) throws Exception {
|
||||||
|
writeAllArtifacts(dir);
|
||||||
|
when(documentImporter.load(any())).thenReturn(new DocumentImporter.LoadResult(1,
|
||||||
|
List.of(new ImportStatus.SkippedFile("fake.pdf", ImportStatus.SkipReason.INVALID_PDF_SIGNATURE))));
|
||||||
|
CanonicalImportOrchestrator o = orchestrator(dir);
|
||||||
|
|
||||||
|
o.runImport();
|
||||||
|
|
||||||
|
assertThat(o.getStatus().skipped()).isEqualTo(1);
|
||||||
|
assertThat(o.getStatus().skippedFiles())
|
||||||
|
.extracting(ImportStatus.SkippedFile::filename)
|
||||||
|
.containsExactly("fake.pdf");
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,896 +0,0 @@
|
|||||||
package org.raddatz.familienarchiv.importing;
|
|
||||||
|
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
import org.junit.jupiter.api.extension.ExtendWith;
|
|
||||||
import org.junit.jupiter.api.io.TempDir;
|
|
||||||
import org.mockito.Mock;
|
|
||||||
import org.mockito.junit.jupiter.MockitoExtension;
|
|
||||||
import org.raddatz.familienarchiv.exception.DomainException;
|
|
||||||
import org.raddatz.familienarchiv.document.Document;
|
|
||||||
import org.raddatz.familienarchiv.document.DocumentService;
|
|
||||||
import org.raddatz.familienarchiv.document.DocumentStatus;
|
|
||||||
import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
|
|
||||||
import org.raddatz.familienarchiv.person.Person;
|
|
||||||
import org.raddatz.familienarchiv.tag.Tag;
|
|
||||||
import org.raddatz.familienarchiv.tag.TagService;
|
|
||||||
import org.raddatz.familienarchiv.person.PersonService;
|
|
||||||
import org.springframework.test.util.ReflectionTestUtils;
|
|
||||||
import software.amazon.awssdk.core.sync.RequestBody;
|
|
||||||
import software.amazon.awssdk.services.s3.S3Client;
|
|
||||||
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
|
|
||||||
|
|
||||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
|
||||||
import org.xml.sax.SAXParseException;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.OutputStream;
|
|
||||||
import java.io.ByteArrayOutputStream;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.time.LocalDate;
|
|
||||||
import java.time.LocalDateTime;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.UUID;
|
|
||||||
import java.util.zip.ZipEntry;
|
|
||||||
import java.util.zip.ZipOutputStream;
|
|
||||||
|
|
||||||
import static org.assertj.core.api.Assertions.assertThat;
|
|
||||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
|
||||||
import static org.mockito.ArgumentMatchers.any;
|
|
||||||
import static org.mockito.Mockito.*;
|
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
|
||||||
class MassImportServiceTest {
|
|
||||||
|
|
||||||
@Mock DocumentService documentService;
|
|
||||||
@Mock PersonService personService;
|
|
||||||
@Mock TagService tagService;
|
|
||||||
@Mock S3Client s3Client;
|
|
||||||
@Mock ThumbnailAsyncRunner thumbnailAsyncRunner;
|
|
||||||
|
|
||||||
MassImportService service;
|
|
||||||
|
|
||||||
@BeforeEach
|
|
||||||
void setUp() {
|
|
||||||
service = new MassImportService(documentService, personService, tagService, s3Client, thumbnailAsyncRunner);
|
|
||||||
ReflectionTestUtils.setField(service, "bucketName", "test-bucket");
|
|
||||||
ReflectionTestUtils.setField(service, "importDir", "/import");
|
|
||||||
ReflectionTestUtils.setField(service, "colIndex", 0);
|
|
||||||
ReflectionTestUtils.setField(service, "colBox", 1);
|
|
||||||
ReflectionTestUtils.setField(service, "colFolder", 2);
|
|
||||||
ReflectionTestUtils.setField(service, "colSender", 3);
|
|
||||||
ReflectionTestUtils.setField(service, "colReceivers", 5);
|
|
||||||
ReflectionTestUtils.setField(service, "colDate", 7);
|
|
||||||
ReflectionTestUtils.setField(service, "colLocation", 9);
|
|
||||||
ReflectionTestUtils.setField(service, "colTags", 10);
|
|
||||||
ReflectionTestUtils.setField(service, "colSummary", 11);
|
|
||||||
ReflectionTestUtils.setField(service, "colTranscription", 13);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── getStatus ────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void getStatus_returnsIdleByDefault() {
|
|
||||||
assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.IDLE);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void getStatus_hasStatusCode_IMPORT_IDLE_byDefault() {
|
|
||||||
assertThat(service.getStatus().statusCode()).isEqualTo("IMPORT_IDLE");
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── runImportAsync ───────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void runImportAsync_setsFailedStatus_whenImportDirectoryDoesNotExist() {
|
|
||||||
// /import directory doesn't exist in test environment → IOException → IMPORT_FAILED_INTERNAL
|
|
||||||
service.runImportAsync();
|
|
||||||
|
|
||||||
assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED);
|
|
||||||
assertThat(service.getStatus().statusCode()).isEqualTo("IMPORT_FAILED_INTERNAL");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void runImportAsync_readsFromConfiguredImportDir(@TempDir Path tempDir) {
|
|
||||||
// Empty temp dir → findSpreadsheetFile throws "no spreadsheet" with the
|
|
||||||
// configured path in the message. Proves the field, not a constant,
|
|
||||||
// drives the lookup.
|
|
||||||
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
|
|
||||||
|
|
||||||
service.runImportAsync();
|
|
||||||
|
|
||||||
assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED);
|
|
||||||
assertThat(service.getStatus().message()).contains(tempDir.toString());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void runImportAsync_setsStatusCode_IMPORT_FAILED_NO_SPREADSHEET_whenDirIsEmpty(@TempDir Path tempDir) {
|
|
||||||
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
|
|
||||||
|
|
||||||
service.runImportAsync();
|
|
||||||
|
|
||||||
assertThat(service.getStatus().statusCode()).isEqualTo("IMPORT_FAILED_NO_SPREADSHEET");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void runImportAsync_setsStatusCode_IMPORT_DONE_whenSpreadsheetHasNoDataRows(@TempDir Path tempDir) throws Exception {
|
|
||||||
Path xlsx = tempDir.resolve("import.xlsx");
|
|
||||||
try (XSSFWorkbook wb = new XSSFWorkbook()) {
|
|
||||||
wb.createSheet("Sheet1");
|
|
||||||
try (OutputStream out = Files.newOutputStream(xlsx)) {
|
|
||||||
wb.write(out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
|
|
||||||
|
|
||||||
service.runImportAsync();
|
|
||||||
|
|
||||||
assertThat(service.getStatus().statusCode()).isEqualTo("IMPORT_DONE");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void runImportAsync_throwsConflict_whenAlreadyRunning() {
|
|
||||||
MassImportService.ImportStatus running = new MassImportService.ImportStatus(
|
|
||||||
MassImportService.State.RUNNING, "IMPORT_RUNNING", "Running...", 0, List.of(), LocalDateTime.now());
|
|
||||||
ReflectionTestUtils.setField(service, "currentStatus", running);
|
|
||||||
|
|
||||||
assertThatThrownBy(() -> service.runImportAsync())
|
|
||||||
.isInstanceOf(DomainException.class)
|
|
||||||
.hasMessageContaining("already in progress");
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── importSingleDocument — skip already uploaded ─────────────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void importSingleDocument_skips_whenDocumentAlreadyUploadedNotPlaceholder() {
|
|
||||||
Document existing = Document.builder()
|
|
||||||
.id(UUID.randomUUID())
|
|
||||||
.originalFilename("doc001.pdf")
|
|
||||||
.status(DocumentStatus.UPLOADED)
|
|
||||||
.build();
|
|
||||||
when(documentService.findByOriginalFilename("doc001.pdf")).thenReturn(Optional.of(existing));
|
|
||||||
|
|
||||||
Optional<MassImportService.SkipReason> result = service.importSingleDocument(minimalCells("doc001.pdf"), Optional.empty(), "doc001.pdf", "doc001");
|
|
||||||
|
|
||||||
verify(documentService, never()).save(any());
|
|
||||||
assertThat(result).isPresent().contains(MassImportService.SkipReason.ALREADY_EXISTS);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── importSingleDocument — already-exists guard fires before file I/O ─────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void importSingleDocument_skipsWithAlreadyExists_whenDocumentUploadedAndFileIsPresent(@TempDir Path tempDir) throws Exception {
|
|
||||||
// Document already exists with status UPLOADED (not PLACEHOLDER).
|
|
||||||
// A physical PDF file is also present on disk (valid magic bytes).
|
|
||||||
// Expected: ALREADY_EXISTS is returned and no S3 upload is attempted —
|
|
||||||
// the guard fires before any file I/O, so no partial processing occurs.
|
|
||||||
Document existing = Document.builder()
|
|
||||||
.id(UUID.randomUUID())
|
|
||||||
.originalFilename("present.pdf")
|
|
||||||
.status(DocumentStatus.UPLOADED)
|
|
||||||
.build();
|
|
||||||
when(documentService.findByOriginalFilename("present.pdf")).thenReturn(Optional.of(existing));
|
|
||||||
|
|
||||||
Path physicalFile = tempDir.resolve("present.pdf");
|
|
||||||
byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF-
|
|
||||||
Files.write(physicalFile, pdfHeader);
|
|
||||||
|
|
||||||
Optional<MassImportService.SkipReason> result = service.importSingleDocument(
|
|
||||||
minimalCells("present.pdf"), Optional.of(physicalFile.toFile()), "present.pdf", "present");
|
|
||||||
|
|
||||||
assertThat(result).isPresent().contains(MassImportService.SkipReason.ALREADY_EXISTS);
|
|
||||||
verify(s3Client, never()).putObject(any(PutObjectRequest.class), any(RequestBody.class));
|
|
||||||
verify(documentService, never()).save(any());
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── importSingleDocument — S3 failure surfaced in skippedFiles ──────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void runImportAsync_addsS3UploadFailed_toSkippedFiles_whenS3Throws(@TempDir Path tempDir) throws Exception {
|
|
||||||
byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF-
|
|
||||||
Files.write(tempDir.resolve("upload_fail.pdf"), pdfHeader);
|
|
||||||
buildMinimalImportXlsx(tempDir, "upload_fail.pdf");
|
|
||||||
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
|
|
||||||
when(documentService.findByOriginalFilename("upload_fail.pdf")).thenReturn(Optional.empty());
|
|
||||||
doThrow(new RuntimeException("S3 unavailable"))
|
|
||||||
.when(s3Client).putObject(any(PutObjectRequest.class), any(RequestBody.class));
|
|
||||||
|
|
||||||
service.runImportAsync();
|
|
||||||
|
|
||||||
assertThat(service.getStatus().skipped()).isEqualTo(1);
|
|
||||||
assertThat(service.getStatus().skippedFiles())
|
|
||||||
.extracting(MassImportService.SkippedFile::filename, MassImportService.SkippedFile::reason)
|
|
||||||
.containsExactly(org.assertj.core.groups.Tuple.tuple("upload_fail.pdf", MassImportService.SkipReason.S3_UPLOAD_FAILED));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void runImportAsync_addsAlreadyExists_toSkippedFiles_whenDocumentAlreadyUploaded(@TempDir Path tempDir) throws Exception {
|
|
||||||
buildMinimalImportXlsx(tempDir, "existing.pdf");
|
|
||||||
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
|
|
||||||
Document existing = Document.builder()
|
|
||||||
.id(UUID.randomUUID())
|
|
||||||
.originalFilename("existing.pdf")
|
|
||||||
.status(DocumentStatus.UPLOADED)
|
|
||||||
.build();
|
|
||||||
when(documentService.findByOriginalFilename("existing.pdf")).thenReturn(Optional.of(existing));
|
|
||||||
|
|
||||||
service.runImportAsync();
|
|
||||||
|
|
||||||
assertThat(service.getStatus().skipped()).isEqualTo(1);
|
|
||||||
assertThat(service.getStatus().skippedFiles())
|
|
||||||
.extracting(MassImportService.SkippedFile::reason)
|
|
||||||
.containsExactly(MassImportService.SkipReason.ALREADY_EXISTS);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── importSingleDocument — create new document (metadata only) ───────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void importSingleDocument_createsNewDocument_whenNotExists() {
|
|
||||||
when(documentService.findByOriginalFilename("doc002.pdf")).thenReturn(Optional.empty());
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
|
|
||||||
service.importSingleDocument(minimalCells("doc002.pdf"), Optional.empty(), "doc002.pdf", "doc002");
|
|
||||||
|
|
||||||
verify(documentService).save(argThat(d ->
|
|
||||||
d.getOriginalFilename().equals("doc002.pdf")
|
|
||||||
&& d.getStatus() == DocumentStatus.PLACEHOLDER));
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── importSingleDocument — update existing placeholder ──────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void importSingleDocument_updatesExistingPlaceholder() {
|
|
||||||
Document placeholder = Document.builder()
|
|
||||||
.id(UUID.randomUUID())
|
|
||||||
.originalFilename("existing.pdf")
|
|
||||||
.status(DocumentStatus.PLACEHOLDER)
|
|
||||||
.build();
|
|
||||||
when(documentService.findByOriginalFilename("existing.pdf")).thenReturn(Optional.of(placeholder));
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
|
|
||||||
service.importSingleDocument(minimalCells("existing.pdf"), Optional.empty(), "existing.pdf", "existing");
|
|
||||||
|
|
||||||
verify(documentService).save(same(placeholder));
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── importSingleDocument — with file (S3 upload) ─────────────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void importSingleDocument_uploadsFileToS3_andSetsStatusUploaded(@TempDir Path tempDir) throws Exception {
|
|
||||||
Path tempFile = tempDir.resolve("doc003.pdf");
|
|
||||||
Files.write(tempFile, "PDF content".getBytes());
|
|
||||||
|
|
||||||
when(documentService.findByOriginalFilename("doc003.pdf")).thenReturn(Optional.empty());
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
|
|
||||||
service.importSingleDocument(
|
|
||||||
minimalCells("doc003.pdf"), Optional.of(tempFile.toFile()), "doc003.pdf", "doc003");
|
|
||||||
|
|
||||||
verify(s3Client).putObject(any(PutObjectRequest.class), any(RequestBody.class));
|
|
||||||
verify(documentService).save(argThat(d -> d.getStatus() == DocumentStatus.UPLOADED));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void importSingleDocument_returnsS3UploadFailed_whenS3UploadFails(@TempDir Path tempDir) throws Exception {
|
|
||||||
Path tempFile = tempDir.resolve("fail.pdf");
|
|
||||||
Files.write(tempFile, "data".getBytes());
|
|
||||||
|
|
||||||
when(documentService.findByOriginalFilename("fail.pdf")).thenReturn(Optional.empty());
|
|
||||||
doThrow(new RuntimeException("S3 error"))
|
|
||||||
.when(s3Client).putObject(any(PutObjectRequest.class), any(RequestBody.class));
|
|
||||||
|
|
||||||
Optional<MassImportService.SkipReason> result = service.importSingleDocument(
|
|
||||||
minimalCells("fail.pdf"), Optional.of(tempFile.toFile()), "fail.pdf", "fail");
|
|
||||||
|
|
||||||
verify(documentService, never()).save(any());
|
|
||||||
assertThat(result).isPresent().contains(MassImportService.SkipReason.S3_UPLOAD_FAILED);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── importSingleDocument — sender handling ───────────────────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void importSingleDocument_setsNullSender_whenSenderCellIsBlank() {
|
|
||||||
when(documentService.findByOriginalFilename("nosender.pdf")).thenReturn(Optional.empty());
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
|
|
||||||
List<String> cells = buildCells("nosender.pdf", "", "", "");
|
|
||||||
service.importSingleDocument(cells, Optional.empty(), "nosender.pdf", "nosender");
|
|
||||||
|
|
||||||
verify(documentService).save(argThat(d -> d.getSender() == null));
|
|
||||||
verify(personService, never()).findOrCreateByAlias(any());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void importSingleDocument_createsSender_whenSenderCellIsNonBlank() {
|
|
||||||
Person sender = Person.builder().id(UUID.randomUUID()).firstName("Walter").lastName("Müller").build();
|
|
||||||
when(documentService.findByOriginalFilename("withsender.pdf")).thenReturn(Optional.empty());
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
when(personService.findOrCreateByAlias("Walter Müller")).thenReturn(sender);
|
|
||||||
|
|
||||||
List<String> cells = buildCells("withsender.pdf", "Walter Müller", "", "");
|
|
||||||
service.importSingleDocument(cells, Optional.empty(), "withsender.pdf", "withsender");
|
|
||||||
|
|
||||||
verify(personService).findOrCreateByAlias("Walter Müller");
|
|
||||||
verify(documentService).save(argThat(d -> d.getSender() == sender));
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── importSingleDocument — tag handling ─────────────────────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void importSingleDocument_createsTag_whenTagCellIsNonBlank() {
|
|
||||||
Tag tag = Tag.builder().id(UUID.randomUUID()).name("Familie").build();
|
|
||||||
when(documentService.findByOriginalFilename("tagged.pdf")).thenReturn(Optional.empty());
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
when(tagService.findOrCreate("Familie")).thenReturn(tag);
|
|
||||||
|
|
||||||
List<String> cells = buildCells("tagged.pdf", "", "", "Familie");
|
|
||||||
service.importSingleDocument(cells, Optional.empty(), "tagged.pdf", "tagged");
|
|
||||||
|
|
||||||
verify(tagService).findOrCreate("Familie");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void importSingleDocument_doesNotCreateTag_whenTagCellIsBlank() {
|
|
||||||
when(documentService.findByOriginalFilename("notag.pdf")).thenReturn(Optional.empty());
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
|
|
||||||
List<String> cells = buildCells("notag.pdf", "", "", "");
|
|
||||||
service.importSingleDocument(cells, Optional.empty(), "notag.pdf", "notag");
|
|
||||||
|
|
||||||
verify(tagService, never()).findOrCreate(any());
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── importSingleDocument — metadataComplete heuristic ───────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void importSingleDocument_metadataComplete_whenSenderPresent() {
|
|
||||||
Person sender = Person.builder().id(UUID.randomUUID()).firstName("A").lastName("B").build();
|
|
||||||
when(documentService.findByOriginalFilename("meta.pdf")).thenReturn(Optional.empty());
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
when(personService.findOrCreateByAlias("A B")).thenReturn(sender);
|
|
||||||
|
|
||||||
List<String> cells = buildCells("meta.pdf", "A B", "", "");
|
|
||||||
service.importSingleDocument(cells, Optional.empty(), "meta.pdf", "meta");
|
|
||||||
|
|
||||||
verify(documentService).save(argThat(Document::isMetadataComplete));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void importSingleDocument_metadataIncomplete_whenNoKeyFieldsPresent() {
|
|
||||||
when(documentService.findByOriginalFilename("nometa.pdf")).thenReturn(Optional.empty());
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
|
|
||||||
List<String> cells = buildCells("nometa.pdf", "", "", "");
|
|
||||||
service.importSingleDocument(cells, Optional.empty(), "nometa.pdf", "nometa");
|
|
||||||
|
|
||||||
verify(documentService).save(argThat(d -> !d.isMetadataComplete()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── importSingleDocument — blank fields set to null ─────────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void importSingleDocument_setsBlankFieldsToNull() {
|
|
||||||
when(documentService.findByOriginalFilename("blank.pdf")).thenReturn(Optional.empty());
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
|
|
||||||
List<String> cells = buildCells("blank.pdf", "", "", "");
|
|
||||||
service.importSingleDocument(cells, Optional.empty(), "blank.pdf", "blank");
|
|
||||||
|
|
||||||
verify(documentService).save(argThat(d ->
|
|
||||||
d.getLocation() == null &&
|
|
||||||
d.getSummary() == null &&
|
|
||||||
d.getTranscription() == null &&
|
|
||||||
d.getArchiveBox() == null &&
|
|
||||||
d.getArchiveFolder() == null));
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── processRows — via ReflectionTestUtils ────────────────────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void processRows_returnsZero_whenOnlyHeaderRow() {
|
|
||||||
List<List<String>> rows = List.of(List.of("header", "col1"));
|
|
||||||
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
|
||||||
assertThat(result.processed()).isEqualTo(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void processRows_skipsRowWithBlankIndex() {
|
|
||||||
List<List<String>> rows = List.of(
|
|
||||||
List.of("header"),
|
|
||||||
minimalCells("") // blank index
|
|
||||||
);
|
|
||||||
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
|
||||||
assertThat(result.processed()).isEqualTo(0);
|
|
||||||
verify(documentService, never()).findByOriginalFilename(any());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void processRows_addsExtension_whenIndexHasNoDot() {
|
|
||||||
when(documentService.findByOriginalFilename("doc001.pdf")).thenReturn(Optional.empty());
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
|
|
||||||
List<List<String>> rows = List.of(
|
|
||||||
List.of("header"),
|
|
||||||
minimalCells("doc001") // no dot → appends ".pdf"
|
|
||||||
);
|
|
||||||
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
|
||||||
|
|
||||||
assertThat(result.processed()).isEqualTo(1);
|
|
||||||
verify(documentService).findByOriginalFilename("doc001.pdf");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void processRows_usesFilenameAsIs_whenIndexHasDot() {
|
|
||||||
when(documentService.findByOriginalFilename("doc002.pdf")).thenReturn(Optional.empty());
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
|
|
||||||
List<List<String>> rows = List.of(
|
|
||||||
List.of("header"),
|
|
||||||
minimalCells("doc002.pdf") // has dot → used as-is
|
|
||||||
);
|
|
||||||
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
|
||||||
|
|
||||||
assertThat(result.processed()).isEqualTo(1);
|
|
||||||
verify(documentService).findByOriginalFilename("doc002.pdf");
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── isValidImportFilename — security regression — do not remove ─────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void isValidImportFilename_returnsFalse_whenFilenameIsNull() {
|
|
||||||
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", (String) null);
|
|
||||||
assertThat(result).isFalse();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void isValidImportFilename_returnsFalse_whenFilenameIsBlank() {
|
|
||||||
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", " ");
|
|
||||||
assertThat(result).isFalse();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void isValidImportFilename_returnsFalse_whenFilenameContainsForwardSlash() {
|
|
||||||
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "etc/passwd");
|
|
||||||
assertThat(result).isFalse();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void isValidImportFilename_returnsFalse_whenFilenameContainsBackslash() {
|
|
||||||
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "..\\etc\\passwd");
|
|
||||||
assertThat(result).isFalse();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void isValidImportFilename_returnsFalse_whenFilenameContainsDotDot() {
|
|
||||||
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "doc..evil.pdf");
|
|
||||||
assertThat(result).isFalse();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void isValidImportFilename_returnsFalse_whenFilenameIsDotDot() {
|
|
||||||
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "..");
|
|
||||||
assertThat(result).isFalse();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void isValidImportFilename_returnsFalse_whenFilenameIsAbsolutePath() {
|
|
||||||
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "/etc/passwd");
|
|
||||||
assertThat(result).isFalse();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void isValidImportFilename_returnsFalse_whenFilenameContainsNullByte() {
|
|
||||||
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "file\0.pdf");
|
|
||||||
assertThat(result).isFalse();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void isValidImportFilename_returnsTrue_whenFilenameIsPlainBasename() {
|
|
||||||
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "document.pdf");
|
|
||||||
assertThat(result).isTrue();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void isValidImportFilename_returnsFalse_whenFilenameContainsUnicodeDivisionSlash() {
|
|
||||||
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "foo∕bar.pdf");
|
|
||||||
assertThat(result).isFalse();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void isValidImportFilename_returnsFalse_whenFilenameContainsFullwidthSlash() {
|
|
||||||
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "foo/bar.pdf");
|
|
||||||
assertThat(result).isFalse();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void isValidImportFilename_returnsFalse_whenFilenameContainsUnicodeReverseSolidus() {
|
|
||||||
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "foo⧵bar.pdf");
|
|
||||||
assertThat(result).isFalse();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void isValidImportFilename_returnsTrue_whenFilenameHasLeadingDot() {
|
|
||||||
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", ".hidden.pdf");
|
|
||||||
assertThat(result).isTrue();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void isValidImportFilename_returnsTrue_whenFilenameHasSpaces() {
|
|
||||||
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "Brief an Oma.pdf");
|
|
||||||
assertThat(result).isTrue();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void processRows_skipsRowAndContinues_whenFilenameIsPathTraversal() {
|
|
||||||
when(documentService.findByOriginalFilename("legitimate.pdf")).thenReturn(Optional.empty());
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
|
|
||||||
List<List<String>> rows = List.of(
|
|
||||||
List.of("header"),
|
|
||||||
minimalCells("../evil"), // row 1: path traversal — should be skipped
|
|
||||||
minimalCells("legitimate.pdf") // row 2: valid — should be processed
|
|
||||||
);
|
|
||||||
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
|
|
||||||
|
|
||||||
assertThat(result.processed()).isEqualTo(1);
|
|
||||||
assertThat(result.skippedFiles())
|
|
||||||
.extracting(MassImportService.SkippedFile::reason)
|
|
||||||
.containsExactly(MassImportService.SkipReason.INVALID_FILENAME_PATH_TRAVERSAL);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── importSingleDocument — non-blank optional fields ────────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void importSingleDocument_setsNonNullOptionalFields_whenPresent() {
|
|
||||||
when(documentService.findByOriginalFilename("rich.pdf")).thenReturn(Optional.empty());
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
|
|
||||||
// box=1, folder=2, location=9, summary=11, transcription=13
|
|
||||||
List<String> cells = List.of(
|
|
||||||
"rich.pdf", // 0: index
|
|
||||||
"Box A", // 1: box
|
|
||||||
"Folder B", // 2: folder
|
|
||||||
"", // 3: sender
|
|
||||||
"", // 4: unused
|
|
||||||
"", // 5: receivers
|
|
||||||
"", // 6: unused
|
|
||||||
"", // 7: date
|
|
||||||
"", // 8: unused
|
|
||||||
"Hamburg", // 9: location
|
|
||||||
"", // 10: tags
|
|
||||||
"A summary", // 11: summary
|
|
||||||
"", // 12: unused
|
|
||||||
"A transcript" // 13: transcription
|
|
||||||
);
|
|
||||||
|
|
||||||
service.importSingleDocument(cells, Optional.empty(), "rich.pdf", "rich");
|
|
||||||
|
|
||||||
verify(documentService).save(argThat(d ->
|
|
||||||
"Box A".equals(d.getArchiveBox()) &&
|
|
||||||
"Folder B".equals(d.getArchiveFolder()) &&
|
|
||||||
"Hamburg".equals(d.getLocation()) &&
|
|
||||||
"A summary".equals(d.getSummary()) &&
|
|
||||||
"A transcript".equals(d.getTranscription())));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void importSingleDocument_setsMetadataComplete_whenReceiversArePresent() {
|
|
||||||
Person receiver = Person.builder().id(UUID.randomUUID()).firstName("Walter").lastName("Müller").build();
|
|
||||||
when(documentService.findByOriginalFilename("rcv.pdf")).thenReturn(Optional.empty());
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
when(personService.findOrCreateByAlias("Walter Müller")).thenReturn(receiver);
|
|
||||||
|
|
||||||
List<String> cells = List.of(
|
|
||||||
"rcv.pdf", "", "", "", "", "Walter Müller", "", "", "", "", "", "", "", "");
|
|
||||||
service.importSingleDocument(cells, Optional.empty(), "rcv.pdf", "rcv");
|
|
||||||
|
|
||||||
verify(documentService).save(argThat(Document::isMetadataComplete));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void importSingleDocument_setsMetadataComplete_whenDateIsPresent() {
|
|
||||||
when(documentService.findByOriginalFilename("dated.pdf")).thenReturn(Optional.empty());
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
|
|
||||||
List<String> cells = List.of(
|
|
||||||
"dated.pdf", "", "", "", "", "", "", "2024-03-15", "", "", "", "", "", "");
|
|
||||||
service.importSingleDocument(cells, Optional.empty(), "dated.pdf", "dated");
|
|
||||||
|
|
||||||
verify(documentService).save(argThat(Document::isMetadataComplete));
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── buildTitle — null location ───────────────────────────────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void buildTitle_withNullLocation_skipsLocationPart() {
|
|
||||||
String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
|
|
||||||
"doc005", LocalDate.of(1940, 5, 1), (String) null);
|
|
||||||
assertThat(result).contains("doc005").contains("1940");
|
|
||||||
assertThat(result).doesNotContain("Berlin");
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── parseDate — via ReflectionTestUtils ─────────────────────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void parseDate_returnsNull_whenValueIsNull() {
|
|
||||||
LocalDate result = ReflectionTestUtils.invokeMethod(service, "parseDate", (String) null);
|
|
||||||
assertThat(result).isNull();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void parseDate_returnsNull_whenValueIsBlank() {
|
|
||||||
LocalDate result = ReflectionTestUtils.invokeMethod(service, "parseDate", " ");
|
|
||||||
assertThat(result).isNull();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void parseDate_returnsDate_whenValidIsoFormat() {
|
|
||||||
LocalDate result = ReflectionTestUtils.invokeMethod(service, "parseDate", "2024-03-15");
|
|
||||||
assertThat(result).isEqualTo(LocalDate.of(2024, 3, 15));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void parseDate_returnsNull_whenInvalidDateString() {
|
|
||||||
LocalDate result = ReflectionTestUtils.invokeMethod(service, "parseDate", "15.03.2024");
|
|
||||||
assertThat(result).isNull();
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── buildTitle — via ReflectionTestUtils ────────────────────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void buildTitle_withDateAndLocation() {
|
|
||||||
String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
|
|
||||||
"doc001", LocalDate.of(1940, 5, 1), "Berlin");
|
|
||||||
assertThat(result).contains("doc001").contains("Berlin").contains("1940");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void buildTitle_withDateOnly() {
|
|
||||||
String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
|
|
||||||
"doc002", LocalDate.of(1960, 8, 15), "");
|
|
||||||
assertThat(result).contains("doc002").contains("1960");
|
|
||||||
assertThat(result).doesNotContain("Berlin");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void buildTitle_withIndexOnly_whenDateAndLocationAreNull() {
|
|
||||||
String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
|
|
||||||
"doc003", null, "");
|
|
||||||
assertThat(result).isEqualTo("doc003");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void buildTitle_withLocationOnly_whenDateIsNull() {
|
|
||||||
// date=null, location present → date part skipped, location appended
|
|
||||||
String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
|
|
||||||
"doc004", null, "Berlin");
|
|
||||||
assertThat(result).contains("doc004").contains("Berlin");
|
|
||||||
assertThat(result).doesNotContain("("); // no date part
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── getCell — via ReflectionTestUtils ───────────────────────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void getCell_returnsEmptyString_whenColBeyondListSize() {
|
|
||||||
List<String> cells = List.of("a", "b");
|
|
||||||
String result = ReflectionTestUtils.invokeMethod(service, "getCell", cells, 5);
|
|
||||||
assertThat(result).isEmpty();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void getCell_returnsEmptyString_whenValueIsNull() {
|
|
||||||
List<String> cells = new ArrayList<>();
|
|
||||||
cells.add(null);
|
|
||||||
cells.add("b");
|
|
||||||
String result = ReflectionTestUtils.invokeMethod(service, "getCell", cells, 0);
|
|
||||||
assertThat(result).isEmpty();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void getCell_returnsTrimmedValue() {
|
|
||||||
List<String> cells = List.of(" hello ", "world");
|
|
||||||
String result = ReflectionTestUtils.invokeMethod(service, "getCell", cells, 0);
|
|
||||||
assertThat(result).isEqualTo("hello");
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── PDF magic byte validation regression ─────────────────────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void runImportAsync_uploadsValidPdf_andSkipsFakeOne(@TempDir Path tempDir) throws Exception {
|
|
||||||
setupOneValidOneFakeImport(tempDir);
|
|
||||||
|
|
||||||
service.runImportAsync();
|
|
||||||
|
|
||||||
verify(s3Client, times(1)).putObject(any(PutObjectRequest.class), any(RequestBody.class));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void runImportAsync_setsSkippedCount_toOne_whenOneFakeFile(@TempDir Path tempDir) throws Exception {
|
|
||||||
setupOneValidOneFakeImport(tempDir);
|
|
||||||
|
|
||||||
service.runImportAsync();
|
|
||||||
|
|
||||||
assertThat(service.getStatus().skipped()).isEqualTo(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void runImportAsync_includesRejectedFilename_inSkippedFiles(@TempDir Path tempDir) throws Exception {
|
|
||||||
setupOneValidOneFakeImport(tempDir);
|
|
||||||
|
|
||||||
service.runImportAsync();
|
|
||||||
|
|
||||||
assertThat(service.getStatus().skippedFiles())
|
|
||||||
.extracting(MassImportService.SkippedFile::filename)
|
|
||||||
.contains("fake.pdf");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void runImportAsync_skipsFile_whenShorterThanFourBytes(@TempDir Path tempDir) throws Exception {
|
|
||||||
Files.write(tempDir.resolve("tiny.pdf"), new byte[]{0x25, 0x50, 0x44}); // only 3 bytes
|
|
||||||
buildMinimalImportXlsx(tempDir, "tiny.pdf");
|
|
||||||
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
|
|
||||||
lenient().when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
|
|
||||||
|
|
||||||
service.runImportAsync();
|
|
||||||
|
|
||||||
assertThat(service.getStatus().skipped()).isEqualTo(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void runImportAsync_skipsFile_whenMagicBytesCheckThrowsIOException(@TempDir Path tempDir) throws Exception {
|
|
||||||
Files.writeString(tempDir.resolve("unreadable.pdf"), "some content");
|
|
||||||
buildMinimalImportXlsx(tempDir, "unreadable.pdf");
|
|
||||||
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
|
|
||||||
lenient().when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
|
|
||||||
|
|
||||||
MassImportService spyService = spy(service);
|
|
||||||
doThrow(new java.io.IOException("simulated read error")).when(spyService).openFileStream(any(File.class));
|
|
||||||
|
|
||||||
spyService.runImportAsync();
|
|
||||||
|
|
||||||
assertThat(spyService.getStatus().skipped()).isEqualTo(1);
|
|
||||||
assertThat(spyService.getStatus().skippedFiles())
|
|
||||||
.extracting(MassImportService.SkippedFile::reason)
|
|
||||||
.containsExactly(MassImportService.SkipReason.FILE_READ_ERROR);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── findFileRecursive — symlink escape security regression — do not remove ─
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void findFileRecursive_throwsDomainException_whenSymlinkEscapesImportDir(
|
|
||||||
@TempDir Path importDirPath, @TempDir Path outsideDir) throws Exception {
|
|
||||||
Path outsideFile = outsideDir.resolve("secret.pdf");
|
|
||||||
Files.writeString(outsideFile, "sensitive content");
|
|
||||||
Files.createSymbolicLink(importDirPath.resolve("secret.pdf"), outsideFile);
|
|
||||||
|
|
||||||
ReflectionTestUtils.setField(service, "importDir", importDirPath.toString());
|
|
||||||
|
|
||||||
assertThatThrownBy(() -> ReflectionTestUtils.invokeMethod(service, "findFileRecursive", "secret.pdf"))
|
|
||||||
.isInstanceOf(DomainException.class);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── readOds — XXE security regression ───────────────────────────────────
|
|
||||||
|
|
||||||
// Security regression — do not remove.
|
|
||||||
@Test
|
|
||||||
void readOds_rejects_xxe_doctype_payload(@TempDir Path tempDir) throws Exception {
|
|
||||||
File malicious = buildXxeOds(tempDir, "file:///etc/hostname");
|
|
||||||
assertThatThrownBy(() -> service.readOds(malicious))
|
|
||||||
.isInstanceOf(SAXParseException.class)
|
|
||||||
.hasMessageContaining("DOCTYPE is disallowed");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void readOds_parses_valid_ods_correctly(@TempDir Path tempDir) throws Exception {
|
|
||||||
File valid = buildValidOds(tempDir, "Mustermann");
|
|
||||||
List<List<String>> rows = service.readOds(valid);
|
|
||||||
assertThat(rows).isNotEmpty();
|
|
||||||
assertThat(rows.get(0)).contains("Mustermann");
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── helpers ──────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Builds a minimal 14-element cell row with the given filename at index 0
|
|
||||||
* and blanks for all optional fields.
|
|
||||||
*/
|
|
||||||
private List<String> minimalCells(String filename) {
|
|
||||||
return buildCells(filename, "", "", "");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Builds a cell row with sender, receiver, and tag controls.
|
|
||||||
* Layout matches the default column indices set in setUp().
|
|
||||||
*/
|
|
||||||
private List<String> buildCells(String filename, String sender, String receivers, String tag) {
|
|
||||||
// 14 elements: index=0,box=1,folder=2,sender=3,[4],receivers=5,[6],date=7,[8],location=9,tag=10,summary=11,[12],transcription=13
|
|
||||||
return List.of(
|
|
||||||
filename, // 0: index
|
|
||||||
"", // 1: box
|
|
||||||
"", // 2: folder
|
|
||||||
sender, // 3: sender
|
|
||||||
"", // 4: (unused)
|
|
||||||
receivers, // 5: receivers
|
|
||||||
"", // 6: (unused)
|
|
||||||
"", // 7: date
|
|
||||||
"", // 8: (unused)
|
|
||||||
"", // 9: location
|
|
||||||
tag, // 10: tags
|
|
||||||
"", // 11: summary
|
|
||||||
"", // 12: (unused)
|
|
||||||
"" // 13: transcription
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Creates a minimal ODS ZIP containing a content.xml with an XXE payload. */
|
|
||||||
private File buildXxeOds(Path dir, String entityTarget) throws Exception {
|
|
||||||
String xml = "<?xml version=\"1.0\"?>"
|
|
||||||
+ "<!DOCTYPE foo [<!ENTITY xxe SYSTEM \"" + entityTarget + "\">]>"
|
|
||||||
+ "<office:document-content"
|
|
||||||
+ " xmlns:office=\"urn:oasis:names:tc:opendocument:xmlns:office:1.0\""
|
|
||||||
+ " xmlns:table=\"urn:oasis:names:tc:opendocument:xmlns:table:1.0\""
|
|
||||||
+ " xmlns:text=\"urn:oasis:names:tc:opendocument:xmlns:text:1.0\">"
|
|
||||||
+ "<office:body><office:spreadsheet>"
|
|
||||||
+ "<table:table><table:table-row><table:table-cell>"
|
|
||||||
+ "<text:p>&xxe;</text:p>"
|
|
||||||
+ "</table:table-cell></table:table-row></table:table>"
|
|
||||||
+ "</office:spreadsheet></office:body>"
|
|
||||||
+ "</office:document-content>";
|
|
||||||
return writeOdsZip(dir.resolve("malicious.ods"), xml);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Creates a minimal valid ODS ZIP containing a content.xml with the given cell value.
|
|
||||||
* cellValue must not contain XML metacharacters ({@code < > &}). */
|
|
||||||
private File buildValidOds(Path dir, String cellValue) throws Exception {
|
|
||||||
String xml = "<?xml version=\"1.0\"?>"
|
|
||||||
+ "<office:document-content"
|
|
||||||
+ " xmlns:office=\"urn:oasis:names:tc:opendocument:xmlns:office:1.0\""
|
|
||||||
+ " xmlns:table=\"urn:oasis:names:tc:opendocument:xmlns:table:1.0\""
|
|
||||||
+ " xmlns:text=\"urn:oasis:names:tc:opendocument:xmlns:text:1.0\">"
|
|
||||||
+ "<office:body><office:spreadsheet>"
|
|
||||||
+ "<table:table><table:table-row><table:table-cell>"
|
|
||||||
+ "<text:p>" + cellValue + "</text:p>"
|
|
||||||
+ "</table:table-cell></table:table-row></table:table>"
|
|
||||||
+ "</office:spreadsheet></office:body>"
|
|
||||||
+ "</office:document-content>";
|
|
||||||
return writeOdsZip(dir.resolve("valid.ods"), xml);
|
|
||||||
}
|
|
||||||
|
|
||||||
private File writeOdsZip(Path destination, String contentXml) throws Exception {
|
|
||||||
try (OutputStream fos = Files.newOutputStream(destination);
|
|
||||||
ZipOutputStream zip = new ZipOutputStream(fos)) {
|
|
||||||
zip.putNextEntry(new ZipEntry("content.xml"));
|
|
||||||
zip.write(contentXml.getBytes(StandardCharsets.UTF_8));
|
|
||||||
zip.closeEntry();
|
|
||||||
}
|
|
||||||
return destination.toFile();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void setupOneValidOneFakeImport(Path tempDir) throws Exception {
|
|
||||||
byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF-
|
|
||||||
Files.write(tempDir.resolve("real.pdf"), pdfHeader);
|
|
||||||
Files.writeString(tempDir.resolve("fake.pdf"), "not a pdf");
|
|
||||||
buildMinimalImportXlsx(tempDir, "real.pdf", "fake.pdf");
|
|
||||||
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
|
|
||||||
when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
|
|
||||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
}
|
|
||||||
|
|
||||||
private void buildMinimalImportXlsx(Path dir, String... filenames) throws Exception {
|
|
||||||
Path xlsx = dir.resolve("import.xlsx");
|
|
||||||
try (XSSFWorkbook wb = new XSSFWorkbook()) {
|
|
||||||
org.apache.poi.ss.usermodel.Sheet sheet = wb.createSheet("Sheet1");
|
|
||||||
sheet.createRow(0).createCell(0).setCellValue("Index");
|
|
||||||
for (int i = 0; i < filenames.length; i++) {
|
|
||||||
sheet.createRow(i + 1).createCell(0).setCellValue(filenames[i]);
|
|
||||||
}
|
|
||||||
try (OutputStream out = Files.newOutputStream(xlsx)) {
|
|
||||||
wb.write(out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -7,7 +7,8 @@ import org.raddatz.familienarchiv.security.PermissionAspect;
|
|||||||
import org.raddatz.familienarchiv.user.CustomUserDetailsService;
|
import org.raddatz.familienarchiv.user.CustomUserDetailsService;
|
||||||
import org.raddatz.familienarchiv.document.DocumentService;
|
import org.raddatz.familienarchiv.document.DocumentService;
|
||||||
import org.raddatz.familienarchiv.document.DocumentVersionService;
|
import org.raddatz.familienarchiv.document.DocumentVersionService;
|
||||||
import org.raddatz.familienarchiv.importing.MassImportService;
|
import org.raddatz.familienarchiv.importing.CanonicalImportOrchestrator;
|
||||||
|
import org.raddatz.familienarchiv.importing.ImportStatus;
|
||||||
import org.raddatz.familienarchiv.document.ThumbnailBackfillService;
|
import org.raddatz.familienarchiv.document.ThumbnailBackfillService;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
import org.springframework.boot.autoconfigure.aop.AopAutoConfiguration;
|
import org.springframework.boot.autoconfigure.aop.AopAutoConfiguration;
|
||||||
@@ -35,7 +36,7 @@ class AdminControllerTest {
|
|||||||
|
|
||||||
@Autowired MockMvc mockMvc;
|
@Autowired MockMvc mockMvc;
|
||||||
|
|
||||||
@MockitoBean MassImportService massImportService;
|
@MockitoBean CanonicalImportOrchestrator importOrchestrator;
|
||||||
@MockitoBean DocumentService documentService;
|
@MockitoBean DocumentService documentService;
|
||||||
@MockitoBean DocumentVersionService documentVersionService;
|
@MockitoBean DocumentVersionService documentVersionService;
|
||||||
@MockitoBean ThumbnailBackfillService thumbnailBackfillService;
|
@MockitoBean ThumbnailBackfillService thumbnailBackfillService;
|
||||||
@@ -46,9 +47,9 @@ class AdminControllerTest {
|
|||||||
@Test
|
@Test
|
||||||
@WithMockUser(authorities = "ADMIN")
|
@WithMockUser(authorities = "ADMIN")
|
||||||
void importStatus_returns200_withStatusCode_whenAdmin() throws Exception {
|
void importStatus_returns200_withStatusCode_whenAdmin() throws Exception {
|
||||||
MassImportService.ImportStatus status = new MassImportService.ImportStatus(
|
ImportStatus status = new ImportStatus(
|
||||||
MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
|
ImportStatus.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
|
||||||
when(massImportService.getStatus()).thenReturn(status);
|
when(importOrchestrator.getStatus()).thenReturn(status);
|
||||||
|
|
||||||
mockMvc.perform(get("/api/admin/import-status"))
|
mockMvc.perform(get("/api/admin/import-status"))
|
||||||
.andExpect(status().isOk())
|
.andExpect(status().isOk())
|
||||||
@@ -60,9 +61,9 @@ class AdminControllerTest {
|
|||||||
@Test
|
@Test
|
||||||
@WithMockUser(authorities = "ADMIN")
|
@WithMockUser(authorities = "ADMIN")
|
||||||
void importStatus_messageField_notPresentInApiResponse() throws Exception {
|
void importStatus_messageField_notPresentInApiResponse() throws Exception {
|
||||||
MassImportService.ImportStatus status = new MassImportService.ImportStatus(
|
ImportStatus status = new ImportStatus(
|
||||||
MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
|
ImportStatus.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
|
||||||
when(massImportService.getStatus()).thenReturn(status);
|
when(importOrchestrator.getStatus()).thenReturn(status);
|
||||||
|
|
||||||
mockMvc.perform(get("/api/admin/import-status"))
|
mockMvc.perform(get("/api/admin/import-status"))
|
||||||
.andExpect(status().isOk())
|
.andExpect(status().isOk())
|
||||||
|
|||||||
Reference in New Issue
Block a user