feat: rewrite MassImportService for ODS import

- Use WorkbookFactory.create() to support .ods, .xlsx, and .xls - Discover any spreadsheet file (not just .xlsx) in /import - Fix column indices to match actual ODS structure (index=0, box=1, folder=2, sender=3, receivers=5, date=7, location=9, tags=10, summary=11, transcription=13) - Append .pdf extension to bare index values (W-0001 → W-0001.pdf) - Build German-format title: "W-0001 – 15. Februar 1888 – Rotterdam" - Parse ISO date strings (col 7 is text in LibreOffice ODS) - Resolve sender (col 3) and receivers (col 5) to Person entities via lookup-or-create by alias using PersonNameParser normalisation - Import tag (col 10) via lookup-or-create - Import summary from col 11 (Inhalt) - Import archiveBox (col 1) and archiveFolder (col 2) - Inject PersonRepository and TagRepository Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-15 20:50:06 +01:00
parent 6e5761840c
commit 5abec093e5
1 changed files with 147 additions and 55 deletions
--- a/backend/src/main/java/org/raddatz/familienarchiv/service/MassImportService.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/service/MassImportService.java
@@ -3,12 +3,15 @@ package org.raddatz.familienarchiv.service;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.poi.ss.usermodel.*;
-import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 import org.raddatz.familienarchiv.exception.DomainException;
 import org.raddatz.familienarchiv.exception.ErrorCode;
 import org.raddatz.familienarchiv.model.Document;
 import org.raddatz.familienarchiv.model.DocumentStatus;
+import org.raddatz.familienarchiv.model.Person;
+import org.raddatz.familienarchiv.model.Tag;
 import org.raddatz.familienarchiv.repository.DocumentRepository;
+import org.raddatz.familienarchiv.repository.PersonRepository;
+import org.raddatz.familienarchiv.repository.TagRepository;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.scheduling.annotation.Async;
 import org.springframework.stereotype.Service;
@@ -26,6 +29,10 @@ import java.nio.file.Paths;
 import java.time.LocalDate;
 import java.time.LocalDateTime;
 import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeParseException;
+import java.util.List;
+import java.util.Locale;
 import java.util.Optional;
 import java.util.UUID;
 import java.util.stream.Stream;
@@ -46,22 +53,45 @@ public class MassImportService {
    }

    private final DocumentRepository documentRepository;
+    private final PersonRepository personRepository;
+    private final TagRepository tagRepository;
    private final S3Client s3Client;

    @Value("${app.s3.bucket}")
    private String bucketName;

-    // Konfiguration der Spalten (wie im ExcelService)
-    @Value("${app.import.excel.col.filename:0}")
-    private int colFilename;
-    @Value("${app.import.excel.col.date:1}")
+    @Value("${app.import.col.index:0}")
+    private int colIndex;
+
+    @Value("${app.import.col.box:1}")
+    private int colBox;
+
+    @Value("${app.import.col.folder:2}")
+    private int colFolder;
+
+    @Value("${app.import.col.sender:3}")
+    private int colSender;
+
+    @Value("${app.import.col.receivers:5}")
+    private int colReceivers;
+
+    @Value("${app.import.col.date:7}")
    private int colDate;
-    @Value("${app.import.excel.col.location:2}")
+
+    @Value("${app.import.col.location:9}")
    private int colLocation;
-    @Value("${app.import.excel.col.transcription:3}")
+
+    @Value("${app.import.col.tags:10}")
+    private int colTags;
+
+    @Value("${app.import.col.summary:11}")
+    private int colSummary;
+
+    @Value("${app.import.col.transcription:13}")
    private int colTranscription;

    private static final String IMPORT_DIR = "/import";
+    private static final DateTimeFormatter GERMAN_DATE = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN);

    @Async
    public void runImportAsync() {
@@ -70,52 +100,55 @@ public class MassImportService {
        }
        currentStatus = new ImportStatus(State.RUNNING, "Import läuft...", 0, LocalDateTime.now());
        try {
-            File excelFile = findExcelFile();
-            log.info("Starte Massenimport aus: {}", excelFile.getAbsolutePath());
-            int processed = processExcel(excelFile);
-            currentStatus = new ImportStatus(State.DONE, "Import abgeschlossen. " + processed + " Dokumente verarbeitet.", processed, currentStatus.startedAt());
+            File spreadsheet = findSpreadsheetFile();
+            log.info("Starte Massenimport aus: {}", spreadsheet.getAbsolutePath());
+            int processed = processSpreadsheet(spreadsheet);
+            currentStatus = new ImportStatus(State.DONE,
+                    "Import abgeschlossen. " + processed + " Dokumente verarbeitet.",
+                    processed, currentStatus.startedAt());
        } catch (Exception e) {
            log.error("Massenimport fehlgeschlagen", e);
            currentStatus = new ImportStatus(State.FAILED, "Fehler: " + e.getMessage(), 0, currentStatus.startedAt());
        }
    }

-    private File findExcelFile() throws IOException {
+    private File findSpreadsheetFile() throws IOException {
        try (Stream<Path> files = Files.list(Paths.get(IMPORT_DIR))) {
-            return files.filter(p -> p.toString().endsWith(".xlsx"))
+            return files
+                    .filter(p -> {
+                        String name = p.toString().toLowerCase();
+                        return name.endsWith(".ods") || name.endsWith(".xlsx") || name.endsWith(".xls");
+                    })
                    .findFirst()
-                    .orElseThrow(() -> new RuntimeException("Keine .xlsx Datei in " + IMPORT_DIR + " gefunden!"))
+                    .orElseThrow(() -> new RuntimeException(
+                            "Keine Tabellendatei (.ods/.xlsx/.xls) in " + IMPORT_DIR + " gefunden!"))
                    .toFile();
        }
    }

-    private int processExcel(File excelFile) throws IOException {
+    private int processSpreadsheet(File file) throws IOException {
        int count = 0;
-        try (FileInputStream fis = new FileInputStream(excelFile);
-                Workbook workbook = new XSSFWorkbook(fis)) {
+        try (FileInputStream fis = new FileInputStream(file);
+             Workbook workbook = WorkbookFactory.create(fis)) {

            Sheet sheet = workbook.getSheetAt(0);

-            // Wir nehmen an: Spalte "FilePath" im Excel ist RELATIV zum Import-Ordner
-            // ODER: Wir suchen die Datei rekursiv, wenn nur der Name angegeben ist.
-
            for (int i = 1; i <= sheet.getLastRowNum(); i++) {
                Row row = sheet.getRow(i);
-                if (row == null)
-                    continue;
+                if (row == null) continue;

-                String filename = getCellValue(row.getCell(colFilename));
-                if (filename == null || filename.isBlank())
-                    continue;
+                String index = getCellValue(row.getCell(colIndex));
+                if (index == null || index.isBlank()) continue;
+
+                // Append .pdf extension if the index has none
+                String filename = index.contains(".") ? index : index + ".pdf";

-                // Datei auf der Festplatte suchen
                Optional<File> fileOnDisk = findFileRecursive(filename);
-
                if (fileOnDisk.isPresent()) {
-                    importSingleDocument(row, fileOnDisk.get(), filename);
+                    importSingleDocument(row, fileOnDisk.get(), filename, index);
                    count++;
                } else {
-                    log.warn("Datei aus Excel nicht gefunden: {}", filename);
+                    log.warn("Datei nicht gefunden: {}", filename);
                }
            }
        }
@@ -123,24 +156,26 @@ public class MassImportService {
    }

    @Transactional
-    protected void importSingleDocument(Row row, File file, String originalFilename) {
-        // Metadaten lesen
-        LocalDate date = null;
-        Cell dateCell = row.getCell(colDate);
-        if (dateCell != null && dateCell.getCellType() == CellType.NUMERIC) {
-            date = dateCell.getDateCellValue().toInstant().atZone(ZoneId.systemDefault()).toLocalDate();
-        }
-        String location = getCellValue(row.getCell(colLocation));
-        String transcription = getCellValue(row.getCell(colTranscription));
-
-        // Prüfen ob schon da
+    protected void importSingleDocument(Row row, File file, String originalFilename, String index) {
+        // Skip documents that have already been processed beyond placeholder stage
        Optional<Document> existing = documentRepository.findByOriginalFilename(originalFilename);
        if (existing.isPresent() && existing.get().getStatus() != DocumentStatus.PLACEHOLDER) {
            log.info("Dokument {} existiert bereits, überspringe.", originalFilename);
            return;
        }

-        // Detect MIME type from the local file
+        // Read metadata from ODS row
+        String archiveBox     = getCellValue(row.getCell(colBox));
+        String archiveFolder  = getCellValue(row.getCell(colFolder));
+        String senderRaw      = getCellValue(row.getCell(colSender));
+        String receiversRaw   = getCellValue(row.getCell(colReceivers));
+        LocalDate date        = parseDate(row.getCell(colDate));
+        String location       = getCellValue(row.getCell(colLocation));
+        String tagRaw         = getCellValue(row.getCell(colTags));
+        String summary        = getCellValue(row.getCell(colSummary));
+        String transcription  = getCellValue(row.getCell(colTranscription));
+
+        // Detect content type from the local file
        String contentType;
        try {
            contentType = Files.probeContentType(file.toPath());
@@ -149,7 +184,7 @@ public class MassImportService {
        }
        if (contentType == null) contentType = "application/octet-stream";

-        // Upload zu S3
+        // Upload to S3
        String s3Key = "documents/" + UUID.randomUUID() + "_" + file.getName();
        try {
            s3Client.putObject(PutObjectRequest.builder()
@@ -159,28 +194,85 @@ public class MassImportService {
                    .build(),
                    RequestBody.fromFile(file));
        } catch (Exception e) {
-            log.error("S3 Upload Fehler für " + file.getName(), e);
-            return; // Abbruch für dieses Dokument
+            log.error("S3 Upload Fehler für {}", file.getName(), e);
+            return;
        }

-        // DB Speichern
+        // Resolve sender and receivers to Person entities
+        Person sender = senderRaw != null && !senderRaw.isBlank() ? findOrCreatePerson(senderRaw) : null;
+
+        List<Person> receivers = PersonNameParser.parseReceivers(receiversRaw).stream()
+                .map(this::findOrCreatePerson)
+                .toList();
+
+        // Resolve tag
+        Tag tag = null;
+        if (tagRaw != null && !tagRaw.isBlank()) {
+            tag = tagRepository.findByNameIgnoreCase(tagRaw)
+                    .orElseGet(() -> tagRepository.save(Tag.builder().name(tagRaw).build()));
+        }
+
+        // Build or update the Document record
        Document doc = existing.orElse(Document.builder()
                .originalFilename(originalFilename)
-                .title(originalFilename)
                .build());

+        doc.setTitle(buildTitle(index, date, location));
        doc.setFilePath(s3Key);
        doc.setContentType(contentType);
-        doc.setStatus(DocumentStatus.UPLOADED); // Jetzt ist es da!
+        doc.setStatus(DocumentStatus.UPLOADED);
+        doc.setArchiveBox(archiveBox);
+        doc.setArchiveFolder(archiveFolder);
        doc.setDocumentDate(date);
        doc.setLocation(location);
+        doc.setSummary(summary);
        doc.setTranscription(transcription);
+        doc.setSender(sender);
+        doc.getReceivers().addAll(receivers);
+        if (tag != null) doc.getTags().add(tag);

        documentRepository.save(doc);
        log.info("Importiert: {}", originalFilename);
    }

-    // Sucht Datei im gesamten /import Ordner (rekursiv)
+    private Person findOrCreatePerson(String rawName) {
+        String alias = rawName.trim();
+        return personRepository.findByAliasIgnoreCase(alias).orElseGet(() -> {
+            PersonNameParser.SplitName split = PersonNameParser.split(alias);
+            return personRepository.save(Person.builder()
+                    .alias(alias)
+                    .firstName(split.firstName())
+                    .lastName(split.lastName())
+                    .build());
+        });
+    }
+
+    private String buildTitle(String index, LocalDate date, String location) {
+        StringBuilder sb = new StringBuilder(index);
+        if (date != null) {
+            sb.append(" – ").append(date.format(GERMAN_DATE));
+        }
+        if (location != null && !location.isBlank()) {
+            sb.append(" – ").append(location);
+        }
+        return sb.toString();
+    }
+
+    private LocalDate parseDate(Cell cell) {
+        if (cell == null) return null;
+        if (cell.getCellType() == CellType.NUMERIC && DateUtil.isCellDateFormatted(cell)) {
+            return cell.getDateCellValue().toInstant().atZone(ZoneId.systemDefault()).toLocalDate();
+        }
+        if (cell.getCellType() == CellType.STRING) {
+            try {
+                return LocalDate.parse(cell.getStringCellValue().trim());
+            } catch (DateTimeParseException e) {
+                return null;
+            }
+        }
+        return null;
+    }
+
    private Optional<File> findFileRecursive(String filename) {
        try (Stream<Path> walk = Files.walk(Paths.get(IMPORT_DIR))) {
            return walk.filter(p -> !Files.isDirectory(p))
@@ -193,12 +285,12 @@ public class MassImportService {
    }

    private String getCellValue(Cell cell) {
-        if (cell == null)
-            return null;
-        if (cell.getCellType() == CellType.STRING)
-            return cell.getStringCellValue();
-        if (cell.getCellType() == CellType.NUMERIC)
-            return String.valueOf((int) cell.getNumericCellValue());
-        return "";
+        if (cell == null) return null;
+        return switch (cell.getCellType()) {
+            case STRING -> cell.getStringCellValue();
+            case NUMERIC -> String.valueOf((int) cell.getNumericCellValue());
+            case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
+            default -> null;
+        };
    }
 }