From 5abec093e5bfaad647709cc7584e30e9c85565b5 Mon Sep 17 00:00:00 2001 From: Marcel Date: Sun, 15 Mar 2026 20:50:06 +0100 Subject: [PATCH] feat: rewrite MassImportService for ODS import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use WorkbookFactory.create() to support .ods, .xlsx, and .xls - Discover any spreadsheet file (not just .xlsx) in /import - Fix column indices to match actual ODS structure (index=0, box=1, folder=2, sender=3, receivers=5, date=7, location=9, tags=10, summary=11, transcription=13) - Append .pdf extension to bare index values (W-0001 → W-0001.pdf) - Build German-format title: "W-0001 – 15. Februar 1888 – Rotterdam" - Parse ISO date strings (col 7 is text in LibreOffice ODS) - Resolve sender (col 3) and receivers (col 5) to Person entities via lookup-or-create by alias using PersonNameParser normalisation - Import tag (col 10) via lookup-or-create - Import summary from col 11 (Inhalt) - Import archiveBox (col 1) and archiveFolder (col 2) - Inject PersonRepository and TagRepository Co-Authored-By: Claude Sonnet 4.6 --- .../service/MassImportService.java | 202 +++++++++++++----- 1 file changed, 147 insertions(+), 55 deletions(-) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/MassImportService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/MassImportService.java index da04d58a..9f1b93a3 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/MassImportService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/MassImportService.java @@ -3,12 +3,15 @@ package org.raddatz.familienarchiv.service; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.poi.ss.usermodel.*; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.raddatz.familienarchiv.exception.DomainException; import org.raddatz.familienarchiv.exception.ErrorCode; import org.raddatz.familienarchiv.model.Document; import org.raddatz.familienarchiv.model.DocumentStatus; +import org.raddatz.familienarchiv.model.Person; +import org.raddatz.familienarchiv.model.Tag; import org.raddatz.familienarchiv.repository.DocumentRepository; +import org.raddatz.familienarchiv.repository.PersonRepository; +import org.raddatz.familienarchiv.repository.TagRepository; import org.springframework.beans.factory.annotation.Value; import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Service; @@ -26,6 +29,10 @@ import java.nio.file.Paths; import java.time.LocalDate; import java.time.LocalDateTime; import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.List; +import java.util.Locale; import java.util.Optional; import java.util.UUID; import java.util.stream.Stream; @@ -46,22 +53,45 @@ public class MassImportService { } private final DocumentRepository documentRepository; + private final PersonRepository personRepository; + private final TagRepository tagRepository; private final S3Client s3Client; @Value("${app.s3.bucket}") private String bucketName; - // Konfiguration der Spalten (wie im ExcelService) - @Value("${app.import.excel.col.filename:0}") - private int colFilename; - @Value("${app.import.excel.col.date:1}") + @Value("${app.import.col.index:0}") + private int colIndex; + + @Value("${app.import.col.box:1}") + private int colBox; + + @Value("${app.import.col.folder:2}") + private int colFolder; + + @Value("${app.import.col.sender:3}") + private int colSender; + + @Value("${app.import.col.receivers:5}") + private int colReceivers; + + @Value("${app.import.col.date:7}") private int colDate; - @Value("${app.import.excel.col.location:2}") + + @Value("${app.import.col.location:9}") private int colLocation; - @Value("${app.import.excel.col.transcription:3}") + + @Value("${app.import.col.tags:10}") + private int colTags; + + @Value("${app.import.col.summary:11}") + private int colSummary; + + @Value("${app.import.col.transcription:13}") private int colTranscription; private static final String IMPORT_DIR = "/import"; + private static final DateTimeFormatter GERMAN_DATE = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN); @Async public void runImportAsync() { @@ -70,52 +100,55 @@ public class MassImportService { } currentStatus = new ImportStatus(State.RUNNING, "Import läuft...", 0, LocalDateTime.now()); try { - File excelFile = findExcelFile(); - log.info("Starte Massenimport aus: {}", excelFile.getAbsolutePath()); - int processed = processExcel(excelFile); - currentStatus = new ImportStatus(State.DONE, "Import abgeschlossen. " + processed + " Dokumente verarbeitet.", processed, currentStatus.startedAt()); + File spreadsheet = findSpreadsheetFile(); + log.info("Starte Massenimport aus: {}", spreadsheet.getAbsolutePath()); + int processed = processSpreadsheet(spreadsheet); + currentStatus = new ImportStatus(State.DONE, + "Import abgeschlossen. " + processed + " Dokumente verarbeitet.", + processed, currentStatus.startedAt()); } catch (Exception e) { log.error("Massenimport fehlgeschlagen", e); currentStatus = new ImportStatus(State.FAILED, "Fehler: " + e.getMessage(), 0, currentStatus.startedAt()); } } - private File findExcelFile() throws IOException { + private File findSpreadsheetFile() throws IOException { try (Stream files = Files.list(Paths.get(IMPORT_DIR))) { - return files.filter(p -> p.toString().endsWith(".xlsx")) + return files + .filter(p -> { + String name = p.toString().toLowerCase(); + return name.endsWith(".ods") || name.endsWith(".xlsx") || name.endsWith(".xls"); + }) .findFirst() - .orElseThrow(() -> new RuntimeException("Keine .xlsx Datei in " + IMPORT_DIR + " gefunden!")) + .orElseThrow(() -> new RuntimeException( + "Keine Tabellendatei (.ods/.xlsx/.xls) in " + IMPORT_DIR + " gefunden!")) .toFile(); } } - private int processExcel(File excelFile) throws IOException { + private int processSpreadsheet(File file) throws IOException { int count = 0; - try (FileInputStream fis = new FileInputStream(excelFile); - Workbook workbook = new XSSFWorkbook(fis)) { + try (FileInputStream fis = new FileInputStream(file); + Workbook workbook = WorkbookFactory.create(fis)) { Sheet sheet = workbook.getSheetAt(0); - // Wir nehmen an: Spalte "FilePath" im Excel ist RELATIV zum Import-Ordner - // ODER: Wir suchen die Datei rekursiv, wenn nur der Name angegeben ist. - for (int i = 1; i <= sheet.getLastRowNum(); i++) { Row row = sheet.getRow(i); - if (row == null) - continue; + if (row == null) continue; - String filename = getCellValue(row.getCell(colFilename)); - if (filename == null || filename.isBlank()) - continue; + String index = getCellValue(row.getCell(colIndex)); + if (index == null || index.isBlank()) continue; + + // Append .pdf extension if the index has none + String filename = index.contains(".") ? index : index + ".pdf"; - // Datei auf der Festplatte suchen Optional fileOnDisk = findFileRecursive(filename); - if (fileOnDisk.isPresent()) { - importSingleDocument(row, fileOnDisk.get(), filename); + importSingleDocument(row, fileOnDisk.get(), filename, index); count++; } else { - log.warn("Datei aus Excel nicht gefunden: {}", filename); + log.warn("Datei nicht gefunden: {}", filename); } } } @@ -123,24 +156,26 @@ public class MassImportService { } @Transactional - protected void importSingleDocument(Row row, File file, String originalFilename) { - // Metadaten lesen - LocalDate date = null; - Cell dateCell = row.getCell(colDate); - if (dateCell != null && dateCell.getCellType() == CellType.NUMERIC) { - date = dateCell.getDateCellValue().toInstant().atZone(ZoneId.systemDefault()).toLocalDate(); - } - String location = getCellValue(row.getCell(colLocation)); - String transcription = getCellValue(row.getCell(colTranscription)); - - // Prüfen ob schon da + protected void importSingleDocument(Row row, File file, String originalFilename, String index) { + // Skip documents that have already been processed beyond placeholder stage Optional existing = documentRepository.findByOriginalFilename(originalFilename); if (existing.isPresent() && existing.get().getStatus() != DocumentStatus.PLACEHOLDER) { log.info("Dokument {} existiert bereits, überspringe.", originalFilename); return; } - // Detect MIME type from the local file + // Read metadata from ODS row + String archiveBox = getCellValue(row.getCell(colBox)); + String archiveFolder = getCellValue(row.getCell(colFolder)); + String senderRaw = getCellValue(row.getCell(colSender)); + String receiversRaw = getCellValue(row.getCell(colReceivers)); + LocalDate date = parseDate(row.getCell(colDate)); + String location = getCellValue(row.getCell(colLocation)); + String tagRaw = getCellValue(row.getCell(colTags)); + String summary = getCellValue(row.getCell(colSummary)); + String transcription = getCellValue(row.getCell(colTranscription)); + + // Detect content type from the local file String contentType; try { contentType = Files.probeContentType(file.toPath()); @@ -149,7 +184,7 @@ public class MassImportService { } if (contentType == null) contentType = "application/octet-stream"; - // Upload zu S3 + // Upload to S3 String s3Key = "documents/" + UUID.randomUUID() + "_" + file.getName(); try { s3Client.putObject(PutObjectRequest.builder() @@ -159,28 +194,85 @@ public class MassImportService { .build(), RequestBody.fromFile(file)); } catch (Exception e) { - log.error("S3 Upload Fehler für " + file.getName(), e); - return; // Abbruch für dieses Dokument + log.error("S3 Upload Fehler für {}", file.getName(), e); + return; } - // DB Speichern + // Resolve sender and receivers to Person entities + Person sender = senderRaw != null && !senderRaw.isBlank() ? findOrCreatePerson(senderRaw) : null; + + List receivers = PersonNameParser.parseReceivers(receiversRaw).stream() + .map(this::findOrCreatePerson) + .toList(); + + // Resolve tag + Tag tag = null; + if (tagRaw != null && !tagRaw.isBlank()) { + tag = tagRepository.findByNameIgnoreCase(tagRaw) + .orElseGet(() -> tagRepository.save(Tag.builder().name(tagRaw).build())); + } + + // Build or update the Document record Document doc = existing.orElse(Document.builder() .originalFilename(originalFilename) - .title(originalFilename) .build()); + doc.setTitle(buildTitle(index, date, location)); doc.setFilePath(s3Key); doc.setContentType(contentType); - doc.setStatus(DocumentStatus.UPLOADED); // Jetzt ist es da! + doc.setStatus(DocumentStatus.UPLOADED); + doc.setArchiveBox(archiveBox); + doc.setArchiveFolder(archiveFolder); doc.setDocumentDate(date); doc.setLocation(location); + doc.setSummary(summary); doc.setTranscription(transcription); + doc.setSender(sender); + doc.getReceivers().addAll(receivers); + if (tag != null) doc.getTags().add(tag); documentRepository.save(doc); log.info("Importiert: {}", originalFilename); } - // Sucht Datei im gesamten /import Ordner (rekursiv) + private Person findOrCreatePerson(String rawName) { + String alias = rawName.trim(); + return personRepository.findByAliasIgnoreCase(alias).orElseGet(() -> { + PersonNameParser.SplitName split = PersonNameParser.split(alias); + return personRepository.save(Person.builder() + .alias(alias) + .firstName(split.firstName()) + .lastName(split.lastName()) + .build()); + }); + } + + private String buildTitle(String index, LocalDate date, String location) { + StringBuilder sb = new StringBuilder(index); + if (date != null) { + sb.append(" – ").append(date.format(GERMAN_DATE)); + } + if (location != null && !location.isBlank()) { + sb.append(" – ").append(location); + } + return sb.toString(); + } + + private LocalDate parseDate(Cell cell) { + if (cell == null) return null; + if (cell.getCellType() == CellType.NUMERIC && DateUtil.isCellDateFormatted(cell)) { + return cell.getDateCellValue().toInstant().atZone(ZoneId.systemDefault()).toLocalDate(); + } + if (cell.getCellType() == CellType.STRING) { + try { + return LocalDate.parse(cell.getStringCellValue().trim()); + } catch (DateTimeParseException e) { + return null; + } + } + return null; + } + private Optional findFileRecursive(String filename) { try (Stream walk = Files.walk(Paths.get(IMPORT_DIR))) { return walk.filter(p -> !Files.isDirectory(p)) @@ -193,12 +285,12 @@ public class MassImportService { } private String getCellValue(Cell cell) { - if (cell == null) - return null; - if (cell.getCellType() == CellType.STRING) - return cell.getStringCellValue(); - if (cell.getCellType() == CellType.NUMERIC) - return String.valueOf((int) cell.getNumericCellValue()); - return ""; + if (cell == null) return null; + return switch (cell.getCellType()) { + case STRING -> cell.getStringCellValue(); + case NUMERIC -> String.valueOf((int) cell.getNumericCellValue()); + case BOOLEAN -> String.valueOf(cell.getBooleanCellValue()); + default -> null; + }; } }