feat: rewrite MassImportService for ODS import

- Use WorkbookFactory.create() to support .ods, .xlsx, and .xls
- Discover any spreadsheet file (not just .xlsx) in /import
- Fix column indices to match actual ODS structure (index=0, box=1,
  folder=2, sender=3, receivers=5, date=7, location=9, tags=10,
  summary=11, transcription=13)
- Append .pdf extension to bare index values (W-0001 → W-0001.pdf)
- Build German-format title: "W-0001 – 15. Februar 1888 – Rotterdam"
- Parse ISO date strings (col 7 is text in LibreOffice ODS)
- Resolve sender (col 3) and receivers (col 5) to Person entities via
  lookup-or-create by alias using PersonNameParser normalisation
- Import tag (col 10) via lookup-or-create
- Import summary from col 11 (Inhalt)
- Import archiveBox (col 1) and archiveFolder (col 2)
- Inject PersonRepository and TagRepository

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-03-15 20:50:06 +01:00
parent 6e5761840c
commit 5abec093e5

View File

@@ -3,12 +3,15 @@ package org.raddatz.familienarchiv.service;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
import org.raddatz.familienarchiv.model.Document;
import org.raddatz.familienarchiv.model.DocumentStatus;
import org.raddatz.familienarchiv.model.Person;
import org.raddatz.familienarchiv.model.Tag;
import org.raddatz.familienarchiv.repository.DocumentRepository;
import org.raddatz.familienarchiv.repository.PersonRepository;
import org.raddatz.familienarchiv.repository.TagRepository;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
@@ -26,6 +29,10 @@ import java.nio.file.Paths;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.UUID;
import java.util.stream.Stream;
@@ -46,22 +53,45 @@ public class MassImportService {
}
private final DocumentRepository documentRepository;
private final PersonRepository personRepository;
private final TagRepository tagRepository;
private final S3Client s3Client;
@Value("${app.s3.bucket}")
private String bucketName;
// Konfiguration der Spalten (wie im ExcelService)
@Value("${app.import.excel.col.filename:0}")
private int colFilename;
@Value("${app.import.excel.col.date:1}")
@Value("${app.import.col.index:0}")
private int colIndex;
@Value("${app.import.col.box:1}")
private int colBox;
@Value("${app.import.col.folder:2}")
private int colFolder;
@Value("${app.import.col.sender:3}")
private int colSender;
@Value("${app.import.col.receivers:5}")
private int colReceivers;
@Value("${app.import.col.date:7}")
private int colDate;
@Value("${app.import.excel.col.location:2}")
@Value("${app.import.col.location:9}")
private int colLocation;
@Value("${app.import.excel.col.transcription:3}")
@Value("${app.import.col.tags:10}")
private int colTags;
@Value("${app.import.col.summary:11}")
private int colSummary;
@Value("${app.import.col.transcription:13}")
private int colTranscription;
private static final String IMPORT_DIR = "/import";
private static final DateTimeFormatter GERMAN_DATE = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN);
@Async
public void runImportAsync() {
@@ -70,52 +100,55 @@ public class MassImportService {
}
currentStatus = new ImportStatus(State.RUNNING, "Import läuft...", 0, LocalDateTime.now());
try {
File excelFile = findExcelFile();
log.info("Starte Massenimport aus: {}", excelFile.getAbsolutePath());
int processed = processExcel(excelFile);
currentStatus = new ImportStatus(State.DONE, "Import abgeschlossen. " + processed + " Dokumente verarbeitet.", processed, currentStatus.startedAt());
File spreadsheet = findSpreadsheetFile();
log.info("Starte Massenimport aus: {}", spreadsheet.getAbsolutePath());
int processed = processSpreadsheet(spreadsheet);
currentStatus = new ImportStatus(State.DONE,
"Import abgeschlossen. " + processed + " Dokumente verarbeitet.",
processed, currentStatus.startedAt());
} catch (Exception e) {
log.error("Massenimport fehlgeschlagen", e);
currentStatus = new ImportStatus(State.FAILED, "Fehler: " + e.getMessage(), 0, currentStatus.startedAt());
}
}
private File findExcelFile() throws IOException {
private File findSpreadsheetFile() throws IOException {
try (Stream<Path> files = Files.list(Paths.get(IMPORT_DIR))) {
return files.filter(p -> p.toString().endsWith(".xlsx"))
return files
.filter(p -> {
String name = p.toString().toLowerCase();
return name.endsWith(".ods") || name.endsWith(".xlsx") || name.endsWith(".xls");
})
.findFirst()
.orElseThrow(() -> new RuntimeException("Keine .xlsx Datei in " + IMPORT_DIR + " gefunden!"))
.orElseThrow(() -> new RuntimeException(
"Keine Tabellendatei (.ods/.xlsx/.xls) in " + IMPORT_DIR + " gefunden!"))
.toFile();
}
}
private int processExcel(File excelFile) throws IOException {
private int processSpreadsheet(File file) throws IOException {
int count = 0;
try (FileInputStream fis = new FileInputStream(excelFile);
Workbook workbook = new XSSFWorkbook(fis)) {
try (FileInputStream fis = new FileInputStream(file);
Workbook workbook = WorkbookFactory.create(fis)) {
Sheet sheet = workbook.getSheetAt(0);
// Wir nehmen an: Spalte "FilePath" im Excel ist RELATIV zum Import-Ordner
// ODER: Wir suchen die Datei rekursiv, wenn nur der Name angegeben ist.
for (int i = 1; i <= sheet.getLastRowNum(); i++) {
Row row = sheet.getRow(i);
if (row == null)
continue;
if (row == null) continue;
String filename = getCellValue(row.getCell(colFilename));
if (filename == null || filename.isBlank())
continue;
String index = getCellValue(row.getCell(colIndex));
if (index == null || index.isBlank()) continue;
// Append .pdf extension if the index has none
String filename = index.contains(".") ? index : index + ".pdf";
// Datei auf der Festplatte suchen
Optional<File> fileOnDisk = findFileRecursive(filename);
if (fileOnDisk.isPresent()) {
importSingleDocument(row, fileOnDisk.get(), filename);
importSingleDocument(row, fileOnDisk.get(), filename, index);
count++;
} else {
log.warn("Datei aus Excel nicht gefunden: {}", filename);
log.warn("Datei nicht gefunden: {}", filename);
}
}
}
@@ -123,24 +156,26 @@ public class MassImportService {
}
@Transactional
protected void importSingleDocument(Row row, File file, String originalFilename) {
// Metadaten lesen
LocalDate date = null;
Cell dateCell = row.getCell(colDate);
if (dateCell != null && dateCell.getCellType() == CellType.NUMERIC) {
date = dateCell.getDateCellValue().toInstant().atZone(ZoneId.systemDefault()).toLocalDate();
}
String location = getCellValue(row.getCell(colLocation));
String transcription = getCellValue(row.getCell(colTranscription));
// Prüfen ob schon da
protected void importSingleDocument(Row row, File file, String originalFilename, String index) {
// Skip documents that have already been processed beyond placeholder stage
Optional<Document> existing = documentRepository.findByOriginalFilename(originalFilename);
if (existing.isPresent() && existing.get().getStatus() != DocumentStatus.PLACEHOLDER) {
log.info("Dokument {} existiert bereits, überspringe.", originalFilename);
return;
}
// Detect MIME type from the local file
// Read metadata from ODS row
String archiveBox = getCellValue(row.getCell(colBox));
String archiveFolder = getCellValue(row.getCell(colFolder));
String senderRaw = getCellValue(row.getCell(colSender));
String receiversRaw = getCellValue(row.getCell(colReceivers));
LocalDate date = parseDate(row.getCell(colDate));
String location = getCellValue(row.getCell(colLocation));
String tagRaw = getCellValue(row.getCell(colTags));
String summary = getCellValue(row.getCell(colSummary));
String transcription = getCellValue(row.getCell(colTranscription));
// Detect content type from the local file
String contentType;
try {
contentType = Files.probeContentType(file.toPath());
@@ -149,7 +184,7 @@ public class MassImportService {
}
if (contentType == null) contentType = "application/octet-stream";
// Upload zu S3
// Upload to S3
String s3Key = "documents/" + UUID.randomUUID() + "_" + file.getName();
try {
s3Client.putObject(PutObjectRequest.builder()
@@ -159,28 +194,85 @@ public class MassImportService {
.build(),
RequestBody.fromFile(file));
} catch (Exception e) {
log.error("S3 Upload Fehler für " + file.getName(), e);
return; // Abbruch für dieses Dokument
log.error("S3 Upload Fehler für {}", file.getName(), e);
return;
}
// DB Speichern
// Resolve sender and receivers to Person entities
Person sender = senderRaw != null && !senderRaw.isBlank() ? findOrCreatePerson(senderRaw) : null;
List<Person> receivers = PersonNameParser.parseReceivers(receiversRaw).stream()
.map(this::findOrCreatePerson)
.toList();
// Resolve tag
Tag tag = null;
if (tagRaw != null && !tagRaw.isBlank()) {
tag = tagRepository.findByNameIgnoreCase(tagRaw)
.orElseGet(() -> tagRepository.save(Tag.builder().name(tagRaw).build()));
}
// Build or update the Document record
Document doc = existing.orElse(Document.builder()
.originalFilename(originalFilename)
.title(originalFilename)
.build());
doc.setTitle(buildTitle(index, date, location));
doc.setFilePath(s3Key);
doc.setContentType(contentType);
doc.setStatus(DocumentStatus.UPLOADED); // Jetzt ist es da!
doc.setStatus(DocumentStatus.UPLOADED);
doc.setArchiveBox(archiveBox);
doc.setArchiveFolder(archiveFolder);
doc.setDocumentDate(date);
doc.setLocation(location);
doc.setSummary(summary);
doc.setTranscription(transcription);
doc.setSender(sender);
doc.getReceivers().addAll(receivers);
if (tag != null) doc.getTags().add(tag);
documentRepository.save(doc);
log.info("Importiert: {}", originalFilename);
}
// Sucht Datei im gesamten /import Ordner (rekursiv)
/**
 * Resolves a raw name from the spreadsheet to a {@link Person}.
 *
 * <p>The trimmed name is used as the alias for the repository lookup
 * (presumably case-insensitive, judging by the repository method name).
 * When no person is found, the alias is split into first and last name
 * via {@code PersonNameParser.split} and a new person is saved.
 *
 * @param rawName the name as read from the sheet; must not be {@code null}
 * @return the existing person, or the newly saved one
 */
private Person findOrCreatePerson(String rawName) {
    final String alias = rawName.trim();

    Optional<Person> known = personRepository.findByAliasIgnoreCase(alias);
    if (known.isPresent()) {
        return known.get();
    }

    // Unknown alias: derive name parts and persist a fresh record.
    PersonNameParser.SplitName parts = PersonNameParser.split(alias);
    Person created = Person.builder()
            .alias(alias)
            .firstName(parts.firstName())
            .lastName(parts.lastName())
            .build();
    return personRepository.save(created);
}
/**
 * Builds the display title of a document in the form
 * "W-0001 – 15. Februar 1888 – Rotterdam".
 *
 * <p>The index always comes first. The date (formatted with
 * {@code GERMAN_DATE}) and the location are appended only when present,
 * each preceded by an en dash with a space on either side.
 *
 * @param index    the document index from the sheet; must not be {@code null}
 * @param date     the document date, or {@code null} if unknown
 * @param location the place of writing, or {@code null}/blank if unknown
 * @return the assembled title
 */
private String buildTitle(String index, LocalDate date, String location) {
    // En dash with surrounding spaces, written as an escape so the
    // separator cannot be lost to a source-encoding mix-up.
    final String separator = " \u2013 ";

    StringBuilder title = new StringBuilder(index);
    if (date != null) {
        title.append(separator).append(date.format(GERMAN_DATE));
    }
    if (location != null && !location.isBlank()) {
        title.append(separator).append(location);
    }
    return title.toString();
}
/**
 * Extracts a calendar date from a spreadsheet cell.
 *
 * <p>Two representations are understood: a string cell whose trimmed
 * text is accepted by {@link LocalDate#parse(CharSequence)}, and a
 * numeric cell that is date-formatted. The numeric value is converted
 * using the system default time zone.
 *
 * @param cell the cell to read; may be {@code null}
 * @return the parsed date, or {@code null} when the cell is missing,
 *         of another type, or its text cannot be parsed
 */
private LocalDate parseDate(Cell cell) {
    if (cell == null) {
        return null;
    }

    CellType type = cell.getCellType();

    if (type == CellType.STRING) {
        try {
            String text = cell.getStringCellValue().trim();
            return LocalDate.parse(text);
        } catch (DateTimeParseException ignored) {
            // Unparseable text is treated as "no date" rather than failing the row.
            return null;
        }
    }

    boolean isDateNumber = type == CellType.NUMERIC && DateUtil.isCellDateFormatted(cell);
    if (!isDateNumber) {
        return null;
    }
    return cell.getDateCellValue()
            .toInstant()
            .atZone(ZoneId.systemDefault())
            .toLocalDate();
}
private Optional<File> findFileRecursive(String filename) {
try (Stream<Path> walk = Files.walk(Paths.get(IMPORT_DIR))) {
return walk.filter(p -> !Files.isDirectory(p))
@@ -193,12 +285,12 @@ public class MassImportService {
}
private String getCellValue(Cell cell) {
if (cell == null)
return null;
if (cell.getCellType() == CellType.STRING)
return cell.getStringCellValue();
if (cell.getCellType() == CellType.NUMERIC)
return String.valueOf((int) cell.getNumericCellValue());
return "";
if (cell == null) return null;
return switch (cell.getCellType()) {
case STRING -> cell.getStringCellValue();
case NUMERIC -> String.valueOf((int) cell.getNumericCellValue());
case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
default -> null;
};
}
}