feat: rewrite MassImportService for ODS import
- Use WorkbookFactory.create() to support .ods, .xlsx, and .xls
- Discover any spreadsheet file (not just .xlsx) in /import
- Fix column indices to match actual ODS structure (index=0, box=1, folder=2, sender=3, receivers=5, date=7, location=9, tags=10, summary=11, transcription=13)
- Append .pdf extension to bare index values (W-0001 → W-0001.pdf)
- Build German-format title: "W-0001 – 15. Februar 1888 – Rotterdam"
- Parse ISO date strings (col 7 is text in LibreOffice ODS)
- Resolve sender (col 3) and receivers (col 5) to Person entities via lookup-or-create by alias using PersonNameParser normalisation
- Import tag (col 10) via lookup-or-create
- Import summary from col 11 (Inhalt)
- Import archiveBox (col 1) and archiveFolder (col 2)
- Inject PersonRepository and TagRepository

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,12 +3,15 @@ package org.raddatz.familienarchiv.service;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.poi.ss.usermodel.*;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.model.Document;
|
||||
import org.raddatz.familienarchiv.model.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.model.Person;
|
||||
import org.raddatz.familienarchiv.model.Tag;
|
||||
import org.raddatz.familienarchiv.repository.DocumentRepository;
|
||||
import org.raddatz.familienarchiv.repository.PersonRepository;
|
||||
import org.raddatz.familienarchiv.repository.TagRepository;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
@@ -26,6 +29,10 @@ import java.nio.file.Paths;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Stream;
|
||||
@@ -46,22 +53,45 @@ public class MassImportService {
|
||||
}
|
||||
|
||||
private final DocumentRepository documentRepository;
|
||||
private final PersonRepository personRepository;
|
||||
private final TagRepository tagRepository;
|
||||
private final S3Client s3Client;
|
||||
|
||||
@Value("${app.s3.bucket}")
|
||||
private String bucketName;
|
||||
|
||||
// Konfiguration der Spalten (wie im ExcelService)
|
||||
@Value("${app.import.excel.col.filename:0}")
|
||||
private int colFilename;
|
||||
@Value("${app.import.excel.col.date:1}")
|
||||
@Value("${app.import.col.index:0}")
|
||||
private int colIndex;
|
||||
|
||||
@Value("${app.import.col.box:1}")
|
||||
private int colBox;
|
||||
|
||||
@Value("${app.import.col.folder:2}")
|
||||
private int colFolder;
|
||||
|
||||
@Value("${app.import.col.sender:3}")
|
||||
private int colSender;
|
||||
|
||||
@Value("${app.import.col.receivers:5}")
|
||||
private int colReceivers;
|
||||
|
||||
@Value("${app.import.col.date:7}")
|
||||
private int colDate;
|
||||
@Value("${app.import.excel.col.location:2}")
|
||||
|
||||
@Value("${app.import.col.location:9}")
|
||||
private int colLocation;
|
||||
@Value("${app.import.excel.col.transcription:3}")
|
||||
|
||||
@Value("${app.import.col.tags:10}")
|
||||
private int colTags;
|
||||
|
||||
@Value("${app.import.col.summary:11}")
|
||||
private int colSummary;
|
||||
|
||||
@Value("${app.import.col.transcription:13}")
|
||||
private int colTranscription;
|
||||
|
||||
private static final String IMPORT_DIR = "/import";
|
||||
private static final DateTimeFormatter GERMAN_DATE = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN);
|
||||
|
||||
@Async
|
||||
public void runImportAsync() {
|
||||
@@ -70,52 +100,55 @@ public class MassImportService {
|
||||
}
|
||||
currentStatus = new ImportStatus(State.RUNNING, "Import läuft...", 0, LocalDateTime.now());
|
||||
try {
|
||||
File excelFile = findExcelFile();
|
||||
log.info("Starte Massenimport aus: {}", excelFile.getAbsolutePath());
|
||||
int processed = processExcel(excelFile);
|
||||
currentStatus = new ImportStatus(State.DONE, "Import abgeschlossen. " + processed + " Dokumente verarbeitet.", processed, currentStatus.startedAt());
|
||||
File spreadsheet = findSpreadsheetFile();
|
||||
log.info("Starte Massenimport aus: {}", spreadsheet.getAbsolutePath());
|
||||
int processed = processSpreadsheet(spreadsheet);
|
||||
currentStatus = new ImportStatus(State.DONE,
|
||||
"Import abgeschlossen. " + processed + " Dokumente verarbeitet.",
|
||||
processed, currentStatus.startedAt());
|
||||
} catch (Exception e) {
|
||||
log.error("Massenimport fehlgeschlagen", e);
|
||||
currentStatus = new ImportStatus(State.FAILED, "Fehler: " + e.getMessage(), 0, currentStatus.startedAt());
|
||||
}
|
||||
}
|
||||
|
||||
private File findExcelFile() throws IOException {
|
||||
private File findSpreadsheetFile() throws IOException {
|
||||
try (Stream<Path> files = Files.list(Paths.get(IMPORT_DIR))) {
|
||||
return files.filter(p -> p.toString().endsWith(".xlsx"))
|
||||
return files
|
||||
.filter(p -> {
|
||||
String name = p.toString().toLowerCase();
|
||||
return name.endsWith(".ods") || name.endsWith(".xlsx") || name.endsWith(".xls");
|
||||
})
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new RuntimeException("Keine .xlsx Datei in " + IMPORT_DIR + " gefunden!"))
|
||||
.orElseThrow(() -> new RuntimeException(
|
||||
"Keine Tabellendatei (.ods/.xlsx/.xls) in " + IMPORT_DIR + " gefunden!"))
|
||||
.toFile();
|
||||
}
|
||||
}
|
||||
|
||||
private int processExcel(File excelFile) throws IOException {
|
||||
private int processSpreadsheet(File file) throws IOException {
|
||||
int count = 0;
|
||||
try (FileInputStream fis = new FileInputStream(excelFile);
|
||||
Workbook workbook = new XSSFWorkbook(fis)) {
|
||||
try (FileInputStream fis = new FileInputStream(file);
|
||||
Workbook workbook = WorkbookFactory.create(fis)) {
|
||||
|
||||
Sheet sheet = workbook.getSheetAt(0);
|
||||
|
||||
// Wir nehmen an: Spalte "FilePath" im Excel ist RELATIV zum Import-Ordner
|
||||
// ODER: Wir suchen die Datei rekursiv, wenn nur der Name angegeben ist.
|
||||
|
||||
for (int i = 1; i <= sheet.getLastRowNum(); i++) {
|
||||
Row row = sheet.getRow(i);
|
||||
if (row == null)
|
||||
continue;
|
||||
if (row == null) continue;
|
||||
|
||||
String filename = getCellValue(row.getCell(colFilename));
|
||||
if (filename == null || filename.isBlank())
|
||||
continue;
|
||||
String index = getCellValue(row.getCell(colIndex));
|
||||
if (index == null || index.isBlank()) continue;
|
||||
|
||||
// Append .pdf extension if the index has none
|
||||
String filename = index.contains(".") ? index : index + ".pdf";
|
||||
|
||||
// Datei auf der Festplatte suchen
|
||||
Optional<File> fileOnDisk = findFileRecursive(filename);
|
||||
|
||||
if (fileOnDisk.isPresent()) {
|
||||
importSingleDocument(row, fileOnDisk.get(), filename);
|
||||
importSingleDocument(row, fileOnDisk.get(), filename, index);
|
||||
count++;
|
||||
} else {
|
||||
log.warn("Datei aus Excel nicht gefunden: {}", filename);
|
||||
log.warn("Datei nicht gefunden: {}", filename);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -123,24 +156,26 @@ public class MassImportService {
|
||||
}
|
||||
|
||||
@Transactional
|
||||
protected void importSingleDocument(Row row, File file, String originalFilename) {
|
||||
// Metadaten lesen
|
||||
LocalDate date = null;
|
||||
Cell dateCell = row.getCell(colDate);
|
||||
if (dateCell != null && dateCell.getCellType() == CellType.NUMERIC) {
|
||||
date = dateCell.getDateCellValue().toInstant().atZone(ZoneId.systemDefault()).toLocalDate();
|
||||
}
|
||||
String location = getCellValue(row.getCell(colLocation));
|
||||
String transcription = getCellValue(row.getCell(colTranscription));
|
||||
|
||||
// Prüfen ob schon da
|
||||
protected void importSingleDocument(Row row, File file, String originalFilename, String index) {
|
||||
// Skip documents that have already been processed beyond placeholder stage
|
||||
Optional<Document> existing = documentRepository.findByOriginalFilename(originalFilename);
|
||||
if (existing.isPresent() && existing.get().getStatus() != DocumentStatus.PLACEHOLDER) {
|
||||
log.info("Dokument {} existiert bereits, überspringe.", originalFilename);
|
||||
return;
|
||||
}
|
||||
|
||||
// Detect MIME type from the local file
|
||||
// Read metadata from ODS row
|
||||
String archiveBox = getCellValue(row.getCell(colBox));
|
||||
String archiveFolder = getCellValue(row.getCell(colFolder));
|
||||
String senderRaw = getCellValue(row.getCell(colSender));
|
||||
String receiversRaw = getCellValue(row.getCell(colReceivers));
|
||||
LocalDate date = parseDate(row.getCell(colDate));
|
||||
String location = getCellValue(row.getCell(colLocation));
|
||||
String tagRaw = getCellValue(row.getCell(colTags));
|
||||
String summary = getCellValue(row.getCell(colSummary));
|
||||
String transcription = getCellValue(row.getCell(colTranscription));
|
||||
|
||||
// Detect content type from the local file
|
||||
String contentType;
|
||||
try {
|
||||
contentType = Files.probeContentType(file.toPath());
|
||||
@@ -149,7 +184,7 @@ public class MassImportService {
|
||||
}
|
||||
if (contentType == null) contentType = "application/octet-stream";
|
||||
|
||||
// Upload zu S3
|
||||
// Upload to S3
|
||||
String s3Key = "documents/" + UUID.randomUUID() + "_" + file.getName();
|
||||
try {
|
||||
s3Client.putObject(PutObjectRequest.builder()
|
||||
@@ -159,28 +194,85 @@ public class MassImportService {
|
||||
.build(),
|
||||
RequestBody.fromFile(file));
|
||||
} catch (Exception e) {
|
||||
log.error("S3 Upload Fehler für " + file.getName(), e);
|
||||
return; // Abbruch für dieses Dokument
|
||||
log.error("S3 Upload Fehler für {}", file.getName(), e);
|
||||
return;
|
||||
}
|
||||
|
||||
// DB Speichern
|
||||
// Resolve sender and receivers to Person entities
|
||||
Person sender = senderRaw != null && !senderRaw.isBlank() ? findOrCreatePerson(senderRaw) : null;
|
||||
|
||||
List<Person> receivers = PersonNameParser.parseReceivers(receiversRaw).stream()
|
||||
.map(this::findOrCreatePerson)
|
||||
.toList();
|
||||
|
||||
// Resolve tag
|
||||
Tag tag = null;
|
||||
if (tagRaw != null && !tagRaw.isBlank()) {
|
||||
tag = tagRepository.findByNameIgnoreCase(tagRaw)
|
||||
.orElseGet(() -> tagRepository.save(Tag.builder().name(tagRaw).build()));
|
||||
}
|
||||
|
||||
// Build or update the Document record
|
||||
Document doc = existing.orElse(Document.builder()
|
||||
.originalFilename(originalFilename)
|
||||
.title(originalFilename)
|
||||
.build());
|
||||
|
||||
doc.setTitle(buildTitle(index, date, location));
|
||||
doc.setFilePath(s3Key);
|
||||
doc.setContentType(contentType);
|
||||
doc.setStatus(DocumentStatus.UPLOADED); // Jetzt ist es da!
|
||||
doc.setStatus(DocumentStatus.UPLOADED);
|
||||
doc.setArchiveBox(archiveBox);
|
||||
doc.setArchiveFolder(archiveFolder);
|
||||
doc.setDocumentDate(date);
|
||||
doc.setLocation(location);
|
||||
doc.setSummary(summary);
|
||||
doc.setTranscription(transcription);
|
||||
doc.setSender(sender);
|
||||
doc.getReceivers().addAll(receivers);
|
||||
if (tag != null) doc.getTags().add(tag);
|
||||
|
||||
documentRepository.save(doc);
|
||||
log.info("Importiert: {}", originalFilename);
|
||||
}
|
||||
|
||||
// Sucht Datei im gesamten /import Ordner (rekursiv)
|
||||
private Person findOrCreatePerson(String rawName) {
|
||||
String alias = rawName.trim();
|
||||
return personRepository.findByAliasIgnoreCase(alias).orElseGet(() -> {
|
||||
PersonNameParser.SplitName split = PersonNameParser.split(alias);
|
||||
return personRepository.save(Person.builder()
|
||||
.alias(alias)
|
||||
.firstName(split.firstName())
|
||||
.lastName(split.lastName())
|
||||
.build());
|
||||
});
|
||||
}
|
||||
|
||||
private String buildTitle(String index, LocalDate date, String location) {
|
||||
StringBuilder sb = new StringBuilder(index);
|
||||
if (date != null) {
|
||||
sb.append(" – ").append(date.format(GERMAN_DATE));
|
||||
}
|
||||
if (location != null && !location.isBlank()) {
|
||||
sb.append(" – ").append(location);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private LocalDate parseDate(Cell cell) {
|
||||
if (cell == null) return null;
|
||||
if (cell.getCellType() == CellType.NUMERIC && DateUtil.isCellDateFormatted(cell)) {
|
||||
return cell.getDateCellValue().toInstant().atZone(ZoneId.systemDefault()).toLocalDate();
|
||||
}
|
||||
if (cell.getCellType() == CellType.STRING) {
|
||||
try {
|
||||
return LocalDate.parse(cell.getStringCellValue().trim());
|
||||
} catch (DateTimeParseException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private Optional<File> findFileRecursive(String filename) {
|
||||
try (Stream<Path> walk = Files.walk(Paths.get(IMPORT_DIR))) {
|
||||
return walk.filter(p -> !Files.isDirectory(p))
|
||||
@@ -193,12 +285,12 @@ public class MassImportService {
|
||||
}
|
||||
|
||||
private String getCellValue(Cell cell) {
|
||||
if (cell == null)
|
||||
return null;
|
||||
if (cell.getCellType() == CellType.STRING)
|
||||
return cell.getStringCellValue();
|
||||
if (cell.getCellType() == CellType.NUMERIC)
|
||||
return String.valueOf((int) cell.getNumericCellValue());
|
||||
return "";
|
||||
if (cell == null) return null;
|
||||
return switch (cell.getCellType()) {
|
||||
case STRING -> cell.getStringCellValue();
|
||||
case NUMERIC -> String.valueOf((int) cell.getNumericCellValue());
|
||||
case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
|
||||
default -> null;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user