Import normalizer: offline tool to normalize the raw archive spreadsheets #663

Merged
marcel merged 172 commits from docs/import-migration into main 2026-05-28 15:05:51 +02:00
3 changed files with 49 additions and 13 deletions
Showing only changes of commit 4cc725d546 - Show all commits

View File

@@ -24,7 +24,6 @@ import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import org.raddatz.familienarchiv.tag.TagService; import org.raddatz.familienarchiv.tag.TagService;
import java.io.File; import java.io.File;
import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.nio.file.Files; import java.nio.file.Files;
@@ -79,6 +78,7 @@ public class DocumentImporter {
private final TagService tagService; private final TagService tagService;
private final S3Client s3Client; private final S3Client s3Client;
private final ThumbnailAsyncRunner thumbnailAsyncRunner; private final ThumbnailAsyncRunner thumbnailAsyncRunner;
private final FileStreamOpener fileStreamOpener;
@Value("${app.s3.bucket:familienarchiv}") @Value("${app.s3.bucket:familienarchiv}")
private String bucketName; private String bucketName;
@@ -349,13 +349,10 @@ public class DocumentImporter {
return INDEX_PATTERN.matcher(index).matches(); return INDEX_PATTERN.matcher(index).matches();
} }
// package-private: a Mockito spy in tests can override to inject IOException
InputStream openFileStream(File file) throws IOException {
return new FileInputStream(file);
}
private boolean isPdfMagicBytes(File file) throws IOException { private boolean isPdfMagicBytes(File file) throws IOException {
try (InputStream is = openFileStream(file)) { // FileStreamOpener is injected so tests can stub a throwing implementation for the
// IO-error branch without spying on the importer itself.
try (InputStream is = fileStreamOpener.open(file)) {
byte[] header = is.readNBytes(4); byte[] header = is.readNBytes(4);
return header.length == 4 return header.length == 4
&& header[0] == 0x25 // % && header[0] == 0x25 // %

View File

@@ -0,0 +1,33 @@
package org.raddatz.familienarchiv.importing;
import org.springframework.stereotype.Component;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
/**
 * Seam that turns a {@link File} into an {@link InputStream}.
 *
 * <p>It exists so that the magic-byte check in {@link DocumentImporter} can have its IO-error
 * branch covered in a unit test: the test injects an opener that throws instead of wrapping
 * the importer in a Mockito spy.
 *
 * <p>In production the Spring-managed {@link DefaultFileStreamOpener} is used; it does nothing
 * more than call {@code new FileInputStream(file)}.
 */
@FunctionalInterface
public interface FileStreamOpener {

    /**
     * Opens {@code file} for sequential reading.
     *
     * @param file the file to open
     * @return a fresh stream over the file's bytes; the caller owns it and must close it
     * @throws IOException if the stream cannot be opened
     */
    InputStream open(File file) throws IOException;

    /**
     * Production implementation, registered as a Spring {@code @Component}. It hands back a
     * plain {@link FileInputStream} with no buffering or other decoration.
     */
    @Component
    final class DefaultFileStreamOpener implements FileStreamOpener {

        /**
         * {@inheritDoc}
         *
         * <p>Delegates directly to the {@link FileInputStream#FileInputStream(File)} constructor.
         */
        @Override
        public InputStream open(File file) throws IOException {
            return new FileInputStream(file);
        }
    }
}

View File

@@ -49,12 +49,18 @@ class DocumentImporterTest {
@Mock TagService tagService; @Mock TagService tagService;
@Mock S3Client s3Client; @Mock S3Client s3Client;
@Mock ThumbnailAsyncRunner thumbnailAsyncRunner; @Mock ThumbnailAsyncRunner thumbnailAsyncRunner;
@Mock FileStreamOpener fileStreamOpener;
DocumentImporter importer; DocumentImporter importer;
@BeforeEach @BeforeEach
void setUp() { void setUp() throws java.io.IOException {
importer = new DocumentImporter(documentService, personService, tagService, s3Client, thumbnailAsyncRunner); // Default opener delegates to FileInputStream — tests that need to force an IOException
// override this stub locally (load_skipsFile_whenMagicByteCheckThrowsIoException).
lenient().when(fileStreamOpener.open(any(File.class)))
.thenAnswer(inv -> new java.io.FileInputStream(inv.getArgument(0, File.class)));
importer = new DocumentImporter(documentService, personService, tagService, s3Client,
thumbnailAsyncRunner, fileStreamOpener);
ReflectionTestUtils.setField(importer, "bucketName", "test-bucket"); ReflectionTestUtils.setField(importer, "bucketName", "test-bucket");
} }
@@ -305,11 +311,11 @@ class DocumentImporterTest {
lenient().when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty()); lenient().when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
Path xlsx = writeDocs(tempDir, docRow("W-0001", "", "", "", "", "", "", "", "")); Path xlsx = writeDocs(tempDir, docRow("W-0001", "", "", "", "", "", "", "", ""));
DocumentImporter spyImporter = org.mockito.Mockito.spy(importer); // FileStreamOpener is injected — stub it to throw, no spy on the importer needed.
org.mockito.Mockito.doThrow(new java.io.IOException("read error")) org.mockito.Mockito.when(fileStreamOpener.open(any(File.class)))
.when(spyImporter).openFileStream(any(File.class)); .thenThrow(new java.io.IOException("read error"));
DocumentImporter.LoadResult result = spyImporter.load(xlsx.toFile()); DocumentImporter.LoadResult result = importer.load(xlsx.toFile());
assertThat(result.skippedFiles()) assertThat(result.skippedFiles())
.extracting(ImportStatus.SkippedFile::reason) .extracting(ImportStatus.SkippedFile::reason)