Import normalizer: offline tool to normalize the raw archive spreadsheets #663

Merged
marcel merged 172 commits from docs/import-migration into main 2026-05-28 15:05:51 +02:00
3 changed files with 49 additions and 13 deletions
Showing only changes of commit 4cc725d546 - Show all commits

View File

@@ -24,7 +24,6 @@ import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import org.raddatz.familienarchiv.tag.TagService;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
@@ -79,6 +78,7 @@ public class DocumentImporter {
private final TagService tagService;
private final S3Client s3Client;
private final ThumbnailAsyncRunner thumbnailAsyncRunner;
private final FileStreamOpener fileStreamOpener;
@Value("${app.s3.bucket:familienarchiv}")
private String bucketName;
@@ -349,13 +349,10 @@ public class DocumentImporter {
return INDEX_PATTERN.matcher(index).matches();
}
// package-private: a Mockito spy in tests can override to inject IOException
InputStream openFileStream(File file) throws IOException {
return new FileInputStream(file);
}
private boolean isPdfMagicBytes(File file) throws IOException {
try (InputStream is = openFileStream(file)) {
// FileStreamOpener is injected so tests can stub a throwing implementation for the
// IO-error branch without spying on the importer itself.
try (InputStream is = fileStreamOpener.open(file)) {
byte[] header = is.readNBytes(4);
return header.length == 4
&& header[0] == 0x25 // %

View File

@@ -0,0 +1,33 @@
package org.raddatz.familienarchiv.importing;
import org.springframework.stereotype.Component;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
/**
* Test seam for opening a {@link File} as an {@link InputStream}. Extracted so the magic-byte
* check in {@link DocumentImporter} can be unit-tested for the IO-error branch by injecting a
* mock that throws, without needing a Mockito spy on the importer itself.
*
* <p>Production uses {@link DefaultFileStreamOpener}, a one-line delegate to
* {@code new FileInputStream(file)}.
*/
@FunctionalInterface
public interface FileStreamOpener {
/** Opens {@code file} for sequential reads. Caller closes the returned stream. */
InputStream open(File file) throws IOException;
/** Default production implementation: plain {@code FileInputStream}. */
@Component
final class DefaultFileStreamOpener implements FileStreamOpener {
@Override
public InputStream open(File file) throws IOException {
return new FileInputStream(file);
}
}
}

View File

@@ -49,12 +49,18 @@ class DocumentImporterTest {
@Mock TagService tagService;
@Mock S3Client s3Client;
@Mock ThumbnailAsyncRunner thumbnailAsyncRunner;
@Mock FileStreamOpener fileStreamOpener;
DocumentImporter importer;
@BeforeEach
void setUp() {
importer = new DocumentImporter(documentService, personService, tagService, s3Client, thumbnailAsyncRunner);
void setUp() throws java.io.IOException {
// Default opener delegates to FileInputStream — tests that need to force an IOException
// override this stub locally (load_skipsFile_whenMagicByteCheckThrowsIoException).
lenient().when(fileStreamOpener.open(any(File.class)))
.thenAnswer(inv -> new java.io.FileInputStream(inv.getArgument(0, File.class)));
importer = new DocumentImporter(documentService, personService, tagService, s3Client,
thumbnailAsyncRunner, fileStreamOpener);
ReflectionTestUtils.setField(importer, "bucketName", "test-bucket");
}
@@ -305,11 +311,11 @@ class DocumentImporterTest {
lenient().when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
Path xlsx = writeDocs(tempDir, docRow("W-0001", "", "", "", "", "", "", "", ""));
DocumentImporter spyImporter = org.mockito.Mockito.spy(importer);
org.mockito.Mockito.doThrow(new java.io.IOException("read error"))
.when(spyImporter).openFileStream(any(File.class));
// FileStreamOpener is injected — stub it to throw, no spy on the importer needed.
org.mockito.Mockito.when(fileStreamOpener.open(any(File.class)))
.thenThrow(new java.io.IOException("read error"));
DocumentImporter.LoadResult result = spyImporter.load(xlsx.toFile());
DocumentImporter.LoadResult result = importer.load(xlsx.toFile());
assertThat(result.skippedFiles())
.extracting(ImportStatus.SkippedFile::reason)