diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentImporterTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentImporterTest.java index c6d78df3..f136b0e0 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentImporterTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentImporterTest.java @@ -24,6 +24,7 @@ import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.PutObjectRequest; import java.io.File; +import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Path; import java.time.LocalDate; @@ -126,6 +127,36 @@ class DocumentImporterTest { assertThat(validIndex("W-0001.pdf")).isFalse(); } + // ─── catalog-shape rejects — pass the char pre-checks but must fail INDEX_PATTERN ──── + // These pin the regex branch itself: each string contains no separator, dot, slash + // homoglyph, null byte, or absolute marker, so it sails past every char guard and is + // rejected *only* because INDEX_PATTERN.matches() returns false. A weaker pattern would + // let them through — these tests would then go red. + + @Test + void isValidImportIndex_returnsFalse_whenSpaceInIndex() { + // The real-world reject: "J 0070" is a space-typo with no PDF on disk. + assertThat(validIndex("J 0070")).isFalse(); + } + + @Test + void isValidImportIndex_returnsFalse_whenFiveLetterPrefix() { + // The catalog prefix is at most 4 letters; 5 must not match. + assertThat(validIndex("WXYZA-0001")).isFalse(); + } + + @Test + void isValidImportIndex_returnsFalse_whenNoLetterPrefix() { + // A digit-led id (no letter prefix) is not a catalog shape. + assertThat(validIndex("12-0001")).isFalse(); + } + + @Test + void isValidImportIndex_returnsFalse_whenUppercaseXSuffix() { + // Only a lowercase trailing "x" is allowed; an uppercase "X" suffix must fail. 
+ assertThat(validIndex("W-0001X")).isFalse(); + } + @Test void isValidImportIndex_returnsTrue_whenPlainCatalogIndex() { assertThat(validIndex("W-0124")).isTrue(); @@ -222,7 +253,7 @@ class DocumentImporterTest { ReflectionTestUtils.setField(importer, "importDir", importDirPath.toString()); org.assertj.core.api.Assertions.assertThatThrownBy( - () -> ReflectionTestUtils.invokeMethod(importer, "resolvePdfByIndex", "W-0001")) + () -> ReflectionTestUtils.invokeMethod(importer, "resolvePdfByIndex", "W-0001", 2)) .isInstanceOf(org.raddatz.familienarchiv.exception.DomainException.class); } @@ -232,12 +263,24 @@ class DocumentImporterTest { Path expected = tempDir.resolve("Eu-0628.pdf"); Files.writeString(expected, "%PDF-1.4"); - Optional<File> resolved = ReflectionTestUtils.invokeMethod(importer, "resolvePdfByIndex", "Eu-0628"); + Optional<File> resolved = ReflectionTestUtils.invokeMethod(importer, "resolvePdfByIndex", "Eu-0628", 2); assertThat(resolved).isPresent(); assertThat(resolved.get().getCanonicalFile()).isEqualTo(expected.toFile().getCanonicalFile()); } + // NOTE (Sara, PR #687): the IOException branch of resolvePdfByIndex — where + // File.getCanonicalPath() itself throws (an OS-level failure mid-resolution, not the + // symlink-escape DomainException) — is intentionally NOT covered by a test. Unlike + // isPdfMagicBytes, which has the package-private openFileStream(File) seam a Mockito spy can + // make throw, getCanonicalPath() is called on a File built internally with no injection seam, + // and there is no portable, deterministic way to make it throw on a temp file (it does not + // throw for missing/symlinked paths — those are handled by isFile()/the containment check). + // Adding a seam purely to test this would be production code in service of a non-defect; the + // substantive fix is the log.warn() now emitted in that branch so the quiet skip surfaces in + // ops. Left uncovered by deliberate decision, documented here so the branch is not assumed + // tested. 
+ // ─── PDF magic-byte guard — ported — do not remove ────────────────────────────── @Test @@ -544,7 +587,7 @@ class DocumentImporterTest { row.createCell(c).setCellValue(rows[r].getOrDefault(headers.get(c), "")); } } - try (java.io.OutputStream out = Files.newOutputStream(xlsx)) { + try (OutputStream out = Files.newOutputStream(xlsx)) { wb.write(out); } }