diff --git a/backend/src/main/java/org/raddatz/familienarchiv/tag/TagRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/tag/TagRepository.java index 108b4bc8..69b1eb03 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/tag/TagRepository.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/tag/TagRepository.java @@ -20,7 +20,14 @@ public interface TagRepository extends JpaRepository { } - Optional findByNameIgnoreCase(String name); + // Tag-name resolution (see TagService.findOrCreate). Names that collide case-insensitively across + // the canonical tree are VALID — a parent and its same-named lowercase child (e.g. "Geburt" / + // "Geburt/geburt") are distinct nodes with their own source_ref and document attachments. So + // resolution must be exact-case first, then a non-throwing list for the case-insensitive fallback. + // Do NOT add a unique(lower(name)) constraint — it would reject these legitimate rows. See #730. + Optional findByName(String name); + + List findAllByNameIgnoreCase(String name); // Lookup by the canonical tag_path, used for idempotent canonical re-import (Phase 3). Optional findBySourceRef(String sourceRef); diff --git a/backend/src/main/java/org/raddatz/familienarchiv/tag/TagService.java b/backend/src/main/java/org/raddatz/familienarchiv/tag/TagService.java index 1dac8610..0a077a2d 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/tag/TagService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/tag/TagService.java @@ -2,6 +2,7 @@ package org.raddatz.familienarchiv.tag; import java.util.ArrayList; import java.util.Collection; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; @@ -55,10 +56,21 @@ public class TagService { return tagRepository.findBySourceRef(sourceRef); } + /** + * Resolves a tag name to a single tag, creating one when absent. Never throws on case-insensitive + * collisions: names that differ only by case are valid distinct nodes in the canonical tree (a + * parent and its same-named lowercase child), so resolution prefers an exact-case match, then + * falls back to the lowest-id case-insensitive match, then creates. See #730. + */ public Tag findOrCreate(String name) { String cleanName = name.trim(); - return tagRepository.findByNameIgnoreCase(cleanName) - .orElseGet(() -> tagRepository.save(Tag.builder().name(cleanName).build())); + Optional exact = tagRepository.findByName(cleanName); + if (exact.isPresent()) return exact.get(); // exact-case wins (edit round-trip replays the stored name) + List caseInsensitive = tagRepository.findAllByNameIgnoreCase(cleanName); + if (!caseInsensitive.isEmpty()) { + return caseInsensitive.stream().min(Comparator.comparing(Tag::getId)).orElseThrow(); // deterministic tie-break by id — list is non-empty, never throws + } + return tagRepository.save(Tag.builder().name(cleanName).build()); // create-when-absent (orphan tag: null sourceRef/parentId) } /** diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/TagCaseCollisionIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/TagCaseCollisionIntegrationTest.java new file mode 100644 index 00000000..ebf18b10 --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/TagCaseCollisionIntegrationTest.java @@ -0,0 +1,123 @@ +package org.raddatz.familienarchiv.document; + +import org.junit.jupiter.api.Test; +import org.raddatz.familienarchiv.PostgresContainerConfig; +import org.raddatz.familienarchiv.tag.Tag; +import org.raddatz.familienarchiv.tag.TagRepository; +import org.raddatz.familienarchiv.tag.TagService; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.annotation.Import; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.context.bean.override.mockito.MockitoBean; +import org.springframework.transaction.annotation.Transactional; +import software.amazon.awssdk.services.s3.S3Client; + +import java.time.LocalDate; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.UUID; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatCode; + +/** + * #730 — tag-name resolution against a real Postgres. A mocked repo can't prove the two things that + * actually break: that {@code findAllByNameIgnoreCase} folds case the way Postgres {@code LOWER()} + * does (critical for umlauts like {@code ü}), and that saving a document tagged with a case-colliding + * tag no longer throws {@code NonUniqueResultException}. H2 folds case differently, so this pins the + * behaviour on {@code postgres:16-alpine}. The four-branch resolution logic itself is covered faster + * by the mocked {@code TagServiceTest}. + */ +@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE) +@ActiveProfiles("test") +@Import(PostgresContainerConfig.class) +@Transactional +class TagCaseCollisionIntegrationTest { + + @MockitoBean S3Client s3Client; + @Autowired DocumentService documentService; + @Autowired DocumentRepository documentRepository; + @Autowired TagRepository tagRepository; + @Autowired TagService tagService; + + private Tag persistTag(String name, String sourceRef, UUID parentId) { + return tagRepository.save(Tag.builder().name(name).sourceRef(sourceRef).parentId(parentId).build()); + } + + private Document persistDocTaggedWith(Tag tag) { + return documentRepository.save(Document.builder() + .originalFilename("C-7301") + .title("Weihnachtsbrief") + .documentDate(LocalDate.of(1928, 1, 1)) + .metaDatePrecision(DatePrecision.YEAR) + .status(DocumentStatus.UPLOADED) + .tags(new HashSet<>(Set.of(tag))) + .build()); + } + + @Test + void updateDocument_succeedsAndKeepsExactChildTag_whenTaggedWithCaseCollidingChild() throws Exception { + Tag parent = persistTag("Weihnachten", "Weihnachten", null); + Tag child = persistTag("weihnachten", "Weihnachten/weihnachten", parent.getId()); + Document doc = persistDocTaggedWith(child); + + DocumentUpdateDTO dto = new DocumentUpdateDTO(); + dto.setTitle("Weihnachtsbrief"); + dto.setDocumentDate(LocalDate.of(1930, 1, 1)); // change the date — the field that 500'd on staging + dto.setMetaDatePrecision(DatePrecision.YEAR); + dto.setTags("weihnachten"); // the edit form round-trips the stored child name + + assertThatCode(() -> documentService.updateDocument(doc.getId(), dto, null, null)) + .doesNotThrowAnyException(); + + Set tags = documentRepository.findById(doc.getId()).orElseThrow().getTags(); + assertThat(tags).hasSize(1); + assertThat(tags.iterator().next().getId()).isEqualTo(child.getId()); // child kept, not the parent + } + + @Test + void findOrCreate_resolvesUmlautCollisionDeterministically_withoutThrow() { + // The regression catcher: a plain-ASCII pair would stay green even if Postgres folded ü wrongly. + Tag parent = persistTag("Glückwünsche", "Glückwünsche", null); + Tag child = persistTag("glückwünsche", "Glückwünsche/glückwünsche", parent.getId()); + + // Proof that real Postgres LOWER() folds the umlaut so both rows match case-insensitively. + // Query with the UPPERCASE form findOrCreate actually passes — folding LOWER('GLÜCKWÜNSCHE') + // against LOWER(name) is the exact step under test; a lowercase probe wouldn't exercise it. + assertThat(tagRepository.findAllByNameIgnoreCase("GLÜCKWÜNSCHE")).hasSize(2); + + // No exact-case "GLÜCKWÜNSCHE" row exists → resolution falls through to the case-insensitive + // branch with two candidates and must pick the lowest id deterministically, never throwing. + UUID expected = List.of(parent, child).stream().min(Comparator.comparing(Tag::getId)).orElseThrow().getId(); + Tag first = tagService.findOrCreate("GLÜCKWÜNSCHE"); + Tag second = tagService.findOrCreate("GLÜCKWÜNSCHE"); + + assertThat(first.getId()).isEqualTo(expected); + assertThat(second.getId()).isEqualTo(first.getId()); + } + + @Test + void bulkEdit_resolvesCaseCollidingTagThroughFindOrCreate_withoutThrow() { + // Bulk-edit shares resolveTags → findOrCreate; this guards a future refactor that bypasses it. + Tag parent = persistTag("Weihnachten", "Weihnachten", null); + Tag child = persistTag("weihnachten", "Weihnachten/weihnachten", parent.getId()); + Document doc = documentRepository.save(Document.builder() + .originalFilename("C-7302") + .title("Brief") + .status(DocumentStatus.UPLOADED) + .build()); + + DocumentBulkEditDTO dto = new DocumentBulkEditDTO(); + dto.setTagNames(List.of("weihnachten")); + + assertThatCode(() -> documentService.applyBulkEditToDocument(doc.getId(), dto, null)) + .doesNotThrowAnyException(); + + Set tags = documentRepository.findById(doc.getId()).orElseThrow().getTags(); + assertThat(tags).hasSize(1); + assertThat(tags.iterator().next().getId()).isEqualTo(child.getId()); + } +} diff --git a/backend/src/test/java/org/raddatz/familienarchiv/tag/TagServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/tag/TagServiceTest.java index 3cb2762f..d0373f7b 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/tag/TagServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/tag/TagServiceTest.java @@ -53,20 +53,68 @@ class TagServiceTest { // ─── findOrCreate ───────────────────────────────────────────────────────── @Test - void findOrCreate_returnsExisting_whenNameFound() { - Tag existing = Tag.builder().id(UUID.randomUUID()).name("Familie").build(); - when(tagRepository.findByNameIgnoreCase("Familie")).thenReturn(Optional.of(existing)); + void findOrCreate_exactCaseWins_overCaseInsensitiveSibling() { + // "Geburt" (parent) and "geburt" (child) both exist; the edit round-trip replays the stored + // name "geburt", which must bind to the exact-case row, not the parent. + Tag exact = Tag.builder().id(UUID.randomUUID()).name("geburt").build(); + when(tagRepository.findByName("geburt")).thenReturn(Optional.of(exact)); - Tag result = tagService.findOrCreate("Familie"); + Tag result = tagService.findOrCreate("geburt"); - assertThat(result).isEqualTo(existing); + assertThat(result).isEqualTo(exact); verify(tagRepository, never()).save(any()); } @Test - void findOrCreate_createsNew_whenNameNotFound() { + void findOrCreate_exactCaseWins_evenWhenItsIdIsNotTheLowest() { + // Adversarial guard: exact-case must short-circuit BEFORE the lowest-id rule. Here the exact row + // has the higher id, so a naive "always pick lowest id across all CI matches" would pick wrong. + Tag exactHigherId = Tag.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000009")).name("geburt").build(); + when(tagRepository.findByName("geburt")).thenReturn(Optional.of(exactHigherId)); + + Tag result = tagService.findOrCreate("geburt"); + + assertThat(result).isEqualTo(exactHigherId); + verify(tagRepository, never()).findAllByNameIgnoreCase(any()); // exact-case wins without consulting the CI list + verify(tagRepository, never()).save(any()); + } + + @Test + void findOrCreate_usesSingleCaseInsensitiveMatch_whenNoExactCase() { + // Stored name is "Weihnachten"; a save replays "weihnachten" (no exact-case row) → bind to the + // single case-insensitive match rather than creating a duplicate. + Tag stored = Tag.builder().id(UUID.randomUUID()).name("Weihnachten").build(); + when(tagRepository.findByName("weihnachten")).thenReturn(Optional.empty()); + when(tagRepository.findAllByNameIgnoreCase("weihnachten")).thenReturn(List.of(stored)); + + Tag result = tagService.findOrCreate("weihnachten"); + + assertThat(result).isEqualTo(stored); + verify(tagRepository, never()).save(any()); + } + + @Test + void findOrCreate_returnsLowestIdDeterministically_whenMultipleCaseInsensitiveMatches() { + // Two rows collide case-insensitively and neither equals the query exactly. Resolution must be + // deterministic (lowest id) and never throw — proven by calling twice and getting the same id. + Tag lowerId = Tag.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000001")).name("Reisepläne").build(); + Tag higherId = Tag.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000002")).name("reisepläne").build(); + when(tagRepository.findByName("REISEPLÄNE")).thenReturn(Optional.empty()); + when(tagRepository.findAllByNameIgnoreCase("REISEPLÄNE")).thenReturn(List.of(higherId, lowerId)); + + Tag first = tagService.findOrCreate("REISEPLÄNE"); + Tag second = tagService.findOrCreate("REISEPLÄNE"); + + assertThat(first.getId()).isEqualTo(lowerId.getId()); + assertThat(second.getId()).isEqualTo(first.getId()); + verify(tagRepository, never()).save(any()); + } + + @Test + void findOrCreate_createsOrphanTag_whenNameAbsent() { Tag saved = Tag.builder().id(UUID.randomUUID()).name("Krieg").build(); - when(tagRepository.findByNameIgnoreCase("Krieg")).thenReturn(Optional.empty()); + when(tagRepository.findByName("Krieg")).thenReturn(Optional.empty()); + when(tagRepository.findAllByNameIgnoreCase("Krieg")).thenReturn(List.of()); when(tagRepository.save(any())).thenReturn(saved); Tag result = tagService.findOrCreate("Krieg"); @@ -76,13 +124,15 @@ class TagServiceTest { } @Test - void findOrCreate_trimsWhitespaceBeforeLookup() { - Tag existing = Tag.builder().id(UUID.randomUUID()).name("Urlaub").build(); - when(tagRepository.findByNameIgnoreCase("Urlaub")).thenReturn(Optional.of(existing)); + void findOrCreate_trimsWhitespace_thenLandsOnCaseInsensitiveChild() { + Tag child = Tag.builder().id(UUID.randomUUID()).name("weihnachten").build(); + when(tagRepository.findByName("weihnachten")).thenReturn(Optional.empty()); + when(tagRepository.findAllByNameIgnoreCase("weihnachten")).thenReturn(List.of(child)); - tagService.findOrCreate(" Urlaub "); + Tag result = tagService.findOrCreate(" weihnachten "); - verify(tagRepository).findByNameIgnoreCase("Urlaub"); + assertThat(result).isEqualTo(child); + verify(tagRepository).findByName("weihnachten"); } // ─── update ─────────────────────────────────────────────────────────────── diff --git a/docs/adr/032-tag-name-resolution-tolerates-case-collisions.md b/docs/adr/032-tag-name-resolution-tolerates-case-collisions.md new file mode 100644 index 00000000..9a1fe61f --- /dev/null +++ b/docs/adr/032-tag-name-resolution-tolerates-case-collisions.md @@ -0,0 +1,105 @@ +# ADR-032 — Tag-name resolution tolerates case-collisions: exact-case first, then a deterministic lowest-id fallback, and never a `unique(lower(name))` constraint + +**Date:** 2026-06-06 +**Status:** Accepted +**Issue:** #730 (document with a case-colliding tag cannot be saved — `findByNameIgnoreCase` `NonUniqueResultException`) +**Milestone:** — + +--- + +## Context + +`TagService.findOrCreate(name)` is the single point that turns a tag **name** into a `Tag` +row. The document edit form, bulk-edit, and the upload batch all round-trip tag **names** +(the edit form sends `tags.map(t => t.name).join(',')`) and re-resolve them on **every** +save through `resolveTags → findOrCreate`. The old implementation resolved with +`tagRepository.findByNameIgnoreCase(name)`, a derived query returning `Optional`. + +That signature encodes an invariant the data does **not** hold: that a name is globally +unique case-insensitively. The canonical tag tree legitimately contains names that differ +only by case — a parent container and its same-named lowercase **child** (`Geburt` / +`Geburt/geburt`, `Weihnachten` / `Weihnachten/weihnachten`, …), or two siblings +(`Reise/Reisepläne` / `Reise/reisepläne`). Each is a distinct node with its own +`source_ref` (the stable identity, per ADR-025) and its own document attachments — **not** an +accidental duplicate. When two rows matched case-insensitively, Hibernate threw +`NonUniqueResultException` → `IncorrectResultSizeDataAccessException` → a generic HTTP 500. + +The effect was severe and opaque: every document carrying one of ~10 colliding tags (≈180 +document-tag attachments on staging) became **un-editable** — any field change failed on save +because the whole tag set is re-resolved — and the user saw only "an unexpected error", with +no hint that a tag was the cause. + +This is a **lookup** problem, not a data problem: the collisions are valid canonical nodes +and must be preserved. + +## Decision + +### 1. Resolution is exact-case first, then a non-throwing deterministic fallback + +`findOrCreate` resolves in three ordered steps and never throws on a collision: + +1. `findByName(cleanName)` — **exact-case** derived query. If present, return it. The edit + round-trip replays the stored name verbatim, so the exact-case row is the right binding + (typing the bare child name `weihnachten` binds to the child; `Weihnachten` binds to the + parent container). +2. else `findAllByNameIgnoreCase(cleanName)` — the **plural** case-insensitive list. If + non-empty, return the element with the **lowest `id`** (`min(comparing(Tag::getId))`). +3. else create the tag (an orphan: null `sourceRef`/`parentId`). + +The two repository methods are deliberately **two distinctly-named methods** — Spring Data +cannot disambiguate an `Optional` from a `List` derived query by return type alone. +The throwing `Optional findByNameIgnoreCase` is **deleted** so the non-unique-throwing +call cannot be reintroduced; `findOrCreate` was its only production caller. + +### 2. The tie-break is `id`, and it is load-bearing + +`id` is a stable, always-present, unique column, so "lowest id" is a total, deterministic +order over the candidates: the same name resolves to the same row on every call, forever, +without throwing. This matters only in the free-text authoring path (step 2), where no +exact-case row exists yet two case-folding rows do. + +### 3. No `unique(lower(name))` constraint — and a load-bearing comment says so + +A global case-insensitive uniqueness constraint is **wrong**: it would reject the legitimate +parent/child canonical nodes. It would also **fail to apply** against the existing rows, +turning a code-only deploy into a failed Flyway migration that blocks startup. A comment at +both `findOrCreate` and the repository methods records this so the constraint is not "helpfully" +added later. + +## Consequences + +- **Code-only, zero migration, fully reversible** (roll back the JAR). No tag data is touched; + the colliding rows stay exactly as the canonical importer produced them. +- One change fixes all three write paths — single-document edit, bulk-edit, and upload batch — + because they all funnel through `resolveTags → findOrCreate`, which stays the single source + of truth (resolution logic is **not** hoisted into `DocumentService`). +- **Free-text tag semantics under collision are accepted as-is** (issue #730, option A): the + bare word `weihnachten` binds to the deep child and `Weihnachten` to the parent container. + Correct for the edit round-trip and acceptable for authoring; making the typeahead show the + tree path so an author can tell a container from its same-named child is a separate + follow-up. +- The wire response stays opaque: after the fix this path no longer throws + `IncorrectResultSizeDataAccessException`, and `GlobalExceptionHandler`'s generic handler maps + any stray one to `INTERNAL_ERROR` with no Hibernate/SQL leak — so no dedicated handler was + added. +- **The sibling Person path is unfixed but tracked.** `PersonService.findOrCreateByAlias` + (`findByAliasIgnoreCase`) and `findByFirstNameIgnoreCaseAndLastNameIgnoreCase` carry the same + latent `Optional`-non-unique throw on user-influenced names; deferred to #731 rather than + widened into this fix. +- Postgres `LOWER()` folding of umlauts (`ü`/`ä`) is the actual correctness hinge of the + fallback and cannot be proven by a mocked repo, so it is pinned by a Testcontainers + `postgres:16-alpine` test on a `Glückwünsche`/`glückwünsche` pair; a plain-ASCII test would + stay green while the bug reappeared for umlaut tags. + +## Alternatives considered + +- **A `unique(lower(name))` index** — rejected: the collisions are valid canonical nodes, and + the migration would fail against the existing data and block startup. +- **Merging/deduping the colliding tags** — rejected: each has a distinct `source_ref`, tree + position, and real document attachments; they are not duplicates. +- **Round-tripping tag IDs instead of names** so resolution can't be ambiguous at all — the + cleaner long-term shape (removes the name-as-key smell), but a larger change with frontend + surface; deferred to #732. The lookup fix here is the minimal correct unblock. +- **Hoisting resolution into `DocumentService.resolveTags`** — rejected: it would duplicate the + rule across the edit, bulk-edit, and import paths and let them drift; `findOrCreate` stays + the one owner.