2026-06-06 11:38:47 +02:00
5 changed files with 312 additions and 15 deletions
--- a/backend/src/main/java/org/raddatz/familienarchiv/tag/TagRepository.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/tag/TagRepository.java
@@ -20,7 +20,14 @@ public interface TagRepository extends JpaRepository<Tag, UUID> {
    }
-    Optional<Tag> findByNameIgnoreCase(String name);
+    // Tag-name resolution (see TagService.findOrCreate). Names that collide case-insensitively across
    // the canonical tree are VALID — a parent and its same-named lowercase child (e.g. "Geburt" /
    // "Geburt/geburt") are distinct nodes with their own source_ref and document attachments. So
    // resolution must be exact-case first, then a non-throwing list for the case-insensitive fallback.
    // Do NOT add a unique(lower(name)) constraint — it would reject these legitimate rows. See #730.
    Optional<Tag> findByName(String name);
    List<Tag> findAllByNameIgnoreCase(String name);
    // Lookup by the canonical tag_path, used for idempotent canonical re-import (Phase 3).
    Optional<Tag> findBySourceRef(String sourceRef);
--- a/backend/src/main/java/org/raddatz/familienarchiv/tag/TagService.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/tag/TagService.java
@@ -2,6 +2,7 @@ package org.raddatz.familienarchiv.tag;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashMap;
@@ -55,10 +56,21 @@ public class TagService {
        return tagRepository.findBySourceRef(sourceRef);
    }
    /**
     * Resolves a tag name to a single tag, creating one when absent. Never throws on case-insensitive
     * collisions: names that differ only by case are valid distinct nodes in the canonical tree (a
     * parent and its same-named lowercase child), so resolution prefers an exact-case match, then
     * falls back to the lowest-id case-insensitive match, then creates. See #730.
     */
    public Tag findOrCreate(String name) {
        String cleanName = name.trim();
-        return tagRepository.findByNameIgnoreCase(cleanName)
+        Optional<Tag> exact = tagRepository.findByName(cleanName);
-                .orElseGet(() -> tagRepository.save(Tag.builder().name(cleanName).build()));
+        if (exact.isPresent()) return exact.get();                          // exact-case wins (edit round-trip replays the stored name)
        List<Tag> caseInsensitive = tagRepository.findAllByNameIgnoreCase(cleanName);
        if (!caseInsensitive.isEmpty()) {
            return caseInsensitive.stream().min(Comparator.comparing(Tag::getId)).orElseThrow(); // deterministic tie-break by id — list is non-empty, never throws
        }
        return tagRepository.save(Tag.builder().name(cleanName).build());   // create-when-absent (orphan tag: null sourceRef/parentId)
    }
    /**
--- a/backend/src/test/java/org/raddatz/familienarchiv/document/TagCaseCollisionIntegrationTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/document/TagCaseCollisionIntegrationTest.java
@@ -0,0 +1,123 @@
 package org.raddatz.familienarchiv.document;
 import org.junit.jupiter.api.Test;
 import org.raddatz.familienarchiv.PostgresContainerConfig;
 import org.raddatz.familienarchiv.tag.Tag;
 import org.raddatz.familienarchiv.tag.TagRepository;
 import org.raddatz.familienarchiv.tag.TagService;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.test.context.SpringBootTest;
 import org.springframework.context.annotation.Import;
 import org.springframework.test.context.ActiveProfiles;
 import org.springframework.test.context.bean.override.mockito.MockitoBean;
 import org.springframework.transaction.annotation.Transactional;
 import software.amazon.awssdk.services.s3.S3Client;
 import java.time.LocalDate;
 import java.util.Comparator;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.UUID;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatCode;
 /**
 * #730 — tag-name resolution against a real Postgres. A mocked repo can't prove the two things that
 * actually break: that {@code findAllByNameIgnoreCase} folds case the way Postgres {@code LOWER()}
 * does (critical for umlauts like {@code ü}), and that saving a document tagged with a case-colliding
 * tag no longer throws {@code NonUniqueResultException}. H2 folds case differently, so this pins the
 * behaviour on {@code postgres:16-alpine}. The four-branch resolution logic itself is covered faster
 * by the mocked {@code TagServiceTest}.
 */
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
@ActiveProfiles("test")
@Import(PostgresContainerConfig.class)
@Transactional
 class TagCaseCollisionIntegrationTest {
    @MockitoBean S3Client s3Client;
    @Autowired DocumentService documentService;
    @Autowired DocumentRepository documentRepository;
    @Autowired TagRepository tagRepository;
    @Autowired TagService tagService;
    private Tag persistTag(String name, String sourceRef, UUID parentId) {
        return tagRepository.save(Tag.builder().name(name).sourceRef(sourceRef).parentId(parentId).build());
    }
    private Document persistDocTaggedWith(Tag tag) {
        return documentRepository.save(Document.builder()
                .originalFilename("C-7301")
                .title("Weihnachtsbrief")
                .documentDate(LocalDate.of(1928, 1, 1))
                .metaDatePrecision(DatePrecision.YEAR)
                .status(DocumentStatus.UPLOADED)
                .tags(new HashSet<>(Set.of(tag)))
                .build());
    }
    @Test
    void updateDocument_succeedsAndKeepsExactChildTag_whenTaggedWithCaseCollidingChild() throws Exception {
        Tag parent = persistTag("Weihnachten", "Weihnachten", null);
        Tag child = persistTag("weihnachten", "Weihnachten/weihnachten", parent.getId());
        Document doc = persistDocTaggedWith(child);
        DocumentUpdateDTO dto = new DocumentUpdateDTO();
        dto.setTitle("Weihnachtsbrief");
        dto.setDocumentDate(LocalDate.of(1930, 1, 1)); // change the date — the field that 500'd on staging
        dto.setMetaDatePrecision(DatePrecision.YEAR);
        dto.setTags("weihnachten"); // the edit form round-trips the stored child name
        assertThatCode(() -> documentService.updateDocument(doc.getId(), dto, null, null))
                .doesNotThrowAnyException();
        Set<Tag> tags = documentRepository.findById(doc.getId()).orElseThrow().getTags();
        assertThat(tags).hasSize(1);
        assertThat(tags.iterator().next().getId()).isEqualTo(child.getId()); // child kept, not the parent
    }
    @Test
    void findOrCreate_resolvesUmlautCollisionDeterministically_withoutThrow() {
        // The regression catcher: a plain-ASCII pair would stay green even if Postgres folded ü wrongly.
        Tag parent = persistTag("Glückwünsche", "Glückwünsche", null);
        Tag child = persistTag("glückwünsche", "Glückwünsche/glückwünsche", parent.getId());
        // Proof that real Postgres LOWER() folds the umlaut so both rows match case-insensitively.
        // Query with the UPPERCASE form findOrCreate actually passes — folding LOWER('GLÜCKWÜNSCHE')
        // against LOWER(name) is the exact step under test; a lowercase probe wouldn't exercise it.
        assertThat(tagRepository.findAllByNameIgnoreCase("GLÜCKWÜNSCHE")).hasSize(2);
        // No exact-case "GLÜCKWÜNSCHE" row exists → resolution falls through to the case-insensitive
        // branch with two candidates and must pick the lowest id deterministically, never throwing.
        UUID expected = List.of(parent, child).stream().min(Comparator.comparing(Tag::getId)).orElseThrow().getId();
        Tag first = tagService.findOrCreate("GLÜCKWÜNSCHE");
        Tag second = tagService.findOrCreate("GLÜCKWÜNSCHE");
        assertThat(first.getId()).isEqualTo(expected);
        assertThat(second.getId()).isEqualTo(first.getId());
    }
    @Test
    void bulkEdit_resolvesCaseCollidingTagThroughFindOrCreate_withoutThrow() {
        // Bulk-edit shares resolveTags → findOrCreate; this guards a future refactor that bypasses it.
        Tag parent = persistTag("Weihnachten", "Weihnachten", null);
        Tag child = persistTag("weihnachten", "Weihnachten/weihnachten", parent.getId());
        Document doc = documentRepository.save(Document.builder()
                .originalFilename("C-7302")
                .title("Brief")
                .status(DocumentStatus.UPLOADED)
                .build());
        DocumentBulkEditDTO dto = new DocumentBulkEditDTO();
        dto.setTagNames(List.of("weihnachten"));
        assertThatCode(() -> documentService.applyBulkEditToDocument(doc.getId(), dto, null))
                .doesNotThrowAnyException();
        Set<Tag> tags = documentRepository.findById(doc.getId()).orElseThrow().getTags();
        assertThat(tags).hasSize(1);
        assertThat(tags.iterator().next().getId()).isEqualTo(child.getId());
    }
 }
--- a/backend/src/test/java/org/raddatz/familienarchiv/tag/TagServiceTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/tag/TagServiceTest.java
@@ -53,20 +53,68 @@ class TagServiceTest {
    // ─── findOrCreate ─────────────────────────────────────────────────────────
    @Test
-    void findOrCreate_returnsExisting_whenNameFound() {
+    void findOrCreate_exactCaseWins_overCaseInsensitiveSibling() {
-        Tag existing = Tag.builder().id(UUID.randomUUID()).name("Familie").build();
+        // "Geburt" (parent) and "geburt" (child) both exist; the edit round-trip replays the stored
-        when(tagRepository.findByNameIgnoreCase("Familie")).thenReturn(Optional.of(existing));
+        // name "geburt", which must bind to the exact-case row, not the parent.
        Tag exact = Tag.builder().id(UUID.randomUUID()).name("geburt").build();
        when(tagRepository.findByName("geburt")).thenReturn(Optional.of(exact));
-        Tag result = tagService.findOrCreate("Familie");
+        Tag result = tagService.findOrCreate("geburt");
-        assertThat(result).isEqualTo(existing);
+        assertThat(result).isEqualTo(exact);
        verify(tagRepository, never()).save(any());
    }
    @Test
-    void findOrCreate_createsNew_whenNameNotFound() {
+    void findOrCreate_exactCaseWins_evenWhenItsIdIsNotTheLowest() {
        // Adversarial guard: exact-case must short-circuit BEFORE the lowest-id rule. Here the exact row
        // has the higher id, so a naive "always pick lowest id across all CI matches" would pick wrong.
        Tag exactHigherId = Tag.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000009")).name("geburt").build();
        when(tagRepository.findByName("geburt")).thenReturn(Optional.of(exactHigherId));
        Tag result = tagService.findOrCreate("geburt");
        assertThat(result).isEqualTo(exactHigherId);
        verify(tagRepository, never()).findAllByNameIgnoreCase(any()); // exact-case wins without consulting the CI list
        verify(tagRepository, never()).save(any());
    }
    @Test
    void findOrCreate_usesSingleCaseInsensitiveMatch_whenNoExactCase() {
        // Stored name is "Weihnachten"; a save replays "weihnachten" (no exact-case row) → bind to the
        // single case-insensitive match rather than creating a duplicate.
        Tag stored = Tag.builder().id(UUID.randomUUID()).name("Weihnachten").build();
        when(tagRepository.findByName("weihnachten")).thenReturn(Optional.empty());
        when(tagRepository.findAllByNameIgnoreCase("weihnachten")).thenReturn(List.of(stored));
        Tag result = tagService.findOrCreate("weihnachten");
        assertThat(result).isEqualTo(stored);
        verify(tagRepository, never()).save(any());
    }
    @Test
    void findOrCreate_returnsLowestIdDeterministically_whenMultipleCaseInsensitiveMatches() {
        // Two rows collide case-insensitively and neither equals the query exactly. Resolution must be
        // deterministic (lowest id) and never throw — proven by calling twice and getting the same id.
        Tag lowerId = Tag.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000001")).name("Reisepläne").build();
        Tag higherId = Tag.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000002")).name("reisepläne").build();
        when(tagRepository.findByName("REISEPLÄNE")).thenReturn(Optional.empty());
        when(tagRepository.findAllByNameIgnoreCase("REISEPLÄNE")).thenReturn(List.of(higherId, lowerId));
        Tag first = tagService.findOrCreate("REISEPLÄNE");
        Tag second = tagService.findOrCreate("REISEPLÄNE");
        assertThat(first.getId()).isEqualTo(lowerId.getId());
        assertThat(second.getId()).isEqualTo(first.getId());
        verify(tagRepository, never()).save(any());
    }
    @Test
    void findOrCreate_createsOrphanTag_whenNameAbsent() {
        Tag saved = Tag.builder().id(UUID.randomUUID()).name("Krieg").build();
-        when(tagRepository.findByNameIgnoreCase("Krieg")).thenReturn(Optional.empty());
+        when(tagRepository.findByName("Krieg")).thenReturn(Optional.empty());
        when(tagRepository.findAllByNameIgnoreCase("Krieg")).thenReturn(List.of());
        when(tagRepository.save(any())).thenReturn(saved);
        Tag result = tagService.findOrCreate("Krieg");
@@ -76,13 +124,15 @@ class TagServiceTest {
    }
    @Test
-    void findOrCreate_trimsWhitespaceBeforeLookup() {
+    void findOrCreate_trimsWhitespace_thenLandsOnCaseInsensitiveChild() {
-        Tag existing = Tag.builder().id(UUID.randomUUID()).name("Urlaub").build();
+        Tag child = Tag.builder().id(UUID.randomUUID()).name("weihnachten").build();
-        when(tagRepository.findByNameIgnoreCase("Urlaub")).thenReturn(Optional.of(existing));
+        when(tagRepository.findByName("weihnachten")).thenReturn(Optional.empty());
        when(tagRepository.findAllByNameIgnoreCase("weihnachten")).thenReturn(List.of(child));
-        tagService.findOrCreate("  Urlaub  ");
+        Tag result = tagService.findOrCreate("  weihnachten  ");
-        verify(tagRepository).findByNameIgnoreCase("Urlaub");
+        assertThat(result).isEqualTo(child);
        verify(tagRepository).findByName("weihnachten");
    }
    // ─── update ───────────────────────────────────────────────────────────────
--- a/docs/adr/032-tag-name-resolution-tolerates-case-collisions.md
+++ b/docs/adr/032-tag-name-resolution-tolerates-case-collisions.md
@@ -0,0 +1,105 @@
 # ADR-032 — Tag-name resolution tolerates case-collisions: exact-case first, then a deterministic lowest-id fallback, and never a `unique(lower(name))` constraint
 **Date:** 2026-06-06
 **Status:** Accepted
 **Issue:** #730 (document with a case-colliding tag cannot be saved — `findByNameIgnoreCase` `NonUniqueResultException`)
 **Milestone:** —
 ---
 ## Context
 `TagService.findOrCreate(name)` is the single point that turns a tag **name** into a `Tag`
 row. The document edit form, bulk-edit, and the upload batch all round-trip tag **names**
 (the edit form sends `tags.map(t => t.name).join(',')`) and re-resolve them on **every**
 save through `resolveTags → findOrCreate`. The old implementation resolved with
 `tagRepository.findByNameIgnoreCase(name)`, a derived query returning `Optional<Tag>`.
 That signature encodes an invariant the data does **not** hold: that a name is globally
 unique case-insensitively. The canonical tag tree legitimately contains names that differ
 only by case — a parent container and its same-named lowercase **child** (`Geburt` /
 `Geburt/geburt`, `Weihnachten` / `Weihnachten/weihnachten`, …), or two siblings
 (`Reise/Reisepläne` / `Reise/reisepläne`). Each is a distinct node with its own
 `source_ref` (the stable identity, per ADR-025) and its own document attachments — **not** an
 accidental duplicate. When two rows matched case-insensitively, Hibernate threw
 `NonUniqueResultException` → `IncorrectResultSizeDataAccessException` → a generic HTTP 500.
 The effect was severe and opaque: every document carrying one of ~10 colliding tags (≈180
 document-tag attachments on staging) became **un-editable** — any field change failed on save
 because the whole tag set is re-resolved — and the user saw only "an unexpected error", with
 no hint that a tag was the cause.
 This is a **lookup** problem, not a data problem: the collisions are valid canonical nodes
 and must be preserved.
 ## Decision
 ### 1. Resolution is exact-case first, then a non-throwing deterministic fallback
 `findOrCreate` resolves in three ordered steps and never throws on a collision:
 1. `findByName(cleanName)` — **exact-case** derived query. If present, return it. The edit
   round-trip replays the stored name verbatim, so the exact-case row is the right binding
   (typing the bare child name `weihnachten` binds to the child; `Weihnachten` binds to the
   parent container).
 2. else `findAllByNameIgnoreCase(cleanName)` — the **plural** case-insensitive list. If
   non-empty, return the element with the **lowest `id`** (`min(comparing(Tag::getId))`).
 3. else create the tag (an orphan: null `sourceRef`/`parentId`).
 The two repository methods are deliberately **two distinctly-named methods** — Spring Data
 cannot disambiguate an `Optional<Tag>` from a `List<Tag>` derived query by return type alone.
 The throwing `Optional<Tag> findByNameIgnoreCase` is **deleted** so the non-unique-throwing
 call cannot be reintroduced; `findOrCreate` was its only production caller.
 ### 2. The tie-break is `id`, and it is load-bearing
 `id` is a stable, always-present, unique column, so "lowest id" is a total, deterministic
 order over the candidates: the same name resolves to the same row on every call, forever,
 without throwing. This matters only in the free-text authoring path (step 2), where no
 exact-case row exists yet two case-folding rows do.
 ### 3. No `unique(lower(name))` constraint — and a load-bearing comment says so
 A global case-insensitive uniqueness constraint is **wrong**: it would reject the legitimate
 parent/child canonical nodes. It would also **fail to apply** against the existing rows,
 turning a code-only deploy into a failed Flyway migration that blocks startup. A comment at
 both `findOrCreate` and the repository methods records this so the constraint is not "helpfully"
 added later.
 ## Consequences
 - **Code-only, zero migration, fully reversible** (roll back the JAR). No tag data is touched;
  the colliding rows stay exactly as the canonical importer produced them.
 - One change fixes all three write paths — single-document edit, bulk-edit, and upload batch —
  because they all funnel through `resolveTags → findOrCreate`, which stays the single source
  of truth (resolution logic is **not** hoisted into `DocumentService`).
 - **Free-text tag semantics under collision are accepted as-is** (issue #730, option A): the
  bare word `weihnachten` binds to the deep child and `Weihnachten` to the parent container.
  Correct for the edit round-trip and acceptable for authoring; making the typeahead show the
  tree path so an author can tell a container from its same-named child is a separate
  follow-up.
 - The wire response stays opaque: after the fix this path no longer throws
  `IncorrectResultSizeDataAccessException`, and `GlobalExceptionHandler`'s generic handler maps
  any stray one to `INTERNAL_ERROR` with no Hibernate/SQL leak — so no dedicated handler was
  added.
 - **The sibling Person path is unfixed but tracked.** `PersonService.findOrCreateByAlias`
  (`findByAliasIgnoreCase`) and `findByFirstNameIgnoreCaseAndLastNameIgnoreCase` carry the same
  latent `Optional`-non-unique throw on user-influenced names; deferred to #731 rather than
  widened into this fix.
 - Postgres `LOWER()` folding of umlauts (`ü`/`ä`) is the actual correctness hinge of the
  fallback and cannot be proven by a mocked repo, so it is pinned by a Testcontainers
  `postgres:16-alpine` test on a `Glückwünsche`/`glückwünsche` pair; a plain-ASCII test would
  stay green while the bug reappeared for umlaut tags.
 ## Alternatives considered
 - **A `unique(lower(name))` index** — rejected: the collisions are valid canonical nodes, and
  the migration would fail against the existing data and block startup.
 - **Merging/deduping the colliding tags** — rejected: each has a distinct `source_ref`, tree
  position, and real document attachments; they are not duplicates.
 - **Round-tripping tag IDs instead of names** so resolution can't be ambiguous at all — the
  cleaner long-term shape (removes the name-as-key smell), but a larger change with frontend
  surface; deferred to #732. The lookup fix here is the minimal correct unblock.
 - **Hoisting resolution into `DocumentService.resolveTags`** — rejected: it would duplicate the
  rule across the edit, bulk-edit, and import paths and let them drift; `findOrCreate` stays
  the one owner.