fix(tag): resolve case-colliding tag names without throwing (#730) #733

Merged
marcel merged 5 commits from fix/issue-730-tag-case-collision into main 2026-06-06 11:38:47 +02:00
5 changed files with 312 additions and 15 deletions

View File

@@ -20,7 +20,14 @@ public interface TagRepository extends JpaRepository<Tag, UUID> {
} }
Optional<Tag> findByNameIgnoreCase(String name); // Tag-name resolution (see TagService.findOrCreate). Names that collide case-insensitively across
// the canonical tree are VALID — a parent and its same-named lowercase child (e.g. "Geburt" /
// "Geburt/geburt") are distinct nodes with their own source_ref and document attachments. So
// resolution must be exact-case first, then a non-throwing list for the case-insensitive fallback.
// Do NOT add a unique(lower(name)) constraint — it would reject these legitimate rows. See #730.
Optional<Tag> findByName(String name);
List<Tag> findAllByNameIgnoreCase(String name);
// Lookup by the canonical tag_path, used for idempotent canonical re-import (Phase 3). // Lookup by the canonical tag_path, used for idempotent canonical re-import (Phase 3).
Optional<Tag> findBySourceRef(String sourceRef); Optional<Tag> findBySourceRef(String sourceRef);

View File

@@ -2,6 +2,7 @@ package org.raddatz.familienarchiv.tag;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
@@ -55,10 +56,21 @@ public class TagService {
return tagRepository.findBySourceRef(sourceRef); return tagRepository.findBySourceRef(sourceRef);
} }
/**
* Resolves a tag name to a single tag, creating one when absent. Never throws on case-insensitive
* collisions: names that differ only by case are valid distinct nodes in the canonical tree (a
* parent and its same-named lowercase child), so resolution prefers an exact-case match, then
* falls back to the lowest-id case-insensitive match, then creates. See #730.
*/
public Tag findOrCreate(String name) { public Tag findOrCreate(String name) {
String cleanName = name.trim(); String cleanName = name.trim();
return tagRepository.findByNameIgnoreCase(cleanName) Optional<Tag> exact = tagRepository.findByName(cleanName);
.orElseGet(() -> tagRepository.save(Tag.builder().name(cleanName).build())); if (exact.isPresent()) return exact.get(); // exact-case wins (edit round-trip replays the stored name)
List<Tag> caseInsensitive = tagRepository.findAllByNameIgnoreCase(cleanName);
if (!caseInsensitive.isEmpty()) {
return caseInsensitive.stream().min(Comparator.comparing(Tag::getId)).orElseThrow(); // deterministic tie-break by id — list is non-empty, never throws
}
return tagRepository.save(Tag.builder().name(cleanName).build()); // create-when-absent (orphan tag: null sourceRef/parentId)
} }
/** /**

View File

@@ -0,0 +1,123 @@
package org.raddatz.familienarchiv.document;
import org.junit.jupiter.api.Test;
import org.raddatz.familienarchiv.PostgresContainerConfig;
import org.raddatz.familienarchiv.tag.Tag;
import org.raddatz.familienarchiv.tag.TagRepository;
import org.raddatz.familienarchiv.tag.TagService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.context.annotation.Import;
import org.springframework.test.context.ActiveProfiles;
import org.springframework.test.context.bean.override.mockito.MockitoBean;
import org.springframework.transaction.annotation.Transactional;
import software.amazon.awssdk.services.s3.S3Client;
import java.time.LocalDate;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatCode;
/**
* #730 — tag-name resolution against a real Postgres. A mocked repo can't prove the two things that
* actually break: that {@code findAllByNameIgnoreCase} folds case the way Postgres {@code LOWER()}
* does (critical for umlauts like {@code ü}), and that saving a document tagged with a case-colliding
* tag no longer throws {@code NonUniqueResultException}. H2 folds case differently, so this pins the
* behaviour on {@code postgres:16-alpine}. The four-branch resolution logic itself is covered faster
* by the mocked {@code TagServiceTest}.
*/
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
@ActiveProfiles("test")
@Import(PostgresContainerConfig.class)
@Transactional
class TagCaseCollisionIntegrationTest {
@MockitoBean S3Client s3Client;
@Autowired DocumentService documentService;
@Autowired DocumentRepository documentRepository;
@Autowired TagRepository tagRepository;
@Autowired TagService tagService;
private Tag persistTag(String name, String sourceRef, UUID parentId) {
return tagRepository.save(Tag.builder().name(name).sourceRef(sourceRef).parentId(parentId).build());
}
private Document persistDocTaggedWith(Tag tag) {
return documentRepository.save(Document.builder()
.originalFilename("C-7301")
.title("Weihnachtsbrief")
.documentDate(LocalDate.of(1928, 1, 1))
.metaDatePrecision(DatePrecision.YEAR)
.status(DocumentStatus.UPLOADED)
.tags(new HashSet<>(Set.of(tag)))
.build());
}
@Test
void updateDocument_succeedsAndKeepsExactChildTag_whenTaggedWithCaseCollidingChild() throws Exception {
Tag parent = persistTag("Weihnachten", "Weihnachten", null);
Tag child = persistTag("weihnachten", "Weihnachten/weihnachten", parent.getId());
Document doc = persistDocTaggedWith(child);
DocumentUpdateDTO dto = new DocumentUpdateDTO();
dto.setTitle("Weihnachtsbrief");
dto.setDocumentDate(LocalDate.of(1930, 1, 1)); // change the date — the field that 500'd on staging
dto.setMetaDatePrecision(DatePrecision.YEAR);
dto.setTags("weihnachten"); // the edit form round-trips the stored child name
assertThatCode(() -> documentService.updateDocument(doc.getId(), dto, null, null))
.doesNotThrowAnyException();
Set<Tag> tags = documentRepository.findById(doc.getId()).orElseThrow().getTags();
assertThat(tags).hasSize(1);
assertThat(tags.iterator().next().getId()).isEqualTo(child.getId()); // child kept, not the parent
}
@Test
void findOrCreate_resolvesUmlautCollisionDeterministically_withoutThrow() {
// The regression catcher: a plain-ASCII pair would stay green even if Postgres folded ü wrongly.
Tag parent = persistTag("Glückwünsche", "Glückwünsche", null);
Tag child = persistTag("glückwünsche", "Glückwünsche/glückwünsche", parent.getId());
// Proof that real Postgres LOWER() folds the umlaut so both rows match case-insensitively.
// Query with the UPPERCASE form findOrCreate actually passes — folding LOWER('GLÜCKWÜNSCHE')
// against LOWER(name) is the exact step under test; a lowercase probe wouldn't exercise it.
assertThat(tagRepository.findAllByNameIgnoreCase("GLÜCKWÜNSCHE")).hasSize(2);
// No exact-case "GLÜCKWÜNSCHE" row exists → resolution falls through to the case-insensitive
// branch with two candidates and must pick the lowest id deterministically, never throwing.
UUID expected = List.of(parent, child).stream().min(Comparator.comparing(Tag::getId)).orElseThrow().getId();
Tag first = tagService.findOrCreate("GLÜCKWÜNSCHE");
Tag second = tagService.findOrCreate("GLÜCKWÜNSCHE");
assertThat(first.getId()).isEqualTo(expected);
assertThat(second.getId()).isEqualTo(first.getId());
}
@Test
void bulkEdit_resolvesCaseCollidingTagThroughFindOrCreate_withoutThrow() {
// Bulk-edit shares resolveTags → findOrCreate; this guards a future refactor that bypasses it.
Tag parent = persistTag("Weihnachten", "Weihnachten", null);
Tag child = persistTag("weihnachten", "Weihnachten/weihnachten", parent.getId());
Document doc = documentRepository.save(Document.builder()
.originalFilename("C-7302")
.title("Brief")
.status(DocumentStatus.UPLOADED)
.build());
DocumentBulkEditDTO dto = new DocumentBulkEditDTO();
dto.setTagNames(List.of("weihnachten"));
assertThatCode(() -> documentService.applyBulkEditToDocument(doc.getId(), dto, null))
.doesNotThrowAnyException();
Set<Tag> tags = documentRepository.findById(doc.getId()).orElseThrow().getTags();
assertThat(tags).hasSize(1);
assertThat(tags.iterator().next().getId()).isEqualTo(child.getId());
}
}

View File

@@ -53,20 +53,68 @@ class TagServiceTest {
// ─── findOrCreate ───────────────────────────────────────────────────────── // ─── findOrCreate ─────────────────────────────────────────────────────────
@Test @Test
void findOrCreate_returnsExisting_whenNameFound() { void findOrCreate_exactCaseWins_overCaseInsensitiveSibling() {
Tag existing = Tag.builder().id(UUID.randomUUID()).name("Familie").build(); // "Geburt" (parent) and "geburt" (child) both exist; the edit round-trip replays the stored
when(tagRepository.findByNameIgnoreCase("Familie")).thenReturn(Optional.of(existing)); // name "geburt", which must bind to the exact-case row, not the parent.
Tag exact = Tag.builder().id(UUID.randomUUID()).name("geburt").build();
when(tagRepository.findByName("geburt")).thenReturn(Optional.of(exact));
Tag result = tagService.findOrCreate("Familie"); Tag result = tagService.findOrCreate("geburt");
assertThat(result).isEqualTo(existing); assertThat(result).isEqualTo(exact);
verify(tagRepository, never()).save(any()); verify(tagRepository, never()).save(any());
} }
@Test @Test
void findOrCreate_createsNew_whenNameNotFound() { void findOrCreate_exactCaseWins_evenWhenItsIdIsNotTheLowest() {
// Adversarial guard: exact-case must short-circuit BEFORE the lowest-id rule. Here the exact row
// has the higher id, so a naive "always pick lowest id across all CI matches" would pick wrong.
Tag exactHigherId = Tag.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000009")).name("geburt").build();
when(tagRepository.findByName("geburt")).thenReturn(Optional.of(exactHigherId));
Tag result = tagService.findOrCreate("geburt");
assertThat(result).isEqualTo(exactHigherId);
verify(tagRepository, never()).findAllByNameIgnoreCase(any()); // exact-case wins without consulting the CI list
verify(tagRepository, never()).save(any());
}
@Test
void findOrCreate_usesSingleCaseInsensitiveMatch_whenNoExactCase() {
// Stored name is "Weihnachten"; a save replays "weihnachten" (no exact-case row) → bind to the
// single case-insensitive match rather than creating a duplicate.
Tag stored = Tag.builder().id(UUID.randomUUID()).name("Weihnachten").build();
when(tagRepository.findByName("weihnachten")).thenReturn(Optional.empty());
when(tagRepository.findAllByNameIgnoreCase("weihnachten")).thenReturn(List.of(stored));
Tag result = tagService.findOrCreate("weihnachten");
assertThat(result).isEqualTo(stored);
verify(tagRepository, never()).save(any());
}
@Test
void findOrCreate_returnsLowestIdDeterministically_whenMultipleCaseInsensitiveMatches() {
// Two rows collide case-insensitively and neither equals the query exactly. Resolution must be
// deterministic (lowest id) and never throw — proven by calling twice and getting the same id.
Tag lowerId = Tag.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000001")).name("Reisepläne").build();
Tag higherId = Tag.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000002")).name("reisepläne").build();
when(tagRepository.findByName("REISEPLÄNE")).thenReturn(Optional.empty());
when(tagRepository.findAllByNameIgnoreCase("REISEPLÄNE")).thenReturn(List.of(higherId, lowerId));
Tag first = tagService.findOrCreate("REISEPLÄNE");
Tag second = tagService.findOrCreate("REISEPLÄNE");
assertThat(first.getId()).isEqualTo(lowerId.getId());
assertThat(second.getId()).isEqualTo(first.getId());
verify(tagRepository, never()).save(any());
}
@Test
void findOrCreate_createsOrphanTag_whenNameAbsent() {
Tag saved = Tag.builder().id(UUID.randomUUID()).name("Krieg").build(); Tag saved = Tag.builder().id(UUID.randomUUID()).name("Krieg").build();
when(tagRepository.findByNameIgnoreCase("Krieg")).thenReturn(Optional.empty()); when(tagRepository.findByName("Krieg")).thenReturn(Optional.empty());
when(tagRepository.findAllByNameIgnoreCase("Krieg")).thenReturn(List.of());
when(tagRepository.save(any())).thenReturn(saved); when(tagRepository.save(any())).thenReturn(saved);
Tag result = tagService.findOrCreate("Krieg"); Tag result = tagService.findOrCreate("Krieg");
@@ -76,13 +124,15 @@ class TagServiceTest {
} }
@Test @Test
void findOrCreate_trimsWhitespaceBeforeLookup() { void findOrCreate_trimsWhitespace_thenLandsOnCaseInsensitiveChild() {
Tag existing = Tag.builder().id(UUID.randomUUID()).name("Urlaub").build(); Tag child = Tag.builder().id(UUID.randomUUID()).name("weihnachten").build();
when(tagRepository.findByNameIgnoreCase("Urlaub")).thenReturn(Optional.of(existing)); when(tagRepository.findByName("weihnachten")).thenReturn(Optional.empty());
when(tagRepository.findAllByNameIgnoreCase("weihnachten")).thenReturn(List.of(child));
tagService.findOrCreate(" Urlaub "); Tag result = tagService.findOrCreate(" weihnachten ");
verify(tagRepository).findByNameIgnoreCase("Urlaub"); assertThat(result).isEqualTo(child);
verify(tagRepository).findByName("weihnachten");
} }
// ─── update ─────────────────────────────────────────────────────────────── // ─── update ───────────────────────────────────────────────────────────────

View File

@@ -0,0 +1,105 @@
# ADR-032 — Tag-name resolution tolerates case-collisions: exact-case first, then a deterministic lowest-id fallback, and never a `unique(lower(name))` constraint
**Date:** 2026-06-06
**Status:** Accepted
**Issue:** #730 (document with a case-colliding tag cannot be saved — `findByNameIgnoreCase` `NonUniqueResultException`)
**Milestone:**
---
## Context
`TagService.findOrCreate(name)` is the single point that turns a tag **name** into a `Tag`
row. The document edit form, bulk-edit, and the upload batch all round-trip tag **names**
(the edit form sends `tags.map(t => t.name).join(',')`) and re-resolve them on **every**
save through `resolveTags → findOrCreate`. The old implementation resolved with
`tagRepository.findByNameIgnoreCase(name)`, a derived query returning `Optional<Tag>`.
That signature encodes an invariant the data does **not** hold: that a name is globally
unique case-insensitively. The canonical tag tree legitimately contains names that differ
only by case — a parent container and its same-named lowercase **child** (`Geburt` /
`Geburt/geburt`, `Weihnachten` / `Weihnachten/weihnachten`, …), or two siblings
(`Reise/Reisepläne` / `Reise/reisepläne`). Each is a distinct node with its own
`source_ref` (the stable identity, per ADR-025) and its own document attachments — **not** an
accidental duplicate. When two rows matched case-insensitively, Hibernate threw
`NonUniqueResultException``IncorrectResultSizeDataAccessException` → a generic HTTP 500.
The effect was severe and opaque: every document carrying one of ~10 colliding tags (≈180
document-tag attachments on staging) became **un-editable** — any field change failed on save
because the whole tag set is re-resolved — and the user saw only "an unexpected error", with
no hint that a tag was the cause.
This is a **lookup** problem, not a data problem: the collisions are valid canonical nodes
and must be preserved.
## Decision
### 1. Resolution is exact-case first, then a non-throwing deterministic fallback
`findOrCreate` resolves in three ordered steps and never throws on a collision:
1. `findByName(cleanName)`**exact-case** derived query. If present, return it. The edit
round-trip replays the stored name verbatim, so the exact-case row is the right binding
(typing the bare child name `weihnachten` binds to the child; `Weihnachten` binds to the
parent container).
2. else `findAllByNameIgnoreCase(cleanName)` — the **plural** case-insensitive list. If
non-empty, return the element with the **lowest `id`** (`min(comparing(Tag::getId))`).
3. else create the tag (an orphan: null `sourceRef`/`parentId`).
The two repository methods are deliberately **two distinctly-named methods** — Spring Data
cannot disambiguate an `Optional<Tag>` from a `List<Tag>` derived query by return type alone.
The throwing `Optional<Tag> findByNameIgnoreCase` is **deleted** so the non-unique-throwing
call cannot be reintroduced; `findOrCreate` was its only production caller.
### 2. The tie-break is `id`, and it is load-bearing
`id` is a stable, always-present, unique column, so "lowest id" is a total, deterministic
order over the candidates: the same name resolves to the same row on every call, forever,
without throwing. This matters only in the free-text authoring path (step 2), where no
exact-case row exists yet two case-folding rows do.
### 3. No `unique(lower(name))` constraint — and a load-bearing comment says so
A global case-insensitive uniqueness constraint is **wrong**: it would reject the legitimate
parent/child canonical nodes. It would also **fail to apply** against the existing rows,
turning a code-only deploy into a failed Flyway migration that blocks startup. A comment at
both `findOrCreate` and the repository methods records this so the constraint is not "helpfully"
added later.
## Consequences
- **Code-only, zero migration, fully reversible** (roll back the JAR). No tag data is touched;
the colliding rows stay exactly as the canonical importer produced them.
- One change fixes all three write paths — single-document edit, bulk-edit, and upload batch —
because they all funnel through `resolveTags → findOrCreate`, which stays the single source
of truth (resolution logic is **not** hoisted into `DocumentService`).
- **Free-text tag semantics under collision are accepted as-is** (issue #730, option A): the
bare word `weihnachten` binds to the deep child and `Weihnachten` to the parent container.
Correct for the edit round-trip and acceptable for authoring; making the typeahead show the
tree path so an author can tell a container from its same-named child is a separate
follow-up.
- The wire response stays opaque: after the fix this path no longer throws
`IncorrectResultSizeDataAccessException`, and `GlobalExceptionHandler`'s generic handler maps
any stray one to `INTERNAL_ERROR` with no Hibernate/SQL leak — so no dedicated handler was
added.
- **The sibling Person path is unfixed but tracked.** `PersonService.findOrCreateByAlias`
(`findByAliasIgnoreCase`) and `findByFirstNameIgnoreCaseAndLastNameIgnoreCase` carry the same
latent `Optional`-non-unique throw on user-influenced names; deferred to #731 rather than
widened into this fix.
- Postgres `LOWER()` folding of umlauts (`ü`/`ä`) is the actual correctness hinge of the
fallback and cannot be proven by a mocked repo, so it is pinned by a Testcontainers
`postgres:16-alpine` test on a `Glückwünsche`/`glückwünsche` pair; a plain-ASCII test would
stay green while the bug reappeared for umlaut tags.
## Alternatives considered
- **A `unique(lower(name))` index** — rejected: the collisions are valid canonical nodes, and
the migration would fail against the existing data and block startup.
- **Merging/deduping the colliding tags** — rejected: each has a distinct `source_ref`, tree
position, and real document attachments; they are not duplicates.
- **Round-tripping tag IDs instead of names** so resolution can't be ambiguous at all — the
cleaner long-term shape (removes the name-as-key smell), but a larger change with frontend
surface; deferred to #732. The lookup fix here is the minimal correct unblock.
- **Hoisting resolution into `DocumentService.resolveTags`** — rejected: it would duplicate the
rule across the edit, bulk-edit, and import paths and let them drift; `findOrCreate` stays
the one owner.