fix(tag): resolve case-colliding tag names without throwing (#730) #733
@@ -20,7 +20,14 @@ public interface TagRepository extends JpaRepository<Tag, UUID> {
|
||||
}
|
||||
|
||||
|
||||
Optional<Tag> findByNameIgnoreCase(String name);
|
||||
// Tag-name resolution (see TagService.findOrCreate). Names that collide case-insensitively across
|
||||
// the canonical tree are VALID — a parent and its same-named lowercase child (e.g. "Geburt" /
|
||||
// "Geburt/geburt") are distinct nodes with their own source_ref and document attachments. So
|
||||
// resolution must be exact-case first, then a non-throwing list for the case-insensitive fallback.
|
||||
// Do NOT add a unique(lower(name)) constraint — it would reject these legitimate rows. See #730.
|
||||
Optional<Tag> findByName(String name);
|
||||
|
||||
List<Tag> findAllByNameIgnoreCase(String name);
|
||||
|
||||
// Lookup by the canonical tag_path, used for idempotent canonical re-import (Phase 3).
|
||||
Optional<Tag> findBySourceRef(String sourceRef);
|
||||
|
||||
@@ -2,6 +2,7 @@ package org.raddatz.familienarchiv.tag;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
@@ -55,10 +56,21 @@ public class TagService {
|
||||
return tagRepository.findBySourceRef(sourceRef);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves a tag name to a single tag, creating one when absent. Never throws on case-insensitive
|
||||
* collisions: names that differ only by case are valid distinct nodes in the canonical tree (a
|
||||
* parent and its same-named lowercase child), so resolution prefers an exact-case match, then
|
||||
* falls back to the lowest-id case-insensitive match, then creates. See #730.
|
||||
*/
|
||||
public Tag findOrCreate(String name) {
|
||||
String cleanName = name.trim();
|
||||
return tagRepository.findByNameIgnoreCase(cleanName)
|
||||
.orElseGet(() -> tagRepository.save(Tag.builder().name(cleanName).build()));
|
||||
Optional<Tag> exact = tagRepository.findByName(cleanName);
|
||||
if (exact.isPresent()) return exact.get(); // exact-case wins (edit round-trip replays the stored name)
|
||||
List<Tag> caseInsensitive = tagRepository.findAllByNameIgnoreCase(cleanName);
|
||||
if (!caseInsensitive.isEmpty()) {
|
||||
return caseInsensitive.stream().min(Comparator.comparing(Tag::getId)).orElseThrow(); // deterministic tie-break by id — list is non-empty, never throws
|
||||
}
|
||||
return tagRepository.save(Tag.builder().name(cleanName).build()); // create-when-absent (orphan tag: null sourceRef/parentId)
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -0,0 +1,123 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.PostgresContainerConfig;
|
||||
import org.raddatz.familienarchiv.tag.Tag;
|
||||
import org.raddatz.familienarchiv.tag.TagRepository;
|
||||
import org.raddatz.familienarchiv.tag.TagService;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatCode;
|
||||
|
||||
/**
|
||||
* #730 — tag-name resolution against a real Postgres. A mocked repo can't prove the two things that
|
||||
* actually break: that {@code findAllByNameIgnoreCase} folds case the way Postgres {@code LOWER()}
|
||||
* does (critical for umlauts like {@code ü}), and that saving a document tagged with a case-colliding
|
||||
* tag no longer throws {@code NonUniqueResultException}. H2 folds case differently, so this pins the
|
||||
* behaviour on {@code postgres:16-alpine}. The four-branch resolution logic itself is covered faster
|
||||
* by the mocked {@code TagServiceTest}.
|
||||
*/
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
|
||||
@ActiveProfiles("test")
|
||||
@Import(PostgresContainerConfig.class)
|
||||
@Transactional
|
||||
class TagCaseCollisionIntegrationTest {
|
||||
|
||||
@MockitoBean S3Client s3Client;
|
||||
@Autowired DocumentService documentService;
|
||||
@Autowired DocumentRepository documentRepository;
|
||||
@Autowired TagRepository tagRepository;
|
||||
@Autowired TagService tagService;
|
||||
|
||||
private Tag persistTag(String name, String sourceRef, UUID parentId) {
|
||||
return tagRepository.save(Tag.builder().name(name).sourceRef(sourceRef).parentId(parentId).build());
|
||||
}
|
||||
|
||||
private Document persistDocTaggedWith(Tag tag) {
|
||||
return documentRepository.save(Document.builder()
|
||||
.originalFilename("C-7301")
|
||||
.title("Weihnachtsbrief")
|
||||
.documentDate(LocalDate.of(1928, 1, 1))
|
||||
.metaDatePrecision(DatePrecision.YEAR)
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.tags(new HashSet<>(Set.of(tag)))
|
||||
.build());
|
||||
}
|
||||
|
||||
@Test
|
||||
void updateDocument_succeedsAndKeepsExactChildTag_whenTaggedWithCaseCollidingChild() throws Exception {
|
||||
Tag parent = persistTag("Weihnachten", "Weihnachten", null);
|
||||
Tag child = persistTag("weihnachten", "Weihnachten/weihnachten", parent.getId());
|
||||
Document doc = persistDocTaggedWith(child);
|
||||
|
||||
DocumentUpdateDTO dto = new DocumentUpdateDTO();
|
||||
dto.setTitle("Weihnachtsbrief");
|
||||
dto.setDocumentDate(LocalDate.of(1930, 1, 1)); // change the date — the field that 500'd on staging
|
||||
dto.setMetaDatePrecision(DatePrecision.YEAR);
|
||||
dto.setTags("weihnachten"); // the edit form round-trips the stored child name
|
||||
|
||||
assertThatCode(() -> documentService.updateDocument(doc.getId(), dto, null, null))
|
||||
.doesNotThrowAnyException();
|
||||
|
||||
Set<Tag> tags = documentRepository.findById(doc.getId()).orElseThrow().getTags();
|
||||
assertThat(tags).hasSize(1);
|
||||
assertThat(tags.iterator().next().getId()).isEqualTo(child.getId()); // child kept, not the parent
|
||||
}
|
||||
|
||||
@Test
|
||||
void findOrCreate_resolvesUmlautCollisionDeterministically_withoutThrow() {
|
||||
// The regression catcher: a plain-ASCII pair would stay green even if Postgres folded ü wrongly.
|
||||
Tag parent = persistTag("Glückwünsche", "Glückwünsche", null);
|
||||
Tag child = persistTag("glückwünsche", "Glückwünsche/glückwünsche", parent.getId());
|
||||
|
||||
// Proof that real Postgres LOWER() folds the umlaut so both rows match case-insensitively.
|
||||
// Query with the UPPERCASE form findOrCreate actually passes — folding LOWER('GLÜCKWÜNSCHE')
|
||||
// against LOWER(name) is the exact step under test; a lowercase probe wouldn't exercise it.
|
||||
assertThat(tagRepository.findAllByNameIgnoreCase("GLÜCKWÜNSCHE")).hasSize(2);
|
||||
|
||||
// No exact-case "GLÜCKWÜNSCHE" row exists → resolution falls through to the case-insensitive
|
||||
// branch with two candidates and must pick the lowest id deterministically, never throwing.
|
||||
UUID expected = List.of(parent, child).stream().min(Comparator.comparing(Tag::getId)).orElseThrow().getId();
|
||||
Tag first = tagService.findOrCreate("GLÜCKWÜNSCHE");
|
||||
Tag second = tagService.findOrCreate("GLÜCKWÜNSCHE");
|
||||
|
||||
assertThat(first.getId()).isEqualTo(expected);
|
||||
assertThat(second.getId()).isEqualTo(first.getId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void bulkEdit_resolvesCaseCollidingTagThroughFindOrCreate_withoutThrow() {
|
||||
// Bulk-edit shares resolveTags → findOrCreate; this guards a future refactor that bypasses it.
|
||||
Tag parent = persistTag("Weihnachten", "Weihnachten", null);
|
||||
Tag child = persistTag("weihnachten", "Weihnachten/weihnachten", parent.getId());
|
||||
Document doc = documentRepository.save(Document.builder()
|
||||
.originalFilename("C-7302")
|
||||
.title("Brief")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.build());
|
||||
|
||||
DocumentBulkEditDTO dto = new DocumentBulkEditDTO();
|
||||
dto.setTagNames(List.of("weihnachten"));
|
||||
|
||||
assertThatCode(() -> documentService.applyBulkEditToDocument(doc.getId(), dto, null))
|
||||
.doesNotThrowAnyException();
|
||||
|
||||
Set<Tag> tags = documentRepository.findById(doc.getId()).orElseThrow().getTags();
|
||||
assertThat(tags).hasSize(1);
|
||||
assertThat(tags.iterator().next().getId()).isEqualTo(child.getId());
|
||||
}
|
||||
}
|
||||
@@ -53,20 +53,68 @@ class TagServiceTest {
|
||||
// ─── findOrCreate ─────────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void findOrCreate_returnsExisting_whenNameFound() {
|
||||
Tag existing = Tag.builder().id(UUID.randomUUID()).name("Familie").build();
|
||||
when(tagRepository.findByNameIgnoreCase("Familie")).thenReturn(Optional.of(existing));
|
||||
void findOrCreate_exactCaseWins_overCaseInsensitiveSibling() {
|
||||
// "Geburt" (parent) and "geburt" (child) both exist; the edit round-trip replays the stored
|
||||
// name "geburt", which must bind to the exact-case row, not the parent.
|
||||
Tag exact = Tag.builder().id(UUID.randomUUID()).name("geburt").build();
|
||||
when(tagRepository.findByName("geburt")).thenReturn(Optional.of(exact));
|
||||
|
||||
Tag result = tagService.findOrCreate("Familie");
|
||||
Tag result = tagService.findOrCreate("geburt");
|
||||
|
||||
assertThat(result).isEqualTo(existing);
|
||||
assertThat(result).isEqualTo(exact);
|
||||
verify(tagRepository, never()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void findOrCreate_createsNew_whenNameNotFound() {
|
||||
void findOrCreate_exactCaseWins_evenWhenItsIdIsNotTheLowest() {
|
||||
// Adversarial guard: exact-case must short-circuit BEFORE the lowest-id rule. Here the exact row
|
||||
// has the higher id, so a naive "always pick lowest id across all CI matches" would pick wrong.
|
||||
Tag exactHigherId = Tag.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000009")).name("geburt").build();
|
||||
when(tagRepository.findByName("geburt")).thenReturn(Optional.of(exactHigherId));
|
||||
|
||||
Tag result = tagService.findOrCreate("geburt");
|
||||
|
||||
assertThat(result).isEqualTo(exactHigherId);
|
||||
verify(tagRepository, never()).findAllByNameIgnoreCase(any()); // exact-case wins without consulting the CI list
|
||||
verify(tagRepository, never()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void findOrCreate_usesSingleCaseInsensitiveMatch_whenNoExactCase() {
|
||||
// Stored name is "Weihnachten"; a save replays "weihnachten" (no exact-case row) → bind to the
|
||||
// single case-insensitive match rather than creating a duplicate.
|
||||
Tag stored = Tag.builder().id(UUID.randomUUID()).name("Weihnachten").build();
|
||||
when(tagRepository.findByName("weihnachten")).thenReturn(Optional.empty());
|
||||
when(tagRepository.findAllByNameIgnoreCase("weihnachten")).thenReturn(List.of(stored));
|
||||
|
||||
Tag result = tagService.findOrCreate("weihnachten");
|
||||
|
||||
assertThat(result).isEqualTo(stored);
|
||||
verify(tagRepository, never()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void findOrCreate_returnsLowestIdDeterministically_whenMultipleCaseInsensitiveMatches() {
|
||||
// Two rows collide case-insensitively and neither equals the query exactly. Resolution must be
|
||||
// deterministic (lowest id) and never throw — proven by calling twice and getting the same id.
|
||||
Tag lowerId = Tag.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000001")).name("Reisepläne").build();
|
||||
Tag higherId = Tag.builder().id(UUID.fromString("00000000-0000-0000-0000-000000000002")).name("reisepläne").build();
|
||||
when(tagRepository.findByName("REISEPLÄNE")).thenReturn(Optional.empty());
|
||||
when(tagRepository.findAllByNameIgnoreCase("REISEPLÄNE")).thenReturn(List.of(higherId, lowerId));
|
||||
|
||||
Tag first = tagService.findOrCreate("REISEPLÄNE");
|
||||
Tag second = tagService.findOrCreate("REISEPLÄNE");
|
||||
|
||||
assertThat(first.getId()).isEqualTo(lowerId.getId());
|
||||
assertThat(second.getId()).isEqualTo(first.getId());
|
||||
verify(tagRepository, never()).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void findOrCreate_createsOrphanTag_whenNameAbsent() {
|
||||
Tag saved = Tag.builder().id(UUID.randomUUID()).name("Krieg").build();
|
||||
when(tagRepository.findByNameIgnoreCase("Krieg")).thenReturn(Optional.empty());
|
||||
when(tagRepository.findByName("Krieg")).thenReturn(Optional.empty());
|
||||
when(tagRepository.findAllByNameIgnoreCase("Krieg")).thenReturn(List.of());
|
||||
when(tagRepository.save(any())).thenReturn(saved);
|
||||
|
||||
Tag result = tagService.findOrCreate("Krieg");
|
||||
@@ -76,13 +124,15 @@ class TagServiceTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
void findOrCreate_trimsWhitespaceBeforeLookup() {
|
||||
Tag existing = Tag.builder().id(UUID.randomUUID()).name("Urlaub").build();
|
||||
when(tagRepository.findByNameIgnoreCase("Urlaub")).thenReturn(Optional.of(existing));
|
||||
void findOrCreate_trimsWhitespace_thenLandsOnCaseInsensitiveChild() {
|
||||
Tag child = Tag.builder().id(UUID.randomUUID()).name("weihnachten").build();
|
||||
when(tagRepository.findByName("weihnachten")).thenReturn(Optional.empty());
|
||||
when(tagRepository.findAllByNameIgnoreCase("weihnachten")).thenReturn(List.of(child));
|
||||
|
||||
tagService.findOrCreate(" Urlaub ");
|
||||
Tag result = tagService.findOrCreate(" weihnachten ");
|
||||
|
||||
verify(tagRepository).findByNameIgnoreCase("Urlaub");
|
||||
assertThat(result).isEqualTo(child);
|
||||
verify(tagRepository).findByName("weihnachten");
|
||||
}
|
||||
|
||||
// ─── update ───────────────────────────────────────────────────────────────
|
||||
|
||||
105
docs/adr/032-tag-name-resolution-tolerates-case-collisions.md
Normal file
105
docs/adr/032-tag-name-resolution-tolerates-case-collisions.md
Normal file
@@ -0,0 +1,105 @@
|
||||
# ADR-032 — Tag-name resolution tolerates case-collisions: exact-case first, then a deterministic lowest-id fallback, and never a `unique(lower(name))` constraint
|
||||
|
||||
**Date:** 2026-06-06
|
||||
**Status:** Accepted
|
||||
**Issue:** #730 (document with a case-colliding tag cannot be saved — `findByNameIgnoreCase` `NonUniqueResultException`)
|
||||
**Milestone:** —
|
||||
|
||||
---
|
||||
|
||||
## Context
|
||||
|
||||
`TagService.findOrCreate(name)` is the single point that turns a tag **name** into a `Tag`
|
||||
row. The document edit form, bulk-edit, and the upload batch all round-trip tag **names**
|
||||
(the edit form sends `tags.map(t => t.name).join(',')`) and re-resolve them on **every**
|
||||
save through `resolveTags → findOrCreate`. The old implementation resolved with
|
||||
`tagRepository.findByNameIgnoreCase(name)`, a derived query returning `Optional<Tag>`.
|
||||
|
||||
That signature encodes an invariant the data does **not** hold: that a name is globally
|
||||
unique case-insensitively. The canonical tag tree legitimately contains names that differ
|
||||
only by case — a parent container and its same-named lowercase **child** (`Geburt` /
|
||||
`Geburt/geburt`, `Weihnachten` / `Weihnachten/weihnachten`, …), or two siblings
|
||||
(`Reise/Reisepläne` / `Reise/reisepläne`). Each is a distinct node with its own
|
||||
`source_ref` (the stable identity, per ADR-025) and its own document attachments — **not** an
|
||||
accidental duplicate. When two rows matched case-insensitively, Hibernate threw
|
||||
`NonUniqueResultException` → `IncorrectResultSizeDataAccessException` → a generic HTTP 500.
|
||||
|
||||
The effect was severe and opaque: every document carrying one of ~10 colliding tags (≈180
|
||||
document-tag attachments on staging) became **un-editable** — any field change failed on save
|
||||
because the whole tag set is re-resolved — and the user saw only "an unexpected error", with
|
||||
no hint that a tag was the cause.
|
||||
|
||||
This is a **lookup** problem, not a data problem: the collisions are valid canonical nodes
|
||||
and must be preserved.
|
||||
|
||||
## Decision
|
||||
|
||||
### 1. Resolution is exact-case first, then a non-throwing deterministic fallback
|
||||
|
||||
`findOrCreate` resolves in three ordered steps and never throws on a collision:
|
||||
|
||||
1. `findByName(cleanName)` — **exact-case** derived query. If present, return it. The edit
|
||||
round-trip replays the stored name verbatim, so the exact-case row is the right binding
|
||||
(typing the bare child name `weihnachten` binds to the child; `Weihnachten` binds to the
|
||||
parent container).
|
||||
2. else `findAllByNameIgnoreCase(cleanName)` — the **plural** case-insensitive list. If
|
||||
non-empty, return the element with the **lowest `id`** (`min(comparing(Tag::getId))`).
|
||||
3. else create the tag (an orphan: null `sourceRef`/`parentId`).
|
||||
|
||||
The two repository methods are deliberately **two distinctly-named methods** — Spring Data
|
||||
cannot disambiguate an `Optional<Tag>` from a `List<Tag>` derived query by return type alone.
|
||||
The throwing `Optional<Tag> findByNameIgnoreCase` is **deleted** so the non-unique-throwing
|
||||
call cannot be reintroduced; `findOrCreate` was its only production caller.
|
||||
|
||||
### 2. The tie-break is `id`, and it is load-bearing
|
||||
|
||||
`id` is a stable, always-present, unique column, so "lowest id" is a total, deterministic
|
||||
order over the candidates: the same name resolves to the same row on every call, forever,
|
||||
without throwing. This matters only in the free-text authoring path (step 2), where no
|
||||
exact-case row exists yet two case-folding rows do.
|
||||
|
||||
### 3. No `unique(lower(name))` constraint — and a load-bearing comment says so
|
||||
|
||||
A global case-insensitive uniqueness constraint is **wrong**: it would reject the legitimate
|
||||
parent/child canonical nodes. It would also **fail to apply** against the existing rows,
|
||||
turning a code-only deploy into a failed Flyway migration that blocks startup. A comment at
|
||||
both `findOrCreate` and the repository methods records this so the constraint is not "helpfully"
|
||||
added later.
|
||||
|
||||
## Consequences
|
||||
|
||||
- **Code-only, zero migration, fully reversible** (roll back the JAR). No tag data is touched;
|
||||
the colliding rows stay exactly as the canonical importer produced them.
|
||||
- One change fixes all three write paths — single-document edit, bulk-edit, and upload batch —
|
||||
because they all funnel through `resolveTags → findOrCreate`, which stays the single source
|
||||
of truth (resolution logic is **not** hoisted into `DocumentService`).
|
||||
- **Free-text tag semantics under collision are accepted as-is** (issue #730, option A): the
|
||||
bare word `weihnachten` binds to the deep child and `Weihnachten` to the parent container.
|
||||
Correct for the edit round-trip and acceptable for authoring; making the typeahead show the
|
||||
tree path so an author can tell a container from its same-named child is a separate
|
||||
follow-up.
|
||||
- The wire response stays opaque: after the fix this path no longer throws
|
||||
`IncorrectResultSizeDataAccessException`, and `GlobalExceptionHandler`'s generic handler maps
|
||||
any stray one to `INTERNAL_ERROR` with no Hibernate/SQL leak — so no dedicated handler was
|
||||
added.
|
||||
- **The sibling Person path is unfixed but tracked.** `PersonService.findOrCreateByAlias`
|
||||
(`findByAliasIgnoreCase`) and `findByFirstNameIgnoreCaseAndLastNameIgnoreCase` carry the same
|
||||
latent `Optional`-non-unique throw on user-influenced names; deferred to #731 rather than
|
||||
widened into this fix.
|
||||
- Postgres `LOWER()` folding of umlauts (`ü`/`ä`) is the actual correctness hinge of the
|
||||
fallback and cannot be proven by a mocked repo, so it is pinned by a Testcontainers
|
||||
`postgres:16-alpine` test on a `Glückwünsche`/`glückwünsche` pair; a plain-ASCII test would
|
||||
stay green while the bug reappeared for umlaut tags.
|
||||
|
||||
## Alternatives considered
|
||||
|
||||
- **A `unique(lower(name))` index** — rejected: the collisions are valid canonical nodes, and
|
||||
the migration would fail against the existing data and block startup.
|
||||
- **Merging/deduping the colliding tags** — rejected: each has a distinct `source_ref`, tree
|
||||
position, and real document attachments; they are not duplicates.
|
||||
- **Round-tripping tag IDs instead of names** so resolution can't be ambiguous at all — the
|
||||
cleaner long-term shape (removes the name-as-key smell), but a larger change with frontend
|
||||
surface; deferred to #732. The lookup fix here is the minimal correct unblock.
|
||||
- **Hoisting resolution into `DocumentService.resolveTags`** — rejected: it would duplicate the
|
||||
rule across the edit, bulk-edit, and import paths and let them drift; `findOrCreate` stays
|
||||
the one owner.
|
||||
Reference in New Issue
Block a user