feat(#221): add AND/OR tag filtering with hierarchy expansion in document search

- Replace hasTags(List<String>) spec with hasTags(List<Set<UUID>>, useOr)
- AND mode: one EXISTS subquery per expanded tag ID set; an empty set (unknown tag) short-circuits to cb.disjunction(), i.e. no results
- OR mode: union of all expanded sets into a single EXISTS subquery
- DocumentService calls tagService.expandTagNamesToDescendantIdSets() before building spec
- DocumentController exposes ?tagOp=AND|OR query param (default AND)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-16 15:44:18 +02:00
parent 3fba740469
commit 57dc72b51d
8 changed files with 209 additions and 59 deletions

View File

@@ -204,11 +204,12 @@ public class DocumentController {
@RequestParam(required = false) String tagQ,
@Parameter(description = "Filter by document status") @RequestParam(required = false) DocumentStatus status,
@Parameter(description = "Sort field") @RequestParam(required = false) DocumentSort sort,
@Parameter(description = "Sort direction: ASC or DESC") @RequestParam(required = false, defaultValue = "DESC") String dir) {
@Parameter(description = "Sort direction: ASC or DESC") @RequestParam(required = false, defaultValue = "DESC") String dir,
@Parameter(description = "Tag operator: AND (default) or OR") @RequestParam(required = false) String tagOp) {
if (!"ASC".equalsIgnoreCase(dir) && !"DESC".equalsIgnoreCase(dir)) {
throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "dir must be ASC or DESC");
}
return ResponseEntity.ok(documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir));
return ResponseEntity.ok(documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir, tagOp));
}
// --- TRAINING LABELS ---

View File

@@ -4,6 +4,7 @@ import jakarta.persistence.criteria.*;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import org.raddatz.familienarchiv.model.Document;
@@ -54,34 +55,64 @@ public class DocumentSpecifications {
return (root, query, cb) -> status == null ? null : cb.equal(root.get("status"), status);
}
// Filtert nach Schlagworten (UND-Verknüpfung, exakter Match)
public static Specification<Document> hasTags(List<String> tags) {
/**
* Filtert nach vorausgeweiteten Tag-ID-Sets mit AND- oder OR-Logik.
*
* <p>AND (useOr=false): Das Dokument muss mindestens einen Tag aus <em>jedem</em> Set besitzen.
* <p>OR (useOr=true): Das Dokument muss mindestens einen Tag aus der Vereinigung aller Sets besitzen.
*
* <p>Jedes Set repräsentiert einen ausgewählten Tag inklusive aller seiner Nachkommen
* (vorausgeweitet durch {@code TagRepository.findDescendantIdsByName}).
*/
public static Specification<Document> hasTags(List<Set<UUID>> tagIdSets, boolean useOr) {
return (root, query, cb) -> {
if (tags == null || tags.isEmpty())
if (tagIdSets == null || tagIdSets.isEmpty())
return null;
List<Predicate> predicates = new ArrayList<>();
for (String tagName : tags) {
if (!StringUtils.hasText(tagName)) continue;
Subquery<Long> subquery = query.subquery(Long.class);
Root<Document> subRoot = subquery.from(Document.class);
Join<Document, Tag> subTags = subRoot.join("tags");
subquery.select(subRoot.get("id"))
.where(
cb.equal(subRoot.get("id"), root.get("id")),
cb.equal(cb.lower(subTags.get("name")), tagName.trim().toLowerCase())
);
predicates.add(cb.exists(subquery));
if (!useOr) {
// AND mode: an empty set means the tag resolved to no IDs (doesn't exist) —
// no document can satisfy the condition, so return no results immediately.
boolean hasEmptySet = tagIdSets.stream().anyMatch(s -> s == null || s.isEmpty());
if (hasEmptySet) return cb.disjunction();
}
List<Set<UUID>> nonEmpty = tagIdSets.stream()
.filter(s -> s != null && !s.isEmpty())
.toList();
if (nonEmpty.isEmpty()) return null;
if (useOr) {
Set<UUID> union = new java.util.HashSet<>();
nonEmpty.forEach(union::addAll);
return documentHasTagIn(root, query, cb, union);
}
// AND: one EXISTS subquery per set
List<Predicate> predicates = new ArrayList<>();
for (Set<UUID> ids : nonEmpty) {
predicates.add(documentHasTagIn(root, query, cb, ids));
}
return cb.and(predicates.toArray(new Predicate[0]));
};
}
/**
 * Builds an EXISTS predicate that holds when the document of the outer query
 * has at least one tag whose ID is contained in {@code tagIds}.
 *
 * @param root   root of the outer document query; its {@code id} correlates the subquery
 * @param query  outer query used to create the subquery
 * @param cb     criteria builder
 * @param tagIds tag IDs to match against
 * @return the EXISTS predicate over the correlated subquery
 */
private static Predicate documentHasTagIn(
        Root<Document> root,
        jakarta.persistence.criteria.CriteriaQuery<?> query,
        jakarta.persistence.criteria.CriteriaBuilder cb,
        Set<UUID> tagIds) {
    Subquery<UUID> taggedDocs = query.subquery(UUID.class);
    Root<Document> candidate = taggedDocs.from(Document.class);
    Join<Document, Tag> candidateTags = candidate.join("tags");

    // Correlate the subquery with the outer row, then restrict to the requested tag IDs.
    Predicate sameDocument = cb.equal(candidate.get("id"), root.get("id"));
    Predicate tagMatches = candidateTags.get("id").in(tagIds);

    taggedDocs.select(candidate.get("id")).where(sameDocument, tagMatches);
    return cb.exists(taggedDocs);
}
// Filtert nach partiellem Tag-Namen (ILIKE) — für Live-Tag-Suche
public static Specification<Document> hasTagPartial(String tagQ) {
return (root, query, cb) -> {

View File

@@ -293,7 +293,7 @@ public class DocumentService {
}
// 1. Allgemeine Suche (für das Suchfeld im Frontend)
public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir) {
public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir, String tagOperator) {
boolean hasText = StringUtils.hasText(text);
List<UUID> rankedIds = null;
@@ -302,12 +302,15 @@ public class DocumentService {
if (rankedIds.isEmpty()) return DocumentSearchResult.withMatchData(List.of(), Map.of());
}
boolean useOrLogic = "OR".equalsIgnoreCase(tagOperator);
List<Set<UUID>> expandedTagSets = tagService.expandTagNamesToDescendantIdSets(tags);
Specification<Document> textSpec = hasText ? hasIds(rankedIds) : (root, query, cb) -> null;
Specification<Document> spec = Specification.where(textSpec)
.and(isBetween(from, to))
.and(hasSender(sender))
.and(hasReceiver(receiver))
.and(hasTags(tags))
.and(hasTags(expandedTagSets, useOrLogic))
.and(hasTagPartial(tagQ))
.and(hasStatus(status));

View File

@@ -62,7 +62,7 @@ class DocumentControllerTest {
@Test
@WithMockUser
void search_returns200_whenAuthenticated() throws Exception {
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
.thenReturn(DocumentSearchResult.of(List.of()));
mockMvc.perform(get("/api/documents/search"))
@@ -72,13 +72,13 @@ class DocumentControllerTest {
@Test
@WithMockUser
void search_withStatusParam_passesItToService() throws Exception {
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any()))
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any(), any()))
.thenReturn(DocumentSearchResult.of(List.of()));
mockMvc.perform(get("/api/documents/search").param("status", "REVIEWED"))
.andExpect(status().isOk());
verify(documentService).searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any());
verify(documentService).searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any(), any());
}
@Test
@@ -105,7 +105,7 @@ class DocumentControllerTest {
@Test
@WithMockUser
void search_responseContainsTotalCount() throws Exception {
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
.thenReturn(DocumentSearchResult.of(List.of()));
mockMvc.perform(get("/api/documents/search"))
@@ -126,7 +126,7 @@ class DocumentControllerTest {
.build();
var matchData = new org.raddatz.familienarchiv.dto.SearchMatchData(
"Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of(), null, List.of());
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
.thenReturn(DocumentSearchResult.withMatchData(List.of(doc), Map.of(docId, matchData)));
mockMvc.perform(get("/api/documents/search").param("q", "Brief"))

View File

@@ -7,6 +7,7 @@ import org.raddatz.familienarchiv.model.Document;
import org.raddatz.familienarchiv.model.DocumentAnnotation;
import org.raddatz.familienarchiv.model.DocumentStatus;
import org.raddatz.familienarchiv.model.Person;
import org.raddatz.familienarchiv.model.Tag;
import org.raddatz.familienarchiv.model.TranscriptionBlock;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase;
@@ -37,6 +38,9 @@ class DocumentRepositoryTest {
@Autowired
private PersonRepository personRepository;
@Autowired
private TagRepository tagRepository;
@Autowired
private AnnotationRepository annotationRepository;
@@ -345,6 +349,105 @@ class DocumentRepositoryTest {
assertThat(stats.getTranscriptionCount()).isEqualTo(0L);
}
// ─── hasTags specification — AND/OR + hierarchy ───────────────────────────
@Test
void hasTags_and_findsDocumentThatHasBothTags() {
    Tag tagA = tagRepository.save(Tag.builder().name("TagA").build());
    Tag tagB = tagRepository.save(Tag.builder().name("TagB").build());
    // TagC exists in the database but is attached to no document and is not part of the filter.
    tagRepository.save(Tag.builder().name("TagC").build());

    Document docAB = documentRepository.save(Document.builder()
            .title("DocAB").originalFilename("docab.pdf").status(DocumentStatus.UPLOADED)
            .tags(new HashSet<>(Set.of(tagA, tagB))).build());
    // Carries only TagA, so the AND filter must exclude it.
    documentRepository.save(Document.builder()
            .title("DocA").originalFilename("doca.pdf").status(DocumentStatus.UPLOADED)
            .tags(new HashSet<>(Set.of(tagA))).build());

    // AND: must have both TagA and TagB
    Set<UUID> setA = new HashSet<>(tagRepository.findDescendantIdsByName("TagA"));
    Set<UUID> setB = new HashSet<>(tagRepository.findDescendantIdsByName("TagB"));

    var spec = DocumentSpecifications.hasTags(List.of(setA, setB), false);
    List<Document> results = documentRepository.findAll(spec);

    assertThat(results).hasSize(1);
    assertThat(results.get(0).getId()).isEqualTo(docAB.getId());
}
@Test
void hasTags_or_findsDocumentThatHasEitherTag() {
    // given: two tags, each attached to exactly one document
    Tag firstTag = tagRepository.save(Tag.builder().name("OrTagA").build());
    Tag secondTag = tagRepository.save(Tag.builder().name("OrTagB").build());

    Document firstDoc = documentRepository.save(Document.builder()
            .title("OrDocA").originalFilename("ordoca.pdf").status(DocumentStatus.UPLOADED)
            .tags(new HashSet<>(Set.of(firstTag))).build());
    Document secondDoc = documentRepository.save(Document.builder()
            .title("OrDocB").originalFilename("ordocb.pdf").status(DocumentStatus.UPLOADED)
            .tags(new HashSet<>(Set.of(secondTag))).build());

    Set<UUID> firstIds = new HashSet<>(tagRepository.findDescendantIdsByName("OrTagA"));
    Set<UUID> secondIds = new HashSet<>(tagRepository.findDescendantIdsByName("OrTagB"));

    // when: filtering with OR semantics
    List<Document> results = documentRepository.findAll(
            DocumentSpecifications.hasTags(List.of(firstIds, secondIds), true));

    // then: both documents are returned
    assertThat(results).hasSize(2);
    assertThat(results).extracting(Document::getId)
            .containsExactlyInAnyOrder(firstDoc.getId(), secondDoc.getId());
}
@Test
void hasTags_hierarchySearch_findsDocumentTaggedWithChildWhenSearchingByParent() {
    Tag parent = tagRepository.save(Tag.builder().name("HierParent").build());
    Tag child = tagRepository.save(Tag.builder().name("HierChild").parentId(parent.getId()).build());

    Document docWithChild = documentRepository.save(Document.builder()
            .title("DocWithChild").originalFilename("docwithchild.pdf").status(DocumentStatus.UPLOADED)
            .tags(new HashSet<>(Set.of(child))).build());
    Document docWithParent = documentRepository.save(Document.builder()
            .title("DocWithParent").originalFilename("docwithparent.pdf").status(DocumentStatus.UPLOADED)
            .tags(new HashSet<>(Set.of(parent))).build());

    // Searching by "HierParent" should include descendants (HierChild)
    List<UUID> parentAndDescendants = tagRepository.findDescendantIdsByName("HierParent")
            .stream().toList();
    // Must include both parent and child IDs
    assertThat(parentAndDescendants).contains(parent.getId(), child.getId());

    var spec = DocumentSpecifications.hasTags(
            List.of(new HashSet<>(parentAndDescendants)), false);
    List<Document> results = documentRepository.findAll(spec);

    // Verify which documents matched, not just how many: both the child-tagged and the
    // parent-tagged document must be returned.
    assertThat(results).extracting(Document::getId)
            .containsExactlyInAnyOrder(docWithChild.getId(), docWithParent.getId());
}
@Test
void findDescendantIdsByName_returnsOnlyMatchingTag_whenNoChildren() {
    // A tag without children expands to just its own ID.
    Tag leaf = tagRepository.save(Tag.builder().name("Leaf").build());
    UUID leafId = leaf.getId();

    List<UUID> expanded = tagRepository.findDescendantIdsByName("Leaf").stream().toList();

    assertThat(expanded).containsExactly(leafId);
}
@Test
void findDescendantIdsByName_returnsParentAndAllDescendants() {
    // Three-level chain: Grandparent -> ParentNode -> ChildNode
    Tag top = tagRepository.save(Tag.builder().name("Grandparent").build());
    Tag middle = tagRepository.save(Tag.builder().name("ParentNode").parentId(top.getId()).build());
    Tag bottom = tagRepository.save(Tag.builder().name("ChildNode").parentId(middle.getId()).build());

    List<UUID> expanded = tagRepository.findDescendantIdsByName("Grandparent").stream().toList();

    // The expansion contains the named tag itself plus every level below it.
    assertThat(expanded).containsExactlyInAnyOrder(top.getId(), middle.getId(), bottom.getId());
}
// ─── seeding helpers ─────────────────────────────────────────────────────
private Document uploaded(String title) {

View File

@@ -15,8 +15,10 @@ import org.springframework.context.annotation.Import;
import org.springframework.data.jpa.domain.Specification;
import java.time.LocalDate;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import static org.assertj.core.api.Assertions.assertThat;
import static org.raddatz.familienarchiv.repository.DocumentSpecifications.*;
@@ -156,47 +158,57 @@ class DocumentSpecificationsTest {
// ─── hasTags ──────────────────────────────────────────────────────────────
@Test
void hasTags_returnsAllDocuments_whenTagListIsNull() {
List<Document> result = documentRepository.findAll(Specification.where(hasTags(null)));
void hasTags_returnsAllDocuments_whenTagSetListIsNull() {
List<Document> result = documentRepository.findAll(Specification.where(hasTags(null, false)));
assertThat(result).hasSize(3);
}
@Test
void hasTags_returnsAllDocuments_whenTagListIsEmpty() {
List<Document> result = documentRepository.findAll(Specification.where(hasTags(List.of())));
void hasTags_returnsAllDocuments_whenTagSetListIsEmpty() {
List<Document> result = documentRepository.findAll(Specification.where(hasTags(List.of(), false)));
assertThat(result).hasSize(3);
}
@Test
void hasTags_filtersDocumentsByTag() {
List<Document> result = documentRepository.findAll(Specification.where(hasTags(List.of("Familie"))));
void hasTags_and_filtersDocumentsByTag() {
Set<UUID> familieIds = new HashSet<>(tagRepository.findDescendantIdsByName("Familie"));
List<Document> result = documentRepository.findAll(Specification.where(hasTags(List.of(familieIds), false)));
assertThat(result).extracting(Document::getTitle).containsExactly("Alter Brief");
}
@Test
void hasTags_isCaseInsensitive() {
List<Document> result = documentRepository.findAll(Specification.where(hasTags(List.of("familie"))));
assertThat(result).extracting(Document::getTitle).containsExactly("Alter Brief");
}
@Test
void hasTags_requiresAllTagsToBePresent_andLogic() {
// briefEarly has "Familie" but not "Urlaub" — should be excluded
void hasTags_and_requiresAllTagsToBePresent() {
// briefEarly has "Familie" but not "Urlaub" — AND should return empty
Set<UUID> familieIds = new HashSet<>(tagRepository.findDescendantIdsByName("Familie"));
Set<UUID> urlaubIds = new HashSet<>(tagRepository.findDescendantIdsByName("Urlaub"));
List<Document> result = documentRepository.findAll(
Specification.where(hasTags(List.of("Familie", "Urlaub"))));
Specification.where(hasTags(List.of(familieIds, urlaubIds), false)));
assertThat(result).isEmpty();
}
@Test
void hasTags_skipsEmptyTagNames() {
// An empty string in the tag list should be ignored
List<Document> result = documentRepository.findAll(Specification.where(hasTags(List.of(" ", "Familie"))));
assertThat(result).extracting(Document::getTitle).containsExactly("Alter Brief");
void hasTags_or_findsDocumentWithEitherTag() {
Set<UUID> familieIds = new HashSet<>(tagRepository.findDescendantIdsByName("Familie"));
Set<UUID> urlaubIds = new HashSet<>(tagRepository.findDescendantIdsByName("Urlaub"));
List<Document> result = documentRepository.findAll(
Specification.where(hasTags(List.of(familieIds, urlaubIds), true)));
assertThat(result).extracting(Document::getTitle)
.containsExactlyInAnyOrder("Alter Brief", "Neuerer Brief");
}
@Test
void hasTags_returnsEmpty_whenTagIdSetIsEmpty() {
    // The requested tag resolved to no IDs, so in AND mode no document can match.
    Set<UUID> noIds = new HashSet<>();
    Specification<Document> spec = Specification.where(hasTags(List.of(noIds), false));

    List<Document> result = documentRepository.findAll(spec);

    assertThat(result).isEmpty();
}
@Test
void hasTags_returnsEmpty_whenTagDoesNotExist() {
List<Document> result = documentRepository.findAll(Specification.where(hasTags(List.of("Unbekannt"))));
// Non-existent tag → findDescendantIdsByName returns empty list → hasTags returns no results
Set<UUID> unknownIds = new HashSet<>(tagRepository.findDescendantIdsByName("Unbekannt"));
List<Document> result = documentRepository.findAll(Specification.where(hasTags(List.of(unknownIds), false)));
assertThat(result).isEmpty();
}

View File

@@ -53,7 +53,7 @@ class DocumentServiceSortTest {
.thenReturn(List.of(newer, older));
DocumentSearchResult result = documentService.searchDocuments(
"Brief", null, null, null, null, null, null, null, DocumentSort.DATE, "DESC");
"Brief", null, null, null, null, null, null, null, DocumentSort.DATE, "DESC", null);
// Expect: date order (newer 1960 first), NOT rank order (older 1940 first)
assertThat(result.documents()).hasSize(2);
@@ -75,7 +75,7 @@ class DocumentServiceSortTest {
.thenReturn(List.of(doc2, doc1)); // unordered from DB
DocumentSearchResult result = documentService.searchDocuments(
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null);
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null);
// Expect: rank order restored (id1 first)
assertThat(result.documents().get(0).getId()).isEqualTo(id1);
@@ -94,7 +94,7 @@ class DocumentServiceSortTest {
.thenReturn(List.of(doc2, doc1));
DocumentSearchResult result = documentService.searchDocuments(
"Brief", null, null, null, null, null, null, null, null, null);
"Brief", null, null, null, null, null, null, null, null, null, null);
assertThat(result.documents().get(0).getId()).isEqualTo(id1);
}

View File

@@ -1204,7 +1204,7 @@ class DocumentServiceTest {
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Sort.class)))
.thenReturn(List.of());
documentService.searchDocuments(null, null, null, null, null, null, null, DocumentStatus.REVIEWED, null, null);
documentService.searchDocuments(null, null, null, null, null, null, null, DocumentStatus.REVIEWED, null, null, null);
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Sort.class));
}
@@ -1214,7 +1214,7 @@ class DocumentServiceTest {
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Sort.class)))
.thenReturn(List.of());
documentService.searchDocuments(null, null, null, null, null, null, null, null, null, null);
documentService.searchDocuments(null, null, null, null, null, null, null, null, null, null, null);
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Sort.class));
}
@@ -1292,7 +1292,7 @@ class DocumentServiceTest {
.thenReturn(List.of(withSender, noSender));
DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc");
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null);
assertThat(result.documents()).hasSize(2);
assertThat(result.documents()).extracting(Document::getTitle).containsExactly("Has Sender", "No Sender");
@@ -1312,7 +1312,7 @@ class DocumentServiceTest {
.thenReturn(List.of(noReceivers, withReceiver));
DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null, DocumentSort.RECEIVER, "asc");
null, null, null, null, null, null, null, null, DocumentSort.RECEIVER, "asc", null);
assertThat(result.documents()).extracting(Document::getTitle)
.containsExactly("Has Receiver", "No Receivers");
@@ -1334,7 +1334,7 @@ class DocumentServiceTest {
.thenReturn(List.of(docNullName, docSmith));
DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc");
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null);
// null lastName should sort to end (treated as empty), not before "smith" (as "null")
assertThat(result.documents()).extracting(Document::getTitle)
@@ -1356,7 +1356,7 @@ class DocumentServiceTest {
when(documentRepository.findEnrichmentData(any(), eq("Brief"))).thenReturn(rows);
DocumentSearchResult result = documentService.searchDocuments(
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null);
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null);
assertThat(result.matchData()).containsKey(docId);
SearchMatchData md = result.matchData().get(docId);
@@ -1370,7 +1370,7 @@ class DocumentServiceTest {
.thenReturn(List.of());
DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null, null, null);
null, null, null, null, null, null, null, null, null, null, null);
assertThat(result.matchData()).isEmpty();
}
@@ -1389,7 +1389,7 @@ class DocumentServiceTest {
when(documentRepository.findEnrichmentData(any(), eq("Brief"))).thenReturn(rows);
DocumentSearchResult result = documentService.searchDocuments(
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null);
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null);
SearchMatchData md = result.matchData().get(docId);
assertThat(md.transcriptionSnippet()).isEqualTo("Hier ist der Brief aus Berlin");