perf(ocr): resolve person names in single batch query in getTrainingInfo
Replace the per-run getById loop with a single getAllById call on the distinct person IDs, eliminating the N+1 query when the training history contains multiple sender model runs.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -202,16 +202,15 @@ public class OcrTrainingService {
|
||||
List<OcrTrainingRun> recentRuns = trainingRunRepository.findTop20ByOrderByCreatedAtDesc();
|
||||
OcrTrainingRun lastRun = recentRuns.isEmpty() ? null : recentRuns.get(0);
|
||||
|
||||
List<UUID> distinctPersonIds = recentRuns.stream()
|
||||
.map(OcrTrainingRun::getPersonId)
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.collect(java.util.stream.Collectors.toList());
|
||||
Map<String, String> personNames = new HashMap<>();
|
||||
for (OcrTrainingRun run : recentRuns) {
|
||||
if (run.getPersonId() != null && !personNames.containsKey(run.getPersonId().toString())) {
|
||||
try {
|
||||
personNames.put(run.getPersonId().toString(),
|
||||
personService.getById(run.getPersonId()).getDisplayName());
|
||||
} catch (Exception e) {
|
||||
log.debug("Could not resolve display name for person {}: {}", run.getPersonId(), e.getMessage());
|
||||
}
|
||||
}
|
||||
if (!distinctPersonIds.isEmpty()) {
|
||||
personService.getAllById(distinctPersonIds)
|
||||
.forEach(p -> personNames.put(p.getId().toString(), p.getDisplayName()));
|
||||
}
|
||||
|
||||
return new TrainingInfoResponse(
|
||||
|
||||
@@ -4,6 +4,7 @@ import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.model.OcrTrainingRun;
|
||||
import org.raddatz.familienarchiv.model.Person;
|
||||
import org.raddatz.familienarchiv.model.TrainingStatus;
|
||||
import org.raddatz.familienarchiv.model.TranscriptionBlock;
|
||||
import org.raddatz.familienarchiv.repository.OcrTrainingRunRepository;
|
||||
@@ -232,6 +233,37 @@ class OcrTrainingServiceTest {
|
||||
run.getStatus() == TrainingStatus.FAILED && run.getErrorMessage() != null));
|
||||
}
|
||||
|
||||
// ─── getTrainingInfo: batch person name resolution ────────────────────────
|
||||
|
||||
@Test
|
||||
void getTrainingInfo_resolves_person_names_in_single_batch_call() {
|
||||
UUID personA = UUID.randomUUID();
|
||||
UUID personB = UUID.randomUUID();
|
||||
List<OcrTrainingRun> runs = List.of(
|
||||
OcrTrainingRun.builder().id(UUID.randomUUID()).status(TrainingStatus.DONE)
|
||||
.personId(personA).blockCount(5).documentCount(1).modelName("sender_a").build(),
|
||||
OcrTrainingRun.builder().id(UUID.randomUUID()).status(TrainingStatus.DONE)
|
||||
.personId(personB).blockCount(5).documentCount(1).modelName("sender_b").build(),
|
||||
OcrTrainingRun.builder().id(UUID.randomUUID()).status(TrainingStatus.DONE)
|
||||
.personId(personA).blockCount(5).documentCount(1).modelName("sender_a").build()
|
||||
);
|
||||
when(runRepository.findTop20ByOrderByCreatedAtDesc()).thenReturn(runs);
|
||||
when(exportService.queryEligibleBlocks()).thenReturn(List.of());
|
||||
|
||||
Person pa = Person.builder().id(personA).firstName("Anna").lastName("Müller").build();
|
||||
Person pb = Person.builder().id(personB).firstName("Karl").lastName("Bauer").build();
|
||||
when(personService.getAllById(argThat(ids -> ids.size() == 2 && ids.containsAll(List.of(personA, personB)))))
|
||||
.thenReturn(List.of(pa, pb));
|
||||
when(healthClient.isHealthy()).thenReturn(true);
|
||||
|
||||
OcrTrainingService.TrainingInfoResponse info = service.getTrainingInfo();
|
||||
|
||||
verify(personService, never()).getById(any());
|
||||
verify(personService, times(1)).getAllById(any());
|
||||
assertThat(info.personNames()).containsKey(personA.toString());
|
||||
assertThat(info.personNames()).containsKey(personB.toString());
|
||||
}
|
||||
|
||||
// ─── Orphan recovery ──────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
|
||||
Reference in New Issue
Block a user