From 99aee777decd4cd1c58cb5af2f9fbcc6db5dae0d Mon Sep 17 00:00:00 2001 From: Marcel Date: Tue, 28 Apr 2026 21:33:15 +0200 Subject: [PATCH] fix(transcription): word-boundary regex prevents single-word displayName corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Felix #1 / Markus #5 / Sara #1 (PR #366 review). The naive text.replace("@" + old, "@" + new) silently corrupted any composite mention that began with the renamed single-name person — e.g. renaming the single-name "Hans" turned "@Hans Müller" into "@Henry Müller", obliterating the historical reference to Hans Müller without warning. Replace with a regex matching "@OldName" only at a token boundary: not followed by a letter/digit/hyphen (catches @Hans-Peter) and not followed by a space plus an uppercase letter (catches @Hans Müller). False negatives — e.g. sentence-initial "@Hans Bekam" — are accepted as the conservative trade-off; corruption is irrecoverable, missed renames are not. The new failing test reproduced the reviewer scenario exactly: two persons ("Hans Müller" + single-name "Hans"), one block referencing both, rename Hans → Henry. Pre-fix output corrupted "@Hans Müller" to "@Henry Müller"; post-fix preserves the composite mention and only updates the standalone. The existing partial-name guard test (Hans-Peter Müller / Hans Müller) and multiple-occurrences test still pass — the regex is a strict superset of the boundary constraints already covered. 
Refs #362 #366 Co-Authored-By: Claude Opus 4.7 --- .../PersonMentionPropagationListener.java | 12 ++++++++- .../PersonMentionPropagationListenerTest.java | 26 +++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/PersonMentionPropagationListener.java b/backend/src/main/java/org/raddatz/familienarchiv/service/PersonMentionPropagationListener.java index 9258edb7..84c297e2 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/PersonMentionPropagationListener.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/PersonMentionPropagationListener.java @@ -11,6 +11,8 @@ import org.springframework.stereotype.Component; import org.springframework.transaction.annotation.Transactional; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * Transcription-domain consumer of {@link PersonDisplayNameChangedEvent}. When @@ -49,10 +51,18 @@ public class PersonMentionPropagationListener { String oldNeedle = "@" + event.oldDisplayName(); String newNeedle = "@" + event.newDisplayName(); + // Match @OldName only at a token boundary: not followed by a letter/digit/hyphen + // (catches @Hans-Peter when renaming Hans) AND not followed by a space plus an uppercase letter + // (catches @Hans Müller when renaming the single-name @Hans). False negatives — + // e.g. "@Hans Bekam" where Bekam is sentence-initial — are accepted as the + // conservative trade-off; the alternative (corruption) is irrecoverable. 
+ Pattern boundary = Pattern.compile( + Pattern.quote(oldNeedle) + "(?![\\p{L}0-9\\-]| (?=\\p{Lu}))"); + String replacement = Matcher.quoteReplacement(newNeedle); for (TranscriptionBlock block : blocks) { if (block.getText() != null) { - block.setText(block.getText().replace(oldNeedle, newNeedle)); + block.setText(boundary.matcher(block.getText()).replaceAll(replacement)); } for (PersonMention mention : block.getMentionedPersons()) { if (mention.getPersonId().equals(event.personId())) { diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/PersonMentionPropagationListenerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/PersonMentionPropagationListenerTest.java index 33bd0830..078b80f7 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/PersonMentionPropagationListenerTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/PersonMentionPropagationListenerTest.java @@ -123,6 +123,32 @@ class PersonMentionPropagationListenerTest { org.assertj.core.groups.Tuple.tuple(hansId, "Hans Schmidt")); } + @Test + void doesNotCorruptCompositeMention_whenRenamingSingleWordPerson() { + UUID hansMüllerId = savedPersonId("Hans", "Müller"); + UUID hansId = savedPersonId(null, "Hans"); + TranscriptionBlock saved = saveBlock( + "@Hans Müller schrieb. Auch @Hans hat geschrieben.", + List.of( + new PersonMention(hansMüllerId, "Hans Müller"), + new PersonMention(hansId, "Hans"))); + em.clear(); + + listener.onPersonDisplayNameChanged( + new PersonDisplayNameChangedEvent(hansId, "Hans", "Henry")); + blockRepository.flush(); + em.clear(); + + TranscriptionBlock reloaded = blockRepository.findById(saved.getId()).orElseThrow(); + assertThat(reloaded.getText()) + .isEqualTo("@Hans Müller schrieb. 
Auch @Henry hat geschrieben."); + assertThat(reloaded.getMentionedPersons()) + .extracting(PersonMention::getPersonId, PersonMention::getDisplayName) + .containsExactlyInAnyOrder( + org.assertj.core.groups.Tuple.tuple(hansMüllerId, "Hans Müller"), + org.assertj.core.groups.Tuple.tuple(hansId, "Henry")); + } + @Test void rewritesAllOccurrences_whenSameMentionAppearsTwiceInBlock() { UUID personId = savedPersonId("Auguste", "Raddatz");