fix(transcription): word-boundary regex prevents single-word displayName corruption

Felix #1 / Markus #5 / Sara #1 (PR #366 review). The naive
text.replace("@" + old, "@" + new) silently corrupted any composite mention
that began with the renamed single-name person — e.g. renaming the
single-name "Hans" turned "@Hans Müller" into "@Henry Müller", obliterating
the historical reference to Hans Müller without warning.

Replace with a regex matching "@OldName" only at a token boundary: not
followed by a letter/digit/hyphen (catches @Hans-Peter) and not followed by
"<space><uppercase>" (catches @Hans Müller). False negatives — e.g.
"@Hans Bekam", where "Bekam" is a sentence-initial word rather than part
of the name — are accepted as the conservative trade-off; corruption is
irrecoverable, missed renames are not.

The new failing test reproduced the reviewer scenario exactly: two persons
("Hans Müller" + single-name "Hans"), one block referencing both, rename
Hans → Henry. Pre-fix output corrupted "@Hans Müller" to "@Henry Müller";
post-fix preserves the composite mention and only updates the standalone.

The existing partial-name guard test (Hans-Peter Müller / Hans Müller) and
multiple-occurrences test still pass — the regex is a strict superset of
the boundary constraints already covered.

Refs #362 #366

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-28 21:33:15 +02:00
parent 8b498665df
commit 99aee777de
2 changed files with 37 additions and 1 deletions

View File

@@ -11,6 +11,8 @@ import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Transcription-domain consumer of {@link PersonDisplayNameChangedEvent}. When
@@ -49,10 +51,18 @@ public class PersonMentionPropagationListener {
String oldNeedle = "@" + event.oldDisplayName();
String newNeedle = "@" + event.newDisplayName();
// Match @OldName only at a token boundary: not followed by a letter/digit/hyphen
// (catches @Hans-Peter when renaming Hans) AND not followed by " <Uppercase>"
// (catches @Hans Müller when renaming the single-name @Hans). False negatives —
// e.g. "@Hans Bekam" where Bekam is sentence-initial — are accepted as the
// conservative trade-off; the alternative (corruption) is irrecoverable.
Pattern boundary = Pattern.compile(
Pattern.quote(oldNeedle) + "(?![\\p{L}0-9\\-]| (?=\\p{Lu}))");
String replacement = Matcher.quoteReplacement(newNeedle);
for (TranscriptionBlock block : blocks) {
if (block.getText() != null) {
block.setText(block.getText().replace(oldNeedle, newNeedle));
block.setText(boundary.matcher(block.getText()).replaceAll(replacement));
}
for (PersonMention mention : block.getMentionedPersons()) {
if (mention.getPersonId().equals(event.personId())) {

View File

@@ -123,6 +123,32 @@ class PersonMentionPropagationListenerTest {
org.assertj.core.groups.Tuple.tuple(hansId, "Hans Schmidt"));
}
/**
 * Regression test for renaming a single-word person whose name is a prefix of
 * another person's composite display name.
 *
 * <p>One block mentions both "Hans Müller" and the single-name "Hans". After a
 * rename event Hans → Henry, the block text must keep "@Hans Müller" unchanged
 * and rewrite only the standalone "@Hans"; the stored mention of the renamed
 * person must carry the new display name while the other mention is untouched.
 */
@Test
void doesNotCorruptCompositeMention_whenRenamingSingleWordPerson() {
    // Arrange: two persons sharing the "Hans" prefix, both mentioned in one block.
    UUID compositePersonId = savedPersonId("Hans", "Müller");
    UUID singleNamePersonId = savedPersonId(null, "Hans");
    List<PersonMention> mentions = List.of(
            new PersonMention(compositePersonId, "Hans Müller"),
            new PersonMention(singleNamePersonId, "Hans"));
    TranscriptionBlock persisted =
            saveBlock("@Hans Müller schrieb. Auch @Hans hat geschrieben.", mentions);
    em.clear();

    // Act: rename only the single-name person.
    PersonDisplayNameChangedEvent renameEvent =
            new PersonDisplayNameChangedEvent(singleNamePersonId, "Hans", "Henry");
    listener.onPersonDisplayNameChanged(renameEvent);
    blockRepository.flush();
    // Clear again so the assertions below read the block back via the repository.
    em.clear();

    // Assert: composite mention preserved, standalone mention rewritten.
    TranscriptionBlock actual = blockRepository.findById(persisted.getId()).orElseThrow();
    String expectedText = "@Hans Müller schrieb. Auch @Henry hat geschrieben.";
    assertThat(actual.getText()).isEqualTo(expectedText);

    List<PersonMention> actualMentions = actual.getMentionedPersons();
    assertThat(actualMentions)
            .extracting(PersonMention::getPersonId, PersonMention::getDisplayName)
            .containsExactlyInAnyOrder(
                    org.assertj.core.groups.Tuple.tuple(compositePersonId, "Hans Müller"),
                    org.assertj.core.groups.Tuple.tuple(singleNamePersonId, "Henry"));
}
@Test
void rewritesAllOccurrences_whenSameMentionAppearsTwiceInBlock() {
UUID personId = savedPersonId("Auguste", "Raddatz");