fix(transcription): word-boundary regex prevents single-word displayName corruption
Felix #1 / Markus #5 / Sara #1 (PR #366 review).

The naive text.replace("@" + old, "@" + new) silently corrupted any composite mention that began with the renamed single-name person — e.g. renaming the single-name "Hans" turned "@Hans Müller" into "@Henry Müller", obliterating the historical reference to Hans Müller without warning.

Replace with a regex matching "@OldName" only at a token boundary: not followed by a letter/digit/hyphen (catches @Hans-Peter) and not followed by "<space><uppercase>" (catches @Hans Müller). False negatives — e.g. sentence-initial "@Hans Bekam" — are accepted as the conservative trade-off; corruption is irrecoverable, missed renames are not.

The new failing test reproduced the reviewer scenario exactly: two persons ("Hans Müller" + single-name "Hans"), one block referencing both, rename Hans → Henry. Pre-fix output corrupted "@Hans Müller" to "@Henry Müller"; post-fix preserves the composite mention and only updates the standalone.

The existing partial-name guard test (Hans-Peter Müller / Hans Müller) and multiple-occurrences test still pass — the regex is a strict superset of the boundary constraints already covered.

Refs #362 #366
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -11,6 +11,8 @@ import org.springframework.stereotype.Component;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Transcription-domain consumer of {@link PersonDisplayNameChangedEvent}. When
|
||||
@@ -49,10 +51,18 @@ public class PersonMentionPropagationListener {
|
||||
|
||||
String oldNeedle = "@" + event.oldDisplayName();
|
||||
String newNeedle = "@" + event.newDisplayName();
|
||||
// Match @OldName only at a token boundary: not followed by a letter/digit/hyphen
|
||||
// (catches @Hans-Peter when renaming Hans) AND not followed by " <Uppercase>"
|
||||
// (catches @Hans Müller when renaming the single-name @Hans). False negatives —
|
||||
// e.g. "@Hans Bekam" where Bekam is sentence-initial — are accepted as the
|
||||
// conservative trade-off; the alternative (corruption) is irrecoverable.
|
||||
Pattern boundary = Pattern.compile(
|
||||
Pattern.quote(oldNeedle) + "(?![\\p{L}0-9\\-]| (?=\\p{Lu}))");
|
||||
String replacement = Matcher.quoteReplacement(newNeedle);
|
||||
|
||||
for (TranscriptionBlock block : blocks) {
|
||||
if (block.getText() != null) {
|
||||
block.setText(block.getText().replace(oldNeedle, newNeedle));
|
||||
block.setText(boundary.matcher(block.getText()).replaceAll(replacement));
|
||||
}
|
||||
for (PersonMention mention : block.getMentionedPersons()) {
|
||||
if (mention.getPersonId().equals(event.personId())) {
|
||||
|
||||
@@ -123,6 +123,32 @@ class PersonMentionPropagationListenerTest {
|
||||
org.assertj.core.groups.Tuple.tuple(hansId, "Hans Schmidt"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void doesNotCorruptCompositeMention_whenRenamingSingleWordPerson() {
|
||||
UUID hansMüllerId = savedPersonId("Hans", "Müller");
|
||||
UUID hansId = savedPersonId(null, "Hans");
|
||||
TranscriptionBlock saved = saveBlock(
|
||||
"@Hans Müller schrieb. Auch @Hans hat geschrieben.",
|
||||
List.of(
|
||||
new PersonMention(hansMüllerId, "Hans Müller"),
|
||||
new PersonMention(hansId, "Hans")));
|
||||
em.clear();
|
||||
|
||||
listener.onPersonDisplayNameChanged(
|
||||
new PersonDisplayNameChangedEvent(hansId, "Hans", "Henry"));
|
||||
blockRepository.flush();
|
||||
em.clear();
|
||||
|
||||
TranscriptionBlock reloaded = blockRepository.findById(saved.getId()).orElseThrow();
|
||||
assertThat(reloaded.getText())
|
||||
.isEqualTo("@Hans Müller schrieb. Auch @Henry hat geschrieben.");
|
||||
assertThat(reloaded.getMentionedPersons())
|
||||
.extracting(PersonMention::getPersonId, PersonMention::getDisplayName)
|
||||
.containsExactlyInAnyOrder(
|
||||
org.assertj.core.groups.Tuple.tuple(hansMüllerId, "Hans Müller"),
|
||||
org.assertj.core.groups.Tuple.tuple(hansId, "Henry"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void rewritesAllOccurrences_whenSameMentionAppearsTwiceInBlock() {
|
||||
UUID personId = savedPersonId("Auguste", "Raddatz");
|
||||
|
||||
Reference in New Issue
Block a user