feat(person-mention): renderTranscriptionBody for safe read-mode HTML

Replaces every @DisplayName in a transcription block's text with an anchor
link to /persons/{personId}, sourced from the mentionedPersons sidecar.
The @ prefix is stripped from the rendered link text per spec — it is an
editor affordance, not part of the historical text.

Stored-XSS hardening: HTML-escapes block text, displayName, and personId
before injection. Word-boundary lookahead avoids prefix collisions
(@Hans vs @HansMüller). Longest-displayName-first + first-sidecar-wins
make rendering deterministic for the OQ-1 collision case (#5339).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-29 08:12:52 +02:00
parent b4b46a0a79
commit eb6e21f032
2 changed files with 194 additions and 3 deletions

View File

@@ -1,6 +1,12 @@
import { describe, it, expect } from 'vitest';
import { detectMention, escapeHtml, extractContent, renderBody } from './mention';
import type { MentionDTO } from '$lib/types';
import {
detectMention,
escapeHtml,
extractContent,
renderBody,
renderTranscriptionBody
} from './mention';
import type { MentionDTO, PersonMention } from '$lib/types';
// ─── escapeHtml ───────────────────────────────────────────────────────────────
@@ -161,3 +167,144 @@ describe('renderBody', () => {
expect(result).not.toContain('\n');
});
});
// ─── renderTranscriptionBody ──────────────────────────────────────────────────
// Behavioural spec for renderTranscriptionBody: HTML escaping of block text,
// @DisplayName → anchor replacement, the ordering / de-duplication rules for
// the mentionedPersons sidecar, and stored-XSS regressions.
describe('renderTranscriptionBody', () => {
  // Multi-word display name: exercises matching across a space.
  const auguste: PersonMention = {
    personId: '550e8400-e29b-41d4-a716-446655440000',
    displayName: 'Auguste Raddatz'
  };
  // Single-word display name: used for the word-boundary test.
  const hans: PersonMention = {
    personId: '550e8400-e29b-41d4-a716-446655440001',
    displayName: 'Hans'
  };

  it('returns empty string for empty input', () => {
    expect(renderTranscriptionBody('', [])).toBe('');
  });

  it('returns escaped plain text when no mentions', () => {
    expect(renderTranscriptionBody('Hello world', [])).toBe('Hello world');
  });

  it('escapes < and > in plain block text', () => {
    const result = renderTranscriptionBody('<script>alert(1)</script>', []);
    expect(result).toBe('&lt;script&gt;alert(1)&lt;/script&gt;');
    expect(result).not.toContain('<script>');
  });

  it('escapes & in plain block text', () => {
    expect(renderTranscriptionBody('AT&T', [])).toBe('AT&amp;T');
  });

  it('replaces @DisplayName with anchor link to /persons/{personId}', () => {
    const result = renderTranscriptionBody('Brief an @Auguste Raddatz vom Mai', [auguste]);
    expect(result).toContain(`<a href="/persons/${auguste.personId}"`);
    expect(result).toContain('class="person-mention"');
    expect(result).toContain(`data-person-id="${auguste.personId}"`);
    expect(result).toContain('>Auguste Raddatz</a>');
  });

  it('strips the @ prefix from rendered link text (read mode)', () => {
    const result = renderTranscriptionBody('Hallo @Auguste Raddatz!', [auguste]);
    // The anchor body is the bare display name — no leading @
    expect(result).not.toMatch(/>@Auguste Raddatz</);
    expect(result).toMatch(/>Auguste Raddatz</);
  });

  it('removes the trigger @ from the surrounding text (no orphan @ before the link)', () => {
    const result = renderTranscriptionBody('Brief an @Auguste Raddatz vom Mai', [auguste]);
    // No bare @ remains where the mention was
    expect(result).not.toMatch(/@<a/);
  });

  it('replaces all occurrences of the same mention', () => {
    const result = renderTranscriptionBody('@Auguste Raddatz und @Auguste Raddatz', [auguste]);
    const anchorCount = (result.match(/<a /g) ?? []).length;
    expect(anchorCount).toBe(2);
  });

  it('does not replace plain-text occurrences without the @ trigger', () => {
    const result = renderTranscriptionBody('Auguste Raddatz war hier', [auguste]);
    expect(result).not.toContain('<a ');
    expect(result).toBe('Auguste Raddatz war hier');
  });

  it('processes longer displayNames first to avoid prefix shadowing', () => {
    const augusteShort: PersonMention = { personId: 'p-short', displayName: 'Auguste' };
    const augusteLong: PersonMention = {
      personId: 'p-long',
      displayName: 'Auguste Raddatz'
    };
    // Sidecar order is short-first; longer match must still win for the long text
    const result = renderTranscriptionBody('@Auguste Raddatz schreibt @Auguste', [
      augusteShort,
      augusteLong
    ]);
    expect(result).toContain('href="/persons/p-long"');
    expect(result).toContain('href="/persons/p-short"');
    // The "Raddatz" suffix must not leak inside the short-name anchor
    expect(result).not.toMatch(/>Auguste<\/a> Raddatz/);
  });

  it('does not match @ followed by extra word characters (word boundary)', () => {
    // Sidecar contains "Hans"; text contains "@HansMüller" — no link.
    const result = renderTranscriptionBody('Brief an @HansMüller', [hans]);
    expect(result).not.toContain('<a ');
    expect(result).toContain('@HansM');
  });

  it('treats regex metacharacters in displayName literally', () => {
    // "." must match only a literal dot, so "@AxB" stays plain text.
    const dotted: PersonMention = { personId: 'p-dot', displayName: 'A.B' };
    const result = renderTranscriptionBody('@AxB und @A.B', [dotted]);
    expect(result).toContain('@AxB');
    const anchorCount = (result.match(/<a /g) ?? []).length;
    expect(anchorCount).toBe(1);
  });

  it('first-sidecar-wins when two entries share the same displayName', () => {
    // Two persons named "Hans" — first sidecar entry wins for all occurrences.
    const hansFirst: PersonMention = { personId: 'p-first', displayName: 'Hans' };
    const hansSecond: PersonMention = { personId: 'p-second', displayName: 'Hans' };
    const result = renderTranscriptionBody('@Hans und @Hans', [hansFirst, hansSecond]);
    expect(result).toContain('href="/persons/p-first"');
    expect(result).not.toContain('href="/persons/p-second"');
    const anchorCount = (result.match(/<a /g) ?? []).length;
    expect(anchorCount).toBe(2);
  });

  it('escapes HTML in displayName to prevent stored XSS', () => {
    const xss: PersonMention = {
      personId: 'p-xss',
      displayName: '<script>alert(1)</script>'
    };
    const result = renderTranscriptionBody('Hi @<script>alert(1)</script> there', [xss]);
    expect(result).not.toContain('<script>');
    expect(result).toContain('&lt;script&gt;');
  });

  it('escapes HTML in personId before placing it in href and data-person-id', () => {
    // personId is the only sidecar value interpolated into attribute values,
    // so a raw `"` or `>` there would let it break out of the attribute.
    const hostile: PersonMention = {
      personId: '"><script>alert(1)</script>',
      displayName: 'Eve'
    };
    const result = renderTranscriptionBody('@Eve', [hostile]);
    expect(result).not.toContain('<script>');
    expect(result).toContain('href="/persons/&quot;&gt;');
    expect(result).toContain('>Eve</a>');
  });

  it('escapes <img onerror=...> payloads in surrounding block text', () => {
    const result = renderTranscriptionBody('<img src=x onerror=alert(1)> hello', []);
    expect(result).not.toContain('<img');
    expect(result).toContain('&lt;img');
  });

  it('escapes pre-encoded entities literally instead of decoding them', () => {
    // `&amp;lt;script&amp;gt;` is already-escaped HTML in the source text.
    // The input is treated as plain text, so each literal & is escaped once
    // more (`&amp;` → `&amp;amp;`); pre-escaped entities are never decoded.
    const result = renderTranscriptionBody('text &amp;lt;script&amp;gt;', []);
    expect(result).toBe('text &amp;amp;lt;script&amp;amp;gt;');
  });

  it('escapes quotes in displayName inside the rendered link text', () => {
    const tricky: PersonMention = {
      personId: 'p-quote',
      displayName: 'O"Brien'
    };
    const result = renderTranscriptionBody('@O"Brien', [tricky]);
    // The raw `"` from the displayName must never appear inside the rendered
    // link; it must arrive at the browser as &quot;.
    expect(result).toMatch(/>O&quot;Brien<\/a>/);
    expect(result).not.toMatch(/>O"Brien<\/a>/);
  });

  it('returns the text unchanged when mentionedPersons is empty and no @ triggers', () => {
    const result = renderTranscriptionBody('Plain old transcription text.', []);
    expect(result).toBe('Plain old transcription text.');
  });
});

View File

@@ -1,4 +1,4 @@
import type { MentionDTO } from '$lib/types';
import type { MentionDTO, PersonMention } from '$lib/types';
/**
* Given the current textarea value and cursor position, returns the
@@ -62,6 +62,50 @@ export function escapeHtml(str: string): string {
.replaceAll("'", '&#39;');
}
/**
 * Backslash-escapes every regular-expression syntax character in `str` so the
 * result can be embedded in a `RegExp` source and match `str` literally.
 *
 * @param str - Arbitrary text that should be matched verbatim.
 * @returns `str` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by `\`.
 */
function escapeRegExp(str: string): string {
  const syntaxChars = /[.*+?^${}()|[\]\\]/g;
  // A replacer callback is used instead of the '\\$&' replacement template;
  // both prefix the matched character with a single backslash.
  return str.replace(syntaxChars, (ch) => `\\${ch}`);
}
/**
 * Renders a transcription block's text segment as safe HTML for read mode.
 *
 * Rules:
 *  1. The full text is HTML-escaped first (defense against stored XSS).
 *  2. For each entry in `mentionedPersons`, every `@DisplayName` occurrence is
 *     replaced with `<a href="/persons/{personId}" class="person-mention" …>DisplayName</a>`.
 *     The `@` prefix is stripped from the rendered link text — it is an editor
 *     affordance, not part of the historical text (issue #362).
 *  3. Longest displayNames are tried first so a short prefix in the sidecar
 *     cannot shadow a longer match in the text (e.g. `@Auguste` vs `@Auguste Raddatz`).
 *  4. Word-boundary lookahead prevents `@Hans` from matching `@HansMüller`.
 *  5. First-sidecar-wins for entries that share a displayName (deterministic
 *     rule per Felix decision OQ-1, comment #5339).
 *
 * Implementation notes:
 *  - All mentions are substituted in ONE pass over the escaped text. Anchors
 *    that were just inserted are therefore never re-scanned, so an `@Name`
 *    sequence inside another entry's displayName or personId cannot produce a
 *    nested anchor or inject markup into an attribute value.
 *  - The anchor is supplied through a replacer callback, not a replacement
 *    string, so `$` sequences in displayName / personId (`$&`, `$'`, `$$`, …)
 *    are emitted literally instead of being expanded by `String.replace`.
 *  - Entries with an empty displayName are ignored; they would otherwise turn
 *    every bare `@` into an empty link.
 *
 * @param text - Raw (unescaped) block text.
 * @param mentionedPersons - Sidecar entries mapping display names to person ids.
 * @returns HTML in which everything except the generated anchors is escaped;
 *          the empty string when `text` is empty.
 */
export function renderTranscriptionBody(text: string, mentionedPersons: PersonMention[]): string {
  if (!text) return '';
  const escaped = escapeHtml(text);

  // Escaped displayName → ready-made anchor. Keys are the escaped form because
  // that is what appears in `escaped`. Only the first entry per name is kept.
  const linkByName = new Map<string, string>();
  for (const mention of mentionedPersons) {
    const name = escapeHtml(mention.displayName);
    if (name === '' || linkByName.has(name)) continue;
    const id = escapeHtml(mention.personId);
    linkByName.set(
      name,
      `<a href="/persons/${id}" class="person-mention" data-person-id="${id}">${name}</a>`
    );
  }
  if (linkByName.size === 0) return escaped;

  // Alternation is tried left to right, so longest-first ordering lets the
  // longer name win; if its lookahead fails the engine falls back to a shorter one.
  const alternatives = [...linkByName.keys()]
    .sort((a, b) => b.length - a.length)
    .map((name) => escapeRegExp(name))
    .join('|');
  const pattern = new RegExp(`@(${alternatives})(?![\\p{L}\\p{N}])`, 'gu');

  // The captured group is exactly one of the map keys; the fallback keeps the
  // original text and exists only to satisfy the type checker.
  return escaped.replace(
    pattern,
    (match: string, name: string): string => linkByName.get(name) ?? match
  );
}
/**
* Renders a comment body as safe HTML:
* 1. Escapes all HTML-special characters in the raw content