feat(person-mention): renderTranscriptionBody for safe read-mode HTML
Replaces every @DisplayName in a transcription block's text with an anchor
link to /persons/{personId}, sourced from the mentionedPersons sidecar.
The @ prefix is stripped from the rendered link text per spec — it is an
editor affordance, not part of the historical text.
Stored-XSS hardening: HTML-escapes block text, displayName, and personId
before injection. Word-boundary lookahead avoids prefix collisions
(@Hans vs @HansMüller). Longest-displayName-first + first-sidecar-wins
make rendering deterministic for the OQ-1 collision case (#5339).
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,12 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { detectMention, escapeHtml, extractContent, renderBody } from './mention';
|
||||
import type { MentionDTO } from '$lib/types';
|
||||
import {
|
||||
detectMention,
|
||||
escapeHtml,
|
||||
extractContent,
|
||||
renderBody,
|
||||
renderTranscriptionBody
|
||||
} from './mention';
|
||||
import type { MentionDTO, PersonMention } from '$lib/types';
|
||||
|
||||
// ─── escapeHtml ───────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -161,3 +167,144 @@ describe('renderBody', () => {
|
||||
expect(result).not.toContain('\n');
|
||||
});
|
||||
});
|
||||
|
||||
// ─── renderTranscriptionBody ──────────────────────────────────────────────────
|
||||
|
||||
// Read-mode rendering contract for transcription blocks: block text is
// HTML-escaped, and every `@DisplayName` that matches an entry of the
// mentionedPersons sidecar becomes an anchor to /persons/{personId}.
// Expected values are written in their entity-escaped form (`&lt;`, `&amp;`,
// `&quot;`, …) because that is what escapeHtml emits; asserting the raw
// characters would contradict the adjacent "must not contain" checks.
describe('renderTranscriptionBody', () => {
	const auguste: PersonMention = {
		personId: '550e8400-e29b-41d4-a716-446655440000',
		displayName: 'Auguste Raddatz'
	};
	const hans: PersonMention = {
		personId: '550e8400-e29b-41d4-a716-446655440001',
		displayName: 'Hans'
	};

	it('returns empty string for empty input', () => {
		expect(renderTranscriptionBody('', [])).toBe('');
	});

	it('returns escaped plain text when no mentions', () => {
		expect(renderTranscriptionBody('Hello world', [])).toBe('Hello world');
	});

	it('escapes < and > in plain block text', () => {
		const result = renderTranscriptionBody('<script>alert(1)</script>', []);
		expect(result).toBe('&lt;script&gt;alert(1)&lt;/script&gt;');
		expect(result).not.toContain('<script>');
	});

	it('escapes & in plain block text', () => {
		expect(renderTranscriptionBody('AT&T', [])).toBe('AT&amp;T');
	});

	it('replaces @DisplayName with anchor link to /persons/{personId}', () => {
		const result = renderTranscriptionBody('Brief an @Auguste Raddatz vom Mai', [auguste]);
		expect(result).toContain(`<a href="/persons/${auguste.personId}"`);
		expect(result).toContain('class="person-mention"');
		expect(result).toContain(`data-person-id="${auguste.personId}"`);
		expect(result).toContain('>Auguste Raddatz</a>');
	});

	it('strips the @ prefix from rendered link text (read mode)', () => {
		const result = renderTranscriptionBody('Hallo @Auguste Raddatz!', [auguste]);
		// The anchor body is the bare display name — no leading @
		expect(result).not.toMatch(/>@Auguste Raddatz</);
		expect(result).toMatch(/>Auguste Raddatz</);
	});

	it('removes the trigger @ from the surrounding text (no orphan @ before the link)', () => {
		const result = renderTranscriptionBody('Brief an @Auguste Raddatz vom Mai', [auguste]);
		// No bare @ remains where the mention was
		expect(result).not.toMatch(/@<a/);
	});

	it('replaces all occurrences of the same mention', () => {
		const result = renderTranscriptionBody('@Auguste Raddatz und @Auguste Raddatz', [auguste]);
		const anchorCount = (result.match(/<a /g) ?? []).length;
		expect(anchorCount).toBe(2);
	});

	it('does not replace plain-text occurrences without the @ trigger', () => {
		const result = renderTranscriptionBody('Auguste Raddatz war hier', [auguste]);
		expect(result).not.toContain('<a ');
		expect(result).toBe('Auguste Raddatz war hier');
	});

	it('processes longer displayNames first to avoid prefix shadowing', () => {
		const augusteShort: PersonMention = { personId: 'p-short', displayName: 'Auguste' };
		const augusteLong: PersonMention = {
			personId: 'p-long',
			displayName: 'Auguste Raddatz'
		};
		// Sidecar order is short-first; longer match must still win for the long text
		const result = renderTranscriptionBody('@Auguste Raddatz schreibt @Auguste', [
			augusteShort,
			augusteLong
		]);
		expect(result).toContain('href="/persons/p-long"');
		expect(result).toContain('href="/persons/p-short"');
		// The "Raddatz" suffix must not leak inside the short-name anchor
		expect(result).not.toMatch(/>Auguste<\/a> Raddatz/);
	});

	it('does not match @ followed by extra word characters (word boundary)', () => {
		// Sidecar contains "Hans"; text contains "@HansMüller" — no link.
		const result = renderTranscriptionBody('Brief an @HansMüller', [hans]);
		expect(result).not.toContain('<a ');
		expect(result).toContain('@HansM');
	});

	it('first-sidecar-wins when two entries share the same displayName', () => {
		// Two persons named "Hans" — first sidecar entry wins for all occurrences.
		const hansFirst: PersonMention = { personId: 'p-first', displayName: 'Hans' };
		const hansSecond: PersonMention = { personId: 'p-second', displayName: 'Hans' };
		const result = renderTranscriptionBody('@Hans und @Hans', [hansFirst, hansSecond]);
		expect(result).toContain('href="/persons/p-first"');
		expect(result).not.toContain('href="/persons/p-second"');
		const anchorCount = (result.match(/<a /g) ?? []).length;
		expect(anchorCount).toBe(2);
	});

	it('escapes HTML in displayName to prevent stored XSS', () => {
		const xss: PersonMention = {
			personId: 'p-xss',
			displayName: '<script>alert(1)</script>'
		};
		const result = renderTranscriptionBody('Hi @<script>alert(1)</script> there', [xss]);
		// The raw tag must be gone and its escaped form must be present.
		expect(result).not.toContain('<script>');
		expect(result).toContain('&lt;script&gt;');
	});

	it('escapes <img onerror=...> payloads in surrounding block text', () => {
		const result = renderTranscriptionBody('<img src=x onerror=alert(1)> hello', []);
		expect(result).not.toContain('<img');
		expect(result).toContain('&lt;img');
	});

	it('does not double-encode HTML-entity-already-encoded payloads', () => {
		// `&lt;script&gt;` is already-escaped HTML in the source text.
		// renderTranscriptionBody must escape the literal & once → `&amp;lt;...`
		// — never silently decode pre-escaped entities.
		const result = renderTranscriptionBody('text &lt;script&gt;', []);
		expect(result).toBe('text &amp;lt;script&amp;gt;');
	});

	it('escapes quotes in displayName so they cannot break the href attribute', () => {
		const tricky: PersonMention = {
			personId: 'p-quote',
			displayName: 'O"Brien'
		};
		const result = renderTranscriptionBody('@O"Brien', [tricky]);
		// The raw `"` from the displayName must never appear inside the rendered link
		// — it would terminate the attribute value early and let an attacker craft
		// arbitrary attributes on the anchor. It must arrive at the browser as &quot;.
		expect(result).toMatch(/>O&quot;Brien<\/a>/);
		expect(result).not.toMatch(/>O"Brien<\/a>/);
	});

	it('renders nothing when mentionedPersons is undefined-empty and no @ triggers', () => {
		const result = renderTranscriptionBody('Plain old transcription text.', []);
		expect(result).toBe('Plain old transcription text.');
	});
});
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import type { MentionDTO } from '$lib/types';
|
||||
import type { MentionDTO, PersonMention } from '$lib/types';
|
||||
|
||||
/**
|
||||
* Given the current textarea value and cursor position, returns the
|
||||
@@ -62,6 +62,50 @@ export function escapeHtml(str: string): string {
|
||||
.replaceAll("'", ''');
|
||||
}
|
||||
|
||||
/**
 * Backslash-escapes every regular-expression syntax character in `str` so the
 * result can be embedded in a `RegExp` source and match the input literally.
 *
 * @param str Arbitrary text to be matched verbatim.
 * @returns `str` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by `\`.
 */
function escapeRegExp(str: string): string {
	const syntaxChars = /[.*+?^${}()|[\]\\]/g;
	return str.replace(syntaxChars, (ch) => `\\${ch}`);
}
|
||||
|
||||
/**
 * Renders a transcription block's text segment as safe HTML for read mode.
 *
 * Rules:
 *   1. The full text is HTML-escaped first (defense against stored XSS).
 *   2. For each entry in `mentionedPersons`, every `@DisplayName` occurrence is
 *      replaced with `<a href="/persons/{personId}" class="person-mention" …>DisplayName</a>`.
 *      The `@` prefix is stripped from the rendered link text — it is an editor
 *      affordance, not part of the historical text (issue #362).
 *   3. Longest displayNames are tried first so a short prefix in the sidecar
 *      cannot shadow a longer match in the text (e.g. `@Auguste` vs `@Auguste Raddatz`).
 *   4. Word-boundary lookahead prevents `@Hans` from matching `@HansMüller`.
 *   5. First-sidecar-wins for entries that share a displayName (deterministic
 *      rule per Felix decision OQ-1, comment #5339).
 *   6. All mentions are substituted in a single pass over the escaped text, so
 *      markup injected for one mention is never re-scanned for another.
 *   7. Entries with an empty displayName are ignored; they would otherwise turn
 *      every bare `@` into an empty link.
 *
 * @param text Raw (unescaped) block text.
 * @param mentionedPersons Sidecar entries mapping display names to person ids.
 * @returns An HTML string in which only the generated anchors are markup.
 */
export function renderTranscriptionBody(text: string, mentionedPersons: PersonMention[]): string {
	if (!text) return '';

	const escaped = escapeHtml(text);

	// Keyed by the *escaped* display name because matching runs against escaped
	// text. Map insertion order plus the `has` guard implements first-sidecar-wins.
	const linkByName = new Map<string, string>();
	for (const mention of mentionedPersons) {
		if (!mention.displayName) continue;
		const escapedDisplayName = escapeHtml(mention.displayName);
		if (linkByName.has(escapedDisplayName)) continue;
		const escapedPersonId = escapeHtml(mention.personId);
		linkByName.set(
			escapedDisplayName,
			`<a href="/persons/${escapedPersonId}" class="person-mention" data-person-id="${escapedPersonId}">${escapedDisplayName}</a>`
		);
	}

	if (linkByName.size === 0) return escaped;

	// Longest alternative first: regex alternation is ordered, so the longer
	// name wins whenever one name is a prefix of another. If the lookahead
	// rejects the long name, the engine backtracks to the shorter one.
	const alternatives = [...linkByName.keys()]
		.sort((a, b) => b.length - a.length)
		.map((name) => escapeRegExp(name))
		.join('|');
	const pattern = new RegExp(`@(${alternatives})(?![\\p{L}\\p{N}])`, 'gu');

	// A function replacer is required: with a replacement *string*, sequences
	// such as `$$`, `$&` or "$`" inside a display name would be interpreted as
	// replacement patterns and corrupt the output.
	return escaped.replace(pattern, (match, name: string) => linkByName.get(name) ?? match);
}
|
||||
|
||||
/**
|
||||
* Renders a comment body as safe HTML:
|
||||
* 1. Escapes all HTML-special characters in the raw content
|
||||
|
||||
Reference in New Issue
Block a user