diff --git a/frontend/src/lib/components/GeschichtenCard.svelte b/frontend/src/lib/components/GeschichtenCard.svelte index 6b8a0d4c..a8e7a0b1 100644 --- a/frontend/src/lib/components/GeschichtenCard.svelte +++ b/frontend/src/lib/components/GeschichtenCard.svelte @@ -1,7 +1,7 @@ '); + expect(out).not.toContain(''); + }); + + it('drops markup', () => { + const out = extractText(''); + expect(out).not.toContain(' markup', () => { + const out = extractText('

short

', 80)).toBe('short'); + }); + + it('truncates at the boundary with an ellipsis', () => { + const html = '

' + 'a'.repeat(100) + '

'; + const out = plainExcerpt(html, 20); + expect(out.length).toBeLessThanOrEqual(21); + expect(out.endsWith('…')).toBe(true); + }); + + it('breaks at a word boundary when possible', () => { + const out = plainExcerpt('

The quick brown fox jumps over

', 18); + expect(out).toBe('The quick brown…'); + }); +}); diff --git a/frontend/src/lib/utils/extractText.ts b/frontend/src/lib/utils/extractText.ts new file mode 100644 index 00000000..331d9dea --- /dev/null +++ b/frontend/src/lib/utils/extractText.ts @@ -0,0 +1,38 @@ +/** + * **Not a sanitizer.** This module extracts visible text from a (presumed + * already-sanitised) HTML string for excerpt rendering. It is safe ONLY + * because the Geschichte body is sanitised against the OWASP allow-list + * on the server before persistence, and via DOMPurify on render. + * + * Do not use these helpers to defend against XSS — `safeHtml()` in + * `./sanitize.ts` is the only sanitiser. Calling `extractText()` on + * untrusted input that has not been sanitised does not protect against + * `javascript:` URLs, event-handler attributes, or `<script>` payloads. + */ + +/** + * Strip tags and return plain text. Uses DOMParser in the browser; on the + * server it falls back to a regex that drops angle-bracket sequences. + * The fallback is **not** a sanitiser — see module docstring. + */ +export function extractText(html: string | null | undefined): string { + if (!html) return ''; + if (typeof DOMParser === 'function') { + const doc = new DOMParser().parseFromString(html, 'text/html'); + return (doc.body.textContent ?? '').replace(/\s+/g, ' ').trim(); + } + return html + .replace(/<[^>]*>/g, '') + .replace(/\s+/g, ' ') + .trim(); +} + +/** + * Strip tags then truncate to `max` chars on a word boundary, appending an + * ellipsis when truncated. Used for editorial story excerpts. 
+ */ +export function plainExcerpt(html: string | null | undefined, max = 80): string { + const text = extractText(html); + if (text.length <= max) return text; + return text.slice(0, max).replace(/\s+\S*$/, '') + '…'; +} diff --git a/frontend/src/lib/utils/stripHtml.spec.ts b/frontend/src/lib/utils/stripHtml.spec.ts deleted file mode 100644 index 4dcc7308..00000000 --- a/frontend/src/lib/utils/stripHtml.spec.ts +++ /dev/null @@ -1,36 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import { plainExcerpt, stripHtml } from './stripHtml'; - -describe('stripHtml', () => { - it('returns empty string for null/undefined/empty', () => { - expect(stripHtml(null)).toBe(''); - expect(stripHtml(undefined)).toBe(''); - expect(stripHtml('')).toBe(''); - }); - - it('strips tags and preserves visible text', () => { - expect(stripHtml('

Hello world

')).toBe('Hello world'); - }); - - it('strips nested HTML', () => { - expect(stripHtml('

A

B

')).toBe('AB'); - }); -}); - -describe('plainExcerpt', () => { - it('returns full text when under the limit', () => { - expect(plainExcerpt('

short

', 80)).toBe('short'); - }); - - it('truncates at the boundary with an ellipsis', () => { - const html = '

' + 'a'.repeat(100) + '

'; - const out = plainExcerpt(html, 20); - expect(out.length).toBeLessThanOrEqual(21); // 20 chars + ellipsis - expect(out.endsWith('…')).toBe(true); - }); - - it('breaks at a word boundary when possible', () => { - const out = plainExcerpt('

The quick brown fox jumps over

', 18); - expect(out).toBe('The quick brown…'); - }); -}); diff --git a/frontend/src/lib/utils/stripHtml.ts b/frontend/src/lib/utils/stripHtml.ts deleted file mode 100644 index 00ce62ff..00000000 --- a/frontend/src/lib/utils/stripHtml.ts +++ /dev/null @@ -1,23 +0,0 @@ -/** - * Strip HTML tags from a string and return the plain text. - * Uses DOMParser in the browser, falls back to a regex strip on the server - * (where DOMParser is not available without isomorphic-dompurify's JSDOM). - */ -export function stripHtml(html: string | null | undefined): string { - if (!html) return ''; - if (typeof DOMParser === 'function') { - const doc = new DOMParser().parseFromString(html, 'text/html'); - return (doc.body.textContent ?? '').trim(); - } - return html.replace(/<[^>]*>/g, '').trim(); -} - -/** - * Strip HTML and truncate to a maximum length, appending an ellipsis when - * the source exceeds it. Used for editorial story excerpts. - */ -export function plainExcerpt(html: string | null | undefined, max = 80): string { - const text = stripHtml(html); - if (text.length <= max) return text; - return text.slice(0, max).replace(/\s+\S*$/, '') + '…'; -} diff --git a/frontend/src/routes/geschichten/+page.svelte b/frontend/src/routes/geschichten/+page.svelte index d85aeab3..feb1f876 100644 --- a/frontend/src/routes/geschichten/+page.svelte +++ b/frontend/src/routes/geschichten/+page.svelte @@ -1,7 +1,7 @@