/**
 * **Not a sanitizer.** This module extracts visible text from a (presumed
 * already-sanitised) HTML string for excerpt rendering. It is safe ONLY
 * because the Geschichte body is sanitised against the OWASP allow-list
 * on the server before persistence, and via DOMPurify on render.
 *
 * Do not use these helpers to defend against XSS — `safeHtml()` in
 * `./sanitize.ts` is the only sanitiser. Calling `extractText()` on
 * untrusted input that has not been sanitised does not protect against
 * `javascript:` URLs, event-handler attributes, or `<script>` payloads.
 */

/**
 * Named character references understood by the server-side fallback.
 * Deliberately small; names that are not listed are left untouched.
 * A `Map` is used so names such as `constructor` cannot hit a prototype.
 */
const NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', '\u00A0'],
]);

/** Matches `&name;`, `&#123;` and `&#x1F;` style character references. */
const ENTITY_PATTERN = /&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z][a-zA-Z0-9]*);/g;

/**
 * Decode character references in already tag-stripped text.
 *
 * Runs as a single pass so that `&amp;lt;` becomes `&lt;` and is not decoded
 * a second time into `<`.
 *
 * @param text - Text with tags already removed.
 * @returns The text with known references replaced by their characters.
 */
function decodeEntities(text: string): string {
  return text.replace(ENTITY_PATTERN, (match: string, body: string) => {
    if (body.charAt(0) !== '#') {
      return NAMED_ENTITIES.get(body) ?? match;
    }
    const marker = body.charAt(1);
    const isHex = marker === 'x' || marker === 'X';
    const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    const isValid = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate;
    // NUL, surrogates and out-of-range values become U+FFFD instead of throwing.
    return isValid ? String.fromCodePoint(codePoint) : '\uFFFD';
  });
}

/**
 * Strip tags and return whitespace-normalised plain text.
 *
 * Uses `DOMParser` when it is available (the browser). Otherwise it falls
 * back to a regex that drops angle-bracket sequences and then decodes
 * character references, so that e.g. `&amp;` or `&#64;` produce the same
 * text as the `DOMParser` branch instead of leaking into the excerpt.
 *
 * The fallback is **not** a sanitiser — see module docstring. The result is
 * plain text and may contain `<` or `&`; render it as text, never as markup.
 *
 * @param html - HTML string; `null`, `undefined` and `''` yield `''`.
 * @returns The text content with whitespace runs collapsed to single spaces
 *   and leading/trailing whitespace removed.
 */
export function extractText(html: string | null | undefined): string {
  if (!html) return '';
  if (typeof DOMParser === 'function') {
    const doc = new DOMParser().parseFromString(html, 'text/html');
    return (doc.body.textContent ?? '').replace(/\s+/g, ' ').trim();
  }
  // Strip tags first, then decode: an encoded `&lt;b&gt;` is text, not a tag.
  // Decoding happens before whitespace collapsing so `&nbsp;` is collapsed too.
  return decodeEntities(html.replace(/<[^>]*>/g, ''))
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Strip tags then truncate to `max` chars on a word boundary, appending an
 * ellipsis when truncated. Used for editorial story excerpts.
 *
 * @param html - HTML string to excerpt; nullish values yield `''`.
 * @param max - Maximum number of UTF-16 code units kept before the ellipsis.
 * @returns The full text when it fits, otherwise the truncated text plus `…`.
 */
export function plainExcerpt(html: string | null | undefined, max = 80): string {
  const text = extractText(html);
  if (text.length <= max) return text;

  let head = text.slice(0, max);
  // Only back up to the previous word boundary when the cut landed mid-word.
  // If the next character is whitespace, the slice already ends on a whole word.
  if (!/\s/.test(text.charAt(max))) {
    head = head.replace(/\s+\S*$/, '');
  }
  // A hard cut (no whitespace to back up to) may split a surrogate pair;
  // drop the dangling high surrogate rather than emit a broken character.
  head = head.replace(/[\uD800-\uDBFF]$/, '');
  return head + '…';
}