Files
familienarchiv/frontend/src/lib/shared/utils/extractText.ts
2026-05-05 14:35:15 +02:00

39 lines
1.5 KiB
TypeScript

/**
* **Not a sanitizer.** This module extracts visible text from a (presumed
* already-sanitised) HTML string for excerpt rendering. It is safe ONLY
* because the Geschichte body is sanitised against the OWASP allow-list
* on the server before persistence, and via DOMPurify on render.
*
* Do not use these helpers to defend against XSS — `safeHtml()` in
* `./sanitize.ts` is the only sanitiser. Calling `extractText()` on
* untrusted input that has not been sanitised does not protect against
* `javascript:` URLs, event-handler attributes, or `<svg/onload>` payloads.
*/
/**
* Strip tags and return plain text. Uses DOMParser in the browser; on the
* server it falls back to a regex that drops angle-bracket sequences.
* The fallback is **not** a sanitiser — see module docstring.
*/
export function extractText(html: string | null | undefined): string {
if (!html) return '';
if (typeof DOMParser === 'function') {
const doc = new DOMParser().parseFromString(html, 'text/html');
return (doc.body.textContent ?? '').replace(/\s+/g, ' ').trim();
}
return html
.replace(/<[^>]*>/g, '')
.replace(/\s+/g, ' ')
.trim();
}
/**
* Strip tags then truncate to `max` chars on a word boundary, appending an
* ellipsis when truncated. Used for editorial story excerpts.
*/
export function plainExcerpt(html: string | null | undefined, max = 80): string {
const text = extractText(html);
if (text.length <= max) return text;
return text.slice(0, max).replace(/\s+\S*$/, '') + '…';
}