39 lines
1.5 KiB
TypeScript
39 lines
1.5 KiB
TypeScript
/**
 * **Not a sanitiser.** This module extracts visible text from a (presumed
 * already-sanitised) HTML string for excerpt rendering. It is safe ONLY
 * because the Geschichte body is sanitised against the OWASP allow-list
 * on the server before persistence, and via DOMPurify on render.
 *
 * Do not use these helpers to defend against XSS — `safeHtml()` in
 * `./sanitize.ts` is the only sanitiser. Calling `extractText()` on
 * untrusted input that has not been sanitised does not protect against
 * `javascript:` URLs, event-handler attributes, or `<svg/onload>` payloads.
 */
|
|
|
|
/**
 * Strip tags and return whitespace-collapsed plain text.
 *
 * In the browser the markup is parsed with `DOMParser` and the body's
 * `textContent` is used. Where `DOMParser` is unavailable (e.g. server-side
 * rendering) a regex fallback removes comments and angle-bracket sequences
 * and then decodes numeric character references plus a small set of named
 * ones (`amp`, `lt`, `gt`, `quot`, `apos`, `nbsp`), so that both paths agree
 * on common input such as `Tom &amp; Jerry`.
 *
 * Neither path is a sanitiser: the returned string may contain `<` and `>`
 * (from decoded entities) and must only ever be rendered as text.
 *
 * @param html - HTML fragment; `null`, `undefined` and `''` all yield `''`.
 * @returns The text content with every whitespace run collapsed to a single
 *   space and leading/trailing whitespace removed.
 */
export function extractText(html: string | null | undefined): string {
  if (!html) return '';

  if (typeof DOMParser === 'function') {
    const doc = new DOMParser().parseFromString(html, 'text/html');
    return (doc.body.textContent ?? '').replace(/\s+/g, ' ').trim();
  }

  // A Map (not an object literal) so that names like `constructor` cannot
  // resolve to inherited prototype members.
  const namedEntities = new Map<string, string>([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', '\u00A0'],
  ]);

  const decodeEntity = (match: string, body: string): string => {
    if (body.startsWith('#')) {
      const isHex = body[1] === 'x' || body[1] === 'X';
      const code = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
      const isSurrogate = code >= 0xd800 && code <= 0xdfff;
      const isValid = code > 0 && code <= 0x10ffff && !isSurrogate;
      // Leave references we cannot represent untouched rather than throwing.
      return isValid ? String.fromCodePoint(code) : match;
    }
    // Unknown named references are left exactly as written.
    return namedEntities.get(body) ?? match;
  };

  return html
    // Comments first: they may contain `>` which the tag regex would stop at.
    .replace(/<!--[\s\S]*?-->/g, '')
    .replace(/<[^>]*>/g, '')
    // Decode after tag removal, in one pass, so `&lt;b&gt;` stays text and
    // `&amp;lt;` becomes `&lt;` rather than `<`.
    .replace(/&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi, decodeEntity)
    // Runs last so a decoded `&nbsp;` is collapsed like any other whitespace.
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
|
|
/**
 * Strip tags, then shorten the text to at most `max` characters on a word
 * boundary, appending an ellipsis (`…`) when anything was cut. Used for
 * editorial story excerpts.
 *
 * - Text that already fits within `max` is returned unchanged.
 * - If the cut falls exactly at the end of a word, that word is kept.
 * - If the cut falls inside a word, the partial word is dropped; when the
 *   slice contains no whitespace at all (one very long word) it is hard-cut
 *   instead, without splitting a UTF-16 surrogate pair.
 * - The ellipsis is added on top of the cut, so a truncated result may be
 *   up to `max + 1` UTF-16 code units long.
 *
 * @param html - HTML fragment passed through `extractText()`.
 * @param max - Maximum number of UTF-16 code units of text to keep.
 *   Fractions are truncated; negative values and `NaN` are treated as `0`.
 * @returns The plain-text excerpt.
 */
export function plainExcerpt(html: string | null | undefined, max = 80): string {
  const text = extractText(html);

  // `slice(0, negative)` would count from the end of the string, so clamp.
  const limit = Math.max(0, Math.trunc(max) || 0);
  if (text.length <= limit) return text;

  let head = text.slice(0, limit);

  // Only discard the trailing word when the cut actually landed inside it.
  const cutEndsWord = /\s/.test(text.charAt(limit));
  if (!cutEndsWord) {
    head = head.replace(/\s+\S*$/, '');
  }

  // A hard cut can leave a dangling high surrogate; drop it so the result
  // never contains half of an astral character.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1);
  }

  return head + '…';
}
|