Files
familienarchiv/frontend/src/lib/shared/utils/extractText.spec.ts
2026-06-08 22:57:28 +02:00

80 lines
3.1 KiB
TypeScript

import { describe, expect, it } from 'vitest';
import { extractText, plainExcerpt } from './extractText';
// Specs for extractText: blank inputs, tag stripping, whitespace handling,
// and a group of XSS-shaped payloads whose markup must never reappear in
// the returned string.
describe('extractText', () => {
  it('returns empty string for null/undefined/empty', () => {
    // Every "nothing to extract" input must yield the empty string.
    const blankInputs = [null, undefined, ''] as const;
    for (const blank of blankInputs) {
      expect(extractText(blank)).toBe('');
    }
  });

  it('strips tags and preserves visible text', () => {
    const markup = '<p>Hello <strong>world</strong></p>';
    expect(extractText(markup)).toBe('Hello world');
  });

  it('collapses whitespace within and between blocks', () => {
    // Adjacent block elements contribute no separator of their own.
    const adjacentBlocks = extractText('<p>One</p><p>Two</p>');
    expect(adjacentBlocks).toBe('OneTwo');

    // NOTE(review): as it appears here the input holds a single space, so it
    // does not exercise a whitespace run — confirm the checked-in literal.
    const withinBlock = extractText('<p>foo bar</p>');
    expect(withinBlock).toBe('foo bar');
  });

  // XSS-shaped inputs. extractText is described as a non-sanitiser, so these
  // specs only require that *some* string comes back and that the hostile
  // markup is not re-emitted. They exist to catch silent regressions where a
  // tag or attribute might leak through.
  // NOTE(review): all assertions below are negative, so an empty result would
  // also pass — consider pinning the expected text once it is confirmed.
  describe('XSS-shaped input — never re-emits markup, even though this is not a sanitiser', () => {
    it('drops <script> and surfaces only its text content', () => {
      const text = extractText('<p>ok</p><script>alert(1)</script>');
      for (const fragment of ['<script>', '</script>']) {
        expect(text).not.toContain(fragment);
      }
    });

    it('drops <svg/onload> markup', () => {
      const text = extractText('<svg/onload=alert(1)>');
      for (const fragment of ['<svg', 'onload']) {
        expect(text).not.toContain(fragment);
      }
    });

    it('drops <iframe srcdoc=…> markup', () => {
      const text = extractText('<iframe srcdoc="<script>alert(1)</script>">');
      for (const fragment of ['<iframe', 'srcdoc']) {
        expect(text).not.toContain(fragment);
      }
    });

    it('drops <a href="javascript:…"> tag (text content may remain)', () => {
      const text = extractText('<a href="javascript:alert(1)">click</a>');
      for (const fragment of ['<a ', 'javascript:']) {
        expect(text).not.toContain(fragment);
      }
    });
  });
});
// SSR regex-fallback XSS gate. Keep this spec in the Node (.test.ts / .spec.ts)
// project: per the original author's note, the browser project has a DOMParser
// and would silently take the safe branch, producing a false green. Under Node
// there is no DOMParser, so this spec specifically exercises the regex fallback.
describe('plainExcerpt — SSR regex-fallback XSS gate (Node tier)', () => {
  it('does not emit onerror= in output when given an <img onerror> payload (security regression)', () => {
    // plainExcerpt goes through extractText, which regex-strips tags under Node.
    // The original note flags an `onerror=` fragment in the output as the
    // first-paint risk during SvelteKit SSR, so it must not survive.
    const hostileImg = '<img src=x onerror="window.__xss=1">';
    const excerpt = plainExcerpt(hostileImg);
    expect(excerpt).not.toContain('onerror=');
  });
});
// Specs for plainExcerpt's length handling: pass-through under the limit,
// ellipsis on truncation, and a preference for cutting at a word boundary.
describe('plainExcerpt', () => {
  it('returns full text when under the limit', () => {
    const excerpt = plainExcerpt('<p>short</p>', 80);
    expect(excerpt).toBe('short');
  });

  it('truncates at the boundary with an ellipsis', () => {
    const limit = 20;
    // 100 characters with no spaces, so no word boundary is available.
    const html = `<p>${'a'.repeat(100)}</p>`;
    const excerpt = plainExcerpt(html, limit);
    // At most `limit` characters of text plus the single ellipsis character.
    expect(excerpt.length).toBeLessThanOrEqual(limit + 1);
    expect(excerpt.endsWith('…')).toBe(true);
  });

  it('breaks at a word boundary when possible', () => {
    const sentence = '<p>The quick brown fox jumps over</p>';
    const excerpt = plainExcerpt(sentence, 18);
    expect(excerpt).toBe('The quick brown…');
  });
});