fix(document): configure pdf.js wasmUrl so CCITT/JBIG2 scans stop rendering blank (#708) #713

Merged
marcel merged 15 commits from feat/issue-708-pdfjs-wasmurl into main 2026-06-01 21:25:57 +02:00
5 changed files with 30 additions and 52 deletions
Showing only changes of commit 4c57a2262f - Show all commits

View File

@@ -6,6 +6,7 @@
"scripts": {
"dev": "vite dev",
"build": "vite build",
"postbuild": "node scripts/assert-pdfjs-wasm.mjs",
"preview": "vite preview",
"prepare": "svelte-kit sync || true && git -C .. config core.hooksPath .husky 2>/dev/null || true",
"postinstall": "patch-package",

View File

@@ -0,0 +1,29 @@
// Build-time guard for issue #708. The pdf.js wasm image decoders are copied
// into build/client/pdfjs-wasm/ by vite-plugin-static-copy. If a future
// pdfjs-dist bump moves or renames the wasm, the glob could silently copy
// nothing — and CCITT/JBIG2/JPEG2000 scans would render blank in production
// again with no test catching it (the bug is invisible to unit tests). Fail
// the build loudly instead. Runs after `npm run build` (incl. the Docker
// build stage) via the `postbuild` npm script.
import { existsSync, statSync } from 'node:fs';
import { join } from 'node:path';
// Directory the build is expected to have populated with the pdf.js wasm
// decoders. Resolved against the current working directory, so the script
// must be run from the package root (which is where npm runs `postbuild`).
const dir = join(process.cwd(), 'build', 'client', 'pdfjs-wasm');
// jbig2.wasm decodes JBIG2 + CCITTFax; openjpeg.wasm decodes JPEG2000.
const required = ['jbig2.wasm', 'openjpeg.wasm'];
// A decoder counts as missing unless it is a regular, non-empty file. The
// isFile() check matters: a directory (or other non-file entry) that happens
// to carry the expected name reports a non-zero size on most filesystems and
// would otherwise slip through the guard.
const missing = required.filter((f) => {
  const p = join(dir, f);
  if (!existsSync(p)) return true;
  const stats = statSync(p);
  return !stats.isFile() || stats.size === 0;
});
if (missing.length > 0) {
  console.error(
    `\n[assert-pdfjs-wasm] MISSING from build output: ${missing.join(', ')}\n` +
      `Expected non-empty files in ${dir}.\n` +
      `The pdf.js wasm decoders did not ship — scanned PDFs would render blank.\n` +
      `Check the vite-plugin-static-copy target in vite.config.ts and that\n` +
      `node_modules/pdfjs-dist/wasm/ still contains these files. See issue #708.\n`
  );
  // Non-zero exit fails `npm run build`, and with it any pipeline running it.
  process.exit(1);
}

View File

@@ -1,52 +0,0 @@
import { describe, it, expect, afterEach, vi } from 'vitest';
import { cleanup, render } from 'vitest-browser-svelte';
import PdfViewer from './PdfViewer.svelte';
import ccittUrl from './fixtures/ccitt-g4.pdf?url';
import jpegUrl from './fixtures/jpeg-dct.pdf?url';
// Behavioral, real-render coverage of the wasm decode path. Unlike the rest of
// the viewer tests, these use the REAL pdf.js loader (no libLoader prop) so the
// page is actually decoded and painted, and the wasm is fetched from
// /pdfjs-wasm/ exactly as in production. CI runs this in a real Chromium.
// See issue #708.
// Run the library's `cleanup` after every test. Presumably this unmounts what
// `render` mounted, so the `document.querySelector('canvas')` lookup in a later
// test cannot match a canvas left behind by an earlier one — confirm against
// the vitest-browser-svelte docs.
afterEach(cleanup);
/**
 * Counts the pixels of a canvas that are not page background.
 *
 * A blank page is a uniform white canvas, while a rendered page has dark glyph
 * pixels. A pixel is counted when it is not fully transparent (alpha > 0) and
 * at least one of its red/green/blue channels is below 250.
 *
 * @param canvas Canvas whose full bitmap is inspected.
 * @returns Number of non-background pixels; 0 when no 2D context is available
 *   or either canvas dimension is zero.
 */
function countNonBackgroundPixels(canvas: HTMLCanvasElement): number {
  const { width, height } = canvas;
  const context = canvas.getContext('2d');
  if (!context || width === 0 || height === 0) return 0;

  // RGBA bytes, four per pixel, in row-major order.
  const pixels = context.getImageData(0, 0, width, height).data;
  let painted = 0;
  for (let offset = 0; offset < pixels.length; offset += 4) {
    const isVisible = pixels[offset + 3] > 0;
    const isOffWhite =
      pixels[offset] < 250 || pixels[offset + 1] < 250 || pixels[offset + 2] < 250;
    if (isVisible && isOffWhite) painted += 1;
  }
  return painted;
}
/**
 * Mounts `PdfViewer` for the given PDF and waits until a canvas in the
 * document shows painted content.
 *
 * The check is retried every 250 ms for up to 20 s. It passes once the first
 * `<canvas>` in the document exists, has a non-zero width, and contains more
 * than 50 non-background pixels.
 *
 * @param url URL of the PDF passed to the viewer.
 * @returns A promise that resolves when the check passes and rejects if it is
 *   still failing at the timeout.
 */
async function expectNonBlankRender(url: string): Promise<void> {
  render(PdfViewer, { url, documentId: 'fixture' });

  const assertPagePainted = (): void => {
    const found = document.querySelector('canvas');
    expect(found).not.toBeNull();
    // Safe after the null assertion above: a failed expect throws first.
    const pageCanvas = found as HTMLCanvasElement;
    expect(pageCanvas.width).toBeGreaterThan(0);
    expect(countNonBackgroundPixels(pageCanvas)).toBeGreaterThan(50);
  };

  await vi.waitFor(assertPagePainted, { timeout: 20000, interval: 250 });
}
// One test per fixture: each renders the PDF through the viewer and requires
// a non-blank canvas. Titles and registration order are fixed by the table.
describe('PdfViewer — real codec fixtures (wasm decode path)', () => {
  const fixtures: ReadonlyArray<readonly [string, string]> = [
    [
      'renders a CCITT (G4 fax) scan as a non-blank page — same jbig2.wasm path JBIG2 uses',
      ccittUrl
    ],
    ['renders a DCTDecode (JPEG) PDF as a non-blank page — no regression', jpegUrl]
  ];

  for (const [title, fixtureUrl] of fixtures) {
    it(title, async () => {
      await expectNonBlankRender(fixtureUrl);
    });
  }
});