Initial import from garrytan/gstack@026751e (main snapshot via local relay)
Some checks failed
Workflow Lint / actionlint (push) Has been cancelled
Build CI Image / build (push) Has been cancelled
Skill Docs Freshness / check-freshness (push) Has been cancelled
Periodic Evals / build-image (push) Has been cancelled
Periodic Evals / evals (map[file:test/codex-e2e.test.ts name:e2e-codex]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/gemini-e2e.test.ts name:e2e-gemini]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-design.test.ts name:e2e-design]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-plan.test.ts name:e2e-plan]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-bugs.test.ts name:e2e-qa-bugs]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-workflow.test.ts name:e2e-qa-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-review.test.ts name:e2e-review]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-workflow.test.ts name:e2e-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-routing-e2e.test.ts name:e2e-routing]) (push) Has been cancelled

Source: https://github.com/garrytan/gstack/commit/026751e
This commit is contained in:
Rocky
2026-05-19 21:18:17 +02:00
commit 834c6db075
797 changed files with 267839 additions and 0 deletions

View File

@@ -0,0 +1,135 @@
/**
* browseClient unit tests — binary resolution and error mapping.
*
* These are pure unit tests; they do NOT require a running browse daemon.
* Cross-platform: assertions that pin POSIX behavior early-return on win32
* and vice versa, so both lanes only exercise their own branch.
*/
import { describe, expect, test } from "bun:test";
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import { BrowseClientError } from "../src/types";
import { resolveBrowseBin, findExecutable } from "../src/browseClient";
// Path to an executable that is expected to exist on whatever platform runs
// the tests: `cmd.exe` under System32 on Windows, `/bin/sh` on POSIX. The
// override tests point GSTACK_BROWSE_BIN / BROWSE_BIN at it, so both lanes can
// exercise "a real executable" without writing a temp script.
const REAL_EXE: string = (() => {
  if (process.platform !== "win32") return "/bin/sh";
  // Fall back to the conventional install location when SystemRoot is unset.
  const systemRoot = process.env.SystemRoot ?? "C:\\Windows";
  return path.join(systemRoot, "System32", "cmd.exe");
})();
/**
 * Runs `fn` with `process.env` temporarily patched, then puts the touched
 * variables back — even when `fn` throws.
 *
 * @param overrides Variables to set for the duration of the call; a value of
 *   `undefined` removes the variable instead of setting it.
 * @param fn Synchronous callback executed while the overrides are in place.
 * @returns Whatever `fn` returns.
 */
function withEnv<T>(overrides: Record<string, string | undefined>, fn: () => T): T {
  // Writes every entry of `vars` into process.env (undefined means "unset").
  const apply = (vars: Record<string, string | undefined>): void => {
    for (const name of Object.keys(vars)) {
      const value = vars[name];
      if (value === undefined) delete process.env[name];
      else process.env[name] = value;
    }
  };

  // Snapshot all affected variables before any of them is modified.
  const previous: Record<string, string | undefined> = {};
  for (const name of Object.keys(overrides)) previous[name] = process.env[name];

  apply(overrides);
  try {
    return fn();
  } finally {
    apply(previous);
  }
}
// findExecutable: platform-specific probing. Each platform-pinned test
// returns early on the other platform so a lane only runs its own branch.
describe("findExecutable", () => {
  const onWindows = process.platform === "win32";

  test("returns the bare path on POSIX when it's executable", () => {
    if (onWindows) return;
    expect(findExecutable("/bin/sh")).toBe("/bin/sh");
  });

  test("on win32, probes .exe / .cmd / .bat after the bare-path miss", () => {
    if (!onWindows) return;
    // Probe with the extension-less base of System32\cmd.exe.
    const systemRoot = process.env.SystemRoot ?? "C:\\Windows";
    const extensionless = path.join(systemRoot, "System32", "cmd");
    expect(findExecutable(extensionless)).toBe(extensionless + ".exe");
  });

  test("returns null when no extension matches", () => {
    expect(findExecutable("/nonexistent/path/to/nothing")).toBeNull();
  });
});
// resolveBrowseBin: override precedence (GSTACK_BROWSE_BIN, then the legacy
// BROWSE_BIN alias), quote stripping, and the shape of the "not found" error.
describe("resolveBrowseBin", () => {
  test("throws BrowseClientError with setup hint when nothing is found", () => {
    // Point overrides at non-existent paths and clear PATH so Bun.which finds
    // nothing. Sibling/global probes go through findExecutable on real paths,
    // but the test asserts on the error shape rather than depending on whether
    // a real browse install exists on the box.
    let thrown: unknown = null;
    try {
      withEnv(
        {
          GSTACK_BROWSE_BIN: "/nonexistent/gstack-browse-bin",
          BROWSE_BIN: "/nonexistent/browse-bin",
          PATH: "",
          Path: "",
        },
        () => resolveBrowseBin(),
      );
    } catch (err) {
      thrown = err;
    }
    // If the test box has a real browse install on disk, sibling/global may
    // resolve and the helper won't throw — that's fine; the assertions are
    // gated on whether it threw at all.
    if (thrown) {
      expect(thrown).toBeInstanceOf(BrowseClientError);
      const { message } = thrown as BrowseClientError;
      expect(message).toContain("browse binary not found");
      expect(message).toContain("./setup");
      expect(message).toContain("GSTACK_BROWSE_BIN");
      // Back-compat alias still surfaces in the diagnostic. A plain substring
      // check for "BROWSE_BIN" is always satisfied by "GSTACK_BROWSE_BIN", so
      // require an occurrence that is NOT part of the prefixed name.
      expect(message).toMatch(/(?<!GSTACK_)BROWSE_BIN/);
    }
  });

  test("honors GSTACK_BROWSE_BIN when it points at a real executable", () => {
    const resolved = withEnv({ GSTACK_BROWSE_BIN: REAL_EXE }, () => resolveBrowseBin());
    expect(resolved).toBe(REAL_EXE);
  });

  test("honors BROWSE_BIN as a back-compat alias", () => {
    // The primary variable is explicitly unset so only the alias can match.
    const resolved = withEnv(
      { GSTACK_BROWSE_BIN: undefined, BROWSE_BIN: REAL_EXE },
      () => resolveBrowseBin(),
    );
    expect(resolved).toBe(REAL_EXE);
  });

  test("GSTACK_BROWSE_BIN takes precedence over BROWSE_BIN", () => {
    // The alias points at a missing path; resolution must still succeed.
    const resolved = withEnv(
      { GSTACK_BROWSE_BIN: REAL_EXE, BROWSE_BIN: "/nonexistent/legacy" },
      () => resolveBrowseBin(),
    );
    expect(resolved).toBe(REAL_EXE);
  });

  test("strips wrapping double quotes from override values", () => {
    const resolved = withEnv({ GSTACK_BROWSE_BIN: `"${REAL_EXE}"` }, () => resolveBrowseBin());
    expect(resolved).toBe(REAL_EXE);
  });
});
// BrowseClientError: constructor arguments are exposed as fields and folded
// into the human-readable message.
describe("BrowseClientError", () => {
  test("captures exit code, command, and stderr", () => {
    const stderrText = "Chromium not found";
    const failure = new BrowseClientError(127, "pdf", stderrText);

    expect(failure.exitCode).toBe(127);
    expect(failure.command).toBe("pdf");
    expect(failure.stderr).toBe(stderrText);
    expect(failure.message).toContain("browse pdf exited 127");
    expect(failure.message).toContain(stderrText);
    expect(failure.name).toBe("BrowseClientError");
  });
});

View File

@@ -0,0 +1,76 @@
/**
* Combined-features copy-paste gate — the P0 CI gate.
*
* This test runs the compiled `make-pdf/dist/pdf` binary against a fixture
* that has every v1 typography feature on (smartypants, hyphens, chapter
* breaks, bold/italic, inline code, blockquote, lists, headings). It then
* pipes the output through pdftotext and asserts the extracted text
* matches the handwritten expected.txt.
*
* Codex round 2 told us this (not per-feature gates) is the real gate a
* user actually cares about — features interact, and the combined
* extraction is what predicts production quality.
*
* Gating: only runs when the compiled binary + browse + pdftotext are all
* available. Skipped cleanly otherwise (local dev without full install).
*/
import { describe, expect, test } from "bun:test";
import { execFileSync } from "node:child_process";
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import { copyPasteGate, resolvePdftotext } from "../../src/pdftotext";
// Markdown input for the gate and the hand-written text its PDF extraction is
// compared against; both are resolved relative to this test file's directory.
const FIXTURE = path.resolve(__dirname, "../fixtures/combined-gate.md");
const EXPECTED = path.resolve(__dirname, "../fixtures/combined-gate.expected.txt");
// Three directories above this file — presumably the repository root
// (TODO confirm against the repo layout).
const ROOT = path.resolve(__dirname, "../../..");
// Compiled make-pdf binary that the gate executes.
const PDF_BIN = path.join(ROOT, "make-pdf/dist/pdf");
// Compiled browse binary; its path is passed to the make-pdf process through
// the BROWSE_BIN environment variable.
const BROWSE_BIN = path.join(ROOT, "browse/dist/browse");
/**
 * Checks that everything the combined gate depends on is present: the compiled
 * make-pdf and browse binaries, the fixture and its expected text, and a
 * resolvable pdftotext.
 *
 * @returns `{ ok: true }` when every prerequisite is available; otherwise
 *   `{ ok: false, reason }` where `reason` describes the first missing piece.
 */
function prerequisitesAvailable(): { ok: true } | { ok: false; reason: string } {
  if (!fs.existsSync(PDF_BIN)) return { ok: false, reason: `make-pdf binary missing (${PDF_BIN}). Run bun run build.` };
  if (!fs.existsSync(BROWSE_BIN)) return { ok: false, reason: `browse binary missing (${BROWSE_BIN}).` };
  if (!fs.existsSync(FIXTURE)) return { ok: false, reason: `fixture missing (${FIXTURE}).` };
  if (!fs.existsSync(EXPECTED)) return { ok: false, reason: `expected.txt missing (${EXPECTED}).` };
  try {
    resolvePdftotext();
  } catch (err: unknown) {
    // Narrow instead of assuming an Error: a non-Error throw would otherwise
    // yield `reason: undefined`, violating the declared `reason: string`.
    return { ok: false, reason: err instanceof Error ? err.message : String(err) };
  }
  return { ok: true };
}
// The gate itself: render the fixture to a PDF with the compiled binary, then
// check the extracted text against the expected file. Skipped (with a visible
// notice) when any prerequisite is missing.
describe("combined-features copy-paste gate", () => {
  const prereq = prerequisitesAvailable();

  test.skipIf(!prereq.ok)(
    "fixture PDF extracts cleanly through pdftotext",
    () => {
      if (!prereq.ok) return; // satisfies the type checker

      // Use /tmp directly (browse's validateOutputPath allows /private/tmp,
      // which macOS resolves /tmp to). os.tmpdir() returns /var/folders/...
      // which is outside the safe-dirs allowlist.
      const pdfPath = `/tmp/make-pdf-combined-gate-${process.pid}.pdf`;
      try {
        // Hand the browse binary location to the child via BROWSE_BIN.
        const childEnv = { ...process.env, BROWSE_BIN };
        execFileSync(PDF_BIN, ["generate", FIXTURE, pdfPath, "--quiet"], {
          encoding: "utf8",
          env: childEnv,
          stdio: ["ignore", "pipe", "pipe"],
        });
        expect(fs.existsSync(pdfPath)).toBe(true);

        const expectedText = fs.readFileSync(EXPECTED, "utf8");
        const gate = copyPasteGate(pdfPath, expectedText);
        if (!gate.ok) {
          // Dump the extraction and the reasons so CI logs are diagnosable.
          process.stderr.write(`\n--- EXTRACTED ---\n${gate.extracted}\n--- END ---\n\n`);
          process.stderr.write(`--- REASONS ---\n${gate.reasons.join("\n")}\n--- END ---\n`);
        }
        expect(gate.ok).toBe(true);
      } finally {
        // Best-effort cleanup; the file may not exist if generation failed.
        try {
          fs.unlinkSync(pdfPath);
        } catch {
          /* ignore */
        }
      }
    },
    30000,
  );

  if (!prereq.ok) {
    // Surface why the gate was skipped instead of skipping silently.
    test("prerequisites check", () => {
      console.warn(`[skip] ${prereq.reason}`);
    });
  }
});

View File

@@ -0,0 +1,20 @@
The Horizon
This is the combined-features fixture. Every feature turned on simultaneously. The gate asserts that all of these paragraphs extract cleanly from the PDF with pdftotext.
A paragraph with bold, italic, and inline code tokens — each of which gets a different HTML treatment. None should fragment text on copy-paste.
A paragraph with “curly quotes”, single quotes, an em dash — like this, and an ellipsis… All three get smartypants transforms.
A subsection heading
First list item with some words that keep it on one line.
Second list item with more words.
Third list item.
A blockquote from Van Dyke. Her diminished size is in me, not in her.
A second chapter
This content begins on a fresh page because the default chapter-breaks rule fires. Extract must still find these paragraphs.
A final paragraph with enough words to trigger hyphenation across the line wrap boundary. Extraordinary words sometimes hyphenate. Interdisciplinary ones certainly do.

30
make-pdf/test/fixtures/combined-gate.md vendored Normal file
View File

@@ -0,0 +1,30 @@
# The Horizon
This is the combined-features fixture. Every feature turned on simultaneously.
The gate asserts that all of these paragraphs extract cleanly from the PDF
with pdftotext.
A paragraph with **bold**, *italic*, and `inline code` tokens — each of which
gets a different HTML treatment. None should fragment text on copy-paste.
A paragraph with "curly quotes", 'single quotes', an em dash -- like this,
and an ellipsis... All three get smartypants transforms.
## A subsection heading
Lists must not break mid-item:
- First list item with some words that keep it on one line.
- Second list item with more words.
- Third list item.
> A blockquote from Van Dyke. Her diminished size is in me, not in her.
# A second chapter
This content begins on a fresh page because the default chapter-breaks rule
fires. Extract must still find these paragraphs.
A final paragraph with enough words to trigger hyphenation across the line
wrap boundary. Extraordinary words sometimes hyphenate. Interdisciplinary
ones certainly do.

View File

@@ -0,0 +1,207 @@
/**
* pdftotext unit tests — normalize() and copyPasteGate() assertions.
*
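* These tests are pure unit tests of the normalization + assertion logic,
* plus the binary-resolution helpers (findExecutable / resolvePdftotext).
* They do NOT require pdftotext to be installed: the text checks operate on
* hand-built strings, and the resolution tests point the override variables
* at an always-present system executable instead of a real pdftotext.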
*/
import { describe, expect, test } from "bun:test";
import * as path from "node:path";
import { normalize, copyPasteGate, findExecutable, resolvePdftotext, PdftotextUnavailableError } from "../src/pdftotext";
// normalize(): one row per rule. Each row registers a test whose name is the
// first element; the test feeds the second element to normalize() and expects
// the third.
describe("normalize", () => {
  const cases: ReadonlyArray<readonly [name: string, input: string, expected: string]> = [
    ["strips trailing spaces", "hello \nworld", "hello\nworld"],
    ["collapses runs of 3+ blank lines to 2", "a\n\n\n\nb", "a\n\nb"],
    ["converts form feeds to double newlines (page break boundary)", "page1\fpage2", "page1\n\npage2"],
    ["normalizes CRLF and CR to LF (Windows Xpdf)", "a\r\nb\rc", "a\nb\nc"],
    ["removes soft hyphens (hyphens: auto artifact)", "extra\u00adordinary", "extraordinary"],
    ["replaces non-breaking space with regular space", "hello\u00a0world", "hello world"],
    ["strips zero-width characters", "a\u200bb\u200cc", "abc"],
    // "é" decomposed (e + combining acute) must come out as the composed form.
    ["NFC-normalizes composed glyphs (macOS NFD → Linux NFC)", "e\u0301", "\u00e9"],
    ["trims leading/trailing whitespace on whole string", "\n\n hello \n\n", "hello"],
  ];

  for (const [name, input, expected] of cases) {
    test(name, () => {
      expect(normalize(input)).toBe(expected);
    });
  }
});
describe("copyPasteGate — assertion logic", () => {
  // These tests do not call copyPasteGate (it needs a real PDF and the
  // pdftotext binary). Instead they re-run the gate's string-level checks on
  // hand-built inputs: per-glyph fragment detection and paragraph-break
  // counting. The fragment regex below is a local copy of the pattern the
  // gate is described as using — keep it in sync with src/pdftotext.

  test("flags 'S ai li ng' per-glyph emission when reassembled letters appear in source", () => {
    // Build expected/extracted strings that would trip the gate.
    const expected = "Sailing on the open sea.";
    const extracted = "S a i l i n g on the open sea.";
    // Four or more consecutive "single word char + whitespace" pairs.
    const fragRegex = /((?:\b\w\s){4,})/g;
    const match = fragRegex.exec(extracted);
    expect(match).not.toBeNull();
    if (match) {
      // Re-join the spaced-out glyphs and confirm they spell a source word.
      const letters = match[1].replace(/\s/g, "");
      expect(letters.toLowerCase()).toBe("sailing");
      expect(expected.toLowerCase().includes(letters.toLowerCase())).toBe(true);
    }
  });

  test("does NOT flag 'A B C D' as per-glyph when letters don't appear in source", () => {
    const expected = "The quick brown fox.";
    const extracted = "The quick A B C D brown fox.";
    const fragRegex = /((?:\b\w\s){4,})/g;
    const match = fragRegex.exec(extracted);
    // The run must actually be detected; without this guard the check below
    // would be skipped and the test would pass vacuously if the regex ever
    // stopped matching.
    expect(match).not.toBeNull();
    if (match) {
      const letters = match[1].replace(/\s/g, "");
      // "ABCD" is not a substring of expected
      expect(expected.toLowerCase().includes(letters.toLowerCase())).toBe(false);
    }
  });

  test("paragraph boundary count drift calculation", () => {
    const expected = "para1\n\npara2\n\npara3";
    const extractedOk = "para1\n\npara2\n\npara3";
    const extractedTooFew = "para1 para2 para3";
    const extractedTooMany = "para1\n\n\n\npara2\n\n\n\npara3\n\n\n\npara4\n\n\n\npara5";
    // Number of blank-line paragraph separators in a string.
    const countBreaks = (text: string): number => (text.match(/\n\n/g) || []).length;
    const expectedBreaks = countBreaks(expected);
    const okBreaks = countBreaks(extractedOk);
    const tooFewBreaks = countBreaks(extractedTooFew);
    const tooManyBreaksNormalized = countBreaks(normalize(extractedTooMany));
    expect(Math.abs(expectedBreaks - okBreaks)).toBeLessThanOrEqual(4);
    expect(Math.abs(expectedBreaks - tooFewBreaks)).toBeGreaterThan(1);
    // After normalize, 3+ newlines become 2, so the count stays within tolerance
    expect(Math.abs(expectedBreaks - tooManyBreaksNormalized)).toBeLessThanOrEqual(4);
  });
});
// ─── Binary resolution (v1.24-aligned) ──────────────────────────
// An executable expected to exist on the current platform (`cmd.exe` on
// Windows, `/bin/sh` elsewhere); used as a stand-in target for the override
// variables in the resolution tests below.
const REAL_EXE: string = (() => {
  if (process.platform === "win32") {
    const winDir = process.env.SystemRoot ?? "C:\\Windows";
    return path.join(winDir, "System32", "cmd.exe");
  }
  return "/bin/sh";
})();
/**
 * Executes `fn` while the given environment variables are overridden, and
 * restores their prior values afterwards (also when `fn` throws).
 *
 * @param overrides Map of variable name to temporary value; `undefined`
 *   unsets the variable for the duration of the call.
 * @param fn Synchronous callback to run under the patched environment.
 * @returns The callback's return value.
 */
function withEnv<T>(overrides: Record<string, string | undefined>, fn: () => T): T {
  // Record the current value of every variable about to be touched.
  const snapshot: Record<string, string | undefined> = {};
  Object.keys(overrides).forEach((key) => {
    snapshot[key] = process.env[key];
  });

  // Sets the variable, or removes it when the value is undefined.
  const writeVar = ([key, value]: [string, string | undefined]): void => {
    if (value !== undefined) process.env[key] = value;
    else delete process.env[key];
  };

  Object.entries(overrides).forEach((entry) => writeVar(entry));
  try {
    return fn();
  } finally {
    Object.entries(snapshot).forEach((entry) => writeVar(entry));
  }
}
// findExecutable as exported from pdftotext.ts. The platform-pinned tests
// return early on the other platform.
describe("findExecutable (pdftotext.ts)", () => {
  test("returns the bare path on POSIX when it's executable", () => {
    if (process.platform === "win32") return;
    const shell = "/bin/sh";
    const resolved = findExecutable(shell);
    expect(resolved).toBe(shell);
  });

  test("on win32, probes .exe / .cmd / .bat after the bare-path miss", () => {
    if (process.platform !== "win32") return;
    // Extension-less base of System32\cmd.exe.
    const winDir = process.env.SystemRoot ?? "C:\\Windows";
    const base = path.join(winDir, "System32", "cmd");
    const resolved = findExecutable(base);
    expect(resolved).toBe(base + ".exe");
  });

  test("returns null when no extension matches", () => {
    const resolved = findExecutable("/nonexistent/path/to/nothing");
    expect(resolved).toBeNull();
  });
});
// resolvePdftotext: override precedence (GSTACK_PDFTOTEXT_BIN, then the
// PDFTOTEXT_BIN alias), quote stripping, and the "not found" diagnostic.
describe("resolvePdftotext (override resolution, v1.24-aligned)", () => {
  // Calls resolvePdftotext() with the given variables overridden for the call.
  const resolveUnder = (env: Record<string, string | undefined>) =>
    withEnv(env, () => resolvePdftotext());

  test("honors GSTACK_PDFTOTEXT_BIN when it points at a real executable", () => {
    // A real pdftotext can't be faked here, so any existing executable stands
    // in for it. Only the resolved path is asserted — cmd.exe / /bin/sh do not
    // answer a version probe the way pdftotext does, so nothing about the
    // detected flavor is checked.
    const info = resolveUnder({ GSTACK_PDFTOTEXT_BIN: REAL_EXE });
    expect(info.bin).toBe(REAL_EXE);
  });

  test("honors PDFTOTEXT_BIN as a back-compat alias", () => {
    const info = resolveUnder({ GSTACK_PDFTOTEXT_BIN: undefined, PDFTOTEXT_BIN: REAL_EXE });
    expect(info.bin).toBe(REAL_EXE);
  });

  test("GSTACK_PDFTOTEXT_BIN takes precedence over PDFTOTEXT_BIN", () => {
    const info = resolveUnder({
      GSTACK_PDFTOTEXT_BIN: REAL_EXE,
      PDFTOTEXT_BIN: "/nonexistent/legacy",
    });
    expect(info.bin).toBe(REAL_EXE);
  });

  test("strips wrapping double quotes from override values", () => {
    const info = resolveUnder({ GSTACK_PDFTOTEXT_BIN: `"${REAL_EXE}"` });
    expect(info.bin).toBe(REAL_EXE);
  });

  test("error message includes Windows install hint and GSTACK_PDFTOTEXT_BIN", () => {
    let failure: unknown = null;
    try {
      resolveUnder({
        GSTACK_PDFTOTEXT_BIN: "/nonexistent/gstack-pdftotext",
        PDFTOTEXT_BIN: "/nonexistent/pdftotext",
        PATH: "",
        Path: "",
      });
    } catch (err) {
      failure = err;
    }
    // If the test box has a real pdftotext on disk, resolution succeeds
    // (POSIX candidates) — that's fine; the assertions only apply when the
    // call threw. On Windows-CI without poppler, it throws.
    if (!failure) return;

    expect(failure).toBeInstanceOf(PdftotextUnavailableError);
    const { message } = failure as Error;
    expect(message).toContain("pdftotext not found");
    expect(message).toContain("GSTACK_PDFTOTEXT_BIN");
    expect(message).toContain("Windows");
    expect(message).toContain("scoop install poppler");
  });
});

View File

@@ -0,0 +1,449 @@
/**
* Renderer unit tests — pure-function assertions for render.ts, smartypants.ts,
* and print-css.ts. No Playwright, no PDF generation.
*/
import { describe, expect, test } from "bun:test";
import { render, sanitizeUntrustedHtml } from "../src/render";
import { smartypants } from "../src/smartypants";
import { printCss } from "../src/print-css";
// ─── smartypants ──────────────────────────────────────────────
// smartypants() takes an HTML string and returns it with typographic
// replacements applied to prose. The first group pins each replacement; the
// "does NOT touch" group pins the regions that must be left untouched.
describe("smartypants", () => {
test("converts straight double quotes to curly", () => {
const out = smartypants(`<p>She said "hello" to him.</p>`);
// U+201C / U+201D: left and right double quotation marks.
expect(out).toContain("\u201chello\u201d");
});
test("converts em dash (--)", () => {
const out = smartypants(`<p>This is it -- the answer.</p>`);
// U+2014: em dash.
expect(out).toContain("\u2014");
});
test("converts ellipsis (...)", () => {
const out = smartypants(`<p>Wait...</p>`);
// U+2026: horizontal ellipsis.
expect(out).toContain("\u2026");
});
test("converts apostrophes in contractions", () => {
const out = smartypants(`<p>don't you know?</p>`);
// U+2019: right single quotation mark used as the apostrophe.
expect(out).toContain("don\u2019t");
});
test("does NOT touch content inside <code> blocks", () => {
const input = `<pre><code>const x = "hello"; // it's fine</code></pre>`;
const out = smartypants(input);
expect(out).toBe(input); // unchanged
});
test("does NOT touch content inside <pre> blocks", () => {
const input = `<pre>"quoted" -- don't</pre>`;
const out = smartypants(input);
expect(out).toBe(input);
});
test("does NOT touch inline code", () => {
const out = smartypants(`<p>Use <code>it's</code> like this: "hello".</p>`);
// The code span keeps its straight apostrophe while the surrounding prose
// is still converted.
expect(out).toContain("<code>it's</code>");
expect(out).toContain("\u201chello\u201d");
});
test("does NOT touch URLs", () => {
const out = smartypants(`<p>Visit https://example.com/it's-page for "details".</p>`);
// The URL is preserved verbatim; the quoted word after it is converted.
expect(out).toContain("https://example.com/it's-page");
expect(out).toContain("\u201cdetails\u201d");
});
test("does NOT touch HTML attribute values", () => {
const out = smartypants(`<a href="it's-a-test.html">link</a>`);
expect(out).toContain(`href="it's-a-test.html"`);
});
test("does NOT convert -- in CLI flags", () => {
// Prose like "try --verbose mode" should not turn -- into em dash
const out = smartypants(`<p>Try --verbose mode.</p>`);
// Here "--" follows a space and is glued to the next word (a CLI flag).
// Ideally it stays intact, but this assertion is deliberately lenient: it
// accepts either the untouched flag or an em-dash conversion, so it records
// the intent without enforcing it.
expect(out).toMatch(/--verbose|—verbose/);
});
});
// ─── sanitizer ──────────────────────────────────────────────
// sanitizeUntrustedHtml() takes an HTML string and returns a cleaned string.
// Each test feeds one dangerous construct and asserts it is absent from the
// output, and where relevant that benign neighbouring markup survives.
describe("sanitizeUntrustedHtml", () => {
test("strips <script> tags and content", () => {
const input = `<p>hello</p><script>alert(1)</script><p>world</p>`;
const out = sanitizeUntrustedHtml(input);
// Both the tag and its body must go; the surrounding paragraphs stay.
expect(out).not.toContain("<script");
expect(out).not.toContain("alert");
expect(out).toContain("<p>hello</p>");
expect(out).toContain("<p>world</p>");
});
test("strips <iframe>", () => {
const input = `<p>hi</p><iframe src="evil.com"></iframe>`;
expect(sanitizeUntrustedHtml(input)).not.toContain("<iframe");
});
test("strips onclick attribute", () => {
const input = `<a href="#" onclick="alert(1)">click</a>`;
const out = sanitizeUntrustedHtml(input);
// Only the handler is removed; the harmless href is kept.
expect(out).not.toContain("onclick");
expect(out).toContain("href=\"#\"");
});
test("strips event handlers with mixed case (onClick, ONCLICK)", () => {
const input1 = `<a href="#" onClick="x()">a</a>`;
const input2 = `<a href="#" ONCLICK="x()">b</a>`;
expect(sanitizeUntrustedHtml(input1)).not.toContain("onClick");
expect(sanitizeUntrustedHtml(input2)).not.toContain("ONCLICK");
});
test("rewrites javascript: URLs in href to #", () => {
const input = `<a href="javascript:alert(1)">bad</a>`;
const out = sanitizeUntrustedHtml(input);
expect(out).not.toContain("javascript:");
expect(out).toContain('href="#"');
});
test("strips inline SVG <script>", () => {
const input = `<svg><script>alert(1)</script><circle r="5"/></svg>`;
const out = sanitizeUntrustedHtml(input);
// The script inside the SVG is removed while the drawing element remains.
expect(out).not.toContain("<script");
expect(out).toContain("<circle");
});
test("strips <object>, <embed>, <link>, <meta>, <base>, <form>", () => {
// One payload containing every element that must be removed.
const input = `
<object data="x.swf"></object>
<embed src="y.mov">
<link rel="stylesheet" href="evil.css">
<meta http-equiv="refresh" content="0;url=evil">
<base href="evil.com">
<form action="evil"><input/></form>
`;
const out = sanitizeUntrustedHtml(input);
expect(out).not.toContain("<object");
expect(out).not.toContain("<embed");
expect(out).not.toContain("<link");
expect(out).not.toContain("<meta");
expect(out).not.toContain("<base");
expect(out).not.toContain("<form");
});
test("strips srcdoc attribute (iframe escape vector)", () => {
const input = `<div srcdoc="<script>bad</script>">hi</div>`;
expect(sanitizeUntrustedHtml(input)).not.toContain("srcdoc");
});
});
// ─── end-to-end render ──────────────────────────────────────────────
// render() is called with an options object (markdown plus optional flags)
// and, as used below, returns an object exposing `html`, `meta.title` and
// `printCss`. These tests check the assembled document rather than any
// single stage.
describe("render (end-to-end)", () => {
test("produces a full HTML document with title, body, and CSS", () => {
const result = render({
markdown: `# Hello\n\nA paragraph with "quotes" and -- dashes.\n`,
});
expect(result.html).toContain("<!doctype html>");
expect(result.html).toContain("<title>Hello</title>");
expect(result.html).toContain("<h1");
expect(result.html).toContain("Hello");
// CSS should be inlined as <style>...
expect(result.html).toMatch(/<style>[\s\S]*font-family: Helvetica/);
// Smartypants ran
expect(result.html).toContain("\u201cquotes\u201d");
expect(result.html).toContain("\u2014");
});
test("derives title from first H1 when --title is not passed", () => {
const result = render({ markdown: `# My Title\n\nBody.` });
expect(result.meta.title).toBe("My Title");
});
test("uses --title override when provided", () => {
const result = render({
markdown: `# Auto-derived\n\nBody.`,
title: "Explicit Title",
});
expect(result.meta.title).toBe("Explicit Title");
});
test("includes cover block when cover=true", () => {
const result = render({
markdown: `# Doc\n\nBody.`,
cover: true,
subtitle: "A subtitle",
author: "Garry Tan",
});
// The cover markup is present and carries the subtitle and author text.
expect(result.html).toContain(`class="cover"`);
expect(result.html).toContain(`class="cover-title"`);
expect(result.html).toContain("A subtitle");
expect(result.html).toContain("Garry Tan");
});
test("omits cover block when cover=false", () => {
// `cover` is left unset here, so this exercises the default.
const result = render({ markdown: `# Memo\n\nBody.` });
expect(result.html).not.toContain(`class="cover"`);
});
test("injects watermark element when --watermark is set", () => {
const result = render({ markdown: `# Doc`, watermark: "DRAFT" });
expect(result.html).toContain(`class="watermark"`);
expect(result.html).toContain("DRAFT");
// And the CSS rule for it must be present
expect(result.html).toContain("position: fixed");
expect(result.html).toContain("rotate(-30deg)");
});
test("wraps each H1 in its own .chapter section (default)", () => {
const result = render({
markdown: `# One\n\nbody 1\n\n# Two\n\nbody 2\n`,
});
// Two H1 headings in the input, so two chapter wrappers are expected.
const chapterMatches = result.html.match(/class="chapter"/g);
expect(chapterMatches).toBeTruthy();
if (chapterMatches) expect(chapterMatches.length).toBe(2);
});
test("does NOT create chapter sections when noChapterBreaks=true", () => {
const result = render({
markdown: `# One\n\nbody\n\n# Two\n\nbody\n`,
noChapterBreaks: true,
});
// NOTE(review): the title says no chapter sections, yet exactly one
// `class="chapter"` match is expected — presumably the whole body is
// wrapped in a single section in this mode; confirm against render.ts.
const chapterMatches = result.html.match(/class="chapter"/g) ?? [];
expect(chapterMatches.length).toBe(1);
});
test("builds a TOC with H1/H2 entries when toc=true", () => {
const result = render({
markdown: `# One\n\n## Sub\n\nbody\n\n# Two\n\nbody\n`,
toc: true,
});
expect(result.html).toContain(`class="toc"`);
expect(result.html).toContain(`<h2>Contents</h2>`);
expect(result.html).toContain("One");
expect(result.html).toContain("Sub");
expect(result.html).toContain("Two");
});
test("strips dangerous HTML from untrusted markdown", () => {
const result = render({
markdown: `# Safe\n\n<script>alert('xss')</script>\n\nBody.`,
});
// Raw HTML embedded in the markdown must not reach the output document.
expect(result.html).not.toContain("<script");
expect(result.html).not.toContain("alert");
expect(result.html).toContain("Safe");
});
test("respects text-align: left — no justify in print CSS", () => {
const result = render({ markdown: `para1\n\npara2\n` });
// The rule from the design-review fix: no p + p indent, text-align: left.
expect(result.printCss).toContain("text-align: left");
expect(result.printCss).not.toContain("text-align: justify");
expect(result.printCss).not.toContain("text-indent");
});
test("includes CJK font fallback in body", () => {
const result = render({ markdown: `body` });
expect(result.printCss).toContain("Hiragino Kaku Gothic");
expect(result.printCss).toContain("Noto Sans CJK");
});
});
// ─── print-css ──────────────────────────────────────────────
// printCss() is called with no argument or an options object and returns the
// print stylesheet as a string. The tests pin the defaults and check that
// each option changes the emitted CSS.
describe("printCss", () => {
test("emits 1in margins by default", () => {
const css = printCss();
expect(css).toContain("margin: 1in");
});
test("respects custom margins flag", () => {
const css = printCss({ margins: "72pt" });
expect(css).toContain("margin: 72pt");
});
test("emits letter page size by default", () => {
const css = printCss();
expect(css).toContain("size: letter");
});
test("respects custom page size", () => {
const css = printCss({ pageSize: "a4" });
expect(css).toContain("size: a4");
});
test("suppresses running header and footer on cover page", () => {
const css = printCss();
// An `@page :first` rule followed by two `content: none` declarations
// (the regex does not confine them to that rule's own braces).
expect(css).toMatch(/@page\s*:first\s*\{[\s\S]*?content:\s*none[\s\S]*?content:\s*none/);
});
test("omits CONFIDENTIAL when confidential=false", () => {
const css = printCss({ confidential: false });
expect(css).not.toContain("CONFIDENTIAL");
});
test("emits watermark CSS only when watermark is set", () => {
const withWatermark = printCss({ watermark: "DRAFT" });
expect(withWatermark).toContain(".watermark");
expect(withWatermark).toContain("rotate(-30deg)");
const withoutWatermark = printCss();
expect(withoutWatermark).not.toContain(".watermark");
});
test("drops chapter break rule when noChapterBreaks=true", () => {
const on = printCss({ noChapterBreaks: false });
expect(on).toContain("break-before: page");
// The "off" check targets the `.chapter` rule specifically, so other
// `break-before: page` declarations would not trip it.
const off = printCss({ noChapterBreaks: true });
expect(off).not.toContain(".chapter { break-before: page");
});
test("always sets p { text-align: left }", () => {
const css = printCss();
expect(css).toContain("text-align: left");
});
test("never sets text-indent on p", () => {
const css = printCss();
// Confirm no p-indent slipped in
expect(css).not.toMatch(/p\s*\+\s*p\s*\{[^}]*text-indent/);
});
test("emits @bottom-center page-number rule by default", () => {
const css = printCss();
expect(css).toMatch(/@bottom-center\s*\{\s*content:\s*counter\(page\)/);
});
test("suppresses @bottom-center page-number rule when pageNumbers=false", () => {
const css = printCss({ pageNumbers: false });
expect(css).not.toMatch(/@bottom-center\s*\{\s*content:\s*counter\(page\)/);
});
test("still emits @bottom-center when pageNumbers=true (explicit)", () => {
const css = printCss({ pageNumbers: true });
expect(css).toMatch(/@bottom-center\s*\{\s*content:\s*counter\(page\)/);
});
test("font stacks include Liberation Sans adjacent to Helvetica", () => {
const css = printCss({ confidential: true });
// Body stack
expect(css).toMatch(/font-family:\s*Helvetica,\s*"Liberation Sans",\s*Arial/);
// At least one @page margin box (running header / page number / CONFIDENTIAL)
// should also have the updated stack.
const marginBoxStacks = css.match(/@(top|bottom)-(center|right)\s*\{[^}]*Liberation Sans/g) ?? [];
expect(marginBoxStacks.length).toBeGreaterThanOrEqual(1);
});
test("all four original Helvetica stacks now include Liberation Sans", () => {
const css = printCss({ runningHeader: "Running Title", confidential: true });
// Count: body (1) + running header (1) + page numbers (1) + confidential (1) = 4
// (a lower bound — additional stacks would still pass).
const occurrences = (css.match(/"Liberation Sans"/g) ?? []).length;
expect(occurrences).toBeGreaterThanOrEqual(4);
});
});
// ─── render() — pageNumbers / footerTemplate data flow ───────────────
// Checks that the page-number options passed to render() reach the print CSS:
// the `@bottom-center { content: counter(page)` rule is present by default and
// absent when page numbers are disabled or a custom footer is supplied.
describe("render() — pageNumbers data flow", () => {
  // Matches the CSS page-number footer rule. Non-global, so safe to reuse.
  const cssPageNumberRule = /@bottom-center\s*\{\s*content:\s*counter\(page\)/;

  test("CSS footer renders by default", () => {
    const { printCss: css } = render({ markdown: `# Doc\n\nBody.` });
    expect(css).toMatch(cssPageNumberRule);
  });

  test("--no-page-numbers reaches the CSS layer", () => {
    const { printCss: css } = render({ markdown: `# Doc\n\nBody.`, pageNumbers: false });
    expect(css).not.toMatch(cssPageNumberRule);
  });

  test("footerTemplate suppresses CSS page numbers (custom footer wins)", () => {
    const { printCss: css } = render({
      markdown: `# Doc\n\nBody.`,
      footerTemplate: `<div class="foo">custom</div>`,
    });
    expect(css).not.toMatch(cssPageNumberRule);
  });

  test("pageNumbers=true + no footerTemplate keeps CSS footer", () => {
    const { printCss: css } = render({ markdown: `# Doc`, pageNumbers: true });
    expect(css).toMatch(cssPageNumberRule);
  });
});
// ─── render() — HTML entity handling in titles, cover, TOC ───────────
// Guards against double HTML-escaping of heading text where it is reused:
// the document <title>, the cover title, and TOC entries.
describe("render() — no double HTML entity escaping", () => {
// One row per special character: the H1 text fed in and the plain-text title
// expected in `meta.title`.
type Case = { char: string; inTitle: string; expectedTitleMeta: string };
// Only characters that should flow through unchanged. `"` and `'` are
// omitted from this set because smartypants converts them to curly quotes
// before heading extraction — asserted separately below.
const cases: Case[] = [
{ char: "&", inTitle: "A & B", expectedTitleMeta: "A & B" },
{ char: "<", inTitle: "A < B", expectedTitleMeta: "A < B" },
{ char: ">", inTitle: "A > B", expectedTitleMeta: "A > B" },
{ char: "©", inTitle: "A © B", expectedTitleMeta: "A © B" },
{ char: "—", inTitle: "A — B", expectedTitleMeta: "A — B" },
];
// Registers one test per row above.
for (const { char, inTitle, expectedTitleMeta } of cases) {
test(`"${char}" in H1 has no double-escape in <title> or cover`, () => {
const result = render({
markdown: `# ${inTitle}\n\nBody.`,
cover: true,
author: "A",
});
// Meta: decoded plain text.
expect(result.meta.title).toBe(expectedTitleMeta);
// HTML: <title>...</title> never contains double-escape patterns.
expect(result.html).not.toMatch(/<title>[^<]*&amp;amp;/);
expect(result.html).not.toMatch(/<title>[^<]*&amp;lt;/);
expect(result.html).not.toMatch(/<title>[^<]*&amp;gt;/);
expect(result.html).not.toMatch(/<title>[^<]*&amp;#\d+;/);
expect(result.html).not.toMatch(/<title>[^<]*&amp;#x[0-9a-fA-F]+;/);
// Cover block also single-escape.
expect(result.html).not.toMatch(/class="cover-title"[^>]*>[^<]*&amp;amp;/);
});
}
test('ampersand in <title> renders as exactly one "&amp;"', () => {
const result = render({ markdown: `# Faber & Faber\n\nBody.` });
expect(result.html).toContain("<title>Faber &amp; Faber</title>");
expect(result.html).not.toContain("&amp;amp;");
});
test("TOC entries have no double-escape when a heading contains '&'", () => {
const result = render({
markdown: `# Doc\n\n## Faber & Faber\n\nBody.\n\n## Other\n\nMore.`,
toc: true,
});
// TOC renders the heading text through escapeHtml; must be single-escaped.
expect(result.html).toContain("Faber &amp; Faber");
expect(result.html).not.toContain("&amp;amp;");
});
test('numeric entity in H1 (e.g. "&#169;") decodes cleanly to <title>', () => {
// Marked passes through numeric entities verbatim in the HTML output,
// so the decoder must handle them.
const result = render({ markdown: `# A &#169; B\n\nBody.` });
expect(result.meta.title).toBe("A © B");
expect(result.html).toContain("<title>A © B</title>");
});
test("smartypants converts raw quotes in title BEFORE extraction (contract)", () => {
// We do NOT assert raw `"` survives — smartypants is expected to convert it.
// The contract is: no double-escape of the encoded form.
const result = render({ markdown: `# Say "hi"\n\nBody.` });
expect(result.html).not.toContain("&amp;quot;");
expect(result.html).not.toContain("&amp;#39;");
// And <title> contains exactly one level of escaping.
const titleMatch = result.html.match(/<title>([^<]*)<\/title>/);
expect(titleMatch).toBeTruthy();
if (titleMatch) {
// Never contains a double-encoded entity.
expect(titleMatch[1]).not.toMatch(/&amp;(amp|lt|gt|quot|#\d+);/);
}
});
});