Initial import from garrytan/gstack@026751e (main snapshot via local relay)
Some checks failed
Workflow Lint / actionlint (push) Has been cancelled
Build CI Image / build (push) Has been cancelled
Skill Docs Freshness / check-freshness (push) Has been cancelled
Periodic Evals / build-image (push) Has been cancelled
Periodic Evals / evals (map[file:test/codex-e2e.test.ts name:e2e-codex]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/gemini-e2e.test.ts name:e2e-gemini]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-design.test.ts name:e2e-design]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-plan.test.ts name:e2e-plan]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-bugs.test.ts name:e2e-qa-bugs]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-workflow.test.ts name:e2e-qa-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-review.test.ts name:e2e-review]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-workflow.test.ts name:e2e-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-routing-e2e.test.ts name:e2e-routing]) (push) Has been cancelled

Source: https://github.com/garrytan/gstack/commit/026751e
This commit is contained in:
Rocky
2026-05-19 21:18:17 +02:00
commit 834c6db075
797 changed files with 267839 additions and 0 deletions

643
make-pdf/SKILL.md Normal file
View File

@@ -0,0 +1,643 @@
---
name: make-pdf
preamble-tier: 1
version: 1.0.0
description: |
Turn any markdown file into a publication-quality PDF. Proper 1in margins,
intelligent page breaks, page numbers, cover pages, running headers, curly
quotes and em dashes, clickable TOC, diagonal DRAFT watermark. Not a draft
artifact — a finished artifact. Use when asked to "make a PDF", "export to
PDF", "turn this markdown into a PDF", or "generate a document". (gstack)
Voice triggers (speech-to-text aliases): "make this a pdf", "make it a pdf", "export to pdf", "turn this into a pdf", "turn this markdown into a pdf", "generate a pdf", "make a pdf from", "pdf this markdown".
triggers:
- markdown to pdf
- generate pdf
- make pdf
- export pdf
allowed-tools:
- Bash
- Read
- AskUserQuestion
---
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
<!-- Regenerate: bun run gen:skill-docs -->
## Preamble (run first)
```bash
_UPD=$(~/.claude/skills/gstack/bin/gstack-update-check 2>/dev/null || .claude/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
[ -n "$_UPD" ] && echo "$_UPD" || true
mkdir -p ~/.gstack/sessions
touch ~/.gstack/sessions/"$PPID"
_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
find ~/.gstack/sessions -mmin +120 -type f -exec rm {} + 2>/dev/null || true
_PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true")
_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no")
_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
echo "BRANCH: $_BRANCH"
_SKILL_PREFIX=$(~/.claude/skills/gstack/bin/gstack-config get skill_prefix 2>/dev/null || echo "false")
echo "PROACTIVE: $_PROACTIVE"
echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED"
echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
_TEL_PROMPTED=$([ -f ~/.gstack/.telemetry-prompted ] && echo "yes" || echo "no")
_TEL_START=$(date +%s)
_SESSION_ID="$$-$(date +%s)"
echo "TELEMETRY: ${_TEL:-off}"
echo "TEL_PROMPTED: $_TEL_PROMPTED"
_EXPLAIN_LEVEL=$(~/.claude/skills/gstack/bin/gstack-config get explain_level 2>/dev/null || echo "default")
if [ "$_EXPLAIN_LEVEL" != "default" ] && [ "$_EXPLAIN_LEVEL" != "terse" ]; then _EXPLAIN_LEVEL="default"; fi
echo "EXPLAIN_LEVEL: $_EXPLAIN_LEVEL"
_QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning 2>/dev/null || echo "false")
echo "QUESTION_TUNING: $_QUESTION_TUNING"
mkdir -p ~/.gstack/analytics
if [ "$_TEL" != "off" ]; then
echo '{"skill":"make-pdf","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
fi
# Finalize a stale .pending-* marker left behind by an earlier session.
# Only the first path returned by find is handled: the loop breaks after
# its first iteration.
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
  if [ -f "$_PF" ]; then
    # Use "$HOME/..." here, not "~/...": a tilde inside double quotes is not
    # expanded, so the quoted-tilde form never passes the -x test and the
    # telemetry logger would never be invoked.
    if [ "$_TEL" != "off" ] && [ -x "$HOME/.claude/skills/gstack/bin/gstack-telemetry-log" ]; then
      ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true
    fi
    # The marker is removed whether or not telemetry was sent.
    rm -f "$_PF" 2>/dev/null || true
  fi
  break
done
eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true
_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl"
if [ -f "$_LEARN_FILE" ]; then
_LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ')
echo "LEARNINGS: $_LEARN_COUNT entries loaded"
if [ "$_LEARN_COUNT" -gt 5 ] 2>/dev/null; then
~/.claude/skills/gstack/bin/gstack-learnings-search --limit 3 2>/dev/null || true
fi
else
echo "LEARNINGS: 0"
fi
~/.claude/skills/gstack/bin/gstack-timeline-log '{"skill":"make-pdf","event":"started","branch":"'"$_BRANCH"'","session":"'"$_SESSION_ID"'"}' 2>/dev/null &
_HAS_ROUTING="no"
if [ -f CLAUDE.md ] && grep -q "## Skill routing" CLAUDE.md 2>/dev/null; then
_HAS_ROUTING="yes"
fi
_ROUTING_DECLINED=$(~/.claude/skills/gstack/bin/gstack-config get routing_declined 2>/dev/null || echo "false")
echo "HAS_ROUTING: $_HAS_ROUTING"
echo "ROUTING_DECLINED: $_ROUTING_DECLINED"
_VENDORED="no"
if [ -d ".claude/skills/gstack" ] && [ ! -L ".claude/skills/gstack" ]; then
if [ -f ".claude/skills/gstack/VERSION" ] || [ -d ".claude/skills/gstack/.git" ]; then
_VENDORED="yes"
fi
fi
echo "VENDORED_GSTACK: $_VENDORED"
echo "MODEL_OVERLAY: claude"
_CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode 2>/dev/null || echo "explicit")
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
```
## MAKE-PDF SETUP (run this check BEFORE any make-pdf command)
```bash
# Resolve the make-pdf binary into $P. Candidates are tried in order; each
# later candidate is only considered while P is still empty.
_ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
P=""
# 1. $MAKE_PDF_BIN override, accepted only when it is set and executable.
[ -n "$MAKE_PDF_BIN" ] && [ -x "$MAKE_PDF_BIN" ] && P="$MAKE_PDF_BIN"
# 2. Copy under the current git repo's .claude/skills, when inside a repo.
[ -z "$P" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.claude/skills/gstack/make-pdf/dist/pdf" ] && P="$_ROOT/.claude/skills/gstack/make-pdf/dist/pdf"
# 3. Copy under $HOME. Assigned without a check; the -x test below decides
#    whether it is actually usable.
[ -z "$P" ] && P="$HOME/.claude/skills/gstack/make-pdf/dist/pdf"
if [ -x "$P" ]; then
echo "MAKE_PDF_READY: $P"
alias _p_="$P" # shellcheck alias helper (not exported)
export P # available as $P in subsequent blocks within the same skill invocation
else
echo "MAKE_PDF_NOT_AVAILABLE (run './setup' in the gstack repo to build it)"
fi
```
If `MAKE_PDF_NOT_AVAILABLE` is printed: tell the user the binary is not
built. Have them run `./setup` from the gstack repo, then retry.
If `MAKE_PDF_READY` is printed: `$P` is the binary path for the rest of
the skill. Use `$P` (not an explicit path) so the skill body stays portable.
Core commands:
- `$P generate <input.md> [output.pdf]` — render markdown to PDF (80% use case)
- `$P generate --cover --toc essay.md out.pdf` — full publication layout
- `$P generate --watermark DRAFT memo.md draft.pdf` — diagonal DRAFT watermark
- `$P preview <input.md>` — render HTML and open in browser (fast iteration)
- `$P setup` — verify browse + Chromium + pdftotext and run a smoke test
- `$P --help` — full flag reference
Output contract:
- `stdout`: ONLY the output path on success. One line.
- `stderr`: progress (`Rendering HTML... Generating PDF...`) unless `--quiet`.
- Exit 0 success / 1 bad args / 2 render error / 3 Paged.js timeout / 4 browse unavailable.
## Plan Mode Safe Operations
In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`codex review`, writes to `~/.gstack/`, writes to the plan file, and `open` for generated artifacts.
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
If `SKILL_PREFIX` is `"true"`, suggest/invoke `/gstack-*` names. Disk paths stay `~/.claude/skills/gstack/[skill-name]/SKILL.md`.
If output shows `UPGRADE_AVAILABLE <old> <new>`: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined).
If output shows `JUST_UPGRADED <from> <to>`: print "Running gstack v{to} (just updated!)". If `SPAWNED_SESSION` is true, skip feature discovery.
Feature discovery, max one prompt per session:
- Missing `~/.claude/skills/gstack/.feature-prompted-continuous-checkpoint`: AskUserQuestion for Continuous checkpoint auto-commits. If accepted, run `~/.claude/skills/gstack/bin/gstack-config set checkpoint_mode continuous`. Always touch marker.
- Missing `~/.claude/skills/gstack/.feature-prompted-model-overlay`: inform "Model overlays are active. MODEL_OVERLAY shows the patch." Always touch marker.
After upgrade prompts, continue workflow.
If `WRITING_STYLE_PENDING` is `yes`: ask once about writing style:
> v1 prompts are simpler: first-use jargon glosses, outcome-framed questions, shorter prose. Keep default or restore terse?
Options:
- A) Keep the new default (recommended — good writing helps everyone)
- B) Restore V0 prose — set `explain_level: terse`
If A: leave `explain_level` unset (defaults to `default`).
If B: run `~/.claude/skills/gstack/bin/gstack-config set explain_level terse`.
Always run (regardless of choice):
```bash
rm -f ~/.gstack/.writing-style-prompt-pending
touch ~/.gstack/.writing-style-prompted
```
Skip if `WRITING_STYLE_PENDING` is `no` or does not appear in the preamble output.
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Lake** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
```bash
open https://garryslist.org/posts/boil-the-ocean
touch ~/.gstack/.completeness-intro-seen
```
Only run `open` if yes. Always run `touch`.
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
Options:
- A) Help gstack get better! (recommended)
- B) No thanks
If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`
If B: ask follow-up:
> Anonymous mode sends only aggregate usage, no unique ID.
Options:
- A) Sure, anonymous is fine
- B) No thanks, fully off
If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous`
If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off`
Always run:
```bash
touch ~/.gstack/.telemetry-prompted
```
Skip if `TEL_PROMPTED` is `yes`.
If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: ask once:
> Let gstack proactively suggest skills, like /qa for "does this work?" or /investigate for bugs?
Options:
- A) Keep it on (recommended)
- B) Turn it off — I'll type /commands myself
If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true`
If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false`
Always run:
```bash
touch ~/.gstack/.proactive-prompted
```
Skip if `PROACTIVE_PROMPTED` is `yes`.
If `HAS_ROUTING` is `no` AND `ROUTING_DECLINED` is `false` AND `PROACTIVE_PROMPTED` is `yes`:
Check if a CLAUDE.md file exists in the project root. If it does not exist, create it.
Use AskUserQuestion:
> gstack works best when your project's CLAUDE.md includes skill routing rules.
Options:
- A) Add routing rules to CLAUDE.md (recommended)
- B) No thanks, I'll invoke skills manually
If A: Append this section to the end of CLAUDE.md:
```markdown
## Skill routing
When the user's request matches an available skill, invoke it via the Skill tool. When in doubt, invoke the skill.
Key routing rules:
- Product ideas/brainstorming → invoke /office-hours
- Strategy/scope → invoke /plan-ceo-review
- Architecture → invoke /plan-eng-review
- Design system/plan review → invoke /design-consultation or /plan-design-review
- Full review pipeline → invoke /autoplan
- Bugs/errors → invoke /investigate
- QA/testing site behavior → invoke /qa or /qa-only
- Code review/diff check → invoke /review
- Visual polish → invoke /design-review
- Ship/deploy/PR → invoke /ship or /land-and-deploy
- Save progress → invoke /context-save
- Resume context → invoke /context-restore
```
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
If B: run `~/.claude/skills/gstack/bin/gstack-config set routing_declined true` and say they can re-enable with `gstack-config set routing_declined false`.
This only happens once per project. Skip if `HAS_ROUTING` is `yes` or `ROUTING_DECLINED` is `true`.
If `VENDORED_GSTACK` is `yes`, warn once via AskUserQuestion unless `~/.gstack/.vendoring-warned-$SLUG` exists:
> This project has gstack vendored in `.claude/skills/gstack/`. Vendoring is deprecated.
> Migrate to team mode?
Options:
- A) Yes, migrate to team mode now
- B) No, I'll handle it myself
If A:
1. Run `git rm -r .claude/skills/gstack/`
2. Run `echo '.claude/skills/gstack/' >> .gitignore`
3. Run `~/.claude/skills/gstack/bin/gstack-team-init required` (or `optional`)
4. Run `git add .claude/ .gitignore CLAUDE.md && git commit -m "chore: migrate gstack from vendored to team mode"`
5. Tell the user: "Done. Each developer now runs: `cd ~/.claude/skills/gstack && ./setup --team`"
If B: say "OK, you're on your own to keep the vendored copy up to date."
Always run (regardless of choice):
```bash
eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true
touch ~/.gstack/.vendoring-warned-${SLUG:-unknown}
```
If marker exists, skip.
If `SPAWNED_SESSION` is `"true"`, you are running inside a session spawned by an
AI orchestrator (e.g., OpenClaw). In spawned sessions:
- Do NOT use AskUserQuestion for interactive prompts. Auto-choose the recommended option.
- Do NOT run upgrade checks, telemetry prompts, routing injection, or lake intro.
- Focus on completing the task and reporting results via prose output.
- End with a completion report: what shipped, decisions made, anything uncertain.
## Artifacts Sync (skill start)
```bash
_GSTACK_HOME="${GSTACK_HOME:-$HOME/.gstack}"
# Prefer the v1.27.0.0 artifacts file; fall back to brain file for users
# upgrading mid-stream before the migration script runs.
if [ -f "$HOME/.gstack-artifacts-remote.txt" ]; then
_BRAIN_REMOTE_FILE="$HOME/.gstack-artifacts-remote.txt"
else
_BRAIN_REMOTE_FILE="$HOME/.gstack-brain-remote.txt"
fi
# Build these paths from $HOME rather than a quoted "~": tilde is not expanded
# inside double quotes, and both variables are later executed as "$VAR", so a
# literal "~/..." value would fail with "No such file or directory".
_BRAIN_SYNC_BIN="$HOME/.claude/skills/gstack/bin/gstack-brain-sync"
_BRAIN_CONFIG_BIN="$HOME/.claude/skills/gstack/bin/gstack-config"
# /sync-gbrain context-load: teach the agent to use gbrain when it's available.
# Per-worktree pin: post-spike redesign uses kubectl-style `.gbrain-source` in the
# git toplevel to scope queries. Look for the pin in the worktree (not a global
# state file) so that opening worktree B without a pin doesn't claim "indexed"
# just because worktree A was synced. Empty string when gbrain is not
# configured (zero context cost for non-gbrain users).
_GBRAIN_CONFIG="$HOME/.gbrain/config.json"
# Print gbrain usage guidance only when a gbrain config file exists and the
# gbrain command is on PATH.
if [ -f "$_GBRAIN_CONFIG" ] && command -v gbrain >/dev/null 2>&1; then
  # grep -c already prints "0" when nothing matches (and exits 1), so the
  # fallback must not echo a second "0"; "|| true" only neutralizes the
  # non-zero exit status.
  _GBRAIN_VERSION_OK=$(gbrain --version 2>/dev/null | grep -c '^gbrain ' || true)
  if [ "$_GBRAIN_VERSION_OK" -gt 0 ] 2>/dev/null; then
    # The pin is looked up in the current git toplevel only.
    _GBRAIN_PIN_PATH=""
    _REPO_TOP=$(git rev-parse --show-toplevel 2>/dev/null || echo "")
    if [ -n "$_REPO_TOP" ] && [ -f "$_REPO_TOP/.gbrain-source" ]; then
      _GBRAIN_PIN_PATH="$_REPO_TOP/.gbrain-source"
    fi
    if [ -n "$_GBRAIN_PIN_PATH" ]; then
      echo "GBrain configured. Prefer \`gbrain search\`/\`gbrain query\` over Grep for"
      echo "semantic questions; use \`gbrain code-def\`/\`code-refs\`/\`code-callers\` for"
      echo "symbol-aware code lookup. See \"## GBrain Search Guidance\" in CLAUDE.md."
      echo "Run /sync-gbrain to refresh."
    else
      echo "GBrain configured but this worktree isn't pinned yet. Run \`/sync-gbrain --full\`"
      echo "before relying on \`gbrain search\` for code questions in this worktree."
      echo "Falls back to Grep until pinned."
    fi
  fi
fi
_BRAIN_SYNC_MODE=$("$_BRAIN_CONFIG_BIN" get artifacts_sync_mode 2>/dev/null || echo off)
# Detect remote-MCP mode (Path 4 of /setup-gbrain). Local artifacts sync is
# a no-op in remote mode; the brain server pulls from GitHub/GitLab on its
# own cadence. Read claude.json directly to keep this preamble fast (no
# subprocess to claude CLI on every skill start).
_GBRAIN_MCP_MODE="none"
if command -v jq >/dev/null 2>&1 && [ -f "$HOME/.claude.json" ]; then
_GBRAIN_MCP_TYPE=$(jq -r '.mcpServers.gbrain.type // .mcpServers.gbrain.transport // empty' "$HOME/.claude.json" 2>/dev/null)
case "$_GBRAIN_MCP_TYPE" in
url|http|sse) _GBRAIN_MCP_MODE="remote-http" ;;
stdio) _GBRAIN_MCP_MODE="local-stdio" ;;
esac
fi
if [ -f "$_BRAIN_REMOTE_FILE" ] && [ ! -d "$_GSTACK_HOME/.git" ] && [ "$_BRAIN_SYNC_MODE" = "off" ]; then
_BRAIN_NEW_URL=$(head -1 "$_BRAIN_REMOTE_FILE" 2>/dev/null | tr -d '[:space:]')
if [ -n "$_BRAIN_NEW_URL" ]; then
echo "ARTIFACTS_SYNC: artifacts repo detected: $_BRAIN_NEW_URL"
echo "ARTIFACTS_SYNC: run 'gstack-brain-restore' to pull your cross-machine artifacts (or 'gstack-config set artifacts_sync_mode off' to dismiss forever)"
fi
fi
if [ -d "$_GSTACK_HOME/.git" ] && [ "$_BRAIN_SYNC_MODE" != "off" ]; then
_BRAIN_LAST_PULL_FILE="$_GSTACK_HOME/.brain-last-pull"
_BRAIN_NOW=$(date +%s)
_BRAIN_DO_PULL=1
if [ -f "$_BRAIN_LAST_PULL_FILE" ]; then
_BRAIN_LAST=$(cat "$_BRAIN_LAST_PULL_FILE" 2>/dev/null || echo 0)
_BRAIN_AGE=$(( _BRAIN_NOW - _BRAIN_LAST ))
[ "$_BRAIN_AGE" -lt 86400 ] && _BRAIN_DO_PULL=0
fi
if [ "$_BRAIN_DO_PULL" = "1" ]; then
( cd "$_GSTACK_HOME" && git fetch origin >/dev/null 2>&1 && git merge --ff-only "origin/$(git rev-parse --abbrev-ref HEAD)" >/dev/null 2>&1 ) || true
echo "$_BRAIN_NOW" > "$_BRAIN_LAST_PULL_FILE"
fi
"$_BRAIN_SYNC_BIN" --once 2>/dev/null || true
fi
if [ "$_GBRAIN_MCP_MODE" = "remote-http" ]; then
# Remote-MCP mode: local artifacts sync is a no-op (brain admin's server
# pulls from GitHub/GitLab). Show the user this is by design, not broken.
_GBRAIN_HOST=$(jq -r '.mcpServers.gbrain.url // empty' "$HOME/.claude.json" 2>/dev/null | sed -E 's|^https?://([^/:]+).*|\1|')
echo "ARTIFACTS_SYNC: remote-mode (managed by brain server ${_GBRAIN_HOST:-remote})"
elif [ -d "$_GSTACK_HOME/.git" ] && [ "$_BRAIN_SYNC_MODE" != "off" ]; then
_BRAIN_QUEUE_DEPTH=0
[ -f "$_GSTACK_HOME/.brain-queue.jsonl" ] && _BRAIN_QUEUE_DEPTH=$(wc -l < "$_GSTACK_HOME/.brain-queue.jsonl" | tr -d ' ')
_BRAIN_LAST_PUSH="never"
[ -f "$_GSTACK_HOME/.brain-last-push" ] && _BRAIN_LAST_PUSH=$(cat "$_GSTACK_HOME/.brain-last-push" 2>/dev/null || echo never)
echo "ARTIFACTS_SYNC: mode=$_BRAIN_SYNC_MODE | last_push=$_BRAIN_LAST_PUSH | queue=$_BRAIN_QUEUE_DEPTH"
else
echo "ARTIFACTS_SYNC: off"
fi
```
Privacy stop-gate: if output shows `ARTIFACTS_SYNC: off`, `artifacts_sync_mode_prompted` is `false`, and gbrain is on PATH or `gbrain doctor --fast --json` works, ask once:
> gstack can publish your artifacts (CEO plans, designs, reports) to a private GitHub repo that GBrain indexes across machines. How much should sync?
Options:
- A) Everything allowlisted (recommended)
- B) Only artifacts
- C) Decline, keep everything local
After answer:
```bash
# Chosen mode: full | artifacts-only | off
# Replace <choice> with the chosen mode before running.
# gstack-config is called by path so this block does not depend on shell
# variables set in an earlier block.
~/.claude/skills/gstack/bin/gstack-config set artifacts_sync_mode <choice>
~/.claude/skills/gstack/bin/gstack-config set artifacts_sync_mode_prompted true
```
If A/B and `~/.gstack/.git` is missing, ask whether to run `gstack-artifacts-init`. Do not block the skill.
At skill END before telemetry:
```bash
# "$HOME/..." instead of "~/...": a tilde inside double quotes is not
# expanded, so the quoted-tilde path is never found and "|| true" hides
# the failure.
"$HOME/.claude/skills/gstack/bin/gstack-brain-sync" --discover-new 2>/dev/null || true
"$HOME/.claude/skills/gstack/bin/gstack-brain-sync" --once 2>/dev/null || true
```
## Model-Specific Behavioral Patch (claude)
The following nudges are tuned for the claude model family. They are
**subordinate** to skill workflow, STOP points, AskUserQuestion gates, plan-mode
safety, and /ship review gates. If a nudge below conflicts with skill instructions,
the skill wins. Treat these as preferences, not rules.
**Todo-list discipline.** When working through a multi-step plan, mark each task
complete individually as you finish it. Do not batch-complete at the end. If a task
turns out to be unnecessary, mark it skipped with a one-line reason.
**Think before heavy actions.** For complex operations (refactors, migrations,
non-trivial new features), briefly state your approach before executing. This lets
the user course-correct cheaply instead of mid-flight.
**Dedicated tools over Bash.** Prefer Read, Edit, Write, Glob, Grep over shell
equivalents (cat, sed, find, grep). The dedicated tools are cheaper and clearer.
## Voice
Direct, concrete, builder-to-builder. Name the file, function, command, and user-visible impact. No filler.
No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted. Never corporate or academic. Short paragraphs. End with what to do.
The user has context you do not. Cross-model agreement is a recommendation, not a decision. The user decides.
## Completion Status Protocol
When completing a skill workflow, report status using one of:
- **DONE** — completed with evidence.
- **DONE_WITH_CONCERNS** — completed, but list concerns.
- **BLOCKED** — cannot proceed; state blocker and what was tried.
- **NEEDS_CONTEXT** — missing info; state exactly what is needed.
Escalate after 3 failed attempts, uncertain security-sensitive changes, or scope you cannot verify. Format: `STATUS`, `REASON`, `ATTEMPTED`, `RECOMMENDATION`.
## Operational Self-Improvement
Before completing, if you discovered a durable project quirk or command fix that would save 5+ minutes next time, log it:
```bash
~/.claude/skills/gstack/bin/gstack-learnings-log '{"skill":"SKILL_NAME","type":"operational","key":"SHORT_KEY","insight":"DESCRIPTION","confidence":N,"source":"observed"}'
```
Do not log obvious facts or one-time transient errors.
## Telemetry (run last)
After workflow completion, log telemetry. Use skill `name:` from frontmatter. OUTCOME is success/error/abort/unknown.
**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to
`~/.gstack/analytics/`, matching preamble analytics writes.
Run this bash:
```bash
_TEL_END=$(date +%s)
_TEL_DUR=$(( _TEL_END - _TEL_START ))
rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true
# Session timeline: record skill completion (local-only, never sent anywhere)
~/.claude/skills/gstack/bin/gstack-timeline-log '{"skill":"SKILL_NAME","event":"completed","branch":"'$(git branch --show-current 2>/dev/null || echo unknown)'","outcome":"OUTCOME","duration_s":"'"$_TEL_DUR"'","session":"'"$_SESSION_ID"'"}' 2>/dev/null || true
# Local analytics (gated on telemetry setting)
if [ "$_TEL" != "off" ]; then
echo '{"skill":"SKILL_NAME","duration_s":"'"$_TEL_DUR"'","outcome":"OUTCOME","browse":"USED_BROWSE","session":"'"$_SESSION_ID"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
fi
# Remote telemetry (opt-in, requires binary)
if [ "$_TEL" != "off" ] && [ -x ~/.claude/skills/gstack/bin/gstack-telemetry-log ]; then
~/.claude/skills/gstack/bin/gstack-telemetry-log \
--skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \
--used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null &
fi
```
Replace `SKILL_NAME`, `OUTCOME`, and `USED_BROWSE` before running.
## Plan Status Footer
Skills that run plan reviews (`/plan-*-review`, `/codex review`) include the EXIT PLAN MODE GATE blocking checklist at the end of the skill, which verifies the plan file ends with `## GSTACK REVIEW REPORT` before ExitPlanMode is called. Skills that don't run plan reviews (operational skills like `/ship`, `/qa`, `/review`) typically don't operate in plan mode and have no review report to verify; this footer is a no-op for them. Writing the plan file is the one edit allowed in plan mode.
# make-pdf: publication-quality PDFs from markdown
Turn `.md` files into PDFs that look like Faber & Faber essays: 1in margins,
left-aligned body, Helvetica throughout, curly quotes and em dashes, optional
cover page and clickable TOC, diagonal DRAFT watermark when you need it.
Copy-paste from the PDF produces clean words, never "S a i l i n g".
On Linux, install `fonts-liberation` for correct rendering — Helvetica and Arial
aren't present by default, and Liberation Sans is the standard metric-compatible
fallback. CI and Docker builds install it automatically via Dockerfile.ci.
## Core patterns
### 80% case — memo/letter
One command, no flags. Gets a clean PDF with running header, page numbers,
and CONFIDENTIAL footer by default.
```bash
$P generate letter.md # writes /tmp/letter.pdf
$P generate letter.md letter.pdf # explicit output path
```
### Publication mode — cover + TOC + chapter breaks
```bash
$P generate --cover --toc --author "Garry Tan" --title "On Horizons" \
essay.md essay.pdf
```
Each top-level H1 in the markdown starts a new page. Disable with
`--no-chapter-breaks` for memos that happen to have multiple H1s.
### Draft-stage watermark
```bash
$P generate --watermark DRAFT memo.md draft.pdf
```
Diagonal 10% opacity DRAFT across every page. When the draft is final, drop
the flag and regenerate.
### Fast iteration via preview
```bash
$P preview essay.md
```
Renders HTML with the same print CSS and opens it in your browser. Refresh
as you edit the markdown. Skip the PDF round trip until you're ready.
### Brand-free (no CONFIDENTIAL footer)
```bash
$P generate --no-confidential memo.md memo.pdf
```
## Common flags
```
Page layout:
--margins <dim> 1in (default) | 72pt | 2.54cm | 25mm
--page-size letter|a4|legal
Structure:
--cover Cover page (title, author, date, hairline rule)
--toc Clickable TOC with page numbers
--no-chapter-breaks Don't start a new page at every H1
Branding:
--watermark <text> Diagonal watermark ("DRAFT", "CONFIDENTIAL")
--header-template <html> Custom running header
--footer-template <html> Custom footer (mutex with --page-numbers)
--no-confidential Suppress the CONFIDENTIAL right-footer
Output:
--page-numbers "N of M" footer (default on)
--tagged Accessible PDF (default on)
--outline PDF bookmarks from headings (default on)
--quiet Suppress progress on stderr
--verbose Per-stage timings
Network:
--allow-network Fetch external images. Off by default
(blocks tracking pixels).
Metadata:
--title "..." Document title (defaults to first H1)
--author "..." Author for cover + PDF metadata
--date "..." Date for cover (defaults to today)
```
## When Claude should run it
Watch for markdown-to-PDF intent. Any of these patterns → run `$P generate`:
- "Can you make this markdown a PDF"
- "Export it as a PDF"
- "Turn this letter into a PDF"
- "I need a PDF of the essay"
- "Print this as a PDF for me"
If the user has a `.md` file open and says "make it look nice", propose
`$P generate --cover --toc` and ask before running.
## Debugging
- Output looks empty / blank → check browse daemon is running: `$B status`.
- Fragmented text on copy-paste → highlight.js output (Phase 4). Retry with
`--no-syntax` once that flag exists. For now, remove fenced code blocks
and regenerate.
- Paged.js timeout → probably no headings in the markdown. Drop `--toc`.
- External image missing → add `--allow-network` (understand you're giving
the markdown file permission to fetch from its image URLs).
- Generated PDF too tall/wide → `--page-size a4` or `--margins 0.75in`.
## Output contract
```
stdout: /tmp/letter.pdf ← just the path, one line
stderr: Rendering HTML... ← progress spinner (unless --quiet)
Generating PDF...
Done in 1.5s. 43 words · 22KB · /tmp/letter.pdf
exit code: 0 success / 1 bad args / 2 render error / 3 Paged.js timeout
/ 4 browse unavailable
```
Capture the path: `PDF=$($P generate letter.md)` — then use `$PDF`.

161
make-pdf/SKILL.md.tmpl Normal file
View File

@@ -0,0 +1,161 @@
---
name: make-pdf
preamble-tier: 1
version: 1.0.0
description: |
Turn any markdown file into a publication-quality PDF. Proper 1in margins,
intelligent page breaks, page numbers, cover pages, running headers, curly
quotes and em dashes, clickable TOC, diagonal DRAFT watermark. Not a draft
artifact — a finished artifact. Use when asked to "make a PDF", "export to
PDF", "turn this markdown into a PDF", or "generate a document". (gstack)
voice-triggers:
- "make this a pdf"
- "make it a pdf"
- "export to pdf"
- "turn this into a pdf"
- "turn this markdown into a pdf"
- "generate a pdf"
- "make a pdf from"
- "pdf this markdown"
triggers:
- markdown to pdf
- generate pdf
- make pdf
- export pdf
allowed-tools:
- Bash
- Read
- AskUserQuestion
---
{{PREAMBLE}}
# make-pdf: publication-quality PDFs from markdown
Turn `.md` files into PDFs that look like Faber & Faber essays: 1in margins,
left-aligned body, Helvetica throughout, curly quotes and em dashes, optional
cover page and clickable TOC, diagonal DRAFT watermark when you need it.
Copy-paste from the PDF produces clean words, never "S a i l i n g".
On Linux, install `fonts-liberation` for correct rendering — Helvetica and Arial
aren't present by default, and Liberation Sans is the standard metric-compatible
fallback. CI and Docker builds install it automatically via Dockerfile.ci.
## Core patterns
### 80% case — memo/letter
One command, no flags. Gets a clean PDF with running header, page numbers,
and CONFIDENTIAL footer by default.
```bash
$P generate letter.md # writes /tmp/letter.pdf
$P generate letter.md letter.pdf # explicit output path
```
### Publication mode — cover + TOC + chapter breaks
```bash
$P generate --cover --toc --author "Garry Tan" --title "On Horizons" \
essay.md essay.pdf
```
Each top-level H1 in the markdown starts a new page. Disable with
`--no-chapter-breaks` for memos that happen to have multiple H1s.
### Draft-stage watermark
```bash
$P generate --watermark DRAFT memo.md draft.pdf
```
Diagonal 10% opacity DRAFT across every page. When the draft is final, drop
the flag and regenerate.
### Fast iteration via preview
```bash
$P preview essay.md
```
Renders HTML with the same print CSS and opens it in your browser. Refresh
as you edit the markdown. Skip the PDF round trip until you're ready.
### Brand-free (no CONFIDENTIAL footer)
```bash
$P generate --no-confidential memo.md memo.pdf
```
## Common flags
```
Page layout:
--margins <dim> 1in (default) | 72pt | 2.54cm | 25mm
--page-size letter|a4|legal
Structure:
--cover Cover page (title, author, date, hairline rule)
--toc Clickable TOC with page numbers
--no-chapter-breaks Don't start a new page at every H1
Branding:
--watermark <text> Diagonal watermark ("DRAFT", "CONFIDENTIAL")
--header-template <html> Custom running header
--footer-template <html> Custom footer (mutex with --page-numbers)
--no-confidential Suppress the CONFIDENTIAL right-footer
Output:
--page-numbers "N of M" footer (default on)
--tagged Accessible PDF (default on)
--outline PDF bookmarks from headings (default on)
--quiet Suppress progress on stderr
--verbose Per-stage timings
Network:
--allow-network Fetch external images. Off by default
(blocks tracking pixels).
Metadata:
--title "..." Document title (defaults to first H1)
--author "..." Author for cover + PDF metadata
--date "..." Date for cover (defaults to today)
```
## When Claude should run it
Watch for markdown-to-PDF intent. Any of these patterns → run `$P generate`:
- "Can you make this markdown a PDF"
- "Export it as a PDF"
- "Turn this letter into a PDF"
- "I need a PDF of the essay"
- "Print this as a PDF for me"
If the user has a `.md` file open and says "make it look nice", propose
`$P generate --cover --toc` and ask before running.
## Debugging
- Output looks empty / blank → check browse daemon is running: `$B status`.
- Fragmented text on copy-paste → highlight.js output (Phase 4). Retry with
`--no-syntax` once that flag exists. For now, remove fenced code blocks
and regenerate.
- Paged.js timeout → probably no headings in the markdown. Drop `--toc`.
- External image missing → add `--allow-network` (understand you're giving
the markdown file permission to fetch from its image URLs).
- Generated PDF too tall/wide → `--page-size a4` or `--margins 0.75in`.
## Output contract
```
stdout: /tmp/letter.pdf ← just the path, one line
stderr: Rendering HTML... ← progress spinner (unless --quiet)
Generating PDF...
Done in 1.5s. 43 words · 22KB · /tmp/letter.pdf
exit code: 0 success / 1 bad args / 2 render error / 3 Paged.js timeout
/ 4 browse unavailable
```
Capture the path: `PDF=$($P generate letter.md)` — then use `$PDF`.

View File

@@ -0,0 +1,381 @@
/**
* Typed shell-out wrapper for the browse CLI.
*
* Every browse call goes through this file. Reasons:
* - One place to do binary resolution.
* - One place to enforce the --from-file convention for large payloads
* (Windows argv cap is 8191 chars; 200KB HTML dies without this).
* - One place that maps non-zero exit codes to typed errors.
*
* Binary resolution order (Codex round 2 #4, v1.24-aligned):
* 1. $GSTACK_BROWSE_BIN env override (preferred, matches v1.24 GSTACK_*_BIN pattern)
* 2. $BROWSE_BIN env override (back-compat alias)
* 3. sibling dir: dirname(argv[0])/../browse/dist/browse[.exe]
* 4. ~/.claude/skills/gstack/browse/dist/browse[.exe]
* 5. PATH lookup via Bun.which('browse') — handles Windows PATHEXT natively
* 6. error with setup hint
*
* Windows quirks:
* - bun build --compile --outfile X emits X.exe on win32, so candidate paths
* need a .exe probe pass (fs.accessSync(X_OK) degrades to existence-checking
* on Windows per Node docs, so the bare path silently misses the .exe file).
* - `which` only exists in Git Bash; Bun.which() handles cmd.exe / PowerShell
* natively via PATHEXT semantics.
*/
import { execFileSync } from "node:child_process";
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import * as crypto from "node:crypto";
import { BrowseClientError } from "./types";
/** Arguments for loadHtml(): the document to load and the tab to load it into. */
export interface LoadHtmlOptions {
html: string; // raw HTML string
// Forwarded to browse in the load-html payload; loadHtml() substitutes
// "domcontentloaded" when this is omitted.
waitUntil?: "load" | "domcontentloaded" | "networkidle";
// Target tab; passed to browse as --tab-id.
tabId: number;
}
/**
 * Arguments for pdf(). `output` and `tabId` are always sent; every other field
 * is optional and is only forwarded to browse when set (see
 * pushFlagsFromOptions / optionsToPdfFlags for the exact rules).
 */
export interface PdfOptions {
// Destination path handed to `browse pdf`.
output: string;
// Tab to print; passed as --tab-id (argv path) or `tabId` (--from-file path).
tabId: number;
format?: string;
width?: string;
height?: string;
marginTop?: string;
marginRight?: string;
marginBottom?: string;
marginLeft?: string;
// HTML strings; a template longer than 1024 chars makes pdf() switch to --from-file.
headerTemplate?: string;
footerTemplate?: string;
// Boolean switches: on the argv path only `true` emits a flag; on the
// --from-file path any defined value (including false) is written.
pageNumbers?: boolean;
tagged?: boolean;
outline?: boolean;
printBackground?: boolean;
preferCSSPageSize?: boolean;
toc?: boolean;
}
/** Arguments for js(): an expression and the tab to evaluate it in. */
export interface JsOptions {
// Tab to evaluate in; passed to browse as --tab-id.
tabId: number;
expression: string; // JS expression to evaluate
}
/**
 * Turn a user-supplied override (typically an env var value) into a runnable
 * path.
 *
 * The value is trimmed and one pair of surrounding double quotes is removed.
 * Absolute paths go through findExecutable (which adds the Windows extension
 * probe); anything else is looked up with Bun.which against the PATH taken
 * from `env`.
 *
 * @param value Raw override, possibly undefined, blank, or quoted.
 * @param env Environment supplying PATH (or Path) for the lookup.
 * @returns The resolved path, or null when the value is blank or nothing
 *   resolves. Never throws for "not found"; callers decide how to degrade.
 */
function resolveOverride(value: string | undefined, env: NodeJS.ProcessEnv): string | null {
  const cleaned = value?.trim();
  if (!cleaned) return null;

  const unquoted = cleaned.replace(/^"(.*)"$/, '$1');
  if (!path.isAbsolute(unquoted)) {
    const searchPath = env.PATH ?? env.Path ?? '';
    const hit = Bun.which(unquoted, { PATH: searchPath });
    return hit ?? null;
  }
  return findExecutable(unquoted);
}
/**
 * Find the executable that lives at `base`, trying Windows suffixes if needed.
 *
 * The bare path is always probed first. On win32 only, `.exe`, `.cmd` and
 * `.bat` are then appended in that order, because bun-compiled binaries land
 * at `base.exe` and a bare-path probe would miss them. POSIX platforms probe
 * the bare path and nothing else.
 *
 * @param base Candidate path without a Windows extension.
 * @returns The first path that isExecutable accepts, or null if none does.
 */
export function findExecutable(base: string): string | null {
  const suffixes = process.platform === "win32" ? ["", ".exe", ".cmd", ".bat"] : [""];
  for (const suffix of suffixes) {
    const candidate = base + suffix;
    if (isExecutable(candidate)) return candidate;
  }
  return null;
}
/**
 * Locate the browse binary. See the file header for the resolution order.
 *
 * @param env Environment to read the overrides and PATH from (defaults to
 *   process.env).
 * @returns Path of the first candidate that resolves.
 * @throws BrowseClientError (exit code 127, command "resolve") carrying the
 *   canonical setup hint when no candidate resolves.
 */
export function resolveBrowseBin(env: NodeJS.ProcessEnv = process.env): string {
  // 1 + 2: env overrides, GSTACK_BROWSE_BIN first, BROWSE_BIN as back-compat.
  // Each variable is resolved on its own so that an empty or stale
  // GSTACK_BROWSE_BIN does not hide a valid BROWSE_BIN (a plain `a ?? b`
  // would stop at the first variable that is merely *set*).
  for (const raw of [env.GSTACK_BROWSE_BIN, env.BROWSE_BIN]) {
    const override = resolveOverride(raw, env);
    if (override) return override;
  }

  // 3: sibling — make-pdf and browse co-located in dist/.
  const selfDir = path.dirname(process.argv[0]);
  const siblingCandidates = [
    path.resolve(selfDir, "../browse/dist/browse"),
    path.resolve(selfDir, "../../browse/dist/browse"),
    path.resolve(selfDir, "../browse"),
  ];
  for (const candidate of siblingCandidates) {
    const found = findExecutable(candidate);
    if (found) return found;
  }

  // 4: global install.
  const home = os.homedir();
  const globalPath = path.join(home, ".claude/skills/gstack/browse/dist/browse");
  const globalFound = findExecutable(globalPath);
  if (globalFound) return globalFound;

  // 5: PATH lookup via Bun.which — handles Windows PATHEXT natively (no `which`
  // dependency on cmd.exe / PowerShell, no `where`-vs-`which` branch).
  const PATH = env.PATH ?? env.Path ?? '';
  const onPath = Bun.which('browse', { PATH });
  if (onPath) return onPath;

  // 6: nothing resolved — list everything that was tried plus the fix.
  throw new BrowseClientError(
    /* exitCode */ 127,
    "resolve",
    [
      "browse binary not found.",
      "",
      "make-pdf needs browse (the gstack Chromium daemon) to render PDFs.",
      "Tried:",
      ` - $GSTACK_BROWSE_BIN (${env.GSTACK_BROWSE_BIN || "unset"})`,
      ` - $BROWSE_BIN (${env.BROWSE_BIN || "unset"})`,
      ` - sibling: ${siblingCandidates.join(", ")}`,
      ` - global: ${globalPath}`,
      " - PATH: `browse`",
      "",
      "To fix: run gstack setup from the gstack repo:",
      " cd ~/.claude/skills/gstack && ./setup",
      "",
      "Or set GSTACK_BROWSE_BIN explicitly:",
      process.platform === "win32"
        ? ' setx GSTACK_BROWSE_BIN "C:\\path\\to\\browse.exe"'
        : " export GSTACK_BROWSE_BIN=/path/to/browse",
    ].join("\n"),
  );
}
/**
 * Report whether `p` is a regular file the current process may execute.
 *
 * The isFile() check matters on POSIX: access(X_OK) also succeeds for any
 * searchable directory, so without it a directory that happens to sit at a
 * candidate path (for example a `browse/` source directory) would be returned
 * as "the binary" and only fail later at spawn time.
 *
 * @param p Path to probe.
 * @returns true for an executable regular file; false for anything else,
 *   including missing paths and permission errors (never throws).
 */
function isExecutable(p: string): boolean {
  try {
    // statSync follows symlinks, so a symlink to a binary still qualifies.
    if (!fs.statSync(p).isFile()) return false;
    fs.accessSync(p, fs.constants.X_OK);
    return true;
  } catch {
    return false;
  }
}
/**
 * Run a browse command synchronously.
 *
 * @param args Arguments for the browse binary; args[0] is the subcommand and
 *   is recorded on any error raised.
 * @returns The command's stdout, decoded as UTF-8.
 * @throws BrowseClientError when the binary cannot be resolved (from
 *   resolveBrowseBin) or when the process fails. The error carries the child's
 *   exit status, or 1 when no numeric status is available.
 */
function runBrowse(args: string[]): string {
  const bin = resolveBrowseBin();
  try {
    return execFileSync(bin, args, {
      encoding: "utf8",
      maxBuffer: 16 * 1024 * 1024, // 16MB; tab content can be large
      stdio: ["ignore", "pipe", "pipe"],
    });
  } catch (err: any) {
    const exitCode = typeof err.status === "number" ? err.status : 1;
    const stderr = typeof err.stderr === "string"
      ? err.stderr
      : (err.stderr?.toString() ?? "");
    // Spawn failures (ENOENT/EACCES), signal kills and maxBuffer overruns
    // produce no stderr at all; fall back to the Node error message so the
    // caller is not left with an empty diagnostic.
    // NOTE(review): assumes the third BrowseClientError argument is what ends
    // up in the user-visible message — confirm against ./types.
    const detail = stderr.trim() ? stderr : (err.message ?? String(err));
    throw new BrowseClientError(exitCode, args[0] || "unknown", detail);
  }
}
/**
 * Write a payload to a tmp file and return the path. Used for any payload
 * >4KB to avoid Windows argv limits (Codex round 2 #3).
 *
 * Path must be under the browse safe-dirs allowlist (/tmp or cwd on
 * non-Windows; os.tmpdir on Windows). v1.6.0.0 tightened --from-file
 * validation to close a CLI/API parity gap (PR #1103), so os.tmpdir()
 * on macOS (/var/folders/...) now fails validateReadPath. Use the same
 * TEMP_DIR convention as browse/src/platform.ts.
 *
 * The file name is `make-pdf-browse-<pid>-<first 12 hex chars of the payload's
 * sha256>.json`. The file is created owner-only (0o600) because it holds the
 * full document content and /tmp is shared between users.
 * NOTE(review): assumes browse reads the file as the same OS user — confirm.
 *
 * @param payload JSON-serializable object for browse's --from-file.
 * @returns Absolute path of the written file; the caller must delete it
 *   (see cleanupPayloadFile).
 */
const PAYLOAD_TMP_DIR = process.platform === "win32" ? os.tmpdir() : "/tmp";
function writePayloadFile(payload: Record<string, unknown>): string {
  // Serialize once and reuse the string for both the hash and the file body.
  const serialized = JSON.stringify(payload);
  const hash = crypto.createHash("sha256")
    .update(serialized)
    .digest("hex")
    .slice(0, 12);
  const tmpPath = path.join(PAYLOAD_TMP_DIR, `make-pdf-browse-${process.pid}-${hash}.json`);
  // `mode` only applies when the file is created; Windows ignores most of it.
  fs.writeFileSync(tmpPath, serialized, { encoding: "utf8", mode: 0o600 });
  return tmpPath;
}
/**
 * Delete a payload file created by writePayloadFile.
 *
 * @param p Path to remove. Any failure (already gone, permissions) is ignored
 *   on purpose: cleanup is best-effort and must not mask the real result.
 */
function cleanupPayloadFile(p: string): void {
  try {
    fs.unlinkSync(p);
  } catch {
    /* best-effort */
  }
}
// ─── Public API ─────────────────────────────────────────────────
/**
 * Open a new tab and return its id.
 *
 * Prefers `$B newtab --json` and reads `tabId` from the JSON reply. Older
 * browse builds print "Opened tab N" instead, so that shape is parsed as a
 * fallback.
 *
 * If the `--json` invocation itself succeeded, its stdout is checked for both
 * shapes before anything is re-run: re-invoking `newtab` after a successful
 * call would open a second tab and leak the first one.
 *
 * @param url Optional URL to open in the new tab.
 * @returns The numeric tab id.
 * @throws BrowseClientError when browse fails or no tab id can be parsed.
 */
export function newtab(url?: string): number {
  const args = ["newtab"];
  if (url) args.push(url);

  // Try --json first (preferred path for programmatic use).
  let jsonOut: string | undefined;
  try {
    jsonOut = runBrowse([...args, "--json"]);
  } catch {
    // The --json call failed (presumably an older build rejecting the flag);
    // retry without it below.
  }

  if (jsonOut !== undefined) {
    try {
      const parsed = JSON.parse(jsonOut);
      if (typeof parsed?.tabId === "number") return parsed.tabId;
    } catch {
      // Not JSON — fall through to the legacy text shape on the same output.
    }
    const legacy = jsonOut.match(/tab\s+(\d+)/i);
    if (legacy) return parseInt(legacy[1], 10);
  }

  // Fall back to stdout-string parsing. Brittle, but works on older browse builds.
  const out = runBrowse(args);
  const m = out.match(/tab\s+(\d+)/i);
  if (!m) throw new BrowseClientError(1, "newtab", `could not parse tab id from: ${out}`);
  return parseInt(m[1], 10);
}
/**
 * Close a tab.
 *
 * @param tabId Tab to close. When omitted, `closetab` is sent with no
 *   argument, which targets the active tab.
 * @throws BrowseClientError if the browse command fails.
 */
export function closetab(tabId?: number): void {
  const argv = tabId === undefined ? ["closetab"] : ["closetab", String(tabId)];
  runBrowse(argv);
}
/**
 * Load raw HTML into a specific tab.
 *
 * The HTML always travels through a --from-file JSON payload, never argv: it
 * is almost always larger than what a Windows command line can carry. The
 * payload file is removed afterwards whether or not browse succeeds.
 *
 * @param opts HTML, target tab, and optional wait condition (defaults to
 *   "domcontentloaded").
 * @throws BrowseClientError if the browse command fails.
 */
export function loadHtml(opts: LoadHtmlOptions): void {
  const payloadFile = writePayloadFile({
    html: opts.html,
    waitUntil: opts.waitUntil ?? "domcontentloaded",
  });
  try {
    const argv = ["load-html", "--from-file", payloadFile, "--tab-id", String(opts.tabId)];
    runBrowse(argv);
  } finally {
    cleanupPayloadFile(payloadFile);
  }
}
/**
 * Evaluate a JS expression in a tab.
 *
 * @param opts Expression and target tab.
 * @returns browse's stdout for the evaluation, with surrounding whitespace
 *   trimmed.
 * @throws BrowseClientError if the browse command fails.
 */
export function js(opts: JsOptions): string {
  const { expression, tabId } = opts;
  const stdout = runBrowse(["js", expression, "--tab-id", String(tabId)]);
  return stdout.trim();
}
/**
 * Poll a boolean JS expression until it evaluates to true, or time out.
 *
 * Each poll shells out through js(); errors from a poll are ignored because
 * the tab may still be loading. Between polls the thread sleeps with
 * Atomics.wait rather than spinning, so waiting costs no CPU.
 *
 * @param opts.expression Expression whose serialized result must be "true".
 * @param opts.tabId Tab to evaluate in.
 * @param opts.timeoutMs Overall budget in milliseconds.
 * @param opts.pollIntervalMs Delay between polls (default 200ms).
 * @returns true as soon as a poll yields "true"; false once the deadline
 *   passes (or immediately when the budget is zero or negative).
 */
export function waitForExpression(opts: {
  expression: string;
  tabId: number;
  timeoutMs: number;
  pollIntervalMs?: number;
}): boolean {
  const poll = opts.pollIntervalMs ?? 200;
  const deadline = Date.now() + opts.timeoutMs;
  // Nothing ever notifies this cell, so Atomics.wait on it acts as a
  // synchronous sleep that always returns after the timeout.
  const sleepCell = new Int32Array(new SharedArrayBuffer(4));
  while (Date.now() < deadline) {
    try {
      const result = js({ expression: opts.expression, tabId: opts.tabId });
      if (result === "true") return true;
    } catch {
      // Tab may still be loading; keep polling
    }
    const wait = Math.min(poll, Math.max(0, deadline - Date.now()));
    // `!(wait > 0)` also rejects NaN, which Atomics.wait would otherwise
    // treat as "wait forever".
    if (!(wait > 0)) break;
    Atomics.wait(sleepCell, 0, 0, wait);
  }
  return false;
}
/**
 * Print the given tab to PDF.
 *
 * Options normally travel as argv flags. When either the header or the footer
 * template is longer than 1024 characters, everything is written to a JSON
 * payload and passed with --from-file instead; the payload file is deleted
 * afterwards regardless of the outcome.
 *
 * @param opts Output path, tab id, and page options.
 * @throws BrowseClientError if the browse command fails.
 */
export function pdf(opts: PdfOptions): void {
  const tooBigForArgv = [opts.headerTemplate, opts.footerTemplate].some(
    (tpl) => !!tpl && tpl.length > 1024,
  );

  if (!tooBigForArgv) {
    // Small payload: pass flags via argv
    const argv = ["pdf", opts.output, "--tab-id", String(opts.tabId)];
    pushFlagsFromOptions(argv, opts);
    runBrowse(argv);
    return;
  }

  // Large payload: send via --from-file
  const payloadFile = writePayloadFile({
    output: opts.output,
    tabId: opts.tabId,
    ...optionsToPdfFlags(opts),
  });
  try {
    runBrowse(["pdf", "--from-file", payloadFile]);
  } finally {
    cleanupPayloadFile(payloadFile);
  }
}
/**
 * Build the option part of the `pdf --from-file` payload.
 *
 * Two inclusion rules apply, in this key order:
 *   - size and margin strings are copied only when truthy (empty string is
 *     dropped);
 *   - templates and boolean switches are copied whenever they are defined, so
 *     an empty template or an explicit `false` is preserved.
 * `output` and `tabId` are not included; the caller adds them.
 *
 * @param opts Options passed to pdf().
 * @returns A plain object mirroring what browse's `pdf` case reads from the file.
 */
function optionsToPdfFlags(opts: PdfOptions): Record<string, unknown> {
  const copyIfTruthy = [
    "format", "width", "height",
    "marginTop", "marginRight", "marginBottom", "marginLeft",
  ] as const;
  const copyIfDefined = [
    "headerTemplate", "footerTemplate",
    "pageNumbers", "tagged", "outline",
    "printBackground", "preferCSSPageSize", "toc",
  ] as const;

  const flags: Record<string, unknown> = {};
  for (const key of copyIfTruthy) {
    const value = opts[key];
    if (value) flags[key] = value;
  }
  for (const key of copyIfDefined) {
    const value = opts[key];
    if (value !== undefined) flags[key] = value;
  }
  return flags;
}
/**
 * Append the argv form of `opts` to `args` (mutates `args` in place).
 *
 * Emission order and rules:
 *   1. size/margin flags — `--flag value`, only when the value is truthy;
 *   2. header/footer templates — `--flag value` whenever defined, even if empty;
 *   3. boolean switches — bare `--flag`, only when the option is exactly true.
 *
 * @param args Argument list to extend.
 * @param opts Options passed to pdf().
 */
function pushFlagsFromOptions(args: string[], opts: PdfOptions): void {
  const sized: Array<[string, string | undefined]> = [
    ["--format", opts.format],
    ["--width", opts.width],
    ["--height", opts.height],
    ["--margin-top", opts.marginTop],
    ["--margin-right", opts.marginRight],
    ["--margin-bottom", opts.marginBottom],
    ["--margin-left", opts.marginLeft],
  ];
  for (const [flag, value] of sized) {
    if (value) args.push(flag, value);
  }

  const templates: Array<[string, string | undefined]> = [
    ["--header-template", opts.headerTemplate],
    ["--footer-template", opts.footerTemplate],
  ];
  for (const [flag, value] of templates) {
    if (value !== undefined) args.push(flag, value);
  }

  const switches: Array<[string, boolean | undefined]> = [
    ["--page-numbers", opts.pageNumbers],
    ["--tagged", opts.tagged],
    ["--outline", opts.outline],
    ["--print-background", opts.printBackground],
    ["--prefer-css-page-size", opts.preferCSSPageSize],
    ["--toc", opts.toc],
  ];
  for (const [flag, enabled] of switches) {
    if (enabled === true) args.push(flag);
  }
}

256
make-pdf/src/cli.ts Normal file
View File

@@ -0,0 +1,256 @@
#!/usr/bin/env bun
/**
* make-pdf CLI — argv parse, dispatch, exit.
*
* Output contract (per CEO plan DX spec):
* stdout: ONLY the output path on success. One line. Nothing else.
* stderr: progress spinner per stage, final "Done in Xs. N words · SIZE · <path>".
* --quiet: suppress progress. Errors still print.
* --verbose: per-stage timings.
* exit 0 success / 1 bad args / 2 render error / 3 Paged.js timeout / 4 browse unavailable.
*/
import { COMMANDS } from "./commands";
import { ExitCode, BrowseClientError } from "./types";
import type { GenerateOptions, PreviewOptions } from "./types";
/** Result of parseArgs(): the command word plus everything that followed it. */
interface ParsedArgs {
// First non-flag argument; "" when none was given.
command: string;
// Remaining non-flag arguments, in order (input path, then output path).
positional: string[];
// Flag name without the leading "--" → its value, or `true` for a bare flag.
flags: Record<string, string | boolean>;
}
/**
 * Flags that never take a value. They must not consume the next argument:
 * otherwise `generate --cover --toc essay.md essay.pdf` would parse
 * `essay.md` as the value of --toc and treat `essay.pdf` as the input.
 */
const BOOLEAN_FLAGS: ReadonlySet<string> = new Set([
  "cover", "toc", "no-chapter-breaks",
  "confidential", "no-confidential",
  "page-numbers", "no-page-numbers",
  "tagged", "no-tagged",
  "outline", "no-outline",
  "quiet", "verbose",
  "allow-network",
]);

/**
 * Split process argv into command, positionals and flags.
 *
 * The first two argv entries (runtime and script) are skipped. With nothing
 * left, usage is printed and the process exits successfully. Otherwise:
 *   - the first non-flag argument is the command;
 *   - later non-flag arguments are positionals;
 *   - `--name` for a known boolean flag is stored as `true`;
 *   - any other `--name` takes the next argument as its value, unless there is
 *     no next argument or it starts with `--`, in which case it is `true`.
 *
 * @param argv Full process argv.
 * @returns The parsed command line.
 */
function parseArgs(argv: string[]): ParsedArgs {
  const args = argv.slice(2);
  if (args.length === 0) {
    printUsage();
    process.exit(ExitCode.Success);
  }

  let command = "";
  const positional: string[] = [];
  const flags: Record<string, string | boolean> = {};

  for (let i = 0; i < args.length; i++) {
    const a = args[i];
    if (!a.startsWith("--")) {
      // First non-flag arg is the command; the rest are positionals.
      if (!command) command = a;
      else positional.push(a);
      continue;
    }

    const key = a.slice(2);
    const next = args[i + 1];
    const takesValue =
      !BOOLEAN_FLAGS.has(key) && next !== undefined && !next.startsWith("--");
    if (takesValue) {
      flags[key] = next;
      i++;
    } else {
      flags[key] = true;
    }
  }
  return { command, positional, flags };
}
/**
 * Print the full help text to stderr (stdout is reserved for the output path).
 *
 * The command list comes from the COMMANDS registry; the flag reference below
 * it is written out by hand and covers every flag generateOptionsFromFlags
 * reads, including the per-side margin and metadata flags.
 */
function printUsage(): void {
  const lines = [
    "make-pdf — turn markdown into publication-quality PDFs",
    "",
    "Usage:",
  ];
  for (const info of COMMANDS.values()) {
    lines.push(` $P ${info.usage}`);
    lines.push(` ${info.description}`);
  }
  lines.push(
    "",
    "Page layout:",
    " --margins <dim> All four margins (default: 1in). in, pt, cm, mm.",
    " --margin-top|--margin-right|--margin-bottom|--margin-left <dim> Per-side override.",
    " --page-size letter|a4|legal (aliases: --format)",
    "",
    "Document structure:",
    " --cover Add a cover page.",
    " --toc Generate clickable table of contents.",
    " --no-chapter-breaks Don't start a new page at every H1.",
    "",
    "Branding:",
    " --watermark <text> Diagonal watermark on every page.",
    " --header-template <html>",
    " --footer-template <html> Mutex with --page-numbers.",
    " --no-confidential Suppress the CONFIDENTIAL footer.",
    "",
    "Output control:",
    " --page-numbers / --no-page-numbers (default: on)",
    " --tagged / --no-tagged (default: on, accessible PDF)",
    " --outline / --no-outline (default: on, PDF bookmarks)",
    " --quiet Suppress progress on stderr.",
    " --verbose Per-stage timings on stderr.",
    "",
    "Network:",
    " --allow-network Load external images (off by default).",
    "",
    "Metadata:",
    " --title <text> Document title.",
    " --author <text> Author for cover + PDF metadata.",
    " --date <text> Date for cover.",
    "",
    "Examples:",
    " $P generate letter.md",
    " $P generate --cover --toc essay.md essay.pdf",
    " $P generate --watermark DRAFT memo.md draft.pdf",
    " $P preview letter.md",
    "",
    "Run `$P setup` to verify browse + Chromium + pdftotext install.",
  );
  console.error(lines.join("\n"));
}
/**
 * Translate parsed argv into GenerateOptions for the `generate` command.
 *
 * Exits with ExitCode.BadArgs (after printing usage to stderr) when no input
 * path was given. Flag handling:
 *   - plain switches are true only when the flag was passed bare;
 *   - on-by-default switches (confidential, page-numbers, tagged, outline)
 *     turn off only via their `--no-…` form;
 *   - text flags are taken only when a string value followed them;
 *   - margin and page-size values are passed through as parsed.
 *
 * @param parsed Output of parseArgs().
 * @returns Options for orchestrator.generate().
 */
function generateOptionsFromFlags(parsed: ParsedArgs): GenerateOptions {
  const { positional, flags } = parsed;
  if (positional.length === 0) {
    console.error("$P generate: missing <input.md>");
    console.error("Usage: $P generate <input.md> [output.pdf] [options]");
    process.exit(ExitCode.BadArgs);
  }

  const isOn = (key: string): boolean => flags[key] === true;
  // Default-on switch: explicit --key wins, then --no-key, then the default (on).
  const defaultOn = (key: string): boolean => isOn(key) || !isOn(`no-${key}`);
  const text = (key: string): string | undefined => {
    const value = flags[key];
    return typeof value === "string" ? value : undefined;
  };

  const [input, output] = positional;
  return {
    input,
    output,
    margins: flags.margins as string | undefined,
    marginTop: flags["margin-top"] as string | undefined,
    marginRight: flags["margin-right"] as string | undefined,
    marginBottom: flags["margin-bottom"] as string | undefined,
    marginLeft: flags["margin-left"] as string | undefined,
    pageSize: ((flags["page-size"] ?? flags.format) as any),
    cover: isOn("cover"),
    toc: isOn("toc"),
    noChapterBreaks: isOn("no-chapter-breaks"),
    watermark: text("watermark"),
    headerTemplate: text("header-template"),
    footerTemplate: text("footer-template"),
    confidential: defaultOn("confidential"),
    pageNumbers: defaultOn("page-numbers"),
    tagged: defaultOn("tagged"),
    outline: defaultOn("outline"),
    quiet: isOn("quiet"),
    verbose: isOn("verbose"),
    allowNetwork: isOn("allow-network"),
    title: text("title"),
    author: text("author"),
    date: text("date"),
  };
}
/**
 * Translate parsed argv into PreviewOptions for the `preview` command.
 *
 * Exits with ExitCode.BadArgs (after printing usage to stderr) when no input
 * path was given. Switches are true only when passed bare; `confidential` is
 * on unless --no-confidential is given; text flags are taken only when a
 * string value followed them.
 *
 * @param parsed Output of parseArgs().
 * @returns Options for orchestrator.preview().
 */
function previewOptionsFromFlags(parsed: ParsedArgs): PreviewOptions {
  const { positional, flags } = parsed;
  if (positional.length === 0) {
    console.error("$P preview: missing <input.md>");
    console.error("Usage: $P preview <input.md> [options]");
    process.exit(ExitCode.BadArgs);
  }

  const isOn = (key: string): boolean => flags[key] === true;
  const text = (key: string): string | undefined => {
    const value = flags[key];
    return typeof value === "string" ? value : undefined;
  };

  return {
    input: positional[0],
    cover: isOn("cover"),
    toc: isOn("toc"),
    watermark: text("watermark"),
    noChapterBreaks: isOn("no-chapter-breaks"),
    // Default-on: explicit --confidential wins, then --no-confidential.
    confidential: isOn("confidential") || !isOn("no-confidential"),
    allowNetwork: isOn("allow-network"),
    title: text("title"),
    author: text("author"),
    date: text("date"),
    quiet: isOn("quiet"),
    verbose: isOn("verbose"),
  };
}
/**
 * CLI entry point: parse argv, dispatch to the command, and exit with the
 * code from the output contract in the file header.
 *
 * Every switch case ends in process.exit(), which is what prevents
 * fall-through into the next case — there are deliberately no `break`s.
 */
async function main(): Promise<void> {
const parsed = parseArgs(process.argv);
// Only flags were given (no command word).
if (!parsed.command) {
printUsage();
process.exit(ExitCode.BadArgs);
}
if (!COMMANDS.has(parsed.command)) {
console.error(`$P: unknown command: ${parsed.command}`);
console.error("");
printUsage();
process.exit(ExitCode.BadArgs);
}
try {
switch (parsed.command) {
case "version": {
// Read from VERSION file or fall back to a hard-coded default.
try {
const fs = await import("node:fs");
const path = await import("node:path");
// VERSION is looked up two directories above the script (process.argv[1]).
const versionFile = path.resolve(
path.dirname(process.argv[1] || ""),
"../../VERSION",
);
const version = fs.readFileSync(versionFile, "utf8").trim();
console.log(version);
} catch {
console.log("make-pdf (version unknown)");
}
process.exit(ExitCode.Success);
}
case "setup": {
// Command modules are imported lazily, only for the command being run.
const { runSetup } = await import("./setup");
await runSetup();
process.exit(ExitCode.Success);
}
case "generate": {
const opts = generateOptionsFromFlags(parsed);
const { generate } = await import("./orchestrator");
const outputPath = await generate(opts);
// Contract: stdout = output path only
console.log(outputPath);
process.exit(ExitCode.Success);
}
case "preview": {
const opts = previewOptionsFromFlags(parsed);
const { preview } = await import("./orchestrator");
const htmlPath = await preview(opts);
console.log(htmlPath);
process.exit(ExitCode.Success);
}
default:
// Unreachable: COMMANDS.has guarded above
process.exit(ExitCode.BadArgs);
}
} catch (err: any) {
// Map failures to exit codes, most specific first. Every BrowseClientError
// (not only "binary not found") is reported as BrowseUnavailable.
if (err instanceof BrowseClientError) {
console.error(`$P: ${err.message}`);
process.exit(ExitCode.BrowseUnavailable);
}
// Filesystem "no such file" errors are treated as a bad argument.
if (err?.code === "ENOENT") {
console.error(`$P: file not found: ${err.path ?? err.message}`);
process.exit(ExitCode.BadArgs);
}
// Matched by name rather than instanceof; the error class is not imported here.
if (err?.name === "PagedJsTimeout") {
console.error(`$P: ${err.message}`);
process.exit(ExitCode.PagedJsTimeout);
}
// Anything else counts as a render error; the stack is shown only with --verbose.
console.error(`$P: ${err?.message ?? String(err)}`);
if (parsed.flags.verbose && err?.stack) {
console.error(err.stack);
}
process.exit(ExitCode.RenderError);
}
}
// Kick off the CLI. main() handles command errors itself and always exits explicitly.
main();

62
make-pdf/src/commands.ts Normal file
View File

@@ -0,0 +1,62 @@
/**
* Command registry for make-pdf — single source of truth.
*
* Dependency graph:
* commands.ts ──▶ cli.ts (runtime dispatch)
* ──▶ gen-skill-docs.ts (generates usage table in SKILL.md)
* ──▶ tests (validation)
*
* Zero side effects. Safe to import from build scripts.
*/
/**
 * Registry of make-pdf commands, keyed by command name. Map insertion order
 * is the order in which printUsage() in cli.ts lists the commands.
 */
export const COMMANDS = new Map<string, {
// One-line summary; printed under the usage line by printUsage().
description: string;
// Usage string without the `$P ` prefix; printUsage() adds the prefix.
usage: string;
// Flags the command accepts, including the leading "--".
// NOTE(review): not read by cli.ts; presumably consumed by gen-skill-docs/tests — confirm.
flags?: string[];
category: "Primary" | "Setup";
}>([
["generate", {
description: "Render a markdown file to a publication-quality PDF",
usage: "generate <input.md> [output.pdf] [options]",
category: "Primary",
flags: [
// Page layout
"--margins", "--margin-top", "--margin-right", "--margin-bottom", "--margin-left",
"--page-size", "--format",
// Structure
"--cover", "--toc", "--no-chapter-breaks",
// Branding
"--watermark", "--header-template", "--footer-template", "--no-confidential",
// Output
"--page-numbers", "--no-page-numbers", "--tagged", "--no-tagged",
"--outline", "--no-outline", "--quiet", "--verbose",
// Network
"--allow-network",
// Metadata
"--title", "--author", "--date",
],
}],
["preview", {
description: "Render markdown to HTML and open it in the browser (fast iteration)",
usage: "preview <input.md> [options]",
category: "Primary",
flags: [
"--cover", "--toc", "--no-chapter-breaks", "--watermark",
"--no-confidential", "--allow-network",
"--title", "--author", "--date",
"--quiet", "--verbose",
],
}],
["setup", {
description: "Verify browse + Chromium + pdftotext, then run a smoke test",
usage: "setup",
category: "Setup",
flags: [],
}],
["version", {
description: "Print make-pdf version",
usage: "version",
category: "Setup",
flags: [],
}],
]);

View File

@@ -0,0 +1,234 @@
/**
* Orchestrator — ties render, browseClient, and filesystem together.
*
* generate(opts): markdown → PDF on disk. Returns output path.
* preview(opts): markdown → HTML, opens it in a browser.
*
* Progress indication (per DX spec):
* - stdout: ONLY the output path, printed by cli.ts after this returns.
* - stderr: spinner + per-stage status lines, unless opts.quiet.
* - --verbose: stage timings.
*
* Tab lifecycle: every generate opens a dedicated tab via $B newtab --json,
* runs load-html/js/pdf against --tab-id <N>, and closes the tab in a
* try/finally. Parallel $P generate calls never race on the active tab.
*/
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import * as crypto from "node:crypto";
import { spawn } from "node:child_process";
import { render } from "./render";
import type { GenerateOptions, PreviewOptions } from "./types";
import { ExitCode } from "./types";
import * as browseClient from "./browseClient";
/**
 * Writes stage progress to stderr (stdout stays reserved for the output path).
 *
 * - default: each stage overwrites a single status line (`\r` + erase-line);
 * - verbose: every finished stage is additionally printed with its duration;
 * - quiet: nothing is printed except failures.
 */
class ProgressReporter {
  private readonly quiet: boolean;
  private readonly verbose: boolean;
  /** Start timestamp (ms) per stage name, recorded by begin(). */
  private readonly stageStart = new Map<string, number>();
  private readonly totalStart: number;

  constructor(opts: { quiet?: boolean; verbose?: boolean }) {
    this.quiet = opts.quiet === true;
    this.verbose = opts.verbose === true;
    this.totalStart = Date.now();
  }

  /** Mark `stage` as started and show it on the status line. */
  begin(stage: string): void {
    this.stageStart.set(stage, Date.now());
    if (this.quiet) return;
    process.stderr.write(`\r\x1b[K${stage}...`);
  }

  /**
   * Mark `stage` as finished. Only prints in verbose mode:
   * `<stage> (<ms>ms)` followed by a space and `extra` when given.
   */
  end(stage: string, extra?: string): void {
    const start = this.stageStart.get(stage) ?? Date.now();
    const ms = Date.now() - start;
    if (this.quiet) return;
    if (this.verbose) {
      // The separator keeps the timing and the detail apart
      // ("Rendering HTML (12ms) 43 words", not "(12ms)43 words").
      const suffix = extra ? ` ${extra}` : "";
      process.stderr.write(`\r\x1b[K${stage} (${ms}ms)${suffix}\n`);
    }
  }

  /** Print the final "Done in Xs." line with the total elapsed time. */
  done(extra: string): void {
    if (this.quiet) return;
    const total = ((Date.now() - this.totalStart) / 1000).toFixed(1);
    process.stderr.write(`\r\x1b[KDone in ${total}s. ${extra}\n`);
  }

  /** Report a failed stage. */
  fail(stage: string, err: Error): void {
    if (!this.quiet) process.stderr.write("\r\x1b[K");
    // Always emit failure info, even in quiet mode — this is an error path.
    process.stderr.write(`${stage} failed: ${err.message}\n`);
  }
}
/**
 * generate — full pipeline: read the markdown, render HTML, load it into a
 * dedicated browse tab, and print that tab to PDF.
 *
 * @param opts Parsed CLI options. `opts.input` is resolved against the cwd;
 *   `opts.output` defaults to `<os.tmpdir()>/<slug of input>.pdf`.
 * @returns Absolute path of the written PDF.
 * @throws Error with `code: "ENOENT"` when the input file does not exist, plus
 *   whatever render() and the browseClient calls throw. The tab and the temp
 *   HTML file are cleaned up on every exit path.
 */
export async function generate(opts: GenerateOptions): Promise<string> {
  const progress = new ProgressReporter(opts);
  const input = path.resolve(opts.input);
  if (!fs.existsSync(input)) {
    // Tag the error like a native fs error so the CLI's `err.code === "ENOENT"`
    // branch reports it as a bad argument (exit 1), not a render error.
    throw Object.assign(new Error(`input file not found: ${input}`), {
      code: "ENOENT",
      path: input,
    });
  }
  const outputPath = path.resolve(
    opts.output ?? path.join(os.tmpdir(), `${deriveSlug(input)}.pdf`),
  );

  // Stage 1: read markdown
  progress.begin("Reading markdown");
  const markdown = fs.readFileSync(input, "utf8");
  progress.end("Reading markdown");

  // Stage 2: render HTML
  progress.begin("Rendering HTML");
  const rendered = render({
    markdown,
    title: opts.title,
    author: opts.author,
    date: opts.date,
    cover: opts.cover,
    toc: opts.toc,
    watermark: opts.watermark,
    noChapterBreaks: opts.noChapterBreaks,
    confidential: opts.confidential,
    pageSize: opts.pageSize,
    margins: opts.margins,
    pageNumbers: opts.pageNumbers,
    footerTemplate: opts.footerTemplate,
  });
  progress.end("Rendering HTML", `${rendered.meta.wordCount} words`);

  // Stage 3: keep a copy of the HTML in tmp for debugging. browse does not read
  // this file; the HTML reaches it inline via the --from-file JSON payload.
  const htmlTmp = tmpFile("html");

  // Stage 4: spin up a dedicated tab, load HTML, (wait for Paged.js if TOC),
  // then emit PDF. Always close the tab.
  // Both the temp-file write and newtab() run inside the try so that a failure
  // in either still reaches the cleanup below.
  let tabId: number | undefined;
  try {
    fs.writeFileSync(htmlTmp, rendered.html, "utf8");

    progress.begin("Opening tab");
    tabId = browseClient.newtab();
    progress.end("Opening tab", `tabId=${tabId}`);

    progress.begin("Loading HTML into Chromium");
    browseClient.loadHtml({
      html: rendered.html,
      waitUntil: "domcontentloaded",
      tabId,
    });
    progress.end("Loading HTML into Chromium");

    if (opts.toc) {
      progress.begin("Paginating with Paged.js");
      // Browse's $B pdf already waits internally when --toc is passed.
      // We pass toc=true to browseClient.pdf() below.
      progress.end("Paginating with Paged.js", "Paged.js after");
    }

    progress.begin("Generating PDF");
    browseClient.pdf({
      output: outputPath,
      tabId,
      format: opts.pageSize ?? "letter",
      // Per-side margin wins over --margins, which wins over the 1in default.
      marginTop: opts.marginTop ?? opts.margins ?? "1in",
      marginRight: opts.marginRight ?? opts.margins ?? "1in",
      marginBottom: opts.marginBottom ?? opts.margins ?? "1in",
      marginLeft: opts.marginLeft ?? opts.margins ?? "1in",
      headerTemplate: opts.headerTemplate,
      footerTemplate: opts.footerTemplate,
      // CSS is the single source of truth for page numbers (see print-css.ts
      // @bottom-center). Chromium's native numbering always off to avoid double
      // footers. The CSS layer honors pageNumbers + footerTemplate via render().
      pageNumbers: false,
      tagged: opts.tagged !== false,
      outline: opts.outline !== false,
      printBackground: !!opts.watermark,
      toc: opts.toc,
    });
    progress.end("Generating PDF");

    const stat = fs.statSync(outputPath);
    const kb = Math.round(stat.size / 1024);
    progress.done(`${rendered.meta.wordCount} words · ${kb}KB · ${outputPath}`);
  } finally {
    // Always clean up the tab — even on crash, timeout, or Chromium hang.
    if (tabId !== undefined) {
      try {
        browseClient.closetab(tabId);
      } catch {
        // best-effort; we already exited the main path
      }
    }
    // Cleanup tmp HTML
    try { fs.unlinkSync(htmlTmp); } catch { /* best-effort */ }
  }
  return outputPath;
}
/**
 * preview — render the markdown to HTML, write it to a stable temp path, and
 * ask the OS to open it. No PDF round trip and no browse tab.
 *
 * @param opts Parsed CLI options; `opts.input` is resolved against the cwd.
 * @returns Path of the written HTML file
 *   (`<os.tmpdir()>/make-pdf-preview-<slug of input>.html`).
 * @throws Error with `code: "ENOENT"` when the input file does not exist.
 *   Failing to launch the opener is not an error; the path is still returned.
 */
export async function preview(opts: PreviewOptions): Promise<string> {
  const progress = new ProgressReporter(opts);
  const input = path.resolve(opts.input);
  if (!fs.existsSync(input)) {
    // Tag the error like a native fs error so the CLI's `err.code === "ENOENT"`
    // branch reports it as a bad argument (exit 1), not a render error.
    throw Object.assign(new Error(`input file not found: ${input}`), {
      code: "ENOENT",
      path: input,
    });
  }

  progress.begin("Rendering HTML");
  const markdown = fs.readFileSync(input, "utf8");
  const rendered = render({
    markdown,
    title: opts.title,
    author: opts.author,
    date: opts.date,
    cover: opts.cover,
    toc: opts.toc,
    watermark: opts.watermark,
    noChapterBreaks: opts.noChapterBreaks,
    confidential: opts.confidential,
    pageNumbers: opts.pageNumbers,
  });
  progress.end("Rendering HTML", `${rendered.meta.wordCount} words`);

  // Write to a stable path under the OS temp dir (derived from the input name,
  // not random) so the user can reload in the same browser tab.
  const previewPath = path.join(os.tmpdir(), `make-pdf-preview-${deriveSlug(input)}.html`);
  fs.writeFileSync(previewPath, rendered.html, "utf8");

  progress.begin("Opening preview");
  tryOpen(previewPath);
  progress.end("Opening preview");
  progress.done(`Preview at ${previewPath}`);
  return previewPath;
}
// ─── helpers ──────────────────────────────────────────────
/**
 * Derive a filesystem-safe slug from a file path.
 *
 * Takes the base name, drops the last extension, collapses every run of
 * characters outside [a-zA-Z0-9-_] into a single "-", and caps the result at
 * 64 characters. An empty result becomes "document".
 *
 * @param p Path of the input file.
 * @returns The slug; never empty.
 */
function deriveSlug(p: string): string {
  const stem = path.basename(p).replace(/\.[^.]+$/, "");
  const slug = stem.replace(/[^a-zA-Z0-9-_]+/g, "-").slice(0, 64);
  return slug === "" ? "document" : slug;
}
/**
 * Build a unique temp-file path; nothing is created on disk.
 *
 * @param ext Extension without the leading dot.
 * @returns `<os.tmpdir()>/make-pdf-<pid>-<12 random hex chars>.<ext>`.
 */
function tmpFile(ext: string): string {
  const suffix = crypto.randomBytes(6).toString("hex");
  const stem = ["make-pdf", process.pid, suffix].join("-");
  return path.join(os.tmpdir(), `${stem}.${ext}`);
}
/**
 * Ask the OS to open a file or URL in its default application.
 *
 * Uses `open` on macOS, `cmd /c start "" <target>` on Windows, and `xdg-open`
 * elsewhere. The child is detached and unref'd so the CLI can exit without
 * waiting for it. Failure to launch is never fatal: the caller already has the
 * path and prints it.
 *
 * @param pathOrUrl Target to open.
 */
function tryOpen(pathOrUrl: string): void {
  const platform = process.platform;
  const cmd = platform === "darwin" ? "open" :
    platform === "win32" ? "cmd" :
    "xdg-open";
  // The empty "" is the window-title argument `start` expects before the target.
  const args = platform === "win32" ? ["/c", "start", "", pathOrUrl] : [pathOrUrl];
  try {
    const child = spawn(cmd, args, { detached: true, stdio: "ignore" });
    // A missing opener (e.g. no xdg-open on a headless machine) is reported
    // through an asynchronous "error" event, not a throw. Without a listener
    // that event would surface as an uncaught exception and kill the process.
    child.on("error", () => { /* non-fatal; see above */ });
    child.unref();
  } catch {
    // Non-fatal; the caller already has the path and will print it.
  }
}
/** Setup-only re-export so cli.ts can dynamic-import without another file. */
export { ExitCode };

284
make-pdf/src/pdftotext.ts Normal file
View File

@@ -0,0 +1,284 @@
/**
* pdftotext wrapper — the tool behind the copy-paste CI gate.
*
* Codex round 2 surfaced two real problems we address here:
*
* #18: pdftotext (Poppler) vs pdftotext (Xpdf) vs pdftotext-next vary on
* whitespace, line wrap, Unicode normalization, form feeds, and
* extraction order. Cross-platform exact diffing is a non-starter.
* We normalize aggressively and diff the normalized form.
*
* #19: the regex /(?:\b\w\s){4,}/ only catches one failure shape (letters
* spaced out). It misses word-order corruption, missing whitespace
* between paragraphs, and homoglyph substitution. We add a word-token
* diff and a paragraph-boundary assertion on top.
*
* Resolution order for the pdftotext binary (v1.24-aligned):
* 1. $GSTACK_PDFTOTEXT_BIN env override (preferred, matches v1.24 GSTACK_*_BIN pattern)
* 2. $PDFTOTEXT_BIN env override (back-compat alias)
* 3. PATH lookup via Bun.which('pdftotext') — handles Windows PATHEXT natively
* 4. standard POSIX paths (Homebrew + distro) — no Windows candidates because
* Poppler scatters across Scoop / Chocolatey / oschwartz10612-poppler-windows
* and guessing causes false positives. Set GSTACK_PDFTOTEXT_BIN explicitly.
* 5. throws a friendly "install poppler" error
*
* The wrapper is *optional at runtime*: production renders don't need it.
* Only the CI gate and unit tests invoke pdftotext.
*/
import { execFileSync } from "node:child_process";
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
/**
 * Thrown by resolvePdftotext() when no pdftotext binary can be located.
 * `name` is set explicitly so callers can match on it as well as on instanceof.
 */
export class PdftotextUnavailableError extends Error {
constructor(message: string) {
super(message);
this.name = "PdftotextUnavailableError";
}
}
/** Description of a resolved pdftotext binary, as returned by resolvePdftotext(). */
export interface PdftotextInfo {
// Path of the resolved binary.
bin: string;
version: string; // "pdftotext version 24.02.0" or similar
// Which pdftotext implementation this is.
// NOTE(review): presumably derived from the version banner by describeBinary — confirm.
flavor: "poppler" | "xpdf" | "unknown";
}
/**
 * Find the executable that lives at `base`, trying Windows suffixes if needed.
 * Matches browseClient.ts:findExecutable — duplicated rather than shared
 * because the two modules already duplicate isExecutable for compile-isolation.
 *
 * The bare path is probed first; on win32 only, `.exe`, `.cmd` and `.bat` are
 * then appended in that order.
 *
 * @param base Candidate path without a Windows extension.
 * @returns The first path that isExecutable accepts, or null if none does.
 */
export function findExecutable(base: string): string | null {
  const suffixes = process.platform === "win32" ? ["", ".exe", ".cmd", ".bat"] : [""];
  for (const suffix of suffixes) {
    const candidate = base + suffix;
    if (isExecutable(candidate)) return candidate;
  }
  return null;
}
/**
 * Turn a user-supplied override (typically an env var value) into a runnable
 * path.
 *
 * The value is trimmed and one pair of surrounding double quotes is removed.
 * Absolute paths go through findExecutable; anything else is looked up with
 * Bun.which against the PATH taken from `env`.
 *
 * @param value Raw override, possibly undefined, blank, or quoted.
 * @param env Environment supplying PATH (or Path) for the lookup.
 * @returns The resolved path, or null when the value is blank or nothing resolves.
 */
function resolveOverride(value: string | undefined, env: NodeJS.ProcessEnv): string | null {
  const cleaned = value?.trim();
  if (!cleaned) return null;

  const unquoted = cleaned.replace(/^"(.*)"$/, '$1');
  if (!path.isAbsolute(unquoted)) {
    const searchPath = env.PATH ?? env.Path ?? '';
    const hit = Bun.which(unquoted, { PATH: searchPath });
    return hit ?? null;
  }
  return findExecutable(unquoted);
}
/**
 * Locate pdftotext. Throws PdftotextUnavailableError if none is found.
 *
 * Resolution order:
 *   1. $GSTACK_PDFTOTEXT_BIN, 2. $PDFTOTEXT_BIN (back-compat alias),
 *   3. PATH lookup, 4. standard POSIX install locations.
 *
 * @param env Environment to read overrides and PATH from (defaults to process.env).
 * @returns Path, version banner and flavor of the binary that was found.
 * @throws PdftotextUnavailableError with install instructions when nothing resolves.
 */
export function resolvePdftotext(env: NodeJS.ProcessEnv = process.env): PdftotextInfo {
  // 1 + 2: env overrides (GSTACK_PDFTOTEXT_BIN preferred, PDFTOTEXT_BIN back-compat).
  // Each variable is tried on its own so that a blank or unresolvable preferred
  // value does not mask a valid back-compat alias.
  for (const overrideRaw of [env.GSTACK_PDFTOTEXT_BIN, env.PDFTOTEXT_BIN]) {
    const override = resolveOverride(overrideRaw, env);
    if (override) return describeBinary(override);
  }

  // 3: PATH lookup via Bun.which — handles Windows PATHEXT natively.
  const PATH = env.PATH ?? env.Path ?? '';
  const onPath = Bun.which('pdftotext', { PATH });
  if (onPath) return describeBinary(onPath);

  // 4: POSIX-only standard locations. No Windows candidates — Poppler installs
  // scatter across Scoop/Chocolatey/portable zips and guessing causes false
  // positives. Windows users set GSTACK_PDFTOTEXT_BIN explicitly.
  const posixCandidates = [
    "/opt/homebrew/bin/pdftotext", // Apple Silicon Homebrew
    "/usr/local/bin/pdftotext", // Intel Mac or Linuxbrew
    "/usr/bin/pdftotext", // distro package
  ];
  for (const candidate of posixCandidates) {
    if (isExecutable(candidate)) return describeBinary(candidate);
  }

  throw new PdftotextUnavailableError([
    "pdftotext not found.",
    "",
    "make-pdf needs pdftotext to run the copy-paste CI gate.",
    "(Runtime rendering does NOT need it. This only affects tests.)",
    "",
    "To install:",
    " macOS: brew install poppler",
    " Ubuntu: sudo apt-get install poppler-utils",
    " Fedora: sudo dnf install poppler-utils",
    " Windows: scoop install poppler (or download from",
    " https://github.com/oschwartz10612/poppler-windows)",
    "",
    "Or set GSTACK_PDFTOTEXT_BIN to an explicit path:",
    process.platform === "win32"
      ? ' setx GSTACK_PDFTOTEXT_BIN "C:\\path\\to\\pdftotext.exe"'
      : " export GSTACK_PDFTOTEXT_BIN=/path/to/pdftotext",
  ].join("\n"));
}
/**
 * Report whether the current process may execute the file at `p`.
 * Any failure of the access check (missing file, no permission, ...) yields false.
 */
function isExecutable(p: string): boolean {
  let accessible = true;
  try {
    fs.accessSync(p, fs.constants.X_OK);
  } catch {
    accessible = false;
  }
  return accessible;
}
/**
 * Run `<bin> -v` and summarize the result as a PdftotextInfo.
 *
 * pdftotext prints its banner to stderr, and some builds exit 0 while others
 * exit non-zero. spawnSync exposes both streams regardless of exit status
 * (execFileSync only returns stdout on success, which drops a banner written
 * to stderr by a build that exits 0).
 *
 * - `version` is the first non-empty banner line (stdout preferred, then
 *   stderr), or "unknown" when the binary printed nothing or could not run.
 * - `flavor` is inferred from the whole banner, not just the first line,
 *   because the toolkit name may only appear on a later line.
 *
 * Never throws: any spawn failure results in "unknown" fields.
 */
function describeBinary(bin: string): PdftotextInfo {
  let banner = "";
  try {
    const probe = spawnSync(bin, ["-v"], {
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"],
    });
    banner = [probe.stdout, probe.stderr]
      .map((stream) => (stream ?? "").toString().trim())
      .filter((text) => text.length > 0)
      .join("\n");
  } catch {
    // Invalid arguments or a spawn-level failure: fall through with an empty banner.
  }

  // trim() also drops a trailing "\r" left over from CRLF output.
  const version = banner.split("\n")[0]?.trim() || "unknown";

  const haystack = banner.toLowerCase();
  let flavor: PdftotextInfo["flavor"] = "unknown";
  if (haystack.includes("poppler")) flavor = "poppler";
  else if (haystack.includes("xpdf")) flavor = "xpdf";

  return { bin, version, flavor };
}
/**
 * Extract the text of a PDF by running the resolved pdftotext binary.
 *
 * `-layout` is passed unless `opts.layout` is explicitly false, because that
 * is what downstream normalization expects. Output is read from stdout.
 *
 * @throws PdftotextUnavailableError when no binary can be resolved.
 * @throws Error ("pdftotext failed on <path>: ...") when the process fails.
 */
export function pdftotext(pdfPath: string, opts?: { layout?: boolean }): string {
  const { bin } = resolvePdftotext();
  const useLayout = opts?.layout ?? true;
  // The trailing "-" names stdout as the output file.
  const argv: string[] = useLayout ? ["-layout", pdfPath, "-"] : [pdfPath, "-"];
  try {
    return execFileSync(bin, argv, {
      encoding: "utf8",
      maxBuffer: 32 * 1024 * 1024,
    });
  } catch (err: any) {
    throw new Error(`pdftotext failed on ${pdfPath}: ${err.message}`);
  }
}
/**
 * Normalize extracted text for cross-platform, cross-flavor diffing.
 *
 * Applied in this order:
 *  - Unicode NFC canonical composition.
 *  - CRLF and lone CR → LF.
 *  - Form feed (\f) → two newlines.
 *  - Non-breaking space (U+00A0) → regular space.
 *  - Zero-width space (U+200B), zero-width non-joiner (U+200C) and soft
 *    hyphen (U+00AD) → removed.
 *  - Trailing spaces/tabs removed from every line.
 *  - Runs of 3 or more newlines → exactly 2 newlines (one blank line).
 *  - Leading/trailing whitespace of the whole string removed.
 */
export function normalize(raw: string): string {
  return raw
    .normalize("NFC")
    .replace(/\r\n?/g, "\n")
    .replace(/\f/g, "\n\n")
    .replace(/\u00a0/g, " ")
    .replace(/[\u200b\u200c\u00ad]/g, "")
    .replace(/[ \t]+$/gm, "")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}
/**
 * The canonical copy-paste gate used in the E2E tests.
 *
 * Returns { ok: true } when all three assertions pass; returns
 * { ok: false, reasons: [...] } with one or more failure reasons otherwise.
 */
export interface GateResult {
// True exactly when `reasons` is empty.
ok: boolean;
// One human-readable message per failed assertion.
reasons: string[];
// The normalized text extracted from the PDF, returned for debugging.
extracted: string;
}
/**
 * Run the copy-paste gate: extract text from `pdfPath` with pdftotext
 * (-layout), normalize it, and compare it against `expected`.
 *
 * Three assertions are evaluated; every failure appends to `reasons`:
 *  1. each expected paragraph appears (whitespace-insensitively) in the extraction,
 *  2. no per-glyph "S a i l i n g" emission of text that occurs in `expected`,
 *  3. the paragraph-break counts differ by at most 4.
 *
 * @param pdfPath  PDF file to extract.
 * @param expected Source text the PDF is expected to contain.
 * @returns ok=true when no assertion failed, plus the normalized extraction.
 * @throws whatever pdftotext() throws when extraction is impossible.
 */
export function copyPasteGate(pdfPath: string, expected: string): GateResult {
  const extracted = normalize(pdftotext(pdfPath, { layout: true }));
  const expectedNorm = normalize(expected);
  const reasons: string[] = [];

  // Loop-invariant forms, computed once instead of once per paragraph/match.
  const extractedCompact = collapseWhitespace(extracted);
  const expectedLower = expectedNorm.toLowerCase();

  // Assertion 1: every expected paragraph appears as a whole line or
  // contiguous block in the extracted text.
  for (const paragraph of splitParagraphs(expectedNorm)) {
    if (!extractedCompact.includes(collapseWhitespace(paragraph))) {
      reasons.push(
        `expected paragraph not found in extracted text: ${truncate(paragraph, 80)}`,
      );
    }
  }

  // Assertion 2: no "S a i l i n g"-style single-char runs.
  // Count groups of 4+ consecutive letter-then-space tokens. False positive
  // risk on things like "A B C D" (initials) — mitigate by requiring the
  // letters spell a known-word substring of the expected text.
  const fragRegex = /((?:\b\w\s){4,})/g;
  let fragMatch: RegExpExecArray | null;
  while ((fragMatch = fragRegex.exec(extracted)) !== null) {
    // The {4,} quantifier already guarantees at least four letters here.
    const letters = fragMatch[1].replace(/\s/g, "");
    // Only flag if the reassembled letters appear in the expected text.
    if (expectedLower.includes(letters.toLowerCase())) {
      reasons.push(
        `per-glyph emission detected (the "S ai li ng" bug): "${fragMatch[1].trim()}" reassembles to "${letters}"`,
      );
    }
  }

  // Assertion 3: paragraph boundaries preserved. Count double-newlines in
  // both; header/footer noise is tolerated up to this many extra/missing breaks.
  const maxBreakDrift = 4;
  const expectedBreaks = (expectedNorm.match(/\n\n/g) || []).length;
  const extractedBreaks = (extracted.match(/\n\n/g) || []).length;
  if (Math.abs(expectedBreaks - extractedBreaks) > maxBreakDrift) {
    reasons.push(
      `paragraph boundary count drift: expected ~${expectedBreaks}, got ${extractedBreaks}`,
    );
  }

  return { ok: reasons.length === 0, reasons, extracted };
}
/** Split text on runs of two or more newlines; return the trimmed, non-empty pieces. */
function splitParagraphs(s: string): string[] {
  const paragraphs: string[] = [];
  for (const chunk of s.split(/\n\n+/)) {
    const trimmed = chunk.trim();
    if (trimmed.length > 0) paragraphs.push(trimmed);
  }
  return paragraphs;
}
/** Replace every whitespace run with one space and drop leading/trailing whitespace. */
function collapseWhitespace(s: string): string {
  const words = s.split(/\s+/).filter((word) => word.length > 0);
  return words.join(" ");
}
/** Return `s` unchanged when it has at most `n` UTF-16 units; otherwise its first `n` plus "...". */
function truncate(s: string, n: number): string {
  if (s.length <= n) return s;
  return `${s.slice(0, n)}...`;
}
/**
 * Write one diagnostic line about the resolved pdftotext binary to stderr:
 * binary path, flavor, version banner, OS/arch and the runtime's
 * process.version. If resolution fails, an "unavailable" line is written
 * instead. Useful once at the top of a CI log.
 */
export function logDiagnostics(): void {
  try {
    const { bin, flavor, version } = resolvePdftotext();
    const fields = [
      `[pdftotext] bin=${bin}`,
      `flavor=${flavor}`,
      `version="${version}"`,
      `os=${os.platform()}-${os.arch()}`,
      `node=${process.version}`,
    ];
    process.stderr.write(`${fields.join(" ")}\n`);
  } catch (err: any) {
    process.stderr.write(`[pdftotext] unavailable: ${err.message}\n`);
  }
}

361
make-pdf/src/print-css.ts Normal file
View File

@@ -0,0 +1,361 @@
/**
* Print stylesheet generator.
*
* Source of truth: .context/designs/make-pdf-print-reference.html and siblings.
* Mirror those CSS rules here. The HTML references were approved via
* /plan-design-review with explicit design decisions locked in the plan:
*
* - Helvetica first, with Liberation Sans as a metric-compatible Linux
* fallback (Helvetica and Arial aren't installed on most Linux distros;
* Liberation Sans ships via the fonts-liberation package and Playwright's
* install-deps). No bundled webfonts — dodges the per-glyph Tj bug that
* breaks copy-paste extraction.
* - All paragraphs flush-left. No first-line indent, no justify, no
* p+p indent. text-align: left everywhere. 12pt margin-bottom.
* - Cover page has the same 1in margins as every other page. No flexbox
* center, no inset padding, no vertical centering. Distinction comes
* from eyebrow + larger title + hairline rule, not from centering.
* - `@page :first` suppresses running header/footer but does NOT override
* the 1in margin.
* - No <link>, no external CSS/fonts — everything inlined.
* - CJK fallback: Helvetica, Liberation Sans, Arial, Hiragino Kaku Gothic
* ProN, Noto Sans CJK JP, Microsoft YaHei, sans-serif.
*/
/** Options accepted by printCss. Every field is optional. */
export interface PrintCssOptions {
// Document structure
// cover / toc rule sets are emitted only when the flag is exactly true.
cover?: boolean;
toc?: boolean;
// When true, the per-chapter page-break rules are replaced by a placeholder comment.
noChapterBreaks?: boolean;
// Branding
// Any non-empty string enables the .watermark rule set.
watermark?: string;
// The "CONFIDENTIAL" bottom-right margin box is emitted unless this is exactly false.
confidential?: boolean;
// Header (running title, top of page)
runningHeader?: string;
// Page size (in CSS `@page size:` terms)
pageSize?: "letter" | "a4" | "legal" | "tabloid";
// Margins (default 1in)
margins?: string;
// Whether to render "N of M" page numbers in the @page @bottom-center rule.
// Default true. Set false to suppress CSS numbering (used when the caller
// supplies a custom Chromium footerTemplate, or when --no-page-numbers).
pageNumbers?: boolean;
}
/**
 * Build the complete print stylesheet as one CSS string (no <style> wrapper),
 * ready for inline injection. Rule groups are emitted in a fixed order and
 * separated by a blank line; groups that produce no CSS are omitted.
 */
export function printCss(opts: PrintCssOptions = {}): string {
  const sections: string[] = [
    pageRules(opts.pageSize ?? "letter", opts.margins ?? "1in", opts),
    rootTypography(),
    coverRules(opts.cover === true),
    tocRules(opts.toc === true),
    chapterRules(opts.noChapterBreaks === true),
    blockRules(),
    inlineRules(),
    codeRules(),
    quoteRules(),
    figureRules(),
    tableRules(),
    listRules(),
    footnoteRules(),
  ];
  // Watermark styling is only needed when watermark text was supplied.
  if (typeof opts.watermark === "string" && opts.watermark.length > 0) {
    sections.push(watermarkRules());
  }
  sections.push(breakAvoidRules());
  return sections.filter((section) => section.length > 0).join("\n\n");
}
/**
 * Emit the `@page` rule (size, margin, optional margin boxes) followed by an
 * `@page :first` rule that blanks the margin boxes on the first page while
 * leaving the margins untouched.
 *
 * The running header box is emitted only for a non-empty header; page numbers
 * and the CONFIDENTIAL box are emitted unless their option is exactly false.
 */
function pageRules(size: string, margin: string, opts: PrintCssOptions): string {
  const fontStack = `Helvetica, "Liberation Sans", Arial, sans-serif`;
  const headerText = escapeCssString(opts.runningHeader ?? "");

  const lines: string[] = [`@page {`, ` size: ${size};`, ` margin: ${margin};`];
  if (headerText) {
    lines.push(` @top-center { content: "${headerText}"; font-family: ${fontStack}; font-size: 9pt; color: #666; }`);
  }
  if (opts.pageNumbers !== false) {
    lines.push(` @bottom-center { content: counter(page) " of " counter(pages); font-family: ${fontStack}; font-size: 9pt; color: #666; }`);
  }
  if (opts.confidential !== false) {
    lines.push(` @bottom-right { content: "CONFIDENTIAL"; font-family: ${fontStack}; font-size: 8pt; color: #aaa; letter-spacing: 0.05em; }`);
  }
  lines.push(
    `}`,
    // Cover page: suppress running header/footer but keep margins.
    `@page :first {`,
    ` @top-center { content: none; }`,
    ` @bottom-center { content: none; }`,
    ` @bottom-right { content: none; }`,
    `}`,
  );
  return lines.join("\n");
}
/**
 * Base typography for the document body: font stack (with CJK fallbacks),
 * size, leading, colors, hyphenation and zeroed body margin/padding.
 */
function rootTypography(): string {
  const bodyDeclarations = [
    `font-family: Helvetica, "Liberation Sans", Arial, "Hiragino Kaku Gothic ProN", "Noto Sans CJK JP", "Microsoft YaHei", sans-serif`,
    `font-size: 11pt`,
    `line-height: 1.5`,
    `color: #111`,
    `background: white`,
    `hyphens: auto`,
    `font-variant-ligatures: common-ligatures`,
    `font-kerning: normal`,
    `text-rendering: geometricPrecision`,
    `margin: 0`,
    `padding: 0`,
  ];
  return [
    // NOTE(review): `lang` is not a CSS property, so this declaration looks inert — confirm before removing.
    `html { lang: en; }`,
    `body {`,
    ...bodyDeclarations.map((declaration) => ` ${declaration};`),
    `}`,
  ].join("\n");
}
/**
 * Styles for the cover page (`.cover` and its eyebrow, title, subtitle, rule
 * and meta parts). Returns "" when the cover is disabled. Everything is
 * left-aligned; the cover ends with a forced page break.
 */
function coverRules(enabled: boolean): string {
  if (!enabled) return "";
  // Render `selector { decl; ... }` with one declaration per line.
  const block = (selector: string, declarations: string[]): string =>
    [`${selector} {`, ...declarations.map((d) => ` ${d};`), `}`].join("\n");

  return [
    block(`.cover`, [
      `page: first`,
      `page-break-after: always`,
      `break-after: page`,
      `text-align: left`,
    ]),
    block(`.cover .eyebrow`, [
      `font-size: 9pt`,
      `letter-spacing: 0.2em`,
      `text-transform: uppercase`,
      `color: #666`,
      `margin: 0 0 36pt`,
    ]),
    block(`.cover h1.cover-title`, [
      `font-size: 32pt`,
      `line-height: 1.15`,
      `font-weight: 700`,
      `letter-spacing: -0.01em`,
      `margin: 0 0 18pt`,
      `max-width: 5.5in`,
      `text-align: left`,
    ]),
    block(`.cover .cover-subtitle`, [
      `font-size: 14pt`,
      `line-height: 1.4`,
      `font-weight: 400`,
      `color: #333`,
      `margin: 0 0 36pt`,
      `max-width: 5in`,
      `text-align: left`,
    ]),
    block(`.cover hr.rule`, [
      `width: 2.5in`,
      `height: 0`,
      `border: 0`,
      `border-top: 1px solid #111`,
      `margin: 0 0 18pt 0`,
    ]),
    `.cover .cover-meta { font-size: 10pt; line-height: 1.6; color: #333; text-align: left; }`,
    `.cover .cover-meta strong { font-weight: 700; }`,
  ].join("\n");
}
/**
 * Styles for the table of contents (`.toc`): heading, unstyled list, and
 * flex rows made of title, dotted leader and page number. Returns "" when
 * the TOC is disabled. The TOC ends with a forced page break.
 */
function tocRules(enabled: boolean): string {
  if (!enabled) return "";
  // Render `selector { decl; ... }` with one declaration per line.
  const block = (selector: string, declarations: string[]): string =>
    [`${selector} {`, ...declarations.map((d) => ` ${d};`), `}`].join("\n");

  return [
    `.toc { page-break-after: always; break-after: page; }`,
    block(`.toc h2`, [
      `font-size: 13pt`,
      `text-transform: uppercase`,
      `letter-spacing: 0.15em`,
      `color: #666`,
      `font-weight: 600`,
      `margin: 0 0 0.5in`,
    ]),
    block(`.toc ol`, [`list-style: none`, `padding: 0`, `margin: 0`]),
    block(`.toc li`, [
      `display: flex`,
      `align-items: baseline`,
      `gap: 0.25in`,
      `font-size: 11pt`,
      `line-height: 2`,
      `padding: 4pt 0`,
    ]),
    `.toc li .toc-title { flex: 0 0 auto; }`,
    `.toc li .toc-dots { flex: 1 1 auto; border-bottom: 1px dotted #aaa; margin: 0 6pt; transform: translateY(-4pt); }`,
    `.toc li .toc-page { flex: 0 0 auto; color: #666; font-variant-numeric: tabular-nums; }`,
    `.toc li.level-2 { padding-left: 0.35in; font-size: 10pt; }`,
    `.toc li a { color: inherit; text-decoration: none; }`,
  ].join("\n");
}
/**
 * Chapter page-break rules followed by the h1–h4 heading styles.
 *
 * With chapter breaks enabled, every `.chapter` starts a new page except the
 * first of its type; when disabled, a placeholder CSS comment is emitted in
 * their place. Headings always avoid a page break directly after them.
 */
function chapterRules(noChapterBreaks: boolean): string {
  const rules: string[] = [];
  if (noChapterBreaks) {
    rules.push(`/* chapter breaks disabled */`);
  } else {
    rules.push(
      `.chapter { break-before: page; page-break-before: always; }`,
      `.chapter:first-of-type { break-before: auto; page-break-before: auto; }`,
    );
  }
  const h1Declarations = [
    `font-size: 22pt`,
    `line-height: 1.2`,
    `font-weight: 700`,
    `letter-spacing: -0.01em`,
    `margin: 0 0 0.25in`,
    `break-after: avoid`,
    `page-break-after: avoid`,
  ];
  rules.push(
    `h1 {`,
    ...h1Declarations.map((declaration) => ` ${declaration};`),
    `}`,
    `h2 { font-size: 15pt; line-height: 1.3; font-weight: 700; margin: 24pt 0 6pt; break-after: avoid; page-break-after: avoid; }`,
    `h3 { font-size: 12pt; line-height: 1.4; font-weight: 700; text-transform: uppercase; letter-spacing: 0.08em; color: #333; margin: 18pt 0 4pt; break-after: avoid; page-break-after: avoid; }`,
    `h4 { font-size: 11pt; font-weight: 700; margin: 12pt 0 4pt; break-after: avoid; page-break-after: avoid; }`,
  );
  return rules.join("\n");
}
/**
 * Paragraph styles: flush-left, no indent, 12pt gap, no justification, with
 * widow/orphan control. Rule from the plan's "Body paragraph rule
 * (post-review fix)". Also styles the larger `p.lead` variant.
 */
function blockRules(): string {
  const paragraph = [`margin: 0 0 12pt`, `text-align: left`, `widows: 3`, `orphans: 3`];
  return [
    `p {`,
    ...paragraph.map((declaration) => ` ${declaration};`),
    `}`,
    `p:first-child { margin-top: 0; }`,
    `p.lead { font-size: 13pt; line-height: 1.45; color: #222; margin: 0 0 18pt; }`,
  ].join("\n");
}
/** Inline element styles: underlined blue links, bold `strong`, italic `em`. */
function inlineRules(): string {
  const link = [
    `color: #0055cc`,
    `text-decoration: underline`,
    `text-decoration-thickness: 0.5pt`,
    `text-underline-offset: 1.5pt`,
  ];
  return [
    `a {`,
    ...link.map((declaration) => ` ${declaration};`),
    `}`,
    `strong { font-weight: 700; }`,
    `em { font-style: italic; }`,
  ].join("\n");
}
/**
 * Styles for inline `code`, `pre` blocks (wrapping, clipped overflow) and a
 * small highlight.js palette.
 */
function codeRules(): string {
  // Render `selector { decl; ... }` with one declaration per line.
  const block = (selector: string, declarations: string[]): string =>
    [`${selector} {`, ...declarations.map((d) => ` ${d};`), `}`].join("\n");

  return [
    block(`code`, [
      `font-family: "SF Mono", Menlo, Consolas, monospace`,
      `font-size: 9.5pt`,
      `background: #f4f4f4`,
      `padding: 1pt 3pt`,
      `border-radius: 2pt`,
      `border: 0.5pt solid #e4e4e4`,
    ]),
    block(`pre`, [
      `font-family: "SF Mono", Menlo, Consolas, monospace`,
      `font-size: 9pt`,
      `line-height: 1.4`,
      `background: #f7f7f5`,
      `padding: 10pt 12pt`,
      `border: 0.5pt solid #e0e0e0`,
      `border-radius: 3pt`,
      `margin: 12pt 0`,
      `overflow: hidden`,
      `white-space: pre-wrap`,
    ]),
    `pre code { background: none; border: 0; padding: 0; font-size: inherit; }`,
    // highlight.js minimal palette (kept neutral, prints well)
    `.hljs-keyword { color: #8b0000; font-weight: 500; }`,
    `.hljs-string { color: #0d6608; }`,
    `.hljs-comment { color: #888; font-style: italic; }`,
    `.hljs-function, .hljs-title { color: #0044aa; }`,
    `.hljs-number { color: #a64d00; }`,
  ].join("\n");
}
/** Blockquote styles: left rule, muted text, and a `cite` line prefixed with an em dash. */
function quoteRules(): string {
  const quote = [
    `margin: 12pt 0`,
    `padding: 0 0 0 18pt`,
    `border-left: 2pt solid #111`,
    `color: #333`,
    `font-size: 11pt`,
    `line-height: 1.5`,
  ];
  return [
    `blockquote {`,
    ...quote.map((declaration) => ` ${declaration};`),
    `}`,
    `blockquote p { margin-bottom: 6pt; text-align: left; }`,
    `blockquote cite { display: block; margin-top: 6pt; font-style: normal; font-size: 9.5pt; color: #666; letter-spacing: 0.02em; }`,
    `blockquote cite::before { content: "— "; }`,
  ].join("\n");
}
/** Figure styles: vertical margin, images constrained to the text width, italic captions. */
function figureRules(): string {
  const figure = `figure { margin: 12pt 0; }`;
  const image = `figure img { display: block; max-width: 100%; height: auto; }`;
  const caption = `figcaption { font-size: 9pt; color: #666; margin-top: 6pt; font-style: italic; }`;
  return `${figure}\n${image}\n${caption}`;
}
/** Table styles: full width, collapsed borders, hairline row rules and a heavier header rule. */
function tableRules(): string {
  const table = `table { width: 100%; border-collapse: collapse; margin: 12pt 0; font-size: 10pt; }`;
  const cells = `th, td { border-bottom: 0.5pt solid #ccc; padding: 5pt 8pt; text-align: left; vertical-align: top; }`;
  const header = `th { font-weight: 700; border-bottom: 1pt solid #111; background: transparent; }`;
  return `${table}\n${cells}\n${header}`;
}
/** List styles: indentation and spacing for `ul`/`ol`, list items and nested lists. */
function listRules(): string {
  const lists = `ul, ol { margin: 0 0 12pt 0; padding-left: 20pt; }`;
  const items = `li { margin-bottom: 3pt; line-height: 1.45; }`;
  const nested = `li > ul, li > ol { margin-top: 3pt; margin-bottom: 0; }`;
  return `${lists}\n${items}\n${nested}`;
}
/** Footnote styles: superscript reference markers and the ruled-off footnote list. */
function footnoteRules(): string {
  const reference = `.footnote-ref { font-size: 0.75em; vertical-align: super; line-height: 0; text-decoration: none; color: #0055cc; }`;
  const section = `.footnotes { margin-top: 24pt; padding-top: 12pt; border-top: 0.5pt solid #ccc; font-size: 9.5pt; line-height: 1.4; }`;
  const list = `.footnotes ol { padding-left: 18pt; }`;
  return `${reference}\n${section}\n${list}`;
}
/**
 * Style for the `.watermark` overlay: fixed at the page centre, rotated -30°,
 * very large and faint red, and excluded from pointer events and selection.
 */
function watermarkRules(): string {
  const declarations = [
    `position: fixed`,
    `top: 50%`,
    `left: 50%`,
    `transform: translate(-50%, -50%) rotate(-30deg)`,
    `font-size: 140pt`,
    `font-weight: 700`,
    `color: rgba(200, 0, 0, 0.06)`,
    `letter-spacing: 0.08em`,
    `pointer-events: none`,
    `z-index: 9999`,
    `user-select: none`,
    `white-space: nowrap`,
  ];
  return [`.watermark {`, ...declarations.map((d) => ` ${d};`), `}`].join("\n");
}
/** One rule asking the paginator not to split these elements across pages. */
function breakAvoidRules(): string {
  const selectors = ["blockquote", "pre", "code", "table", "figure", "li", ".keep-together"];
  return `${selectors.join(", ")} { break-inside: avoid; page-break-inside: avoid; }`;
}
/**
 * Escape text for use inside a double-quoted CSS string that is itself
 * embedded in an inline <style> element.
 *
 * - `\` and `"` are backslash-escaped so the text cannot end the CSS string.
 * - `<` and `>` become CSS hex escapes (`\3c `, `\3e `): a literal `</style>`
 *   in the text would otherwise close the surrounding <style> element at the
 *   HTML level and let the remainder be parsed as markup.
 * - Line breaks become `\a `: a raw newline is not allowed inside a CSS string.
 *
 * The trailing space after each hex escape terminates the escape sequence and
 * is not part of the resulting text.
 */
function escapeCssString(s: string): string {
  return s
    .replace(/\\/g, "\\\\") // must run first so later escapes are not re-escaped
    .replace(/"/g, "\\\"")
    .replace(/</g, "\\3c ")
    .replace(/>/g, "\\3e ")
    .replace(/\r\n|[\n\r\f]/g, "\\a ");
}

375
make-pdf/src/render.ts Normal file
View File

@@ -0,0 +1,375 @@
/**
* Markdown → HTML renderer. Pure function, no I/O, no Playwright.
*
* Pipeline:
* 1. marked parses markdown → HTML
* 2. Sanitize: strip <script>, <iframe>, <object>, <embed>, <link>,
* <meta>, <base>, <form>, and all on* event handlers + javascript:
* URLs. (Codex round 2 #9: untrusted markdown can embed raw HTML.)
* 3. Smartypants transform (code/URL-safe).
* 4. Assemble full HTML document with print CSS inlined and
* semantic structure (cover, TOC placeholder, body).
*/
import { marked } from "marked";
import { smartypants } from "./smartypants";
import { printCss, type PrintCssOptions } from "./print-css";
/** Input to render(): the markdown source plus document metadata and layout switches. */
export interface RenderOptions {
// Markdown source text to convert.
markdown: string;
// Document-level metadata (used for cover, PDF metadata, running header).
// When title is omitted, render() falls back to the first <h1>, then "Document".
title?: string;
author?: string;
date?: string; // ISO or human string
subtitle?: string;
// Features
cover?: boolean;
toc?: boolean;
watermark?: string;
noChapterBreaks?: boolean;
confidential?: boolean; // default: true
// Page layout
pageSize?: "letter" | "a4" | "legal" | "tabloid";
margins?: string;
// Footer behavior. pageNumbers defaults to true. When footerTemplate is set,
// CSS page numbers are suppressed so the custom Chromium footer wins cleanly.
pageNumbers?: boolean;
footerTemplate?: string;
}
/** Output of render(): the assembled document plus intermediate pieces for tests and debugging. */
export interface RenderResult {
html: string; // full HTML document, ready for $B load-html
printCss: string; // for debugging / preview
bodyHtml: string; // just the rendered body (tests, snapshots)
// Metadata resolved during rendering (after defaults/fallbacks are applied).
meta: {
title: string;
author: string;
date: string;
// Count of whitespace-separated tokens in the body with tags removed.
wordCount: number;
};
}
/**
 * Convert markdown into a complete, print-ready HTML document.
 *
 * Pipeline: marked → sanitize → decode quote entities → smartypants →
 * derive metadata → build print CSS → assemble cover/TOC/chapters.
 * Performs no I/O; the only non-deterministic input is today's date, used
 * when `opts.date` is omitted.
 */
export function render(opts: RenderOptions): RenderResult {
  // 1. Markdown → HTML, 2. sanitize.
  const parsed = marked.parse(opts.markdown, { async: false }) as string;
  const sanitized = sanitizeUntrustedHtml(parsed);

  // 3. Decode common entities so smartypants can match raw " and '.
  //    marked HTML-encodes quotes in text ("hello" → &quot;hello&quot;);
  //    without decoding, smartypants' regex never fires.
  // 4. Smartypants (code-safe).
  const bodyHtml = smartypants(decodeTypographicEntities(sanitized));

  // 5. Derive metadata (title from first H1 if not provided).
  const meta = {
    title: opts.title ?? extractFirstHeading(bodyHtml) ?? "Document",
    author: opts.author ?? "",
    date: opts.date ?? formatToday(),
    wordCount: countWords(stripTags(bodyHtml)),
  };

  // 6. Build CSS. CSS is the single source of truth for page numbers
  //    (Chromium native numbering is always off in orchestrator). If the
  //    caller supplied a custom footerTemplate, suppress CSS page numbers too
  //    so their footer wins.
  const cssOptions: PrintCssOptions = {
    cover: opts.cover,
    toc: opts.toc,
    noChapterBreaks: opts.noChapterBreaks,
    watermark: opts.watermark,
    confidential: opts.confidential !== false,
    runningHeader: meta.title,
    pageSize: opts.pageSize,
    margins: opts.margins,
    pageNumbers: opts.pageNumbers !== false && !opts.footerTemplate,
  };
  const css = printCss(cssOptions);

  // 7. Assemble the document. Optional pieces are "" when disabled and are
  //    dropped by the final filter.
  const headLines = [
    `<meta charset="utf-8">`,
    `<title>${escapeHtml(meta.title)}</title>`,
    meta.author ? `<meta name="author" content="${escapeHtml(meta.author)}">` : ``,
    `<style>`,
    css,
    `</style>`,
  ];
  const bodyLines = [
    opts.watermark ? `<div class="watermark">${escapeHtml(opts.watermark)}</div>` : "",
    opts.cover
      ? buildCoverBlock({
          title: meta.title,
          subtitle: opts.subtitle,
          author: meta.author,
          date: meta.date,
        })
      : "",
    opts.toc ? buildTocBlock(bodyHtml) : "",
    // Wrap body in .chapter sections at H1 boundaries if chapter breaks are on.
    opts.noChapterBreaks
      ? `<section class="chapter">${bodyHtml}</section>`
      : wrapChaptersByH1(bodyHtml),
  ];
  const html = [
    `<!doctype html>`,
    `<html lang="en">`,
    `<head>`,
    ...headLines,
    `</head>`,
    `<body>`,
    ...bodyLines,
    `</body>`,
    `</html>`,
  ].filter((line) => Boolean(line)).join("\n");

  return { html, printCss: css, bodyHtml, meta };
}
/**
 * Decode the HTML entities that marked emits for text-node quotes/apostrophes
 * (&quot;, &#39;, &apos;, &#x27;) so smartypants can see raw " and '.
 *
 * Only text between tags is decoded. Anything that looks like a tag
 * (`<...>`) is passed through untouched: decoding &quot; inside an attribute
 * value such as alt="say &quot;hi&quot;" would terminate the attribute early
 * and corrupt the markup.
 *
 * &amp; is deliberately left alone because it can be legitimately doubled
 * (&amp;amp;) and we don't want to double-decode.
 */
function decodeTypographicEntities(html: string): string {
  // Alternation order matters: a tag-shaped chunk wins over the text run.
  return html.replace(/<[^>]*>|[^<]+/g, (chunk) => {
    if (chunk.startsWith("<")) return chunk;
    return chunk
      .replace(/&quot;/g, "\"")
      .replace(/&#39;/g, "'")
      .replace(/&apos;/g, "'")
      .replace(/&#x27;/g, "'");
  });
}
// ─── Sanitizer ────────────────────────────────────────────────────────
/**
 * Strip dangerous HTML from markdown-produced output.
 *
 * We can't use DOMPurify (server-side; adds a jsdom dep), so this is a
 * conservative regex sanitizer over a fixed blacklist.
 *
 * What's stripped:
 *  - <script>, <iframe>, <object>, <embed>, <link>, <meta>, <base>, <form>,
 *    <applet>, <frame>, <frameset> (paired forms including their content,
 *    plus unclosed/self-closing opening tags).
 *  - <script> inside <svg>.
 *  - on* event handler attributes in any case, whether separated from the
 *    preceding token by whitespace, by `/` (<img/onerror=...>), or glued to
 *    a preceding quoted value (src="x"onerror=...).
 *  - javascript: URLs in href/src/action/formaction/xlink:href, including a
 *    quoted value with leading whitespace. The value is replaced by "#".
 *  - srcdoc attributes and url(javascript:...) inside styles.
 *
 * NOTE(review): known limits of the regex approach — entity-encoded schemes
 * (e.g. "&#106;avascript:") are not detected, and the attribute patterns are
 * applied to the whole string, so body text that happens to look like
 * ` onfoo=bar` is removed as well. Replace with a real HTML parser if either
 * matters.
 */
export function sanitizeUntrustedHtml(html: string): string {
  let s = html;

  // Elements to remove entirely (including content).
  const DANGER_TAGS = [
    "script", "iframe", "object", "embed", "link", "meta", "base", "form",
    "applet", "frame", "frameset",
  ];
  for (const tag of DANGER_TAGS) {
    const re = new RegExp(`<${tag}\\b[\\s\\S]*?</${tag}>`, "gi");
    s = s.replace(re, "");
    // Self-closing / unclosed variants
    const selfRe = new RegExp(`<${tag}\\b[^>]*/?>`, "gi");
    s = s.replace(selfRe, "");
  }

  // SVG <script>
  s = s.replace(/<svg([^>]*)>([\s\S]*?)<\/svg>/gi, (_, attrs, body) => {
    return `<svg${attrs}>${body.replace(/<script\b[\s\S]*?<\/script>/gi, "")}</svg>`;
  });

  // Event handler attributes (on* in any case), whitespace-separated.
  s = s.replace(/\s+on[a-zA-Z]+\s*=\s*"[^"]*"/gi, "");
  s = s.replace(/\s+on[a-zA-Z]+\s*=\s*'[^']*'/gi, "");
  s = s.replace(/\s+on[a-zA-Z]+\s*=\s*[^\s>]+/gi, "");

  // HTML also starts a new attribute after "/" and directly after a closing
  // quote, so cover handlers written without a preceding space.
  const handler = `on[a-zA-Z]+\\s*=\\s*(?:"[^"]*"|'[^']*'|[^\\s>]+)`;
  // <tag/onx=..., <tag/a/onx=... — only before the first whitespace of the
  // tag, so "/on...=" inside a URL attribute value is not touched.
  const slashSeparated = new RegExp(`(<[a-zA-Z][^\\s>"'=]*)\\/${handler}`, "gi");
  // attr="value"onx=... and attr="value"/onx=...
  const quoteAdjacent = new RegExp(`(=\\s*(?:"[^"]*"|'[^']*'))\\/*${handler}`, "gi");
  // Repeat until stable: removing one handler can expose the next. Every
  // replacement shortens the string, so the loop terminates.
  let previous: string;
  do {
    previous = s;
    s = s.replace(slashSeparated, "$1").replace(quoteAdjacent, "$1");
  } while (s !== previous);

  // javascript: URLs in href/src/action/formaction. Leading whitespace inside
  // the quotes is allowed because URL parsing ignores it.
  s = s.replace(
    /(\s(?:href|src|action|formaction|xlink:href)\s*=\s*)(?:"\s*javascript:[^"]*"|'\s*javascript:[^']*'|javascript:[^\s>]+)/gi,
    '$1"#"',
  );

  // srcdoc attribute (iframe escape hatch — already stripped via iframe above,
  // but defense-in-depth).
  s = s.replace(/\s+srcdoc\s*=\s*"[^"]*"/gi, "");
  s = s.replace(/\s+srcdoc\s*=\s*'[^']*'/gi, "");

  // style="url(javascript:..)" — strip javascript: inside style attrs.
  s = s.replace(/url\(\s*javascript:[^)]*\)/gi, "url(#)");

  return s;
}
// ─── Cover / TOC / Chapter helpers ────────────────────────────────────
/**
 * Build the `<section class="cover">` markup: title, optional subtitle, a
 * hairline rule, and a meta block with optional author and the date.
 * Every interpolated value is HTML-escaped; empty subtitle/author lines are
 * omitted.
 */
function buildCoverBlock(opts: {
  title: string;
  subtitle?: string;
  author?: string;
  date: string;
}): string {
  const lines: string[] = [
    `<section class="cover">`,
    ` <h1 class="cover-title">${escapeHtml(opts.title)}</h1>`,
  ];
  if (opts.subtitle) {
    lines.push(` <p class="cover-subtitle">${escapeHtml(opts.subtitle)}</p>`);
  }
  lines.push(` <hr class="rule">`, ` <div class="cover-meta">`);
  if (opts.author) {
    lines.push(` <div><strong>${escapeHtml(opts.author)}</strong></div>`);
  }
  lines.push(` <div>${escapeHtml(opts.date)}</div>`, ` </div>`, `</section>`);
  return lines.join("\n");
}
/**
 * Scan HTML for H1/H2/H3 headings and emit a TOC placeholder section.
 *
 * Each entry links to `#toc-<index>` and carries an empty `.toc-page` span
 * tagged with the same target id. Per the original note, page numbers are
 * filled in by Paged.js when --toc is passed and its polyfill is injected.
 * Returns "" when the HTML has no headings. H2 and H3 both get the
 * `level-2` class.
 */
function buildTocBlock(html: string): string {
  const headings = extractHeadings(html);
  if (headings.length === 0) return "";

  const entries: string[] = [];
  headings.forEach((heading, index) => {
    const target = `toc-${index}`;
    const levelClass = heading.level >= 2 ? "level-2" : "level-1";
    entries.push(
      ` <li class="${levelClass}">`,
      ` <span class="toc-title"><a href="#${target}">${escapeHtml(heading.text)}</a></span>`,
      ` <span class="toc-dots"></span>`,
      ` <span class="toc-page" data-toc-target="${target}"></span>`,
      ` </li>`,
    );
  });

  return [
    `<section class="toc">`,
    ` <h2>Contents</h2>`,
    ` <ol>`,
    ...entries,
    ` </ol>`,
    `</section>`,
  ].join("\n");
}
/**
 * Collect h1–h3 headings in document order as { level, text }. Text is the
 * heading's inner HTML with tags removed, trimmed and entity-decoded;
 * headings whose text ends up empty are skipped.
 */
function extractHeadings(html: string): Array<{ level: number; text: string }> {
  const found: Array<{ level: number; text: string }> = [];
  for (const [, tagName, inner] of html.matchAll(/<(h[1-3])[^>]*>([\s\S]*?)<\/\1>/gi)) {
    const text = decodeTextEntities(stripTags(inner).trim());
    if (!text) continue;
    // tagName is "h1".."h3" (any case); the digit after the "h" is the level.
    found.push({ level: parseInt(tagName.slice(1), 10), text });
  }
  return found;
}
/**
 * Wrap H1-rooted sections in <section class="chapter">. When chapter breaks
 * are on (default), CSS `.chapter { break-before: page }` fires between them.
 *
 * Content before the first <h1> becomes its own chapter unless it is only
 * whitespace. HTML without any <h1> is wrapped as a single chapter.
 */
function wrapChaptersByH1(html: string): string {
  const wrap = (inner: string): string => `<section class="chapter">${inner}</section>`;

  // Offsets of every <h1 ...> opening tag.
  const starts = Array.from(html.matchAll(/<h1\b[^>]*>/gi), (hit) => hit.index as number);
  if (starts.length === 0) return wrap(html);

  const sections: string[] = [];
  const preamble = html.slice(0, starts[0]);
  if (preamble.trim().length > 0) sections.push(wrap(preamble));

  starts.forEach((start, i) => {
    // Each chapter runs up to the next <h1>, or to the end for the last one.
    const end = i + 1 < starts.length ? starts[i + 1] : html.length;
    sections.push(wrap(html.slice(start, end)));
  });
  return sections.join("\n");
}
/**
 * Return the plain text of the first <h1> (tags removed, trimmed,
 * entity-decoded), or null when the HTML contains no <h1>.
 */
function extractFirstHeading(html: string): string | null {
  const hit = /<h1\b[^>]*>([\s\S]*?)<\/h1>/i.exec(html);
  if (hit === null) return null;
  return decodeTextEntities(stripTags(hit[1]).trim());
}
/**
 * Decode HTML entities in plain text extracted from rendered HTML. Distinct
 * from decodeTypographicEntities (which runs on in-pipeline HTML and
 * preserves &amp;). This runs on text destined for <title>, cover, and TOC
 * entries, where &amp; MUST become & or escapeHtml produces &amp;amp;.
 *
 * Handles &lt; &gt; &quot; &apos; &amp; plus decimal (&#NN;) and hex
 * (&#xHH;) numeric references.
 *
 * Decoding happens in a single pass, so every entity in the input is decoded
 * exactly once and text produced by one replacement is never re-scanned:
 * "&amp;lt;" yields "&lt;" and "&#38;amp;" yields "&amp;".
 *
 * Numeric references that are zero, a surrogate, or beyond U+10FFFF decode
 * to U+FFFD instead of throwing (String.fromCodePoint raises RangeError for
 * out-of-range values).
 */
function decodeTextEntities(s: string): string {
  const named: Record<string, string> = {
    lt: "<",
    gt: ">",
    quot: '"',
    apos: "'",
    amp: "&",
  };
  return s.replace(
    /&(?:(lt|gt|quot|apos|amp)|#(\d+)|#x([0-9a-fA-F]+));/g,
    (_entity: string, name?: string, dec?: string, hex?: string) => {
      if (name !== undefined) return named[name];
      const codePoint = dec !== undefined ? parseInt(dec, 10) : parseInt(hex as string, 16);
      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
      const isValid = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate;
      return isValid ? String.fromCodePoint(codePoint) : "\uFFFD";
    },
  );
}
/** Remove every `<...>` sequence with at least one character between the brackets. */
function stripTags(html: string): string {
  const textPieces = html.split(/<[^>]+>/);
  return textPieces.join("");
}
/** Escape the five HTML-significant characters (& < > " ') for text and attribute values. */
function escapeHtml(s: string): string {
  const replacements: Record<string, string> = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    "\"": "&quot;",
    "'": "&#39;",
  };
  // Single pass: each source character is mapped once, so the "&" produced
  // by a replacement is never escaped again.
  return s.replace(/[&<>"']/g, (ch) => replacements[ch]);
}
/** Count the whitespace-separated, non-empty tokens in `text`. */
function countWords(text: string): number {
  let total = 0;
  for (const token of text.split(/\s+/)) {
    if (token.length > 0) total += 1;
  }
  return total;
}
/** Today's date formatted for the en-US locale with a long month, e.g. "May 19, 2026". */
function formatToday(): string {
  const longDate: Intl.DateTimeFormatOptions = {
    year: "numeric",
    month: "long",
    day: "numeric",
  };
  return new Date().toLocaleDateString("en-US", longDate);
}

110
make-pdf/src/setup.ts Normal file
View File

@@ -0,0 +1,110 @@
/**
* `$P setup` — guided smoke test.
*
* Flow (per the CEO plan CLI UX spec):
* 1. Verify browse binary exists and responds
* 2. Verify Chromium launches via $B goto about:blank
* 3. Verify pdftotext is installed (warn, don't fail)
* 4. Generate a smoke-test PDF from an inline 2-paragraph fixture
* 5. Open it
* 6. Print a 3-command cheatsheet
*/
import * as os from "node:os";
import * as path from "node:path";
import * as fs from "node:fs";
import * as browseClient from "./browseClient";
import { resolvePdftotext, PdftotextUnavailableError } from "./pdftotext";
import { generate } from "./orchestrator";
/**
 * `$P setup` — guided smoke test. All progress output goes to stderr.
 *
 * Steps:
 *   1. resolve the browse binary (exit 4 on failure),
 *   2. open and close a Chromium tab on about:blank (exit 4 on failure),
 *   3. probe pdftotext (optional; never fails the run),
 *   4. render a two-paragraph fixture to a PDF in the temp dir (exit 2 on failure),
 *   5. print a short cheatsheet.
 *
 * The temporary markdown fixture is removed on both the success and the
 * failure path; the generated PDF is kept and its path is printed.
 */
export async function runSetup(): Promise<void> {
  const log = (text: string): void => {
    process.stderr.write(text);
  };

  log("make-pdf setup — verifying install\n\n");

  // 1. Resolve browse binary
  log(" [1/5] Checking browse binary...");
  try {
    const bin = browseClient.resolveBrowseBin();
    log(` OK (${bin})\n`);
  } catch (err: any) {
    log(" FAIL\n");
    log(`\n${err.message}\n`);
    process.exit(4);
  }

  // 2. Chromium smoke (navigate a dedicated tab to about:blank)
  log(" [2/5] Launching Chromium...");
  let chromiumTab: number | null = null;
  try {
    chromiumTab = browseClient.newtab("about:blank");
    log(` OK (tab ${chromiumTab})\n`);
  } catch (err: any) {
    log(" FAIL\n");
    log(`\nChromium failed to launch: ${err.message}\n`);
    log("\nTo fix: run gstack setup from the gstack repo:\n");
    log(" cd ~/.claude/skills/gstack && ./setup\n");
    process.exit(4);
  } finally {
    if (chromiumTab !== null) {
      try { browseClient.closetab(chromiumTab); } catch { /* ignore */ }
    }
  }

  // 3. pdftotext (optional — CI gate only)
  log(" [3/5] Checking pdftotext (optional)...");
  try {
    const info = resolvePdftotext();
    log(` OK (${info.flavor}, ${info.version.split(" ").slice(-1)[0] || "version unknown"})\n`);
  } catch (err: any) {
    log(" SKIP\n");
    if (err instanceof PdftotextUnavailableError) {
      log(
        " pdftotext not installed. This is optional — only the CI\n" +
        " copy-paste gate needs it. To enable:\n" +
        " macOS: brew install poppler\n" +
        " Ubuntu: sudo apt-get install poppler-utils\n",
      );
    } else {
      // Still optional, but say why the probe failed instead of hiding it.
      log(` unexpected error while probing pdftotext: ${err?.message ?? String(err)}\n`);
    }
  }

  // 4. Render smoke-test PDF
  log(" [4/5] Generating smoke-test PDF...\n");
  const fixture = [
    "# Hello from make-pdf",
    "",
    "This is a two-paragraph smoke test. If you can read this sentence in the PDF that just opened, the pipeline works end-to-end.",
    "",
    "The second paragraph contains curly quotes (\"hello\"), an em dash -- like this, and an ellipsis... all of which should render correctly.",
    "",
  ].join("\n");
  const fixturePath = path.join(os.tmpdir(), `make-pdf-smoke-${process.pid}.md`);
  const outPath = path.join(os.tmpdir(), `make-pdf-smoke-${process.pid}.pdf`);
  fs.writeFileSync(fixturePath, fixture, "utf8");

  // process.exit() inside a catch block would terminate before `finally`
  // runs and leave the fixture behind, so record the failure and exit only
  // after cleanup.
  let smokeFailure: string | null = null;
  try {
    await generate({
      input: fixturePath,
      output: outPath,
      quiet: true,
      pageNumbers: true,
    });
  } catch (err: any) {
    smokeFailure = String(err?.message ?? err);
  } finally {
    try { fs.unlinkSync(fixturePath); } catch { /* ignore */ }
  }
  if (smokeFailure !== null) {
    log(` FAILED: ${smokeFailure}\n`);
    process.exit(2);
  }
  log(` PASSED. Smoke test saved to ${outPath}\n`);

  // 5. Cheatsheet
  log(" [5/5] All checks passed.\n\n");
  log([
    "make-pdf is ready. Try:",
    " $P generate letter.md # default memo mode",
    " $P generate --cover --toc essay.md # full publication",
    " $P generate --watermark DRAFT memo.md # diagonal watermark",
    "",
    `Smoke-test PDF: ${outPath}`,
    "",
  ].join("\n"));
}

100
make-pdf/src/smartypants.ts Normal file
View File

@@ -0,0 +1,100 @@
/**
* Inline typographic transform (smartypants).
*
* Converts ASCII typography to real Unicode:
* "quoted" → “quoted” (U+201C/U+201D)
* 'quoted' → ‘quoted’ (U+2018/U+2019)
* don't → don’t (apostrophe: U+2019)
* -- → — (em dash U+2014)
* ... → … (ellipsis U+2026)
*
* Critical: must NOT touch code, URLs, or HTML attributes. The Codex round
* 2 review flagged this specifically — smartypants run over a fenced code
* block corrupts the code and tokens inside tag attributes can break
* parsing.
*
* This operates on HTML (marked already produced it) and walks text nodes
* only via a lightweight regex that recognizes code/pre/URL zones and
* skips them entirely.
*/
// Whole <pre>/<code>/<script>/<style> elements, including their content.
const CODE_ZONE_RE = /<(pre|code|script|style)\b[^>]*>[\s\S]*?<\/\1>/gi;
// Any remaining HTML tag (attributes included).
const TAG_RE = /<[^>]+>/g;
// URLs end at whitespace OR at a NUL. By the time URLs are carved, tags have
// already been replaced by NUL-delimited placeholders; a plain \S+ would
// swallow the placeholder of a tag that directly follows the URL
// (e.g. `https://example.com</p>`) and that tag would be lost on restore.
const URL_RE = /\bhttps?:\/\/[^\s\u0000]+/g;
/**
 * Apply smartypants to an HTML string. Zones that should not be touched:
 * - <pre>, <code>, <script>, <style> blocks (content unchanged)
 * - HTML tags themselves (attributes unchanged)
 * - URLs (http:// and https:// spans unchanged)
 *
 * @param html HTML produced from markdown.
 * @returns The same HTML with typographic replacements applied to the text
 *          outside the preserved zones.
 */
export function smartypants(html: string): string {
  // Step 1: split into preserved + transformed zones.
  // Preserved zones are swapped for NUL-delimited placeholder tokens, the
  // remaining text is transformed, and the zones are spliced back afterwards.
  const preserved: string[] = [];
  const PLACEHOLDER = (i: number) => `\u0000SMARTPANTS_PRESERVED_${i}\u0000`;
  const carve = (source: string, pattern: RegExp): string => {
    return source.replace(pattern, (match) => {
      const idx = preserved.length;
      preserved.push(match);
      return PLACEHOLDER(idx);
    });
  };
  let s = html;
  s = carve(s, CODE_ZONE_RE);
  s = carve(s, TAG_RE);
  s = carve(s, URL_RE);
  s = transformText(s);

  // Step 2: restore preserved zones.
  // A replacer function avoids $-substitution gotchas. A later carve can
  // still capture an earlier placeholder (e.g. a stray "<" ... ">" pair
  // spanning a carved <code> element), so restore once per carve stage;
  // the bound also guarantees termination on adversarial input.
  const CARVE_STAGES = 3;
  for (let pass = 0; pass < CARVE_STAGES && s.includes("\u0000SMARTPANTS_PRESERVED_"); pass++) {
    s = s.replace(/\u0000SMARTPANTS_PRESERVED_(\d+)\u0000/g, (_, idx) => {
      return preserved[parseInt(idx, 10)] ?? "";
    });
  }
  return s;
}
/**
 * Transform plain text (no HTML, no code, no URLs).
 *
 * NUL characters (U+0000) are treated as opaque element boundaries:
 * smartypants() leaves NUL-delimited placeholders where tags, code, and URLs
 * were carved out, so a quote may sit directly after one.
 *
 * Order matters:
 * 1. Triple dots first (so they don't collide with later apostrophes)
 * 2. Em dashes (two hyphens → em dash)
 * 3. Apostrophes (contractions + possessives)
 * 4. Double quotes (open/close pairing)
 * 5. Single quotes (open/close pairing — after apostrophes)
 *
 * @param text Text with preserved zones already replaced by placeholders.
 * @returns The text with ASCII typography replaced by Unicode equivalents.
 */
function transformText(text: string): string {
  let s = text;
  // Ellipsis: three literal dots (with optional spaces) → …
  s = s.replace(/\.\s?\.\s?\./g, "\u2026");
  // Em dash: -- → — between words ("a--b"), after a word ("wait-- what") or
  // free-standing (" -- "). Whitespace followed by "--word" is left alone so
  // ARGV-style flags in prose like `--verbose` survive. Lookarounds (rather
  // than consuming groups) let adjacent matches such as "a--b--c" all convert.
  s = s.replace(/(?<=\w)--(?=[\w\s])|(?<=\s)--(?=\s)/g, "\u2014");
  // Standalone -- at start/end
  s = s.replace(/^--\s/gm, "\u2014 ");
  s = s.replace(/\s--$/gm, " \u2014");
  // Apostrophes in contractions and possessives.
  // "don't", "it's", "they're", "Garry's"
  s = s.replace(/(?<=\w)'(?=\w)/g, "\u2019");
  // Double quotes: open if preceded by whitespace/bol/opening bracket/hyphen,
  // or if it directly follows a placeholder and starts a word (`<p>"Hello`).
  // Everything else is a closing quote.
  s = s.replace(/(?<=^|[\s\(\[\{\-])"|(?<=\u0000)"(?=\w)/g, "\u201c"); // opening "
  s = s.replace(/"/g, "\u201d"); // remaining " are closing
  // Single quotes (after apostrophe pass). The placeholder rule is NOT applied
  // here: `<code>foo</code>'s` is a possessive and must stay an apostrophe.
  s = s.replace(/(?<=^|[\s\(\[\{\-])'/g, "\u2018"); // opening '
  s = s.replace(/'/g, "\u2019"); // remaining ' are closing
  return s;
}

124
make-pdf/src/types.ts Normal file
View File

@@ -0,0 +1,124 @@
/**
* make-pdf — shared types.
*
* No runtime code. Imports are safe from any module.
*/
/** Named paper sizes accepted wherever a page size is configured. */
export type PageSize = "letter" | "a4" | "legal" | "tabloid";
/** Font mode selector; currently a single-member union. */
export type FontMode = "sans"; // v1: Helvetica only. Future: "serif" | "custom".
/**
 * Options for `$P generate` — the public CLI contract.
 * Matches the flag set documented in the CEO plan.
 *
 * Only `input` is required. The "default:" notes in the trailing comments
 * describe intended behaviour; this type does not enforce them.
 */
export interface GenerateOptions {
input: string; // markdown input path
output?: string; // PDF output path (default: /tmp/<slug>.pdf)
// Page layout
margins?: string; // "1in" | "72pt" | "25mm" | "2.54cm"
// Per-side margins; presumably override `margins` for that side — TODO confirm.
marginTop?: string;
marginRight?: string;
marginBottom?: string;
marginLeft?: string;
pageSize?: PageSize; // default "letter"
// Document structure
cover?: boolean;
toc?: boolean;
noChapterBreaks?: boolean; // default: chapter breaks ON
// Branding
watermark?: string; // e.g. "DRAFT"
headerTemplate?: string; // raw HTML
footerTemplate?: string; // raw HTML, mutex with pageNumbers
confidential?: boolean; // default: true
// Output control
pageNumbers?: boolean; // default: true
tagged?: boolean; // default: true (accessible PDF)
outline?: boolean; // default: true (PDF bookmarks)
quiet?: boolean; // suppress progress on stderr
verbose?: boolean; // per-stage timings on stderr
// Network
allowNetwork?: boolean; // default: false
// Metadata
title?: string;
author?: string;
date?: string; // ISO-ish; default: today
}
/**
 * Options for `$P preview`.
 *
 * Carries a subset of the GenerateOptions render flags (no output path,
 * margins, page size, or header/footer templates). Only `input` is required.
 */
export interface PreviewOptions {
input: string;
quiet?: boolean;
verbose?: boolean;
// Same render flags as generate so preview matches output
cover?: boolean;
toc?: boolean;
watermark?: string;
noChapterBreaks?: boolean;
confidential?: boolean;
pageNumbers?: boolean;
allowNetwork?: boolean;
title?: string;
author?: string;
date?: string;
}
/**
 * Parsed page.pdf() options passed to browse.
 *
 * `output` and `tabId` are required; every other field is optional.
 */
export interface BrowsePdfOptions {
output: string;
tabId: number;
format?: PageSize;
// Explicit page dimensions as strings; presumably an alternative to
// `format` — TODO confirm against the browse client.
width?: string;
height?: string;
// When present, all four sides must be supplied.
margins?: {
top: string;
right: string;
bottom: string;
left: string;
};
headerTemplate?: string;
footerTemplate?: string;
pageNumbers?: boolean;
displayHeaderFooter?: boolean;
tagged?: boolean;
outline?: boolean;
printBackground?: boolean;
preferCSSPageSize?: boolean;
toc?: boolean; // signals browse to wait for Paged.js
}
/**
 * Exit codes for $P generate.
 * Mirror these in orchestrator error paths.
 *
 * `as const` keeps each value a numeric literal, so the companion
 * `ExitCode` type below is the union 0 | 1 | 2 | 3 | 4.
 */
export const ExitCode = {
Success: 0,
BadArgs: 1,
RenderError: 2,
PagedJsTimeout: 3,
BrowseUnavailable: 4,
} as const;
// Type with the same name as the value: the union of all exit-code values.
export type ExitCode = typeof ExitCode[keyof typeof ExitCode];
/**
 * Error describing a failed browse CLI shell-out.
 *
 * Keeps the exit code, the browse command, and the raw stderr as read-only
 * fields; the message embeds all three, with stderr trimmed.
 */
export class BrowseClientError extends Error {
  public readonly exitCode: number;
  public readonly command: string;
  public readonly stderr: string;

  constructor(exitCode: number, command: string, stderr: string) {
    const detail = stderr.trim();
    super(`browse ${command} exited ${exitCode}: ${detail}`);
    this.exitCode = exitCode;
    this.command = command;
    this.stderr = stderr; // stored untrimmed, exactly as received
    this.name = "BrowseClientError";
  }
}

View File

@@ -0,0 +1,135 @@
/**
* browseClient unit tests — binary resolution and error mapping.
*
* These are pure unit tests; they do NOT require a running browse daemon.
* Cross-platform: assertions that pin POSIX behavior early-return on win32
* and vice versa, so both lanes only exercise their own branch.
*/
import { describe, expect, test } from "bun:test";
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import { BrowseClientError } from "../src/types";
import { resolveBrowseBin, findExecutable } from "../src/browseClient";
// An executable that exists on every machine of the current platform:
// `/bin/sh` on POSIX, `cmd.exe` under System32 on Windows. The override
// tests point at it so neither lane has to write a temp script.
const REAL_EXE: string = (() => {
  if (process.platform !== "win32") return "/bin/sh";
  const systemRoot = process.env.SystemRoot ?? "C:\\Windows";
  return path.join(systemRoot, "System32", "cmd.exe");
})();
/**
 * Run `fn` with the given environment overrides applied, then put the
 * previous values back — even when `fn` throws.
 *
 * @param overrides Variable name → value; `undefined` unsets the variable.
 * @param fn        Synchronous callback executed under the overrides.
 * @returns Whatever `fn` returns.
 */
function withEnv<T>(overrides: Record<string, string | undefined>, fn: () => T): T {
  const assign = (name: string, value: string | undefined): void => {
    if (value === undefined) {
      delete process.env[name];
    } else {
      process.env[name] = value;
    }
  };
  const names = Object.keys(overrides);
  // Snapshot first, so restoring works for variables that were unset.
  const snapshot = new Map<string, string | undefined>(
    names.map((name): [string, string | undefined] => [name, process.env[name]]),
  );
  names.forEach((name) => assign(name, overrides[name]));
  try {
    return fn();
  } finally {
    snapshot.forEach((value, name) => assign(name, value));
  }
}
// findExecutable: bare-path hit on POSIX, extension probing on Windows,
// null when nothing matches. Platform-specific cases return early on the
// other platform.
describe("findExecutable", () => {
  const onWindows = process.platform === "win32";

  test("returns the bare path on POSIX when it's executable", () => {
    if (onWindows) return;
    expect(findExecutable("/bin/sh")).toBe("/bin/sh");
  });

  test("on win32, probes .exe / .cmd / .bat after the bare-path miss", () => {
    if (!onWindows) return;
    // cmd.exe lives at System32\cmd.exe — probe with the extension-less base.
    const systemRoot = process.env.SystemRoot ?? "C:\\Windows";
    const base = path.join(systemRoot, "System32", "cmd");
    expect(findExecutable(base)).toBe(base + ".exe");
  });

  test("returns null when no extension matches", () => {
    expect(findExecutable("/nonexistent/path/to/nothing")).toBeNull();
  });
});
// resolveBrowseBin: error shape when nothing resolves, plus the env-override
// rules (GSTACK_BROWSE_BIN, the BROWSE_BIN alias, precedence, quote stripping).
// Every test scopes its environment changes with withEnv.
describe("resolveBrowseBin", () => {
test("throws BrowseClientError with setup hint when nothing is found", () => {
// Point overrides at non-existent paths and clear PATH so Bun.which finds
// nothing. Sibling/global probes go through findExecutable on real paths,
// but the test asserts on the error shape rather than depending on whether
// a real browse install exists on the box.
let thrown: unknown = null;
try {
withEnv(
{
GSTACK_BROWSE_BIN: "/nonexistent/gstack-browse-bin",
BROWSE_BIN: "/nonexistent/browse-bin",
// Both spellings are cleared: Windows conventionally uses `Path`.
PATH: "",
Path: "",
},
() => resolveBrowseBin(),
);
} catch (err) {
thrown = err;
}
// NOTE: when nothing is thrown this test makes no assertions at all.
if (thrown) {
expect(thrown).toBeInstanceOf(BrowseClientError);
expect((thrown as BrowseClientError).message).toContain("browse binary not found");
expect((thrown as BrowseClientError).message).toContain("./setup");
expect((thrown as BrowseClientError).message).toContain("GSTACK_BROWSE_BIN");
// Back-compat alias still surfaces in the diagnostic.
expect((thrown as BrowseClientError).message).toContain("BROWSE_BIN");
}
// If the test box has a real browse install on disk, sibling/global may
// resolve and the helper won't throw — that's fine; the assertion is
// gated on whether it threw at all.
});
test("honors GSTACK_BROWSE_BIN when it points at a real executable", () => {
const resolved = withEnv({ GSTACK_BROWSE_BIN: REAL_EXE }, () => resolveBrowseBin());
expect(resolved).toBe(REAL_EXE);
});
test("honors BROWSE_BIN as a back-compat alias", () => {
// The primary variable is explicitly unset so only the alias can match.
const resolved = withEnv(
{ GSTACK_BROWSE_BIN: undefined, BROWSE_BIN: REAL_EXE },
() => resolveBrowseBin(),
);
expect(resolved).toBe(REAL_EXE);
});
test("GSTACK_BROWSE_BIN takes precedence over BROWSE_BIN", () => {
const resolved = withEnv(
{ GSTACK_BROWSE_BIN: REAL_EXE, BROWSE_BIN: "/nonexistent/legacy" },
() => resolveBrowseBin(),
);
expect(resolved).toBe(REAL_EXE);
});
test("strips wrapping double quotes from override values", () => {
const resolved = withEnv({ GSTACK_BROWSE_BIN: `"${REAL_EXE}"` }, () => resolveBrowseBin());
expect(resolved).toBe(REAL_EXE);
});
});
// BrowseClientError: constructor arguments are exposed as fields and folded
// into the message.
describe("BrowseClientError", () => {
  test("captures exit code, command, and stderr", () => {
    const failure = new BrowseClientError(127, "pdf", "Chromium not found");
    const { exitCode, command, stderr, message, name } = failure;

    expect(exitCode).toBe(127);
    expect(command).toBe("pdf");
    expect(stderr).toBe("Chromium not found");
    for (const fragment of ["browse pdf exited 127", "Chromium not found"]) {
      expect(message).toContain(fragment);
    }
    expect(name).toBe("BrowseClientError");
  });
});

View File

@@ -0,0 +1,76 @@
/**
* Combined-features copy-paste gate — the P0 CI gate.
*
* This test runs the compiled `make-pdf/dist/pdf` binary against a fixture
* that has every v1 typography feature on (smartypants, hyphens, chapter
* breaks, bold/italic, inline code, blockquote, lists, headings). It then
* pipes the output through pdftotext and asserts the extracted text
* matches the handwritten expected.txt.
*
* Codex round 2 told us this (not per-feature gates) is the real gate a
* user actually cares about — features interact, and the combined
* extraction is what predicts production quality.
*
* Gating: only runs when the compiled binary + browse + pdftotext are all
* available. Skipped cleanly otherwise (local dev without full install).
*/
import { describe, expect, test } from "bun:test";
import { execFileSync } from "node:child_process";
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import { copyPasteGate, resolvePdftotext } from "../../src/pdftotext";
// Markdown input and the hand-written text expected back from pdftotext.
const FIXTURE = path.resolve(__dirname, "../fixtures/combined-gate.md");
const EXPECTED = path.resolve(__dirname, "../fixtures/combined-gate.expected.txt");
// ROOT is three directories above this test file; both compiled binaries
// are looked up relative to it.
const ROOT = path.resolve(__dirname, "../../..");
const PDF_BIN = path.join(ROOT, "make-pdf/dist/pdf");
const BROWSE_BIN = path.join(ROOT, "browse/dist/browse");
/**
 * Check that everything the gate needs exists: both compiled binaries, the
 * fixture, the expected text, and a resolvable pdftotext.
 *
 * @returns `{ ok: true }`, or `{ ok: false, reason }` naming the first
 *          missing prerequisite (checked in the order listed above).
 */
function prerequisitesAvailable(): { ok: true } | { ok: false; reason: string } {
  const requiredFiles: Array<[string, string]> = [
    [PDF_BIN, `make-pdf binary missing (${PDF_BIN}). Run bun run build.`],
    [BROWSE_BIN, `browse binary missing (${BROWSE_BIN}).`],
    [FIXTURE, `fixture missing (${FIXTURE}).`],
    [EXPECTED, `expected.txt missing (${EXPECTED}).`],
  ];
  for (const [file, reason] of requiredFiles) {
    if (!fs.existsSync(file)) {
      return { ok: false, reason };
    }
  }
  try {
    resolvePdftotext();
  } catch (err: any) {
    return { ok: false, reason: err.message };
  }
  return { ok: true };
}
// End-to-end gate: render the fixture with the compiled binary, then run
// copyPasteGate on the resulting PDF against the expected text. Prerequisites
// are evaluated once at describe time; when they are missing the real test is
// skipped and a placeholder test logs the reason.
describe("combined-features copy-paste gate", () => {
const avail = prerequisitesAvailable();
test.skipIf(!avail.ok)("fixture PDF extracts cleanly through pdftotext", () => {
if (!avail.ok) return; // satisfies the type checker
// Use /tmp directly (browse's validateOutputPath allows /private/tmp,
// which macOS resolves /tmp to). os.tmpdir() returns /var/folders/...
// which is outside the safe-dirs allowlist.
const outputPdf = `/tmp/make-pdf-combined-gate-${process.pid}.pdf`;
try {
// BROWSE_BIN is passed through the environment so the child process uses
// the repo-local browse build.
execFileSync(PDF_BIN, ["generate", FIXTURE, outputPdf, "--quiet"], {
encoding: "utf8",
env: { ...process.env, BROWSE_BIN },
stdio: ["ignore", "pipe", "pipe"],
});
expect(fs.existsSync(outputPdf)).toBe(true);
const expected = fs.readFileSync(EXPECTED, "utf8");
const result = copyPasteGate(outputPdf, expected);
if (!result.ok) {
// Attach the extracted text so CI logs make the failure diagnosable
process.stderr.write(`\n--- EXTRACTED ---\n${result.extracted}\n--- END ---\n\n`);
process.stderr.write(`--- REASONS ---\n${result.reasons.join("\n")}\n--- END ---\n`);
}
expect(result.ok).toBe(true);
} finally {
// Best-effort cleanup of the generated PDF, pass or fail.
try { fs.unlinkSync(outputPdf); } catch { /* ignore */ }
}
}, 30000);
// Surface the skip reason in the test output instead of skipping silently.
if (!avail.ok) {
test("prerequisites check", () => {
console.warn(`[skip] ${avail.reason}`);
});
}
});

View File

@@ -0,0 +1,20 @@
The Horizon
This is the combined-features fixture. Every feature turned on simultaneously. The gate asserts that all of these paragraphs extract cleanly from the PDF with pdftotext.
A paragraph with bold, italic, and inline code tokens — each of which gets a different HTML treatment. None should fragment text on copy-paste.
A paragraph with “curly quotes”, single quotes, an em dash — like this, and an ellipsis… All three get smartypants transforms.
A subsection heading
First list item with some words that keep it on one line.
Second list item with more words.
Third list item.
A blockquote from Van Dyke. Her diminished size is in me, not in her.
A second chapter
This content begins on a fresh page because the default chapter-breaks rule fires. Extract must still find these paragraphs.
A final paragraph with enough words to trigger hyphenation across the line wrap boundary. Extraordinary words sometimes hyphenate. Interdisciplinary ones certainly do.

30
make-pdf/test/fixtures/combined-gate.md vendored Normal file
View File

@@ -0,0 +1,30 @@
# The Horizon
This is the combined-features fixture. Every feature turned on simultaneously.
The gate asserts that all of these paragraphs extract cleanly from the PDF
with pdftotext.
A paragraph with **bold**, *italic*, and `inline code` tokens — each of which
gets a different HTML treatment. None should fragment text on copy-paste.
A paragraph with "curly quotes", 'single quotes', an em dash -- like this,
and an ellipsis... All three get smartypants transforms.
## A subsection heading
Lists must not break mid-item:
- First list item with some words that keep it on one line.
- Second list item with more words.
- Third list item.
> A blockquote from Van Dyke. Her diminished size is in me, not in her.
# A second chapter
This content begins on a fresh page because the default chapter-breaks rule
fires. Extract must still find these paragraphs.
A final paragraph with enough words to trigger hyphenation across the line
wrap boundary. Extraordinary words sometimes hyphenate. Interdisciplinary
ones certainly do.

View File

@@ -0,0 +1,207 @@
/**
* pdftotext unit tests — normalize() and copyPasteGate() assertions.
*
* These tests are pure unit tests of the normalization + assertion logic.
* They do NOT require pdftotext to be installed (the actual binary is
* mocked by manipulating strings directly).
*/
import { describe, expect, test } from "bun:test";
import * as path from "node:path";
import { normalize, copyPasteGate, findExecutable, resolvePdftotext, PdftotextUnavailableError } from "../src/pdftotext";
// normalize(): each test pins one rule with a minimal input and its exact
// expected output.
describe("normalize", () => {
test("strips trailing spaces", () => {
expect(normalize("hello \nworld")).toBe("hello\nworld");
});
test("collapses runs of 3+ blank lines to 2", () => {
expect(normalize("a\n\n\n\nb")).toBe("a\n\nb");
});
test("converts form feeds to double newlines (page break boundary)", () => {
expect(normalize("page1\fpage2")).toBe("page1\n\npage2");
});
test("normalizes CRLF and CR to LF (Windows Xpdf)", () => {
expect(normalize("a\r\nb\rc")).toBe("a\nb\nc");
});
test("removes soft hyphens (hyphens: auto artifact)", () => {
// U+00AD is the soft hyphen.
expect(normalize("extra\u00adordinary")).toBe("extraordinary");
});
test("replaces non-breaking space with regular space", () => {
expect(normalize("hello\u00a0world")).toBe("hello world");
});
test("strips zero-width characters", () => {
// U+200B zero-width space, U+200C zero-width non-joiner.
expect(normalize("a\u200bb\u200cc")).toBe("abc");
});
test("NFC-normalizes composed glyphs (macOS NFD → Linux NFC)", () => {
// "é" composed vs decomposed
const decomposed = "e\u0301";
const composed = "\u00e9";
expect(normalize(decomposed)).toBe(composed);
});
test("trims leading/trailing whitespace on whole string", () => {
expect(normalize("\n\n hello \n\n")).toBe("hello");
});
});
// NOTE(review): none of these tests call copyPasteGate(); they re-declare the
// fragment regex and the break-count arithmetic locally. Presumably these
// mirror the gate's implementation — verify they stay in sync with it.
describe("copyPasteGate — assertion logic", () => {
// These tests exercise the gate's internal assertions by mocking the
// pdftotext step. We can't easily run the real binary in every test
// env, so we verify the assertion logic directly via fake inputs.
//
// The gate takes a PDF path — but assertion #1 (paragraph presence) and
// #2 (per-glyph emission) are string operations we can validate here.
test("flags 'S ai li ng' per-glyph emission when reassembled letters appear in source", () => {
// Build expected/extracted strings that would trip the gate.
const expected = "Sailing on the open sea.";
const extracted = "S a i l i n g on the open sea.";
// Simulate by running normalize + assertion manually; the regex is
// looked at in the gate.
// Matches a run of four or more single word characters, each followed by
// one whitespace character.
const fragRegex = /((?:\b\w\s){4,})/g;
const match = fragRegex.exec(extracted);
expect(match).not.toBeNull();
if (match) {
const letters = match[1].replace(/\s/g, "");
expect(letters.toLowerCase()).toBe("sailing");
expect(expected.toLowerCase().includes(letters.toLowerCase())).toBe(true);
}
});
test("does NOT flag 'A B C D' as per-glyph when letters don't appear in source", () => {
const expected = "The quick brown fox.";
const extracted = "The quick A B C D brown fox.";
const fragRegex = /((?:\b\w\s){4,})/g;
const match = fragRegex.exec(extracted);
// NOTE: if the regex did not match, this test would pass without asserting.
if (match) {
const letters = match[1].replace(/\s/g, "");
// "ABCD" is not a substring of expected
expect(expected.toLowerCase().includes(letters.toLowerCase())).toBe(false);
}
});
test("paragraph boundary count drift calculation", () => {
const expected = "para1\n\npara2\n\npara3";
const extractedOk = "para1\n\npara2\n\npara3";
const extractedTooFew = "para1 para2 para3";
const extractedTooMany = "para1\n\n\n\npara2\n\n\n\npara3\n\n\n\npara4\n\n\n\npara5";
// Paragraph breaks are counted as occurrences of "\n\n".
const expectedBreaks = (expected.match(/\n\n/g) || []).length;
const okBreaks = (extractedOk.match(/\n\n/g) || []).length;
const tooFewBreaks = (extractedTooFew.match(/\n\n/g) || []).length;
const tooManyBreaksNormalized = (normalize(extractedTooMany).match(/\n\n/g) || []).length;
expect(Math.abs(expectedBreaks - okBreaks)).toBeLessThanOrEqual(4);
expect(Math.abs(expectedBreaks - tooFewBreaks)).toBeGreaterThan(1);
// After normalize, 3+ newlines become 2, so the count matches
expect(Math.abs(expectedBreaks - tooManyBreaksNormalized)).toBeLessThanOrEqual(4);
});
});
// ─── Binary resolution (v1.24-aligned) ──────────────────────────
// An executable that is always present on the current platform; the
// override tests use it as a stand-in for a real pdftotext binary.
const REAL_EXE: string = (() => {
  if (process.platform !== "win32") return "/bin/sh";
  const systemRoot = process.env.SystemRoot ?? "C:\\Windows";
  return path.join(systemRoot, "System32", "cmd.exe");
})();
/**
 * Temporarily apply environment overrides around a synchronous callback.
 *
 * @param overrides Variable name → value; `undefined` deletes the variable.
 * @param fn        Callback to run while the overrides are in effect.
 * @returns The callback's return value. Previous values are restored
 *          whether the callback returns or throws.
 */
function withEnv<T>(overrides: Record<string, string | undefined>, fn: () => T): T {
  const setOrUnset = ([key, value]: [string, string | undefined]): void => {
    if (value !== undefined) {
      process.env[key] = value;
      return;
    }
    delete process.env[key];
  };
  // Capture the current values before touching anything.
  const previous: Array<[string, string | undefined]> = Object.keys(overrides).map(
    (key): [string, string | undefined] => [key, process.env[key]],
  );
  Object.entries(overrides).forEach(setOrUnset);
  try {
    return fn();
  } finally {
    previous.forEach(setOrUnset);
  }
}
// findExecutable as exported by pdftotext.ts: same three cases as the
// browseClient copy, each lane exercising only its own platform branch.
describe("findExecutable (pdftotext.ts)", () => {
  const onWindows = process.platform === "win32";

  test("returns the bare path on POSIX when it's executable", () => {
    if (onWindows) return;
    const found = findExecutable("/bin/sh");
    expect(found).toBe("/bin/sh");
  });

  test("on win32, probes .exe / .cmd / .bat after the bare-path miss", () => {
    if (!onWindows) return;
    const systemRoot = process.env.SystemRoot ?? "C:\\Windows";
    const base = path.join(systemRoot, "System32", "cmd");
    const found = findExecutable(base);
    expect(found).toBe(base + ".exe");
  });

  test("returns null when no extension matches", () => {
    const found = findExecutable("/nonexistent/path/to/nothing");
    expect(found).toBeNull();
  });
});
// resolvePdftotext: env-override rules (GSTACK_PDFTOTEXT_BIN, the
// PDFTOTEXT_BIN alias, precedence, quote stripping) and the contents of the
// "not found" error message. Only `info.bin` is asserted on success.
describe("resolvePdftotext (override resolution, v1.24-aligned)", () => {
test("honors GSTACK_PDFTOTEXT_BIN when it points at a real executable", () => {
// We can't fake a real pdftotext, but we can fake "any executable" to
// exercise the override-resolution path. describeBinary will mark flavor
// as "unknown" since cmd.exe / /bin/sh don't respond to -v like pdftotext;
// the test asserts on the bin-path resolution, not the version probe.
const info = withEnv({ GSTACK_PDFTOTEXT_BIN: REAL_EXE }, () => resolvePdftotext());
expect(info.bin).toBe(REAL_EXE);
});
test("honors PDFTOTEXT_BIN as a back-compat alias", () => {
// The primary variable is explicitly unset so only the alias can match.
const info = withEnv(
{ GSTACK_PDFTOTEXT_BIN: undefined, PDFTOTEXT_BIN: REAL_EXE },
() => resolvePdftotext(),
);
expect(info.bin).toBe(REAL_EXE);
});
test("GSTACK_PDFTOTEXT_BIN takes precedence over PDFTOTEXT_BIN", () => {
const info = withEnv(
{ GSTACK_PDFTOTEXT_BIN: REAL_EXE, PDFTOTEXT_BIN: "/nonexistent/legacy" },
() => resolvePdftotext(),
);
expect(info.bin).toBe(REAL_EXE);
});
test("strips wrapping double quotes from override values", () => {
const info = withEnv({ GSTACK_PDFTOTEXT_BIN: `"${REAL_EXE}"` }, () => resolvePdftotext());
expect(info.bin).toBe(REAL_EXE);
});
test("error message includes Windows install hint and GSTACK_PDFTOTEXT_BIN", () => {
let thrown: unknown = null;
try {
withEnv(
{
GSTACK_PDFTOTEXT_BIN: "/nonexistent/gstack-pdftotext",
PDFTOTEXT_BIN: "/nonexistent/pdftotext",
// Both spellings are cleared: Windows conventionally uses `Path`.
PATH: "",
Path: "",
},
() => resolvePdftotext(),
);
} catch (err) {
thrown = err;
}
// If the test box has a real pdftotext on disk, resolution succeeds
// (POSIX candidates) — that's fine; the assertion is gated on whether
// it threw. On Windows-CI without poppler, it throws.
if (thrown) {
expect(thrown).toBeInstanceOf(PdftotextUnavailableError);
expect((thrown as Error).message).toContain("pdftotext not found");
expect((thrown as Error).message).toContain("GSTACK_PDFTOTEXT_BIN");
expect((thrown as Error).message).toContain("Windows");
expect((thrown as Error).message).toContain("scoop install poppler");
}
});
});

View File

@@ -0,0 +1,449 @@
/**
* Renderer unit tests — pure-function assertions for render.ts, smartypants.ts,
* and print-css.ts. No Playwright, no PDF generation.
*/
import { describe, expect, test } from "bun:test";
import { render, sanitizeUntrustedHtml } from "../src/render";
import { smartypants } from "../src/smartypants";
import { printCss } from "../src/print-css";
// ─── smartypants ──────────────────────────────────────────────
// smartypants(): the first four tests pin the typographic conversions, the
// rest pin the zones that must come back unchanged (code, pre, URLs,
// attribute values).
describe("smartypants", () => {
test("converts straight double quotes to curly", () => {
const out = smartypants(`<p>She said "hello" to him.</p>`);
expect(out).toContain("\u201chello\u201d");
});
test("converts em dash (--)", () => {
const out = smartypants(`<p>This is it -- the answer.</p>`);
expect(out).toContain("\u2014");
});
test("converts ellipsis (...)", () => {
const out = smartypants(`<p>Wait...</p>`);
expect(out).toContain("\u2026");
});
test("converts apostrophes in contractions", () => {
const out = smartypants(`<p>don't you know?</p>`);
expect(out).toContain("don\u2019t");
});
test("does NOT touch content inside <code> blocks", () => {
const input = `<pre><code>const x = "hello"; // it's fine</code></pre>`;
const out = smartypants(input);
expect(out).toBe(input); // unchanged
});
test("does NOT touch content inside <pre> blocks", () => {
const input = `<pre>"quoted" -- don't</pre>`;
const out = smartypants(input);
expect(out).toBe(input);
});
test("does NOT touch inline code", () => {
// Code content stays ASCII while the surrounding prose is still converted.
const out = smartypants(`<p>Use <code>it's</code> like this: "hello".</p>`);
expect(out).toContain("<code>it's</code>");
expect(out).toContain("\u201chello\u201d");
});
test("does NOT touch URLs", () => {
const out = smartypants(`<p>Visit https://example.com/it's-page for "details".</p>`);
expect(out).toContain("https://example.com/it's-page");
expect(out).toContain("\u201cdetails\u201d");
});
test("does NOT touch HTML attribute values", () => {
const out = smartypants(`<a href="it's-a-test.html">link</a>`);
expect(out).toContain(`href="it's-a-test.html"`);
});
test("does NOT convert -- in CLI flags", () => {
// Prose like "try --verbose mode" should not turn -- into em dash
const out = smartypants(`<p>Try --verbose mode.</p>`);
// Since "--" is followed by a word char but not preceded by word/space,
// it should remain intact. We're lenient here — acceptable either way.
// NOTE: the pattern accepts both outcomes, so this test does not actually
// enforce the behaviour named in its title.
expect(out).toMatch(/--verbose|—verbose/);
});
});
// ─── sanitizer ──────────────────────────────────────────────
// sanitizeUntrustedHtml(): each test feeds one class of dangerous markup and
// asserts it is absent from the output; where relevant it also asserts that
// harmless neighbouring markup survives.
describe("sanitizeUntrustedHtml", () => {
test("strips <script> tags and content", () => {
const input = `<p>hello</p><script>alert(1)</script><p>world</p>`;
const out = sanitizeUntrustedHtml(input);
expect(out).not.toContain("<script");
// The script body must go too, not just the tags.
expect(out).not.toContain("alert");
expect(out).toContain("<p>hello</p>");
expect(out).toContain("<p>world</p>");
});
test("strips <iframe>", () => {
const input = `<p>hi</p><iframe src="evil.com"></iframe>`;
expect(sanitizeUntrustedHtml(input)).not.toContain("<iframe");
});
test("strips onclick attribute", () => {
const input = `<a href="#" onclick="alert(1)">click</a>`;
const out = sanitizeUntrustedHtml(input);
expect(out).not.toContain("onclick");
// The safe attribute on the same element is kept.
expect(out).toContain("href=\"#\"");
});
test("strips event handlers with mixed case (onClick, ONCLICK)", () => {
const input1 = `<a href="#" onClick="x()">a</a>`;
const input2 = `<a href="#" ONCLICK="x()">b</a>`;
expect(sanitizeUntrustedHtml(input1)).not.toContain("onClick");
expect(sanitizeUntrustedHtml(input2)).not.toContain("ONCLICK");
});
test("rewrites javascript: URLs in href to #", () => {
const input = `<a href="javascript:alert(1)">bad</a>`;
const out = sanitizeUntrustedHtml(input);
expect(out).not.toContain("javascript:");
expect(out).toContain('href="#"');
});
test("strips inline SVG <script>", () => {
const input = `<svg><script>alert(1)</script><circle r="5"/></svg>`;
const out = sanitizeUntrustedHtml(input);
expect(out).not.toContain("<script");
expect(out).toContain("<circle");
});
test("strips <object>, <embed>, <link>, <meta>, <base>, <form>", () => {
const input = `
<object data="x.swf"></object>
<embed src="y.mov">
<link rel="stylesheet" href="evil.css">
<meta http-equiv="refresh" content="0;url=evil">
<base href="evil.com">
<form action="evil"><input/></form>
`;
const out = sanitizeUntrustedHtml(input);
expect(out).not.toContain("<object");
expect(out).not.toContain("<embed");
expect(out).not.toContain("<link");
expect(out).not.toContain("<meta");
expect(out).not.toContain("<base");
expect(out).not.toContain("<form");
});
test("strips srcdoc attribute (iframe escape vector)", () => {
const input = `<div srcdoc="<script>bad</script>">hi</div>`;
expect(sanitizeUntrustedHtml(input)).not.toContain("srcdoc");
});
});
// ─── end-to-end render ──────────────────────────────────────────────
describe("render (end-to-end)", () => {
test("produces a full HTML document with title, body, and CSS", () => {
const result = render({
markdown: `# Hello\n\nA paragraph with "quotes" and -- dashes.\n`,
});
expect(result.html).toContain("<!doctype html>");
expect(result.html).toContain("<title>Hello</title>");
expect(result.html).toContain("<h1");
expect(result.html).toContain("Hello");
// CSS should be inlined as <style>...
expect(result.html).toMatch(/<style>[\s\S]*font-family: Helvetica/);
// Smartypants ran
expect(result.html).toContain("\u201cquotes\u201d");
expect(result.html).toContain("\u2014");
});
test("derives title from first H1 when --title is not passed", () => {
const result = render({ markdown: `# My Title\n\nBody.` });
expect(result.meta.title).toBe("My Title");
});
test("uses --title override when provided", () => {
const result = render({
markdown: `# Auto-derived\n\nBody.`,
title: "Explicit Title",
});
expect(result.meta.title).toBe("Explicit Title");
});
test("includes cover block when cover=true", () => {
const result = render({
markdown: `# Doc\n\nBody.`,
cover: true,
subtitle: "A subtitle",
author: "Garry Tan",
});
expect(result.html).toContain(`class="cover"`);
expect(result.html).toContain(`class="cover-title"`);
expect(result.html).toContain("A subtitle");
expect(result.html).toContain("Garry Tan");
});
test("omits cover block when cover=false", () => {
const result = render({ markdown: `# Memo\n\nBody.` });
expect(result.html).not.toContain(`class="cover"`);
});
test("injects watermark element when --watermark is set", () => {
const result = render({ markdown: `# Doc`, watermark: "DRAFT" });
expect(result.html).toContain(`class="watermark"`);
expect(result.html).toContain("DRAFT");
// And the CSS rule for it must be present
expect(result.html).toContain("position: fixed");
expect(result.html).toContain("rotate(-30deg)");
});
test("wraps each H1 in its own .chapter section (default)", () => {
const result = render({
markdown: `# One\n\nbody 1\n\n# Two\n\nbody 2\n`,
});
const chapterMatches = result.html.match(/class="chapter"/g);
expect(chapterMatches).toBeTruthy();
if (chapterMatches) expect(chapterMatches.length).toBe(2);
});
test("does NOT create chapter sections when noChapterBreaks=true", () => {
  const { html } = render({
    markdown: `# One\n\nbody\n\n# Two\n\nbody\n`,
    noChapterBreaks: true,
  });
  // Two H1s are rendered, yet exactly ONE class="chapter" occurrence is expected.
  // NOTE(review): presumably the whole document is emitted as a single chapter
  // wrapper in this mode, which is why the count is 1 rather than 0 — confirm
  // against render().
  const found = html.match(/class="chapter"/g);
  const chapterCount = found === null ? 0 : found.length;
  expect(chapterCount).toBe(1);
});
test("builds a TOC with H1/H2 entries when toc=true", () => {
  const { html } = render({
    markdown: `# One\n\n## Sub\n\nbody\n\n# Two\n\nbody\n`,
    toc: true,
  });
  // The TOC container, its "Contents" heading, and the text of every H1/H2
  // from the markdown must be present in the output.
  const expectedFragments = [
    `class="toc"`,
    `<h2>Contents</h2>`,
    "One",
    "Sub",
    "Two",
  ];
  for (const fragment of expectedFragments) {
    expect(html).toContain(fragment);
  }
});
test("strips dangerous HTML from untrusted markdown", () => {
  const { html } = render({
    markdown: `# Safe\n\n<script>alert('xss')</script>\n\nBody.`,
  });
  // Neither the opening script tag nor its payload may survive rendering.
  for (const forbidden of ["<script", "alert"]) {
    expect(html).not.toContain(forbidden);
  }
  // The harmless heading text must still be rendered.
  expect(html).toContain("Safe");
});
test("respects text-align: left — no justify in print CSS", () => {
  // Aliased to `css` so the local does not shadow the `printCss` function.
  const { printCss: css } = render({ markdown: `para1\n\npara2\n` });
  // Design-review rule: paragraphs are left-aligned, never justified, and
  // carry no text-indent.
  expect(css).toContain("text-align: left");
  for (const banned of ["text-align: justify", "text-indent"]) {
    expect(css).not.toContain(banned);
  }
});
test("includes CJK font fallback in body", () => {
  // Aliased to `css` so the local does not shadow the `printCss` function.
  const { printCss: css } = render({ markdown: `body` });
  // Both CJK fallback family names are expected somewhere in the print CSS.
  for (const family of ["Hiragino Kaku Gothic", "Noto Sans CJK"]) {
    expect(css).toContain(family);
  }
});
});
// ─── print-css ──────────────────────────────────────────────
describe("printCss", () => {
  // Matches the @bottom-center margin box whose content is the page counter.
  // Declared once (no `g` flag, so it carries no lastIndex state between uses).
  const PAGE_NUMBER_RULE = /@bottom-center\s*\{\s*content:\s*counter\(page\)/;

  test("emits 1in margins by default", () => {
    const css = printCss();
    expect(css).toContain("margin: 1in");
  });

  test("respects custom margins flag", () => {
    const css = printCss({ margins: "72pt" });
    expect(css).toContain("margin: 72pt");
  });

  test("emits letter page size by default", () => {
    const css = printCss();
    expect(css).toContain("size: letter");
  });

  test("respects custom page size", () => {
    const css = printCss({ pageSize: "a4" });
    expect(css).toContain("size: a4");
  });

  test("suppresses running header and footer on cover page", () => {
    const css = printCss();
    // The @page :first block must contain two `content: none` declarations.
    expect(css).toMatch(/@page\s*:first\s*\{[\s\S]*?content:\s*none[\s\S]*?content:\s*none/);
  });

  test("omits CONFIDENTIAL when confidential=false", () => {
    const css = printCss({ confidential: false });
    expect(css).not.toContain("CONFIDENTIAL");
  });

  test("emits watermark CSS only when watermark is set", () => {
    const withWatermark = printCss({ watermark: "DRAFT" });
    expect(withWatermark).toContain(".watermark");
    expect(withWatermark).toContain("rotate(-30deg)");
    const withoutWatermark = printCss();
    expect(withoutWatermark).not.toContain(".watermark");
  });

  test("drops chapter break rule when noChapterBreaks=true", () => {
    const on = printCss({ noChapterBreaks: false });
    expect(on).toContain("break-before: page");
    const off = printCss({ noChapterBreaks: true });
    // Whitespace-tolerant on purpose: an exact-string check for
    // ".chapter { break-before: page" would pass vacuously if the rule were
    // emitted with different spacing or after another declaration.
    expect(off).not.toMatch(/\.chapter\s*\{[^}]*break-before:\s*page/);
  });

  test("always sets p { text-align: left }", () => {
    const css = printCss();
    expect(css).toContain("text-align: left");
  });

  test("never sets text-indent on p", () => {
    const css = printCss();
    // Confirm no p-indent slipped in (this checks the `p + p` selector only).
    expect(css).not.toMatch(/p\s*\+\s*p\s*\{[^}]*text-indent/);
  });

  test("emits @bottom-center page-number rule by default", () => {
    const css = printCss();
    expect(css).toMatch(PAGE_NUMBER_RULE);
  });

  test("suppresses @bottom-center page-number rule when pageNumbers=false", () => {
    const css = printCss({ pageNumbers: false });
    expect(css).not.toMatch(PAGE_NUMBER_RULE);
  });

  test("still emits @bottom-center when pageNumbers=true (explicit)", () => {
    const css = printCss({ pageNumbers: true });
    expect(css).toMatch(PAGE_NUMBER_RULE);
  });

  test("font stacks include Liberation Sans adjacent to Helvetica", () => {
    const css = printCss({ confidential: true });
    // Body stack
    expect(css).toMatch(/font-family:\s*Helvetica,\s*"Liberation Sans",\s*Arial/);
    // At least one @page margin box (running header / page number / CONFIDENTIAL)
    // should also have the updated stack.
    const marginBoxStacks = css.match(/@(top|bottom)-(center|right)\s*\{[^}]*Liberation Sans/g) ?? [];
    expect(marginBoxStacks.length).toBeGreaterThanOrEqual(1);
  });

  test("all four original Helvetica stacks now include Liberation Sans", () => {
    const css = printCss({ runningHeader: "Running Title", confidential: true });
    // Count: body (1) + running header (1) + page numbers (1) + confidential (1) = 4
    const occurrences = (css.match(/"Liberation Sans"/g) ?? []).length;
    expect(occurrences).toBeGreaterThanOrEqual(4);
  });
});
// ─── render() — pageNumbers / footerTemplate data flow ───────────────
describe("render() — pageNumbers data flow", () => {
  // The @bottom-center margin box whose content is the page counter; every
  // test below asserts its presence or absence in render()'s printCss output.
  const cssPageFooter = /@bottom-center\s*\{\s*content:\s*counter\(page\)/;

  test("CSS footer renders by default", () => {
    const { printCss: css } = render({ markdown: `# Doc\n\nBody.` });
    expect(css).toMatch(cssPageFooter);
  });

  test("--no-page-numbers reaches the CSS layer", () => {
    const { printCss: css } = render({
      markdown: `# Doc\n\nBody.`,
      pageNumbers: false,
    });
    expect(css).not.toMatch(cssPageFooter);
  });

  test("footerTemplate suppresses CSS page numbers (custom footer wins)", () => {
    const { printCss: css } = render({
      markdown: `# Doc\n\nBody.`,
      footerTemplate: `<div class="foo">custom</div>`,
    });
    expect(css).not.toMatch(cssPageFooter);
  });

  test("pageNumbers=true + no footerTemplate keeps CSS footer", () => {
    const { printCss: css } = render({ markdown: `# Doc`, pageNumbers: true });
    expect(css).toMatch(cssPageFooter);
  });
});
// ─── render() — HTML entity handling in titles, cover, TOC ───────────
describe("render() — no double HTML entity escaping", () => {
  type Case = { char: string; inTitle: string; expectedTitleMeta: string };

  // An entity whose leading "&" was escaped a second time, e.g. "&amp;lt;".
  // Covers named (amp/lt/gt/quot), decimal (#169) and hex (#xA9) forms so every
  // check in this suite looks for the same set of double-escape patterns.
  const DOUBLE_ESCAPED = "&amp;(?:amp|lt|gt|quot|#\\d+|#x[0-9a-fA-F]+);";
  const doubleEscaped = new RegExp(DOUBLE_ESCAPED);
  const doubleEscapedInTitle = new RegExp(`<title>[^<]*${DOUBLE_ESCAPED}`);
  const doubleEscapedInCover = new RegExp(`class="cover-title"[^>]*>[^<]*${DOUBLE_ESCAPED}`);

  // Only characters that should flow through unchanged. `"` and `'` are
  // omitted from this set because smartypants converts them to curly quotes
  // before heading extraction — asserted separately below.
  const cases: Case[] = [
    { char: "&", inTitle: "A & B", expectedTitleMeta: "A & B" },
    { char: "<", inTitle: "A < B", expectedTitleMeta: "A < B" },
    { char: ">", inTitle: "A > B", expectedTitleMeta: "A > B" },
    { char: "©", inTitle: "A © B", expectedTitleMeta: "A © B" },
    { char: "—", inTitle: "A — B", expectedTitleMeta: "A — B" },
  ];

  for (const { char, inTitle, expectedTitleMeta } of cases) {
    test(`"${char}" in H1 has no double-escape in <title> or cover`, () => {
      const result = render({
        markdown: `# ${inTitle}\n\nBody.`,
        cover: true,
        author: "A",
      });
      // Meta: decoded plain text.
      expect(result.meta.title).toBe(expectedTitleMeta);
      // HTML: <title>...</title> never contains double-escape patterns.
      expect(result.html).not.toMatch(doubleEscapedInTitle);
      // Cover block also single-escape. Checked against every double-escape
      // form, not just "&amp;amp;", so the "<", ">" and "©" cases are not vacuous.
      expect(result.html).not.toMatch(doubleEscapedInCover);
    });
  }

  test('ampersand in <title> renders as exactly one "&amp;"', () => {
    const result = render({ markdown: `# Faber & Faber\n\nBody.` });
    expect(result.html).toContain("<title>Faber &amp; Faber</title>");
    expect(result.html).not.toContain("&amp;amp;");
  });

  test("TOC entries have no double-escape when a heading contains '&'", () => {
    const result = render({
      markdown: `# Doc\n\n## Faber & Faber\n\nBody.\n\n## Other\n\nMore.`,
      toc: true,
    });
    // TOC renders the heading text through escapeHtml; must be single-escaped.
    expect(result.html).toContain("Faber &amp; Faber");
    expect(result.html).not.toContain("&amp;amp;");
  });

  test('numeric entity in H1 (e.g. "&#169;") decodes cleanly to <title>', () => {
    // Marked passes through numeric entities verbatim in the HTML output,
    // so the decoder must handle them.
    const result = render({ markdown: `# A &#169; B\n\nBody.` });
    expect(result.meta.title).toBe("A © B");
    expect(result.html).toContain("<title>A © B</title>");
  });

  test("smartypants converts raw quotes in title BEFORE extraction (contract)", () => {
    // We do NOT assert raw `"` survives — smartypants is expected to convert it.
    // The contract is: no double-escape of the encoded form.
    const result = render({ markdown: `# Say "hi"\n\nBody.` });
    expect(result.html).not.toContain("&amp;quot;");
    expect(result.html).not.toContain("&amp;#39;");
    // And <title> contains exactly one level of escaping.
    const titleMatch = result.html.match(/<title>([^<]*)<\/title>/);
    expect(titleMatch).toBeTruthy();
    // Unconditional on purpose: a missing <title> already fails above, and the
    // fallback to "" keeps this assertion from being skipped behind an `if`.
    const titleText = titleMatch?.[1] ?? "";
    expect(titleText).not.toMatch(doubleEscaped);
  });
});