Initial import from garrytan/gstack@026751e (main snapshot via local relay)
Some checks failed
Workflow Lint / actionlint (push) Has been cancelled
Build CI Image / build (push) Has been cancelled
Skill Docs Freshness / check-freshness (push) Has been cancelled
Periodic Evals / build-image (push) Has been cancelled
Periodic Evals / evals (map[file:test/codex-e2e.test.ts name:e2e-codex]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/gemini-e2e.test.ts name:e2e-gemini]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-design.test.ts name:e2e-design]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-plan.test.ts name:e2e-plan]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-bugs.test.ts name:e2e-qa-bugs]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-workflow.test.ts name:e2e-qa-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-review.test.ts name:e2e-review]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-workflow.test.ts name:e2e-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-routing-e2e.test.ts name:e2e-routing]) (push) Has been cancelled
Some checks failed
Workflow Lint / actionlint (push) Has been cancelled
Build CI Image / build (push) Has been cancelled
Skill Docs Freshness / check-freshness (push) Has been cancelled
Periodic Evals / build-image (push) Has been cancelled
Periodic Evals / evals (map[file:test/codex-e2e.test.ts name:e2e-codex]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/gemini-e2e.test.ts name:e2e-gemini]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-design.test.ts name:e2e-design]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-plan.test.ts name:e2e-plan]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-bugs.test.ts name:e2e-qa-bugs]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-workflow.test.ts name:e2e-qa-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-review.test.ts name:e2e-review]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-workflow.test.ts name:e2e-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-routing-e2e.test.ts name:e2e-routing]) (push) Has been cancelled
Source: https://github.com/garrytan/gstack/commit/026751e
This commit is contained in:
8
.github/workflows/actionlint.yml
vendored
Normal file
8
.github/workflows/actionlint.yml
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
# Runs the rhysd/actionlint action against the checked-out repository.
name: Workflow Lint

# Triggered by every push and every pull request.
on:
  - push
  - pull_request

jobs:
  actionlint:
    runs-on: ubicloud-standard-8
    steps:
      - uses: actions/checkout@v4
      - uses: rhysd/actionlint@v1.7.11
|
||||
41
.github/workflows/ci-image.yml
vendored
Normal file
41
.github/workflows/ci-image.yml
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
# Builds the CI Docker image and pushes it to ghcr.io, tagged both `latest`
# and with the commit SHA.
name: Build CI Image

on:
  # Manual trigger
  workflow_dispatch:
  # Rebuild on Dockerfile or lockfile changes
  push:
    branches:
      - main
    paths:
      - '.github/docker/Dockerfile.ci'
      - 'package.json'
      - 'bun.lock'
  # Rebuild weekly (Monday 6am UTC) to pick up CLI updates
  schedule:
    - cron: '0 6 * * 1'

jobs:
  build:
    runs-on: ubicloud-standard-8
    permissions:
      contents: read
      packages: write
    steps:
      - uses: actions/checkout@v4

      # The Docker build context is .github/docker, so package.json and the
      # lockfile are copied into it before the build.
      - run: cp package.json bun.lock .github/docker/

      - uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - uses: docker/build-push-action@v6
        with:
          context: .github/docker
          file: .github/docker/Dockerfile.ci
          push: true
          tags: |
            ghcr.io/${{ github.repository }}/ci:latest
            ghcr.io/${{ github.repository }}/ci:${{ github.sha }}
|
||||
133
.github/workflows/evals-periodic.yml
vendored
Normal file
133
.github/workflows/evals-periodic.yml
vendored
Normal file
@@ -0,0 +1,133 @@
|
||||
# Runs the periodic-tier eval suites, one matrix job per suite, on a weekly
# schedule or on demand.
name: Periodic Evals

on:
  workflow_dispatch:
  schedule:
    - cron: '0 6 * * 1'  # Monday 6 AM UTC

# A single shared group: a newer run cancels the one still in progress.
concurrency:
  group: evals-periodic
  cancel-in-progress: true

env:
  IMAGE: ghcr.io/${{ github.repository }}/ci
  EVALS_TIER: periodic
  EVALS_ALL: "1"  # Ignore diff — run all periodic tests

jobs:
  # Derives the image tag from a hash of the Dockerfile, package.json and
  # bun.lock, then builds and pushes the image only if that tag is missing
  # from the registry.
  build-image:
    runs-on: ubicloud-standard-8
    permissions:
      contents: read
      packages: write
    outputs:
      image-tag: ${{ steps.meta.outputs.tag }}
    steps:
      - uses: actions/checkout@v4

      - id: meta
        run: echo "tag=${{ env.IMAGE }}:${{ hashFiles('.github/docker/Dockerfile.ci', 'package.json', 'bun.lock') }}" >> "$GITHUB_OUTPUT"

      - uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Check if image exists
        id: check
        run: |
          # Default to "missing"; flip only when the manifest can be inspected.
          exists=false
          if docker manifest inspect ${{ steps.meta.outputs.tag }} > /dev/null 2>&1; then
            exists=true
          fi
          echo "exists=$exists" >> "$GITHUB_OUTPUT"

      - if: steps.check.outputs.exists == 'false'
        run: cp package.json bun.lock .github/docker/

      - if: steps.check.outputs.exists == 'false'
        uses: docker/build-push-action@v6
        with:
          context: .github/docker
          file: .github/docker/Dockerfile.ci
          push: true
          tags: |
            ${{ steps.meta.outputs.tag }}
            ${{ env.IMAGE }}:latest

  # One job per suite, each running inside the image resolved by build-image.
  evals:
    needs: build-image
    runs-on: ubicloud-standard-8
    timeout-minutes: 25
    container:
      image: ${{ needs.build-image.outputs.image-tag }}
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
      options: --user runner
    strategy:
      fail-fast: false  # a failing suite must not cancel its siblings
      matrix:
        suite:
          - name: e2e-plan
            file: test/skill-e2e-plan.test.ts
          - name: e2e-design
            file: test/skill-e2e-design.test.ts
          - name: e2e-qa-bugs
            file: test/skill-e2e-qa-bugs.test.ts
          - name: e2e-qa-workflow
            file: test/skill-e2e-qa-workflow.test.ts
          - name: e2e-review
            file: test/skill-e2e-review.test.ts
          - name: e2e-workflow
            file: test/skill-e2e-workflow.test.ts
          - name: e2e-routing
            file: test/skill-routing-e2e.test.ts
          - name: e2e-codex
            file: test/codex-e2e.test.ts
          - name: e2e-gemini
            file: test/gemini-e2e.test.ts
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # Point bun's cache and temp directories (and TMPDIR) at paths under
      # /home/runner for all later steps.
      - name: Fix bun temp
        run: |
          mkdir -p /home/runner/.cache/bun
          echo "BUN_INSTALL_CACHE_DIR=/home/runner/.cache/bun" >> "$GITHUB_ENV"
          echo "BUN_TMPDIR=/home/runner/.cache/bun" >> "$GITHUB_ENV"
          echo "TMPDIR=/home/runner/.cache" >> "$GITHUB_ENV"

      # Recursive copy (cp -r) instead of symlink: bun build resolves a
      # file's realpath when looking for sibling deps. See evals.yml for the
      # full explanation. cp -al would be faster but /opt and /workspace
      # are on different overlay-fs layers, so cross-device hardlink fails.
      - name: Restore deps
        run: |
          # Reuse the image's node_modules only when its recorded package.json
          # matches the checked-out one; otherwise install from scratch.
          if [ -d /opt/node_modules_cache ] && diff -q /opt/node_modules_cache/.package.json package.json >/dev/null 2>&1; then
            cp -r /opt/node_modules_cache node_modules
          else
            bun install
          fi

      - run: bun run build

      - name: Run ${{ matrix.suite.name }}
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          EVALS_CONCURRENCY: "40"
          PLAYWRIGHT_BROWSERS_PATH: /opt/playwright-browsers
        run: EVALS=1 bun test --retry 2 --concurrent --max-concurrency 40 ${{ matrix.suite.file }}

      # Uploaded even when the suite failed, so results are always available.
      - name: Upload eval results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: eval-periodic-${{ matrix.suite.name }}
          path: ~/.gstack-dev/evals/*.json
          retention-days: 90
|
||||
248
.github/workflows/evals.yml
vendored
Normal file
248
.github/workflows/evals.yml
vendored
Normal file
@@ -0,0 +1,248 @@
|
||||
# Gate-tier E2E evals for pull requests against main (also runnable manually).
# Three jobs: build-image -> evals (one matrix job per suite) -> report
# (aggregates the uploaded JSON results into a single PR comment).
name: E2E Evals
on:
  pull_request:
    branches: [main]
  workflow_dispatch:

concurrency:
  # github.head_ref is empty for workflow_dispatch runs, which would put every
  # manual run into the same "evals-" group and make them cancel each other.
  # Fall back to the unique run id so only runs of the same PR branch collide.
  group: evals-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

env:
  IMAGE: ghcr.io/${{ github.repository }}/ci
  EVALS_TIER: gate

jobs:
  # Build Docker image with pre-baked toolchain (cached — only rebuilds on Dockerfile/lockfile change)
  build-image:
    runs-on: ubicloud-standard-8
    permissions:
      contents: read
      packages: write
    outputs:
      image-tag: ${{ steps.meta.outputs.tag }}
    steps:
      - uses: actions/checkout@v4

      # Tag = hash of the files that define the image contents.
      - id: meta
        run: echo "tag=${{ env.IMAGE }}:${{ hashFiles('.github/docker/Dockerfile.ci', 'package.json', 'bun.lock') }}" >> "$GITHUB_OUTPUT"

      - uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Check if image exists
        id: check
        run: |
          if docker manifest inspect ${{ steps.meta.outputs.tag }} > /dev/null 2>&1; then
            echo "exists=true" >> "$GITHUB_OUTPUT"
          else
            echo "exists=false" >> "$GITHUB_OUTPUT"
          fi

      # The build context is .github/docker, so the manifest and lockfile are
      # copied into it first.
      - if: steps.check.outputs.exists == 'false'
        run: cp package.json bun.lock .github/docker/

      - if: steps.check.outputs.exists == 'false'
        uses: docker/build-push-action@v6
        with:
          context: .github/docker
          file: .github/docker/Dockerfile.ci
          push: true
          tags: |
            ${{ steps.meta.outputs.tag }}
            ${{ env.IMAGE }}:latest

  evals:
    # A suite may override the runner via `runner:`; otherwise the default is used.
    runs-on: ${{ matrix.suite.runner || 'ubicloud-standard-8' }}
    needs: build-image
    container:
      image: ${{ needs.build-image.outputs.image-tag }}
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
      options: --user runner
    timeout-minutes: 25
    strategy:
      fail-fast: false
      matrix:
        suite:
          - name: llm-judge
            file: test/skill-llm-eval.test.ts
          - name: e2e-browse
            file: test/skill-e2e-bws.test.ts
            runner: ubicloud-standard-8
          - name: e2e-plan
            file: test/skill-e2e-plan.test.ts
          - name: e2e-deploy
            file: test/skill-e2e-deploy.test.ts
          - name: e2e-design
            file: test/skill-e2e-design.test.ts
          - name: e2e-qa-bugs
            file: test/skill-e2e-qa-bugs.test.ts
          - name: e2e-qa-workflow
            file: test/skill-e2e-qa-workflow.test.ts
          - name: e2e-review
            file: test/skill-e2e-review.test.ts
          - name: e2e-workflow
            file: test/skill-e2e-workflow.test.ts
          - name: e2e-routing
            file: test/skill-routing-e2e.test.ts
          - name: e2e-codex
            file: test/codex-e2e.test.ts
          - name: e2e-gemini
            file: test/gemini-e2e.test.ts
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # Bun creates root-owned temp dirs during Docker build. GH Actions runs as
      # runner user with HOME=/github/home. Redirect bun's cache to a writable dir.
      - name: Fix bun temp
        run: |
          mkdir -p /home/runner/.cache/bun
          {
            echo "BUN_INSTALL_CACHE_DIR=/home/runner/.cache/bun"
            echo "BUN_TMPDIR=/home/runner/.cache/bun"
            echo "TMPDIR=/home/runner/.cache"
          } >> "$GITHUB_ENV"

      # Restore pre-installed node_modules from Docker image via recursive
      # copy. Symlink (`ln -s`) breaks bun's module resolution because bun
      # resolves a file's realpath when walking up to find node_modules/<dep>;
      # from a symlinked path, realpath escapes the workspace and sibling
      # deps no longer resolve. Hardlink copy (`cp -al`) fails because /opt
      # and /workspace are on different overlay-fs layers ("Invalid
      # cross-device link"). Recursive copy works on every layout. Cost:
      # ~5s for ~200 packages of small JS files vs ~0s for symlink — still
      # vastly cheaper than rerunning `bun install` (network + resolution).
      - name: Restore deps
        run: |
          if [ -d /opt/node_modules_cache ] && diff -q /opt/node_modules_cache/.package.json package.json >/dev/null 2>&1; then
            cp -r /opt/node_modules_cache node_modules
          else
            bun install
          fi

      - run: bun run build

      # Verify Playwright can launch Chromium (fails fast if sandbox/deps are broken)
      - name: Verify Chromium
        if: matrix.suite.name == 'e2e-browse'
        run: |
          echo "whoami=$(whoami) HOME=$HOME TMPDIR=${TMPDIR:-unset}"
          touch /tmp/.bun-test && rm /tmp/.bun-test && echo "/tmp writable"
          bun -e "import {chromium} from 'playwright';const b=await chromium.launch({args:['--no-sandbox']});console.log('Chromium OK');await b.close()"

      - name: Run ${{ matrix.suite.name }}
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          EVALS_CONCURRENCY: "40"
          PLAYWRIGHT_BROWSERS_PATH: /opt/playwright-browsers
        run: EVALS=1 bun test --retry 2 --concurrent --max-concurrency 40 ${{ matrix.suite.file }}

      # Uploaded even on failure so the report job can list the failing tests.
      - name: Upload eval results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: eval-${{ matrix.suite.name }}
          path: ~/.gstack-dev/evals/*.json
          retention-days: 90

  # Aggregates every eval-* artifact into one PR comment. Runs even when suites
  # failed, but only for pull_request events (manual runs have no PR to comment on).
  report:
    runs-on: ubicloud-standard-8
    needs: evals
    if: always() && github.event_name == 'pull_request'
    timeout-minutes: 5
    permissions:
      contents: read
      pull-requests: write
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Download all eval artifacts
        uses: actions/download-artifact@v4
        with:
          pattern: eval-*
          path: /tmp/eval-results
          merge-multiple: true

      - name: Post PR comment
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # shellcheck disable=SC2086,SC2059
          RESULTS=$(find /tmp/eval-results -name '*.json' 2>/dev/null | sort)
          if [ -z "$RESULTS" ]; then
            echo "No eval results found"
            exit 0
          fi

          # Sum totals across all result files and build one table row per file.
          TOTAL=0; PASSED=0; FAILED=0; COST="0"
          SUITE_LINES=""
          for f in $RESULTS; do
            if ! jq -e '.total_tests' "$f" >/dev/null 2>&1; then
              echo "Skipping malformed JSON: $f"
              continue
            fi
            T=$(jq -r '.total_tests // 0' "$f")
            P=$(jq -r '.passed // 0' "$f")
            F=$(jq -r '.failed // 0' "$f")
            C=$(jq -r '.total_cost_usd // 0' "$f")
            TIER=$(jq -r '.tier // "unknown"' "$f")
            [ "$T" -eq 0 ] && continue
            TOTAL=$((TOTAL + T))
            PASSED=$((PASSED + P))
            FAILED=$((FAILED + F))
            COST=$(echo "$COST + $C" | bc)
            STATUS_ICON="✅"
            [ "$F" -gt 0 ] && STATUS_ICON="❌"
            SUITE_LINES="${SUITE_LINES}| ${TIER} | ${P}/${T} | ${STATUS_ICON} | \$${C} |\n"
          done

          STATUS="✅ PASS"
          [ "$FAILED" -gt 0 ] && STATUS="❌ FAIL"

          BODY="## E2E Evals: ${STATUS}

          **${PASSED}/${TOTAL}** tests passed | **\$${COST}** total cost | **12 parallel runners**

          | Suite | Result | Status | Cost |
          |-------|--------|--------|------|
          $(echo -e "$SUITE_LINES")

          ---
          *12x ubicloud-standard-8 (Docker: pre-baked toolchain + deps) | wall clock ≈ slowest suite*"

          if [ "$FAILED" -gt 0 ]; then
            FAILURES=""
            for f in $RESULTS; do
              if ! jq -e '.failed' "$f" >/dev/null 2>&1; then continue; fi
              F=$(jq -r '.failed // 0' "$f")
              [ "$F" -eq 0 ] && continue
              FAILS=$(jq -r '.tests[] | select(.passed == false) | "- ❌ \(.name): \(.exit_reason // "unknown")"' "$f" 2>/dev/null || echo "- ⚠️ $(basename "$f"): parse error")
              FAILURES="${FAILURES}${FAILS}\n"
            done
            BODY="${BODY}

          ### Failures
          $(echo -e "$FAILURES")"
          fi

          # Update existing comment or create new one.
          # --paginate: the comments endpoint returns one page at a time, so
          # without it an existing report beyond the first page is not found
          # and a duplicate comment would be created on busy PRs.
          COMMENT_ID=$(gh api --paginate repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments \
            --jq '.[] | select(.body | startswith("## E2E Evals")) | .id' | tail -1)

          if [ -n "$COMMENT_ID" ]; then
            gh api "repos/${{ github.repository }}/issues/comments/${COMMENT_ID}" \
              -X PATCH -f body="$BODY"
          else
            gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY"
          fi
|
||||
80
.github/workflows/make-pdf-gate.yml
vendored
Normal file
80
.github/workflows/make-pdf-gate.yml
vendored
Normal file
@@ -0,0 +1,80 @@
|
||||
# Runs the make-pdf unit tests and the combined-features copy-paste gate on
# Linux and macOS whenever make-pdf or the browse sources it depends on change.
name: make-pdf copy-paste gate
on:
  pull_request:
    branches: [main]
    paths:
      - 'make-pdf/**'
      - 'browse/src/meta-commands.ts'
      - 'browse/src/write-commands.ts'
      - 'browse/src/commands.ts'
      - 'browse/src/cli.ts'
      - 'scripts/resolvers/make-pdf.ts'
      - 'package.json'
      - '.github/workflows/make-pdf-gate.yml'
  workflow_dispatch:

concurrency:
  # github.head_ref is empty for workflow_dispatch runs; fall back to the run
  # id so manual runs do not all share one group and cancel each other.
  group: make-pdf-gate-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  gate:
    strategy:
      fail-fast: false
      matrix:
        os: [ubicloud-standard-8, macos-latest]
        # Windows is tolerant-mode — Xpdf / Poppler-Windows extraction
        # differs enough from the Linux/macOS baseline that the strict
        # exact-diff gate is unreliable. Enable once the normalized
        # comparator proves tolerant enough (Codex round 2 #18).
        #
        # include:
        #   - os: windows-latest
        #     tolerant: true

    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4

      - uses: oven-sh/setup-bun@v2
        with:
          bun-version: latest

      - name: Install dependencies
        run: bun install --frozen-lockfile

      # poppler is installed per-OS; the toolchain log step below reports the
      # `pdftotext` binary it provides.
      - name: Install poppler (macOS)
        if: matrix.os == 'macos-latest'
        run: brew install poppler

      - name: Install poppler-utils (Ubuntu)
        if: matrix.os == 'ubicloud-standard-8'
        run: sudo apt-get update && sudo apt-get install -y poppler-utils

      - name: Install Playwright Chromium
        run: bunx playwright install chromium

      - name: Build binaries
        run: bun run build

      # Best-effort re-sign of each built binary; failures are ignored.
      - name: ad-hoc codesign (Apple Silicon)
        if: matrix.os == 'macos-latest'
        run: |
          for bin in browse/dist/browse browse/dist/find-browse design/dist/design make-pdf/dist/pdf; do
            codesign --remove-signature "$bin" 2>/dev/null || true
            codesign -s - -f "$bin" || true
          done

      - name: Log toolchain versions
        run: |
          echo "OS: ${{ matrix.os }}"
          bun --version
          which pdftotext && pdftotext -v 2>&1 | head -1 || true

      - name: Run make-pdf unit tests
        run: bun test make-pdf/test/*.test.ts

      - name: Run combined-features copy-paste gate (P0)
        env:
          BROWSE_BIN: ${{ github.workspace }}/browse/dist/browse
        run: bun test make-pdf/test/e2e/combined-gate.test.ts
|
||||
43
.github/workflows/pr-title-sync.yml
vendored
Normal file
43
.github/workflows/pr-title-sync.yml
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
# Rewrites a pull request's title via bin/gstack-pr-title-rewrite.sh whenever
# the PR touches the VERSION file and the computed title differs.
name: PR Title Sync

on:
  pull_request:
    types:
      - opened
      - synchronize
      - edited
    paths:
      - 'VERSION'

# One run per PR; a newer event cancels the run still in progress.
concurrency:
  group: pr-title-sync-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  sync:
    name: Sync PR title to VERSION
    # Skip events triggered by the Actions bot itself.
    if: github.actor != 'github-actions[bot]'
    runs-on: ubicloud-standard-8
    permissions:
      contents: read
      pull-requests: write
    steps:
      - name: Checkout PR head
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}
          fetch-depth: 1

      - name: Rewrite PR title to match VERSION
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PR_NUM: ${{ github.event.pull_request.number }}
          OLD_TITLE: ${{ github.event.pull_request.title }}
        run: |
          set -euo pipefail
          chmod +x ./bin/gstack-pr-title-rewrite.sh
          # VERSION with all whitespace (including the trailing newline) removed.
          VERSION=$(tr -d '[:space:]' < VERSION)
          NEW_TITLE=$(./bin/gstack-pr-title-rewrite.sh "$VERSION" "$OLD_TITLE")
          if [ "$NEW_TITLE" != "$OLD_TITLE" ]; then
            echo "Rewriting: $OLD_TITLE -> $NEW_TITLE"
            gh pr edit "$PR_NUM" --title "$NEW_TITLE"
          else
            echo "Title already correct; no change."
          fi
|
||||
33
.github/workflows/skill-docs.yml
vendored
Normal file
33
.github/workflows/skill-docs.yml
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
# Regenerates the skill docs for each host and fails when the regenerated
# output differs from what is committed.
name: Skill Docs Freshness

on:
  - push
  - pull_request

jobs:
  check-freshness:
    runs-on: ubicloud-standard-8
    steps:
      - uses: actions/checkout@v4
      - uses: oven-sh/setup-bun@v2
      - run: bun install

      # Default host: any diff in the working tree after regeneration is stale output.
      - name: Check Claude host freshness
        run: bun run gen:skill-docs
      - name: Verify Claude skill docs are fresh
        run: |
          if ! git diff --exit-code; then
            echo "Generated SKILL.md files are stale. Run: bun run gen:skill-docs"
            exit 1
          fi

      # Codex host: only .agents/ is compared.
      - name: Check Codex host freshness
        run: bun run gen:skill-docs --host codex
      - name: Verify Codex skill docs are fresh
        run: |
          if ! git diff --exit-code -- .agents/; then
            echo "Generated Codex SKILL.md files are stale. Run: bun run gen:skill-docs --host codex"
            exit 1
          fi

      # Factory host: only .factory/ is compared.
      - name: Generate Factory skill docs
        run: bun run gen:skill-docs --host factory
      - name: Verify Factory skill docs are fresh
        run: |
          if ! git diff --exit-code -- .factory/; then
            echo "Generated Factory SKILL.md files are stale. Run: bun run gen:skill-docs --host factory"
            exit 1
          fi
|
||||
74
.github/workflows/version-gate.yml
vendored
Normal file
74
.github/workflows/version-gate.yml
vendored
Normal file
@@ -0,0 +1,74 @@
|
||||
# Checks that the VERSION in a pull request is not stale relative to the
# version queue: reads the PR and base versions, detects the bump level, asks
# bin/gstack-next-version for the next free slot, then compares.
#
# Values that originate from the pull request (VERSION file contents, refs,
# step outputs derived from them) are handed to the shell through `env`
# rather than interpolated with ${{ }} into the script text, so their content
# can never be parsed as shell syntax.
name: Version Gate

on:
  pull_request:
    paths:
      - 'VERSION'
      - 'CHANGELOG.md'
      - 'package.json'

concurrency:
  group: version-gate-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  check:
    name: Check VERSION is not stale vs queue
    runs-on: ubicloud-standard-8
    permissions:
      contents: read
      pull-requests: read
    steps:
      - name: Checkout PR head
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Setup Bun
        uses: oven-sh/setup-bun@v2

      - name: Read versions
        id: versions
        env:
          ARG_BASE_REF: ${{ github.event.pull_request.base.ref }}
        run: |
          set -euo pipefail
          PR_VERSION=$(cat VERSION | tr -d '[:space:]')
          BASE_REF="$ARG_BASE_REF"
          # Best effort: a failed fetch falls through to the 0.0.0.0 default below.
          git fetch origin "$BASE_REF" --depth=1 --quiet || true
          BASE_VERSION=$(git show "origin/$BASE_REF:VERSION" 2>/dev/null | tr -d '[:space:]' || echo "0.0.0.0")
          {
            echo "pr_version=$PR_VERSION"
            echo "base_version=$BASE_VERSION"
            echo "base_ref=$BASE_REF"
          } >> "$GITHUB_OUTPUT"

      - name: Detect bump level
        id: bump
        env:
          ARG_BASE_VERSION: ${{ steps.versions.outputs.base_version }}
          ARG_PR_VERSION: ${{ steps.versions.outputs.pr_version }}
        run: |
          LEVEL=$(bun run scripts/detect-bump.ts "$ARG_BASE_VERSION" "$ARG_PR_VERSION")
          echo "level=$LEVEL" >> "$GITHUB_OUTPUT"

      - name: Query queue (util) — fail-open on error
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          ARG_BASE_REF: ${{ steps.versions.outputs.base_ref }}
          ARG_BUMP_LEVEL: ${{ steps.bump.outputs.level }}
          ARG_BASE_VERSION: ${{ steps.versions.outputs.base_version }}
          ARG_PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          # Errors are handled explicitly below instead of aborting the step.
          set +e
          bun run bin/gstack-next-version \
            --base "$ARG_BASE_REF" \
            --bump "$ARG_BUMP_LEVEL" \
            --current-version "$ARG_BASE_VERSION" \
            --workspace-root null \
            --exclude-pr "$ARG_PR_NUMBER" \
            > next.json 2> next.err
          RC=$?
          # Fail open: on a non-zero exit or empty output, write an "offline"
          # marker for the compare step and surface stderr as a warning.
          if [ "$RC" != "0" ] || [ ! -s next.json ]; then
            echo '{"offline":true}' > next.json
            echo "::warning::util exit=$RC — failing open. stderr:"
            cat next.err || true
          fi

      - name: Compare PR VERSION to next free slot
        env:
          PR_VERSION: ${{ steps.versions.outputs.pr_version }}
          ARG_PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          bun run scripts/compare-pr-version.ts next.json "$ARG_PR_NUMBER"
|
||||
121
.github/workflows/windows-free-tests.yml
vendored
Normal file
121
.github/workflows/windows-free-tests.yml
vendored
Normal file
@@ -0,0 +1,121 @@
|
||||
name: Windows Free Tests

# Curated subset of the free test suite that runs on GitHub's free hosted
# Windows runner.
#
# Codex's v1.18.0.0 review flagged that the existing evals.yml workflow uses
# a Linux container, so a windows-latest matrix entry there isn't a drop-in.
# This workflow is non-container, runs the curated Windows-safe subset, plus
# targeted resolver tests that exercise the Bun.which-based claude binary
# resolution + the GSTACK_CLAUDE_BIN override path on Windows.
#
# Runner: GitHub-hosted free `windows-latest`. The whole rest of CI runs on
# Ubicloud (Linux), but Ubicloud doesn't ship Windows runners and we don't
# want to flip on GitHub's org-level larger-runner billing for just this one
# job. 4 cores, ~60s spin-up, $0. The wave-coverage tests this runs are
# small enough that total job time stays under 2 minutes.
#
# What this DOES NOT do (still out of scope, tracked as follow-up):
# - Run the full free suite on Windows. The 24 tests that hardcode /bin/sh,
#   spawn('sh',...), or raw /tmp/ paths are excluded by scripts/test-free-shards.ts
#   --windows-only. They need POSIX-bound surfaces to be ported off shell
#   primitives before they can run on Windows.
# - Run Playwright/browser-backed tests. Browse server bring-up on Windows is
#   a separate concern (PR #1238 windows-pty-bun-pty-fix is in flight).

on:
  pull_request:
    branches: [main]
  workflow_dispatch:

concurrency:
  # github.head_ref is empty for workflow_dispatch runs; fall back to the run
  # id so manual runs do not all share one group and cancel each other.
  group: windows-free-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  windows-free-tests:
    # GitHub-hosted free 4-core Windows runner. The rest of CI runs on
    # Ubicloud, which does not ship Windows runners (see the header above).
    runs-on: windows-latest
    timeout-minutes: 15

    steps:
      - uses: actions/checkout@v4

      # Same major version of setup-bun as every other workflow in this repo.
      - uses: oven-sh/setup-bun@v2
        with:
          bun-version: latest

      - name: Configure git identity (required by tests that init temp repos)
        run: |
          git config --global user.email "windows-ci@gstack.test"
          git config --global user.name "Windows CI"
          git config --global init.defaultBranch main
        shell: bash

      - name: Install dependencies
        run: bun install --frozen-lockfile

      - name: Build server-node.mjs (required by Windows browse path)
        # browse/src/cli.ts module-level throws on Windows if server-node.mjs
        # is missing — Bun can't drive Playwright's Chromium on Windows
        # (oven-sh/bun#4253). The bundle must exist for any test that
        # transitively loads cli.ts to even import. We build only the
        # Node-compatible server bundle here; full `bun run build` would
        # also compile every binary which is slow and unnecessary for tests.
        run: bash browse/scripts/build-node-server.sh
        shell: bash

      - name: Generate host SKILL.md outputs (.agents, .factory)
        # The golden-file regression tests in test/gen-skill-docs.test.ts read
        # .agents/skills/gstack-ship/SKILL.md and .factory/skills/gstack-ship/
        # SKILL.md. Both are gitignored — generated on demand by gen:skill-docs.
        # On Mac/Linux CI the existing eval workflow regenerates these as part
        # of its own pipeline; the windows-free-tests lane doesn't share that
        # so it must regenerate explicitly.
        run: bun run gen:skill-docs --host all
        shell: bash

      # The Windows job verifies the new portability work this PR delivers,
      # not the entire free suite. After v1.20.0.0 ships, full-suite Windows
      # parity is a P4 follow-up TODO that depends on porting many tests off
      # POSIX-bound surfaces (raw /tmp paths, /bin/bash hardcodes, bash
      # shebang spawns, mode-bit assertions, deleted v1.14 sidebar refs, etc).
      #
      # The curated subset enumeration in scripts/test-free-shards.ts is
      # retained for future expansion — `bun run test:windows --list` gives
      # contributors a starting point to grow Windows coverage incrementally.
      #
      # What we verify here is exactly the new code paths v1.20.0.0 ships:
      #   - bin/gstack-paths state-root resolution (test/gstack-paths.test.ts)
      #   - browse/src/claude-bin.ts Bun.which wrapper + override + arg-prefix
      #     resolution including the GSTACK_CLAUDE_BIN=wsl PATHEXT path
      #     (browse/test/claude-bin.test.ts)
      #   - scripts/test-free-shards.ts curation logic itself
      #     (test/test-free-shards.test.ts)

      # Informational only: continue-on-error keeps a listing failure from
      # failing the job.
      - name: Show curated subset (informational — for future expansion)
        run: bun run scripts/test-free-shards.ts --windows-only --list
        shell: bash
        continue-on-error: true

      - name: Verify new portability work on Windows
        # Tests targeting the v1.20.0.0 lane plus v1.30.0.0 fix-wave additions
        # plus v1.36.0.0 Windows-install hardening (sanitizer + _link_or_copy
        # helper + build-script subshells + doc/config-key drift guard).
        # v1.30.0.0 extension covers icacls hardening (#1308), bash.exe telemetry
        # wrap (#1306), and Bun.which-based binary resolvers (#1307). These must
        # pass on Windows for the wave's "Windows hardening" framing to be honest.
        run: |
          bun test \
            test/gstack-paths.test.ts \
            browse/test/claude-bin.test.ts \
            test/test-free-shards.test.ts \
            browse/test/file-permissions.test.ts \
            browse/test/security.test.ts \
            browse/test/server-sanitize-surrogates.test.ts \
            test/setup-windows-fallback.test.ts \
            test/build-script-shell-compat.test.ts \
            test/docs-config-keys.test.ts \
            make-pdf/test/browseClient.test.ts \
            make-pdf/test/pdftotext.test.ts
        shell: bash
|
||||
Reference in New Issue
Block a user