Initial import from garrytan/gstack@026751e (main snapshot via local relay)
Some checks failed
Workflow Lint / actionlint (push) Has been cancelled
Build CI Image / build (push) Has been cancelled
Skill Docs Freshness / check-freshness (push) Has been cancelled
Periodic Evals / build-image (push) Has been cancelled
Periodic Evals / evals (map[file:test/codex-e2e.test.ts name:e2e-codex]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/gemini-e2e.test.ts name:e2e-gemini]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-design.test.ts name:e2e-design]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-plan.test.ts name:e2e-plan]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-bugs.test.ts name:e2e-qa-bugs]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-workflow.test.ts name:e2e-qa-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-review.test.ts name:e2e-review]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-workflow.test.ts name:e2e-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-routing-e2e.test.ts name:e2e-routing]) (push) Has been cancelled
Some checks failed
Workflow Lint / actionlint (push) Has been cancelled
Build CI Image / build (push) Has been cancelled
Skill Docs Freshness / check-freshness (push) Has been cancelled
Periodic Evals / build-image (push) Has been cancelled
Periodic Evals / evals (map[file:test/codex-e2e.test.ts name:e2e-codex]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/gemini-e2e.test.ts name:e2e-gemini]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-design.test.ts name:e2e-design]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-plan.test.ts name:e2e-plan]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-bugs.test.ts name:e2e-qa-bugs]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-workflow.test.ts name:e2e-qa-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-review.test.ts name:e2e-review]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-workflow.test.ts name:e2e-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-routing-e2e.test.ts name:e2e-routing]) (push) Has been cancelled
Source: https://github.com/garrytan/gstack/commit/026751e
This commit is contained in:
264
test/skill-e2e-setup-gbrain-path4-local-pglite.test.ts
Normal file
264
test/skill-e2e-setup-gbrain-path4-local-pglite.test.ts
Normal file
@@ -0,0 +1,264 @@
|
||||
// E2E: /setup-gbrain Path 4 with Step 4.5 "Yes" — local PGLite for code search.
|
||||
//
|
||||
// Drives the skill against a stub HTTP MCP server (200 OK on tools/list).
|
||||
// Auto-answers AskUserQuestion to pick:
|
||||
// - Path 4 at Step 2 (Remote gbrain MCP)
|
||||
// - "Yes, set up local PGLite for code" at Step 4.5
|
||||
//
|
||||
// Asserts that the model:
|
||||
// 1. ran the verify helper successfully (got past Step 4c)
|
||||
// 2. invoked gstack-gbrain-install (Step 4.5 Yes branch)
|
||||
// 3. invoked `gbrain init --pglite --json` (also Step 4.5 Yes branch)
|
||||
// 4. registered the remote MCP via claude mcp add --transport http
|
||||
// 5. wrote a "Code search ..... OK local-pglite" row to the Step 10 verdict
|
||||
//
|
||||
// Periodic-tier (codex #12: AgentSDK harness is non-deterministic; gate-tier
|
||||
// coverage of the split-engine behavior lives in the deterministic unit
|
||||
// tests at gbrain-local-status.test.ts, gbrain-sync-skip.test.ts, etc).
|
||||
//
|
||||
// Cost: ~$0.50-$1.00 per run. Periodic-tier (EVALS=1 EVALS_TIER=periodic).
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as os from 'os';
|
||||
import * as path from 'path';
|
||||
import * as http from 'http';
|
||||
import {
|
||||
runAgentSdkTest,
|
||||
passThroughNonAskUserQuestion,
|
||||
resolveClaudeBinary,
|
||||
} from './helpers/agent-sdk-runner';
|
||||
|
||||
const shouldRun = !!process.env.EVALS && process.env.EVALS_TIER === 'periodic';
|
||||
const describeE2E = shouldRun ? describe : describe.skip;
|
||||
|
||||
/**
|
||||
* Minimal stub MCP server that returns success on initialize / tools/list.
|
||||
* Verify helper calls /tools/list with a Bearer header and inspects the body.
|
||||
*/
|
||||
function startStubMcp(): Promise<{ url: string; close: () => Promise<void> }> {
|
||||
return new Promise((resolve) => {
|
||||
const server = http.createServer((req, res) => {
|
||||
let body = '';
|
||||
req.on('data', (c) => (body += c));
|
||||
req.on('end', () => {
|
||||
res.statusCode = 200;
|
||||
res.setHeader('Content-Type', 'text/event-stream');
|
||||
// Try to be useful: respond with a fake initialize + tools/list payload.
|
||||
let payload: unknown = { jsonrpc: '2.0', id: 1, result: { tools: [] } };
|
||||
try {
|
||||
const req = JSON.parse(body);
|
||||
if (req.method === 'initialize') {
|
||||
payload = {
|
||||
jsonrpc: '2.0',
|
||||
id: req.id,
|
||||
result: {
|
||||
protocolVersion: '2024-11-05',
|
||||
capabilities: { tools: {} },
|
||||
serverInfo: { name: 'gbrain', version: '0.32.3.0' },
|
||||
},
|
||||
};
|
||||
}
|
||||
} catch {
|
||||
// ignore parse failure; default payload
|
||||
}
|
||||
res.end(`event: message\ndata: ${JSON.stringify(payload)}\n\n`);
|
||||
});
|
||||
});
|
||||
server.listen(0, '127.0.0.1', () => {
|
||||
const addr = server.address();
|
||||
if (!addr || typeof addr === 'string') throw new Error('no address');
|
||||
resolve({
|
||||
url: `http://127.0.0.1:${addr.port}/mcp`,
|
||||
close: () => new Promise((r) => server.close(() => r())),
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Fake gbrain CLI:
|
||||
* - --version → echoes a version
|
||||
* - init --pglite --json → writes a pglite config, exits 0
|
||||
* - everything else → exits 0 quietly
|
||||
*
|
||||
* Logs every invocation so we can assert init was called.
|
||||
*/
|
||||
function makeFakeGbrain(binDir: string, gbrainConfigPath: string): string {
|
||||
const callLog = path.join(binDir, 'gbrain-calls.log');
|
||||
const script = `#!/bin/bash
|
||||
echo "gbrain $@" >> "${callLog}"
|
||||
case "$1 $2" in
|
||||
"--version "*) echo "gbrain 0.33.1.0"; exit 0 ;;
|
||||
"init --pglite") cat > "${gbrainConfigPath}" <<JSON
|
||||
{"engine":"pglite","database_url":"pglite:///fake"}
|
||||
JSON
|
||||
echo '{"status":"ok","engine":"pglite"}'
|
||||
exit 0 ;;
|
||||
esac
|
||||
exit 0
|
||||
`;
|
||||
fs.writeFileSync(path.join(binDir, 'gbrain'), script, { mode: 0o755 });
|
||||
return callLog;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fake `claude` CLI for mcp add/remove/get/list. Logs every call so we can
|
||||
* assert remote MCP registration happened.
|
||||
*/
|
||||
function makeFakeClaude(binDir: string): string {
|
||||
const callLog = path.join(binDir, 'claude-calls.log');
|
||||
const script = `#!/bin/bash
|
||||
echo "claude $@" >> "${callLog}"
|
||||
case "$1 $2" in
|
||||
"mcp add") exit 0 ;;
|
||||
"mcp list") echo "gbrain: http://stub/mcp (HTTP) — connected" ; exit 0 ;;
|
||||
"mcp remove") exit 0 ;;
|
||||
"mcp get") echo '{"type":"http","url":"http://stub/mcp"}'; exit 0 ;;
|
||||
esac
|
||||
exit 0
|
||||
`;
|
||||
fs.writeFileSync(path.join(binDir, 'claude'), script, { mode: 0o755 });
|
||||
return callLog;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fake gstack-gbrain-install so we don't actually clone the gbrain repo +
|
||||
* bun-link. The test only cares that the skill INVOKED it on the Yes branch.
|
||||
*/
|
||||
function makeFakeInstall(binDir: string): string {
|
||||
const callLog = path.join(binDir, 'install-calls.log');
|
||||
const script = `#!/bin/bash
|
||||
echo "install $@" >> "${callLog}"
|
||||
exit 0
|
||||
`;
|
||||
fs.writeFileSync(path.join(binDir, 'gstack-gbrain-install'), script, {
|
||||
mode: 0o755,
|
||||
});
|
||||
return callLog;
|
||||
}
|
||||
|
||||
describeE2E('/setup-gbrain Path 4 + Step 4.5 Yes → local PGLite for code', () => {
|
||||
test('opt-in flow invokes install + gbrain init + remote MCP register', async () => {
|
||||
const stubServer = await startStubMcp();
|
||||
const sandboxHome = fs.mkdtempSync(path.join(os.tmpdir(), 'path4-pglite-'));
|
||||
const fakeBinDir = fs.mkdtempSync(path.join(os.tmpdir(), 'path4-pglite-bin-'));
|
||||
const gbrainConfigDir = path.join(sandboxHome, '.gbrain');
|
||||
fs.mkdirSync(gbrainConfigDir, { recursive: true });
|
||||
const gbrainConfigPath = path.join(gbrainConfigDir, 'config.json');
|
||||
const claudeLog = makeFakeClaude(fakeBinDir);
|
||||
const gbrainLog = makeFakeGbrain(fakeBinDir, gbrainConfigPath);
|
||||
const installLog = makeFakeInstall(fakeBinDir);
|
||||
|
||||
const ORIGINAL_CLAUDE_MD = '# Test project\n';
|
||||
fs.writeFileSync(path.join(sandboxHome, 'CLAUDE.md'), ORIGINAL_CLAUDE_MD);
|
||||
|
||||
const askLog: Array<{ question: string; choice: string }> = [];
|
||||
const binary = resolveClaudeBinary();
|
||||
|
||||
const orig = {
|
||||
home: process.env.HOME,
|
||||
pathEnv: process.env.PATH,
|
||||
mcpToken: process.env.GBRAIN_MCP_TOKEN,
|
||||
};
|
||||
process.env.HOME = sandboxHome;
|
||||
process.env.PATH = `${fakeBinDir}:${path.join(path.resolve(import.meta.dir, '..'), 'bin')}:${process.env.PATH ?? '/usr/bin:/bin:/opt/homebrew/bin'}`;
|
||||
process.env.GBRAIN_MCP_TOKEN = 'gbrain_fake_token_for_test';
|
||||
|
||||
try {
|
||||
const skillPath = path.resolve(
|
||||
import.meta.dir,
|
||||
'..',
|
||||
'setup-gbrain',
|
||||
'SKILL.md',
|
||||
);
|
||||
const result = await runAgentSdkTest({
|
||||
systemPrompt: { type: 'preset', preset: 'claude_code' },
|
||||
userPrompt:
|
||||
`Read the skill file at ${skillPath} and follow Path 4 (Remote MCP). ` +
|
||||
`Use this MCP URL: ${stubServer.url}. ` +
|
||||
`The bearer token is already in GBRAIN_MCP_TOKEN. ` +
|
||||
`At Step 4.5 (the new "Want symbol-aware code search?" question), PICK YES — set up local PGLite for code. ` +
|
||||
`Then continue through Step 5a (MCP registration) → Step 10 (verdict). ` +
|
||||
`Do not skip Step 4.5; the test depends on the Yes path being taken.`,
|
||||
workingDirectory: sandboxHome,
|
||||
maxTurns: 25,
|
||||
allowedTools: ['Read', 'Grep', 'Glob', 'Bash', 'Write', 'Edit'],
|
||||
...(binary ? { pathToClaudeCodeExecutable: binary } : {}),
|
||||
canUseTool: async (toolName, input) => {
|
||||
if (toolName === 'AskUserQuestion') {
|
||||
const qs = input.questions as Array<{
|
||||
question: string;
|
||||
options: Array<{ label: string }>;
|
||||
}>;
|
||||
const answers: Record<string, string> = {};
|
||||
for (const q of qs) {
|
||||
// Heuristics: pick the option that screams "yes/PGLite/code search" for our flow.
|
||||
const yes =
|
||||
q.options.find((o) =>
|
||||
/yes.*local|local.*pglite|code search|opt in/i.test(o.label),
|
||||
) ??
|
||||
q.options.find((o) => /remote.*mcp|path 4/i.test(o.label)) ??
|
||||
q.options[0]!;
|
||||
answers[q.question] = yes.label;
|
||||
askLog.push({ question: q.question, choice: yes.label });
|
||||
}
|
||||
return {
|
||||
behavior: 'allow',
|
||||
updatedInput: { questions: qs, answers },
|
||||
};
|
||||
}
|
||||
return passThroughNonAskUserQuestion(toolName, input);
|
||||
},
|
||||
});
|
||||
|
||||
const modelOut = JSON.stringify(result);
|
||||
|
||||
// Smoke test contract (codex #12: AgentSDK is non-deterministic, so this
|
||||
// E2E asserts the model followed the SPLIT-ENGINE PATH without depending
|
||||
// on the exact subcommand sequence — deterministic per-step coverage
|
||||
// lives in gbrain-local-status.test.ts, gbrain-sync-skip.test.ts, etc).
|
||||
|
||||
// Assertion 1: AskUserQuestion was called at least once (model reached
|
||||
// the interactive branches).
|
||||
expect(askLog.length).toBeGreaterThan(0);
|
||||
|
||||
// Assertion 2: at LEAST ONE of the Path 4 / Step 4.5 commands fired:
|
||||
// - gstack-gbrain-install (install step)
|
||||
// - `gbrain init --pglite` (engine init)
|
||||
// - `claude mcp add` (remote MCP registration)
|
||||
// Failing all three means the model didn't follow the skill at all.
|
||||
const installCalls = fs.existsSync(installLog)
|
||||
? fs.readFileSync(installLog, 'utf-8')
|
||||
: '';
|
||||
const gbrainCalls = fs.existsSync(gbrainLog)
|
||||
? fs.readFileSync(gbrainLog, 'utf-8')
|
||||
: '';
|
||||
const claudeCalls = fs.existsSync(claudeLog)
|
||||
? fs.readFileSync(claudeLog, 'utf-8')
|
||||
: '';
|
||||
const followedPath =
|
||||
installCalls.length > 0 ||
|
||||
/gbrain init --pglite/.test(gbrainCalls) ||
|
||||
/mcp add/.test(claudeCalls);
|
||||
expect(followedPath).toBe(true);
|
||||
|
||||
// Assertion 3: token never leaked to CLAUDE.md (security regression).
|
||||
const finalClaudeMd = fs.readFileSync(
|
||||
path.join(sandboxHome, 'CLAUDE.md'),
|
||||
'utf-8',
|
||||
);
|
||||
expect(finalClaudeMd).not.toContain('gbrain_fake_token_for_test');
|
||||
} finally {
|
||||
if (orig.home === undefined) delete process.env.HOME;
|
||||
else process.env.HOME = orig.home;
|
||||
if (orig.pathEnv === undefined) delete process.env.PATH;
|
||||
else process.env.PATH = orig.pathEnv;
|
||||
if (orig.mcpToken === undefined) delete process.env.GBRAIN_MCP_TOKEN;
|
||||
else process.env.GBRAIN_MCP_TOKEN = orig.mcpToken;
|
||||
await stubServer.close();
|
||||
fs.rmSync(sandboxHome, { recursive: true, force: true });
|
||||
fs.rmSync(fakeBinDir, { recursive: true, force: true });
|
||||
}
|
||||
}, 300_000);
|
||||
});
|
||||
Reference in New Issue
Block a user