Initial import from garrytan/gstack@026751e (main snapshot via local relay)
Some checks failed
Workflow Lint / actionlint (push) Has been cancelled
Build CI Image / build (push) Has been cancelled
Skill Docs Freshness / check-freshness (push) Has been cancelled
Periodic Evals / build-image (push) Has been cancelled
Periodic Evals / evals (map[file:test/codex-e2e.test.ts name:e2e-codex]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/gemini-e2e.test.ts name:e2e-gemini]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-design.test.ts name:e2e-design]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-plan.test.ts name:e2e-plan]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-bugs.test.ts name:e2e-qa-bugs]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-workflow.test.ts name:e2e-qa-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-review.test.ts name:e2e-review]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-workflow.test.ts name:e2e-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-routing-e2e.test.ts name:e2e-routing]) (push) Has been cancelled
Some checks failed
Workflow Lint / actionlint (push) Has been cancelled
Build CI Image / build (push) Has been cancelled
Skill Docs Freshness / check-freshness (push) Has been cancelled
Periodic Evals / build-image (push) Has been cancelled
Periodic Evals / evals (map[file:test/codex-e2e.test.ts name:e2e-codex]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/gemini-e2e.test.ts name:e2e-gemini]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-design.test.ts name:e2e-design]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-plan.test.ts name:e2e-plan]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-bugs.test.ts name:e2e-qa-bugs]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-workflow.test.ts name:e2e-qa-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-review.test.ts name:e2e-review]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-workflow.test.ts name:e2e-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-routing-e2e.test.ts name:e2e-routing]) (push) Has been cancelled
Source: https://github.com/garrytan/gstack/commit/026751e
This commit is contained in:
82
test/helpers/tool-map.ts
Normal file
82
test/helpers/tool-map.ts
Normal file
@@ -0,0 +1,82 @@
|
||||
/**
 * Tool compatibility map across provider CLIs.
 *
 * Not all provider CLIs expose equivalent tools. A benchmark that uses Edit, Glob,
 * or Grep won't run cleanly on CLIs that don't have those. The map answers:
 * "which tools does each provider's CLI expose by default?"
 *
 * When a benchmark is scoped to a tool a provider lacks, the harness records
 * `unsupported_tool` in the result and continues with the other providers.
 *
 * Source-of-truth references:
 * - Claude Code: https://code.claude.com/docs/en/tools
 * - Codex CLI: `codex exec --help` tool listing
 * - Gemini CLI: `gemini --help` (limited tool surface as of 2026-04)
 */
|
||||
|
||||
/**
 * Names of the tools a benchmark can require from a provider CLI.
 *
 * This is a closed string-literal union, so a misspelled tool name is rejected
 * at compile time instead of silently being treated as unsupported.
 */
export type ToolName =
  | 'Read'
  | 'Write'
  | 'Edit'
  | 'Bash'
  | 'Agent'
  | 'Glob'
  | 'Grep'
  | 'AskUserQuestion'
  | 'WebSearch'
  | 'WebFetch';
|
||||
|
||||
/**
 * Per-provider table recording which tools each provider's CLI is treated as
 * exposing.
 *
 * The outer key is the provider and the inner key is the tool name. `true`
 * marks the tool as available for that provider, `false` marks it as not
 * available. Every provider row lists every `ToolName`, which the
 * `Record<ToolName, boolean>` type enforces.
 */
export const TOOL_COMPATIBILITY: Record<'claude' | 'gpt' | 'gemini', Record<ToolName, boolean>> = {
  claude: {
    Read: true,
    Write: true,
    Edit: true,
    Bash: true,
    Agent: true,
    Glob: true,
    Grep: true,
    AskUserQuestion: true,
    WebSearch: true,
    WebFetch: true,
  },
  gpt: {
    // Codex CLI has a narrower tool surface: it uses shell + apply_patch.
    // Read/Glob/Grep-style operations happen via shell pipelines.
    Read: true,
    Write: false, // apply_patch handles writes; no standalone Write tool
    Edit: false, // apply_patch handles edits; no standalone Edit tool
    Bash: true,
    Agent: false,
    Glob: false,
    Grep: false,
    AskUserQuestion: false,
    WebSearch: true, // --enable web_search_cached
    WebFetch: false,
  },
  gemini: {
    // Gemini CLI (as of 2026-04) has a limited tool surface in --yolo mode.
    // Shell access depends on flags; most agentic tools are not exposed.
    Read: true,
    Write: false,
    Edit: false,
    Bash: false,
    Agent: false,
    Glob: false,
    Grep: false,
    AskUserQuestion: false,
    WebSearch: true,
    WebFetch: false,
  },
};
|
||||
|
||||
/**
 * Determine which tools from a required set are missing for a given provider.
 *
 * A tool counts as missing when the provider's entry for it in
 * `TOOL_COMPATIBILITY` is falsy. The result keeps the order of `requiredTools`
 * and is not de-duplicated: a tool listed twice in the input appears twice in
 * the output. The input array is not modified.
 *
 * @param provider - Provider whose compatibility row is consulted.
 * @param requiredTools - Tools the caller needs; may be a readonly array.
 * @returns The subset of `requiredTools` the provider lacks. An empty array
 *   means full compatibility.
 */
export function missingTools(
  provider: 'claude' | 'gpt' | 'gemini',
  requiredTools: readonly ToolName[]
): ToolName[] {
  const supported = TOOL_COMPATIBILITY[provider];
  // `filter` always returns a fresh mutable array, even for a readonly input.
  return requiredTools.filter((tool) => !supported[tool]);
}
|
||||
Reference in New Issue
Block a user