Initial import from garrytan/gstack@026751e (main snapshot via local relay)
Some checks failed
Workflow Lint / actionlint (push) Has been cancelled
Build CI Image / build (push) Has been cancelled
Skill Docs Freshness / check-freshness (push) Has been cancelled
Periodic Evals / build-image (push) Has been cancelled
Periodic Evals / evals (map[file:test/codex-e2e.test.ts name:e2e-codex]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/gemini-e2e.test.ts name:e2e-gemini]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-design.test.ts name:e2e-design]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-plan.test.ts name:e2e-plan]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-bugs.test.ts name:e2e-qa-bugs]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-workflow.test.ts name:e2e-qa-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-review.test.ts name:e2e-review]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-workflow.test.ts name:e2e-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-routing-e2e.test.ts name:e2e-routing]) (push) Has been cancelled

Source: https://github.com/garrytan/gstack/commit/026751e
This commit is contained in:
Rocky
2026-05-19 21:18:17 +02:00
commit 834c6db075
797 changed files with 267839 additions and 0 deletions

View File

@@ -0,0 +1,97 @@
/**
* Opus 4.7 model overlay — gate-tier assertions on the pacing directive.
*
* v1.6.4.0 regressed plan-review cadence because the Opus 4.7 overlay
* carried a "Batch your questions" directive that physically rendered
* above the skill-level pacing rule. Opus 4.7 read top-to-bottom,
* absorbed batching as the ambient default, and stopped honoring the
* plan-review STOP directives.
*
* v1.7.0.0 replaces that block with "Pace questions to the skill" —
* one-question-at-a-time is now the default when the skill contains
* STOP directives; batching becomes the explicit exception.
*
* This test asserts:
* - The new "Pace questions" directive is present
* - The old "Batch your questions" directive is gone
* - The AUTO_DECIDE-compatible language survives (subordination, skill wins)
*/
import { describe, test, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import type { TemplateContext } from '../scripts/resolvers/types';
import { HOST_PATHS } from '../scripts/resolvers/types';
import { generateModelOverlay } from '../scripts/resolvers/model-overlay';
function makeCtx(model: string): TemplateContext {
return {
skillName: 'test-skill',
tmplPath: 'test.tmpl',
host: 'claude',
paths: HOST_PATHS.claude,
preambleTier: 2,
model,
};
}
const ROOT = path.resolve(__dirname, '..');
describe('Opus 4.7 overlay — pacing directive', () => {
test('raw opus-4-7.md contains "Pace questions to the skill"', () => {
const raw = fs.readFileSync(
path.join(ROOT, 'model-overlays/opus-4-7.md'),
'utf-8',
);
expect(raw).toContain('Pace questions to the skill');
});
test('raw opus-4-7.md does NOT contain "Batch your questions" directive', () => {
const raw = fs.readFileSync(
path.join(ROOT, 'model-overlays/opus-4-7.md'),
'utf-8',
);
expect(raw).not.toContain('**Batch your questions.**');
});
test('resolved overlay output contains "Pace questions to the skill"', () => {
const out = generateModelOverlay(makeCtx('opus-4-7'));
expect(out).toContain('Pace questions to the skill');
});
test('resolved overlay inherits from claude base (INHERIT:claude)', () => {
const out = generateModelOverlay(makeCtx('opus-4-7'));
// The claude base contributes the subordination wrapper + Todo discipline
expect(out).toContain('Todo-list discipline');
expect(out).toContain('subordinate');
});
test('resolved overlay says skill STOP directives trigger one-per-turn pacing', () => {
const out = generateModelOverlay(makeCtx('opus-4-7'));
expect(out).toMatch(/STOP\. AskUserQuestion/);
expect(out).toMatch(/pace one question per turn|one question per turn/i);
});
test('resolved overlay requires AskUserQuestion as tool_use', () => {
const out = generateModelOverlay(makeCtx('opus-4-7'));
expect(out).toContain('tool_use');
});
test('resolved overlay flags "obvious fix" findings still need user approval', () => {
const out = generateModelOverlay(makeCtx('opus-4-7'));
expect(out).toMatch(/obvious fix/i);
expect(out).toMatch(/user approval/i);
});
test('resolved overlay keeps Effort-match / Literal interpretation nudges', () => {
const out = generateModelOverlay(makeCtx('opus-4-7'));
expect(out).toContain('Effort-match the step');
expect(out).toContain('Literal interpretation awareness');
});
test('claude overlay (no INHERIT chain) does not carry the pacing directive', () => {
// Claude is the default overlay; opus-4-7 inherits FROM claude.
// The pacing directive belongs to opus-4-7 only.
const out = generateModelOverlay(makeCtx('claude'));
expect(out).not.toContain('Pace questions to the skill');
});
});