Initial import from garrytan/gstack@026751e (main snapshot via local relay)
Some checks failed
Workflow Lint / actionlint (push) Has been cancelled
Build CI Image / build (push) Has been cancelled
Skill Docs Freshness / check-freshness (push) Has been cancelled
Periodic Evals / build-image (push) Has been cancelled
Periodic Evals / evals (map[file:test/codex-e2e.test.ts name:e2e-codex]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/gemini-e2e.test.ts name:e2e-gemini]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-design.test.ts name:e2e-design]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-plan.test.ts name:e2e-plan]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-bugs.test.ts name:e2e-qa-bugs]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-workflow.test.ts name:e2e-qa-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-review.test.ts name:e2e-review]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-workflow.test.ts name:e2e-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-routing-e2e.test.ts name:e2e-routing]) (push) Has been cancelled
Some checks failed
Workflow Lint / actionlint (push) Has been cancelled
Build CI Image / build (push) Has been cancelled
Skill Docs Freshness / check-freshness (push) Has been cancelled
Periodic Evals / build-image (push) Has been cancelled
Periodic Evals / evals (map[file:test/codex-e2e.test.ts name:e2e-codex]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/gemini-e2e.test.ts name:e2e-gemini]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-design.test.ts name:e2e-design]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-plan.test.ts name:e2e-plan]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-bugs.test.ts name:e2e-qa-bugs]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-workflow.test.ts name:e2e-qa-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-review.test.ts name:e2e-review]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-workflow.test.ts name:e2e-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-routing-e2e.test.ts name:e2e-routing]) (push) Has been cancelled
Source: https://github.com/garrytan/gstack/commit/026751e
This commit is contained in:
97
test/model-overlay-opus-4-7.test.ts
Normal file
97
test/model-overlay-opus-4-7.test.ts
Normal file
@@ -0,0 +1,97 @@
|
||||
/**
|
||||
* Opus 4.7 model overlay — gate-tier assertions on the pacing directive.
|
||||
*
|
||||
* v1.6.4.0 regressed plan-review cadence because the Opus 4.7 overlay
|
||||
* carried a "Batch your questions" directive that physically rendered
|
||||
* above the skill-level pacing rule. Opus 4.7 read top-to-bottom,
|
||||
* absorbed batching as the ambient default, and stopped honoring the
|
||||
* plan-review STOP directives.
|
||||
*
|
||||
* v1.7.0.0 replaces that block with "Pace questions to the skill" —
|
||||
* one-question-at-a-time is now the default when the skill contains
|
||||
* STOP directives; batching becomes the explicit exception.
|
||||
*
|
||||
* This test asserts:
|
||||
* - The new "Pace questions" directive is present
|
||||
* - The old "Batch your questions" directive is gone
|
||||
* - The AUTO_DECIDE-compatible language survives (subordination, skill wins)
|
||||
*/
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import type { TemplateContext } from '../scripts/resolvers/types';
|
||||
import { HOST_PATHS } from '../scripts/resolvers/types';
|
||||
import { generateModelOverlay } from '../scripts/resolvers/model-overlay';
|
||||
|
||||
/**
 * Build the minimal TemplateContext these tests hand to the overlay resolver.
 *
 * Everything except the model is pinned to a fixed Claude-host fixture, so the
 * only thing that varies between test cases is which overlay gets requested.
 *
 * @param model - Overlay/model identifier to request (e.g. 'opus-4-7', 'claude').
 * @returns A context for the 'claude' host carrying the given model.
 */
function makeCtx(model: string): TemplateContext {
  const fixture: TemplateContext = {
    skillName: 'test-skill',
    tmplPath: 'test.tmpl',
    host: 'claude',
    paths: HOST_PATHS.claude,
    preambleTier: 2,
    model: model,
  };
  return fixture;
}
|
||||
|
||||
/** Repository root; this test file sits in `test/`, one level below it. */
const ROOT = path.resolve(__dirname, '..');

describe('Opus 4.7 overlay — pacing directive', () => {
  /**
   * Read the raw (unresolved) Opus 4.7 overlay source from disk.
   *
   * Invoked inside each test rather than once at suite-collection time so a
   * missing or unreadable file fails only the tests that depend on it.
   */
  const readRawOverlay = (): string =>
    fs.readFileSync(path.join(ROOT, 'model-overlays/opus-4-7.md'), 'utf-8');

  /** Run the real overlay resolver for `model` using the shared test context. */
  const resolveOverlay = (model: string) => generateModelOverlay(makeCtx(model));

  test('raw opus-4-7.md contains "Pace questions to the skill"', () => {
    expect(readRawOverlay()).toContain('Pace questions to the skill');
  });

  test('raw opus-4-7.md does NOT contain "Batch your questions" directive', () => {
    // Matches the exact bold markdown heading of the removed directive.
    expect(readRawOverlay()).not.toContain('**Batch your questions.**');
  });

  test('resolved overlay output contains "Pace questions to the skill"', () => {
    const out = resolveOverlay('opus-4-7');
    expect(out).toContain('Pace questions to the skill');
  });

  test('resolved overlay inherits from claude base (INHERIT:claude)', () => {
    const out = resolveOverlay('opus-4-7');
    // The claude base contributes the subordination wrapper + Todo discipline
    expect(out).toContain('Todo-list discipline');
    expect(out).toContain('subordinate');
  });

  test('resolved overlay says skill STOP directives trigger one-per-turn pacing', () => {
    const out = resolveOverlay('opus-4-7');
    expect(out).toMatch(/STOP\. AskUserQuestion/);
    // "pace one question per turn" already contains this phrase, so a single
    // case-insensitive pattern covers both wordings.
    expect(out).toMatch(/one question per turn/i);
  });

  test('resolved overlay requires AskUserQuestion as tool_use', () => {
    const out = resolveOverlay('opus-4-7');
    expect(out).toContain('tool_use');
  });

  test('resolved overlay flags "obvious fix" findings still need user approval', () => {
    const out = resolveOverlay('opus-4-7');
    expect(out).toMatch(/obvious fix/i);
    expect(out).toMatch(/user approval/i);
  });

  test('resolved overlay keeps Effort-match / Literal interpretation nudges', () => {
    const out = resolveOverlay('opus-4-7');
    expect(out).toContain('Effort-match the step');
    expect(out).toContain('Literal interpretation awareness');
  });

  test('claude overlay (no INHERIT chain) does not carry the pacing directive', () => {
    // Claude is the default overlay; opus-4-7 inherits FROM claude.
    // The pacing directive belongs to opus-4-7 only.
    const out = resolveOverlay('claude');
    expect(out).not.toContain('Pace questions to the skill');
  });
});
|
||||
Reference in New Issue
Block a user