Initial import from garrytan/gstack@026751e (main snapshot via local relay)
Some checks failed
Workflow Lint / actionlint (push) Has been cancelled
Build CI Image / build (push) Has been cancelled
Skill Docs Freshness / check-freshness (push) Has been cancelled
Periodic Evals / build-image (push) Has been cancelled
Periodic Evals / evals (map[file:test/codex-e2e.test.ts name:e2e-codex]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/gemini-e2e.test.ts name:e2e-gemini]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-design.test.ts name:e2e-design]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-plan.test.ts name:e2e-plan]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-bugs.test.ts name:e2e-qa-bugs]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-workflow.test.ts name:e2e-qa-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-review.test.ts name:e2e-review]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-workflow.test.ts name:e2e-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-routing-e2e.test.ts name:e2e-routing]) (push) Has been cancelled
Some checks failed
Workflow Lint / actionlint (push) Has been cancelled
Build CI Image / build (push) Has been cancelled
Skill Docs Freshness / check-freshness (push) Has been cancelled
Periodic Evals / build-image (push) Has been cancelled
Periodic Evals / evals (map[file:test/codex-e2e.test.ts name:e2e-codex]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/gemini-e2e.test.ts name:e2e-gemini]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-design.test.ts name:e2e-design]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-plan.test.ts name:e2e-plan]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-bugs.test.ts name:e2e-qa-bugs]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-qa-workflow.test.ts name:e2e-qa-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-review.test.ts name:e2e-review]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-e2e-workflow.test.ts name:e2e-workflow]) (push) Has been cancelled
Periodic Evals / evals (map[file:test/skill-routing-e2e.test.ts name:e2e-routing]) (push) Has been cancelled
Source: https://github.com/garrytan/gstack/commit/026751e
This commit is contained in:
18
lib/conductor-env-shim.ts
Normal file
18
lib/conductor-env-shim.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
/**
|
||||
* Conductor workspaces don't inherit the user's interactive shell env, so the
|
||||
* canonical ANTHROPIC_API_KEY / OPENAI_API_KEY may be missing while
|
||||
* Conductor's GSTACK_-prefixed forms are present. Promote the GSTACK_ form to
|
||||
* canonical when canonical is empty, so subprocesses (gbrain embed,
|
||||
* @anthropic-ai/claude-agent-sdk, etc.) pick it up.
|
||||
*
|
||||
* Import this for its side effect: `import "../lib/conductor-env-shim";`
|
||||
*/
|
||||
/**
 * For each canonical API-key variable, copy the `GSTACK_`-prefixed value into
 * it when the canonical variable is unset or empty and the prefixed one is
 * non-empty. A non-empty canonical value is never overwritten.
 */
export function promoteConductorEnv(): void {
  const canonicalNames = ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"] as const;
  canonicalNames.forEach((name) => {
    const prefixed = process.env[`GSTACK_${name}`];
    if (process.env[name]) return; // canonical already usable
    if (!prefixed) return; // nothing to promote
    process.env[name] = prefixed;
  });
}

// Executed at import time: this module is imported for its side effect.
promoteConductorEnv();
|
||||
174
lib/gbrain-exec.ts
Normal file
174
lib/gbrain-exec.ts
Normal file
@@ -0,0 +1,174 @@
|
||||
/**
|
||||
* Centralized gbrain CLI invocation.
|
||||
*
|
||||
* Every `gbrain ...` spawn from `bin/gstack-gbrain-sync.ts` and
|
||||
* `bin/gstack-memory-ingest.ts` MUST go through `spawnGbrain` (or
|
||||
* `execGbrainJson`), and the invariant test
|
||||
* `test/gbrain-exec-invariant.test.ts` enforces this with a static-source
|
||||
* grep. The helper layer guarantees three properties:
|
||||
*
|
||||
* 1. **DATABASE_URL is seeded from gbrain's own config**, not from the
|
||||
* caller's `.env.local`. gbrain auto-loads `.env.local` via dotenv on
|
||||
* startup. When `/sync-gbrain` runs inside a Next.js / Prisma / Rails
|
||||
* project with its own `DATABASE_URL`, gbrain reads that one and not
|
||||
* its own `${GBRAIN_HOME:-$HOME/.gbrain}/config.json`. Auth fails;
|
||||
* code + memory stages crash; only brain-sync's git push survives.
|
||||
*
|
||||
* 2. **Bun-aware env passing.** Mutating `process.env.DATABASE_URL` does
|
||||
* NOT propagate to children of `child_process.spawnSync`/`spawn` in
|
||||
* Bun — the child gets the original startup env. So we cannot just
|
||||
* set process.env; we must thread an explicit `env:` dict to every
|
||||
* spawn. This is the central bug the helper exists to prevent
|
||||
* regressing on.
|
||||
*
|
||||
* 3. **`GBRAIN_HOME` honored consistently.** Other gstack helpers
|
||||
* (`detectEngineTier`) already honor `GBRAIN_HOME`. `buildGbrainEnv`
|
||||
* reads from `${GBRAIN_HOME:-$HOME/.gbrain}/config.json` so all
|
||||
* gstack-side gbrain calls agree on which config file matters.
|
||||
*
|
||||
* **Escape hatch:** `GSTACK_RESPECT_ENV_DATABASE_URL=1` returns the
|
||||
* caller's env unchanged. Use only when the brain intentionally lives in
|
||||
* the project's local DB (rare).
|
||||
*/
|
||||
|
||||
import { existsSync, readFileSync } from "fs";
|
||||
import { join } from "path";
|
||||
import { homedir } from "os";
|
||||
import { spawnSync, spawn, execFileSync, type SpawnSyncReturns, type ChildProcess, type SpawnOptions } from "child_process";
|
||||
|
||||
/** The part of gbrain's `config.json` that this module reads. */
interface GbrainConfig {
  /** Value used to seed `DATABASE_URL` for spawned gbrain processes. */
  database_url?: string;
}
|
||||
|
||||
/** Options accepted by `buildGbrainEnv`. */
export interface BuildGbrainEnvOptions {
  /**
   * Environment to copy and extend; `process.env` is used when omitted.
   * Tests supply a synthetic env so the real process env is left untouched.
   */
  baseEnv?: NodeJS.ProcessEnv;
  /**
   * When true, a note is written to stderr whenever DATABASE_URL is seeded
   * from the gbrain config. The `--quiet` sync flow leaves this off.
   */
  announce?: boolean;
}
|
||||
|
||||
/**
 * Build an env dict with DATABASE_URL seeded from
 * `${GBRAIN_HOME:-$HOME/.gbrain}/config.json`.
 *
 * The returned env carries the base env's values unchanged when:
 *   - `GSTACK_RESPECT_ENV_DATABASE_URL=1` (intentional opt-out),
 *   - the config file is missing, unreadable, or not valid JSON,
 *   - the parsed config is not a JSON object (e.g. `null`, a number, a string),
 *   - the config has no non-empty string `database_url`,
 *   - the caller already set DATABASE_URL to the same value.
 *
 * Always returns a fresh object — mutating the returned env never affects
 * the caller's env. Tests assert on effective values, not object identity.
 *
 * @param opts.baseEnv  Env to copy; defaults to `process.env`.
 * @param opts.announce When true, writes a one-line note to stderr whenever
 *                      DATABASE_URL is seeded.
 * @returns A shallow copy of the base env, with DATABASE_URL replaced when
 *          the gbrain config supplies a different value.
 */
export function buildGbrainEnv(opts: BuildGbrainEnvOptions = {}): NodeJS.ProcessEnv {
  const baseEnv = opts.baseEnv || process.env;
  const out: NodeJS.ProcessEnv = { ...baseEnv };
  if (baseEnv.GSTACK_RESPECT_ENV_DATABASE_URL === "1") return out;

  const homeBase = baseEnv.HOME || homedir();
  const gbrainHome = baseEnv.GBRAIN_HOME || join(homeBase, ".gbrain");
  const configPath = join(gbrainHome, "config.json");
  if (!existsSync(configPath)) return out;

  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(configPath, "utf-8"));
  } catch {
    return out;
  }
  // JSON.parse accepts `null`, numbers, strings, ... — only an object can
  // carry a database_url, so anything else is treated like a missing config
  // instead of crashing on the property access below.
  if (typeof parsed !== "object" || parsed === null) return out;

  const databaseUrl = (parsed as GbrainConfig).database_url;
  // Env values must be strings; ignore an absent, empty, or mistyped entry.
  if (typeof databaseUrl !== "string" || databaseUrl === "") return out;
  if (baseEnv.DATABASE_URL === databaseUrl) return out;

  const hadCaller = baseEnv.DATABASE_URL !== undefined;
  out.DATABASE_URL = databaseUrl;
  if (opts.announce) {
    const note = hadCaller ? " (overrode value from caller env / .env.local)" : "";
    process.stderr.write(`[gbrain-exec] seeded DATABASE_URL from ${configPath}${note}\n`);
  }
  return out;
}
|
||||
|
||||
/** Options shared by the synchronous gbrain spawn helpers in this module. */
export interface SpawnGbrainOptions {
  /** Child timeout in milliseconds; 30s when omitted. */
  timeout?: number;
  /** Directory the child process runs in. */
  cwd?: string;
  /**
   * Stdio configuration for the child. When omitted, stdin is ignored and
   * stdout/stderr are captured.
   */
  stdio?: "inherit" | "pipe" | "ignore" | Array<"inherit" | "pipe" | "ignore">;
  /**
   * Env that DATABASE_URL seeding starts from; `process.env` when omitted.
   * Tests pass a synthetic env so the spawn finds a gbrain shim on PATH and
   * a fake `~/.gbrain/config.json`.
   */
  baseEnv?: NodeJS.ProcessEnv;
  /** Whether DATABASE_URL seeding is announced on stderr. */
  announce?: boolean;
}
|
||||
|
||||
/**
 * Synchronously run `gbrain <args>` with the env produced by
 * `buildGbrainEnv`. The untouched `SpawnSyncReturns<string>` is handed back,
 * so `status`, `stdout` and `stderr` can be inspected exactly as with a
 * direct `spawnSync` call.
 *
 * @param args Arguments passed to the `gbrain` executable.
 * @param opts Timeout (default 30s), cwd, stdio and env-seeding options.
 */
export function spawnGbrain(args: string[], opts: SpawnGbrainOptions = {}): SpawnSyncReturns<string> {
  const { timeout, cwd, stdio, baseEnv, announce } = opts;
  // The env is always passed explicitly; see the module header for why
  // mutating process.env is not sufficient.
  const childEnv = buildGbrainEnv({ baseEnv, announce });
  return spawnSync("gbrain", args, {
    encoding: "utf-8",
    timeout: timeout ?? 30_000,
    cwd,
    stdio: stdio || ["ignore", "pipe", "pipe"],
    env: childEnv,
  });
}
|
||||
|
||||
/**
 * Run `gbrain <args>` through `spawnGbrain` and decode its stdout as JSON.
 *
 * @returns The parsed value, or `null` when the exit status is not 0 (which
 *          includes a timed-out or unlaunchable child) or stdout is not valid
 *          JSON. Empty stdout is parsed as the JSON literal `null`.
 */
export function execGbrainJson<T = unknown>(args: string[], opts: SpawnGbrainOptions = {}): T | null {
  const { status, stdout } = spawnGbrain(args, opts);
  if (status !== 0) return null;
  const payload = stdout || "null";
  try {
    return JSON.parse(payload) as T;
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Asynchronous counterpart of `spawnGbrain` for callers that attach their own
 * stdout/stderr listeners (e.g. `gbrain import` in `gstack-memory-ingest.ts`).
 * The seeded env is always injected and seeding is never announced. The raw
 * `ChildProcess` is returned; exit, timeout and signal handling are left to
 * the caller.
 *
 * @param args Arguments passed to the `gbrain` executable.
 * @param opts Optional stdio (default: stdin ignored, stdout/stderr piped),
 *             cwd, and base env.
 */
export function spawnGbrainAsync(
  args: string[],
  opts: { stdio?: SpawnOptions["stdio"]; cwd?: string; baseEnv?: NodeJS.ProcessEnv } = {},
): ChildProcess {
  const childEnv = buildGbrainEnv({ baseEnv: opts.baseEnv, announce: false });
  const spawnOptions: SpawnOptions = {
    stdio: opts.stdio || ["ignore", "pipe", "pipe"],
    cwd: opts.cwd,
    env: childEnv,
  };
  return spawn("gbrain", args, spawnOptions);
}
|
||||
|
||||
/**
 * Run `gbrain <args>` with `execFileSync` and return its stdout as text.
 * Unlike `spawnGbrain`, a non-zero exit throws, which suits callers that
 * want gbrain's stderr surfaced as the error.
 *
 * @param args Arguments passed to the `gbrain` executable.
 * @param opts Timeout (default 30s), cwd, stdio and env-seeding options.
 * @throws Whatever `execFileSync` throws on non-zero exit, timeout, or spawn failure.
 */
export function execGbrainText(args: string[], opts: SpawnGbrainOptions = {}): string {
  const { timeout, cwd, stdio, baseEnv, announce } = opts;
  const childEnv = buildGbrainEnv({ baseEnv, announce });
  return execFileSync("gbrain", args, {
    encoding: "utf-8",
    timeout: timeout ?? 30_000,
    cwd,
    stdio: stdio || ["ignore", "pipe", "pipe"],
    env: childEnv,
  });
}
|
||||
268
lib/gbrain-local-status.ts
Normal file
268
lib/gbrain-local-status.ts
Normal file
@@ -0,0 +1,268 @@
|
||||
/**
|
||||
* gbrain-local-status — classify the local gbrain engine into 5 states.
|
||||
*
|
||||
* Shared between bin/gstack-gbrain-detect (preamble probe on every skill start)
|
||||
* and bin/gstack-gbrain-sync.ts (orchestrator SKIP-when-not-ok semantics).
|
||||
* Single source of truth: same probe, same classification, same cache.
|
||||
*
|
||||
* Per the split-engine plan (D2 + D8):
|
||||
* - Probe: `gbrain sources list --json`. Cheap (~80ms), actually hits the DB.
|
||||
* Uses the same stderr patterns as lib/gbrain-sources.ts:66-67.
|
||||
* - Cache: 60s TTL at ~/.gstack/.gbrain-local-status-cache.json, keyed on
|
||||
* {home, path_hash, gbrain_bin_path, gbrain_version, config_mtime, config_size}.
|
||||
* - --no-cache bypass: /setup-gbrain and /sync-gbrain pass it after any
|
||||
* state-mutating operation so the next read sees fresh status.
|
||||
*
|
||||
* No-cli → gbrain not on PATH.
|
||||
* Missing-config → CLI present, ~/.gbrain/config.json absent.
|
||||
* Broken-config → config exists but `gbrain sources list` fails with config parse error
|
||||
* (or any non-recognized error — defensive default per codex #8).
|
||||
* Broken-db → config exists, DB unreachable per stderr classification.
|
||||
* Ok → DB reachable, sources list returned valid JSON.
|
||||
*/
|
||||
|
||||
import { execFileSync } from "child_process";
|
||||
import {
|
||||
createHash,
|
||||
} from "crypto";
|
||||
import {
|
||||
existsSync,
|
||||
mkdirSync,
|
||||
readFileSync,
|
||||
renameSync,
|
||||
statSync,
|
||||
writeFileSync,
|
||||
} from "fs";
|
||||
import { homedir } from "os";
|
||||
import { dirname, join } from "path";
|
||||
|
||||
/**
 * The five classifications of the local gbrain engine:
 *   - "ok":             the `gbrain sources list --json` probe succeeded.
 *   - "no-cli":         the `gbrain` executable could not be run.
 *   - "missing-config": CLI present, gbrain config file absent.
 *   - "broken-config":  probe failed for a config-related or unrecognized reason.
 *   - "broken-db":      probe failed with gbrain's database-unreachable message.
 */
export type LocalEngineStatus = "ok" | "no-cli" | "missing-config" | "broken-config" | "broken-db";
|
||||
|
||||
/** Options for `localEngineStatus`. */
export interface ClassifyOptions {
  /** Skip the cached result and re-probe. Intended for use right after a state-mutating operation. */
  noCache?: boolean;
  /** Env handed to the spawned `gbrain`; tests use it to point at a fake binary. */
  env?: NodeJS.ProcessEnv;
}
|
||||
|
||||
/** On-disk shape of the local-status cache file. */
interface CacheEntry {
  /** Format marker; entries with any other value are ignored on read. */
  schema_version: 1;
  /** The classification that was cached. */
  status: LocalEngineStatus;
  /** `Date.now()` at write time; compared against the TTL on read. */
  cached_at: number;
  /** Cache invariants — a stored entry is only reused when every field matches the current key. */
  key: {
    home: string;
    path_hash: string;
    gbrain_bin_path: string;
    gbrain_version: string;
    config_mtime: number; // 0 when config absent
    config_size: number; // 0 when config absent
  };
}
|
||||
|
||||
/** Lifetime of a cached classification, in milliseconds (60 seconds). */
export const CACHE_TTL_MS = 60 * 1_000;
/** Timeout for the `gbrain sources list --json` probe, in milliseconds (5 seconds). */
export const PROBE_TIMEOUT_MS = 5 * 1_000;
|
||||
|
||||
/**
 * Home directory used by this module: a non-empty `HOME` env var wins
 * (tests override it), otherwise `os.homedir()`.
 */
function userHome(): string {
  const fromEnv = process.env.HOME;
  return fromEnv ? fromEnv : homedir();
}
|
||||
|
||||
/**
 * Location of the status cache file: `.gbrain-local-status-cache.json` inside
 * `GSTACK_HOME` when that is set, otherwise inside `<home>/.gstack`.
 * Recomputed on every call so tests can change `GSTACK_HOME` between cases.
 */
export function cacheFilePath(): string {
  const gstackHome = process.env.GSTACK_HOME || join(userHome(), ".gstack");
  return join(gstackHome, ".gbrain-local-status-cache.json");
}
|
||||
|
||||
/**
 * Path of the gbrain config file the classifier inspects.
 *
 * Honors `GBRAIN_HOME` the same way `buildGbrainEnv` in lib/gbrain-exec.ts
 * does (`${GBRAIN_HOME:-$HOME/.gbrain}/config.json`). Without this, a brain
 * configured under a custom `GBRAIN_HOME` would be reported as
 * "missing-config" and its config changes would not invalidate the cache.
 */
function gbrainConfigPath(): string {
  const gbrainHome = process.env.GBRAIN_HOME || join(userHome(), ".gbrain");
  return join(gbrainHome, "config.json");
}
|
||||
|
||||
/** First 16 hex characters of the SHA-256 digest of `p`. */
function hashPath(p: string): string {
  const hexDigest = createHash("sha256").update(p).digest("hex");
  return hexDigest.substring(0, 16);
}
|
||||
|
||||
/**
 * Check whether `gbrain` can be executed with the given env by running
 * `gbrain --version` (2s timeout, all stdio ignored).
 *
 * Returns the literal command name `"gbrain"` when that succeeds and `null`
 * when it fails for any reason. Note that this is not an absolute path.
 *
 * The result is memoized for the life of the process, keyed on the env's
 * PATH, so repeated calls with the same PATH spawn only once.
 */
const _gbrainBinCache = new Map<string, string | null>();
export function resolveGbrainBin(env?: NodeJS.ProcessEnv): string | null {
  const effectiveEnv = env ?? process.env;
  const pathKey = effectiveEnv.PATH || "";
  // Stored values are string | null, so `undefined` means "not probed yet".
  const memo = _gbrainBinCache.get(pathKey);
  if (memo !== undefined) return memo;

  let resolved: string | null;
  try {
    execFileSync("gbrain", ["--version"], {
      encoding: "utf-8",
      timeout: 2_000,
      stdio: ["ignore", "ignore", "ignore"],
      env: effectiveEnv,
    });
    resolved = "gbrain";
  } catch {
    resolved = null;
  }
  _gbrainBinCache.set(pathKey, resolved);
  return resolved;
}
|
||||
|
||||
/**
 * First line of `gbrain --version` output (trimmed), or "" when the command
 * fails or prints nothing. Memoized per process, keyed on the env's PATH
 * together with the result of `resolveGbrainBin` for that env.
 */
const _gbrainVersionCache = new Map<string, string>();
export function readGbrainVersion(env?: NodeJS.ProcessEnv): string {
  const effectiveEnv = env ?? process.env;
  const memoKey = [effectiveEnv.PATH || "", resolveGbrainBin(effectiveEnv) || ""].join("|");
  const memo = _gbrainVersionCache.get(memoKey);
  if (memo !== undefined) return memo;

  let version: string;
  try {
    const raw = execFileSync("gbrain", ["--version"], {
      encoding: "utf-8",
      timeout: 2_000,
      stdio: ["ignore", "pipe", "ignore"],
      env: effectiveEnv,
    });
    version = raw.trim().split("\n")[0] || "";
  } catch {
    version = "";
  }
  _gbrainVersionCache.set(memoKey, version);
  return version;
}
|
||||
|
||||
/**
 * Change fingerprint of the gbrain config file: modification time
 * (milliseconds, floored) and byte size. Both are 0 when the file cannot be
 * stat'ed.
 */
function configFingerprint(): { mtime: number; size: number } {
  try {
    const { mtimeMs, size } = statSync(gbrainConfigPath());
    return { mtime: Math.floor(mtimeMs), size };
  } catch {
    return { mtime: 0, size: 0 };
  }
}
|
||||
|
||||
/**
 * Assemble the invariants a cached status depends on: HOME, a hash of PATH,
 * the resolved gbrain command and version, and the config file fingerprint.
 *
 * @param gbrainBin     Result of `resolveGbrainBin` (null is stored as "").
 * @param gbrainVersion Result of `readGbrainVersion`.
 * @param env           Env to read HOME and PATH from; defaults to `process.env`.
 */
function buildCacheKey(
  gbrainBin: string | null,
  gbrainVersion: string,
  env?: NodeJS.ProcessEnv,
): CacheEntry["key"] {
  const source = env ?? process.env;
  const { mtime, size } = configFingerprint();
  const key: CacheEntry["key"] = {
    home: source.HOME || "",
    path_hash: hashPath(source.PATH || ""),
    gbrain_bin_path: gbrainBin || "",
    gbrain_version: gbrainVersion,
    config_mtime: mtime,
    config_size: size,
  };
  return key;
}
|
||||
|
||||
/** True when every field of the two cache keys is strictly equal. */
function keysEqual(a: CacheEntry["key"], b: CacheEntry["key"]): boolean {
  const fields: ReadonlyArray<keyof CacheEntry["key"]> = [
    "home",
    "path_hash",
    "gbrain_bin_path",
    "gbrain_version",
    "config_mtime",
    "config_size",
  ];
  return fields.every((field) => a[field] === b[field]);
}
|
||||
|
||||
/**
 * Look up a cached status for `key`.
 *
 * @returns The cached status, or `null` when the cache file is absent,
 *          unreadable, has a different schema version, is older than
 *          `CACHE_TTL_MS`, or was written under a different key.
 */
function readCache(key: CacheEntry["key"]): LocalEngineStatus | null {
  const file = cacheFilePath();
  if (!existsSync(file)) return null;
  try {
    const entry = JSON.parse(readFileSync(file, "utf-8")) as CacheEntry;
    if (entry.schema_version !== 1) return null;
    const expired = Date.now() - entry.cached_at > CACHE_TTL_MS;
    if (expired) return null;
    return keysEqual(entry.key, key) ? entry.status : null;
  } catch {
    // Malformed cache content is treated the same as a miss.
    return null;
  }
}
|
||||
|
||||
/**
 * Persist `status` under `key`. The entry is written to a pid-suffixed temp
 * file next to the cache and then renamed over it. All filesystem errors
 * are swallowed.
 */
function writeCache(status: LocalEngineStatus, key: CacheEntry["key"]): void {
  const entry: CacheEntry = { schema_version: 1, status, cached_at: Date.now(), key };
  try {
    const target = cacheFilePath();
    mkdirSync(dirname(target), { recursive: true });
    const scratch = `${target}.tmp.${process.pid}`;
    writeFileSync(scratch, JSON.stringify(entry, null, 2), "utf-8");
    renameSync(scratch, target);
  } catch {
    // Cache write failure is non-fatal — we re-probe next call.
  }
}
|
||||
|
||||
/**
 * Classify the engine without consulting the cache, by probing
 * `gbrain sources list --json`.
 *
 * The stderr patterns ("Cannot connect to database", "config.json") are
 * deliberately the same strings used in lib/gbrain-sources.ts:66-67. Should
 * gbrain change its error messages, the result falls back to "broken-config"
 * (codex #8).
 *
 * @param env Env for the spawned `gbrain`; defaults to `process.env`.
 */
function freshClassify(env?: NodeJS.ProcessEnv): LocalEngineStatus {
  // 1. Is the CLI runnable?
  if (!resolveGbrainBin(env)) return "no-cli";

  // 2. Does the config file exist?
  if (!existsSync(gbrainConfigPath())) return "missing-config";

  // 3. Run the probe; any throw (non-zero exit, timeout, spawn error) is classified below.
  try {
    execFileSync("gbrain", ["sources", "list", "--json"], {
      encoding: "utf-8",
      timeout: PROBE_TIMEOUT_MS,
      stdio: ["ignore", "pipe", "pipe"],
      env: env ?? process.env,
    });
    return "ok";
  } catch (err) {
    const failure = err as NodeJS.ErrnoException & { stderr?: Buffer | string };

    // ENOENT can happen if gbrain disappeared between resolveGbrainBin and now.
    if (failure.code === "ENOENT") return "no-cli";

    const stderrText = failure.stderr ? failure.stderr.toString() : "";

    // "Cannot connect to database" is the specific DB-unreachable signal.
    // Everything else — a "config.json" mention or an unrecognized failure —
    // is reported as broken-config (defensive default per codex #8) so the
    // user sees the raw stderr surfaced upstream.
    return stderrText.includes("Cannot connect to database") ? "broken-db" : "broken-config";
  }
}
|
||||
|
||||
/**
 * Report the local gbrain engine status as one of the five
 * `LocalEngineStatus` values. A result cached within the last 60s under the
 * same cache key is reused unless `opts.noCache` is set; otherwise the
 * engine is probed and the fresh result is written back to the cache.
 *
 * Failure modes are reported as states rather than thrown.
 */
export function localEngineStatus(opts: ClassifyOptions = {}): LocalEngineStatus {
  const env = opts.env ?? process.env;
  const bin = resolveGbrainBin(env);
  const version = bin ? readGbrainVersion(env) : "";
  const key = buildCacheKey(bin, version, env);

  const cached = opts.noCache ? null : readCache(key);
  if (cached) return cached;

  const status = freshClassify(env);
  writeCache(status, key);
  return status;
}
|
||||
184
lib/gbrain-sources.ts
Normal file
184
lib/gbrain-sources.ts
Normal file
@@ -0,0 +1,184 @@
|
||||
/**
|
||||
* gbrain-sources — TypeScript helper for idempotent gbrain federated source registration.
|
||||
*
|
||||
* Mirrors the bash logic in bin/gstack-gbrain-source-wireup:204-310 but in a form
|
||||
* importable by other TS callers (currently bin/gstack-gbrain-sync.ts; future
|
||||
* callers welcome). gbrain has no `sources update` — drift recovery is
|
||||
* `sources remove` followed by `sources add`.
|
||||
*
|
||||
* Per /plan-eng-review D3 (DRY extraction).
|
||||
*/
|
||||
|
||||
import { execFileSync, spawnSync } from "child_process";
|
||||
import { withErrorContext } from "./gstack-memory-helpers";
|
||||
|
||||
/** Registration state of a gbrain source id. */
export interface SourceState {
  /**
   * "absent" — the id is not registered.
   * "match"  — the id is registered at the expected path.
   * "drift"  — the id is registered at a different path.
   */
  status: "absent" | "match" | "drift";
  /** Path gbrain has on record for the id; only set when status is not "absent". */
  registered_path?: string;
}

/** Outcome of `ensureSourceRegistered`. */
export interface EnsureResult {
  /** True when the registration was added or re-registered; false for a no-op. */
  changed: boolean;
  /** Source state once the call has finished. */
  state: SourceState;
}
|
||||
|
||||
/** Options for `ensureSourceRegistered`. */
export interface EnsureOptions {
  /** Append `--federated` to `gbrain sources add`. Defaults to false. */
  federated?: boolean;
  /** On drift, remove and re-add the source so the registered path is updated. Defaults to true. */
  reregister_on_drift?: boolean;
  /**
   * Env for the spawned `gbrain` calls. Production callers omit it and
   * inherit process.env. Tests supply one that puts a fake `gbrain` on PATH,
   * because Bun's execFileSync does not see runtime mutations of
   * process.env.PATH unless env is passed explicitly.
   */
  env?: NodeJS.ProcessEnv;
}
|
||||
|
||||
/**
 * Probe the registration state of a source by id via
 * `gbrain sources list --json`.
 *
 * Only "absent" or "match" is ever returned here; telling "match" apart from
 * "drift" requires the expected path and is done by the caller.
 *
 * A JSON payload that is not an object, or whose `sources` is not an array,
 * is treated as an empty source list, and non-object array elements are
 * skipped, so an unexpected-but-valid payload yields "absent" instead of a
 * TypeError.
 *
 * @param id  Source id to look for.
 * @param env Optional env for the spawned `gbrain`.
 * @throws Error("gbrain CLI not on PATH") when the executable cannot be
 *         found — caller should treat as absent + skip stage.
 * @throws Error("gbrain not configured (run /setup-gbrain)") when stderr
 *         mentions "Cannot connect to database" or "config.json" — caller
 *         should treat as absent + skip stage.
 * @throws The original error for any other failure of the command.
 * @throws Error("gbrain sources list returned non-JSON output: …") when
 *         stdout is not valid JSON.
 */
export function probeSource(id: string, env?: NodeJS.ProcessEnv): SourceState {
  let stdout: string;
  try {
    stdout = execFileSync("gbrain", ["sources", "list", "--json"], {
      encoding: "utf-8",
      timeout: 30_000,
      stdio: ["ignore", "pipe", "pipe"],
      env,
    });
  } catch (err) {
    const e = err as NodeJS.ErrnoException & { stderr?: Buffer | string };
    const stderr = e.stderr?.toString() || "";
    if (e.code === "ENOENT" || stderr.includes("command not found")) {
      throw new Error("gbrain CLI not on PATH");
    }
    if (stderr.includes("Cannot connect to database") || stderr.includes("config.json")) {
      throw new Error("gbrain not configured (run /setup-gbrain)");
    }
    throw err;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(stdout);
  } catch (err) {
    throw new Error(`gbrain sources list returned non-JSON output: ${(err as Error).message}`);
  }

  // JSON.parse can legally yield null / primitives / arrays; only read
  // `.sources` from a real object and only iterate it when it is an array.
  const rawSources =
    typeof parsed === "object" && parsed !== null ? (parsed as { sources?: unknown }).sources : undefined;
  const sources: Array<{ id?: string; local_path?: string } | null> = Array.isArray(rawSources)
    ? rawSources
    : [];

  const match = sources.find((s) => typeof s === "object" && s !== null && s.id === id);
  if (!match) return { status: "absent" };
  return {
    status: "match",
    registered_path: match.local_path,
  };
}
|
||||
|
||||
/**
 * Pick the most useful description of a failed `gbrain` spawn: stderr, then
 * stdout, then the spawn error itself (set when the process could not be
 * launched or timed out, in which case `status` is null), then the exit code.
 */
function describeGbrainFailure(r: {
  status: number | null;
  stdout: string;
  stderr: string;
  error?: Error;
}): string {
  if (r.stderr) return r.stderr;
  if (r.stdout) return r.stdout;
  return r.error ? r.error.message : `exit ${r.status}`;
}

/**
 * Ensure source <id> is registered at <path>. Idempotent.
 *
 * Behavior:
 *   - status=absent → `gbrain sources add <id> --path <path> [--federated]`, returns changed=true.
 *   - status=match + same path → no-op, returns changed=false.
 *   - status=match + different path (drift) → `sources remove <id> --yes` then
 *     `sources add`, returns changed=true. With reregister_on_drift=false the
 *     drift is only reported: changed=false and state.status="drift".
 *
 * The work runs inside `withErrorContext` (op `ensureSourceRegistered:<id>`,
 * writer "gbrain-sources"). The caller is responsible for catching errors.
 *
 * @param id      Source id.
 * @param path    Path the id should be registered at.
 * @param options `federated`, `reregister_on_drift`, and an optional env for
 *                the spawned `gbrain` calls.
 * @throws Error when `probeSource` throws, or when `sources remove` /
 *         `sources add` exits non-zero or cannot be run; the message then
 *         includes stderr, stdout, the spawn error, or the exit code.
 */
export async function ensureSourceRegistered(
  id: string,
  path: string,
  options: EnsureOptions = {}
): Promise<EnsureResult> {
  const federated = options.federated ?? false;
  const reregister_on_drift = options.reregister_on_drift ?? true;
  const env = options.env;

  return withErrorContext(`ensureSourceRegistered:${id}`, () => {
    const probed = probeSource(id, env);

    // probeSource cannot know the expected path; a "match" at another path is drift.
    let state: SourceState = probed;
    if (probed.status === "match" && probed.registered_path !== path) {
      state = { status: "drift", registered_path: probed.registered_path };
    }

    if (state.status === "match") {
      return { changed: false, state };
    }

    if (state.status === "drift" && !reregister_on_drift) {
      return { changed: false, state };
    }

    // For drift, remove first — gbrain has no `sources update`.
    if (state.status === "drift") {
      const rm = spawnSync("gbrain", ["sources", "remove", id, "--yes"], {
        encoding: "utf-8",
        timeout: 30_000,
        env,
      });
      if (rm.status !== 0) {
        throw new Error(`gbrain sources remove ${id} failed: ${describeGbrainFailure(rm)}`);
      }
    }

    // Add.
    const addArgs = ["sources", "add", id, "--path", path];
    if (federated) addArgs.push("--federated");
    const add = spawnSync("gbrain", addArgs, {
      encoding: "utf-8",
      timeout: 30_000,
      env,
    });
    if (add.status !== 0) {
      throw new Error(`gbrain sources add ${id} failed: ${describeGbrainFailure(add)}`);
    }

    return {
      changed: true,
      state: { status: "match", registered_path: path },
    };
  }, "gbrain-sources");
}
|
||||
|
||||
/**
 * Read `page_count` for source `id` from `gbrain sources list --json`.
 *
 * @returns The numeric page count, or `null` when the command fails, its
 *          output cannot be parsed, the source is not listed, or its
 *          `page_count` is not a number. Used by the verdict block and
 *          preamble variant selection.
 */
export function sourcePageCount(id: string, env?: NodeJS.ProcessEnv): number | null {
  try {
    const raw = execFileSync("gbrain", ["sources", "list", "--json"], {
      encoding: "utf-8",
      timeout: 30_000,
      stdio: ["ignore", "pipe", "pipe"],
      env,
    });
    const listing = JSON.parse(raw) as { sources?: Array<{ id?: string; page_count?: number }> };
    const entry = (listing.sources || []).find((s) => s.id === id);
    return entry && typeof entry.page_count === "number" ? entry.page_count : null;
  } catch {
    // Spawn failures and malformed payloads both mean "no count available".
    return null;
  }
}
|
||||
472
lib/gstack-memory-helpers.ts
Normal file
472
lib/gstack-memory-helpers.ts
Normal file
@@ -0,0 +1,472 @@
|
||||
/**
|
||||
* gstack-memory-helpers — shared helpers for the V1 memory ingest + retrieval pipeline.
|
||||
*
|
||||
* Imported by:
|
||||
* - bin/gstack-memory-ingest.ts (Lane A)
|
||||
* - bin/gstack-gbrain-sync.ts (Lane B)
|
||||
* - bin/gstack-brain-context-load.ts (Lane C)
|
||||
* - scripts/gen-skill-docs.ts (manifest validation)
|
||||
*
|
||||
* Design refs in the plan:
|
||||
* §"Eng review additions" — DRY refactor (Section 1A)
|
||||
* §"V1 final scope clarification" — schema_version: 1 standardization (Section 2A)
|
||||
* ED1 — engine-tier cache lives in ~/.gstack/.gbrain-engine-cache.json (60s TTL)
|
||||
*
|
||||
* NOTE: secretScanFile() currently shells out to `gitleaks` from PATH; the vendored
|
||||
* binary install is part of Lane E (setup-gbrain). When gitleaks is missing, the
|
||||
* helper warns once and returns an empty findings list — fail-safe defaults.
|
||||
*/
|
||||
|
||||
import { existsSync, readFileSync, writeFileSync, mkdirSync, statSync, appendFileSync } from "fs";
|
||||
import { dirname, join } from "path";
|
||||
import { execSync, execFileSync } from "child_process";
|
||||
import { homedir } from "os";
|
||||
|
||||
// ── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/** One secret-scanner hit inside a scanned file. */
export interface SecretFinding {
  /** Identifier of the scanner rule that fired. */
  rule_id: string;
  /** Description of the finding. */
  description: string;
  /** Line number of the finding. NOTE(review): presumably 1-based — confirm against the producer. */
  line: number;
  /** The matched text in redacted form. */
  redacted_match: string;
}

/** Result of scanning a single file for secrets. */
export interface SecretScanResult {
  /** False when no scan took place (e.g. file missing or scanner unavailable). */
  scanned: boolean;
  /** Findings reported by the scanner; empty when clean or when no scan took place. */
  findings: SecretFinding[];
  /** "gitleaks" for a real scan, "missing" when gitleaks is not in PATH, "error" otherwise. */
  scanner: "gitleaks" | "missing" | "error";
}
|
||||
|
||||
/** Which gbrain engine is in use; "unknown" when it is not one of the named engines. */
export type EngineTier = "pglite" | "supabase" | "unknown";

/** Engine-tier detection record (versioned with `schema_version: 1`). */
export interface EngineDetect {
  /** Detected engine tier. */
  engine: EngineTier;
  /** NOTE(review): presumably only populated for the "supabase" tier — confirm against the producer. */
  supabase_url?: string;
  /** When the detection ran. NOTE(review): presumably epoch milliseconds — confirm. */
  detected_at: number;
  schema_version: 1;
}
|
||||
|
||||
/**
 * One context query declared in a gbrain manifest. `kind` selects which of
 * the kind-specific optional fields below apply.
 */
export interface GbrainManifestQuery {
  id: string;
  kind: "vector" | "list" | "filesystem";
  render_as: string;

  // Fields for kind=vector
  query?: string;

  // Fields for kind=list
  filter?: Record<string, unknown>;
  sort?: string;

  // Fields for kind=filesystem
  glob?: string;
  tail?: number;

  // Shared by all kinds
  limit?: number;
}

/** A parsed gbrain manifest: schema number plus its context queries. */
export interface GbrainManifest {
  schema: number; // gbrain.schema in frontmatter; V1 = 1
  context_queries: GbrainManifestQuery[];
}
|
||||
|
||||
/** One record describing an operation's outcome (versioned with `schema_version: 1`). */
export interface ErrorContextEntry {
  /** Timestamp of the record. NOTE(review): presumably an ISO-8601 string — confirm against the writer. */
  ts: string;
  /** Name of the operation. */
  op: string;
  /** How long the operation took, in milliseconds. */
  duration_ms: number;
  /** Whether the operation succeeded. */
  outcome: "ok" | "error";
  /** Error text. NOTE(review): presumably only present when outcome is "error" — confirm. */
  error?: string;
  schema_version: 1;
  /** Name of the component that wrote the record. */
  last_writer: string;
}
|
||||
|
||||
// ── Public: canonicalizeRemote ────────────────────────────────────────────
|
||||
|
||||
/**
 * Normalize a git remote URL to a canonical, lower-cased form:
 * `host/org/repo` (no scheme, no user, no trailing `.git`, no trailing
 * slash). Used as the dedup key for cross-Mac transcript routing
 * (per ED1 — gbrain-side session_id dedup uses repo as a tag).
 *
 * Examples:
 *   https://github.com/garrytan/gstack.git   → github.com/garrytan/gstack
 *   https://github.com/garrytan/gstack.git/  → github.com/garrytan/gstack
 *   git@github.com:garrytan/gstack.git       → github.com/garrytan/gstack
 *   ssh://git@gitlab.com/foo/bar             → gitlab.com/foo/bar
 *   ssh://git@example.com:2222/org/repo.git  → example.com:2222/org/repo
 *   (empty / null)                           → ""
 *
 * An explicit port on a scheme-style URL is kept as part of the host.
 *
 * @param url Remote URL as found in git config; may be null/undefined/blank.
 * @returns The canonical form, or "" for empty input.
 */
export function canonicalizeRemote(url: string | null | undefined): string {
  if (!url) return "";
  let s = url.trim();
  if (!s) return "";
  // strip surrounding quotes that some configs add
  s = s.replace(/^['"]|['"]$/g, "");

  const schemePrefix = /^[a-z][a-z0-9+.-]*:\/\//i;
  // scp-style (git@host:path/repo) never has a scheme. Without this guard
  // `ssh://git@host:2222/org/repo` would match too, and its port would be
  // turned into a path segment.
  const scpMatch = schemePrefix.test(s) ? null : s.match(/^[^@\s]+@([^:]+):(.+)$/);
  if (scpMatch) {
    // git@host:path/repo → host/path/repo
    s = `${scpMatch[1]}/${scpMatch[2]}`;
  } else {
    // strip scheme (https://, ssh://, git://, http://)
    s = s.replace(schemePrefix, "");
    // strip user@ prefix on URL-style remotes
    s = s.replace(/^[^@\/]+@/, "");
  }

  // Trailing slashes go first so that `repo.git/` still loses its `.git`;
  // then `.git`; then any slash that `.git` removal exposed.
  s = s.replace(/\/+$/, "");
  s = s.replace(/\.git$/i, "");
  s = s.replace(/\/+$/, "");
  // collapse multiple slashes (after path normalization)
  s = s.replace(/\/{2,}/g, "/");
  return s.toLowerCase();
}
|
||||
|
||||
// ── Public: secretScanFile (gitleaks wrapper) ─────────────────────────────
|
||||
|
||||
/** Memoized result of the gitleaks PATH probe; null until the first probe runs. */
let _gitleaksAvailability: boolean | null = null;

/**
 * Report whether `gitleaks` resolves via `command -v`.
 *
 * The probe runs at most once per process (until the cache variable is reset);
 * when it fails, a single warning is written to stderr and `false` is cached.
 *
 * @returns true when the probe succeeded, false otherwise.
 */
function gitleaksAvailable(): boolean {
  if (_gitleaksAvailability === null) {
    try {
      execSync("command -v gitleaks", { stdio: "ignore" });
      _gitleaksAvailability = true;
    } catch {
      _gitleaksAvailability = false;
      // Only warn once per process — Lane E will vendor the binary.
      const warning =
        "[gstack-memory-helpers] gitleaks not in PATH; secret scanning disabled. " +
        "Run /setup-gbrain to install (or `brew install gitleaks`).\n";
      process.stderr.write(warning);
    }
  }
  return _gitleaksAvailability;
}
|
||||
|
||||
/**
 * Scan a file for embedded secrets using gitleaks. Returns findings list
 * (empty if clean). When gitleaks is not in PATH, returns scanned=false with
 * scanner="missing" — caller decides whether to skip the file or proceed.
 * A path that does not exist, or any failure while running gitleaks or
 * parsing its report, yields scanned=false with scanner="error".
 *
 * Per D19: gitleaks runs at ingest time before any put_page / put_file write.
 * Replaces the inadequate regex scanner in bin/gstack-brain-sync (which only
 * applies to staged git diffs).
 *
 * @param path File to scan.
 * @returns Scan outcome; matched secrets are redacted before being returned.
 */
export function secretScanFile(path: string): SecretScanResult {
  if (!existsSync(path)) {
    return { scanned: false, findings: [], scanner: "error" };
  }
  if (!gitleaksAvailable()) {
    return { scanned: false, findings: [], scanner: "missing" };
  }

  // Shape of the report entries this function reads.
  type GitleaksReportEntry = {
    RuleID: string;
    Description: string;
    StartLine: number;
    Match?: string;
    Secret?: string;
  };

  // The JSON report is written to /dev/stdout and read back from the child's
  // stdout. `--exit-code 0` is passed so that, per the gitleaks CLI docs,
  // findings do not produce a non-zero exit (which execFileSync would throw on).
  const gitleaksArgs = [
    "detect",
    "--no-git",
    "--source",
    path,
    "--report-format",
    "json",
    "--report-path",
    "/dev/stdout",
    "--exit-code",
    "0",
  ];

  try {
    const report = execFileSync("gitleaks", gitleaksArgs, {
      encoding: "utf-8",
      maxBuffer: 16 * 1024 * 1024,
    }).trim();

    // No report text at all is treated as a clean scan.
    if (!report) return { scanned: true, findings: [], scanner: "gitleaks" };

    const entries = JSON.parse(report) as GitleaksReportEntry[];
    const toFinding = (entry: GitleaksReportEntry): SecretFinding => ({
      rule_id: entry.RuleID || "unknown",
      description: entry.Description || "",
      line: entry.StartLine || 0,
      // Prefer the secret itself over the wider match; never return it raw.
      redacted_match: redactMatch(entry.Secret || entry.Match || ""),
    });
    const findings: SecretFinding[] = (entries || []).map(toFinding);
    return { scanned: true, findings, scanner: "gitleaks" };
  } catch {
    // Spawn failure, non-zero exit, or an unparseable report.
    return { scanned: false, findings: [], scanner: "error" };
  }
}
|
||||
|
||||
/**
 * Produce a display-safe hint for a matched secret.
 *
 * - empty input        → ""
 * - 8 chars or fewer   → "[REDACTED]"
 * - longer             → `<prefix>...<suffix>`
 *
 * The prefix/suffix length scales with the secret (one character per eight,
 * capped at four), so at most about a quarter of the secret is ever shown.
 * A fixed 4+4 window would reveal 8 of 9 characters of a short secret.
 * Secrets of 32+ characters keep the 4+4 form.
 *
 * @param s Raw secret or match text.
 * @returns The redacted hint.
 */
function redactMatch(s: string): string {
  if (!s) return "";
  if (s.length <= 8) return "[REDACTED]";
  const keep = Math.min(4, Math.floor(s.length / 8));
  return `${s.slice(0, keep)}...${s.slice(-keep)}`;
}
|
||||
|
||||
// ── Public: detectEngineTier (cached) ─────────────────────────────────────
|
||||
|
||||
/** Freshness window for the engine-detection cache: 60 seconds, in milliseconds. */
const ENGINE_CACHE_TTL_MS = 60_000;
|
||||
|
||||
/**
 * Resolve the gstack state directory: the GSTACK_HOME environment variable
 * when it is set to a non-empty value, otherwise `.gstack` under the user's
 * home directory.
 */
function gstackHome(): string {
  const override = process.env.GSTACK_HOME;
  if (override) return override;
  return join(homedir(), ".gstack");
}
|
||||
|
||||
/** Location of the engine-detection cache file inside the gstack home directory. */
function engineCachePath(): string {
  const home = gstackHome();
  return join(home, ".gbrain-engine-cache.json");
}
|
||||
|
||||
/** Location of the JSONL error log inside the gstack home directory. */
function errorLogPath(): string {
  const home = gstackHome();
  return join(home, ".gbrain-errors.jsonl");
}
|
||||
|
||||
/**
 * Report which gbrain engine is active (PGLite vs Supabase), reusing the
 * answer stored in ~/.gstack/.gbrain-engine-cache.json when that file was
 * modified less than 60s ago. The cache exists so that skill start-up does not
 * fork+exec `gbrain doctor --json` every time.
 *
 * Per ED1 (state files local-only): this cache is gitignored from the brain
 * repo. Per Section 2A: the cache carries schema_version: 1 plus a
 * last_writer field for forensic tracing.
 *
 * @returns The cached detection when it is fresh and has schema_version 1;
 *   otherwise a freshly computed detection (which is also written to the cache
 *   on a best-effort basis).
 */
export function detectEngineTier(): EngineDetect {
  const cacheFile = engineCachePath();

  // Try cache first
  if (existsSync(cacheFile)) {
    try {
      // Freshness is judged from the cache file's mtime.
      const cacheAgeMs = Date.now() - statSync(cacheFile).mtimeMs;
      if (cacheAgeMs < ENGINE_CACHE_TTL_MS) {
        const fromDisk = JSON.parse(readFileSync(cacheFile, "utf-8")) as EngineDetect;
        if (fromDisk.schema_version === 1) return fromDisk;
      }
    } catch {
      // Cache corrupt; fall through to fresh detect.
    }
  }

  const detected = freshDetectEngineTier();
  try {
    mkdirSync(dirname(cacheFile), { recursive: true });
    const payload = { ...detected, last_writer: "gstack-memory-helpers.detectEngineTier" };
    writeFileSync(cacheFile, JSON.stringify(payload, null, 2), "utf-8");
  } catch {
    // Cache write failure is non-fatal.
  }
  return detected;
}
|
||||
|
||||
/**
 * Path of gbrain's config.json: under GBRAIN_HOME when that environment
 * variable is set to a non-empty value, otherwise under ~/.gbrain.
 *
 * gbrain >=0.25 dropped the top-level `engine` field from doctor output, so
 * this file is the only reliable source for engine detection on that version.
 * See #1415.
 */
function gbrainConfigPath(): string {
  const override = process.env.GBRAIN_HOME;
  const baseDir = override ? override : join(homedir(), ".gbrain");
  return join(baseDir, "config.json");
}
|
||||
|
||||
/**
 * Best-effort JSONL append to ~/.gstack/.gbrain-errors.jsonl. Never throws.
 *
 * @param kind Short category label stored in the record.
 * @param detail Free-form detail; only its first 500 characters are stored.
 */
function logGbrainError(kind: string, detail: string): void {
  try {
    const logFile = errorLogPath();
    mkdirSync(dirname(logFile), { recursive: true });
    const record = { ts: new Date().toISOString(), kind, detail: detail.slice(0, 500) };
    appendFileSync(logFile, JSON.stringify(record) + "\n", "utf-8");
  } catch {
    /* logging is best-effort */
  }
}
|
||||
|
||||
/**
 * Detect the active gbrain engine without consulting the cache.
 *
 * Order of evidence:
 *   1. the `engine` field of `gbrain doctor --json --fast` output;
 *   2. when that yields nothing usable, gbrain's own config.json.
 *
 * @returns A detection record stamped with the time this function started;
 *   `engine` is "unknown" when neither source identifies one.
 */
function freshDetectEngineTier(): EngineDetect {
  const detectedAt = Date.now();
  let doctor: Record<string, unknown> | null = null;

  // execFileSync (not execSync) avoids shell redirection — portable to
  // environments where `2>/dev/null` is bash-specific. The stdio array
  // suppresses stderr without invoking a shell.
  try {
    const stdout = execFileSync("gbrain", ["doctor", "--json", "--fast"], {
      encoding: "utf-8",
      timeout: 5000,
      stdio: ["ignore", "pipe", "ignore"],
    });
    doctor = JSON.parse(stdout);
  } catch (err: unknown) {
    // execFileSync throws on non-zero exit; stdout is still on the error
    // object. gbrain doctor exits 1 whenever health_score < 100, which is
    // essentially always on fresh installs (resolver_health warnings are
    // normal). Recover stdout and re-parse. See #1415.
    try {
      const captured = (err as { stdout?: Buffer | string })?.stdout ?? "";
      const text = typeof captured === "string" ? captured : captured.toString("utf-8");
      if (text) doctor = JSON.parse(text);
    } catch (parseErr) {
      logGbrainError("doctor_parse_failure", String(parseErr));
    }
  }

  let engine: EngineTier = "unknown";
  if (doctor?.engine === "supabase") {
    engine = "supabase";
  } else if (doctor?.engine === "pglite") {
    engine = "pglite";
  }

  // gbrain >=0.25 ships schema_version:2 doctor output which dropped the
  // top-level `engine` field. Fall back to gbrain's config.json (respects
  // GBRAIN_HOME). "supabase" here means "remote postgres" — gbrain config
  // uses engine:"postgres" for real Supabase AND any other remote postgres
  // (e.g. local-postgres-for-testing). Downstream sync code treats them the
  // same, so the label compression is intentional.
  if (engine === "unknown") {
    try {
      const cfg = JSON.parse(readFileSync(gbrainConfigPath(), "utf-8"));
      if (cfg?.engine === "pglite") {
        engine = "pglite";
      } else if (cfg?.engine === "postgres" || cfg?.database_url) {
        engine = "supabase";
      }
    } catch (cfgErr) {
      logGbrainError("config_read_failure", String(cfgErr));
    }
  }

  return {
    engine,
    supabase_url: doctor?.supabase_url as string | undefined,
    detected_at: detectedAt,
    schema_version: 1,
  };
}
|
||||
|
||||
// ── Public: parseSkillManifest ────────────────────────────────────────────
|
||||
|
||||
/**
 * Read the `gbrain:` section from the frontmatter of a SKILL.md.tmpl file.
 *
 * Schema validation (full kind/required-fields check) lives in
 * scripts/gen-skill-docs.ts and runs at generation time. This parser is the
 * runtime read path used by gstack-brain-context-load; it tolerates extra
 * fields and relies on validation having already happened upstream.
 *
 * @param skillFilePath Path of the skill template to read.
 * @returns The parsed manifest, or null when the file does not exist, has no
 *   frontmatter, or declares no manifest.
 */
export function parseSkillManifest(skillFilePath: string): GbrainManifest | null {
  if (!existsSync(skillFilePath)) return null;
  const frontmatter = extractFrontmatter(readFileSync(skillFilePath, "utf-8"));
  return frontmatter ? extractGbrainBlock(frontmatter) : null;
}
|
||||
|
||||
/**
 * Return the text between the leading `---` fences of a document.
 *
 * Only YAML-style `---` fences are recognised; `+++` (TOML) frontmatter is not
 * supported and yields null. The closing fence may be followed by a newline or
 * by the end of the input, so a file that ends right after its frontmatter is
 * still parsed.
 *
 * @param content Full document text.
 * @returns The frontmatter body without the fences, or null when the document
 *   does not start with a frontmatter block.
 */
function extractFrontmatter(content: string): string | null {
  const yamlMatch = content.match(/^---\s*\n([\s\S]*?)\n---\s*(?:\n|$)/);
  return yamlMatch?.[1] ?? null;
}
|
||||
|
||||
/** Trim a YAML scalar and drop one pair of matching surrounding quotes, if any. */
function stripYamlQuotes(raw: string): string {
  const v = raw.trim();
  const first = v[0];
  const quoted = v.length >= 2 && (first === '"' || first === "'") && v[v.length - 1] === first;
  return quoted ? v.slice(1, -1) : v;
}

/**
 * Pull the `gbrain:` manifest out of a frontmatter block.
 *
 * Naive YAML extraction — finds the `gbrain:` key and parses its sub-tree.
 * Real YAML parsing avoided to keep zero-deps; gen-skill-docs validates the
 * shape strictly at build time.
 *
 * Notes on behavior:
 *   - `schema` and `context_queries` are recognised wherever they appear in
 *     the block, including on the first line directly under `gbrain:`.
 *   - `schema` defaults to 1 when absent.
 *   - A query item is kept only when it has `id`, a recognised `kind`, and
 *     `render_as`; anything else is dropped silently.
 *   - `filter` is not parsed by this function.
 *
 * @param frontmatter Frontmatter text without the `---` fences.
 * @returns The manifest, or null when there is no top-level `gbrain:` key.
 */
function extractGbrainBlock(frontmatter: string): GbrainManifest | null {
  const lines = frontmatter.split("\n");
  const start = lines.findIndex((l) => /^gbrain\s*:/.test(l));
  if (start === -1) return null;

  // Collect indented lines under `gbrain:` until next top-level key or EOF
  const block: string[] = [];
  for (let i = start + 1; i < lines.length; i++) {
    const line = lines[i];
    if (/^[A-Za-z_][A-Za-z0-9_-]*\s*:/.test(line)) break; // next top-level key
    block.push(line);
  }

  const text = block.join("\n");

  // Extract schema number. `(?:^|\n)` rather than a bare `\n`: the block text
  // does not start with a newline, so a key on its first line has none before it.
  const schemaMatch = text.match(/(?:^|\n)\s*schema\s*:\s*(\d+)/);
  const schema = schemaMatch ? parseInt(schemaMatch[1], 10) : 1;

  // Extract context_queries items
  const queries: GbrainManifestQuery[] = [];
  const cqMatch = text.match(/(?:^|\n)\s*context_queries\s*:\s*\n([\s\S]+)/);
  if (cqMatch) {
    const cqText = cqMatch[1];
    // Split using a positive lookahead so each chunk begins with the list-item dash.
    // Pattern: line starting with 4-6 spaces + "-" + whitespace.
    const rawItems = cqText.split(/(?=^[ ]{4,6}-\s)/m);
    const items = rawItems.filter((s) => /^[ ]{4,6}-\s/.test(s));
    for (const item of items) {
      const q: Partial<GbrainManifestQuery> = {};
      // Strip the leading list-item marker so id/kind/etc. regexes can use line-start.
      const body = item.replace(/^[ ]{4,6}-\s+/, " ");
      const idM = body.match(/(?:^|\n)\s*id\s*:\s*([^\n]+)/);
      const kindM = body.match(/(?:^|\n)\s*kind\s*:\s*([^\n]+)/);
      const renderM = body.match(/(?:^|\n)\s*render_as\s*:\s*"?([^"\n]+?)"?\s*$/m);
      const queryM = body.match(/(?:^|\n)\s*query\s*:\s*"?([^"\n]+?)"?\s*$/m);
      const limitM = body.match(/(?:^|\n)\s*limit\s*:\s*(\d+)/);
      const globM = body.match(/(?:^|\n)\s*glob\s*:\s*"?([^"\n]+?)"?\s*$/m);
      const sortM = body.match(/(?:^|\n)\s*sort\s*:\s*([^\n]+)/);
      const tailM = body.match(/(?:^|\n)\s*tail\s*:\s*(\d+)/);

      // id/kind/sort capture the rest of the line, so quotes are removed here
      // to match what the render_as/query/glob patterns already do.
      if (idM) q.id = stripYamlQuotes(idM[1]);
      if (kindM) {
        const k = stripYamlQuotes(kindM[1]);
        if (k === "vector" || k === "list" || k === "filesystem") q.kind = k;
      }
      if (renderM) q.render_as = renderM[1].trim();
      if (queryM) q.query = queryM[1].trim();
      if (limitM) q.limit = parseInt(limitM[1], 10);
      if (globM) q.glob = globM[1].trim();
      if (sortM) q.sort = stripYamlQuotes(sortM[1]);
      if (tailM) q.tail = parseInt(tailM[1], 10);

      if (q.id && q.kind && q.render_as) {
        queries.push(q as GbrainManifestQuery);
      }
    }
  }

  return { schema, context_queries: queries };
}
|
||||
|
||||
// ── Public: withErrorContext ──────────────────────────────────────────────
|
||||
|
||||
// Error-log location computed once at module load. NOTE(review): nothing in
// the visible part of this file reads this constant — the logging helpers
// call errorLogPath() instead; confirm whether it is still needed.
const ERROR_LOG_PATH = join(gstackHome(), ".gbrain-errors.jsonl");

/**
 * Run an operation and record its outcome and duration in the structured
 * log at ~/.gstack/.gbrain-errors.jsonl (successes are recorded too).
 * Replaces ad-hoc try/catch sites across the three Bun helpers (Section 2B).
 *
 * On error: the error is RE-THROWN after logging — caller still owns flow.
 *
 * @param op Name of the operation, stored in the record.
 * @param fn The operation; may be synchronous or return a promise.
 * @param caller Stored as the record's `last_writer`; defaults to "unknown".
 * @returns Whatever `fn` resolves to.
 * @throws Whatever `fn` throws or rejects with, unchanged.
 */
export async function withErrorContext<T>(
  op: string,
  fn: () => T | Promise<T>,
  caller: string = "unknown"
): Promise<T> {
  const startedAt = Date.now();

  // Builds and writes one record; `error` is only included when provided.
  const record = (outcome: "ok" | "error", error?: string): void => {
    logErrorContext({
      ts: new Date().toISOString(),
      op,
      duration_ms: Date.now() - startedAt,
      outcome,
      ...(error !== undefined ? { error } : {}),
      schema_version: 1,
      last_writer: caller,
    });
  };

  let result: T;
  try {
    result = await fn();
  } catch (err) {
    record("error", err instanceof Error ? err.message : String(err));
    throw err;
  }
  record("ok");
  return result;
}
|
||||
|
||||
/**
 * Append one record as a JSON line to the error log, creating the log's
 * directory when needed. Any failure is swallowed so that logging can never
 * break the operation being recorded.
 *
 * @param entry Record to serialize.
 */
function logErrorContext(entry: ErrorContextEntry): void {
  try {
    const logFile = errorLogPath();
    mkdirSync(dirname(logFile), { recursive: true });
    const line = JSON.stringify(entry) + "\n";
    appendFileSync(logFile, line, "utf-8");
  } catch {
    // Logging failure is non-fatal — never block the op.
  }
}
|
||||
|
||||
/**
 * Test-only export: clears the memoized gitleaks availability so the next
 * availability check probes again. Intended for use between tests.
 */
export function _resetGitleaksAvailabilityCache(): void {
  _gitleaksAvailability = null;
}
|
||||
307
lib/worktree.ts
Normal file
307
lib/worktree.ts
Normal file
@@ -0,0 +1,307 @@
|
||||
/**
|
||||
* Git worktree manager for isolated test execution with change harvesting.
|
||||
*
|
||||
* Creates git worktrees for test suites that need real repo context,
|
||||
* harvests any changes the test agent makes as patches, and provides
|
||||
* deduplication across runs.
|
||||
*
|
||||
* Reusable platform module — future /batch or /codex challenge skills
|
||||
* can import this directly.
|
||||
*/
|
||||
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as crypto from 'crypto';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
// --- Interfaces ---
|
||||
|
||||
/** Bookkeeping record for one worktree created by the manager. */
export interface WorktreeInfo {
  /** Filesystem path of the worktree. */
  path: string;
  /** Name of the test the worktree was created for. */
  testName: string;
  /** Commit SHA recorded when the worktree was created. */
  originalSha: string;
  /** Creation time as a `Date.now()` value. */
  createdAt: number;
}
|
||||
|
||||
/** Outcome of harvesting the changes made inside one worktree. */
export interface HarvestResult {
  /** Name of the test whose worktree was harvested. */
  testName: string;
  /** Path of the harvested worktree. */
  worktreePath: string;
  /** Output of `git diff --stat` for the harvested changes. */
  diffStat: string;
  /** Where the patch file was written; empty when the patch was a duplicate. */
  patchPath: string;
  /** Paths reported by `git diff --name-only`. */
  changedFiles: string[];
  /** True when an identical patch was already recorded in the dedup index. */
  isDuplicate: boolean;
}
|
||||
|
||||
// --- Utility ---
|
||||
|
||||
/**
 * Copy a directory tree from `src` to `dest` using only the fs module.
 *
 * `dest` (and any missing parents) is created first. Symbolic links are
 * skipped entirely so that a link pointing back up the tree
 * (e.g., .claude/skills/gstack → repo root) cannot cause infinite recursion.
 */
function copyDirSync(src: string, dest: string): void {
  fs.mkdirSync(dest, { recursive: true });

  const children = fs.readdirSync(src, { withFileTypes: true });
  for (const child of children) {
    if (child.isSymbolicLink()) continue;

    const from = path.join(src, child.name);
    const to = path.join(dest, child.name);

    if (!child.isDirectory()) {
      fs.copyFileSync(from, to);
      continue;
    }
    copyDirSync(from, to);
  }
}
|
||||
|
||||
/**
 * Run a git command and return its trimmed stdout.
 *
 * @param args Arguments passed to `git` (no shell is involved).
 * @param cwd Working directory for the command.
 * @param tolerateFailure When true, a failed command does not throw and
 *   whatever stdout was captured (possibly empty) is returned.
 * @returns Trimmed stdout.
 * @throws Error when the command does not exit with status 0 and
 *   `tolerateFailure` is false. The message carries stderr, falling back to
 *   stdout, then to the spawn error (e.g. ENOENT, timeout) or the terminating
 *   signal, so failures that never produced output are still diagnosable.
 */
function git(args: string[], cwd: string, tolerateFailure = false): string {
  const result = spawnSync('git', args, { cwd, stdio: 'pipe', timeout: 30_000 });
  const stdout = result.stdout?.toString().trim() ?? '';
  const stderr = result.stderr?.toString().trim() ?? '';
  if (result.status !== 0 && !tolerateFailure) {
    // When the process could not be spawned or was killed, status is null and
    // both streams are empty; result.error / result.signal hold the cause.
    const detail =
      stderr ||
      stdout ||
      result.error?.message ||
      (result.signal ? `terminated by ${result.signal}` : '');
    throw new Error(`git ${args.join(' ')} failed (exit ${result.status}): ${detail}`);
  }
  return stdout;
}
|
||||
|
||||
// --- Dedup index ---
|
||||
|
||||
/** On-disk index of patch hashes that have already been harvested. */
interface DedupIndex {
  hashes: Record<string, string>; // hash → first-seen runId
}

/** Path of the dedup index file under the user's home directory. */
function getDedupPath(): string {
  return path.join(os.homedir(), '.gstack-dev', 'harvests', 'dedup.json');
}

/**
 * Load the dedup index.
 *
 * @returns The stored index, or an empty index when the file is missing,
 *   unreadable, not valid JSON, or does not have an object-valued `hashes`
 *   field. Callers use `hash in index.hashes`, which would throw on a file
 *   containing e.g. `{}` or `null` if the shape were not checked here.
 */
function loadDedupIndex(): DedupIndex {
  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(getDedupPath(), 'utf-8'));
    if (typeof parsed === 'object' && parsed !== null) {
      const hashes = (parsed as { hashes?: unknown }).hashes;
      if (typeof hashes === 'object' && hashes !== null && !Array.isArray(hashes)) {
        return parsed as DedupIndex;
      }
    }
  } catch {
    // Missing or corrupt index — start from an empty one.
  }
  return { hashes: {} };
}

/**
 * Persist the dedup index by writing a temporary file and renaming it over
 * the target. The temporary name includes the process id so two concurrent
 * runs do not write to the same temporary file.
 *
 * @throws Whatever the underlying fs calls throw.
 */
function saveDedupIndex(index: DedupIndex): void {
  const target = getDedupPath();
  fs.mkdirSync(path.dirname(target), { recursive: true });
  const tmp = `${target}.${process.pid}.tmp`;
  fs.writeFileSync(tmp, JSON.stringify(index, null, 2));
  fs.renameSync(tmp, target);
}
|
||||
|
||||
// --- WorktreeManager ---
|
||||
|
||||
/**
 * Creates, harvests, and removes per-test git worktrees for a single run.
 *
 * Each instance gets a random run id; its worktrees live under
 * `<repoRoot>/.gstack-worktrees/<runId>/<testName>` and its patches under
 * `~/.gstack-dev/harvests/<runId>/`. Constructing an instance registers a
 * process `exit` handler that removes every worktree still active.
 */
export class WorktreeManager {
  private repoRoot: string;
  private runId: string;
  private active: Map<string, WorktreeInfo> = new Map();
  private harvestResults: HarvestResult[] = [];

  /**
   * @param repoRoot Repository root; when omitted it is resolved with
   *   `git rev-parse --show-toplevel` from the current working directory
   *   (which throws outside a repository).
   */
  constructor(repoRoot?: string) {
    if (repoRoot) {
      this.repoRoot = repoRoot;
    } else {
      this.repoRoot = git(['rev-parse', '--show-toplevel'], process.cwd());
    }
    this.runId = crypto.randomUUID();

    // Register cleanup on process exit
    process.on('exit', () => {
      this.cleanupAll();
    });
  }

  /** Create an isolated worktree. Returns the worktree path. Throws on failure. */
  create(testName: string): string {
    const originalSha = git(['rev-parse', 'HEAD'], this.repoRoot);

    const worktreeBase = path.join(this.repoRoot, '.gstack-worktrees', this.runId);
    fs.mkdirSync(worktreeBase, { recursive: true });

    const worktreePath = path.join(worktreeBase, testName);

    // Create detached worktree at current HEAD
    git(['worktree', 'add', '--detach', worktreePath, 'HEAD'], this.repoRoot);

    // Register as soon as the worktree exists: if copying artifacts below
    // throws, cleanup()/cleanupAll() can still find and remove it.
    const info: WorktreeInfo = {
      path: worktreePath,
      testName,
      originalSha,
      createdAt: Date.now(),
    };
    this.active.set(testName, info);

    // Copy gitignored build artifacts that tests need (config-driven)
    const { getExternalHosts } = require('../hosts/index');
    for (const hostConfig of getExternalHosts()) {
      const hostSrc = path.join(this.repoRoot, hostConfig.hostSubdir);
      if (fs.existsSync(hostSrc)) {
        copyDirSync(hostSrc, path.join(worktreePath, hostConfig.hostSubdir));
      }
    }

    const browseDist = path.join(this.repoRoot, 'browse', 'dist');
    if (fs.existsSync(browseDist)) {
      copyDirSync(browseDist, path.join(worktreePath, 'browse', 'dist'));
    }

    return worktreePath;
  }

  /** Harvest changes from a worktree. Returns null if clean or on error. */
  harvest(testName: string): HarvestResult | null {
    const info = this.active.get(testName);
    if (!info) return null;

    try {
      // Check if worktree directory still exists (agent may have deleted it)
      if (!fs.existsSync(info.path)) {
        process.stderr.write(` HARVEST [${testName}]: worktree dir deleted, skipping\n`);
        return null;
      }

      // Stage everything including untracked files
      git(['-C', info.path, 'add', '-A'], info.path, true);

      // Get diff against original SHA (captures both committed and uncommitted
      // changes). The patch is read untrimmed: trimming drops the final
      // newline and any trailing blank context line, which makes `git apply`
      // reject the saved file. A failed or truncated diff is not saved.
      const diffProc = spawnSync('git', ['-C', info.path, 'diff', info.originalSha, '--cached'], {
        cwd: info.path,
        stdio: 'pipe',
        timeout: 30_000,
        maxBuffer: 64 * 1024 * 1024,
      });
      if (diffProc.error || diffProc.status !== 0) {
        process.stderr.write(` HARVEST [${testName}]: git diff failed, skipping\n`);
        return null;
      }
      const rawPatch = diffProc.stdout?.toString() ?? '';
      const patch = rawPatch.trim();

      if (!patch) return null;

      // Get diff stat for human-readable output
      const diffStat = git(['-C', info.path, 'diff', info.originalSha, '--cached', '--stat'], info.path, true);

      // Get changed file names
      const nameOnly = git(['-C', info.path, 'diff', info.originalSha, '--cached', '--name-only'], info.path, true);
      const changedFiles = nameOnly.split('\n').filter(Boolean);

      // Dedup check. The hash is taken over the trimmed text so entries
      // already present in the dedup index keep matching.
      const hash = crypto.createHash('sha256').update(patch).digest('hex');
      const dedupIndex = loadDedupIndex();
      const isDuplicate = hash in dedupIndex.hashes;

      let patchPath = '';

      if (!isDuplicate) {
        // Save patch
        const harvestDir = path.join(os.homedir(), '.gstack-dev', 'harvests', this.runId);
        fs.mkdirSync(harvestDir, { recursive: true });
        patchPath = path.join(harvestDir, `${testName}.patch`);
        fs.writeFileSync(patchPath, rawPatch.endsWith('\n') ? rawPatch : `${rawPatch}\n`);

        // Update dedup index
        dedupIndex.hashes[hash] = this.runId;
        saveDedupIndex(dedupIndex);
      }

      const result: HarvestResult = {
        testName,
        worktreePath: info.path,
        diffStat,
        patchPath,
        changedFiles,
        isDuplicate,
      };

      this.harvestResults.push(result);
      return result;
    } catch (err) {
      process.stderr.write(` HARVEST [${testName}]: error — ${err}\n`);
      return null;
    }
  }

  /** Remove a worktree. Non-fatal on error. */
  cleanup(testName: string): void {
    const info = this.active.get(testName);
    if (!info) return;

    try {
      // Must be allowed to throw: the fallback below only runs from the catch.
      git(['worktree', 'remove', '--force', info.path], this.repoRoot);
    } catch {
      // Force remove the directory if git worktree remove fails
      try {
        fs.rmSync(info.path, { recursive: true, force: true });
        git(['worktree', 'prune'], this.repoRoot, true);
      } catch { /* non-fatal */ }
    }

    this.active.delete(testName);
  }

  /** Force-remove all active worktrees (for process exit handler). */
  cleanupAll(): void {
    for (const testName of [...this.active.keys()]) {
      this.cleanup(testName);
    }

    // Clean up the run directory if empty
    const runDir = path.join(this.repoRoot, '.gstack-worktrees', this.runId);
    try {
      const entries = fs.readdirSync(runDir);
      if (entries.length === 0) {
        fs.rmdirSync(runDir);
      }
    } catch { /* non-fatal */ }
  }

  /** Remove worktrees from previous runs that weren't cleaned up. */
  pruneStale(): void {
    try {
      git(['worktree', 'prune'], this.repoRoot, true);

      const worktreeBase = path.join(this.repoRoot, '.gstack-worktrees');
      if (!fs.existsSync(worktreeBase)) return;

      let removedAny = false;
      for (const entry of fs.readdirSync(worktreeBase)) {
        // Don't prune our own run
        if (entry === this.runId) continue;

        const entryPath = path.join(worktreeBase, entry);
        try {
          // Skip recent worktrees (< 1 hour old) to avoid killing
          // worktrees from concurrent test runs still in progress
          const stat = fs.statSync(entryPath);
          const ageMs = Date.now() - stat.mtimeMs;
          if (ageMs < 3600_000) continue;
          fs.rmSync(entryPath, { recursive: true, force: true });
          removedAny = true;
        } catch { /* non-fatal */ }
      }

      // Deleting directories leaves git's worktree records behind; prune
      // again so the worktrees removed above are dropped from git's list.
      if (removedAny) {
        git(['worktree', 'prune'], this.repoRoot, true);
      }
    } catch {
      process.stderr.write(' WORKTREE: prune failed (non-fatal)\n');
    }
  }

  /** Print harvest report summary. */
  printReport(): void {
    if (this.harvestResults.length === 0) return;

    const nonDuplicates = this.harvestResults.filter(r => !r.isDuplicate);
    process.stderr.write('\n=== HARVEST REPORT ===\n');
    process.stderr.write(`${nonDuplicates.length} of ${this.harvestResults.length} test suites produced new changes:\n\n`);

    for (const result of this.harvestResults) {
      if (result.isDuplicate) {
        process.stderr.write(` ${result.testName}: duplicate patch (skipped)\n`);
      } else {
        process.stderr.write(` ${result.testName}: ${result.changedFiles.length} files changed\n`);
        process.stderr.write(` Patch: ${result.patchPath}\n`);
        process.stderr.write(` Apply: git apply ${result.patchPath}\n`);
        if (result.diffStat) {
          process.stderr.write(` ${result.diffStat}\n`);
        }
      }
      process.stderr.write('\n');
    }
  }

  /** Get the run ID (for testing). */
  getRunId(): string {
    return this.runId;
  }

  /** Get active worktree info (for testing). */
  getInfo(testName: string): WorktreeInfo | undefined {
    return this.active.get(testName);
  }
}
|
||||
Reference in New Issue
Block a user