- commit
- ebd09f3
- parent
- cd43dcf
- author
- im_wower
- date
- 2026-03-22 02:16:10 +0800 CST
Merge feat/T-024-e2e-smoke into main
11 files changed,
+1380,
-8
+33,
-8
1@@ -1,10 +1,10 @@
2 ---
3 task_id: T-024
4 title: 端到端 smoke harness
5-status: todo
6+status: review
7 branch: feat/T-024-e2e-smoke
8 repo: /Users/george/code/baa-conductor
9-base_ref: main
10+base_ref: main@288c748
11 depends_on:
12 - T-019
13 - T-020
14@@ -13,7 +13,7 @@ depends_on:
15 write_scope:
16 - tests/e2e/**
17 - scripts/smoke/**
18-updated_at: 2026-03-22
19+updated_at: 2026-03-22T02:12:15+0800
20 ---
21
22 # T-024 端到端 smoke harness
23@@ -66,20 +66,45 @@ updated_at: 2026-03-22
24
25 ## files_changed
26
27-- 待填写
28+- coordination/tasks/T-024-e2e-smoke.md
29+- scripts/smoke/README.md
30+- scripts/smoke/check-stack.sh
31+- scripts/smoke/control-api-local.mjs
32+- scripts/smoke/d1-sqlite.mjs
33+- scripts/smoke/run-e2e.sh
34+- scripts/smoke/stack-cli.mjs
35+- scripts/smoke/start-stack.sh
36+- scripts/smoke/status-api-local.mjs
37+- scripts/smoke/stop-stack.sh
38+- tests/e2e/smoke.test.mjs
39
40 ## commands_run
41
42-- 待填写
43+- npx --yes pnpm install
44+- node --check scripts/smoke/d1-sqlite.mjs
45+- node --check scripts/smoke/control-api-local.mjs
46+- node --check scripts/smoke/status-api-local.mjs
47+- node --check scripts/smoke/stack-cli.mjs
48+- bash -n scripts/smoke/*.sh
49+- bash scripts/smoke/run-e2e.sh --json
50+- node --test tests/e2e/smoke.test.mjs
51+- bash scripts/smoke/start-stack.sh --json --state-dir /Users/george/code/baa-conductor-T024-e2e-smoke/tmp/manual-smoke-check
52+- bash scripts/smoke/check-stack.sh --json --state-dir /Users/george/code/baa-conductor-T024-e2e-smoke/tmp/manual-smoke-check --expected-leader smoke-mini
53+- bash scripts/smoke/stop-stack.sh --json --state-dir /Users/george/code/baa-conductor-T024-e2e-smoke/tmp/manual-smoke-check
54+- git diff --check
55
56 ## result
57
58-- 待填写
59+- 新增本地 smoke stack CLI 和 shell 包装脚本,可一键或分步启动、检查、停止 control-api、主备 conductor、status-api。
60+- 新增基于 SQLite 的 smoke-only D1 适配层,用于本地共享 control-api 和 status-api 的最小 durable 视图。
61+- 新增端到端 smoke 测试,覆盖探活、`/v1/system/state`、`/v1/status`、queued task 可见性和主备 failover。
62
63 ## risks
64
65-- 待填写
66+- `scripts/smoke/d1-sqlite.mjs` 是本地 smoke 适配层,不等价于真实 Cloudflare D1 / Wrangler 运行时语义。
67+- 当前 smoke 聚焦最小读取面和 lease failover,不覆盖 task claim、worker 执行和真实线上部署链路。
68
69 ## next_handoff
70
71-- 待填写
72+- 后续任务可直接复用 `bash scripts/smoke/run-e2e.sh --json` 作为本地回归入口。
73+- T-025 可复用 `start-stack.sh` / `check-stack.sh` / `stop-stack.sh` 和 `stack-cli.mjs` 的状态目录结构继续扩展 failover rehearsal。
+25,
-0
1@@ -0,0 +1,25 @@
2+# Smoke Harness
3+
4+最小本地 smoke 流程:
5+
6+```bash
7+bash scripts/smoke/run-e2e.sh --json
8+```
9+
10+如果要分步执行:
11+
12+```bash
13+bash scripts/smoke/start-stack.sh --json
14+bash scripts/smoke/check-stack.sh --state-dir <tmp/smoke-...> --expected-leader smoke-mini --json
15+bash scripts/smoke/stop-stack.sh --state-dir <tmp/smoke-...> --json
16+```
17+
18+`run-e2e.sh` 会完成这些动作:
19+
20+- 构建 `control-api-worker` 和 `status-api` 的本地运行产物
21+- 启动本地 `control-api`、主 conductor、备 conductor、`status-api`
22+- 验证 `healthz` / `readyz` / `rolez` / `/v1/system/state` / `/v1/status`
23+- 创建一个 queued task,确认 `status-api` 的 `queueDepth` 能读到
24+- 执行一次最小主备切换,确认 lease 从 `smoke-mini` 漂移到 `smoke-mac`
25+
26+所有临时状态、数据库和日志都会落到 `tmp/smoke-*` 目录。
+6,
-0
1@@ -0,0 +1,6 @@
2+#!/usr/bin/env bash
3+set -euo pipefail
4+
5+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
6+
7+exec node "$ROOT_DIR/scripts/smoke/stack-cli.mjs" check "$@"
+164,
-0
1@@ -0,0 +1,164 @@
2+import { createServer } from "node:http";
3+import controlApiWorker from "../../apps/control-api-worker/dist/index.js";
4+import { initializeSqliteD1Database } from "./d1-sqlite.mjs";
5+
6+function parseArgs(argv) {
7+ const options = {
8+ authRequired: false,
9+ databasePath: null,
10+ host: "127.0.0.1",
11+ port: null
12+ };
13+
14+ for (let index = 0; index < argv.length; index += 1) {
15+ const token = argv[index];
16+
17+ switch (token) {
18+ case "--auth-required":
19+ options.authRequired = true;
20+ break;
21+ case "--db":
22+ options.databasePath = readValue(argv, token, index);
23+ index += 1;
24+ break;
25+ case "--host":
26+ options.host = readValue(argv, token, index);
27+ index += 1;
28+ break;
29+ case "--port":
30+ options.port = parsePort(readValue(argv, token, index), token);
31+ index += 1;
32+ break;
33+ default:
34+ throw new Error(`Unknown control-api-local option "${token}".`);
35+ }
36+ }
37+
38+ if (!options.databasePath) {
39+ throw new Error("--db is required.");
40+ }
41+
42+ if (options.port == null) {
43+ throw new Error("--port is required.");
44+ }
45+
46+ return options;
47+}
48+
49+function readValue(tokens, flag, index) {
50+ const value = tokens[index + 1];
51+
52+ if (!value || value.startsWith("--")) {
53+ throw new Error(`Missing value for ${flag}.`);
54+ }
55+
56+ return value;
57+}
58+
59+function parsePort(value, flag) {
60+ const port = Number(value);
61+
62+ if (!Number.isInteger(port) || port < 0 || port > 65_535) {
63+ throw new Error(`Invalid value for ${flag}: "${value}".`);
64+ }
65+
66+ return port;
67+}
68+
69+async function readRequestBody(request) {
70+ const chunks = [];
71+
72+ for await (const chunk of request) {
73+ chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
74+ }
75+
76+ return Buffer.concat(chunks);
77+}
78+
79+async function main() {
80+ const options = parseArgs(process.argv.slice(2));
81+ const db = initializeSqliteD1Database({
82+ databasePath: options.databasePath
83+ });
84+ const env = {
85+ CONTROL_API_AUTH_REQUIRED: options.authRequired ? "true" : "false",
86+ CONTROL_API_VERSION: "smoke-local",
87+ CONTROL_DB: db
88+ };
89+ const baseUrl = `http://${options.host}:${options.port}`;
90+ const server = createServer((request, response) => {
91+ void handleRequest(request, response, baseUrl, env);
92+ });
93+
94+ const close = async () => {
95+ await new Promise((resolve, reject) => {
96+ server.close((error) => {
97+ if (error) {
98+ reject(error);
99+ return;
100+ }
101+
102+ resolve();
103+ });
104+ });
105+ db.close();
106+ };
107+
108+ const shutdown = (signal) => {
109+ void close().finally(() => {
110+ process.exitCode = 0;
111+ if (signal) {
112+ process.stderr.write(`control-api-local stopped after ${signal}\n`);
113+ }
114+ });
115+ };
116+
117+ process.once("SIGINT", () => shutdown("SIGINT"));
118+ process.once("SIGTERM", () => shutdown("SIGTERM"));
119+
120+ await new Promise((resolve) => {
121+ server.listen(options.port, options.host, resolve);
122+ });
123+
124+ process.stdout.write(`control-api-local listening on ${baseUrl}\n`);
125+}
126+
127+async function handleRequest(request, response, baseUrl, env) {
128+ try {
129+ const body = await readRequestBody(request);
130+ const url = new URL(request.url ?? "/", baseUrl);
131+ const workerRequest = new Request(url, {
132+ body: request.method === "GET" || request.method === "HEAD" ? undefined : body,
133+ headers: new Headers(request.headers),
134+ method: request.method ?? "GET"
135+ });
136+ const workerResponse = await controlApiWorker.fetch(workerRequest, env, {
137+ passThroughOnException() {},
138+ waitUntil() {}
139+ });
140+
141+ response.statusCode = workerResponse.status;
142+
143+ for (const [name, value] of workerResponse.headers.entries()) {
144+ response.setHeader(name, value);
145+ }
146+
147+ response.end(Buffer.from(await workerResponse.arrayBuffer()));
148+ } catch (error) {
149+ response.statusCode = 500;
150+ response.setHeader("content-type", "application/json; charset=utf-8");
151+ response.end(
152+ `${JSON.stringify(
153+ {
154+ ok: false,
155+ error: "control_api_local_failure",
156+ message: error instanceof Error ? error.message : String(error)
157+ },
158+ null,
159+ 2
160+ )}\n`
161+ );
162+ }
163+}
164+
165+await main();
+165,
-0
1@@ -0,0 +1,165 @@
2+import { mkdirSync, readFileSync } from "node:fs";
3+import { dirname, resolve } from "node:path";
4+import { DatabaseSync } from "node:sqlite";
5+import { fileURLToPath } from "node:url";
6+
7+const DEFAULT_BUSY_TIMEOUT_MS = 5_000;
8+const DEFAULT_AUTOMATION_MODE = "running";
9+
10+function normalizeRow(row) {
11+ if (row == null || typeof row !== "object" || Array.isArray(row)) {
12+ return row ?? null;
13+ }
14+
15+ return { ...row };
16+}
17+
18+function toLastRowId(value) {
19+ if (typeof value === "bigint") {
20+ const normalized = Number(value);
21+ return Number.isSafeInteger(normalized) ? normalized : undefined;
22+ }
23+
24+ return typeof value === "number" ? value : undefined;
25+}
26+
27+class SqliteD1PreparedStatement {
28+ constructor(database, query, values = []) {
29+ this.database = database;
30+ this.query = query;
31+ this.values = [...values];
32+ }
33+
34+ bind(...values) {
35+ return new SqliteD1PreparedStatement(this.database, this.query, values);
36+ }
37+
38+ async all() {
39+ const rows = this.#statement().all(...this.values).map(normalizeRow);
40+
41+ return {
42+ meta: {
43+ rows_read: rows.length
44+ },
45+ results: rows,
46+ success: true
47+ };
48+ }
49+
50+ async first(columnName) {
51+ const row = normalizeRow(this.#statement().get(...this.values));
52+
53+ if (row == null) {
54+ return null;
55+ }
56+
57+ if (columnName == null) {
58+ return row;
59+ }
60+
61+ return Object.prototype.hasOwnProperty.call(row, columnName) ? row[columnName] ?? null : null;
62+ }
63+
64+ async raw(options = {}) {
65+ const statement = this.#statement();
66+ const columns = statement.columns().map((column) => column.name);
67+ const rows = statement.all(...this.values).map((row) => columns.map((columnName) => row[columnName] ?? null));
68+
69+ return options.columnNames ? [columns, ...rows] : rows;
70+ }
71+
72+ async run() {
73+ const result = this.#statement().run(...this.values);
74+
75+ return {
76+ meta: {
77+ changes: result.changes,
78+ last_row_id: toLastRowId(result.lastInsertRowid)
79+ },
80+ success: true
81+ };
82+ }
83+
84+ #statement() {
85+ return this.database.prepare(this.query);
86+ }
87+}
88+
89+export class SqliteD1Database {
90+ constructor(databasePath) {
91+ const resolvedPath = resolve(databasePath);
92+ mkdirSync(dirname(resolvedPath), { recursive: true });
93+ this.path = resolvedPath;
94+ this.database = new DatabaseSync(this.path);
95+ this.database.exec("PRAGMA journal_mode = WAL;");
96+ this.database.exec("PRAGMA synchronous = NORMAL;");
97+ this.database.exec("PRAGMA foreign_keys = ON;");
98+ this.database.exec(`PRAGMA busy_timeout = ${DEFAULT_BUSY_TIMEOUT_MS};`);
99+ }
100+
101+ async batch(statements) {
102+ const results = [];
103+
104+ this.database.exec("BEGIN IMMEDIATE;");
105+
106+ try {
107+ for (const statement of statements) {
108+ if (!(statement instanceof SqliteD1PreparedStatement)) {
109+ throw new TypeError("SqliteD1Database.batch only accepts statements created by this adapter.");
110+ }
111+
112+ results.push(await statement.run());
113+ }
114+
115+ this.database.exec("COMMIT;");
116+ return results;
117+ } catch (error) {
118+ this.database.exec("ROLLBACK;");
119+ throw error;
120+ }
121+ }
122+
123+ close() {
124+ this.database.close();
125+ }
126+
127+ async exec(query) {
128+ this.database.exec(query);
129+ return {};
130+ }
131+
132+ prepare(query) {
133+ return new SqliteD1PreparedStatement(this.database, query);
134+ }
135+}
136+
137+export function createSqliteD1Database(databasePath) {
138+ return new SqliteD1Database(databasePath);
139+}
140+
141+export function getDefaultSmokeSchemaPath() {
142+ return resolve(dirname(fileURLToPath(import.meta.url)), "..", "..", "ops", "sql", "schema.sql");
143+}
144+
145+export function initializeSqliteD1Database({
146+ databasePath,
147+ schemaPath = getDefaultSmokeSchemaPath(),
148+ automationMode = DEFAULT_AUTOMATION_MODE
149+}) {
150+ const db = createSqliteD1Database(databasePath);
151+ const schema = readFileSync(schemaPath, "utf8");
152+ const nowUnixSeconds = Math.floor(Date.now() / 1_000);
153+
154+ db.database.exec(schema);
155+ db.database
156+ .prepare(
157+ `
158+ INSERT INTO system_state (state_key, value_json, updated_at)
159+ VALUES (?, ?, ?)
160+ ON CONFLICT(state_key) DO NOTHING
161+ `
162+ )
163+ .run("automation", JSON.stringify({ mode: automationMode }), nowUnixSeconds);
164+
165+ return db;
166+}
+6,
-0
1@@ -0,0 +1,6 @@
2+#!/usr/bin/env bash
3+set -euo pipefail
4+
5+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
6+
7+exec node "$ROOT_DIR/scripts/smoke/stack-cli.mjs" run "$@"
+831,
-0
1@@ -0,0 +1,831 @@
2+import { spawn } from "node:child_process";
3+import { appendFileSync, closeSync, openSync } from "node:fs";
4+import { mkdir, mkdtemp, readFile, writeFile } from "node:fs/promises";
5+import { createServer } from "node:net";
6+import { dirname, join, resolve } from "node:path";
7+import { setTimeout as delay } from "node:timers/promises";
8+import { fileURLToPath } from "node:url";
9+
10+const REPO_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "..", "..");
11+const DEFAULT_HOST = "127.0.0.1";
12+const DEFAULT_LEASE_RENEW_INTERVAL_MS = 250;
13+const DEFAULT_HEARTBEAT_INTERVAL_MS = 250;
14+const DEFAULT_LEASE_TTL_SEC = 2;
15+const HTTP_TIMEOUT_MS = 2_000;
16+const START_TIMEOUT_MS = 15_000;
17+const STOP_TIMEOUT_MS = 5_000;
18+const POLL_INTERVAL_MS = 200;
19+const PRIMARY_NODE_ID = "smoke-mini";
20+const STANDBY_NODE_ID = "smoke-mac";
21+
22+async function main() {
23+ const options = parseCli(process.argv.slice(2));
24+
25+ if (options.help) {
26+ writeResult(options, renderHelp());
27+ return;
28+ }
29+
30+ switch (options.command) {
31+ case "start": {
32+ const state = await startStack(options);
33+ writeResult(options, {
34+ ok: true,
35+ stateDir: state.stateDir,
36+ services: summarizeStateServices(state)
37+ });
38+ return;
39+ }
40+
41+ case "check": {
42+ const state = await loadState(options);
43+ const result = await captureLiveStack(state, {
44+ expectedLeader: options.expectedLeader
45+ });
46+
47+ writeResult(options, {
48+ ok: true,
49+ stateDir: state.stateDir,
50+ ...result
51+ });
52+ return;
53+ }
54+
55+ case "stop": {
56+ const state = await loadState(options);
57+ await stopStack(state);
58+ writeResult(options, {
59+ ok: true,
60+ stateDir: state.stateDir,
61+ stopped: true
62+ });
63+ return;
64+ }
65+
66+ case "run": {
67+ const result = await runSmokeFlow(options);
68+ writeResult(options, result);
69+ return;
70+ }
71+ }
72+}
73+
74+function parseCli(argv) {
75+ let command = "run";
76+ let index = 0;
77+
78+ if (argv[0] && !argv[0].startsWith("--")) {
79+ command = argv[0];
80+ index = 1;
81+ }
82+
83+ const options = {
84+ command,
85+ expectedLeader: null,
86+ help: false,
87+ json: false,
88+ skipBuild: false,
89+ stateDir: null
90+ };
91+
92+ for (; index < argv.length; index += 1) {
93+ const token = argv[index];
94+
95+ switch (token) {
96+ case "--expected-leader":
97+ options.expectedLeader = readOptionValue(argv, token, index);
98+ index += 1;
99+ break;
100+ case "--help":
101+ case "-h":
102+ options.help = true;
103+ break;
104+ case "--json":
105+ options.json = true;
106+ break;
107+ case "--skip-build":
108+ options.skipBuild = true;
109+ break;
110+ case "--state-dir":
111+ options.stateDir = resolve(readOptionValue(argv, token, index));
112+ index += 1;
113+ break;
114+ default:
115+ throw new Error(`Unknown smoke command option "${token}".`);
116+ }
117+ }
118+
119+ if (!["start", "check", "run", "stop"].includes(options.command)) {
120+ throw new Error(`Unknown smoke command "${options.command}".`);
121+ }
122+
123+ return options;
124+}
125+
126+function readOptionValue(argv, flag, index) {
127+ const value = argv[index + 1];
128+
129+ if (!value || value.startsWith("--")) {
130+ throw new Error(`Missing value for ${flag}.`);
131+ }
132+
133+ return value;
134+}
135+
136+async function runSmokeFlow(options) {
137+ const state = await startStack(options);
138+
139+ try {
140+ const initial = await captureLiveStack(state, {
141+ expectedLeader: PRIMARY_NODE_ID
142+ });
143+
144+ const drained = await verifyDrainAndResume(state);
145+ const task = await createSmokeTask(state.services.controlApi.baseUrl);
146+ const taskRecord = await readSmokeTask(state.services.controlApi.baseUrl, task.taskId);
147+ const afterQueue = await waitForQueueDepth(state.services.statusApi.baseUrl, 1);
148+
149+ await stopService(state, "primary");
150+
151+ const failover = await waitForFailover(state);
152+
153+ return {
154+ ok: true,
155+ stateDir: state.stateDir,
156+ initial,
157+ drained,
158+ task: {
159+ create: task.response,
160+ read: taskRecord,
161+ taskId: task.taskId
162+ },
163+ afterQueue,
164+ failover
165+ };
166+ } finally {
167+ await stopStack(state);
168+ }
169+}
170+
171+async function startStack(options) {
172+ const stateDir = options.stateDir ?? (await createStateDir());
173+ const state = {
174+ stateDir,
175+ stateFilePath: getStateFilePath(stateDir),
176+ dbPath: join(stateDir, "data", "control-plane.sqlite"),
177+ services: {}
178+ };
179+
180+ await mkdir(join(stateDir, "logs"), { recursive: true });
181+ await mkdir(join(stateDir, "runtime"), { recursive: true });
182+
183+ try {
184+ if (!options.skipBuild) {
185+ await buildDependencies();
186+ }
187+
188+ const ports = {
189+ controlApi: await allocatePort(),
190+ primary: await allocatePort(),
191+ standby: await allocatePort(),
192+ statusApi: await allocatePort()
193+ };
194+
195+ state.services.controlApi = await spawnService({
196+ args: [
197+ "scripts/smoke/control-api-local.mjs",
198+ "--db",
199+ state.dbPath,
200+ "--host",
201+ DEFAULT_HOST,
202+ "--port",
203+ String(ports.controlApi)
204+ ],
205+ baseUrl: `http://${DEFAULT_HOST}:${ports.controlApi}`,
206+ command: "node",
207+ logPath: join(stateDir, "logs", "control-api.log"),
208+ name: "control-api"
209+ });
210+ await writeState(state);
211+
212+ await waitForCondition(
213+ async () => {
214+ const envelope = await getJson(`${state.services.controlApi.baseUrl}/v1/system/state`);
215+ return envelope.ok ? envelope : null;
216+ },
217+ { description: "control-api readiness" }
218+ );
219+
220+ state.services.primary = await spawnConductorService(state, {
221+ host: "mini",
222+ localApiPort: ports.primary,
223+ name: "conductor-primary",
224+ nodeId: PRIMARY_NODE_ID,
225+ role: "primary"
226+ });
227+ await writeState(state);
228+
229+ state.services.standby = await spawnConductorService(state, {
230+ host: "mac",
231+ localApiPort: ports.standby,
232+ name: "conductor-standby",
233+ nodeId: STANDBY_NODE_ID,
234+ role: "standby"
235+ });
236+ await writeState(state);
237+
238+ state.services.statusApi = await spawnService({
239+ args: [
240+ "scripts/smoke/status-api-local.mjs",
241+ "--db",
242+ state.dbPath,
243+ "--host",
244+ DEFAULT_HOST,
245+ "--port",
246+ String(ports.statusApi)
247+ ],
248+ baseUrl: `http://${DEFAULT_HOST}:${ports.statusApi}`,
249+ command: "node",
250+ logPath: join(stateDir, "logs", "status-api.log"),
251+ name: "status-api"
252+ });
253+ await writeState(state);
254+
255+ await waitForCondition(
256+ async () => {
257+ const text = await getText(`${state.services.statusApi.baseUrl}/healthz`);
258+ return text === "ok" ? text : null;
259+ },
260+ { description: "status-api readiness" }
261+ );
262+
263+ await captureLiveStack(state, {
264+ expectedLeader: PRIMARY_NODE_ID
265+ });
266+
267+ return state;
268+ } catch (error) {
269+ await stopStack(state);
270+ throw error;
271+ }
272+}
273+
274+async function spawnConductorService(state, options) {
275+ const runtimeRoot = join(state.stateDir, "runtime", options.role);
276+
277+ for (const directoryName of ["logs", "runs", "state", "tmp", "worktrees"]) {
278+ await mkdir(join(runtimeRoot, directoryName), { recursive: true });
279+ }
280+
281+ const baseUrl = `http://${DEFAULT_HOST}:${options.localApiPort}`;
282+ const service = await spawnService({
283+ args: [
284+ "--experimental-strip-types",
285+ "apps/conductor-daemon/src/index.ts",
286+ "start",
287+ "--node-id",
288+ options.nodeId,
289+ "--host",
290+ options.host,
291+ "--role",
292+ options.role,
293+ "--control-api-base",
294+ state.services.controlApi.baseUrl,
295+ "--local-api",
296+ baseUrl,
297+ "--shared-token",
298+ "smoke-shared-token",
299+ "--heartbeat-interval-ms",
300+ String(DEFAULT_HEARTBEAT_INTERVAL_MS),
301+ "--lease-renew-interval-ms",
302+ String(DEFAULT_LEASE_RENEW_INTERVAL_MS),
303+ "--lease-ttl-sec",
304+ String(DEFAULT_LEASE_TTL_SEC),
305+ "--logs-dir",
306+ join(runtimeRoot, "logs"),
307+ "--runs-dir",
308+ join(runtimeRoot, "runs"),
309+ "--state-dir",
310+ join(runtimeRoot, "state"),
311+ "--tmp-dir",
312+ join(runtimeRoot, "tmp"),
313+ "--worktrees-dir",
314+ join(runtimeRoot, "worktrees")
315+ ],
316+ baseUrl,
317+ command: "node",
318+ logPath: join(state.stateDir, "logs", `${options.name}.log`),
319+ metadata: {
320+ host: options.host,
321+ nodeId: options.nodeId,
322+ role: options.role
323+ },
324+ name: options.name
325+ });
326+
327+ await waitForCondition(
328+ async () => {
329+ const text = await getText(`${baseUrl}/healthz`);
330+ return text === "ok" ? text : null;
331+ },
332+ { description: `${options.name} healthz` }
333+ );
334+
335+ return service;
336+}
337+
338+async function spawnService({ args, baseUrl, command, logPath, metadata = {}, name }) {
339+ appendFileSync(logPath, `\n$ ${command} ${args.join(" ")}\n`);
340+ const logFd = openSync(logPath, "a");
341+ const child = spawn(command, args, {
342+ cwd: REPO_ROOT,
343+ env: process.env,
344+ stdio: ["ignore", logFd, logFd]
345+ });
346+
347+ closeSync(logFd);
348+
349+ if (child.pid == null) {
350+ throw new Error(`Failed to start ${name}.`);
351+ }
352+
353+ return {
354+ ...metadata,
355+ baseUrl,
356+ logPath,
357+ name,
358+ pid: child.pid
359+ };
360+}
361+
362+async function captureLiveStack(state, { expectedLeader }) {
363+ const [controlState, primary, standby, statusSnapshot] = await Promise.all([
364+ readControlState(state.services.controlApi.baseUrl),
365+ readConductorSurface(state.services.primary.baseUrl),
366+ readConductorSurface(state.services.standby.baseUrl),
367+ readStatusSnapshot(state.services.statusApi.baseUrl)
368+ ]);
369+
370+ if (expectedLeader) {
371+ if (controlState.data.holder_id !== expectedLeader) {
372+ throw new Error(
373+ `Expected control-api leader to be ${expectedLeader}, received ${controlState.data.holder_id ?? "null"}.`
374+ );
375+ }
376+
377+ if (statusSnapshot.data.leaderId !== expectedLeader) {
378+ throw new Error(
379+ `Expected status-api leader to be ${expectedLeader}, received ${statusSnapshot.data.leaderId ?? "null"}.`
380+ );
381+ }
382+ }
383+
384+ if (primary.role !== "leader") {
385+ throw new Error(`Expected primary conductor rolez to be leader, received ${primary.role}.`);
386+ }
387+
388+ if (standby.role !== "standby") {
389+ throw new Error(`Expected standby conductor rolez to be standby, received ${standby.role}.`);
390+ }
391+
392+ return {
393+ controlState,
394+ conductors: {
395+ primary,
396+ standby
397+ },
398+ statusSnapshot
399+ };
400+}
401+
402+async function verifyDrainAndResume(state) {
403+ const drainedResponse = await postJson(`${state.services.controlApi.baseUrl}/v1/system/drain`, {
404+ reason: "verify shared state propagation",
405+ requested_by: "smoke-harness"
406+ });
407+
408+ if (!drainedResponse.ok) {
409+ throw new Error(`Expected drain request to succeed: ${JSON.stringify(drainedResponse)}`);
410+ }
411+
412+ const drainedStatus = await waitForCondition(
413+ async () => {
414+ const snapshot = await readStatusSnapshot(state.services.statusApi.baseUrl);
415+ return snapshot.data.mode === "draining" ? snapshot : null;
416+ },
417+ { description: "status-api draining mode" }
418+ );
419+ const drainedControlState = await readControlState(state.services.controlApi.baseUrl);
420+
421+ const resumedResponse = await postJson(`${state.services.controlApi.baseUrl}/v1/system/resume`, {
422+ reason: "continue smoke flow",
423+ requested_by: "smoke-harness"
424+ });
425+
426+ if (!resumedResponse.ok) {
427+ throw new Error(`Expected resume request to succeed: ${JSON.stringify(resumedResponse)}`);
428+ }
429+
430+ const resumedStatus = await waitForCondition(
431+ async () => {
432+ const snapshot = await readStatusSnapshot(state.services.statusApi.baseUrl);
433+ return snapshot.data.mode === "running" ? snapshot : null;
434+ },
435+ { description: "status-api running mode" }
436+ );
437+
438+ return {
439+ drained: {
440+ controlState: drainedControlState,
441+ statusSnapshot: drainedStatus
442+ },
443+ resumed: {
444+ controlState: await readControlState(state.services.controlApi.baseUrl),
445+ statusSnapshot: resumedStatus
446+ }
447+ };
448+}
449+
450+async function createSmokeTask(controlApiBaseUrl) {
451+ const response = await postJson(`${controlApiBaseUrl}/v1/tasks`, {
452+ acceptance: ["status-api queue depth reflects the queued smoke task"],
453+ goal: "Verify the local smoke harness can seed a queued task visible to status-api.",
454+ repo: REPO_ROOT,
455+ task_type: "smoke_harness",
456+ title: "Smoke harness queue probe"
457+ });
458+
459+ if (!response.ok) {
460+ throw new Error(`Expected task creation to succeed: ${JSON.stringify(response)}`);
461+ }
462+
463+ return {
464+ response,
465+ taskId: response.data.task_id
466+ };
467+}
468+
469+async function readSmokeTask(controlApiBaseUrl, taskId) {
470+ const response = await getJson(`${controlApiBaseUrl}/v1/tasks/${taskId}`);
471+
472+ if (!response.ok) {
473+ throw new Error(`Expected task read to succeed: ${JSON.stringify(response)}`);
474+ }
475+
476+ if (response.data.task_id !== taskId) {
477+ throw new Error(`Expected task read to return ${taskId}, received ${response.data.task_id}.`);
478+ }
479+
480+ if (response.data.status !== "queued") {
481+ throw new Error(`Expected task ${taskId} to be queued, received ${response.data.status}.`);
482+ }
483+
484+ return response;
485+}
486+
487+async function waitForQueueDepth(statusApiBaseUrl, minimumQueueDepth) {
488+ return waitForCondition(
489+ async () => {
490+ const snapshot = await readStatusSnapshot(statusApiBaseUrl);
491+ return snapshot.data.queueDepth >= minimumQueueDepth ? snapshot : null;
492+ },
493+ { description: `status-api queue depth >= ${minimumQueueDepth}` }
494+ );
495+}
496+
497+async function waitForFailover(state) {
498+ const standbyBaseUrl = state.services.standby.baseUrl;
499+ const controlApiBaseUrl = state.services.controlApi.baseUrl;
500+ const statusApiBaseUrl = state.services.statusApi.baseUrl;
501+
502+ return waitForCondition(
503+ async () => {
504+ const [controlState, standby, statusSnapshot] = await Promise.all([
505+ readControlState(controlApiBaseUrl),
506+ readConductorSurface(standbyBaseUrl),
507+ readStatusSnapshot(statusApiBaseUrl)
508+ ]);
509+
510+ if (
511+ controlState.data.holder_id !== STANDBY_NODE_ID ||
512+ statusSnapshot.data.leaderId !== STANDBY_NODE_ID ||
513+ standby.role !== "leader" ||
514+ standby.readyz !== "ready"
515+ ) {
516+ return null;
517+ }
518+
519+ return {
520+ controlState,
521+ conductors: {
522+ standby
523+ },
524+ statusSnapshot
525+ };
526+ },
527+ {
528+ description: "standby failover",
529+ timeoutMs: START_TIMEOUT_MS
530+ }
531+ );
532+}
533+
534+async function stopStack(state) {
535+ await stopService(state, "primary");
536+ await stopService(state, "standby");
537+ await stopService(state, "statusApi");
538+ await stopService(state, "controlApi");
539+ await writeState(state);
540+}
541+
542+async function stopService(state, key) {
543+ const service = state.services[key];
544+
545+ if (!service || service.stoppedAt) {
546+ return;
547+ }
548+
549+ if (!isProcessRunning(service.pid)) {
550+ service.stoppedAt = new Date().toISOString();
551+ return;
552+ }
553+
554+ process.kill(service.pid, "SIGTERM");
555+
556+ try {
557+ await waitForCondition(
558+ async () => (!isProcessRunning(service.pid) ? true : null),
559+ {
560+ description: `${service.name} shutdown`,
561+ intervalMs: 100,
562+ timeoutMs: STOP_TIMEOUT_MS
563+ }
564+ );
565+ } catch {
566+ if (isProcessRunning(service.pid)) {
567+ process.kill(service.pid, "SIGKILL");
568+ await waitForCondition(
569+ async () => (!isProcessRunning(service.pid) ? true : null),
570+ {
571+ description: `${service.name} forced shutdown`,
572+ intervalMs: 100,
573+ timeoutMs: STOP_TIMEOUT_MS
574+ }
575+ );
576+ }
577+ }
578+
579+ service.stoppedAt = new Date().toISOString();
580+}
581+
582+function isProcessRunning(pid) {
583+ if (pid == null) {
584+ return false;
585+ }
586+
587+ try {
588+ process.kill(pid, 0);
589+ return true;
590+ } catch (error) {
591+ return !(error instanceof Error) || !("code" in error) || error.code !== "ESRCH";
592+ }
593+}
594+
595+async function readControlState(controlApiBaseUrl) {
596+ const response = await getJson(`${controlApiBaseUrl}/v1/system/state`);
597+
598+ if (!response.ok) {
599+ throw new Error(`Expected control-api system state to succeed: ${JSON.stringify(response)}`);
600+ }
601+
602+ return response;
603+}
604+
605+async function readStatusSnapshot(statusApiBaseUrl) {
606+ const response = await getJson(`${statusApiBaseUrl}/v1/status`);
607+
608+ if (!response.ok) {
609+ throw new Error(`Expected status-api snapshot to succeed: ${JSON.stringify(response)}`);
610+ }
611+
612+ return response;
613+}
614+
615+async function readConductorSurface(conductorBaseUrl) {
616+ const [healthz, readyz, role, runtime] = await Promise.all([
617+ getText(`${conductorBaseUrl}/healthz`),
618+ getText(`${conductorBaseUrl}/readyz`),
619+ getText(`${conductorBaseUrl}/rolez`),
620+ getJson(`${conductorBaseUrl}/v1/runtime`)
621+ ]);
622+
623+ if (!runtime.ok) {
624+ throw new Error(`Expected conductor runtime probe to succeed: ${JSON.stringify(runtime)}`);
625+ }
626+
627+ return {
628+ healthz,
629+ readyz,
630+ role,
631+ runtime
632+ };
633+}
634+
635+async function buildDependencies() {
636+ await runCommand("npx", ["--yes", "pnpm", "--filter", "@baa-conductor/control-api-worker", "build"]);
637+ await runCommand("npx", ["--yes", "pnpm", "--filter", "@baa-conductor/status-api", "build"]);
638+}
639+
640+async function runCommand(command, args) {
641+ await new Promise((resolvePromise, reject) => {
642+ const child = spawn(command, args, {
643+ cwd: REPO_ROOT,
644+ env: process.env,
645+ stdio: ["ignore", "pipe", "pipe"]
646+ });
647+ let output = "";
648+
649+ child.stdout?.on("data", (chunk) => {
650+ output += chunk.toString();
651+ });
652+ child.stderr?.on("data", (chunk) => {
653+ output += chunk.toString();
654+ });
655+ child.on("error", reject);
656+ child.on("close", (code) => {
657+ if (code === 0) {
658+ resolvePromise();
659+ return;
660+ }
661+
662+ reject(new Error(`Command failed: ${command} ${args.join(" ")}\n${output}`));
663+ });
664+ });
665+}
666+
667+async function waitForCondition(
668+ check,
669+ {
670+ description,
671+ intervalMs = POLL_INTERVAL_MS,
672+ timeoutMs = START_TIMEOUT_MS
673+ }
674+) {
675+ const deadline = Date.now() + timeoutMs;
676+ let lastError = null;
677+
678+ while (Date.now() <= deadline) {
679+ try {
680+ const result = await check();
681+
682+ if (result) {
683+ return result;
684+ }
685+ } catch (error) {
686+ lastError = error;
687+ }
688+
689+ await delay(intervalMs);
690+ }
691+
692+ const suffix =
693+ lastError == null
694+ ? ""
695+ : `: ${lastError instanceof Error ? lastError.message : String(lastError)}`;
696+ throw new Error(`Timed out waiting for ${description}${suffix}`);
697+}
698+
699+async function getJson(url, init) {
700+ const response = await fetch(url, {
701+ ...init,
702+ headers: {
703+ "content-type": "application/json",
704+ ...(init?.headers ?? {})
705+ },
706+ signal: AbortSignal.timeout(HTTP_TIMEOUT_MS)
707+ });
708+ const text = await response.text();
709+
710+ try {
711+ return JSON.parse(text);
712+ } catch {
713+ throw new Error(`Expected JSON from ${url}, received status ${response.status}: ${text}`);
714+ }
715+}
716+
717+async function getText(url) {
718+ const response = await fetch(url, {
719+ signal: AbortSignal.timeout(HTTP_TIMEOUT_MS)
720+ });
721+ const text = (await response.text()).trim();
722+
723+ if (!response.ok) {
724+ throw new Error(`Expected ${url} to return 2xx, received ${response.status}: ${text}`);
725+ }
726+
727+ return text;
728+}
729+
730+async function postJson(url, body) {
731+ return getJson(url, {
732+ body: JSON.stringify(body),
733+ method: "POST"
734+ });
735+}
736+
737+async function allocatePort() {
738+ return new Promise((resolvePromise, reject) => {
739+ const server = createServer();
740+
741+ server.once("error", reject);
742+ server.listen(0, DEFAULT_HOST, () => {
743+ const address = server.address();
744+
745+ if (!address || typeof address === "string") {
746+ reject(new Error("Failed to allocate a TCP port."));
747+ return;
748+ }
749+
750+ server.close((error) => {
751+ if (error) {
752+ reject(error);
753+ return;
754+ }
755+
756+ resolvePromise(address.port);
757+ });
758+ });
759+ });
760+}
761+
762+async function createStateDir() {
763+ const tmpRoot = join(REPO_ROOT, "tmp");
764+ await mkdir(tmpRoot, { recursive: true });
765+ return mkdtemp(join(tmpRoot, "smoke-"));
766+}
767+
768+function getStateFilePath(stateDir) {
769+ return join(stateDir, "stack-state.json");
770+}
771+
772+async function writeState(state) {
773+ await writeFile(state.stateFilePath, JSON.stringify(state, null, 2));
774+}
775+
776+async function loadState(options) {
777+ const stateDir = options.stateDir;
778+
779+ if (!stateDir) {
780+ throw new Error("--state-dir is required for check and stop commands.");
781+ }
782+
783+ const content = await readFile(getStateFilePath(stateDir), "utf8");
784+ return JSON.parse(content);
785+}
786+
787+function summarizeStateServices(state) {
788+ return Object.fromEntries(
789+ Object.entries(state.services).map(([key, value]) => [
790+ key,
791+ {
792+ baseUrl: value.baseUrl,
793+ logPath: value.logPath,
794+ pid: value.pid
795+ }
796+ ])
797+ );
798+}
799+
800+function writeResult(options, value) {
801+ if (typeof value === "string") {
802+ process.stdout.write(value.endsWith("\n") ? value : `${value}\n`);
803+ return;
804+ }
805+
806+ process.stdout.write(`${JSON.stringify(value, null, options.json ? 2 : 2)}\n`);
807+}
808+
809+function renderHelp() {
810+ return [
811+ "Usage:",
812+ " node scripts/smoke/stack-cli.mjs [run|start|check|stop] [options]",
813+ "",
814+ "Commands:",
815+ " run Build, start, probe, create a queued task, and fail over to standby.",
816+ " start Build and start the local smoke stack, then write stack-state.json.",
817+ " check Probe an existing stack-state.json and print the current surface snapshot.",
818+ " stop Stop the processes recorded in stack-state.json.",
819+ "",
820+ "Options:",
821+ " --state-dir <path> Reuse or write a specific smoke state directory.",
822+ " --expected-leader <id> Assert the current leader for the check command.",
823+ " --skip-build Skip the control-api/status-api build step.",
824+ " --json Print JSON output.",
825+ " --help, -h Show this help text."
826+ ].join("\n");
827+}
828+
829+await main().catch((error) => {
830+ process.stderr.write(`${error instanceof Error ? error.stack ?? error.message : String(error)}\n`);
831+ process.exitCode = 1;
832+});
+6,
-0
1@@ -0,0 +1,6 @@
2+#!/usr/bin/env bash
3+set -euo pipefail
4+
5+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
6+
7+exec node "$ROOT_DIR/scripts/smoke/stack-cli.mjs" start "$@"
+92,
-0
1@@ -0,0 +1,92 @@
2+import { D1StatusSnapshotLoader, startStatusApiServer } from "../../apps/status-api/dist/index.js";
3+import { createSqliteD1Database } from "./d1-sqlite.mjs";
4+
5+function parseArgs(argv) {
6+ const options = {
7+ databasePath: null,
8+ host: "127.0.0.1",
9+ port: null
10+ };
11+
12+ for (let index = 0; index < argv.length; index += 1) {
13+ const token = argv[index];
14+
15+ switch (token) {
16+ case "--db":
17+ options.databasePath = readValue(argv, token, index);
18+ index += 1;
19+ break;
20+ case "--host":
21+ options.host = readValue(argv, token, index);
22+ index += 1;
23+ break;
24+ case "--port":
25+ options.port = parsePort(readValue(argv, token, index), token);
26+ index += 1;
27+ break;
28+ default:
29+ throw new Error(`Unknown status-api-local option "${token}".`);
30+ }
31+ }
32+
33+ if (!options.databasePath) {
34+ throw new Error("--db is required.");
35+ }
36+
37+ if (options.port == null) {
38+ throw new Error("--port is required.");
39+ }
40+
41+ return options;
42+}
43+
44+function readValue(tokens, flag, index) {
45+ const value = tokens[index + 1];
46+
47+ if (!value || value.startsWith("--")) {
48+ throw new Error(`Missing value for ${flag}.`);
49+ }
50+
51+ return value;
52+}
53+
54+function parsePort(value, flag) {
55+ const port = Number(value);
56+
57+ if (!Number.isInteger(port) || port < 0 || port > 65_535) {
58+ throw new Error(`Invalid value for ${flag}: "${value}".`);
59+ }
60+
61+ return port;
62+}
63+
64+async function main() {
65+ const options = parseArgs(process.argv.slice(2));
66+ const db = createSqliteD1Database(options.databasePath);
67+ const server = await startStatusApiServer({
68+ host: options.host,
69+ port: options.port,
70+ snapshotLoader: new D1StatusSnapshotLoader(db)
71+ });
72+
73+ const close = async () => {
74+ await server.close();
75+ db.close();
76+ };
77+
78+ const shutdown = (signal) => {
79+ void close().finally(() => {
80+ process.exitCode = 0;
81+ if (signal) {
82+ process.stderr.write(`status-api-local stopped after ${signal}\n`);
83+ }
84+ });
85+ };
86+
87+ process.once("SIGINT", () => shutdown("SIGINT"));
88+ process.once("SIGTERM", () => shutdown("SIGTERM"));
89+
90+ process.stdout.write(`status-api-local listening on ${server.getBaseUrl()}\n`);
91+}
92+
93+await main();
+6,
-0
1@@ -0,0 +1,6 @@
2+#!/usr/bin/env bash
3+set -euo pipefail
4+
5+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
6+
7+exec node "$ROOT_DIR/scripts/smoke/stack-cli.mjs" stop "$@"
+46,
-0
1@@ -0,0 +1,46 @@
2+import assert from "node:assert/strict";
3+import { execFile } from "node:child_process";
4+import { rm } from "node:fs/promises";
5+import { promisify } from "node:util";
6+import { dirname, join, resolve } from "node:path";
7+import test from "node:test";
8+import { fileURLToPath } from "node:url";
9+
10+const execFileAsync = promisify(execFile);
11+const REPO_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "..", "..");
12+
13+test("smoke harness starts the local stack, reflects queue state, and fails over to standby", async () => {
14+ const stateDir = join(REPO_ROOT, "tmp", `e2e-smoke-test-${Date.now()}`);
15+ let shouldCleanup = true;
16+
17+ try {
18+ const { stdout } = await execFileAsync(
19+ "bash",
20+ ["scripts/smoke/run-e2e.sh", "--json", "--state-dir", stateDir],
21+ {
22+ cwd: REPO_ROOT,
23+ maxBuffer: 10 * 1024 * 1024,
24+ timeout: 120_000
25+ }
26+ );
27+ const result = JSON.parse(stdout);
28+
29+ assert.equal(result.ok, true);
30+ assert.equal(result.initial.controlState.data.holder_id, "smoke-mini");
31+ assert.equal(result.initial.statusSnapshot.data.leaderId, "smoke-mini");
32+ assert.equal(result.drained.drained.statusSnapshot.data.mode, "draining");
33+ assert.equal(result.drained.resumed.statusSnapshot.data.mode, "running");
34+ assert.equal(result.task.read.data.status, "queued");
35+ assert.equal(result.afterQueue.data.queueDepth, 1);
36+ assert.equal(result.failover.controlState.data.holder_id, "smoke-mac");
37+ assert.equal(result.failover.statusSnapshot.data.leaderId, "smoke-mac");
38+ assert.equal(result.failover.conductors.standby.role, "leader");
39+ } catch (error) {
40+ shouldCleanup = false;
41+ throw error;
42+ } finally {
43+ if (shouldCleanup) {
44+ await rm(stateDir, { force: true, recursive: true });
45+ }
46+ }
47+});