baa-conductor

git clone 

commit
bee9421
parent
288c748
author
im_wower
date
2026-03-22 02:13:03 +0800 CST
feat(smoke): add e2e smoke harness
11 files changed,  +1380, -8
M coordination/tasks/T-024-e2e-smoke.md
+33, -8
 1@@ -1,10 +1,10 @@
 2 ---
 3 task_id: T-024
 4 title: 端到端 smoke harness
 5-status: todo
 6+status: review
 7 branch: feat/T-024-e2e-smoke
 8 repo: /Users/george/code/baa-conductor
 9-base_ref: main
10+base_ref: main@288c748
11 depends_on:
12   - T-019
13   - T-020
14@@ -13,7 +13,7 @@ depends_on:
15 write_scope:
16   - tests/e2e/**
17   - scripts/smoke/**
18-updated_at: 2026-03-22
19+updated_at: 2026-03-22T02:12:15+0800
20 ---
21 
22 # T-024 端到端 smoke harness
23@@ -66,20 +66,45 @@ updated_at: 2026-03-22
24 
25 ## files_changed
26 
27-- 待填写
28+- coordination/tasks/T-024-e2e-smoke.md
29+- scripts/smoke/README.md
30+- scripts/smoke/check-stack.sh
31+- scripts/smoke/control-api-local.mjs
32+- scripts/smoke/d1-sqlite.mjs
33+- scripts/smoke/run-e2e.sh
34+- scripts/smoke/stack-cli.mjs
35+- scripts/smoke/start-stack.sh
36+- scripts/smoke/status-api-local.mjs
37+- scripts/smoke/stop-stack.sh
38+- tests/e2e/smoke.test.mjs
39 
40 ## commands_run
41 
42-- 待填写
43+- npx --yes pnpm install
44+- node --check scripts/smoke/d1-sqlite.mjs
45+- node --check scripts/smoke/control-api-local.mjs
46+- node --check scripts/smoke/status-api-local.mjs
47+- node --check scripts/smoke/stack-cli.mjs
48+- bash -n scripts/smoke/*.sh
49+- bash scripts/smoke/run-e2e.sh --json
50+- node --test tests/e2e/smoke.test.mjs
51+- bash scripts/smoke/start-stack.sh --json --state-dir /Users/george/code/baa-conductor-T024-e2e-smoke/tmp/manual-smoke-check
52+- bash scripts/smoke/check-stack.sh --json --state-dir /Users/george/code/baa-conductor-T024-e2e-smoke/tmp/manual-smoke-check --expected-leader smoke-mini
53+- bash scripts/smoke/stop-stack.sh --json --state-dir /Users/george/code/baa-conductor-T024-e2e-smoke/tmp/manual-smoke-check
54+- git diff --check
55 
56 ## result
57 
58-- 待填写
59+- 新增本地 smoke stack CLI 和 shell 包装脚本,可一键或分步启动、检查、停止 control-api、主备 conductor、status-api。
60+- 新增基于 SQLite 的 smoke-only D1 适配层,用于本地共享 control-api 和 status-api 的最小 durable 视图。
61+- 新增端到端 smoke 测试,覆盖探活、`/v1/system/state`、`/v1/status`、queued task 可见性和主备 failover。
62 
63 ## risks
64 
65-- 待填写
66+- `scripts/smoke/d1-sqlite.mjs` 是本地 smoke 适配层,不等价于真实 Cloudflare D1 / Wrangler 运行时语义。
67+- 当前 smoke 聚焦最小读取面和 lease failover,不覆盖 task claim、worker 执行和真实线上部署链路。
68 
69 ## next_handoff
70 
71-- 待填写
72+- 后续任务可直接复用 `bash scripts/smoke/run-e2e.sh --json` 作为本地回归入口。
73+- T-025 可复用 `start-stack.sh` / `check-stack.sh` / `stop-stack.sh` 和 `stack-cli.mjs` 的状态目录结构继续扩展 failover rehearsal。
A scripts/smoke/README.md
+25, -0
 1@@ -0,0 +1,25 @@
 2+# Smoke Harness
 3+
 4+最小本地 smoke 流程:
 5+
 6+```bash
 7+bash scripts/smoke/run-e2e.sh --json
 8+```
 9+
10+如果要分步执行:
11+
12+```bash
13+bash scripts/smoke/start-stack.sh --json
14+bash scripts/smoke/check-stack.sh --state-dir <tmp/smoke-...> --expected-leader smoke-mini --json
15+bash scripts/smoke/stop-stack.sh --state-dir <tmp/smoke-...> --json
16+```
17+
18+`run-e2e.sh` 会完成这些动作:
19+
20+- 构建 `control-api-worker` 和 `status-api` 的本地运行产物
21+- 启动本地 `control-api`、主 conductor、备 conductor、`status-api`
22+- 验证 `healthz` / `readyz` / `rolez` / `/v1/system/state` / `/v1/status`
23+- 创建一个 queued task,确认 `status-api` 的 `queueDepth` 能读到
24+- 执行一次最小主备切换,确认 lease 从 `smoke-mini` 漂移到 `smoke-mac`
25+
26+所有临时状态、数据库和日志都会落到 `tmp/smoke-*` 目录。
A scripts/smoke/check-stack.sh
+6, -0
1@@ -0,0 +1,6 @@
2+#!/usr/bin/env bash
3+set -euo pipefail
4+
5+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
6+
7+exec node "$ROOT_DIR/scripts/smoke/stack-cli.mjs" check "$@"
A scripts/smoke/control-api-local.mjs
+164, -0
  1@@ -0,0 +1,164 @@
  2+import { createServer } from "node:http";
  3+import controlApiWorker from "../../apps/control-api-worker/dist/index.js";
  4+import { initializeSqliteD1Database } from "./d1-sqlite.mjs";
  5+
  6+function parseArgs(argv) {
  7+  const options = {
  8+    authRequired: false,
  9+    databasePath: null,
 10+    host: "127.0.0.1",
 11+    port: null
 12+  };
 13+
 14+  for (let index = 0; index < argv.length; index += 1) {
 15+    const token = argv[index];
 16+
 17+    switch (token) {
 18+      case "--auth-required":
 19+        options.authRequired = true;
 20+        break;
 21+      case "--db":
 22+        options.databasePath = readValue(argv, token, index);
 23+        index += 1;
 24+        break;
 25+      case "--host":
 26+        options.host = readValue(argv, token, index);
 27+        index += 1;
 28+        break;
 29+      case "--port":
 30+        options.port = parsePort(readValue(argv, token, index), token);
 31+        index += 1;
 32+        break;
 33+      default:
 34+        throw new Error(`Unknown control-api-local option "${token}".`);
 35+    }
 36+  }
 37+
 38+  if (!options.databasePath) {
 39+    throw new Error("--db is required.");
 40+  }
 41+
 42+  if (options.port == null) {
 43+    throw new Error("--port is required.");
 44+  }
 45+
 46+  return options;
 47+}
 48+
 49+function readValue(tokens, flag, index) {
 50+  const value = tokens[index + 1];
 51+
 52+  if (!value || value.startsWith("--")) {
 53+    throw new Error(`Missing value for ${flag}.`);
 54+  }
 55+
 56+  return value;
 57+}
 58+
 59+function parsePort(value, flag) {
 60+  const port = Number(value);
 61+
 62+  if (!Number.isInteger(port) || port < 0 || port > 65_535) {
 63+    throw new Error(`Invalid value for ${flag}: "${value}".`);
 64+  }
 65+
 66+  return port;
 67+}
 68+
 69+async function readRequestBody(request) {
 70+  const chunks = [];
 71+
 72+  for await (const chunk of request) {
 73+    chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
 74+  }
 75+
 76+  return Buffer.concat(chunks);
 77+}
 78+
 79+async function main() {
 80+  const options = parseArgs(process.argv.slice(2));
 81+  const db = initializeSqliteD1Database({
 82+    databasePath: options.databasePath
 83+  });
 84+  const env = {
 85+    CONTROL_API_AUTH_REQUIRED: options.authRequired ? "true" : "false",
 86+    CONTROL_API_VERSION: "smoke-local",
 87+    CONTROL_DB: db
 88+  };
 89+  const baseUrl = `http://${options.host}:${options.port}`;
 90+  const server = createServer((request, response) => {
 91+    void handleRequest(request, response, baseUrl, env);
 92+  });
 93+
 94+  const close = async () => {
 95+    await new Promise((resolve, reject) => {
 96+      server.close((error) => {
 97+        if (error) {
 98+          reject(error);
 99+          return;
100+        }
101+
102+        resolve();
103+      });
104+    });
105+    db.close();
106+  };
107+
108+  const shutdown = (signal) => {
109+    void close().finally(() => {
110+      process.exitCode = 0;
111+      if (signal) {
112+        process.stderr.write(`control-api-local stopped after ${signal}\n`);
113+      }
114+    });
115+  };
116+
117+  process.once("SIGINT", () => shutdown("SIGINT"));
118+  process.once("SIGTERM", () => shutdown("SIGTERM"));
119+
120+  await new Promise((resolve) => {
121+    server.listen(options.port, options.host, resolve);
122+  });
123+
124+  process.stdout.write(`control-api-local listening on ${baseUrl}\n`);
125+}
126+
127+async function handleRequest(request, response, baseUrl, env) {
128+  try {
129+    const body = await readRequestBody(request);
130+    const url = new URL(request.url ?? "/", baseUrl);
131+    const workerRequest = new Request(url, {
132+      body: request.method === "GET" || request.method === "HEAD" ? undefined : body,
133+      headers: new Headers(request.headers),
134+      method: request.method ?? "GET"
135+    });
136+    const workerResponse = await controlApiWorker.fetch(workerRequest, env, {
137+      passThroughOnException() {},
138+      waitUntil() {}
139+    });
140+
141+    response.statusCode = workerResponse.status;
142+
143+    for (const [name, value] of workerResponse.headers.entries()) {
144+      response.setHeader(name, value);
145+    }
146+
147+    response.end(Buffer.from(await workerResponse.arrayBuffer()));
148+  } catch (error) {
149+    response.statusCode = 500;
150+    response.setHeader("content-type", "application/json; charset=utf-8");
151+    response.end(
152+      `${JSON.stringify(
153+        {
154+          ok: false,
155+          error: "control_api_local_failure",
156+          message: error instanceof Error ? error.message : String(error)
157+        },
158+        null,
159+        2
160+      )}\n`
161+    );
162+  }
163+}
164+
165+await main();
A scripts/smoke/d1-sqlite.mjs
+165, -0
  1@@ -0,0 +1,165 @@
  2+import { mkdirSync, readFileSync } from "node:fs";
  3+import { dirname, resolve } from "node:path";
  4+import { DatabaseSync } from "node:sqlite";
  5+import { fileURLToPath } from "node:url";
  6+
  7+const DEFAULT_BUSY_TIMEOUT_MS = 5_000;
  8+const DEFAULT_AUTOMATION_MODE = "running";
  9+
 10+function normalizeRow(row) {
 11+  if (row == null || typeof row !== "object" || Array.isArray(row)) {
 12+    return row ?? null;
 13+  }
 14+
 15+  return { ...row };
 16+}
 17+
 18+function toLastRowId(value) {
 19+  if (typeof value === "bigint") {
 20+    const normalized = Number(value);
 21+    return Number.isSafeInteger(normalized) ? normalized : undefined;
 22+  }
 23+
 24+  return typeof value === "number" ? value : undefined;
 25+}
 26+
 27+class SqliteD1PreparedStatement {
 28+  constructor(database, query, values = []) {
 29+    this.database = database;
 30+    this.query = query;
 31+    this.values = [...values];
 32+  }
 33+
 34+  bind(...values) {
 35+    return new SqliteD1PreparedStatement(this.database, this.query, values);
 36+  }
 37+
 38+  async all() {
 39+    const rows = this.#statement().all(...this.values).map(normalizeRow);
 40+
 41+    return {
 42+      meta: {
 43+        rows_read: rows.length
 44+      },
 45+      results: rows,
 46+      success: true
 47+    };
 48+  }
 49+
 50+  async first(columnName) {
 51+    const row = normalizeRow(this.#statement().get(...this.values));
 52+
 53+    if (row == null) {
 54+      return null;
 55+    }
 56+
 57+    if (columnName == null) {
 58+      return row;
 59+    }
 60+
 61+    return Object.prototype.hasOwnProperty.call(row, columnName) ? row[columnName] ?? null : null;
 62+  }
 63+
 64+  async raw(options = {}) {
 65+    const statement = this.#statement();
 66+    const columns = statement.columns().map((column) => column.name);
 67+    const rows = statement.all(...this.values).map((row) => columns.map((columnName) => row[columnName] ?? null));
 68+
 69+    return options.columnNames ? [columns, ...rows] : rows;
 70+  }
 71+
 72+  async run() {
 73+    const result = this.#statement().run(...this.values);
 74+
 75+    return {
 76+      meta: {
 77+        changes: result.changes,
 78+        last_row_id: toLastRowId(result.lastInsertRowid)
 79+      },
 80+      success: true
 81+    };
 82+  }
 83+
 84+  #statement() {
 85+    return this.database.prepare(this.query);
 86+  }
 87+}
 88+
 89+export class SqliteD1Database {
 90+  constructor(databasePath) {
 91+    const resolvedPath = resolve(databasePath);
 92+    mkdirSync(dirname(resolvedPath), { recursive: true });
 93+    this.path = resolvedPath;
 94+    this.database = new DatabaseSync(this.path);
 95+    this.database.exec("PRAGMA journal_mode = WAL;");
 96+    this.database.exec("PRAGMA synchronous = NORMAL;");
 97+    this.database.exec("PRAGMA foreign_keys = ON;");
 98+    this.database.exec(`PRAGMA busy_timeout = ${DEFAULT_BUSY_TIMEOUT_MS};`);
 99+  }
100+
101+  async batch(statements) {
102+    const results = [];
103+
104+    this.database.exec("BEGIN IMMEDIATE;");
105+
106+    try {
107+      for (const statement of statements) {
108+        if (!(statement instanceof SqliteD1PreparedStatement)) {
109+          throw new TypeError("SqliteD1Database.batch only accepts statements created by this adapter.");
110+        }
111+
112+        results.push(await statement.run());
113+      }
114+
115+      this.database.exec("COMMIT;");
116+      return results;
117+    } catch (error) {
118+      this.database.exec("ROLLBACK;");
119+      throw error;
120+    }
121+  }
122+
123+  close() {
124+    this.database.close();
125+  }
126+
127+  async exec(query) {
128+    this.database.exec(query);
129+    return {};
130+  }
131+
132+  prepare(query) {
133+    return new SqliteD1PreparedStatement(this.database, query);
134+  }
135+}
136+
137+export function createSqliteD1Database(databasePath) {
138+  return new SqliteD1Database(databasePath);
139+}
140+
141+export function getDefaultSmokeSchemaPath() {
142+  return resolve(dirname(fileURLToPath(import.meta.url)), "..", "..", "ops", "sql", "schema.sql");
143+}
144+
145+export function initializeSqliteD1Database({
146+  databasePath,
147+  schemaPath = getDefaultSmokeSchemaPath(),
148+  automationMode = DEFAULT_AUTOMATION_MODE
149+}) {
150+  const db = createSqliteD1Database(databasePath);
151+  const schema = readFileSync(schemaPath, "utf8");
152+  const nowUnixSeconds = Math.floor(Date.now() / 1_000);
153+
154+  db.database.exec(schema);
155+  db.database
156+    .prepare(
157+      `
158+        INSERT INTO system_state (state_key, value_json, updated_at)
159+        VALUES (?, ?, ?)
160+        ON CONFLICT(state_key) DO NOTHING
161+      `
162+    )
163+    .run("automation", JSON.stringify({ mode: automationMode }), nowUnixSeconds);
164+
165+  return db;
166+}
A scripts/smoke/run-e2e.sh
+6, -0
1@@ -0,0 +1,6 @@
2+#!/usr/bin/env bash
3+set -euo pipefail
4+
5+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
6+
7+exec node "$ROOT_DIR/scripts/smoke/stack-cli.mjs" run "$@"
A scripts/smoke/stack-cli.mjs
+831, -0
  1@@ -0,0 +1,831 @@
  2+import { spawn } from "node:child_process";
  3+import { appendFileSync, closeSync, openSync } from "node:fs";
  4+import { mkdir, mkdtemp, readFile, writeFile } from "node:fs/promises";
  5+import { createServer } from "node:net";
  6+import { dirname, join, resolve } from "node:path";
  7+import { setTimeout as delay } from "node:timers/promises";
  8+import { fileURLToPath } from "node:url";
  9+
 10+const REPO_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "..", "..");
 11+const DEFAULT_HOST = "127.0.0.1";
 12+const DEFAULT_LEASE_RENEW_INTERVAL_MS = 250;
 13+const DEFAULT_HEARTBEAT_INTERVAL_MS = 250;
 14+const DEFAULT_LEASE_TTL_SEC = 2;
 15+const HTTP_TIMEOUT_MS = 2_000;
 16+const START_TIMEOUT_MS = 15_000;
 17+const STOP_TIMEOUT_MS = 5_000;
 18+const POLL_INTERVAL_MS = 200;
 19+const PRIMARY_NODE_ID = "smoke-mini";
 20+const STANDBY_NODE_ID = "smoke-mac";
 21+
 22+async function main() {
 23+  const options = parseCli(process.argv.slice(2));
 24+
 25+  if (options.help) {
 26+    writeResult(options, renderHelp());
 27+    return;
 28+  }
 29+
 30+  switch (options.command) {
 31+    case "start": {
 32+      const state = await startStack(options);
 33+      writeResult(options, {
 34+        ok: true,
 35+        stateDir: state.stateDir,
 36+        services: summarizeStateServices(state)
 37+      });
 38+      return;
 39+    }
 40+
 41+    case "check": {
 42+      const state = await loadState(options);
 43+      const result = await captureLiveStack(state, {
 44+        expectedLeader: options.expectedLeader
 45+      });
 46+
 47+      writeResult(options, {
 48+        ok: true,
 49+        stateDir: state.stateDir,
 50+        ...result
 51+      });
 52+      return;
 53+    }
 54+
 55+    case "stop": {
 56+      const state = await loadState(options);
 57+      await stopStack(state);
 58+      writeResult(options, {
 59+        ok: true,
 60+        stateDir: state.stateDir,
 61+        stopped: true
 62+      });
 63+      return;
 64+    }
 65+
 66+    case "run": {
 67+      const result = await runSmokeFlow(options);
 68+      writeResult(options, result);
 69+      return;
 70+    }
 71+  }
 72+}
 73+
 74+function parseCli(argv) {
 75+  let command = "run";
 76+  let index = 0;
 77+
 78+  if (argv[0] && !argv[0].startsWith("--")) {
 79+    command = argv[0];
 80+    index = 1;
 81+  }
 82+
 83+  const options = {
 84+    command,
 85+    expectedLeader: null,
 86+    help: false,
 87+    json: false,
 88+    skipBuild: false,
 89+    stateDir: null
 90+  };
 91+
 92+  for (; index < argv.length; index += 1) {
 93+    const token = argv[index];
 94+
 95+    switch (token) {
 96+      case "--expected-leader":
 97+        options.expectedLeader = readOptionValue(argv, token, index);
 98+        index += 1;
 99+        break;
100+      case "--help":
101+      case "-h":
102+        options.help = true;
103+        break;
104+      case "--json":
105+        options.json = true;
106+        break;
107+      case "--skip-build":
108+        options.skipBuild = true;
109+        break;
110+      case "--state-dir":
111+        options.stateDir = resolve(readOptionValue(argv, token, index));
112+        index += 1;
113+        break;
114+      default:
115+        throw new Error(`Unknown smoke command option "${token}".`);
116+    }
117+  }
118+
119+  if (!["start", "check", "run", "stop"].includes(options.command)) {
120+    throw new Error(`Unknown smoke command "${options.command}".`);
121+  }
122+
123+  return options;
124+}
125+
126+function readOptionValue(argv, flag, index) {
127+  const value = argv[index + 1];
128+
129+  if (!value || value.startsWith("--")) {
130+    throw new Error(`Missing value for ${flag}.`);
131+  }
132+
133+  return value;
134+}
135+
136+async function runSmokeFlow(options) {
137+  const state = await startStack(options);
138+
139+  try {
140+    const initial = await captureLiveStack(state, {
141+      expectedLeader: PRIMARY_NODE_ID
142+    });
143+
144+    const drained = await verifyDrainAndResume(state);
145+    const task = await createSmokeTask(state.services.controlApi.baseUrl);
146+    const taskRecord = await readSmokeTask(state.services.controlApi.baseUrl, task.taskId);
147+    const afterQueue = await waitForQueueDepth(state.services.statusApi.baseUrl, 1);
148+
149+    await stopService(state, "primary");
150+
151+    const failover = await waitForFailover(state);
152+
153+    return {
154+      ok: true,
155+      stateDir: state.stateDir,
156+      initial,
157+      drained,
158+      task: {
159+        create: task.response,
160+        read: taskRecord,
161+        taskId: task.taskId
162+      },
163+      afterQueue,
164+      failover
165+    };
166+  } finally {
167+    await stopStack(state);
168+  }
169+}
170+
171+async function startStack(options) {
172+  const stateDir = options.stateDir ?? (await createStateDir());
173+  const state = {
174+    stateDir,
175+    stateFilePath: getStateFilePath(stateDir),
176+    dbPath: join(stateDir, "data", "control-plane.sqlite"),
177+    services: {}
178+  };
179+
180+  await mkdir(join(stateDir, "logs"), { recursive: true });
181+  await mkdir(join(stateDir, "runtime"), { recursive: true });
182+
183+  try {
184+    if (!options.skipBuild) {
185+      await buildDependencies();
186+    }
187+
188+    const ports = {
189+      controlApi: await allocatePort(),
190+      primary: await allocatePort(),
191+      standby: await allocatePort(),
192+      statusApi: await allocatePort()
193+    };
194+
195+    state.services.controlApi = await spawnService({
196+      args: [
197+        "scripts/smoke/control-api-local.mjs",
198+        "--db",
199+        state.dbPath,
200+        "--host",
201+        DEFAULT_HOST,
202+        "--port",
203+        String(ports.controlApi)
204+      ],
205+      baseUrl: `http://${DEFAULT_HOST}:${ports.controlApi}`,
206+      command: "node",
207+      logPath: join(stateDir, "logs", "control-api.log"),
208+      name: "control-api"
209+    });
210+    await writeState(state);
211+
212+    await waitForCondition(
213+      async () => {
214+        const envelope = await getJson(`${state.services.controlApi.baseUrl}/v1/system/state`);
215+        return envelope.ok ? envelope : null;
216+      },
217+      { description: "control-api readiness" }
218+    );
219+
220+    state.services.primary = await spawnConductorService(state, {
221+      host: "mini",
222+      localApiPort: ports.primary,
223+      name: "conductor-primary",
224+      nodeId: PRIMARY_NODE_ID,
225+      role: "primary"
226+    });
227+    await writeState(state);
228+
229+    state.services.standby = await spawnConductorService(state, {
230+      host: "mac",
231+      localApiPort: ports.standby,
232+      name: "conductor-standby",
233+      nodeId: STANDBY_NODE_ID,
234+      role: "standby"
235+    });
236+    await writeState(state);
237+
238+    state.services.statusApi = await spawnService({
239+      args: [
240+        "scripts/smoke/status-api-local.mjs",
241+        "--db",
242+        state.dbPath,
243+        "--host",
244+        DEFAULT_HOST,
245+        "--port",
246+        String(ports.statusApi)
247+      ],
248+      baseUrl: `http://${DEFAULT_HOST}:${ports.statusApi}`,
249+      command: "node",
250+      logPath: join(stateDir, "logs", "status-api.log"),
251+      name: "status-api"
252+    });
253+    await writeState(state);
254+
255+    await waitForCondition(
256+      async () => {
257+        const text = await getText(`${state.services.statusApi.baseUrl}/healthz`);
258+        return text === "ok" ? text : null;
259+      },
260+      { description: "status-api readiness" }
261+    );
262+
263+    await captureLiveStack(state, {
264+      expectedLeader: PRIMARY_NODE_ID
265+    });
266+
267+    return state;
268+  } catch (error) {
269+    await stopStack(state);
270+    throw error;
271+  }
272+}
273+
274+async function spawnConductorService(state, options) {
275+  const runtimeRoot = join(state.stateDir, "runtime", options.role);
276+
277+  for (const directoryName of ["logs", "runs", "state", "tmp", "worktrees"]) {
278+    await mkdir(join(runtimeRoot, directoryName), { recursive: true });
279+  }
280+
281+  const baseUrl = `http://${DEFAULT_HOST}:${options.localApiPort}`;
282+  const service = await spawnService({
283+    args: [
284+      "--experimental-strip-types",
285+      "apps/conductor-daemon/src/index.ts",
286+      "start",
287+      "--node-id",
288+      options.nodeId,
289+      "--host",
290+      options.host,
291+      "--role",
292+      options.role,
293+      "--control-api-base",
294+      state.services.controlApi.baseUrl,
295+      "--local-api",
296+      baseUrl,
297+      "--shared-token",
298+      "smoke-shared-token",
299+      "--heartbeat-interval-ms",
300+      String(DEFAULT_HEARTBEAT_INTERVAL_MS),
301+      "--lease-renew-interval-ms",
302+      String(DEFAULT_LEASE_RENEW_INTERVAL_MS),
303+      "--lease-ttl-sec",
304+      String(DEFAULT_LEASE_TTL_SEC),
305+      "--logs-dir",
306+      join(runtimeRoot, "logs"),
307+      "--runs-dir",
308+      join(runtimeRoot, "runs"),
309+      "--state-dir",
310+      join(runtimeRoot, "state"),
311+      "--tmp-dir",
312+      join(runtimeRoot, "tmp"),
313+      "--worktrees-dir",
314+      join(runtimeRoot, "worktrees")
315+    ],
316+    baseUrl,
317+    command: "node",
318+    logPath: join(state.stateDir, "logs", `${options.name}.log`),
319+    metadata: {
320+      host: options.host,
321+      nodeId: options.nodeId,
322+      role: options.role
323+    },
324+    name: options.name
325+  });
326+
327+  await waitForCondition(
328+    async () => {
329+      const text = await getText(`${baseUrl}/healthz`);
330+      return text === "ok" ? text : null;
331+    },
332+    { description: `${options.name} healthz` }
333+  );
334+
335+  return service;
336+}
337+
338+async function spawnService({ args, baseUrl, command, logPath, metadata = {}, name }) {
339+  appendFileSync(logPath, `\n$ ${command} ${args.join(" ")}\n`);
340+  const logFd = openSync(logPath, "a");
341+  const child = spawn(command, args, {
342+    cwd: REPO_ROOT,
343+    env: process.env,
344+    stdio: ["ignore", logFd, logFd]
345+  });
346+
347+  closeSync(logFd);
348+
349+  if (child.pid == null) {
350+    throw new Error(`Failed to start ${name}.`);
351+  }
352+
353+  return {
354+    ...metadata,
355+    baseUrl,
356+    logPath,
357+    name,
358+    pid: child.pid
359+  };
360+}
361+
362+async function captureLiveStack(state, { expectedLeader }) {
363+  const [controlState, primary, standby, statusSnapshot] = await Promise.all([
364+    readControlState(state.services.controlApi.baseUrl),
365+    readConductorSurface(state.services.primary.baseUrl),
366+    readConductorSurface(state.services.standby.baseUrl),
367+    readStatusSnapshot(state.services.statusApi.baseUrl)
368+  ]);
369+
370+  if (expectedLeader) {
371+    if (controlState.data.holder_id !== expectedLeader) {
372+      throw new Error(
373+        `Expected control-api leader to be ${expectedLeader}, received ${controlState.data.holder_id ?? "null"}.`
374+      );
375+    }
376+
377+    if (statusSnapshot.data.leaderId !== expectedLeader) {
378+      throw new Error(
379+        `Expected status-api leader to be ${expectedLeader}, received ${statusSnapshot.data.leaderId ?? "null"}.`
380+      );
381+    }
382+  }
383+
384+  if (primary.role !== "leader") {
385+    throw new Error(`Expected primary conductor rolez to be leader, received ${primary.role}.`);
386+  }
387+
388+  if (standby.role !== "standby") {
389+    throw new Error(`Expected standby conductor rolez to be standby, received ${standby.role}.`);
390+  }
391+
392+  return {
393+    controlState,
394+    conductors: {
395+      primary,
396+      standby
397+    },
398+    statusSnapshot
399+  };
400+}
401+
402+async function verifyDrainAndResume(state) {
403+  const drainedResponse = await postJson(`${state.services.controlApi.baseUrl}/v1/system/drain`, {
404+    reason: "verify shared state propagation",
405+    requested_by: "smoke-harness"
406+  });
407+
408+  if (!drainedResponse.ok) {
409+    throw new Error(`Expected drain request to succeed: ${JSON.stringify(drainedResponse)}`);
410+  }
411+
412+  const drainedStatus = await waitForCondition(
413+    async () => {
414+      const snapshot = await readStatusSnapshot(state.services.statusApi.baseUrl);
415+      return snapshot.data.mode === "draining" ? snapshot : null;
416+    },
417+    { description: "status-api draining mode" }
418+  );
419+  const drainedControlState = await readControlState(state.services.controlApi.baseUrl);
420+
421+  const resumedResponse = await postJson(`${state.services.controlApi.baseUrl}/v1/system/resume`, {
422+    reason: "continue smoke flow",
423+    requested_by: "smoke-harness"
424+  });
425+
426+  if (!resumedResponse.ok) {
427+    throw new Error(`Expected resume request to succeed: ${JSON.stringify(resumedResponse)}`);
428+  }
429+
430+  const resumedStatus = await waitForCondition(
431+    async () => {
432+      const snapshot = await readStatusSnapshot(state.services.statusApi.baseUrl);
433+      return snapshot.data.mode === "running" ? snapshot : null;
434+    },
435+    { description: "status-api running mode" }
436+  );
437+
438+  return {
439+    drained: {
440+      controlState: drainedControlState,
441+      statusSnapshot: drainedStatus
442+    },
443+    resumed: {
444+      controlState: await readControlState(state.services.controlApi.baseUrl),
445+      statusSnapshot: resumedStatus
446+    }
447+  };
448+}
449+
450+async function createSmokeTask(controlApiBaseUrl) {
451+  const response = await postJson(`${controlApiBaseUrl}/v1/tasks`, {
452+    acceptance: ["status-api queue depth reflects the queued smoke task"],
453+    goal: "Verify the local smoke harness can seed a queued task visible to status-api.",
454+    repo: REPO_ROOT,
455+    task_type: "smoke_harness",
456+    title: "Smoke harness queue probe"
457+  });
458+
459+  if (!response.ok) {
460+    throw new Error(`Expected task creation to succeed: ${JSON.stringify(response)}`);
461+  }
462+
463+  return {
464+    response,
465+    taskId: response.data.task_id
466+  };
467+}
468+
469+async function readSmokeTask(controlApiBaseUrl, taskId) {
470+  const response = await getJson(`${controlApiBaseUrl}/v1/tasks/${taskId}`);
471+
472+  if (!response.ok) {
473+    throw new Error(`Expected task read to succeed: ${JSON.stringify(response)}`);
474+  }
475+
476+  if (response.data.task_id !== taskId) {
477+    throw new Error(`Expected task read to return ${taskId}, received ${response.data.task_id}.`);
478+  }
479+
480+  if (response.data.status !== "queued") {
481+    throw new Error(`Expected task ${taskId} to be queued, received ${response.data.status}.`);
482+  }
483+
484+  return response;
485+}
486+
487+async function waitForQueueDepth(statusApiBaseUrl, minimumQueueDepth) {
488+  return waitForCondition(
489+    async () => {
490+      const snapshot = await readStatusSnapshot(statusApiBaseUrl);
491+      return snapshot.data.queueDepth >= minimumQueueDepth ? snapshot : null;
492+    },
493+    { description: `status-api queue depth >= ${minimumQueueDepth}` }
494+  );
495+}
496+
497+async function waitForFailover(state) {
498+  const standbyBaseUrl = state.services.standby.baseUrl;
499+  const controlApiBaseUrl = state.services.controlApi.baseUrl;
500+  const statusApiBaseUrl = state.services.statusApi.baseUrl;
501+
502+  return waitForCondition(
503+    async () => {
504+      const [controlState, standby, statusSnapshot] = await Promise.all([
505+        readControlState(controlApiBaseUrl),
506+        readConductorSurface(standbyBaseUrl),
507+        readStatusSnapshot(statusApiBaseUrl)
508+      ]);
509+
510+      if (
511+        controlState.data.holder_id !== STANDBY_NODE_ID ||
512+        statusSnapshot.data.leaderId !== STANDBY_NODE_ID ||
513+        standby.role !== "leader" ||
514+        standby.readyz !== "ready"
515+      ) {
516+        return null;
517+      }
518+
519+      return {
520+        controlState,
521+        conductors: {
522+          standby
523+        },
524+        statusSnapshot
525+      };
526+    },
527+    {
528+      description: "standby failover",
529+      timeoutMs: START_TIMEOUT_MS
530+    }
531+  );
532+}
533+
534+async function stopStack(state) {
535+  await stopService(state, "primary");
536+  await stopService(state, "standby");
537+  await stopService(state, "statusApi");
538+  await stopService(state, "controlApi");
539+  await writeState(state);
540+}
541+
542+async function stopService(state, key) {
543+  const service = state.services[key];
544+
545+  if (!service || service.stoppedAt) {
546+    return;
547+  }
548+
549+  if (!isProcessRunning(service.pid)) {
550+    service.stoppedAt = new Date().toISOString();
551+    return;
552+  }
553+
554+  process.kill(service.pid, "SIGTERM");
555+
556+  try {
557+    await waitForCondition(
558+      async () => (!isProcessRunning(service.pid) ? true : null),
559+      {
560+        description: `${service.name} shutdown`,
561+        intervalMs: 100,
562+        timeoutMs: STOP_TIMEOUT_MS
563+      }
564+    );
565+  } catch {
566+    if (isProcessRunning(service.pid)) {
567+      process.kill(service.pid, "SIGKILL");
568+      await waitForCondition(
569+        async () => (!isProcessRunning(service.pid) ? true : null),
570+        {
571+          description: `${service.name} forced shutdown`,
572+          intervalMs: 100,
573+          timeoutMs: STOP_TIMEOUT_MS
574+        }
575+      );
576+    }
577+  }
578+
579+  service.stoppedAt = new Date().toISOString();
580+}
581+
582+function isProcessRunning(pid) {
583+  if (pid == null) {
584+    return false;
585+  }
586+
587+  try {
588+    process.kill(pid, 0);
589+    return true;
590+  } catch (error) {
591+    return !(error instanceof Error) || !("code" in error) || error.code !== "ESRCH";
592+  }
593+}
594+
595+async function readControlState(controlApiBaseUrl) {
596+  const response = await getJson(`${controlApiBaseUrl}/v1/system/state`);
597+
598+  if (!response.ok) {
599+    throw new Error(`Expected control-api system state to succeed: ${JSON.stringify(response)}`);
600+  }
601+
602+  return response;
603+}
604+
605+async function readStatusSnapshot(statusApiBaseUrl) {
606+  const response = await getJson(`${statusApiBaseUrl}/v1/status`);
607+
608+  if (!response.ok) {
609+    throw new Error(`Expected status-api snapshot to succeed: ${JSON.stringify(response)}`);
610+  }
611+
612+  return response;
613+}
614+
615+async function readConductorSurface(conductorBaseUrl) {
616+  const [healthz, readyz, role, runtime] = await Promise.all([
617+    getText(`${conductorBaseUrl}/healthz`),
618+    getText(`${conductorBaseUrl}/readyz`),
619+    getText(`${conductorBaseUrl}/rolez`),
620+    getJson(`${conductorBaseUrl}/v1/runtime`)
621+  ]);
622+
623+  if (!runtime.ok) {
624+    throw new Error(`Expected conductor runtime probe to succeed: ${JSON.stringify(runtime)}`);
625+  }
626+
627+  return {
628+    healthz,
629+    readyz,
630+    role,
631+    runtime
632+  };
633+}
634+
635+async function buildDependencies() {
636+  await runCommand("npx", ["--yes", "pnpm", "--filter", "@baa-conductor/control-api-worker", "build"]);
637+  await runCommand("npx", ["--yes", "pnpm", "--filter", "@baa-conductor/status-api", "build"]);
638+}
639+
640+async function runCommand(command, args) {
641+  await new Promise((resolvePromise, reject) => {
642+    const child = spawn(command, args, {
643+      cwd: REPO_ROOT,
644+      env: process.env,
645+      stdio: ["ignore", "pipe", "pipe"]
646+    });
647+    let output = "";
648+
649+    child.stdout?.on("data", (chunk) => {
650+      output += chunk.toString();
651+    });
652+    child.stderr?.on("data", (chunk) => {
653+      output += chunk.toString();
654+    });
655+    child.on("error", reject);
656+    child.on("close", (code) => {
657+      if (code === 0) {
658+        resolvePromise();
659+        return;
660+      }
661+
662+      reject(new Error(`Command failed: ${command} ${args.join(" ")}\n${output}`));
663+    });
664+  });
665+}
666+
667+async function waitForCondition(
668+  check,
669+  {
670+    description,
671+    intervalMs = POLL_INTERVAL_MS,
672+    timeoutMs = START_TIMEOUT_MS
673+  }
674+) {
675+  const deadline = Date.now() + timeoutMs;
676+  let lastError = null;
677+
678+  while (Date.now() <= deadline) {
679+    try {
680+      const result = await check();
681+
682+      if (result) {
683+        return result;
684+      }
685+    } catch (error) {
686+      lastError = error;
687+    }
688+
689+    await delay(intervalMs);
690+  }
691+
692+  const suffix =
693+    lastError == null
694+      ? ""
695+      : `: ${lastError instanceof Error ? lastError.message : String(lastError)}`;
696+  throw new Error(`Timed out waiting for ${description}${suffix}`);
697+}
698+
699+async function getJson(url, init) {
700+  const response = await fetch(url, {
701+    ...init,
702+    headers: {
703+      "content-type": "application/json",
704+      ...(init?.headers ?? {})
705+    },
706+    signal: AbortSignal.timeout(HTTP_TIMEOUT_MS)
707+  });
708+  const text = await response.text();
709+
710+  try {
711+    return JSON.parse(text);
712+  } catch {
713+    throw new Error(`Expected JSON from ${url}, received status ${response.status}: ${text}`);
714+  }
715+}
716+
717+async function getText(url) {
718+  const response = await fetch(url, {
719+    signal: AbortSignal.timeout(HTTP_TIMEOUT_MS)
720+  });
721+  const text = (await response.text()).trim();
722+
723+  if (!response.ok) {
724+    throw new Error(`Expected ${url} to return 2xx, received ${response.status}: ${text}`);
725+  }
726+
727+  return text;
728+}
729+
730+async function postJson(url, body) {
731+  return getJson(url, {
732+    body: JSON.stringify(body),
733+    method: "POST"
734+  });
735+}
736+
737+async function allocatePort() {
738+  return new Promise((resolvePromise, reject) => {
739+    const server = createServer();
740+
741+    server.once("error", reject);
742+    server.listen(0, DEFAULT_HOST, () => {
743+      const address = server.address();
744+
745+      if (!address || typeof address === "string") {
746+        reject(new Error("Failed to allocate a TCP port."));
747+        return;
748+      }
749+
750+      server.close((error) => {
751+        if (error) {
752+          reject(error);
753+          return;
754+        }
755+
756+        resolvePromise(address.port);
757+      });
758+    });
759+  });
760+}
761+
762+async function createStateDir() {
763+  const tmpRoot = join(REPO_ROOT, "tmp");
764+  await mkdir(tmpRoot, { recursive: true });
765+  return mkdtemp(join(tmpRoot, "smoke-"));
766+}
767+
768+function getStateFilePath(stateDir) {
769+  return join(stateDir, "stack-state.json");
770+}
771+
772+async function writeState(state) {
773+  await writeFile(state.stateFilePath, JSON.stringify(state, null, 2));
774+}
775+
776+async function loadState(options) {
777+  const stateDir = options.stateDir;
778+
779+  if (!stateDir) {
780+    throw new Error("--state-dir is required for check and stop commands.");
781+  }
782+
783+  const content = await readFile(getStateFilePath(stateDir), "utf8");
784+  return JSON.parse(content);
785+}
786+
787+function summarizeStateServices(state) {
788+  return Object.fromEntries(
789+    Object.entries(state.services).map(([key, value]) => [
790+      key,
791+      {
792+        baseUrl: value.baseUrl,
793+        logPath: value.logPath,
794+        pid: value.pid
795+      }
796+    ])
797+  );
798+}
799+
800+function writeResult(options, value) {
801+  if (typeof value === "string") {
802+    process.stdout.write(value.endsWith("\n") ? value : `${value}\n`);
803+    return;
804+  }
805+
806+  process.stdout.write(`${JSON.stringify(value, null, options.json ? 2 : 2)}\n`);
807+}
808+
809+function renderHelp() {
810+  return [
811+    "Usage:",
812+    "  node scripts/smoke/stack-cli.mjs [run|start|check|stop] [options]",
813+    "",
814+    "Commands:",
815+    "  run                     Build, start, probe, create a queued task, and fail over to standby.",
816+    "  start                   Build and start the local smoke stack, then write stack-state.json.",
817+    "  check                   Probe an existing stack-state.json and print the current surface snapshot.",
818+    "  stop                    Stop the processes recorded in stack-state.json.",
819+    "",
820+    "Options:",
821+    "  --state-dir <path>      Reuse or write a specific smoke state directory.",
822+    "  --expected-leader <id>  Assert the current leader for the check command.",
823+    "  --skip-build            Skip the control-api/status-api build step.",
824+    "  --json                  Print JSON output.",
825+    "  --help, -h              Show this help text."
826+  ].join("\n");
827+}
828+
829+await main().catch((error) => {
830+  process.stderr.write(`${error instanceof Error ? error.stack ?? error.message : String(error)}\n`);
831+  process.exitCode = 1;
832+});
A scripts/smoke/start-stack.sh
+6, -0
1@@ -0,0 +1,6 @@
2+#!/usr/bin/env bash
3+set -euo pipefail
4+
5+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
6+
7+exec node "$ROOT_DIR/scripts/smoke/stack-cli.mjs" start "$@"
A scripts/smoke/status-api-local.mjs
+92, -0
 1@@ -0,0 +1,92 @@
 2+import { D1StatusSnapshotLoader, startStatusApiServer } from "../../apps/status-api/dist/index.js";
 3+import { createSqliteD1Database } from "./d1-sqlite.mjs";
 4+
 5+function parseArgs(argv) {
 6+  const options = {
 7+    databasePath: null,
 8+    host: "127.0.0.1",
 9+    port: null
10+  };
11+
12+  for (let index = 0; index < argv.length; index += 1) {
13+    const token = argv[index];
14+
15+    switch (token) {
16+      case "--db":
17+        options.databasePath = readValue(argv, token, index);
18+        index += 1;
19+        break;
20+      case "--host":
21+        options.host = readValue(argv, token, index);
22+        index += 1;
23+        break;
24+      case "--port":
25+        options.port = parsePort(readValue(argv, token, index), token);
26+        index += 1;
27+        break;
28+      default:
29+        throw new Error(`Unknown status-api-local option "${token}".`);
30+    }
31+  }
32+
33+  if (!options.databasePath) {
34+    throw new Error("--db is required.");
35+  }
36+
37+  if (options.port == null) {
38+    throw new Error("--port is required.");
39+  }
40+
41+  return options;
42+}
43+
44+function readValue(tokens, flag, index) {
45+  const value = tokens[index + 1];
46+
47+  if (!value || value.startsWith("--")) {
48+    throw new Error(`Missing value for ${flag}.`);
49+  }
50+
51+  return value;
52+}
53+
54+function parsePort(value, flag) {
55+  const port = Number(value);
56+
57+  if (!Number.isInteger(port) || port < 0 || port > 65_535) {
58+    throw new Error(`Invalid value for ${flag}: "${value}".`);
59+  }
60+
61+  return port;
62+}
63+
64+async function main() {
65+  const options = parseArgs(process.argv.slice(2));
66+  const db = createSqliteD1Database(options.databasePath);
67+  const server = await startStatusApiServer({
68+    host: options.host,
69+    port: options.port,
70+    snapshotLoader: new D1StatusSnapshotLoader(db)
71+  });
72+
73+  const close = async () => {
74+    await server.close();
75+    db.close();
76+  };
77+
78+  const shutdown = (signal) => {
79+    void close().finally(() => {
80+      process.exitCode = 0;
81+      if (signal) {
82+        process.stderr.write(`status-api-local stopped after ${signal}\n`);
83+      }
84+    });
85+  };
86+
87+  process.once("SIGINT", () => shutdown("SIGINT"));
88+  process.once("SIGTERM", () => shutdown("SIGTERM"));
89+
90+  process.stdout.write(`status-api-local listening on ${server.getBaseUrl()}\n`);
91+}
92+
93+await main();
A scripts/smoke/stop-stack.sh
+6, -0
1@@ -0,0 +1,6 @@
2+#!/usr/bin/env bash
3+set -euo pipefail
4+
5+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
6+
7+exec node "$ROOT_DIR/scripts/smoke/stack-cli.mjs" stop "$@"
A tests/e2e/smoke.test.mjs
+46, -0
 1@@ -0,0 +1,46 @@
 2+import assert from "node:assert/strict";
 3+import { execFile } from "node:child_process";
 4+import { rm } from "node:fs/promises";
 5+import { promisify } from "node:util";
 6+import { dirname, join, resolve } from "node:path";
 7+import test from "node:test";
 8+import { fileURLToPath } from "node:url";
 9+
10+const execFileAsync = promisify(execFile);
11+const REPO_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "..", "..");
12+
13+test("smoke harness starts the local stack, reflects queue state, and fails over to standby", async () => {
14+  const stateDir = join(REPO_ROOT, "tmp", `e2e-smoke-test-${Date.now()}`);
15+  let shouldCleanup = true;
16+
17+  try {
18+    const { stdout } = await execFileAsync(
19+      "bash",
20+      ["scripts/smoke/run-e2e.sh", "--json", "--state-dir", stateDir],
21+      {
22+        cwd: REPO_ROOT,
23+        maxBuffer: 10 * 1024 * 1024,
24+        timeout: 120_000
25+      }
26+    );
27+    const result = JSON.parse(stdout);
28+
29+    assert.equal(result.ok, true);
30+    assert.equal(result.initial.controlState.data.holder_id, "smoke-mini");
31+    assert.equal(result.initial.statusSnapshot.data.leaderId, "smoke-mini");
32+    assert.equal(result.drained.drained.statusSnapshot.data.mode, "draining");
33+    assert.equal(result.drained.resumed.statusSnapshot.data.mode, "running");
34+    assert.equal(result.task.read.data.status, "queued");
35+    assert.equal(result.afterQueue.data.queueDepth, 1);
36+    assert.equal(result.failover.controlState.data.holder_id, "smoke-mac");
37+    assert.equal(result.failover.statusSnapshot.data.leaderId, "smoke-mac");
38+    assert.equal(result.failover.conductors.standby.role, "leader");
39+  } catch (error) {
40+    shouldCleanup = false;
41+    throw error;
42+  } finally {
43+    if (shouldCleanup) {
44+      await rm(stateDir, { force: true, recursive: true });
45+    }
46+  }
47+});