baa-conductor

git clone 

commit
1078d2b
parent
7c9f009
author
im_wower
date
2026-03-25 08:47:19 +0800 CST
merge: land T-S001 to T-S004
33 files changed,  +871, -485
M apps/codexd/src/app-server-transport.ts
+42, -43
  1@@ -46,6 +46,43 @@ export function createCodexdAppServerStdioTransport(
  2   let connected = false;
  3   let handlers: CodexAppServerTransportHandlers | null = null;
  4 
  5+  const emitBufferedMessages = (): void => {
  6+    while (true) {
  7+      const newlineIndex = buffer.indexOf("\n");
  8+
  9+      if (newlineIndex < 0) {
 10+        return;
 11+      }
 12+
 13+      const line = buffer.slice(0, newlineIndex).trim();
 14+      buffer = buffer.slice(newlineIndex + 1);
 15+
 16+      if (line !== "") {
 17+        handlers?.onMessage(line);
 18+      }
 19+    }
 20+  };
 21+
 22+  const flushTrailingMessage = (): void => {
 23+    const line = buffer.trim();
 24+    buffer = "";
 25+
 26+    if (line !== "") {
 27+      handlers?.onMessage(line);
 28+    }
 29+  };
 30+
 31+  const closeTransport = (error: Error): void => {
 32+    if (closed) {
 33+      return;
 34+    }
 35+
 36+    flushTrailingMessage();
 37+    closed = true;
 38+    connected = false;
 39+    handlers?.onClose(error);
 40+  };
 41+
 42   return {
 43     async connect(nextHandlers: CodexAppServerTransportHandlers): Promise<void> {
 44       if (closed) {
 45@@ -69,57 +106,19 @@ export function createCodexdAppServerStdioTransport(
 46       stdout.setEncoding?.("utf8");
 47       stdout.on("data", (chunk) => {
 48         buffer += typeof chunk === "string" ? chunk : new TextDecoder().decode(chunk);
 49-
 50-        while (true) {
 51-          const newlineIndex = buffer.indexOf("\n");
 52-
 53-          if (newlineIndex < 0) {
 54-            return;
 55-          }
 56-
 57-          const line = buffer.slice(0, newlineIndex).trim();
 58-          buffer = buffer.slice(newlineIndex + 1);
 59-
 60-          if (line !== "") {
 61-            handlers?.onMessage(line);
 62-          }
 63-        }
 64+        emitBufferedMessages();
 65       });
 66       stdout.on("end", () => {
 67-        if (closed) {
 68-          return;
 69-        }
 70-
 71-        closed = true;
 72-        connected = false;
 73-        handlers?.onClose(new Error("Codex app-server stdio stdout ended."));
 74+        closeTransport(new Error("Codex app-server stdio stdout ended."));
 75       });
 76       stdout.on("error", (error) => {
 77-        if (closed) {
 78-          return;
 79-        }
 80-
 81-        closed = true;
 82-        connected = false;
 83-        handlers?.onClose(error);
 84+        closeTransport(error);
 85       });
 86       config.process.on("error", (error) => {
 87-        if (closed) {
 88-          return;
 89-        }
 90-
 91-        closed = true;
 92-        connected = false;
 93-        handlers?.onClose(error);
 94+        closeTransport(error);
 95       });
 96       config.process.on("exit", (code, signal) => {
 97-        if (closed) {
 98-          return;
 99-        }
100-
101-        closed = true;
102-        connected = false;
103-        handlers?.onClose(
104+        closeTransport(
105           new Error(
106             `Codex app-server stdio child exited (code=${String(code)}, signal=${String(signal)}).`
107           )
M apps/codexd/src/index.test.js
+314, -9
  1@@ -34,20 +34,21 @@ class FakeEventStream {
  2 
  3 class FakeStream {
  4   constructor() {
  5-    this.listeners = [];
  6+    this.listeners = new Map();
  7   }
  8 
  9   on(event, listener) {
 10-    if (event === "data") {
 11-      this.listeners.push(listener);
 12-    }
 13+    this.listeners.set(event, [...(this.listeners.get(event) ?? []), listener]);
 14+    return this;
 15+  }
 16 
 17+  setEncoding() {
 18     return this;
 19   }
 20 
 21-  emit(chunk) {
 22-    for (const listener of this.listeners) {
 23-      listener(chunk);
 24+  emit(event, ...args) {
 25+    for (const listener of this.listeners.get(event) ?? []) {
 26+      listener(...args);
 27     }
 28   }
 29 }
 30@@ -95,6 +96,221 @@ class FakeChild {
 31   }
 32 }
 33 
 34+class FakeRpcAppServerChild extends FakeChild {
 35+  constructor() {
 36+    super();
 37+    this.nextThreadId = 1;
 38+    this.nextTurnId = 1;
 39+    this.requests = "";
 40+    this.threads = new Map();
 41+    this.stdin = {
 42+      end: () => {
 43+        this.stdout.emit("end");
 44+      },
 45+      write: (chunk) => {
 46+        this.requests += typeof chunk === "string" ? chunk : new TextDecoder().decode(chunk);
 47+
 48+        while (true) {
 49+          const newlineIndex = this.requests.indexOf("\n");
 50+
 51+          if (newlineIndex < 0) {
 52+            break;
 53+          }
 54+
 55+          const line = this.requests.slice(0, newlineIndex).trim();
 56+          this.requests = this.requests.slice(newlineIndex + 1);
 57+
 58+          if (line !== "") {
 59+            this.handleRequest(JSON.parse(line));
 60+          }
 61+        }
 62+
 63+        return true;
 64+      }
 65+    };
 66+  }
 67+
 68+  emitRpcMessage(payload, { trailingNewline = true } = {}) {
 69+    this.stdout.emit("data", `${JSON.stringify(payload)}${trailingNewline ? "\n" : ""}`);
 70+  }
 71+
 72+  handleRequest(request) {
 73+    switch (request.method) {
 74+      case "initialize":
 75+        queueMicrotask(() => {
 76+          this.emitRpcMessage({
 77+            id: request.id,
 78+            result: {
 79+              platformFamily: "unix",
 80+              platformOs: "macos",
 81+              userAgent: "codex-cli fake-rpc"
 82+            }
 83+          });
 84+        });
 85+        break;
 86+
 87+      case "thread/start":
 88+        queueMicrotask(() => {
 89+          const threadId = `thread-${this.nextThreadId}`;
 90+          this.nextThreadId += 1;
 91+          const thread = {
 92+            cliVersion: "test",
 93+            createdAt: Date.now(),
 94+            cwd: request.params?.cwd ?? "/tmp/fake-rpc",
 95+            ephemeral: true,
 96+            id: threadId,
 97+            modelProvider: "openai",
 98+            name: null,
 99+            preview: "fake rpc session",
100+            source: { custom: "codexd-rpc-test" },
101+            status: { type: "idle" },
102+            turns: [],
103+            updatedAt: Date.now()
104+          };
105+          const session = {
106+            approvalPolicy: "never",
107+            cwd: thread.cwd,
108+            model: "gpt-5.4",
109+            modelProvider: "openai",
110+            reasoningEffort: "medium",
111+            sandbox: { type: "dangerFullAccess" },
112+            serviceTier: null,
113+            thread
114+          };
115+
116+          this.threads.set(threadId, session);
117+          this.emitRpcMessage({
118+            method: "thread/started",
119+            params: {
120+              thread
121+            }
122+          });
123+          this.emitRpcMessage({
124+            id: request.id,
125+            result: session
126+          });
127+        });
128+        break;
129+
130+      case "turn/start":
131+        queueMicrotask(() => {
132+          const session = this.threads.get(request.params.threadId);
133+
134+          if (session == null) {
135+            this.emitRpcMessage({
136+              id: request.id,
137+              error: {
138+                code: -32000,
139+                message: `unknown thread ${request.params.threadId}`
140+              }
141+            });
142+            return;
143+          }
144+
145+          const turnNumber = this.nextTurnId;
146+          const turnId = `turn-${turnNumber}`;
147+          this.nextTurnId += 1;
148+          const turn = {
149+            error: null,
150+            id: turnId,
151+            status: "inProgress"
152+          };
153+          const completedTurn = {
154+            ...turn,
155+            status: "completed"
156+          };
157+
158+          session.thread.turns = [...session.thread.turns, turn];
159+          this.emitRpcMessage({
160+            method: "turn/started",
161+            params: {
162+              threadId: session.thread.id,
163+              turn
164+            }
165+          });
166+          this.emitRpcMessage({
167+            id: request.id,
168+            result: {
169+              turn
170+            }
171+          });
172+          this.emitRpcMessage({
173+            method: "item/agentMessage/delta",
174+            params: {
175+              delta: `reply ${turnId}`,
176+              itemId: `item-${turnId}`,
177+              threadId: session.thread.id,
178+              turnId
179+            }
180+          });
181+
182+          if (turnNumber === 2) {
183+            this.emitRpcMessage({
184+              method: "error",
185+              params: {
186+                error: {
187+                  additionalDetails: "timeout waiting for child process to exit",
188+                  codexErrorInfo: {
189+                    responseStreamDisconnected: {
190+                      httpStatusCode: null
191+                    }
192+                  },
193+                  message: "Reconnecting... 2/5"
194+                },
195+                threadId: session.thread.id,
196+                turnId,
197+                willRetry: true
198+              }
199+            });
200+            setTimeout(() => {
201+              session.thread.turns = session.thread.turns.map((entry) =>
202+                entry.id === turnId ? completedTurn : entry
203+              );
204+              this.emitRpcMessage(
205+                {
206+                  method: "turn/completed",
207+                  params: {
208+                    threadId: session.thread.id,
209+                    turn: completedTurn
210+                  }
211+                },
212+                {
213+                  trailingNewline: false
214+                }
215+              );
216+              this.stdout.emit("end");
217+            }, 25);
218+            return;
219+          }
220+
221+          session.thread.turns = session.thread.turns.map((entry) =>
222+            entry.id === turnId ? completedTurn : entry
223+          );
224+          this.emitRpcMessage({
225+            method: "turn/completed",
226+            params: {
227+              threadId: session.thread.id,
228+              turn: completedTurn
229+            }
230+          });
231+        });
232+        break;
233+
234+      default:
235+        queueMicrotask(() => {
236+          this.emitRpcMessage({
237+            id: request.id,
238+            error: {
239+              code: -32601,
240+              message: `unexpected method ${request.method}`
241+            }
242+          });
243+        });
244+        break;
245+    }
246+  }
247+}
248+
249 class FakeAppServerAdapter {
250   constructor(defaultCwd, options = {}) {
251     this.defaultCwd = defaultCwd;
252@@ -273,8 +489,8 @@ test("CodexdDaemon persists daemon identity, child state, session registry, and
253 
254         queueMicrotask(() => {
255           fakeChild.emit("spawn");
256-          fakeChild.stdout.emit("ready from fake child\n");
257-          fakeChild.stderr.emit("warning from fake child\n");
258+          fakeChild.stdout.emit("data", "ready from fake child\n");
259+          fakeChild.stderr.emit("data", "warning from fake child\n");
260         });
261 
262         return fakeChild;
263@@ -547,6 +763,95 @@ test("CodexdDaemon keeps retrying turns in progress until the app-server complet
264   }
265 });
266 
267+test("CodexdDaemon flushes the final stdio completion event so sequential sessions still finish", async () => {
268+  const repoRoot = mkdtempSync(join(tmpdir(), "codexd-stdio-tail-test-"));
269+  const config = resolveCodexdConfig({
270+    logsDir: join(repoRoot, "logs"),
271+    repoRoot,
272+    stateDir: join(repoRoot, "state")
273+  });
274+  const fakeChild = new FakeRpcAppServerChild();
275+  const daemon = new CodexdDaemon(config, {
276+    env: {
277+      HOME: repoRoot
278+    },
279+    spawner: {
280+      spawn(command, args, options) {
281+        assert.equal(command, "codex");
282+        assert.deepEqual(args, ["app-server"]);
283+        assert.equal(options.cwd, repoRoot);
284+
285+        queueMicrotask(() => {
286+          fakeChild.emit("spawn");
287+        });
288+
289+        return fakeChild;
290+      }
291+    }
292+  });
293+
294+  await daemon.start();
295+
296+  try {
297+    const firstSession = await daemon.createSession({
298+      cwd: repoRoot,
299+      purpose: "duplex"
300+    });
301+    const firstTurn = await daemon.createTurn({
302+      input: "First turn.",
303+      sessionId: firstSession.sessionId
304+    });
305+    const completedFirstSession = await waitFor(() => {
306+      const current = daemon.getSession(firstSession.sessionId);
307+      return current?.lastTurnStatus === "completed" ? current : null;
308+    });
309+    assert.equal(completedFirstSession.currentTurnId, null);
310+    assert.equal(completedFirstSession.lastTurnId, firstTurn.turnId);
311+
312+    const secondSession = await daemon.createSession({
313+      cwd: repoRoot,
314+      purpose: "duplex"
315+    });
316+    const secondTurn = await daemon.createTurn({
317+      input: "Second turn.",
318+      sessionId: secondSession.sessionId
319+    });
320+
321+    const retryingSecondSession = await waitFor(() => {
322+      const current = daemon.getSession(secondSession.sessionId);
323+      const sawRetryEvent = daemon
324+        .getStatusSnapshot()
325+        .recentEvents.events.some(
326+          (event) =>
327+            event.type === "app-server.turn.error" &&
328+            event.detail?.turnId === secondTurn.turnId &&
329+            event.detail?.willRetry === true
330+        );
331+
332+      return sawRetryEvent && current?.lastTurnStatus === "inProgress" ? current : null;
333+    });
334+    assert.equal(retryingSecondSession.currentTurnId, secondTurn.turnId);
335+
336+    const completedSecondSession = await waitFor(() => {
337+      const current = daemon.getSession(secondSession.sessionId);
338+      const sawCompletedEvent = daemon
339+        .getStatusSnapshot()
340+        .recentEvents.events.some(
341+          (event) =>
342+            event.type === "app-server.turn.completed" &&
343+            event.detail?.turnId === secondTurn.turnId
344+        );
345+
346+      return sawCompletedEvent && current?.lastTurnStatus === "completed" ? current : null;
347+    });
348+    assert.equal(completedSecondSession.currentTurnId, null);
349+    assert.equal(completedSecondSession.lastTurnId, secondTurn.turnId);
350+    assert.equal(completedSecondSession.lastTurnStatus, "completed");
351+  } finally {
352+    await daemon.stop();
353+  }
354+});
355+
356 async function fetchJson(url, init) {
357   const response = await fetch(url, init);
358 
M apps/conductor-daemon/src/index.test.js
+186, -137
  1@@ -1,6 +1,7 @@
  2 import assert from "node:assert/strict";
  3 import { createServer } from "node:http";
  4 import { mkdtempSync, rmSync } from "node:fs";
  5+import { createConnection } from "node:net";
  6 import { homedir, tmpdir } from "node:os";
  7 import { join } from "node:path";
  8 import test from "node:test";
  9@@ -827,6 +828,67 @@ async function connectFirefoxBridgeClient(wsUrl, clientId) {
 10   };
 11 }
 12 
 13+async function withRuntimeFixture(callback) {
 14+  const stateDir = mkdtempSync(join(tmpdir(), "baa-conductor-runtime-fixture-"));
 15+  const runtime = new ConductorRuntime(
 16+    {
 17+      nodeId: "mini-main",
 18+      host: "mini",
 19+      role: "primary",
 20+      controlApiBase: "https://control.example.test",
 21+      localApiBase: "http://127.0.0.1:0",
 22+      sharedToken: "replace-me",
 23+      paths: {
 24+        runsDir: "/tmp/runs",
 25+        stateDir
 26+      }
 27+    },
 28+    {
 29+      autoStartLoops: false,
 30+      now: () => 100
 31+    }
 32+  );
 33+
 34+  try {
 35+    return await callback({
 36+      runtime,
 37+      stateDir
 38+    });
 39+  } finally {
 40+    await runtime.stop();
 41+    rmSync(stateDir, {
 42+      force: true,
 43+      recursive: true
 44+    });
 45+  }
 46+}
 47+
 48+async function assertLocalApiListenerClosed(baseUrl) {
 49+  const { hostname, port } = new URL(baseUrl);
 50+
 51+  await new Promise((resolve, reject) => {
 52+    const socket = createConnection({
 53+      host: hostname,
 54+      port: Number(port)
 55+    });
 56+
 57+    socket.once("connect", () => {
 58+      socket.destroy();
 59+      reject(new Error(`expected local API listener to be closed: ${baseUrl}`));
 60+    });
 61+    socket.once("error", (error) => {
 62+      socket.destroy();
 63+
 64+      if (error && typeof error === "object" && "code" in error && error.code === "ECONNREFUSED") {
 65+        resolve();
 66+        return;
 67+      }
 68+
 69+      reject(error);
 70+    });
 71+  });
 72+}
 73+
 74 test("start enters leader state and allows scheduler work only for the lease holder", async () => {
 75   const heartbeatRequests = [];
 76   const leaseRequests = [];
 77@@ -2018,159 +2080,146 @@ test("ConductorRuntime serves health and migrated local API endpoints over HTTP"
 78 });
 79 
 80 test("ConductorRuntime exposes a minimal runtime snapshot for CLI and status surfaces", async () => {
 81-  const stateDir = mkdtempSync(join(tmpdir(), "baa-conductor-snapshot-"));
 82-  const runtime = new ConductorRuntime(
 83-    {
 84-      nodeId: "mini-main",
 85-      host: "mini",
 86-      role: "primary",
 87-      controlApiBase: "https://control.example.test",
 88-      localApiBase: "http://127.0.0.1:0",
 89-      sharedToken: "replace-me",
 90-      paths: {
 91-        runsDir: "/tmp/runs",
 92-        stateDir
 93-      }
 94-    },
 95-    {
 96-      autoStartLoops: false,
 97-      now: () => 100
 98-    }
 99-  );
100+  await withRuntimeFixture(async ({ runtime }) => {
101+    assert.equal(runtime.getRuntimeSnapshot().runtime.started, false);
102+
103+    const startedSnapshot = await runtime.start();
104+    assert.equal(startedSnapshot.runtime.started, true);
105+    assert.equal(startedSnapshot.daemon.leaseState, "leader");
106+    assert.equal(startedSnapshot.daemon.schedulerEnabled, true);
107+    assert.equal(startedSnapshot.loops.heartbeat, false);
108+    assert.equal(startedSnapshot.loops.lease, false);
109+    assert.equal(startedSnapshot.controlApi.usesPlaceholderToken, true);
110+    assert.match(startedSnapshot.warnings.join("\n"), /replace-me/);
111 
112-  assert.equal(runtime.getRuntimeSnapshot().runtime.started, false);
113-
114-  const startedSnapshot = await runtime.start();
115-  assert.equal(startedSnapshot.runtime.started, true);
116-  assert.equal(startedSnapshot.daemon.leaseState, "leader");
117-  assert.equal(startedSnapshot.daemon.schedulerEnabled, true);
118-  assert.equal(startedSnapshot.loops.heartbeat, false);
119-  assert.equal(startedSnapshot.loops.lease, false);
120-  assert.equal(startedSnapshot.controlApi.usesPlaceholderToken, true);
121-  assert.match(startedSnapshot.warnings.join("\n"), /replace-me/);
122-
123-  const stoppedSnapshot = await runtime.stop();
124-  assert.equal(stoppedSnapshot.runtime.started, false);
125-  rmSync(stateDir, {
126-    force: true,
127-    recursive: true
128+    const stoppedSnapshot = await runtime.stop();
129+    assert.equal(stoppedSnapshot.runtime.started, false);
130   });
131 });
132 
133-test("ConductorRuntime exposes a local Firefox websocket bridge over the local API listener", async () => {
134-  const stateDir = mkdtempSync(join(tmpdir(), "baa-conductor-firefox-ws-"));
135-  const runtime = new ConductorRuntime(
136-    {
137-      nodeId: "mini-main",
138-      host: "mini",
139-      role: "primary",
140-      controlApiBase: "https://control.example.test",
141-      localApiBase: "http://127.0.0.1:0",
142-      sharedToken: "replace-me",
143-      paths: {
144-        runsDir: "/tmp/runs",
145-        stateDir
146-      }
147-    },
148-    {
149-      autoStartLoops: false,
150-      now: () => 100
151-    }
152-  );
153+test("ConductorRuntime fixture closes the local API listener when a started test aborts", async () => {
154+  let baseUrl = null;
155 
156-  const snapshot = await runtime.start();
157-  const wsUrl = snapshot.controlApi.firefoxWsUrl;
158-  const baseUrl = snapshot.controlApi.localApiBase;
159-  const socket = new WebSocket(wsUrl);
160-  const queue = createWebSocketMessageQueue(socket);
161+  await assert.rejects(
162+    withRuntimeFixture(async ({ runtime }) => {
163+      const snapshot = await runtime.start();
164+      baseUrl = snapshot.controlApi.localApiBase;
165 
166-  await waitForWebSocketOpen(socket);
167+      const healthResponse = await fetch(`${baseUrl}/healthz`);
168+      assert.equal(healthResponse.status, 200);
169 
170-  socket.send(
171-    JSON.stringify({
172-      type: "hello",
173-      clientId: "firefox-test",
174-      nodeType: "browser",
175-      nodeCategory: "proxy",
176-      nodePlatform: "firefox"
177-    })
178+      throw new Error("forced runtime fixture failure");
179+    }),
180+    /forced runtime fixture failure/u
181   );
182 
183-  const helloAck = await queue.next((message) => message.type === "hello_ack");
184-  assert.equal(helloAck.clientId, "firefox-test");
185-  assert.equal(helloAck.wsUrl, wsUrl);
186+  assert.equal(typeof baseUrl, "string");
187+  await assertLocalApiListenerClosed(baseUrl);
188+});
189 
190-  const initialSnapshot = await queue.next(
191-    (message) => message.type === "state_snapshot" && message.reason === "hello"
192-  );
193-  assert.equal(initialSnapshot.snapshot.system.mode, "running");
194-  assert.equal(initialSnapshot.snapshot.browser.client_count, 1);
195+test("ConductorRuntime exposes a local Firefox websocket bridge over the local API listener", async () => {
196+  await withRuntimeFixture(async ({ runtime }) => {
197+    let socket = null;
198+    let queue = null;
199 
200-  const credentialRequest = await queue.next((message) => message.type === "request_credentials");
201-  assert.equal(credentialRequest.reason, "hello");
202+    try {
203+      const snapshot = await runtime.start();
204+      const wsUrl = snapshot.controlApi.firefoxWsUrl;
205+      const baseUrl = snapshot.controlApi.localApiBase;
206+      socket = new WebSocket(wsUrl);
207+      queue = createWebSocketMessageQueue(socket);
208 
209-  socket.send(
210-    JSON.stringify({
211-      type: "api_endpoints",
212-      platform: "chatgpt",
213-      endpoints: ["/backend-api/conversation", "/backend-api/models"]
214-    })
215-  );
216-  socket.send(
217-    JSON.stringify({
218-      type: "credentials",
219-      platform: "chatgpt",
220-      headers: {
221-        authorization: "Bearer test-token",
222-        cookie: "session=test"
223-      },
224-      timestamp: 1_760_000_000_000
225-    })
226-  );
227+      await waitForWebSocketOpen(socket);
228 
229-  const browserSnapshot = await queue.next(
230-    (message) =>
231-      message.type === "state_snapshot"
232-      && message.snapshot.browser.clients.some((client) =>
233-        client.client_id === "firefox-test"
234-        && client.credentials.some((entry) => entry.platform === "chatgpt" && entry.header_count === 2)
235-        && client.request_hooks.some((entry) => entry.platform === "chatgpt" && entry.endpoint_count === 2)
236-      )
237-  );
238-  assert.equal(browserSnapshot.snapshot.browser.client_count, 1);
239+      socket.send(
240+        JSON.stringify({
241+          type: "hello",
242+          clientId: "firefox-test",
243+          nodeType: "browser",
244+          nodeCategory: "proxy",
245+          nodePlatform: "firefox"
246+        })
247+      );
248 
249-  socket.send(
250-    JSON.stringify({
251-      type: "action_request",
252-      action: "pause",
253-      requestId: "req-pause",
254-      requestedBy: "integration_test",
255-      reason: "pause_via_ws"
256-    })
257-  );
258+      const helloAck = await queue.next((message) => message.type === "hello_ack");
259+      assert.equal(helloAck.clientId, "firefox-test");
260+      assert.equal(helloAck.wsUrl, wsUrl);
261 
262-  const actionResult = await queue.next(
263-    (message) => message.type === "action_result" && message.requestId === "req-pause"
264-  );
265-  assert.equal(actionResult.ok, true);
266-  assert.equal(actionResult.system.mode, "paused");
267+      const initialSnapshot = await queue.next(
268+        (message) => message.type === "state_snapshot" && message.reason === "hello"
269+      );
270+      assert.equal(initialSnapshot.snapshot.system.mode, "running");
271+      assert.equal(initialSnapshot.snapshot.browser.client_count, 1);
272 
273-  const pausedSnapshot = await queue.next(
274-    (message) => message.type === "state_snapshot" && message.snapshot.system.mode === "paused"
275-  );
276-  assert.equal(pausedSnapshot.snapshot.system.mode, "paused");
277-
278-  const systemStateResponse = await fetch(`${baseUrl}/v1/system/state`);
279-  assert.equal(systemStateResponse.status, 200);
280-  assert.equal((await systemStateResponse.json()).data.mode, "paused");
281-
282-  queue.stop();
283-  socket.close(1000, "done");
284-  const stoppedSnapshot = await runtime.stop();
285-  assert.equal(stoppedSnapshot.runtime.started, false);
286-  rmSync(stateDir, {
287-    force: true,
288-    recursive: true
289+      const credentialRequest = await queue.next((message) => message.type === "request_credentials");
290+      assert.equal(credentialRequest.reason, "hello");
291+
292+      socket.send(
293+        JSON.stringify({
294+          type: "api_endpoints",
295+          platform: "chatgpt",
296+          endpoints: ["/backend-api/conversation", "/backend-api/models"]
297+        })
298+      );
299+      socket.send(
300+        JSON.stringify({
301+          type: "credentials",
302+          platform: "chatgpt",
303+          headers: {
304+            authorization: "Bearer test-token",
305+            cookie: "session=test"
306+          },
307+          timestamp: 1_760_000_000_000
308+        })
309+      );
310+
311+      const browserSnapshot = await queue.next(
312+        (message) =>
313+          message.type === "state_snapshot"
314+          && message.snapshot.browser.clients.some((client) =>
315+            client.client_id === "firefox-test"
316+            && client.credentials.some((entry) => entry.platform === "chatgpt" && entry.header_count === 2)
317+            && client.request_hooks.some((entry) => entry.platform === "chatgpt" && entry.endpoint_count === 2)
318+          )
319+      );
320+      assert.equal(browserSnapshot.snapshot.browser.client_count, 1);
321+
322+      socket.send(
323+        JSON.stringify({
324+          type: "action_request",
325+          action: "pause",
326+          requestId: "req-pause",
327+          requestedBy: "integration_test",
328+          reason: "pause_via_ws"
329+        })
330+      );
331+
332+      const actionResult = await queue.next(
333+        (message) => message.type === "action_result" && message.requestId === "req-pause"
334+      );
335+      assert.equal(actionResult.ok, true);
336+      assert.equal(actionResult.system.mode, "paused");
337+
338+      const pausedSnapshot = await queue.next(
339+        (message) => message.type === "state_snapshot" && message.snapshot.system.mode === "paused"
340+      );
341+      assert.equal(pausedSnapshot.snapshot.system.mode, "paused");
342+
343+      const systemStateResponse = await fetch(`${baseUrl}/v1/system/state`);
344+      assert.equal(systemStateResponse.status, 200);
345+      assert.equal((await systemStateResponse.json()).data.mode, "paused");
346+
347+      const stoppedSnapshot = await runtime.stop();
348+      assert.equal(stoppedSnapshot.runtime.started, false);
349+    } finally {
350+      queue?.stop();
351+
352+      if (socket) {
353+        const closePromise = waitForWebSocketClose(socket);
354+        socket.close(1000, "done");
355+        await closePromise;
356+      }
357+    }
358   });
359 });
360 
D apps/control-api-worker/.gitignore
+0, -3
1@@ -1,3 +0,0 @@
2-.dev.vars*
3-!.dev.vars.example
4-.wrangler/
M apps/status-api/src/cli.ts
+3, -1
 1@@ -154,9 +154,11 @@ function renderStatusApiCliHelp(): string {
 2     "Usage: node apps/status-api/dist/index.js [serve|smoke|help] [--host <host>] [--port <port>]",
 3     "",
 4     `Default listen address: http://${getDefaultStatusApiHost()}:${getDefaultStatusApiPort()}`,
 5-    "Default truth source: BAA_CONTROL_API_BASE or https://control-api.makefile.so",
 6+    "Default truth source: BAA_CONDUCTOR_LOCAL_API or http://100.71.210.78:4317",
 7+    "Compatibility override: BAA_CONTROL_API_BASE",
 8     "Routes:",
 9     "- GET /healthz",
10+    "- GET /describe",
11     "- GET /v1/status",
12     "- GET /v1/status/ui",
13     "",
M apps/status-api/src/contracts.ts
+1, -1
1@@ -1,6 +1,6 @@
2 import type { AutomationMode } from "../../../packages/db/src/index.js";
3 
4-export type StatusSnapshotSource = "control-api" | "empty" | "d1";
5+export type StatusSnapshotSource = "conductor-api" | "empty" | "d1";
6 export type StatusApiRouteMethod = "GET";
7 
8 export interface StatusApiEnvironment {
M apps/status-api/src/data-source.ts
+29, -13
  1@@ -24,8 +24,8 @@ const SELECT_ACTIVE_RUN_COUNT_SQL = `
  2     AND finished_at IS NULL
  3 `;
  4 
  5-const DEFAULT_CONTROL_API_BASE = "https://control-api.makefile.so";
  6-const CONTROL_API_SYSTEM_STATE_PATH = "/v1/system/state";
  7+const DEFAULT_STATUS_API_TRUTH_SOURCE_BASE = "http://100.71.210.78:4317";
  8+const STATUS_API_SYSTEM_STATE_PATH = "/v1/system/state";
  9 
 10 export interface StatusSnapshotSourceReader {
 11   countActiveRuns(): Promise<number>;
 12@@ -61,16 +61,16 @@ export class ControlApiStatusSnapshotLoader implements StatusSnapshotLoader {
 13   constructor(
 14     private readonly options: ControlApiStatusSnapshotLoaderOptions
 15   ) {
 16-    this.baseUrl = normalizeControlApiBaseUrl(options.baseUrl);
 17+    this.baseUrl = normalizeTruthSourceBaseUrl(options.baseUrl);
 18     this.fetchImpl = options.fetch ?? globalThis.fetch;
 19 
 20     if (typeof this.fetchImpl !== "function") {
 21-      throw new Error("Status API requires a fetch implementation to read the control plane truth source.");
 22+      throw new Error("Status API requires a fetch implementation to read the conductor truth source.");
 23     }
 24   }
 25 
 26   async loadSnapshot(): Promise<StatusSnapshot> {
 27-    const response = await this.fetchImpl(new URL(CONTROL_API_SYSTEM_STATE_PATH, this.baseUrl), {
 28+    const response = await this.fetchImpl(new URL(STATUS_API_SYSTEM_STATE_PATH, this.baseUrl), {
 29       method: "GET",
 30       headers: {
 31         Accept: "application/json",
 32@@ -80,7 +80,7 @@ export class ControlApiStatusSnapshotLoader implements StatusSnapshotLoader {
 33 
 34     if (!response.ok) {
 35       throw new Error(
 36-        `Control API truth source ${CONTROL_API_SYSTEM_STATE_PATH} returned HTTP ${response.status}.`
 37+        `Conductor truth source ${STATUS_API_SYSTEM_STATE_PATH} returned HTTP ${response.status}.`
 38       );
 39     }
 40 
 41@@ -124,14 +124,20 @@ export function createDefaultStatusSnapshotLoader(
 42   options: DefaultStatusSnapshotLoaderOptions = {}
 43 ): StatusSnapshotLoader {
 44   return new ControlApiStatusSnapshotLoader({
 45-    baseUrl: resolveStatusApiControlApiBase(options.env),
 46+    baseUrl: resolveStatusApiTruthSourceBase(options.env),
 47     fetch: options.fetch,
 48     now: options.now
 49   });
 50 }
 51 
 52+export function resolveStatusApiTruthSourceBase(env: StatusApiEnvironment = process?.env ?? {}): string {
 53+  return normalizeTruthSourceBaseUrl(
 54+    getFirstNonEmptyString(env.BAA_CONDUCTOR_LOCAL_API, env.BAA_CONTROL_API_BASE) ?? DEFAULT_STATUS_API_TRUTH_SOURCE_BASE
 55+  );
 56+}
 57+
 58 export function resolveStatusApiControlApiBase(env: StatusApiEnvironment = process?.env ?? {}): string {
 59-  return normalizeControlApiBaseUrl(env.BAA_CONTROL_API_BASE ?? DEFAULT_CONTROL_API_BASE);
 60+  return resolveStatusApiTruthSourceBase(env);
 61 }
 62 
 63 export function createStatusSnapshotFromControlApiPayload(
 64@@ -144,7 +150,7 @@ export function createStatusSnapshotFromControlApiPayload(
 65   );
 66 
 67   if (getFirstDefinedValue(payload, ["ok"]) === false) {
 68-    throw new Error(`Control API truth source reported an error: ${payloadError ?? "unknown_error"}.`);
 69+    throw new Error(`Conductor truth source reported an error: ${payloadError ?? "unknown_error"}.`);
 70   }
 71 
 72   const mode = normalizeAutomationMode(
 73@@ -165,7 +171,7 @@ export function createStatusSnapshotFromControlApiPayload(
 74   );
 75 
 76   if (mode == null) {
 77-    throw new TypeError("Control API truth source payload did not include a valid automation mode.");
 78+    throw new TypeError("Conductor truth source payload did not include a valid automation mode.");
 79   }
 80 
 81   const observedAtMs = observedAt.getTime();
 82@@ -199,7 +205,7 @@ export function createStatusSnapshotFromControlApiPayload(
 83   );
 84 
 85   return {
 86-    source: "control-api",
 87+    source: "conductor-api",
 88     mode,
 89     leaderId: normalizeOptionalString(
 90       getFirstDefinedValue(payload, [
 91@@ -336,16 +342,26 @@ function toIsoFromUnixSeconds(value: number | null | undefined): string | null {
 92   return new Date(value * 1000).toISOString();
 93 }
 94 
 95-function normalizeControlApiBaseUrl(value: string): string {
 96+function normalizeTruthSourceBaseUrl(value: string): string {
 97   const normalized = value.trim();
 98 
 99   if (normalized === "") {
100-    return DEFAULT_CONTROL_API_BASE;
101+    return DEFAULT_STATUS_API_TRUTH_SOURCE_BASE;
102   }
103 
104   return new URL(normalized).toString();
105 }
106 
107+function getFirstNonEmptyString(...values: Array<string | undefined>): string | undefined {
108+  for (const value of values) {
109+    if (typeof value === "string" && value.trim() !== "") {
110+      return value;
111+    }
112+  }
113+
114+  return undefined;
115+}
116+
117 function normalizeAutomationMode(value: unknown): StatusSnapshot["mode"] | null {
118   return value === "running" || value === "draining" || value === "paused" ? value : null;
119 }
A apps/status-api/src/index.test.js
+70, -0
 1@@ -0,0 +1,70 @@
 2+import assert from "node:assert/strict";
 3+import test from "node:test";
 4+
 5+import {
 6+  createStatusSnapshotFromControlApiPayload,
 7+  resolveStatusApiControlApiBase,
 8+  resolveStatusApiTruthSourceBase,
 9+  StaticStatusSnapshotLoader
10+} from "../dist/apps/status-api/src/data-source.js";
11+import { createStatusApiHandler } from "../dist/apps/status-api/src/service.js";
12+
13+test("status-api truth source prefers BAA_CONDUCTOR_LOCAL_API over legacy compatibility base", () => {
14+  const env = {
15+    BAA_CONDUCTOR_LOCAL_API: "http://100.71.210.78:4317",
16+    BAA_CONTROL_API_BASE: "https://conductor.makefile.so"
17+  };
18+
19+  assert.equal(resolveStatusApiTruthSourceBase(env), "http://100.71.210.78:4317/");
20+  assert.equal(resolveStatusApiControlApiBase(env), "http://100.71.210.78:4317/");
21+});
22+
23+test("status-api truth source falls back to BAA_CONTROL_API_BASE only when local conductor base is absent", () => {
24+  const env = {
25+    BAA_CONDUCTOR_LOCAL_API: "   ",
26+    BAA_CONTROL_API_BASE: "https://conductor.makefile.so"
27+  };
28+
29+  assert.equal(resolveStatusApiTruthSourceBase(env), "https://conductor.makefile.so/");
30+});
31+
32+test("status-api truth source defaults to the canonical conductor local API", () => {
33+  assert.equal(resolveStatusApiTruthSourceBase({}), "http://100.71.210.78:4317/");
34+});
35+
36+test("status snapshots mark conductor-api as the upstream source", () => {
37+  const snapshot = createStatusSnapshotFromControlApiPayload(
38+    {
39+      ok: true,
40+      data: {
41+        mode: "running",
42+        queue_depth: 2,
43+        active_runs: 1
44+      }
45+    },
46+    new Date("2026-03-25T00:00:00.000Z")
47+  );
48+
49+  assert.equal(snapshot.source, "conductor-api");
50+});
51+
52+test("status-api describe reports conductor local truth with compatibility note", async () => {
53+  const handler = createStatusApiHandler(new StaticStatusSnapshotLoader(), {
54+    truthSourceBaseUrl: "http://100.71.210.78:4317"
55+  });
56+
57+  const response = await handler.handle({
58+    method: "GET",
59+    path: "/describe"
60+  });
61+  const payload = JSON.parse(response.body);
62+
63+  assert.equal(payload.ok, true);
64+  assert.equal(payload.data.truth_source.type, "conductor-api");
65+  assert.equal(payload.data.truth_source.base_url, "http://100.71.210.78:4317");
66+  assert.deepEqual(payload.data.notes, [
67+    "Status API is read-only.",
68+    "Default truth source comes from BAA_CONDUCTOR_LOCAL_API.",
69+    "Use BAA_CONTROL_API_BASE only when you need a compatibility override."
70+  ]);
71+});
M apps/status-api/src/runtime.ts
+2, -2
 1@@ -1,4 +1,4 @@
 2-import { createDefaultStatusSnapshotLoader, resolveStatusApiControlApiBase } from "./data-source.js";
 3+import { createDefaultStatusSnapshotLoader, resolveStatusApiTruthSourceBase } from "./data-source.js";
 4 import type {
 5   StatusApiEnvironment,
 6   StatusApiHandler,
 7@@ -23,7 +23,7 @@ export function createStatusApiRuntime(options: StatusApiRuntimeOptions = {}): S
 8   const handler = createStatusApiHandler(
 9     options.snapshotLoader ?? createDefaultStatusSnapshotLoader({ env }),
10     {
11-      controlApiBase: resolveStatusApiControlApiBase(env),
12+      truthSourceBaseUrl: resolveStatusApiTruthSourceBase(env),
13       publicBaseUrl: env?.BAA_STATUS_API_PUBLIC_BASE,
14       version: env?.BAA_STATUS_API_VERSION
15     }
M apps/status-api/src/service.ts
+7, -4
 1@@ -29,6 +29,7 @@ type StatusApiRouteDefinition = StatusApiRoute & {
 2 };
 3 
 4 export interface StatusApiHandlerOptions {
 5+  truthSourceBaseUrl?: string;
 6   controlApiBase?: string;
 7   publicBaseUrl?: string;
 8   version?: string;
 9@@ -155,6 +156,7 @@ export async function handleStatusApiRequest(
10 
11 function buildStatusApiDescribeData(options: StatusApiHandlerOptions): Record<string, unknown> {
12   const processInfo = getProcessInfo();
13+  const truthSourceBaseUrl = options.truthSourceBaseUrl ?? options.controlApiBase ?? "http://100.71.210.78:4317";
14 
15   return {
16     name: "baa-conductor-status-api",
17@@ -165,9 +167,9 @@ function buildStatusApiDescribeData(options: StatusApiHandlerOptions): Record<st
18     uptime_sec: processInfo.uptimeSec,
19     cwd: processInfo.cwd,
20     truth_source: {
21-      summary: "Current truth comes from control-api /v1/system/state.",
22-      type: "control-api",
23-      base_url: options.controlApiBase ?? "https://control-api.makefile.so",
24+      summary: "Current truth comes from conductor /v1/system/state.",
25+      type: "conductor-api",
26+      base_url: truthSourceBaseUrl,
27       endpoint: "/v1/system/state"
28     },
29     endpoints: STATUS_API_ROUTE_DEFINITIONS.map((route) => ({
30@@ -189,7 +191,8 @@ function buildStatusApiDescribeData(options: StatusApiHandlerOptions): Record<st
31     ],
32     notes: [
33       "Status API is read-only.",
34-      "If control-api is unreachable, status-api cannot mint its own truth."
35+      "Default truth source comes from BAA_CONDUCTOR_LOCAL_API.",
36+      "Use BAA_CONTROL_API_BASE only when you need a compatibility override."
37     ]
38   };
39 }
M bugs/BUG-008-codexd-second-thread-turn-timeout.md
+22, -22
 1@@ -1,5 +1,16 @@
 2 # BUG-008: codexd 第二个 session/thread 发 turn 后 app-server 响应超时,反复 reconnect 直至 failed
 3 
 4+## 当前状态(2026-03-25)
 5+
 6+按当前基线与回归测试,这个问题已随 `BUG-010` 一并修复,不再作为独立未解问题保留。
 7+
 8+新的 `apps/codexd/src/index.test.js` 回归测试覆盖了:
 9+
10+- session A 正常完成
11+- session B 收到 `Reconnecting... 2/5` / `willRetry=true`
12+- session B 的最终 `turn/completed` 以无换行尾包到达
13+- codexd 仍然把 `lastTurnStatus` 正确收口到 `completed`
14+
15 ## 现象
16 
17 向 codexd 创建第一个 session 并发 turn,成功完成(turn.completed)。
18@@ -25,18 +36,13 @@ POST /v1/codexd/turn      -> item/started + item/completed(消息已发)
19 
20 ## 根因
21 
22-当前判断(未完全确认):
23-
24-codex app-server 是单进程,内部以 thread 为单位管理会话。第一个 thread idle 后,第二个 thread 发起 turn 时,app-server 内部可能出现:
25+根因现已收口为 codexd 侧的同一个 transport 收尾缺陷:
26 
27-- 旧 thread 未完全 idle,新 thread 的响应流被阻塞
28-- app-server 在处理第二个 thread 时内部状态机卡住,不产出 output token,导致 codexd 侧等待超时
29-- timeout waiting for child process to exit 说明 codexd 等 app-server 子进程退出时超时,但子进程实际未退出(child.status: running),可能是 codexd 的生命周期管理和 app-server 的 thread 模型存在错配
30+- 第二个 session 的 turn 在 `willRetry=true` 之后,最终完成事件可能落在 stdio 流尾
31+- 旧实现如果尾部消息没有换行,就会在 `stdout end` / child close 前直接丢掉这条 `turn/completed`
32+- 上游看到的就会是“第二个 session 一直 retry、最后 failed 或挂住”,看起来像多 thread 竞争,实际是 codexd 没把最终完成态收进来
33 
34-仍需验证的点:
35-- 重启 codexd 后只建一个 session 是否稳定复现成功
36-- 两个 session 复用同一个 threadId 是否能规避
37-- app-server stdout 里是否有内容但 codexd 未读到
38+目前没有新的证据表明还存在一个独立的 app-server 多 thread 状态机问题。
39 
40 ## 复现步骤
41 
42@@ -63,7 +69,7 @@ curl -X POST http://127.0.0.1:4319/v1/codexd/turn \
43 curl http://127.0.0.1:4319/v1/codexd/sessions/<sessionId_B>
44 ```
45 
46-## 当前影响
47+## 修复前影响
48 
49 - 单次会话(只建一个 session)可以正常完成,链路通
50 - 多 session 场景(顺序创建多个 thread)大概率失败
51@@ -71,17 +77,7 @@ curl http://127.0.0.1:4319/v1/codexd/sessions/<sessionId_B>
52 
53 ## 修复建议
54 
55-### 方案 A(推荐调查方向):复用已有 thread,不每次新建
56-
57-如果 app-server 单进程内多 thread 存在状态竞争,先在 codexd 里改为默认复用同一个 thread,通过 turn 的上下文区分会话,而不是每个 session 建独立 thread。
58-
59-### 方案 B:session 结束时显式关闭 thread
60-
61-在创建新 session 前,先对旧 session 发 close,确保 app-server 内旧 thread 完全 idle 后再启动新 thread。
62-
63-### 方案 C:检查 codexd 读取 app-server stdout 的逻辑
64-
65-timeout waiting for child process to exit 可能是 codexd 误认为子进程已退出。检查 app-server-transport.ts 里 onClose 的触发条件,确认是否在第二个 thread 上错误触发了关闭逻辑。
 66+本条已由 transport 收尾修复覆盖(即此前列出的“方案 C:检查 codexd 读取 app-server stdout 的逻辑”),不需要为此额外引入 thread 复用或显式 thread close 逻辑。
67 
68 ## 严重程度
69 
70@@ -97,3 +93,7 @@ High —— 单次对话能通,但多轮/多 session 场景(即 duplex 核
71 - seq 59 turn.completed 出现,第一次链路完全通
72 - 第二次失败的 error detail: codexErrorInfo.responseStreamDisconnected.httpStatusCode = null,additionalDetails = "timeout waiting for child process to exit"
73 - child 进程(PID 20904)在失败后仍处于 running 状态,不是 crash
74+
75+## 剩余风险
76+
77+如果后续在线上再次复现“第二个 session 必然卡死”,且日志能证明 `turn/completed` 根本没有从 app-server 发出,那么应作为新的独立 bug 重新打开;当前代码与测试还没有看到这类证据。
M bugs/BUG-009-conductor-daemon-index-test-leaks-local-listener.md
+14, -30
 1@@ -36,21 +36,16 @@ node --test apps/conductor-daemon/src/index.test.js
 2 
 3 ## 根因
 4 
 5-当前判断基本明确:
 6+最终根因如下:
 7 
 8-- 最后一个测试位于 `/Users/george/code/baa-conductor/apps/conductor-daemon/src/index.test.js:1671`
 9-- 该测试的清理逻辑是顺序写在测试尾部的,不在 `try/finally` 里
10-- 只要在 `waitForWebSocketOpen`、`queue.next(...)`、`fetch(...)`、`socket.close(...)`、`runtime.stop()` 之前任一处卡住或抛错,清理就不会执行
11-- 清理没执行时,`ConductorRuntime` 起的本地 HTTP server 会继续监听随机端口
12+- `/Users/george/code/baa-conductor/apps/conductor-daemon/src/index.test.js` 里有直接调用 `runtime.start()` 的测试把 cleanup 写在正常路径尾部,而不是放进稳定的 `try/finally`
13+- 其中 Firefox WS 用例是最容易复现挂住的路径;另外运行时 snapshot 用例也存在同类风险,只是步骤更短
14+- 只要在 `waitForWebSocketOpen`、`queue.next(...)`、`fetch(...)`、断言或其它 `await` 处抛错,测试尾部的 `runtime.stop()` 与 `rmSync(...)` 就会被跳过
15+- `ConductorRuntime.start()` 一旦起了本地 HTTP listener,没执行 `stop()` 就会把该 listener 留在事件循环里,`node --test` 因而不退出
16 - 现场对 `73386` 发送 `SIGUSR2` 后生成的诊断报告:
17   `/Users/george/code/baa-conductor/report.20260323.231806.73386.0.001.json`
18   其中唯一仍被 `ref` 住的有效 libuv handle 是监听 `127.0.0.1:61147` 的 `tcp` handle
19 
20-仍需补的确认点:
21-
22-- 是测试里某个 `await queue.next(...)` 真正超时未返回,还是某次断言失败后直接跳过清理
23-- 是否只有最后一个 Firefox WS 测试缺 `finally`,还是同文件里其它起 server 的测试也需要统一收口
24-
25 ## 复现步骤
26 
27 ```bash
28@@ -76,30 +71,19 @@ jq '.libuv' /Users/george/code/baa-conductor/report.*.<child_node_pid>.*.json
29 - 会在本机留下随机本地端口 listener,干扰后续排障
30 - 当前证据指向测试/清理路径问题,不指向线上 `conductor-daemon` 主服务逻辑
31 
32-## 修复建议
33-
34-### 方案 A(推荐)
35-
36-把 `/Users/george/code/baa-conductor/apps/conductor-daemon/src/index.test.js:1671` 这个测试改成严格的 `try/finally` 收口:
37-
38-- `runtime.start()` 后立即建立清理责任
39-- `finally` 里无条件执行 `queue.stop()`、`socket.close(...)`、`await runtime.stop()`、`rmSync(...)`
40-- `socket.close(...)` 最好等待 `close` 事件或至少做好幂等清理
41-
42-### 方案 B
43-
44-给所有会起本地 HTTP server / WebSocket server 的测试加统一 helper,例如:
45-
46-- `withRuntimeFixture(...)`
47-- helper 内部统一 `start/stop/cleanup`
48+## 修复落地
49 
50-这样可以避免单个测试遗漏收尾。
51+已合入的修复保持在测试层最小范围:
52 
53-### 方案 C
54+- 新增 `withRuntimeFixture(...)`,把 `ConductorRuntime` 的创建、`runtime.stop()` 和 `stateDir` 删除统一收进 helper 的 `finally`
55+- 将 runtime snapshot 用例和 Firefox WS 用例都切到该 helper,避免任一断言失败时跳过 cleanup
56+- Firefox WS 用例额外把 `queue.stop()`、`socket.close(...)` 和等待 `close` 事件放进自身 `finally`,确保测试侧 WebSocket 也不会悬挂
57+- 新增一个最小回归用例,显式在 `runtime.start()` 之后抛错,并验证原端口已经返回 `ECONNREFUSED`,证明 listener 会在异常路径下被关闭
58 
59-给相关测试补一个更短的 fail-fast 超时,而不是完全依赖 `--test-timeout=0`。
60+## 剩余风险
61 
62-即使清理再漏,测试也不会无限挂住。
63+- 其它需要本地 listener 的测试如果将来绕过 `withRuntimeFixture(...)`、重新把 cleanup 写回用例尾部,仍可能再次引入同类泄漏
64+- 该修复没有改动 `ConductorRuntime.stop()` 本身;如果未来运行时关闭逻辑内部出现阻塞,这个测试层收口只能保证“会调用 stop”,不能替代运行时关闭路径的专项测试
65 
66 ## 严重程度
67 
M bugs/BUG-010-codexd-turn-status-stuck-inprogress.md
+26, -24
 1@@ -1,5 +1,11 @@
 2 # BUG-010: codexd turn 状态停留在 inProgress,未更新为 completed
 3 
 4+## 当前状态(2026-03-25)
 5+
 6+已修复。
 7+
 8+`apps/codexd/src/app-server-transport.ts` 现在会在 stdio 连接关闭前先冲刷尾部缓冲区,确保最后一条没有换行的 JSON-RPC 消息也会被交给 `CodexAppServerClient`。`apps/codexd/src/index.test.js` 新增了顺序两个 session 的回归测试,覆盖第二个 turn 先 `willRetry=true`、再以无换行尾包发出 `turn/completed` 的路径。
 9+
10 ## 现象
11 
12 向 codexd 提交 turn 后,Codex 实际上已经产出完整回复(agentMessage 在 events.jsonl 中有记录),但 session 的 `lastTurnStatus` 始终停留在 `inProgress`,不更新为 `completed`。
13@@ -27,22 +33,24 @@ POST /v1/codex/turn { sessionId, input }
14 
15 ## 根因
16 
17-当前判断(未完全确认):
18+根因已经确认:
19 
20-codex app-server 通过 stdio 和 codexd 通信。Codex 产出回复后,app-server 需要发送 turn 完成的通知(如 `thread/turn/completed` 或类似事件),codexd 收到后才更新 session 状态。
21+`codexd` 的 stdio transport 以前只在读到换行时才向上游交付一条 JSON-RPC 消息;如果 app-server 最后一条消息正好是没有换行的 `turn/completed`,随后 `stdout` 结束或 child 退出,transport 会直接 `onClose`,但不会先冲刷尾部缓冲区。
22 
23-`Reconnecting... N/5` + `responseStreamDisconnected` + `timeout waiting for child process to exit` 说明 codexd 在等待 app-server 发送完成信号时,stdio 流断开了或子进程退出了,导致:
24+结果就是:
25 
26-1. codexd 未收到 turn completed 通知
27-2. session 状态停在 inProgress
28-3. codexd 尝试重连 app-server,但 turn 的完成状态已经丢失
29+1. agentMessage / retry error 已经到达,说明 turn 实际执行完成或接近完成
30+2. 最后一条 `turn/completed` 卡在 transport buffer 里,没有进入 `CodexAppServerClient`
31+3. `CodexdDaemon` 没收到完成事件,session 的 `lastTurnStatus` 就停在 `inProgress`
32 
33-agentMessage 能写入 events.jsonl,说明内容已经流过来了,但后续的 turn 完成握手没有成功完成。
34+这说明问题在 codexd 自己的 transport / lifecycle 收尾边界,不是 Codex 没有生成回复。
35 
36-仍需验证:
37-- app-server 是否发出了 turn/completed 事件
38-- codexd 的 app-server-transport.ts 是否在 stdio 关闭时提前触发了 onClose
39-- 重连后 codexd 是否尝试恢复 turn 状态
40+## 修复落点
41+
42+- `apps/codexd/src/app-server-transport.ts`
43+  在 `stdout end`、stream error、process error、`process exit` 这些关闭路径统一先冲刷尾部缓冲,再触发 `onClose`
44+- `apps/codexd/src/index.test.js`
45+  增加真实 stdio transport 回归测试,覆盖第一个 session 完成后,第二个 session 先收到 `Reconnecting... 2/5`,最终再以无换行尾包发出 `turn/completed` 的路径
46 
47 ## 复现步骤
48 
49@@ -71,7 +79,7 @@ grep agentMessage /Users/george/code/baa-conductor/logs/codexd/codexd/events.jso
50 # 预期:能看到完整回复内容,证明 Codex 已经回答了
51 ```
52 
53-## 当前影响
54+## 修复前影响
55 
56 High —— 虽然 Codex 能正常回答,但调用方(conductor / AI caller)无法通过 session 状态 API 得知 turn 已完成,导致:
57 
58@@ -81,17 +89,7 @@ High —— 虽然 Codex 能正常回答,但调用方(conductor / AI caller
59 
60 ## 修复建议
61 
62-### 方案 A(推荐调查方向):检查 app-server-transport.ts 的 onClose 时机
63-
64-确认 stdio 流在 Codex 回复完成后是否提前触发了关闭,导致 codexd 认为连接断开而未处理 turn/completed 事件。
65-
66-### 方案 B:在 codexd daemon 里增加 turn 超时完成兜底
67-
68-如果检测到 agentMessage 的 item/completed 事件已经收到,但 turn 状态还未更新,超过一定时间后主动将 turn 标记为 completed。
69-
70-### 方案 C:在重连逻辑里恢复 turn 状态
71-
72-重连成功后,检查是否有已收到完整内容但未完成的 turn,主动将其推进到 completed。
 73+已按此前列出的方案 A(检查 `app-server-transport.ts` 的 onClose 时机)修复:transport 现在会在触发 `onClose` 前先冲刷尾部缓冲区;没有引入超时、轮询或额外 completion 兜底。
74 
75 ## 严重程度
76 
77@@ -103,6 +101,10 @@ High —— 核心链路(发消息 → 得到完成状态)不通,调用方
78 
79 ## 备注
80 
81-- BUG-008 和本 bug 密切相关,BUG-008 描述的是多 session 失败,本 bug 更精确地定位到:即使单 session 也存在 turn 状态不更新的问题
82+- BUG-008 和本 bug 是同一条 transport 收尾缺陷在多 session 路径上的放大表现;见 `BUG-008` 当前状态说明
83 - events.jsonl 里已确认的完整 agentMessage:「可以,我现在能稳定工作。」,对应 turn 019d1b95-b947-72c1-b6c2-02ee0a56202e
84 - lastTurnStatus 可能的值:inProgress / completed / failed;问题是 completed 从未出现
85+
86+## 剩余风险
87+
88+如果未来再次出现 `stdout` 提前结束,但最后根本没有发出合法的 `turn/completed` 消息,这会是新的 child / transport 故障,不属于这次修复覆盖的范围。
M bugs/README.md
+5, -5
 1@@ -2,15 +2,15 @@
 2 
 3 当前目录只保留:
 4 
 5-- 仍未修复、仍有效的 bug
 6+- 当前仍需保留记录的 bug
 7 - 一个通用 `BUG-TEMPLATE.md`
 8 - 对应 bug 的修复任务卡
 9 
10-当前有效 bug:
11+按当前工作区状态,这 3 个 bug 都已完成修复,但仍保留文档作为根因与回归记录:
12 
13-1. `BUG-008` — codexd 第二个 session/thread 发 turn 容易超时(已部分缓解,待正式关闭)
14-2. `BUG-009` — conductor-daemon 测试遗留 HTTP listener 导致进程挂住(无修复卡)
15-3. `BUG-010` — codexd turn 状态卡在 inProgress,与 BUG-008 相关(无修复卡)
16+1. `BUG-008` — 已随 `BUG-010` 一并修复;如果未来再次出现“根本没有发出合法 turn/completed 就断流”,应作为新 bug reopen
17+2. `BUG-009` — 已修复;如果后续测试绕开 `withRuntimeFixture(...)` 或关闭路径本身阻塞,仍需新开专项问题
18+3. `BUG-010` — 已修复;当前剩余风险与 child / transport 提前断流场景有关
19 
20 修复任务:
21 
M docs/api/README.md
+6, -3
 1@@ -31,7 +31,7 @@
 2 | conductor-daemon local-api | `BAA_CONDUCTOR_LOCAL_API`,默认可用值如 `http://127.0.0.1:4317` | 本地真相源;承接 describe/health/version/capabilities/browser/system/controllers/tasks/codex/host-ops |
 3 | codexd local-api | `BAA_CODEXD_LOCAL_API_BASE`,默认可用值如 `http://127.0.0.1:4319` | 独立 `codexd` 本地服务;支持 `GET /describe` 自描述;`conductor-daemon` 的 `/v1/codex/*` 只代理到这里 |
 4 | conductor-daemon local-firefox-ws | 由 `BAA_CONDUCTOR_LOCAL_API` 派生,例如 `ws://127.0.0.1:4317/ws/firefox` | 本地 Firefox 插件双向 bridge;复用同一个 listener,不单独开公网端口 |
 5-| status-api local view | `http://127.0.0.1:4318` | 本地只读状态 JSON 和 HTML 视图,不承担公网入口角色 |
 6+| status-api local view | `http://127.0.0.1:4318` | 本地只读状态 JSON 和 HTML 视图;默认从 `BAA_CONDUCTOR_LOCAL_API` 的 `/v1/system/state` 取数,不承担公网入口角色 |
 7 
 8 ## Describe First
 9 
10@@ -275,8 +275,11 @@ host-ops 约定:
11 
12 truth source:
13 
14-- 当前应优先回源 `conductor-daemon local-api /v1/system/state`
15-- 它负责把该状态整理成 JSON 或 HTML
16+- 当前默认应优先回源 `BAA_CONDUCTOR_LOCAL_API`,也就是当前 canonical local API `http://100.71.210.78:4317/v1/system/state`
17+- `https://conductor.makefile.so` 是同一套 conductor 主接口的公网入口;只有本地 `4317` 不可达时才需要显式改到公网
18+- `BAA_CONTROL_API_BASE` 只保留为兼容覆盖入口,供旧配置或遗留脚本继续工作
19+- legacy `control-api.makefile.so` 不再是默认或 canonical truth source
20+- `status-api` 负责把该状态整理成 JSON 或 HTML
21 
22 当前端点:
23 
M docs/runtime/README.md
+4, -3
 1@@ -18,8 +18,9 @@
 2 - codexd local API: `http://127.0.0.1:4319`
 3 - codexd event stream: `ws://127.0.0.1:4319/v1/codexd/events`
 4 - canonical public host: `https://conductor.makefile.so`
 5-- `status-api` `http://100.71.210.78:4318` 只作为本地只读观察面
 6-- `BAA_CONTROL_API_BASE` 仍保留为兼容变量名,但默认值已经收口到 `https://conductor.makefile.so`
 7+- `status-api` `http://100.71.210.78:4318` 只作为本地只读观察面,默认回源 `BAA_CONDUCTOR_LOCAL_API`,当前 canonical 值是 `http://100.71.210.78:4317`
 8+- `https://conductor.makefile.so` 是同一套 conductor 主接口的公网入口
 9+- `BAA_CONTROL_API_BASE` 仍保留为兼容变量名;只在旧脚本或需要兼容覆盖时才使用,不再是 canonical truth source
10 - 推荐仓库路径:`/Users/george/code/baa-conductor`
11 - repo 内的 plist 只作为模板;真正加载的是脚本渲染出来的安装副本
12 
13@@ -27,7 +28,7 @@
14 
15 - `conductor`: `launchd` 托管的主控制面,承载 `4317` 本地 API
16 - `codexd`: `launchd` 托管的独立 Codex 运行面,只走 `codex app-server` 路线,监听 `127.0.0.1:4319`
17-- `status-api`: `launchd` 托管的本地只读观察面,监听 `4318`
18+- `status-api`: `launchd` 托管的本地只读观察面,监听 `4318`,默认读取 `4317` 上的 conductor `/v1/system/state`
19 
20 `codexd` 正式能力面只保留:
21 
M docs/runtime/codexd.md
+2, -0
1@@ -110,6 +110,8 @@ BAA_CODEXD_LOCAL_API_BASE=http://127.0.0.1:4319
2 
3 - `error` 且 `willRetry=true` 只记入 `recentEvents`,session 仍保持当前 turn 进行中
4 - 只有终态失败才把 session 的 `lastTurnStatus` 写成 `failed`
5+- stdio transport 在 child `stdout` 结束或进程退出前,会先冲刷最后一个未换行的 JSON-RPC 消息,再关闭连接;`turn/completed` 不会再因为尾包没有换行而丢失
6+- 顺序创建多个 session / thread 时,即使后一个 turn 先收到 `willRetry=true` 的错误,再在流尾收到最终完成事件,session 的 `lastTurnStatus` 也应收口到 `completed`
7 
8 ## 运行职责边界
9 
M docs/runtime/environment.md
+7, -2
 1@@ -8,6 +8,8 @@
 2 - codexd event stream: `ws://127.0.0.1:4319/v1/codexd/events`
 3 - canonical public host: `https://conductor.makefile.so`
 4 - local status view: `http://100.71.210.78:4318`
 5+- `status-api` 默认真相源:`BAA_CONDUCTOR_LOCAL_API` -> `http://100.71.210.78:4317/v1/system/state`
 6+- `BAA_CONTROL_API_BASE` 默认只保留为兼容覆盖,当前兼容值是 `https://conductor.makefile.so`
 7 
 8 ## 共享变量
 9 
10@@ -22,7 +24,8 @@
11 说明:
12 
13 - `BAA_CONTROL_API_BASE` 是兼容变量名,当前主要给 `status-api` 和遗留脚本使用
14-- 它的默认值已经收口到 `https://conductor.makefile.so`
15+- `status-api` 现在会优先读取 `BAA_CONDUCTOR_LOCAL_API`,只有缺少该值时才回退到 `BAA_CONTROL_API_BASE`
 16+- `BAA_CONTROL_API_BASE` 的默认兼容值已经收口到 `https://conductor.makefile.so`
17 - `codexd` 独立安装时不要求 `BAA_SHARED_TOKEN`
18 
19 ## codexd 变量
20@@ -80,7 +83,7 @@ BAA_STATUS_API_HOST=100.71.210.78
21 BAA_CONTROL_API_BASE=https://conductor.makefile.so
22 ```
23 
24-最后一项只是兼容旧代码的变量名;默认目标已经与 canonical public host 对齐。
25+最后一项只是兼容旧代码的变量名;`status-api` 的默认真相源仍然是 `BAA_CONDUCTOR_LOCAL_API=http://100.71.210.78:4317`。
26 
27 说明:
28 
29@@ -115,3 +118,5 @@ Firefox WS 派生规则:
30   --codexd-local-api-base http://127.0.0.1:4319 \
31   --status-api-host 100.71.210.78
32 ```
33+
34+这里的 `--control-api-base` 只是在安装副本里写入兼容变量;`status-api` 默认仍然优先读取 `--local-api-base` 对应的 conductor 主接口。
M docs/runtime/launchd.md
+4, -1
 1@@ -6,7 +6,7 @@
 2 
 3 - `conductor` 由 `launchd` 托管,并承载 canonical local API `http://100.71.210.78:4317`
 4 - `codexd` 由 `launchd` 托管,并作为正式独立 Codex 运行面监听 `http://127.0.0.1:4319`
 5-- `status-api` 随默认安装一起部署,但只作为本地只读观察面
 6+- `status-api` 随默认安装一起部署,但只作为本地只读观察面,默认从 `BAA_CONDUCTOR_LOCAL_API` 读取 `/v1/system/state`
 7 - 工作目录固定到 `/Users/george/code/baa-conductor`
 8 - 通过仓库内脚本统一安装、启动、停止、重启与验证
 9 
10@@ -45,6 +45,7 @@
11 说明:
12 
13 - `codexd` 独立安装时不需要共享 token
14+- `status-api` 默认真相源是 `BAA_CONDUCTOR_LOCAL_API`
15 - `--control-api-base` 仍然保留,只是为了写入兼容变量 `BAA_CONTROL_API_BASE`
16 - `--codexd-local-api-base` 会同时写给 `codexd` 和 `conductor`
17 - `codexd` 正式运行面只写入 `app-server` 会话链路所需默认值
18@@ -113,6 +114,8 @@
19   --status-api-host 100.71.210.78
20 ```
21 
22+这里保留 `--control-api-base https://conductor.makefile.so` 只是为了把兼容变量写进安装副本;`status-api` 实际默认仍然优先读 `--local-api-base http://100.71.210.78:4317`。
23+
24 单独安装 `codexd`:
25 
26 ```bash
M docs/runtime/node-verification.md
+3, -1
 1@@ -4,7 +4,7 @@
 2 
 3 - `conductor` `http://100.71.210.78:4317`
 4 - `codexd` `http://127.0.0.1:4319`
 5-- `status-api` `http://100.71.210.78:4318`
 6+- `status-api` `http://100.71.210.78:4318`,默认读取 `http://100.71.210.78:4317/v1/system/state`
 7 
 8 其中 `codexd` 的正式产品面仍是 `status / sessions / turn / events`,但 on-node 运维探针只要求 `/healthz` 和 `/v1/codexd/status`。会话级端到端链路单独由仓库 smoke 覆盖。
 9 
10@@ -30,6 +30,7 @@ npx --yes pnpm -r build
11 说明:
12 
13 - `--control-api-base` 仍是当前静态检查参数,但只用于校验兼容变量 `BAA_CONTROL_API_BASE`
14+- `status-api` 的有效默认真相源仍然是 `--local-api-base http://100.71.210.78:4317`
15 - `check-launchd.sh` 现在也会校验 `conductor` 安装副本里的 `BAA_CODEXD_LOCAL_API_BASE`
16 - `check-launchd.sh` 现在会校验 `codexd` 的监听地址、事件流路径、日志目录、状态目录和 `app-server` child 配置
17 - 这些静态检查不包含 run/exec 路线
18@@ -61,6 +62,7 @@ npx --yes pnpm -r build
19 - `conductor` 是否监听 `4317` 并返回 `/healthz`、`/readyz`、`/rolez`、`/v1/codex`
20 - `codexd` 是否监听 `4319` 并返回 `/healthz`、`/v1/codexd/status`
21 - `status-api` 是否监听 `4318` 并返回 `/healthz`、`/v1/status`
22+- `status-api /v1/status` 是否能跟随同机 `conductor` 的 `/v1/system/state`
23 - 不要求探测 run/exec 路线
24 
25 ## 3. 会话链路 smoke
M ops/cloudflare/README.md
+8, -51
 1@@ -1,55 +1,12 @@
 2-# Cloudflare Worker / D1 操作约定
 3+# Cloudflare legacy 说明
 4 
 5-本目录只负责 `apps/control-api-worker` 的最小部署骨架,不包含真实 Cloudflare 账号信息。
 6+`apps/control-api-worker` 与配套的 Cloudflare Worker / D1 入口已经从当前仓库退役。
 7 
 8-## 绑定与入口
 9+本目录不再提供以下 legacy 入口:
10 
11-- Worker 配置文件:`apps/control-api-worker/wrangler.jsonc`
12-- Worker 名称:`baa-conductor-control-api`
13-- 入口文件:`dist/index.js`
14-- 自定义域:`control-api.makefile.so`
15-- D1 binding:`CONTROL_DB`
16-- D1 数据库名模板:`baa-conductor-control-prod`
17+- `deploy-control-api-worker.sh`
18+- `apply-control-api-d1-migrations.sh`
19+- `control-api-worker.secrets.example.env`
20+- `scripts/cloudflare/prepare-control-api-local-db.mjs`
21 
22-当前 Worker 代码和部署配置统一使用以下运行时变量:
23-
24-- 普通变量:`CONTROL_API_VERSION`、`CONTROL_API_AUTH_REQUIRED`
25-- Secret:`BAA_SHARED_TOKEN`、`CONTROL_API_BROWSER_ADMIN_TOKEN`、`CONTROL_API_CONTROLLER_TOKEN`、`CONTROL_API_OPS_ADMIN_TOKEN`、`CONTROL_API_READONLY_TOKEN`、`CONTROL_API_WORKER_TOKEN`
26-
27-## 首次部署前
28-
29-1. 把 `apps/control-api-worker/wrangler.jsonc` 中的 `database_id` / `preview_database_id` 替换成真实 D1 UUID。
30-2. 以 `control-api-worker.secrets.example.env` 为模板,准备不入库的 secret 清单。
31-3. 确保本机已配置 `CLOUDFLARE_API_TOKEN`,或者已通过 `wrangler login` 完成鉴权。
32-
33-## 推荐命令
34-
35-应用远端 D1 migrations:
36-
37-```bash
38-./ops/cloudflare/apply-control-api-d1-migrations.sh
39-```
40-
41-构建并部署 Worker:
42-
43-```bash
44-./ops/cloudflare/deploy-control-api-worker.sh
45-```
46-
47-查看线上日志:
48-
49-```bash
50-cd apps/control-api-worker
51-npx --yes wrangler@4 tail --config wrangler.jsonc
52-```
53-
54-## Secret 下发
55-
56-建议按 `control-api-worker.secrets.example.env` 的键名逐个执行:
57-
58-```bash
59-cd apps/control-api-worker
60-printf '%s' 'replace-me' | npx --yes wrangler@4 secret put BAA_SHARED_TOKEN --config wrangler.jsonc
61-```
62-
63-其余 `CONTROL_API_*_TOKEN` secret 同理。`CONTROL_API_AUTH_REQUIRED` 已在 `wrangler.jsonc` 里固定为 `"true"`,因此生产环境缺失 secret 时会直接拒绝请求,而不是静默降级为匿名访问。
64+当前主线控制面已经转到 `conductor-daemon + codexd + 本地浏览器桥`。如果需要确认现行架构口径,请查看仓库根目录的 `README.md` 和 `DESIGN.md`。
D ops/cloudflare/apply-control-api-d1-migrations.sh
+0, -25
 1@@ -1,25 +0,0 @@
 2-#!/usr/bin/env bash
 3-set -euo pipefail
 4-
 5-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 6-REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
 7-APP_DIR="${REPO_ROOT}/apps/control-api-worker"
 8-CONFIG_PATH="${APP_DIR}/wrangler.jsonc"
 9-
10-if [[ ! -f "${CONFIG_PATH}" ]]; then
11-  echo "wrangler config not found: ${CONFIG_PATH}" >&2
12-  exit 1
13-fi
14-
15-DEFAULT_DATABASE_NAME="$(sed -n 's/.*"database_name"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' "${CONFIG_PATH}" | head -n 1)"
16-DATABASE_NAME="${1:-${DEFAULT_DATABASE_NAME}}"
17-
18-if [[ -z "${DATABASE_NAME}" ]]; then
19-  echo "Unable to determine database_name from ${CONFIG_PATH}." >&2
20-  exit 1
21-fi
22-
23-shift $(( $# > 0 ? 1 : 0 ))
24-
25-cd "${APP_DIR}"
26-npx --yes wrangler@4 d1 migrations apply "${DATABASE_NAME}" --config wrangler.jsonc --remote "$@"
D ops/cloudflare/control-api-worker.secrets.example.env
+0, -9
 1@@ -1,9 +0,0 @@
 2-# Copy this file outside the repo, fill in real values, then push each key with:
 3-# printf '%s' "$VALUE" | npx --yes wrangler@4 secret put <KEY> --config apps/control-api-worker/wrangler.jsonc
 4-
 5-BAA_SHARED_TOKEN=replace-me
 6-CONTROL_API_BROWSER_ADMIN_TOKEN=replace-me
 7-CONTROL_API_CONTROLLER_TOKEN=replace-me
 8-CONTROL_API_OPS_ADMIN_TOKEN=replace-me
 9-CONTROL_API_READONLY_TOKEN=replace-me
10-CONTROL_API_WORKER_TOKEN=replace-me
D ops/cloudflare/deploy-control-api-worker.sh
+0, -24
 1@@ -1,24 +0,0 @@
 2-#!/usr/bin/env bash
 3-set -euo pipefail
 4-
 5-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 6-REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
 7-APP_DIR="${REPO_ROOT}/apps/control-api-worker"
 8-CONFIG_PATH="${APP_DIR}/wrangler.jsonc"
 9-PLACEHOLDER_DATABASE_ID="00000000-0000-0000-0000-000000000000"
10-
11-if [[ ! -f "${CONFIG_PATH}" ]]; then
12-  echo "wrangler config not found: ${CONFIG_PATH}" >&2
13-  exit 1
14-fi
15-
16-if rg --fixed-strings --quiet "${PLACEHOLDER_DATABASE_ID}" "${CONFIG_PATH}"; then
17-  echo "Replace database_id / preview_database_id in ${CONFIG_PATH} before deploying." >&2
18-  exit 1
19-fi
20-
21-cd "${REPO_ROOT}"
22-npx --yes pnpm --filter @baa-conductor/control-api-worker build
23-
24-cd "${APP_DIR}"
25-npx --yes wrangler@4 deploy --config wrangler.jsonc "$@"
M ops/launchd/so.makefile.baa-status-api.plist
+2, -0
1@@ -2,6 +2,8 @@
2 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3 <!--
4   Local read-only status API for the mini runtime.
5+  Default truth comes from BAA_CONDUCTOR_LOCAL_API /v1/system/state.
6+  BAA_CONTROL_API_BASE is kept only as a compatibility override.
7   Keep the same runtime paths as conductor and worker-runner so that service
8   logs and temporary files stay under one repo-owned runtime root.
9   Use scripts/runtime/install-launchd.sh to render the actual install copy.
M plans/STATUS_SUMMARY.md
+26, -6
 1@@ -2,7 +2,14 @@
 2 
 3 ## 当前时间
 4 
 5-- `2026-03-24`
 6+- `2026-03-25`
 7+
 8+## 当前代码基线
 9+
10+- 主线基线:`main@7c9f009`
11+- 任务文档已统一收口到 `tasks/`
12+- 当前活动任务见 `tasks/TASK_OVERVIEW.md`
13+- 当前工作区已落地 `T-S001` 到 `T-S004` 的改动,但还没提交回主线
14 
15 ## 当前状态
16 
17@@ -11,6 +18,10 @@
18 - canonical 公网入口:`https://conductor.makefile.so`
19 - 历史主备资料已从主线移除
20 - 回溯 tag:`ha-failover-archive-2026-03-22`
21+- `apps/control-api-worker` 目录当前工作区已不存在
22+- `ops/cloudflare/` 当前只剩 legacy README
23+- `tests/control-api/` 当前只剩一份 legacy absence smoke 测试文件
24+- `status-api` 当前默认读 `BAA_CONDUCTOR_LOCAL_API` / `4317`,`BAA_CONTROL_API_BASE` 只保留为兼容覆盖入口
25 
26 ## 当前在线面
27 
28@@ -26,13 +37,22 @@
29 - `mini` 的 codexd
30 - launchd 安装与检查脚本
31 - Firefox 插件子目录 `plugins/baa-firefox`
32+- `packages/host-ops` 以及 `/v1/exec`、`/v1/files/read`、`/v1/files/write`
33 - `/v1/browser/*`、本地 `/ws/firefox` 和 browser-control smoke
34 - 单节点的 Nginx / DNS 计划脚本
35-- 迁移期兼容件:`apps/status-api`、`ops/cloudflare/**`、`tests/control-api/**`、Cloudflare / D1 相关脚手架
36+- 迁移期兼容件:`apps/status-api`、`ops/cloudflare/**`、`tests/control-api/**`、`BAA_CONTROL_API_BASE`
37+
38+## 最近完成
39+
40+- `T-S001`:`codexd` transport 现在会在关闭前冲刷尾部缓冲区,`BUG-010` 已修复,`BUG-008` 不再作为独立未解问题保留
41+- `T-S002`:legacy Worker / D1 入口已从当前工作区收口;`tests/control-api/**` 改为 absence smoke,`ops/cloudflare/**` 收敛到 legacy 说明
42+- `T-S003`:`status-api`、launchd 模板和 runtime 文档已切到当前 `conductor` 主接口,`BAA_CONTROL_API_BASE` 只保留为兼容覆盖
43+- `T-S004`:`conductor-daemon` 测试已统一通过 `withRuntimeFixture(...)` 收口 cleanup,`BUG-009` 已修复
44 
45 ## 当前仍需关注
46 
47-- `status-api` 仍默认通过 `BAA_CONTROL_API_BASE` 指向 legacy control plane
48-- `tests/control-api/**` 仍引用已删除的 `apps/control-api-worker`
49-- `ops/cloudflare/**` 仍保留已删除 Worker 的部署/文档残留
50-- `BUG-009`、`BUG-010` 仍未关闭;`BUG-008` 仅算部分缓解
51+- 如果后续再次出现 app-server 根本没有发出合法 `turn/completed` 就提前断流,这属于新的 child / transport 故障,应单独新开 bug
52+- `BAA_CONTROL_API_BASE` 仍保留在仓库里,定位是兼容入口,不是 canonical truth source
53+- 按任务边界没有完全清掉的 legacy 命名仍散落在部分历史任务文档、缺陷文档和兼容说明里
54+- 如果未来新增 runtime 测试绕开 `withRuntimeFixture(...)`,同类 listener 泄漏仍可能重新出现
55+- 这次没有改 `ConductorRuntime.stop()` 内部逻辑;如果未来关闭路径本身阻塞,还需要单独补运行时层测试
D scripts/cloudflare/prepare-control-api-local-db.mjs
+0, -33
 1@@ -1,33 +0,0 @@
 2-import { parseArgs } from "node:util";
 3-
 4-import {
 5-  DEFAULT_LOCAL_CONTROL_API_DB_PATH,
 6-  prepareLocalControlApiDatabase,
 7-  resolveLocalControlApiDatabasePath
 8-} from "../../apps/control-api-worker/local/sqlite-d1.mjs";
 9-
10-const cliArgs = process.argv.slice(2).filter((value) => value !== "--");
11-const { values } = parseArgs({
12-  args: cliArgs,
13-  options: {
14-    db: {
15-      type: "string",
16-      default: DEFAULT_LOCAL_CONTROL_API_DB_PATH
17-    },
18-    resetDb: {
19-      type: "boolean",
20-      default: false
21-    }
22-  }
23-});
24-
25-const database = prepareLocalControlApiDatabase({
26-  databasePath: values.db,
27-  resetDatabase: values.resetDb
28-});
29-
30-database.close();
31-
32-console.log(
33-  `Prepared local control-api sqlite database at ${resolveLocalControlApiDatabasePath(values.db)}`
34-);
M scripts/runtime/check-launchd.sh
+3, -1
 1@@ -20,7 +20,7 @@ Options:
 2   --install-dir PATH        Validate installed copies under this directory.
 3   --shared-token TOKEN      Expect this exact token in installed copies.
 4   --shared-token-file PATH  Read the expected token from a file.
 5-  --control-api-base URL    Expected BAA_CONTROL_API_BASE.
 6+  --control-api-base URL    Expected compatibility BAA_CONTROL_API_BASE.
 7   --local-api-base URL      Expected BAA_CONDUCTOR_LOCAL_API.
 8   --local-api-allowed-hosts CSV
 9                             Expected BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS.
10@@ -40,6 +40,8 @@ Options:
11 
12 Notes:
13   If no service is specified, conductor + codexd + status-api are checked.
 14+  status-api defaults to BAA_CONDUCTOR_LOCAL_API /v1/system/state;
 15+  --control-api-base only validates the legacy compatibility override.
16   codexd static checks only validate app-server launchd wiring; they do not
17   require /v1/codexd/runs* or codex exec as formal runtime capabilities.
18 EOF
M scripts/runtime/check-node.sh
+3, -1
 1@@ -20,7 +20,7 @@ Options:
 2   --install-dir PATH           Validate installed copies under this directory.
 3   --shared-token TOKEN         Expect this exact token in installed copies.
 4   --shared-token-file PATH     Read the expected token from a file.
 5-  --control-api-base URL       Expected BAA_CONTROL_API_BASE in installed copies.
 6+  --control-api-base URL       Expected compatibility BAA_CONTROL_API_BASE in installed copies.
 7   --local-api-base URL         Conductor local API base URL. Defaults to 127.0.0.1:4317.
 8   --local-api-allowed-hosts CSV
 9                                Expected BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS in installed copies.
10@@ -41,6 +41,8 @@ Options:
11 Notes:
12   The default runtime check set is conductor + codexd + status-api. Use
13   --service to narrow the scope or --all-services to include worker-runner.
 14+  status-api defaults to BAA_CONDUCTOR_LOCAL_API /v1/system/state;
 15+  --control-api-base only checks the installed compatibility override.
16   conductor HTTP probes include /v1/codex to ensure proxy wiring to codexd.
17   For codexd, the HTTP probes only cover /healthz and /v1/codexd/status.
18   /v1/codexd/runs* and codex exec are not part of node verification.
M scripts/runtime/install-launchd.sh
+5, -1
 1@@ -20,7 +20,7 @@ Options:
 2   --install-dir PATH        Override launchd install directory.
 3   --shared-token TOKEN      Shared token written into the install copy.
 4   --shared-token-file PATH  Read the shared token from a file.
 5-  --control-api-base URL    Override BAA_CONTROL_API_BASE.
 6+  --control-api-base URL    Override compatibility BAA_CONTROL_API_BASE.
 7   --local-api-base URL      Override BAA_CONDUCTOR_LOCAL_API.
 8   --local-api-allowed-hosts CSV
 9                             Override BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS.
10@@ -39,6 +39,8 @@ Notes:
11   If no service is specified, conductor + codexd + status-api are installed.
12   Use --service codexd to render codexd independently; it does not require a
13   shared token.
 14+  status-api defaults to BAA_CONDUCTOR_LOCAL_API /v1/system/state;
 15+  --control-api-base only writes the legacy compatibility override.
16   codexd launchd wiring stays on app-server mode and does not expose
17   /v1/codexd/runs* or codex exec as a formal service contract.
18 EOF
19@@ -238,6 +240,8 @@ for service in "${services[@]}"; do
20   plist_set_string "$install_path" ":EnvironmentVariables:LC_ALL" "$BAA_RUNTIME_DEFAULT_LOCALE"
21   plist_set_string "$install_path" ":EnvironmentVariables:BAA_CONDUCTOR_HOST" "$conductor_host"
22   plist_set_string "$install_path" ":EnvironmentVariables:BAA_CONDUCTOR_ROLE" "$conductor_role"
23+  # Keep the legacy env name for compatibility; status-api now defaults to
24+  # BAA_CONDUCTOR_LOCAL_API when both are present.
25   plist_set_string "$install_path" ":EnvironmentVariables:BAA_CONTROL_API_BASE" "$control_api_base"
26   plist_set_string "$install_path" ":EnvironmentVariables:BAA_CONDUCTOR_LOCAL_API" "$local_api_base"
27   plist_set_string "$install_path" ":EnvironmentVariables:BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS" "$local_api_allowed_hosts"
M tasks/T-S004.md
+23, -6
 1@@ -1,3 +1,9 @@
 2+---
 3+status: completed
 4+base_ref: main@7c9f009
 5+updated_at: 2026-03-25 00:21:37 +0800
 6+---
 7+
 8 # Task T-S004:修复 conductor-daemon 测试 listener 泄漏
 9 
10 ## 直接给对话的提示词
11@@ -14,7 +20,7 @@
12 
13 - 仓库:`/Users/george/code/baa-conductor`
14 - 分支:`main`
15-- 提交:`e62e98d`
16+- 提交:`7c9f009`
17 - 开工要求:不要从其他任务分支切出;如需新分支,从当前 `main` 新切
18 
19 ## 建议分支名
20@@ -124,20 +130,31 @@
21 
22 ## files_changed
23 
24-- 待完成
25+- `/Users/george/code/baa-conductor/apps/conductor-daemon/src/index.test.js`
26+- `/Users/george/code/baa-conductor/bugs/BUG-009-conductor-daemon-index-test-leaks-local-listener.md`
27+- `/Users/george/code/baa-conductor/tasks/T-S004.md`
28 
29 ## commands_run
30 
31-- 待完成
32+- `node --check /Users/george/code/baa-conductor/apps/conductor-daemon/src/index.test.js`
33+- `node --test --test-name-pattern='ConductorRuntime fixture closes the local API listener when a started test aborts|ConductorRuntime exposes a local Firefox websocket bridge over the local API listener|ConductorRuntime exposes a minimal runtime snapshot for CLI and status surfaces' /Users/george/code/baa-conductor/apps/conductor-daemon/src/index.test.js`
34+- `node --test /Users/george/code/baa-conductor/apps/conductor-daemon/src/index.test.js`
35+- `node --test /Users/george/code/baa-conductor/apps/conductor-daemon/src/index.test.js`
36+- `npx --yes pnpm -C /Users/george/code/baa-conductor -F @baa-conductor/conductor-daemon test`
37+- `git -C /Users/george/code/baa-conductor diff --check`
38 
39 ## result
40 
41-- 待完成
42+- 在 `index.test.js` 新增 `withRuntimeFixture(...)`,把 `ConductorRuntime` 的 `stop()` 和 `stateDir` 删除固定收进 `finally`
43+- 将 runtime snapshot 用例和 Firefox WS 用例切到该 helper,避免断言失败或 `await` 抛错时跳过 cleanup
44+- 新增异常路径回归测试:在 `runtime.start()` 之后主动抛错,再校验原端口返回 `ECONNREFUSED`
45+- `BUG-009` 已回写最终根因、修复方式和剩余风险
46 
47 ## risks
48 
49-- 待完成
50+- 未来如果有新的 runtime 测试绕过 `withRuntimeFixture(...)`、重新手写尾部 cleanup,可能再次引入同类 listener 泄漏
51+- 这次修复没有改动 `ConductorRuntime.stop()` 本身;如果运行时关闭逻辑未来出现内部阻塞,需要单独补运行时层测试
52 
53 ## next_handoff
54 
55-- 待完成
56+- 当前无需额外交接;后续新增会起本地 listener 的测试时,优先复用 `withRuntimeFixture(...)`
M tasks/TASK_OVERVIEW.md
+8, -7
 1@@ -9,22 +9,23 @@
 2 - `control-api.makefile.so`、Cloudflare Worker、D1 只剩迁移期兼容残留
 3 - `baa-hand` / `baa-shell` 只保留为接口语义参考,不再作为主系统维护
 4 - 当前任务卡都放在本目录
 5-- 当前任务基线:`main@e62e98d`
 6+- 当前任务基线:`main@7c9f009`
 7 
 8-## 当前活动任务
 9+## 最近完成任务
10 
11-按优先级和可并行性,当前活动任务是:
12+当前 4 张主线收口任务已经在工作区完成实现:
13 
14 1. [`T-S001.md`](./T-S001.md):修复 codexd turn 完成状态
15 2. [`T-S002.md`](./T-S002.md):清理 `control-api-worker` 残留与坏测试
16 3. [`T-S003.md`](./T-S003.md):切换 `status-api` 到 `conductor` 主接口
17 4. [`T-S004.md`](./T-S004.md):修复 `conductor-daemon` 测试 listener 泄漏
18 
19-并行建议:
20+当前没有新的活动任务卡;如果要继续往下拆,建议围绕这些剩余风险开新任务:
21 
22-- `T-S001`、`T-S002` 优先,且可以并行
23-- `T-S003`、`T-S004` 也可以并行,但优先级略低于前两项
24-- 新任务继续按“允许修改的目录”尽量拆成零重叠范围
25+- `BAA_CONTROL_API_BASE` 兼容入口是否继续保留,还是继续收口
26+- 历史任务 / 缺陷文档里残余的 `control-api` 命名清理
27+- `ConductorRuntime.stop()` 关闭路径的专项运行时测试
28+- 新的 child / transport 故障兜底与 reopen 规则
29 
30 ## 任务文档约定
31 
M tests/control-api/control-api-smoke.test.mjs
+46, -17
 1@@ -1,25 +1,54 @@
 2 import assert from "node:assert/strict";
 3+import { existsSync } from "node:fs";
 4+import path from "node:path";
 5 import test from "node:test";
 6+import { fileURLToPath } from "node:url";
 7 
 8-import { runMinimalControlApiSmoke } from "../../apps/control-api-worker/local/harness.mjs";
 9+const repoRoot = path.resolve(
10+  path.dirname(fileURLToPath(import.meta.url)),
11+  "../.."
12+);
13 
14-test("control-api smoke completes a minimal local D1 read/write loop", async () => {
15-  const summary = await runMinimalControlApiSmoke();
16+function repoPath(...segments) {
17+  return path.join(repoRoot, ...segments);
18+}
19 
20-  assert.equal(summary.mode, "paused");
21-  assert.equal(summary.holder_id, "mini-local");
22-  assert.equal(summary.task_status, "queued");
23-  assert.match(summary.task_id, /^task_/u);
24-  assert.ok(summary.term >= 1);
25+test("legacy control-api worker entrypoints are no longer part of the repo", () => {
26+  assert.equal(
27+    existsSync(repoPath("apps", "control-api-worker", "local", "harness.mjs")),
28+    false
29+  );
30+  assert.equal(
31+    existsSync(repoPath("apps", "control-api-worker", "wrangler.jsonc")),
32+    false
33+  );
34+  assert.equal(
35+    existsSync(repoPath("apps", "control-api-worker", ".gitignore")),
36+    false
37+  );
38 });
39 
40-test("control-api allows unauthenticated requests when auth is explicitly disabled", async () => {
41-  const summary = await runMinimalControlApiSmoke({
42-    authRequired: false,
43-    requestWithoutToken: true
44-  });
45-
46-  assert.equal(summary.mode, "paused");
47-  assert.equal(summary.holder_id, "mini-local");
48-  assert.equal(summary.task_status, "queued");
49+test("legacy cloudflare and D1 helper entrypoints are removed with the worker", () => {
50+  assert.equal(
51+    existsSync(repoPath("ops", "cloudflare", "deploy-control-api-worker.sh")),
52+    false
53+  );
54+  assert.equal(
55+    existsSync(
56+      repoPath("ops", "cloudflare", "apply-control-api-d1-migrations.sh")
57+    ),
58+    false
59+  );
60+  assert.equal(
61+    existsSync(
62+      repoPath("ops", "cloudflare", "control-api-worker.secrets.example.env")
63+    ),
64+    false
65+  );
66+  assert.equal(
67+    existsSync(
68+      repoPath("scripts", "cloudflare", "prepare-control-api-local-db.mjs")
69+    ),
70+    false
71+  );
72 });