baa-conductor

git clone 

commit: 8a3a964
parent: 02b3d6a
author: im_wower
date: 2026-03-23 23:02:20 +0800 CST
fix(codexd): keep retrying turns in progress
3 files changed, +138, -21
M apps/codexd/src/daemon.ts
+6, -2
 1@@ -798,9 +798,13 @@ export class CodexdDaemon {
 2       case "turn.error":
 3         await this.patchSessionsByThreadId(event.threadId, (session) => ({
 4           ...session,
 5-          currentTurnId: session.currentTurnId === event.turnId ? null : session.currentTurnId,
 6+          currentTurnId: event.willRetry
 7+            ? session.currentTurnId ?? event.turnId
 8+            : session.currentTurnId === event.turnId
 9+              ? null
10+              : session.currentTurnId,
11           lastTurnId: event.turnId,
12-          lastTurnStatus: "failed",
13+          lastTurnStatus: event.willRetry ? "inProgress" : "failed",
14           updatedAt: new Date().toISOString()
15         }));
16         break;
M apps/codexd/src/index.test.js
+127, -19
  1@@ -96,11 +96,13 @@ class FakeChild {
  2 }
  3 
  4 class FakeAppServerAdapter {
  5-  constructor(defaultCwd) {
  6+  constructor(defaultCwd, options = {}) {
  7     this.defaultCwd = defaultCwd;
  8     this.events = new FakeEventStream();
  9     this.nextThreadId = 1;
 10     this.nextTurnId = 1;
 11+    this.retryingTurnCompletionDelayMs = options.retryingTurnCompletionDelayMs ?? 0;
 12+    this.retryingTurnNumbers = new Set(options.retryingTurnNumbers ?? []);
 13     this.sessions = new Map();
 14   }
 15 
 16@@ -169,7 +171,8 @@ class FakeAppServerAdapter {
 17       throw new Error(`unknown thread ${params.threadId}`);
 18     }
 19 
 20-    const turnId = `turn-${this.nextTurnId}`;
 21+    const turnNumber = this.nextTurnId;
 22+    const turnId = `turn-${turnNumber}`;
 23     this.nextTurnId += 1;
 24     const turn = {
 25       error: null,
 26@@ -186,29 +189,56 @@ class FakeAppServerAdapter {
 27         turn,
 28         type: "turn.started"
 29       });
 30-      this.events.emit({
 31-        delta: "hello from fake adapter",
 32-        itemId: "item-1",
 33-        notificationMethod: "item/agentMessage/delta",
 34-        threadId: params.threadId,
 35-        turnId,
 36-        type: "turn.message.delta"
 37-      });
 38+      const completeTurn = () => {
 39+        this.events.emit({
 40+          delta: "hello from fake adapter",
 41+          itemId: "item-1",
 42+          notificationMethod: "item/agentMessage/delta",
 43+          threadId: params.threadId,
 44+          turnId,
 45+          type: "turn.message.delta"
 46+        });
 47 
 48-      const completedTurn = {
 49-        ...turn,
 50-        status: "completed"
 51+        const completedTurn = {
 52+          ...turn,
 53+          status: "completed"
 54+        };
 55+
 56+        session.thread.turns = session.thread.turns.map((entry) =>
 57+          entry.id === completedTurn.id ? completedTurn : entry
 58+        );
 59+        this.events.emit({
 60+          notificationMethod: "turn/completed",
 61+          threadId: params.threadId,
 62+          turn: completedTurn,
 63+          type: "turn.completed"
 64+        });
 65       };
 66 
 67-      session.thread.turns = session.thread.turns.map((entry) =>
 68-        entry.id === completedTurn.id ? completedTurn : entry
 69-      );
 70+      if (!this.retryingTurnNumbers.has(turnNumber)) {
 71+        completeTurn();
 72+        return;
 73+      }
 74+
 75       this.events.emit({
 76-        notificationMethod: "turn/completed",
 77+        error: {
 78+          additionalDetails: "timeout waiting for child process to exit",
 79+          codexErrorInfo: {
 80+            responseStreamDisconnected: {
 81+              httpStatusCode: null
 82+            }
 83+          },
 84+          message: "Reconnecting... 2/5"
 85+        },
 86+        notificationMethod: "error",
 87         threadId: params.threadId,
 88-        turn: completedTurn,
 89-        type: "turn.completed"
 90+        turnId,
 91+        type: "turn.error",
 92+        willRetry: true
 93       });
 94+      setTimeout(() => {
 95+        completeTurn();
 96+      }, this.retryingTurnCompletionDelayMs);
 97     });
 98 
 99     return {
100@@ -439,6 +469,84 @@ test("CodexdLocalService starts the local HTTP surface and supports status, sess
101   }
102 });
103 
104+test("CodexdDaemon keeps retrying turns in progress until the app-server completes them", async () => {
105+  const repoRoot = mkdtempSync(join(tmpdir(), "codexd-retry-test-"));
106+  const config = resolveCodexdConfig({
107+    logsDir: join(repoRoot, "logs"),
108+    repoRoot,
109+    serverEndpoint: "ws://127.0.0.1:9999/codex-app-server",
110+    serverStrategy: "external",
111+    stateDir: join(repoRoot, "state")
112+  });
113+  const adapter = new FakeAppServerAdapter(repoRoot, {
114+    retryingTurnCompletionDelayMs: 100,
115+    retryingTurnNumbers: [2]
116+  });
117+  const daemon = new CodexdDaemon(config, {
118+    appServerClientFactory: {
119+      create() {
120+        return adapter;
121+      }
122+    },
123+    env: {
124+      HOME: repoRoot
125+    }
126+  });
127+
128+  await daemon.start();
129+
130+  try {
131+    const firstSession = await daemon.createSession({
132+      cwd: repoRoot,
133+      purpose: "duplex"
134+    });
135+    await daemon.createTurn({
136+      input: "Say hello.",
137+      sessionId: firstSession.sessionId
138+    });
139+    await waitFor(() => {
140+      const current = daemon.getSession(firstSession.sessionId);
141+      return current?.lastTurnStatus === "completed" ? current : null;
142+    });
143+
144+    const secondSession = await daemon.createSession({
145+      cwd: repoRoot,
146+      purpose: "duplex"
147+    });
148+    const secondTurn = await daemon.createTurn({
149+      input: "Retry and finish.",
150+      sessionId: secondSession.sessionId
151+    });
152+
153+    const retryingSession = await waitFor(() => {
154+      const current = daemon.getSession(secondSession.sessionId);
155+      const sawRetryEvent = daemon
156+        .getStatusSnapshot()
157+        .recentEvents.events.some(
158+          (event) =>
159+            event.type === "app-server.turn.error" &&
160+            event.detail?.turnId === secondTurn.turnId &&
161+            event.detail?.willRetry === true
162+        );
163+
164+      return sawRetryEvent && current?.currentTurnId === secondTurn.turnId ? current : null;
165+    });
166+    assert.equal(retryingSession.currentTurnId, secondTurn.turnId);
167+    assert.equal(retryingSession.lastTurnId, secondTurn.turnId);
168+    assert.equal(retryingSession.lastTurnStatus, "inProgress");
169+
170+    const completedSession = await waitFor(() => {
171+      const current = daemon.getSession(secondSession.sessionId);
172+      return current?.lastTurnStatus === "completed" ? current : null;
173+    });
174+    assert.equal(completedSession.currentTurnId, null);
175+    assert.equal(completedSession.lastTurnId, secondTurn.turnId);
176+    assert.equal(completedSession.lastTurnStatus, "completed");
177+  } finally {
178+    await daemon.stop();
179+  }
180+});
181+
182 async function fetchJson(url, init) {
183   const response = await fetch(url, init);
184 
M docs/runtime/codexd.md
+5, -0
 1@@ -106,6 +106,11 @@ BAA_CODEXD_LOCAL_API_BASE=http://127.0.0.1:4319
 2 - recent event cache
 3 - 结构化事件日志
 4 
 5+`codexd` 对 app-server turn 的状态口径也要区分中间态和终态:
 6+
 7+- `error` 且 `willRetry=true` 只记入 `recentEvents`,session 仍保持当前 turn 进行中
 8+- 只有终态失败才把 session 的 `lastTurnStatus` 写成 `failed`
 9+
10 ## 运行职责边界
11 
12 - `launchd` 负责开机自启动和硬重启