- commit
- 8a3a964
- parent
- 02b3d6a
- author
- im_wower
- date
- 2026-03-23 23:02:20 +0800 CST
fix(codexd): keep retrying turns in progress
3 files changed,
+138,
-21
+6,
-2
1@@ -798,9 +798,13 @@ export class CodexdDaemon {
2 case "turn.error":
3 await this.patchSessionsByThreadId(event.threadId, (session) => ({
4 ...session,
5- currentTurnId: session.currentTurnId === event.turnId ? null : session.currentTurnId,
6+ currentTurnId: event.willRetry
7+ ? session.currentTurnId ?? event.turnId
8+ : session.currentTurnId === event.turnId
9+ ? null
10+ : session.currentTurnId,
11 lastTurnId: event.turnId,
12- lastTurnStatus: "failed",
13+ lastTurnStatus: event.willRetry ? "inProgress" : "failed",
14 updatedAt: new Date().toISOString()
15 }));
16 break;
+127,
-19
1@@ -96,11 +96,13 @@ class FakeChild {
2 }
3
4 class FakeAppServerAdapter {
5- constructor(defaultCwd) {
6+ constructor(defaultCwd, options = {}) {
7 this.defaultCwd = defaultCwd;
8 this.events = new FakeEventStream();
9 this.nextThreadId = 1;
10 this.nextTurnId = 1;
11+ this.retryingTurnCompletionDelayMs = options.retryingTurnCompletionDelayMs ?? 0;
12+ this.retryingTurnNumbers = new Set(options.retryingTurnNumbers ?? []);
13 this.sessions = new Map();
14 }
15
16@@ -169,7 +171,8 @@ class FakeAppServerAdapter {
17 throw new Error(`unknown thread ${params.threadId}`);
18 }
19
20- const turnId = `turn-${this.nextTurnId}`;
21+ const turnNumber = this.nextTurnId;
22+ const turnId = `turn-${turnNumber}`;
23 this.nextTurnId += 1;
24 const turn = {
25 error: null,
26@@ -186,29 +189,56 @@ class FakeAppServerAdapter {
27 turn,
28 type: "turn.started"
29 });
30- this.events.emit({
31- delta: "hello from fake adapter",
32- itemId: "item-1",
33- notificationMethod: "item/agentMessage/delta",
34- threadId: params.threadId,
35- turnId,
36- type: "turn.message.delta"
37- });
38+ const completeTurn = () => {
39+ this.events.emit({
40+ delta: "hello from fake adapter",
41+ itemId: "item-1",
42+ notificationMethod: "item/agentMessage/delta",
43+ threadId: params.threadId,
44+ turnId,
45+ type: "turn.message.delta"
46+ });
47
48- const completedTurn = {
49- ...turn,
50- status: "completed"
51+ const completedTurn = {
52+ ...turn,
53+ status: "completed"
54+ };
55+
56+ session.thread.turns = session.thread.turns.map((entry) =>
57+ entry.id === completedTurn.id ? completedTurn : entry
58+ );
59+ this.events.emit({
60+ notificationMethod: "turn/completed",
61+ threadId: params.threadId,
62+ turn: completedTurn,
63+ type: "turn.completed"
64+ });
65 };
66
67- session.thread.turns = session.thread.turns.map((entry) =>
68- entry.id === completedTurn.id ? completedTurn : entry
69- );
70+ if (!this.retryingTurnNumbers.has(turnNumber)) {
71+ completeTurn();
72+ return;
73+ }
74+
75 this.events.emit({
76- notificationMethod: "turn/completed",
77+ error: {
78+ additionalDetails: "timeout waiting for child process to exit",
79+ codexErrorInfo: {
80+ responseStreamDisconnected: {
81+ httpStatusCode: null
82+ }
83+ },
84+ message: "Reconnecting... 2/5"
85+ },
86+ notificationMethod: "error",
87 threadId: params.threadId,
88- turn: completedTurn,
89- type: "turn.completed"
90+ turnId,
91+ type: "turn.error",
92+ willRetry: true
93 });
94+ setTimeout(() => {
95+ completeTurn();
96+ }, this.retryingTurnCompletionDelayMs);
97 });
98
99 return {
100@@ -439,6 +469,84 @@ test("CodexdLocalService starts the local HTTP surface and supports status, sess
101 }
102 });
103
104+test("CodexdDaemon keeps retrying turns in progress until the app-server completes them", async () => {
105+ const repoRoot = mkdtempSync(join(tmpdir(), "codexd-retry-test-"));
106+ const config = resolveCodexdConfig({
107+ logsDir: join(repoRoot, "logs"),
108+ repoRoot,
109+ serverEndpoint: "ws://127.0.0.1:9999/codex-app-server",
110+ serverStrategy: "external",
111+ stateDir: join(repoRoot, "state")
112+ });
113+ const adapter = new FakeAppServerAdapter(repoRoot, {
114+ retryingTurnCompletionDelayMs: 100,
115+ retryingTurnNumbers: [2]
116+ });
117+ const daemon = new CodexdDaemon(config, {
118+ appServerClientFactory: {
119+ create() {
120+ return adapter;
121+ }
122+ },
123+ env: {
124+ HOME: repoRoot
125+ }
126+ });
127+
128+ await daemon.start();
129+
130+ try {
131+ const firstSession = await daemon.createSession({
132+ cwd: repoRoot,
133+ purpose: "duplex"
134+ });
135+ await daemon.createTurn({
136+ input: "Say hello.",
137+ sessionId: firstSession.sessionId
138+ });
139+ await waitFor(() => {
140+ const current = daemon.getSession(firstSession.sessionId);
141+ return current?.lastTurnStatus === "completed" ? current : null;
142+ });
143+
144+ const secondSession = await daemon.createSession({
145+ cwd: repoRoot,
146+ purpose: "duplex"
147+ });
148+ const secondTurn = await daemon.createTurn({
149+ input: "Retry and finish.",
150+ sessionId: secondSession.sessionId
151+ });
152+
153+ const retryingSession = await waitFor(() => {
154+ const current = daemon.getSession(secondSession.sessionId);
155+ const sawRetryEvent = daemon
156+ .getStatusSnapshot()
157+ .recentEvents.events.some(
158+ (event) =>
159+ event.type === "app-server.turn.error" &&
160+ event.detail?.turnId === secondTurn.turnId &&
161+ event.detail?.willRetry === true
162+ );
163+
164+ return sawRetryEvent && current?.currentTurnId === secondTurn.turnId ? current : null;
165+ });
166+ assert.equal(retryingSession.currentTurnId, secondTurn.turnId);
167+ assert.equal(retryingSession.lastTurnId, secondTurn.turnId);
168+ assert.equal(retryingSession.lastTurnStatus, "inProgress");
169+
170+ const completedSession = await waitFor(() => {
171+ const current = daemon.getSession(secondSession.sessionId);
172+ return current?.lastTurnStatus === "completed" ? current : null;
173+ });
174+ assert.equal(completedSession.currentTurnId, null);
175+ assert.equal(completedSession.lastTurnId, secondTurn.turnId);
176+ assert.equal(completedSession.lastTurnStatus, "completed");
177+ } finally {
178+ await daemon.stop();
179+ }
180+});
181+
182 async function fetchJson(url, init) {
183 const response = await fetch(url, init);
184
+5,
-0
1@@ -106,6 +106,11 @@ BAA_CODEXD_LOCAL_API_BASE=http://127.0.0.1:4319
2 - recent event cache
3 - 结构化事件日志
4
5+`codexd` 对 app-server turn 的状态口径也要区分中间态和终态:
6+
7+- `error` 且 `willRetry=true` 属于中间态:事件照常记入 `recentEvents`,session 保留(或补上)`currentTurnId`,并把 `lastTurnStatus` 写成 `inProgress`,即当前 turn 仍在进行中
8+- 只有终态失败才把 session 的 `lastTurnStatus` 写成 `failed`
9+
10 ## 运行职责边界
11
12 - `launchd` 负责开机自启动和硬重启