baa-conductor

git clone 

commit
9869d57
parent
8a2524a
author
im_wower
date
2026-03-30 02:47:13 +0800 CST
feat: add watchdog with heartbeat, timed wake, fallback and progress tracking
4 files changed,  +225, -0
A tools/watchdog/.gitignore
+1, -0
1@@ -0,0 +1 @@
2+watchdog.log
A tools/watchdog/README.md
+97, -0
 1@@ -0,0 +1,97 @@
 2+# Watchdog — Claude 对话续命看门狗
 3+
 4+## 原理
 5+
 6+1. Claude 写心跳文件 `/tmp/claude_heartbeat.json`
 7+2. Claude 写进度文件 `/tmp/claude_progress.json`(当前任务、做到哪了)
 8+3. 看门狗每 60 秒检查心跳,按条件续命
 9+4. Claude 被唤醒后先读进度文件,接上之前的工作
10+
11+## 状态机
12+
13+| status | 含义 | 看门狗行为 |
14+|--------|------|------------|
15+| working | 干活中 | 不动(除非兜底超时) |
16+| waiting | 等待续命 | 按条件续命 |
17+| renewed | 已续命 | 不动 |
18+| paused | 暂停 | 跳过 |
19+| done | 完成 | 看门狗退出 |
20+
21+## 唤醒条件(三层,任一满足)
22+
23+1. **定时唤醒**(仅 waiting 状态):`wake_at > 0 && now >= wake_at`(我判断任务要10分钟就设10分钟后)
24+2. **空闲超时**(仅 waiting 状态):`wake_at == 0 && idle > timeout`(不确定多久,靠超时兜底)
25+3. **兜底上限**:`max_wake_at > 0 && now >= max_wake_at`(除 done/paused 外,无论什么状态都叫醒,防止彻底卡死)
26+
27+## 心跳文件 `/tmp/claude_heartbeat.json`
28+
29+```json
30+{
31+  "ts": 1774808270,
32+  "timeout": 300,
33+  "status": "working",
34+  "wake_at": 0,
35+  "max_wake_at": 1774809870,
36+  "tab": "OpenClaw的GUI控制机制 - Claude",
37+  "renewal_msg": "watchdog续命:请继续"
38+}
39+```
40+
41+## 进度文件 `/tmp/claude_progress.json`
42+
43+被唤醒后先读这个文件,知道之前在做什么、做到哪了:
44+
45+```json
46+{
47+  "updated_at": 1774808270,
48+  "task": "conductor开发:files/read kind修复 + 日志落盘 + T-S049",
49+  "subtasks": [
50+    {"id": "fix-kind", "status": "done", "note": "一行改动已提交"},
51+    {"id": "log-ingest", "status": "in_progress", "note": "codex在写,预计5分钟"},
52+    {"id": "T-S049", "status": "pending"}
53+  ],
54+  "codex_windows": ["baa-conductor — codex"],
55+  "claude_windows": ["claude --dangerously-skip-permissions"],
56+  "next_action": "检查codex是否完成log-ingest,完成则合并并开始T-S049"
57+}
58+```
59+
60+## 工作流
61+
62+```
63+Claude 开始工作:
64+  1. 写心跳 status=working
65+  2. 写进度 task/subtasks/next_action
66+  3. 分配任务给 codex/claude
67+  4. 设心跳 status=waiting, wake_at=N分钟后, max_wake_at=30分钟后
68+  5. 停止工具调用,等看门狗唤醒
69+
70+看门狗唤醒 Claude:
71+  → Safari 发续命消息
72+
73+Claude 被唤醒:
74+  1. 读进度文件,知道之前在做什么
75+  2. 检查各 codex/claude 窗口状态
76+  3. 继续工作
77+```
78+
79+## 用法
80+
81+```bash
82+# 启动
83+nohup bash tools/watchdog/watchdog.sh 60 &
84+
85+# 发消息
86+bash tools/watchdog/a11y_msg.sh "标签页全名" "消息"
87+
88+# 停止看门狗
89+python3 -c "import json; d=json.load(open('/tmp/claude_heartbeat.json')); d['status']='done'; json.dump(d,open('/tmp/claude_heartbeat.json','w'))"
90+```
91+
92+## 文件
93+
94+- watchdog.sh — 看门狗主循环
95+- a11y_msg.sh — Safari 无障碍消息发送
96+- watchdog.log — 运行日志
97+- /tmp/claude_heartbeat.json — 心跳(Claude 写;看门狗读,续命后回写 status/ts)
98+- /tmp/claude_progress.json — 进度(Claude 读写)
A tools/watchdog/a11y_msg.sh
+41, -0
 1@@ -0,0 +1,41 @@
 2+#!/bin/bash
 3+# a11y message sender: switch Safari to the named tab, then paste the message and press Return
 4+# Usage:   a11y_msg.sh <full tab name> <message>
 5+# Example: a11y_msg.sh "OpenClaw的GUI控制机制 - Claude" "续命消息"
 6+# Exits 1 on missing arguments or when no tab matches. Tab name and message are interpolated into AppleScript unescaped.
 7+TAB_NAME="$1"
 8+shift
 9+MSG="$*"
10+
11+if [ -z "$TAB_NAME" ] || [ -z "$MSG" ]; then
12+    echo "用法: $0 <标签页全名> <消息>"
13+    exit 1
14+fi
15+
16+# 1. Switch to the tab whose name matches exactly; abort if none does so the paste below cannot land in an unrelated tab
17+MATCH=$(osascript -e "tell application \"Safari\"
18+    repeat with w in windows
19+        repeat with t in tabs of w
20+            if name of t is \"$TAB_NAME\" then
21+                set current tab of w to t
22+                set index of w to 1
23+                return \"matched\"
24+            end if
25+        end repeat
26+    end repeat
27+    return \"not found\"
28+end tell" 2>/dev/null)
29+[ "$MATCH" = "matched" ] || { echo "tab not found: $TAB_NAME" >&2; exit 1; }
30+sleep 1
31+
32+# 2. Put the message on the clipboard, paste it into the frontmost Safari window, press Return
33+osascript -e "set the clipboard to \"$MSG\""
34+osascript -e 'tell application "System Events" to tell process "Safari"
35+    set frontmost to true
36+    delay 0.5
37+    keystroke "v" using command down
38+    delay 0.5
39+    keystroke return
40+end tell' 2>/dev/null
41+
42+echo "SENT to [$TAB_NAME]"
A tools/watchdog/watchdog.sh
+86, -0
 1@@ -0,0 +1,86 @@
 2+#!/bin/bash
 3+# Watchdog v3: timed wake-up + fallback deadline + progress file
 4+# Heartbeat statuses: working/waiting/renewed/paused/done
 5+# Renewal conditions (any one triggers a renewal message):
 6+#   1. status is waiting, wake_at > 0 and now >= wake_at (timed wake-up)
 7+#   2. max_wake_at > 0 and now >= max_wake_at (fallback; any status except done/paused)
 8+#   3. status is waiting, wake_at == 0 and idle > timeout (idle timeout, legacy mode)
 9+# Usage: watchdog.sh [check interval in seconds, default 60]
10+SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
11+CHECK_INTERVAL=${1:-60}
12+HEARTBEAT=/tmp/claude_heartbeat.json
13+LOG="${SCRIPT_DIR}/watchdog.log"
14+
15+echo "[$(date)] watchdog v3 started, interval=${CHECK_INTERVAL}s" >> "$LOG"
16+
17+while true; do
18+    sleep "$CHECK_INTERVAL"
19+    [ ! -f "$HEARTBEAT" ] && continue
20+    # Evaluate the heartbeat in Python; it prints one record: status, idle, timeout, tab, need, msg, reason, wake_at, max_wake
21+    RESULT=$(python3 << PYEOF
22+import json, time
23+try:
24+    d = json.load(open("$HEARTBEAT"))
25+    now       = int(time.time())
26+    ts        = d.get("ts", 0)
27+    timeout   = d.get("timeout", 300)
28+    status    = d.get("status", "unknown")
29+    tab       = d.get("tab", "")
30+    msg       = d.get("renewal_msg", "watchdog续命")
31+    wake_at   = d.get("wake_at", 0)
32+    max_wake  = d.get("max_wake_at", 0)
33+    idle      = now - ts
34+    need      = "no"
35+    reason    = ""
36+
37+    if status in ("done", "paused"):
38+        pass
39+    elif max_wake > 0 and now >= max_wake:
40+        need = "yes"
41+        reason = f"max_wake_at reached ({now}>={max_wake})"
42+    elif status == "waiting":
43+        if wake_at > 0 and now >= wake_at:
44+            need = "yes"
45+            reason = f"wake_at reached ({now}>={wake_at})"
46+        elif wake_at == 0 and idle > timeout:
47+            need = "yes"
48+            reason = f"idle timeout ({idle}s>{timeout}s)"
49+
50+    print("\x1f".join(str(v) for v in (status, idle, timeout, tab, need, msg, reason, wake_at, max_wake)))
51+except Exception:
52+    print("\x1f".join(("error", "0", "0", "", "no", "", "", "0", "0")))
53+PYEOF
54+)
55+    # Fields are separated by the ASCII unit separator (0x1f) so printable characters in the tab title or message cannot shift them
56+    IFS=$'\x1f' read -r ST IDLE TOUT TAB NEED MSG REASON WAKE MAXW <<< "$RESULT"
57+
58+    if [ "$ST" = "done" ]; then
59+        echo "[$(date)] EXIT: done" >> "$LOG"
60+        exit 0
61+    fi
62+
63+    if [ "$ST" = "paused" ]; then
64+        echo "[$(date)] PAUSED" >> "$LOG"
65+        continue
66+    fi
67+
68+    if [ "$NEED" = "yes" ]; then
69+        echo "[$(date)] RENEWAL: $REASON" >> "$LOG"
70+        bash "${SCRIPT_DIR}/a11y_msg.sh" "$TAB" "$MSG" || { echo "[$(date)] RENEWAL FAILED: a11y_msg.sh exited non-zero, heartbeat left unchanged" >> "$LOG"; continue; }
71+        python3 << PYEOF2
72+import json, time
73+with open("$HEARTBEAT") as f: d = json.load(f)
74+d["ts"] = int(time.time())
75+d["status"] = "renewed"
76+d["wake_at"] = 0
77+d["max_wake_at"] = 0
78+with open("$HEARTBEAT", "w") as f: json.dump(d, f)
79+PYEOF2
80+        echo "[$(date)] RENEWED" >> "$LOG"
81+    else
82+        extra=""
83+        [ "$WAKE" != "0" ] && extra=" wake_at=$WAKE"
84+        [ "$MAXW" != "0" ] && extra="$extra max=$MAXW"
85+        echo "[$(date)] OK: status=$ST idle=${IDLE}s/${TOUT}s$extra" >> "$LOG"
86+    fi
87+done