- commit
- 9869d57
- parent
- 8a2524a
- author
- im_wower
- date
- 2026-03-30 02:47:13 +0800 CST
feat: add watchdog with heartbeat, timed wake, fallback and progress tracking
4 files changed,
+225,
-0
+1,
-0
1@@ -0,0 +1 @@
2+watchdog.log
+97,
-0
1@@ -0,0 +1,97 @@
2+# Watchdog — Claude 对话续命看门狗
3+
4+## 原理
5+
6+1. Claude 写心跳文件 `/tmp/claude_heartbeat.json`
7+2. Claude 写进度文件 `/tmp/claude_progress.json`(当前任务、做到哪了)
8+3. 看门狗每 60 秒检查心跳,按条件续命
9+4. Claude 被唤醒后先读进度文件,接上之前的工作
10+
11+## 状态机
12+
13+| status | 含义 | 看门狗行为 |
14+|--------|------|------------|
15+| working | 干活中 | 不动(除非兜底超时) |
16+| waiting | 等待续命 | 按条件续命 |
17+| renewed | 已续命 | 不动 |
18+| paused | 暂停 | 跳过 |
19+| done | 完成 | 看门狗退出 |
20+
21+## 唤醒条件(三层,任一满足)
22+
23+1. **定时唤醒**:`status == waiting && wake_at > 0 && now >= wake_at`(我判断任务要10分钟就设10分钟后)
24+2. **空闲超时**:`status == waiting && wake_at == 0 && idle > timeout`(不确定多久,靠超时兜底)
25+3. **兜底上限**:`max_wake_at > 0 && now >= max_wake_at`(除 done/paused 外,无论什么状态都叫醒,防止彻底卡死)
26+
27+## 心跳文件 `/tmp/claude_heartbeat.json`
28+
29+```json
30+{
31+ "ts": 1774808270,
32+ "timeout": 300,
33+ "status": "working",
34+ "wake_at": 0,
35+ "max_wake_at": 1774809870,
36+ "tab": "OpenClaw的GUI控制机制 - Claude",
37+ "renewal_msg": "watchdog续命:请继续"
38+}
39+```
40+
41+## 进度文件 `/tmp/claude_progress.json`
42+
43+被唤醒后先读这个文件,知道之前在做什么、做到哪了:
44+
45+```json
46+{
47+ "updated_at": 1774808270,
48+ "task": "conductor开发:files/read kind修复 + 日志落盘 + T-S049",
49+ "subtasks": [
50+ {"id": "fix-kind", "status": "done", "note": "一行改动已提交"},
51+ {"id": "log-ingest", "status": "in_progress", "note": "codex在写,预计5分钟"},
52+ {"id": "T-S049", "status": "pending"}
53+ ],
54+ "codex_windows": ["baa-conductor — codex"],
55+ "claude_windows": ["claude --dangerously-skip-permissions"],
56+ "next_action": "检查codex是否完成log-ingest,完成则合并并开始T-S049"
57+}
58+```
59+
60+## 工作流
61+
62+```
63+Claude 开始工作:
64+ 1. 写心跳 status=working
65+ 2. 写进度 task/subtasks/next_action
66+ 3. 分配任务给 codex/claude
67+ 4. 设心跳 status=waiting, wake_at=N分钟后, max_wake_at=30分钟后
68+ 5. 停止工具调用,等看门狗唤醒
69+
70+看门狗唤醒 Claude:
71+ → Safari 发续命消息
72+
73+Claude 被唤醒:
74+ 1. 读进度文件,知道之前在做什么
75+ 2. 检查各 codex/claude 窗口状态
76+ 3. 继续工作
77+```
78+
79+## 用法
80+
81+```bash
82+# 启动
83+nohup bash tools/watchdog/watchdog.sh 60 &
84+
85+# 发消息
86+bash tools/watchdog/a11y_msg.sh "标签页全名" "消息"
87+
88+# 停止看门狗
89+python3 -c "import json; p='/tmp/claude_heartbeat.json'; d=json.load(open(p)); d['status']='done'; json.dump(d,open(p,'w'))"
90+```
91+
92+## 文件
93+
94+- watchdog.sh — 看门狗主循环
95+- a11y_msg.sh — Safari 无障碍消息发送
96+- watchdog.log — 运行日志
97+- /tmp/claude_heartbeat.json — 心跳(Claude 写;看门狗读,续命后回写 status=renewed)
98+- /tmp/claude_progress.json — 进度(Claude 读写)
+41,
-0
1@@ -0,0 +1,41 @@
2+#!/bin/bash
3+# Send a message to one Safari tab by scripting Safari and System Events via osascript.
4+# Usage:   a11y_msg.sh <full tab title> <message>
5+# Example: a11y_msg.sh "OpenClaw的GUI控制机制 - Claude" "续命消息"
6+# Exit status: 1 on bad usage or when no tab title matched; nothing is pasted in that case.
7+TAB_NAME="$1"
8+shift
9+MSG="$*"
10+
11+if [ -z "$TAB_NAME" ] || [ -z "$MSG" ]; then
12+  echo "用法: $0 <标签页全名> <消息>"
13+  exit 1
14+fi
15+
16+# 1. Select the tab titled exactly TAB_NAME (passed as argv so quotes cannot break the script).
17+FOUND=$(osascript -e 'on run argv
18+  tell application "Safari"
19+    repeat with w in windows
20+      repeat with t in tabs of w
21+        if name of t is (item 1 of argv) then
22+          set current tab of w to t
23+          set index of w to 1
24+          return "matched"
25+        end if
26+      end repeat
27+    end repeat
28+  end tell
29+  return "not found"
30+end run' "$TAB_NAME" 2>/dev/null)
31+echo "$FOUND"
32+[ "$FOUND" = "matched" ] || exit 1  # never paste into whichever tab happens to be frontmost
33+sleep 1
34+# 2. Put the message on the clipboard (again via argv), paste it, then press Return.
35+osascript -e 'on run argv' -e 'set the clipboard to (item 1 of argv)' -e 'end run' "$MSG"
36+osascript -e 'tell application "System Events" to tell process "Safari"
37+  set frontmost to true
38+  delay 0.5
39+  keystroke "v" using command down
40+  delay 0.5
41+  keystroke return
42+end tell' 2>/dev/null
43+echo "SENT to [$TAB_NAME]"
+86,
-0
1@@ -0,0 +1,86 @@
2+#!/bin/bash
3+# Watchdog v3: timed wake-up + hard deadline + progress file.
4+# Usage: watchdog.sh [check_interval_seconds]   (default 60)
5+# Heartbeat status values: working/waiting/renewed/paused/done. A renewal is sent when:
6+#   1. status=waiting, wake_at > 0 and now >= wake_at (timed wake-up)
7+#   2. max_wake_at > 0 and now >= max_wake_at (hard deadline; any status except done/paused)
8+#   3. status=waiting, wake_at == 0 and idle > timeout (idle timeout, legacy mode)
9+
10+SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
11+CHECK_INTERVAL=${1:-60}
12+HEARTBEAT=/tmp/claude_heartbeat.json
13+LOG="${SCRIPT_DIR}/watchdog.log"
14+
15+echo "[$(date)] watchdog v3 started, interval=${CHECK_INTERVAL}s" >> "$LOG"
16+
17+while true; do
18+  sleep "$CHECK_INTERVAL"
19+  [ ! -f "$HEARTBEAT" ] && continue
20+
21+  RESULT=$(python3 << PYEOF
22+import json, time
23+try:
24+    d = json.load(open("$HEARTBEAT"))
25+    now = int(time.time())
26+    ts = d.get("ts", 0)
27+    timeout = d.get("timeout", 300)
28+    status = d.get("status", "unknown")
29+    tab = d.get("tab", "")
30+    msg = d.get("renewal_msg", "watchdog续命")
31+    wake_at = d.get("wake_at", 0)
32+    max_wake = d.get("max_wake_at", 0)
33+    idle = now - ts
34+    need = "no"
35+    reason = ""
36+
37+    if status in ("done", "paused"):
38+        pass
39+    elif max_wake > 0 and now >= max_wake:
40+        need = "yes"
41+        reason = f"max_wake_at reached ({now}>={max_wake})"
42+    elif status == "waiting":
43+        if wake_at > 0 and now >= wake_at:
44+            need = "yes"
45+            reason = f"wake_at reached ({now}>={wake_at})"
46+        elif wake_at == 0 and idle > timeout:
47+            need = "yes"
48+            reason = f"idle timeout ({idle}s>{timeout}s)"
49+
50+    print("\x1f".join(str(v) for v in (status, idle, timeout, tab, need, msg, reason, wake_at, max_wake)))
51+except Exception:  # unreadable or malformed heartbeat: report status=error with need=no
52+    print("\x1f".join(["error", "0", "0", "", "no", "", "", "0", "0"]))
53+PYEOF
54+)
55+  # Fields are joined with the 0x1f unit separator so a "|" in the tab title or message cannot shift them.
56+  IFS=$'\x1f' read -r ST IDLE TOUT TAB NEED MSG REASON WAKE MAXW <<< "$RESULT"
57+
58+  if [ "$ST" = "done" ]; then
59+    echo "[$(date)] EXIT: done" >> "$LOG"
60+    exit 0
61+  fi
62+
63+  if [ "$ST" = "paused" ]; then
64+    echo "[$(date)] PAUSED" >> "$LOG"
65+    continue
66+  fi
67+
68+  if [ "$NEED" = "yes" ]; then
69+    echo "[$(date)] RENEWAL: $REASON" >> "$LOG"
70+    bash "${SCRIPT_DIR}/a11y_msg.sh" "$TAB" "$MSG" || echo "[$(date)] RENEWAL SEND FAILED" >> "$LOG"
71+    python3 << PYEOF2
72+import json, time
73+d = json.load(open("$HEARTBEAT"))
74+d["ts"] = int(time.time())
75+d["status"] = "renewed"
76+d["wake_at"] = 0
77+d["max_wake_at"] = 0
78+with open("$HEARTBEAT", "w") as f: json.dump(d, f)
79+PYEOF2
80+    echo "[$(date)] RENEWED" >> "$LOG"
81+  else
82+    extra=""
83+    [ "$WAKE" != "0" ] && extra=" wake_at=$WAKE"
84+    [ "$MAXW" != "0" ] && extra="$extra max=$MAXW"
85+    echo "[$(date)] OK: status=$ST idle=${IDLE}s/${TOUT}s$extra" >> "$LOG"
86+  fi
87+done