baa-conductor

git clone 

commit
9643b06
parent
0b59439
author
im_wower
date
2026-03-22 02:59:07 +0800 CST
feat: unblock T-028 tailscale listen
11 files changed,  +476, -15
M apps/conductor-daemon/src/index.test.js
+51, -1
 1@@ -283,6 +283,56 @@ test("parseConductorCliRequest merges launchd env defaults with CLI overrides",
 2   assert.equal(request.config.paths.runsDir, "/tmp/runs");
 3 });
 4 
 5+test("parseConductorCliRequest allows an explicitly listed Tailscale local API host", () => {
 6+  const request = parseConductorCliRequest(["config"], {
 7+    BAA_NODE_ID: "mini-main",
 8+    BAA_CONDUCTOR_HOST: "mini",
 9+    BAA_CONDUCTOR_ROLE: "primary",
10+    BAA_CONTROL_API_BASE: "https://control.example.test/",
11+    BAA_CONDUCTOR_LOCAL_API: "http://100.71.210.78:4317/",
12+    BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS: "100.71.210.78",
13+    BAA_SHARED_TOKEN: "replace-me"
14+  });
15+
16+  assert.equal(request.action, "config");
17+
18+  if (request.action !== "config") {
19+    throw new Error("expected config action");
20+  }
21+
22+  assert.equal(request.config.localApiBase, "http://100.71.210.78:4317");
23+  assert.deepEqual(request.config.localApiAllowedHosts, ["100.71.210.78"]);
24+});
25+
// Rejection paths for a Tailscale local API base:
//   1. no allow-list at all        -> the local API base itself is refused;
//   2. allow-list with a LAN host  -> the allow-list is refused.
test("parseConductorCliRequest rejects unlisted or non-Tailscale local API hosts", () => {
  // Builds the shared environment, splicing the case-specific variables in at
  // the position the allow-list variable normally occupies.
  const buildEnv = (extraEnv) => ({
    BAA_NODE_ID: "mini-main",
    BAA_CONDUCTOR_HOST: "mini",
    BAA_CONDUCTOR_ROLE: "primary",
    BAA_CONTROL_API_BASE: "https://control.example.test/",
    BAA_CONDUCTOR_LOCAL_API: "http://100.71.210.78:4317/",
    ...extraEnv,
    BAA_SHARED_TOKEN: "replace-me"
  });

  const rejectionCases = [
    [{}, /explicitly allowed Tailscale 100\.x host/],
    [{ BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS: "192.168.1.50" }, /localApiAllowedHosts/]
  ];

  for (const [extraEnv, expectedError] of rejectionCases) {
    assert.throws(() => parseConductorCliRequest(["config"], buildEnv(extraEnv)), expectedError);
  }
});
54+
55 test("handleConductorHttpRequest keeps degraded runtimes observable but not ready", () => {
56   const snapshot = {
57     daemon: {
58@@ -417,7 +467,7 @@ test("ConductorRuntime exposes a minimal runtime snapshot for CLI and status sur
59       host: "mini",
60       role: "primary",
61       controlApiBase: "https://control.example.test",
62-      localApiBase: "http://127.0.0.1:4317",
63+      localApiBase: "http://127.0.0.1:0",
64       sharedToken: "replace-me",
65       paths: {
66         runsDir: "/tmp/runs"
M apps/conductor-daemon/src/index.ts
+98, -5
  1@@ -102,6 +102,7 @@ export interface ConductorRuntimePaths {
  2 }
  3 
  4 export interface ConductorRuntimeConfig extends ConductorConfig {
  5+  localApiAllowedHosts?: readonly string[] | string | null;
  6   localApiBase?: string | null;
  7   paths?: Partial<ConductorRuntimePaths>;
  8   sharedToken?: string | null;
  9@@ -110,6 +111,7 @@ export interface ConductorRuntimeConfig extends ConductorConfig {
 10 export interface ResolvedConductorRuntimeConfig extends ConductorConfig {
 11   heartbeatIntervalMs: number;
 12   leaseRenewIntervalMs: number;
 13+  localApiAllowedHosts: string[];
 14   leaseTtlSec: number;
 15   localApiBase: string | null;
 16   paths: ConductorRuntimePaths;
 17@@ -297,6 +299,7 @@ interface CliValueOverrides {
 18   host?: string;
 19   leaseRenewIntervalMs?: string;
 20   leaseTtlSec?: string;
 21+  localApiAllowedHosts?: string;
 22   localApiBase?: string;
 23   logsDir?: string;
 24   nodeId?: string;
 25@@ -343,7 +346,8 @@ function normalizePath(value: string): string {
 26 }
 27 
 28 function normalizeLoopbackHost(hostname: string): string {
 29-  return hostname.startsWith("[") && hostname.endsWith("]") ? hostname.slice(1, -1) : hostname;
 30+  const normalized = hostname.startsWith("[") && hostname.endsWith("]") ? hostname.slice(1, -1) : hostname;
 31+  return normalized.toLowerCase();
 32 }
 33 
 34 function isLoopbackHost(hostname: string): boolean {
 35@@ -351,6 +355,85 @@ function isLoopbackHost(hostname: string): boolean {
 36   return normalized === "127.0.0.1" || normalized === "localhost" || normalized === "::1";
 37 }
 38 
 39+function isTailscaleIpv4Host(hostname: string): boolean {
 40+  const normalized = normalizeLoopbackHost(hostname);
 41+
 42+  const octets = normalized.split(".");
 43+
 44+  if (octets.length !== 4) {
 45+    return false;
 46+  }
 47+
 48+  const [firstOctetText, secondOctetText, thirdOctetText, fourthOctetText] = octets;
 49+  const firstOctet = Number(firstOctetText);
 50+  const secondOctet = Number(secondOctetText);
 51+  const thirdOctet = Number(thirdOctetText);
 52+  const fourthOctet = Number(fourthOctetText);
 53+
 54+  if (
 55+    !Number.isInteger(firstOctet) ||
 56+    !Number.isInteger(secondOctet) ||
 57+    !Number.isInteger(thirdOctet) ||
 58+    !Number.isInteger(fourthOctet)
 59+  ) {
 60+    return false;
 61+  }
 62+
 63+  if (
 64+    firstOctet < 0 ||
 65+    firstOctet > 255 ||
 66+    secondOctet < 0 ||
 67+    secondOctet > 255 ||
 68+    thirdOctet < 0 ||
 69+    thirdOctet > 255 ||
 70+    fourthOctet < 0 ||
 71+    fourthOctet > 255
 72+  ) {
 73+    return false;
 74+  }
 75+
 76+  return firstOctet === 100 && secondOctet >= 64 && secondOctet <= 127;
 77+}
 78+
 79+function parseLocalApiAllowedHosts(hosts: readonly string[] | string | null | undefined): string[] {
 80+  if (hosts == null) {
 81+    return [];
 82+  }
 83+
 84+  const tokens = typeof hosts === "string" ? hosts.split(/[,\s]+/u) : hosts;
 85+  const normalizedHosts = new Set<string>();
 86+
 87+  for (const token of tokens) {
 88+    const normalized = normalizeOptionalString(token);
 89+
 90+    if (normalized == null) {
 91+      continue;
 92+    }
 93+
 94+    const host = normalizeLoopbackHost(normalized);
 95+
 96+    if (!isLoopbackHost(host) && !isTailscaleIpv4Host(host)) {
 97+      throw new Error(
 98+        "Conductor localApiAllowedHosts must contain only loopback hosts or Tailscale 100.x IPv4 addresses."
 99+      );
100+    }
101+
102+    normalizedHosts.add(host);
103+  }
104+
105+  return [...normalizedHosts];
106+}
107+
/**
 * Decides whether the local API may use `hostname`.
 *
 * Loopback hosts are always permitted. Any other host is permitted only when
 * its normalised form appears verbatim in `allowedHosts`.
 *
 * @param hostname Host taken from the configured local API base.
 * @param allowedHosts Explicit allow-list; entries are compared as-is, so they
 *   are presumably already normalised by the caller.
 * @returns `true` when the host is loopback or explicitly listed.
 */
function isAllowedLocalApiHost(hostname: string, allowedHosts: readonly string[]): boolean {
  const candidate = normalizeLoopbackHost(hostname);

  return isLoopbackHost(candidate) || allowedHosts.includes(candidate);
}
117+
118 function readHttpPort(url: URL): number {
119   if (url.port === "") {
120     return 80;
121@@ -372,7 +455,10 @@ function formatLocalApiBaseUrl(hostname: string, port: number): string {
122   return `http://${formattedHost}${port === 80 ? "" : `:${port}`}`;
123 }
124 
125-function resolveLocalApiBase(value: string | null | undefined): string | null {
126+function resolveLocalApiBase(
127+  value: string | null | undefined,
128+  allowedHosts: readonly string[] = []
129+): string | null {
130   const normalized = normalizeOptionalString(value);
131 
132   if (normalized == null) {
133@@ -391,8 +477,10 @@ function resolveLocalApiBase(value: string | null | undefined): string | null {
134     throw new Error("Conductor localApiBase must use the http:// scheme.");
135   }
136 
137-  if (!isLoopbackHost(url.hostname)) {
138-    throw new Error("Conductor localApiBase must use a loopback host.");
139+  if (!isAllowedLocalApiHost(url.hostname, allowedHosts)) {
140+    throw new Error(
141+      "Conductor localApiBase must use a loopback host or an explicitly allowed Tailscale 100.x host."
142+    );
143   }
144 
145   if (url.pathname !== "/" || url.search !== "" || url.hash !== "") {
146@@ -1258,6 +1346,7 @@ export function resolveConductorRuntimeConfig(
147 
148   const heartbeatIntervalMs = config.heartbeatIntervalMs ?? DEFAULT_HEARTBEAT_INTERVAL_MS;
149   const leaseRenewIntervalMs = config.leaseRenewIntervalMs ?? DEFAULT_LEASE_RENEW_INTERVAL_MS;
150+  const localApiAllowedHosts = parseLocalApiAllowedHosts(config.localApiAllowedHosts);
151   const leaseTtlSec = config.leaseTtlSec ?? DEFAULT_LEASE_TTL_SEC;
152   const renewFailureThreshold = config.renewFailureThreshold ?? DEFAULT_RENEW_FAILURE_THRESHOLD;
153   const priority = config.priority ?? (config.role === "primary" ? 100 : 50);
154@@ -1286,8 +1375,9 @@ export function resolveConductorRuntimeConfig(
155     controlApiBase: normalizeBaseUrl(controlApiBase),
156     heartbeatIntervalMs,
157     leaseRenewIntervalMs,
158+    localApiAllowedHosts,
159     leaseTtlSec,
160-    localApiBase: resolveLocalApiBase(config.localApiBase),
161+    localApiBase: resolveLocalApiBase(config.localApiBase, localApiAllowedHosts),
162     paths: resolvePathConfig(config.paths),
163     preferred: config.preferred ?? config.role === "primary",
164     priority,
165@@ -1362,6 +1452,7 @@ function resolveRuntimeConfigFromSources(
166       overrides.renewFailureThreshold ?? env.BAA_CONDUCTOR_RENEW_FAILURE_THRESHOLD,
167       { minimum: 1 }
168     ),
169+    localApiAllowedHosts: overrides.localApiAllowedHosts ?? env.BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS,
170     localApiBase: normalizeOptionalString(overrides.localApiBase ?? env.BAA_CONDUCTOR_LOCAL_API),
171     sharedToken: normalizeOptionalString(overrides.sharedToken ?? env.BAA_SHARED_TOKEN),
172     paths: {
173@@ -1564,6 +1655,7 @@ function formatConfigText(config: ResolvedConductorRuntimeConfig): string {
174     `identity: ${config.nodeId}@${config.host}(${config.role})`,
175     `control_api_base: ${config.controlApiBase}`,
176     `local_api_base: ${config.localApiBase ?? "not-configured"}`,
177+    `local_api_allowed_hosts: ${config.localApiAllowedHosts.join(",") || "loopback-only"}`,
178     `priority: ${config.priority}`,
179     `preferred: ${String(config.preferred)}`,
180     `heartbeat_interval_ms: ${config.heartbeatIntervalMs}`,
181@@ -1629,6 +1721,7 @@ function getUsageText(): string {
182     "  BAA_CONDUCTOR_ROLE",
183     "  BAA_CONTROL_API_BASE",
184     "  BAA_CONDUCTOR_LOCAL_API",
185+    "  BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS",
186     "  BAA_SHARED_TOKEN",
187     "  BAA_CONDUCTOR_PRIORITY",
188     "  BAA_CONDUCTOR_VERSION",
A coordination/tasks/T-028-unblock-tailscale-listen.md
+110, -0
  1@@ -0,0 +1,110 @@
  2+---
  3+task_id: T-028A
  4+title: T-028 Tailscale 监听解阻
  5+status: review
  6+branch: feat/T-028-unblock-tailscale-listen
  7+repo: /Users/george/code/baa-conductor
  8+base_ref: main@0b59439
  9+depends_on:
 10+  - T-028
 11+write_scope:
 12+  - apps/conductor-daemon/**
 13+  - apps/status-api/**
 14+  - docs/runtime/**
 15+  - docs/ops/**
 16+  - scripts/runtime/**
 17+updated_at: 2026-03-22
 18+---
 19+
 20+# T-028 Tailscale 监听解阻
 21+
 22+## 目标
 23+
 24+解除 T-028 的当前真实阻塞:允许 VPS 通过 Tailscale `100.x` 回源到 `mini` / `mac` 的 conductor 与 status-api 监听面,同时保持默认 loopback 行为安全。
 25+
 26+## 本任务包含
 27+
 28+- 为 `conductor-daemon` 增加受控的 Tailscale `100.x` 监听支持
 29+- 为 `status-api` 补齐 launchd/runtime 层的显式 host 配置支持
 30+- 更新 runtime / ops 文档,写清节点监听与 VPS 回源约定
 31+- 运行与本次实现直接相关的本地验证
 32+
 33+## 本任务不包含
 34+
 35+- 修改 `baa-firefox`
 36+- 真实 DNS 切换
 37+- 直接改 VPS 线上 Nginx 配置
 38+- 与监听解阻无关的重构
 39+
 40+## 建议起始文件
 41+
 42+- `apps/conductor-daemon/src/index.ts`
 43+- `apps/conductor-daemon/src/index.test.js`
 44+- `apps/status-api/src/host.ts`
 45+- `scripts/runtime/install-launchd.sh`
 46+- `scripts/runtime/check-launchd.sh`
 47+- `docs/runtime/environment.md`
 48+- `docs/ops/README.md`
 49+
 50+## 交付物
 51+
 52+- 可显式配置为 Tailscale `100.x` 监听的 conductor / status-api
 53+- 更新后的 launchd/runtime 文档与脚本支持
 54+- 记录了验证命令、结果和后续交接项的任务卡
 55+
 56+## 验收
 57+
 58+- `BAA_CONDUCTOR_LOCAL_API` 不再被硬性限制为 loopback,且只接受 loopback 或显式允许的 Tailscale `100.x`
 59+- `status-api` 默认仍监听 `127.0.0.1`,但能通过配置监听节点 Tailscale `100.x`
 60+- runtime / ops 文档说明 mini / mac 配置与 VPS 回源地址
 61+- `typecheck`
 62+- `build`
 63+- conductor-daemon 相关测试
 64+- status-api 最小 smoke 或启动验证
 65+- `git diff --check`
 66+
 67+## files_changed
 68+
 69+- `apps/conductor-daemon/src/index.ts`
 70+- `apps/conductor-daemon/src/index.test.js`
 71+- `scripts/runtime/install-launchd.sh`
 72+- `scripts/runtime/check-launchd.sh`
 73+- `ops/launchd/so.makefile.baa-status-api.plist`
 74+- `docs/runtime/environment.md`
 75+- `docs/runtime/launchd.md`
 76+- `docs/runtime/node-verification.md`
 77+- `docs/ops/README.md`
 78+- `docs/ops/failover-topology.md`
 79+- `coordination/tasks/T-028-unblock-tailscale-listen.md`
 80+
 81+## commands_run
 82+
 83+- `git worktree add -b feat/T-028-unblock-tailscale-listen /Users/george/code/baa-conductor-T028-unblock-tailscale-listen 0b59439`
 84+- `npx --yes pnpm install`
 85+- `npx --yes pnpm --filter @baa-conductor/conductor-daemon typecheck`
 86+- `npx --yes pnpm --filter @baa-conductor/conductor-daemon build`
 87+- `npx --yes pnpm --filter @baa-conductor/conductor-daemon test`
 88+- `npx --yes pnpm typecheck`
 89+- `npx --yes pnpm build`
 90+- `node apps/status-api/dist/index.js smoke`
 91+- `bash -n scripts/runtime/common.sh scripts/runtime/install-launchd.sh scripts/runtime/check-launchd.sh scripts/runtime/check-node.sh`
 92+- `plutil -lint ops/launchd/so.makefile.baa-status-api.plist`
 93+- `git diff --check`
 94+
 95+## result
 96+
 97+- `conductor-daemon` 现在允许 `BAA_CONDUCTOR_LOCAL_API` 绑定 loopback 或显式列入 `BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS` 的 Tailscale `100.x`,不再硬性报错为 loopback-only。
 98+- `status-api` 应用代码本身已支持 `BAA_STATUS_API_HOST`;本分支补齐了 launchd 模板、安装脚本和校验脚本,使真实节点可以稳定写入 `100.71.210.78` / `100.112.239.13`。
 99+- runtime / ops 文档已明确 mini / mac 的监听覆盖值,以及 VPS 继续回源 `100.71.210.78:4317` 主、`100.112.239.13:4317` 备的约定。
100+- 本地验证通过:仓库级 `typecheck`、仓库级 `build`、`conductor-daemon` 测试、`status-api` smoke、shell 语法检查、plist lint、`git diff --check`。
101+
102+## risks
103+
104+- 当前实现是“单 host 显式绑定”,不是 loopback 与 Tailscale 双监听;节点如果切到 `100.x`,本机探针与 `check-node.sh` 也必须同步改用 Tailscale URL。
105+- 本分支没有执行真实 mini / mac launchd 重装,也没有做 VPS/Nginx 线上切换;T-028 后续仍需在真实节点套用文档中的新参数完成落地验证。
106+
107+## next_handoff
108+
109+- 在 `mini` 上重装或更新 launchd:`--all-services --local-api-base http://100.71.210.78:4317 --local-api-allowed-hosts 100.71.210.78 --status-api-host 100.71.210.78`。
110+- 在 `mac` 上重装或更新 launchd:`--all-services --local-api-base http://100.112.239.13:4317 --local-api-allowed-hosts 100.112.239.13 --status-api-host 100.112.239.13`。
111+- 两台节点分别执行 `check-launchd.sh` / `check-node.sh` 的 Tailscale 版本命令,确认 `4317` 与 `4318` 已在各自 `100.x` 上可达,再继续 T-028 的 VPS 回源与公网切换准备。
M docs/ops/README.md
+27, -1
 1@@ -29,7 +29,7 @@
 2 
 3 - Cloudflare DNS 不参与 failover,公网记录仍固定指向 VPS
 4 - `conductor.makefile.so` 的切换依赖 VPS Nginx 对 Tailscale `100.x` upstream 的连通性判断
 5-- `launchd` 决定节点本地 conductor 是否继续对 `127.0.0.1:4317` 和 Tailscale 监听
 6+- `launchd` 决定节点上的 conductor / status-api 是保持 loopback,还是显式切到节点自己的 Tailscale `100.x`
 7 - Nginx 不知道谁持有 leader lease,所以“逻辑 leader 已切走”不等于“公网一定已切走”
 8 
 9 ## 单一来源 inventory
10@@ -63,6 +63,32 @@
11 - `4317` 只允许 VPS 通过 Tailscale 或 WireGuard 访问
12 - `mini-conductor.makefile.so` 与 `mac-conductor.makefile.so` 默认保留 Basic Auth
13 
14+## 节点监听配置
15+
16+要让 VPS 真实回源成功,节点本地 runtime 需要明确切到自己的 Tailscale `100.x`,而不是继续停在 loopback 默认值。
17+
18+`mini`:
19+
20+```text
21+BAA_CONDUCTOR_LOCAL_API=http://100.71.210.78:4317
22+BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS=100.71.210.78
23+BAA_STATUS_API_HOST=100.71.210.78
24+```
25+
26+`mac`:
27+
28+```text
29+BAA_CONDUCTOR_LOCAL_API=http://100.112.239.13:4317
30+BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS=100.112.239.13
31+BAA_STATUS_API_HOST=100.112.239.13
32+```
33+
34+运维边界:
35+
36+- VPS Nginx 继续只回源 conductor,也就是 `100.71.210.78:4317` 主、`100.112.239.13:4317` 备
37+- `status-api` 如需 Tailscale 直连,地址对应 `http://100.71.210.78:4318` / `http://100.112.239.13:4318`
38+- 当前实现不是双监听;切到 `100.x` 后,节点本地检查要同步改用 `check-node.sh --local-api-base ... --status-api-base ...`
39+
40 ## Cloudflare DNS 计划
41 
42 ### 1. 准备 inventory
M docs/ops/failover-topology.md
+7, -0
 1@@ -78,6 +78,13 @@
 2 | `mini` | `mini` | `primary` | `mini-main` |
 3 | `mac` | `mac` | `standby` | `mac-standby` |
 4 
 5+真实回源还要把监听地址显式切到节点自己的 Tailscale `100.x`:
 6+
 7+- `mini`: `BAA_CONDUCTOR_LOCAL_API=http://100.71.210.78:4317`
 8+- `mini`: `BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS=100.71.210.78`
 9+- `mac`: `BAA_CONDUCTOR_LOCAL_API=http://100.112.239.13:4317`
10+- `mac`: `BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS=100.112.239.13`
11+
12 安装/校验/重载脚本见:
13 
14 - [`scripts/runtime/install-launchd.sh`](../../scripts/runtime/install-launchd.sh)
M docs/runtime/environment.md
+32, -1
 1@@ -5,6 +5,7 @@
 2 - `launchd` 不会自动读取 `.zshrc`、`.zprofile` 或 repo 根目录下的 `.env`。
 3 - plist 里出现的路径都必须是绝对路径,不能依赖 `~`、`$HOME` 或 shell 展开。
 4 - `BAA_SHARED_TOKEN` 必须在安装副本里替换成真实值,repo 内源模板保持 `replace-me`。
 5+- `conductor` 与 `status-api` 当前都只绑定一个 listen host;默认是 loopback,真实节点如需给 VPS 回源,必须显式切到对应的 Tailscale `100.x`。
 6 - 如果 `mini` 与 `mac` 都使用 `/Users/george/code/baa-conductor`,目录变量可以完全一致,只有节点身份变量需要区分。
 7 
 8 ## 应用层变量
 9@@ -14,7 +15,9 @@
10 | `BAA_CONDUCTOR_HOST` | `mini` | `mac` | 节点标识;应与节点域名和故障切换语义一致 |
11 | `BAA_CONDUCTOR_ROLE` | `primary` | `standby` | conductor 期望角色;切回主节点前不要让 `mac` 默认成 `primary` |
12 | `BAA_CONTROL_API_BASE` | `https://control-api.makefile.so` | `https://control-api.makefile.so` | 控制平面统一入口 |
13-| `BAA_CONDUCTOR_LOCAL_API` | `http://127.0.0.1:4317` | `http://127.0.0.1:4317` | 本机 loopback 地址;两台机器可以相同 |
14+| `BAA_CONDUCTOR_LOCAL_API` | `http://127.0.0.1:4317` | `http://127.0.0.1:4317` | 默认 loopback;真实 Tailscale 回源时改成节点自己的 `http://100.x:4317` |
 15+| `BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS` | 空 | 空 | 仅当 `BAA_CONDUCTOR_LOCAL_API` 改成 Tailscale `100.x` 时填写;必须显式列出允许绑定的 host,多个 host 用逗号或空白分隔 |
16+| `BAA_STATUS_API_HOST` | `127.0.0.1` | `127.0.0.1` | `status-api` 默认 loopback;真实节点如需 Tailscale 访问时改成节点自己的 `100.x` |
17 | `BAA_RUNS_DIR` | `/Users/george/code/baa-conductor/runs` | `/Users/george/code/baa-conductor/runs` | run 根目录 |
18 | `BAA_WORKTREES_DIR` | `/Users/george/code/baa-conductor/worktrees` | `/Users/george/code/baa-conductor/worktrees` | task worktree 根目录 |
19 | `BAA_LOGS_DIR` | `/Users/george/code/baa-conductor/logs` | `/Users/george/code/baa-conductor/logs` | 服务日志根目录 |
20@@ -25,6 +28,32 @@
21 
22 `scripts/runtime/install-launchd.sh` 会把这些路径写进安装副本;如果 repo 根目录不在 `/Users/george/code/baa-conductor`,直接通过 `--repo-dir` 派生新的路径,不需要再手工改每个 plist。
23 
24+## Tailscale rollout 覆盖值
25+
26+默认表格保持的是安全的 loopback 基线。真实节点要让 VPS 通过 Tailscale `100.x` 回源时,覆盖成下面这样:
27+
28+`mini`:
29+
30+```text
31+BAA_CONDUCTOR_LOCAL_API=http://100.71.210.78:4317
32+BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS=100.71.210.78
33+BAA_STATUS_API_HOST=100.71.210.78
34+```
35+
36+`mac`:
37+
38+```text
39+BAA_CONDUCTOR_LOCAL_API=http://100.112.239.13:4317
40+BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS=100.112.239.13
41+BAA_STATUS_API_HOST=100.112.239.13
42+```
43+
44+约束说明:
45+
 46+- `BAA_CONDUCTOR_LOCAL_API` 仍只接受 `http://`,且 host 只能是 loopback 或 `BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS` 里显式列出的 Tailscale `100.x`(实际校验范围是 `100.64.0.0/10`,即第二段在 64–127 之间的 IPv4 地址)
47+- `BAA_STATUS_API_HOST` 默认不需要改;只有确实要通过 Tailscale 直连 `status-api` 时才切到 `100.x`
48+- 当前实现不是双监听;切到 `100.x` 后,`check-node.sh` 也要跟着传 `--local-api-base` / `--status-api-base`
49+
50 ## launchd 辅助变量
51 
52 | 变量 | 推荐值 | 说明 |
53@@ -47,6 +76,8 @@
54 - `--home-dir PATH`:决定 `HOME`、`PATH` 与默认 `~/Library/LaunchAgents`
55 - `--shared-token`、`--shared-token-file` 或环境变量 `BAA_SHARED_TOKEN`:写入或校验 `BAA_SHARED_TOKEN`
 56 - `--control-api-base`、`--local-api-base`:分别覆盖 `BAA_CONTROL_API_BASE`(控制平面地址)与 `BAA_CONDUCTOR_LOCAL_API`(conductor 本地 API 地址)
57+- `--local-api-allowed-hosts`:覆盖 `BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS`
58+- `--status-api-host`:覆盖 `BAA_STATUS_API_HOST`
59 
60 ## 节点最小差异集
61 
M docs/runtime/launchd.md
+81, -0
  1@@ -36,6 +36,24 @@ repo 内的三个 plist 源模板都默认写成 `mini` 的 canonical 配置:
  2 
  3 实际安装时推荐让脚本做这些改写,而不是手工编辑 plist。
  4 
  5+## 监听策略
  6+
  7+默认安装副本保持安全的 loopback 监听:
  8+
  9+- conductor: `http://127.0.0.1:4317`
 10+- status-api: `http://127.0.0.1:4318`
 11+
 12+真实 rollout 如果要让 VPS 直接回源到节点,就显式切到对应节点的 Tailscale `100.x`:
 13+
 14+- `mini`: conductor `http://100.71.210.78:4317`,status-api `http://100.71.210.78:4318`
 15+- `mac`: conductor `http://100.112.239.13:4317`,status-api `http://100.112.239.13:4318`
 16+
 17+当前实现不是双监听。切到 Tailscale 后,需要同时设置:
 18+
 19+- `--local-api-base`
 20+- `--local-api-allowed-hosts`
 21+- `--status-api-host`
 22+
 23 ## `LaunchAgents` 与 `LaunchDaemons`
 24 
 25 | 目标路径 | 适合场景 | 优点 | 额外要求 |
 26@@ -83,6 +101,18 @@ export BAA_SHARED_TOKEN='replace-with-real-token'
 27   --all-services
 28 ```
 29 
 30+如果这台 `mini` 要给 VPS 真实回源,改成:
 31+
 32+```bash
 33+./scripts/runtime/install-launchd.sh \
 34+  --repo-dir "$REPO_DIR" \
 35+  --node mini \
 36+  --all-services \
 37+  --local-api-base http://100.71.210.78:4317 \
 38+  --local-api-allowed-hosts 100.71.210.78 \
 39+  --status-api-host 100.71.210.78
 40+```
 41+
 42 ### 4. 静态校验安装副本
 43 
 44 ```bash
 45@@ -95,6 +125,19 @@ AGENTS_DIR="$HOME/Library/LaunchAgents"
 46   --install-dir "$AGENTS_DIR"
 47 ```
 48 
 49+如果安装副本已经切到 Tailscale `100.x`,校验时也要带同一组覆盖值:
 50+
 51+```bash
 52+./scripts/runtime/check-launchd.sh \
 53+  --repo-dir "$REPO_DIR" \
 54+  --node mini \
 55+  --all-services \
 56+  --install-dir "$AGENTS_DIR" \
 57+  --local-api-base http://100.71.210.78:4317 \
 58+  --local-api-allowed-hosts 100.71.210.78 \
 59+  --status-api-host 100.71.210.78
 60+```
 61+
 62 ### 5. on-node 验证
 63 
 64 `check-launchd.sh` 只覆盖静态层。节点上已经有真实进程后,再补一轮 on-node 检查:
 65@@ -111,6 +154,19 @@ AGENTS_DIR="$HOME/Library/LaunchAgents"
 66   --expected-rolez leader
 67 ```
 68 
 69+如果 `mini` 已改成 Tailscale 监听,把 probe URL 也显式改掉:
 70+
 71+```bash
 72+./scripts/runtime/check-node.sh \
 73+  --repo-dir "$REPO_DIR" \
 74+  --node mini \
 75+  --all-services \
 76+  --install-dir "$AGENTS_DIR" \
 77+  --local-api-base http://100.71.210.78:4317 \
 78+  --status-api-base http://100.71.210.78:4318 \
 79+  --expected-rolez leader
 80+```
 81+
 82 如果 `status-api` 暂时不在该节点常驻,把 `--all-services` 改成 `--service conductor`。
 83 
 84 ### 6. 预览或执行重载
 85@@ -145,6 +201,18 @@ export BAA_SHARED_TOKEN='replace-with-real-token'
 86   --node mac
 87 ```
 88 
 89+如果这台 `mac` 要作为 VPS 的 Tailscale 回源备用节点,改成:
 90+
 91+```bash
 92+./scripts/runtime/install-launchd.sh \
 93+  --repo-dir "$REPO_DIR" \
 94+  --node mac \
 95+  --all-services \
 96+  --local-api-base http://100.112.239.13:4317 \
 97+  --local-api-allowed-hosts 100.112.239.13 \
 98+  --status-api-host 100.112.239.13
 99+```
100+
101 ### 3. on-node 验证
102 
103 `mac` 的 steady-state 预期是 standby,因此把 `rolez` 预期值改成 `standby`:
104@@ -161,6 +229,19 @@ AGENTS_DIR="$HOME/Library/LaunchAgents"
105   --expected-rolez standby
106 ```
107 
108+如果 `mac` 已改成 Tailscale 监听,把验证地址同步切到 `100.112.239.13`:
109+
110+```bash
111+./scripts/runtime/check-node.sh \
112+  --repo-dir "$REPO_DIR" \
113+  --node mac \
114+  --all-services \
115+  --install-dir "$AGENTS_DIR" \
116+  --local-api-base http://100.112.239.13:4317 \
117+  --status-api-base http://100.112.239.13:4318 \
118+  --expected-rolez standby
119+```
120+
121 如果这是 failover rehearsal,把 `--expected-rolez standby` 改成 `--expected-rolez leader`。
122 
123 ### 4. 加载服务
M docs/runtime/node-verification.md
+32, -7
 1@@ -15,6 +15,18 @@
 2 
 3 默认检查集合是 `conductor + status-api`。如果节点暂时只跑 conductor,可以显式改成 `--service conductor`。如果还要把 `worker-runner` 也纳入同一次节点验证,再加 `--all-services`。
 4 
 5+默认 probe URL 仍假设 loopback:
 6+
 7+- conductor: `http://127.0.0.1:4317`
 8+- status-api: `http://127.0.0.1:4318`
 9+
10+如果节点已经按 rollout 切到 Tailscale `100.x`,记得在下面命令里同步补上:
11+
12+- `--local-api-base http://100.x:4317`
13+- `--status-api-base http://100.x:4318`
14+
15+下面的 `mini` / `mac` 示例按当前真实 rollout 直接写成 Tailscale `100.x` 版本;如果节点仍保持 loopback-only,把这些覆盖参数去掉即可。
16+
17 ## 前置条件
18 
19 在进入 on-node 检查前,先确保这些步骤已经完成:
20@@ -38,7 +50,10 @@ AGENTS_DIR="$HOME/Library/LaunchAgents"
21   --repo-dir "$REPO_DIR" \
22   --node mini \
23   --all-services \
24-  --install-dir "$AGENTS_DIR"
25+  --install-dir "$AGENTS_DIR" \
26+  --local-api-base http://100.71.210.78:4317 \
27+  --local-api-allowed-hosts 100.71.210.78 \
28+  --status-api-host 100.71.210.78
29 ```
30 
31 ### 2. on-node 校验
32@@ -54,6 +69,8 @@ AGENTS_DIR="$HOME/Library/LaunchAgents"
33   --node mini \
34   --all-services \
35   --install-dir "$AGENTS_DIR" \
36+  --local-api-base http://100.71.210.78:4317 \
37+  --status-api-base http://100.71.210.78:4318 \
38   --expected-rolez leader
39 ```
40 
41@@ -72,12 +89,15 @@ AGENTS_DIR="$HOME/Library/LaunchAgents"
42 ### 3. 失败时的人工 spot check
43 
44 ```bash
45+LOCAL_API_BASE="${LOCAL_API_BASE:-http://100.71.210.78:4317}"
46+STATUS_API_BASE="${STATUS_API_BASE:-http://100.71.210.78:4318}"
47+
48 lsof -nP -iTCP:4317 -sTCP:LISTEN
49 lsof -nP -iTCP:4318 -sTCP:LISTEN
50-curl -sS http://127.0.0.1:4317/healthz
51-curl -sS http://127.0.0.1:4317/readyz
52-curl -sS http://127.0.0.1:4317/rolez
53-curl -sS http://127.0.0.1:4318/healthz
54+curl -sS "${LOCAL_API_BASE}/healthz"
55+curl -sS "${LOCAL_API_BASE}/readyz"
56+curl -sS "${LOCAL_API_BASE}/rolez"
57+curl -sS "${STATUS_API_BASE}/healthz"
58 tail -n 50 /Users/george/code/baa-conductor/logs/launchd/so.makefile.baa-conductor.err.log
59 tail -n 50 /Users/george/code/baa-conductor/logs/launchd/so.makefile.baa-status-api.err.log
60 ```
61@@ -94,7 +114,10 @@ AGENTS_DIR="$HOME/Library/LaunchAgents"
62   --repo-dir "$REPO_DIR" \
63   --node mac \
64   --all-services \
65-  --install-dir "$AGENTS_DIR"
66+  --install-dir "$AGENTS_DIR" \
67+  --local-api-base http://100.112.239.13:4317 \
68+  --local-api-allowed-hosts 100.112.239.13 \
69+  --status-api-host 100.112.239.13
70 ```
71 
72 ### 2. on-node 校验
73@@ -110,6 +133,8 @@ AGENTS_DIR="$HOME/Library/LaunchAgents"
74   --node mac \
75   --all-services \
76   --install-dir "$AGENTS_DIR" \
77+  --local-api-base http://100.112.239.13:4317 \
78+  --status-api-base http://100.112.239.13:4318 \
79   --expected-rolez standby
80 ```
81 
82@@ -136,6 +161,6 @@ sudo ./scripts/runtime/check-node.sh \
83 
84 - `conductor /readyz` 返回 `503`:节点进程存活,但 runtime 还没进入 ready 状态,先看 conductor stderr 日志。
85 - `conductor /rolez` 与预期不符:节点身份没问题,但当前 lease 角色与预期不一致,先确认是否处于 failover 或 standby 场景。
86-- `status-api` 端口没监听:`status-api` 宿主进程没有起来,或监听地址/端口与默认 `127.0.0.1:4318` 不一致。
87+- `status-api` 端口没监听:`status-api` 宿主进程没有起来,或监听地址/端口与当前配置的 `127.0.0.1:4318` / `100.x:4318` 不一致。
88 - `status-api` 进程匹配失败:节点上可能跑的是旧路径、旧 worktree,或 launchd 仍指向错误的 `dist/index.js`。
89 - launchd 日志文件缺失:安装副本路径虽然存在,但服务尚未真正由 launchd 打开过对应 `StandardOutPath` / `StandardErrorPath`。
M ops/launchd/so.makefile.baa-status-api.plist
+2, -0
1@@ -32,6 +32,8 @@
2       <string>https://control-api.makefile.so</string>
3       <key>BAA_CONDUCTOR_LOCAL_API</key>
4       <string>http://127.0.0.1:4317</string>
5+      <key>BAA_STATUS_API_HOST</key>
6+      <string>127.0.0.1</string>
7       <key>BAA_RUNS_DIR</key>
8       <string>/Users/george/code/baa-conductor/runs</string>
9       <key>BAA_WORKTREES_DIR</key>
M scripts/runtime/check-launchd.sh
+18, -0
 1@@ -22,6 +22,9 @@ Options:
 2   --shared-token-file PATH  Read the expected token from a file.
 3   --control-api-base URL    Expected BAA_CONTROL_API_BASE.
 4   --local-api-base URL      Expected BAA_CONDUCTOR_LOCAL_API.
 5+  --local-api-allowed-hosts CSV
 6+                            Expected BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS.
 7+  --status-api-host HOST    Expected BAA_STATUS_API_HOST.
 8   --username NAME           Expected UserName for LaunchDaemons.
 9   --skip-dist-check         Skip dist/index.js existence checks.
10   --check-loaded            Also require launchctl print to succeed for each selected service.
11@@ -46,6 +49,8 @@ shared_token=""
12 shared_token_file=""
13 control_api_base="${BAA_RUNTIME_DEFAULT_CONTROL_API_BASE}"
14 local_api_base="${BAA_RUNTIME_DEFAULT_LOCAL_API}"
15+local_api_allowed_hosts="${BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS:-}"
16+status_api_host="${BAA_STATUS_API_HOST:-127.0.0.1}"
17 username="$(default_username)"
18 skip_dist_check="0"
19 check_loaded="0"
20@@ -105,6 +110,14 @@ while [[ $# -gt 0 ]]; do
21       local_api_base="$2"
22       shift 2
23       ;;
24+    --local-api-allowed-hosts)
25+      local_api_allowed_hosts="$2"
26+      shift 2
27+      ;;
28+    --status-api-host)
29+      status_api_host="$2"
30+      shift 2
31+      ;;
32     --username)
33       username="$2"
34       shift 2
35@@ -191,6 +204,7 @@ check_installed_plist() {
36   check_string_equals "${service}:BAA_CONDUCTOR_ROLE" "$(plist_print_value "$plist_path" ":EnvironmentVariables:BAA_CONDUCTOR_ROLE")" "$conductor_role"
37   check_string_equals "${service}:BAA_CONTROL_API_BASE" "$(plist_print_value "$plist_path" ":EnvironmentVariables:BAA_CONTROL_API_BASE")" "$control_api_base"
38   check_string_equals "${service}:BAA_CONDUCTOR_LOCAL_API" "$(plist_print_value "$plist_path" ":EnvironmentVariables:BAA_CONDUCTOR_LOCAL_API")" "$local_api_base"
39+  check_string_equals "${service}:BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS" "$(plist_print_value "$plist_path" ":EnvironmentVariables:BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS")" "$local_api_allowed_hosts"
40   check_string_equals "${service}:BAA_RUNS_DIR" "$(plist_print_value "$plist_path" ":EnvironmentVariables:BAA_RUNS_DIR")" "$runs_dir"
41   check_string_equals "${service}:BAA_WORKTREES_DIR" "$(plist_print_value "$plist_path" ":EnvironmentVariables:BAA_WORKTREES_DIR")" "$worktrees_dir"
42   check_string_equals "${service}:BAA_LOGS_DIR" "$(plist_print_value "$plist_path" ":EnvironmentVariables:BAA_LOGS_DIR")" "$logs_dir"
43@@ -213,6 +227,10 @@ check_installed_plist() {
44     check_string_equals "${service}:role-arg" "$(plist_print_value "$plist_path" ":ProgramArguments:6")" "$conductor_role"
45   fi
46 
47+  if [[ "$service" == "status-api" ]]; then
48+    check_string_equals "${service}:BAA_STATUS_API_HOST" "$(plist_print_value "$plist_path" ":EnvironmentVariables:BAA_STATUS_API_HOST")" "$status_api_host"
49+  fi
50+
51   if [[ "$scope" == "daemon" ]]; then
52     check_string_equals "${service}:UserName" "$(plist_print_value "$plist_path" ":UserName")" "$username"
53   fi
M scripts/runtime/install-launchd.sh
+18, -0
 1@@ -22,6 +22,9 @@ Options:
 2   --shared-token-file PATH  Read the shared token from a file.
 3   --control-api-base URL    Override BAA_CONTROL_API_BASE.
 4   --local-api-base URL      Override BAA_CONDUCTOR_LOCAL_API.
 5+  --local-api-allowed-hosts CSV
 6+                            Override BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS.
 7+  --status-api-host HOST    Override BAA_STATUS_API_HOST.
 8   --username NAME           UserName for LaunchDaemons. Defaults to the current user.
 9   --help                    Show this help text.
10 
11@@ -44,6 +47,8 @@ shared_token="${BAA_SHARED_TOKEN:-}"
12 shared_token_file=""
13 control_api_base="${BAA_RUNTIME_DEFAULT_CONTROL_API_BASE}"
14 local_api_base="${BAA_RUNTIME_DEFAULT_LOCAL_API}"
15+local_api_allowed_hosts="${BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS:-}"
16+status_api_host="${BAA_STATUS_API_HOST:-127.0.0.1}"
17 username="$(default_username)"
18 services=()
19 
20@@ -100,6 +105,14 @@ while [[ $# -gt 0 ]]; do
21       local_api_base="$2"
22       shift 2
23       ;;
24+    --local-api-allowed-hosts)
25+      local_api_allowed_hosts="$2"
26+      shift 2
27+      ;;
28+    --status-api-host)
29+      status_api_host="$2"
30+      shift 2
31+      ;;
32     --username)
33       username="$2"
34       shift 2
35@@ -173,6 +186,7 @@ for service in "${services[@]}"; do
36   plist_set_string "$install_path" ":EnvironmentVariables:BAA_CONDUCTOR_ROLE" "$conductor_role"
37   plist_set_string "$install_path" ":EnvironmentVariables:BAA_CONTROL_API_BASE" "$control_api_base"
38   plist_set_string "$install_path" ":EnvironmentVariables:BAA_CONDUCTOR_LOCAL_API" "$local_api_base"
39+  plist_set_string "$install_path" ":EnvironmentVariables:BAA_CONDUCTOR_LOCAL_API_ALLOWED_HOSTS" "$local_api_allowed_hosts"
40   plist_set_string "$install_path" ":EnvironmentVariables:BAA_RUNS_DIR" "$runs_dir"
41   plist_set_string "$install_path" ":EnvironmentVariables:BAA_WORKTREES_DIR" "$worktrees_dir"
42   plist_set_string "$install_path" ":EnvironmentVariables:BAA_LOGS_DIR" "$logs_dir"
43@@ -189,6 +203,10 @@ for service in "${services[@]}"; do
44     plist_set_string "$install_path" ":ProgramArguments:6" "$conductor_role"
45   fi
46 
47+  if [[ "$service" == "status-api" ]]; then
48+    plist_set_string "$install_path" ":EnvironmentVariables:BAA_STATUS_API_HOST" "$status_api_host"
49+  fi
50+
51   if [[ "$scope" == "daemon" ]]; then
52     plist_set_string "$install_path" ":UserName" "$username"
53   else