baa-conductor


baa-conductor / scripts / runtime
im_wower  ·  2026-03-26

reload-launchd.sh

  1#!/usr/bin/env bash
  2set -euo pipefail
  3
  4SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
  5# shellcheck source=./common.sh
  6source "${SCRIPT_DIR}/common.sh"
  7
  8usage() {
  9  cat <<'EOF'
 10Usage:
 11  scripts/runtime/reload-launchd.sh [options]
 12
 13Options:
 14  --scope agent|daemon   launchd domain type. Defaults to agent.
 15  --service NAME         Add one service to the reload set. Repeatable.
 16  --all-services         Reload conductor, codexd, worker-runner, and status-api.
 17  --home-dir PATH        Used only to derive the default LaunchAgents path.
 18  --install-dir PATH     Override launchd install directory.
 19  --domain TARGET        Override launchctl domain target. Defaults to gui/<uid> or system.
 20  --skip-kickstart       Skip launchctl kickstart after bootstrap.
 21  --dry-run              Print launchctl commands instead of executing them.
 22  --help                 Show this help text.
 23
 24Notes:
 25  If no service is specified, conductor + codexd are reloaded.
 26  Use --service status-api to reload the optional local read-only observer.
 27  codexd reload recovery waits on /healthz only; /v1/codexd/runs* and
 28  codex exec are not part of reload acceptance.
 29EOF
 30}
 31
 32require_command launchctl
 33require_command plutil
 34require_command curl
 35assert_file /usr/libexec/PlistBuddy
 36
 37scope="agent"
 38home_dir="$(default_home_dir)"
 39install_dir=""
 40domain_target=""
 41dry_run="0"
 42skip_kickstart="0"
 43services=()
 44
 45while [[ $# -gt 0 ]]; do
 46  case "$1" in
 47    --scope)
 48      scope="$2"
 49      shift 2
 50      ;;
 51    --service)
 52      validate_service "$2"
 53      if ! contains_value "$2" "${services[@]-}"; then
 54        services+=("$2")
 55      fi
 56      shift 2
 57      ;;
 58    --all-services)
 59      while IFS= read -r service; do
 60        if ! contains_value "$service" "${services[@]-}"; then
 61          services+=("$service")
 62        fi
 63      done < <(all_services)
 64      shift
 65      ;;
 66    --home-dir)
 67      home_dir="$2"
 68      shift 2
 69      ;;
 70    --install-dir)
 71      install_dir="$2"
 72      shift 2
 73      ;;
 74    --domain)
 75      domain_target="$2"
 76      shift 2
 77      ;;
 78    --skip-kickstart)
 79      skip_kickstart="1"
 80      shift
 81      ;;
 82    --dry-run)
 83      dry_run="1"
 84      shift
 85      ;;
 86    --help)
 87      usage
 88      exit 0
 89      ;;
 90    *)
 91      die "Unknown option: $1"
 92      ;;
 93  esac
 94done
 95
 96validate_scope "$scope"
 97
 98if [[ "${#services[@]}" -eq 0 ]]; then
 99  while IFS= read -r service; do
100    services+=("$service")
101  done < <(default_services)
102fi
103
104if [[ -z "$install_dir" ]]; then
105  install_dir="$(default_install_dir "$scope" "$home_dir")"
106fi
107
108if [[ -z "$domain_target" ]]; then
109  domain_target="$(default_domain_target "$scope")"
110fi
111
112CURL_LAST_EXIT_CODE="0"
113CURL_LAST_HTTP_STATUS="000"
114CURL_LAST_ERROR=""
115
#######################################
# Remove one service from the launchd domain, identified by its plist path.
# Globals:   dry_run (read), domain_target (read)
# Arguments: $1 - path to the service plist
# Outputs:   in dry-run mode, the equivalent command line on stdout
# Returns:   0 always
#######################################
bootout_service() {
  local plist="$1"

  if [[ "$dry_run" != "1" ]]; then
    # Best effort: launchctl's stderr and exit status are deliberately
    # discarded, so a failing bootout never aborts the reload.
    launchctl bootout "$domain_target" "$plist" 2>/dev/null || true
    return 0
  fi

  printf '+ launchctl bootout %q %q 2>/dev/null || true\n' "$domain_target" "$plist"
  return 0
}
126
#######################################
# Strip every trailing "/" from a URL-like string.
# Arguments: $1 - the value to trim
# Outputs:   the trimmed value followed by a newline on stdout
#######################################
normalize_url() {
  local url="$1"
  # Everything after the last non-slash character, i.e. the run of trailing
  # slashes (the whole string when it contains nothing but slashes).
  local trailing_slashes="${url##*[!/]}"

  printf '%s\n' "${url%"$trailing_slashes"}"
}
136
#######################################
# Print one plist entry, or a fallback when PlistBuddy cannot print it.
# Arguments: $1 - plist file path
#            $2 - PlistBuddy entry path (e.g. ":StandardOutPath")
#            $3 - value to print when the lookup fails
# Outputs:   the entry value, or the fallback, followed by a newline
#######################################
read_plist_value_or_default() {
  local plist_file="$1"
  local entry="$2"
  local fallback="$3"
  local found=""

  # PlistBuddy's own diagnostics are discarded; only its exit status decides
  # whether the fallback is used.
  if ! found="$(/usr/libexec/PlistBuddy -c "Print ${entry}" "$plist_file" 2>/dev/null)"; then
    printf '%s\n' "$fallback"
    return
  fi

  printf '%s\n' "$found"
  return 0
}
150
#######################################
# Issue a single GET against a health endpoint and record the outcome.
# Globals:   CURL_LAST_EXIT_CODE, CURL_LAST_HTTP_STATUS, CURL_LAST_ERROR
#            (all written)
# Arguments: $1 - URL to probe
# Returns:   0 only when curl exits 0 and the HTTP status is exactly 200
#######################################
probe_http_healthz() {
  local url="$1"
  local err_capture
  local status_code="000"
  local curl_rc="0"

  # curl's stderr goes to a scratch file so it can be reported later.
  err_capture="$(mktemp "${TMPDIR:-/tmp}/baa-runtime-reload-curl.XXXXXX")"
  CURL_LAST_ERROR=""

  # -w prints only the HTTP status code; the response body is discarded.
  status_code="$(curl -sS --max-time 5 -o /dev/null -w '%{http_code}' "$url" 2>"$err_capture")" \
    || curl_rc="$?"

  CURL_LAST_EXIT_CODE="$curl_rc"
  CURL_LAST_HTTP_STATUS="${status_code:-000}"
  CURL_LAST_ERROR="$(tr -d '\r' <"$err_capture")"
  rm -f "$err_capture"

  [[ "$CURL_LAST_EXIT_CODE" == "0" && "$CURL_LAST_HTTP_STATUS" == "200" ]]
}
171
#######################################
# Poll a health endpoint until it reports healthy or the attempts run out.
# Arguments: $1 - service name, used only in the "ready" log line
#            $2 - health URL handed to probe_http_healthz
#            $3 - maximum number of probes (default 30)
#            $4 - argument passed to sleep between probes (default 1)
# Returns:   0 as soon as one probe succeeds, 1 when every probe failed
#######################################
wait_for_http_healthz() {
  local name="$1"
  local url="$2"
  local attempts="${3:-30}"
  local delay="${4:-1}"
  local index

  for ((index = 1; index <= attempts; index += 1)); do
    if probe_http_healthz "$url"; then
      runtime_log "${name} is ready: ${url}"
      return 0
    fi

    # Pause only when another probe follows; sleeping after the final failed
    # attempt would merely delay the failure report.
    if ((index < attempts)); then
      sleep "$delay"
    fi
  done

  return 1
}
190
#######################################
# Report the last 20 lines of a log file on stderr.
# Arguments: $1 - label used as the prefix of the report line
#            $2 - log file path; may be empty
# Outputs:   a header via runtime_error, then the tail of the file on stderr.
#            An empty path or a missing file is reported and treated as
#            success.
#######################################
print_log_tail() {
  local label="$1"
  local log_file="$2"

  [[ -n "$log_file" ]] || {
    runtime_error "${label}: unavailable"
    return 0
  }

  [[ -f "$log_file" ]] || {
    runtime_error "${label}: missing ${log_file}"
    return 0
  }

  runtime_error "${label}: ${log_file} (last 20 lines)"
  tail -n 20 "$log_file" >&2
}
208
#######################################
# Build the /healthz URL for a service from its installed plist.
# Globals:   BAA_RUNTIME_DEFAULT_LOCAL_API,
#            BAA_RUNTIME_DEFAULT_CODEXD_LOCAL_API (read as fallbacks)
# Arguments: $1 - service name (conductor, codexd or status-api)
#            $2 - path to the service plist
# Outputs:   the health URL followed by a newline on stdout
# Returns:   1, printing nothing, for any other service name
#######################################
read_service_healthz_url() {
  local service="$1"
  local plist_path="$2"
  local env_key
  local fallback
  local api_base
  local host

  case "$service" in
    conductor)
      env_key=":EnvironmentVariables:BAA_CONDUCTOR_LOCAL_API"
      fallback="$BAA_RUNTIME_DEFAULT_LOCAL_API"
      ;;
    codexd)
      env_key=":EnvironmentVariables:BAA_CODEXD_LOCAL_API_BASE"
      fallback="$BAA_RUNTIME_DEFAULT_CODEXD_LOCAL_API"
      ;;
    status-api)
      # Only the host is read from the plist here; the port always comes from
      # service_default_port.
      host="$(read_plist_value_or_default "$plist_path" ":EnvironmentVariables:BAA_STATUS_API_HOST" "127.0.0.1")"
      printf 'http://%s:%s/healthz\n' "$host" "$(service_default_port "$service")"
      return
      ;;
    *)
      return 1
      ;;
  esac

  # conductor and codexd: take the base URL from the plist environment, trim
  # trailing slashes, then append the health path.
  api_base="$(read_plist_value_or_default "$plist_path" "$env_key" "$fallback")"
  printf '%s/healthz\n' "$(normalize_url "$api_base")"
}
233
#######################################
# Emit a failure report for a service that did not become healthy.
# Globals:   domain_target, CURL_LAST_EXIT_CODE, CURL_LAST_HTTP_STATUS,
#            CURL_LAST_ERROR (all read)
# Arguments: $1 - service name
#            $2 - path to the service plist
#            $3 - launchd label of the service
#            $4 - health URL that was probed
# Outputs:   report lines via runtime_error, `launchctl print` output on
#            stderr, and the log tails produced by print_log_tail
#######################################
print_service_diagnostics() {
  local service="$1"
  local plist_path="$2"
  local label="$3"
  local healthz_url="$4"
  local service_target="${domain_target}/${label}"
  local out_log
  local err_log

  # Log locations come from the plist; an empty value means "not configured".
  out_log="$(read_plist_value_or_default "$plist_path" ":StandardOutPath" "")"
  err_log="$(read_plist_value_or_default "$plist_path" ":StandardErrorPath" "")"

  runtime_error "${service} did not recover after launchd reload"
  runtime_error "health probe: ${healthz_url}"
  runtime_error "last curl result: exit=${CURL_LAST_EXIT_CODE} http=${CURL_LAST_HTTP_STATUS:-000}"
  [[ -z "$CURL_LAST_ERROR" ]] || runtime_error "last curl stderr: ${CURL_LAST_ERROR}"

  runtime_error "launchctl print ${service_target}:"
  launchctl print "$service_target" >&2 \
    || runtime_error "launchctl print failed for ${service_target}"

  print_log_tail "${service} stdout" "$out_log"
  print_log_tail "${service} stderr" "$err_log"
  runtime_error "manual recovery hint: launchctl kickstart -k ${service_target}"
}
261
#######################################
# Tell whether a service is health-checked after a reload.
# Arguments: $1 - service name
# Returns:   0 for conductor, codexd and status-api; 1 for anything else
#######################################
service_has_health_probe() {
  [[ "$1" == "conductor" || "$1" == "codexd" || "$1" == "status-api" ]]
}
272
#######################################
# Wait for a reloaded service to become healthy, retrying kickstart once.
# Services without a health probe are accepted immediately.
# Globals:   install_dir, domain_target, skip_kickstart (all read)
# Arguments: $1 - service name
# Returns:   0 when the service is healthy (or has no probe); otherwise it
#            prints diagnostics and calls die.
#######################################
recover_service_after_reload() {
  local service="$1"
  local label
  local plist_path
  local healthz_url
  local service_target

  service_has_health_probe "$service" || return 0

  label="$(service_label "$service")"
  plist_path="$(service_install_path "$install_dir" "$service")"
  healthz_url="$(read_service_healthz_url "$service" "$plist_path")"
  service_target="${domain_target}/${label}"

  # First chance: the service came up on its own after the reload.
  wait_for_http_healthz "$service" "$healthz_url" && return 0

  # With --skip-kickstart no retry is attempted; report and give up.
  if [[ "$skip_kickstart" == "1" ]]; then
    print_service_diagnostics "$service" "$plist_path" "$label" "$healthz_url"
    die "${service} stayed unhealthy after reload with --skip-kickstart"
  fi

  # Second chance: force a restart through launchctl and wait again.
  runtime_error "${service} was loaded but not serving; retrying launchctl kickstart -k ${service_target}"
  launchctl kickstart -k "$service_target" || {
    runtime_error "retry kickstart returned non-zero for ${service_target}"
    print_service_diagnostics "$service" "$plist_path" "$label" "$healthz_url"
    die "${service} retry kickstart failed after reload"
  }

  if wait_for_http_healthz "$service" "$healthz_url"; then
    runtime_log "${service} recovered after retry kickstart"
    return 0
  fi

  print_service_diagnostics "$service" "$plist_path" "$label" "$healthz_url"
  die "${service} failed to recover after reload"
}
311
#######################################
# Run the post-reload recovery check for every selected service, in order.
# Globals:   services (read)
# Arguments: none
#######################################
recover_services_after_reload() {
  local current

  for current in "${services[@]}"; do
    recover_service_after_reload "$current"
  done
}
319
# Phase 1: every selected plist must exist and pass `plutil -lint` before any
# service is touched.
for service in "${services[@]}"; do
  plist_path="$(service_install_path "$install_dir" "$service")"
  assert_file "$plist_path"
  plutil -lint "$plist_path" >/dev/null
done

# Phase 2: boot out all selected services (best effort; see bootout_service).
for service in "${services[@]}"; do
  plist_path="$(service_install_path "$install_dir" "$service")"
  bootout_service "$plist_path"
done

# Phase 3: bootstrap each plist into the target domain again.
for service in "${services[@]}"; do
  plist_path="$(service_install_path "$install_dir" "$service")"
  run_or_print "$dry_run" launchctl bootstrap "$domain_target" "$plist_path"
done

# Phase 4: kickstart every service unless --skip-kickstart was given.
if [[ "$skip_kickstart" != "1" ]]; then
  for service in "${services[@]}"; do
    run_or_print "$dry_run" launchctl kickstart -k "${domain_target}/$(service_label "$service")"
  done
fi

# Phase 5: health-based recovery only makes sense when commands really ran.
[[ "$dry_run" == "1" ]] || recover_services_after_reload

runtime_log "launchd reload completed for ${domain_target}"