# codex@macbookpro · 2026-03-31
# validation.py
1from __future__ import annotations
2
3import argparse
4import json
5import subprocess
6import sys
7from pathlib import Path
8from typing import Any, Dict, Iterable, List, Sequence
9
10from .runtime import CIERuntime, REQUIRED_SNAPSHOT_KEYS
11
# Commit hash written to the report under "base_commit".
BASE_COMMIT = "164c06af63812c69cab32d8f8a6c770b96f38ef6"

# Branch name used when git cannot supply one.
DEFAULT_BRANCH = "branch-a/task04-validation-reporting"

# Two directory levels above this module's resolved location.
REPO_ROOT = Path(__file__).resolve().parent.parent

# Default destinations for the JSON and Markdown reports.
DEFAULT_JSON_REPORT_PATH = REPO_ROOT.joinpath(
    "reports", "2026-03-31_task04_branch_a_validation.json"
)
DEFAULT_MARKDOWN_REPORT_PATH = REPO_ROOT.joinpath(
    "reports", "2026-03-31_task04_branch_a_validation.md"
)

# Top-level keys of the generated report, in emission order.
REPORT_TOP_LEVEL_KEYS = (
    "branch", "base_commit", "runtime_summary",
    "interface_checks", "smoke_checks", "dynamics_checks",
    "sedimentation_checks", "snapshot_checks",
    "known_limitations", "overall_status",
)

# Attribute names that must be callable on a runtime instance.
LOCKED_INTERFACE = (
    "ingest", "step", "emit",
    "commit_feedback", "snapshot_state", "reset_session",
)

# Output-mode labels the checks compare against.
OUTPUT_MODES = ("full", "degraded", "minimal")
38
39
def _git_stdout(args: Sequence[str], fallback: str) -> str:
    """Run ``git <args>`` in ``REPO_ROOT`` and return its stripped stdout.

    This is a best-effort helper: *fallback* is returned when git cannot be
    launched, when it exits with a non-zero status, or when it prints
    nothing but whitespace.

    Args:
        args: Arguments placed after ``git`` on the command line.
        fallback: Value returned whenever no usable output is available.

    Returns:
        The stripped stdout of the command, or *fallback*.
    """
    try:
        completed = subprocess.run(
            ["git", *args],
            cwd=REPO_ROOT,
            check=True,
            capture_output=True,
            text=True,
        )
    # OSError (the parent of FileNotFoundError) also covers other launch
    # failures such as a non-executable git binary or an unusable cwd, so
    # none of them can abort report generation.
    except (OSError, subprocess.CalledProcessError):
        return fallback
    return completed.stdout.strip() or fallback
53
54
def _current_branch() -> str:
    """Return the checked-out branch name, or ``DEFAULT_BRANCH`` if unknown.

    ``DEFAULT_BRANCH`` is used both when git yields no output and when the
    repository is on a detached HEAD.
    """
    branch = _git_stdout(["rev-parse", "--abbrev-ref", "HEAD"], DEFAULT_BRANCH)
    # On a detached HEAD, `git rev-parse --abbrev-ref HEAD` prints the literal
    # string "HEAD", which is not a branch name worth putting in a report.
    if branch == "HEAD":
        return DEFAULT_BRANCH
    return branch
57
58
def _check(name: str, ok: bool, detail: str, **extra: Any) -> Dict[str, Any]:
    """Build a single check record.

    The record always carries ``name``, ``status`` and ``detail``; ``status``
    is ``"pass"`` when *ok* is truthy and ``"fail"`` otherwise. Keyword
    arguments in *extra* are merged in last, so a key that collides with a
    base field replaces its value.
    """
    status = "fail"
    if ok:
        status = "pass"
    return {"name": name, "status": status, "detail": detail, **extra}
67
68
def _section(checks: List[Dict[str, Any]], **summary: Any) -> Dict[str, Any]:
    """Wrap a list of check records into a section with pass/fail tallies.

    The section status is ``"pass"`` only when every check has status
    ``"pass"``. Keyword arguments are stored unchanged under ``"summary"``.
    """
    passing = [entry for entry in checks if entry["status"] == "pass"]
    failed_count = len(checks) - len(passing)
    return {
        "status": "fail" if failed_count else "pass",
        "passed": len(passing),
        "failed": failed_count,
        "checks": checks,
        "summary": summary,
    }
78
79
def _section_statuses(report: Dict[str, Any]) -> List[str]:
    """Return the ``status`` of the five check sections in fixed order.

    The order is interface, smoke, dynamics, sedimentation, snapshot.
    """
    section_keys = (
        "interface_checks",
        "smoke_checks",
        "dynamics_checks",
        "sedimentation_checks",
        "snapshot_checks",
    )
    return [report[key]["status"] for key in section_keys]
88
89
def _interface_checks() -> Dict[str, Any]:
    """Check a freshly constructed runtime against the locked interface.

    Two checks are produced: the initial snapshot must report output mode
    ``"minimal"`` with an empty active region, and every name in
    ``LOCKED_INTERFACE`` must resolve to a callable attribute of the runtime.
    """
    fresh_runtime = CIERuntime()
    initial_state = fresh_runtime.snapshot_state()

    starts_minimal = (
        initial_state["output_mode"] == "minimal"
        and initial_state["active_region"] == []
    )
    exposes_interface = all(
        callable(getattr(fresh_runtime, method_name, None))
        for method_name in LOCKED_INTERFACE
    )

    results = [
        _check(
            "runtime_initializes",
            starts_minimal,
            "A fresh runtime exposes minimal output mode and no active region.",
            observed_output_mode=initial_state["output_mode"],
            observed_active_region=initial_state["active_region"],
        ),
        _check(
            "locked_interface_presence",
            exposes_interface,
            "Branch A exposes the locked runtime interface required by the spec.",
            interface=list(LOCKED_INTERFACE),
        ),
    ]
    return _section(
        results,
        snapshot_keys=sorted(initial_state),
        required_snapshot_keys=sorted(REQUIRED_SNAPSHOT_KEYS),
    )
113
114
def _smoke_checks() -> Dict[str, Any]:
    """Drive one ingest -> step -> emit -> step cycle and check each stage.

    The runtime calls happen in a fixed order because each check inspects the
    value captured at a specific point of that sequence.
    """
    rt = CIERuntime(capacity_limit=8.0)
    before = rt.snapshot_state()
    ingest_result = rt.ingest(
        "graph native observability",
        context="branch a validation smoke",
        anchors="anchor",
    )
    after_two_steps = rt.step(2)
    emitted = rt.emit()
    post_emit = rt.snapshot_state()
    post_feedback = rt.step()

    pending_feedback = post_emit["feedback_effect"]
    applied_feedback = post_feedback["feedback_effect"]

    results: List[Dict[str, Any]] = []
    results.append(
        _check(
            "ingest_queues_signal",
            bool(ingest_result["queued_tokens"]) and bool(ingest_result["queued_anchors"]),
            "Ingest stores external tokens and anchor hints before the step loop runs.",
            queued_tokens=ingest_result["queued_tokens"],
            queued_anchors=ingest_result["queued_anchors"],
        )
    )
    results.append(
        _check(
            "step_materializes_graph_state",
            after_two_steps["phi_summary"]["node_count"] > before["phi_summary"]["node_count"]
            and after_two_steps["mu_summary"]["total_activation"] > 0.0
            and after_two_steps["J_summary"]["edge_count"] > 0,
            "Stepping from a queued input creates observable phi/mu/J state.",
            phi_summary=after_two_steps["phi_summary"],
            mu_summary=after_two_steps["mu_summary"],
            J_summary=after_two_steps["J_summary"],
        )
    )
    results.append(
        _check(
            "emit_queues_output_feedback",
            pending_feedback.get("source") == "emit"
            and bool(pending_feedback.get("queued_tokens")),
            "Emit creates a real output-to-input feedback signal rather than a log-only artifact.",
            emitted_output=emitted,
            feedback_effect=pending_feedback,
        )
    )
    results.append(
        _check(
            "feedback_changes_later_state",
            # Compared against the runtime's step index after the final step.
            applied_feedback.get("last_applied_step") == rt.state.step_index
            and bool(applied_feedback.get("applied_tokens")),
            "Queued feedback is applied on the next step and changes later runtime state.",
            feedback_effect=applied_feedback,
        )
    )
    return _section(
        results,
        emitted_output=emitted,
        bound_ability_core=post_feedback["bound_ability_core"],
        active_region=post_feedback["active_region"],
    )
167
168
def _dynamics_checks() -> Dict[str, Any]:
    """Check multi-step dynamics, homing signals, decay and output modes.

    Separate runtime instances are used for the evolution scenario, each of
    the three output-mode scenarios, and the decay scenario.
    """

    def _mode_and_output(capacity_limit: float, text: str):
        # Ingest, step twice, read the output mode, then emit, in that order.
        probe = CIERuntime(capacity_limit=capacity_limit)
        probe.ingest(text, anchors="anchor")
        probe.step(2)
        mode = probe.snapshot_state()["output_mode"]
        return mode, probe.emit()

    # Evolution scenario: a snapshot after one step and another two steps later.
    evolving = CIERuntime(capacity_limit=8.0)
    evolving.ingest("alpha beta alpha", context="gamma", anchors="anchor")
    first = evolving.step()
    third = evolving.step(2)

    # Output-mode scenarios, each with its own capacity limit and input.
    full_mode, full_output = _mode_and_output(10.0, "focus focus focus")
    degraded_mode, degraded_output = _mode_and_output(6.0, "alpha beta gamma delta epsilon")
    minimal_mode, minimal_output = _mode_and_output(0.9, "alpha beta gamma delta epsilon")

    # Decay scenario: four ingest/step rounds, a session reset, then six steps.
    decaying = CIERuntime(capacity_limit=10.0)
    for _ in range(4):
        decaying.ingest("stale alpha beta", anchors="anchor")
        decaying.step()
    decaying.reset_session()
    decayed = decaying.step(6)
    decay_events = decayed["decay_events"]

    observed_modes = sorted({full_mode, degraded_mode, minimal_mode})

    results: List[Dict[str, Any]] = []
    results.append(
        _check(
            "phi_mu_J_are_observable",
            first["phi_summary"]["total_potential"] != third["phi_summary"]["total_potential"]
            and first["mu_summary"]["total_activation"] != third["mu_summary"]["total_activation"]
            and first["J_summary"]["total_flow"] != third["J_summary"]["total_flow"],
            "Multi-step dynamics produce visible changes across phi, mu, and J.",
            step_one=first,
            step_three=third,
        )
    )
    results.append(
        _check(
            "homing_signals_are_populated",
            third["bound_ability_core"] is not None
            and third["anchor_pull"] > 0.0
            and 0.0 <= third["drift_score"] <= 1.0,
            "The runtime exposes bound_ability_core, anchor_pull, and drift_score as active homing signals.",
            bound_ability_core=third["bound_ability_core"],
            anchor_pull=third["anchor_pull"],
            drift_score=third["drift_score"],
        )
    )
    results.append(
        _check(
            "decay_and_forgetting_are_visible",
            bool(decay_events)
            and any(event["kind"] == "sedimentation_demote" for event in decay_events),
            "Decay/forgetting remains real and observable through decay events and stage demotion.",
            decay_events=decay_events[-6:],
        )
    )
    results.append(
        _check(
            "degraded_output_modes_exist",
            observed_modes == sorted(OUTPUT_MODES)
            and full_output.startswith("full:")
            and degraded_output.startswith("degraded:")
            and minimal_output.startswith("minimal:"),
            "The runtime emits full, degraded, and minimal outputs under different runtime conditions.",
            observed_modes=observed_modes,
            outputs={
                "full": full_output,
                "degraded": degraded_output,
                "minimal": minimal_output,
            },
        )
    )
    return _section(
        results,
        observed_modes=observed_modes,
        total_decay_events=len(decay_events),
    )
248
249
def _sedimentation_checks() -> Dict[str, Any]:
    """Check sedimentation trace, stage progression, candidates and merges.

    The scenario ingests ``"alpha beta alpha"`` and steps once, four times in
    a row, then inspects the resulting snapshot for the ``"alpha"`` node.

    Returns:
        A section dict as produced by ``_section``. If no skill-belt candidate
        for ``"alpha"`` exists, the candidate check is recorded as a failure
        with ``alpha_candidate`` set to ``None``.
    """
    runtime = CIERuntime(capacity_limit=10.0)
    for _ in range(4):
        runtime.ingest("alpha beta alpha", anchors="anchor")
        runtime.step()
    snapshot = runtime.snapshot_state()
    alpha_trace = [
        event for event in snapshot["sedimentation_trace"] if event["node"] == "alpha"
    ]
    # A default is required here: without one, a missing candidate raises
    # StopIteration and aborts the whole report instead of failing one check.
    alpha_candidate = next(
        (item for item in snapshot["skill_belt_candidates"] if item["node"] == "alpha"),
        None,
    )
    checks = [
        _check(
            "sedimentation_trace_exists",
            bool(snapshot["sedimentation_trace"]),
            "Sedimentation history is exported as explicit runtime trace entries.",
            trace_tail=snapshot["sedimentation_trace"][-5:],
        ),
        _check(
            "stage_progression_matches_locked_path",
            [event["to"] for event in alpha_trace] == ["experience", "skill_belt", "ability_core"],
            "Repeated activation follows the locked memory -> experience -> skill_belt -> ability_core path.",
            alpha_trace=alpha_trace,
        ),
        _check(
            "skill_belt_candidates_have_evidence",
            alpha_candidate is not None
            and alpha_candidate["stage"] in {"skill_belt", "ability_core"}
            and alpha_candidate["stable_steps"] >= 2,
            "Skill-belt candidates are backed by repeated activation, stability, and flow evidence.",
            alpha_candidate=alpha_candidate,
        ),
        _check(
            "merge_events_are_recorded",
            bool(snapshot["merge_events"])
            and any(event["node"] == "alpha" for event in snapshot["merge_events"]),
            "Stable skill-belt structures can produce explicit merge events into ability-core structures.",
            merge_events=snapshot["merge_events"],
        ),
    ]
    return _section(
        checks,
        experience_regions=snapshot["experience_regions"],
        bound_ability_core=snapshot["bound_ability_core"],
    )
295
296
def _snapshot_checks() -> Dict[str, Any]:
    """Check the snapshot produced after an ingest, two steps, emit and a step.

    Four checks are produced: the snapshot key set must equal
    ``REQUIRED_SNAPSHOT_KEYS``; the phi/mu/J summaries must be non-empty; the
    output mode must be one of ``OUTPUT_MODES`` with applied feedback tokens;
    and the homing and sedimentation fields must be populated.
    """
    runtime = CIERuntime(capacity_limit=8.0)
    runtime.ingest("branch a graph native feedback", context="runtime state", anchors="anchor")
    runtime.step(2)
    runtime.emit()
    snapshot = runtime.step()
    checks = [
        _check(
            "locked_snapshot_fields_present",
            set(snapshot) == REQUIRED_SNAPSHOT_KEYS,
            "snapshot_state returns the locked comparable field set.",
            observed_keys=sorted(snapshot),
            required_keys=sorted(REQUIRED_SNAPSHOT_KEYS),
        ),
        _check(
            "summary_fields_are_meaningful",
            snapshot["phi_summary"]["node_count"] > 0
            and snapshot["mu_summary"]["active_count"] > 0
            and snapshot["J_summary"]["edge_count"] > 0,
            "phi_summary, mu_summary, and J_summary expose non-empty observable summaries after activity.",
            phi_summary=snapshot["phi_summary"],
            mu_summary=snapshot["mu_summary"],
            J_summary=snapshot["J_summary"],
        ),
        _check(
            "feedback_and_output_fields_are_populated",
            snapshot["output_mode"] in OUTPUT_MODES
            and bool(snapshot["feedback_effect"].get("applied_tokens")),
            "snapshot_state exposes output_mode and feedback_effect with applied feedback evidence.",
            output_mode=snapshot["output_mode"],
            feedback_effect=snapshot["feedback_effect"],
        ),
        _check(
            "locked_homing_and_sedimentation_fields_are_populated",
            # bool() keeps the `ok` argument a real boolean, as in the other
            # checks, rather than passing the container itself.
            snapshot["bound_ability_core"] is not None
            and snapshot["anchor_pull"] > 0.0
            and bool(snapshot["skill_belt_candidates"])
            and bool(snapshot["sedimentation_trace"]),
            "The locked homing and sedimentation-facing snapshot fields are populated under a controlled scenario.",
            bound_ability_core=snapshot["bound_ability_core"],
            anchor_pull=snapshot["anchor_pull"],
            drift_score=snapshot["drift_score"],
        ),
    ]
    return _section(
        checks,
        output_mode=snapshot["output_mode"],
        free_capacity=snapshot["free_capacity"],
    )
346
347
def _known_limitations() -> List[str]:
    """Return the fixed limitation statements included in every report."""
    limitations: List[str] = []
    limitations.append(
        "The validation harness is scenario-based and compact; it is not a benchmark or long-run stability suite."
    )
    limitations.append(
        "Checks focus on the locked observable runtime surface rather than richer semantic task performance."
    )
    limitations.append(
        "Sedimentation and homing remain explicit but heuristic, which is acceptable for the review/comparison stage."
    )
    return limitations
354
355
def _runtime_summary(report: Dict[str, Any], json_path: Path, markdown_path: Path) -> Dict[str, Any]:
    """Summarize the run: overall status, scenarios, observed modes, outputs.

    Args:
        report: Report dict that already contains the five check sections.
        json_path: Destination of the JSON report, recorded as a string.
        markdown_path: Destination of the Markdown report, recorded as a string.

    Returns:
        A dict with ``status``, ``scenarios``, ``output_modes_observed``,
        ``reports_generated`` and ``ready_for_review``.
    """
    ready = all(status == "pass" for status in _section_statuses(report))

    # Report the modes the dynamics section actually observed instead of
    # unconditionally claiming every mode. Known modes keep OUTPUT_MODES
    # order, so a fully passing run lists them exactly as before.
    observed = report["dynamics_checks"].get("summary", {}).get("observed_modes")
    if observed is None:
        # No recorded observation to draw on: keep the legacy value.
        modes_observed = list(OUTPUT_MODES)
    else:
        modes_observed = [mode for mode in OUTPUT_MODES if mode in observed]
        # str() keeps the Markdown renderer's ", ".join(...) safe for any value.
        modes_observed += [str(mode) for mode in observed if mode not in OUTPUT_MODES]

    return {
        "status": "pass" if ready else "fail",
        "scenarios": ["interface", "smoke", "dynamics", "sedimentation", "snapshot"],
        "output_modes_observed": modes_observed,
        "reports_generated": {
            "json": str(json_path),
            "markdown": str(markdown_path),
        },
        "ready_for_review": ready,
    }
368
369
def _overall_status(report: Dict[str, Any]) -> Dict[str, Any]:
    """Aggregate the section statuses into the report's final verdict.

    The verdict is ``"pass"`` and ``ready_for_review`` is true only when no
    section failed; the summary sentence is chosen accordingly.
    """
    statuses = _section_statuses(report)
    passed_count = len([status for status in statuses if status == "pass"])
    failed_count = len(statuses) - passed_count
    is_ready = failed_count == 0

    if is_ready:
        verdict = "pass"
        message = "Branch A validation passed and is ready for review/comparison."
    else:
        verdict = "fail"
        message = "Branch A validation found issues that should be reviewed before comparison."

    return {
        "status": verdict,
        "passed_sections": passed_count,
        "failed_sections": failed_count,
        "ready_for_review": is_ready,
        "summary": message,
    }
386
387
def _render_markdown(report: Dict[str, Any]) -> str:
    """Render the report dict as a Markdown document.

    The output has a header, a runtime summary, one heading per check section
    with its tallies and a bullet per check, the known limitations, and a
    readiness line. Lines are joined with newlines and the final element is
    empty, so the text ends with a single newline.
    """
    overall = report["overall_status"]
    runtime_summary = report["runtime_summary"]

    out: List[str] = [
        "# Branch A Validation Report",
        "",
        f"- Branch: `{report['branch']}`",
        f"- Base commit: `{report['base_commit']}`",
        f"- Overall status: `{overall['status'].upper()}`",
        f"- Ready for review/comparison: `{overall['ready_for_review']}`",
        "",
        "## Runtime Summary",
        f"- Status: `{runtime_summary['status'].upper()}`",
        f"- Scenarios: {', '.join(runtime_summary['scenarios'])}",
        f"- Output modes observed: {', '.join(runtime_summary['output_modes_observed'])}",
        "",
    ]

    titled_sections = (
        ("interface_checks", "Interface Checks"),
        ("smoke_checks", "Smoke Checks"),
        ("dynamics_checks", "Dynamics Checks"),
        ("sedimentation_checks", "Sedimentation Checks"),
        ("snapshot_checks", "Snapshot Checks"),
    )
    for section_key, heading in titled_sections:
        body = report[section_key]
        out.append(f"## {heading}")
        out.append(f"- Status: `{body['status'].upper()}`")
        out.append(f"- Passed: `{body['passed']}`")
        out.append(f"- Failed: `{body['failed']}`")
        out.extend(
            f"- `{entry['status'].upper()}` {entry['name']}: {entry['detail']}"
            for entry in body["checks"]
        )
        out.append("")

    out.append("## Known Limitations")
    out.extend(f"- {item}" for item in report["known_limitations"])
    out += ["", "## Readiness", f"- {overall['summary']}", ""]
    return "\n".join(out)
433
434
def generate_validation_report(
    json_path: Path | str = DEFAULT_JSON_REPORT_PATH,
    markdown_path: Path | str = DEFAULT_MARKDOWN_REPORT_PATH,
) -> Dict[str, Any]:
    """Run every check section, write both report files, and return the report.

    Args:
        json_path: Where the JSON report is written.
        markdown_path: Where the Markdown report is written.

    Returns:
        The report dict that was serialized to both files.

    Parent directories of both targets are created if missing.
    """
    json_target = Path(json_path)
    markdown_target = Path(markdown_path)

    # Keys are inserted in the order they appear in the serialized report;
    # the two summaries start as empty placeholders and are filled in below.
    report: Dict[str, Any] = {
        "branch": _current_branch(),
        "base_commit": BASE_COMMIT,
        "runtime_summary": {},
    }
    section_builders = (
        ("interface_checks", _interface_checks),
        ("smoke_checks", _smoke_checks),
        ("dynamics_checks", _dynamics_checks),
        ("sedimentation_checks", _sedimentation_checks),
        ("snapshot_checks", _snapshot_checks),
    )
    for section_key, build_section in section_builders:
        report[section_key] = build_section()
    report["known_limitations"] = _known_limitations()
    report["overall_status"] = {}

    report["runtime_summary"] = _runtime_summary(report, json_target, markdown_target)
    report["overall_status"] = _overall_status(report)

    for target in (json_target, markdown_target):
        target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(report, indent=2, ensure_ascii=True) + "\n"
    json_target.write_text(serialized, encoding="utf-8")
    markdown_target.write_text(_render_markdown(report), encoding="utf-8")
    return report
460
461
def _parse_args(argv: Sequence[str]) -> argparse.Namespace:
    """Parse the ``--json-out`` and ``--markdown-out`` options from *argv*.

    Both options default to the string form of the module's default report
    paths.
    """
    parser = argparse.ArgumentParser(description="Branch A validation and report generation.")
    output_options = (
        ("--json-out", DEFAULT_JSON_REPORT_PATH),
        ("--markdown-out", DEFAULT_MARKDOWN_REPORT_PATH),
    )
    for flag, default_path in output_options:
        parser.add_argument(flag, default=str(default_path))
    return parser.parse_args(argv)
467
468
def main(argv: Sequence[str] | None = None) -> int:
    """Generate the reports and return a process exit code.

    Args:
        argv: Command-line arguments; ``sys.argv[1:]`` is used when ``None``.

    Returns:
        ``0`` when the report's overall status is ``"pass"``, otherwise ``1``.
    """
    cli_args = sys.argv[1:] if argv is None else argv
    options = _parse_args(cli_args)
    report = generate_validation_report(options.json_out, options.markdown_out)
    if report["overall_status"]["status"] == "pass":
        return 0
    return 1


if __name__ == "__main__":
    sys.exit(main())