CIE-Unified

git clone 

CIE-Unified / cie
codex@macbookpro  ·  2026-03-31

validation.py

  1from __future__ import annotations
  2
  3import argparse
  4import json
  5import subprocess
  6import sys
  7from pathlib import Path
  8from typing import Any, Dict, Iterable, List, Sequence
  9
 10from .runtime import CIERuntime, REQUIRED_SNAPSHOT_KEYS
 11
# Commit hash copied verbatim into the report's "base_commit" field.
BASE_COMMIT = "164c06af63812c69cab32d8f8a6c770b96f38ef6"
# Branch name reported when git cannot supply one (fallback used by _current_branch).
DEFAULT_BRANCH = "branch-a/task04-validation-reporting"
# Parent of the directory containing this file; used as git's working
# directory and as the root of the default report paths.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Default output locations for the JSON and Markdown renderings of the report.
DEFAULT_JSON_REPORT_PATH = REPO_ROOT / "reports" / "2026-03-31_task04_branch_a_validation.json"
DEFAULT_MARKDOWN_REPORT_PATH = REPO_ROOT / "reports" / "2026-03-31_task04_branch_a_validation.md"
# Top-level keys of the report dict, in the order generate_validation_report
# creates them. NOTE(review): not referenced elsewhere in this module --
# presumably consumed by tests or other callers; confirm.
REPORT_TOP_LEVEL_KEYS = (
    "branch",
    "base_commit",
    "runtime_summary",
    "interface_checks",
    "smoke_checks",
    "dynamics_checks",
    "sedimentation_checks",
    "snapshot_checks",
    "known_limitations",
    "overall_status",
)
# Attribute names that _interface_checks requires to be callable on a runtime.
LOCKED_INTERFACE = (
    "ingest",
    "step",
    "emit",
    "commit_feedback",
    "snapshot_state",
    "reset_session",
)
# Output mode names that the dynamics and snapshot checks compare against.
OUTPUT_MODES = ("full", "degraded", "minimal")
 38
 39
 40def _git_stdout(args: Sequence[str], fallback: str) -> str:
 41    try:
 42        completed = subprocess.run(
 43            ["git", *args],
 44            cwd=REPO_ROOT,
 45            check=True,
 46            capture_output=True,
 47            text=True,
 48        )
 49    except (FileNotFoundError, subprocess.CalledProcessError):
 50        return fallback
 51    value = completed.stdout.strip()
 52    return value or fallback
 53
 54
 55def _current_branch() -> str:
 56    return _git_stdout(["rev-parse", "--abbrev-ref", "HEAD"], DEFAULT_BRANCH)
 57
 58
 59def _check(name: str, ok: bool, detail: str, **extra: Any) -> Dict[str, Any]:
 60    entry: Dict[str, Any] = {
 61        "name": name,
 62        "status": "pass" if ok else "fail",
 63        "detail": detail,
 64    }
 65    entry.update(extra)
 66    return entry
 67
 68
 69def _section(checks: List[Dict[str, Any]], **summary: Any) -> Dict[str, Any]:
 70    failed = sum(1 for item in checks if item["status"] != "pass")
 71    return {
 72        "status": "pass" if failed == 0 else "fail",
 73        "passed": len(checks) - failed,
 74        "failed": failed,
 75        "checks": checks,
 76        "summary": summary,
 77    }
 78
 79
 80def _section_statuses(report: Dict[str, Any]) -> List[str]:
 81    return [
 82        report["interface_checks"]["status"],
 83        report["smoke_checks"]["status"],
 84        report["dynamics_checks"]["status"],
 85        report["sedimentation_checks"]["status"],
 86        report["snapshot_checks"]["status"],
 87    ]
 88
 89
 90def _interface_checks() -> Dict[str, Any]:
 91    runtime = CIERuntime()
 92    snapshot = runtime.snapshot_state()
 93    checks = [
 94        _check(
 95            "runtime_initializes",
 96            snapshot["output_mode"] == "minimal" and snapshot["active_region"] == [],
 97            "A fresh runtime exposes minimal output mode and no active region.",
 98            observed_output_mode=snapshot["output_mode"],
 99            observed_active_region=snapshot["active_region"],
100        ),
101        _check(
102            "locked_interface_presence",
103            all(callable(getattr(runtime, name, None)) for name in LOCKED_INTERFACE),
104            "Branch A exposes the locked runtime interface required by the spec.",
105            interface=list(LOCKED_INTERFACE),
106        ),
107    ]
108    return _section(
109        checks,
110        snapshot_keys=sorted(snapshot),
111        required_snapshot_keys=sorted(REQUIRED_SNAPSHOT_KEYS),
112    )
113
114
def _smoke_checks() -> Dict[str, Any]:
    """Drive one ingest -> step -> emit -> step cycle and check each stage.

    The scenario uses a single runtime with ``capacity_limit=8.0``. Snapshots
    are captured before ingest, after ``step(2)``, after ``emit()`` and after
    one further ``step()``, and four checks are derived from them.

    Returns:
        The section dict built by ``_section``; its summary carries the
        emitted output plus the final ``bound_ability_core`` and
        ``active_region``.
    """
    runtime = CIERuntime(capacity_limit=8.0)
    before = runtime.snapshot_state()
    ingest_result = runtime.ingest(
        "graph native observability",
        context="branch a validation smoke",
        anchors="anchor",
    )
    after_steps = runtime.step(2)
    emitted = runtime.emit()
    after_emit = runtime.snapshot_state()
    final = runtime.step()

    checks: List[Dict[str, Any]] = []

    checks.append(
        _check(
            "ingest_queues_signal",
            bool(ingest_result["queued_tokens"]) and bool(ingest_result["queued_anchors"]),
            "Ingest stores external tokens and anchor hints before the step loop runs.",
            queued_tokens=ingest_result["queued_tokens"],
            queued_anchors=ingest_result["queued_anchors"],
        )
    )

    phi = after_steps["phi_summary"]
    mu = after_steps["mu_summary"]
    flow = after_steps["J_summary"]
    checks.append(
        _check(
            "step_materializes_graph_state",
            phi["node_count"] > before["phi_summary"]["node_count"]
            and mu["total_activation"] > 0.0
            and flow["edge_count"] > 0,
            "Stepping from a queued input creates observable phi/mu/J state.",
            phi_summary=phi,
            mu_summary=mu,
            J_summary=flow,
        )
    )

    pending_effect = after_emit["feedback_effect"]
    checks.append(
        _check(
            "emit_queues_output_feedback",
            pending_effect.get("source") == "emit"
            and bool(pending_effect.get("queued_tokens")),
            "Emit creates a real output-to-input feedback signal rather than a log-only artifact.",
            emitted_output=emitted,
            feedback_effect=pending_effect,
        )
    )

    applied_effect = final["feedback_effect"]
    checks.append(
        _check(
            "feedback_changes_later_state",
            applied_effect.get("last_applied_step") == runtime.state.step_index
            and bool(applied_effect.get("applied_tokens")),
            "Queued feedback is applied on the next step and changes later runtime state.",
            feedback_effect=applied_effect,
        )
    )

    return _section(
        checks,
        emitted_output=emitted,
        bound_ability_core=final["bound_ability_core"],
        active_region=final["active_region"],
    )
167
168
def _dynamics_checks() -> Dict[str, Any]:
    """Run the dynamics scenarios and check evolution, homing, decay and modes.

    Five independent runtimes are exercised in this order:

    1. an evolution runtime, snapshotted after ``step()`` and again after a
       further ``step(2)``;
    2. three output-mode probes with capacity limits 10.0, 6.0 and 0.9;
    3. a decay runtime that ingests and steps four times, resets its
       session and then calls ``step(6)``.

    Returns:
        The section dict built by ``_section``; its summary carries the
        sorted set of observed output modes and the decay event count.
    """

    def probe_output_mode(capacity_limit: float, text: str):
        """Ingest ``text``, step twice, return ``(output_mode, emitted_output)``."""
        probe = CIERuntime(capacity_limit=capacity_limit)
        probe.ingest(text, anchors="anchor")
        probe.step(2)
        mode = probe.snapshot_state()["output_mode"]
        return mode, probe.emit()

    evolving = CIERuntime(capacity_limit=8.0)
    evolving.ingest("alpha beta alpha", context="gamma", anchors="anchor")
    early = evolving.step()
    late = evolving.step(2)

    full_mode, full_output = probe_output_mode(10.0, "focus focus focus")
    degraded_mode, degraded_output = probe_output_mode(6.0, "alpha beta gamma delta epsilon")
    minimal_mode, minimal_output = probe_output_mode(0.9, "alpha beta gamma delta epsilon")

    decaying = CIERuntime(capacity_limit=10.0)
    for _ in range(4):
        decaying.ingest("stale alpha beta", anchors="anchor")
        decaying.step()
    decaying.reset_session()
    decay_snapshot = decaying.step(6)

    # The set collapses duplicates, so a repeated mode shortens this list.
    observed_modes = sorted({full_mode, degraded_mode, minimal_mode})
    checks: List[Dict[str, Any]] = []

    compared_totals = (
        ("phi_summary", "total_potential"),
        ("mu_summary", "total_activation"),
        ("J_summary", "total_flow"),
    )
    checks.append(
        _check(
            "phi_mu_J_are_observable",
            all(early[summary][field] != late[summary][field] for summary, field in compared_totals),
            "Multi-step dynamics produce visible changes across phi, mu, and J.",
            step_one=early,
            step_three=late,
        )
    )

    checks.append(
        _check(
            "homing_signals_are_populated",
            late["bound_ability_core"] is not None
            and late["anchor_pull"] > 0.0
            and 0.0 <= late["drift_score"] <= 1.0,
            "The runtime exposes bound_ability_core, anchor_pull, and drift_score as active homing signals.",
            bound_ability_core=late["bound_ability_core"],
            anchor_pull=late["anchor_pull"],
            drift_score=late["drift_score"],
        )
    )

    decay_events = decay_snapshot["decay_events"]
    checks.append(
        _check(
            "decay_and_forgetting_are_visible",
            bool(decay_events)
            and any(event["kind"] == "sedimentation_demote" for event in decay_events),
            "Decay/forgetting remains real and observable through decay events and stage demotion.",
            decay_events=decay_events[-6:],
        )
    )

    expected_prefixes = (
        (full_output, "full:"),
        (degraded_output, "degraded:"),
        (minimal_output, "minimal:"),
    )
    checks.append(
        _check(
            "degraded_output_modes_exist",
            observed_modes == sorted(OUTPUT_MODES)
            and all(text.startswith(prefix) for text, prefix in expected_prefixes),
            "The runtime emits full, degraded, and minimal outputs under different runtime conditions.",
            observed_modes=observed_modes,
            outputs={
                "full": full_output,
                "degraded": degraded_output,
                "minimal": minimal_output,
            },
        )
    )

    return _section(
        checks,
        observed_modes=observed_modes,
        total_decay_events=len(decay_events),
    )
248
249
def _sedimentation_checks() -> Dict[str, Any]:
    """Check the sedimentation trace, stage progression and merge events.

    One runtime with ``capacity_limit=10.0`` ingests ``"alpha beta alpha"``
    and steps, four times over. The resulting snapshot is then inspected
    for the "alpha" node.

    Returns:
        The section dict built by ``_section``; its summary carries the
        snapshot's ``experience_regions`` and ``bound_ability_core``.
    """
    runtime = CIERuntime(capacity_limit=10.0)
    for _ in range(4):
        runtime.ingest("alpha beta alpha", anchors="anchor")
        runtime.step()
    snapshot = runtime.snapshot_state()
    alpha_trace = [
        event for event in snapshot["sedimentation_trace"] if event["node"] == "alpha"
    ]
    # Default to None: a missing "alpha" candidate must be reported as a
    # failed check, not escape as StopIteration and abort the whole report.
    alpha_candidate = next(
        (item for item in snapshot["skill_belt_candidates"] if item["node"] == "alpha"),
        None,
    )
    checks = [
        _check(
            "sedimentation_trace_exists",
            bool(snapshot["sedimentation_trace"]),
            "Sedimentation history is exported as explicit runtime trace entries.",
            trace_tail=snapshot["sedimentation_trace"][-5:],
        ),
        _check(
            "stage_progression_matches_locked_path",
            [event["to"] for event in alpha_trace] == ["experience", "skill_belt", "ability_core"],
            "Repeated activation follows the locked memory -> experience -> skill_belt -> ability_core path.",
            alpha_trace=alpha_trace,
        ),
        _check(
            "skill_belt_candidates_have_evidence",
            alpha_candidate is not None
            and alpha_candidate["stage"] in {"skill_belt", "ability_core"}
            and alpha_candidate["stable_steps"] >= 2,
            "Skill-belt candidates are backed by repeated activation, stability, and flow evidence.",
            # None here means no candidate for "alpha" was exported.
            alpha_candidate=alpha_candidate,
        ),
        _check(
            "merge_events_are_recorded",
            bool(snapshot["merge_events"])
            and any(event["node"] == "alpha" for event in snapshot["merge_events"]),
            "Stable skill-belt structures can produce explicit merge events into ability-core structures.",
            merge_events=snapshot["merge_events"],
        ),
    ]
    return _section(
        checks,
        experience_regions=snapshot["experience_regions"],
        bound_ability_core=snapshot["bound_ability_core"],
    )
295
296
def _snapshot_checks() -> Dict[str, Any]:
    """Check that a post-feedback snapshot exposes the required fields.

    One runtime with ``capacity_limit=8.0`` ingests a phrase, runs
    ``step(2)``, emits, and steps once more. The snapshot returned by that
    last step is the one inspected.

    Returns:
        The section dict built by ``_section``; its summary carries the
        snapshot's ``output_mode`` and ``free_capacity``.
    """
    runtime = CIERuntime(capacity_limit=8.0)
    runtime.ingest("branch a graph native feedback", context="runtime state", anchors="anchor")
    runtime.step(2)
    runtime.emit()
    snapshot = runtime.step()
    checks = [
        _check(
            "locked_snapshot_fields_present",
            # set(...) on both sides keeps the comparison valid whatever
            # container type REQUIRED_SNAPSHOT_KEYS is (defined outside this
            # module); a tuple or list would otherwise never equal a set.
            set(snapshot) == set(REQUIRED_SNAPSHOT_KEYS),
            "snapshot_state returns the locked comparable field set.",
            observed_keys=sorted(snapshot),
            required_keys=sorted(REQUIRED_SNAPSHOT_KEYS),
        ),
        _check(
            "summary_fields_are_meaningful",
            snapshot["phi_summary"]["node_count"] > 0
            and snapshot["mu_summary"]["active_count"] > 0
            and snapshot["J_summary"]["edge_count"] > 0,
            "phi_summary, mu_summary, and J_summary expose non-empty observable summaries after activity.",
            phi_summary=snapshot["phi_summary"],
            mu_summary=snapshot["mu_summary"],
            J_summary=snapshot["J_summary"],
        ),
        _check(
            "feedback_and_output_fields_are_populated",
            snapshot["output_mode"] in OUTPUT_MODES
            and bool(snapshot["feedback_effect"].get("applied_tokens")),
            "snapshot_state exposes output_mode and feedback_effect with applied feedback evidence.",
            output_mode=snapshot["output_mode"],
            feedback_effect=snapshot["feedback_effect"],
        ),
        _check(
            "locked_homing_and_sedimentation_fields_are_populated",
            # bool(...) so `ok` is a real boolean like in every other check,
            # rather than whichever container ends the `and` chain.
            snapshot["bound_ability_core"] is not None
            and snapshot["anchor_pull"] > 0.0
            and bool(snapshot["skill_belt_candidates"])
            and bool(snapshot["sedimentation_trace"]),
            "The locked homing and sedimentation-facing snapshot fields are populated under a controlled scenario.",
            bound_ability_core=snapshot["bound_ability_core"],
            anchor_pull=snapshot["anchor_pull"],
            drift_score=snapshot["drift_score"],
        ),
    ]
    return _section(
        checks,
        output_mode=snapshot["output_mode"],
        free_capacity=snapshot["free_capacity"],
    )
346
347
348def _known_limitations() -> List[str]:
349    return [
350        "The validation harness is scenario-based and compact; it is not a benchmark or long-run stability suite.",
351        "Checks focus on the locked observable runtime surface rather than richer semantic task performance.",
352        "Sedimentation and homing remain explicit but heuristic, which is acceptable for the review/comparison stage.",
353    ]
354
355
def _runtime_summary(report: Dict[str, Any], json_path: Path, markdown_path: Path) -> Dict[str, Any]:
    """Summarise the run: overall status, scenarios, modes and report paths.

    Args:
        report: Report dict whose five check sections are already filled in.
        json_path: Destination of the JSON report (recorded as a string).
        markdown_path: Destination of the Markdown report (recorded as a
            string).

    Returns:
        A dict with ``status``, ``scenarios``, ``output_modes_observed``,
        ``reports_generated`` and ``ready_for_review``.
    """
    all_passed = all(status == "pass" for status in _section_statuses(report))

    # Report the modes the dynamics section actually saw instead of
    # unconditionally claiming every entry of OUTPUT_MODES was observed.
    dynamics_summary = report["dynamics_checks"].get("summary", {})
    observed = set(dynamics_summary.get("observed_modes", ()))
    # Known modes keep the canonical OUTPUT_MODES order, so a fully passing
    # run lists them exactly as before; unexpected modes follow, sorted.
    modes_observed = [mode for mode in OUTPUT_MODES if mode in observed]
    modes_observed.extend(sorted(observed.difference(OUTPUT_MODES)))

    return {
        "status": "pass" if all_passed else "fail",
        "scenarios": ["interface", "smoke", "dynamics", "sedimentation", "snapshot"],
        "output_modes_observed": modes_observed,
        "reports_generated": {
            "json": str(json_path),
            "markdown": str(markdown_path),
        },
        "ready_for_review": all_passed,
    }
368
369
def _overall_status(report: Dict[str, Any]) -> Dict[str, Any]:
    """Condense the five section statuses into the final verdict.

    Returns:
        A dict with ``status`` (``"pass"`` only when no section failed),
        the ``passed_sections``/``failed_sections`` counts,
        ``ready_for_review`` and a one-sentence ``summary``.
    """
    statuses = _section_statuses(report)
    passed_count = statuses.count("pass")
    failed_count = len(statuses) - passed_count
    ready = not failed_count
    if ready:
        verdict = "pass"
        summary = "Branch A validation passed and is ready for review/comparison."
    else:
        verdict = "fail"
        summary = "Branch A validation found issues that should be reviewed before comparison."
    return {
        "status": verdict,
        "passed_sections": passed_count,
        "failed_sections": failed_count,
        "ready_for_review": ready,
        "summary": summary,
    }
386
387
388def _render_markdown(report: Dict[str, Any]) -> str:
389    lines = [
390        "# Branch A Validation Report",
391        "",
392        f"- Branch: `{report['branch']}`",
393        f"- Base commit: `{report['base_commit']}`",
394        f"- Overall status: `{report['overall_status']['status'].upper()}`",
395        f"- Ready for review/comparison: `{report['overall_status']['ready_for_review']}`",
396        "",
397        "## Runtime Summary",
398        f"- Status: `{report['runtime_summary']['status'].upper()}`",
399        f"- Scenarios: {', '.join(report['runtime_summary']['scenarios'])}",
400        f"- Output modes observed: {', '.join(report['runtime_summary']['output_modes_observed'])}",
401        "",
402    ]
403    for key, title in (
404        ("interface_checks", "Interface Checks"),
405        ("smoke_checks", "Smoke Checks"),
406        ("dynamics_checks", "Dynamics Checks"),
407        ("sedimentation_checks", "Sedimentation Checks"),
408        ("snapshot_checks", "Snapshot Checks"),
409    ):
410        section = report[key]
411        lines.extend(
412            [
413                f"## {title}",
414                f"- Status: `{section['status'].upper()}`",
415                f"- Passed: `{section['passed']}`",
416                f"- Failed: `{section['failed']}`",
417            ]
418        )
419        for check in section["checks"]:
420            lines.append(f"- `{check['status'].upper()}` {check['name']}: {check['detail']}")
421        lines.append("")
422    lines.extend(
423        [
424            "## Known Limitations",
425            *[f"- {item}" for item in report["known_limitations"]],
426            "",
427            "## Readiness",
428            f"- {report['overall_status']['summary']}",
429            "",
430        ]
431    )
432    return "\n".join(lines)
433
434
def generate_validation_report(
    json_path: Path | str = DEFAULT_JSON_REPORT_PATH,
    markdown_path: Path | str = DEFAULT_MARKDOWN_REPORT_PATH,
) -> Dict[str, Any]:
    """Run every validation section and write the JSON and Markdown reports.

    Args:
        json_path: Where the JSON report is written.
        markdown_path: Where the Markdown report is written.

    Returns:
        The complete report dict that was serialised to both files.

    Parent directories of both targets are created when missing.
    """
    json_target = Path(json_path)
    markdown_target = Path(markdown_path)

    # Keys are inserted in the final report order; the two empty
    # placeholders reserve their positions and are filled in once the
    # check sections exist.
    report: Dict[str, Any] = {
        "branch": _current_branch(),
        "base_commit": BASE_COMMIT,
        "runtime_summary": {},
    }
    section_builders = (
        ("interface_checks", _interface_checks),
        ("smoke_checks", _smoke_checks),
        ("dynamics_checks", _dynamics_checks),
        ("sedimentation_checks", _sedimentation_checks),
        ("snapshot_checks", _snapshot_checks),
    )
    for key, build_section in section_builders:
        report[key] = build_section()
    report["known_limitations"] = _known_limitations()
    report["overall_status"] = {}

    report["runtime_summary"] = _runtime_summary(report, json_target, markdown_target)
    report["overall_status"] = _overall_status(report)

    for target in (json_target, markdown_target):
        target.parent.mkdir(parents=True, exist_ok=True)

    serialized = json.dumps(report, indent=2, ensure_ascii=True) + "\n"
    json_target.write_text(serialized, encoding="utf-8")
    markdown_target.write_text(_render_markdown(report), encoding="utf-8")
    return report
460
461
def _parse_args(argv: Sequence[str]) -> argparse.Namespace:
    """Parse the command-line options ``--json-out`` and ``--markdown-out``.

    Both options default to the string form of the corresponding default
    report path.
    """
    parser = argparse.ArgumentParser(description="Branch A validation and report generation.")
    output_options = (
        ("--json-out", DEFAULT_JSON_REPORT_PATH),
        ("--markdown-out", DEFAULT_MARKDOWN_REPORT_PATH),
    )
    for flag, default_path in output_options:
        parser.add_argument(flag, default=str(default_path))
    return parser.parse_args(argv)
467
468
def main(argv: Sequence[str] | None = None) -> int:
    """Generate the validation report and return a process exit code.

    Args:
        argv: Command-line arguments; ``sys.argv[1:]`` is used when None.

    Returns:
        0 when the report's overall status is ``"pass"``, otherwise 1.
    """
    if argv is None:
        argv = sys.argv[1:]
    options = _parse_args(argv)
    report = generate_validation_report(options.json_out, options.markdown_out)
    passed = report["overall_status"]["status"] == "pass"
    return 0 if passed else 1
473
474
# Script entry point: the process exits with main()'s return code
# (0 when the overall status is "pass", 1 otherwise).
# NOTE: this module uses a relative import (`from .runtime import ...`), so it
# has to be executed as part of its package (e.g. via `python -m`).
if __name__ == "__main__":
    raise SystemExit(main())