review: strict rerun branch-b formal validation on cie-datasets

commit: c110916
parent: c734288
author: codex@macbookpro
date: 2026-03-31 20:23:54 +0800 CST

review: strict rerun branch-b formal validation on cie-datasets

4 files changed, +2329, -0

A reports/2026-03-31_branch_b_formal_validation_strict_rerun.json

A reports/2026-03-31_branch_b_formal_validation_strict_rerun.md

A reviews/2026-03-31_branch_b_formal_validation_strict_rerun.md

A tests/formal_validation_strict_rerun.py

A reports/2026-03-31_branch_b_formal_validation_strict_rerun.json

+1370, -0

   1@@ -0,0 +1,1370 @@
   2+{
   3+  "claude_claim_commit": "379feb2ed4324020ee48a97a6edb8ec99ce39f1a",
   4+  "claude_claim_commit_exists": true,
   5+  "chosen_runnable_branch_b_commit": "c7342881bb2ebfa5e7f927c91a7806416288573b",
   6+  "dataset_repo_status": {
   7+    "path": "/Users/george/code/cie-datasets",
   8+    "is_git_repo": true,
   9+    "git_lfs_version": "git-lfs/3.7.1 (GitHub; darwin arm64; go 1.25.3)",
  10+    "status_short": [],
  11+    "lfs_required_entries": {},
  12+    "remote_v": [
  13+      "origin\tgit@github.com:imwower/cie-datasets.git (fetch)",
  14+      "origin\tgit@github.com:imwower/cie-datasets.git (push)"
  15+    ]
  16+  },
  17+  "required_dataset_paths": [
  18+    {
  19+      "stage": "小学",
  20+      "subject": "语文",
  21+      "relative_path": "splits/by_stage_subject/小学/语文.jsonl",
  22+      "path": "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/语文.jsonl",
  23+      "pre_pull_worktree_state": "pointer_stub",
  24+      "exists": true,
  25+      "is_pointer_stub_now": false,
  26+      "hydrated": true,
  27+      "line_count": 1597,
  28+      "first_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:小学_语文_统编版_义务教育教科书·语文一年级上册:1\", \"lines\": [\"[非正文页]\"], \"error\": null, \"page\":",
  29+      "second_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:小学_语文_统编版_义务教育教科书·语文一年级上册:2\", \"lines\": [\"[非正文页]\"], \"error\": null, \"page\":"
  30+    },
  31+    {
  32+      "stage": "小学",
  33+      "subject": "数学",
  34+      "relative_path": "splits/by_stage_subject/小学/数学.jsonl",
  35+      "path": "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/数学.jsonl",
  36+      "pre_pull_worktree_state": "pointer_stub",
  37+      "exists": true,
  38+      "is_pointer_stub_now": false,
  39+      "hydrated": true,
  40+      "line_count": 7459,
  41+      "first_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:小学_数学_人教版_义务教育教科书 · 数学一年级上册:1\", \"lines\": [\"[非正文页]\"], \"error\": null, \"page",
  42+      "second_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:小学_数学_人教版_义务教育教科书 · 数学一年级上册:2\", \"lines\": [\"[非正文页]\"], \"error\": null, \"page"
  43+    },
  44+    {
  45+      "stage": "小学",
  46+      "subject": "科学",
  47+      "relative_path": "splits/by_stage_subject/小学/科学.jsonl",
  48+      "path": "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/科学.jsonl",
  49+      "pre_pull_worktree_state": "pointer_stub",
  50+      "exists": true,
  51+      "is_pointer_stub_now": false,
  52+      "hydrated": true,
  53+      "line_count": 5032,
  54+      "first_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:小学_科学_人教鄂教版_义务教育教科书·科学一年级上册:1\", \"lines\": [\"[非正文页]\"], \"error\": null, \"page",
  55+      "second_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:小学_科学_人教鄂教版_义务教育教科书·科学一年级上册:2\", \"lines\": [\"[非正文页]\"], \"error\": null, \"page"
  56+    },
  57+    {
  58+      "stage": "初中",
  59+      "subject": "语文",
  60+      "relative_path": "splits/by_stage_subject/初中/语文.jsonl",
  61+      "path": "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/初中/语文.jsonl",
  62+      "pre_pull_worktree_state": "pointer_stub",
  63+      "exists": true,
  64+      "is_pointer_stub_now": false,
  65+      "hydrated": true,
  66+      "line_count": 961,
  67+      "first_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:初中_语文_统编版-人民教育出版社_七年级_义务教育教科书·语文七年级上册:1\", \"lines\": [\"[非正文页]\"], \"error\": n",
  68+      "second_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:初中_语文_统编版-人民教育出版社_七年级_义务教育教科书·语文七年级上册:2\", \"lines\": [\"[非正文页]\"], \"error\": n"
  69+    },
  70+    {
  71+      "stage": "初中",
  72+      "subject": "数学",
  73+      "relative_path": "splits/by_stage_subject/初中/数学.jsonl",
  74+      "path": "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/初中/数学.jsonl",
  75+      "pre_pull_worktree_state": "pointer_stub",
  76+      "exists": true,
  77+      "is_pointer_stub_now": false,
  78+      "hydrated": true,
  79+      "line_count": 5559,
  80+      "first_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:初中_数学_人教版-人民教育出版社_七年级_义务教育教科书·数学七年级上册:1\", \"lines\": [\"[非正文页]\"], \"error\": n",
  81+      "second_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:初中_数学_人教版-人民教育出版社_七年级_义务教育教科书·数学七年级上册:2\", \"lines\": [\"[非正文页]\"], \"error\": n"
  82+    },
  83+    {
  84+      "stage": "高中",
  85+      "subject": "语文",
  86+      "relative_path": "splits/by_stage_subject/高中/语文.jsonl",
  87+      "path": "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/高中/语文.jsonl",
  88+      "pre_pull_worktree_state": "pointer_stub",
  89+      "exists": true,
  90+      "is_pointer_stub_now": false,
  91+      "hydrated": true,
  92+      "line_count": 694,
  93+      "first_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:高中_语文_统编版-人民教育出版社_普通高中教科书·语文必修 上册:1\", \"lines\": [\"[非正文页]\"], \"error\": null,",
  94+      "second_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:高中_语文_统编版-人民教育出版社_普通高中教科书·语文必修 上册:2\", \"lines\": [\"[非正文页]\"], \"error\": null,"
  95+    }
  96+  ],
  97+  "hydration_status": {
  98+    "status": "SUCCESS",
  99+    "all_required_files_hydrated": true
 100+  },
 101+  "script_used": "strict_rerun_port",
 102+  "script_provenance": {
 103+    "source_commit": "379feb2ed4324020ee48a97a6edb8ec99ce39f1a",
 104+    "source_path": "tests/formal_validation.py",
 105+    "port_path": "tests/formal_validation_strict_rerun.py",
 106+    "port_rationale": "origin/branch-b is the runnable Branch B source tree, but it does not contain tests/formal_validation.py from 379feb2."
 107+  },
 108+  "compatibility_deviations": [
 109+    {
 110+      "type": "script_path",
 111+      "detail": "Ported logic from 379feb2:tests/formal_validation.py into tests/formal_validation_strict_rerun.py because origin/branch-b lacks the original file."
 112+    },
 113+    {
 114+      "type": "report_output",
 115+      "detail": "Writes the strict rerun audit outputs to the required reports/ and reviews/ files instead of /tmp/formal_val_results.json, creating those parent directories on the runnable Branch B ref because they do not exist on origin/branch-b."
 116+    },
 117+    {
 118+      "type": "structured_metadata",
 119+      "detail": "Adds dataset hydration verification and structured comparison metadata; the validation ingest/step/emit logic and pass criteria remain aligned with 379feb2."
 120+    }
 121+  ],
 122+  "rerun_performed": true,
 123+  "rerun_command": "python3 tests/formal_validation_strict_rerun.py",
 124+  "per_subject_results": [
 125+    {
 126+      "name": "小学语文_pipeline+stability",
 127+      "type": "subject_pipeline",
 128+      "stage": "小学",
 129+      "subject": "语文",
 130+      "input_files": [
 131+        "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/语文.jsonl"
 132+      ],
 133+      "source_files_pre_pull_state": [
 134+        "pointer_stub"
 135+      ],
 136+      "line_count": 1597,
 137+      "loaded_records": 300,
 138+      "fed_records": 200,
 139+      "available_content_records_seen_before_cap": 300,
 140+      "available_valid_text_records_seen_before_cap": 300,
 141+      "node_count": 1268,
 142+      "edge_count": 6338,
 143+      "anchor_count": 62,
 144+      "core_count": 17,
 145+      "experience_region_count": 1233,
 146+      "skill_belt_candidate_count": 1141,
 147+      "sedimentation_events": 2374,
 148+      "merge_events": 9331,
 149+      "decay_events": 8378,
 150+      "phi_range": [
 151+        -0.113,
 152+        0.598
 153+      ],
 154+      "top_words": [
 155+        {
 156+          "phrase": "课文",
 157+          "ratio": 2706.0
 158+        },
 159+        {
 160+          "phrase": "本文",
 161+          "ratio": 2633.3
 162+        },
 163+        {
 164+          "phrase": "改动",
 165+          "ratio": 2037.2
 166+        },
 167+        {
 168+          "phrase": "有改",
 169+          "ratio": 2003.8
 170+        },
 171+        {
 172+          "phrase": "什么",
 173+          "ratio": 1989.9
 174+        },
 175+        {
 176+          "phrase": "作者",
 177+          "ratio": 1845.4
 178+        },
 179+        {
 180+          "phrase": "文时",
 181+          "ratio": 1843.6
 182+        },
 183+        {
 184+          "phrase": "作课",
 185+          "ratio": 1841.0
 186+        }
 187+      ],
 188+      "circuit_count": 5,
 189+      "top_circuits": [
 190+        {
 191+          "phrase": "对我们",
 192+          "delta": 20.9
 193+        },
 194+        {
 195+          "phrase": "学校小",
 196+          "delta": 19.4
 197+        },
 198+        {
 199+          "phrase": "地一个",
 200+          "delta": 18.1
 201+        },
 202+        {
 203+          "phrase": "金色的",
 204+          "delta": 17.5
 205+        },
 206+        {
 207+          "phrase": "语中的",
 208+          "delta": 9.7
 209+        }
 210+      ],
 211+      "elapsed_seconds": 1.57,
 212+      "output_mode": "full",
 213+      "pass": true,
 214+      "status": "PASS",
 215+      "reason": "nodes=1268, edges=6338, phi=[-0.113,0.598], attn=100.0, mode=full, time=1.57s, recs=200",
 216+      "claim_word_comparison": null,
 217+      "claim_metric_comparison": {
 218+        "claimed": {
 219+          "nodes": 1268,
 220+          "edges": 6338,
 221+          "phi_range": [
 222+            -0.113,
 223+            0.598
 224+          ]
 225+        },
 226+        "actual": {
 227+          "nodes": 1268,
 228+          "edges": 6338,
 229+          "phi_range": [
 230+            -0.113,
 231+            0.598
 232+          ]
 233+        },
 234+        "exact_match": true
 235+      }
 236+    },
 237+    {
 238+      "name": "小学数学_pipeline+stability",
 239+      "type": "subject_pipeline",
 240+      "stage": "小学",
 241+      "subject": "数学",
 242+      "input_files": [
 243+        "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/数学.jsonl"
 244+      ],
 245+      "source_files_pre_pull_state": [
 246+        "pointer_stub"
 247+      ],
 248+      "line_count": 7459,
 249+      "loaded_records": 300,
 250+      "fed_records": 200,
 251+      "available_content_records_seen_before_cap": 300,
 252+      "available_valid_text_records_seen_before_cap": 300,
 253+      "node_count": 803,
 254+      "edge_count": 4367,
 255+      "anchor_count": 54,
 256+      "core_count": 9,
 257+      "experience_region_count": 782,
 258+      "skill_belt_candidate_count": 754,
 259+      "sedimentation_events": 1536,
 260+      "merge_events": 10769,
 261+      "decay_events": 8374,
 262+      "phi_range": [
 263+        -8.3,
 264+        0.579
 265+      ],
 266+      "top_words": [
 267+        {
 268+          "phrase": "多少",
 269+          "ratio": 2827.3
 270+        },
 271+        {
 272+          "phrase": "一共",
 273+          "ratio": 2201.2
 274+        },
 275+        {
 276+          "phrase": "什么",
 277+          "ratio": 2171.0
 278+        },
 279+        {
 280+          "phrase": "下面",
 281+          "ratio": 2074.8
 282+        },
 283+        {
 284+          "phrase": "可以",
 285+          "ratio": 1774.6
 286+        },
 287+        {
 288+          "phrase": "算式",
 289+          "ratio": 1720.2
 290+        },
 291+        {
 292+          "phrase": "怎样",
 293+          "ratio": 1711.8
 294+        },
 295+        {
 296+          "phrase": "问题",
 297+          "ratio": 1594.4
 298+        }
 299+      ],
 300+      "circuit_count": 5,
 301+      "top_circuits": [
 302+        {
 303+          "phrase": "的多少",
 304+          "delta": 29.0
 305+        },
 306+        {
 307+          "phrase": "的后面",
 308+          "delta": 25.6
 309+        },
 310+        {
 311+          "phrase": "学习数",
 312+          "delta": 23.7
 313+        },
 314+        {
 315+          "phrase": "习数学",
 316+          "delta": 23.7
 317+        },
 318+        {
 319+          "phrase": "数学习",
 320+          "delta": 23.7
 321+        }
 322+      ],
 323+      "elapsed_seconds": 1.14,
 324+      "output_mode": "full",
 325+      "pass": true,
 326+      "status": "PASS",
 327+      "reason": "nodes=803, edges=4367, phi=[-8.300,0.579], attn=100.0, mode=full, time=1.14s, recs=200",
 328+      "claim_word_comparison": {
 329+        "claimed_examples": [
 330+          "多少",
 331+          "一共"
 332+        ],
 333+        "actual_top_words": [
 334+          "多少",
 335+          "一共",
 336+          "什么",
 337+          "下面",
 338+          "可以",
 339+          "算式",
 340+          "怎样",
 341+          "问题"
 342+        ],
 343+        "matched": [
 344+          "多少",
 345+          "一共"
 346+        ],
 347+        "missing": []
 348+      },
 349+      "claim_metric_comparison": {
 350+        "claimed": {
 351+          "nodes": 803,
 352+          "edges": 4367,
 353+          "phi_range": [
 354+            -8.3,
 355+            0.579
 356+          ]
 357+        },
 358+        "actual": {
 359+          "nodes": 803,
 360+          "edges": 4367,
 361+          "phi_range": [
 362+            -8.3,
 363+            0.579
 364+          ]
 365+        },
 366+        "exact_match": true
 367+      }
 368+    },
 369+    {
 370+      "name": "初中语文_pipeline+stability",
 371+      "type": "subject_pipeline",
 372+      "stage": "初中",
 373+      "subject": "语文",
 374+      "input_files": [
 375+        "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/初中/语文.jsonl"
 376+      ],
 377+      "source_files_pre_pull_state": [
 378+        "pointer_stub"
 379+      ],
 380+      "line_count": 961,
 381+      "loaded_records": 300,
 382+      "fed_records": 200,
 383+      "available_content_records_seen_before_cap": 300,
 384+      "available_valid_text_records_seen_before_cap": 300,
 385+      "node_count": 1702,
 386+      "edge_count": 9372,
 387+      "anchor_count": 58,
 388+      "core_count": 11,
 389+      "experience_region_count": 1671,
 390+      "skill_belt_candidate_count": 1600,
 391+      "sedimentation_events": 3271,
 392+      "merge_events": 21531,
 393+      "decay_events": 12421,
 394+      "phi_range": [
 395+        -0.085,
 396+        0.553
 397+      ],
 398+      "top_words": [
 399+        {
 400+          "phrase": "阅读",
 401+          "ratio": 5541.6
 402+        },
 403+        {
 404+          "phrase": "单元",
 405+          "ratio": 3998.7
 406+        },
 407+        {
 408+          "phrase": "读第",
 409+          "ratio": 3299.3
 410+        },
 411+        {
 412+          "phrase": "我们",
 413+          "ratio": 2034.8
 414+        },
 415+        {
 416+          "phrase": "第一",
 417+          "ratio": 2016.7
 418+        },
 419+        {
 420+          "phrase": "学习",
 421+          "ratio": 1780.7
 422+        },
 423+        {
 424+          "phrase": "先生",
 425+          "ratio": 1597.9
 426+        },
 427+        {
 428+          "phrase": "一个",
 429+          "ratio": 1590.0
 430+        }
 431+      ],
 432+      "circuit_count": 5,
 433+      "top_circuits": [
 434+        {
 435+          "phrase": "一定的",
 436+          "delta": 21.6
 437+        },
 438+        {
 439+          "phrase": "自己的",
 440+          "delta": 17.7
 441+        },
 442+        {
 443+          "phrase": "第十二",
 444+          "delta": 11.9
 445+        },
 446+        {
 447+          "phrase": "自然而",
 448+          "delta": 10.3
 449+        },
 450+        {
 451+          "phrase": "一定写",
 452+          "delta": 10.2
 453+        }
 454+      ],
 455+      "elapsed_seconds": 2.79,
 456+      "output_mode": "full",
 457+      "pass": true,
 458+      "status": "PASS",
 459+      "reason": "nodes=1702, edges=9372, phi=[-0.085,0.553], attn=100.0, mode=full, time=2.79s, recs=200",
 460+      "claim_word_comparison": {
 461+        "claimed_examples": [
 462+          "阅读",
 463+          "单元"
 464+        ],
 465+        "actual_top_words": [
 466+          "阅读",
 467+          "单元",
 468+          "读第",
 469+          "我们",
 470+          "第一",
 471+          "学习",
 472+          "先生",
 473+          "一个"
 474+        ],
 475+        "matched": [
 476+          "阅读",
 477+          "单元"
 478+        ],
 479+        "missing": []
 480+      },
 481+      "claim_metric_comparison": {
 482+        "claimed": {
 483+          "nodes": 1702,
 484+          "edges": 9372,
 485+          "phi_range": [
 486+            -0.085,
 487+            0.553
 488+          ]
 489+        },
 490+        "actual": {
 491+          "nodes": 1702,
 492+          "edges": 9372,
 493+          "phi_range": [
 494+            -0.085,
 495+            0.553
 496+          ]
 497+        },
 498+        "exact_match": true
 499+      }
 500+    },
 501+    {
 502+      "name": "初中数学_pipeline+stability",
 503+      "type": "subject_pipeline",
 504+      "stage": "初中",
 505+      "subject": "数学",
 506+      "input_files": [
 507+        "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/初中/数学.jsonl"
 508+      ],
 509+      "source_files_pre_pull_state": [
 510+        "pointer_stub"
 511+      ],
 512+      "line_count": 5559,
 513+      "loaded_records": 300,
 514+      "fed_records": 200,
 515+      "available_content_records_seen_before_cap": 300,
 516+      "available_valid_text_records_seen_before_cap": 300,
 517+      "node_count": 886,
 518+      "edge_count": 5437,
 519+      "anchor_count": 58,
 520+      "core_count": 4,
 521+      "experience_region_count": 878,
 522+      "skill_belt_candidate_count": 861,
 523+      "sedimentation_events": 1739,
 524+      "merge_events": 15284,
 525+      "decay_events": 7820,
 526+      "phi_range": [
 527+        -0.045,
 528+        0.479
 529+      ],
 530+      "top_words": [
 531+        {
 532+          "phrase": "方程",
 533+          "ratio": 3990.8
 534+        },
 535+        {
 536+          "phrase": "可以",
 537+          "ratio": 2289.4
 538+        },
 539+        {
 540+          "phrase": "图形",
 541+          "ratio": 2147.0
 542+        },
 543+        {
 544+          "phrase": "问题",
 545+          "ratio": 2122.0
 546+        },
 547+        {
 548+          "phrase": "我们",
 549+          "ratio": 2096.1
 550+        },
 551+        {
 552+          "phrase": "表示",
 553+          "ratio": 1976.2
 554+        },
 555+        {
 556+          "phrase": "二次",
 557+          "ratio": 1863.4
 558+        },
 559+        {
 560+          "phrase": "函数",
 561+          "ratio": 1809.4
 562+        }
 563+      ],
 564+      "circuit_count": 5,
 565+      "top_circuits": [
 566+        {
 567+          "phrase": "学习数",
 568+          "delta": 25.5
 569+        },
 570+        {
 571+          "phrase": "数学习",
 572+          "delta": 25.5
 573+        },
 574+        {
 575+          "phrase": "的理数",
 576+          "delta": 24.2
 577+        },
 578+        {
 579+          "phrase": "数的理",
 580+          "delta": 24.2
 581+        },
 582+        {
 583+          "phrase": "的表示",
 584+          "delta": 22.6
 585+        }
 586+      ],
 587+      "elapsed_seconds": 1.49,
 588+      "output_mode": "full",
 589+      "pass": true,
 590+      "status": "PASS",
 591+      "reason": "nodes=886, edges=5437, phi=[-0.045,0.479], attn=100.0, mode=full, time=1.49s, recs=200",
 592+      "claim_word_comparison": {
 593+        "claimed_examples": [
 594+          "方程",
 595+          "图形",
 596+          "问题"
 597+        ],
 598+        "actual_top_words": [
 599+          "方程",
 600+          "可以",
 601+          "图形",
 602+          "问题",
 603+          "我们",
 604+          "表示",
 605+          "二次",
 606+          "函数"
 607+        ],
 608+        "matched": [
 609+          "方程",
 610+          "图形",
 611+          "问题"
 612+        ],
 613+        "missing": []
 614+      },
 615+      "claim_metric_comparison": {
 616+        "claimed": {
 617+          "nodes": 886,
 618+          "edges": 5437,
 619+          "phi_range": [
 620+            -0.045,
 621+            0.48
 622+          ]
 623+        },
 624+        "actual": {
 625+          "nodes": 886,
 626+          "edges": 5437,
 627+          "phi_range": [
 628+            -0.045,
 629+            0.479
 630+          ]
 631+        },
 632+        "exact_match": false
 633+      }
 634+    },
 635+    {
 636+      "name": "高中语文_pipeline+stability",
 637+      "type": "subject_pipeline",
 638+      "stage": "高中",
 639+      "subject": "语文",
 640+      "input_files": [
 641+        "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/高中/语文.jsonl"
 642+      ],
 643+      "source_files_pre_pull_state": [
 644+        "pointer_stub"
 645+      ],
 646+      "line_count": 694,
 647+      "loaded_records": 300,
 648+      "fed_records": 200,
 649+      "available_content_records_seen_before_cap": 300,
 650+      "available_valid_text_records_seen_before_cap": 300,
 651+      "node_count": 1857,
 652+      "edge_count": 9983,
 653+      "anchor_count": 56,
 654+      "core_count": 25,
 655+      "experience_region_count": 1829,
 656+      "skill_belt_candidate_count": 1747,
 657+      "sedimentation_events": 3576,
 658+      "merge_events": 21824,
 659+      "decay_events": 18607,
 660+      "phi_range": [
 661+        -8.266,
 662+        0.239
 663+      ],
 664+      "top_words": [
 665+        {
 666+          "phrase": "单元",
 667+          "ratio": 4606.7
 668+        },
 669+        {
 670+          "phrase": "语文",
 671+          "ratio": 4125.0
 672+        },
 673+        {
 674+          "phrase": "必修",
 675+          "ratio": 4103.1
 676+        },
 677+        {
 678+          "phrase": "文必",
 679+          "ratio": 4068.6
 680+        },
 681+        {
 682+          "phrase": "上册",
 683+          "ratio": 3347.0
 684+        },
 685+        {
 686+          "phrase": "修上",
 687+          "ratio": 3327.4
 688+        },
 689+        {
 690+          "phrase": "我们",
 691+          "ratio": 2167.8
 692+        },
 693+        {
 694+          "phrase": "第一",
 695+          "ratio": 1929.2
 696+        }
 697+      ],
 698+      "circuit_count": 5,
 699+      "top_circuits": [
 700+        {
 701+          "phrase": "长大嫂",
 702+          "delta": 4.2
 703+        },
 704+        {
 705+          "phrase": "放和你",
 706+          "delta": 4.1
 707+        },
 708+        {
 709+          "phrase": "东去的",
 710+          "delta": 3.2
 711+        },
 712+        {
 713+          "phrase": "地平对",
 714+          "delta": 3.2
 715+        },
 716+        {
 717+          "phrase": "在地平",
 718+          "delta": 3.0
 719+        }
 720+      ],
 721+      "elapsed_seconds": 3.15,
 722+      "output_mode": "full",
 723+      "pass": true,
 724+      "status": "PASS",
 725+      "reason": "nodes=1857, edges=9983, phi=[-8.266,0.239], attn=100.0, mode=full, time=3.15s, recs=200",
 726+      "claim_word_comparison": null,
 727+      "claim_metric_comparison": {
 728+        "claimed": {
 729+          "nodes": 1857,
 730+          "edges": 9983,
 731+          "phi_range": [
 732+            -8.266,
 733+            0.239
 734+          ]
 735+        },
 736+        "actual": {
 737+          "nodes": 1857,
 738+          "edges": 9983,
 739+          "phi_range": [
 740+            -8.266,
 741+            0.239
 742+          ]
 743+        },
 744+        "exact_match": true
 745+      }
 746+    },
 747+    {
 748+      "name": "cross_stage_语文",
 749+      "type": "cross_stage",
 750+      "input_files": [
 751+        "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/语文.jsonl",
 752+        "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/初中/语文.jsonl",
 753+        "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/高中/语文.jsonl"
 754+      ],
 755+      "source_files_pre_pull_state": [
 756+        "pointer_stub",
 757+        "pointer_stub",
 758+        "pointer_stub"
 759+      ],
 760+      "loaded_records_per_input": [
 761+        40,
 762+        40,
 763+        40
 764+      ],
 765+      "fed_records": 120,
 766+      "node_count": 1128,
 767+      "edge_count": 4139,
 768+      "phi_range": [
 769+        -0.071,
 770+        0.126
 771+      ],
 772+      "top_words": [
 773+        {
 774+          "phrase": "单元",
 775+          "ratio": 2393.7
 776+        },
 777+        {
 778+          "phrase": "阅读",
 779+          "ratio": 1911.0
 780+        },
 781+        {
 782+          "phrase": "第一",
 783+          "ratio": 1751.4
 784+        },
 785+        {
 786+          "phrase": "语文",
 787+          "ratio": 1750.3
 788+        },
 789+        {
 790+          "phrase": "一单",
 791+          "ratio": 1656.9
 792+        },
 793+        {
 794+          "phrase": "课文",
 795+          "ratio": 1387.8
 796+        },
 797+        {
 798+          "phrase": "修上",
 799+          "ratio": 1334.6
 800+        },
 801+        {
 802+          "phrase": "必修",
 803+          "ratio": 1330.0
 804+        }
 805+      ],
 806+      "pass": true,
 807+      "status": "PASS",
 808+      "reason": "nodes=1128, phi_max=0.126",
 809+      "claim_metric_comparison": {
 810+        "claimed": {
 811+          "nodes": 1128,
 812+          "phi_max": 0.112
 813+        },
 814+        "actual": {
 815+          "nodes": 1128,
 816+          "edges": 4139,
 817+          "phi_max": 0.126
 818+        },
 819+        "exact_match": false
 820+      }
 821+    },
 822+    {
 823+      "name": "cross_subject_小学",
 824+      "type": "cross_subject",
 825+      "input_files": [
 826+        "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/语文.jsonl",
 827+        "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/数学.jsonl",
 828+        "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/科学.jsonl"
 829+      ],
 830+      "source_files_pre_pull_state": [
 831+        "pointer_stub",
 832+        "pointer_stub",
 833+        "pointer_stub"
 834+      ],
 835+      "loaded_records_per_input": [
 836+        40,
 837+        40,
 838+        40
 839+      ],
 840+      "fed_records": 120,
 841+      "node_count": 708,
 842+      "edge_count": 2866,
 843+      "phi_range": [
 844+        -0.061,
 845+        10.0
 846+      ],
 847+      "top_words": [
 848+        {
 849+          "phrase": "食物",
 850+          "ratio": 1599.4
 851+        },
 852+        {
 853+          "phrase": "怎样",
 854+          "ratio": 1539.1
 855+        },
 856+        {
 857+          "phrase": "什么",
 858+          "ratio": 1484.0
 859+        },
 860+        {
 861+          "phrase": "我们",
 862+          "ratio": 1329.0
 863+        },
 864+        {
 865+          "phrase": "本文",
 866+          "ratio": 1303.4
 867+        },
 868+        {
 869+          "phrase": "电路",
 870+          "ratio": 1239.6
 871+        },
 872+        {
 873+          "phrase": "溶解",
 874+          "ratio": 1139.3
 875+        },
 876+        {
 877+          "phrase": "哪些",
 878+          "ratio": 1111.8
 879+        }
 880+      ],
 881+      "pass": true,
 882+      "status": "PASS",
 883+      "reason": "nodes=708, phi_max=10.000, cores=21",
 884+      "claim_metric_comparison": {
 885+        "claimed": {
 886+          "nodes": 708,
 887+          "phi_max": 10.0
 888+        },
 889+        "actual": {
 890+          "nodes": 708,
 891+          "edges": 2866,
 892+          "phi_max": 10.0
 893+        },
 894+        "exact_match": true
 895+      }
 896+    },
 897+    {
 898+      "name": "all_in_one_5subjects",
 899+      "type": "all_in_one",
 900+      "input_files": [
 901+        "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/语文.jsonl",
 902+        "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/数学.jsonl",
 903+        "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/初中/语文.jsonl",
 904+        "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/初中/数学.jsonl",
 905+        "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/高中/语文.jsonl"
 906+      ],
 907+      "source_files_pre_pull_state": [
 908+        "pointer_stub",
 909+        "pointer_stub",
 910+        "pointer_stub",
 911+        "pointer_stub",
 912+        "pointer_stub"
 913+      ],
 914+      "loaded_records_per_input": [
 915+        50,
 916+        50,
 917+        50,
 918+        50,
 919+        50
 920+      ],
 921+      "fed_records": 250,
 922+      "node_count": 1373,
 923+      "edge_count": 6863,
 924+      "phi_range": [
 925+        -0.081,
 926+        0.198
 927+      ],
 928+      "top_words": [
 929+        {
 930+          "phrase": "单元",
 931+          "ratio": 2789.9
 932+        },
 933+        {
 934+          "phrase": "阅读",
 935+          "ratio": 2354.9
 936+        },
 937+        {
 938+          "phrase": "语文",
 939+          "ratio": 2125.5
 940+        },
 941+        {
 942+          "phrase": "第一",
 943+          "ratio": 1878.4
 944+        },
 945+        {
 946+          "phrase": "一单",
 947+          "ratio": 1622.3
 948+        },
 949+        {
 950+          "phrase": "什么",
 951+          "ratio": 1617.2
 952+        },
 953+        {
 954+          "phrase": "修上",
 955+          "ratio": 1591.9
 956+        },
 957+        {
 958+          "phrase": "必修",
 959+          "ratio": 1571.5
 960+        }
 961+      ],
 962+      "pass": true,
 963+      "status": "PASS",
 964+      "reason": "fed=250, nodes=1373, edges=6863, phi_max=0.198",
 965+      "claim_metric_comparison": {
 966+        "claimed": {
 967+          "nodes": 1373,
 968+          "edges": 6863,
 969+          "phi_max": 0.167
 970+        },
 971+        "actual": {
 972+          "nodes": 1373,
 973+          "edges": 6863,
 974+          "phi_max": 0.198
 975+        },
 976+        "exact_match": false
 977+      }
 978+    }
 979+  ],
 980+  "analysis": {
 981+    "小学语文": {
 982+      "nodes": 1268,
 983+      "edges": 6338,
 984+      "anchors": 62,
 985+      "cores": 17,
 986+      "exp": 1233,
 987+      "belts": 1141,
 988+      "top_words": [
 989+        {
 990+          "phrase": "课文",
 991+          "ratio": 2706.0
 992+        },
 993+        {
 994+          "phrase": "本文",
 995+          "ratio": 2633.3
 996+        },
 997+        {
 998+          "phrase": "改动",
 999+          "ratio": 2037.2
1000+        },
1001+        {
1002+          "phrase": "有改",
1003+          "ratio": 2003.8
1004+        },
1005+        {
1006+          "phrase": "什么",
1007+          "ratio": 1989.9
1008+        },
1009+        {
1010+          "phrase": "作者",
1011+          "ratio": 1845.4
1012+        },
1013+        {
1014+          "phrase": "文时",
1015+          "ratio": 1843.6
1016+        },
1017+        {
1018+          "phrase": "作课",
1019+          "ratio": 1841.0
1020+        }
1021+      ],
1022+      "circuits": 5,
1023+      "top_circuits": [
1024+        {
1025+          "phrase": "对我们",
1026+          "delta": 20.9
1027+        },
1028+        {
1029+          "phrase": "学校小",
1030+          "delta": 19.4
1031+        },
1032+        {
1033+          "phrase": "地一个",
1034+          "delta": 18.1
1035+        },
1036+        {
1037+          "phrase": "金色的",
1038+          "delta": 17.5
1039+        },
1040+        {
1041+          "phrase": "语中的",
1042+          "delta": 9.7
1043+        }
1044+      ],
1045+      "sed": 2374,
1046+      "merges": 9331,
1047+      "decays": 8378,
1048+      "phi_range": [
1049+        -0.113,
1050+        0.598
1051+      ]
1052+    },
1053+    "小学数学": {
1054+      "nodes": 803,
1055+      "edges": 4367,
1056+      "anchors": 54,
1057+      "cores": 9,
1058+      "exp": 782,
1059+      "belts": 754,
1060+      "top_words": [
1061+        {
1062+          "phrase": "多少",
1063+          "ratio": 2827.3
1064+        },
1065+        {
1066+          "phrase": "一共",
1067+          "ratio": 2201.2
1068+        },
1069+        {
1070+          "phrase": "什么",
1071+          "ratio": 2171.0
1072+        },
1073+        {
1074+          "phrase": "下面",
1075+          "ratio": 2074.8
1076+        },
1077+        {
1078+          "phrase": "可以",
1079+          "ratio": 1774.6
1080+        },
1081+        {
1082+          "phrase": "算式",
1083+          "ratio": 1720.2
1084+        },
1085+        {
1086+          "phrase": "怎样",
1087+          "ratio": 1711.8
1088+        },
1089+        {
1090+          "phrase": "问题",
1091+          "ratio": 1594.4
1092+        }
1093+      ],
1094+      "circuits": 5,
1095+      "top_circuits": [
1096+        {
1097+          "phrase": "的多少",
1098+          "delta": 29.0
1099+        },
1100+        {
1101+          "phrase": "的后面",
1102+          "delta": 25.6
1103+        },
1104+        {
1105+          "phrase": "学习数",
1106+          "delta": 23.7
1107+        },
1108+        {
1109+          "phrase": "习数学",
1110+          "delta": 23.7
1111+        },
1112+        {
1113+          "phrase": "数学习",
1114+          "delta": 23.7
1115+        }
1116+      ],
1117+      "sed": 1536,
1118+      "merges": 10769,
1119+      "decays": 8374,
1120+      "phi_range": [
1121+        -8.3,
1122+        0.579
1123+      ]
1124+    },
1125+    "初中语文": {
1126+      "nodes": 1702,
1127+      "edges": 9372,
1128+      "anchors": 58,
1129+      "cores": 11,
1130+      "exp": 1671,
1131+      "belts": 1600,
1132+      "top_words": [
1133+        {
1134+          "phrase": "阅读",
1135+          "ratio": 5541.6
1136+        },
1137+        {
1138+          "phrase": "单元",
1139+          "ratio": 3998.7
1140+        },
1141+        {
1142+          "phrase": "读第",
1143+          "ratio": 3299.3
1144+        },
1145+        {
1146+          "phrase": "我们",
1147+          "ratio": 2034.8
1148+        },
1149+        {
1150+          "phrase": "第一",
1151+          "ratio": 2016.7
1152+        },
1153+        {
1154+          "phrase": "学习",
1155+          "ratio": 1780.7
1156+        },
1157+        {
1158+          "phrase": "先生",
1159+          "ratio": 1597.9
1160+        },
1161+        {
1162+          "phrase": "一个",
1163+          "ratio": 1590.0
1164+        }
1165+      ],
1166+      "circuits": 5,
1167+      "top_circuits": [
1168+        {
1169+          "phrase": "一定的",
1170+          "delta": 21.6
1171+        },
1172+        {
1173+          "phrase": "自己的",
1174+          "delta": 17.7
1175+        },
1176+        {
1177+          "phrase": "第十二",
1178+          "delta": 11.9
1179+        },
1180+        {
1181+          "phrase": "自然而",
1182+          "delta": 10.3
1183+        },
1184+        {
1185+          "phrase": "一定写",
1186+          "delta": 10.2
1187+        }
1188+      ],
1189+      "sed": 3271,
1190+      "merges": 21531,
1191+      "decays": 12421,
1192+      "phi_range": [
1193+        -0.085,
1194+        0.553
1195+      ]
1196+    },
1197+    "初中数学": {
1198+      "nodes": 886,
1199+      "edges": 5437,
1200+      "anchors": 58,
1201+      "cores": 4,
1202+      "exp": 878,
1203+      "belts": 861,
1204+      "top_words": [
1205+        {
1206+          "phrase": "方程",
1207+          "ratio": 3990.8
1208+        },
1209+        {
1210+          "phrase": "可以",
1211+          "ratio": 2289.4
1212+        },
1213+        {
1214+          "phrase": "图形",
1215+          "ratio": 2147.0
1216+        },
1217+        {
1218+          "phrase": "问题",
1219+          "ratio": 2122.0
1220+        },
1221+        {
1222+          "phrase": "我们",
1223+          "ratio": 2096.1
1224+        },
1225+        {
1226+          "phrase": "表示",
1227+          "ratio": 1976.2
1228+        },
1229+        {
1230+          "phrase": "二次",
1231+          "ratio": 1863.4
1232+        },
1233+        {
1234+          "phrase": "函数",
1235+          "ratio": 1809.4
1236+        }
1237+      ],
1238+      "circuits": 5,
1239+      "top_circuits": [
1240+        {
1241+          "phrase": "学习数",
1242+          "delta": 25.5
1243+        },
1244+        {
1245+          "phrase": "数学习",
1246+          "delta": 25.5
1247+        },
1248+        {
1249+          "phrase": "的理数",
1250+          "delta": 24.2
1251+        },
1252+        {
1253+          "phrase": "数的理",
1254+          "delta": 24.2
1255+        },
1256+        {
1257+          "phrase": "的表示",
1258+          "delta": 22.6
1259+        }
1260+      ],
1261+      "sed": 1739,
1262+      "merges": 15284,
1263+      "decays": 7820,
1264+      "phi_range": [
1265+        -0.045,
1266+        0.479
1267+      ]
1268+    },
1269+    "高中语文": {
1270+      "nodes": 1857,
1271+      "edges": 9983,
1272+      "anchors": 56,
1273+      "cores": 25,
1274+      "exp": 1829,
1275+      "belts": 1747,
1276+      "top_words": [
1277+        {
1278+          "phrase": "单元",
1279+          "ratio": 4606.7
1280+        },
1281+        {
1282+          "phrase": "语文",
1283+          "ratio": 4125.0
1284+        },
1285+        {
1286+          "phrase": "必修",
1287+          "ratio": 4103.1
1288+        },
1289+        {
1290+          "phrase": "文必",
1291+          "ratio": 4068.6
1292+        },
1293+        {
1294+          "phrase": "上册",
1295+          "ratio": 3347.0
1296+        },
1297+        {
1298+          "phrase": "修上",
1299+          "ratio": 3327.4
1300+        },
1301+        {
1302+          "phrase": "我们",
1303+          "ratio": 2167.8
1304+        },
1305+        {
1306+          "phrase": "第一",
1307+          "ratio": 1929.2
1308+        }
1309+      ],
1310+      "circuits": 5,
1311+      "top_circuits": [
1312+        {
1313+          "phrase": "长大嫂",
1314+          "delta": 4.2
1315+        },
1316+        {
1317+          "phrase": "放和你",
1318+          "delta": 4.1
1319+        },
1320+        {
1321+          "phrase": "东去的",
1322+          "delta": 3.2
1323+        },
1324+        {
1325+          "phrase": "地平对",
1326+          "delta": 3.2
1327+        },
1328+        {
1329+          "phrase": "在地平",
1330+          "delta": 3.0
1331+        }
1332+      ],
1333+      "sed": 3576,
1334+      "merges": 21824,
1335+      "decays": 18607,
1336+      "phi_range": [
1337+        -8.266,
1338+        0.239
1339+      ]
1340+    },
1341+    "全灌": {
1342+      "fed": 250,
1343+      "nodes": 1373,
1344+      "edges": 6863,
1345+      "phi_range": [
1346+        -0.081,
1347+        0.198
1348+      ],
1349+      "anchors": 58,
1350+      "cores": 21,
1351+      "sed": 2563,
1352+      "merges": 16921,
1353+      "decays": 8503
1354+    }
1355+  },
1356+  "summary": {
1357+    "passed": 8,
1358+    "failed": 0,
1359+    "total": 8,
1360+    "dataset": "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28",
1361+    "status": "PASS"
1362+  },
1363+  "reproduced_8_of_8_pass": true,
1364+  "blockers": [],
1365+  "overall_assessment": {
1366+    "conclusion": "partially reproduced",
1367+    "effect_on_prior_a_vs_b_comparison": "strengthens",
1368+    "trust_call": "trust the rerun outcome, not the exact original-commit execution claim"
1369+  },
1370+  "recommendation": "trust the rerun outcome but not Claude's exact original execution claim verbatim"
1371+}

A reports/2026-03-31_branch_b_formal_validation_strict_rerun.md

+89, -0

 1@@ -0,0 +1,89 @@
 2+# Branch B formal validation strict rerun
 3+
 4+## 1. purpose
 5+
 6+Strictly rerun Claude's claimed Branch B formal dataset validation once on `/Users/george/code/cie-datasets`,
 7+using a runnable Branch B source tree without changing runtime behavior.
 8+
 9+## 2. claimed result being rerun
10+
11+- Claim commit: `379feb2ed4324020ee48a97a6edb8ec99ce39f1a`
12+- Claimed result: `8/8 PASS` on `/Users/george/code/cie-datasets`.
13+- Claimed examples: 小学数学 `多少`, `一共`; 初中数学 `方程`, `图形`, `问题`; 初中语文 `阅读`, `单元`.
14+
15+## 3. chosen runnable Branch B ref
16+
17+- Runnable Branch B base: `c7342881bb2ebfa5e7f927c91a7806416288573b` (`origin/branch-b` / `c734288`).
18+- This ref contains the Branch B runtime source tree (`cie/__init__.py`, `cie/runtime.py`, `cie/graph.py`, `cie/state.py`, `cie/dynamics.py`).
19+
20+## 4. dataset hydration audit
21+
22+- Dataset repo: `/Users/george/code/cie-datasets`
23+- Git LFS: `git-lfs/3.7.1 (GitHub; darwin arm64; go 1.25.3)`
24+- Hydration status: `SUCCESS`
25+
26+| Stage | Subject | Relative path | Pre-pull state | Hydrated now | Line count |
27+|---|---|---|---|---:|---:|
28+| 小学 | 语文 | `splits/by_stage_subject/小学/语文.jsonl` | pointer_stub | yes | 1597 |
29+| 小学 | 数学 | `splits/by_stage_subject/小学/数学.jsonl` | pointer_stub | yes | 7459 |
30+| 小学 | 科学 | `splits/by_stage_subject/小学/科学.jsonl` | pointer_stub | yes | 5032 |
31+| 初中 | 语文 | `splits/by_stage_subject/初中/语文.jsonl` | pointer_stub | yes | 961 |
32+| 初中 | 数学 | `splits/by_stage_subject/初中/数学.jsonl` | pointer_stub | yes | 5559 |
33+| 高中 | 语文 | `splits/by_stage_subject/高中/语文.jsonl` | pointer_stub | yes | 694 |
34+
35+## 5. script provenance audit
36+
37+- Script used: `strict_rerun_port`
38+- Source: `379feb2ed4324020ee48a97a6edb8ec99ce39f1a:tests/formal_validation.py`
39+- Port path: `tests/formal_validation_strict_rerun.py`
40+- Compatibility deviations:
41+  - Ported logic from 379feb2:tests/formal_validation.py into tests/formal_validation_strict_rerun.py because origin/branch-b lacks the original file.
42+  - Writes the strict rerun audit outputs to the required reports/ and reviews/ files instead of /tmp/formal_val_results.json, creating those parent directories on the runnable Branch B ref because they do not exist on origin/branch-b.
43+  - Adds dataset hydration verification and structured comparison metadata; the validation ingest/step/emit logic and pass criteria remain aligned with 379feb2.
44+
45+## 6. exact rerun command
46+
47+- `python3 tests/formal_validation_strict_rerun.py`
48+
49+## 7. per-subject results table
50+
51+| Scenario | Subject/domain | Status | Nodes | Edges | Top emergent words | Reason |
52+|---|---|---|---:|---:|---|---|
53+| 小学语文_pipeline+stability | 语文 | PASS | 1268 | 6338 | 课文, 本文, 改动, 有改, 什么 | nodes=1268, edges=6338, phi=[-0.113,0.598], attn=100.0, mode=full, time=1.57s, recs=200 |
54+| 小学数学_pipeline+stability | 数学 | PASS | 803 | 4367 | 多少, 一共, 什么, 下面, 可以 | nodes=803, edges=4367, phi=[-8.300,0.579], attn=100.0, mode=full, time=1.14s, recs=200 |
55+| 初中语文_pipeline+stability | 语文 | PASS | 1702 | 9372 | 阅读, 单元, 读第, 我们, 第一 | nodes=1702, edges=9372, phi=[-0.085,0.553], attn=100.0, mode=full, time=2.79s, recs=200 |
56+| 初中数学_pipeline+stability | 数学 | PASS | 886 | 5437 | 方程, 可以, 图形, 问题, 我们 | nodes=886, edges=5437, phi=[-0.045,0.479], attn=100.0, mode=full, time=1.49s, recs=200 |
57+| 高中语文_pipeline+stability | 语文 | PASS | 1857 | 9983 | 单元, 语文, 必修, 文必, 上册 | nodes=1857, edges=9983, phi=[-8.266,0.239], attn=100.0, mode=full, time=3.15s, recs=200 |
58+| cross_stage_语文 | cross_stage_语文 | PASS | 1128 | 4139 | 单元, 阅读, 第一, 语文, 一单 | nodes=1128, phi_max=0.126 |
59+| cross_subject_小学 | cross_subject_小学 | PASS | 708 | 2866 | 食物, 怎样, 什么, 我们, 本文 | nodes=708, phi_max=10.000, cores=21 |
60+| all_in_one_5subjects | all_in_one_5subjects | PASS | 1373 | 6863 | 单元, 阅读, 语文, 第一, 一单 | fed=250, nodes=1373, edges=6863, phi_max=0.198 |
61+
62+## 8. whether 8/8 PASS was reproduced
63+
64+- Rerun performed: `True`
65+- Result: `8/8 PASS`
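66+- Exact 8/8 reproduction: `True` (all 8 scenarios PASS, matching the claimed pass count; exact agreement of the per-scenario metrics is assessed separately in section 9)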
67+
68+## 9. comparison to Claude's claimed numbers/words
69+
70+- 小学语文_pipeline+stability: claimed metrics {'nodes': 1268, 'edges': 6338, 'phi_range': [-0.113, 0.598]}; actual {'nodes': 1268, 'edges': 6338, 'phi_range': [-0.113, 0.598]}; exact_match=True.
71+- 小学数学_pipeline+stability: claimed words ['多少', '一共']; matched ['多少', '一共']; missing [].
72+- 小学数学_pipeline+stability: claimed metrics {'nodes': 803, 'edges': 4367, 'phi_range': [-8.3, 0.579]}; actual {'nodes': 803, 'edges': 4367, 'phi_range': [-8.3, 0.579]}; exact_match=True.
73+- 初中语文_pipeline+stability: claimed words ['阅读', '单元']; matched ['阅读', '单元']; missing [].
74+- 初中语文_pipeline+stability: claimed metrics {'nodes': 1702, 'edges': 9372, 'phi_range': [-0.085, 0.553]}; actual {'nodes': 1702, 'edges': 9372, 'phi_range': [-0.085, 0.553]}; exact_match=True.
75+- 初中数学_pipeline+stability: claimed words ['方程', '图形', '问题']; matched ['方程', '图形', '问题']; missing [].
76+- 初中数学_pipeline+stability: claimed metrics {'nodes': 886, 'edges': 5437, 'phi_range': [-0.045, 0.48]}; actual {'nodes': 886, 'edges': 5437, 'phi_range': [-0.045, 0.479]}; exact_match=False.
77+- 高中语文_pipeline+stability: claimed metrics {'nodes': 1857, 'edges': 9983, 'phi_range': [-8.266, 0.239]}; actual {'nodes': 1857, 'edges': 9983, 'phi_range': [-8.266, 0.239]}; exact_match=True.
78+- cross_stage_语文: claimed metrics {'nodes': 1128, 'phi_max': 0.112}; actual {'nodes': 1128, 'edges': 4139, 'phi_max': 0.126}; exact_match=False.
79+- cross_subject_小学: claimed metrics {'nodes': 708, 'phi_max': 10.0}; actual {'nodes': 708, 'edges': 2866, 'phi_max': 10.0}; exact_match=True.
80+- all_in_one_5subjects: claimed metrics {'nodes': 1373, 'edges': 6863, 'phi_max': 0.167}; actual {'nodes': 1373, 'edges': 6863, 'phi_max': 0.198}; exact_match=False.
81+
82+## 10. conclusion
83+
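84+- `partially reproduced`: the 8/8 PASS outcome was reproduced, but 3 of the 8 scenario metric comparisons in section 9 are not exact matches (初中数学 `phi_range` upper bound 0.48 claimed vs 0.479 actual; cross_stage_语文 `phi_max` 0.112 vs 0.126; all_in_one_5subjects `phi_max` 0.167 vs 0.198).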
85+- Effect on prior A/B comparison: `strengthens`
86+- Recommendation: `trust the rerun outcome but not Claude's exact original execution claim verbatim`
87+
88+### blockers
89+
90+- none

A reviews/2026-03-31_branch_b_formal_validation_strict_rerun.md

+5, -0

1@@ -0,0 +1,5 @@
2+# Branch B formal validation strict rerun summary
3+
4+- Dataset actually hydrated: yes.
5+- Formal validation actually rerun: yes.
6+- Should Claude's claimed result now be trusted: trust the rerun outcome, not the exact original-commit execution claim.

A tests/formal_validation_strict_rerun.py

+865, -0

  1@@ -0,0 +1,865 @@
  2+#!/usr/bin/env python3
  3+"""Strict rerun port of 379feb2:tests/formal_validation.py on runnable Branch B."""
  4+
  5+from __future__ import annotations
  6+
  7+import json
  8+import math
  9+import os
 10+import subprocess
 11+import sys
 12+import time
 13+from pathlib import Path
 14+from typing import Any
 15+
 16+REPO_ROOT = Path(__file__).resolve().parents[1]
 17+if str(REPO_ROOT) not in sys.path:
 18+    sys.path.insert(0, str(REPO_ROOT))
 19+
 20+from cie import CIERuntime
 21+
 22+
# Commit that holds the original validation script this file is a port of
# (see the module docstring) and whose claimed results are compared against.
CLAUDE_CLAIM_COMMIT = "379feb2ed4324020ee48a97a6edb8ec99ce39f1a"
# Path of the original script inside CLAUDE_CLAIM_COMMIT, and of this port.
SCRIPT_SOURCE_PATH = "tests/formal_validation.py"
STRICT_PORT_PATH = "tests/formal_validation_strict_rerun.py"
# Output locations, all resolved relative to the repository root.
REPORT_JSON_PATH = REPO_ROOT / "reports/2026-03-31_branch_b_formal_validation_strict_rerun.json"
REPORT_MD_PATH = REPO_ROOT / "reports/2026-03-31_branch_b_formal_validation_strict_rerun.md"
REVIEW_MD_PATH = REPO_ROOT / "reviews/2026-03-31_branch_b_formal_validation_strict_rerun.md"

# Dataset git checkout (absolute, machine-specific path) and the dataset
# version directory inside it that all relative dataset paths hang off.
DATASET_REPO = Path("/Users/george/code/cie-datasets")
DS = DATASET_REPO / "china_text_book_md" / "v2026-03-28"

# (stage, subject) pairs. NOTE(review): presumably the per-subject scenarios;
# the code that iterates COMBOS is outside this excerpt — confirm.
COMBOS = [
    ("小学", "语文"),
    ("小学", "数学"),
    ("初中", "语文"),
    ("初中", "数学"),
    ("高中", "语文"),
]
# (stage, subject) pairs whose JSONL files verify_required_dataset_paths()
# inspects. Same as COMBOS plus ("小学", "科学").
REQUIRED_DATASET_PATHS = [
    ("小学", "语文"),
    ("小学", "数学"),
    ("小学", "科学"),
    ("初中", "语文"),
    ("初中", "数学"),
    ("高中", "语文"),
]
# Worktree state of each required file before the LFS pull, keyed by the
# DS-relative path produced by rel_dataset_path(). These labels are literals
# recorded here, not something this script measures at run time.
PRE_PULL_POINTER_STATE = {
    "splits/by_stage_subject/小学/语文.jsonl": "pointer_stub",
    "splits/by_stage_subject/小学/数学.jsonl": "pointer_stub",
    "splits/by_stage_subject/小学/科学.jsonl": "pointer_stub",
    "splits/by_stage_subject/初中/语文.jsonl": "pointer_stub",
    "splits/by_stage_subject/初中/数学.jsonl": "pointer_stub",
    "splits/by_stage_subject/高中/语文.jsonl": "pointer_stub",
}
# Claimed metrics per scenario name; compare_claim_metrics() checks each
# listed key for exact equality against the rerun's values.
CLAIMED_METRICS = {
    "小学语文_pipeline+stability": {"nodes": 1268, "edges": 6338, "phi_range": [-0.113, 0.598]},
    "小学数学_pipeline+stability": {"nodes": 803, "edges": 4367, "phi_range": [-8.300, 0.579]},
    "初中语文_pipeline+stability": {"nodes": 1702, "edges": 9372, "phi_range": [-0.085, 0.553]},
    "初中数学_pipeline+stability": {"nodes": 886, "edges": 5437, "phi_range": [-0.045, 0.480]},
    "高中语文_pipeline+stability": {"nodes": 1857, "edges": 9983, "phi_range": [-8.266, 0.239]},
    "cross_stage_语文": {"nodes": 1128, "phi_max": 0.112},
    "cross_subject_小学": {"nodes": 708, "phi_max": 10.000},
    "all_in_one_5subjects": {"nodes": 1373, "edges": 6863, "phi_max": 0.167},
}
# Claimed example top words, keyed by "<stage><subject>" label (the same
# label run_subject_combo() passes to compare_claim_words()).
CLAIMED_WORDS = {
    "小学数学": ["多少", "一共"],
    "初中数学": ["方程", "图形", "问题"],
    "初中语文": ["阅读", "单元"],
}
# Declared differences between this port and the original script.
# NOTE(review): presumably rendered into the report's "Compatibility
# deviations" section; the renderer is outside this excerpt — confirm.
COMPATIBILITY_DEVIATIONS = [
    {
        "type": "script_path",
        "detail": (
            "Ported logic from 379feb2:tests/formal_validation.py into "
            "tests/formal_validation_strict_rerun.py because origin/branch-b lacks the original file."
        ),
    },
    {
        "type": "report_output",
        "detail": (
            "Writes the strict rerun audit outputs to the required reports/ and reviews/ files "
            "instead of /tmp/formal_val_results.json, creating those parent directories on the "
            "runnable Branch B ref because they do not exist on origin/branch-b."
        ),
    },
    {
        "type": "structured_metadata",
        "detail": (
            "Adds dataset hydration verification and structured comparison metadata; "
            "the validation ingest/step/emit logic and pass criteria remain aligned with 379feb2."
        ),
    },
]
 95+
 96+
 97+def git(cmd: list[str], cwd: Path = REPO_ROOT, check: bool = True) -> subprocess.CompletedProcess[str]:
 98+    return subprocess.run(
 99+        cmd,
100+        cwd=cwd,
101+        text=True,
102+        capture_output=True,
103+        check=check,
104+    )
105+
106+
def rel_dataset_path(stage: str, subject: str) -> str:
    """Return the path of a stage/subject JSONL split, relative to the dataset version dir."""
    return "splits/by_stage_subject/{}/{}.jsonl".format(stage, subject)
109+
110+
def abs_dataset_path(stage: str, subject: str) -> Path:
    """Return the stage/subject JSONL split path joined onto the dataset version dir ``DS``."""
    relative = rel_dataset_path(stage, subject)
    return DS.joinpath(relative)
113+
114+
def is_pointer_stub(path: Path) -> bool:
    """Return True when ``path`` is a Git LFS pointer file rather than real content.

    A file counts as a pointer stub when its first line, stripped of
    surrounding whitespace, is exactly the LFS v1 version line. Missing paths
    and non-regular files (for example directories) are reported as not a stub
    instead of raising.

    Args:
        path: File to inspect.

    Returns:
        True if the first line is the LFS spec signature, otherwise False.
    """
    signature = "version https://git-lfs.github.com/spec/v1"
    # The signature line is short, so cap the read: a hydrated file without an
    # early newline would otherwise be pulled into memory in full.
    read_limit = 256

    if not path.is_file():
        return False
    with path.open("r", encoding="utf-8", errors="replace") as handle:
        first = handle.readline(read_limit).strip()
    return first == signature
121+
122+
def count_lines(path: Path) -> int:
    """Count the lines of ``path`` when read as UTF-8 text (undecodable bytes replaced)."""
    total = 0
    with path.open("r", encoding="utf-8", errors="replace") as handle:
        for _line in handle:
            total += 1
    return total
126+
127+
def collect_dataset_repo_status() -> dict[str, Any]:
    """Collect git / git-lfs facts about the dataset checkout at ``DATASET_REPO``.

    Runs ``git rev-parse``, ``git lfs version``, ``git status --short``,
    ``git lfs ls-files`` and ``git remote -v`` against the dataset repo.

    Returns:
        A dict with the repo path, whether it is a git work tree, the git-lfs
        version string, the short status lines, the LFS entries for the
        required dataset files (keyed by their ``DS``-relative path) and the
        remote listing.

    Raises:
        subprocess.CalledProcessError: if any of the git commands exits
            non-zero (``git`` is called with its default ``check=True``).
    """
    git_repo = git(["git", "rev-parse", "--is-inside-work-tree"], cwd=DATASET_REPO)
    lfs_version = git(["git", "lfs", "version"], cwd=DATASET_REPO)
    status_short = git(["git", "-C", str(DATASET_REPO), "status", "--short"], cwd=REPO_ROOT)
    lfs_ls = git(["git", "-C", str(DATASET_REPO), "lfs", "ls-files"], cwd=REPO_ROOT)
    remote_v = git(["git", "-C", str(DATASET_REPO), "remote", "-v"], cwd=REPO_ROOT)

    # `git lfs ls-files` is run against DATASET_REPO, while the keys of
    # PRE_PULL_POINTER_STATE are relative to the dataset version dir DS.
    # Strip the DS prefix before the lookup; without this the lookup never
    # matched and "lfs_required_entries" was always empty.
    try:
        ds_prefix = DS.relative_to(DATASET_REPO).as_posix()
    except ValueError:
        # DS is not inside DATASET_REPO; fall back to comparing paths as-is.
        ds_prefix = ""
    ds_prefix = "" if ds_prefix in ("", ".") else f"{ds_prefix}/"

    relevant_lfs = {}
    for line in lfs_ls.stdout.splitlines():
        if not line.strip():
            continue
        # Expected shape: "<oid> <marker> <path>"; maxsplit keeps spaces in the path.
        parts = line.split(maxsplit=2)
        if len(parts) < 3:
            continue
        oid, marker, repo_rel = parts
        if ds_prefix and repo_rel.startswith(ds_prefix):
            rel = repo_rel[len(ds_prefix):]
        else:
            rel = repo_rel
        if rel in PRE_PULL_POINTER_STATE:
            relevant_lfs[rel] = {"oid_prefix": oid, "worktree_marker": marker}

    return {
        "path": str(DATASET_REPO),
        "is_git_repo": git_repo.stdout.strip() == "true",
        "git_lfs_version": lfs_version.stdout.strip(),
        "status_short": [line for line in status_short.stdout.splitlines() if line.strip()],
        "lfs_required_entries": relevant_lfs,
        "remote_v": [line for line in remote_v.stdout.splitlines() if line.strip()],
    }
154+
155+
def verify_required_dataset_paths() -> list[dict[str, Any]]:
    """Inspect every file in ``REQUIRED_DATASET_PATHS`` and describe its current state.

    For each (stage, subject) pair the entry records whether the file exists,
    whether it is still an LFS pointer stub, whether it looks hydrated (exists,
    is not a stub, and its first line starts with ``{``), its line count, and
    200-character previews of its first two lines.
    """
    report: list[dict[str, Any]] = []
    for stage, subject in REQUIRED_DATASET_PATHS:
        relative = rel_dataset_path(stage, subject)
        target = abs_dataset_path(stage, subject)

        # Defaults describe a missing file; they are overwritten when it exists.
        stub_now = False
        head = ["", ""]
        total_lines = 0

        present = target.exists()
        if present:
            stub_now = is_pointer_stub(target)
            with target.open("r", encoding="utf-8", errors="replace") as handle:
                head = [handle.readline().rstrip("\n") for _ in range(2)]
            total_lines = count_lines(target)

        entry = {
            "stage": stage,
            "subject": subject,
            "relative_path": relative,
            "path": str(target),
            "pre_pull_worktree_state": PRE_PULL_POINTER_STATE.get(relative, "unknown"),
            "exists": present,
            "is_pointer_stub_now": stub_now,
            "hydrated": present and (not stub_now) and head[0].startswith("{"),
            "line_count": total_lines,
            "first_line_preview": head[0][:200],
            "second_line_preview": head[1][:200],
        }
        report.append(entry)
    return report
187+
188+
def ensure_hydrated(required_paths: list[dict[str, Any]]) -> list[str]:
    """Return one blocker message per dataset entry that is not usable.

    Each entry yields at most one message, checked in this order: missing
    file, LFS pointer still present, file not recognised as JSONL. An empty
    list means every entry is hydrated.
    """

    def describe_problem(entry: dict[str, Any]):
        if not entry["exists"]:
            return f"missing dataset file: {entry['path']}"
        if entry["is_pointer_stub_now"]:
            return f"LFS pointer still present: {entry['path']}"
        if not entry["hydrated"]:
            return f"dataset file is not usable JSONL: {entry['path']}"
        return None

    problems = (describe_problem(entry) for entry in required_paths)
    return [message for message in problems if message is not None]
199+
200+
def load_recs(stage: str, subject: str, max_n: int = 200) -> tuple[list[str], dict[str, Any]]:
    """Load up to ``max_n`` content texts from a stage/subject JSONL split.

    A record is kept when its ``is_content`` field is truthy and its ``text``
    field is a string of at least 4 characters. Reading stops as soon as
    ``max_n`` texts have been collected, so the "available" counters only
    cover the records seen up to that point.

    Args:
        stage: School stage directory name.
        subject: Subject file stem.
        max_n: Maximum number of texts to return; values <= 0 return none.

    Returns:
        ``(texts, meta)`` where ``meta`` holds the file path, the number of
        content records seen, the number with valid text seen, and the number
        of texts loaded. A missing file yields an empty list and zero counts.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    path = abs_dataset_path(stage, subject)
    recs: list[str] = []
    seen_content = 0
    seen_valid_text = 0

    # A non-positive cap means "load nothing"; the append-then-check loop
    # below would otherwise still return one record.
    if path.exists() and max_n > 0:
        with path.open("r", encoding="utf-8", errors="replace") as handle:
            for line in handle:
                # Blank lines (e.g. a trailing newline) are not records and
                # would make json.loads raise.
                if not line.strip():
                    continue
                rec = json.loads(line)
                if not isinstance(rec, dict) or not rec.get("is_content"):
                    continue
                seen_content += 1
                text = rec.get("text", "")
                # A null or non-string "text" is treated like a too-short
                # text instead of crashing in len().
                if not isinstance(text, str) or len(text) < 4:
                    continue
                seen_valid_text += 1
                recs.append(text)
                if len(recs) >= max_n:
                    break

    return recs, {
        "path": str(path),
        "available_content_records": seen_content,
        "available_valid_text_records": seen_valid_text,
        "loaded_records": len(recs),
    }
234+
235+
def extract_top_words(rt: CIERuntime) -> list[dict[str, Any]]:
    """Return the 8 forward edges with the highest forward/backward weight ratio.

    Only edges whose source and destination both compare within the range
    U+4E00..U+9FFF are considered. The ratio is ``weight / backward_weight``
    when the backward weight exceeds 0.01, otherwise ``weight * 100``. Each
    result is ``{"phrase": src + dst, "ratio": ratio rounded to 1 decimal}``.
    """
    graph = rt.graph

    def in_cjk_range(token) -> bool:
        return "\u4e00" <= token <= "\u9fff"

    scored = []
    for outgoing in graph.fwd_edges.values():
        for target, link in outgoing.items():
            if not (in_cjk_range(link.src) and in_cjk_range(target)):
                continue
            reverse_weight = graph.get_bwd_weight(link.src, target)
            if reverse_weight > 0.01:
                asymmetry = link.weight / reverse_weight
            else:
                asymmetry = link.weight * 100
            scored.append({"phrase": link.src + target, "ratio": round(asymmetry, 1)})

    ranked = sorted(scored, key=lambda item: -item["ratio"])
    return ranked[:8]
247+
248+
def extract_circuits(rt: CIERuntime) -> list[dict[str, Any]]:
    """Return the 5 directed 3-cycles with the largest forward/reverse circulation gap.

    Starting from the first 25 nodes in the U+4E00..U+9FFF range, the search
    follows at most the first 6 forward neighbours at each of two hops and
    keeps triples ``a -> b -> c`` where ``a`` is a forward neighbour of ``c``.
    A triple is reported when the absolute difference between the circulation
    of ``a,b,c,a`` and of ``a,c,b,a`` exceeds 0.5, as
    ``{"phrase": a + b + c, "delta": gap rounded to 1 decimal}``.
    """
    graph = rt.graph

    def in_cjk_range(token) -> bool:
        return "\u4e00" <= token <= "\u9fff"

    seeds = [node for node in graph.nodes if in_cjk_range(node)][:25]
    found = []
    for first in seeds:
        for second in graph.neighbors_fwd(first)[:6]:
            if not in_cjk_range(second):
                continue
            for third in graph.neighbors_fwd(second)[:6]:
                if not in_cjk_range(third):
                    continue
                if first not in graph.neighbors_fwd(third):
                    continue
                forward = graph.circulation([first, second, third, first])
                backward = graph.circulation([first, third, second, first])
                gap = abs(forward - backward)
                if gap > 0.5:
                    found.append({"phrase": first + second + third, "delta": round(gap, 1)})

    ranked = sorted(found, key=lambda item: -item["delta"])
    return ranked[:5]
267+
268+
def compare_claim_words(label: str, top_words: list[dict[str, Any]]) -> dict[str, Any] | None:
    """Compare the claimed example words for ``label`` with the observed top words.

    Returns ``None`` when ``CLAIMED_WORDS`` has no (non-empty) entry for
    ``label``. Otherwise returns the claimed words, the observed phrases, and
    the claimed words split into those found and those missing.
    """
    expected = CLAIMED_WORDS.get(label)
    if not expected:
        return None

    observed = [entry["phrase"] for entry in top_words]
    hits: list[str] = []
    misses: list[str] = []
    for word in expected:
        bucket = hits if word in observed else misses
        bucket.append(word)

    return {
        "claimed_examples": expected,
        "actual_top_words": observed,
        "matched": hits,
        "missing": misses,
    }
282+
283+
def compare_claim_metrics(name: str, actual: dict[str, Any]) -> dict[str, Any] | None:
    """Compare the claimed metrics for scenario ``name`` with the ``actual`` metrics.

    Returns ``None`` when ``CLAIMED_METRICS`` has no (non-empty) entry for
    ``name``. Otherwise returns the claimed and actual dicts plus
    ``exact_match``, which is true only if every claimed key has an equal
    value in ``actual``. Keys present only in ``actual`` are ignored.
    """
    expected = CLAIMED_METRICS.get(name)
    if not expected:
        return None

    every_key_equal = all(actual.get(key) == value for key, value in expected.items())
    return {
        "claimed": expected,
        "actual": actual,
        "exact_match": every_key_equal,
    }
296+
297+
def run_subject_combo(stage: str, subject: str, required_paths_map: dict[str, dict[str, Any]]) -> tuple[dict[str, Any], dict[str, Any]]:
    """Run the single-subject pipeline + stability scenario for one stage/subject.

    Loads up to 300 texts, feeds the first 200 (each truncated to 80
    characters) into a fresh ``CIERuntime(seed=42)`` with one ``step`` per
    text, emits once, commits positive feedback for the first activated node
    (if any), and evaluates the pass criteria on a state snapshot.

    Args:
        stage: School stage directory name.
        subject: Subject file stem.
        required_paths_map: Entries from ``verify_required_dataset_paths``,
            keyed by the relative dataset path (``rel_dataset_path``); read
            here for ``path``, ``pre_pull_worktree_state`` and ``line_count``.

    Returns:
        ``(result, analysis)``. ``result`` is the scenario record (status,
        metrics, top words, claim comparisons). ``analysis`` is a compact
        summary of the same run, or ``{}`` when no records could be loaded.
    """
    label = f"{stage}{subject}"
    rel_path = rel_dataset_path(stage, subject)
    recs, load_meta = load_recs(stage, subject, 300)
    if not recs:
        # No usable records: report a FAIL with zeroed metrics and skip the runtime.
        result = {
            "name": f"{label}_pipeline+stability",
            "type": "subject_pipeline",
            "stage": stage,
            "subject": subject,
            "input_files": [required_paths_map[rel_path]["path"]],
            "source_files_pre_pull_state": [required_paths_map[rel_path]["pre_pull_worktree_state"]],
            "line_count": required_paths_map[rel_path]["line_count"],
            "loaded_records": 0,
            "fed_records": 0,
            "node_count": 0,
            "edge_count": 0,
            "phi_range": None,
            "top_words": [],
            "pass": False,
            "status": "FAIL",
            "reason": "no data",
            "claim_word_comparison": compare_claim_words(label, []),
            "claim_metric_comparison": compare_claim_metrics(
                f"{label}_pipeline+stability",
                {"nodes": 0, "edges": 0, "phi_range": None},
            ),
        }
        return result, {}

    rt = CIERuntime(seed=42)
    t0 = time.time()
    # Only the first 200 of the (up to 300) loaded texts are fed, each cut to
    # 80 characters, with one runtime step after every ingest.
    for rec in recs[:200]:
        rt.ingest(rec[:80])
        rt.step(n=1)
    elapsed = time.time() - t0

    output = rt.emit()
    # Feedback is committed before the snapshot, so the snapshot reflects it.
    if output["activated"]:
        rt.commit_feedback({"correct": [output["activated"][0]["node"]], "reward": 1.0})

    snap = rt.snapshot_state()
    # Pass criteria: more than 20 phi entries, phi min/max within +/-10.1,
    # attention used not above total (0.01 tolerance), and all phi finite.
    ok = True
    ok &= snap["phi_summary"]["count"] > 20
    ok &= abs(snap["phi_summary"]["max"]) <= 10.1
    ok &= abs(snap["phi_summary"]["min"]) <= 10.1
    ok &= snap["attention"]["used"] <= snap["attention"]["total"] + 0.01
    ok &= all(math.isfinite(value) for value in rt.state.phi.values())

    # Human-readable summary stored as the scenario's "reason".
    detail = (
        f"nodes={snap['phi_summary']['count']}, edges={snap['graph']['edge_count']}, "
        f"phi=[{snap['phi_summary']['min']:.3f},{snap['phi_summary']['max']:.3f}], "
        f"attn={snap['attention']['used']:.1f}, mode={output['mode']}, "
        f"time={elapsed:.2f}s, recs={min(len(recs), 200)}"
    )

    top_words = extract_top_words(rt)
    circuits = extract_circuits(rt)
    # Metrics in the same shape as CLAIMED_METRICS; phi bounds rounded to 3
    # decimals before the exact-equality comparison.
    actual_metrics = {
        "nodes": snap["phi_summary"]["count"],
        "edges": snap["graph"]["edge_count"],
        "phi_range": [round(snap["phi_summary"]["min"], 3), round(snap["phi_summary"]["max"], 3)],
    }
    result = {
        "name": f"{label}_pipeline+stability",
        "type": "subject_pipeline",
        "stage": stage,
        "subject": subject,
        "input_files": [required_paths_map[rel_path]["path"]],
        "source_files_pre_pull_state": [required_paths_map[rel_path]["pre_pull_worktree_state"]],
        "line_count": required_paths_map[rel_path]["line_count"],
        # loaded_records may be up to 300, while fed_records is capped at 200.
        "loaded_records": load_meta["loaded_records"],
        "fed_records": min(len(recs), 200),
        "available_content_records_seen_before_cap": load_meta["available_content_records"],
        "available_valid_text_records_seen_before_cap": load_meta["available_valid_text_records"],
        "node_count": snap["phi_summary"]["count"],
        "edge_count": snap["graph"]["edge_count"],
        "anchor_count": len(rt.state.anchor_nodes),
        "core_count": len(rt.state.ability_cores),
        "experience_region_count": len(rt.state.experience_regions.get("experience", set())),
        "skill_belt_candidate_count": len(rt.state.skill_belt_candidates),
        "sedimentation_events": len(rt.state.sedimentation_trace),
        "merge_events": len(rt.state.merge_events),
        "decay_events": len(rt.state.decay_events),
        "phi_range": actual_metrics["phi_range"],
        "top_words": top_words,
        "circuit_count": len(circuits),
        "top_circuits": circuits,
        "elapsed_seconds": round(elapsed, 2),
        "output_mode": output["mode"],
        "pass": bool(ok),
        "status": "PASS" if ok else "FAIL",
        "reason": detail,
        "claim_word_comparison": compare_claim_words(label, top_words),
        "claim_metric_comparison": compare_claim_metrics(f"{label}_pipeline+stability", actual_metrics),
    }
    # Second element: compact per-subject summary. Node/edge counts here come
    # from rt.graph rather than from the snapshot used in `result`.
    return result, {
        "nodes": rt.graph.node_count,
        "edges": rt.graph.edge_count,
        "anchors": len(rt.state.anchor_nodes),
        "cores": len(rt.state.ability_cores),
        "exp": len(rt.state.experience_regions.get("experience", set())),
        "belts": len(rt.state.skill_belt_candidates),
        "top_words": top_words,
        "circuits": len(circuits),
        "top_circuits": circuits,
        "sed": len(rt.state.sedimentation_trace),
        "merges": len(rt.state.merge_events),
        "decays": len(rt.state.decay_events),
        "phi_range": actual_metrics["phi_range"],
    }
409+
410+
def summarize_runtime(
    *,
    name: str,
    scenario_type: str,
    input_files: list[str],
    pre_pull_states: list[str],
    load_counts: list[int],
    fed_records: int,
    rt: CIERuntime,
    snap: dict[str, Any],
    ok: bool,
    reason: str,
) -> dict[str, Any]:
    """Assemble the report entry for a scenario that feeds several inputs.

    Args:
        name: Scenario name; also the key handed to ``compare_claim_metrics``.
        scenario_type: Stored under ``"type"``.
        input_files: Stored under ``"input_files"``.
        pre_pull_states: Stored under ``"source_files_pre_pull_state"``.
        load_counts: Stored under ``"loaded_records_per_input"``.
        fed_records: Stored under ``"fed_records"``.
        rt: Runtime passed to ``extract_top_words``.
        snap: Snapshot dict; ``snap["phi_summary"]`` (``count``, ``min``,
            ``max``) and ``snap["graph"]["edge_count"]`` are read.
        ok: Truthy when the scenario passed.
        reason: Human-readable detail stored under ``"reason"``.

    Returns:
        A dict with the scenario metadata, node/edge counts, the phi range
        rounded to three decimals, the top words, the pass/fail status and the
        result of ``compare_claim_metrics`` for nodes, edges and phi max.
    """
    emergent_words = extract_top_words(rt)

    phi = snap["phi_summary"]
    node_total = phi["count"]
    edge_total = snap["graph"]["edge_count"]
    phi_high = round(phi["max"], 3)
    phi_low = round(phi["min"], 3)
    passed = bool(ok)

    entry = {
        "name": name,
        "type": scenario_type,
        "input_files": input_files,
        "source_files_pre_pull_state": pre_pull_states,
        "loaded_records_per_input": load_counts,
        "fed_records": fed_records,
        "node_count": node_total,
        "edge_count": edge_total,
        "phi_range": [phi_low, phi_high],
        "top_words": emergent_words,
        "pass": passed,
        "status": "PASS" if passed else "FAIL",
        "reason": reason,
    }
    # The claim comparison is evaluated last, after every snapshot field was read.
    entry["claim_metric_comparison"] = compare_claim_metrics(
        name,
        {"nodes": node_total, "edges": edge_total, "phi_max": phi_high},
    )
    return entry
446+
447+
def run_cross_stage(required_paths_map: dict[str, dict[str, Any]]) -> dict[str, Any]:
    """Feed 语文 records from all three stages into one runtime and summarize it.

    For each of 小学 / 初中 / 高中, ``load_recs`` is asked for 60 records and at
    most the first 40 are ingested (each sliced with ``[:60]``), with one
    ``step`` after every ingest. The scenario passes when the absolute phi
    maximum is at most 10.1 and the phi summary count exceeds 30.

    Args:
        required_paths_map: Dataset entries keyed by relative path; the
            ``"path"`` and ``"pre_pull_worktree_state"`` fields are read.

    Returns:
        The report entry produced by ``summarize_runtime``.
    """
    subject = "语文"
    runtime = CIERuntime(seed=42)
    paths: list[str] = []
    states: list[str] = []
    fed_per_stage: list[int] = []

    for stage in ("小学", "初中", "高中"):
        records, _ = load_recs(stage, subject, 60)
        dataset_entry = required_paths_map[rel_dataset_path(stage, subject)]
        paths.append(dataset_entry["path"])
        states.append(dataset_entry["pre_pull_worktree_state"])

        batch = records[:40]
        fed_per_stage.append(len(batch))
        for record in batch:
            runtime.ingest(record[:60])
            runtime.step(n=1)

    snapshot = runtime.snapshot_state()
    phi = snapshot["phi_summary"]
    phi_peak = phi["max"]
    node_total = phi["count"]

    return summarize_runtime(
        name="cross_stage_语文",
        scenario_type="cross_stage",
        input_files=paths,
        pre_pull_states=states,
        load_counts=fed_per_stage,
        fed_records=sum(fed_per_stage),
        rt=runtime,
        snap=snapshot,
        ok=abs(phi_peak) <= 10.1 and node_total > 30,
        reason=f"nodes={node_total}, phi_max={phi_peak:.3f}",
    )
476+
477+
def run_cross_subject(required_paths_map: dict[str, dict[str, Any]]) -> dict[str, Any]:
    """Feed three 小学 subjects into one runtime, anchoring each record by subject.

    For 语文 / 数学 / 科学, ``load_recs`` is asked for 60 records and at most the
    first 40 are ingested (each sliced with ``[:60]``) with ``anchors=[subject]``,
    followed by one ``step``. The scenario passes when the absolute phi maximum
    is at most 10.1 and the graph has a node for every subject name.

    Args:
        required_paths_map: Dataset entries keyed by relative path; the
            ``"path"`` and ``"pre_pull_worktree_state"`` fields are read.

    Returns:
        The report entry produced by ``summarize_runtime``.
    """
    stage = "小学"
    subjects = ("语文", "数学", "科学")
    runtime = CIERuntime(seed=42)
    paths: list[str] = []
    states: list[str] = []
    fed_per_subject: list[int] = []

    for subject in subjects:
        records, _ = load_recs(stage, subject, 60)
        dataset_entry = required_paths_map[rel_dataset_path(stage, subject)]
        paths.append(dataset_entry["path"])
        states.append(dataset_entry["pre_pull_worktree_state"])

        batch = records[:40]
        fed_per_subject.append(len(batch))
        for record in batch:
            runtime.ingest(record[:60], anchors=[subject])
            runtime.step(n=1)

    snapshot = runtime.snapshot_state()
    phi = snapshot["phi_summary"]
    phi_peak = phi["max"]

    # ``and`` short-circuits: the node lookups only run when phi is in bounds.
    ok = abs(phi_peak) <= 10.1 and all(runtime.graph.has_node(item) for item in subjects)
    reason = (
        f"nodes={phi['count']}, phi_max={phi_peak:.3f}, "
        f"cores={len(runtime.state.ability_cores)}"
    )

    return summarize_runtime(
        name="cross_subject_小学",
        scenario_type="cross_subject",
        input_files=paths,
        pre_pull_states=states,
        load_counts=fed_per_subject,
        fed_records=sum(fed_per_subject),
        rt=runtime,
        snap=snapshot,
        ok=ok,
        reason=reason,
    )
509+
510+
def run_all_in_one(required_paths_map: dict[str, dict[str, Any]]) -> tuple[dict[str, Any], dict[str, Any]]:
    """Feed every ``COMBOS`` dataset into a single runtime and summarize it.

    For each ``(stage, subject)`` pair, ``load_recs`` is asked for 80 records
    and at most the first 50 are ingested (each sliced with ``[:60]``), with one
    ``step`` after every ingest. The scenario passes when the absolute phi
    maximum is at most 10.1.

    Args:
        required_paths_map: Dataset entries keyed by relative path; the
            ``"path"`` and ``"pre_pull_worktree_state"`` fields are read.

    Returns:
        ``(result, analysis)``: the report entry from ``summarize_runtime`` and
        a compact dict of counts taken from the snapshot and runtime state.
    """
    runtime = CIERuntime(seed=42)
    fed_total = 0
    paths: list[str] = []
    states: list[str] = []
    fed_per_input: list[int] = []

    for stage, subject in COMBOS:
        records, _ = load_recs(stage, subject, 80)
        dataset_entry = required_paths_map[rel_dataset_path(stage, subject)]
        paths.append(dataset_entry["path"])
        states.append(dataset_entry["pre_pull_worktree_state"])

        batch = records[:50]
        fed_per_input.append(len(batch))
        for record in batch:
            runtime.ingest(record[:60])
            runtime.step(n=1)
        fed_total += len(batch)

    snapshot = runtime.snapshot_state()
    phi = snapshot["phi_summary"]
    node_total = phi["count"]
    edge_total = snapshot["graph"]["edge_count"]
    phi_peak = phi["max"]

    result = summarize_runtime(
        name="all_in_one_5subjects",
        scenario_type="all_in_one",
        input_files=paths,
        pre_pull_states=states,
        load_counts=fed_per_input,
        fed_records=fed_total,
        rt=runtime,
        snap=snapshot,
        ok=abs(phi_peak) <= 10.1,
        reason=(
            f"fed={fed_total}, nodes={node_total}, "
            f"edges={edge_total}, phi_max={phi_peak:.3f}"
        ),
    )

    state = runtime.state
    analysis = {
        "fed": fed_total,
        "nodes": node_total,
        "edges": edge_total,
        "phi_range": [round(phi["min"], 3), round(phi_peak, 3)],
        "anchors": len(state.anchor_nodes),
        "cores": len(state.ability_cores),
        "sed": len(state.sedimentation_trace),
        "merges": len(state.merge_events),
        "decays": len(state.decay_events),
    }
    return result, analysis
557+
558+
def build_overall_assessment(
    report: dict[str, Any],
    scenario_results: list[dict[str, Any]],
    blockers: list[str],
) -> tuple[dict[str, Any], str]:
    """Derive the conclusion, A/B effect and trust call for the rerun.

    Args:
        report: Report dict; only ``"reproduced_8_of_8_pass"`` is read, and
            only when there are no blockers.
        scenario_results: Scenario entries; their optional
            ``"claim_word_comparison"`` and ``"claim_metric_comparison"``
            values are inspected.
        blockers: Blocker messages; any entry short-circuits to "still blocked".

    Returns:
        ``(assessment, conclusion)`` where ``assessment`` holds the keys
        ``conclusion``, ``effect_on_prior_a_vs_b_comparison`` and ``trust_call``.
    """

    def verdict(conclusion: str, effect: str, trust: str) -> tuple[dict[str, Any], str]:
        # Package the three labels in the shape the report expects.
        packaged = {
            "conclusion": conclusion,
            "effect_on_prior_a_vs_b_comparison": effect,
            "trust_call": trust,
        }
        return packaged, conclusion

    if blockers:
        return verdict("still blocked", "leaves unchanged", "do not trust")

    every_scenario_passed = report["reproduced_8_of_8_pass"]

    matched_words: list[Any] = []
    exact_metric_scenarios: list[Any] = []
    for entry in scenario_results:
        words = entry.get("claim_word_comparison")
        if words:
            matched_words += words["matched"]
        metrics = entry.get("claim_metric_comparison")
        if metrics and metrics.get("exact_match"):
            exact_metric_scenarios.append(entry["name"])

    if not every_scenario_passed:
        return verdict(
            "not reproduced",
            "weakens",
            "do not trust the claimed 8/8 PASS as stated",
        )

    # Full reproduction needs at least 7 matched claim words and an exact
    # metric match for all 8 scenarios.
    if len(matched_words) >= 7 and len(exact_metric_scenarios) == 8:
        return verdict(
            "reproduced",
            "strengthens",
            "trust with the documented port caveat",
        )

    return verdict(
        "partially reproduced",
        "strengthens",
        "trust the rerun outcome, not the exact original-commit execution claim",
    )
597+
598+
def render_markdown(report: dict[str, Any]) -> str:
    """Render the full strict-rerun report as a Markdown document.

    Every list-valued section (compatibility deviations, claim comparisons,
    blockers) renders ``- none`` when it is empty, so a blocked report does not
    leave a dangling list header or stacked blank lines.

    Args:
        report: Report dict as built by ``main`` or ``build_blocked_report``.
            Keys read here: ``per_subject_results``, ``required_dataset_paths``,
            ``blockers``, ``compatibility_deviations``, ``claude_claim_commit``,
            ``chosen_runnable_branch_b_commit``, ``dataset_repo_status``,
            ``hydration_status``, ``script_used``, ``script_provenance``,
            ``rerun_command``, ``rerun_performed``, ``summary``,
            ``reproduced_8_of_8_pass``, ``overall_assessment`` and
            ``recommendation``.

    Returns:
        The Markdown text, without a trailing newline.
    """
    empty_marker = "- none"

    scenario_rows = []
    for result in report["per_subject_results"]:
        # Show at most five phrases; "-" keeps the table cell non-empty.
        top_words = ", ".join(item["phrase"] for item in result.get("top_words", [])[:5]) or "-"
        subject = result.get("subject", result["name"])
        scenario_rows.append(
            f"| {result['name']} | {subject} | {result['status']} | "
            f"{result.get('node_count', 0)} | {result.get('edge_count', 0)} | "
            f"{top_words} | {result['reason']} |"
        )

    dataset_rows = []
    for entry in report["required_dataset_paths"]:
        dataset_rows.append(
            f"| {entry['stage']} | {entry['subject']} | `{entry['relative_path']}` | "
            f"{entry['pre_pull_worktree_state']} | {'yes' if entry['hydrated'] else 'no'} | {entry['line_count']} |"
        )

    comparison_lines = []
    for result in report["per_subject_results"]:
        word_comp = result.get("claim_word_comparison")
        metric_comp = result.get("claim_metric_comparison")
        if word_comp:
            comparison_lines.append(
                f"- {result['name']}: claimed words {word_comp['claimed_examples']}; "
                f"matched {word_comp['matched']}; missing {word_comp['missing']}."
            )
        if metric_comp:
            comparison_lines.append(
                f"- {result['name']}: claimed metrics {metric_comp['claimed']}; "
                f"actual {metric_comp['actual']}; exact_match={metric_comp['exact_match']}."
            )
    if not comparison_lines:
        # Nothing to compare (e.g. the blocked report has no scenarios).
        comparison_lines.append(empty_marker)

    blockers_lines = "\n".join(f"- {blocker}" for blocker in report["blockers"]) or empty_marker
    deviations_lines = (
        "\n".join(f"- {item['detail']}" for item in report["compatibility_deviations"])
        or empty_marker
    )

    return "\n".join(
        [
            "# Branch B formal validation strict rerun",
            "",
            "## 1. purpose",
            "",
            "Strictly rerun Claude's claimed Branch B formal dataset validation once on `/Users/george/code/cie-datasets`,",
            "using a runnable Branch B source tree without changing runtime behavior.",
            "",
            "## 2. claimed result being rerun",
            "",
            f"- Claim commit: `{report['claude_claim_commit']}`",
            "- Claimed result: `8/8 PASS` on `/Users/george/code/cie-datasets`.",
            "- Claimed examples: 小学数学 `多少`, `一共`; 初中数学 `方程`, `图形`, `问题`; 初中语文 `阅读`, `单元`.",
            "",
            "## 3. chosen runnable Branch B ref",
            "",
            f"- Runnable Branch B base: `{report['chosen_runnable_branch_b_commit']}` (`origin/branch-b` / `c734288`).",
            "- This ref contains the Branch B runtime source tree (`cie/__init__.py`, `cie/runtime.py`, `cie/graph.py`, `cie/state.py`, `cie/dynamics.py`).",
            "",
            "## 4. dataset hydration audit",
            "",
            f"- Dataset repo: `{report['dataset_repo_status']['path']}`",
            f"- Git LFS: `{report['dataset_repo_status']['git_lfs_version']}`",
            f"- Hydration status: `{report['hydration_status']['status']}`",
            "",
            "| Stage | Subject | Relative path | Pre-pull state | Hydrated now | Line count |",
            "|---|---|---|---|---:|---:|",
            *dataset_rows,
            "",
            "## 5. script provenance audit",
            "",
            f"- Script used: `{report['script_used']}`",
            f"- Source: `{report['script_provenance']['source_commit']}:{report['script_provenance']['source_path']}`",
            f"- Port path: `{report['script_provenance']['port_path']}`",
            "- Compatibility deviations:",
            deviations_lines,
            "",
            "## 6. exact rerun command",
            "",
            f"- `{report['rerun_command']}`",
            "",
            "## 7. per-subject results table",
            "",
            "| Scenario | Subject/domain | Status | Nodes | Edges | Top emergent words | Reason |",
            "|---|---|---|---:|---:|---|---|",
            *scenario_rows,
            "",
            "## 8. whether 8/8 PASS was reproduced",
            "",
            f"- Rerun performed: `{report['rerun_performed']}`",
            f"- Result: `{report['summary']['passed']}/{report['summary']['total']} PASS`",
            f"- Exact 8/8 reproduction: `{report['reproduced_8_of_8_pass']}`",
            "",
            "## 9. comparison to Claude's claimed numbers/words",
            "",
            *comparison_lines,
            "",
            "## 10. conclusion",
            "",
            f"- `{report['overall_assessment']['conclusion']}`",
            f"- Effect on prior A/B comparison: `{report['overall_assessment']['effect_on_prior_a_vs_b_comparison']}`",
            f"- Recommendation: `{report['recommendation']}`",
            "",
            "### blockers",
            "",
            blockers_lines,
        ]
    )
704+
705+
def render_review_summary(report: dict[str, Any]) -> str:
    """Render the short three-bullet review summary as Markdown.

    Args:
        report: Report dict; reads
            ``hydration_status.all_required_files_hydrated``,
            ``rerun_performed`` and ``overall_assessment.trust_call``.

    Returns:
        The Markdown text, without a trailing newline.
    """

    def yes_no(flag: Any) -> str:
        # Any truthy value renders as "yes".
        return "yes" if flag else "no"

    hydrated = yes_no(report["hydration_status"]["all_required_files_hydrated"])
    rerun = yes_no(report["rerun_performed"])
    trust_call = report["overall_assessment"]["trust_call"]

    summary_lines = (
        "# Branch B formal validation strict rerun summary",
        "",
        f"- Dataset actually hydrated: {hydrated}.",
        f"- Formal validation actually rerun: {rerun}.",
        f"- Claude's claimed result should now be trusted: {trust_call}.",
    )
    return "\n".join(summary_lines)
716+
717+
def write_reports(report: dict[str, Any]) -> None:
    """Write the JSON report, the Markdown report and the review summary.

    The parent directory of every output path is created first. Previously only
    the JSON and review-summary parents were created, so the Markdown report
    could only be written when it shared a directory with the JSON report.

    The JSON file is written before either Markdown rendering runs, so the
    machine-readable report still lands on disk if rendering raises.

    Args:
        report: Report dict; serialized as UTF-8 JSON with non-ASCII text kept
            verbatim, and passed to ``render_markdown`` and
            ``render_review_summary``.
    """
    for target in (REPORT_JSON_PATH, REPORT_MD_PATH, REVIEW_MD_PATH):
        target.parent.mkdir(parents=True, exist_ok=True)

    REPORT_JSON_PATH.write_text(
        json.dumps(report, ensure_ascii=False, indent=2) + "\n",
        encoding="utf-8",
    )
    REPORT_MD_PATH.write_text(render_markdown(report) + "\n", encoding="utf-8")
    REVIEW_MD_PATH.write_text(render_review_summary(report) + "\n", encoding="utf-8")
727+
728+
def build_blocked_report(required_paths: list[dict[str, Any]], blockers: list[str], dataset_status: dict[str, Any]) -> dict[str, Any]:
    """Build the report emitted when the rerun could not be performed.

    Args:
        required_paths: Dataset path entries, stored unchanged.
        blockers: Blocker messages, stored unchanged and passed to
            ``build_overall_assessment``.
        dataset_status: Dataset repository status, stored unchanged.

    Returns:
        A report dict with hydration marked ``FAILED``, no scenario results,
        a ``BLOCKED`` summary and the assessment derived from ``blockers``.
    """
    head_commit = git(["git", "rev-parse", "HEAD"]).stdout.strip()

    # With blockers present the assessment ignores the pass flag, but the key
    # is still supplied so the helper sees a well-formed report stub.
    report_stub = {"reproduced_8_of_8_pass": False}
    assessment, _conclusion = build_overall_assessment(report_stub, [], blockers)

    hydration = {
        "status": "FAILED",
        "all_required_files_hydrated": False,
    }
    provenance = {
        "source_commit": CLAUDE_CLAIM_COMMIT,
        "source_path": SCRIPT_SOURCE_PATH,
        "port_path": None,
    }
    summary = {"passed": 0, "failed": 0, "total": 0, "status": "BLOCKED"}

    # Key order is kept stable because it determines the JSON output order.
    return {
        "claude_claim_commit": CLAUDE_CLAIM_COMMIT,
        "claude_claim_commit_exists": True,
        "chosen_runnable_branch_b_commit": head_commit,
        "dataset_repo_status": dataset_status,
        "required_dataset_paths": required_paths,
        "hydration_status": hydration,
        "script_used": "none",
        "script_provenance": provenance,
        "compatibility_deviations": [],
        "rerun_performed": False,
        "rerun_command": None,
        "per_subject_results": [],
        "summary": summary,
        "reproduced_8_of_8_pass": False,
        "overall_assessment": assessment,
        "blockers": blockers,
        "recommendation": "do not trust the claimed result until the strict rerun blockers are removed",
    }
764+
765+
def main() -> int:
    """Run the strict rerun end to end and write all reports.

    Collects dataset status, verifies and hydrates the required files, then
    either writes a blocked report (returning 1) or runs every scenario,
    builds the report and assessment, writes the outputs and prints a summary
    (returning 0).

    Returns:
        1 when hydration blockers prevent the rerun, otherwise 0.
    """
    dataset_status = collect_dataset_repo_status()
    required_paths = verify_required_dataset_paths()
    blockers = ensure_hydrated(required_paths)
    if blockers:
        write_reports(build_blocked_report(required_paths, blockers, dataset_status))
        print("\n".join(f"[BLOCKED] {blocker}" for blocker in blockers))
        return 1

    required_paths_map = {entry["relative_path"]: entry for entry in required_paths}
    chosen_commit = git(["git", "rev-parse", "HEAD"]).stdout.strip()

    tests_output = []
    scenario_results = []
    analysis = {}

    def record(outcome):
        # Keep the compact console row and the full result in the same order.
        tests_output.append(
            {"name": outcome["name"], "status": outcome["status"], "detail": outcome["reason"]}
        )
        scenario_results.append(outcome)

    for stage, subject in COMBOS:
        outcome, analysis_entry = run_subject_combo(stage, subject, required_paths_map)
        record(outcome)
        if analysis_entry:
            analysis[f"{stage}{subject}"] = analysis_entry

    cross_stage_result = run_cross_stage(required_paths_map)
    cross_subject_result = run_cross_subject(required_paths_map)
    all_in_one_result, all_in_one_analysis = run_all_in_one(required_paths_map)
    for outcome in (cross_stage_result, cross_subject_result, all_in_one_result):
        record(outcome)
    analysis["全灌"] = all_in_one_analysis

    statuses = [item["status"] for item in tests_output]
    passed = statuses.count("PASS")
    failed = statuses.count("FAIL")
    total = len(tests_output)
    reproduced = failed == 0 and total == 8

    script_provenance = {
        "source_commit": CLAUDE_CLAIM_COMMIT,
        "source_path": SCRIPT_SOURCE_PATH,
        "port_path": STRICT_PORT_PATH,
        "port_rationale": (
            "origin/branch-b is the runnable Branch B source tree, but it does not contain "
            "tests/formal_validation.py from 379feb2."
        ),
    }
    summary = {
        "passed": passed,
        "failed": failed,
        "total": total,
        "dataset": str(DS),
        "status": "PASS" if failed == 0 else "FAIL",
    }

    # Key order is kept stable because it determines the JSON output order.
    report = {
        "claude_claim_commit": CLAUDE_CLAIM_COMMIT,
        "claude_claim_commit_exists": True,
        "chosen_runnable_branch_b_commit": chosen_commit,
        "dataset_repo_status": dataset_status,
        "required_dataset_paths": required_paths,
        "hydration_status": {
            "status": "SUCCESS",
            "all_required_files_hydrated": True,
        },
        "script_used": "strict_rerun_port",
        "script_provenance": script_provenance,
        "compatibility_deviations": COMPATIBILITY_DEVIATIONS,
        "rerun_performed": True,
        "rerun_command": "python3 tests/formal_validation_strict_rerun.py",
        "per_subject_results": scenario_results,
        "analysis": analysis,
        "summary": summary,
        "reproduced_8_of_8_pass": reproduced,
        "blockers": [],
    }

    assessment, conclusion = build_overall_assessment(report, scenario_results, [])
    report["overall_assessment"] = assessment

    recommendation_by_conclusion = {
        "reproduced": "trust the rerun result, with the explicit note that execution used a faithful port on runnable Branch B",
        "partially reproduced": "trust the rerun outcome but not Claude's exact original execution claim verbatim",
    }
    report["recommendation"] = recommendation_by_conclusion.get(
        conclusion,
        "do not trust Claude's claimed 8/8 PASS as stated",
    )

    write_reports(report)

    for row in tests_output:
        print(f"  [{row['status']}] {row['name']}: {row['detail']}")
    print(f"\n总计: {passed} passed, {failed} failed, {total} total")
    print("\nAnalysis:")
    for key, value in analysis.items():
        print(
            f"  {key}: nodes={value.get('nodes', '?')}, "
            f"words={value.get('top_words', [])[:5]}, circuits={value.get('circuits', '?')}"
        )
    print(f"\n结论: {report['overall_assessment']['conclusion']}")
    print(f"报告: {REPORT_JSON_PATH}")
    return 0
863+
864+
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)