- commit
- 273f416
- parent
- 419ae8d
- author
- codex@macbookpro
- date
- 2026-03-31 20:50:06 +0800 CST
review: strict rerun branch-a formal validation on cie-datasets
5 files changed,
+5643,
-0
1@@ -0,0 +1,4435 @@
2+{
3+ "branch_a_base_commit": "419ae8d39150806011c1eb6082c7fc8c6a337735",
4+ "branch_b_reference_commit": "c11091603a6b60e7d459e77dffd9f8a2ee1c0776",
5+ "dataset_repo_status": {
6+ "path": "/Users/george/code/cie-datasets",
7+ "head": "9f50856fe193aa14feb6474525fecb2076d40ed3",
8+ "is_git_repo": true,
9+ "git_lfs_version": "git-lfs/3.7.1 (GitHub; darwin arm64; go 1.25.3)",
10+ "status_short": [],
11+ "remote_v": [
12+ "origin\tgit@github.com:imwower/cie-datasets.git (fetch)",
13+ "origin\tgit@github.com:imwower/cie-datasets.git (push)"
14+ ],
15+ "lfs_required_entries": [
16+ "2b537ccdd4 * china_text_book_md/v2026-03-28/splits/by_stage_subject/初中/数学.jsonl",
17+ "c159f4c2f9 * china_text_book_md/v2026-03-28/splits/by_stage_subject/初中/语文.jsonl",
18+ "4334f7e6b0 * china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/数学.jsonl",
19+ "fc6b830b8e * china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/科学.jsonl",
20+ "819a1e149b * china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/语文.jsonl",
21+ "ad0cc7e80f * china_text_book_md/v2026-03-28/splits/by_stage_subject/高中/语文.jsonl"
22+ ]
23+ },
24+ "required_dataset_paths": [
25+ {
26+ "stage": "小学",
27+ "subject": "语文",
28+ "relative_path": "splits/by_stage_subject/小学/语文.jsonl",
29+ "path": "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/语文.jsonl",
30+ "exists": true,
31+ "is_pointer_stub_now": false,
32+ "hydrated": true,
33+ "line_count": 1597,
34+ "first_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:小学_语文_统编版_义务教育教科书·语文一年级上册:1\", \"lines\": [\"[非正文页]\"], \"error\": null, \"page\":",
35+ "second_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:小学_语文_统编版_义务教育教科书·语文一年级上册:2\", \"lines\": [\"[非正文页]\"], \"error\": null, \"page\":"
36+ },
37+ {
38+ "stage": "小学",
39+ "subject": "数学",
40+ "relative_path": "splits/by_stage_subject/小学/数学.jsonl",
41+ "path": "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/数学.jsonl",
42+ "exists": true,
43+ "is_pointer_stub_now": false,
44+ "hydrated": true,
45+ "line_count": 7459,
46+ "first_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:小学_数学_人教版_义务教育教科书 · 数学一年级上册:1\", \"lines\": [\"[非正文页]\"], \"error\": null, \"page",
47+ "second_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:小学_数学_人教版_义务教育教科书 · 数学一年级上册:2\", \"lines\": [\"[非正文页]\"], \"error\": null, \"page"
48+ },
49+ {
50+ "stage": "小学",
51+ "subject": "科学",
52+ "relative_path": "splits/by_stage_subject/小学/科学.jsonl",
53+ "path": "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/科学.jsonl",
54+ "exists": true,
55+ "is_pointer_stub_now": false,
56+ "hydrated": true,
57+ "line_count": 5032,
58+ "first_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:小学_科学_人教鄂教版_义务教育教科书·科学一年级上册:1\", \"lines\": [\"[非正文页]\"], \"error\": null, \"page",
59+ "second_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:小学_科学_人教鄂教版_义务教育教科书·科学一年级上册:2\", \"lines\": [\"[非正文页]\"], \"error\": null, \"page"
60+ },
61+ {
62+ "stage": "初中",
63+ "subject": "语文",
64+ "relative_path": "splits/by_stage_subject/初中/语文.jsonl",
65+ "path": "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/初中/语文.jsonl",
66+ "exists": true,
67+ "is_pointer_stub_now": false,
68+ "hydrated": true,
69+ "line_count": 961,
70+ "first_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:初中_语文_统编版-人民教育出版社_七年级_义务教育教科书·语文七年级上册:1\", \"lines\": [\"[非正文页]\"], \"error\": n",
71+ "second_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:初中_语文_统编版-人民教育出版社_七年级_义务教育教科书·语文七年级上册:2\", \"lines\": [\"[非正文页]\"], \"error\": n"
72+ },
73+ {
74+ "stage": "初中",
75+ "subject": "数学",
76+ "relative_path": "splits/by_stage_subject/初中/数学.jsonl",
77+ "path": "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/初中/数学.jsonl",
78+ "exists": true,
79+ "is_pointer_stub_now": false,
80+ "hydrated": true,
81+ "line_count": 5559,
82+ "first_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:初中_数学_人教版-人民教育出版社_七年级_义务教育教科书·数学七年级上册:1\", \"lines\": [\"[非正文页]\"], \"error\": n",
83+ "second_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:初中_数学_人教版-人民教育出版社_七年级_义务教育教科书·数学七年级上册:2\", \"lines\": [\"[非正文页]\"], \"error\": n"
84+ },
85+ {
86+ "stage": "高中",
87+ "subject": "语文",
88+ "relative_path": "splits/by_stage_subject/高中/语文.jsonl",
89+ "path": "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/高中/语文.jsonl",
90+ "exists": true,
91+ "is_pointer_stub_now": false,
92+ "hydrated": true,
93+ "line_count": 694,
94+ "first_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:高中_语文_统编版-人民教育出版社_普通高中教科书·语文必修 上册:1\", \"lines\": [\"[非正文页]\"], \"error\": null,",
95+ "second_line_preview": "{\"concept\": \"教材非正文页\", \"layer\": \"boundary\", \"provider\": \"china-text-book-md\", \"model\": \"md_textbook_rules_v1\", \"timestamp\": \"md:高中_语文_统编版-人民教育出版社_普通高中教科书·语文必修 上册:2\", \"lines\": [\"[非正文页]\"], \"error\": null,"
96+ }
97+ ],
98+ "hydration_status": {
99+ "status": "SUCCESS",
100+ "all_required_files_hydrated": true,
101+ "lfs_pull_attempted": false,
102+ "lfs_pull_result": null,
103+ "blockers": []
104+ },
105+ "scenario_family": [
106+ "小学语文_pipeline+stability",
107+ "小学数学_pipeline+stability",
108+ "初中语文_pipeline+stability",
109+ "初中数学_pipeline+stability",
110+ "高中语文_pipeline+stability",
111+ "cross_stage_语文",
112+ "cross_subject_小学",
113+ "all_in_one_5subjects"
114+ ],
115+ "per_scenario_results": [
116+ {
117+ "scenario_name": "小学语文_pipeline+stability",
118+ "status": "PASS",
119+ "reason": "Required split ran end-to-end on Branch A and stayed within the mirrored stability ceiling, but output_mode remained minimal with free_capacity=0.0.",
120+ "input_files": [
121+ "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/语文.jsonl"
122+ ],
123+ "input_files_fully_hydrated": true,
124+ "input_line_counts": [
125+ 1597
126+ ],
127+ "record_counts": {
128+ "available_content_records_seen_before_cap": 300,
129+ "available_valid_text_records_seen_before_cap": 300,
130+ "loaded_records_per_input": [
131+ 300
132+ ],
133+ "fed_records": 200,
134+ "feedback_target": "选作课文时有改动",
135+ "feedback_applied": true
136+ },
137+ "branch_b_reference": {
138+ "status": "PASS",
139+ "summary": "nodes=1268, edges=6338, phi=[-0.113,0.598], mode=full, words=课文/本文/改动/有改/什么",
140+ "top_words": [
141+ "课文",
142+ "本文",
143+ "改动",
144+ "有改",
145+ "什么"
146+ ]
147+ },
148+ "branch_a_observables": {
149+ "phi_summary": {
150+ "node_count": 272,
151+ "total_potential": 44.0019,
152+ "top_nodes": [
153+ {
154+ "node": "选作课文时有改动",
155+ "value": 5.5287
156+ },
157+ {
158+ "node": "小事",
159+ "value": 0.562
160+ },
161+ {
162+ "node": "一",
163+ "value": 0.5529
164+ },
165+ {
166+ "node": "读下面的句子",
167+ "value": 0.4553
168+ },
169+ {
170+ "node": "我们的身边每天都在发生各种各样的",
171+ "value": 0.3406
172+ }
173+ ]
174+ },
175+ "mu_summary": {
176+ "active_count": 43,
177+ "total_activation": 9.8299,
178+ "top_nodes": [
179+ {
180+ "node": "选作课文时有改动",
181+ "value": 2.8043
182+ },
183+ {
184+ "node": "王老师急得直跺脚",
185+ "value": 0.4336
186+ },
187+ {
188+ "node": "词句段运用",
189+ "value": 0.352
190+ },
191+ {
192+ "node": "妈妈累得",
193+ "value": 0.3307
194+ },
195+ {
196+ "node": "他跑得像兔子一样快",
197+ "value": 0.316
198+ }
199+ ]
200+ },
201+ "J_summary": {
202+ "edge_count": 328,
203+ "total_flow": 25.5149,
204+ "top_flows": [
205+ {
206+ "edge": "小事->我们的身边每天都在发生各种各样的",
207+ "flow": 0.2726
208+ },
209+ {
210+ "edge": "它的声音小得几乎听不见了->王老师急得直跺脚",
211+ "flow": 0.2702
212+ },
213+ {
214+ "edge": "身边的->小事",
215+ "flow": 0.267
216+ },
217+ {
218+ "edge": "最近我一直在想->我们班能不能开辟一个植物角呢",
219+ "flow": 0.266
220+ },
221+ {
222+ "edge": "妈妈累得->得",
223+ "flow": 0.2469
224+ }
225+ ]
226+ },
227+ "active_region": [
228+ "选作课文时有改动",
229+ "王老师急得直跺脚",
230+ "词句段运用",
231+ "妈妈累得"
232+ ],
233+ "active_region_size": 4,
234+ "bound_ability_core": "选作课文时有改动",
235+ "anchor_pull": 0.0,
236+ "drift_score": 1.0,
237+ "free_capacity": 0.0,
238+ "experience_regions": [
239+ {
240+ "region": "选作课文时有改动",
241+ "nodes": [
242+ "一",
243+ "一个春天的黎明",
244+ "一片树林边走过",
245+ "为什么在动",
246+ "为什么说",
247+ "交流平台",
248+ "人心齐",
249+ "他会在森林里演奏",
250+ "他生怕惊动鸟",
251+ "他的手风",
252+ "他跑得像兔子一样快",
253+ "伸",
254+ "体会生动的语言",
255+ "你会用哪些词语来形容不同的季节",
256+ "你发现了吗",
257+ "你同意下面这些对父亲的判断吗",
258+ "你知道他们唱的是什么吗",
259+ "你认识下面这些加点的字吗",
260+ "儿歌作者王清秀",
261+ "充满力量的声音",
262+ "再说说你在哪里听到过这样的声音",
263+ "初试身手",
264+ "叫人愉悦",
265+ "听听",
266+ "听听唧",
267+ "听听青蛙的歌唱",
268+ "哩",
269+ "哩唧哩哩的虫鸣",
270+ "唱的鸟",
271+ "四",
272+ "在水塘边散步",
273+ "坐在一棵树下",
274+ "填一填",
275+ "天冷极了",
276+ "如同温柔的细语",
277+ "妈妈累得",
278+ "它的声音小得几乎听不见了",
279+ "小事",
280+ "小麻雀叽叽喳喳",
281+ "就喜欢到大自然中去寻找好玩的东西",
282+ "尽可能反映每个人的",
283+ "广阔的大地",
284+ "彩",
285+ "得",
286+ "微风吹来",
287+ "想一想",
288+ "我五六岁时",
289+ "我仔细找",
290+ "我们可以在植物角养花种草",
291+ "我们可以轮流照看它们",
292+ "我们周围有许多美丽的地方",
293+ "我们班能不能开辟一个植物角呢",
294+ "我们的身边每天都在发生各种各样的",
295+ "我只闻到浓浓的苦苦的草木气息",
296+ "我爱故乡的杨梅",
297+ "我真高兴",
298+ "或是种花生",
299+ "文",
300+ "日积月累",
301+ "早晨",
302+ "是大自然的音乐家",
303+ "最新初等小学国文教科书第一册",
304+ "最近我一直在想",
305+ "有两只小蟋蟀",
306+ "本文作者吴然",
307+ "本文作者圣野",
308+ "本文作者夏辇生",
309+ "本文作者屠再华",
310+ "本文作者嵇鸿",
311+ "本文作者张之路",
312+ "本文作者望安",
313+ "本文作者朱维之",
314+ "本文作者樊家信",
315+ "本文作者流火",
316+ "本文作者王一梅",
317+ "本文作者王鲁彦",
318+ "本文作者窦植",
319+ "本文作者罗亚",
320+ "本文作者胡木仁",
321+ "本文作者韦其麟",
322+ "本文作者鲁兵",
323+ "本文作者龚艺兵",
324+ "本文选自人民教育出版社",
325+ "本诗作者毕国瑛",
326+ "样高了",
327+ "每件事都以不同的方式影响着我们的生活",
328+ "水里的虾蟹游鱼",
329+ "汇总小组意见时",
330+ "池塘边的棕榈树高大挺拔",
331+ "沙沙的竹叶声",
332+ "没有找到动着的那几片叶子",
333+ "没有闻到什么鸟的气味",
334+ "沱",
335+ "河岸的",
336+ "清楚地表达自己的看法",
337+ "热闹的音乐会",
338+ "父亲一生最喜欢树林和歌",
339+ "父亲一生最喜欢树林和鸟",
340+ "父亲不是猎人",
341+ "父亲带着我从滹",
342+ "父亲轻声说",
343+ "王老师急得直跺脚",
344+ "琴",
345+ "用几句话写下来和同学交流吧",
346+ "由王文宝搜集整理",
347+ "的声音",
348+ "看看哪个小组总结的方法多",
349+ "睛情晴清请",
350+ "种子被泥土紧紧地包裹着",
351+ "种豆子",
352+ "空中的浮云飞鸟",
353+ "窗外十分安静",
354+ "童年时",
355+ "端",
356+ "第一册",
357+ "给它们浇水",
358+ "荷",
359+ "要",
360+ "词句段运用",
361+ "译者叶君健",
362+ "译者司徒贞",
363+ "译者吴菲",
364+ "说说你的理由",
365+ "读下面的句子",
366+ "读下面的故事",
367+ "读书时",
368+ "读读下面描写声音的词语",
369+ "课文描写了大自然中哪些",
370+ "蹦蹦跳跳的",
371+ "身边的",
372+ "轻快的山中小曲",
373+ "轻轻柔柔的呢喃细语",
374+ "还可以根据季节的变化更换不同的植物",
375+ "还有鸟味",
376+ "雄伟的乐曲",
377+ "项",
378+ "风",
379+ "高远的天空",
380+ "默读课文"
381+ ],
382+ "stage": "ability_core",
383+ "activation": 7.0256,
384+ "potential": 28.1199,
385+ "candidate_score": 378.8322,
386+ "stable_steps": 182
387+ },
388+ {
389+ "region": "2",
390+ "nodes": [
391+ "本文作者金波",
392+ "选作课文时有改动",
393+ "阳",
394+ "雪"
395+ ],
396+ "stage": "ability_core",
397+ "activation": 2.8043,
398+ "potential": 5.9628,
399+ "candidate_score": 16.16,
400+ "stable_steps": 180
401+ },
402+ {
403+ "region": "一",
404+ "nodes": [
405+ "个",
406+ "分",
407+ "副",
408+ "大蒲扇似的叶子在风中摇摆"
409+ ],
410+ "stage": "skill_belt",
411+ "activation": 0.0,
412+ "potential": 0.5125,
413+ "candidate_score": 13.0996,
414+ "stable_steps": 117
415+ },
416+ {
417+ "region": "读下面的句子",
418+ "nodes": [
419+ "体会加点的词语好在哪里",
420+ "再找出类似的语句读一读",
421+ "好玩的东西",
422+ "注意读好文中的长句子"
423+ ],
424+ "stage": "skill_belt",
425+ "activation": 0.0,
426+ "potential": 0.923,
427+ "candidate_score": 12.1253,
428+ "stable_steps": 14
429+ }
430+ ],
431+ "experience_regions_count": 4,
432+ "skill_belt_candidates": [
433+ {
434+ "node": "选作课文时有改动",
435+ "score": 5.24,
436+ "stage": "ability_core",
437+ "flow": 6.2142,
438+ "stable_steps": 180,
439+ "touches": 114,
440+ "target_core": "2"
441+ },
442+ {
443+ "node": "一",
444+ "score": 4.7189,
445+ "stage": "ability_core",
446+ "flow": 0.7353,
447+ "stable_steps": 143,
448+ "touches": 10,
449+ "target_core": "选作课文时有改动"
450+ },
451+ {
452+ "node": "本文作者金波",
453+ "score": 3.8833,
454+ "stage": "ability_core",
455+ "flow": 0.2825,
456+ "stable_steps": 140,
457+ "touches": 7,
458+ "target_core": "2"
459+ },
460+ {
461+ "node": "读下面的句子",
462+ "score": 3.8788,
463+ "stage": "ability_core",
464+ "flow": 0.4735,
465+ "stable_steps": 8,
466+ "touches": 6,
467+ "target_core": "选作课文时有改动"
468+ },
469+ {
470+ "node": "蹦蹦跳跳的",
471+ "score": 3.8451,
472+ "stage": "skill_belt",
473+ "flow": 0.4946,
474+ "stable_steps": 8,
475+ "touches": 4,
476+ "target_core": "选作课文时有改动"
477+ },
478+ {
479+ "node": "本文选自人民教育出版社",
480+ "score": 3.8247,
481+ "stage": "ability_core",
482+ "flow": 0.1178,
483+ "stable_steps": 111,
484+ "touches": 9,
485+ "target_core": "选作课文时有改动"
486+ }
487+ ],
488+ "skill_belt_candidates_count": 6,
489+ "sedimentation_trace_count": 20,
490+ "merge_events_count": 12,
491+ "decay_events_count": 24,
492+ "output_mode": "minimal",
493+ "feedback_effect": {
494+ "source": "feedback",
495+ "mode": "feedback",
496+ "queued_tokens": [
497+ "选作课文时有改动"
498+ ],
499+ "queued_strength": 1.0,
500+ "polarity": 1,
501+ "queued_step": 200,
502+ "last_applied_step": 201,
503+ "applied_tokens": [
504+ "选作课文时有改动"
505+ ],
506+ "phi_delta": 0.0544,
507+ "mu_delta": 0.0748,
508+ "flow_delta": 0.0,
509+ "stage_after": {
510+ "选作课文时有改动": "ability_core"
511+ },
512+ "bound_ability_core": "选作课文时有改动"
513+ },
514+ "phi_range": {
515+ "min": 0.0037,
516+ "max": 5.5287
517+ }
518+ },
519+ "surfaced_tokens_or_phrases": {
520+ "emit_output": "minimal: 选作课文时有改动",
521+ "active_region": [
522+ "选作课文时有改动",
523+ "王老师急得直跺脚",
524+ "词句段运用",
525+ "妈妈累得"
526+ ],
527+ "phi_top_nodes": [
528+ "选作课文时有改动",
529+ "小事",
530+ "一",
531+ "读下面的句子",
532+ "我们的身边每天都在发生各种各样的"
533+ ],
534+ "mu_top_nodes": [
535+ "选作课文时有改动",
536+ "王老师急得直跺脚",
537+ "词句段运用",
538+ "妈妈累得",
539+ "他跑得像兔子一样快"
540+ ],
541+ "top_flow_edges": [
542+ "小事->我们的身边每天都在发生各种各样的",
543+ "它的声音小得几乎听不见了->王老师急得直跺脚",
544+ "身边的->小事",
545+ "最近我一直在想->我们班能不能开辟一个植物角呢",
546+ "妈妈累得->得"
547+ ]
548+ },
549+ "fairness_note": "Branch B passed this scenario in full mode with subject-specific emergent words. Branch A surfaced ['选作课文时有改动', '王老师急得直跺脚', '词句段运用'] and emit_output='minimal: 选作课文时有改动'; the run is on the same hydrated split and record schedule, but the observable surface is smaller and coarser.",
550+ "metric_alignment_notes": [
551+ {
552+ "metric": "attention_used_total",
553+ "status": "N/A",
554+ "detail": "Branch A exposes free_capacity but not attention.used/attention.total."
555+ },
556+ {
557+ "metric": "activated_output_payload",
558+ "status": "STRUCTURAL MISMATCH",
559+ "detail": "Branch A emit() returns a string, not Branch B's structured activated-node payload."
560+ },
561+ {
562+ "metric": "bigram_emergent_words_and_circuits",
563+ "status": "STRUCTURAL MISMATCH",
564+ "detail": "Branch A does not expose Branch-B-style bigram/circuit extraction; surfaced phrases are reported only from emit_output, active_region, top phi nodes, top mu nodes, and top flows."
565+ }
566+ ],
567+ "stability_checks": {
568+ "all_finite": true,
569+ "within_branch_b_stability_ceiling": true,
570+ "phi_range": {
571+ "min": 0.0037,
572+ "max": 5.5287
573+ }
574+ }
575+ },
576+ {
577+ "scenario_name": "小学数学_pipeline+stability",
578+ "status": "PASS",
579+ "reason": "Required split ran end-to-end on Branch A and stayed within the mirrored stability ceiling, but output_mode remained minimal with free_capacity=0.0.",
580+ "input_files": [
581+ "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/数学.jsonl"
582+ ],
583+ "input_files_fully_hydrated": true,
584+ "input_line_counts": [
585+ 7459
586+ ],
587+ "record_counts": {
588+ "available_content_records_seen_before_cap": 300,
589+ "available_valid_text_records_seen_before_cap": 300,
590+ "loaded_records_per_input": [
591+ 300
592+ ],
593+ "fed_records": 200,
594+ "feedback_target": "1",
595+ "feedback_applied": true
596+ },
597+ "branch_b_reference": {
598+ "status": "PASS",
599+ "summary": "nodes=803, edges=4367, phi=[-8.300,0.579], mode=full, words=多少/一共/什么/下面/可以",
600+ "top_words": [
601+ "多少",
602+ "一共",
603+ "什么",
604+ "下面",
605+ "可以"
606+ ]
607+ },
608+ "branch_a_observables": {
609+ "phi_summary": {
610+ "node_count": 413,
611+ "total_potential": 67.5523,
612+ "top_nodes": [
613+ {
614+ "node": "1",
615+ "value": 7.7346
616+ },
617+ {
618+ "node": "2",
619+ "value": 3.3908
620+ },
621+ {
622+ "node": "4",
623+ "value": 3.3128
624+ },
625+ {
626+ "node": "6",
627+ "value": 2.2611
628+ },
629+ {
630+ "node": "3",
631+ "value": 2.1415
632+ }
633+ ]
634+ },
635+ "mu_summary": {
636+ "active_count": 35,
637+ "total_activation": 8.7279,
638+ "top_nodes": [
639+ {
640+ "node": "1",
641+ "value": 2.5055
642+ },
643+ {
644+ "node": "个",
645+ "value": 0.6537
646+ },
647+ {
648+ "node": "分一分",
649+ "value": 0.4046
650+ },
651+ {
652+ "node": "算一算",
653+ "value": 0.3478
654+ },
655+ {
656+ "node": "涂色部分占总数的几分之几",
657+ "value": 0.3204
658+ }
659+ ]
660+ },
661+ "J_summary": {
662+ "edge_count": 628,
663+ "total_flow": 26.701,
664+ "top_flows": [
665+ {
666+ "edge": "1->9",
667+ "flow": 0.3294
668+ },
669+ {
670+ "edge": "8->1",
671+ "flow": 0.3111
672+ },
673+ {
674+ "edge": "1->6",
675+ "flow": 0.2775
676+ },
677+ {
678+ "edge": "分析与解答->回顾与反思",
679+ "flow": 0.2706
680+ },
681+ {
682+ "edge": "涂色表示下面的分数->分一分",
683+ "flow": 0.2702
684+ }
685+ ]
686+ },
687+ "active_region": [
688+ "1",
689+ "个",
690+ "分一分",
691+ "算一算"
692+ ],
693+ "active_region_size": 4,
694+ "bound_ability_core": "1",
695+ "anchor_pull": 0.0,
696+ "drift_score": 1.0,
697+ "free_capacity": 0.0,
698+ "experience_regions": [
699+ {
700+ "region": "1",
701+ "nodes": [
702+ "101",
703+ "150",
704+ "16",
705+ "226",
706+ "24",
707+ "37",
708+ "48",
709+ "550",
710+ "594",
711+ "700",
712+ "一个苹果约重220",
713+ "一个西瓜",
714+ "一共有",
715+ "一块菜地的种白菜",
716+ "一年级一共需要多少个座位",
717+ "三",
718+ "上衣比裤子贵多少元",
719+ "下面是三",
720+ "下面是我国古代的计时工具",
721+ "个十",
722+ "个十在十位写",
723+ "二年级有",
724+ "人去划船",
725+ "以内所有的进位加法算式并进行整理",
726+ "作品展示",
727+ "你能用这样的数计算吗",
728+ "例如",
729+ "倍",
730+ "做一做",
731+ "元",
732+ "先用摆一摆",
733+ "其中是女生",
734+ "再比较每组分数的大小",
735+ "分一分",
736+ "分数在我国很早以前就有了",
737+ "分数的初步认识",
738+ "分析与解答",
739+ "分米",
740+ "分米长的彩条平均分成",
741+ "剩下的种芹菜",
742+ "千米是",
743+ "印度出现了和我国相似的分数表示法",
744+ "厘米",
745+ "厘米是",
746+ "厘米里的每一个小格的长度是",
747+ "口算乘法",
748+ "可以看作",
749+ "后来",
750+ "哥哥吃了",
751+ "圈是",
752+ "在图中画出一个最大的正方形",
753+ "在空格里填上合适的算式",
754+ "多位数乘一位数",
755+ "多长时间",
756+ "如果每条船都坐满",
757+ "学校新买来",
758+ "小亮踢了3个",
759+ "小军说今天卖出的雪糕不到",
760+ "小时是几时",
761+ "小格的时间是",
762+ "小红踢的个数是小亮的几倍",
763+ "就把手举起来",
764+ "并写出钟表上的时间",
765+ "并比较每组分数的大小",
766+ "并验算",
767+ "弟弟吃了",
768+ "想",
769+ "我吃",
770+ "把",
771+ "拿一张正方形纸折一折",
772+ "按要求画线段",
773+ "按顺序填数",
774+ "捆成一捆",
775+ "数学书的厚度不到",
776+ "整理和复习",
777+ "是一个图形的",
778+ "是男生",
779+ "最初分数的表示法跟现在不一样",
780+ "有10",
781+ "条",
782+ "核对一下",
783+ "根小棒",
784+ "桃的个数是香蕉的",
785+ "比一比",
786+ "比少",
787+ "毫米",
788+ "没有涂色的部分",
789+ "涂色表示下面的分数",
790+ "涂色部分",
791+ "涂色部分占总数的几分之几",
792+ "涂色部分是整个图形的几分之几",
793+ "爸爸今年多少岁",
794+ "爸爸的年龄是她的6倍",
795+ "状",
796+ "王奕飞三",
797+ "班有29人去参观",
798+ "班黑板报的布局",
799+ "用分数表示下面各图的涂色部分和没有涂色的部分",
800+ "用分数表示涂色部分",
801+ "画一条长",
802+ "画一画",
803+ "看图写出分数",
804+ "看图计算",
805+ "秒针走",
806+ "等于3",
807+ "算一算",
808+ "米",
809+ "表示成",
810+ "要用数字卡片摆出两个三位数",
811+ "要解决的问题是什么",
812+ "计算下面各题",
813+ "说一说乘的顺序",
814+ "说一说左边的加法",
815+ "说一说晶晶是怎样整理的",
816+ "说一说表里的算式是怎样排列的",
817+ "请你在方格纸上画出来",
818+ "过",
819+ "这两所初中的学生同时去这家影院看电影",
820+ "这个图形可能是什么形",
821+ "选择合适的方法解决下面的问题",
822+ "通常",
823+ "里填上合适的单位",
824+ "问题是什么",
825+ "阅读与理解",
826+ "马拉松长跑比赛全程约42"
827+ ],
828+ "stage": "ability_core",
829+ "activation": 3.1083,
830+ "potential": 21.549,
831+ "candidate_score": 362.5028,
832+ "stable_steps": 184
833+ },
834+ {
835+ "region": "4",
836+ "nodes": [
837+ "0",
838+ "10",
839+ "12",
840+ "13",
841+ "14",
842+ "20",
843+ "21",
844+ "6",
845+ "62",
846+ "6雨燕每小时飞行的距离是野兔每小时奔跑距离的",
847+ "7",
848+ "8",
849+ "9",
850+ "一",
851+ "一个长方形的宽是",
852+ "万以内的加法和减法",
853+ "个",
854+ "个拼成的",
855+ "个相同的",
856+ "人",
857+ "从左边数",
858+ "任意指一道算式",
859+ "份",
860+ "再写算式",
861+ "减1",
862+ "只",
863+ "哪些是用",
864+ "场电影",
865+ "幼儿园买了",
866+ "快速地说出得数",
867+ "或",
868+ "把一张正方形纸折成同样大的",
869+ "摆",
870+ "教室里扫地的有",
871+ "有",
872+ "有几种分法",
873+ "朵",
874+ "条蚕共吐丝约多少米",
875+ "根摆一摆",
876+ "每天放映",
877+ "用",
878+ "第一行的",
879+ "读作",
880+ "辆",
881+ "钱是跳棋的",
882+ "长方形的周长",
883+ "麋鹿都属于哺乳类动物"
884+ ],
885+ "stage": "ability_core",
886+ "activation": 1.5611,
887+ "potential": 13.8429,
888+ "candidate_score": 167.7031,
889+ "stable_steps": 190
890+ },
891+ {
892+ "region": "2",
893+ "nodes": [
894+ "122",
895+ "246",
896+ "32",
897+ "33",
898+ "432",
899+ "④星期一的前一天",
900+ "一共有多少支彩笔",
901+ "一头犀牛约重3",
902+ "一瓶矿泉水",
903+ "一辆卡车载质量",
904+ "上衣和裤子一共多少元",
905+ "二年级一共需要多少个座位",
906+ "什么这样剪",
907+ "他说的对吗",
908+ "你从表中发现了哪些有趣的排列",
909+ "你是怎样想的",
910+ "你还能提出其他数学问题并解答吗",
911+ "元可以买哪些商品",
912+ "光明小学各年级捐赠图书情况统计表",
913+ "再把余下的算式填出来",
914+ "减法",
915+ "剩下的图形是一个长方形",
916+ "去年爸爸的年龄是小丽的几倍",
917+ "取出它的",
918+ "只小鸟圈起来",
919+ "可以怎样租船",
920+ "吨",
921+ "和同学交流一下",
922+ "如果租一条大船",
923+ "小明踢的个数是小亮的2倍",
924+ "把左边4",
925+ "排第",
926+ "是怎样排列的",
927+ "用自己的方式表示1",
928+ "练习十一",
929+ "语文书厚约6",
930+ "选中你要买的商品"
931+ ],
932+ "stage": "skill_belt",
933+ "activation": 0.0869,
934+ "potential": 2.1459,
935+ "candidate_score": 108.7044,
936+ "stable_steps": 191
937+ },
938+ {
939+ "region": "3",
940+ "nodes": [
941+ "02",
942+ "一辆出租车每天大约行驶300",
943+ "个碗用了",
944+ "个铁环连在一起有多长",
945+ "减号",
946+ "刘叔叔每小时检测",
947+ "只小鸟涂上颜色",
948+ "填数",
949+ "妈妈买",
950+ "把一个圆平均分成",
951+ "按",
952+ "时",
953+ "看图说一说算式表示的意思",
954+ "看谁搭得又稳又高",
955+ "要用所有的积木搭",
956+ "计算最后一行算式",
957+ "计算第一列算式",
958+ "过街天桥长约30",
959+ "这",
960+ "里可以填几",
961+ "长是宽的"
962+ ],
963+ "stage": "skill_belt",
964+ "activation": 0.0,
965+ "potential": 0.9822,
966+ "candidate_score": 63.1808,
967+ "stable_steps": 187
968+ },
969+ {
970+ "region": "从现在开始",
971+ "nodes": [
972+ "1",
973+ "2",
974+ "3",
975+ "4",
976+ "5",
977+ "从第",
978+ "再数一数其他事物",
979+ "右说一说",
980+ "填一填",
981+ "把左边的4",
982+ "有几种放法",
983+ "看数涂色",
984+ "贴一贴"
985+ ],
986+ "stage": "ability_core",
987+ "activation": 3.1934,
988+ "potential": 18.5158,
989+ "candidate_score": 53.7914,
990+ "stable_steps": 194
991+ },
992+ {
993+ "region": "10",
994+ "nodes": [
995+ "⑦",
996+ "个一",
997+ "你知道吗",
998+ "使每条线上的三个数相加都得",
999+ "先圈出",
1000+ "先用手势表示出",
1001+ "再填数",
1002+ "千克",
1003+ "厘米的长度",
1004+ "哪两个数相加得",
1005+ "指出",
1006+ "摆一摆",
1007+ "根",
1008+ "的认识和加减法5",
1009+ "秒",
1010+ "袋重多少千克"
1011+ ],
1012+ "stage": "skill_belt",
1013+ "activation": 0.0,
1014+ "potential": 0.3781,
1015+ "candidate_score": 50.7624,
1016+ "stable_steps": 137
1017+ }
1018+ ],
1019+ "experience_regions_count": 6,
1020+ "skill_belt_candidates": [
1021+ {
1022+ "node": "1",
1023+ "score": 5.24,
1024+ "stage": "ability_core",
1025+ "flow": 9.2436,
1026+ "stable_steps": 193,
1027+ "touches": 242,
1028+ "target_core": "从现在开始"
1029+ },
1030+ {
1031+ "node": "4",
1032+ "score": 5.118,
1033+ "stage": "ability_core",
1034+ "flow": 1.9832,
1035+ "stable_steps": 194,
1036+ "touches": 207,
1037+ "target_core": "从现在开始"
1038+ },
1039+ {
1040+ "node": "2",
1041+ "score": 5.0927,
1042+ "stage": "ability_core",
1043+ "flow": 1.7723,
1044+ "stable_steps": 192,
1045+ "touches": 234,
1046+ "target_core": "从现在开始"
1047+ },
1048+ {
1049+ "node": "6",
1050+ "score": 5.0228,
1051+ "stage": "ability_core",
1052+ "flow": 1.6829,
1053+ "stable_steps": 172,
1054+ "touches": 171,
1055+ "target_core": "4"
1056+ },
1057+ {
1058+ "node": "个",
1059+ "score": 4.9921,
1060+ "stage": "ability_core",
1061+ "flow": 1.764,
1062+ "stable_steps": 165,
1063+ "touches": 33,
1064+ "target_core": "4"
1065+ },
1066+ {
1067+ "node": "3",
1068+ "score": 4.9615,
1069+ "stage": "ability_core",
1070+ "flow": 1.2516,
1071+ "stable_steps": 191,
1072+ "touches": 204,
1073+ "target_core": "从现在开始"
1074+ }
1075+ ],
1076+ "skill_belt_candidates_count": 6,
1077+ "sedimentation_trace_count": 20,
1078+ "merge_events_count": 12,
1079+ "decay_events_count": 24,
1080+ "output_mode": "minimal",
1081+ "feedback_effect": {
1082+ "source": "feedback",
1083+ "mode": "feedback",
1084+ "queued_tokens": [
1085+ "1"
1086+ ],
1087+ "queued_strength": 1.0,
1088+ "polarity": 1,
1089+ "queued_step": 200,
1090+ "last_applied_step": 201,
1091+ "applied_tokens": [
1092+ "1"
1093+ ],
1094+ "phi_delta": 0.0544,
1095+ "mu_delta": 0.0748,
1096+ "flow_delta": 0.0,
1097+ "stage_after": {
1098+ "1": "ability_core"
1099+ },
1100+ "bound_ability_core": "1"
1101+ },
1102+ "phi_range": {
1103+ "min": 0.0017,
1104+ "max": 7.7346
1105+ }
1106+ },
1107+ "surfaced_tokens_or_phrases": {
1108+ "emit_output": "minimal: 1",
1109+ "active_region": [
1110+ "1",
1111+ "个",
1112+ "分一分",
1113+ "算一算"
1114+ ],
1115+ "phi_top_nodes": [
1116+ "1",
1117+ "2",
1118+ "4",
1119+ "6",
1120+ "3"
1121+ ],
1122+ "mu_top_nodes": [
1123+ "1",
1124+ "个",
1125+ "分一分",
1126+ "算一算",
1127+ "涂色部分占总数的几分之几"
1128+ ],
1129+ "top_flow_edges": [
1130+ "1->9",
1131+ "8->1",
1132+ "1->6",
1133+ "分析与解答->回顾与反思",
1134+ "涂色表示下面的分数->分一分"
1135+ ]
1136+ },
1137+ "fairness_note": "Branch B passed this scenario in full mode with subject-specific emergent words. Branch A surfaced ['1', '个', '分一分'] and emit_output='minimal: 1'; the run is on the same hydrated split and record schedule, but the observable surface is smaller and coarser.",
1138+ "metric_alignment_notes": [
1139+ {
1140+ "metric": "attention_used_total",
1141+ "status": "N/A",
1142+ "detail": "Branch A exposes free_capacity but not attention.used/attention.total."
1143+ },
1144+ {
1145+ "metric": "activated_output_payload",
1146+ "status": "STRUCTURAL MISMATCH",
1147+ "detail": "Branch A emit() returns a string, not Branch B's structured activated-node payload."
1148+ },
1149+ {
1150+ "metric": "bigram_emergent_words_and_circuits",
1151+ "status": "STRUCTURAL MISMATCH",
1152+ "detail": "Branch A does not expose Branch-B-style bigram/circuit extraction; surfaced phrases are reported only from emit_output, active_region, top phi nodes, top mu nodes, and top flows."
1153+ }
1154+ ],
1155+ "stability_checks": {
1156+ "all_finite": true,
1157+ "within_branch_b_stability_ceiling": true,
1158+ "phi_range": {
1159+ "min": 0.0017,
1160+ "max": 7.7346
1161+ }
1162+ }
1163+ },
1164+ {
1165+ "scenario_name": "初中语文_pipeline+stability",
1166+ "status": "PASS",
1167+ "reason": "Required split ran end-to-end on Branch A and stayed within the mirrored stability ceiling, but output_mode remained minimal with free_capacity=0.0.",
1168+ "input_files": [
1169+ "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/初中/语文.jsonl"
1170+ ],
1171+ "input_files_fully_hydrated": true,
1172+ "input_line_counts": [
1173+ 961
1174+ ],
1175+ "record_counts": {
1176+ "available_content_records_seen_before_cap": 300,
1177+ "available_valid_text_records_seen_before_cap": 300,
1178+ "loaded_records_per_input": [
1179+ 300
1180+ ],
1181+ "fed_records": 200,
1182+ "feedback_target": "①",
1183+ "feedback_applied": true
1184+ },
1185+ "branch_b_reference": {
1186+ "status": "PASS",
1187+ "summary": "nodes=1702, edges=9372, phi=[-0.085,0.553], mode=full, words=阅读/单元/读第/我们/第一",
1188+ "top_words": [
1189+ "阅读",
1190+ "单元",
1191+ "读第",
1192+ "我们",
1193+ "第一"
1194+ ]
1195+ },
1196+ "branch_a_observables": {
1197+ "phi_summary": {
1198+ "node_count": 337,
1199+ "total_potential": 56.0401,
1200+ "top_nodes": [
1201+ {
1202+ "node": "①",
1203+ "value": 7.7871
1204+ },
1205+ {
1206+ "node": "阅读",
1207+ "value": 2.607
1208+ },
1209+ {
1210+ "node": "阅读第二单元",
1211+ "value": 2.0385
1212+ },
1213+ {
1214+ "node": "阅读第一单元",
1215+ "value": 1.1916
1216+ },
1217+ {
1218+ "node": "综合性学习",
1219+ "value": 0.8132
1220+ }
1221+ ]
1222+ },
1223+ "mu_summary": {
1224+ "active_count": 39,
1225+ "total_activation": 9.4318,
1226+ "top_nodes": [
1227+ {
1228+ "node": "①",
1229+ "value": 2.6202
1230+ },
1231+ {
1232+ "node": "综合性学习",
1233+ "value": 0.5089
1234+ },
1235+ {
1236+ "node": "保天下者",
1237+ "value": 0.4289
1238+ },
1239+ {
1240+ "node": "昭公四年",
1241+ "value": 0.3489
1242+ },
1243+ {
1244+ "node": "左传",
1245+ "value": 0.3375
1246+ }
1247+ ]
1248+ },
1249+ "J_summary": {
1250+ "edge_count": 380,
1251+ "total_flow": 28.3727,
1252+ "top_flows": [
1253+ {
1254+ "edge": "①->选自",
1255+ "flow": 0.5294
1256+ },
1257+ {
1258+ "edge": "阅读->①",
1259+ "flow": 0.3536
1260+ },
1261+ {
1262+ "edge": "①->预习",
1263+ "flow": 0.3179
1264+ },
1265+ {
1266+ "edge": "木兰诗->①",
1267+ "flow": 0.2967
1268+ },
1269+ {
1270+ "edge": "阅读第二单元->①",
1271+ "flow": 0.2951
1272+ }
1273+ ]
1274+ },
1275+ "active_region": [
1276+ "①",
1277+ "综合性学习",
1278+ "保天下者",
1279+ "昭公四年"
1280+ ],
1281+ "active_region_size": 4,
1282+ "bound_ability_core": "①",
1283+ "anchor_pull": 0.0,
1284+ "drift_score": 1.0,
1285+ "free_capacity": 0.0,
1286+ "experience_regions": [
1287+ {
1288+ "region": "①",
1289+ "nodes": [
1290+ "15",
1291+ "1936",
1292+ "2",
1293+ "3",
1294+ "6",
1295+ "7",
1296+ "8",
1297+ "②",
1298+ "③",
1299+ "一",
1300+ "一年之计在于春",
1301+ "一年到头",
1302+ "一调芭蕉扇",
1303+ "万里赴戎机",
1304+ "与友期行",
1305+ "东流",
1306+ "也烧着了",
1307+ "乡书何处达",
1308+ "了",
1309+ "人们就经常讨论",
1310+ "人有恒言",
1311+ "从百草园到三味书屋",
1312+ "保天下者",
1313+ "关山",
1314+ "其一",
1315+ "其二",
1316+ "具体化",
1317+ "再塑生命的人",
1318+ "再走一步",
1319+ "写作",
1320+ "写作第二单元",
1321+ "冰心",
1322+ "列子",
1323+ "刘湛秋",
1324+ "刘禹锡",
1325+ "刚起头儿",
1326+ "劳动的声音从这头响到那头",
1327+ "匹夫之贱与有责焉耳矣",
1328+ "十二章",
1329+ "印珂勒惠支",
1330+ "原野到处有一种鸣叫",
1331+ "叙述了一个传奇的故事",
1332+ "吹面不寒杨柳风",
1333+ "吾十有五",
1334+ "四人帮",
1335+ "回忆鲁迅先生",
1336+ "在两千多年前的战国时期",
1337+ "在翻滚着一种红熟",
1338+ "在那稻棵上我捉过蚱蜢",
1339+ "地",
1340+ "多次被改编为戏曲",
1341+ "天上的街市",
1342+ "天空清亮透明",
1343+ "好",
1344+ "孙权劝学",
1345+ "孙行者",
1346+ "孟子说",
1347+ "小伙伴",
1348+ "小侣",
1349+ "屋子里的家具什物",
1350+ "岑参",
1351+ "峨眉山月歌",
1352+ "左传",
1353+ "归雁洛阳边",
1354+ "当我记起故乡的时候",
1355+ "形象化",
1356+ "思考探究",
1357+ "愁",
1358+ "我也怅然",
1359+ "我便能看见那大地的深层",
1360+ "我有好几",
1361+ "批判围攻的时候",
1362+ "抒发个人报国之志",
1363+ "搜集一些爱国诗词",
1364+ "昭公四年",
1365+ "是一个古老的话题",
1366+ "是一首叙事诗",
1367+ "是用比喻将",
1368+ "暴",
1369+ "曲肱",
1370+ "最好的工作是捉了苍蝇喂蚂蚁",
1371+ "最振奋人心的旋律",
1372+ "朝花夕拾",
1373+ "木兰诗",
1374+ "朵月季花真",
1375+ "李白",
1376+ "杞人忧天",
1377+ "梳理课文的故事情节",
1378+ "植树的牧羊人",
1379+ "止",
1380+ "正的水鸭长得大相径庭",
1381+ "死生以之",
1382+ "泰戈尔",
1383+ "海伦",
1384+ "漂亮啊",
1385+ "爱国情怀成为这些诗作最感动人",
1386+ "猫",
1387+ "田垄里埋葬过我的欢笑",
1388+ "电影等艺术形式",
1389+ "的时候",
1390+ "的画",
1391+ "看哪些地方叙述得详细",
1392+ "碎首黄尘燕然勒功",
1393+ "秃鹰",
1394+ "秋天",
1395+ "秋思",
1396+ "积累拓展",
1397+ "空气里都有一种欢喜的声音",
1398+ "综合性学习",
1399+ "绿萍",
1400+ "而志于学",
1401+ "臧克家",
1402+ "至今热血犹殷红",
1403+ "节选",
1404+ "节选自",
1405+ "苟利社稷",
1406+ "莫怀戚",
1407+ "莫顿",
1408+ "蚊子和狮子",
1409+ "蜕",
1410+ "行军九日思长安故园",
1411+ "表达对国家命运的牵挂",
1412+ "西南联大",
1413+ "觉得那时的赛会",
1414+ "让",
1415+ "诸葛亮",
1416+ "读读写写",
1417+ "谁是最可爱的人",
1418+ "资治通鉴",
1419+ "赞颂爱国历史人物",
1420+ "起",
1421+ "还是在二次战役",
1422+ "这位官员的运气并不比头一位钦差大臣",
1423+ "这几",
1424+ "这种声音已经和我的心取得了永远的沟通",
1425+ "选自",
1426+ "选自组诗",
1427+ "邓稼先",
1428+ "邓稼先与奥本海默",
1429+ "那里",
1430+ "郭沫若",
1431+ "金色花",
1432+ "银线似的蛛丝在牛角上挂着",
1433+ "闻王昌龄左迁龙标遥有此寄",
1434+ "阅读第三单元",
1435+ "阅读第二单元",
1436+ "阅读第五单元",
1437+ "阅读第六单元",
1438+ "阅读第四单元",
1439+ "陆游",
1440+ "陈太丘",
1441+ "陶庵梦忆",
1442+ "雨的四季",
1443+ "预习",
1444+ "预警",
1445+ "饮水",
1446+ "马致远",
1447+ "马革裹尸",
1448+ "魏巍"
1449+ ],
1450+ "stage": "ability_core",
1451+ "activation": 6.0995,
1452+ "potential": 30.1232,
1453+ "candidate_score": 479.7935,
1454+ "stable_steps": 197
1455+ },
1456+ {
1457+ "region": "②",
1458+ "nodes": [
1459+ "一屠",
1460+ "三十而立",
1461+ "东方朔",
1462+ "以制作宫廷菜而闻名",
1463+ "吊古战场文",
1464+ "哪里",
1465+ "四十而不惑",
1466+ "小桥流水人家",
1467+ "度若飞",
1468+ "我心里怀着挚痛",
1469+ "我父亲诞生于",
1470+ "所寄",
1471+ "指将士战死于战场",
1472+ "晚归",
1473+ "朔气传金柝",
1474+ "期日中",
1475+ "杨花",
1476+ "枯藤老树昏鸦",
1477+ "的热",
1478+ "而枕之",
1479+ "落尽子规",
1480+ "豌豆黄儿",
1481+ "身亡",
1482+ "陈太丘与友期行"
1483+ ],
1484+ "stage": "skill_belt",
1485+ "activation": 0.0,
1486+ "potential": 0.79,
1487+ "candidate_score": 69.9026,
1488+ "stable_steps": 18
1489+ },
1490+ {
1491+ "region": "阅读",
1492+ "nodes": [
1493+ "10",
1494+ "9",
1495+ "不要掉队呀",
1496+ "二孔子及其弟子在学习态度和学习方法上有哪些观点",
1497+ "子曰",
1498+ "我想他们此刻",
1499+ "批再创造一批吗",
1500+ "杯",
1501+ "端木蕻良",
1502+ "第一流的战士",
1503+ "第三单元",
1504+ "这个营的营长向我叙说了以上的情形",
1505+ "雨似的窜出来",
1506+ "饭吃"
1507+ ],
1508+ "stage": "skill_belt",
1509+ "activation": 0.0,
1510+ "potential": 1.5705,
1511+ "candidate_score": 43.7037,
1512+ "stable_steps": 199
1513+ },
1514+ {
1515+ "region": "预习",
1516+ "nodes": [
1517+ "1950",
1518+ "一个孤独的农夫",
1519+ "中国是诗的国度",
1520+ "乔诺",
1521+ "你以前一定读过不少寓言故事",
1522+ "你知道",
1523+ "古人往往在家信中寄语子女弟侄",
1524+ "古代诗歌四首",
1525+ "寓言四则",
1526+ "很多同学都知道老子",
1527+ "无论想象多么荒诞",
1528+ "木兰从军的故事千百年来广为传颂",
1529+ "课文说的是一家人散步的琐事"
1530+ ],
1531+ "stage": "skill_belt",
1532+ "activation": 0.0,
1533+ "potential": 0.4108,
1534+ "candidate_score": 38.0193,
1535+ "stable_steps": 37
1536+ },
1537+ {
1538+ "region": "1",
1539+ "nodes": [
1540+ "三理解下列句中加点词的含义",
1541+ "三解释下列句中加点的词",
1542+ "二说说你对以下两句话的理解",
1543+ "人不知而不愠",
1544+ "回答下列问题",
1545+ "在那",
1546+ "完成练习",
1547+ "布置文学角",
1548+ "所谓的",
1549+ "排比",
1550+ "注意其中加点的词",
1551+ "狼不敢前",
1552+ "这一段描写景物",
1553+ "选择教室中的一角作为文学角"
1554+ ],
1555+ "stage": "skill_belt",
1556+ "activation": 0.0,
1557+ "potential": 0.5758,
1558+ "candidate_score": 31.4674,
1559+ "stable_steps": 6
1560+ },
1561+ {
1562+ "region": "综合性学习",
1563+ "nodes": [
1564+ "157",
1565+ "5",
1566+ "二",
1567+ "但不能算是专门学者",
1568+ "天下国家",
1569+ "少年正是读书时",
1570+ "山",
1571+ "我的语文生活",
1572+ "文学部落",
1573+ "有朋自远方来",
1574+ "爱国名言小窗口",
1575+ "讨论时",
1576+ "附录"
1577+ ],
1578+ "stage": "skill_belt",
1579+ "activation": 0.5635,
1580+ "potential": 1.126,
1581+ "candidate_score": 29.1496,
1582+ "stable_steps": 4
1583+ }
1584+ ],
1585+ "experience_regions_count": 6,
1586+ "skill_belt_candidates": [
1587+ {
1588+ "node": "①",
1589+ "score": 5.24,
1590+ "stage": "ability_core",
1591+ "flow": 9.3468,
1592+ "stable_steps": 197,
1593+ "touches": 236,
1594+ "target_core": "2"
1595+ },
1596+ {
1597+ "node": "阅读",
1598+ "score": 5.0939,
1599+ "stage": "ability_core",
1600+ "flow": 2.0445,
1601+ "stable_steps": 201,
1602+ "touches": 123,
1603+ "target_core": "朱自清"
1604+ },
1605+ {
1606+ "node": "阅读第二单元",
1607+ "score": 4.9971,
1608+ "stage": "ability_core",
1609+ "flow": 1.6171,
1610+ "stable_steps": 177,
1611+ "touches": 38,
1612+ "target_core": "①"
1613+ },
1614+ {
1615+ "node": "阅读第一单元",
1616+ "score": 4.8305,
1617+ "stage": "ability_core",
1618+ "flow": 0.793,
1619+ "stable_steps": 197,
1620+ "touches": 56,
1621+ "target_core": "春"
1622+ },
1623+ {
1624+ "node": "了",
1625+ "score": 4.7835,
1626+ "stage": "ability_core",
1627+ "flow": 0.7232,
1628+ "stable_steps": 45,
1629+ "touches": 9,
1630+ "target_core": "①"
1631+ },
1632+ {
1633+ "node": "选自",
1634+ "score": 4.7397,
1635+ "stage": "ability_core",
1636+ "flow": 0.7533,
1637+ "stable_steps": 197,
1638+ "touches": 45,
1639+ "target_core": "①"
1640+ }
1641+ ],
1642+ "skill_belt_candidates_count": 6,
1643+ "sedimentation_trace_count": 20,
1644+ "merge_events_count": 12,
1645+ "decay_events_count": 24,
1646+ "output_mode": "minimal",
1647+ "feedback_effect": {
1648+ "source": "feedback",
1649+ "mode": "feedback",
1650+ "queued_tokens": [
1651+ "①"
1652+ ],
1653+ "queued_strength": 1.0,
1654+ "polarity": 1,
1655+ "queued_step": 200,
1656+ "last_applied_step": 201,
1657+ "applied_tokens": [
1658+ "①"
1659+ ],
1660+ "phi_delta": 0.0544,
1661+ "mu_delta": 0.0748,
1662+ "flow_delta": 0.0,
1663+ "stage_after": {
1664+ "①": "ability_core"
1665+ },
1666+ "bound_ability_core": "①"
1667+ },
1668+ "phi_range": {
1669+ "min": 0.0003,
1670+ "max": 7.7871
1671+ }
1672+ },
1673+ "surfaced_tokens_or_phrases": {
1674+ "emit_output": "minimal: ①",
1675+ "active_region": [
1676+ "①",
1677+ "综合性学习",
1678+ "保天下者",
1679+ "昭公四年"
1680+ ],
1681+ "phi_top_nodes": [
1682+ "①",
1683+ "阅读",
1684+ "阅读第二单元",
1685+ "阅读第一单元",
1686+ "综合性学习"
1687+ ],
1688+ "mu_top_nodes": [
1689+ "①",
1690+ "综合性学习",
1691+ "保天下者",
1692+ "昭公四年",
1693+ "左传"
1694+ ],
1695+ "top_flow_edges": [
1696+ "①->选自",
1697+ "阅读->①",
1698+ "①->预习",
1699+ "木兰诗->①",
1700+ "阅读第二单元->①"
1701+ ]
1702+ },
1703+ "fairness_note": "Branch B passed this scenario in full mode with subject-specific emergent words. Branch A surfaced ['①', '综合性学习', '保天下者'] and emit_output='minimal: ①'; the run is on the same hydrated split and record schedule, but the observable surface is smaller and coarser.",
1704+ "metric_alignment_notes": [
1705+ {
1706+ "metric": "attention_used_total",
1707+ "status": "N/A",
1708+ "detail": "Branch A exposes free_capacity but not attention.used/attention.total."
1709+ },
1710+ {
1711+ "metric": "activated_output_payload",
1712+ "status": "STRUCTURAL MISMATCH",
1713+ "detail": "Branch A emit() returns a string, not Branch B's structured activated-node payload."
1714+ },
1715+ {
1716+ "metric": "bigram_emergent_words_and_circuits",
1717+ "status": "STRUCTURAL MISMATCH",
1718+ "detail": "Branch A does not expose Branch-B-style bigram/circuit extraction; surfaced phrases are reported only from emit_output, active_region, top phi nodes, top mu nodes, and top flows."
1719+ }
1720+ ],
1721+ "stability_checks": {
1722+ "all_finite": true,
1723+ "within_branch_b_stability_ceiling": true,
1724+ "phi_range": {
1725+ "min": 0.0003,
1726+ "max": 7.7871
1727+ }
1728+ }
1729+ },
1730+ {
1731+ "scenario_name": "初中数学_pipeline+stability",
1732+ "status": "FAIL",
1733+ "reason": "Required split ran, but Branch A exceeded the mirrored Branch B stability ceiling of ±10.1 with phi_range={'min': 0.0007, 'max': 12.0662}.",
1734+ "input_files": [
1735+ "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/初中/数学.jsonl"
1736+ ],
1737+ "input_files_fully_hydrated": true,
1738+ "input_line_counts": [
1739+ 5559
1740+ ],
1741+ "record_counts": {
1742+ "available_content_records_seen_before_cap": 300,
1743+ "available_valid_text_records_seen_before_cap": 300,
1744+ "loaded_records_per_input": [
1745+ 300
1746+ ],
1747+ "fed_records": 200,
1748+ "feedback_target": "1",
1749+ "feedback_applied": true
1750+ },
1751+ "branch_b_reference": {
1752+ "status": "PASS",
1753+ "summary": "nodes=886, edges=5437, phi=[-0.045,0.479], mode=full, words=方程/可以/图形/问题/我们",
1754+ "top_words": [
1755+ "方程",
1756+ "可以",
1757+ "图形",
1758+ "问题",
1759+ "我们"
1760+ ]
1761+ },
1762+ "branch_a_observables": {
1763+ "phi_summary": {
1764+ "node_count": 396,
1765+ "total_potential": 76.2717,
1766+ "top_nodes": [
1767+ {
1768+ "node": "1",
1769+ "value": 12.0662
1770+ },
1771+ {
1772+ "node": "2",
1773+ "value": 10.4091
1774+ },
1775+ {
1776+ "node": "0",
1777+ "value": 3.3822
1778+ },
1779+ {
1780+ "node": "4",
1781+ "value": 2.327
1782+ },
1783+ {
1784+ "node": "3",
1785+ "value": 2.3207
1786+ }
1787+ ]
1788+ },
1789+ "mu_summary": {
1790+ "active_count": 32,
1791+ "total_activation": 8.7265,
1792+ "top_nodes": [
1793+ {
1794+ "node": "1",
1795+ "value": 2.6586
1796+ },
1797+ {
1798+ "node": "2",
1799+ "value": 0.9244
1800+ },
1801+ {
1802+ "node": "进而从图象探索二次函数的性质",
1803+ "value": 0.4286
1804+ },
1805+ {
1806+ "node": "1",
1807+ "value": 0.3426
1808+ },
1809+ {
1810+ "node": "可以方便地画出二次函数的图象",
1811+ "value": 0.3346
1812+ }
1813+ ]
1814+ },
1815+ "J_summary": {
1816+ "edge_count": 629,
1817+ "total_flow": 32.5215,
1818+ "top_flows": [
1819+ {
1820+ "edge": "2->1",
1821+ "flow": 1.9984
1822+ },
1823+ {
1824+ "edge": "1->2",
1825+ "flow": 1.0096
1826+ },
1827+ {
1828+ "edge": "1->0",
1829+ "flow": 0.5938
1830+ },
1831+ {
1832+ "edge": "1->4",
1833+ "flow": 0.5608
1834+ },
1835+ {
1836+ "edge": "1->3",
1837+ "flow": 0.4126
1838+ }
1839+ ]
1840+ },
1841+ "active_region": [
1842+ "1",
1843+ "2",
1844+ "进而从图象探索二次函数的性质",
1845+ "1"
1846+ ],
1847+ "active_region_size": 4,
1848+ "bound_ability_core": "1",
1849+ "anchor_pull": 0.0,
1850+ "drift_score": 1.0,
1851+ "free_capacity": 0.0,
1852+ "experience_regions": [
1853+ {
1854+ "region": "1",
1855+ "nodes": [
1856+ "0",
1857+ "300",
1858+ "5",
1859+ "6",
1860+ "7",
1861+ "一个两位数的个位上的数是犪",
1862+ "一个矩形的长是宽的2倍",
1863+ "一元一次方程",
1864+ "一次项系数和常数项",
1865+ "一般地",
1866+ "一般是近似的",
1867+ "丢番图",
1868+ "两个角互为补角简称为两个角互补",
1869+ "中午上升了1",
1870+ "举出生活中一些可以看成直线",
1871+ "也可以代表其他数",
1872+ "习题2",
1873+ "二次函数",
1874+ "从二次函数狔",
1875+ "以前我们从一次函数的角度看一元一次方程",
1876+ "例如0",
1877+ "例1",
1878+ "其中",
1879+ "写出下列各数的倒数",
1880+ "写出这个矩形的面积关于宽的函数解析式",
1881+ "分数",
1882+ "分析",
1883+ "列式表示",
1884+ "利用等式的性质解下列方程",
1885+ "利用等式的性质解下列方程并检验",
1886+ "化简",
1887+ "北京冬季里某一天的气温为",
1888+ "北偏西3",
1889+ "半夜又下降了9",
1890+ "即写出这个一次函数的解析式狔",
1891+ "口算",
1892+ "可以方便地画出二次函数的图象",
1893+ "合并下列各式的同类项",
1894+ "图2",
1895+ "图4",
1896+ "在含有字母的式子中如果出现乘号",
1897+ "填空",
1898+ "复习一下全章的内容吧",
1899+ "复习题4",
1900+ "如图",
1901+ "如图1",
1902+ "如图1所示",
1903+ "山水市去年居民的人均可支配收入为",
1904+ "已知函数狔",
1905+ "并利用",
1906+ "并用刻度尺或圆规验证你的结论",
1907+ "得",
1908+ "我们可以利用二次函数的图象求一元二次方程的根",
1909+ "我们来看本章引言中的问题",
1910+ "把下面的有理数填在相应的大括号里",
1911+ "把3换成0",
1912+ "抛物线狔",
1913+ "探索二次函数的性质",
1914+ "支干和小分支的总数是9",
1915+ "本章知识结构图",
1916+ "本节我们从二次函数的角度看一元二次方程",
1917+ "李善兰",
1918+ "某人工作一年的报酬是年终给他一件衣服和",
1919+ "某种商品的价格是2元",
1920+ "正数大于0",
1921+ "每个小书包比大书包的进价少",
1922+ "每个支干长出多少小分支",
1923+ "比较你所学过的各种整式方程",
1924+ "犪",
1925+ "犪狓2",
1926+ "犫",
1927+ "犫狓",
1928+ "犮",
1929+ "犽狓",
1930+ "狓",
1931+ "狓狔2",
1932+ "狓1",
1933+ "狓2",
1934+ "狓7",
1935+ "狔",
1936+ "由上面的结论",
1937+ "由于作图或观察可能存在误差",
1938+ "由图象求得的根",
1939+ "由我们数组成的式子有确切的大小",
1940+ "确定一次函数",
1941+ "立体图形与平面图形",
1942+ "第二章整式的加减",
1943+ "第2题",
1944+ "第3题",
1945+ "算学启蒙",
1946+ "结果保留两位小数",
1947+ "角",
1948+ "解下列方程",
1949+ "计算",
1950+ "计算0",
1951+ "认识了一次函数与一元一次方程的联系",
1952+ "说出下列图形的名称",
1953+ "这三个数分别是多少",
1954+ "这类问题中常常把总工作量看作",
1955+ "进而从图象探索二次函数的性质",
1956+ "那么你认为下面的空格应填写什么数",
1957+ "队名比赛场次胜场负场积分前进",
1958+ "需求出犽",
1959+ "0",
1960+ "0元",
1961+ "0枚银币",
1962+ "0狋",
1963+ "0狌",
1964+ "1一元二次方程",
1965+ "1二次函数的图象和性质",
1966+ "12",
1967+ "16",
1968+ "2整式的加减",
1969+ "2有理数",
1970+ "2狓",
1971+ "2解一元二次方程",
1972+ "2题",
1973+ "3",
1974+ "3实际问题与一元二次方程",
1975+ "3有理数的加减法",
1976+ "3狓",
1977+ "4",
1978+ "4有理数的乘除法",
1979+ "42",
1980+ "5",
1981+ "5有理数的乘方",
1982+ "5狓",
1983+ "5狓2",
1984+ "5的各图中包含哪些简单平面图形",
1985+ "6",
1986+ "6时整",
1987+ "6狓",
1988+ "7",
1989+ "7犪",
1990+ "8",
1991+ "8狓",
1992+ "9",
1993+ "9是一些立体图形的展开图"
1994+ ],
1995+ "stage": "ability_core",
1996+ "activation": 3.7169,
1997+ "potential": 36.1571,
1998+ "candidate_score": 438.0445,
1999+ "stable_steps": 200
2000+ },
2001+ {
2002+ "region": "2",
2003+ "nodes": [
2004+ "20021204",
2005+ "一元二次方程有哪",
2006+ "下列整式中哪些是单项式",
2007+ "与抛物线狔",
2008+ "两个负数",
2009+ "义务教育数学课程标准",
2010+ "乘方",
2011+ "习题3",
2012+ "习题4",
2013+ "买一个篮球需要狓元",
2014+ "产生数1",
2015+ "你能写出这些方程的一般形式吗",
2016+ "你能结合图",
2017+ "使一年期存款的年利率由",
2018+ "例5的运算过程也可以简单地写为",
2019+ "其中9个格中的点数分别是1",
2020+ "南偏东6",
2021+ "即",
2022+ "圆柱体的底面半径",
2023+ "在一张普通的月历中",
2024+ "多项式狏",
2025+ "如图2",
2026+ "它们的积是正的还是负的",
2027+ "平均每天小李比小张多跑多少米",
2028+ "改为图",
2029+ "有理数",
2030+ "有理数的除法",
2031+ "某年",
2032+ "正数大于负数",
2033+ "点",
2034+ "犫2犪",
2035+ "犺",
2036+ "犽与狔",
2037+ "狀",
2038+ "狆",
2039+ "狌",
2040+ "用图4",
2041+ "用式子表示在这个月内销售这种商品的收入",
2042+ "用式子表示船在这条河中顺水行驶和逆水行驶时的速度",
2043+ "的形状",
2044+ "第2",
2045+ "第4题",
2046+ "等式的性质",
2047+ "解方程",
2048+ "这些函数的图象如图2",
2049+ "配方法",
2050+ "题",
2051+ "0分呢",
2052+ "0狓2",
2053+ "1的图象",
2054+ "12犪犺",
2055+ "12狓",
2056+ "15狓狔2",
2057+ "2二次函数与一元二次方程",
2058+ "2所示",
2059+ "29",
2060+ "3狓2狔",
2061+ "4狓",
2062+ "5犪",
2063+ "5的项是狏与",
2064+ "59",
2065+ "6的方式来表示射线犗犃"
2066+ ],
2067+ "stage": "skill_belt",
2068+ "activation": 0.0853,
2069+ "potential": 4.8106,
2070+ "candidate_score": 183.655,
2071+ "stable_steps": 199
2072+ },
2073+ {
2074+ "region": "书书书",
2075+ "nodes": [
2076+ "α",
2077+ "下面各数哪些是正数",
2078+ "为我们解决许多问题带来方便",
2079+ "主编的话",
2080+ "习题1",
2081+ "例如",
2082+ "几何学的起源",
2083+ "去括号",
2084+ "图1",
2085+ "成由点运动形成的",
2086+ "所以",
2087+ "方程③中未知数的个数和最高次数各是多少",
2088+ "种布料各买了多少米",
2089+ "第一章有理数",
2090+ "第三章一元一次方程",
2091+ "第二十一章一元二次方程",
2092+ "第二十二章二次函数",
2093+ "第8题",
2094+ "结果是仍在起点处",
2095+ "统计资料表明",
2096+ "西宁格尔木拉萨",
2097+ "解",
2098+ "这个问题说明",
2099+ "配方",
2100+ "1",
2101+ "1正数和负数",
2102+ "2"
2103+ ],
2104+ "stage": "ability_core",
2105+ "activation": 3.9121,
2106+ "potential": 24.0382,
2107+ "candidate_score": 90.7741,
2108+ "stable_steps": 200
2109+ },
2110+ {
2111+ "region": "0",
2112+ "nodes": [
2113+ "①",
2114+ "①中有一个未知数狓",
2115+ "一个角是7",
2116+ "列出方程狓6",
2117+ "图中的几个图形能否折叠成为棱柱",
2118+ "如果能",
2119+ "或",
2120+ "根据二次函数图象上三个点的坐标",
2121+ "每个小书包的盈利率为3",
2122+ "这个方程与我们学过的一元一次方程不同",
2123+ "0狓",
2124+ "00",
2125+ "13",
2126+ "2犪",
2127+ "30",
2128+ "50",
2129+ "51",
2130+ "70",
2131+ "9元",
2132+ "9狓2"
2133+ ],
2134+ "stage": "skill_belt",
2135+ "activation": 0.0,
2136+ "potential": 1.407,
2137+ "candidate_score": 59.5316,
2138+ "stable_steps": 183
2139+ },
2140+ {
2141+ "region": "5",
2142+ "nodes": [
2143+ "关于直线和线段有哪些重要结论",
2144+ "再如",
2145+ "写成算式就是",
2146+ "哪些是负数",
2147+ "将各数用逗号分开",
2148+ "我们已经见过像2狓",
2149+ "有理数有哪些运算律",
2150+ "有理数的混合运算都能转化为加法与乘法运算吗",
2151+ "本章学习了有关角的哪些知识",
2152+ "某年我国人均水资源比上年的增幅是",
2153+ "狋小于1",
2154+ "用式子表示生长了狀年的树苗的高度",
2155+ "礼堂第1排有犪个座位",
2156+ "虽然立体图形与平面图形是两类不同的几何图形",
2157+ "请再举出一些平面图形的例子",
2158+ "2狋",
2159+ "31",
2160+ "57",
2161+ "67"
2162+ ],
2163+ "stage": "skill_belt",
2164+ "activation": 0.0,
2165+ "potential": 0.3127,
2166+ "candidate_score": 56.1949,
2167+ "stable_steps": 196
2168+ },
2169+ {
2170+ "region": "3",
2171+ "nodes": [
2172+ "下列图形中可以作为一个正方体的展开图的是",
2173+ "余角和补角",
2174+ "使每个四边形中都有",
2175+ "北偏东1",
2176+ "去年同期这项收入为多少元",
2177+ "古代问题",
2178+ "德国增长1",
2179+ "有理数的加法",
2180+ "次数是2",
2181+ "求出函数的解析式",
2182+ "犪3",
2183+ "用上面的方法考虑0",
2184+ "相邻三行里同一列的三个日期数之和能否为",
2185+ "第7题",
2186+ "结果其中火车所走的路程和这段时间内火车的平均速度",
2187+ "1从算式到方程",
2188+ "2解一元一次方程",
2189+ "4实际问题与一元一次方程",
2190+ "7中"
2191+ ],
2192+ "stage": "skill_belt",
2193+ "activation": 0.0,
2194+ "potential": 0.6887,
2195+ "candidate_score": 56.1293,
2196+ "stable_steps": 198
2197+ }
2198+ ],
2199+ "experience_regions_count": 6,
2200+ "skill_belt_candidates": [
2201+ {
2202+ "node": "1",
2203+ "score": 5.24,
2204+ "stage": "ability_core",
2205+ "flow": 14.8749,
2206+ "stable_steps": 200,
2207+ "touches": 303,
2208+ "target_core": "书书书"
2209+ },
2210+ {
2211+ "node": "2",
2212+ "score": 5.24,
2213+ "stage": "ability_core",
2214+ "flow": 7.9818,
2215+ "stable_steps": 199,
2216+ "touches": 293,
2217+ "target_core": "书书书"
2218+ },
2219+ {
2220+ "node": "0",
2221+ "score": 5.1773,
2222+ "stage": "ability_core",
2223+ "flow": 2.4771,
2224+ "stable_steps": 186,
2225+ "touches": 187,
2226+ "target_core": "1"
2227+ },
2228+ {
2229+ "node": "3",
2230+ "score": 5.0408,
2231+ "stage": "ability_core",
2232+ "flow": 1.7932,
2233+ "stable_steps": 190,
2234+ "touches": 221,
2235+ "target_core": "1"
2236+ },
2237+ {
2238+ "node": "4",
2239+ "score": 5.0183,
2240+ "stage": "ability_core",
2241+ "flow": 1.6012,
2242+ "stable_steps": 198,
2243+ "touches": 192,
2244+ "target_core": "1"
2245+ },
2246+ {
2247+ "node": "狓",
2248+ "score": 5.0012,
2249+ "stage": "ability_core",
2250+ "flow": 1.7476,
2251+ "stable_steps": 120,
2252+ "touches": 30,
2253+ "target_core": "1"
2254+ }
2255+ ],
2256+ "skill_belt_candidates_count": 6,
2257+ "sedimentation_trace_count": 20,
2258+ "merge_events_count": 12,
2259+ "decay_events_count": 24,
2260+ "output_mode": "minimal",
2261+ "feedback_effect": {
2262+ "source": "feedback",
2263+ "mode": "feedback",
2264+ "queued_tokens": [
2265+ "1"
2266+ ],
2267+ "queued_strength": 1.0,
2268+ "polarity": 1,
2269+ "queued_step": 200,
2270+ "last_applied_step": 201,
2271+ "applied_tokens": [
2272+ "1"
2273+ ],
2274+ "phi_delta": 0.0544,
2275+ "mu_delta": 0.0748,
2276+ "flow_delta": 0.0,
2277+ "stage_after": {
2278+ "1": "ability_core"
2279+ },
2280+ "bound_ability_core": "1"
2281+ },
2282+ "phi_range": {
2283+ "min": 0.0007,
2284+ "max": 12.0662
2285+ }
2286+ },
2287+ "surfaced_tokens_or_phrases": {
2288+ "emit_output": "minimal: 1",
2289+ "active_region": [
2290+ "1",
2291+ "2",
2292+ "进而从图象探索二次函数的性质",
2293+ "1"
2294+ ],
2295+ "phi_top_nodes": [
2296+ "1",
2297+ "2",
2298+ "0",
2299+ "4",
2300+ "3"
2301+ ],
2302+ "mu_top_nodes": [
2303+ "1",
2304+ "2",
2305+ "进而从图象探索二次函数的性质",
2306+ "1",
2307+ "可以方便地画出二次函数的图象"
2308+ ],
2309+ "top_flow_edges": [
2310+ "2->1",
2311+ "1->2",
2312+ "1->0",
2313+ "1->4",
2314+ "1->3"
2315+ ]
2316+ },
2317+ "fairness_note": "Branch B passed this scenario in full mode with subject-specific emergent words. Branch A surfaced ['1', '2', '进而从图象探索二次函数的性质'] and emit_output='minimal: 1'; the run is on the same hydrated split and record schedule, but the observable surface is smaller and coarser.",
2318+ "metric_alignment_notes": [
2319+ {
2320+ "metric": "attention_used_total",
2321+ "status": "N/A",
2322+ "detail": "Branch A exposes free_capacity but not attention.used/attention.total."
2323+ },
2324+ {
2325+ "metric": "activated_output_payload",
2326+ "status": "STRUCTURAL MISMATCH",
2327+ "detail": "Branch A emit() returns a string, not Branch B's structured activated-node payload."
2328+ },
2329+ {
2330+ "metric": "bigram_emergent_words_and_circuits",
2331+ "status": "STRUCTURAL MISMATCH",
2332+ "detail": "Branch A does not expose Branch-B-style bigram/circuit extraction; surfaced phrases are reported only from emit_output, active_region, top phi nodes, top mu nodes, and top flows."
2333+ }
2334+ ],
2335+ "stability_checks": {
2336+ "all_finite": true,
2337+ "within_branch_b_stability_ceiling": false,
2338+ "phi_range": {
2339+ "min": 0.0007,
2340+ "max": 12.0662
2341+ }
2342+ }
2343+ },
2344+ {
2345+ "scenario_name": "高中语文_pipeline+stability",
2346+ "status": "PASS",
2347+ "reason": "Required split ran end-to-end on Branch A and stayed within the mirrored stability ceiling, but output_mode remained minimal with free_capacity=0.0.",
2348+ "input_files": [
2349+ "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/高中/语文.jsonl"
2350+ ],
2351+ "input_files_fully_hydrated": true,
2352+ "input_line_counts": [
2353+ 694
2354+ ],
2355+ "record_counts": {
2356+ "available_content_records_seen_before_cap": 300,
2357+ "available_valid_text_records_seen_before_cap": 300,
2358+ "loaded_records_per_input": [
2359+ 300
2360+ ],
2361+ "fed_records": 200,
2362+ "feedback_target": "语文必修上册",
2363+ "feedback_applied": true
2364+ },
2365+ "branch_b_reference": {
2366+ "status": "PASS",
2367+ "summary": "nodes=1857, edges=9983, phi=[-8.266,0.239], mode=full, words=单元/语文/必修/文必/上册",
2368+ "top_words": [
2369+ "单元",
2370+ "语文",
2371+ "必修",
2372+ "文必",
2373+ "上册"
2374+ ]
2375+ },
2376+ "branch_a_observables": {
2377+ "phi_summary": {
2378+ "node_count": 304,
2379+ "total_potential": 50.2645,
2380+ "top_nodes": [
2381+ {
2382+ "node": "语文必修上册",
2383+ "value": 5.1234
2384+ },
2385+ {
2386+ "node": "语文必修下册",
2387+ "value": 3.9899
2388+ },
2389+ {
2390+ "node": "第二单元",
2391+ "value": 1.8372
2392+ },
2393+ {
2394+ "node": "第三单元",
2395+ "value": 0.9353
2396+ },
2397+ {
2398+ "node": "雷雨",
2399+ "value": 0.6657
2400+ }
2401+ ]
2402+ },
2403+ "mu_summary": {
2404+ "active_count": 34,
2405+ "total_activation": 8.0376,
2406+ "top_nodes": [
2407+ {
2408+ "node": "语文必修上册",
2409+ "value": 2.3467
2410+ },
2411+ {
2412+ "node": "书中超空间旅行的发现推动了一个银河帝国的兴起",
2413+ "value": 0.4289
2414+ },
2415+ {
2416+ "node": "语文必修下册",
2417+ "value": 0.42
2418+ },
2419+ {
2420+ "node": "系列惊叹不已",
2421+ "value": 0.3489
2422+ },
2423+ {
2424+ "node": "基地",
2425+ "value": 0.3375
2426+ }
2427+ ]
2428+ },
2429+ "J_summary": {
2430+ "edge_count": 335,
2431+ "total_flow": 26.4198,
2432+ "top_flows": [
2433+ {
2434+ "edge": "书中超空间旅行的发现推动了一个银河帝国的兴起->童年时的第二件事也给我",
2435+ "flow": 0.2498
2436+ },
2437+ {
2438+ "edge": "语文必修下册->语文必修上册",
2439+ "flow": 0.2477
2440+ },
2441+ {
2442+ "edge": "系列惊叹不已->书中超空间旅行的发现推动了一个银河帝国的兴起",
2443+ "flow": 0.2469
2444+ },
2445+ {
2446+ "edge": "我对阿西莫夫的->基地",
2447+ "flow": 0.2469
2448+ },
2449+ {
2450+ "edge": "基地->系列惊叹不已",
2451+ "flow": 0.2469
2452+ }
2453+ ]
2454+ },
2455+ "active_region": [
2456+ "语文必修上册",
2457+ "书中超空间旅行的发现推动了一个银河帝国的兴起",
2458+ "语文必修下册",
2459+ "系列惊叹不已"
2460+ ],
2461+ "active_region_size": 4,
2462+ "bound_ability_core": "语文必修上册",
2463+ "anchor_pull": 0.0,
2464+ "drift_score": 1.0,
2465+ "free_capacity": 0.0,
2466+ "experience_regions": [
2467+ {
2468+ "region": "语文必修上册",
2469+ "nodes": [
2470+ "1993",
2471+ "2",
2472+ "23",
2473+ "25",
2474+ "27",
2475+ "29",
2476+ "33",
2477+ "35",
2478+ "一般认为",
2479+ "与生俱来的好奇心和想象力",
2480+ "两人闷坐了一会儿",
2481+ "义",
2482+ "也看得吃力",
2483+ "乡土中国",
2484+ "书中超空间旅行的发现推动了一个银河帝国的兴起",
2485+ "人类征服疾病的一小步",
2486+ "他有一种想把时间抢回来的劲头",
2487+ "以工匠精神雕琢时代品质",
2488+ "使之最大程度地造福人类",
2489+ "儿做过些什么",
2490+ "再论雷峰塔的倒掉",
2491+ "创新的过程充满了挑战",
2492+ "力的回答",
2493+ "劝学",
2494+ "加来道雄",
2495+ "努力创新",
2496+ "医学研究奖",
2497+ "单元学习任务",
2498+ "又饶有趣味",
2499+ "反对党八股",
2500+ "发现青蒿素的抗疟疗效",
2501+ "向",
2502+ "和你的乐声相比",
2503+ "和尊敬",
2504+ "哈姆莱特谢谢你",
2505+ "喜看稻菽千重浪",
2506+ "在本单元的课文中",
2507+ "在社会主义建设和中华民族伟大复兴的历程中",
2508+ "在这个过程中",
2509+ "基地",
2510+ "多的良药",
2511+ "奥菲利娅殿下",
2512+ "好",
2513+ "好这位同志呀",
2514+ "如何做到情景交融",
2515+ "子路",
2516+ "学习活动",
2517+ "学写诗歌",
2518+ "学出版社",
2519+ "它们可能给这种错觉起一个高深莫测",
2520+ "它们将得出这样一个结论",
2521+ "对中小学生来说",
2522+ "将人生的有价值的东西毁灭给人看",
2523+ "将这个天然分子变为药物",
2524+ "屠呦呦",
2525+ "年",
2526+ "并与之殊死抗争",
2527+ "并且引导我走上成为一个理论物理学家的历程",
2528+ "广阔的未知世界",
2529+ "得那样深沉",
2530+ "德要求的精神与感性的富有乐趣和魅力的日常生活和谐协调",
2531+ "总之",
2532+ "恶",
2533+ "悲剧",
2534+ "愚",
2535+ "我与地坛",
2536+ "我们发现生长在北方的青蒿的青蒿素含量比较低",
2537+ "我们现在处在社会主义社会的初级阶段",
2538+ "我们随即转向第二步",
2539+ "我呼吁大力加强国际合作",
2540+ "我对阿西莫夫的",
2541+ "指称乡土社会的概念指称其他社会的对应概念",
2542+ "推动对中医以及其他传统医学的研究",
2543+ "故都的秋",
2544+ "是因为有一种看不见的神秘力在对它起作用",
2545+ "是我们研究进展的第一步",
2546+ "有些批评家说",
2547+ "有改动",
2548+ "有时候登场人物众多",
2549+ "有时候登场人物比较少",
2550+ "本单元特别选入这两首描写劳动的古诗",
2551+ "果从那变化的一面看",
2552+ "柜台走",
2553+ "欣慰的事吗",
2554+ "死母本花朵中雄蕊的措施",
2555+ "水面上存在的水波",
2556+ "永远歌唱着飞翔",
2557+ "江水寒",
2558+ "没有内部联系的概念",
2559+ "注",
2560+ "注意梳理哈姆莱特与现实之间的各种冲突",
2561+ "洞",
2562+ "烟视",
2563+ "然的样子",
2564+ "爱说话是她的天性",
2565+ "理",
2566+ "琵琶行并序",
2567+ "用计算机技术研究植物学问题",
2568+ "甫",
2569+ "疟疾威胁人类健康长达数千年",
2570+ "的卓越",
2571+ "的奖项",
2572+ "的少女",
2573+ "真没治",
2574+ "眯着眼看",
2575+ "睡莲之所以能够不被触摸而运动",
2576+ "短歌行",
2577+ "立在地球边上放号",
2578+ "童年时的第二件事也给我",
2579+ "童年的两件趣事极大地丰富了我对世界的理解",
2580+ "第三单元",
2581+ "第二单元",
2582+ "系列惊叹不已",
2583+ "而不见",
2584+ "而当钟扬的工作重心转到西藏时",
2585+ "良知泯灭的黑暗现实",
2586+ "药物生",
2587+ "虞美人",
2588+ "要想建立与世界文学的生动联系",
2589+ "议论要有针对性",
2590+ "记得那时我的父母亲不时带我去",
2591+ "语文必修下册",
2592+ "调一致",
2593+ "谓",
2594+ "赤壁赋",
2595+ "车上",
2596+ "这一生物医学领域最负盛名",
2597+ "这一疗法极大地减轻了疟疾的症状",
2598+ "那",
2599+ "阅读时",
2600+ "附三",
2601+ "雷雨",
2602+ "非常荣幸在这里接受今年的拉斯克临床",
2603+ "香雪想快点跑过去",
2604+ "驱动我们不断追求"
2605+ ],
2606+ "stage": "ability_core",
2607+ "activation": 4.9412,
2608+ "potential": 28.1626,
2609+ "candidate_score": 401.891,
2610+ "stable_steps": 195
2611+ },
2612+ {
2613+ "region": "语文必修下册",
2614+ "nodes": [
2615+ "一名物理学家的教育历程",
2616+ "一场戏中",
2617+ "你们不到两个月整个地就要关门的",
2618+ "周朴园梅家的一个年轻小姐",
2619+ "在莎士比亚笔下",
2620+ "奥菲利娅我的好殿下",
2621+ "已",
2622+ "干方面梳理一下",
2623+ "庖丁解牛",
2624+ "康长寿",
2625+ "成一朵梅花补上的",
2626+ "扮监斩官上",
2627+ "春秋时期",
2628+ "曹禺认为",
2629+ "汉的开国功臣之一",
2630+ "清明澄澈",
2631+ "烛之武退秦师",
2632+ "秘消失的船只是不是进入了一个空间漏",
2633+ "起来却是毫无拘束",
2634+ "青蒿素",
2635+ "青蒿素的发现",
2636+ "鲁侍萍你不要怕",
2637+ "齐桓晋文之事"
2638+ ],
2639+ "stage": "skill_belt",
2640+ "activation": 0.3245,
2641+ "potential": 4.1715,
2642+ "candidate_score": 70.2417,
2643+ "stable_steps": 50
2644+ },
2645+ {
2646+ "region": "第二单元",
2647+ "nodes": [
2648+ "31",
2649+ "37",
2650+ "39",
2651+ "41",
2652+ "43",
2653+ "45",
2654+ "47",
2655+ "49",
2656+ "51",
2657+ "53",
2658+ "55",
2659+ "劳动改造世界"
2660+ ],
2661+ "stage": "skill_belt",
2662+ "activation": 0.212,
2663+ "potential": 1.9963,
2664+ "candidate_score": 40.4466,
2665+ "stable_steps": 36
2666+ },
2667+ {
2668+ "region": "第三单元",
2669+ "nodes": [
2670+ "59",
2671+ "61",
2672+ "63",
2673+ "65",
2674+ "67",
2675+ "69",
2676+ "人类在不断的探索与发现中推动文明的进步",
2677+ "优美的古诗词是中华传统文化的瑰宝"
2678+ ],
2679+ "stage": "skill_belt",
2680+ "activation": 0.0,
2681+ "potential": 0.3994,
2682+ "candidate_score": 25.2434,
2683+ "stable_steps": 11
2684+ },
2685+ {
2686+ "region": "第一单元",
2687+ "nodes": [
2688+ "7",
2689+ "如今",
2690+ "梦游天姥吟留别",
2691+ "版",
2692+ "语文必修上册"
2693+ ],
2694+ "stage": "ability_core",
2695+ "activation": 2.4651,
2696+ "potential": 5.8085,
2697+ "candidate_score": 20.3247,
2698+ "stable_steps": 200
2699+ },
2700+ {
2701+ "region": "雷雨",
2702+ "nodes": [
2703+ "中周朴园与鲁侍萍相认",
2704+ "丹心谱",
2705+ "在刻画人物时避免了脸谱化和扁平化",
2706+ "比如",
2707+ "节选"
2708+ ],
2709+ "stage": "skill_belt",
2710+ "activation": 0.0,
2711+ "potential": 0.9782,
2712+ "candidate_score": 15.5685,
2713+ "stable_steps": 16
2714+ }
2715+ ],
2716+ "experience_regions_count": 6,
2717+ "skill_belt_candidates": [
2718+ {
2719+ "node": "语文必修上册",
2720+ "score": 5.24,
2721+ "stage": "ability_core",
2722+ "flow": 5.6745,
2723+ "stable_steps": 200,
2724+ "touches": 210,
2725+ "target_core": "第一单元"
2726+ },
2727+ {
2728+ "node": "语文必修下册",
2729+ "score": 5.2399,
2730+ "stage": "ability_core",
2731+ "flow": 2.9992,
2732+ "stable_steps": 52,
2733+ "touches": 65,
2734+ "target_core": "语文必修上册"
2735+ },
2736+ {
2737+ "node": "第二单元",
2738+ "score": 4.9302,
2739+ "stage": "ability_core",
2740+ "flow": 1.1934,
2741+ "stable_steps": 31,
2742+ "touches": 55,
2743+ "target_core": "语文必修上册"
2744+ },
2745+ {
2746+ "node": "第三单元",
2747+ "score": 4.8341,
2748+ "stage": "ability_core",
2749+ "flow": 0.9938,
2750+ "stable_steps": 9,
2751+ "touches": 23,
2752+ "target_core": "语文必修上册"
2753+ },
2754+ {
2755+ "node": "雷雨",
2756+ "score": 4.7222,
2757+ "stage": "ability_core",
2758+ "flow": 0.5014,
2759+ "stable_steps": 28,
2760+ "touches": 8,
2761+ "target_core": "语文必修上册"
2762+ },
2763+ {
2764+ "node": "如今",
2765+ "score": 4.3961,
2766+ "stage": "ability_core",
2767+ "flow": 0.6072,
2768+ "stable_steps": 153,
2769+ "touches": 8,
2770+ "target_core": "第一单元"
2771+ }
2772+ ],
2773+ "skill_belt_candidates_count": 6,
2774+ "sedimentation_trace_count": 20,
2775+ "merge_events_count": 12,
2776+ "decay_events_count": 24,
2777+ "output_mode": "minimal",
2778+ "feedback_effect": {
2779+ "source": "feedback",
2780+ "mode": "feedback",
2781+ "queued_tokens": [
2782+ "语文必修上册"
2783+ ],
2784+ "queued_strength": 1.0,
2785+ "polarity": 1,
2786+ "queued_step": 200,
2787+ "last_applied_step": 201,
2788+ "applied_tokens": [
2789+ "语文必修上册"
2790+ ],
2791+ "phi_delta": 0.0544,
2792+ "mu_delta": 0.0748,
2793+ "flow_delta": 0.0,
2794+ "stage_after": {
2795+ "语文必修上册": "ability_core"
2796+ },
2797+ "bound_ability_core": "语文必修上册"
2798+ },
2799+ "phi_range": {
2800+ "min": 0.0094,
2801+ "max": 5.1234
2802+ }
2803+ },
2804+ "surfaced_tokens_or_phrases": {
2805+ "emit_output": "minimal: 语文必修上册",
2806+ "active_region": [
2807+ "语文必修上册",
2808+ "书中超空间旅行的发现推动了一个银河帝国的兴起",
2809+ "语文必修下册",
2810+ "系列惊叹不已"
2811+ ],
2812+ "phi_top_nodes": [
2813+ "语文必修上册",
2814+ "语文必修下册",
2815+ "第二单元",
2816+ "第三单元",
2817+ "雷雨"
2818+ ],
2819+ "mu_top_nodes": [
2820+ "语文必修上册",
2821+ "书中超空间旅行的发现推动了一个银河帝国的兴起",
2822+ "语文必修下册",
2823+ "系列惊叹不已",
2824+ "基地"
2825+ ],
2826+ "top_flow_edges": [
2827+ "书中超空间旅行的发现推动了一个银河帝国的兴起->童年时的第二件事也给我",
2828+ "语文必修下册->语文必修上册",
2829+ "系列惊叹不已->书中超空间旅行的发现推动了一个银河帝国的兴起",
2830+ "我对阿西莫夫的->基地",
2831+ "基地->系列惊叹不已"
2832+ ]
2833+ },
2834+ "fairness_note": "Branch B passed this scenario in full mode with subject-specific emergent words. Branch A surfaced ['语文必修上册', '语文必修下册', '书中超空间旅行的发现推动了一个银河帝国的兴起'] and emit_output='minimal: 语文必修上册'; the run is on the same hydrated split and record schedule, but the observable surface is smaller and coarser.",
2835+ "metric_alignment_notes": [
2836+ {
2837+ "metric": "attention_used_total",
2838+ "status": "N/A",
2839+ "detail": "Branch A exposes free_capacity but not attention.used/attention.total."
2840+ },
2841+ {
2842+ "metric": "activated_output_payload",
2843+ "status": "STRUCTURAL MISMATCH",
2844+ "detail": "Branch A emit() returns a string, not Branch B's structured activated-node payload."
2845+ },
2846+ {
2847+ "metric": "bigram_emergent_words_and_circuits",
2848+ "status": "STRUCTURAL MISMATCH",
2849+ "detail": "Branch A does not expose Branch-B-style bigram/circuit extraction; surfaced phrases are reported only from emit_output, active_region, top phi nodes, top mu nodes, and top flows."
2850+ }
2851+ ],
2852+ "stability_checks": {
2853+ "all_finite": true,
2854+ "within_branch_b_stability_ceiling": true,
2855+ "phi_range": {
2856+ "min": 0.0094,
2857+ "max": 5.1234
2858+ }
2859+ }
2860+ },
2861+ {
2862+ "scenario_name": "cross_stage_语文",
2863+ "status": "PASS",
2864+ "reason": "Cross-stage Chinese pipeline stayed finite on the same 3-split schedule and remained within the mirrored ±10.1 ceiling.",
2865+ "input_files": [
2866+ "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/语文.jsonl",
2867+ "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/初中/语文.jsonl",
2868+ "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/高中/语文.jsonl"
2869+ ],
2870+ "input_files_fully_hydrated": true,
2871+ "input_line_counts": [
2872+ 1597,
2873+ 961,
2874+ 694
2875+ ],
2876+ "record_counts": {
2877+ "loaded_records_per_input": [
2878+ 40,
2879+ 40,
2880+ 40
2881+ ],
2882+ "fed_records": 120
2883+ },
2884+ "branch_b_reference": {
2885+ "status": "PASS",
2886+ "summary": "nodes=1128, edges=4139, phi_max=0.126, words=单元/阅读/第一/语文/一单",
2887+ "top_words": [
2888+ "单元",
2889+ "阅读",
2890+ "第一",
2891+ "语文",
2892+ "一单"
2893+ ]
2894+ },
2895+ "branch_a_observables": {
2896+ "phi_summary": {
2897+ "node_count": 261,
2898+ "total_potential": 53.1193,
2899+ "top_nodes": [
2900+ {
2901+ "node": "①",
2902+ "value": 4.5744
2903+ },
2904+ {
2905+ "node": "语文必修上册",
2906+ "value": 3.4177
2907+ },
2908+ {
2909+ "node": "第一单元",
2910+ "value": 2.5123
2911+ },
2912+ {
2913+ "node": "第二单元",
2914+ "value": 1.095
2915+ },
2916+ {
2917+ "node": "阅读第一单元",
2918+ "value": 0.6821
2919+ }
2920+ ]
2921+ },
2922+ "mu_summary": {
2923+ "active_count": 50,
2924+ "total_activation": 12.7418,
2925+ "top_nodes": [
2926+ {
2927+ "node": "①",
2928+ "value": 2.5093
2929+ },
2930+ {
2931+ "node": "第二单元",
2932+ "value": 0.6689
2933+ },
2934+ {
2935+ "node": "当然要做梦",
2936+ "value": 0.614
2937+ },
2938+ {
2939+ "node": "同时也是一个凡人",
2940+ "value": 0.5132
2941+ },
2942+ {
2943+ "node": "袁隆平是一位世界级的伟大科学家",
2944+ "value": 0.5132
2945+ }
2946+ ]
2947+ },
2948+ "J_summary": {
2949+ "edge_count": 309,
2950+ "total_flow": 28.277,
2951+ "top_flows": [
2952+ {
2953+ "edge": "①->节选自",
2954+ "flow": 0.3642
2955+ },
2956+ {
2957+ "edge": "①->选自",
2958+ "flow": 0.356
2959+ },
2960+ {
2961+ "edge": "①->阅读第一单元",
2962+ "flow": 0.3044
2963+ },
2964+ {
2965+ "edge": "①->阅读",
2966+ "flow": 0.2978
2967+ },
2968+ {
2969+ "edge": "至于是不是得担风险->袁隆",
2970+ "flow": 0.2497
2971+ }
2972+ ]
2973+ },
2974+ "active_region": [
2975+ "①",
2976+ "第二单元",
2977+ "当然要做梦",
2978+ "同时也是一个凡人"
2979+ ],
2980+ "active_region_size": 4,
2981+ "bound_ability_core": "①",
2982+ "anchor_pull": 0.0,
2983+ "drift_score": 1.0,
2984+ "free_capacity": 0.0,
2985+ "experience_regions": [
2986+ {
2987+ "region": "①",
2988+ "nodes": [
2989+ "3",
2990+ "5",
2991+ "7",
2992+ "8",
2993+ "一年之计在于春",
2994+ "一本单元作品抒发的都是青春情怀",
2995+ "与友期行",
2996+ "也就一定能把这种优势应用到生产上",
2997+ "也许三分",
2998+ "也许十分",
2999+ "从而大幅度提高水稻的产量",
3000+ "他爱人有个亲戚就住在站上",
3001+ "他都能挺身而出毫不含糊地阐明事实",
3002+ "他高兴地回",
3003+ "但脚为什么变得异常沉重",
3004+ "作品中的哪些地方最让你感动",
3005+ "八分",
3006+ "冰心",
3007+ "凡是涉及不顾农民利益",
3008+ "凤娇照例跑到第三节车厢去找她的",
3009+ "出来的早稻常规品种正在勾头散籽",
3010+ "刘湛秋",
3011+ "刚起头儿",
3012+ "到中流击水2001",
3013+ "劳动创造文明",
3014+ "北京话",
3015+ "单元学习任务",
3016+ "同时也是一个凡人",
3017+ "后面我",
3018+ "吹面不寒杨柳风",
3019+ "呈现一派丰收",
3020+ "哪些是你以前未曾留意",
3021+ "四分",
3022+ "回头望着笔直的铁轨",
3023+ "在幼苗靠近土壤的茎节上生出分枝",
3024+ "在独立开展杂交水稻研究很长时间之后",
3025+ "她站在枕木上",
3026+ "子都会求上门来",
3027+ "家",
3028+ "尊重劳动",
3029+ "崇尚劳动",
3030+ "年春节过后的第二",
3031+ "当然要做梦",
3032+ "把臂弯里的篮子换了换手",
3033+ "无私奉献",
3034+ "无视事实的事",
3035+ "早在",
3036+ "是中华民族世代相传的美德",
3037+ "曾记否",
3038+ "有改动",
3039+ "本单元课文主要是写学习生活的",
3040+ "水稻的一类",
3041+ "沁园春",
3042+ "沈英甲",
3043+ "泰戈尔",
3044+ "激发诗情和灵感",
3045+ "火车也会停得久一些",
3046+ "热情的",
3047+ "热爱劳动",
3048+ "生产",
3049+ "的奠基人",
3050+ "秋思",
3051+ "稻",
3052+ "第一单元",
3053+ "第二单元",
3054+ "绿萍",
3055+ "而读过之",
3056+ "育出人工杂交稻",
3057+ "至于是不是得担风险",
3058+ "莫怀戚",
3059+ "获得1933年诺贝尔生理学或医学奖",
3060+ "袁隆",
3061+ "袁隆平",
3062+ "袁隆平才从国外资料中了解到",
3063+ "袁隆平是一位世界级的伟大科学家",
3064+ "记首届国家最高科技奖获得者袁隆平",
3065+ "诗歌是情感的艺术",
3066+ "语文必修上册",
3067+ "还告诉她",
3068+ "金色花",
3069+ "铁轨在月亮的照耀下泛着清淡的光",
3070+ "长沙",
3071+ "闻王昌龄左迁龙标遥有此寄",
3072+ "陈太丘",
3073+ "需要把握时代的脉搏",
3074+ "需要生活的体验和积累",
3075+ "香雪系紧头上的紫红色线围巾",
3076+ "马致远"
3077+ ],
3078+ "stage": "ability_core",
3079+ "activation": 8.6021,
3080+ "potential": 28.8448,
3081+ "candidate_score": 247.6723,
3082+ "stable_steps": 75
3083+ },
3084+ {
3085+ "region": "2",
3086+ "nodes": [
3087+ "6",
3088+ "①",
3089+ "春",
3090+ "李白",
3091+ "节选自",
3092+ "读读写写",
3093+ "选自",
3094+ "阅读",
3095+ "阅读第一单元",
3096+ "阅读第二单元",
3097+ "雨的四季",
3098+ "预习"
3099+ ],
3100+ "stage": "ability_core",
3101+ "activation": 2.8704,
3102+ "potential": 8.7634,
3103+ "candidate_score": 50.7603,
3104+ "stable_steps": 80
3105+ },
3106+ {
3107+ "region": "语文必修上册",
3108+ "nodes": [
3109+ "两人闷坐了一会儿",
3110+ "和你的乐声相比",
3111+ "喜看稻菽千重浪",
3112+ "好这位同志呀",
3113+ "学写诗歌",
3114+ "得那样深沉",
3115+ "欣慰的事吗",
3116+ "死母本花朵中雄蕊的措施",
3117+ "永远歌唱着飞翔",
3118+ "爱说话是她的天性",
3119+ "版",
3120+ "真没治",
3121+ "立在地球边上放号",
3122+ "车上",
3123+ "那",
3124+ "香雪想快点跑过去"
3125+ ],
3126+ "stage": "skill_belt",
3127+ "activation": 0.335,
3128+ "potential": 3.4226,
3129+ "candidate_score": 48.5545,
3130+ "stable_steps": 34
3131+ },
3132+ {
3133+ "region": "第一单元",
3134+ "nodes": [
3135+ "11",
3136+ "13",
3137+ "15",
3138+ "17",
3139+ "19",
3140+ "21",
3141+ "23",
3142+ "25",
3143+ "27",
3144+ "29",
3145+ "9"
3146+ ],
3147+ "stage": "experience",
3148+ "activation": 0.0,
3149+ "potential": 2.1062,
3150+ "candidate_score": 35.5631,
3151+ "stable_steps": 29
3152+ },
3153+ {
3154+ "region": "阅读第一单元",
3155+ "nodes": [
3156+ "4",
3157+ "冬日灰蒙蒙的天空中",
3158+ "天净沙",
3159+ "的一份事去"
3160+ ],
3161+ "stage": "skill_belt",
3162+ "activation": 0.0,
3163+ "potential": 0.2386,
3164+ "candidate_score": 12.3236,
3165+ "stable_steps": 74
3166+ },
3167+ {
3168+ "region": "第二单元",
3169+ "nodes": [
3170+ "33",
3171+ "35",
3172+ "37",
3173+ "劳动改造世界",
3174+ "第三单元"
3175+ ],
3176+ "stage": "skill_belt",
3177+ "activation": 0.9343,
3178+ "potential": 1.2973,
3179+ "candidate_score": 11.787,
3180+ "stable_steps": 80
3181+ }
3182+ ],
3183+ "experience_regions_count": 6,
3184+ "skill_belt_candidates": [
3185+ {
3186+ "node": "①",
3187+ "score": 5.24,
3188+ "stage": "ability_core",
3189+ "flow": 5.9504,
3190+ "stable_steps": 76,
3191+ "touches": 59,
3192+ "target_core": "2"
3193+ },
3194+ {
3195+ "node": "语文必修上册",
3196+ "score": 5.1867,
3197+ "stage": "ability_core",
3198+ "flow": 2.5555,
3199+ "stable_steps": 39,
3200+ "touches": 50,
3201+ "target_core": "①"
3202+ },
3203+ {
3204+ "node": "第一单元",
3205+ "score": 5.0305,
3206+ "stage": "ability_core",
3207+ "flow": 1.579,
3208+ "stable_steps": 40,
3209+ "touches": 38,
3210+ "target_core": "①"
3211+ },
3212+ {
3213+ "node": "第二单元",
3214+ "score": 4.8691,
3215+ "stage": "ability_core",
3216+ "flow": 1.1794,
3217+ "stable_steps": 41,
3218+ "touches": 11,
3219+ "target_core": "①"
3220+ },
3221+ {
3222+ "node": "阅读",
3223+ "score": 4.7086,
3224+ "stage": "ability_core",
3225+ "flow": 0.5594,
3226+ "stable_steps": 80,
3227+ "touches": 29,
3228+ "target_core": "2"
3229+ },
3230+ {
3231+ "node": "阅读第一单元",
3232+ "score": 4.706,
3233+ "stage": "ability_core",
3234+ "flow": 0.5371,
3235+ "stable_steps": 76,
3236+ "touches": 25,
3237+ "target_core": "2"
3238+ }
3239+ ],
3240+ "skill_belt_candidates_count": 6,
3241+ "sedimentation_trace_count": 20,
3242+ "merge_events_count": 12,
3243+ "decay_events_count": 24,
3244+ "output_mode": "minimal",
3245+ "feedback_effect": {
3246+ "source": "emit",
3247+ "mode": "minimal",
3248+ "queued_tokens": [
3249+ "①"
3250+ ],
3251+ "queued_strength": 0.22,
3252+ "confidence_proxy": 0.3338,
3253+ "queued_step": 120,
3254+ "last_applied_step": null
3255+ },
3256+ "phi_range": {
3257+ "min": 0.0037,
3258+ "max": 4.5744
3259+ }
3260+ },
3261+ "surfaced_tokens_or_phrases": {
3262+ "emit_output": "minimal: ①",
3263+ "active_region": [
3264+ "①",
3265+ "第二单元",
3266+ "当然要做梦",
3267+ "同时也是一个凡人"
3268+ ],
3269+ "phi_top_nodes": [
3270+ "①",
3271+ "语文必修上册",
3272+ "第一单元",
3273+ "第二单元",
3274+ "阅读第一单元"
3275+ ],
3276+ "mu_top_nodes": [
3277+ "①",
3278+ "第二单元",
3279+ "当然要做梦",
3280+ "同时也是一个凡人",
3281+ "袁隆平是一位世界级的伟大科学家"
3282+ ],
3283+ "top_flow_edges": [
3284+ "①->节选自",
3285+ "①->选自",
3286+ "①->阅读第一单元",
3287+ "①->阅读",
3288+ "至于是不是得担风险->袁隆"
3289+ ]
3290+ },
3291+ "fairness_note": "Branch B passed this same 120-record cross-stage composition with stronger subject-word emergence. Branch A completed the same split family but stayed in minimal mode and surfaced ['①', '第二单元', '当然要做梦'].",
3292+ "metric_alignment_notes": [
3293+ {
3294+ "metric": "attention_used_total",
3295+ "status": "N/A",
3296+ "detail": "Branch A exposes free_capacity but not attention.used/attention.total."
3297+ },
3298+ {
3299+ "metric": "activated_output_payload",
3300+ "status": "STRUCTURAL MISMATCH",
3301+ "detail": "Branch A emit() returns a string, not Branch B's structured activated-node payload."
3302+ },
3303+ {
3304+ "metric": "bigram_emergent_words_and_circuits",
3305+ "status": "STRUCTURAL MISMATCH",
3306+ "detail": "Branch A does not expose Branch-B-style bigram/circuit extraction; surfaced phrases are reported only from emit_output, active_region, top phi nodes, top mu nodes, and top flows."
3307+ }
3308+ ],
3309+ "stability_checks": {
3310+ "all_finite": true,
3311+ "within_branch_b_stability_ceiling": true,
3312+ "phi_range": {
3313+ "min": 0.0037,
3314+ "max": 4.5744
3315+ }
3316+ }
3317+ },
3318+ {
3319+ "scenario_name": "cross_subject_小学",
3320+ "status": "FAIL",
3321+ "reason": "Cross-subject primary-school pipeline ran, but phi_range={'min': 0.0156, 'max': 20.1786} exceeded the mirrored Branch B ceiling of ±10.1.",
3322+ "input_files": [
3323+ "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/语文.jsonl",
3324+ "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/数学.jsonl",
3325+ "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/科学.jsonl"
3326+ ],
3327+ "input_files_fully_hydrated": true,
3328+ "input_line_counts": [
3329+ 1597,
3330+ 7459,
3331+ 5032
3332+ ],
3333+ "record_counts": {
3334+ "loaded_records_per_input": [
3335+ 40,
3336+ 40,
3337+ 40
3338+ ],
3339+ "fed_records": 120,
3340+ "anchors_used": [
3341+ "语文",
3342+ "数学",
3343+ "科学"
3344+ ]
3345+ },
3346+ "branch_b_reference": {
3347+ "status": "PASS",
3348+ "summary": "nodes=708, edges=2866, phi_max=10.000, cores=21, words=食物/怎样/什么/我们/本文",
3349+ "top_words": [
3350+ "食物",
3351+ "怎样",
3352+ "什么",
3353+ "我们",
3354+ "本文"
3355+ ]
3356+ },
3357+ "branch_a_observables": {
3358+ "phi_summary": {
3359+ "node_count": 301,
3360+ "total_potential": 95.8719,
3361+ "top_nodes": [
3362+ {
3363+ "node": "语文",
3364+ "value": 20.1786
3365+ },
3366+ {
3367+ "node": "数学",
3368+ "value": 11.5795
3369+ },
3370+ {
3371+ "node": "科学",
3372+ "value": 7.7065
3373+ },
3374+ {
3375+ "node": "1",
3376+ "value": 4.4921
3377+ },
3378+ {
3379+ "node": "4",
3380+ "value": 2.0832
3381+ }
3382+ ]
3383+ },
3384+ "mu_summary": {
3385+ "active_count": 41,
3386+ "total_activation": 11.1426,
3387+ "top_nodes": [
3388+ {
3389+ "node": "语文",
3390+ "value": 1.9801
3391+ },
3392+ {
3393+ "node": "数学",
3394+ "value": 0.8662
3395+ },
3396+ {
3397+ "node": "知道了开关的作用",
3398+ "value": 0.5933
3399+ },
3400+ {
3401+ "node": "在简单电路中需要开关",
3402+ "value": 0.4959
3403+ },
3404+ {
3405+ "node": "知道了简单电路的连接方式",
3406+ "value": 0.4959
3407+ }
3408+ ]
3409+ },
3410+ "J_summary": {
3411+ "edge_count": 466,
3412+ "total_flow": 30.3394,
3413+ "top_flows": [
3414+ {
3415+ "edge": "科学->拓展与应用",
3416+ "flow": 0.9419
3417+ },
3418+ {
3419+ "edge": "科学->单元回顾",
3420+ "flow": 0.3889
3421+ },
3422+ {
3423+ "edge": "1->语文",
3424+ "flow": 0.3341
3425+ },
3426+ {
3427+ "edge": "语文->本文由人民教育出版社小学语文室编写",
3428+ "flow": 0.3263
3429+ },
3430+ {
3431+ "edge": "语文->本文是自编课文",
3432+ "flow": 0.3251
3433+ }
3434+ ]
3435+ },
3436+ "active_region": [
3437+ "语文",
3438+ "数学",
3439+ "知道了开关的作用",
3440+ "在简单电路中需要开关"
3441+ ],
3442+ "active_region_size": 4,
3443+ "bound_ability_core": "语文",
3444+ "anchor_pull": 0.3941,
3445+ "drift_score": 1.0,
3446+ "free_capacity": 0.0,
3447+ "experience_regions": [
3448+ {
3449+ "region": "语文",
3450+ "nodes": [
3451+ "1",
3452+ "2",
3453+ "3",
3454+ "4",
3455+ "5",
3456+ "6",
3457+ "7",
3458+ "一二三四五上下",
3459+ "一片两片三四片",
3460+ "下面这些做",
3461+ "不久",
3462+ "个",
3463+ "了解他们是怎样解",
3464+ "人们经常用一些仪器",
3465+ "从第",
3466+ "儿歌作者王清秀",
3467+ "具有很大的破坏性",
3468+ "准确地检测设",
3469+ "前后左右东西南北",
3470+ "口耳目手足站坐",
3471+ "向有经验的人请教",
3472+ "哪些地方用到了电",
3473+ "在实际生产和生活中",
3474+ "在少的后面画",
3475+ "在日常生活中",
3476+ "在简单电路中需要开关",
3477+ "填一填",
3478+ "如何",
3479+ "如果不注意安全用电",
3480+ "如果手电筒不能正常工作了",
3481+ "对人们的生产生活和生命安全具有较大威胁",
3482+ "小白兔和小灰兔",
3483+ "小白兔回到家里",
3484+ "展示台",
3485+ "并把它连接到电路中",
3486+ "应该怎样检查它的电路故障",
3487+ "快速",
3488+ "想一想",
3489+ "想办法用身边的材料制作一个开关",
3490+ "我也喜欢踢毽子",
3491+ "我们一起来做游戏吧",
3492+ "我还会说",
3493+ "拓展与应用",
3494+ "控制小灯泡的亮和灭",
3495+ "摆一摆",
3496+ "数学",
3497+ "断开接通",
3498+ "旅行",
3499+ "日月水火山石田禾",
3500+ "春天来了",
3501+ "有几种放法",
3502+ "本单元通过观察我们一日三餐吃了哪些食物",
3503+ "本单元通过食盐和白糖的溶解实验",
3504+ "本文作者夏辇生",
3505+ "本文作者嵇鸿",
3506+ "本文作者是苏联的阿",
3507+ "本文作者窦植",
3508+ "本文作者胡木仁",
3509+ "本文作者龚艺兵",
3510+ "本文是自编课文",
3511+ "本文根据",
3512+ "本文根据壮族民歌改写",
3513+ "本文由人民教育出版社小学语文室编写",
3514+ "本文选自人民教育出版社",
3515+ "本文选自北京师范大学出版社",
3516+ "本章学习了简单电路",
3517+ "用电调查",
3518+ "电与我们的生活关系非常密切",
3519+ "知道了开关的作用",
3520+ "知道了简单电路的连接方式",
3521+ "科学",
3522+ "科学实践",
3523+ "观察哪些能使小灯泡",
3524+ "语文园地三",
3525+ "语文第一册",
3526+ "请问",
3527+ "读书真快乐",
3528+ "课文",
3529+ "谁的样",
3530+ "调查家中哪些地方用到了电",
3531+ "通过分析比较",
3532+ "通过实验",
3533+ "雷电是一种大气中的剧烈放电现象"
3534+ ],
3535+ "stage": "ability_core",
3536+ "activation": 7.7685,
3537+ "potential": 50.2825,
3538+ "candidate_score": 255.4401,
3539+ "stable_steps": 119
3540+ },
3541+ {
3542+ "region": "1",
3543+ "nodes": [
3544+ "50毫升水能溶解多少食盐",
3545+ "9",
3546+ "了解营养平衡膳食宝塔",
3547+ "分析电路出了什么故障",
3548+ "右说一说",
3549+ "吃一口馒头",
3550+ "哪些因素会影响方糖的溶解",
3551+ "哪些材料容易导电",
3552+ "天地人",
3553+ "开关为什么能控制电路呢",
3554+ "想办法用下面的材料点亮小灯泡",
3555+ "我们如何将它们分离开呢",
3556+ "手电筒是怎样发光的",
3557+ "把不同的材料分别连接在电路中",
3558+ "把左边的4",
3559+ "拆开一个手电筒",
3560+ "按顺序填数",
3561+ "昨天我们早餐",
3562+ "条",
3563+ "比少",
3564+ "点亮小灯泡",
3565+ "电的应用非常广泛",
3566+ "秋天",
3567+ "等于3",
3568+ "观察一个有故障的电",
3569+ "观察一种",
3570+ "识字",
3571+ "试一试",
3572+ "说一说左边的加法",
3573+ "这两份午餐的营养搭配合理吗"
3574+ ],
3575+ "stage": "skill_belt",
3576+ "activation": 0.1172,
3577+ "potential": 4.4003,
3578+ "candidate_score": 92.3705,
3579+ "stable_steps": 72
3580+ },
3581+ {
3582+ "region": "科学",
3583+ "nodes": [
3584+ "25",
3585+ "了解消化器官的功能",
3586+ "制作一个简易电路检测器",
3587+ "制作浓盐水",
3588+ "单元回顾",
3589+ "安全用电",
3590+ "家庭用电第三单元",
3591+ "对照人体消化器官示意图",
3592+ "将开关连接在电路中",
3593+ "小明的手电筒不亮了",
3594+ "开关",
3595+ "怎样加快溶解",
3596+ "怎样把食盐中的沙子分离出来",
3597+ "我们来设计一份营养平衡的一日食谱",
3598+ "把它们分离",
3599+ "把盐析出来",
3600+ "毫升水能溶解多少白糖",
3601+ "溶解与分离第二单元",
3602+ "点燃酒精灯要用火柴",
3603+ "电与我们",
3604+ "盐和糖的溶解",
3605+ "第三单元家庭用电",
3606+ "观察小灯泡和电池",
3607+ "观察手电筒",
3608+ "调查一天中吃了哪些食物",
3609+ "这些食物是从哪里来的",
3610+ "食物与消化第一单元",
3611+ "食物的消化",
3612+ "食物的营养",
3613+ "饮食与健康"
3614+ ],
3615+ "stage": "skill_belt",
3616+ "activation": 0.5334,
3617+ "potential": 5.0465,
3618+ "candidate_score": 88.562,
3619+ "stable_steps": 40
3620+ },
3621+ {
3622+ "region": "数学",
3623+ "nodes": [
3624+ "下面的物品放在什么位置合适",
3625+ "你能发现什么",
3626+ "共有3",
3627+ "只",
3628+ "哪两张卡片上的点子数相加得",
3629+ "图中还可以比什么",
3630+ "在的上面",
3631+ "怎样搭呢",
3632+ "怎样解答",
3633+ "我的多",
3634+ "我说你猜",
3635+ "把",
3636+ "摆",
3637+ "摸摸你的左耳",
3638+ "明明获得第名",
3639+ "涂一涂",
3640+ "班小明小华小刚小玉小云",
3641+ "生活中的数学",
3642+ "用",
3643+ "用自己的方式表示1",
3644+ "练习九",
3645+ "练习十",
3646+ "编者",
3647+ "读作",
3648+ "长方体"
3649+ ],
3650+ "stage": "skill_belt",
3651+ "activation": 0.0,
3652+ "potential": 2.3413,
3653+ "candidate_score": 75.7112,
3654+ "stable_steps": 80
3655+ },
3656+ {
3657+ "region": "2",
3658+ "nodes": [
3659+ "54",
3660+ "一二三四五",
3661+ "从左边数",
3662+ "任意指一道算式",
3663+ "再数一数其他事物",
3664+ "加",
3665+ "只小鸟圈起来",
3666+ "小小的船",
3667+ "怎样使一块方糖在水中尽快溶解",
3668+ "我上学了",
3669+ "我是小学生",
3670+ "排第",
3671+ "是怎样排列的",
3672+ "根据记录进行讨",
3673+ "比较能使小灯泡亮起来的连接方法",
3674+ "禁止向燃着的酒精灯里添加酒精",
3675+ "绝对禁止用酒精灯引燃另一只酒精灯",
3676+ "观看时眼睛与刻度线相平",
3677+ "语文",
3678+ "通过实验进行研究"
3679+ ],
3680+ "stage": "ability_core",
3681+ "activation": 1.9801,
3682+ "potential": 21.9178,
3683+ "candidate_score": 65.4951,
3684+ "stable_steps": 120
3685+ },
3686+ {
3687+ "region": "3",
3688+ "nodes": [
3689+ "为了保护消化器官",
3690+ "减号",
3691+ "分析小灯泡不亮的连接方法",
3692+ "只小鸟涂上颜色",
3693+ "它们有什么共同点",
3694+ "往蒸发皿里倒入少许浓盐水",
3695+ "快速地说出得数",
3696+ "用完酒精灯",
3697+ "用简易电路检测器查找出电路故障",
3698+ "看图说一说算式表示的意思",
3699+ "看谁搭得又稳又高",
3700+ "要用所有的积木搭",
3701+ "说一说设计的理由",
3702+ "里可以填几"
3703+ ],
3704+ "stage": "skill_belt",
3705+ "activation": 0.0,
3706+ "potential": 1.5791,
3707+ "candidate_score": 42.1747,
3708+ "stable_steps": 61
3709+ }
3710+ ],
3711+ "experience_regions_count": 6,
3712+ "skill_belt_candidates": [
3713+ {
3714+ "node": "科学",
3715+ "score": 5.29,
3716+ "stage": "ability_core",
3717+ "flow": 4.2637,
3718+ "stable_steps": 40,
3719+ "touches": 80,
3720+ "target_core": "语文"
3721+ },
3722+ {
3723+ "node": "语文",
3724+ "score": 5.29,
3725+ "stage": "ability_core",
3726+ "flow": 4.8922,
3727+ "stable_steps": 120,
3728+ "touches": 99,
3729+ "target_core": "2"
3730+ },
3731+ {
3732+ "node": "1",
3733+ "score": 5.24,
3734+ "stage": "ability_core",
3735+ "flow": 3.3475,
3736+ "stable_steps": 78,
3737+ "touches": 92,
3738+ "target_core": "语文"
3739+ },
3740+ {
3741+ "node": "数学",
3742+ "score": 5.1043,
3743+ "stage": "ability_core",
3744+ "flow": 1.4525,
3745+ "stable_steps": 80,
3746+ "touches": 95,
3747+ "target_core": "语文"
3748+ },
3749+ {
3750+ "node": "拓展与应用",
3751+ "score": 5.0524,
3752+ "stage": "ability_core",
3753+ "flow": 2.2744,
3754+ "stable_steps": 35,
3755+ "touches": 19,
3756+ "target_core": "语文"
3757+ },
3758+ {
3759+ "node": "3",
3760+ "score": 4.9311,
3761+ "stage": "ability_core",
3762+ "flow": 1.0836,
3763+ "stable_steps": 70,
3764+ "touches": 52,
3765+ "target_core": "语文"
3766+ }
3767+ ],
3768+ "skill_belt_candidates_count": 6,
3769+ "sedimentation_trace_count": 20,
3770+ "merge_events_count": 12,
3771+ "decay_events_count": 24,
3772+ "output_mode": "minimal",
3773+ "feedback_effect": {
3774+ "source": "emit",
3775+ "mode": "minimal",
3776+ "queued_tokens": [
3777+ "语文"
3778+ ],
3779+ "queued_strength": 0.22,
3780+ "confidence_proxy": 0.2826,
3781+ "queued_step": 120,
3782+ "last_applied_step": null
3783+ },
3784+ "phi_range": {
3785+ "min": 0.0156,
3786+ "max": 20.1786
3787+ }
3788+ },
3789+ "surfaced_tokens_or_phrases": {
3790+ "emit_output": "minimal: 语文",
3791+ "active_region": [
3792+ "语文",
3793+ "数学",
3794+ "知道了开关的作用",
3795+ "在简单电路中需要开关"
3796+ ],
3797+ "phi_top_nodes": [
3798+ "语文",
3799+ "数学",
3800+ "科学",
3801+ "1",
3802+ "4"
3803+ ],
3804+ "mu_top_nodes": [
3805+ "语文",
3806+ "数学",
3807+ "知道了开关的作用",
3808+ "在简单电路中需要开关",
3809+ "知道了简单电路的连接方式"
3810+ ],
3811+ "top_flow_edges": [
3812+ "科学->拓展与应用",
3813+ "科学->单元回顾",
3814+ "1->语文",
3815+ "语文->本文由人民教育出版社小学语文室编写",
3816+ "语文->本文是自编课文"
3817+ ]
3818+ },
3819+ "fairness_note": "Branch B passed this same anchor-conditioned three-subject composition at the stability ceiling. Branch A kept subject anchors ['语文', '数学', '科学'] visible, but emit_output='minimal: 语文' and the run stayed in minimal mode.",
3820+ "metric_alignment_notes": [
3821+ {
3822+ "metric": "attention_used_total",
3823+ "status": "N/A",
3824+ "detail": "Branch A exposes free_capacity but not attention.used/attention.total."
3825+ },
3826+ {
3827+ "metric": "activated_output_payload",
3828+ "status": "STRUCTURAL MISMATCH",
3829+ "detail": "Branch A emit() returns a string, not Branch B's structured activated-node payload."
3830+ },
3831+ {
3832+ "metric": "bigram_emergent_words_and_circuits",
3833+ "status": "STRUCTURAL MISMATCH",
3834+ "detail": "Branch A does not expose Branch-B-style bigram/circuit extraction; surfaced phrases are reported only from emit_output, active_region, top phi nodes, top mu nodes, and top flows."
3835+ }
3836+ ],
3837+ "stability_checks": {
3838+ "all_finite": true,
3839+ "within_branch_b_stability_ceiling": false,
3840+ "phi_range": {
3841+ "min": 0.0156,
3842+ "max": 20.1786
3843+ }
3844+ }
3845+ },
3846+ {
3847+ "scenario_name": "all_in_one_5subjects",
3848+ "status": "PASS",
3849+ "reason": "All five audited splits were fed through Branch A on the same 250-record schedule and stayed within the mirrored ±10.1 ceiling.",
3850+ "input_files": [
3851+ "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/语文.jsonl",
3852+ "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/小学/数学.jsonl",
3853+ "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/初中/语文.jsonl",
3854+ "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/初中/数学.jsonl",
3855+ "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28/splits/by_stage_subject/高中/语文.jsonl"
3856+ ],
3857+ "input_files_fully_hydrated": true,
3858+ "input_line_counts": [
3859+ 1597,
3860+ 7459,
3861+ 961,
3862+ 5559,
3863+ 694
3864+ ],
3865+ "record_counts": {
3866+ "loaded_records_per_input": [
3867+ 50,
3868+ 50,
3869+ 50,
3870+ 50,
3871+ 50
3872+ ],
3873+ "fed_records": 250
3874+ },
3875+ "branch_b_reference": {
3876+ "status": "PASS",
3877+ "summary": "fed=250, nodes=1373, edges=6863, phi_max=0.198, words=单元/阅读/语文/第一/一单",
3878+ "top_words": [
3879+ "单元",
3880+ "阅读",
3881+ "语文",
3882+ "第一",
3883+ "一单"
3884+ ]
3885+ },
3886+ "branch_a_observables": {
3887+ "phi_summary": {
3888+ "node_count": 304,
3889+ "total_potential": 62.8682,
3890+ "top_nodes": [
3891+ {
3892+ "node": "1",
3893+ "value": 5.6672
3894+ },
3895+ {
3896+ "node": "语文必修上册",
3897+ "value": 3.8102
3898+ },
3899+ {
3900+ "node": "2",
3901+ "value": 2.3673
3902+ },
3903+ {
3904+ "node": "第二单元",
3905+ "value": 2.1107
3906+ },
3907+ {
3908+ "node": "第一单元",
3909+ "value": 1.8546
3910+ }
3911+ ]
3912+ },
3913+ "mu_summary": {
3914+ "active_count": 52,
3915+ "total_activation": 13.5318,
3916+ "top_nodes": [
3917+ {
3918+ "node": "1",
3919+ "value": 2.6191
3920+ },
3921+ {
3922+ "node": "第二单元",
3923+ "value": 0.6567
3924+ },
3925+ {
3926+ "node": "科学队长",
3927+ "value": 0.5132
3928+ },
3929+ {
3930+ "node": "钟扬可以称作",
3931+ "value": 0.5132
3932+ },
3933+ {
3934+ "node": "了",
3935+ "value": 0.512
3936+ }
3937+ ]
3938+ },
3939+ "J_summary": {
3940+ "edge_count": 412,
3941+ "total_flow": 30.686,
3942+ "top_flows": [
3943+ {
3944+ "edge": "1->2",
3945+ "flow": 0.6168
3946+ },
3947+ {
3948+ "edge": "1->5",
3949+ "flow": 0.595
3950+ },
3951+ {
3952+ "edge": "1->3",
3953+ "flow": 0.436
3954+ },
3955+ {
3956+ "edge": "1->计算",
3957+ "flow": 0.4015
3958+ },
3959+ {
3960+ "edge": "1->4",
3961+ "flow": 0.3622
3962+ }
3963+ ]
3964+ },
3965+ "active_region": [
3966+ "1",
3967+ "第二单元",
3968+ "科学队长",
3969+ "钟扬可以称作"
3970+ ],
3971+ "active_region_size": 4,
3972+ "bound_ability_core": "1",
3973+ "anchor_pull": 0.0,
3974+ "drift_score": 1.0,
3975+ "free_capacity": 0.0,
3976+ "experience_regions": [
3977+ {
3978+ "region": "1",
3979+ "nodes": [
3980+ "1",
3981+ "17",
3982+ "1964",
3983+ "1984",
3984+ "2017",
3985+ "3",
3986+ "39",
3987+ "5",
3988+ "7",
3989+ "8",
3990+ "9",
3991+ "一书中写道",
3992+ "下面各数哪些是正数",
3993+ "中午上升了1",
3994+ "也许那个时候",
3995+ "了",
3996+ "他扎根西藏",
3997+ "他曾用两年的业余",
3998+ "他都能挺身而出毫不含糊地阐明事实",
3999+ "他高兴地回",
4000+ "你们的服务态度真好",
4001+ "例如0",
4002+ "写出下列各数的倒数",
4003+ "几天后",
4004+ "分数",
4005+ "努力为人类建",
4006+ "劳动创造文明",
4007+ "化简",
4008+ "北京冬季里某一天的气温为",
4009+ "半夜又下降了9",
4010+ "又拿出几块用小纸袋装好",
4011+ "口算",
4012+ "台儿沟的姑娘们刚把晚饭端上桌就慌了神",
4013+ "同时也是一个凡人",
4014+ "后边的也嚷道",
4015+ "嘱咐道",
4016+ "在幼苗靠近土壤的茎节上生出分枝",
4017+ "在独立开展杂交水稻研究很长时间之后",
4018+ "塞进孩子的衣兜里",
4019+ "她准是遇到了什么不顺心的事",
4020+ "好像在默默地向大山诉说着自己的虔诚",
4021+ "孩",
4022+ "对中小学生来说",
4023+ "帕尔伯格在他",
4024+ "年",
4025+ "年来无人涉足",
4026+ "张秉贵也随着她向柜台东头走去",
4027+ "当他累得额头渗满汗珠",
4028+ "当然要做梦",
4029+ "您先擦擦汗",
4030+ "我越是要热情接待她",
4031+ "把一大包枣和梨放在柜台上",
4032+ "把下面的有理数填在相应的大括号里",
4033+ "把剩下的糖果包捆结实递给顾客",
4034+ "把3换成0",
4035+ "按钟扬的话说",
4036+ "改变人类的命运",
4037+ "无视事实的事",
4038+ "有改动",
4039+ "有时",
4040+ "植物资源被严重低估",
4041+ "正数大于0",
4042+ "水稻的一类",
4043+ "沁园春",
4044+ "湖南邵阳人",
4045+ "版",
4046+ "牛舌饼",
4047+ "生产",
4048+ "的奠基人",
4049+ "科学队长",
4050+ "稻",
4051+ "第一单元",
4052+ "第二单元",
4053+ "等待着有一天",
4054+ "结果保留两位小数",
4055+ "美国学者唐",
4056+ "老同志",
4057+ "胖",
4058+ "至于是不是得担风险",
4059+ "袁隆",
4060+ "袁隆平使",
4061+ "袁隆平才从国外资料中了解到",
4062+ "袁隆平是一位世界级的伟大科学家",
4063+ "计算",
4064+ "计算0",
4065+ "语文必修上册",
4066+ "走向丰衣足食的世界",
4067+ "越是这样",
4068+ "边走边想",
4069+ "这些种子静静地沉睡在一个又一个玻璃罐里",
4070+ "这位女顾客又来到柜台前",
4071+ "那么你认为下面的空格应填写什么数",
4072+ "那时",
4073+ "钟扬",
4074+ "钟扬可以称作",
4075+ "钟扬在西藏",
4076+ "钟扬曾在一次公开演讲中这样介绍",
4077+ "钟扬被分配到中国科学院武汉植物研究所工作",
4078+ "顾客就按住秤盘说",
4079+ "顾客感激地说",
4080+ "饥饿的威胁在退却",
4081+ "2有理数",
4082+ "3有理数的加减法",
4083+ "4有理数的乘除法",
4084+ "5有理数的乘方",
4085+ "7",
4086+ "8"
4087+ ],
4088+ "stage": "ability_core",
4089+ "activation": 8.8044,
4090+ "potential": 32.345,
4091+ "candidate_score": 307.3775,
4092+ "stable_steps": 220
4093+ },
4094+ {
4095+ "region": "语文必修上册",
4096+ "nodes": [
4097+ "两人闷坐了一会儿",
4098+ "力的回答",
4099+ "和你的乐声相比",
4100+ "和尊敬",
4101+ "喜看稻菽千重浪",
4102+ "好这位同志呀",
4103+ "如今",
4104+ "学写诗歌",
4105+ "得那样深沉",
4106+ "柜台走",
4107+ "欣慰的事吗",
4108+ "死母本花朵中雄蕊的措施",
4109+ "永远歌唱着飞翔",
4110+ "爱说话是她的天性",
4111+ "用计算机技术研究植物学问题",
4112+ "真没治",
4113+ "立在地球边上放号",
4114+ "车上",
4115+ "那",
4116+ "香雪想快点跑过去"
4117+ ],
4118+ "stage": "skill_belt",
4119+ "activation": 0.3252,
4120+ "potential": 3.879,
4121+ "candidate_score": 62.1191,
4122+ "stable_steps": 44
4123+ },
4124+ {
4125+ "region": "2",
4126+ "nodes": [
4127+ "一般地",
4128+ "习题1",
4129+ "书书书",
4130+ "例如",
4131+ "图1",
4132+ "0",
4133+ "1",
4134+ "1正数和负数",
4135+ "2",
4136+ "3",
4137+ "4",
4138+ "5",
4139+ "6"
4140+ ],
4141+ "stage": "ability_core",
4142+ "activation": 3.1686,
4143+ "potential": 14.7568,
4144+ "candidate_score": 56.2438,
4145+ "stable_steps": 100
4146+ },
4147+ {
4148+ "region": "7",
4149+ "nodes": [
4150+ "0",
4151+ "2",
4152+ "4",
4153+ "6",
4154+ "82",
4155+ "一共有缸",
4156+ "个分成两堆",
4157+ "他连续",
4158+ "只",
4159+ "图里有什么",
4160+ "填一填",
4161+ "山海",
4162+ "散文诗二首",
4163+ "有几种分法",
4164+ "根摆一摆",
4165+ "用",
4166+ "练习十",
4167+ "阅读第二单元"
4168+ ],
4169+ "stage": "experience",
4170+ "activation": 0.4386,
4171+ "potential": 0.2724,
4172+ "candidate_score": 50.6499,
4173+ "stable_steps": 162
4174+ },
4175+ {
4176+ "region": "2",
4177+ "nodes": [
4178+ "20021204",
4179+ "两个负数",
4180+ "乘方",
4181+ "产生数1",
4182+ "例5的运算过程也可以简单地写为",
4183+ "其中9个格中的点数分别是1",
4184+ "即",
4185+ "它们的积是正的还是负的",
4186+ "有理数",
4187+ "有理数的除法",
4188+ "某年",
4189+ "正数大于负数",
4190+ "29",
4191+ "59",
4192+ "9"
4193+ ],
4194+ "stage": "skill_belt",
4195+ "activation": 0.0,
4196+ "potential": 0.6808,
4197+ "candidate_score": 45.4331,
4198+ "stable_steps": 98
4199+ },
4200+ {
4201+ "region": "第一单元",
4202+ "nodes": [
4203+ "11",
4204+ "13",
4205+ "15",
4206+ "19",
4207+ "21",
4208+ "23",
4209+ "25",
4210+ "27",
4211+ "29"
4212+ ],
4213+ "stage": "experience",
4214+ "activation": 0.0,
4215+ "potential": 1.1865,
4216+ "candidate_score": 27.3443,
4217+ "stable_steps": 31
4218+ }
4219+ ],
4220+ "experience_regions_count": 6,
4221+ "skill_belt_candidates": [
4222+ {
4223+ "node": "1",
4224+ "score": 5.24,
4225+ "stage": "ability_core",
4226+ "flow": 6.3569,
4227+ "stable_steps": 99,
4228+ "touches": 83,
4229+ "target_core": "2"
4230+ },
4231+ {
4232+ "node": "语文必修上册",
4233+ "score": 5.1961,
4234+ "stage": "ability_core",
4235+ "flow": 2.6345,
4236+ "stable_steps": 49,
4237+ "touches": 58,
4238+ "target_core": "1"
4239+ },
4240+ {
4241+ "node": "第二单元",
4242+ "score": 5.0418,
4243+ "stage": "ability_core",
4244+ "flow": 1.9409,
4245+ "stable_steps": 50,
4246+ "touches": 22,
4247+ "target_core": "1"
4248+ },
4249+ {
4250+ "node": "2",
4251+ "score": 4.9748,
4252+ "stage": "ability_core",
4253+ "flow": 1.2117,
4254+ "stable_steps": 98,
4255+ "touches": 103,
4256+ "target_core": "2"
4257+ },
4258+ {
4259+ "node": "5",
4260+ "score": 4.8806,
4261+ "stage": "ability_core",
4262+ "flow": 1.0354,
4263+ "stable_steps": 95,
4264+ "touches": 65,
4265+ "target_core": "2"
4266+ },
4267+ {
4268+ "node": "3",
4269+ "score": 4.8705,
4270+ "stage": "ability_core",
4271+ "flow": 0.9162,
4272+ "stable_steps": 89,
4273+ "touches": 59,
4274+ "target_core": "2"
4275+ }
4276+ ],
4277+ "skill_belt_candidates_count": 6,
4278+ "sedimentation_trace_count": 20,
4279+ "merge_events_count": 12,
4280+ "decay_events_count": 24,
4281+ "output_mode": "minimal",
4282+ "feedback_effect": {
4283+ "source": "emit",
4284+ "mode": "minimal",
4285+ "queued_tokens": [
4286+ "1"
4287+ ],
4288+ "queued_strength": 0.22,
4289+ "confidence_proxy": 0.3352,
4290+ "queued_step": 250,
4291+ "last_applied_step": null
4292+ },
4293+ "phi_range": {
4294+ "min": 0.0009,
4295+ "max": 5.6672
4296+ }
4297+ },
4298+ "surfaced_tokens_or_phrases": {
4299+ "emit_output": "minimal: 1",
4300+ "active_region": [
4301+ "1",
4302+ "第二单元",
4303+ "科学队长",
4304+ "钟扬可以称作"
4305+ ],
4306+ "phi_top_nodes": [
4307+ "1",
4308+ "语文必修上册",
4309+ "2",
4310+ "第二单元",
4311+ "第一单元"
4312+ ],
4313+ "mu_top_nodes": [
4314+ "1",
4315+ "第二单元",
4316+ "科学队长",
4317+ "钟扬可以称作",
4318+ "了"
4319+ ],
4320+ "top_flow_edges": [
4321+ "1->2",
4322+ "1->5",
4323+ "1->3",
4324+ "1->计算",
4325+ "1->4"
4326+ ]
4327+ },
4328+ "fairness_note": "Branch B passed this exact 250-record all-in-one composition with a larger graph and cleaner subject-word surface. Branch A completed the same split family, but emit_output='minimal: 1' and output_mode=minimal.",
4329+ "metric_alignment_notes": [
4330+ {
4331+ "metric": "attention_used_total",
4332+ "status": "N/A",
4333+ "detail": "Branch A exposes free_capacity but not attention.used/attention.total."
4334+ },
4335+ {
4336+ "metric": "activated_output_payload",
4337+ "status": "STRUCTURAL MISMATCH",
4338+ "detail": "Branch A emit() returns a string, not Branch B's structured activated-node payload."
4339+ },
4340+ {
4341+ "metric": "bigram_emergent_words_and_circuits",
4342+ "status": "STRUCTURAL MISMATCH",
4343+ "detail": "Branch A does not expose Branch-B-style bigram/circuit extraction; surfaced phrases are reported only from emit_output, active_region, top phi nodes, top mu nodes, and top flows."
4344+ }
4345+ ],
4346+ "stability_checks": {
4347+ "all_finite": true,
4348+ "within_branch_b_stability_ceiling": true,
4349+ "phi_range": {
4350+ "min": 0.0009,
4351+ "max": 5.6672
4352+ }
4353+ }
4354+ }
4355+ ],
4356+ "overall_summary": {
4357+ "scenario_count": 8,
4358+ "status_counts": {
4359+ "PASS": 6,
4360+ "FAIL": 2,
4361+ "STRUCTURAL MISMATCH": 0,
4362+ "N/A": 0
4363+ },
4364+ "failed_scenarios": [
4365+ "初中数学_pipeline+stability",
4366+ "cross_subject_小学"
4367+ ],
4368+ "structural_mismatch_scenarios": [],
4369+ "same_hydrated_formal_dataset_family_as_branch_b": true,
4370+ "fairness_gap_materially_reduced": true,
4371+ "materially_changes_earlier_ab_recommendation": false,
4372+ "current_recommendation": "keep Branch B kernel, absorb Branch A shell/reporting ideas",
4373+ "summary": "Branch A has now been executed on the same hydrated cie-datasets formal split family and the same eight-scenario family used by the audited Branch B strict rerun. The fairness gap is materially reduced, but Branch A still shows two stability failures under the mirrored ceiling and multiple surface mismatches, so the earlier 'kernel from B, shell from A' recommendation does not change."
4374+ },
4375+ "structural_mismatches": [
4376+ {
4377+ "scope": "global",
4378+ "metric": "activated_output_payload",
4379+ "status": "STRUCTURAL MISMATCH",
4380+ "detail": "Branch A emit() exposes a string plus snapshot summaries, not Branch B's structured activated-node payload."
4381+ },
4382+ {
4383+ "scope": "global",
4384+ "metric": "bigram_emergent_words_and_circuits",
4385+ "status": "STRUCTURAL MISMATCH",
4386+ "detail": "Branch A does not expose Branch-B-style bigram/circuit extraction; surfaced phrases are reported only from emit_output, active_region, top phi nodes, top mu nodes, and top flows."
4387+ },
4388+ {
4389+ "scope": "global",
4390+ "metric": "attention_used_total",
4391+ "status": "N/A",
4392+ "detail": "Branch A exposes free_capacity but not attention.used/attention.total."
4393+ },
4394+ {
4395+ "scope": "subject_pipeline",
4396+ "metric": "post_feedback_snapshot_timing",
4397+ "status": "STRUCTURAL MISMATCH",
4398+ "detail": "Branch B's subject pipeline report observes immediate post-feedback state. Branch A feedback is queued, so the port steps once after commit_feedback() to record the applied effect."
4399+ }
4400+ ],
4401+ "known_limitations": [
4402+ "Branch A tokenizes each ingest call with a Unicode word regex and keeps at most 8 tokens, so the same JSONL records compress into a much coarser graph than Branch B's character/bigram surface.",
4403+ "Branch A remained in minimal output mode with zero free_capacity on all eight audited scenarios, so PASS here means stable execution on the same hydrated dataset family, not parity with Branch B's richer surface.",
4404+ "Branch A phi range is derived from runtime.state.phi for audit purposes because snapshot_state() does not expose phi min/max directly.",
4405+ "The strict rerun is validation-only: it does not modify cie/runtime.py, cie/state.py, cie/graph.py, cie/validation.py, or any Branch B runtime code."
4406+ ],
4407+ "recommendation": {
4408+ "decision": "does not change the current recommendation",
4409+ "kernel_from_b_shell_from_a": "unchanged",
4410+ "reason": "Branch A now has a fair same-dataset rerun record (6 PASS, 2 FAIL, 0 STRUCTURAL MISMATCH, 0 N/A at the scenario level), but it still does not match Branch B's formal validation cleanliness or observable richness. The earlier recommendation to keep the kernel from Branch B and absorb the shell/reporting strengths from Branch A remains the most defensible conclusion."
4411+ },
4412+ "branch_under_test": "review/branch-a-formal-dataset-strict-rerun",
4413+ "script_used": "tests/formal_validation_branch_a_strict_rerun.py",
4414+ "script_provenance": {
4415+ "scenario_template_from": "c11091603a6b60e7d459e77dffd9f8a2ee1c0776:tests/formal_validation_strict_rerun.py",
4416+ "reference_report_from": "c11091603a6b60e7d459e77dffd9f8a2ee1c0776:reports/2026-03-31_branch_b_formal_validation_strict_rerun.md",
4417+ "port_path": "tests/formal_validation_branch_a_strict_rerun.py",
4418+ "runtime_under_test": "cie runtime exactly as frozen at 419ae8d39150806011c1eb6082c7fc8c6a337735"
4419+ },
4420+ "compatibility_deviations": [
4421+ {
4422+ "type": "output_surface",
4423+ "detail": "Branch A emit() returns a plain string, not Branch B's structured activated-node payload. This port records emit_output plus Branch A's actual active_region/top-node observables instead."
4424+ },
4425+ {
4426+ "type": "feedback_timing",
4427+ "detail": "The subject pipeline scenarios use the first active_region node as the closest honest Branch A feedback target and then run one additional step after commit_feedback(), because Branch A applies feedback on the next step."
4428+ },
4429+ {
4430+ "type": "metric_alignment",
4431+ "detail": "Branch A has no Branch-B-style bigram/circuit extractor and no attention used/total counters; those metrics are reported as STRUCTURAL MISMATCH or N/A rather than fabricated."
4432+ }
4433+ ],
4434+ "rerun_performed": true,
4435+ "rerun_command": "python3 tests/formal_validation_branch_a_strict_rerun.py"
4436+}
1@@ -0,0 +1,71 @@
2+# Branch A formal validation strict rerun
3+
4+## 1. purpose
5+
6+Run frozen Branch A on the same hydrated `/Users/george/code/cie-datasets` formal split family and the same eight-scenario family used by the audited Branch B strict rerun, without modifying Branch A runtime behavior.
7+
8+## 2. Branch A base commit
9+
10+- Branch A base commit: `419ae8d39150806011c1eb6082c7fc8c6a337735`
11+- Branch under test: `review/branch-a-formal-dataset-strict-rerun`
12+- Script used: `tests/formal_validation_branch_a_strict_rerun.py`
13+
14+## 3. Branch B reference strict rerun basis
15+
16+- Reference commit: `c11091603a6b60e7d459e77dffd9f8a2ee1c0776`
17+- Reference script: `c11091603a6b60e7d459e77dffd9f8a2ee1c0776:tests/formal_validation_strict_rerun.py`
18+- Reference report: `c11091603a6b60e7d459e77dffd9f8a2ee1c0776:reports/2026-03-31_branch_b_formal_validation_strict_rerun.md`
19+- Compatibility deviations:
20+- Branch A emit() returns a plain string, not Branch B's structured activated-node payload. This port records emit_output plus Branch A's actual active_region/top-node observables instead.
21+- The subject pipeline scenarios use the first active_region node as the closest honest Branch A feedback target and then run one additional step after commit_feedback(), because Branch A applies feedback on the next step.
22+- Branch A has no Branch-B-style bigram/circuit extractor and no attention used/total counters; those metrics are reported as STRUCTURAL MISMATCH or N/A rather than fabricated.
23+
24+## 4. dataset hydration check
25+
26+- Dataset repo: `/Users/george/code/cie-datasets`
27+- Git LFS: `git-lfs/3.7.1 (GitHub; darwin arm64; go 1.25.3)`
28+- Hydration status: `SUCCESS`
29+
30+| Stage | Subject | Relative path | Hydrated | Line count | First line preview |
31+|---|---|---|---|---:|---|
32+| 小学 | 语文 | `splits/by_stage_subject/小学/语文.jsonl` | yes | 1597 | {"concept": "教材非正文页", "layer": "boundary |
33+| 小学 | 数学 | `splits/by_stage_subject/小学/数学.jsonl` | yes | 7459 | {"concept": "教材非正文页", "layer": "boundary |
34+| 小学 | 科学 | `splits/by_stage_subject/小学/科学.jsonl` | yes | 5032 | {"concept": "教材非正文页", "layer": "boundary |
35+| 初中 | 语文 | `splits/by_stage_subject/初中/语文.jsonl` | yes | 961 | {"concept": "教材非正文页", "layer": "boundary |
36+| 初中 | 数学 | `splits/by_stage_subject/初中/数学.jsonl` | yes | 5559 | {"concept": "教材非正文页", "layer": "boundary |
37+| 高中 | 语文 | `splits/by_stage_subject/高中/语文.jsonl` | yes | 694 | {"concept": "教材非正文页", "layer": "boundary |
38+
39+## 5. scenario-by-scenario result table
40+
41+| Scenario | Status | Records fed | Branch B strict rerun reference | Branch A observed | Fairness note |
42+|---|---|---:|---|---|---|
43+| 小学语文_pipeline+stability | PASS | 200 | nodes=1268, edges=6338, phi=[-0.113,0.598], mode=full, words=课文/本文/改动/有改/什么 | phi_nodes=272, mu_active=43, J_edges=328, phi=[0.0037,5.5287], mode=minimal | Branch B passed this scenario in full mode with subject-specific emergent words. Branch A surfaced ['选作课文时有改动', '王老师急得直跺脚', '词句段运用'] and emit_output='minimal: 选作课文时有改动'; the run is on the same hydrated split and record schedule, but the observable surface is smaller and coarser. |
44+| 小学数学_pipeline+stability | PASS | 200 | nodes=803, edges=4367, phi=[-8.300,0.579], mode=full, words=多少/一共/什么/下面/可以 | phi_nodes=413, mu_active=35, J_edges=628, phi=[0.0017,7.7346], mode=minimal | Branch B passed this scenario in full mode with subject-specific emergent words. Branch A surfaced ['1', '个', '分一分'] and emit_output='minimal: 1'; the run is on the same hydrated split and record schedule, but the observable surface is smaller and coarser. |
45+| 初中语文_pipeline+stability | PASS | 200 | nodes=1702, edges=9372, phi=[-0.085,0.553], mode=full, words=阅读/单元/读第/我们/第一 | phi_nodes=337, mu_active=39, J_edges=380, phi=[0.0003,7.7871], mode=minimal | Branch B passed this scenario in full mode with subject-specific emergent words. Branch A surfaced ['①', '综合性学习', '保天下者'] and emit_output='minimal: ①'; the run is on the same hydrated split and record schedule, but the observable surface is smaller and coarser. |
46+| 初中数学_pipeline+stability | FAIL | 200 | nodes=886, edges=5437, phi=[-0.045,0.479], mode=full, words=方程/可以/图形/问题/我们 | phi_nodes=396, mu_active=32, J_edges=629, phi=[0.0007,12.0662], mode=minimal | Branch B passed this scenario in full mode with subject-specific emergent words. Branch A surfaced ['1', '2', '进而从图象探索二次函数的性质'] and emit_output='minimal: 1'; the run is on the same hydrated split and record schedule, but the observable surface is smaller and coarser. |
47+| 高中语文_pipeline+stability | PASS | 200 | nodes=1857, edges=9983, phi=[-8.266,0.239], mode=full, words=单元/语文/必修/文必/上册 | phi_nodes=304, mu_active=34, J_edges=335, phi=[0.0094,5.1234], mode=minimal | Branch B passed this scenario in full mode with subject-specific emergent words. Branch A surfaced ['语文必修上册', '语文必修下册', '书中超空间旅行的发现推动了一个银河帝国的兴起'] and emit_output='minimal: 语文必修上册'; the run is on the same hydrated split and record schedule, but the observable surface is smaller and coarser. |
48+| cross_stage_语文 | PASS | 120 | nodes=1128, edges=4139, phi_max=0.126, words=单元/阅读/第一/语文/一单 | phi_nodes=261, mu_active=50, J_edges=309, phi=[0.0037,4.5744], mode=minimal | Branch B passed this same 120-record cross-stage composition with stronger subject-word emergence. Branch A completed the same split family but stayed in minimal mode and surfaced ['①', '第二单元', '当然要做梦']. |
49+| cross_subject_小学 | FAIL | 120 | nodes=708, edges=2866, phi_max=10.000, cores=21, words=食物/怎样/什么/我们/本文 | phi_nodes=301, mu_active=41, J_edges=466, phi=[0.0156,20.1786], mode=minimal | Branch B passed this same anchor-conditioned three-subject composition at the stability ceiling. Branch A kept subject anchors ['语文', '数学', '科学'] visible, but emit_output='minimal: 语文' and the run stayed in minimal mode. |
50+| all_in_one_5subjects | PASS | 250 | fed=250, nodes=1373, edges=6863, phi_max=0.198, words=单元/阅读/语文/第一/一单 | phi_nodes=304, mu_active=52, J_edges=412, phi=[0.0009,5.6672], mode=minimal | Branch B passed this exact 250-record all-in-one composition with a larger graph and cleaner subject-word surface. Branch A completed the same split family, but emit_output='minimal: 1' and output_mode=minimal. |
51+
52+## 6. structural mismatch section
53+
54+- `activated_output_payload` (STRUCTURAL MISMATCH): Branch A emit() exposes a string plus snapshot summaries, not Branch B's structured activated-node payload.
55+- `bigram_emergent_words_and_circuits` (STRUCTURAL MISMATCH): Branch A does not expose Branch-B-style bigram/circuit extraction; surfaced phrases are reported only from emit_output, active_region, top phi nodes, top mu nodes, and top flows.
56+- `attention_used_total` (N/A): Branch A exposes free_capacity but not attention.used/attention.total.
57+- `post_feedback_snapshot_timing` (STRUCTURAL MISMATCH): Branch B's subject pipeline report observes immediate post-feedback state. Branch A feedback is queued, so the port steps once after commit_feedback() to record the applied effect.
58+
59+## 7. fairness interpretation
60+
61+- Branch A can now honestly be said to have been evaluated on the same hydrated formal dataset family as Branch B: `True`.
62+- Scenario counts: PASS=6, FAIL=2, STRUCTURAL MISMATCH=0, N/A=0.
63+- Branch A still differs materially in surface quality: every scenario stayed in minimal mode with zero free_capacity, and Branch-B-specific output/word/circuit metrics remain unavailable.
64+
65+## 8. does this reduce the final A/B fairness gap?
66+
67+- Yes. `True`. The last major fairness gap was unmatched formal-dataset coverage, and this rerun closes that gap materially.
68+
69+## 9. does this change the current A/B recommendation?
70+
71+- No. `unchanged`.
72+- Reason: Branch A now has a fair same-dataset rerun record (6 PASS, 2 FAIL, 0 STRUCTURAL MISMATCH, 0 N/A at the scenario level), but it still does not match Branch B's formal validation cleanliness or observable richness. The earlier recommendation to keep the kernel from Branch B and absorb the shell/reporting strengths from Branch A remains the most defensible conclusion.
1@@ -0,0 +1,16 @@
2+# Branch A formal validation strict rerun summary
3+
4+## What Was Run
5+- Frozen Branch A runtime from `419ae8d39150806011c1eb6082c7fc8c6a337735` on branch `review/branch-a-formal-dataset-strict-rerun`.
6+- Same hydrated cie-datasets formal split family used by the audited Branch B strict rerun under `/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28`.
7+- Same eight-scenario family: 小学语文_pipeline+stability, 小学数学_pipeline+stability, 初中语文_pipeline+stability, 初中数学_pipeline+stability, 高中语文_pipeline+stability, cross_stage_语文, cross_subject_小学, all_in_one_5subjects.
8+
9+## Outcome
10+- Succeeded: 小学语文_pipeline+stability, 小学数学_pipeline+stability, 初中语文_pipeline+stability, 高中语文_pipeline+stability, cross_stage_语文, all_in_one_5subjects
11+- Failed: 初中数学_pipeline+stability, cross_subject_小学
12+- Scenario-level structural mismatches: none
13+- Global structural mismatches/N/A metrics: activated_output_payload, bigram_emergent_words_and_circuits, attention_used_total, post_feedback_snapshot_timing
14+
15+## Recommendation
16+- The current 'kernel from B, shell from A' recommendation does not change.
17+- Reason: Branch A now has a fair same-dataset rerun record (6 PASS, 2 FAIL, 0 STRUCTURAL MISMATCH, 0 N/A at the scenario level), but it still does not match Branch B's formal validation cleanliness or observable richness. The earlier recommendation to keep the kernel from Branch B and absorb the shell/reporting strengths from Branch A remains the most defensible conclusion.
+1071,
-0
1@@ -0,0 +1,1071 @@
2+#!/usr/bin/env python3
3+"""Strict fairness rerun of frozen Branch A on the hydrated formal dataset family."""
4+
5+from __future__ import annotations
6+
7+import json
8+import math
9+import subprocess
10+import sys
11+from pathlib import Path
12+from typing import Any
13+
14+REPO_ROOT = Path(__file__).resolve().parents[1]
15+if str(REPO_ROOT) not in sys.path:
16+ sys.path.insert(0, str(REPO_ROOT))
17+
18+from cie import CIERuntime
19+
20+
# Commit hashes and repo-relative artifact names identifying the two sides of the comparison.
# BRANCH_A_BASE_COMMIT is the Branch A commit this rerun is based on; the BRANCH_B_* values
# name the reference commit plus the script and Markdown report stored at that commit.
BRANCH_A_BASE_COMMIT = "419ae8d39150806011c1eb6082c7fc8c6a337735"
BRANCH_B_REFERENCE_COMMIT = "c11091603a6b60e7d459e77dffd9f8a2ee1c0776"
BRANCH_B_REFERENCE_SCRIPT = "tests/formal_validation_strict_rerun.py"
BRANCH_B_REFERENCE_REPORT_MD = "reports/2026-03-31_branch_b_formal_validation_strict_rerun.md"
# Name of the git branch on which this rerun is performed.
STRICT_RERUN_BRANCH = "review/branch-a-formal-dataset-strict-rerun"

# Locations of the three artifacts for this rerun, resolved against the repository root.
# NOTE(review): presumably written by the script's entry point, which is not shown here.
REPORT_JSON_PATH = REPO_ROOT / "reports/2026-03-31_branch_a_formal_validation_strict_rerun.json"
REPORT_MD_PATH = REPO_ROOT / "reports/2026-03-31_branch_a_formal_validation_strict_rerun.md"
REVIEW_MD_PATH = REPO_ROOT / "reviews/2026-03-31_branch_a_formal_validation_strict_rerun.md"

# NOTE(review): machine-specific absolute path; the script only works where the dataset
# checkout lives at exactly this location.
DATASET_REPO = Path("/Users/george/code/cie-datasets")
# Versioned dataset directory inside the checkout; every split path is resolved below it.
DATASET_ROOT = DATASET_REPO / "china_text_book_md" / "v2026-03-28"
33+
# (stage, subject) pairs whose split files must exist and be hydrated before scenarios run.
REQUIRED_DATASET_PATHS = [
    ("小学", "语文"),
    ("小学", "数学"),
    ("小学", "科学"),
    ("初中", "语文"),
    ("初中", "数学"),
    ("高中", "语文"),
]
# (stage, subject) pairs matching the five "<stage><subject>_pipeline+stability" scenario names.
# NOTE(review): presumably iterated by the entry point to call run_subject_pipeline — confirm.
PRIMARY_COMBOS = [
    ("小学", "语文"),
    ("小学", "数学"),
    ("初中", "语文"),
    ("初中", "数学"),
    ("高中", "语文"),
]
# Names of the eight scenarios in report order; these are also the keys of BRANCH_B_REFERENCE.
SCENARIO_FAMILY = [
    "小学语文_pipeline+stability",
    "小学数学_pipeline+stability",
    "初中语文_pipeline+stability",
    "初中数学_pipeline+stability",
    "高中语文_pipeline+stability",
    "cross_stage_语文",
    "cross_subject_小学",
    "all_in_one_5subjects",
]
# The only status strings make_result() accepts; anything else raises ValueError.
VALID_SCENARIO_STATUSES = ("PASS", "FAIL", "STRUCTURAL MISMATCH", "N/A")
# Largest absolute phi value a scenario may reach and still pass (see range_within_ceiling).
STABILITY_LIMIT = 10.1
61+
# Top-level keys expected in the JSON report.
# NOTE(review): presumably validated against the report payload before it is written; the
# consumer of this tuple is outside the visible part of the file — confirm.
REQUIRED_JSON_TOP_LEVEL_KEYS = (
    "branch_a_base_commit",
    "branch_b_reference_commit",
    "dataset_repo_status",
    "required_dataset_paths",
    "hydration_status",
    "scenario_family",
    "per_scenario_results",
    "overall_summary",
    "structural_mismatches",
    "known_limitations",
    "recommendation",
)
75+
# Documented ways in which this Branch A port deviates from the Branch B reference script.
# Each entry is a {"type", "detail"} pair; the detail text is emitted verbatim in the reports,
# so it must not be reworded casually.
COMPATIBILITY_DEVIATIONS = [
    {
        "type": "output_surface",
        "detail": (
            "Branch A emit() returns a plain string, not Branch B's structured activated-node payload. "
            "This port records emit_output plus Branch A's actual active_region/top-node observables instead."
        ),
    },
    {
        "type": "feedback_timing",
        "detail": (
            "The subject pipeline scenarios use the first active_region node as the closest honest Branch A feedback "
            "target and then run one additional step after commit_feedback(), because Branch A applies feedback on the next step."
        ),
    },
    {
        "type": "metric_alignment",
        "detail": (
            "Branch A has no Branch-B-style bigram/circuit extractor and no attention used/total counters; those metrics are "
            "reported as STRUCTURAL MISMATCH or N/A rather than fabricated."
        ),
    },
]
99+
# Hard-coded Branch B reference results, keyed by scenario name (same names as SCENARIO_FAMILY).
# Each entry carries the Branch B status, a one-line summary string and its top words.
# NOTE(review): these values are transcribed constants, not recomputed here; they presumably
# come from the Branch B report named in BRANCH_B_REFERENCE_REPORT_MD — verify when that
# report changes.
BRANCH_B_REFERENCE = {
    "小学语文_pipeline+stability": {
        "status": "PASS",
        "summary": "nodes=1268, edges=6338, phi=[-0.113,0.598], mode=full, words=课文/本文/改动/有改/什么",
        "top_words": ["课文", "本文", "改动", "有改", "什么"],
    },
    "小学数学_pipeline+stability": {
        "status": "PASS",
        "summary": "nodes=803, edges=4367, phi=[-8.300,0.579], mode=full, words=多少/一共/什么/下面/可以",
        "top_words": ["多少", "一共", "什么", "下面", "可以"],
    },
    "初中语文_pipeline+stability": {
        "status": "PASS",
        "summary": "nodes=1702, edges=9372, phi=[-0.085,0.553], mode=full, words=阅读/单元/读第/我们/第一",
        "top_words": ["阅读", "单元", "读第", "我们", "第一"],
    },
    "初中数学_pipeline+stability": {
        "status": "PASS",
        "summary": "nodes=886, edges=5437, phi=[-0.045,0.479], mode=full, words=方程/可以/图形/问题/我们",
        "top_words": ["方程", "可以", "图形", "问题", "我们"],
    },
    "高中语文_pipeline+stability": {
        "status": "PASS",
        "summary": "nodes=1857, edges=9983, phi=[-8.266,0.239], mode=full, words=单元/语文/必修/文必/上册",
        "top_words": ["单元", "语文", "必修", "文必", "上册"],
    },
    "cross_stage_语文": {
        "status": "PASS",
        "summary": "nodes=1128, edges=4139, phi_max=0.126, words=单元/阅读/第一/语文/一单",
        "top_words": ["单元", "阅读", "第一", "语文", "一单"],
    },
    "cross_subject_小学": {
        "status": "PASS",
        "summary": "nodes=708, edges=2866, phi_max=10.000, cores=21, words=食物/怎样/什么/我们/本文",
        "top_words": ["食物", "怎样", "什么", "我们", "本文"],
    },
    "all_in_one_5subjects": {
        "status": "PASS",
        "summary": "fed=250, nodes=1373, edges=6863, phi_max=0.198, words=单元/阅读/语文/第一/一单",
        "top_words": ["单元", "阅读", "语文", "第一", "一单"],
    },
}
142+
143+
144+def _round(value: float) -> float:
145+ return round(float(value), 4)
146+
147+
def git(cmd: list[str], cwd: Path = REPO_ROOT, check: bool = True) -> subprocess.CompletedProcess[str]:
    """Run ``cmd`` as a subprocess and return the completed process.

    Args:
        cmd: Full argument vector, including the leading ``"git"``.
        cwd: Working directory for the command; defaults to the repository root.
        check: Forwarded to ``subprocess.run``; when true a non-zero exit status
            raises ``subprocess.CalledProcessError``.

    Returns:
        The ``CompletedProcess`` with stdout and stderr captured as text.
    """
    run_options = {
        "cwd": cwd,
        "text": True,
        "capture_output": True,
        "check": check,
    }
    return subprocess.run(cmd, **run_options)
156+
157+
def rel_dataset_path(stage: str, subject: str) -> str:
    """Return the split file path for ``stage``/``subject`` relative to the dataset root."""
    return "splits/by_stage_subject/{}/{}.jsonl".format(stage, subject)
160+
161+
def abs_dataset_path(stage: str, subject: str) -> Path:
    """Return the split file path for ``stage``/``subject`` located under ``DATASET_ROOT``."""
    relative = rel_dataset_path(stage, subject)
    return DATASET_ROOT.joinpath(relative)
164+
165+
def is_pointer_stub(path: Path) -> bool:
    """Report whether ``path`` is an unhydrated Git LFS pointer file.

    A file counts as a pointer when its first line, stripped of surrounding
    whitespace, equals the Git LFS spec header. A missing path yields ``False``.
    """
    lfs_header = "version https://git-lfs.github.com/spec/v1"
    if path.exists():
        with path.open("r", encoding="utf-8", errors="replace") as handle:
            first_line = handle.readline()
        return first_line.strip() == lfs_header
    return False
171+
172+
def count_lines(path: Path) -> int:
    """Count the lines of ``path``, decoding as UTF-8 with undecodable bytes replaced."""
    total = 0
    with path.open("r", encoding="utf-8", errors="replace") as handle:
        for _line in handle:
            total += 1
    return total
176+
177+
def collect_dataset_repo_status() -> dict[str, Any]:
    """Gather git and Git LFS facts about the dataset checkout.

    All commands run inside ``DATASET_REPO`` with ``check=True``, so any failing
    git invocation raises ``subprocess.CalledProcessError``.

    Returns:
        A dict with the repo path, HEAD hash, work-tree flag, git-lfs version,
        non-blank ``git status --short`` and ``git remote -v`` lines, and the
        ``git lfs ls-files`` lines that mention one of the required split paths.
    """

    def _stdout(*args: str) -> str:
        return git(["git", *args], cwd=DATASET_REPO).stdout

    def _non_blank_lines(text: str) -> list[str]:
        return [line for line in text.splitlines() if line.strip()]

    # Command order is kept identical to the original so the first failure is the same one.
    inside_work_tree = _stdout("rev-parse", "--is-inside-work-tree")
    lfs_version = _stdout("lfs", "version")
    status_short = _stdout("status", "--short")
    remote_v = _stdout("remote", "-v")
    lfs_listing = _stdout("lfs", "ls-files")
    head = _stdout("rev-parse", "HEAD")

    required_rel_paths = [rel_dataset_path(stage, subject) for stage, subject in REQUIRED_DATASET_PATHS]
    lfs_required = [
        line
        for line in lfs_listing.splitlines()
        if any(rel_path in line for rel_path in required_rel_paths)
    ]

    return {
        "path": str(DATASET_REPO),
        "head": head.strip(),
        "is_git_repo": inside_work_tree.strip() == "true",
        "git_lfs_version": lfs_version.strip(),
        "status_short": _non_blank_lines(status_short),
        "remote_v": _non_blank_lines(remote_v),
        "lfs_required_entries": lfs_required,
    }
197+
198+
def verify_required_dataset_paths() -> list[dict[str, Any]]:
    """Inspect every required split file and describe its hydration state.

    Returns:
        One dict per ``REQUIRED_DATASET_PATHS`` entry, in the same order, holding
        the stage, subject, relative and absolute path, existence and LFS-pointer
        flags, a ``hydrated`` flag (exists, not a pointer, first line starts with
        ``{``), the line count, and 200-character previews of the first two lines.
    """

    def _inspect(stage: str, subject: str) -> dict[str, Any]:
        dataset_file = abs_dataset_path(stage, subject)
        present = dataset_file.exists()
        stub = present and is_pointer_stub(dataset_file)
        previews = ["", ""]
        total_lines = 0
        if present:
            with dataset_file.open("r", encoding="utf-8", errors="replace") as handle:
                previews = [handle.readline().rstrip("\n") for _ in range(2)]
            total_lines = count_lines(dataset_file)
        return {
            "stage": stage,
            "subject": subject,
            "relative_path": rel_dataset_path(stage, subject),
            "path": str(dataset_file),
            "exists": present,
            "is_pointer_stub_now": stub,
            "hydrated": present and not stub and previews[0].startswith("{"),
            "line_count": total_lines,
            "first_line_preview": previews[0][:200],
            "second_line_preview": previews[1][:200],
        }

    return [_inspect(stage, subject) for stage, subject in REQUIRED_DATASET_PATHS]
229+
230+
def hydration_blockers(required_paths: list[dict[str, Any]]) -> list[str]:
    """List one human-readable blocker message per unusable dataset entry.

    Each entry yields at most one message, chosen in priority order:
    missing file, LFS pointer still present, not usable JSONL.
    """

    def _blocker(entry: dict[str, Any]) -> str | None:
        if not entry["exists"]:
            return f"missing dataset file: {entry['path']}"
        if entry["is_pointer_stub_now"]:
            return f"LFS pointer still present: {entry['path']}"
        if not entry["hydrated"]:
            return f"dataset file is not usable JSONL: {entry['path']}"
        return None

    candidates = (_blocker(entry) for entry in required_paths)
    return [message for message in candidates if message is not None]
241+
242+
def attempt_git_lfs_pull() -> dict[str, Any]:
    """Run ``git lfs pull`` in the dataset repo without raising on failure.

    Returns:
        The command's return code plus its stripped stdout and stderr.
    """
    pull = git(["git", "lfs", "pull"], cwd=DATASET_REPO, check=False)
    outcome: dict[str, Any] = {"returncode": pull.returncode}
    outcome["stdout"] = pull.stdout.strip()
    outcome["stderr"] = pull.stderr.strip()
    return outcome
250+
251+
def load_recs(stage: str, subject: str, max_n: int) -> tuple[list[str], dict[str, Any]]:
    """Load up to ``max_n`` usable text records from one stage/subject split.

    A record is usable when its ``is_content`` field is truthy and its ``text``
    field is a string of at least four characters.

    Args:
        stage: Stage directory name of the split.
        subject: Subject file stem of the split.
        max_n: Maximum number of texts to return; values ``<= 0`` return none.

    Returns:
        ``(texts, meta)`` where ``meta`` records the file path, how many content
        records and valid-text records were seen before reading stopped, and how
        many were loaded. The "seen" counters cover only the part of the file
        read before the cap was hit, not the whole file.

    Raises:
        OSError: If the split file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    path = abs_dataset_path(stage, subject)
    recs: list[str] = []
    seen_content = 0
    seen_valid_text = 0
    with path.open("r", encoding="utf-8", errors="replace") as handle:
        for line in handle:
            # Checking the cap before processing means max_n <= 0 yields no records;
            # the original only checked after appending and so returned one anyway.
            if len(recs) >= max_n:
                break
            # Blank lines (e.g. a stray trailing newline) carry no record; skip them
            # instead of letting json.loads fail the whole scenario.
            if not line.strip():
                continue
            rec = json.loads(line)
            if not rec.get("is_content"):
                continue
            seen_content += 1
            text = rec.get("text", "")
            # A JSON null or other non-string text has no usable content; treat it like
            # a too-short text rather than crashing on len() or on later slicing.
            if not isinstance(text, str) or len(text) < 4:
                continue
            seen_valid_text += 1
            recs.append(text)
    return recs, {
        "path": str(path),
        "available_content_records_seen_before_cap": seen_content,
        "available_valid_text_records_seen_before_cap": seen_valid_text,
        "loaded_records": len(recs),
    }
276+
277+
def phi_range(runtime: CIERuntime) -> dict[str, float] | None:
    """Return the rounded min and max of ``runtime.state.phi`` values, or ``None`` when empty."""
    phi_values = list(runtime.state.phi.values())
    if phi_values:
        return {"min": _round(min(phi_values)), "max": _round(max(phi_values))}
    return None
283+
284+
def all_finite(runtime: CIERuntime) -> bool:
    """Return ``True`` when every value in the state's phi, mu, J and anchor_nodes mappings is finite."""
    state = runtime.state
    tracked = (state.phi, state.mu, state.J, state.anchor_nodes)
    return all(math.isfinite(value) for mapping in tracked for value in mapping.values())
291+
292+
def range_within_ceiling(range_info: dict[str, float] | None) -> bool:
    """Return ``True`` when both range bounds lie within ``±STABILITY_LIMIT``; ``None`` is never within."""
    return range_info is not None and (
        max(abs(range_info["min"]), abs(range_info["max"])) <= STABILITY_LIMIT
    )
297+
298+
def branch_a_observables(runtime: CIERuntime, snapshot: dict[str, Any]) -> dict[str, Any]:
    """Project a runtime snapshot onto the observables recorded per scenario.

    Copies selected snapshot fields, adds sizes/counts for list-like fields, and
    appends the phi range computed from the runtime. Key order is significant
    because it determines the layout of the serialized report.
    """
    observed: dict[str, Any] = {}
    for key in ("phi_summary", "mu_summary", "J_summary", "active_region"):
        observed[key] = snapshot[key]
    observed["active_region_size"] = len(snapshot["active_region"])
    for key in ("bound_ability_core", "anchor_pull", "drift_score", "free_capacity"):
        observed[key] = snapshot[key]
    # These fields are recorded in full together with their length.
    for key in ("experience_regions", "skill_belt_candidates"):
        observed[key] = snapshot[key]
        observed[f"{key}_count"] = len(snapshot[key])
    # These fields are recorded only as counts.
    for key in ("sedimentation_trace", "merge_events", "decay_events"):
        observed[f"{key}_count"] = len(snapshot[key])
    observed["output_mode"] = snapshot["output_mode"]
    observed["feedback_effect"] = snapshot["feedback_effect"]
    observed["phi_range"] = phi_range(runtime)
    return observed
321+
322+
def surfaced_tokens_or_phrases(snapshot: dict[str, Any], emit_output: str) -> dict[str, Any]:
    """Collect the text-like surfaces of a snapshot: emit output, active region, top nodes and top flow edges."""

    def _pluck(summary_key: str, list_key: str, field: str) -> list[Any]:
        return [item[field] for item in snapshot[summary_key][list_key]]

    surfaced: dict[str, Any] = {"emit_output": emit_output}
    surfaced["active_region"] = snapshot["active_region"]
    surfaced["phi_top_nodes"] = _pluck("phi_summary", "top_nodes", "node")
    surfaced["mu_top_nodes"] = _pluck("mu_summary", "top_nodes", "node")
    surfaced["top_flow_edges"] = _pluck("J_summary", "top_flows", "edge")
    return surfaced
331+
332+
def branch_b_metric_alignment_notes(include_word_metric: bool = True) -> list[dict[str, str]]:
    """Return the notes explaining which Branch B metrics Branch A cannot report.

    Args:
        include_word_metric: When true, the bigram/circuit note is appended as a
            third entry after the attention and output-payload notes.

    Returns:
        A fresh list of ``{"metric", "status", "detail"}`` dicts.
    """

    def _note(metric: str, status: str, detail: str) -> dict[str, str]:
        return {"metric": metric, "status": status, "detail": detail}

    notes = [
        _note(
            "attention_used_total",
            "N/A",
            "Branch A exposes free_capacity but not attention.used/attention.total.",
        ),
        _note(
            "activated_output_payload",
            "STRUCTURAL MISMATCH",
            "Branch A emit() returns a string, not Branch B's structured activated-node payload.",
        ),
    ]
    if not include_word_metric:
        return notes
    notes.append(
        _note(
            "bigram_emergent_words_and_circuits",
            "STRUCTURAL MISMATCH",
            "Branch A does not expose Branch-B-style bigram/circuit extraction; surfaced phrases are reported only from "
            "emit_output, active_region, top phi nodes, top mu nodes, and top flows.",
        )
    )
    return notes
358+
359+
def make_result(
    *,
    scenario_name: str,
    status: str,
    reason: str,
    input_entries: list[dict[str, Any]],
    record_counts: dict[str, Any],
    branch_b_reference: dict[str, Any],
    branch_a_snapshot: dict[str, Any],
    surfaced: dict[str, Any],
    fairness_note: str,
    metric_alignment_notes: list[dict[str, str]],
    stability_checks: dict[str, Any],
) -> dict[str, Any]:
    """Assemble the per-scenario result record written to the report.

    Input file paths, hydration state and line counts are derived from
    ``input_entries``; all other arguments are stored as given
    (``branch_a_snapshot`` under the key ``branch_a_observables``).

    Raises:
        ValueError: If ``status`` is not one of ``VALID_SCENARIO_STATUSES``.
    """
    status_is_known = status in VALID_SCENARIO_STATUSES
    if not status_is_known:
        raise ValueError(f"invalid scenario status: {status}")

    input_files = [entry["path"] for entry in input_entries]
    fully_hydrated = all(entry["hydrated"] for entry in input_entries)
    line_counts = [entry["line_count"] for entry in input_entries]

    result: dict[str, Any] = {
        "scenario_name": scenario_name,
        "status": status,
        "reason": reason,
    }
    result["input_files"] = input_files
    result["input_files_fully_hydrated"] = fully_hydrated
    result["input_line_counts"] = line_counts
    result["record_counts"] = record_counts
    result["branch_b_reference"] = branch_b_reference
    result["branch_a_observables"] = branch_a_snapshot
    result["surfaced_tokens_or_phrases"] = surfaced
    result["fairness_note"] = fairness_note
    result["metric_alignment_notes"] = metric_alignment_notes
    result["stability_checks"] = stability_checks
    return result
391+
392+
def result_failure(
    *,
    scenario_name: str,
    reason: str,
    input_entries: list[dict[str, Any]],
    branch_b_reference: dict[str, Any],
    exc: Exception | None = None,
) -> dict[str, Any]:
    """Build a ``FAIL`` result for a scenario that produced no usable runtime state.

    When ``exc`` is given, its class name and message are appended to ``reason``.
    Observables are empty, record counts are zero and every stability check is
    reported as not satisfied.
    """
    if exc is None:
        detail = reason
    else:
        detail = f"{reason}: {exc.__class__.__name__}: {exc}"

    empty_counts = {"loaded_records_per_input": [], "fed_records": 0}
    empty_surface = {
        "emit_output": "",
        "active_region": [],
        "phi_top_nodes": [],
        "mu_top_nodes": [],
        "top_flow_edges": [],
    }
    failed_checks = {
        "all_finite": False,
        "within_branch_b_stability_ceiling": False,
        "phi_range": None,
    }
    return make_result(
        scenario_name=scenario_name,
        status="FAIL",
        reason=detail,
        input_entries=input_entries,
        record_counts=empty_counts,
        branch_b_reference=branch_b_reference,
        branch_a_snapshot={},
        surfaced=empty_surface,
        fairness_note="Scenario did not produce a usable Branch A runtime state on the audited dataset slice.",
        metric_alignment_notes=branch_b_metric_alignment_notes(),
        stability_checks=failed_checks,
    )
420+
421+
def observed_summary(result: dict[str, Any]) -> str:
    """Render a one-line summary of a result's Branch A observables.

    Returns ``"no usable runtime state"`` when the observables are missing or
    empty, and shows ``phi=?`` when no phi range was recorded.
    """
    obs = result.get("branch_a_observables", {})
    if not obs:
        return "no usable runtime state"

    phi_summary = obs["phi_summary"]
    mu_summary = obs["mu_summary"]
    j_summary = obs["J_summary"]
    bounds = obs.get("phi_range")
    phi_bits = f"phi=[{bounds['min']},{bounds['max']}]" if bounds else "phi=?"

    parts = [
        f"phi_nodes={phi_summary['node_count']}",
        f"mu_active={mu_summary['active_count']}",
        f"J_edges={j_summary['edge_count']}",
        phi_bits,
        f"mode={obs['output_mode']}",
    ]
    return ", ".join(parts)
437+
438+
def run_subject_pipeline(stage: str, subject: str, required_paths_map: dict[str, dict[str, Any]]) -> dict[str, Any]:
    """Run the single-subject "pipeline+stability" scenario for one stage/subject split.

    Loads up to 300 usable records, feeds the first 200 (each truncated to 80
    characters) into a fresh runtime with one step per record, emits, optionally
    commits positive feedback on the first active-region node followed by one more
    step, and then judges the final state.

    Args:
        stage: Stage name of the split.
        subject: Subject name of the split.
        required_paths_map: Dataset verification entries keyed by relative split path.

    Returns:
        A result dict from make_result(), or from result_failure() when no records
        were loaded or any exception was raised while running the scenario.

    Raises:
        KeyError: If the split's relative path is missing from ``required_paths_map``
            (that lookup happens before the try block).
    """
    scenario_name = f"{stage}{subject}_pipeline+stability"
    entry = required_paths_map[rel_dataset_path(stage, subject)]
    try:
        recs, load_meta = load_recs(stage, subject, 300)
        if not recs:
            return result_failure(
                scenario_name=scenario_name,
                reason="No usable content records were loaded from the audited split",
                input_entries=[entry],
                branch_b_reference=BRANCH_B_REFERENCE[scenario_name],
            )

        runtime = CIERuntime()
        # Only the first 200 of the (up to 300) loaded records are fed.
        for rec in recs[:200]:
            runtime.ingest(rec[:80])
            runtime.step(n=1)

        emit_output = runtime.emit()
        # Snapshot taken right after emit(); it supplies the feedback target and the
        # active-region sample quoted in the fairness note.
        after_emit = runtime.snapshot_state()
        feedback_target = after_emit["active_region"][0] if after_emit["active_region"] else None
        feedback_applied = False
        if feedback_target:
            runtime.commit_feedback({"text": feedback_target, "value": 1.0})
            # One extra step after commit_feedback(): per COMPATIBILITY_DEVIATIONS,
            # Branch A applies feedback on the next step.
            runtime.step()
            feedback_applied = True
        # Final snapshot, taken after any feedback step; all checks below use this one.
        snapshot = runtime.snapshot_state()
        range_info = phi_range(runtime)
        within_ceiling = range_within_ceiling(range_info)
        state_present = (
            snapshot["phi_summary"]["node_count"] > 20
            and snapshot["mu_summary"]["active_count"] > 0
            and snapshot["J_summary"]["edge_count"] > 0
        )
        finite = all_finite(runtime)
        status = "PASS" if state_present and finite and within_ceiling else "FAIL"
        # The reason names the first failing check in the order: state, finiteness, ceiling.
        if not state_present:
            reason = "Required split ran, but Branch A did not retain a non-empty phi/mu/J state after the subject pipeline."
        elif not finite:
            reason = "Required split ran, but Branch A produced non-finite runtime values."
        elif not within_ceiling:
            reason = (
                f"Required split ran, but Branch A exceeded the mirrored Branch B stability ceiling of ±{STABILITY_LIMIT} "
                f"with phi_range={range_info}."
            )
        else:
            reason = (
                f"Required split ran end-to-end on Branch A and stayed within the mirrored stability ceiling, "
                f"but output_mode remained {snapshot['output_mode']} with free_capacity={snapshot['free_capacity']}."
            )
        # NOTE(review): the note quotes the pre-feedback active region (after_emit), while
        # the "surfaced" payload below is built from the post-feedback snapshot.
        fairness_note = (
            "Branch B passed this scenario in full mode with subject-specific emergent words. "
            f"Branch A surfaced {after_emit['active_region'][:3] or ['no active region']} and emit_output={emit_output!r}; "
            "the run is on the same hydrated split and record schedule, but the observable surface is smaller and coarser."
        )
        return make_result(
            scenario_name=scenario_name,
            status=status,
            reason=reason,
            input_entries=[entry],
            record_counts={
                "available_content_records_seen_before_cap": load_meta["available_content_records_seen_before_cap"],
                "available_valid_text_records_seen_before_cap": load_meta["available_valid_text_records_seen_before_cap"],
                "loaded_records_per_input": [load_meta["loaded_records"]],
                "fed_records": min(len(recs), 200),
                "feedback_target": feedback_target,
                "feedback_applied": feedback_applied,
            },
            branch_b_reference=BRANCH_B_REFERENCE[scenario_name],
            branch_a_snapshot=branch_a_observables(runtime, snapshot),
            surfaced=surfaced_tokens_or_phrases(snapshot, emit_output),
            fairness_note=fairness_note,
            metric_alignment_notes=branch_b_metric_alignment_notes(),
            stability_checks={
                "all_finite": finite,
                "within_branch_b_stability_ceiling": within_ceiling,
                "phi_range": range_info,
            },
        )
    except Exception as exc:
        # Any runtime or loading error is reported as a FAIL result instead of aborting the rerun.
        return result_failure(
            scenario_name=scenario_name,
            reason="Scenario execution failed",
            input_entries=[entry],
            branch_b_reference=BRANCH_B_REFERENCE[scenario_name],
            exc=exc,
        )
526+
527+
def run_cross_stage(required_paths_map: dict[str, dict[str, Any]]) -> dict[str, Any]:
    """Run the cross-stage 语文 scenario over the 小学, 初中 and 高中 splits.

    For each stage, loads up to 60 usable records and feeds the first 40 (each
    truncated to 60 characters) into one shared runtime with one step per record,
    then emits and judges the final state.

    Args:
        required_paths_map: Dataset verification entries keyed by relative split path.

    Returns:
        A result dict from make_result(), or from result_failure() when any
        exception was raised while running the scenario.

    Raises:
        KeyError: If one of the three 语文 split paths is missing from
            ``required_paths_map`` (that lookup happens before the try block).
    """
    scenario_name = "cross_stage_语文"
    stages = ("小学", "初中", "高中")
    input_entries = [required_paths_map[rel_dataset_path(stage, "语文")] for stage in stages]
    try:
        runtime = CIERuntime()
        load_counts: list[int] = []
        for stage in stages:
            recs, _ = load_recs(stage, "语文", 60)
            load_counts.append(min(len(recs), 40))
            for rec in recs[:40]:
                runtime.ingest(rec[:60])
                runtime.step(n=1)
        emit_output = runtime.emit()
        snapshot = runtime.snapshot_state()
        range_info = phi_range(runtime)
        within_ceiling = range_within_ceiling(range_info)
        finite = all_finite(runtime)
        state_present = snapshot["phi_summary"]["node_count"] > 30 and snapshot["mu_summary"]["active_count"] > 0
        status = "PASS" if state_present and finite and within_ceiling else "FAIL"
        if status == "PASS":
            reason = (
                f"Cross-stage Chinese pipeline stayed finite on the same 3-split schedule and remained within the mirrored "
                f"±{STABILITY_LIMIT} ceiling."
            )
        # Non-finite values are checked before the ceiling: a NaN/inf phi also fails
        # range_within_ceiling(), so testing the ceiling first would misreport the root
        # cause. This matches the precedence used by run_subject_pipeline.
        elif not finite:
            reason = "Cross-stage Chinese pipeline produced non-finite runtime values."
        elif not within_ceiling:
            reason = (
                f"Cross-stage Chinese pipeline ran, but phi_range={range_info} exceeded the mirrored Branch B ceiling of ±{STABILITY_LIMIT}."
            )
        else:
            reason = "Cross-stage Chinese pipeline did not retain enough observable Branch A state after the mandated schedule."
        fairness_note = (
            "Branch B passed this same 120-record cross-stage composition with stronger subject-word emergence. "
            f"Branch A completed the same split family but stayed in {snapshot['output_mode']} mode and surfaced "
            f"{snapshot['active_region'][:3] or ['no active region']}."
        )
        return make_result(
            scenario_name=scenario_name,
            status=status,
            reason=reason,
            input_entries=input_entries,
            record_counts={
                "loaded_records_per_input": load_counts,
                "fed_records": sum(load_counts),
            },
            branch_b_reference=BRANCH_B_REFERENCE[scenario_name],
            branch_a_snapshot=branch_a_observables(runtime, snapshot),
            surfaced=surfaced_tokens_or_phrases(snapshot, emit_output),
            fairness_note=fairness_note,
            metric_alignment_notes=branch_b_metric_alignment_notes(),
            stability_checks={
                "all_finite": finite,
                "within_branch_b_stability_ceiling": within_ceiling,
                "phi_range": range_info,
            },
        )
    except Exception as exc:
        # Any runtime or loading error is reported as a FAIL result instead of aborting the rerun.
        return result_failure(
            scenario_name=scenario_name,
            reason="Scenario execution failed",
            input_entries=input_entries,
            branch_b_reference=BRANCH_B_REFERENCE[scenario_name],
            exc=exc,
        )
593+
594+
def run_cross_subject(required_paths_map: dict[str, dict[str, Any]]) -> dict[str, Any]:
    """Run the anchor-conditioned cross-subject scenario for the 小学 stage on Branch A.

    For each of the three subjects, at most 40 of the loaded records are
    ingested (each sliced to its first 60 items, with the subject name passed
    as the only anchor) and the runtime is stepped once per record. The
    scenario is a PASS only when all three subject names are graph nodes,
    ``all_finite`` holds, and the phi range is within the mirrored ceiling.

    Args:
        required_paths_map: Verified dataset entries keyed by relative path.
            The three 小学 entries are attached to the result as its inputs.

    Returns:
        The result built by ``make_result``, or by ``result_failure`` when
        anything inside the guarded run raises.

    Raises:
        KeyError: If one of the three dataset entries is missing from
            ``required_paths_map``; that lookup happens before the ``try``.
    """
    scenario_name = "cross_subject_小学"
    subjects = ("语文", "数学", "科学")
    input_entries = [required_paths_map[rel_dataset_path("小学", subject)] for subject in subjects]
    try:
        runtime = CIERuntime()
        load_counts: list[int] = []
        for subject in subjects:
            recs, _ = load_recs("小学", subject, 60)
            # Only the first 40 loaded records are fed, so report that capped count.
            load_counts.append(min(len(recs), 40))
            for rec in recs[:40]:
                runtime.ingest(rec[:60], anchors=[subject])
                runtime.step(n=1)
        emit_output = runtime.emit()
        snapshot = runtime.snapshot_state()
        range_info = phi_range(runtime)
        within_ceiling = range_within_ceiling(range_info)
        finite = all_finite(runtime)
        subject_nodes_present = all(subject in runtime.state.graph.nodes() for subject in subjects)
        status = "PASS" if subject_nodes_present and finite and within_ceiling else "FAIL"
        if status == "PASS":
            reason = (
                f"Cross-subject primary-school pipeline stayed finite, kept all three subject anchors observable, "
                f"and remained within the mirrored ±{STABILITY_LIMIT} ceiling."
            )
        elif not subject_nodes_present:
            reason = "Cross-subject primary-school pipeline did not keep all three subject anchors observable."
        elif not within_ceiling:
            reason = (
                f"Cross-subject primary-school pipeline ran, but phi_range={range_info} exceeded the mirrored Branch B ceiling of ±{STABILITY_LIMIT}."
            )
        else:
            reason = "Cross-subject primary-school pipeline produced non-finite runtime values."
        # Report the output mode the snapshot actually recorded rather than
        # asserting "minimal" unconditionally, matching the sibling scenarios.
        fairness_note = (
            "Branch B passed this same anchor-conditioned three-subject composition at the stability ceiling. "
            f"Branch A kept subject anchors {list(runtime.state.anchor_nodes)} visible, but emit_output={emit_output!r} "
            f"and the run stayed in {snapshot['output_mode']} mode."
        )
        return make_result(
            scenario_name=scenario_name,
            status=status,
            reason=reason,
            input_entries=input_entries,
            record_counts={
                "loaded_records_per_input": load_counts,
                "fed_records": sum(load_counts),
                "anchors_used": list(subjects),
            },
            branch_b_reference=BRANCH_B_REFERENCE[scenario_name],
            branch_a_snapshot=branch_a_observables(runtime, snapshot),
            surfaced=surfaced_tokens_or_phrases(snapshot, emit_output),
            fairness_note=fairness_note,
            metric_alignment_notes=branch_b_metric_alignment_notes(),
            stability_checks={
                "all_finite": finite,
                "within_branch_b_stability_ceiling": within_ceiling,
                "phi_range": range_info,
            },
        )
    except Exception as exc:
        # Any runtime error is converted into a failure result for this scenario.
        return result_failure(
            scenario_name=scenario_name,
            reason="Scenario execution failed",
            input_entries=input_entries,
            branch_b_reference=BRANCH_B_REFERENCE[scenario_name],
            exc=exc,
        )
661+
662+
def run_all_in_one(required_paths_map: dict[str, dict[str, Any]]) -> dict[str, Any]:
    """Feed every ``PRIMARY_COMBOS`` split through a single Branch A runtime.

    For each (stage, subject) pair, at most 50 of the loaded records are
    ingested (each sliced to its first 60 items) and the runtime is stepped
    once per record. The scenario is a PASS when ``all_finite`` holds and the
    phi range is within the mirrored ceiling. If fewer records were fed than
    the nominal schedule (50 per split), the reason text says so instead of
    silently claiming the full schedule.

    Args:
        required_paths_map: Verified dataset entries keyed by relative path.

    Returns:
        The result built by ``make_result``, or by ``result_failure`` when
        anything inside the guarded run raises.

    Raises:
        KeyError: If a required dataset entry is missing from
            ``required_paths_map``; that lookup happens before the ``try``.
    """
    scenario_name = "all_in_one_5subjects"
    input_entries = [required_paths_map[rel_dataset_path(stage, subject)] for stage, subject in PRIMARY_COMBOS]
    try:
        runtime = CIERuntime()
        per_split_cap = 50
        load_counts: list[int] = []
        fed = 0
        for stage, subject in PRIMARY_COMBOS:
            recs, _ = load_recs(stage, subject, 80)
            load_counts.append(min(len(recs), per_split_cap))
            for rec in recs[:per_split_cap]:
                runtime.ingest(rec[:60])
                runtime.step(n=1)
                fed += 1
        emit_output = runtime.emit()
        snapshot = runtime.snapshot_state()
        range_info = phi_range(runtime)
        within_ceiling = range_within_ceiling(range_info)
        finite = all_finite(runtime)
        status = "PASS" if finite and within_ceiling else "FAIL"
        if status == "PASS":
            reason = (
                f"All five audited splits were fed through Branch A on the same 250-record schedule and stayed within the mirrored "
                f"±{STABILITY_LIMIT} ceiling."
            )
        elif not within_ceiling:
            reason = (
                f"All-in-one scenario ran, but phi_range={range_info} exceeded the mirrored Branch B ceiling of ±{STABILITY_LIMIT}."
            )
        else:
            reason = "All-in-one scenario produced non-finite runtime values."
        # Flag a short feed so the reason never overstates coverage; on a full
        # schedule the reason text is unchanged.
        scheduled_total = per_split_cap * len(PRIMARY_COMBOS)
        if fed < scheduled_total:
            reason += f" Note: only {fed} of {scheduled_total} scheduled records were available to feed."
        fairness_note = (
            "Branch B passed this exact 250-record all-in-one composition with a larger graph and cleaner subject-word surface. "
            f"Branch A completed the same split family, but emit_output={emit_output!r} and output_mode={snapshot['output_mode']}."
        )
        return make_result(
            scenario_name=scenario_name,
            status=status,
            reason=reason,
            input_entries=input_entries,
            record_counts={
                "loaded_records_per_input": load_counts,
                "fed_records": fed,
            },
            branch_b_reference=BRANCH_B_REFERENCE[scenario_name],
            branch_a_snapshot=branch_a_observables(runtime, snapshot),
            surfaced=surfaced_tokens_or_phrases(snapshot, emit_output),
            fairness_note=fairness_note,
            metric_alignment_notes=branch_b_metric_alignment_notes(),
            stability_checks={
                "all_finite": finite,
                "within_branch_b_stability_ceiling": within_ceiling,
                "phi_range": range_info,
            },
        )
    except Exception as exc:
        # Any runtime error is converted into a failure result for this scenario.
        return result_failure(
            scenario_name=scenario_name,
            reason="Scenario execution failed",
            input_entries=input_entries,
            branch_b_reference=BRANCH_B_REFERENCE[scenario_name],
            exc=exc,
        )
727+
728+
def run_all_scenarios(required_paths_map: dict[str, dict[str, Any]]) -> list[dict[str, Any]]:
    """Execute the whole scenario family and return the results in run order.

    The per-subject pipelines run first, one per ``PRIMARY_COMBOS`` entry,
    followed by the cross-stage, cross-subject and all-in-one scenarios.
    """
    per_subject = [
        run_subject_pipeline(stage, subject, required_paths_map)
        for stage, subject in PRIMARY_COMBOS
    ]
    composite_runners = (run_cross_stage, run_cross_subject, run_all_in_one)
    return per_subject + [runner(required_paths_map) for runner in composite_runners]
737+
738+
def collect_structural_mismatches() -> list[dict[str, str]]:
    """Return the fixed catalogue of Branch A vs Branch B metric mismatches.

    Each entry is a dict with the keys ``scope``, ``metric``, ``status`` and
    ``detail`` (in that order). A fresh list is built on every call.
    """
    fields = ("scope", "metric", "status", "detail")
    rows = (
        (
            "global",
            "activated_output_payload",
            "STRUCTURAL MISMATCH",
            "Branch A emit() exposes a string plus snapshot summaries, not Branch B's structured activated-node payload.",
        ),
        (
            "global",
            "bigram_emergent_words_and_circuits",
            "STRUCTURAL MISMATCH",
            "Branch A does not expose Branch-B-style bigram/circuit extraction; surfaced phrases are reported only from "
            "emit_output, active_region, top phi nodes, top mu nodes, and top flows.",
        ),
        (
            "global",
            "attention_used_total",
            "N/A",
            "Branch A exposes free_capacity but not attention.used/attention.total.",
        ),
        (
            "subject_pipeline",
            "post_feedback_snapshot_timing",
            "STRUCTURAL MISMATCH",
            "Branch B's subject pipeline report observes immediate post-feedback state. Branch A feedback is queued, so the "
            "port steps once after commit_feedback() to record the applied effect.",
        ),
    )
    return [dict(zip(fields, row)) for row in rows]
772+
773+
def known_limitations() -> list[str]:
    """Return the fixed list of caveats attached to every strict-rerun report.

    The text is static; a fresh list is built on every call.
    """
    tokenizer_note = (
        "Branch A tokenizes each ingest call with a Unicode word regex and keeps at most 8 tokens, "
        "so the same JSONL records compress into a much coarser graph than Branch B's character/bigram surface."
    )
    output_mode_note = (
        "Branch A remained in minimal output mode with zero free_capacity on all eight audited scenarios, "
        "so PASS here means stable execution on the same hydrated dataset family, not parity with Branch B's richer surface."
    )
    phi_range_note = (
        "Branch A phi range is derived from runtime.state.phi for audit purposes "
        "because snapshot_state() does not expose phi min/max directly."
    )
    scope_note = (
        "The strict rerun is validation-only: it does not modify cie/runtime.py, cie/state.py, cie/graph.py, "
        "cie/validation.py, or any Branch B runtime code."
    )
    return [tokenizer_note, output_mode_note, phi_range_note, scope_note]
781+
782+
def overall_summary(results: list[dict[str, Any]]) -> dict[str, Any]:
    """Aggregate per-scenario results into the report's ``overall_summary``.

    Counts results per status (every ``VALID_SCENARIO_STATUSES`` value starts
    at zero), lists the scenarios that ended in FAIL or STRUCTURAL MISMATCH,
    and builds the narrative summary. The failure wording is derived from the
    actual FAIL count rather than a fixed number. The fairness and
    recommendation flags are fixed values, not computed from ``results``.

    Args:
        results: Scenario results; each needs ``status`` and ``scenario_name``.

    Returns:
        The summary dict embedded in the JSON report.

    Raises:
        KeyError: If a result carries a status outside ``VALID_SCENARIO_STATUSES``.
    """
    counts = {status: 0 for status in VALID_SCENARIO_STATUSES}
    for result in results:
        counts[result["status"]] += 1
    failures = [result["scenario_name"] for result in results if result["status"] == "FAIL"]
    structural = [result["scenario_name"] for result in results if result["status"] == "STRUCTURAL MISMATCH"]

    # Describe the failures that were actually observed; a hard-coded count
    # would contradict ``failed_scenarios`` whenever the outcome differs.
    if failures:
        noun = "failure" if len(failures) == 1 else "failures"
        finding = (
            f"Branch A still shows {len(failures)} scenario {noun} under the mirrored ceiling "
            "and multiple surface mismatches"
        )
    else:
        finding = (
            "Branch A recorded no scenario failures under the mirrored ceiling "
            "but still shows multiple surface mismatches"
        )

    return {
        "scenario_count": len(results),
        "status_counts": counts,
        "failed_scenarios": failures,
        "structural_mismatch_scenarios": structural,
        "same_hydrated_formal_dataset_family_as_branch_b": True,
        "fairness_gap_materially_reduced": True,
        "materially_changes_earlier_ab_recommendation": False,
        "current_recommendation": "keep Branch B kernel, absorb Branch A shell/reporting ideas",
        "summary": (
            "Branch A has now been executed on the same hydrated cie-datasets formal split family and the same eight-scenario "
            "family used by the audited Branch B strict rerun. The fairness gap is materially reduced, but "
            f"{finding}, so the earlier 'kernel from B, shell from A' "
            "recommendation does not change."
        ),
    }
805+
806+
def recommendation(results: list[dict[str, Any]]) -> dict[str, Any]:
    """Build the report's ``recommendation`` section.

    The decision text is fixed; only the status tally quoted in the reason
    comes from ``overall_summary(results)``.
    """
    tally = overall_summary(results)["status_counts"]
    rerun_record = (
        f"{tally['PASS']} PASS, {tally['FAIL']} FAIL, "
        f"{tally['STRUCTURAL MISMATCH']} STRUCTURAL MISMATCH, {tally['N/A']} N/A at the scenario level"
    )
    reason = (
        f"Branch A now has a fair same-dataset rerun record ({rerun_record}), but it still does not "
        "match Branch B's formal validation cleanliness or observable richness. The earlier recommendation to keep the kernel from "
        "Branch B and absorb the shell/reporting strengths from Branch A remains the most defensible conclusion."
    )
    return {
        "decision": "does not change the current recommendation",
        "kernel_from_b_shell_from_a": "unchanged",
        "reason": reason,
    }
819+
820+
def render_markdown(report: dict[str, Any]) -> str:
    """Render the strict-rerun report dict as a Markdown document.

    Works for both a completed rerun and a blocked (hydration-failed) report:
    the fairness answers in sections 7-9 follow the flags in
    ``report['overall_summary']`` instead of always answering as if the rerun
    succeeded. Values placed in table cells are escaped so a pipe or line
    break inside the data cannot break the table layout.

    Args:
        report: The report dict as assembled by the report builders.

    Returns:
        The Markdown text, ending with a newline.
    """

    def cell(value: Any) -> str:
        # A bare "|" or a line break inside a cell would split the table row.
        return str(value).replace("|", "\\|").replace("\r", " ").replace("\n", " ")

    hydration_rows = []
    for entry in report["required_dataset_paths"]:
        # The preview may be absent or None for a file that could not be read
        # (assumption: the producer is not visible here), so fall back to "".
        preview = (entry.get("first_line_preview") or "")[:40]
        hydration_rows.append(
            f"| {cell(entry['stage'])} | {cell(entry['subject'])} | `{cell(entry['relative_path'])}` | "
            f"{'yes' if entry['hydrated'] else 'no'} | {cell(entry['line_count'])} | {cell(preview)} |"
        )

    results = report["per_scenario_results"]
    scenario_rows = []
    for result in results:
        scenario_rows.append(
            f"| {cell(result['scenario_name'])} | {cell(result['status'])} | {result['record_counts'].get('fed_records', 0)} | "
            f"{cell(result['branch_b_reference']['summary'])} | {cell(observed_summary(result))} | {cell(result['fairness_note'])} |"
        )

    mismatch_lines = [f"- `{item['metric']}` ({item['status']}): {item['detail']}" for item in report["structural_mismatches"]]
    if not mismatch_lines:
        mismatch_lines = ["- none"]

    summary = report["overall_summary"]
    counts = summary["status_counts"]
    dataset_path = report["dataset_repo_status"]["path"]

    # Surface-quality claims only make sense when scenarios actually ran.
    if results:
        surface_line = (
            "- Branch A still differs materially in surface quality: every scenario stayed in minimal mode with zero free_capacity, "
            "and Branch-B-specific output/word/circuit metrics remain unavailable."
        )
    else:
        surface_line = "- No scenarios were executed, so no surface-quality comparison is available."

    gap_reduced = summary["fairness_gap_materially_reduced"]
    if gap_reduced:
        gap_line = (
            f"- Yes. `{gap_reduced}`. The last major fairness gap was unmatched formal-dataset coverage, "
            "and this rerun closes that gap materially."
        )
    else:
        gap_line = f"- No. `{gap_reduced}`. {summary['summary']}"

    recommendation_changed = summary["materially_changes_earlier_ab_recommendation"]
    recommendation_line = (
        f"- {'Yes' if recommendation_changed else 'No'}. `{report['recommendation']['kernel_from_b_shell_from_a']}`."
    )

    return "\n".join(
        [
            "# Branch A formal validation strict rerun",
            "",
            "## 1. purpose",
            "",
            f"Run frozen Branch A on the same hydrated `{dataset_path}` formal split family and the same eight-scenario family used by the audited Branch B strict rerun, without modifying Branch A runtime behavior.",
            "",
            "## 2. Branch A base commit",
            "",
            f"- Branch A base commit: `{report['branch_a_base_commit']}`",
            f"- Branch under test: `{report['branch_under_test']}`",
            f"- Script used: `{report['script_used']}`",
            "",
            "## 3. Branch B reference strict rerun basis",
            "",
            f"- Reference commit: `{report['branch_b_reference_commit']}`",
            f"- Reference script: `{report['script_provenance']['scenario_template_from']}`",
            f"- Reference report: `{report['script_provenance']['reference_report_from']}`",
            "- Compatibility deviations:",
            *[f"- {item['detail']}" for item in report["compatibility_deviations"]],
            "",
            "## 4. dataset hydration check",
            "",
            f"- Dataset repo: `{dataset_path}`",
            f"- Git LFS: `{report['dataset_repo_status']['git_lfs_version']}`",
            f"- Hydration status: `{report['hydration_status']['status']}`",
            "",
            "| Stage | Subject | Relative path | Hydrated | Line count | First line preview |",
            "|---|---|---|---|---:|---|",
            *hydration_rows,
            "",
            "## 5. scenario-by-scenario result table",
            "",
            "| Scenario | Status | Records fed | Branch B strict rerun reference | Branch A observed | Fairness note |",
            "|---|---|---:|---|---|---|",
            *scenario_rows,
            "",
            "## 6. structural mismatch section",
            "",
            *mismatch_lines,
            "",
            "## 7. fairness interpretation",
            "",
            f"- Branch A can now honestly be said to have been evaluated on the same hydrated formal dataset family as Branch B: `{summary['same_hydrated_formal_dataset_family_as_branch_b']}`.",
            f"- Scenario counts: PASS={counts['PASS']}, FAIL={counts['FAIL']}, STRUCTURAL MISMATCH={counts['STRUCTURAL MISMATCH']}, N/A={counts['N/A']}.",
            surface_line,
            "",
            "## 8. does this reduce the final A/B fairness gap?",
            "",
            gap_line,
            "",
            "## 9. does this change the current A/B recommendation?",
            "",
            recommendation_line,
            f"- Reason: {report['recommendation']['reason']}",
            "",
        ]
    )
900+
901+
def render_review(report: dict[str, Any]) -> str:
    """Render the short review summary that accompanies the full report.

    When the report says the rerun was not performed (the blocked,
    hydration-failed case), the "What Was Run" section states that nothing ran
    and lists what was intended, instead of describing a run that never
    happened. A report without a ``rerun_performed`` key is treated as a
    completed rerun.

    Args:
        report: The report dict as assembled by the report builders.

    Returns:
        The Markdown text, ending with a newline.
    """
    results = report["per_scenario_results"]

    def names_with(status: str) -> str:
        # Comma-joined scenario names that ended with ``status``, or "none".
        names = [item["scenario_name"] for item in results if item["status"] == status]
        return ", ".join(names) if names else "none"

    global_structural = [item["metric"] for item in report["structural_mismatches"]]
    scenario_family = ", ".join(report["scenario_family"])

    if report.get("rerun_performed", True):
        what_was_run = [
            f"- Frozen Branch A runtime from `{report['branch_a_base_commit']}` on branch `{report['branch_under_test']}`.",
            f"- Same hydrated cie-datasets formal split family used by the audited Branch B strict rerun under `{DATASET_ROOT}`.",
            f"- Same eight-scenario family: {scenario_family}.",
        ]
    else:
        what_was_run = [
            "- Nothing was run: dataset hydration failed, so the strict rerun was blocked.",
            f"- Intended runtime: frozen Branch A from `{report['branch_a_base_commit']}` on branch `{report['branch_under_test']}`.",
            f"- Intended dataset root: `{DATASET_ROOT}`.",
            f"- Intended eight-scenario family: {scenario_family}.",
        ]

    return "\n".join(
        [
            "# Branch A formal validation strict rerun summary",
            "",
            "## What Was Run",
            *what_was_run,
            "",
            "## Outcome",
            f"- Succeeded: {names_with('PASS')}",
            f"- Failed: {names_with('FAIL')}",
            f"- Scenario-level structural mismatches: {names_with('STRUCTURAL MISMATCH')}",
            f"- Global structural mismatches/N/A metrics: {', '.join(global_structural) if global_structural else 'none'}",
            "",
            "## Recommendation",
            "- The current 'kernel from B, shell from A' recommendation does not change.",
            f"- Reason: {report['recommendation']['reason']}",
            "",
        ]
    )
928+
929+
def write_reports(report: dict[str, Any], json_path: Path, markdown_path: Path, review_path: Path) -> None:
    """Write the JSON report, the Markdown report and the review summary.

    Parent directories are created as needed. The JSON file is strict JSON:
    non-finite floats (which this audit can legitimately record, for example
    in a phi range) are written as the strings ``"nan"``, ``"inf"`` or
    ``"-inf"`` rather than the non-standard ``NaN``/``Infinity`` tokens that
    ``json.dumps`` emits by default. The in-memory ``report`` is not modified.

    Args:
        report: The assembled report dict.
        json_path: Destination of the JSON report.
        markdown_path: Destination of the full Markdown report.
        review_path: Destination of the short review summary.
    """
    for target in (json_path, markdown_path, review_path):
        target.parent.mkdir(parents=True, exist_ok=True)
    # allow_nan=False guarantees nothing non-standard slips past the sanitizer.
    payload = json.dumps(_json_safe(report), ensure_ascii=False, indent=2, allow_nan=False)
    json_path.write_text(payload + "\n", encoding="utf-8")
    markdown_path.write_text(render_markdown(report), encoding="utf-8")
    review_path.write_text(render_review(report), encoding="utf-8")


def _json_safe(value: Any) -> Any:
    """Return a copy of ``value`` with non-finite floats replaced by their ``repr`` string."""
    import math  # local import: the file's top-level import block is not visible from here

    if isinstance(value, float) and not math.isfinite(value):
        return repr(value)
    if isinstance(value, dict):
        return {key: _json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_json_safe(item) for item in value]
    return value
937+
938+
def build_blocked_report(
    dataset_status: dict[str, Any],
    required_paths: list[dict[str, Any]],
    blockers: list[str],
    lfs_pull: dict[str, Any] | None,
) -> dict[str, Any]:
    """Assemble the report emitted when dataset hydration failed and nothing ran.

    Args:
        dataset_status: Dataset repository status to embed unchanged.
        required_paths: Per-file verification entries to embed unchanged.
        blockers: Hydration blocker messages.
        lfs_pull: Result of the LFS pull attempt, or ``None`` if none was made.

    Returns:
        A report dict with no scenario results, all status counts at zero,
        ``rerun_performed`` set to ``False`` and a "blocked" recommendation.
    """
    hydration_status = {
        "status": "FAILED",
        "all_required_files_hydrated": False,
        "lfs_pull_attempted": lfs_pull is not None,
        "lfs_pull_result": lfs_pull,
        "blockers": blockers,
    }
    empty_summary = {
        "scenario_count": 0,
        "status_counts": {status: 0 for status in VALID_SCENARIO_STATUSES},
        "failed_scenarios": [],
        "structural_mismatch_scenarios": [],
        "same_hydrated_formal_dataset_family_as_branch_b": False,
        "fairness_gap_materially_reduced": False,
        "materially_changes_earlier_ab_recommendation": False,
        "current_recommendation": "keep Branch B kernel, absorb Branch A shell/reporting ideas",
        "summary": "Dataset hydration failed, so no strict fairness rerun was performed.",
    }
    blocked_recommendation = {
        "decision": "blocked",
        "kernel_from_b_shell_from_a": "unchanged",
        "reason": "The required formal split files were not fully hydrated, so the strict fairness rerun could not be performed honestly.",
    }
    provenance = {
        "scenario_template_from": f"{BRANCH_B_REFERENCE_COMMIT}:{BRANCH_B_REFERENCE_SCRIPT}",
        "reference_report_from": f"{BRANCH_B_REFERENCE_COMMIT}:{BRANCH_B_REFERENCE_REPORT_MD}",
        "port_path": "tests/formal_validation_branch_a_strict_rerun.py",
    }
    # Key order is kept as-is because it is the order written to the JSON file.
    return {
        "branch_a_base_commit": BRANCH_A_BASE_COMMIT,
        "branch_b_reference_commit": BRANCH_B_REFERENCE_COMMIT,
        "dataset_repo_status": dataset_status,
        "required_dataset_paths": required_paths,
        "hydration_status": hydration_status,
        "scenario_family": list(SCENARIO_FAMILY),
        "per_scenario_results": [],
        "overall_summary": empty_summary,
        "structural_mismatches": collect_structural_mismatches(),
        "known_limitations": known_limitations(),
        "recommendation": blocked_recommendation,
        "branch_under_test": STRICT_RERUN_BRANCH,
        "script_used": "tests/formal_validation_branch_a_strict_rerun.py",
        "script_provenance": provenance,
        "compatibility_deviations": COMPATIBILITY_DEVIATIONS,
        "rerun_performed": False,
        "rerun_command": "python3 tests/formal_validation_branch_a_strict_rerun.py",
    }
989+
990+
def generate_strict_rerun_report(
    json_path: Path | str = REPORT_JSON_PATH,
    markdown_path: Path | str = REPORT_MD_PATH,
    review_path: Path | str = REVIEW_MD_PATH,
) -> dict[str, Any]:
    """Run the strict rerun end to end and write the three report files.

    The required dataset files are verified first. If any are not hydrated, a
    single LFS pull is attempted and both the repository status and the
    per-file verification are collected again, so the report describes the
    checkout that was actually used. If blockers remain, a blocked report is
    written and returned without running any scenario.

    Args:
        json_path: Destination of the JSON report.
        markdown_path: Destination of the Markdown report.
        review_path: Destination of the review summary.

    Returns:
        The report dict that was written.

    Raises:
        RuntimeError: If the completed report lacks a key listed in
            ``REQUIRED_JSON_TOP_LEVEL_KEYS``.
    """
    json_path = Path(json_path)
    markdown_path = Path(markdown_path)
    review_path = Path(review_path)

    dataset_status = collect_dataset_repo_status()
    required_paths = verify_required_dataset_paths()
    blockers = hydration_blockers(required_paths)
    lfs_pull_result: dict[str, Any] | None = None
    if blockers:
        lfs_pull_result = attempt_git_lfs_pull()
        # The pull may have changed the checkout; refresh the repo status too,
        # otherwise the report would pair pre-pull status with post-pull files.
        dataset_status = collect_dataset_repo_status()
        required_paths = verify_required_dataset_paths()
        blockers = hydration_blockers(required_paths)
    if blockers:
        report = build_blocked_report(dataset_status, required_paths, blockers, lfs_pull_result)
        write_reports(report, json_path, markdown_path, review_path)
        return report

    required_paths_map = {entry["relative_path"]: entry for entry in required_paths}
    results = run_all_scenarios(required_paths_map)
    report = {
        "branch_a_base_commit": BRANCH_A_BASE_COMMIT,
        "branch_b_reference_commit": BRANCH_B_REFERENCE_COMMIT,
        "dataset_repo_status": dataset_status,
        "required_dataset_paths": required_paths,
        "hydration_status": {
            "status": "SUCCESS",
            "all_required_files_hydrated": True,
            "lfs_pull_attempted": lfs_pull_result is not None,
            "lfs_pull_result": lfs_pull_result,
            "blockers": [],
        },
        "scenario_family": list(SCENARIO_FAMILY),
        "per_scenario_results": results,
        "overall_summary": overall_summary(results),
        "structural_mismatches": collect_structural_mismatches(),
        "known_limitations": known_limitations(),
        "recommendation": recommendation(results),
        "branch_under_test": git(["git", "rev-parse", "--abbrev-ref", "HEAD"]).stdout.strip(),
        "script_used": "tests/formal_validation_branch_a_strict_rerun.py",
        "script_provenance": {
            "scenario_template_from": f"{BRANCH_B_REFERENCE_COMMIT}:{BRANCH_B_REFERENCE_SCRIPT}",
            "reference_report_from": f"{BRANCH_B_REFERENCE_COMMIT}:{BRANCH_B_REFERENCE_REPORT_MD}",
            "port_path": "tests/formal_validation_branch_a_strict_rerun.py",
            "runtime_under_test": f"cie runtime exactly as frozen at {BRANCH_A_BASE_COMMIT}",
        },
        "compatibility_deviations": COMPATIBILITY_DEVIATIONS,
        "rerun_performed": True,
        "rerun_command": "python3 tests/formal_validation_branch_a_strict_rerun.py",
    }
    # Fail loudly before writing anything if the report shape has drifted.
    missing = [key for key in REQUIRED_JSON_TOP_LEVEL_KEYS if key not in report]
    if missing:
        raise RuntimeError(f"Missing required report keys: {missing}")
    write_reports(report, json_path, markdown_path, review_path)
    return report
1050+
1051+
def main() -> int:
    """Generate the report, print a console summary and return the exit code.

    Returns:
        ``1`` when the required dataset files were not hydrated (the blockers
        are printed), otherwise ``0`` after printing one line per scenario
        and the status totals. The report path is printed in both cases.
    """
    report = generate_strict_rerun_report()
    if report["hydration_status"]["all_required_files_hydrated"]:
        for outcome in report["per_scenario_results"]:
            print(f"[{outcome['status']}] {outcome['scenario_name']}: {outcome['reason']}")
        tally = report["overall_summary"]["status_counts"]
        print(
            f"\nTotals: PASS={tally['PASS']}, FAIL={tally['FAIL']}, "
            f"STRUCTURAL MISMATCH={tally['STRUCTURAL MISMATCH']}, N/A={tally['N/A']}"
        )
        exit_code = 0
    else:
        for blocker in report["hydration_status"]["blockers"]:
            print(f"[BLOCKED] {blocker}")
        exit_code = 1
    print(f"Report: {REPORT_JSON_PATH}")
    return exit_code


# Script entry point: the return value of main() becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
1@@ -0,0 +1,50 @@
2+from __future__ import annotations
3+
4+import json
5+import tempfile
6+import unittest
7+from pathlib import Path
8+
9+from tests.formal_validation_branch_a_strict_rerun import (
10+ REQUIRED_JSON_TOP_LEVEL_KEYS,
11+ SCENARIO_FAMILY,
12+ VALID_SCENARIO_STATUSES,
13+ generate_strict_rerun_report,
14+)
15+
16+
class BranchAFormalDatasetStrictRerunTests(unittest.TestCase):
    """Smoke tests for the Branch A strict-rerun report generator.

    The report is generated once per class into a temporary directory and the
    individual tests inspect the returned dict and the written files.
    """

    @classmethod
    def setUpClass(cls) -> None:
        cls.temp_dir = tempfile.TemporaryDirectory()
        temp_root = Path(cls.temp_dir.name)
        cls.json_path = temp_root / "branch_a_formal_strict_rerun.json"
        cls.markdown_path = temp_root / "branch_a_formal_strict_rerun.md"
        cls.review_path = temp_root / "branch_a_formal_strict_rerun_review.md"
        try:
            cls.report = generate_strict_rerun_report(cls.json_path, cls.markdown_path, cls.review_path)
        except BaseException:
            # unittest does not call tearDownClass when setUpClass raises, so
            # remove the temporary directory here before propagating.
            cls.temp_dir.cleanup()
            raise

    @classmethod
    def tearDownClass(cls) -> None:
        cls.temp_dir.cleanup()

    def test_report_files_are_generated(self) -> None:
        """All three report files exist after generation."""
        self.assertTrue(self.json_path.exists())
        self.assertTrue(self.markdown_path.exists())
        self.assertTrue(self.review_path.exists())

    def test_required_json_keys_exist(self) -> None:
        """The written JSON report contains every required top-level key."""
        payload = json.loads(self.json_path.read_text(encoding="utf-8"))
        for key in REQUIRED_JSON_TOP_LEVEL_KEYS:
            self.assertIn(key, payload)

    def test_eight_scenarios_are_recorded(self) -> None:
        """Scenario names match ``SCENARIO_FAMILY`` exactly, in order."""
        results = self.report["per_scenario_results"]
        self.assertEqual([item["scenario_name"] for item in results], list(SCENARIO_FAMILY))

    def test_scenario_statuses_are_valid(self) -> None:
        """Every recorded scenario status is one of the allowed values."""
        for result in self.report["per_scenario_results"]:
            self.assertIn(result["status"], VALID_SCENARIO_STATUSES)


if __name__ == "__main__":
    unittest.main()
51+ unittest.main()