im_wower
·
2026-04-01
test_comprehensive.py
1"""
2CIE Branch-B 综合验证测试
3===========================
4使用真实课本数据(AsahiLuna/china-text-book-md)
5
6三大类:
7 A. 真实数据验证(小初高课本跑完整 pipeline)
8 B. 边界条件(空输入、单字、超长文本、极端参数)
9 C. 反例/对抗(垃圾输入、类型错误、连续 reset、反复回灌)
10
11数据选取:
12 - 小学语文一上(简单汉字,短句)
13 - 小学数学一上(数字+汉字混合)
14 - 初中语文七上(中等复杂度文本)
15 - 初中数学七上(公式+文字混合)
16 - 高中语文必修上(长文本、文言文)
17
18SPEC §7 覆盖 + 额外边界/反例。
19"""
20
21import sys
22import os
23import time
24import math
25import json
26import traceback
27
28sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
29from cie import CIERuntime
30
# -- Data location --
# Root directory holding the textbook markdown files. The historical default
# is a developer-local checkout; set the CIE_TEXTBOOK_DIR environment variable
# to run the suite against a copy stored elsewhere.
DATA_DIR = os.environ.get("CIE_TEXTBOOK_DIR") or "/Users/george/code/china-text-book-md"

# Short textbook label -> markdown file name inside DATA_DIR.
TEXTBOOKS = {
    "小学语文一上": "小学_语文_统编版_义务教育教科书·语文一年级上册.md",
    "小学数学一上": "小学_数学_人教版_义务教育教科书 · 数学一年级上册.md",
    "初中语文七上": "初中_语文_统编版-人民教育出版社_七年级_义务教育教科书·语文七年级上册.md",
    "初中数学七上": "初中_数学_人教版-人民教育出版社_七年级_义务教育教科书·数学七年级上册.md",
    "高中语文必修上": "高中_语文_统编版-人民教育出版社_普通高中教科书·语文必修 上册.md",
}
41
42
def load_textbook(name):
    """Load one textbook and return its usable text lines.

    The file registered under ``name`` in ``TEXTBOOKS`` is read from
    ``DATA_DIR`` as UTF-8 and split on newlines. Each line is stripped and
    kept only when it is non-empty, does not start with ``#``, ``**``,
    ``---`` or ``![``, is not dominated by control characters, and contains
    at least two characters in the U+4E00..U+9FFF range.
    """
    with open(os.path.join(DATA_DIR, TEXTBOOKS[name]), "r", encoding="utf-8") as handle:
        content = handle.read()

    def _usable(text):
        if not text:
            return False
        # Markdown headings, bold lines, horizontal rules and image references.
        if text.startswith(("#", "**", "---", "![")):
            return False
        # Drop lines where more than 30% of the characters are control codes
        # (treated as OCR noise).
        noisy = sum(1 for ch in text if ord(ch) < 32 and ch not in '\n\t')
        if noisy > len(text) * 0.3:
            return False
        # Require at least two CJK characters.
        han = sum(1 for ch in text if '\u4e00' <= ch <= '\u9fff')
        return han >= 2

    stripped_rows = (row.strip() for row in content.split("\n"))
    return [row for row in stripped_rows if _usable(row)]
69
70
71# ╔══════════════════════════════════════════════╗
72# ║ A. 真实数据验证 ║
73# ╚══════════════════════════════════════════════╝
74
def test_A01_xiaoxue_yuwen_pipeline():
    """A01: primary-school Chinese (grade 1, vol. 1) through the full pipeline.

    Exercises ingest -> step -> emit -> feedback -> snapshot.
    """
    runtime = CIERuntime(seed=42)
    paragraphs = load_textbook("小学语文一上")
    assert len(paragraphs) > 10, f"Too few paragraphs: {len(paragraphs)}"

    # Feed the first 30 paragraphs, each truncated to 50 characters.
    for text in paragraphs[:30]:
        runtime.ingest(text[:50])
        runtime.step(n=3)

    emitted = runtime.emit()
    state = runtime.snapshot_state()

    assert emitted['active_count'] > 0, "No active nodes after feeding textbook"
    assert state['phi_summary']['count'] > 20, "Too few phi nodes"
    assert state['attention']['used'] > 0, "No attention used"

    # Give positive feedback on the first activated node, when there is one.
    if emitted['activated']:
        first_node = emitted['activated'][0]['node']
        runtime.commit_feedback({'correct': [first_node], 'reward': 1.0})

    print(f" PASS: 小学语文 — {state['phi_summary']['count']} nodes, "
          f"{state['J_summary']['count']} flows, mode={emitted['mode']}")
99
100
def test_A02_xiaoxue_shuxue_mixed():
    """A02: primary-school maths (grade 1, vol. 1) -- mixed digit/Chinese input."""
    runtime = CIERuntime(seed=42)
    paragraphs = load_textbook("小学数学一上")

    # The maths textbook mixes digits, symbols and Chinese characters.
    for text in paragraphs[:20]:
        runtime.ingest(text[:40])
        runtime.step(n=3)

    runtime.emit()  # the emitted result itself is not inspected here
    state = runtime.snapshot_state()

    # Only the presence of Chinese nodes is asserted; digit presence is
    # reported in the output line.
    has_digit = any(node.isdigit() for node in runtime.graph.nodes)
    has_cn = any('\u4e00' <= node <= '\u9fff' for node in runtime.graph.nodes)
    assert has_cn, "No Chinese characters in graph"

    print(f" PASS: 小学数学 — {state['phi_summary']['count']} nodes, "
          f"has_digit={has_digit}, has_cn={has_cn}")
121
122
def test_A03_chuzhong_yuwen_complexity():
    """A03: junior-high Chinese (grade 7, vol. 1) -- medium complexity, sedimentation path."""
    runtime = CIERuntime(seed=42)
    paragraphs = load_textbook("初中语文七上")

    for text in paragraphs[:50]:
        runtime.ingest(text[:60])
        runtime.step(n=3)

    state = runtime.snapshot_state()

    # Sedimentation figures are collected for the report; only the node
    # count is asserted.
    experience_entries = state.get('experience_regions', {}).get('experience', [])
    exp_count = len(experience_entries)
    sed_count = len(state['sedimentation_trace'])

    assert state['phi_summary']['count'] > 50, "Too few nodes from 50 paragraphs"
    print(f" PASS: 初中语文 — {state['phi_summary']['count']} nodes, "
          f"experience={exp_count}, sed_traces={sed_count}")
141
142
def test_A04_chuzhong_shuxue_formula():
    """A04: junior-high maths (grade 7, vol. 1) -- formula/text mix, asymmetric edges."""
    runtime = CIERuntime(seed=42)
    paragraphs = load_textbook("初中数学七上")

    for text in paragraphs[:30]:
        runtime.ingest(text[:50])
        runtime.step(n=3)

    # Count forward edges whose weight differs from the backward weight by
    # more than 0.01.
    graph = runtime.graph
    edge_total = 0
    asymmetric = 0
    for outgoing in graph.fwd_edges.values():
        for target, edge in outgoing.items():
            reverse_weight = graph.get_bwd_weight(target, edge.src)
            edge_total += 1
            if abs(edge.weight - reverse_weight) > 0.01:
                asymmetric += 1

    # max(..., 1) guards the division when the graph has no edges.
    asym_ratio = asymmetric / max(edge_total, 1)
    assert asym_ratio > 0.5, f"Asymmetry too low: {asym_ratio:.2f}"
    print(f" PASS: 初中数学 — {edge_total} edges, asym_ratio={asym_ratio:.2f}")
165
166
def test_A05_gaozhong_yuwen_long_text():
    """A05: senior-high Chinese (compulsory, vol. 1) -- long text, attention must not overflow."""
    runtime = CIERuntime(seed=42)
    paragraphs = load_textbook("高中语文必修上")

    for text in paragraphs[:80]:
        runtime.ingest(text[:80])
        runtime.step(n=2)

    state = runtime.snapshot_state()

    # Attention in use must stay within the pool (0.01 tolerance).
    attention = state['attention']
    assert attention['used'] <= attention['total'] + 0.01, \
        f"Attention overflow: {attention['used']} > {attention['total']}"

    # phi must stay within the +/-10.1 bound.
    phi = state['phi_summary']
    assert abs(phi['max']) <= 10.1, \
        f"Phi diverged: max={phi['max']}"
    assert abs(phi['min']) <= 10.1, \
        f"Phi diverged: min={phi['min']}"

    print(f" PASS: 高中语文 — {phi['count']} nodes, "
          f"phi_range=[{phi['min']:.2f}, {phi['max']:.2f}], "
          f"attention={attention['used']:.1f}/{attention['total']:.0f}")
191
192
def test_A06_cross_subject_learning():
    """A06: cross-subject learning -- Chinese first, then maths, on one runtime."""
    runtime = CIERuntime(seed=42)

    def _study(book):
        # Feed the first 15 paragraphs of ``book`` and return the active
        # region reported by the snapshot taken afterwards.
        for text in load_textbook(book)[:15]:
            runtime.ingest(text[:40])
            runtime.step(n=3)
        return set(runtime.snapshot_state()['active_region'])

    # Phase 1: Chinese.
    active_yuwen = _study("小学语文一上")

    # Phase 2: maths, without resetting the session.
    active_shuxue = _study("小学数学一上")

    # The active region must contain nodes that were not active before.
    new_nodes = active_shuxue - active_yuwen
    assert len(new_nodes) > 0, "No activation migration on subject switch"

    # Count phase-1 nodes whose phi is still non-negligible (reported only).
    phi = runtime.state.phi
    yuwen_nodes_alive = sum(
        1 for node in active_yuwen if abs(phi.get(node, 0.0)) > 0.001
    )

    print(f" PASS: 跨学科 — 语文active={len(active_yuwen)}, 数学active={len(active_shuxue)}, "
          f"new={len(new_nodes)}, 语文nodes_alive={yuwen_nodes_alive}")
223
224
def test_A07_session_reset_preserves_long_term():
    """A07: a session reset clears session state but keeps phi."""
    runtime = CIERuntime(seed=42)

    for text in load_textbook("初中语文七上")[:30]:
        runtime.ingest(text[:50])
        runtime.step(n=3)

    # Copy the structures inspected after the reset.
    phi_before = dict(runtime.state.phi)
    cores_before = dict(runtime.state.ability_cores)

    runtime.reset_session()

    # Session-scoped state must be back to its initial values.
    state = runtime.state
    assert sum(state.mu.values()) == 0, "mu not cleared after reset"
    assert len(state.active_region) == 0, "active_region not cleared"
    assert state.attention.free == 100.0, "attention not restored"

    # At least one previously known node must still carry non-negligible phi.
    phi_preserved = sum(
        1 for key in phi_before if abs(state.phi.get(key, 0.0)) > 0.001
    )
    assert phi_preserved > 0, "All phi lost after reset"

    print(f" PASS: reset — phi_preserved={phi_preserved}/{len(phi_before)}, "
          f"cores={len(cores_before)}")
252
253
def test_A08_multi_round_feedback():
    """A08: several feedback rounds -- positive then negative -- and confidence.

    Only the effect of the positive rounds is asserted; the confidence after
    the negative round is reported in the output line. When nothing is
    activated the test prints a SKIP line and returns.
    """
    rt = CIERuntime(seed=42)

    paras = load_textbook("小学语文一上")
    rt.ingest(paras[0][:30])
    rt.step(n=5)
    out = rt.emit()

    if not out['activated']:
        print(" SKIP: no activated nodes")
        return

    target = out['activated'][0]['node']
    c_initial = rt.state.get_confidence(target)

    # Five rounds of positive feedback on the same node.
    for _ in range(5):
        rt.commit_feedback({'correct': [target], 'reward': 1.0})
    c_positive = rt.state.get_confidence(target)

    # One round of negative feedback.
    rt.commit_feedback({'wrong': [target], 'reward': -0.5})
    c_after_neg = rt.state.get_confidence(target)

    assert c_positive >= c_initial, "Positive feedback didn't increase confidence"
    print(f" PASS: 多轮反馈 — c: {c_initial:.3f} → +fb → {c_positive:.3f} → -fb → {c_after_neg:.3f}")
282
283
def test_A09_incremental_learning_sedimentation():
    """A09: feed the same paragraphs repeatedly; sedimentation must not shrink."""
    runtime = CIERuntime(seed=42)

    lesson = load_textbook("小学语文一上")[:10]

    # One (sedimentation_trace size, skill_belt_candidates size) pair per round.
    sed_history = []
    for _ in range(5):
        for text in lesson:
            runtime.ingest(text[:30])
            runtime.step(n=3)
        state = runtime.snapshot_state()
        sed_count = len(state['sedimentation_trace'])
        skill_count = len(state['skill_belt_candidates'])
        sed_history.append((sed_count, skill_count))

    # Compare the last round against the first.
    first_round, last_round = sed_history[0], sed_history[-1]
    assert last_round[0] >= first_round[0], \
        f"Sedimentation not increasing: {sed_history}"

    print(f" PASS: 渐进沉积 — rounds: {sed_history}")
305
306
def test_A10_snapshot_completeness():
    """A10: the snapshot exposes every field listed for SPEC section 6."""
    runtime = CIERuntime(seed=42)

    for text in load_textbook("初中数学七上")[:20]:
        runtime.ingest(text[:40])
        runtime.step(n=3)
    runtime.emit()
    runtime.commit_feedback({'correct': [], 'reward': 0.5})

    state = runtime.snapshot_state()

    required_fields = [
        'phi_summary',
        'mu_summary',
        'J_summary',
        'active_region',
        'bound_ability_core',
        'anchor_pull',
        'drift_score',
        'free_capacity',
        'experience_regions',
        'skill_belt_candidates',
        'sedimentation_trace',
        'merge_events',
        'decay_events',
        'output_mode',
        'feedback_effect',
        'attention',
    ]

    # Keep the declared order so the failure message is stable.
    missing = [field for field in required_fields if field not in state]
    assert not missing, f"Missing snapshot fields: {missing}"

    print(f" PASS: snapshot 完整 — {len(required_fields)} fields all present")
333
334
335# ╔══════════════════════════════════════════════╗
336# ║ B. 边界条件 ║
337# ╚══════════════════════════════════════════════╝
338
def test_B01_empty_input():
    """B01: empty input must not crash and yields the 'minimal' mode."""
    runtime = CIERuntime(seed=42)
    runtime.ingest("")
    runtime.step(n=3)

    emitted = runtime.emit()
    assert emitted is not None
    assert emitted['mode'] == 'minimal'
    print(f" PASS: 空输入 — mode={emitted['mode']}, active={emitted['active_count']}")
348
349
def test_B02_single_char():
    """B02: a single-character input creates at least one node."""
    runtime = CIERuntime(seed=42)
    runtime.ingest("我")
    runtime.step(n=5)

    emitted = runtime.emit()
    assert runtime.graph.node_count >= 1
    print(f" PASS: 单字 — nodes={runtime.graph.node_count}, active={emitted['active_count']}")
358
359
def test_B03_very_long_input():
    """B03: a very long input (up to 10000 characters) neither crashes nor overflows.

    The text is built by concatenating up to 200 textbook paragraphs and
    cutting the result to 10000 characters. Elapsed time is measured with
    ``time.perf_counter`` so it is not affected by wall-clock adjustments;
    it is reported only, not asserted.
    """
    rt = CIERuntime(seed=42)
    paras = load_textbook("高中语文必修上")
    long_text = "".join(paras[:200])[:10000]

    t0 = time.perf_counter()
    rt.ingest(long_text)
    rt.step(n=3)
    rt.emit()  # exercised for crash-safety; result not inspected
    elapsed = time.perf_counter() - t0

    snap = rt.snapshot_state()
    assert snap['attention']['used'] <= snap['attention']['total'] + 0.01
    assert abs(snap['phi_summary']['max']) <= 10.1

    print(f" PASS: 超长输入({len(long_text)}字) — "
          f"nodes={snap['phi_summary']['count']}, time={elapsed:.2f}s")
378
379
def test_B04_repeated_same_input():
    """B04: injecting the same input 100 times keeps the values bounded."""
    runtime = CIERuntime(seed=42)
    for _ in range(100):
        runtime.ingest("重复")
        runtime.step(n=1)

    state = runtime.snapshot_state()
    phi_max = state['phi_summary']['max']
    assert abs(phi_max) <= 10.1, \
        f"Phi diverged after 100 repeats: {phi_max}"
    assert state['attention']['used'] <= state['attention']['total'] + 0.01

    print(f" PASS: 100次重复 — phi_max={phi_max:.3f}, "
          f"attention={state['attention']['used']:.1f}")
394
395
def test_B05_step_zero():
    """B05: step(0) leaves the serialized snapshot unchanged."""
    runtime = CIERuntime(seed=42)
    runtime.ingest("测试")

    def _serialized():
        # Key-sorted JSON so two snapshots can be compared as strings.
        return json.dumps(runtime.snapshot_state(), sort_keys=True, default=str)

    before = _serialized()
    runtime.step(n=0)
    after = _serialized()

    assert before == after, "step(0) changed state"
    print(" PASS: step(0) 不改变状态")
405
406
def test_B06_step_large_n():
    """B06: step(1000) does not crash and phi stays bounded."""
    runtime = CIERuntime(seed=42)
    runtime.ingest("大步长测试")
    runtime.step(n=1000)

    state = runtime.snapshot_state()
    assert abs(state['phi_summary']['max']) <= 10.1

    # Activation is expected to have decayed after this many steps; the
    # total is reported, not asserted.
    total_mu = state['mu_summary']['total']
    print(f" PASS: step(1000) — phi_max={state['phi_summary']['max']:.3f}, "
          f"mu_total={total_mu:.4f}")
419
420
def test_B07_attention_exact_boundary():
    """B07: with a tiny attention pool, free attention never goes negative."""
    runtime = CIERuntime(seed=42)
    attention = runtime.state.attention
    attention.total = 10.0  # deliberately small pool

    # Keep injecting distinct inputs to exhaust the pool.
    for index in range(20):
        runtime.ingest(f"字{index}")
        runtime.step(n=1)

    # Re-read through runtime.state, as the checks are about the live pool.
    assert runtime.state.attention.free >= 0, \
        f"Attention went negative: {runtime.state.attention.free}"
    print(f" PASS: 注意力边界 — free={runtime.state.attention.free:.4f}, "
          f"used={runtime.state.attention.used:.4f}")
435
436
def test_B08_emit_before_ingest():
    """B08: emit() on a fresh runtime returns a valid, empty result."""
    emitted = CIERuntime(seed=42).emit()

    assert emitted is not None
    assert emitted['mode'] in ('full', 'degraded', 'minimal')
    assert len(emitted['activated']) == 0
    print(f" PASS: emit before ingest — mode={emitted['mode']}")
445
446
def test_B09_unicode_special_chars():
    """B09: assorted special Unicode characters each become a node."""
    runtime = CIERuntime(seed=42)
    special = "αβγ∑∏∫≈≠∞π²√½⅓㊀㊁㊂🎉🔥"
    runtime.ingest(special)
    runtime.step(n=3)
    runtime.emit()  # exercised for crash-safety; result not inspected

    # At least one node per distinct character.
    distinct_chars = set(special)
    assert runtime.graph.node_count >= len(distinct_chars)
    print(f" PASS: Unicode特殊字符 — nodes={runtime.graph.node_count}")
456
457
def test_B10_snapshot_after_reset():
    """B10: taking a snapshot right after a reset works and shows zero mu."""
    runtime = CIERuntime(seed=42)
    runtime.ingest("测试")
    runtime.step(n=5)
    runtime.reset_session()

    state = runtime.snapshot_state()
    assert state is not None
    assert state['mu_summary']['total'] == 0
    print(" PASS: reset后snapshot正常")
468
469
470# ╔══════════════════════════════════════════════╗
471# ║ C. 反例/对抗 ║
472# ╚══════════════════════════════════════════════╝
473
def test_C01_garbage_bytes():
    """C01: a string of every code point from 1 to 127 must not crash the runtime."""
    runtime = CIERuntime(seed=42)
    garbage = "".join(map(chr, range(1, 128)))
    runtime.ingest(garbage)
    runtime.step(n=3)

    emitted = runtime.emit()
    # Surviving without an exception is the whole requirement.
    assert emitted is not None
    print(f" PASS: 乱码输入 — nodes={runtime.graph.node_count}, active={emitted['active_count']}")
484
485
def test_C02_list_input():
    """C02: a list of tokens (not a string) is accepted by ingest."""
    runtime = CIERuntime(seed=42)
    tokens = ["你", "好", "世", "界"]
    runtime.ingest(tokens)
    runtime.step(n=3)
    runtime.emit()  # exercised for crash-safety; result not inspected

    graph = runtime.graph
    assert graph.has_node("你")
    assert graph.has_node("世")
    print(f" PASS: list输入 — nodes={graph.node_count}")
495
496
def test_C03_numeric_input():
    """C03: a purely numeric string yields digit and punctuation nodes."""
    runtime = CIERuntime(seed=42)
    runtime.ingest("3.14159265358979")
    runtime.step(n=5)
    runtime.emit()  # exercised for crash-safety; result not inspected

    graph = runtime.graph
    assert graph.has_node("3")
    assert graph.has_node(".")
    print(f" PASS: 纯数字 — nodes={graph.node_count}")
506
507
def test_C04_rapid_reset_cycle():
    """C04: 50 quick ingest/step/reset cycles, then normal use still works."""
    runtime = CIERuntime(seed=42)
    for cycle in range(50):
        runtime.ingest(f"循环{cycle}")
        runtime.step(n=1)
        runtime.reset_session()

    # Use the runtime normally after the reset storm.
    runtime.ingest("恢复正常")
    runtime.step(n=5)
    emitted = runtime.emit()

    assert emitted is not None
    assert runtime.state.attention.free >= 0
    print(f" PASS: 50次快速reset — active={emitted['active_count']}, free={runtime.state.attention.free:.1f}")
523
524
def test_C05_feedback_nonexistent_nodes():
    """C05: feedback that names nodes never ingested must not crash."""
    runtime = CIERuntime(seed=42)
    runtime.ingest("测试")
    runtime.step(n=3)

    # Neither of these node names was ever ingested.
    feedback = {'correct': ['不存在的节点'], 'wrong': ['也不存在']}
    runtime.commit_feedback(feedback)

    # The runtime must still be able to produce a snapshot.
    state = runtime.snapshot_state()
    assert state is not None
    print(" PASS: 不存在节点的反馈不崩")
537
538
def test_C06_negative_reward_extreme():
    """C06: an extreme negative reward must leave every phi value finite."""
    runtime = CIERuntime(seed=42)
    runtime.ingest("极端测试")
    runtime.step(n=5)

    runtime.commit_feedback({'reward': -100.0})
    runtime.step(n=3)

    state = runtime.snapshot_state()
    # No phi value may be NaN or infinite.
    for value in runtime.state.phi.values():
        assert math.isfinite(value), f"Phi became non-finite: {value}"

    print(f" PASS: 极端负奖励 — phi全有限, max={state['phi_summary']['max']:.3f}")
554
555
def test_C07_anchor_overload():
    """C07: ingesting with 50 anchors must not overflow attention."""
    runtime = CIERuntime(seed=42)
    anchor_names = [f"锚{index}" for index in range(50)]
    runtime.ingest("测试", anchors=anchor_names)
    runtime.step(n=5)

    state = runtime.snapshot_state()
    attention = state['attention']
    assert attention['used'] <= attention['total'] + 0.01
    print(f" PASS: 50个锚点 — nodes={state['phi_summary']['count']}, "
          f"anchors={len(runtime.state.anchor_nodes)}")
567
568
def test_C08_output_to_input_chain():
    """C08: after one seed input, later rounds are driven by empty ingests only.

    The seed is ingested once; five further rounds call ``ingest("")``,
    step and emit. At least one of those rounds must still report a
    positive ``active_count``.
    """
    rt = CIERuntime(seed=42)

    # Inject real input exactly once.
    rt.ingest("种子输入")
    rt.step(n=5)
    outputs = [rt.emit()]

    # Every later round uses an empty ingest.
    # NOTE(review): the original comment says an empty input triggers the
    # output-to-input re-feed -- confirm against CIERuntime.ingest.
    for _ in range(5):
        rt.ingest("")
        rt.step(n=3)
        outputs.append(rt.emit())

    # Activation should be sustained for at least one follow-up round
    # rather than dropping to zero immediately.
    has_activity = any(o['active_count'] > 0 for o in outputs[1:])
    assert has_activity, "No activation sustained by feedback-only rounds"
    print(f" PASS: 回灌链 — activities={[o['active_count'] for o in outputs]}")
589
590
def test_C09_concurrent_subjects_no_contamination():
    """C09: alternate Chinese and maths paragraphs, each with its own anchor."""
    runtime = CIERuntime(seed=42)

    yuwen = load_textbook("小学语文一上")[:10]
    shuxue = load_textbook("小学数学一上")[:10]

    # Interleave the two subjects; zip stops at the shorter list.
    for chinese_text, maths_text in zip(yuwen, shuxue):
        runtime.ingest(chinese_text[:30], anchors=["语文"])
        runtime.step(n=2)
        runtime.ingest(maths_text[:30], anchors=["数学"])
        runtime.step(n=2)

    # phi of both anchors is reported; only their existence is asserted.
    phi_yw = runtime.state.phi.get("语文", 0.0)
    phi_sx = runtime.state.phi.get("数学", 0.0)

    assert runtime.graph.has_node("语文"), "语文 anchor missing"
    assert runtime.graph.has_node("数学"), "数学 anchor missing"

    print(f" PASS: 交替学科 — phi(语文)={phi_yw:.3f}, phi(数学)={phi_sx:.3f}")
613
614
def test_C10_all_textbooks_stability():
    """C10: feed all registered textbooks into one runtime and check stability after each."""
    runtime = CIERuntime(seed=42)

    book_stats = {}
    for name in TEXTBOOKS:
        for text in load_textbook(name)[:20]:
            runtime.ingest(text[:50])
            runtime.step(n=2)

        state = runtime.snapshot_state()
        book_stats[name] = {
            'nodes': state['phi_summary']['count'],
            'phi_max': state['phi_summary']['max'],
            'attention_used': state['attention']['used'],
        }

        # Stability checks after every book: bounded phi, no attention
        # overflow, and no NaN/inf phi values.
        assert abs(state['phi_summary']['max']) <= 10.1, \
            f"Phi diverged after {name}: {state['phi_summary']['max']}"
        assert state['attention']['used'] <= state['attention']['total'] + 0.01, \
            f"Attention overflow after {name}"

        for value in runtime.state.phi.values():
            assert math.isfinite(value), f"Non-finite phi after {name}"

    final_snap = runtime.snapshot_state()
    print(f" PASS: 全5本课本 — 最终nodes={final_snap['phi_summary']['count']}, "
          f"edges={final_snap['graph']['edge_count']}, "
          f"experience={len(final_snap.get('experience_regions', {}).get('experience', []))}, "
          f"merges={len(final_snap['merge_events'])}")
    for name, stats in book_stats.items():
        print(f" {name}: nodes={stats['nodes']}, phi_max={stats['phi_max']:.3f}")
649
650
651# ══════════════════════════════════════════════
652# 运行器
653# ══════════════════════════════════════════════
654
def run_all():
    """Run every test group, print a report, and tell whether all passed.

    Each test runs in isolation: an ``AssertionError`` is reported as FAIL,
    any other ``Exception`` as ERROR with a traceback, and both count as
    failures. A test that returns normally counts as passed.

    Returns:
        True when no test failed, False otherwise.
    """
    groups = [
        ("A. 真实数据验证", [
            ("A01_小学语文pipeline", test_A01_xiaoxue_yuwen_pipeline),
            ("A02_小学数学mixed", test_A02_xiaoxue_shuxue_mixed),
            ("A03_初中语文complexity", test_A03_chuzhong_yuwen_complexity),
            ("A04_初中数学formula", test_A04_chuzhong_shuxue_formula),
            ("A05_高中语文long_text", test_A05_gaozhong_yuwen_long_text),
            ("A06_跨学科learning", test_A06_cross_subject_learning),
            ("A07_session_reset", test_A07_session_reset_preserves_long_term),
            ("A08_多轮feedback", test_A08_multi_round_feedback),
            ("A09_渐进沉积", test_A09_incremental_learning_sedimentation),
            ("A10_snapshot完整性", test_A10_snapshot_completeness),
        ]),
        ("B. 边界条件", [
            ("B01_空输入", test_B01_empty_input),
            ("B02_单字", test_B02_single_char),
            ("B03_超长输入", test_B03_very_long_input),
            ("B04_重复输入100次", test_B04_repeated_same_input),
            ("B05_step(0)", test_B05_step_zero),
            ("B06_step(1000)", test_B06_step_large_n),
            ("B07_注意力边界", test_B07_attention_exact_boundary),
            ("B08_emit_before_ingest", test_B08_emit_before_ingest),
            ("B09_unicode特殊字符", test_B09_unicode_special_chars),
            ("B10_reset后snapshot", test_B10_snapshot_after_reset),
        ]),
        ("C. 反例/对抗", [
            ("C01_乱码输入", test_C01_garbage_bytes),
            ("C02_list输入", test_C02_list_input),
            ("C03_纯数字", test_C03_numeric_input),
            ("C04_快速reset循环", test_C04_rapid_reset_cycle),
            ("C05_不存在节点feedback", test_C05_feedback_nonexistent_nodes),
            ("C06_极端负奖励", test_C06_negative_reward_extreme),
            ("C07_大量锚点", test_C07_anchor_overload),
            ("C08_回灌链", test_C08_output_to_input_chain),
            ("C09_交替学科", test_C09_concurrent_subjects_no_contamination),
            ("C10_全5本课本稳定性", test_C10_all_textbooks_stability),
        ]),
    ]

    total_pass = 0
    total_fail = 0
    failures = []  # (test name, reason) pairs for the final summary

    for group_name, tests in groups:
        print(f"\n{'='*60}")
        print(f" {group_name}")
        print(f"{'='*60}")

        for test_name, test_fn in tests:
            print(f"\n[{test_name}]")
            try:
                test_fn()
            except AssertionError as e:
                print(f" FAIL: {e}")
                total_fail += 1
                failures.append((test_name, str(e)))
            except Exception as e:
                # Top-level boundary: report the crash and keep running the
                # remaining tests.
                print(f" ERROR: {e}")
                traceback.print_exc()
                total_fail += 1
                failures.append((test_name, f"ERROR: {e}"))
            else:
                total_pass += 1

    print(f"\n{'='*60}")
    print(f" 总计: {total_pass} passed, {total_fail} failed, "
          f"{total_pass + total_fail} total")
    print(f"{'='*60}")

    if failures:
        print("\n失败项:")
        for name, reason in failures:
            print(f" ✗ {name}: {reason}")

    return total_fail == 0
731
732
if __name__ == '__main__':
    # Exit status 0 when every test passed, 1 otherwise.
    sys.exit(0 if run_all() else 1)