# im_wower · 2026-04-01
# formal_validation.py
1import sys, os, json, math, time
2sys.path.insert(0, "/Users/george/code/CIE-Unified")
3from cie import CIERuntime
4
# Root directory of the textbook dataset snapshot. Setting the
# CIE_DATASET_DIR environment variable to a non-empty value overrides the
# built-in default, so the script is not tied to one machine's home directory.
DS = (
    os.environ.get("CIE_DATASET_DIR")
    or "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28"
)
6
def load_recs(stage, subject, n=80):
    """Load up to *n* content texts for one stage/subject split.

    Reads ``<DS>/splits/by_stage_subject/<stage>/<subject>.jsonl`` one JSON
    object per line and keeps the ``text`` field of every record whose
    ``is_content`` field is truthy and whose text has at least 4 characters.

    Args:
        stage: Directory name of the stage under ``by_stage_subject``.
        subject: File stem of the subject's ``.jsonl`` file.
        n: Maximum number of texts to return.

    Returns:
        A list of text strings in file order. Empty when the split file does
        not exist or when ``n`` is not positive.
    """
    path = os.path.join(DS, "splits", "by_stage_subject", stage, f"{subject}.jsonl")
    recs = []
    if n <= 0:
        return recs
    try:
        # Explicit UTF-8: the records are expected to hold non-ASCII text and
        # the platform default encoding is not guaranteed to be UTF-8.
        f = open(path, encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # A missing split is reported as "no records", not as an error.
        return recs
    with f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines such as a trailing newline
            rec = json.loads(line)
            if not rec.get("is_content"):
                continue
            # "text" may be absent or null; both count as empty text.
            t = rec.get("text") or ""
            if len(t) >= 4:
                recs.append(t)
            if len(recs) >= n:
                break
    return recs
19
# Stage/subject pairs that are each validated with their own runtime.
combos = [("小学", "语文"), ("小学", "数学"), ("初中", "语文"), ("初中", "数学"), ("高中", "语文")]
# One {"name", "status", "detail"} dict is appended per check.
tests = []

# Pipeline + stability per combo
for stage, subj in combos:
    label = stage + subj
    recs = load_recs(stage, subj, 80)
    if not recs:
        tests.append({"name": label, "status": "FAIL", "detail": "no data"})
        continue

    rt = CIERuntime(seed=42)
    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # wall-clock adjustments while the loop is running.
    t0 = time.perf_counter()
    for r in recs:
        rt.ingest(r[:60])  # only the first 60 characters of each record
        rt.step(n=1)
    elapsed = time.perf_counter() - t0

    ot = rt.emit()
    if ot["activated"]:
        # Feed the first activated node back as correct with reward 1.0.
        rt.commit_feedback({"correct": [ot["activated"][0]["node"]], "reward": 1.0})

    snap = rt.snapshot_state()
    phi = snap["phi_summary"]
    # NOTE(review): only the summary max is bounded here; confirm whether the
    # summary min should be held to the same limit.
    ok = phi["count"] > 20 and abs(phi["max"]) <= 10.1
    ok &= snap["attention"]["used"] <= snap["attention"]["total"] + 0.01
    ok &= all(math.isfinite(v) for v in rt.state.phi.values())

    # Emergence: top words
    g = rt.graph
    cn_bg = []
    for se in g.fwd_edges.values():
        for dst, edge in se.items():
            # Bounds are U+4E00..U+9FFF (CJK Unified Ideographs).
            # NOTE(review): this is a lexicographic string comparison; confirm
            # node labels are single characters.
            if "\u4e00" <= edge.src <= "\u9fff" and "\u4e00" <= dst <= "\u9fff":
                bwd = g.get_bwd_weight(dst, edge.src)  # reverse weight, dst <- src
                # Forward/backward weight ratio; when the backward weight is
                # at most 0.01 the forward weight is scaled by 100 instead.
                ratio = edge.weight / bwd if bwd > 0.01 else edge.weight * 100
                cn_bg.append((edge.src + dst, round(ratio, 1)))
    cn_bg.sort(key=lambda x: -x[1])  # highest ratio first

    d = (
        f"n={phi['count']}, e={snap['graph']['edge_count']}, "
        f"phi=[{phi['min']:.3f},{phi['max']:.3f}], mode={ot['mode']}, "
        f"t={elapsed:.1f}s, words={cn_bg[:5]}"
    )
    tests.append({"name": label, "status": "PASS" if ok else "FAIL", "detail": d})
57
# Cross-stage: one runtime ingests 语文 records from every stage in turn.
rt2 = CIERuntime(seed=42)
for stage in ("小学", "初中", "高中"):
    stage_recs = load_recs(stage, "语文", 30)
    for r in stage_recs:
        rt2.ingest(r[:50])
        rt2.step(n=1)

s2 = rt2.snapshot_state()
phi2 = s2["phi_summary"]
# Passes when the phi max stays within 10.1 and more than 30 phi entries exist.
ok2 = abs(phi2["max"]) <= 10.1 and phi2["count"] > 30
if ok2:
    status2 = "PASS"
else:
    status2 = "FAIL"
tests.append({
    "name": "cross_stage",
    "status": status2,
    "detail": f"n={phi2['count']}, phi={phi2['max']:.3f}",
})
68
# Cross-subject: one runtime ingests 小学 records from several subjects,
# passing the subject name as the anchors argument of every ingest call.
rt3 = CIERuntime(seed=42)
for subj in ("语文", "数学", "科学"):
    subj_recs = load_recs("小学", subj, 30)
    for r in subj_recs:
        rt3.ingest(r[:50], anchors=[subj])
        rt3.step(n=1)

s3 = rt3.snapshot_state()
phi3 = s3["phi_summary"]
# Passes when the phi max stays within 10.1.
ok3 = abs(phi3["max"]) <= 10.1
if ok3:
    status3 = "PASS"
else:
    status3 = "FAIL"
tests.append({
    "name": "cross_subject",
    "status": status3,
    "detail": f"n={phi3['count']}, phi={phi3['max']:.3f}, cores={len(rt3.state.ability_cores)}",
})
79
# Summary: print one line per check, then the pass count.
passed = sum(1 for t in tests if t["status"] == "PASS")
failed = sum(1 for t in tests if t["status"] == "FAIL")
for t in tests:
    print(f"[{t['status']}] {t['name']}: {t['detail']}")
print(f"\n总计: {passed}/{len(tests)} PASS")

# ensure_ascii=False writes the non-ASCII test names verbatim, so the file is
# opened as UTF-8 explicitly rather than with the platform default encoding,
# which could fail to encode them.
with open("/tmp/formal_val_results.json", "w", encoding="utf-8") as f:
    json.dump(
        {
            "tests": tests,
            "summary": {"passed": passed, "failed": failed, "total": len(tests), "dataset": DS},
        },
        f,
        ensure_ascii=False,
        indent=2,
        default=str,
    )