tests/formal_validation.py@test/adversarial-verification

CIE-Unified / tests
integration phase2.3: 清理残留bwd_weight调用+收紧attention gate+context语义 (53/53)
12af7eb
im_wower · 2026-04-01
formal_validation.py

 1import sys, os, json, math, time
 2sys.path.insert(0, "/Users/george/code/CIE-Unified")
 3from cie import CIERuntime
 4
 5DS = "/Users/george/code/cie-datasets/china_text_book_md/v2026-03-28"
 6
 7def load_recs(stage, subject, n=80):
 8    path = os.path.join(DS, "splits", "by_stage_subject", stage, f"{subject}.jsonl")
 9    recs = []
10    if not os.path.exists(path): return recs
11    with open(path) as f:
12        for line in f:
13            rec = json.loads(line)
14            if not rec.get("is_content"): continue
15            t = rec.get("text","")
16            if len(t) >= 4: recs.append(t)
17            if len(recs) >= n: break
18    return recs
19
# (stage, subject) pairs that each get an independent pipeline + stability run.
combos = [("小学","语文"),("小学","数学"),("初中","语文"),("初中","数学"),("高中","语文")]
# Accumulated result rows: {"name": ..., "status": "PASS" | "FAIL", "detail": ...}.
tests = []

# Pipeline + stability per combo: ingest up to 80 records into a fresh runtime,
# emit once, optionally commit feedback, then check the resulting snapshot.
for stage, subj in combos:
    label = stage + subj
    recs = load_recs(stage, subj, 80)
    if not recs:
        # A missing or empty split is reported as a failure, not skipped.
        tests.append({"name": label, "status": "FAIL", "detail": "no data"})
        continue
    rt = CIERuntime(seed=42)
    # perf_counter is monotonic, so the elapsed figure cannot be distorted by
    # wall-clock adjustments the way time.time() differences can.
    t0 = time.perf_counter()
    for r in recs:
        rt.ingest(r[:60])  # only the first 60 characters of each record
        rt.step(n=1)
    elapsed = time.perf_counter() - t0
    ot = rt.emit()
    if ot["activated"]:
        # Feed the first activated node back as correct, with reward 1.0.
        rt.commit_feedback({"correct": [ot["activated"][0]["node"]], "reward": 1.0})
    snap = rt.snapshot_state()

    # Stability checks; every one of them is evaluated, then combined.
    phi_ok = snap["phi_summary"]["count"] > 20 and abs(snap["phi_summary"]["max"]) <= 10.1
    attention_ok = snap["attention"]["used"] <= snap["attention"]["total"] + 0.01
    finite_ok = all(math.isfinite(v) for v in rt.state.phi.values())
    ok = phi_ok & attention_ok & finite_ok

    # Emergence: top words. Collect forward/backward weight ratios for edges
    # whose endpoints both compare within U+4E00..U+9FFF.
    # NOTE(review): this is a plain string comparison, so it is only a strict
    # CJK test if node ids are single characters -- confirm.
    g = rt.graph
    cn_bg = []
    for se in g.fwd_edges.values():
        for dst, edge in se.items():
            if "\u4e00" <= edge.src <= "\u9fff" and "\u4e00" <= dst <= "\u9fff":
                bwd = g.get_bwd_weight(dst, edge.src)  # backward weight, dst <- src
                # When the backward weight is <= 0.01, use weight * 100 instead of dividing.
                ratio = edge.weight / bwd if bwd > 0.01 else edge.weight * 100
                cn_bg.append((edge.src+dst, round(ratio,1)))
    # Highest ratio first; only the first five entries are reported below.
    cn_bg.sort(key=lambda x: -x[1])

    d = (
        f"n={snap['phi_summary']['count']}, e={snap['graph']['edge_count']}, "
        f"phi=[{snap['phi_summary']['min']:.3f},{snap['phi_summary']['max']:.3f}], "
        f"mode={ot['mode']}, t={elapsed:.1f}s, words={cn_bg[:5]}"
    )
    tests.append({"name": label, "status": "PASS" if ok else "FAIL", "detail": d})
57
# Cross-stage: push 语文 records from all three stages, in order, through one
# shared runtime and check the phi summary of the final snapshot.
rt2 = CIERuntime(seed=42)
for level in ("小学", "初中", "高中"):
    for text in load_recs(level, "语文", 30):
        rt2.ingest(text[:50])  # only the first 50 characters of each record
        rt2.step(n=1)
s2 = rt2.snapshot_state()
# Pass requires |phi max| <= 10.1 and a phi_summary count above 30.
ok2 = abs(s2["phi_summary"]["max"]) <= 10.1 and s2["phi_summary"]["count"] > 30
if ok2:
    cross_stage_status = "PASS"
else:
    cross_stage_status = "FAIL"
cross_stage_detail = f"n={s2['phi_summary']['count']}, phi={s2['phi_summary']['max']:.3f}"
tests.append({
    "name": "cross_stage",
    "status": cross_stage_status,
    "detail": cross_stage_detail,
})
68
# Cross-subject: push 小学 records of three subjects through one shared runtime,
# passing the subject name as the single anchor of every ingest call.
rt3 = CIERuntime(seed=42)
for topic in ("语文", "数学", "科学"):
    for text in load_recs("小学", topic, 30):
        rt3.ingest(text[:50], anchors=[topic])  # first 50 characters only
        rt3.step(n=1)
s3 = rt3.snapshot_state()
# The only pass criterion here is |phi max| <= 10.1.
ok3 = abs(s3["phi_summary"]["max"]) <= 10.1
if ok3:
    cross_subject_status = "PASS"
else:
    cross_subject_status = "FAIL"
cross_subject_detail = f"n={s3['phi_summary']['count']}, phi={s3['phi_summary']['max']:.3f}, cores={len(rt3.state.ability_cores)}"
tests.append({
    "name": "cross_subject",
    "status": cross_subject_status,
    "detail": cross_subject_detail,
})
79
# Summary: count outcomes, print one line per test, then persist everything
# as JSON for later inspection.
passed = sum(1 for t in tests if t["status"]=="PASS")
failed = sum(1 for t in tests if t["status"]=="FAIL")
for t in tests:
    print(f"[{t['status']}] {t['name']}: {t['detail']}")
print(f"\n总计: {passed}/{len(tests)} PASS")

# ensure_ascii=False writes the Chinese names/details as raw characters, so the
# file encoding must be explicit rather than the platform default.
with open("/tmp/formal_val_results.json", "w", encoding="utf-8") as f:
    json.dump({"tests": tests, "summary": {"passed": passed, "failed": failed, "total": len(tests), "dataset": DS}}, f, ensure_ascii=False, indent=2, default=str)