review: branch-a real textbook validation against branch-b dataset

commit: 58075c8
parent: 419ae8d
author: codex@macbookpro
date: 2026-03-31 19:31:17 +0800 CST

review: branch-a real textbook validation against branch-b dataset

5 files changed, +2791, -0

A cie/validation_real_textbooks.py

A reports/2026-03-31_branch_a_real_textbook_validation.json

A reports/2026-03-31_branch_a_real_textbook_validation.md

A reviews/2026-03-31_branch_a_real_textbook_validation.md

A tests/test_branch_a_real_textbooks.py

A cie/validation_real_textbooks.py

+996, -0

  1@@ -0,0 +1,996 @@
from __future__ import annotations

import argparse
import json
import math
import os
import subprocess
import sys
from pathlib import Path
from typing import Any, Callable, Dict, List, Sequence

from .runtime import CIERuntime, REQUIRED_SNAPSHOT_KEYS
 13+
 14+
 15+BASE_COMMIT = "419ae8d39150806011c1eb6082c7fc8c6a337735"
 16+BRANCH_B_REFERENCE_COMMIT = "c7342881bb2ebfa5e7f927c91a7806416288573b"
 17+BRANCH_B_REFERENCE_LABEL = "c734288"
 18+REPO_ROOT = Path(__file__).resolve().parent.parent
 19+DATASET_DIR = Path("/Users/george/code/china-text-book-md")
 20+DEFAULT_JSON_REPORT_PATH = REPO_ROOT / "reports" / "2026-03-31_branch_a_real_textbook_validation.json"
 21+DEFAULT_MARKDOWN_REPORT_PATH = REPO_ROOT / "reports" / "2026-03-31_branch_a_real_textbook_validation.md"
 22+DEFAULT_REVIEW_REPORT_PATH = REPO_ROOT / "reviews" / "2026-03-31_branch_a_real_textbook_validation.md"
 23+VALID_SCENARIO_STATUSES = ("PASS", "FAIL", "N/A", "STRUCTURAL MISMATCH")
 24+STAGE_NAMES = ("memory", "experience", "skill_belt", "ability_core")
 25+REQUIRED_REPORT_KEYS = (
 26+    "branch",
 27+    "base_commit",
 28+    "branch_b_reference_commit",
 29+    "dataset_dir",
 30+    "dataset_files",
 31+    "dataset_check",
 32+    "scenarios",
 33+    "overall_summary",
 34+    "structural_mismatches",
 35+    "known_limitations",
 36+    "recommendation",
 37+)
 38+
 39+TEXTBOOKS = {
 40+    "小学语文一上": "小学_语文_统编版_义务教育教科书·语文一年级上册.md",
 41+    "小学数学一上": "小学_数学_人教版_义务教育教科书 · 数学一年级上册.md",
 42+    "初中语文七上": "初中_语文_统编版-人民教育出版社_七年级_义务教育教科书·语文七年级上册.md",
 43+    "初中数学七上": "初中_数学_人教版-人民教育出版社_七年级_义务教育教科书·数学七年级上册.md",
 44+    "高中语文必修上": "高中_语文_统编版-人民教育出版社_普通高中教科书·语文必修 上册.md",
 45+}
 46+
 47+BRANCH_B_REFERENCE_EXPECTATIONS = {
 48+    "A01": {
 49+        "summary": "Branch B stage report: PASS; 126 nodes, 166 flows on 小学语文一上 pipeline.",
 50+        "source": "STAGE_REPORT.md @ c734288",
 51+    },
 52+    "A02": {
 53+        "summary": "Branch B stage report: PASS; 58 nodes, has_cn=True on 小学数学一上 mixed text.",
 54+        "source": "STAGE_REPORT.md @ c734288",
 55+    },
 56+    "A03": {
 57+        "summary": "Branch B stage report: PASS; 276 nodes, 20 sedimentation traces on 初中语文七上.",
 58+        "source": "STAGE_REPORT.md @ c734288",
 59+    },
 60+    "A04": {
 61+        "summary": "Branch B stage report: PASS; 294 edges, asymmetry ratio 1.00 on 初中数学七上.",
 62+        "source": "STAGE_REPORT.md @ c734288",
 63+    },
 64+    "A05": {
 65+        "summary": "Branch B stage report: PASS; 397 nodes, phi range [-0.13, 0.15] on 高中语文必修上.",
 66+        "source": "STAGE_REPORT.md @ c734288",
 67+    },
 68+    "A06": {
 69+        "summary": "Branch B stage report: PASS; 8 new nodes after 语文→数学 subject switch.",
 70+        "source": "STAGE_REPORT.md @ c734288",
 71+    },
 72+    "A07": {
 73+        "summary": "Branch B stage report: PASS; 182/189 phi entries preserved after reset.",
 74+        "source": "STAGE_REPORT.md @ c734288",
 75+    },
 76+    "A08": {
 77+        "summary": "Branch B stage report: PASS; confidence 0.333→0.889→0.381 after positive/negative feedback.",
 78+        "source": "STAGE_REPORT.md @ c734288",
 79+    },
 80+    "A09": {
 81+        "summary": "Branch B stage report: PASS; sedimentation gradient (20,4)→(20,10).",
 82+        "source": "STAGE_REPORT.md @ c734288",
 83+    },
 84+    "A10": {
 85+        "summary": "Branch B stage report: PASS; 16 snapshot fields present on real textbook state.",
 86+        "source": "STAGE_REPORT.md @ c734288",
 87+    },
 88+}
 89+
 90+
 91+def _round(value: float) -> float:
 92+    return round(float(value), 4)
 93+
 94+
 95+def _git_stdout(args: Sequence[str], fallback: str) -> str:
 96+    try:
 97+        completed = subprocess.run(
 98+            ["git", *args],
 99+            cwd=REPO_ROOT,
100+            check=True,
101+            capture_output=True,
102+            text=True,
103+        )
104+    except (FileNotFoundError, subprocess.CalledProcessError):
105+        return fallback
106+    output = completed.stdout.strip()
107+    return output or fallback
108+
109+
def _current_branch() -> str:
    """Return the abbreviated name of the checked-out ref, or a fixed default label.

    NOTE(review): on a detached checkout git is expected to print the literal
    ``HEAD`` here, which would then be reported as the branch name - confirm
    whether that matters for the report.
    """
    default_label = "review/branch-a-real-textbook-validation"
    git_args = ["rev-parse", "--abbrev-ref", "HEAD"]
    return _git_stdout(git_args, default_label)
112+
113+
def dataset_file_rows() -> List[Dict[str, Any]]:
    """Describe every configured textbook file and whether it exists on disk.

    Returns:
        One dict per ``TEXTBOOKS`` entry, in declaration order, with the keys
        ``textbook``, ``filename``, ``path`` (as a string) and ``exists``.
    """
    located = (
        (label, file_name, DATASET_DIR / file_name)
        for label, file_name in TEXTBOOKS.items()
    )
    return [
        {
            "textbook": label,
            "filename": file_name,
            "path": str(location),
            "exists": location.is_file(),
        }
        for label, file_name, location in located
    ]
127+
128+
def check_dataset() -> Dict[str, Any]:
    """Summarise dataset availability without raising.

    Returns:
        A dict with ``directory_exists``, ``all_required_files_exist``,
        ``missing_paths`` (paths of absent files) and ``file_results`` (the
        per-file rows from :func:`dataset_file_rows`).
    """
    file_results = dataset_file_rows()
    absent = [entry["path"] for entry in file_results if not entry["exists"]]
    has_directory = DATASET_DIR.is_dir()
    summary: Dict[str, Any] = {}
    summary["directory_exists"] = has_directory
    summary["all_required_files_exist"] = has_directory and not absent
    summary["missing_paths"] = absent
    summary["file_results"] = file_results
    return summary
139+
140+
def _require_dataset() -> Dict[str, Any]:
    """Return the dataset check, or raise if anything required is missing.

    Raises:
        FileNotFoundError: When the dataset check is not fully satisfied. The
            message lists the missing file paths, or the dataset directory
            itself when no individual file was reported missing.
    """
    report = check_dataset()
    if not report["all_required_files_exist"]:
        unavailable = report["missing_paths"] or [str(DATASET_DIR)]
        raise FileNotFoundError("Required textbook dataset is missing:\n" + "\n".join(unavailable))
    return report
147+
148+
def load_textbook_paragraphs(name: str) -> List[str]:
    """Read one textbook and return its cleaned, non-markup text lines.

    A line is kept only when, after stripping surrounding whitespace, it:
      * is non-empty,
      * does not start with ``#``, ``**``, ``---`` or ``![``,
      * has no more than 30% control characters (code points below 32,
        ignoring newline and tab), and
      * contains at least two characters in the U+4E00..U+9FFF range.

    Args:
        name: Key into ``TEXTBOOKS``.

    Returns:
        The kept lines, stripped, in file order.
    """
    source_path = DATASET_DIR / TEXTBOOKS[name]
    skipped_prefixes = ("#", "**", "---", "![")
    kept: List[str] = []
    for raw_line in source_path.read_text(encoding="utf-8").splitlines():
        text = raw_line.strip()
        if not text or text.startswith(skipped_prefixes):
            continue
        control_count = len([ch for ch in text if ord(ch) < 32 and ch not in "\n\t"])
        if control_count > len(text) * 0.3:
            # Mostly control characters: treat the line as extraction residue.
            continue
        cjk_count = len([ch for ch in text if "\u4e00" <= ch <= "\u9fff"])
        if cjk_count >= 2:
            kept.append(text)
    return kept
168+
169+
def _slice_paragraphs(name: str, paragraph_count: int) -> List[str]:
    """Return the first *paragraph_count* cleaned paragraphs of a textbook.

    Raises:
        ValueError: When the textbook yields fewer cleaned paragraphs than
            requested.
    """
    cleaned = load_textbook_paragraphs(name)
    available = len(cleaned)
    if available >= paragraph_count:
        return cleaned[:paragraph_count]
    raise ValueError(f"{name} only has {available} cleaned paragraphs; need {paragraph_count}.")
175+
176+
177+def _feed(runtime: CIERuntime, paragraphs: List[str], char_limit: int, step_n: int) -> List[str]:
178+    used_slices: List[str] = []
179+    for paragraph in paragraphs:
180+        used = paragraph[:char_limit]
181+        runtime.ingest(used)
182+        runtime.step(n=step_n)
183+        used_slices.append(used)
184+    return used_slices
185+
186+
def _stage_counts(runtime: CIERuntime) -> Dict[str, int]:
    """Count sedimentation profiles per stage.

    Every name in ``STAGE_NAMES`` is present in the result (starting at zero);
    a stage value outside that tuple is still counted under its own key.
    """
    tally = dict.fromkeys(STAGE_NAMES, 0)
    for profile in runtime.state.sedimentation.values():
        stage = profile.stage
        tally[stage] = tally.get(stage, 0) + 1
    return tally
192+
193+
def _phi_range(runtime: CIERuntime) -> Dict[str, float] | None:
    """Return the rounded min/max of ``runtime.state.phi`` values, or ``None`` if empty."""
    potentials = list(runtime.state.phi.values())
    if potentials:
        lowest = min(potentials)
        highest = max(potentials)
        return {"min": _round(lowest), "max": _round(highest)}
    return None
199+
200+
def _max_abs(mapping: Dict[Any, float]) -> float:
    """Return the largest absolute value in *mapping*, rounded; ``0.0`` when it is empty."""
    if mapping:
        return _round(max(map(abs, mapping.values())))
    return 0.0
205+
206+
207+def _all_finite(runtime: CIERuntime) -> bool:
208+    for mapping in (runtime.state.phi, runtime.state.mu, runtime.state.J, runtime.state.anchor_nodes):
209+        for value in mapping.values():
210+            if not math.isfinite(value):
211+                return False
212+    return True
213+
214+
215+def _contains_chinese(node: str) -> bool:
216+    return any("\u4e00" <= char <= "\u9fff" for char in node)
217+
218+
219+def _contains_digit(node: str) -> bool:
220+    return any(char.isdigit() for char in node)
221+
222+
def _top_level_snapshot_metrics(runtime: CIERuntime, snapshot: Dict[str, Any], output: str | None = None) -> Dict[str, Any]:
    """Build the metric dict shared by every scenario result.

    Copies selected snapshot fields, replaces list-valued fields with their
    lengths, and adds figures read directly from ``runtime.state``. Keys are
    inserted in a fixed order so serialised reports stay stable.

    Args:
        runtime: Runtime whose state supplies phi range, stage counts and
            graph/J sizes.
        snapshot: Mapping returned by ``runtime.snapshot_state()``.
        output: Optional emit() text; stored under ``emit_output`` when given.
    """
    metrics: Dict[str, Any] = {}
    for field in ("phi_summary", "mu_summary", "J_summary", "active_region"):
        metrics[field] = snapshot[field]
    metrics["active_region_size"] = len(snapshot["active_region"])
    for field in ("bound_ability_core", "anchor_pull", "drift_score", "free_capacity"):
        metrics[field] = snapshot[field]
    # List-valued observables are reported by size only.
    for field in (
        "experience_regions",
        "skill_belt_candidates",
        "sedimentation_trace",
        "merge_events",
        "decay_events",
    ):
        metrics[f"{field}_count"] = len(snapshot[field])
    metrics["output_mode"] = snapshot["output_mode"]
    metrics["feedback_effect"] = snapshot["feedback_effect"]
    metrics["phi_range"] = _phi_range(runtime)
    metrics["stage_counts"] = _stage_counts(runtime)
    metrics["graph_node_count"] = len(runtime.state.graph.nodes())
    metrics["graph_edge_count_proxy"] = len(runtime.state.J)
    metrics["attention_usage"] = "NOT APPLICABLE: Branch A exposes free_capacity but not attention used/total."
    if output is not None:
        metrics["emit_output"] = output
    return metrics
250+
251+
def _scenario_result(
    scenario_id: str,
    title: str,
    textbooks: List[str],
    status: str,
    reason: str,
    *,
    ran_successfully: bool,
    key_metrics: Dict[str, Any],
    fairness_notes: List[str] | None = None,
    structural_mismatch: str | None = None,
) -> Dict[str, Any]:
    """Assemble the report record for one scenario.

    Args:
        scenario_id: Key into ``BRANCH_B_REFERENCE_EXPECTATIONS`` (e.g. ``"A01"``).
        title: Human-readable scenario title.
        textbooks: ``TEXTBOOKS`` keys used by the scenario.
        status: One of ``VALID_SCENARIO_STATUSES``.
        reason: Explanation of the status.
        ran_successfully: Whether the scenario executed without raising.
        key_metrics: Scenario metrics to embed.
        fairness_notes: Optional comparability caveats; a falsy value becomes ``[]``.
        structural_mismatch: Optional description of a non-comparable metric.

    Raises:
        ValueError: When *status* is not an allowed status.
    """
    if status not in VALID_SCENARIO_STATUSES:
        raise ValueError(f"Invalid scenario status: {status}")
    source_files = [str(DATASET_DIR / TEXTBOOKS[name]) for name in textbooks]
    reference = BRANCH_B_REFERENCE_EXPECTATIONS[scenario_id]
    notes = fairness_notes if fairness_notes else []
    return dict(
        scenario_id=scenario_id,
        title=title,
        textbook_used=textbooks,
        dataset_files=source_files,
        ran_successfully=ran_successfully,
        status=status,
        reason=reason,
        key_metrics=key_metrics,
        branch_b_reference_expectation=reference,
        fairness_notes=notes,
        structural_mismatch=structural_mismatch,
    )
279+
280+
def _scenario_failure(
    scenario_id: str,
    title: str,
    textbooks: List[str],
    exc: Exception,
) -> Dict[str, Any]:
    """Build a ``FAIL`` record for a scenario whose runner raised *exc*.

    The record is marked ``ran_successfully=False`` and carries the exception
    text as its only metric.
    """
    error_kind = exc.__class__.__name__
    explanation = f"Scenario execution raised {error_kind}: {exc}"
    failure_metrics = {"exception": str(exc)}
    return _scenario_result(
        scenario_id,
        title,
        textbooks,
        "FAIL",
        explanation,
        ran_successfully=False,
        key_metrics=failure_metrics,
        fairness_notes=[],
        structural_mismatch=None,
    )
298+
299+
def run_a01() -> Dict[str, Any]:
    """A01: run the basic ingest/step/emit pipeline on 小学语文一上.

    Feeds 30 paragraphs (50 characters each, 3 steps per paragraph) and passes
    when the snapshot reports a positive mu ``active_count`` and a positive J
    ``edge_count``.
    """
    paragraph_count, char_limit, step_n = 30, 50, 3
    runtime = CIERuntime()
    fed = _feed(
        runtime,
        _slice_paragraphs("小学语文一上", paragraph_count),
        char_limit=char_limit,
        step_n=step_n,
    )
    emitted = runtime.emit()
    snapshot = runtime.snapshot_state()

    has_active_state = snapshot["mu_summary"]["active_count"] > 0 and snapshot["J_summary"]["edge_count"] > 0
    if has_active_state:
        status = "PASS"
        reason = "Pipeline ran on the required real-data slice and produced non-empty phi/mu/J state."
    else:
        status = "FAIL"
        reason = "Pipeline did not retain active state after the required real-data slice."

    metrics = _top_level_snapshot_metrics(runtime, snapshot, emitted)
    metrics["input_slice"] = {
        "paragraphs": paragraph_count,
        "chars_per_paragraph": char_limit,
        "step_n": step_n,
    }
    metrics["used_paragraph_count"] = len(fed)

    notes = [
        "Branch A emit() returns a string, so output mode and active counts come from snapshot_state().",
        "Observed state is much smaller than Branch B's reference counts, but the scenario does complete end-to-end.",
    ]
    return _scenario_result(
        "A01",
        "小学语文一上 — pipeline",
        ["小学语文一上"],
        status,
        reason,
        ran_successfully=True,
        key_metrics=metrics,
        fairness_notes=notes,
        structural_mismatch=None,
    )
332+
333+
def run_a02() -> Dict[str, Any]:
    """A02: check node formation on the mixed Chinese/numeric 小学数学一上 text.

    Feeds 20 paragraphs (40 characters each, 3 steps per paragraph). Passes
    when at least one graph node contains a Chinese character; digit-bearing
    nodes are counted and sampled but do not affect the status.
    """
    paragraph_count, char_limit, step_n = 20, 40, 3
    runtime = CIERuntime()
    _feed(
        runtime,
        _slice_paragraphs("小学数学一上", paragraph_count),
        char_limit=char_limit,
        step_n=step_n,
    )
    emitted = runtime.emit()
    snapshot = runtime.snapshot_state()

    graph_nodes = runtime.state.graph.nodes()
    with_chinese = list(filter(_contains_chinese, graph_nodes))
    with_digits = list(filter(_contains_digit, graph_nodes))

    if with_chinese:
        status = "PASS"
        reason = "Chinese-bearing nodes exist on the mixed textbook slice; digit-bearing nodes are reported separately."
    else:
        status = "FAIL"
        reason = "No Chinese-bearing nodes were formed on the mixed textbook slice."

    metrics = _top_level_snapshot_metrics(runtime, snapshot, emitted)
    metrics["input_slice"] = {
        "paragraphs": paragraph_count,
        "chars_per_paragraph": char_limit,
        "step_n": step_n,
    }
    metrics["has_chinese_nodes"] = bool(with_chinese)
    metrics["has_digit_nodes"] = bool(with_digits)
    metrics["chinese_node_count"] = len(with_chinese)
    metrics["digit_node_count"] = len(with_digits)
    metrics["sample_digit_nodes"] = with_digits[:5]

    notes = [
        "Branch A tokenizes with Unicode word regexes, so digits may be absorbed into coarse tokens or absent from this slice.",
        "The required honest report here is whether Chinese nodes exist and whether digit-bearing nodes were actually observed.",
    ]
    return _scenario_result(
        "A02",
        "小学数学一上 — mixed text",
        ["小学数学一上"],
        status,
        reason,
        ran_successfully=True,
        key_metrics=metrics,
        fairness_notes=notes,
        structural_mismatch=None,
    )
373+
374+
def run_a03() -> Dict[str, Any]:
    """A03: look for sedimentation observables on 初中语文七上.

    Feeds 50 paragraphs (60 characters each, 3 steps per paragraph) and passes
    when both ``sedimentation_trace`` and ``experience_regions`` are non-empty
    in the snapshot.
    """
    paragraph_count, char_limit, step_n = 50, 60, 3
    runtime = CIERuntime()
    _feed(
        runtime,
        _slice_paragraphs("初中语文七上", paragraph_count),
        char_limit=char_limit,
        step_n=step_n,
    )
    snapshot = runtime.snapshot_state()

    trace_present = bool(snapshot["sedimentation_trace"])
    sedimented = trace_present and bool(snapshot["experience_regions"])
    if sedimented:
        status = "PASS"
        reason = "Sedimentation and experience-region observables are present on the required real-data slice."
    else:
        status = "FAIL"
        reason = "Sedimentation observables did not materialize on the required real-data slice."

    metrics = _top_level_snapshot_metrics(runtime, snapshot)
    metrics["input_slice"] = {
        "paragraphs": paragraph_count,
        "chars_per_paragraph": char_limit,
        "step_n": step_n,
    }

    notes = [
        "Branch A exposes sedimentation_trace and experience_regions, but its tokenized graph remains much smaller than Branch B's reference run.",
        "sedimentation_trace is capped, so count saturation is expected and should not be over-interpreted.",
    ]
    return _scenario_result(
        "A03",
        "初中语文七上 — complexity / sedimentation",
        ["初中语文七上"],
        status,
        reason,
        ran_successfully=True,
        key_metrics=metrics,
        fairness_notes=notes,
        structural_mismatch=None,
    )
402+
403+
404+def run_a04() -> Dict[str, Any]:
405+    runtime = CIERuntime()
406+    _feed(runtime, _slice_paragraphs("初中数学七上", 30), char_limit=50, step_n=3)
407+    snapshot = runtime.snapshot_state()
408+    asymmetry_proxy = []
409+    for (left, right), value in runtime.state.J.items():
410+        reverse = runtime.state.J.get((right, left), 0.0)
411+        asymmetry_proxy.append(abs(value - reverse) / max(value, reverse, 1e-9))
412+    metrics = _top_level_snapshot_metrics(runtime, snapshot)
413+    metrics.update(
414+        {
415+            "input_slice": {"paragraphs": 30, "chars_per_paragraph": 50, "step_n": 3},
416+            "directed_flow_asymmetry_proxy_avg": _round(sum(asymmetry_proxy) / len(asymmetry_proxy)) if asymmetry_proxy else 0.0,
417+            "top_flows": snapshot["J_summary"]["top_flows"],
418+        }
419+    )
420+    mismatch = (
421+        "Branch B's A04 metric is based on forward/backward graph edge weights. Branch A only exposes directed J flow, "
422+        "not a directly comparable directed graph-edge surface, so a fair asymmetry-ratio comparison is a structural mismatch."
423+    )
424+    return _scenario_result(
425+        "A04",
426+        "初中数学七上 — formula / structure",
427+        ["初中数学七上"],
428+        "STRUCTURAL MISMATCH",
429+        "The scenario ran, but the primary Branch B asymmetry-ratio metric does not map cleanly onto Branch A.",
430+        ran_successfully=True,
431+        key_metrics=metrics,
432+        fairness_notes=[
433+            "Directed J flow can be described, but it is not the same observable as Branch B's directed graph edge weights.",
434+            "Using the J proxy as if it were the same metric would overstate comparability.",
435+        ],
436+        structural_mismatch=mismatch,
437+    )
438+
439+
def run_a05() -> Dict[str, Any]:
    """A05: long-text numerical stability on 高中语文必修上.

    Feeds 80 paragraphs (80 characters each, 2 steps per paragraph). Fails when
    any tracked state value is non-finite or when the largest absolute phi, mu
    or J value exceeds 1000.
    """
    paragraph_count, char_limit, step_n = 80, 80, 2
    divergence_limit = 1000.0
    runtime = CIERuntime()
    _feed(
        runtime,
        _slice_paragraphs("高中语文必修上", paragraph_count),
        char_limit=char_limit,
        step_n=step_n,
    )
    snapshot = runtime.snapshot_state()

    finite = _all_finite(runtime)
    peak_phi = _max_abs(runtime.state.phi)
    peak_mu = _max_abs(runtime.state.mu)
    peak_j = _max_abs(runtime.state.J)
    within_bounds = finite and max(peak_phi, peak_mu, peak_j) <= divergence_limit if finite else False

    if within_bounds:
        status = "PASS"
        reason = "Long-text run stayed finite and showed no obvious overflow/divergence symptom."
    else:
        status = "FAIL"
        reason = "Long-text run showed non-finite values or obvious divergence."

    metrics = _top_level_snapshot_metrics(runtime, snapshot)
    metrics["input_slice"] = {
        "paragraphs": paragraph_count,
        "chars_per_paragraph": char_limit,
        "step_n": step_n,
    }
    metrics["all_finite"] = finite
    metrics["max_abs_phi"] = peak_phi
    metrics["max_abs_mu"] = peak_mu
    metrics["max_abs_J"] = peak_j

    notes = [
        "Branch A does not expose attention.used/total; free_capacity is the closest locked observable.",
        "phi min/max are derived from runtime.state.phi because Branch A's snapshot summary does not include range fields.",
    ]
    return _scenario_result(
        "A05",
        "高中语文必修上 — long text stability",
        ["高中语文必修上"],
        status,
        reason,
        ran_successfully=True,
        key_metrics=metrics,
        fairness_notes=notes,
        structural_mismatch=None,
    )
481+
482+
def run_a06() -> Dict[str, Any]:
    """A06: switch subjects on one runtime without resetting it.

    Feeds 15 paragraphs of 小学语文一上, snapshots, then feeds 15 paragraphs of
    小学数学一上 (40 characters each, 3 steps per paragraph in both phases).
    Passes when the active region gained at least one new node and at least
    one previously active node still has ``|phi| > 0.001``.
    """
    paragraph_count, char_limit, step_n = 15, 40, 3
    first_subject, second_subject = "小学语文一上", "小学数学一上"
    runtime = CIERuntime()

    _feed(runtime, _slice_paragraphs(first_subject, paragraph_count), char_limit=char_limit, step_n=step_n)
    active_before = set(runtime.snapshot_state()["active_region"])

    _feed(runtime, _slice_paragraphs(second_subject, paragraph_count), char_limit=char_limit, step_n=step_n)
    after = runtime.snapshot_state()
    active_after = set(after["active_region"])

    newly_active = sorted(active_after - active_before)
    surviving = len(
        [node for node in active_before if abs(runtime.state.phi.get(node, 0.0)) > 0.001]
    )

    if newly_active and surviving > 0:
        status = "PASS"
        reason = "Active region changes under subject switch while some earlier structures remain alive."
    else:
        status = "FAIL"
        reason = "Subject switch did not show both migration and persistence under the required schedule."

    metrics = _top_level_snapshot_metrics(runtime, after)
    metrics["input_slice"] = {
        "phase_1": {
            "textbook": first_subject,
            "paragraphs": paragraph_count,
            "chars_per_paragraph": char_limit,
            "step_n": step_n,
        },
        "phase_2": {
            "textbook": second_subject,
            "paragraphs": paragraph_count,
            "chars_per_paragraph": char_limit,
            "step_n": step_n,
        },
    }
    metrics["active_region_before"] = sorted(active_before)
    metrics["active_region_after"] = sorted(active_after)
    metrics["new_active_nodes_after_switch"] = newly_active
    metrics["preserved_prior_active_phi_count"] = surviving

    return _scenario_result(
        "A06",
        "cross-subject transfer",
        [first_subject, second_subject],
        status,
        reason,
        ran_successfully=True,
        key_metrics=metrics,
        fairness_notes=[
            "This mirrors Branch B's no-reset subject switch. Branch A does show migration, but on a much smaller token set.",
        ],
        structural_mismatch=None,
    )
525+
526+
def run_a07() -> Dict[str, Any]:
    """A07: verify ``reset_session()`` clears activation but keeps phi intact.

    Feeds 30 paragraphs of 初中语文七上 (50 characters each, 3 steps per
    paragraph), records phi/J/graph/stage figures, resets the session and
    passes when the snapshot shows zero active mu entries, an empty active
    region, and every pre-reset phi entry compares exactly equal afterwards.
    """
    paragraph_count, char_limit, step_n = 30, 50, 3
    runtime = CIERuntime()
    _feed(
        runtime,
        _slice_paragraphs("初中语文七上", paragraph_count),
        char_limit=char_limit,
        step_n=step_n,
    )

    # Copies taken before the reset so later mutation cannot affect the comparison.
    phi_before = dict(runtime.state.phi)
    flows_before = dict(runtime.state.J)
    node_total_before = len(runtime.state.graph.nodes())
    stages_before = _stage_counts(runtime)

    runtime.reset_session()
    snapshot = runtime.snapshot_state()

    unchanged_phi = len(
        [node for node, value in phi_before.items() if runtime.state.phi.get(node) == value]
    )
    session_cleared = snapshot["mu_summary"]["active_count"] == 0 and not snapshot["active_region"]
    if session_cleared and unchanged_phi == len(phi_before):
        status = "PASS"
        reason = "reset_session() clears session activation while preserving long-term graph/potential structure."
    else:
        status = "FAIL"
        reason = "reset_session() did not cleanly separate session state from long-term structure."

    metrics = _top_level_snapshot_metrics(runtime, snapshot)
    metrics["input_slice"] = {
        "paragraphs": paragraph_count,
        "chars_per_paragraph": char_limit,
        "step_n": step_n,
    }
    metrics["phi_entries_before_reset"] = len(phi_before)
    metrics["phi_entries_preserved_exactly"] = unchanged_phi
    metrics["J_entries_before_reset"] = len(flows_before)
    metrics["J_entries_after_reset"] = len(runtime.state.J)
    metrics["graph_nodes_before_reset"] = node_total_before
    metrics["graph_nodes_after_reset"] = len(runtime.state.graph.nodes())
    metrics["stage_counts_before_reset"] = stages_before
    metrics["stage_counts_after_reset"] = _stage_counts(runtime)

    return _scenario_result(
        "A07",
        "session reset preserves long-term structure",
        ["初中语文七上"],
        status,
        reason,
        ran_successfully=True,
        key_metrics=metrics,
        fairness_notes=[
            "This is one of Branch A's clearer matched wins: session clearing and long-term retention separate cleanly.",
        ],
        structural_mismatch=None,
    )
572+
573+
def run_a08() -> Dict[str, Any]:
    """A08: apply five positive feedback rounds and one negative round to one target.

    Ingests the first 30 characters of the first 小学语文一上 paragraph, steps
    five times, and uses the first entry of the snapshot's ``active_region`` as
    the feedback target. When there is no target, or ``emit()`` returned
    ``"minimal: idle"``, the scenario is reported as ``FAIL`` without running
    any feedback. Otherwise each feedback is committed and followed by one
    ``step()`` before phi/mu are read back.

    NOTE(review): the pass criteria use non-strict comparisons on values that
    are already rounded to four decimals, so a target whose phi and mu do not
    change at all also passes - confirm this leniency is intended.
    """
    title = "multi-round feedback"
    textbook = "小学语文一上"
    runtime = CIERuntime()
    opening_text = _slice_paragraphs(textbook, 1)[0][:30]
    runtime.ingest(opening_text)
    runtime.step(n=5)
    emitted = runtime.emit()
    snapshot = runtime.snapshot_state()

    active_region = snapshot["active_region"]
    target = active_region[0] if active_region else None

    metrics = _top_level_snapshot_metrics(runtime, snapshot, emitted)
    metrics["input_slice"] = {"paragraphs": 1, "chars_per_paragraph": 30, "step_n": 5}
    metrics["feedback_target"] = target

    if not target or emitted == "minimal: idle":
        return _scenario_result(
            "A08",
            title,
            [textbook],
            "FAIL",
            "emit() returned no activated output target on the required slice, so the positive/negative feedback loop could not be meaningfully exercised.",
            ran_successfully=True,
            key_metrics=metrics,
            fairness_notes=[
                "This is reported as a real Branch A failure, not normalized away.",
                "Branch A feedback is queued and applied on the next step, but that did not matter here because no target emerged.",
            ],
            structural_mismatch=None,
        )

    def target_state() -> Dict[str, float]:
        # Rounded phi/mu of the feedback target; missing entries read as 0.0.
        return {
            "phi": _round(runtime.state.phi.get(target, 0.0)),
            "mu": _round(runtime.state.mu.get(target, 0.0)),
        }

    def apply_feedback(value: float) -> Dict[str, Any]:
        # Queue one feedback signal, advance one step, then observe the target.
        runtime.commit_feedback({"text": target, "value": value})
        runtime.step()
        observed: Dict[str, Any] = target_state()
        observed["feedback_effect"] = dict(runtime.state.feedback_effect)
        return observed

    initial = target_state()
    positive_rounds = [{"round": round_number, **apply_feedback(1.0)} for round_number in range(1, 6)]
    negative_round = apply_feedback(-0.5)

    last_positive = positive_rounds[-1]
    strengthened = last_positive["phi"] >= initial["phi"] and last_positive["mu"] >= initial["mu"]
    weakened = negative_round["phi"] <= last_positive["phi"] and negative_round["mu"] <= last_positive["mu"]
    if strengthened and weakened:
        status = "PASS"
        reason = "Positive rounds strengthened the chosen target and the negative round weakened it."
    else:
        status = "FAIL"
        reason = "Feedback rounds did not show the expected positive-then-negative observable change."

    metrics["initial_target_state"] = initial
    metrics["positive_rounds"] = positive_rounds
    metrics["negative_round"] = negative_round

    return _scenario_result(
        "A08",
        title,
        [textbook],
        status,
        reason,
        ran_successfully=True,
        key_metrics=metrics,
        fairness_notes=[
            "Branch A feedback is asynchronous: commit_feedback() queues a signal that is applied on the next step.",
        ],
        structural_mismatch=None,
    )
651+
652+
def run_a09() -> Dict[str, Any]:
    """A09: repeat the same input five times and look for stage progression.

    Feeds the first 10 paragraphs of 小学语文一上 (30 characters each, 3 steps
    per paragraph) for five rounds, recording snapshot counts and stage counts
    after each round. Passes when the final combined ``skill_belt`` +
    ``ability_core`` count exceeds the first round's, or when any later round
    has more ``skill_belt`` profiles than the first round.
    """
    paragraph_count, char_limit, step_n, rounds = 10, 30, 3, 5
    runtime = CIERuntime()
    paragraphs = _slice_paragraphs("小学语文一上", paragraph_count)

    round_history = []
    for round_number in range(1, rounds + 1):
        _feed(runtime, paragraphs, char_limit=char_limit, step_n=step_n)
        snapshot = runtime.snapshot_state()
        entry = {
            "round": round_number,
            "sedimentation_trace_count": len(snapshot["sedimentation_trace"]),
            "experience_regions_count": len(snapshot["experience_regions"]),
            "skill_belt_candidates_count": len(snapshot["skill_belt_candidates"]),
            "phi_node_count": snapshot["phi_summary"]["node_count"],
            "active_count": snapshot["mu_summary"]["active_count"],
            "stage_counts": _stage_counts(runtime),
        }
        round_history.append(entry)

    first_stages = round_history[0]["stage_counts"]
    last_stages = round_history[-1]["stage_counts"]
    initial_complexity = first_stages["skill_belt"] + first_stages["ability_core"]
    final_complexity = last_stages["skill_belt"] + last_stages["ability_core"]
    progressed = final_complexity > initial_complexity
    if not progressed:
        baseline_belt = first_stages["skill_belt"]
        progressed = any(
            later["stage_counts"]["skill_belt"] > baseline_belt for later in round_history[1:]
        )

    if progressed:
        status = "PASS"
        reason = "Repeated rounds show incremental stage progression, even though several observable lists are capped."
    else:
        status = "FAIL"
        reason = "Repeated rounds did not show incremental sedimentation progression."

    final_snapshot = runtime.snapshot_state()
    metrics = _top_level_snapshot_metrics(runtime, final_snapshot)
    metrics["input_slice"] = {
        "paragraphs": paragraph_count,
        "chars_per_paragraph": char_limit,
        "step_n": step_n,
        "rounds": rounds,
    }
    metrics["round_history"] = round_history

    return _scenario_result(
        "A09",
        "incremental sedimentation",
        ["小学语文一上"],
        status,
        reason,
        ran_successfully=True,
        key_metrics=metrics,
        fairness_notes=[
            "sedimentation_trace and skill_belt_candidates are capped lists in Branch A, so stage_counts are the more honest growth indicator here.",
        ],
        structural_mismatch=None,
    )
704+
705+
def run_a10() -> Dict[str, Any]:
    """A10: confirm the snapshot carries every required key on real input.

    Feeds 20 paragraphs of 初中数学七上 (40 characters each, 3 steps per
    paragraph), emits, steps, commits one feedback signal and steps again
    before taking the snapshot. Passes when no name in
    ``REQUIRED_SNAPSHOT_KEYS`` is absent from the snapshot.
    """
    paragraph_count, char_limit, step_n = 20, 40, 3
    runtime = CIERuntime()
    _feed(
        runtime,
        _slice_paragraphs("初中数学七上", paragraph_count),
        char_limit=char_limit,
        step_n=step_n,
    )
    emitted = runtime.emit()
    runtime.step()
    runtime.commit_feedback({"text": "validation", "value": 0.2})
    # One more step so the queued feedback is applied before the snapshot.
    runtime.step()
    snapshot = runtime.snapshot_state()

    absent_keys = sorted(REQUIRED_SNAPSHOT_KEYS.difference(snapshot))
    if absent_keys:
        status = "FAIL"
        reason = f"Snapshot is missing required locked fields: {absent_keys}"
    else:
        status = "PASS"
        reason = "All Branch A locked snapshot fields are present on real textbook-driven state."

    metrics = _top_level_snapshot_metrics(runtime, snapshot, emitted)
    metrics["input_slice"] = {
        "paragraphs": paragraph_count,
        "chars_per_paragraph": char_limit,
        "step_n": step_n,
    }
    metrics["required_snapshot_keys"] = sorted(REQUIRED_SNAPSHOT_KEYS)
    metrics["observed_snapshot_keys"] = sorted(snapshot)
    metrics["missing_snapshot_keys"] = absent_keys

    notes = [
        "Branch A needs one extra step after feedback to observe the applied feedback_effect because feedback is queued.",
        "Branch B's report mentions 16 fields including attention, but Branch A's locked comparable surface is the 15-field spec set.",
    ]
    return _scenario_result(
        "A10",
        "snapshot completeness on real textbook input",
        ["初中数学七上"],
        status,
        reason,
        ran_successfully=True,
        key_metrics=metrics,
        fairness_notes=notes,
        structural_mismatch=None,
    )
744+
745+
# Registry of the A01-A10 scenarios in execution order. Each entry carries the
# metadata needed to build a failure record if its runner raises.
SCENARIOS: List[Dict[str, Any]] = [
    {"id": scenario_id, "title": title, "textbooks": list(textbooks), "runner": runner}
    for scenario_id, title, textbooks, runner in (
        ("A01", "小学语文一上 — pipeline", ("小学语文一上",), run_a01),
        ("A02", "小学数学一上 — mixed text", ("小学数学一上",), run_a02),
        ("A03", "初中语文七上 — complexity / sedimentation", ("初中语文七上",), run_a03),
        ("A04", "初中数学七上 — formula / structure", ("初中数学七上",), run_a04),
        ("A05", "高中语文必修上 — long text stability", ("高中语文必修上",), run_a05),
        ("A06", "cross-subject transfer", ("小学语文一上", "小学数学一上"), run_a06),
        ("A07", "session reset preserves long-term structure", ("初中语文七上",), run_a07),
        ("A08", "multi-round feedback", ("小学语文一上",), run_a08),
        ("A09", "incremental sedimentation", ("小学语文一上",), run_a09),
        ("A10", "snapshot completeness on real textbook input", ("初中数学七上",), run_a10),
    )
]
758+
759+
def _run_scenarios() -> List[Dict[str, Any]]:
    """Execute every entry in ``SCENARIOS`` and return one result per entry.

    A runner that raises does not abort the batch: the exception is handed to
    ``_scenario_failure`` together with the entry's id, title, and textbooks,
    and the record it returns takes the place of the result.
    """

    def _execute(entry: Dict[str, Any]) -> Dict[str, Any]:
        try:
            return entry["runner"]()
        except Exception as exc:
            # Deliberately broad: any scenario crash is reported, not propagated.
            return _scenario_failure(entry["id"], entry["title"], entry["textbooks"], exc)

    return [_execute(entry) for entry in SCENARIOS]
768+
769+
def _status_counts(scenarios: List[Dict[str, Any]]) -> Dict[str, int]:
    """Tally scenario results by their ``status`` value.

    Every status in ``VALID_SCENARIO_STATUSES`` is present in the result (zero
    when unused); any other status encountered is appended with its own count.
    """
    tally: Dict[str, int] = dict.fromkeys(VALID_SCENARIO_STATUSES, 0)
    for entry in scenarios:
        status = entry["status"]
        tally.setdefault(status, 0)
        tally[status] += 1
    return tally
775+
776+
777+def _collect_structural_mismatches(scenarios: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
778+    mismatches: List[Dict[str, Any]] = []
779+    for scenario in scenarios:
780+        if scenario["structural_mismatch"]:
781+            mismatches.append(
782+                {
783+                    "scenario_id": scenario["scenario_id"],
784+                    "title": scenario["title"],
785+                    "detail": scenario["structural_mismatch"],
786+                }
787+            )
788+    mismatches.extend(
789+        [
790+            {
791+                "scenario_id": "A01/A08",
792+                "title": "emit surface mismatch",
793+                "detail": "Branch A emit() returns a plain string, not Branch B's structured payload with activated nodes and active_count.",
794+            },
795+            {
796+                "scenario_id": "A05/A10",
797+                "title": "attention surface mismatch",
798+                "detail": "Branch A does not expose attention used/total. free_capacity is the locked comparable field instead.",
799+            },
800+            {
801+                "scenario_id": "A08/A10",
802+                "title": "feedback timing mismatch",
803+                "detail": "Branch A commit_feedback() is queued and becomes observable on the next step, unlike Branch B's more immediate feedback probes.",
804+            },
805+        ]
806+    )
807+    return mismatches
808+
809+
def _overall_summary(scenarios: List[Dict[str, Any]], dataset_check: Dict[str, Any]) -> Dict[str, Any]:
    """Aggregate the scenario results into the report's ``overall_summary`` block.

    Args:
        scenarios: Scenario result records; each is read for ``status``,
            ``scenario_id`` and ``ran_successfully``.
        dataset_check: Dataset check result; ``all_required_files_exist`` is read.

    Returns:
        A dict with the scenario count, the number that ran successfully, the
        per-status tally from ``_status_counts``, two boolean flags, and a
        narrative ``summary``.

    The narrative is derived from the actual results rather than naming fixed
    scenario ids, so it cannot contradict ``status_counts`` when outcomes
    change between runs.
    """
    counts = _status_counts(scenarios)
    ran_successfully = sum(1 for scenario in scenarios if scenario["ran_successfully"])

    def _ids_with(status: str) -> str:
        # Comma-separated scenario ids that ended in the given status.
        ids = [scenario["scenario_id"] for scenario in scenarios if scenario["status"] == status]
        return ", ".join(ids) if ids else "none"

    return {
        "scenario_count": len(scenarios),
        "ran_successfully_count": ran_successfully,
        "status_counts": counts,
        # The gap only counts as reduced when the full ten-scenario family ran on a complete dataset.
        "fairness_gap_reduced": bool(dataset_check["all_required_files_exist"] and len(scenarios) == 10),
        "materially_changes_previous_ab_conclusion": False,
        "summary": (
            "Branch A was run on the same 5 real textbooks and A01-A10 scenario family used by Branch B. "
            f"{counts.get('PASS', 0)} of {len(scenarios)} scenarios passed on Branch A's own observable surface; "
            f"structural mismatch: {_ids_with('STRUCTURAL MISMATCH')}; failed: {_ids_with('FAIL')}."
        ),
    }
824+
825+
826+def _known_limitations() -> List[str]:
827+    return [
828+        "Branch A tokenizes each ingest call with a Unicode word regex and keeps at most 8 tokens, so long textbook slices compress into coarse tokens instead of Branch B's character/bigram view.",
829+        "Branch A does not expose attention.used/attention.total; free_capacity is the closest locked observable, and phi min/max must be derived from internal state for diagnostics.",
830+        "Branch A emit() is string-only and commit_feedback() is asynchronous, so some Branch B feedback/output probes can only be approximated, not matched exactly.",
831+        "The first cleaned textbook paragraphs include front matter and publishing metadata; this is shared with Branch B's slice definition but is amplified by Branch A's coarse tokenization.",
832+    ]
833+
834+
835+def _recommendation() -> Dict[str, str]:
836+    return {
837+        "decision": "enough to proceed with merge decision",
838+        "reason": (
839+            "The main A/B fairness gap was the unmatched real-data harness. This validation closes that gap enough to make a merge decision on current evidence. "
840+            "The remaining issues are explicit Branch A results: one failed scenario (A08) and one true structural mismatch (A04), not hidden harness differences."
841+        ),
842+    }
843+
844+
845+def _result_textbooks(result: Dict[str, Any]) -> str:
846+    return ", ".join(result["textbook_used"])
847+
848+
849+def _branch_a_observed_summary(result: Dict[str, Any]) -> str:
850+    metrics = result["key_metrics"]
851+    if result["scenario_id"] == "A04":
852+        return (
853+            f"phi={metrics['phi_summary']['node_count']}, J={metrics['J_summary']['edge_count']}, "
854+            f"flow-asym-proxy={metrics['directed_flow_asymmetry_proxy_avg']}"
855+        )
856+    if result["scenario_id"] == "A08":
857+        return f"mode={metrics['output_mode']}, emit={metrics.get('emit_output', '')}, active={metrics['mu_summary']['active_count']}"
858+    return (
859+        f"phi={metrics['phi_summary']['node_count']}, mu={metrics['mu_summary']['active_count']}, "
860+        f"J={metrics['J_summary']['edge_count']}, mode={metrics['output_mode']}"
861+    )
862+
863+
def _render_markdown(report: Dict[str, Any]) -> str:
    """Render the full validation report as a Markdown document.

    The document is assembled section by section: purpose and commits, the
    dataset file check, the per-scenario results table, the structural
    mismatch list, and the fairness interpretation with the recommendation.
    The returned text ends with a newline.
    """
    check = report["dataset_check"]
    preamble = [
        "# Branch A Real Textbook Validation",
        "",
        "## Purpose",
        "Run Branch A on the same 5 real textbook files and the same A01-A10 real-data scenario family used by Branch B, then report the result honestly without changing Branch A runtime behavior.",
        "",
        "## Base Commits",
        f"- Branch A base commit: `{report['base_commit']}`",
        f"- Branch B reference commit: `{report['branch_b_reference_commit']}` (`{BRANCH_B_REFERENCE_LABEL}`)",
        f"- Branch under test: `{report['branch']}`",
        "",
        "## Dataset Path And File Check",
        f"- Dataset path: `{report['dataset_dir']}`",
        f"- Directory exists: `{check['directory_exists']}`",
        f"- All 5 required files present: `{check['all_required_files_exist']}`",
    ]
    file_rows = [
        f"- {'OK' if row['exists'] else 'MISSING'} `{row['path']}`"
        for row in report["dataset_files"]
    ]
    table_header = [
        "",
        "## Scenario Results",
        "| ID | Textbook | Status | Branch B Reference | Branch A Observed | Reason |",
        "| --- | --- | --- | --- | --- | --- |",
    ]
    table_rows = [
        f"| {result['scenario_id']} | {_result_textbooks(result)} | {result['status']} | "
        f"{result['branch_b_reference_expectation']['summary']} | {_branch_a_observed_summary(result)} | {result['reason']} |"
        for result in report["scenarios"]
    ]
    mismatch_section = ["", "## Explicit Structural Mismatch"] + [
        f"- `{mismatch['scenario_id']}`: {mismatch['detail']}"
        for mismatch in report["structural_mismatches"]
    ]
    # NOTE(review): the interpretation text names A08/A04 statically; it is not derived from `report`.
    closing = [
        "",
        "## Concise Fairness Interpretation",
        "- This run materially reduces the main A/B fairness gap because Branch A was executed on the same dataset, same file set, and same A01-A10 slice family as Branch B.",
        "- It does not erase Branch A's current disadvantages: A08 fails on the mandated slice, A04 is not directly comparable, and most Branch A state sizes remain much smaller than Branch B's reference values.",
        "",
        "## Does This Reduce The Main A/B Fairness Gap?",
        "- Yes. The earlier fairness concern was unmatched real-data coverage. That concern is now materially reduced because Branch A was run on the same real textbooks and scenario family.",
        "",
        "## Recommendation",
        f"- Decision: `{report['recommendation']['decision']}`",
        f"- Reason: {report['recommendation']['reason']}",
        "",
    ]
    return "\n".join(preamble + file_rows + table_header + table_rows + mismatch_section + closing)
917+
918+
919+def _render_review(report: Dict[str, Any]) -> str:
920+    passed = [scenario["scenario_id"] for scenario in report["scenarios"] if scenario["status"] == "PASS"]
921+    failed = [scenario["scenario_id"] for scenario in report["scenarios"] if scenario["status"] == "FAIL"]
922+    mismatched = [scenario["scenario_id"] for scenario in report["scenarios"] if scenario["status"] == "STRUCTURAL MISMATCH"]
923+    lines = [
924+        "# Review: Branch A Real Textbook Validation",
925+        "",
926+        "## What Was Run",
927+        f"- Branch A base commit `{report['base_commit']}` on branch `{report['branch']}`.",
928+        f"- Branch B reference commit `{report['branch_b_reference_commit']}` for dataset/scenario parity.",
929+        f"- Same dataset directory: `{report['dataset_dir']}` with the exact 5 textbook files required by Branch B.",
930+        f"- Same real-data scenario family: A01-A10.",
931+        "",
932+        "## Outcome",
933+        f"- Succeeded: {', '.join(passed) if passed else 'none'}",
934+        f"- Failed: {', '.join(failed) if failed else 'none'}",
935+        f"- Structurally not comparable: {', '.join(mismatched) if mismatched else 'none'}",
936+        "",
937+        "## Decision Readout",
938+        "- The matched real-textbook run materially reduces the earlier fairness gap.",
939+        "- It does not materially change a conclusion that Branch B currently has broader and cleaner real-data validation coverage.",
940+        f"- Recommendation: `{report['recommendation']['decision']}`",
941+        f"- Rationale: {report['recommendation']['reason']}",
942+        "",
943+    ]
944+    return "\n".join(lines)
945+
946+
def generate_validation_report(
    json_path: Path | str = DEFAULT_JSON_REPORT_PATH,
    markdown_path: Path | str = DEFAULT_MARKDOWN_REPORT_PATH,
    review_path: Path | str = DEFAULT_REVIEW_REPORT_PATH,
) -> Dict[str, Any]:
    """Run the validation and write the JSON, Markdown, and review reports.

    Args:
        json_path: Destination of the JSON report.
        markdown_path: Destination of the Markdown report.
        review_path: Destination of the review note.

    Returns:
        The report dict that was serialized.

    Raises:
        RuntimeError: If the report keys do not match ``REQUIRED_REPORT_KEYS``
            in order.
    """
    dataset_check = _require_dataset()
    scenarios = _run_scenarios()
    # Key order matters: it is compared against REQUIRED_REPORT_KEYS below.
    report: Dict[str, Any] = {
        "branch": _current_branch(),
        "base_commit": BASE_COMMIT,
        "branch_b_reference_commit": BRANCH_B_REFERENCE_COMMIT,
        "dataset_dir": str(DATASET_DIR),
        "dataset_files": dataset_file_rows(),
        "dataset_check": dataset_check,
        "scenarios": scenarios,
        "overall_summary": _overall_summary(scenarios, dataset_check),
        "structural_mismatches": _collect_structural_mismatches(scenarios),
        "known_limitations": _known_limitations(),
        "recommendation": _recommendation(),
    }
    observed_keys = tuple(report)
    if observed_keys != REQUIRED_REPORT_KEYS:
        raise RuntimeError(f"Unexpected report key order: {observed_keys!r}")

    json_target, markdown_target, review_target = (
        Path(candidate) for candidate in (json_path, markdown_path, review_path)
    )
    # Create every parent directory before the first file is written.
    for target in (json_target, markdown_target, review_target):
        target.parent.mkdir(parents=True, exist_ok=True)

    serialized = json.dumps(report, indent=2, ensure_ascii=False) + "\n"
    json_target.write_text(serialized, encoding="utf-8")
    markdown_target.write_text(_render_markdown(report), encoding="utf-8")
    review_target.write_text(_render_review(report), encoding="utf-8")
    return report
980+
981+
def _parse_args(argv: Sequence[str]) -> argparse.Namespace:
    """Parse the command-line options naming the three report output paths.

    Each option defaults to the corresponding module-level default path,
    converted to a string.
    """
    parser = argparse.ArgumentParser(description="Branch A real-textbook validation against Branch B dataset/scenarios.")
    output_options = (
        ("--json-out", DEFAULT_JSON_REPORT_PATH),
        ("--markdown-out", DEFAULT_MARKDOWN_REPORT_PATH),
        ("--review-out", DEFAULT_REVIEW_REPORT_PATH),
    )
    for flag, default_path in output_options:
        parser.add_argument(flag, default=str(default_path))
    return parser.parse_args(argv)
988+
989+
def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point: parse options, generate the reports, return 0.

    When ``argv`` is ``None`` the arguments are taken from ``sys.argv[1:]``.
    """
    if argv is None:
        cli_args = sys.argv[1:]
    else:
        cli_args = argv
    options = _parse_args(cli_args)
    generate_validation_report(options.json_out, options.markdown_out, options.review_out)
    return 0
994+
995+
# Script entry point: exit the process with main()'s return code.
if __name__ == "__main__":
    sys.exit(main())

A reports/2026-03-31_branch_a_real_textbook_validation.json

+1650, -0

   1@@ -0,0 +1,1650 @@
   2+{
   3+  "branch": "review/branch-a-real-textbook-validation",
   4+  "base_commit": "419ae8d39150806011c1eb6082c7fc8c6a337735",
   5+  "branch_b_reference_commit": "c7342881bb2ebfa5e7f927c91a7806416288573b",
   6+  "dataset_dir": "/Users/george/code/china-text-book-md",
   7+  "dataset_files": [
   8+    {
   9+      "textbook": "小学语文一上",
  10+      "filename": "小学_语文_统编版_义务教育教科书·语文一年级上册.md",
  11+      "path": "/Users/george/code/china-text-book-md/小学_语文_统编版_义务教育教科书·语文一年级上册.md",
  12+      "exists": true
  13+    },
  14+    {
  15+      "textbook": "小学数学一上",
  16+      "filename": "小学_数学_人教版_义务教育教科书 · 数学一年级上册.md",
  17+      "path": "/Users/george/code/china-text-book-md/小学_数学_人教版_义务教育教科书 · 数学一年级上册.md",
  18+      "exists": true
  19+    },
  20+    {
  21+      "textbook": "初中语文七上",
  22+      "filename": "初中_语文_统编版-人民教育出版社_七年级_义务教育教科书·语文七年级上册.md",
  23+      "path": "/Users/george/code/china-text-book-md/初中_语文_统编版-人民教育出版社_七年级_义务教育教科书·语文七年级上册.md",
  24+      "exists": true
  25+    },
  26+    {
  27+      "textbook": "初中数学七上",
  28+      "filename": "初中_数学_人教版-人民教育出版社_七年级_义务教育教科书·数学七年级上册.md",
  29+      "path": "/Users/george/code/china-text-book-md/初中_数学_人教版-人民教育出版社_七年级_义务教育教科书·数学七年级上册.md",
  30+      "exists": true
  31+    },
  32+    {
  33+      "textbook": "高中语文必修上",
  34+      "filename": "高中_语文_统编版-人民教育出版社_普通高中教科书·语文必修 上册.md",
  35+      "path": "/Users/george/code/china-text-book-md/高中_语文_统编版-人民教育出版社_普通高中教科书·语文必修 上册.md",
  36+      "exists": true
  37+    }
  38+  ],
  39+  "dataset_check": {
  40+    "directory_exists": true,
  41+    "all_required_files_exist": true,
  42+    "missing_paths": [],
  43+    "file_results": [
  44+      {
  45+        "textbook": "小学语文一上",
  46+        "filename": "小学_语文_统编版_义务教育教科书·语文一年级上册.md",
  47+        "path": "/Users/george/code/china-text-book-md/小学_语文_统编版_义务教育教科书·语文一年级上册.md",
  48+        "exists": true
  49+      },
  50+      {
  51+        "textbook": "小学数学一上",
  52+        "filename": "小学_数学_人教版_义务教育教科书 · 数学一年级上册.md",
  53+        "path": "/Users/george/code/china-text-book-md/小学_数学_人教版_义务教育教科书 · 数学一年级上册.md",
  54+        "exists": true
  55+      },
  56+      {
  57+        "textbook": "初中语文七上",
  58+        "filename": "初中_语文_统编版-人民教育出版社_七年级_义务教育教科书·语文七年级上册.md",
  59+        "path": "/Users/george/code/china-text-book-md/初中_语文_统编版-人民教育出版社_七年级_义务教育教科书·语文七年级上册.md",
  60+        "exists": true
  61+      },
  62+      {
  63+        "textbook": "初中数学七上",
  64+        "filename": "初中_数学_人教版-人民教育出版社_七年级_义务教育教科书·数学七年级上册.md",
  65+        "path": "/Users/george/code/china-text-book-md/初中_数学_人教版-人民教育出版社_七年级_义务教育教科书·数学七年级上册.md",
  66+        "exists": true
  67+      },
  68+      {
  69+        "textbook": "高中语文必修上",
  70+        "filename": "高中_语文_统编版-人民教育出版社_普通高中教科书·语文必修 上册.md",
  71+        "path": "/Users/george/code/china-text-book-md/高中_语文_统编版-人民教育出版社_普通高中教科书·语文必修 上册.md",
  72+        "exists": true
  73+      }
  74+    ]
  75+  },
  76+  "scenarios": [
  77+    {
  78+      "scenario_id": "A01",
  79+      "title": "小学语文一上 — pipeline",
  80+      "textbook_used": [
  81+        "小学语文一上"
  82+      ],
  83+      "dataset_files": [
  84+        "/Users/george/code/china-text-book-md/小学_语文_统编版_义务教育教科书·语文一年级上册.md"
  85+      ],
  86+      "ran_successfully": true,
  87+      "status": "PASS",
  88+      "reason": "Pipeline ran on the required real-data slice and produced non-empty phi/mu/J state.",
  89+      "key_metrics": {
  90+        "phi_summary": {
  91+          "node_count": 40,
  92+          "total_potential": 7.5222,
  93+          "top_nodes": [
  94+            {
  95+              "node": "一年级",
  96+              "value": 1.317
  97+            },
  98+            {
  99+              "node": "上册语文",
 100+              "value": 0.7616
 101+            },
 102+            {
 103+              "node": "邮编",
 104+              "value": 0.3652
 105+            },
 106+            {
 107+              "node": "com",
 108+              "value": 0.3266
 109+            },
 110+            {
 111+              "node": "语文",
 112+              "value": 0.317
 113+            }
 114+          ]
 115+        },
 116+        "mu_summary": {
 117+          "active_count": 13,
 118+          "total_activation": 2.6715,
 119+          "top_nodes": [
 120+            {
 121+              "node": "一年级",
 122+              "value": 0.6329
 123+            },
 124+            {
 125+              "node": "com",
 126+              "value": 0.2969
 127+            },
 128+            {
 129+              "node": "址",
 130+              "value": 0.272
 131+            },
 132+            {
 133+              "node": "pep",
 134+              "value": 0.2444
 135+            },
 136+            {
 137+              "node": "http",
 138+              "value": 0.2432
 139+            }
 140+          ]
 141+        },
 142+        "J_summary": {
 143+          "edge_count": 35,
 144+          "total_flow": 4.6941,
 145+          "top_flows": [
 146+            {
 147+              "edge": "一年级->上册语文",
 148+              "flow": 0.7
 149+            },
 150+            {
 151+              "edge": "上册语文->一年级",
 152+              "flow": 0.3904
 153+            },
 154+            {
 155+              "edge": "一年级->上册",
 156+              "flow": 0.2772
 157+            },
 158+            {
 159+              "edge": "网->址",
 160+              "flow": 0.266
 161+            },
 162+            {
 163+              "edge": "com->cn",
 164+              "flow": 0.2465
 165+            }
 166+          ]
 167+        },
 168+        "active_region": [
 169+          "一年级",
 170+          "com",
 171+          "址",
 172+          "pep"
 173+        ],
 174+        "active_region_size": 4,
 175+        "bound_ability_core": "一年级",
 176+        "anchor_pull": 0.0,
 177+        "drift_score": 1.0,
 178+        "free_capacity": 0.4063,
 179+        "experience_regions_count": 1,
 180+        "skill_belt_candidates_count": 6,
 181+        "sedimentation_trace_count": 20,
 182+        "merge_events_count": 8,
 183+        "decay_events_count": 24,
 184+        "output_mode": "degraded",
 185+        "feedback_effect": {
 186+          "source": "emit",
 187+          "mode": "degraded",
 188+          "queued_tokens": [
 189+            "一年级",
 190+            "com"
 191+          ],
 192+          "queued_strength": 0.38,
 193+          "confidence_proxy": 0.3406,
 194+          "queued_step": 90,
 195+          "last_applied_step": null
 196+        },
 197+        "phi_range": {
 198+          "min": 0.0158,
 199+          "max": 1.317
 200+        },
 201+        "stage_counts": {
 202+          "memory": 47,
 203+          "experience": 5,
 204+          "skill_belt": 10,
 205+          "ability_core": 2
 206+        },
 207+        "graph_node_count": 64,
 208+        "graph_edge_count_proxy": 35,
 209+        "attention_usage": "NOT APPLICABLE: Branch A exposes free_capacity but not attention used/total.",
 210+        "emit_output": "degraded: 一年级 / com",
 211+        "input_slice": {
 212+          "paragraphs": 30,
 213+          "chars_per_paragraph": 50,
 214+          "step_n": 3
 215+        },
 216+        "used_paragraph_count": 30
 217+      },
 218+      "branch_b_reference_expectation": {
 219+        "summary": "Branch B stage report: PASS; 126 nodes, 166 flows on 小学语文一上 pipeline.",
 220+        "source": "STAGE_REPORT.md @ c734288"
 221+      },
 222+      "fairness_notes": [
 223+        "Branch A emit() returns a string, so output mode and active counts come from snapshot_state().",
 224+        "Observed state is much smaller than Branch B's reference counts, but the scenario does complete end-to-end."
 225+      ],
 226+      "structural_mismatch": null
 227+    },
 228+    {
 229+      "scenario_id": "A02",
 230+      "title": "小学数学一上 — mixed text",
 231+      "textbook_used": [
 232+        "小学数学一上"
 233+      ],
 234+      "dataset_files": [
 235+        "/Users/george/code/china-text-book-md/小学_数学_人教版_义务教育教科书 · 数学一年级上册.md"
 236+      ],
 237+      "ran_successfully": true,
 238+      "status": "PASS",
 239+      "reason": "Chinese-bearing nodes exist on the mixed textbook slice; digit-bearing nodes are reported separately.",
 240+      "key_metrics": {
 241+        "phi_summary": {
 242+          "node_count": 15,
 243+          "total_potential": 4.0333,
 244+          "top_nodes": [
 245+            {
 246+              "node": "上册",
 247+              "value": 0.7514
 248+            },
 249+            {
 250+              "node": "一年级",
 251+              "value": 0.7284
 252+            },
 253+            {
 254+              "node": "图",
 255+              "value": 0.3652
 256+            },
 257+            {
 258+              "node": "陈",
 259+              "value": 0.3494
 260+            },
 261+            {
 262+              "node": "曦",
 263+              "value": 0.2671
 264+            }
 265+          ]
 266+        },
 267+        "mu_summary": {
 268+          "active_count": 7,
 269+          "total_activation": 1.381,
 270+          "top_nodes": [
 271+            {
 272+              "node": "上册",
 273+              "value": 0.364
 274+            },
 275+            {
 276+              "node": "陈",
 277+              "value": 0.3417
 278+            },
 279+            {
 280+              "node": "一年级",
 281+              "value": 0.182
 282+            },
 283+            {
 284+              "node": "曦",
 285+              "value": 0.1787
 286+            },
 287+            {
 288+              "node": "责任编辑",
 289+              "value": 0.157
 290+            }
 291+          ]
 292+        },
 293+        "J_summary": {
 294+          "edge_count": 18,
 295+          "total_flow": 2.4105,
 296+          "top_flows": [
 297+            {
 298+              "edge": "上册->一年级",
 299+              "flow": 0.3931
 300+            },
 301+            {
 302+              "edge": "一年级->上册",
 303+              "flow": 0.3781
 304+            },
 305+            {
 306+              "edge": "责任编辑->陈",
 307+              "flow": 0.2666
 308+            },
 309+            {
 310+              "edge": "陈->曦",
 311+              "flow": 0.2506
 312+            },
 313+            {
 314+              "edge": "绘->图",
 315+              "flow": 0.2368
 316+            }
 317+          ]
 318+        },
 319+        "active_region": [
 320+          "上册",
 321+          "陈",
 322+          "一年级",
 323+          "曦"
 324+        ],
 325+        "active_region_size": 4,
 326+        "bound_ability_core": "上册",
 327+        "anchor_pull": 0.0,
 328+        "drift_score": 1.0,
 329+        "free_capacity": 0.6931,
 330+        "experience_regions_count": 2,
 331+        "skill_belt_candidates_count": 6,
 332+        "sedimentation_trace_count": 20,
 333+        "merge_events_count": 2,
 334+        "decay_events_count": 24,
 335+        "output_mode": "minimal",
 336+        "feedback_effect": {
 337+          "source": "emit",
 338+          "mode": "minimal",
 339+          "queued_tokens": [
 340+            "上册"
 341+          ],
 342+          "queued_strength": 0.22,
 343+          "confidence_proxy": 0.2243,
 344+          "queued_step": 60,
 345+          "last_applied_step": null
 346+        },
 347+        "phi_range": {
 348+          "min": 0.031,
 349+          "max": 0.7514
 350+        },
 351+        "stage_counts": {
 352+          "memory": 14,
 353+          "experience": 4,
 354+          "skill_belt": 4,
 355+          "ability_core": 2
 356+        },
 357+        "graph_node_count": 24,
 358+        "graph_edge_count_proxy": 18,
 359+        "attention_usage": "NOT APPLICABLE: Branch A exposes free_capacity but not attention used/total.",
 360+        "emit_output": "minimal: 上册",
 361+        "input_slice": {
 362+          "paragraphs": 20,
 363+          "chars_per_paragraph": 40,
 364+          "step_n": 3
 365+        },
 366+        "has_chinese_nodes": true,
 367+        "has_digit_nodes": false,
 368+        "chinese_node_count": 24,
 369+        "digit_node_count": 0,
 370+        "sample_digit_nodes": []
 371+      },
 372+      "branch_b_reference_expectation": {
 373+        "summary": "Branch B stage report: PASS; 58 nodes, has_cn=True on 小学数学一上 mixed text.",
 374+        "source": "STAGE_REPORT.md @ c734288"
 375+      },
 376+      "fairness_notes": [
 377+        "Branch A tokenizes with Unicode word regexes, so digits may be absorbed into coarse tokens or absent from this slice.",
 378+        "The required honest report here is whether Chinese nodes exist and whether digit-bearing nodes were actually observed."
 379+      ],
 380+      "structural_mismatch": null
 381+    },
 382+    {
 383+      "scenario_id": "A03",
 384+      "title": "初中语文七上 — complexity / sedimentation",
 385+      "textbook_used": [
 386+        "初中语文七上"
 387+      ],
 388+      "dataset_files": [
 389+        "/Users/george/code/china-text-book-md/初中_语文_统编版-人民教育出版社_七年级_义务教育教科书·语文七年级上册.md"
 390+      ],
 391+      "ran_successfully": true,
 392+      "status": "PASS",
 393+      "reason": "Sedimentation and experience-region observables are present on the required real-data slice.",
 394+      "key_metrics": {
 395+        "phi_summary": {
 396+          "node_count": 36,
 397+          "total_potential": 5.7975,
 398+          "top_nodes": [
 399+            {
 400+              "node": "七年级",
 401+              "value": 0.6747
 402+            },
 403+            {
 404+              "node": "母亲",
 405+              "value": 0.3494
 406+            },
 407+            {
 408+              "node": "散文诗二首",
 409+              "value": 0.3234
 410+            },
 411+            {
 412+              "node": "泰戈尔",
 413+              "value": 0.2969
 414+            },
 415+            {
 416+              "node": "金色花",
 417+              "value": 0.2969
 418+            }
 419+          ]
 420+        },
 421+        "mu_summary": {
 422+          "active_count": 6,
 423+          "total_activation": 1.0825,
 424+          "top_nodes": [
 425+            {
 426+              "node": "母亲",
 427+              "value": 0.3417
 428+            },
 429+            {
 430+              "node": "七年级",
 431+              "value": 0.2607
 432+            },
 433+            {
 434+              "node": "冰心",
 435+              "value": 0.1787
 436+            },
 437+            {
 438+              "node": "荷叶",
 439+              "value": 0.157
 440+            },
 441+            {
 442+              "node": "泰戈尔",
 443+              "value": 0.0722
 444+            }
 445+          ]
 446+        },
 447+        "J_summary": {
 448+          "edge_count": 28,
 449+          "total_flow": 2.6829,
 450+          "top_flows": [
 451+            {
 452+              "edge": "荷叶->母亲",
 453+              "flow": 0.2666
 454+            },
 455+            {
 456+              "edge": "金色花->泰戈尔",
 457+              "flow": 0.2523
 458+            },
 459+            {
 460+              "edge": "母亲->冰心",
 461+              "flow": 0.2506
 462+            },
 463+            {
 464+              "edge": "七年级->上册语文",
 465+              "flow": 0.2458
 466+            },
 467+            {
 468+              "edge": "七年级->上册",
 469+              "flow": 0.2291
 470+            }
 471+          ]
 472+        },
 473+        "active_region": [
 474+          "母亲",
 475+          "七年级",
 476+          "冰心",
 477+          "荷叶"
 478+        ],
 479+        "active_region_size": 4,
 480+        "bound_ability_core": "七年级",
 481+        "anchor_pull": 0.0,
 482+        "drift_score": 1.0,
 483+        "free_capacity": 0.7594,
 484+        "experience_regions_count": 1,
 485+        "skill_belt_candidates_count": 6,
 486+        "sedimentation_trace_count": 20,
 487+        "merge_events_count": 9,
 488+        "decay_events_count": 24,
 489+        "output_mode": "minimal",
 490+        "feedback_effect": {},
 491+        "phi_range": {
 492+          "min": 0.0163,
 493+          "max": 0.6747
 494+        },
 495+        "stage_counts": {
 496+          "memory": 109,
 497+          "experience": 4,
 498+          "skill_belt": 6,
 499+          "ability_core": 2
 500+        },
 501+        "graph_node_count": 121,
 502+        "graph_edge_count_proxy": 28,
 503+        "attention_usage": "NOT APPLICABLE: Branch A exposes free_capacity but not attention used/total.",
 504+        "input_slice": {
 505+          "paragraphs": 50,
 506+          "chars_per_paragraph": 60,
 507+          "step_n": 3
 508+        }
 509+      },
 510+      "branch_b_reference_expectation": {
 511+        "summary": "Branch B stage report: PASS; 276 nodes, 20 sedimentation traces on 初中语文七上.",
 512+        "source": "STAGE_REPORT.md @ c734288"
 513+      },
 514+      "fairness_notes": [
 515+        "Branch A exposes sedimentation_trace and experience_regions, but its tokenized graph remains much smaller than Branch B's reference run.",
 516+        "sedimentation_trace is capped, so count saturation is expected and should not be over-interpreted."
 517+      ],
 518+      "structural_mismatch": null
 519+    },
 520+    {
 521+      "scenario_id": "A04",
 522+      "title": "初中数学七上 — formula / structure",
 523+      "textbook_used": [
 524+        "初中数学七上"
 525+      ],
 526+      "dataset_files": [
 527+        "/Users/george/code/china-text-book-md/初中_数学_人教版-人民教育出版社_七年级_义务教育教科书·数学七年级上册.md"
 528+      ],
 529+      "ran_successfully": true,
 530+      "status": "STRUCTURAL MISMATCH",
 531+      "reason": "The scenario ran, but the primary Branch B asymmetry-ratio metric does not map cleanly onto Branch A.",
 532+      "key_metrics": {
 533+        "phi_summary": {
 534+          "node_count": 35,
 535+          "total_potential": 7.545,
 536+          "top_nodes": [
 537+            {
 538+              "node": "上册",
 539+              "value": 1.0361
 540+            },
 541+            {
 542+              "node": "七年级",
 543+              "value": 0.7468
 544+            },
 545+            {
 546+              "node": "王俊宏",
 547+              "value": 0.6371
 548+            },
 549+            {
 550+              "node": "数学",
 551+              "value": 0.5754
 552+            },
 553+            {
 554+              "node": "金",
 555+              "value": 0.3389
 556+            }
 557+          ]
 558+        },
 559+        "mu_summary": {
 560+          "active_count": 7,
 561+          "total_activation": 1.2396,
 562+          "top_nodes": [
 563+            {
 564+              "node": "上册",
 565+              "value": 0.3317
 566+            },
 567+            {
 568+              "node": "封面",
 569+              "value": 0.2296
 570+            },
 571+            {
 572+              "node": "文鲁工作室",
 573+              "value": 0.2296
 574+            },
 575+            {
 576+              "node": "七年级",
 577+              "value": 0.1435
 578+            },
 579+            {
 580+              "node": "王俊宏",
 581+              "value": 0.1176
 582+            }
 583+          ]
 584+        },
 585+        "J_summary": {
 586+          "edge_count": 32,
 587+          "total_flow": 3.5946,
 588+          "top_flows": [
 589+            {
 590+              "edge": "上册->七年级",
 591+              "flow": 0.5308
 592+            },
 593+            {
 594+              "edge": "上册->数学",
 595+              "flow": 0.3589
 596+            },
 597+            {
 598+              "edge": "七年级->上册",
 599+              "flow": 0.3309
 600+            },
 601+            {
 602+              "edge": "文鲁工作室->封面",
 603+              "flow": 0.2759
 604+            },
 605+            {
 606+              "edge": "版式设计->王俊宏",
 607+              "flow": 0.233
 608+            }
 609+          ]
 610+        },
 611+        "active_region": [
 612+          "上册",
 613+          "封面",
 614+          "文鲁工作室",
 615+          "七年级"
 616+        ],
 617+        "active_region_size": 4,
 618+        "bound_ability_core": "上册",
 619+        "anchor_pull": 0.0,
 620+        "drift_score": 0.8893,
 621+        "free_capacity": 0.7245,
 622+        "experience_regions_count": 3,
 623+        "skill_belt_candidates_count": 6,
 624+        "sedimentation_trace_count": 20,
 625+        "merge_events_count": 11,
 626+        "decay_events_count": 24,
 627+        "output_mode": "minimal",
 628+        "feedback_effect": {},
 629+        "phi_range": {
 630+          "min": 0.0181,
 631+          "max": 1.0361
 632+        },
 633+        "stage_counts": {
 634+          "memory": 64,
 635+          "experience": 3,
 636+          "skill_belt": 4,
 637+          "ability_core": 4
 638+        },
 639+        "graph_node_count": 75,
 640+        "graph_edge_count_proxy": 32,
 641+        "attention_usage": "NOT APPLICABLE: Branch A exposes free_capacity but not attention used/total.",
 642+        "input_slice": {
 643+          "paragraphs": 30,
 644+          "chars_per_paragraph": 50,
 645+          "step_n": 3
 646+        },
 647+        "directed_flow_asymmetry_proxy_avg": 0.7721,
 648+        "top_flows": [
 649+          {
 650+            "edge": "上册->七年级",
 651+            "flow": 0.5308
 652+          },
 653+          {
 654+            "edge": "上册->数学",
 655+            "flow": 0.3589
 656+          },
 657+          {
 658+            "edge": "七年级->上册",
 659+            "flow": 0.3309
 660+          },
 661+          {
 662+            "edge": "文鲁工作室->封面",
 663+            "flow": 0.2759
 664+          },
 665+          {
 666+            "edge": "版式设计->王俊宏",
 667+            "flow": 0.233
 668+          }
 669+        ]
 670+      },
 671+      "branch_b_reference_expectation": {
 672+        "summary": "Branch B stage report: PASS; 294 edges, asymmetry ratio 1.00 on 初中数学七上.",
 673+        "source": "STAGE_REPORT.md @ c734288"
 674+      },
 675+      "fairness_notes": [
 676+        "Directed J flow can be described, but it is not the same observable as Branch B's directed graph edge weights.",
 677+        "Using the J proxy as if it were the same metric would overstate comparability."
 678+      ],
 679+      "structural_mismatch": "Branch B's A04 metric is based on forward/backward graph edge weights. Branch A only exposes directed J flow, not a directly comparable directed graph-edge surface, so a fair asymmetry-ratio comparison is a structural mismatch."
 680+    },
 681+    {
 682+      "scenario_id": "A05",
 683+      "title": "高中语文必修上 — long text stability",
 684+      "textbook_used": [
 685+        "高中语文必修上"
 686+      ],
 687+      "dataset_files": [
 688+        "/Users/george/code/china-text-book-md/高中_语文_统编版-人民教育出版社_普通高中教科书·语文必修 上册.md"
 689+      ],
 690+      "ran_successfully": true,
 691+      "status": "PASS",
 692+      "reason": "Long-text run stayed finite and showed no obvious overflow/divergence symptom.",
 693+      "key_metrics": {
 694+        "phi_summary": {
 695+          "node_count": 55,
 696+          "total_potential": 10.1412,
 697+          "top_nodes": [
 698+            {
 699+              "node": "上",
 700+              "value": 1.4606
 701+            },
 702+            {
 703+              "node": "册",
 704+              "value": 1.3248
 705+            },
 706+            {
 707+              "node": "节选",
 708+              "value": 0.3926
 709+            },
 710+            {
 711+              "node": "朱自清",
 712+              "value": 0.3652
 713+            },
 714+            {
 715+              "node": "郁达夫",
 716+              "value": 0.3258
 717+            }
 718+          ]
 719+        },
 720+        "mu_summary": {
 721+          "active_count": 13,
 722+          "total_activation": 3.3429,
 723+          "top_nodes": [
 724+            {
 725+              "node": "上",
 726+              "value": 0.8747
 727+            },
 728+            {
 729+              "node": "苏轼",
 730+              "value": 0.4225
 731+            },
 732+            {
 733+              "node": "册",
 734+              "value": 0.422
 735+            },
 736+            {
 737+              "node": "赤壁赋",
 738+              "value": 0.3678
 739+            },
 740+            {
 741+              "node": "16",
 742+              "value": 0.2329
 743+            }
 744+          ]
 745+        },
 746+        "J_summary": {
 747+          "edge_count": 53,
 748+          "total_flow": 5.9531,
 749+          "top_flows": [
 750+            {
 751+              "edge": "上->册",
 752+              "flow": 1.2355
 753+            },
 754+            {
 755+              "edge": "册->上",
 756+              "flow": 0.6558
 757+            },
 758+            {
 759+              "edge": "16->赤壁赋",
 760+              "flow": 0.2702
 761+            },
 762+            {
 763+              "edge": "15->我与地坛",
 764+              "flow": 0.2567
 765+            },
 766+            {
 767+              "edge": "赤壁赋->苏轼",
 768+              "flow": 0.2482
 769+            }
 770+          ]
 771+        },
 772+        "active_region": [
 773+          "上",
 774+          "苏轼",
 775+          "册",
 776+          "赤壁赋"
 777+        ],
 778+        "active_region_size": 4,
 779+        "bound_ability_core": "上",
 780+        "anchor_pull": 0.0,
 781+        "drift_score": 1.0,
 782+        "free_capacity": 0.2571,
 783+        "experience_regions_count": 2,
 784+        "skill_belt_candidates_count": 6,
 785+        "sedimentation_trace_count": 20,
 786+        "merge_events_count": 12,
 787+        "decay_events_count": 24,
 788+        "output_mode": "degraded",
 789+        "feedback_effect": {},
 790+        "phi_range": {
 791+          "min": 0.0111,
 792+          "max": 1.4606
 793+        },
 794+        "stage_counts": {
 795+          "memory": 190,
 796+          "experience": 9,
 797+          "skill_belt": 12,
 798+          "ability_core": 3
 799+        },
 800+        "graph_node_count": 214,
 801+        "graph_edge_count_proxy": 53,
 802+        "attention_usage": "NOT APPLICABLE: Branch A exposes free_capacity but not attention used/total.",
 803+        "input_slice": {
 804+          "paragraphs": 80,
 805+          "chars_per_paragraph": 80,
 806+          "step_n": 2
 807+        },
 808+        "all_finite": true,
 809+        "max_abs_phi": 1.4606,
 810+        "max_abs_mu": 0.8747,
 811+        "max_abs_J": 1.2355
 812+      },
 813+      "branch_b_reference_expectation": {
 814+        "summary": "Branch B stage report: PASS; 397 nodes, phi range [-0.13, 0.15] on 高中语文必修上.",
 815+        "source": "STAGE_REPORT.md @ c734288"
 816+      },
 817+      "fairness_notes": [
 818+        "Branch A does not expose attention.used/total; free_capacity is the closest locked observable.",
 819+        "phi min/max are derived from runtime.state.phi because Branch A's snapshot summary does not include range fields."
 820+      ],
 821+      "structural_mismatch": null
 822+    },
 823+    {
 824+      "scenario_id": "A06",
 825+      "title": "cross-subject transfer",
 826+      "textbook_used": [
 827+        "小学语文一上",
 828+        "小学数学一上"
 829+      ],
 830+      "dataset_files": [
 831+        "/Users/george/code/china-text-book-md/小学_语文_统编版_义务教育教科书·语文一年级上册.md",
 832+        "/Users/george/code/china-text-book-md/小学_数学_人教版_义务教育教科书 · 数学一年级上册.md"
 833+      ],
 834+      "ran_successfully": true,
 835+      "status": "PASS",
 836+      "reason": "Active region changes under subject switch while some earlier structures remain alive.",
 837+      "key_metrics": {
 838+        "phi_summary": {
 839+          "node_count": 17,
 840+          "total_potential": 3.6564,
 841+          "top_nodes": [
 842+            {
 843+              "node": "一年级",
 844+              "value": 1.188
 845+            },
 846+            {
 847+              "node": "上册",
 848+              "value": 0.6865
 849+            },
 850+            {
 851+              "node": "上册语文",
 852+              "value": 0.4696
 853+            },
 854+            {
 855+              "node": "人民教育出版社",
 856+              "value": 0.2969
 857+            },
 858+            {
 859+              "node": "课程教材研究所",
 860+              "value": 0.2969
 861+            }
 862+          ]
 863+        },
 864+        "mu_summary": {
 865+          "active_count": 6,
 866+          "total_activation": 0.4746,
 867+          "top_nodes": [
 868+            {
 869+              "node": "一年级",
 870+              "value": 0.167
 871+            },
 872+            {
 873+              "node": "人民教育出版社",
 874+              "value": 0.0722
 875+            },
 876+            {
 877+              "node": "课程教材研究所",
 878+              "value": 0.0722
 879+            },
 880+            {
 881+              "node": "上册语文",
 882+              "value": 0.0572
 883+            },
 884+            {
 885+              "node": "上册",
 886+              "value": 0.0548
 887+            }
 888+          ]
 889+        },
 890+        "J_summary": {
 891+          "edge_count": 9,
 892+          "total_flow": 1.6315,
 893+          "top_flows": [
 894+            {
 895+              "edge": "一年级->上册语文",
 896+              "flow": 0.4283
 897+            },
 898+            {
 899+              "edge": "一年级->上册",
 900+              "flow": 0.3071
 901+            },
 902+            {
 903+              "edge": "人民教育出版社->课程教材研究所",
 904+              "flow": 0.2523
 905+            },
 906+            {
 907+              "edge": "上册->一年级",
 908+              "flow": 0.246
 909+            },
 910+            {
 911+              "edge": "上册语文->一年级",
 912+              "flow": 0.2442
 913+            }
 914+          ]
 915+        },
 916+        "active_region": [
 917+          "一年级",
 918+          "人民教育出版社",
 919+          "课程教材研究所",
 920+          "上册语文"
 921+        ],
 922+        "active_region_size": 4,
 923+        "bound_ability_core": "一年级",
 924+        "anchor_pull": 0.0,
 925+        "drift_score": 0.6876,
 926+        "free_capacity": 0.8945,
 927+        "experience_regions_count": 1,
 928+        "skill_belt_candidates_count": 6,
 929+        "sedimentation_trace_count": 20,
 930+        "merge_events_count": 7,
 931+        "decay_events_count": 24,
 932+        "output_mode": "degraded",
 933+        "feedback_effect": {},
 934+        "phi_range": {
 935+          "min": 0.0177,
 936+          "max": 1.188
 937+        },
 938+        "stage_counts": {
 939+          "memory": 18,
 940+          "experience": 0,
 941+          "skill_belt": 2,
 942+          "ability_core": 3
 943+        },
 944+        "graph_node_count": 23,
 945+        "graph_edge_count_proxy": 9,
 946+        "attention_usage": "NOT APPLICABLE: Branch A exposes free_capacity but not attention used/total.",
 947+        "input_slice": {
 948+          "phase_1": {
 949+            "textbook": "小学语文一上",
 950+            "paragraphs": 15,
 951+            "chars_per_paragraph": 40,
 952+            "step_n": 3
 953+          },
 954+          "phase_2": {
 955+            "textbook": "小学数学一上",
 956+            "paragraphs": 15,
 957+            "chars_per_paragraph": 40,
 958+            "step_n": 3
 959+          }
 960+        },
 961+        "active_region_before": [
 962+          "一年级",
 963+          "上册语文",
 964+          "京",
 965+          "北"
 966+        ],
 967+        "active_region_after": [
 968+          "一年级",
 969+          "上册语文",
 970+          "人民教育出版社",
 971+          "课程教材研究所"
 972+        ],
 973+        "new_active_nodes_after_switch": [
 974+          "人民教育出版社",
 975+          "课程教材研究所"
 976+        ],
 977+        "preserved_prior_active_phi_count": 2
 978+      },
 979+      "branch_b_reference_expectation": {
 980+        "summary": "Branch B stage report: PASS; 8 new nodes after 语文→数学 subject switch.",
 981+        "source": "STAGE_REPORT.md @ c734288"
 982+      },
 983+      "fairness_notes": [
 984+        "This mirrors Branch B's no-reset subject switch. Branch A does show migration, but on a much smaller token set."
 985+      ],
 986+      "structural_mismatch": null
 987+    },
 988+    {
 989+      "scenario_id": "A07",
 990+      "title": "session reset preserves long-term structure",
 991+      "textbook_used": [
 992+        "初中语文七上"
 993+      ],
 994+      "dataset_files": [
 995+        "/Users/george/code/china-text-book-md/初中_语文_统编版-人民教育出版社_七年级_义务教育教科书·语文七年级上册.md"
 996+      ],
 997+      "ran_successfully": true,
 998+      "status": "PASS",
 999+      "reason": "reset_session() clears session activation while preserving long-term graph/potential structure.",
1000+      "key_metrics": {
1001+        "phi_summary": {
1002+          "node_count": 46,
1003+          "total_potential": 8.9316,
1004+          "top_nodes": [
1005+            {
1006+              "node": "七年级",
1007+              "value": 1.284
1008+            },
1009+            {
1010+              "node": "朱于国",
1011+              "value": 0.5912
1012+            },
1013+            {
1014+              "node": "王本华",
1015+              "value": 0.5782
1016+            },
1017+            {
1018+              "node": "上册语文",
1019+              "value": 0.4876
1020+            },
1021+            {
1022+              "node": "上册",
1023+              "value": 0.4048
1024+            }
1025+          ]
1026+        },
1027+        "mu_summary": {
1028+          "active_count": 0,
1029+          "total_activation": 0,
1030+          "top_nodes": []
1031+        },
1032+        "J_summary": {
1033+          "edge_count": 41,
1034+          "total_flow": 4.7459,
1035+          "top_flows": [
1036+            {
1037+              "edge": "七年级->上册语文",
1038+              "flow": 0.4475
1039+            },
1040+            {
1041+              "edge": "七年级->上册",
1042+              "flow": 0.3653
1043+            },
1044+            {
1045+              "edge": "责任编辑->朱于国",
1046+              "flow": 0.2634
1047+            },
1048+            {
1049+              "edge": "七年级->语文",
1050+              "flow": 0.2599
1051+            },
1052+            {
1053+              "edge": "上册语文->七年级",
1054+              "flow": 0.2496
1055+            }
1056+          ]
1057+        },
1058+        "active_region": [],
1059+        "active_region_size": 0,
1060+        "bound_ability_core": null,
1061+        "anchor_pull": 0.0,
1062+        "drift_score": 0.0,
1063+        "free_capacity": 1.0,
1064+        "experience_regions_count": 6,
1065+        "skill_belt_candidates_count": 6,
1066+        "sedimentation_trace_count": 20,
1067+        "merge_events_count": 6,
1068+        "decay_events_count": 24,
1069+        "output_mode": "minimal",
1070+        "feedback_effect": {
1071+          "source": "reset_session",
1072+          "mode": "minimal",
1073+          "queued_tokens": [],
1074+          "queued_strength": 0.0,
1075+          "last_applied_step": 90
1076+        },
1077+        "phi_range": {
1078+          "min": 0.0177,
1079+          "max": 1.284
1080+        },
1081+        "stage_counts": {
1082+          "memory": 51,
1083+          "experience": 7,
1084+          "skill_belt": 8,
1085+          "ability_core": 4
1086+        },
1087+        "graph_node_count": 70,
1088+        "graph_edge_count_proxy": 41,
1089+        "attention_usage": "NOT APPLICABLE: Branch A exposes free_capacity but not attention used/total.",
1090+        "input_slice": {
1091+          "paragraphs": 30,
1092+          "chars_per_paragraph": 50,
1093+          "step_n": 3
1094+        },
1095+        "phi_entries_before_reset": 46,
1096+        "phi_entries_preserved_exactly": 46,
1097+        "J_entries_before_reset": 41,
1098+        "J_entries_after_reset": 41,
1099+        "graph_nodes_before_reset": 70,
1100+        "graph_nodes_after_reset": 70,
1101+        "stage_counts_before_reset": {
1102+          "memory": 51,
1103+          "experience": 7,
1104+          "skill_belt": 8,
1105+          "ability_core": 4
1106+        },
1107+        "stage_counts_after_reset": {
1108+          "memory": 51,
1109+          "experience": 7,
1110+          "skill_belt": 8,
1111+          "ability_core": 4
1112+        }
1113+      },
1114+      "branch_b_reference_expectation": {
1115+        "summary": "Branch B stage report: PASS; 182/189 phi entries preserved after reset.",
1116+        "source": "STAGE_REPORT.md @ c734288"
1117+      },
1118+      "fairness_notes": [
1119+        "This is one of Branch A's clearer matched wins: session clearing and long-term retention separate cleanly."
1120+      ],
1121+      "structural_mismatch": null
1122+    },
1123+    {
1124+      "scenario_id": "A08",
1125+      "title": "multi-round feedback",
1126+      "textbook_used": [
1127+        "小学语文一上"
1128+      ],
1129+      "dataset_files": [
1130+        "/Users/george/code/china-text-book-md/小学_语文_统编版_义务教育教科书·语文一年级上册.md"
1131+      ],
1132+      "ran_successfully": true,
1133+      "status": "FAIL",
1134+      "reason": "emit() returned no activated output target on the required slice, so the positive/negative feedback loop could not be meaningfully exercised.",
1135+      "key_metrics": {
1136+        "phi_summary": {
1137+          "node_count": 1,
1138+          "total_potential": 0.1848,
1139+          "top_nodes": [
1140+            {
1141+              "node": "一年级",
1142+              "value": 0.1848
1143+            }
1144+          ]
1145+        },
1146+        "mu_summary": {
1147+          "active_count": 0,
1148+          "total_activation": 0,
1149+          "top_nodes": []
1150+        },
1151+        "J_summary": {
1152+          "edge_count": 0,
1153+          "total_flow": 0,
1154+          "top_flows": []
1155+        },
1156+        "active_region": [],
1157+        "active_region_size": 0,
1158+        "bound_ability_core": "一年级",
1159+        "anchor_pull": 0.0,
1160+        "drift_score": 0.0,
1161+        "free_capacity": 1.0,
1162+        "experience_regions_count": 0,
1163+        "skill_belt_candidates_count": 0,
1164+        "sedimentation_trace_count": 2,
1165+        "merge_events_count": 0,
1166+        "decay_events_count": 9,
1167+        "output_mode": "minimal",
1168+        "feedback_effect": {
1169+          "source": "emit",
1170+          "mode": "minimal",
1171+          "queued_tokens": [
1172+            "idle"
1173+          ],
1174+          "queued_strength": 0.22,
1175+          "confidence_proxy": 0.0,
1176+          "queued_step": 5,
1177+          "last_applied_step": null
1178+        },
1179+        "phi_range": {
1180+          "min": 0.1848,
1181+          "max": 0.1848
1182+        },
1183+        "stage_counts": {
1184+          "memory": 1,
1185+          "experience": 0,
1186+          "skill_belt": 0,
1187+          "ability_core": 0
1188+        },
1189+        "graph_node_count": 1,
1190+        "graph_edge_count_proxy": 0,
1191+        "attention_usage": "NOT APPLICABLE: Branch A exposes free_capacity but not attention used/total.",
1192+        "emit_output": "minimal: idle",
1193+        "input_slice": {
1194+          "paragraphs": 1,
1195+          "chars_per_paragraph": 30,
1196+          "step_n": 5
1197+        },
1198+        "feedback_target": null
1199+      },
1200+      "branch_b_reference_expectation": {
1201+        "summary": "Branch B stage report: PASS; confidence 0.333→0.889→0.381 after positive/negative feedback.",
1202+        "source": "STAGE_REPORT.md @ c734288"
1203+      },
1204+      "fairness_notes": [
1205+        "This is reported as a real Branch A failure, not normalized away.",
1206+        "Branch A feedback is queued and applied on the next step, but that did not matter here because no target emerged."
1207+      ],
1208+      "structural_mismatch": null
1209+    },
1210+    {
1211+      "scenario_id": "A09",
1212+      "title": "incremental sedimentation",
1213+      "textbook_used": [
1214+        "小学语文一上"
1215+      ],
1216+      "dataset_files": [
1217+        "/Users/george/code/china-text-book-md/小学_语文_统编版_义务教育教科书·语文一年级上册.md"
1218+      ],
1219+      "ran_successfully": true,
1220+      "status": "PASS",
1221+      "reason": "Repeated rounds show incremental stage progression, even though several observable lists are capped.",
1222+      "key_metrics": {
1223+        "phi_summary": {
1224+          "node_count": 15,
1225+          "total_potential": 4.8793,
1226+          "top_nodes": [
1227+            {
1228+              "node": "一年级",
1229+              "value": 1.3746
1230+            },
1231+            {
1232+              "node": "上册语文",
1233+              "value": 1.0367
1234+            },
1235+            {
1236+              "node": "教",
1237+              "value": 0.2848
1238+            },
1239+            {
1240+              "node": "7",
1241+              "value": 0.2721
1242+            },
1243+            {
1244+              "node": "绿色印刷产品",
1245+              "value": 0.2173
1246+            }
1247+          ]
1248+        },
1249+        "mu_summary": {
1250+          "active_count": 3,
1251+          "total_activation": 0.2337,
1252+          "top_nodes": [
1253+            {
1254+              "node": "一年级",
1255+              "value": 0.1057
1256+            },
1257+            {
1258+              "node": "上册语文",
1259+              "value": 0.0767
1260+            },
1261+            {
1262+              "node": "义务教育教科书",
1263+              "value": 0.0512
1264+            }
1265+          ]
1266+        },
1267+        "J_summary": {
1268+          "edge_count": 11,
1269+          "total_flow": 1.9704,
1270+          "top_flows": [
1271+            {
1272+              "edge": "一年级->上册语文",
1273+              "flow": 0.9627
1274+            },
1275+            {
1276+              "edge": "上册语文->一年级",
1277+              "flow": 0.5001
1278+            },
1279+            {
1280+              "edge": "7->75元",
1281+              "flow": 0.1124
1282+            },
1283+            {
1284+              "edge": "定价->7",
1285+              "flow": 0.1004
1286+            },
1287+            {
1288+              "edge": "教->育",
1289+              "flow": 0.0552
1290+            }
1291+          ]
1292+        },
1293+        "active_region": [
1294+          "一年级",
1295+          "上册语文",
1296+          "义务教育教科书"
1297+        ],
1298+        "active_region_size": 3,
1299+        "bound_ability_core": "一年级",
1300+        "anchor_pull": 0.0,
1301+        "drift_score": 0.3484,
1302+        "free_capacity": 0.9481,
1303+        "experience_regions_count": 1,
1304+        "skill_belt_candidates_count": 6,
1305+        "sedimentation_trace_count": 20,
1306+        "merge_events_count": 12,
1307+        "decay_events_count": 24,
1308+        "output_mode": "minimal",
1309+        "feedback_effect": {},
1310+        "phi_range": {
1311+          "min": 0.0351,
1312+          "max": 1.3746
1313+        },
1314+        "stage_counts": {
1315+          "memory": 9,
1316+          "experience": 2,
1317+          "skill_belt": 2,
1318+          "ability_core": 2
1319+        },
1320+        "graph_node_count": 15,
1321+        "graph_edge_count_proxy": 11,
1322+        "attention_usage": "NOT APPLICABLE: Branch A exposes free_capacity but not attention used/total.",
1323+        "input_slice": {
1324+          "paragraphs": 10,
1325+          "chars_per_paragraph": 30,
1326+          "step_n": 3,
1327+          "rounds": 5
1328+        },
1329+        "round_history": [
1330+          {
1331+            "round": 1,
1332+            "sedimentation_trace_count": 20,
1333+            "experience_regions_count": 1,
1334+            "skill_belt_candidates_count": 6,
1335+            "phi_node_count": 15,
1336+            "active_count": 3,
1337+            "stage_counts": {
1338+              "memory": 11,
1339+              "experience": 2,
1340+              "skill_belt": 0,
1341+              "ability_core": 2
1342+            }
1343+          },
1344+          {
1345+            "round": 2,
1346+            "sedimentation_trace_count": 20,
1347+            "experience_regions_count": 1,
1348+            "skill_belt_candidates_count": 6,
1349+            "phi_node_count": 15,
1350+            "active_count": 3,
1351+            "stage_counts": {
1352+              "memory": 10,
1353+              "experience": 2,
1354+              "skill_belt": 1,
1355+              "ability_core": 2
1356+            }
1357+          },
1358+          {
1359+            "round": 3,
1360+            "sedimentation_trace_count": 20,
1361+            "experience_regions_count": 1,
1362+            "skill_belt_candidates_count": 6,
1363+            "phi_node_count": 15,
1364+            "active_count": 3,
1365+            "stage_counts": {
1366+              "memory": 10,
1367+              "experience": 2,
1368+              "skill_belt": 1,
1369+              "ability_core": 2
1370+            }
1371+          },
1372+          {
1373+            "round": 4,
1374+            "sedimentation_trace_count": 20,
1375+            "experience_regions_count": 1,
1376+            "skill_belt_candidates_count": 6,
1377+            "phi_node_count": 15,
1378+            "active_count": 3,
1379+            "stage_counts": {
1380+              "memory": 9,
1381+              "experience": 2,
1382+              "skill_belt": 2,
1383+              "ability_core": 2
1384+            }
1385+          },
1386+          {
1387+            "round": 5,
1388+            "sedimentation_trace_count": 20,
1389+            "experience_regions_count": 1,
1390+            "skill_belt_candidates_count": 6,
1391+            "phi_node_count": 15,
1392+            "active_count": 3,
1393+            "stage_counts": {
1394+              "memory": 9,
1395+              "experience": 2,
1396+              "skill_belt": 2,
1397+              "ability_core": 2
1398+            }
1399+          }
1400+        ]
1401+      },
1402+      "branch_b_reference_expectation": {
1403+        "summary": "Branch B stage report: PASS; sedimentation gradient (20,4)→(20,10).",
1404+        "source": "STAGE_REPORT.md @ c734288"
1405+      },
1406+      "fairness_notes": [
1407+        "sedimentation_trace and skill_belt_candidates are capped lists in Branch A, so stage_counts are the more honest growth indicator here."
1408+      ],
1409+      "structural_mismatch": null
1410+    },
1411+    {
1412+      "scenario_id": "A10",
1413+      "title": "snapshot completeness on real textbook input",
1414+      "textbook_used": [
1415+        "初中数学七上"
1416+      ],
1417+      "dataset_files": [
1418+        "/Users/george/code/china-text-book-md/初中_数学_人教版-人民教育出版社_七年级_义务教育教科书·数学七年级上册.md"
1419+      ],
1420+      "ran_successfully": true,
1421+      "status": "PASS",
1422+      "reason": "All Branch A locked snapshot fields are present on real textbook-driven state.",
1423+      "key_metrics": {
1424+        "phi_summary": {
1425+          "node_count": 32,
1426+          "total_potential": 7.9444,
1427+          "top_nodes": [
1428+            {
1429+              "node": "上册",
1430+              "value": 1.0569
1431+            },
1432+            {
1433+              "node": "七年级",
1434+              "value": 0.7192
1435+            },
1436+            {
1437+              "node": "数学",
1438+              "value": 0.659
1439+            },
1440+            {
1441+              "node": "联系调换",
1442+              "value": 0.3541
1443+            },
1444+            {
1445+              "node": "pep",
1446+              "value": 0.3273
1447+            }
1448+          ]
1449+        },
1450+        "mu_summary": {
1451+          "active_count": 10,
1452+          "total_activation": 1.5574,
1453+          "top_nodes": [
1454+            {
1455+              "node": "上册",
1456+              "value": 0.476
1457+            },
1458+            {
1459+              "node": "七年级",
1460+              "value": 0.208
1461+            },
1462+            {
1463+              "node": "联系调换",
1464+              "value": 0.1814
1465+            },
1466+            {
1467+              "node": "数学",
1468+              "value": 0.1445
1469+            },
1470+            {
1471+              "node": "装质量问题",
1472+              "value": 0.1201
1473+            }
1474+          ]
1475+        },
1476+        "J_summary": {
1477+          "edge_count": 47,
1478+          "total_flow": 4.7937,
1479+          "top_flows": [
1480+            {
1481+              "edge": "七年级->上册",
1482+              "flow": 0.3959
1483+            },
1484+            {
1485+              "edge": "上册->七年级",
1486+              "flow": 0.3954
1487+            },
1488+            {
1489+              "edge": "上册->数学",
1490+              "flow": 0.352
1491+            },
1492+            {
1493+              "edge": "如发现印->装质量问题",
1494+              "flow": 0.2464
1495+            },
1496+            {
1497+              "edge": "装质量问题->影响阅读",
1498+              "flow": 0.2293
1499+            }
1500+          ]
1501+        },
1502+        "active_region": [
1503+          "上册",
1504+          "七年级",
1505+          "联系调换",
1506+          "数学"
1507+        ],
1508+        "active_region_size": 4,
1509+        "bound_ability_core": "上册",
1510+        "anchor_pull": 0.0,
1511+        "drift_score": 0.883,
1512+        "free_capacity": 0.6539,
1513+        "experience_regions_count": 3,
1514+        "skill_belt_candidates_count": 6,
1515+        "sedimentation_trace_count": 20,
1516+        "merge_events_count": 4,
1517+        "decay_events_count": 24,
1518+        "output_mode": "degraded",
1519+        "feedback_effect": {
1520+          "source": "feedback",
1521+          "mode": "feedback",
1522+          "queued_tokens": [
1523+            "validation"
1524+          ],
1525+          "queued_strength": 0.2,
1526+          "polarity": 1,
1527+          "queued_step": 61,
1528+          "last_applied_step": 62,
1529+          "applied_tokens": [
1530+            "validation"
1531+          ],
1532+          "phi_delta": 0.0109,
1533+          "mu_delta": 0.015,
1534+          "flow_delta": 0.0,
1535+          "stage_after": {
1536+            "validation": "memory"
1537+          },
1538+          "bound_ability_core": "上册"
1539+        },
1540+        "phi_range": {
1541+          "min": 0.0188,
1542+          "max": 1.0569
1543+        },
1544+        "stage_counts": {
1545+          "memory": 26,
1546+          "experience": 8,
1547+          "skill_belt": 6,
1548+          "ability_core": 4
1549+        },
1550+        "graph_node_count": 44,
1551+        "graph_edge_count_proxy": 47,
1552+        "attention_usage": "NOT APPLICABLE: Branch A exposes free_capacity but not attention used/total.",
1553+        "emit_output": "minimal: 上册",
1554+        "input_slice": {
1555+          "paragraphs": 20,
1556+          "chars_per_paragraph": 40,
1557+          "step_n": 3
1558+        },
1559+        "required_snapshot_keys": [
1560+          "J_summary",
1561+          "active_region",
1562+          "anchor_pull",
1563+          "bound_ability_core",
1564+          "decay_events",
1565+          "drift_score",
1566+          "experience_regions",
1567+          "feedback_effect",
1568+          "free_capacity",
1569+          "merge_events",
1570+          "mu_summary",
1571+          "output_mode",
1572+          "phi_summary",
1573+          "sedimentation_trace",
1574+          "skill_belt_candidates"
1575+        ],
1576+        "observed_snapshot_keys": [
1577+          "J_summary",
1578+          "active_region",
1579+          "anchor_pull",
1580+          "bound_ability_core",
1581+          "decay_events",
1582+          "drift_score",
1583+          "experience_regions",
1584+          "feedback_effect",
1585+          "free_capacity",
1586+          "merge_events",
1587+          "mu_summary",
1588+          "output_mode",
1589+          "phi_summary",
1590+          "sedimentation_trace",
1591+          "skill_belt_candidates"
1592+        ],
1593+        "missing_snapshot_keys": []
1594+      },
1595+      "branch_b_reference_expectation": {
1596+        "summary": "Branch B stage report: PASS; 16 snapshot fields present on real textbook state.",
1597+        "source": "STAGE_REPORT.md @ c734288"
1598+      },
1599+      "fairness_notes": [
1600+        "Branch A needs one extra step after feedback to observe the applied feedback_effect because feedback is queued.",
1601+        "Branch B's report mentions 16 fields including attention, but Branch A's locked comparable surface is the 15-field spec set."
1602+      ],
1603+      "structural_mismatch": null
1604+    }
1605+  ],
1606+  "overall_summary": {
1607+    "scenario_count": 10,
1608+    "ran_successfully_count": 10,
1609+    "status_counts": {
1610+      "PASS": 8,
1611+      "FAIL": 1,
1612+      "N/A": 0,
1613+      "STRUCTURAL MISMATCH": 1
1614+    },
1615+    "fairness_gap_reduced": true,
1616+    "materially_changes_previous_ab_conclusion": false,
1617+    "summary": "Branch A was run on the same 5 real textbooks and A01-A10 scenario family used by Branch B. Eight scenarios passed on Branch A's own observable surface, A04 is a structural mismatch, and A08 failed on the required slice."
1618+  },
1619+  "structural_mismatches": [
1620+    {
1621+      "scenario_id": "A04",
1622+      "title": "初中数学七上 — formula / structure",
1623+      "detail": "Branch B's A04 metric is based on forward/backward graph edge weights. Branch A only exposes directed J flow, not a directly comparable directed graph-edge surface, so a fair asymmetry-ratio comparison is a structural mismatch."
1624+    },
1625+    {
1626+      "scenario_id": "A01/A08",
1627+      "title": "emit surface mismatch",
1628+      "detail": "Branch A emit() returns a plain string, not Branch B's structured payload with activated nodes and active_count."
1629+    },
1630+    {
1631+      "scenario_id": "A05/A10",
1632+      "title": "attention surface mismatch",
1633+      "detail": "Branch A does not expose attention used/total. free_capacity is the locked comparable field instead."
1634+    },
1635+    {
1636+      "scenario_id": "A08/A10",
1637+      "title": "feedback timing mismatch",
1638+      "detail": "Branch A commit_feedback() is queued and becomes observable on the next step, unlike Branch B's more immediate feedback probes."
1639+    }
1640+  ],
1641+  "known_limitations": [
1642+    "Branch A tokenizes each ingest call with a Unicode word regex and keeps at most 8 tokens, so long textbook slices compress into coarse tokens instead of Branch B's character/bigram view.",
1643+    "Branch A does not expose attention.used/attention.total; free_capacity is the closest locked observable, and phi min/max must be derived from internal state for diagnostics.",
1644+    "Branch A emit() is string-only and commit_feedback() is asynchronous, so some Branch B feedback/output probes can only be approximated, not matched exactly.",
1645+    "The first cleaned textbook paragraphs include front matter and publishing metadata; this is shared with Branch B's slice definition but is amplified by Branch A's coarse tokenization."
1646+  ],
1647+  "recommendation": {
1648+    "decision": "enough to proceed with merge decision",
1649+    "reason": "The main A/B fairness gap was the unmatched real-data harness. This validation closes that gap enough to make a merge decision on current evidence. The remaining issues are explicit Branch A results: one failed scenario (A08) and one true structural mismatch (A04), not hidden harness differences."
1650+  }
1651+}

A reports/2026-03-31_branch_a_real_textbook_validation.md

+50, -0

 1@@ -0,0 +1,50 @@
 2+# Branch A Real Textbook Validation
 3+
 4+## Purpose
 5+Run Branch A on the same 5 real textbook files and the same A01-A10 real-data scenario family used by Branch B, then report the result honestly without changing Branch A runtime behavior.
 6+
 7+## Base Commits
 8+- Branch A base commit: `419ae8d39150806011c1eb6082c7fc8c6a337735`
 9+- Branch B reference commit: `c7342881bb2ebfa5e7f927c91a7806416288573b` (`c734288`)
10+- Branch under test: `review/branch-a-real-textbook-validation`
11+
12+## Dataset Path And File Check
13+- Dataset path: `/Users/george/code/china-text-book-md`
14+- Directory exists: `True`
15+- All 5 required files present: `True`
16+- OK `/Users/george/code/china-text-book-md/小学_语文_统编版_义务教育教科书·语文一年级上册.md`
17+- OK `/Users/george/code/china-text-book-md/小学_数学_人教版_义务教育教科书 · 数学一年级上册.md`
18+- OK `/Users/george/code/china-text-book-md/初中_语文_统编版-人民教育出版社_七年级_义务教育教科书·语文七年级上册.md`
19+- OK `/Users/george/code/china-text-book-md/初中_数学_人教版-人民教育出版社_七年级_义务教育教科书·数学七年级上册.md`
20+- OK `/Users/george/code/china-text-book-md/高中_语文_统编版-人民教育出版社_普通高中教科书·语文必修 上册.md`
21+
22+## Scenario Results
23+| ID | Textbook | Status | Branch B Reference | Branch A Observed | Reason |
24+| --- | --- | --- | --- | --- | --- |
25+| A01 | 小学语文一上 | PASS | Branch B stage report: PASS; 126 nodes, 166 flows on 小学语文一上 pipeline. | phi=40, mu=13, J=35, mode=degraded | Pipeline ran on the required real-data slice and produced non-empty phi/mu/J state. |
26+| A02 | 小学数学一上 | PASS | Branch B stage report: PASS; 58 nodes, has_cn=True on 小学数学一上 mixed text. | phi=15, mu=7, J=18, mode=minimal | Chinese-bearing nodes exist on the mixed textbook slice; digit-bearing nodes are reported separately. |
27+| A03 | 初中语文七上 | PASS | Branch B stage report: PASS; 276 nodes, 20 sedimentation traces on 初中语文七上. | phi=36, mu=6, J=28, mode=minimal | Sedimentation and experience-region observables are present on the required real-data slice. |
28+| A04 | 初中数学七上 | STRUCTURAL MISMATCH | Branch B stage report: PASS; 294 edges, asymmetry ratio 1.00 on 初中数学七上. | phi=35, J=32, flow-asym-proxy=0.7721 | The scenario ran, but the primary Branch B asymmetry-ratio metric does not map cleanly onto Branch A. |
29+| A05 | 高中语文必修上 | PASS | Branch B stage report: PASS; 397 nodes, phi range [-0.13, 0.15] on 高中语文必修上. | phi=55, mu=13, J=53, mode=degraded | Long-text run stayed finite and showed no obvious overflow/divergence symptom. |
30+| A06 | 小学语文一上, 小学数学一上 | PASS | Branch B stage report: PASS; 8 new nodes after 语文→数学 subject switch. | phi=17, mu=6, J=9, mode=degraded | Active region changes under subject switch while some earlier structures remain alive. |
31+| A07 | 初中语文七上 | PASS | Branch B stage report: PASS; 182/189 phi entries preserved after reset. | phi=46, mu=0, J=41, mode=minimal | reset_session() clears session activation while preserving long-term graph/potential structure. |
32+| A08 | 小学语文一上 | FAIL | Branch B stage report: PASS; confidence 0.333→0.889→0.381 after positive/negative feedback. | mode=minimal, emit=minimal: idle, active=0 | emit() returned no activated output target on the required slice, so the positive/negative feedback loop could not be meaningfully exercised. |
33+| A09 | 小学语文一上 | PASS | Branch B stage report: PASS; sedimentation gradient (20,4)→(20,10). | phi=15, mu=3, J=11, mode=minimal | Repeated rounds show incremental stage progression, even though several observable lists are capped. |
34+| A10 | 初中数学七上 | PASS | Branch B stage report: PASS; 16 snapshot fields present on real textbook state. | phi=32, mu=10, J=47, mode=degraded | All Branch A locked snapshot fields are present on real textbook-driven state. |
35+
36+## Explicit Structural Mismatches
37+- `A04`: Branch B's A04 metric is based on forward/backward graph edge weights. Branch A only exposes directed J flow, not a directly comparable directed graph-edge surface, so a fair asymmetry-ratio comparison is a structural mismatch.
38+- `A01/A08`: Branch A emit() returns a plain string, not Branch B's structured payload with activated nodes and active_count.
39+- `A05/A10`: Branch A does not expose attention used/total. free_capacity is the locked comparable field instead.
40+- `A08/A10`: Branch A commit_feedback() is queued and becomes observable on the next step, unlike Branch B's more immediate feedback probes.
41+
42+## Concise Fairness Interpretation
43+- This run materially reduces the main A/B fairness gap because Branch A was executed on the same dataset, same file set, and same A01-A10 slice family as Branch B.
44+- It does not erase Branch A's current disadvantages: A08 fails on the mandated slice, A04 is not directly comparable, and most Branch A state sizes remain much smaller than Branch B's reference values.
45+
46+## Does This Reduce The Main A/B Fairness Gap?
47+- Yes. The earlier fairness concern was unmatched real-data coverage. That concern is now materially reduced because Branch A was run on the same real textbooks and scenario family.
48+
49+## Recommendation
50+- Decision: `enough to proceed with merge decision`
51+- Reason: The main A/B fairness gap was the unmatched real-data harness. This validation closes that gap enough to make a merge decision on current evidence. The remaining issues are explicit Branch A results: one failed scenario (A08) and one true structural mismatch (A04), not hidden harness differences.

A reviews/2026-03-31_branch_a_real_textbook_validation.md

+18, -0

 1@@ -0,0 +1,18 @@
 2+# Review: Branch A Real Textbook Validation
 3+
 4+## What Was Run
 5+- Branch A base commit `419ae8d39150806011c1eb6082c7fc8c6a337735` on branch `review/branch-a-real-textbook-validation`.
 6+- Branch B reference commit `c7342881bb2ebfa5e7f927c91a7806416288573b` for dataset/scenario parity.
 7+- Same dataset directory: `/Users/george/code/china-text-book-md` with the exact 5 textbook files required by Branch B.
 8+- Same real-data scenario family: A01-A10.
 9+
10+## Outcome
11+- Succeeded: A01, A02, A03, A05, A06, A07, A09, A10
12+- Failed: A08
13+- Structurally not comparable: A04
14+
15+## Decision Readout
16+- The matched real-textbook run materially reduces the earlier fairness gap.
17+- It does not materially change the previous A/B conclusion that Branch B currently has broader and cleaner real-data validation coverage.
18+- Recommendation: `enough to proceed with merge decision`
19+- Rationale: The main A/B fairness gap was the unmatched real-data harness. This validation closes that gap enough to make a merge decision on current evidence. The remaining issues are explicit Branch A results: one failed scenario (A08) and one true structural mismatch (A04), not hidden harness differences.

A tests/test_branch_a_real_textbooks.py

+77, -0

 1@@ -0,0 +1,77 @@
 2+from __future__ import annotations
 3+
 4+import json
 5+import subprocess
 6+import sys
 7+import tempfile
 8+import unittest
 9+from pathlib import Path
10+
11+from cie.validation_real_textbooks import (
12+    DATASET_DIR,
13+    DEFAULT_JSON_REPORT_PATH,
14+    DEFAULT_MARKDOWN_REPORT_PATH,
15+    DEFAULT_REVIEW_REPORT_PATH,
16+    REQUIRED_REPORT_KEYS,
17+    TEXTBOOKS,
18+    VALID_SCENARIO_STATUSES,
19+    generate_validation_report,
20+)
21+
22+
23+REPO_ROOT = Path(__file__).resolve().parent.parent  # two directory levels above this test file; used as cwd for the entrypoint subprocess
24+
25+
26+class BranchARealTextbookValidationTests(unittest.TestCase):  # Checks generate_validation_report() output and the module entrypoint against the dataset at DATASET_DIR.
27+    @classmethod
28+    def setUpClass(cls) -> None:  # Runs the validation once into a temp dir; the paths and returned report are shared by every test below.
29+        cls.temp_dir = tempfile.TemporaryDirectory()
30+        temp_root = Path(cls.temp_dir.name)
31+        cls.json_path = temp_root / "branch_a_real_textbooks.json"
32+        cls.markdown_path = temp_root / "branch_a_real_textbooks.md"
33+        cls.review_path = temp_root / "branch_a_real_textbooks_review.md"
34+        cls.report = generate_validation_report(cls.json_path, cls.markdown_path, cls.review_path)  # tests expect this call to write all three files
35+
36+    @classmethod
37+    def tearDownClass(cls) -> None:  # NOTE(review): unittest skips tearDownClass when setUpClass raises; the temp dir then relies on TemporaryDirectory's implicit cleanup.
38+        cls.temp_dir.cleanup()
39+
40+    def test_dataset_exists_with_required_files(self) -> None:  # Environment check: DATASET_DIR and every file named in TEXTBOOKS must exist on this machine.
41+        self.assertTrue(DATASET_DIR.is_dir())
42+        for filename in TEXTBOOKS.values():
43+            self.assertTrue((DATASET_DIR / filename).is_file(), msg=filename)  # msg names the missing file on failure
44+
45+    def test_validation_generates_all_report_files(self) -> None:  # The three output paths passed in setUpClass must exist afterwards.
46+        self.assertTrue(self.json_path.exists())
47+        self.assertTrue(self.markdown_path.exists())
48+        self.assertTrue(self.review_path.exists())
49+
50+    def test_json_report_has_required_top_level_keys(self) -> None:  # Re-reads the JSON from disk rather than using the in-memory report.
51+        payload = json.loads(self.json_path.read_text(encoding="utf-8"))
52+        self.assertEqual(tuple(payload), REQUIRED_REPORT_KEYS)  # compares key order as well as key names
53+        self.assertTrue(payload["dataset_check"]["all_required_files_exist"])
54+        self.assertIn("fairness_gap_reduced", payload["overall_summary"])
55+
56+    def test_scenarios_cover_a01_to_a10_with_valid_statuses(self) -> None:  # Uses the report object returned by generate_validation_report().
57+        scenarios = self.report["scenarios"]
58+        self.assertEqual([item["scenario_id"] for item in scenarios], [f"A{index:02d}" for index in range(1, 11)])  # exactly A01..A10, in order
59+        for scenario in scenarios:
60+            self.assertIn(scenario["status"], VALID_SCENARIO_STATUSES)
61+            self.assertIn("reason", scenario)
62+            self.assertIn("key_metrics", scenario)
63+
64+    def test_entrypoint_runs_with_default_paths(self) -> None:  # NOTE(review): invoked with no path arguments, so it presumably (re)writes the DEFAULT_* report files inside the repo; confirm that is intended.
65+        completed = subprocess.run(
66+            [sys.executable, "-m", "cie.validation_real_textbooks"],
67+            cwd=REPO_ROOT,
68+            capture_output=True,
69+            text=True,
70+        )
71+        self.assertEqual(completed.returncode, 0, msg=completed.stderr)  # surface the subprocess stderr when the run fails
72+        self.assertTrue(DEFAULT_JSON_REPORT_PATH.exists())
73+        self.assertTrue(DEFAULT_MARKDOWN_REPORT_PATH.exists())
74+        self.assertTrue(DEFAULT_REVIEW_REPORT_PATH.exists())
75+
76+
77+if __name__ == "__main__":  # allow running this test module directly as a script
78+    unittest.main()