Recovers how a script method id maps to its implementation, the foundation for body-level normalisation. Each CMC_*_Runner::run is a switch(id) (vtable slot 17); every case is the method body — inline (MSVC6) or a tail-call to a separate show()/load() (MSVC8). The extractor parses the jump table at the disassembly level (Ghidra's decompiler jump-table recovery silently dropped the big runners), fingerprints each case by its ordered CALL anchors (Class::method / vtbl+0xNN), and expands thin wrappers one level so MSVC8 lines up with MSVC6. Validated on the golden pair: Animo SHOW..RESUME (id 1-4) yield identical leaves (getAnimo + vtbl+0xa0/0xa4/0x4c/0x50) across both compilers. Coverage 30/32 runners; Piklib 475 / BlooMoo 619 dispatch rows. - extract_engine_surface.py: extract_method_dispatch (schema_version -> 4) - snapshots regenerated with the method_dispatch axis - ams: Snapshot.method_dispatch; diff axis keyed (owner,id) on [impl,calls] with method-name join; render METHOD BODIES section; cli --only dispatch; owner filter - UI: "Ciała metod" diff axis + browse tab - tests: body-change unit + cross-compiler vtbl assertion -> 29/29 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
131 lines
5.9 KiB
Python
131 lines
5.9 KiB
Python
"""Unit tests for the diff engine + an integration test over the committed golden pair."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from ams.diff import compute_diff, filter_by_owner
|
|
from ams.render import render_text
|
|
from ams.snapshot import Snapshot
|
|
|
|
# Golden snapshot directory: "snapshots" inside the parent of the directory containing this file.
SNAP_DIR = Path(__file__).resolve().parents[1] / "snapshots"
|
|
|
|
|
|
def _snap(**axes) -> Snapshot:
    """Build a minimal Snapshot for unit tests.

    Every default axis is an empty list and the binary descriptor carries
    placeholder values; any keyword argument overrides (or adds) the axis
    of the same name.
    """
    defaults = {
        "binary": {"name": "x", "engine": "e", "compiler": "c"},
        "types": [],
        "methods": [],
        "events": [],
        "fields": [],
        "struct_layout": [],
        "method_inheritance": [],
        "field_inheritance": [],
    }
    # Overrides win over defaults; axes not in the defaults are appended.
    return Snapshot({**defaults, **axes})
|
|
|
|
|
|
# --- unit --------------------------------------------------------------------------------------
|
|
def test_types_added_removed_changed():
    """The types axis reports one added, one removed and one resized type."""

    def type_row(script_name, cpp_class, object_size):
        # All rows in this test are registered without the module interface.
        return {"script_name": script_name, "cpp_class": cpp_class,
                "object_size": object_size, "via_module_iface": False}

    before = _snap(types=[
        type_row("ANIMO", "CMC_Animo", 108),
        type_row("OLD", "CMC_Old", 10),
    ])
    after = _snap(types=[
        type_row("ANIMO", "CMC_Animo", 128),
        type_row("NEW", "CMC_New", 20),
    ])

    types_diff = compute_diff(before, after)["types"]

    assert [row["script_name"] for row in types_diff["added"]] == ["NEW"]
    assert [row["script_name"] for row in types_diff["removed"]] == ["OLD"]
    # Only ANIMO exists on both sides, and only its object_size differs.
    assert len(types_diff["changed"]) == 1
    assert types_diff["changed"][0]["changes"]["object_size"] == [108, 128]
|
|
|
|
|
|
def test_dual_multiarray_kept_distinct():
    """Two rows sharing a script_name but differing in via_module_iface diff cleanly against themselves."""
    # same script_name, different via_module_iface -> two distinct, stable keys
    rows = [
        {"script_name": "MULTIARRAY", "cpp_class": "CMC_MultiArray",
         "object_size": 88, "via_module_iface": via_iface}
        for via_iface in (False, True)
    ]
    types_diff = compute_diff(_snap(types=rows), _snap(types=rows))["types"]
    assert types_diff == {"added": [], "removed": [], "changed": []}
|
|
|
|
|
|
def test_method_id_change_and_move():
    """An id change is reported under methods; an owner change is reported under moved_methods."""
    before = _snap(methods=[
        {"owner": "CMC_Animo", "name": "SHOW", "id": 1},
        {"owner": "CMC_Animo", "name": "PING", "id": 9},
    ])
    after = _snap(methods=[
        {"owner": "CMC_Animo", "name": "SHOW", "id": 2},  # id changed
        {"owner": "CMC", "name": "PING", "id": 9},        # moved Animo -> base CMC
    ])

    diff = compute_diff(before, after)

    first_changed = diff["methods"]["changed"][0]
    assert first_changed["changes"]["id"] == [1, 2]

    expected_move = {"name": "PING", "from_owners": ["CMC_Animo"], "to_owners": ["CMC"]}
    assert diff["moved_methods"] == [expected_move]
|
|
|
|
|
|
def test_field_type_change_and_owner_filter():
    """A field type change is visible after narrowing the diff to a single owner."""

    def field_row(owner, name, type_name):
        # Fresh dict per call so the two snapshots never share row objects.
        return {"owner": owner, "name": name, "type": type_name, "order": 0}

    before = _snap(fields=[
        field_row("CMC_Animo", "FPS", "int"),
        field_row("CMC_Sound", "VOLUME", "int"),
    ])
    after = _snap(fields=[
        field_row("CMC_Animo", "FPS", "double"),
        field_row("CMC_Sound", "VOLUME", "int"),
    ])

    animo_only = filter_by_owner(compute_diff(before, after), "CMC_Animo")
    fields_diff = animo_only["fields"]

    assert fields_diff["changed"][0]["changes"]["type"] == ["int", "double"]
    assert fields_diff["added"] == []
    assert fields_diff["removed"] == []
|
|
|
|
|
|
def test_method_dispatch_body_change():
    """A differing call anchor shows up as a changed method_dispatch row and in the rendered text."""

    def snapshot_with_last_call(last_call):
        # Identical fixtures on both sides except for the final call anchor.
        return _snap(
            methods=[{"owner": "CMC_Animo", "name": "SHOW", "id": 1}],
            method_dispatch=[{"owner": "CMC_Animo", "id": 1, "impl": None,
                              "impl_addr": "0x1",
                              "calls": ["CMC_Animo::getAnimo", last_call]}],
        )

    before = snapshot_with_last_call("vtbl+0xa0")
    after = snapshot_with_last_call("vtbl+0xa4")

    dispatch_diff = compute_diff(before, after)["method_dispatch"]
    assert len(dispatch_diff["changed"]) == 1

    change = dispatch_diff["changed"][0]
    # name joined from the methods axis on (owner, id)
    assert change["item"]["name"] == "SHOW"
    assert change["changes"]["calls"] == [
        ["CMC_Animo::getAnimo", "vtbl+0xa0"],
        ["CMC_Animo::getAnimo", "vtbl+0xa4"],
    ]

    rendered = render_text(compute_diff(before, after))
    assert "METHOD BODIES" in rendered
|
|
|
|
|
|
def test_render_no_diff():
    """Rendering the diff of two default snapshots yields the no-differences marker."""
    empty_diff = compute_diff(_snap(), _snap())
    rendered = render_text(empty_diff)
    assert "(no differences)" in rendered
|
|
|
|
|
|
# --- integration over the committed golden pair ------------------------------------------------
|
|
# Skip unless BOTH golden snapshots exist: the test loads each of them, so checking only
# one would turn a missing second file into an error instead of a skip.
@pytest.mark.skipif(
    not all((SNAP_DIR / name).exists()
            for name in ("PIKLIB8.dll.snapshot.json", "bloomoodll.dll.snapshot.json")),
    reason="golden snapshots not present",
)
def test_golden_pair_piklib_to_bloomoo():
    """Integration check of the diff engine over the committed golden snapshot pair.

    Loads both snapshots from SNAP_DIR, diffs them, and asserts known facts
    about the types, Animo methods/fields, rendered text and method-dispatch rows.
    """
    old = Snapshot.load(str(SNAP_DIR / "PIKLIB8.dll.snapshot.json"))      # VS6
    new = Snapshot.load(str(SNAP_DIR / "bloomoodll.dll.snapshot.json"))   # VS8
    d = compute_diff(old, new)

    added_types = {t["script_name"] for t in d["types"]["added"]}
    assert {"GRBUFFER", "INTERNET"} <= added_types
    assert d["types"]["removed"] == []

    # Map each changed type to its [old, new] object_size pair (None when the size did not change).
    size_changes = {c["item"]["script_name"]: c["changes"].get("object_size")
                    for c in d["types"]["changed"]}
    assert size_changes.get("MOUSE") == [104, 128]

    # BlooMoo added Animo methods; none removed for Animo
    animo = filter_by_owner(d, "CMC_Animo")
    assert "GETFPS" in {m["name"] for m in animo["methods"]["added"]}
    assert animo["fields"]["added"] == []   # Animo's script fields are identical across the pair

    # rendering must not raise and must mention the new types
    text = render_text(d)
    assert "GRBUFFER" in text and "MOUSE" in text

    # method bodies recovered cross-compiler: Animo SHOW (id 1) maps to the same vtable leaf
    # despite MSVC6 inlining it and MSVC8 keeping it as a separate show() function
    disp_old = {(r["owner"], r["id"]): r for r in old.method_dispatch}
    disp_new = {(r["owner"], r["id"]): r for r in new.method_dispatch}
    assert disp_old[("CMC_Animo", 1)]["calls"][-1] == "vtbl+0xa0"
    assert disp_new[("CMC_Animo", 1)]["calls"][-1] == "vtbl+0xa0"
|