Files
Aidem-Media-DLL-Analysis/tests/test_diff.py
Patryk Gensch 27399a52b1 Method dispatch axis: map id -> body via Runner::run switch
Recovers how a script method id maps to its implementation, the foundation for
body-level normalisation. Each CMC_*_Runner::run is a switch(id) (vtable slot 17);
every case is the method body — inline (MSVC6) or a tail-call to a separate
show()/load() (MSVC8). The extractor parses the jump table at the disassembly
level (Ghidra's decompiler jump-table recovery silently dropped the big runners),
fingerprints each case by its ordered CALL anchors (Class::method / vtbl+0xNN),
and expands thin wrappers one level so MSVC8 lines up with MSVC6.

Validated on the golden pair: Animo SHOW..RESUME (id 1-4) yield identical leaves
(getAnimo + vtbl+0xa0/0xa4/0x4c/0x50) across both compilers. Coverage 30/32
runners; Piklib 475 / BlooMoo 619 dispatch rows.

- extract_engine_surface.py: extract_method_dispatch (schema_version -> 4)
- snapshots regenerated with the method_dispatch axis
- ams: Snapshot.method_dispatch; diff axis keyed (owner,id) on [impl,calls] with
  method-name join; render METHOD BODIES section; cli --only dispatch; owner filter
- UI: "Ciała metod" diff axis + browse tab
- tests: body-change unit + cross-compiler vtbl assertion -> 29/29

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-31 13:15:58 +02:00

131 lines
5.9 KiB
Python

"""Unit tests for the diff engine + an integration test over the committed golden pair."""
from __future__ import annotations
from pathlib import Path
import pytest
from ams.diff import compute_diff, filter_by_owner
from ams.render import render_text
from ams.snapshot import Snapshot
SNAP_DIR = Path(__file__).resolve().parents[1] / "snapshots"
def _snap(**axes) -> Snapshot:
    """Build a minimal in-memory Snapshot for the unit tests.

    Starts from a placeholder ``binary`` record plus an empty list for each
    default axis, then overlays the keyword arguments, so a test only has to
    spell out the axes it actually exercises.  ``method_dispatch`` is not among
    the defaults; tests that need it pass it as a keyword axis.
    """
    empty_axes = ("types", "methods", "events", "fields",
                  "struct_layout", "method_inheritance", "field_inheritance")
    payload = {
        "binary": {"name": "x", "engine": "e", "compiler": "c"},
        **{axis: [] for axis in empty_axes},
        **axes,  # caller-supplied axes win over the defaults
    }
    return Snapshot(payload)
# --- unit --------------------------------------------------------------------------------------
def test_types_added_removed_changed():
    """The types axis reports one added, one removed and one resized type."""

    def type_row(script_name, cpp_class, object_size):
        # All rows in this test are registered without the module interface.
        return {"script_name": script_name, "cpp_class": cpp_class,
                "object_size": object_size, "via_module_iface": False}

    before = _snap(types=[
        type_row("ANIMO", "CMC_Animo", 108),
        type_row("OLD", "CMC_Old", 10),
    ])
    after = _snap(types=[
        type_row("ANIMO", "CMC_Animo", 128),
        type_row("NEW", "CMC_New", 20),
    ])

    types_diff = compute_diff(before, after)["types"]

    added_names = [entry["script_name"] for entry in types_diff["added"]]
    assert added_names == ["NEW"]
    removed_names = [entry["script_name"] for entry in types_diff["removed"]]
    assert removed_names == ["OLD"]

    # ANIMO exists on both sides; only its object_size differs (old, new).
    assert len(types_diff["changed"]) == 1
    size_change = types_diff["changed"][0]["changes"]["object_size"]
    assert size_change == [108, 128]
def test_dual_multiarray_kept_distinct():
    """Two MULTIARRAY rows differing only in via_module_iface diff cleanly.

    Same script_name, different via_module_iface -> two distinct, stable keys,
    so comparing the list against itself must report nothing.
    """
    rows = [
        {"script_name": "MULTIARRAY", "cpp_class": "CMC_MultiArray",
         "object_size": 88, "via_module_iface": via_iface}
        for via_iface in (False, True)
    ]
    types_diff = compute_diff(_snap(types=rows), _snap(types=rows))["types"]
    expected = {"added": [], "removed": [], "changed": []}
    assert types_diff == expected
def test_method_id_change_and_move():
    """A renumbered method is 'changed'; a re-owned method is reported as moved."""

    def method_row(owner, name, method_id):
        return {"owner": owner, "name": name, "id": method_id}

    before = _snap(methods=[
        method_row("CMC_Animo", "SHOW", 1),
        method_row("CMC_Animo", "PING", 9),
    ])
    after = _snap(methods=[
        method_row("CMC_Animo", "SHOW", 2),  # id changed
        method_row("CMC", "PING", 9),        # moved Animo -> base CMC
    ])

    diff = compute_diff(before, after)

    first_changed = diff["methods"]["changed"][0]
    assert first_changed["changes"]["id"] == [1, 2]

    expected_moves = [
        {"name": "PING", "from_owners": ["CMC_Animo"], "to_owners": ["CMC"]},
    ]
    assert diff["moved_methods"] == expected_moves
def test_field_type_change_and_owner_filter():
    """Filtering the diff by owner keeps CMC_Animo's field type change."""

    def field_row(owner, name, type_name):
        # Every field in this test sits at order 0 within its owner.
        return {"owner": owner, "name": name, "type": type_name, "order": 0}

    before = _snap(fields=[
        field_row("CMC_Animo", "FPS", "int"),
        field_row("CMC_Sound", "VOLUME", "int"),
    ])
    after = _snap(fields=[
        field_row("CMC_Animo", "FPS", "double"),
        field_row("CMC_Sound", "VOLUME", "int"),
    ])

    animo_only = filter_by_owner(compute_diff(before, after), "CMC_Animo")
    fields_diff = animo_only["fields"]

    assert fields_diff["changed"][0]["changes"]["type"] == ["int", "double"]
    assert fields_diff["added"] == []
    assert fields_diff["removed"] == []
def test_method_dispatch_body_change():
    """A differing call anchor shows up as a method_dispatch change.

    Both snapshots carry the same SHOW method (id 1) on CMC_Animo; only the
    last entry of the dispatch row's ``calls`` list differs.
    """

    def animo_snapshot(vtbl_leaf):
        return _snap(
            methods=[{"owner": "CMC_Animo", "name": "SHOW", "id": 1}],
            method_dispatch=[{
                "owner": "CMC_Animo", "id": 1, "impl": None,
                "impl_addr": "0x1",
                "calls": ["CMC_Animo::getAnimo", vtbl_leaf],
            }],
        )

    before = animo_snapshot("vtbl+0xa0")
    after = animo_snapshot("vtbl+0xa4")

    dispatch_diff = compute_diff(before, after)["method_dispatch"]
    assert len(dispatch_diff["changed"]) == 1

    change = dispatch_diff["changed"][0]
    # name joined from the methods axis on (owner, id)
    assert change["item"]["name"] == "SHOW"
    expected_calls = [
        ["CMC_Animo::getAnimo", "vtbl+0xa0"],
        ["CMC_Animo::getAnimo", "vtbl+0xa4"],
    ]
    assert change["changes"]["calls"] == expected_calls

    # The text renderer emits its dispatch section for this diff.
    rendered = render_text(compute_diff(before, after))
    assert "METHOD BODIES" in rendered
def test_render_no_diff():
    """Rendering the diff of two default snapshots prints the no-differences marker."""
    empty_diff = compute_diff(_snap(), _snap())
    rendered = render_text(empty_diff)
    assert "(no differences)" in rendered
# --- integration over the committed golden pair ------------------------------------------------
@pytest.mark.skipif(
    # The test loads BOTH snapshots, so skip unless both files are present;
    # checking only one would turn a missing file into an error, not a skip.
    not all((SNAP_DIR / name).exists()
            for name in ("PIKLIB8.dll.snapshot.json",
                         "bloomoodll.dll.snapshot.json")),
    reason="golden snapshots not present",
)
def test_golden_pair_piklib_to_bloomoo():
    """Integration check: diff the committed golden pair PIKLIB8 -> bloomoodll.

    Loads both snapshot files from SNAP_DIR, computes the full diff and pins a
    handful of known facts about it (added types, a size change, an added
    Animo method, the rendered text, and the Animo SHOW dispatch leaf).
    """
    old = Snapshot.load(str(SNAP_DIR / "PIKLIB8.dll.snapshot.json"))  # VS6
    new = Snapshot.load(str(SNAP_DIR / "bloomoodll.dll.snapshot.json"))  # VS8
    d = compute_diff(old, new)

    # Types: the new binary adds at least GRBUFFER and INTERNET, removes none.
    added_types = {t["script_name"] for t in d["types"]["added"]}
    assert {"GRBUFFER", "INTERNET"} <= added_types
    assert d["types"]["removed"] == []

    # Map each changed type to its [old, new] object_size (None if unchanged).
    size_changes = {c["item"]["script_name"]: c["changes"].get("object_size")
                    for c in d["types"]["changed"]}
    assert size_changes.get("MOUSE") == [104, 128]

    # BlooMoo added Animo methods (GETFPS among them)
    animo = filter_by_owner(d, "CMC_Animo")
    assert "GETFPS" in {m["name"] for m in animo["methods"]["added"]}
    assert animo["fields"]["added"] == []  # no script fields added to Animo across the pair

    # rendering must not raise and must mention the new types
    text = render_text(d)
    assert "GRBUFFER" in text and "MOUSE" in text

    # method bodies recovered cross-compiler: Animo SHOW (id 1) maps to the same vtable leaf
    # despite MSVC6 inlining it and MSVC8 keeping it as a separate show() function
    disp_old = {(r["owner"], r["id"]): r for r in old.method_dispatch}
    disp_new = {(r["owner"], r["id"]): r for r in new.method_dispatch}
    assert disp_old[("CMC_Animo", 1)]["calls"][-1] == "vtbl+0xa0"
    assert disp_new[("CMC_Animo", 1)]["calls"][-1] == "vtbl+0xa0"