Files
Aidem-Media-DLL-Analysis/ams/diff.py
Patryk Gensch 27399a52b1 Method dispatch axis: map id -> body via Runner::run switch
Recovers how a script method id maps to its implementation, the foundation for
body-level normalisation. Each CMC_*_Runner::run is a switch(id) (vtable slot 17);
every case is the method body — inline (MSVC6) or a tail-call to a separate
show()/load() (MSVC8). The extractor parses the jump table at the disassembly
level (Ghidra's decompiler jump-table recovery silently dropped the big runners),
fingerprints each case by its ordered CALL anchors (Class::method / vtbl+0xNN),
and expands thin wrappers one level so MSVC8 lines up with MSVC6.

Validated on the golden pair: Animo SHOW..RESUME (id 1-4) yield identical leaves
(getAnimo + vtbl+0xa0/0xa4/0x4c/0x50) across both compilers. Coverage 30/32
runners; Piklib 475 / BlooMoo 619 dispatch rows.

- extract_engine_surface.py: extract_method_dispatch (schema_version -> 4)
- snapshots regenerated with the method_dispatch axis
- ams: Snapshot.method_dispatch; diff axis keyed (owner,id) on [impl,calls] with
  method-name join; render METHOD BODIES section; cli --only dispatch; owner filter
- UI: "Ciała metod" diff axis + browse tab
- tests: body-change unit + cross-compiler vtbl assertion -> 29/29

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-31 13:15:58 +02:00

134 lines
5.5 KiB
Python

"""Compute a structured diff between two engine-surface snapshots.
The result is a plain dict (JSON-serialisable). Each axis is a {added, removed, changed} block
produced by `keyed_diff`; `changed` entries carry the per-field old->new deltas. Methods also get
a cross-owner `moved` pass to surface hierarchy reparenting.
"""
from __future__ import annotations
from typing import Any, Callable, Hashable
from .snapshot import Snapshot
Item = dict
KeyFn = Callable[[Item], Hashable]
def _index(items: list[Item], key: KeyFn) -> dict[Hashable, Item]:
    """Build a lookup table mapping `key(item)` to the item itself.

    If several items produce the same key, the last one in `items` is the one kept.
    """
    lookup = {}
    for entry in items:
        lookup[key(entry)] = entry
    return lookup
def keyed_diff(a_items: list[Item], b_items: list[Item], key: KeyFn,
               compare_fields: list[str]) -> dict[str, Any]:
    """Diff two item lists after matching their items up by `key`.

    Returns a dict with three lists:
      added   - items of `b_items` whose key does not occur in `a_items`
      removed - items of `a_items` whose key does not occur in `b_items`
      changed - for keys on both sides where at least one of `compare_fields` differs, an entry
                {"item": <the b-side item>, "changes": {field: [old_value, new_value]}};
                a field missing from an item is read as None.
    """
    old_by_key = _index(a_items, key)
    new_by_key = _index(b_items, key)

    added = [item for k, item in new_by_key.items() if k not in old_by_key]

    removed = []
    changed = []
    for k, old_item in old_by_key.items():
        if k not in new_by_key:
            removed.append(old_item)
            continue
        new_item = new_by_key[k]
        deltas = {}
        for field in compare_fields:
            before = old_item.get(field)
            after = new_item.get(field)
            if before != after:
                deltas[field] = [before, after]
        # Keys whose compared fields all match are not reported at all.
        if deltas:
            changed.append({"item": new_item, "changes": deltas})

    return {"added": added, "removed": removed, "changed": changed}
# --- per-axis keys -----------------------------------------------------------------------------
# Types: script_name + via_module_iface keeps the dual-dispatch MULTIARRAY entries distinct and
# stable across versions (addresses change, this semantic flag does not).
def _type_key(t: Item) -> Hashable:
    """Key a type entry by its script name plus whether `via_module_iface` is truthy."""
    script_name = t["script_name"]
    # A missing or falsy flag and an explicit False all map to the same key.
    via_iface = True if t.get("via_module_iface") else False
    return (script_name, via_iface)
def _owner_name_key(x: Item) -> Hashable:
    """Key an item by the pair (owner, name)."""
    owner, name = x["owner"], x["name"]
    return owner, name
def _layout_key(x: Item) -> Hashable:
    """Key a struct-layout entry by the pair (owner, offset)."""
    owner, offset = x["owner"], x["offset"]
    return owner, offset
def _detect_method_moves(old_m: list[Item], new_m: list[Item]) -> list[Item]:
    """Report method names whose set of owners both lost and gained members between snapshots.

    Only names present in both lists are considered. Each result is
    {"name": ..., "from_owners": [...], "to_owners": [...]} with both owner lists sorted, and the
    results themselves are ordered by method name.
    """
    owners_before = {}
    owners_after = {}
    # Group the owners of every method name, once per snapshot.
    for methods, table in ((old_m, owners_before), (new_m, owners_after)):
        for method in methods:
            name = method["name"]
            if name not in table:
                table[name] = set()
            table[name].add(method["owner"])

    moves = []
    for name in sorted(owners_before.keys() & owners_after.keys()):
        dropped = owners_before[name].difference(owners_after[name])
        picked_up = owners_after[name].difference(owners_before[name])
        # A move needs both sides: an owner that lost the name and one that gained it.
        if not dropped or not picked_up:
            continue
        moves.append({"name": name, "from_owners": sorted(dropped), "to_owners": sorted(picked_up)})
    return moves
def _dispatch_key(x: Item) -> Hashable:
    """Key a dispatch row by the pair (owner, id)."""
    owner, method_id = x["owner"], x["id"]
    return owner, method_id
def _dispatch_with_names(snap: Snapshot) -> list[Item]:
    """Return copies of the snapshot's dispatch rows, each with a "name" entry added.

    The name is looked up in `snap.methods` by (owner, id); rows with no matching method get
    "name": None. The rows in `snap.method_dispatch` are copied, not modified, so a body-level
    diff can read as 'SHOW body changed' rather than 'CMC_Animo id 1 changed'.
    """
    method_names = {}
    for method in snap.methods:
        join_key = (method["owner"], method.get("id"))
        method_names[join_key] = method["name"]

    return [
        {**row, "name": method_names.get((row["owner"], row["id"]))}
        for row in snap.method_dispatch
    ]
def compute_diff(old: Snapshot, new: Snapshot) -> dict[str, Any]:
    """Assemble the full diff between two snapshots as one plain dict.

    "binary" records the two binaries as {"from", "to"}. Every other entry except
    "moved_methods" is a `keyed_diff` block for one axis; "moved_methods" is the list produced
    by `_detect_method_moves` over the two method lists.
    """
    def by_runner(row: Item) -> Hashable:
        return row["runner"]

    def by_class(row: Item) -> Hashable:
        return row["class"]

    diff = {"binary": {"from": old.binary, "to": new.binary}}

    diff["types"] = keyed_diff(old.types, new.types, _type_key, ["cpp_class", "object_size"])
    diff["methods"] = keyed_diff(old.methods, new.methods, _owner_name_key, ["id"])
    diff["events"] = keyed_diff(old.events, new.events, _owner_name_key, ["order"])
    diff["fields"] = keyed_diff(old.fields, new.fields, _owner_name_key, ["type"])
    diff["struct_layout"] = keyed_diff(old.struct_layout, new.struct_layout, _layout_key,
                                       ["size", "is_vtable"])

    # Dispatch rows are diffed with method names attached; only impl/calls count as a change.
    old_dispatch = _dispatch_with_names(old)
    new_dispatch = _dispatch_with_names(new)
    diff["method_dispatch"] = keyed_diff(old_dispatch, new_dispatch, _dispatch_key,
                                         ["impl", "calls"])

    diff["method_inheritance"] = keyed_diff(old.method_inheritance, new.method_inheritance,
                                            by_runner, ["base_runner"])
    diff["field_inheritance"] = keyed_diff(old.field_inheritance, new.field_inheritance,
                                           by_class, ["base_class"])

    diff["moved_methods"] = _detect_method_moves(old.methods, new.methods)
    return diff
# --- owner filtering (for `--owner CMC_Animo`) -------------------------------------------------
def _item_owner(axis: str, item: Item) -> str | None:
    """Return the value of the item field that names its owner on the given diff axis.

    Returns None for an axis not listed below, or when the item lacks the field.
    """
    owner_field_rules = (
        (("types",), "cpp_class"),
        (("methods", "events", "fields", "struct_layout", "method_dispatch"), "owner"),
        (("method_inheritance",), "runner"),
        (("field_inheritance",), "class"),
    )
    for axes, field in owner_field_rules:
        if axis in axes:
            return item.get(field)
    return None
def filter_by_owner(diff: dict[str, Any], owner: str) -> dict[str, Any]:
    """Return a copy of `diff` narrowed to a single class/owner.

    `binary` is carried over unchanged. `moved_methods` keeps only the moves that list `owner`
    among their from_owners or to_owners. Every other axis keeps only the added/removed/changed
    entries whose owner (as resolved by `_item_owner`) equals `owner`.
    """
    def owned(axis: str, item: Item) -> bool:
        return _item_owner(axis, item) == owner

    filtered = {"binary": diff["binary"]}

    relevant_moves = []
    for move in diff["moved_methods"]:
        if owner in move["from_owners"] or owner in move["to_owners"]:
            relevant_moves.append(move)
    filtered["moved_methods"] = relevant_moves

    for axis, block in diff.items():
        if axis == "binary" or axis == "moved_methods":
            continue  # already handled above
        kept_added = [item for item in block["added"] if owned(axis, item)]
        kept_removed = [item for item in block["removed"] if owned(axis, item)]
        # Changed entries wrap the item, so the owner is read from entry["item"].
        kept_changed = [entry for entry in block["changed"] if owned(axis, entry["item"])]
        filtered[axis] = {"added": kept_added, "removed": kept_removed, "changed": kept_changed}
    return filtered