Turns the dispatch axis from a binary changed/unchanged into a "how much" measure
of code change — the original goal. ams.normalize compares two body fingerprints
(the ordered leaf-call anchors) with difflib after collapsing consecutive-duplicate
anchors (a load-twice codegen artefact), yielding a 0-100 similarity and the exact
leaves that appeared/vanished.
Every dispatch `changed` entry now carries body={similarity, added, removed}, and the
block carries a summary={shared, identical, changed, mean_similarity}.
Golden pair (cross-compiler): 470 shared bodies, 131 identical, mean 66% similar;
Animo SHOW/HIDE/PAUSE/RESUME come out 100% despite MSVC6 vs MSVC8, LOAD 50% with the
swapped leaves spelled out.
- normalize.py: canonical / body_similarity / body_delta
- diff: _dispatch_diff enriches changed with body + adds summary
- render: METHOD BODIES shows %, leaf delta, summary line
- UI: similarity % + leaf delta + axis summary
- tests: 5 new -> 34/34
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
160 lines
6.6 KiB
Python
160 lines
6.6 KiB
Python
"""Compute a structured diff between two engine-surface snapshots.

The result is a plain dict (JSON-serialisable). Each axis is a {added, removed, changed} block
produced by `keyed_diff`; `changed` entries carry the per-field old->new deltas. Methods also get
a cross-owner `moved` pass to surface hierarchy reparenting.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Any, Callable, Hashable
|
|
|
|
from .snapshot import Snapshot
|
|
|
|
# One snapshot row. Rows are plain dicts and are read with item access / .get().
Item = dict
# Callable that extracts the hashable identity used to match a row across snapshots.
KeyFn = Callable[[Item], Hashable]
|
|
|
|
|
|
def _index(items: list[Item], key: KeyFn) -> dict[Hashable, Item]:
|
|
return {key(it): it for it in items}
|
|
|
|
|
|
def keyed_diff(a_items: list[Item], b_items: list[Item], key: KeyFn,
               compare_fields: list[str]) -> dict[str, Any]:
    """Diff two item lists after matching their entries by `key`.

    Returns a dict with three lists:
      * `added`   - items whose key exists only in `b_items`;
      * `removed` - items whose key exists only in `a_items`;
      * `changed` - one `{"item": <b-side item>, "changes": {field: [old, new]}}` entry per
        key present on both sides where at least one of `compare_fields` differs.
    Fields are read with `.get`, so a field missing on one side compares as None.
    """
    before_by_key = _index(a_items, key)
    after_by_key = _index(b_items, key)

    added = [item for k, item in after_by_key.items() if k not in before_by_key]

    removed = []
    changed = []
    for k, before in before_by_key.items():
        if k not in after_by_key:
            removed.append(before)
            continue
        after = after_by_key[k]
        deltas = {}
        for field in compare_fields:
            old_value, new_value = before.get(field), after.get(field)
            if old_value != new_value:
                deltas[field] = [old_value, new_value]
        # Matched items with no differing compare field are not reported at all.
        if deltas:
            changed.append({"item": after, "changes": deltas})

    return {"added": added, "removed": removed, "changed": changed}
|
|
|
|
|
|
# --- per-axis keys -----------------------------------------------------------------------------
|
|
# Types: script_name + via_module_iface keeps the dual-dispatch MULTIARRAY entries distinct and
|
|
# stable across versions (addresses change, this semantic flag does not).
|
|
def _type_key(t: Item) -> Hashable:
|
|
return (t["script_name"], bool(t.get("via_module_iface")))
|
|
|
|
|
|
def _owner_name_key(x: Item) -> Hashable:
|
|
return (x["owner"], x["name"])
|
|
|
|
|
|
def _layout_key(x: Item) -> Hashable:
|
|
return (x["owner"], x["offset"])
|
|
|
|
|
|
def _detect_method_moves(old_m: list[Item], new_m: list[Item]) -> list[Item]:
|
|
"""A method name that left some owner and appeared under another - i.e. moved in the hierarchy."""
|
|
def owners_by_name(items: list[Item]) -> dict[str, set]:
|
|
out: dict[str, set] = {}
|
|
for m in items:
|
|
out.setdefault(m["name"], set()).add(m["owner"])
|
|
return out
|
|
|
|
old_o, new_o = owners_by_name(old_m), owners_by_name(new_m)
|
|
moves = []
|
|
for name in sorted(set(old_o) & set(new_o)):
|
|
lost = old_o[name] - new_o[name]
|
|
gained = new_o[name] - old_o[name]
|
|
if lost and gained:
|
|
moves.append({"name": name, "from_owners": sorted(lost), "to_owners": sorted(gained)})
|
|
return moves
|
|
|
|
|
|
def _dispatch_key(x: Item) -> Hashable:
|
|
return (x["owner"], x["id"])
|
|
|
|
|
|
def _dispatch_with_names(snap: Snapshot) -> list[Item]:
|
|
"""Attach the method name (from the methods axis, joined on owner+id) to each dispatch row,
|
|
so a body-level diff reads as 'SHOW body changed' rather than 'CMC_Animo id 1 changed'."""
|
|
name_by = {(m["owner"], m.get("id")): m["name"] for m in snap.methods}
|
|
out = []
|
|
for r in snap.method_dispatch:
|
|
rr = dict(r)
|
|
rr["name"] = name_by.get((r["owner"], r["id"]))
|
|
out.append(rr)
|
|
return out
|
|
|
|
|
|
def _dispatch_diff(old: Snapshot, new: Snapshot) -> dict[str, Any]:
    """Diff the dispatch axis and annotate it with body-level measurements.

    Rows are matched on (owner, id) and compared on `impl` and `calls`. Each `changed`
    entry gains a `body` value computed by `body_delta` from the old and new `calls` lists.
    The block gains a `summary` over every key present in both snapshots:
    `shared` (count), `identical` (similarity == 100), `changed` (similarity < 100) and
    `mean_similarity` (rounded to an int; 100 when nothing is shared).
    """
    from .normalize import body_delta, body_similarity

    old_rows = _dispatch_with_names(old)
    new_rows = _dispatch_with_names(new)
    block = keyed_diff(old_rows, new_rows, _dispatch_key, ["impl", "calls"])

    # Rows without a `calls` entry are treated as having an empty body.
    old_calls = {}
    for row in old_rows:
        old_calls[_dispatch_key(row)] = row.get("calls", [])
    new_calls = {}
    for row in new_rows:
        new_calls[_dispatch_key(row)] = row.get("calls", [])

    for entry in block["changed"]:
        key = _dispatch_key(entry["item"])
        entry["body"] = body_delta(old_calls.get(key, []), new_calls.get(key, []))

    # The summary covers all shared bodies, not only those listed under `changed`.
    shared = set(old_calls) & set(new_calls)
    sims = [body_similarity(old_calls[key], new_calls[key]) for key in shared]

    if sims:
        mean_similarity = int(round(sum(sims) / len(sims)))
    else:
        mean_similarity = 100

    block["summary"] = {
        "shared": len(shared),
        "identical": len([s for s in sims if s == 100]),
        "changed": len([s for s in sims if s < 100]),
        "mean_similarity": mean_similarity,
    }
    return block
|
|
|
|
|
|
def compute_diff(old: Snapshot, new: Snapshot) -> dict[str, Any]:
    """Build the full diff dict between two snapshots.

    The result holds `binary` ({"from", "to"}), one `keyed_diff` block per plain axis,
    the enriched `method_dispatch` block from `_dispatch_diff`, and `moved_methods`
    from `_detect_method_moves`. Each axis name doubles as the snapshot attribute it reads.
    """
    def by_runner(x: Item) -> Hashable:
        return x["runner"]

    def by_class(x: Item) -> Hashable:
        return x["class"]

    # (axis / snapshot attribute, key function, fields compared on matched items)
    axes_before_dispatch = (
        ("types", _type_key, ["cpp_class", "object_size"]),
        ("methods", _owner_name_key, ["id"]),
        ("events", _owner_name_key, ["order"]),
        ("fields", _owner_name_key, ["type"]),
        ("struct_layout", _layout_key, ["size", "is_vtable"]),
    )
    axes_after_dispatch = (
        ("method_inheritance", by_runner, ["base_runner"]),
        ("field_inheritance", by_class, ["base_class"]),
    )

    result: dict[str, Any] = {"binary": {"from": old.binary, "to": new.binary}}
    for axis, key, compare_fields in axes_before_dispatch:
        result[axis] = keyed_diff(getattr(old, axis), getattr(new, axis), key, compare_fields)
    result["method_dispatch"] = _dispatch_diff(old, new)
    for axis, key, compare_fields in axes_after_dispatch:
        result[axis] = keyed_diff(getattr(old, axis), getattr(new, axis), key, compare_fields)
    result["moved_methods"] = _detect_method_moves(old.methods, new.methods)
    return result
|
|
|
|
|
|
# --- owner filtering (for `--owner CMC_Animo`) -------------------------------------------------
|
|
def _item_owner(axis: str, item: Item) -> str | None:
|
|
if axis == "types":
|
|
return item.get("cpp_class")
|
|
if axis in ("methods", "events", "fields", "struct_layout", "method_dispatch"):
|
|
return item.get("owner")
|
|
if axis == "method_inheritance":
|
|
return item.get("runner")
|
|
if axis == "field_inheritance":
|
|
return item.get("class")
|
|
return None
|
|
|
|
|
|
def filter_by_owner(diff: dict[str, Any], owner: str) -> dict[str, Any]:
    """Narrow a diff to the entries that belong to a single class/owner.

    `binary` is passed through untouched. `moved_methods` keeps only the moves that list
    `owner` among their source or destination owners. Every other axis is rebuilt from its
    `added` / `removed` / `changed` lists, keeping entries whose owner (as resolved by
    `_item_owner`) equals `owner`.

    NOTE(review): only those three lists are rebuilt, so any extra key on an axis block
    (such as the `summary` attached to `method_dispatch`) is absent from the result.
    """
    def owned(axis: str, item: Item) -> bool:
        return _item_owner(axis, item) == owner

    filtered: dict[str, Any] = {"binary": diff["binary"]}

    kept_moves = []
    for move in diff["moved_methods"]:
        if owner in move["from_owners"] or owner in move["to_owners"]:
            kept_moves.append(move)
    filtered["moved_methods"] = kept_moves

    for axis, block in diff.items():
        if axis == "binary" or axis == "moved_methods":
            continue
        filtered[axis] = {
            "added": [item for item in block["added"] if owned(axis, item)],
            "removed": [item for item in block["removed"] if owned(axis, item)],
            "changed": [entry for entry in block["changed"] if owned(axis, entry["item"])],
        }
    return filtered
|