"""Compute a structured diff between two engine-surface snapshots. The result is a plain dict (JSON-serialisable). Each axis is a {added, removed, changed} block produced by `keyed_diff`; `changed` entries carry the per-field old->new deltas. Methods also get a cross-owner `moved` pass to surface hierarchy reparenting. """ from __future__ import annotations from typing import Any, Callable, Hashable from .snapshot import Snapshot Item = dict KeyFn = Callable[[Item], Hashable] def _index(items: list[Item], key: KeyFn) -> dict[Hashable, Item]: return {key(it): it for it in items} def keyed_diff(a_items: list[Item], b_items: list[Item], key: KeyFn, compare_fields: list[str]) -> dict[str, Any]: """Set-diff two item lists by `key`; for items present in both, report changed compare_fields.""" a = _index(a_items, key) b = _index(b_items, key) added = [b[k] for k in b if k not in a] removed = [a[k] for k in a if k not in b] changed = [] for k in a: if k not in b: continue deltas = {f: [a[k].get(f), b[k].get(f)] for f in compare_fields if a[k].get(f) != b[k].get(f)} if deltas: changed.append({"item": b[k], "changes": deltas}) return {"added": added, "removed": removed, "changed": changed} # --- per-axis keys ----------------------------------------------------------------------------- # Types: script_name + via_module_iface keeps the dual-dispatch MULTIARRAY entries distinct and # stable across versions (addresses change, this semantic flag does not). def _type_key(t: Item) -> Hashable: return (t["script_name"], bool(t.get("via_module_iface"))) def _owner_name_key(x: Item) -> Hashable: return (x["owner"], x["name"]) def _layout_key(x: Item) -> Hashable: return (x["owner"], x["offset"]) def _detect_method_moves(old_m: list[Item], new_m: list[Item]) -> list[Item]: """A method name that left some owner and appeared under another - i.e. moved in the hierarchy.""" def owners_by_name(items: list[Item]) -> dict[str, set]: out: dict[str, set] = {} for m in items: out.setdefault(m["name"], set()).add(m["owner"]) return out old_o, new_o = owners_by_name(old_m), owners_by_name(new_m) moves = [] for name in sorted(set(old_o) & set(new_o)): lost = old_o[name] - new_o[name] gained = new_o[name] - old_o[name] if lost and gained: moves.append({"name": name, "from_owners": sorted(lost), "to_owners": sorted(gained)}) return moves def _dispatch_key(x: Item) -> Hashable: return (x["owner"], x["id"]) def _dispatch_with_names(snap: Snapshot) -> list[Item]: """Attach the method name (from the methods axis, joined on owner+id) to each dispatch row, so a body-level diff reads as 'SHOW body changed' rather than 'CMC_Animo id 1 changed'.""" name_by = {(m["owner"], m.get("id")): m["name"] for m in snap.methods} out = [] for r in snap.method_dispatch: rr = dict(r) rr["name"] = name_by.get((r["owner"], r["id"])) out.append(rr) return out def _dispatch_diff(old: Snapshot, new: Snapshot) -> dict[str, Any]: """Dispatch axis with body-level normalisation: every `changed` entry carries a `body` {similarity, added, removed} from ams.normalize, and the block gets a `summary` measuring how much the shared bodies changed overall (mean similarity, identical/changed counts).""" from .normalize import body_delta, body_similarity do = _dispatch_with_names(old) dn = _dispatch_with_names(new) block = keyed_diff(do, dn, _dispatch_key, ["impl", "calls"]) old_calls = {_dispatch_key(r): r.get("calls", []) for r in do} new_calls = {_dispatch_key(r): r.get("calls", []) for r in dn} for ch in block["changed"]: k = _dispatch_key(ch["item"]) ch["body"] = body_delta(old_calls.get(k, []), new_calls.get(k, [])) shared = set(old_calls) & set(new_calls) sims = [body_similarity(old_calls[k], new_calls[k]) for k in shared] block["summary"] = { "shared": len(shared), "identical": sum(1 for s in sims if s == 100), "changed": sum(1 for s in sims if s < 100), "mean_similarity": int(round(sum(sims) / len(sims))) if sims else 100, } return block def compute_diff(old: Snapshot, new: Snapshot) -> dict[str, Any]: return { "binary": {"from": old.binary, "to": new.binary}, "types": keyed_diff(old.types, new.types, _type_key, ["cpp_class", "object_size"]), "methods": keyed_diff(old.methods, new.methods, _owner_name_key, ["id"]), "events": keyed_diff(old.events, new.events, _owner_name_key, ["order"]), "fields": keyed_diff(old.fields, new.fields, _owner_name_key, ["type"]), "struct_layout": keyed_diff(old.struct_layout, new.struct_layout, _layout_key, ["size", "is_vtable"]), "method_dispatch": _dispatch_diff(old, new), "method_inheritance": keyed_diff(old.method_inheritance, new.method_inheritance, lambda x: x["runner"], ["base_runner"]), "field_inheritance": keyed_diff(old.field_inheritance, new.field_inheritance, lambda x: x["class"], ["base_class"]), "moved_methods": _detect_method_moves(old.methods, new.methods), } # --- owner filtering (for `--owner CMC_Animo`) ------------------------------------------------- def _item_owner(axis: str, item: Item) -> str | None: if axis == "types": return item.get("cpp_class") if axis in ("methods", "events", "fields", "struct_layout", "method_dispatch"): return item.get("owner") if axis == "method_inheritance": return item.get("runner") if axis == "field_inheritance": return item.get("class") return None def filter_by_owner(diff: dict[str, Any], owner: str) -> dict[str, Any]: """Restrict every axis to a single class/owner. `binary` and `moved_methods` are kept whole.""" out: dict[str, Any] = {"binary": diff["binary"]} out["moved_methods"] = [m for m in diff["moved_methods"] if owner in m["from_owners"] or owner in m["to_owners"]] for axis, block in diff.items(): if axis in ("binary", "moved_methods"): continue out[axis] = { "added": [i for i in block["added"] if _item_owner(axis, i) == owner], "removed": [i for i in block["removed"] if _item_owner(axis, i) == owner], "changed": [c for c in block["changed"] if _item_owner(axis, c["item"]) == owner], } return out