Files
Aidem-Media-DLL-Analysis/tests/test_normalize.py
Patryk Gensch b0d3d22445 Body normalisation: per-method similarity score + leaf delta
Turns the dispatch axis from a binary changed/unchanged into a "how much" measure
of code change — the original goal. ams.normalize compares two body fingerprints
(the ordered leaf-call anchors) with difflib after collapsing consecutive-duplicate
anchors (a load-twice codegen artefact), yielding a 0-100 similarity and the exact
leaves that appeared/vanished.

Every dispatch `changed` entry now carries body={similarity, added, removed}, and the
block carries a summary={shared, identical, changed, mean_similarity}.

Golden pair (cross-compiler): 470 shared bodies, 131 identical, mean 66% similar;
Animo SHOW/HIDE/PAUSE/RESUME come out 100% despite MSVC6 vs MSVC8, LOAD 50% with the
swapped leaves spelled out.

- normalize.py: canonical / body_similarity / body_delta
- diff: _dispatch_diff enriches changed with body + adds summary
- render: METHOD BODIES shows %, leaf delta, summary line
- UI: similarity % + leaf delta + axis summary
- tests: 5 new -> 34/34

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-31 13:23:15 +02:00

60 lines
2.3 KiB
Python

"""Body-level normalisation: similarity score + leaf delta, and its wiring into the diff."""
from __future__ import annotations
from ams.normalize import body_delta, body_similarity, canonical
def test_canonical_collapses_consecutive_dups():
    """canonical() folds runs of adjacent duplicates; a non-adjacent repeat survives."""
    anchors = ["a", "a", "b", "b", "a"]
    # the trailing "a" is not adjacent to the leading pair, so it must be kept
    assert canonical(anchors) == ["a", "b", "a"]

    # an empty fingerprint stays empty
    no_anchors = []
    assert canonical(no_anchors) == []
def test_body_similarity_bounds():
    """Pin both ends of the 0-100 similarity scale plus one in-between case."""
    # (old body, new body, exact score expected)
    exact_cases = (
        ([], [], 100),
        (["a", "b"], ["a", "b"], 100),
        (["a", "b", "c"], ["x", "y", "z"], 0),
    )
    for old_body, new_body, expected in exact_cases:
        assert body_similarity(old_body, new_body) == expected

    # one inserted leaf out of a shared core -> high but < 100
    score = body_similarity(["a", "b", "c"], ["a", "b", "X", "c"])
    assert score >= 70
    assert score < 100
def test_body_similarity_ignores_load_twice_artifact():
    """A consecutively doubled anchor must not lower the similarity score."""
    # the doubled anchor is a codegen artefact; canonical() makes the two bodies identical
    doubled = ["getAnimo", "vtbl+0x3c", "vtbl+0x3c"]
    single = ["getAnimo", "vtbl+0x3c"]
    assert body_similarity(doubled, single) == 100
def test_body_delta_added_removed():
    """body_delta() names the swapped leaves and reports a score below 100."""
    delta = body_delta(["a", "b", "c"], ["a", "X", "c"])

    # "b" was replaced by "X"; each side of the swap is checked on its own line
    assert delta["removed"] == ["b"]
    assert delta["added"] == ["X"]
    assert delta["similarity"] < 100
def test_dispatch_diff_carries_body_and_summary():
    """compute_diff()'s method_dispatch block carries a summary and a per-change body delta."""
    from ams.diff import compute_diff
    from ams.snapshot import Snapshot

    def make_snapshot(show_calls, load_calls):
        # Two CMC_Animo methods (SHOW id 1, LOAD id 7); only their call lists vary.
        methods = [
            {"owner": "CMC_Animo", "name": "SHOW", "id": 1},
            {"owner": "CMC_Animo", "name": "LOAD", "id": 7},
        ]
        dispatch = [
            {"owner": "CMC_Animo", "id": 1, "impl": None, "calls": show_calls},
            {"owner": "CMC_Animo", "id": 7, "impl": None, "calls": load_calls},
        ]
        payload = {
            "binary": {},
            "types": [],
            "events": [],
            "fields": [],
            "struct_layout": [],
            "method_inheritance": [],
            "field_inheritance": [],
            "methods": methods,
            "method_dispatch": dispatch,
        }
        return Snapshot(payload)

    before = make_snapshot(["getAnimo", "vtbl+0xa0"], ["a", "b", "c"])
    after = make_snapshot(["getAnimo", "vtbl+0xa0"], ["a", "X", "c"])  # SHOW identical, LOAD changed
    dispatch_block = compute_diff(before, after)["method_dispatch"]

    summary = dispatch_block["summary"]
    assert summary["shared"] == 2
    assert summary["identical"] == 1
    assert summary["changed"] == 1
    assert 75 <= summary["mean_similarity"] <= 92  # mean of 100 (SHOW) and ~67 (LOAD)

    changed = dispatch_block["changed"]
    assert len(changed) == 1
    entry = changed[0]
    assert entry["item"]["name"] == "LOAD"
    body = entry["body"]
    assert body["added"] == ["X"]
    assert body["removed"] == ["b"]