Body normalisation: per-method similarity score + leaf delta
Turns the dispatch axis from a binary changed/unchanged into a "how much" measure
of code change — the original goal. ams.normalize compares two body fingerprints
(the ordered leaf-call anchors) with difflib after collapsing consecutive-duplicate
anchors (a load-twice codegen artefact), yielding a 0-100 similarity and the exact
leaves that appeared/vanished.
Every dispatch `changed` entry now carries body={similarity, added, removed}, and the
block carries a summary={shared, identical, changed, mean_similarity}.
Golden pair (cross-compiler): 470 shared bodies, 131 identical, mean 66% similar;
Animo SHOW/HIDE/PAUSE/RESUME come out 100% despite MSVC6 vs MSVC8, LOAD 50% with the
swapped leaves spelled out.
- normalize.py: canonical / body_similarity / body_delta
- diff: _dispatch_diff enriches changed with body + adds summary
- render: METHOD BODIES shows %, leaf delta, summary line
- UI: similarity % + leaf delta + axis summary
- tests: 5 new -> 34/34
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -82,11 +82,23 @@ def _dispatch_name(r: dict) -> str:
|
||||
return r.get("name") or "id {0}".format(r.get("id"))
|
||||
|
||||
|
||||
def _leaves(items: list, cap: int = 4) -> str:
|
||||
shown = items[:cap]
|
||||
extra = "…+{0}".format(len(items) - cap) if len(items) > cap else ""
|
||||
return ", ".join(shown) + extra
|
||||
|
||||
|
||||
def _section_dispatch(out: list[str], block: dict) -> None:
|
||||
"""Method-body fingerprints (per owner+id). `calls` deltas are summarised by length so the
|
||||
line stays readable; the full anchor lists live in the JSON."""
|
||||
"""Method bodies (per owner+id), normalised. Each changed entry shows a similarity score and
|
||||
the leaf-level delta (which calls appeared/vanished); a summary line gives the overall drift."""
|
||||
out.append("")
|
||||
out.append("{0:<16} {1}".format("METHOD BODIES", _counts(block)))
|
||||
summ = block.get("summary")
|
||||
head = "METHOD BODIES"
|
||||
if summ:
|
||||
head = "{0} (shared {1}, ~{2} changed, mean {3}% similar)".format(
|
||||
"METHOD BODIES", summ["shared"], summ["changed"], summ["mean_similarity"])
|
||||
out.append("{0}".format(head))
|
||||
out.append("{0:<16} {1}".format("", _counts(block)))
|
||||
owner_of = lambda r: r["owner"]
|
||||
added = _group_by(block["added"], owner_of)
|
||||
removed = _group_by(block["removed"], owner_of)
|
||||
@@ -99,14 +111,18 @@ def _section_dispatch(out: list[str], block: dict) -> None:
|
||||
for it in sorted(removed.get(owner, []), key=_dispatch_name):
|
||||
out.append(" - {0}".format(_dispatch_name(it)))
|
||||
for it in sorted(changed.get(owner, []), key=_dispatch_name):
|
||||
ch = change_by_id[id(it)]["changes"]
|
||||
ch = change_by_id[id(it)]
|
||||
body = ch.get("body", {})
|
||||
sim = body.get("similarity")
|
||||
bits = []
|
||||
if "impl" in ch:
|
||||
bits.append("impl {0} -> {1}".format(ch["impl"][0], ch["impl"][1]))
|
||||
if "calls" in ch:
|
||||
a, b = ch["calls"]
|
||||
bits.append("calls {0} -> {1}".format(len(a or []), len(b or [])))
|
||||
out.append(" ~ {0:<22} {1}".format(_dispatch_name(it), "; ".join(bits)))
|
||||
if body.get("added"):
|
||||
bits.append("+[{0}]".format(_leaves(body["added"])))
|
||||
if body.get("removed"):
|
||||
bits.append("-[{0}]".format(_leaves(body["removed"])))
|
||||
if not bits and "impl" in ch["changes"]:
|
||||
bits.append("impl {0} -> {1}".format(*ch["changes"]["impl"]))
|
||||
label = "{0} {1}%".format(_dispatch_name(it), sim) if sim is not None else _dispatch_name(it)
|
||||
out.append(" ~ {0:<26} {1}".format(label, " ".join(bits)))
|
||||
|
||||
|
||||
# A diff block with no entries: empty "added", "removed" and "changed" lists.
# NOTE(review): the dict and its lists are shared mutable objects — presumably
# callers treat this as read-only; confirm nothing mutates it in place.
_EMPTY = {"added": [], "removed": [], "changed": []}
|
||||
|
||||
Reference in New Issue
Block a user