Extract Piklib 6.1 methods by name from Runner::run

6.1 has no prepareMthHashSet and no id-switch: CMC_*_Runner::run takes the method
*name* (CXString) and dispatches via a `CXString(tmp,"name"); equalsIgnoreCase(name)`
chain. extract_methods now falls back (only when the hashset pass finds nothing) to
scanning run() for that pattern, recovering the names (no numeric ids).

6.1 now yields 186 methods (Animo: show/hide/play/setFPS/...); the method-dispatch list stays
empty (0 entries) since the string chain isn't a jump table. 7.1/8.x are untouched (they have prepareMthHashSet).

Note: 6.1 names are camelCase/lowercase vs 7.1+ UPPERCASE (engine compares case-
insensitively), so a cross-version method diff needs case folding to be clean.

38/38 tests pass (test_versions updated: 6.1 methods present with id=None).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Patryk Gensch
2026-05-31 20:25:20 +02:00
parent 67cbc32a2c
commit 8875540186
3 changed files with 1182 additions and 9 deletions

View File

@@ -328,9 +328,59 @@ def extract_methods(program):
methods.extend(own)
if base is not None:
inheritance.append({"runner": runner, "base_runner": base})
if not methods:
# Piklib 6.1 predates prepareMthHashSet: methods are dispatched by NAME inside
# <Runner>::run, so recover the names from there instead (no numeric ids).
methods = extract_methods_by_name(program)
return methods, inheritance
def _extract_name_methods_from_run(program, run_func, owner, runner):
    """Recover name-dispatched methods from one Piklib 6.1 `<Runner>::run` body.

    `run(CXString name, ...)` is a chain of `CXString(tmp, "name")` constructions, each
    followed by `equalsIgnoreCase(name)`. The string literal built just before a compare
    is taken as a method name.

    Args:
        program: program whose listing is scanned.
        run_func: the `run` function (thunks are resolved via `resolve_thunk`).
        owner: value stored under the "owner" key of every record.
        runner: value stored under the "runner" key of every record.

    Returns:
        A list of `{"owner", "runner", "name", "id"}` dicts in first-seen order, one per
        distinct name; "id" is always None because this dispatch style has no numeric ids.
    """
    listing = program.getListing()
    body = resolve_thunk(run_func).getBody()
    instructions = listing.getInstructions(body, True)

    found = []
    emitted = set()
    window = []        # the most recent instructions, at most LOOKBACK of them
    candidate = None   # literal from the latest CXString(...) call that looked like a method name

    while instructions.hasNext():
        instr = instructions.next()
        if instr.getMnemonicString() == "CALL":
            target = call_target(program, instr)
            callee = None if target is None else target.getName()
            if callee == "CXString":
                # CXString(tmp, <strptr>) builds the literal; look it up in the window.
                literal = lookback_string(program, window)
                if literal is not None and _is_method_name(literal):
                    candidate = literal
            elif callee == "equalsIgnoreCase" and candidate is not None:
                # The compare consumes the pending literal; record each name only once.
                if candidate not in emitted:
                    emitted.add(candidate)
                    found.append({
                        "owner": owner,
                        "runner": runner,
                        "name": candidate,
                        "id": None,
                    })
                candidate = None
        # Every instruction (CALL or not) enters the window; drop the oldest when full.
        window.append(instr)
        if len(window) > LOOKBACK:
            del window[0]
    return found
def extract_methods_by_name(program):
    """Name-dispatched method recovery for engines without prepareMthHashSet (Piklib 6.1).

    Visits every function named `run` whose parent namespace name ends with `_Runner`
    and collects the method records found in it by `_extract_name_methods_from_run`.
    A function without a parent namespace gets the placeholder runner "?" and is
    therefore skipped by the suffix check.

    Returns:
        The concatenated list of method records, in function-iteration order.
    """
    function_manager = program.getFunctionManager()
    recovered = []
    functions = function_manager.getFunctions(True)
    while functions.hasNext():
        func = functions.next()
        if func.getName() == "run":
            namespace = func.getParentNamespace()
            runner = "?" if namespace is None else namespace.getName()
            if runner.endswith("_Runner"):
                owner = _owner_from_runner(runner)
                recovered.extend(
                    _extract_name_methods_from_run(program, func, owner, runner)
                )
    return recovered
# Both patterns match a bracketed operand of the form `[<word> + 0x<hex>]` and capture
# the hexadecimal offset (including its `0x` prefix) as group 1.
# NOTE(review): the two patterns are currently identical; judging by the names, one is
# presumably for vtable-slot offsets and the other for member offsets - confirm at the call sites.
_VTBL_OFF = re.compile(r"\[\w+ \+ (0x[0-9a-fA-F]+)\]")
_MEM_OFF = re.compile(r"\[\w+ \+ (0x[0-9a-fA-F]+)\]")

File diff suppressed because it is too large Load Diff

View File

@@ -30,9 +30,13 @@ def test_piklib61_early_engine_partial_surface():
names = {t["script_name"] for t in s.types}
assert len(names) >= 20 and {"ANIMO", "ARRAY", "BUTTON"} <= names
assert all(t.get("cpp_class") is None or t["cpp_class"].startswith("CMC_") for t in s.types)
# events + script fields work; method registration (prepareMthHashSet) doesn't exist yet
assert s.events and s.fields
assert s.methods == [] and s.method_dispatch == []
# 6.1 predates prepareMthHashSet: methods are dispatched by NAME in run(), so they're
# recovered without numeric ids, and the id-switch dispatch axis stays empty.
assert s.methods and all(m["id"] is None for m in s.methods)
assert s.method_dispatch == []
animo = {m["name"] for m in s.methods if m["owner"] == "CMC_Animo"}
assert {"show", "hide", "play"} <= animo # 6.1 uses lower/camelCase names
def test_piklib71_full_surface():
@@ -43,10 +47,12 @@ def test_piklib71_full_surface():
assert sum(1 for t in s.types if t.get("cpp_class")) > len(s.types) // 2
def test_61_to_71_diff_adds_methods():
def test_61_to_71_method_id_evolution():
s61, s71 = _load(P61), _load(P71)
shared = {t["script_name"] for t in s61.types} & {t["script_name"] for t in s71.types}
assert {"ANIMO", "ARRAY", "BUTTON"} <= shared # stable core across the two early versions
# 7.1 introduces the registered-method machinery 6.1 lacked entirely
d = compute_diff(s61, s71)
assert len(d["methods"]["added"]) > 100 and d["methods"]["removed"] == []
assert {"ANIMO", "ARRAY", "BUTTON"} <= shared # stable type core across the two early versions
# both expose a rich method surface, but only 7.1 carries numeric ids (prepareMthHashSet);
# 6.1's are name-only. (Names also differ in case - 6.1 camelCase vs 7.1 UPPERCASE - which is
# why a raw method diff is noisy without case folding.)
assert len(s61.methods) > 100 and all(m["id"] is None for m in s61.methods)
assert len(s71.methods) > 100 and all(m["id"] is not None for m in s71.methods)