Extract Piklib 6.1 methods by name from Runner::run

6.1 has no prepareMthHashSet and no id-switch: CMC_*_Runner::run takes the method *name* (CXString) and dispatches via a `CXString(tmp,"name"); equalsIgnoreCase(name)` chain. extract_methods now falls back (only when the hashset pass finds nothing) to scanning run() for that pattern, recovering the names (no numeric ids). 6.1 now yields 186 methods (Animo: show/hide/play/setFPS/...); dispatch stays 0 since the string chain isn't a jump table. 7.1/8.x are untouched (they have prepareMthHashSet). Note: 6.1 names are camelCase/lowercase vs 7.1+ UPPERCASE (the engine compares case-insensitively), so a cross-version method diff needs case folding to be clean. 38/38 tests pass (test_versions updated: 6.1 methods present with id=None). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-31 20:25:20 +02:00
parent 67cbc32a2c
commit 8875540186
3 changed files with 1182 additions and 9 deletions
--- a/ghidra_scripts/extract_engine_surface.py
+++ b/ghidra_scripts/extract_engine_surface.py
@@ -328,9 +328,59 @@ def extract_methods(program):
        methods.extend(own)
        if base is not None:
            inheritance.append({"runner": runner, "base_runner": base})
+    if not methods:
+        # Piklib 6.1 predates prepareMthHashSet: methods are dispatched by NAME inside
+        # <Runner>::run, so recover the names from there instead (no numeric ids).
+        methods = extract_methods_by_name(program)
    return methods, inheritance


+def _extract_name_methods_from_run(program, run_func, owner, runner):
+    """Piklib 6.1's `run(CXString name, ...)` is a chain of `CXString(tmp, "name")` then
+    `equalsIgnoreCase(name)`; the literal built right before each compare is a method name."""
+    listing = program.getListing()
+    methods = []
+    recent = []
+    pending = None
+    seen = set()
+    it = listing.getInstructions(resolve_thunk(run_func).getBody(), True)
+    while it.hasNext():
+        instr = it.next()
+        if instr.getMnemonicString() == "CALL":
+            f = call_target(program, instr)
+            nm = f.getName() if f is not None else None
+            if nm == "CXString":                 # CXString(tmp, <strptr>) builds the literal
+                s = lookback_string(program, recent)
+                if s is not None and _is_method_name(s):
+                    pending = s
+            elif nm == "equalsIgnoreCase" and pending is not None:
+                if pending not in seen:
+                    seen.add(pending)
+                    methods.append({"owner": owner, "runner": runner, "name": pending, "id": None})
+                pending = None
+        recent.append(instr)
+        if len(recent) > LOOKBACK:
+            recent.pop(0)
+    return methods
+
+
+def extract_methods_by_name(program):
+    """Name-dispatched method recovery for engines without prepareMthHashSet (Piklib 6.1)."""
+    fm = program.getFunctionManager()
+    methods = []
+    it = fm.getFunctions(True)
+    while it.hasNext():
+        f = it.next()
+        if f.getName() != "run":
+            continue
+        ns = f.getParentNamespace()
+        runner = ns.getName() if ns is not None else "?"
+        if not runner.endswith("_Runner"):
+            continue
+        methods.extend(_extract_name_methods_from_run(program, f, _owner_from_runner(runner), runner))
+    return methods
+
+
 _VTBL_OFF = re.compile(r"\[\w+ \+ (0x[0-9a-fA-F]+)\]")
 _MEM_OFF = re.compile(r"\[\w+ \+ (0x[0-9a-fA-F]+)\]")

--- a/snapshots/PIKLib61.dll.snapshot.json
+++ b/snapshots/PIKLib61.dll.snapshot.json
--- a/tests/test_versions.py
+++ b/tests/test_versions.py
@@ -30,9 +30,13 @@ def test_piklib61_early_engine_partial_surface():
    names = {t["script_name"] for t in s.types}
    assert len(names) >= 20 and {"ANIMO", "ARRAY", "BUTTON"} <= names
    assert all(t.get("cpp_class") is None or t["cpp_class"].startswith("CMC_") for t in s.types)
-    # events + script fields work; method registration (prepareMthHashSet) doesn't exist yet
    assert s.events and s.fields
-    assert s.methods == [] and s.method_dispatch == []
+    # 6.1 predates prepareMthHashSet: methods are dispatched by NAME in run(), so they're
+    # recovered without numeric ids, and the id-switch dispatch axis stays empty.
+    assert s.methods and all(m["id"] is None for m in s.methods)
+    assert s.method_dispatch == []
+    animo = {m["name"] for m in s.methods if m["owner"] == "CMC_Animo"}
+    assert {"show", "hide", "play"} <= animo  # 6.1 uses lower/camelCase names


 def test_piklib71_full_surface():
@@ -43,10 +47,12 @@ def test_piklib71_full_surface():
    assert sum(1 for t in s.types if t.get("cpp_class")) > len(s.types) // 2


-def test_61_to_71_diff_adds_methods():
+def test_61_to_71_method_id_evolution():
    s61, s71 = _load(P61), _load(P71)
    shared = {t["script_name"] for t in s61.types} & {t["script_name"] for t in s71.types}
-    assert {"ANIMO", "ARRAY", "BUTTON"} <= shared  # stable core across the two early versions
-    # 7.1 introduces the registered-method machinery 6.1 lacked entirely
-    d = compute_diff(s61, s71)
-    assert len(d["methods"]["added"]) > 100 and d["methods"]["removed"] == []
+    assert {"ANIMO", "ARRAY", "BUTTON"} <= shared  # stable type core across the two early versions
+    # both expose a rich method surface, but only 7.1 carries numeric ids (prepareMthHashSet);
+    # 6.1's are name-only. (Names also differ in case - 6.1 camelCase vs 7.1 UPPERCASE - which is
+    # why a raw method diff is noisy without case folding.)
+    assert len(s61.methods) > 100 and all(m["id"] is None for m in s61.methods)
+    assert len(s71.methods) > 100 and all(m["id"] is not None for m in s71.methods)