"""Drive Ghidra to run the engine-surface extractor on a DLL. This is the heavy worker step: it imports the binary into a throwaway Ghidra project, auto-analyses it, then runs `ghidra_scripts/extract_engine_surface.py` to write the snapshot JSON to a path we pick. Two back-ends, picked by the `AMS_USE_PYGHIDRA` env var: * default — `analyzeHeadless` runs the script as a post-script via Ghidra's bundled **Jython**. Works on Ghidra <= 11.3.x; on 11.4+/12.x Jython is gone and the script silently doesn't run ("Ghidra was not started with PyGhidra"). * `AMS_USE_PYGHIDRA=1` — run the same script through **PyGhidra** (CPython) via `pyghidra.run_script`, so modern Ghidra (11.4+/12.x) works. Needs `pip install pyghidra` and Ghidra's dir in `$GHIDRA_INSTALL_DIR` (falls back to `$GHIDRA_HOME`). analyzeHeadless resolution order: $GHIDRA_HEADLESS, $GHIDRA_HOME/support/analyzeHeadless, then `analyzeHeadless` on PATH. """ from __future__ import annotations import os import shutil import subprocess import tempfile import uuid from pathlib import Path _SCRIPT_NAME = "extract_engine_surface.py" # ams/acquire/ghidra.py -> repo root is two parents up _SCRIPT_DIR = Path(__file__).resolve().parents[2] / "ghidra_scripts" class GhidraNotFound(RuntimeError): pass class GhidraRunError(RuntimeError): pass def find_headless() -> str | None: """Locate the analyzeHeadless launcher, or None if Ghidra isn't configured.""" env = os.environ.get("GHIDRA_HEADLESS") if env and os.path.isfile(env): return env home = os.environ.get("GHIDRA_HOME") if home: for name in ("analyzeHeadless", "analyzeHeadless.bat"): cand = os.path.join(home, "support", name) if os.path.isfile(cand): return cand return shutil.which("analyzeHeadless") def run_extractor( dll_path: str, out_path: str, *, headless: str | None = None, script_dir: str | None = None, timeout: int = 1800, ) -> str: """Headless-analyse `dll_path` and write the snapshot to `out_path`; returns `out_path`. Raises GhidraNotFound if no launcher is configured, GhidraRunError on failure or if the script produced no output.""" if os.environ.get("AMS_USE_PYGHIDRA"): return run_extractor_pyghidra(dll_path, out_path, script_dir=script_dir) headless = headless or find_headless() if not headless: raise GhidraNotFound( "analyzeHeadless not found — set $GHIDRA_HEADLESS or $GHIDRA_HOME (Ghidra's install dir)") script_dir = script_dir or os.environ.get("AMS_GHIDRA_SCRIPTS") or str(_SCRIPT_DIR) out_path = os.path.abspath(out_path) os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True) proj_dir = tempfile.mkdtemp(prefix="ams_ghidra_") proj_name = "ams_" + uuid.uuid4().hex[:8] cmd = [ headless, proj_dir, proj_name, "-import", dll_path, "-scriptPath", script_dir, "-postScript", _SCRIPT_NAME, out_path, "-deleteProject", ] try: proc = subprocess.run( cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, timeout=timeout) except subprocess.TimeoutExpired: raise GhidraRunError("analyzeHeadless timed out after {0}s".format(timeout)) except OSError as e: raise GhidraRunError("failed to launch analyzeHeadless: {0}".format(e)) finally: shutil.rmtree(proj_dir, ignore_errors=True) if not os.path.isfile(out_path): tail = proc.stdout.decode("utf-8", "replace")[-2000:] if proc.stdout else "" raise GhidraRunError( "extractor produced no snapshot at {0}\n--- headless tail ---\n{1}".format(out_path, tail)) return out_path def run_extractor_pyghidra(dll_path: str, out_path: str, *, script_dir: str | None = None) -> str: """Run the extractor through PyGhidra (CPython) instead of analyzeHeadless/Jython. `pyghidra.run_script` boots Ghidra in-process, imports + auto-analyses the binary, and executes our GhidraScript with `getScriptArgs() == [out_path]` - the same script, just under CPython, so it works on Ghidra 11.4+/12.x where Jython is gone.""" os.environ.setdefault("GHIDRA_INSTALL_DIR", os.environ.get("GHIDRA_HOME", "")) try: import pyghidra except ImportError: raise GhidraNotFound( "AMS_USE_PYGHIDRA is set but the 'pyghidra' package isn't installed (pip install pyghidra)") script_dir = script_dir or os.environ.get("AMS_GHIDRA_SCRIPTS") or str(_SCRIPT_DIR) script_path = os.path.join(script_dir, _SCRIPT_NAME) out_path = os.path.abspath(out_path) os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True) proj_dir = tempfile.mkdtemp(prefix="ams_pyghidra_") try: pyghidra.run_script( dll_path, script_path, project_location=proj_dir, project_name="ams_" + uuid.uuid4().hex[:8], script_args=[out_path], analyze=True, verbose=False, ) except Exception as e: # jpype/Ghidra errors aren't a tidy hierarchy raise GhidraRunError("pyghidra.run_script failed: {0}".format(e)) finally: shutil.rmtree(proj_dir, ignore_errors=True) if not os.path.isfile(out_path): raise GhidraRunError("extractor produced no snapshot at {0} (PyGhidra path)".format(out_path)) return out_path