The .py extractor runs fine under PyGhidra in the GUI; only `analyzeHeadless` doesn't init PyGhidra. Add an env-gated CPython path so modern Ghidra works headless: - ghidra.run_extractor_pyghidra(): runs the same GhidraScript via pyghidra.run_script (boots Ghidra in-process, imports+analyses, getScriptArgs()=[out_path]); run_extractor dispatches to it when AMS_USE_PYGHIDRA is set. No script changes needed. - worker image installs pyghidra + sets GHIDRA_INSTALL_DIR; compose exposes AMS_USE_PYGHIDRA (default off). Jython path stays the default and untouched. - README documents both variants (Jython <=11.3.x vs PyGhidra 11.4+/12.x). - test: AMS_USE_PYGHIDRA routes to the PyGhidra back-end (clear error if pkg missing). 35/35 tests pass. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
139 lines
5.2 KiB
Python
139 lines
5.2 KiB
Python
"""Drive Ghidra to run the engine-surface extractor on a DLL.
|
|
|
|
This is the heavy worker step: it imports the binary into a throwaway Ghidra
|
|
project, auto-analyses it, then runs `ghidra_scripts/extract_engine_surface.py`
|
|
to write the snapshot JSON to a path we pick.
|
|
|
|
Two back-ends, picked by the `AMS_USE_PYGHIDRA` env var:
|
|
|
|
* default — `analyzeHeadless` runs the script as a post-script via Ghidra's bundled
|
|
**Jython**. Works on Ghidra <= 11.3.x; on 11.4+/12.x Jython is gone and the script
|
|
silently doesn't run ("Ghidra was not started with PyGhidra").
|
|
* `AMS_USE_PYGHIDRA=1` — run the same script through **PyGhidra** (CPython) via
|
|
`pyghidra.run_script`, so modern Ghidra (11.4+/12.x) works. Needs `pip install pyghidra`
|
|
and Ghidra's dir in `$GHIDRA_INSTALL_DIR` (falls back to `$GHIDRA_HOME`).
|
|
|
|
analyzeHeadless resolution order: $GHIDRA_HEADLESS, $GHIDRA_HOME/support/analyzeHeadless,
|
|
then `analyzeHeadless` on PATH.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
import tempfile
|
|
import uuid
|
|
from pathlib import Path
|
|
|
|
# File name of the GhidraScript that both back-ends execute.
_SCRIPT_NAME = "extract_engine_surface.py"
|
|
# ams/acquire/ghidra.py -> repo root is two parents up
# (parents[0] = ams/acquire, parents[1] = ams, parents[2] = repo root, per the path above).
# This is only the default: callers can pass script_dir, or set $AMS_GHIDRA_SCRIPTS.
|
|
_SCRIPT_DIR = Path(__file__).resolve().parents[2] / "ghidra_scripts"
|
|
|
|
|
|
class GhidraNotFound(RuntimeError):
    """Ghidra cannot be reached from this environment.

    Raised when no `analyzeHeadless` launcher is configured, or when the PyGhidra
    back-end is requested but the `pyghidra` package is not importable.
    """
|
|
|
|
|
|
class GhidraRunError(RuntimeError):
    """Ghidra was started but the extraction did not succeed.

    Raised on launch failure, timeout, a PyGhidra error, or when the run finished
    without leaving a snapshot at the requested output path.
    """
|
|
|
|
|
|
def find_headless() -> str | None:
    """Locate the analyzeHeadless launcher, or None if Ghidra isn't configured.

    Resolution order:

    1. `$GHIDRA_HEADLESS`, if it names an existing file.
    2. `$GHIDRA_HOME/support/analyzeHeadless` or `analyzeHeadless.bat`.  Ghidra's
       release archive ships both side by side, so on Windows the `.bat` is tried
       first: the extension-less file is a shell script that `subprocess` cannot
       execute there.  Everywhere else the shell script is tried first.
    3. `analyzeHeadless` on `PATH`.
    """
    explicit = os.environ.get("GHIDRA_HEADLESS")
    if explicit and os.path.isfile(explicit):
        return explicit

    home = os.environ.get("GHIDRA_HOME")
    if home:
        launcher_names = ["analyzeHeadless", "analyzeHeadless.bat"]
        if os.name == "nt":
            # Both files exist in a stock install; pick the one Windows can run.
            launcher_names.reverse()
        for launcher_name in launcher_names:
            candidate = os.path.join(home, "support", launcher_name)
            if os.path.isfile(candidate):
                return candidate

    return shutil.which("analyzeHeadless")
|
|
|
|
|
|
def run_extractor(
    dll_path: str,
    out_path: str,
    *,
    headless: str | None = None,
    script_dir: str | None = None,
    timeout: int = 1800,
) -> str:
    """Headless-analyse `dll_path` and write the snapshot to `out_path`; returns `out_path`.

    When `$AMS_USE_PYGHIDRA` is set to anything other than an empty/"0"/"false"/"no"/"off"
    value, the work is delegated to `run_extractor_pyghidra`; `headless` and `timeout`
    are not forwarded on that path. Otherwise `analyzeHeadless` is run as a subprocess
    with the extractor as a post-script.

    Args:
        dll_path: binary to import into a throwaway Ghidra project.
        out_path: where the script should write the snapshot (made absolute; parent
            directories are created). A file already present there is removed before
            the run so that a stale snapshot can never be mistaken for fresh output.
        headless: explicit launcher path; defaults to `find_headless()`.
        script_dir: directory holding the extractor script; defaults to
            `$AMS_GHIDRA_SCRIPTS`, then the repo's bundled script directory.
        timeout: seconds to wait for `analyzeHeadless` before giving up.

    Returns:
        The absolute `out_path`.

    Raises:
        GhidraNotFound: no launcher is configured.
        GhidraRunError: launch failure, timeout, an existing snapshot that cannot be
            replaced, or the script produced no output.
    """
    # An explicit "off" value such as AMS_USE_PYGHIDRA=0 must not enable the back-end.
    use_pyghidra = os.environ.get("AMS_USE_PYGHIDRA", "").strip().lower()
    if use_pyghidra not in ("", "0", "false", "no", "off"):
        return run_extractor_pyghidra(dll_path, out_path, script_dir=script_dir)

    headless = headless or find_headless()
    if not headless:
        raise GhidraNotFound(
            "analyzeHeadless not found — set $GHIDRA_HEADLESS or $GHIDRA_HOME (Ghidra's install dir)")

    script_dir = script_dir or os.environ.get("AMS_GHIDRA_SCRIPTS") or str(_SCRIPT_DIR)
    out_path = os.path.abspath(out_path)
    os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)

    # Success is detected purely by the snapshot existing afterwards, so a leftover
    # file from an earlier run would turn a failed run into a silent false positive.
    if os.path.isfile(out_path):
        try:
            os.remove(out_path)
        except OSError as e:
            raise GhidraRunError(
                "cannot replace existing snapshot at {0}: {1}".format(out_path, e)) from e

    proj_dir = tempfile.mkdtemp(prefix="ams_ghidra_")
    proj_name = "ams_" + uuid.uuid4().hex[:8]
    cmd = [
        headless, proj_dir, proj_name,
        "-import", dll_path,
        "-scriptPath", script_dir,
        "-postScript", _SCRIPT_NAME, out_path,
        "-deleteProject",
    ]
    try:
        # stderr is folded into stdout so the failure tail below shows both streams.
        proc = subprocess.run(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, timeout=timeout)
    except subprocess.TimeoutExpired as e:
        raise GhidraRunError("analyzeHeadless timed out after {0}s".format(timeout)) from e
    except OSError as e:
        raise GhidraRunError("failed to launch analyzeHeadless: {0}".format(e)) from e
    finally:
        # The project is throwaway; clean up even when the run failed or timed out.
        shutil.rmtree(proj_dir, ignore_errors=True)

    if not os.path.isfile(out_path):
        tail = proc.stdout.decode("utf-8", "replace")[-2000:] if proc.stdout else ""
        raise GhidraRunError(
            "extractor produced no snapshot at {0}\n--- headless tail ---\n{1}".format(out_path, tail))
    return out_path
|
|
|
|
|
|
def run_extractor_pyghidra(dll_path: str, out_path: str, *, script_dir: str | None = None) -> str:
    """Run the extractor through PyGhidra (CPython) instead of analyzeHeadless/Jython.

    `pyghidra.run_script` boots Ghidra in-process, imports + auto-analyses the binary, and
    executes our GhidraScript with `getScriptArgs() == [out_path]` - the same script, just under
    CPython, so it works on Ghidra 11.4+/12.x where Jython is gone.

    Args:
        dll_path: binary to import into a throwaway Ghidra project.
        out_path: where the script should write the snapshot (made absolute; parent
            directories are created). A file already present there is removed before
            the run so that a stale snapshot can never be mistaken for fresh output.
        script_dir: directory holding the extractor script; defaults to
            `$AMS_GHIDRA_SCRIPTS`, then the repo's bundled script directory.

    Returns:
        The absolute `out_path`.

    Raises:
        GhidraNotFound: the `pyghidra` package is not installed.
        GhidraRunError: `pyghidra.run_script` raised, an existing snapshot could not be
            replaced, or the script produced no output.

    Side effect: when `$GHIDRA_INSTALL_DIR` is unset or empty and `$GHIDRA_HOME` is
    non-empty, `$GHIDRA_INSTALL_DIR` is set to `$GHIDRA_HOME` for this process.
    """
    # Fall back to $GHIDRA_HOME only when it actually has a value, and also when
    # GHIDRA_INSTALL_DIR is present but empty; never write an empty install dir.
    if not os.environ.get("GHIDRA_INSTALL_DIR"):
        ghidra_home = os.environ.get("GHIDRA_HOME")
        if ghidra_home:
            os.environ["GHIDRA_INSTALL_DIR"] = ghidra_home

    try:
        # Imported lazily: the package is optional and only needed on this back-end.
        import pyghidra
    except ImportError as e:
        raise GhidraNotFound(
            "AMS_USE_PYGHIDRA is set but the 'pyghidra' package isn't installed (pip install pyghidra)") from e

    script_dir = script_dir or os.environ.get("AMS_GHIDRA_SCRIPTS") or str(_SCRIPT_DIR)
    script_path = os.path.join(script_dir, _SCRIPT_NAME)
    out_path = os.path.abspath(out_path)
    os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)

    # Success is detected purely by the snapshot existing afterwards, so a leftover
    # file from an earlier run would turn a failed run into a silent false positive.
    if os.path.isfile(out_path):
        try:
            os.remove(out_path)
        except OSError as e:
            raise GhidraRunError(
                "cannot replace existing snapshot at {0}: {1}".format(out_path, e)) from e

    proj_dir = tempfile.mkdtemp(prefix="ams_pyghidra_")
    try:
        pyghidra.run_script(
            dll_path, script_path,
            project_location=proj_dir, project_name="ams_" + uuid.uuid4().hex[:8],
            script_args=[out_path], analyze=True, verbose=False,
        )
    except Exception as e:  # jpype/Ghidra errors aren't a tidy hierarchy
        raise GhidraRunError("pyghidra.run_script failed: {0}".format(e)) from e
    finally:
        # The project is throwaway; clean up even when the run failed.
        shutil.rmtree(proj_dir, ignore_errors=True)

    if not os.path.isfile(out_path):
        raise GhidraRunError("extractor produced no snapshot at {0} (PyGhidra path)".format(out_path))
    return out_path
|