From aa65beb7c17326f9a9cf786d6e4e583ceae28cbe Mon Sep 17 00:00:00 2001 From: Patryk Gensch <43010113+patryk025@users.noreply.github.com> Date: Sun, 31 May 2026 17:57:17 +0200 Subject: [PATCH] worker: pin Jython-era Ghidra (11.2.1) for headless .py post-script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ghidra 11.4+/12.x dropped the bundled Jython, so the .py extractor fails headless with "Ghidra was not started with PyGhidra. Python is not available" — analysis succeeds but the post-script never runs, so no snapshot is produced. Default GHIDRA_URL now points at 11.2.1 (Jython); README documents the constraint and the PyGhidra path for staying on 12.x. Keeps the local Dockerfile fixes (pip upgrade, non-editable install). Co-Authored-By: Claude Opus 4.8 --- README.md | 12 ++++++++++-- docker/api.Dockerfile | 5 ++--- docker/worker.Dockerfile | 13 +++++++++++-- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index c2ce3e9..19746ff 100644 --- a/README.md +++ b/README.md @@ -63,11 +63,19 @@ Ghidra headless + `extract_engine_surface.py` → snapshot → import do Postgre Worker (`docker/worker.Dockerfile`, `eclipse-temurin:21-jdk`) pobiera Ghidrę i ustawia `GHIDRA_HOME=/opt/ghidra`. Wersja jest przypięta w `ARG GHIDRA_URL`. Jeśli build padnie na pobieraniu, nadpisz URL realnym wydaniem z [releases NSA](https://github.com/NationalSecurityAgency/ghidra/releases) -(nazwa pliku: `ghidra__PUBLIC_.zip`): +(nazwa pliku: `ghidra__PUBLIC_.zip`). + +> **Musi to być Ghidra ≤ 11.3.x.** Ekstraktor to skrypt **Pythona (`.py`)**, który Ghidra w trybie +> headless uruchamia przez wbudowanego **Jythona**. Ghidra **11.4+ / 12.x usunęły Jythona** — tam +> `.py` headless wymaga **PyGhidry** (CPython), której ten obraz nie inicjalizuje, i dostaniesz +> `Ghidra was not started with PyGhidra. Python is not available` (analiza przejdzie, ale post-skrypt +> nie wyemituje snapshotu). 
Domyślny `GHIDRA_URL` celuje w 11.2.1 (z Jythonem). Chcesz zostać na 12.x? +> Trzeba doinstalować `pyghidra` i odpalać headless przez PyGhidrę — sam skrypt jest CPython-kompatybilny, +> więc zadziała, gdy interpreter wstanie (patrz dokumentacja PyGhidra w danej wersji Ghidry). ```bash docker compose build worker \ - --build-arg GHIDRA_URL=https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_11.3.2_build/ghidra_11.3.2_PUBLIC_20250415.zip + --build-arg GHIDRA_URL=https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_11.2.1_build/ghidra_11.2.1_PUBLIC_20241105.zip docker compose up ``` diff --git a/docker/api.Dockerfile b/docker/api.Dockerfile index 350d25e..606a144 100644 --- a/docker/api.Dockerfile +++ b/docker/api.Dockerfile @@ -9,9 +9,8 @@ COPY ams ./ams COPY ghidra_scripts ./ghidra_scripts COPY snapshots ./snapshots -# Editable install keeps ams + ghidra_scripts co-located (the worker resolves the script -# path relative to the package). The API needs the queue client too, to enqueue jobs. -RUN pip install --no-cache-dir -e ".[api]" rq redis "psycopg[binary]>=3.1" +# The API needs the queue client too, to enqueue jobs. +RUN pip install --no-cache-dir ".[api]" rq redis "psycopg[binary]>=3.1" ENV AMS_UPLOAD_DIR=/data/uploads EXPOSE 8000 diff --git a/docker/worker.Dockerfile b/docker/worker.Dockerfile index 62224c2..422928e 100644 --- a/docker/worker.Dockerfile +++ b/docker/worker.Dockerfile @@ -3,9 +3,16 @@ # # Override the Ghidra build without editing this file: # docker build --build-arg GHIDRA_URL=https://github.com/.../ghidra_X_PUBLIC_DATE.zip ... +# +# IMPORTANT: the extractor is a Python (.py) headless post-script, which Ghidra runs via its +# bundled **Jython**. Ghidra 11.4+ / 12.x REMOVED Jython - there `.py` headless needs PyGhidra +# (CPython), which this image doesn't initialise, and you'll get: +# "Ghidra was not started with PyGhidra. Python is not available" +# So pin a Jython-era release (<= 11.3.x). 
If this URL 404s, copy the exact filename from +# https://github.com/NationalSecurityAgency/ghidra/releases (form: ghidra_X_PUBLIC_DATE.zip). FROM eclipse-temurin:21-jdk-jammy -ARG GHIDRA_URL=https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_11.3_build/ghidra_11.3_PUBLIC_20250205.zip +ARG GHIDRA_URL=https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_11.2.1_build/ghidra_11.2.1_PUBLIC_20241105.zip # Runtime deps: python (the package), unzip/wget (fetch Ghidra), libarchive-tools (bsdtar: # unpacks ISO9660 + ZIP game archives). @@ -19,6 +26,8 @@ RUN wget -q "$GHIDRA_URL" -O /tmp/ghidra.zip \ && mv /opt/ghidra_* /opt/ghidra \ && rm /tmp/ghidra.zip +RUN pip3 install --no-cache-dir --upgrade pip setuptools wheel + ENV GHIDRA_HOME=/opt/ghidra ENV AMS_GHIDRA_SCRIPTS=/app/ghidra_scripts ENV AMS_UPLOAD_DIR=/data/uploads @@ -29,7 +38,7 @@ COPY ams ./ams COPY ghidra_scripts ./ghidra_scripts COPY snapshots ./snapshots -RUN pip3 install --no-cache-dir -e ".[api,acquire,worker]" +RUN pip3 install --no-cache-dir ".[api,acquire,worker]" # Drain the 'acquire' queue. Shell form so $REDIS_URL expands at runtime. CMD rq worker --url "${REDIS_URL:-redis://redis:6379/0}" acquire