Containerise: Postgres + Redis/RQ + API + Ghidra worker

Brings up the documented target architecture as a docker-compose stack — a
modular monolith with the Ghidra step split into its own async worker.

- worker/: RQ queue (lazy redis import) + run_acquisition task (Job status
  queued→started→finished/failed, drives ams.acquire with sink=db)
- Job model + JobOut schema; Snapshot.data is JSONB on Postgres
- POST/GET /jobs: stream an upload to a shared volume, enqueue, poll status
- docker/api.Dockerfile (slim) + docker/worker.Dockerfile (JDK21 + Ghidra
  fetched at build, overridable via GHIDRA_URL) + docker-compose.yml
- ghidra.py: AMS_GHIDRA_SCRIPTS override for in-container script path
- pyproject: [worker] extra (rq/redis/psycopg), python-multipart in [api]
- tests: 4 new (task success/failure + endpoint enqueue/503) -> 22/22

Verified: API image builds, container serves /health + /ui + /jobs; compose
config validates. Worker image (downloads ~1 GB Ghidra) not built here.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Patryk Gensch
2026-05-31 12:24:47 +02:00
parent 6797ad5ddb
commit f4aa7caaa9
15 changed files with 511 additions and 3 deletions

View File

@@ -15,7 +15,7 @@ from fastapi.staticfiles import StaticFiles
from .. import __version__
from .db import configure, init_db
from .routes import diff, games, snapshots
from .routes import diff, games, jobs, snapshots
_STATIC = Path(__file__).parent / "static"
@@ -28,6 +28,7 @@ def create_app(database_url: str | None = None) -> FastAPI:
app.include_router(games.router)
app.include_router(snapshots.router)
app.include_router(diff.router)
app.include_router(jobs.router)
@app.get("/health", tags=["meta"])
def health() -> dict[str, str]:

View File

@@ -7,10 +7,14 @@ from __future__ import annotations
from datetime import datetime, timezone
from sqlalchemy import ForeignKey, JSON, String, UniqueConstraint
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column, relationship
from .db import Base
# JSONB on Postgres (indexable, typed), plain JSON elsewhere (e.g. SQLite in dev/tests).
_JSON = JSON().with_variant(JSONB, "postgresql")
def _utcnow() -> datetime:
return datetime.now(timezone.utc)
@@ -46,6 +50,33 @@ class Snapshot(Base):
n_fields: Mapped[int] = mapped_column(default=0)
created_at: Mapped[datetime] = mapped_column(default=_utcnow)
data: Mapped[dict] = mapped_column(JSON)
data: Mapped[dict] = mapped_column(_JSON)
game: Mapped["Game | None"] = relationship(back_populates="snapshots")
class Job(Base):
"""An acquisition job: an uploaded archive/DLL handed to the Ghidra worker.
The API row is the durable source of truth (survives Redis); `rq_id` links to the
transient RQ job. The worker walks status queued → started → finished/failed and,
on success, points `snapshot_id` at the catalog entry it produced."""
__tablename__ = "jobs"
id: Mapped[int] = mapped_column(primary_key=True)
rq_id: Mapped[str | None] = mapped_column(String, default=None, index=True)
status: Mapped[str] = mapped_column(String, default="queued", index=True)
source_name: Mapped[str] = mapped_column(String) # original upload filename
source_path: Mapped[str] = mapped_column(String) # path on the shared volume
game_name: Mapped[str | None] = mapped_column(String, default=None)
snapshot_id: Mapped[int | None] = mapped_column(ForeignKey("snapshots.id"), default=None)
dll_name: Mapped[str | None] = mapped_column(String, default=None)
error: Mapped[str | None] = mapped_column(String, default=None)
created_at: Mapped[datetime] = mapped_column(default=_utcnow)
updated_at: Mapped[datetime] = mapped_column(default=_utcnow, onupdate=_utcnow)
snapshot: Mapped["Snapshot | None"] = relationship()

70
ams/api/routes/jobs.py Normal file
View File

@@ -0,0 +1,70 @@
"""Acquisition jobs: upload a game archive/DLL, hand it to the Ghidra worker, poll status.
The upload is streamed to a shared volume ($AMS_UPLOAD_DIR) that the worker container also
mounts; only the path travels through Redis, not the bytes."""
from __future__ import annotations
import os
import shutil
import uuid
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
from sqlalchemy import select
from sqlalchemy.orm import Session
from .. import models, schemas
from ..db import get_db
router = APIRouter(prefix="/jobs", tags=["jobs"])
UPLOAD_DIR = os.environ.get("AMS_UPLOAD_DIR", "./uploads")
def _save_upload(upload: UploadFile) -> tuple[str, str]:
"""Stream the upload to the shared dir under a unique name; return (path, original_name)."""
os.makedirs(UPLOAD_DIR, exist_ok=True)
original = os.path.basename(upload.filename or "upload.bin")
dest = os.path.join(UPLOAD_DIR, "{0}_{1}".format(uuid.uuid4().hex[:8], original))
with open(dest, "wb") as fh:
shutil.copyfileobj(upload.file, fh)
return dest, original
@router.post("", response_model=schemas.JobOut, status_code=202)
def create_job(
file: UploadFile = File(..., description="an ISO/ZIP archive or a loose engine DLL"),
game: str | None = Form(None, description="link the resulting snapshot to this game"),
db: Session = Depends(get_db),
) -> models.Job:
path, original = _save_upload(file)
job = models.Job(source_name=original, source_path=path, game_name=game, status="queued")
db.add(job)
db.commit()
db.refresh(job)
try:
from ...worker.queue import enqueue_acquisition
job.rq_id = enqueue_acquisition(path, game, job.id)
db.commit()
db.refresh(job)
except Exception as exc: # Redis/RQ down or missing — surface, don't leave a phantom job
job.status = "failed"
job.error = "enqueue failed: {0}".format(exc)
db.commit()
raise HTTPException(503, job.error)
return job
@router.get("", response_model=list[schemas.JobOut])
def list_jobs(db: Session = Depends(get_db)) -> list[models.Job]:
return list(db.scalars(select(models.Job).order_by(models.Job.id.desc())))
@router.get("/{job_id}", response_model=schemas.JobOut)
def get_job(job_id: int, db: Session = Depends(get_db)) -> models.Job:
job = db.get(models.Job, job_id)
if job is None:
raise HTTPException(404, "job not found")
return job

View File

@@ -41,3 +41,17 @@ class SnapshotDetail(SnapshotOut):
class GameDetail(GameOut):
snapshots: list[SnapshotOut] = []
class JobOut(BaseModel):
model_config = ConfigDict(from_attributes=True)
id: int
rq_id: str | None
status: str
source_name: str
game_name: str | None
snapshot_id: int | None
dll_name: str | None
error: str | None
created_at: datetime
updated_at: datetime