Containerise: Postgres + Redis/RQ + API + Ghidra worker
Brings up the documented target architecture as a docker-compose stack — a modular monolith with the Ghidra step split into its own async worker.

- worker/: RQ queue (lazy redis import) + run_acquisition task (Job status queued→started→finished/failed, drives ams.acquire with sink=db)
- Job model + JobOut schema; Snapshot.data is JSONB on Postgres
- POST/GET /jobs: stream an upload to a shared volume, enqueue, poll status
- docker/api.Dockerfile (slim) + docker/worker.Dockerfile (JDK21 + Ghidra fetched at build, overridable via GHIDRA_URL) + docker-compose.yml
- ghidra.py: AMS_GHIDRA_SCRIPTS override for in-container script path
- pyproject: [worker] extra (rq/redis/psycopg), python-multipart in [api]
- tests: 4 new (task success/failure + endpoint enqueue/503) -> 22/22

Verified: API image builds, container serves /health + /ui + /jobs; compose config validates. Worker image (downloads ~1 GB Ghidra) not built here.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -15,7 +15,7 @@ from fastapi.staticfiles import StaticFiles
|
||||
|
||||
from .. import __version__
|
||||
from .db import configure, init_db
|
||||
from .routes import diff, games, snapshots
|
||||
from .routes import diff, games, jobs, snapshots
|
||||
|
||||
_STATIC = Path(__file__).parent / "static"
|
||||
|
||||
@@ -28,6 +28,7 @@ def create_app(database_url: str | None = None) -> FastAPI:
|
||||
app.include_router(games.router)
|
||||
app.include_router(snapshots.router)
|
||||
app.include_router(diff.router)
|
||||
app.include_router(jobs.router)
|
||||
|
||||
@app.get("/health", tags=["meta"])
|
||||
def health() -> dict[str, str]:
|
||||
|
||||
@@ -7,10 +7,14 @@ from __future__ import annotations
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from sqlalchemy import ForeignKey, JSON, String, UniqueConstraint
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from .db import Base
|
||||
|
||||
# JSONB on Postgres (indexable, typed), plain JSON elsewhere (e.g. SQLite in dev/tests).
|
||||
_JSON = JSON().with_variant(JSONB, "postgresql")
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
|
||||
return datetime.now(timezone.utc)
|
||||
@@ -46,6 +50,33 @@ class Snapshot(Base):
|
||||
n_fields: Mapped[int] = mapped_column(default=0)
|
||||
|
||||
created_at: Mapped[datetime] = mapped_column(default=_utcnow)
|
||||
data: Mapped[dict] = mapped_column(JSON)
|
||||
data: Mapped[dict] = mapped_column(_JSON)
|
||||
|
||||
game: Mapped["Game | None"] = relationship(back_populates="snapshots")
|
||||
|
||||
|
||||
class Job(Base):
    """One acquisition request: an uploaded archive or DLL queued for the Ghidra worker.

    This row, not Redis, is the durable record of the job; `rq_id` only references the
    short-lived RQ job. The worker moves `status` through queued → started →
    finished/failed and, when it succeeds, sets `snapshot_id` to the catalog entry it
    produced.
    """

    __tablename__ = "jobs"

    id: Mapped[int] = mapped_column(primary_key=True)
    rq_id: Mapped[str | None] = mapped_column(String, default=None, index=True)
    status: Mapped[str] = mapped_column(String, default="queued", index=True)

    # What was uploaded and where it was stored.
    source_name: Mapped[str] = mapped_column(String)  # filename as sent by the client
    source_path: Mapped[str] = mapped_column(String)  # location on the shared volume
    game_name: Mapped[str | None] = mapped_column(String, default=None)

    # Outcome fields; all of them start out as NULL.
    snapshot_id: Mapped[int | None] = mapped_column(ForeignKey("snapshots.id"), default=None)
    dll_name: Mapped[str | None] = mapped_column(String, default=None)
    error: Mapped[str | None] = mapped_column(String, default=None)

    created_at: Mapped[datetime] = mapped_column(default=_utcnow)
    # `onupdate` re-stamps this column whenever the row is updated.
    updated_at: Mapped[datetime] = mapped_column(default=_utcnow, onupdate=_utcnow)

    snapshot: Mapped["Snapshot | None"] = relationship()
|
||||
|
||||
70
ams/api/routes/jobs.py
Normal file
70
ams/api/routes/jobs.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""Acquisition jobs: upload a game archive/DLL, hand it to the Ghidra worker, poll status.
|
||||
|
||||
The upload is streamed to a shared volume ($AMS_UPLOAD_DIR) that the worker container also
|
||||
mounts; only the path travels through Redis, not the bytes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from .. import models, schemas
|
||||
from ..db import get_db
|
||||
|
||||
router = APIRouter(prefix="/jobs", tags=["jobs"])
|
||||
|
||||
UPLOAD_DIR = os.environ.get("AMS_UPLOAD_DIR", "./uploads")
|
||||
|
||||
|
||||
def _save_upload(upload: UploadFile) -> tuple[str, str]:
    """Stream the upload to the shared dir under a unique name; return (path, original_name).

    The client-supplied filename is untrusted, so only its final path component is kept.
    Both "/" and "\\" count as separators, and an empty result falls back to "upload.bin".

    Args:
        upload: the incoming file; its `filename` and `file` attributes are read.

    Returns:
        A `(dest, original)` tuple: the path written under `UPLOAD_DIR` and the sanitised
        original filename.

    Raises:
        OSError: if the directory or file cannot be created or written. A partially
            written destination file is removed before the error propagates.
    """
    os.makedirs(UPLOAD_DIR, exist_ok=True)

    # os.path.basename only splits on the host separator; normalise "\" first so a
    # Windows-style client path ("C:\games\engine.dll") does not leak into the stored name.
    client_name = (upload.filename or "").replace("\\", "/")
    original = os.path.basename(client_name) or "upload.bin"

    # A short random prefix keeps same-named uploads from overwriting each other.
    dest = os.path.join(UPLOAD_DIR, "{0}_{1}".format(uuid.uuid4().hex[:8], original))
    try:
        with open(dest, "wb") as fh:
            shutil.copyfileobj(upload.file, fh)
    except Exception:
        # Don't leave a truncated archive behind on the shared volume.
        try:
            os.remove(dest)
        except OSError:
            pass
        raise
    return dest, original
|
||||
|
||||
|
||||
@router.post("", response_model=schemas.JobOut, status_code=202)
def create_job(
    file: UploadFile = File(..., description="an ISO/ZIP archive or a loose engine DLL"),
    game: str | None = Form(None, description="link the resulting snapshot to this game"),
    db: Session = Depends(get_db),
) -> models.Job:
    """Store the upload, record a queued Job row, and enqueue it for the worker.

    The Job row is committed before enqueueing so that its id can be handed to the queue.

    Returns:
        The persisted job, with `rq_id` set to the value returned by the enqueue call.

    Raises:
        HTTPException: 503 when importing the queue module or enqueueing fails. The job
            row is kept, marked "failed" with the reason in `error`.
    """
    path, original = _save_upload(file)
    job = models.Job(source_name=original, source_path=path, game_name=game, status="queued")
    db.add(job)
    db.commit()
    db.refresh(job)

    # Only the queue import and the enqueue call are guarded: a database error further down
    # must not be reported as an enqueue failure, nor mark an already-enqueued job as failed.
    try:
        # Imported inside the try so a missing worker/RQ install is surfaced the same way.
        from ...worker.queue import enqueue_acquisition

        rq_id = enqueue_acquisition(path, game, job.id)
    except Exception as exc:  # Redis/RQ down or missing — surface, don't leave a phantom job
        detail = "enqueue failed: {0}".format(exc)
        job.status = "failed"
        job.error = detail
        db.commit()
        raise HTTPException(503, detail) from exc

    job.rq_id = rq_id
    db.commit()
    db.refresh(job)
    return job
|
||||
|
||||
|
||||
@router.get("", response_model=list[schemas.JobOut])
def list_jobs(db: Session = Depends(get_db)) -> list[models.Job]:
    """Return all jobs ordered by descending id."""
    by_id_desc = select(models.Job).order_by(models.Job.id.desc())
    rows = db.scalars(by_id_desc)
    return list(rows)
|
||||
|
||||
|
||||
@router.get("/{job_id}", response_model=schemas.JobOut)
def get_job(job_id: int, db: Session = Depends(get_db)) -> models.Job:
    """Look up a single job by primary key; respond 404 when no such row exists."""
    found = db.get(models.Job, job_id)
    if found is not None:
        return found
    raise HTTPException(404, "job not found")
|
||||
@@ -41,3 +41,17 @@ class SnapshotDetail(SnapshotOut):
|
||||
|
||||
class GameDetail(GameOut):
|
||||
snapshots: list[SnapshotOut] = []
|
||||
|
||||
|
||||
# Response schema used by the /jobs routes. It mirrors the Job row except for
# `source_path`, which is not exposed.
class JobOut(BaseModel):
    # Read field values from object attributes (an ORM row) rather than requiring a dict.
    model_config = ConfigDict(from_attributes=True)

    id: int
    rq_id: str | None
    status: str
    source_name: str
    game_name: str | None

    # Outcome fields; None until set.
    snapshot_id: int | None
    dll_name: str | None
    error: str | None

    created_at: datetime
    updated_at: datetime
|
||||
|
||||
Reference in New Issue
Block a user