Files
Aidem-Media-DLL-Analysis/ams/acquire/cli.py
Patryk Gensch 6797ad5ddb Add ISO/ZIP acquisition pipeline (ams.acquire worker)
Closes the chain from a game file to a catalog entry: unpack an ISO/ZIP,
content-identify the engine DLL (CMC_ObjectsContainer marker in RTTI, so a
renamed file is still found), hash it (sha256 + md5 + optional ssdeep via
ppdeep), run Ghidra headless with the extractor, enrich and import the snapshot.

- unpack.py: bsdtar (ISO9660 + ZIP) with a pure-Python zipfile fallback
- identify.py: content-based engine-DLL picker + hashing
- ghidra.py: analyzeHeadless launcher discovery + post-script run
- pipeline.py: orchestration with injectable extract_fn; sink db|http|none
- cli.py: python -m ams.acquire (incl. --identify-only dry run)
- tests: 7 new (forged PE markers + stubbed extractor) -> 18/18

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-31 12:11:56 +02:00

88 lines
3.8 KiB
Python

"""Acquire a game from an ISO/ZIP (or loose DLL) into the catalog.
python -m ams.acquire game.iso --game "Reksio i UFO"
python -m ams.acquire dump_dir --game "Reksio i UFO" --sink http --post http://127.0.0.1:8000
python -m ams.acquire PIKLIB8.dll --identify-only # identify+hash only, skip the heavy pass
"""
from __future__ import annotations
import argparse
import sys
from . import acquire
from .ghidra import GhidraNotFound, find_headless
from .identify import find_engine_dlls, hash_file
from .pipeline import AcquireError
from .unpack import detect_kind, unpack
def _cmd_identify(args) -> int:
    """List engine-DLL candidates for ``args.source`` and hash the top one.

    The source is unpacked first when ``detect_kind`` reports ``"iso"`` or
    ``"zip"``; any other kind is passed to the scanner unchanged.

    Returns:
        0 after printing the candidate table and the hashes of the first
        candidate, 1 when no candidate was found.
    """
    # NOTE(review): the unpacked directory is not removed here and --keep is
    # not consulted -- confirm whether unpack() needs explicit cleanup.
    if detect_kind(args.source) in ("iso", "zip"):
        search_root = unpack(args.source)
    else:
        search_root = args.source

    candidates = find_engine_dlls(search_root)
    if not candidates:
        print("[!] no engine DLL found under {0}".format(search_root))
        return 1

    row_template = "{0} score={1:3d} engine={2:<8} {3} ({4})"
    for rank, cand in enumerate(candidates):
        # Only the first (best) candidate gets the star marker.
        marker = " " if rank else "*"
        print(row_template.format(
            marker, cand.score, cand.engine or "?", cand.path, cand.reason))

    best = candidates[0]
    digests = hash_file(best.path)
    summary_template = "\nbest: {0}\n sha256 {1}\n md5 {2}\n size {3}\n fuzzy {4}"
    print(summary_template.format(
        best.path, digests.sha256, digests.md5, digests.size,
        digests.fuzzy or "(install ppdeep for fuzzy hashing)"))
    return 0
def main(argv: list[str] | None = None) -> int:
    """Parse the command line, run the acquisition and return an exit code.

    Args:
        argv: Arguments without the program name. ``None`` makes argparse
            read ``sys.argv[1:]``.

    Returns:
        0 on success; 1 when ``AcquireError`` or ``OSError`` is raised (for
        ``--identify-only``, otherwise whatever ``_cmd_identify`` returns);
        2 when ``acquire`` raises ``GhidraNotFound``.

    Raises:
        SystemExit: raised by argparse for ``--help`` (code 0) and for
            invalid usage (code 2).
    """
    p = argparse.ArgumentParser(prog="ams-acquire", description=__doc__,
                                formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument("source", help="ISO/ZIP archive, an unpacked directory, or a loose engine DLL")
    p.add_argument("--game", help="link the snapshot to this game (created if missing)")
    p.add_argument("--out-dir", help="where to write the snapshot.json (default: repo snapshots/)")
    p.add_argument("--sink", choices=("db", "http", "none"), default="db",
                   help="where the snapshot lands (default: db)")
    p.add_argument("--post", dest="post_url", help="base URL for --sink http (e.g. http://127.0.0.1:8000)")
    p.add_argument("--keep", action="store_true", help="keep the temp unpack dir")
    p.add_argument("--identify-only", action="store_true",
                   help="just unpack+identify+hash; don't run Ghidra or import")
    args = p.parse_args(argv)

    if args.identify_only:
        try:
            return _cmd_identify(args)
        except (AcquireError, OSError) as e:
            print("[!] {0}".format(e))
            return 1

    # Advisory only: execution continues and acquire() is still attempted.
    # NOTE(review): the hint is skipped for --sink none -- confirm that sink
    # mode really does not need Ghidra.
    if args.sink != "none" and find_headless() is None:
        print("[i] Ghidra headless not configured (set $GHIDRA_HEADLESS or $GHIDRA_HOME).")
        print(" Run with --identify-only to test unpack/identify/hash without Ghidra.")

    try:
        r = acquire(args.source, args.game, out_dir=args.out_dir,
                    sink=args.sink, post_url=args.post_url, keep_workdir=args.keep)
    except GhidraNotFound as e:
        # Handled before the generic clause so it gets its own exit code.
        print("[!] {0}".format(e))
        return 2
    except (AcquireError, OSError) as e:
        print("[!] {0}".format(e))
        return 1

    print("[+] DLL: {0} (engine={1}, {2})".format(r.dll, r.engine, r.candidate.reason))
    print(" sha256 {0} size {1} fuzzy {2}".format(
        r.hashes.sha256, r.hashes.size, r.hashes.fuzzy or "-"))
    print(" snapshot: {0}".format(r.snapshot_path))
    snapshot = r.snapshot
    print(" types={0} methods={1} events={2} fields={3}".format(
        len(snapshot.get("types", [])), len(snapshot.get("methods", [])),
        len(snapshot.get("events", [])), len(snapshot.get("fields", []))))
    if r.sink != "none":
        print(" → catalog ({0}) id={1} game={2}".format(r.sink, r.imported_id, args.game or "-"))
    return 0
if __name__ == "__main__":
    # Script entry point: hand main()'s return value to the interpreter as
    # the process exit status.
    exit_code = main()
    sys.exit(exit_code)