Closes the chain from a game file to a catalog entry: unpack an ISO/ZIP, content-identify the engine DLL (CMC_ObjectsContainer marker in RTTI, so a renamed file is still found), hash it (sha256 + md5 + optional ssdeep via ppdeep), run Ghidra headless with the extractor, enrich and import the snapshot. - unpack.py: bsdtar (ISO9660 + ZIP) with a pure-Python zipfile fallback - identify.py: content-based engine-DLL picker + hashing - ghidra.py: analyzeHeadless launcher discovery + post-script run - pipeline.py: orchestration with injectable extract_fn; sink db|http|none - cli.py: python -m ams.acquire (incl. --identify-only dry run) - tests: 7 new (forged PE markers + stubbed extractor) -> 18/18 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
88 lines
3.8 KiB
Python
88 lines
3.8 KiB
Python
"""Acquire a game from an ISO/ZIP (or loose DLL) into the catalog.
|
|
|
|
python -m ams.acquire game.iso --game "Reksio i UFO"
|
|
python -m ams.acquire dump_dir --game "Reksio i UFO" --sink http --post http://127.0.0.1:8000
|
|
python -m ams.acquire PIKLIB8.dll --identify-only # identify+hash only, skip the heavy pass
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import sys
|
|
|
|
from . import acquire
|
|
from .ghidra import GhidraNotFound, find_headless
|
|
from .identify import find_engine_dlls, hash_file
|
|
from .pipeline import AcquireError
|
|
from .unpack import detect_kind, unpack
|
|
|
|
|
|
def _cmd_identify(args) -> int:
    """Dry-run handler: locate the engine DLL and print its hashes.

    ``args.source`` is unpacked first when ``detect_kind`` reports ``"iso"`` or
    ``"zip"``; any other kind is searched as given. Every candidate returned by
    ``find_engine_dlls`` is listed (the first one is starred), then the first
    candidate is hashed and its digests are printed.

    Returns:
        0 when at least one candidate was found, 1 otherwise.
    """
    # NOTE(review): the directory returned by unpack() is not removed here and
    # ``args.keep`` is not consulted -- confirm whether unpack() owns cleanup.
    if detect_kind(args.source) in ("iso", "zip"):
        search_root = unpack(args.source)
    else:
        search_root = args.source

    candidates = find_engine_dlls(search_root)
    if not candidates:
        print("[!] no engine DLL found under {0}".format(search_root))
        return 1

    row_fmt = "{0} score={1:3d} engine={2:<8} {3} ({4})"
    for position, cand in enumerate(candidates):
        # Only the first entry (the one that gets hashed below) is starred.
        marker = " " if position else "*"
        print(row_fmt.format(marker, cand.score, cand.engine or "?", cand.path, cand.reason))

    best = candidates[0]
    digests = hash_file(best.path)
    fuzzy_text = digests.fuzzy or "(install ppdeep for fuzzy hashing)"
    summary_fmt = "\nbest: {0}\n sha256 {1}\n md5 {2}\n size {3}\n fuzzy {4}"
    print(summary_fmt.format(best.path, digests.sha256, digests.md5, digests.size, fuzzy_text))
    return 0
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
|
|
p = argparse.ArgumentParser(prog="ams-acquire", description=__doc__,
|
|
formatter_class=argparse.RawDescriptionHelpFormatter)
|
|
p.add_argument("source", help="ISO/ZIP archive, an unpacked directory, or a loose engine DLL")
|
|
p.add_argument("--game", help="link the snapshot to this game (created if missing)")
|
|
p.add_argument("--out-dir", help="where to write the snapshot.json (default: repo snapshots/)")
|
|
p.add_argument("--sink", choices=("db", "http", "none"), default="db",
|
|
help="where the snapshot lands (default: db)")
|
|
p.add_argument("--post", dest="post_url", help="base URL for --sink http (e.g. http://127.0.0.1:8000)")
|
|
p.add_argument("--keep", action="store_true", help="keep the temp unpack dir")
|
|
p.add_argument("--identify-only", action="store_true",
|
|
help="just unpack+identify+hash; don't run Ghidra or import")
|
|
args = p.parse_args(argv)
|
|
|
|
if args.identify_only:
|
|
try:
|
|
return _cmd_identify(args)
|
|
except (AcquireError, OSError) as e:
|
|
print("[!] {0}".format(e))
|
|
return 1
|
|
|
|
if args.sink != "none" and find_headless() is None:
|
|
print("[i] Ghidra headless not configured (set $GHIDRA_HEADLESS or $GHIDRA_HOME).")
|
|
print(" Run with --identify-only to test unpack/identify/hash without Ghidra.")
|
|
|
|
try:
|
|
r = acquire(args.source, args.game, out_dir=args.out_dir,
|
|
sink=args.sink, post_url=args.post_url, keep_workdir=args.keep)
|
|
except GhidraNotFound as e:
|
|
print("[!] {0}".format(e))
|
|
return 2
|
|
except (AcquireError, OSError) as e:
|
|
print("[!] {0}".format(e))
|
|
return 1
|
|
|
|
print("[+] DLL: {0} (engine={1}, {2})".format(r.dll, r.engine, r.candidate.reason))
|
|
print(" sha256 {0} size {1} fuzzy {2}".format(
|
|
r.hashes.sha256, r.hashes.size, r.hashes.fuzzy or "-"))
|
|
print(" snapshot: {0}".format(r.snapshot_path))
|
|
b = r.snapshot.get("binary", {})
|
|
print(" types={0} methods={1} events={2} fields={3}".format(
|
|
len(r.snapshot.get("types", [])), len(r.snapshot.get("methods", [])),
|
|
len(r.snapshot.get("events", [])), len(r.snapshot.get("fields", []))))
|
|
if r.sink != "none":
|
|
print(" → catalog ({0}) id={1} game={2}".format(r.sink, r.imported_id, args.game or "-"))
|
|
return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry: main()'s return value becomes the process exit status.
    raise SystemExit(main())
|