Similar versions: surface-overlap metric + endpoint + UI panel
Ranks catalogued engine versions by how much of their CMC_* surface they share,
which (unlike a binary fuzzy hash) stays meaningful across compilers — the golden
pair PIKLIB8/MSVC6 vs bloomoodll/MSVC8 scores 85%.
- similarity.py: jaccard, surface_similarity (per-axis + pooled overall),
fuzzy_similarity (ssdeep via ppdeep, secondary signal)
- service.similar_snapshots + GET /snapshots/{id}/similar?min=N (SimilarHit)
- UI: "Podobne wersje" panel in the snapshot browser (overlap bar + ⇄ diff)
- tests: 6 new (jaccard, identical/disjoint, golden pair 0<x<100, fuzzy,
endpoint + min filter) -> 28/28
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -39,3 +39,20 @@ def get_snapshot(snapshot_id: int, db: Session = Depends(get_db)) -> models.Snap
|
||||
if snap is None:
|
||||
raise HTTPException(404, "snapshot not found")
|
||||
return snap
|
||||
|
||||
|
||||
@router.get("/{snapshot_id}/similar", response_model=list[schemas.SimilarHit])
def similar_snapshots(
    snapshot_id: int,
    min: int = Query(0, ge=0, le=100, description="drop hits below this overall score"),
    db: Session = Depends(get_db),
) -> list[schemas.SimilarHit]:
    """Return the snapshots similar to `snapshot_id` as `SimilarHit` rows.

    `min` (0-100) is forwarded to the service as the lowest `overall` score to
    keep. Responds with 404 when the service returns None for the target.
    """
    scored = service.similar_snapshots(db, snapshot_id, minimum=min)
    if scored is None:
        # None from the service means the target snapshot id is unknown.
        raise HTTPException(404, "snapshot not found")

    response: list[schemas.SimilarHit] = []
    for candidate, report in scored:
        response.append(
            schemas.SimilarHit(
                snapshot=schemas.SnapshotOut.model_validate(candidate),
                overall=report["overall"],
                fuzzy=report["fuzzy"],
                axes=report["axes"],
            )
        )
    return response
|
||||
|
||||
@@ -43,6 +43,13 @@ class GameDetail(GameOut):
|
||||
snapshots: list[SnapshotOut] = []
|
||||
|
||||
|
||||
class SimilarHit(BaseModel):
    """One scored entry in a "similar snapshots" listing.

    Fields:
        snapshot: the catalogued snapshot this score belongs to.
        overall: pooled surface-overlap score, 0–100.
        fuzzy: ssdeep similarity of the raw binary, when available.
        axes: per-axis breakdown, {shared, only_a, only_b, score}.
    """

    snapshot: SnapshotOut
    overall: int
    fuzzy: int | None
    axes: dict
|
||||
|
||||
|
||||
class JobOut(BaseModel):
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
id: int
|
||||
|
||||
@@ -42,6 +42,30 @@ def _get_or_create_game(db: Session, name: str) -> models.Game:
|
||||
return game
|
||||
|
||||
|
||||
def similar_snapshots(
    db: Session, snapshot_id: int, minimum: int = 0
) -> list[tuple[models.Snapshot, dict]] | None:
    """Rank every other catalogued snapshot against #snapshot_id by surface similarity.

    Args:
        db: session used to load the target and the candidate snapshots.
        snapshot_id: id of the snapshot the others are compared against.
        minimum: lowest `overall` score a candidate needs in order to be kept.

    Returns:
        (snapshot, score) pairs (score = ams.similarity report) sorted by
        `overall` descending; candidates with equal scores stay in ascending-id
        order. Returns None if the target doesn't exist.
    """
    # Function-scope imports, kept as in the original
    # (presumably to avoid an import cycle — TODO confirm).
    from ..similarity import similarity
    from ..snapshot import Snapshot as Surface

    target = db.get(models.Snapshot, snapshot_id)
    if target is None:
        return None
    # Build the target's surface once; it is reused for every comparison.
    t_surface = Surface(target.data)

    # Read candidates in a fixed order: combined with the stable sort below,
    # hits that tie on `overall` are ranked deterministically.
    candidates = (
        select(models.Snapshot)
        .where(models.Snapshot.id != snapshot_id)
        .order_by(models.Snapshot.id)
    )

    hits: list[tuple[models.Snapshot, dict]] = []
    for other in db.scalars(candidates):
        score = similarity(t_surface, Surface(other.data))
        if score["overall"] >= minimum:
            hits.append((other, score))
    hits.sort(key=lambda pair: pair[1]["overall"], reverse=True)
    return hits
|
||||
|
||||
|
||||
def import_snapshot(db: Session, data: dict[str, Any], game_name: str | None = None) -> models.Snapshot:
|
||||
"""Upsert a snapshot, deduped by the binary's sha256 (falling back to a content hash)."""
|
||||
sha = data.get("binary", {}).get("sha256") or _content_sha(data)
|
||||
|
||||
@@ -273,6 +273,9 @@ async function browse(id) {
|
||||
];
|
||||
out.innerHTML = "";
|
||||
out.append(el("div", { class: "diff-head" }, "Przegląd: ", el("b", {}, `${snap.binary_name} [${snap.engine}/${snap.compiler}]`)));
|
||||
const simBox = el("div", { class: "similar" });
|
||||
out.append(simBox);
|
||||
loadSimilar(id, simBox);
|
||||
const filter = el("input", { class: "owner browse-filter", placeholder: "filtruj…", oninput: () => render() });
|
||||
const tabbar = el("div", {});
|
||||
const list = el("div", {});
|
||||
@@ -292,6 +295,27 @@ async function browse(id) {
|
||||
render();
|
||||
}
|
||||
|
||||
// Fetch the similarity hits for `targetId` and render the first six into `box`:
// score, overlap bar, clickable name, optional fuzzy score and a diff shortcut.
async function loadSimilar(targetId, box) {
  let hits;
  try {
    hits = await jget(`/snapshots/${targetId}/similar`);
  } catch {
    // endpoint absent / single-snapshot catalog — just show nothing
    return;
  }
  if (!hits.length) return;

  box.append(el("div", { class: "similar-title" }, "Podobne wersje (overlap powierzchni)"));

  hits.slice(0, 6).forEach((hit) => {
    const snap = hit.snapshot;

    // Clicking the name opens that snapshot in the browser view.
    const openSnapshot = () => browse(snap.id);
    // Clicking the diff link selects target vs. this hit and runs the comparison.
    const diffAgainstTarget = () => {
      state.a = targetId;
      state.b = snap.id;
      refreshSelection();
      compare();
    };

    const fill = el("span", { class: "simfill", style: `width:${hit.overall}%` });
    const bar = el("span", { class: "simbar" }, fill);
    const scoreLabel = el("span", { class: "simscore" }, `${hit.overall}%`);
    const nameLabel = el(
      "span",
      { class: "simname", title: "przejrzyj", onclick: openSnapshot },
      `${snap.binary_name} [${snap.engine || "?"}]`
    );
    // The fuzzy score is optional; a null child is passed through when it is missing.
    const fuzzyLabel =
      hit.fuzzy != null
        ? el("span", { class: "simfuzzy", title: "ssdeep binarki" }, `fuzzy ${hit.fuzzy}`)
        : null;
    const diffLink = el(
      "span",
      { class: "simdiff", title: "porównaj tę wersję z aktualną", onclick: diffAgainstTarget },
      "⇄ diff"
    );

    box.append(el("div", { class: "simrow" }, scoreLabel, bar, nameLabel, fuzzyLabel, diffLink));
  });
}
|
||||
|
||||
// --- boot -------------------------------------------------------------------------------------
|
||||
$("compare").addEventListener("click", compare);
|
||||
$("owner").addEventListener("keydown", (e) => { if (e.key === "Enter") compare(); });
|
||||
|
||||
@@ -100,6 +100,19 @@ body { background: var(--bg); color: var(--fg); font: 13px/1.45 var(--mono); }
|
||||
.empty { color: var(--dim); font-style: italic; }
|
||||
.moved { color: var(--accent); }
|
||||
|
||||
/* --- "similar versions" panel: one row per hit (score, bar, name, fuzzy, diff link) --- */
.similar {
  margin: 4px 0 16px;
}
.similar-title {
  color: var(--dim);
  text-transform: uppercase;
  font-size: 11px;
  letter-spacing: 1px;
  margin-bottom: 6px;
}
.simrow {
  display: flex;
  align-items: center;
  gap: 10px;
  padding: 4px 0;
}
.simscore {
  width: 38px;
  text-align: right;
  color: var(--accent);
  font-weight: 600;
}
/* fixed-width track; .simfill inside it is sized inline from the score */
.simbar {
  flex: 0 0 120px;
  height: 7px;
  background: #16202c;
  border: 1px solid var(--border);
  border-radius: 4px;
  overflow: hidden;
}
.simfill {
  display: block;
  height: 100%;
  background: linear-gradient(90deg, var(--accent2), var(--add));
}
/* name takes the remaining width and is truncated with an ellipsis */
.simname {
  flex: 1;
  min-width: 0;
  color: var(--fg);
  cursor: pointer;
  overflow: hidden;
  text-overflow: ellipsis;
  white-space: nowrap;
}
.simname:hover {
  color: var(--accent);
  text-decoration: underline;
}
.simfuzzy {
  color: var(--dim);
  font-size: 11px;
}
.simdiff {
  color: var(--accent);
  cursor: pointer;
  font-size: 11px;
}
.simdiff:hover {
  text-decoration: underline;
}
|
||||
|
||||
.browse-filter { margin-bottom: 10px; }
|
||||
.btab { display: inline-block; padding: 4px 10px; margin-right: 6px; border: 1px solid var(--border);
|
||||
border-radius: 6px; cursor: pointer; color: var(--dim); }
|
||||
|
||||
Reference in New Issue
Block a user