fix(qdrant): complete optimization gaps from v1

- qdrant/main.py: search/file now accepts hnsw_ef, exact, indexed_only form fields
  (was silently ignoring them, using server defaults only)
- qdrant/main.py: add GET /inspect endpoint — full diagnostic summary for all
  collections: HNSW, optimizer, quantization, segment count, payload index coverage,
  raw RAM estimate (vectors * dim * 4B * 1.5)
- gateway/main.py: vectors/search/file now forwards hnsw_ef, exact, indexed_only
- gateway/main.py: add GET /vectors/inspect proxy
This commit is contained in:
2026-03-31 20:01:52 +02:00
parent c7ea347e2b
commit 609485a0f0
2 changed files with 90 additions and 2 deletions

View File

@@ -210,6 +210,79 @@ def health():
return {"status": "error", "detail": str(e)}
def _vector_dim(params) -> int:
    """Return the total vector dimensionality stored per point.

    Handles both collection layouts:
      * single unnamed vector config -> object with a ``.size`` attribute;
      * named vectors -> dict of name -> VectorParams, where every point
        stores one vector per name, so the per-point dimensionality is the
        SUM of all configured sizes (the previous code silently fell back
        to ``VECTOR_DIM`` in this case, skewing the RAM estimate).

    Falls back to the module-level ``VECTOR_DIM`` when the config shape is
    unrecognised.
    """
    vectors = params.vectors
    if hasattr(vectors, "size"):
        return vectors.size
    if isinstance(vectors, dict):
        sizes = [v.size for v in vectors.values() if hasattr(v, "size")]
        if sizes:
            return sum(sizes)
    return VECTOR_DIM


def _collection_summary(name: str) -> dict:
    """Build the diagnostic dict for one collection (may raise on API errors).

    Returned keys: status, optimizer_status, vector/point/segment counts,
    raw RAM estimate, HNSW and optimizer configs, quantization, payload
    indexes with coverage percentages, and the configured search-time ef.
    """
    info = client.get_collection(name)
    cfg = info.config
    hnsw = cfg.hnsw_config
    opt = cfg.optimizer_config
    quant = cfg.quantization_config

    # vectors_count can be None (e.g. counting disabled) — treat as 0.
    vec_count = info.vectors_count or 0
    vec_dim = _vector_dim(cfg.params)
    # Raw RAM footprint: vectors * dim * 4 bytes (f32) * 1.5 safety factor.
    ram_estimate_mb = round(vec_count * vec_dim * 4 * 1.5 / 1_048_576, 1)

    return {
        "status": info.status.value if info.status else None,
        "optimizer_status": str(info.optimizer_status) if info.optimizer_status else None,
        "vectors_count": vec_count,
        "indexed_vectors_count": info.indexed_vectors_count,
        "points_count": info.points_count,
        "segments_count": info.segments_count,
        "ram_estimate_mb": ram_estimate_mb,
        "hnsw": {
            "m": hnsw.m,
            "ef_construct": hnsw.ef_construct,
            "on_disk": hnsw.on_disk,
            "full_scan_threshold": hnsw.full_scan_threshold,
            "max_indexing_threads": hnsw.max_indexing_threads,
        } if hnsw else None,
        "optimizer": {
            "indexing_threshold": opt.indexing_threshold,
            "default_segment_number": opt.default_segment_number,
            "max_segment_size": opt.max_segment_size,
            "memmap_threshold": opt.memmap_threshold,
            "flush_interval_sec": opt.flush_interval_sec,
        } if opt else None,
        "quantization": str(quant) if quant else None,
        "payload_indexes": {
            field: {
                "type": schema.data_type.value if hasattr(schema.data_type, "value") else str(schema.data_type),
                "points": schema.points,
                # Coverage relative to total vectors; max(..., 1) avoids /0
                # on an empty collection.
                "coverage_pct": round(schema.points / max(vec_count, 1) * 100, 1),
            }
            for field, schema in (info.payload_schema or {}).items()
        },
        "payload_index_count": len(info.payload_schema or {}),
        "search_hnsw_ef": SEARCH_HNSW_EF,
    }


@app.get("/inspect")
def inspect():
    """Return a full diagnostic summary for every collection.

    Covers: vector counts, segment counts, HNSW config, optimizer config,
    quantization, payload indexes and their coverage. Designed for production
    health checks and the Qdrant optimization workflow.
    """
    try:
        all_collections = client.get_collections().collections
    except Exception as exc:
        # Qdrant unreachable — report instead of raising a 500.
        return {"status": "error", "detail": str(exc)}

    result = {}
    for col_desc in all_collections:
        name = col_desc.name
        try:
            result[name] = _collection_summary(name)
        except Exception as exc:
            # Best-effort: one broken collection must not hide the others.
            result[name] = {"error": str(exc)}
    return {"collections": result, "total": len(result)}
# ---------------------------------------------------------------------------
# Collection management
# ---------------------------------------------------------------------------
@@ -384,11 +457,14 @@ async def search_file(
limit: int = Form(5),
score_threshold: Optional[float] = Form(None),
collection: Optional[str] = Form(None),
hnsw_ef: Optional[int] = Form(None),
exact: bool = Form(False),
indexed_only: bool = Form(False),
):
"""Embed an uploaded image via CLIP, then search Qdrant for similar vectors."""
data = await file.read()
vector = await _embed_bytes(data)
return _do_search(vector, int(limit), score_threshold, collection, {})
return _do_search(vector, int(limit), score_threshold, collection, {}, hnsw_ef, exact, indexed_only)
@app.post("/search/vector")