fix(qdrant): complete optimization gaps from v1
- qdrant/main.py: search/file now accepts hnsw_ef, exact, indexed_only form fields (previously they were silently ignored and only server defaults were used)
- qdrant/main.py: add GET /inspect endpoint — full diagnostic summary for all collections: HNSW, optimizer, quantization, segment count, payload index coverage, raw RAM estimate (vectors * dim * 4B * 1.5)
- gateway/main.py: vectors/search/file now forwards hnsw_ef, exact, indexed_only
- gateway/main.py: add GET /vectors/inspect proxy
This commit is contained in:
@@ -210,6 +210,79 @@ def health():
|
||||
return {"status": "error", "detail": str(e)}
|
||||
|
||||
|
||||
@app.get("/inspect")
def inspect():
    """Return a full diagnostic summary for every collection.

    Covers: vector counts, segment counts, HNSW config, optimizer config,
    quantization, payload indexes and their coverage. Designed for production
    health checks and the Qdrant optimization workflow.

    Returns:
        ``{"collections": {name: summary, ...}, "total": <int>}`` on success.
        A collection whose inspection raised maps to ``{"error": "<message>"}``
        so one failing collection does not hide the others. If the collection
        list itself cannot be fetched, ``{"status": "error", "detail": "..."}``
        is returned instead.

    Notes:
        ``vectors_count`` in each summary falls back to the point count when
        the server does not report a vector count.
    """
    try:
        all_collections = client.get_collections().collections
    except Exception as exc:
        return {"status": "error", "detail": str(exc)}

    result = {}
    for col_desc in all_collections:
        name = col_desc.name
        try:
            info = client.get_collection(name)
            cfg = info.config
            hnsw = cfg.hnsw_config
            opt = cfg.optimizer_config
            quant = cfg.quantization_config
            params = cfg.params

            points_count = info.points_count or 0
            # vectors_count is optional and may come back as None. Without a
            # fallback the RAM estimate collapses to 0 even for a populated
            # collection, so use the point count as the best available proxy.
            vec_count = (
                info.vectors_count
                if info.vectors_count is not None
                else points_count
            )

            # A single unnamed vector config exposes `.size`; anything else
            # (presumably a named-vectors mapping -- verify) uses VECTOR_DIM.
            vec_dim = (
                params.vectors.size
                if hasattr(params.vectors, "size")
                else VECTOR_DIM
            )

            # Estimate raw RAM footprint:
            # vectors * dim * 4 bytes * 1.5 safety factor, reported in MiB.
            ram_estimate_mb = round(vec_count * vec_dim * 4 * 1.5 / 1_048_576, 1)

            payload_schema = info.payload_schema or {}
            # A payload index's `points` field counts points, so coverage is
            # measured against the point count rather than the vector count.
            # max(..., 1) avoids a ZeroDivisionError on an empty collection.
            coverage_base = max(points_count, 1)

            result[name] = {
                "status": info.status.value if info.status else None,
                "optimizer_status": (
                    str(info.optimizer_status) if info.optimizer_status else None
                ),
                "vectors_count": vec_count,
                "indexed_vectors_count": info.indexed_vectors_count,
                "points_count": info.points_count,
                "segments_count": info.segments_count,
                "ram_estimate_mb": ram_estimate_mb,
                "hnsw": {
                    "m": hnsw.m,
                    "ef_construct": hnsw.ef_construct,
                    "on_disk": hnsw.on_disk,
                    "full_scan_threshold": hnsw.full_scan_threshold,
                    "max_indexing_threads": hnsw.max_indexing_threads,
                } if hnsw else None,
                "optimizer": {
                    "indexing_threshold": opt.indexing_threshold,
                    "default_segment_number": opt.default_segment_number,
                    "max_segment_size": opt.max_segment_size,
                    "memmap_threshold": opt.memmap_threshold,
                    "flush_interval_sec": opt.flush_interval_sec,
                } if opt else None,
                "quantization": str(quant) if quant else None,
                "payload_indexes": {
                    field: {
                        "type": (
                            schema.data_type.value
                            if hasattr(schema.data_type, "value")
                            else str(schema.data_type)
                        ),
                        "points": schema.points,
                        "coverage_pct": round(
                            schema.points / coverage_base * 100, 1
                        ),
                    }
                    for field, schema in payload_schema.items()
                },
                "payload_index_count": len(payload_schema),
                "search_hnsw_ef": SEARCH_HNSW_EF,
            }
        except Exception as exc:
            # Best-effort per collection: record the failure and keep going.
            result[name] = {"error": str(exc)}

    return {"collections": result, "total": len(result)}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Collection management
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -384,11 +457,14 @@ async def search_file(
|
||||
limit: int = Form(5),
|
||||
score_threshold: Optional[float] = Form(None),
|
||||
collection: Optional[str] = Form(None),
|
||||
hnsw_ef: Optional[int] = Form(None),
|
||||
exact: bool = Form(False),
|
||||
indexed_only: bool = Form(False),
|
||||
):
|
||||
"""Embed an uploaded image via CLIP, then search Qdrant for similar vectors."""
|
||||
data = await file.read()
|
||||
vector = await _embed_bytes(data)
|
||||
return _do_search(vector, int(limit), score_threshold, collection, {})
|
||||
return _do_search(vector, int(limit), score_threshold, collection, {}, hnsw_ef, exact, indexed_only)
|
||||
|
||||
|
||||
@app.post("/search/vector")
|
||||
|
||||
Reference in New Issue
Block a user