fix(qdrant): complete optimization gaps from v1

- qdrant/main.py: search/file now accepts hnsw_ef, exact, indexed_only form fields
  (was silently ignoring them, using server defaults only)
- qdrant/main.py: add GET /inspect endpoint — full diagnostic summary for all
  collections: HNSW, optimizer, quantization, segment count, payload index coverage,
  raw RAM estimate (vectors * dim * 4B * 1.5)
- gateway/main.py: vectors/search/file now forwards hnsw_ef, exact, indexed_only
- gateway/main.py: add GET /vectors/inspect proxy
This commit is contained in:
2026-03-31 20:01:52 +02:00
parent c7ea347e2b
commit 609485a0f0
2 changed files with 90 additions and 2 deletions

View File

@@ -210,6 +210,79 @@ def health():
return {"status": "error", "detail": str(e)}
def _vector_dim(params) -> int:
    """Return the total vector dimensionality stored per point.

    Handles both collection layouts:
      * single unnamed vector config -> object with a ``.size`` attribute;
      * named vectors -> dict of name -> VectorParams, where every point
        stores one vector per name, so the per-point dimensionality is the
        SUM of all configured sizes (the previous code silently fell back
        to ``VECTOR_DIM`` in this case, skewing the RAM estimate).

    Falls back to the module-level ``VECTOR_DIM`` when the config shape is
    unrecognised.
    """
    vectors = params.vectors
    if hasattr(vectors, "size"):
        return vectors.size
    if isinstance(vectors, dict):
        sizes = [v.size for v in vectors.values() if hasattr(v, "size")]
        if sizes:
            return sum(sizes)
    return VECTOR_DIM


def _collection_summary(name: str) -> dict:
    """Build the diagnostic dict for one collection (may raise on API errors).

    Returned keys: status, optimizer_status, vector/point/segment counts,
    raw RAM estimate, HNSW and optimizer configs, quantization, payload
    indexes with coverage percentages, and the configured search-time ef.
    """
    info = client.get_collection(name)
    cfg = info.config
    hnsw = cfg.hnsw_config
    opt = cfg.optimizer_config
    quant = cfg.quantization_config

    # vectors_count can be None (e.g. counting disabled) — treat as 0.
    vec_count = info.vectors_count or 0
    vec_dim = _vector_dim(cfg.params)
    # Raw RAM footprint: vectors * dim * 4 bytes (f32) * 1.5 safety factor.
    ram_estimate_mb = round(vec_count * vec_dim * 4 * 1.5 / 1_048_576, 1)

    return {
        "status": info.status.value if info.status else None,
        "optimizer_status": str(info.optimizer_status) if info.optimizer_status else None,
        "vectors_count": vec_count,
        "indexed_vectors_count": info.indexed_vectors_count,
        "points_count": info.points_count,
        "segments_count": info.segments_count,
        "ram_estimate_mb": ram_estimate_mb,
        "hnsw": {
            "m": hnsw.m,
            "ef_construct": hnsw.ef_construct,
            "on_disk": hnsw.on_disk,
            "full_scan_threshold": hnsw.full_scan_threshold,
            "max_indexing_threads": hnsw.max_indexing_threads,
        } if hnsw else None,
        "optimizer": {
            "indexing_threshold": opt.indexing_threshold,
            "default_segment_number": opt.default_segment_number,
            "max_segment_size": opt.max_segment_size,
            "memmap_threshold": opt.memmap_threshold,
            "flush_interval_sec": opt.flush_interval_sec,
        } if opt else None,
        "quantization": str(quant) if quant else None,
        "payload_indexes": {
            field: {
                "type": schema.data_type.value if hasattr(schema.data_type, "value") else str(schema.data_type),
                "points": schema.points,
                # Coverage relative to total vectors; max(..., 1) avoids /0
                # on an empty collection.
                "coverage_pct": round(schema.points / max(vec_count, 1) * 100, 1),
            }
            for field, schema in (info.payload_schema or {}).items()
        },
        "payload_index_count": len(info.payload_schema or {}),
        "search_hnsw_ef": SEARCH_HNSW_EF,
    }


@app.get("/inspect")
def inspect():
    """Return a full diagnostic summary for every collection.

    Covers: vector counts, segment counts, HNSW config, optimizer config,
    quantization, payload indexes and their coverage. Designed for production
    health checks and the Qdrant optimization workflow.
    """
    try:
        all_collections = client.get_collections().collections
    except Exception as exc:
        # Qdrant unreachable — report instead of raising a 500.
        return {"status": "error", "detail": str(exc)}

    result = {}
    for col_desc in all_collections:
        name = col_desc.name
        try:
            result[name] = _collection_summary(name)
        except Exception as exc:
            # Best-effort: one broken collection must not hide the others.
            result[name] = {"error": str(exc)}
    return {"collections": result, "total": len(result)}
# ---------------------------------------------------------------------------
# Collection management
# ---------------------------------------------------------------------------
@@ -384,11 +457,14 @@ async def search_file(
limit: int = Form(5),
score_threshold: Optional[float] = Form(None),
collection: Optional[str] = Form(None),
hnsw_ef: Optional[int] = Form(None),
exact: bool = Form(False),
indexed_only: bool = Form(False),
):
"""Embed an uploaded image via CLIP, then search Qdrant for similar vectors."""
data = await file.read()
vector = await _embed_bytes(data)
return _do_search(vector, int(limit), score_threshold, collection, {})
return _do_search(vector, int(limit), score_threshold, collection, {}, hnsw_ef, exact, indexed_only)
@app.post("/search/vector")