diff --git a/gateway/main.py b/gateway/main.py
index 8d61883..f196249 100644
--- a/gateway/main.py
+++ b/gateway/main.py
@@ -243,13 +243,18 @@ async def vectors_search_file(
     limit: int = Form(5),
     score_threshold: Optional[float] = Form(None),
     collection: Optional[str] = Form(None),
+    hnsw_ef: Optional[int] = Form(None),
+    exact: bool = Form(False),
+    indexed_only: bool = Form(False),
 ):
     data = await file.read()
-    fields: Dict[str, Any] = {"limit": int(limit)}
+    fields: Dict[str, Any] = {"limit": int(limit), "exact": exact, "indexed_only": indexed_only}
     if score_threshold is not None:
         fields["score_threshold"] = float(score_threshold)
     if collection is not None:
         fields["collection"] = collection
+    if hnsw_ef is not None:
+        fields["hnsw_ef"] = int(hnsw_ef)
     async with httpx.AsyncClient(timeout=VISION_TIMEOUT) as client:
         return await _post_file(client, f"{QDRANT_SVC_URL}/search/file", data, fields)
 
@@ -284,6 +289,13 @@ async def vectors_collection_info(name: str):
         return await _get_json(client, f"{QDRANT_SVC_URL}/collections/{name}")
 
 
+@app.get("/vectors/inspect")
+async def vectors_inspect():
+    """Full diagnostic summary for all Qdrant collections (HNSW, optimizer, payload indexes, RAM estimate)."""
+    async with httpx.AsyncClient(timeout=VISION_TIMEOUT) as client:
+        return await _get_json(client, f"{QDRANT_SVC_URL}/inspect")
+
+
 @app.delete("/vectors/collections/{name}")
 async def vectors_delete_collection(name: str):
     async with httpx.AsyncClient(timeout=VISION_TIMEOUT) as client:
diff --git a/qdrant/main.py b/qdrant/main.py
index 5d50687..4206e77 100644
--- a/qdrant/main.py
+++ b/qdrant/main.py
@@ -210,6 +210,88 @@ def health():
         return {"status": "error", "detail": str(e)}
 
 
+@app.get("/inspect")
+def inspect():
+    """Return a full diagnostic summary for every collection.
+
+    Covers: vector counts, segment counts, HNSW config, optimizer config,
+    quantization, payload indexes and their coverage. Designed for production
+    health checks and the Qdrant optimization workflow.
+
+    A failure while listing collections returns ``{"status": "error", ...}``;
+    a failure on one collection is recorded under that collection's ``"error"``
+    key so the remaining collections are still reported.
+    """
+    try:
+        all_collections = client.get_collections().collections
+    except Exception as exc:
+        return {"status": "error", "detail": str(exc)}
+
+    result = {}
+    for col_desc in all_collections:
+        name = col_desc.name
+        try:
+            info = client.get_collection(name)
+            cfg = info.config
+            hnsw = cfg.hnsw_config
+            opt = cfg.optimizer_config
+            quant = cfg.quantization_config
+            params = cfg.params
+
+            points_count = info.points_count or 0
+            # vectors_count is optional and may be missing from the response;
+            # fall back to the point count so the estimate is not silently 0.
+            vec_count = getattr(info, "vectors_count", None) or points_count
+
+            # Estimate raw RAM footprint: vectors * dim * 4 bytes * 1.5 safety factor
+            vec_dim = (
+                params.vectors.size
+                if hasattr(params.vectors, "size")
+                else VECTOR_DIM
+            )
+            ram_estimate_mb = round(vec_count * vec_dim * 4 * 1.5 / 1_048_576, 1)
+
+            result[name] = {
+                "status": info.status.value if info.status else None,
+                "optimizer_status": str(info.optimizer_status) if info.optimizer_status else None,
+                "vectors_count": vec_count,
+                "indexed_vectors_count": info.indexed_vectors_count,
+                "points_count": info.points_count,
+                "segments_count": info.segments_count,
+                "ram_estimate_mb": ram_estimate_mb,
+                "hnsw": {
+                    "m": hnsw.m,
+                    "ef_construct": hnsw.ef_construct,
+                    "on_disk": hnsw.on_disk,
+                    "full_scan_threshold": hnsw.full_scan_threshold,
+                    "max_indexing_threads": hnsw.max_indexing_threads,
+                } if hnsw else None,
+                "optimizer": {
+                    "indexing_threshold": opt.indexing_threshold,
+                    "default_segment_number": opt.default_segment_number,
+                    "max_segment_size": opt.max_segment_size,
+                    "memmap_threshold": opt.memmap_threshold,
+                    "flush_interval_sec": opt.flush_interval_sec,
+                } if opt else None,
+                "quantization": str(quant) if quant else None,
+                "payload_indexes": {
+                    k: {
+                        "type": v.data_type.value if hasattr(v.data_type, "value") else str(v.data_type),
+                        "points": v.points,
+                        # An index covers points, so the ratio is taken against points, not vectors.
+                        "coverage_pct": round(v.points / max(points_count, 1) * 100, 1),
+                    }
+                    for k, v in (info.payload_schema or {}).items()
+                },
+                "payload_index_count": len(info.payload_schema or {}),
+                "search_hnsw_ef": SEARCH_HNSW_EF,
+            }
+        except Exception as exc:
+            result[name] = {"error": str(exc)}
+
+    return {"collections": result, "total": len(result)}
+
+
 # ---------------------------------------------------------------------------
 # Collection management
 # ---------------------------------------------------------------------------
@@ -384,11 +466,14 @@ async def search_file(
     limit: int = Form(5),
     score_threshold: Optional[float] = Form(None),
     collection: Optional[str] = Form(None),
+    hnsw_ef: Optional[int] = Form(None),
+    exact: bool = Form(False),
+    indexed_only: bool = Form(False),
 ):
     """Embed an uploaded image via CLIP, then search Qdrant for similar vectors."""
     data = await file.read()
     vector = await _embed_bytes(data)
-    return _do_search(vector, int(limit), score_threshold, collection, {})
+    return _do_search(vector, int(limit), score_threshold, collection, {}, hnsw_ef, exact, indexed_only)
 
 
 @app.post("/search/vector")