# Tests for the /health endpoint exposed by ``llm.main``.
#
# Each test reloads the service module under a fixed environment, swaps the
# module-level llama process handle and HTTP client for in-memory stand-ins,
# and then calls the ASGI app in-process through httpx.

from __future__ import annotations

import importlib
import os
import unittest
from types import SimpleNamespace
from unittest.mock import patch

import httpx

# Environment applied while ``llm.main`` is (re)imported.
BASE_ENV = {
    "MODEL_PATH": "D:/Sites/vision/models/qwen3/Qwen3-1.7B-Instruct-Q4_K_M.gguf",
    "LLM_MODEL_NAME": "qwen3-1.7b-instruct-q4_k_m",
    "LLM_CONTEXT_SIZE": "4096",
    "LLM_THREADS": "4",
    "LLM_GPU_LAYERS": "0",
    "LLM_PORT": "8080",
    "LLAMA_SERVER_PORT": "8081",
}


def load_llm_module():
    """Return a freshly reloaded ``llm.main`` imported under ``BASE_ENV``.

    The environment is patched only for the duration of the import and
    reload; ``patch.dict`` restores ``os.environ`` on exit. Pre-existing
    variables that are not in ``BASE_ENV`` are left alone (``clear=False``).
    """
    with patch.dict(os.environ, BASE_ENV, clear=False):
        import llm.main as llm_main

        # Reload so module-level state is rebuilt for every caller, even if
        # the module was already imported earlier in the process.
        reloaded = importlib.reload(llm_main)
        return reloaded


class StubHTTPClient:
    """Minimal stand-in for an async HTTP client that always answers the same."""

    def __init__(self, response: httpx.Response):
        # Canned response handed back by every ``get`` call.
        self.response = response

    async def get(self, *_args, **_kwargs):
        """Ignore all arguments and return the canned response."""
        return self.response


class LLMServiceTests(unittest.IsolatedAsyncioTestCase):
    """Checks of the ``/health`` route under a stubbed llama backend."""

    @staticmethod
    def _prepare_module(poll_result):
        """Reload the service module and install the process/client stubs.

        ``poll_result`` is what the fake process object's ``poll()`` returns.
        NOTE(review): presumably the service treats ``None`` as "still
        running", mirroring ``subprocess.Popen.poll`` - confirm in llm.main.
        """
        module = load_llm_module()
        module._llama_process = SimpleNamespace(poll=lambda: poll_result)

        upstream_request = httpx.Request("GET", "http://127.0.0.1:8081/v1/models")
        upstream_response = httpx.Response(
            200,
            json={"object": "list", "data": []},
            request=upstream_request,
        )
        module._http_client = StubHTTPClient(upstream_response)
        return module

    @staticmethod
    async def _get_health(module):
        """Issue ``GET /health`` against ``module.app`` in-process."""
        asgi_transport = httpx.ASGITransport(app=module.app)
        async with httpx.AsyncClient(
            transport=asgi_transport,
            base_url="http://testserver",
        ) as client:
            return await client.get("/health")

    async def test_health_returns_repo_owned_contract(self):
        """With ``poll()`` returning None, /health answers 200 with the full payload."""
        module = self._prepare_module(None)

        response = await self._get_health(module)

        expected_payload = {
            "status": "ok",
            "model": "Qwen3-1.7B-Instruct-Q4_K_M.gguf",
            "model_alias": "qwen3-1.7b-instruct-q4_k_m",
            "context_size": 4096,
            "threads": 4,
            "gpu_layers": 0,
        }
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.json(), expected_payload)

    async def test_health_reports_unavailable_when_process_is_down(self):
        """With ``poll()`` returning 1, /health answers 503 / "unavailable"."""
        module = self._prepare_module(1)

        response = await self._get_health(module)

        self.assertEqual(response.status_code, 503)
        self.assertEqual(response.json()["status"], "unavailable")


if __name__ == "__main__":
    unittest.main()