llm: add FastAPI shim, gateway LLM endpoints, tests, and docs
This commit is contained in:
53
llm/Dockerfile
Normal file
53
llm/Dockerfile
Normal file
@@ -0,0 +1,53 @@
|
||||
FROM debian:bookworm-slim AS builder

# Build llama-server from source. Repo and ref are overridable at build time.
ARG LLAMA_CPP_REPO=https://github.com/ggml-org/llama.cpp.git
# Optional branch/tag/commit to build; empty means the default branch HEAD.
ARG LLAMA_CPP_REF=

RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /src
# Shallow clone for speed. When a ref is requested, fetch just that ref and
# detach onto FETCH_HEAD: a single-ref shallow fetch only populates FETCH_HEAD
# (no local or remote-tracking refs are created), so a plain
# `git checkout "$LLAMA_CPP_REF"` would fail for branch/tag names.
RUN git clone --depth 1 ${LLAMA_CPP_REPO} llama.cpp \
    && if [ -n "${LLAMA_CPP_REF}" ]; then \
           git -C llama.cpp fetch --depth 1 origin "${LLAMA_CPP_REF}" \
           && git -C llama.cpp checkout --detach FETCH_HEAD; \
       fi

WORKDIR /src/llama.cpp
# GGML_NATIVE=OFF keeps the binary portable across CPUs (no -march=native),
# since the image may run on hardware other than the build host.
RUN cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON \
    && cmake --build build --config Release --target llama-server -j"$(nproc)"
|
||||
|
||||
FROM python:3.11-slim

# Runtime deps: bash for the entrypoint script, curl for HTTP probes,
# libgomp1 (OpenMP runtime) required by the llama-server binary.
RUN apt-get update && apt-get install -y --no-install-recommends \
        bash \
        ca-certificates \
        curl \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python deps before copying app code so the pip layer stays cached
# when only main.py changes.
COPY llm/requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

# COPY --chmod (BuildKit) sets the execute bit at copy time, which avoids the
# extra image layer that a follow-up `RUN chmod +x` would duplicate the files
# into.
COPY --from=builder --chmod=0755 /src/llama.cpp/build/bin/llama-server /usr/local/bin/llama-server
COPY llm/main.py /app/main.py
COPY --chmod=0755 llm/entrypoint.sh /entrypoint.sh

# Runtime configuration read by entrypoint.sh / main.py; all values are
# overridable with `docker run -e`. LLM_PORT is the public shim port,
# LLAMA_SERVER_PORT the internal llama-server port.
ENV MODEL_PATH=/models/Qwen3-1.7B-Instruct-Q4_K_M.gguf \
    LLM_MODEL_NAME=qwen3-1.7b-instruct-q4_k_m \
    LLM_CONTEXT_SIZE=4096 \
    LLM_THREADS=4 \
    LLM_GPU_LAYERS=0 \
    LLM_PORT=8080 \
    LLAMA_SERVER_PORT=8081 \
    LLM_STARTUP_TIMEOUT=120 \
    LLM_EXTRA_ARGS=

# NOTE(review): the container runs as root; consider adding a non-root USER
# once write/read permissions on the mounted /models volume are confirmed.
EXPOSE 8080

ENTRYPOINT ["/entrypoint.sh"]
|
||||
Reference in New Issue
Block a user