# Docker Compose stack: an HTTP gateway fronting a local model backend.
# - gateway: built from docker/gateway/Dockerfile, exposed on the host,
#   proxies to the backend and serves the repo read-only at /workspace.
# - backend: built from docker/backend/Dockerfile, internal-only (expose),
#   downloads GGUF model files into the shared `toolhub-models` volume.
services:
  gateway:
    build:
      context: .
      dockerfile: docker/gateway/Dockerfile
    restart: unless-stopped
    environment:
      # Env values are quoted as strings so YAML does not retype them
      # (ports would otherwise parse as integers).
      GATEWAY_HOST: "0.0.0.0"
      GATEWAY_PORT: "8080"
      BACKEND_BASE: "http://backend:8081"
      MODEL_SERVER: "http://backend:8081/v1"
      BACKEND_WAIT_HINT: "docker compose logs -f backend"
      ACCESS_URLS: "http://127.0.0.1:${GATEWAY_PORT:-8080}"
      READONLY_FS_ROOTS: "/workspace"
    ports:
      # Host port is overridable via GATEWAY_PORT; container always listens on 8080.
      - "${GATEWAY_PORT:-8080}:8080"
    volumes:
      # Repository mounted read-only; matches READONLY_FS_ROOTS above.
      - .:/workspace:ro
    depends_on:
      # Start ordering only — backend's healthcheck is disabled, so a
      # `condition: service_healthy` gate is not available here.
      - backend
    healthcheck:
      test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:8080/gateway/health >/dev/null || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s

  backend:
    build:
      context: .
      dockerfile: docker/backend/Dockerfile
    restart: unless-stopped
    environment:
      HOST: "0.0.0.0"
      PORT: "8081"
      # Tunables, all overridable from the shell / .env with sane defaults.
      THINK_MODE: "${THINK_MODE:-think-on}"
      CTX_SIZE: "${CTX_SIZE:-16384}"
      IMAGE_MIN_TOKENS: "${IMAGE_MIN_TOKENS:-256}"
      IMAGE_MAX_TOKENS: "${IMAGE_MAX_TOKENS:-1024}"
      MMPROJ_OFFLOAD: "${MMPROJ_OFFLOAD:-off}"
      # Paths inside the shared model volume (see `volumes:` below).
      MODEL_PATH: "/models/model.gguf"
      MMPROJ_PATH: "/models/mmproj.gguf"
      # Default download sources; presumably fetched by the backend
      # entrypoint on first start — confirm against docker/backend/Dockerfile.
      MODEL_GGUF_URL: "${MODEL_GGUF_URL:-https://huggingface.co/lmstudio-community/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf}"
      MODEL_MMPROJ_URL: "${MODEL_MMPROJ_URL:-https://huggingface.co/lmstudio-community/Qwen3.5-9B-GGUF/resolve/main/mmproj-Qwen3.5-9B-BF16.gguf}"
      # Optional integrity pins; empty string skips verification by default.
      MODEL_GGUF_SHA256: "${MODEL_GGUF_SHA256:-}"
      MODEL_MMPROJ_SHA256: "${MODEL_MMPROJ_SHA256:-}"
    expose:
      # Internal-only: reachable from the gateway as backend:8081,
      # never published on the host.
      - "8081"
    volumes:
      # Named volume so downloaded model weights survive container rebuilds.
      - toolhub-models:/models
    # NOTE(review): `gpus: all` requires a recent Compose (v2.30+) and a
    # configured GPU runtime (e.g. NVIDIA Container Toolkit) — confirm on
    # target hosts; older setups need the `deploy.resources` form instead.
    gpus: all
    healthcheck:
      # "NONE" explicitly disables any healthcheck inherited from the image.
      test: ["NONE"]

volumes:
  toolhub-models: