services:
  gateway:
    build:
      context: .
      dockerfile: docker/gateway/Dockerfile
    restart: unless-stopped
    environment:
      GATEWAY_HOST: 0.0.0.0
      GATEWAY_PORT: 8080
      BACKEND_BASE: http://backend:8081
      MODEL_SERVER: http://backend:8081/v1
      BACKEND_WAIT_HINT: docker compose logs -f backend
      ACCESS_URLS: http://127.0.0.1:${GATEWAY_PORT:-8080}
      READONLY_FS_ROOTS: /workspace
    ports:
      - "${GATEWAY_PORT:-8080}:8080"
    volumes:
      # Mount the project directory into the gateway read-only
      - .:/workspace:ro
    depends_on:
      - backend
    healthcheck:
      test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:8080/gateway/health >/dev/null || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s

  backend:
    build:
      context: .
      dockerfile: docker/backend/Dockerfile
    restart: unless-stopped
    environment:
      HOST: 0.0.0.0
      PORT: 8081
      THINK_MODE: ${THINK_MODE:-think-on}
      CTX_SIZE: ${CTX_SIZE:-16384}
      IMAGE_MIN_TOKENS: ${IMAGE_MIN_TOKENS:-256}
      IMAGE_MAX_TOKENS: ${IMAGE_MAX_TOKENS:-1024}
      MMPROJ_OFFLOAD: ${MMPROJ_OFFLOAD:-off}
      MODEL_PATH: /models/model.gguf
      MMPROJ_PATH: /models/mmproj.gguf
      MODEL_GGUF_URL: ${MODEL_GGUF_URL:-https://huggingface.co/lmstudio-community/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf}
      MODEL_MMPROJ_URL: ${MODEL_MMPROJ_URL:-https://huggingface.co/lmstudio-community/Qwen3.5-9B-GGUF/resolve/main/mmproj-Qwen3.5-9B-BF16.gguf}
      MODEL_GGUF_SHA256: ${MODEL_GGUF_SHA256:-}
      MODEL_MMPROJ_SHA256: ${MODEL_MMPROJ_SHA256:-}
    expose:
      # Reachable only on the internal Compose network; not published to the host
      - "8081"
    volumes:
      # Named volume so downloaded model files persist across container recreation
      - toolhub-models:/models
    # Make all host GPUs available to the backend container
    gpus: all
    healthcheck:
      # Disable any healthcheck inherited from the image
      test: ["NONE"]

volumes:
  toolhub-models: