# sglang
# Docker Compose definition for an SGLang inference server.

# Compose definition for one service that runs sglang.launch_server
# from the lmsysorg/sglang image.
services:
  sglang:
    # NOTE(review): `latest` is a floating tag; consider pinning a specific
    # image version so restarts do not silently change the server build.
    image: lmsysorg/sglang:latest
    container_name: sglang-qwen35-9b

    restart: unless-stopped

    # Host port 51810 forwards to container port 30000, which is the port the
    # server is told to listen on via --port below. Keep the two in sync.
    ports:
      - "51810:30000"

    # Use the host's IPC namespace.
    # NOTE(review): presumably needed for shared memory used by the
    # inference runtime; confirm before removing.
    ipc: host

    environment:
      # Same path as the container side of the volume mount below.
      HF_HOME: /root/.cache/huggingface
      # Quoted so the value is passed as a string rather than an integer.
      CUDA_VISIBLE_DEVICES: "7"

    # Mount the host Hugging Face cache so the snapshot referenced by
    # --model-path is read from the host rather than from the image.
    volumes:
      - ~/.cache/huggingface:/root/.cache/huggingface

    # Every host GPU is exposed to the container; CUDA_VISIBLE_DEVICES above
    # selects which device the server process actually uses.
    gpus: all

    # Folded scalar: the lines below are joined with single spaces into one
    # command line. `>-` strips the trailing newline from the result.
    command: >-
      python3 -m sglang.launch_server
      --model-path /root/.cache/huggingface/hub/models--Qwen--Qwen3.5-9B/snapshots/c202236235762e1c871ad0ccb60c8ee5ba337b9a
      --host 0.0.0.0
      --port 30000
      --trust-remote-code
      --context-length 8192
      --mem-fraction-static 0.95
      --max-running-requests 1
      --skip-server-warmup
      --disable-cuda-graph
      --enable-metrics
      --served-model-name Qwen3.5-9B