DGX Spark
vLLM
SGLang
shell
curl -LsSf https://astral.sh/uv/install.sh | sh
shell
uv venv .sglang --python 3.12
shell
source .sglang/bin/activate
shell
uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130
shell
uv pip install xgrammar triton flashinfer-python --prerelease=allow
shell
git clone --recursive https://github.com/sgl-project/sglang.git
cd sglang
uv pip install -e "python[cu130]" --prerelease=allow
shell
export TORCH_CUDA_ARCH_LIST=12.1a # Spark 12.1, 12.0f, 12.1a
export TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
export PATH=/usr/local/cuda/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
shell
wget https://github.com/sgl-project/whl/releases/download/v0.3.16.post5/sgl_kernel-0.3.16.post5+cu130-cp310-abi3-manylinux2014_aarch64.whl#sha256=6624ea1afaff34010d8ff6096182f6ac9956c94d86b6d13f15ea962ae9995884
uv pip install sgl_kernel-0.3.16.post5+cu130-cp310-abi3-manylinux2014_aarch64.whl
shell
sudo sysctl -w vm.drop_caches=3
shell
sudo apt install python3-dev
shell
uv run python -m sglang.launch_server --model-path google/gemma-3n-E4B-it --port 30000 --mem-fraction-static 0.8