You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
# Start the vLLM OpenAI-compatible server in a Docker container named "vllm",
# serving the NousResearch/Meta-Llama-3-8B-Instruct model on all GPUs.
#
# Required environment:
#   HUGGING_FACE_HUB_TOKEN  Hugging Face access token. It is forwarded into
#                           the container by name, so the secret value never
#                           appears in this file or on the command line.
#
# SECURITY NOTE: an earlier revision embedded a literal token here. Any token
# that was ever committed should be treated as leaked and revoked.

# Fail fast with a clear message instead of starting a container that cannot
# authenticate against the Hugging Face Hub.
: "${HUGGING_FACE_HUB_TOKEN:?export HUGGING_FACE_HUB_TOKEN before running this command}"

# Notes on the flags:
#   -v ...            Reuse the host's Hugging Face cache so model downloads
#                     persist across container runs.
#   --env NAME        With no "=value", Docker copies NAME from the caller's
#                     environment.
#   --network=host    The container shares the host network stack. Docker
#                     discards published ports in this mode, so the former
#                     "-p 8000:8000" mapping had no effect and was removed.
docker run --runtime nvidia --gpus all \
  -v "${HOME}/.cache/huggingface:/root/.cache/huggingface" \
  --env HUGGING_FACE_HUB_TOKEN \
  --network=host \
  --name vllm \
  vllm/vllm-openai:latest \
  --model NousResearch/Meta-Llama-3-8B-Instruct