Commit a6a7b11: Add scripts for vllm server
xusenlin committed Aug 10, 2023 (1 parent 1293a64)
Showing 1 changed file: docs/VLLM_SCRIPT.md (41 additions, 2 deletions)
@@ -22,7 +22,7 @@ docker build -f docker/Dockerfile.vllm -t llm-api:vllm .
+ `tokenizer-mode` (optional): mode of the `tokenizer`, defaults to `auto`


+ `tensor-parallel-size` (optional): number of GPUs, defaults to `1`


+ `embedding_name` (optional): path to the embedding model files; `moka-ai/m3e-base` or `BAAI/bge-large-zh` is recommended
@@ -47,5 +47,44 @@ docker run -it -d --gpus all --ipc=host --net=host -p 80:80 --name=qwen \
--model_name qwen \
--model Qwen/Qwen-7B-Chat \
--trust-remote-code \
--tokenizer-mode slow \
--dtype half
```

### InternLM

internlm-chat-7b:

```shell
docker run -it -d --gpus all --ipc=host --net=host -p 80:80 --name=internlm \
--ulimit memlock=-1 --ulimit stack=67108864 \
-v `pwd`:/workspace \
llm-api:vllm \
python api/vllm_server.py \
--port 80 \
--allow-credentials \
--model_name internlm \
--model internlm/internlm-chat-7b \
--trust-remote-code \
--tokenizer-mode slow \
--dtype half
```

### Baichuan-13B-Chat

baichuan-inc/Baichuan-13B-Chat:

```shell
docker run -it -d --gpus all --ipc=host --net=host -p 80:80 --name=baichuan-13b-chat \
--ulimit memlock=-1 --ulimit stack=67108864 \
-v `pwd`:/workspace \
llm-api:vllm \
python api/vllm_server.py \
--port 80 \
--allow-credentials \
--model_name baichuan-13b-chat \
--model baichuan-inc/Baichuan-13B-Chat \
--trust-remote-code \
--tokenizer-mode slow \
--dtype half
```
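Each of the containers above starts `api/vllm_server.py` listening on port 80. As a quick smoke test, the sketch below builds a chat request body and shows the `curl` call to send it. The `/v1/chat/completions` path and the OpenAI-compatible request shape are assumptions about this server, not something stated in the doc, so adjust them to the routes your deployment actually serves.

```shell
# Build a chat request body for a server started with one of the
# commands above. NOTE: the endpoint path and JSON shape below are
# assumptions (OpenAI-compatible API); verify against api/vllm_server.py.
PAYLOAD='{"model": "qwen", "messages": [{"role": "user", "content": "Hello"}]}'
echo "$PAYLOAD"

# Once the container is up, send the request (left commented here
# because it requires a server actually listening on port 80):
# curl -s http://127.0.0.1:80/v1/chat/completions \
#   -H "Content-Type: application/json" \
#   -d "$PAYLOAD"
```

Swap `"qwen"` for `internlm` or `baichuan-13b-chat` to match the `--model_name` you passed when starting the container.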
