diff --git a/Makefile b/Makefile index 2b94b81..b6d0212 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ ARCH := ${shell uname -m} - -LOCAL_IP=${hostname -I | awk '{print $$1}' | xargs} +VERSION := v0.1.1 +NODE_NAME=${shell hostname} all: ctl dash spdk runmodel @@ -14,14 +14,13 @@ dash: -rm ./target/dashboard/* -rf cp ./dashboard/* ./target/dashboard -rL cp ./deployment/dashboard.Dockerfile ./target/dashboard/Dockerfile - -sudo docker image rm inferx/inferx_dashboard:v0.1.0 - sudo docker build -t inferx/inferx_dashboard:v0.1.0 ./target/dashboard - # sudo docker push inferx/inferx_dashboard:v0.1.0 + -sudo docker image rm inferx/inferx_dashboard:$(VERSION) + sudo docker build -t inferx/inferx_dashboard:$(VERSION) ./target/dashboard pushdash: # sudo docker login -u inferx - sudo docker tag inferx/inferx_dashboard:v0.1.0 inferx/inferx_dashboard:v0.1.0 - sudo docker push inferx/inferx_dashboard:v0.1.0 + sudo docker tag inferx/inferx_dashboard:$(VERSION) inferx/inferx_dashboard:$(VERSION) + sudo docker push inferx/inferx_dashboard:$(VERSION) runmodel: mkdir -p ./target/runmodel @@ -29,30 +28,30 @@ runmodel: cp ./script/run_llava.py ./target/runmodel cp ./script/run_stablediffusion.py ./target/runmodel cp ./deployment/vllm-opai.Dockerfile ./target/runmodel/Dockerfile - -sudo docker image rm vllm-openai-upgraded:v0.1.0 - sudo docker build -t vllm-openai-upgraded:v0.1.0 ./target/runmodel + -sudo docker image rm vllm-openai-upgraded:$(VERSION) + sudo docker build -t vllm-openai-upgraded:$(VERSION) ./target/runmodel spdk: mkdir -p ./target/spdk -rm ./target/spdk/* -rf cp ./deployment/spdk.Dockerfile ./target/spdk/Dockerfile - -sudo docker image rm inferx/spdk-container:v0.1.0 - sudo docker build -t inferx/spdk-container:v0.1.0 ./target/spdk + -sudo docker image rm inferx/spdk-container:$(VERSION) + sudo docker build -t inferx/spdk-container:$(VERSION) ./target/spdk spdk2: mkdir -p ./target/spdk -rm ./target/spdk/* -rf cp ./deployment/spdk2.Dockerfile ./target/spdk/Dockerfile cp ./deployment/spdk.script ./target/spdk/entrypoint.sh - -sudo docker image rm inferx/spdk-container2:v0.1.0 - sudo docker build -t inferx/spdk-container2:v0.1.0 ./target/spdk + -sudo docker image rm inferx/spdk-container2:$(VERSION) + sudo docker build -t inferx/spdk-container2:$(VERSION) ./target/spdk pushspdk: # sudo docker login -u inferx - sudo docker tag inferx/spdk-container:v0.1.0 inferx/spdk-container:v0.1.0 - sudo docker push inferx/spdk-container:v0.1.0 - sudo docker tag inferx/spdk-container2:v0.1.0 inferx/spdk-container2:v0.1.0 - sudo docker push inferx/spdk-container2:v0.1.0 + sudo docker tag inferx/spdk-container:$(VERSION) inferx/spdk-container:$(VERSION) + sudo docker push inferx/spdk-container:$(VERSION) + sudo docker tag inferx/spdk-container2:$(VERSION) inferx/spdk-container2:$(VERSION) + sudo docker push inferx/spdk-container2:$(VERSION) sql: sudo cp ./dashboard/sql/create_table.sql /opt/inferx/config sudo cp ./dashboard/sql/secret.sql /opt/inferx/config @@ -60,6 +59,8 @@ sql: run: -sudo pkill -9 inferx @echo "LOCAL_IP=$$(hostname -I | awk '{print $$1}' | xargs)" > .env + @echo "Version=$(VERSION)" >> .env + @echo "HOSTNAME=$(NODE_NAME)" >> .env sudo docker compose -f docker-compose.yml build - sudo rm -f /opt/inferx/log/inferx.log - sudo rm -f /opt/inferx/log/onenode.log @@ -68,11 +69,14 @@ run: runblob: -sudo pkill -9 inferx - @echo "LOCAL_IP=$$(hostname -I | awk '{print $$1}' | xargs)" > .env + @echo "LOCAL_IP=$$(hostname -I | tr ' ' '\n' | grep -v '^172\.' 
| head -n 1 | xargs)" > .env + @echo "Version=$(VERSION)" >> .env + @echo "HOSTNAME=$(NODE_NAME)" >> .env sudo docker compose -f docker-compose_blob.yml build - sudo rm -f /opt/inferx/log/inferx.log - sudo rm -f /opt/inferx/log/onenode.log sudo docker compose -f docker-compose_blob.yml up -d --remove-orphans + cat .env rm .env stop: @@ -82,7 +86,24 @@ stopblob: sudo docker compose -f docker-compose_blob.yml down rundash: - sudo docker run --net=host --name inferx_dashboard -v /etc/letsencrypt/:/etc/letsencrypt/ --rm inferx/inferx_dashboard:v0.1.0 + sudo docker run --net=host --name inferx_dashboard -v /etc/letsencrypt/:/etc/letsencrypt/ --rm inferx/inferx_dashboard:$(VERSION) stopdash: - sudo docker stop inferx_dashboard \ No newline at end of file + sudo docker stop inferx_dashboard + +runkblob: + sudo kubectl apply -f k8s/spdk.yaml + sudo kubectl apply -f k8s/etcd.yaml + sudo kubectl apply -f k8s/secretdb.yaml + sudo kubectl apply -f k8s/db-deployment.yaml + sudo kubectl apply -f k8s/keycloak_postgres.yaml + sudo kubectl apply -f k8s/keycloak.yaml + sudo kubectl apply -f k8s/statesvc.yaml + sudo kubectl apply -f k8s/scheduler.yaml + sudo kubectl apply -f k8s/nodeagent.yaml + sudo kubectl apply -f k8s/dashboard.yaml + sudo kubectl apply -f k8s/ingress.yaml + +stopnodeagent: + sudo kubectl delete DaemonSet nodeagent-blob + sudo kubectl delete DaemonSet nodeagent-file \ No newline at end of file diff --git a/config/Aquila-7B.json b/config/Aquila-7B.json index 92fcea5..373dce9 100644 --- a/config/Aquila-7B.json +++ b/config/Aquila-7B.json @@ -5,7 +5,7 @@ "name": "Aquila-7B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "BAAI/Aquila-7B", @@ -17,7 +17,7 @@ ], "resources": { "CPU": 20000, - "Mem": 50000, + "Mem": 60000, "GPU": { "Type": "Any", "Count": 2, @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Baichuan-7B.json b/config/Baichuan-7B.json index 0afff2b..05cf918 100644 --- a/config/Baichuan-7B.json +++ b/config/Baichuan-7B.json @@ -5,7 +5,7 @@ "name": "Baichuan-7B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "baichuan-inc/Baichuan-7B", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Baichuan2-13B-Chat-4bits.json b/config/Baichuan2-13B-Chat-4bits.json index e8d0f7b..89d953c 100644 --- a/config/Baichuan2-13B-Chat-4bits.json +++ b/config/Baichuan2-13B-Chat-4bits.json @@ -5,7 +5,7 @@ "name": "Baichuan2-13B-Chat-4bits", "object": { "spec": { - "image": "vllm-openai-upgraded:v.0.1", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "baichuan-inc/Baichuan2-13B-Chat-4bits", @@ -16,17 +16,21 @@ ], "resources": { "CPU": 12000, - "Mem": 14000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, - "vRam": 8000 + "vRam": 13800 } }, "envs": [ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Baichuan2-7B-Chat.json 
b/config/Baichuan2-7B-Chat.json index bbb8b62..6b5e516 100644 --- a/config/Baichuan2-7B-Chat.json +++ b/config/Baichuan2-7B-Chat.json @@ -5,7 +5,7 @@ "name": "Baichuan2-7B-Chat", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "baichuan-inc/Baichuan2-7B-Chat", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/DeciLM-7B-instruct.json b/config/DeciLM-7B-instruct.json index edd1cb5..466f5f1 100644 --- a/config/DeciLM-7B-instruct.json +++ b/config/DeciLM-7B-instruct.json @@ -5,7 +5,7 @@ "name": "DeciLM-7B-instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Deci/DeciLM-7B-instruct", diff --git a/config/DeciLM-7B.json b/config/DeciLM-7B.json index b36b838..f581d7d 100644 --- a/config/DeciLM-7B.json +++ b/config/DeciLM-7B.json @@ -5,7 +5,7 @@ "name": "DeciLM-7B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Deci/DeciLM-7B", diff --git a/config/DeepSeek-R1-Distill-Llama-8B.json b/config/DeepSeek-R1-Distill-Llama-8B.json index 8372ca5..3da0445 100644 --- a/config/DeepSeek-R1-Distill-Llama-8B.json +++ b/config/DeepSeek-R1-Distill-Llama-8B.json @@ -5,7 +5,7 @@ "name": "DeepSeek-R1-Distill-Llama-8B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "/root/.cache/huggingface/git/DeepSeek-R1-Distill-Llama-8B", @@ -30,6 +30,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/DeepSeek-R1-Distill-Qwen-1.5B.json b/config/DeepSeek-R1-Distill-Qwen-1.5B.json index 2c75845..209d9e1 100644 --- a/config/DeepSeek-R1-Distill-Qwen-1.5B.json +++ b/config/DeepSeek-R1-Distill-Qwen-1.5B.json @@ -5,7 +5,7 @@ "name": "DeepSeek-R1-Distill-Qwen-1.5B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "/root/.cache/huggingface/git/DeepSeek-R1-Distill-Qwen-1.5B", @@ -18,7 +18,7 @@ ], "resources": { "CPU": 20000, - "Mem": 50000, + "Mem": 60000, "GPU": { "Type": "Any", "Count": 1, @@ -29,6 +29,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/DeepSeek-R1-Distill-Qwen-7B.json b/config/DeepSeek-R1-Distill-Qwen-7B.json index b38d551..37fdbd8 100644 --- a/config/DeepSeek-R1-Distill-Qwen-7B.json +++ b/config/DeepSeek-R1-Distill-Qwen-7B.json @@ -5,7 +5,7 @@ "name": "DeepSeek-R1-Distill-Qwen-7B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "/root/.cache/huggingface/git/DeepSeek-R1-Distill-Qwen-7B", @@ -30,6 +30,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/EXAONE-3.0-7.8B-Instruct copy.json 
b/config/EXAONE-3.0-7.8B-Instruct copy.json index 916e0f2..08f15cc 100644 --- a/config/EXAONE-3.0-7.8B-Instruct copy.json +++ b/config/EXAONE-3.0-7.8B-Instruct copy.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "gemma-7b", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "google/gemma-7b", diff --git a/config/EXAONE-3.0-7.8B-Instruct.json b/config/EXAONE-3.0-7.8B-Instruct.json index b110f37..b6f029b 100644 --- a/config/EXAONE-3.0-7.8B-Instruct.json +++ b/config/EXAONE-3.0-7.8B-Instruct.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "EXAONE-3.0-7.8B-Instruct", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", diff --git a/config/Llama-2-13b-hf.json b/config/Llama-2-13b-hf.json index 9881ddf..1d7965a 100644 --- a/config/Llama-2-13b-hf.json +++ b/config/Llama-2-13b-hf.json @@ -5,7 +5,7 @@ "name": "Llama-2-13b-hf", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "meta-llama/Llama-2-13b-hf", @@ -18,7 +18,7 @@ ], "resources": { "CPU": 20000, - "Mem": 50000, + "Mem": 60000, "GPU": { "Type": "Any", "Count": 2, @@ -29,6 +29,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Llama-3.2-3B-Instruct.json b/config/Llama-3.2-3B-Instruct.json index 1b12c9c..85b4794 100644 --- a/config/Llama-3.2-3B-Instruct.json +++ b/config/Llama-3.2-3B-Instruct.json @@ -5,7 +5,7 @@ "name": "Llama-3.2-3B-Instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "meta-llama/Llama-3.2-3B-Instruct", @@ -27,6 +27,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Llama-3.2-3B-Instruct_2gpu.json b/config/Llama-3.2-3B-Instruct_2gpu.json index debbb39..5bb2563 100644 --- a/config/Llama-3.2-3B-Instruct_2gpu.json +++ b/config/Llama-3.2-3B-Instruct_2gpu.json @@ -5,7 +5,7 @@ "name": "Llama-3.2-3B-Instruct_2gpu", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "meta-llama/Llama-3.2-3B-Instruct", diff --git a/config/Meta-Llama-3-8B-Instruct.json b/config/Meta-Llama-3-8B-Instruct.json index 3b2b070..fc33444 100644 --- a/config/Meta-Llama-3-8B-Instruct.json +++ b/config/Meta-Llama-3-8B-Instruct.json @@ -5,7 +5,7 @@ "name": "Meta-Llama-3-8B-Instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "meta-llama/Meta-Llama-3-8B-Instruct", diff --git a/config/Meta-Llama-3-8B.json b/config/Meta-Llama-3-8B.json index 0ab141d..c4f9039 100644 --- a/config/Meta-Llama-3-8B.json +++ b/config/Meta-Llama-3-8B.json @@ -5,7 +5,7 @@ "name": "Meta-Llama-3-8B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "meta-llama/Meta-Llama-3-8B", diff --git a/config/MiniCPM-2B-dpo-bf16.json b/config/MiniCPM-2B-dpo-bf16.json index e3266cc..538f09e 100644 --- a/config/MiniCPM-2B-dpo-bf16.json +++ b/config/MiniCPM-2B-dpo-bf16.json @@ -5,7 +5,7 @@ 
"name": "MiniCPM-2B-dpo-bf16", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "openbmb/MiniCPM-2B-dpo-bf16", @@ -27,6 +27,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/MiniCPM-2B-sft-bf16.json b/config/MiniCPM-2B-sft-bf16.json index f7758db..805834f 100644 --- a/config/MiniCPM-2B-sft-bf16.json +++ b/config/MiniCPM-2B-sft-bf16.json @@ -5,7 +5,7 @@ "name": "MiniCPM-2B-sft-bf16", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "openbmb/MiniCPM-2B-sft-bf16", @@ -26,6 +26,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Minitron-8B-Base.json b/config/Minitron-8B-Base.json index e007c48..dd6c8e0 100644 --- a/config/Minitron-8B-Base.json +++ b/config/Minitron-8B-Base.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "Minitron-8B-Base", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "nvidia/Minitron-8B-Base", diff --git a/config/Mistral-7B-Instruct-v0.1.json b/config/Mistral-7B-Instruct-v0.1.json index cfe4b66..758fd0c 100644 --- a/config/Mistral-7B-Instruct-v0.1.json +++ b/config/Mistral-7B-Instruct-v0.1.json @@ -5,7 +5,7 @@ "name": "Mistral-7B-Instruct-v0.1", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "mistralai/Mistral-7B-Instruct-v0.1", diff --git a/config/Mistral-7B-v0.1.json b/config/Mistral-7B-v0.1.json index 1dd22b8..c8f99b3 100644 --- a/config/Mistral-7B-v0.1.json +++ b/config/Mistral-7B-v0.1.json @@ -5,7 +5,7 @@ "name": "Mistral-7B-v0.1", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "mistralai/Mistral-7B-v0.1", diff --git a/config/Mistral-7B-v0.1_2gpu.json b/config/Mistral-7B-v0.1_2gpu.json index b50bf0e..064ea57 100644 --- a/config/Mistral-7B-v0.1_2gpu.json +++ b/config/Mistral-7B-v0.1_2gpu.json @@ -5,7 +5,7 @@ "name": "Mistral-7B-v0.1_2gpu", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "mistralai/Mistral-7B-v0.1", @@ -29,6 +29,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/OLMo-1B-hf.json b/config/OLMo-1B-hf.json index 2d6c85c..70e6f1f 100644 --- a/config/OLMo-1B-hf.json +++ b/config/OLMo-1B-hf.json @@ -5,7 +5,7 @@ "name": "OLMo-1B-hf", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "allenai/OLMo-1B-hf", @@ -26,6 +26,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/OLMo-1B-hf_2gpu.json b/config/OLMo-1B-hf_2gpu.json index ab24284..ed0a264 100644 --- a/config/OLMo-1B-hf_2gpu.json +++ 
b/config/OLMo-1B-hf_2gpu.json @@ -5,7 +5,7 @@ "name": "OLMo-1B-hf_2gpu", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "allenai/OLMo-1B-hf", diff --git a/config/OLMo-7B-hf.json b/config/OLMo-7B-hf.json index 9ac9198..2b85a1c 100644 --- a/config/OLMo-7B-hf.json +++ b/config/OLMo-7B-hf.json @@ -5,7 +5,7 @@ "name": "OLMo-7B-hf", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "allenai/OLMo-7B-hf", @@ -16,7 +16,7 @@ ], "resources": { "CPU": 20000, - "Mem": 50000, + "Mem": 70000, "GPU": { "Type": "Any", "Count": 2, @@ -27,6 +27,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/OLMoE-1B-7B-0924-Instruct.json b/config/OLMoE-1B-7B-0924-Instruct.json index a6fdf32..a8636bd 100644 --- a/config/OLMoE-1B-7B-0924-Instruct.json +++ b/config/OLMoE-1B-7B-0924-Instruct.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "OLMoE-1B-7B-0924-Instruct", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "allenai/OLMoE-1B-7B-0924-Instruct", diff --git a/config/Phi-3-mini-128k-instruct.json b/config/Phi-3-mini-128k-instruct.json index c153126..9cf650f 100644 --- a/config/Phi-3-mini-128k-instruct.json +++ b/config/Phi-3-mini-128k-instruct.json @@ -5,7 +5,7 @@ "name": "Phi-3-mini-128k-instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "microsoft/Phi-3-mini-128k-instruct", @@ -16,7 +16,7 @@ ], "resources": { "CPU": 12000, - "Mem": 18000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -27,6 +27,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Phi-3-mini-4k-instruct.json b/config/Phi-3-mini-4k-instruct.json index caa85fc..1eb3d6d 100644 --- a/config/Phi-3-mini-4k-instruct.json +++ b/config/Phi-3-mini-4k-instruct.json @@ -5,7 +5,7 @@ "name": "Phi-3-mini-4k-instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "microsoft/Phi-3-mini-4k-instruct", @@ -16,7 +16,7 @@ ], "resources": { "CPU": 12000, - "Mem": 18000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -27,6 +27,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen-VL-Chat.json b/config/Qwen-VL-Chat.json index f5907bc..6022df1 100644 --- a/config/Qwen-VL-Chat.json +++ b/config/Qwen-VL-Chat.json @@ -5,7 +5,7 @@ "name": "Qwen-VL-Chat", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen-VL-Chat", diff --git a/config/Qwen.json b/config/Qwen.json index d895ed6..26625b7 100644 --- a/config/Qwen.json +++ b/config/Qwen.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "Qwen", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-3B-Instruct", diff --git 
a/config/Qwen1.5-MoE-A2.7B.json b/config/Qwen1.5-MoE-A2.7B.json index 67722be..f3b7b16 100644 --- a/config/Qwen1.5-MoE-A2.7B.json +++ b/config/Qwen1.5-MoE-A2.7B.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "Qwen1.5-MoE-A2.7B", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen1.5-MoE-A2.7B", diff --git a/config/Qwen2.5-1.5B.json b/config/Qwen2.5-1.5B.json index c4fa62a..8613ea4 100644 --- a/config/Qwen2.5-1.5B.json +++ b/config/Qwen2.5-1.5B.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-1.5B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-1.5B", @@ -15,7 +15,7 @@ ], "resources": { "CPU": 12000, - "Mem": 18000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -26,6 +26,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-7B-Instruct-1M.json b/config/Qwen2.5-7B-Instruct-1M.json index 4e5e5fa..ec782fd 100644 --- a/config/Qwen2.5-7B-Instruct-1M.json +++ b/config/Qwen2.5-7B-Instruct-1M.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-7B-Instruct-1M", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-7B-Instruct-1M", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-7B-Instruct-GPTQ-Int8.json b/config/Qwen2.5-7B-Instruct-GPTQ-Int8.json index df68dbb..ff25841 100644 --- a/config/Qwen2.5-7B-Instruct-GPTQ-Int8.json +++ b/config/Qwen2.5-7B-Instruct-GPTQ-Int8.json @@ -5,14 +5,14 @@ "name": "Qwen2.5-7B-Instruct-GPTQ-Int8", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8", "--gpu-memory-utilization", - "0.80", + "0.99", "--max-model-len", - "1000" + "500" ], "resources": { "CPU": 20000, @@ -27,6 +27,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ @@ -46,7 +50,7 @@ "path": "v1/completions", "body": { "model": "Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8", - "max_tokens": "800", + "max_tokens": "300", "temperature": "0", "stream": "true" } diff --git a/config/Qwen2.5-7B.json b/config/Qwen2.5-7B.json index 40efe67..2a6530a 100644 --- a/config/Qwen2.5-7B.json +++ b/config/Qwen2.5-7B.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "models--Qwen--Qwen2.5-7B", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-7B", diff --git a/config/Qwen2.5-Coder-1.5B-Instruct.json b/config/Qwen2.5-Coder-1.5B-Instruct.json index c25eca5..c601e11 100644 --- a/config/Qwen2.5-Coder-1.5B-Instruct.json +++ b/config/Qwen2.5-Coder-1.5B-Instruct.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-Coder-1.5B-Instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-Coder-1.5B-Instruct", @@ -14,7 +14,7 @@ ], "resources": { "CPU": 12000, - "Mem": 18000, + "Mem": 
24000, "GPU": { "Type": "Any", "Count": 1, @@ -25,6 +25,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-Coder-14B-Instruct-GPTQ-Int8.json b/config/Qwen2.5-Coder-14B-Instruct-GPTQ-Int8.json index 35ea7cf..7bd0234 100644 --- a/config/Qwen2.5-Coder-14B-Instruct-GPTQ-Int8.json +++ b/config/Qwen2.5-Coder-14B-Instruct-GPTQ-Int8.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-Coder-14B-Instruct-GPTQ-Int8", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-Int8", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-Coder-3B.json b/config/Qwen2.5-Coder-3B.json index 5180d94..073392b 100644 --- a/config/Qwen2.5-Coder-3B.json +++ b/config/Qwen2.5-Coder-3B.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-Coder-3B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-Coder-3B", @@ -14,7 +14,7 @@ ], "resources": { "CPU": 12000, - "Mem": 18000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -25,6 +25,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-Coder-7B-Instruct.json b/config/Qwen2.5-Coder-7B-Instruct.json index 0319fb4..4b7c54d 100644 --- a/config/Qwen2.5-Coder-7B-Instruct.json +++ b/config/Qwen2.5-Coder-7B-Instruct.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-Coder-7B-Instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-Coder-7B-Instruct", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-Math-1.5B-Instruct.json b/config/Qwen2.5-Math-1.5B-Instruct.json index fc094e3..f09c644 100644 --- a/config/Qwen2.5-Math-1.5B-Instruct.json +++ b/config/Qwen2.5-Math-1.5B-Instruct.json @@ -5,14 +5,14 @@ "name": "Qwen2.5-Math-1.5B-Instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-Math-1.5B-Instruct" ], "resources": { "CPU": 12000, - "Mem": 20000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -23,6 +23,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-Math-1.5B.json b/config/Qwen2.5-Math-1.5B.json index 867e906..c8a1472 100644 --- a/config/Qwen2.5-Math-1.5B.json +++ b/config/Qwen2.5-Math-1.5B.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-Math-1.5B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-Math-1.5B", @@ -15,7 +15,7 @@ ], 
"resources": { "CPU": 12000, - "Mem": 18000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -26,6 +26,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-Math-7B-Instruct.json b/config/Qwen2.5-Math-7B-Instruct.json index ac129d2..7bb0575 100644 --- a/config/Qwen2.5-Math-7B-Instruct.json +++ b/config/Qwen2.5-Math-7B-Instruct.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-Math-7B-Instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-Math-7B-Instruct", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-Math-7B.json b/config/Qwen2.5-Math-7B.json index e80973a..1d06e9b 100644 --- a/config/Qwen2.5-Math-7B.json +++ b/config/Qwen2.5-Math-7B.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-Math-7B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-Math-7B", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen7BInt8.json b/config/Qwen7BInt8.json index c6f752e..f072729 100644 --- a/config/Qwen7BInt8.json +++ b/config/Qwen7BInt8.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "Qwen", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8", diff --git a/config/TinyLlama-1.1B-Chat-v1.0.json b/config/TinyLlama-1.1B-Chat-v1.0.json index 0f5b444..f2e8465 100644 --- a/config/TinyLlama-1.1B-Chat-v1.0.json +++ b/config/TinyLlama-1.1B-Chat-v1.0.json @@ -5,7 +5,7 @@ "name": "TinyLlama-1.1B-Chat-v1.0", "object": { "spec": { - "image": "vllm/vllm-openai:v0.4.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", @@ -15,17 +15,21 @@ ], "resources": { "CPU": 20000, - "Mem": 18000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, - "vRam": 4500 + "vRam": 4800 } }, "envs": [ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/TinyLlama-1.1B-Chat-v1.0_13GB.json b/config/TinyLlama-1.1B-Chat-v1.0_13GB.json index e360214..76c8e99 100644 --- a/config/TinyLlama-1.1B-Chat-v1.0_13GB.json +++ b/config/TinyLlama-1.1B-Chat-v1.0_13GB.json @@ -5,7 +5,7 @@ "name": "TinyLlama-1.1B-Chat-v1.0_13GB", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", @@ -15,7 +15,7 @@ ], "resources": { "CPU": 20000, - "Mem": 18000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -26,6 +26,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git 
a/config/TinyLlama-1.1B-Chat-v1.0_2gpu.json b/config/TinyLlama-1.1B-Chat-v1.0_2gpu.json index 453dbdc..4957523 100644 --- a/config/TinyLlama-1.1B-Chat-v1.0_2gpu.json +++ b/config/TinyLlama-1.1B-Chat-v1.0_2gpu.json @@ -5,7 +5,7 @@ "name": "TinyLlama-1.1B-Chat-v1.0_2gpu", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", diff --git a/config/XVERSE-13B-Chat.json b/config/XVERSE-13B-Chat.json index 0897a02..bde8da0 100644 --- a/config/XVERSE-13B-Chat.json +++ b/config/XVERSE-13B-Chat.json @@ -5,7 +5,7 @@ "name": "XVERSE-13B-Chat", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "xverse/XVERSE-13B-Chat", diff --git a/config/XVERSE-7B-Chat.json b/config/XVERSE-7B-Chat.json index 2c4c539..04ad887 100644 --- a/config/XVERSE-7B-Chat.json +++ b/config/XVERSE-7B-Chat.json @@ -5,7 +5,7 @@ "name": "XVERSE-7B-Chat", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "xverse/XVERSE-7B-Chat", diff --git a/config/chatglm3-6b-128k.json b/config/chatglm3-6b-128k.json index c79da13..4ecbd4e 100644 --- a/config/chatglm3-6b-128k.json +++ b/config/chatglm3-6b-128k.json @@ -5,7 +5,7 @@ "name": "chatglm3-6b-128k", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "THUDM/chatglm3-6b-128k", @@ -18,7 +18,7 @@ ], "resources": { "CPU": 12000, - "Mem": 20000, + "Mem": 28000, "GPU": { "Type": "Any", "Count": 1, @@ -29,6 +29,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/chatglm3-6b-32k.json b/config/chatglm3-6b-32k.json index 5fe2189..7e24977 100644 --- a/config/chatglm3-6b-32k.json +++ b/config/chatglm3-6b-32k.json @@ -5,7 +5,7 @@ "name": "chatglm3-6b-32k", "object": { "spec": { - "image": "vllm/vllm-openai:v0.4.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "THUDM/chatglm3-6b-32k", @@ -18,7 +18,7 @@ ], "resources": { "CPU": 12000, - "Mem": 20000, + "Mem": 28000, "GPU": { "Type": "Any", "Count": 1, @@ -29,6 +29,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/chatglm3-6b.json b/config/chatglm3-6b.json index 936078a..94667b1 100644 --- a/config/chatglm3-6b.json +++ b/config/chatglm3-6b.json @@ -5,7 +5,7 @@ "name": "chatglm3-6b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "THUDM/chatglm3-6b", @@ -18,7 +18,7 @@ ], "resources": { "CPU": 12000, - "Mem": 20000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -29,6 +29,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/core42_jais-13b-bnb-4bit.json b/config/core42_jais-13b-bnb-4bit.json index 6341ae7..582d914 100644 --- a/config/core42_jais-13b-bnb-4bit.json +++ b/config/core42_jais-13b-bnb-4bit.json @@ -5,7 +5,7 @@ "name": "core42_jais-13b-bnb-4bit", "object": { 
"spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "jwnder/core42_jais-13b-bnb-4bit", diff --git a/config/core42_jais-13b-chat-bnb-4bit.json b/config/core42_jais-13b-chat-bnb-4bit.json index a87c7fa..47bcd0c 100644 --- a/config/core42_jais-13b-chat-bnb-4bit.json +++ b/config/core42_jais-13b-chat-bnb-4bit.json @@ -5,7 +5,7 @@ "name": "core42_jais-13b-chat-bnb-4bit", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "jwnder/core42_jais-13b-chat-bnb-4bit", diff --git a/config/deepseek-llm-7b-chat.json b/config/deepseek-llm-7b-chat.json index efa21d0..5b2f038 100644 --- a/config/deepseek-llm-7b-chat.json +++ b/config/deepseek-llm-7b-chat.json @@ -5,7 +5,7 @@ "name": "deepseek-llm-7b-chat", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "/root/.cache/huggingface/git/deepseek-llm-7b-chat", @@ -32,6 +32,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/deepseek-llm-7b-chat_2gpu.json b/config/deepseek-llm-7b-chat_2gpu.json index 6adaf67..d6c84ed 100644 --- a/config/deepseek-llm-7b-chat_2gpu.json +++ b/config/deepseek-llm-7b-chat_2gpu.json @@ -5,7 +5,7 @@ "name": "deepseek-llm-7b-chat_2gpu", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "/root/.cache/huggingface/git/deepseek-llm-7b-chat", @@ -30,6 +30,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/deepseek-math-7b-instruct.json b/config/deepseek-math-7b-instruct.json index c52d852..1568692 100644 --- a/config/deepseek-math-7b-instruct.json +++ b/config/deepseek-math-7b-instruct.json @@ -5,7 +5,7 @@ "name": "deepseek-math-7b-instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "/root/.cache/huggingface/git/deepseek-math-7b-instruct", @@ -30,6 +30,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/dolly-v2-12b.json b/config/dolly-v2-12b.json index da20a3f..6f90392 100644 --- a/config/dolly-v2-12b.json +++ b/config/dolly-v2-12b.json @@ -5,7 +5,7 @@ "name": "dolly-v2-12b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "databricks/dolly-v2-12b", @@ -31,6 +31,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/falcon-7b.json b/config/falcon-7b.json index e245b2c..a9cabca 100644 --- a/config/falcon-7b.json +++ b/config/falcon-7b.json @@ -5,7 +5,7 @@ "name": "falcon-7b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "tiiuae/falcon-7b", diff --git a/config/falcon-rw-7b.json 
b/config/falcon-rw-7b.json index 6c0ec42..157fab8 100644 --- a/config/falcon-rw-7b.json +++ b/config/falcon-rw-7b.json @@ -5,7 +5,7 @@ "name": "falcon-rw-7b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "tiiuae/falcon-rw-7b", @@ -27,6 +27,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/gemma-7b.json b/config/gemma-7b.json index 916e0f2..08f15cc 100644 --- a/config/gemma-7b.json +++ b/config/gemma-7b.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "gemma-7b", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "google/gemma-7b", diff --git a/config/gpt-j-6b.json b/config/gpt-j-6b.json index fc989bf..0d54647 100644 --- a/config/gpt-j-6b.json +++ b/config/gpt-j-6b.json @@ -5,7 +5,7 @@ "name": "gpt-j-6b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "EleutherAI/gpt-j-6b", diff --git a/config/gpt2-xl.json b/config/gpt2-xl.json index 1c1ca30..32cd1d9 100644 --- a/config/gpt2-xl.json +++ b/config/gpt2-xl.json @@ -5,7 +5,7 @@ "name": "gpt2-xl", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "openai-community/gpt2-xl", @@ -15,7 +15,7 @@ ], "resources": { "CPU": 12000, - "Mem": 18000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -26,6 +26,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/gpt4all-j.json b/config/gpt4all-j.json index 02c9bcf..9bbf9e2 100644 --- a/config/gpt4all-j.json +++ b/config/gpt4all-j.json @@ -5,7 +5,7 @@ "name": "gpt4all-j", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "nomic-ai/gpt4all-j", @@ -17,7 +17,7 @@ ], "resources": { "CPU": 20000, - "Mem": 50000, + "Mem": 60000, "GPU": { "Type": "Any", "Count": 2, @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/internlm2-7b.json b/config/internlm2-7b.json index 65b19b9..e9ade3d 100644 --- a/config/internlm2-7b.json +++ b/config/internlm2-7b.json @@ -5,7 +5,7 @@ "name": "internlm2-7b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "internlm/internlm2-7b", diff --git a/config/internlm2_5-7b-chat.json b/config/internlm2_5-7b-chat.json index 67aab7e..a7ba978 100644 --- a/config/internlm2_5-7b-chat.json +++ b/config/internlm2_5-7b-chat.json @@ -5,7 +5,7 @@ "name": "internlm2_5-7b-chat", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "internlm/internlm2_5-7b-chat", diff --git a/config/llama_8BInt8.json b/config/llama_8BInt8.json index 5fc3408..16831b4 100644 --- a/config/llama_8BInt8.json +++ b/config/llama_8BInt8.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "TinyLlama-1.1B-Chat-v1.0_2gpu", "spec": { - "image": 
"vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "meta-llama/Llama-Guard-3-8B-INT8", diff --git a/config/mamba-1.4b-hf.json b/config/mamba-1.4b-hf.json index 632bf51..5c747a7 100644 --- a/config/mamba-1.4b-hf.json +++ b/config/mamba-1.4b-hf.json @@ -5,7 +5,7 @@ "name": "mamba-1.4b-hf", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "state-spaces/mamba-1.4b-hf", diff --git a/config/mistral.json b/config/mistral.json index 9ab0172..3d3644c 100644 --- a/config/mistral.json +++ b/config/mistral.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "mistral", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "mistralai/Mistral-7B-v0.1", diff --git a/config/models.txt b/config/models.txt index 92e83a7..8cebe05 100644 --- a/config/models.txt +++ b/config/models.txt @@ -185,9 +185,6 @@ export IFERX_PASSWORD="test" /opt/inferx/bin/ixctl create deepseek-llm-7b-chat.json /opt/inferx/bin/ixctl create deepseek-llm-7b-chat_2gpu.json -/opt/inferx/bin/ixctl update deepseek-llm-7b-chat.json -/opt/inferx/bin/ixctl update deepseek-llm-7b-chat_2gpu.json - /opt/inferx/bin/ixctl create DeepSeek-R1-Distill-Llama-8B.json /opt/inferx/bin/ixctl create DeepSeek-R1-Distill-Qwen-1.5B.json /opt/inferx/bin/ixctl create DeepSeek-R1-Distill-Qwen-7B.json diff --git a/config/mpt-7b-storywriter.json b/config/mpt-7b-storywriter.json index 4f14c3f..9563700 100644 --- a/config/mpt-7b-storywriter.json +++ b/config/mpt-7b-storywriter.json @@ -5,7 +5,7 @@ "name": "mpt-7b-storywriter", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "mosaicml/mpt-7b-storywriter", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/mpt-7b.json b/config/mpt-7b.json index fdc3389..29f8c9e 100644 --- a/config/mpt-7b.json +++ b/config/mpt-7b.json @@ -5,7 +5,7 @@ "name": "mpt-7b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "mosaicml/mpt-7b", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/oasst-sft-4-pythia-12b-epoch-3.5.json b/config/oasst-sft-4-pythia-12b-epoch-3.5.json index 9ebd83a..4228ae7 100644 --- a/config/oasst-sft-4-pythia-12b-epoch-3.5.json +++ b/config/oasst-sft-4-pythia-12b-epoch-3.5.json @@ -5,7 +5,7 @@ "name": "oasst-sft-4-pythia-12b-epoch-3.5", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", @@ -18,7 +18,7 @@ ], "resources": { "CPU": 20000, - "Mem": 50000, + "Mem": 60000, "GPU": { "Type": "Any", "Count": 2, @@ -29,6 +29,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/opt-iml-max-1.3b.json b/config/opt-iml-max-1.3b.json index 525ddbd..d327d23 100644 --- a/config/opt-iml-max-1.3b.json +++ 
b/config/opt-iml-max-1.3b.json @@ -5,7 +5,7 @@ "name": "opt-iml-max-1.3b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "facebook/opt-iml-max-1.3b", @@ -14,17 +14,21 @@ ], "resources": { "CPU": 12000, - "Mem": 15000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, - "vRam": 3800 + "vRam": 4500 } }, "envs": [ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/persimmon-8b-base.json b/config/persimmon-8b-base.json index 04e9e04..9ee813d 100644 --- a/config/persimmon-8b-base.json +++ b/config/persimmon-8b-base.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "persimmon-8b-base", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "adept/persimmon-8b-base", diff --git a/config/persimmon-8b-chat.json b/config/persimmon-8b-chat.json index 1babc57..e980ac1 100644 --- a/config/persimmon-8b-chat.json +++ b/config/persimmon-8b-chat.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "persimmon-8b-chat", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "adept/persimmon-8b-chat", diff --git a/config/pythia-12b.json b/config/pythia-12b.json index 5ebda70..032a7ca 100644 --- a/config/pythia-12b.json +++ b/config/pythia-12b.json @@ -5,7 +5,7 @@ "name": "pythia-12b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "EleutherAI/pythia-12b", @@ -18,7 +18,7 @@ ], "resources": { "CPU": 20000, - "Mem": 50000, + "Mem": 60000, "GPU": { "Type": "Any", "Count": 2, @@ -29,6 +29,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/reader.json b/config/reader.json index 9a51249..56ead1e 100644 --- a/config/reader.json +++ b/config/reader.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "reader-lm", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "jinaai/reader-lm-1.5b", diff --git a/config/stablelm-3b-4e1t.json b/config/stablelm-3b-4e1t.json index 56eff6c..5a9435f 100644 --- a/config/stablelm-3b-4e1t.json +++ b/config/stablelm-3b-4e1t.json @@ -5,7 +5,7 @@ "name": "stablelm-3b-4e1t", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "stabilityai/stablelm-3b-4e1t", diff --git a/config/stablelm-tuned-alpha-7b.json b/config/stablelm-tuned-alpha-7b.json index ec2c2fe..edb2c8c 100644 --- a/config/stablelm-tuned-alpha-7b.json +++ b/config/stablelm-tuned-alpha-7b.json @@ -5,7 +5,7 @@ "name": "stablelm-tuned-alpha-7b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "stabilityai/stablelm-tuned-alpha-7b", diff --git a/config/starcoder2-3b.json b/config/starcoder2-3b.json index 4f025bc..1400ee2 100644 --- a/config/starcoder2-3b.json +++ b/config/starcoder2-3b.json @@ -5,7 +5,7 @@ "name": "starcoder2-3b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "bigcode/starcoder2-3b", @@ -26,6 +26,10 @@ [ 
"LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/starcoder2-7b.json b/config/starcoder2-7b.json index 477d467..ad3c644 100644 --- a/config/starcoder2-7b.json +++ b/config/starcoder2-7b.json @@ -5,7 +5,7 @@ "name": "starcoder2-7b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "bigcode/starcoder2-7b", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/dashboard/app.py b/dashboard/app.py index 7b5e5a4..f7b4d81 100644 --- a/dashboard/app.py +++ b/dashboard/app.py @@ -39,22 +39,34 @@ import logging import sys +import multiprocessing from werkzeug.middleware.proxy_fix import ProxyFix -logger = logging.getLogger('gunicorn.error') -sys.stdout = sys.stderr = logger.handlers[0].stream +# logger = logging.getLogger('gunicorn.error') +# sys.stdout = sys.stderr = logger.handlers[0].stream app = Flask(__name__) app.secret_key = os.environ.get("FLASK_SECRET", "supersecret") +def configure_logging(): + if "gunicorn" in multiprocessing.current_process().name.lower(): + logger = logging.getLogger('gunicorn.error') + if logger.handlers: + sys.stdout = sys.stderr = logger.handlers[0].stream + app.logger.info("Redirecting stdout/stderr to Gunicorn logger.") + else: + app.logger.info("Running standalone Flask — no stdout/stderr redirection.") + +configure_logging() -KEYCLOAK_URL = os.getenv('KEYCLOAK_URL', "http://192.168.0.22:81/authn") + +KEYCLOAK_URL = os.getenv('KEYCLOAK_URL', "http://192.168.0.22:31260/authn") KEYCLOAK_REALM_NAME = os.getenv('KEYCLOAK_REALM_NAME', "inferx") KEYCLOAK_CLIENT_ID = os.getenv('KEYCLOAK_CLIENT_ID', "infer_client") -KEYCLOAK_CLIENT_SECRET = os.getenv('KEYCLOAK_CLIENT_SECRET', "SJvfmGFViBNHsLfhkto4eRE0PnPhpyft") +KEYCLOAK_CLIENT_SECRET = os.getenv('KEYCLOAK_CLIENT_SECRET', "M2Dse5531tdtyipZdGizLEeoOVgziQRX") server_metadata_url = f"{KEYCLOAK_URL}/realms/{KEYCLOAK_REALM_NAME}/.well-known/openid-configuration" @@ -81,7 +93,7 @@ tls = False -apihostaddr = "http://localhost:4000" +apihostaddr = os.getenv('INFERX_APIGW_ADDR', "http://localhost:4000") # apihostaddr = "https://quarksoft.io:4000" def is_token_expired(): @@ -197,7 +209,7 @@ def logout(): f"id_token_hint={id_token}" ) -def getapkkeys(): +def getapikeys(): access_token = session.get('token')['access_token'] # Include the access token in the Authorization header headers = {'Authorization': f'Bearer {access_token}'} @@ -208,20 +220,20 @@ def getapkkeys(): return apikeys -@app.route('/apikeys') +@app.route('/admin') @require_login def apikeys(): - apikeys = getapkkeys() return render_template( - "apikey.html", apikeys=apikeys + "admin.html" ) @app.route('/generate_apikeys', methods=['GET']) @require_login def generate_apikeys(): - apikeys = getapkkeys() + apikeys = getapikeys() return apikeys + @app.route('/apikeys', methods=['PUT']) @require_login def create_apikey(): @@ -319,6 +331,29 @@ def getnode(name: str): return func +def listtenants(): + access_token = session.get('access_token', '') + if access_token == "": + headers = {} + else: + headers = {'Authorization': f'Bearer {access_token}'} + url = "{}/objects/tenant/system/system/".format(apihostaddr) + resp = 
requests.get(url, headers=headers) + tenants = json.loads(resp.content) + + return tenants + +def listnamespaces(): + access_token = session.get('access_token', '') + if access_token == "": + headers = {} + else: + headers = {'Authorization': f'Bearer {access_token}'} + url = "{}/objects/namespace///".format(apihostaddr) + resp = requests.get(url, headers=headers) + namespaces = json.loads(resp.content) + + return namespaces def listpods(tenant: str, namespace: str, funcname: str): access_token = session.get('access_token', '') @@ -460,6 +495,25 @@ def text2img(): headers = [(name, value) for (name, value) in resp.raw.headers.items() if name.lower() not in excluded_headers] return Response(resp.iter_content(1024000), resp.status_code, headers) +@app.route('/generate_tenants', methods=['GET']) +@require_login +def generate_tenants(): + tenants = listtenants() + print("tenants ", tenants) + return tenants + +@app.route('/generate_namespaces', methods=['GET']) +@require_login +def generate_namespaces(): + namespaces = listnamespaces() + print("namespaces ", namespaces) + return namespaces + +@app.route('/generate_funcs', methods=['GET']) +@require_login +def generate_funcs(): + funcs = listfuncs("", "") + return funcs @app.route('/generate', methods=['POST']) @not_require_login @@ -574,19 +628,56 @@ def proxy(path): data=request.get_data(), cookies=request.cookies, allow_redirects=False, + timeout=60, stream=True ) except requests.exceptions.RequestException as e: return Response(f"Error connecting to backend server: {e}", status=502) # Exclude hop-by-hop headers as per RFC 2616 section 13.5.1 - excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection'] + excluded_headers = ['content-encoding', 'transfer-encoding', 'connection'] headers = [(name, value) for name, value in resp.raw.headers.items() if name.lower() not in excluded_headers] # Create a Flask response object with the backend server's response response = Response(stream_response(resp), resp.status_code, headers) return response +@app.route('/proxy1/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'OPTIONS']) +@require_login +def proxy1(path): + access_token = session.get('access_token', '') + headers = {key: value for key, value in request.headers if key.lower() != 'host'} + if access_token != "": + headers["Authorization"] = f'Bearer {access_token}' + + # Construct the full URL for the backend request + url = f"{apihostaddr}/{path}" + + try: + resp = requests.request( + method=request.method, + url=url, + headers=headers, + params=request.args, + data=request.get_data(), + cookies=request.cookies, + allow_redirects=False, + timeout=60, + stream=False + ) + except requests.exceptions.RequestException as e: + print("error ....") + return Response(f"Error connecting to backend server: {e}", status=502, mimetype='text/plain') + + response = Response(resp.content, resp.status_code, mimetype='text/plain') + # for name, value in resp.headers.items(): + # if name.lower() not in ['content-encoding', 'transfer-encoding', 'connection']: + # response.headers[name] = value + + return response + + + @app.route("/intro") def md(): name = request.args.get("name") @@ -679,14 +770,13 @@ def GetFunc(): sample = func["func"]["object"]["spec"]["sample_query"] map = sample["body"] apiType = sample["apiType"] + isAdmin = func["isAdmin"] version = func["func"]["object"]["spec"]["version"] fails = GetFailLogs(tenant, namespace, name, version) # Convert Python dictionary to pretty JSON string funcspec = 
json.dumps(func["func"]["object"]["spec"], indent=4) - funcspec = funcspec.replace("\n", "<br>")
- funcspec = funcspec.replace(" ", "&nbsp;") return render_template( "func.html", @@ -698,6 +788,7 @@ def GetFunc(): funcspec=funcspec, apiType=apiType, map=map, + isAdmin=isAdmin, path=sample["path"] ) diff --git a/dashboard/nginx.conf b/dashboard/nginx.conf index 6837911..ea084c7 100644 --- a/dashboard/nginx.conf +++ b/dashboard/nginx.conf @@ -34,7 +34,7 @@ server { } location /authn/ { - proxy_pass http://localhost:1260/authn/; + proxy_pass http://localhost:31260/authn/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; @@ -57,59 +57,59 @@ } } -server { - listen 443 ssl; - server_name inferx.net; - - # SSL Certificates - ssl_certificate /etc/letsencrypt/live/inferx.net/fullchain.pem; # Or Let's Encrypt: /etc/letsencrypt/live/yourd> - ssl_certificate_key /etc/letsencrypt/live/inferx.net/privkey.pem; # Or Let's Encrypt: /etc/letsencrypt/live/you> - - ssl_protocols TLSv1.2 TLSv1.3; - ssl_ciphers HIGH:!aNULL:!MD5; - ssl_prefer_server_ciphers on; - - location /public/ { - autoindex on; - alias /public/; - } - - location ~ \.\. { - deny all; - } - - location /funccall/ { - proxy_pass http://localhost:4000/funccall/; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; +# server { +# listen 443 ssl; +# server_name inferx.net; + +# # SSL Certificates +# ssl_certificate /etc/letsencrypt/live/inferx.net/fullchain.pem; # Or Let's Encrypt: /etc/letsencrypt/live/yourd> +# ssl_certificate_key /etc/letsencrypt/live/inferx.net/privkey.pem; # Or Let's Encrypt: /etc/letsencrypt/live/you> + +# ssl_protocols TLSv1.2 TLSv1.3; +# ssl_ciphers HIGH:!aNULL:!MD5; +# ssl_prefer_server_ciphers on; + +# location /public/ { +# autoindex on; +# alias /public/; +# } + +# location ~ \.\. 
{ +# deny all; +# } + +# location /funccall/ { +# proxy_pass http://localhost:4000/funccall/; +# proxy_set_header Host $host; +# proxy_set_header X-Real-IP $remote_addr; +# proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; +# proxy_set_header X-Forwarded-Proto $scheme; - proxy_buffering off; - proxy_request_buffering off; - proxy_http_version 1.1; - chunked_transfer_encoding on; - } - - # location /authn/ { - # proxy_pass http://localhost:1260/authn/; - # proxy_set_header Host $host; - # proxy_set_header X-Real-IP $remote_addr; - # proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - # proxy_set_header X-Forwarded-Proto $scheme; - # proxy_set_header X-Forwarded-Port 8000; - # } - - location / { - proxy_pass http://127.0.0.1:1250; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - - proxy_buffering off; - proxy_request_buffering off; - proxy_http_version 1.1; - chunked_transfer_encoding on; - } -} +# proxy_buffering off; +# proxy_request_buffering off; +# proxy_http_version 1.1; +# chunked_transfer_encoding on; +# } + +# # location /authn/ { +# # proxy_pass http://localhost:31260/authn/; +# # proxy_set_header Host $host; +# # proxy_set_header X-Real-IP $remote_addr; +# # proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; +# # proxy_set_header X-Forwarded-Proto $scheme; +# # proxy_set_header X-Forwarded-Port 8000; +# # } + +# location / { +# proxy_pass http://127.0.0.1:1250; +# proxy_set_header Host $host; +# proxy_set_header X-Real-IP $remote_addr; +# proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; +# proxy_set_header X-Forwarded-Proto $scheme; + +# proxy_buffering off; +# proxy_request_buffering off; +# proxy_http_version 1.1; +# chunked_transfer_encoding on; +# } +# } diff --git a/dashboard/templates/admin.html b/dashboard/templates/admin.html new file mode 100644 index 0000000..8efed14 --- /dev/null +++ b/dashboard/templates/admin.html @@ -0,0 +1,590 @@ +{% extends 'base.html' %} + +{% block content %} + + + +
[admin.html body (590 lines) lost to markup stripping — recoverable structure: the new template extends base.html and renders four management panels: Apikeys (columns Select / Name / Username / Apikey, with an "Add Apikey" form), Models (Select / Tenant / Namespace / Name, with an "Add model" form taking Namespace and Model name), Namespaces (Select / Tenant / Name, with an "Add Namespace" form taking Tenant), and Tenants (Select / Name, with an "Add Tenant" form); it closes with {{ hosturl }} and {% endblock %}.]
diff --git a/dashboard/templates/apikey.html b/dashboard/templates/apikey.html deleted file mode 100644 index e8ea855..0000000 --- a/dashboard/templates/apikey.html +++ /dev/null @@ -1,126 +0,0 @@ -{% extends 'base.html' %} - -{% block content %} -
[deleted apikey.html body lost to markup stripping — it extended base.html and rendered a single "Apikey" panel (columns Select / Name / Username / Apikey) with an "Add Apikey" form, closing with {{ hosturl }} and {% endblock %}; its function is absorbed by admin.html above.]
diff --git a/dashboard/templates/base.html b/dashboard/templates/base.html index 7021d54..4fb42b5 100644 --- a/dashboard/templates/base.html +++ b/dashboard/templates/base.html @@ -44,13 +44,13 @@

[base.html nav hunk markup stripped — unchanged context: tagline "--   Serve tens models in one box with ultra-fa…" and nav links Pods, Snapshots, Nodes; the change renames the last nav link:] - Apikeys + Admin
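The runblob target now derives LOCAL_IP by splitting the output of hostname -I and taking the first address that does not start with 172., instead of blindly taking the first field; the intent appears to be skipping Docker bridge addresses. A minimal Python sketch of the same selection, assuming a Linux host where hostname -I exists — the 172.* filter is the patch's own heuristic and would also skip legitimate 172.x LAN addresses:

    # Hedged sketch of runblob's LOCAL_IP pipeline:
    #   hostname -I | tr ' ' '\n' | grep -v '^172\.' | head -n 1 | xargs
    import subprocess

    def pick_local_ip() -> str:
        addrs = subprocess.run(
            ["hostname", "-I"], capture_output=True, text=True, check=True
        ).stdout.split()
        for addr in addrs:
            if not addr.startswith("172."):  # heuristic: skip Docker bridge nets
                return addr
        raise RuntimeError("no non-172.* address reported by hostname -I")

    if __name__ == "__main__":
        # Mirrors the first line the Makefile writes into .env
        print(f"LOCAL_IP={pick_local_ip()}")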
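In dashboard/app.py, the /proxy route keeps streaming but stops stripping Content-Length, now excluding only content-encoding, transfer-encoding, and connection. For reference, a hedged sketch of hop-by-hop filtering using the full RFC 2616 §13.5.1 set; HOP_BY_HOP and forwardable are this sketch's own names, not the patch's:

    # The RFC 2616 §13.5.1 hop-by-hop headers. The patch's excluded list is
    # smaller (it adds content-encoding because requests decompresses bodies)
    # and now forwards Content-Length, which is only safe while the relayed
    # body length is unchanged.
    HOP_BY_HOP = {
        "connection", "keep-alive", "proxy-authenticate", "proxy-authorization",
        "te", "trailers", "transfer-encoding", "upgrade",
    }

    def forwardable(headers):
        """Return only end-to-end headers from an upstream response."""
        return {k: v for k, v in headers.items() if k.lower() not in HOP_BY_HOP}

    # Example: forwardable({"Connection": "keep-alive", "Content-Type": "text/plain"})
    # -> {"Content-Type": "text/plain"}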
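Every model config in this patch moves to vllm/vllm-openai:v0.7.3, and most also gain a VLLM_CUDART_SO_PATH env pointing at the CUDA 12.1 runtime. A hedged lint sketch that reports configs missing either setting; the object.spec / bare spec layouts and the envs-as-[key, value]-pairs shape are taken from the files above, while EXPECTED_IMAGE and CUDART_KEY are this sketch's own names:

    # Hedged sketch: flag config/*.json specs lacking the image or env settings
    # this patch introduces. Assumes the repository layout shown in the diff.
    import json
    import pathlib

    EXPECTED_IMAGE = "vllm/vllm-openai:v0.7.3"
    CUDART_KEY = "VLLM_CUDART_SO_PATH"

    for path in sorted(pathlib.Path("config").glob("*.json")):
        doc = json.loads(path.read_text())
        # Specs appear either under "object.spec" or at the top level.
        spec = doc.get("object", {}).get("spec") or doc.get("spec") or {}
        if spec.get("image") != EXPECTED_IMAGE:
            print(f"{path.name}: image is {spec.get('image')!r}")
        if not any(env[0] == CUDART_KEY for env in spec.get("envs", [])):
            print(f"{path.name}: missing {CUDART_KEY}")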
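The dashboard's Keycloak default moves from port 81 to 31260, matching the nginx /authn/ proxy_pass change, and app.py builds its OIDC discovery URL from that base. A hedged reachability check — the host, port, and realm below are only the patch's fallback defaults, not a guaranteed live endpoint:

    # Hedged sketch: fetch the OIDC discovery document at the new default
    # Keycloak address; the URL shape mirrors server_metadata_url in app.py.
    import requests

    KEYCLOAK_URL = "http://192.168.0.22:31260/authn"  # fallback default from the patch
    REALM = "inferx"

    meta = requests.get(
        f"{KEYCLOAK_URL}/realms/{REALM}/.well-known/openid-configuration",
        timeout=10,
    ).json()
    print(meta["issuer"], meta["token_endpoint"])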