diff --git a/Makefile b/Makefile index 2b94b81..b6d0212 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ ARCH := ${shell uname -m} - -LOCAL_IP=${hostname -I | awk '{print $$1}' | xargs} +VERSION := v0.1.1 +NODE_NAME=${shell hostname} all: ctl dash spdk runmodel @@ -14,14 +14,13 @@ dash: -rm ./target/dashboard/* -rf cp ./dashboard/* ./target/dashboard -rL cp ./deployment/dashboard.Dockerfile ./target/dashboard/Dockerfile - -sudo docker image rm inferx/inferx_dashboard:v0.1.0 - sudo docker build -t inferx/inferx_dashboard:v0.1.0 ./target/dashboard - # sudo docker push inferx/inferx_dashboard:v0.1.0 + -sudo docker image rm inferx/inferx_dashboard:$(VERSION) + sudo docker build -t inferx/inferx_dashboard:$(VERSION) ./target/dashboard pushdash: # sudo docker login -u inferx - sudo docker tag inferx/inferx_dashboard:v0.1.0 inferx/inferx_dashboard:v0.1.0 - sudo docker push inferx/inferx_dashboard:v0.1.0 + sudo docker tag inferx/inferx_dashboard:$(VERSION) inferx/inferx_dashboard:$(VERSION) + sudo docker push inferx/inferx_dashboard:$(VERSION) runmodel: mkdir -p ./target/runmodel @@ -29,30 +28,30 @@ runmodel: cp ./script/run_llava.py ./target/runmodel cp ./script/run_stablediffusion.py ./target/runmodel cp ./deployment/vllm-opai.Dockerfile ./target/runmodel/Dockerfile - -sudo docker image rm vllm-openai-upgraded:v0.1.0 - sudo docker build -t vllm-openai-upgraded:v0.1.0 ./target/runmodel + -sudo docker image rm vllm-openai-upgraded:$(VERSION) + sudo docker build -t vllm-openai-upgraded:$(VERSION) ./target/runmodel spdk: mkdir -p ./target/spdk -rm ./target/spdk/* -rf cp ./deployment/spdk.Dockerfile ./target/spdk/Dockerfile - -sudo docker image rm inferx/spdk-container:v0.1.0 - sudo docker build -t inferx/spdk-container:v0.1.0 ./target/spdk + -sudo docker image rm inferx/spdk-container:$(VERSION) + sudo docker build -t inferx/spdk-container:$(VERSION) ./target/spdk spdk2: mkdir -p ./target/spdk -rm ./target/spdk/* -rf cp ./deployment/spdk2.Dockerfile ./target/spdk/Dockerfile cp ./deployment/spdk.script ./target/spdk/entrypoint.sh - -sudo docker image rm inferx/spdk-container2:v0.1.0 - sudo docker build -t inferx/spdk-container2:v0.1.0 ./target/spdk + -sudo docker image rm inferx/spdk-container2:$(VERSION) + sudo docker build -t inferx/spdk-container2:$(VERSION) ./target/spdk pushspdk: # sudo docker login -u inferx - sudo docker tag inferx/spdk-container:v0.1.0 inferx/spdk-container:v0.1.0 - sudo docker push inferx/spdk-container:v0.1.0 - sudo docker tag inferx/spdk-container2:v0.1.0 inferx/spdk-container2:v0.1.0 - sudo docker push inferx/spdk-container2:v0.1.0 + sudo docker tag inferx/spdk-container:$(VERSION) inferx/spdk-container:$(VERSION) + sudo docker push inferx/spdk-container:$(VERSION) + sudo docker tag inferx/spdk-container2:$(VERSION) inferx/spdk-container2:$(VERSION) + sudo docker push inferx/spdk-container2:$(VERSION) sql: sudo cp ./dashboard/sql/create_table.sql /opt/inferx/config sudo cp ./dashboard/sql/secret.sql /opt/inferx/config @@ -60,6 +59,8 @@ sql: run: -sudo pkill -9 inferx @echo "LOCAL_IP=$$(hostname -I | awk '{print $$1}' | xargs)" > .env + @echo "Version=$(VERSION)" >> .env + @echo "HOSTNAME=$(NODE_NAME)" >> .env sudo docker compose -f docker-compose.yml build - sudo rm -f /opt/inferx/log/inferx.log - sudo rm -f /opt/inferx/log/onenode.log @@ -68,11 +69,14 @@ run: runblob: -sudo pkill -9 inferx - @echo "LOCAL_IP=$$(hostname -I | awk '{print $$1}' | xargs)" > .env + @echo "LOCAL_IP=$$(hostname -I | tr ' ' '\n' | grep -v '^172\.' 
| head -n 1 | xargs)" > .env + @echo "Version=$(VERSION)" >> .env + @echo "HOSTNAME=$(NODE_NAME)" >> .env sudo docker compose -f docker-compose_blob.yml build - sudo rm -f /opt/inferx/log/inferx.log - sudo rm -f /opt/inferx/log/onenode.log sudo docker compose -f docker-compose_blob.yml up -d --remove-orphans + cat .env rm .env stop: @@ -82,7 +86,24 @@ stopblob: sudo docker compose -f docker-compose_blob.yml down rundash: - sudo docker run --net=host --name inferx_dashboard -v /etc/letsencrypt/:/etc/letsencrypt/ --rm inferx/inferx_dashboard:v0.1.0 + sudo docker run --net=host --name inferx_dashboard -v /etc/letsencrypt/:/etc/letsencrypt/ --rm inferx/inferx_dashboard:$(VERSION) stopdash: - sudo docker stop inferx_dashboard \ No newline at end of file + sudo docker stop inferx_dashboard + +runkblob: + sudo kubectl apply -f k8s/spdk.yaml + sudo kubectl apply -f k8s/etcd.yaml + sudo kubectl apply -f k8s/secretdb.yaml + sudo kubectl apply -f k8s/db-deployment.yaml + sudo kubectl apply -f k8s/keycloak_postgres.yaml + sudo kubectl apply -f k8s/keycloak.yaml + sudo kubectl apply -f k8s/statesvc.yaml + sudo kubectl apply -f k8s/scheduler.yaml + sudo kubectl apply -f k8s/nodeagent.yaml + sudo kubectl apply -f k8s/dashboard.yaml + sudo kubectl apply -f k8s/ingress.yaml + +stopnodeagent: + sudo kubectl delete DaemonSet nodeagent-blob + sudo kubectl delete DaemonSet nodeagent-file \ No newline at end of file diff --git a/config/Aquila-7B.json b/config/Aquila-7B.json index 92fcea5..373dce9 100644 --- a/config/Aquila-7B.json +++ b/config/Aquila-7B.json @@ -5,7 +5,7 @@ "name": "Aquila-7B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "BAAI/Aquila-7B", @@ -17,7 +17,7 @@ ], "resources": { "CPU": 20000, - "Mem": 50000, + "Mem": 60000, "GPU": { "Type": "Any", "Count": 2, @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Baichuan-7B.json b/config/Baichuan-7B.json index 0afff2b..05cf918 100644 --- a/config/Baichuan-7B.json +++ b/config/Baichuan-7B.json @@ -5,7 +5,7 @@ "name": "Baichuan-7B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "baichuan-inc/Baichuan-7B", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Baichuan2-13B-Chat-4bits.json b/config/Baichuan2-13B-Chat-4bits.json index e8d0f7b..89d953c 100644 --- a/config/Baichuan2-13B-Chat-4bits.json +++ b/config/Baichuan2-13B-Chat-4bits.json @@ -5,7 +5,7 @@ "name": "Baichuan2-13B-Chat-4bits", "object": { "spec": { - "image": "vllm-openai-upgraded:v.0.1", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "baichuan-inc/Baichuan2-13B-Chat-4bits", @@ -16,17 +16,21 @@ ], "resources": { "CPU": 12000, - "Mem": 14000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, - "vRam": 8000 + "vRam": 13800 } }, "envs": [ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Baichuan2-7B-Chat.json 
b/config/Baichuan2-7B-Chat.json index bbb8b62..6b5e516 100644 --- a/config/Baichuan2-7B-Chat.json +++ b/config/Baichuan2-7B-Chat.json @@ -5,7 +5,7 @@ "name": "Baichuan2-7B-Chat", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "baichuan-inc/Baichuan2-7B-Chat", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/DeciLM-7B-instruct.json b/config/DeciLM-7B-instruct.json index edd1cb5..466f5f1 100644 --- a/config/DeciLM-7B-instruct.json +++ b/config/DeciLM-7B-instruct.json @@ -5,7 +5,7 @@ "name": "DeciLM-7B-instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Deci/DeciLM-7B-instruct", diff --git a/config/DeciLM-7B.json b/config/DeciLM-7B.json index b36b838..f581d7d 100644 --- a/config/DeciLM-7B.json +++ b/config/DeciLM-7B.json @@ -5,7 +5,7 @@ "name": "DeciLM-7B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Deci/DeciLM-7B", diff --git a/config/DeepSeek-R1-Distill-Llama-8B.json b/config/DeepSeek-R1-Distill-Llama-8B.json index 8372ca5..3da0445 100644 --- a/config/DeepSeek-R1-Distill-Llama-8B.json +++ b/config/DeepSeek-R1-Distill-Llama-8B.json @@ -5,7 +5,7 @@ "name": "DeepSeek-R1-Distill-Llama-8B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "/root/.cache/huggingface/git/DeepSeek-R1-Distill-Llama-8B", @@ -30,6 +30,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/DeepSeek-R1-Distill-Qwen-1.5B.json b/config/DeepSeek-R1-Distill-Qwen-1.5B.json index 2c75845..209d9e1 100644 --- a/config/DeepSeek-R1-Distill-Qwen-1.5B.json +++ b/config/DeepSeek-R1-Distill-Qwen-1.5B.json @@ -5,7 +5,7 @@ "name": "DeepSeek-R1-Distill-Qwen-1.5B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "/root/.cache/huggingface/git/DeepSeek-R1-Distill-Qwen-1.5B", @@ -18,7 +18,7 @@ ], "resources": { "CPU": 20000, - "Mem": 50000, + "Mem": 60000, "GPU": { "Type": "Any", "Count": 1, @@ -29,6 +29,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/DeepSeek-R1-Distill-Qwen-7B.json b/config/DeepSeek-R1-Distill-Qwen-7B.json index b38d551..37fdbd8 100644 --- a/config/DeepSeek-R1-Distill-Qwen-7B.json +++ b/config/DeepSeek-R1-Distill-Qwen-7B.json @@ -5,7 +5,7 @@ "name": "DeepSeek-R1-Distill-Qwen-7B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "/root/.cache/huggingface/git/DeepSeek-R1-Distill-Qwen-7B", @@ -30,6 +30,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/EXAONE-3.0-7.8B-Instruct copy.json 
b/config/EXAONE-3.0-7.8B-Instruct copy.json index 916e0f2..08f15cc 100644 --- a/config/EXAONE-3.0-7.8B-Instruct copy.json +++ b/config/EXAONE-3.0-7.8B-Instruct copy.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "gemma-7b", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "google/gemma-7b", diff --git a/config/EXAONE-3.0-7.8B-Instruct.json b/config/EXAONE-3.0-7.8B-Instruct.json index b110f37..b6f029b 100644 --- a/config/EXAONE-3.0-7.8B-Instruct.json +++ b/config/EXAONE-3.0-7.8B-Instruct.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "EXAONE-3.0-7.8B-Instruct", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", diff --git a/config/Llama-2-13b-hf.json b/config/Llama-2-13b-hf.json index 9881ddf..1d7965a 100644 --- a/config/Llama-2-13b-hf.json +++ b/config/Llama-2-13b-hf.json @@ -5,7 +5,7 @@ "name": "Llama-2-13b-hf", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "meta-llama/Llama-2-13b-hf", @@ -18,7 +18,7 @@ ], "resources": { "CPU": 20000, - "Mem": 50000, + "Mem": 60000, "GPU": { "Type": "Any", "Count": 2, @@ -29,6 +29,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Llama-3.2-3B-Instruct.json b/config/Llama-3.2-3B-Instruct.json index 1b12c9c..85b4794 100644 --- a/config/Llama-3.2-3B-Instruct.json +++ b/config/Llama-3.2-3B-Instruct.json @@ -5,7 +5,7 @@ "name": "Llama-3.2-3B-Instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "meta-llama/Llama-3.2-3B-Instruct", @@ -27,6 +27,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Llama-3.2-3B-Instruct_2gpu.json b/config/Llama-3.2-3B-Instruct_2gpu.json index debbb39..5bb2563 100644 --- a/config/Llama-3.2-3B-Instruct_2gpu.json +++ b/config/Llama-3.2-3B-Instruct_2gpu.json @@ -5,7 +5,7 @@ "name": "Llama-3.2-3B-Instruct_2gpu", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "meta-llama/Llama-3.2-3B-Instruct", diff --git a/config/Meta-Llama-3-8B-Instruct.json b/config/Meta-Llama-3-8B-Instruct.json index 3b2b070..fc33444 100644 --- a/config/Meta-Llama-3-8B-Instruct.json +++ b/config/Meta-Llama-3-8B-Instruct.json @@ -5,7 +5,7 @@ "name": "Meta-Llama-3-8B-Instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "meta-llama/Meta-Llama-3-8B-Instruct", diff --git a/config/Meta-Llama-3-8B.json b/config/Meta-Llama-3-8B.json index 0ab141d..c4f9039 100644 --- a/config/Meta-Llama-3-8B.json +++ b/config/Meta-Llama-3-8B.json @@ -5,7 +5,7 @@ "name": "Meta-Llama-3-8B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "meta-llama/Meta-Llama-3-8B", diff --git a/config/MiniCPM-2B-dpo-bf16.json b/config/MiniCPM-2B-dpo-bf16.json index e3266cc..538f09e 100644 --- a/config/MiniCPM-2B-dpo-bf16.json +++ b/config/MiniCPM-2B-dpo-bf16.json @@ -5,7 +5,7 @@ 
"name": "MiniCPM-2B-dpo-bf16", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "openbmb/MiniCPM-2B-dpo-bf16", @@ -27,6 +27,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/MiniCPM-2B-sft-bf16.json b/config/MiniCPM-2B-sft-bf16.json index f7758db..805834f 100644 --- a/config/MiniCPM-2B-sft-bf16.json +++ b/config/MiniCPM-2B-sft-bf16.json @@ -5,7 +5,7 @@ "name": "MiniCPM-2B-sft-bf16", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "openbmb/MiniCPM-2B-sft-bf16", @@ -26,6 +26,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Minitron-8B-Base.json b/config/Minitron-8B-Base.json index e007c48..dd6c8e0 100644 --- a/config/Minitron-8B-Base.json +++ b/config/Minitron-8B-Base.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "Minitron-8B-Base", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "nvidia/Minitron-8B-Base", diff --git a/config/Mistral-7B-Instruct-v0.1.json b/config/Mistral-7B-Instruct-v0.1.json index cfe4b66..758fd0c 100644 --- a/config/Mistral-7B-Instruct-v0.1.json +++ b/config/Mistral-7B-Instruct-v0.1.json @@ -5,7 +5,7 @@ "name": "Mistral-7B-Instruct-v0.1", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "mistralai/Mistral-7B-Instruct-v0.1", diff --git a/config/Mistral-7B-v0.1.json b/config/Mistral-7B-v0.1.json index 1dd22b8..c8f99b3 100644 --- a/config/Mistral-7B-v0.1.json +++ b/config/Mistral-7B-v0.1.json @@ -5,7 +5,7 @@ "name": "Mistral-7B-v0.1", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "mistralai/Mistral-7B-v0.1", diff --git a/config/Mistral-7B-v0.1_2gpu.json b/config/Mistral-7B-v0.1_2gpu.json index b50bf0e..064ea57 100644 --- a/config/Mistral-7B-v0.1_2gpu.json +++ b/config/Mistral-7B-v0.1_2gpu.json @@ -5,7 +5,7 @@ "name": "Mistral-7B-v0.1_2gpu", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "mistralai/Mistral-7B-v0.1", @@ -29,6 +29,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/OLMo-1B-hf.json b/config/OLMo-1B-hf.json index 2d6c85c..70e6f1f 100644 --- a/config/OLMo-1B-hf.json +++ b/config/OLMo-1B-hf.json @@ -5,7 +5,7 @@ "name": "OLMo-1B-hf", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "allenai/OLMo-1B-hf", @@ -26,6 +26,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/OLMo-1B-hf_2gpu.json b/config/OLMo-1B-hf_2gpu.json index ab24284..ed0a264 100644 --- a/config/OLMo-1B-hf_2gpu.json +++ 
b/config/OLMo-1B-hf_2gpu.json @@ -5,7 +5,7 @@ "name": "OLMo-1B-hf_2gpu", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "allenai/OLMo-1B-hf", diff --git a/config/OLMo-7B-hf.json b/config/OLMo-7B-hf.json index 9ac9198..2b85a1c 100644 --- a/config/OLMo-7B-hf.json +++ b/config/OLMo-7B-hf.json @@ -5,7 +5,7 @@ "name": "OLMo-7B-hf", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "allenai/OLMo-7B-hf", @@ -16,7 +16,7 @@ ], "resources": { "CPU": 20000, - "Mem": 50000, + "Mem": 70000, "GPU": { "Type": "Any", "Count": 2, @@ -27,6 +27,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/OLMoE-1B-7B-0924-Instruct.json b/config/OLMoE-1B-7B-0924-Instruct.json index a6fdf32..a8636bd 100644 --- a/config/OLMoE-1B-7B-0924-Instruct.json +++ b/config/OLMoE-1B-7B-0924-Instruct.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "OLMoE-1B-7B-0924-Instruct", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "allenai/OLMoE-1B-7B-0924-Instruct", diff --git a/config/Phi-3-mini-128k-instruct.json b/config/Phi-3-mini-128k-instruct.json index c153126..9cf650f 100644 --- a/config/Phi-3-mini-128k-instruct.json +++ b/config/Phi-3-mini-128k-instruct.json @@ -5,7 +5,7 @@ "name": "Phi-3-mini-128k-instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "microsoft/Phi-3-mini-128k-instruct", @@ -16,7 +16,7 @@ ], "resources": { "CPU": 12000, - "Mem": 18000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -27,6 +27,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Phi-3-mini-4k-instruct.json b/config/Phi-3-mini-4k-instruct.json index caa85fc..1eb3d6d 100644 --- a/config/Phi-3-mini-4k-instruct.json +++ b/config/Phi-3-mini-4k-instruct.json @@ -5,7 +5,7 @@ "name": "Phi-3-mini-4k-instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "microsoft/Phi-3-mini-4k-instruct", @@ -16,7 +16,7 @@ ], "resources": { "CPU": 12000, - "Mem": 18000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -27,6 +27,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen-VL-Chat.json b/config/Qwen-VL-Chat.json index f5907bc..6022df1 100644 --- a/config/Qwen-VL-Chat.json +++ b/config/Qwen-VL-Chat.json @@ -5,7 +5,7 @@ "name": "Qwen-VL-Chat", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen-VL-Chat", diff --git a/config/Qwen.json b/config/Qwen.json index d895ed6..26625b7 100644 --- a/config/Qwen.json +++ b/config/Qwen.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "Qwen", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-3B-Instruct", diff --git 
a/config/Qwen1.5-MoE-A2.7B.json b/config/Qwen1.5-MoE-A2.7B.json index 67722be..f3b7b16 100644 --- a/config/Qwen1.5-MoE-A2.7B.json +++ b/config/Qwen1.5-MoE-A2.7B.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "Qwen1.5-MoE-A2.7B", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen1.5-MoE-A2.7B", diff --git a/config/Qwen2.5-1.5B.json b/config/Qwen2.5-1.5B.json index c4fa62a..8613ea4 100644 --- a/config/Qwen2.5-1.5B.json +++ b/config/Qwen2.5-1.5B.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-1.5B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-1.5B", @@ -15,7 +15,7 @@ ], "resources": { "CPU": 12000, - "Mem": 18000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -26,6 +26,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-7B-Instruct-1M.json b/config/Qwen2.5-7B-Instruct-1M.json index 4e5e5fa..ec782fd 100644 --- a/config/Qwen2.5-7B-Instruct-1M.json +++ b/config/Qwen2.5-7B-Instruct-1M.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-7B-Instruct-1M", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-7B-Instruct-1M", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-7B-Instruct-GPTQ-Int8.json b/config/Qwen2.5-7B-Instruct-GPTQ-Int8.json index df68dbb..ff25841 100644 --- a/config/Qwen2.5-7B-Instruct-GPTQ-Int8.json +++ b/config/Qwen2.5-7B-Instruct-GPTQ-Int8.json @@ -5,14 +5,14 @@ "name": "Qwen2.5-7B-Instruct-GPTQ-Int8", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8", "--gpu-memory-utilization", - "0.80", + "0.99", "--max-model-len", - "1000" + "500" ], "resources": { "CPU": 20000, @@ -27,6 +27,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ @@ -46,7 +50,7 @@ "path": "v1/completions", "body": { "model": "Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8", - "max_tokens": "800", + "max_tokens": "300", "temperature": "0", "stream": "true" } diff --git a/config/Qwen2.5-7B.json b/config/Qwen2.5-7B.json index 40efe67..2a6530a 100644 --- a/config/Qwen2.5-7B.json +++ b/config/Qwen2.5-7B.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "models--Qwen--Qwen2.5-7B", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-7B", diff --git a/config/Qwen2.5-Coder-1.5B-Instruct.json b/config/Qwen2.5-Coder-1.5B-Instruct.json index c25eca5..c601e11 100644 --- a/config/Qwen2.5-Coder-1.5B-Instruct.json +++ b/config/Qwen2.5-Coder-1.5B-Instruct.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-Coder-1.5B-Instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-Coder-1.5B-Instruct", @@ -14,7 +14,7 @@ ], "resources": { "CPU": 12000, - "Mem": 18000, + "Mem": 
24000, "GPU": { "Type": "Any", "Count": 1, @@ -25,6 +25,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-Coder-14B-Instruct-GPTQ-Int8.json b/config/Qwen2.5-Coder-14B-Instruct-GPTQ-Int8.json index 35ea7cf..7bd0234 100644 --- a/config/Qwen2.5-Coder-14B-Instruct-GPTQ-Int8.json +++ b/config/Qwen2.5-Coder-14B-Instruct-GPTQ-Int8.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-Coder-14B-Instruct-GPTQ-Int8", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-Int8", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-Coder-3B.json b/config/Qwen2.5-Coder-3B.json index 5180d94..073392b 100644 --- a/config/Qwen2.5-Coder-3B.json +++ b/config/Qwen2.5-Coder-3B.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-Coder-3B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-Coder-3B", @@ -14,7 +14,7 @@ ], "resources": { "CPU": 12000, - "Mem": 18000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -25,6 +25,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-Coder-7B-Instruct.json b/config/Qwen2.5-Coder-7B-Instruct.json index 0319fb4..4b7c54d 100644 --- a/config/Qwen2.5-Coder-7B-Instruct.json +++ b/config/Qwen2.5-Coder-7B-Instruct.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-Coder-7B-Instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-Coder-7B-Instruct", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-Math-1.5B-Instruct.json b/config/Qwen2.5-Math-1.5B-Instruct.json index fc094e3..f09c644 100644 --- a/config/Qwen2.5-Math-1.5B-Instruct.json +++ b/config/Qwen2.5-Math-1.5B-Instruct.json @@ -5,14 +5,14 @@ "name": "Qwen2.5-Math-1.5B-Instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-Math-1.5B-Instruct" ], "resources": { "CPU": 12000, - "Mem": 20000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -23,6 +23,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-Math-1.5B.json b/config/Qwen2.5-Math-1.5B.json index 867e906..c8a1472 100644 --- a/config/Qwen2.5-Math-1.5B.json +++ b/config/Qwen2.5-Math-1.5B.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-Math-1.5B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-Math-1.5B", @@ -15,7 +15,7 @@ ], 
"resources": { "CPU": 12000, - "Mem": 18000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -26,6 +26,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-Math-7B-Instruct.json b/config/Qwen2.5-Math-7B-Instruct.json index ac129d2..7bb0575 100644 --- a/config/Qwen2.5-Math-7B-Instruct.json +++ b/config/Qwen2.5-Math-7B-Instruct.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-Math-7B-Instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-Math-7B-Instruct", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen2.5-Math-7B.json b/config/Qwen2.5-Math-7B.json index e80973a..1d06e9b 100644 --- a/config/Qwen2.5-Math-7B.json +++ b/config/Qwen2.5-Math-7B.json @@ -5,7 +5,7 @@ "name": "Qwen2.5-Math-7B", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-Math-7B", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/Qwen7BInt8.json b/config/Qwen7BInt8.json index c6f752e..f072729 100644 --- a/config/Qwen7BInt8.json +++ b/config/Qwen7BInt8.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "Qwen", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8", diff --git a/config/TinyLlama-1.1B-Chat-v1.0.json b/config/TinyLlama-1.1B-Chat-v1.0.json index 0f5b444..f2e8465 100644 --- a/config/TinyLlama-1.1B-Chat-v1.0.json +++ b/config/TinyLlama-1.1B-Chat-v1.0.json @@ -5,7 +5,7 @@ "name": "TinyLlama-1.1B-Chat-v1.0", "object": { "spec": { - "image": "vllm/vllm-openai:v0.4.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", @@ -15,17 +15,21 @@ ], "resources": { "CPU": 20000, - "Mem": 18000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, - "vRam": 4500 + "vRam": 4800 } }, "envs": [ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/TinyLlama-1.1B-Chat-v1.0_13GB.json b/config/TinyLlama-1.1B-Chat-v1.0_13GB.json index e360214..76c8e99 100644 --- a/config/TinyLlama-1.1B-Chat-v1.0_13GB.json +++ b/config/TinyLlama-1.1B-Chat-v1.0_13GB.json @@ -5,7 +5,7 @@ "name": "TinyLlama-1.1B-Chat-v1.0_13GB", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", @@ -15,7 +15,7 @@ ], "resources": { "CPU": 20000, - "Mem": 18000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -26,6 +26,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git 
a/config/TinyLlama-1.1B-Chat-v1.0_2gpu.json b/config/TinyLlama-1.1B-Chat-v1.0_2gpu.json index 453dbdc..4957523 100644 --- a/config/TinyLlama-1.1B-Chat-v1.0_2gpu.json +++ b/config/TinyLlama-1.1B-Chat-v1.0_2gpu.json @@ -5,7 +5,7 @@ "name": "TinyLlama-1.1B-Chat-v1.0_2gpu", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", diff --git a/config/XVERSE-13B-Chat.json b/config/XVERSE-13B-Chat.json index 0897a02..bde8da0 100644 --- a/config/XVERSE-13B-Chat.json +++ b/config/XVERSE-13B-Chat.json @@ -5,7 +5,7 @@ "name": "XVERSE-13B-Chat", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "xverse/XVERSE-13B-Chat", diff --git a/config/XVERSE-7B-Chat.json b/config/XVERSE-7B-Chat.json index 2c4c539..04ad887 100644 --- a/config/XVERSE-7B-Chat.json +++ b/config/XVERSE-7B-Chat.json @@ -5,7 +5,7 @@ "name": "XVERSE-7B-Chat", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "xverse/XVERSE-7B-Chat", diff --git a/config/chatglm3-6b-128k.json b/config/chatglm3-6b-128k.json index c79da13..4ecbd4e 100644 --- a/config/chatglm3-6b-128k.json +++ b/config/chatglm3-6b-128k.json @@ -5,7 +5,7 @@ "name": "chatglm3-6b-128k", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "THUDM/chatglm3-6b-128k", @@ -18,7 +18,7 @@ ], "resources": { "CPU": 12000, - "Mem": 20000, + "Mem": 28000, "GPU": { "Type": "Any", "Count": 1, @@ -29,6 +29,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/chatglm3-6b-32k.json b/config/chatglm3-6b-32k.json index 5fe2189..7e24977 100644 --- a/config/chatglm3-6b-32k.json +++ b/config/chatglm3-6b-32k.json @@ -5,7 +5,7 @@ "name": "chatglm3-6b-32k", "object": { "spec": { - "image": "vllm/vllm-openai:v0.4.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "THUDM/chatglm3-6b-32k", @@ -18,7 +18,7 @@ ], "resources": { "CPU": 12000, - "Mem": 20000, + "Mem": 28000, "GPU": { "Type": "Any", "Count": 1, @@ -29,6 +29,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/chatglm3-6b.json b/config/chatglm3-6b.json index 936078a..94667b1 100644 --- a/config/chatglm3-6b.json +++ b/config/chatglm3-6b.json @@ -5,7 +5,7 @@ "name": "chatglm3-6b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "THUDM/chatglm3-6b", @@ -18,7 +18,7 @@ ], "resources": { "CPU": 12000, - "Mem": 20000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -29,6 +29,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/core42_jais-13b-bnb-4bit.json b/config/core42_jais-13b-bnb-4bit.json index 6341ae7..582d914 100644 --- a/config/core42_jais-13b-bnb-4bit.json +++ b/config/core42_jais-13b-bnb-4bit.json @@ -5,7 +5,7 @@ "name": "core42_jais-13b-bnb-4bit", "object": { 
"spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "jwnder/core42_jais-13b-bnb-4bit", diff --git a/config/core42_jais-13b-chat-bnb-4bit.json b/config/core42_jais-13b-chat-bnb-4bit.json index a87c7fa..47bcd0c 100644 --- a/config/core42_jais-13b-chat-bnb-4bit.json +++ b/config/core42_jais-13b-chat-bnb-4bit.json @@ -5,7 +5,7 @@ "name": "core42_jais-13b-chat-bnb-4bit", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "jwnder/core42_jais-13b-chat-bnb-4bit", diff --git a/config/deepseek-llm-7b-chat.json b/config/deepseek-llm-7b-chat.json index efa21d0..5b2f038 100644 --- a/config/deepseek-llm-7b-chat.json +++ b/config/deepseek-llm-7b-chat.json @@ -5,7 +5,7 @@ "name": "deepseek-llm-7b-chat", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "/root/.cache/huggingface/git/deepseek-llm-7b-chat", @@ -32,6 +32,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/deepseek-llm-7b-chat_2gpu.json b/config/deepseek-llm-7b-chat_2gpu.json index 6adaf67..d6c84ed 100644 --- a/config/deepseek-llm-7b-chat_2gpu.json +++ b/config/deepseek-llm-7b-chat_2gpu.json @@ -5,7 +5,7 @@ "name": "deepseek-llm-7b-chat_2gpu", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "/root/.cache/huggingface/git/deepseek-llm-7b-chat", @@ -30,6 +30,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/deepseek-math-7b-instruct.json b/config/deepseek-math-7b-instruct.json index c52d852..1568692 100644 --- a/config/deepseek-math-7b-instruct.json +++ b/config/deepseek-math-7b-instruct.json @@ -5,7 +5,7 @@ "name": "deepseek-math-7b-instruct", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "/root/.cache/huggingface/git/deepseek-math-7b-instruct", @@ -30,6 +30,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/dolly-v2-12b.json b/config/dolly-v2-12b.json index da20a3f..6f90392 100644 --- a/config/dolly-v2-12b.json +++ b/config/dolly-v2-12b.json @@ -5,7 +5,7 @@ "name": "dolly-v2-12b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "databricks/dolly-v2-12b", @@ -31,6 +31,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/falcon-7b.json b/config/falcon-7b.json index e245b2c..a9cabca 100644 --- a/config/falcon-7b.json +++ b/config/falcon-7b.json @@ -5,7 +5,7 @@ "name": "falcon-7b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "tiiuae/falcon-7b", diff --git a/config/falcon-rw-7b.json 
b/config/falcon-rw-7b.json index 6c0ec42..157fab8 100644 --- a/config/falcon-rw-7b.json +++ b/config/falcon-rw-7b.json @@ -5,7 +5,7 @@ "name": "falcon-rw-7b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "tiiuae/falcon-rw-7b", @@ -27,6 +27,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/gemma-7b.json b/config/gemma-7b.json index 916e0f2..08f15cc 100644 --- a/config/gemma-7b.json +++ b/config/gemma-7b.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "gemma-7b", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "google/gemma-7b", diff --git a/config/gpt-j-6b.json b/config/gpt-j-6b.json index fc989bf..0d54647 100644 --- a/config/gpt-j-6b.json +++ b/config/gpt-j-6b.json @@ -5,7 +5,7 @@ "name": "gpt-j-6b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "EleutherAI/gpt-j-6b", diff --git a/config/gpt2-xl.json b/config/gpt2-xl.json index 1c1ca30..32cd1d9 100644 --- a/config/gpt2-xl.json +++ b/config/gpt2-xl.json @@ -5,7 +5,7 @@ "name": "gpt2-xl", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "openai-community/gpt2-xl", @@ -15,7 +15,7 @@ ], "resources": { "CPU": 12000, - "Mem": 18000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, @@ -26,6 +26,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/gpt4all-j.json b/config/gpt4all-j.json index 02c9bcf..9bbf9e2 100644 --- a/config/gpt4all-j.json +++ b/config/gpt4all-j.json @@ -5,7 +5,7 @@ "name": "gpt4all-j", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "nomic-ai/gpt4all-j", @@ -17,7 +17,7 @@ ], "resources": { "CPU": 20000, - "Mem": 50000, + "Mem": 60000, "GPU": { "Type": "Any", "Count": 2, @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/internlm2-7b.json b/config/internlm2-7b.json index 65b19b9..e9ade3d 100644 --- a/config/internlm2-7b.json +++ b/config/internlm2-7b.json @@ -5,7 +5,7 @@ "name": "internlm2-7b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "internlm/internlm2-7b", diff --git a/config/internlm2_5-7b-chat.json b/config/internlm2_5-7b-chat.json index 67aab7e..a7ba978 100644 --- a/config/internlm2_5-7b-chat.json +++ b/config/internlm2_5-7b-chat.json @@ -5,7 +5,7 @@ "name": "internlm2_5-7b-chat", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "internlm/internlm2_5-7b-chat", diff --git a/config/llama_8BInt8.json b/config/llama_8BInt8.json index 5fc3408..16831b4 100644 --- a/config/llama_8BInt8.json +++ b/config/llama_8BInt8.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "TinyLlama-1.1B-Chat-v1.0_2gpu", "spec": { - "image": 
"vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "meta-llama/Llama-Guard-3-8B-INT8", diff --git a/config/mamba-1.4b-hf.json b/config/mamba-1.4b-hf.json index 632bf51..5c747a7 100644 --- a/config/mamba-1.4b-hf.json +++ b/config/mamba-1.4b-hf.json @@ -5,7 +5,7 @@ "name": "mamba-1.4b-hf", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "state-spaces/mamba-1.4b-hf", diff --git a/config/mistral.json b/config/mistral.json index 9ab0172..3d3644c 100644 --- a/config/mistral.json +++ b/config/mistral.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "mistral", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "mistralai/Mistral-7B-v0.1", diff --git a/config/models.txt b/config/models.txt index 92e83a7..8cebe05 100644 --- a/config/models.txt +++ b/config/models.txt @@ -185,9 +185,6 @@ export IFERX_PASSWORD="test" /opt/inferx/bin/ixctl create deepseek-llm-7b-chat.json /opt/inferx/bin/ixctl create deepseek-llm-7b-chat_2gpu.json -/opt/inferx/bin/ixctl update deepseek-llm-7b-chat.json -/opt/inferx/bin/ixctl update deepseek-llm-7b-chat_2gpu.json - /opt/inferx/bin/ixctl create DeepSeek-R1-Distill-Llama-8B.json /opt/inferx/bin/ixctl create DeepSeek-R1-Distill-Qwen-1.5B.json /opt/inferx/bin/ixctl create DeepSeek-R1-Distill-Qwen-7B.json diff --git a/config/mpt-7b-storywriter.json b/config/mpt-7b-storywriter.json index 4f14c3f..9563700 100644 --- a/config/mpt-7b-storywriter.json +++ b/config/mpt-7b-storywriter.json @@ -5,7 +5,7 @@ "name": "mpt-7b-storywriter", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "mosaicml/mpt-7b-storywriter", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/mpt-7b.json b/config/mpt-7b.json index fdc3389..29f8c9e 100644 --- a/config/mpt-7b.json +++ b/config/mpt-7b.json @@ -5,7 +5,7 @@ "name": "mpt-7b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "mosaicml/mpt-7b", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/oasst-sft-4-pythia-12b-epoch-3.5.json b/config/oasst-sft-4-pythia-12b-epoch-3.5.json index 9ebd83a..4228ae7 100644 --- a/config/oasst-sft-4-pythia-12b-epoch-3.5.json +++ b/config/oasst-sft-4-pythia-12b-epoch-3.5.json @@ -5,7 +5,7 @@ "name": "oasst-sft-4-pythia-12b-epoch-3.5", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", @@ -18,7 +18,7 @@ ], "resources": { "CPU": 20000, - "Mem": 50000, + "Mem": 60000, "GPU": { "Type": "Any", "Count": 2, @@ -29,6 +29,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/opt-iml-max-1.3b.json b/config/opt-iml-max-1.3b.json index 525ddbd..d327d23 100644 --- a/config/opt-iml-max-1.3b.json +++ 
b/config/opt-iml-max-1.3b.json @@ -5,7 +5,7 @@ "name": "opt-iml-max-1.3b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "facebook/opt-iml-max-1.3b", @@ -14,17 +14,21 @@ ], "resources": { "CPU": 12000, - "Mem": 15000, + "Mem": 24000, "GPU": { "Type": "Any", "Count": 1, - "vRam": 3800 + "vRam": 4500 } }, "envs": [ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/persimmon-8b-base.json b/config/persimmon-8b-base.json index 04e9e04..9ee813d 100644 --- a/config/persimmon-8b-base.json +++ b/config/persimmon-8b-base.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "persimmon-8b-base", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "adept/persimmon-8b-base", diff --git a/config/persimmon-8b-chat.json b/config/persimmon-8b-chat.json index 1babc57..e980ac1 100644 --- a/config/persimmon-8b-chat.json +++ b/config/persimmon-8b-chat.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "persimmon-8b-chat", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "adept/persimmon-8b-chat", diff --git a/config/pythia-12b.json b/config/pythia-12b.json index 5ebda70..032a7ca 100644 --- a/config/pythia-12b.json +++ b/config/pythia-12b.json @@ -5,7 +5,7 @@ "name": "pythia-12b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "EleutherAI/pythia-12b", @@ -18,7 +18,7 @@ ], "resources": { "CPU": 20000, - "Mem": 50000, + "Mem": 60000, "GPU": { "Type": "Any", "Count": 2, @@ -29,6 +29,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/reader.json b/config/reader.json index 9a51249..56ead1e 100644 --- a/config/reader.json +++ b/config/reader.json @@ -4,7 +4,7 @@ "namespace": "ns1", "name": "reader-lm", "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "jinaai/reader-lm-1.5b", diff --git a/config/stablelm-3b-4e1t.json b/config/stablelm-3b-4e1t.json index 56eff6c..5a9435f 100644 --- a/config/stablelm-3b-4e1t.json +++ b/config/stablelm-3b-4e1t.json @@ -5,7 +5,7 @@ "name": "stablelm-3b-4e1t", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "stabilityai/stablelm-3b-4e1t", diff --git a/config/stablelm-tuned-alpha-7b.json b/config/stablelm-tuned-alpha-7b.json index ec2c2fe..edb2c8c 100644 --- a/config/stablelm-tuned-alpha-7b.json +++ b/config/stablelm-tuned-alpha-7b.json @@ -5,7 +5,7 @@ "name": "stablelm-tuned-alpha-7b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "stabilityai/stablelm-tuned-alpha-7b", diff --git a/config/starcoder2-3b.json b/config/starcoder2-3b.json index 4f025bc..1400ee2 100644 --- a/config/starcoder2-3b.json +++ b/config/starcoder2-3b.json @@ -5,7 +5,7 @@ "name": "starcoder2-3b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "bigcode/starcoder2-3b", @@ -26,6 +26,10 @@ [ 
"LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/config/starcoder2-7b.json b/config/starcoder2-7b.json index 477d467..ad3c644 100644 --- a/config/starcoder2-7b.json +++ b/config/starcoder2-7b.json @@ -5,7 +5,7 @@ "name": "starcoder2-7b", "object": { "spec": { - "image": "vllm/vllm-openai:v0.6.2", + "image": "vllm/vllm-openai:v0.7.3", "commands": [ "--model", "bigcode/starcoder2-7b", @@ -28,6 +28,10 @@ [ "LD_LIBRARY_PATH", "/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH" + ], + [ + "VLLM_CUDART_SO_PATH", + "/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12" ] ], "mounts": [ diff --git a/dashboard/app.py b/dashboard/app.py index 7b5e5a4..f7b4d81 100644 --- a/dashboard/app.py +++ b/dashboard/app.py @@ -39,22 +39,34 @@ import logging import sys +import multiprocessing from werkzeug.middleware.proxy_fix import ProxyFix -logger = logging.getLogger('gunicorn.error') -sys.stdout = sys.stderr = logger.handlers[0].stream +# logger = logging.getLogger('gunicorn.error') +# sys.stdout = sys.stderr = logger.handlers[0].stream app = Flask(__name__) app.secret_key = os.environ.get("FLASK_SECRET", "supersecret") +def configure_logging(): + if "gunicorn" in multiprocessing.current_process().name.lower(): + logger = logging.getLogger('gunicorn.error') + if logger.handlers: + sys.stdout = sys.stderr = logger.handlers[0].stream + app.logger.info("Redirecting stdout/stderr to Gunicorn logger.") + else: + app.logger.info("Running standalone Flask — no stdout/stderr redirection.") + +configure_logging() -KEYCLOAK_URL = os.getenv('KEYCLOAK_URL', "http://192.168.0.22:81/authn") + +KEYCLOAK_URL = os.getenv('KEYCLOAK_URL', "http://192.168.0.22:31260/authn") KEYCLOAK_REALM_NAME = os.getenv('KEYCLOAK_REALM_NAME', "inferx") KEYCLOAK_CLIENT_ID = os.getenv('KEYCLOAK_CLIENT_ID', "infer_client") -KEYCLOAK_CLIENT_SECRET = os.getenv('KEYCLOAK_CLIENT_SECRET', "SJvfmGFViBNHsLfhkto4eRE0PnPhpyft") +KEYCLOAK_CLIENT_SECRET = os.getenv('KEYCLOAK_CLIENT_SECRET', "M2Dse5531tdtyipZdGizLEeoOVgziQRX") server_metadata_url = f"{KEYCLOAK_URL}/realms/{KEYCLOAK_REALM_NAME}/.well-known/openid-configuration" @@ -81,7 +93,7 @@ tls = False -apihostaddr = "http://localhost:4000" +apihostaddr = os.getenv('INFERX_APIGW_ADDR', "http://localhost:4000") # apihostaddr = "https://quarksoft.io:4000" def is_token_expired(): @@ -197,7 +209,7 @@ def logout(): f"id_token_hint={id_token}" ) -def getapkkeys(): +def getapikeys(): access_token = session.get('token')['access_token'] # Include the access token in the Authorization header headers = {'Authorization': f'Bearer {access_token}'} @@ -208,20 +220,20 @@ def getapkkeys(): return apikeys -@app.route('/apikeys') +@app.route('/admin') @require_login def apikeys(): - apikeys = getapkkeys() return render_template( - "apikey.html", apikeys=apikeys + "admin.html" ) @app.route('/generate_apikeys', methods=['GET']) @require_login def generate_apikeys(): - apikeys = getapkkeys() + apikeys = getapikeys() return apikeys + @app.route('/apikeys', methods=['PUT']) @require_login def create_apikey(): @@ -319,6 +331,29 @@ def getnode(name: str): return func +def listtenants(): + access_token = session.get('access_token', '') + if access_token == "": + headers = {} + else: + headers = {'Authorization': f'Bearer {access_token}'} + url = "{}/objects/tenant/system/system/".format(apihostaddr) + resp = 
requests.get(url, headers=headers) + tenants = json.loads(resp.content) + + return tenants + +def listnamespaces(): + access_token = session.get('access_token', '') + if access_token == "": + headers = {} + else: + headers = {'Authorization': f'Bearer {access_token}'} + url = "{}/objects/namespace///".format(apihostaddr) + resp = requests.get(url, headers=headers) + namespaces = json.loads(resp.content) + + return namespaces def listpods(tenant: str, namespace: str, funcname: str): access_token = session.get('access_token', '') @@ -460,6 +495,25 @@ def text2img(): headers = [(name, value) for (name, value) in resp.raw.headers.items() if name.lower() not in excluded_headers] return Response(resp.iter_content(1024000), resp.status_code, headers) +@app.route('/generate_tenants', methods=['GET']) +@require_login +def generate_tenants(): + tenants = listtenants() + print("tenants ", tenants) + return tenants + +@app.route('/generate_namespaces', methods=['GET']) +@require_login +def generate_namespaces(): + namespaces = listnamespaces() + print("namespaces ", namespaces) + return namespaces + +@app.route('/generate_funcs', methods=['GET']) +@require_login +def generate_funcs(): + funcs = listfuncs("", "") + return funcs @app.route('/generate', methods=['POST']) @not_require_login @@ -574,19 +628,56 @@ def proxy(path): data=request.get_data(), cookies=request.cookies, allow_redirects=False, + timeout=60, stream=True ) except requests.exceptions.RequestException as e: return Response(f"Error connecting to backend server: {e}", status=502) # Exclude hop-by-hop headers as per RFC 2616 section 13.5.1 - excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection'] + excluded_headers = ['content-encoding', 'transfer-encoding', 'connection'] headers = [(name, value) for name, value in resp.raw.headers.items() if name.lower() not in excluded_headers] # Create a Flask response object with the backend server's response response = Response(stream_response(resp), resp.status_code, headers) return response +@app.route('/proxy1/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'OPTIONS']) +@require_login +def proxy1(path): + access_token = session.get('access_token', '') + headers = {key: value for key, value in request.headers if key.lower() != 'host'} + if access_token != "": + headers["Authorization"] = f'Bearer {access_token}' + + # Construct the full URL for the backend request + url = f"{apihostaddr}/{path}" + + try: + resp = requests.request( + method=request.method, + url=url, + headers=headers, + params=request.args, + data=request.get_data(), + cookies=request.cookies, + allow_redirects=False, + timeout=60, + stream=False + ) + except requests.exceptions.RequestException as e: + print("error ....") + return Response(f"Error connecting to backend server: {e}", status=502, mimetype='text/plain') + + response = Response(resp.content, resp.status_code, mimetype='text/plain') + # for name, value in resp.headers.items(): + # if name.lower() not in ['content-encoding', 'transfer-encoding', 'connection']: + # response.headers[name] = value + + return response + + + @app.route("/intro") def md(): name = request.args.get("name") @@ -679,14 +770,13 @@ def GetFunc(): sample = func["func"]["object"]["spec"]["sample_query"] map = sample["body"] apiType = sample["apiType"] + isAdmin = func["isAdmin"] version = func["func"]["object"]["spec"]["version"] fails = GetFailLogs(tenant, namespace, name, version) # Convert Python dictionary to pretty JSON string funcspec = 
json.dumps(func["func"]["object"]["spec"], indent=4) - funcspec = funcspec.replace("\n", "<br>")
- funcspec = funcspec.replace(" ", "&nbsp;") return render_template( "func.html", @@ -698,6 +788,7 @@ def GetFunc(): funcspec=funcspec, apiType=apiType, map=map, + isAdmin=isAdmin, path=sample["path"] ) diff --git a/dashboard/nginx.conf b/dashboard/nginx.conf index 6837911..ea084c7 100644 --- a/dashboard/nginx.conf +++ b/dashboard/nginx.conf @@ -34,7 +34,7 @@ server { } location /authn/ { - proxy_pass http://localhost:1260/authn/; + proxy_pass http://localhost:31260/authn/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; @@ -57,59 +57,59 @@ } } -server { - listen 443 ssl; - server_name inferx.net; - - # SSL Certificates - ssl_certificate /etc/letsencrypt/live/inferx.net/fullchain.pem; # Or Let's Encrypt: /etc/letsencrypt/live/yourd> - ssl_certificate_key /etc/letsencrypt/live/inferx.net/privkey.pem; # Or Let's Encrypt: /etc/letsencrypt/live/you> - - ssl_protocols TLSv1.2 TLSv1.3; - ssl_ciphers HIGH:!aNULL:!MD5; - ssl_prefer_server_ciphers on; - - location /public/ { - autoindex on; - alias /public/; - } - - location ~ \.\. { - deny all; - } - - location /funccall/ { - proxy_pass http://localhost:4000/funccall/; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; +# server { +# listen 443 ssl; +# server_name inferx.net; + +# # SSL Certificates +# ssl_certificate /etc/letsencrypt/live/inferx.net/fullchain.pem; # Or Let's Encrypt: /etc/letsencrypt/live/yourd> +# ssl_certificate_key /etc/letsencrypt/live/inferx.net/privkey.pem; # Or Let's Encrypt: /etc/letsencrypt/live/you> + +# ssl_protocols TLSv1.2 TLSv1.3; +# ssl_ciphers HIGH:!aNULL:!MD5; +# ssl_prefer_server_ciphers on; + +# location /public/ { +# autoindex on; +# alias /public/; +# } + +# location ~ \.\. 
{ +# deny all; +# } + +# location /funccall/ { +# proxy_pass http://localhost:4000/funccall/; +# proxy_set_header Host $host; +# proxy_set_header X-Real-IP $remote_addr; +# proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; +# proxy_set_header X-Forwarded-Proto $scheme; - proxy_buffering off; - proxy_request_buffering off; - proxy_http_version 1.1; - chunked_transfer_encoding on; - } - - # location /authn/ { - # proxy_pass http://localhost:1260/authn/; - # proxy_set_header Host $host; - # proxy_set_header X-Real-IP $remote_addr; - # proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - # proxy_set_header X-Forwarded-Proto $scheme; - # proxy_set_header X-Forwarded-Port 8000; - # } - - location / { - proxy_pass http://127.0.0.1:1250; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - - proxy_buffering off; - proxy_request_buffering off; - proxy_http_version 1.1; - chunked_transfer_encoding on; - } -} +# proxy_buffering off; +# proxy_request_buffering off; +# proxy_http_version 1.1; +# chunked_transfer_encoding on; +# } + +# # location /authn/ { +# # proxy_pass http://localhost:31260/authn/; +# # proxy_set_header Host $host; +# # proxy_set_header X-Real-IP $remote_addr; +# # proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; +# # proxy_set_header X-Forwarded-Proto $scheme; +# # proxy_set_header X-Forwarded-Port 8000; +# # } + +# location / { +# proxy_pass http://127.0.0.1:1250; +# proxy_set_header Host $host; +# proxy_set_header X-Real-IP $remote_addr; +# proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; +# proxy_set_header X-Forwarded-Proto $scheme; + +# proxy_buffering off; +# proxy_request_buffering off; +# proxy_http_version 1.1; +# chunked_transfer_encoding on; +# } +# } diff --git a/dashboard/templates/admin.html b/dashboard/templates/admin.html new file mode 100644 index 0000000..8efed14 --- /dev/null +++ b/dashboard/templates/admin.html @@ -0,0 +1,590 @@ +{% extends 'base.html' %} + +{% block content %} + + + +
[admin.html body (590 lines) lost to markup stripping — recoverable structure: the new template extends base.html and renders four management panels: Apikeys (columns Select / Name / Username / Apikey, with an "Add Apikey" form), Models (Select / Tenant / Namespace / Name, with an "Add model" form taking Namespace and Model name), Namespaces (Select / Tenant / Name, with an "Add Namespace" form taking Tenant), and Tenants (Select / Name, with an "Add Tenant" form); it closes with {{ hosturl }} and {% endblock %}.]
diff --git a/dashboard/templates/apikey.html b/dashboard/templates/apikey.html deleted file mode 100644 index e8ea855..0000000 --- a/dashboard/templates/apikey.html +++ /dev/null @@ -1,126 +0,0 @@ -{% extends 'base.html' %} - -{% block content %} -
[deleted apikey.html body lost to markup stripping — it extended base.html and rendered a single "Apikey" panel (columns Select / Name / Username / Apikey) with an "Add Apikey" form, closing with {{ hosturl }} and {% endblock %}; its function is absorbed by admin.html above.]
diff --git a/dashboard/templates/base.html b/dashboard/templates/base.html index 7021d54..4fb42b5 100644 --- a/dashboard/templates/base.html +++ b/dashboard/templates/base.html @@ -44,13 +44,13 @@

[base.html nav hunk markup stripped — unchanged context: tagline "--   Serve tens models in one box with ultra-fa…" and nav links Pods, Snapshots, Nodes; the change renames the last nav link:] - Apikeys + Admin
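The runblob target now derives LOCAL_IP by splitting the output of hostname -I and taking the first address that does not start with 172., instead of blindly taking the first field; the intent appears to be skipping Docker bridge addresses. A minimal Python sketch of the same selection, assuming a Linux host where hostname -I exists — the 172.* filter is the patch's own heuristic and would also skip legitimate 172.x LAN addresses:

    # Hedged sketch of runblob's LOCAL_IP pipeline:
    #   hostname -I | tr ' ' '\n' | grep -v '^172\.' | head -n 1 | xargs
    import subprocess

    def pick_local_ip() -> str:
        addrs = subprocess.run(
            ["hostname", "-I"], capture_output=True, text=True, check=True
        ).stdout.split()
        for addr in addrs:
            if not addr.startswith("172."):  # heuristic: skip Docker bridge nets
                return addr
        raise RuntimeError("no non-172.* address reported by hostname -I")

    if __name__ == "__main__":
        # Mirrors the first line the Makefile writes into .env
        print(f"LOCAL_IP={pick_local_ip()}")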
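In dashboard/app.py, the /proxy route keeps streaming but stops stripping Content-Length, now excluding only content-encoding, transfer-encoding, and connection. For reference, a hedged sketch of hop-by-hop filtering using the full RFC 2616 §13.5.1 set; HOP_BY_HOP and forwardable are this sketch's own names, not the patch's:

    # The RFC 2616 §13.5.1 hop-by-hop headers. The patch's excluded list is
    # smaller (it adds content-encoding because requests decompresses bodies)
    # and now forwards Content-Length, which is only safe while the relayed
    # body length is unchanged.
    HOP_BY_HOP = {
        "connection", "keep-alive", "proxy-authenticate", "proxy-authorization",
        "te", "trailers", "transfer-encoding", "upgrade",
    }

    def forwardable(headers):
        """Return only end-to-end headers from an upstream response."""
        return {k: v for k, v in headers.items() if k.lower() not in HOP_BY_HOP}

    # Example: forwardable({"Connection": "keep-alive", "Content-Type": "text/plain"})
    # -> {"Content-Type": "text/plain"}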
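Every model config in this patch moves to vllm/vllm-openai:v0.7.3, and most also gain a VLLM_CUDART_SO_PATH env pointing at the CUDA 12.1 runtime. A hedged lint sketch that reports configs missing either setting; the object.spec / bare spec layouts and the envs-as-[key, value]-pairs shape are taken from the files above, while EXPECTED_IMAGE and CUDART_KEY are this sketch's own names:

    # Hedged sketch: flag config/*.json specs lacking the image or env settings
    # this patch introduces. Assumes the repository layout shown in the diff.
    import json
    import pathlib

    EXPECTED_IMAGE = "vllm/vllm-openai:v0.7.3"
    CUDART_KEY = "VLLM_CUDART_SO_PATH"

    for path in sorted(pathlib.Path("config").glob("*.json")):
        doc = json.loads(path.read_text())
        # Specs appear either under "object.spec" or at the top level.
        spec = doc.get("object", {}).get("spec") or doc.get("spec") or {}
        if spec.get("image") != EXPECTED_IMAGE:
            print(f"{path.name}: image is {spec.get('image')!r}")
        if not any(env[0] == CUDART_KEY for env in spec.get("envs", [])):
            print(f"{path.name}: missing {CUDART_KEY}")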
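The dashboard's Keycloak default moves from port 81 to 31260, matching the nginx /authn/ proxy_pass change, and app.py builds its OIDC discovery URL from that base. A hedged reachability check — the host, port, and realm below are only the patch's fallback defaults, not a guaranteed live endpoint:

    # Hedged sketch: fetch the OIDC discovery document at the new default
    # Keycloak address; the URL shape mirrors server_metadata_url in app.py.
    import requests

    KEYCLOAK_URL = "http://192.168.0.22:31260/authn"  # fallback default from the patch
    REALM = "inferx"

    meta = requests.get(
        f"{KEYCLOAK_URL}/realms/{REALM}/.well-known/openid-configuration",
        timeout=10,
    ).json()
    print(meta["issuer"], meta["token_endpoint"])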