From 4cde8d3467dacff7ba03074387f8e97317000b5a Mon Sep 17 00:00:00 2001
From: tangqy <121102723@qq.com>
Date: Tue, 20 Aug 2024 09:16:02 +0800
Subject: [PATCH 1/6] update sagemaker bge embedding model

---
 application/.env.template                |  2 +-
 application/nlq/business/vector_store.py | 30 +++++++++++-------------
 application/utils/llm.py                 |  9 +++----
 source/resources/lib/ecs/ecs-stack.ts    |  5 ++++
 4 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/application/.env.template b/application/.env.template
index cb606724..62d52a55 100644
--- a/application/.env.template
+++ b/application/.env.template
@@ -21,7 +21,7 @@ RDS_REGION_NAME=us-west-2
 AWS_DEFAULT_REGION=us-west-2
 DYNAMODB_AWS_REGION=us-west-2
 
-EMBEDDING_DIMENSION=1536
+EMBEDDING_DIMENSION=1024
 BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v1
 
 # If you need to use ak/sk to access bedrock, please configure bedrock's ak/sk to Secrets Manager, Examples are as follows
diff --git a/application/nlq/business/vector_store.py b/application/nlq/business/vector_store.py
index b0f9577f..15dc6cb1 100644
--- a/application/nlq/business/vector_store.py
+++ b/application/nlq/business/vector_store.py
@@ -76,7 +76,7 @@ def get_all_agent_cot_samples(cls, profile_name):
     def add_sample(cls, profile_name, question, answer):
         logger.info(f'add sample question: {question} to profile {profile_name}')
         if SAGEMAKER_ENDPOINT_EMBEDDING is not None and SAGEMAKER_ENDPOINT_EMBEDDING != "":
-            embedding = cls.create_vector_embedding_with_sagemaker(question)
+            embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,question,opensearch_info['sql_index'])
         else:
             embedding = cls.create_vector_embedding_with_bedrock(question)
         has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['sql_index'], embedding)
@@ -89,7 +89,7 @@ def add_sample(cls, profile_name, question, answer):
     def add_entity_sample(cls, profile_name, entity, comment):
         logger.info(f'add sample entity: {entity} to profile {profile_name}')
         if SAGEMAKER_ENDPOINT_EMBEDDING is not None and SAGEMAKER_ENDPOINT_EMBEDDING != "":
-            embedding = cls.create_vector_embedding_with_sagemaker(entity)
+            embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,entity,opensearch_info['ner_index'])
         else:
             embedding = cls.create_vector_embedding_with_bedrock(entity)
         has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['ner_index'], embedding)
@@ -102,7 +102,7 @@ def add_entity_sample(cls, profile_name, entity, comment):
     def add_agent_cot_sample(cls, profile_name, entity, comment):
         logger.info(f'add agent sample query: {entity} to profile {profile_name}')
         if SAGEMAKER_ENDPOINT_EMBEDDING is not None and SAGEMAKER_ENDPOINT_EMBEDDING != "":
-            embedding = cls.create_vector_embedding_with_sagemaker(entity)
+            embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,entity,opensearch_info['agent_index'])
         else:
             embedding = cls.create_vector_embedding_with_bedrock(entity)
         has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['agent_index'], embedding)
@@ -129,19 +129,17 @@ def create_vector_embedding_with_bedrock(cls, text):
         return embedding
 
     @classmethod
-    def create_vector_embedding_with_sagemaker(cls, text):
-        try:
-            body = json.dumps(
-                {
-                    "inputs": text,
-                    "is_query": True
-                }
-            )
-            response = invoke_model_sagemaker_endpoint(SAGEMAKER_ENDPOINT_EMBEDDING, body, model_type="embedding")
-            embeddings = response[0]
-            return embeddings
-        except Exception as e:
-            logger.error(f'create_vector_embedding_with_sagemaker is error {e}')
+    def create_vector_embedding_with_sagemaker(cls, endpoint_name, text, index_name):
+        """Return the embedding vector for `text` from SageMaker endpoint `endpoint_name`.
+
+        `index_name` is kept for call-site compatibility but is not used: callers hand the
+        result to search_same_query as a bare vector, as with the Bedrock variant.
+        """
+        body = json.dumps({"inputs": text, "is_query": True,
+                           "instruction": "Represent this sentence for searching relevant passages:"})
+        response = invoke_model_sagemaker_endpoint(endpoint_name, body, "embeddings")
+        # One text was sent, so take the first entry of 'sentence_embeddings'.
+        return response['sentence_embeddings'][0]
 
     @classmethod
     def delete_sample(cls, profile_name, doc_id):
diff --git a/application/utils/llm.py b/application/utils/llm.py
index 9eb542c1..6fe8a612 100644
--- a/application/utils/llm.py
+++ b/application/utils/llm.py
@@ -570,14 +570,15 @@ def create_vector_embedding_with_bedrock(text, index_name):
 
 
 def create_vector_embedding_with_sagemaker(endpoint_name, text, index_name):
-    body = json.dumps(
+    body=json.dumps(
         {
             "inputs": text,
-            "is_query": True
+            "is_query": True,
+            "instruction" :  "Represent this sentence for searching relevant passages:"
         }
     )
-    response = invoke_model_sagemaker_endpoint(endpoint_name, body, model_type="embedding")
-    embeddings = response[0]
+    response = invoke_model_sagemaker_endpoint(endpoint_name, body, "embeddings")
+    embeddings = response['sentence_embeddings'][0]
     return {"_index": index_name, "text": text, "vector_field": embeddings}
 
 
diff --git a/source/resources/lib/ecs/ecs-stack.ts b/source/resources/lib/ecs/ecs-stack.ts
index 70ce5e2f..d85d7de4 100644
--- a/source/resources/lib/ecs/ecs-stack.ts
+++ b/source/resources/lib/ecs/ecs-stack.ts
@@ -192,6 +192,9 @@ export class ECSStack extends cdk.Stack {
             }),
         });
 
+        containerStreamlit.addEnvironment('EMBEDDING_DIMENSION', '1024');
+        containerStreamlit.addEnvironment('SAGEMAKER_ENDPOINT_EMBEDDING', 'bge-zh-15-2024-08-17-03-56-58-281-endpoint');
+
         containerStreamlit.addEnvironment('OPENSEARCH_TYPE', 'service');
         containerStreamlit.addEnvironment('AOS_INDEX', 'uba');
         containerStreamlit.addEnvironment('AOS_INDEX_NER', 'uba_ner');
@@ -242,6 +245,8 @@ export class ECSStack extends cdk.Stack {
             }),
         });
 
+        containerAPI.addEnvironment('EMBEDDING_DIMENSION', '1024');
+        containerAPI.addEnvironment('SAGEMAKER_ENDPOINT_EMBEDDING', 'bge-zh-15-2024-08-17-03-56-58-281-endpoint');
         containerAPI.addEnvironment('OPENSEARCH_TYPE', 'service');
         containerAPI.addEnvironment('AOS_INDEX', 'uba');
         containerAPI.addEnvironment('AOS_INDEX_NER', 'uba_ner');

From fcb842581a44c898435e0e9c12d5ed7ab938b2f7 Mon Sep 17 00:00:00 2001
From: tangqy <121102723@qq.com>
Date: Tue, 20 Aug 2024 10:50:30 +0800
Subject: [PATCH 2/6] update sagemaker bge embedding model

---
 application/.env.template |    2 +-
 bge_zh_deploy.ipynb       | 1570 +++++++++++++++++++++++++++++++++++++
 2 files changed, 1571 insertions(+), 1 deletion(-)
 create mode 100644 bge_zh_deploy.ipynb

diff --git a/application/.env.template b/application/.env.template
index 62d52a55..6e7e61c1 100644
--- a/application/.env.template
+++ b/application/.env.template
@@ -32,4 +32,4 @@ BEDROCK_SECRETS_AK_SK=
 OPENSEARCH_SECRETS_URL_HOST=opensearch-host-url
 OPENSEARCH_SECRETS_USERNAME_PASSWORD=opensearch-master-user
 
-# SAGEMAKER_ENDPOINT_EMBEDDING=
\ No newline at end of file
+SAGEMAKER_ENDPOINT_EMBEDDING=bge-zh-15-2024-08-17-03-56-58-281-endpoint
\ No newline at end of file
diff --git a/bge_zh_deploy.ipynb b/bge_zh_deploy.ipynb
new file mode 100644
index 00000000..9d867cab
--- /dev/null
+++ b/bge_zh_deploy.ipynb
@@ -0,0 +1,1570 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "7060c891-cebd-4011-b350-b7d1e70b40b2",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "### 1. 安装HuggingFace 并下载模型到本地"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "9f413314-c410-43d3-bb3a-ba0aa18ec1be",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "!pip install huggingface-hub -Uqq\n",
+    "!pip install -Uqq sagemaker"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "df3b0a4b-f166-4f1a-a7cc-9c7277c68173",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "### 如果是海外region，执行下面cell通过huggingface_hub下载, 否则跳过"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "be112a00-cbef-4387-b0d7-80e5e7b7030d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "687f1df2c88843d088aad7e22643a96e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "1119ac99d9e94e198880ba04f58250f0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c099efbf11fc46b1983b7644202fb859",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9760ebc6f7174c3faed804ebd3db7c87",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       ".gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "15e750c43a5d4da29d8efa5e22c241be",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "1_Pooling/config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a36f858aab6a42549be7e07849d68775",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "README.md:   0%|          | 0.00/25.5k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a25f33d21b9b431abd0bc427b16e27c6",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model.safetensors:   0%|          | 0.00/1.30G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9519fed4aed14612869e90e4c64391da",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "config.json:   0%|          | 0.00/1.00k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "48709f8ed9474eae967dc3cecba46e6b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "pytorch_model.bin:   0%|          | 0.00/1.30G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "015061c7d770446fb078c0db2540a249",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer_config.json:   0%|          | 0.00/394 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "00c3d91da3cf4884a5c2c487a0ef91e9",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "765d3e9d58ed4a169f991d013b321640",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "vocab.txt:   0%|          | 0.00/110k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6a7022ca832440cd86b8d3f41d8af30c",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "fb5d4ec702ca46dd8fe0fbaff1d3dd54",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer.json:   0%|          | 0.00/439k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'bge-zh-model/models--BAAI--bge-large-zh-v1.5/snapshots/00f8ffc4928a685117583e2a38af8ebb65dcec2c'"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from huggingface_hub import snapshot_download\n",
+    "from pathlib import Path\n",
+    "\n",
+    "local_model_path = Path(\"./bge-zh-model\")\n",
+    "local_model_path.mkdir(exist_ok=True)\n",
+    "model_name = \"BAAI/bge-large-zh-v1.5\"\n",
+    "commit_hash = \"00f8ffc4928a685117583e2a38af8ebb65dcec2c\"\n",
+    "snapshot_download(repo_id=model_name, revision=commit_hash, cache_dir=local_model_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "67712dfc-a411-433b-bc67-9734a1686480",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "### 如果是中国区，执行下面cell通过modelscope进行下载"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7b3fa6b9-492d-477b-8a45-8b8e3bbfb6e8",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "!pip install modelscope -i https://pypi.tuna.tsinghua.edu.cn/simple -Uqq"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dda7d753-6faf-46fc-a8c1-da9a3f1cd1db",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from modelscope.hub.snapshot_download import snapshot_download\n",
+    "from pathlib import Path\n",
+    "\n",
+    "local_model_path = Path(\"./bge-zh-model\")\n",
+    "\n",
+    "local_model_path.mkdir(exist_ok=True)\n",
+    "model_name = \"Xorbits/bge-large-zh-v1.5\"\n",
+    "commit_hash = \"v0.0.1\"\n",
+    "\n",
+    "snapshot_download(model_name, revision=commit_hash, cache_dir=local_model_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a6b61ad8-a8c2-48c2-8539-e7c1e2afe773",
+   "metadata": {},
+   "source": [
+    "### 2. 把模型拷贝到S3为后续部署做准备"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "5e1873f4-1bfe-4146-8297-584e9ad76fc9",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml\n",
+      "sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sagemaker\n",
+    "from sagemaker import image_uris\n",
+    "import boto3\n",
+    "import os\n",
+    "import time\n",
+    "import json\n",
+    "\n",
+    "role = sagemaker.get_execution_role()  # execution role for the endpoint\n",
+    "sess = sagemaker.session.Session()  # sagemaker session for interacting with different AWS APIs\n",
+    "bucket = sess.default_bucket()  # bucket to house artifacts\n",
+    "\n",
+    "region = sess._region_name\n",
+    "account_id = sess.account_id()\n",
+    "\n",
+    "s3_client = boto3.client(\"s3\")\n",
+    "sm_client = boto3.client(\"sagemaker\")\n",
+    "smr_client = boto3.client(\"sagemaker-runtime\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "68394e44-4d51-48ae-adc1-d02f520a5d4d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "s3_model_prefix = \"LLM-RAG/workshop/bge-zh-model\"  # folder where model checkpoint will go\n",
+    "if region in ['cn-north-1', 'cn-northwest-1']:\n",
+    "    model_snapshot_path = list(local_model_path.glob(f\"**/Xorbits/*\"))[0]\n",
+    "else:\n",
+    "    model_snapshot_path = list(local_model_path.glob(\"**/snapshots/*\"))[0]\n",
+    "s3_code_prefix = \"LLM-RAG/workshop/bge_zh_deploy_code\"\n",
+    "print(f\"s3_code_prefix: {s3_code_prefix}\")\n",
+    "print(f\"model_snapshot_path: {model_snapshot_path}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "0b9e177a-886d-4838-891e-2e612a3cbc9d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "upload: bge-zh-model/models--BAAI--bge-large-zh-v1.5/snapshots/00f8ffc4928a685117583e2a38af8ebb65dcec2c/config_sentence_transformers.json to s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge-zh-model/config_sentence_transformers.json\n",
+      "upload: bge-zh-model/models--BAAI--bge-large-zh-v1.5/snapshots/00f8ffc4928a685117583e2a38af8ebb65dcec2c/README.md to s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge-zh-model/README.md\n",
+      "upload: bge-zh-model/models--BAAI--bge-large-zh-v1.5/snapshots/00f8ffc4928a685117583e2a38af8ebb65dcec2c/.gitattributes to s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge-zh-model/.gitattributes\n",
+      "upload: bge-zh-model/models--BAAI--bge-large-zh-v1.5/snapshots/00f8ffc4928a685117583e2a38af8ebb65dcec2c/sentence_bert_config.json to s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge-zh-model/sentence_bert_config.json\n",
+      "upload: bge-zh-model/models--BAAI--bge-large-zh-v1.5/snapshots/00f8ffc4928a685117583e2a38af8ebb65dcec2c/special_tokens_map.json to s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge-zh-model/special_tokens_map.json\n",
+      "upload: bge-zh-model/models--BAAI--bge-large-zh-v1.5/snapshots/00f8ffc4928a685117583e2a38af8ebb65dcec2c/modules.json to s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge-zh-model/modules.json\n",
+      "upload: bge-zh-model/models--BAAI--bge-large-zh-v1.5/snapshots/00f8ffc4928a685117583e2a38af8ebb65dcec2c/tokenizer.json to s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge-zh-model/tokenizer.json\n",
+      "upload: bge-zh-model/models--BAAI--bge-large-zh-v1.5/snapshots/00f8ffc4928a685117583e2a38af8ebb65dcec2c/1_Pooling/config.json to s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge-zh-model/1_Pooling/config.json\n",
+      "upload: bge-zh-model/models--BAAI--bge-large-zh-v1.5/snapshots/00f8ffc4928a685117583e2a38af8ebb65dcec2c/config.json to s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge-zh-model/config.json\n",
+      "upload: bge-zh-model/models--BAAI--bge-large-zh-v1.5/snapshots/00f8ffc4928a685117583e2a38af8ebb65dcec2c/tokenizer_config.json to s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge-zh-model/tokenizer_config.json\n",
+      "upload: bge-zh-model/models--BAAI--bge-large-zh-v1.5/snapshots/00f8ffc4928a685117583e2a38af8ebb65dcec2c/vocab.txt to s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge-zh-model/vocab.txt\n",
+      "upload: bge-zh-model/models--BAAI--bge-large-zh-v1.5/snapshots/00f8ffc4928a685117583e2a38af8ebb65dcec2c/model.safetensors to s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge-zh-model/model.safetensors\n",
+      "upload: bge-zh-model/models--BAAI--bge-large-zh-v1.5/snapshots/00f8ffc4928a685117583e2a38af8ebb65dcec2c/pytorch_model.bin to s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge-zh-model/pytorch_model.bin\n"
+     ]
+    }
+   ],
+   "source": [
+    "!aws s3 cp --recursive {model_snapshot_path} s3://{bucket}/{s3_model_prefix}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "59f35a6f-5988-42ec-87b0-de36eaebe41b",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### 3. 模型部署准备（entrypoint脚本，容器镜像，服务配置）"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "86daea77-a7ae-46b8-8800-212d07ce5605",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Image going to be used is ---- > 763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.23.0-deepspeed0.9.5-cu118\n"
+     ]
+    }
+   ],
+   "source": [
+    "s3_code_prefix = \"LLM-RAG/workshop/bge_zh_deploy_code\"\n",
+    "s3_model_prefix = \"LLM-RAG/workshop/bge-zh-model\" \n",
+    "inference_image_uri = (\n",
+    "    f\"763104351884.dkr.ecr.{region}.amazonaws.com/djl-inference:0.23.0-deepspeed0.9.5-cu118\"\n",
+    "    \n",
+    ")\n",
+    "\n",
+    "#中国区需要替换为下面的image_uri\n",
+    "if region in ['cn-north-1', 'cn-northwest-1']:\n",
+    "    inference_image_uri = (\n",
+    "        f\"727897471807.dkr.ecr.{region}.amazonaws.com.cn/djl-inference:0.23.0-deepspeed0.9.5-cu118\"\n",
+    "    )\n",
+    "\n",
+    "print(f\"Image going to be used is ---- > {inference_image_uri}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "49435172-e6c5-492a-8dcb-43e3fffb0f5c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "!mkdir -p bge_zh_deploy_code"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "70990dd3-431e-4dd0-a494-d26ceb454945",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overwriting bge_zh_deploy_code/model.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile bge_zh_deploy_code/model.py\n",
+    "from djl_python import Input, Output\n",
+    "import torch\n",
+    "import logging\n",
+    "import math\n",
+    "import os\n",
+    "from FlagEmbedding import FlagModel\n",
+    "\n",
+    "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n",
+    "print(f'--device={device}')\n",
+    "\n",
+    "def load_model(properties):\n",
+    "    tensor_parallel = properties[\"tensor_parallel_degree\"]\n",
+    "    model_location = properties['model_dir']\n",
+    "    if \"model_id\" in properties:\n",
+    "        model_location = properties['model_id']\n",
+    "    logging.info(f\"Loading model in {model_location}\")\n",
+    "\n",
+    "    model =  FlagModel(model_location)\n",
+    "    \n",
+    "    return model\n",
+    "\n",
+    "model = None\n",
+    "\n",
+    "def handle(inputs: Input):\n",
+    "    \"\"\"Embed the sentence(s) in the request JSON and return them as JSON.\"\"\"\n",
+    "    global model\n",
+    "    if not model:\n",
+    "        model = load_model(inputs.get_properties())\n",
+    "\n",
+    "    if inputs.is_empty():\n",
+    "        return None\n",
+    "    data = inputs.get_as_json()\n",
+    "    # 'inputs' may be a single sentence or a list of sentences.\n",
+    "    inputs = data[\"inputs\"]\n",
+    "    if isinstance(inputs, list):\n",
+    "        input_sentences = inputs\n",
+    "    else:\n",
+    "        input_sentences =  [inputs]\n",
+    "        \n",
+    "    is_query = data.get(\"is_query\", False)\n",
+    "    instruction = data.get(\"instruction\")\n",
+    "    logging.info(f\"inputs: {input_sentences}\")\n",
+    "    logging.info(f\"is_query: {is_query}\")\n",
+    "    logging.info(f\"instruction: {instruction}\")\n",
+    "    # Optional fields: the instruction is prepended only when both are supplied.\n",
+    "    if is_query and instruction:\n",
+    "        input_sentences = [ instruction + sent for sent in input_sentences ]\n",
+    "        \n",
+    "    sentence_embeddings =  model.encode(input_sentences)\n",
+    "        \n",
+    "    result = {\"sentence_embeddings\": sentence_embeddings}\n",
+    "    return Output().add_as_json(result)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "0f4c6d5f",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "option.s3url ==> s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge-zh-model/\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f\"option.s3url ==> s3://{bucket}/{s3_model_prefix}/\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a1e1ecec-79cf-4ed4-bba1-95e2fe79daea",
+   "metadata": {},
+   "source": [
+    "#### Note: option.s3url 为SageMaker部署时使用的模型S3路径"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "7b126565-66e2-4987-ac6b-e02f09070a65",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "if not os.path.exists(\"bge_zh_deploy_code\"):\n",
+    "    os.mkdir(\"bge_zh_deploy_code\")\n",
+    "\n",
+    "with open('bge_zh_deploy_code/serving.properties', 'w') as f:\n",
+    "    f.write(\"engine=Python\")\n",
+    "    f.write(\"\\n\")\n",
+    "    f.write(\"option.tensor_parallel_degree=1\")\n",
+    "    f.write(\"\\n\")\n",
+    "    f.write(f\"option.s3url=s3://{bucket}/{s3_model_prefix}/\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5e11b8f7-aebf-42a0-9a7f-31691059cc65",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### 【注意】下面这两个Cell，根据region，仅挑选一个执行，否则会产生问题"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e1434f9a-f114-4f83-a103-04fde82cb307",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "#### 如果是中国region，执行下面这个cell，在requirements.txt中添加国内的pip镜像"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "38bf548e-fb01-4951-b49f-15a91c61fb2e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "%%writefile bge_zh_deploy_code/requirements.txt\n",
+    "-i https://pypi.tuna.tsinghua.edu.cn/simple\n",
+    "transformers==4.28.1\n",
+    "FlagEmbedding"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2a35bbdf-c246-4e9e-ab7f-aac5b389ddf2",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "#### 如果是海外region，执行下面这个cell，无需额外添加pip镜像"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "928a7806-afc4-4ae7-9253-1c9dfabfed99",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overwriting bge_zh_deploy_code/requirements.txt\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile bge_zh_deploy_code/requirements.txt\n",
+    "transformers==4.28.1\n",
+    "FlagEmbedding"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "ffe41472-c2cf-4bb5-99aa-84df76c629b3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "bge_zh_deploy_code/\n",
+      "bge_zh_deploy_code/serving.properties\n",
+      "bge_zh_deploy_code/requirements.txt\n",
+      "bge_zh_deploy_code/model.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "!rm s2e_model.tar.gz\n",
+    "!cd bge_zh_deploy_code && rm -rf \".ipynb_checkpoints\"\n",
+    "!tar czvf s2e_model.tar.gz bge_zh_deploy_code"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "1fabd7ce-b855-4569-857c-ad872662800b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "S3 Code or Model tar ball uploaded to --- > s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge_zh_deploy_code/s2e_model.tar.gz\n"
+     ]
+    }
+   ],
+   "source": [
+    "s3_code_artifact = sess.upload_data(\"s2e_model.tar.gz\", bucket, s3_code_prefix)\n",
+    "print(f\"S3 Code or Model tar ball uploaded to --- > {s3_code_artifact}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "18fb01ed-6bd3-4880-a647-cfd71e692820",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### 4. 创建模型 & 创建endpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "e6209d24-8473-4256-93d3-02e4e144386b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "bge-zh-15-2024-08-17-08-59-03-419\n",
+      "Image going to be used is ---- > 763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.23.0-deepspeed0.9.5-cu118\n",
+      "Created Model: arn:aws:sagemaker:us-east-1:687912291502:model/bge-zh-15-2024-08-17-08-59-03-419\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sagemaker.utils import name_from_base\n",
+    "import boto3\n",
+    "\n",
+    "model_name = name_from_base(\"bge-zh-15\") #Note: Need to specify model_name\n",
+    "print(model_name)\n",
+    "print(f\"Image going to be used is ---- > {inference_image_uri}\")\n",
+    "\n",
+    "#'subnet-028ee65a5a6678649','subnet-06d32aebc5cf17ff5'\n",
+    "# 指定 VPC 配置\n",
+    "vpc_config = {\n",
+    "    'Subnets': ['subnet-02e6c2c6f55233d4a'],  # 替换为你的子网 ID\n",
+    "    'SecurityGroupIds': ['sg-05698097a0be6a59e','sg-0a433d4f351c3ceb7']  # 替换为你的安全组 ID\n",
+    "}\n",
+    "\n",
+    "create_model_response = sm_client.create_model(\n",
+    "    ModelName=model_name,\n",
+    "    ExecutionRoleArn=role,\n",
+    "    PrimaryContainer={\n",
+    "        \"Image\": inference_image_uri,\n",
+    "        \"ModelDataUrl\": s3_code_artifact\n",
+    "    },\n",
+    "    VpcConfig=vpc_config\n",
+    "    \n",
+    ")\n",
+    "model_arn = create_model_response[\"ModelArn\"]\n",
+    "\n",
+    "print(f\"Created Model: {model_arn}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "686abae8-5db7-4ebd-9fbf-5bd54f36c0ab",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'EndpointConfigArn': 'arn:aws:sagemaker:us-east-1:687912291502:endpoint-config/bge-zh-15-2024-08-17-08-59-03-419-config',\n",
+       " 'ResponseMetadata': {'RequestId': '7e25b335-b056-4441-81e8-a7a6333463a9',\n",
+       "  'HTTPStatusCode': 200,\n",
+       "  'HTTPHeaders': {'x-amzn-requestid': '7e25b335-b056-4441-81e8-a7a6333463a9',\n",
+       "   'content-type': 'application/x-amz-json-1.1',\n",
+       "   'content-length': '121',\n",
+       "   'date': 'Sat, 17 Aug 2024 08:59:04 GMT'},\n",
+       "  'RetryAttempts': 0}}"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "endpoint_config_name = f\"{model_name}-config\"\n",
+    "endpoint_name = f\"{model_name}-endpoint\"\n",
+    "endpoint_name = \"bge-zh-15-2024-08-17-03-56-58-281-endpoint\"\n",
+    "\n",
+    "\n",
+    "endpoint_config_response = sm_client.create_endpoint_config(\n",
+    "    EndpointConfigName=endpoint_config_name,\n",
+    "    ProductionVariants=[\n",
+    "        {\n",
+    "            \"VariantName\": \"variant1\",\n",
+    "            \"ModelName\": model_name,\n",
+    "            \"InstanceType\": \"ml.g5.xlarge\",\n",
+    "            \"InitialInstanceCount\": 1,\n",
+    "            # \"VolumeSizeInGB\" : 400,\n",
+    "            # \"ModelDataDownloadTimeoutInSeconds\": 2400,\n",
+    "            \"ContainerStartupHealthCheckTimeoutInSeconds\": 15*60,\n",
+    "        },\n",
+    "    ],\n",
+    "    \n",
+    ")\n",
+    "endpoint_config_response"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "f4c1df06-ae4a-42e2-9695-da0afa9ad734",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Created Endpoint: arn:aws:sagemaker:us-east-1:687912291502:endpoint/bge-zh-15-2024-08-17-03-56-58-281-endpoint\n"
+     ]
+    }
+   ],
+   "source": [
+    "create_endpoint_response = sm_client.create_endpoint(\n",
+    "    EndpointName=f\"{endpoint_name}\", EndpointConfigName=endpoint_config_name\n",
+    ")\n",
+    "print(f\"Created Endpoint: {create_endpoint_response['EndpointArn']}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4f85116e-9a73-480c-a067-96fdfa98695c",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "d9c71240-6878-4fed-bf7d-2c1cf75f4ac5",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Status: Creating\n",
+      "Status: Creating\n",
+      "Status: Creating\n",
+      "Status: Creating\n",
+      "Status: Creating\n",
+      "Status: Creating\n",
+      "Status: Creating\n",
+      "Status: InService\n",
+      "Arn: arn:aws:sagemaker:us-east-1:687912291502:endpoint/bge-zh-15-2024-08-17-03-56-58-281-endpoint\n",
+      "Status: InService\n"
+     ]
+    }
+   ],
+   "source": [
+    "import time\n",
+    "\n",
+    "resp = sm_client.describe_endpoint(EndpointName=endpoint_name)\n",
+    "status = resp[\"EndpointStatus\"]\n",
+    "print(\"Status: \" + status)\n",
+    "\n",
+    "while status == \"Creating\":\n",
+    "    time.sleep(60)\n",
+    "    resp = sm_client.describe_endpoint(EndpointName=endpoint_name)\n",
+    "    status = resp[\"EndpointStatus\"]\n",
+    "    print(\"Status: \" + status)\n",
+    "\n",
+    "print(\"Arn: \" + resp[\"EndpointArn\"])\n",
+    "print(\"Status: \" + status)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dddba20e-fc18-480d-9940-ae39695ac450",
+   "metadata": {},
+   "source": [
+    "### 5. 模型测试"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1f28db25-6996-440c-b004-14f96cfd982d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def get_vector_by_sm_endpoint(questions, sm_client, endpoint_name):\n",
+    "    parameters = {\n",
+    "    }\n",
+    "\n",
+    "    response_model = sm_client.invoke_endpoint(\n",
+    "        EndpointName=endpoint_name,\n",
+    "        Body=json.dumps(\n",
+    "            {\n",
+    "                \"inputs\": questions,\n",
+    "                \"is_query\": True,\n",
+    "                \"instruction\" :  \"Represent this sentence for searching relevant passages:\"\n",
+    "            }\n",
+    "        ),\n",
+    "        ContentType=\"application/json\",\n",
+    "    )\n",
+    "    # 中文instruction => 为这个句子生成表示以用于检索相关文章：\n",
+    "    json_str = response_model['Body'].read().decode('utf8')\n",
+    "    json_obj = json.loads(json_str)\n",
+    "    embeddings = json_obj['sentence_embeddings']\n",
+    "    return embeddings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "52d4f56a-092e-4a6a-a920-48550ec9f20c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "prompts1 = [\"你好啊，大聪明\"]\n",
+    "\n",
+    "emb = get_vector_by_sm_endpoint(prompts1, smr_client, endpoint_name)\n",
+    "print(emb[0][:10])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "328d16bf-6f28-46cf-af31-f8c32017ca6a",
+   "metadata": {},
+   "source": [
+    "#### 清除模型Endpoint和config （如无需要，不要执行）"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aa0d6623-236d-4a5d-8360-4b07f8d8d40c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "!aws sagemaker delete-endpoint --endpoint-name bge-large-en-2023-08-16-09-58-49-900-endpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "36281bf3-b6f5-450f-b1b8-9f1285b1dad0",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "!aws sagemaker delete-endpoint-config --endpoint-config-name bge-large-en-2023-08-16-09-58-49-900-config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f9700aff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!aws sagemaker delete-model --model-name bge-large-en-2023-08-16-09-58-49-900"
+   ]
+  }
+ ],
+ "metadata": {
+  "availableInstances": [
+   {
+    "_defaultOrder": 0,
+    "_isFastLaunch": true,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 4,
+    "name": "ml.t3.medium",
+    "vcpuNum": 2
+   },
+   {
+    "_defaultOrder": 1,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 8,
+    "name": "ml.t3.large",
+    "vcpuNum": 2
+   },
+   {
+    "_defaultOrder": 2,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 16,
+    "name": "ml.t3.xlarge",
+    "vcpuNum": 4
+   },
+   {
+    "_defaultOrder": 3,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 32,
+    "name": "ml.t3.2xlarge",
+    "vcpuNum": 8
+   },
+   {
+    "_defaultOrder": 4,
+    "_isFastLaunch": true,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 8,
+    "name": "ml.m5.large",
+    "vcpuNum": 2
+   },
+   {
+    "_defaultOrder": 5,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 16,
+    "name": "ml.m5.xlarge",
+    "vcpuNum": 4
+   },
+   {
+    "_defaultOrder": 6,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 32,
+    "name": "ml.m5.2xlarge",
+    "vcpuNum": 8
+   },
+   {
+    "_defaultOrder": 7,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 64,
+    "name": "ml.m5.4xlarge",
+    "vcpuNum": 16
+   },
+   {
+    "_defaultOrder": 8,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 128,
+    "name": "ml.m5.8xlarge",
+    "vcpuNum": 32
+   },
+   {
+    "_defaultOrder": 9,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 192,
+    "name": "ml.m5.12xlarge",
+    "vcpuNum": 48
+   },
+   {
+    "_defaultOrder": 10,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 256,
+    "name": "ml.m5.16xlarge",
+    "vcpuNum": 64
+   },
+   {
+    "_defaultOrder": 11,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 384,
+    "name": "ml.m5.24xlarge",
+    "vcpuNum": 96
+   },
+   {
+    "_defaultOrder": 12,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 8,
+    "name": "ml.m5d.large",
+    "vcpuNum": 2
+   },
+   {
+    "_defaultOrder": 13,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 16,
+    "name": "ml.m5d.xlarge",
+    "vcpuNum": 4
+   },
+   {
+    "_defaultOrder": 14,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 32,
+    "name": "ml.m5d.2xlarge",
+    "vcpuNum": 8
+   },
+   {
+    "_defaultOrder": 15,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 64,
+    "name": "ml.m5d.4xlarge",
+    "vcpuNum": 16
+   },
+   {
+    "_defaultOrder": 16,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 128,
+    "name": "ml.m5d.8xlarge",
+    "vcpuNum": 32
+   },
+   {
+    "_defaultOrder": 17,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 192,
+    "name": "ml.m5d.12xlarge",
+    "vcpuNum": 48
+   },
+   {
+    "_defaultOrder": 18,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 256,
+    "name": "ml.m5d.16xlarge",
+    "vcpuNum": 64
+   },
+   {
+    "_defaultOrder": 19,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 384,
+    "name": "ml.m5d.24xlarge",
+    "vcpuNum": 96
+   },
+   {
+    "_defaultOrder": 20,
+    "_isFastLaunch": false,
+    "category": "General purpose",
+    "gpuNum": 0,
+    "hideHardwareSpecs": true,
+    "memoryGiB": 0,
+    "name": "ml.geospatial.interactive",
+    "supportedImageNames": [
+     "sagemaker-geospatial-v1-0"
+    ],
+    "vcpuNum": 0
+   },
+   {
+    "_defaultOrder": 21,
+    "_isFastLaunch": true,
+    "category": "Compute optimized",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 4,
+    "name": "ml.c5.large",
+    "vcpuNum": 2
+   },
+   {
+    "_defaultOrder": 22,
+    "_isFastLaunch": false,
+    "category": "Compute optimized",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 8,
+    "name": "ml.c5.xlarge",
+    "vcpuNum": 4
+   },
+   {
+    "_defaultOrder": 23,
+    "_isFastLaunch": false,
+    "category": "Compute optimized",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 16,
+    "name": "ml.c5.2xlarge",
+    "vcpuNum": 8
+   },
+   {
+    "_defaultOrder": 24,
+    "_isFastLaunch": false,
+    "category": "Compute optimized",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 32,
+    "name": "ml.c5.4xlarge",
+    "vcpuNum": 16
+   },
+   {
+    "_defaultOrder": 25,
+    "_isFastLaunch": false,
+    "category": "Compute optimized",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 72,
+    "name": "ml.c5.9xlarge",
+    "vcpuNum": 36
+   },
+   {
+    "_defaultOrder": 26,
+    "_isFastLaunch": false,
+    "category": "Compute optimized",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 96,
+    "name": "ml.c5.12xlarge",
+    "vcpuNum": 48
+   },
+   {
+    "_defaultOrder": 27,
+    "_isFastLaunch": false,
+    "category": "Compute optimized",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 144,
+    "name": "ml.c5.18xlarge",
+    "vcpuNum": 72
+   },
+   {
+    "_defaultOrder": 28,
+    "_isFastLaunch": false,
+    "category": "Compute optimized",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 192,
+    "name": "ml.c5.24xlarge",
+    "vcpuNum": 96
+   },
+   {
+    "_defaultOrder": 29,
+    "_isFastLaunch": true,
+    "category": "Accelerated computing",
+    "gpuNum": 1,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 16,
+    "name": "ml.g4dn.xlarge",
+    "vcpuNum": 4
+   },
+   {
+    "_defaultOrder": 30,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 1,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 32,
+    "name": "ml.g4dn.2xlarge",
+    "vcpuNum": 8
+   },
+   {
+    "_defaultOrder": 31,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 1,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 64,
+    "name": "ml.g4dn.4xlarge",
+    "vcpuNum": 16
+   },
+   {
+    "_defaultOrder": 32,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 1,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 128,
+    "name": "ml.g4dn.8xlarge",
+    "vcpuNum": 32
+   },
+   {
+    "_defaultOrder": 33,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 4,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 192,
+    "name": "ml.g4dn.12xlarge",
+    "vcpuNum": 48
+   },
+   {
+    "_defaultOrder": 34,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 1,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 256,
+    "name": "ml.g4dn.16xlarge",
+    "vcpuNum": 64
+   },
+   {
+    "_defaultOrder": 35,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 1,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 61,
+    "name": "ml.p3.2xlarge",
+    "vcpuNum": 8
+   },
+   {
+    "_defaultOrder": 36,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 4,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 244,
+    "name": "ml.p3.8xlarge",
+    "vcpuNum": 32
+   },
+   {
+    "_defaultOrder": 37,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 8,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 488,
+    "name": "ml.p3.16xlarge",
+    "vcpuNum": 64
+   },
+   {
+    "_defaultOrder": 38,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 8,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 768,
+    "name": "ml.p3dn.24xlarge",
+    "vcpuNum": 96
+   },
+   {
+    "_defaultOrder": 39,
+    "_isFastLaunch": false,
+    "category": "Memory Optimized",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 16,
+    "name": "ml.r5.large",
+    "vcpuNum": 2
+   },
+   {
+    "_defaultOrder": 40,
+    "_isFastLaunch": false,
+    "category": "Memory Optimized",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 32,
+    "name": "ml.r5.xlarge",
+    "vcpuNum": 4
+   },
+   {
+    "_defaultOrder": 41,
+    "_isFastLaunch": false,
+    "category": "Memory Optimized",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 64,
+    "name": "ml.r5.2xlarge",
+    "vcpuNum": 8
+   },
+   {
+    "_defaultOrder": 42,
+    "_isFastLaunch": false,
+    "category": "Memory Optimized",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 128,
+    "name": "ml.r5.4xlarge",
+    "vcpuNum": 16
+   },
+   {
+    "_defaultOrder": 43,
+    "_isFastLaunch": false,
+    "category": "Memory Optimized",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 256,
+    "name": "ml.r5.8xlarge",
+    "vcpuNum": 32
+   },
+   {
+    "_defaultOrder": 44,
+    "_isFastLaunch": false,
+    "category": "Memory Optimized",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 384,
+    "name": "ml.r5.12xlarge",
+    "vcpuNum": 48
+   },
+   {
+    "_defaultOrder": 45,
+    "_isFastLaunch": false,
+    "category": "Memory Optimized",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 512,
+    "name": "ml.r5.16xlarge",
+    "vcpuNum": 64
+   },
+   {
+    "_defaultOrder": 46,
+    "_isFastLaunch": false,
+    "category": "Memory Optimized",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 768,
+    "name": "ml.r5.24xlarge",
+    "vcpuNum": 96
+   },
+   {
+    "_defaultOrder": 47,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 1,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 16,
+    "name": "ml.g5.xlarge",
+    "vcpuNum": 4
+   },
+   {
+    "_defaultOrder": 48,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 1,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 32,
+    "name": "ml.g5.2xlarge",
+    "vcpuNum": 8
+   },
+   {
+    "_defaultOrder": 49,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 1,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 64,
+    "name": "ml.g5.4xlarge",
+    "vcpuNum": 16
+   },
+   {
+    "_defaultOrder": 50,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 1,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 128,
+    "name": "ml.g5.8xlarge",
+    "vcpuNum": 32
+   },
+   {
+    "_defaultOrder": 51,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 1,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 256,
+    "name": "ml.g5.16xlarge",
+    "vcpuNum": 64
+   },
+   {
+    "_defaultOrder": 52,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 4,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 192,
+    "name": "ml.g5.12xlarge",
+    "vcpuNum": 48
+   },
+   {
+    "_defaultOrder": 53,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 4,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 384,
+    "name": "ml.g5.24xlarge",
+    "vcpuNum": 96
+   },
+   {
+    "_defaultOrder": 54,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 8,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 768,
+    "name": "ml.g5.48xlarge",
+    "vcpuNum": 192
+   },
+   {
+    "_defaultOrder": 55,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 8,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 1152,
+    "name": "ml.p4d.24xlarge",
+    "vcpuNum": 96
+   },
+   {
+    "_defaultOrder": 56,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 8,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 1152,
+    "name": "ml.p4de.24xlarge",
+    "vcpuNum": 96
+   }
+  ],
+  "instance_type": "ml.m5.large",
+  "kernelspec": {
+   "display_name": "conda_python3",
+   "language": "python",
+   "name": "conda_python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 79ea08ee519a002a49efdc86c8bca6b2f788daf3 Mon Sep 17 00:00:00 2001
From: tangqy <121102723@qq.com>
Date: Tue, 20 Aug 2024 18:54:17 +0800
Subject: [PATCH 3/6] pass SageMaker embedding vector_field to OpenSearch and disable duplicate-sample check

---
 application/nlq/business/vector_store.py | 17 +++++++++++------
 application/utils/llm.py                 |  4 +++-
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/application/nlq/business/vector_store.py b/application/nlq/business/vector_store.py
index 15dc6cb1..fcf5cf57 100644
--- a/application/nlq/business/vector_store.py
+++ b/application/nlq/business/vector_store.py
@@ -79,10 +79,11 @@ def add_sample(cls, profile_name, question, answer):
             embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,question,opensearch_info['sql_index'])
         else:
             embedding = cls.create_vector_embedding_with_bedrock(question)
-        has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['sql_index'], embedding)
+        #has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['sql_index'], embedding)
+        has_same_sample = False
         if has_same_sample:
             logger.info(f'delete sample sample entity: {question} to profile {profile_name}')
-        if cls.opensearch_dao.add_sample(opensearch_info['sql_index'], profile_name, question, answer, embedding):
+        if cls.opensearch_dao.add_sample(opensearch_info['sql_index'], profile_name, question, answer, embedding['vector_field']):
             logger.info('Sample added')
 
     @classmethod
@@ -92,10 +93,11 @@ def add_entity_sample(cls, profile_name, entity, comment):
             embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,entity,opensearch_info['ner_index'])
         else:
             embedding = cls.create_vector_embedding_with_bedrock(entity)
-        has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['ner_index'], embedding)
+        #has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['ner_index'], embedding)
+        has_same_sample = False
         if has_same_sample:
             logger.info(f'delete sample sample entity: {entity} to profile {profile_name}')
-        if cls.opensearch_dao.add_entity_sample(opensearch_info['ner_index'], profile_name, entity, comment, embedding):
+        if cls.opensearch_dao.add_entity_sample(opensearch_info['ner_index'], profile_name, entity, comment, embedding['vector_field']):
             logger.info('Sample added')
 
     @classmethod
@@ -105,10 +107,11 @@ def add_agent_cot_sample(cls, profile_name, entity, comment):
             embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,entity,opensearch_info['agent_index'])
         else:
             embedding = cls.create_vector_embedding_with_bedrock(entity)
-        has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['agent_index'], embedding)
+        #has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['agent_index'], embedding)
+        has_same_sample = False
         if has_same_sample:
             logger.info(f'delete agent sample sample query: {entity} to profile {profile_name}')
-        if cls.opensearch_dao.add_agent_cot_sample(opensearch_info['agent_index'], profile_name, entity, comment, embedding):
+        if cls.opensearch_dao.add_agent_cot_sample(opensearch_info['agent_index'], profile_name, entity, comment, embedding['vector_field']):
             logger.info('Sample added')
 
     @classmethod
@@ -139,6 +142,8 @@ def create_vector_embedding_with_sagemaker(cls,endpoint_name, text, index_name):
         )
         response = invoke_model_sagemaker_endpoint(endpoint_name, body, "embeddings")
         embeddings = response['sentence_embeddings'][0]
+        logger.info("embeddings to ingestion")
+        prinlogger.info(embeddings[:10])
         return {"_index": index_name, "text": text, "vector_field": embeddings}
 
     @classmethod
diff --git a/application/utils/llm.py b/application/utils/llm.py
index 6fe8a612..8c0099e2 100644
--- a/application/utils/llm.py
+++ b/application/utils/llm.py
@@ -232,7 +232,7 @@ def invoke_model_sagemaker_endpoint(endpoint_name, body, model_type="LLM", with_
                 Body=body,
                 ContentType="application/json",
             )
-            response_body = json.loads(response.get('Body').read())
+            response_body = json.loads(response.get('Body').read().decode("utf8"))
             return response_body
 
 
@@ -579,6 +579,8 @@ def create_vector_embedding_with_sagemaker(endpoint_name, text, index_name):
     )
     response = invoke_model_sagemaker_endpoint(endpoint_name, body, "embeddings")
     embeddings = response['sentence_embeddings'][0]
+    logger.info("embeddings in llm.py")
+    logger.info(embeddings[:10])
     return {"_index": index_name, "text": text, "vector_field": embeddings}
 
 

From cb37a27297abcdb486e370ac2447cd237c9f2d7a Mon Sep 17 00:00:00 2001
From: tangqy <121102723@qq.com>
Date: Tue, 20 Aug 2024 21:04:09 +0800
Subject: [PATCH 4/6] use SageMaker embedding endpoint in OpenSearch search_sample when configured

---
 application/nlq/data_access/opensearch.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/application/nlq/data_access/opensearch.py b/application/nlq/data_access/opensearch.py
index 69cbe080..dd1b60a8 100644
--- a/application/nlq/data_access/opensearch.py
+++ b/application/nlq/data_access/opensearch.py
@@ -3,8 +3,9 @@
 from opensearchpy import OpenSearch
 from opensearchpy.helpers import bulk
 
-from utils.llm import create_vector_embedding_with_bedrock
-
+from utils.llm import create_vector_embedding_with_bedrock,create_vector_embedding_with_sagemaker
+from utils.env_var import BEDROCK_REGION, AOS_HOST, AOS_PORT, AOS_USER, AOS_PASSWORD, opensearch_info, \
+    SAGEMAKER_ENDPOINT_EMBEDDING
 logger = logging.getLogger(__name__)
 
 def put_bulk_in_opensearch(list, client):
@@ -190,8 +191,14 @@ def delete_sample(self, index_name, profile_name, doc_id):
         return self.opensearch_client.delete(index=index_name, id=doc_id)
 
     def search_sample(self, profile_name, top_k, index_name, query):
-        records_with_embedding = create_vector_embedding_with_bedrock(query, index_name=index_name)
-        return self.search_sample_with_embedding(profile_name, top_k, index_name,  records_with_embedding['vector_field'])
+        if SAGEMAKER_ENDPOINT_EMBEDDING is not None and SAGEMAKER_ENDPOINT_EMBEDDING != "":
+            records_with_embedding = create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING, query, index_name)
+            return self.search_sample_with_embedding(profile_name, top_k, index_name,  records_with_embedding['vector_field'])
+        else:
+            records_with_embedding = create_vector_embedding_with_bedrock(query, index_name=index_name)
+            return self.search_sample_with_embedding(profile_name, top_k, index_name,  records_with_embedding['vector_field'])
+
+
 
 
     def search_sample_with_embedding(self, profile_name, top_k, index_name, query_embedding):

From 578e690e6d6a4ef003b45362279f1b260f76f650 Mon Sep 17 00:00:00 2001
From: tangqy <121102723@qq.com>
Date: Wed, 21 Aug 2024 15:17:42 +0800
Subject: [PATCH 5/6] update default password hash; enable RDS deploy and set embedding_dimension to 1024

---
 application/config_files/stauth_config.yaml | 2 +-
 source/resources/cdk-config.json            | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/application/config_files/stauth_config.yaml b/application/config_files/stauth_config.yaml
index 53029e73..97f7cde2 100644
--- a/application/config_files/stauth_config.yaml
+++ b/application/config_files/stauth_config.yaml
@@ -5,7 +5,7 @@ credentials:
       failed_login_attempts: 0 # Will be managed automatically
       logged_in: False # Will be managed automatically
       name: AWS
-      password: $2b$12$NDQv5NLaWiVlNuzQYHwAo.tv.f.TuX1nbdoUZi44/Y3xv4I4QAfjy # Set the password following instructions in README
+      password: $2b$12$pP4Vi1ovItxf/22zYn1UFeYrA2IM/D7glGNoAd3TrY0Gr4QzdZSNC # Set the password following instructions in README
 cookie:
   expiry_days: 2
   key: some_signature_key # Must be string
diff --git a/source/resources/cdk-config.json b/source/resources/cdk-config.json
index 58f1b940..0aed55e4 100644
--- a/source/resources/cdk-config.json
+++ b/source/resources/cdk-config.json
@@ -3,7 +3,7 @@
     "vpc_id" : ""
   },
   "rds": {
-    "deploy": false
+    "deploy": true
   },
   "embedding": {
     "bedrock_embedding_name": "amazon.titan-embed-text-v1",
@@ -12,7 +12,7 @@
   "sagemaker": {
     "segamaker_endpoint_embedding" : "",
     "segamaker_endpoint_sql" : "",
-    "embedding_dimension": ""
+    "embedding_dimension": 1024
   },
   "opensearch": {
     "sql_index" : "uba",

From cd8b2e8c6fe83f0f8e076b1e6e9e11120f9e5a14 Mon Sep 17 00:00:00 2001
From: tangqy <121102723@qq.com>
Date: Wed, 21 Aug 2024 17:38:49 +0800
Subject: [PATCH 6/6] fix typo: prinlogger.info -> logger.info in vector_store.py

---
 application/nlq/business/vector_store.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/nlq/business/vector_store.py b/application/nlq/business/vector_store.py
index fcf5cf57..cb9266e1 100644
--- a/application/nlq/business/vector_store.py
+++ b/application/nlq/business/vector_store.py
@@ -143,7 +143,7 @@ def create_vector_embedding_with_sagemaker(cls,endpoint_name, text, index_name):
         response = invoke_model_sagemaker_endpoint(endpoint_name, body, "embeddings")
         embeddings = response['sentence_embeddings'][0]
         logger.info("embeddings to ingestion")
-        prinlogger.info(embeddings[:10])
+        logger.info(embeddings[:10])
         return {"_index": index_name, "text": text, "vector_field": embeddings}
 
     @classmethod