From 4cde8d3467dacff7ba03074387f8e97317000b5a Mon Sep 17 00:00:00 2001 From: tangqy <121102723@qq.com> Date: Tue, 20 Aug 2024 09:16:02 +0800 Subject: [PATCH 1/6] update sagemaker bge embedding model --- application/.env.template | 2 +- application/nlq/business/vector_store.py | 30 +++++++++++------------- application/utils/llm.py | 9 +++---- source/resources/lib/ecs/ecs-stack.ts | 5 ++++ 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/application/.env.template b/application/.env.template index cb606724..62d52a55 100644 --- a/application/.env.template +++ b/application/.env.template @@ -21,7 +21,7 @@ RDS_REGION_NAME=us-west-2 AWS_DEFAULT_REGION=us-west-2 DYNAMODB_AWS_REGION=us-west-2 -EMBEDDING_DIMENSION=1536 +EMBEDDING_DIMENSION=1024 BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v1 # If you need to use ak/sk to access bedrock, please configure bedrock's ak/sk to Secrets Manager, Examples are as follows diff --git a/application/nlq/business/vector_store.py b/application/nlq/business/vector_store.py index b0f9577f..15dc6cb1 100644 --- a/application/nlq/business/vector_store.py +++ b/application/nlq/business/vector_store.py @@ -76,7 +76,7 @@ def get_all_agent_cot_samples(cls, profile_name): def add_sample(cls, profile_name, question, answer): logger.info(f'add sample question: {question} to profile {profile_name}') if SAGEMAKER_ENDPOINT_EMBEDDING is not None and SAGEMAKER_ENDPOINT_EMBEDDING != "": - embedding = cls.create_vector_embedding_with_sagemaker(question) + embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,question,opensearch_info['sql_index']) else: embedding = cls.create_vector_embedding_with_bedrock(question) has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['sql_index'], embedding) @@ -89,7 +89,7 @@ def add_sample(cls, profile_name, question, answer): def add_entity_sample(cls, profile_name, entity, comment): logger.info(f'add sample entity: {entity} to profile {profile_name}') if SAGEMAKER_ENDPOINT_EMBEDDING is not None and SAGEMAKER_ENDPOINT_EMBEDDING != "": - embedding = cls.create_vector_embedding_with_sagemaker(entity) + embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,entity,opensearch_info['ner_index']) else: embedding = cls.create_vector_embedding_with_bedrock(entity) has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['ner_index'], embedding) @@ -102,7 +102,7 @@ def add_entity_sample(cls, profile_name, entity, comment): def add_agent_cot_sample(cls, profile_name, entity, comment): logger.info(f'add agent sample query: {entity} to profile {profile_name}') if SAGEMAKER_ENDPOINT_EMBEDDING is not None and SAGEMAKER_ENDPOINT_EMBEDDING != "": - embedding = cls.create_vector_embedding_with_sagemaker(entity) + embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,entity,opensearch_info['agent_index']) else: embedding = cls.create_vector_embedding_with_bedrock(entity) has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['agent_index'], embedding) @@ -129,19 +129,17 @@ def create_vector_embedding_with_bedrock(cls, text): return embedding @classmethod - def create_vector_embedding_with_sagemaker(cls, text): - try: - body = json.dumps( - { - "inputs": text, - "is_query": True - } - ) - response = invoke_model_sagemaker_endpoint(SAGEMAKER_ENDPOINT_EMBEDDING, body, model_type="embedding") - embeddings = response[0] - return embeddings - except Exception as e: - logger.error(f'create_vector_embedding_with_sagemaker is error {e}') + def create_vector_embedding_with_sagemaker(cls,endpoint_name, text, index_name): + body=json.dumps( + { + "inputs": text, + "is_query": True, + "instruction" : "Represent this sentence for searching relevant passages:" + } + ) + response = invoke_model_sagemaker_endpoint(endpoint_name, body, "embeddings") + embeddings = response['sentence_embeddings'][0] + return {"_index": index_name, "text": text, "vector_field": embeddings} @classmethod def delete_sample(cls, profile_name, doc_id): diff --git a/application/utils/llm.py b/application/utils/llm.py index 9eb542c1..6fe8a612 100644 --- a/application/utils/llm.py +++ b/application/utils/llm.py @@ -570,14 +570,15 @@ def create_vector_embedding_with_bedrock(text, index_name): def create_vector_embedding_with_sagemaker(endpoint_name, text, index_name): - body = json.dumps( + body=json.dumps( { "inputs": text, - "is_query": True + "is_query": True, + "instruction" : "Represent this sentence for searching relevant passages:" } ) - response = invoke_model_sagemaker_endpoint(endpoint_name, body, model_type="embedding") - embeddings = response[0] + response = invoke_model_sagemaker_endpoint(endpoint_name, body, "embeddings") + embeddings = response['sentence_embeddings'][0] return {"_index": index_name, "text": text, "vector_field": embeddings} diff --git a/source/resources/lib/ecs/ecs-stack.ts b/source/resources/lib/ecs/ecs-stack.ts index 70ce5e2f..d85d7de4 100644 --- a/source/resources/lib/ecs/ecs-stack.ts +++ b/source/resources/lib/ecs/ecs-stack.ts @@ -192,6 +192,9 @@ export class ECSStack extends cdk.Stack { }), }); + containerStreamlit.addEnvironment('EMBEDDING_DIMENSION', '1024'); + containerStreamlit.addEnvironment('SAGEMAKER_ENDPOINT_EMBEDDING', 'bge-zh-15-2024-08-17-03-56-58-281-endpoint'); + containerStreamlit.addEnvironment('OPENSEARCH_TYPE', 'service'); containerStreamlit.addEnvironment('AOS_INDEX', 'uba'); containerStreamlit.addEnvironment('AOS_INDEX_NER', 'uba_ner'); @@ -242,6 +245,8 @@ export class ECSStack extends cdk.Stack { }), }); + containerAPI.addEnvironment('EMBEDDING_DIMENSION', '1024'); + containerAPI.addEnvironment('SAGEMAKER_ENDPOINT_EMBEDDING', 'bge-zh-15-2024-08-17-03-56-58-281-endpoint'); containerAPI.addEnvironment('OPENSEARCH_TYPE', 'service'); containerAPI.addEnvironment('AOS_INDEX', 'uba'); containerAPI.addEnvironment('AOS_INDEX_NER', 'uba_ner'); From fcb842581a44c898435e0e9c12d5ed7ab938b2f7 Mon Sep 17 00:00:00 2001 From: tangqy <121102723@qq.com> Date: Tue, 20 Aug 2024 10:50:30 +0800 Subject: [PATCH 2/6] update sagemaker bge embedding model --- application/.env.template | 2 +- bge_zh_deploy.ipynb | 1570 +++++++++++++++++++++++++++++++++++++ 2 files changed, 1571 insertions(+), 1 deletion(-) create mode 100644 bge_zh_deploy.ipynb diff --git a/application/.env.template b/application/.env.template index 62d52a55..6e7e61c1 100644 --- a/application/.env.template +++ b/application/.env.template @@ -32,4 +32,4 @@ BEDROCK_SECRETS_AK_SK= OPENSEARCH_SECRETS_URL_HOST=opensearch-host-url OPENSEARCH_SECRETS_USERNAME_PASSWORD=opensearch-master-user -# SAGEMAKER_ENDPOINT_EMBEDDING= \ No newline at end of file +SAGEMAKER_ENDPOINT_EMBEDDING=bge-zh-15-2024-08-17-03-56-58-281-endpoint \ No newline at end of file diff --git a/bge_zh_deploy.ipynb b/bge_zh_deploy.ipynb new file mode 100644 index 00000000..9d867cab --- /dev/null +++ b/bge_zh_deploy.ipynb @@ -0,0 +1,1570 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7060c891-cebd-4011-b350-b7d1e70b40b2", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "### 1. 安装HuggingFace 并下载模型到本地" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "9f413314-c410-43d3-bb3a-ba0aa18ec1be", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "!pip install huggingface-hub -Uqq\n", + "!pip install -Uqq sagemaker" + ] + }, + { + "cell_type": "markdown", + "id": "df3b0a4b-f166-4f1a-a7cc-9c7277c68173", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "### 如果是外海region,执行下面cell通过huggingface_hub下载, 否则跳过" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "be112a00-cbef-4387-b0d7-80e5e7b7030d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "687f1df2c88843d088aad7e22643a96e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Fetching 13 files: 0%| | 0/13 [00:00 763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.23.0-deepspeed0.9.5-cu118\n" + ] + } + ], + "source": [ + "s3_code_prefix = \"LLM-RAG/workshop/bge_zh_deploy_code\"\n", + "s3_model_prefix = \"LLM-RAG/workshop/bge-zh-model\" \n", + "inference_image_uri = (\n", + " f\"763104351884.dkr.ecr.{region}.amazonaws.com/djl-inference:0.23.0-deepspeed0.9.5-cu118\"\n", + " \n", + ")\n", + "\n", + "#中国区需要替换为下面的image_uri\n", + "if region in ['cn-north-1', 'cn-northwest-1']:\n", + " inference_image_uri = (\n", + " f\"727897471807.dkr.ecr.{region}.amazonaws.com.cn/djl-inference:0.23.0-deepspeed0.9.5-cu118\"\n", + " )\n", + "\n", + "print(f\"Image going to be used is ---- > {inference_image_uri}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "49435172-e6c5-492a-8dcb-43e3fffb0f5c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "!mkdir -p bge_zh_deploy_code" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "70990dd3-431e-4dd0-a494-d26ceb454945", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting bge_zh_deploy_code/model.py\n" + ] + } + ], + "source": [ + "%%writefile bge_zh_deploy_code/model.py\n", + "from djl_python import Input, Output\n", + "import torch\n", + "import logging\n", + "import math\n", + "import os\n", + "from FlagEmbedding import FlagModel\n", + "\n", + "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n", + "print(f'--device={device}')\n", + "\n", + "def load_model(properties):\n", + " tensor_parallel = properties[\"tensor_parallel_degree\"]\n", + " model_location = properties['model_dir']\n", + " if \"model_id\" in properties:\n", + " model_location = properties['model_id']\n", + " logging.info(f\"Loading model in {model_location}\")\n", + "\n", + " model = FlagModel(model_location)\n", + " \n", + " return model\n", + "\n", + "model = None\n", + "\n", + "def handle(inputs: Input):\n", + " global model\n", + " if not model:\n", + " model = load_model(inputs.get_properties())\n", + "\n", + " if inputs.is_empty():\n", + " return None\n", + " data = inputs.get_as_json()\n", + " \n", + " input_sentences = None\n", + " inputs = data[\"inputs\"]\n", + " if isinstance(inputs, list):\n", + " input_sentences = inputs\n", + " else:\n", + " input_sentences = [inputs]\n", + " \n", + " is_query = data[\"is_query\"]\n", + " instruction = data[\"instruction\"]\n", + " logging.info(f\"inputs: {input_sentences}\")\n", + " logging.info(f\"is_query: {is_query}\")\n", + " logging.info(f\"instruction: {instruction}\")\n", + " \n", + " if is_query and instruction:\n", + " input_sentences = [ instruction + sent for sent in input_sentences ]\n", + " \n", + " sentence_embeddings = model.encode(input_sentences)\n", + " \n", + " result = {\"sentence_embeddings\": sentence_embeddings}\n", + " return Output().add_as_json(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "0f4c6d5f", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "option.s3url ==> s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge-zh-model/\n" + ] + } + ], + "source": [ + "print(f\"option.s3url ==> s3://{bucket}/{s3_model_prefix}/\")" + ] + }, + { + "cell_type": "markdown", + "id": "a1e1ecec-79cf-4ed4-bba1-95e2fe79daea", + "metadata": {}, + "source": [ + "#### Note: option.s3url 为SageMaker部署时使用的模型S3路径" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "7b126565-66e2-4987-ac6b-e02f09070a65", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "if not os.path.exists(\"bge_zh_deploy_code\"):\n", + " os.mkdir(\"bge_zh_deploy_code\")\n", + "\n", + "with open('bge_zh_deploy_code/serving.properties', 'w') as f:\n", + " f.write(\"engine=Python\")\n", + " f.write(\"\\n\")\n", + " f.write(\"option.tensor_parallel_degree=1\")\n", + " f.write(\"\\n\")\n", + " f.write(f\"option.s3url=s3://{bucket}/{s3_model_prefix}/\")" + ] + }, + { + "cell_type": "markdown", + "id": "5e11b8f7-aebf-42a0-9a7f-31691059cc65", + "metadata": { + "tags": [] + }, + "source": [ + "### 【注意】下面这两个Cell,根据region,仅挑选一个执行,否则会产生问题" + ] + }, + { + "cell_type": "markdown", + "id": "e1434f9a-f114-4f83-a103-04fde82cb307", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "#### 如果是中国region,执行下面这个cell,在requirements.txt中添加国内的pip镜像" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38bf548e-fb01-4951-b49f-15a91c61fb2e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%writefile bge_zh_deploy_code/requirements.txt\n", + "-i https://pypi.tuna.tsinghua.edu.cn/simple\n", + "transformers==4.28.1\n", + "FlagEmbedding" + ] + }, + { + "cell_type": "markdown", + "id": "2a35bbdf-c246-4e9e-ab7f-aac5b389ddf2", + "metadata": { + "tags": [] + }, + "source": [ + "#### 如果是海外region,执行下面这个cell,无需额外添加pip镜像" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "928a7806-afc4-4ae7-9253-1c9dfabfed99", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting bge_zh_deploy_code/requirements.txt\n" + ] + } + ], + "source": [ + "%%writefile bge_zh_deploy_code/requirements.txt\n", + "transformers==4.28.1\n", + "FlagEmbedding" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ffe41472-c2cf-4bb5-99aa-84df76c629b3", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bge_zh_deploy_code/\n", + "bge_zh_deploy_code/serving.properties\n", + "bge_zh_deploy_code/requirements.txt\n", + "bge_zh_deploy_code/model.py\n" + ] + } + ], + "source": [ + "!rm s2e_model.tar.gz\n", + "!cd bge_zh_deploy_code && rm -rf \".ipynb_checkpoints\"\n", + "!tar czvf s2e_model.tar.gz bge_zh_deploy_code" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "1fabd7ce-b855-4569-857c-ad872662800b", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "S3 Code or Model tar ball uploaded to --- > s3://sagemaker-us-east-1-687912291502/LLM-RAG/workshop/bge_zh_deploy_code/s2e_model.tar.gz\n" + ] + } + ], + "source": [ + "s3_code_artifact = sess.upload_data(\"s2e_model.tar.gz\", bucket, s3_code_prefix)\n", + "print(f\"S3 Code or Model tar ball uploaded to --- > {s3_code_artifact}\")" + ] + }, + { + "cell_type": "markdown", + "id": "18fb01ed-6bd3-4880-a647-cfd71e692820", + "metadata": { + "tags": [] + }, + "source": [ + "### 4. 创建模型 & 创建endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "e6209d24-8473-4256-93d3-02e4e144386b", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bge-zh-15-2024-08-17-08-59-03-419\n", + "Image going to be used is ---- > 763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.23.0-deepspeed0.9.5-cu118\n", + "Created Model: arn:aws:sagemaker:us-east-1:687912291502:model/bge-zh-15-2024-08-17-08-59-03-419\n" + ] + } + ], + "source": [ + "from sagemaker.utils import name_from_base\n", + "import boto3\n", + "\n", + "model_name = name_from_base(\"bge-zh-15\") #Note: Need to specify model_name\n", + "print(model_name)\n", + "print(f\"Image going to be used is ---- > {inference_image_uri}\")\n", + "\n", + "#'subnet-028ee65a5a6678649','subnet-06d32aebc5cf17ff5'\n", + "# 指定 VPC 配置\n", + "vpc_config = {\n", + " 'Subnets': ['subnet-02e6c2c6f55233d4a'], # 替换为你的子网 ID\n", + " 'SecurityGroupIds': ['sg-05698097a0be6a59e','sg-0a433d4f351c3ceb7'] # 替换为你的安全组 ID\n", + "}\n", + "\n", + "create_model_response = sm_client.create_model(\n", + " ModelName=model_name,\n", + " ExecutionRoleArn=role,\n", + " PrimaryContainer={\n", + " \"Image\": inference_image_uri,\n", + " \"ModelDataUrl\": s3_code_artifact\n", + " },\n", + " VpcConfig=vpc_config\n", + " \n", + ")\n", + "model_arn = create_model_response[\"ModelArn\"]\n", + "\n", + "print(f\"Created Model: {model_arn}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "686abae8-5db7-4ebd-9fbf-5bd54f36c0ab", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'EndpointConfigArn': 'arn:aws:sagemaker:us-east-1:687912291502:endpoint-config/bge-zh-15-2024-08-17-08-59-03-419-config',\n", + " 'ResponseMetadata': {'RequestId': '7e25b335-b056-4441-81e8-a7a6333463a9',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': '7e25b335-b056-4441-81e8-a7a6333463a9',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '121',\n", + " 'date': 'Sat, 17 Aug 2024 08:59:04 GMT'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "endpoint_config_name = f\"{model_name}-config\"\n", + "endpoint_name = f\"{model_name}-endpoint\"\n", + "endpoint_name = \"bge-zh-15-2024-08-17-03-56-58-281-endpoint\"\n", + "\n", + "\n", + "endpoint_config_response = sm_client.create_endpoint_config(\n", + " EndpointConfigName=endpoint_config_name,\n", + " ProductionVariants=[\n", + " {\n", + " \"VariantName\": \"variant1\",\n", + " \"ModelName\": model_name,\n", + " \"InstanceType\": \"ml.g5.xlarge\",\n", + " \"InitialInstanceCount\": 1,\n", + " # \"VolumeSizeInGB\" : 400,\n", + " # \"ModelDataDownloadTimeoutInSeconds\": 2400,\n", + " \"ContainerStartupHealthCheckTimeoutInSeconds\": 15*60,\n", + " },\n", + " ],\n", + " \n", + ")\n", + "endpoint_config_response" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "f4c1df06-ae4a-42e2-9695-da0afa9ad734", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Created Endpoint: arn:aws:sagemaker:us-east-1:687912291502:endpoint/bge-zh-15-2024-08-17-03-56-58-281-endpoint\n" + ] + } + ], + "source": [ + "create_endpoint_response = sm_client.create_endpoint(\n", + " EndpointName=f\"{endpoint_name}\", EndpointConfigName=endpoint_config_name\n", + ")\n", + "print(f\"Created Endpoint: {create_endpoint_response['EndpointArn']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f85116e-9a73-480c-a067-96fdfa98695c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "d9c71240-6878-4fed-bf7d-2c1cf75f4ac5", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Status: Creating\n", + "Status: Creating\n", + "Status: Creating\n", + "Status: Creating\n", + "Status: Creating\n", + "Status: Creating\n", + "Status: Creating\n", + "Status: InService\n", + "Arn: arn:aws:sagemaker:us-east-1:687912291502:endpoint/bge-zh-15-2024-08-17-03-56-58-281-endpoint\n", + "Status: InService\n" + ] + } + ], + "source": [ + "import time\n", + "\n", + "resp = sm_client.describe_endpoint(EndpointName=endpoint_name)\n", + "status = resp[\"EndpointStatus\"]\n", + "print(\"Status: \" + status)\n", + "\n", + "while status == \"Creating\":\n", + " time.sleep(60)\n", + " resp = sm_client.describe_endpoint(EndpointName=endpoint_name)\n", + " status = resp[\"EndpointStatus\"]\n", + " print(\"Status: \" + status)\n", + "\n", + "print(\"Arn: \" + resp[\"EndpointArn\"])\n", + "print(\"Status: \" + status)" + ] + }, + { + "cell_type": "markdown", + "id": "dddba20e-fc18-480d-9940-ae39695ac450", + "metadata": {}, + "source": [ + "### 5. 模型测试" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f28db25-6996-440c-b004-14f96cfd982d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def get_vector_by_sm_endpoint(questions, sm_client, endpoint_name):\n", + " parameters = {\n", + " }\n", + "\n", + " response_model = sm_client.invoke_endpoint(\n", + " EndpointName=endpoint_name,\n", + " Body=json.dumps(\n", + " {\n", + " \"inputs\": questions,\n", + " \"is_query\": True,\n", + " \"instruction\" : \"Represent this sentence for searching relevant passages:\"\n", + " }\n", + " ),\n", + " ContentType=\"application/json\",\n", + " )\n", + " # 中文instruction => 为这个句子生成表示以用于检索相关文章:\n", + " json_str = response_model['Body'].read().decode('utf8')\n", + " json_obj = json.loads(json_str)\n", + " embeddings = json_obj['sentence_embeddings']\n", + " return embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52d4f56a-092e-4a6a-a920-48550ec9f20c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "prompts1 = [\"你好啊,大聪明\"]\n", + "\n", + "emb = get_vector_by_sm_endpoint(prompts1, smr_client, endpoint_name)\n", + "print(emb[0][:10])" + ] + }, + { + "cell_type": "markdown", + "id": "328d16bf-6f28-46cf-af31-f8c32017ca6a", + "metadata": {}, + "source": [ + "#### 清除模型Endpoint和config (如无需要,不要执行)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa0d6623-236d-4a5d-8360-4b07f8d8d40c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "!aws sagemaker delete-endpoint --endpoint-name bge-large-en-2023-08-16-09-58-49-900-endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36281bf3-b6f5-450f-b1b8-9f1285b1dad0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "!aws sagemaker delete-endpoint-config --endpoint-config-name bge-large-en-2023-08-16-09-58-49-900-config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9700aff", + "metadata": {}, + "outputs": [], + "source": [ + "!aws sagemaker delete-model --model-name bge-large-en-2023-08-16-09-58-49-900" + ] + } + ], + "metadata": { + "availableInstances": [ + { + "_defaultOrder": 0, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.t3.medium", + "vcpuNum": 2 + }, + { + "_defaultOrder": 1, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.t3.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 2, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.t3.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 3, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.t3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 4, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 5, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 6, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 7, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 8, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 9, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 10, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 11, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 12, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5d.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 13, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5d.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 14, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5d.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 15, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5d.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 16, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5d.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 17, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5d.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 18, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5d.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 19, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 20, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": true, + "memoryGiB": 0, + "name": "ml.geospatial.interactive", + "supportedImageNames": [ + "sagemaker-geospatial-v1-0" + ], + "vcpuNum": 0 + }, + { + "_defaultOrder": 21, + "_isFastLaunch": true, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.c5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 22, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.c5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 23, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.c5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 24, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.c5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 25, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 72, + "name": "ml.c5.9xlarge", + "vcpuNum": 36 + }, + { + "_defaultOrder": 26, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 96, + "name": "ml.c5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 27, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 144, + "name": "ml.c5.18xlarge", + "vcpuNum": 72 + }, + { + "_defaultOrder": 28, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.c5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 29, + "_isFastLaunch": true, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g4dn.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 30, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g4dn.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 31, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g4dn.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 32, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g4dn.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 33, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g4dn.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 34, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g4dn.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 35, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 61, + "name": "ml.p3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 36, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 244, + "name": "ml.p3.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 37, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 488, + "name": "ml.p3.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 38, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.p3dn.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 39, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.r5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 40, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.r5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 41, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.r5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 42, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.r5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 43, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.r5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 44, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.r5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 45, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.r5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 46, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.r5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 47, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 48, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 49, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 50, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 51, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 52, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 53, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.g5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 54, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.g5.48xlarge", + "vcpuNum": 192 + }, + { + "_defaultOrder": 55, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 56, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4de.24xlarge", + "vcpuNum": 96 + } + ], + "instance_type": "ml.m5.large", + "kernelspec": { + "display_name": "conda_python3", + "language": "python", + "name": "conda_python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 79ea08ee519a002a49efdc86c8bca6b2f788daf3 Mon Sep 17 00:00:00 2001 From: tangqy <121102723@qq.com> Date: Tue, 20 Aug 2024 18:54:17 +0800 Subject: [PATCH 3/6] update sagemaker bge embedding model --- application/nlq/business/vector_store.py | 17 +++++++++++------ application/utils/llm.py | 4 +++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/application/nlq/business/vector_store.py b/application/nlq/business/vector_store.py index 15dc6cb1..fcf5cf57 100644 --- a/application/nlq/business/vector_store.py +++ b/application/nlq/business/vector_store.py @@ -79,10 +79,11 @@ def add_sample(cls, profile_name, question, answer): embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,question,opensearch_info['sql_index']) else: embedding = cls.create_vector_embedding_with_bedrock(question) - has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['sql_index'], embedding) + #has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['sql_index'], embedding) + has_same_sample = False if has_same_sample: logger.info(f'delete sample sample entity: {question} to profile {profile_name}') - if cls.opensearch_dao.add_sample(opensearch_info['sql_index'], profile_name, question, answer, embedding): + if cls.opensearch_dao.add_sample(opensearch_info['sql_index'], profile_name, question, answer, embedding['vector_field']): logger.info('Sample added') @classmethod @@ -92,10 +93,11 @@ def add_entity_sample(cls, profile_name, entity, comment): embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,entity,opensearch_info['ner_index']) else: embedding = cls.create_vector_embedding_with_bedrock(entity) - has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['ner_index'], embedding) + #has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['ner_index'], embedding) + has_same_sample = False if has_same_sample: logger.info(f'delete sample sample entity: {entity} to profile {profile_name}') - if cls.opensearch_dao.add_entity_sample(opensearch_info['ner_index'], profile_name, entity, comment, embedding): + if cls.opensearch_dao.add_entity_sample(opensearch_info['ner_index'], profile_name, entity, comment, embedding['vector_field']): logger.info('Sample added') @classmethod @@ -105,10 +107,11 @@ def add_agent_cot_sample(cls, profile_name, entity, comment): embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,entity,opensearch_info['agent_index']) else: embedding = cls.create_vector_embedding_with_bedrock(entity) - has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['agent_index'], embedding) + #has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['agent_index'], embedding) + has_same_sample = False if has_same_sample: logger.info(f'delete agent sample sample query: {entity} to profile {profile_name}') - if cls.opensearch_dao.add_agent_cot_sample(opensearch_info['agent_index'], profile_name, entity, comment, embedding): + if cls.opensearch_dao.add_agent_cot_sample(opensearch_info['agent_index'], profile_name, entity, comment, embedding['vector_field']): logger.info('Sample added') @classmethod @@ -139,6 +142,8 @@ def create_vector_embedding_with_sagemaker(cls,endpoint_name, text, index_name): ) response = invoke_model_sagemaker_endpoint(endpoint_name, body, "embeddings") embeddings = response['sentence_embeddings'][0] + logger.info("embeddings to ingestion") + prinlogger.info(embeddings[:10]) return {"_index": index_name, "text": text, "vector_field": embeddings} @classmethod diff --git a/application/utils/llm.py b/application/utils/llm.py index 6fe8a612..8c0099e2 100644 --- a/application/utils/llm.py +++ b/application/utils/llm.py @@ -232,7 +232,7 @@ def invoke_model_sagemaker_endpoint(endpoint_name, body, model_type="LLM", with_ Body=body, ContentType="application/json", ) - response_body = json.loads(response.get('Body').read()) + response_body = json.loads(response.get('Body').read().decode("utf8")) return response_body @@ -579,6 +579,8 @@ def create_vector_embedding_with_sagemaker(endpoint_name, text, index_name): ) response = invoke_model_sagemaker_endpoint(endpoint_name, body, "embeddings") embeddings = response['sentence_embeddings'][0] + logger.info("embeddings in llm.py") + logger.info(embeddings[:10]) return {"_index": index_name, "text": text, "vector_field": embeddings} From cb37a27297abcdb486e370ac2447cd237c9f2d7a Mon Sep 17 00:00:00 2001 From: tangqy <121102723@qq.com> Date: Tue, 20 Aug 2024 21:04:09 +0800 Subject: [PATCH 4/6] update sagemaker bge embedding model --- application/nlq/data_access/opensearch.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/application/nlq/data_access/opensearch.py b/application/nlq/data_access/opensearch.py index 69cbe080..dd1b60a8 100644 --- a/application/nlq/data_access/opensearch.py +++ b/application/nlq/data_access/opensearch.py @@ -3,8 +3,9 @@ from opensearchpy import OpenSearch from opensearchpy.helpers import bulk -from utils.llm import create_vector_embedding_with_bedrock - +from utils.llm import create_vector_embedding_with_bedrock,create_vector_embedding_with_sagemaker +from utils.env_var import BEDROCK_REGION, AOS_HOST, AOS_PORT, AOS_USER, AOS_PASSWORD, opensearch_info, \ + SAGEMAKER_ENDPOINT_EMBEDDING logger = logging.getLogger(__name__) def put_bulk_in_opensearch(list, client): @@ -190,8 +191,14 @@ def delete_sample(self, index_name, profile_name, doc_id): return self.opensearch_client.delete(index=index_name, id=doc_id) def search_sample(self, profile_name, top_k, index_name, query): - records_with_embedding = create_vector_embedding_with_bedrock(query, index_name=index_name) - return self.search_sample_with_embedding(profile_name, top_k, index_name, records_with_embedding['vector_field']) + if SAGEMAKER_ENDPOINT_EMBEDDING is not None and SAGEMAKER_ENDPOINT_EMBEDDING != "": + records_with_embedding = create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING, query, index_name) + return self.search_sample_with_embedding(profile_name, top_k, index_name, records_with_embedding['vector_field']) + else: + records_with_embedding = create_vector_embedding_with_bedrock(query, index_name=index_name) + return self.search_sample_with_embedding(profile_name, top_k, index_name, records_with_embedding['vector_field']) + + def search_sample_with_embedding(self, profile_name, top_k, index_name, query_embedding): From 578e690e6d6a4ef003b45362279f1b260f76f650 Mon Sep 17 00:00:00 2001 From: tangqy <121102723@qq.com> Date: Wed, 21 Aug 2024 15:17:42 +0800 Subject: [PATCH 5/6] update default password --- application/config_files/stauth_config.yaml | 2 +- source/resources/cdk-config.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/application/config_files/stauth_config.yaml b/application/config_files/stauth_config.yaml index 53029e73..97f7cde2 100644 --- a/application/config_files/stauth_config.yaml +++ b/application/config_files/stauth_config.yaml @@ -5,7 +5,7 @@ credentials: failed_login_attempts: 0 # Will be managed automatically logged_in: False # Will be managed automatically name: AWS - password: $2b$12$NDQv5NLaWiVlNuzQYHwAo.tv.f.TuX1nbdoUZi44/Y3xv4I4QAfjy # Set the password following instructions in README + password: $2b$12$pP4Vi1ovItxf/22zYn1UFeYrA2IM/D7glGNoAd3TrY0Gr4QzdZSNC # Set the password following instructions in README cookie: expiry_days: 2 key: some_signature_key # Must be string diff --git a/source/resources/cdk-config.json b/source/resources/cdk-config.json index 58f1b940..0aed55e4 100644 --- a/source/resources/cdk-config.json +++ b/source/resources/cdk-config.json @@ -3,7 +3,7 @@ "vpc_id" : "" }, "rds": { - "deploy": false + "deploy": true }, "embedding": { "bedrock_embedding_name": "amazon.titan-embed-text-v1", @@ -12,7 +12,7 @@ "sagemaker": { "segamaker_endpoint_embedding" : "", "segamaker_endpoint_sql" : "", - "embedding_dimension": "" + "embedding_dimension": 1024 }, "opensearch": { "sql_index" : "uba", From cd8b2e8c6fe83f0f8e076b1e6e9e11120f9e5a14 Mon Sep 17 00:00:00 2001 From: tangqy <121102723@qq.com> Date: Wed, 21 Aug 2024 17:38:49 +0800 Subject: [PATCH 6/6] fix bug --- application/nlq/business/vector_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/application/nlq/business/vector_store.py b/application/nlq/business/vector_store.py index fcf5cf57..cb9266e1 100644 --- a/application/nlq/business/vector_store.py +++ b/application/nlq/business/vector_store.py @@ -143,7 +143,7 @@ def create_vector_embedding_with_sagemaker(cls,endpoint_name, text, index_name): response = invoke_model_sagemaker_endpoint(endpoint_name, body, "embeddings") embeddings = response['sentence_embeddings'][0] logger.info("embeddings to ingestion") - prinlogger.info(embeddings[:10]) + logger.info(embeddings[:10]) return {"_index": index_name, "text": text, "vector_field": embeddings} @classmethod