Skip to content

V1.5.0 #324

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: v1.5.0
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions application/.env.template
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ RDS_REGION_NAME=us-west-2
AWS_DEFAULT_REGION=us-west-2
DYNAMODB_AWS_REGION=us-west-2

EMBEDDING_DIMENSION=1536
EMBEDDING_DIMENSION=1024
BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v1

# If you need to use an access key/secret key (AK/SK) pair to access Bedrock, store Bedrock's AK/SK in Secrets Manager. Examples are as follows:
Expand All @@ -32,4 +32,4 @@ BEDROCK_SECRETS_AK_SK=
OPENSEARCH_SECRETS_URL_HOST=opensearch-host-url
OPENSEARCH_SECRETS_USERNAME_PASSWORD=opensearch-master-user

# SAGEMAKER_ENDPOINT_EMBEDDING=
SAGEMAKER_ENDPOINT_EMBEDDING=bge-zh-15-2024-08-17-03-56-58-281-endpoint
2 changes: 1 addition & 1 deletion application/config_files/stauth_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ credentials:
failed_login_attempts: 0 # Will be managed automatically
logged_in: False # Will be managed automatically
name: AWS
password: $2b$12$NDQv5NLaWiVlNuzQYHwAo.tv.f.TuX1nbdoUZi44/Y3xv4I4QAfjy # Set the password following instructions in README
password: $2b$12$pP4Vi1ovItxf/22zYn1UFeYrA2IM/D7glGNoAd3TrY0Gr4QzdZSNC # Set the password following instructions in README
cookie:
expiry_days: 2
key: some_signature_key # Must be string
Expand Down
47 changes: 25 additions & 22 deletions application/nlq/business/vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,39 +76,42 @@ def get_all_agent_cot_samples(cls, profile_name):
def add_sample(cls, profile_name, question, answer):
logger.info(f'add sample question: {question} to profile {profile_name}')
if SAGEMAKER_ENDPOINT_EMBEDDING is not None and SAGEMAKER_ENDPOINT_EMBEDDING != "":
embedding = cls.create_vector_embedding_with_sagemaker(question)
embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,question,opensearch_info['sql_index'])
else:
embedding = cls.create_vector_embedding_with_bedrock(question)
has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['sql_index'], embedding)
#has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['sql_index'], embedding)
has_same_sample = False
if has_same_sample:
logger.info(f'delete sample sample entity: {question} to profile {profile_name}')
if cls.opensearch_dao.add_sample(opensearch_info['sql_index'], profile_name, question, answer, embedding):
if cls.opensearch_dao.add_sample(opensearch_info['sql_index'], profile_name, question, answer, embedding['vector_field']):
logger.info('Sample added')

@classmethod
def add_entity_sample(cls, profile_name, entity, comment):
logger.info(f'add sample entity: {entity} to profile {profile_name}')
if SAGEMAKER_ENDPOINT_EMBEDDING is not None and SAGEMAKER_ENDPOINT_EMBEDDING != "":
embedding = cls.create_vector_embedding_with_sagemaker(entity)
embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,entity,opensearch_info['ner_index'])
else:
embedding = cls.create_vector_embedding_with_bedrock(entity)
has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['ner_index'], embedding)
#has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['ner_index'], embedding)
has_same_sample = False
if has_same_sample:
logger.info(f'delete sample sample entity: {entity} to profile {profile_name}')
if cls.opensearch_dao.add_entity_sample(opensearch_info['ner_index'], profile_name, entity, comment, embedding):
if cls.opensearch_dao.add_entity_sample(opensearch_info['ner_index'], profile_name, entity, comment, embedding['vector_field']):
logger.info('Sample added')

@classmethod
def add_agent_cot_sample(cls, profile_name, entity, comment):
logger.info(f'add agent sample query: {entity} to profile {profile_name}')
if SAGEMAKER_ENDPOINT_EMBEDDING is not None and SAGEMAKER_ENDPOINT_EMBEDDING != "":
embedding = cls.create_vector_embedding_with_sagemaker(entity)
embedding = cls.create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING,entity,opensearch_info['agent_index'])
else:
embedding = cls.create_vector_embedding_with_bedrock(entity)
has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['agent_index'], embedding)
#has_same_sample = cls.search_same_query(profile_name, 1, opensearch_info['agent_index'], embedding)
has_same_sample = False
if has_same_sample:
logger.info(f'delete agent sample sample query: {entity} to profile {profile_name}')
if cls.opensearch_dao.add_agent_cot_sample(opensearch_info['agent_index'], profile_name, entity, comment, embedding):
if cls.opensearch_dao.add_agent_cot_sample(opensearch_info['agent_index'], profile_name, entity, comment, embedding['vector_field']):
logger.info('Sample added')

@classmethod
Expand All @@ -129,19 +132,19 @@ def create_vector_embedding_with_bedrock(cls, text):
return embedding

@classmethod
def create_vector_embedding_with_sagemaker(cls, text):
try:
body = json.dumps(
{
"inputs": text,
"is_query": True
}
)
response = invoke_model_sagemaker_endpoint(SAGEMAKER_ENDPOINT_EMBEDDING, body, model_type="embedding")
embeddings = response[0]
return embeddings
except Exception as e:
logger.error(f'create_vector_embedding_with_sagemaker is error {e}')
def create_vector_embedding_with_sagemaker(cls,endpoint_name, text, index_name):
body=json.dumps(
{
"inputs": text,
"is_query": True,
"instruction" : "Represent this sentence for searching relevant passages:"
}
)
response = invoke_model_sagemaker_endpoint(endpoint_name, body, "embeddings")
embeddings = response['sentence_embeddings'][0]
logger.info("embeddings to ingestion")
logger.info(embeddings[:10])
return {"_index": index_name, "text": text, "vector_field": embeddings}

@classmethod
def delete_sample(cls, profile_name, doc_id):
Expand Down
15 changes: 11 additions & 4 deletions application/nlq/data_access/opensearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
from opensearchpy import OpenSearch
from opensearchpy.helpers import bulk

from utils.llm import create_vector_embedding_with_bedrock

from utils.llm import create_vector_embedding_with_bedrock,create_vector_embedding_with_sagemaker
from utils.env_var import BEDROCK_REGION, AOS_HOST, AOS_PORT, AOS_USER, AOS_PASSWORD, opensearch_info, \
SAGEMAKER_ENDPOINT_EMBEDDING
logger = logging.getLogger(__name__)

def put_bulk_in_opensearch(list, client):
Expand Down Expand Up @@ -190,8 +191,14 @@ def delete_sample(self, index_name, profile_name, doc_id):
return self.opensearch_client.delete(index=index_name, id=doc_id)

def search_sample(self, profile_name, top_k, index_name, query):
records_with_embedding = create_vector_embedding_with_bedrock(query, index_name=index_name)
return self.search_sample_with_embedding(profile_name, top_k, index_name, records_with_embedding['vector_field'])
if SAGEMAKER_ENDPOINT_EMBEDDING is not None and SAGEMAKER_ENDPOINT_EMBEDDING != "":
records_with_embedding = create_vector_embedding_with_sagemaker(SAGEMAKER_ENDPOINT_EMBEDDING, query, index_name)
return self.search_sample_with_embedding(profile_name, top_k, index_name, records_with_embedding['vector_field'])
else:
records_with_embedding = create_vector_embedding_with_bedrock(query, index_name=index_name)
return self.search_sample_with_embedding(profile_name, top_k, index_name, records_with_embedding['vector_field'])




def search_sample_with_embedding(self, profile_name, top_k, index_name, query_embedding):
Expand Down
13 changes: 8 additions & 5 deletions application/utils/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def invoke_model_sagemaker_endpoint(endpoint_name, body, model_type="LLM", with_
Body=body,
ContentType="application/json",
)
response_body = json.loads(response.get('Body').read())
response_body = json.loads(response.get('Body').read().decode("utf8"))
return response_body


Expand Down Expand Up @@ -570,14 +570,17 @@ def create_vector_embedding_with_bedrock(text, index_name):


def create_vector_embedding_with_sagemaker(endpoint_name, text, index_name):
body = json.dumps(
body=json.dumps(
{
"inputs": text,
"is_query": True
"is_query": True,
"instruction" : "Represent this sentence for searching relevant passages:"
}
)
response = invoke_model_sagemaker_endpoint(endpoint_name, body, model_type="embedding")
embeddings = response[0]
response = invoke_model_sagemaker_endpoint(endpoint_name, body, "embeddings")
embeddings = response['sentence_embeddings'][0]
logger.info("embeddings in llm.py")
logger.info(embeddings[:10])
return {"_index": index_name, "text": text, "vector_field": embeddings}


Expand Down
Loading