diff --git a/vector_store_integration/RAG_USING_WEAVIATE.ipynb b/vector_store_integration/RAG_USING_WEAVIATE.ipynb
new file mode 100644
index 0000000..8beb4ad
--- /dev/null
+++ b/vector_store_integration/RAG_USING_WEAVIATE.ipynb
@@ -0,0 +1,236 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# **End-to-End RAG Tutorial Using Salesforce, Airbyte Cloud, Weaviate, and LangChain**\n",
+ "This notebook illustrates the complete setup of a Retrieval-Augmented Generation (RAG) pipeline.
\n",
+ "We extract data from a GitHub repository using PyAirbyte, store the data in a Chroma vector store, and use LangChain to perform RAG on the stored data.
\n",
+ "## **Prerequisites**\n",
+ "**1) OpenAI API Key**:\n",
+ " - **Create an OpenAI Account**: Sign up for an account on [OpenAI](https://www.openai.com/).\n",
+ " - **Generate an API Key**: Go to the API section and generate a new API key. For detailed instructions, refer to the [OpenAI documentation](https://beta.openai.com/docs/quickstart).\n",
+ "\n",
+ "**2) Weaviate Cluster's Public URL and API Key**:\n",
+ "\n",
+ " - **Get your URL and Key**: Cick on your clusters drop down button. Visit [this](https://console.weaviate.cloud/).\n"
+ ],
+ "metadata": {
+ "id": "sW7Fx5ilH2jK"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# **Installing Dependencies**\n"
+ ],
+ "metadata": {
+ "id": "oFw9gVa9JKDH"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "zAztxTpIBiMF"
+ },
+ "outputs": [],
+ "source": [
+ "# Add virtual environment support if needed\n",
+ "!apt-get install -qq python3.10-venv\n",
+ "\n",
+ "# Install required packages\n",
+ "%pip install --quiet openai langchain-openai tiktoken pandas weaviate-client langchain-weaviate langchain-community\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# **Set Up Environment Variables**"
+ ],
+ "metadata": {
+ "id": "wH5CoP0WJfjd"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import os\n",
+ "\n",
+ "os.environ[\"WEAVIATE_URL\"] = \"YOUR_WEAVIATE_URL\"\n",
+ "os.environ[\"WEAVIATE_API_KEY\"] = \"YOUR_WEAVIATE_API_KEY\"\n",
+ "os.environ[\"OPENAI_API_KEY\"] = \"YOUR_OPENAI_API_KEY\""
+ ],
+ "metadata": {
+ "id": "JDqAdjZTCbeB"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
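+ {
+ "cell_type": "markdown",
+ "source": [
+ "Alternatively, to avoid pasting secrets into the notebook, you can run the optional cell below instead of the one above. It is a minimal sketch that uses Python's standard `getpass` module to prompt for the same three environment variables."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Optional alternative: prompt for secrets instead of hardcoding them\n",
+ "import os\n",
+ "from getpass import getpass\n",
+ "\n",
+ "for var in (\"WEAVIATE_URL\", \"WEAVIATE_API_KEY\", \"OPENAI_API_KEY\"):\n",
+ "    os.environ[var] = getpass(f\"{var}: \")\n"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },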
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# **Initialize Weaviate Vector Store**"
+ ],
+ "metadata": {
+ "id": "jGEqM1-RJlTE"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import weaviate\n",
+ "from weaviate.auth import AuthApiKey\n",
+ "from langchain_weaviate.vectorstores import WeaviateVectorStore\n",
+ "from langchain_community.document_loaders import TextLoader\n",
+ "from langchain_openai import OpenAIEmbeddings\n",
+ "from langchain_text_splitters import CharacterTextSplitter\n",
+ "import pandas as pd\n",
+ "\n",
+ "# Connect to Weaviate with API key\n",
+ "auth_config = AuthApiKey(api_key=os.getenv(\"WEAVIATE_API_KEY\"))\n",
+ "\n",
+ "try:\n",
+ " weaviate_client = weaviate.Client(\n",
+ " url=os.getenv(\"WEAVIATE_URL\"),\n",
+ " auth_client_secret=auth_config,\n",
+ " )\n",
+ " print(\"Successfully connected to Weaviate\", flush=True)\n",
+ "except Exception as e:\n",
+ " print(f\"Error connecting to Weaviate: {e}\", flush=True)\n",
+ "\n"
+ ],
+ "metadata": {
+ "id": "XnzOsbFaCepg"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
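+ {
+ "cell_type": "markdown",
+ "source": [
+ "As an optional sanity check, the cell below confirms the cluster is reachable and lists the collections it already contains (your Salesforce data synced by Airbyte Cloud should appear here). This is a minimal sketch using the v4 client's `is_ready()` and `collections.list_all()` methods on the `weaviate_client` created above."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Optional sanity check: verify the cluster is ready and list existing collections\n",
+ "print(\"Cluster ready:\", weaviate_client.is_ready(), flush=True)\n",
+ "for name in weaviate_client.collections.list_all():\n",
+ "    print(\"Collection:\", name, flush=True)\n"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },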
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# **Embedding and similarity search with Weaviate**\n",
+ "Here we will convert the user's query into embeddings using OpenAI and retrieve similar chunks from Weaviate based on the query.
\n",
+ "### Note: Change collection and property according to your own requirement!"
+ ],
+ "metadata": {
+ "id": "EPEEwKcgJtI_"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from langchain_openai import OpenAIEmbeddings\n",
+ "from typing import List\n",
+ "\n",
+ "# Initialize OpenAI client for embeddings\n",
+ "openai_embeddings = OpenAIEmbeddings(openai_api_key=os.getenv(\"OPENAI_API_KEY\"))\n",
+ "\n",
+ "# Convert user's query into a vector array to prep for similarity search\n",
+ "def get_embedding_from_openai(query) -> List[float]:\n",
+ " return openai_embeddings.embed_query(query)\n",
+ "\n",
+ "# Use Weaviate to find matching chunks\n",
+ "collection=\"Lead\"\n",
+ "property=\"name\"\n",
+ "def get_similar_chunks_from_weaviate(query: str) -> List[str]:\n",
+ " try:\n",
+ " embedding = get_embedding_from_openai(query)\n",
+ " near_vector = {\n",
+ " \"vector\": embedding\n",
+ " }\n",
+ " result = weaviate_client.query.get(collection, [property]).with_near_vector(near_vector).do()\n",
+ "\n",
+ " if 'data' in result and 'Get' in result['data'] and collection in result['data']['Get']:\n",
+ " chunks = [res[property] for res in result['data']['Get'][collection]]\n",
+ " return chunks\n",
+ " else:\n",
+ " print(\"Unexpected result format:\", result, flush=True)\n",
+ " return []\n",
+ " except Exception as e:\n",
+ " print(f\"Error during Weaviate query: {e}\", flush=True)\n",
+ " return []\n"
+ ],
+ "metadata": {
+ "id": "8SpJVDx4D23z"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
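+ {
+ "cell_type": "markdown",
+ "source": [
+ "Before wiring retrieval into the full pipeline, you can try it on its own. The optional cell below calls `get_similar_chunks_from_weaviate` with a sample query (the query string is just an illustration) and prints whatever chunks come back."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Optional: inspect the raw chunks retrieved for a sample query\n",
+ "sample_chunks = get_similar_chunks_from_weaviate(\"leads at BNY\")\n",
+ "for i, chunk in enumerate(sample_chunks, start=1):\n",
+ "    print(f\"Chunk {i}: {chunk}\", flush=True)\n"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },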
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# **Building RAG Pipeline and asking a question**\n",
+ "Finally we use OpenAI for querying our data!
\n",
+ "We know the three main steps of a RAG Pipeline are :
\n",
+ "- Embedding incoming query
\n",
+ "- Doing similarity search to find matching chunks
\n",
+ "- Send chunks to LLM for completion"
+ ],
+ "metadata": {
+ "id": "ubPGiUFSKBhF"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from typing import List\n",
+ "from openai import OpenAI\n",
+ "\n",
+ "client = OpenAI(api_key=os.getenv(\"OPENAI_API_KEY\"))\n",
+ "\n",
+ "# Use OpenAI to complete the response\n",
+ "def get_completion_from_openai(question, document_chunks: List[str], model_name=\"gpt-3.5-turbo\"):\n",
+ " chunks = \"\\n\\n\".join(document_chunks)\n",
+ "\n",
+ " try:\n",
+ " completion = client.chat.completions.create(\n",
+ " model=model_name,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": \"You are an assistant. Answer the question based on the context. Do not use any other information. Be concise.\"},\n",
+ " {\"role\": \"user\", \"content\": f\"Context:\\n{chunks}\\n\\n{question}\\n\\nAnswer:\"}\n",
+ " ],\n",
+ " max_tokens=150\n",
+ " )\n",
+ " return completion.choices[0].message.content.strip()\n",
+ " except Exception as e:\n",
+ " print(f\"Error during OpenAI completion: {e}\", flush=True)\n",
+ " return \"There was an error generating the response.\"\n",
+ "\n",
+ "\n",
+ "# Putting it all together\n",
+ "def get_response(query, model_name=\"gpt-3.5-turbo\"):\n",
+ " chunks = get_similar_chunks_from_weaviate(query)\n",
+ " if len(chunks) == 0:\n",
+ " return \"I am sorry, I do not have the context to answer your question.\"\n",
+ " else:\n",
+ " return get_completion_from_openai(query, chunks, model_name)\n",
+ "\n",
+ "# Ask a question\n",
+ "query = 'How many lead work in BNY?'\n",
+ "response = get_response(query)\n",
+ "\n",
+ "print(f\"\\n\\nResponse from LLM:\\n\\n{response}\", flush=True)\n"
+ ],
+ "metadata": {
+ "id": "Qu1C3e1TE2YL"
+ },
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file