Skip to content

Commit cb07bfe

Browse files
committed
First Commit
1 parent 8a9b296 commit cb07bfe

27 files changed

+708
-2
lines changed

LICENSE

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
MIT License
22

3-
Copyright (c) 2022 cohere.ai
3+
Copyright (c) 2022 Cohere Inc. and its affiliates.
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1818
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1919
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2020
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21-
SOFTWARE.
21+
SOFTWARE.

README.md

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
```
2+
################################################################################
3+
# ____ _ ____ _ _ #
4+
# / ___|___ | |__ ___ _ __ ___ / ___| __ _ _ __ __| | |__ _____ __ #
5+
# | | / _ \| '_ \ / _ \ '__/ _ \ \___ \ / _` | '_ \ / _` | '_ \ / _ \ \/ / #
6+
# | |__| (_) | | | | __/ | | __/ ___) | (_| | | | | (_| | |_) | (_) > < #
7+
# \____\___/|_| |_|\___|_| \___| |____/ \__,_|_| |_|\__,_|_.__/ \___/_/\_\ #
8+
# #
9+
# This project is part of Cohere Sandbox, Cohere's Experimental Open Source #
10+
# offering. This project provides a library, tooling, or demo making use of #
11+
# the Cohere Platform. You should expect (self-)documented, high quality code #
12+
# but be warned that this is EXPERIMENTAL. Therefore, also expect rough edges, #
13+
# non-backwards compatible changes, or potential changes in functionality as #
14+
# the library, tool, or demo evolves. Please consider referencing a specific #
15+
# git commit or version if depending upon the project in any mission-critical #
16+
# code as part of your own projects. #
17+
# #
18+
# Please don't hesitate to raise issues or submit pull requests, and thanks #
19+
# for checking out this project! #
20+
# #
21+
################################################################################
22+
```
23+
24+
**Maintainer:** [nickfrosst](https://github.com/nickfrosst) \
25+
**Project maintained until at least (YYYY-MM-DD):** 2023-01-01
26+
27+
# Grounded Question Answering — A Cohere Sandbox Project
28+
29+
This is a Cohere API powered, contextualized, factual question-answering bot!

It responds to questions in Discord by understanding the conversational
context, searching Google for what it believes to be the appropriate query,
finding relevant information on the result pages, and then answering the
question based on what it found.
35+
36+
## Installation
37+
1- Clone the repository.
38+
39+
2- Install all the dependencies:
40+
41+
```pip install -r requirements.txt```
42+
43+
3- Try the demo by running the CLI tool:
45+
46+
```python3 cli_demo.py --cohere_api_key <API_KEY> --serp_api_key <API_KEY>```
47+
48+
# License
49+
COHERE-GROUNDED-QA has an MIT license, as found in the LICENSE file.

__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Copyright (c) 2022 Cohere Inc. and its affiliates.
2+
#
3+
# Licensed under the MIT License (the "License");
4+
# you may not use this file except in compliance with the License.
5+
#
6+
# You may obtain a copy of the License in the LICENSE file at the top
7+
# level of this repository.

cli_demo.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Copyright (c) 2022 Cohere Inc. and its affiliates.
2+
#
3+
# Licensed under the MIT License (the "License");
4+
# you may not use this file except in compliance with the License.
5+
#
6+
# You may obtain a copy of the License in the LICENSE file at the top
7+
# level of this repository.
8+
9+
# this is a cli demo of you the bot. You can run it and ask questions directly in your terminal
10+
11+
# Interactive CLI demo: ask questions in the terminal and get grounded answers.

import argparse

from qa.bot import GroundedQaBot

parser = argparse.ArgumentParser(description="A grounded QA bot with cohere and google search")
parser.add_argument("--cohere_api_key", type=str, help="api key for cohere", required=True)
parser.add_argument("--serp_api_key", type=str, help="api key for serpAPI", required=True)
parser.add_argument("--verbosity", type=int, default=0, help="verbosity level")
args = parser.parse_args()

bot = GroundedQaBot(args.cohere_api_key, args.serp_api_key)

if __name__ == "__main__":
    # Simple REPL: read a question, answer it, repeat until the user exits.
    while True:
        try:
            question = input("question: ")
        except (EOFError, KeyboardInterrupt):
            # Exit cleanly on Ctrl-D / Ctrl-C instead of dumping a traceback.
            break
        reply = bot.answer(question, verbosity=args.verbosity, n_paragraphs=2)
        print("answer: " + reply)

discord_bot.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# Copyright (c) 2022 Cohere Inc. and its affiliates.
2+
#
3+
# Licensed under the MIT License (the "License");
4+
# you may not use this file except in compliance with the License.
5+
#
6+
# You may obtain a copy of the License in the LICENSE file at the top
7+
# level of this repository.
8+
9+
# this is a demo discord bot. You can make a discord bot token by visiting https://discord.com/developers
10+
11+
import argparse
12+
13+
import discord
14+
from discord import Embed
15+
from discord.ext import commands
16+
17+
from qa.bot import GroundedQaBot
18+
19+
# Command-line configuration for the Discord bot.
parser = argparse.ArgumentParser(description="A grounded QA bot with cohere and google search")
parser.add_argument("--cohere_api_key", type=str, help="api key for cohere", required=True)
parser.add_argument("--serp_api_key", type=str, help="api key for serpAPI", required=True)
# Fixed help-text typo: "discord boat" -> "discord bot".
parser.add_argument("--discord_key", type=str, help="api key for discord bot", required=True)
parser.add_argument("--verbosity", type=int, default=0, help="verbosity level")
args = parser.parse_args()

bot = GroundedQaBot(args.cohere_api_key, args.serp_api_key)
27+
28+
29+
class MyClient(discord.Client):
    """Discord client that answers questions grounded in web search results.

    Triggered either by a direct message or by a ❓ reaction on a message.
    """

    async def on_ready(self):
        """Initializes bot"""
        print(f"We have logged in as {self.user}")

        for guild in self.guilds:
            print(f"{self.user} is connected to the following guild:\n"
                  f"{guild.name}(id: {guild.id})")

    async def answer(self, message):
        """Answers a question based on the context of the conversation and information from the web"""
        # Collect up to 6 messages preceding the triggering message as chat context.
        history = []
        async for historic_msg in message.channel.history(limit=6, before=message):
            if historic_msg.content:
                name = "user"
                if historic_msg.author.name == self.user.name:
                    name = "bot"
                # channel.history yields messages newest-first here; prepending
                # each one restores chronological (oldest-first) order.
                history = [f"{name}: {historic_msg.clean_content}"] + history

        print(history)  # NOTE(review): debug output; consider logging instead.
        bot.set_chat_history(history)

        # Show a typing indicator while the (potentially slow) answer is produced.
        async with message.channel.typing():
            reply = bot.answer(message.clean_content, verbosity=2)
            response_msg = await message.channel.send(reply, reference=message)
            # Suppress link-preview embeds on the reply (source URLs are included).
            await response_msg.edit(suppress=True)
            return

    async def on_message(self, message):
        """Handles query messages triggered by direct messages to the bot"""
        # Only respond to DMs, and never to the bot's own messages.
        if isinstance(message.channel, discord.channel.DMChannel) and message.author != self.user:
            await self.answer(message)

    async def on_reaction_add(self, reaction, user):
        """Handles query messages triggered by emoji from user."""
        # reaction.count == 1 ensures we answer only on the first ❓ added.
        if user != self.user:
            if str(reaction.emoji) == "❓" and reaction.count == 1:
                await self.answer(reaction.message)
68+
69+
70+
if __name__ == "__main__":
    # Request all gateway intents so message content and reactions are visible,
    # then hand control to discord.py's event loop.
    client = MyClient(intents=discord.Intents.all())
    client.run(args.discord_key)

qa/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Copyright (c) 2022 Cohere Inc. and its affiliates.
2+
#
3+
# Licensed under the MIT License (the "License");
4+
# you may not use this file except in compliance with the License.
5+
#
6+
# You may obtain a copy of the License in the LICENSE file at the top
7+
# level of this repository.
153 Bytes
Binary file not shown.
151 Bytes
Binary file not shown.

qa/__pycache__/answer.cpython-310.pyc

3.04 KB
Binary file not shown.

qa/__pycache__/answer.cpython-39.pyc

3.11 KB
Binary file not shown.

qa/__pycache__/bot.cpython-310.pyc

1.9 KB
Binary file not shown.

qa/__pycache__/bot.cpython-39.pyc

1.89 KB
Binary file not shown.

qa/__pycache__/model.cpython-310.pyc

1.87 KB
Binary file not shown.

qa/__pycache__/model.cpython-39.pyc

1.8 KB
Binary file not shown.

qa/__pycache__/search.cpython-310.pyc

5.28 KB
Binary file not shown.

qa/__pycache__/search.cpython-39.pyc

5.25 KB
Binary file not shown.

qa/__pycache__/util.cpython-310.pyc

528 Bytes
Binary file not shown.

qa/__pycache__/util.cpython-39.pyc

526 Bytes
Binary file not shown.

qa/answer.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# Copyright (c) 2022 Cohere Inc. and its affiliates.
2+
#
3+
# Licensed under the MIT License (the "License");
4+
# you may not use this file except in compliance with the License.
5+
#
6+
# You may obtain a copy of the License in the LICENSE file at the top
7+
# level of this repository.
8+
9+
import numpy as np
10+
11+
from qa.model import get_sample_answer
12+
from qa.search import embedding_search, get_results_paragraphs_multi_process
13+
from qa.util import pretty_print
14+
15+
16+
def trim_stop_sequences(s, stop_sequences):
    """Remove a stop sequence found at the end of returned generated text.

    Args:
        s: the generated text.
        stop_sequences: candidate terminator strings to strip from the end.

    Returns:
        s without the first matching trailing stop sequence, or s unchanged
        if none match.
    """
    for stop_sequence in stop_sequences:
        if s.endswith(stop_sequence):
            # str.removesuffix avoids the s[:-0] pitfall of the slicing form,
            # which would wrongly return "" for an empty stop sequence.
            return s.removesuffix(stop_sequence)
    return s
23+
24+
25+
def answer(question, context, co, model, chat_history=""):
    """Answer a question given some context.

    Args:
        question: the user question.
        context: web-search paragraphs to ground the answer in.
        co: a cohere API client.
        model: name of the cohere generation model to use.
        chat_history: optional chat log; when present, a conversational
            prompt is used instead of the plain QA prompt.

    Returns:
        The highest-likelihood non-empty generation, stripped of whitespace,
        or "" when every generation came back empty.
    """
    prompt = ("This is an example of question answering based on a text passage:\n "
              f"Context:-{context}\nQuestion:\n-{question}\nAnswer:\n-")
    if chat_history:
        prompt = ("This is an example of factual question answering chat bot. It "
                  "takes the text context and answers related questions:\n "
                  f"Context:-{context}\nChat Log\n{chat_history}\nbot:")

    stop_sequences = ["\n"]

    num_generations = 4
    # Keep only the last 1900 tokens of the prompt to fit the model's context window.
    prompt = "".join(co.tokenize(text=prompt).token_strings[-1900:])
    prediction = co.generate(model=model,
                             prompt=prompt,
                             max_tokens=100,
                             temperature=0.3,
                             stop_sequences=stop_sequences,
                             num_generations=num_generations,
                             return_likelihoods="GENERATION")
    generations = [[
        trim_stop_sequences(prediction.generations[i].text.strip(), stop_sequences),
        prediction.generations[i].likelihood
    ] for i in range(num_generations)]
    # Drop blank generations. The original `not x[0].isspace()` filter missed
    # empty strings, because "".isspace() is False.
    generations = [g for g in generations if g[0] and not g[0].isspace()]
    # Guard: np.argmax on an empty list raises ValueError; return an empty
    # answer instead when no usable generation remains.
    if not generations:
        return ""
    response = generations[np.argmax([g[1] for g in generations])][0]
    return response.strip()
53+
54+
55+
def answer_with_search(question,
                       co,
                       serp_api_token,
                       chat_history="",
                       model="xlarge",
                       embedding_model="small",
                       url=None,
                       n_paragraphs=1,
                       verbosity=0):
    """Generates completion based on search results."""

    # Fetch candidate paragraphs (and their source URLs) from web search.
    paragraphs, sources = get_results_paragraphs_multi_process(question, serp_api_token, url=url)
    if not paragraphs:
        return ("", "", "")

    # Rank the paragraphs by embedding similarity to a model-drafted answer.
    sample_answer = get_sample_answer(question, co)
    ranked = embedding_search(paragraphs, sources, sample_answer, co, model=embedding_model)

    if verbosity > 1:
        pprint_results = "\n".join(r[0] for r in ranked)
        pretty_print("OKGREEN", f"all search result context: {pprint_results}")

    # Keep only the n most relevant paragraphs as grounding context.
    top = ranked[-n_paragraphs:]
    context = "\n".join(r[0] for r in top)

    if verbosity:
        pretty_print("OKCYAN", "relevant result context: " + context)

    response = answer(question, context, co, chat_history=chat_history, model=model)

    top_urls = [r[1] for r in top]
    top_texts = [r[0] for r in top]
    return (response, top_urls, top_texts)

qa/bot.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Copyright (c) 2022 Cohere Inc. and its affiliates.
2+
#
3+
# Licensed under the MIT License (the "License");
4+
# you may not use this file except in compliance with the License.
5+
#
6+
# You may obtain a copy of the License in the LICENSE file at the top
7+
# level of this repository.
8+
9+
from sys import settrace
10+
11+
import cohere
12+
13+
from qa.answer import answer_with_search
14+
from qa.model import get_contextual_search_query
15+
from qa.util import pretty_print
16+
17+
18+
class GroundedQaBot():
    """A class yielding Grounded question-answering conversational agents."""

    def __init__(self, cohere_api_key, serp_api_key):
        self._cohere_api_key = cohere_api_key
        self._serp_api_key = serp_api_key
        self._chat_history = []
        self._co = cohere.Client(self._cohere_api_key)

    @property
    def chat_history(self):
        # Read-only view of the running conversation log.
        return self._chat_history

    def set_chat_history(self, chat_history):
        # Replace the conversation log wholesale (used by the discord frontend).
        self._chat_history = chat_history

    def answer(self, question, verbosity=0, n_paragraphs=1):
        """Answer a question, based on recent conversational history."""
        self._chat_history.append("user: " + question)

        # Rewrite the question as a self-contained search query using the
        # last few turns of conversation.
        recent_turns = "\n".join(self._chat_history[-6:])
        question = get_contextual_search_query(recent_turns, self._co, verbosity=verbosity)

        answer_text, source_urls, source_texts = answer_with_search(question,
                                                                    self._co,
                                                                    self._serp_api_key,
                                                                    verbosity=verbosity,
                                                                    n_paragraphs=n_paragraphs)

        self._chat_history.append("bot: " + answer_text)

        # Guard clauses for the two failure modes, then the normal reply.
        if not source_texts or "".join(source_texts).isspace():
            return ("Sorry, I could not find any relevant information for that "
                    "question.")
        if answer_text.strip() == question.strip():
            return ("I had trouble answering the question, but maybe this link on "
                    "the right will help.")
        sources_str = "\n--".join(source_urls)
        return f"{answer_text}\nSource: {sources_str}"

qa/model.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Copyright (c) 2022 Cohere Inc. and its affiliates.
2+
#
3+
# Licensed under the MIT License (the "License");
4+
# you may not use this file except in compliance with the License.
5+
#
6+
# You may obtain a copy of the License in the LICENSE file at the top
7+
# level of this repository.
8+
9+
import os
10+
11+
import cohere
12+
import numpy as np
13+
from cohere.classify import Example
14+
15+
from qa.util import pretty_print
16+
17+
_DATA_DIRNAME = os.path.join(os.path.dirname(__file__), "prompt_data")
18+
19+
20+
def get_contextual_search_query(history, co, model="xlarge", verbosity=0):
    """Adds message history context to user query."""

    # Few-shot prompt template lives on disk; the chat history is appended to it.
    prompt_path = os.path.join(_DATA_DIRNAME, "get_contextual_search_query.prompt")
    with open(prompt_path) as f:
        prompt = f.read() + f"{history}\n-"

    prediction = co.generate(
        model=model,
        prompt=prompt,
        max_tokens=50,
        temperature=0.75,
        k=0,
        p=0.75,
        frequency_penalty=0,
        presence_penalty=0,
        stop_sequences=["\n"],
        return_likelihoods="GENERATION",
        num_generations=4,
    )
    # Keep the candidate the model itself scored as most likely.
    best = max(prediction.generations, key=lambda g: g.likelihood)
    result = best.text

    if verbosity:
        pretty_print("OKGREEN", "contextual question prompt: " + prompt)
        pretty_print("OKCYAN", "contextual question: " + result)
    return result.strip()
45+
46+
47+
def get_sample_answer(question, co, model="xlarge"):
    """Return a sample answer to a question based on the model's training data.
    """
    # Build the prompt from the on-disk template plus the question itself.
    with open(os.path.join(_DATA_DIRNAME, "get_sample_answer.prompt")) as f:
        prompt = f.read() + f"{question}\nAnswer:"

    generation_params = dict(model=model,
                             prompt=prompt,
                             max_tokens=50,
                             temperature=0.8,
                             k=0,
                             p=0.7,
                             stop_sequences=["--"])
    response = co.generate(**generation_params)
    return response.generations[0].text

0 commit comments

Comments
 (0)