feat: Use streaming setup at Query level (#214)
README.md
@@ -204,19 +204,6 @@ from embedchain import PersonApp as ECPApp
 print(naval_chat_bot.query("What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?"))
 # answer: Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality.
 ```
-### Stream Response
-
-- You can add config to your query method to stream responses like ChatGPT does. You would require a downstream handler to render the chunk in your desirable format
-
-- To use this, instantiate App with a `InitConfig` instance passing `stream_response=True`. The following example iterates through the chunks and prints them as they appear
-```python
-app = App(InitConfig(stream_response=True))
-resp = naval_chat_bot.query("What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?")
-
-for chunk in resp:
-    print(chunk, end="", flush=True)
-# answer: Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality.
-```
 
 ### Chat Interface
 
@@ -235,6 +222,22 @@ print(naval_chat_bot.chat("what did the author say about happiness?"))
 # answer: The author, Naval Ravikant, believes that happiness is a choice you make and a skill you develop. He compares the mind to the body, stating that just as the body can be molded and changed, so can the mind. He emphasizes the importance of being present in the moment and not getting caught up in regrets of the past or worries about the future. By being present and grateful for where you are, you can experience true happiness.
 ```
 
+### Stream Response
+
+- You can add config to your query method to stream responses like ChatGPT does. You would require a downstream handler to render the chunk in your desirable format. Currently only supports OpenAI model.
+
+- To use this, instantiate a `QueryConfig` or `ChatConfig` object with `stream=True`. Then pass it to the `.chat()` or `.query()` method. The following example iterates through the chunks and prints them as they appear.
+
+```python
+app = App()
+query_config = QueryConfig(stream = True)
+resp = app.query("What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?", query_config)
+
+for chunk in resp:
+    print(chunk, end="", flush=True)
+# answer: Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality.
+```
+
 ## Format supported
 
 We support the following formats:
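The new README section shows streaming with `.query()` and a `QueryConfig`; since the bullet above also mentions `ChatConfig` and `.chat()`, here is a minimal companion sketch for the chat path. It assumes the `embedchain.config` import path and a bot that already has sources added; neither is shown in this hunk.

```python
from embedchain import App
from embedchain.config import ChatConfig  # import path assumed, not shown in this diff

naval_chat_bot = App()
# naval_chat_bot.add(...)  # add your sources first, as in the earlier README sections

chat_config = ChatConfig(stream=True)
resp = naval_chat_bot.chat(
    "What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?",
    chat_config,
)

# With stream=True the call returns a generator of text chunks rather than a full string.
for chunk in resp:
    print(chunk, end="", flush=True)
```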
@@ -453,6 +456,7 @@ _coming soon_
 |option|description|type|default|
 |---|---|---|---|
 |template|custom template for prompt|Template|Template("Use the following pieces of context to answer the query at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. \$context Query: $query Helpful Answer:")|
+|stream|control if response is streamed back to the user|bool|False|
 
 #### **Chat Config**
 
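Since the table now documents both `template` and `stream`, a short sketch combining them in one `QueryConfig` may help; the import path and the template wording are illustrative (per the `QueryConfig` changes below, the template must keep the `$context` and `$query` placeholders).

```python
from string import Template

from embedchain.config import QueryConfig  # import path assumed

# The template must contain $context and $query, otherwise QueryConfig raises ValueError.
template = Template(
    "Use the following pieces of context to answer the query at the end.\n"
    "$context\n"
    "Query: $query\n"
    "Helpful Answer:"
)

# Both options from the table can be set on the same config object.
query_config = QueryConfig(template=template, stream=True)
```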
@@ -4,5 +4,11 @@ class ChatConfig(QueryConfig):
     """
     Config for the `chat` method, inherits from `QueryConfig`.
     """
-    def __init__(self):
-        pass
+    def __init__(self, stream: bool = False):
+        """
+        Initializes the QueryConfig instance.
+
+        :param stream: Optional. Control if response is streamed back to the user
+        :raises ValueError: If the template is not valid as template should contain $context and $query
+        """
+        super().__init__(stream=stream)
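Because the new `ChatConfig.__init__` simply forwards `stream` to `QueryConfig.__init__`, it inherits the attribute, the default, and the bool validation from its parent. A small sketch of the expected behaviour (import path assumed):

```python
from embedchain.config import ChatConfig  # import path assumed

config = ChatConfig(stream=True)
assert config.stream is True  # set by QueryConfig.__init__ via super()

default_config = ChatConfig()
assert default_config.stream is False  # stream defaults to False, so existing chat calls are unchanged
```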
@@ -6,7 +6,7 @@ class InitConfig(BaseConfig):
     """
     Config to initialize an embedchain `App` instance.
     """
-    def __init__(self, ef=None, db=None, stream_response=False):
+    def __init__(self, ef=None, db=None):
         """
         :param ef: Optional. Embedding function to use.
         :param db: Optional. (Vector) database to use for embeddings.
@@ -27,10 +27,6 @@ class InitConfig(BaseConfig):
             self.db = ChromaDB(ef=self.ef)
         else:
             self.db = db
 
-        if not isinstance(stream_response, bool):
-            raise ValueError("`stream_respone` should be bool")
-        self.stream_response = stream_response
-
         return
 
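With `stream_response` gone from `InitConfig`, the streaming switch moves from app construction to the individual call, as the README hunks above show. A hedged before/after sketch of the migration (import paths assumed, data-adding calls omitted):

```python
from embedchain import App
from embedchain.config import InitConfig, QueryConfig  # import path assumed

# Before this change: streaming was fixed for the whole app instance.
# app = App(InitConfig(stream_response=True))

# After this change: InitConfig no longer knows about streaming ...
app = App(InitConfig())

# ... and each call opts in through its own config instead.
streamed = app.query("What did the author say about happiness?", QueryConfig(stream=True))
plain = app.query("What did the author say about happiness?")  # defaults to stream=False
```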
@@ -22,11 +22,12 @@ class QueryConfig(BaseConfig):
     """
     Config for the `query` method.
     """
-    def __init__(self, template: Template = None):
+    def __init__(self, template: Template = None, stream: bool = False):
         """
         Initializes the QueryConfig instance.
 
         :param template: Optional. The `Template` instance to use as a template for prompt.
+        :param stream: Optional. Control if response is streamed back to the user
         :raises ValueError: If the template is not valid as template should contain $context and $query
         """
         if template is None:
@@ -35,3 +36,7 @@ class QueryConfig(BaseConfig):
                 and re.search(context_re, template.template)):
             raise ValueError("`template` should have `query` and `context` keys")
         self.template = template
+
+        if not isinstance(stream, bool):
+            raise ValueError("`stream` should be bool")
+        self.stream = stream
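The hunk above places the new `stream` check next to the existing template validation, so a `QueryConfig` can now fail for either reason. A small sketch of the two guards (import path assumed):

```python
from string import Template

from embedchain.config import QueryConfig  # import path assumed

# Existing guard: a custom template must contain both $context and $query.
try:
    QueryConfig(template=Template("Answer using $context only."))
except ValueError as err:
    print(err)  # `template` should have `query` and `context` keys

# New guard: stream must be an actual bool, not just a truthy value.
try:
    QueryConfig(stream=1)
except ValueError as err:
    print(err)  # `stream` should be bool
```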
@@ -155,7 +155,7 @@ class EmbedChain:
         prompt = template.substitute(context = context, query = input_query)
         return prompt
 
-    def get_answer_from_llm(self, prompt):
+    def get_answer_from_llm(self, prompt, config: ChatConfig):
         """
         Gets an answer based on the given query and context by passing it
         to an LLM.
@@ -165,7 +165,7 @@ class EmbedChain:
         :return: The answer.
         """
 
-        return self.get_llm_model_answer(prompt)
+        return self.get_llm_model_answer(prompt, config)
 
     def query(self, input_query, config: QueryConfig = None):
         """
@@ -181,7 +181,7 @@ class EmbedChain:
             config = QueryConfig()
         context = self.retrieve_from_database(input_query)
         prompt = self.generate_prompt(input_query, context, config.template)
-        answer = self.get_answer_from_llm(prompt)
+        answer = self.get_answer_from_llm(prompt, config)
         return answer
 
     def generate_chat_prompt(self, input_query, context, chat_history=''):
@@ -224,7 +224,7 @@ class EmbedChain:
             context,
             chat_history=chat_history,
         )
-        answer = self.get_answer_from_llm(prompt)
+        answer = self.get_answer_from_llm(prompt, config)
         memory.chat_memory.add_user_message(input_query)
         if isinstance(answer, str):
             memory.chat_memory.add_ai_message(answer)
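In the chat hunk above, only plain-string answers are written back to the conversation memory; with `stream=True`, `get_llm_model_answer` returns a generator instead, so the `isinstance(answer, str)` check skips the memory update. A sketch of how a caller might handle both shapes; `render_answer` is a hypothetical helper, not part of the repo:

```python
from typing import Iterator, Union


def render_answer(answer: Union[str, Iterator[str]]) -> str:
    """Print an answer and return its full text, whether it arrived as one
    string or as a stream of chunks (hypothetical helper)."""
    if isinstance(answer, str):
        # Non-streaming: chat() has already stored this answer in memory.
        print(answer)
        return answer
    # Streaming: consume the generator chunk by chunk.
    parts = []
    for chunk in answer:
        print(chunk, end="", flush=True)
        parts.append(chunk)
    print()
    return "".join(parts)


# Usage, assuming `app` and `chat_config` from the earlier sketches:
# full_text = render_answer(app.chat("what did the author say about happiness?", chat_config))
```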
@@ -295,14 +295,8 @@ class App(EmbedChain):
             config = InitConfig()
         super().__init__(config)
 
-    def get_llm_model_answer(self, prompt):
-        stream_response = self.config.stream_response
-        if stream_response:
-            return self._stream_llm_model_response(prompt)
-        else:
-            return self._get_llm_model_response(prompt)
-
-    def _get_llm_model_response(self, prompt, stream_response = False):
+    def get_llm_model_answer(self, prompt, config: ChatConfig):
         messages = []
         messages.append({
             "role": "user", "content": prompt
@@ -313,20 +307,18 @@ class App(EmbedChain):
             temperature=0,
             max_tokens=1000,
             top_p=1,
-            stream=stream_response
+            stream=config.stream
         )
 
-        if stream_response:
-            # This contains the entire completions object. Needs to be sanitised
-            return response
+        if config.stream:
+            return self._stream_llm_model_response(response)
         else:
             return response["choices"][0]["message"]["content"]
 
-    def _stream_llm_model_response(self, prompt):
+    def _stream_llm_model_response(self, response):
         """
         This is a generator for streaming response from the OpenAI completions API
         """
-        response = self._get_llm_model_response(prompt, True)
         for line in response:
             chunk = line['choices'][0].get('delta', {}).get('content', '')
             yield chunk
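The generator at the end of the diff pulls the incremental text out of each streamed chat-completion chunk via `choices[0]['delta']['content']`. Below is a self-contained sketch of that extraction against a faked response stream; the fake dicts only mimic the shape the code above relies on.

```python
def stream_llm_model_response(response):
    """Yield the text delta from each streamed chat-completion chunk,
    mirroring the generator added in this PR."""
    for line in response:
        chunk = line['choices'][0].get('delta', {}).get('content', '')
        yield chunk


# Faked streamed response, shaped like the dicts the OpenAI SDK yields when stream=True.
fake_response = [
    {"choices": [{"delta": {"role": "assistant"}}]},        # first chunk usually has no content
    {"choices": [{"delta": {"content": "Naval argues "}}]},
    {"choices": [{"delta": {"content": "that humans..."}}]},
    {"choices": [{"delta": {}}]},                            # final chunk: empty delta
]

for piece in stream_llm_model_response(fake_response):
    print(piece, end="", flush=True)
print()
```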