feat: Use streaming setup at Query level (#214)

This commit is contained in:
aaishikdutta
2023-07-10 23:07:19 +05:30
committed by GitHub
parent 8674297d1a
commit c597b1939d
5 changed files with 41 additions and 38 deletions

View File

@@ -204,19 +204,6 @@ from embedchain import PersonApp as ECPApp
print(naval_chat_bot.query("What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?")) print(naval_chat_bot.query("What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?"))
# answer: Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality. # answer: Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality.
``` ```
### Stream Response
- You can add config to your query method to stream responses like ChatGPT does. You will need a downstream handler to render the chunks in your desired format.
- To use this, instantiate App with an `InitConfig` instance, passing `stream_response=True`. The following example iterates through the chunks and prints them as they appear.
```python
app = App(InitConfig(stream_response=True))
resp = naval_chat_bot.query("What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?")
for chunk in resp:
print(chunk, end="", flush=True)
# answer: Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality.
```
### Chat Interface ### Chat Interface
@@ -235,6 +222,22 @@ print(naval_chat_bot.chat("what did the author say about happiness?"))
# answer: The author, Naval Ravikant, believes that happiness is a choice you make and a skill you develop. He compares the mind to the body, stating that just as the body can be molded and changed, so can the mind. He emphasizes the importance of being present in the moment and not getting caught up in regrets of the past or worries about the future. By being present and grateful for where you are, you can experience true happiness. # answer: The author, Naval Ravikant, believes that happiness is a choice you make and a skill you develop. He compares the mind to the body, stating that just as the body can be molded and changed, so can the mind. He emphasizes the importance of being present in the moment and not getting caught up in regrets of the past or worries about the future. By being present and grateful for where you are, you can experience true happiness.
``` ```
### Stream Response
- You can add config to your query method to stream responses like ChatGPT does. You will need a downstream handler to render the chunks in your desired format. Streaming is currently supported only for OpenAI models.
- To use this, instantiate a `QueryConfig` or `ChatConfig` object with `stream=True`. Then pass it to the `.chat()` or `.query()` method. The following example iterates through the chunks and prints them as they appear.
```python
app = App()
query_config = QueryConfig(stream = True)
resp = app.query("What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?", query_config)
for chunk in resp:
print(chunk, end="", flush=True)
# answer: Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality.
```
## Format supported ## Format supported
We support the following formats: We support the following formats:
@@ -453,6 +456,7 @@ _coming soon_
|option|description|type|default| |option|description|type|default|
|---|---|---|---| |---|---|---|---|
|template|custom template for prompt|Template|Template("Use the following pieces of context to answer the query at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. \$context Query: $query Helpful Answer:")| |template|custom template for prompt|Template|Template("Use the following pieces of context to answer the query at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. \$context Query: $query Helpful Answer:")|
|stream|control if response is streamed back to the user|bool|False|
#### **Chat Config** #### **Chat Config**

View File

@@ -4,5 +4,11 @@ class ChatConfig(QueryConfig):
""" """
Config for the `chat` method, inherits from `QueryConfig`. Config for the `chat` method, inherits from `QueryConfig`.
""" """
def __init__(self): def __init__(self, stream: bool = False):
pass """
Initializes the QueryConfig instance.
:param stream: Optional. Control if response is streamed back to the user
:raises ValueError: If the template is not valid as template should contain $context and $query
"""
super().__init__(stream=stream)

View File

@@ -6,7 +6,7 @@ class InitConfig(BaseConfig):
""" """
Config to initialize an embedchain `App` instance. Config to initialize an embedchain `App` instance.
""" """
def __init__(self, ef=None, db=None, stream_response=False): def __init__(self, ef=None, db=None):
""" """
:param ef: Optional. Embedding function to use. :param ef: Optional. Embedding function to use.
:param db: Optional. (Vector) database to use for embeddings. :param db: Optional. (Vector) database to use for embeddings.
@@ -28,10 +28,6 @@ class InitConfig(BaseConfig):
else: else:
self.db = db self.db = db
if not isinstance(stream_response, bool):
raise ValueError("`stream_respone` should be bool")
self.stream_response = stream_response
return return

View File

@@ -22,11 +22,12 @@ class QueryConfig(BaseConfig):
""" """
Config for the `query` method. Config for the `query` method.
""" """
def __init__(self, template: Template = None): def __init__(self, template: Template = None, stream: bool = False):
""" """
Initializes the QueryConfig instance. Initializes the QueryConfig instance.
:param template: Optional. The `Template` instance to use as a template for prompt. :param template: Optional. The `Template` instance to use as a template for prompt.
:param stream: Optional. Control if response is streamed back to the user
:raises ValueError: If the template is not valid as template should contain $context and $query :raises ValueError: If the template is not valid as template should contain $context and $query
""" """
if template is None: if template is None:
@@ -35,3 +36,7 @@ class QueryConfig(BaseConfig):
and re.search(context_re, template.template)): and re.search(context_re, template.template)):
raise ValueError("`template` should have `query` and `context` keys") raise ValueError("`template` should have `query` and `context` keys")
self.template = template self.template = template
if not isinstance(stream, bool):
raise ValueError("`stream` should be bool")
self.stream = stream

View File

@@ -155,7 +155,7 @@ class EmbedChain:
prompt = template.substitute(context = context, query = input_query) prompt = template.substitute(context = context, query = input_query)
return prompt return prompt
def get_answer_from_llm(self, prompt): def get_answer_from_llm(self, prompt, config: ChatConfig):
""" """
Gets an answer based on the given query and context by passing it Gets an answer based on the given query and context by passing it
to an LLM. to an LLM.
@@ -165,7 +165,7 @@ class EmbedChain:
:return: The answer. :return: The answer.
""" """
return self.get_llm_model_answer(prompt) return self.get_llm_model_answer(prompt, config)
def query(self, input_query, config: QueryConfig = None): def query(self, input_query, config: QueryConfig = None):
""" """
@@ -181,7 +181,7 @@ class EmbedChain:
config = QueryConfig() config = QueryConfig()
context = self.retrieve_from_database(input_query) context = self.retrieve_from_database(input_query)
prompt = self.generate_prompt(input_query, context, config.template) prompt = self.generate_prompt(input_query, context, config.template)
answer = self.get_answer_from_llm(prompt) answer = self.get_answer_from_llm(prompt, config)
return answer return answer
def generate_chat_prompt(self, input_query, context, chat_history=''): def generate_chat_prompt(self, input_query, context, chat_history=''):
@@ -224,7 +224,7 @@ class EmbedChain:
context, context,
chat_history=chat_history, chat_history=chat_history,
) )
answer = self.get_answer_from_llm(prompt) answer = self.get_answer_from_llm(prompt, config)
memory.chat_memory.add_user_message(input_query) memory.chat_memory.add_user_message(input_query)
if isinstance(answer, str): if isinstance(answer, str):
memory.chat_memory.add_ai_message(answer) memory.chat_memory.add_ai_message(answer)
@@ -295,14 +295,8 @@ class App(EmbedChain):
config = InitConfig() config = InitConfig()
super().__init__(config) super().__init__(config)
def get_llm_model_answer(self, prompt): def get_llm_model_answer(self, prompt, config: ChatConfig):
stream_response = self.config.stream_response
if stream_response:
return self._stream_llm_model_response(prompt)
else:
return self._get_llm_model_response(prompt)
def _get_llm_model_response(self, prompt, stream_response = False):
messages = [] messages = []
messages.append({ messages.append({
"role": "user", "content": prompt "role": "user", "content": prompt
@@ -313,20 +307,18 @@ class App(EmbedChain):
temperature=0, temperature=0,
max_tokens=1000, max_tokens=1000,
top_p=1, top_p=1,
stream=stream_response stream=config.stream
) )
if stream_response: if config.stream:
# This contains the entire completions object. Needs to be sanitised return self._stream_llm_model_response(response)
return response
else: else:
return response["choices"][0]["message"]["content"] return response["choices"][0]["message"]["content"]
def _stream_llm_model_response(self, prompt): def _stream_llm_model_response(self, response):
""" """
This is a generator for streaming response from the OpenAI completions API This is a generator for streaming response from the OpenAI completions API
""" """
response = self._get_llm_model_response(prompt, True)
for line in response: for line in response:
chunk = line['choices'][0].get('delta', {}).get('content', '') chunk = line['choices'][0].get('delta', {}).get('content', '')
yield chunk yield chunk