feat: Use streaming setup at Query level (#214)

This commit is contained in:
aaishikdutta
2023-07-10 23:07:19 +05:30
committed by GitHub
parent 8674297d1a
commit c597b1939d
5 changed files with 41 additions and 38 deletions

View File

@@ -204,19 +204,6 @@ from embedchain import PersonApp as ECPApp
print(naval_chat_bot.query("What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?"))
# answer: Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality.
```
### Stream Response
- You can add config to your query method to stream responses like ChatGPT does. You need a downstream handler to render the chunks in your desired format.
- To use this, instantiate App with an `InitConfig` instance passing `stream_response=True`. The following example iterates through the chunks and prints them as they appear.
```python
app = App(InitConfig(stream_response=True))
resp = naval_chat_bot.query("What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?")
for chunk in resp:
print(chunk, end="", flush=True)
# answer: Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality.
```
### Chat Interface
@@ -235,6 +222,22 @@ print(naval_chat_bot.chat("what did the author say about happiness?"))
# answer: The author, Naval Ravikant, believes that happiness is a choice you make and a skill you develop. He compares the mind to the body, stating that just as the body can be molded and changed, so can the mind. He emphasizes the importance of being present in the moment and not getting caught up in regrets of the past or worries about the future. By being present and grateful for where you are, you can experience true happiness.
```
### Stream Response
- You can add config to your query method to stream responses like ChatGPT does. You need a downstream handler to render the chunks in your desired format. Currently, only OpenAI models are supported.
- To use this, instantiate a `QueryConfig` or `ChatConfig` object with `stream=True`. Then pass it to the `.chat()` or `.query()` method. The following example iterates through the chunks and prints them as they appear.
```python
app = App()
query_config = QueryConfig(stream = True)
resp = app.query("What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?", query_config)
for chunk in resp:
print(chunk, end="", flush=True)
# answer: Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality.
```
## Format supported
We support the following formats:
@@ -453,6 +456,7 @@ _coming soon_
|option|description|type|default|
|---|---|---|---|
|template|custom template for prompt|Template|Template("Use the following pieces of context to answer the query at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. \$context Query: $query Helpful Answer:")|
|stream|control if response is streamed back to the user|bool|False|
#### **Chat Config**

View File

@@ -4,5 +4,11 @@ class ChatConfig(QueryConfig):
"""
Config for the `chat` method, inherits from `QueryConfig`.
"""
def __init__(self):
pass
def __init__(self, stream: bool = False):
"""
Initializes the ChatConfig instance.
:param stream: Optional. Control if response is streamed back to the user
:raises ValueError: If `stream` is not a bool
"""
super().__init__(stream=stream)

View File

@@ -6,7 +6,7 @@ class InitConfig(BaseConfig):
"""
Config to initialize an embedchain `App` instance.
"""
def __init__(self, ef=None, db=None, stream_response=False):
def __init__(self, ef=None, db=None):
"""
:param ef: Optional. Embedding function to use.
:param db: Optional. (Vector) database to use for embeddings.
@@ -27,10 +27,6 @@ class InitConfig(BaseConfig):
self.db = ChromaDB(ef=self.ef)
else:
self.db = db
if not isinstance(stream_response, bool):
raise ValueError("`stream_respone` should be bool")
self.stream_response = stream_response
return

View File

@@ -22,11 +22,12 @@ class QueryConfig(BaseConfig):
"""
Config for the `query` method.
"""
def __init__(self, template: Template = None):
def __init__(self, template: Template = None, stream: bool = False):
"""
Initializes the QueryConfig instance.
:param template: Optional. The `Template` instance to use as a template for prompt.
:param stream: Optional. Control if response is streamed back to the user
:raises ValueError: If the template is not valid (it should contain $context and $query) or if `stream` is not a bool
"""
if template is None:
@@ -35,3 +36,7 @@ class QueryConfig(BaseConfig):
and re.search(context_re, template.template)):
raise ValueError("`template` should have `query` and `context` keys")
self.template = template
if not isinstance(stream, bool):
raise ValueError("`stream` should be bool")
self.stream = stream

View File

@@ -155,7 +155,7 @@ class EmbedChain:
prompt = template.substitute(context = context, query = input_query)
return prompt
def get_answer_from_llm(self, prompt):
def get_answer_from_llm(self, prompt, config: ChatConfig):
"""
Gets an answer based on the given query and context by passing it
to an LLM.
@@ -165,7 +165,7 @@ class EmbedChain:
:return: The answer.
"""
return self.get_llm_model_answer(prompt)
return self.get_llm_model_answer(prompt, config)
def query(self, input_query, config: QueryConfig = None):
"""
@@ -181,7 +181,7 @@ class EmbedChain:
config = QueryConfig()
context = self.retrieve_from_database(input_query)
prompt = self.generate_prompt(input_query, context, config.template)
answer = self.get_answer_from_llm(prompt)
answer = self.get_answer_from_llm(prompt, config)
return answer
def generate_chat_prompt(self, input_query, context, chat_history=''):
@@ -224,7 +224,7 @@ class EmbedChain:
context,
chat_history=chat_history,
)
answer = self.get_answer_from_llm(prompt)
answer = self.get_answer_from_llm(prompt, config)
memory.chat_memory.add_user_message(input_query)
if isinstance(answer, str):
memory.chat_memory.add_ai_message(answer)
@@ -295,14 +295,8 @@ class App(EmbedChain):
config = InitConfig()
super().__init__(config)
def get_llm_model_answer(self, prompt):
stream_response = self.config.stream_response
if stream_response:
return self._stream_llm_model_response(prompt)
else:
return self._get_llm_model_response(prompt)
def get_llm_model_answer(self, prompt, config: ChatConfig):
def _get_llm_model_response(self, prompt, stream_response = False):
messages = []
messages.append({
"role": "user", "content": prompt
@@ -313,20 +307,18 @@ class App(EmbedChain):
temperature=0,
max_tokens=1000,
top_p=1,
stream=stream_response
stream=config.stream
)
if stream_response:
# This contains the entire completions object. Needs to be sanitised
return response
if config.stream:
return self._stream_llm_model_response(response)
else:
return response["choices"][0]["message"]["content"]
def _stream_llm_model_response(self, prompt):
def _stream_llm_model_response(self, response):
"""
This is a generator for streaming response from the OpenAI completions API
"""
response = self._get_llm_model_response(prompt, True)
for line in response:
chunk = line['choices'][0].get('delta', {}).get('content', '')
yield chunk