feat: Use streaming setup at Query level (#214)
README.md
@@ -204,19 +204,6 @@ from embedchain import PersonApp as ECPApp
 print(naval_chat_bot.query("What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?"))
 # answer: Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality.
 ```
-### Stream Response
-
-- You can add config to your query method to stream responses like ChatGPT does. You would require a downstream handler to render the chunk in your desirable format
-
-- To use this, instantiate App with a `InitConfig` instance passing `stream_response=True`. The following example iterates through the chunks and prints them as they appear
-```python
-app = App(InitConfig(stream_response=True))
-resp = naval_chat_bot.query("What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?")
-
-for chunk in resp:
-    print(chunk, end="", flush=True)
-# answer: Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality.
-```
 
 ### Chat Interface
 
@@ -235,6 +222,22 @@ print(naval_chat_bot.chat("what did the author say about happiness?"))
 # answer: The author, Naval Ravikant, believes that happiness is a choice you make and a skill you develop. He compares the mind to the body, stating that just as the body can be molded and changed, so can the mind. He emphasizes the importance of being present in the moment and not getting caught up in regrets of the past or worries about the future. By being present and grateful for where you are, you can experience true happiness.
 ```
 
+### Stream Response
+
+- You can add config to your query method to stream responses like ChatGPT does. You would require a downstream handler to render the chunk in your desirable format. Currently only supports OpenAI model.
+
+- To use this, instantiate a `QueryConfig` or `ChatConfig` object with `stream=True`. Then pass it to the `.chat()` or `.query()` method. The following example iterates through the chunks and prints them as they appear.
+
+```python
+app = App()
+query_config = QueryConfig(stream = True)
+resp = app.query("What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?", query_config)
+
+for chunk in resp:
+    print(chunk, end="", flush=True)
+# answer: Naval argues that humans possess the unique capacity to understand explanations or concepts to the maximum extent possible in this physical reality.
+```
+
 ## Format supported
 
 We support the following formats:
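The new README section shows streaming with `.query()` and a `QueryConfig`; since the bullet above also mentions `ChatConfig` and `.chat()`, here is a minimal companion sketch for the chat path. It assumes the `embedchain.config` import path and a bot that already has sources added; neither is shown in this hunk.

```python
from embedchain import App
from embedchain.config import ChatConfig  # import path assumed, not shown in this diff

naval_chat_bot = App()
# naval_chat_bot.add(...)  # add your sources first, as in the earlier README sections

chat_config = ChatConfig(stream=True)
resp = naval_chat_bot.chat(
    "What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?",
    chat_config,
)

# With stream=True the call returns a generator of text chunks rather than a full string.
for chunk in resp:
    print(chunk, end="", flush=True)
```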
@@ -453,6 +456,7 @@ _coming soon_
 |option|description|type|default|
 |---|---|---|---|
 |template|custom template for prompt|Template|Template("Use the following pieces of context to answer the query at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. \$context Query: $query Helpful Answer:")|
+|stream|control if response is streamed back to the user|bool|False|
 
 #### **Chat Config**
 
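Since the table now documents both `template` and `stream`, a short sketch combining them in one `QueryConfig` may help; the import path and the template wording are illustrative (per the `QueryConfig` changes below, the template must keep the `$context` and `$query` placeholders).

```python
from string import Template

from embedchain.config import QueryConfig  # import path assumed

# The template must contain $context and $query, otherwise QueryConfig raises ValueError.
template = Template(
    "Use the following pieces of context to answer the query at the end.\n"
    "$context\n"
    "Query: $query\n"
    "Helpful Answer:"
)

# Both options from the table can be set on the same config object.
query_config = QueryConfig(template=template, stream=True)
```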
@@ -4,5 +4,11 @@ class ChatConfig(QueryConfig):
     """
     Config for the `chat` method, inherits from `QueryConfig`.
     """
-    def __init__(self):
-        pass
+    def __init__(self, stream: bool = False):
+        """
+        Initializes the QueryConfig instance.
+
+        :param stream: Optional. Control if response is streamed back to the user
+        :raises ValueError: If the template is not valid as template should contain $context and $query
+        """
+        super().__init__(stream=stream)
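Because the new `ChatConfig.__init__` simply forwards `stream` to `QueryConfig.__init__`, it inherits the attribute, the default, and the bool validation from its parent. A small sketch of the expected behaviour (import path assumed):

```python
from embedchain.config import ChatConfig  # import path assumed

config = ChatConfig(stream=True)
assert config.stream is True  # set by QueryConfig.__init__ via super()

default_config = ChatConfig()
assert default_config.stream is False  # stream defaults to False, so existing chat calls are unchanged
```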
@@ -6,7 +6,7 @@ class InitConfig(BaseConfig):
     """
     Config to initialize an embedchain `App` instance.
     """
-    def __init__(self, ef=None, db=None, stream_response=False):
+    def __init__(self, ef=None, db=None):
         """
         :param ef: Optional. Embedding function to use.
         :param db: Optional. (Vector) database to use for embeddings.
@@ -27,10 +27,6 @@ class InitConfig(BaseConfig):
             self.db = ChromaDB(ef=self.ef)
         else:
             self.db = db
 
-        if not isinstance(stream_response, bool):
-            raise ValueError("`stream_respone` should be bool")
-        self.stream_response = stream_response
-
         return
 
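With `stream_response` gone from `InitConfig`, the streaming switch moves from app construction to the individual call, as the README hunks above show. A hedged before/after sketch of the migration (import paths assumed, data-adding calls omitted):

```python
from embedchain import App
from embedchain.config import InitConfig, QueryConfig  # import path assumed

# Before this change: streaming was fixed for the whole app instance.
# app = App(InitConfig(stream_response=True))

# After this change: InitConfig no longer knows about streaming ...
app = App(InitConfig())

# ... and each call opts in through its own config instead.
streamed = app.query("What did the author say about happiness?", QueryConfig(stream=True))
plain = app.query("What did the author say about happiness?")  # defaults to stream=False
```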
@@ -22,11 +22,12 @@ class QueryConfig(BaseConfig):
     """
     Config for the `query` method.
     """
-    def __init__(self, template: Template = None):
+    def __init__(self, template: Template = None, stream: bool = False):
         """
         Initializes the QueryConfig instance.
 
         :param template: Optional. The `Template` instance to use as a template for prompt.
+        :param stream: Optional. Control if response is streamed back to the user
         :raises ValueError: If the template is not valid as template should contain $context and $query
         """
         if template is None:
@@ -35,3 +36,7 @@ class QueryConfig(BaseConfig):
                 and re.search(context_re, template.template)):
             raise ValueError("`template` should have `query` and `context` keys")
         self.template = template
+
+        if not isinstance(stream, bool):
+            raise ValueError("`stream` should be bool")
+        self.stream = stream
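The hunk above places the new `stream` check next to the existing template validation, so a `QueryConfig` can now fail for either reason. A small sketch of the two guards (import path assumed):

```python
from string import Template

from embedchain.config import QueryConfig  # import path assumed

# Existing guard: a custom template must contain both $context and $query.
try:
    QueryConfig(template=Template("Answer using $context only."))
except ValueError as err:
    print(err)  # `template` should have `query` and `context` keys

# New guard: stream must be an actual bool, not just a truthy value.
try:
    QueryConfig(stream=1)
except ValueError as err:
    print(err)  # `stream` should be bool
```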
@@ -155,7 +155,7 @@ class EmbedChain:
         prompt = template.substitute(context = context, query = input_query)
         return prompt
 
-    def get_answer_from_llm(self, prompt):
+    def get_answer_from_llm(self, prompt, config: ChatConfig):
         """
         Gets an answer based on the given query and context by passing it
         to an LLM.
@@ -165,7 +165,7 @@ class EmbedChain:
         :return: The answer.
         """
 
-        return self.get_llm_model_answer(prompt)
+        return self.get_llm_model_answer(prompt, config)
 
     def query(self, input_query, config: QueryConfig = None):
         """
@@ -181,7 +181,7 @@ class EmbedChain:
             config = QueryConfig()
         context = self.retrieve_from_database(input_query)
         prompt = self.generate_prompt(input_query, context, config.template)
-        answer = self.get_answer_from_llm(prompt)
+        answer = self.get_answer_from_llm(prompt, config)
         return answer
 
     def generate_chat_prompt(self, input_query, context, chat_history=''):
@@ -224,7 +224,7 @@ class EmbedChain:
             context,
             chat_history=chat_history,
         )
-        answer = self.get_answer_from_llm(prompt)
+        answer = self.get_answer_from_llm(prompt, config)
         memory.chat_memory.add_user_message(input_query)
         if isinstance(answer, str):
             memory.chat_memory.add_ai_message(answer)
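In the chat hunk above, only plain-string answers are written back to the conversation memory; with `stream=True`, `get_llm_model_answer` returns a generator instead, so the `isinstance(answer, str)` check skips the memory update. A sketch of how a caller might handle both shapes; `render_answer` is a hypothetical helper, not part of the repo:

```python
from typing import Iterator, Union


def render_answer(answer: Union[str, Iterator[str]]) -> str:
    """Print an answer and return its full text, whether it arrived as one
    string or as a stream of chunks (hypothetical helper)."""
    if isinstance(answer, str):
        # Non-streaming: chat() has already stored this answer in memory.
        print(answer)
        return answer
    # Streaming: consume the generator chunk by chunk.
    parts = []
    for chunk in answer:
        print(chunk, end="", flush=True)
        parts.append(chunk)
    print()
    return "".join(parts)


# Usage, assuming `app` and `chat_config` from the earlier sketches:
# full_text = render_answer(app.chat("what did the author say about happiness?", chat_config))
```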
@@ -295,14 +295,8 @@ class App(EmbedChain):
             config = InitConfig()
         super().__init__(config)
 
-    def get_llm_model_answer(self, prompt):
-        stream_response = self.config.stream_response
-        if stream_response:
-            return self._stream_llm_model_response(prompt)
-        else:
-            return self._get_llm_model_response(prompt)
-
-    def _get_llm_model_response(self, prompt, stream_response = False):
+    def get_llm_model_answer(self, prompt, config: ChatConfig):
         messages = []
         messages.append({
             "role": "user", "content": prompt
@@ -313,20 +307,18 @@ class App(EmbedChain):
             temperature=0,
             max_tokens=1000,
             top_p=1,
-            stream=stream_response
+            stream=config.stream
         )
 
-        if stream_response:
-            # This contains the entire completions object. Needs to be sanitised
-            return response
+        if config.stream:
+            return self._stream_llm_model_response(response)
         else:
             return response["choices"][0]["message"]["content"]
 
-    def _stream_llm_model_response(self, prompt):
+    def _stream_llm_model_response(self, response):
         """
         This is a generator for streaming response from the OpenAI completions API
         """
-        response = self._get_llm_model_response(prompt, True)
         for line in response:
             chunk = line['choices'][0].get('delta', {}).get('content', '')
             yield chunk
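The generator at the end of the diff pulls the incremental text out of each streamed chat-completion chunk via `choices[0]['delta']['content']`. Below is a self-contained sketch of that extraction against a faked response stream; the fake dicts only mimic the shape the code above relies on.

```python
def stream_llm_model_response(response):
    """Yield the text delta from each streamed chat-completion chunk,
    mirroring the generator added in this PR."""
    for line in response:
        chunk = line['choices'][0].get('delta', {}).get('content', '')
        yield chunk


# Faked streamed response, shaped like the dicts the OpenAI SDK yields when stream=True.
fake_response = [
    {"choices": [{"delta": {"role": "assistant"}}]},        # first chunk usually has no content
    {"choices": [{"delta": {"content": "Naval argues "}}]},
    {"choices": [{"delta": {"content": "that humans..."}}]},
    {"choices": [{"delta": {}}]},                            # final chunk: empty delta
]

for piece in stream_llm_model_response(fake_response):
    print(piece, end="", flush=True)
print()
```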