[App Deployment] create chat with PDF app (#1049)
Co-authored-by: Deven Patel <deven298@yahoo.com>
Co-authored-by: Deshraj Yadav <deshrajdry@gmail.com>
docs/examples/chat-with-PDF.mdx (new file, 32 lines)
@@ -0,0 +1,32 @@
### Embedchain Chat with PDF App

You can easily create and deploy your own `chat-pdf` app using Embedchain.

Here are a few simple steps to create and deploy your app:

1. Fork the embedchain repo from [GitHub](https://github.com/embedchain/embedchain).
<Note>
If you run into problems while forking, please refer to the [GitHub docs](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) on forking a repo.
</Note>
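Once you have forked, clone your fork locally; `<your_username>` below is a placeholder for your GitHub handle:

```bash
git clone https://github.com/<your_username>/embedchain.git
```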
2. Navigate to the `chat-pdf` example app in your forked repo:

```bash
cd <your_fork_repo>/examples/chat-pdf
```
3. Run your app in the development environment with two simple commands:

```bash
pip install -r requirements.txt
ec dev
```
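If you prefer not to go through the Embedchain CLI, the same app can presumably be launched with Streamlit directly — a minimal sketch, assuming `app.py` is the entry point as in this example:

```bash
streamlit run app.py
```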
Feel free to improve our simple `chat-pdf` Streamlit app and create a pull request to showcase your app [here](https://docs.embedchain.ai/examples/showcase).

4. Deploy your app easily using the Streamlit interface.

Connect your GitHub account with Streamlit and refer to this [guide](https://docs.streamlit.io/streamlit-community-cloud/deploy-your-app) to deploy your app.

You can also use the deploy button on the local Streamlit page that appears when you run the `ec dev` command.
docs/mint.json
@@ -151,6 +151,7 @@
     {
       "group": "Examples",
       "pages": [
+        "examples/chat-with-PDF",
         "examples/notebooks-and-replits",
         {
           "group": "REST API Service",
embedchain/llm/ollama.py
@@ -19,7 +19,7 @@ class OllamaLlm(BaseLlm):
     def get_llm_model_answer(self, prompt):
         return self._get_answer(prompt=prompt, config=self.config)

     def _get_answer(self, prompt: str, config: BaseLlmConfig) -> Union[str, Iterable]:
         callback_manager = [StreamingStdOutCallbackHandler()] if config.stream else [StdOutCallbackHandler()]
@@ -28,7 +28,7 @@ class OllamaLlm(BaseLlm):
             system=config.system_prompt,
             temperature=config.temperature,
             top_p=config.top_p,
-            callback_manager=CallbackManager(callback_manager)
+            callback_manager=CallbackManager(callback_manager),
         )

         return llm(prompt)
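For context, a minimal sketch of exercising this code path end to end. The `"ollama"` provider key and a locally running Ollama server are assumptions here; the model settings mirror the test fixture further below:

```python
from embedchain import Pipeline as App

# Assumption: "ollama" is the registered provider key for OllamaLlm,
# and an Ollama server with the llama2 model is running locally.
app = App.from_config(config={
    "llm": {
        "provider": "ollama",
        "config": {"model": "llama2", "temperature": 0.7, "top_p": 0.8, "stream": True},
    }
})
print(app.query("Summarize what Embedchain does."))
```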
examples/chat-pdf/.streamlit/secrets.toml (new file, 0 lines)

examples/chat-pdf/README.md (new file, 30 lines)
@@ -0,0 +1,30 @@
# Embedchain Chat with PDF App

You can easily create and deploy your own `Chat-with-PDF` app using Embedchain.

Here are a few simple steps to create and deploy your app:

1. Fork the embedchain repo from [GitHub](https://github.com/embedchain/embedchain).

If you run into problems while forking, please refer to the [GitHub docs](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) on forking a repo.

2. Navigate to the `chat-pdf` example app in your forked repo:

```bash
cd <your_fork_repo>/examples/chat-pdf
```

3. Run your app in the development environment with two simple commands:

```bash
pip install -r requirements.txt
ec dev
```

Feel free to improve our simple `chat-pdf` Streamlit app and create a pull request to showcase your app [here](https://docs.embedchain.ai/examples/showcase).

4. Deploy your app easily using the Streamlit interface.

Connect your GitHub account with Streamlit and refer to this [guide](https://docs.streamlit.io/streamlit-community-cloud/deploy-your-app) to deploy your app.

You can also use the deploy button on the local Streamlit page that appears when you run the `ec dev` command.
examples/chat-pdf/app.py (new file, 150 lines)
@@ -0,0 +1,150 @@
import os
import queue
import re
import tempfile
import threading

import streamlit as st

from embedchain import Pipeline as App
from embedchain.config import BaseLlmConfig
from embedchain.helpers.callbacks import (StreamingStdOutCallbackHandlerYield,
                                          generate)


@st.cache_resource
def embedchain_bot():
    return App.from_config(
        config={
            "llm": {
                "provider": "openai",
                "config": {
                    "model": "gpt-3.5-turbo-1106",
                    "temperature": 0.5,
                    "max_tokens": 1000,
                    "top_p": 1,
                    "stream": True,
                },
            },
            "vectordb": {
                "provider": "chroma",
                "config": {"collection_name": "chat-pdf", "dir": "db", "allow_reset": True},
            },
            "chunker": {"chunk_size": 2000, "chunk_overlap": 0, "length_function": "len"},
        }
    )


@st.cache_data
def update_openai_key():
    os.environ["OPENAI_API_KEY"] = st.session_state.chatbot_api_key


with st.sidebar:
    openai_access_token = st.text_input(
        "OpenAI API Key", value=os.environ.get("OPENAI_API_KEY"), key="chatbot_api_key", type="password"
    )  # noqa: E501
    # Bare strings render in the sidebar via Streamlit's "magic" write.
    "WE DO NOT STORE YOUR OPENAI KEY."
    "Just paste your OpenAI API key here and we'll use it to power the chatbot. [Get your OpenAI API key](https://platform.openai.com/api-keys)"  # noqa: E501

    if openai_access_token:
        update_openai_key()

    pdf_files = st.file_uploader("Upload your PDF files", accept_multiple_files=True, type="pdf")
    add_pdf_files = st.session_state.get("add_pdf_files", [])
    for pdf_file in pdf_files:
        file_name = pdf_file.name
        if file_name in add_pdf_files:
            continue
        try:
            if not os.environ.get("OPENAI_API_KEY"):
                st.error("Please enter your OpenAI API Key")
                st.stop()
            app = embedchain_bot()
            # Write the upload to a temp file so embedchain can ingest it by path.
            temp_file_name = None
            with tempfile.NamedTemporaryFile(mode="wb", delete=False, prefix=file_name, suffix=".pdf") as f:
                f.write(pdf_file.getvalue())
                temp_file_name = f.name
            if temp_file_name:
                st.markdown(f"Adding {file_name} to knowledge base...")
                app.add(temp_file_name, data_type="pdf_file")
                st.markdown("")
                add_pdf_files.append(file_name)
                os.remove(temp_file_name)
            st.session_state.messages.append({"role": "assistant", "content": f"Added {file_name} to knowledge base!"})
        except Exception as e:
            st.error(f"Error adding {file_name} to knowledge base: {e}")
            st.stop()
    st.session_state["add_pdf_files"] = add_pdf_files

st.title("📄 Embedchain - Chat with PDF")
styled_caption = '<p style="font-size: 17px; color: #aaa;">🚀 An <a href="https://github.com/embedchain/embedchain">Embedchain</a> app powered by OpenAI!</p>'  # noqa: E501
st.markdown(styled_caption, unsafe_allow_html=True)

if "messages" not in st.session_state:
    st.session_state.messages = [
        {
            "role": "assistant",
            "content": """
                Hi! I'm a chatbot powered by Embedchain, which can answer questions about your PDF documents.\n
                Upload your PDF documents here and I'll answer your questions about them!
            """,
        }
    ]

for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input("Ask me anything!"):
    if not os.environ.get("OPENAI_API_KEY"):
        st.error("Please enter your OpenAI API Key", icon="🤖")
        st.stop()

    app = embedchain_bot()
    with st.chat_message("user"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        st.markdown(prompt)

    with st.chat_message("assistant"):
        msg_placeholder = st.empty()
        msg_placeholder.markdown("Thinking...")
        full_response = ""

        q = queue.Queue()

        def app_response(result):
            # Swap in a queue-backed callback so the main thread can stream
            # chunks to the UI while the LLM call runs in this worker thread.
            llm_config = app.llm.config.as_dict()
            llm_config["callbacks"] = [StreamingStdOutCallbackHandlerYield(q=q)]
            config = BaseLlmConfig(**llm_config)
            answer, citations = app.chat(prompt, config=config, citations=True)
            result["answer"] = answer
            result["citations"] = citations

        results = {}
        thread = threading.Thread(target=app_response, args=(results,))
        thread.start()

        # Render chunks as the worker pushes them onto the queue.
        for answer_chunk in generate(q):
            full_response += answer_chunk
            msg_placeholder.markdown(full_response)

        thread.join()
        answer, citations = results["answer"], results["citations"]
        if citations:
            full_response += "\n\n**Sources**:\n"
            sources = []
            for citation in citations:
                source = citation[1]
                # Strip the tempfile's random suffix so only the original name shows.
                pattern = re.compile(r"([^/]+)\.[^\.]+\.pdf$")
                match = pattern.search(source)
                if match:
                    source = match.group(1) + ".pdf"
                sources.append(source)
            sources = list(set(sources))
            for source in sources:
                full_response += f"- {source}\n"

        msg_placeholder.markdown(full_response)
        print("Answer: ", answer)
        st.session_state.messages.append({"role": "assistant", "content": answer})
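The worker-thread-plus-queue pattern in `app.py` (a thread runs `app.chat` while `generate(q)` drains chunks for the UI) is the core streaming trick. A minimal, Embedchain-free sketch of the same idea — every name below is illustrative, not a library API:

```python
import queue
import threading

def stream_via_queue(produce, sentinel=None):
    """Run `produce(q)` in a worker thread; yield items as they arrive."""
    q = queue.Queue()

    def worker():
        try:
            produce(q)
        finally:
            q.put(sentinel)  # always signal completion, even on error

    threading.Thread(target=worker, daemon=True).start()
    while (item := q.get()) is not sentinel:
        yield item

# Usage: the producer pushes chunks, the consumer renders them as they arrive.
for chunk in stream_via_queue(lambda q: [q.put(w + " ") for w in "streamed reply".split()]):
    print(chunk, end="", flush=True)
```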
examples/chat-pdf/embedchain.json (new file, 3 lines)
@@ -0,0 +1,3 @@
{
    "provider": "streamlit.io"
}
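This config presumably tells the Embedchain CLI which platform the app targets; `streamlit.io` matches the `ec dev` workflow in the README. A sketch of the intended flow (the `ec deploy` command is an assumption inferred from the PR title, not something this diff shows):

```bash
ec dev     # run the app locally against the streamlit.io provider
ec deploy  # assumed: push the app to Streamlit Community Cloud
```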
examples/chat-pdf/requirements.txt (new file, 2 lines)
@@ -0,0 +1,2 @@
streamlit==1.29.0
embedchain
tests/llm/test_ollama.py
@@ -9,6 +9,7 @@ def ollama_llm_config():
     config = BaseLlmConfig(model="llama2", temperature=0.7, top_p=0.8, stream=True, system_prompt=None)
     yield config


 def test_get_llm_model_answer(ollama_llm_config, mocker):
     mocker.patch("embedchain.llm.ollama.OllamaLlm._get_answer", return_value="Test answer")

@@ -33,6 +34,6 @@ def test_get_answer_mocked_ollama(ollama_llm_config, mocker):
             system=None,
             temperature=0.7,
             top_p=0.8,
-            callback_manager=mocker.ANY  # Use mocker.ANY to ignore the exact instance
+            callback_manager=mocker.ANY,  # Use mocker.ANY to ignore the exact instance
         )
     mock_instance.assert_called_once_with(prompt)
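To run just these tests locally (the file path is inferred from the hunk headers and mock target, so treat it as an assumption):

```bash
pip install pytest pytest-mock
pytest tests/llm/test_ollama.py -q
```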