From 9fe80c5cca212f415da100d61a2d4a06bb4e7cd7 Mon Sep 17 00:00:00 2001
From: Deven Patel
Date: Fri, 22 Dec 2023 19:53:10 +0530
Subject: [PATCH] [App Deployment] create chat with PDF app (#1049)

Co-authored-by: Deven Patel
Co-authored-by: Deshraj Yadav
---
 docs/examples/chat-with-PDF.mdx           |  32 +++++
 docs/mint.json                            |   1 +
 embedchain/llm/ollama.py                  |   4 +-
 examples/chat-pdf/.streamlit/secrets.toml |   0
 examples/chat-pdf/README.md               |  30 +++++
 examples/chat-pdf/app.py                  | 150 ++++++++++++++++++++++
 examples/chat-pdf/embedchain.json         |   3 +
 examples/chat-pdf/requirements.txt        |   2 +
 tests/llm/test_ollama.py                  |   3 +-
 9 files changed, 222 insertions(+), 3 deletions(-)
 create mode 100644 docs/examples/chat-with-PDF.mdx
 create mode 100644 examples/chat-pdf/.streamlit/secrets.toml
 create mode 100644 examples/chat-pdf/README.md
 create mode 100644 examples/chat-pdf/app.py
 create mode 100644 examples/chat-pdf/embedchain.json
 create mode 100644 examples/chat-pdf/requirements.txt

diff --git a/docs/examples/chat-with-PDF.mdx b/docs/examples/chat-with-PDF.mdx
new file mode 100644
index 00000000..ad8fb9a5
--- /dev/null
+++ b/docs/examples/chat-with-PDF.mdx
@@ -0,0 +1,32 @@
+### Embedchain Chat with PDF App
+
+You can easily create and deploy your own `chat-pdf` app using Embedchain.
+
+Here are a few simple steps to create and deploy your app:
+
+1. Fork the embedchain repo from [GitHub](https://github.com/embedchain/embedchain).
+
+If you run into problems with forking, please refer to the [GitHub docs](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) on forking a repo.
+
+2. Navigate to the `chat-pdf` example app in your forked repo:
+
+```bash
+cd examples/chat-pdf
+```
+
+3. Run your app in a development environment with these simple commands:
+
+```bash
+pip install -r requirements.txt
+ec dev
+```
+
+Feel free to improve our simple `chat-pdf` Streamlit app and create a pull request to showcase your app [here](https://docs.embedchain.ai/examples/showcase).
+
+4. Deploy your app using the Streamlit interface.
+
+Connect your GitHub account with Streamlit and follow this [guide](https://docs.streamlit.io/streamlit-community-cloud/deploy-your-app) to deploy your app.
+
+You can also use the deploy button on the local Streamlit site that opens when you run the `ec dev` command.
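The steps above launch the Streamlit front end, but the core retrieval flow they wrap lives in `examples/chat-pdf/app.py`, added later in this patch. As a minimal sketch of that flow, assuming `OPENAI_API_KEY` is already exported and using a placeholder PDF path:

```python
# Minimal sketch of the chat-pdf flow, distilled from examples/chat-pdf/app.py
# in this patch. Assumes OPENAI_API_KEY is set in the environment; the PDF path
# below is a placeholder.
from embedchain import Pipeline as App

app = App.from_config(
    config={
        "llm": {"provider": "openai", "config": {"model": "gpt-3.5-turbo-1106", "temperature": 0.5}},
        "vectordb": {"provider": "chroma", "config": {"collection_name": "chat-pdf", "dir": "db"}},
    }
)
app.add("my-document.pdf", data_type="pdf_file")   # index the PDF into the vector store
print(app.chat("What is this document about?"))    # answer questions over the indexed content
```

The Streamlit app does the same thing, except the PDF comes from `st.file_uploader` and the answer is streamed back into the chat window.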
diff --git a/docs/mint.json b/docs/mint.json
index 8edb6a3e..9ff2c716 100644
--- a/docs/mint.json
+++ b/docs/mint.json
@@ -151,6 +151,7 @@
     {
       "group": "Examples",
       "pages": [
+        "examples/chat-with-PDF",
         "examples/notebooks-and-replits",
         {
           "group": "REST API Service",
diff --git a/embedchain/llm/ollama.py b/embedchain/llm/ollama.py
index 459a22b6..c06cd5d9 100644
--- a/embedchain/llm/ollama.py
+++ b/embedchain/llm/ollama.py
@@ -19,7 +19,7 @@ class OllamaLlm(BaseLlm):
 
     def get_llm_model_answer(self, prompt):
         return self._get_answer(prompt=prompt, config=self.config)
-
+
     def _get_answer(self, prompt: str, config: BaseLlmConfig) -> Union[str, Iterable]:
         callback_manager = [StreamingStdOutCallbackHandler()] if config.stream else [StdOutCallbackHandler()]
 
@@ -28,7 +28,7 @@ class OllamaLlm(BaseLlm):
             system=config.system_prompt,
             temperature=config.temperature,
             top_p=config.top_p,
-            callback_manager=CallbackManager(callback_manager)
+            callback_manager=CallbackManager(callback_manager),
         )
 
         return llm(prompt)
diff --git a/examples/chat-pdf/.streamlit/secrets.toml b/examples/chat-pdf/.streamlit/secrets.toml
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/chat-pdf/README.md b/examples/chat-pdf/README.md
new file mode 100644
index 00000000..b58b0c42
--- /dev/null
+++ b/examples/chat-pdf/README.md
@@ -0,0 +1,30 @@
+# Embedchain Chat with PDF App
+
+You can easily create and deploy your own `Chat-with-PDF` app using Embedchain.
+
+Here are a few simple steps to create and deploy your app:
+
+1. Fork the embedchain repo from [GitHub](https://github.com/embedchain/embedchain).
+
+If you run into problems with forking, please refer to the [GitHub docs](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) on forking a repo.
+
+2. Navigate to the `chat-pdf` example app in your forked repo:
+
+```bash
+cd examples/chat-pdf
+```
+
+3. Run your app in a development environment with these simple commands:
+
+```bash
+pip install -r requirements.txt
+ec dev
+```
+
+Feel free to improve our simple `chat-pdf` Streamlit app and create a pull request to showcase your app [here](https://docs.embedchain.ai/examples/showcase).
+
+4. Deploy your app using the Streamlit interface.
+
+Connect your GitHub account with Streamlit and follow this [guide](https://docs.streamlit.io/streamlit-community-cloud/deploy-your-app) to deploy your app.
+
+You can also use the deploy button on the local Streamlit site that opens when you run the `ec dev` command.
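For context on the small `embedchain/llm/ollama.py` formatting change above, here is a rough sketch of driving that class directly, mirroring the config used in `tests/llm/test_ollama.py` later in this patch. It assumes a local Ollama server with the `llama2` model pulled; the prompt is a placeholder.

```python
# Rough sketch of using OllamaLlm directly, mirroring tests/llm/test_ollama.py.
# Assumes an Ollama server is running locally with the "llama2" model pulled;
# otherwise the call will fail.
from embedchain.config import BaseLlmConfig
from embedchain.llm.ollama import OllamaLlm

config = BaseLlmConfig(model="llama2", temperature=0.7, top_p=0.8, stream=True, system_prompt=None)
llm = OllamaLlm(config=config)

# With stream=True, _get_answer() attaches a StreamingStdOutCallbackHandler,
# so tokens are printed to stdout as they are generated.
print(llm.get_llm_model_answer("Summarize what embedchain does in one sentence."))
```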
diff --git a/examples/chat-pdf/app.py b/examples/chat-pdf/app.py
new file mode 100644
index 00000000..b30e99e8
--- /dev/null
+++ b/examples/chat-pdf/app.py
@@ -0,0 +1,150 @@
+import os
+import queue
+import re
+import tempfile
+import threading
+
+import streamlit as st
+
+from embedchain import Pipeline as App
+from embedchain.config import BaseLlmConfig
+from embedchain.helpers.callbacks import (StreamingStdOutCallbackHandlerYield,
+                                          generate)
+
+
+@st.cache_resource
+def embedchain_bot():
+    return App.from_config(
+        config={
+            "llm": {
+                "provider": "openai",
+                "config": {
+                    "model": "gpt-3.5-turbo-1106",
+                    "temperature": 0.5,
+                    "max_tokens": 1000,
+                    "top_p": 1,
+                    "stream": True,
+                },
+            },
+            "vectordb": {
+                "provider": "chroma",
+                "config": {"collection_name": "chat-pdf", "dir": "db", "allow_reset": True},
+            },
+            "chunker": {"chunk_size": 2000, "chunk_overlap": 0, "length_function": "len"},
+        }
+    )
+
+
+@st.cache_data
+def update_openai_key():
+    os.environ["OPENAI_API_KEY"] = st.session_state.chatbot_api_key
+
+
+with st.sidebar:
+    openai_access_token = st.text_input(
+        "OpenAI API Key", value=os.environ.get("OPENAI_API_KEY"), key="chatbot_api_key", type="password"
+    )  # noqa: E501
+    "WE DO NOT STORE YOUR OPENAI KEY."
+    "Just paste your OpenAI API key here and we'll use it to power the chatbot. [Get your OpenAI API key](https://platform.openai.com/api-keys)"  # noqa: E501
+
+    if openai_access_token:
+        update_openai_key()
+
+    pdf_files = st.file_uploader("Upload your PDF files", accept_multiple_files=True, type="pdf")
+    add_pdf_files = st.session_state.get("add_pdf_files", [])
+    for pdf_file in pdf_files:
+        file_name = pdf_file.name
+        if file_name in add_pdf_files:
+            continue
+        try:
+            if not os.environ.get("OPENAI_API_KEY"):
+                st.error("Please enter your OpenAI API Key")
+                st.stop()
+            app = embedchain_bot()
+            temp_file_name = None
+            with tempfile.NamedTemporaryFile(mode="wb", delete=False, prefix=file_name, suffix=".pdf") as f:
+                f.write(pdf_file.getvalue())
+                temp_file_name = f.name
+            if temp_file_name:
+                st.markdown(f"Adding {file_name} to knowledge base...")
+                app.add(temp_file_name, data_type="pdf_file")
+                st.markdown("")
+                add_pdf_files.append(file_name)
+                os.remove(temp_file_name)
+            st.session_state.messages.append({"role": "assistant", "content": f"Added {file_name} to knowledge base!"})
+        except Exception as e:
+            st.error(f"Error adding {file_name} to knowledge base: {e}")
+            st.stop()
+    st.session_state["add_pdf_files"] = add_pdf_files
+
+st.title("📄 Embedchain - Chat with PDF")
+styled_caption = '🚀 An Embedchain app powered by OpenAI!'  # noqa: E501
+st.markdown(styled_caption, unsafe_allow_html=True)
+
+if "messages" not in st.session_state:
+    st.session_state.messages = [
+        {
+            "role": "assistant",
+            "content": """
+                Hi! I'm a chatbot powered by Embedchain, which can answer questions about your PDF documents.\n
+                Upload your PDF documents here and I'll answer your questions about them!
+            """,
+        }
+    ]
+
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+
+if prompt := st.chat_input("Ask me anything!"):
+    if not os.environ.get("OPENAI_API_KEY"):
+        st.error("Please enter your OpenAI API Key", icon="🤖")
+        st.stop()
+
+    app = embedchain_bot()
+    with st.chat_message("user"):
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        st.markdown(prompt)
+
+    with st.chat_message("assistant"):
+        msg_placeholder = st.empty()
+        msg_placeholder.markdown("Thinking...")
+        full_response = ""
+
+        q = queue.Queue()
+
+        def app_response(result):
+            llm_config = app.llm.config.as_dict()
+            llm_config["callbacks"] = [StreamingStdOutCallbackHandlerYield(q=q)]
+            config = BaseLlmConfig(**llm_config)
+            answer, citations = app.chat(prompt, config=config, citations=True)
+            result["answer"] = answer
+            result["citations"] = citations
+
+        results = {}
+        thread = threading.Thread(target=app_response, args=(results,))
+        thread.start()
+
+        for answer_chunk in generate(q):
+            full_response += answer_chunk
+            msg_placeholder.markdown(full_response)
+
+        thread.join()
+        answer, citations = results["answer"], results["citations"]
+        if citations:
+            full_response += "\n\n**Sources**:\n"
+            sources = []
+            for i, citation in enumerate(citations):
+                source = citation[1]
+                pattern = re.compile(r"([^/]+)\.[^\.]+\.pdf$")
+                match = pattern.search(source)
+                if match:
+                    source = match.group(1) + ".pdf"
+                sources.append(source)
+            sources = list(set(sources))
+            for source in sources:
+                full_response += f"- {source}\n"
+
+        msg_placeholder.markdown(full_response)
+        print("Answer: ", answer)
+        st.session_state.messages.append({"role": "assistant", "content": answer})
diff --git a/examples/chat-pdf/embedchain.json b/examples/chat-pdf/embedchain.json
new file mode 100644
index 00000000..32dec293
--- /dev/null
+++ b/examples/chat-pdf/embedchain.json
@@ -0,0 +1,3 @@
+{
+    "provider": "streamlit.io"
+}
\ No newline at end of file
diff --git a/examples/chat-pdf/requirements.txt b/examples/chat-pdf/requirements.txt
new file mode 100644
index 00000000..b864076a
--- /dev/null
+++ b/examples/chat-pdf/requirements.txt
@@ -0,0 +1,2 @@
+streamlit==1.29.0
+embedchain
diff --git a/tests/llm/test_ollama.py b/tests/llm/test_ollama.py
index 394bbd41..34ab8238 100644
--- a/tests/llm/test_ollama.py
+++ b/tests/llm/test_ollama.py
@@ -9,6 +9,7 @@ def ollama_llm_config():
     config = BaseLlmConfig(model="llama2", temperature=0.7, top_p=0.8, stream=True, system_prompt=None)
     yield config
 
+
 def test_get_llm_model_answer(ollama_llm_config, mocker):
     mocker.patch("embedchain.llm.ollama.OllamaLlm._get_answer", return_value="Test answer")
 
@@ -33,6 +34,6 @@ def test_get_answer_mocked_ollama(ollama_llm_config, mocker):
         system=None,
         temperature=0.7,
         top_p=0.8,
-        callback_manager=mocker.ANY  # Use mocker.ANY to ignore the exact instance
+        callback_manager=mocker.ANY,  # Use mocker.ANY to ignore the exact instance
     )
     mock_instance.assert_called_once_with(prompt)
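The **Sources** footer in `app.py` recovers the original upload name from the temporary file path (the temp file is created with `prefix=<upload name>` and `suffix=".pdf"`, so the path ends in `<name>.pdf<random>.pdf`). A standalone check of that cleanup regex, using a hypothetical temp path:

```python
# Standalone check of the source-name cleanup used in app.py's "Sources" footer.
# The path below is hypothetical; NamedTemporaryFile(prefix="report.pdf",
# suffix=".pdf") produces paths of this shape.
import re

source = "/tmp/report.pdfk3j2h1g9.pdf"
pattern = re.compile(r"([^/]+)\.[^\.]+\.pdf$")
match = pattern.search(source)
if match:
    source = match.group(1) + ".pdf"
print(source)  # -> report.pdf
```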