diff --git a/docs/data-sources/gmail.mdx b/docs/data-sources/gmail.mdx
index 4c0dbfbf..0ce44b36 100644
--- a/docs/data-sources/gmail.mdx
+++ b/docs/data-sources/gmail.mdx
@@ -24,12 +24,11 @@ To use this you need to save `credentials.json` in the directory from where you
 12. Put the `.json` file in your current directory and rename it to `credentials.json`
 ```python
-import os
-from embedchain.apps.app import App
-from embedchain.models.data_type import DataType
+from embedchain import Pipeline as App
+
 app = App()
 
-query = "to: me label:inbox"
-app.add(query, data_type=DataType.GMAIL)
+gmail_filter = "to: me label:inbox"
+app.add(gmail_filter, data_type="gmail")
 
 app.query("Summarize my email conversations")
 ```
\ No newline at end of file
diff --git a/docs/data-sources/json.mdx b/docs/data-sources/json.mdx
index d4821638..4008d0f2 100644
--- a/docs/data-sources/json.mdx
+++ b/docs/data-sources/json.mdx
@@ -2,52 +2,43 @@
 title: '📃 JSON'
 ---
 
-To add any json file, use the data_type as `json`. Headers are included for each line, so if you have an `age` column, `18` will be added as `age: 18`. Eg:
+To add any JSON file, use the data_type as `json`. Keys are included for each entry, so for example a JSON like `{"age": 18}` will be added as `age: 18`. Here are the supported sources for loading `json`:
+
 ```
 1. URL - valid url to json file that ends with ".json" extension.
 2. Local file - valid path to local json file that ends with ".json" extension.
 3. String - valid json string (e.g. - app.add('{"foo": "bar"}'))
 ```
-If you would like to add other data structures (e.x. list, dict etc.), do:
-```python
- import json
- a = {"foo": "bar"}
- valid_json_string_data = json.dumps(a, indent=0)
+
+If you would like to add other data structures (e.g. a list or dict), convert them to a valid JSON string first using the `json.dumps()` function.
+

- b = [{"foo": "bar"}]
- valid_json_string_data = json.dumps(b, indent=0)
-```
-Example:
-```python
-import os
+## Example

-from embedchain.apps.app import App
+

-os.environ["OPENAI_API_KEY"] = "openai_api_key"
+```python python
+from embedchain import Pipeline as App

 app = App()

-response = app.query("What is the net worth of Elon Musk as of October 2023?")
+# Add the JSON file
+app.add("temp.json")

-print(response)
-"I'm sorry, but I don't have access to real-time information or future predictions. Therefore, I don't know the net worth of Elon Musk as of October 2023."
-
-source_id = app.add("temp.json")
-
-response = app.query("What is the net worth of Elon Musk as of October 2023?")
-
-print(response)
-"As of October 2023, Elon Musk's net worth is $255.2 billion."
+app.query("What is the net worth of Elon Musk as of October 2023?")
+# As of October 2023, Elon Musk's net worth is $255.2 billion.
 ```

-temp.json
-```json
+
+
+```json temp.json
 {
   "question": "What is your net worth, Elon Musk?",
   "answer": "As of October 2023, Elon Musk's net worth is $255.2 billion, making him one of the wealthiest individuals in the world."
 }
 ```
+
diff --git a/docs/data-sources/openapi.mdx b/docs/data-sources/openapi.mdx
index 41c2b080..d95142d2 100644
--- a/docs/data-sources/openapi.mdx
+++ b/docs/data-sources/openapi.mdx
@@ -2,13 +2,10 @@
 title: 🙌 OpenAPI
 ---
 
-To add any OpenAPI spec yaml file (currently the json file will be detected as JSON data type), use the data_type as 'openapi'. 'openapi' allows remote urls and conventional file paths. Headers are included for each line, so if you have an `age` column, `18` will be added as `age: 18`. Eg:
+To add an OpenAPI spec YAML file (currently a JSON spec file will be detected as the JSON data type), use the data_type as 'openapi'. 'openapi' accepts both remote URLs and conventional file paths.
 
 ```python
-from embedchain.apps.app import App
-import os
-
-os.environ["OPENAI_API_KEY"] = "sk-xxx"
+from embedchain import Pipeline as App

 app = App()

@@ -16,8 +13,10 @@ app.add("https://github.com/openai/openai-openapi/blob/master/openapi.yaml", dat
 # Or add using the local file path
 # app.add("configs/openai_openapi.yaml", data_type="openapi")

-response = app.query("What can OpenAI API endpoint do? Can you list the things it can learn from?")
+app.query("What can OpenAI API endpoint do? Can you list the things it can learn from?")
 # Answer: The OpenAI API endpoint allows users to interact with OpenAI's models and perform various tasks such as generating text, answering questions, summarizing documents, translating languages, and more. The specific capabilities and tasks that the API can learn from may vary depending on the models and features provided by OpenAI. For more detailed information, it is recommended to refer to the OpenAI API documentation at https://platform.openai.com/docs/api-reference.
 ```

-NOTE: The yaml file added to the App must have the required OpenAPI fields otherwise the adding OpenAPI spec will fail. Please refer to [OpenAPI Spec Doc](https://spec.openapis.org/oas/v3.1.0)
\ No newline at end of file
+
+The YAML file added to the App must have the required OpenAPI fields, otherwise adding the OpenAPI spec will fail. Please refer to the [OpenAPI Spec Doc](https://spec.openapis.org/oas/v3.1.0)
+
\ No newline at end of file
diff --git a/docs/get-started/openai-assistant.mdx b/docs/get-started/openai-assistant.mdx
index f227341b..66c60129 100644
--- a/docs/get-started/openai-assistant.mdx
+++ b/docs/get-started/openai-assistant.mdx
@@ -8,7 +8,7 @@ Embedchain now supports [OpenAI Assistants API](https://platform.openai.com/docs
 
 At a high level, an integration of the Assistants API has the following flow:
 
-1. Create an Assistant in the API by defining it custom instructions and picking a model
+1. Create an Assistant in the API by defining custom instructions and picking a model
 2. Create a Thread when a user starts a conversation
 3. Add Messages to the Thread as the user asks questions
 4. Run the Assistant on the Thread to trigger responses. This automatically calls the relevant tools.
@@ -19,7 +19,7 @@ Creating an OpenAI Assistant using Embedchain is very simple 3 step process.
 
 Make sure that you have `OPENAI_API_KEY` set in the environment variable.
 
-```python
+```python Initialize
 from embedchain.store.assistants import OpenAIAssistant
 
 assistant = OpenAIAssistant(
@@ -28,10 +28,28 @@ assistant = OpenAIAssistant(
 )
 ```
 
+If you want to use an existing assistant, you can do something like this:
+
+```python Initialize
+# Load an assistant and create a new thread
+assistant = OpenAIAssistant(assistant_id="asst_xxx")
+
+# Load a specific thread for an assistant
+assistant = OpenAIAssistant(assistant_id="asst_xxx", thread_id="thread_xxx")
+```
+
 ### Arguments
 
-
-  Load existing OpenAI Assistant. If you pass this, you don't have to pass other arguments
+
+  Name for your AI assistant
+
+
+
+  How the Assistant and model should behave or respond
+
+
+
+  Load an existing OpenAI Assistant. If you pass this, you don't have to pass other arguments.
 
 
@@ -53,14 +71,14 @@
 ## Step-2: Add data to thread
 You can add any custom data source that is supported by Embedchain. Else, you can directly pass the file path on your local system and Embedchain propagates it to OpenAI Assistant.
 
-```python
+```python Add data
 assistant.add("/path/to/file.pdf")
-assistant.add("https://www.youtube.com/watch?v=U9mJuUkhUzk", data_type="youtube_video")
+assistant.add("https://www.youtube.com/watch?v=U9mJuUkhUzk")
 assistant.add("https://openai.com/blog/new-models-and-developer-products-announced-at-devday")
 ```
 
 ## Step-3: Chat with your Assistant
 
-```python
+```python Chat
 assistant.chat("How much OpenAI credits were offered to attendees during OpenAI DevDay?")
 # Response: 'Every attendee of OpenAI DevDay 2023 was offered $500 in OpenAI credits.'
 ```
diff --git a/embedchain/store/assistants.py b/embedchain/store/assistants.py
index f5d09625..0a396a37 100644
--- a/embedchain/store/assistants.py
+++ b/embedchain/store/assistants.py
@@ -1,5 +1,6 @@
 import logging
 import os
+import re
 import tempfile
 import time
 from pathlib import Path
@@ -70,9 +71,9 @@ class OpenAIAssistant:
         if Path(source).is_file():
             return source
         data_type = data_type or detect_datatype(source)
-        formatter = DataFormatter(data_type=DataType(data_type), config=AddConfig())
+        formatter = DataFormatter(data_type=DataType(data_type), config=AddConfig(), kwargs={})
         data = formatter.loader.load_data(source)["data"]
-        return self._save_temp_data(data[0]["content"].encode())
+        return self._save_temp_data(data=data[0]["content"].encode(), source=source)
 
     def _add_file_to_assistant(self, file_path):
         file_obj = self._client.files.create(file=open(file_path, "rb"), purpose="assistants")
@@ -117,9 +118,11 @@ class OpenAIAssistant:
         content = [c.text.value for c in thread_message.content if isinstance(c, MessageContentText)]
         return " ".join(content)
 
-    def _save_temp_data(self, data):
+    def _save_temp_data(self, data, source):
+        special_chars_pattern = r'[\\/:*?"<>|&=% ]+'
+        sanitized_source = re.sub(special_chars_pattern, "_", source)[:256]
         temp_dir = tempfile.mkdtemp()
-        file_path = os.path.join(temp_dir, "temp_data")
+        file_path = os.path.join(temp_dir, sanitized_source)
         with open(file_path, "wb") as file:
             file.write(data)
         return file_path
diff --git a/pyproject.toml b/pyproject.toml
index 48d9ad4f..ef06f931 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "embedchain"
-version = "0.1.0"
+version = "0.1.1"
 description = "Data platform for LLMs - Load, index, retrieve and sync any unstructured data"
 authors = [
     "Taranjeet Singh ",
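
The `_save_temp_data` change above derives the temporary file name from a sanitized copy of the source string instead of the fixed name `temp_data`. A minimal standalone sketch of that sanitization, assuming only the regex shown in the patch, is below; the helper name and the example URL are illustrative and not part of the embedchain API.

```python
import re

def sanitize_source_filename(source: str, max_length: int = 256) -> str:
    # Collapse runs of characters that are unsafe in file names
    # (the same pattern used in _save_temp_data) into underscores,
    # then truncate to a filesystem-friendly length.
    special_chars_pattern = r'[\\/:*?"<>|&=% ]+'
    return re.sub(special_chars_pattern, "_", source)[:max_length]

# Illustrative URL: a web source becomes a readable, filesystem-safe name.
print(sanitize_source_filename("https://openai.com/blog/new models?id=1"))
# https_openai.com_blog_new_models_id_1
```

The `[:256]` truncation presumably keeps the generated name within common filesystem limits on file name length.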