[Refactor] Converge Pipeline and App classes (#1021)
Co-authored-by: Deven Patel <deven298@yahoo.com>
@@ -63,7 +63,7 @@ For example, you can create an Elon Musk bot using the following code:

 ```python
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 # Create a bot instance
 os.environ["OPENAI_API_KEY"] = "YOUR API KEY"
@@ -1,7 +1,6 @@
 app:
   config:
     id: 'my-app'
-    collection_name: 'my-app'

 llm:
   provider: openai
@@ -1,7 +1,6 @@
 app:
   config:
     id: 'open-source-app'
-    collection_name: 'open-source-app'
     collect_metrics: false

 llm:
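Net effect of the two config hunks above: `collection_name` is gone from the app-level config. Since `from_config` in `embedchain/app.py` below reads only the `app.config`, `llm`, `vectordb`, `embedding_model`/`embedder` and `chunker` keys, an equivalent post-commit config can also be passed as a plain dict. A minimal sketch, assuming the same keys as the YAML context above:

```python
from embedchain import App

# Mirrors the post-commit YAML: no `collection_name` under app.config.
config = {
    "app": {"config": {"id": "my-app"}},
    "llm": {"provider": "openai"},
}
app = App.from_config(config=config)
```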
@@ -21,7 +21,7 @@ title: '📊 add'
 ### Load data from webpage

 ```python Code example
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()
 app.add("https://www.forbes.com/profile/elon-musk")
@@ -32,7 +32,7 @@ app.add("https://www.forbes.com/profile/elon-musk")
 ### Load data from sitemap

 ```python Code example
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()
 app.add("https://python.langchain.com/sitemap.xml", data_type="sitemap")
@@ -36,7 +36,7 @@ title: '💬 chat'
 If you want to get the answer to question and return both answer and citations, use the following code snippet:

 ```python With Citations
-from embedchain import Pipeline as App
+from embedchain import App

 # Initialize app
 app = App()
@@ -79,7 +79,7 @@ When `citations=True`, note that the returned `sources` are a list of tuples whe
 If you just want to return answers and don't want to return citations, you can use the following example:

 ```python Without Citations
-from embedchain import Pipeline as App
+from embedchain import App

 # Initialize app
 app = App()
@@ -7,7 +7,7 @@ title: 🗑 delete
 ## Usage

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()

@@ -9,7 +9,7 @@ The `deploy()` method not only deploys your pipeline but also efficiently manage
 ## Usage

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 # Initialize app
 app = App()
@@ -41,7 +41,7 @@ You can create an embedchain pipeline instance using the following methods:
 ### Default setting

 ```python Code Example
-from embedchain import Pipeline as App
+from embedchain import App
 app = App()
 ```

@@ -49,7 +49,7 @@ app = App()
 ### Python Dict

 ```python Code Example
-from embedchain import Pipeline as App
+from embedchain import App

 config_dict = {
     'llm': {
@@ -76,7 +76,7 @@ app = App.from_config(config=config_dict)
 <CodeGroup>

 ```python main.py
-from embedchain import Pipeline as App
+from embedchain import App

 # load llm configuration from config.yaml file
 app = App.from_config(config_path="config.yaml")
@@ -103,7 +103,7 @@ embedder:
 <CodeGroup>

 ```python main.py
-from embedchain import Pipeline as App
+from embedchain import App

 # load llm configuration from config.json file
 app = App.from_config(config_path="config.json")
@@ -36,7 +36,7 @@ title: '❓ query'
 If you want to get the answer to question and return both answer and citations, use the following code snippet:

 ```python With Citations
-from embedchain import Pipeline as App
+from embedchain import App

 # Initialize app
 app = App()
@@ -78,7 +78,7 @@ When `citations=True`, note that the returned `sources` are a list of tuples whe
 If you just want to return answers and don't want to return citations, you can use the following example:

 ```python Without Citations
-from embedchain import Pipeline as App
+from embedchain import App

 # Initialize app
 app = App()
@@ -7,7 +7,7 @@ title: 🔄 reset
 ## Usage

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()
 app.add("https://www.forbes.com/profile/elon-musk")
@@ -24,7 +24,7 @@ title: '🔍 search'
 Refer to the following example on how to use the search api:

 ```python Code example
-from embedchain import Pipeline as App
+from embedchain import App

 # Initialize app
 app = App()
@@ -5,7 +5,7 @@ title: "🐝 Beehiiv"
 To add any Beehiiv data sources to your app, just add the base url as the source and set the data_type to `beehiiv`.

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()

@@ -5,7 +5,7 @@ title: '📊 CSV'
 To add any csv file, use the data_type as `csv`. `csv` allows remote urls and conventional file paths. Headers are included for each line, so if you have an `age` column, `18` will be added as `age: 18`. Eg:

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()
 app.add('https://people.sc.fsu.edu/~jburkardt/data/csv/airtravel.csv', data_type="csv")
@@ -5,7 +5,7 @@ title: '⚙️ Custom'
 When we say "custom", we mean that you can customize the loader and chunker to your needs. This is done by passing a custom loader and chunker to the `add` method.

 ```python
-from embedchain import Pipeline as App
+from embedchain import App
 import your_loader
 import your_chunker

@@ -27,7 +27,7 @@ app.add("source", data_type="custom", loader=loader, chunker=chunker)
 Example:

 ```python
-from embedchain import Pipeline as App
+from embedchain import App
 from embedchain.loaders.github import GithubLoader

 app = App()
@@ -35,7 +35,7 @@ Default behavior is to create a persistent vector db in the directory **./db**.
 Create a local index:

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 naval_chat_bot = App()
 naval_chat_bot.add("https://www.youtube.com/watch?v=3qHkcs3kG44")
@@ -45,7 +45,7 @@ naval_chat_bot.add("https://navalmanack.s3.amazonaws.com/Eric-Jorgenson_The-Alma
 You can reuse the local index with the same code, but without adding new documents:

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 naval_chat_bot = App()
 print(naval_chat_bot.query("What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?"))
@@ -56,7 +56,7 @@ print(naval_chat_bot.query("What unique capacity does Naval argue humans possess
 You can reset the app by simply calling the `reset` method. This will delete the vector database and all other app related files.

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()
 app.add("https://www.youtube.com/watch?v=3qHkcs3kG44")
@@ -8,7 +8,7 @@ To use an entire directory as data source, just add `data_type` as `directory` a

 ```python
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ["OPENAI_API_KEY"] = "sk-xxx"

@@ -23,7 +23,7 @@ print(response)

 ```python
 import os
-from embedchain import Pipeline as App
+from embedchain import App
 from embedchain.loaders.directory_loader import DirectoryLoader

 os.environ["OPENAI_API_KEY"] = "sk-xxx"
@@ -12,7 +12,7 @@ To add any Discord channel messages to your app, just add the `channel_id` as th

 ```python
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 # add your discord "BOT" token
 os.environ["DISCORD_TOKEN"] = "xxx"
@@ -5,7 +5,7 @@ title: '📚 Code documentation'
 To add any code documentation website as a loader, use the data_type as `docs_site`. Eg:

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()
 app.add("https://docs.embedchain.ai/", data_type="docs_site")
@@ -7,7 +7,7 @@ title: '📄 Docx file'
 To add any doc/docx file, use the data_type as `docx`. `docx` allows remote urls and conventional file paths. Eg:

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()
 app.add('https://example.com/content/intro.docx', data_type="docx")
@@ -24,7 +24,7 @@ To use this you need to save `credentials.json` in the directory from where you
 12. Put the `.json` file in your current directory and rename it to `credentials.json`

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()

@@ -21,7 +21,7 @@ If you would like to add other data structures (e.g. list, dict etc.), convert i
 <CodeGroup>

 ```python python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()

@@ -5,7 +5,7 @@ title: '📝 Mdx file'
 To add any `.mdx` file to your app, use the data_type (first argument to `.add()` method) as `mdx`. Note that this supports support mdx file present on machine, so this should be a file path. Eg:

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()
 app.add('path/to/file.mdx', data_type='mdx')
@@ -8,7 +8,7 @@ To load a notion page, use the data_type as `notion`. Since it is hard to automa
 The next argument must **end** with the `notion page id`. The id is a 32-character string. Eg:

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()

@@ -5,7 +5,7 @@ title: 🙌 OpenAPI
 To add any OpenAPI spec yaml file (currently the json file will be detected as JSON data type), use the data_type as 'openapi'. 'openapi' allows remote urls and conventional file paths.

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()

@@ -5,7 +5,7 @@ title: '📰 PDF file'
 To add any pdf file, use the data_type as `pdf_file`. Eg:

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()

@@ -5,7 +5,7 @@ title: '❓💬 Queston and answer pair'
 QnA pair is a local data type. To supply your own QnA pair, use the data_type as `qna_pair` and enter a tuple. Eg:

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()

@@ -5,7 +5,7 @@ title: '🗺️ Sitemap'
 Add all web pages from an xml-sitemap. Filters non-text files. Use the data_type as `sitemap`. Eg:

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()

@@ -16,7 +16,7 @@ This will automatically retrieve data from the workspace associated with the use

 ```python
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ["SLACK_USER_TOKEN"] = "xoxp-xxx"
 app = App()
@@ -5,7 +5,7 @@ title: "📝 Substack"
 To add any Substack data sources to your app, just add the main base url as the source and set the data_type to `substack`.

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()

@@ -7,7 +7,7 @@ title: '📝 Text'
 Text is a local data type. To supply your own text, use the data_type as `text` and enter a string. The text is not processed, this can be very versatile. Eg:

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()

@@ -5,7 +5,7 @@ title: '🌐 Web page'
 To add any web page, use the data_type as `web_page`. Eg:

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()

@@ -7,7 +7,7 @@ title: '🧾 XML file'
 To add any xml file, use the data_type as `xml`. Eg:

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()

@@ -13,7 +13,7 @@ pip install -u "embedchain[youtube]"
 </Note>

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()
 app.add("@channel_name", data_type="youtube_channel")
@@ -5,7 +5,7 @@ title: '📺 Youtube'
 To add any youtube video to your app, use the data_type as `youtube_video`. Eg:

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()
 app.add('a_valid_youtube_url_here', data_type='youtube_video')
@@ -25,7 +25,7 @@ Once you have obtained the key, you can use it like this:

 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ['OPENAI_API_KEY'] = 'xxx'

@@ -52,7 +52,7 @@ To use Google AI embedding function, you have to set the `GOOGLE_API_KEY` enviro
 <CodeGroup>
 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ["GOOGLE_API_KEY"] = "xxx"

@@ -81,7 +81,7 @@ To use Azure OpenAI embedding model, you have to set some of the azure openai re

 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ["OPENAI_API_TYPE"] = "azure"
 os.environ["AZURE_OPENAI_ENDPOINT"] = "https://xxx.openai.azure.com/"
@@ -119,7 +119,7 @@ GPT4All supports generating high quality embeddings of arbitrary length document
 <CodeGroup>

 ```python main.py
-from embedchain import Pipeline as App
+from embedchain import App

 # load embedding model configuration from config.yaml file
 app = App.from_config(config_path="config.yaml")
@@ -148,7 +148,7 @@ Hugging Face supports generating embeddings of arbitrary length documents of tex
 <CodeGroup>

 ```python main.py
-from embedchain import Pipeline as App
+from embedchain import App

 # load embedding model configuration from config.yaml file
 app = App.from_config(config_path="config.yaml")
@@ -179,7 +179,7 @@ Embedchain supports Google's VertexAI embeddings model through a simple interfac
 <CodeGroup>

 ```python main.py
-from embedchain import Pipeline as App
+from embedchain import App

 # load embedding model configuration from config.yaml file
 app = App.from_config(config_path="config.yaml")
@@ -29,7 +29,7 @@ Once you have obtained the key, you can use it like this:

 ```python
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ['OPENAI_API_KEY'] = 'xxx'

@@ -44,7 +44,7 @@ If you are looking to configure the different parameters of the LLM, you can do

 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ['OPENAI_API_KEY'] = 'xxx'

@@ -71,7 +71,7 @@ Examples:
 <Accordion title="Using Pydantic Models">
 ```python
 import os
-from embedchain import Pipeline as App
+from embedchain import App
 from embedchain.llm.openai import OpenAILlm
 import requests
 from pydantic import BaseModel, Field, ValidationError, field_validator
@@ -123,7 +123,7 @@ print(result)
 <Accordion title="Using OpenAI JSON schema">
 ```python
 import os
-from embedchain import Pipeline as App
+from embedchain import App
 from embedchain.llm.openai import OpenAILlm
 import requests
 from pydantic import BaseModel, Field, ValidationError, field_validator
@@ -158,7 +158,7 @@ print(result)
 <Accordion title="Using actual python functions">
 ```python
 import os
-from embedchain import Pipeline as App
+from embedchain import App
 from embedchain.llm.openai import OpenAILlm
 import requests
 from pydantic import BaseModel, Field, ValidationError, field_validator
@@ -192,7 +192,7 @@ To use Google AI model, you have to set the `GOOGLE_API_KEY` environment variabl
 <CodeGroup>
 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ["GOOGLE_API_KEY"] = "xxx"

@@ -235,7 +235,7 @@ To use Azure OpenAI model, you have to set some of the azure openai related envi

 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ["OPENAI_API_TYPE"] = "azure"
 os.environ["OPENAI_API_BASE"] = "https://xxx.openai.azure.com/"
@@ -274,7 +274,7 @@ To use anthropic's model, please set the `ANTHROPIC_API_KEY` which you find on t

 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ["ANTHROPIC_API_KEY"] = "xxx"

@@ -311,7 +311,7 @@ Once you have the API key, you are all set to use it with Embedchain.

 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ["COHERE_API_KEY"] = "xxx"

@@ -347,7 +347,7 @@ Once you have the API key, you are all set to use it with Embedchain.

 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ["TOGETHER_API_KEY"] = "xxx"

@@ -375,7 +375,7 @@ Setup Ollama using https://github.com/jmorganca/ollama

 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 # load llm configuration from config.yaml file
 app = App.from_config(config_path="config.yaml")
@@ -406,7 +406,7 @@ GPT4all is a free-to-use, locally running, privacy-aware chatbot. No GPU or inte
 <CodeGroup>

 ```python main.py
-from embedchain import Pipeline as App
+from embedchain import App

 # load llm configuration from config.yaml file
 app = App.from_config(config_path="config.yaml")
@@ -438,7 +438,7 @@ Once you have the key, load the app using the config yaml file:

 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ["JINACHAT_API_KEY"] = "xxx"
 # load llm configuration from config.yaml file
@@ -474,7 +474,7 @@ Once you have the token, load the app using the config yaml file:

 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ["HUGGINGFACE_ACCESS_TOKEN"] = "xxx"

@@ -504,7 +504,7 @@ Once you have the token, load the app using the config yaml file:

 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ["REPLICATE_API_TOKEN"] = "xxx"

@@ -531,7 +531,7 @@ Setup Google Cloud Platform application credentials by following the instruction
 <CodeGroup>

 ```python main.py
-from embedchain import Pipeline as App
+from embedchain import App

 # load llm configuration from config.yaml file
 app = App.from_config(config_path="config.yaml")
@@ -22,7 +22,7 @@ Utilizing a vector database alongside Embedchain is a seamless process. All you
 <CodeGroup>

 ```python main.py
-from embedchain import Pipeline as App
+from embedchain import App

 # load chroma configuration from yaml file
 app = App.from_config(config_path="config1.yaml")
@@ -67,7 +67,7 @@ You can authorize the connection to Elasticsearch by providing either `basic_aut
 <CodeGroup>

 ```python main.py
-from embedchain import Pipeline as App
+from embedchain import App

 # load elasticsearch configuration from yaml file
 app = App.from_config(config_path="config.yaml")
@@ -97,7 +97,7 @@ pip install --upgrade 'embedchain[opensearch]'
 <CodeGroup>

 ```python main.py
-from embedchain import Pipeline as App
+from embedchain import App

 # load opensearch configuration from yaml file
 app = App.from_config(config_path="config.yaml")
@@ -133,7 +133,7 @@ Set the Zilliz environment variables `ZILLIZ_CLOUD_URI` and `ZILLIZ_CLOUD_TOKEN`

 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ['ZILLIZ_CLOUD_URI'] = 'https://xxx.zillizcloud.com'
 os.environ['ZILLIZ_CLOUD_TOKEN'] = 'xxx'
@@ -172,7 +172,7 @@ In order to use Pinecone as vector database, set the environment variables `PINE
 <CodeGroup>

 ```python main.py
-from embedchain import Pipeline as App
+from embedchain import App

 # load pinecone configuration from yaml file
 app = App.from_config(config_path="config.yaml")
@@ -195,7 +195,7 @@ In order to use Qdrant as a vector database, set the environment variables `QDRA

 <CodeGroup>
 ```python main.py
-from embedchain import Pipeline as App
+from embedchain import App

 # load qdrant configuration from yaml file
 app = App.from_config(config_path="config.yaml")
@@ -215,7 +215,7 @@ In order to use Weaviate as a vector database, set the environment variables `WE

 <CodeGroup>
 ```python main.py
-from embedchain import Pipeline as App
+from embedchain import App

 # load weaviate configuration from yaml file
 app = App.from_config(config_path="config.yaml")
@@ -10,7 +10,7 @@ Embedchain enables developers to deploy their LLM-powered apps in production usi
 See the example below on how to use the deploy your app (for free):

 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 # Initialize app
 app = App()
@@ -11,7 +11,7 @@ Use the model provided on huggingface: `mistralai/Mistral-7B-v0.1`
 <CodeGroup>
 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ["HUGGINGFACE_ACCESS_TOKEN"] = "hf_your_token"

@@ -40,7 +40,7 @@ Use the model `gpt-4-turbo` provided my openai.

 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ['OPENAI_API_KEY'] = 'xxx'

@@ -65,7 +65,7 @@ llm:

 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ['OPENAI_API_KEY'] = 'xxx'

@@ -90,7 +90,7 @@ llm:
 <CodeGroup>

 ```python main.py
-from embedchain import Pipeline as App
+from embedchain import App

 # load llm configuration from opensource.yaml file
 app = App.from_config(config_path="opensource.yaml")
@@ -131,7 +131,7 @@ llm:

 ```python main.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ['OPENAI_API_KEY'] = 'sk-xxx'

@@ -149,7 +149,7 @@ response = app.query("What is the net worth of Elon Musk?")
 Set up the app by adding an `id` in the config file. This keeps the data for future use. You can include this `id` in the yaml config or input it directly in `config` dict.
 ```python app1.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ['OPENAI_API_KEY'] = 'sk-xxx'

@@ -167,7 +167,7 @@ response = app.query("What is the net worth of Elon Musk?")
 ```
 ```python app2.py
 import os
-from embedchain import Pipeline as App
+from embedchain import App

 os.environ['OPENAI_API_KEY'] = 'sk-xxx'

@@ -14,7 +14,7 @@ Creating an app involves 3 steps:
 <Steps>
 <Step title="⚙️ Import app instance">
 ```python
-from embedchain import Pipeline as App
+from embedchain import App
 app = App()
 ```
 <Accordion title="Customize your app by a simple YAML config" icon="gear-complex">
@@ -22,15 +22,15 @@ Creating an app involves 3 steps:
 Explore the custom configurations [here](https://docs.embedchain.ai/advanced/configuration).
 <CodeGroup>
 ```python yaml_app.py
-from embedchain import Pipeline as App
+from embedchain import App
 app = App.from_config(config_path="config.yaml")
 ```
 ```python json_app.py
-from embedchain import Pipeline as App
+from embedchain import App
 app = App.from_config(config_path="config.json")
 ```
 ```python app.py
-from embedchain import Pipeline as App
+from embedchain import App
 config = {} # Add your config here
 app = App.from_config(config=config)
 ```
@@ -21,7 +21,7 @@ Create a new file called `app.py` and add the following code:

 ```python
 import chainlit as cl
-from embedchain import Pipeline as App
+from embedchain import App

 import os

@@ -39,7 +39,7 @@ os.environ['LANGCHAIN_PROJECT] = <your-project>


 ```python
-from embedchain import Pipeline as App
+from embedchain import App

 app = App()
 app.add("https://en.wikipedia.org/wiki/Elon_Musk")
@@ -17,7 +17,7 @@ pip install embedchain streamlit
 <Tab title="app.py">
 ```python
 import os
-from embedchain import Pipeline as App
+from embedchain import App
 import streamlit as st

 with st.sidebar:
@@ -24,7 +24,7 @@ Quickly create a RAG pipeline to answer queries about the [Next.JS Framework](ht
 First, let's create your RAG pipeline. Open your Python environment and enter:

 ```python Create pipeline
-from embedchain import Pipeline as App
+from embedchain import App
 app = App()
 ```

@@ -19,7 +19,7 @@ Embedchain offers a simple yet customizable `search()` API that you can use for
 First, let's create your RAG pipeline. Open your Python environment and enter:

 ```python Create pipeline
-from embedchain import Pipeline as App
+from embedchain import App
 app = App()
 ```

@@ -2,10 +2,9 @@ import importlib.metadata

 __version__ = importlib.metadata.version(__package__ or __name__)

-from embedchain.apps.app import App  # noqa: F401
+from embedchain.app import App  # noqa: F401
 from embedchain.client import Client  # noqa: F401
 from embedchain.pipeline import Pipeline  # noqa: F401
-from embedchain.vectordb.chroma import ChromaDB  # noqa: F401

 # Setup the user directory if doesn't exist already
 Client.setup_dir()
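After this `__init__.py` change, `App` resolves from the new `embedchain.app` module while `Pipeline` stays re-exported, so both import styles used throughout the docs above keep working. A quick import-level check (a sketch; it assumes nothing beyond the import lines shown in this hunk):

```python
from embedchain import App, Pipeline  # both names still importable

print(App.__module__)  # expected: embedchain.app (was embedchain.apps.app)
```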
embedchain/app.py (new file, 431 lines)
@@ -0,0 +1,431 @@
+import ast
+import json
+import logging
+import os
+import sqlite3
+import uuid
+from typing import Any, Dict, Optional
+
+import requests
+import yaml
+
+from embedchain.client import Client
+from embedchain.config import AppConfig, ChunkerConfig
+from embedchain.constants import SQLITE_PATH
+from embedchain.embedchain import EmbedChain
+from embedchain.embedder.base import BaseEmbedder
+from embedchain.embedder.openai import OpenAIEmbedder
+from embedchain.factory import EmbedderFactory, LlmFactory, VectorDBFactory
+from embedchain.helpers.json_serializable import register_deserializable
+from embedchain.llm.base import BaseLlm
+from embedchain.llm.openai import OpenAILlm
+from embedchain.telemetry.posthog import AnonymousTelemetry
+from embedchain.utils import validate_config
+from embedchain.vectordb.base import BaseVectorDB
+from embedchain.vectordb.chroma import ChromaDB
+
+# Setup the user directory if doesn't exist already
+Client.setup_dir()
+
+
+@register_deserializable
+class App(EmbedChain):
+    """
+    EmbedChain App lets you create a LLM powered app for your unstructured
+    data by defining your chosen data source, embedding model,
+    and vector database.
+    """
+
+    def __init__(
+        self,
+        id: str = None,
+        name: str = None,
+        config: AppConfig = None,
+        db: BaseVectorDB = None,
+        embedding_model: BaseEmbedder = None,
+        llm: BaseLlm = None,
+        config_data: dict = None,
+        log_level=logging.WARN,
+        auto_deploy: bool = False,
+        chunker: ChunkerConfig = None,
+    ):
+        """
+        Initialize a new `App` instance.
+
+        :param config: Configuration for the pipeline, defaults to None
+        :type config: AppConfig, optional
+        :param db: The database to use for storing and retrieving embeddings, defaults to None
+        :type db: BaseVectorDB, optional
+        :param embedding_model: The embedding model used to calculate embeddings, defaults to None
+        :type embedding_model: BaseEmbedder, optional
+        :param llm: The LLM model used to calculate embeddings, defaults to None
+        :type llm: BaseLlm, optional
+        :param config_data: Config dictionary, defaults to None
+        :type config_data: dict, optional
+        :param log_level: Log level to use, defaults to logging.WARN
+        :type log_level: int, optional
+        :param auto_deploy: Whether to deploy the pipeline automatically, defaults to False
+        :type auto_deploy: bool, optional
+        :raises Exception: If an error occurs while creating the pipeline
+        """
+        if id and config_data:
+            raise Exception("Cannot provide both id and config. Please provide only one of them.")
+
+        if id and name:
+            raise Exception("Cannot provide both id and name. Please provide only one of them.")
+
+        if name and config:
+            raise Exception("Cannot provide both name and config. Please provide only one of them.")
+
+        logging.basicConfig(level=log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+        self.logger = logging.getLogger(__name__)
+        self.auto_deploy = auto_deploy
+        # Store the dict config as an attribute to be able to send it
+        self.config_data = config_data if (config_data and validate_config(config_data)) else None
+        self.client = None
+        # pipeline_id from the backend
+        self.id = None
+        self.chunker = None
+        if chunker:
+            self.chunker = ChunkerConfig(**chunker)
+
+        self.config = config or AppConfig()
+        self.name = self.config.name
+        self.config.id = self.local_id = str(uuid.uuid4()) if self.config.id is None else self.config.id
+
+        if id is not None:
+            # Init client first since user is trying to fetch the pipeline
+            # details from the platform
+            self._init_client()
+            pipeline_details = self._get_pipeline(id)
+            self.config.id = self.local_id = pipeline_details["metadata"]["local_id"]
+            self.id = id
+
+        if name is not None:
+            self.name = name
+
+        self.embedding_model = embedding_model or OpenAIEmbedder()
+        self.db = db or ChromaDB()
+        self.llm = llm or OpenAILlm()
+        self._init_db()
+
+        # Send anonymous telemetry
+        self._telemetry_props = {"class": self.__class__.__name__}
+        self.telemetry = AnonymousTelemetry(enabled=self.config.collect_metrics)
+
+        # Establish a connection to the SQLite database
+        self.connection = sqlite3.connect(SQLITE_PATH, check_same_thread=False)
+        self.cursor = self.connection.cursor()
+
+        # Create the 'data_sources' table if it doesn't exist
+        self.cursor.execute(
+            """
+            CREATE TABLE IF NOT EXISTS data_sources (
+                pipeline_id TEXT,
+                hash TEXT,
+                type TEXT,
+                value TEXT,
+                metadata TEXT,
+                is_uploaded INTEGER DEFAULT 0,
+                PRIMARY KEY (pipeline_id, hash)
+            )
+        """
+        )
+        self.connection.commit()
+        # Send anonymous telemetry
+        self.telemetry.capture(event_name="init", properties=self._telemetry_props)
+
+        self.user_asks = []
+        if self.auto_deploy:
+            self.deploy()
+
+    def _init_db(self):
+        """
+        Initialize the database.
+        """
+        self.db._set_embedder(self.embedding_model)
+        self.db._initialize()
+        self.db.set_collection_name(self.db.config.collection_name)
+
+    def _init_client(self):
+        """
+        Initialize the client.
+        """
+        config = Client.load_config()
+        if config.get("api_key"):
+            self.client = Client()
+        else:
+            api_key = input(
+                "🔑 Enter your Embedchain API key. You can find the API key at https://app.embedchain.ai/settings/keys/ \n"  # noqa: E501
+            )
+            self.client = Client(api_key=api_key)
+
+    def _get_pipeline(self, id):
+        """
+        Get existing pipeline
+        """
+        print("🛠️ Fetching pipeline details from the platform...")
+        url = f"{self.client.host}/api/v1/pipelines/{id}/cli/"
+        r = requests.get(
+            url,
+            headers={"Authorization": f"Token {self.client.api_key}"},
+        )
+        if r.status_code == 404:
+            raise Exception(f"❌ Pipeline with id {id} not found!")
+
+        print(
+            f"🎉 Pipeline loaded successfully! Pipeline url: https://app.embedchain.ai/pipelines/{r.json()['id']}\n"  # noqa: E501
+        )
+        return r.json()
+
+    def _create_pipeline(self):
+        """
+        Create a pipeline on the platform.
+        """
+        print("🛠️ Creating pipeline on the platform...")
+        # self.config_data is a dict. Pass it inside the key 'yaml_config' to the backend
+        payload = {
+            "yaml_config": json.dumps(self.config_data),
+            "name": self.name,
+            "local_id": self.local_id,
+        }
+        url = f"{self.client.host}/api/v1/pipelines/cli/create/"
+        r = requests.post(
+            url,
+            json=payload,
+            headers={"Authorization": f"Token {self.client.api_key}"},
+        )
+        if r.status_code not in [200, 201]:
+            raise Exception(f"❌ Error occurred while creating pipeline. API response: {r.text}")
+
+        if r.status_code == 200:
+            print(
+                f"🎉🎉🎉 Existing pipeline found! View your pipeline: https://app.embedchain.ai/pipelines/{r.json()['id']}\n"  # noqa: E501
+            )  # noqa: E501
+        elif r.status_code == 201:
+            print(
+                f"🎉🎉🎉 Pipeline created successfully! View your pipeline: https://app.embedchain.ai/pipelines/{r.json()['id']}\n"  # noqa: E501
+            )
+        return r.json()
+
+    def _get_presigned_url(self, data_type, data_value):
+        payload = {"data_type": data_type, "data_value": data_value}
+        r = requests.post(
+            f"{self.client.host}/api/v1/pipelines/{self.id}/cli/presigned_url/",
+            json=payload,
+            headers={"Authorization": f"Token {self.client.api_key}"},
+        )
+        r.raise_for_status()
+        return r.json()
+
+    def search(self, query, num_documents=3):
+        """
+        Search for similar documents related to the query in the vector database.
+        """
+        # Send anonymous telemetry
+        self.telemetry.capture(event_name="search", properties=self._telemetry_props)
+
+        # TODO: Search will call the endpoint rather than fetching the data from the db itself when deploy=True.
+        if self.id is None:
+            where = {"app_id": self.local_id}
+            context = self.db.query(
+                query,
+                n_results=num_documents,
+                where=where,
+                skip_embedding=False,
+                citations=True,
+            )
+            result = []
+            for c in context:
+                result.append(
+                    {
+                        "context": c[0],
+                        "source": c[1],
+                        "document_id": c[2],
+                    }
+                )
+            return result
+        else:
+            # Make API call to the backend to get the results
+            NotImplementedError("Search is not implemented yet for the prod mode.")
+
+    def _upload_file_to_presigned_url(self, presigned_url, file_path):
+        try:
+            with open(file_path, "rb") as file:
+                response = requests.put(presigned_url, data=file)
+                response.raise_for_status()
+                return response.status_code == 200
+        except Exception as e:
+            self.logger.exception(f"Error occurred during file upload: {str(e)}")
+            print("❌ Error occurred during file upload!")
+            return False
+
+    def _upload_data_to_pipeline(self, data_type, data_value, metadata=None):
+        payload = {
+            "data_type": data_type,
+            "data_value": data_value,
+            "metadata": metadata,
+        }
+        try:
+            self._send_api_request(f"/api/v1/pipelines/{self.id}/cli/add/", payload)
+            # print the local file path if user tries to upload a local file
+            printed_value = metadata.get("file_path") if metadata.get("file_path") else data_value
+            print(f"✅ Data of type: {data_type}, value: {printed_value} added successfully.")
+        except Exception as e:
+            print(f"❌ Error occurred during data upload for type {data_type}!. Error: {str(e)}")
+
+    def _send_api_request(self, endpoint, payload):
+        url = f"{self.client.host}{endpoint}"
+        headers = {"Authorization": f"Token {self.client.api_key}"}
+        response = requests.post(url, json=payload, headers=headers)
+        response.raise_for_status()
+        return response
+
+    def _process_and_upload_data(self, data_hash, data_type, data_value):
+        if os.path.isabs(data_value):
+            presigned_url_data = self._get_presigned_url(data_type, data_value)
+            presigned_url = presigned_url_data["presigned_url"]
+            s3_key = presigned_url_data["s3_key"]
+            if self._upload_file_to_presigned_url(presigned_url, file_path=data_value):
+                metadata = {"file_path": data_value, "s3_key": s3_key}
+                data_value = presigned_url
+            else:
+                self.logger.error(f"File upload failed for hash: {data_hash}")
+                return False
+        else:
+            if data_type == "qna_pair":
+                data_value = list(ast.literal_eval(data_value))
+            metadata = {}
+
+        try:
+            self._upload_data_to_pipeline(data_type, data_value, metadata)
+            self._mark_data_as_uploaded(data_hash)
+            return True
+        except Exception:
+            print(f"❌ Error occurred during data upload for hash {data_hash}!")
+            return False
+
+    def _mark_data_as_uploaded(self, data_hash):
+        self.cursor.execute(
+            "UPDATE data_sources SET is_uploaded = 1 WHERE hash = ? AND pipeline_id = ?",
+            (data_hash, self.local_id),
+        )
+        self.connection.commit()
+
+    def get_data_sources(self):
+        db_data = self.cursor.execute("SELECT * FROM data_sources WHERE pipeline_id = ?", (self.local_id,)).fetchall()
+
+        data_sources = []
+        for data in db_data:
+            data_sources.append({"data_type": data[2], "data_value": data[3], "metadata": data[4]})
+
+        return data_sources
+
+    def deploy(self):
+        if self.client is None:
+            self._init_client()
+
+        pipeline_data = self._create_pipeline()
+        self.id = pipeline_data["id"]
+
+        results = self.cursor.execute(
+            "SELECT * FROM data_sources WHERE pipeline_id = ? AND is_uploaded = 0", (self.local_id,)  # noqa:E501
+        ).fetchall()
+
+        if len(results) > 0:
+            print("🛠️ Adding data to your pipeline...")
+            for result in results:
+                data_hash, data_type, data_value = result[1], result[2], result[3]
+                self._process_and_upload_data(data_hash, data_type, data_value)
+
+        # Send anonymous telemetry
+        self.telemetry.capture(event_name="deploy", properties=self._telemetry_props)
+
+    @classmethod
+    def from_config(
+        cls,
+        config_path: Optional[str] = None,
+        config: Optional[Dict[str, Any]] = None,
+        auto_deploy: bool = False,
+        yaml_path: Optional[str] = None,
+    ):
+        """
+        Instantiate a Pipeline object from a configuration.
+
+        :param config_path: Path to the YAML or JSON configuration file.
+        :type config_path: Optional[str]
+        :param config: A dictionary containing the configuration.
+        :type config: Optional[Dict[str, Any]]
+        :param auto_deploy: Whether to deploy the pipeline automatically, defaults to False
+        :type auto_deploy: bool, optional
+        :param yaml_path: (Deprecated) Path to the YAML configuration file. Use config_path instead.
+        :type yaml_path: Optional[str]
+        :return: An instance of the Pipeline class.
+        :rtype: Pipeline
+        """
+        # Backward compatibility for yaml_path
+        if yaml_path and not config_path:
+            config_path = yaml_path
+
+        if config_path and config:
+            raise ValueError("Please provide only one of config_path or config.")
+
+        config_data = None
+
+        if config_path:
+            file_extension = os.path.splitext(config_path)[1]
+            with open(config_path, "r") as file:
+                if file_extension in [".yaml", ".yml"]:
+                    config_data = yaml.safe_load(file)
+                elif file_extension == ".json":
+                    config_data = json.load(file)
+                else:
+                    raise ValueError("config_path must be a path to a YAML or JSON file.")
+        elif config and isinstance(config, dict):
+            config_data = config
+        else:
+            logging.error(
+                "Please provide either a config file path (YAML or JSON) or a config dictionary. Falling back to defaults because no config is provided.",  # noqa: E501
+            )
+            config_data = {}
+
+        try:
+            validate_config(config_data)
+        except Exception as e:
+            raise Exception(f"Error occurred while validating the config. Error: {str(e)}")
+
+        app_config_data = config_data.get("app", {}).get("config", {})
+        db_config_data = config_data.get("vectordb", {})
+        embedding_model_config_data = config_data.get("embedding_model", config_data.get("embedder", {}))
+        llm_config_data = config_data.get("llm", {})
+        chunker_config_data = config_data.get("chunker", {})
+
+        app_config = AppConfig(**app_config_data)
+
+        db_provider = db_config_data.get("provider", "chroma")
+        db = VectorDBFactory.create(db_provider, db_config_data.get("config", {}))
+
+        if llm_config_data:
+            llm_provider = llm_config_data.get("provider", "openai")
+            llm = LlmFactory.create(llm_provider, llm_config_data.get("config", {}))
+        else:
+            llm = None
+
+        embedding_model_provider = embedding_model_config_data.get("provider", "openai")
+        embedding_model = EmbedderFactory.create(
+            embedding_model_provider, embedding_model_config_data.get("config", {})
+        )
+
+        # Send anonymous telemetry
+        event_properties = {"init_type": "config_data"}
+        AnonymousTelemetry().capture(event_name="init", properties=event_properties)
+
+        return cls(
+            config=app_config,
|
llm=llm,
|
||||||
|
db=db,
|
||||||
|
embedding_model=embedding_model,
|
||||||
|
config_data=config_data,
|
||||||
|
auto_deploy=auto_deploy,
|
||||||
|
chunker=chunker_config_data,
|
||||||
|
)
|
||||||
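The converged `from_config` above is the single entry point for both file-based and dict-based configuration. A minimal usage sketch (the file name and config values are illustrative, not part of this commit):

```python
from embedchain import App

# From a YAML or JSON file on disk; the file extension picks the parser.
app = App.from_config(config_path="config.yaml")

# Or from an in-memory dict with the same schema.
app = App.from_config(config={"app": {"config": {"id": "my-app"}}})
```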
@@ -1,157 +0,0 @@
-from typing import Optional
-
-import yaml
-
-from embedchain.config import (AppConfig, BaseEmbedderConfig, BaseLlmConfig,
-                               ChunkerConfig)
-from embedchain.config.vectordb.base import BaseVectorDbConfig
-from embedchain.embedchain import EmbedChain
-from embedchain.embedder.base import BaseEmbedder
-from embedchain.embedder.openai import OpenAIEmbedder
-from embedchain.factory import EmbedderFactory, LlmFactory, VectorDBFactory
-from embedchain.helpers.json_serializable import register_deserializable
-from embedchain.llm.base import BaseLlm
-from embedchain.llm.openai import OpenAILlm
-from embedchain.utils import validate_config
-from embedchain.vectordb.base import BaseVectorDB
-from embedchain.vectordb.chroma import ChromaDB
-
-
-@register_deserializable
-class App(EmbedChain):
-    """
-    The EmbedChain app in it's simplest and most straightforward form.
-    An opinionated choice of LLM, vector database and embedding model.
-
-    Methods:
-    add(source, data_type): adds the data from the given URL to the vector db.
-    query(query): finds answer to the given query using vector database and LLM.
-    chat(query): finds answer to the given query using vector database and LLM, with conversation history.
-    """
-
-    def __init__(
-        self,
-        config: Optional[AppConfig] = None,
-        llm: BaseLlm = None,
-        llm_config: Optional[BaseLlmConfig] = None,
-        db: BaseVectorDB = None,
-        db_config: Optional[BaseVectorDbConfig] = None,
-        embedder: BaseEmbedder = None,
-        embedder_config: Optional[BaseEmbedderConfig] = None,
-        system_prompt: Optional[str] = None,
-        chunker: Optional[ChunkerConfig] = None,
-    ):
-        """
-        Initialize a new `App` instance.
-
-        :param config: Config for the app instance., defaults to None
-        :type config: Optional[AppConfig], optional
-        :param llm: LLM Class instance. example: `from embedchain.llm.openai import OpenAILlm`, defaults to OpenAiLlm
-        :type llm: BaseLlm, optional
-        :param llm_config: Allows you to configure the LLM, e.g. how many documents to return,
-        example: `from embedchain.config import BaseLlmConfig`, defaults to None
-        :type llm_config: Optional[BaseLlmConfig], optional
-        :param db: The database to use for storing and retrieving embeddings,
-        example: `from embedchain.vectordb.chroma_db import ChromaDb`, defaults to ChromaDb
-        :type db: BaseVectorDB, optional
-        :param db_config: Allows you to configure the vector database,
-        example: `from embedchain.config import ChromaDbConfig`, defaults to None
-        :type db_config: Optional[BaseVectorDbConfig], optional
-        :param embedder: The embedder (embedding model and function) use to calculate embeddings.
-        example: `from embedchain.embedder.gpt4all_embedder import GPT4AllEmbedder`, defaults to OpenAIEmbedder
-        :type embedder: BaseEmbedder, optional
-        :param embedder_config: Allows you to configure the Embedder.
-        example: `from embedchain.config import BaseEmbedderConfig`, defaults to None
-        :type embedder_config: Optional[BaseEmbedderConfig], optional
-        :param system_prompt: System prompt that will be provided to the LLM as such, defaults to None
-        :type system_prompt: Optional[str], optional
-        :raises TypeError: LLM, database or embedder or their config is not a valid class instance.
-        """
-        # Type check configs
-        if config and not isinstance(config, AppConfig):
-            raise TypeError(
-                "Config is not a `AppConfig` instance. "
-                "Please make sure the type is right and that you are passing an instance."
-            )
-        if llm_config and not isinstance(llm_config, BaseLlmConfig):
-            raise TypeError(
-                "`llm_config` is not a `BaseLlmConfig` instance. "
-                "Please make sure the type is right and that you are passing an instance."
-            )
-        if db_config and not isinstance(db_config, BaseVectorDbConfig):
-            raise TypeError(
-                "`db_config` is not a `BaseVectorDbConfig` instance. "
-                "Please make sure the type is right and that you are passing an instance."
-            )
-        if embedder_config and not isinstance(embedder_config, BaseEmbedderConfig):
-            raise TypeError(
-                "`embedder_config` is not a `BaseEmbedderConfig` instance. "
-                "Please make sure the type is right and that you are passing an instance."
-            )
-
-        # Assign defaults
-        if config is None:
-            config = AppConfig()
-        if llm is None:
-            llm = OpenAILlm(config=llm_config)
-        if db is None:
-            db = ChromaDB(config=db_config)
-        if embedder is None:
-            embedder = OpenAIEmbedder(config=embedder_config)
-
-        self.chunker = None
-        if chunker:
-            self.chunker = ChunkerConfig(**chunker)
-        # Type check assignments
-        if not isinstance(llm, BaseLlm):
-            raise TypeError(
-                "LLM is not a `BaseLlm` instance. "
-                "Please make sure the type is right and that you are passing an instance."
-            )
-        if not isinstance(db, BaseVectorDB):
-            raise TypeError(
-                "Database is not a `BaseVectorDB` instance. "
-                "Please make sure the type is right and that you are passing an instance."
-            )
-        if not isinstance(embedder, BaseEmbedder):
-            raise TypeError(
-                "Embedder is not a `BaseEmbedder` instance. "
-                "Please make sure the type is right and that you are passing an instance."
-            )
-        super().__init__(config, llm=llm, db=db, embedder=embedder, system_prompt=system_prompt)
-
-    @classmethod
-    def from_config(cls, yaml_path: str):
-        """
-        Instantiate an App object from a YAML configuration file.
-
-        :param yaml_path: Path to the YAML configuration file.
-        :type yaml_path: str
-        :return: An instance of the App class.
-        :rtype: App
-        """
-        with open(yaml_path, "r") as file:
-            config_data = yaml.safe_load(file)
-
-        try:
-            validate_config(config_data)
-        except Exception as e:
-            raise Exception(f"❌ Error occurred while validating the YAML config. Error: {str(e)}")
-
-        app_config_data = config_data.get("app", {})
-        llm_config_data = config_data.get("llm", {})
-        db_config_data = config_data.get("vectordb", {})
-        embedding_model_config_data = config_data.get("embedding_model", config_data.get("embedder", {}))
-        chunker_config_data = config_data.get("chunker", {})
-
-        app_config = AppConfig(**app_config_data.get("config", {}))
-
-        llm_provider = llm_config_data.get("provider", "openai")
-        llm = LlmFactory.create(llm_provider, llm_config_data.get("config", {}))
-
-        db_provider = db_config_data.get("provider", "chroma")
-        db = VectorDBFactory.create(db_provider, db_config_data.get("config", {}))
-
-        embedder_provider = embedding_model_config_data.get("provider", "openai")
-        embedder = EmbedderFactory.create(embedder_provider, embedding_model_config_data.get("config", {}))
-        return cls(config=app_config, llm=llm, db=db, embedder=embedder, chunker=chunker_config_data)
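With the old `App.__init__` above deleted, its `llm_config`/`db_config`/`embedder_config` keyword arguments disappear too; configuration now lives on the component instances that get injected. A hedged migration sketch, using classes that appear elsewhere in this diff:

```python
from embedchain import App
from embedchain.config import ChromaDbConfig
from embedchain.vectordb.chroma import ChromaDB

# Before: App(db_config=ChromaDbConfig(collection_name="my-collection"))
# After: configure the component first, then pass the component itself.
db = ChromaDB(config=ChromaDbConfig(collection_name="my-collection"))
app = App(db=db)
```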
@@ -1,7 +1,7 @@
 from typing import Any

-from embedchain import Pipeline as App
-from embedchain.config import AddConfig, BaseLlmConfig, PipelineConfig
+from embedchain import App
+from embedchain.config import AddConfig, AppConfig, BaseLlmConfig
 from embedchain.embedder.openai import OpenAIEmbedder
 from embedchain.helpers.json_serializable import (JSONSerializable,
                                                   register_deserializable)
@@ -12,7 +12,7 @@ from embedchain.vectordb.chroma import ChromaDB
 @register_deserializable
 class BaseBot(JSONSerializable):
     def __init__(self):
-        self.app = App(config=PipelineConfig(), llm=OpenAILlm(), db=ChromaDB(), embedding_model=OpenAIEmbedder())
+        self.app = App(config=AppConfig(), llm=OpenAILlm(), db=ChromaDB(), embedding_model=OpenAIEmbedder())

     def add(self, data: Any, config: AddConfig = None):
         """
@@ -1,12 +1,11 @@
 # flake8: noqa: F401

 from .add_config import AddConfig, ChunkerConfig
-from .apps.app_config import AppConfig
+from .app_config import AppConfig
 from .base_config import BaseConfig
 from .embedder.base import BaseEmbedderConfig
 from .embedder.base import BaseEmbedderConfig as EmbedderConfig
 from .llm.base import BaseLlmConfig
-from .pipeline_config import PipelineConfig
 from .vectordb.chroma import ChromaDbConfig
 from .vectordb.elasticsearch import ElasticsearchDBConfig
 from .vectordb.opensearch import OpenSearchDBConfig
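After this hunk, `AppConfig` comes from the flattened `.app_config` module and `PipelineConfig` is no longer exported from `embedchain.config`. A minimal sketch of the import-level migration:

```python
# PipelineConfig is gone from embedchain.config after this commit;
# AppConfig now covers the pipeline use case as well.
from embedchain.config import AppConfig
```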
@@ -15,8 +15,9 @@ class AppConfig(BaseAppConfig):
         self,
         log_level: str = "WARNING",
         id: Optional[str] = None,
+        name: Optional[str] = None,
         collect_metrics: Optional[bool] = True,
-        collection_name: Optional[str] = None,
+        **kwargs,
     ):
         """
         Initializes a configuration class instance for an App. This is the simplest form of an embedchain app.
@@ -28,8 +29,6 @@ class AppConfig(BaseAppConfig):
         :type id: Optional[str], optional
         :param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True
         :type collect_metrics: Optional[bool], optional
-        :param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead,
-        defaults to None
-        :type collection_name: Optional[str], optional
         """
-        super().__init__(log_level=log_level, id=id, collect_metrics=collect_metrics, collection_name=collection_name)
+        self.name = name
+        super().__init__(log_level=log_level, id=id, collect_metrics=collect_metrics, **kwargs)
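The reshaped constructor keeps `AppConfig` backward compatible: `name` is stored on the config and any extra keys flow through `**kwargs` to `BaseAppConfig`. A short sketch with illustrative values:

```python
from embedchain.config import AppConfig

config = AppConfig(id="my-app", name="My App", collect_metrics=False)
# collection_name is gone from the signature; per the removed docstring,
# use app.db.set_collection_name(...) on the vector database instead.
```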
@@ -1,38 +0,0 @@
-from typing import Optional
-
-from embedchain.helpers.json_serializable import register_deserializable
-
-from .apps.base_app_config import BaseAppConfig
-
-
-@register_deserializable
-class PipelineConfig(BaseAppConfig):
-    """
-    Config to initialize an embedchain custom `App` instance, with extra config options.
-    """
-
-    def __init__(
-        self,
-        log_level: str = "WARNING",
-        id: Optional[str] = None,
-        name: Optional[str] = None,
-        collect_metrics: Optional[bool] = True,
-    ):
-        """
-        Initializes a configuration class instance for an App. This is the simplest form of an embedchain app.
-        Most of the configuration is done in the `App` class itself.
-
-        :param log_level: Debug level ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], defaults to "WARNING"
-        :type log_level: str, optional
-        :param id: ID of the app. Document metadata will have this id., defaults to None
-        :type id: Optional[str], optional
-        :param collect_metrics: Send anonymous telemetry to improve embedchain, defaults to True
-        :type collect_metrics: Optional[bool], optional
-        :param collection_name: Default collection name. It's recommended to use app.db.set_collection_name() instead,
-        defaults to None
-        :type collection_name: Optional[str], optional
-        """
-        self._setup_logging(log_level)
-        self.id = id
-        self.name = name
-        self.collect_metrics = collect_metrics
@@ -2,7 +2,7 @@ import os

 import gradio as gr

-from embedchain import Pipeline as App
+from embedchain import App

 os.environ["OPENAI_API_KEY"] = "sk-xxx"
@@ -1,6 +1,6 @@
 import streamlit as st

-from embedchain import Pipeline as App
+from embedchain import App


 @st.cache_resource
@@ -9,7 +9,7 @@ from langchain.docstore.document import Document
 from embedchain.chunkers.base_chunker import BaseChunker
 from embedchain.config import AddConfig, BaseLlmConfig, ChunkerConfig
-from embedchain.config.apps.base_app_config import BaseAppConfig
+from embedchain.config.base_app_config import BaseAppConfig
 from embedchain.constants import SQLITE_PATH
 from embedchain.data_formatter import DataFormatter
 from embedchain.embedder.base import BaseEmbedder
@@ -1,425 +1,9 @@
-import ast
-import json
-import logging
-import os
-import sqlite3
-import uuid
-from typing import Any, Dict, Optional
-
-import requests
-import yaml
-
-from embedchain import Client
-from embedchain.config import ChunkerConfig, PipelineConfig
-from embedchain.constants import SQLITE_PATH
-from embedchain.embedchain import EmbedChain
-from embedchain.embedder.base import BaseEmbedder
-from embedchain.embedder.openai import OpenAIEmbedder
-from embedchain.factory import EmbedderFactory, LlmFactory, VectorDBFactory
-from embedchain.helpers.json_serializable import register_deserializable
-from embedchain.llm.base import BaseLlm
-from embedchain.llm.openai import OpenAILlm
-from embedchain.telemetry.posthog import AnonymousTelemetry
-from embedchain.utils import validate_config
-from embedchain.vectordb.base import BaseVectorDB
-from embedchain.vectordb.chroma import ChromaDB
-
-# Setup the user directory if doesn't exist already
-Client.setup_dir()
-
-
-@register_deserializable
-class Pipeline(EmbedChain):
-    """
-    EmbedChain pipeline lets you create a LLM powered app for your unstructured
-    data by defining a pipeline with your chosen data source, embedding model,
-    and vector database.
-    """
-
-    def __init__(
-        self,
-        id: str = None,
-        name: str = None,
-        config: PipelineConfig = None,
-        db: BaseVectorDB = None,
-        embedding_model: BaseEmbedder = None,
-        llm: BaseLlm = None,
-        config_data: dict = None,
-        log_level=logging.WARN,
-        auto_deploy: bool = False,
-        chunker: ChunkerConfig = None,
-    ):
-        """
-        Initialize a new `App` instance.
-
-        :param config: Configuration for the pipeline, defaults to None
-        :type config: PipelineConfig, optional
-        :param db: The database to use for storing and retrieving embeddings, defaults to None
-        :type db: BaseVectorDB, optional
-        :param embedding_model: The embedding model used to calculate embeddings, defaults to None
-        :type embedding_model: BaseEmbedder, optional
-        :param llm: The LLM model used to calculate embeddings, defaults to None
-        :type llm: BaseLlm, optional
-        :param config_data: Config dictionary, defaults to None
-        :type config_data: dict, optional
-        :param log_level: Log level to use, defaults to logging.WARN
-        :type log_level: int, optional
-        :param auto_deploy: Whether to deploy the pipeline automatically, defaults to False
-        :type auto_deploy: bool, optional
-        :raises Exception: If an error occurs while creating the pipeline
-        """
-        if id and config_data:
-            raise Exception("Cannot provide both id and config. Please provide only one of them.")
-
-        if id and name:
-            raise Exception("Cannot provide both id and name. Please provide only one of them.")
-
-        if name and config:
-            raise Exception("Cannot provide both name and config. Please provide only one of them.")
-
-        logging.basicConfig(level=log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
-        self.logger = logging.getLogger(__name__)
-        self.auto_deploy = auto_deploy
-        # Store the dict config as an attribute to be able to send it
-        self.config_data = config_data if (config_data and validate_config(config_data)) else None
-        self.client = None
-        # pipeline_id from the backend
-        self.id = None
-        self.chunker = None
-        if chunker:
-            self.chunker = ChunkerConfig(**chunker)
-
-        self.config = config or PipelineConfig()
-        self.name = self.config.name
-        self.config.id = self.local_id = str(uuid.uuid4()) if self.config.id is None else self.config.id
-
-        if id is not None:
-            # Init client first since user is trying to fetch the pipeline
-            # details from the platform
-            self._init_client()
-            pipeline_details = self._get_pipeline(id)
-            self.config.id = self.local_id = pipeline_details["metadata"]["local_id"]
-            self.id = id
-
-        if name is not None:
-            self.name = name
-
-        self.embedding_model = embedding_model or OpenAIEmbedder()
-        self.db = db or ChromaDB()
-        self.llm = llm or OpenAILlm()
-        self._init_db()
-
-        # Send anonymous telemetry
-        self._telemetry_props = {"class": self.__class__.__name__}
-        self.telemetry = AnonymousTelemetry(enabled=self.config.collect_metrics)
-
-        # Establish a connection to the SQLite database
-        self.connection = sqlite3.connect(SQLITE_PATH, check_same_thread=False)
-        self.cursor = self.connection.cursor()
-
-        # Create the 'data_sources' table if it doesn't exist
-        self.cursor.execute(
-            """
-            CREATE TABLE IF NOT EXISTS data_sources (
-                pipeline_id TEXT,
-                hash TEXT,
-                type TEXT,
-                value TEXT,
-                metadata TEXT,
-                is_uploaded INTEGER DEFAULT 0,
-                PRIMARY KEY (pipeline_id, hash)
-            )
-        """
-        )
-        self.connection.commit()
-        # Send anonymous telemetry
-        self.telemetry.capture(event_name="init", properties=self._telemetry_props)
-
-        self.user_asks = []
-        if self.auto_deploy:
-            self.deploy()
-
-    def _init_db(self):
-        """
-        Initialize the database.
-        """
-        self.db._set_embedder(self.embedding_model)
-        self.db._initialize()
-        self.db.set_collection_name(self.db.config.collection_name)
-
-    def _init_client(self):
-        """
-        Initialize the client.
-        """
-        config = Client.load_config()
-        if config.get("api_key"):
-            self.client = Client()
-        else:
-            api_key = input(
-                "🔑 Enter your Embedchain API key. You can find the API key at https://app.embedchain.ai/settings/keys/ \n"  # noqa: E501
-            )
-            self.client = Client(api_key=api_key)
-
-    def _get_pipeline(self, id):
-        """
-        Get existing pipeline
-        """
-        print("🛠️ Fetching pipeline details from the platform...")
-        url = f"{self.client.host}/api/v1/pipelines/{id}/cli/"
-        r = requests.get(
-            url,
-            headers={"Authorization": f"Token {self.client.api_key}"},
-        )
-        if r.status_code == 404:
-            raise Exception(f"❌ Pipeline with id {id} not found!")
-
-        print(
-            f"🎉 Pipeline loaded successfully! Pipeline url: https://app.embedchain.ai/pipelines/{r.json()['id']}\n"  # noqa: E501
-        )
-        return r.json()
-
-    def _create_pipeline(self):
-        """
-        Create a pipeline on the platform.
-        """
-        print("🛠️ Creating pipeline on the platform...")
-        # self.config_data is a dict. Pass it inside the key 'yaml_config' to the backend
-        payload = {
-            "yaml_config": json.dumps(self.config_data),
-            "name": self.name,
-            "local_id": self.local_id,
-        }
-        url = f"{self.client.host}/api/v1/pipelines/cli/create/"
-        r = requests.post(
-            url,
-            json=payload,
-            headers={"Authorization": f"Token {self.client.api_key}"},
-        )
-        if r.status_code not in [200, 201]:
-            raise Exception(f"❌ Error occurred while creating pipeline. API response: {r.text}")
-
-        if r.status_code == 200:
-            print(
-                f"🎉🎉🎉 Existing pipeline found! View your pipeline: https://app.embedchain.ai/pipelines/{r.json()['id']}\n"  # noqa: E501
-            )  # noqa: E501
-        elif r.status_code == 201:
-            print(
-                f"🎉🎉🎉 Pipeline created successfully! View your pipeline: https://app.embedchain.ai/pipelines/{r.json()['id']}\n"  # noqa: E501
-            )
-        return r.json()
-
-    def _get_presigned_url(self, data_type, data_value):
-        payload = {"data_type": data_type, "data_value": data_value}
-        r = requests.post(
-            f"{self.client.host}/api/v1/pipelines/{self.id}/cli/presigned_url/",
-            json=payload,
-            headers={"Authorization": f"Token {self.client.api_key}"},
-        )
-        r.raise_for_status()
-        return r.json()
-
-    def search(self, query, num_documents=3):
-        """
-        Search for similar documents related to the query in the vector database.
-        """
-        # Send anonymous telemetry
-        self.telemetry.capture(event_name="search", properties=self._telemetry_props)
-
-        # TODO: Search will call the endpoint rather than fetching the data from the db itself when deploy=True.
-        if self.id is None:
-            where = {"app_id": self.local_id}
-            context = self.db.query(
-                query,
-                n_results=num_documents,
-                where=where,
-                skip_embedding=False,
-                citations=True,
-            )
-            result = []
-            for c in context:
-                result.append({"context": c[0], "metadata": c[1]})
-            return result
-        else:
-            # Make API call to the backend to get the results
-            NotImplementedError("Search is not implemented yet for the prod mode.")
-
-    def _upload_file_to_presigned_url(self, presigned_url, file_path):
-        try:
-            with open(file_path, "rb") as file:
-                response = requests.put(presigned_url, data=file)
-                response.raise_for_status()
-                return response.status_code == 200
-        except Exception as e:
-            self.logger.exception(f"Error occurred during file upload: {str(e)}")
-            print("❌ Error occurred during file upload!")
-            return False
-
-    def _upload_data_to_pipeline(self, data_type, data_value, metadata=None):
-        payload = {
-            "data_type": data_type,
-            "data_value": data_value,
-            "metadata": metadata,
-        }
-        try:
-            self._send_api_request(f"/api/v1/pipelines/{self.id}/cli/add/", payload)
-            # print the local file path if user tries to upload a local file
-            printed_value = metadata.get("file_path") if metadata.get("file_path") else data_value
-            print(f"✅ Data of type: {data_type}, value: {printed_value} added successfully.")
-        except Exception as e:
-            print(f"❌ Error occurred during data upload for type {data_type}!. Error: {str(e)}")
-
-    def _send_api_request(self, endpoint, payload):
-        url = f"{self.client.host}{endpoint}"
-        headers = {"Authorization": f"Token {self.client.api_key}"}
-        response = requests.post(url, json=payload, headers=headers)
-        response.raise_for_status()
-        return response
-
-    def _process_and_upload_data(self, data_hash, data_type, data_value):
-        if os.path.isabs(data_value):
-            presigned_url_data = self._get_presigned_url(data_type, data_value)
-            presigned_url = presigned_url_data["presigned_url"]
-            s3_key = presigned_url_data["s3_key"]
-            if self._upload_file_to_presigned_url(presigned_url, file_path=data_value):
-                metadata = {"file_path": data_value, "s3_key": s3_key}
-                data_value = presigned_url
-            else:
-                self.logger.error(f"File upload failed for hash: {data_hash}")
-                return False
-        else:
-            if data_type == "qna_pair":
-                data_value = list(ast.literal_eval(data_value))
-            metadata = {}
-
-        try:
-            self._upload_data_to_pipeline(data_type, data_value, metadata)
-            self._mark_data_as_uploaded(data_hash)
-            return True
-        except Exception:
-            print(f"❌ Error occurred during data upload for hash {data_hash}!")
-            return False
-
-    def _mark_data_as_uploaded(self, data_hash):
-        self.cursor.execute(
-            "UPDATE data_sources SET is_uploaded = 1 WHERE hash = ? AND pipeline_id = ?",
-            (data_hash, self.local_id),
-        )
-        self.connection.commit()
-
-    def get_data_sources(self):
-        db_data = self.cursor.execute("SELECT * FROM data_sources WHERE pipeline_id = ?", (self.local_id,)).fetchall()
-
-        data_sources = []
-        for data in db_data:
-            data_sources.append({"data_type": data[2], "data_value": data[3], "metadata": data[4]})
-
-        return data_sources
-
-    def deploy(self):
-        if self.client is None:
-            self._init_client()
-
-        pipeline_data = self._create_pipeline()
-        self.id = pipeline_data["id"]
-
-        results = self.cursor.execute(
-            "SELECT * FROM data_sources WHERE pipeline_id = ? AND is_uploaded = 0", (self.local_id,)  # noqa:E501
-        ).fetchall()
-
-        if len(results) > 0:
-            print("🛠️ Adding data to your pipeline...")
-            for result in results:
-                data_hash, data_type, data_value = result[1], result[2], result[3]
-                self._process_and_upload_data(data_hash, data_type, data_value)
-
-        # Send anonymous telemetry
-        self.telemetry.capture(event_name="deploy", properties=self._telemetry_props)
-
-    @classmethod
-    def from_config(
-        cls,
-        config_path: Optional[str] = None,
-        config: Optional[Dict[str, Any]] = None,
-        auto_deploy: bool = False,
-        yaml_path: Optional[str] = None,
-    ):
-        """
-        Instantiate a Pipeline object from a configuration.
-
-        :param config_path: Path to the YAML or JSON configuration file.
-        :type config_path: Optional[str]
-        :param config: A dictionary containing the configuration.
-        :type config: Optional[Dict[str, Any]]
-        :param auto_deploy: Whether to deploy the pipeline automatically, defaults to False
-        :type auto_deploy: bool, optional
-        :param yaml_path: (Deprecated) Path to the YAML configuration file. Use config_path instead.
-        :type yaml_path: Optional[str]
-        :return: An instance of the Pipeline class.
-        :rtype: Pipeline
-        """
-        # Backward compatibility for yaml_path
-        if yaml_path and not config_path:
-            config_path = yaml_path
-
-        if config_path and config:
-            raise ValueError("Please provide only one of config_path or config.")
-
-        config_data = None
-
-        if config_path:
-            file_extension = os.path.splitext(config_path)[1]
-            with open(config_path, "r") as file:
-                if file_extension in [".yaml", ".yml"]:
-                    config_data = yaml.safe_load(file)
-                elif file_extension == ".json":
-                    config_data = json.load(file)
-                else:
-                    raise ValueError("config_path must be a path to a YAML or JSON file.")
-        elif config and isinstance(config, dict):
-            config_data = config
-        else:
-            logging.error(
-                "Please provide either a config file path (YAML or JSON) or a config dictionary. Falling back to defaults because no config is provided.",  # noqa: E501
-            )
-            config_data = {}
-
-        try:
-            validate_config(config_data)
-        except Exception as e:
-            raise Exception(f"Error occurred while validating the config. Error: {str(e)}")
-
-        pipeline_config_data = config_data.get("app", {}).get("config", {})
-        db_config_data = config_data.get("vectordb", {})
-        embedding_model_config_data = config_data.get("embedding_model", config_data.get("embedder", {}))
-        llm_config_data = config_data.get("llm", {})
-        chunker_config_data = config_data.get("chunker", {})
-
-        pipeline_config = PipelineConfig(**pipeline_config_data)
-
-        db_provider = db_config_data.get("provider", "chroma")
-        db = VectorDBFactory.create(db_provider, db_config_data.get("config", {}))
-
-        if llm_config_data:
-            llm_provider = llm_config_data.get("provider", "openai")
-            llm = LlmFactory.create(llm_provider, llm_config_data.get("config", {}))
-        else:
-            llm = None
-
-        embedding_model_provider = embedding_model_config_data.get("provider", "openai")
-        embedding_model = EmbedderFactory.create(
-            embedding_model_provider, embedding_model_config_data.get("config", {})
-        )
-
-        # Send anonymous telemetry
-        event_properties = {"init_type": "config_data"}
-        AnonymousTelemetry().capture(event_name="init", properties=event_properties)
-
-        return cls(
-            config=pipeline_config,
-            llm=llm,
-            db=db,
-            embedding_model=embedding_model,
-            config_data=config_data,
-            auto_deploy=auto_deploy,
-            chunker=chunker_config_data,
-        )
+from embedchain.app import App
+
+
+class Pipeline(App):
+    """
+    This is deprecated. Use `App` instead.
+    """
+
+    pass
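The 425-line `Pipeline` implementation collapses into the nine-line stub above: `Pipeline` survives only as a deprecated subclass of `App`, so existing import paths keep resolving. A sketch of the equivalence, assuming the package keeps re-exporting both names:

```python
from embedchain import App       # preferred going forward
from embedchain import Pipeline  # deprecated shim over App

assert issubclass(Pipeline, App)
```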
@@ -2,7 +2,7 @@ import os

 import chainlit as cl

-from embedchain import Pipeline as App
+from embedchain import App

 os.environ["OPENAI_API_KEY"] = "sk-xxx"
@@ -6,7 +6,7 @@ import threading

 import streamlit as st

-from embedchain import Pipeline as App
+from embedchain import App
 from embedchain.config import BaseLlmConfig
 from embedchain.helpers.callbacks import (StreamingStdOutCallbackHandlerYield,
                                           generate)
@@ -2,7 +2,7 @@ import os

 import streamlit as st

-from embedchain import Pipeline as App
+from embedchain import App


 @st.cache_resource
@@ -9,7 +9,7 @@ from services import get_app, get_apps, remove_app, save_app
 from sqlalchemy.orm import Session
 from utils import generate_error_message_for_api_keys

-from embedchain import Pipeline as App
+from embedchain import App
 from embedchain.client import Client

 Base.metadata.create_all(bind=engine)
@@ -6,7 +6,7 @@ from io import StringIO
 import requests
 import streamlit as st

-from embedchain import Pipeline as App
+from embedchain import App
 from embedchain.config import BaseLlmConfig
 from embedchain.helpers.callbacks import (StreamingStdOutCallbackHandlerYield,
                                           generate)
@@ -2,7 +2,7 @@ import queue

 import streamlit as st

-from embedchain import Pipeline as App
+from embedchain import App
 from embedchain.config import BaseLlmConfig
 from embedchain.helpers.callbacks import (StreamingStdOutCallbackHandlerYield,
                                           generate)
@@ -35,7 +35,7 @@ with st.expander(":grey[Want to create your own Unacademy UPSC AI?]"):
    ```

    ```python
-    from embedchain import Pipeline as App
+    from embedchain import App
    unacademy_ai_app = App()
    unacademy_ai_app.add(
        "https://unacademy.com/content/upsc/study-material/plan-policy/atma-nirbhar-bharat-3-0/",
@@ -54,7 +54,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "from embedchain import Pipeline as App\n",
+    "from embedchain import App\n",
     "\n",
     "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"\n",
     "os.environ[\"ANTHROPIC_API_KEY\"] = \"xxx\""
@@ -44,7 +44,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "from embedchain import Pipeline as App\n",
+    "from embedchain import App\n",
     "\n",
     "os.environ[\"OPENAI_API_TYPE\"] = \"azure\"\n",
     "os.environ[\"OPENAI_API_BASE\"] = \"https://xxx.openai.azure.com/\"\n",
@@ -143,7 +143,7 @@
    "source": [
     "while(True):\n",
     "    question = input(\"Enter question: \")\n",
-    "    if question in ['q', 'exit', 'quit']\n",
+    "    if question in ['q', 'exit', 'quit']:\n",
     "        break\n",
     "    answer = app.query(question)\n",
     "    print(answer)"
@@ -49,7 +49,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "from embedchain import Pipeline as App\n",
+    "from embedchain import App\n",
     "\n",
     "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
    ]
@@ -53,7 +53,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "from embedchain import Pipeline as App\n",
+    "from embedchain import App\n",
     "\n",
     "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"\n",
     "os.environ[\"COHERE_API_KEY\"] = \"xxx\""
@@ -49,7 +49,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "from embedchain import Pipeline as App\n",
+    "from embedchain import App\n",
     "\n",
     "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
    ]
@@ -33,7 +33,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "from embedchain import Pipeline as App\n",
+    "from embedchain import App\n",
     "from embedchain.config import AppConfig\n",
     "\n",
     "\n",
@@ -7,7 +7,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from embedchain import Pipeline as App\n",
+    "from embedchain import App\n",
     "\n",
     "embedchain_docs_bot = App()"
    ]
@@ -52,7 +52,7 @@
    },
    "outputs": [],
    "source": [
-    "from embedchain import Pipeline as App"
+    "from embedchain import App"
    ]
   },
   {
@@ -54,7 +54,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "from embedchain import Pipeline as App\n",
+    "from embedchain import App\n",
     "\n",
     "os.environ[\"HUGGINGFACE_ACCESS_TOKEN\"] = \"hf_xxx\""
    ]
@@ -54,7 +54,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "from embedchain import Pipeline as App\n",
+    "from embedchain import App\n",
     "\n",
     "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"\n",
     "os.environ[\"JINACHAT_API_KEY\"] = \"xxx\""
@@ -53,7 +53,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "from embedchain import Pipeline as App\n",
+    "from embedchain import App\n",
     "\n",
     "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"\n",
     "os.environ[\"REPLICATE_API_TOKEN\"] = \"xxx\""
@@ -92,7 +92,7 @@
    }
   ],
   "source": [
-    "from embedchain import Pipeline as App\n",
+    "from embedchain import App\n",
    "app = App.from_config(config_path=\"ollama.yaml\")"
   ]
  },
@@ -54,7 +54,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "from embedchain import Pipeline as App\n",
+    "from embedchain import App\n",
     "\n",
     "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
    ]
@@ -80,7 +80,7 @@
     "llm:\n",
     "  provider: openai\n",
     "  config:\n",
-    "    model: gpt-35-turbo\n",
+    "    model: gpt-3.5-turbo\n",
     "    temperature: 0.5\n",
     "    max_tokens: 1000\n",
     "    top_p: 1\n",
@@ -49,7 +49,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "from embedchain import Pipeline as App\n",
+    "from embedchain import App\n",
     "\n",
     "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
    ]
@@ -49,7 +49,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "from embedchain import Pipeline as App\n",
+    "from embedchain import App\n",
     "\n",
     "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"\n",
     "os.environ[\"PINECONE_API_KEY\"] = \"xxx\"\n",
@@ -53,7 +53,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "from embedchain import Pipeline as App\n",
+    "from embedchain import App\n",
     "\n",
     "os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
     "os.environ[\"TOGETHER_API_KEY\"] = \"\""
@@ -53,7 +53,7 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "from embedchain import Pipeline as App\n",
+    "from embedchain import App\n",
     "\n",
     "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
    ]
@@ -8,6 +8,7 @@ from embedchain.config import AppConfig, ChromaDbConfig
 from embedchain.embedchain import EmbedChain
 from embedchain.llm.base import BaseLlm
 from embedchain.memory.base import ECChatMemory
+from embedchain.vectordb.chroma import ChromaDB

 os.environ["OPENAI_API_KEY"] = "test-api-key"

@@ -15,7 +16,7 @@ os.environ["OPENAI_API_KEY"] = "test-api-key"
 @pytest.fixture
 def app_instance():
     config = AppConfig(log_level="DEBUG", collect_metrics=False)
-    return App(config)
+    return App(config=config)


 def test_whole_app(app_instance, mocker):
@@ -44,9 +45,9 @@ def test_add_after_reset(app_instance, mocker):
     mocker.patch("embedchain.vectordb.chroma.chromadb.Client")

     config = AppConfig(log_level="DEBUG", collect_metrics=False)
-    chroma_config = {"allow_reset": True}
-
-    app_instance = App(config=config, db_config=ChromaDbConfig(**chroma_config))
+    chroma_config = ChromaDbConfig(allow_reset=True)
+    db = ChromaDB(config=chroma_config)
+    app_instance = App(config=config, db=db)

     # mock delete chat history
     mocker.patch.object(ECChatMemory, "delete_chat_history", autospec=True)
@@ -114,5 +114,7 @@ class TestApp(unittest.TestCase):
         self.assertEqual(answer, "Test answer")
         _args, kwargs = mock_database_query.call_args
         self.assertEqual(kwargs.get("input_query"), "Test query")
-        self.assertEqual(kwargs.get("where"), {"attribute": "value"})
+        where = kwargs.get("where")
+        assert "app_id" in where
+        assert "attribute" in where
         mock_answer.assert_called_once()
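The loosened assertions above track a behavior change the tests observe indirectly: the `where` filter that reaches the vector database now carries the app's own `app_id` alongside any user-supplied keys, so an exact-equality check no longer holds. A hedged sketch of what the test exercises (query signature as used by the tests themselves):

```python
answer = app.query("Test query", where={"attribute": "value"})
# The filter passed to the vector db is expected to look roughly like:
#   {"app_id": "<the app's local id>", "attribute": "value"}
```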
@@ -5,6 +5,7 @@ import pytest

 from embedchain import App
 from embedchain.config import AppConfig, BaseLlmConfig
+from embedchain.llm.openai import OpenAILlm


 @pytest.fixture
@@ -37,25 +38,14 @@ def test_query_config_app_passing(mock_get_answer):

     config = AppConfig(collect_metrics=False)
     chat_config = BaseLlmConfig(system_prompt="Test system prompt")
-    app = App(config=config, llm_config=chat_config)
+    llm = OpenAILlm(config=chat_config)
+    app = App(config=config, llm=llm)
     answer = app.llm.get_llm_model_answer("Test query")

     assert app.llm.config.system_prompt == "Test system prompt"
     assert answer == "Test answer"


-@patch("embedchain.llm.openai.OpenAILlm._get_answer")
-def test_app_passing(mock_get_answer):
-    mock_get_answer.return_value = MagicMock()
-    mock_get_answer.return_value = "Test answer"
-    config = AppConfig(collect_metrics=False)
-    chat_config = BaseLlmConfig()
-    app = App(config=config, llm_config=chat_config, system_prompt="Test system prompt")
-    answer = app.llm.get_llm_model_answer("Test query")
-    assert app.llm.config.system_prompt == "Test system prompt"
-    assert answer == "Test answer"
-
-
 @patch("chromadb.api.models.Collection.Collection.add", MagicMock)
 def test_query_with_where_in_params(app):
     with patch.object(app, "_retrieve_from_database") as mock_retrieve:
@@ -83,5 +73,7 @@ def test_query_with_where_in_query_config(app):
     assert answer == "Test answer"
     _, kwargs = mock_database_query.call_args
     assert kwargs.get("input_query") == "Test query"
-    assert kwargs.get("where") == {"attribute": "value"}
+    where = kwargs.get("where")
+    assert "app_id" in where
+    assert "attribute" in where
     mock_answer.assert_called_once()
@@ -4,11 +4,10 @@ import pytest
 import yaml

 from embedchain import App
-from embedchain.config import (AddConfig, AppConfig, BaseEmbedderConfig,
-                               BaseLlmConfig, ChromaDbConfig)
+from embedchain.config import ChromaDbConfig
 from embedchain.embedder.base import BaseEmbedder
 from embedchain.llm.base import BaseLlm
-from embedchain.vectordb.base import BaseVectorDB, BaseVectorDbConfig
+from embedchain.vectordb.base import BaseVectorDB
 from embedchain.vectordb.chroma import ChromaDB


@@ -21,13 +20,14 @@ def app():
 def test_app(app):
     assert isinstance(app.llm, BaseLlm)
     assert isinstance(app.db, BaseVectorDB)
-    assert isinstance(app.embedder, BaseEmbedder)
+    assert isinstance(app.embedding_model, BaseEmbedder)


 class TestConfigForAppComponents:
     def test_constructor_config(self):
         collection_name = "my-test-collection"
-        app = App(db_config=ChromaDbConfig(collection_name=collection_name))
+        db = ChromaDB(config=ChromaDbConfig(collection_name=collection_name))
+        app = App(db=db)
         assert app.db.config.collection_name == collection_name

     def test_component_config(self):
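Two renames surface in these tests: the embedder component is now exposed as `app.embedding_model`, and vector databases are injected as configured instances rather than via a `db_config` keyword. A standalone sketch of the attribute rename (default components assumed, so an OpenAI key would be needed at runtime):

```python
from embedchain import App
from embedchain.embedder.base import BaseEmbedder

app = App()
assert isinstance(app.embedding_model, BaseEmbedder)  # was app.embedder
```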
@@ -36,50 +36,6 @@ class TestConfigForAppComponents:
         app = App(db=database)
         assert app.db.config.collection_name == collection_name

-    def test_different_configs_are_proper_instances(self):
-        app_config = AppConfig()
-        wrong_config = AddConfig()
-        with pytest.raises(TypeError):
-            App(config=wrong_config)
-
-        assert isinstance(app_config, AppConfig)
-
-        llm_config = BaseLlmConfig()
-        wrong_llm_config = "wrong_llm_config"
-
-        with pytest.raises(TypeError):
-            App(llm_config=wrong_llm_config)
-
-        assert isinstance(llm_config, BaseLlmConfig)
-
-        db_config = BaseVectorDbConfig()
-        wrong_db_config = "wrong_db_config"
-
-        with pytest.raises(TypeError):
-            App(db_config=wrong_db_config)
-
-        assert isinstance(db_config, BaseVectorDbConfig)
-
-        embedder_config = BaseEmbedderConfig()
-        wrong_embedder_config = "wrong_embedder_config"
-        with pytest.raises(TypeError):
-            App(embedder_config=wrong_embedder_config)
-
-        assert isinstance(embedder_config, BaseEmbedderConfig)
-
-    def test_components_raises_type_error_if_not_proper_instances(self):
-        wrong_llm = "wrong_llm"
-        with pytest.raises(TypeError):
-            App(llm=wrong_llm)
-
-        wrong_db = "wrong_db"
-        with pytest.raises(TypeError):
-            App(db=wrong_db)
-
-        wrong_embedder = "wrong_embedder"
-        with pytest.raises(TypeError):
-            App(embedder=wrong_embedder)
-

 class TestAppFromConfig:
     def load_config_data(self, yaml_path):
@@ -92,14 +48,13 @@ class TestAppFromConfig:
         yaml_path = "configs/chroma.yaml"
         config_data = self.load_config_data(yaml_path)

-        app = App.from_config(yaml_path)
+        app = App.from_config(config_path=yaml_path)

         # Check if the App instance and its components were created correctly
         assert isinstance(app, App)

         # Validate the AppConfig values
         assert app.config.id == config_data["app"]["config"]["id"]
-        assert app.config.collection_name == config_data["app"]["config"]["collection_name"]
         # Even though not present in the config, the default value is used
         assert app.config.collect_metrics is True

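`App.from_config` now takes the path as an explicit keyword argument. A minimal sketch mirroring the test above (assuming a `configs/chroma.yaml` with the same schema exists on disk):

```python
from embedchain import App

# The positional form `App.from_config(yaml_path)` is replaced by the keyword form.
app = App.from_config(config_path="configs/chroma.yaml")
```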
@@ -118,8 +73,8 @@ class TestAppFromConfig:

         # Validate the Embedder config values
         embedder_config = config_data["embedder"]["config"]
-        assert app.embedder.config.model == embedder_config["model"]
-        assert app.embedder.config.deployment_name == embedder_config.get("deployment_name")
+        assert app.embedding_model.config.model == embedder_config["model"]
+        assert app.embedding_model.config.deployment_name == embedder_config.get("deployment_name")

     def test_from_opensource_config(self, mocker):
         mocker.patch("embedchain.vectordb.chroma.chromadb.Client")
@@ -134,7 +89,6 @@ class TestAppFromConfig:

         # Validate the AppConfig values
         assert app.config.id == config_data["app"]["config"]["id"]
-        assert app.config.collection_name == config_data["app"]["config"]["collection_name"]
         assert app.config.collect_metrics == config_data["app"]["config"]["collect_metrics"]

         # Validate the LLM config values
@@ -153,4 +107,4 @@ class TestAppFromConfig:

         # Validate the Embedder config values
         embedder_config = config_data["embedder"]["config"]
-        assert app.embedder.config.deployment_name == embedder_config["deployment_name"]
+        assert app.embedding_model.config.deployment_name == embedder_config["deployment_name"]
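The embedder component is now read back from `app.embedding_model` instead of `app.embedder`. A minimal sketch (assumes a default app whose embedder config carries the `model` and `deployment_name` fields asserted above):

```python
from embedchain import App

app = App()

# Renamed accessor: app.embedder -> app.embedding_model
print(app.embedding_model.config.model)
print(app.embedding_model.config.deployment_name)
```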
@@ -20,8 +20,9 @@ def chroma_db():
 @pytest.fixture
 def app_with_settings():
     chroma_config = ChromaDbConfig(allow_reset=True, dir="test-db")
+    chroma_db = ChromaDB(config=chroma_config)
     app_config = AppConfig(collect_metrics=False)
-    return App(config=app_config, db_config=chroma_config)
+    return App(config=app_config, db=chroma_db)


 @pytest.fixture(scope="session", autouse=True)
@@ -65,7 +66,8 @@ def test_app_init_with_host_and_port(mock_client):
     port = "1234"
     config = AppConfig(collect_metrics=False)
     db_config = ChromaDbConfig(host=host, port=port)
-    _app = App(config, db_config=db_config)
+    db = ChromaDB(config=db_config)
+    _app = App(config=config, db=db)

     called_settings: Settings = mock_client.call_args[0][0]
     assert called_settings.chroma_server_host == host
@@ -74,7 +76,8 @@ def test_app_init_with_host_and_port(mock_client):

 @patch("embedchain.vectordb.chroma.chromadb.Client")
 def test_app_init_with_host_and_port_none(mock_client):
-    _app = App(config=AppConfig(collect_metrics=False), db_config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    db = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    _app = App(config=AppConfig(collect_metrics=False), db=db)

     called_settings: Settings = mock_client.call_args[0][0]
     assert called_settings.chroma_server_host is None
@@ -82,7 +85,8 @@ def test_app_init_with_host_and_port_none(mock_client):


 def test_chroma_db_duplicates_throw_warning(caplog):
-    app = App(config=AppConfig(collect_metrics=False), db_config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    db = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    app = App(config=AppConfig(collect_metrics=False), db=db)
     app.db.collection.add(embeddings=[[0, 0, 0]], ids=["0"])
     app.db.collection.add(embeddings=[[0, 0, 0]], ids=["0"])
     assert "Insert of existing embedding ID: 0" in caplog.text
@@ -91,7 +95,8 @@ def test_chroma_db_duplicates_throw_warning(caplog):


 def test_chroma_db_duplicates_collections_no_warning(caplog):
-    app = App(config=AppConfig(collect_metrics=False), db_config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    db = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    app = App(config=AppConfig(collect_metrics=False), db=db)
     app.set_collection_name("test_collection_1")
     app.db.collection.add(embeddings=[[0, 0, 0]], ids=["0"])
     app.set_collection_name("test_collection_2")
@@ -104,24 +109,28 @@ def test_chroma_db_duplicates_collections_no_warning(caplog):


 def test_chroma_db_collection_init_with_default_collection():
-    app = App(config=AppConfig(collect_metrics=False), db_config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    db = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    app = App(config=AppConfig(collect_metrics=False), db=db)
     assert app.db.collection.name == "embedchain_store"


 def test_chroma_db_collection_init_with_custom_collection():
-    app = App(config=AppConfig(collect_metrics=False), db_config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    db = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    app = App(config=AppConfig(collect_metrics=False), db=db)
     app.set_collection_name(name="test_collection")
     assert app.db.collection.name == "test_collection"


 def test_chroma_db_collection_set_collection_name():
-    app = App(config=AppConfig(collect_metrics=False), db_config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    db = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    app = App(config=AppConfig(collect_metrics=False), db=db)
     app.set_collection_name("test_collection")
     assert app.db.collection.name == "test_collection"


 def test_chroma_db_collection_changes_encapsulated():
-    app = App(config=AppConfig(collect_metrics=False), db_config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    db = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    app = App(config=AppConfig(collect_metrics=False), db=db)
     app.set_collection_name("test_collection_1")
     assert app.db.count() == 0

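For context, the collection-switching behavior these Chroma tests pin down. A minimal sketch (writes a local `test-db` directory):

```python
from embedchain import App
from embedchain.config import AppConfig, ChromaDbConfig
from embedchain.vectordb.chroma import ChromaDB

db = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db"))
app = App(config=AppConfig(collect_metrics=False), db=db)

# The default collection is "embedchain_store";
# set_collection_name re-points the underlying Chroma collection.
assert app.db.collection.name == "embedchain_store"
app.set_collection_name("test_collection")
assert app.db.collection.name == "test_collection"
```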
@@ -207,12 +216,14 @@ def test_chroma_db_collection_add_with_invalid_inputs(app_with_settings):


 def test_chroma_db_collection_collections_are_persistent():
-    app = App(config=AppConfig(collect_metrics=False), db_config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    db = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    app = App(config=AppConfig(collect_metrics=False), db=db)
     app.set_collection_name("test_collection_1")
     app.db.collection.add(embeddings=[[0, 0, 0]], ids=["0"])
     del app

-    app = App(config=AppConfig(collect_metrics=False), db_config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    db = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    app = App(config=AppConfig(collect_metrics=False), db=db)
     app.set_collection_name("test_collection_1")
     assert app.db.count() == 1

@@ -220,13 +231,15 @@ def test_chroma_db_collection_collections_are_persistent():


 def test_chroma_db_collection_parallel_collections():
+    db1 = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db", collection_name="test_collection_1"))
     app1 = App(
-        AppConfig(collection_name="test_collection_1", collect_metrics=False),
-        db_config=ChromaDbConfig(allow_reset=True, dir="test-db"),
+        config=AppConfig(collect_metrics=False),
+        db=db1,
     )
+    db2 = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db", collection_name="test_collection_2"))
     app2 = App(
-        AppConfig(collection_name="test_collection_2", collect_metrics=False),
-        db_config=ChromaDbConfig(allow_reset=True, dir="test-db"),
+        config=AppConfig(collect_metrics=False),
+        db=db2,
     )

     # cleanup if any previous tests failed or were interrupted
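`collection_name` has moved out of `AppConfig` and into the vector DB config, so two apps are pinned to distinct collections like this (a minimal sketch):

```python
from embedchain import App
from embedchain.config import AppConfig, ChromaDbConfig
from embedchain.vectordb.chroma import ChromaDB

# collection_name is now a ChromaDbConfig field, not an AppConfig one.
db1 = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db", collection_name="collection_1"))
db2 = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db", collection_name="collection_2"))

app1 = App(config=AppConfig(collect_metrics=False), db=db1)
app2 = App(config=AppConfig(collect_metrics=False), db=db2)
```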
@@ -251,13 +264,11 @@ def test_chroma_db_collection_parallel_collections():


 def test_chroma_db_collection_ids_share_collections():
-    app1 = App(
-        AppConfig(id="new_app_id_1", collect_metrics=False), db_config=ChromaDbConfig(allow_reset=True, dir="test-db")
-    )
+    db1 = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    app1 = App(config=AppConfig(collect_metrics=False), db=db1)
     app1.set_collection_name("one_collection")
-    app2 = App(
-        AppConfig(id="new_app_id_2", collect_metrics=False), db_config=ChromaDbConfig(allow_reset=True, dir="test-db")
-    )
+    db2 = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    app2 = App(config=AppConfig(collect_metrics=False), db=db2)
     app2.set_collection_name("one_collection")

     app1.db.collection.add(embeddings=[[0, 0, 0], [1, 1, 1]], ids=["0", "1"])
@@ -272,21 +283,17 @@ def test_chroma_db_collection_ids_share_collections():


 def test_chroma_db_collection_reset():
-    app1 = App(
-        AppConfig(id="new_app_id_1", collect_metrics=False), db_config=ChromaDbConfig(allow_reset=True, dir="test-db")
-    )
+    db1 = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    app1 = App(config=AppConfig(collect_metrics=False), db=db1)
     app1.set_collection_name("one_collection")
-    app2 = App(
-        AppConfig(id="new_app_id_2", collect_metrics=False), db_config=ChromaDbConfig(allow_reset=True, dir="test-db")
-    )
+    db2 = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    app2 = App(config=AppConfig(collect_metrics=False), db=db2)
     app2.set_collection_name("two_collection")
-    app3 = App(
-        AppConfig(id="new_app_id_1", collect_metrics=False), db_config=ChromaDbConfig(allow_reset=True, dir="test-db")
-    )
+    db3 = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    app3 = App(config=AppConfig(collect_metrics=False), db=db3)
     app3.set_collection_name("three_collection")
-    app4 = App(
-        AppConfig(id="new_app_id_4", collect_metrics=False), db_config=ChromaDbConfig(allow_reset=True, dir="test-db")
-    )
+    db4 = ChromaDB(config=ChromaDbConfig(allow_reset=True, dir="test-db"))
+    app4 = App(config=AppConfig(collect_metrics=False), db=db4)
     app4.set_collection_name("four_collection")

     app1.db.collection.add(embeddings=[0, 0, 0], ids=["1"])
@@ -13,7 +13,7 @@ class TestEsDB(unittest.TestCase):
     def test_setUp(self, mock_client):
         self.db = ElasticsearchDB(config=ElasticsearchDBConfig(es_url="https://localhost:9200"))
         self.vector_dim = 384
-        app_config = AppConfig(collection_name=False, collect_metrics=False)
+        app_config = AppConfig(collect_metrics=False)
         self.app = App(config=app_config, db=self.db)

         # Assert that the Elasticsearch client is stored in the ElasticsearchDB class.
@@ -22,8 +22,8 @@ class TestEsDB(unittest.TestCase):
     @patch("embedchain.vectordb.elasticsearch.Elasticsearch")
     def test_query(self, mock_client):
         self.db = ElasticsearchDB(config=ElasticsearchDBConfig(es_url="https://localhost:9200"))
-        app_config = AppConfig(collection_name=False, collect_metrics=False)
-        self.app = App(config=app_config, db=self.db, embedder=GPT4AllEmbedder())
+        app_config = AppConfig(collect_metrics=False)
+        self.app = App(config=app_config, db=self.db, embedding_model=GPT4AllEmbedder())

         # Assert that the Elasticsearch client is stored in the ElasticsearchDB class.
         self.assertEqual(self.db.client, mock_client.return_value)
@@ -74,7 +74,7 @@ class TestEsDB(unittest.TestCase):
     @patch("embedchain.vectordb.elasticsearch.Elasticsearch")
     def test_query_with_skip_embedding(self, mock_client):
         self.db = ElasticsearchDB(config=ElasticsearchDBConfig(es_url="https://localhost:9200"))
-        app_config = AppConfig(collection_name=False, collect_metrics=False)
+        app_config = AppConfig(collect_metrics=False)
         self.app = App(config=app_config, db=self.db)

         # Assert that the Elasticsearch client is stored in the ElasticsearchDB class.
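The same wiring for Elasticsearch. A minimal sketch (the import paths for `ElasticsearchDB`, `ElasticsearchDBConfig`, and `GPT4AllEmbedder` are assumptions inferred from the patch targets above, and a real run needs a reachable Elasticsearch instance):

```python
from embedchain import App
from embedchain.config import AppConfig, ElasticsearchDBConfig  # assumed import path
from embedchain.embedder.gpt4all import GPT4AllEmbedder  # assumed import path
from embedchain.vectordb.elasticsearch import ElasticsearchDB  # assumed import path

db = ElasticsearchDB(config=ElasticsearchDBConfig(es_url="https://localhost:9200"))
app = App(
    config=AppConfig(collect_metrics=False),
    db=db,
    embedding_model=GPT4AllEmbedder(),  # keyword renamed from `embedder`
)
```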
@@ -29,7 +29,7 @@ class TestPinecone:
         # Create a PineconeDB instance
         db = PineconeDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=embedder)
+        App(config=app_config, db=db, embedding_model=embedder)

         # Assert that the embedder was set
         assert db.embedder == embedder
@@ -48,7 +48,7 @@ class TestPinecone:
         # Create a PineconeDb instance
         db = PineconeDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=base_embedder)
+        App(config=app_config, db=db, embedding_model=base_embedder)

         # Add some documents to the database
         documents = ["This is a document.", "This is another document."]
@@ -76,7 +76,7 @@ class TestPinecone:
         # Create a PineconeDB instance
         db = PineconeDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=base_embedder)
+        App(config=app_config, db=db, embedding_model=base_embedder)

         # Query the database for documents that are similar to "document"
         input_query = ["document"]
@@ -94,7 +94,7 @@ class TestPinecone:
         # Create a PineconeDb instance
         db = PineconeDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=BaseEmbedder())
+        App(config=app_config, db=db, embedding_model=BaseEmbedder())

         # Reset the database
         db.reset()
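Note the asymmetry the Pinecone tests pin down: only the `App(...)` keyword was renamed; the vector DB still stores the component as `db.embedder`. A minimal sketch (the `PineconeDB` import path is an assumption, and a real run needs Pinecone credentials in the environment):

```python
from embedchain import App
from embedchain.config import AppConfig
from embedchain.embedder.base import BaseEmbedder
from embedchain.vectordb.pinecone import PineconeDB  # assumed import path

embedder = BaseEmbedder()
db = PineconeDB()
App(config=AppConfig(collect_metrics=False), db=db, embedding_model=embedder)

# The App keyword changed, but the db-side attribute name did not.
assert db.embedder == embedder
```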
@@ -29,7 +29,7 @@ class TestQdrantDB(unittest.TestCase):
         # Create a Qdrant instance
         db = QdrantDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=embedder)
+        App(config=app_config, db=db, embedding_model=embedder)

         self.assertEqual(db.collection_name, "embedchain-store-1526")
         self.assertEqual(db.client, qdrant_client_mock.return_value)
@@ -46,7 +46,7 @@ class TestQdrantDB(unittest.TestCase):
         # Create a Qdrant instance
         db = QdrantDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=embedder)
+        App(config=app_config, db=db, embedding_model=embedder)

         resp = db.get(ids=[], where={})
         self.assertEqual(resp, {"ids": []})
@@ -65,7 +65,7 @@ class TestQdrantDB(unittest.TestCase):
         # Create a Qdrant instance
         db = QdrantDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=embedder)
+        App(config=app_config, db=db, embedding_model=embedder)

         embeddings = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
         documents = ["This is a test document.", "This is another test document."]
@@ -76,7 +76,7 @@ class TestQdrantDB(unittest.TestCase):
         qdrant_client_mock.return_value.upsert.assert_called_once_with(
             collection_name="embedchain-store-1526",
             points=Batch(
-                ids=["abc", "def"],
+                ids=["def", "ghi"],
                 payloads=[
                     {
                         "identifier": "123",
@@ -102,7 +102,7 @@ class TestQdrantDB(unittest.TestCase):
         # Create a Qdrant instance
         db = QdrantDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=embedder)
+        App(config=app_config, db=db, embedding_model=embedder)

         # Query for the document.
         db.query(input_query=["This is a test document."], n_results=1, where={"doc_id": "123"}, skip_embedding=True)
@@ -132,7 +132,7 @@ class TestQdrantDB(unittest.TestCase):
         # Create a Qdrant instance
         db = QdrantDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=embedder)
+        App(config=app_config, db=db, embedding_model=embedder)

         db.count()
         qdrant_client_mock.return_value.get_collection.assert_called_once_with(collection_name="embedchain-store-1526")
@@ -146,7 +146,7 @@ class TestQdrantDB(unittest.TestCase):
         # Create a Qdrant instance
         db = QdrantDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=embedder)
+        App(config=app_config, db=db, embedding_model=embedder)

         db.reset()
         qdrant_client_mock.return_value.delete_collection.assert_called_once_with(
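The vector DB component also exposes the direct surface these Qdrant tests call (`get`, `query`, `count`, `reset`). A minimal sketch (the `QdrantDB` import path is an assumption, and a real run needs a reachable Qdrant instance):

```python
from embedchain import App
from embedchain.config import AppConfig
from embedchain.vectordb.qdrant import QdrantDB  # assumed import path

db = QdrantDB()
# Attaching the db to an App wires in the embedder and collection name,
# as the collection_name assertion in the first Qdrant test shows.
app = App(config=AppConfig(collect_metrics=False), db=db)

# skip_embedding=True skips running the embedder on the query input.
results = db.query(
    input_query=["This is a test document."],
    n_results=1,
    where={"doc_id": "123"},
    skip_embedding=True,
)
print(results, db.count())
```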
@@ -29,7 +29,7 @@ class TestWeaviateDb(unittest.TestCase):
         # Create a Weaviate instance
         db = WeaviateDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=embedder)
+        App(config=app_config, db=db, embedding_model=embedder)

         expected_class_obj = {
             "classes": [
@@ -96,7 +96,7 @@ class TestWeaviateDb(unittest.TestCase):
         # Create a Weaviate instance
         db = WeaviateDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=embedder)
+        App(config=app_config, db=db, embedding_model=embedder)

         expected_client = db._get_or_create_db()
         self.assertEqual(expected_client, weaviate_client_mock)
@@ -115,7 +115,7 @@ class TestWeaviateDb(unittest.TestCase):
         # Create a Weaviate instance
         db = WeaviateDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=embedder)
+        App(config=app_config, db=db, embedding_model=embedder)
         db.BATCH_SIZE = 1

         embeddings = [[1, 2, 3], [4, 5, 6]]
@@ -159,7 +159,7 @@ class TestWeaviateDb(unittest.TestCase):
         # Create a Weaviate instance
         db = WeaviateDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=embedder)
+        App(config=app_config, db=db, embedding_model=embedder)

         # Query for the document.
         db.query(input_query=["This is a test document."], n_results=1, where={}, skip_embedding=True)
@@ -184,7 +184,7 @@ class TestWeaviateDb(unittest.TestCase):
         # Create a Weaviate instance
         db = WeaviateDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=embedder)
+        App(config=app_config, db=db, embedding_model=embedder)

         # Query for the document.
         db.query(input_query=["This is a test document."], n_results=1, where={"doc_id": "123"}, skip_embedding=True)
@@ -210,7 +210,7 @@ class TestWeaviateDb(unittest.TestCase):
         # Create a Weaviate instance
         db = WeaviateDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=embedder)
+        App(config=app_config, db=db, embedding_model=embedder)

         # Reset the database.
         db.reset()
@@ -232,7 +232,7 @@ class TestWeaviateDb(unittest.TestCase):
         # Create a Weaviate instance
         db = WeaviateDB()
         app_config = AppConfig(collect_metrics=False)
-        App(config=app_config, db=db, embedder=embedder)
+        App(config=app_config, db=db, embedding_model=embedder)

         # Reset the database.
         db.count()