[Feature]: Add posthog anonymous telemetry and update docs (#867)
This commit is contained in:
@@ -24,7 +24,7 @@ Once you have obtained the key, you can use it like this:
|
||||
|
||||
```python main.py
|
||||
import os
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
os.environ['OPENAI_API_KEY'] = 'xxx'
|
||||
|
||||
@@ -52,7 +52,7 @@ To use Azure OpenAI embedding model, you have to set some of the azure openai re
|
||||
|
||||
```python main.py
|
||||
import os
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
os.environ["OPENAI_API_TYPE"] = "azure"
|
||||
os.environ["OPENAI_API_BASE"] = "https://xxx.openai.azure.com/"
|
||||
@@ -90,7 +90,7 @@ GPT4All supports generating high quality embeddings of arbitrary length document
|
||||
<CodeGroup>
|
||||
|
||||
```python main.py
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
# load embedding model configuration from config.yaml file
|
||||
app = App.from_config(yaml_path="config.yaml")
|
||||
@@ -119,7 +119,7 @@ Hugging Face supports generating embeddings of arbitrary length documents of tex
|
||||
<CodeGroup>
|
||||
|
||||
```python main.py
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
# load embedding model configuration from config.yaml file
|
||||
app = App.from_config(yaml_path="config.yaml")
|
||||
@@ -150,7 +150,7 @@ Embedchain supports Google's VertexAI embeddings model through a simple interfac
|
||||
<CodeGroup>
|
||||
|
||||
```python main.py
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
# load embedding model configuration from config.yaml file
|
||||
app = App.from_config(yaml_path="config.yaml")
|
||||
|
||||
@@ -26,7 +26,7 @@ Once you have obtained the key, you can use it like this:
|
||||
|
||||
```python
|
||||
import os
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
os.environ['OPENAI_API_KEY'] = 'xxx'
|
||||
|
||||
@@ -41,7 +41,7 @@ If you are looking to configure the different parameters of the LLM, you can do
|
||||
|
||||
```python main.py
|
||||
import os
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
os.environ['OPENAI_API_KEY'] = 'xxx'
|
||||
|
||||
@@ -71,7 +71,7 @@ To use Azure OpenAI model, you have to set some of the azure openai related envi
|
||||
|
||||
```python main.py
|
||||
import os
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
os.environ["OPENAI_API_TYPE"] = "azure"
|
||||
os.environ["OPENAI_API_BASE"] = "https://xxx.openai.azure.com/"
|
||||
@@ -110,7 +110,7 @@ To use anthropic's model, please set the `ANTHROPIC_API_KEY` which you find on t
|
||||
|
||||
```python main.py
|
||||
import os
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
os.environ["ANTHROPIC_API_KEY"] = "xxx"
|
||||
|
||||
@@ -147,7 +147,7 @@ Once you have the API key, you are all set to use it with Embedchain.
|
||||
|
||||
```python main.py
|
||||
import os
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
os.environ["COHERE_API_KEY"] = "xxx"
|
||||
|
||||
@@ -180,7 +180,7 @@ GPT4all is a free-to-use, locally running, privacy-aware chatbot. No GPU or inte
|
||||
<CodeGroup>
|
||||
|
||||
```python main.py
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
# load llm configuration from config.yaml file
|
||||
app = App.from_config(yaml_path="config.yaml")
|
||||
@@ -212,7 +212,7 @@ Once you have the key, load the app using the config yaml file:
|
||||
|
||||
```python main.py
|
||||
import os
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
os.environ["JINACHAT_API_KEY"] = "xxx"
|
||||
# load llm configuration from config.yaml file
|
||||
@@ -248,7 +248,7 @@ Once you have the token, load the app using the config yaml file:
|
||||
|
||||
```python main.py
|
||||
import os
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
os.environ["HUGGINGFACE_ACCESS_TOKEN"] = "xxx"
|
||||
|
||||
@@ -278,7 +278,7 @@ Once you have the token, load the app using the config yaml file:
|
||||
|
||||
```python main.py
|
||||
import os
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
os.environ["REPLICATE_API_TOKEN"] = "xxx"
|
||||
|
||||
@@ -305,7 +305,7 @@ Setup Google Cloud Platform application credentials by following the instruction
|
||||
<CodeGroup>
|
||||
|
||||
```python main.py
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
# load llm configuration from config.yaml file
|
||||
app = App.from_config(yaml_path="config.yaml")
|
||||
|
||||
@@ -22,7 +22,7 @@ Utilizing a vector database alongside Embedchain is a seamless process. All you
|
||||
<CodeGroup>
|
||||
|
||||
```python main.py
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
# load chroma configuration from yaml file
|
||||
app = App.from_config(yaml_path="config1.yaml")
|
||||
@@ -61,7 +61,7 @@ pip install --upgrade 'embedchain[elasticsearch]'
|
||||
<CodeGroup>
|
||||
|
||||
```python main.py
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
# load elasticsearch configuration from yaml file
|
||||
app = App.from_config(yaml_path="config.yaml")
|
||||
@@ -89,7 +89,7 @@ pip install --upgrade 'embedchain[opensearch]'
|
||||
<CodeGroup>
|
||||
|
||||
```python main.py
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
# load opensearch configuration from yaml file
|
||||
app = App.from_config(yaml_path="config.yaml")
|
||||
@@ -125,7 +125,7 @@ Set the Zilliz environment variables `ZILLIZ_CLOUD_URI` and `ZILLIZ_CLOUD_TOKEN`
|
||||
|
||||
```python main.py
|
||||
import os
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
os.environ['ZILLIZ_CLOUD_URI'] = 'https://xxx.zillizcloud.com'
|
||||
os.environ['ZILLIZ_CLOUD_TOKEN'] = 'xxx'
|
||||
@@ -164,7 +164,7 @@ In order to use Pinecone as vector database, set the environment variables `PINE
|
||||
<CodeGroup>
|
||||
|
||||
```python main.py
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
# load pinecone configuration from yaml file
|
||||
app = App.from_config(yaml_path="config.yaml")
|
||||
@@ -187,7 +187,7 @@ In order to use Qdrant as a vector database, set the environment variables `QDRA
|
||||
|
||||
<CodeGroup>
|
||||
```python main.py
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
# load qdrant configuration from yaml file
|
||||
app = App.from_config(yaml_path="config.yaml")
|
||||
@@ -207,7 +207,7 @@ In order to use Weaviate as a vector database, set the environment variables `WE
|
||||
|
||||
<CodeGroup>
|
||||
```python main.py
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
# load weaviate configuration from yaml file
|
||||
app = App.from_config(yaml_path="config.yaml")
|
||||
|
||||
@@ -5,7 +5,7 @@ title: '📊 CSV'
|
||||
To add any csv file, use the data_type as `csv`. `csv` allows remote urls and conventional file paths. Headers are included for each line, so if you have an `age` column, `18` will be added as `age: 18`. Eg:
|
||||
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
app = App()
|
||||
app.add('https://people.sc.fsu.edu/~jburkardt/data/csv/airtravel.csv', data_type="csv")
|
||||
|
||||
@@ -35,7 +35,7 @@ Default behavior is to create a persistent vector db in the directory **./db**.
|
||||
Create a local index:
|
||||
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
naval_chat_bot = App()
|
||||
naval_chat_bot.add("https://www.youtube.com/watch?v=3qHkcs3kG44")
|
||||
@@ -45,7 +45,7 @@ naval_chat_bot.add("https://navalmanack.s3.amazonaws.com/Eric-Jorgenson_The-Alma
|
||||
You can reuse the local index with the same code, but without adding new documents:
|
||||
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
naval_chat_bot = App()
|
||||
print(naval_chat_bot.query("What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?"))
|
||||
|
||||
@@ -5,7 +5,7 @@ title: '📚🌐 Code documentation'
|
||||
To add any code documentation website as a loader, use the data_type as `docs_site`. Eg:
|
||||
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
app = App()
|
||||
app.add("https://docs.embedchain.ai/", data_type="docs_site")
|
||||
|
||||
@@ -7,7 +7,7 @@ title: '📄 Docx file'
|
||||
To add any doc/docx file, use the data_type as `docx`. `docx` allows remote urls and conventional file paths. Eg:
|
||||
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
app = App()
|
||||
app.add('https://example.com/content/intro.docx', data_type="docx")
|
||||
|
||||
@@ -5,7 +5,7 @@ title: '📝 Mdx file'
|
||||
To add any `.mdx` file to your app, use the data_type (first argument to `.add()` method) as `mdx`. Note that this supports mdx files present on your machine, so this should be a file path. Eg:
|
||||
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
app = App()
|
||||
app.add('path/to/file.mdx', data_type='mdx')
|
||||
|
||||
@@ -8,7 +8,7 @@ To load a notion page, use the data_type as `notion`. Since it is hard to automa
|
||||
The next argument must **end** with the `notion page id`. The id is a 32-character string. Eg:
|
||||
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
app = App()
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ title: '📰 PDF file'
|
||||
To add any pdf file, use the data_type as `pdf_file`. Eg:
|
||||
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
app = App()
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ title: '❓💬 Question and answer pair'
|
||||
QnA pair is a local data type. To supply your own QnA pair, use the data_type as `qna_pair` and enter a tuple. Eg:
|
||||
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
app = App()
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ title: '🗺️ Sitemap'
|
||||
Add all web pages from an xml-sitemap. Filters non-text files. Use the data_type as `sitemap`. Eg:
|
||||
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
app = App()
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ title: '📝 Text'
|
||||
Text is a local data type. To supply your own text, use the data_type as `text` and enter a string. The text is not processed, so this can be very versatile. Eg:
|
||||
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
app = App()
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ title: '🌐📄 Web page'
|
||||
To add any web page, use the data_type as `web_page`. Eg:
|
||||
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
app = App()
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ title: '🧾 XML file'
|
||||
To add any xml file, use the data_type as `xml`. Eg:
|
||||
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
app = App()
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ title: '🎥📺 Youtube video'
|
||||
To add any youtube video to your app, use the data_type (first argument to `.add()` method) as `youtube_video`. Eg:
|
||||
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
app = App()
|
||||
app.add('a_valid_youtube_url_here', data_type='youtube_video')
|
||||
|
||||
@@ -9,7 +9,7 @@ description: 'Collections of all the frequently asked questions'
|
||||
|
||||
```python main.py
|
||||
import os
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
os.environ['OPENAI_API_KEY'] = 'xxx'
|
||||
|
||||
@@ -36,7 +36,7 @@ llm:
|
||||
|
||||
```python main.py
|
||||
import os
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
os.environ['OPENAI_API_KEY'] = 'xxx'
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ You can add data from different data sources using the `.add()` method. Then, si
|
||||
If you want to create a Naval Ravikant bot with a YouTube video, a book in PDF format, two blog posts, and a question and answer pair, all you need to do is add the respective links. Embedchain will take care of the rest, creating a bot for you.
|
||||
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
naval_bot = App()
|
||||
# Add online data
|
||||
|
||||
@@ -16,22 +16,22 @@ Creating an app involves 3 steps:
|
||||
<Steps>
|
||||
<Step title="⚙️ Import app instance">
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
app = App()
|
||||
```
|
||||
</Step>
|
||||
<Step title="🗃️ Add data sources">
|
||||
```python
|
||||
# Add different data sources
|
||||
elon_bot.add("https://en.wikipedia.org/wiki/Elon_Musk")
|
||||
elon_bot.add("https://www.forbes.com/profile/elon-musk")
|
||||
app.add("https://en.wikipedia.org/wiki/Elon_Musk")
|
||||
app.add("https://www.forbes.com/profile/elon-musk")
|
||||
# You can also add local data sources such as pdf, csv files etc.
|
||||
# elon_bot.add("/path/to/file.pdf")
|
||||
# app.add("/path/to/file.pdf")
|
||||
```
|
||||
</Step>
|
||||
<Step title="💬 Query or chat on your data and get answers">
|
||||
<Step title="💬 Query or chat or search context on your data">
|
||||
```python
|
||||
elon_bot.query("What is the net worth of Elon Musk today?")
|
||||
app.query("What is the net worth of Elon Musk today?")
|
||||
# Answer: The net worth of Elon Musk today is $258.7 billion.
|
||||
```
|
||||
</Step>
|
||||
@@ -41,18 +41,18 @@ Putting it together, you can run your first app using the following code. Make s
|
||||
|
||||
```python
|
||||
import os
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
os.environ["OPENAI_API_KEY"] = "xxx"
|
||||
elon_bot = App()
|
||||
app = App()
|
||||
|
||||
# Add different data sources
|
||||
elon_bot.add("https://en.wikipedia.org/wiki/Elon_Musk")
|
||||
elon_bot.add("https://www.forbes.com/profile/elon-musk")
|
||||
app.add("https://en.wikipedia.org/wiki/Elon_Musk")
|
||||
app.add("https://www.forbes.com/profile/elon-musk")
|
||||
# You can also add local data sources such as pdf, csv files etc.
|
||||
# elon_bot.add("/path/to/file.pdf")
|
||||
# app.add("/path/to/file.pdf")
|
||||
|
||||
response = elon_bot.query("What is the net worth of Elon Musk today?")
|
||||
response = app.query("What is the net worth of Elon Musk today?")
|
||||
print(response)
|
||||
# Answer: The net worth of Elon Musk today is $258.7 billion.
|
||||
```
|
||||
|
||||
@@ -39,7 +39,7 @@ os.environ['LANGCHAIN_PROJECT'] = <your-project>
|
||||
|
||||
|
||||
```python
|
||||
from embedchain import App
|
||||
from embedchain import Pipeline as App
|
||||
|
||||
app = App()
|
||||
app.add("https://en.wikipedia.org/wiki/Elon_Musk")
|
||||
|
||||
@@ -71,10 +71,6 @@
|
||||
"group": "Examples",
|
||||
"pages": ["examples/full_stack", "examples/api_server", "examples/discord_bot", "examples/slack_bot", "examples/telegram_bot", "examples/whatsapp_bot", "examples/poe_bot"]
|
||||
},
|
||||
{
|
||||
"group": "Pipelines",
|
||||
"pages": ["pipelines/quickstart"]
|
||||
},
|
||||
{
|
||||
"group": "Community",
|
||||
"pages": [
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
---
|
||||
title: '🚀 Pipelines'
|
||||
description: '💡 Start building LLM powered data pipelines in 1 minute'
|
||||
---
|
||||
|
||||
Embedchain lets you build data pipelines on your own data sources and deploy them in production in less than a minute. It can load, index, retrieve, and sync any unstructured data.
|
||||
|
||||
Install embedchain python package:
|
||||
|
||||
```bash
|
||||
pip install embedchain
|
||||
```
|
||||
|
||||
Creating a pipeline involves 3 steps:
|
||||
|
||||
<Steps>
|
||||
<Step title="⚙️ Import pipeline instance">
|
||||
```python
|
||||
from embedchain import Pipeline
|
||||
p = Pipeline(name="Elon Musk")
|
||||
```
|
||||
</Step>
|
||||
|
||||
<Step title="🗃️ Add data sources">
|
||||
```python
|
||||
# Add different data sources
|
||||
p.add("https://en.wikipedia.org/wiki/Elon_Musk")
|
||||
p.add("https://www.forbes.com/profile/elon-musk")
|
||||
# You can also add local data sources such as pdf, csv files etc.
|
||||
# p.add("/path/to/file.pdf")
|
||||
```
|
||||
</Step>
|
||||
<Step title="💬 Deploy your pipeline to Embedchain platform">
|
||||
```python
|
||||
p.deploy()
|
||||
```
|
||||
</Step>
|
||||
</Steps>
|
||||
|
||||
That's it. Now, head to the [Embedchain platform](https://app.embedchain.ai) and your pipeline is available there. Make sure to set the `OPENAI_API_KEY` 🔑 environment variable in the code.
|
||||
|
||||
After you deploy your pipeline to Embedchain platform, you can still add more data sources and update the pipeline multiple times.
|
||||
|
||||
Here is a Google Colab notebook for you to get started: [Open in Colab](https://colab.research.google.com/drive/1YVXaBO4yqlHZY4ho67GCJ6aD4CHNiScD?usp=sharing)
|
||||
Reference in New Issue
Block a user