Update notebooks to use dict instead of yaml and remove dataloaders (#1075)

This commit is contained in:
Sidharth Mohanty
2023-12-29 21:57:46 +05:30
committed by GitHub
parent 904baac153
commit 6df63d9ca7
16 changed files with 230 additions and 554 deletions

View File

@@ -31,7 +31,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"!pip install embedchain[dataloaders]" "!pip install embedchain"
] ]
}, },
{ {
@@ -60,45 +60,13 @@
"os.environ[\"ANTHROPIC_API_KEY\"] = \"xxx\"" "os.environ[\"ANTHROPIC_API_KEY\"] = \"xxx\""
] ]
}, },
{
"cell_type": "markdown",
"metadata": {
"id": "Ns6RhPfbiitr"
},
"source": [
"### Step-3: Define your llm and embedding model config"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S9CkxVjriotB"
},
"outputs": [],
"source": [
"config = \"\"\"\n",
"llm:\n",
" provider: anthropic\n",
" config:\n",
" model: 'claude-instant-1'\n",
" temperature: 0.5\n",
" top_p: 1\n",
" stream: false\n",
"\"\"\"\n",
"\n",
"# Write the multi-line string to a YAML file\n",
"with open('anthropic.yaml', 'w') as file:\n",
" file.write(config)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "PGt6uPLIi1CS" "id": "PGt6uPLIi1CS"
}, },
"source": [ "source": [
"### Step-4 Create embedchain app based on the config" "### Step-3: Create embedchain app and define your config"
] ]
}, },
{ {
@@ -109,7 +77,15 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"app = App.from_config(config_path=\"anthropic.yaml\")" "app = App.from_config(config={\n",
"    \"llm\": {\n",
"        \"provider\": \"anthropic\",\n",
"        \"config\": {\n",
"            \"model\": \"claude-instant-1\",\n",
"            \"temperature\": 0.5,\n",
"            \"top_p\": 1,\n",
"            \"stream\": False\n",
"        }\n",
"    }\n",
"})"
] ]
}, },
{ {
@@ -118,7 +94,7 @@
"id": "XNXv4yZwi7ef" "id": "XNXv4yZwi7ef"
}, },
"source": [ "source": [
"### Step-5: Add data sources to your app" "### Step-4: Add data sources to your app"
] ]
}, },
{ {
@@ -143,7 +119,7 @@
"id": "_7W6fDeAjMAP" "id": "_7W6fDeAjMAP"
}, },
"source": [ "source": [
"### Step-6: All set. Now start asking questions related to your data" "### Step-5: All set. Now start asking questions related to your data"
] ]
}, },
{ {

View File

@@ -23,7 +23,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"!pip install embedchain[dataloaders]" "!pip install embedchain"
] ]
}, },
{ {

View File

@@ -26,7 +26,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"!pip install embedchain[dataloaders]" "!pip install embedchain"
] ]
}, },
{ {
@@ -54,47 +54,13 @@
"os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"" "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
] ]
}, },
{
"cell_type": "markdown",
"metadata": {
"id": "Ns6RhPfbiitr"
},
"source": [
"### Step-3: Define your Vector Database config"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S9CkxVjriotB"
},
"outputs": [],
"source": [
"config = \"\"\"\n",
"vectordb:\n",
" provider: chroma\n",
" config:\n",
" collection_name: 'my-collection'\n",
" # CHANGE THE BELOW TWO LINES!\n",
" # pass remote database variables - host and port\n",
" host: your-chromadb-url.com\n",
" port: 5200\n",
" allow_reset: true\n",
"\"\"\"\n",
"\n",
"# Write the multi-line string to a YAML file\n",
"with open('chromadb.yaml', 'w') as file:\n",
" file.write(config)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "PGt6uPLIi1CS" "id": "PGt6uPLIi1CS"
}, },
"source": [ "source": [
"### Step-4 Create embedchain app based on the config" "### Step-3: Create embedchain app and define your config"
] ]
}, },
{ {
@@ -105,7 +71,15 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"app = App.from_config(config_path=\"chromadb.yaml\")" "app = App.from_config(config={\n",
"    \"vectordb\": {\n",
"        \"provider\": \"chroma\",\n",
"        \"config\": {\n",
"            \"collection_name\": \"my-collection\",\n",
"            \"host\": \"your-chromadb-url.com\",\n",
"            \"port\": 5200,\n",
"            \"allow_reset\": True\n",
"        }\n",
"    }\n",
"})"
] ]
}, },
{ {
@@ -114,7 +88,7 @@
"id": "XNXv4yZwi7ef" "id": "XNXv4yZwi7ef"
}, },
"source": [ "source": [
"### Step-5: Add data sources to your app" "### Step-4: Add data sources to your app"
] ]
}, },
{ {
@@ -134,7 +108,7 @@
"id": "_7W6fDeAjMAP" "id": "_7W6fDeAjMAP"
}, },
"source": [ "source": [
"### Step-6: All set. Now start asking questions related to your data" "### Step-5: All set. Now start asking questions related to your data"
] ]
}, },
{ {

View File

@@ -30,7 +30,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"!pip install embedchain[dataloaders,cohere]" "!pip install embedchain[cohere]"
] ]
}, },
{ {
@@ -59,46 +59,13 @@
"os.environ[\"COHERE_API_KEY\"] = \"xxx\"" "os.environ[\"COHERE_API_KEY\"] = \"xxx\""
] ]
}, },
{
"cell_type": "markdown",
"metadata": {
"id": "Ns6RhPfbiitr"
},
"source": [
"### Step-3: Define your llm and embedding model config"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S9CkxVjriotB"
},
"outputs": [],
"source": [
"config = \"\"\"\n",
"llm:\n",
" provider: cohere\n",
" config:\n",
" model: gptd-instruct-tft\n",
" temperature: 0.5\n",
" max_tokens: 1000\n",
" top_p: 1\n",
" stream: false\n",
"\"\"\"\n",
"\n",
"# Write the multi-line string to a YAML file\n",
"with open('cohere.yaml', 'w') as file:\n",
" file.write(config)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "PGt6uPLIi1CS" "id": "PGt6uPLIi1CS"
}, },
"source": [ "source": [
"### Step-4 Create embedchain app based on the config" "### Step-3: Create embedchain app and define your config"
] ]
}, },
{ {
@@ -114,7 +81,16 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"app = App.from_config(config_path=\"cohere.yaml\")" "app = App.from_config(config={\n",
"    \"llm\": {\n",
"        \"provider\": \"cohere\",\n",
"        \"config\": {\n",
"            \"model\": \"gptd-instruct-tft\",\n",
"            \"temperature\": 0.5,\n",
"            \"max_tokens\": 1000,\n",
"            \"top_p\": 1,\n",
"            \"stream\": False\n",
"        }\n",
"    }\n",
"})"
] ]
}, },
{ {
@@ -123,7 +99,7 @@
"id": "XNXv4yZwi7ef" "id": "XNXv4yZwi7ef"
}, },
"source": [ "source": [
"### Step-5: Add data sources to your app" "### Step-4: Add data sources to your app"
] ]
}, },
{ {
@@ -148,7 +124,7 @@
"id": "_7W6fDeAjMAP" "id": "_7W6fDeAjMAP"
}, },
"source": [ "source": [
"### Step-6: All set. Now start asking questions related to your data" "### Step-5: All set. Now start asking questions related to your data"
] ]
}, },
{ {

View File

@@ -26,7 +26,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"!pip install embedchain[dataloaders,elasticsearch]" "!pip install embedchain[elasticsearch]"
] ]
}, },
{ {
@@ -54,45 +54,13 @@
"os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"" "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
] ]
}, },
{
"cell_type": "markdown",
"metadata": {
"id": "Ns6RhPfbiitr"
},
"source": [
"### Step-3: Define your Vector Database config"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S9CkxVjriotB"
},
"outputs": [],
"source": [
"config = \"\"\"\n",
"vectordb:\n",
" provider: elasticsearch\n",
" config:\n",
" collection_name: 'es-index'\n",
" es_url: your-elasticsearch-url.com\n",
" allow_reset: true\n",
" api_key: xxx\n",
"\"\"\"\n",
"\n",
"# Write the multi-line string to a YAML file\n",
"with open('elasticsearch.yaml', 'w') as file:\n",
" file.write(config)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "PGt6uPLIi1CS" "id": "PGt6uPLIi1CS"
}, },
"source": [ "source": [
"### Step-4 Create embedchain app based on the config" "### Step-3: Create embedchain app and define your config"
] ]
}, },
{ {
@@ -103,7 +71,15 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"app = App.from_config(config_path=\"elasticsearch.yaml\")" "app = App.from_config(config={\n",
"    \"vectordb\": {\n",
"        \"provider\": \"elasticsearch\",\n",
"        \"config\": {\n",
"            \"collection_name\": \"es-index\",\n",
"            \"es_url\": \"your-elasticsearch-url.com\",\n",
"            \"allow_reset\": True,\n",
"            \"api_key\": \"xxx\"\n",
"        }\n",
"    }\n",
"})"
] ]
}, },
{ {
@@ -112,7 +88,7 @@
"id": "XNXv4yZwi7ef" "id": "XNXv4yZwi7ef"
}, },
"source": [ "source": [
"### Step-5: Add data sources to your app" "### Step-4: Add data sources to your app"
] ]
}, },
{ {
@@ -132,7 +108,7 @@
"id": "_7W6fDeAjMAP" "id": "_7W6fDeAjMAP"
}, },
"source": [ "source": [
"### Step-6: All set. Now start asking questions related to your data" "### Step-5: All set. Now start asking questions related to your data"
] ]
}, },
{ {

View File

@@ -30,7 +30,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"!pip install embedchain[dataloaders,opensource]" "!pip install embedchain[opensource]"
] ]
}, },
{ {
@@ -55,51 +55,13 @@
"from embedchain import App" "from embedchain import App"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {
"id": "Ns6RhPfbiitr"
},
"source": [
"### Step-3: Define your llm and embedding model config"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S9CkxVjriotB"
},
"outputs": [],
"source": [
"config = \"\"\"\n",
"llm:\n",
" provider: gpt4all\n",
" config:\n",
" model: 'orca-mini-3b-gguf2-q4_0.gguf'\n",
" temperature: 0.5\n",
" max_tokens: 1000\n",
" top_p: 1\n",
" stream: false\n",
"\n",
"embedder:\n",
" provider: gpt4all\n",
" config:\n",
" model: 'all-MiniLM-L6-v2'\n",
"\"\"\"\n",
"\n",
"# Write the multi-line string to a YAML file\n",
"with open('gpt4all.yaml', 'w') as file:\n",
" file.write(config)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "PGt6uPLIi1CS" "id": "PGt6uPLIi1CS"
}, },
"source": [ "source": [
"### Step-4 Create embedchain app based on the config" "### Step-3: Create embedchain app and define your config"
] ]
}, },
{ {
@@ -114,7 +76,24 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"app = App.from_config(config_path=\"gpt4all.yaml\")" "app = App.from_config(config={\n",
" \"llm\": {\n",
" \"provider\": \"gpt4all\",\n",
" \"config\": {\n",
" \"model\": \"orca-mini-3b-gguf2-q4_0.gguf\",\n",
" \"temperature\": 0.5,\n",
" \"max_tokens\": 1000,\n",
" \"top_p\": 1,\n",
" \"stream\": False\n",
" }\n",
" },\n",
" \"embedder\": {\n",
" \"provider\": \"gpt4all\",\n",
" \"config\": {\n",
" \"model\": \"all-MiniLM-L6-v2\"\n",
" }\n",
" }\n",
"})"
] ]
}, },
{ {
@@ -123,7 +102,7 @@
"id": "XNXv4yZwi7ef" "id": "XNXv4yZwi7ef"
}, },
"source": [ "source": [
"### Step-5: Add data sources to your app" "### Step-4: Add data sources to your app"
] ]
}, },
{ {
@@ -148,7 +127,7 @@
"id": "_7W6fDeAjMAP" "id": "_7W6fDeAjMAP"
}, },
"source": [ "source": [
"### Step-6: All set. Now start asking questions related to your data" "### Step-5: All set. Now start asking questions related to your data"
] ]
}, },
{ {

View File

@@ -31,7 +31,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"!pip install embedchain[dataloaders,huggingface_hub,opensource]" "!pip install embedchain[huggingface_hub,opensource]"
] ]
}, },
{ {
@@ -59,51 +59,13 @@
"os.environ[\"HUGGINGFACE_ACCESS_TOKEN\"] = \"hf_xxx\"" "os.environ[\"HUGGINGFACE_ACCESS_TOKEN\"] = \"hf_xxx\""
] ]
}, },
{
"cell_type": "markdown",
"metadata": {
"id": "Ns6RhPfbiitr"
},
"source": [
"### Step-3: Define your llm and embedding model config"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S9CkxVjriotB"
},
"outputs": [],
"source": [
"config = \"\"\"\n",
"llm:\n",
" provider: huggingface\n",
" config:\n",
" model: 'google/flan-t5-xxl'\n",
" temperature: 0.5\n",
" max_tokens: 1000\n",
" top_p: 0.8\n",
" stream: false\n",
"\n",
"embedder:\n",
" provider: huggingface\n",
" config:\n",
" model: 'sentence-transformers/all-mpnet-base-v2'\n",
"\"\"\"\n",
"\n",
"# Write the multi-line string to a YAML file\n",
"with open('huggingface.yaml', 'w') as file:\n",
" file.write(config)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "PGt6uPLIi1CS" "id": "PGt6uPLIi1CS"
}, },
"source": [ "source": [
"### Step-4 Create embedchain app based on the config" "### Step-3: Create embedchain app and define your config"
] ]
}, },
{ {
@@ -114,7 +76,24 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"app = App.from_config(config_path=\"huggingface.yaml\")" "app = App.from_config(config={\n",
" \"llm\": {\n",
" \"provider\": \"huggingface\",\n",
" \"config\": {\n",
" \"model\": \"google/flan-t5-xxl\",\n",
" \"temperature\": 0.5,\n",
" \"max_tokens\": 1000,\n",
" \"top_p\": 0.8,\n",
" \"stream\": False\n",
" }\n",
" },\n",
" \"embedder\": {\n",
" \"provider\": \"huggingface\",\n",
" \"config\": {\n",
" \"model\": \"sentence-transformers/all-mpnet-base-v2\"\n",
" }\n",
" }\n",
"})"
] ]
}, },
{ {
@@ -123,7 +102,7 @@
"id": "XNXv4yZwi7ef" "id": "XNXv4yZwi7ef"
}, },
"source": [ "source": [
"### Step-5: Add data sources to your app" "### Step-4: Add data sources to your app"
] ]
}, },
{ {
@@ -148,7 +127,7 @@
"id": "_7W6fDeAjMAP" "id": "_7W6fDeAjMAP"
}, },
"source": [ "source": [
"### Step-6: All set. Now start asking questions related to your data" "### Step-5: All set. Now start asking questions related to your data"
] ]
}, },
{ {

View File

@@ -31,7 +31,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"!pip install embedchain[dataloaders]" "!pip install embedchain"
] ]
}, },
{ {
@@ -60,45 +60,13 @@
"os.environ[\"JINACHAT_API_KEY\"] = \"xxx\"" "os.environ[\"JINACHAT_API_KEY\"] = \"xxx\""
] ]
}, },
{
"cell_type": "markdown",
"metadata": {
"id": "Ns6RhPfbiitr"
},
"source": [
"### Step-3: Define your llm and embedding model config"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S9CkxVjriotB"
},
"outputs": [],
"source": [
"config = \"\"\"\n",
"llm:\n",
" provider: jina\n",
" config:\n",
" temperature: 0.5\n",
" max_tokens: 1000\n",
" top_p: 1\n",
" stream: false\n",
"\"\"\"\n",
"\n",
"# Write the multi-line string to a YAML file\n",
"with open('jina.yaml', 'w') as file:\n",
" file.write(config)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "PGt6uPLIi1CS" "id": "PGt6uPLIi1CS"
}, },
"source": [ "source": [
"### Step-4 Create embedchain app based on the config" "### Step-3: Create embedchain app and define your config"
] ]
}, },
{ {
@@ -114,7 +82,15 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"app = App.from_config(config_path=\"jina.yaml\")" "app = App.from_config(config={\n",
"    \"llm\": {\n",
"        \"provider\": \"jina\",\n",
"        \"config\": {\n",
"            \"temperature\": 0.5,\n",
"            \"max_tokens\": 1000,\n",
"            \"top_p\": 1,\n",
"            \"stream\": False\n",
"        }\n",
"    }\n",
"})"
] ]
}, },
{ {
@@ -123,7 +99,7 @@
"id": "XNXv4yZwi7ef" "id": "XNXv4yZwi7ef"
}, },
"source": [ "source": [
"### Step-5: Add data sources to your app" "### Step-4: Add data sources to your app"
] ]
}, },
{ {
@@ -148,7 +124,7 @@
"id": "_7W6fDeAjMAP" "id": "_7W6fDeAjMAP"
}, },
"source": [ "source": [
"### Step-6: All set. Now start asking questions related to your data" "### Step-5: All set. Now start asking questions related to your data"
] ]
}, },
{ {

View File

@@ -30,7 +30,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"!pip install embedchain[dataloaders,llama2]" "!pip install embedchain[llama2]"
] ]
}, },
{ {
@@ -59,46 +59,13 @@
"os.environ[\"REPLICATE_API_TOKEN\"] = \"xxx\"" "os.environ[\"REPLICATE_API_TOKEN\"] = \"xxx\""
] ]
}, },
{
"cell_type": "markdown",
"metadata": {
"id": "Ns6RhPfbiitr"
},
"source": [
"### Step-3: Define your llm and embedding model config"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S9CkxVjriotB"
},
"outputs": [],
"source": [
"config = \"\"\"\n",
"llm:\n",
" provider: llama2\n",
" config:\n",
" model: 'a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5'\n",
" temperature: 0.5\n",
" max_tokens: 1000\n",
" top_p: 0.5\n",
" stream: false\n",
"\"\"\"\n",
"\n",
"# Write the multi-line string to a YAML file\n",
"with open('llama2.yaml', 'w') as file:\n",
" file.write(config)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "PGt6uPLIi1CS" "id": "PGt6uPLIi1CS"
}, },
"source": [ "source": [
"### Step-4 Create embedchain app based on the config" "### Step-3: Create embedchain app and define your config"
] ]
}, },
{ {
@@ -109,7 +76,16 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"app = App.from_config(config_path=\"llama2.yaml\")" "app = App.from_config(config={\n",
"    \"llm\": {\n",
"        \"provider\": \"llama2\",\n",
"        \"config\": {\n",
"            \"model\": \"a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5\",\n",
"            \"temperature\": 0.5,\n",
"            \"max_tokens\": 1000,\n",
"            \"top_p\": 0.5,\n",
"            \"stream\": False\n",
"        }\n",
"    }\n",
"})"
] ]
}, },
{ {
@@ -118,7 +94,7 @@
"id": "XNXv4yZwi7ef" "id": "XNXv4yZwi7ef"
}, },
"source": [ "source": [
"### Step-5: Add data sources to your app" "### Step-4: Add data sources to your app"
] ]
}, },
{ {
@@ -143,7 +119,7 @@
"id": "_7W6fDeAjMAP" "id": "_7W6fDeAjMAP"
}, },
"source": [ "source": [
"### Step-6: All set. Now start asking questions related to your data" "### Step-5: All set. Now start asking questions related to your data"
] ]
}, },
{ {

View File

@@ -24,50 +24,13 @@
"- ollama serve" "- ollama serve"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {
"id": "Ns6RhPfbiitr"
},
"source": [
"### Step-2: Define your llm and embedding model config (Going all out local inference, no need for OpenAI API Key)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "S9CkxVjriotB"
},
"outputs": [],
"source": [
"config = \"\"\"\n",
"llm:\n",
" provider: ollama\n",
" config:\n",
" model: 'llama2'\n",
" temperature: 0.5\n",
" top_p: 1\n",
" stream: true\n",
"\n",
"embedder:\n",
" provider: huggingface\n",
" config:\n",
" model: 'BAAI/bge-small-en-v1.5'\n",
"\"\"\"\n",
"\n",
"# Write the multi-line string to a YAML file\n",
"with open('ollama.yaml', 'w') as file:\n",
" file.write(config)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "PGt6uPLIi1CS" "id": "PGt6uPLIi1CS"
}, },
"source": [ "source": [
"### Step-3 Create embedchain app based on the config" "### Step-2: Create embedchain app and define your config (all local inference)"
] ]
}, },
{ {
@@ -93,7 +56,23 @@
], ],
"source": [ "source": [
"from embedchain import App\n", "from embedchain import App\n",
"app = App.from_config(config_path=\"ollama.yaml\")" "app = App.from_config(config={\n",
" \"llm\": {\n",
" \"provider\": \"ollama\",\n",
" \"config\": {\n",
" \"model\": \"llama2\",\n",
" \"temperature\": 0.5,\n",
" \"top_p\": 1,\n",
" \"stream\": True\n",
" }\n",
" },\n",
" \"embedder\": {\n",
" \"provider\": \"huggingface\",\n",
" \"config\": {\n",
" \"model\": \"BAAI/bge-small-en-v1.5\"\n",
" }\n",
" }\n",
"})"
] ]
}, },
{ {
@@ -102,7 +81,7 @@
"id": "XNXv4yZwi7ef" "id": "XNXv4yZwi7ef"
}, },
"source": [ "source": [
"### Step-4: Add data sources to your app" "### Step-3: Add data sources to your app"
] ]
}, },
{ {
@@ -159,7 +138,7 @@
"id": "_7W6fDeAjMAP" "id": "_7W6fDeAjMAP"
}, },
"source": [ "source": [
"### Step-5: All set. Now start asking questions related to your data" "### Step-4: All set. Now start asking questions related to your data"
] ]
}, },
{ {

View File

@@ -31,7 +31,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"!pip install embedchain[dataloaders]" "!pip install embedchain"
] ]
}, },
{ {
@@ -59,51 +59,13 @@
"os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"" "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
] ]
}, },
{
"cell_type": "markdown",
"metadata": {
"id": "Ns6RhPfbiitr"
},
"source": [
"### Step-3: Define your llm and embedding model config"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S9CkxVjriotB"
},
"outputs": [],
"source": [
"config = \"\"\"\n",
"llm:\n",
" provider: openai\n",
" config:\n",
" model: gpt-3.5-turbo\n",
" temperature: 0.5\n",
" max_tokens: 1000\n",
" top_p: 1\n",
" stream: false\n",
"\n",
"embedder:\n",
" provider: openai\n",
" config:\n",
" model: text-embedding-ada-002\n",
"\"\"\"\n",
"\n",
"# Write the multi-line string to a YAML file\n",
"with open('openai.yaml', 'w') as file:\n",
" file.write(config)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "PGt6uPLIi1CS" "id": "PGt6uPLIi1CS"
}, },
"source": [ "source": [
"### Step-4 Create embedchain app based on the config" "### Step-3: Create embedchain app and define your config"
] ]
}, },
{ {
@@ -114,7 +76,24 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"app = App.from_config(config_path=\"openai.yaml\")" "app = App.from_config(config={\n",
" \"llm\": {\n",
" \"provider\": \"openai\",\n",
" \"config\": {\n",
" \"model\": \"gpt-3.5-turbo\",\n",
" \"temperature\": 0.5,\n",
" \"max_tokens\": 1000,\n",
" \"top_p\": 1,\n",
" \"stream\": False\n",
" }\n",
" },\n",
" \"embedder\": {\n",
" \"provider\": \"openai\",\n",
" \"config\": {\n",
" \"model\": \"text-embedding-ada-002\"\n",
" }\n",
" }\n",
"})"
] ]
}, },
{ {
@@ -123,7 +102,7 @@
"id": "XNXv4yZwi7ef" "id": "XNXv4yZwi7ef"
}, },
"source": [ "source": [
"### Step-5: Add data sources to your app" "### Step-4: Add data sources to your app"
] ]
}, },
{ {
@@ -143,7 +122,7 @@
"id": "_7W6fDeAjMAP" "id": "_7W6fDeAjMAP"
}, },
"source": [ "source": [
"### Step-6: All set. Now start asking questions related to your data" "### Step-5: All set. Now start asking questions related to your data"
] ]
}, },
{ {
@@ -172,7 +151,8 @@
"name": "python3" "name": "python3"
}, },
"language_info": { "language_info": {
"name": "python" "name": "python",
"version": "3.11.6"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -26,7 +26,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"!pip install embedchain[dataloaders,opensearch]" "!pip install embedchain[opensearch]"
] ]
}, },
{ {
@@ -54,49 +54,13 @@
"os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"" "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
] ]
}, },
{
"cell_type": "markdown",
"metadata": {
"id": "Ns6RhPfbiitr"
},
"source": [
"### Step-3: Define your Vector Database config"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S9CkxVjriotB"
},
"outputs": [],
"source": [
"config = \"\"\"\n",
"vectordb:\n",
" provider: opensearch\n",
" config:\n",
" opensearch_url: 'your-opensearch-url.com'\n",
" http_auth:\n",
" - admin\n",
" - admin\n",
" vector_dimension: 1536\n",
" collection_name: 'my-app'\n",
" use_ssl: false\n",
" verify_certs: false\n",
"\"\"\"\n",
"\n",
"# Write the multi-line string to a YAML file\n",
"with open('opensearch.yaml', 'w') as file:\n",
" file.write(config)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "PGt6uPLIi1CS" "id": "PGt6uPLIi1CS"
}, },
"source": [ "source": [
"### Step-4 Create embedchain app based on the config" "### Step-3: Create embedchain app and define your config"
] ]
}, },
{ {
@@ -107,7 +71,17 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"app = App.from_config(config_path=\"opensearch.yaml\")" "app = App.from_config(config={\n",
"    \"vectordb\": {\n",
"        \"provider\": \"opensearch\",\n",
"        \"config\": {\n",
"            \"opensearch_url\": \"your-opensearch-url.com\",\n",
"            \"http_auth\": [\"admin\", \"admin\"],\n",
"            \"vector_dimension\": 1536,\n",
"            \"collection_name\": \"my-app\",\n",
"            \"use_ssl\": False,\n",
"            \"verify_certs\": False\n",
"        }\n",
"    }\n",
"})"
] ]
}, },
{ {
@@ -116,7 +90,7 @@
"id": "XNXv4yZwi7ef" "id": "XNXv4yZwi7ef"
}, },
"source": [ "source": [
"### Step-5: Add data sources to your app" "### Step-4: Add data sources to your app"
] ]
}, },
{ {
@@ -136,7 +110,7 @@
"id": "_7W6fDeAjMAP" "id": "_7W6fDeAjMAP"
}, },
"source": [ "source": [
"### Step-6: All set. Now start asking questions related to your data" "### Step-5: All set. Now start asking questions related to your data"
] ]
}, },
{ {

View File

@@ -26,7 +26,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"!pip install embedchain[dataloaders,pinecone]" "!pip install embedchain[pinecone]"
] ]
}, },
{ {
@@ -56,44 +56,13 @@
"os.environ[\"PINECONE_ENV\"] = \"xxx\"" "os.environ[\"PINECONE_ENV\"] = \"xxx\""
] ]
}, },
{
"cell_type": "markdown",
"metadata": {
"id": "Ns6RhPfbiitr"
},
"source": [
"### Step-3: Define your Vector Database config"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S9CkxVjriotB"
},
"outputs": [],
"source": [
"config = \"\"\"\n",
"vectordb:\n",
" provider: pinecone\n",
" config:\n",
" metric: cosine\n",
" vector_dimension: 768\n",
" collection_name: pc-index\n",
"\"\"\"\n",
"\n",
"# Write the multi-line string to a YAML file\n",
"with open('pinecone.yaml', 'w') as file:\n",
" file.write(config)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "PGt6uPLIi1CS" "id": "PGt6uPLIi1CS"
}, },
"source": [ "source": [
"### Step-4 Create embedchain app based on the config" "### Step-3: Create embedchain app and define your config"
] ]
}, },
{ {
@@ -104,7 +73,14 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"app = App.from_config(config_path=\"pinecone.yaml\")" "app = App.from_config(config={\n",
"    \"vectordb\": {\n",
"        \"provider\": \"pinecone\",\n",
"        \"config\": {\n",
"            \"metric\": \"cosine\",\n",
"            \"vector_dimension\": 768,\n",
"            \"collection_name\": \"pc-index\"\n",
"        }\n",
"    }\n",
"})"
] ]
}, },
{ {
@@ -113,7 +89,7 @@
"id": "XNXv4yZwi7ef" "id": "XNXv4yZwi7ef"
}, },
"source": [ "source": [
"### Step-5: Add data sources to your app" "### Step-4: Add data sources to your app"
] ]
}, },
{ {
@@ -133,7 +109,7 @@
"id": "_7W6fDeAjMAP" "id": "_7W6fDeAjMAP"
}, },
"source": [ "source": [
"### Step-6: All set. Now start asking questions related to your data" "### Step-5: All set. Now start asking questions related to your data"
] ]
}, },
{ {

View File

@@ -30,7 +30,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"!pip install embedchain[dataloaders,together]" "!pip install embedchain[together]"
] ]
}, },
{ {
@@ -59,44 +59,13 @@
"os.environ[\"TOGETHER_API_KEY\"] = \"\"" "os.environ[\"TOGETHER_API_KEY\"] = \"\""
] ]
}, },
{
"cell_type": "markdown",
"metadata": {
"id": "Ns6RhPfbiitr"
},
"source": [
"### Step-3: Define your llm and embedding model config"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "S9CkxVjriotB"
},
"outputs": [],
"source": [
"config = \"\"\"\n",
"llm:\n",
" provider: together\n",
" config:\n",
" model: mistralai/Mixtral-8x7B-Instruct-v0.1\n",
" temperature: 0.5\n",
" max_tokens: 1000\n",
"\"\"\"\n",
"\n",
"# Write the multi-line string to a YAML file\n",
"with open('together.yaml', 'w') as file:\n",
" file.write(config)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "PGt6uPLIi1CS" "id": "PGt6uPLIi1CS"
}, },
"source": [ "source": [
"### Step-4 Create embedchain app based on the config" "### Step-3: Create embedchain app and define your config"
] ]
}, },
{ {
@@ -112,7 +81,14 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"app = App.from_config(config_path=\"together.yaml\")" "app = App.from_config(config={\n",
"    \"llm\": {\n",
"        \"provider\": \"together\",\n",
"        \"config\": {\n",
"            \"model\": \"mistralai/Mixtral-8x7B-Instruct-v0.1\",\n",
"            \"temperature\": 0.5,\n",
"            \"max_tokens\": 1000\n",
"        }\n",
"    }\n",
"})"
] ]
}, },
{ {
@@ -121,7 +97,7 @@
"id": "XNXv4yZwi7ef" "id": "XNXv4yZwi7ef"
}, },
"source": [ "source": [
"### Step-5: Add data sources to your app" "### Step-4: Add data sources to your app"
] ]
}, },
{ {
@@ -178,7 +154,7 @@
"id": "_7W6fDeAjMAP" "id": "_7W6fDeAjMAP"
}, },
"source": [ "source": [
"### Step-6: All set. Now start asking questions related to your data" "### Step-5: All set. Now start asking questions related to your data"
] ]
}, },
{ {

View File

@@ -30,7 +30,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"!pip install embedchain[dataloaders,vertexai]" "!pip install embedchain[vertexai]"
] ]
}, },
{ {
@@ -58,50 +58,13 @@
"os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"" "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
] ]
}, },
{
"cell_type": "markdown",
"metadata": {
"id": "Ns6RhPfbiitr"
},
"source": [
"### Step-3: Define your llm and embedding model config"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S9CkxVjriotB"
},
"outputs": [],
"source": [
"config = \"\"\"\n",
"llm:\n",
" provider: vertexai\n",
" config:\n",
" model: 'chat-bison'\n",
" temperature: 0.5\n",
" max_tokens: 1000\n",
" stream: false\n",
"\n",
"embedder:\n",
" provider: vertexai\n",
" config:\n",
" model: 'textembedding-gecko'\n",
"\"\"\"\n",
"\n",
"# Write the multi-line string to a YAML file\n",
"with open('vertexai.yaml', 'w') as file:\n",
" file.write(config)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "PGt6uPLIi1CS" "id": "PGt6uPLIi1CS"
}, },
"source": [ "source": [
"### Step-4 Create embedchain app based on the config" "### Step-3: Create embedchain app and define your config"
] ]
}, },
{ {
@@ -117,7 +80,23 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"app = App.from_config(config_path=\"vertexai.yaml\")" "app = App.from_config(config={\n",
" \"llm\": {\n",
" \"provider\": \"vertexai\",\n",
" \"config\": {\n",
" \"model\": \"chat-bison\",\n",
" \"temperature\": 0.5,\n",
" \"max_tokens\": 1000,\n",
" \"stream\": False\n",
" }\n",
" },\n",
" \"embedder\": {\n",
" \"provider\": \"vertexai\",\n",
" \"config\": {\n",
" \"model\": \"textembedding-gecko\"\n",
" }\n",
" }\n",
"})"
] ]
}, },
{ {
@@ -126,7 +105,7 @@
"id": "XNXv4yZwi7ef" "id": "XNXv4yZwi7ef"
}, },
"source": [ "source": [
"### Step-5: Add data sources to your app" "### Step-4: Add data sources to your app"
] ]
}, },
{ {
@@ -146,7 +125,7 @@
"id": "_7W6fDeAjMAP" "id": "_7W6fDeAjMAP"
}, },
"source": [ "source": [
"### Step-6: All set. Now start asking questions related to your data" "### Step-5: All set. Now start asking questions related to your data"
] ]
}, },
{ {