Deploy remaining bots and fix schema validation (#927)

Sidharth Mohanty
2023-11-10 00:14:47 +05:30
committed by GitHub
parent 0d568c758b
commit 14712cac88
13 changed files with 110 additions and 26 deletions

View File

@@ -1,7 +1,7 @@
 app:
   config:
     id: 'my-app'
-    log_level: 'WARN'
+    log_level: 'WARNING'
     collect_metrics: true
     collection_name: 'my-app'
@@ -30,4 +30,4 @@ embedder:
   provider: openai
   config:
     model: 'text-embedding-ada-002'
-    deployment_name: null
+    deployment_name: 'my-app'
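Mirroring the new test added at the end of this commit, one quick way to check an edited config like this is to load it and pass it through `validate_yaml_config`. A minimal sketch; the `configs/azure_openai.yaml` path is an assumption (this hunk does not name the file), so substitute the config you actually edited:

```python
# Hypothetical spot-check: load a config YAML and run it through the schema
# validator updated in this commit. Raises a schema error if the config is invalid.
import yaml

from embedchain.utils import validate_yaml_config

with open("configs/azure_openai.yaml", "r") as f:  # assumed path
    config = yaml.safe_load(f)

validate_yaml_config(config)
print("config is valid")
```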

View File

@@ -1,21 +1,46 @@
 ---
-title: '📱 Telegram Bot'
+title: "📱 Telegram Bot"
 ---

 ### 🖼️ Template Setup

-- Fork [this](https://replit.com/@taranjeetio/EC-Telegram-Bot-Template?v=1#README.md) replit template.
-- Set your `OPENAI_API_KEY` in Secrets.
 - Open the Telegram app and search for the `BotFather` user.
 - Start a chat with BotFather and use the `/newbot` command to create a new bot.
 - Follow the instructions to choose a name and username for your bot.
 - Once the bot is created, BotFather will provide you with a unique token for your bot.
-- Set this token as `TELEGRAM_BOT_TOKEN` in Secrets.
+
+<Tabs>
+  <Tab title="docker">
+    ```bash
+    docker run --name telegram-bot -e OPENAI_API_KEY=sk-xxx -e TELEGRAM_BOT_TOKEN=xxx -p 8000:8000 embedchain/telegram-bot
+    ```
+    <Note>
+      If you wish to use **Docker**, you will need to host your bot on a publicly reachable server.
+      You can use [ngrok](https://ngrok.com/) to expose your localhost to the internet and then set the webhook using the ngrok URL.
+    </Note>
+  </Tab>
+  <Tab title="replit">
+    <Card>
+      Fork <ins>**[this](https://replit.com/@taranjeetio/EC-Telegram-Bot-Template?v=1#README.md)**</ins> replit template.
+    </Card>
+    - Set your `OPENAI_API_KEY` in Secrets.
+    - Set the unique token as `TELEGRAM_BOT_TOKEN` in Secrets.
+  </Tab>
+</Tabs>
+
 - Click on `Run` in the replit container and a URL will get generated for your bot.
 - Now set your webhook by opening the following URL in your browser (or from a script, as sketched after these steps):
 ```url
 https://api.telegram.org/bot<Your_Telegram_Bot_Token>/setWebhook?url=<Replit_Generated_URL>
 ```
 - When you get a successful response in your browser, your bot is ready to use.
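If you prefer to set the webhook from a script rather than the browser, here is a minimal sketch using the same Telegram Bot API `setWebhook` endpoint; it assumes the `requests` package is installed, and the token and URL placeholders are the same ones shown above:

```python
# Hypothetical helper: call Telegram's setWebhook endpoint and print the reply,
# equivalent to opening the setWebhook URL above in a browser.
import requests

BOT_TOKEN = "xxx"  # your TELEGRAM_BOT_TOKEN
WEBHOOK_URL = "https://<Replit_Generated_URL>"  # the replit (or ngrok) URL

response = requests.get(
    f"https://api.telegram.org/bot{BOT_TOKEN}/setWebhook",
    params={"url": WEBHOOK_URL},
    timeout=10,
)
print(response.json())  # a successful call returns {"ok": true, "result": true, ...}
```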
### 🚀 Usage Instructions

View File

@@ -12,10 +12,19 @@ pip install --upgrade embedchain
 2. Launch your WhatsApp bot:

+<Tabs>
+  <Tab title="docker">
+    ```bash
+    docker run --name whatsapp-bot -e OPENAI_API_KEY=sk-xxx -p 8000:8000 embedchain/whatsapp-bot
+    ```
+  </Tab>
+  <Tab title="python">
+    ```bash
+    python -m embedchain.bots.whatsapp --port 5000
+    ```
+  </Tab>
+</Tabs>
-```bash
-python -m embedchain.bots.whatsapp --port 5000
-```

 If your bot needs to be accessible online, use your machine's public IP or DNS. Otherwise, use a tunneling service such as [ngrok](https://ngrok.com/) to make your local bot accessible.
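As a small aside, the same launch step can be scripted; the sketch below simply wraps the documented `python -m embedchain.bots.whatsapp` command, and the default port of 8000 is an assumption chosen to match the Docker example above:

```python
# Hypothetical launcher: start the WhatsApp bot as a child process by wrapping
# the documented `python -m embedchain.bots.whatsapp` CLI invocation.
import subprocess
import sys


def launch_whatsapp_bot(port: int = 8000) -> subprocess.Popen:
    """Start the bot on the given port and return the process handle."""
    return subprocess.Popen(
        [sys.executable, "-m", "embedchain.bots.whatsapp", "--port", str(port)]
    )


if __name__ == "__main__":
    bot_process = launch_whatsapp_bot()
    bot_process.wait()
```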

View File

@@ -138,8 +138,7 @@ def detect_datatype(source: Any) -> DataType:
     formatted_source = format_source(str(source), 30)

     if url:
-        from langchain.document_loaders.youtube import \
-            ALLOWED_NETLOCK as YOUTUBE_ALLOWED_NETLOCS
+        from langchain.document_loaders.youtube import ALLOWED_NETLOCK as YOUTUBE_ALLOWED_NETLOCS

         if url.netloc in YOUTUBE_ALLOWED_NETLOCS:
             logging.debug(f"Source of `{formatted_source}` detected as `youtube_video`.")
@@ -309,7 +308,7 @@ def validate_yaml_config(config_data):
"gpt4all",
"jina",
"llama2",
"vertex_ai",
"vertexai",
),
Optional("config"): {
Optional("model"): str,
@@ -329,23 +328,17 @@ def validate_yaml_config(config_data):
Optional("provider"): Or(
"chroma", "elasticsearch", "opensearch", "pinecone", "qdrant", "weaviate", "zilliz"
),
Optional("config"): {
Optional("collection_name"): str,
Optional("dir"): str,
Optional("allow_reset"): bool,
Optional("host"): str,
Optional("port"): str,
},
Optional("config"): object, # TODO: add particular config schema for each provider
},
Optional("embedder"): {
Optional("provider"): Or("openai", "gpt4all", "huggingface", "vertexai"),
Optional("provider"): Or("openai", "gpt4all", "huggingface", "vertexai", "azure_openai"),
Optional("config"): {
Optional("model"): Optional(str),
Optional("deployment_name"): Optional(str),
},
},
Optional("embedding_model"): {
Optional("provider"): Or("openai", "gpt4all", "huggingface", "vertexai"),
Optional("provider"): Or("openai", "gpt4all", "huggingface", "vertexai", "azure_openai"),
Optional("config"): {
Optional("model"): str,
Optional("deployment_name"): str,

View File

@@ -0,0 +1,2 @@
TELEGRAM_BOT_TOKEN=
OPENAI_API_KEY=

View File

@@ -0,0 +1,11 @@
FROM python:3.11-slim
WORKDIR /usr/src/
COPY requirements.txt .
RUN pip install -r requirements.txt
COPY . .
EXPOSE 8000
CMD ["python", "telegram_bot.py"]

View File

@@ -63,4 +63,4 @@ def send_message(chat_id, text):
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=5000, debug=False)
+    app.run(host="0.0.0.0", port=8000, debug=False)

View File

@@ -1,2 +0,0 @@
TELEGRAM_BOT_TOKEN=""
OPENAI_API_KEY=""

View File

@@ -0,0 +1 @@
OPENAI_API_KEY=

View File

@@ -0,0 +1,11 @@
FROM python:3.11-slim
WORKDIR /usr/src/
COPY requirements.txt .
RUN pip install -r requirements.txt
COPY . .
EXPOSE 8000
CMD ["python", "whatsapp_bot.py"]

View File

@@ -1 +0,0 @@
OPENAI_API_KEY=""

View File

@@ -48,4 +48,4 @@ def query(message):
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=5000, debug=False)
+    app.run(host="0.0.0.0", port=8000, debug=False)

tests/test_utils.py Normal file
View File

@@ -0,0 +1,35 @@
import yaml

from embedchain.utils import validate_yaml_config

CONFIG_YAMLS = [
    "configs/anthropic.yaml",
    "configs/azure_openai.yaml",
    "configs/chroma.yaml",
    "configs/chunker.yaml",
    "configs/cohere.yaml",
    "configs/full-stack.yaml",
    "configs/gpt4all.yaml",
    "configs/huggingface.yaml",
    "configs/jina.yaml",
    "configs/llama2.yaml",
    "configs/opensearch.yaml",
    "configs/opensource.yaml",
    "configs/pinecone.yaml",
    "configs/vertexai.yaml",
    "configs/weaviate.yaml",
]


class TestAllConfigYamls:
    def test_all_config_yamls(self):
        """Test that all config yamls are valid."""
        for config_yaml in CONFIG_YAMLS:
            with open(config_yaml, "r") as f:
                config = yaml.safe_load(f)
            assert config is not None

            try:
                validate_yaml_config(config)
            except Exception as e:
                print(f"Error in {config_yaml}: {e}")
                raise e
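One detail worth noting: the CONFIG_YAMLS paths are relative, so this test has to run from the repository root. A tiny sketch for invoking just this file, assuming pytest is available (the class/method naming above follows pytest conventions):

```python
# Hypothetical convenience runner: execute only the config-validation tests.
# Run from the repository root so the relative configs/*.yaml paths resolve.
import pytest

raise SystemExit(pytest.main(["-q", "tests/test_utils.py"]))
```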