From 0efbc80ac9723afbecc9290204eb11471c7f968c Mon Sep 17 00:00:00 2001 From: Deven Patel Date: Fri, 15 Dec 2023 06:12:53 +0530 Subject: [PATCH] [Doc update] update mistral example (#1012) --- docs/get-started/faq.mdx | 6 +++++- embedchain/loaders/directory_loader.py | 10 +++++----- embedchain/utils.py | 3 ++- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/docs/get-started/faq.mdx b/docs/get-started/faq.mdx index d5899f91..096a7330 100644 --- a/docs/get-started/faq.mdx +++ b/docs/get-started/faq.mdx @@ -13,7 +13,6 @@ Use the model provided on huggingface: `mistralai/Mistral-7B-v0.1` import os from embedchain import Pipeline as App -os.environ["OPENAI_API_KEY"] = "sk-xxx" os.environ["HUGGINGFACE_ACCESS_TOKEN"] = "hf_your_token" app = App.from_config("huggingface.yaml") @@ -27,6 +26,11 @@ llm: max_tokens: 1000 top_p: 0.5 stream: false + +embedder: + provider: huggingface + config: + model: 'sentence-transformers/all-mpnet-base-v2' ``` diff --git a/embedchain/loaders/directory_loader.py b/embedchain/loaders/directory_loader.py index bc670bc6..d4941939 100644 --- a/embedchain/loaders/directory_loader.py +++ b/embedchain/loaders/directory_loader.py @@ -1,14 +1,14 @@ -from pathlib import Path import hashlib import logging -from typing import Optional, Dict, Any +from pathlib import Path +from typing import Any, Dict, Optional -from embedchain.utils import detect_datatype +from embedchain.config import AddConfig +from embedchain.data_formatter.data_formatter import DataFormatter from embedchain.helpers.json_serializable import register_deserializable from embedchain.loaders.base_loader import BaseLoader from embedchain.loaders.local_text import LocalTextLoader -from embedchain.data_formatter.data_formatter import DataFormatter -from embedchain.config import AddConfig +from embedchain.utils import detect_datatype @register_deserializable diff --git a/embedchain/utils.py b/embedchain/utils.py index 7396fb5c..4db82d60 100644 --- a/embedchain/utils.py +++ b/embedchain/utils.py @@ -196,7 +196,8 @@ def detect_datatype(source: Any) -> DataType: formatted_source = format_source(str(source), 30) if url: - from langchain.document_loaders.youtube import ALLOWED_NETLOCK as YOUTUBE_ALLOWED_NETLOCS + from langchain.document_loaders.youtube import \ + ALLOWED_NETLOCK as YOUTUBE_ALLOWED_NETLOCS if url.netloc in YOUTUBE_ALLOWED_NETLOCS: logging.debug(f"Source of `{formatted_source}` detected as `youtube_video`.")