diff --git a/docs/components/data-sources/overview.mdx b/docs/components/data-sources/overview.mdx index 515f864c..385ce7e9 100644 --- a/docs/components/data-sources/overview.mdx +++ b/docs/components/data-sources/overview.mdx @@ -8,6 +8,7 @@ Embedchain comes with built-in support for various data sources. We handle the c + diff --git a/docs/components/data-sources/youtube-channel.mdx b/docs/components/data-sources/youtube-channel.mdx new file mode 100644 index 00000000..4f54046c --- /dev/null +++ b/docs/components/data-sources/youtube-channel.mdx @@ -0,0 +1,20 @@ +--- +title: '📽️ Youtube Channel' +--- + +To add all the videos from a YouTube channel to your app, use the data_type as `youtube_channel`. + + +Make sure you have all the required packages installed before using this data type. You can install them by running the following command in your terminal. + +```bash +pip install -U "embedchain[youtube]" +``` + + +```python +from embedchain import Pipeline as App + +app = App() +app.add("@channel_name", data_type="youtube_channel") +``` diff --git a/docs/components/data-sources/youtube-video.mdx b/docs/components/data-sources/youtube-video.mdx index e5e70c80..2c2ffe6b 100644 --- a/docs/components/data-sources/youtube-video.mdx +++ b/docs/components/data-sources/youtube-video.mdx @@ -2,8 +2,7 @@ title: '📺 Youtube' --- - -To add any youtube video to your app, use the data_type (first argument to `.add()` method) as `youtube_video`. Eg: +To add any youtube video to your app, use the data_type as `youtube_video`. 
Eg: ```python from embedchain import Pipeline as App diff --git a/embedchain/embedchain.py b/embedchain/embedchain.py index e4144545..cada461d 100644 --- a/embedchain/embedchain.py +++ b/embedchain/embedchain.py @@ -189,8 +189,7 @@ class EmbedChain(JSONSerializable): data_type = detect_datatype(source) # `source_hash` is the md5 hash of the source argument - hash_object = hashlib.md5(str(source).encode("utf-8")) - source_hash = hash_object.hexdigest() + source_hash = hashlib.md5(str(source).encode("utf-8")).hexdigest() self.user_asks.append([source, data_type.value, metadata]) diff --git a/embedchain/loaders/directory_loader.py b/embedchain/loaders/directory_loader.py index 72953f75..bcc225f5 100644 --- a/embedchain/loaders/directory_loader.py +++ b/embedchain/loaders/directory_loader.py @@ -27,6 +27,7 @@ class DirectoryLoader(BaseLoader): if not directory_path.is_dir(): raise ValueError(f"Invalid path: {path}") + logging.info(f"Loading data from directory: {path}") data_list = self._process_directory(directory_path) doc_id = hashlib.sha256((str(data_list) + str(directory_path)).encode()).hexdigest() @@ -44,6 +45,8 @@ class DirectoryLoader(BaseLoader): if file_path.is_file() and (not self.extensions or any(file_path.suffix == ext for ext in self.extensions)): loader = self._predict_loader(file_path) data_list.extend(loader.load_data(str(file_path))["data"]) + elif file_path.is_dir(): + logging.info(f"Loading data from directory: {file_path}") return data_list def _predict_loader(self, file_path: Path) -> BaseLoader: diff --git a/embedchain/utils.py b/embedchain/utils.py index 9a149a87..b41030f3 100644 --- a/embedchain/utils.py +++ b/embedchain/utils.py @@ -358,10 +358,6 @@ def is_valid_json_string(source: str): _ = json.loads(source) return True except json.JSONDecodeError: - logging.error( - "Insert valid string format of JSON. \ - Check the docs to see the supported formats - `https://docs.embedchain.ai/data-sources/json`" - ) return False