diff --git a/docs/components/data-sources/directory.mdx b/docs/components/data-sources/directory.mdx new file mode 100644 index 00000000..f37e373e --- /dev/null +++ b/docs/components/data-sources/directory.mdx @@ -0,0 +1,41 @@ +--- +title: '📁 Directory' +--- + +To use an entire directory as a data source, just add `data_type` as `directory` and pass in the path of the local directory. + +### Without customization + +```python +import os +from embedchain import Pipeline as App + +os.environ["OPENAI_API_KEY"] = "sk-xxx" + +app = App() +app.add("./elon-musk", data_type="directory") +response = app.query("list all files") +print(response) +# Answer: Files are elon-musk-1.txt, elon-musk-2.pdf. +``` + +### Customization + +```python +import os +from embedchain import Pipeline as App +from embedchain.loaders.directory_loader import DirectoryLoader + +os.environ["OPENAI_API_KEY"] = "sk-xxx" +lconfig = { + "recursive": True, + "extensions": [".txt"] +} +loader = DirectoryLoader(config=lconfig) +app = App() +app.add("./elon-musk", loader=loader) +response = app.query("what are all the files related to?") +print(response) + +# Answer: The files are related to Elon Musk. +``` diff --git a/docs/components/data-sources/overview.mdx b/docs/components/data-sources/overview.mdx index ec9ecade..515f864c 100644 --- a/docs/components/data-sources/overview.mdx +++ b/docs/components/data-sources/overview.mdx @@ -29,6 +29,7 @@ Embedchain comes with built-in support for various data sources. We handle the c +
diff --git a/docs/mint.json b/docs/mint.json index 257a73bc..9e21810e 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -116,7 +116,8 @@ "components/data-sources/discourse", "components/data-sources/substack", "components/data-sources/discord", - "components/data-sources/beehiiv" + "components/data-sources/beehiiv", + "components/data-sources/directory" ] }, "components/data-sources/data-type-handling"