feat: add support for mdx file (#604)

This commit is contained in:
Taranjeet Singh
2023-09-12 16:43:18 -07:00
committed by GitHub
parent ac08638a63
commit 36b26e08c3
5 changed files with 63 additions and 0 deletions

28
embedchain/loaders/mdx.py Normal file
View File

@@ -0,0 +1,28 @@
import hashlib
from langchain.document_loaders import PyPDFLoader
from embedchain.helper.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
from embedchain.utils import clean_string
@register_deserializable
class MdxLoader(BaseLoader):
    """Loader that turns a local mdx file into a single raw-text document."""

    def load_data(self, url):
        """Read the mdx file at ``url`` and wrap its text in a result dict.

        The file is opened in text mode and decoded as UTF-8. Its text is
        returned unchanged; no parsing or cleaning is applied here.

        Args:
            url: Path of the mdx file. It is passed to ``open`` and is also
                concatenated with the file text when computing the id, so
                it needs to be a ``str``.

        Returns:
            A dict with two keys: ``"doc_id"``, the SHA-256 hex digest of
            the file text followed by ``url``; and ``"data"``, a
            one-element list holding a dict with the file text under
            ``"content"`` and ``{"url": url}`` under ``"meta_data"``.
        """
        with open(url, "r", encoding="utf-8") as mdx_file:
            text = mdx_file.read()

        # Hash the text together with the path, so the id changes when
        # either the contents or the location of the file changes.
        digest = hashlib.sha256((text + url).encode())

        document = {"content": text, "meta_data": {"url": url}}
        return {"doc_id": digest.hexdigest(), "data": [document]}