14 lines
426 B
Python
14 lines
426 B
Python
from langchain.document_loaders import UnstructuredWordDocumentLoader
|
|
|
|
|
|
class DocFileLoader:
|
|
def load_data(self, url):
|
|
loader = UnstructuredWordDocumentLoader(url)
|
|
output = []
|
|
data = loader.load()
|
|
content = data[0].page_content
|
|
meta_data = data[0].metadata
|
|
meta_data["url"] = "local"
|
|
output.append({"content": content, "meta_data": meta_data})
|
|
return output
|