[Bug fix] Fix issue with gmail loader (#1228)

This commit is contained in:
Deshraj Yadav
2024-01-29 18:36:02 +05:30
committed by GitHub
parent 31bb0e7f0f
commit 2985b667b0
4 changed files with 28 additions and 9 deletions

View File

@@ -39,11 +39,14 @@ class BaseChunker(JSONSerializable):
for data in data_records:
content = data["content"]
meta_data = data["meta_data"]
metadata = data["meta_data"]
# add data type to meta data to allow query using data type
meta_data["data_type"] = self.data_type.value
meta_data["doc_id"] = doc_id
url = meta_data["url"]
metadata["data_type"] = self.data_type.value
metadata["doc_id"] = doc_id
# TODO: Currently defaulting to the src as the url. This is done intentionally since some
# of the data types, like the 'gmail' loader, don't have the url in the meta data.
url = metadata.get("url", src)
chunks = self.get_chunks(content)
for chunk in chunks:
@@ -53,7 +56,7 @@ class BaseChunker(JSONSerializable):
id_map[chunk_id] = True
chunk_ids.append(chunk_id)
documents.append(chunk)
metadatas.append(meta_data)
metadatas.append(metadata)
return {
"documents": documents,
"ids": chunk_ids,