Clean json data before loading (#895)
Co-authored-by: Deven Patel <deven298@yahoo.com>
This commit is contained in:
@@ -6,6 +6,7 @@ import re
|
||||
import requests
|
||||
|
||||
from embedchain.loaders.base_loader import BaseLoader
|
||||
+ from embedchain.utils import clean_string
|
||||
|
||||
VALID_URL_PATTERN = "^https:\/\/[0-9A-z.]+.[0-9A-z.]+.[a-z]+\/.*\.json$"
|
||||
|
||||
@@ -49,7 +50,7 @@ class JSONLoader(BaseLoader):
|
||||
|
||||
docs = loader.load_data(json_data)
|
||||
for doc in docs:
|
||||
- doc_content = doc.text
|
||||
+ doc_content = clean_string(doc.text)
|
||||
data.append({"content": doc_content, "meta_data": {"url": content}})
|
||||
data_content.append(doc_content)
|
||||
doc_id = hashlib.sha256((content + ", ".join(data_content)).encode()).hexdigest()
|
||||
|
||||
Reference in New Issue
Block a user