[Feature Improvement] Update JSON Loader to support loading data from more sources (#898)

Co-authored-by: Deven Patel <deven298@yahoo.com>
This commit is contained in:
Deven Patel
2023-11-03 10:00:27 -07:00
committed by GitHub
parent e2546a653d
commit 53037b5ed8
6 changed files with 166 additions and 67 deletions

View File

@@ -1,3 +1,4 @@
import json
import logging
import os
import re
@@ -261,6 +262,24 @@ def detect_datatype(source: Any) -> DataType:
# TODO: check if source is gmail query
# check if the source is valid json string
if is_valid_json_string(source):
logging.debug(f"Source of `{formatted_source}` detected as `json`.")
return DataType.JSON
# Use text as final fallback.
logging.debug(f"Source of `{formatted_source}` detected as `text`.")
return DataType.TEXT
# check if the source is valid json string
def is_valid_json_string(source: str):
try:
_ = json.loads(source)
return True
except json.JSONDecodeError:
logging.error(
"Insert valid string format of JSON. \
Check the docs to see the supported formats - `https://docs.embedchain.ai/data-sources/json`"
)
return False