From b47405e1bdb23983115955c9f5adb04d4f3204ac Mon Sep 17 00:00:00 2001 From: Deshraj Yadav Date: Wed, 18 Oct 2023 17:12:51 -0700 Subject: [PATCH] Update version to v0.0.74 (#825) --- .github/workflows/ci.yml | 5 +---- embedchain/data_formatter/data_formatter.py | 4 ++-- embedchain/loaders/json.py | 5 +++-- embedchain/models/data_type.py | 2 +- pyproject.toml | 2 +- tests/chunkers/test_chunkers.py | 2 +- tests/loaders/test_json.py | 7 ++++--- 7 files changed, 13 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2fcbbb07..87ef282e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,9 +24,7 @@ jobs: run: poetry install --all-extras - name: Lint with ruff run: make lint - - name: Test with pytest - run: make test - - name: Generate coverage report + - name: Run tests and generate coverage report run: make coverage - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v3 @@ -34,4 +32,3 @@ jobs: file: coverage.xml env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - diff --git a/embedchain/data_formatter/data_formatter.py b/embedchain/data_formatter/data_formatter.py index 86af515d..e45414f6 100644 --- a/embedchain/data_formatter/data_formatter.py +++ b/embedchain/data_formatter/data_formatter.py @@ -6,11 +6,11 @@ from embedchain.chunkers.json import JSONChunker from embedchain.chunkers.mdx import MdxChunker from embedchain.chunkers.notion import NotionChunker from embedchain.chunkers.pdf_file import PdfFileChunker -from embedchain.chunkers.unstructured_file import UnstructuredFileChunker from embedchain.chunkers.qna_pair import QnaPairChunker from embedchain.chunkers.sitemap import SitemapChunker from embedchain.chunkers.table import TableChunker from embedchain.chunkers.text import TextChunker +from embedchain.chunkers.unstructured_file import UnstructuredFileChunker from embedchain.chunkers.web_page import WebPageChunker from embedchain.chunkers.xml import XmlChunker from embedchain.chunkers.youtube_video import YoutubeVideoChunker @@ -28,10 +28,10 @@ from embedchain.loaders.local_text import LocalTextLoader from embedchain.loaders.mdx import MdxLoader from embedchain.loaders.pdf_file import PdfFileLoader from embedchain.loaders.sitemap import SitemapLoader +from embedchain.loaders.unstructured_file import UnstructuredLoader from embedchain.loaders.web_page import WebPageLoader from embedchain.loaders.xml import XmlLoader from embedchain.loaders.youtube_video import YoutubeVideoLoader -from embedchain.loaders.unstructured_file import UnstructuredLoader from embedchain.models.data_type import DataType diff --git a/embedchain/loaders/json.py b/embedchain/loaders/json.py index 1104a054..b32c34f3 100644 --- a/embedchain/loaders/json.py +++ b/embedchain/loaders/json.py @@ -1,6 +1,7 @@ import hashlib -from langchain.document_loaders.json_loader import JSONLoader as LcJSONLoader +from langchain.document_loaders.json_loader import \ + JSONLoader as LangchainJSONLoader from embedchain.loaders.base_loader import BaseLoader @@ -13,7 +14,7 @@ class JSONLoader(BaseLoader): """Load a json file. Each data point is a key value pair.""" data = [] data_content = [] - loader = LcJSONLoader(content, text_content=False, jq_schema=langchain_json_jq_schema) + loader = LangchainJSONLoader(content, text_content=False, jq_schema=langchain_json_jq_schema) docs = loader.load() for doc in docs: meta_data = doc.metadata diff --git a/embedchain/models/data_type.py b/embedchain/models/data_type.py index 2c646a9e..9b3eb80d 100644 --- a/embedchain/models/data_type.py +++ b/embedchain/models/data_type.py @@ -25,7 +25,7 @@ class IndirectDataType(Enum): CSV = "csv" MDX = "mdx" IMAGES = "images" - UNSTRUCTURED = 'unstructured' + UNSTRUCTURED = "unstructured" JSON = "json" diff --git a/pyproject.toml b/pyproject.toml index 85d78916..411673e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "embedchain" -version = "0.0.73" +version = "0.0.74" description = "Data platform for LLMs - Load, index, retrieve and sync any unstructured data" authors = ["Taranjeet Singh, Deshraj Yadav"] license = "Apache License" diff --git a/tests/chunkers/test_chunkers.py b/tests/chunkers/test_chunkers.py index cfe63cc2..29ddd28c 100644 --- a/tests/chunkers/test_chunkers.py +++ b/tests/chunkers/test_chunkers.py @@ -1,5 +1,6 @@ from embedchain.chunkers.docs_site import DocsSiteChunker from embedchain.chunkers.docx_file import DocxFileChunker +from embedchain.chunkers.json import JSONChunker from embedchain.chunkers.mdx import MdxChunker from embedchain.chunkers.notion import NotionChunker from embedchain.chunkers.pdf_file import PdfFileChunker @@ -10,7 +11,6 @@ from embedchain.chunkers.text import TextChunker from embedchain.chunkers.web_page import WebPageChunker from embedchain.chunkers.xml import XmlChunker from embedchain.chunkers.youtube_video import YoutubeVideoChunker -from embedchain.chunkers.json import JSONChunker from embedchain.config.add_config import ChunkerConfig chunker_config = ChunkerConfig(chunk_size=500, chunk_overlap=0, length_function=len) diff --git a/tests/loaders/test_json.py b/tests/loaders/test_json.py index 8b90b753..4ffe925b 100644 --- a/tests/loaders/test_json.py +++ b/tests/loaders/test_json.py @@ -2,7 +2,8 @@ import hashlib from unittest.mock import patch from langchain.docstore.document import Document -from langchain.document_loaders.json_loader import JSONLoader as LcJSONLoader +from langchain.document_loaders.json_loader import \ + JSONLoader as LangchainJSONLoader from embedchain.loaders.json import JSONLoader @@ -12,10 +13,10 @@ def test_load_data(): Document(page_content="content1", metadata={"seq_num": 1}), Document(page_content="content2", metadata={"seq_num": 2}), ] - with patch.object(LcJSONLoader, "load", return_value=mock_document): + with patch.object(LangchainJSONLoader, "load", return_value=mock_document): content = "temp.json" - result = JsonLoader.load_data(content) + result = JSONLoader.load_data(content) assert "doc_id" in result assert "data" in result