Update version to v0.0.74 (#825)
.github/workflows/ci.yml (vendored, 5 changed lines)
@@ -24,9 +24,7 @@ jobs:
         run: poetry install --all-extras
       - name: Lint with ruff
         run: make lint
-      - name: Test with pytest
-        run: make test
-      - name: Generate coverage report
+      - name: Run tests and generate coverage report
         run: make coverage
       - name: Upload coverage reports to Codecov
         uses: codecov/codecov-action@v3
@@ -34,4 +32,3 @@ jobs:
           file: coverage.xml
         env:
           CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
-

@@ -6,11 +6,11 @@ from embedchain.chunkers.json import JSONChunker
 from embedchain.chunkers.mdx import MdxChunker
 from embedchain.chunkers.notion import NotionChunker
 from embedchain.chunkers.pdf_file import PdfFileChunker
-from embedchain.chunkers.unstructured_file import UnstructuredFileChunker
 from embedchain.chunkers.qna_pair import QnaPairChunker
 from embedchain.chunkers.sitemap import SitemapChunker
 from embedchain.chunkers.table import TableChunker
 from embedchain.chunkers.text import TextChunker
+from embedchain.chunkers.unstructured_file import UnstructuredFileChunker
 from embedchain.chunkers.web_page import WebPageChunker
 from embedchain.chunkers.xml import XmlChunker
 from embedchain.chunkers.youtube_video import YoutubeVideoChunker
@@ -28,10 +28,10 @@ from embedchain.loaders.local_text import LocalTextLoader
 from embedchain.loaders.mdx import MdxLoader
 from embedchain.loaders.pdf_file import PdfFileLoader
 from embedchain.loaders.sitemap import SitemapLoader
+from embedchain.loaders.unstructured_file import UnstructuredLoader
 from embedchain.loaders.web_page import WebPageLoader
 from embedchain.loaders.xml import XmlLoader
 from embedchain.loaders.youtube_video import YoutubeVideoLoader
-from embedchain.loaders.unstructured_file import UnstructuredLoader
 from embedchain.models.data_type import DataType
 
 

@@ -1,6 +1,7 @@
 import hashlib
 
-from langchain.document_loaders.json_loader import JSONLoader as LcJSONLoader
+from langchain.document_loaders.json_loader import \
+    JSONLoader as LangchainJSONLoader
 
 from embedchain.loaders.base_loader import BaseLoader
 
@@ -13,7 +14,7 @@ class JSONLoader(BaseLoader):
         """Load a json file. Each data point is a key value pair."""
         data = []
         data_content = []
-        loader = LcJSONLoader(content, text_content=False, jq_schema=langchain_json_jq_schema)
+        loader = LangchainJSONLoader(content, text_content=False, jq_schema=langchain_json_jq_schema)
         docs = loader.load()
         for doc in docs:
             meta_data = doc.metadata

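The two hunks above only rename the import alias (LcJSONLoader becomes LangchainJSONLoader, now written as a wrapped import); the constructor call keeps the same signature. As a minimal standalone sketch of that call pattern, assuming an illustrative people.json file on disk, a ".[]" jq schema, and the jq extra installed, none of which come from this commit:

from langchain.document_loaders.json_loader import \
    JSONLoader as LangchainJSONLoader

# Same constructor arguments as in the hunk above; the file path and jq
# schema are made-up example values.
loader = LangchainJSONLoader("people.json", text_content=False, jq_schema=".[]")
docs = loader.load()
for doc in docs:
    # Each langchain Document carries the extracted text and its metadata.
    print(doc.metadata.get("seq_num"), doc.page_content)
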
@@ -25,7 +25,7 @@ class IndirectDataType(Enum):
     CSV = "csv"
     MDX = "mdx"
     IMAGES = "images"
-    UNSTRUCTURED = 'unstructured'
+    UNSTRUCTURED = "unstructured"
     JSON = "json"
 
 

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "embedchain"
-version = "0.0.73"
+version = "0.0.74"
 description = "Data platform for LLMs - Load, index, retrieve and sync any unstructured data"
 authors = ["Taranjeet Singh, Deshraj Yadav"]
 license = "Apache License"

@@ -1,5 +1,6 @@
 from embedchain.chunkers.docs_site import DocsSiteChunker
 from embedchain.chunkers.docx_file import DocxFileChunker
+from embedchain.chunkers.json import JSONChunker
 from embedchain.chunkers.mdx import MdxChunker
 from embedchain.chunkers.notion import NotionChunker
 from embedchain.chunkers.pdf_file import PdfFileChunker
@@ -10,7 +11,6 @@ from embedchain.chunkers.text import TextChunker
 from embedchain.chunkers.web_page import WebPageChunker
 from embedchain.chunkers.xml import XmlChunker
 from embedchain.chunkers.youtube_video import YoutubeVideoChunker
-from embedchain.chunkers.json import JSONChunker
 from embedchain.config.add_config import ChunkerConfig
 
 chunker_config = ChunkerConfig(chunk_size=500, chunk_overlap=0, length_function=len)

@@ -2,7 +2,8 @@ import hashlib
 from unittest.mock import patch
 
 from langchain.docstore.document import Document
-from langchain.document_loaders.json_loader import JSONLoader as LcJSONLoader
+from langchain.document_loaders.json_loader import \
+    JSONLoader as LangchainJSONLoader
 
 from embedchain.loaders.json import JSONLoader
 
@@ -12,10 +13,10 @@ def test_load_data():
         Document(page_content="content1", metadata={"seq_num": 1}),
         Document(page_content="content2", metadata={"seq_num": 2}),
     ]
-    with patch.object(LcJSONLoader, "load", return_value=mock_document):
+    with patch.object(LangchainJSONLoader, "load", return_value=mock_document):
         content = "temp.json"
 
-        result = JsonLoader.load_data(content)
+        result = JSONLoader.load_data(content)
 
         assert "doc_id" in result
         assert "data" in result
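For anyone reproducing the fixed test outside pytest, the same patched call path can be run directly. The mocked documents and the "temp.json" name are taken from the test above; because LangchainJSONLoader.load is patched, no real file has to exist, and nothing beyond the two asserted keys is guaranteed by this diff:

from unittest.mock import patch

from langchain.docstore.document import Document
from langchain.document_loaders.json_loader import \
    JSONLoader as LangchainJSONLoader

from embedchain.loaders.json import JSONLoader

mock_document = [
    Document(page_content="content1", metadata={"seq_num": 1}),
    Document(page_content="content2", metadata={"seq_num": 2}),
]

# Patch the langchain loader so load_data never touches the filesystem.
with patch.object(LangchainJSONLoader, "load", return_value=mock_document):
    result = JSONLoader.load_data("temp.json")

# Expected to include at least "doc_id" and "data".
print(sorted(result.keys()))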