[Feature] Add Postgres data loader (#918)

Co-authored-by: Deven Patel <deven298@yahoo.com>
This commit is contained in:
Deven Patel
2023-11-08 23:50:46 -08:00
committed by GitHub
parent f7dd65a3de
commit 7de8d85199
12 changed files with 285 additions and 27 deletions

View File

@@ -6,6 +6,7 @@ from embedchain.chunkers.mdx import MdxChunker
from embedchain.chunkers.notion import NotionChunker
from embedchain.chunkers.openapi import OpenAPIChunker
from embedchain.chunkers.pdf_file import PdfFileChunker
from embedchain.chunkers.postgres import PostgresChunker
from embedchain.chunkers.qna_pair import QnaPairChunker
from embedchain.chunkers.sitemap import SitemapChunker
from embedchain.chunkers.table import TableChunker
@@ -33,6 +34,7 @@ chunker_common_config = {
JSONChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
OpenAPIChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
GmailChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
PostgresChunker: {"chunk_size": 1000, "chunk_overlap": 0, "length_function": len},
}

View File

@@ -63,5 +63,5 @@ def test_add_after_reset(app_instance, mocker):
def test_add_with_incorrect_content(app_instance, mocker):
# Passes a list holding one dict to ``add`` with data_type="json" and expects
# the call to raise.
content = [{"foo": "bar"}]
# NOTE(review): two ``pytest.raises`` lines appear back to back here. This
# looks like diff output with the +/- markers stripped (ValueError being the
# removed expectation, TypeError the added one) - confirm against the repo
# before treating this as a nested ``with``.
with pytest.raises(ValueError):
with pytest.raises(TypeError):
app_instance.add(content, data_type="json")

View File

@@ -0,0 +1,60 @@
from unittest.mock import MagicMock
import psycopg
import pytest
from embedchain.loaders.postgres import PostgresLoader
@pytest.fixture
def postgres_loader(mocker):
    """Yield a ``PostgresLoader`` created while ``psycopg.connect`` is patched.

    The patch is installed through the pytest-mock ``mocker`` fixture, which
    reverts it automatically when the test finishes, so no ``with`` block is
    required. Presumably the loader's constructor calls ``psycopg.connect``;
    with the patch in place the URL below is only a placeholder.
    """
    # Call the patcher directly: pytest-mock does not support using its
    # patchers as context managers and emits a warning when they are.
    mocker.patch.object(psycopg, "connect")
    config = {"url": "postgres://user:password@localhost:5432/database"}
    loader = PostgresLoader(config=config)
    yield loader
def test_postgres_loader_initialization(postgres_loader):
    """A freshly built loader exposes both a connection and a cursor."""
    for attribute in ("connection", "cursor"):
        assert getattr(postgres_loader, attribute) is not None
def test_postgres_loader_invalid_config():
    """Building the loader with ``config=None`` raises a ``ValueError``."""
    # ``match`` is applied as a regular-expression search on the message.
    expected_message = "Must provide the valid config. Received: None"
    with pytest.raises(ValueError, match=expected_message):
        PostgresLoader(config=None)
def test_load_data(postgres_loader, monkeypatch):
    """Check the structure ``load_data`` returns for a two-row result set.

    The loader's cursor is swapped for a ``MagicMock`` whose ``fetchall``
    yields two rows. The test then verifies that the result carries a
    ``doc_id``, one ``data`` entry per row tagged with the query-derived URL,
    and that the query was handed to ``cursor.execute``.
    """
    mock_cursor = MagicMock()
    monkeypatch.setattr(postgres_loader, "cursor", mock_cursor)
    query = "SELECT * FROM table"
    mock_cursor.fetchall.return_value = [(1, "data1"), (2, "data2")]

    result = postgres_loader.load_data(query)

    assert "doc_id" in result
    assert "data" in result
    assert len(result["data"]) == 2
    expected_url = f"postgres_query-({query})"
    assert result["data"][0]["meta_data"]["url"] == expected_url
    assert result["data"][1]["meta_data"]["url"] == expected_url
    # Use the real Mock assertion. ``execute.called_with(...)`` is just an
    # auto-created child mock (always truthy), so asserting on it verified
    # nothing, and Python 3.12+ raises AttributeError for that misspelling.
    mock_cursor.execute.assert_called_with(query)
def test_load_data_exception(postgres_loader, monkeypatch):
    """A failure in ``cursor.execute`` surfaces as a ``ValueError``.

    The mocked cursor raises a generic ``Exception`` from ``execute``; the
    loader is expected to re-raise it as a ``ValueError`` whose message names
    both the query and the original error text.
    """
    mock_cursor = MagicMock()
    monkeypatch.setattr(postgres_loader, "cursor", mock_cursor)
    query = "SELECT * FROM table"
    mock_cursor.execute.side_effect = Exception("Mocked exception")
    # The pattern is a regex, hence the escaped ``*`` from the query text.
    with pytest.raises(
        ValueError, match=r"Failed to load data using query=SELECT \* FROM table with: Mocked exception"
    ):
        postgres_loader.load_data(query)
def test_close_connection(postgres_loader):
    """``close_connection`` leaves both the cursor and the connection as ``None``."""
    postgres_loader.close_connection()
    for attribute in ("cursor", "connection"):
        assert getattr(postgres_loader, attribute) is None