Add GPT4Vision Image loader (#1089)

Co-authored-by: Deshraj Yadav <deshrajdry@gmail.com>
This commit is contained in:
Sidharth Mohanty
2024-01-02 03:57:23 +05:30
committed by GitHub
parent 367d6b70e2
commit c62663f2e4
29 changed files with 291 additions and 714 deletions

View File

@@ -35,7 +35,7 @@ class TestEsDB(unittest.TestCase):
ids = ["doc_1", "doc_2"]
# Add the data to the database.
self.db.add(embeddings, documents, metadatas, ids, skip_embedding=False)
self.db.add(embeddings, documents, metadatas, ids)
search_response = {
"hits": {
@@ -60,63 +60,17 @@ class TestEsDB(unittest.TestCase):
# Query the database for the documents that are most similar to the query "This is a document".
query = ["This is a document"]
results_without_citations = self.db.query(query, n_results=2, where={}, skip_embedding=False)
results_without_citations = self.db.query(query, n_results=2, where={})
expected_results_without_citations = ["This is a document.", "This is another document."]
self.assertEqual(results_without_citations, expected_results_without_citations)
results_with_citations = self.db.query(query, n_results=2, where={}, skip_embedding=False, citations=True)
results_with_citations = self.db.query(query, n_results=2, where={}, citations=True)
expected_results_with_citations = [
("This is a document.", {"url": "url_1", "doc_id": "doc_id_1", "score": 0.9}),
("This is another document.", {"url": "url_2", "doc_id": "doc_id_2", "score": 0.8}),
]
self.assertEqual(results_with_citations, expected_results_with_citations)
@patch("embedchain.vectordb.elasticsearch.Elasticsearch")
def test_query_with_skip_embedding(self, mock_client):
self.db = ElasticsearchDB(config=ElasticsearchDBConfig(es_url="https://localhost:9200"))
app_config = AppConfig(collect_metrics=False)
self.app = App(config=app_config, db=self.db)
# Assert that the Elasticsearch client is stored in the ElasticsearchDB class.
self.assertEqual(self.db.client, mock_client.return_value)
# Create some dummy data.
embeddings = [[1, 2, 3], [4, 5, 6]]
documents = ["This is a document.", "This is another document."]
metadatas = [{"url": "url_1", "doc_id": "doc_id_1"}, {"url": "url_2", "doc_id": "doc_id_2"}]
ids = ["doc_1", "doc_2"]
# Add the data to the database.
self.db.add(embeddings, documents, metadatas, ids, skip_embedding=True)
search_response = {
"hits": {
"hits": [
{
"_source": {"text": "This is a document.", "metadata": {"url": "url_1", "doc_id": "doc_id_1"}},
"_score": 0.9,
},
{
"_source": {
"text": "This is another document.",
"metadata": {"url": "url_2", "doc_id": "doc_id_2"},
},
"_score": 0.8,
},
]
}
}
# Configure the mock client to return the mocked response.
mock_client.return_value.search.return_value = search_response
# Query the database for the documents that are most similar to the query "This is a document".
query = ["This is a document"]
results = self.db.query(query, n_results=2, where={}, skip_embedding=True)
# Assert that the results are correct.
self.assertEqual(results, ["This is a document.", "This is another document."])
def test_init_without_url(self):
# Make sure it's not loaded from env
try: