[bugfix] fix elasticsearch db (#1183)
Co-authored-by: Deven Patel <deven298@yahoo.com>
This commit is contained in:
@@ -99,14 +99,24 @@ class ElasticsearchDB(BaseVectorDB):
|
|||||||
query = {"bool": {"must": [{"ids": {"values": ids}}]}}
|
query = {"bool": {"must": [{"ids": {"values": ids}}]}}
|
||||||
else:
|
else:
|
||||||
query = {"bool": {"must": []}}
|
query = {"bool": {"must": []}}
|
||||||
if "app_id" in where:
|
|
||||||
app_id = where["app_id"]
|
|
||||||
query["bool"]["must"].append({"term": {"metadata.app_id": app_id}})
|
|
||||||
|
|
||||||
response = self.client.search(index=self._get_index(), query=query, _source=False, size=limit)
|
if where:
|
||||||
|
for key, value in where.items():
|
||||||
|
query["bool"]["must"].append({"term": {f"metadata.{key}.keyword": value}})
|
||||||
|
|
||||||
|
response = self.client.search(index=self._get_index(), query=query, _source=True, size=limit)
|
||||||
docs = response["hits"]["hits"]
|
docs = response["hits"]["hits"]
|
||||||
ids = [doc["_id"] for doc in docs]
|
ids = [doc["_id"] for doc in docs]
|
||||||
return {"ids": set(ids)}
|
doc_ids = [doc["_source"]["metadata"]["doc_id"] for doc in docs]
|
||||||
|
|
||||||
|
# Result is modified for compatibility with other vector databases
|
||||||
|
# TODO: Add method in vector database to return result in a standard format
|
||||||
|
result = {"ids": ids, "metadatas": []}
|
||||||
|
|
||||||
|
for doc_id in doc_ids:
|
||||||
|
result["metadatas"].append({"doc_id": doc_id})
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
def add(
|
def add(
|
||||||
self,
|
self,
|
||||||
@@ -186,9 +196,11 @@ class ElasticsearchDB(BaseVectorDB):
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if "app_id" in where:
|
|
||||||
app_id = where["app_id"]
|
if where:
|
||||||
query["script_score"]["query"] = {"match": {"metadata.app_id": app_id}}
|
for key, value in where.items():
|
||||||
|
query["script_score"]["query"]["bool"]["must"].append({"term": {f"metadata.{key}.keyword": value}})
|
||||||
|
|
||||||
_source = ["text", "metadata"]
|
_source = ["text", "metadata"]
|
||||||
response = self.client.search(index=self._get_index(), query=query, _source=_source, size=n_results)
|
response = self.client.search(index=self._get_index(), query=query, _source=_source, size=n_results)
|
||||||
docs = response["hits"]["hits"]
|
docs = response["hits"]["hits"]
|
||||||
@@ -244,3 +256,11 @@ class ElasticsearchDB(BaseVectorDB):
|
|||||||
# NOTE: The method is preferred to an attribute, because if collection name changes,
|
# NOTE: The method is preferred to an attribute, because if collection name changes,
|
||||||
# it's always up-to-date.
|
# it's always up-to-date.
|
||||||
return f"{self.config.collection_name}_{self.embedder.vector_dimension}".lower()
|
return f"{self.config.collection_name}_{self.embedder.vector_dimension}".lower()
|
||||||
|
|
||||||
|
def delete(self, where):
    """Delete documents from the database.

    Builds a ``bool``/``must`` query containing one ``term`` clause per entry
    of ``where`` (each against ``metadata.<key>.keyword``), passes it to the
    client's ``delete_by_query`` for the current index, then refreshes that
    index.

    :param where: mapping of metadata field name to the value it must equal.
        ``None`` is accepted and handled like an empty mapping, in line with
        the ``if where:`` guards used when building search filters in this
        class.
    :type where: dict | None

    NOTE(review): with an empty/None filter the ``must`` list is empty, which
    presumably matches every document in the index -- confirm callers never
    pass an empty filter unintentionally.
    """
    # `where or {}` keeps dict inputs unchanged and avoids an AttributeError on None.
    must_clauses = [{"term": {f"metadata.{key}.keyword": value}} for key, value in (where or {}).items()]
    query = {"query": {"bool": {"must": must_clauses}}}
    self.client.delete_by_query(index=self._get_index(), body=query)
    # The index name is looked up again rather than cached, as elsewhere in this class.
    self.client.indices.refresh(index=self._get_index())
|
||||||
|
|||||||
Reference in New Issue
Block a user