[OpenSearch] Add chunks specific to an app_id if present (#765)

This commit is contained in:
Deshraj Yadav
2023-10-04 15:46:22 -07:00
committed by GitHub
parent 352e71461d
commit 64a34cac32
6 changed files with 81 additions and 55 deletions

View File

@@ -85,19 +85,29 @@ class OpenSearchDB(BaseVectorDB):
:return: ids
:type: Set[str]
"""
query = {}
if ids:
query = {"query": {"bool": {"must": [{"ids": {"values": ids}}]}}}
query["query"] = {"bool": {"must": [{"ids": {"values": ids}}]}}
else:
query = {"query": {"bool": {"must": []}}}
query["query"] = {"bool": {"must": []}}
if "app_id" in where:
app_id = where["app_id"]
query["query"]["bool"]["must"].append({"term": {"metadata.app_id": app_id}})
# OpenSearch syntax is different from Elasticsearch
response = self.client.search(index=self._get_index(), body=query, _source=False, size=limit)
response = self.client.search(index=self._get_index(), body=query, _source=True, size=limit)
docs = response["hits"]["hits"]
ids = [doc["_id"] for doc in docs]
return {"ids": set(ids)}
doc_ids = [doc["_source"]["metadata"]["doc_id"] for doc in docs]
# Result is modified for compatibility with other vector databases
# TODO: Add method in vector database to return result in a standard format
result = {"ids": ids, "metadatas": []}
for doc_id in doc_ids:
result["metadatas"].append({"doc_id": doc_id})
return result
def add(
self, embeddings: List[str], documents: List[str], metadatas: List[object], ids: List[str], skip_embedding: bool
@@ -204,6 +214,14 @@ class OpenSearchDB(BaseVectorDB):
# delete index in Es
self.client.indices.delete(index=self._get_index())
def delete(self, where):
    """Delete from the OpenSearch index all entries whose ``metadata.doc_id`` matches.

    A ``delete_by_query`` request is issued against the index returned by
    ``self._get_index()`` with a single ``term`` filter on ``metadata.doc_id``,
    so every indexed entry carrying that doc_id is removed, not just one.

    :param where: filter for the deletion; must contain a "doc_id" key
    :type where: Dict[str, Any]
    :raises ValueError: if `where` is None/empty or has no "doc_id" key
    """
    # A None/empty filter is reported with the same ValueError as a missing
    # key, instead of leaking a TypeError from the `in` test on None.
    if not where or "doc_id" not in where:
        raise ValueError("doc_id is required to delete a document")
    query = {"query": {"bool": {"must": [{"term": {"metadata.doc_id": where["doc_id"]}}]}}}
    self.client.delete_by_query(index=self._get_index(), body=query)
def _get_index(self) -> str:
"""Get the OpenSearch index for a collection