From 6ae33d04b869ea29e4f98393b50b1267acea5f27 Mon Sep 17 00:00:00 2001
From: Deshraj Yadav
Date: Fri, 29 Sep 2023 15:19:10 -0700
Subject: [PATCH] [OpenSearch] Add support for filtering docs based on app_id in opensearch db (#729)

---
 embedchain/vectordb/opensearch.py | 7 +++++++
 pyproject.toml                    | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/embedchain/vectordb/opensearch.py b/embedchain/vectordb/opensearch.py
index 2bc5afcb..75222f4c 100644
--- a/embedchain/vectordb/opensearch.py
+++ b/embedchain/vectordb/opensearch.py
@@ -144,6 +144,11 @@ class OpenSearchDB(BaseVectorDB):
             http_auth=self.config.http_auth,
             use_ssl=True,
         )
+
+        pre_filter = {"match_all": {}} # default
+        if "app_id" in where:
+            app_id = where["app_id"]
+            pre_filter = {"bool": {"must": [{"term": {"metadata.app_id": app_id}}]}}
         docs = docsearch.similarity_search(
             input_query,
             search_type="script_scoring",
@@ -151,6 +156,8 @@ class OpenSearchDB(BaseVectorDB):
             vector_field="embeddings",
             text_field="text",
             metadata_field="metadata",
+            pre_filter=pre_filter,
+            k=n_results,
         )
         contents = [doc.page_content for doc in docs]
         return contents
diff --git a/pyproject.toml b/pyproject.toml
index dbb14c88..a0e1b972 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "embedchain"
-version = "0.0.62"
+version = "0.0.63"
 description = "embedchain is a framework to easily create LLM powered bots over any dataset"
 authors = ["Taranjeet Singh"]
 license = "Apache License"