diff --git a/embedchain/vectordb/opensearch.py b/embedchain/vectordb/opensearch.py index 2bc5afcb..75222f4c 100644 --- a/embedchain/vectordb/opensearch.py +++ b/embedchain/vectordb/opensearch.py @@ -144,6 +144,11 @@ class OpenSearchDB(BaseVectorDB): http_auth=self.config.http_auth, use_ssl=True, ) + + pre_filter = {"match_all": {}} # default + if "app_id" in where: + app_id = where["app_id"] + pre_filter = {"bool": {"must": [{"term": {"metadata.app_id": app_id}}]}} docs = docsearch.similarity_search( input_query, search_type="script_scoring", @@ -151,6 +156,8 @@ class OpenSearchDB(BaseVectorDB): vector_field="embeddings", text_field="text", metadata_field="metadata", + pre_filter=pre_filter, + k=n_results, ) contents = [doc.page_content for doc in docs] return contents diff --git a/pyproject.toml b/pyproject.toml index dbb14c88..a0e1b972 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "embedchain" -version = "0.0.62" +version = "0.0.63" description = "embedchain is a framework to easily create LLM powered bots over any dataset" authors = ["Taranjeet Singh"] license = "Apache License"