diff --git a/embedchain/embedchain.py b/embedchain/embedchain.py index 83ae259b..dfbf3e66 100644 --- a/embedchain/embedchain.py +++ b/embedchain/embedchain.py @@ -352,7 +352,7 @@ class EmbedChain(JSONSerializable): # get existing ids, and discard doc if any common id exist. where = {"url": src} if self.config.id is not None: - where.update({"metadata.app_id": self.config.id}) + where["app_id"] = self.config.id db_result = self.db.get(ids=ids, where=where) # optional filter existing_ids = set(db_result["ids"]) diff --git a/embedchain/vectordb/opensearch.py b/embedchain/vectordb/opensearch.py index 4121d53b..b3f1983d 100644 --- a/embedchain/vectordb/opensearch.py +++ b/embedchain/vectordb/opensearch.py @@ -93,7 +93,7 @@ class OpenSearchDB(BaseVectorDB): if "app_id" in where: app_id = where["app_id"] - query["query"]["bool"]["must"].append({"term": {"metadata.app_id": app_id}}) + query["query"]["bool"]["must"].append({"term": {"metadata.app_id.keyword": app_id}}) # OpenSearch syntax is different from Elasticsearch response = self.client.search(index=self._get_index(), body=query, _source=True, size=limit) @@ -168,7 +168,7 @@ class OpenSearchDB(BaseVectorDB): pre_filter = {"match_all": {}} # default if "app_id" in where: app_id = where["app_id"] - pre_filter = {"bool": {"must": [{"term": {"metadata.app_id": app_id}}]}} + pre_filter = {"bool": {"must": [{"term": {"metadata.app_id.keyword": app_id}}]}} docs = docsearch.similarity_search( input_query, search_type="script_scoring", diff --git a/pyproject.toml b/pyproject.toml index 01de1c04..dbc960b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "embedchain" -version = "0.0.66" +version = "0.0.67" description = "embedchain is a framework to easily create LLM powered bots over any dataset" authors = ["Taranjeet Singh"] license = "Apache License"