feat: Adding app id in metadata while reading and writing to vector db (#189)

This commit is contained in:
Shashank Srivastava
2023-07-16 00:48:04 +05:30
committed by GitHub
parent fd97fb268a
commit d4b8542207
3 changed files with 20 additions and 12 deletions

View File

@@ -97,9 +97,11 @@ class EmbedChain:
metadatas = embeddings_data["metadatas"]
ids = embeddings_data["ids"]
# Get the ids that already exist, and discard any doc whose id is already present.
where={"app_id": self.config.id} if self.config.id is not None else {}
# where={"url": src}
existing_docs = self.collection.get(
ids=ids,
# where={"url": src}
where=where, # optional filter
)
existing_ids = set(existing_docs["ids"])
@@ -113,6 +115,10 @@ class EmbedChain:
ids = list(data_dict.keys())
documents, metadatas = zip(*data_dict.values())
# Add the app id to each metadata entry so the documents can be filtered by app later
if (self.config.id is not None):
metadatas = [{**m, "app_id": self.config.id} for m in metadatas]
chunks_before_addition = self.count()
@@ -144,11 +150,11 @@ class EmbedChain:
:param config: The query configuration.
:return: The content of the document that matched your query.
"""
where = {"app_id": self.config.id} if self.config.id is not None else {} # optional filter
result = self.collection.query(
query_texts=[
input_query,
],
query_texts=[input_query,],
n_results=config.number_documents,
where=where,
)
results_formatted = self._format_result(result)
contents = [result[0].page_content for result in results_formatted]