Add support for OpenSearch as vector database (#725)

This commit is contained in:
Deshraj Yadav
2023-09-28 14:54:42 -07:00
committed by GitHub
parent 9951b58005
commit 414c69fd62
22 changed files with 326 additions and 82 deletions

View File

@@ -61,16 +61,13 @@ class EmbedChain(JSONSerializable):
"""
self.config = config
# Add subclasses
## Llm
# Llm
self.llm = llm
## Database
# Database has support for config assignment for backwards compatibility
if db is None and (not hasattr(self.config, "db") or self.config.db is None):
raise ValueError("App requires Database.")
self.db = db or self.config.db
## Embedder
# Embedder
if embedder is None:
raise ValueError("App requires Embedder.")
self.embedder = embedder
@@ -256,7 +253,6 @@ class EmbedChain(JSONSerializable):
)
return self.add(source=source, data_type=data_type, metadata=metadata, config=config)
def _get_existing_doc_id(self, chunker: BaseChunker, src: Any):
"""
Get id of existing document for a given source, based on the data type
@@ -395,10 +391,10 @@ class EmbedChain(JSONSerializable):
return list(documents), metadatas, ids, 0
# Count before, to calculate a delta in the end.
chunks_before_addition = self.count()
chunks_before_addition = self.db.count()
self.db.add(documents=documents, metadatas=metadatas, ids=ids)
count_new_chunks = self.count() - chunks_before_addition
count_new_chunks = self.db.count() - chunks_before_addition
print((f"Successfully saved {src} ({chunker.data_type}). New chunks count: {count_new_chunks}"))
return list(documents), metadatas, ids, count_new_chunks