Fix batch_size for vectordb (#1449)

This commit is contained in:
Dev Khant
2024-06-28 23:48:22 +05:30
committed by GitHub
parent 0a78198bb5
commit 50c0285cb2
15 changed files with 49 additions and 26 deletions

View File

@@ -42,6 +42,7 @@ class ChromaDB(BaseVectorDB):
self.settings = Settings(anonymized_telemetry=False)
self.settings.allow_reset = self.config.allow_reset if hasattr(self.config, "allow_reset") else False
self.batch_size = self.config.batch_size
if self.config.chroma_settings:
for key, value in self.config.chroma_settings.items():
if hasattr(self.settings, key):
@@ -153,12 +154,13 @@ class ChromaDB(BaseVectorDB):
" Ids size: {}".format(len(documents), len(metadatas), len(ids))
)
for i in tqdm(range(0, len(documents), self.config.batch_size), desc="Inserting batches in chromadb"):
for i in tqdm(range(0, len(documents), self.batch_size), desc="Inserting batches in chromadb"):
self.collection.add(
documents=documents[i : i + self.config.batch_size],
metadatas=metadatas[i : i + self.config.batch_size],
ids=ids[i : i + self.config.batch_size],
documents=documents[i : i + self.batch_size],
metadatas=metadatas[i : i + self.batch_size],
ids=ids[i : i + self.batch_size],
)
self.config
@staticmethod
def _format_result(results: QueryResult) -> list[tuple[Document, float]]: