[Improvement] Parallelize loading of sitemap urls

This commit is contained in:
Deshraj Yadav
2023-11-13 12:53:34 -08:00
parent 1d31b8f7e4
commit a5bf8e9075
3 changed files with 23 additions and 12 deletions

View File

@@ -158,7 +158,11 @@ class ChromaDB(BaseVectorDB):
)
for i in range(0, len(documents), self.BATCH_SIZE):
print("Inserting batches from {} to {} in chromadb".format(i, min(len(documents), i + self.BATCH_SIZE)))
print(
"Inserting batches from {} to {} in vector database.".format(
i, min(len(documents), i + self.BATCH_SIZE)
)
)
if skip_embedding:
self.collection.add(
embeddings=embeddings[i : i + self.BATCH_SIZE],