diff --git a/embedchain/embedchain.py b/embedchain/embedchain.py index b023cd12..3dfd4f9a 100644 --- a/embedchain/embedchain.py +++ b/embedchain/embedchain.py @@ -96,13 +96,19 @@ class EmbedChain: data_formatter = DataFormatter(data_type, config) self.user_asks.append([data_type, content]) - self.load_and_embed( - data_formatter.loader, - data_formatter.chunker, - content, - metadata, + documents, _metadatas, _ids, new_chunks = self.load_and_embed( + data_formatter.loader, data_formatter.chunker, content, metadata ) + # Send anonymous telemetry + if self.config.collect_metrics: + # it's quicker to check the variable twice than to count words when they won't be submitted. + word_count = sum([len(document.split(" ")) for document in documents]) + + extra_metadata = {"data_type": data_type, "word_count": word_count, "chunks_count": new_chunks} + thread_telemetry = threading.Thread(target=self._send_telemetry_event, args=("add_local", extra_metadata)) + thread_telemetry.start() + def load_and_embed(self, loader: BaseLoader, chunker: BaseChunker, src, metadata=None): """ Loads the data from the given URL, chunks it, and adds it to database.