From 39861ec1e882833fc1a0d24825ca353ce5661495 Mon Sep 17 00:00:00 2001
From: cachho
Date: Mon, 14 Aug 2023 23:42:10 +0200
Subject: [PATCH] fix: add telemetry to add_local (#437)

---
 embedchain/embedchain.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/embedchain/embedchain.py b/embedchain/embedchain.py
index b023cd12..3dfd4f9a 100644
--- a/embedchain/embedchain.py
+++ b/embedchain/embedchain.py
@@ -96,13 +96,19 @@ class EmbedChain:
         data_formatter = DataFormatter(data_type, config)
         self.user_asks.append([data_type, content])
 
-        self.load_and_embed(
-            data_formatter.loader,
-            data_formatter.chunker,
-            content,
-            metadata,
+        documents, _metadatas, _ids, new_chunks = self.load_and_embed(
+            data_formatter.loader, data_formatter.chunker, content, metadata
         )
 
+        # Send anonymous telemetry
+        if self.config.collect_metrics:
+            # it's quicker to check the variable twice than to count words when they won't be submitted.
+            word_count = sum([len(document.split(" ")) for document in documents])
+
+            extra_metadata = {"data_type": data_type, "word_count": word_count, "chunks_count": new_chunks}
+            thread_telemetry = threading.Thread(target=self._send_telemetry_event, args=("add_local", extra_metadata))
+            thread_telemetry.start()
+
     def load_and_embed(self, loader: BaseLoader, chunker: BaseChunker, src, metadata=None):
         """
         Loads the data from the given URL, chunks it, and adds it to database.