From ab9598d00a8039487c440a6752ace96327d26973 Mon Sep 17 00:00:00 2001 From: Deshraj Yadav Date: Thu, 26 Oct 2023 09:51:51 -0700 Subject: [PATCH] Update version to v0.0.77 (#851) --- embedchain/llm/gpt4all.py | 8 ++----- embedchain/pipeline.py | 47 +++++++++++++++++---------------------- pyproject.toml | 2 +- 3 files changed, 24 insertions(+), 33 deletions(-) diff --git a/embedchain/llm/gpt4all.py b/embedchain/llm/gpt4all.py index 99d0e15f..c644b135 100644 --- a/embedchain/llm/gpt4all.py +++ b/embedchain/llm/gpt4all.py @@ -1,7 +1,7 @@ from typing import Iterable, Optional, Union -from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler from langchain.callbacks.stdout import StdOutCallbackHandler +from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler from embedchain.config import BaseLlmConfig from embedchain.helper.json_serializable import register_deserializable @@ -48,11 +48,7 @@ class GPT4ALLLlm(BaseLlm): if config.top_p: kwargs["top_p"] = config.top_p - callbacks = None - if config.stream: - callbacks = [StreamingStdOutCallbackHandler()] - else: - callbacks =[StdOutCallbackHandler()] + callbacks = [StreamingStdOutCallbackHandler()] if config.stream else [StdOutCallbackHandler()] response = self.instance.generate(prompts=messages, callbacks=callbacks, **kwargs) answer = "" diff --git a/embedchain/pipeline.py b/embedchain/pipeline.py index 7e5f93e5..3e1ee385 100644 --- a/embedchain/pipeline.py +++ b/embedchain/pipeline.py @@ -17,6 +17,7 @@ from embedchain.embedder.openai import OpenAIEmbedder from embedchain.factory import EmbedderFactory, LlmFactory, VectorDBFactory from embedchain.helper.json_serializable import register_deserializable from embedchain.llm.base import BaseLlm +from embedchain.llm.openai import OpenAILlm from embedchain.vectordb.base import BaseVectorDB from embedchain.vectordb.chroma import ChromaDB @@ -77,11 +78,12 @@ class Pipeline(EmbedChain): self.config = config or PipelineConfig() self.name = self.config.name - self.local_id = self.config.id or str(uuid.uuid4()) + + self.config.id = self.local_id = str(uuid.uuid4()) if self.config.id is None else self.config.id self.embedding_model = embedding_model or OpenAIEmbedder() self.db = db or ChromaDB() - self.llm = llm or None + self.llm = llm or OpenAILlm() self._init_db() # setup user id and directory @@ -128,7 +130,7 @@ class Pipeline(EmbedChain): self.client = Client() else: api_key = input( - "Enter Embedchain API key. You can find the API key at https://app.embedchain.ai/settings/keys/ \n" + "🔑 Enter your Embedchain API key. You can find the API key at https://app.embedchain.ai/settings/keys/ \n" # noqa: E501 ) self.client = Client(api_key=api_key) @@ -150,7 +152,7 @@ class Pipeline(EmbedChain): headers={"Authorization": f"Token {self.client.api_key}"}, ) if r.status_code not in [200, 201]: - raise Exception(f"Error occurred while creating pipeline. Response from API: {r.text}") + raise Exception(f"❌ Error occurred while creating pipeline. API response: {r.text}") print( f"🎉🎉🎉 Pipeline created successfully! View your pipeline: https://app.embedchain.ai/pipelines/{r.json()['id']}\n" # noqa: E501 @@ -207,8 +209,7 @@ class Pipeline(EmbedChain): printed_value = metadata.get("file_path") if metadata.get("file_path") else data_value print(f"✅ Data of type: {data_type}, value: {printed_value} added successfully.") except Exception as e: - self.logger.error(f"Error occurred during data upload: {str(e)}") - print(f"❌ Error occurred during data upload for type {data_type}!") + print(f"❌ Error occurred during data upload for type {data_type}!. Error: {str(e)}") def _send_api_request(self, endpoint, payload): url = f"{self.client.host}{endpoint}" @@ -237,39 +238,33 @@ class Pipeline(EmbedChain): self._upload_data_to_pipeline(data_type, data_value, metadata) self._mark_data_as_uploaded(data_hash) return True - except Exception as e: - self.logger.error(f"Error occurred during data upload: {str(e)}") + except Exception: print(f"❌ Error occurred during data upload for hash {data_hash}!") return False def _mark_data_as_uploaded(self, data_hash): self.cursor.execute( - "UPDATE data_sources SET is_uploaded = 1 WHERE hash = ? AND pipeline_id = ? AND is_uploaded = 0", + "UPDATE data_sources SET is_uploaded = 1 WHERE hash = ? AND pipeline_id = ?", (data_hash, self.local_id), ) self.connection.commit() def deploy(self): - try: - if self.client is None: - self._init_client() + if self.client is None: + self._init_client() - pipeline_data = self._create_pipeline() - self.id = pipeline_data["id"] + pipeline_data = self._create_pipeline() + self.id = pipeline_data["id"] - results = self.cursor.execute( - "SELECT * FROM data_sources WHERE pipeline_id = ? AND is_uploaded = 0", (self.local_id,) - ).fetchall() + results = self.cursor.execute( + "SELECT * FROM data_sources WHERE pipeline_id = ? AND is_uploaded = 0", (self.local_id,) + ).fetchall() - if len(results) > 0: - print("🛠️ Adding data to your pipeline...") - for result in results: - data_hash, data_type, data_value = result[0], result[2], result[3] - self._process_and_upload_data(data_hash, data_type, data_value) - - except Exception as e: - self.logger.exception(f"Error occurred during deployment: {str(e)}") - raise HTTPException(status_code=500, detail="Error occurred during deployment.") + if len(results) > 0: + print("🛠️ Adding data to your pipeline...") + for result in results: + data_hash, data_type, data_value = result[1], result[2], result[3] + self._process_and_upload_data(data_hash, data_type, data_value) @classmethod def from_config(cls, yaml_path: str, auto_deploy: bool = False): diff --git a/pyproject.toml b/pyproject.toml index b0de3093..b0ca1ed4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "embedchain" -version = "0.0.76" +version = "0.0.77" description = "Data platform for LLMs - Load, index, retrieve and sync any unstructured data" authors = ["Taranjeet Singh, Deshraj Yadav"] license = "Apache License"