diff --git a/docs/get-started/quickstart.mdx b/docs/get-started/quickstart.mdx index a84ed72c..f501054f 100644 --- a/docs/get-started/quickstart.mdx +++ b/docs/get-started/quickstart.mdx @@ -26,10 +26,21 @@ Creating an app involves 3 steps: Embedchain provides a wide range of options to customize your app. You can customize the model, data sources, and much more. Explore the custom configurations [here](https://docs.embedchain.ai/advanced/configuration). - ```python + + ```python yaml_app.py from embedchain import Pipeline as App - app = App(yaml_config="config.yaml") + app = App.from_config(config_path="config.yaml") ``` + ```python json_app.py + from embedchain import Pipeline as App + app = App.from_config(config_path="config.json") + ``` + ```python app.py + from embedchain import Pipeline as App + config = {} # Add your config here + app = App.from_config(config=config) + ``` + diff --git a/embedchain/embedchain.py b/embedchain/embedchain.py index 6c42a13a..76cdea79 100644 --- a/embedchain/embedchain.py +++ b/embedchain/embedchain.py @@ -190,16 +190,6 @@ class EmbedChain(JSONSerializable): hash_object = hashlib.md5(str(source).encode("utf-8")) source_hash = hash_object.hexdigest() - # Check if the data hash already exists, if so, skip the addition - self.cursor.execute( - "SELECT 1 FROM data_sources WHERE hash = ? AND pipeline_id = ?", (source_hash, self.config.id) - ) - existing_data = self.cursor.fetchone() - - if existing_data: - print(f"Data with hash {source_hash} already exists. Skipping addition.") - return source_hash - self.user_asks.append([source, data_type.value, metadata]) data_formatter = DataFormatter(data_type, config, kwargs) @@ -212,7 +202,7 @@ class EmbedChain(JSONSerializable): # Insert the data into the 'data' table self.cursor.execute( """ - INSERT INTO data_sources (hash, pipeline_id, type, value, metadata) + INSERT OR REPLACE INTO data_sources (hash, pipeline_id, type, value, metadata) VALUES (?, ?, ?, ?, ?) """, (source_hash, self.config.id, data_type.value, str(source), json.dumps(metadata)),