Package improvements (#989)

Co-authored-by: Deven Patel <deven298@yahoo.com>
This commit is contained in:
Deven Patel
2023-12-05 16:48:00 -08:00
committed by GitHub
parent 512cfc9466
commit 0f4f220119
2 changed files with 14 additions and 13 deletions

View File

@@ -26,10 +26,21 @@ Creating an app involves 3 steps:
<Accordion title="Customize your app with a simple YAML config" icon="gear-complex"> <Accordion title="Customize your app with a simple YAML config" icon="gear-complex">
Embedchain provides a wide range of options to customize your app. You can customize the model, data sources, and much more. Embedchain provides a wide range of options to customize your app. You can customize the model, data sources, and much more.
Explore the custom configurations [here](https://docs.embedchain.ai/advanced/configuration). Explore the custom configurations [here](https://docs.embedchain.ai/advanced/configuration).
```python <CodeGroup>
```python yaml_app.py
from embedchain import Pipeline as App from embedchain import Pipeline as App
app = App(yaml_config="config.yaml") app = App.from_config(config_path="config.yaml")
``` ```
```python json_app.py
from embedchain import Pipeline as App
app = App.from_config(config_path="config.json")
```
```python app.py
from embedchain import Pipeline as App
config = {} # Add your config here
app = App.from_config(config=config)
```
</CodeGroup>
</Accordion> </Accordion>
</Step> </Step>
<Step title="🗃️ Add data sources"> <Step title="🗃️ Add data sources">

View File

@@ -190,16 +190,6 @@ class EmbedChain(JSONSerializable):
hash_object = hashlib.md5(str(source).encode("utf-8")) hash_object = hashlib.md5(str(source).encode("utf-8"))
source_hash = hash_object.hexdigest() source_hash = hash_object.hexdigest()
# Check if the data hash already exists, if so, skip the addition
self.cursor.execute(
"SELECT 1 FROM data_sources WHERE hash = ? AND pipeline_id = ?", (source_hash, self.config.id)
)
existing_data = self.cursor.fetchone()
if existing_data:
print(f"Data with hash {source_hash} already exists. Skipping addition.")
return source_hash
self.user_asks.append([source, data_type.value, metadata]) self.user_asks.append([source, data_type.value, metadata])
data_formatter = DataFormatter(data_type, config, kwargs) data_formatter = DataFormatter(data_type, config, kwargs)
@@ -212,7 +202,7 @@ class EmbedChain(JSONSerializable):
# Insert the data into the 'data_sources' table # Insert the data into the 'data_sources' table
self.cursor.execute( self.cursor.execute(
""" """
INSERT INTO data_sources (hash, pipeline_id, type, value, metadata) INSERT OR REPLACE INTO data_sources (hash, pipeline_id, type, value, metadata)
VALUES (?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?)
""", """,
(source_hash, self.config.id, data_type.value, str(source), json.dumps(metadata)), (source_hash, self.config.id, data_type.value, str(source), json.dumps(metadata)),