[Improvements] improve package ux (#950)

Co-authored-by: Deven Patel <deven298@yahoo.com>
This commit is contained in:
Deven Patel
2023-11-14 17:53:43 -08:00
committed by GitHub
parent 95c0d47236
commit 82fd595306
7 changed files with 33 additions and 19 deletions

8
configs/gpt4.yaml Normal file
View File

@@ -0,0 +1,8 @@
llm:
  provider: openai
  config:
    model: 'gpt-4'
    temperature: 0.5
    max_tokens: 1000
    top_p: 1
    stream: false

View File

@@ -83,3 +83,9 @@ app.deploy()
# 🛠️ Adding data to your pipeline...
# ✅ Data of type: web_page, value: https://www.forbes.com/profile/elon-musk added successfully.
```
You can try it out yourself using the following Google Colab notebook:
<a href="https://colab.research.google.com/drive/17ON1LPonnXAtLaZEebnOktstB_1cJJmh?usp=sharing">
<img src="https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667" alt="Open in Colab" />
</a>

View File

@@ -30,7 +30,7 @@ class DiscourseLoader(BaseLoader):
)
def _load_post(self, post_id):
post_url = f"{self.domain}/posts/{post_id}.json"
post_url = f"{self.domain}posts/{post_id}.json"
response = requests.get(post_url)
response.raise_for_status()
response_data = response.json()
@@ -53,7 +53,7 @@ class DiscourseLoader(BaseLoader):
data = []
data_contents = []
logging.info(f"Searching data on discourse url: {self.domain}, for query: {query}")
search_url = f"{self.domain}/search.json?q={query}"
search_url = f"{self.domain}search.json?q={query}"
response = requests.get(search_url)
response.raise_for_status()
response_data = response.json()

View File

@@ -4,7 +4,7 @@ try:
from langchain.document_loaders import UnstructuredFileLoader
except ImportError:
raise ImportError(
'PDF File requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
'Unstructured file requires extra dependencies. Install with `pip install --upgrade "embedchain[dataloaders]"`'
) from None
from embedchain.helper.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "embedchain"
version = "0.1.10"
version = "0.1.11"
description = "Data platform for LLMs - Load, index, retrieve and sync any unstructured data"
authors = [
"Taranjeet Singh <taranjeet@embedchain.ai>",

View File

@@ -7,7 +7,7 @@ from embedchain.loaders.discourse import DiscourseLoader
@pytest.fixture
def discourse_loader_config():
return {
"domain": "https://example.com",
"domain": "https://example.com/",
}
@@ -17,9 +17,9 @@ def discourse_loader(discourse_loader_config):
def test_discourse_loader_init_with_valid_config():
config = {"domain": "https://example.com"}
config = {"domain": "https://example.com/"}
loader = DiscourseLoader(config=config)
assert loader.domain == "https://example.com"
assert loader.domain == "https://example.com/"
def test_discourse_loader_init_with_missing_config():

View File

@@ -9,6 +9,7 @@ CONFIG_YAMLS = [
"configs/chunker.yaml",
"configs/cohere.yaml",
"configs/full-stack.yaml",
"configs/gpt4.yaml",
"configs/gpt4all.yaml",
"configs/huggingface.yaml",
"configs/jina.yaml",
@@ -21,16 +22,15 @@ CONFIG_YAMLS = [
]
class TestAllConfigYamls:
def test_all_config_yamls(self):
"""Test that all config yamls are valid."""
for config_yaml in CONFIG_YAMLS:
with open(config_yaml, "r") as f:
config = yaml.safe_load(f)
assert config is not None
def test_all_config_yamls():
"""Test that all config yamls are valid."""
for config_yaml in CONFIG_YAMLS:
with open(config_yaml, "r") as f:
config = yaml.safe_load(f)
assert config is not None
try:
validate_yaml_config(config)
except Exception as e:
print(f"Error in {config_yaml}: {e}")
raise e
try:
validate_yaml_config(config)
except Exception as e:
print(f"Error in {config_yaml}: {e}")
raise e