Merge pull request #2 from embedchain/update-website-to-webpage

Update website loader/chunker to web page loader/chunker
This commit is contained in:
Taranjeet Singh
2023-06-20 16:53:06 +05:30
committed by GitHub
3 changed files with 6 additions and 6 deletions

View File

@@ -10,7 +10,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
}
-class WebsiteChunker(BaseChunker):
+class WebPageChunker(BaseChunker):
def __init__(self):
text_splitter = RecursiveCharacterTextSplitter(**TEXT_SPLITTER_CHUNK_PARAMS)
super().__init__(text_splitter)

View File

@@ -9,10 +9,10 @@ from langchain.embeddings.openai import OpenAIEmbeddings
from embedchain.loaders.youtube_video import YoutubeVideoLoader
from embedchain.loaders.pdf_file import PdfFileLoader
-from embedchain.loaders.website import WebsiteLoader
+from embedchain.loaders.web_page import WebPageLoader
from embedchain.chunkers.youtube_video import YoutubeVideoChunker
from embedchain.chunkers.pdf_file import PdfFileChunker
-from embedchain.chunkers.website import WebsiteChunker
+from embedchain.chunkers.web_page import WebPageChunker
load_dotenv()
@@ -37,7 +37,7 @@ class EmbedChain:
loaders = {
'youtube_video': YoutubeVideoLoader(),
'pdf_file': PdfFileLoader(),
-'website': WebsiteLoader()
+'web_page': WebPageLoader()
}
if data_type in loaders:
return loaders[data_type]
@@ -48,7 +48,7 @@ class EmbedChain:
chunkers = {
'youtube_video': YoutubeVideoChunker(),
'pdf_file': PdfFileChunker(),
-'website': WebsiteChunker()
+'web_page': WebPageChunker()
}
if data_type in chunkers:
return chunkers[data_type]

View File

@@ -5,7 +5,7 @@ from bs4 import BeautifulSoup
from embedchain.utils import clean_string
-class WebsiteLoader:
+class WebPageLoader:
def load_data(self, url):
response = requests.get(url)