Merge pull request #2 from embedchain/update-website-to-webpage
Update website loader/chunker to web page loader/chunker
This commit is contained in:
@@ -10,7 +10,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
|
||||
}
|
||||
|
||||
|
||||
-class WebsiteChunker(BaseChunker):
|
||||
+class WebPageChunker(BaseChunker):
|
||||
def __init__(self):
|
||||
text_splitter = RecursiveCharacterTextSplitter(**TEXT_SPLITTER_CHUNK_PARAMS)
|
||||
super().__init__(text_splitter)
|
||||
@@ -9,10 +9,10 @@ from langchain.embeddings.openai import OpenAIEmbeddings
|
||||
|
||||
from embedchain.loaders.youtube_video import YoutubeVideoLoader
|
||||
from embedchain.loaders.pdf_file import PdfFileLoader
|
||||
-from embedchain.loaders.website import WebsiteLoader
|
||||
+from embedchain.loaders.web_page import WebPageLoader
|
||||
from embedchain.chunkers.youtube_video import YoutubeVideoChunker
|
||||
from embedchain.chunkers.pdf_file import PdfFileChunker
|
||||
-from embedchain.chunkers.website import WebsiteChunker
|
||||
+from embedchain.chunkers.web_page import WebPageChunker
|
||||
|
||||
load_dotenv()
|
||||
|
||||
@@ -37,7 +37,7 @@ class EmbedChain:
|
||||
loaders = {
|
||||
'youtube_video': YoutubeVideoLoader(),
|
||||
'pdf_file': PdfFileLoader(),
|
||||
-'website': WebsiteLoader()
|
||||
+'web_page': WebPageLoader()
|
||||
}
|
||||
if data_type in loaders:
|
||||
return loaders[data_type]
|
||||
@@ -48,7 +48,7 @@ class EmbedChain:
|
||||
chunkers = {
|
||||
'youtube_video': YoutubeVideoChunker(),
|
||||
'pdf_file': PdfFileChunker(),
|
||||
-'website': WebsiteChunker()
|
||||
+'web_page': WebPageChunker()
|
||||
}
|
||||
if data_type in chunkers:
|
||||
return chunkers[data_type]
|
||||
|
||||
@@ -5,7 +5,7 @@ from bs4 import BeautifulSoup
|
||||
from embedchain.utils import clean_string
|
||||
|
||||
|
||||
-class WebsiteLoader:
|
||||
+class WebPageLoader:
|
||||
|
||||
def load_data(self, url):
|
||||
response = requests.get(url)
|
||||
Reference in New Issue
Block a user