Update website to web page
This commit renames the website loader and chunker to "web page", as they load and chunk a single URL rather than the complete website.
This commit is contained in:
@@ -10,7 +10,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class WebsiteChunker(BaseChunker):
|
class WebPageChunker(BaseChunker):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
text_splitter = RecursiveCharacterTextSplitter(**TEXT_SPLITTER_CHUNK_PARAMS)
|
text_splitter = RecursiveCharacterTextSplitter(**TEXT_SPLITTER_CHUNK_PARAMS)
|
||||||
super().__init__(text_splitter)
|
super().__init__(text_splitter)
|
||||||
@@ -9,10 +9,10 @@ from langchain.embeddings.openai import OpenAIEmbeddings
|
|||||||
|
|
||||||
from embedchain.loaders.youtube_video import YoutubeVideoLoader
|
from embedchain.loaders.youtube_video import YoutubeVideoLoader
|
||||||
from embedchain.loaders.pdf_file import PdfFileLoader
|
from embedchain.loaders.pdf_file import PdfFileLoader
|
||||||
from embedchain.loaders.website import WebsiteLoader
|
from embedchain.loaders.web_page import WebPageLoader
|
||||||
from embedchain.chunkers.youtube_video import YoutubeVideoChunker
|
from embedchain.chunkers.youtube_video import YoutubeVideoChunker
|
||||||
from embedchain.chunkers.pdf_file import PdfFileChunker
|
from embedchain.chunkers.pdf_file import PdfFileChunker
|
||||||
from embedchain.chunkers.website import WebsiteChunker
|
from embedchain.chunkers.web_page import WebPageChunker
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
@@ -37,7 +37,7 @@ class EmbedChain:
|
|||||||
loaders = {
|
loaders = {
|
||||||
'youtube_video': YoutubeVideoLoader(),
|
'youtube_video': YoutubeVideoLoader(),
|
||||||
'pdf_file': PdfFileLoader(),
|
'pdf_file': PdfFileLoader(),
|
||||||
'website': WebsiteLoader()
|
'web_page': WebPageLoader()
|
||||||
}
|
}
|
||||||
if data_type in loaders:
|
if data_type in loaders:
|
||||||
return loaders[data_type]
|
return loaders[data_type]
|
||||||
@@ -48,7 +48,7 @@ class EmbedChain:
|
|||||||
chunkers = {
|
chunkers = {
|
||||||
'youtube_video': YoutubeVideoChunker(),
|
'youtube_video': YoutubeVideoChunker(),
|
||||||
'pdf_file': PdfFileChunker(),
|
'pdf_file': PdfFileChunker(),
|
||||||
'website': WebsiteChunker()
|
'web_page': WebPageChunker()
|
||||||
}
|
}
|
||||||
if data_type in chunkers:
|
if data_type in chunkers:
|
||||||
return chunkers[data_type]
|
return chunkers[data_type]
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from bs4 import BeautifulSoup
|
|||||||
from embedchain.utils import clean_string
|
from embedchain.utils import clean_string
|
||||||
|
|
||||||
|
|
||||||
class WebsiteLoader:
|
class WebPageLoader:
|
||||||
|
|
||||||
def load_data(self, url):
|
def load_data(self, url):
|
||||||
response = requests.get(url)
|
response = requests.get(url)
|
||||||
Reference in New Issue
Block a user