Update website to web page

This commit renames the website loader and chunker
to web page, as they load and chunk a single
URL rather than the complete website.
This commit is contained in:
Taranjeet Singh
2023-06-20 16:50:54 +05:30
parent 0fc960e958
commit 08f155a551
3 changed files with 6 additions and 6 deletions

View File

@@ -10,7 +10,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
} }
class WebsiteChunker(BaseChunker): class WebPageChunker(BaseChunker):
def __init__(self): def __init__(self):
text_splitter = RecursiveCharacterTextSplitter(**TEXT_SPLITTER_CHUNK_PARAMS) text_splitter = RecursiveCharacterTextSplitter(**TEXT_SPLITTER_CHUNK_PARAMS)
super().__init__(text_splitter) super().__init__(text_splitter)

View File

@@ -9,10 +9,10 @@ from langchain.embeddings.openai import OpenAIEmbeddings
from embedchain.loaders.youtube_video import YoutubeVideoLoader from embedchain.loaders.youtube_video import YoutubeVideoLoader
from embedchain.loaders.pdf_file import PdfFileLoader from embedchain.loaders.pdf_file import PdfFileLoader
from embedchain.loaders.website import WebsiteLoader from embedchain.loaders.web_page import WebPageLoader
from embedchain.chunkers.youtube_video import YoutubeVideoChunker from embedchain.chunkers.youtube_video import YoutubeVideoChunker
from embedchain.chunkers.pdf_file import PdfFileChunker from embedchain.chunkers.pdf_file import PdfFileChunker
from embedchain.chunkers.website import WebsiteChunker from embedchain.chunkers.web_page import WebPageChunker
load_dotenv() load_dotenv()
@@ -37,7 +37,7 @@ class EmbedChain:
loaders = { loaders = {
'youtube_video': YoutubeVideoLoader(), 'youtube_video': YoutubeVideoLoader(),
'pdf_file': PdfFileLoader(), 'pdf_file': PdfFileLoader(),
'website': WebsiteLoader() 'web_page': WebPageLoader()
} }
if data_type in loaders: if data_type in loaders:
return loaders[data_type] return loaders[data_type]
@@ -48,7 +48,7 @@ class EmbedChain:
chunkers = { chunkers = {
'youtube_video': YoutubeVideoChunker(), 'youtube_video': YoutubeVideoChunker(),
'pdf_file': PdfFileChunker(), 'pdf_file': PdfFileChunker(),
'website': WebsiteChunker() 'web_page': WebPageChunker()
} }
if data_type in chunkers: if data_type in chunkers:
return chunkers[data_type] return chunkers[data_type]

View File

@@ -5,7 +5,7 @@ from bs4 import BeautifulSoup
from embedchain.utils import clean_string from embedchain.utils import clean_string
class WebsiteLoader: class WebPageLoader:
def load_data(self, url): def load_data(self, url):
response = requests.get(url) response = requests.get(url)