diff --git a/embedchain/chunkers/base_chunker.py b/embedchain/chunkers/base_chunker.py index dfbacac5..83c00d72 100644 --- a/embedchain/chunkers/base_chunker.py +++ b/embedchain/chunkers/base_chunker.py @@ -3,6 +3,7 @@ import hashlib class BaseChunker: def __init__(self, text_splitter): + ''' Initialize the chunker. ''' self.text_splitter = text_splitter def create_chunks(self, loader, src): diff --git a/embedchain/chunkers/docx_file.py b/embedchain/chunkers/docx_file.py index 55e186dd..3277223f 100644 --- a/embedchain/chunkers/docx_file.py +++ b/embedchain/chunkers/docx_file.py @@ -14,6 +14,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = { class DocxFileChunker(BaseChunker): + ''' Chunker for .docx file. ''' def __init__(self, config: Optional[ChunkerConfig] = None): if config is None: config = TEXT_SPLITTER_CHUNK_PARAMS diff --git a/embedchain/chunkers/pdf_file.py b/embedchain/chunkers/pdf_file.py index a6e1afcb..76847a3c 100644 --- a/embedchain/chunkers/pdf_file.py +++ b/embedchain/chunkers/pdf_file.py @@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = { class PdfFileChunker(BaseChunker): + ''' Chunker for PDF file. ''' def __init__(self, config: Optional[ChunkerConfig] = None): if config is None: config = TEXT_SPLITTER_CHUNK_PARAMS diff --git a/embedchain/chunkers/qna_pair.py b/embedchain/chunkers/qna_pair.py index f3352ea5..6ea17af5 100644 --- a/embedchain/chunkers/qna_pair.py +++ b/embedchain/chunkers/qna_pair.py @@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = { class QnaPairChunker(BaseChunker): + ''' Chunker for QnA pair. ''' def __init__(self, config: Optional[ChunkerConfig] = None): if config is None: config = TEXT_SPLITTER_CHUNK_PARAMS diff --git a/embedchain/chunkers/text.py b/embedchain/chunkers/text.py index 95fa8eee..800ed67e 100644 --- a/embedchain/chunkers/text.py +++ b/embedchain/chunkers/text.py @@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = { class TextChunker(BaseChunker): + ''' Chunker for text. ''' def __init__(self, config: Optional[ChunkerConfig] = None): if config is None: config = TEXT_SPLITTER_CHUNK_PARAMS diff --git a/embedchain/chunkers/web_page.py b/embedchain/chunkers/web_page.py index a442556f..17f9665b 100644 --- a/embedchain/chunkers/web_page.py +++ b/embedchain/chunkers/web_page.py @@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = { class WebPageChunker(BaseChunker): + ''' Chunker for web page. ''' def __init__(self, config: Optional[ChunkerConfig] = None): if config is None: config = TEXT_SPLITTER_CHUNK_PARAMS diff --git a/embedchain/chunkers/youtube_video.py b/embedchain/chunkers/youtube_video.py index a1406ca7..6467ddfc 100644 --- a/embedchain/chunkers/youtube_video.py +++ b/embedchain/chunkers/youtube_video.py @@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = { class YoutubeVideoChunker(BaseChunker): + ''' Chunker for Youtube video. ''' def __init__(self, config: Optional[ChunkerConfig] = None): if config is None: config = TEXT_SPLITTER_CHUNK_PARAMS diff --git a/embedchain/loaders/docx_file.py b/embedchain/loaders/docx_file.py index d9872e23..e588c8d4 100644 --- a/embedchain/loaders/docx_file.py +++ b/embedchain/loaders/docx_file.py @@ -2,6 +2,7 @@ from langchain.document_loaders import Docx2txtLoader class DocxFileLoader: def load_data(self, url): + ''' Load data from a .docx file. ''' loader = Docx2txtLoader(url) output = [] data = loader.load() diff --git a/embedchain/loaders/local_qna_pair.py b/embedchain/loaders/local_qna_pair.py index 76151d98..84302744 100644 --- a/embedchain/loaders/local_qna_pair.py +++ b/embedchain/loaders/local_qna_pair.py @@ -1,6 +1,7 @@ class LocalQnaPairLoader: def load_data(self, content): + ''' Load data from a local QnA pair. ''' question, answer = content content = f"Q: {question}\nA: {answer}" meta_data = { diff --git a/embedchain/loaders/local_text.py b/embedchain/loaders/local_text.py index 52ea143f..2f9e6a17 100644 --- a/embedchain/loaders/local_text.py +++ b/embedchain/loaders/local_text.py @@ -1,6 +1,7 @@ class LocalTextLoader: def load_data(self, content): + ''' Load data from a local text file. ''' meta_data = { "url": "local", } diff --git a/embedchain/loaders/pdf_file.py b/embedchain/loaders/pdf_file.py index 14b096c5..7d0c6a49 100644 --- a/embedchain/loaders/pdf_file.py +++ b/embedchain/loaders/pdf_file.py @@ -6,6 +6,7 @@ from embedchain.utils import clean_string class PdfFileLoader: def load_data(self, url): + ''' Load data from a PDF file. ''' loader = PyPDFLoader(url) output = [] pages = loader.load_and_split() diff --git a/embedchain/loaders/web_page.py b/embedchain/loaders/web_page.py index 88ece4b8..a9e25df3 100644 --- a/embedchain/loaders/web_page.py +++ b/embedchain/loaders/web_page.py @@ -8,6 +8,7 @@ from embedchain.utils import clean_string class WebPageLoader: def load_data(self, url): + ''' Load data from a web page. ''' response = requests.get(url) data = response.content soup = BeautifulSoup(data, 'html.parser') diff --git a/embedchain/loaders/youtube_video.py b/embedchain/loaders/youtube_video.py index 8bbda5a9..601a7089 100644 --- a/embedchain/loaders/youtube_video.py +++ b/embedchain/loaders/youtube_video.py @@ -6,6 +6,7 @@ from embedchain.utils import clean_string class YoutubeVideoLoader: def load_data(self, url): + ''' Load data from a Youtube video. ''' loader = YoutubeLoader.from_youtube_url(url, add_video_info=True) doc = loader.load() output = [] diff --git a/embedchain/vectordb/base_vector_db.py b/embedchain/vectordb/base_vector_db.py index 190646f3..fcde4542 100644 --- a/embedchain/vectordb/base_vector_db.py +++ b/embedchain/vectordb/base_vector_db.py @@ -1,10 +1,13 @@ class BaseVectorDB: + ''' Base class for vector database. ''' + def __init__(self): self.client = self._get_or_create_db() self.collection = self._get_or_create_collection() def _get_or_create_db(self): + ''' Get or create the database. ''' raise NotImplementedError def _get_or_create_collection(self): - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/embedchain/vectordb/chroma_db.py b/embedchain/vectordb/chroma_db.py index 6cfa7db1..46b1b9e5 100644 --- a/embedchain/vectordb/chroma_db.py +++ b/embedchain/vectordb/chroma_db.py @@ -7,6 +7,8 @@ from embedchain.vectordb.base_vector_db import BaseVectorDB class ChromaDB(BaseVectorDB): + ''' Vector database using ChromaDB. ''' + def __init__(self, db_dir=None, ef=None): if ef: self.ef = ef @@ -26,9 +28,11 @@ class ChromaDB(BaseVectorDB): super().__init__() def _get_or_create_db(self): + ''' Get or create the database. ''' return chromadb.Client(self.client_settings) def _get_or_create_collection(self): + ''' Get or create the collection. ''' return self.client.get_or_create_collection( 'embedchain_store', embedding_function=self.ef, )