Added documentation (#219)

2023-07-11 08:31:42 +05:30
parent eda28cc491
commit 6936d6983d
15 changed files with 21 additions and 1 deletion
--- a/embedchain/chunkers/base_chunker.py
+++ b/embedchain/chunkers/base_chunker.py
@@ -3,6 +3,7 @@ import hashlib
 class BaseChunker:
    def __init__(self, text_splitter):
        '''
        Initialize the chunker.

        :param text_splitter: splitter object kept on the instance as
            `self.text_splitter`. Its interface is not visible in this
            hunk; presumably it is used by `create_chunks` (TODO confirm).
        '''
        self.text_splitter = text_splitter
    def create_chunks(self, loader, src):
--- a/embedchain/chunkers/docx_file.py
+++ b/embedchain/chunkers/docx_file.py
@@ -14,6 +14,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 class DocxFileChunker(BaseChunker):
    ''' Chunker for .docx file. '''
    def __init__(self, config: Optional[ChunkerConfig] = None):
        if config is None:
            config = TEXT_SPLITTER_CHUNK_PARAMS
--- a/embedchain/chunkers/pdf_file.py
+++ b/embedchain/chunkers/pdf_file.py
@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 class PdfFileChunker(BaseChunker):
    ''' Chunker for PDF file. '''
    def __init__(self, config: Optional[ChunkerConfig] = None):
        if config is None:
            config = TEXT_SPLITTER_CHUNK_PARAMS
--- a/embedchain/chunkers/qna_pair.py
+++ b/embedchain/chunkers/qna_pair.py
@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 class QnaPairChunker(BaseChunker):
    ''' Chunker for QnA pair. '''
    def __init__(self, config: Optional[ChunkerConfig] = None):
        if config is None:
            config = TEXT_SPLITTER_CHUNK_PARAMS
--- a/embedchain/chunkers/text.py
+++ b/embedchain/chunkers/text.py
@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 class TextChunker(BaseChunker):
    ''' Chunker for text. '''
    def __init__(self, config: Optional[ChunkerConfig] = None):
        if config is None:
            config = TEXT_SPLITTER_CHUNK_PARAMS
--- a/embedchain/chunkers/web_page.py
+++ b/embedchain/chunkers/web_page.py
@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 class WebPageChunker(BaseChunker):
    ''' Chunker for web page. '''
    def __init__(self, config: Optional[ChunkerConfig] = None):
        if config is None:
            config = TEXT_SPLITTER_CHUNK_PARAMS
--- a/embedchain/chunkers/youtube_video.py
+++ b/embedchain/chunkers/youtube_video.py
@@ -13,6 +13,7 @@ TEXT_SPLITTER_CHUNK_PARAMS = {
 class YoutubeVideoChunker(BaseChunker):
    ''' Chunker for YouTube video. '''
    def __init__(self, config: Optional[ChunkerConfig] = None):
        if config is None:
            config = TEXT_SPLITTER_CHUNK_PARAMS
--- a/embedchain/loaders/docx_file.py
+++ b/embedchain/loaders/docx_file.py
@@ -2,6 +2,7 @@ from langchain.document_loaders import Docx2txtLoader
 class DocxFileLoader:
    def load_data(self, url):
        ''' Load data from a .docx file. '''
        loader = Docx2txtLoader(url)
        output = []
        data = loader.load()
--- a/embedchain/loaders/local_qna_pair.py
+++ b/embedchain/loaders/local_qna_pair.py
@@ -1,6 +1,7 @@
 class LocalQnaPairLoader:
    def load_data(self, content):
        ''' Load data from a local QnA pair. '''
        question, answer = content
        content = f"Q: {question}\nA: {answer}"
        meta_data = {
--- a/embedchain/loaders/local_text.py
+++ b/embedchain/loaders/local_text.py
@@ -1,6 +1,7 @@
 class LocalTextLoader:
    def load_data(self, content):
        ''' Load data from a local text file. '''
        meta_data = {
            "url": "local",
        }
--- a/embedchain/loaders/pdf_file.py
+++ b/embedchain/loaders/pdf_file.py
@@ -6,6 +6,7 @@ from embedchain.utils import clean_string
 class PdfFileLoader:
    def load_data(self, url):
        ''' Load data from a PDF file. '''
        loader = PyPDFLoader(url)
        output = []
        pages = loader.load_and_split()
--- a/embedchain/loaders/web_page.py
+++ b/embedchain/loaders/web_page.py
@@ -8,6 +8,7 @@ from embedchain.utils import clean_string
 class WebPageLoader:
    def load_data(self, url):
        ''' Load data from a web page. '''
        response = requests.get(url)
        data = response.content
        soup = BeautifulSoup(data, 'html.parser')
--- a/embedchain/loaders/youtube_video.py
+++ b/embedchain/loaders/youtube_video.py
@@ -6,6 +6,7 @@ from embedchain.utils import clean_string
 class YoutubeVideoLoader:
    def load_data(self, url):
        ''' Load data from a YouTube video. '''
        loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
        doc = loader.load()
        output = []
--- a/embedchain/vectordb/base_vector_db.py
+++ b/embedchain/vectordb/base_vector_db.py
@@ -1,9 +1,12 @@
 class BaseVectorDB:
    ''' Base class for vector database. '''
    def __init__(self):
        '''
        Initialize the vector database.

        Stores the result of `_get_or_create_db()` as `self.client` and the
        result of `_get_or_create_collection()` as `self.collection`.
        '''
        # The client is assigned first: a subclass's collection hook may read
        # `self.client` (ChromaDB's `_get_or_create_collection` does).
        self.client = self._get_or_create_db()
        self.collection = self._get_or_create_collection()
    def _get_or_create_db(self):
        '''
        Get or create the database.

        Hook meant to be overridden by subclasses. `__init__` stores the
        returned object as `self.client`.

        :raises NotImplementedError: always, in this base implementation.
        '''
        raise NotImplementedError
    def _get_or_create_collection(self):
--- a/embedchain/vectordb/chroma_db.py
+++ b/embedchain/vectordb/chroma_db.py
@@ -7,6 +7,8 @@ from embedchain.vectordb.base_vector_db import BaseVectorDB
 class ChromaDB(BaseVectorDB):
    ''' Vector database using ChromaDB. '''
    def __init__(self, db_dir=None, ef=None):
        if ef:
            self.ef = ef
@@ -26,9 +28,11 @@ class ChromaDB(BaseVectorDB):
        super().__init__()
    def _get_or_create_db(self):
        '''
        Get or create the database.

        :return: a `chromadb.Client` constructed from `self.client_settings`.
        '''
        # NOTE(review): `self.client_settings` is not assigned in the visible
        # part of this diff; presumably `__init__` sets it before calling
        # `super().__init__()` -- confirm against the full file.
        return chromadb.Client(self.client_settings)
    def _get_or_create_collection(self):
        '''
        Get or create the collection.

        Calls `get_or_create_collection` on `self.client` for the collection
        named 'embedchain_store', passing `self.ef` as the embedding function.

        :return: whatever `self.client.get_or_create_collection` returns.
        '''
        # Requires `self.client` to be set already; `BaseVectorDB.__init__`
        # assigns it before invoking this hook.
        return self.client.get_or_create_collection(
            'embedchain_store', embedding_function=self.ef,
        )