Add GPT4Vision Image loader (#1089)

Co-authored-by: Deshraj Yadav <deshrajdry@gmail.com>
2024-01-02 03:57:23 +05:30
parent 367d6b70e2
commit c62663f2e4
29 changed files with 291 additions and 714 deletions
--- a/embedchain/models/clip_processor.py
+++ b/embedchain/models/clip_processor.py
@@ -1,42 +0,0 @@
-try:
-    from PIL import Image, UnidentifiedImageError
-    from sentence_transformers import SentenceTransformer
-except ImportError:
-    raise ImportError("Images requires extra dependencies. Install with `pip install 'embedchain[images]'") from None
-
-MODEL_NAME = "clip-ViT-B-32"
-
-
-class ClipProcessor:
-    @staticmethod
-    def load_model():
-        """Load data from a director of images."""
-        # load model and image preprocessing
-        model = SentenceTransformer(MODEL_NAME)
-        return model
-
-    @staticmethod
-    def get_image_features(image_url, model):
-        """
-        Applies the CLIP model to evaluate the vector representation of the supplied image
-        """
-        try:
-            # load image
-            image = Image.open(image_url)
-        except FileNotFoundError:
-            raise FileNotFoundError("The supplied file does not exist`")
-        except UnidentifiedImageError:
-            raise UnidentifiedImageError("The supplied file is not an image`")
-
-        image_features = model.encode(image)
-        meta_data = {"url": image_url}
-        return {"content": image_url, "embedding": image_features.tolist(), "meta_data": meta_data}
-
-    @staticmethod
-    def get_text_features(query):
-        """
-        Applies the CLIP model to evaluate the vector representation of the supplied text
-        """
-        model = ClipProcessor.load_model()
-        text_features = model.encode(query)
-        return text_features.tolist()
--- a/embedchain/models/data_type.py
+++ b/embedchain/models/data_type.py
@@ -24,7 +24,7 @@ class IndirectDataType(Enum):
    NOTION = "notion"
    CSV = "csv"
    MDX = "mdx"
-    IMAGES = "images"
+    IMAGE = "image"
    UNSTRUCTURED = "unstructured"
    JSON = "json"
    OPENAPI = "openapi"
@@ -62,7 +62,7 @@ class DataType(Enum):
    CSV = IndirectDataType.CSV.value
    MDX = IndirectDataType.MDX.value
    QNA_PAIR = SpecialDataType.QNA_PAIR.value
-    IMAGES = IndirectDataType.IMAGES.value
+    IMAGE = IndirectDataType.IMAGE.value
    UNSTRUCTURED = IndirectDataType.UNSTRUCTURED.value
    JSON = IndirectDataType.JSON.value
    OPENAPI = IndirectDataType.OPENAPI.value