Files
t6_mem0/embedchain/loaders/image.py
Sidharth Mohanty c62663f2e4 Add GPT4Vision Image loader (#1089)
Co-authored-by: Deshraj Yadav <deshrajdry@gmail.com>
2024-01-02 03:57:23 +05:30

50 lines
2.0 KiB
Python

import base64
import hashlib
import os
from pathlib import Path
from openai import OpenAI
from embedchain.helpers.json_serializable import register_deserializable
from embedchain.loaders.base_loader import BaseLoader
DESCRIBE_IMAGE_PROMPT = "Describe the image:"
@register_deserializable
class ImageLoader(BaseLoader):
def __init__(self, max_tokens: int = 500, api_key: str = None, prompt: str = None):
super().__init__()
self.custom_prompt = prompt or DESCRIBE_IMAGE_PROMPT
self.max_tokens = max_tokens
self.api_key = api_key or os.environ["OPENAI_API_KEY"]
self.client = OpenAI(api_key=self.api_key)
def _encode_image(self, image_path: str):
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode("utf-8")
def _create_completion_request(self, content: str):
return self.client.chat.completions.create(
model="gpt-4-vision-preview", messages=[{"role": "user", "content": content}], max_tokens=self.max_tokens
)
def _process_url(self, url: str):
if url.startswith("http"):
return [{"type": "text", "text": self.custom_prompt}, {"type": "image_url", "image_url": {"url": url}}]
elif Path(url).is_file():
extension = Path(url).suffix.lstrip(".")
encoded_image = self._encode_image(url)
image_data = f"data:image/{extension};base64,{encoded_image}"
return [{"type": "text", "text": self.custom_prompt}, {"type": "image", "image_url": {"url": image_data}}]
else:
raise ValueError(f"Invalid URL or file path: {url}")
def load_data(self, url: str):
content = self._process_url(url)
response = self._create_completion_request(content)
content = response.choices[0].message.content
doc_id = hashlib.sha256((content + url).encode()).hexdigest()
return {"doc_id": doc_id, "data": [{"content": content, "meta_data": {"url": url, "type": "image"}}]}