Files
t6_mem0/embedchain/loaders/images.py
2023-10-04 12:08:21 -07:00

42 lines
1.6 KiB
Python

import hashlib
import logging
import os
from embedchain.loaders.base_loader import BaseLoader
class ImagesLoader(BaseLoader):
    """Loader that converts an image file, or a directory of image files, into CLIP features."""

    def load_data(self, image_url):
        """
        Load one image, or every entry of a directory, and represent each image with CLIP features.

        Despite its name, ``image_url`` is used as a local filesystem path: it is handed to
        ``os.path.isfile``, ``os.listdir`` and ``os.path.getsize``.

        Error handling differs between the two modes:

        * single file: any exception raised while extracting features propagates to the caller;
        * directory: a failing entry is logged (with traceback) and skipped, so the remaining
          entries are still processed.

        A path that is not an existing regular file is passed to ``os.listdir``, which raises
        ``OSError`` when the path is not a readable directory.

        :param image_url: Path to a single image file or to a directory containing image files
        :return: A dict with ``doc_id`` (SHA-256 hex digest of the path's size and timestamps
            followed by the path itself) and ``data`` (a list holding one
            ``ClipProcessor.get_image_features`` result per successfully processed image)
        """
        # Imported lazily so the CLIP dependencies are only required when images are loaded.
        from embedchain.models.clip_processor import ClipProcessor

        # Load the model and its image preprocessing once and reuse them for every image.
        model, preprocess = ClipProcessor.load_model()

        if os.path.isfile(image_url):
            data = [ClipProcessor.get_image_features(image_url, model, preprocess)]
        else:
            data = []
            # os.listdir() returns entries in arbitrary order; sorting keeps the order of
            # ``data`` reproducible across runs and platforms.
            for filename in sorted(os.listdir(image_url)):
                filepath = os.path.join(image_url, filename)
                try:
                    data.append(ClipProcessor.get_image_features(filepath, model, preprocess))
                except Exception as e:
                    # Best effort: log the entry that could not be loaded and keep going.
                    logging.exception("Failed to load the file %s. Exception %s", filepath, e)

        # The id is derived from the metadata of ``image_url`` itself (for a directory, the
        # directory entry rather than the files inside it): size, last-modified time and ctime.
        # ctime is the creation time on Windows and the last metadata change on Unix.
        image_path_metadata = [
            str(os.path.getsize(image_url)),
            str(os.path.getmtime(image_url)),
            str(os.path.getctime(image_url)),
        ]
        doc_id = hashlib.sha256((" ".join(image_path_metadata) + image_url).encode()).hexdigest()
        return {
            "doc_id": doc_id,
            "data": data,
        }