Add support for image dataset (#571)
Co-authored-by: Rupesh Bansal <rupeshbansal@Shankars-MacBook-Air.local>
This commit is contained in:
37
embedchain/loaders/images.py
Normal file
37
embedchain/loaders/images.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import os
|
||||
import logging
|
||||
import hashlib
|
||||
from embedchain.loaders.base_loader import BaseLoader
|
||||
|
||||
|
||||
class ImagesLoader(BaseLoader):
    """Loader that turns a local image file, or a directory of image files, into CLIP feature vectors."""

    def load_data(self, image_url: str) -> dict:
        """
        Loads images from the supplied directory/file and applies CLIP model transformation to represent these
        images in vector form

        :param image_url: Despite the name, a local filesystem path: either a single image file, or a
            directory whose entries are each loaded as an image
        :return: A dict with two keys: "doc_id", the SHA-256 hex digest of the path's size, modification
            time and ctime followed by the path string itself; and "data", a list holding one
            ``ClipProcessor.get_image_features`` result per image that was loaded
        :raises OSError: If the path does not exist, or is neither a regular file nor a listable directory
        """
        # Stat the path once, up front: a missing path raises here, before the CLIP model is loaded, and
        # the three values that feed the doc_id come from a single consistent snapshot.
        path_stat = os.stat(image_url)

        # load model and image preprocessing
        from embedchain.models.clip_processor import ClipProcessor

        model, preprocess = ClipProcessor.load_model()
        if os.path.isfile(image_url):
            # Single file: a failure here propagates to the caller (no best-effort handling)
            data = [ClipProcessor.get_image_features(image_url, model, preprocess)]
        else:
            data = []
            # os.listdir returns entries in arbitrary order; sort so repeated loads yield the same sequence
            for filename in sorted(os.listdir(image_url)):
                filepath = os.path.join(image_url, filename)
                try:
                    data.append(ClipProcessor.get_image_features(filepath, model, preprocess))
                except Exception as e:
                    # Best effort: log the file that was not loaded and carry on with the remaining entries
                    logging.exception("Failed to load the file %s. Exception %s", filepath, e)

        # Size, last-modified time and ctime (creation time on Windows, metadata-change time on Unix)
        image_path_metadata = [str(path_stat.st_size), str(path_stat.st_mtime), str(path_stat.st_ctime)]
        doc_id = hashlib.sha256((" ".join(image_path_metadata) + image_url).encode()).hexdigest()
        return {
            "doc_id": doc_id,
            "data": data,
        }
|
||||
Reference in New Issue
Block a user