Files
t6_mem0/embedchain/models/clip_processor.py
2023-10-04 12:08:21 -07:00

59 lines
2.1 KiB
Python

try:
import clip
import torch
from PIL import Image, UnidentifiedImageError
except ImportError:
raise ImportError("Images requires extra dependencies. Install with `pip install embedchain[images]`") from None
# Name of the CLIP checkpoint that ClipProcessor.load_model passes to clip.load.
MODEL_NAME = "ViT-B/32"
class ClipProcessor:
    """Static helpers that turn images and text into CLIP embedding vectors.

    Every public method is a ``@staticmethod``; the class is used as a
    namespace and is never instantiated by the code in this file.
    """

    # (model, preprocess) pair reused by get_text_features so the checkpoint
    # is loaded once per process instead of once per query. None until the
    # first text query arrives.
    _text_model_cache = None

    @staticmethod
    def _device():
        """Return ``"cuda"`` when CUDA is available, otherwise ``"cpu"``."""
        return "cuda" if torch.cuda.is_available() else "cpu"

    @staticmethod
    def _to_unit_vector(features):
        """L2-normalise ``features`` along the last dim; return row 0 as a list.

        The division is done out of place so the tensor handed in by the
        caller is never mutated.
        """
        features = features / features.norm(dim=-1, keepdim=True)
        return features.detach().cpu().numpy().tolist()[0]

    @staticmethod
    def load_model():
        """Load the CLIP checkpoint named by ``MODEL_NAME``.

        The model is placed on the GPU when CUDA is available, otherwise on
        the CPU, and is loaded with ``jit=False``.

        Returns:
            The ``(model, preprocess)`` tuple returned by ``clip.load``.
        """
        model, preprocess = clip.load(MODEL_NAME, device=ClipProcessor._device(), jit=False)
        return model, preprocess

    @staticmethod
    def get_image_features(image_url, model, preprocess):
        """Compute the normalised CLIP embedding of a single image.

        Args:
            image_url: Location of the image. It is handed directly to
                ``PIL.Image.open``, so despite the name it must be something
                that function accepts (e.g. a local file path).
            model: Object exposing ``encode_image``, e.g. the model returned
                by ``load_model``.
            preprocess: Callable that converts a PIL image into a tensor,
                e.g. the transform returned by ``load_model``.

        Returns:
            A dict with keys ``"content"`` (``image_url``), ``"embedding"``
            (the unit-length feature vector as a Python list) and
            ``"meta_data"`` (``{"url": image_url}``).

        Raises:
            FileNotFoundError: If ``image_url`` does not exist.
            UnidentifiedImageError: If the file cannot be read as an image.
        """
        device = ClipProcessor._device()
        try:
            image = Image.open(image_url)
        except FileNotFoundError as err:
            raise FileNotFoundError("The supplied file does not exist") from err
        except UnidentifiedImageError as err:
            raise UnidentifiedImageError("The supplied file is not an image") from err

        # Close the underlying file handle as soon as the pixels have been
        # turned into a tensor, even if preprocessing raises.
        with image:
            # unsqueeze(0) adds the leading batch dimension of size one.
            processed_image = preprocess(image).unsqueeze(0).to(device)

        with torch.no_grad():
            embedding = ClipProcessor._to_unit_vector(model.encode_image(processed_image))

        meta_data = {"url": image_url}
        return {"content": image_url, "embedding": embedding, "meta_data": meta_data}

    @staticmethod
    def get_text_features(query):
        """Compute the normalised CLIP embedding of a text query.

        The CLIP model is loaded on the first call and reused afterwards;
        reloading the checkpoint for every query is needlessly expensive.

        Args:
            query: Text passed to ``clip.tokenize``.

        Returns:
            The unit-length feature vector of the query as a Python list.
        """
        if ClipProcessor._text_model_cache is None:
            ClipProcessor._text_model_cache = ClipProcessor.load_model()
        model, _ = ClipProcessor._text_model_cache

        tokens = clip.tokenize(query).to(ClipProcessor._device())
        with torch.no_grad():
            return ClipProcessor._to_unit_vector(model.encode_text(tokens))