Add ollama embeddings (#1634)
@@ -12,6 +12,8 @@ class BaseLlmConfig(ABC):
         temperature: float = 0,
         max_tokens: int = 3000,
         top_p: float = 1,
+
+        # Ollama specific
         base_url: Optional[str] = None
     ):
         """
@@ -35,4 +37,6 @@ class BaseLlmConfig(ABC):
         self.temperature = temperature
         self.max_tokens = max_tokens
         self.top_p = top_p
+
+        # Ollama specific
         self.base_url = base_url
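
For orientation, a minimal sketch of what the regrouped signature accepts once this patch lands. The concrete subclass, the import path, and the availability of defaults for any parameters not shown in the hunks above are assumptions, not part of this diff:

    from mem0.configs.llms.base import BaseLlmConfig  # import path assumed from the class name

    class LocalLlmConfig(BaseLlmConfig):
        pass  # illustrative concrete subclass; BaseLlmConfig is declared abstract

    config = LocalLlmConfig(
        temperature=0,
        max_tokens=3000,
        top_p=1,
        base_url="http://localhost:11434",  # Ollama's default endpoint
    )
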
@@ -1,7 +1,21 @@
+from typing import Optional
 from abc import ABC, abstractmethod
 
+from mem0.configs.embeddings.base import BaseEmbedderConfig
+
 
 class EmbeddingBase(ABC):
+    def __init__(self, config: Optional[BaseEmbedderConfig] = None):
+        """Initialize a base embedding class
+
+        :param config: Embedder configuration option class, defaults to None
+        :type config: Optional[BaseEmbedderConfig], optional
+        """
+        if config is None:
+            self.config = BaseEmbedderConfig()
+        else:
+            self.config = config
+
     @abstractmethod
     def embed(self, text):
         """
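
The new base class gives every embedder a shared config object instead of ad-hoc attributes. A hedged sketch of a custom embedder built on it; BaseEmbedderConfig's fields (model, embedding_dims, base_url) are inferred from how the rest of this diff reads them:

    from typing import Optional

    from mem0.configs.embeddings.base import BaseEmbedderConfig
    from mem0.embeddings.base import EmbeddingBase

    class DummyEmbedding(EmbeddingBase):
        def __init__(self, config: Optional[BaseEmbedderConfig] = None):
            super().__init__(config)  # falls back to BaseEmbedderConfig() when None
            if not self.config.embedding_dims:
                self.config.embedding_dims = 8  # arbitrary size for the sketch

        def embed(self, text):
            # A real embedder would call a model here; this returns a fixed zero vector.
            return [0.0] * self.config.embedding_dims
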
@@ -1,20 +1,33 @@
-import ollama
+from typing import Optional
+
+from mem0.configs.embeddings.base import BaseEmbedderConfig
 from mem0.embeddings.base import EmbeddingBase
 
+try:
+    from ollama import Client
+except ImportError:
+    raise ImportError("Ollama requires extra dependencies. Install with `pip install ollama`") from None
+
 
 class OllamaEmbedding(EmbeddingBase):
-    def __init__(self, model="nomic-embed-text"):
-        self.model = model
+    def __init__(self, config: Optional[BaseEmbedderConfig] = None):
+        super().__init__(config)
+
+        if not self.config.model:
+            self.config.model = "nomic-embed-text"
+        if not self.config.embedding_dims:
+            self.config.embedding_dims = 512
+
+        self.client = Client(host=self.config.base_url)
         self._ensure_model_exists()
-        self.dims = 512
 
     def _ensure_model_exists(self):
         """
         Ensure the specified model exists locally. If not, pull it from Ollama.
         """
-        model_list = [m["name"] for m in ollama.list()["models"]]
-        if not any(m.startswith(self.model) for m in model_list):
-            ollama.pull(self.model)
+        local_models = self.client.list()["models"]
+        if not any(model.get("name") == self.config.model for model in local_models):
+            self.client.pull(self.config.model)
 
     def embed(self, text):
         """
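
The rewritten _ensure_model_exists assumes client.list() returns a payload whose "models" entries each carry a "name" key, and it now matches the configured name exactly rather than by prefix. Roughly, with illustrative values:

    # Shape assumed by the check above (values are made up):
    local_models = [
        {"name": "nomic-embed-text:latest"},
        {"name": "llama3:latest"},
    ]
    needs_pull = not any(m.get("name") == "nomic-embed-text" for m in local_models)
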
@@ -26,5 +39,5 @@ class OllamaEmbedding(EmbeddingBase):
         Returns:
             list: The embedding vector.
         """
-        response = ollama.embeddings(model=self.model, prompt=text)
+        response = self.client.embeddings(model=self.config.model, prompt=text)
         return response["embedding"]
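
Putting the Ollama pieces together, a usage sketch assuming a reachable Ollama server and the ollama package installed; BaseEmbedderConfig's constructor keywords are inferred from the attributes this diff reads:

    from mem0.configs.embeddings.base import BaseEmbedderConfig
    from mem0.embeddings.ollama import OllamaEmbedding

    config = BaseEmbedderConfig(base_url="http://localhost:11434")
    embedder = OllamaEmbedding(config)  # defaults applied: nomic-embed-text, 512 dims
    vector = embedder.embed("The quick brown fox")
    print(len(vector))
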
@@ -1,13 +1,20 @@
+from typing import Optional
 from openai import OpenAI
 
+from mem0.configs.embeddings.base import BaseEmbedderConfig
 from mem0.embeddings.base import EmbeddingBase
 
 
 class OpenAIEmbedding(EmbeddingBase):
-    def __init__(self, model="text-embedding-3-small"):
+    def __init__(self, config: Optional[BaseEmbedderConfig] = None):
+        super().__init__(config)
+
+        if not self.config.model:
+            self.config.model = "text-embedding-3-small"
+        if not self.config.embedding_dims:
+            self.config.embedding_dims = 1536
+
         self.client = OpenAI()
-        self.model = model
-        self.dims = 1536
 
     def embed(self, text):
         """
@@ -21,7 +28,7 @@ class OpenAIEmbedding(EmbeddingBase):
         """
         text = text.replace("\n", " ")
         return (
-            self.client.embeddings.create(input=[text], model=self.model)
+            self.client.embeddings.create(input=[text], model=self.config.model)
             .data[0]
             .embedding
         )
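
The OpenAI counterpart, a sketch assuming OPENAI_API_KEY is set in the environment; unset config fields fall back to text-embedding-3-small and 1536 dims per the hunks above:

    from mem0.configs.embeddings.base import BaseEmbedderConfig
    from mem0.embeddings.openai import OpenAIEmbedding

    embedder = OpenAIEmbedding(BaseEmbedderConfig(model="text-embedding-3-large"))
    vector = embedder.embed("line one\nline two")  # newlines are replaced before the API call
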
@@ -48,7 +48,7 @@ telemetry = AnonymousTelemetry(
 def capture_event(event_name, memory_instance, additional_data=None):
     event_data = {
         "collection": memory_instance.collection_name,
-        "vector_size": memory_instance.embedding_model.dims,
+        "vector_size": memory_instance.embedding_model.config.embedding_dims,
         "history_store": "sqlite",
         "vector_store": f"{memory_instance.vector_store.__class__.__module__}.{memory_instance.vector_store.__class__.__name__}",
         "llm": f"{memory_instance.llm.__class__.__module__}.{memory_instance.llm.__class__.__name__}",
@@ -33,8 +33,7 @@ class LlmFactory:
 class EmbedderFactory:
     provider_to_class = {
         "openai": "mem0.embeddings.openai.OpenAIEmbedding",
-        "ollama": "mem0.embeddings.ollama.OllamaEmbedding",
-        "huggingface": "mem0.embeddings.huggingface.HuggingFaceEmbedding"
+        "ollama": "mem0.embeddings.ollama.OllamaEmbedding"
     }
 
     @classmethod
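
The factory's create() method is unchanged by this diff, so the sketch below only shows how a provider string resolves through provider_to_class; the mem0.utils.factory module path is an assumption:

    import importlib

    from mem0.utils.factory import EmbedderFactory  # module path assumed

    dotted = EmbedderFactory.provider_to_class["ollama"]
    module_name, class_name = dotted.rsplit(".", 1)
    embedder_cls = getattr(importlib.import_module(module_name), class_name)
    embedder = embedder_cls()  # config defaults applied in __init__
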
@@ -73,8 +73,12 @@ class VectorStoreConfig(BaseModel):
 
         if isinstance(v, dict):
             if provider == "qdrant":
+                if "path" not in v:
+                    v["path"] = "/tmp/qdrant"
                 return QdrantConfig(**v)
             elif provider == "chromadb":
+                if "path" not in v:
+                    v["path"] = "/tmp/chromadb"
                 return ChromaDbConfig(**v)
 
         return v
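
The validator change only touches dict inputs, injecting an on-disk default before the provider config is constructed. A standalone sketch of the defaulting step; QdrantConfig's accepted fields are not shown in this diff:

    raw = {"collection_name": "mem0"}  # hypothetical user-supplied dict config
    if "path" not in raw:
        raw["path"] = "/tmp/qdrant"    # default injected by the validator
    # raw is then passed to QdrantConfig(**raw)
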