[Feature] Add support for running huggingface models locally (#1287)
This commit is contained in:
@@ -95,6 +95,7 @@ class BaseLlmConfig(BaseConfig):
|
||||
api_key: Optional[str] = None,
|
||||
endpoint: Optional[str] = None,
|
||||
model_kwargs: Optional[dict[str, Any]] = None,
|
||||
local: Optional[bool] = False,
|
||||
):
|
||||
"""
|
||||
Initializes a configuration class instance for the LLM.
|
||||
@@ -138,6 +139,8 @@ class BaseLlmConfig(BaseConfig):
|
||||
:type callbacks: Optional[list], optional
|
||||
:param query_type: The type of query to use, defaults to None
|
||||
:type query_type: Optional[str], optional
|
||||
:param local: If True, the model is run locally (applies to the huggingface provider), defaults to False
|
||||
:type local: Optional[bool], optional
|
||||
:raises ValueError: If the template is not valid as template should
|
||||
contain $context and $query (and optionally $history)
|
||||
:raises ValueError: Stream is not boolean
|
||||
@@ -165,6 +168,7 @@ class BaseLlmConfig(BaseConfig):
|
||||
self.api_key = api_key
|
||||
self.endpoint = endpoint
|
||||
self.model_kwargs = model_kwargs
|
||||
self.local = local
|
||||
|
||||
if isinstance(prompt, str):
|
||||
prompt = Template(prompt)
|
||||
|
||||
@@ -5,6 +5,7 @@ from typing import Optional
|
||||
|
||||
from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
|
||||
from langchain_community.llms.huggingface_hub import HuggingFaceHub
|
||||
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
|
||||
|
||||
from embedchain.config import BaseLlmConfig
|
||||
from embedchain.helpers.json_serializable import register_deserializable
|
||||
@@ -34,12 +35,15 @@ class HuggingFaceLlm(BaseLlm):
|
||||
|
||||
@staticmethod
|
||||
def _get_answer(prompt: str, config: BaseLlmConfig) -> str:
|
||||
if config.model:
|
||||
# If the user wants to run the model locally, they can do so by setting the `local` flag to True
|
||||
if config.model and config.local:
|
||||
return HuggingFaceLlm._from_pipeline(prompt=prompt, config=config)
|
||||
elif config.model:
|
||||
return HuggingFaceLlm._from_model(prompt=prompt, config=config)
|
||||
elif config.endpoint:
|
||||
return HuggingFaceLlm._from_endpoint(prompt=prompt, config=config)
|
||||
else:
|
||||
raise ValueError("Either `model` or `endpoint` must be set")
|
||||
raise ValueError("Either `model` or `endpoint` must be set in config")
|
||||
|
||||
@staticmethod
|
||||
def _from_model(prompt: str, config: BaseLlmConfig) -> str:
|
||||
@@ -53,15 +57,14 @@ class HuggingFaceLlm(BaseLlm):
|
||||
else:
|
||||
raise ValueError("`top_p` must be > 0.0 and < 1.0")
|
||||
|
||||
model = config.model or "google/flan-t5-xxl"
|
||||
model = config.model
|
||||
logging.info(f"Using HuggingFaceHub with model {model}")
|
||||
llm = HuggingFaceHub(
|
||||
huggingfacehub_api_token=os.environ["HUGGINGFACE_ACCESS_TOKEN"],
|
||||
repo_id=model,
|
||||
model_kwargs=model_kwargs,
|
||||
)
|
||||
|
||||
return llm(prompt)
|
||||
return llm.invoke(prompt)
|
||||
|
||||
@staticmethod
|
||||
def _from_endpoint(prompt: str, config: BaseLlmConfig) -> str:
|
||||
@@ -71,4 +74,23 @@ class HuggingFaceLlm(BaseLlm):
|
||||
task="text-generation",
|
||||
model_kwargs=config.model_kwargs,
|
||||
)
|
||||
return llm(prompt)
|
||||
return llm.invoke(prompt)
|
||||
|
||||
@staticmethod
def _from_pipeline(prompt: str, config: BaseLlmConfig) -> str:
    """
    Answer the prompt using a HuggingFace pipeline built from the configured model id.

    :param prompt: Prompt text handed to the pipeline
    :type prompt: str
    :param config: LLM configuration; `model`, `temperature`, `max_tokens` and `top_p` are read
    :type config: BaseLlmConfig
    :raises ValueError: If `top_p` is not strictly between 0.0 and 1.0
    :return: Result of invoking the pipeline on the prompt
    :rtype: str
    """
    # A falsy temperature (None or 0) falls back to 0.1.
    pipeline_kwargs = dict(
        temperature=config.temperature or 0.1,
        max_new_tokens=config.max_tokens,
    )

    # Reject an out-of-range `top_p` before the pipeline is constructed.
    if not (0.0 < config.top_p < 1.0):
        raise ValueError("`top_p` must be > 0.0 and < 1.0")
    pipeline_kwargs["top_p"] = config.top_p

    local_llm = HuggingFacePipeline.from_model_id(
        model_id=config.model,
        task="text-generation",
        pipeline_kwargs=pipeline_kwargs,
    )
    return local_llm.invoke(prompt)
|
||||
|
||||
@@ -425,6 +425,7 @@ def validate_config(config_data):
|
||||
Optional("api_key"): str,
|
||||
Optional("endpoint"): str,
|
||||
Optional("model_kwargs"): dict,
|
||||
Optional("local"): bool,
|
||||
},
|
||||
},
|
||||
Optional("vectordb"): {
|
||||
|
||||
Reference in New Issue
Block a user