[Feature] Add support for running huggingface models locally (#1287)

2024-02-27 15:05:17 -08:00
parent 752f638cfc
commit 56bf33ab7f
5 changed files with 95 additions and 46 deletions
--- a/embedchain/config/llm/base.py
+++ b/embedchain/config/llm/base.py
@@ -95,6 +95,7 @@ class BaseLlmConfig(BaseConfig):
        api_key: Optional[str] = None,
        endpoint: Optional[str] = None,
        model_kwargs: Optional[dict[str, Any]] = None,
+        local: Optional[bool] = False,
    ):
        """
        Initializes a configuration class instance for the LLM.
@@ -138,6 +139,8 @@ class BaseLlmConfig(BaseConfig):
        :type callbacks: Optional[list], optional
        :param query_type: The type of query to use, defaults to None
        :type query_type: Optional[str], optional
+        :param local: If True and `model` is set, run the model locally (huggingface provider only), defaults to False
+        :type local: Optional[bool], optional
        :raises ValueError: If the template is not valid as template should
        contain $context and $query (and optionally $history)
        :raises ValueError: Stream is not boolean
@@ -165,6 +168,7 @@ class BaseLlmConfig(BaseConfig):
        self.api_key = api_key
        self.endpoint = endpoint
        self.model_kwargs = model_kwargs
+        self.local = local

        if isinstance(prompt, str):
            prompt = Template(prompt)
--- a/embedchain/llm/huggingface.py
+++ b/embedchain/llm/huggingface.py
@@ -5,6 +5,7 @@ from typing import Optional

 from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
 from langchain_community.llms.huggingface_hub import HuggingFaceHub
+from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

 from embedchain.config import BaseLlmConfig
 from embedchain.helpers.json_serializable import register_deserializable
@@ -34,12 +35,15 @@ class HuggingFaceLlm(BaseLlm):

    @staticmethod
    def _get_answer(prompt: str, config: BaseLlmConfig) -> str:
-        if config.model:
+        # When `local` is True, run `config.model` through HuggingFacePipeline instead of HuggingFaceHub
+        if config.model and config.local:
+            return HuggingFaceLlm._from_pipeline(prompt=prompt, config=config)
+        elif config.model:
            return HuggingFaceLlm._from_model(prompt=prompt, config=config)
        elif config.endpoint:
            return HuggingFaceLlm._from_endpoint(prompt=prompt, config=config)
        else:
-            raise ValueError("Either `model` or `endpoint` must be set")
+            raise ValueError("Either `model` or `endpoint` must be set in config")

    @staticmethod
    def _from_model(prompt: str, config: BaseLlmConfig) -> str:
@@ -53,15 +57,14 @@ class HuggingFaceLlm(BaseLlm):
        else:
            raise ValueError("`top_p` must be > 0.0 and < 1.0")

-        model = config.model or "google/flan-t5-xxl"
+        model = config.model
        logging.info(f"Using HuggingFaceHub with model {model}")
        llm = HuggingFaceHub(
            huggingfacehub_api_token=os.environ["HUGGINGFACE_ACCESS_TOKEN"],
            repo_id=model,
            model_kwargs=model_kwargs,
        )
-
-        return llm(prompt)
+        return llm.invoke(prompt)

    @staticmethod
    def _from_endpoint(prompt: str, config: BaseLlmConfig) -> str:
@@ -71,4 +74,23 @@ class HuggingFaceLlm(BaseLlm):
            task="text-generation",
            model_kwargs=config.model_kwargs,
        )
-        return llm(prompt)
+        return llm.invoke(prompt)
+
+    @staticmethod
+    def _from_pipeline(prompt: str, config: BaseLlmConfig) -> str:
+        model_kwargs = {
+            "temperature": config.temperature or 0.1,
+            "max_new_tokens": config.max_tokens,
+        }
+
+        if 0.0 < config.top_p < 1.0:
+            model_kwargs["top_p"] = config.top_p
+        else:
+            raise ValueError("`top_p` must be > 0.0 and < 1.0")
+
+        llm = HuggingFacePipeline.from_model_id(
+            model_id=config.model,
+            task="text-generation",
+            pipeline_kwargs=model_kwargs,
+        )
+        return llm.invoke(prompt)
--- a/embedchain/utils/misc.py
+++ b/embedchain/utils/misc.py
@@ -425,6 +425,7 @@ def validate_config(config_data):
                    Optional("api_key"): str,
                    Optional("endpoint"): str,
                    Optional("model_kwargs"): dict,
+                    Optional("local"): bool,
                },
            },
            Optional("vectordb"): {