From 56bf33ab7fdd71372771cb1bdc2e60d9d1eab0b9 Mon Sep 17 00:00:00 2001 From: Deshraj Yadav Date: Tue, 27 Feb 2024 15:05:17 -0800 Subject: [PATCH] [Feature] Add support for running huggingface models locally (#1287) --- docs/components/llms.mdx | 95 +++++++++++++++++++++-------------- embedchain/config/llm/base.py | 4 ++ embedchain/llm/huggingface.py | 34 ++++++++++--- embedchain/utils/misc.py | 1 + tests/llm/test_huggingface.py | 7 +-- 5 files changed, 95 insertions(+), 46 deletions(-) diff --git a/docs/components/llms.mdx b/docs/components/llms.mdx index 8cf90b74..2c6c78cf 100644 --- a/docs/components/llms.mdx +++ b/docs/components/llms.mdx @@ -451,7 +451,15 @@ pip install --upgrade 'embedchain[huggingface-hub]' First, set `HUGGINGFACE_ACCESS_TOKEN` in environment variable which you can obtain from [their platform](https://huggingface.co/settings/tokens). -Once you have the token, load the app using the config yaml file: +You can load the LLMs from Hugging Face using three ways: + +- [Hugging Face Hub](#hugging-face-hub) +- [Hugging Face Local Pipelines](#hugging-face-local-pipelines) +- [Hugging Face Inference Endpoint](#hugging-face-inference-endpoint) + +### Hugging Face Hub + +To load the model from Hugging Face Hub, use the following code: @@ -461,24 +469,49 @@ from embedchain import App os.environ["HUGGINGFACE_ACCESS_TOKEN"] = "xxx" -# load llm configuration from config.yaml file -app = App.from_config(config_path="config.yaml") -``` +config = { + "app": {"config": {"id": "my-app"}}, + "llm": { + "provider": "huggingface", + "config": { + "model": "bigscience/bloom-1b7", + "top_p": 0.5, + "max_length": 200, + "temperature": 0.1, + }, + }, +} -```yaml config.yaml -llm: - provider: huggingface - config: - model: 'google/flan-t5-xxl' - temperature: 0.5 - max_tokens: 1000 - top_p: 0.5 - stream: false +app = App.from_config(config=config) ``` -### Custom Endpoints +### Hugging Face Local Pipelines +If you want to load the locally downloaded model from Hugging 
Face, you can do so by following the code provided below: + + ```python main.py +from embedchain import App + +config = { + "app": {"config": {"id": "my-app"}}, + "llm": { + "provider": "huggingface", + "config": { + "model": "Trendyol/Trendyol-LLM-7b-chat-v0.1", + "local": True, # Necessary if you want to run model locally + "top_p": 0.5, + "max_tokens": 1000, + "temperature": 0.1, + }, + } +} +app = App.from_config(config=config) +``` + + +### Hugging Face Inference Endpoint You can also use [Hugging Face Inference Endpoints](https://huggingface.co/docs/inference-endpoints/index#-inference-endpoints) to access custom endpoints. First, set the `HUGGINGFACE_ACCESS_TOKEN` as above. @@ -487,35 +520,23 @@ Then, load the app using the config yaml file: ```python main.py -import os from embedchain import App -os.environ["HUGGINGFACE_ACCESS_TOKEN"] = "xxx" +config = { + "app": {"config": {"id": "my-app"}}, + "llm": { + "provider": "huggingface", + "config": { + "endpoint": "https://api-inference.huggingface.co/models/gpt2", + "model_kwargs": {"temperature": 0.1, "max_new_tokens": 100} + }, + }, +} +app = App.from_config(config=config) -# load llm configuration from config.yaml file -app = App.from_config(config_path="config.yaml") -``` - -```yaml config.yaml -llm: - provider: huggingface - config: - endpoint: https://api-inference.huggingface.co/models/gpt2 # replace with your personal endpoint ``` -If your endpoint requires additional parameters, you can pass them in the `model_kwargs` field: - -``` -llm: - provider: huggingface - config: - endpoint: - model_kwargs: - max_new_tokens: 100 - temperature: 0.5 -``` - Currently only supports `text-generation` and `text2text-generation` for now [[ref](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html?highlight=huggingfaceendpoint#)]. 
See langchain's [hugging face endpoint](https://python.langchain.com/docs/integrations/chat/huggingface#huggingfaceendpoint) for more information. diff --git a/embedchain/config/llm/base.py b/embedchain/config/llm/base.py index 3059aef0..fb6491d1 100644 --- a/embedchain/config/llm/base.py +++ b/embedchain/config/llm/base.py @@ -95,6 +95,7 @@ class BaseLlmConfig(BaseConfig): api_key: Optional[str] = None, endpoint: Optional[str] = None, model_kwargs: Optional[dict[str, Any]] = None, + local: Optional[bool] = False, ): """ Initializes a configuration class instance for the LLM. @@ -138,6 +139,8 @@ class BaseLlmConfig(BaseConfig): :type callbacks: Optional[list], optional :param query_type: The type of query to use, defaults to None :type query_type: Optional[str], optional + :param local: If True, the model will be run locally, defaults to False (for huggingface provider) + :type local: Optional[bool], optional :raises ValueError: If the template is not valid as template should contain $context and $query (and optionally $history) :raises ValueError: Stream is not boolean @@ -165,6 +168,7 @@ class BaseLlmConfig(BaseConfig): self.api_key = api_key self.endpoint = endpoint self.model_kwargs = model_kwargs + self.local = local if isinstance(prompt, str): prompt = Template(prompt) diff --git a/embedchain/llm/huggingface.py b/embedchain/llm/huggingface.py index a8cfe2da..8cf0f8b5 100644 --- a/embedchain/llm/huggingface.py +++ b/embedchain/llm/huggingface.py @@ -5,6 +5,7 @@ from typing import Optional from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint from langchain_community.llms.huggingface_hub import HuggingFaceHub +from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline from embedchain.config import BaseLlmConfig from embedchain.helpers.json_serializable import register_deserializable @@ -34,12 +35,15 @@ class HuggingFaceLlm(BaseLlm): @staticmethod def _get_answer(prompt: str, config: BaseLlmConfig) -> str: - if 
config.model: + # If the user wants to run the model locally, they can do so by setting the `local` flag to True + if config.model and config.local: + return HuggingFaceLlm._from_pipeline(prompt=prompt, config=config) + elif config.model: return HuggingFaceLlm._from_model(prompt=prompt, config=config) elif config.endpoint: return HuggingFaceLlm._from_endpoint(prompt=prompt, config=config) else: - raise ValueError("Either `model` or `endpoint` must be set") + raise ValueError("Either `model` or `endpoint` must be set in config") @staticmethod def _from_model(prompt: str, config: BaseLlmConfig) -> str: @@ -53,15 +57,14 @@ class HuggingFaceLlm(BaseLlm): else: raise ValueError("`top_p` must be > 0.0 and < 1.0") - model = config.model or "google/flan-t5-xxl" + model = config.model logging.info(f"Using HuggingFaceHub with model {model}") llm = HuggingFaceHub( huggingfacehub_api_token=os.environ["HUGGINGFACE_ACCESS_TOKEN"], repo_id=model, model_kwargs=model_kwargs, ) - - return llm(prompt) + return llm.invoke(prompt) @staticmethod def _from_endpoint(prompt: str, config: BaseLlmConfig) -> str: @@ -71,4 +74,23 @@ class HuggingFaceLlm(BaseLlm): task="text-generation", model_kwargs=config.model_kwargs, ) - return llm(prompt) + return llm.invoke(prompt) + + @staticmethod + def _from_pipeline(prompt: str, config: BaseLlmConfig) -> str: + model_kwargs = { + "temperature": config.temperature or 0.1, + "max_new_tokens": config.max_tokens, + } + + if 0.0 < config.top_p < 1.0: + model_kwargs["top_p"] = config.top_p + else: + raise ValueError("`top_p` must be > 0.0 and < 1.0") + + llm = HuggingFacePipeline.from_model_id( + model_id=config.model, + task="text-generation", + pipeline_kwargs=model_kwargs, + ) + return llm.invoke(prompt) diff --git a/embedchain/utils/misc.py b/embedchain/utils/misc.py index ce7b5115..ead5eef8 100644 --- a/embedchain/utils/misc.py +++ b/embedchain/utils/misc.py @@ -425,6 +425,7 @@ def validate_config(config_data): Optional("api_key"): str, 
Optional("endpoint"): str, Optional("model_kwargs"): dict, + Optional("local"): bool, }, }, Optional("vectordb"): { diff --git a/tests/llm/test_huggingface.py b/tests/llm/test_huggingface.py index 45a70244..754317f6 100644 --- a/tests/llm/test_huggingface.py +++ b/tests/llm/test_huggingface.py @@ -62,18 +62,19 @@ def test_get_llm_model_answer(huggingface_llm_config, mocker): def test_hugging_face_mock(huggingface_llm_config, mocker): mock_llm_instance = mocker.Mock(return_value="Test answer") - mocker.patch("embedchain.llm.huggingface.HuggingFaceHub", return_value=mock_llm_instance) + mock_hf_hub = mocker.patch("embedchain.llm.huggingface.HuggingFaceHub") + mock_hf_hub.return_value.invoke = mock_llm_instance llm = HuggingFaceLlm(huggingface_llm_config) answer = llm.get_llm_model_answer("Test query") - assert answer == "Test answer" mock_llm_instance.assert_called_once_with("Test query") def test_custom_endpoint(huggingface_endpoint_config, mocker): mock_llm_instance = mocker.Mock(return_value="Test answer") - mocker.patch("embedchain.llm.huggingface.HuggingFaceEndpoint", return_value=mock_llm_instance) + mock_hf_endpoint = mocker.patch("embedchain.llm.huggingface.HuggingFaceEndpoint") + mock_hf_endpoint.return_value.invoke = mock_llm_instance llm = HuggingFaceLlm(huggingface_endpoint_config) answer = llm.get_llm_model_answer("Test query")