[Feature] Add support for running huggingface models locally (#1287)
@@ -451,7 +451,15 @@ pip install --upgrade 'embedchain[huggingface-hub]'
 First, set the `HUGGINGFACE_ACCESS_TOKEN` environment variable; you can obtain the token from [their platform](https://huggingface.co/settings/tokens).
 
-Once you have the token, load the app using the config yaml file:
+You can load LLMs from Hugging Face in three ways:
+
+- [Hugging Face Hub](#hugging-face-hub)
+- [Hugging Face Local Pipelines](#hugging-face-local-pipelines)
+- [Hugging Face Inference Endpoint](#hugging-face-inference-endpoint)
+
+### Hugging Face Hub
+
+To load the model from the Hugging Face Hub, use the following code:
 
 <CodeGroup>
 
@@ -461,24 +469,49 @@ from embedchain import App
 os.environ["HUGGINGFACE_ACCESS_TOKEN"] = "xxx"
 
-# load llm configuration from config.yaml file
-app = App.from_config(config_path="config.yaml")
-```
-
-```yaml config.yaml
-llm:
-  provider: huggingface
-  config:
-    model: 'google/flan-t5-xxl'
-    temperature: 0.5
-    max_tokens: 1000
-    top_p: 0.5
-    stream: false
+config = {
+    "app": {"config": {"id": "my-app"}},
+    "llm": {
+        "provider": "huggingface",
+        "config": {
+            "model": "bigscience/bloom-1b7",
+            "top_p": 0.5,
+            "max_length": 200,
+            "temperature": 0.1,
+        },
+    },
+}
+
+app = App.from_config(config=config)
 ```
 
 </CodeGroup>
 
-### Custom Endpoints
+### Hugging Face Local Pipelines
+
+To run a locally downloaded Hugging Face model, use the following code:
+
+<CodeGroup>
+
+```python main.py
+from embedchain import App
+
+config = {
+    "app": {"config": {"id": "my-app"}},
+    "llm": {
+        "provider": "huggingface",
+        "config": {
+            "model": "Trendyol/Trendyol-LLM-7b-chat-v0.1",
+            "local": True,  # required to run the model locally
+            "top_p": 0.5,
+            "max_tokens": 1000,
+            "temperature": 0.1,
+        },
+    },
+}
+
+app = App.from_config(config=config)
+```
+
+</CodeGroup>
+
+### Hugging Face Inference Endpoint
 
 You can also use [Hugging Face Inference Endpoints](https://huggingface.co/docs/inference-endpoints/index#-inference-endpoints) to access custom endpoints. First, set the `HUGGINGFACE_ACCESS_TOKEN` as above.
 
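
For context, the new `local: True` flag routes generation through langchain's `HuggingFacePipeline` rather than the hosted Hub API (see the `_from_pipeline` helper added further down). A minimal standalone sketch of that underlying call; the model id and generation kwargs here are illustrative, not prescribed:

```python
# Sketch of what `local: True` does under the hood: build a local
# text-generation pipeline and invoke it, downloading weights on first use.
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

llm = HuggingFacePipeline.from_model_id(
    model_id="bigscience/bloom-1b7",  # example; any local/downloadable causal LM
    task="text-generation",
    pipeline_kwargs={"temperature": 0.1, "max_new_tokens": 100, "top_p": 0.5},
)
print(llm.invoke("What is embedchain?"))
```
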
@@ -487,35 +520,23 @@ Then, load the app using the config yaml file:
 <CodeGroup>
 
 ```python main.py
-import os
 from embedchain import App
 
-os.environ["HUGGINGFACE_ACCESS_TOKEN"] = "xxx"
-
-# load llm configuration from config.yaml file
-app = App.from_config(config_path="config.yaml")
-```
-
-```yaml config.yaml
-llm:
-  provider: huggingface
-  config:
-    endpoint: https://api-inference.huggingface.co/models/gpt2 # replace with your personal endpoint
+config = {
+    "app": {"config": {"id": "my-app"}},
+    "llm": {
+        "provider": "huggingface",
+        "config": {
+            "endpoint": "https://api-inference.huggingface.co/models/gpt2",
+            "model_kwargs": {"temperature": 0.1, "max_new_tokens": 100},
+        },
+    },
+}
+
+app = App.from_config(config=config)
 ```
 
 </CodeGroup>
 
-If your endpoint requires additional parameters, you can pass them in the `model_kwargs` field:
-
-```
-llm:
-  provider: huggingface
-  config:
-    endpoint: <YOUR_ENDPOINT_URL_HERE>
-    model_kwargs:
-      max_new_tokens: 100
-      temperature: 0.5
-```
-
 Currently only the `text-generation` and `text2text-generation` tasks are supported [[ref](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html?highlight=huggingfaceendpoint#)].
 
 See langchain's [hugging face endpoint](https://python.langchain.com/docs/integrations/chat/huggingface#huggingfaceendpoint) for more information.

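The endpoint path wraps langchain's `HuggingFaceEndpoint` (see the `_from_endpoint` hunk below). A standalone sketch of that wrapped call; the URL is the public inference API for gpt2, so substitute your own dedicated endpoint:

```python
# Sketch of the endpoint path: langchain's HuggingFaceEndpoint invoked directly.
import os

from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint

llm = HuggingFaceEndpoint(
    endpoint_url="https://api-inference.huggingface.co/models/gpt2",
    huggingfacehub_api_token=os.environ["HUGGINGFACE_ACCESS_TOKEN"],
    task="text-generation",
    model_kwargs={"temperature": 0.1, "max_new_tokens": 100},
)
print(llm.invoke("What is embedchain?"))
```
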
@@ -95,6 +95,7 @@ class BaseLlmConfig(BaseConfig):
         api_key: Optional[str] = None,
         endpoint: Optional[str] = None,
         model_kwargs: Optional[dict[str, Any]] = None,
+        local: Optional[bool] = False,
     ):
         """
         Initializes a configuration class instance for the LLM.
@@ -138,6 +139,8 @@ class BaseLlmConfig(BaseConfig):
         :type callbacks: Optional[list], optional
         :param query_type: The type of query to use, defaults to None
         :type query_type: Optional[str], optional
+        :param local: If True, the model will be run locally, defaults to False (for huggingface provider)
+        :type local: Optional[bool], optional
         :raises ValueError: If the template is not valid as template should
             contain $context and $query (and optionally $history)
         :raises ValueError: Stream is not boolean
@@ -165,6 +168,7 @@ class BaseLlmConfig(BaseConfig):
         self.api_key = api_key
         self.endpoint = endpoint
         self.model_kwargs = model_kwargs
+        self.local = local
 
         if isinstance(prompt, str):
             prompt = Template(prompt)

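The new flag can also be passed when constructing the config object directly. A minimal sketch; the other parameter names are assumed from `BaseLlmConfig` as it is used elsewhere in embedchain:

```python
# Sketch: passing the new `local` flag straight to BaseLlmConfig.
# Parameter names are those visible in this diff; the model id is an example.
from embedchain.config import BaseLlmConfig

config = BaseLlmConfig(
    model="Trendyol/Trendyol-LLM-7b-chat-v0.1",
    temperature=0.1,
    max_tokens=1000,
    top_p=0.5,
    local=True,  # route inference through a local Hugging Face pipeline
)
```
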
@@ -5,6 +5,7 @@ from typing import Optional
 
 from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
 from langchain_community.llms.huggingface_hub import HuggingFaceHub
+from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
 
 from embedchain.config import BaseLlmConfig
 from embedchain.helpers.json_serializable import register_deserializable
@@ -34,12 +35,15 @@ class HuggingFaceLlm(BaseLlm):
 
     @staticmethod
     def _get_answer(prompt: str, config: BaseLlmConfig) -> str:
-        if config.model:
+        # If the `local` flag is set, run the model via a local pipeline
+        if config.model and config.local:
+            return HuggingFaceLlm._from_pipeline(prompt=prompt, config=config)
+        elif config.model:
             return HuggingFaceLlm._from_model(prompt=prompt, config=config)
         elif config.endpoint:
             return HuggingFaceLlm._from_endpoint(prompt=prompt, config=config)
         else:
-            raise ValueError("Either `model` or `endpoint` must be set")
+            raise ValueError("Either `model` or `endpoint` must be set in config")
 
     @staticmethod
     def _from_model(prompt: str, config: BaseLlmConfig) -> str:
@@ -53,15 +57,14 @@ class HuggingFaceLlm(BaseLlm):
         else:
             raise ValueError("`top_p` must be > 0.0 and < 1.0")
 
-        model = config.model or "google/flan-t5-xxl"
+        model = config.model
         logging.info(f"Using HuggingFaceHub with model {model}")
         llm = HuggingFaceHub(
             huggingfacehub_api_token=os.environ["HUGGINGFACE_ACCESS_TOKEN"],
             repo_id=model,
             model_kwargs=model_kwargs,
         )
-        return llm(prompt)
+        return llm.invoke(prompt)
 
     @staticmethod
     def _from_endpoint(prompt: str, config: BaseLlmConfig) -> str:
@@ -71,4 +74,23 @@ class HuggingFaceLlm(BaseLlm):
             task="text-generation",
             model_kwargs=config.model_kwargs,
         )
-        return llm(prompt)
+        return llm.invoke(prompt)
+
+    @staticmethod
+    def _from_pipeline(prompt: str, config: BaseLlmConfig) -> str:
+        model_kwargs = {
+            "temperature": config.temperature or 0.1,
+            "max_new_tokens": config.max_tokens,
+        }
+
+        if 0.0 < config.top_p < 1.0:
+            model_kwargs["top_p"] = config.top_p
+        else:
+            raise ValueError("`top_p` must be > 0.0 and < 1.0")
+
+        llm = HuggingFacePipeline.from_model_id(
+            model_id=config.model,
+            task="text-generation",
+            pipeline_kwargs=model_kwargs,
+        )
+        return llm.invoke(prompt)

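Taken together, the new branch can be exercised end to end through the public class. A sketch, assuming the model fits on the local machine (the first call downloads the weights) and that `BaseLlmConfig` accepts the parameters shown in this diff:

```python
# Sketch: driving the new local path via HuggingFaceLlm.
# `get_llm_model_answer` is the entry point used by the tests below;
# the model id and parameter values are examples only.
from embedchain.config import BaseLlmConfig
from embedchain.llm.huggingface import HuggingFaceLlm

config = BaseLlmConfig(model="gpt2", temperature=0.1, max_tokens=50, top_p=0.5, local=True)
llm = HuggingFaceLlm(config)
print(llm.get_llm_model_answer("What is embedchain?"))
```
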
@@ -425,6 +425,7 @@ def validate_config(config_data):
                 Optional("api_key"): str,
                 Optional("endpoint"): str,
                 Optional("model_kwargs"): dict,
+                Optional("local"): bool,
             },
         },
         Optional("vectordb"): {

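With the schema extended, a config dict carrying `local` now passes validation. A sketch; the `validate_config` import path is assumed, as it is not shown in this diff:

```python
# Sketch: `local` is now whitelisted under llm.config by the schema above.
from embedchain.utils.misc import validate_config  # import path assumed

config_data = {
    "app": {"config": {"id": "my-app"}},
    "llm": {
        "provider": "huggingface",
        "config": {"model": "gpt2", "local": True},
    },
}
validate_config(config_data)  # would have raised a SchemaError before this change
```
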
@@ -62,18 +62,19 @@ def test_get_llm_model_answer(huggingface_llm_config, mocker):
 
 def test_hugging_face_mock(huggingface_llm_config, mocker):
     mock_llm_instance = mocker.Mock(return_value="Test answer")
-    mocker.patch("embedchain.llm.huggingface.HuggingFaceHub", return_value=mock_llm_instance)
+    mock_hf_hub = mocker.patch("embedchain.llm.huggingface.HuggingFaceHub")
+    mock_hf_hub.return_value.invoke = mock_llm_instance
 
     llm = HuggingFaceLlm(huggingface_llm_config)
     answer = llm.get_llm_model_answer("Test query")
 
     assert answer == "Test answer"
     mock_llm_instance.assert_called_once_with("Test query")
 
 
 def test_custom_endpoint(huggingface_endpoint_config, mocker):
     mock_llm_instance = mocker.Mock(return_value="Test answer")
-    mocker.patch("embedchain.llm.huggingface.HuggingFaceEndpoint", return_value=mock_llm_instance)
+    mock_hf_endpoint = mocker.patch("embedchain.llm.huggingface.HuggingFaceEndpoint")
+    mock_hf_endpoint.return_value.invoke = mock_llm_instance
 
     llm = HuggingFaceLlm(huggingface_endpoint_config)
     answer = llm.get_llm_model_answer("Test query")

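A matching test for the new local-pipeline branch would follow the same mock pattern. A sketch; the `huggingface_local_config` fixture is hypothetical, standing in for a config with `local=True` set:

```python
# Sketch: same mocking pattern applied to the new `_from_pipeline` branch.
# `huggingface_local_config` is a hypothetical fixture with `local=True`.
def test_local_pipeline(huggingface_local_config, mocker):
    mock_llm_instance = mocker.Mock(return_value="Test answer")
    mock_hf_pipeline = mocker.patch("embedchain.llm.huggingface.HuggingFacePipeline")
    mock_hf_pipeline.from_model_id.return_value.invoke = mock_llm_instance

    llm = HuggingFaceLlm(huggingface_local_config)
    answer = llm.get_llm_model_answer("Test query")

    assert answer == "Test answer"
    mock_llm_instance.assert_called_once_with("Test query")
```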