58 lines
2.2 KiB
Python
58 lines
2.2 KiB
Python
from typing import Iterable, Optional, Union
|
|
|
|
from langchain.callbacks.stdout import StdOutCallbackHandler
|
|
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
|
|
|
from embedchain.config import BaseLlmConfig
|
|
from embedchain.helpers.json_serializable import register_deserializable
|
|
from embedchain.llm.base import BaseLlm
|
|
|
|
|
|
@register_deserializable
|
|
class GPT4ALLLlm(BaseLlm):
|
|
def __init__(self, config: Optional[BaseLlmConfig] = None):
|
|
super().__init__(config=config)
|
|
if self.config.model is None:
|
|
self.config.model = "orca-mini-3b-gguf2-q4_0.gguf"
|
|
self.instance = GPT4ALLLlm._get_instance(self.config.model)
|
|
self.instance.streaming = self.config.stream
|
|
|
|
def get_llm_model_answer(self, prompt):
|
|
return self._get_answer(prompt=prompt, config=self.config)
|
|
|
|
@staticmethod
|
|
def _get_instance(model):
|
|
try:
|
|
from langchain.llms.gpt4all import GPT4All as LangchainGPT4All
|
|
except ModuleNotFoundError:
|
|
raise ModuleNotFoundError(
|
|
"The GPT4All python package is not installed. Please install it with `pip install --upgrade embedchain[opensource]`" # noqa E501
|
|
) from None
|
|
|
|
return LangchainGPT4All(model=model)
|
|
|
|
def _get_answer(self, prompt: str, config: BaseLlmConfig) -> Union[str, Iterable]:
|
|
if config.model and config.model != self.config.model:
|
|
raise RuntimeError(
|
|
"GPT4ALLLlm does not support switching models at runtime. Please create a new app instance."
|
|
)
|
|
|
|
messages = []
|
|
if config.system_prompt:
|
|
messages.append(config.system_prompt)
|
|
messages.append(prompt)
|
|
kwargs = {
|
|
"temp": config.temperature,
|
|
"max_tokens": config.max_tokens,
|
|
}
|
|
if config.top_p:
|
|
kwargs["top_p"] = config.top_p
|
|
|
|
callbacks = [StreamingStdOutCallbackHandler()] if config.stream else [StdOutCallbackHandler()]
|
|
|
|
response = self.instance.generate(prompts=messages, callbacks=callbacks, **kwargs)
|
|
answer = ""
|
|
for generations in response.generations:
|
|
answer += " ".join(map(lambda generation: generation.text, generations))
|
|
return answer
|