# !pip install accelerate
# !pip install torch
# !pip install transformers
# !pip install bitsandbytes

import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TextStreamer,
)
from typing import Callable, Dict, List


class LlamaFunctionCaller:
    """
    A class to manage and execute functions with a Llama function-calling model.

    Attributes:
    -----------
    model: transformers.AutoModelForCausalLM
        The loaded Llama model.
    tokenizer: transformers.AutoTokenizer
        The tokenizer for the Llama model.
    functions: Dict[str, Callable]
        A dictionary of functions available for execution.

    Methods:
    --------
    __init__(self, model_id: str, cache_dir: str, runtime: str)
        Initializes the LlamaFunctionCaller with the specified model.
    add_func(self, name: str, function: Callable, description: str, arguments: List[Dict])
        Registers a new function with the LlamaFunctionCaller.
    call_function(self, name: str, **kwargs)
        Calls the specified function with the given arguments.
    __call__(self, task: str, **kwargs)
        Sends a user prompt to the model and returns (or streams) the response.

    Example:
        # Example usage
        model_id = "Your-Model-ID"
        cache_dir = "Your-Cache-Directory"
        runtime = "cuda"  # or "cpu"

        llama_caller = LlamaFunctionCaller(model_id, cache_dir, runtime)

        # Add a custom function
        def get_weather(location: str, format: str) -> str:
            # This is a placeholder for the actual implementation
            return f"Weather at {location} in {format} format."

        llama_caller.add_func(
            name="get_weather",
            function=get_weather,
            description="Get the weather at a location",
            arguments=[
                {
                    "name": "location",
                    "type": "string",
                    "description": "Location for the weather",
                },
                {
                    "name": "format",
                    "type": "string",
                    "description": "Format of the weather data",
                },
            ],
        )

        # Call the function
        result = llama_caller.call_function("get_weather", location="Paris", format="Celsius")
        print(result)

        # Stream a user prompt
        llama_caller("Tell me about the tallest mountain in the world.")
    """

    def __init__(
        self,
        model_id: str = "Trelis/Llama-2-7b-chat-hf-function-calling-v2",
        cache_dir: str = "llama_cache",
        runtime: str = "auto",
        max_tokens: int = 500,
        streaming: bool = False,
        *args,
        **kwargs,
    ):
        self.model_id = model_id
        self.cache_dir = cache_dir
        self.runtime = runtime
        self.max_tokens = max_tokens
        self.streaming = streaming

        # Load the model and tokenizer
        self.model = self._load_model()
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_id, cache_dir=cache_dir, use_fast=True
        )
        self.functions = {}

    def _load_model(self):
        # Load the model with 4-bit NF4 quantization (via bitsandbytes) to
        # reduce memory use; computation runs in bfloat16.
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
        return AutoModelForCausalLM.from_pretrained(
            self.model_id,
            quantization_config=bnb_config,
            device_map=self.runtime,
            trust_remote_code=True,
            cache_dir=self.cache_dir,
        )

    def add_func(
        self,
        name: str,
        function: Callable,
        description: str,
        arguments: List[Dict],
    ):
        """
        Registers a new function with the LlamaFunctionCaller.

        Args:
            name (str): The name of the function.
            function (Callable): The function to execute.
            description (str): Description of the function.
            arguments (List[Dict]): List of argument specifications.
        """
        self.functions[name] = {
            "function": function,
            "description": description,
            "arguments": arguments,
        }

    def call_function(self, name: str, **kwargs):
        """
        Calls the specified function with the given arguments.

        Args:
            name (str): The name of the function to call.
            **kwargs: Keyword arguments for the function call.

        Returns:
            The result of the function call.
""" if name not in self.functions: raise ValueError(f"Function {name} not found.") func_info = self.functions[name] return func_info["function"](**kwargs) def __call__(self, task: str, **kwargs): """ Streams a user prompt to the model and prints the response. Args: task (str): The user prompt to stream. """ # Format the prompt prompt = f"{task}\n\n" # Encode and send to the model inputs = self.tokenizer([prompt], return_tensors="pt").to( self.runtime ) streamer = TextStreamer(self.tokenizer) if self.streaming: out = self.model.generate( **inputs, streamer=streamer, max_new_tokens=self.max_tokens, **kwargs, ) return out else: out = self.model.generate( **inputs, max_length=self.max_tokens, **kwargs ) # return self.tokenizer.decode(out[0], skip_special_tokens=True) return out # llama_caller = LlamaFunctionCaller() # # Add a custom function # def get_weather(location: str, format: str) -> str: # # This is a placeholder for the actual implementation # return f"Weather at {location} in {format} format." # llama_caller.add_func( # name="get_weather", # function=get_weather, # description="Get the weather at a location", # arguments=[ # { # "name": "location", # "type": "string", # "description": "Location for the weather", # }, # { # "name": "format", # "type": "string", # "description": "Format of the weather data", # }, # ], # ) # # Call the function # result = llama_caller.call_function("get_weather", location="Paris", format="Celsius") # print(result) # # Stream a user prompt # llama_caller("Tell me about the tallest mountain in the world.")