from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


class MultiModalLlava:
    """
    LLava Model

    Args:
        model_name_or_path: The model name or path to the model
        revision: The revision of the model to use
        device: The device to run the model on
        max_new_tokens: The maximum number of tokens to generate
        do_sample: Whether or not to use sampling
        temperature: The temperature of the sampling
        top_p: The top p value for sampling
        top_k: The top k value for sampling
        repetition_penalty: The repetition penalty for sampling
        device_map: The device map to use

    Methods:
        __call__: Call the model
        chat: Interactive chat in terminal

    Example:
        >>> from swarms.models.llava import MultiModalLlava
        >>> model = MultiModalLlava(device="cpu")
        >>> model("Hello, I am a robot.")
    """

    def __init__(
        self,
        model_name_or_path="TheBloke/llava-v1.5-13B-GPTQ",
        revision="main",
        device="cuda",
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        top_k=40,
        repetition_penalty=1.1,
        device_map: str = "auto",
    ):
        self.device = device
        # device_map handles model placement; calling .to(device) on a model
        # loaded with a device_map conflicts with accelerate's dispatching.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name_or_path,
            device_map=device_map,
            trust_remote_code=False,
            revision=revision,
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name_or_path, use_fast=True
        )
        # The pipeline reuses the already-placed model, so no explicit
        # device argument is passed here.
        self.pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            max_new_tokens=max_new_tokens,
            do_sample=do_sample,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            repetition_penalty=repetition_penalty,
        )

    def __call__(self, prompt):
        """Call the model"""
        return self.pipe(prompt)[0]["generated_text"]

    def chat(self):
        """Interactive chat in terminal"""
        print(
            "Starting chat with MultiModalLlava. Type 'exit' to end the"
            " session."
        )
        while True:
            user_input = input("You: ")
            if user_input.lower() == "exit":
                break
            response = self(user_input)
            print(f"Model: {response}")
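

# A minimal usage sketch, not part of the class above. It assumes the default
# GPTQ checkpoint can be loaded on the current machine (i.e. a CUDA device and
# the GPTQ dependencies such as optimum/auto-gptq are available); swap in a
# smaller model_name_or_path for a quick smoke test.
if __name__ == "__main__":
    llava = MultiModalLlava(max_new_tokens=128)
    # Single-turn generation via __call__
    print(llava("Describe what a vision-language model does."))
    # Or start the interactive terminal loop:
    # llava.chat()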