import json
import os
import subprocess

import requests


class Llm:
    def __init__(self, config):
        self.install(config["service_directory"])

    def install(self, service_directory):
        LLM_FOLDER_PATH = service_directory
        self.llm_directory = os.path.join(LLM_FOLDER_PATH, "llm")

        # Check if the LLM directory exists; if not, download everything.
        if not os.path.isdir(self.llm_directory):
            os.makedirs(self.llm_directory, exist_ok=True)

            # Install WasmEdge with the wasi_nn-ggml plugin. The pipe into
            # bash only works through a shell, so the command is passed as a
            # single string with shell=True (the list form would hand "|" to
            # curl as a literal argument).
            subprocess.run(
                "curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh"
                " | bash -s -- --plugin wasi_nn-ggml",
                shell=True,
                check=True,
            )

            # Download the Qwen1.5-0.5B-Chat model GGUF file
            MODEL_URL = "https://huggingface.co/second-state/Qwen1.5-0.5B-Chat-GGUF/resolve/main/Qwen1.5-0.5B-Chat-Q5_K_M.gguf"
            subprocess.run(["curl", "-LO", MODEL_URL], cwd=self.llm_directory, check=True)

            # Download the llama-api-server.wasm app
            APP_URL = "https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/llama-api-server.wasm"
            subprocess.run(["curl", "-LO", APP_URL], cwd=self.llm_directory, check=True)

            print("LLM setup completed.")
        else:
            print("LLM already set up. Skipping download.")

        # Run the API server in the background. subprocess.run would block
        # here until the server exits, and the server has to start on every
        # run, not only after a fresh install, so it lives outside the branch
        # above. Assumes `wasmedge` is on PATH after installation.
        self.server = subprocess.Popen(
            [
                "wasmedge",
                "--dir", ".:.",
                "--nn-preload", "default:GGML:AUTO:Qwen1.5-0.5B-Chat-Q5_K_M.gguf",
                "llama-api-server.wasm",
                "-p", "llama-2-chat",  # prompt template; Qwen chat models are usually served with "chatml"
            ],
            cwd=self.llm_directory,
        )

    def llm(self, messages):
        """Send a chat completion request to the local API server and yield
        each non-empty response line as parsed JSON."""
        url = "http://localhost:8080/v1/chat/completions"
        headers = {"accept": "application/json", "Content-Type": "application/json"}
        data = {"messages": messages, "model": "llama-2-chat"}
        with requests.post(
            url, headers=headers, data=json.dumps(data), stream=True
        ) as response:
            for line in response.iter_lines():
                if line:
                    yield json.loads(line)
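

# Minimal usage sketch (an illustration, not part of the original module).
# The service directory path below is hypothetical; the first request also
# assumes the model has finished loading, so a short delay or a readiness
# poll against http://localhost:8080 may be needed in practice.
if __name__ == "__main__":
    llm = Llm({"service_directory": os.path.expanduser("~/.llm-service")})

    # llm() is a generator; each yielded item is a parsed JSON body from
    # the chat completions endpoint.
    for chunk in llm.llm([{"role": "user", "content": "Hello!"}]):
        print(chunk)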