import json
import os
import subprocess

import requests


class Llm:
    def __init__(self, config):
        self.install(config["service_directory"])

    def install(self, service_directory):
        LLM_FOLDER_PATH = service_directory
        self.llm_directory = os.path.join(LLM_FOLDER_PATH, "llm")

        # Check if the LLM directory exists; if not, download everything.
        if not os.path.isdir(self.llm_directory):
            os.makedirs(self.llm_directory, exist_ok=True)

            # Install WasmEdge with the wasi_nn-ggml plugin. The pipe into
            # bash only works through a shell, so the command is passed as a
            # single string with shell=True (the list form would hand "|" to
            # curl as a literal argument).
            subprocess.run(
                "curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh"
                " | bash -s -- --plugin wasi_nn-ggml",
                shell=True,
                check=True,
            )

            # Download the Qwen1.5-0.5B-Chat model GGUF file
            MODEL_URL = "https://huggingface.co/second-state/Qwen1.5-0.5B-Chat-GGUF/resolve/main/Qwen1.5-0.5B-Chat-Q5_K_M.gguf"
            subprocess.run(["curl", "-LO", MODEL_URL], cwd=self.llm_directory, check=True)

            # Download the llama-api-server.wasm app
            APP_URL = "https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/llama-api-server.wasm"
            subprocess.run(["curl", "-LO", APP_URL], cwd=self.llm_directory, check=True)

            print("LLM setup completed.")
        else:
            print("LLM already set up. Skipping download.")

        # Run the API server in the background. subprocess.run would block
        # here until the server exits, and the server has to start on every
        # run, not only after a fresh install, so it lives outside the branch
        # above. Assumes `wasmedge` is on PATH after installation.
        self.server = subprocess.Popen(
            [
                "wasmedge",
                "--dir", ".:.",
                "--nn-preload", "default:GGML:AUTO:Qwen1.5-0.5B-Chat-Q5_K_M.gguf",
                "llama-api-server.wasm",
                "-p", "llama-2-chat",  # prompt template; Qwen chat models are usually served with "chatml"
            ],
            cwd=self.llm_directory,
        )

    def llm(self, messages):
        """Send a chat completion request to the local API server and yield
        each non-empty response line as parsed JSON."""
        url = "http://localhost:8080/v1/chat/completions"
        headers = {"accept": "application/json", "Content-Type": "application/json"}
        data = {"messages": messages, "model": "llama-2-chat"}
        with requests.post(
            url, headers=headers, data=json.dumps(data), stream=True
        ) as response:
            for line in response.iter_lines():
                if line:
                    yield json.loads(line)
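

# Minimal usage sketch (an illustration, not part of the original module).
# The service directory path below is hypothetical; the first request also
# assumes the model has finished loading, so a short delay or a readiness
# poll against http://localhost:8080 may be needed in practice.
if __name__ == "__main__":
    llm = Llm({"service_directory": os.path.expanduser("~/.llm-service")})

    # llm() is a generator; each yielded item is a parsed JSON body from
    # the chat completions endpoint.
    for chunk in llm.llm([{"role": "user", "content": "Hello!"}]):
        print(chunk)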