You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
01/software/source/server/services/llm/llamaedge/llm.py

69 lines
2.4 KiB

import os
import subprocess
import requests
import json
class Llm:
def __init__(self, config):
self.install(config["service_directory"])
def install(self, service_directory):
LLM_FOLDER_PATH = service_directory
self.llm_directory = os.path.join(LLM_FOLDER_PATH, "llm")
if not os.path.isdir(self.llm_directory): # Check if the LLM directory exists
os.makedirs(LLM_FOLDER_PATH, exist_ok=True)
# Install WasmEdge
subprocess.run(
[
"curl",
"-sSf",
"https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh",
"|",
"bash",
"-s",
"--",
"--plugin",
"wasi_nn-ggml",
]
)
# Download the Qwen1.5-0.5B-Chat model GGUF file
MODEL_URL = "https://huggingface.co/second-state/Qwen1.5-0.5B-Chat-GGUF/resolve/main/Qwen1.5-0.5B-Chat-Q5_K_M.gguf"
subprocess.run(["curl", "-LO", MODEL_URL], cwd=self.llm_directory)
# Download the llama-api-server.wasm app
APP_URL = "https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/llama-api-server.wasm"
subprocess.run(["curl", "-LO", APP_URL], cwd=self.llm_directory)
# Run the API server
subprocess.run(
[
"wasmedge",
"--dir",
".:.",
"--nn-preload",
"default:GGML:AUTO:Qwen1.5-0.5B-Chat-Q5_K_M.gguf",
"llama-api-server.wasm",
"-p",
"llama-2-chat",
],
cwd=self.llm_directory,
)
print("LLM setup completed.")
else:
print("LLM already set up. Skipping download.")
def llm(self, messages):
url = "http://localhost:8080/v1/chat/completions"
headers = {"accept": "application/json", "Content-Type": "application/json"}
data = {"messages": messages, "model": "llama-2-chat"}
with requests.post(
url, headers=headers, data=json.dumps(data), stream=True
) as response:
for line in response.iter_lines():
if line:
yield json.loads(line)