01/software/source/server/services/llm/llamaedge/llm.py

import os
import subprocess
import requests
import json


class Llm:
    """Local chat LLM backed by a LlamaEdge API server running on WasmEdge.

    Constructing an instance runs :meth:`install`, which on first use
    downloads the runtime, model and server app into
    ``<service_directory>/llm`` and launches the server in the background.
    :meth:`llm` then talks to that server over HTTP on localhost:8080.
    """

    WASMEDGE_INSTALLER_URL = (
        "https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh"
    )
    MODEL_FILE = "Qwen1.5-0.5B-Chat-Q5_K_M.gguf"
    MODEL_URL = (
        "https://huggingface.co/second-state/Qwen1.5-0.5B-Chat-GGUF/resolve/main/"
        + MODEL_FILE
    )
    APP_FILE = "llama-api-server.wasm"
    APP_URL = "https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/" + APP_FILE

    # Handle of the background API server started by install(); stays None
    # when setup was skipped because the llm directory already existed.
    server_process = None

    def __init__(self, config):
        """Set up the service.

        Args:
            config: Mapping that must contain ``"service_directory"``, the
                folder under which the ``llm`` directory is created.
        """
        self.install(config["service_directory"])

    def install(self, service_directory):
        """Download the LLM stack and start the API server if not yet set up.

        The presence of ``<service_directory>/llm`` is used as the
        "already installed" marker. When it exists nothing is downloaded and
        no server is started by this method.

        Args:
            service_directory: Parent folder for the ``llm`` directory.

        Raises:
            subprocess.CalledProcessError: If the installer or a download
                exits with a non-zero status.
            OSError: If ``curl``, ``bash`` or ``wasmedge`` cannot be executed.
        """
        import shutil

        self.llm_directory = os.path.join(service_directory, "llm")
        if os.path.isdir(self.llm_directory):
            print("LLM already set up. Skipping download.")
            return

        # The downloads and the server all run with cwd=llm_directory, so the
        # directory itself (not just its parent) has to exist first.
        os.makedirs(self.llm_directory, exist_ok=True)
        try:
            self._install_wasmedge()
            # Model GGUF file first, then the llama-api-server.wasm app.
            for url in (self.MODEL_URL, self.APP_URL):
                # -f: fail on HTTP errors instead of saving the error page.
                subprocess.run(
                    ["curl", "-fLO", url], cwd=self.llm_directory, check=True
                )
        except BaseException:
            # Remove the marker directory so a later run retries the setup
            # instead of reporting a half-finished install as complete.
            shutil.rmtree(self.llm_directory, ignore_errors=True)
            raise

        self._start_server()
        print("LLM setup completed.")

    def _install_wasmedge(self):
        """Fetch the WasmEdge install script and run it with the ggml plugin."""
        # A "|" inside an argv list is passed to curl as a literal argument;
        # without a shell there is no pipe. Download the script first, then
        # feed it to bash on stdin. This also means bash never runs a
        # partially downloaded script, because curl's failure raises first.
        installer = subprocess.run(
            ["curl", "-sSf", self.WASMEDGE_INSTALLER_URL],
            stdout=subprocess.PIPE,
            check=True,
        )
        subprocess.run(
            ["bash", "-s", "--", "--plugin", "wasi_nn-ggml"],
            input=installer.stdout,
            check=True,
        )

    def _start_server(self):
        """Launch the LlamaEdge API server as a background process."""
        import shutil

        # A freshly installed WasmEdge is usually not on this process's PATH.
        # NOTE(review): the fallback assumes the installer's default
        # location ($HOME/.wasmedge) - confirm against the installer docs.
        wasmedge = shutil.which("wasmedge") or os.path.join(
            os.path.expanduser("~"), ".wasmedge", "bin", "wasmedge"
        )
        # Popen rather than run(): the server does not exit on its own, so
        # waiting for it would block install() and the constructor.
        self.server_process = subprocess.Popen(
            [
                wasmedge,
                "--dir",
                ".:.",
                "--nn-preload",
                "default:GGML:AUTO:" + self.MODEL_FILE,
                self.APP_FILE,
                "-p",
                "llama-2-chat",
            ],
            cwd=self.llm_directory,
        )

    def llm(self, messages):
        """Send chat messages to the local API server and yield its replies.

        Args:
            messages: Chat messages, sent as the ``"messages"`` field of the
                JSON request body.

        Yields:
            Each non-empty line of the HTTP response, parsed as JSON.
        """
        url = "http://localhost:8080/v1/chat/completions"
        headers = {"accept": "application/json", "Content-Type": "application/json"}
        data = {"messages": messages, "model": "llama-2-chat"}
        with requests.post(
            url, headers=headers, data=json.dumps(data), stream=True
        ) as response:
            for line in response.iter_lines():
                if line:
                    yield json.loads(line)
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`import os`
			`import subprocess`
			`import requests`
			`import json`

Re-lint after rebase 10 months ago
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`class Llm:`
			`def __init__(self, config):`
			`self.install(config["service_directory"])`

			`def install(self, service_directory):`
			`LLM_FOLDER_PATH = service_directory`
Re-lint after rebase 10 months ago			`self.llm_directory = os.path.join(LLM_FOLDER_PATH, "llm")`
			`if not os.path.isdir(self.llm_directory): # Check if the LLM directory exists`
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`os.makedirs(LLM_FOLDER_PATH, exist_ok=True)`

			`# Install WasmEdge`
Re-lint after rebase 10 months ago			`subprocess.run(`
			`[`
			`"curl",`
			`"-sSf",`
			`"https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh",`
			`"\|",`
			`"bash",`
			`"-s",`
			`"--",`
			`"--plugin",`
			`"wasi_nn-ggml",`
			`]`
			`)`
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago
			`# Download the Qwen1.5-0.5B-Chat model GGUF file`
			`MODEL_URL = "https://huggingface.co/second-state/Qwen1.5-0.5B-Chat-GGUF/resolve/main/Qwen1.5-0.5B-Chat-Q5_K_M.gguf"`
Re-lint after rebase 10 months ago			`subprocess.run(["curl", "-LO", MODEL_URL], cwd=self.llm_directory)`

`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`# Download the llama-api-server.wasm app`
			`APP_URL = "https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/llama-api-server.wasm"`
Re-lint after rebase 10 months ago			`subprocess.run(["curl", "-LO", APP_URL], cwd=self.llm_directory)`
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago
			`# Run the API server`
Re-lint after rebase 10 months ago			`subprocess.run(`
			`[`
			`"wasmedge",`
			`"--dir",`
			`".:.",`
			`"--nn-preload",`
			`"default:GGML:AUTO:Qwen1.5-0.5B-Chat-Q5_K_M.gguf",`
			`"llama-api-server.wasm",`
			`"-p",`
			`"llama-2-chat",`
			`],`
			`cwd=self.llm_directory,`
			`)`
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago
			`print("LLM setup completed.")`
			`else:`
			`print("LLM already set up. Skipping download.")`

			`def llm(self, messages):`
			`url = "http://localhost:8080/v1/chat/completions"`
Re-lint after rebase 10 months ago			`headers = {"accept": "application/json", "Content-Type": "application/json"}`
			`data = {"messages": messages, "model": "llama-2-chat"}`
			`with requests.post(`
			`url, headers=headers, data=json.dumps(data), stream=True`
			`) as response:`
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`for line in response.iter_lines():`
			`if line:`
			`yield json.loads(line)`