parent
91fcb94438
commit
701d357e30
@ -0,0 +1,10 @@
|
|||||||
|
from ..base_device import Device
|
||||||
|
|
||||||
|
# Single module-level device instance shared by main().
device = Device()


def main(server_url):
    """Point the shared ``device`` at a server and start it.

    Args:
        server_url: Value stored on ``device.server_url`` before
            ``device.start()`` is called.
    """
    device.server_url = server_url
    device.start()


if __name__ == "__main__":
    # main() requires the server URL. The previous bare ``main()`` call
    # raised TypeError, so take the URL from the command line instead.
    import sys

    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} SERVER_URL")
    main(sys.argv[1])
|
@ -1,4 +0,0 @@
|
|||||||
from ..base_device import Device
|
|
||||||
|
|
||||||
# Module-level client device created when this module is imported.
desktop_device = Device()
|
|
||||||
# Started immediately at import time; there is no main() guard here.
desktop_device.start()
|
|
@ -1,4 +1,9 @@
|
|||||||
from ..base_device import Device
|
from ..base_device import Device
|
||||||
|
|
||||||
rpi_device = Device()
|
device = Device()
|
||||||
rpi_device.start()
|
|
||||||
|
def main(server_url=None):
    """Start the module-level ``device``.

    The launcher in this change set invokes ``module.main`` with the server
    URL as a positional argument, which a zero-parameter ``main`` rejects
    with TypeError. The URL is therefore accepted (optionally) and, when
    given, stored on the device the same way the sibling client module does.

    Args:
        server_url: Optional server address. When ``None`` the device is
            started without touching ``device.server_url``.
    """
    if server_url is not None:
        device.server_url = server_url
    device.start()


if __name__ == "__main__":
    main()
|
@ -1,6 +0,0 @@
|
|||||||
# Choose the client module from the machine's node name: a host named
# exactly "rpi" runs the rpi client, anything else runs the macos client.
DEVICE=$(uname -n)
case "$DEVICE" in
  rpi)
    python -m 01OS.clients.rpi.device
    ;;
  *)
    python -m 01OS.clients.macos.device
    ;;
esac
|
|
@ -0,0 +1,15 @@
|
|||||||
|
class Llm:
    """LLM service that reuses the interpreter's own completion function.

    The interpreter already talks to the model itself, so this class only
    copies the remaining config options onto the interpreter object.
    """

    def __init__(self, config):
        """Configure the interpreter from ``config`` and expose its LLM.

        Args:
            config: Dict holding the ``"interpreter"`` object, optionally
                ``"service_directory"``, and any number of options. Both
                bookkeeping keys are removed from the dict (it is mutated in
                place); every other key is set as an attribute on the
                interpreter with ``-`` replaced by ``_``.

        Raises:
            KeyError: If ``config`` has no ``"interpreter"`` entry.
        """
        oi = config.pop("interpreter")
        config.pop("service_directory", None)

        for option in config:
            setattr(oi, option.replace("-", "_"), config[option])

        self.llm = oi.llm.completions
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,49 @@
|
|||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
|
||||||
|
class Llm:
    """LLM service backed by a local llama-api-server app run with WasmEdge.

    Constructing the object downloads the runtime, model and server app into
    ``<service_directory>/llm`` when that directory does not exist yet, then
    launches the API server in the background.
    """

    INSTALL_SCRIPT_URL = "https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh"
    MODEL_URL = "https://huggingface.co/second-state/Qwen1.5-0.5B-Chat-GGUF/resolve/main/Qwen1.5-0.5B-Chat-Q5_K_M.gguf"
    APP_URL = "https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/llama-api-server.wasm"

    def __init__(self, config):
        """Install (if needed) and start the server.

        Args:
            config: Dict with a ``"service_directory"`` entry.
        """
        self.server_process = None
        self.install(config["service_directory"])

    def install(self, service_directory):
        """Download everything into ``<service_directory>/llm`` and start the server.

        Args:
            service_directory: Parent directory for the ``llm`` folder.

        Raises:
            subprocess.CalledProcessError: If a download or the WasmEdge
                installer exits with a non-zero status.
        """
        LLM_FOLDER_PATH = service_directory
        self.llm_directory = os.path.join(LLM_FOLDER_PATH, 'llm')
        if not os.path.isdir(self.llm_directory):  # Check if the LLM directory exists
            # Create the llm directory itself: it is used as cwd below, and
            # creating only the parent made every cwd= call fail.
            os.makedirs(self.llm_directory, exist_ok=True)

            # Install WasmEdge. A "|" inside an argv list is not a pipe (it
            # is handed to curl as an argument), so fetch the script first
            # and feed it to bash on stdin.
            installer = subprocess.run(
                ['curl', '-sSf', self.INSTALL_SCRIPT_URL],
                stdout=subprocess.PIPE,
                check=True,
            )
            subprocess.run(
                ['bash', '-s', '--', '--plugin', 'wasi_nn-ggml'],
                input=installer.stdout,
                check=True,
            )

            # Download the Qwen1.5-0.5B-Chat model GGUF file
            subprocess.run(['curl', '-LO', self.MODEL_URL], cwd=self.llm_directory, check=True)

            # Download the llama-api-server.wasm app
            subprocess.run(['curl', '-LO', self.APP_URL], cwd=self.llm_directory, check=True)

            print("LLM setup completed.")
        else:
            print("LLM already set up. Skipping download.")

        self._start_server()

    def _start_server(self):
        """Launch the API server in the background.

        The server never exits on its own, so running it with
        ``subprocess.run`` blocked the constructor forever. It was also only
        started on the first (installing) run. ``Popen`` returns at once.
        """
        # NOTE(review): assumes the `wasmedge` binary is on PATH after the
        # installer ran -- confirm for a fresh install.
        self.server_process = subprocess.Popen(
            ['wasmedge', '--dir', '.:.', '--nn-preload', 'default:GGML:AUTO:Qwen1.5-0.5B-Chat-Q5_K_M.gguf', 'llama-api-server.wasm', '-p', 'llama-2-chat'],
            cwd=self.llm_directory,
        )

    def llm(self, messages):
        """POST ``messages`` to the local chat-completions endpoint.

        Args:
            messages: Chat messages, sent as the ``"messages"`` field.

        Yields:
            One decoded JSON object per non-empty line of the response body.
        """
        url = "http://localhost:8080/v1/chat/completions"
        headers = {
            'accept': 'application/json',
            'Content-Type': 'application/json'
        }
        data = {
            "messages": messages,
            "model": "llama-2-chat"
        }
        with requests.post(url, headers=headers, data=json.dumps(data), stream=True) as response:
            for line in response.iter_lines():
                if line:
                    yield json.loads(line)
|
||||||
|
|
||||||
|
|
@ -0,0 +1,84 @@
|
|||||||
|
import os
|
||||||
|
import platform
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
import wget
|
||||||
|
import stat
|
||||||
|
|
||||||
|
class Llm:
    """LLM service that runs a local Phi-2 llamafile and points the interpreter at it."""

    def __init__(self, config):
        """Install/start the llamafile, then apply remaining options.

        Args:
            config: Dict with ``"interpreter"`` and ``"service_directory"``
                entries plus arbitrary options. Both bookkeeping keys are
                removed from the dict; every other key is set as an
                attribute on the interpreter with ``-`` replaced by ``_``.
        """
        self.interpreter = config["interpreter"]
        config.pop("interpreter", None)

        # Handle of the background llamafile process (set by install()).
        self.server_process = None
        self.install(config["service_directory"])

        config.pop("service_directory", None)
        for key, value in config.items():
            setattr(self.interpreter, key.replace("-", "_"), value)

        self.llm = self.interpreter.llm.completions

    def install(self, service_directory):
        """Ensure the llamafile exists, start it, and configure the interpreter.

        Args:
            service_directory: Directory under which ``models/`` is kept.

        Raises:
            Exception: On macOS when ``xcode-select -p`` fails.
            PermissionError: If the llamafile exists but is not executable.
        """
        if platform.system() == "Darwin":  # Check if the system is MacOS
            result = subprocess.run(
                ["xcode-select", "-p"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT
            )
            if result.returncode != 0:
                print(
                    "Llamafile requires Mac users to have Xcode installed. You can install Xcode from https://developer.apple.com/xcode/ .\n\nAlternatively, you can use `LM Studio`, `Jan.ai`, or `Ollama` to manage local language models. Learn more at https://docs.openinterpreter.com/guides/running-locally ."
                )
                time.sleep(3)
                raise Exception("Xcode is not installed. Please install Xcode and try again.")

        # Create the models directory if it doesn't exist
        models_dir = os.path.join(service_directory, "models")
        os.makedirs(models_dir, exist_ok=True)

        # Define the path to the new llamafile
        llamafile_path = os.path.join(models_dir, "phi-2.Q4_K_M.llamafile")

        # Check if the new llamafile exists, if not download it
        if not os.path.exists(llamafile_path):
            print(
                "Attempting to download the `Phi-2` language model. This may take a few minutes."
            )
            time.sleep(3)

            url = "https://huggingface.co/jartine/phi-2-llamafile/resolve/main/phi-2.Q4_K_M.llamafile"
            wget.download(url, llamafile_path)

        # Make the new llamafile executable
        if platform.system() != "Windows":
            st = os.stat(llamafile_path)
            os.chmod(llamafile_path, st.st_mode | stat.S_IEXEC)

        # Run the new llamafile in the background
        if os.path.exists(llamafile_path):
            # Check the permission bit instead of executing the file: the
            # old check_call "test" actually ran the llamafile in the
            # foreground and only returned once that process had exited.
            if not os.access(llamafile_path, os.X_OK):
                print("The llamafile is not executable. Please check the file permissions.")
                raise PermissionError(f"The llamafile is not executable: {llamafile_path}")
            self.server_process = subprocess.Popen([llamafile_path, "-ngl", "9999"])
        else:
            error_message = "The llamafile does not exist or is corrupted. Please ensure it has been downloaded correctly or try again."
            print(error_message)  # previously printed twice

        self.interpreter.system_message = "You are Open Interpreter, a world-class programmer that can execute code on the user's machine."
        self.interpreter.offline = True

        self.interpreter.llm.model = "local"
        self.interpreter.llm.temperature = 0
        # Plain HTTP: the locally started server is not given any TLS setup
        # here, so the previous "https://" base could not be reached.
        self.interpreter.llm.api_base = "http://localhost:8080/v1"
        self.interpreter.llm.max_tokens = 1000
        self.interpreter.llm.context_window = 3000
        self.interpreter.llm.supports_functions = False
|
@ -0,0 +1,151 @@
|
|||||||
|
"""
|
||||||
|
Defines a function which takes a path to an audio file and turns it into text.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
import os
|
||||||
|
import contextlib
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
import ffmpeg
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
|
||||||
|
class Stt:
    """Speech-to-text service wrapper around this module's functions."""

    def __init__(self, config):
        """Run the module-level ``install`` for ``config["service_directory"]``."""
        install(config["service_directory"])

    def stt(self, audio_file_path):
        """Transcribe ``audio_file_path`` via the module-level ``stt`` function."""
        transcript = stt(audio_file_path)
        return transcript
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def install(service_dir):
    """Build the whisper-rust executable and fetch the model if missing.

    The bundled ``whisper-rust`` sources (next to this file) are copied to
    ``<service_dir>/whisper-rust`` and built with cargo. The model file is
    downloaded to ``<service_dir>/model``.

    Args:
        service_dir: Directory that receives the build tree and the model.

    Raises:
        SystemExit: With code 1 when the bundled sources are missing, or
            when a build is needed and ``rustc`` is not on PATH.
    """
    whisper_rust_path = os.path.join(service_dir, "whisper-rust")
    script_dir = os.path.dirname(os.path.realpath(__file__))
    source_whisper_rust_path = os.path.join(script_dir, "whisper-rust")
    if not os.path.exists(source_whisper_rust_path):
        print(f"Source directory does not exist: {source_whisper_rust_path}")
        raise SystemExit(1)
    if not os.path.exists(whisper_rust_path):
        shutil.copytree(source_whisper_rust_path, whisper_rust_path)

    # Check if whisper-rust executable exists before attempting to build
    if not os.path.isfile(os.path.join(whisper_rust_path, "target/release/whisper-rust")):
        # Check if Rust is installed. Needed to build whisper executable
        if shutil.which("rustc") is None:
            print("Rust is not installed or is not in system PATH. Please install Rust before proceeding.")
            raise SystemExit(1)

        # Build in the copied tree via cwd= instead of os.chdir(): chdir
        # changes the working directory of the whole process, which breaks
        # any other code that relies on relative paths.
        subprocess.call(["cargo", "build", "--release"], cwd=whisper_rust_path)
    else:
        print("Whisper Rust executable already exists. Skipping build.")

    model_dir = os.path.join(service_dir, "model")
    model_name = os.getenv('WHISPER_MODEL_NAME', 'ggml-tiny.en.bin')
    model_url = os.getenv('WHISPER_MODEL_URL', 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/')
    model_file = os.path.join(model_dir, model_name)

    if not os.path.isfile(model_file):
        os.makedirs(model_dir, exist_ok=True)
        # Argument list, not a shell string: the URL and file name come
        # from the environment and must not be interpreted by a shell.
        subprocess.call(["curl", "-L", f"{model_url}{model_name}", "-o", model_file])
    else:
        print("Whisper model already exists. Skipping download.")
|
||||||
|
|
||||||
|
def convert_mime_type_to_format(mime_type: str) -> str:
    """Map an audio MIME type to the file extension used for temp files.

    Args:
        mime_type: MIME type string such as ``"audio/wav"``.

    Returns:
        ``"wav"``, ``"webm"`` or ``"dat"`` for the known types; any other
        value is returned unchanged.
    """
    extension_by_mime = {
        "audio/x-wav": "wav",
        "audio/wav": "wav",
        "audio/webm": "webm",
        "audio/raw": "dat",
    }
    return extension_by_mime.get(mime_type, mime_type)
|
||||||
|
|
||||||
|
@contextlib.contextmanager
def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    """Context manager yielding the path of a 16 kHz mono WAV made from ``audio``.

    The bytes are written to a temp input file and converted with ffmpeg.
    Both temp files are removed when the context exits, so the yielded path
    is only valid inside the ``with`` body.

    Args:
        audio: Encoded audio bytes.
        mime_type: MIME type of ``audio``. ``"audio/raw"`` is read as
            signed 16-bit little-endian, 16 kHz, mono.

    Yields:
        Path of the converted WAV file.
    """
    temp_dir = tempfile.gettempdir()

    # Create a temporary file with the appropriate extension
    input_ext = convert_mime_type_to_format(mime_type)
    input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}")
    output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")

    # Everything that creates files sits inside the try, so a failed ffmpeg
    # run no longer leaves the input file behind.
    try:
        with open(input_path, 'wb') as f:
            f.write(audio)

        # Export to wav
        print(mime_type, input_path, output_path)
        if mime_type == "audio/raw":
            ffmpeg.input(
                input_path,
                f='s16le',
                ar='16000',
                ac=1,
            ).output(output_path).run()
        else:
            ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()

        yield output_path
    finally:
        # Either file may be missing if an earlier step failed; cleanup
        # must not mask the original error.
        for path in (input_path, output_path):
            with contextlib.suppress(FileNotFoundError):
                os.remove(path)
|
||||||
|
|
||||||
|
def run_command(command):
    """Run ``command`` and return its captured ``(stdout, stderr)`` as text.

    The exit status is not checked.
    """
    completed = subprocess.run(command, capture_output=True, text=True)
    captured_out = completed.stdout
    captured_err = completed.stderr
    return captured_out, captured_err
|
||||||
|
|
||||||
|
def get_transcription_file(wav_file_path: str):
    """Transcribe a WAV file with the whisper-rust executable.

    Args:
        wav_file_path: Path of the WAV file to transcribe.

    Returns:
        The executable's standard output. Its stderr is discarded.

    Raises:
        EnvironmentError: If ``WHISPER_MODEL_NAME`` is set to an empty value.
    """
    # NOTE(review): both paths are resolved next to this file, while
    # install() in this module builds and downloads under the service
    # directory it is given -- confirm the two locations agree.
    local_path = os.path.join(os.path.dirname(__file__), 'model')
    whisper_rust_path = os.path.join(os.path.dirname(__file__), 'whisper-rust', 'target', 'release')

    # Same default that install() uses, so an unset variable no longer
    # makes transcription fail after a successful default install.
    model_name = os.getenv('WHISPER_MODEL_NAME', 'ggml-tiny.en.bin')
    if not model_name:
        raise EnvironmentError("WHISPER_MODEL_NAME environment variable is not set.")

    output, _ = run_command([
        os.path.join(whisper_rust_path, 'whisper-rust'),
        '--model-path', os.path.join(local_path, model_name),
        '--file-path', wav_file_path
    ])

    return output
|
||||||
|
|
||||||
|
def get_transcription_bytes(audio_bytes: bytearray, mime_type):
    """Convert ``audio_bytes`` to a temporary WAV and transcribe that file."""
    wav_context = export_audio_to_wav_ffmpeg(audio_bytes, mime_type)
    with wav_context as converted_path:
        transcript = get_transcription_file(converted_path)
        return transcript
|
||||||
|
|
||||||
|
def stt_bytes(audio_bytes: bytearray, mime_type="audio/wav"):
    """Transcribe in-memory audio by converting it to WAV and calling ``stt_wav``."""
    wav_context = export_audio_to_wav_ffmpeg(audio_bytes, mime_type)
    with wav_context as converted_path:
        transcript = stt_wav(converted_path)
        return transcript
|
||||||
|
|
||||||
|
def stt_wav(wav_file_path: str):
    """Re-encode a WAV file to 16 kHz mono PCM and transcribe the result.

    The re-encoded copy lives in the temp directory and is removed after
    transcription, whether or not transcription succeeds.
    """
    stamp = datetime.now().strftime('%Y%m%d%H%M%S%f')
    normalized_path = os.path.join(tempfile.gettempdir(), f"output_stt_{stamp}.wav")

    source = ffmpeg.input(wav_file_path)
    source.output(normalized_path, acodec='pcm_s16le', ac=1, ar='16k').run()

    try:
        return get_transcription_file(normalized_path)
    finally:
        os.remove(normalized_path)
|
||||||
|
|
||||||
|
def stt(input_data, mime_type="audio/wav"):
    """Transcribe either a WAV file path or raw audio bytes.

    Args:
        input_data: ``str`` path to a WAV file, or a ``bytearray`` of audio.
        mime_type: MIME type of the bytes; ignored for ``str`` input.

    Returns:
        The transcription produced by ``stt_wav`` / ``stt_bytes``.

    Raises:
        ValueError: If ``input_data`` is neither ``str`` nor ``bytearray``.
    """
    # Guard clauses: one early return per supported input type.
    if isinstance(input_data, str):
        return stt_wav(input_data)

    if isinstance(input_data, bytearray):
        return stt_bytes(input_data, mime_type)

    raise ValueError("Input data should be either a path to a wav file (str) or audio bytes (bytearray)")
|
@ -0,0 +1,30 @@
|
|||||||
|
import ffmpeg
|
||||||
|
import tempfile
|
||||||
|
from openai import OpenAI
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
client = OpenAI()
|
||||||
|
|
||||||
|
class Tts:
    """Text-to-speech service that uses the module-level OpenAI ``client``."""

    def __init__(self, config):
        """Accept the service config; nothing in it is used here."""
        pass

    def tts(self, text):
        """Synthesize ``text`` and return the path of a raw PCM file.

        The speech is requested as opus, written to a temp file, and
        converted with ffmpeg to signed 16-bit little-endian, 16 kHz, mono.

        Args:
            text: Text to speak.

        Returns:
            Path of ``raw.dat`` in the system temp directory. The name is
            fixed, so each call replaces the previous output.
        """
        response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=text,
            response_format="opus"
        )
        # Only the unique name is needed; close the handle right away.
        with tempfile.NamedTemporaryFile(suffix=".opus", delete=False) as temp_file:
            opus_path = temp_file.name

        try:
            response.stream_to_file(opus_path)

            # TODO: hack to format audio correctly for device
            outfile = tempfile.gettempdir() + "/" + "raw.dat"
            # overwrite_output: the output name is fixed, so a leftover file
            # from an earlier call must not stop ffmpeg.
            ffmpeg.input(opus_path).output(outfile, f="s16le", ar="16000", ac="1").run(overwrite_output=True)
        finally:
            # delete=False means nothing else removes the intermediate file.
            os.remove(opus_path)

        return outfile
|
||||||
|
|
||||||
|
|
@ -0,0 +1,84 @@
|
|||||||
|
import ffmpeg
|
||||||
|
import tempfile
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
import urllib.request
|
||||||
|
import tarfile
|
||||||
|
|
||||||
|
class Tts:
    """Text-to-speech service that shells out to a local Piper install."""

    def __init__(self, config):
        """Install Piper under ``config["service_directory"]`` if needed."""
        self.piper_directory = ""
        self.install(config["service_directory"])

    def tts(self, text):
        """Synthesize ``text`` with Piper and return the path of a raw PCM file.

        Piper writes a temp WAV, which ffmpeg converts to signed 16-bit
        little-endian, 16 kHz, mono.

        Args:
            text: Text to speak; passed to Piper on stdin.

        Returns:
            Path of ``raw.dat`` in the system temp directory. The name is
            fixed, so each call replaces the previous output.
        """
        # Only the unique name is needed; close the handle before Piper
        # writes to the file.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            output_file = temp_file.name

        try:
            piper_dir = self.piper_directory
            subprocess.run([
                os.path.join(piper_dir, 'piper'),
                '--model', os.path.join(piper_dir, os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx')),
                '--output_file', output_file
            ], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

            # TODO: hack to format audio correctly for device
            outfile = tempfile.gettempdir() + "/" + "raw.dat"
            # overwrite_output: the output name is fixed, so a leftover file
            # from an earlier call must not stop ffmpeg.
            ffmpeg.input(output_file).output(outfile, f="s16le", ar="16000", ac="1").run(overwrite_output=True)
        finally:
            # delete=False means nothing else removes the intermediate WAV.
            os.remove(output_file)

        return outfile

    def install(self, service_directory):
        """Download Piper and a voice model into ``<service_directory>/piper``.

        Nothing is downloaded when that directory already exists.

        Args:
            service_directory: Parent directory of the ``piper`` folder.
        """
        PIPER_FOLDER_PATH = service_directory
        self.piper_directory = os.path.join(PIPER_FOLDER_PATH, 'piper')
        if not os.path.isdir(self.piper_directory):  # Check if the Piper directory exists
            os.makedirs(PIPER_FOLDER_PATH, exist_ok=True)

            # Determine OS and architecture
            OS = os.uname().sysname
            ARCH = os.uname().machine
            if OS == "Darwin":
                OS = "macos"
                if ARCH == "arm64":
                    ARCH = "aarch64"
                elif ARCH == "x86_64":
                    ARCH = "x64"
                else:
                    print("Piper: unsupported architecture")
                    return
            else:
                # uname reports e.g. "Linux"; the asset file name is built
                # from this value and the download URL is case-sensitive.
                # NOTE(review): assumes release assets use lower-case OS
                # names (piper_linux_<arch>.tar.gz) -- confirm.
                OS = OS.lower()

            PIPER_ASSETNAME = f"piper_{OS}_{ARCH}.tar.gz"
            PIPER_URL = "https://github.com/rhasspy/piper/releases/latest/download/"

            # Download and extract Piper
            archive_path = os.path.join(PIPER_FOLDER_PATH, PIPER_ASSETNAME)
            urllib.request.urlretrieve(f"{PIPER_URL}{PIPER_ASSETNAME}", archive_path)
            with tarfile.open(archive_path, 'r:gz') as tar:
                tar.extractall(path=PIPER_FOLDER_PATH)

            PIPER_VOICE_URL = os.getenv('PIPER_VOICE_URL', 'https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/')
            PIPER_VOICE_NAME = os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx')

            # Download voice model and its json file
            urllib.request.urlretrieve(f"{PIPER_VOICE_URL}{PIPER_VOICE_NAME}", os.path.join(self.piper_directory, PIPER_VOICE_NAME))
            urllib.request.urlretrieve(f"{PIPER_VOICE_URL}{PIPER_VOICE_NAME}.json", os.path.join(self.piper_directory, f"{PIPER_VOICE_NAME}.json"))

            # Additional setup for macOS
            if OS == "macos":
                if ARCH == "x64":
                    subprocess.run(['softwareupdate', '--install-rosetta', '--agree-to-license'])

                PIPER_PHONEMIZE_ASSETNAME = f"piper-phonemize_{OS}_{ARCH}.tar.gz"
                PIPER_PHONEMIZE_URL = "https://github.com/rhasspy/piper-phonemize/releases/latest/download/"
                phonemize_archive = os.path.join(self.piper_directory, PIPER_PHONEMIZE_ASSETNAME)
                urllib.request.urlretrieve(f"{PIPER_PHONEMIZE_URL}{PIPER_PHONEMIZE_ASSETNAME}", phonemize_archive)

                with tarfile.open(phonemize_archive, 'r:gz') as tar:
                    tar.extractall(path=self.piper_directory)

                # Rewrite the @rpath library references in the piper binary
                # to the extracted piper-phonemize libraries.
                PIPER_DIR = self.piper_directory
                for dylib in ('libespeak-ng.1.dylib', 'libonnxruntime.1.14.1.dylib', 'libpiper_phonemize.1.dylib'):
                    subprocess.run(['install_name_tool', '-change', f'@rpath/{dylib}', f"{PIPER_DIR}/piper-phonemize/lib/{dylib}", f"{PIPER_DIR}/piper"])

            print("Piper setup completed.")
        else:
            print("Piper already set up. Skipping download.")
|
@ -1,98 +0,0 @@
|
|||||||
"""
|
|
||||||
Defines a function which takes text and returns a path to an audio file.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from pydub import AudioSegment
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
load_dotenv() # take environment variables from .env.
|
|
||||||
|
|
||||||
import ffmpeg
|
|
||||||
import tempfile
|
|
||||||
from openai import OpenAI
|
|
||||||
import os
|
|
||||||
import subprocess
|
|
||||||
import tempfile
|
|
||||||
from pydub import AudioSegment
|
|
||||||
|
|
||||||
client = OpenAI()
|
|
||||||
|
|
||||||
chunk_size = 1024
|
|
||||||
|
|
||||||
def stream_tts(text):
    """
    A generator that streams tts as LMC messages.

    When the ``ALL_LOCAL`` environment variable equals the string
    ``'False'`` the audio comes from the OpenAI ``client``; otherwise it is
    produced by the local Piper binary. Either way the result is converted
    with ffmpeg to raw signed 16-bit little-endian, 16 kHz, mono.

    Yields:
        A start message dict, then ``chunk_size``-byte slices of the audio,
        then an end message dict.
    """
    use_openai = os.getenv('ALL_LOCAL') == 'False'

    # Only the unique name is needed; the producer writes to the path.
    suffix = ".opus" if use_openai else ".wav"
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as temp_file:
        source_path = temp_file.name

    try:
        if use_openai:
            response = client.audio.speech.create(
                model="tts-1",
                voice="alloy",
                input=text,
                response_format="opus"
            )
            response.stream_to_file(source_path)
        else:
            piper_dir = os.path.join(os.path.dirname(__file__), 'local_service', 'piper')
            subprocess.run([
                os.path.join(piper_dir, 'piper'),
                '--model', os.path.join(piper_dir, os.getenv('PIPER_VOICE_NAME')),
                '--output_file', source_path
            ], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

        # The conversion was duplicated verbatim in both branches; it now
        # runs once for whichever source file was produced.
        # TODO: hack to format audio correctly for device
        outfile = tempfile.gettempdir() + "/" + "raw.dat"
        ffmpeg.input(source_path).output(outfile, f="s16le", ar="16000", ac="1").run()
        with open(outfile, "rb") as f:
            audio_bytes = f.read()
        file_type = "bytes.raw"
        print(outfile, len(audio_bytes))
        os.remove(outfile)
    finally:
        # delete=False means nothing else removes the intermediate file.
        os.remove(source_path)

    # Stream the audio
    yield {"role": "assistant", "type": "audio", "format": file_type, "start": True}
    for i in range(0, len(audio_bytes), chunk_size):
        chunk = audio_bytes[i:i+chunk_size]
        yield chunk
    yield {"role": "assistant", "type": "audio", "format": file_type, "end": True}
|
|
||||||
|
|
||||||
def play_audiosegment(audio):
    """
    UNUSED

    Play ``audio`` with a 500 ms fade-out and 500 ms of trailing silence;
    plain playback makes some pops and this avoids them.
    """
    # NOTE(review): `sa` is not bound by any import visible in this file,
    # so calling this function looks like it would raise NameError.
    wav_name = "output_audio.wav"

    # Fade out, then pad with silence at the end.
    padded = audio.fade_out(500) + AudioSegment.silent(duration=500)

    # Round-trip through a WAV file for the playback library.
    padded.export(wav_name, format="wav")

    # Play and block until playback has finished.
    sa.WaveObject.from_wave_file(wav_name).play().wait_done()

    # Delete the wav file
    os.remove(wav_name)
|
|
||||||
|
|
@ -0,0 +1,57 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
import os
|
||||||
|
import contextlib
|
||||||
|
import tempfile
|
||||||
|
import ffmpeg
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
def convert_mime_type_to_format(mime_type: str) -> str:
    """Return the temp-file extension for an audio MIME type.

    Known types map to ``"wav"``, ``"webm"`` or ``"dat"``; anything else
    comes back unchanged.
    """
    if mime_type in ("audio/x-wav", "audio/wav"):
        extension = "wav"
    elif mime_type == "audio/webm":
        extension = "webm"
    elif mime_type == "audio/raw":
        extension = "dat"
    else:
        extension = mime_type
    return extension
|
||||||
|
|
||||||
|
@contextlib.contextmanager
def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    """Context manager yielding the path of a 16 kHz mono WAV made from ``audio``.

    The bytes are written to a temp input file and converted with ffmpeg.
    Both temp files are removed when the context exits, so the yielded path
    is only valid inside the ``with`` body.

    Args:
        audio: Encoded audio bytes.
        mime_type: MIME type of ``audio``. ``"audio/raw"`` is read as
            signed 16-bit little-endian, 16 kHz, mono.

    Yields:
        Path of the converted WAV file.
    """
    temp_dir = tempfile.gettempdir()

    # Create a temporary file with the appropriate extension
    input_ext = convert_mime_type_to_format(mime_type)
    input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}")
    output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")

    # Everything that creates files sits inside the try, so a failed ffmpeg
    # run no longer leaves the input file behind.
    try:
        with open(input_path, 'wb') as f:
            f.write(audio)

        # Export to wav
        print(mime_type, input_path, output_path)
        if mime_type == "audio/raw":
            ffmpeg.input(
                input_path,
                f='s16le',
                ar='16000',
                ac=1,
            ).output(output_path).run()
        else:
            ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()

        yield output_path
    finally:
        # Either file may be missing if an earlier step failed; cleanup
        # must not mask the original error.
        for path in (input_path, output_path):
            with contextlib.suppress(FileNotFoundError):
                os.remove(path)
|
||||||
|
|
||||||
|
def run_command(command):
    """Execute ``command`` and return ``(stdout, stderr)`` as text.

    The exit status is ignored.
    """
    finished = subprocess.run(command, capture_output=True, text=True)
    return finished.stdout, finished.stderr
|
||||||
|
|
||||||
|
|
||||||
|
def bytes_to_wav(audio_bytes: bytearray, mime_type):
    """Convert audio bytes to a WAV file and return its path.

    ``export_audio_to_wav_ffmpeg`` deletes its output when the ``with``
    block exits, so returning that path handed callers a file that no
    longer existed. The converted file is copied to a separate temp file
    first and that copy's path is returned.

    Args:
        audio_bytes: Encoded audio bytes.
        mime_type: MIME type of ``audio_bytes``.

    Returns:
        Path of a WAV file in the temp directory. The caller owns it and is
        responsible for deleting it.
    """
    import shutil  # local import: only needed for this copy

    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
        handle, kept_path = tempfile.mkstemp(suffix=".wav")
        os.close(handle)
        shutil.copyfile(wav_file_path, kept_path)
    return kept_path
|
@ -0,0 +1,95 @@
|
|||||||
|
import typer
|
||||||
|
import asyncio
|
||||||
|
import platform
|
||||||
|
import concurrent.futures
|
||||||
|
import threading
|
||||||
|
import os
|
||||||
|
import signal
|
||||||
|
import importlib
|
||||||
|
|
||||||
|
app = typer.Typer()
|
||||||
|
|
||||||
|
@app.command()
def run(
    server: bool = typer.Option(False, "--server", help="Run server"),
    server_host: str = typer.Option("0.0.0.0", "--server-host", help="Specify the server host where the server will deploy"),
    server_port: int = typer.Option(8000, "--server-port", help="Specify the server port where the server will deploy"),

    tunnel_service: str = typer.Option("bore", "--tunnel-service", help="Specify the tunnel service"),
    expose: bool = typer.Option(False, "--expose", help="Expose server to internet"),

    client: bool = typer.Option(False, "--client", help="Run client"),
    server_url: str = typer.Option(None, "--server-url", help="Specify the server URL that the client should expect. Defaults to server-host and server-port"),
    client_type: str = typer.Option("auto", "--client-type", help="Specify the client type"),

    llm_service: str = typer.Option("litellm", "--llm-service", help="Specify the LLM service"),

    model: str = typer.Option("gpt-4", "--model", help="Specify the model"),
    llm_supports_vision: bool = typer.Option(False, "--llm-supports-vision", help="Specify if the LLM service supports vision"),
    llm_supports_functions: bool = typer.Option(False, "--llm-supports-functions", help="Specify if the LLM service supports functions"),
    context_window: int = typer.Option(2048, "--context-window", help="Specify the context window size"),
    max_tokens: int = typer.Option(4096, "--max-tokens", help="Specify the maximum number of tokens"),
    temperature: float = typer.Option(0.8, "--temperature", help="Specify the temperature for generation"),

    tts_service: str = typer.Option("openai", "--tts-service", help="Specify the TTS service"),

    stt_service: str = typer.Option("openai", "--stt-service", help="Specify the STT service"),

    local: bool = typer.Option(False, "--local", help="Use recommended local services for LLM, STT, and TTS"),
):
    """Start the server, the tunnel and/or the client, each in its own thread.

    With neither ``--server`` nor ``--client`` both are started. ``--local``
    overrides the three service choices with the local ones. The function
    blocks until every started thread has finished. SIGINT terminates the
    process immediately.
    """
    if local:
        tts_service = "piper"
        llm_service = "llamafile"
        stt_service = "local-whisper"

    if not server_url:
        server_url = f"{server_host}:{server_port}"

    if not server and not client:
        server = True
        client = True

    def handle_exit(signum, frame):
        os._exit(0)

    signal.signal(signal.SIGINT, handle_exit)

    # Only threads that were actually started are joined. Joining three
    # fixed names raised NameError whenever one of --server / --expose /
    # --client was not in effect.
    threads = []

    if server:
        from .server.server import main
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        server_thread = threading.Thread(target=loop.run_until_complete, args=(main(server_host, server_port, llm_service, model, llm_supports_vision, llm_supports_functions, context_window, max_tokens, temperature, tts_service, stt_service),))
        server_thread.start()
        threads.append(server_thread)

    if expose:
        # tunnel_service is not used yet; the tunnel is always ./tunnel.sh,
        # resolved against the current working directory.
        tunnel_thread = threading.Thread(target=os.system, args=("./tunnel.sh",))
        tunnel_thread.start()
        threads.append(tunnel_thread)

    if client:
        if client_type == "auto":
            system_type = platform.system()
            if system_type == "Darwin":  # Mac OS
                client_type = "mac"
            elif system_type == "Linux":  # Linux System
                try:
                    with open('/proc/device-tree/model', 'r') as m:
                        if 'raspberry pi' in m.read().lower():
                            client_type = "rpi"
                        else:
                            client_type = "linux"
                except FileNotFoundError:
                    client_type = "linux"

        module = importlib.import_module(f".clients.{client_type}.device", package='01OS')
        client_thread = threading.Thread(target=module.main, args=[server_url])
        client_thread.start()
        threads.append(client_thread)

    try:
        for thread in threads:
            thread.join()
    except KeyboardInterrupt:
        os.kill(os.getpid(), signal.SIGINT)
|
@ -1,31 +0,0 @@
|
|||||||
"""
|
|
||||||
This is just for the Python package — we need a Python entrypoint.
|
|
||||||
Just starts `start.sh` with all the same command line arguments. Aliased to 01.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
import psutil
|
|
||||||
import importlib
|
|
||||||
# Can't import normally because it starts with a number
|
|
||||||
process_utils = importlib.import_module("01OS.server.utils.process_utils")
|
|
||||||
kill_process_tree = process_utils.kill_process_tree
|
|
||||||
|
|
||||||
def main():
    """Run ``start.sh`` (next to this file) with this process's CLI arguments.

    Blocks until the script exits. On Ctrl-C the process tree is killed via
    ``kill_process_tree``.
    """
    # Get command line arguments
    args = sys.argv[1:]

    # Get the directory of the current script
    dir_path = os.path.dirname(os.path.realpath(__file__))

    # Prepare the command
    command = [os.path.join(dir_path, 'start.sh')] + args

    try:
        # Start start.sh using psutil for better process management, and to kill all processes
        process = psutil.Popen(command)
        # Popen returns immediately; without waiting, the try block ended
        # at once and the KeyboardInterrupt handler could never run.
        process.wait()
    except KeyboardInterrupt:
        print("Exiting...")
        kill_process_tree()
|
|
Loading…
Reference in new issue