parent
91fcb94438
commit
701d357e30
@ -0,0 +1,10 @@
|
||||
from ..base_device import Device
|
||||
|
||||
device = Device()
|
||||
|
||||
def main(server_url):
    """Point the module-level ``device`` at a server and start it.

    Args:
        server_url: Value assigned to ``device.server_url`` before the
            device is started.
    """
    device.server_url = server_url
    device.start()


if __name__ == "__main__":
    import sys

    # main() requires the server URL; calling it with no argument (as the
    # guard used to) raises TypeError, so take the URL from the command line.
    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} SERVER_URL")
    main(sys.argv[1])
|
@ -1,4 +0,0 @@
|
||||
from ..base_device import Device
|
||||
|
||||
# Create the desktop client device and start it when this module is executed.
desktop_device = Device()
|
||||
desktop_device.start()
|
@ -1,4 +1,9 @@
|
||||
from ..base_device import Device
|
||||
|
||||
# NOTE(review): this span comes from a diff hunk without +/- markers; the
# `rpi_device` statements look like the pre-change version and `device` like
# its replacement -- confirm which of the two is live before relying on either.
rpi_device = Device()
|
||||
rpi_device.start()
|
||||
# Module-level device instance that main() starts.
device = Device()
|
||||
|
||||
def main(server_url=None):
    """Start the module-level ``device``.

    Args:
        server_url: Optional server address. The launcher invokes
            ``module.main`` with the server URL as a positional argument, so
            this entry point must accept one; when given it is assigned to
            ``device.server_url`` before starting. When omitted the device is
            started with whatever configuration it already has.
    """
    if server_url is not None:
        device.server_url = server_url
    device.start()


if __name__ == "__main__":
    main()
|
@ -1,6 +0,0 @@
|
||||
# Launch the client module matching this machine's node name:
# the rpi client when the node is called "rpi", the macos client otherwise.
DEVICE=$(uname -n)
case "$DEVICE" in
    rpi)
        python -m 01OS.clients.rpi.device
        ;;
    *)
        python -m 01OS.clients.macos.device
        ;;
esac
|
@ -0,0 +1,15 @@
|
||||
class Llm:
    """LLM service that delegates completions to an interpreter object.

    After construction, ``self.llm`` is the interpreter's
    ``llm.completions`` attribute.
    """

    def __init__(self, config):
        """Apply ``config`` to the interpreter and expose its completions.

        Args:
            config: Mapping that must contain ``"interpreter"``. That key and
                ``"service_directory"`` are removed from the mapping in
                place; every remaining entry is set as an attribute on the
                interpreter, with dashes in the key turned into underscores.

        Raises:
            KeyError: If ``config`` has no ``"interpreter"`` entry.
        """
        # Litellm is used by OI by default, so we just modify OI
        interpreter = config.pop("interpreter")
        config.pop("service_directory", None)

        for option_name in config:
            attribute = option_name.replace("-", "_")
            setattr(interpreter, attribute, config[option_name])

        self.llm = interpreter.llm.completions
|
||||
|
||||
|
||||
|
@ -0,0 +1,49 @@
|
||||
import os
|
||||
import subprocess
|
||||
import requests
|
||||
import json
|
||||
|
||||
class Llm:
    """Local LLM service backed by a LlamaEdge API server run with WasmEdge."""

    def __init__(self, config):
        """Install the runtime and model if needed, then start the server.

        Args:
            config: Mapping that must contain ``"service_directory"``, the
                folder under which the ``llm`` directory is created.
        """
        self.install(config["service_directory"])

    def install(self, service_directory):
        """Download WasmEdge, the model and the server app, then launch it.

        The downloads are skipped when ``<service_directory>/llm`` already
        exists. The API server is started in the background in both cases so
        that ``llm()`` has something to talk to on every run.

        Args:
            service_directory: Folder that holds (or will hold) the ``llm``
                directory.

        Raises:
            subprocess.CalledProcessError: If a download or the WasmEdge
                installer exits with a non-zero status.
        """
        LLM_FOLDER_PATH = service_directory
        self.llm_directory = os.path.join(LLM_FOLDER_PATH, 'llm')
        if not os.path.isdir(self.llm_directory):  # Check if the LLM directory exists
            # The downloads below run with cwd=self.llm_directory, so that
            # directory itself (not only its parent) has to exist.
            os.makedirs(self.llm_directory, exist_ok=True)

            # Install WasmEdge. A '|' inside an argv list is handed to curl as
            # a literal argument, so fetch the script first and feed it to
            # bash on stdin instead.
            installer = subprocess.run(
                ['curl', '-sSf', 'https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh'],
                stdout=subprocess.PIPE,
                check=True,
            )
            subprocess.run(
                ['bash', '-s', '--', '--plugin', 'wasi_nn-ggml'],
                input=installer.stdout,
                check=True,
            )

            # Download the Qwen1.5-0.5B-Chat model GGUF file
            MODEL_URL = "https://huggingface.co/second-state/Qwen1.5-0.5B-Chat-GGUF/resolve/main/Qwen1.5-0.5B-Chat-Q5_K_M.gguf"
            subprocess.run(['curl', '-LO', MODEL_URL], cwd=self.llm_directory, check=True)

            # Download the llama-api-server.wasm app
            APP_URL = "https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/llama-api-server.wasm"
            subprocess.run(['curl', '-LO', APP_URL], cwd=self.llm_directory, check=True)

            print("LLM setup completed.")
        else:
            print("LLM already set up. Skipping download.")

        # Run the API server. Popen (not run) so the constructor returns while
        # the server keeps running; the handle is kept for later shutdown.
        self.server_process = subprocess.Popen(
            ['wasmedge', '--dir', '.:.', '--nn-preload', 'default:GGML:AUTO:Qwen1.5-0.5B-Chat-Q5_K_M.gguf', 'llama-api-server.wasm', '-p', 'llama-2-chat'],
            cwd=self.llm_directory,
        )

    def llm(self, messages):
        """Send ``messages`` to the local chat-completions endpoint.

        Args:
            messages: Value placed under the ``"messages"`` key of the
                request body.

        Yields:
            Each non-empty line of the streamed HTTP response, parsed as JSON.
        """
        url = "http://localhost:8080/v1/chat/completions"
        headers = {
            'accept': 'application/json',
            'Content-Type': 'application/json'
        }
        data = {
            "messages": messages,
            "model": "llama-2-chat"
        }
        with requests.post(url, headers=headers, data=json.dumps(data), stream=True) as response:
            for line in response.iter_lines():
                if line:
                    yield json.loads(line)
|
||||
|
||||
|
@ -0,0 +1,84 @@
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
import time
|
||||
import wget
|
||||
import stat
|
||||
|
||||
class Llm:
    """LLM service that runs a Phi-2 llamafile locally and drives it through
    the interpreter object supplied in the config."""

    def __init__(self, config):
        """Install and start the llamafile, then apply ``config`` overrides.

        Args:
            config: Mapping that must contain ``"interpreter"`` and
                ``"service_directory"``. Both keys are removed from the
                mapping; every remaining entry is set as an attribute on the
                interpreter (dashes in keys become underscores). The
                overrides are applied after ``install`` so they win over the
                defaults it sets.
        """
        self.interpreter = config["interpreter"]
        config.pop("interpreter", None)

        self.install(config["service_directory"])

        config.pop("service_directory", None)
        for key, value in config.items():
            setattr(self.interpreter, key.replace("-", "_"), value)

        self.llm = self.interpreter.llm.completions

    def install(self, service_directory):
        """Download the llamafile if missing, launch it, configure the interpreter.

        Args:
            service_directory: Folder under which ``models/`` is created.

        Raises:
            Exception: On macOS when ``xcode-select -p`` reports failure.
            PermissionError: If the llamafile exists but is not executable.
        """
        if platform.system() == "Darwin":  # Check if the system is MacOS
            result = subprocess.run(
                ["xcode-select", "-p"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT
            )
            if result.returncode != 0:
                print(
                    "Llamafile requires Mac users to have Xcode installed. You can install Xcode from https://developer.apple.com/xcode/ .\n\nAlternatively, you can use `LM Studio`, `Jan.ai`, or `Ollama` to manage local language models. Learn more at https://docs.openinterpreter.com/guides/running-locally ."
                )
                time.sleep(3)
                raise Exception("Xcode is not installed. Please install Xcode and try again.")

        # Define the path to the models directory and make sure it exists
        models_dir = os.path.join(service_directory, "models")
        os.makedirs(models_dir, exist_ok=True)

        # Define the path to the new llamafile
        llamafile_path = os.path.join(models_dir, "phi-2.Q4_K_M.llamafile")

        # Check if the new llamafile exists, if not download it
        if not os.path.exists(llamafile_path):
            print(
                "Attempting to download the `Phi-2` language model. This may take a few minutes."
            )
            time.sleep(3)

            url = "https://huggingface.co/jartine/phi-2-llamafile/resolve/main/phi-2.Q4_K_M.llamafile"
            wget.download(url, llamafile_path)

        if os.path.exists(llamafile_path):
            # Make the new llamafile executable. Done only once the file is
            # known to exist, so a failed download reaches the error branch
            # below instead of failing inside os.stat.
            if platform.system() != "Windows":
                st = os.stat(llamafile_path)
                os.chmod(llamafile_path, st.st_mode | stat.S_IEXEC)

            # Check the permission bit rather than executing the file:
            # running the llamafile starts the model and would block here.
            if not os.access(llamafile_path, os.X_OK):
                print("The llamafile is not executable. Please check the file permissions.")
                raise PermissionError(f"The llamafile is not executable: {llamafile_path}")

            # Run the new llamafile in the background
            subprocess.Popen([llamafile_path, "-ngl", "9999"])
        else:
            error_message = "The llamafile does not exist or is corrupted. Please ensure it has been downloaded correctly or try again."
            print(error_message)

        self.interpreter.system_message = "You are Open Interpreter, a world-class programmer that can execute code on the user's machine."
        self.interpreter.offline = True

        self.interpreter.llm.model = "local"
        self.interpreter.llm.temperature = 0
        # The locally launched server is addressed over plain HTTP.
        self.interpreter.llm.api_base = "http://localhost:8080/v1"
        self.interpreter.llm.max_tokens = 1000
        self.interpreter.llm.context_window = 3000
        self.interpreter.llm.supports_functions = False
|
@ -0,0 +1,151 @@
|
||||
"""
|
||||
Defines a function which takes a path to an audio file and turns it into text.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
import os
|
||||
import contextlib
|
||||
import tempfile
|
||||
import shutil
|
||||
import ffmpeg
|
||||
import subprocess
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
|
||||
class Stt:
    """Speech-to-text service wrapping this module's ``install`` and ``stt``."""

    def __init__(self, config):
        """Run the module-level ``install`` for ``config["service_directory"]``."""
        install(config["service_directory"])

    def stt(self, audio_file_path):
        """Return the result of the module-level ``stt`` for ``audio_file_path``."""
        # Inside the method body the bare name `stt` resolves to the
        # module-level function, not to this method.
        transcript = stt(audio_file_path)
        return transcript
|
||||
|
||||
|
||||
|
||||
def install(service_dir):
    """Prepare the whisper-rust executable and the Whisper model.

    Copies the ``whisper-rust`` sources that sit next to this file into
    ``service_dir``, builds them with cargo when the release binary is
    missing, and downloads the model file into ``<service_dir>/model`` when
    it is not there yet.

    The model name and base URL come from the ``WHISPER_MODEL_NAME`` and
    ``WHISPER_MODEL_URL`` environment variables, with defaults.

    Args:
        service_dir: Folder that receives the ``whisper-rust`` and ``model``
            directories.

    Raises:
        SystemExit: With code 1 when the bundled sources are missing or
            ``rustc`` cannot be found on PATH.
    """
    ### INSTALL

    WHISPER_RUST_PATH = os.path.join(service_dir, "whisper-rust")
    script_dir = os.path.dirname(os.path.realpath(__file__))
    source_whisper_rust_path = os.path.join(script_dir, "whisper-rust")
    if not os.path.exists(source_whisper_rust_path):
        print(f"Source directory does not exist: {source_whisper_rust_path}")
        raise SystemExit(1)
    if not os.path.exists(WHISPER_RUST_PATH):
        shutil.copytree(source_whisper_rust_path, WHISPER_RUST_PATH)

    # Check if whisper-rust executable exists before attempting to build
    if not os.path.isfile(os.path.join(WHISPER_RUST_PATH, "target/release/whisper-rust")):
        # Check if Rust is installed. Needed to build whisper executable
        if shutil.which("rustc") is None:
            print("Rust is not installed or is not in system PATH. Please install Rust before proceeding.")
            raise SystemExit(1)

        # Build Whisper Rust executable if not found. The build runs with
        # cwd= instead of os.chdir so the working directory of the whole
        # process (and any relative paths used elsewhere) is left untouched.
        subprocess.call(["cargo", "build", "--release"], cwd=WHISPER_RUST_PATH)
    else:
        print("Whisper Rust executable already exists. Skipping build.")

    WHISPER_MODEL_PATH = os.path.join(service_dir, "model")

    WHISPER_MODEL_NAME = os.getenv('WHISPER_MODEL_NAME', 'ggml-tiny.en.bin')
    WHISPER_MODEL_URL = os.getenv('WHISPER_MODEL_URL', 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/')

    model_file = os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME)
    if not os.path.isfile(model_file):
        os.makedirs(WHISPER_MODEL_PATH, exist_ok=True)
        # Argument list instead of a shell string: the URL and file name come
        # from environment variables and must not be interpreted by a shell.
        subprocess.call(["curl", "-L", f"{WHISPER_MODEL_URL}{WHISPER_MODEL_NAME}", "-o", model_file])
    else:
        print("Whisper model already exists. Skipping download.")
|
||||
|
||||
def convert_mime_type_to_format(mime_type: str) -> str:
    """Map an audio MIME type to a file extension.

    Both WAV spellings map to ``"wav"``, ``audio/webm`` to ``"webm"`` and
    ``audio/raw`` to ``"dat"``. Any other value is returned unchanged.
    """
    known_formats = (
        ("audio/x-wav", "wav"),
        ("audio/wav", "wav"),
        ("audio/webm", "webm"),
        ("audio/raw", "dat"),
    )
    for known_type, extension in known_formats:
        if mime_type == known_type:
            return extension

    return mime_type
|
||||
|
||||
@contextlib.contextmanager
def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    """Context manager that converts raw audio bytes to a temporary WAV file.

    The bytes are written to a temporary input file, converted with ffmpeg,
    and the path of the converted file is yielded. Both temporary files are
    removed when the context exits, including when the conversion or the
    body of the ``with`` block raises.

    Args:
        audio: Encoded audio data.
        mime_type: MIME type of ``audio``. ``"audio/raw"`` is read as
            16 kHz mono signed 16-bit little-endian PCM.

    Yields:
        Path of the converted WAV file. It is only valid inside the ``with``
        block.
    """
    temp_dir = tempfile.gettempdir()

    # Create a temporary file with the appropriate extension. Unknown MIME
    # types come back unchanged (e.g. "audio/ogg"); keep only the part after
    # the slash so the extension never introduces a path separator.
    input_ext = convert_mime_type_to_format(mime_type).rsplit("/", 1)[-1]
    input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}")
    output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")

    try:
        with open(input_path, 'wb') as f:
            f.write(audio)

        # Export to wav
        print(mime_type, input_path, output_path)
        if mime_type == "audio/raw":
            ffmpeg.input(
                input_path,
                f='s16le',
                ar='16000',
                ac=1,
            ).output(output_path).run()
        else:
            ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()

        yield output_path
    finally:
        # Either file may be missing if an earlier step failed.
        for path in (input_path, output_path):
            with contextlib.suppress(FileNotFoundError):
                os.remove(path)
|
||||
|
||||
def run_command(command):
    """Run ``command`` to completion and return its ``(stdout, stderr)`` as text."""
    completed = subprocess.run(command, capture_output=True, text=True)
    return completed.stdout, completed.stderr
|
||||
|
||||
def get_transcription_file(wav_file_path: str):
    """Transcribe a WAV file with the whisper-rust executable.

    Args:
        wav_file_path: Path of the WAV file to transcribe.

    Returns:
        The standard output of the whisper-rust process.
    """
    # NOTE(review): these paths are relative to this file, while install()
    # builds and downloads under the service directory -- confirm both
    # locations are meant to coincide.
    local_path = os.path.join(os.path.dirname(__file__), 'model')
    whisper_rust_path = os.path.join(os.path.dirname(__file__), 'whisper-rust', 'target', 'release')
    # Use the same default model name as install() so that a setup performed
    # without WHISPER_MODEL_NAME in the environment is usable here as well.
    model_name = os.getenv('WHISPER_MODEL_NAME') or 'ggml-tiny.en.bin'

    output, _stderr = run_command([
        os.path.join(whisper_rust_path, 'whisper-rust'),
        '--model-path', os.path.join(local_path, model_name),
        '--file-path', wav_file_path
    ])

    return output
|
||||
|
||||
def get_transcription_bytes(audio_bytes: bytearray, mime_type):
    """Convert ``audio_bytes`` to a temporary WAV file and transcribe it.

    The temporary WAV only exists while the transcription runs.
    """
    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
        transcript = get_transcription_file(wav_file_path)
    return transcript
|
||||
|
||||
def stt_bytes(audio_bytes: bytearray, mime_type="audio/wav"):
    """Convert ``audio_bytes`` to a temporary WAV file and pass it to ``stt_wav``.

    Args:
        audio_bytes: Encoded audio data.
        mime_type: MIME type of ``audio_bytes``; defaults to ``"audio/wav"``.
    """
    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
        transcript = stt_wav(wav_file_path)
    return transcript
|
||||
|
||||
def stt_wav(wav_file_path: str):
    """Re-encode a WAV file to 16 kHz mono 16-bit PCM and transcribe it.

    The re-encoded copy is written to the temp directory and removed once
    the transcription attempt has finished.
    """
    stamp = datetime.now().strftime('%Y%m%d%H%M%S%f')
    output_path = os.path.join(tempfile.gettempdir(), f"output_stt_{stamp}.wav")

    conversion = ffmpeg.input(wav_file_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k')
    conversion.run()

    try:
        return get_transcription_file(output_path)
    finally:
        os.remove(output_path)
|
||||
|
||||
def stt(input_data, mime_type="audio/wav"):
    """Transcribe audio given either as a file path or as raw bytes.

    Args:
        input_data: Path to a WAV file (``str``) or encoded audio data
            (``bytearray`` or ``bytes``).
        mime_type: MIME type of the data when bytes are passed; ignored for
            paths.

    Returns:
        The transcription produced by ``stt_wav`` / ``stt_bytes``.

    Raises:
        ValueError: If ``input_data`` is neither a string nor a bytes-like
            value of the accepted types.
    """
    if isinstance(input_data, str):
        return stt_wav(input_data)
    # Immutable bytes are written to the temp file exactly like a bytearray,
    # so accept both.
    if isinstance(input_data, (bytes, bytearray)):
        return stt_bytes(input_data, mime_type)
    raise ValueError("Input data should be either a path to a wav file (str) or audio bytes (bytearray)")
|
@ -0,0 +1,30 @@
|
||||
import ffmpeg
|
||||
import tempfile
|
||||
from openai import OpenAI
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
client = OpenAI()
|
||||
|
||||
class Tts:
    """Text-to-speech service that uses the OpenAI speech API."""

    def __init__(self, config):
        """Accept ``config`` for interface parity; nothing is read from it."""
        pass

    def tts(self, text):
        """Synthesize ``text`` and return the path of the raw PCM result.

        The speech is requested as Opus, written to a temporary file and
        converted with ffmpeg to 16 kHz mono signed 16-bit little-endian PCM.

        Args:
            text: Text to speak.

        Returns:
            Path of ``raw.dat`` in the system temp directory. The file is
            overwritten by every call, so consume it before calling again.
        """
        response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=text,
            response_format="opus"
        )
        with tempfile.NamedTemporaryFile(suffix=".opus", delete=False) as temp_file:
            response.stream_to_file(temp_file.name)

        # TODO: hack to format audio correctly for device
        outfile = os.path.join(tempfile.gettempdir(), "raw.dat")
        try:
            # overwrite_output: a raw.dat left over from an earlier call
            # would otherwise make ffmpeg stop and ask before overwriting.
            ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run(overwrite_output=True)
        finally:
            # The intermediate file was created with delete=False, so it has
            # to be removed explicitly.
            os.remove(temp_file.name)

        return outfile
|
||||
|
||||
|
@ -0,0 +1,84 @@
|
||||
import ffmpeg
|
||||
import tempfile
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
import urllib.request
|
||||
import tarfile
|
||||
|
||||
class Tts:
    """Text-to-speech service that runs a locally installed Piper binary."""

    def __init__(self, config):
        """Install Piper under ``config["service_directory"]`` if needed."""
        self.piper_directory = ""
        self.install(config["service_directory"])

    def tts(self, text):
        """Synthesize ``text`` with Piper and return the raw PCM file path.

        Piper writes a temporary WAV file, which ffmpeg then converts to
        16 kHz mono signed 16-bit little-endian PCM.

        Args:
            text: Text to speak; passed to Piper on standard input.

        Returns:
            Path of ``raw.dat`` in the system temp directory. The file is
            overwritten by every call, so consume it before calling again.
        """
        # Only the unique name is needed; Piper writes the file itself.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            output_file = temp_file.name

        piper_dir = self.piper_directory
        try:
            subprocess.run([
                os.path.join(piper_dir, 'piper'),
                '--model', os.path.join(piper_dir, os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx')),
                '--output_file', output_file
            ], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

            # TODO: hack to format audio correctly for device
            outfile = os.path.join(tempfile.gettempdir(), "raw.dat")
            # overwrite_output: a raw.dat left over from an earlier call
            # would otherwise make ffmpeg stop and ask before overwriting.
            ffmpeg.input(output_file).output(outfile, f="s16le", ar="16000", ac="1").run(overwrite_output=True)
        finally:
            # The intermediate file was created with delete=False, so it has
            # to be removed explicitly.
            os.remove(output_file)

        return outfile

    def install(self, service_directory):
        """Download Piper and a voice model unless already installed.

        Nothing is downloaded when ``<service_directory>/piper`` exists.
        The voice is selected with the ``PIPER_VOICE_URL`` and
        ``PIPER_VOICE_NAME`` environment variables (with defaults).

        Args:
            service_directory: Folder into which the Piper archive is
                downloaded and extracted.
        """
        PIPER_FOLDER_PATH = service_directory
        self.piper_directory = os.path.join(PIPER_FOLDER_PATH, 'piper')
        if not os.path.isdir(self.piper_directory):  # Check if the Piper directory exists
            os.makedirs(PIPER_FOLDER_PATH, exist_ok=True)

            # Determine OS and architecture
            OS = os.uname().sysname
            ARCH = os.uname().machine
            if OS == "Darwin":
                OS = "macos"
                if ARCH == "arm64":
                    ARCH = "aarch64"
                elif ARCH == "x86_64":
                    ARCH = "x64"
                else:
                    print("Piper: unsupported architecture")
                    return
            else:
                # uname reports e.g. "Linux"; the release asset names are
                # spelled in lower case.
                OS = OS.lower()

            PIPER_ASSETNAME = f"piper_{OS}_{ARCH}.tar.gz"
            PIPER_URL = "https://github.com/rhasspy/piper/releases/latest/download/"

            # Download and extract Piper
            urllib.request.urlretrieve(f"{PIPER_URL}{PIPER_ASSETNAME}", os.path.join(PIPER_FOLDER_PATH, PIPER_ASSETNAME))
            with tarfile.open(os.path.join(PIPER_FOLDER_PATH, PIPER_ASSETNAME), 'r:gz') as tar:
                tar.extractall(path=PIPER_FOLDER_PATH)

            PIPER_VOICE_URL = os.getenv('PIPER_VOICE_URL', 'https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/')
            PIPER_VOICE_NAME = os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx')

            # Download voice model and its json file
            urllib.request.urlretrieve(f"{PIPER_VOICE_URL}{PIPER_VOICE_NAME}", os.path.join(self.piper_directory, PIPER_VOICE_NAME))
            urllib.request.urlretrieve(f"{PIPER_VOICE_URL}{PIPER_VOICE_NAME}.json", os.path.join(self.piper_directory, f"{PIPER_VOICE_NAME}.json"))

            # Additional setup for macOS
            if OS == "macos":
                if ARCH == "x64":
                    subprocess.run(['softwareupdate', '--install-rosetta', '--agree-to-license'])

                PIPER_PHONEMIZE_ASSETNAME = f"piper-phonemize_{OS}_{ARCH}.tar.gz"
                PIPER_PHONEMIZE_URL = "https://github.com/rhasspy/piper-phonemize/releases/latest/download/"
                urllib.request.urlretrieve(f"{PIPER_PHONEMIZE_URL}{PIPER_PHONEMIZE_ASSETNAME}", os.path.join(self.piper_directory, PIPER_PHONEMIZE_ASSETNAME))

                with tarfile.open(os.path.join(self.piper_directory, PIPER_PHONEMIZE_ASSETNAME), 'r:gz') as tar:
                    tar.extractall(path=self.piper_directory)

                # Rewrite the @rpath library references inside the piper
                # binary to the copies extracted under piper-phonemize/lib.
                PIPER_DIR = self.piper_directory
                subprocess.run(['install_name_tool', '-change', '@rpath/libespeak-ng.1.dylib', f"{PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib", f"{PIPER_DIR}/piper"])
                subprocess.run(['install_name_tool', '-change', '@rpath/libonnxruntime.1.14.1.dylib', f"{PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib", f"{PIPER_DIR}/piper"])
                subprocess.run(['install_name_tool', '-change', '@rpath/libpiper_phonemize.1.dylib', f"{PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib", f"{PIPER_DIR}/piper"])

            print("Piper setup completed.")
        else:
            print("Piper already set up. Skipping download.")
|
@ -1,98 +0,0 @@
|
||||
"""
|
||||
Defines a function which takes text and returns a path to an audio file.
|
||||
"""
|
||||
|
||||
from pydub import AudioSegment
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv() # take environment variables from .env.
|
||||
|
||||
import ffmpeg
|
||||
import tempfile
|
||||
from openai import OpenAI
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pydub import AudioSegment
|
||||
|
||||
client = OpenAI()
|
||||
|
||||
chunk_size = 1024
|
||||
|
||||
def stream_tts(text):
    """
    A generator that streams tts as LMC messages.

    The speech is produced by the OpenAI API when the ``ALL_LOCAL``
    environment variable equals ``'False'`` and by the bundled Piper binary
    otherwise. Either way the audio is converted to raw 16 kHz mono PCM and
    yielded as a start message, ``chunk_size``-byte chunks, and an end
    message.
    """
    use_openai = os.getenv('ALL_LOCAL') == 'False'

    if use_openai:
        response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=text,
            response_format="opus"
        )
        with tempfile.NamedTemporaryFile(suffix=".opus", delete=False) as temp_file:
            response.stream_to_file(temp_file.name)
    else:
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            output_file = temp_file.name
            piper_dir = os.path.join(os.path.dirname(__file__), 'local_service', 'piper')
            piper_command = [
                os.path.join(piper_dir, 'piper'),
                '--model', os.path.join(piper_dir, os.getenv('PIPER_VOICE_NAME')),
                '--output_file', output_file
            ]
            subprocess.run(piper_command, input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    # Both sources leave their audio in temp_file; the conversion and
    # read-back below are the same for either one.
    # TODO: hack to format audio correctly for device
    outfile = tempfile.gettempdir() + "/" + "raw.dat"
    ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run()
    with open(outfile, "rb") as raw_file:
        audio_bytes = raw_file.read()
    file_type = "bytes.raw"
    print(outfile, len(audio_bytes))
    os.remove(outfile)

    # Stream the audio
    yield {"role": "assistant", "type": "audio", "format": file_type, "start": True}
    for offset in range(0, len(audio_bytes), chunk_size):
        yield audio_bytes[offset:offset + chunk_size]
    yield {"role": "assistant", "type": "audio", "format": file_type, "end": True}
|
||||
|
||||
def play_audiosegment(audio):
    """
    UNUSED
    the default makes some pops. this fixes that

    Fades the audio out, appends trailing silence, writes the result to
    ``output_audio.wav`` in the current directory, plays it, waits for
    playback to end and deletes the file.
    """
    # NOTE(review): `sa` is not imported in the visible part of this file;
    # presumably it is the simpleaudio package -- confirm before using this.
    scratch_wav = "output_audio.wav"
    tail_ms = 500  # used for both the fade-out and the trailing silence

    # Smooth the end, then pad it with silence.
    faded = audio.fade_out(tail_ms)
    padded = faded + AudioSegment.silent(duration=tail_ms)

    # Save as WAV for compatibility with simpleaudio, then play it back.
    padded.export(scratch_wav, format="wav")
    playback = sa.WaveObject.from_wave_file(scratch_wav).play()
    playback.wait_done()

    # Delete the wav file
    os.remove(scratch_wav)
|
||||
|
@ -0,0 +1,57 @@
|
||||
from datetime import datetime
|
||||
import os
|
||||
import contextlib
|
||||
import tempfile
|
||||
import ffmpeg
|
||||
import subprocess
|
||||
|
||||
def convert_mime_type_to_format(mime_type: str) -> str:
    """Map an audio MIME type to a file extension.

    Both WAV spellings map to ``"wav"``, ``audio/webm`` to ``"webm"`` and
    ``audio/raw`` to ``"dat"``. Any other value is returned unchanged.
    """
    known_formats = (
        ("audio/x-wav", "wav"),
        ("audio/wav", "wav"),
        ("audio/webm", "webm"),
        ("audio/raw", "dat"),
    )
    for known_type, extension in known_formats:
        if mime_type == known_type:
            return extension

    return mime_type
|
||||
|
||||
@contextlib.contextmanager
def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    """Context manager that converts raw audio bytes to a temporary WAV file.

    The bytes are written to a temporary input file, converted with ffmpeg,
    and the path of the converted file is yielded. Both temporary files are
    removed when the context exits, including when the conversion or the
    body of the ``with`` block raises.

    Args:
        audio: Encoded audio data.
        mime_type: MIME type of ``audio``. ``"audio/raw"`` is read as
            16 kHz mono signed 16-bit little-endian PCM.

    Yields:
        Path of the converted WAV file. It is only valid inside the ``with``
        block.
    """
    temp_dir = tempfile.gettempdir()

    # Create a temporary file with the appropriate extension. Unknown MIME
    # types come back unchanged (e.g. "audio/ogg"); keep only the part after
    # the slash so the extension never introduces a path separator.
    input_ext = convert_mime_type_to_format(mime_type).rsplit("/", 1)[-1]
    input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}")
    output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")

    try:
        with open(input_path, 'wb') as f:
            f.write(audio)

        # Export to wav
        print(mime_type, input_path, output_path)
        if mime_type == "audio/raw":
            ffmpeg.input(
                input_path,
                f='s16le',
                ar='16000',
                ac=1,
            ).output(output_path).run()
        else:
            ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()

        yield output_path
    finally:
        # Either file may be missing if an earlier step failed.
        for path in (input_path, output_path):
            with contextlib.suppress(FileNotFoundError):
                os.remove(path)
|
||||
|
||||
def run_command(command):
    """Run ``command`` to completion and return its ``(stdout, stderr)`` as text."""
    completed = subprocess.run(command, capture_output=True, text=True)
    return completed.stdout, completed.stderr
|
||||
|
||||
|
||||
def bytes_to_wav(audio_bytes: bytearray, mime_type):
    """Convert encoded audio bytes to a WAV file and return its path.

    Args:
        audio_bytes: Encoded audio data.
        mime_type: MIME type of ``audio_bytes``.

    Returns:
        Path of a WAV file in the temp directory. The caller owns the file
        and is responsible for deleting it.
    """
    import shutil

    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
        # The context manager deletes its output file on exit, so returning
        # wav_file_path directly would hand back a path that no longer
        # exists. Copy the result to a file that outlives the context.
        fd, persistent_path = tempfile.mkstemp(prefix="output_", suffix=".wav")
        os.close(fd)
        shutil.copyfile(wav_file_path, persistent_path)
    return persistent_path
|
@ -0,0 +1,95 @@
|
||||
import typer
|
||||
import asyncio
|
||||
import platform
|
||||
import concurrent.futures
|
||||
import threading
|
||||
import os
|
||||
import signal
|
||||
import importlib
|
||||
|
||||
app = typer.Typer()
|
||||
|
||||
@app.command()
def run(
    server: bool = typer.Option(False, "--server", help="Run server"),
    server_host: str = typer.Option("0.0.0.0", "--server-host", help="Specify the server host where the server will deploy"),
    server_port: int = typer.Option(8000, "--server-port", help="Specify the server port where the server will deploy"),

    tunnel_service: str = typer.Option("bore", "--tunnel-service", help="Specify the tunnel service"),
    expose: bool = typer.Option(False, "--expose", help="Expose server to internet"),

    client: bool = typer.Option(False, "--client", help="Run client"),
    server_url: str = typer.Option(None, "--server-url", help="Specify the server URL that the client should expect. Defaults to server-host and server-port"),
    client_type: str = typer.Option("auto", "--client-type", help="Specify the client type"),

    llm_service: str = typer.Option("litellm", "--llm-service", help="Specify the LLM service"),

    model: str = typer.Option("gpt-4", "--model", help="Specify the model"),
    llm_supports_vision: bool = typer.Option(False, "--llm-supports-vision", help="Specify if the LLM service supports vision"),
    llm_supports_functions: bool = typer.Option(False, "--llm-supports-functions", help="Specify if the LLM service supports functions"),
    context_window: int = typer.Option(2048, "--context-window", help="Specify the context window size"),
    max_tokens: int = typer.Option(4096, "--max-tokens", help="Specify the maximum number of tokens"),
    temperature: float = typer.Option(0.8, "--temperature", help="Specify the temperature for generation"),

    tts_service: str = typer.Option("openai", "--tts-service", help="Specify the TTS service"),

    stt_service: str = typer.Option("openai", "--stt-service", help="Specify the STT service"),

    local: bool = typer.Option(False, "--local", help="Use recommended local services for LLM, STT, and TTS"),
):
    """Start the server, the tunnel and/or the client, each in its own thread."""
    # Behaviour summary (kept out of the docstring to keep it short):
    # * --local replaces the LLM/STT/TTS services with the local ones.
    # * With neither --server nor --client, both are started.
    # * --client-type auto picks "mac", "rpi" or "linux" from the platform.
    # * `tunnel_service` is accepted but not used yet; the tunnel is always
    #   started through ./tunnel.sh.

    if local:
        tts_service = "piper"
        llm_service = "llamafile"
        stt_service = "local-whisper"

    if not server_url:
        server_url = f"{server_host}:{server_port}"

    if not server and not client:
        server = True
        client = True

    def handle_exit(signum, frame):
        # Hard exit so the worker threads cannot keep the process alive.
        os._exit(0)

    signal.signal(signal.SIGINT, handle_exit)

    # Only threads that were actually started are joined below. Joining
    # names that were never assigned (e.g. the tunnel thread without
    # --expose) raised UnboundLocalError.
    started_threads = []

    if server:
        from .server.server import main
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        server_thread = threading.Thread(target=loop.run_until_complete, args=(main(server_host, server_port, llm_service, model, llm_supports_vision, llm_supports_functions, context_window, max_tokens, temperature, tts_service, stt_service),))
        server_thread.start()
        started_threads.append(server_thread)

    if expose:
        tunnel_thread = threading.Thread(target=os.system, args=("./tunnel.sh",))
        tunnel_thread.start()
        started_threads.append(tunnel_thread)

    if client:
        if client_type == "auto":
            system_type = platform.system()
            if system_type == "Darwin":  # Mac OS
                client_type = "mac"
            elif system_type == "Linux":  # Linux System
                try:
                    with open('/proc/device-tree/model', 'r') as m:
                        if 'raspberry pi' in m.read().lower():
                            client_type = "rpi"
                        else:
                            client_type = "linux"
                except FileNotFoundError:
                    client_type = "linux"

        module = importlib.import_module(f".clients.{client_type}.device", package='01OS')
        client_thread = threading.Thread(target=module.main, args=[server_url])
        client_thread.start()
        started_threads.append(client_thread)

    try:
        for thread in started_threads:
            thread.join()
    except KeyboardInterrupt:
        os.kill(os.getpid(), signal.SIGINT)
|
@ -1,31 +0,0 @@
|
||||
"""
|
||||
This is just for the Python package — we need a Python entrypoint.
|
||||
Just starts `start.sh` with all the same command line arguments. Aliased to 01.
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import psutil
|
||||
import importlib
|
||||
# Can't import normally because it starts with a number
|
||||
process_utils = importlib.import_module("01OS.server.utils.process_utils")
|
||||
kill_process_tree = process_utils.kill_process_tree
|
||||
|
||||
def main():
    """Run ``start.sh`` (next to this file) with the current CLI arguments.

    Blocks until the script exits. On Ctrl+C, ``kill_process_tree`` is
    called to clean up.
    """
    # Get command line arguments
    args = sys.argv[1:]

    # Get the directory of the current script
    dir_path = os.path.dirname(os.path.realpath(__file__))

    # Prepare the command
    command = [os.path.join(dir_path, 'start.sh')] + args

    try:
        # Start start.sh using psutil for better process management, and to
        # kill all processes. Wait for it: without the wait this function
        # returned immediately and the KeyboardInterrupt handler below could
        # never run.
        psutil.Popen(command).wait()
    except KeyboardInterrupt:
        print("Exiting...")
        kill_process_tree()
|
Loading…
Reference in new issue