`start.py`, modular architecture, OI flags, mutable items to user dir

pull/73/head
killian 10 months ago
parent 91fcb94438
commit 701d357e30

@ -66,6 +66,7 @@ class Device:
self.pressed_keys = set()
self.captured_images = []
self.audiosegments = []
self.server_url = ""
def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX):
"""Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list."""
@ -303,10 +304,7 @@ class Device:
async def start_async(self):
# Configuration for WebSocket
WS_URL = os.getenv('SERVER_URL')
if not WS_URL:
raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")
WS_URL = f"ws://{self.server_url}"
# Start the WebSocket communication
asyncio.create_task(self.websocket_communication(WS_URL))

@ -0,0 +1,10 @@
from ..base_device import Device
device = Device()
def main(server_url=None):
    """Point the module-level device at a server and start it.

    Args:
        server_url: Address of the server, stored on ``device.server_url``.
            Presumably ``host:port`` without a scheme, since the device is
            expected to prepend ``ws://`` itself -- verify against
            ``base_device``. When omitted, the ``SERVER_URL`` environment
            variable is used instead.

    Raises:
        ValueError: If no URL is passed and ``SERVER_URL`` is not set.
    """
    if server_url is None:
        # Local import: nothing else in this module needs ``os``.
        import os

        server_url = os.getenv("SERVER_URL")
        if not server_url:
            raise ValueError(
                "No server URL was given and the environment variable "
                "SERVER_URL is not set. Please set it to proceed."
            )
    device.server_url = server_url
    device.start()


if __name__ == "__main__":
    # Run as a script: there is no URL to pass, so main() falls back to
    # the SERVER_URL environment variable instead of raising TypeError.
    main()

@ -1,4 +0,0 @@
from ..base_device import Device
desktop_device = Device()
desktop_device.start()

@ -1,4 +1,9 @@
from ..base_device import Device
rpi_device = Device()
rpi_device.start()
device = Device()
def main(server_url=None):
    """Start the module-level device, optionally pointing it at a server.

    Args:
        server_url: Address of the server to connect to. When given it is
            stored on ``device.server_url`` before the device starts; when
            omitted the device keeps whatever URL it already has. Accepting
            the argument lets a launcher call ``main(server_url)`` the same
            way for every client module.
    """
    if server_url is not None:
        device.server_url = server_url
    device.start()


if __name__ == "__main__":
    main()

@ -1,6 +0,0 @@
# Pick the client module from the machine's node name (`uname -n`):
# a host named exactly "rpi" runs the rpi client, anything else the macos one.
DEVICE=$(uname -n)
case "$DEVICE" in
    rpi)
        python -m 01OS.clients.rpi.device
        ;;
    *)
        python -m 01OS.clients.macos.device
        ;;
esac

@ -1,6 +1,7 @@
from dotenv import load_dotenv
load_dotenv() # take environment variables from .env.
from platformdirs import user_data_dir
import os
import glob
import json
@ -36,8 +37,11 @@ def configure_interpreter(interpreter: OpenInterpreter):
### RESET conversations/user.json
script_dir = os.path.dirname(os.path.abspath(__file__))
user_json_path = os.path.join(script_dir, 'conversations', 'user.json')
app_dir = user_data_dir('01')
conversations_dir = os.path.join(app_dir, 'conversations')
os.makedirs(conversations_dir, exist_ok=True)
user_json_path = os.path.join(conversations_dir, 'user.json')
with open(user_json_path, 'w') as file:
json.dump([], file)

@ -1,17 +1,17 @@
from dotenv import load_dotenv
load_dotenv() # take environment variables from .env.
from platformdirs import user_data_dir
import ast
import json
import queue
import os
import traceback
from .utils.bytes_to_wav import bytes_to_wav
import re
from fastapi import FastAPI, Request
from fastapi.responses import PlainTextResponse
from starlette.websockets import WebSocket, WebSocketDisconnect
from .stt.stt import stt_bytes
from .tts.tts import stream_tts
from pathlib import Path
import asyncio
import urllib.parse
@ -28,7 +28,8 @@ accumulator = Accumulator()
app = FastAPI()
conversation_history_path = Path(__file__).parent / 'conversations' / 'user.json'
app_dir = user_data_dir('01')
conversation_history_path = os.path.join(app_dir, 'conversations', 'user.json')
SERVER_LOCAL_PORT = int(os.getenv('SERVER_LOCAL_PORT', 8000))
@ -198,7 +199,9 @@ async def listener():
# Convert bytes to audio file
# Format will be bytes.wav or bytes.opus
mime_type = "audio/" + message["format"].split(".")[1]
text = stt_bytes(message["content"], mime_type)
audio_file_path = bytes_to_wav(message["content"], mime_type)
text = stt(audio_file_path)
print(text)
message = {"role": "user", "type": "message", "content": text}
# At this point, we have only text messages
@ -335,30 +338,77 @@ async def stream_tts_to_device(sentence):
]
if sentence.lower().strip().strip(".!?").strip() in force_task_completion_responses:
return
for chunk in stream_tts(sentence):
await to_device.put(chunk)
def stream_tts(sentence):
    """Synthesize ``sentence`` and yield it as a framed stream of audio chunks.

    Calls ``tts(sentence)`` to obtain the path of an audio file, reads the
    whole file into memory and deletes it. Then yields a start marker dict,
    the audio split into slices of at most 1024 bytes, and an end marker dict.
    """
    audio_path = tts(sentence)
    with open(audio_path, "rb") as audio_handle:
        payload = audio_handle.read()
    # The synthesized file is single-use; drop it once it is in memory.
    os.remove(audio_path)

    audio_format = "bytes.raw"
    step = 1024

    # Frame the raw bytes between explicit start/end messages.
    yield {"role": "assistant", "type": "audio", "format": audio_format, "start": True}
    offset = 0
    while offset < len(payload):
        yield payload[offset:offset + step]
        offset += step
    yield {"role": "assistant", "type": "audio", "format": audio_format, "end": True}
from uvicorn import Config, Server
import os
import platform
from importlib import import_module
# Run the FastAPI app
if __name__ == "__main__":
async def main(server_host, server_port, llm_service, model, llm_supports_vision, llm_supports_functions, context_window, max_tokens, temperature, tts_service, stt_service):
# Setup services
application_directory = user_data_dir('01')
services_directory = os.path.join(application_directory, 'services')
async def main():
if os.getenv('TEACH_MODE') == "True":
teach()
else:
# Start listening
asyncio.create_task(listener())
service_dict = {'llm': llm_service, 'tts': tts_service, 'stt': stt_service}
# Start watching the kernel if it's your job to do that
if os.getenv('CODE_RUNNER') == "server":
asyncio.create_task(put_kernel_messages_into_queue(from_computer))
# Start the server
logger.info("Starting `server.py`... on localhost:" + str(SERVER_LOCAL_PORT))
for service in service_dict:
config = Config(app, host="localhost", port=SERVER_LOCAL_PORT, lifespan='on')
server = Server(config)
await server.serve()
service_directory = os.path.join(services_directory, service, service_dict[service])
# This is the folder they can mess around in
config = {"service_directory": service_directory}
if service == "llm":
config.update({
"interpreter": interpreter,
"model": model,
"llm_supports_vision": llm_supports_vision,
"llm_supports_functions": llm_supports_functions,
"context_window": context_window,
"max_tokens": max_tokens,
"temperature": temperature
})
module = import_module(f'.server.services.{service}.{service_dict[service]}.{service}', package='01OS')
ServiceClass = getattr(module, service.capitalize())
service_instance = ServiceClass(config)
globals()[service] = getattr(service_instance, service)
interpreter.llm.completions = llm
# Start listening
asyncio.create_task(listener())
# Start watching the kernel if it's your job to do that
if True: # in the future, code can run on device. for now, just server.
asyncio.create_task(put_kernel_messages_into_queue(from_computer))
config = Config(app, host=server_host, port=int(server_port), lifespan='on')
server = Server(config)
await server.serve()
# Run the FastAPI app
if __name__ == "__main__":
asyncio.run(main())

@ -0,0 +1,15 @@
class Llm:
    """LLM service that reuses the interpreter's own completions callable.

    Every config entry other than ``interpreter`` and ``service_directory``
    is copied onto the interpreter object as an attribute (dashes in keys
    become underscores). ``self.llm`` is then bound to
    ``interpreter.llm.completions``.
    """

    # Config keys that describe the service itself, not interpreter settings.
    _RESERVED_KEYS = ("interpreter", "service_directory")

    def __init__(self, config):
        """Apply ``config`` to the interpreter without modifying ``config``.

        Args:
            config: Mapping that must contain ``"interpreter"``; may contain
                ``"service_directory"`` and any number of settings.

        Raises:
            KeyError: If ``config`` has no ``"interpreter"`` entry.
        """
        # Litellm is used by OI by default, so we just modify OI
        interpreter = config["interpreter"]

        # Skip the reserved keys instead of popping them, so the caller's
        # dict is left intact.
        for key, value in config.items():
            if key in self._RESERVED_KEYS:
                continue
            setattr(interpreter, key.replace("-", "_"), value)

        self.llm = interpreter.llm.completions

@ -0,0 +1,49 @@
import os
import subprocess
import requests
import json
class Llm:
    """LLM service backed by a local ``llama-api-server.wasm`` run by WasmEdge.

    Construction installs the runtime and model on first use and launches the
    API server in the background; ``llm`` then sends chat requests to it.
    """

    def __init__(self, config):
        """Install (if needed) and start the server.

        Args:
            config: Mapping with a ``"service_directory"`` entry naming the
                folder in which the ``llm`` directory is kept.
        """
        self.install(config["service_directory"])

    def install(self, service_directory):
        """Download the runtime, model and server app, then start the server.

        The download steps run only when ``<service_directory>/llm`` does not
        exist yet. The server is launched on every call, without blocking,
        because ``llm`` needs it running on every start, not only the first.
        """
        self.llm_directory = os.path.join(service_directory, 'llm')

        if not os.path.isdir(self.llm_directory):  # Check if the LLM directory exists
            # Create the directory itself: it is the cwd of the downloads below.
            os.makedirs(self.llm_directory, exist_ok=True)

            # Install WasmEdge. A "|" inside an argv list is not a pipe, so the
            # script is fetched first and then fed to bash on stdin.
            installer = subprocess.run(
                ['curl', '-sSf', 'https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh'],
                stdout=subprocess.PIPE,
                check=True,
            )
            subprocess.run(
                ['bash', '-s', '--', '--plugin', 'wasi_nn-ggml'],
                input=installer.stdout,
            )

            # Download the Qwen1.5-0.5B-Chat model GGUF file
            MODEL_URL = "https://huggingface.co/second-state/Qwen1.5-0.5B-Chat-GGUF/resolve/main/Qwen1.5-0.5B-Chat-Q5_K_M.gguf"
            subprocess.run(['curl', '-LO', MODEL_URL], cwd=self.llm_directory)

            # Download the llama-api-server.wasm app
            APP_URL = "https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/llama-api-server.wasm"
            subprocess.run(['curl', '-LO', APP_URL], cwd=self.llm_directory)

            print("LLM setup completed.")
        else:
            print("LLM already set up. Skipping download.")

        # Run the API server in the background; a blocking run() would never
        # return while the server is alive.
        self.server_process = subprocess.Popen(
            ['wasmedge', '--dir', '.:.', '--nn-preload', 'default:GGML:AUTO:Qwen1.5-0.5B-Chat-Q5_K_M.gguf', 'llama-api-server.wasm', '-p', 'llama-2-chat'],
            cwd=self.llm_directory,
        )

    def llm(self, messages):
        """Send ``messages`` to the local chat-completions endpoint.

        Args:
            messages: Chat messages, serialized as-is into the request body.

        Yields:
            Each non-empty response line, parsed as JSON.
        """
        url = "http://localhost:8080/v1/chat/completions"
        headers = {
            'accept': 'application/json',
            'Content-Type': 'application/json'
        }
        data = {
            "messages": messages,
            "model": "llama-2-chat"
        }
        with requests.post(url, headers=headers, data=json.dumps(data), stream=True) as response:
            for line in response.iter_lines():
                if line:
                    yield json.loads(line)

@ -0,0 +1,84 @@
import os
import platform
import subprocess
import time
import wget
import stat
class Llm:
    """LLM service that runs a local ``Phi-2`` llamafile and points the
    interpreter at it."""

    def __init__(self, config):
        """Install/launch the llamafile, then apply the remaining settings.

        Args:
            config: Mapping with ``"interpreter"`` and ``"service_directory"``
                entries. Both are removed from ``config``; every remaining
                entry is set as an attribute on the interpreter (dashes in
                keys become underscores).
        """
        self.interpreter = config["interpreter"]
        config.pop("interpreter", None)
        self.install(config["service_directory"])
        config.pop("service_directory", None)
        for key, value in config.items():
            setattr(self.interpreter, key.replace("-", "_"), value)
        self.llm = self.interpreter.llm.completions

    def install(self, service_directory):
        """Download the llamafile if missing, start it, configure the interpreter.

        Args:
            service_directory: Folder under which ``models/`` is created.

        Raises:
            Exception: On macOS when ``xcode-select -p`` fails.
            PermissionError: If the llamafile exists but is not executable.
        """
        if platform.system() == "Darwin":  # Check if the system is MacOS
            result = subprocess.run(
                ["xcode-select", "-p"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT
            )
            if result.returncode != 0:
                print(
                    "Llamafile requires Mac users to have Xcode installed. You can install Xcode from https://developer.apple.com/xcode/ .\n\nAlternatively, you can use `LM Studio`, `Jan.ai`, or `Ollama` to manage local language models. Learn more at https://docs.openinterpreter.com/guides/running-locally ."
                )
                time.sleep(3)
                raise Exception("Xcode is not installed. Please install Xcode and try again.")

        # Create the models directory if it doesn't exist
        models_dir = os.path.join(service_directory, "models")
        os.makedirs(models_dir, exist_ok=True)

        llamafile_path = os.path.join(models_dir, "phi-2.Q4_K_M.llamafile")

        # Download the llamafile on first use
        if not os.path.exists(llamafile_path):
            print(
                "Attempting to download the `Phi-2` language model. This may take a few minutes."
            )
            time.sleep(3)
            url = "https://huggingface.co/jartine/phi-2-llamafile/resolve/main/phi-2.Q4_K_M.llamafile"
            wget.download(url, llamafile_path)

        # Make the llamafile executable
        if platform.system() != "Windows":
            st = os.stat(llamafile_path)
            os.chmod(llamafile_path, st.st_mode | stat.S_IEXEC)

        # Run the llamafile in the background
        if os.path.exists(llamafile_path):
            # Check the permission bit rather than executing the file: a bare
            # invocation launches the model server and does not return.
            if not os.access(llamafile_path, os.X_OK):
                print("The llamafile is not executable. Please check the file permissions.")
                raise PermissionError(f"The llamafile is not executable: {llamafile_path}")
            subprocess.Popen([llamafile_path, "-ngl", "9999"])
        else:
            error_message = "The llamafile does not exist or is corrupted. Please ensure it has been downloaded correctly or try again."
            print(error_message)

        self.interpreter.system_message = "You are Open Interpreter, a world-class programmer that can execute code on the user's machine."
        self.interpreter.offline = True
        self.interpreter.llm.model = "local"
        self.interpreter.llm.temperature = 0
        # The local llamafile server speaks plain HTTP, not HTTPS.
        self.interpreter.llm.api_base = "http://localhost:8080/v1"
        self.interpreter.llm.max_tokens = 1000
        self.interpreter.llm.context_window = 3000
        self.interpreter.llm.supports_functions = False

@ -0,0 +1,151 @@
"""
Defines a function which takes a path to an audio file and turns it into text.
"""
from datetime import datetime
import os
import contextlib
import tempfile
import shutil
import ffmpeg
import subprocess
import os
import subprocess
class Stt:
    """Speech-to-text service wrapper around this module's functions."""

    def __init__(self, config):
        """Run ``install`` for the directory given by ``config["service_directory"]``."""
        install(config["service_directory"])

    def stt(self, audio_file_path):
        """Return whatever the module-level ``stt`` returns for ``audio_file_path``."""
        transcript = stt(audio_file_path)
        return transcript
def install(service_dir):
    """Set up the local Whisper transcriber inside ``service_dir``.

    Copies the bundled ``whisper-rust`` sources next to this module into
    ``service_dir`` (if not already there), builds the release executable with
    cargo when it is missing, and downloads the Whisper model when it is
    missing. The process working directory is left untouched.

    Args:
        service_dir: Directory that receives ``whisper-rust/`` and ``model/``.

    Raises:
        SystemExit: With status 1 when the bundled sources are missing, or
            when a build is needed and ``rustc`` is not on ``PATH``.
    """
    ### INSTALL
    whisper_rust_path = os.path.join(service_dir, "whisper-rust")
    script_dir = os.path.dirname(os.path.realpath(__file__))
    source_whisper_rust_path = os.path.join(script_dir, "whisper-rust")

    if not os.path.exists(source_whisper_rust_path):
        print(f"Source directory does not exist: {source_whisper_rust_path}")
        raise SystemExit(1)

    if not os.path.exists(whisper_rust_path):
        shutil.copytree(source_whisper_rust_path, whisper_rust_path)

    # Check if whisper-rust executable exists before attempting to build
    if not os.path.isfile(os.path.join(whisper_rust_path, "target/release/whisper-rust")):
        # Rust is needed to build the whisper executable
        if shutil.which("rustc") is None:
            print("Rust is not installed or is not in system PATH. Please install Rust before proceeding.")
            raise SystemExit(1)
        # Build in the copied source tree via cwd= instead of os.chdir, which
        # would change the working directory of the whole process.
        subprocess.call(["cargo", "build", "--release"], cwd=whisper_rust_path)
    else:
        print("Whisper Rust executable already exists. Skipping build.")

    whisper_model_path = os.path.join(service_dir, "model")
    whisper_model_name = os.getenv('WHISPER_MODEL_NAME', 'ggml-tiny.en.bin')
    whisper_model_url = os.getenv('WHISPER_MODEL_URL', 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/')
    model_file = os.path.join(whisper_model_path, whisper_model_name)

    if not os.path.isfile(model_file):
        os.makedirs(whisper_model_path, exist_ok=True)
        # Argument list (no shell), so environment-provided values cannot be
        # interpreted as shell syntax.
        subprocess.call(["curl", "-L", f"{whisper_model_url}{whisper_model_name}", "-o", model_file])
    else:
        print("Whisper model already exists. Skipping download.")
def convert_mime_type_to_format(mime_type: str) -> str:
    """Map an audio MIME type to a file extension.

    ``audio/wav`` and ``audio/x-wav`` give ``"wav"``, ``audio/webm`` gives
    ``"webm"`` and ``audio/raw`` gives ``"dat"``. Any other value is returned
    unchanged.
    """
    if mime_type in ("audio/x-wav", "audio/wav"):
        extension = "wav"
    elif mime_type == "audio/webm":
        extension = "webm"
    elif mime_type == "audio/raw":
        extension = "dat"
    else:
        extension = mime_type
    return extension
@contextlib.contextmanager
def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    """Context manager that converts audio bytes to a temporary WAV file.

    The bytes are written to a timestamped input file in the system temp
    directory and converted with ffmpeg. ``audio/raw`` input is read as
    ``s16le`` at 16000 Hz with one channel; anything else is transcoded to
    ``pcm_s16le``, one channel, 16k.

    Args:
        audio: Encoded audio data.
        mime_type: MIME type of ``audio``; selects the input file extension
            and the ffmpeg invocation.

    Yields:
        Path of the converted WAV file. Both temporary files are removed when
        the ``with`` block exits, so the path is only valid inside it.
    """
    temp_dir = tempfile.gettempdir()

    # Unknown MIME types come back unchanged (e.g. "audio/opus"); keep only
    # the part after the last "/" so the extension cannot introduce a path
    # separator into the file name.
    input_ext = convert_mime_type_to_format(mime_type).rsplit("/", 1)[-1]
    input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}")
    output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")

    try:
        with open(input_path, 'wb') as f:
            f.write(audio)

        # Export to wav
        print(mime_type, input_path, output_path)
        if mime_type == "audio/raw":
            ffmpeg.input(
                input_path,
                f='s16le',
                ar='16000',
                ac=1,
            ).output(output_path).run()
        else:
            ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()

        yield output_path
    finally:
        # Clean up on failed conversions too; either file may be missing if
        # an earlier step raised.
        for path in (input_path, output_path):
            with contextlib.suppress(FileNotFoundError):
                os.remove(path)
def run_command(command):
    """Run ``command`` and return its ``(stdout, stderr)`` as text.

    The exit status is not checked.
    """
    completed = subprocess.run(command, capture_output=True, text=True)
    captured = (completed.stdout, completed.stderr)
    return captured
def get_transcription_file(wav_file_path: str):
    """Transcribe a WAV file with the ``whisper-rust`` executable.

    Args:
        wav_file_path: Path of the WAV file to transcribe.

    Returns:
        The standard output of the ``whisper-rust`` process.
    """
    # NOTE(review): the executable and model are looked up next to this
    # module, while ``install`` places them under the service directory --
    # confirm which location is intended.
    module_dir = os.path.dirname(__file__)
    local_path = os.path.join(module_dir, 'model')
    whisper_rust_path = os.path.join(module_dir, 'whisper-rust', 'target', 'release')

    # Use the same default model name as ``install`` so that an unset (or
    # empty) WHISPER_MODEL_NAME resolves to the model that was downloaded.
    model_name = os.getenv('WHISPER_MODEL_NAME') or 'ggml-tiny.en.bin'

    output, error = run_command([
        os.path.join(whisper_rust_path, 'whisper-rust'),
        '--model-path', os.path.join(local_path, model_name),
        '--file-path', wav_file_path
    ])
    return output
def get_transcription_bytes(audio_bytes: bytearray, mime_type):
    """Convert ``audio_bytes`` to a temporary WAV file and transcribe it."""
    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
        transcript = get_transcription_file(wav_file_path)
        return transcript
def stt_bytes(audio_bytes: bytearray, mime_type="audio/wav"):
    """Convert ``audio_bytes`` to a temporary WAV file and pass it to ``stt_wav``."""
    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
        transcript = stt_wav(wav_file_path)
        return transcript
def stt_wav(wav_file_path: str):
    """Transcribe the WAV file at ``wav_file_path``.

    The file is first re-encoded with ffmpeg (``pcm_s16le``, one channel, 16k)
    into a timestamped temporary file. That copy is transcribed and then
    removed, whether or not transcription succeeded.
    """
    scratch_dir = tempfile.gettempdir()
    stamp = datetime.now().strftime('%Y%m%d%H%M%S%f')
    normalized_path = os.path.join(scratch_dir, f"output_stt_{stamp}.wav")

    stream = ffmpeg.input(wav_file_path)
    stream = stream.output(normalized_path, acodec='pcm_s16le', ac=1, ar='16k')
    stream.run()

    try:
        return get_transcription_file(normalized_path)
    finally:
        os.remove(normalized_path)
def stt(input_data, mime_type="audio/wav"):
    """Transcribe audio given either as a file path or as raw bytes.

    Args:
        input_data: Path to a WAV file (``str``), or encoded audio as
            ``bytes`` or ``bytearray``.
        mime_type: MIME type of the audio; only used for byte input.

    Returns:
        The result of ``stt_wav`` (path input) or ``stt_bytes`` (byte input).

    Raises:
        ValueError: If ``input_data`` is neither a string nor a bytes-like
            value.
    """
    if isinstance(input_data, str):
        return stt_wav(input_data)
    # Accept immutable ``bytes`` as well as ``bytearray``.
    if isinstance(input_data, (bytes, bytearray)):
        return stt_bytes(input_data, mime_type)
    raise ValueError("Input data should be either a path to a wav file (str) or audio bytes (bytearray)")

@ -1,9 +1,11 @@
"""
Defines a function which takes a path to an audio file and turns it into text.
"""
class Stt:
def __init__(self, config):
pass
def stt(self, audio_file_path):
return stt(audio_file_path)
from dotenv import load_dotenv
load_dotenv() # take environment variables from .env.
from datetime import datetime
import os
@ -14,9 +16,6 @@ import subprocess
import openai
from openai import OpenAI
from ..utils.logs import setup_logging
from ..utils.logs import logger
setup_logging()
client = OpenAI()
@ -91,28 +90,18 @@ def stt_bytes(audio_bytes: bytearray, mime_type="audio/wav"):
def stt_wav(wav_file_path: str):
if os.getenv('ALL_LOCAL') == 'False':
audio_file = open(wav_file_path, "rb")
try:
transcript = client.audio.transcriptions.create(
model="whisper-1",
file=audio_file,
response_format="text"
)
except openai.BadRequestError as e:
logger.info(f"openai.BadRequestError: {e}")
return None
return transcript
else:
temp_dir = tempfile.gettempdir()
output_path = os.path.join(temp_dir, f"output_stt_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
ffmpeg.input(wav_file_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
try:
transcript = get_transcription_file(output_path)
finally:
os.remove(output_path)
return transcript
audio_file = open(wav_file_path, "rb")
try:
transcript = client.audio.transcriptions.create(
model="whisper-1",
file=audio_file,
response_format="text"
)
except openai.BadRequestError as e:
print(f"openai.BadRequestError: {e}")
return None
return transcript
def stt(input_data, mime_type="audio/wav"):
if isinstance(input_data, str):

@ -0,0 +1,30 @@
import ffmpeg
import tempfile
from openai import OpenAI
import os
import subprocess
import tempfile
client = OpenAI()
class Tts:
    """Text-to-speech service using the module-level OpenAI ``client``."""

    def __init__(self, config):
        """Accept ``config`` for interface parity; nothing is read from it."""
        pass

    def tts(self, text):
        """Synthesize ``text`` and return the path of a raw audio file.

        The speech is requested as opus (model ``tts-1``, voice ``alloy``),
        written to a temporary file and converted with ffmpeg to ``s16le`` at
        16000 Hz with one channel.

        Args:
            text: Text to speak.

        Returns:
            Path of a uniquely named temporary file holding the raw audio.
            The caller is responsible for deleting it.
        """
        response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=text,
            response_format="opus"
        )

        # Reserve unique names for both files so that concurrent calls cannot
        # overwrite each other's output.
        with tempfile.NamedTemporaryFile(suffix=".opus", delete=False) as temp_file:
            opus_path = temp_file.name
        with tempfile.NamedTemporaryFile(suffix=".dat", delete=False) as raw_file:
            outfile = raw_file.name

        try:
            response.stream_to_file(opus_path)
            # TODO: hack to format audio correctly for device
            # overwrite_output: the reserved output file already exists (empty).
            ffmpeg.input(opus_path).output(outfile, f="s16le", ar="16000", ac="1").run(overwrite_output=True)
        except Exception:
            # Do not leave a useless output file behind on failure.
            os.remove(outfile)
            raise
        finally:
            # The intermediate opus file is never needed after conversion.
            os.remove(opus_path)

        return outfile

@ -0,0 +1,84 @@
import ffmpeg
import tempfile
import os
import subprocess
import tempfile
import urllib.request
import tarfile
class Tts:
    """Text-to-speech service using a locally installed Piper executable."""

    def __init__(self, config):
        """Install Piper (if needed) under ``config["service_directory"]``."""
        self.piper_directory = ""
        self.install(config["service_directory"])

    def tts(self, text):
        """Synthesize ``text`` with Piper and return the path of a raw audio file.

        Piper writes a WAV file, which ffmpeg converts to ``s16le`` at
        16000 Hz with one channel.

        Args:
            text: Text to speak; passed to Piper on standard input.

        Returns:
            Path of a uniquely named temporary file holding the raw audio.
            The caller is responsible for deleting it.
        """
        # Reserve unique names for both files so that concurrent calls cannot
        # overwrite each other's output.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            wav_path = temp_file.name
        with tempfile.NamedTemporaryFile(suffix=".dat", delete=False) as raw_file:
            outfile = raw_file.name

        piper_dir = self.piper_directory
        try:
            subprocess.run([
                os.path.join(piper_dir, 'piper'),
                '--model', os.path.join(piper_dir, os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx')),
                '--output_file', wav_path
            ], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

            # TODO: hack to format audio correctly for device
            # overwrite_output: the reserved output file already exists (empty).
            ffmpeg.input(wav_path).output(outfile, f="s16le", ar="16000", ac="1").run(overwrite_output=True)
        except Exception:
            # Do not leave a useless output file behind on failure.
            os.remove(outfile)
            raise
        finally:
            # The intermediate WAV file is never needed after conversion.
            os.remove(wav_path)

        return outfile

    def install(self, service_directory):
        """Download Piper and a voice model into ``<service_directory>/piper``.

        Does nothing except print a message when that directory already
        exists. On macOS an unsupported architecture aborts the installation
        with a message.

        Args:
            service_directory: Folder into which the Piper archive is
                downloaded and extracted.
        """
        self.piper_directory = os.path.join(service_directory, 'piper')

        if os.path.isdir(self.piper_directory):  # Check if the Piper directory exists
            print("Piper already set up. Skipping download.")
            return

        os.makedirs(service_directory, exist_ok=True)

        # Determine OS and architecture
        uname = os.uname()
        system = uname.sysname
        arch = uname.machine
        if system == "Darwin":
            system = "macos"
            if arch == "arm64":
                arch = "aarch64"
            elif arch == "x86_64":
                arch = "x64"
            else:
                print("Piper: unsupported architecture")
                return
        else:
            # Release assets appear to use lowercase OS names (e.g. "linux"),
            # whereas uname reports "Linux" -- TODO confirm against the
            # Piper release page.
            system = system.lower()

        piper_assetname = f"piper_{system}_{arch}.tar.gz"
        piper_url = "https://github.com/rhasspy/piper/releases/latest/download/"
        archive_path = os.path.join(service_directory, piper_assetname)

        # Download and extract Piper
        urllib.request.urlretrieve(f"{piper_url}{piper_assetname}", archive_path)
        with tarfile.open(archive_path, 'r:gz') as tar:
            tar.extractall(path=service_directory)

        piper_voice_url = os.getenv('PIPER_VOICE_URL', 'https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/')
        piper_voice_name = os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx')

        # Download voice model and its json file
        urllib.request.urlretrieve(f"{piper_voice_url}{piper_voice_name}", os.path.join(self.piper_directory, piper_voice_name))
        urllib.request.urlretrieve(f"{piper_voice_url}{piper_voice_name}.json", os.path.join(self.piper_directory, f"{piper_voice_name}.json"))

        # Additional setup for macOS
        if system == "macos":
            if arch == "x64":
                subprocess.run(['softwareupdate', '--install-rosetta', '--agree-to-license'])

            phonemize_assetname = f"piper-phonemize_{system}_{arch}.tar.gz"
            phonemize_url = "https://github.com/rhasspy/piper-phonemize/releases/latest/download/"
            phonemize_archive = os.path.join(self.piper_directory, phonemize_assetname)
            urllib.request.urlretrieve(f"{phonemize_url}{phonemize_assetname}", phonemize_archive)

            with tarfile.open(phonemize_archive, 'r:gz') as tar:
                tar.extractall(path=self.piper_directory)

            # Repoint the piper binary at the bundled dynamic libraries.
            piper_dir = self.piper_directory
            for library in (
                'libespeak-ng.1.dylib',
                'libonnxruntime.1.14.1.dylib',
                'libpiper_phonemize.1.dylib',
            ):
                subprocess.run(['install_name_tool', '-change', f'@rpath/{library}', f"{piper_dir}/piper-phonemize/lib/{library}", f"{piper_dir}/piper"])

        print("Piper setup completed.")

@ -1,98 +0,0 @@
"""
Defines a function which takes text and returns a path to an audio file.
"""
from pydub import AudioSegment
from dotenv import load_dotenv
load_dotenv() # take environment variables from .env.
import ffmpeg
import tempfile
from openai import OpenAI
import os
import subprocess
import tempfile
from pydub import AudioSegment
client = OpenAI()
chunk_size = 1024
def stream_tts(text):
    """
    A generator that streams tts as LMC messages.

    When the ``ALL_LOCAL`` environment variable equals ``'False'`` the speech
    comes from the OpenAI client; otherwise from the bundled Piper
    executable. Either way the result is converted by ffmpeg to raw ``s16le``
    audio, read into memory, and yielded in ``chunk_size`` slices between a
    start and an end marker.
    """
    use_remote = os.getenv('ALL_LOCAL') == 'False'

    if use_remote:
        response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=text,
            response_format="opus"
        )
        with tempfile.NamedTemporaryFile(suffix=".opus", delete=False) as temp_file:
            response.stream_to_file(temp_file.name)
    else:
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            output_file = temp_file.name
            piper_dir = os.path.join(os.path.dirname(__file__), 'local_service', 'piper')
            subprocess.run([
                os.path.join(piper_dir, 'piper'),
                '--model', os.path.join(piper_dir, os.getenv('PIPER_VOICE_NAME')),
                '--output_file', output_file
            ], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    # Both sources share the same conversion and read-back steps.
    # TODO: hack to format audio correctly for device
    outfile = tempfile.gettempdir() + "/" + "raw.dat"
    ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run()

    with open(outfile, "rb") as raw_handle:
        audio_bytes = raw_handle.read()
    file_type = "bytes.raw"
    print(outfile, len(audio_bytes))
    os.remove(outfile)

    # Stream the audio
    yield {"role": "assistant", "type": "audio", "format": file_type, "start": True}
    for start in range(0, len(audio_bytes), chunk_size):
        yield audio_bytes[start:start + chunk_size]
    yield {"role": "assistant", "type": "audio", "format": file_type, "end": True}
def play_audiosegment(audio):
    """
    UNUSED

    the default makes some pops. this fixes that

    Fades the audio out over 500 ms, appends 500 ms of silence, exports the
    result to ``output_audio.wav``, plays that file and waits for playback to
    finish, then deletes it.
    """
    # NOTE(review): ``sa`` is not imported anywhere in the visible code --
    # presumably simpleaudio; confirm before using this function.
    scratch_wav = "output_audio.wav"
    fade_ms = 500
    silence_duration_ms = 500  # Duration of silence in milliseconds

    # Smooth the ending, then pad it with silence
    faded = audio.fade_out(fade_ms)
    padded = faded + AudioSegment.silent(duration=silence_duration_ms)

    # Save as WAV for compatibility with the playback library
    padded.export(scratch_wav, format="wav")

    # Play the processed file and block until playback is done
    playback = sa.WaveObject.from_wave_file(scratch_wav).play()
    playback.wait_done()

    # Delete the wav file
    os.remove(scratch_wav)

@ -0,0 +1,57 @@
from datetime import datetime
import os
import contextlib
import tempfile
import ffmpeg
import subprocess
def convert_mime_type_to_format(mime_type: str) -> str:
    """Return the file extension used for an audio MIME type.

    WAV types (``audio/wav``, ``audio/x-wav``) map to ``"wav"``,
    ``audio/webm`` to ``"webm"`` and ``audio/raw`` to ``"dat"``. Unrecognized
    values are handed back as they came in.
    """
    # Checked in order; the first equal entry wins.
    mapping = (
        ("audio/x-wav", "wav"),
        ("audio/wav", "wav"),
        ("audio/webm", "webm"),
        ("audio/raw", "dat"),
    )
    for known_type, extension in mapping:
        if mime_type == known_type:
            return extension
    return mime_type
@contextlib.contextmanager
def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    """Context manager that converts audio bytes to a temporary WAV file.

    The bytes are written to a timestamped input file in the system temp
    directory and converted with ffmpeg. ``audio/raw`` input is read as
    ``s16le`` at 16000 Hz with one channel; anything else is transcoded to
    ``pcm_s16le``, one channel, 16k.

    Args:
        audio: Encoded audio data.
        mime_type: MIME type of ``audio``; selects the input file extension
            and the ffmpeg invocation.

    Yields:
        Path of the converted WAV file. Both temporary files are removed when
        the ``with`` block exits, so the path is only valid inside it.
    """
    temp_dir = tempfile.gettempdir()

    # Unknown MIME types come back unchanged (e.g. "audio/opus"); keep only
    # the part after the last "/" so the extension cannot introduce a path
    # separator into the file name.
    input_ext = convert_mime_type_to_format(mime_type).rsplit("/", 1)[-1]
    input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}")
    output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")

    try:
        with open(input_path, 'wb') as f:
            f.write(audio)

        # Export to wav
        print(mime_type, input_path, output_path)
        if mime_type == "audio/raw":
            ffmpeg.input(
                input_path,
                f='s16le',
                ar='16000',
                ac=1,
            ).output(output_path).run()
        else:
            ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()

        yield output_path
    finally:
        # Clean up on failed conversions too; either file may be missing if
        # an earlier step raised.
        for path in (input_path, output_path):
            with contextlib.suppress(FileNotFoundError):
                os.remove(path)
def run_command(command):
    """Execute ``command`` and hand back ``(stdout, stderr)`` decoded as text.

    The return code of the process is ignored.
    """
    finished = subprocess.run(command, capture_output=True, text=True)
    return finished.stdout, finished.stderr
def bytes_to_wav(audio_bytes: bytearray, mime_type):
    """Convert encoded audio bytes to a WAV file and return its path.

    Args:
        audio_bytes: Encoded audio data.
        mime_type: MIME type of ``audio_bytes``.

    Returns:
        Path of a WAV file in the system temp directory. The file persists
        after this call; the caller is responsible for deleting it.
    """
    # Local import: this module does not otherwise need ``shutil``.
    import shutil

    # Reserve a file that outlives the converter's own temporary files.
    handle, persistent_path = tempfile.mkstemp(suffix=".wav")
    os.close(handle)

    try:
        with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
            # The context manager deletes its output on exit, so copy the
            # result out while it still exists.
            shutil.copyfile(wav_file_path, persistent_path)
    except Exception:
        os.remove(persistent_path)
        raise

    return persistent_path

@ -0,0 +1,95 @@
import typer
import asyncio
import platform
import concurrent.futures
import threading
import os
import signal
import importlib
app = typer.Typer()
@app.command()
def run(
    server: bool = typer.Option(False, "--server", help="Run server"),
    server_host: str = typer.Option("0.0.0.0", "--server-host", help="Specify the server host where the server will deploy"),
    server_port: int = typer.Option(8000, "--server-port", help="Specify the server port where the server will deploy"),

    tunnel_service: str = typer.Option("bore", "--tunnel-service", help="Specify the tunnel service"),
    expose: bool = typer.Option(False, "--expose", help="Expose server to internet"),

    client: bool = typer.Option(False, "--client", help="Run client"),
    server_url: str = typer.Option(None, "--server-url", help="Specify the server URL that the client should expect. Defaults to server-host and server-port"),
    client_type: str = typer.Option("auto", "--client-type", help="Specify the client type"),

    llm_service: str = typer.Option("litellm", "--llm-service", help="Specify the LLM service"),

    model: str = typer.Option("gpt-4", "--model", help="Specify the model"),
    llm_supports_vision: bool = typer.Option(False, "--llm-supports-vision", help="Specify if the LLM service supports vision"),
    llm_supports_functions: bool = typer.Option(False, "--llm-supports-functions", help="Specify if the LLM service supports functions"),
    context_window: int = typer.Option(2048, "--context-window", help="Specify the context window size"),
    max_tokens: int = typer.Option(4096, "--max-tokens", help="Specify the maximum number of tokens"),
    temperature: float = typer.Option(0.8, "--temperature", help="Specify the temperature for generation"),

    tts_service: str = typer.Option("openai", "--tts-service", help="Specify the TTS service"),

    stt_service: str = typer.Option("openai", "--stt-service", help="Specify the STT service"),

    local: bool = typer.Option(False, "--local", help="Use recommended local services for LLM, STT, and TTS"),
):
    """Start the server, the tunnel and/or the client, each in its own thread.

    With neither ``--server`` nor ``--client`` given, both are started.
    ``--local`` overrides the LLM, STT and TTS service choices. When
    ``--server-url`` is omitted it defaults to ``server_host:server_port``.
    The function blocks until every thread it started has finished.
    """
    if local:
        tts_service = "piper"
        llm_service = "llamafile"
        stt_service = "local-whisper"

    if not server_url:
        server_url = f"{server_host}:{server_port}"

    if not server and not client:
        server = True
        client = True

    def handle_exit(signum, frame):
        # Hard exit so that worker threads cannot keep the process alive.
        os._exit(0)
    signal.signal(signal.SIGINT, handle_exit)

    # Only threads that were actually started are joined at the end; joining
    # names that were never assigned would raise UnboundLocalError.
    threads = []

    if server:
        from .server.server import main
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        server_thread = threading.Thread(target=loop.run_until_complete, args=(main(server_host, server_port, llm_service, model, llm_supports_vision, llm_supports_functions, context_window, max_tokens, temperature, tts_service, stt_service),))
        server_thread.start()
        threads.append(server_thread)

    if expose:
        #tunnel_thread = threading.Thread(target=tunnel_service, args=[server_port])
        #tunnel_thread.start()
        tunnel_thread = threading.Thread(target=os.system, args=("./tunnel.sh",))
        tunnel_thread.start()
        threads.append(tunnel_thread)

    if client:
        if client_type == "auto":
            system_type = platform.system()
            if system_type == "Darwin":  # Mac OS
                client_type = "mac"
            elif system_type == "Linux":  # Linux System
                try:
                    with open('/proc/device-tree/model', 'r') as m:
                        if 'raspberry pi' in m.read().lower():
                            client_type = "rpi"
                        else:
                            client_type = "linux"
                except FileNotFoundError:
                    client_type = "linux"

        module = importlib.import_module(f".clients.{client_type}.device", package='01OS')
        client_thread = threading.Thread(target=module.main, args=[server_url])
        client_thread.start()
        threads.append(client_thread)

    try:
        for thread in threads:
            thread.join()
    except KeyboardInterrupt:
        os.kill(os.getpid(), signal.SIGINT)

@ -5,5 +5,5 @@ pip install 01OS
```
```bash
01 # Runs the 01 server and client.
01 # Runs the 01 server and client
```

2
01OS/poetry.lock generated

@ -8400,4 +8400,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[metadata]
lock-version = "2.0"
python-versions = ">=3.9,<3.12"
content-hash = "4e7112e334cb1610550bcc44ab5f0a257621d774513c24034d60272b741caf51"
content-hash = "f582fa2573961a7bca4df34f7bf62bcbda856e57697f5e3daad6603ce2bc0589"

@ -27,13 +27,15 @@ simpleaudio = "^1.0.4"
opencv-python = "^4.9.0.80"
open-interpreter = {version = "0.2.1rc1", extras = ["os"]}
psutil = "^5.9.8"
typer = "^0.9.0"
platformdirs = "^4.2.0"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.poetry.scripts]
01 = "start:main"
01 = "01OS.start:app"
[tool.poetry.group.dev.dependencies]
black = "^23.10.1"

@ -1,31 +0,0 @@
"""
This is just for the Python package we need a Python entrypoint.
Just starts `start.sh` with all the same command line arguments. Aliased to 01.
"""
import os
import subprocess
import sys
import psutil
import importlib
# Can't import normally because it starts with a number
process_utils = importlib.import_module("01OS.server.utils.process_utils")
kill_process_tree = process_utils.kill_process_tree
def main():
    """Launch ``start.sh`` (located next to this file) with the CLI arguments.

    The script is started through ``psutil.Popen``. If a KeyboardInterrupt is
    raised while starting it, a message is printed and ``kill_process_tree``
    is called.
    """
    # Forward everything after the program name
    cli_args = sys.argv[1:]

    # start.sh lives in the same directory as this script
    here = os.path.dirname(os.path.realpath(__file__))
    launcher = os.path.join(here, 'start.sh')

    try:
        # Start start.sh using psutil for better process management, and to kill all processes
        psutil.Popen([launcher] + cli_args)
    except KeyboardInterrupt:
        print("Exiting...")
        kill_process_tree()

@ -28,11 +28,10 @@ If you want to run local speech-to-text using Whisper, install Rust. Follow the
pip install 01OS
```
**Run the 01 end-to-end:**
**Run the 01:**
```bash
01 # This will run a server + attempt to determine and run a client.
# (Behavior can be modified by changing the contents of `.env`)
01 # This will run the server and attempt to determine and run a client.
```
**Expose an 01 Server Publicly**
@ -40,6 +39,7 @@ pip install 01OS
We currently support exposing the 01 server publicly via a couple of different tunnel services:
- **bore.pub** ([GitHub](https://github.com/ekzhang/bore))
- **Requirements:** Ensure that Rust is installed ([Rust Installation](https://www.rust-lang.org/tools/install)), then run:
```
cargo install bore-cli
@ -50,6 +50,7 @@ We currently support exposing the 01 server publicly via a couple of different t
```
- **localtunnel** ([GitHub](https://github.com/localtunnel/localtunnel))
- **Requirements:** Ensure that Node.js is installed ([Node.js Download](https://nodejs.org/en/download)), then run:
```
npm install -g localtunnel
@ -69,7 +70,6 @@ We currently support exposing the 01 server publicly via a couple of different t
01 --server --expose-with-ngrok
```
**Run a specific client:**
```bash

Loading…
Cancel
Save