diff --git a/01OS/01OS/clients/base_device.py b/01OS/01OS/clients/base_device.py index 0ba3024..7674d42 100644 --- a/01OS/01OS/clients/base_device.py +++ b/01OS/01OS/clients/base_device.py @@ -66,6 +66,7 @@ class Device: self.pressed_keys = set() self.captured_images = [] self.audiosegments = [] + self.server_url = "" def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX): """Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list.""" @@ -303,10 +304,7 @@ class Device: async def start_async(self): # Configuration for WebSocket - WS_URL = os.getenv('SERVER_URL') - if not WS_URL: - raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.") - + WS_URL = f"ws://{self.server_url}" # Start the WebSocket communication asyncio.create_task(self.websocket_communication(WS_URL)) diff --git a/01OS/01OS/clients/macos/__init__.py b/01OS/01OS/clients/mac/__init__.py similarity index 100% rename from 01OS/01OS/clients/macos/__init__.py rename to 01OS/01OS/clients/mac/__init__.py diff --git a/01OS/01OS/clients/mac/device.py b/01OS/01OS/clients/mac/device.py new file mode 100644 index 0000000..a9a79c0 --- /dev/null +++ b/01OS/01OS/clients/mac/device.py @@ -0,0 +1,10 @@ +from ..base_device import Device + +device = Device() + +def main(server_url): + device.server_url = server_url + device.start() + +if __name__ == "__main__": + main() diff --git a/01OS/01OS/clients/macos/device.py b/01OS/01OS/clients/macos/device.py deleted file mode 100644 index 0760ea1..0000000 --- a/01OS/01OS/clients/macos/device.py +++ /dev/null @@ -1,4 +0,0 @@ -from ..base_device import Device - -desktop_device = Device() -desktop_device.start() \ No newline at end of file diff --git a/01OS/01OS/clients/rpi/device.py b/01OS/01OS/clients/rpi/device.py index fe16031..279822f 100644 --- a/01OS/01OS/clients/rpi/device.py +++ b/01OS/01OS/clients/rpi/device.py @@ -1,4 +1,9 @@ from ..base_device import Device -rpi_device = Device() -rpi_device.start() \ No newline at end of file +device = Device() + +def main(): + device.start() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/01OS/01OS/clients/start.sh b/01OS/01OS/clients/start.sh deleted file mode 100644 index d602a78..0000000 --- a/01OS/01OS/clients/start.sh +++ /dev/null @@ -1,6 +0,0 @@ -DEVICE=$(uname -n) -if [[ "$DEVICE" == "rpi" ]]; then - python -m 01OS.clients.rpi.device -else - python -m 01OS.clients.macos.device -fi diff --git a/01OS/01OS/server/i.py b/01OS/01OS/server/i.py index a25c612..6013fba 100644 --- a/01OS/01OS/server/i.py +++ b/01OS/01OS/server/i.py @@ -1,6 +1,7 @@ from dotenv import load_dotenv load_dotenv() # take environment variables from .env. +from platformdirs import user_data_dir import os import glob import json @@ -36,8 +37,11 @@ def configure_interpreter(interpreter: OpenInterpreter): ### RESET conversations/user.json - script_dir = os.path.dirname(os.path.abspath(__file__)) - user_json_path = os.path.join(script_dir, 'conversations', 'user.json') + + app_dir = user_data_dir('01') + conversations_dir = os.path.join(app_dir, 'conversations') + os.makedirs(conversations_dir, exist_ok=True) + user_json_path = os.path.join(conversations_dir, 'user.json') with open(user_json_path, 'w') as file: json.dump([], file) diff --git a/01OS/01OS/server/server.py b/01OS/01OS/server/server.py index c132b8a..f53df33 100644 --- a/01OS/01OS/server/server.py +++ b/01OS/01OS/server/server.py @@ -1,17 +1,17 @@ from dotenv import load_dotenv load_dotenv() # take environment variables from .env. +from platformdirs import user_data_dir import ast import json import queue import os import traceback +from .utils.bytes_to_wav import bytes_to_wav import re from fastapi import FastAPI, Request from fastapi.responses import PlainTextResponse from starlette.websockets import WebSocket, WebSocketDisconnect -from .stt.stt import stt_bytes -from .tts.tts import stream_tts from pathlib import Path import asyncio import urllib.parse @@ -28,7 +28,8 @@ accumulator = Accumulator() app = FastAPI() -conversation_history_path = Path(__file__).parent / 'conversations' / 'user.json' +app_dir = user_data_dir('01') +conversation_history_path = os.path.join(app_dir, 'conversations', 'user.json') SERVER_LOCAL_PORT = int(os.getenv('SERVER_LOCAL_PORT', 8000)) @@ -198,7 +199,9 @@ async def listener(): # Convert bytes to audio file # Format will be bytes.wav or bytes.opus mime_type = "audio/" + message["format"].split(".")[1] - text = stt_bytes(message["content"], mime_type) + audio_file_path = bytes_to_wav(message["content"], mime_type) + text = stt(audio_file_path) + print(text) message = {"role": "user", "type": "message", "content": text} # At this point, we have only text messages @@ -335,30 +338,77 @@ async def stream_tts_to_device(sentence): ] if sentence.lower().strip().strip(".!?").strip() in force_task_completion_responses: return + for chunk in stream_tts(sentence): await to_device.put(chunk) +def stream_tts(sentence): + + audio_file = tts(sentence) + + with open(audio_file, "rb") as f: + audio_bytes = f.read() + os.remove(audio_file) + + file_type = "bytes.raw" + chunk_size = 1024 + + # Stream the audio + yield {"role": "assistant", "type": "audio", "format": file_type, "start": True} + for i in range(0, len(audio_bytes), chunk_size): + chunk = audio_bytes[i:i+chunk_size] + yield chunk + yield {"role": "assistant", "type": "audio", "format": file_type, "end": True} + from uvicorn import Config, Server +import os +import platform +from importlib import import_module -# Run the FastAPI app -if __name__ == "__main__": +async def main(server_host, server_port, llm_service, model, llm_supports_vision, llm_supports_functions, context_window, max_tokens, temperature, tts_service, stt_service): + + # Setup services + application_directory = user_data_dir('01') + services_directory = os.path.join(application_directory, 'services') - async def main(): - if os.getenv('TEACH_MODE') == "True": - teach() - else: - # Start listening - asyncio.create_task(listener()) + service_dict = {'llm': llm_service, 'tts': tts_service, 'stt': stt_service} - # Start watching the kernel if it's your job to do that - if os.getenv('CODE_RUNNER') == "server": - asyncio.create_task(put_kernel_messages_into_queue(from_computer)) - - # Start the server - logger.info("Starting `server.py`... on localhost:" + str(SERVER_LOCAL_PORT)) + for service in service_dict: - config = Config(app, host="localhost", port=SERVER_LOCAL_PORT, lifespan='on') - server = Server(config) - await server.serve() + service_directory = os.path.join(services_directory, service, service_dict[service]) + # This is the folder they can mess around in + config = {"service_directory": service_directory} + + if service == "llm": + config.update({ + "interpreter": interpreter, + "model": model, + "llm_supports_vision": llm_supports_vision, + "llm_supports_functions": llm_supports_functions, + "context_window": context_window, + "max_tokens": max_tokens, + "temperature": temperature + }) + + module = import_module(f'.server.services.{service}.{service_dict[service]}.{service}', package='01OS') + ServiceClass = getattr(module, service.capitalize()) + service_instance = ServiceClass(config) + globals()[service] = getattr(service_instance, service) + + interpreter.llm.completions = llm + + # Start listening + asyncio.create_task(listener()) + + # Start watching the kernel if it's your job to do that + if True: # in the future, code can run on device. for now, just server. + asyncio.create_task(put_kernel_messages_into_queue(from_computer)) + + config = Config(app, host=server_host, port=int(server_port), lifespan='on') + server = Server(config) + await server.serve() + +# Run the FastAPI app +if __name__ == "__main__": asyncio.run(main()) \ No newline at end of file diff --git a/01OS/01OS/server/llm/__init__.py b/01OS/01OS/server/services/__init__.py similarity index 100% rename from 01OS/01OS/server/llm/__init__.py rename to 01OS/01OS/server/services/__init__.py diff --git a/01OS/01OS/server/stt/__init__.py b/01OS/01OS/server/services/llm/__init__.py similarity index 100% rename from 01OS/01OS/server/stt/__init__.py rename to 01OS/01OS/server/services/llm/__init__.py diff --git a/01OS/01OS/server/services/llm/litellm/__init__.py b/01OS/01OS/server/services/llm/litellm/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/01OS/01OS/server/services/llm/litellm/llm.py b/01OS/01OS/server/services/llm/litellm/llm.py new file mode 100644 index 0000000..906308b --- /dev/null +++ b/01OS/01OS/server/services/llm/litellm/llm.py @@ -0,0 +1,15 @@ +class Llm: + def __init__(self, config): + + # Litellm is used by OI by default, so we just modify OI + + interpreter = config["interpreter"] + config.pop("interpreter", None) + config.pop("service_directory", None) + for key, value in config.items(): + setattr(interpreter, key.replace("-", "_"), value) + + self.llm = interpreter.llm.completions + + + diff --git a/01OS/01OS/server/services/llm/llamaedge/__init__.py b/01OS/01OS/server/services/llm/llamaedge/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/01OS/01OS/server/services/llm/llamaedge/llm.py b/01OS/01OS/server/services/llm/llamaedge/llm.py new file mode 100644 index 0000000..fa77abf --- /dev/null +++ b/01OS/01OS/server/services/llm/llamaedge/llm.py @@ -0,0 +1,49 @@ +import os +import subprocess +import requests +import json + +class Llm: + def __init__(self, config): + self.install(config["service_directory"]) + + def install(self, service_directory): + LLM_FOLDER_PATH = service_directory + self.llm_directory = os.path.join(LLM_FOLDER_PATH, 'llm') + if not os.path.isdir(self.llm_directory): # Check if the LLM directory exists + os.makedirs(LLM_FOLDER_PATH, exist_ok=True) + + # Install WasmEdge + subprocess.run(['curl', '-sSf', 'https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh', '|', 'bash', '-s', '--', '--plugin', 'wasi_nn-ggml']) + + # Download the Qwen1.5-0.5B-Chat model GGUF file + MODEL_URL = "https://huggingface.co/second-state/Qwen1.5-0.5B-Chat-GGUF/resolve/main/Qwen1.5-0.5B-Chat-Q5_K_M.gguf" + subprocess.run(['curl', '-LO', MODEL_URL], cwd=self.llm_directory) + + # Download the llama-api-server.wasm app + APP_URL = "https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/llama-api-server.wasm" + subprocess.run(['curl', '-LO', APP_URL], cwd=self.llm_directory) + + # Run the API server + subprocess.run(['wasmedge', '--dir', '.:.', '--nn-preload', 'default:GGML:AUTO:Qwen1.5-0.5B-Chat-Q5_K_M.gguf', 'llama-api-server.wasm', '-p', 'llama-2-chat'], cwd=self.llm_directory) + + print("LLM setup completed.") + else: + print("LLM already set up. Skipping download.") + + def llm(self, messages): + url = "http://localhost:8080/v1/chat/completions" + headers = { + 'accept': 'application/json', + 'Content-Type': 'application/json' + } + data = { + "messages": messages, + "model": "llama-2-chat" + } + with requests.post(url, headers=headers, data=json.dumps(data), stream=True) as response: + for line in response.iter_lines(): + if line: + yield json.loads(line) + + diff --git a/01OS/01OS/server/services/llm/llamafile/__init__.py b/01OS/01OS/server/services/llm/llamafile/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/01OS/01OS/server/services/llm/llamafile/llm.py b/01OS/01OS/server/services/llm/llamafile/llm.py new file mode 100644 index 0000000..3e8e8e4 --- /dev/null +++ b/01OS/01OS/server/services/llm/llamafile/llm.py @@ -0,0 +1,84 @@ +import os +import platform +import subprocess +import time +import wget +import stat + +class Llm: + def __init__(self, config): + + self.interpreter = config["interpreter"] + config.pop("interpreter", None) + + self.install(config["service_directory"]) + + config.pop("service_directory", None) + for key, value in config.items(): + setattr(self.interpreter, key.replace("-", "_"), value) + + self.llm = self.interpreter.llm.completions + + def install(self, service_directory): + + if platform.system() == "Darwin": # Check if the system is MacOS + result = subprocess.run( + ["xcode-select", "-p"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) + if result.returncode != 0: + print( + "Llamafile requires Mac users to have Xcode installed. You can install Xcode from https://developer.apple.com/xcode/ .\n\nAlternatively, you can use `LM Studio`, `Jan.ai`, or `Ollama` to manage local language models. Learn more at https://docs.openinterpreter.com/guides/running-locally ." + ) + time.sleep(3) + raise Exception("Xcode is not installed. Please install Xcode and try again.") + + # Define the path to the models directory + models_dir = os.path.join(service_directory, "models") + + # Check and create the models directory if it doesn't exist + if not os.path.exists(models_dir): + os.makedirs(models_dir) + + # Define the path to the new llamafile + llamafile_path = os.path.join(models_dir, "phi-2.Q4_K_M.llamafile") + + # Check if the new llamafile exists, if not download it + if not os.path.exists(llamafile_path): + print( + "Attempting to download the `Phi-2` language model. This may take a few minutes." + ) + time.sleep(3) + + url = "https://huggingface.co/jartine/phi-2-llamafile/resolve/main/phi-2.Q4_K_M.llamafile" + wget.download(url, llamafile_path) + + + + # Make the new llamafile executable + if platform.system() != "Windows": + st = os.stat(llamafile_path) + os.chmod(llamafile_path, st.st_mode | stat.S_IEXEC) + + # Run the new llamafile in the background + if os.path.exists(llamafile_path): + try: + # Test if the llamafile is executable + subprocess.check_call([llamafile_path]) + except subprocess.CalledProcessError: + print("The llamafile is not executable. Please check the file permissions.") + raise + subprocess.Popen([llamafile_path, "-ngl", "9999"]) + else: + error_message = "The llamafile does not exist or is corrupted. Please ensure it has been downloaded correctly or try again." + print(error_message) + print(error_message) + + self.interpreter.system_message = "You are Open Interpreter, a world-class programmer that can execute code on the user's machine." + self.interpreter.offline = True + + self.interpreter.llm.model = "local" + self.interpreter.llm.temperature = 0 + self.interpreter.llm.api_base = "https://localhost:8080/v1" + self.interpreter.llm.max_tokens = 1000 + self.interpreter.llm.context_window = 3000 + self.interpreter.llm.supports_functions = False \ No newline at end of file diff --git a/01OS/01OS/server/services/stt/__init__.py b/01OS/01OS/server/services/stt/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/01OS/01OS/server/services/stt/local-whisper/__init__.py b/01OS/01OS/server/services/stt/local-whisper/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/01OS/01OS/server/services/stt/local-whisper/stt.py b/01OS/01OS/server/services/stt/local-whisper/stt.py new file mode 100644 index 0000000..e7bf150 --- /dev/null +++ b/01OS/01OS/server/services/stt/local-whisper/stt.py @@ -0,0 +1,151 @@ +""" +Defines a function which takes a path to an audio file and turns it into text. +""" + +from datetime import datetime +import os +import contextlib +import tempfile +import shutil +import ffmpeg +import subprocess + +import os +import subprocess + + +class Stt: + def __init__(self, config): + service_directory = config["service_directory"] + install(service_directory) + + def stt(self, audio_file_path): + return stt(audio_file_path) + + + +def install(service_dir): + + ### INSTALL + + WHISPER_RUST_PATH = os.path.join(service_dir, "whisper-rust") + script_dir = os.path.dirname(os.path.realpath(__file__)) + source_whisper_rust_path = os.path.join(script_dir, "whisper-rust") + if not os.path.exists(source_whisper_rust_path): + print(f"Source directory does not exist: {source_whisper_rust_path}") + exit(1) + if not os.path.exists(WHISPER_RUST_PATH): + shutil.copytree(source_whisper_rust_path, WHISPER_RUST_PATH) + + os.chdir(WHISPER_RUST_PATH) + + # Check if whisper-rust executable exists before attempting to build + if not os.path.isfile(os.path.join(WHISPER_RUST_PATH, "target/release/whisper-rust")): + # Check if Rust is installed. Needed to build whisper executable + rust_check = subprocess.call('command -v rustc', shell=True) + if rust_check != 0: + print("Rust is not installed or is not in system PATH. Please install Rust before proceeding.") + exit(1) + + # Build Whisper Rust executable if not found + subprocess.call('cargo build --release', shell=True) + else: + print("Whisper Rust executable already exists. Skipping build.") + + WHISPER_MODEL_PATH = os.path.join(service_dir, "model") + + WHISPER_MODEL_NAME = os.getenv('WHISPER_MODEL_NAME', 'ggml-tiny.en.bin') + WHISPER_MODEL_URL = os.getenv('WHISPER_MODEL_URL', 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/') + + if not os.path.isfile(os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME)): + os.makedirs(WHISPER_MODEL_PATH, exist_ok=True) + subprocess.call(f'curl -L "{WHISPER_MODEL_URL}{WHISPER_MODEL_NAME}" -o "{os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME)}"', shell=True) + else: + print("Whisper model already exists. Skipping download.") + +def convert_mime_type_to_format(mime_type: str) -> str: + if mime_type == "audio/x-wav" or mime_type == "audio/wav": + return "wav" + if mime_type == "audio/webm": + return "webm" + if mime_type == "audio/raw": + return "dat" + + return mime_type + +@contextlib.contextmanager +def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str: + temp_dir = tempfile.gettempdir() + + # Create a temporary file with the appropriate extension + input_ext = convert_mime_type_to_format(mime_type) + input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}") + with open(input_path, 'wb') as f: + f.write(audio) + + # Check if the input file exists + assert os.path.exists(input_path), f"Input file does not exist: {input_path}" + + # Export to wav + output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav") + print(mime_type, input_path, output_path) + if mime_type == "audio/raw": + ffmpeg.input( + input_path, + f='s16le', + ar='16000', + ac=1, + ).output(output_path).run() + else: + ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run() + + try: + yield output_path + finally: + os.remove(input_path) + os.remove(output_path) + +def run_command(command): + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + return result.stdout, result.stderr + +def get_transcription_file(wav_file_path: str): + local_path = os.path.join(os.path.dirname(__file__), 'model') + whisper_rust_path = os.path.join(os.path.dirname(__file__), 'whisper-rust', 'target', 'release') + model_name = os.getenv('WHISPER_MODEL_NAME') + if not model_name: + raise EnvironmentError("WHISPER_MODEL_NAME environment variable is not set.") + + output, error = run_command([ + os.path.join(whisper_rust_path, 'whisper-rust'), + '--model-path', os.path.join(local_path, model_name), + '--file-path', wav_file_path + ]) + + return output + +def get_transcription_bytes(audio_bytes: bytearray, mime_type): + with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path: + return get_transcription_file(wav_file_path) + +def stt_bytes(audio_bytes: bytearray, mime_type="audio/wav"): + with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path: + return stt_wav(wav_file_path) + +def stt_wav(wav_file_path: str): + temp_dir = tempfile.gettempdir() + output_path = os.path.join(temp_dir, f"output_stt_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav") + ffmpeg.input(wav_file_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run() + try: + transcript = get_transcription_file(output_path) + finally: + os.remove(output_path) + return transcript + +def stt(input_data, mime_type="audio/wav"): + if isinstance(input_data, str): + return stt_wav(input_data) + elif isinstance(input_data, bytearray): + return stt_bytes(input_data, mime_type) + else: + raise ValueError("Input data should be either a path to a wav file (str) or audio bytes (bytearray)") \ No newline at end of file diff --git a/01OS/01OS/server/stt/whisper-rust/.gitignore b/01OS/01OS/server/services/stt/local-whisper/whisper-rust/.gitignore similarity index 100% rename from 01OS/01OS/server/stt/whisper-rust/.gitignore rename to 01OS/01OS/server/services/stt/local-whisper/whisper-rust/.gitignore diff --git a/01OS/01OS/server/stt/whisper-rust/Cargo.lock b/01OS/01OS/server/services/stt/local-whisper/whisper-rust/Cargo.lock similarity index 100% rename from 01OS/01OS/server/stt/whisper-rust/Cargo.lock rename to 01OS/01OS/server/services/stt/local-whisper/whisper-rust/Cargo.lock diff --git a/01OS/01OS/server/stt/whisper-rust/Cargo.toml b/01OS/01OS/server/services/stt/local-whisper/whisper-rust/Cargo.toml similarity index 100% rename from 01OS/01OS/server/stt/whisper-rust/Cargo.toml rename to 01OS/01OS/server/services/stt/local-whisper/whisper-rust/Cargo.toml diff --git a/01OS/01OS/server/stt/whisper-rust/src/main.rs b/01OS/01OS/server/services/stt/local-whisper/whisper-rust/src/main.rs similarity index 100% rename from 01OS/01OS/server/stt/whisper-rust/src/main.rs rename to 01OS/01OS/server/services/stt/local-whisper/whisper-rust/src/main.rs diff --git a/01OS/01OS/server/stt/whisper-rust/src/transcribe.rs b/01OS/01OS/server/services/stt/local-whisper/whisper-rust/src/transcribe.rs similarity index 100% rename from 01OS/01OS/server/stt/whisper-rust/src/transcribe.rs rename to 01OS/01OS/server/services/stt/local-whisper/whisper-rust/src/transcribe.rs diff --git a/01OS/01OS/server/services/stt/openai/__init__.py b/01OS/01OS/server/services/stt/openai/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/01OS/01OS/server/stt/stt.py b/01OS/01OS/server/services/stt/openai/stt.py similarity index 72% rename from 01OS/01OS/server/stt/stt.py rename to 01OS/01OS/server/services/stt/openai/stt.py index e7d5795..40308cf 100644 --- a/01OS/01OS/server/stt/stt.py +++ b/01OS/01OS/server/services/stt/openai/stt.py @@ -1,9 +1,11 @@ -""" -Defines a function which takes a path to an audio file and turns it into text. -""" +class Stt: + def __init__(self, config): + pass + + def stt(self, audio_file_path): + return stt(audio_file_path) + -from dotenv import load_dotenv -load_dotenv() # take environment variables from .env. from datetime import datetime import os @@ -14,9 +16,6 @@ import subprocess import openai from openai import OpenAI -from ..utils.logs import setup_logging -from ..utils.logs import logger -setup_logging() client = OpenAI() @@ -91,28 +90,18 @@ def stt_bytes(audio_bytes: bytearray, mime_type="audio/wav"): def stt_wav(wav_file_path: str): - if os.getenv('ALL_LOCAL') == 'False': - audio_file = open(wav_file_path, "rb") - try: - transcript = client.audio.transcriptions.create( - model="whisper-1", - file=audio_file, - response_format="text" - ) - except openai.BadRequestError as e: - logger.info(f"openai.BadRequestError: {e}") - return None - - return transcript - else: - temp_dir = tempfile.gettempdir() - output_path = os.path.join(temp_dir, f"output_stt_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav") - ffmpeg.input(wav_file_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run() - try: - transcript = get_transcription_file(output_path) - finally: - os.remove(output_path) - return transcript + audio_file = open(wav_file_path, "rb") + try: + transcript = client.audio.transcriptions.create( + model="whisper-1", + file=audio_file, + response_format="text" + ) + except openai.BadRequestError as e: + print(f"openai.BadRequestError: {e}") + return None + + return transcript def stt(input_data, mime_type="audio/wav"): if isinstance(input_data, str): diff --git a/01OS/01OS/server/services/tts/__init__.py b/01OS/01OS/server/services/tts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/01OS/01OS/server/services/tts/openai/__init__.py b/01OS/01OS/server/services/tts/openai/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/01OS/01OS/server/services/tts/openai/tts.py b/01OS/01OS/server/services/tts/openai/tts.py new file mode 100644 index 0000000..298b52d --- /dev/null +++ b/01OS/01OS/server/services/tts/openai/tts.py @@ -0,0 +1,30 @@ +import ffmpeg +import tempfile +from openai import OpenAI +import os +import subprocess +import tempfile + +client = OpenAI() + +class Tts: + def __init__(self, config): + pass + + def tts(self, text): + response = client.audio.speech.create( + model="tts-1", + voice="alloy", + input=text, + response_format="opus" + ) + with tempfile.NamedTemporaryFile(suffix=".opus", delete=False) as temp_file: + response.stream_to_file(temp_file.name) + + # TODO: hack to format audio correctly for device + outfile = tempfile.gettempdir() + "/" + "raw.dat" + ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run() + + return outfile + + diff --git a/01OS/01OS/server/services/tts/piper/__init__.py b/01OS/01OS/server/services/tts/piper/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/01OS/01OS/server/services/tts/piper/tts.py b/01OS/01OS/server/services/tts/piper/tts.py new file mode 100644 index 0000000..53bf0dc --- /dev/null +++ b/01OS/01OS/server/services/tts/piper/tts.py @@ -0,0 +1,84 @@ +import ffmpeg +import tempfile +import os +import subprocess +import tempfile +import urllib.request +import tarfile + +class Tts: + def __init__(self, config): + self.piper_directory = "" + self.install(config["service_directory"]) + + def tts(self, text): + + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file: + output_file = temp_file.name + piper_dir = self.piper_directory + subprocess.run([ + os.path.join(piper_dir, 'piper'), + '--model', os.path.join(piper_dir, os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx')), + '--output_file', output_file + ], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + + # TODO: hack to format audio correctly for device + outfile = tempfile.gettempdir() + "/" + "raw.dat" + ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run() + + return outfile + + def install(self, service_directory): + PIPER_FOLDER_PATH = service_directory + self.piper_directory = os.path.join(PIPER_FOLDER_PATH, 'piper') + if not os.path.isdir(self.piper_directory): # Check if the Piper directory exists + os.makedirs(PIPER_FOLDER_PATH, exist_ok=True) + + # Determine OS and architecture + OS = os.uname().sysname + ARCH = os.uname().machine + if OS == "Darwin": + OS = "macos" + if ARCH == "arm64": + ARCH = "aarch64" + elif ARCH == "x86_64": + ARCH = "x64" + else: + print("Piper: unsupported architecture") + return + + PIPER_ASSETNAME = f"piper_{OS}_{ARCH}.tar.gz" + PIPER_URL = "https://github.com/rhasspy/piper/releases/latest/download/" + + # Download and extract Piper + urllib.request.urlretrieve(f"{PIPER_URL}{PIPER_ASSETNAME}", os.path.join(PIPER_FOLDER_PATH, PIPER_ASSETNAME)) + with tarfile.open(os.path.join(PIPER_FOLDER_PATH, PIPER_ASSETNAME), 'r:gz') as tar: + tar.extractall(path=PIPER_FOLDER_PATH) + + PIPER_VOICE_URL = os.getenv('PIPER_VOICE_URL', 'https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/') + PIPER_VOICE_NAME = os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx') + + # Download voice model and its json file + urllib.request.urlretrieve(f"{PIPER_VOICE_URL}{PIPER_VOICE_NAME}", os.path.join(self.piper_directory, PIPER_VOICE_NAME)) + urllib.request.urlretrieve(f"{PIPER_VOICE_URL}{PIPER_VOICE_NAME}.json", os.path.join(self.piper_directory, f"{PIPER_VOICE_NAME}.json")) + + # Additional setup for macOS + if OS == "macos": + if ARCH == "x64": + subprocess.run(['softwareupdate', '--install-rosetta', '--agree-to-license']) + + PIPER_PHONEMIZE_ASSETNAME = f"piper-phonemize_{OS}_{ARCH}.tar.gz" + PIPER_PHONEMIZE_URL = "https://github.com/rhasspy/piper-phonemize/releases/latest/download/" + urllib.request.urlretrieve(f"{PIPER_PHONEMIZE_URL}{PIPER_PHONEMIZE_ASSETNAME}", os.path.join(self.piper_directory, PIPER_PHONEMIZE_ASSETNAME)) + + with tarfile.open(os.path.join(self.piper_directory, PIPER_PHONEMIZE_ASSETNAME), 'r:gz') as tar: + tar.extractall(path=self.piper_directory) + + PIPER_DIR = self.piper_directory + subprocess.run(['install_name_tool', '-change', '@rpath/libespeak-ng.1.dylib', f"{PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib", f"{PIPER_DIR}/piper"]) + subprocess.run(['install_name_tool', '-change', '@rpath/libonnxruntime.1.14.1.dylib', f"{PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib", f"{PIPER_DIR}/piper"]) + subprocess.run(['install_name_tool', '-change', '@rpath/libpiper_phonemize.1.dylib', f"{PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib", f"{PIPER_DIR}/piper"]) + + print("Piper setup completed.") + else: + print("Piper already set up. Skipping download.") diff --git a/01OS/01OS/server/tts/tts.py b/01OS/01OS/server/tts/tts.py deleted file mode 100644 index b834cae..0000000 --- a/01OS/01OS/server/tts/tts.py +++ /dev/null @@ -1,98 +0,0 @@ -""" -Defines a function which takes text and returns a path to an audio file. -""" - -from pydub import AudioSegment -from dotenv import load_dotenv -load_dotenv() # take environment variables from .env. - -import ffmpeg -import tempfile -from openai import OpenAI -import os -import subprocess -import tempfile -from pydub import AudioSegment - -client = OpenAI() - -chunk_size = 1024 - -def stream_tts(text): - """ - A generator that streams tts as LMC messages. - """ - if os.getenv('ALL_LOCAL') == 'False': - response = client.audio.speech.create( - model="tts-1", - voice="alloy", - input=text, - response_format="opus" - ) - with tempfile.NamedTemporaryFile(suffix=".opus", delete=False) as temp_file: - response.stream_to_file(temp_file.name) - - # TODO: hack to format audio correctly for device - outfile = tempfile.gettempdir() + "/" + "raw.dat" - ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run() - with open(outfile, "rb") as f: - audio_bytes = f.read() - file_type = "bytes.raw" - print(outfile, len(audio_bytes)) - os.remove(outfile) - - else: - with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file: - output_file = temp_file.name - piper_dir = os.path.join(os.path.dirname(__file__), 'local_service', 'piper') - subprocess.run([ - os.path.join(piper_dir, 'piper'), - '--model', os.path.join(piper_dir, os.getenv('PIPER_VOICE_NAME')), - '--output_file', output_file - ], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) - - # TODO: hack to format audio correctly for device - outfile = tempfile.gettempdir() + "/" + "raw.dat" - ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run() - with open(outfile, "rb") as f: - audio_bytes = f.read() - file_type = "bytes.raw" - print(outfile, len(audio_bytes)) - os.remove(outfile) - - # Stream the audio - yield {"role": "assistant", "type": "audio", "format": file_type, "start": True} - for i in range(0, len(audio_bytes), chunk_size): - chunk = audio_bytes[i:i+chunk_size] - yield chunk - yield {"role": "assistant", "type": "audio", "format": file_type, "end": True} - -def play_audiosegment(audio): - """ - UNUSED - the default makes some pops. this fixes that - """ - - # Apply a fade-out (optional but recommended to smooth the end) - audio = audio.fade_out(500) - - # Add silence at the end - silence_duration_ms = 500 # Duration of silence in milliseconds - silence = AudioSegment.silent(duration=silence_duration_ms) - audio_with_padding = audio + silence - - # Save the modified audio as a WAV file for compatibility with simpleaudio - audio_with_padding.export("output_audio.wav", format="wav") - - # Load the processed WAV file - wave_obj = sa.WaveObject.from_wave_file("output_audio.wav") - - # Play the audio - play_obj = wave_obj.play() - - # Wait for the playback to finish - play_obj.wait_done() - - # Delete the wav file - os.remove("output_audio.wav") - diff --git a/01OS/01OS/server/utils/bytes_to_wav.py b/01OS/01OS/server/utils/bytes_to_wav.py new file mode 100644 index 0000000..fffa411 --- /dev/null +++ b/01OS/01OS/server/utils/bytes_to_wav.py @@ -0,0 +1,57 @@ +from datetime import datetime +import os +import contextlib +import tempfile +import ffmpeg +import subprocess + +def convert_mime_type_to_format(mime_type: str) -> str: + if mime_type == "audio/x-wav" or mime_type == "audio/wav": + return "wav" + if mime_type == "audio/webm": + return "webm" + if mime_type == "audio/raw": + return "dat" + + return mime_type + +@contextlib.contextmanager +def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str: + temp_dir = tempfile.gettempdir() + + # Create a temporary file with the appropriate extension + input_ext = convert_mime_type_to_format(mime_type) + input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}") + with open(input_path, 'wb') as f: + f.write(audio) + + # Check if the input file exists + assert os.path.exists(input_path), f"Input file does not exist: {input_path}" + + # Export to wav + output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav") + print(mime_type, input_path, output_path) + if mime_type == "audio/raw": + ffmpeg.input( + input_path, + f='s16le', + ar='16000', + ac=1, + ).output(output_path).run() + else: + ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run() + + try: + yield output_path + finally: + os.remove(input_path) + os.remove(output_path) + +def run_command(command): + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + return result.stdout, result.stderr + + +def bytes_to_wav(audio_bytes: bytearray, mime_type): + with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path: + return wav_file_path diff --git a/01OS/01OS/start.py b/01OS/01OS/start.py new file mode 100644 index 0000000..f656ce7 --- /dev/null +++ b/01OS/01OS/start.py @@ -0,0 +1,95 @@ +import typer +import asyncio +import platform +import concurrent.futures +import threading +import os +import signal +import importlib + +app = typer.Typer() + +@app.command() +def run( + server: bool = typer.Option(False, "--server", help="Run server"), + server_host: str = typer.Option("0.0.0.0", "--server-host", help="Specify the server host where the server will deploy"), + server_port: int = typer.Option(8000, "--server-port", help="Specify the server port where the server will deploy"), + + tunnel_service: str = typer.Option("bore", "--tunnel-service", help="Specify the tunnel service"), + expose: bool = typer.Option(False, "--expose", help="Expose server to internet"), + + client: bool = typer.Option(False, "--client", help="Run client"), + server_url: str = typer.Option(None, "--server-url", help="Specify the server URL that the client should expect. Defaults to server-host and server-port"), + client_type: str = typer.Option("auto", "--client-type", help="Specify the client type"), + + llm_service: str = typer.Option("litellm", "--llm-service", help="Specify the LLM service"), + + model: str = typer.Option("gpt-4", "--model", help="Specify the model"), + llm_supports_vision: bool = typer.Option(False, "--llm-supports-vision", help="Specify if the LLM service supports vision"), + llm_supports_functions: bool = typer.Option(False, "--llm-supports-functions", help="Specify if the LLM service supports functions"), + context_window: int = typer.Option(2048, "--context-window", help="Specify the context window size"), + max_tokens: int = typer.Option(4096, "--max-tokens", help="Specify the maximum number of tokens"), + temperature: float = typer.Option(0.8, "--temperature", help="Specify the temperature for generation"), + + tts_service: str = typer.Option("openai", "--tts-service", help="Specify the TTS service"), + + stt_service: str = typer.Option("openai", "--stt-service", help="Specify the STT service"), + + local: bool = typer.Option(False, "--local", help="Use recommended local services for LLM, STT, and TTS"), + ): + + if local: + tts_service = "piper" + llm_service = "llamafile" + stt_service = "local-whisper" + + if not server_url: + server_url = f"{server_host}:{server_port}" + + if not server and not client: + server = True + client = True + + def handle_exit(signum, frame): + os._exit(0) + + signal.signal(signal.SIGINT, handle_exit) + + if server: + from .server.server import main + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + server_thread = threading.Thread(target=loop.run_until_complete, args=(main(server_host, server_port, llm_service, model, llm_supports_vision, llm_supports_functions, context_window, max_tokens, temperature, tts_service, stt_service),)) + server_thread.start() + + if expose: + #tunnel_thread = threading.Thread(target=tunnel_service, args=[server_port]) + #tunnel_thread.start() + tunnel_thread = threading.Thread(target=os.system, args=("./tunnel.sh",)) + tunnel_thread.start() + + if client: + if client_type == "auto": + system_type = platform.system() + if system_type == "Darwin": # Mac OS + client_type = "mac" + elif system_type == "Linux": # Linux System + try: + with open('/proc/device-tree/model', 'r') as m: + if 'raspberry pi' in m.read().lower(): + client_type = "rpi" + else: + client_type = "linux" + except FileNotFoundError: + client_type = "linux" + + module = importlib.import_module(f".clients.{client_type}.device", package='01OS') + client_thread = threading.Thread(target=module.main, args=[server_url]) + client_thread.start() + + try: + server_thread.join() + tunnel_thread.join() + client_thread.join() + except KeyboardInterrupt: + os.kill(os.getpid(), signal.SIGINT) diff --git a/01OS/README.md b/01OS/README.md index 8678d6c..26d9143 100644 --- a/01OS/README.md +++ b/01OS/README.md @@ -5,5 +5,5 @@ pip install 01OS ``` ```bash -01 # Runs the 01 server and client. +01 # Runs the 01 server and client ``` diff --git a/01OS/poetry.lock b/01OS/poetry.lock index 19febbc..f3f4ce8 100644 --- a/01OS/poetry.lock +++ b/01OS/poetry.lock @@ -8400,4 +8400,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "4e7112e334cb1610550bcc44ab5f0a257621d774513c24034d60272b741caf51" +content-hash = "f582fa2573961a7bca4df34f7bf62bcbda856e57697f5e3daad6603ce2bc0589" diff --git a/01OS/pyproject.toml b/01OS/pyproject.toml index c15eca3..9b3e4d9 100644 --- a/01OS/pyproject.toml +++ b/01OS/pyproject.toml @@ -27,13 +27,15 @@ simpleaudio = "^1.0.4" opencv-python = "^4.9.0.80" open-interpreter = {version = "0.2.1rc1", extras = ["os"]} psutil = "^5.9.8" +typer = "^0.9.0" +platformdirs = "^4.2.0" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.poetry.scripts] -01 = "start:main" +01 = "01OS.start:app" [tool.poetry.group.dev.dependencies] black = "^23.10.1" diff --git a/01OS/start.py b/01OS/start.py deleted file mode 100644 index b9d2f34..0000000 --- a/01OS/start.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -This is just for the Python package — we need a Python entrypoint. -Just starts `start.sh` with all the same command line arguments. Aliased to 01. -""" - -import os -import subprocess -import sys -import psutil -import importlib -# Can't import normally because it starts with a number -process_utils = importlib.import_module("01OS.server.utils.process_utils") -kill_process_tree = process_utils.kill_process_tree - -def main(): - - # Get command line arguments - args = sys.argv[1:] - - # Get the directory of the current script - dir_path = os.path.dirname(os.path.realpath(__file__)) - - # Prepare the command - command = [os.path.join(dir_path, 'start.sh')] + args - - try: - # Start start.sh using psutil for better process management, and to kill all processes - psutil.Popen(command) - except KeyboardInterrupt: - print("Exiting...") - kill_process_tree() \ No newline at end of file diff --git a/README.md b/README.md index ca080a1..b06ef60 100644 --- a/README.md +++ b/README.md @@ -28,11 +28,10 @@ If you want to run local speech-to-text using Whisper, install Rust. Follow the pip install 01OS ``` -**Run the 01 end-to-end:** +**Run the 01:** ```bash -01 # This will run a server + attempt to determine and run a client. -# (Behavior can be modified by changing the contents of `.env`) +01 # This will run the server and attempt to determine and run a client. ``` **Expose an 01 Server Publicly** @@ -40,6 +39,7 @@ pip install 01OS We currently support exposing the 01 server publicly via a couple of different tunnel services: - **bore.pub** ([GitHub](https://github.com/ekzhang/bore)) + - **Requirements:** Ensure that Rust is installed ([Rust Installation](https://www.rust-lang.org/tools/install)), then run: ``` cargo install bore-cli @@ -50,6 +50,7 @@ We currently support exposing the 01 server publicly via a couple of different t ``` - **localtunnel** ([GitHub](https://github.com/localtunnel/localtunnel)) + - **Requirements:** Ensure that Node.js is installed ([Node.js Download](https://nodejs.org/en/download)), then run: ``` npm install -g localtunnel @@ -69,7 +70,6 @@ We currently support exposing the 01 server publicly via a couple of different t 01 --server --expose-with-ngrok ``` - **Run a specific client:** ```bash diff --git a/01OS/start.sh b/archive/start.sh similarity index 100% rename from 01OS/start.sh rename to archive/start.sh