diff --git a/OS/01/.env.example b/OS/01/.env.example index d55c875..01f14df 100644 --- a/OS/01/.env.example +++ b/OS/01/.env.example @@ -2,15 +2,17 @@ # Copy this file and rename it to ".env" to use it. # If ALL_LOCAL is False, we'll use OpenAI's services -# If setting ALL_LOCAL to true, set the path to the WHISPER local model +# else we use whisper.cpp and piper local models ALL_LOCAL=False -# WHISPER_MODEL_PATH=... +WHISPER_MODEL_NAME="ggml-tiny.en.bin" + +# Uncomment and set the OpenAI API key for OpenInterpreter to work # OPENAI_API_KEY=sk-... # For TTS, we use the en_US-lessac-medium voice model by default # Please change the voice URL and voice name if you wish to use another voice -export PIPER_VOICE_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/" -export PIPER_VOICE_NAME="en_US-lessac-medium.onnx" +PIPER_VOICE_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/" +PIPER_VOICE_NAME="en_US-lessac-medium.onnx" # If SERVER_START, this is where we'll serve the server. # If DEVICE_START, this is where the device expects the server to be. 
diff --git a/OS/01/device.py b/OS/01/device.py index b584536..d0c6687 100644 --- a/OS/01/device.py +++ b/OS/01/device.py @@ -230,10 +230,10 @@ if __name__ == "__main__": toggle_recording(True) else: break - else: - # Keyboard listener for spacebar press/release - listener = keyboard.Listener(on_press=on_press, on_release=on_release) - listener.start() + else: + # Keyboard listener for spacebar press/release + listener = keyboard.Listener(on_press=on_press, on_release=on_release) + listener.start() asyncio.run(main()) p.terminate() \ No newline at end of file diff --git a/OS/01/start.sh b/OS/01/start.sh index a5d7d67..5327767 100755 --- a/OS/01/start.sh +++ b/OS/01/start.sh @@ -9,9 +9,10 @@ set -a; source .env; set +a ### SETUP -# if using local models, install the models / executables - if [[ "$ALL_LOCAL" == "True" ]]; then + # if using local models, install the models / executables + WHISPER_MODEL_URL="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/" + WHISPER_RUST_PATH="`pwd`/local_stt/whisper-rust" curl -OL "${WHISPER_MODEL_URL}${WHISPER_MODEL_NAME}" --output-dir ${WHISPER_RUST_PATH} OS=$(uname -s) ARCH=$(uname -m) diff --git a/OS/01/stt.py b/OS/01/stt.py index 48a6494..c538dc1 100644 --- a/OS/01/stt.py +++ b/OS/01/stt.py @@ -56,13 +56,14 @@ def run_command(command): return result.stdout, result.stderr def get_transcription_file(wav_file_path: str): - model_path = os.getenv("WHISPER_MODEL_PATH") - if not model_path: - raise EnvironmentError("WHISPER_MODEL_PATH environment variable is not set.") + whisper_rust_path = os.path.join(os.path.dirname(__file__), 'local_stt', 'whisper-rust') + model_name = os.getenv('WHISPER_MODEL_NAME') + if not model_name: + raise EnvironmentError("WHISPER_MODEL_NAME environment variable is not set.") output, error = run_command([ - os.path.join(os.path.dirname(__file__), 'local_stt', 'whisper-rust', 'whisper-rust'), - '--model-path', model_path, + os.path.join(whisper_rust_path, 'whisper-rust'), + '--model-path', 
os.path.join(whisper_rust_path, model_name), '--file-path', wav_file_path ]) diff --git a/README.md b/README.md index 74a97c3..a0b8c0f 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,6 @@ python -m pip install -r requirements.txt ``` NB: Depending on your local Python version, you may run into [this issue↗](https://github.com/TaylorSMarks/playsound/issues/150) installing playsound. Workarounds are provided in the issue. -If you want to run local speech-to-text from whisper, download the GGML Whisper model from [Huggingface](https://huggingface.co/ggerganov/whisper.cpp). Then in `OS/01/start.sh`, set `ALL_LOCAL=TRUE` and set `WHISPER_MODEL_PATH` to the path of the model. - ## Usage ```bash @@ -36,6 +34,7 @@ cd OS/01 bash start.sh ``` +If you want to run local text-to-speech and speech-to-text, set `ALL_LOCAL=True` in your `OS/01/.env` file (copied from `.env.example`), which `start.sh` sources on startup. This will use the [whisper.cpp](https://github.com/ggerganov/whisper.cpp) and [Piper](https://github.com/rhasspy/piper) models.
## Background