feat: added local piper TTS

pull/19/head
Shiven Mian 11 months ago
parent 65acb1163f
commit 7582c8ad02

2
.gitignore vendored

@ -1,5 +1,5 @@
ggml-*.bin ggml-*.bin
OS/01/local_tts/*
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/
*.py[cod] *.py[cod]

@ -1 +1,12 @@
[{"role": "user", "type": "message", "content": " Hey, how you doing?\n"}] [
{
"role": "user",
"type": "message",
"content": " Hello, how are you doing?\n"
},
{
"role": "assistant",
"type": "message",
"content": "I'm an artificial intelligence, so I don't have feelings, but thank you for asking. How may I assist you today?"
}
]

@ -6,6 +6,11 @@ export ALL_LOCAL=False
# export WHISPER_MODEL_PATH=... # export WHISPER_MODEL_PATH=...
# export OPENAI_API_KEY=sk-... # export OPENAI_API_KEY=sk-...
# For TTS, we use the en_US-lessac-medium voice model by default
# Please change the voice URL and voice name if you wish to use another voice
export PIPER_VOICE_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/"
export PIPER_VOICE_NAME="en_US-lessac-medium.onnx"
# If SERVER_START, this is where we'll serve the server. # If SERVER_START, this is where we'll serve the server.
# If DEVICE_START, this is where the device expects the server to be. # If DEVICE_START, this is where the device expects the server to be.
export SERVER_URL=ws://localhost:8000/ export SERVER_URL=ws://localhost:8000/
@ -22,6 +27,46 @@ export SERVER_EXPOSE_PUBLICALLY=False
### SETUP ### SETUP
# if using local models, install the models / executables

#######################################
# Print the Piper release asset file name for an OS / architecture pair.
# Release assets use a lower-case OS name (e.g. piper_linux_x86_64.tar.gz)
# while `uname -s` reports "Linux", so the OS part is lower-cased here.
# Arguments: $1 - OS name ("macos", or the raw `uname -s` value)
#            $2 - architecture name as it appears in the asset name
# Outputs:   the asset file name on stdout
#######################################
_piper_asset_name() {
  local os
  # tr instead of ${1,,}: macOS ships bash 3.2, which has no case conversion.
  os=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')
  printf 'piper_%s_%s.tar.gz\n' "$os" "$2"
}

#######################################
# Download Piper and the configured voice model into ./local_tts/piper.
# Globals:   OS, ARCH, PIPER_VOICE_URL, PIPER_VOICE_NAME (read)
#            PIPER_ASSETNAME, PIPER_URL, PIPER_DIR,
#            PIPER_PHONEMIZE_ASSETNAME, PIPER_PHONEMIZE_URL (written)
# Returns:   0 on success, 1 as soon as a required step fails.
# Note:      changes the working directory; the caller restores it.
#######################################
_piper_install() {
  if [[ -z "$PIPER_VOICE_URL" || -z "$PIPER_VOICE_NAME" ]]; then
    echo "Piper: PIPER_VOICE_URL and PIPER_VOICE_NAME must be set" >&2
    return 1
  fi

  PIPER_ASSETNAME=$(_piper_asset_name "$OS" "$ARCH")
  PIPER_URL="https://github.com/rhasspy/piper/releases/latest/download/"

  # -p: do not fail when the directory is left over from a previous run.
  mkdir -p local_tts || return 1
  cd local_tts || return 1
  # -f: fail on HTTP errors instead of saving the error page as the archive.
  curl -fOL "${PIPER_URL}${PIPER_ASSETNAME}" || return 1
  tar -xvzf "$PIPER_ASSETNAME" || return 1
  cd piper || return 1

  # The voice model is needed on every platform, not only on macOS.
  curl -fOL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}" || return 1
  curl -fOL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}.json" || return 1

  if [[ "$OS" == "macos" ]]; then
    if [[ "$ARCH" == "x64" ]]; then
      # NOTE(review): Rosetta targets Apple Silicon, yet this runs on x64 only;
      # the condition may be inverted - confirm. Failure here is not fatal.
      softwareupdate --install-rosetta --agree-to-license
    fi
    PIPER_PHONEMIZE_ASSETNAME="piper-phonemize_${OS}_${ARCH}.tar.gz"
    PIPER_PHONEMIZE_URL="https://github.com/rhasspy/piper-phonemize/releases/latest/download/"
    curl -fOL "${PIPER_PHONEMIZE_URL}${PIPER_PHONEMIZE_ASSETNAME}" || return 1
    tar -xvzf "$PIPER_PHONEMIZE_ASSETNAME" || return 1
    PIPER_DIR=$(pwd)
    # Point the piper binary at the dylibs unpacked from piper-phonemize.
    install_name_tool -change @rpath/libespeak-ng.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib" "${PIPER_DIR}/piper" || return 1
    install_name_tool -change @rpath/libonnxruntime.1.14.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib" "${PIPER_DIR}/piper" || return 1
    install_name_tool -change @rpath/libpiper_phonemize.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib" "${PIPER_DIR}/piper" || return 1
  fi
}

if [[ "$ALL_LOCAL" == "True" ]]; then
  OS=$(uname -s)
  ARCH=$(uname -m)
  PIPER_SUPPORTED=1
  if [[ "$OS" == "Darwin" ]]; then
    OS="macos"
    case "$ARCH" in
      arm64) ARCH="aarch64" ;;
      x86_64) ARCH="x64" ;;
      *)
        echo "Piper: unsupported architecture" >&2
        # Do not attempt to download an asset that cannot exist.
        PIPER_SUPPORTED=0
        ;;
    esac
  fi

  if [[ "$PIPER_SUPPORTED" == "1" ]]; then
    # Remember where we started so a failure half-way through the install
    # cannot leave the rest of the script running in the wrong directory.
    PIPER_START_DIR=$PWD
    if ! _piper_install; then
      echo "Piper: installation failed" >&2
    fi
    cd "$PIPER_START_DIR" || echo "Piper: cannot return to ${PIPER_START_DIR}" >&2
  fi
fi
# (for dev, reset the ports we were using) # (for dev, reset the ports we were using)
SERVER_PORT=$(echo $SERVER_URL | grep -oE "[0-9]+") SERVER_PORT=$(echo $SERVER_URL | grep -oE "[0-9]+")

@ -7,20 +7,42 @@ from openai import OpenAI
from pydub import AudioSegment from pydub import AudioSegment
from pydub.playback import play from pydub.playback import play
from playsound import playsound from playsound import playsound
import os
import subprocess
import tempfile
client = OpenAI() client = OpenAI()
def run_command(command):
    """Echo a command, run it, and return the finished process.

    Args:
        command: The command passed straight to ``subprocess.run`` (an
            argument list, since no shell is requested).

    Returns:
        The ``subprocess.CompletedProcess`` for the run, with ``stdout`` and
        ``stderr`` captured as text. The exit status is not checked here.
    """
    print(command)
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    return result
def tts(text, play_audio):
    """Synthesize speech for ``text`` and return the audio file's bytes.

    When the ``ALL_LOCAL`` environment variable is exactly ``'False'`` the
    OpenAI speech endpoint is used and MP3 data is returned. Otherwise the
    ``piper`` executable under ``local_tts/piper`` next to this file is run
    with the model named by ``PIPER_VOICE_NAME`` and WAV data is returned.

    Args:
        text: The text to speak.
        play_audio: If true, play the generated file before returning.

    Returns:
        The raw bytes of the generated audio file.

    Raises:
        RuntimeError: In local mode, if ``PIPER_VOICE_NAME`` is not set or
            the piper process exits with a non-zero status.
    """
    if os.getenv('ALL_LOCAL') == 'False':
        response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=text,
            response_format="mp3"
        )
        with tempfile.NamedTemporaryFile(suffix=".mp3") as temp_file:
            response.stream_to_file(temp_file.name)

            if play_audio:
                playsound(temp_file.name)

            return temp_file.read()

    voice_name = os.getenv('PIPER_VOICE_NAME')
    if not voice_name:
        # Fail with a clear message instead of a TypeError from os.path.join.
        raise RuntimeError("PIPER_VOICE_NAME must be set to use local TTS")

    piper_dir = os.path.join(os.path.dirname(__file__), 'local_tts', 'piper')

    # Only reserve a unique path here; the handle is closed before piper
    # writes to it so two writers never hold the file at once.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        output_file = temp_file.name

    try:
        result = subprocess.run([
            os.path.join(piper_dir, 'piper'),
            '--model', os.path.join(piper_dir, voice_name),
            '--output_file', output_file
        ], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        if result.returncode != 0:
            # Surface piper's own diagnostics rather than returning empty audio.
            raise RuntimeError(
                "piper exited with status %d: %s" % (result.returncode, result.stderr.strip())
            )

        if play_audio:
            playsound(output_file)

        with open(output_file, 'rb') as audio_file:
            return audio_file.read()
    finally:
        # delete=False means nothing else cleans this file up.
        if os.path.exists(output_file):
            os.remove(output_file)

Loading…
Cancel
Save