feat: added local piper TTS

pull/19/head
Shiven Mian 11 months ago
parent 65acb1163f
commit 7582c8ad02

2
.gitignore vendored

@ -1,5 +1,5 @@
ggml-*.bin
OS/01/local_tts/*
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

@ -1 +1,12 @@
[{"role": "user", "type": "message", "content": " Hey, how you doing?\n"}]
[
{
"role": "user",
"type": "message",
"content": " Hello, how are you doing?\n"
},
{
"role": "assistant",
"type": "message",
"content": "I'm an artificial intelligence, so I don't have feelings, but thank you for asking. How may I assist you today?"
}
]

@ -6,6 +6,11 @@ export ALL_LOCAL=False
# export WHISPER_MODEL_PATH=...
# export OPENAI_API_KEY=sk-...
# For TTS, we use the en_US-lessac-medium voice model by default
# Please change the voice URL and voice name if you wish to use another voice
export PIPER_VOICE_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/"
export PIPER_VOICE_NAME="en_US-lessac-medium.onnx"
# If SERVER_START, this is where we'll serve the server.
# If DEVICE_START, this is where the device expects the server to be.
export SERVER_URL=ws://localhost:8000/
@ -22,6 +27,46 @@ export SERVER_EXPOSE_PUBLICALLY=False
### SETUP
# if using local models, install the models / executables
if [[ "$ALL_LOCAL" == "True" ]]; then
OS=$(uname -s)
ARCH=$(uname -m)
if [ "$OS" = "Darwin" ]; then
OS="macos"
if [ "$ARCH" = "arm64" ]; then
ARCH="aarch64"
elif [ "$ARCH" = "x86_64" ]; then
ARCH="x64"
else
echo "Piper: unsupported architecture"
fi
fi
PIPER_ASSETNAME="piper_${OS}_${ARCH}.tar.gz"
PIPER_URL="https://github.com/rhasspy/piper/releases/latest/download/"
mkdir local_tts
cd local_tts
curl -OL "${PIPER_URL}${PIPER_ASSETNAME}"
tar -xvzf $PIPER_ASSETNAME
cd piper
if [ "$OS" = "macos" ]; then
if [ "$ARCH" = "x64" ]; then
softwareupdate --install-rosetta --agree-to-license
fi
PIPER_PHONEMIZE_ASSETNAME="piper-phonemize_${OS}_${ARCH}.tar.gz"
PIPER_PHONEMIZE_URL="https://github.com/rhasspy/piper-phonemize/releases/latest/download/"
curl -OL "${PIPER_PHONEMIZE_URL}${PIPER_PHONEMIZE_ASSETNAME}"
tar -xvzf $PIPER_PHONEMIZE_ASSETNAME
curl -OL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}"
curl -OL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}.json"
PIPER_DIR=`pwd`
install_name_tool -change @rpath/libespeak-ng.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib" "${PIPER_DIR}/piper"
install_name_tool -change @rpath/libonnxruntime.1.14.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib" "${PIPER_DIR}/piper"
install_name_tool -change @rpath/libpiper_phonemize.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib" "${PIPER_DIR}/piper"
fi
cd ../..
fi
# (for dev, reset the ports we were using)
SERVER_PORT=$(echo $SERVER_URL | grep -oE "[0-9]+")

@ -7,20 +7,42 @@ from openai import OpenAI
from pydub import AudioSegment
from pydub.playback import play
from playsound import playsound
import os
import subprocess
import tempfile
client = OpenAI()
def run_command(command):
print(command)
result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
return result
def tts(text, play_audio):
response = client.audio.speech.create(
model="tts-1",
voice="alloy",
input=text,
response_format="mp3"
)
with tempfile.NamedTemporaryFile(suffix=".mp3") as temp_file:
response.stream_to_file(temp_file.name)
if play_audio:
playsound(temp_file.name)
return temp_file.read()
if os.getenv('ALL_LOCAL') == 'False':
response = client.audio.speech.create(
model="tts-1",
voice="alloy",
input=text,
response_format="mp3"
)
with tempfile.NamedTemporaryFile(suffix=".mp3") as temp_file:
response.stream_to_file(temp_file.name)
if play_audio:
playsound(temp_file.name)
return temp_file.read()
else:
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
output_file = temp_file.name
piper_dir = os.path.join(os.path.dirname(__file__), 'local_tts', 'piper')
subprocess.run([
os.path.join(piper_dir, 'piper'),
'--model', os.path.join(piper_dir, os.getenv('PIPER_VOICE_NAME')),
'--output_file', output_file
], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
if play_audio:
playsound(temp_file.name)
return temp_file.read()

Loading…
Cancel
Save