From 0602348f1c377445a4c648dca4bbb20f0a585934 Mon Sep 17 00:00:00 2001 From: Ty Fiero Date: Wed, 24 Apr 2024 17:59:41 -0700 Subject: [PATCH] send wav files --- .gitignore | 3 ++ software/source/server/server.py | 45 +++++++++---------- .../source/server/services/tts/openai/tts.py | 4 +- software/source/server/tunnel.py | 2 +- 4 files changed, 28 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index aeaed36..da03d3f 100644 --- a/.gitignore +++ b/.gitignore @@ -169,3 +169,6 @@ cython_debug/ _.aifs software/output_audio.wav .DS_Store + +node_modules/ +.expo/ \ No newline at end of file diff --git a/software/source/server/server.py b/software/source/server/server.py index 2328f52..dd71b06 100644 --- a/software/source/server/server.py +++ b/software/source/server/server.py @@ -21,8 +21,7 @@ from ..utils.accumulator import Accumulator from .utils.logs import setup_logging from .utils.logs import logger import base64 -from google.cloud import storage - +import shutil from ..utils.print_markdown import print_markdown os.environ["STT_RUNNER"] = "server" @@ -394,31 +393,31 @@ def stream_tts(sentence): with open(audio_file, "rb") as f: audio_bytes = f.read() - - storage_client = storage.Client(project="react-native-421323") - bucket = storage_client.bucket("01-audio") - blob = bucket.blob(f"{datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav") - generation_match_precondition = 0 - - blob.upload_from_filename( - audio_file, if_generation_match=generation_match_precondition - ) - print( - f"Audio file {audio_file} uploaded to {datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav" - ) + desktop_path = os.path.join(os.path.expanduser('~'), 'Desktop') + desktop_audio_file = os.path.join(desktop_path, os.path.basename(audio_file)) + shutil.copy(audio_file, desktop_audio_file) + print(f"Audio file saved to Desktop: {desktop_audio_file}") + # storage_client = storage.Client(project="react-native-421323") + # bucket = storage_client.bucket("01-audio") + # blob = bucket.blob(f"{datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav") + # generation_match_precondition = 0 + + # blob.upload_from_filename( + # audio_file, if_generation_match=generation_match_precondition + # ) + # print( + # f"Audio file {audio_file} uploaded to {datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav" + # ) os.remove(audio_file) - file_type = "bytes.raw" - chunk_size = 1024 - - # Stream the audio - yield {"role": "assistant", "type": "audio", "format": file_type, "start": True} - for i in range(0, len(audio_bytes), chunk_size): - chunk = audio_bytes[i : i + chunk_size] - yield chunk - yield {"role": "assistant", "type": "audio", "format": file_type, "end": True} + file_type = "audio/wav" + # Read the entire WAV file + with open(audio_file, "rb") as f: + audio_bytes = f.read() + # Stream the audio as a single message + yield {"role": "assistant", "type": "audio", "format": file_type, "content": base64.b64encode(audio_bytes).decode('utf-8'), "start": True, "end": True} from uvicorn import Config, Server import os diff --git a/software/source/server/services/tts/openai/tts.py b/software/source/server/services/tts/openai/tts.py index 07e1eec..021353b 100644 --- a/software/source/server/services/tts/openai/tts.py +++ b/software/source/server/services/tts/openai/tts.py @@ -36,9 +36,9 @@ class Tts: response.stream_to_file(temp_file.name) # TODO: hack to format audio correctly for device - outfile = tempfile.gettempdir() + "/" + "raw.dat" + outfile = tempfile.gettempdir() + "/" + "output.wav" ffmpeg.input(temp_file.name).output( - outfile, f="s16le", ar="16000", ac="1", loglevel="panic" + outfile, f="wav", ar="16000", ac="1", loglevel="panic" ).run() return outfile diff --git a/software/source/server/tunnel.py b/software/source/server/tunnel.py index 0e0ad17..f25a0b3 100644 --- a/software/source/server/tunnel.py +++ b/software/source/server/tunnel.py @@ -100,7 +100,7 @@ def create_tunnel( # If ngrok is installed, start it on the specified port # process = subprocess.Popen(f'ngrok http {server_port} --log=stdout', shell=True, stdout=subprocess.PIPE) process = subprocess.Popen( - f"ngrok http {server_port} --scheme http,https --domain=sterling-snail-conversely.ngrok-free.app --log=stdout", + f"ngrok http {server_port} --scheme http,https --log=stdout", shell=True, stdout=subprocess.PIPE, )