send wav files

pull/256/head
Ty Fiero 9 months ago
parent f673744f1b
commit 0602348f1c

3
.gitignore vendored

@ -169,3 +169,6 @@ cython_debug/
_.aifs _.aifs
software/output_audio.wav software/output_audio.wav
.DS_Store .DS_Store
node_modules/
.expo/

@ -21,8 +21,7 @@ from ..utils.accumulator import Accumulator
from .utils.logs import setup_logging from .utils.logs import setup_logging
from .utils.logs import logger from .utils.logs import logger
import base64 import base64
from google.cloud import storage import shutil
from ..utils.print_markdown import print_markdown from ..utils.print_markdown import print_markdown
os.environ["STT_RUNNER"] = "server" os.environ["STT_RUNNER"] = "server"
@ -394,31 +393,31 @@ def stream_tts(sentence):
with open(audio_file, "rb") as f: with open(audio_file, "rb") as f:
audio_bytes = f.read() audio_bytes = f.read()
desktop_path = os.path.join(os.path.expanduser('~'), 'Desktop')
storage_client = storage.Client(project="react-native-421323") desktop_audio_file = os.path.join(desktop_path, os.path.basename(audio_file))
bucket = storage_client.bucket("01-audio") shutil.copy(audio_file, desktop_audio_file)
blob = bucket.blob(f"{datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav") print(f"Audio file saved to Desktop: {desktop_audio_file}")
generation_match_precondition = 0 # storage_client = storage.Client(project="react-native-421323")
# bucket = storage_client.bucket("01-audio")
blob.upload_from_filename( # blob = bucket.blob(f"{datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
audio_file, if_generation_match=generation_match_precondition # generation_match_precondition = 0
)
print( # blob.upload_from_filename(
f"Audio file {audio_file} uploaded to {datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav" # audio_file, if_generation_match=generation_match_precondition
) # )
# print(
# f"Audio file {audio_file} uploaded to {datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav"
# )
os.remove(audio_file) os.remove(audio_file)
file_type = "bytes.raw" file_type = "audio/wav"
chunk_size = 1024 # Read the entire WAV file
with open(audio_file, "rb") as f:
# Stream the audio audio_bytes = f.read()
yield {"role": "assistant", "type": "audio", "format": file_type, "start": True}
for i in range(0, len(audio_bytes), chunk_size):
chunk = audio_bytes[i : i + chunk_size]
yield chunk
yield {"role": "assistant", "type": "audio", "format": file_type, "end": True}
# Stream the audio as a single message
yield {"role": "assistant", "type": "audio", "format": file_type, "content": base64.b64encode(audio_bytes).decode('utf-8'), "start": True, "end": True}
from uvicorn import Config, Server from uvicorn import Config, Server
import os import os

@ -36,9 +36,9 @@ class Tts:
response.stream_to_file(temp_file.name) response.stream_to_file(temp_file.name)
# TODO: hack to format audio correctly for device # TODO: hack to format audio correctly for device
outfile = tempfile.gettempdir() + "/" + "raw.dat" outfile = tempfile.gettempdir() + "/" + "output.wav"
ffmpeg.input(temp_file.name).output( ffmpeg.input(temp_file.name).output(
outfile, f="s16le", ar="16000", ac="1", loglevel="panic" outfile, f="wav", ar="16000", ac="1", loglevel="panic"
).run() ).run()
return outfile return outfile

@ -100,7 +100,7 @@ def create_tunnel(
# If ngrok is installed, start it on the specified port # If ngrok is installed, start it on the specified port
# process = subprocess.Popen(f'ngrok http {server_port} --log=stdout', shell=True, stdout=subprocess.PIPE) # process = subprocess.Popen(f'ngrok http {server_port} --log=stdout', shell=True, stdout=subprocess.PIPE)
process = subprocess.Popen( process = subprocess.Popen(
f"ngrok http {server_port} --scheme http,https --domain=sterling-snail-conversely.ngrok-free.app --log=stdout", f"ngrok http {server_port} --scheme http,https --log=stdout",
shell=True, shell=True,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
) )

Loading…
Cancel
Save