"""
Defines a function which takes text and yields the synthesized speech as
chunks of MP3 bytes.
"""

import os
import tempfile
from datetime import datetime
from io import BytesIO

import pydub
import pydub.playback
from openai import OpenAI

client = OpenAI()

# Number of bytes requested per chunk while downloading the API response.
chunk_size = 1024
# Number of bytes handed to the caller per yielded chunk.
read_chunk_size = 4096


def tts(text):
    """Synthesize *text* to speech and stream the resulting MP3 back.

    The audio is requested from the OpenAI speech endpoint (model ``tts-1``,
    voice ``alloy``, MP3 output, speed 1.2), spooled to a temporary file and
    then yielded to the caller in chunks of at most ``read_chunk_size`` bytes
    until the whole file has been delivered.

    Once every chunk has been consumed, the clip is also played locally
    through ``pydub.playback``. The temporary file is removed when the
    generator finishes, fails, or is closed early.

    Args:
        text: The text to convert to speech.

    Yields:
        bytes: Consecutive chunks of the MP3-encoded audio.
    """
    # mkstemp creates a unique file atomically, so concurrent calls cannot
    # collide and the cleanup below always has a file to remove (a failed
    # request therefore cannot be masked by FileNotFoundError).
    fd, output_path = tempfile.mkstemp(prefix="output_", suffix=".mp3")
    # Only the reserved path is needed; the file is reopened by name below.
    os.close(fd)

    try:
        with client.with_streaming_response.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=text,
            response_format="mp3",
            speed=1.2,
        ) as response:
            with open(output_path, "wb") as f:
                for chunk in response.iter_bytes(chunk_size):
                    f.write(chunk)

        with open(output_path, "rb") as f:
            # Keep reading until EOF (read() returns b"") so the caller gets
            # the complete clip rather than only its first chunk.
            for byte_chunk in iter(lambda: f.read(read_chunk_size), b""):
                yield byte_chunk

        seg = pydub.AudioSegment.from_mp3(output_path)
        pydub.playback.play(seg)
    finally:
        os.remove(output_path)