add different sample rates for mic and speakers on 01

1 year ago · 2d15bae1ad
parent 3642905ca3
commit 2d15bae1ad
3 changed files with 890 additions and 871 deletions
--- a/software/source/clients/esp32/src/client/client.ino
+++ b/software/source/clients/esp32/src/client/client.ino
--- a/software/source/server/async_interpreter.py
+++ b/software/source/server/async_interpreter.py
@@ -11,7 +11,7 @@

 ###
 from pynput import keyboard
-
+from .utils.bytes_to_wav import bytes_to_wav
 from RealtimeTTS import TextToAudioStream, CoquiEngine, OpenAIEngine, ElevenlabsEngine
 from RealtimeSTT import AudioToTextRecorder
 import time
@@ -23,6 +23,7 @@ import os
 class AsyncInterpreter:
    def __init__(self, interpreter):
        self.interpreter = interpreter
+        self.audio_chunks = []

        # STT
        self.stt = AudioToTextRecorder(
@@ -73,6 +74,7 @@ class AsyncInterpreter:
        if isinstance(chunk, bytes):
            # It's probably a chunk of audio
            self.stt.feed_audio(chunk)
+            self.audio_chunks.append(chunk)
            # print("INTERPRETER FEEDING AUDIO")

        else:
@@ -171,6 +173,12 @@ class AsyncInterpreter:

        message = self.stt.text()

+        if self.audio_chunks:
+            audio_bytes = bytearray(b"".join(self.audio_chunks))
+            wav_file_path = bytes_to_wav(audio_bytes, "audio/raw")
+            print("wav_file_path ", wav_file_path)
+            self.audio_chunks = []
+
        print(message)

        # Feed generate to RealtimeTTS
@@ -181,8 +189,8 @@ class AsyncInterpreter:
        text_iterator = self.generate(message, start_interpreter)

        self.tts.feed(text_iterator)
-
-        self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True)
+        if not self.tts.is_playing():
+            self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True)

        while True:
            await asyncio.sleep(0.1)
--- a/software/source/server/profiles/default.py
+++ b/software/source/server/profiles/default.py
@@ -5,7 +5,7 @@ from interpreter import interpreter

 # 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
 # {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
-interpreter.tts = "elevenlabs"
+interpreter.tts = "openai"

 # Connect your 01 to a language model
 interpreter.llm.model = "gpt-4-turbo"