add different sample rates for mic and speakers on 01

pull/279/head
Ben Xu 7 months ago
parent 3642905ca3
commit 2d15bae1ad

File diff suppressed because it is too large. [Load Diff]

@@ -11,7 +11,7 @@
### ###
from pynput import keyboard from pynput import keyboard
from .utils.bytes_to_wav import bytes_to_wav
from RealtimeTTS import TextToAudioStream, CoquiEngine, OpenAIEngine, ElevenlabsEngine from RealtimeTTS import TextToAudioStream, CoquiEngine, OpenAIEngine, ElevenlabsEngine
from RealtimeSTT import AudioToTextRecorder from RealtimeSTT import AudioToTextRecorder
import time import time
@@ -23,6 +23,7 @@ import os
class AsyncInterpreter: class AsyncInterpreter:
def __init__(self, interpreter): def __init__(self, interpreter):
self.interpreter = interpreter self.interpreter = interpreter
self.audio_chunks = []
# STT # STT
self.stt = AudioToTextRecorder( self.stt = AudioToTextRecorder(
@@ -73,6 +74,7 @@ class AsyncInterpreter:
if isinstance(chunk, bytes): if isinstance(chunk, bytes):
# It's probably a chunk of audio # It's probably a chunk of audio
self.stt.feed_audio(chunk) self.stt.feed_audio(chunk)
self.audio_chunks.append(chunk)
# print("INTERPRETER FEEDING AUDIO") # print("INTERPRETER FEEDING AUDIO")
else: else:
@@ -171,6 +173,12 @@ class AsyncInterpreter:
message = self.stt.text() message = self.stt.text()
if self.audio_chunks:
audio_bytes = bytearray(b"".join(self.audio_chunks))
wav_file_path = bytes_to_wav(audio_bytes, "audio/raw")
print("wav_file_path ", wav_file_path)
self.audio_chunks = []
print(message) print(message)
# Feed generate to RealtimeTTS # Feed generate to RealtimeTTS
@@ -181,8 +189,8 @@ class AsyncInterpreter:
text_iterator = self.generate(message, start_interpreter) text_iterator = self.generate(message, start_interpreter)
self.tts.feed(text_iterator) self.tts.feed(text_iterator)
if not self.tts.is_playing():
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True) self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True)
while True: while True:
await asyncio.sleep(0.1) await asyncio.sleep(0.1)

@@ -5,7 +5,7 @@ from interpreter import interpreter
# 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers # 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
# {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"} # {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
interpreter.tts = "elevenlabs" interpreter.tts = "openai"
# Connect your 01 to a language model # Connect your 01 to a language model
interpreter.llm.model = "gpt-4-turbo" interpreter.llm.model = "gpt-4-turbo"

Loading…
Cancel
Save