Add different sample rates for mic and speakers on 01

pull/279/head
Ben Xu 7 months ago
parent 3642905ca3
commit 2d15bae1ad

@ -541,7 +541,9 @@ void tryReconnectWiFi() {
}
void tryReconnectToServer() {
preferences.begin("network", true); // Open Preferences with the "network" namespace in ReadOnly mode
String serverURL = preferences.getString("server_url", ""); // Get stored server URL, if any
const String SERVER_URL="sterling-snail-conversely.ngrok-free.app";
String serverURL = SERVER_URL; // Get stored server URL, if any
// String serverURL = preferences.getString("server_url", ""); // Get stored server URL, if any
preferences.end(); // Close the Preferences
if (!serverURL.isEmpty()) {
@ -573,6 +575,9 @@ void tryReconnectToServer() {
// Audio buffer sizing and I2S sample-rate constants.
// NOTE(review): the consumers of MAX_DATA_LEN are not visible here; presumably it caps the number of recorded bytes — confirm.
#define MAX_DATA_LEN (1024 * 9)
// Sample rate (Hz) applied to the I2S driver when it is configured in microphone mode.
#define MIC_SAMPLE_RATE 16000
// Sample rate (Hz) applied to the I2S driver when it is configured in speaker mode.
#define SPEAKER_SAMPLE_RATE 24000 // or 22050 for OpenAI TTS
// 10 KiB byte buffer; presumably holds captured microphone audio — confirm against the recording code.
uint8_t microphonedata0[1024 * 10];
// 1 KiB byte buffer; presumably holds audio staged for the speaker — confirm against the playback code.
uint8_t speakerdata0[1024 * 1];
// NOTE(review): presumably the current position within the speaker data; its usage is not visible here.
int speaker_offset;
@ -615,7 +620,6 @@ void InitI2SSpeakerOrMic(int mode)
i2s_driver_uninstall(SPEAKER_I2S_NUMBER);
i2s_config_t i2s_config = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER),
.sample_rate = 16000,
.bits_per_sample =
I2S_BITS_PER_SAMPLE_16BIT, // is fixed at 12bit, stereo, MSB
.channel_format = I2S_CHANNEL_FMT_ALL_RIGHT,
@ -633,12 +637,14 @@ void InitI2SSpeakerOrMic(int mode)
{
i2s_config.mode =
(i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_PDM);
i2s_config.sample_rate = MIC_SAMPLE_RATE;
}
else
{
i2s_config.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX);
i2s_config.use_apll = false;
i2s_config.tx_desc_auto_clear = true;
i2s_config.sample_rate = SPEAKER_SAMPLE_RATE;
}
err += i2s_driver_install(SPEAKER_I2S_NUMBER, &i2s_config, 0, NULL);
@ -652,7 +658,9 @@ void InitI2SSpeakerOrMic(int mode)
tx_pin_config.data_out_num = CONFIG_I2S_DATA_PIN;
tx_pin_config.data_in_num = CONFIG_I2S_DATA_IN_PIN;
err += i2s_set_pin(SPEAKER_I2S_NUMBER, &tx_pin_config);
err += i2s_set_clk(SPEAKER_I2S_NUMBER, 16000, I2S_BITS_PER_SAMPLE_16BIT,
err += i2s_set_clk(SPEAKER_I2S_NUMBER,
(mode == MODE_MIC) ? MIC_SAMPLE_RATE : SPEAKER_SAMPLE_RATE, // set the sample rate here as well
I2S_BITS_PER_SAMPLE_16BIT,
I2S_CHANNEL_MONO);
}
@ -783,15 +791,18 @@ void setup() {
Serial.setTxBufferSize(1024); // Set the transmit buffer size for the Serial object.
WiFi.mode(WIFI_AP_STA); // Set WiFi mode to both AP and STA.
const String WIFI_NAME="gunner1";
const String WIFI_PASSWORD="startup1";
// delay(100); // Short delay to ensure mode change takes effect
// WiFi.softAPConfig(localIP, gatewayIP, subnetMask);
// WiFi.softAP(ssid, password);
startSoftAccessPoint(ssid, password, localIP, gatewayIP);
// startSoftAccessPoint(ssid, password, localIP, gatewayIP);
connectToWifi(WIFI_NAME, WIFI_PASSWORD);
setUpDNSServer(dnsServer, localIP);
tryReconnectToServer();
setUpWebserver(server, localIP);
tryReconnectWiFi();
// setUpWebserver(server, localIP);
// tryReconnectWiFi();
// Print a welcome message to the Serial port.
Serial.println("\n\nCaptive Test, V0.5.0 compiled " __DATE__ " " __TIME__ " by CD_FER");
Serial.printf("%s-%d\n\r", ESP.getChipModel(), ESP.getChipRevision());

@ -11,7 +11,7 @@
###
from pynput import keyboard
from .utils.bytes_to_wav import bytes_to_wav
from RealtimeTTS import TextToAudioStream, CoquiEngine, OpenAIEngine, ElevenlabsEngine
from RealtimeSTT import AudioToTextRecorder
import time
@ -23,6 +23,7 @@ import os
class AsyncInterpreter:
def __init__(self, interpreter):
self.interpreter = interpreter
self.audio_chunks = []
# STT
self.stt = AudioToTextRecorder(
@ -73,6 +74,7 @@ class AsyncInterpreter:
if isinstance(chunk, bytes):
# It's probably a chunk of audio
self.stt.feed_audio(chunk)
self.audio_chunks.append(chunk)
# print("INTERPRETER FEEDING AUDIO")
else:
@ -171,6 +173,12 @@ class AsyncInterpreter:
message = self.stt.text()
if self.audio_chunks:
audio_bytes = bytearray(b"".join(self.audio_chunks))
wav_file_path = bytes_to_wav(audio_bytes, "audio/raw")
print("wav_file_path ", wav_file_path)
self.audio_chunks = []
print(message)
# Feed generate to RealtimeTTS
@ -181,7 +189,7 @@ class AsyncInterpreter:
text_iterator = self.generate(message, start_interpreter)
self.tts.feed(text_iterator)
if not self.tts.is_playing():
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True)
while True:

@ -5,7 +5,7 @@ from interpreter import interpreter
# 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
# {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
interpreter.tts = "elevenlabs"
interpreter.tts = "openai"
# Connect your 01 to a language model
interpreter.llm.model = "gpt-4-turbo"

Loading…
Cancel
Save