add different sample rates for mic and speakers on 01

1 year ago · 2d15bae1ad
parent 3642905ca3
commit 2d15bae1ad
3 changed files with 890 additions and 871 deletions
--- a/software/source/clients/esp32/src/client/client.ino
+++ b/software/source/clients/esp32/src/client/client.ino
@ -541,7 +541,9 @@ void tryReconnectWiFi() {
 }
 void tryReconnectToServer() {
    preferences.begin("network", true); // Open Preferences with the "network" namespace in ReadOnly mode
-    String serverURL = preferences.getString("server_url", ""); // Get stored server URL, if any
+    const String SERVER_URL="sterling-snail-conversely.ngrok-free.app";
    String serverURL = SERVER_URL; // Use the hard-coded SERVER_URL instead of the URL stored in Preferences
    // String serverURL = preferences.getString("server_url", ""); // Get stored server URL, if any
    preferences.end(); // Close the Preferences
    if (!serverURL.isEmpty()) {
@ -573,6 +575,9 @@ void tryReconnectToServer() {
 #define MAX_DATA_LEN (1024 * 9)
 #define MIC_SAMPLE_RATE 16000
 #define SPEAKER_SAMPLE_RATE 24000  // or 22050 for OpenAI TTS
 uint8_t microphonedata0[1024 * 10];
 uint8_t speakerdata0[1024 * 1];
 int speaker_offset;
@ -615,7 +620,6 @@ void InitI2SSpeakerOrMic(int mode)
    i2s_driver_uninstall(SPEAKER_I2S_NUMBER);
    i2s_config_t i2s_config = {
        .mode = (i2s_mode_t)(I2S_MODE_MASTER),
        .sample_rate = 16000,
        .bits_per_sample =
            I2S_BITS_PER_SAMPLE_16BIT, // is fixed at 12bit, stereo, MSB
        .channel_format = I2S_CHANNEL_FMT_ALL_RIGHT,
@ -633,12 +637,14 @@ void InitI2SSpeakerOrMic(int mode)
    {
        i2s_config.mode =
            (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_PDM);
        i2s_config.sample_rate = MIC_SAMPLE_RATE;
    }
    else
    {
        i2s_config.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX);
        i2s_config.use_apll = false;
        i2s_config.tx_desc_auto_clear = true;
        i2s_config.sample_rate = SPEAKER_SAMPLE_RATE;
    }
    err += i2s_driver_install(SPEAKER_I2S_NUMBER, &i2s_config, 0, NULL);
@ -652,8 +658,10 @@ void InitI2SSpeakerOrMic(int mode)
    tx_pin_config.data_out_num = CONFIG_I2S_DATA_PIN;
    tx_pin_config.data_in_num = CONFIG_I2S_DATA_IN_PIN;
    err += i2s_set_pin(SPEAKER_I2S_NUMBER, &tx_pin_config);
-    err += i2s_set_clk(SPEAKER_I2S_NUMBER, 16000, I2S_BITS_PER_SAMPLE_16BIT,
+    err += i2s_set_clk(SPEAKER_I2S_NUMBER,
-                    I2S_CHANNEL_MONO);
+                        (mode == MODE_MIC) ? MIC_SAMPLE_RATE : SPEAKER_SAMPLE_RATE,      // set the sample rate here as well
                        I2S_BITS_PER_SAMPLE_16BIT,
                        I2S_CHANNEL_MONO);
 }
 void speaker_play(uint8_t *payload, uint32_t len)
@ -783,15 +791,18 @@ void setup() {
    Serial.setTxBufferSize(1024); // Set the transmit buffer size for the Serial object.
    WiFi.mode(WIFI_AP_STA); // Set WiFi mode to both AP and STA.
-
+    const String WIFI_NAME="gunner1";
    const String WIFI_PASSWORD="startup1";
    // delay(100); // Short delay to ensure mode change takes effect
    // WiFi.softAPConfig(localIP, gatewayIP, subnetMask);
    // WiFi.softAP(ssid, password);
-    startSoftAccessPoint(ssid, password, localIP, gatewayIP);
+    // startSoftAccessPoint(ssid, password, localIP, gatewayIP);
    connectToWifi(WIFI_NAME, WIFI_PASSWORD);
    setUpDNSServer(dnsServer, localIP);
    tryReconnectToServer();
-    setUpWebserver(server, localIP);
+    // setUpWebserver(server, localIP);
-    tryReconnectWiFi();
+    // tryReconnectWiFi();
    // Print a welcome message to the Serial port.
    Serial.println("\n\nCaptive Test, V0.5.0 compiled " __DATE__ " " __TIME__ " by CD_FER");
    Serial.printf("%s-%d\n\r", ESP.getChipModel(), ESP.getChipRevision());
--- a/software/source/server/async_interpreter.py
+++ b/software/source/server/async_interpreter.py
@ -11,7 +11,7 @@
 ###
 from pynput import keyboard
-
+from .utils.bytes_to_wav import bytes_to_wav
 from RealtimeTTS import TextToAudioStream, CoquiEngine, OpenAIEngine, ElevenlabsEngine
 from RealtimeSTT import AudioToTextRecorder
 import time
@ -23,6 +23,7 @@ import os
 class AsyncInterpreter:
    def __init__(self, interpreter):
        self.interpreter = interpreter
        self.audio_chunks = []
        # STT
        self.stt = AudioToTextRecorder(
@ -73,6 +74,7 @@ class AsyncInterpreter:
        if isinstance(chunk, bytes):
            # It's probably a chunk of audio
            self.stt.feed_audio(chunk)
            self.audio_chunks.append(chunk)
            # print("INTERPRETER FEEDING AUDIO")
        else:
@ -171,6 +173,12 @@ class AsyncInterpreter:
        message = self.stt.text()
        if self.audio_chunks:
            audio_bytes = bytearray(b"".join(self.audio_chunks))
            wav_file_path = bytes_to_wav(audio_bytes, "audio/raw")
            print("wav_file_path ", wav_file_path)
            self.audio_chunks = []
        print(message)
        # Feed generate to RealtimeTTS
@ -181,8 +189,8 @@ class AsyncInterpreter:
        text_iterator = self.generate(message, start_interpreter)
        self.tts.feed(text_iterator)
-
+        if not self.tts.is_playing():
-        self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True)
+            self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True)
        while True:
            await asyncio.sleep(0.1)
--- a/software/source/server/profiles/default.py
+++ b/software/source/server/profiles/default.py
@ -5,7 +5,7 @@ from interpreter import interpreter
 # 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
 # {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
-interpreter.tts = "elevenlabs"
+interpreter.tts = "openai"
 # Connect your 01 to a language model
 interpreter.llm.model = "gpt-4-turbo"