Merge branch 'main' into hb/prompt_updates

2 years ago · 5a82976f2e
parent 0c6187363f 387bc00a20
commit 5a82976f2e
11 changed files with 664 additions and 137 deletions
--- a/01OS/01OS/clients/base_device.py
+++ b/01OS/01OS/clients/base_device.py
@ -329,6 +329,6 @@ class Device:
                listener.start()

    def start(self):
-        if os.getenv('TEACH_MODE') == "False":
+        if os.getenv('TEACH_MODE') != "True":
            asyncio.run(self.start_async())
            p.terminate()
--- a/01OS/01OS/clients/esp32/playback.ino
+++ b/01OS/01OS/clients/esp32/playback.ino
@ -1,95 +0,0 @@
-/*Press button to record,released button to playback*/
-
-#include <driver/i2s.h>
-#include <M5Atom.h>
-
-#define CONFIG_I2S_BCK_PIN 19
-#define CONFIG_I2S_LRCK_PIN 33
-#define CONFIG_I2S_DATA_PIN 22
-#define CONFIG_I2S_DATA_IN_PIN 23
-
-#define SPEAKER_I2S_NUMBER I2S_NUM_0
-
-#define MODE_MIC 0
-#define MODE_SPK 1
-#define DATA_SIZE 1024
-
-uint8_t microphonedata0[1024 * 70];
-int data_offset = 0;
-
-void InitI2SSpeakerOrMic(int mode) {
-  esp_err_t err = ESP_OK;
-
-  i2s_driver_uninstall(SPEAKER_I2S_NUMBER);
-  i2s_config_t i2s_config = {
-    .mode = (i2s_mode_t)(I2S_MODE_MASTER),
-    .sample_rate = 16000,
-    .bits_per_sample =
-      I2S_BITS_PER_SAMPLE_16BIT,  // is fixed at 12bit, stereo, MSB
-    .channel_format = I2S_CHANNEL_FMT_ALL_RIGHT,
-#if ESP_IDF_VERSION > ESP_IDF_VERSION_VAL(4, 1, 0)
-    .communication_format =
-      I2S_COMM_FORMAT_STAND_I2S,  // Set the format of the communication.
-#else                             // 设置通讯格式
-    .communication_format = I2S_COMM_FORMAT_I2S,
-#endif
-    .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
-    .dma_buf_count = 6,
-    .dma_buf_len = 60,
-  };
-  if (mode == MODE_MIC) {
-    i2s_config.mode =
-      (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_PDM);
-  } else {
-    i2s_config.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX);
-    i2s_config.use_apll = false;
-    i2s_config.tx_desc_auto_clear = true;
-  }
-
-  err += i2s_driver_install(SPEAKER_I2S_NUMBER, &i2s_config, 0, NULL);
-  i2s_pin_config_t tx_pin_config;
-
-#if (ESP_IDF_VERSION > ESP_IDF_VERSION_VAL(4, 3, 0))
-  tx_pin_config.mck_io_num = I2S_PIN_NO_CHANGE;
-#endif
-  tx_pin_config.bck_io_num = CONFIG_I2S_BCK_PIN;
-  tx_pin_config.ws_io_num = CONFIG_I2S_LRCK_PIN;
-  tx_pin_config.data_out_num = CONFIG_I2S_DATA_PIN;
-  tx_pin_config.data_in_num = CONFIG_I2S_DATA_IN_PIN;
-
-  // Serial.println("Init i2s_set_pin");
-  err += i2s_set_pin(SPEAKER_I2S_NUMBER, &tx_pin_config);
-  // Serial.println("Init i2s_set_clk");
-  err += i2s_set_clk(SPEAKER_I2S_NUMBER, 16000, I2S_BITS_PER_SAMPLE_16BIT,
-                     I2S_CHANNEL_MONO);
-}
-
-void setup() {
-  M5.begin(true, false, true);
-  M5.dis.drawpix(0, CRGB(128, 128, 0));
-  delay(2000);
-}
-
-void loop() {
-  if (M5.Btn.isPressed()) {
-    data_offset = 0;
-    InitI2SSpeakerOrMic(MODE_MIC);
-    M5.dis.drawpix(0, CRGB(128, 128, 0));
-    size_t byte_read;
-
-    while (1) {
-      i2s_read(SPEAKER_I2S_NUMBER,
-               (char *)(microphonedata0 + data_offset), DATA_SIZE,
-               &byte_read, (100 / portTICK_RATE_MS));
-      data_offset += 1024;
-      M5.update();
-      if (M5.Btn.isReleased() || data_offset >= 71679) break;
-      // delay(60);
-    }
-    size_t bytes_written;
-    InitI2SSpeakerOrMic(MODE_SPK);
-    i2s_write(SPEAKER_I2S_NUMBER, microphonedata0, data_offset,
-              &bytes_written, portMAX_DELAY);
-  }
-  M5.update();
-}
--- a/01OS/01OS/clients/esp32/playback/playback.ino
+++ b/01OS/01OS/clients/esp32/playback/playback.ino
@ -0,0 +1,243 @@
+/*Press button to record,released button to playback*/
+
+#include <driver/i2s.h>
+#include <M5Atom.h>
+
+#include <Arduino.h>
+
+#include <WiFi.h>
+#include <WiFiMulti.h>
+#include <WiFiClientSecure.h>
+
+#include <WebSocketsClient.h>
+
+
+#define COMPUTER_IP "192.168.68.87"
+
+#define CONFIG_I2S_BCK_PIN 19
+#define CONFIG_I2S_LRCK_PIN 33
+#define CONFIG_I2S_DATA_PIN 22
+#define CONFIG_I2S_DATA_IN_PIN 23
+
+#define SPEAKER_I2S_NUMBER I2S_NUM_0
+
+#define MODE_MIC 0
+#define MODE_SPK 1
+#define DATA_SIZE 1024
+
+uint8_t microphonedata0[1024 * 10];
+uint8_t speakerdata0[1024 * 1];
+int speaker_offset = 0;
+int data_offset = 0;
+
+WebSocketsClient webSocket;
+
+class ButtonChecker {  // Edge detector for the M5 button: compares the state sampled on this tick with the previous tick.
+  public:
+    void loop() {  // Sample the button; call once per tick so the previous sample becomes lastTickState.
+      lastTickState = thisTickState;
+      thisTickState = M5.Btn.isPressed() != 0;
+    }
+
+    bool justPressed() {  // True when the button is down on this tick but was up on the previous one.
+      return thisTickState && !lastTickState;
+    }
+
+    bool justReleased() {  // True when the button is up on this tick but was down on the previous one.
+      return !thisTickState && lastTickState;
+    }
+
+  private:
+    bool lastTickState = false;  // button state captured by the previous loop() call
+    bool thisTickState = false;  // button state captured by the most recent loop() call
+};
+
+ButtonChecker button = ButtonChecker();
+
+
+
+void hexdump(const void *mem, uint32_t len, uint8_t cols = 16) {  // Print len bytes starting at mem to Serial as hex, cols bytes per row.
+  const uint8_t* src = (const uint8_t*) mem;
+  Serial.printf("\n[HEXDUMP] Address: 0x%08X len: 0x%X (%d)", (ptrdiff_t)src, len, len);
+  for (uint32_t i = 0; i < len; i++) {
+    if (i % cols == 0) {  // start of a row: print the current address and byte offset (cols must be non-zero)
+      Serial.printf("\n[0x%08X] 0x%08X: ", (ptrdiff_t)src, i);
+    }
+    Serial.printf("%02X ", *src);
+    src++;
+  }
+  Serial.printf("\n");
+}
+
+void InitI2SSpeakerOrMic(int mode) {  // Reinstall the I2S driver for microphone input (MODE_MIC) or speaker output (any other mode), 16 kHz / 16-bit / mono.
+  Serial.printf("InitI2sSpeakerOrMic %d\n", mode);
+  esp_err_t err = ESP_OK;  // accumulates the driver return codes below; never inspected in this function
+
+  i2s_driver_uninstall(SPEAKER_I2S_NUMBER);  // drop the current configuration before installing the new one
+  i2s_config_t i2s_config = {
+    .mode = (i2s_mode_t)(I2S_MODE_MASTER),
+    .sample_rate = 16000,
+    .bits_per_sample =
+    I2S_BITS_PER_SAMPLE_16BIT,  // is fixed at 12bit, stereo, MSB
+    .channel_format = I2S_CHANNEL_FMT_ALL_RIGHT,
+#if ESP_IDF_VERSION > ESP_IDF_VERSION_VAL(4, 1, 0)
+    .communication_format =
+    I2S_COMM_FORMAT_STAND_I2S,  // Set the format of the communication.
+#else                             // Set the communication format
+    .communication_format = I2S_COMM_FORMAT_I2S,
+#endif
+    .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
+    .dma_buf_count = 6,
+    .dma_buf_len = 60,
+  };
+  if (mode == MODE_MIC) {  // microphone: receive in PDM mode
+    i2s_config.mode =
+      (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_PDM);
+  } else {  // speaker: transmit
+    i2s_config.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX);
+    i2s_config.use_apll = false;
+    i2s_config.tx_desc_auto_clear = true;
+  }
+
+  err += i2s_driver_install(SPEAKER_I2S_NUMBER, &i2s_config, 0, NULL);
+  i2s_pin_config_t tx_pin_config;  // only the fields assigned below are set; the struct is not zero-initialised
+
+#if (ESP_IDF_VERSION > ESP_IDF_VERSION_VAL(4, 3, 0))
+  tx_pin_config.mck_io_num = I2S_PIN_NO_CHANGE;
+#endif
+  tx_pin_config.bck_io_num = CONFIG_I2S_BCK_PIN;
+  tx_pin_config.ws_io_num = CONFIG_I2S_LRCK_PIN;
+  tx_pin_config.data_out_num = CONFIG_I2S_DATA_PIN;
+  tx_pin_config.data_in_num = CONFIG_I2S_DATA_IN_PIN;
+
+  // Serial.println("Init i2s_set_pin");
+  err += i2s_set_pin(SPEAKER_I2S_NUMBER, &tx_pin_config);
+  // Serial.println("Init i2s_set_clk");
+  err += i2s_set_clk(SPEAKER_I2S_NUMBER, 16000, I2S_BITS_PER_SAMPLE_16BIT,
+                     I2S_CHANNEL_MONO);
+}
+
+void speaker_play(uint8_t *payload,  uint32_t len){  // Switch I2S to speaker mode and write len bytes of audio from payload.
+    Serial.printf("received %lu bytes", len);
+    size_t bytes_written;  // filled in by i2s_write; not used afterwards
+    InitI2SSpeakerOrMic(MODE_SPK);
+    i2s_write(SPEAKER_I2S_NUMBER, payload, len,
+    &bytes_written, portMAX_DELAY);
+}
+
+// Handle one event reported by the WebSocket client.
+//   type    - kind of event (connect, disconnect, text frame, binary frame, ...)
+//   payload - event data: the URL on connect, the frame body for text/binary
+//   length  - number of bytes in payload
+// Text frames mentioning "audio" and "start" reset the playback state and put
+// the I2S peripheral in speaker mode. Binary frames are treated as audio and
+// written to the speaker as they arrive.
+void webSocketEvent(WStype_t type, uint8_t * payload, size_t length) {
+  switch (type) {
+    case WStype_DISCONNECTED:
+      Serial.printf("[WSc] Disconnected!\n");
+      break;
+    case WStype_CONNECTED:
+      Serial.printf("[WSc] Connected to url: %s\n", payload);
+
+      // send message to server when Connected
+      break;
+    case WStype_TEXT:
+      Serial.printf("[WSc] get text: %s\n", payload);
+      {
+        // Cheap substring matching instead of JSON parsing: look for the
+        // quoted keys in the message text.
+        std::string str(payload, payload + length);
+        bool isAudio = str.find("\"audio\"") != std::string::npos;
+        if (isAudio && str.find("\"start\"") != std::string::npos) {
+          Serial.println("start playback");
+          speaker_offset = 0;
+          InitI2SSpeakerOrMic(MODE_SPK);
+        } else if (isAudio && str.find("\"end\"") != std::string::npos) {
+          Serial.println("end playback");
+          // speaker_play(speakerdata0, speaker_offset);
+          // speaker_offset = 0;
+        }
+      }
+      // send message to server
+      // webSocket.sendTXT("message here");
+      break;
+    case WStype_BIN:
+      {
+        Serial.printf("[WSc] get binary length: %u\n", length);
+        // Play straight from the receive buffer. The frame used to be copied
+        // into speakerdata0 (1024 bytes) first with no bounds check, so any
+        // frame larger than that buffer overflowed it. speaker_offset is
+        // always 0 on entry, so the copy added nothing.
+        size_t bytes_written = 0;
+        i2s_write(SPEAKER_I2S_NUMBER, payload, length, &bytes_written, portMAX_DELAY);
+        speaker_offset = 0;
+      }
+
+      // send data to server
+      // webSocket.sendBIN(payload, length);
+      break;
+    case WStype_ERROR:
+    case WStype_FRAGMENT_TEXT_START:
+    case WStype_FRAGMENT_BIN_START:
+    case WStype_FRAGMENT:
+    case WStype_FRAGMENT_FIN:
+      break;
+    default:
+      // Any other event type is ignored.
+      break;
+  }
+
+}
+
+void websocket_setup() {  // Start Serial, join the hard-coded WiFi network, then configure the WebSocket client.
+  Serial.begin(115200);
+  WiFi.begin("Soundview_Guest", "");
+  while (WiFi.status() != WL_CONNECTED){  // no timeout: loops until the connection succeeds
+    delay(500);
+    Serial.println("connecting to WiFi");
+  }
+  Serial.println("connected to WiFi");
+  webSocket.begin(COMPUTER_IP, 8000, "/");  // server address comes from the COMPUTER_IP define
+  webSocket.onEvent(webSocketEvent);
+  //    webSocket.setAuthorization("user", "Password");
+  webSocket.setReconnectInterval(5000);
+}
+
+void setup() {  // One-time init: M5 board, status pixel, WiFi + WebSocket, then I2S in speaker mode.
+  M5.begin(true, false, true);
+  M5.dis.drawpix(0, CRGB(128, 128, 0));
+  websocket_setup();  // does not return until WiFi is connected
+  InitI2SSpeakerOrMic(MODE_SPK);
+
+  delay(2000);
+}
+
+bool recording = false;
+
+void flush_microphone() {  // Send the buffered microphone bytes to the server as one binary frame and reset the buffer offset.
+  Serial.printf("[microphone] flushing %d bytes of data\n", data_offset);
+  if (data_offset == 0) return;  // nothing buffered: skip the send
+  webSocket.sendBIN(microphonedata0, data_offset);
+  data_offset = 0;
+}
+
+// Main loop: poll the button and stream microphone audio while it is held.
+//   - on press:   announce the start of an audio message and switch I2S to
+//                 microphone mode
+//   - while held: read up to DATA_SIZE bytes per pass into microphonedata0 and
+//                 flush to the server once more than 9 KiB are buffered
+//   - on release: send the remaining audio, then announce the end of the message
+// Every pass also services the M5 library and the WebSocket client.
+void loop() {
+  button.loop();
+  if (button.justPressed()) {
+    Serial.println("Recording...");
+    webSocket.sendTXT("{\"role\": \"user\", \"type\": \"audio\", \"format\": \"bytes.raw\", \"start\": true}");
+    InitI2SSpeakerOrMic(MODE_MIC);
+    recording = true;
+    data_offset = 0;
+    Serial.println("Recording ready.");
+  } else if (button.justReleased()) {
+    Serial.println("Stopped recording.");
+    // Flush BEFORE sending the end marker. The marker used to go out first,
+    // so the last buffered audio arrived after the message had been closed.
+    flush_microphone();
+    webSocket.sendTXT("{\"role\": \"user\", \"type\": \"audio\", \"format\": \"bytes.raw\", \"end\": true}");
+    recording = false;
+    data_offset = 0;
+  } else if (recording) {
+    Serial.printf("Reading chunk at %d...\n", data_offset);
+    size_t bytes_read;
+    i2s_read(
+      SPEAKER_I2S_NUMBER,
+      (char *)(microphonedata0 + data_offset),
+      DATA_SIZE, &bytes_read, (100 / portTICK_RATE_MS)
+    );
+    data_offset += bytes_read;
+    Serial.printf("Read %d bytes in chunk.\n", bytes_read);
+
+    // Flush while a full DATA_SIZE read still fits in the 10 KiB buffer.
+    if (data_offset > 1024*9) {
+      flush_microphone();
+    }
+  }
+
+  M5.update();
+  webSocket.loop();
+}
--- a/01OS/01OS/clients/esp32/websocket_test.py
+++ b/01OS/01OS/clients/esp32/websocket_test.py
@ -0,0 +1,47 @@
+#!/usr/bin/env python
+
+"""A basic echo server for testing the device."""
+
+import asyncio
+import uuid
+import websockets
+from websockets.server import serve
+import traceback
+
+
+def divide_chunks(l, n):  # Yield successive slices of `l`, each at most `n` items long.
+    # looping till length l
+    for i in range(0, len(l), n):
+        yield l[i : i + n]  # the final slice may be shorter than n
+
+
+buffers: dict[uuid.UUID, bytearray] = {}
+
+
+async def echo(websocket: websockets.WebSocketServerProtocol):
+    async for message in websocket:
+        try:
+            if message == "s":
+                print("starting stream for", websocket.id)
+                buffers[websocket.id] = bytearray()
+            elif message == "e":
+                print("end, echoing stream for", websocket.id)
+                await websocket.send("s")
+                for chunk in divide_chunks(buffers[websocket.id], 1000):
+                    await websocket.send(chunk)
+                await websocket.send("e")
+            elif type(message) is bytes:
+                print("recvd", len(message), "bytes from", websocket.id)
+                buffers[websocket.id].extend(message)
+            else:
+                print("ERR: recvd unknown message", message[:10], "from", websocket.id)
+        except Exception as _e:
+            traceback.print_exc()
+
+
+async def main():  # Serve `echo` on 0.0.0.0:9001 until the process is stopped.
+    async with serve(echo, "0.0.0.0", 9001):
+        await asyncio.Future()  # run forever
+
+
+asyncio.run(main())
--- a/01OS/01OS/clients/esp32/wifi_captiveportal/wifi_captiveportal.ino
+++ b/01OS/01OS/clients/esp32/wifi_captiveportal/wifi_captiveportal.ino
@ -0,0 +1,260 @@
+#include <Arduino.h> //not needed in the arduino ide
+
+// Captive Portal
+#include <AsyncTCP.h> //https://github.com/me-no-dev/AsyncTCP using the latest dev version from @me-no-dev
+#include <DNSServer.h>
+#include <ESPAsyncWebServer.h> //https://github.com/me-no-dev/ESPAsyncWebServer using the latest dev version from @me-no-dev
+#include <esp_wifi.h>          //Used for mpdu_rx_disable android workaround
+
+// Pre reading on the fundamentals of captive portals https://textslashplain.com/2022/06/24/captive-portals/
+
+const char *ssid = "captive"; // FYI The SSID can't have a space in it.
+// const char * password = "12345678"; // At least 8 chars
+const char *password = NULL; // no password
+
+#define MAX_CLIENTS 4  // ESP32 supports up to 10 but I have not tested it yet
+#define WIFI_CHANNEL 6 // 2.4ghz channel 6 https://en.wikipedia.org/wiki/List_of_WLAN_channels#2.4_GHz_(802.11b/g/n/ax)
+
+const IPAddress localIP(4, 3, 2, 1);          // the IP address the web server, Samsung requires the IP to be in public space
+const IPAddress gatewayIP(4, 3, 2, 1);        // IP address of the network should be the same as the local IP for captive portals
+const IPAddress subnetMask(255, 255, 255, 0); // no need to change: https://avinetworks.com/glossary/subnet-mask/
+
+const String localIPURL = "http://4.3.2.1"; // a string version of the local IP with http, used for redirecting clients to your webpage
+
+// Escape the characters that are significant in HTML text and attribute values.
+static String htmlEscape(const String &text)
+{
+    String escaped;
+    escaped.reserve(text.length());
+    for (unsigned int i = 0; i < text.length(); ++i)
+    {
+        const char c = text.charAt(i);
+        switch (c)
+        {
+        case '&':
+            escaped += "&amp;";
+            break;
+        case '<':
+            escaped += "&lt;";
+            break;
+        case '>':
+            escaped += "&gt;";
+            break;
+        case '"':
+            escaped += "&quot;";
+            break;
+        case '\'':
+            escaped += "&#39;";
+            break;
+        default:
+            escaped += c;
+            break;
+        }
+    }
+    return escaped;
+}
+
+// Build the Wi-Fi selection page: a form posting to /submit with one <option>
+// per network found by the last scan, plus a password field.
+// Returns the complete HTML document. When WiFi.scanComplete() reports no
+// results (zero or a negative status) the select list is empty.
+String generateHTMLWithSSIDs()
+{
+    String html = "<!DOCTYPE html><html><body><h2>Select Wi-Fi Network</h2><form action='/submit' method='POST'><label for='ssid'>SSID:</label><select id='ssid' name='ssid'>";
+
+    int n = WiFi.scanComplete();
+    for (int i = 0; i < n; ++i)
+    {
+        // SSIDs are chosen by whoever runs the nearby access points, so they
+        // are untrusted input: escape them before placing them in markup.
+        const String ssid = htmlEscape(WiFi.SSID(i));
+        html += "<option value='" + ssid + "'>" + ssid + "</option>";
+    }
+
+    html += "</select><br><label for='password'>Password:</label><input type='password' id='password' name='password'><br><input type='submit' value='Connect'></form></body></html>";
+
+    return html;
+}
+
+const char index_html[] PROGMEM = R"=====(
+<!DOCTYPE html>
+<html>
+<head>
+  <title>ESP32 WiFi Setup</title>
+  <style>
+    body {background-color:#06cc13;}
+    h1 {color: white;}
+    h2 {color: white;}
+  </style>
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+</head>
+<body>
+  <h1>WiFi Setup</h1>
+  <form action="/submit" method="post">
+    <label for="ssid">SSID:</label><br>
+    <input type="text" id="ssid" name="ssid"><br>
+    <label for="password">Password:</label><br>
+    <input type="password" id="password" name="password"><br><br>
+    <input type="submit" value="Connect">
+  </form>
+</body>
+</html>
+)=====";
+
+DNSServer dnsServer;
+AsyncWebServer server(80);
+
+void setUpDNSServer(DNSServer &dnsServer, const IPAddress &localIP)  // Start dnsServer on port 53, answering every name ("*") with localIP.
+{
+// Define the DNS interval in milliseconds between processing DNS requests
+#define DNS_INTERVAL 30  // consumed by loop() as its delay between DNS polls
+
+    // Set the TTL for DNS response and start the DNS server
+    dnsServer.setTTL(3600);
+    dnsServer.start(53, "*", localIP);
+}
+
+void startSoftAccessPoint(const char *ssid, const char *password, const IPAddress &localIP, const IPAddress &gatewayIP)  // Bring up the soft AP with the given credentials and addresses, then restart the WiFi driver with AMPDU RX disabled.
+{
+// Define the maximum number of clients that can connect to the server
+#define MAX_CLIENTS 4  // repeats the file-level definition with the same value
+// Define the WiFi channel to be used (channel 6 in this case)
+#define WIFI_CHANNEL 6  // repeats the file-level definition with the same value
+
+    // Set the WiFi mode to access point and station
+    // WiFi.mode(WIFI_MODE_AP);
+
+    // Define the subnet mask for the WiFi network
+    const IPAddress subnetMask(255, 255, 255, 0);  // shadows the file-level subnetMask of the same value
+
+    // Configure the soft access point with a specific IP and subnet mask
+    WiFi.softAPConfig(localIP, gatewayIP, subnetMask);
+
+    // Start the soft access point with the given ssid, password, channel, max number of clients
+    WiFi.softAP(ssid, password, WIFI_CHANNEL, 0, MAX_CLIENTS);
+
+    // Disable AMPDU RX on the ESP32 WiFi to fix a bug on Android
+    esp_wifi_stop();
+    esp_wifi_deinit();
+    wifi_init_config_t my_config = WIFI_INIT_CONFIG_DEFAULT();
+    my_config.ampdu_rx_enable = false;
+    esp_wifi_init(&my_config);
+    esp_wifi_start();
+    vTaskDelay(100 / portTICK_PERIOD_MS); // Add a small delay
+}
+
+void connectToWifi(String ssid, String password)  // Try to join the given network, polling once per second for up to 20 attempts, and log the outcome to Serial.
+{
+    WiFi.begin(ssid.c_str(), password.c_str());
+
+    // Wait for connection to establish
+    int attempts = 0;
+    while (WiFi.status() != WL_CONNECTED && attempts < 20)  // blocks the caller for up to ~20 s (20 x delay(1000))
+    {
+        delay(1000);
+        Serial.print(".");
+        attempts++;
+    }
+
+    if (WiFi.status() == WL_CONNECTED)
+    {
+        Serial.println("Connected to Wi-Fi");
+    }
+    else
+    {
+        Serial.println("Failed to connect to Wi-Fi. Check credentials.");
+    }
+}
+
+void setUpWebserver(AsyncWebServer &server, const IPAddress &localIP)  // Register all HTTP routes: OS captive-portal probes, the setup page at "/", the catch-all redirect and the /submit form handler.
+{
+    //======================== Webserver ========================
+    // WARNING IOS (and maybe macos) WILL NOT POP UP IF IT CONTAINS THE WORD "Success" https://www.esp8266.com/viewtopic.php?f=34&t=4398
+    // SAFARI (IOS) IS STUPID, G-ZIPPED FILES CAN'T END IN .GZ https://github.com/homieiot/homie-esp8266/issues/476 this is fixed by the webserver serve static function.
+    // SAFARI (IOS) there is a 128KB limit to the size of the HTML. The HTML can reference external resources/images that bring the total over 128KB
+    // SAFARI (IOS) popup browser has some severe limitations (javascript disabled, cookies disabled)
+
+    // Required
+    server.on("/connecttest.txt", [](AsyncWebServerRequest *request)
+              { request->redirect("http://logout.net"); }); // windows 11 captive portal workaround
+    server.on("/wpad.dat", [](AsyncWebServerRequest *request)
+              { request->send(404); }); // Honestly don't understand what this is but a 404 stops win 10 keep calling this repeatedly and panicking the esp32 :)
+
+    // Background responses: Probably not all are Required, but some are. Others might speed things up?
+    // A Tier (commonly used by modern systems)
+    server.on("/generate_204", [](AsyncWebServerRequest *request)
+              { request->redirect(localIPURL); }); // android captive portal redirect
+    server.on("/redirect", [](AsyncWebServerRequest *request)
+              { request->redirect(localIPURL); }); // microsoft redirect
+    server.on("/hotspot-detect.html", [](AsyncWebServerRequest *request)
+              { request->redirect(localIPURL); }); // apple call home
+    server.on("/canonical.html", [](AsyncWebServerRequest *request)
+              { request->redirect(localIPURL); }); // firefox captive portal call home
+    server.on("/success.txt", [](AsyncWebServerRequest *request)
+              { request->send(200); }); // firefox captive portal call home
+    server.on("/ncsi.txt", [](AsyncWebServerRequest *request)
+              { request->redirect(localIPURL); }); // windows call home
+
+    // B Tier (uncommon)
+    //  server.on("/chrome-variations/seed",[](AsyncWebServerRequest *request){request->send(200);}); //chrome captive portal call home
+    //  server.on("/service/update2/json",[](AsyncWebServerRequest *request){request->send(200);}); //firefox?
+    //  server.on("/chat",[](AsyncWebServerRequest *request){request->send(404);}); //No stop asking Whatsapp, there is no internet connection
+    //  server.on("/startpage",[](AsyncWebServerRequest *request){request->redirect(localIPURL);});
+
+    // return 404 to webpage icon
+    server.on("/favicon.ico", [](AsyncWebServerRequest *request)
+              { request->send(404); }); // webpage icon
+
+    // Serve Basic HTML Page
+    server.on("/", HTTP_ANY, [](AsyncWebServerRequest *request)
+              {
+	String htmlContent = index_html;  // fallback: static form with a free-text SSID field
+    Serial.printf("wifi scan complete: %d . WIFI_SCAN_RUNNING: %d", WiFi.scanComplete(), WIFI_SCAN_RUNNING);
+    if(WiFi.scanComplete() > 0) {
+      // Scan complete, process results
+      Serial.println("done scanning wifi");
+      htmlContent = generateHTMLWithSSIDs();
+      // WiFi.scanNetworks(true); // Start a new scan in async mode
+    }
+		AsyncWebServerResponse *response = request->beginResponse(200, "text/html", htmlContent);
+		response->addHeader("Cache-Control", "public,max-age=31536000");  // save this file to cache for 1 year (unless you refresh)
+		request->send(response);
+		Serial.println("Served Basic HTML Page"); });
+
+    // the catch all
+    server.onNotFound([](AsyncWebServerRequest *request)
+                      {
+		request->redirect(localIPURL);
+		Serial.print("onnotfound ");
+		Serial.print(request->host());	// This gives some insight into whatever was being requested on the serial monitor
+		Serial.print(" ");
+		Serial.print(request->url());
+		Serial.print(" sent redirect to " + localIPURL + "\n"); });
+
+    server.on("/submit", HTTP_POST, [](AsyncWebServerRequest *request)
+              {
+    String ssid;  // stays empty when the form field is missing
+    String password;  // stays empty when the form field is missing
+    
+    // Check if SSID parameter exists and assign it
+    if(request->hasParam("ssid", true)) {
+        ssid = request->getParam("ssid", true)->value();
+    }
+
+    // Check if Password parameter exists and assign it
+    if(request->hasParam("password", true)) {
+        password = request->getParam("password", true)->value();
+    }
+
+    // Attempt to connect to the Wi-Fi network with these credentials
+    connectToWifi(ssid, password);  // NOTE(review): blocks inside the request handler for up to ~20 s before the response below is sent
+
+    // Redirect user or send a response back
+    request->send(200, "text/plain", "Attempting to connect to " + ssid); });
+}
+
+void setup()  // Boot sequence: Serial, AP+STA mode, soft AP, DNS server, async network scan, web server.
+{
+    // Set the transmit buffer size for the Serial object and start it with a baud rate of 115200.
+    Serial.setTxBufferSize(1024);
+    Serial.begin(115200);
+
+    // Wait for the Serial object to become available.
+    while (!Serial)
+        ;
+
+    WiFi.mode(WIFI_AP_STA);
+
+    // Print a welcome message to the Serial port.
+    Serial.println("\n\nCaptive Test, V0.5.0 compiled " __DATE__ " " __TIME__ " by CD_FER"); //__DATE__ is provided by the platformio ide
+    Serial.printf("%s-%d\n\r", ESP.getChipModel(), ESP.getChipRevision());
+
+    startSoftAccessPoint(ssid, password, localIP, gatewayIP);
+
+    setUpDNSServer(dnsServer, localIP);
+
+    WiFi.scanNetworks(true);  // the "/" handler checks WiFi.scanComplete() to decide whether to list the results
+
+    setUpWebserver(server, localIP);
+    server.begin();
+
+    Serial.print("\n");
+    Serial.print("Startup Time:"); // should be somewhere between 270-350 for Generic ESP32 (D0WDQ6 chip, can have a higher startup time on first boot)
+    Serial.println(millis());
+    Serial.print("\n");
+}
+
+void loop()  // Service one DNS request per pass, wait DNS_INTERVAL ms, and log connection details while joined to a network.
+{ 
+    dnsServer.processNextRequest(); // I call this at least every 10ms in my other projects (can be higher but I haven't tested it for stability)
+    delay(DNS_INTERVAL);            // seems to help with stability, if you are doing other things in the loop this may not be needed
+
+    // Check WiFi connection status
+    if (WiFi.status() == WL_CONNECTED)  // true on every pass while connected, so these lines print each DNS_INTERVAL
+    {
+        // If connected, you might want to do something, like printing the IP address
+        Serial.println("Connected to WiFi!");
+        Serial.println("IP Address: " + WiFi.localIP().toString());
+        Serial.println("SSID " + WiFi.SSID());
+    }
+}
--- a/01OS/01OS/server/i.py
+++ b/01OS/01OS/server/i.py
@ -47,6 +47,21 @@ def configure_interpreter(interpreter: OpenInterpreter):
    Be very concise. Ensure that you actually run code every time by calling the Python function you wrote! THIS IS IMPORTANT. You NEED to write code. **Help the user by being very concise in your answers.** Do not break down tasks excessively, just into simple, few minute steps. Don't assume the user lives their life in a certain way— pick very general tasks if you're breaking a task down.

    Prefer to use the following functions (assume they're imported) to complete your goals whenever possible:
+
+    ALWAYS REMEMBER: You are running on a device called the O1, where the interface is entirely speech-based. Keep your responses succint in light of this!
+    IF YOU NEED TO THINK ABOUT A PROBLEM: (such as "Here's the plan:"), WRITE IT IN THE COMMENTS of the code block!
+    For example:
+    > User: What is 432/7?
+    > Assistant: Let me use Python to calculate that.
+    > Assistant Python function call:
+    >   # Here's the plan:
+    >   # 1. Divide the numbers
+    >   # 2. Round it to 3 digits.
+    >   print(round(432/7, 3))
+    > Assistant: 432 / 7 is 61.714.
+
+    Use the following functions (assume they're imported) to complete your goals whenever possible:
+
    {{
 import sys

@ -76,7 +91,9 @@ print(output)

    """.strip()

-    interpreter.custom_instructions = system_message
+    # interpreter.custom_instructions = system_message
+    interpreter.system_message = system_message
+    interpreter.llm.supports_functions = True

    ### LLM SETTINGS

--- a/01OS/01OS/server/server.py
+++ b/01OS/01OS/server/server.py
@ -201,7 +201,7 @@ async def listener():

        accumulated_text = ""
        
-        for chunk in interpreter.chat(messages, stream=True, display=False):
+        for chunk in interpreter.chat(messages, stream=True, display=True):

            logger.debug("Got chunk:", chunk)

@ -212,7 +212,7 @@ async def listener():
            
            if os.getenv('TTS_RUNNER') == "server":
                # Speak full sentences out loud
-                if chunk["role"] == "assistant" and "content" in chunk:
+                if chunk["role"] == "assistant" and "content" in chunk and chunk["type"] == "message":
                    accumulated_text += chunk["content"]
                    sentences = split_into_sentences(accumulated_text)
                    
@ -241,7 +241,7 @@ async def listener():
                # Check if it's just an end flag. We ignore those.
                temp_message = await from_user.get()
                
-                if temp_message == {'role': 'user', 'type': 'message', 'end': True}:
+                if type(temp_message) is dict and temp_message.get("role") == "user" and temp_message.get("end"):
                    # Yup. False alarm.
                    continue
                else:
@ -251,8 +251,9 @@ async def listener():
                with open(conversation_history_path, 'w') as file:
                    json.dump(interpreter.messages, file, indent=4)

-                logger.info("New user message recieved. Breaking.")
-                break
+                # TODO: is triggering seemingly randomly
+                #logger.info("New user message recieved. Breaking.")
+                #break

            # Also check if there's any new computer messages
            if not from_computer.empty():
--- a/01OS/01OS/server/stt/stt.py
+++ b/01OS/01OS/server/stt/stt.py
@ -25,6 +25,8 @@ def convert_mime_type_to_format(mime_type: str) -> str:
        return "wav"
    if mime_type == "audio/webm":
        return "webm"
+    if mime_type == "audio/raw":
+        return "dat"

    return mime_type

@ -43,7 +45,16 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:

    # Export to wav
    output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
-    ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
+    print(mime_type, input_path, output_path)
+    if mime_type == "audio/raw":
+        ffmpeg.input(
+            input_path,
+            f='s16le',
+            ar='16000',
+            ac=1,
+        ).output(output_path).run()
+    else:
+        ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()

    try:
        yield output_path
--- a/01OS/01OS/server/teach.py
+++ b/01OS/01OS/server/teach.py
@ -1,9 +1,7 @@
 from datetime import datetime
 from .utils.logs import setup_logging, logger
-import tkinter as tk
-import tkinter.simpledialog
 from interpreter import interpreter
-from tkinter import messagebox
+from tkinter import messagebox, Button, simpledialog, Tk, Label, Frame, LEFT, ACTIVE
 from ..utils.accumulator import Accumulator
 import time
 import os
@ -17,6 +15,39 @@ class Skill:
        self.steps = []
        self.code = ""

+class StepCheckDialog(simpledialog.Dialog):  # Dialog asking whether a step was done correctly; the chosen answer is stored in self.result.
+    def body(self, master):  # Build the dialog content: window title plus the question label.
+        self.title("Step Check")  # Set the title of the dialog window
+        description = "Did I do this step correctly?"  # Add window description
+        Label(master, text=description).pack()  # Display window description
+
+    def buttonbox(self):  # Lay out the Yes / No / Task Complete buttons and the keyboard shortcuts.
+        box = Frame(self)
+        Button(box, text="Yes", width=10, command=self.yes_action, default=ACTIVE).pack(side=LEFT, padx=5, pady=5)
+        Button(box, text="No", width=10, command=self.no_action).pack(side=LEFT, padx=5, pady=5)
+        Button(box, text="Task Complete", width=10, command=self.task_complete_action).pack(side=LEFT, padx=5, pady=5)
+
+        self.bind("<Return>", self.yes_action)  # Enter answers "Yes"
+        self.bind("<Escape>", self.no_action)  # Escape answers "No"
+
+        box.pack()
+
+    def yes_action(self, event=None):  # Record "Yes" and close; event is supplied when triggered by a key binding.
+        self.result = "Yes"
+        self.destroy()
+
+    def no_action(self, event=None):  # Record "No" and close; event is supplied when triggered by a key binding.
+        self.result = "No"
+        self.destroy()
+
+    def task_complete_action(self, event=None):  # Record "Task Complete" and close.
+        self.result = "Task Complete"
+        self.destroy()
+
+    def done(self, result):  # Store an arbitrary result and close; not called anywhere in the visible code.
+        self.result = result
+        self.destroy()
+
 def to_camel_case(text):
    words = text.split()
    camel_case_string = words[0].lower() + ''.join(word.title() for word in words[1:])
@ -36,34 +67,38 @@ def generate_python_steps(function_name, steps):
    return code_string

 def teach():
-    root = tk.Tk()
+    root = Tk()
    root.withdraw()
-
-    skill_name = tkinter.simpledialog.askstring("Skill Name", "Please enter the name for the skill:")
-    skill = Skill(skill_name)
-    while True:
-        step = tkinter.simpledialog.askstring("Next Step", "Enter the next step (or 'end' to finish): ")
-        logger.info(f"Performing step: {step}")
-        if step == "end":
-            break
-
-        chunk_code = ""
-        interpreter.computer.languages = [l for l in interpreter.computer.languages if l.name.lower() == "python"]
-        interpreter.force_task_completion = True
-        for chunk in interpreter.chat(step, stream=True, display=False):
-            if "format" in chunk and chunk["format"] == "execution":
-                content = chunk["content"]
-                language = content["format"]
-                code = content["content"]
-                chunk_code += code
-                interpreter.computer.run(code, language)
-            time.sleep(0.05)
-            accumulator.accumulate(chunk)
-
-        isCorrect = messagebox.askyesno("To Proceed?", "Did I do this step right?")
-        if isCorrect:
-            skill.steps.append(step)
-            skill.code += chunk_code
+    skill_name = simpledialog.askstring("Skill Name", "Please enter the name for the skill:", parent=root)
+    if skill_name:
+        skill = Skill(skill_name)
+        while True:
+            step = simpledialog.askstring("Next Step", "Enter the next step (or 'end' to finish): ", parent=root)
+            if step is None or step == "end":
+                break
+            elif step.strip() == "":
+                continue
+            logger.info(f"Performing step: {step}")
+            chunk_code = ""
+            interpreter.computer.languages = [l for l in interpreter.computer.languages if l.name.lower() == "python"]
+            interpreter.force_task_completion = True
+            for chunk in interpreter.chat(step, stream=True, display=False):
+                if "format" in chunk and chunk["format"] == "execution":
+                    content = chunk["content"]
+                    language = content["format"]
+                    code = content["content"]
+                    chunk_code += code
+                    interpreter.computer.run(code, language)
+                time.sleep(0.05)
+                accumulator.accumulate(chunk)
+            
+            stepCheckDialog = StepCheckDialog(root)
+            stepCheckResult = stepCheckDialog.result
+            if stepCheckResult == "Yes" or stepCheckResult == "Task Complete":
+                skill.steps.append(step)
+                skill.code += chunk_code
+                if stepCheckResult == "Task Complete":
+                    break

    # Uncomment this incase you want steps instead of code
    #python_code = generate_python_steps(skill.skill_name, skill.steps)
@ -71,5 +106,6 @@ def teach():
    python_code = generate_python_code(skill.skill_name, skill.code)
    SKILLS_DIR = os.path.dirname(__file__) + "/skills"
    filename = os.path.join(SKILLS_DIR, f"{skill.skill_name.replace(' ', '_')}.py")
+    logger.info(f"Saving skill to: {filename}")
    with open(filename, "w") as file:
        file.write(python_code)
--- a/01OS/01OS/server/tts/tts.py
+++ b/01OS/01OS/server/tts/tts.py
@ -6,6 +6,7 @@ from pydub import AudioSegment
 from dotenv import load_dotenv
 load_dotenv()  # take environment variables from .env.

+import ffmpeg
 import tempfile
 from openai import OpenAI
 import os
@ -28,11 +29,17 @@ def stream_tts(text):
            input=text,
            response_format="opus"
        )
-        with tempfile.NamedTemporaryFile(suffix=".opus") as temp_file:
+        with tempfile.NamedTemporaryFile(suffix=".opus", delete=False) as temp_file:
            response.stream_to_file(temp_file.name)

-            audio_bytes = temp_file.read()
-            file_type = "bytes.opus"
+            # TODO: hack to format audio correctly for device
+            outfile = tempfile.gettempdir() + "/" + "raw.dat"
+            ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run()
+            with open(outfile, "rb") as f:
+                audio_bytes = f.read()
+            file_type = "bytes.raw"
+            print(outfile, len(audio_bytes))
+            os.remove(outfile)

    else:
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
--- a/01OS/pyproject.toml
+++ b/01OS/pyproject.toml
@ -4,7 +4,7 @@ packages = [
    {include = "01OS"},
 ]
 include = [".env.example", "start.py", "start.sh"]
-version = "0.0.3"
+version = "0.0.4"
 description = "The open-source language model computer"
 authors = ["Killian <killian@openinterpreter.com>"]
 license = "AGPL"