Merge branch 'main' into hb/prompt_updates

pull/47/head
killian 11 months ago committed by GitHub
commit 5a82976f2e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -329,6 +329,6 @@ class Device:
listener.start() listener.start()
def start(self): def start(self):
if os.getenv('TEACH_MODE') == "False": if os.getenv('TEACH_MODE') != "True":
asyncio.run(self.start_async()) asyncio.run(self.start_async())
p.terminate() p.terminate()

@ -1,95 +0,0 @@
/* Press and hold the button to record; release it to play the recording back. */
#include <driver/i2s.h>
#include <M5Atom.h>
#define CONFIG_I2S_BCK_PIN 19
#define CONFIG_I2S_LRCK_PIN 33
#define CONFIG_I2S_DATA_PIN 22
#define CONFIG_I2S_DATA_IN_PIN 23
#define SPEAKER_I2S_NUMBER I2S_NUM_0
#define MODE_MIC 0
#define MODE_SPK 1
#define DATA_SIZE 1024
uint8_t microphonedata0[1024 * 70];
int data_offset = 0;
// Reinstall the I2S driver on SPEAKER_I2S_NUMBER for either microphone
// capture (mode == MODE_MIC) or speaker playback (any other mode value).
//
// The currently installed driver is uninstalled first; the driver is then
// installed with a 16 kHz / 16-bit configuration, the pins are assigned and
// the clock is set to 16 kHz mono.
//
// NOTE(review): the return codes of the ESP-IDF calls are summed into `err`,
// but `err` is never inspected or returned, so a failure goes unnoticed.
void InitI2SSpeakerOrMic(int mode) {
esp_err_t err = ESP_OK;
// Drop whatever driver is currently installed before reconfiguring.
i2s_driver_uninstall(SPEAKER_I2S_NUMBER);
i2s_config_t i2s_config = {
// Placeholder mode; overwritten below once the direction is known.
.mode = (i2s_mode_t)(I2S_MODE_MASTER),
.sample_rate = 16000,
.bits_per_sample =
I2S_BITS_PER_SAMPLE_16BIT, // NOTE(review): inherited comment said "fixed at 12bit, stereo, MSB", but the value is 16-bit - confirm
.channel_format = I2S_CHANNEL_FMT_ALL_RIGHT,
#if ESP_IDF_VERSION > ESP_IDF_VERSION_VAL(4, 1, 0)
.communication_format =
I2S_COMM_FORMAT_STAND_I2S, // Set the format of the communication.
#else // Set the communication format (constant used when ESP_IDF_VERSION is not above 4.1.0).
.communication_format = I2S_COMM_FORMAT_I2S,
#endif
.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
.dma_buf_count = 6,
.dma_buf_len = 60,
};
if (mode == MODE_MIC) {
// Microphone: master, receive, PDM.
i2s_config.mode =
(i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_PDM);
} else {
// Speaker: master, transmit; APLL off and tx_desc_auto_clear on.
i2s_config.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX);
i2s_config.use_apll = false;
i2s_config.tx_desc_auto_clear = true;
}
err += i2s_driver_install(SPEAKER_I2S_NUMBER, &i2s_config, 0, NULL);
i2s_pin_config_t tx_pin_config;
#if (ESP_IDF_VERSION > ESP_IDF_VERSION_VAL(4, 3, 0))
// mck_io_num is only assigned when ESP_IDF_VERSION is above 4.3.0.
tx_pin_config.mck_io_num = I2S_PIN_NO_CHANGE;
#endif
tx_pin_config.bck_io_num = CONFIG_I2S_BCK_PIN;
tx_pin_config.ws_io_num = CONFIG_I2S_LRCK_PIN;
tx_pin_config.data_out_num = CONFIG_I2S_DATA_PIN;
tx_pin_config.data_in_num = CONFIG_I2S_DATA_IN_PIN;
// Serial.println("Init i2s_set_pin");
err += i2s_set_pin(SPEAKER_I2S_NUMBER, &tx_pin_config);
// Serial.println("Init i2s_set_clk");
err += i2s_set_clk(SPEAKER_I2S_NUMBER, 16000, I2S_BITS_PER_SAMPLE_16BIT,
I2S_CHANNEL_MONO);
}
// Arduino entry point, run once at boot: initialise the M5 board, set pixel 0
// to CRGB(128, 128, 0) and wait two seconds.
void setup() {
// NOTE(review): the three flags are presumably serial / I2C / display
// enables - confirm against the M5Atom library.
M5.begin(true, false, true);
M5.dis.drawpix(0, CRGB(128, 128, 0));
delay(2000);
}
// Arduino main loop: while the button is held, record microphone audio into
// microphonedata0; once it is released (or the buffer cannot take another
// chunk), switch the I2S peripheral to speaker mode and play the recording.
void loop() {
    if (M5.Btn.isPressed()) {
        data_offset = 0;
        InitI2SSpeakerOrMic(MODE_MIC);
        M5.dis.drawpix(0, CRGB(128, 128, 0));
        while (1) {
            size_t byte_read = 0;
            i2s_read(SPEAKER_I2S_NUMBER,
                     (char *)(microphonedata0 + data_offset), DATA_SIZE,
                     &byte_read, (100 / portTICK_RATE_MS));
            // Advance by what was actually captured. The read has a 100 ms
            // timeout and may return fewer than DATA_SIZE bytes; stepping a
            // fixed 1024 would leave stale bytes inside the recording.
            data_offset += byte_read;
            M5.update();
            // Stop on release, or when one more full chunk would not fit.
            if (M5.Btn.isReleased() ||
                data_offset + DATA_SIZE > (int)sizeof(microphonedata0)) {
                break;
            }
        }
        size_t bytes_written = 0;
        InitI2SSpeakerOrMic(MODE_SPK);
        i2s_write(SPEAKER_I2S_NUMBER, microphonedata0, data_offset,
                  &bytes_written, portMAX_DELAY);
    }
    M5.update();
}

@ -0,0 +1,243 @@
/* Hold the button to record and stream microphone audio to the WebSocket server; audio received from the server is played on the speaker. */
#include <driver/i2s.h>
#include <M5Atom.h>
#include <Arduino.h>
#include <WiFi.h>
#include <WiFiMulti.h>
#include <WiFiClientSecure.h>
#include <WebSocketsClient.h>
#define COMPUTER_IP "192.168.68.87"
#define CONFIG_I2S_BCK_PIN 19
#define CONFIG_I2S_LRCK_PIN 33
#define CONFIG_I2S_DATA_PIN 22
#define CONFIG_I2S_DATA_IN_PIN 23
#define SPEAKER_I2S_NUMBER I2S_NUM_0
#define MODE_MIC 0
#define MODE_SPK 1
#define DATA_SIZE 1024
uint8_t microphonedata0[1024 * 10];
uint8_t speakerdata0[1024 * 1];
int speaker_offset = 0;
int data_offset = 0;
WebSocketsClient webSocket;
// Edge detector for the M5 button: remembers the previous and the current
// sample so callers can ask whether the button changed state this tick.
class ButtonChecker {
  public:
    // Take one sample; call exactly once per main-loop iteration.
    void loop() {
        wasDown_ = isDown_;
        isDown_ = (M5.Btn.isPressed() != 0);
    }

    // True only on the tick where the button went from up to down.
    bool justPressed() { return isDown_ && !wasDown_; }

    // True only on the tick where the button went from down to up.
    bool justReleased() { return wasDown_ && !isDown_; }

  private:
    bool wasDown_ = false;  // sample from the previous tick
    bool isDown_ = false;   // sample from the current tick
};

ButtonChecker button;
// Print `len` bytes starting at `mem` to Serial as hex, `cols` bytes per row.
// Each row is prefixed with the address of its first byte and its offset.
void hexdump(const void *mem, uint32_t len, uint8_t cols = 16) {
    const uint8_t *base = (const uint8_t *)mem;
    Serial.printf("\n[HEXDUMP] Address: 0x%08X len: 0x%X (%d)", (ptrdiff_t)base, len, len);
    for (uint32_t offset = 0; offset < len; ++offset) {
        const uint8_t *cursor = base + offset;
        if (offset % cols == 0) {
            // Start of a new row.
            Serial.printf("\n[0x%08X] 0x%08X: ", (ptrdiff_t)cursor, offset);
        }
        Serial.printf("%02X ", *cursor);
    }
    Serial.printf("\n");
}
// Reinstall the I2S driver on SPEAKER_I2S_NUMBER for either microphone
// capture (mode == MODE_MIC) or speaker playback (any other mode value).
//
// The requested mode is logged, the currently installed driver is
// uninstalled, and the driver is then installed with a 16 kHz / 16-bit
// configuration; finally the pins are assigned and the clock is set to
// 16 kHz mono.
//
// NOTE(review): the return codes of the ESP-IDF calls are summed into `err`,
// but `err` is never inspected or returned, so a failure goes unnoticed.
void InitI2SSpeakerOrMic(int mode) {
Serial.printf("InitI2sSpeakerOrMic %d\n", mode);
esp_err_t err = ESP_OK;
// Drop whatever driver is currently installed before reconfiguring.
i2s_driver_uninstall(SPEAKER_I2S_NUMBER);
i2s_config_t i2s_config = {
// Placeholder mode; overwritten below once the direction is known.
.mode = (i2s_mode_t)(I2S_MODE_MASTER),
.sample_rate = 16000,
.bits_per_sample =
I2S_BITS_PER_SAMPLE_16BIT, // NOTE(review): inherited comment said "fixed at 12bit, stereo, MSB", but the value is 16-bit - confirm
.channel_format = I2S_CHANNEL_FMT_ALL_RIGHT,
#if ESP_IDF_VERSION > ESP_IDF_VERSION_VAL(4, 1, 0)
.communication_format =
I2S_COMM_FORMAT_STAND_I2S, // Set the format of the communication.
#else // Set the communication format (constant used when ESP_IDF_VERSION is not above 4.1.0).
.communication_format = I2S_COMM_FORMAT_I2S,
#endif
.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
.dma_buf_count = 6,
.dma_buf_len = 60,
};
if (mode == MODE_MIC) {
// Microphone: master, receive, PDM.
i2s_config.mode =
(i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_PDM);
} else {
// Speaker: master, transmit; APLL off and tx_desc_auto_clear on.
i2s_config.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX);
i2s_config.use_apll = false;
i2s_config.tx_desc_auto_clear = true;
}
err += i2s_driver_install(SPEAKER_I2S_NUMBER, &i2s_config, 0, NULL);
i2s_pin_config_t tx_pin_config;
#if (ESP_IDF_VERSION > ESP_IDF_VERSION_VAL(4, 3, 0))
// mck_io_num is only assigned when ESP_IDF_VERSION is above 4.3.0.
tx_pin_config.mck_io_num = I2S_PIN_NO_CHANGE;
#endif
tx_pin_config.bck_io_num = CONFIG_I2S_BCK_PIN;
tx_pin_config.ws_io_num = CONFIG_I2S_LRCK_PIN;
tx_pin_config.data_out_num = CONFIG_I2S_DATA_PIN;
tx_pin_config.data_in_num = CONFIG_I2S_DATA_IN_PIN;
// Serial.println("Init i2s_set_pin");
err += i2s_set_pin(SPEAKER_I2S_NUMBER, &tx_pin_config);
// Serial.println("Init i2s_set_clk");
err += i2s_set_clk(SPEAKER_I2S_NUMBER, 16000, I2S_BITS_PER_SAMPLE_16BIT,
I2S_CHANNEL_MONO);
}
// Log the byte count, switch the I2S peripheral to speaker mode and write
// `len` bytes from `payload` to it, waiting as long as the write needs.
void speaker_play(uint8_t *payload, uint32_t len){
    Serial.printf("received %lu bytes", len);
    InitI2SSpeakerOrMic(MODE_SPK);
    size_t written;
    i2s_write(SPEAKER_I2S_NUMBER, payload, len, &written, portMAX_DELAY);
}
// WebSocket event callback registered through webSocket.onEvent().
//
// type    - kind of event reported by the WebSockets library.
// payload - event data: URL on connect, message bytes for text/binary frames.
// length  - number of bytes in payload.
//
// Text frames that contain "audio" together with "start" switch the I2S
// peripheral to speaker mode; binary frames are played on the speaker.
void webSocketEvent(WStype_t type, uint8_t * payload, size_t length) {
    switch (type) {
        case WStype_DISCONNECTED:
            Serial.printf("[WSc] Disconnected!\n");
            break;

        case WStype_CONNECTED:
            Serial.printf("[WSc] Connected to url: %s\n", payload);
            break;

        case WStype_TEXT: {
            Serial.printf("[WSc] get text: %s\n", payload);
            std::string str(payload, payload + length);
            const bool isAudio = str.find("\"audio\"") != std::string::npos;
            if (isAudio && str.find("\"start\"") != std::string::npos) {
                Serial.println("start playback");
                speaker_offset = 0;
                InitI2SSpeakerOrMic(MODE_SPK);
            } else if (isAudio && str.find("\"end\"") != std::string::npos) {
                Serial.println("end playback");
            }
            break;
        }

        case WStype_BIN: {
            Serial.printf("[WSc] get binary length: %u\n", length);
            // Play straight from the received frame. The previous code first
            // copied the frame into the 1024-byte speakerdata0 buffer without
            // a bounds check, so any frame larger than 1024 bytes overflowed
            // it. speaker_offset is always 0 on entry here, so writing the
            // payload directly produces the same audio without the copy.
            if (payload != NULL && length > 0) {
                size_t bytes_written = 0;
                i2s_write(SPEAKER_I2S_NUMBER, payload, length, &bytes_written, portMAX_DELAY);
            }
            speaker_offset = 0;
            break;
        }

        case WStype_ERROR:
        case WStype_FRAGMENT_TEXT_START:
        case WStype_FRAGMENT_BIN_START:
        case WStype_FRAGMENT:
        case WStype_FRAGMENT_FIN:
            break;

        default:
            // Any other event kind is ignored.
            break;
    }
}
// Start Serial, join the Wi-Fi network (blocking until connected), then
// configure the WebSocket client: target COMPUTER_IP:8000 at path "/", the
// event callback, and a 5000 ms reconnect interval.
void websocket_setup() {
    Serial.begin(115200);
    WiFi.begin("Soundview_Guest", "");

    // Poll every 500 ms until the station reports a connection.
    for (;;) {
        if (WiFi.status() == WL_CONNECTED) {
            break;
        }
        delay(500);
        Serial.println("connecting to WiFi");
    }
    Serial.println("connected to WiFi");

    webSocket.begin(COMPUTER_IP, 8000, "/");
    webSocket.onEvent(webSocketEvent);
    webSocket.setReconnectInterval(5000);
}
// Arduino entry point, run once at boot: initialise the M5 board, set pixel 0
// to CRGB(128, 128, 0), bring up Wi-Fi and the WebSocket client, put the I2S
// peripheral into speaker mode and wait two seconds.
void setup() {
// NOTE(review): the three flags are presumably serial / I2C / display
// enables - confirm against the M5Atom library.
M5.begin(true, false, true);
M5.dis.drawpix(0, CRGB(128, 128, 0));
// Blocks until the Wi-Fi connection is established.
websocket_setup();
InitI2SSpeakerOrMic(MODE_SPK);
delay(2000);
}
bool recording = false;
// Send the microphone bytes buffered so far as one binary WebSocket frame and
// reset the buffer offset. The byte count is logged even when there is
// nothing to send.
void flush_microphone() {
    Serial.printf("[microphone] flushing %d bytes of data\n", data_offset);
    if (data_offset != 0) {
        webSocket.sendBIN(microphonedata0, data_offset);
        data_offset = 0;
    }
}
// Arduino main loop.
//
// - Button just pressed: announce the start of an audio message to the
//   server, switch I2S to microphone mode and begin recording.
// - Button just released: send the remaining buffered audio, then announce
//   the end of the audio message.
// - While recording: read one chunk from the microphone and flush the buffer
//   to the server once it holds more than 9 KiB.
//
// M5.update() and webSocket.loop() run on every iteration.
void loop() {
    button.loop();
    if (button.justPressed()) {
        Serial.println("Recording...");
        webSocket.sendTXT("{\"role\": \"user\", \"type\": \"audio\", \"format\": \"bytes.raw\", \"start\": true}");
        InitI2SSpeakerOrMic(MODE_MIC);
        recording = true;
        data_offset = 0;
        Serial.println("Recording ready.");
    } else if (button.justReleased()) {
        Serial.println("Stopped recording.");
        // Flush BEFORE the end marker: otherwise the last buffered audio
        // reaches the server after it has been told the message is complete.
        flush_microphone();
        webSocket.sendTXT("{\"role\": \"user\", \"type\": \"audio\", \"format\": \"bytes.raw\", \"end\": true}");
        recording = false;
        data_offset = 0;
    } else if (recording) {
        Serial.printf("Reading chunk at %d...\n", data_offset);
        // Initialised so a failed read cannot advance the offset by garbage.
        size_t bytes_read = 0;
        i2s_read(
            SPEAKER_I2S_NUMBER,
            (char *)(microphonedata0 + data_offset),
            DATA_SIZE, &bytes_read, (100 / portTICK_RATE_MS)
        );
        data_offset += bytes_read;
        Serial.printf("Read %d bytes in chunk.\n", (int)bytes_read);
        // Flush once another DATA_SIZE read might no longer fit in the
        // 10 KiB buffer.
        if (data_offset > 1024*9) {
            flush_microphone();
        }
    }
    M5.update();
    webSocket.loop();
}

@ -0,0 +1,47 @@
#!/usr/bin/env python
"""A basic echo server for testing the device."""
import asyncio
import uuid
import websockets
from websockets.server import serve
import traceback
def divide_chunks(l, n):
    """Yield consecutive slices of ``l``, each holding at most ``n`` items."""
    yield from (l[start:start + n] for start in range(0, len(l), n))
buffers: dict[uuid.UUID, bytearray] = {}
async def echo(websocket: websockets.WebSocketServerProtocol):
    """Buffer the binary frames of one connection and echo them back.

    Protocol, as implemented here:

    * text ``"s"`` starts (or restarts) the stream buffer for the connection;
    * binary frames are appended to that buffer;
    * text ``"e"`` sends ``"s"``, the buffer in chunks of 1000 bytes, then
      ``"e"`` back to the client.

    Errors while handling a single message are printed and the connection
    keeps being served. The connection's buffer is released when the handler
    exits, so disconnected clients do not accumulate in ``buffers``.
    """
    try:
        async for message in websocket:
            try:
                if message == "s":
                    print("starting stream for", websocket.id)
                    buffers[websocket.id] = bytearray()
                elif message == "e":
                    print("end, echoing stream for", websocket.id)
                    # An "e" without a preceding "s" echoes an empty stream
                    # instead of raising KeyError.
                    recorded = buffers.get(websocket.id, bytearray())
                    await websocket.send("s")
                    for chunk in divide_chunks(recorded, 1000):
                        await websocket.send(chunk)
                    await websocket.send("e")
                elif isinstance(message, bytes):
                    print("recvd", len(message), "bytes from", websocket.id)
                    # Bytes arriving before "s" implicitly open a buffer.
                    buffers.setdefault(websocket.id, bytearray()).extend(message)
                else:
                    print("ERR: recvd unknown message", message[:10], "from", websocket.id)
            except Exception:
                # Best-effort test server: report and keep the connection.
                traceback.print_exc()
    finally:
        buffers.pop(websocket.id, None)
async def main():
    """Serve the echo handler on 0.0.0.0:9001 until the task is cancelled."""
    async with serve(echo, "0.0.0.0", 9001):
        await asyncio.Future()  # never resolves: run forever


# Only start the server when executed as a script, so that importing this
# module (e.g. to reuse divide_chunks) does not block on the event loop.
if __name__ == "__main__":
    asyncio.run(main())

@ -0,0 +1,260 @@
#include <Arduino.h> //not needed in the arduino ide
// Captive Portal
#include <AsyncTCP.h> //https://github.com/me-no-dev/AsyncTCP using the latest dev version from @me-no-dev
#include <DNSServer.h>
#include <ESPAsyncWebServer.h> //https://github.com/me-no-dev/ESPAsyncWebServer using the latest dev version from @me-no-dev
#include <esp_wifi.h> //Used for mpdu_rx_disable android workaround
// Pre reading on the fundamentals of captive portals https://textslashplain.com/2022/06/24/captive-portals/
const char *ssid = "captive"; // FYI The SSID can't have a space in it.
// const char * password = "12345678"; //Atleast 8 chars
const char *password = NULL; // no password
#define MAX_CLIENTS 4 // ESP32 supports up to 10 but I have not tested it yet
#define WIFI_CHANNEL 6 // 2.4ghz channel 6 https://en.wikipedia.org/wiki/List_of_WLAN_channels#2.4_GHz_(802.11b/g/n/ax)
const IPAddress localIP(4, 3, 2, 1); // the IP address the web server, Samsung requires the IP to be in public space
const IPAddress gatewayIP(4, 3, 2, 1); // IP address of the network should be the same as the local IP for captive portals
const IPAddress subnetMask(255, 255, 255, 0); // no need to change: https://avinetworks.com/glossary/subnet-mask/
const String localIPURL = "http://4.3.2.1"; // a string version of the local IP with http, used for redirecting clients to your webpage
// Return `raw` with the characters that are significant in HTML text and
// attribute values replaced by character references.
static String escapeHtml(const String &raw)
{
    String escaped;
    escaped.reserve(raw.length());
    for (unsigned int i = 0; i < raw.length(); ++i)
    {
        const char c = raw.charAt(i);
        switch (c)
        {
        case '&':
            escaped += "&amp;";
            break;
        case '<':
            escaped += "&lt;";
            break;
        case '>':
            escaped += "&gt;";
            break;
        case '"':
            escaped += "&quot;";
            break;
        case '\'':
            escaped += "&#39;";
            break;
        default:
            escaped += c;
            break;
        }
    }
    return escaped;
}

// Build the Wi-Fi selection page: a form posting to /submit with a <select>
// listing every network of the last completed scan, plus a password field.
//
// SSIDs are chosen by whoever operates nearby access points, so each one is
// HTML-escaped before being placed in the page; an SSID containing quotes or
// markup can no longer break out of the <option> element. The browser decodes
// the references again, so the submitted value is still the original SSID.
//
// When WiFi.scanComplete() reports no results (zero or a negative status) the
// <select> is simply empty.
String generateHTMLWithSSIDs()
{
    String html = "<!DOCTYPE html><html><body><h2>Select Wi-Fi Network</h2><form action='/submit' method='POST'><label for='ssid'>SSID:</label><select id='ssid' name='ssid'>";
    const int networkCount = WiFi.scanComplete();
    for (int i = 0; i < networkCount; ++i)
    {
        const String safeSsid = escapeHtml(WiFi.SSID(i));
        html += "<option value='" + safeSsid + "'>" + safeSsid + "</option>";
    }
    html += "</select><br><label for='password'>Password:</label><input type='password' id='password' name='password'><br><input type='submit' value='Connect'></form></body></html>";
    return html;
}
const char index_html[] PROGMEM = R"=====(
<!DOCTYPE html>
<html>
<head>
<title>ESP32 WiFi Setup</title>
<style>
body {background-color:#06cc13;}
h1 {color: white;}
h2 {color: white;}
</style>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body>
<h1>WiFi Setup</h1>
<form action="/submit" method="post">
<label for="ssid">SSID:</label><br>
<input type="text" id="ssid" name="ssid"><br>
<label for="password">Password:</label><br>
<input type="password" id="password" name="password"><br><br>
<input type="submit" value="Connect">
</form>
</body>
</html>
)=====";
DNSServer dnsServer;
AsyncWebServer server(80);
// Interval in milliseconds between processing DNS requests.
#define DNS_INTERVAL 30

// Start `dnsServer` on port 53 so that every hostname ("*") resolves to
// `localIP`, with a response TTL of 3600.
void setUpDNSServer(DNSServer &dnsServer, const IPAddress &localIP)
{
    const uint32_t responseTtl = 3600;
    const uint16_t dnsPort = 53;

    dnsServer.setTTL(responseTtl);
    dnsServer.start(dnsPort, "*", localIP);
}
// Bring up the soft access point used by the captive portal.
//
// ssid / password - credentials of the access point (password may be NULL).
// localIP         - IP address assigned to the access point.
// gatewayIP       - gateway address handed to softAPConfig.
//
// After the access point is started, the Wi-Fi driver is stopped,
// de-initialised and re-initialised with AMPDU RX disabled, then started
// again, followed by a 100 ms delay.
void startSoftAccessPoint(const char *ssid, const char *password, const IPAddress &localIP, const IPAddress &gatewayIP)
{
// Define the maximum number of clients that can connect to the server
// (repeats the file-level definition with the same value)
#define MAX_CLIENTS 4
// Define the WiFi channel to be used (channel 6 in this case)
// (repeats the file-level definition with the same value)
#define WIFI_CHANNEL 6
// The Wi-Fi mode is not set here; the call below is disabled.
// WiFi.mode(WIFI_MODE_AP);
// Define the subnet mask for the WiFi network
// (this local shadows the file-level subnetMask of the same value)
const IPAddress subnetMask(255, 255, 255, 0);
// Configure the soft access point with a specific IP and subnet mask
WiFi.softAPConfig(localIP, gatewayIP, subnetMask);
// Start the soft access point with the given ssid, password, channel, max number of clients
// NOTE(review): the literal 0 is presumably the "hidden SSID" flag - confirm against the WiFi library.
WiFi.softAP(ssid, password, WIFI_CHANNEL, 0, MAX_CLIENTS);
// Disable AMPDU RX on the ESP32 WiFi to fix a bug on Android
esp_wifi_stop();
esp_wifi_deinit();
wifi_init_config_t my_config = WIFI_INIT_CONFIG_DEFAULT();
my_config.ampdu_rx_enable = false;
esp_wifi_init(&my_config);
esp_wifi_start();
vTaskDelay(100 / portTICK_PERIOD_MS); // Add a small delay
}
// Try to join the Wi-Fi network `ssid` with `password`.
//
// Polls the connection status once per second, printing a dot each time, for
// at most 20 polls, then reports success or failure on Serial.
void connectToWifi(String ssid, String password)
{
    WiFi.begin(ssid.c_str(), password.c_str());

    // Wait for the connection to establish (at most 20 one-second waits).
    for (int attempt = 0; WiFi.status() != WL_CONNECTED && attempt < 20; ++attempt)
    {
        delay(1000);
        Serial.print(".");
    }

    if (WiFi.status() != WL_CONNECTED)
    {
        Serial.println("Failed to connect to Wi-Fi. Check credentials.");
        return;
    }
    Serial.println("Connected to Wi-Fi");
}
// Register every HTTP route of the captive portal on `server`.
//
// Routes fall into three groups: operating-system connectivity probes that
// are redirected (or answered) so the device shows the portal pop-up, the
// portal page itself at "/", and the "/submit" form handler that reads the
// chosen SSID and password and tries to join that network. Any other URL is
// redirected to localIPURL.
//
// The `localIP` parameter is not used in this function; redirects use the
// file-level localIPURL string.
void setUpWebserver(AsyncWebServer &server, const IPAddress &localIP)
{
//======================== Webserver ========================
// WARNING IOS (and maybe macos) WILL NOT POP UP IF IT CONTAINS THE WORD "Success" https://www.esp8266.com/viewtopic.php?f=34&t=4398
// Safari (iOS): g-zipped files can't end in .gz https://github.com/homieiot/homie-esp8266/issues/476 this is fixed by the webserver serve static function.
// Safari (iOS): there is a 128KB limit to the size of the HTML. The HTML can reference external resources/images that bring the total over 128KB
// Safari (iOS): the popup browser has some severe limitations (javascript disabled, cookies disabled)
// Required
server.on("/connecttest.txt", [](AsyncWebServerRequest *request)
{ request->redirect("http://logout.net"); }); // windows 11 captive portal workaround
server.on("/wpad.dat", [](AsyncWebServerRequest *request)
{ request->send(404); }); // Purpose unclear to the original author; answering 404 stops Windows 10 from requesting this repeatedly and panicking the esp32
// Background responses: Probably not all are Required, but some are. Others might speed things up?
// A Tier (commonly used by modern systems)
server.on("/generate_204", [](AsyncWebServerRequest *request)
{ request->redirect(localIPURL); }); // android captive portal redirect
server.on("/redirect", [](AsyncWebServerRequest *request)
{ request->redirect(localIPURL); }); // microsoft redirect
server.on("/hotspot-detect.html", [](AsyncWebServerRequest *request)
{ request->redirect(localIPURL); }); // apple call home
server.on("/canonical.html", [](AsyncWebServerRequest *request)
{ request->redirect(localIPURL); }); // firefox captive portal call home
server.on("/success.txt", [](AsyncWebServerRequest *request)
{ request->send(200); }); // firefox captive portal call home
server.on("/ncsi.txt", [](AsyncWebServerRequest *request)
{ request->redirect(localIPURL); }); // windows call home
// B Tier (uncommon)
// server.on("/chrome-variations/seed",[](AsyncWebServerRequest *request){request->send(200);}); //chrome captive portal call home
// server.on("/service/update2/json",[](AsyncWebServerRequest *request){request->send(200);}); //firefox?
// server.on("/chat",[](AsyncWebServerRequest *request){request->send(404);}); //No stop asking Whatsapp, there is no internet connection
// server.on("/startpage",[](AsyncWebServerRequest *request){request->redirect(localIPURL);});
// return 404 to webpage icon
server.on("/favicon.ico", [](AsyncWebServerRequest *request)
{ request->send(404); }); // webpage icon
// Serve the portal page: the static index_html form by default, or the
// generated SSID drop-down once a scan has found at least one network.
server.on("/", HTTP_ANY, [](AsyncWebServerRequest *request)
{
String htmlContent = index_html;
Serial.printf("wifi scan complete: %d . WIFI_SCAN_RUNNING: %d", WiFi.scanComplete(), WIFI_SCAN_RUNNING);
if(WiFi.scanComplete() > 0) {
// Scan complete, process results
Serial.println("done scanning wifi");
htmlContent = generateHTMLWithSSIDs();
// WiFi.scanNetworks(true); // Start a new scan in async mode
}
AsyncWebServerResponse *response = request->beginResponse(200, "text/html", htmlContent);
// NOTE(review): the page content depends on the scan state, yet it is marked cacheable for a year - confirm this is intended.
response->addHeader("Cache-Control", "public,max-age=31536000"); // save this file to cache for 1 year (unless you refresh)
request->send(response);
Serial.println("Served Basic HTML Page"); });
// the catch all
server.onNotFound([](AsyncWebServerRequest *request)
{
request->redirect(localIPURL);
Serial.print("onnotfound ");
Serial.print(request->host()); // This gives some insight into whatever was being requested on the serial monitor
Serial.print(" ");
Serial.print(request->url());
Serial.print(" sent redirect to " + localIPURL + "\n"); });
// Form handler: read the posted credentials and try to join that network.
server.on("/submit", HTTP_POST, [](AsyncWebServerRequest *request)
{
String ssid;
String password;
// Check if SSID parameter exists and assign it
if(request->hasParam("ssid", true)) {
ssid = request->getParam("ssid", true)->value();
}
// Check if Password parameter exists and assign it
if(request->hasParam("password", true)) {
password = request->getParam("password", true)->value();
}
// Attempt to connect to the Wi-Fi network with these credentials
// NOTE(review): connectToWifi() waits in one-second delays (up to 20) inside this request handler - confirm that blocking here is acceptable.
connectToWifi(ssid, password);
// The response is only sent after the connection attempt has finished.
request->send(200, "text/plain", "Attempting to connect to " + ssid); });
}
// Arduino entry point, run once at boot: start Serial, switch Wi-Fi to
// combined access-point + station mode, bring up the soft access point, the
// DNS server, an asynchronous network scan and the web server, then print the
// elapsed start-up time in milliseconds.
void setup()
{
// Set the transmit buffer size for the Serial object and start it with a baud rate of 115200.
Serial.setTxBufferSize(1024);
Serial.begin(115200);
// Wait for the Serial object to become available.
while (!Serial)
;
WiFi.mode(WIFI_AP_STA);
// Print a welcome message to the Serial port.
Serial.println("\n\nCaptive Test, V0.5.0 compiled " __DATE__ " " __TIME__ " by CD_FER"); //__DATE__ is provided by the platformio ide
Serial.printf("%s-%d\n\r", ESP.getChipModel(), ESP.getChipRevision());
startSoftAccessPoint(ssid, password, localIP, gatewayIP);
setUpDNSServer(dnsServer, localIP);
// Asynchronous scan; the "/" handler checks WiFi.scanComplete() for results.
WiFi.scanNetworks(true);
setUpWebserver(server, localIP);
server.begin();
Serial.print("\n");
Serial.print("Startup Time:"); // should be somewhere between 270-350 for Generic ESP32 (D0WDQ6 chip, can have a higher startup time on first boot)
Serial.println(millis());
Serial.print("\n");
}
// Arduino main loop: service one pending DNS request, wait DNS_INTERVAL
// milliseconds, and report the station connection on Serial.
//
// The connection details are printed once, when the status changes to
// connected, rather than on every iteration; printing three lines every
// DNS_INTERVAL milliseconds flooded the serial output for as long as the
// device stayed connected.
void loop()
{
    dnsServer.processNextRequest(); // I call this atleast every 10ms in my other projects (can be higher but I haven't tested it for stability)
    delay(DNS_INTERVAL);            // seems to help with stability, if you are doing other things in the loop this may not be needed

    // Remember the previous status so only the transition is logged.
    static bool wasConnected = false;
    const bool isConnected = (WiFi.status() == WL_CONNECTED);
    if (isConnected && !wasConnected)
    {
        Serial.println("Connected to WiFi!");
        Serial.println("IP Address: " + WiFi.localIP().toString());
        Serial.println("SSID " + WiFi.SSID());
    }
    wasConnected = isConnected;
}

@ -47,6 +47,21 @@ def configure_interpreter(interpreter: OpenInterpreter):
Be very concise. Ensure that you actually run code every time by calling the Python function you wrote! THIS IS IMPORTANT. You NEED to write code. **Help the user by being very concise in your answers.** Do not break down tasks excessively, just into simple, few minute steps. Don't assume the user lives their life in a certain way— pick very general tasks if you're breaking a task down. Be very concise. Ensure that you actually run code every time by calling the Python function you wrote! THIS IS IMPORTANT. You NEED to write code. **Help the user by being very concise in your answers.** Do not break down tasks excessively, just into simple, few minute steps. Don't assume the user lives their life in a certain way— pick very general tasks if you're breaking a task down.
Prefer to use the following functions (assume they're imported) to complete your goals whenever possible: Prefer to use the following functions (assume they're imported) to complete your goals whenever possible:
ALWAYS REMEMBER: You are running on a device called the O1, where the interface is entirely speech-based. Keep your responses succint in light of this!
IF YOU NEED TO THINK ABOUT A PROBLEM: (such as "Here's the plan:"), WRITE IT IN THE COMMENTS of the code block!
For example:
> User: What is 432/7?
> Assistant: Let me use Python to calculate that.
> Assistant Python function call:
> # Here's the plan:
> # 1. Divide the numbers
> # 2. Round it to 3 digits.
> print(round(432/7, 3))
> Assistant: 432 / 7 is 61.714.
Use the following functions (assume they're imported) to complete your goals whenever possible:
{{ {{
import sys import sys
@ -76,7 +91,9 @@ print(output)
""".strip() """.strip()
interpreter.custom_instructions = system_message # interpreter.custom_instructions = system_message
interpreter.system_message = system_message
interpreter.llm.supports_functions = True
### LLM SETTINGS ### LLM SETTINGS

@ -201,7 +201,7 @@ async def listener():
accumulated_text = "" accumulated_text = ""
for chunk in interpreter.chat(messages, stream=True, display=False): for chunk in interpreter.chat(messages, stream=True, display=True):
logger.debug("Got chunk:", chunk) logger.debug("Got chunk:", chunk)
@ -212,7 +212,7 @@ async def listener():
if os.getenv('TTS_RUNNER') == "server": if os.getenv('TTS_RUNNER') == "server":
# Speak full sentences out loud # Speak full sentences out loud
if chunk["role"] == "assistant" and "content" in chunk: if chunk["role"] == "assistant" and "content" in chunk and chunk["type"] == "message":
accumulated_text += chunk["content"] accumulated_text += chunk["content"]
sentences = split_into_sentences(accumulated_text) sentences = split_into_sentences(accumulated_text)
@ -241,7 +241,7 @@ async def listener():
# Check if it's just an end flag. We ignore those. # Check if it's just an end flag. We ignore those.
temp_message = await from_user.get() temp_message = await from_user.get()
if temp_message == {'role': 'user', 'type': 'message', 'end': True}: if type(temp_message) is dict and temp_message.get("role") == "user" and temp_message.get("end"):
# Yup. False alarm. # Yup. False alarm.
continue continue
else: else:
@ -251,8 +251,9 @@ async def listener():
with open(conversation_history_path, 'w') as file: with open(conversation_history_path, 'w') as file:
json.dump(interpreter.messages, file, indent=4) json.dump(interpreter.messages, file, indent=4)
logger.info("New user message recieved. Breaking.") # TODO: is triggering seemingly randomly
break #logger.info("New user message recieved. Breaking.")
#break
# Also check if there's any new computer messages # Also check if there's any new computer messages
if not from_computer.empty(): if not from_computer.empty():

@ -25,6 +25,8 @@ def convert_mime_type_to_format(mime_type: str) -> str:
return "wav" return "wav"
if mime_type == "audio/webm": if mime_type == "audio/webm":
return "webm" return "webm"
if mime_type == "audio/raw":
return "dat"
return mime_type return mime_type
@ -43,6 +45,15 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
# Export to wav # Export to wav
output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav") output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
print(mime_type, input_path, output_path)
if mime_type == "audio/raw":
ffmpeg.input(
input_path,
f='s16le',
ar='16000',
ac=1,
).output(output_path).run()
else:
ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run() ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
try: try:

@ -1,9 +1,7 @@
from datetime import datetime from datetime import datetime
from .utils.logs import setup_logging, logger from .utils.logs import setup_logging, logger
import tkinter as tk
import tkinter.simpledialog
from interpreter import interpreter from interpreter import interpreter
from tkinter import messagebox from tkinter import messagebox, Button, simpledialog, Tk, Label, Frame, LEFT, ACTIVE
from ..utils.accumulator import Accumulator from ..utils.accumulator import Accumulator
import time import time
import os import os
@ -17,6 +15,39 @@ class Skill:
self.steps = [] self.steps = []
self.code = "" self.code = ""
class StepCheckDialog(simpledialog.Dialog):
    """Dialog asking whether the last step was performed correctly.

    Closing the dialog through one of its buttons stores "Yes", "No" or
    "Task Complete" in ``self.result``. Return is bound to the "Yes" action
    and Escape to the "No" action.
    """

    def body(self, master):
        """Set the window title and show the question."""
        self.title("Step Check")
        Label(master, text="Did I do this step correctly?").pack()

    def buttonbox(self):
        """Create the three answer buttons and the keyboard shortcuts."""
        box = Frame(self)
        answers = (
            ("Yes", self.yes_action, {"default": ACTIVE}),
            ("No", self.no_action, {}),
            ("Task Complete", self.task_complete_action, {}),
        )
        for caption, handler, extra in answers:
            button = Button(box, text=caption, width=10, command=handler, **extra)
            button.pack(side=LEFT, padx=5, pady=5)

        self.bind("<Return>", self.yes_action)
        self.bind("<Escape>", self.no_action)

        box.pack()

    def yes_action(self, event=None):
        """Record "Yes" and close the dialog."""
        self.done("Yes")

    def no_action(self, event=None):
        """Record "No" and close the dialog."""
        self.done("No")

    def task_complete_action(self, event=None):
        """Record "Task Complete" and close the dialog."""
        self.done("Task Complete")

    def done(self, result):
        """Store ``result`` on the dialog and destroy the window."""
        self.result = result
        self.destroy()
def to_camel_case(text): def to_camel_case(text):
words = text.split() words = text.split()
camel_case_string = words[0].lower() + ''.join(word.title() for word in words[1:]) camel_case_string = words[0].lower() + ''.join(word.title() for word in words[1:])
@ -36,17 +67,18 @@ def generate_python_steps(function_name, steps):
return code_string return code_string
def teach(): def teach():
root = tk.Tk() root = Tk()
root.withdraw() root.withdraw()
skill_name = simpledialog.askstring("Skill Name", "Please enter the name for the skill:", parent=root)
skill_name = tkinter.simpledialog.askstring("Skill Name", "Please enter the name for the skill:") if skill_name:
skill = Skill(skill_name) skill = Skill(skill_name)
while True: while True:
step = tkinter.simpledialog.askstring("Next Step", "Enter the next step (or 'end' to finish): ") step = simpledialog.askstring("Next Step", "Enter the next step (or 'end' to finish): ", parent=root)
logger.info(f"Performing step: {step}") if step is None or step == "end":
if step == "end":
break break
elif step.strip() == "":
continue
logger.info(f"Performing step: {step}")
chunk_code = "" chunk_code = ""
interpreter.computer.languages = [l for l in interpreter.computer.languages if l.name.lower() == "python"] interpreter.computer.languages = [l for l in interpreter.computer.languages if l.name.lower() == "python"]
interpreter.force_task_completion = True interpreter.force_task_completion = True
@ -60,10 +92,13 @@ def teach():
time.sleep(0.05) time.sleep(0.05)
accumulator.accumulate(chunk) accumulator.accumulate(chunk)
isCorrect = messagebox.askyesno("To Proceed?", "Did I do this step right?") stepCheckDialog = StepCheckDialog(root)
if isCorrect: stepCheckResult = stepCheckDialog.result
if stepCheckResult == "Yes" or stepCheckResult == "Task Complete":
skill.steps.append(step) skill.steps.append(step)
skill.code += chunk_code skill.code += chunk_code
if stepCheckResult == "Task Complete":
break
# Uncomment this incase you want steps instead of code # Uncomment this incase you want steps instead of code
#python_code = generate_python_steps(skill.skill_name, skill.steps) #python_code = generate_python_steps(skill.skill_name, skill.steps)
@ -71,5 +106,6 @@ def teach():
python_code = generate_python_code(skill.skill_name, skill.code) python_code = generate_python_code(skill.skill_name, skill.code)
SKILLS_DIR = os.path.dirname(__file__) + "/skills" SKILLS_DIR = os.path.dirname(__file__) + "/skills"
filename = os.path.join(SKILLS_DIR, f"{skill.skill_name.replace(' ', '_')}.py") filename = os.path.join(SKILLS_DIR, f"{skill.skill_name.replace(' ', '_')}.py")
logger.info(f"Saving skill to: {filename}")
with open(filename, "w") as file: with open(filename, "w") as file:
file.write(python_code) file.write(python_code)

@ -6,6 +6,7 @@ from pydub import AudioSegment
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() # take environment variables from .env. load_dotenv() # take environment variables from .env.
import ffmpeg
import tempfile import tempfile
from openai import OpenAI from openai import OpenAI
import os import os
@ -28,11 +29,17 @@ def stream_tts(text):
input=text, input=text,
response_format="opus" response_format="opus"
) )
with tempfile.NamedTemporaryFile(suffix=".opus") as temp_file: with tempfile.NamedTemporaryFile(suffix=".opus", delete=False) as temp_file:
response.stream_to_file(temp_file.name) response.stream_to_file(temp_file.name)
audio_bytes = temp_file.read() # TODO: hack to format audio correctly for device
file_type = "bytes.opus" outfile = tempfile.gettempdir() + "/" + "raw.dat"
ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run()
with open(outfile, "rb") as f:
audio_bytes = f.read()
file_type = "bytes.raw"
print(outfile, len(audio_bytes))
os.remove(outfile)
else: else:
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file: with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:

@ -4,7 +4,7 @@ packages = [
{include = "01OS"}, {include = "01OS"},
] ]
include = [".env.example", "start.py", "start.sh"] include = [".env.example", "start.py", "start.sh"]
version = "0.0.3" version = "0.0.4"
description = "The open-source language model computer" description = "The open-source language model computer"
authors = ["Killian <killian@openinterpreter.com>"] authors = ["Killian <killian@openinterpreter.com>"]
license = "AGPL" license = "AGPL"

Loading…
Cancel
Save