Merge branch 'main' into fix/windows-tts-stt

2 years ago · 5168219e76
parent 547c4afb57 0fa5241b60
commit 5168219e76
6 changed files with 30 additions and 20 deletions
--- a/CONTEXT.md
+++ b/CONTEXT.md
@@ -12,7 +12,7 @@ That should be compatible with other popular systems. For example, I think [LMC

 We need to pick a niche. I wonder if startups is the right niche for us (use the 01 to build and sell your own Tab, Pin, Rabbit, etc) or if we should go with education, as the Raspberry Pi and Arduino did. Computer science departments is where UNIX took hold.

-4. **Be afforable.**
+4. **Be affordable.**

 The most hackable, most basic 01 MUST be under $100, ideally under $70.

--- a/TASKS.md
+++ b/TASKS.md
@@ -55,7 +55,7 @@
 - [ ] Can tapping the mic twice = trigger pressing the "button"? Simple sensing, just based on volume spikes?
 - [ ] Update Architecture
  - [ ] Base Devise Class
-  - [ ] Seperate folders for Rasberry Pi, Desktop, Droid, App, Web
+  - [ ] Separate folders for Rasberry Pi, Desktop, Droid, App, Web
  - [ ] device.py for each folder has input logic for that device
    - [ ] Add basic TUI to device.py. Just renders messages and lets you add messages. Can easily copy OI's TUI.
  - [ ] index.html for each folder has user interface for that device
--- a/docs/README_CN.md
+++ b/docs/README_CN.md
@@ -5,7 +5,7 @@
    <br>
    <br>
    <strong>The open-source language model computer.（开源大语言模型计算机）</strong><br>
-    <!-- <br><a href="https://openinterpreter.com">Preorder the Light</a>‎ ‎ |‎ ‎ <a href="https://openinterpreter.com">Get Updates</a>‎ ‎ |‎ ‎ <a href="https://docs.openinterpreter.com/">Documentation</a><br> -->
+    <br><a href="https://openinterpreter.com/01">预订 Light‎</a>‎ ‎ |‎ ‎ <a href="https://changes.openinterpreter.com">获取更新‎</a>‎ ‎ |‎ ‎ <a href="https://01.openinterpreter.com/">文档</a><br>
 </p>

 <br>
--- a/software/source/clients/esp32/src/client/client.ino
+++ b/software/source/clients/esp32/src/client/client.ino
@@ -668,7 +668,7 @@ void websocket_setup(String server_domain, int port)
        return;
    }
    Serial.println("connected to WiFi");
-    webSocket.begin(server_domain, 80, "/");
+    webSocket.begin(server_domain, port, "/");
    webSocket.onEvent(webSocketEvent);
    // webSocket.setAuthorization("user", "Password");
    webSocket.setReconnectInterval(5000);
@@ -798,4 +798,4 @@ void loop()
        M5.update();
        webSocket.loop();
    }
-}
+}
--- a/software/source/server/services/stt/local-whisper/stt.py
+++ b/software/source/server/services/stt/local-whisper/stt.py
@@ -12,6 +12,8 @@ import subprocess

 import os
 import subprocess
+import platform
+import urllib.request


 class Stt:
@@ -23,7 +25,6 @@ class Stt:
        return stt(self.service_directory, audio_file_path)


-
 def install(service_dir):

    ### INSTALL
@@ -42,27 +43,31 @@ def install(service_dir):
    # Check if whisper-rust executable exists before attempting to build
    if not os.path.isfile(os.path.join(WHISPER_RUST_PATH, "target/release/whisper-rust")):
        # Check if Rust is installed. Needed to build whisper executable
+        
        rustc_path = shutil.which("rustc")
+        
        if rustc_path is None:
            print("Rust is not installed or is not in system PATH. Please install Rust before proceeding.")
            exit(1)
-        
+
        # Build Whisper Rust executable if not found
-        subprocess.call('cargo build --release', shell=True)
+        subprocess.run(['cargo', 'build', '--release'], check=True)
    else:
        print("Whisper Rust executable already exists. Skipping build.")

    WHISPER_MODEL_PATH = os.path.join(service_dir, "model")
-    
+
    WHISPER_MODEL_NAME = os.getenv('WHISPER_MODEL_NAME', 'ggml-tiny.en.bin')
    WHISPER_MODEL_URL = os.getenv('WHISPER_MODEL_URL', 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/')
-    
+
    if not os.path.isfile(os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME)):
        os.makedirs(WHISPER_MODEL_PATH, exist_ok=True)
-        subprocess.call(f'curl -L "{WHISPER_MODEL_URL}{WHISPER_MODEL_NAME}" -o "{os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME)}"', shell=True)
+        urllib.request.urlretrieve(f"{WHISPER_MODEL_URL}{WHISPER_MODEL_NAME}",
+                                   os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME))
    else:
        print("Whisper model already exists. Skipping download.")

+
 def convert_mime_type_to_format(mime_type: str) -> str:
    if mime_type == "audio/x-wav" or mime_type == "audio/wav":
        return "wav"
@@ -73,6 +78,7 @@ def convert_mime_type_to_format(mime_type: str) -> str:

    return mime_type

+
 @contextlib.contextmanager
 def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    temp_dir = tempfile.gettempdir()
@@ -105,10 +111,12 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
        os.remove(input_path)
        os.remove(output_path)

+
 def run_command(command):
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    return result.stdout, result.stderr

+
 def get_transcription_file(service_directory, wav_file_path: str):
    local_path = os.path.join(service_directory, 'model')
    whisper_rust_path = os.path.join(service_directory, 'whisper-rust', 'target', 'release')
@@ -124,14 +132,15 @@ def get_transcription_file(service_directory, wav_file_path: str):


 def stt_wav(service_directory, wav_file_path: str):
-        temp_dir = tempfile.gettempdir()
-        output_path = os.path.join(temp_dir, f"output_stt_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
-        ffmpeg.input(wav_file_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
-        try:
-            transcript = get_transcription_file(service_directory, output_path)
-        finally:
-            os.remove(output_path)
-        return transcript
+    temp_dir = tempfile.gettempdir()
+    output_path = os.path.join(temp_dir, f"output_stt_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
+    ffmpeg.input(wav_file_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
+    try:
+        transcript = get_transcription_file(service_directory, output_path)
+    finally:
+        os.remove(output_path)
+    return transcript
+

 def stt(service_directory, input_data):
-    return stt_wav(service_directory, input_data)
+    return stt_wav(service_directory, input_data)
--- a/software/source/server/utils/kernel.py
+++ b/software/source/server/utils/kernel.py
@@ -59,6 +59,7 @@ def check_filtered_kernel():
    
    return "\n".join(filtered_messages)

+
 async def put_kernel_messages_into_queue(queue):
    while True:
        text = check_filtered_kernel()