Merge branch 'main' into fix/windows-tts-stt

pull/173/head
Ty Fiero 10 months ago committed by GitHub
commit 5168219e76
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -12,7 +12,7 @@ That should be compatible with other popular systems. For example, I think [LMC
We need to pick a niche. I wonder if startups is the right niche for us (use the 01 to build and sell your own Tab, Pin, Rabbit, etc) or if we should go with education, as the Raspberry Pi and Arduino did. Computer science departments are where UNIX took hold.
4. **Be afforable.**
4. **Be affordable.**
The most hackable, most basic 01 MUST be under $100, ideally under $70.

@ -55,7 +55,7 @@
- [ ] Can tapping the mic twice = trigger pressing the "button"? Simple sensing, just based on volume spikes?
- [ ] Update Architecture
- [ ] Base Device Class
- [ ] Seperate folders for Rasberry Pi, Desktop, Droid, App, Web
- [ ] Separate folders for Raspberry Pi, Desktop, Droid, App, Web
- [ ] device.py for each folder has input logic for that device
- [ ] Add basic TUI to device.py. Just renders messages and lets you add messages. Can easily copy OI's TUI.
- [ ] index.html for each folder has user interface for that device

@ -5,7 +5,7 @@
<br>
<br>
<strong>The open-source language model computer.(开源大语言模型计算机)</strong><br>
<!-- <br><a href="https://openinterpreter.com">Preorder the Light</a> | <a href="https://openinterpreter.com">Get Updates</a> | <a href="https://docs.openinterpreter.com/">Documentation</a><br> -->
<br><a href="https://openinterpreter.com/01">预订 Light</a> | <a href="https://changes.openinterpreter.com">获取更新‎</a> | <a href="https://01.openinterpreter.com/">文档</a><br>
</p>
<br>

@ -668,7 +668,7 @@ void websocket_setup(String server_domain, int port)
return;
}
Serial.println("connected to WiFi");
webSocket.begin(server_domain, 80, "/");
webSocket.begin(server_domain, port, "/");
webSocket.onEvent(webSocketEvent);
// webSocket.setAuthorization("user", "Password");
webSocket.setReconnectInterval(5000);
@ -798,4 +798,4 @@ void loop()
M5.update();
webSocket.loop();
}
}
}

@ -12,6 +12,8 @@ import subprocess
import os
import subprocess
import platform
import urllib.request
class Stt:
@ -23,7 +25,6 @@ class Stt:
return stt(self.service_directory, audio_file_path)
def install(service_dir):
### INSTALL
@ -42,27 +43,31 @@ def install(service_dir):
# Check if whisper-rust executable exists before attempting to build
if not os.path.isfile(os.path.join(WHISPER_RUST_PATH, "target/release/whisper-rust")):
# Check if Rust is installed. Needed to build whisper executable
rustc_path = shutil.which("rustc")
if rustc_path is None:
print("Rust is not installed or is not in system PATH. Please install Rust before proceeding.")
exit(1)
# Build Whisper Rust executable if not found
subprocess.call('cargo build --release', shell=True)
subprocess.run(['cargo', 'build', '--release'], check=True)
else:
print("Whisper Rust executable already exists. Skipping build.")
WHISPER_MODEL_PATH = os.path.join(service_dir, "model")
WHISPER_MODEL_NAME = os.getenv('WHISPER_MODEL_NAME', 'ggml-tiny.en.bin')
WHISPER_MODEL_URL = os.getenv('WHISPER_MODEL_URL', 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/')
if not os.path.isfile(os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME)):
os.makedirs(WHISPER_MODEL_PATH, exist_ok=True)
subprocess.call(f'curl -L "{WHISPER_MODEL_URL}{WHISPER_MODEL_NAME}" -o "{os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME)}"', shell=True)
urllib.request.urlretrieve(f"{WHISPER_MODEL_URL}{WHISPER_MODEL_NAME}",
os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME))
else:
print("Whisper model already exists. Skipping download.")
def convert_mime_type_to_format(mime_type: str) -> str:
if mime_type == "audio/x-wav" or mime_type == "audio/wav":
return "wav"
@ -73,6 +78,7 @@ def convert_mime_type_to_format(mime_type: str) -> str:
return mime_type
@contextlib.contextmanager
def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
temp_dir = tempfile.gettempdir()
@ -105,10 +111,12 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
os.remove(input_path)
os.remove(output_path)
def run_command(command):
    """Run *command* and return its captured output.

    Args:
        command: The command to execute, passed straight to
            ``subprocess.run`` (no ``shell=True`` is set here).

    Returns:
        A ``(stdout, stderr)`` tuple of strings; both streams are captured
        and decoded as text.

    The exit status is not inspected, so a non-zero return code does not
    raise; callers only see what the process wrote.
    """
    result = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    return result.stdout, result.stderr
def get_transcription_file(service_directory, wav_file_path: str):
local_path = os.path.join(service_directory, 'model')
whisper_rust_path = os.path.join(service_directory, 'whisper-rust', 'target', 'release')
@ -124,14 +132,15 @@ def get_transcription_file(service_directory, wav_file_path: str):
def stt_wav(service_directory, wav_file_path: str):
    """Transcribe an audio file after re-encoding it with ffmpeg.

    The input is converted to 16-bit PCM, mono, 16 kHz in a temporary WAV
    file, that file is handed to ``get_transcription_file``, and the
    temporary file is removed afterwards.

    Args:
        service_directory: Passed through unchanged to
            ``get_transcription_file``.
        wav_file_path: Path of the audio file to transcribe.

    Returns:
        Whatever ``get_transcription_file`` returns for the converted file.
    """
    temp_dir = tempfile.gettempdir()
    # A microsecond-resolution timestamp makes name collisions between calls unlikely.
    output_path = os.path.join(
        temp_dir,
        f"output_stt_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav",
    )
    try:
        # Conversion happens inside the try so a partially written output
        # file is still cleaned up when ffmpeg fails.
        ffmpeg.input(wav_file_path).output(
            output_path, acodec='pcm_s16le', ac=1, ar='16k'
        ).run()
        transcript = get_transcription_file(service_directory, output_path)
    finally:
        try:
            os.remove(output_path)
        except FileNotFoundError:
            # ffmpeg never created the file; nothing to clean up, and the
            # original error (if any) should propagate unmasked.
            pass
    return transcript
def stt(service_directory, input_data):
    """Transcribe *input_data* by delegating to ``stt_wav``.

    Args:
        service_directory: Forwarded unchanged to ``stt_wav``.
        input_data: Forwarded to ``stt_wav`` as the audio file path.

    Returns:
        The value returned by ``stt_wav``.
    """
    return stt_wav(service_directory, input_data)

@ -59,6 +59,7 @@ def check_filtered_kernel():
return "\n".join(filtered_messages)
async def put_kernel_messages_into_queue(queue):
while True:
text = check_filtered_kernel()

Loading…
Cancel
Save