From 6f84f5a6867db25cca7458362cc7d5642faceaeb Mon Sep 17 00:00:00 2001 From: Shiven Mian Date: Sat, 3 Feb 2024 17:52:13 -0800 Subject: [PATCH 1/5] fix: commented out rpio requirement --- OS/01/requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/OS/01/requirements.txt b/OS/01/requirements.txt index daef207..83d21e0 100644 --- a/OS/01/requirements.txt +++ b/OS/01/requirements.txt @@ -2,4 +2,6 @@ git+https://github.com/KillianLucas/open-interpreter.git redis fastapi uvicorn -RPi.GPIO \ No newline at end of file +websockets +python-dotenv +ffmpeg-python \ No newline at end of file From a3de4c1286a2a66ab736ed72645d177103236407 Mon Sep 17 00:00:00 2001 From: Shiven Mian Date: Sat, 3 Feb 2024 17:52:26 -0800 Subject: [PATCH 2/5] fix: add gitignore --- OS/01/.gitignore | 160 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 OS/01/.gitignore diff --git a/OS/01/.gitignore b/OS/01/.gitignore new file mode 100644 index 0000000..6769e21 --- /dev/null +++ b/OS/01/.gitignore @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ \ No newline at end of file From f749cb878edb9244b84b8784c38e6c3aaf7f79f5 Mon Sep 17 00:00:00 2001 From: Shiven Mian Date: Sat, 3 Feb 2024 18:56:06 -0800 Subject: [PATCH 3/5] feat: added Whisper stt --- OS/01/assistant/listen.py | 52 ++++++++++++++ OS/01/assistant/stt.py | 52 ++++++++++++++ OS/01/requirements.txt | 3 + OS/01/user/record.py | 141 ++++++++++++++++++++++++++++++++++++++ OS/01/user/user.py | 13 ---- 5 files changed, 248 insertions(+), 13 deletions(-) create mode 100644 OS/01/assistant/listen.py create mode 100644 OS/01/user/record.py delete mode 100644 OS/01/user/user.py diff --git a/OS/01/assistant/listen.py b/OS/01/assistant/listen.py new file mode 100644 index 0000000..948ef9a --- /dev/null +++ b/OS/01/assistant/listen.py @@ -0,0 +1,52 @@ +from fastapi import FastAPI, WebSocket +import uvicorn +import json +from stt import stt +import tempfile + +app = FastAPI() + +@app.websocket("/user") +async def user(ws: WebSocket): + await ws.accept() + audio_file = bytearray() + mime_type = None + + try: + while True: + message = await ws.receive() + + if message['type'] == 'websocket.disconnect': + break + + if message['type'] == 'websocket.receive': + if 'text' in message: + control_message = json.loads(message['text']) + if control_message.get('action') == 'command' and control_message.get('state') == 'start' and 'mimeType' in control_message: + # This indicates the start of a new audio file + mime_type = control_message.get('mimeType') + elif control_message.get('action') == 'command' and control_message.get('state') == 'end': + # This indicates the end of the audio file + # Process the complete audio file here + transcription = stt(audio_file, mime_type) + await ws.send_json({"transcript": transcription}) + + print("SENT TRANSCRIPTION!") + + # Reset the bytearray for the next audio file + audio_file = bytearray() + mime_type = None + elif 'bytes' in message: + # If it's not a control message, it's part of the audio file + audio_file.extend(message['bytes']) + + except Exception as e: + print(f"WebSocket connection closed with exception: {e}") + finally: + await ws.close() + print("WebSocket connection closed") + + +if __name__ == "__main__": + with tempfile.TemporaryDirectory(): + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/OS/01/assistant/stt.py b/OS/01/assistant/stt.py index e69de29..d52f260 100644 --- a/OS/01/assistant/stt.py +++ b/OS/01/assistant/stt.py @@ -0,0 +1,52 @@ +from datetime import datetime +import os +import contextlib +import tempfile +import ffmpeg +import subprocess + +from openai import OpenAI +client = OpenAI() + 
+def convert_mime_type_to_format(mime_type: str) -> str: + if mime_type == "audio/x-wav" or mime_type == "audio/wav": + return "wav" + if mime_type == "audio/webm": + return "webm" + + return mime_type + +@contextlib.contextmanager +def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str: + temp_dir = tempfile.gettempdir() + + # Create a temporary file with the appropriate extension + input_ext = convert_mime_type_to_format(mime_type) + input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}") + with open(input_path, 'wb') as f: + f.write(audio) + + # Export to wav + output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav") + ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run() + + print(f"Temporary file path: {output_path}") + + try: + yield output_path + finally: + os.remove(input_path) + os.remove(output_path) + + +def stt(audio_bytes: bytearray, mime_type): + with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path: + audio_file = open(wav_file_path, "rb") + transcript = client.audio.transcriptions.create( + model="whisper-1", + file=audio_file, + response_format="text" + ) + + print("Exciting transcription result:", transcript) + return transcript diff --git a/OS/01/requirements.txt b/OS/01/requirements.txt index 83d21e0..4500632 100644 --- a/OS/01/requirements.txt +++ b/OS/01/requirements.txt @@ -1,4 +1,7 @@ git+https://github.com/KillianLucas/open-interpreter.git +asyncio +pyaudio +pynput redis fastapi uvicorn diff --git a/OS/01/user/record.py b/OS/01/user/record.py new file mode 100644 index 0000000..f376e49 --- /dev/null +++ b/OS/01/user/record.py @@ -0,0 +1,141 @@ +""" +Handles everything the user interacts through. + +Connects to a websocket at /user. Sends shit to it, and displays/plays the shit it sends back. + +For now, just handles a spacebar being pressed— for the duration it's pressed, +it should record audio. + +SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript, +sends it to /user in LMC format (role: user, etc) + +MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py. 
+"""
+
+import os
+import pyaudio
+import threading
+import asyncio
+import websockets
+import json
+from pynput import keyboard
+import wave
+import tempfile
+from datetime import datetime
+
+# Configuration
+chunk = 1024 # Record in chunks of 1024 samples
+sample_format = pyaudio.paInt16 # 16 bits per sample
+channels = 1 # Mono
+fs = 48000 # Sample rate
+
+p = pyaudio.PyAudio() # Create an interface to PortAudio
+frames = [] # Initialize array to store frames
+recording = False # Flag to control recording state
+
+ws_chunk_size = 4096 # Websocket stream chunk size
+
+async def start_recording():
+    global recording
+
+    if recording:
+        return # Avoid multiple starts
+    recording = True
+    frames.clear() # Clear existing frames
+
+    stream = p.open(format=sample_format,
+                    channels=channels,
+                    rate=fs,
+                    frames_per_buffer=chunk,
+                    input=True)
+
+    print("Recording started...")
+    async with websockets.connect("ws://localhost:8000/user") as websocket:
+        # Send the start command with mime type
+        await websocket.send(json.dumps({"action": "command", "state": "start", "mimeType": "audio/wav"}))
+        while recording:
+            data = stream.read(chunk)
+            frames.append(data)
+
+        stream.stop_stream()
+        stream.close()
+
+        try:
+            file_path = save_recording(frames)
+            with open(file_path, 'rb') as audio_file:
+                byte_chunk = audio_file.read(ws_chunk_size)
+                while byte_chunk:
+                    await websocket.send(byte_chunk)
+                    byte_chunk = audio_file.read(ws_chunk_size)
+        finally:
+            os.remove(file_path)
+
+        # Send the end command
+        await websocket.send(json.dumps({"action": "command", "state": "end"}))
+
+        # Receive a json message and then close the connection
+        message = await websocket.recv()
+        print("Received message:", json.loads(message))
+
+    print("Recording stopped.")
+
+def save_recording(frames) -> str:
+    # Save the recorded data as a WAV file
+    temp_dir = tempfile.gettempdir()
+
+    # Create a temporary file with the appropriate extension
+    output_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
+    with wave.open(output_path, 'wb') as wf:
+        wf.setnchannels(channels)
+        wf.setsampwidth(p.get_sample_size(sample_format))
+        wf.setframerate(fs)
+        wf.writeframes(b''.join(frames))
+
+    return output_path
+
+def start_recording_sync():
+    # Create a new event loop for the thread
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    # Run the asyncio event loop
+    loop.run_until_complete(start_recording())
+    loop.close()
+
+def stop_recording():
+    global recording
+    recording = False
+    print("Stopped recording")
+
+def toggle_recording():
+    global recording
+    if recording:
+        stop_recording()
+    else:
+        # Start recording in a new thread to avoid blocking
+        print("Starting recording")
+        threading.Thread(target=start_recording_sync).start()
+
+is_space_pressed = False # Flag to track the state of the spacebar
+
+def on_press(key):
+    global is_space_pressed
+    if key == keyboard.Key.space and not is_space_pressed:
+        is_space_pressed = True
+        toggle_recording()
+
+def on_release(key):
+    global is_space_pressed
+    if key == keyboard.Key.space and is_space_pressed:
+        is_space_pressed = False
+        stop_recording()
+    if key == keyboard.Key.esc:
+        # Stop listener
+        return False
+
+# Collect events until released
+with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
+    with tempfile.TemporaryDirectory():
+        print("Press the spacebar to start/stop recording. 
Press ESC to exit.") + listener.join() + +p.terminate() \ No newline at end of file diff --git a/OS/01/user/user.py b/OS/01/user/user.py deleted file mode 100644 index ee3529f..0000000 --- a/OS/01/user/user.py +++ /dev/null @@ -1,13 +0,0 @@ -""" -Handles everything the user interacts through. - -Connects to a websocket at /user. Sends shit to it, and displays/plays the shit it sends back. - -For now, just handles a spacebar being pressed— for the duration it's pressed, -it should record audio. - -SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript, -sends it to /user in LMC format (role: user, etc) - -MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py. -""" \ No newline at end of file From 7a95ce4dd831a7f8599f4bf595057ab252497470 Mon Sep 17 00:00:00 2001 From: Shiven Mian Date: Sat, 3 Feb 2024 19:25:50 -0800 Subject: [PATCH 4/5] chore: add listen docstring --- OS/01/assistant/listen.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/OS/01/assistant/listen.py b/OS/01/assistant/listen.py index 948ef9a..44a089b 100644 --- a/OS/01/assistant/listen.py +++ b/OS/01/assistant/listen.py @@ -1,3 +1,8 @@ +""" +Listens to chunks of audio recorded by user. +Run `python listen.py` to start the server, then `cd user` and run `python record.py` to record audio. +""" + from fastapi import FastAPI, WebSocket import uvicorn import json From 5f1be31562a00f233c9ee2d53efdc4461b5d75d1 Mon Sep 17 00:00:00 2001 From: Shiven Mian Date: Sat, 3 Feb 2024 19:29:03 -0800 Subject: [PATCH 5/5] fix: add back user.py docstring --- OS/01/user/user.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 OS/01/user/user.py diff --git a/OS/01/user/user.py b/OS/01/user/user.py new file mode 100644 index 0000000..ee3529f --- /dev/null +++ b/OS/01/user/user.py @@ -0,0 +1,13 @@ +""" +Handles everything the user interacts through. + +Connects to a websocket at /user. Sends shit to it, and displays/plays the shit it sends back. + +For now, just handles a spacebar being pressed— for the duration it's pressed, +it should record audio. + +SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript, +sends it to /user in LMC format (role: user, etc) + +MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py. +""" \ No newline at end of file
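
Note on exercising the websocket protocol introduced above: record.py and listen.py agree on a simple exchange over ws://localhost:8000/user — a JSON text frame {"action": "command", "state": "start", "mimeType": ...}, then the raw audio as binary frames, then {"action": "command", "state": "end"}, after which the server replies with {"transcript": ...}. The sketch below is a minimal, hypothetical client for driving that protocol with a pre-recorded file instead of a microphone; it is not part of the patch series, and it assumes the listen.py server is running locally and that a sample.wav file exists.

# test_user_ws.py -- hypothetical helper, not included in these patches.
# Streams an existing WAV file to the /user endpoint served by listen.py,
# using the same start/bytes/end sequence that record.py sends.
import asyncio
import json

import websockets  # already listed in OS/01/requirements.txt


async def send_wav(path="sample.wav"):  # sample.wav is an assumed local file
    async with websockets.connect("ws://localhost:8000/user") as ws:
        # Announce a new audio file and its mime type (text frame).
        await ws.send(json.dumps({"action": "command", "state": "start", "mimeType": "audio/wav"}))

        # Stream the file as binary frames, mirroring ws_chunk_size in record.py.
        with open(path, "rb") as f:
            while chunk := f.read(4096):
                await ws.send(chunk)

        # Mark the end of the file so the server runs stt() on what it buffered.
        await ws.send(json.dumps({"action": "command", "state": "end"}))

        # listen.py answers with a JSON object like {"transcript": "..."}.
        print(json.loads(await ws.recv()))


if __name__ == "__main__":
    asyncio.run(send_wav())

Under those assumptions, start listen.py first, then run the script from the same machine to get a Whisper transcript of the file back over the socket.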