Added Push to talk example

2 years ago · 6c39a5d497
parent 1f658d8837
commit 6c39a5d497
7 changed files with 305 additions and 2 deletions
--- a/OS/01/core/i_endpoint.py
+++ b/OS/01/core/i_endpoint.py
@ -67,6 +67,7 @@ async def a(ws: WebSocket):
                elif 'bytes' in message:
                    # If it's not a control message, it's part of the audio file
                    audio_file.extend(message['bytes'])
    except Exception as e:
        print(f"WebSocket connection closed with exception: {e}")
    finally:
--- a/OS/01/core/stt/init.py
+++ b/OS/01/core/stt/init.py
@ -6,7 +6,7 @@ import ffmpeg
 import subprocess
 def convert_mime_type_to_format(mime_type: str) -> str:
-    if mime_type == "audio/x-wav":
+    if mime_type == "audio/x-wav" or mime_type == "audio/wav":
        return "wav"
    if mime_type == "audio/webm":
        return "webm"
@ -33,7 +33,7 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
        yield output_path
    finally:
        os.remove(input_path)
-        #os.remove(output_path)
+        os.remove(output_path)
 def run_command(command):
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
--- a/examples/push-to-record/.gitignore
+++ b/examples/push-to-record/.gitignore
@ -0,0 +1,160 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
 .pytest_cache/
 cover/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 .pybuilder/
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # IPython
 profile_default/
 ipython_config.py
 # pyenv
 #   For a library or package, you might want to ignore these files since the code is
 #   intended to run in multiple environments; otherwise, check them in:
 # .python-version
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 #   install all needed dependencies.
 #Pipfile.lock
 # poetry
 #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
 #poetry.lock
 # pdm
 #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
 #pdm.lock
 #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
 #   in version control.
 #   https://pdm.fming.dev/#use-with-ide
 .pdm.toml
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
 # Celery stuff
 celerybeat-schedule
 celerybeat.pid
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
 # pytype static type analyzer
 .pytype/
 # Cython debug symbols
 cython_debug/
 # PyCharm
 #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
--- a/examples/push-to-record/README.md
+++ b/examples/push-to-record/README.md
@ -0,0 +1,9 @@
 # Setup
 On macOS, install PortAudio first: `brew install portaudio`
 1. Run `pip install -r requirements.txt`
 2. Start the `OS/01/core/i_endpoint.py` service (the example connects to `ws://localhost:8000/a`)
 3. Run `python main.py`
 4. Press and hold the spacebar to talk, then release it to get the transcription back
--- a/examples/push-to-record/main.py
+++ b/examples/push-to-record/main.py
@ -0,0 +1,129 @@
 import os
 import pyaudio
 import threading
 import asyncio
 import websockets
 import json
 from pynput import keyboard
 import wave
 import tempfile
 from datetime import datetime
 # Configuration (module-level state shared by the recording and keyboard callbacks)
 chunk = 1024  # Record in chunks of 1024 samples
 sample_format = pyaudio.paInt16  # 16 bits per sample
 channels = 1  # Mono (a single input channel)
 fs = 48000 # Sample rate passed to the input stream and written to the WAV header
 p = pyaudio.PyAudio()  # Create an interface to PortAudio
 frames = []  # Initialize array to store frames
 recording = False  # Flag to control recording state
 ws_chunk_size = 4096 # Number of bytes read from the WAV file per websocket message
 async def start_recording():
    """Record microphone audio while `recording` is set, then upload it.

    Opens a PyAudio input stream, connects to the websocket endpoint, sends
    the "start" command, and collects audio chunks into the module-level
    `frames` list until `recording` becomes false. The frames are then
    written to a temporary WAV file via `save_recording`, streamed to the
    server in `ws_chunk_size`-byte messages, followed by the "end" command.
    The JSON reply from the server is printed.

    Returns immediately if a recording is already in progress.
    """
    global recording
    if recording:
        return  # Avoid multiple starts
    recording = True
    frames.clear()  # Clear existing frames
    stream = p.open(format=sample_format,
                    channels=channels,
                    rate=fs,
                    frames_per_buffer=chunk,
                    input=True)
    stream_closed = False
    print("Recording started...")
    try:
        async with websockets.connect("ws://localhost:8000/a") as websocket:
            # Send the start command with mime type
            await websocket.send(json.dumps({"action": "command", "state": "start", "mimeType": "audio/wav"}))
            # stream.read() blocks; run it in the default executor so the
            # event loop stays free to service the websocket while recording.
            loop = asyncio.get_running_loop()
            while recording:
                data = await loop.run_in_executor(None, stream.read, chunk)
                frames.append(data)
            # Release the audio device before the (potentially slow) upload
            stream.stop_stream()
            stream.close()
            stream_closed = True
            # Create the file outside the try so the cleanup below never
            # refers to an unbound name if saving fails.
            file_path = save_recording(frames)
            try:
                with open(file_path, 'rb') as audio_file:
                    for byte_chunk in iter(lambda: audio_file.read(ws_chunk_size), b''):
                        await websocket.send(byte_chunk)
            finally:
                os.remove(file_path)
            # Send the end command
            await websocket.send(json.dumps({"action": "command", "state": "end"}))
            # Receive a json message and then close the connection
            message = await websocket.recv()
            print("Received message:", json.loads(message))
    finally:
        # Connection or send failures must not leak the open audio stream
        if not stream_closed:
            stream.close()
    print("Recording stopped.")
 def save_recording(frames) -> str:
    """Write the recorded frames to a new temporary WAV file.

    The file is created with `tempfile.mkstemp`, so its name is unique and
    it is created atomically instead of relying on a timestamp-based name.
    The WAV header uses the module-level `channels`, `sample_format` and
    `fs` settings.

    Args:
        frames: iterable of raw audio byte chunks, in recording order.

    Returns:
        Path of the WAV file. The caller is responsible for deleting it.
    """
    fd, output_path = tempfile.mkstemp(prefix="input_", suffix=".wav")
    try:
        with os.fdopen(fd, 'wb') as raw_file, wave.open(raw_file, 'wb') as wf:
            wf.setnchannels(channels)
            wf.setsampwidth(p.get_sample_size(sample_format))
            wf.setframerate(fs)
            wf.writeframes(b''.join(frames))
    except Exception:
        # Do not leave a partial file behind when writing fails
        os.remove(output_path)
        raise
    return output_path
 def start_recording_sync():
    """Run `start_recording` to completion on a fresh event loop.

    Intended as a thread target: the worker thread has no event loop of its
    own, so one is created, installed for the thread, and always closed
    afterwards, even when the coroutine raises.
    """
    # Create a new event loop for the thread
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        # Run the asyncio event loop
        loop.run_until_complete(start_recording())
    finally:
        loop.close()
 def stop_recording():
    """Clear the module-level `recording` flag and report it on stdout."""
    global recording
    recording = False
    print("Stopped recording")
 def toggle_recording(e):
    """Stop the current recording, or start one on a worker thread.

    Args:
        e: the key event that triggered the toggle (not used here).
    """
    global recording
    if not recording:
        # Recording runs in its own thread so the caller is not blocked
        print("Starting recording")
        worker = threading.Thread(target=start_recording_sync)
        worker.start()
        return
    stop_recording()
 is_space_pressed = False  # True between a spacebar press and its release
 def on_press(key):
    """Key-press callback: toggle recording on the first spacebar press.

    Presses received while `is_space_pressed` is already set are ignored,
    as are all keys other than the spacebar.
    """
    global is_space_pressed
    if is_space_pressed or key != keyboard.Key.space:
        return
    is_space_pressed = True
    toggle_recording(key)
 def on_release(key):
    """Key-release callback: stop recording on spacebar, stop listening on ESC.

    Returns:
        False when ESC was released (this stops the listener); otherwise None.
    """
    global is_space_pressed
    if key == keyboard.Key.esc:
        # Stop listener
        return False
    if is_space_pressed and key == keyboard.Key.space:
        is_space_pressed = False
        stop_recording()
    return None
 # Collect keyboard events until the listener stops (ESC released).
 # The previously wrapped tempfile.TemporaryDirectory() was never used, so it
 # is dropped; PortAudio is now released even if the listener fails.
 try:
    with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
        print("Press the spacebar to start/stop recording. Press ESC to exit.")
        listener.join()
 finally:
    p.terminate()
--- a/examples/push-to-record/output.wav
+++ b/examples/push-to-record/output.wav
--- a/examples/push-to-record/requirements.txt
+++ b/examples/push-to-record/requirements.txt
@ -0,0 +1,4 @@
 pynput @ git+https://github.com/moses-palmer/pynput.git@12acf84dc0f721d91a957da65311497acb664933
 pyaudio
 websockets==12.0
 asyncio==3.4.3