# 01/software/source/server/services/stt/local-whisper/stt.py

"""
Defines a function which takes a path to an audio file and turns it into text.
"""

from datetime import datetime
import os
import contextlib
import tempfile
import shutil
import ffmpeg
import subprocess

import os
import subprocess
import platform
import urllib.request


class Stt:
    """Speech-to-text service object for the local whisper-rust backend.

    Creating an instance runs ``install`` for the configured service
    directory; ``stt`` then forwards to the module-level ``stt`` function.
    """

    def __init__(self, config):
        """Remember ``config["service_directory"]`` and run ``install`` on it."""
        service_directory = config["service_directory"]
        self.service_directory = service_directory
        install(service_directory)

    def stt(self, audio_file_path):
        """Return the module-level ``stt`` result for ``audio_file_path``."""
        transcript = stt(self.service_directory, audio_file_path)
        return transcript


def install(service_dir):
    """Prepare the local Whisper service inside ``service_dir``.

    Three steps are performed, each skipped when its result already exists:

    1. Copy the ``whisper-rust`` sources that sit next to this file into
       ``<service_dir>/whisper-rust``.
    2. Build them with ``cargo build --release``.
    3. Download the model file into ``<service_dir>/model``.

    The model file name and base URL come from the ``WHISPER_MODEL_NAME`` and
    ``WHISPER_MODEL_URL`` environment variables (defaults: ``ggml-tiny.en.bin``
    and the whisper.cpp repository on Hugging Face).

    Args:
        service_dir: Directory that holds the build and the model. It may be
            relative; the working directory of the process is not changed.

    Raises:
        SystemExit: With code 1 when the bundled sources are missing or the
            Rust toolchain cannot be found on PATH.
        subprocess.CalledProcessError: When ``cargo build`` fails.
    """
    whisper_rust_path = os.path.join(service_dir, "whisper-rust")
    script_dir = os.path.dirname(os.path.realpath(__file__))
    source_whisper_rust_path = os.path.join(script_dir, "whisper-rust")
    if not os.path.exists(source_whisper_rust_path):
        print(f"Source directory does not exist: {source_whisper_rust_path}")
        # The `exit` builtin is injected by the `site` module and is not
        # always available, so raise SystemExit directly.
        raise SystemExit(1)
    if not os.path.exists(whisper_rust_path):
        shutil.copytree(source_whisper_rust_path, whisper_rust_path)

    # Cargo names the binary "whisper-rust.exe" on Windows, so accept both.
    release_dir = os.path.join(whisper_rust_path, "target", "release")
    executable_exists = any(
        os.path.isfile(os.path.join(release_dir, name))
        for name in ("whisper-rust", "whisper-rust.exe")
    )

    if not executable_exists:
        # Rust is needed to build the executable; the build itself is driven
        # by cargo, so both tools must be reachable.
        if shutil.which("rustc") is None or shutil.which("cargo") is None:
            print("Rust is not installed or is not in system PATH. Please install Rust before proceeding.")
            raise SystemExit(1)

        # Run the build inside the copied sources via `cwd` instead of
        # os.chdir(): a process-wide chdir would leak to the caller and would
        # make every later path derived from a relative `service_dir` wrong.
        subprocess.run(['cargo', 'build', '--release'], check=True, cwd=whisper_rust_path)
    else:
        print("Whisper Rust executable already exists. Skipping build.")

    model_dir = os.path.join(service_dir, "model")
    model_name = os.getenv('WHISPER_MODEL_NAME', 'ggml-tiny.en.bin')
    model_url = os.getenv('WHISPER_MODEL_URL', 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/')
    model_file = os.path.join(model_dir, model_name)

    if not os.path.isfile(model_file):
        os.makedirs(model_dir, exist_ok=True)
        # Download to a side file and rename once complete, so an interrupted
        # download never leaves a truncated file that the next run would
        # mistake for a valid model.
        partial_file = model_file + ".part"
        try:
            urllib.request.urlretrieve(f"{model_url}{model_name}", partial_file)
            os.replace(partial_file, model_file)
        finally:
            if os.path.exists(partial_file):
                os.remove(partial_file)
    else:
        print("Whisper model already exists. Skipping download.")


def convert_mime_type_to_format(mime_type: str) -> str:
    """Map an audio MIME type to a file extension.

    The result is used as the extension of a temporary file name, so it must
    not contain path separators or MIME parameters.

    Args:
        mime_type: A MIME type such as ``"audio/wav"``; parameters after a
            ``;`` (e.g. ``"audio/webm;codecs=opus"``) are ignored.

    Returns:
        ``"wav"``, ``"webm"`` or ``"dat"`` for the explicitly known types.
        For any other value, the part after the last ``/`` (the subtype), or
        the value itself when it contains no ``/``.
    """
    known_formats = {
        "audio/x-wav": "wav",
        "audio/wav": "wav",
        "audio/webm": "webm",
        "audio/raw": "dat",
    }

    # Drop parameters such as ";codecs=opus" before looking the type up.
    essence = mime_type.split(";", 1)[0].strip()
    if essence in known_formats:
        return known_formats[essence]

    # Unknown type: fall back to the subtype so the result never contains a
    # "/" that would turn the temp file name into a bogus directory path.
    return essence.rsplit("/", 1)[-1]


@contextlib.contextmanager
def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    """Context manager that converts in-memory audio to a temporary WAV file.

    The bytes are written to a private temporary directory, converted with
    ffmpeg, and the path of the resulting WAV file is yielded. The directory,
    including the input and output files, is removed when the ``with`` block
    exits, and also when the write or the ffmpeg conversion fails.

    Args:
        audio: Raw bytes of the audio payload.
        mime_type: MIME type of ``audio``. ``"audio/raw"`` is read as
            signed 16-bit little-endian PCM, 16 kHz, mono; any other type is
            left to ffmpeg to decode and is re-encoded as 16-bit PCM, mono,
            16 kHz.

    Yields:
        Path of the converted WAV file; only valid inside the ``with`` block.
    """
    input_ext = convert_mime_type_to_format(mime_type)

    # A dedicated directory gives unique file names without relying on
    # timestamps, and guarantees cleanup on every exit path.
    with tempfile.TemporaryDirectory(prefix="stt_") as work_dir:
        input_path = os.path.join(work_dir, f"input.{input_ext}")
        output_path = os.path.join(work_dir, "output.wav")

        with open(input_path, 'wb') as f:
            f.write(audio)

        # Export to wav
        print(mime_type, input_path, output_path)
        if mime_type == "audio/raw":
            # Headerless PCM: ffmpeg must be told the sample layout.
            ffmpeg.input(
                input_path,
                f='s16le',
                ar='16000',
                ac=1,
            ).output(output_path, loglevel='panic').run()
        else:
            ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k', loglevel='panic').run()

        yield output_path


def run_command(command):
    """Run ``command`` and return its captured ``(stdout, stderr)`` as text.

    The exit status of the process is not inspected.
    """
    completed = subprocess.run(command, capture_output=True, text=True)
    return completed.stdout, completed.stderr


def get_transcription_file(service_directory, wav_file_path: str):
    """Run the whisper-rust executable on ``wav_file_path`` and return its stdout.

    The executable is taken from
    ``<service_directory>/whisper-rust/target/release`` and the model from
    ``<service_directory>/model``; the model file name comes from the
    ``WHISPER_MODEL_NAME`` environment variable (default ``ggml-tiny.en.bin``).
    The stderr output of the process is discarded.
    """
    model_file = os.path.join(
        service_directory,
        'model',
        os.getenv('WHISPER_MODEL_NAME', 'ggml-tiny.en.bin'),
    )
    executable = os.path.join(
        service_directory, 'whisper-rust', 'target', 'release', 'whisper-rust'
    )

    command = [executable, '--model-path', model_file, '--file-path', wav_file_path]
    transcript, _stderr = run_command(command)
    return transcript


def stt_wav(service_directory, wav_file_path: str):
    """Transcribe the audio file at ``wav_file_path``.

    The input is first re-encoded with ffmpeg to 16-bit PCM, mono, 16 kHz in
    a private temporary directory, then passed to ``get_transcription_file``.
    The temporary directory is removed afterwards, including when the ffmpeg
    conversion or the transcription fails.

    Args:
        service_directory: Directory that holds the whisper-rust build and
            the model.
        wav_file_path: Path of the audio file to transcribe.

    Returns:
        Whatever ``get_transcription_file`` returns for the converted file.
    """
    # A dedicated directory avoids timestamp-based name collisions and makes
    # sure a partially written output is cleaned up if ffmpeg fails.
    with tempfile.TemporaryDirectory(prefix="stt_") as work_dir:
        output_path = os.path.join(work_dir, "output_stt.wav")
        ffmpeg.input(wav_file_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
        return get_transcription_file(service_directory, output_path)


def stt(service_directory, input_data):
    """Transcribe ``input_data`` by handing it to ``stt_wav`` unchanged."""
    transcript = stt_wav(service_directory, input_data)
    return transcript
Updates 1 year ago			`"""`
			`Defines a function which takes a path to an audio file and turns it into text.`
			`"""`

feat: added Whisper stt 1 year ago			`from datetime import datetime`
			`import os`
			`import contextlib`
			`import tempfile`
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`import shutil`
feat: added Whisper stt 1 year ago			`import ffmpeg`
			`import subprocess`
Error handling 1 year ago
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`import os`
			`import subprocess`
Implement cross-platform compatibility in stt.py by replacing system-specific commands with Python's built-in functions 10 months ago			`import platform`
			`import urllib.request`
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago

			`class Stt:`
			`def __init__(self, config):`
Fixed local whisper 11 months ago			`self.service_directory = config["service_directory"]`
			`install(self.service_directory)`
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago
			`def stt(self, audio_file_path):`
Fixed local whisper 11 months ago			`return stt(self.service_directory, audio_file_path)`
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago

			`def install(service_dir):`

			`### INSTALL`
Adding standard python logging support 12 months ago
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`WHISPER_RUST_PATH = os.path.join(service_dir, "whisper-rust")`
			`script_dir = os.path.dirname(os.path.realpath(__file__))`
			`source_whisper_rust_path = os.path.join(script_dir, "whisper-rust")`
			`if not os.path.exists(source_whisper_rust_path):`
			`print(f"Source directory does not exist: {source_whisper_rust_path}")`
			`exit(1)`
			`if not os.path.exists(WHISPER_RUST_PATH):`
			`shutil.copytree(source_whisper_rust_path, WHISPER_RUST_PATH)`

			`os.chdir(WHISPER_RUST_PATH)`

			`# Check if whisper-rust executable exists before attempting to build`
			`if not os.path.isfile(os.path.join(WHISPER_RUST_PATH, "target/release/whisper-rust")):`
			`# Check if Rust is installed. Needed to build whisper executable`
Merge branch 'main' into fix/windows-tts-stt 10 months ago
Adjusted rust installation check to work with windows 10 months ago			`rustc_path = shutil.which("rustc")`
Merge branch 'main' into fix/windows-tts-stt 10 months ago
Adjusted rust installation check to work with windows 10 months ago			`if rustc_path is None:`
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`print("Rust is not installed or is not in system PATH. Please install Rust before proceeding.")`
			`exit(1)`
Implement cross-platform compatibility in stt.py by replacing system-specific commands with Python's built-in functions 10 months ago
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`# Build Whisper Rust executable if not found`
Implement cross-platform compatibility in stt.py by replacing system-specific commands with Python's built-in functions 10 months ago			`subprocess.run(['cargo', 'build', '--release'], check=True)`
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`else:`
			`print("Whisper Rust executable already exists. Skipping build.")`

			`WHISPER_MODEL_PATH = os.path.join(service_dir, "model")`
Implement cross-platform compatibility in stt.py by replacing system-specific commands with Python's built-in functions 10 months ago
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`WHISPER_MODEL_NAME = os.getenv('WHISPER_MODEL_NAME', 'ggml-tiny.en.bin')`
			`WHISPER_MODEL_URL = os.getenv('WHISPER_MODEL_URL', 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/')`
Implement cross-platform compatibility in stt.py by replacing system-specific commands with Python's built-in functions 10 months ago
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`if not os.path.isfile(os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME)):`
			`os.makedirs(WHISPER_MODEL_PATH, exist_ok=True)`
Implement cross-platform compatibility in stt.py by replacing system-specific commands with Python's built-in functions 10 months ago			`urllib.request.urlretrieve(f"{WHISPER_MODEL_URL}{WHISPER_MODEL_NAME}",`
			`os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME))`
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`else:`
			`print("Whisper model already exists. Skipping download.")`
feat: added Whisper stt 1 year ago
Implement cross-platform compatibility in stt.py by replacing system-specific commands with Python's built-in functions 10 months ago
feat: added Whisper stt 1 year ago			`def convert_mime_type_to_format(mime_type: str) -> str:`
			`if mime_type == "audio/x-wav" or mime_type == "audio/wav":`
			`return "wav"`
			`if mime_type == "audio/webm":`
			`return "webm"`
support audio/raw in stt 12 months ago			`if mime_type == "audio/raw":`
			`return "dat"`
feat: added Whisper stt 1 year ago
			`return mime_type`

Implement cross-platform compatibility in stt.py by replacing system-specific commands with Python's built-in functions 10 months ago
feat: added Whisper stt 1 year ago			`@contextlib.contextmanager`
			`def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:`
			`temp_dir = tempfile.gettempdir()`

			`# Create a temporary file with the appropriate extension`
			`input_ext = convert_mime_type_to_format(mime_type)`
			`input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}")`
			`with open(input_path, 'wb') as f:`
			`f.write(audio)`

Two way websocket in user + settings 1 year ago			`# Check if the input file exists`
			`assert os.path.exists(input_path), f"Input file does not exist: {input_path}"`

feat: added Whisper stt 1 year ago			`# Export to wav`
			`output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")`
support audio/raw in stt 12 months ago			`print(mime_type, input_path, output_path)`
			`if mime_type == "audio/raw":`
			`ffmpeg.input(`
			`input_path,`
			`f='s16le',`
			`ar='16000',`
			`ac=1,`
Printing essential print statements, hiding ffmpeg prints, updated i profile 11 months ago			`).output(output_path, loglevel='panic').run()`
support audio/raw in stt 12 months ago			`else:`
Printing essential print statements, hiding ffmpeg prints, updated i profile 11 months ago			`ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k', loglevel='panic').run()`
feat: added Whisper stt 1 year ago
			`try:`
			`yield output_path`
			`finally:`
			`os.remove(input_path)`
			`os.remove(output_path)`

Implement cross-platform compatibility in stt.py by replacing system-specific commands with Python's built-in functions 10 months ago
chore: integrated local whisper + restructuring 12 months ago			`def run_command(command):`
			`result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)`
			`return result.stdout, result.stderr`

Implement cross-platform compatibility in stt.py by replacing system-specific commands with Python's built-in functions 10 months ago
Fixed local whisper 11 months ago			`def get_transcription_file(service_directory, wav_file_path: str):`
			`local_path = os.path.join(service_directory, 'model')`
			`whisper_rust_path = os.path.join(service_directory, 'whisper-rust', 'target', 'release')`
			`model_name = os.getenv('WHISPER_MODEL_NAME', 'ggml-tiny.en.bin')`
chore: integrated local whisper + restructuring 12 months ago
Fixed local whisper 11 months ago			`output, _ = run_command([`
fix: added back change to auto-download STT models 12 months ago			`os.path.join(whisper_rust_path, 'whisper-rust'),`
Switched to poetry, pushed to `pip`, new scaffolding for multiple clients 12 months ago			`'--model-path', os.path.join(local_path, model_name),`
chore: integrated local whisper + restructuring 12 months ago			`'--file-path', wav_file_path`
			`])`

			`return output`


Fixed local whisper 11 months ago			`def stt_wav(service_directory, wav_file_path: str):`
Implement cross-platform compatibility in stt.py by replacing system-specific commands with Python's built-in functions 10 months ago			`temp_dir = tempfile.gettempdir()`
			`output_path = os.path.join(temp_dir, f"output_stt_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")`
			`ffmpeg.input(wav_file_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()`
			`try:`
			`transcript = get_transcription_file(service_directory, output_path)`
			`finally:`
			`os.remove(output_path)`
			`return transcript`

Ready for whisper 12 months ago
Fixed local whisper 11 months ago			`def stt(service_directory, input_data):`
Implement cross-platform compatibility in stt.py by replacing system-specific commands with Python's built-in functions 10 months ago			`return stt_wav(service_directory, input_data)`