01/software/source/server/services/stt/openai/stt.py

class Stt:
    """Speech-to-text service object that wraps the module-level ``stt``."""

    def __init__(self, config):
        """Accept a configuration object; it is currently not used."""

    def stt(self, audio_file_path):
        """Transcribe the audio at ``audio_file_path``.

        Delegates to the module-level ``stt`` function with its default
        MIME type and returns whatever that function returns.
        """
        transcript = stt(audio_file_path)
        return transcript


from datetime import datetime
import os
import contextlib
import tempfile
import ffmpeg
import subprocess
import openai
from openai import OpenAI


# Shared OpenAI client used by stt_wav for transcription requests.
# NOTE(review): constructed at import time with no arguments, so credentials
# presumably come from the environment (e.g. OPENAI_API_KEY) — confirm.
client = OpenAI()


def convert_mime_type_to_format(mime_type: str) -> str:
    """Translate an audio MIME type into a file extension.

    Recognized types are ``audio/x-wav`` and ``audio/wav`` (``"wav"``),
    ``audio/webm`` (``"webm"``) and ``audio/raw`` (``"dat"``). Any other
    value is returned unchanged.
    """
    known_formats = (
        ("audio/x-wav", "wav"),
        ("audio/wav", "wav"),
        ("audio/webm", "webm"),
        ("audio/raw", "dat"),
    )
    for candidate, extension in known_formats:
        if mime_type == candidate:
            return extension

    # Unrecognized types fall through as-is.
    return mime_type


@contextlib.contextmanager
def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    """Convert in-memory audio to a temporary WAV file via ffmpeg.

    This is a context manager: the ``with`` target is the path of the
    converted WAV file, which is only valid inside the ``with`` body.

    Args:
        audio: Encoded audio data to convert.
        mime_type: MIME type of ``audio``. ``"audio/raw"`` is read as
            headerless signed 16-bit little-endian PCM, 16 kHz, mono; any
            other type is left to ffmpeg to detect from the input file.

    Yields:
        Path to the converted ``.wav`` file.

    Raises:
        OSError: If the input data cannot be written to disk.
        Exception: Whatever ``ffmpeg``'s ``run()`` raises when the
            conversion fails.
    """
    input_ext = convert_mime_type_to_format(mime_type)

    # A private scratch directory gives every call its own file names (no
    # collisions between concurrent calls) and guarantees that both files are
    # removed on exit, including when the write or the ffmpeg run fails.
    with tempfile.TemporaryDirectory(prefix="stt_") as work_dir:
        input_path = os.path.join(work_dir, f"input.{input_ext}")
        output_path = os.path.join(work_dir, "output.wav")

        with open(input_path, "wb") as f:
            f.write(audio)

        # Export to wav
        if mime_type == "audio/raw":
            # Raw PCM has no header, so the sample format, rate and channel
            # count must be given explicitly as input options.
            ffmpeg.input(
                input_path,
                f="s16le",
                ar="16000",
                ac=1,
            ).output(output_path, loglevel="panic").run()
        else:
            # Re-encode to 16-bit PCM, mono, 16 kHz.
            ffmpeg.input(input_path).output(
                output_path, acodec="pcm_s16le", ac=1, ar="16k", loglevel="panic"
            ).run()

        yield output_path


def run_command(command):
    """Run ``command`` and return its captured ``(stdout, stderr)`` as text.

    The process exit status is not inspected; callers only receive the two
    decoded output streams.
    """
    completed = subprocess.run(command, capture_output=True, text=True)
    return completed.stdout, completed.stderr


def get_transcription_file(wav_file_path: str):
    """Transcribe a WAV file with the local ``whisper-rust`` binary.

    The binary is looked up under ``whisper-rust/target/release`` next to
    this module, and the model under ``local_service``. The model file name
    comes from the ``WHISPER_MODEL_NAME`` environment variable and defaults
    to ``ggml-tiny.en.bin``.

    Returns:
        The standard output of the command; its standard error is discarded.
    """
    module_dir = os.path.dirname(__file__)
    model_file = os.path.join(
        module_dir,
        "local_service",
        os.getenv("WHISPER_MODEL_NAME", "ggml-tiny.en.bin"),
    )
    binary = os.path.join(
        module_dir, "whisper-rust", "target", "release", "whisper-rust"
    )

    command = [binary, "--model-path", model_file, "--file-path", wav_file_path]
    stdout, _stderr = run_command(command)
    return stdout


def get_transcription_bytes(audio_bytes: bytearray, mime_type):
    """Transcribe in-memory audio via ``get_transcription_file``.

    The audio is first converted to a temporary WAV file with
    ``export_audio_to_wav_ffmpeg``; the result of transcribing that file is
    returned.
    """
    wav_conversion = export_audio_to_wav_ffmpeg(audio_bytes, mime_type)
    with wav_conversion as wav_path:
        transcription = get_transcription_file(wav_path)
    return transcription


def stt_bytes(audio_bytes: bytearray, mime_type="audio/wav"):
    """Transcribe in-memory audio via ``stt_wav``.

    The audio is first converted to a temporary WAV file with
    ``export_audio_to_wav_ffmpeg``; the result of ``stt_wav`` on that file
    is returned.
    """
    wav_conversion = export_audio_to_wav_ffmpeg(audio_bytes, mime_type)
    with wav_conversion as wav_path:
        transcript = stt_wav(wav_path)
    return transcript


def stt_wav(wav_file_path: str):
    """Transcribe a WAV file with the OpenAI ``whisper-1`` model.

    Args:
        wav_file_path: Path of the audio file to upload.

    Returns:
        The transcript as returned by the API for
        ``response_format="text"``, or ``None`` when the API rejects the
        request with ``openai.BadRequestError``.

    Raises:
        OSError: If ``wav_file_path`` cannot be opened.
    """
    # The context manager closes the file handle on every path; previously
    # the handle was never closed.
    with open(wav_file_path, "rb") as audio_file:
        try:
            transcript = client.audio.transcriptions.create(
                model="whisper-1", file=audio_file, response_format="text"
            )
        except openai.BadRequestError as e:
            # Best-effort: report the rejection and signal it with None.
            print(f"openai.BadRequestError: {e}")
            return None

    return transcript


def stt(input_data, mime_type="audio/wav"):
    """Transcribe audio given either as a file path or as raw bytes.

    Args:
        input_data: A ``str`` path to a wav file, or a ``bytearray`` of
            audio data.
        mime_type: MIME type of the audio; only used for ``bytearray`` input.

    Returns:
        The result of ``stt_wav`` (for a path) or ``stt_bytes`` (for bytes).

    Raises:
        ValueError: If ``input_data`` is neither ``str`` nor ``bytearray``.
    """
    if isinstance(input_data, str):
        return stt_wav(input_data)

    if isinstance(input_data, bytearray):
        return stt_bytes(input_data, mime_type)

    raise ValueError(
        "Input data should be either a path to a wav file (str) or audio bytes (bytearray)"
    )
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`class Stt:`
			`def __init__(self, config):`
			`pass`

			`def stt(self, audio_file_path):`
			`return stt(audio_file_path)`

Updates 1 year ago
feat: added Whisper stt 1 year ago			`from datetime import datetime`
			`import os`
			`import contextlib`
			`import tempfile`
			`import ffmpeg`
			`import subprocess`
Error handling 1 year ago			`import openai`
feat: added Whisper stt 1 year ago			`from openai import OpenAI`
Error handling 1 year ago
Adding standard python logging support 12 months ago
feat: added Whisper stt 1 year ago			`client = OpenAI()`

Re-lint after rebase 10 months ago
feat: added Whisper stt 1 year ago			`def convert_mime_type_to_format(mime_type: str) -> str:`
			`if mime_type == "audio/x-wav" or mime_type == "audio/wav":`
			`return "wav"`
			`if mime_type == "audio/webm":`
			`return "webm"`
support audio/raw in stt 12 months ago			`if mime_type == "audio/raw":`
			`return "dat"`
feat: added Whisper stt 1 year ago
			`return mime_type`

Re-lint after rebase 10 months ago
feat: added Whisper stt 1 year ago			`@contextlib.contextmanager`
			`def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:`
			`temp_dir = tempfile.gettempdir()`

			`# Create a temporary file with the appropriate extension`
			`input_ext = convert_mime_type_to_format(mime_type)`
Re-lint after rebase 10 months ago			`input_path = os.path.join(`
			`temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}"`
			`)`
			`with open(input_path, "wb") as f:`
feat: added Whisper stt 1 year ago			`f.write(audio)`

Two way websocket in user + settings 1 year ago			`# Check if the input file exists`
			`assert os.path.exists(input_path), f"Input file does not exist: {input_path}"`

feat: added Whisper stt 1 year ago			`# Export to wav`
Re-lint after rebase 10 months ago			`output_path = os.path.join(`
			`temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav"`
			`)`
support audio/raw in stt 12 months ago			`if mime_type == "audio/raw":`
			`ffmpeg.input(`
			`input_path,`
Re-lint after rebase 10 months ago			`f="s16le",`
			`ar="16000",`
support audio/raw in stt 12 months ago			`ac=1,`
Re-lint after rebase 10 months ago			`).output(output_path, loglevel="panic").run()`
support audio/raw in stt 12 months ago			`else:`
Re-lint after rebase 10 months ago			`ffmpeg.input(input_path).output(`
			`output_path, acodec="pcm_s16le", ac=1, ar="16k", loglevel="panic"`
			`).run()`
feat: added Whisper stt 1 year ago
			`try:`
			`yield output_path`
			`finally:`
			`os.remove(input_path)`
			`os.remove(output_path)`

Re-lint after rebase 10 months ago
chore: integrated local whisper + restructuring 12 months ago			`def run_command(command):`
Re-lint after rebase 10 months ago			`result = subprocess.run(`
			`command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True`
			`)`
chore: integrated local whisper + restructuring 12 months ago			`return result.stdout, result.stderr`


Re-lint after rebase 10 months ago			`def get_transcription_file(wav_file_path: str):`
			`local_path = os.path.join(os.path.dirname(__file__), "local_service")`
			`whisper_rust_path = os.path.join(`
			`os.path.dirname(__file__), "whisper-rust", "target", "release"`
			`)`
			`model_name = os.getenv("WHISPER_MODEL_NAME", "ggml-tiny.en.bin")`

			`output, error = run_command(`
			`[`
			`os.path.join(whisper_rust_path, "whisper-rust"),`
			`"--model-path",`
			`os.path.join(local_path, model_name),`
			`"--file-path",`
			`wav_file_path,`
			`]`
			`)`
chore: integrated local whisper + restructuring 12 months ago
			`return output`

Re-lint after rebase 10 months ago
chore: integrated local whisper + restructuring 12 months ago			`def get_transcription_bytes(audio_bytes: bytearray, mime_type):`
			`with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:`
			`return get_transcription_file(wav_file_path)`

Re-lint after rebase 10 months ago
8th Architecture 12 months ago			`def stt_bytes(audio_bytes: bytearray, mime_type="audio/wav"):`
feat: added Whisper stt 1 year ago			`with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:`
8th Architecture 12 months ago			`return stt_wav(wav_file_path)`


Re-lint after rebase 10 months ago			`def stt_wav(wav_file_path: str):`
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`audio_file = open(wav_file_path, "rb")`
			`try:`
			`transcript = client.audio.transcriptions.create(`
Re-lint after rebase 10 months ago			`model="whisper-1", file=audio_file, response_format="text"`
`start.py`, modular architecture, OI flags, mutable items to user dir 11 months ago			`)`
			`except openai.BadRequestError as e:`
			`print(f"openai.BadRequestError: {e}")`
			`return None`

			`return transcript`
Ready for whisper 12 months ago
Re-lint after rebase 10 months ago
Ready for whisper 12 months ago			`def stt(input_data, mime_type="audio/wav"):`
			`if isinstance(input_data, str):`
			`return stt_wav(input_data)`
			`elif isinstance(input_data, bytearray):`
			`return stt_bytes(input_data, mime_type)`
			`else:`
Re-lint after rebase 10 months ago			`raise ValueError(`
			`"Input data should be either a path to a wav file (str) or audio bytes (bytearray)"`
			`)`