# swarms/playground/demos/personal_assistant/better_communication_exampl...

import os
import time

import pygame
import speech_recognition as sr
from dotenv import load_dotenv
from playsound import playsound

from swarms import OpenAIChat, OpenAITTS

# Read a local .env file (when present) into the process environment.
load_dotenv()

# One OpenAI key is shared by both models below; None when the variable is unset.
openai_api_key = os.getenv("OPENAI_API_KEY")

# Chat model that answers the transcribed request.
llm = OpenAIChat(openai_api_key=openai_api_key)

# Text-to-speech model, configured with the output path that the main loop
# plays back afterwards.
tts = OpenAITTS(
    openai_api_key=openai_api_key,
    model_name="tts-1-1106",
    voice="onyx",
    saved_filepath="runs/tts_speech.wav",
)

# Recognizer used to capture microphone input and transcribe it.
r = sr.Recognizer()


def play_audio(file_path):
    """Play an audio file through pygame's mixer, blocking until it ends.

    Expected failures are reported on stdout rather than raised: a path that
    is not an existing file, or any ``pygame.error`` raised while starting
    the mixer, loading the file, or playing it.

    Args:
        file_path: Path of the audio file to play.

    Returns:
        None.
    """
    # Guard first so pygame is never touched for a path that is not a file.
    if not os.path.isfile(file_path):
        print(f"Audio file {file_path} not found.")
        return

    try:
        # Start the mixer inside the try block: initialization can itself
        # raise pygame.error and should be reported like a playback failure.
        pygame.mixer.init()

        # Load the audio file and start playback.
        pygame.mixer.music.load(file_path)
        pygame.mixer.music.play()

        # Poll about ten times per second until playback stops; a single
        # clock is reused instead of creating a new one on every pass.
        clock = pygame.time.Clock()
        while pygame.mixer.music.get_busy():
            clock.tick(10)
    except pygame.error as e:
        print(f"Couldn't play {file_path}: {e}")
    finally:
        # Always shut the mixer down, whether playback succeeded or not.
        pygame.mixer.quit()


# Voice-assistant loop: listen -> transcribe -> ask the language model ->
# synthesize speech -> play it. There is no exit condition, so it runs until
# the process is interrupted.
while True:
    # Capture one utterance from the microphone.
    with sr.Microphone() as source:
        print("Listening...")
        audio = r.listen(source)

    # Transcribe the recording; on any failure, report it and listen again.
    try:
        print("Recognizing...")
        task = r.recognize_google(audio)
        print(f"User said: {task}")
    except sr.UnknownValueError:
        print("Could not understand audio")
        continue
    except Exception as e:
        print(f"Error: {e}")
        continue

    # Send the transcribed request to the language model (`llm`).
    print("Running language model...")
    reply = llm(task)
    print(f"Language model output: {reply}")

    # Turn the reply into speech; the result is kept separate from the reply
    # text so neither value is overwritten.
    print("Running text-to-speech model...")
    speech_result = tts.run_and_save(reply)
    print(f"Text-to-speech output: {speech_result}")

    # NOTE(review): fixed 5-second pause before playback; its purpose is not
    # evident from this file -- confirm whether it is still needed.
    time.sleep(5)

    # Play the file at the same path that was given to the TTS model as
    # `saved_filepath`.
    playsound("runs/tts_speech.wav")
[DEMOS][TESTS] 1 year ago			`import os`
[CLEANUP OPERATION] 11 months ago			`import time`
[DEMOS][TESTS] 1 year ago
			`import pygame`
			`import speech_recognition as sr`
			`from dotenv import load_dotenv`
			`from playsound import playsound`

			`from swarms import OpenAIChat, OpenAITTS`

			`# Load the environment variables`
			`load_dotenv()`

			`# Get the API key from the environment`
			`openai_api_key = os.environ.get("OPENAI_API_KEY")`

			`# Initialize the language model`
			`llm = OpenAIChat(`
			`openai_api_key=openai_api_key,`
			`)`

			`# Initialize the text-to-speech model`
			`tts = OpenAITTS(`
			`model_name="tts-1-1106",`
			`voice="onyx",`
			`openai_api_key=openai_api_key,`
			`saved_filepath="runs/tts_speech.wav",`
			`)`

			`# Initialize the speech recognition model`
			`r = sr.Recognizer()`


			`def play_audio(file_path):`
			`# Check if the file exists`
			`if not os.path.isfile(file_path):`
			`print(f"Audio file {file_path} not found.")`
			`return`

			`# Initialize the mixer module`
			`pygame.mixer.init()`

			`try:`
			`# Load the mp3 file`
			`pygame.mixer.music.load(file_path)`

			`# Play the mp3 file`
			`pygame.mixer.music.play()`

			`# Wait for the audio to finish playing`
			`while pygame.mixer.music.get_busy():`
			`pygame.time.Clock().tick(10)`
			`except pygame.error as e:`
			`print(f"Couldn't play {file_path}: {e}")`
			`finally:`
			`# Stop the mixer module and free resources`
			`pygame.mixer.quit()`

[CLEANUP] 1 year ago
[DEMOS][TESTS] 1 year ago			`while True:`
			`# Listen for user speech`
			`with sr.Microphone() as source:`
			`print("Listening...")`
			`audio = r.listen(source)`

			`# Convert speech to text`
			`try:`
			`print("Recognizing...")`
			`task = r.recognize_google(audio)`
			`print(f"User said: {task}")`
			`except sr.UnknownValueError:`
			`print("Could not understand audio")`
			`continue`
			`except Exception as e:`
			`print(f"Error: {e}")`
			`continue`

			`# Run the Gemini model on the task`
			`print("Running GPT4 model...")`
			`out = llm(task)`
			`print(f"Gemini output: {out}")`

			`# Convert the Gemini output to speech`
			`print("Running text-to-speech model...")`
			`out = tts.run_and_save(out)`
			`print(f"Text-to-speech output: {out}")`

			`# Ask the user if they want to play the audio`
			`# play_audio = input("Do you want to play the audio? (yes/no): ")`
			`# if play_audio.lower() == "yes":`
			`# Initialize the mixer module`
			`# Play the audio file`
[CLEANUP] 1 year ago
[DEMOS][TESTS] 1 year ago			`time.sleep(5)`
[CLEANUP] 1 year ago
			`playsound("runs/tts_speech.wav")`