# swarms/examples/demos/personal_assistant/better_communication_exampl...

import os
import time

import pygame
import speech_recognition as sr
from dotenv import load_dotenv
from playsound import playsound

from swarms import OpenAIChat, OpenAITTS

# Read environment variables (python-dotenv) before looking up credentials
load_dotenv()

# One OpenAI key is shared by the chat client and the speech client below
openai_api_key = os.getenv("OPENAI_API_KEY")

# Chat model that receives the transcribed user request
llm = OpenAIChat(openai_api_key=openai_api_key)

# Text-to-speech client; saved_filepath is the same file the playback step
# at the bottom of this script reads
tts = OpenAITTS(
    openai_api_key=openai_api_key,
    model_name="tts-1-1106",
    voice="onyx",
    saved_filepath="runs/tts_speech.wav",
)

# Recognizer used to capture microphone audio and transcribe it
r = sr.Recognizer()


def play_audio(file_path):
    """Play an audio file through pygame's mixer, blocking until it finishes.

    Args:
        file_path: Path of the audio file to play.

    Returns:
        None. Problems are reported on stdout instead of being raised: a
        missing file prints an "Audio file ... not found." message, and a
        ``pygame.error`` raised while initializing the mixer, loading the
        file, or starting playback prints a "Couldn't play ..." message.
    """
    # Guard clause: nothing to do (and no mixer to set up) without a file.
    if not os.path.isfile(file_path):
        print(f"Audio file {file_path} not found.")
        return

    try:
        # Initialize inside the try so a mixer start-up failure (for example
        # no usable audio device) is reported like any other playback error
        # instead of escaping as an unhandled pygame.error.
        pygame.mixer.init()

        pygame.mixer.music.load(file_path)
        pygame.mixer.music.play()

        # Poll until playback ends. A single Clock is reused so tick(10)
        # paces the loop at roughly ten checks per second without building
        # a new Clock object on every iteration.
        clock = pygame.time.Clock()
        while pygame.mixer.music.get_busy():
            clock.tick(10)
    except pygame.error as e:
        print(f"Couldn't play {file_path}: {e}")
    finally:
        # Always release the audio device, whether playback succeeded or not.
        pygame.mixer.quit()


# Main assistant loop: listen -> transcribe -> ask the chat model -> speak.
# Runs until the process is interrupted.
while True:
    # Capture one utterance from the microphone
    with sr.Microphone() as source:
        print("Listening...")
        audio = r.listen(source)

    # Convert speech to text; on any recognition failure, report it and go
    # straight back to listening instead of querying the model with nothing.
    try:
        print("Recognizing...")
        task = r.recognize_google(audio)
        print(f"User said: {task}")
    except sr.UnknownValueError:
        print("Could not understand audio")
        continue
    except Exception as e:
        print(f"Error: {e}")
        continue

    # Run the chat model on the transcribed request. `llm` is the OpenAIChat
    # instance created near the top of this script, so the output is labelled
    # generically rather than as another vendor's model.
    print("Running GPT4 model...")
    out = llm(task)
    print(f"Model output: {out}")

    # Convert the model's reply to speech. The TTS return value gets its own
    # name so `out` keeps referring to the model's text reply.
    print("Running text-to-speech model...")
    speech = tts.run_and_save(out)
    print(f"Text-to-speech output: {speech}")

    # NOTE(review): fixed 5 s pause before playback — presumably to let the
    # saved file settle; confirm whether run_and_save already blocks until
    # the file is fully written.
    time.sleep(5)

    # Play back the file the TTS step was configured to save to
    # (the same path passed as saved_filepath to OpenAITTS).
    playsound("runs/tts_speech.wav")