You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
swarms/examples/demos/personal_assistant/better_communication_exampl...

97 lines
2.3 KiB

6 months ago
import os
import time
import pygame
import speech_recognition as sr
from dotenv import load_dotenv
from playsound import playsound
from swarms import OpenAIChat, OpenAITTS
# Populate the process environment (via python-dotenv) before reading it
load_dotenv()

# OpenAI credential shared by the chat model and the speech model below
openai_api_key = os.getenv("OPENAI_API_KEY")

# Chat model that receives the transcribed request
llm = OpenAIChat(openai_api_key=openai_api_key)

# Text-to-speech model, configured with the file path its audio is saved to
tts = OpenAITTS(
    openai_api_key=openai_api_key,
    model_name="tts-1-1106",
    voice="onyx",
    saved_filepath="runs/tts_speech.wav",
)

# Recognizer used by the main loop to listen and transcribe
r = sr.Recognizer()
def play_audio(file_path):
    """Play an audio file through pygame's mixer, blocking until it finishes.

    Prints a message and returns without raising when the file does not
    exist or when pygame reports an error, including a failure to
    initialize the mixer.

    Args:
        file_path: Path of the audio file to play.

    Returns:
        None.
    """
    # Bail out early so pygame is never touched for a missing file.
    if not os.path.isfile(file_path):
        print(f"Audio file {file_path} not found.")
        return

    try:
        # Initialized inside the try so an init failure is reported like any
        # other playback error instead of escaping as a traceback.
        pygame.mixer.init()
        pygame.mixer.music.load(file_path)
        pygame.mixer.music.play()

        # Reuse one clock while polling rather than allocating one per pass.
        clock = pygame.time.Clock()
        while pygame.mixer.music.get_busy():
            clock.tick(10)
    except pygame.error as e:
        print(f"Couldn't play {file_path}: {e}")
    finally:
        # Release the audio device only if the mixer actually came up.
        if pygame.mixer.get_init():
            pygame.mixer.quit()
# Voice-assistant loop: listen, transcribe, query the chat model, speak the
# reply. There is no exit condition; it runs until the process is interrupted.
while True:
    # Capture one utterance from the microphone.
    with sr.Microphone() as source:
        print("Listening...")
        audio = r.listen(source)

    # Transcribe the utterance; on any failure report it and listen again.
    try:
        print("Recognizing...")
        task = r.recognize_google(audio)
        print(f"User said: {task}")
    except sr.UnknownValueError:
        print("Could not understand audio")
        continue
    except Exception as e:
        print(f"Error: {e}")
        continue

    # Send the transcript to the chat model (`llm` is an OpenAIChat instance,
    # so the log labels are model-neutral rather than naming another vendor).
    print("Running language model...")
    reply = llm(task)
    print(f"Model output: {reply}")

    # Synthesize the reply to the audio file configured on `tts`.
    print("Running text-to-speech model...")
    speech_result = tts.run_and_save(reply)
    print(f"Text-to-speech output: {speech_result}")

    # NOTE(review): presumably this pause lets the audio file finish being
    # written before playback — confirm whether run_and_save is synchronous.
    time.sleep(5)

    # Must match the saved_filepath given to OpenAITTS at module setup.
    playsound("runs/tts_speech.wav")