diff --git a/.env.example b/.env.example
index a367261e..c0023751 100644
--- a/.env.example
+++ b/.env.example
@@ -44,3 +44,9 @@ REVGPT_DISABLE_HISTORY=True
 REVGPT_PUID="your_puid_here"
 REVGPT_UNVERIFIED_PLUGIN_DOMAINS="showme.redstarplugin.com"
 CHATGPT_BASE_URL=""
+
+#Discord Bot
+################################
+SAVE_DIRECTORY=""
+STORAGE_SERVICE=""
+DISCORD_TOKEN=""
diff --git a/apps/__init__.py b/apps/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/apps/discord.py b/apps/discord.py
index b6168335..a03d0835 100644
--- a/apps/discord.py
+++ b/apps/discord.py
@@ -1,92 +1,174 @@
-import discord
-from discord.ext import commands
-from swarms.models import OpenAIChat
-from swarms.agents import OmniModalAgent
 import os
+import asyncio
+import dalle3
+import discord
+import responses
+from invoke import Executor
 from dotenv import load_dotenv
 from discord.ext import commands
 
-load_dotenv()
-
-intents = discord.Intents.default()
-intents.messages = True
-intents.guilds = True
-intents.voice_states = True
-intents.message_content = True
-
-bot = commands.Bot(command_prefix="!", intents=intents)
-# Setup
-
-DISCORD_TOKEN = os.getenv("DISCORD_TOKEN")
-
-# Initialize the OmniModalAgent
-llm = OpenAIChat(model_name="gpt-4")
-agent = OmniModalAgent(llm)
-
-
-@bot.event
-async def on_ready():
-    print(f"We have logged in as {bot.user}")
-
-
-@bot.command()
-async def greet(ctx):
-    """Greets the user."""
-    await ctx.send(f"Hello, {ctx.author.name}!")
-
-
-@bot.command()
-async def run(ctx, *, description: str):
-    """Generates a video based on the given description."""
-    response = agent.run(
-        description
-    )  # Assuming the response provides information or a link to the generated video
-    await ctx.send(response)
-
-
-@bot.command()
-async def help_me(ctx):
-    """Provides a list of commands and their descriptions."""
-    help_text = """
-    - `!greet`: Greets you.
-    - `!run [description]`: Generates a video based on the given description.
-    - `!help_me`: Provides this list of commands and their descriptions.
-    """
-    await ctx.send(help_text)
-
-@bot.event
-async def on_command_error(ctx, error):
-    """Handles errors that occur while executing commands."""
-    if isinstance(error, commands.CommandNotFound):
-        await ctx.send("That command does not exist!")
-    else:
-        await ctx.send(f"An error occurred: {error}")
-
-def setup(bot):
-    @bot.command()
-    async def join(ctx):
-        """Joins the voice channel that the user is in."""
-        if ctx.author.voice:
-            channel = ctx.author.voice.channel
-            await channel.connect()
-        else:
-            await ctx.send("You are not in a voice channel!")
-
-    @bot.command()
-    async def leave(ctx):
-        """Leaves the voice channel that the bot is in."""
-        if ctx.voice_client:
-            await ctx.voice_client.disconnect()
-        else:
-            await ctx.send("I am not in a voice channel!")
-
-# voice_transcription.py
-from discord.ext import commands
-
-def setup(bot):
-    @bot.command()
-    async def listen(ctx):
-        """Starts listening to voice in the voice channel that the bot is in."""
-        # ... (code for listening to voice and transcribing it goes here)
-
-bot.run("DISCORD_TOKEN")
+
+class Bot:
+    """Discord bot that exposes an agent and an LLM through chat commands.
+
+    Args:
+        agent: Object whose ``run(text)`` result answers ``send_text`` by default.
+        llm: Object whose ``run(text)`` result answers ``send_text`` when the
+            agent is bypassed.
+        command_prefix: Prefix that marks a chat message as a bot command.
+    """
+
+    def __init__(self, agent, llm, command_prefix="!"):
+        load_dotenv()
+
+        intents = discord.Intents.default()
+        intents.messages = True
+        intents.guilds = True
+        intents.voice_states = True
+        intents.message_content = True
+
+        # setup
+        self.llm = llm
+        self.agent = agent
+        self.bot = commands.Bot(command_prefix=command_prefix, intents=intents)
+        self.discord_token = os.getenv("DISCORD_TOKEN")
+        self.storage_service = os.getenv("STORAGE_SERVICE")
+
+        @self.bot.event
+        async def on_ready():
+            print(f"we have logged in as {self.bot.user}")
+
+        @self.bot.command()
+        async def greet(ctx):
+            """greets the user."""
+            await ctx.send(f"hello, {ctx.author.name}!")
+
+        @self.bot.command()
+        async def help_me(ctx):
+            """provides a list of commands and their descriptions."""
+            # List the commands this class actually registers; the old
+            # `run` entry referred to a command that no longer exists.
+            help_text = """
+            - `!greet`: greets you.
+            - `!join` / `!leave`: joins or leaves your voice channel.
+            - `!listen`: starts recording the current voice channel.
+            - `!generate_image [prompt]`: generates images based on the given prompt.
+            - `!send_text [text]`: sends the text to the agent and returns the response.
+            - `!help_me`: provides this list of commands and their descriptions.
+            """
+            await ctx.send(help_text)
+
+        @self.bot.event
+        async def on_command_error(ctx, error):
+            """handles errors that occur while executing commands."""
+            if isinstance(error, commands.CommandNotFound):
+                await ctx.send("that command does not exist!")
+            else:
+                await ctx.send(f"an error occurred: {error}")
+
+        @self.bot.command()
+        async def join(ctx):
+            """joins the voice channel that the user is in."""
+            if ctx.author.voice:
+                channel = ctx.author.voice.channel
+                await channel.connect()
+            else:
+                await ctx.send("you are not in a voice channel!")
+
+        @self.bot.command()
+        async def leave(ctx):
+            """leaves the voice channel that the self.bot is in."""
+            if ctx.voice_client:
+                await ctx.voice_client.disconnect()
+            else:
+                await ctx.send("i am not in a voice channel!")
+
+        # voice_transcription.py
+        @self.bot.command()
+        async def listen(ctx):
+            """starts listening to voice in the voice channel that the bot is in."""
+            if ctx.voice_client:
+                # NOTE(review): `discord.sinks` and `start_recording` are not part
+                # of discord.py; this presumably targets py-cord, whose
+                # `start_recording` also expects a finished-callback -- confirm.
+                sink = discord.sinks.WaveSink('audio.wav')
+                # start recording
+                ctx.voice_client.start_recording(sink)
+                await ctx.send("started listening and recording.")
+            else:
+                await ctx.send("i am not in a voice channel!")
+
+        # image_generator.py
+        @self.bot.command()
+        async def generate_image(ctx, *, prompt: str):
+            """generates images based on the provided prompt"""
+            await ctx.send(f"generating images for prompt: `{prompt}`...")
+            loop = asyncio.get_running_loop()
+
+            # NOTE(review): `dalle3` is imported as a module, so calling it
+            # raises TypeError -- confirm which class/factory was intended.
+            model_instance = dalle3()
+            # `None` selects the loop's default thread pool; invoke's
+            # `Executor` is not a concurrent.futures executor.
+            future = loop.run_in_executor(None, model_instance.run, prompt)
+
+            try:
+                # wait for the dalle request to complete, with a timeout of 300 seconds
+                await asyncio.wait_for(future, timeout=300)
+                print("done generating images!")
+
+                # list all files in the save_directory
+                all_files = [
+                    os.path.join(root, file)
+                    for root, _, files in os.walk(os.environ["SAVE_DIRECTORY"])
+                    for file in files
+                ]
+
+                # sort files by their creation time (latest first)
+                sorted_files = sorted(all_files, key=os.path.getctime, reverse=True)
+
+                # get the 4 most recent files
+                latest_files = sorted_files[:4]
+                if not latest_files:
+                    await ctx.send("no generated images were found.")
+                    return
+                print(f"sending {len(latest_files)} images to discord...")
+
+                # send all the latest images in a single message as attachments
+                await ctx.send(files=[discord.File(filepath) for filepath in latest_files])
+
+            except asyncio.TimeoutError:
+                await ctx.send("the request took too long! it might have been censored or you're out of boosts. please try entering the prompt again.")
+            except Exception as e:
+                await ctx.send(f"an error occurred: {e}")
+
+        @self.bot.command()
+        async def send_text(ctx, *, text: str, use_agent: bool = True):
+            """sends the provided text to the worker and returns the response"""
+            if use_agent:
+                response = self.agent.run(text)
+            else:
+                response = self.llm.run(text)
+            await ctx.send(response)
+
+    def add_command(self, name, func):
+        """Register ``func`` as a bot command named ``name``.
+
+        The command passes its chat arguments to ``func`` positionally and
+        sends back whatever ``func`` returns.
+        """
+
+        @self.bot.command(name=name)
+        async def command(ctx, *args):
+            response = func(*args)
+            await ctx.send(response)
+
+    def run(self):
+        """Start the bot with the token read from ``DISCORD_TOKEN``.
+
+        Raises:
+            ValueError: If ``DISCORD_TOKEN`` is not set.
+        """
+        if not self.discord_token:
+            raise ValueError("DISCORD_TOKEN is not set")
+        self.bot.run(self.discord_token)
diff --git a/docs/applications/discord.md b/docs/applications/discord.md
new file mode 100644
index 00000000..cae3c8c1
--- /dev/null
+++ b/docs/applications/discord.md
@@ -0,0 +1,103 @@
+## Usage Documentation: Discord Bot with Advanced Features
+
+---
+
+### Overview:
+
+This code provides a structure for a Discord bot with advanced features such as voice channel interactions, image generation, and text-based interactions using OpenAI models.
+
+---
+
+### Setup:
+
+1. Ensure that the necessary libraries are installed:
+```bash
+pip install discord.py python-dotenv dalle3 invoke openai
+```
+
+2. 
Create a `.env` file in the same directory as your bot script and add the following:
+```
+DISCORD_TOKEN=your_discord_bot_token
+STORAGE_SERVICE=your_storage_service_endpoint
+SAVE_DIRECTORY=path_to_save_generated_images
+```
+
+---
+
+### Bot Class and its Methods:
+
+#### `__init__(self, agent, llm, command_prefix="!")`:
+
+Initializes the bot with the given agent, language model (`llm`), and a command prefix (default is `!`).
+
+#### `add_command(self, name, func)`:
+
+Allows you to dynamically add new commands to the bot. The `name` is the command's name and `func` is the function to execute when the command is called.
+
+#### `run(self)`:
+
+Starts the bot using the `DISCORD_TOKEN` from the `.env` file.
+
+---
+
+### Commands:
+
+1. **!greet**: Greets the user.
+
+2. **!help_me**: Provides a list of commands and their descriptions.
+
+3. **!join**: Joins the voice channel the user is in.
+
+4. **!leave**: Leaves the voice channel the bot is currently in.
+
+5. **!listen**: Starts listening to voice in the current voice channel and records the audio.
+
+6. **!generate_image [prompt]**: Generates images based on the provided prompt using the DALL-E3 model.
+
+7. **!send_text [text] [use_agent=True]**: Sends the provided text to the worker (either the agent or the LLM) and returns the response.
+
+---
+
+### Usage:
+
+Initialize the `llm` (Large Language Model) with your OpenAI API key:
+
+```python
+from swarms.models import OpenAIChat
+llm = OpenAIChat(
+    openai_api_key="Your_OpenAI_API_Key",
+    temperature=0.5,
+)
+```
+
+Initialize the bot with an agent and the `llm`:
+
+```python
+from apps.discord import Bot
+from swarms.agents import OmniModalAgent
+bot = Bot(agent=OmniModalAgent(llm), llm=llm)
+```
+
+Once the bot is running, send it a task from Discord with the `!send_text` command:
+
+```
+!send_text What were the winning Boston Marathon times for the past 5 years (ending in 2022)? Generate a table of the year, name, country of origin, and times.
+```
+
+Start the bot:
+
+```python
+bot.run()
+```
+
+---
+
+### Additional Notes:
+
+- The bot makes use of the `dalle3` library for image generation. Ensure you have the model and necessary setup for it.
+
+- For the storage service, you might want to integrate with a cloud service like Google Cloud Storage or AWS S3 to store and retrieve generated images. The given code assumes a method `.upload()` for the storage service to upload files.
+
+- Ensure that you've granted the bot necessary permissions on Discord, especially if you want to use voice channel features.
+
+- Handle API keys and tokens securely. Avoid hardcoding them directly into your code. Use environment variables or secure secret management tools.
diff --git a/playground/apps/discord_example.py b/playground/apps/discord_example.py
new file mode 100644
index 00000000..2010f71e
--- /dev/null
+++ b/playground/apps/discord_example.py
@@ -0,0 +1,14 @@
+from swarms.agents import OmniModalAgent
+from swarms.models import OpenAIChat
+from apps.discord import Bot
+
+llm = OpenAIChat(
+    openai_api_key="Enter in your key",
+    temperature=0.5,
+)
+agent = OmniModalAgent(llm)
+
+# Tasks are sent from Discord once the bot is running, e.g. `!send_text <task>`.
+bot = Bot(agent=agent, llm=llm)
+bot.run()
+