You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
136 lines
3.8 KiB
136 lines
3.8 KiB
import os
|
|
import discord
|
|
from discord.ext import commands
|
|
import interpreter
|
|
import dotenv
|
|
import whisper
|
|
|
|
# Load environment variables from a local .env file before reading any config.
dotenv.load_dotenv(".env")

# Discord application user id. NOTE(review): only referenced by the disabled
# mention-filter in on_message — confirm it is still needed.
bot_id = os.getenv("BOT_ID")
# Gateway token consumed by client.run() at the bottom of the file.
bot_token = os.getenv("DISCORD_TOKEN")

# Credentials for the open-interpreter backend.
interpreter.api_key = os.getenv("OPENAI_API_KEY")
# interpreter.api_base = os.getenv("API_BASE")
# interpreter.auto_run = True
|
|
|
|
|
|
def split_text(text, chunk_size=1500):
    """Split *text* into consecutive chunks of at most *chunk_size* characters.

    Args:
        text: The string to split. An empty string yields an empty list.
        chunk_size: Maximum length of each chunk (default 1500, safely under
            Discord's 2000-character message limit).

    Returns:
        A list of substrings that concatenate back to *text*; the final chunk
        may be shorter than *chunk_size*.
    """
    return [text[i : i + chunk_size] for i in range(0, len(text), chunk_size)]
|
|
|
|
|
|
# discord initial
# Enable all gateway intents (including the privileged message-content intent)
# so on_message can read message text.
intents = discord.Intents.all()
intents.message_content = True
client = commands.Bot(command_prefix="$", intents=intents)

# NOTE(review): module-level scratch state; neither name is used anywhere in
# this file — confirm before removing.
message_chunks = []
send_image = False

# Load the Whisper "base" speech-to-text model once at startup; used by
# transcribe(). This download/load can take a while on first run.
model = whisper.load_model("base")
|
|
|
|
|
|
def transcribe(audio):
    """Transcribe an audio file to text with the module-level Whisper model.

    Args:
        audio: Path to an audio file on disk.

    Returns:
        The decoded transcription text (first 30 seconds only — the input is
        padded/trimmed to Whisper's fixed window).
    """
    # Load the waveform and force it to Whisper's fixed 30-second window.
    waveform = whisper.pad_or_trim(whisper.load_audio(audio))

    # Log-Mel spectrogram on the same device as the model.
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Language detection; the probabilities are currently unused.
    _, probs = model.detect_language(mel)

    # Decode with default options and return the plain text.
    decoded = whisper.decode(model, mel, whisper.DecodingOptions())
    return decoded.text
|
|
|
|
|
|
@client.event
async def on_message(message):
    """Forward every ordinary message to the interpreter and post its reply.

    Runs prefix commands first, then asks open-interpreter for a response to
    the raw message content and sends the final answer back to the channel,
    split into <=2000-character pieces.
    """
    await client.process_commands(message)

    # Ignore our own messages (otherwise the bot answers itself in an
    # infinite loop) and prefix commands, which process_commands handled.
    if message.author == client.user or message.content.startswith("$"):
        return

    response = []
    for chunk in interpreter.chat(message.content, display=False, stream=False):
        if "message" in chunk:
            response.append(chunk["message"])

    # The interpreter may yield no "message" chunks at all; indexing [-1]
    # on an empty list would raise IndexError.
    if not response:
        return
    last_response = response[-1]

    max_message_length = 2000  # Discord's max message length is 2000 characters

    # Splitting the message into chunks of 2000 characters
    response_chunks = [
        last_response[i : i + max_message_length]
        for i in range(0, len(last_response), max_message_length)
    ]
    # Sending each chunk as a separate message
    for chunk in response_chunks:
        await message.channel.send(chunk)
|
|
|
|
|
|
@client.command()
async def join(ctx):
    """Join the voice channel the invoking user is currently in."""
    voice_state = ctx.author.voice
    if voice_state is None:
        print("not in a voice channel!")
        return
    print("joining..")
    await voice_state.channel.connect()
    print("joined.")
|
|
|
|
|
|
@client.command()
async def leave(ctx):
    """Disconnect from the current voice channel, if connected."""
    vc = ctx.voice_client
    if vc is None:
        print("not in a voice channel!")
        return
    await vc.disconnect()
|
|
|
|
|
|
@client.command()
async def listen(ctx):
    """Start recording voice audio; callback() fires when recording stops."""
    vc = ctx.voice_client
    if vc is None:
        print("not in a voice channel!")
        return
    print("trying to listen..")
    # WAV sink; the recorded audio is handed to callback(sink, ctx) later.
    vc.start_recording(discord.sinks.WaveSink(), callback, ctx)
    print("listening..")
|
|
|
|
|
|
async def callback(sink: discord.sinks, ctx):
    """Recording-finished hook: save the invoker's audio, transcribe, reply.

    Only the audio of the user who started the recording is processed; it is
    written to "audio.wav", run through transcribe(), fed to the interpreter,
    and the collected reply is sent to the invoking channel.
    """
    print("in callback..")
    for user_id, user_audio in sink.audio_data.items():
        # Skip everyone except the command invoker.
        if user_id != ctx.author.id:
            continue

        print("saving audio..")
        print(user_id)

        # Dump the captured WAV bytes to a fixed scratch file.
        filename = "audio.wav"
        with open(filename, "wb") as f:
            f.write(user_audio.file.getvalue())
        print("audio saved.")

        transcription = transcribe(filename)
        print(transcription)

        # Collect every "message" chunk from the streamed interpreter reply.
        response = []
        for chunk in interpreter.chat(transcription, display=False, stream=True):
            if "message" in chunk:
                response.append(chunk["message"])
        await ctx.message.channel.send(" ".join(response))
|
|
|
|
|
|
@client.command()
async def stop(ctx):
    """Stop an in-progress voice recording.

    Guards against not being connected: ctx.voice_client is None outside a
    voice channel, and the original unguarded call raised AttributeError.
    Matches the None-handling of the leave/listen commands.
    """
    if ctx.voice_client:
        ctx.voice_client.stop_recording()
    else:
        print("not in a voice channel!")
|
|
|
|
|
|
@client.event
async def on_ready():
    """Log a confirmation once the gateway connection is established."""
    print(f"We have logged in as {client.user}")
|
|
|
|
|
|
# Start the bot; blocks until the process is stopped.
client.run(bot_token)
|