You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
136 lines
3.8 KiB
136 lines
3.8 KiB
import os
|
|
import discord
|
|
from discord.ext import commands
|
|
import interpreter
|
|
import dotenv
|
|
import whisper
|
|
|
|
# Load environment variables from a local .env file before reading any config.
dotenv.load_dotenv(".env")

# Discord application user id. NOTE(review): only referenced by the disabled
# mention-filter in on_message — confirm it is still needed.
bot_id = os.getenv("BOT_ID")
# Gateway token consumed by client.run() at the bottom of the file.
bot_token = os.getenv("DISCORD_TOKEN")

# Credentials for the open-interpreter backend.
interpreter.api_key = os.getenv("OPENAI_API_KEY")
# interpreter.api_base = os.getenv("API_BASE")
# interpreter.auto_run = True
|
|
|
|
|
|
def split_text(text, chunk_size=1500):
    """Split *text* into consecutive chunks of at most *chunk_size* characters.

    Args:
        text: The string to split. An empty string yields an empty list.
        chunk_size: Maximum length of each chunk (default 1500, safely under
            Discord's 2000-character message limit).

    Returns:
        A list of substrings that concatenate back to *text*; the final chunk
        may be shorter than *chunk_size*.
    """
    return [text[i : i + chunk_size] for i in range(0, len(text), chunk_size)]
|
|
|
|
|
|
# discord initial
# Enable all gateway intents (including the privileged message-content intent)
# so on_message can read message text.
intents = discord.Intents.all()
intents.message_content = True
client = commands.Bot(command_prefix="$", intents=intents)

# NOTE(review): module-level scratch state; neither name is used anywhere in
# this file — confirm before removing.
message_chunks = []
send_image = False

# Load the Whisper "base" speech-to-text model once at startup; used by
# transcribe(). This download/load can take a while on first run.
model = whisper.load_model("base")
|
|
|
|
|
|
def transcribe(audio):
    """Transcribe an audio file to text with the module-level Whisper model.

    Args:
        audio: Path to an audio file on disk.

    Returns:
        The decoded transcription text (first 30 seconds only — the input is
        padded/trimmed to Whisper's fixed window).
    """
    # Load the waveform and force it to Whisper's fixed 30-second window.
    waveform = whisper.pad_or_trim(whisper.load_audio(audio))

    # Log-Mel spectrogram on the same device as the model.
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Language detection; the probabilities are currently unused.
    _, probs = model.detect_language(mel)

    # Decode with default options and return the plain text.
    decoded = whisper.decode(model, mel, whisper.DecodingOptions())
    return decoded.text
|
|
|
|
|
|
@client.event
async def on_message(message):
    """Forward every ordinary message to the interpreter and post its reply.

    Runs prefix commands first, then asks open-interpreter for a response to
    the raw message content and sends the final answer back to the channel,
    split into <=2000-character pieces.
    """
    await client.process_commands(message)

    # Ignore our own messages (otherwise the bot answers itself in an
    # infinite loop) and prefix commands, which process_commands handled.
    if message.author == client.user or message.content.startswith("$"):
        return

    response = []
    for chunk in interpreter.chat(message.content, display=False, stream=False):
        if "message" in chunk:
            response.append(chunk["message"])

    # The interpreter may yield no "message" chunks at all; indexing [-1]
    # on an empty list would raise IndexError.
    if not response:
        return
    last_response = response[-1]

    max_message_length = 2000  # Discord's max message length is 2000 characters

    # Splitting the message into chunks of 2000 characters
    response_chunks = [
        last_response[i : i + max_message_length]
        for i in range(0, len(last_response), max_message_length)
    ]
    # Sending each chunk as a separate message
    for chunk in response_chunks:
        await message.channel.send(chunk)
|
|
|
|
|
|
@client.command()
async def join(ctx):
    """Join the voice channel the invoking user is currently in."""
    voice_state = ctx.author.voice
    if voice_state is None:
        print("not in a voice channel!")
        return
    print("joining..")
    await voice_state.channel.connect()
    print("joined.")
|
|
|
|
|
|
@client.command()
async def leave(ctx):
    """Disconnect from the current voice channel, if connected."""
    vc = ctx.voice_client
    if vc is None:
        print("not in a voice channel!")
        return
    await vc.disconnect()
|
|
|
|
|
|
@client.command()
async def listen(ctx):
    """Start recording voice audio; callback() fires when recording stops."""
    vc = ctx.voice_client
    if vc is None:
        print("not in a voice channel!")
        return
    print("trying to listen..")
    # WAV sink; the recorded audio is handed to callback(sink, ctx) later.
    vc.start_recording(discord.sinks.WaveSink(), callback, ctx)
    print("listening..")
|
|
|
|
|
|
async def callback(sink: discord.sinks, ctx):
    """Recording-finished hook: save the invoker's audio, transcribe, reply.

    Only the audio of the user who started the recording is processed; it is
    written to "audio.wav", run through transcribe(), fed to the interpreter,
    and the collected reply is sent to the invoking channel.
    """
    print("in callback..")
    for user_id, user_audio in sink.audio_data.items():
        # Skip everyone except the command invoker.
        if user_id != ctx.author.id:
            continue

        print("saving audio..")
        print(user_id)

        # Dump the captured WAV bytes to a fixed scratch file.
        filename = "audio.wav"
        with open(filename, "wb") as f:
            f.write(user_audio.file.getvalue())
        print("audio saved.")

        transcription = transcribe(filename)
        print(transcription)

        # Collect every "message" chunk from the streamed interpreter reply.
        response = []
        for chunk in interpreter.chat(transcription, display=False, stream=True):
            if "message" in chunk:
                response.append(chunk["message"])
        await ctx.message.channel.send(" ".join(response))
|
|
|
|
|
|
@client.command()
async def stop(ctx):
    """Stop an in-progress voice recording.

    Guards against not being connected: ctx.voice_client is None outside a
    voice channel, and the original unguarded call raised AttributeError.
    Matches the None-handling of the leave/listen commands.
    """
    if ctx.voice_client:
        ctx.voice_client.stop_recording()
    else:
        print("not in a voice channel!")
|
|
|
|
|
|
@client.event
async def on_ready():
    """Log a confirmation once the gateway connection is established."""
    print(f"We have logged in as {client.user}")
|
|
|
|
|
|
# Start the bot; blocks until the process is stopped.
client.run(bot_token)
|