commit 1a2452f72e (parent 2eb0751b28)

.env
@@ -0,0 +1,6 @@
ELEVEN_LABS_API_KEY="<your_api_key>"  # https://elevenlabs.io/speech-synthesis
OPENAI_API_KEY="your_api_key"
DISCORD_TOKEN="your_discord_token"
BOT_ID='your_bot_id'
API_BASE="open_ai_api_base"
SYSTEM_MESSAGE="your_system_message"
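
These placeholders are read at runtime with python-dotenv, as jarvis.py and main.py below do. A minimal sketch of the pattern (variable names taken from the file above):

import os

import dotenv

dotenv.load_dotenv(".env")  # copy the key/value pairs above into the process environment
discord_token = os.getenv("DISCORD_TOKEN")        # returns None if the key is absent
eleven_labs_key = os.getenv("ELEVEN_LABS_API_KEY")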

Dockerfile
@@ -0,0 +1,22 @@
# Use an official Python runtime as a parent image
FROM python:3.10

# Set the working directory in the container to /app
WORKDIR /app

# Add the current directory contents into the container at /app
ADD . /app

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Clone the Pycord-Development repository and install it
RUN git clone https://github.com/Pycord-Development/pycord && \
    cd pycord && \
    pip install -U .

# Make port 80 available to the world outside this container
EXPOSE 80

# Run main.py when the container launches
CMD ["python", "main.py"]
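
If building by hand rather than through Compose, something like docker build -t jarvis-bot . followed by docker run --env-file .env -p 80:80 jarvis-bot does the job (the jarvis-bot tag is illustrative). Note that ADD . /app copies the whole build context, including .env, into the image, so a .dockerignore is worth adding if you plan to distribute the image.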

docker-compose.yml
@@ -0,0 +1,10 @@
version: '3'
services:
  your-service:
    build:
      context: .
      dockerfile: Dockerfile
    env_file:
      - .env
    ports:
      - "80:80"
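
With this in place, docker compose up --build builds the image from the Dockerfile above and injects the variables from .env into the container; your-service is a placeholder service name to rename as you see fit. Note that main.py is a Discord bot and does not listen on a port itself, so the "80:80" mapping is inert unless you add a web endpoint.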

jarvis.py
@@ -0,0 +1,121 @@
import io
import os
import time

import dotenv
import gradio as gr
import interpreter
import whisper
from elevenlabs import generate, play, set_api_key
from pydub import AudioSegment

dotenv.load_dotenv(".env")

# interpreter.model = "TheBloke/Mistral-7B-OpenOrca-GGUF"
interpreter.auto_run = True
model = whisper.load_model("base")


def transcribe(audio):
    # load audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # detect the spoken language
    _, probs = model.detect_language(mel)

    # decode the audio
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    return result.text


set_api_key(os.getenv("ELEVEN_LABS_API_KEY"))  # read the key's value from the environment


def get_audio_length(audio_bytes):
    # Create a BytesIO object from the byte array
    byte_io = io.BytesIO(audio_bytes)

    # Load the audio data with PyDub
    audio = AudioSegment.from_mp3(byte_io)

    # Get the length of the audio in milliseconds
    length_ms = len(audio)

    # Optionally convert to seconds
    length_s = length_ms / 1000.0

    return length_s


def speak(text):
    audio = generate(
        text=text,
        voice="Daniel",
    )
    play(audio, notebook=True)

    # Sleep for the clip's duration so consecutive replies don't overlap
    audio_length = get_audio_length(audio)
    time.sleep(audio_length)


# @title Text-only JARVIS
# @markdown Run this cell for a ChatGPT-like interface.

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()

    def user(user_message, history):
        return "", history + [[user_message, None]]

    def bot(history):
        user_message = history[-1][0]
        history[-1][1] = ""
        active_block_type = ""
        language = ""  # fallback in case a code chunk arrives before a language chunk

        for chunk in interpreter.chat(user_message, stream=True, display=False):

            # Message
            if "message" in chunk:
                if active_block_type != "message":
                    active_block_type = "message"
                history[-1][1] += chunk["message"]
                yield history

            # Code
            if "language" in chunk:
                language = chunk["language"]
            if "code" in chunk:
                if active_block_type != "code":
                    active_block_type = "code"
                    history[-1][1] += f"\n```{language}\n"
                history[-1][1] += chunk["code"]
                yield history

            # Output
            if "executing" in chunk:
                history[-1][1] += "\n```\n\n```text\n"
                yield history
            if "output" in chunk:
                if chunk["output"] != "KeyboardInterrupt":
                    history[-1][1] += chunk["output"] + "\n"
                    yield history
            if "end_of_execution" in chunk:
                history[-1][1] = history[-1][1].strip()
                history[-1][1] += "\n```\n"
                yield history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )

if __name__ == '__main__':
    demo.queue()
    demo.launch(debug=True)
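
The chunk-handling loop in bot() is the core of both entry points. As a self-contained sketch (not part of the diff; render and fake_chunks are illustrative, with chunk keys matching the ones bot() handles above), this is how the stream assembles into a markdown transcript:

def render(chunks):
    # Mirror bot(): accumulate message text, fence code by language, fence output
    out, active, language = "", "", ""
    for chunk in chunks:
        if "message" in chunk:
            active = "message"
            out += chunk["message"]
        if "language" in chunk:
            language = chunk["language"]
        if "code" in chunk:
            if active != "code":
                active = "code"
                out += f"\n```{language}\n"
            out += chunk["code"]
        if "executing" in chunk:
            out += "\n```\n\n```text\n"
        if "output" in chunk and chunk["output"] != "KeyboardInterrupt":
            out += chunk["output"] + "\n"
        if "end_of_execution" in chunk:
            out = out.strip() + "\n```\n"
    return out

fake_chunks = [
    {"message": "Calculating."},
    {"language": "python"},
    {"code": "print(2 + 2)"},
    {"executing": True},
    {"output": "4"},
    {"end_of_execution": True},
]
print(render(fake_chunks))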

main.py
@@ -0,0 +1,129 @@
import os

import discord
import dotenv
import interpreter
from discord.ext import commands
from jarvis import transcribe

dotenv.load_dotenv(".env")

bot_token = os.getenv("DISCORD_TOKEN")
bot_id = os.getenv("BOT_ID")

# interpreter.api_key = os.getenv("API_KEY")
# interpreter.api_base = os.getenv("API_BASE")
# interpreter.auto_run = True


def split_text(text, chunk_size=1500):
    # Slice the text into fixed-size chunks that fit under Discord's message limit
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
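
# For example (illustrative values): split_text("a" * 3200) yields two
# 1500-character chunks plus one 200-character chunk, each safely under
# Discord's 2000-character message cap.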


# discord initial
intents = discord.Intents.all()
intents.message_content = True
client = commands.Bot(command_prefix="$", intents=intents)


@client.event
async def on_message(message):
    await client.process_commands(message)
    bot_mention = f'<@{bot_id}>'
    # Only respond when the bot is mentioned, ignoring the bot's own messages
    # and prefix commands
    if (bot_mention not in message.content) or (message.author == client.user
                                                or message.content.startswith('$')):
        return
    response = []
    for chunk in interpreter.chat(message.content, display=False, stream=False):
        if 'message' in chunk:
            response.append(chunk['message'])
    if not response:
        return
    last_response = response[-1]

    max_message_length = 2000  # Discord's max message length is 2000 characters
    # Splitting the message into chunks of 2000 characters
    response_chunks = [last_response[i:i + max_message_length]
                       for i in range(0, len(last_response), max_message_length)]
    # Sending each chunk as a separate message
    for chunk in response_chunks:
        await message.channel.send(chunk)


@client.command()
async def join(ctx):
    if ctx.author.voice:
        channel = ctx.message.author.voice.channel
        print('joining..')
        await channel.connect()
        print('joined.')
    else:
        print("not in a voice channel!")


@client.command()
async def leave(ctx):
    if ctx.voice_client:
        await ctx.voice_client.disconnect()
    else:
        print("not in a voice channel!")


@client.command()
async def listen(ctx):
    if ctx.voice_client:
        print('trying to listen..')
        # Record the voice channel with Pycord's sink API; callback fires on $stop
        ctx.voice_client.start_recording(discord.sinks.WaveSink(), callback, ctx)
        print('listening..')
    else:
        print("not in a voice channel!")
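
# Typical flow: a user runs $join and then $listen; when $stop is invoked,
# Pycord calls callback() below with the audio captured per speaker.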


async def callback(sink: discord.sinks.Sink, ctx):
    print('in callback..')
    for user_id, audio in sink.audio_data.items():
        if user_id == ctx.author.id:
            print('saving audio..')
            audio: discord.sinks.core.AudioData = audio
            print(user_id)
            filename = "audio.wav"
            with open(filename, "wb") as f:
                f.write(audio.file.getvalue())
            print('audio saved.')
            # transcribe() is imported from jarvis.py, which owns the whisper model
            transcription = transcribe(filename)
            print(transcription)
            response = []
            for chunk in interpreter.chat(transcription, display=False, stream=True):
                if 'message' in chunk:
                    response.append(chunk['message'])
            await ctx.message.channel.send(' '.join(response))


@client.command()
async def stop(ctx):
    if ctx.voice_client:
        ctx.voice_client.stop_recording()
    else:
        print("not in a voice channel!")


@client.event
async def on_ready():
    print(f"We have logged in as {client.user}")


client.run(bot_token)