parent
a39e527b32
commit
3133a0d4fc
@ -0,0 +1,29 @@
|
|||||||
|
# Use an official Python runtime as a parent image
FROM python:3.10

# Set the working directory in the container to /app
WORKDIR /app

# Copy the dependency manifest first so the pip layer below is cached
# until requirements.txt itself changes (not on every source edit).
COPY requirements.txt /app/

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Clone the Pycord-Development repository and install it.
# Install from the clone path (no `cd`, per DL3003) and remove the
# checkout in the same layer so it doesn't bloat the image.
RUN git clone https://github.com/Pycord-Development/pycord /tmp/pycord && \
    pip install --no-cache-dir -U /tmp/pycord && \
    rm -rf /tmp/pycord

# Add the current directory contents into the container at /app
# (COPY preferred over ADD for plain local files, per DL3020).
COPY . /app

# Make port 80 available to the world outside this container
EXPOSE 80

# Runtime configuration. These are intentionally empty defaults —
# supply real values with `docker run -e …` or an env file; never
# bake secrets into the image.
ENV ELEVEN_LABS_API_KEY="" \
    OPENAI_API_KEY="" \
    DISCORD_TOKEN="" \
    API_KEY="" \
    API_BASE="" \
    SYSTEM_MESSAGE=""

# Run main.py when the container launches
# (previous comment referred to DiscordInterpreter.py, which is not what CMD runs)
CMD ["python", "main.py"]
|
@ -0,0 +1,6 @@
|
|||||||
|
# Compose definition for the interpreter Discord bot.
# NOTE(review): the top-level `version` key is obsolete under Compose v2
# and ignored; kept here for compatibility with older docker-compose binaries.
version: '3'
services:
  my-python-app:
    # Build the image from the Dockerfile in this directory.
    build: .
    ports:
      # Map host port 80 to container port 80 (matches EXPOSE in the Dockerfile).
      - "80:80"
|
@ -0,0 +1,107 @@
|
|||||||
|
import os
|
||||||
|
import discord
|
||||||
|
from discord.ext import commands
|
||||||
|
import interpreter
|
||||||
|
import dotenv
|
||||||
|
from voice import transcribe
|
||||||
|
|
||||||
|
# Load tokens / API keys from a local .env file into the process environment.
dotenv.load_dotenv(".env")

# Discord bot token consumed by client.run() at the bottom of the file.
bot_token = os.getenv("DISCORD_TOKEN")

# Point the interpreter at the configured LLM endpoint.
interpreter.api_key = os.getenv("API_KEY")
interpreter.api_base = os.getenv("API_BASE")
# interpreter.auto_run = True
|
||||||
|
|
||||||
|
def split_text(text, chunk_size=1500):
    """Split ``text`` into consecutive pieces of at most ``chunk_size`` characters.

    Returns a list of substrings in original order; an empty string yields
    an empty list. The final piece may be shorter than ``chunk_size``.
    """
    pieces = []
    for start in range(0, len(text), chunk_size):
        pieces.append(text[start:start + chunk_size])
    return pieces
|
||||||
|
|
||||||
|
# discord initial
# Enable all gateway intents so handlers can see member/voice/message events.
intents = discord.Intents.all()
# NOTE(review): redundant — Intents.all() already enables message_content;
# kept explicitly since message_content is a privileged intent.
intents.message_content = True
client = commands.Bot(command_prefix="$", intents=intents)

# Module-level state.
# NOTE(review): neither name is referenced by the handlers below — candidates
# for removal once confirmed unused elsewhere.
message_chunks = []
send_image = False
|
||||||
|
|
||||||
|
@client.event
async def on_message(message):
    """Reply to messages that mention the bot, via interpreter.chat().

    Always forwards the message to the command processor first, then ignores
    our own messages, empty contents, and `$`-prefixed commands. The reply is
    the LAST message chunk the interpreter produced, split to fit Discord's
    2000-character limit.
    """
    await client.process_commands(message)

    # Guard: message.content can be empty (embeds/attachments) — the original
    # `message.content[0]` raised IndexError there.
    if message.author == client.user or not message.content or message.content.startswith('$'):
        return

    # Only respond when the bot itself is mentioned. Generalized from a
    # hard-coded ID ('<@1158923910855798804>') to this bot's actual ID.
    if client.user is None or f'<@{client.user.id}>' not in message.content:
        return

    response = []
    for chunk in interpreter.chat(message.content, display=False, stream=False):
        if 'message' in chunk:
            response.append(chunk['message'])

    # Guard: no 'message' chunks at all — the original `response[-1]`
    # raised IndexError in that case.
    if not response:
        return
    last_response = response[-1]

    max_message_length = 2000  # Discord's max message length is 2000 characters
    # Splitting the message into chunks of 2000 characters
    response_chunks = [last_response[i:i + max_message_length]
                       for i in range(0, len(last_response), max_message_length)]
    # Sending each chunk as a separate message
    for part in response_chunks:
        await message.channel.send(part)
|
||||||
|
|
||||||
|
|
||||||
|
@client.command()
async def join(ctx):
    """Connect the bot to the voice channel the command author is in."""
    # Guard clause: bail out unless the author is currently in a voice channel.
    if not ctx.author.voice:
        print("not in a voice channel!")
        return
    voice_channel = ctx.message.author.voice.channel
    print('joining..')
    await voice_channel.connect()
    print('joined.')
|
||||||
|
|
||||||
|
|
||||||
|
@client.command()
async def leave(ctx):
    """Disconnect the bot from its current voice channel, if connected."""
    # Guard clause: nothing to do unless we hold a voice connection.
    if not ctx.voice_client:
        print("not in a voice channel!")
        return
    await ctx.voice_client.disconnect()
|
||||||
|
|
||||||
|
|
||||||
|
@client.command()
async def listen(ctx):
    """Start recording voice in the current channel; `callback` receives the audio."""
    # Guard clause: recording requires an active voice connection.
    if not ctx.voice_client:
        print("not in a voice channel!")
        return
    print('trying to listen..')
    ctx.voice_client.start_recording(discord.sinks.WaveSink(), callback, ctx)
    print('listening..')
|
||||||
|
|
||||||
|
|
||||||
|
async def callback(sink: discord.sinks, ctx):
    """After recording stops: save the command author's audio, transcribe it,
    run it through the interpreter, and reply in the invoking channel.

    Fix: `transcription`/`response` were only bound when the author's audio
    was present in the sink; if it was absent, the final send raised
    NameError. The whole reply path is now scoped to the matching user.
    """
    print('in callback..')
    for user_id, audio in sink.audio_data.items():
        # Only the command author's recording is of interest.
        if user_id != ctx.author.id:
            continue
        print('saving audio..')
        audio: discord.sinks.core.AudioData = audio
        print(user_id)
        # NOTE(review): fixed filename in the CWD — concurrent recordings
        # would clobber each other; confirm single-guild usage.
        filename = "audio.wav"
        with open(filename, "wb") as f:
            f.write(audio.file.getvalue())
        print('audio saved.')
        transcription = transcribe(filename)
        print(transcription)
        response = []
        for chunk in interpreter.chat(transcription, display=False, stream=True):
            if 'message' in chunk:
                response.append(chunk['message'])
        await ctx.message.channel.send(' '.join(response))
|
||||||
|
|
||||||
|
|
||||||
|
@client.command()
async def stop(ctx):
    """Stop an in-progress voice recording (this triggers `callback`).

    Fix: the original called stop_recording() unguarded, which raises when
    the bot holds no voice connection; handle that case like the sibling
    commands do.
    """
    if ctx.voice_client:
        ctx.voice_client.stop_recording()
    else:
        print("not in a voice channel!")
|
||||||
|
|
||||||
|
|
||||||
|
@client.event
async def on_ready():
    # Fired once the gateway connection is established and the cache is ready.
    print(f"We have logged in as {client.user}")
|
||||||
|
|
||||||
|
# Start the bot; blocks until the process exits.
client.run(bot_token)
|
@ -0,0 +1,121 @@
|
|||||||
|
# Standard library
import io
import os
import time

# Third-party
import dotenv
import gradio as gr
import gradio_client as grc
import whisper
from elevenlabs import generate, play, set_api_key
from pydub import AudioSegment

import interpreter
|
||||||
|
|
||||||
|
# Load API keys / tokens from a local .env file into the process environment.
dotenv.load_dotenv(".env")

# interpreter.model = "TheBloke/Mistral-7B-OpenOrca-GGUF"
# Let the interpreter execute generated code without asking for confirmation.
interpreter.auto_run = True
# Whisper speech-to-text model used by transcribe() below ("base" = small/fast).
model = whisper.load_model("base")
|
||||||
|
|
||||||
|
|
||||||
|
def transcribe(audio):
    """Transcribe an audio file with Whisper and return the decoded text.

    Parameters
    ----------
    audio : str
        Path to an audio file readable by whisper.load_audio (e.g. a WAV).

    Returns
    -------
    str
        The decoded transcription text. Input is padded/trimmed to 30s,
        so longer recordings are truncated.
    """

    # load audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # detect the spoken language
    # NOTE(review): `probs` is never used afterwards — this call only costs
    # compute; confirm whether language detection is needed before removing.
    _, probs = model.detect_language(mel)

    # decode the audio
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    return result.text
|
||||||
|
# Configure ElevenLabs with the key from the environment.
# BUG FIX: the literal string "ELEVEN_LABS_API_KEY" was previously passed
# as the API key itself instead of the environment variable's value, so
# every ElevenLabs call would fail authentication.
set_api_key(os.getenv("ELEVEN_LABS_API_KEY"))
|
||||||
|
def get_audio_length(audio_bytes):
    """Return the duration, in seconds, of an MP3 given as a byte string."""
    # Wrap the raw bytes so PyDub can read them like a file object.
    buffer = io.BytesIO(audio_bytes)
    segment = AudioSegment.from_mp3(buffer)
    # len() of an AudioSegment is its duration in milliseconds;
    # convert to (float) seconds for the caller.
    return len(segment) / 1000.0
|
||||||
|
|
||||||
|
|
||||||
|
def speak(text):
    """Synthesize `text` with ElevenLabs, play it, and block for its duration.

    Fix: removed the unused local `speaking = True` — it was never read.
    """
    audio = generate(
        text=text,
        voice="Daniel"
    )
    play(audio, notebook=True)

    # Sleep for the clip's length so successive utterances don't overlap —
    # presumably play(notebook=True) returns before playback finishes
    # (TODO confirm against the elevenlabs API).
    audio_length = get_audio_length(audio)
    time.sleep(audio_length)
|
||||||
|
|
||||||
|
# @title Text-only JARVIS
# @markdown Run this cell for a ChatGPT-like interface.


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()

    def user(user_message, history):
        # Append the user's turn (bot reply pending) and clear the textbox.
        return "", history + [[user_message, None]]

    def bot(history):
        # Generator: streams the interpreter's reply into the last history
        # entry, yielding after each chunk so Gradio re-renders incrementally.
        # `active_block_type` tracks whether we are inside a prose message or
        # a fenced code block, so fences are opened only on transitions.

        user_message = history[-1][0]
        history[-1][1] = ""
        active_block_type = ""

        for chunk in interpreter.chat(user_message, stream=True, display=False):

            # Message
            if "message" in chunk:
                if active_block_type != "message":
                    active_block_type = "message"
                history[-1][1] += chunk["message"]
                yield history

            # Code
            if "language" in chunk:
                language = chunk["language"]
            if "code" in chunk:
                if active_block_type != "code":
                    active_block_type = "code"
                    # NOTE(review): `language` is only bound once a "language"
                    # chunk has arrived; a "code" chunk arriving first would
                    # raise NameError — confirm chunk ordering upstream.
                    history[-1][1] += f"\n```{language}\n"
                history[-1][1] += chunk["code"]
                yield history

            # Output
            if "executing" in chunk:
                # Close the code fence and open a text fence for run output.
                history[-1][1] += "\n```\n\n```text\n"
                yield history
            if "output" in chunk:
                if chunk["output"] != "KeyboardInterrupt":
                    history[-1][1] += chunk["output"] + "\n"
                yield history
            if "end_of_execution" in chunk:
                history[-1][1] = history[-1][1].strip()
                history[-1][1] += "\n```\n"
                yield history

    # On submit: record the user's turn (fast, unqueued), then stream
    # the bot's reply into the chatbot component.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )

if __name__ == '__main__':
    # Queueing is required for generator (streaming) handlers like bot().
    demo.queue()
    demo.launch(debug=True)
|
Loading…
Reference in new issue