diff --git a/apps/open-sourcerer/Dockerfile b/apps/open-sourcerer/Dockerfile
new file mode 100644
index 00000000..6f52006b
--- /dev/null
+++ b/apps/open-sourcerer/Dockerfile
@@ -0,0 +1,29 @@
+# Use an official Python runtime as a parent image
+FROM python:3.10
+
+# Set the working directory in the container to /app
+WORKDIR /app
+
+# Copy the current directory contents into the container at /app
+COPY . /app
+
+# Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Clone the Pycord-Development repository and install it
+RUN git clone https://github.com/Pycord-Development/pycord && \
+    cd pycord && \
+    pip install -U .
+
+# Make port 80 available to the world outside this container
+EXPOSE 80
+
+ENV ELEVEN_LABS_API_KEY="" \
+    OPENAI_API_KEY="" \
+    DISCORD_TOKEN="" \
+    API_KEY="" \
+    API_BASE="" \
+    SYSTEM_MESSAGE=""
+
+# Run main.py (the Discord bot entry point) when the container launches
+CMD ["python", "main.py"]
diff --git a/apps/open-sourcerer/docker-compose.yaml b/apps/open-sourcerer/docker-compose.yaml
new file mode 100644
index 00000000..b1552617
--- /dev/null
+++ b/apps/open-sourcerer/docker-compose.yaml
@@ -0,0 +1,6 @@
+version: '3'
+services:
+  my-python-app:
+    build: .
+    ports:
+      - "80:80"
diff --git a/apps/open-sourcerer/main.py b/apps/open-sourcerer/main.py
new file mode 100644
index 00000000..3c84f84a
--- /dev/null
+++ b/apps/open-sourcerer/main.py
@@ -0,0 +1,107 @@
+import os
+import discord
+from discord.ext import commands
+import interpreter
+import dotenv
+from voice import transcribe
+
+dotenv.load_dotenv(".env")
+
+bot_token = os.getenv("DISCORD_TOKEN")
+
+interpreter.api_key = os.getenv("API_KEY")
+interpreter.api_base = os.getenv("API_BASE")
+# interpreter.auto_run = True
+
+def split_text(text, chunk_size=1500):
+    # Split text into chunk_size-sized pieces so replies fit Discord's message cap.
+    return [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
+
+# discord initial
+intents = discord.Intents.all()
+intents.message_content = True
+client = commands.Bot(command_prefix="$", intents=intents)
+
+message_chunks = []
+send_image = False
+
+@client.event
+async def on_message(message):
+    await client.process_commands(message)
+    if (client.user.mention not in message.content) or (message.author == client.user or message.content.startswith('$')):
+        return
+    response = []
+    for chunk in interpreter.chat(message.content, display=False, stream=False):
+        # await message.channel.send(chunk)
+        if 'message' in chunk:
+            response.append(chunk['message'])
+    last_response = response[-1] if response else ""
+
+    max_message_length = 2000  # Discord's max message length is 2000 characters
+    # Splitting the message into chunks of 2000 characters
+    response_chunks = [last_response[i:i + max_message_length] for i in range(0, len(last_response), max_message_length)]
+    # Sending each chunk as a separate message
+    for chunk in response_chunks:
+        await message.channel.send(chunk)
+
+
+@client.command()
+async def join(ctx):
+    if ctx.author.voice:
+        channel = ctx.message.author.voice.channel
+        print('joining..')
+        await channel.connect()
+        print('joined.')
+    else:
+        print("not in a voice channel!")
+
+
+@client.command()
+async def leave(ctx):
+    if ctx.voice_client:
+        await ctx.voice_client.disconnect()
+    else:
+        print("not in a voice channel!")
+
+
+@client.command()
+async def listen(ctx):
+    if ctx.voice_client:
+        print('trying to listen..')
+        ctx.voice_client.start_recording(discord.sinks.WaveSink(), callback, ctx)
+        print('listening..')
+    else:
+        print("not in a voice channel!")
+
+
+async def callback(sink: discord.sinks, ctx):
+    print('in callback..')
+    for user_id, audio in sink.audio_data.items():
+        if user_id == ctx.author.id:
+            print('saving audio..')
+            audio: discord.sinks.core.AudioData = audio
+            print(user_id)
+            filename = "audio.wav"
+            with open(filename, "wb") as f:
+                f.write(audio.file.getvalue())
+            print('audio saved.')
+            transcription = transcribe(filename)
+            print(transcription)
+            response = []
+            for chunk in interpreter.chat(transcription, display=False, stream=True):
+                # await message.channel.send(chunk)
+                if 'message' in chunk:
+                    response.append(chunk['message'])
+            await ctx.message.channel.send(' '.join(response))
+
+
+@client.command()
+async def stop(ctx):
+    ctx.voice_client.stop_recording()
+
+
+@client.event
+async def on_ready():
+    print(f"We have logged in as {client.user}")
+
+client.run(bot_token)
diff --git a/apps/open-sourcerer/voice.py b/apps/open-sourcerer/voice.py
new file mode 100644
index 00000000..3b42aa40
--- /dev/null
+++ b/apps/open-sourcerer/voice.py
@@ -0,0 +1,121 @@
+import os
+import interpreter
+import time
+import gradio as gr
+from pydub import AudioSegment
+import io
+from elevenlabs import generate, play, set_api_key
+import whisper
+import dotenv
+
+dotenv.load_dotenv(".env")
+
+# interpreter.model = "TheBloke/Mistral-7B-OpenOrca-GGUF"
+interpreter.auto_run = True
+model = whisper.load_model("base")
+
+
+def transcribe(audio):
+
+    # load audio and pad/trim it to fit 30 seconds
+    audio = whisper.load_audio(audio)
+    audio = whisper.pad_or_trim(audio)
+
+    # make log-Mel spectrogram and move to the same device as the model
+    mel = whisper.log_mel_spectrogram(audio).to(model.device)
+
+    # detect the spoken language
+    _, probs = model.detect_language(mel)
+
+    # decode the audio
+    options = whisper.DecodingOptions()
+    result = whisper.decode(model, mel, options)
+    return result.text
+
+
+set_api_key(os.getenv("ELEVEN_LABS_API_KEY"))
+
+
+def get_audio_length(audio_bytes):
+    # Create a BytesIO object from the byte array
+    byte_io = io.BytesIO(audio_bytes)
+
+    # Load the audio data with PyDub
+    audio = AudioSegment.from_mp3(byte_io)
+
+    # Get the length of the audio in milliseconds
+    length_ms = len(audio)
+
+    # Optionally convert to seconds
+    length_s = length_ms / 1000.0
+
+    return length_s
+
+
+def speak(text):
+    # Synthesize, play, then block for the clip's duration so calls don't overlap.
+    audio = generate(
+        text=text,
+        voice="Daniel"
+    )
+    play(audio, notebook=True)
+
+    audio_length = get_audio_length(audio)
+    time.sleep(audio_length)
+
+# @title Text-only JARVIS
+# @markdown Run this cell for a ChatGPT-like interface.
+
+
+with gr.Blocks() as demo:
+    chatbot = gr.Chatbot()
+    msg = gr.Textbox()
+
+    def user(user_message, history):
+        return "", history + [[user_message, None]]
+
+    def bot(history):
+
+        user_message = history[-1][0]
+        history[-1][1] = ""
+        active_block_type = ""
+
+        for chunk in interpreter.chat(user_message, stream=True, display=False):
+
+            # Message
+            if "message" in chunk:
+                if active_block_type != "message":
+                    active_block_type = "message"
+                history[-1][1] += chunk["message"]
+                yield history
+
+            # Code
+            if "language" in chunk:
+                language = chunk["language"]
+            if "code" in chunk:
+                if active_block_type != "code":
+                    active_block_type = "code"
+                    history[-1][1] += f"\n```{language}\n"
+                history[-1][1] += chunk["code"]
+                yield history
+
+            # Output
+            if "executing" in chunk:
+                history[-1][1] += "\n```\n\n```text\n"
+                yield history
+            if "output" in chunk:
+                if chunk["output"] != "KeyboardInterrupt":
+                    history[-1][1] += chunk["output"] + "\n"
+                yield history
+            if "end_of_execution" in chunk:
+                history[-1][1] = history[-1][1].strip()
+                history[-1][1] += "\n```\n"
+                yield history
+
+    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+        bot, chatbot, chatbot
+    )
+
+if __name__ == '__main__':
+    demo.queue()
+    demo.launch(debug=True)