parent
a39e527b32
commit
3133a0d4fc
@ -0,0 +1,29 @@
|
|||||||
|
# Use an official Python runtime as a parent image
FROM python:3.10

# Set the working directory in the container to /app
WORKDIR /app

# Copy the dependency manifest first so the pip layer below is cached
# until requirements.txt itself changes (not on every source edit).
COPY requirements.txt /app/

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Clone the Pycord-Development repository and install it.
# Install from the clone path (no `cd`, per DL3003) and remove the
# checkout in the same layer so it doesn't bloat the image.
RUN git clone https://github.com/Pycord-Development/pycord /tmp/pycord && \
    pip install --no-cache-dir -U /tmp/pycord && \
    rm -rf /tmp/pycord

# Add the current directory contents into the container at /app
# (COPY preferred over ADD for plain local files, per DL3020).
COPY . /app

# Make port 80 available to the world outside this container
EXPOSE 80

# Runtime configuration. These are intentionally empty defaults —
# supply real values with `docker run -e …` or an env file; never
# bake secrets into the image.
ENV ELEVEN_LABS_API_KEY="" \
    OPENAI_API_KEY="" \
    DISCORD_TOKEN="" \
    API_KEY="" \
    API_BASE="" \
    SYSTEM_MESSAGE=""

# Run main.py when the container launches
# (previous comment referred to DiscordInterpreter.py, which is not what CMD runs)
CMD ["python", "main.py"]
|
@ -0,0 +1,6 @@
|
|||||||
|
# Compose definition for the interpreter Discord bot.
# NOTE(review): the top-level `version` key is obsolete under Compose v2
# and ignored; kept here for compatibility with older docker-compose binaries.
version: '3'
services:
  my-python-app:
    # Build the image from the Dockerfile in this directory.
    build: .
    ports:
      # Map host port 80 to container port 80 (matches EXPOSE in the Dockerfile).
      - "80:80"
|
@ -0,0 +1,107 @@
|
|||||||
|
import os
|
||||||
|
import discord
|
||||||
|
from discord.ext import commands
|
||||||
|
import interpreter
|
||||||
|
import dotenv
|
||||||
|
from voice import transcribe
|
||||||
|
|
||||||
|
# Load tokens / API keys from a local .env file into the process environment.
dotenv.load_dotenv(".env")

# Discord bot token consumed by client.run() at the bottom of the file.
bot_token = os.getenv("DISCORD_TOKEN")

# Point the interpreter at the configured LLM endpoint.
interpreter.api_key = os.getenv("API_KEY")
interpreter.api_base = os.getenv("API_BASE")
# interpreter.auto_run = True
|
||||||
|
|
||||||
|
def split_text(text, chunk_size=1500):
    """Split ``text`` into consecutive pieces of at most ``chunk_size`` characters.

    Returns a list of substrings in original order; an empty string yields
    an empty list. The final piece may be shorter than ``chunk_size``.
    """
    pieces = []
    for start in range(0, len(text), chunk_size):
        pieces.append(text[start:start + chunk_size])
    return pieces
|
||||||
|
|
||||||
|
# discord initial
# Enable all gateway intents so handlers can see member/voice/message events.
intents = discord.Intents.all()
# NOTE(review): redundant — Intents.all() already enables message_content;
# kept explicitly since message_content is a privileged intent.
intents.message_content = True
client = commands.Bot(command_prefix="$", intents=intents)

# Module-level state.
# NOTE(review): neither name is referenced by the handlers below — candidates
# for removal once confirmed unused elsewhere.
message_chunks = []
send_image = False
|
||||||
|
|
||||||
|
@client.event
async def on_message(message):
    """Reply to messages that mention the bot, via interpreter.chat().

    Always forwards the message to the command processor first, then ignores
    our own messages, empty contents, and `$`-prefixed commands. The reply is
    the LAST message chunk the interpreter produced, split to fit Discord's
    2000-character limit.
    """
    await client.process_commands(message)

    # Guard: message.content can be empty (embeds/attachments) — the original
    # `message.content[0]` raised IndexError there.
    if message.author == client.user or not message.content or message.content.startswith('$'):
        return

    # Only respond when the bot itself is mentioned. Generalized from a
    # hard-coded ID ('<@1158923910855798804>') to this bot's actual ID.
    if client.user is None or f'<@{client.user.id}>' not in message.content:
        return

    response = []
    for chunk in interpreter.chat(message.content, display=False, stream=False):
        if 'message' in chunk:
            response.append(chunk['message'])

    # Guard: no 'message' chunks at all — the original `response[-1]`
    # raised IndexError in that case.
    if not response:
        return
    last_response = response[-1]

    max_message_length = 2000  # Discord's max message length is 2000 characters
    # Splitting the message into chunks of 2000 characters
    response_chunks = [last_response[i:i + max_message_length]
                       for i in range(0, len(last_response), max_message_length)]
    # Sending each chunk as a separate message
    for part in response_chunks:
        await message.channel.send(part)
|
||||||
|
|
||||||
|
|
||||||
|
@client.command()
async def join(ctx):
    """Connect the bot to the voice channel the command author is in."""
    # Guard clause: bail out unless the author is currently in a voice channel.
    if not ctx.author.voice:
        print("not in a voice channel!")
        return
    voice_channel = ctx.message.author.voice.channel
    print('joining..')
    await voice_channel.connect()
    print('joined.')
|
||||||
|
|
||||||
|
|
||||||
|
@client.command()
async def leave(ctx):
    """Disconnect the bot from its current voice channel, if connected."""
    # Guard clause: nothing to do unless we hold a voice connection.
    if not ctx.voice_client:
        print("not in a voice channel!")
        return
    await ctx.voice_client.disconnect()
|
||||||
|
|
||||||
|
|
||||||
|
@client.command()
async def listen(ctx):
    """Start recording voice in the current channel; `callback` receives the audio."""
    # Guard clause: recording requires an active voice connection.
    if not ctx.voice_client:
        print("not in a voice channel!")
        return
    print('trying to listen..')
    ctx.voice_client.start_recording(discord.sinks.WaveSink(), callback, ctx)
    print('listening..')
|
||||||
|
|
||||||
|
|
||||||
|
async def callback(sink: discord.sinks, ctx):
    """After recording stops: save the command author's audio, transcribe it,
    run it through the interpreter, and reply in the invoking channel.

    Fix: `transcription`/`response` were only bound when the author's audio
    was present in the sink; if it was absent, the final send raised
    NameError. The whole reply path is now scoped to the matching user.
    """
    print('in callback..')
    for user_id, audio in sink.audio_data.items():
        # Only the command author's recording is of interest.
        if user_id != ctx.author.id:
            continue
        print('saving audio..')
        audio: discord.sinks.core.AudioData = audio
        print(user_id)
        # NOTE(review): fixed filename in the CWD — concurrent recordings
        # would clobber each other; confirm single-guild usage.
        filename = "audio.wav"
        with open(filename, "wb") as f:
            f.write(audio.file.getvalue())
        print('audio saved.')
        transcription = transcribe(filename)
        print(transcription)
        response = []
        for chunk in interpreter.chat(transcription, display=False, stream=True):
            if 'message' in chunk:
                response.append(chunk['message'])
        await ctx.message.channel.send(' '.join(response))
|
||||||
|
|
||||||
|
|
||||||
|
@client.command()
async def stop(ctx):
    """Stop an in-progress voice recording (this triggers `callback`).

    Fix: the original called stop_recording() unguarded, which raises when
    the bot holds no voice connection; handle that case like the sibling
    commands do.
    """
    if ctx.voice_client:
        ctx.voice_client.stop_recording()
    else:
        print("not in a voice channel!")
|
||||||
|
|
||||||
|
|
||||||
|
@client.event
async def on_ready():
    # Fired once the gateway connection is established and the cache is ready.
    print(f"We have logged in as {client.user}")
|
||||||
|
|
||||||
|
# Start the bot; blocks until the process exits.
client.run(bot_token)
|
@ -0,0 +1,121 @@
|
|||||||
|
# Standard library
import io
import os
import time

# Third-party
import dotenv
import gradio as gr
import gradio_client as grc
import whisper
from elevenlabs import generate, play, set_api_key
from pydub import AudioSegment

import interpreter
|
||||||
|
|
||||||
|
# Load API keys / tokens from a local .env file into the process environment.
dotenv.load_dotenv(".env")

# interpreter.model = "TheBloke/Mistral-7B-OpenOrca-GGUF"
# Let the interpreter execute generated code without asking for confirmation.
interpreter.auto_run = True
# Whisper speech-to-text model used by transcribe() below ("base" = small/fast).
model = whisper.load_model("base")
|
||||||
|
|
||||||
|
|
||||||
|
def transcribe(audio):
    """Transcribe an audio file with Whisper and return the decoded text.

    Parameters
    ----------
    audio : str
        Path to an audio file readable by whisper.load_audio (e.g. a WAV).

    Returns
    -------
    str
        The decoded transcription text. Input is padded/trimmed to 30s,
        so longer recordings are truncated.
    """

    # load audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # detect the spoken language
    # NOTE(review): `probs` is never used afterwards — this call only costs
    # compute; confirm whether language detection is needed before removing.
    _, probs = model.detect_language(mel)

    # decode the audio
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    return result.text
|
||||||
|
# Configure ElevenLabs with the key from the environment.
# BUG FIX: the literal string "ELEVEN_LABS_API_KEY" was previously passed
# as the API key itself instead of the environment variable's value, so
# every ElevenLabs call would fail authentication.
set_api_key(os.getenv("ELEVEN_LABS_API_KEY"))
|
||||||
|
def get_audio_length(audio_bytes):
    """Return the duration, in seconds, of an MP3 given as a byte string."""
    # Wrap the raw bytes so PyDub can read them like a file object.
    buffer = io.BytesIO(audio_bytes)
    segment = AudioSegment.from_mp3(buffer)
    # len() of an AudioSegment is its duration in milliseconds;
    # convert to (float) seconds for the caller.
    return len(segment) / 1000.0
|
||||||
|
|
||||||
|
|
||||||
|
def speak(text):
    """Synthesize `text` with ElevenLabs, play it, and block for its duration.

    Fix: removed the unused local `speaking = True` — it was never read.
    """
    audio = generate(
        text=text,
        voice="Daniel"
    )
    play(audio, notebook=True)

    # Sleep for the clip's length so successive utterances don't overlap —
    # presumably play(notebook=True) returns before playback finishes
    # (TODO confirm against the elevenlabs API).
    audio_length = get_audio_length(audio)
    time.sleep(audio_length)
|
||||||
|
|
||||||
|
# @title Text-only JARVIS
# @markdown Run this cell for a ChatGPT-like interface.


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()

    def user(user_message, history):
        # Append the user's turn (bot reply pending) and clear the textbox.
        return "", history + [[user_message, None]]

    def bot(history):
        # Generator: streams the interpreter's reply into the last history
        # entry, yielding after each chunk so Gradio re-renders incrementally.
        # `active_block_type` tracks whether we are inside a prose message or
        # a fenced code block, so fences are opened only on transitions.

        user_message = history[-1][0]
        history[-1][1] = ""
        active_block_type = ""

        for chunk in interpreter.chat(user_message, stream=True, display=False):

            # Message
            if "message" in chunk:
                if active_block_type != "message":
                    active_block_type = "message"
                history[-1][1] += chunk["message"]
                yield history

            # Code
            if "language" in chunk:
                language = chunk["language"]
            if "code" in chunk:
                if active_block_type != "code":
                    active_block_type = "code"
                    # NOTE(review): `language` is only bound once a "language"
                    # chunk has arrived; a "code" chunk arriving first would
                    # raise NameError — confirm chunk ordering upstream.
                    history[-1][1] += f"\n```{language}\n"
                history[-1][1] += chunk["code"]
                yield history

            # Output
            if "executing" in chunk:
                # Close the code fence and open a text fence for run output.
                history[-1][1] += "\n```\n\n```text\n"
                yield history
            if "output" in chunk:
                if chunk["output"] != "KeyboardInterrupt":
                    history[-1][1] += chunk["output"] + "\n"
                yield history
            if "end_of_execution" in chunk:
                history[-1][1] = history[-1][1].strip()
                history[-1][1] += "\n```\n"
                yield history

    # On submit: record the user's turn (fast, unqueued), then stream
    # the bot's reply into the chatbot component.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )

if __name__ == '__main__':
    # Queueing is required for generator (streaming) handlers like bot().
    demo.queue()
    demo.launch(debug=True)
|
Loading…
Reference in new issue