diff --git a/README.md b/README.md index e1bc097d..ea2871b0 100644 --- a/README.md +++ b/README.md @@ -57,30 +57,36 @@ print(response) --- ## Usage -- `GodMode` is a simple class that takes in x amount of llms and when given a task runs them all concurrently! -```python - +- `MultiAgentDebate` is a simple class that enables multi-agent collaboration. -from swarms.models import Anthropic, GooglePalm, OpenAIChat -from swarms.swarms import GodMode +```python +from swarms import Worker, MultiAgentDebate, select_speaker -claude = Anthropic(anthropic_api_key="") -palm = GooglePalm(google_api_key="") -gpt = OpenAIChat(openai_api_key="") +# Initialize agents +worker1 = Worker(openai_api_key="", ai_name="Optimus Prime") +worker2 = Worker(openai_api_key="", ai_name="Bumblebee") +worker3 = Worker(openai_api_key="", ai_name="Megatron") -# Usage -llms = [ - claude, - palm, - gpt +agents = [ + worker1, + worker2, + worker3 ] -god_mode = GodMode(llms) +# Initialize multi-agent debate with the selection function +debate = MultiAgentDebate(agents, select_speaker) -task = f"What are the biggest risks facing humanity?" +# Run task +task = "What were the winning boston marathon times for the past 5 years (ending in 2022)? Generate a table of the year, name, country of origin, and times." +results = debate.run(task, max_iters=4) -god_mode.print_responses(task) +# Print results +for result in results: + print(f"Agent {result['agent']} responded: {result['response']}") ``` + +---- + - The `Worker` is an fully feature complete agent with an llm, tools, and a vectorstore for long term memory! ```python @@ -97,6 +103,27 @@ response = node.run(task) print(response) ``` + +------ + +### OmniModal Agent +- OmniModal Agent is an LLM with access to 10+ multi-modal encoders and diffusers! 
It can generate images, videos, speech, music and so much more. Get started with: + +```python +from langchain.llms import OpenAIChat +from swarms.agents import OmniModalAgent + + +llm = OpenAIChat(model_name="gpt-4") + +agent = OmniModalAgent(llm) + +agent.run("Create a video of a swarm of fish") + +``` + +- OmniModal Agent has a UI in the repository root; launch it with `python3 omni_ui.py` + --- # Documentation diff --git a/omni_exa.py b/omni_exa_example.py similarity index 100% rename from omni_exa.py rename to omni_exa_example.py diff --git a/omni_ui.py b/omni_ui.py index 4bc44913..7a843938 100644 --- a/omni_ui.py +++ b/omni_ui.py @@ -1,50 +1,85 @@ -import gradio as gr -from gradio import Interface +#Import required libraries +from gradio import Interface, Textbox, HTML import threading import os -from langchain.llms import OpenAIChat -from swarms.agents import OmniModalAgent +import glob +import base64 +from langchain.llms import OpenAIChat # Replace with your actual class +from swarms.agents import OmniModalAgent # Replace with your actual class -# Initialize the OmniModalAgent -llm = OpenAIChat(model_name="gpt-4") -agent = OmniModalAgent(llm) +#Function to convert image to base64 +def image_to_base64(image_path): + with open(image_path, "rb") as image_file: + return base64.b64encode(image_file.read()).decode() -# Global variable to store chat history +#Function to get the most recently created image in the directory +def get_latest_image(): + list_of_files = glob.glob('./*.png') # Replace with your image file type + if not list_of_files: + return None + latest_file = max(list_of_files, key=os.path.getctime) + return latest_file + +#Initialize your OmniModalAgent +llm = OpenAIChat(model_name="gpt-4") # Replace with your actual initialization +agent = OmniModalAgent(llm) # Replace with your actual initialization + +#Global variable to store chat history chat_history = [] +#Function to update chat def update_chat(user_input): global chat_history chat_history.append({"type": 
"user", "content": user_input}) - - # Get agent response + + #Get agent response agent_response = agent.run(user_input) + + # Handle the case where agent_response is not in the expected dictionary format + if not isinstance(agent_response, dict): + agent_response = {"type": "text", "content": str(agent_response)} + chat_history.append(agent_response) - + + # Check for the most recently created image and add it to the chat history + latest_image = get_latest_image() + if latest_image: + chat_history.append({"type": "image", "content": latest_image}) + return render_chat(chat_history) +#Function to render chat as HTML + def render_chat(chat_history): - chat_str = '
' + chat_str = "
" for message in chat_history: - timestamp = message.get('timestamp', 'N/A') if message['type'] == 'user': - chat_str += f'
{message["content"]}
{timestamp}
' + chat_str += f"

User: {message['content']}

" elif message['type'] == 'text': - chat_str += f'
{message["content"]}
{timestamp}
' + chat_str += f"

Agent: {message['content']}

" elif message['type'] == 'image': - img_path = os.path.join("root_directory", message['content']) - chat_str += f'
image
{timestamp}
' - chat_str += '
' + img_path = os.path.join(".", message['content']) + base64_img = image_to_base64(img_path) + chat_str += f"

Agent: image

" + chat_str += "
" return chat_str -# Define Gradio interface +#Define Gradio interface iface = Interface( fn=update_chat, - inputs=gr.inputs.Textbox(lines=2, placeholder="Type your message here..."), - outputs=gr.outputs.HTML(label="Chat History"), - live=True, - title="Conversational AI Interface", - description="Chat with our AI agent!", - allow_flagging=False + inputs=Textbox(label="Your Message", type="text"), + outputs=HTML(label="Chat History"), + live=True ) -iface.launch() +#Function to update the chat display +def update_display(): + global chat_history + while True: + iface.update(render_chat(chat_history)) + +#Run the update_display function in a separate thread +threading.Thread(target=update_display).start() + +#Run Gradio interface +iface.launch() \ No newline at end of file diff --git a/omnimodal_agent.py b/omnimodal_agent_example.py similarity index 100% rename from omnimodal_agent.py rename to omnimodal_agent_example.py