You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
swarms/swarms/agents/omni_modal_agent.py

101 lines
2.9 KiB

from langchain.base_language import BaseLanguageModel
from langchain_experimental.autonomous_agents.hugginggpt.repsonse_generator import (
load_response_generator,
)
from langchain_experimental.autonomous_agents.hugginggpt.task_executor import (
TaskExecutor,
)
from langchain_experimental.autonomous_agents.hugginggpt.task_planner import (
load_chat_planner,
)
from transformers import load_tool
from swarms.structs.agent import Agent
from swarms.utils.loguru_logger import logger
class OmniModalAgent(Agent):
    """Agent that plans a task with an LLM and carries it out with Hugging Face tools.

    Pipeline: LLM -> Plans -> Tasks -> Tools -> Response

    Architecture:
        1. LLM: the language model handed to both the chat planner and the
           response generator.
        2. Chat planner: produces a plan from the task text and the tool list.
        3. Task executor: runs the plan.
        4. Tools: Hugging Face tools loaded by name with
           ``transformers.load_tool`` when the agent is constructed.

    Args:
        llm (BaseLanguageModel): Language model used for planning and for
            generating the final response.
        verbose (bool): Stored as ``self.verbose``; this class does not
            otherwise consult it. Defaults to False.
        *args: Extra positional arguments forwarded to ``Agent.__init__``.
        **kwargs: Extra keyword arguments forwarded to ``Agent.__init__``.

    Usage:
        from swarms import OmniModalAgent, OpenAIChat

        llm = OpenAIChat()
        agent = OmniModalAgent(llm)
        response = agent.run("Hello, how are you? Create an image of how you are doing!")
    """

    def __init__(
        self,
        llm: BaseLanguageModel,
        verbose: bool = False,
        *args,
        **kwargs,
    ):
        # Star-args always bind before keyword arguments, so list them first
        # to make the call read in the order Python actually applies them.
        super().__init__(*args, llm=llm, **kwargs)
        self.llm = llm
        self.verbose = verbose

        # Use the module logger (as `run` does) rather than a bare print.
        logger.info("Loading tools...")
        tool_names = (
            "document-question-answering",
            "image-captioning",
            "image-question-answering",
            "image-segmentation",
            "speech-to-text",
            "summarization",
            "text-classification",
            "text-question-answering",
            "translation",
            "huggingface-tools/text-to-image",
            "huggingface-tools/text-to-video",
            "text-to-speech",
            "huggingface-tools/text-download",
            "huggingface-tools/image-transformation",
        )
        self.tools = [load_tool(tool_name) for tool_name in tool_names]

        # Load the chat planner and response generator
        self.chat_planner = load_chat_planner(llm)
        self.response_generator = load_response_generator(llm)

        # No executor exists until `run` has a plan to build one from; keep an
        # explicit None here instead of the TaskExecutor class object, which
        # would look like a usable executor but is not an instance.
        self.task_executor = None
        self.history = []

    def run(self, task: str) -> str:
        """Plan `task`, execute the plan, and generate a response.

        Args:
            task (str): The user's request, passed to the chat planner as
                ``"input"`` together with the loaded tools as ``"hf_tools"``.

        Returns:
            str: The response produced by the response generator, or an
            ``"Error running the agent: ..."`` message if any step raises.
        """
        # Deliberate best-effort boundary: any failure in planning, execution
        # or response generation is logged and reported back as a string
        # rather than propagated to the caller.
        try:
            plan = self.chat_planner.plan(
                inputs={
                    "input": task,
                    "hf_tools": self.tools,
                }
            )
            # Keep the executor on the instance so the most recent run's
            # executor remains reachable after `run` returns.
            self.task_executor = TaskExecutor(plan)
            self.task_executor.run()
            response = self.response_generator.generate(
                {"task_execution": self.task_executor}
            )
            return response
        except Exception as error:
            logger.error(f"Error running the agent: {error}")
            return f"Error running the agent: {error}"