diff --git a/swarms/agents/multi_modal_agent.py b/swarms/agents/multi_modal_agent.py
index 0d8103e9..022c8f43 100644
--- a/swarms/agents/multi_modal_agent.py
+++ b/swarms/agents/multi_modal_agent.py
@@ -1,4 +1,5 @@
-from swarms.agents.muti_modal_workers.multi_modal_agent import MultiModalVisualAgent
+from swarms.agents.multi_modal_workers.multi_modal_agent import MultiModalVisualAgent
+from swarms.agents.message import Message
 
 class MultiModalAgent:
     """
@@ -13,6 +14,19 @@ class MultiModalAgent:
         default_language (str, optional): Default language for the agent. Defaults to "English".
 
     Usage
+    --------------
+    For chats:
+    ------------
+    agent = MultiModalAgent()
+    agent.chat("Hello")
+
+    -----------
+
+    Or just with text
+    ------------
+    agent = MultiModalAgent()
+    agent.run_text("Hello")
+
     """
 
     def __init__(
@@ -35,8 +49,14 @@ class MultiModalAgent:
             temperature
         )
         self.language = language
+        self.history = []
+
 
-    def run_text(self, text, language=None):
+    def run_text(
+        self,
+        text: str = None,
+        language=None
+    ):
         """Run text through the model"""
 
         if language is None:
@@ -48,7 +68,11 @@ class MultiModalAgent:
         except Exception as e:
             return f"Error processing text: {str(e)}"
 
-    def run_img(self, image_path: str, language=None):
+    def run_img(
+        self,
+        image_path: str,
+        language=None
+    ):
         """If language is None"""
         if language is None:
             language = self.default_language
@@ -60,8 +84,90 @@ class MultiModalAgent:
             )
         except Exception as error:
             return f"Error processing image: {str(error)}"
+
+    def chat(
+        self,
+        msg: str = None,
+        language: str = None,
+        streaming: bool = False
+    ):
+        """
+        Run chat with the multi-modal agent
+
+        Args:
+            msg (str, optional): Message to send to the agent. Defaults to None.
+            language (str, optional): Language to use. Defaults to None.
+            streaming (bool, optional): Whether to stream the response. Defaults to False.
+
+        Returns:
+            str or generator: The response text (an error message if processing fails), or a word-by-word generator when streaming is True
+
+        Usage:
+        --------------
+        agent = MultiModalAgent()
+        agent.chat("Hello")
+
+        """
+        if language is None:
+            language = self.default_language
+
+        # add the user's message to the history
+        self.history.append(
+            Message(
+                "User",
+                msg
+            )
+        )
+
+        # process the message
+        try:
+            self.agent.init_agent(language)
+            response = self.agent.run_text(msg)
+
+            #add agent's response to the history
+            self.history.append(
+                Message(
+                    "Agent",
+                    response
+                )
+            )
+
+            # stream word by word when requested, otherwise return the full text
+            if streaming:
+                return self._stream_response(response)
+            else:
+                return response
+
+        except Exception as error:
+            error_message = f"Error processing message: {str(error)}"
+
+            #add error to history
+            self.history.append(
+                Message(
+                    "Agent",
+                    error_message
+                )
+            )
+            return error_message
 
+    def _stream_response(
+        self,
+        response: str = None
+    ):
+        """
+        Yield the response token by token (word by word)
+
+        Usage:
+        --------------
+        for token in agent.chat("Hello", streaming=True):
+            print(token)
+
+        """
+        for token in response.split():
+            yield token
+
     def clear(self):
+        """Clear agent's memory"""
         try:
             self.agent.clear_memory()
         except Exception as e: