[REACT Prompt] [FEAT][BaseStructure Tests] [MM-VCOT Agent]

2 years ago · b1d3aa54a8
parent efd2b9b22f
commit b1d3aa54a8
10 changed files with 191 additions and 23 deletions
--- a/playground/demos/multi_modal_chain_of_thought/eyetest.jpg
+++ b/playground/demos/multi_modal_chain_of_thought/eyetest.jpg
--- a/playground/demos/multi_modal_chain_of_thought/vcot.py
+++ b/playground/demos/multi_modal_chain_of_thought/vcot.py
@ -0,0 +1,35 @@
+import os
+
+from dotenv import load_dotenv
+
+from swarms.models.gpt4_vision_api import GPT4VisionAPI
+from swarms.prompts.visual_cot import VISUAL_CHAIN_OF_THOUGHT
+from swarms.structs import Agent
+
+# Load environment variables (e.g. from a local .env file)
+load_dotenv()
+
+# Read the OpenAI API key; this is None when OPENAI_API_KEY is not set
+api_key = os.environ.get("OPENAI_API_KEY")
+
+# Initialize the vision model with max_tokens set to 500
+llm = GPT4VisionAPI(
+    openai_api_key=api_key,
+    max_tokens=500,
+)
+
+# Task prompt and the image it refers to. eyetest.jpg is stored next to
+# this script, so resolve it from the script's own location instead of the
+# current working directory; the demo then runs from any directory.
+task = "This is an eye test. What do you see?"
+img = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), "eyetest.jpg"
+)
+
+# Initialize the agent with the visual chain-of-thought prompt as its SOP
+agent = Agent(
+    llm=llm,
+    max_loops=2,
+    autosave=True,
+    sop=VISUAL_CHAIN_OF_THOUGHT,
+)
+
+# Run the agent on the task and image, then print its output
+out = agent.run(task=task, img=img)
+print(out)
--- a/pyproject.toml
+++ b/pyproject.toml
@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry]
 name = "swarms"
-version = "2.7.8"
+version = "2.7.9"
 description = "Swarms - Pytorch"
 license = "MIT"
 authors = ["Kye Gomez <kye@apac.ai>"]
@ -41,7 +41,6 @@ datasets = "*"
 optimum = "1.15.0"
 diffusers = "*"
 PyPDF2 = "*"
-vllm = "*"
 accelerate = "*"
 sentencepiece = "*"
 wget = "*"
--- a/swarms/models/gpt4_vision_api.py
+++ b/swarms/models/gpt4_vision_api.py
@ -161,6 +161,8 @@ class GPT4VisionAPI(BaseMultiModalModel):
                    .get("message", {})
                    .get("content", None)
                )
+                if self.streaming_enabled:
+                    content = self.stream_response(content)
                return content
            else:
                print("No valid response in 'choices'")
--- a/swarms/prompts/react.py
+++ b/swarms/prompts/react.py
@ -0,0 +1,59 @@
+
+def react_prompt(task: str = None):
+    """Build the REACT reasoning prompt for a task.
+
+    The task text is interpolated into the prompt twice; the rest of the
+    prompt tells the model to structure its answer with <observation>,
+    <thought> and <action> tags across four steps.
+
+    Args:
+        task (str, optional): Description of the task. When omitted, the
+            literal text "None" appears in the prompt.
+
+    Returns:
+        str: The formatted prompt.
+    """
+    return f"""
+        Task Description:
+        Accomplish the following {task} using the reasoning guidelines below.
+
+
+        ######### REASONING GUIDELINES #########
+        You're an autonomous agent that has been tasked with {task}. You have been given a set of guidelines to follow to accomplish this task. You must follow the guidelines exactly.
+        
+        Step 1: Observation
+
+        Begin by carefully observing the situation or problem at hand. Describe what you see, identify key elements, and note any relevant details.
+
+        Use <observation>...</observation> tokens to encapsulate your observations.
+
+        Example:
+        <observation> [Describe your initial observations of the task or problem here.] </observation>
+
+        Step 2: Thought Process
+
+        Analyze the observations. Consider different angles, potential challenges, and any underlying patterns or connections.
+
+        Think about possible solutions or approaches to address the task.
+
+        Use <thought>...</thought> tokens to encapsulate your thinking process.
+
+        Example:
+        <thought> [Explain your analysis of the observations, your reasoning behind potential solutions, and any assumptions or considerations you are making.] </thought>
+
+        Step 3: Action Planning
+
+        Based on your thoughts and analysis, plan a series of actions to solve the problem or complete the task.
+
+        Detail the steps you intend to take, resources you will use, and how these actions will address the key elements identified in your observations.
+
+        Use <action>...</action> tokens to encapsulate your action plan.
+
+        Example:
+        <action> [List the specific actions you plan to take, including any steps to gather more information or implement a solution.] </action>
+
+        Step 4: Execute and Reflect
+
+        Implement your action plan. As you proceed, continue to observe and think, adjusting your actions as needed.
+
+        Reflect on the effectiveness of your actions and the outcome. Consider what worked well and what could be improved.
+
+        Use <observation>...</observation>, <thought>...</thought>, and <action>...</action> tokens as needed to describe this ongoing process.
+
+        Example:
+        <observation> [New observations during action implementation.] </observation>
+        <thought> [Thoughts on how the actions are affecting the situation, adjustments needed, etc.] </thought>
+        <action> [Adjusted or continued actions to complete the task.] </action>
+
+        Guidance:
+        Remember, your goal is to provide a transparent and logical process that leads from observation to effective action. Your responses should demonstrate clear thinking, an understanding of the problem, and a rational approach to solving it. The use of tokens helps to structure your response and clarify the different stages of your reasoning and action.
+
+    """
--- a/swarms/prompts/visual_cot.py
+++ b/swarms/prompts/visual_cot.py
@ -0,0 +1,36 @@
+# Prompt that instructs a multi-modal model to reason about an image in six
+# labelled stages (Observation, Initial Analysis, Detailed Reasoning, Visual
+# References, Conclusion or Solution, Reflection), followed by a worked
+# example that walks through those stages for a climate-data graph.
+VISUAL_CHAIN_OF_THOUGHT = """
+    
+You, as the model, are presented with a visual problem. This could be an image containing various elements that you need to analyze, a graph that requires interpretation, or a visual puzzle. Your task is to examine the visual information carefully and describe your process of understanding and solving the problem.
+
+Instructions:
+
+Observation: Begin by describing what you see in the image. Break down the visual elements into understandable segments. For instance, if it's a picture of a street, identify the key components like cars, buildings, people, street signs, etc. If it's a graph, start by outlining its type, the axes, and the data it presents.
+
+Initial Analysis: Based on your observation, start analyzing the image. If it's a scene, narrate the possible context or the story the image might be telling. If it's a graph or data, begin to interpret what the data might indicate. This step is about forming hypotheses or interpretations based on visual cues.
+
+Detailed Reasoning: Delve deeper into your analysis. This is where the chain of thought becomes critical. If you're looking at a scene, consider the relationships between elements. Why might that person be running? What does the traffic signal indicate? For graphs or data-driven images, analyze trends, outliers, and correlations. Explain your thought process in a step-by-step manner.
+
+Visual References: As you explain, make visual references. Draw arrows, circles, or use highlights in the image to pinpoint exactly what you're discussing. These annotations should accompany your verbal reasoning, adding clarity to your explanations.
+
+Conclusion or Solution: Based on your detailed reasoning, draw a conclusion or propose a solution. If it's a visual puzzle or problem, present your answer clearly, backed by the reasoning you've just outlined. If it’s an open-ended image, summarize your understanding of the scene or the data.
+
+Reflection: Finally, reflect on your thought process. Was there anything particularly challenging or ambiguous? How confident are you in your interpretation or solution, and why? This step is about self-assessment and providing insight into your reasoning confidence.
+
+Example:
+
+Let’s say the image is a complex graph showing climate change data over the last century.
+
+Observation: "The graph is a line graph with time on the x-axis and average global temperature on the y-axis. There are peaks and troughs, but a general upward trend is visible."
+
+Initial Analysis: "The immediate observation is that average temperatures have risen over the last century. There are fluctuations, but the overall direction is upward."
+
+Detailed Reasoning: "Looking closer, the steepest increase appears post-1950. This aligns with industrial advancements globally, suggesting a link between human activity and rising temperatures. The short-term fluctuations could be due to natural climate cycles, but the long-term trend indicates a more worrying, human-induced climate change pattern."
+
+Visual References: "Here [draws arrow], the graph shows a sharp rise. The annotations indicate major industrial events, aligning with these spikes."
+
+Conclusion or Solution: "The data strongly suggests a correlation between industrialization and global warming. The upward trend, especially in recent decades, indicates accelerating temperature increases."
+
+Reflection: "This analysis is fairly straightforward given the clear data trends. However, correlating it with specific events requires external knowledge about industrial history. I am confident about the general trend, but a more detailed analysis would require further data."    
+    
+    
+"""
--- a/swarms/structs/agent.py
+++ b/swarms/structs/agent.py
@ -163,7 +163,7 @@ class Agent:
        id: str = agent_id,
        llm: Any = None,
        template: Optional[str] = None,
-        max_loops=5,
+        max_loops: int = 1,
        stopping_condition: Optional[Callable[[str], bool]] = None,
        loop_interval: int = 1,
        retry_attempts: int = 3,
@ -194,6 +194,7 @@ class Agent:
        preset_stopping_token: Optional[bool] = False,
        traceback: Any = None,
        traceback_handlers: Any = None,
+        streaming_on: Optional[bool] = False,
        *args,
        **kwargs: Any,
    ):
@ -236,6 +237,7 @@ class Agent:
        self.preset_stopping_token = preset_stopping_token
        self.traceback = traceback
        self.traceback_handlers = traceback_handlers
+        self.streaming_on = streaming_on

        # self.system_prompt = AGENT_SYSTEM_PROMPT_3

@ -489,7 +491,6 @@ class Agent:
                    Interactive: {self.interactive}
                    Dashboard: {self.dashboard}
                    Dynamic Temperature: {self.dynamic_temperature_enabled}
-                    Temperature: {self.llm.model_kwargs.get('temperature')}
                    Autosave: {self.autosave}
                    Saved State: {self.saved_state_path}
                    Model Configuration: {model_config}
@ -547,6 +548,15 @@ class Agent:
        print(colored(f"\nLoop {loop_count} of {max_loops}", "cyan"))
        print("\n")

+    def streaming(self, content: str = None):
+        """Print each chunk of content as it is generated.
+
+        Chunks are written back-to-back with no separator or trailing
+        newline, and stdout is flushed after every chunk so the output
+        shows up immediately instead of waiting in the buffer.
+
+        Args:
+            content (str, optional): Iterable of chunks to print; a plain
+                string is printed one character at a time. Defaults to
+                None, which prints nothing.
+        """
+        # The default of None is not iterable, so treat it as "no chunks".
+        if content is None:
+            return
+        for chunk in content:
+            print(chunk, end="", flush=True)
+
    def _history(self, user_name: str, task: str) -> str:
        """Generate the history for the history prompt

@ -720,7 +730,11 @@ class Agent:
            raise

    def _run(self, **kwargs: Any) -> str:
-        """Generate a result using the provided keyword args."""
+        """Run the agent on a task
+
+        Returns:
+            str: _description_
+        """
        try:
            task = self.format_prompt(**kwargs)
            response, history = self._generate(task, task)
--- a/swarms/structs/base.py
+++ b/swarms/structs/base.py
@ -15,7 +15,6 @@ except ImportError as error:


 class BaseStructure(ABC):
-    
    def __init__(
        self,
        name: Optional[str] = None,
--- a/swarms/swarms/base.py
+++ b/swarms/swarms/base.py
@ -11,14 +11,10 @@ Paid
 """

 import asyncio
-import concurrent.futures
-import logging
-import time
-from abc import ABC, abstractmethod
+from abc import ABC
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Any, Callable, Dict, List, Optional

-from termcolor import colored

 from swarms.structs.agent import Agent

--- a/tests/swarms/test_base.py
+++ b/tests/swarms/test_base.py
@ -86,7 +86,11 @@ class TestBaseStructure:
        with open(log_file, "r") as file:
            lines = file.readlines()
            assert len(lines) == 1
-            assert lines[0] == f"[{base_structure._current_timestamp()}] [{event_type}] {event}\n"
+            assert (
+                lines[0]
+                == f"[{base_structure._current_timestamp()}]"
+                f" [{event_type}] {event}\n"
+            )

    @pytest.mark.asyncio
    async def test_run_async(self):
@ -130,7 +134,9 @@ class TestBaseStructure:

        artifact = {"key": "value"}
        artifact_name = "test_artifact"
-        await base_structure.save_artifact_async(artifact, artifact_name)
+        await base_structure.save_artifact_async(
+            artifact, artifact_name
+        )
        loaded_artifact = base_structure.load_artifact(artifact_name)

        assert loaded_artifact == artifact
@ -143,7 +149,9 @@ class TestBaseStructure:
        artifact = {"key": "value"}
        artifact_name = "test_artifact"
        base_structure.save_artifact(artifact, artifact_name)
-        loaded_artifact = await base_structure.load_artifact_async(artifact_name)
+        loaded_artifact = await base_structure.load_artifact_async(
+            artifact_name
+        )

        assert loaded_artifact == artifact

@ -160,7 +168,11 @@ class TestBaseStructure:
        with open(log_file, "r") as file:
            lines = file.readlines()
            assert len(lines) == 1
-            assert lines[0] == f"[{base_structure._current_timestamp()}] [{event_type}] {event}\n"
+            assert (
+                lines[0]
+                == f"[{base_structure._current_timestamp()}]"
+                f" [{event_type}] {event}\n"
+            )

    @pytest.mark.asyncio
    async def test_asave_to_file(self, tmpdir):
@ -187,14 +199,18 @@ class TestBaseStructure:

    def test_run_in_thread(self):
        base_structure = BaseStructure()
-        result = base_structure.run_in_thread(lambda: "Thread Test Result")
+        result = base_structure.run_in_thread(
+            lambda: "Thread Test Result"
+        )
        assert result.result() == "Thread Test Result"

    def test_save_and_decompress_data(self):
        base_structure = BaseStructure()
        data = {"key": "value"}
        compressed_data = base_structure.compress_data(data)
-        decompressed_data = base_structure.decompres_data(compressed_data)
+        decompressed_data = base_structure.decompres_data(
+            compressed_data
+        )
        assert decompressed_data == data

    def test_run_batched(self):
@ -204,9 +220,13 @@ class TestBaseStructure:
            return f"Processed {data}"

        batched_data = list(range(10))
-        result = base_structure.run_batched(batched_data, batch_size=5, func=run_function)
+        result = base_structure.run_batched(
+            batched_data, batch_size=5, func=run_function
+        )

-        expected_result = [f"Processed {data}" for data in batched_data]
+        expected_result = [
+            f"Processed {data}" for data in batched_data
+        ]
        assert result == expected_result

    def test_load_config(self, tmpdir):
@ -224,11 +244,15 @@ class TestBaseStructure:
        tmp_dir = tmpdir.mkdir("test_dir")
        base_structure = BaseStructure()
        data_to_backup = {"key": "value"}
-        base_structure.backup_data(data_to_backup, backup_path=tmp_dir)
+        base_structure.backup_data(
+            data_to_backup, backup_path=tmp_dir
+        )
        backup_files = os.listdir(tmp_dir)

        assert len(backup_files) == 1
-        loaded_data = base_structure.load_from_file(os.path.join(tmp_dir, backup_files[0]))
+        loaded_data = base_structure.load_from_file(
+            os.path.join(tmp_dir, backup_files[0])
+        )
        assert loaded_data == data_to_backup

    def test_monitor_resources(self):
@ -253,7 +277,11 @@ class TestBaseStructure:
            return f"Processed {data}"

        batched_data = list(range(10))
-        result = base_structure.run_with_resources_batched(batched_data, batch_size=5, func=run_function)
+        result = base_structure.run_with_resources_batched(
+            batched_data, batch_size=5, func=run_function
+        )

-        expected_result = [f"Processed {data}" for data in batched_data]
+        expected_result = [
+            f"Processed {data}" for data in batched_data
+        ]
        assert result == expected_result