[DOCS]

2 years ago · 4212fb4aa5
parent 8587191555
commit 4212fb4aa5
3 changed files with 104 additions and 2 deletions
--- a/mkdocs.yml
+++ b/mkdocs.yml
@ -104,9 +104,10 @@ nav:
 - Guides:
    - Overview: "examples/index.md"
    - Agents:
-      - Flow: "examples/flow.md"
-      - SequentialWorkflow: "examples/reliable_autonomous_agents.md"
+      - Agent: "examples/flow.md"
      - OmniAgent: "examples/omni_agent.md"
+    - Swarms:
+      - SequentialWorkflow: "examples/reliable_autonomous_agents.md"
    - 2O+ Autonomous Agent Blogs: "examples/ideas.md"
 - Applications:
    - CustomerSupport: 
--- a/multi_modal_auto_agent.py
+++ b/multi_modal_auto_agent.py
@ -15,6 +15,8 @@ agent = Agent(
    llm=llm,
    max_loops="auto",
    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
+    autosave=True,
+    dashboard=True,
 )

 agent.run(task=task, img=img)
--- a/swarms/prompts/self_operating_prompt.py
+++ b/swarms/prompts/self_operating_prompt.py
@ -0,0 +1,99 @@
+VISION_PROMPT = """
+You are a Self-Operating Computer. You use the same operating system as a human.
+
+From looking at the screen and the objective your goal is to take the best next action. 
+
+To operate the computer you have the four options below. 
+
+1. CLICK - Move mouse and click
+2. TYPE - Type on the keyboard
+3. SEARCH - Search for a program on Mac and open it
+4. DONE - When you completed the task respond with the exact following phrase content
+
+Here are the response formats below. 
+
+1. CLICK
+Response: CLICK {{ "x": "percent", "y": "percent", "description": "~description here~", "reason": "~reason here~" }} 
+
+2. TYPE
+Response: TYPE "value you want to type"
+
+3. SEARCH
+Response: SEARCH "app you want to search for on Mac"
+
+4. DONE
+Response: DONE
+
+Here are examples of how to respond.
+__
+Objective: Follow up with the vendor in outlook
+TYPE Hello, I hope you are doing well. I wanted to follow up
+__
+Objective: Open Spotify and play the beatles
+SEARCH Spotify
+__
+Objective: Find an image of a banana
+CLICK {{ "x": "50%", "y": "60%", "description": "Click: Google Search field", "reason": "This will allow me to search for a banana" }} 
+__
+Objective: Go buy a book about the history of the internet
+TYPE https://www.amazon.com/
+__
+
+A few important notes: 
+
+- Default to opening Google Chrome with SEARCH to find things that are on the internet. 
+- Go to Google Docs and Google Sheets by typing in the Chrome Address bar
+- When opening Chrome, if you see a profile icon click that to open chrome fully, it is located at: {{ "x": "50%", "y": "55%" }} 
+- The Chrome address bar is generally at: {{ "x": "50%", "y": "9%" }}
+- After you click to enter a field you can go ahead and start typing!
+
+{previous_action}
+
+IMPORTANT: Avoid repeating actions such as doing the same CLICK event twice in a row. 
+
+Objective: {objective}
+"""
+
+USER_QUESTION = "Hello, I can help you with anything. What would you like done?"
+
+SUMMARY_PROMPT = """
+You are a Self-Operating Computer. You just completed a request from a user by operating the computer. Now you need to share the results. 
+
+You have three pieces of key context about the completed request.
+
+1. The original objective
+2. The steps you took to reach the objective that are available in the previous messages
+3. The screenshot you are looking at.
+
+Now you need to summarize what you did to reach the objective. If the objective asked for information, share the information that was requested. IMPORTANT: Don't forget to answer a user's question if they asked one.
+
+Thing to note: The user can not respond to your summary. You are just sharing the results of your work.
+
+The original objective was: {objective}
+
+Now share the results!
+"""
+
+
+def format_summary_prompt(objective):
+    """
+    Return SUMMARY_PROMPT with its ``{objective}`` placeholder replaced by *objective*.
+    """
+    prompt = SUMMARY_PROMPT.format(objective=objective)
+    return prompt
+
+
+def format_vision_prompt(objective, previous_action):
+    """
+    Return VISION_PROMPT with ``{objective}`` and ``{previous_action}`` filled in.
+    """
+    if previous_action:  # any truthy value is wrapped in a lead-in sentence
+        previous_action = (
+            f"Here was the previous action you took: {previous_action}"
+        )
+    else:  # falsy (e.g. None or "") -> the placeholder is filled with an empty string
+        previous_action = ""
+    prompt = VISION_PROMPT.format(
+        objective=objective, previous_action=previous_action
+    )
+    return prompt