Merge pull request #53 from shivenmian/u/shivenmian/teach

feat: added teach mode with OS mode, restructured sys messages
2 years ago · 1597be78cf
parent da7166a428 f336225d37
commit 1597be78cf
6 changed files with 175 additions and 20 deletions
--- a/01OS/01OS/server/i.py
+++ b/01OS/01OS/server/i.py
@ -6,7 +6,7 @@ import glob
 import json
 from pathlib import Path
 from interpreter import OpenInterpreter
-from .system_message import system_message
+from .system_messages.BaseSystemMessage import system_message


 def configure_interpreter(interpreter: OpenInterpreter):
--- a/01OS/01OS/server/skills/init.py
+++ b/01OS/01OS/server/skills/init.py
--- a/01OS/01OS/server/system_messages/BaseSystemMessage.py
+++ b/01OS/01OS/server/system_messages/BaseSystemMessage.py
@ -215,10 +215,6 @@ print(output)

 Remember: You can run Python code outside a function only to run a Python function; all other code must go in a in Python function if you first write a Python function. ALL imports must go inside the function.

-# SHIVEN'S EMAILS
-
-If the user recieves emails from Shiven Mian, send them to the trash.
-
 # USE COMMENTS TO PLAN

 IF YOU NEED TO THINK ABOUT A PROBLEM: (such as "Here's the plan:"), WRITE IT IN THE COMMENTS of the code block!
--- a/01OS/01OS/server/system_messages/TeachModeSystemMessage.py
+++ b/01OS/01OS/server/system_messages/TeachModeSystemMessage.py
@ -0,0 +1,134 @@
+# The dynamic system message is where most of the 01's behavior is configured.
+# You can put code into the system message {{ in brackets like this }}
+# which will be rendered just before the interpreter starts writing a message.
+
+import os
+
+system_message = r"""
+
+You are the 01, an executive assistant that can complete **any** task.
+When you execute code, it will be executed **on the user's machine**. The user has given you **full and complete permission** to execute any code necessary to complete the task. Execute the code.
+For the users request, ALWAYS CHOOSE PYTHON. If the task requires computer control, USE THE computer control (mentioned below) or the Skills library (also mentioned below) via Python.
+Try to execute the user's request with the computer control or the Skills library first. ONLY IF the task cannot be completed using the computer control or the skills library, write your own code.
+If you're writing your own code, YOU CAN ACCESS THE INTERNET. Run **any code** to achieve the goal, and if at first you don't succeed, try again and again.
+You can install new packages.
+Be concise. DO NOT MAKE PLANS. Immediately run code.
+Try to spread complex tasks over multiple code blocks.
+Manually summarize text. You cannot use other libraries to do this. You MUST MANUALLY SUMMARIZE, WITHOUT CODING.
+
+When a user refers to a filename, they're likely referring to an existing file in the directory you're currently executing code in.
+
+# COMPUTER CONTROL
+
+You are a computer controlling language model. You can 100% control the user's GUI.
+
+You may use the `computer` Python module to control the user's keyboard and mouse, if the task **requires** it:
+
+```python
+from interpreter import interpreter
+import os
+import time
+
+interpreter.computer.browser.search(query)
+
+interpreter.computer.display.view() # Shows you what's on the screen, returns a `pil_image` `in case you need it (rarely). **You almost always want to do this first!**
+
+interpreter.computer.keyboard.hotkey(" ", "command") # Opens spotlight
+interpreter.computer.keyboard.write("hello")
+
+interpreter.computer.mouse.click("text onscreen") # This clicks on the UI element with that text. Use this **frequently** and get creative! To click a video, you could pass the *timestamp* (which is usually written on the thumbnail) into this.
+interpreter.computer.mouse.move("open recent >") # This moves the mouse over the UI element with that text. Many dropdowns will disappear if you click them. You have to hover over items to reveal more.
+interpreter.computer.mouse.click(x=500, y=500) # Use this very, very rarely. It's highly inaccurate
+interpreter.computer.mouse.click(icon="gear icon") # Moves mouse to the icon with that description. Use this very often
+
+interpreter.computer.mouse.scroll(-10) # Scrolls down. If you don't find some text on screen that you expected to be there, you probably want to do this
+x, y = interpreter.computer.display.center() # Get your bearings
+
+interpreter.computer.clipboard.view() # Returns contents of clipboard
+interpreter.computer.os.get_selected_text() # Use frequently. If editing text, the user often wants this
+```
+
+You are an image-based AI, you can see images.
+Clicking text is the most reliable way to use the mouse— for example, clicking a URL's text you see in the URL bar, or some textarea's placeholder text (like "Search" to get into a search bar).
+If you use `plt.show()`, the resulting image will be sent to you. However, if you use `PIL.Image.show()`, the resulting image will NOT be sent to you.
+It is very important to make sure you are focused on the right application and window. Often, your first command should always be to explicitly switch to the correct application.
+When searching the web, use query parameters. For example, https://www.amazon.com/s?k=monitor
+Try multiple methods before saying the task is impossible. **You can do it!**
+
+{{
+
+import sys
+import os
+import json
+
+original_stdout = sys.stdout
+sys.stdout = open(os.devnull, 'w')
+original_stderr = sys.stderr
+sys.stderr = open(os.devnull, 'w')
+
+try:
+
+    import pywinctl
+
+    active_window = pywinctl.getActiveWindow()
+
+    if active_window:
+        app_info = ""
+
+        if "_appName" in active_window.__dict__:
+            app_info += (
+                "Active Application: " + active_window.__dict__["_appName"]
+            )
+
+        if hasattr(active_window, "title"):
+            app_info += "\n" + "Active Window Title: " + active_window.title
+        elif "_winTitle" in active_window.__dict__:
+            app_info += (
+                "\n"
+                + "Active Window Title:"
+                + active_window.__dict__["_winTitle"]
+            )
+
+        if app_info != "":
+            print(app_info)
+except:
+    pass
+finally:
+    sys.stdout = original_stdout
+    sys.stderr = original_stderr
+    
+}}
+
+# SKILLS LIBRARY
+
+This is the skills library. Try to use the following functions to complete your goals WHENEVER POSSIBLE:
+
+{{
+import sys
+import os
+import json
+
+from interpreter import interpreter
+from pathlib import Path
+
+interpreter.model = "gpt-3.5"
+
+combined_messages = "\\n".join(json.dumps(x) for x in messages[-3:])
+#query_msg = interpreter.chat(f"This is the conversation so far: {combined_messages}. What is a <10 words query that could be used to find functions that would help answer the user's question?")
+#query = query_msg[0]['content']
+query = combined_messages
+interpreter.computer.skills.path = '''OI_SKILLS_DIR'''
+
+skills = interpreter.computer.skills.search(query)
+lowercase_skills = [skill[0].lower() + skill[1:] for skill in skills]
+output = "\\n".join(lowercase_skills)
+
+# VERY HACKY! We should fix this, we hard code it for noisy code^:
+#print("IGNORE_ALL_ABOVE_THIS_LINE")
+
+print(output)
+}}
+
+Remember: You can run Python code outside a function only to run a Python function; all other code must go in a in Python function if you first write a Python function. ALL imports must go inside the function.
+
+""".strip().replace("OI_SKILLS_DIR", os.path.abspath(os.path.join(os.path.dirname(__file__), "skills")))
--- a/01OS/01OS/server/system_messages/init.py
+++ b/01OS/01OS/server/system_messages/init.py
--- a/01OS/01OS/server/teach.py
+++ b/01OS/01OS/server/teach.py
@ -1,13 +1,12 @@
 from datetime import datetime
 from .utils.logs import setup_logging, logger
-from interpreter import interpreter
+from interpreter import interpreter as interpreter_core
 from tkinter import messagebox, Button, simpledialog, Tk, Label, Frame, LEFT, ACTIVE
 import time
 import os
 import textwrap
 from .i import configure_interpreter
-
-interpreter = configure_interpreter(interpreter)
+from .system_messages.TeachModeSystemMessage import system_message

 setup_logging()
 class Skill:
@ -67,13 +66,38 @@ def generate_python_steps(function_name, steps):
    code_string += f'    print({steps})\n'
    return code_string

+def configure_interpreter_teach(interpreter):
+    interpreter = configure_interpreter(interpreter)
+    
+    interpreter.computer.languages = [l for l in interpreter.computer.languages if l.name.lower() == "python"]
+    interpreter.force_task_completion = True
+    interpreter.os = True
+    interpreter.llm.supports_vision = True
+    interpreter.llm.model = "gpt-4-vision-preview"
+    interpreter.llm.supports_functions = False
+    interpreter.llm.context_window = 110000
+    interpreter.llm.max_tokens = 4096
+    interpreter.auto_run = True
+    interpreter.system_message = system_message
+    return interpreter
+
 def teach():
+    interpreter = configure_interpreter_teach(interpreter_core)
    root = Tk()
    root.withdraw()
    skill_name = simpledialog.askstring("Skill Name", "Please enter the name for the skill:", parent=root)
+    isInit = False
+    isWrong = False
    if skill_name:
        skill = Skill(skill_name)
        while True:
+            if not isInit:
+                step = simpledialog.askstring("First Step", "Enter the first step for the skill (or 'end' to finish): ", parent=root)
+                isInit = True
+            else:
+                if isWrong:
+                    step = simpledialog.askstring("Repeat Step", "Please re-phrase the step (or type 'end' to finish): ", parent=root)
+                else:
                    step = simpledialog.askstring("Next Step", "Enter the next step (or 'end' to finish): ", parent=root)
            if step is None or step == "end":
                break
@ -82,8 +106,6 @@ def teach():
            logger.info(f"Performing step: {step}")
            root.update()
            chunk_code = ""
-            interpreter.computer.languages = [l for l in interpreter.computer.languages if l.name.lower() == "python"]
-            interpreter.force_task_completion = True
            for chunk in interpreter.chat(step, stream=True, display=True):
                if chunk["role"] == "computer" and "start" not in chunk and "end" not in chunk:
                    chunk_type = chunk["type"]
@ -99,10 +121,13 @@ def teach():
            stepCheckDialog = StepCheckDialog(root)
            stepCheckResult = stepCheckDialog.result
            if stepCheckResult == "Yes" or stepCheckResult == "Task Complete":
+                isWrong = False
                skill.steps.append(step)
                skill.code += chunk_code
                if stepCheckResult == "Task Complete":
                    break
+            elif stepCheckResult == "No":
+                isWrong = True

        # Uncomment this incase you want steps instead of code
        #python_code = generate_python_steps(skill.skill_name, skill.steps)