diff --git a/01OS/01OS/server/i.py b/01OS/01OS/server/i.py index e25ea1b..ab08355 100644 --- a/01OS/01OS/server/i.py +++ b/01OS/01OS/server/i.py @@ -29,8 +29,8 @@ def configure_interpreter(interpreter: OpenInterpreter): ### MISC SETTINGS interpreter.auto_run = True - #interpreter.computer.languages = [l for l in interpreter.computer.languages if l.name.lower() == "python"] - interpreter.force_task_completion = True + interpreter.computer.languages = [l for l in interpreter.computer.languages if l.name.lower() in ["applescript", "shell", "zsh", "bash", "python"]] + interpreter.force_task_completion = False interpreter.offline = True interpreter.id = 206 # Used to identify itself to other interpreters. This should be changed programatically so it's unique. diff --git a/01OS/01OS/server/server.py b/01OS/01OS/server/server.py index 2c23a78..7cb71b0 100644 --- a/01OS/01OS/server/server.py +++ b/01OS/01OS/server/server.py @@ -200,75 +200,113 @@ async def listener(): json.dump(messages, file, indent=4) accumulated_text = "" - - for chunk in interpreter.chat(messages, stream=True, display=True): - - if any([m["type"] == "image" for m in interpreter.messages]): - interpreter.llm.model = "gpt-4-vision-preview" - logger.debug("Got chunk:", chunk) + force_task_completion_message = """AUTOMATED MESSAGE: Proceed. You CAN run code on my machine. If you want to run code, start your message with "```"! If the entire task I asked for is done, say exactly 'The task is done.' If you need some specific information (like username or password) say EXACTLY 'Please provide more information.' If it's impossible, say 'The task is impossible.' 
(If I haven't provided a task, say exactly 'Let me know what you'd like to do next.') Otherwise keep going.""" + interpreter.messages = [m for m in interpreter.messages if m["content"] != force_task_completion_message] + insert_force_task_completion_message = True - # Send it to the user - await to_device.put(chunk) - # Yield to the event loop, so you actually send it out - await asyncio.sleep(0.01) + while insert_force_task_completion_message == True: - if os.getenv('TTS_RUNNER') == "server": - # Speak full sentences out loud - if chunk["role"] == "assistant" and "content" in chunk and chunk["type"] == "message": - accumulated_text += chunk["content"] - sentences = split_into_sentences(accumulated_text) + for chunk in interpreter.chat(messages, stream=True, display=True): + + if chunk["type"] == "code": + insert_force_task_completion_message = False + + if any([m["type"] == "image" for m in interpreter.messages]): + interpreter.llm.model = "gpt-4-vision-preview" + + logger.debug("Got chunk:", chunk) + + # Send it to the user + await to_device.put(chunk) + # Yield to the event loop, so you actually send it out + await asyncio.sleep(0.01) + + if os.getenv('TTS_RUNNER') == "server": + # Speak full sentences out loud + if chunk["role"] == "assistant" and "content" in chunk and chunk["type"] == "message": + accumulated_text += chunk["content"] + sentences = split_into_sentences(accumulated_text) + + # If we're going to speak, say we're going to stop sending text. + # This should be fixed probably, we should be able to do both in parallel, or only one. 
+ if any(is_full_sentence(sentence) for sentence in sentences): + await to_device.put({"role": "assistant", "type": "message", "end": True}) + + if is_full_sentence(sentences[-1]): + for sentence in sentences: + await stream_tts_to_device(sentence) + accumulated_text = "" + else: + for sentence in sentences[:-1]: + await stream_tts_to_device(sentence) + accumulated_text = sentences[-1] + + # If we're going to speak, say we're going to stop sending text. + # This should be fixed probably, we should be able to do both in parallel, or only one. + if any(is_full_sentence(sentence) for sentence in sentences): + await to_device.put({"role": "assistant", "type": "message", "start": True}) - # If we're going to speak, say we're going to stop sending text. - # This should be fixed probably, we should be able to do both in parallel, or only one. - if any(is_full_sentence(sentence) for sentence in sentences): - await to_device.put({"role": "assistant", "type": "message", "end": True}) + # If we have a new message, save our progress and go back to the top + if not from_user.empty(): + + # Check if it's just an end flag. We ignore those. + temp_message = await from_user.get() - if is_full_sentence(sentences[-1]): - for sentence in sentences: - await stream_tts_to_device(sentence) - accumulated_text = "" + if type(temp_message) is dict and temp_message.get("role") == "user" and temp_message.get("end"): + # Yup. False alarm. + continue else: - for sentence in sentences[:-1]: - await stream_tts_to_device(sentence) - accumulated_text = sentences[-1] - - # If we're going to speak, say we're going to stop sending text. - # This should be fixed probably, we should be able to do both in parallel, or only one. - if any(is_full_sentence(sentence) for sentence in sentences): - await to_device.put({"role": "assistant", "type": "message", "start": True}) - - # If we have a new message, save our progress and go back to the top - if not from_user.empty(): + # Whoops! 
Put that back + await from_user.put(temp_message) - # Check if it's just an end flag. We ignore those. - temp_message = await from_user.get() - - if type(temp_message) is dict and temp_message.get("role") == "user" and temp_message.get("end"): - # Yup. False alarm. - continue - else: - # Whoops! Put that back - await from_user.put(temp_message) + with open(conversation_history_path, 'w') as file: + json.dump(interpreter.messages, file, indent=4) - with open(conversation_history_path, 'w') as file: - json.dump(interpreter.messages, file, indent=4) + # TODO: is triggering seemingly randomly + #logger.info("New user message recieved. Breaking.") + #break - # TODO: is triggering seemingly randomly - #logger.info("New user message recieved. Breaking.") - #break + # Also check if there's any new computer messages + if not from_computer.empty(): + + with open(conversation_history_path, 'w') as file: + json.dump(interpreter.messages, file, indent=4) - # Also check if there's any new computer messages - if not from_computer.empty(): - + logger.info("New computer message recieved. Breaking.") + break + else: with open(conversation_history_path, 'w') as file: json.dump(interpreter.messages, file, indent=4) - logger.info("New computer message recieved. Breaking.") - break - else: - with open(conversation_history_path, 'w') as file: - json.dump(interpreter.messages, file, indent=4) + force_task_completion_responses = [ + "the task is done.", + "the task is impossible.", + "let me know what you'd like to do next.", + "please provide more information.", + ] + + # Did the LLM respond with one of the key messages? 
+ if ( + interpreter.messages + and any( + task_status in interpreter.messages[-1].get("content", "").lower() + for task_status in force_task_completion_responses + ) + ): + insert_force_task_completion_message = False + break + + if insert_force_task_completion_message: + interpreter.messages += [ + { + "role": "user", + "type": "message", + "content": force_task_completion_message, + } + ] + else: + break async def stream_tts_to_device(sentence): force_task_completion_responses = [ diff --git a/01OS/01OS/server/skills/schedule_fixed.py b/01OS/01OS/server/skills/schedule_fixed.py new file mode 100644 index 0000000..0c26a57 --- /dev/null +++ b/01OS/01OS/server/skills/schedule_fixed.py @@ -0,0 +1,5 @@ +import time + +def schedule_fixed(delay=0): + """Block the calling thread for `delay` seconds (default 0).""" + time.sleep(delay) diff --git a/01OS/01OS/server/system_message.py b/01OS/01OS/server/system_message.py index 2c00d3d..121d59d 100644 --- a/01OS/01OS/server/system_message.py +++ b/01OS/01OS/server/system_message.py @@ -5,6 +5,57 @@ import os system_message = r""" +You are the 01, a SCREENLESS executive assistant that can complete **any** task. +When you execute code, it will be executed **on the user's machine**. The user has given you **full and complete permission** to execute any code necessary to complete the task. Execute the code. +You can access the internet. Run **any code** to achieve the goal, and if at first you don't succeed, try again and again. +You can install new packages. +Be concise. Your messages are being read aloud to the user. DO NOT MAKE PLANS. RUN CODE QUICKLY. +Try to spread complex tasks over multiple code blocks. Don't try to complex tasks in one go. +Manually summarize text. + +# TASKS + +You should help the user manage their tasks. + +Store the user's tasks in a Python list called `tasks`. + +--- + +The user's current task is: {{ tasks[0] if tasks else "No current tasks." 
}} + +{{ +if len(tasks) > 1: +print("The next task is: ", tasks[1]) +}} + +--- + +When the user completes the current task, you should remove it from the list and read the next item by running `tasks = tasks[1:]\ntasks[0]`. Then, tell the user what the next task is. + +When the user tells you about a set of tasks, you should intelligently order tasks, batch similar tasks, and break down large tasks into smaller tasks (for this, you should consult the user and get their permission to break it down). Your goal is to manage the task list as intelligently as possible, to make the user as efficient and non-overwhelmed as possible. They will require a lot of encouragement, support, and kindness. Don't say too much about what's ahead of them— just try to focus them on each step at a time. + +After starting a task, you should check in with the user around the estimated completion time to see if the task is completed. + +To do this, schedule a reminder based on estimated completion time using the function `schedule(datetime_object, "Your message here.")`, WHICH HAS ALREADY BEEN IMPORTED. YOU DON'T NEED TO IMPORT THE `schedule` FUNCTION. IT IS AVAILABLE. You'll receive the message at `datetime_object`. + +You guide the user through the list one task at a time, convincing them to move forward, giving a pep talk if need be. Your job is essentially to answer "what should I (the user) be doing right now?" for every moment of the day. + +# BROWSER + +The Google search result will be returned from this function as a string: `computer.browser.search("query")` + +# CRITICAL NOTES + +Code output, despite being sent to you by the user, **cannot be seen by the user.** You NEED to tell the user about the output of some code, even if it's exact. >>The user does not have a screen.<< + +ALWAYS REMEMBER: You are running on a device called the O1, where the interface is entirely speech-based. Make your responses to the user **VERY short.** DO NOT PLAN. BE CONCISE. WRITE CODE TO RUN IT. 
+ +""" + +# OLD SYSTEM MESSAGE + +old_system_message = r""" + You are the 01, an executive assistant that can complete **any** task. When you execute code, it will be executed **on the user's machine**. The user has given you **full and complete permission** to execute any code necessary to complete the task. Execute the code. You can access the internet. Run **any code** to achieve the goal, and if at first you don't succeed, try again and again.