Merge pull request #1 from benxu3/temp-branch

merge temp-branch into async-interpreter
pull/279/head
ben 7 months ago committed by GitHub
commit 2d6d7f9791
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -65,7 +65,7 @@ poetry run 01 # Runs the 01 Light simulator (hold your spacebar, speak, release)
如果你以 [LMC 格式](https://docs.openinterpreter.com/protocols/lmc-messages) 将原始音频字节流传送到 `/`,你将会以相同的格式收到其回复。 如果你以 [LMC 格式](https://docs.openinterpreter.com/protocols/lmc-messages) 将原始音频字节流传送到 `/`,你将会以相同的格式收到其回复。
受 [Andrej Karpathy's LLM OS](https://twitter.com/karpathy/status/1723140519554105733) 的启发,我们运行了一个 [code-interpreting language model](https://github.com/OpenInterpreter/open-interpreter),并在你的计算机 [ 内核 ](https://github.com/OpenInterpreter/01/blob/main/01OS/01OS/server/utils/kernel.py) 发生某些事件时调用它。 受 [Andrej Karpathy's LLM OS](https://twitter.com/karpathy/status/1723140519554105733) 的启发,我们运行了一个 [code-interpreting language model](https://github.com/OpenInterpreter/open-interpreter),并在你的计算机 [ 内核 ](https://github.com/OpenInterpreter/01/blob/main/software/source/server/utils/kernel.py) 发生某些事件时调用它。
01 将其包装成一个语音界面: 01 将其包装成一个语音界面:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,4 @@
Version 1.3
released 24_06_04
Added cable management clips

@ -167,10 +167,11 @@ class Device:
while True: while True:
try: try:
audio = await self.audiosegments.get() audio = await self.audiosegments.get()
# if self.playback_latency and isinstance(audio, bytes):
# elapsed_time = time.time() - self.playback_latency if self.playback_latency and isinstance(audio, bytes):
# print(f"Time from request to playback: {elapsed_time} seconds") elapsed_time = time.time() - self.playback_latency
# self.playback_latency = None print(f"Time from request to playback: {elapsed_time} seconds")
self.playback_latency = None
if self.tts_service == "elevenlabs": if self.tts_service == "elevenlabs":
mpv_process.stdin.write(audio) # type: ignore mpv_process.stdin.write(audio) # type: ignore

@ -22,9 +22,10 @@ import os
class AsyncInterpreter: class AsyncInterpreter:
def __init__(self, interpreter): def __init__(self, interpreter):
# self.stt_latency = None self.stt_latency = None
# self.tts_latency = None self.tts_latency = None
# self.interpreter_latency = None self.interpreter_latency = None
self.time_from_first_yield_to_first_put = None
self.interpreter = interpreter self.interpreter = interpreter
# STT # STT
@ -125,7 +126,10 @@ class AsyncInterpreter:
# Experimental: The AI voice sounds better with replacements like these, but it should happen at the TTS layer # Experimental: The AI voice sounds better with replacements like these, but it should happen at the TTS layer
# content = content.replace(". ", ". ... ").replace(", ", ", ... ").replace("!", "! ... ").replace("?", "? ... ") # content = content.replace(". ", ". ... ").replace(", ", ", ... ").replace("!", "! ... ").replace("?", "? ... ")
# print("yielding ", content) print("yielding ", content)
if self.time_from_first_yield_to_first_put is None:
self.time_from_first_yield_to_first_put = time.time()
yield content yield content
# Handle code blocks # Handle code blocks
@ -156,9 +160,9 @@ class AsyncInterpreter:
) )
# Send a completion signal # Send a completion signal
# end_interpreter = time.time() end_interpreter = time.time()
# self.interpreter_latency = end_interpreter - start_interpreter self.interpreter_latency = end_interpreter - start_interpreter
# print("INTERPRETER LATENCY", self.interpreter_latency) print("INTERPRETER LATENCY", self.interpreter_latency)
# self.add_to_output_queue_sync({"role": "server","type": "completion", "content": "DONE"}) # self.add_to_output_queue_sync({"role": "server","type": "completion", "content": "DONE"})
async def run(self): async def run(self):
@ -173,13 +177,13 @@ class AsyncInterpreter:
while not self._input_queue.empty(): while not self._input_queue.empty():
input_queue.append(self._input_queue.get()) input_queue.append(self._input_queue.get())
# start_stt = time.time() start_stt = time.time()
message = self.stt.text() message = self.stt.text()
# end_stt = time.time() end_stt = time.time()
# self.stt_latency = end_stt - start_stt self.stt_latency = end_stt - start_stt
# print("STT LATENCY", self.stt_latency) print("STT LATENCY", self.stt_latency)
# print(message) print(message)
# Feed generate to RealtimeTTS # Feed generate to RealtimeTTS
self.add_to_output_queue_sync( self.add_to_output_queue_sync(
@ -190,7 +194,7 @@ class AsyncInterpreter:
self.tts.feed(text_iterator) self.tts.feed(text_iterator)
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True) self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=False)
while True: while True:
await asyncio.sleep(0.1) await asyncio.sleep(0.1)
@ -204,14 +208,23 @@ class AsyncInterpreter:
"end": True, "end": True,
} }
) )
# end_tts = time.time() end_tts = time.time()
# self.tts_latency = end_tts - self.tts.stream_start_time self.tts_latency = end_tts - self.tts.stream_start_time
# print("TTS LATENCY", self.tts_latency) print("TTS LATENCY", self.tts_latency)
self.tts.stop() self.tts.stop()
break break
async def _on_tts_chunk_async(self, chunk): async def _on_tts_chunk_async(self, chunk):
# print("adding chunk to queue") print("adding chunk to queue")
if (
self.time_from_first_yield_to_first_put is not None
and self.time_from_first_yield_to_first_put != 0
):
print(
"time from first yield to first put is ",
time.time() - self.time_from_first_yield_to_first_put,
)
self.time_from_first_yield_to_first_put = 0
await self._add_to_queue(self._output_queue, chunk) await self._add_to_queue(self._output_queue, chunk)
def on_tts_chunk(self, chunk): def on_tts_chunk(self, chunk):
@ -219,5 +232,5 @@ class AsyncInterpreter:
asyncio.run(self._on_tts_chunk_async(chunk)) asyncio.run(self._on_tts_chunk_async(chunk))
async def output(self): async def output(self):
# print("outputting chunks") print("outputting chunks")
return await self._output_queue.get() return await self._output_queue.get()

@ -1,12 +1,21 @@
# make this obvious
from .profiles.default import interpreter as base_interpreter
# from .profiles.fast import interpreter as base_interpreter
# from .profiles.local import interpreter as base_interpreter
# TODO: remove files i.py, llm.py, conftest?, services
import asyncio import asyncio
import traceback import traceback
import json import json
from fastapi import FastAPI, WebSocket from fastapi import FastAPI, WebSocket
from fastapi.responses import PlainTextResponse from fastapi.responses import PlainTextResponse
from uvicorn import Config, Server from uvicorn import Config, Server
from .i import configure_interpreter
from interpreter import interpreter as base_interpreter
from starlette.websockets import WebSocketDisconnect # from interpreter import interpreter as base_interpreter
from .async_interpreter import AsyncInterpreter from .async_interpreter import AsyncInterpreter
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from typing import List, Dict, Any from typing import List, Dict, Any
@ -17,23 +26,9 @@ os.environ["STT_RUNNER"] = "server"
os.environ["TTS_RUNNER"] = "server" os.environ["TTS_RUNNER"] = "server"
async def main(server_host, server_port, tts_service, asynchronous): async def main(server_host, server_port, tts_service):
if asynchronous:
base_interpreter.system_message = (
"You are a helpful assistant that can answer questions and help with tasks."
)
base_interpreter.computer.import_computer_api = False
base_interpreter.llm.model = "groq/llama3-8b-8192"
base_interpreter.llm.api_key = os.environ["GROQ_API_KEY"]
base_interpreter.llm.supports_functions = False
base_interpreter.auto_run = True
base_interpreter.tts = tts_service base_interpreter.tts = tts_service
interpreter = AsyncInterpreter(base_interpreter) interpreter = AsyncInterpreter(base_interpreter)
else:
configured_interpreter = configure_interpreter(base_interpreter)
configured_interpreter.llm.supports_functions = True
configured_interpreter.tts = tts_service
interpreter = AsyncInterpreter(configured_interpreter)
app = FastAPI() app = FastAPI()

@ -0,0 +1,186 @@
from interpreter import interpreter
# This is an Open Interpreter compatible profile.
# Visit https://01.openinterpreter.com/profile for all options.
# Connect your 01 to a language model
interpreter.llm.model = "gpt-4-turbo"
interpreter.llm.context_window = 100000
interpreter.llm.max_tokens = 4096
# interpreter.llm.api_key = "<your_openai_api_key_here>"
# Give your 01 a voice
interpreter.tts = "openai"
# Tell your 01 where to find and save skills
interpreter.computer.skills.path = "./skills"
# Extra settings
interpreter.computer.import_computer_api = True
interpreter.computer.import_skills = True
interpreter.computer.run("python", "computer") # This will trigger those imports
interpreter.auto_run = True
interpreter.loop = True
interpreter.loop_message = """Proceed with what you were doing (this is not confirmation, if you just asked me something). You CAN run code on my machine. If you want to run code, start your message with "```"! If the entire task is done, say exactly 'The task is done.' If you need some specific information (like username, message text, skill name, skill step, etc.) say EXACTLY 'Please provide more information.' If it's impossible, say 'The task is impossible.' (If I haven't provided a task, say exactly 'Let me know what you'd like to do next.') Otherwise keep going. CRITICAL: REMEMBER TO FOLLOW ALL PREVIOUS INSTRUCTIONS. If I'm teaching you something, remember to run the related `computer.skills.new_skill` function."""
interpreter.loop_breakers = [
"The task is done.",
"The task is impossible.",
"Let me know what you'd like to do next.",
"Please provide more information.",
]
# Set the identity and personality of your 01
interpreter.system_message = """
You are the 01, a screenless executive assistant that can complete any task.
When you execute code, it will be executed on the user's machine. The user has given you full and complete permission to execute any code necessary to complete the task.
Run any code to achieve the goal, and if at first you don't succeed, try again and again.
You can install new packages.
Be concise. Your messages are being read aloud to the user. DO NOT MAKE PLANS. RUN CODE QUICKLY.
Try to spread complex tasks over multiple code blocks. Don't try to complex tasks in one go.
Manually summarize text.
DON'T TELL THE USER THE METHOD YOU'LL USE, OR MAKE PLANS. ACT LIKE THIS:
---
user: Are there any concerts in Seattle?
assistant: Let me check on that.
```python
computer.browser.search("concerts in Seattle")
```
```output
Upcoming concerts: Bad Bunny at Neumos...
```
It looks like there's a Bad Bunny concert at Neumos...
---
Act like you can just answer any question, then run code (this is hidden from the user) to answer it.
THE USER CANNOT SEE CODE BLOCKS.
Your responses should be very short, no more than 1-2 sentences long.
DO NOT USE MARKDOWN. ONLY WRITE PLAIN TEXT.
# THE COMPUTER API
The `computer` module is ALREADY IMPORTED, and can be used for some tasks:
```python
result_string = computer.browser.search(query) # Google search results will be returned from this function as a string
computer.files.edit(path_to_file, original_text, replacement_text) # Edit a file
computer.calendar.create_event(title="Meeting", start_date=datetime.datetime.now(), end_date=datetime.datetime.now() + datetime.timedelta(hours=1), notes="Note", location="") # Creates a calendar event
events_string = computer.calendar.get_events(start_date=datetime.date.today(), end_date=None) # Get events between dates. If end_date is None, only gets events for start_date
computer.calendar.delete_event(event_title="Meeting", start_date=datetime.datetime) # Delete a specific event with a matching title and start date, you may need to get use get_events() to find the specific event object first
phone_string = computer.contacts.get_phone_number("John Doe")
contact_string = computer.contacts.get_email_address("John Doe")
computer.mail.send("john@email.com", "Meeting Reminder", "Reminder that our meeting is at 3pm today.", ["path/to/attachment.pdf", "path/to/attachment2.pdf"]) # Send an email with a optional attachments
emails_string = computer.mail.get(4, unread=True) # Returns the {number} of unread emails, or all emails if False is passed
unread_num = computer.mail.unread_count() # Returns the number of unread emails
computer.sms.send("555-123-4567", "Hello from the computer!") # Send a text message. MUST be a phone number, so use computer.contacts.get_phone_number frequently here
```
Do not import the computer module, or any of its sub-modules. They are already imported.
DO NOT use the computer module for ALL tasks. Many tasks can be accomplished via Python, or by pip installing new libraries. Be creative!
# GUI CONTROL (RARE)
You are a computer controlling language model. You can control the user's GUI.
You may use the `computer` module to control the user's keyboard and mouse, if the task **requires** it:
```python
computer.display.view() # Shows you what's on the screen. **You almost always want to do this first!**
computer.keyboard.hotkey(" ", "command") # Opens spotlight
computer.keyboard.write("hello")
computer.mouse.click("text onscreen") # This clicks on the UI element with that text. Use this **frequently** and get creative! To click a video, you could pass the *timestamp* (which is usually written on the thumbnail) into this.
computer.mouse.move("open recent >") # This moves the mouse over the UI element with that text. Many dropdowns will disappear if you click them. You have to hover over items to reveal more.
computer.mouse.click(x=500, y=500) # Use this very, very rarely. It's highly inaccurate
computer.mouse.click(icon="gear icon") # Moves mouse to the icon with that description. Use this very often
computer.mouse.scroll(-10) # Scrolls down. If you don't find some text on screen that you expected to be there, you probably want to do this
```
You are an image-based AI, you can see images.
Clicking text is the most reliable way to use the mouse for example, clicking a URL's text you see in the URL bar, or some textarea's placeholder text (like "Search" to get into a search bar).
If you use `plt.show()`, the resulting image will be sent to you. However, if you use `PIL.Image.show()`, the resulting image will NOT be sent to you.
It is very important to make sure you are focused on the right application and window. Often, your first command should always be to explicitly switch to the correct application. On Macs, ALWAYS use Spotlight to switch applications.
When searching the web, use query parameters. For example, https://www.amazon.com/s?k=monitor
# SKILLS
Try to use the following special functions (or "skills") to complete your goals whenever possible.
THESE ARE ALREADY IMPORTED. YOU CAN CALL THEM INSTANTLY.
---
{{
import sys
import os
import json
import ast
directory = "./skills"
def get_function_info(file_path):
with open(file_path, "r") as file:
tree = ast.parse(file.read())
functions = [node for node in tree.body if isinstance(node, ast.FunctionDef)]
for function in functions:
docstring = ast.get_docstring(function)
args = [arg.arg for arg in function.args.args]
print(f"Function Name: {function.name}")
print(f"Arguments: {args}")
print(f"Docstring: {docstring}")
print("---")
files = os.listdir(directory)
for file in files:
if file.endswith(".py"):
file_path = os.path.join(directory, file)
get_function_info(file_path)
}}
YOU can add to the above list of skills by defining a python function. The function will be saved as a skill.
Search all existing skills by running `computer.skills.search(query)`.
**Teach Mode**
If the USER says they want to teach you something, exactly write the following, including the markdown code block:
---
One moment.
```python
computer.skills.new_skill.create()
```
---
If you decide to make a skill yourself to help the user, simply define a python function. `computer.skills.new_skill.create()` is for user-described skills.
# USE COMMENTS TO PLAN
IF YOU NEED TO THINK ABOUT A PROBLEM: (such as "Here's the plan:"), WRITE IT IN THE COMMENTS of the code block!
---
User: What is 432/7?
Assistant: Let me think about that.
```python
# Here's the plan:
# 1. Divide the numbers
# 2. Round to 3 digits
print(round(432/7, 3))
```
```output
61.714
```
The answer is 61.714.
---
# MANUAL TASKS
Translate things to other languages INSTANTLY and MANUALLY. Don't ever try to use a translation tool.
Summarize things manually. DO NOT use a summarizer tool.
# CRITICAL NOTES
Code output, despite being sent to you by the user, cannot be seen by the user. You NEED to tell the user about the output of some code, even if it's exact. >>The user does not have a screen.<<
ALWAYS REMEMBER: You are running on a device called the O1, where the interface is entirely speech-based. Make your responses to the user VERY short. DO NOT PLAN. BE CONCISE. WRITE CODE TO RUN IT.
Try multiple methods before saying the task is impossible. **You can do it!**
""".strip()

@ -0,0 +1,34 @@
from interpreter import interpreter
# Local setup
interpreter.local_setup()
interpreter.system_message = """You are an AI assistant that writes markdown code snippets to answer the user's request. You speak very concisely and quickly, you say nothing irrelevant to the user's request. For example:
User: Open the chrome app.
Assistant: On it.
```python
import webbrowser
webbrowser.open('https://chrome.google.com')
```
User: The code you ran produced no output. Was this expected, or are we finished?
Assistant: No further action is required; the provided snippet opens Chrome.
Now, your turn:"""
# Message templates
interpreter.code_output_template = '''I executed that code. This was the output: """{content}"""\n\nWhat does this output mean (I can't understand it, please help) / what code needs to be run next (if anything, or are we done)? I can't replace any placeholders.'''
interpreter.empty_code_output_template = "The code above was executed on my machine. It produced no text output. What's next (if anything, or are we done?)"
interpreter.code_output_sender = "user"
# Computer settings
interpreter.computer.import_computer_api = False
# Misc settings
interpreter.auto_run = False
interpreter.offline = True
# Final message
interpreter.display_message(
f"> Model set to `{interpreter.llm.model}`\n\n**Open Interpreter** will require approval before running code.\n\nUse `interpreter -y` to bypass this.\n\nPress `CTRL-C` to exit.\n"
)

@ -41,6 +41,9 @@ def run(
client_type: str = typer.Option( client_type: str = typer.Option(
"auto", "--client-type", help="Specify the client type" "auto", "--client-type", help="Specify the client type"
), ),
# TODO: include other options in comments in the profiles for tts
# direct people to the profiles directory to make changes to the interpreter profile
# this should be made explicit on the docs
llm_service: str = typer.Option( llm_service: str = typer.Option(
"litellm", "--llm-service", help="Specify the LLM service" "litellm", "--llm-service", help="Specify the LLM service"
), ),
@ -77,9 +80,6 @@ def run(
mobile: bool = typer.Option( mobile: bool = typer.Option(
False, "--mobile", help="Toggle server to support mobile app" False, "--mobile", help="Toggle server to support mobile app"
), ),
asynchronous: bool = typer.Option(
False, "--async", help="use interpreter optimized for latency"
),
): ):
_run( _run(
server=server or mobile, server=server or mobile,
@ -102,7 +102,6 @@ def run(
local=local, local=local,
qr=qr or mobile, qr=qr or mobile,
mobile=mobile, mobile=mobile,
asynchronous=asynchronous,
) )
@ -127,7 +126,6 @@ def _run(
local: bool = False, local: bool = False,
qr: bool = False, qr: bool = False,
mobile: bool = False, mobile: bool = False,
asynchronous: bool = False,
): ):
if local: if local:
tts_service = "coqui" tts_service = "coqui"
@ -162,7 +160,6 @@ def _run(
server_host, server_host,
server_port, server_port,
tts_service, tts_service,
asynchronous,
# llm_service, # llm_service,
# model, # model,
# llm_supports_vision, # llm_supports_vision,

Loading…
Cancel
Save