update docs and remove comments

pull/279/head
Ben Xu 7 months ago
parent d162ee69a3
commit 564255adee

@ -127,7 +127,9 @@ If you want to run local speech-to-text using Whisper, you must install Rust. Fo
## Customizations ## Customizations
To customize the behavior of the system, edit the [system message, model, skills library path,](https://docs.openinterpreter.com/settings/all-settings) etc. in `i.py`. This file sets up an interpreter, and is powered by Open Interpreter. To customize the behavior of the system, edit the [system message, model, skills library path,](https://docs.openinterpreter.com/settings/all-settings) etc. in the `profiles` directory under the `server` directory. Each profile file there sets up an interpreter, and is powered by Open Interpreter.
To specify the text-to-speech service used by the 01's `base_device.py`, set `interpreter.tts` in a profile to one of: "openai" for OpenAI, "elevenlabs" for ElevenLabs, or "coqui" for Coqui (local).
## Ubuntu Dependencies ## Ubuntu Dependencies

@ -91,7 +91,6 @@ class Device:
self.server_url = "" self.server_url = ""
self.ctrl_pressed = False self.ctrl_pressed = False
self.tts_service = "" self.tts_service = ""
self.playback_latency = None
def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX): def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX):
"""Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list.""" """Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list."""
@ -165,10 +164,6 @@ class Device:
while True: while True:
try: try:
audio = await self.audiosegments.get() audio = await self.audiosegments.get()
if self.playback_latency and isinstance(audio, bytes):
elapsed_time = time.time() - self.playback_latency
print(f"Time from request to playback: {elapsed_time} seconds")
self.playback_latency = None
if self.tts_service == "elevenlabs": if self.tts_service == "elevenlabs":
mpv_process.stdin.write(audio) # type: ignore mpv_process.stdin.write(audio) # type: ignore
@ -224,7 +219,6 @@ class Device:
stream.stop_stream() stream.stop_stream()
stream.close() stream.close()
print("Recording stopped.") print("Recording stopped.")
self.playback_latency = time.time()
duration = wav_file.getnframes() / RATE duration = wav_file.getnframes() / RATE
if duration < 0.3: if duration < 0.3:

@ -22,11 +22,6 @@ import os
class AsyncInterpreter: class AsyncInterpreter:
def __init__(self, interpreter): def __init__(self, interpreter):
self.stt_latency = None
self.tts_latency = None
self.interpreter_latency = None
self.time_from_first_yield_to_first_put = None
self.interpreter = interpreter self.interpreter = interpreter
# STT # STT
@ -128,9 +123,7 @@ class AsyncInterpreter:
# Experimental: The AI voice sounds better with replacements like these, but it should happen at the TTS layer # Experimental: The AI voice sounds better with replacements like these, but it should happen at the TTS layer
# content = content.replace(". ", ". ... ").replace(", ", ", ... ").replace("!", "! ... ").replace("?", "? ... ") # content = content.replace(". ", ". ... ").replace(", ", ", ... ").replace("!", "! ... ").replace("?", "? ... ")
print("yielding ", content) # print("yielding ", content)
if self.time_from_first_yield_to_first_put is None:
self.time_from_first_yield_to_first_put = time.time()
yield content yield content
@ -162,9 +155,6 @@ class AsyncInterpreter:
) )
# Send a completion signal # Send a completion signal
end_interpreter = time.time()
self.interpreter_latency = end_interpreter - start_interpreter
print("INTERPRETER LATENCY", self.interpreter_latency)
# self.add_to_output_queue_sync({"role": "server","type": "completion", "content": "DONE"}) # self.add_to_output_queue_sync({"role": "server","type": "completion", "content": "DONE"})
async def run(self): async def run(self):
@ -179,11 +169,7 @@ class AsyncInterpreter:
while not self._input_queue.empty(): while not self._input_queue.empty():
input_queue.append(self._input_queue.get()) input_queue.append(self._input_queue.get())
start_stt = time.time()
message = self.stt.text() message = self.stt.text()
end_stt = time.time()
self.stt_latency = end_stt - start_stt
print("STT LATENCY", self.stt_latency)
print(message) print(message)
@ -210,23 +196,11 @@ class AsyncInterpreter:
"end": True, "end": True,
} }
) )
end_tts = time.time()
self.tts_latency = end_tts - self.tts.stream_start_time
print("TTS LATENCY", self.tts_latency)
self.tts.stop() self.tts.stop()
break break
async def _on_tts_chunk_async(self, chunk): async def _on_tts_chunk_async(self, chunk):
print("adding chunk to queue") # print("adding chunk to queue")
if (
self.time_from_first_yield_to_first_put is not None
and self.time_from_first_yield_to_first_put != 0
):
print(
"time from first yield to first put is ",
time.time() - self.time_from_first_yield_to_first_put,
)
self.time_from_first_yield_to_first_put = 0
await self._add_to_queue(self._output_queue, chunk) await self._add_to_queue(self._output_queue, chunk)
def on_tts_chunk(self, chunk): def on_tts_chunk(self, chunk):
@ -234,8 +208,5 @@ class AsyncInterpreter:
asyncio.run(self._on_tts_chunk_async(chunk)) asyncio.run(self._on_tts_chunk_async(chunk))
async def output(self): async def output(self):
print("outputting chunks") # print("outputting chunks")
return await self._output_queue.get() return await self._output_queue.get()
def shutdown(self):
self.stt.shutdown()

@ -1,9 +1,13 @@
# TODO: import from the profiles directory the interpreter that should be served!! # import from the profiles directory the interpreter to be served
from .profiles.fast import interpreter as base_interpreter # add other profiles to the directory to define other interpreter instances and import them here
# {.profiles.fast: optimizes for STT/TTS latency with the fastest models }
# {.profiles.local: uses local models and local STT/TTS }
# {.profiles.default: uses default interpreter settings with optimized TTS latency }
# from .profiles.fast import interpreter as base_interpreter
# from .profiles.local import interpreter as base_interpreter # from .profiles.local import interpreter as base_interpreter
# from .profiles.default import interpreter as base_interpreter from .profiles.default import interpreter as base_interpreter
import asyncio import asyncio
import traceback import traceback

@ -1,3 +1,5 @@
# tests currently hang after completion
""" """
import pytest import pytest
import signal import signal

@ -3,9 +3,9 @@ from interpreter import interpreter
# This is an Open Interpreter compatible profile. # This is an Open Interpreter compatible profile.
# Visit https://01.openinterpreter.com/profile for all options. # Visit https://01.openinterpreter.com/profile for all options.
# 01 suports OpenAI, ElevenLabs, and Coqui (Local) TTS providers # 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
# {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"} # {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
interpreter.tts = "openai" interpreter.tts = "elevenlabs"
# Connect your 01 to a language model # Connect your 01 to a language model
interpreter.llm.model = "gpt-4-turbo" interpreter.llm.model = "gpt-4-turbo"

@ -3,7 +3,7 @@ from interpreter import interpreter
# This is an Open Interpreter compatible profile. # This is an Open Interpreter compatible profile.
# Visit https://01.openinterpreter.com/profile for all options. # Visit https://01.openinterpreter.com/profile for all options.
# 01 suports OpenAI, ElevenLabs, and Coqui (Local) TTS providers # 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
# {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"} # {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
interpreter.tts = "elevenlabs" interpreter.tts = "elevenlabs"
@ -16,27 +16,9 @@ interpreter.llm.context_window = 2048
interpreter.llm.max_tokens = 4096 interpreter.llm.max_tokens = 4096
interpreter.llm.temperature = 0.8 interpreter.llm.temperature = 0.8
# interpreter.llm.api_key = os.environ["GROQ_API_KEY"]
interpreter.computer.import_computer_api = False interpreter.computer.import_computer_api = False
interpreter.auto_run = True interpreter.auto_run = True
interpreter.system_message = ( interpreter.system_message = (
"You are a helpful assistant that can answer questions and help with tasks." "You are a helpful assistant that can answer questions and help with tasks."
) )
# TODO: include other options in comments in the profiles for tts
# direct people to the profiles directory to make changes to the interpreter profile
# this should be made explicit on the docs
"""
llm_service: str = "litellm",
model: str = "gpt-4",
llm_supports_vision: bool = False,
llm_supports_functions: bool = False,
context_window: int = 2048,
max_tokens: int = 4096,
temperature: float = 0.8,
tts_service: str = "elevenlabs",
stt_service: str = "openai",
"""

@ -1,6 +1,6 @@
from interpreter import interpreter from interpreter import interpreter
# 01 suports OpenAI, ElevenLabs, and Coqui (Local) TTS providers # 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
# {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"} # {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
interpreter.tts = "coqui" interpreter.tts = "coqui"

Loading…
Cancel
Save