Updated OI, added try

pull/84/head
killian 10 months ago
parent a82b4b264f
commit 83b776bd1a

@@ -1,6 +1,7 @@
from dotenv import load_dotenv
load_dotenv() # take environment variables from .env.
import traceback
from platformdirs import user_data_dir
import ast
import json
@@ -183,136 +184,138 @@ async def send_messages(websocket: WebSocket):
async def listener():
    while True:
        try:
            while True:
                if not from_user.empty():
                    chunk = await from_user.get()
                    break
                elif not from_computer.empty():
                    chunk = from_computer.get()
                    break
                await asyncio.sleep(1)

            message = accumulator.accumulate(chunk)
            if message == None:
                # Will be None until we have a full message ready
                continue

            # print(str(message)[:1000])

            # At this point, we have our message

            if message["type"] == "audio" and message["format"].startswith("bytes"):

                if "content" not in message or message["content"] == None: # If it was nothing / silence
                    continue

                # Convert bytes to audio file
                # Format will be bytes.wav or bytes.opus
                mime_type = "audio/" + message["format"].split(".")[1]
                audio_file_path = bytes_to_wav(message["content"], mime_type)

                # For microphone debugging:
                if False:
                    os.system(f"open {audio_file_path}")
                    import time
                    time.sleep(15)

                text = stt(audio_file_path)
                print("> ", text)
                message = {"role": "user", "type": "message", "content": text}

            # At this point, we have only text messages

            if type(message["content"]) != str:
                print("This should be a string, but it's not:", message["content"])
                message["content"] = message["content"].decode()

            # Custom stop message will halt us
            if message["content"].lower().strip(".,! ") == "stop":
                continue

            # Load, append, and save conversation history
            with open(conversation_history_path, 'r') as file:
                messages = json.load(file)
            messages.append(message)
            with open(conversation_history_path, 'w') as file:
                json.dump(messages, file, indent=4)

            accumulated_text = ""

            if any([m["type"] == "image" for m in messages]) and interpreter.llm.model.startswith("gpt-"):
                interpreter.llm.model = "gpt-4-vision-preview"
                interpreter.llm.supports_vision = True

            for chunk in interpreter.chat(messages, stream=True, display=True):

                logger.debug("Got chunk:", chunk)

                if any([m["type"] == "image" for m in interpreter.messages]):
                    interpreter.llm.model = "gpt-4-vision-preview"

                # Send it to the user
                await to_device.put(chunk)
                # Yield to the event loop, so you actually send it out
                await asyncio.sleep(0.01)

                if os.getenv('TTS_RUNNER') == "server":
                    # Speak full sentences out loud
                    if chunk["role"] == "assistant" and "content" in chunk and chunk["type"] == "message":
                        accumulated_text += chunk["content"]
                        sentences = split_into_sentences(accumulated_text)

                        # If we're going to speak, say we're going to stop sending text.
                        # This should be fixed probably, we should be able to do both in parallel, or only one.
                        if any(is_full_sentence(sentence) for sentence in sentences):
                            await to_device.put({"role": "assistant", "type": "message", "end": True})

                        if is_full_sentence(sentences[-1]):
                            for sentence in sentences:
                                await stream_tts_to_device(sentence)
                            accumulated_text = ""
                        else:
                            for sentence in sentences[:-1]:
                                await stream_tts_to_device(sentence)
                            accumulated_text = sentences[-1]

                        # If we're going to speak, say we're going to stop sending text.
                        # This should be fixed probably, we should be able to do both in parallel, or only one.
                        if any(is_full_sentence(sentence) for sentence in sentences):
                            await to_device.put({"role": "assistant", "type": "message", "start": True})

                # If we have a new message, save our progress and go back to the top
                if not from_user.empty():

                    # Check if it's just an end flag. We ignore those.
                    temp_message = await from_user.get()

                    if type(temp_message) is dict and temp_message.get("role") == "user" and temp_message.get("end"):
                        # Yup. False alarm.
                        continue
                    else:
                        # Whoops! Put that back
                        await from_user.put(temp_message)

                    with open(conversation_history_path, 'w') as file:
                        json.dump(interpreter.messages, file, indent=4)

                    # TODO: is triggering seemingly randomly
                    #logger.info("New user message recieved. Breaking.")
                    #break

                # Also check if there's any new computer messages
                if not from_computer.empty():

                    with open(conversation_history_path, 'w') as file:
                        json.dump(interpreter.messages, file, indent=4)

                    logger.info("New computer message recieved. Breaking.")
                    break

        except:
            traceback.print_exc()
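The substance of this commit is that everything inside listener()'s outer while True now runs under a try, with traceback.print_exc() in the except, so one malformed chunk or a failed STT/LLM call prints the error and the loop keeps listening instead of silently dying. A minimal sketch of that pattern in isolation (the queue and handler names here are illustrative stand-ins, not the ones used by this server):

import asyncio
import traceback

async def resilient_listener(incoming: asyncio.Queue, handle_chunk):
    # Keep consuming forever; a failure in one iteration is logged, not fatal.
    while True:
        try:
            chunk = await incoming.get()
            await handle_chunk(chunk)
        except Exception:
            # Same idea as the commit: dump the traceback and go back to listening.
            traceback.print_exc()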
async def stream_tts_to_device(sentence):
force_task_completion_responses = [
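For reference, the TTS_RUNNER == "server" branch above buffers the streamed tokens and only hands complete sentences to stream_tts_to_device, carrying the trailing fragment over into the next chunk. A rough standalone sketch of that buffering logic, where split_into_sentences and is_full_sentence are reimplemented naively (the real helpers live elsewhere in this package) and speak() stands in for the actual TTS call:

import re

def split_into_sentences(text):
    # Assumed helper: naive split on sentence-ending punctuation.
    return [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]

def is_full_sentence(sentence):
    # Assumed helper: a sentence is "full" if it ends with terminal punctuation.
    return sentence.endswith((".", "!", "?"))

def speak_complete_sentences(accumulated_text, new_text, speak):
    # Buffer streamed text, speak only finished sentences, return the unfinished tail.
    accumulated_text += new_text
    sentences = split_into_sentences(accumulated_text)
    if not sentences:
        return accumulated_text
    if is_full_sentence(sentences[-1]):
        for sentence in sentences:
            speak(sentence)
        return ""
    for sentence in sentences[:-1]:
        speak(sentence)
    return sentences[-1]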

@@ -71,7 +71,7 @@ def create_tunnel(tunnel_method='ngrok', server_host='localhost', server_port=80
    # If ngrok is installed, start it on the specified port
    # process = subprocess.Popen(f'ngrok http {server_port} --log=stdout', shell=True, stdout=subprocess.PIPE)
    process = subprocess.Popen(f'ngrok http {server_port} --scheme http,https --log=stdout', shell=True, stdout=subprocess.PIPE)

    # Initially, no URL is found
    found_url = False
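For context on the lines above: when started with --log=stdout, ngrok emits a log line containing url=https://... once the tunnel is established, and the found_url flag is flipped when that line is seen. A rough sketch of scraping the public URL from the process output (the regex and variable names are illustrative, not the exact code in this file):

import re

# Poll ngrok's stdout until the public URL appears (assumes --log=stdout).
while not found_url:
    line = process.stdout.readline().decode("utf-8", errors="ignore")
    if not line:
        break  # ngrok exited without reporting a URL
    match = re.search(r"url=(https?://\S+)", line)
    if match:
        found_url = True
        tunnel_url = match.group(1)
        print(f"Tunnel is up at {tunnel_url}")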

01OS/poetry.lock (generated, 3128 lines changed)

File diff suppressed because one or more lines are too long

@@ -24,11 +24,11 @@ pydub = "^0.25.1"
ngrok = "^1.0.0"
simpleaudio = "^1.0.4"
opencv-python = "^4.9.0.80"
open-interpreter = { git = "https://github.com/KillianLucas/open-interpreter.git", branch = "development" }
psutil = "^5.9.8"
typer = "^0.9.0"
platformdirs = "^4.2.0"
rich = "^13.7.1"
open-interpreter = "^0.2.2"
[build-system]
requires = ["poetry-core"]
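The dependency change above moves open-interpreter from the git development branch to the published ^0.2.2 release (the "Updated OI" in the commit message), which is also why poetry.lock is regenerated. A quick way to confirm which release Poetry actually resolved after installing (illustrative snippet, not part of the diff):

from importlib.metadata import version

# Print the installed open-interpreter version to verify the ^0.2.2 constraint took effect.
print(version("open-interpreter"))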
