Merge pull request #198 from dheavy/fix/win10-socket-startup

Fix win10 not connecting to websocket server
pull/215/head
Ty Fiero 10 months ago committed by GitHub
commit a2eba6d5d0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

1271
software/poetry.lock generated

File diff suppressed because it is too large Load Diff

@ -24,6 +24,7 @@ import tempfile
from datetime import datetime from datetime import datetime
import cv2 import cv2
import base64 import base64
import platform
from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run? from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run?
# In the future, I guess kernel watching code should be elsewhere? Somewhere server / client agnostic? # In the future, I guess kernel watching code should be elsewhere? Somewhere server / client agnostic?
from ..server.utils.kernel import put_kernel_messages_into_queue from ..server.utils.kernel import put_kernel_messages_into_queue
@ -58,6 +59,7 @@ CAMERA_WARMUP_SECONDS = float(os.getenv('CAMERA_WARMUP_SECONDS', 0))
# Specify OS # Specify OS
current_platform = get_system_info() current_platform = get_system_info()
is_win10 = lambda: platform.system() == "Windows" and "10" in platform.version()
# Initialize PyAudio # Initialize PyAudio
p = pyaudio.PyAudio() p = pyaudio.PyAudio()
@ -98,7 +100,7 @@ class Device:
cap.release() cap.release()
return image_path return image_path
def encode_image_to_base64(self, image_path): def encode_image_to_base64(self, image_path):
"""Encodes an image file to a base64 string.""" """Encodes an image file to a base64 string."""
@ -124,7 +126,7 @@ class Device:
self.add_image_to_send_queue(image_path) self.add_image_to_send_queue(image_path)
self.captured_images.clear() # Clear the list after sending self.captured_images.clear() # Clear the list after sending
async def play_audiosegments(self): async def play_audiosegments(self):
"""Plays them sequentially.""" """Plays them sequentially."""
while True: while True:
@ -141,7 +143,7 @@ class Device:
def record_audio(self): def record_audio(self):
if os.getenv('STT_RUNNER') == "server": if os.getenv('STT_RUNNER') == "server":
# STT will happen on the server. we're sending audio. # STT will happen on the server. we're sending audio.
send_queue.put({"role": "user", "type": "audio", "format": "bytes.wav", "start": True}) send_queue.put({"role": "user", "type": "audio", "format": "bytes.wav", "start": True})
@ -239,7 +241,7 @@ class Device:
elif CAMERA_ENABLED and key == keyboard.KeyCode.from_char('c'): elif CAMERA_ENABLED and key == keyboard.KeyCode.from_char('c'):
self.fetch_image_from_camera() self.fetch_image_from_camera()
async def message_sender(self, websocket): async def message_sender(self, websocket):
while True: while True:
message = await asyncio.get_event_loop().run_in_executor(None, send_queue.get) message = await asyncio.get_event_loop().run_in_executor(None, send_queue.get)
@ -252,65 +254,79 @@ class Device:
async def websocket_communication(self, WS_URL): async def websocket_communication(self, WS_URL):
show_connection_log = True show_connection_log = True
while True:
try:
async with websockets.connect(WS_URL) as websocket:
if CAMERA_ENABLED:
print("\nHold the spacebar to start recording. Press 'c' to capture an image from the camera. Press CTRL-C to exit.")
else:
print("\nHold the spacebar to start recording. Press CTRL-C to exit.")
asyncio.create_task(self.message_sender(websocket))
while True: async def exec_ws_communication(websocket):
await asyncio.sleep(0.01) if CAMERA_ENABLED:
chunk = await websocket.recv() print("\nHold the spacebar to start recording. Press 'c' to capture an image from the camera. Press CTRL-C to exit.")
else:
print("\nHold the spacebar to start recording. Press CTRL-C to exit.")
logger.debug(f"Got this message from the server: {type(chunk)} {chunk}") asyncio.create_task(self.message_sender(websocket))
if type(chunk) == str: while True:
chunk = json.loads(chunk) await asyncio.sleep(0.01)
chunk = await websocket.recv()
message = accumulator.accumulate(chunk) logger.debug(f"Got this message from the server: {type(chunk)} {chunk}")
if message == None:
# Will be None until we have a full message ready
continue
# At this point, we have our message if type(chunk) == str:
chunk = json.loads(chunk)
if message["type"] == "audio" and message["format"].startswith("bytes"): message = accumulator.accumulate(chunk)
if message == None:
# Will be None until we have a full message ready
continue
# Convert bytes to audio file # At this point, we have our message
audio_bytes = message["content"] if message["type"] == "audio" and message["format"].startswith("bytes"):
# Create an AudioSegment instance with the raw data # Convert bytes to audio file
audio = AudioSegment(
# raw audio data (bytes)
data=audio_bytes,
# signed 16-bit little-endian format
sample_width=2,
# 16,000 Hz frame rate
frame_rate=16000,
# mono sound
channels=1
)
self.audiosegments.append(audio) audio_bytes = message["content"]
# Run the code if that's the client's job # Create an AudioSegment instance with the raw data
if os.getenv('CODE_RUNNER') == "client": audio = AudioSegment(
if message["type"] == "code" and "end" in message: # raw audio data (bytes)
language = message["format"] data=audio_bytes,
code = message["content"] # signed 16-bit little-endian format
result = interpreter.computer.run(language, code) sample_width=2,
send_queue.put(result) # 16,000 Hz frame rate
except: frame_rate=16000,
logger.debug(traceback.format_exc()) # mono sound
if show_connection_log: channels=1
logger.info(f"Connecting to `{WS_URL}`...") )
show_connection_log = False
await asyncio.sleep(2) self.audiosegments.append(audio)
# Run the code if that's the client's job
if os.getenv('CODE_RUNNER') == "client":
if message["type"] == "code" and "end" in message:
language = message["format"]
code = message["content"]
result = interpreter.computer.run(language, code)
send_queue.put(result)
if is_win10():
logger.info('Windows 10 detected')
# Workaround for Windows 10 not latching to the websocket server.
# See https://github.com/OpenInterpreter/01/issues/197
try:
ws = websockets.connect(WS_URL)
await exec_ws_communication(ws)
except Exception as e:
logger.error(f"Error while attempting to connect: {e}")
else:
while True:
try:
async with websockets.connect(WS_URL) as websocket:
await exec_ws_communication(websocket)
except:
logger.debug(traceback.format_exc())
if show_connection_log:
logger.info(f"Connecting to `{WS_URL}`...")
show_connection_log = False
await asyncio.sleep(2)
async def start_async(self): async def start_async(self):
# Configuration for WebSocket # Configuration for WebSocket
@ -323,7 +339,7 @@ class Device:
asyncio.create_task(put_kernel_messages_into_queue(send_queue)) asyncio.create_task(put_kernel_messages_into_queue(send_queue))
asyncio.create_task(self.play_audiosegments()) asyncio.create_task(self.play_audiosegments())
# If Raspberry Pi, add the button listener, otherwise use the spacebar # If Raspberry Pi, add the button listener, otherwise use the spacebar
if current_platform.startswith("raspberry-pi"): if current_platform.startswith("raspberry-pi"):
logger.info("Raspberry Pi detected, using button on GPIO pin 15") logger.info("Raspberry Pi detected, using button on GPIO pin 15")

Loading…
Cancel
Save