Merge remote-tracking branch 'upstream/main' into u/shivenmian/user

1 year ago · 0fa932cb06
parent 50259751d9 5f7d53f0b9
commit 0fa932cb06
7 changed files with 24 additions and 22 deletions
--- a/OS/01/assistant/listen.py
+++ b/OS/01/assistant/listen.py
--- a/OS/01/assistant/assistant.py
+++ b/OS/01/assistant/assistant.py
@@ -6,6 +6,7 @@ Exposes a ws endpoint called /user. Things from there go into the queue. We also
 In a while loop we watch the queue and handle it.
 """

+import ast
 import json
 import time
 import queue
@@ -31,9 +32,9 @@ to_user = queue.Queue()
 to_assistant = queue.Queue()

 # This is so we only say() full sentences
-accumulated_text = ""
 def is_full_sentence(text):
    return text.endswith(('.', '!', '?'))
+
 def split_into_sentences(text):
    return re.split(r'(?<=[.!?])\s+', text)

@@ -49,13 +50,13 @@ async def websocket_endpoint(websocket: WebSocket):
    while True:
        data = await websocket.receive_json()
        to_assistant.put(data)
-        if not to_user.empty():
+        while not to_user.empty():
            message = to_user.get()
            await websocket.send_json(message)

-audio_chunks = []

 def queue_listener():
+    audio_file = bytearray()
    while True:
        # Check 10x a second for new messages
        while to_assistant.empty():
@@ -65,11 +66,11 @@ def queue_listener():
        # Hold the audio in a buffer. If it's ready (we got end flag, stt it)
        if message["type"] == "audio":
            if "content" in message:
-                audio_chunks.append(message)
+                audio_file.extend(bytes(ast.literal_eval(message["content"])))
            if "end" in message:
-                text = stt(audio_chunks)
-                audio_chunks = []
-                message = {"role": "user", "type": "message", "content": text}
+                content = stt(audio_file, message["format"])
+                audio_file = bytearray()
+                message = {"role": "user", "type": "message", "content": content}
            else:
                continue

--- a/OS/01/assistant/create_interpreter.py
+++ b/OS/01/assistant/create_interpreter.py
@@ -55,7 +55,7 @@ Remember: You can run Python code. Be very concise. Ensure that you actually run
    interpreter.llm.api_key = os.getenv('OPENAI_API_KEY')
    interpreter.llm.model = "gpt-4"
    interpreter.auto_run = True
-    interpreter.force_task_completion = True
+    interpreter.force_task_completion = False


    ### MISC SETTINGS
@@ -94,12 +94,12 @@ Remember: You can run Python code. Be very concise. Ensure that you actually run
            data = {"language": "python", "code": code}

            # Send the data to the /run endpoint
-            response = requests.post("http://localhost:8000/run", json=data, stream=True)
+            response = requests.post("http://localhost:9000/run", json=data, stream=True)

            # Stream the response
-            for line in response.iter_lines():
-                if line:  # filter out keep-alive new lines
-                    yield json.loads(line)
+            for chunk in response.iter_content(chunk_size=100000000):
+                if chunk:  # filter out keep-alive new lines
+                    yield json.loads(chunk.decode())

        def stop(self):
            """Stops the code."""
--- a/OS/01/requirements.txt
+++ b/OS/01/requirements.txt
@@ -2,7 +2,6 @@ git+https://github.com/KillianLucas/open-interpreter.git
 asyncio
 pyaudio
 pynput
-redis
 fastapi
 uvicorn
 websockets
--- a/OS/01/start.sh
+++ b/OS/01/start.sh
@@ -2,8 +2,10 @@

 # INSTALL REQUIREMENTS

-sudo apt-get update
-sudo apt-get install redis-server
+if [[ "$OSTYPE" == "darwin"* ]]; then
+    brew update
+    brew install portaudio ffmpeg
+fi
 pip install -r requirements.txt

 ### COMPUTER
@@ -14,7 +16,7 @@ python computer/kernel_watcher.py &

 # START RUN ENDPOINT

-python computer/run.py
+python computer/run.py &

 # START STT AND TTS SERVICES

--- a/OS/01/user/record.py
+++ b/OS/01/user/record.py
@@ -52,7 +52,7 @@ async def start_recording():
    print("Recording started...")
    async with websockets.connect("ws://localhost:8000/user") as websocket:
        # Send the start command with mime type
-        await websocket.send(json.dumps({"action": "command", "state": "start", "mimeType": "audio/wav"}))
+        await websocket.send(json.dumps({"role": "user", "type": "audio", "format": "audio/wav", "start": True}))
        while recording:
            data = stream.read(chunk)
            frames.append(data)
@@ -65,13 +65,13 @@ async def start_recording():
            with open(file_path, 'rb') as audio_file:
                byte_chunk = audio_file.read(ws_chunk_size)
                while byte_chunk:
-                    await websocket.send(byte_chunk)
+                    await websocket.send(json.dumps({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_chunk)}))
                    byte_chunk = audio_file.read(ws_chunk_size)
        finally:
            os.remove(file_path)

        # Send the end command
-        await websocket.send(json.dumps({"action": "command", "state": "end"}))
+        await websocket.send(json.dumps({"role": "user", "type": "audio", "format": "audio/wav", "end": True}))

        # Receive a json message and then close the connection
        message = await websocket.recv()