From e397f8819f37d88e7af320ba1c7b5c6788c36ead Mon Sep 17 00:00:00 2001 From: Ben Xu Date: Wed, 24 Apr 2024 12:54:18 -0400 Subject: [PATCH] add binary audio parsing --- .../ios/react-native/package-lock.json | 13 +++ .../clients/ios/react-native/package.json | 8 +- .../ios/react-native/src/screens/Main.tsx | 99 +++++++++++++++---- software/source/server/server.py | 18 +++- software/source/server/tunnel.py | 2 +- software/source/utils/accumulator.py | 4 +- 6 files changed, 115 insertions(+), 29 deletions(-) diff --git a/software/source/clients/ios/react-native/package-lock.json b/software/source/clients/ios/react-native/package-lock.json index 81e2031..97b3864 100644 --- a/software/source/clients/ios/react-native/package-lock.json +++ b/software/source/clients/ios/react-native/package-lock.json @@ -17,12 +17,14 @@ "expo-status-bar": "~1.11.1", "react": "18.2.0", "react-native": "0.73.4", + "react-native-base64": "^0.2.1", "react-native-safe-area-context": "4.8.2", "react-native-screens": "~3.29.0" }, "devDependencies": { "@babel/core": "^7.20.0", "@types/react": "~18.2.45", + "@types/react-native-base64": "^0.2.2", "typescript": "^5.1.3" } }, @@ -6102,6 +6104,12 @@ "csstype": "^3.0.2" } }, + "node_modules/@types/react-native-base64": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/@types/react-native-base64/-/react-native-base64-0.2.2.tgz", + "integrity": "sha512-obr+/L9Jaxdr+xCVS/IQcYgreg5xtnui4Wqw/G1acBUtW2CnqVJj6lK6F/5F3+5d2oZEo5xDDLqy8GVn2HbEmw==", + "dev": true + }, "node_modules/@types/scheduler": { "version": "0.16.8", "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.8.tgz", @@ -11492,6 +11500,11 @@ "react": "18.2.0" } }, + "node_modules/react-native-base64": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/react-native-base64/-/react-native-base64-0.2.1.tgz", + "integrity": "sha512-eHgt/MA8y5ZF0aHfZ1aTPcIkDWxza9AaEk4GcpIX+ZYfZ04RcaNahO+527KR7J44/mD3efYfM23O2C1N44ByWA==" + }, "node_modules/react-native-safe-area-context": { "version": "4.8.2", "resolved": "https://registry.npmjs.org/react-native-safe-area-context/-/react-native-safe-area-context-4.8.2.tgz", diff --git a/software/source/clients/ios/react-native/package.json b/software/source/clients/ios/react-native/package.json index c031609..86faf84 100644 --- a/software/source/clients/ios/react-native/package.json +++ b/software/source/clients/ios/react-native/package.json @@ -13,18 +13,20 @@ "@react-navigation/native": "^6.1.14", "@react-navigation/native-stack": "^6.9.22", "expo": "~50.0.8", + "expo-av": "~13.10.5", + "expo-barcode-scanner": "~12.9.3", "expo-camera": "~14.0.5", "expo-status-bar": "~1.11.1", "react": "18.2.0", "react-native": "0.73.4", + "react-native-base64": "^0.2.1", "react-native-safe-area-context": "4.8.2", - "react-native-screens": "~3.29.0", - "expo-barcode-scanner": "~12.9.3", - "expo-av": "~13.10.5" + "react-native-screens": "~3.29.0" }, "devDependencies": { "@babel/core": "^7.20.0", "@types/react": "~18.2.45", + "@types/react-native-base64": "^0.2.2", "typescript": "^5.1.3" }, "ios": { diff --git a/software/source/clients/ios/react-native/src/screens/Main.tsx b/software/source/clients/ios/react-native/src/screens/Main.tsx index 3823c43..966c8af 100644 --- a/software/source/clients/ios/react-native/src/screens/Main.tsx +++ b/software/source/clients/ios/react-native/src/screens/Main.tsx @@ -1,6 +1,9 @@ import React, { useState, useEffect } from "react"; import { View, Text, TouchableOpacity, StyleSheet } from "react-native"; -import { Audio } from "expo-av"; +import * as FileSystem from 'expo-file-system'; +import { AVPlaybackStatus, Audio } from "expo-av"; +import { Buffer } from "buffer"; +import base64 from 'react-native-base64'; interface MainProps { route: { @@ -18,39 +21,83 @@ const Main: React.FC = ({ route }) => { const [ws, setWs] = useState(null); const [recording, setRecording] = useState(null); const [audioQueue, setAudioQueue] = useState([]); + const [isPlaying, setIsPlaying] = useState(false); + const Buffer = require('buffer/').Buffer; + + const constructTempFilePath = async (buffer: Buffer) => { + const tempFilePath = `${FileSystem.cacheDirectory}${Date.now()}` + "speech.mp3"; + await FileSystem.writeAsStringAsync( + tempFilePath, + buffer.toString("base64"), + { + encoding: FileSystem.EncodingType.Base64, + } + ); - useEffect(() => { - const playNextAudio = async () => { - if (audioQueue.length > 0) { - const uri = audioQueue.shift(); - const { sound } = await Audio.Sound.createAsync( - { uri: uri! }, - { shouldPlay: true } - ); - sound.setOnPlaybackStatusUpdate(async (status) => { - if (status.didJustFinish && !status.isLooping) { - await sound.unloadAsync(); - playNextAudio(); - } - }); + return tempFilePath; + }; + + const playNextAudio = async () => { + console.log("in playNextAudio audioQueue is", audioQueue); + console.log("isPlaying is", isPlaying); + + if (audioQueue.length > 0) { + const uri = audioQueue.shift() as string; + console.log("load audio from", uri); + setIsPlaying(true); + + try { + const { sound } = await Audio.Sound.createAsync({ uri }); + await sound.playAsync(); + console.log("playing audio from", uri); + + sound.setOnPlaybackStatusUpdate(_onPlaybackStatusUpdate); + } catch (error){ + console.log("Error playing audio", error); + setIsPlaying(false); + playNextAudio(); } - }; + } + }; + + const _onPlaybackStatusUpdate = (status: AVPlaybackStatus) => { + if (status.isLoaded && status.didJustFinish) { + setIsPlaying(false); + playNextAudio(); + } + }; + + useEffect(() => { let websocket: WebSocket; try { console.log("Connecting to WebSocket at " + scannedData); websocket = new WebSocket(scannedData); + websocket.binaryType = "blob"; websocket.onopen = () => { setConnectionStatus(`Connected to ${scannedData}`); console.log("WebSocket connected"); }; + websocket.onmessage = async (e) => { - console.log("Received message: ", e.data); - setAudioQueue((prevQueue) => [...prevQueue, e.data]); - if (audioQueue.length === 1) { + const message = JSON.parse(e.data); + + if (message.content) { + + const parsedMessage = message.content.replace(/^b'|['"]|['"]$/g, ""); + const buffer = Buffer.from(parsedMessage, 'base64') + console.log("parsed message", buffer.toString()); + + const uri = await constructTempFilePath(buffer); + setAudioQueue((prevQueue) => [...prevQueue, uri]); + } + + if (message.format === "bytes.raw" && message.end) { + console.log("calling playNextAudio"); playNextAudio(); } + }; websocket.onerror = (error) => { @@ -74,7 +121,7 @@ const Main: React.FC = ({ route }) => { websocket.close(); } }; - }, [scannedData, audioQueue]); + }, [scannedData]); const startRecording = async () => { if (recording) { @@ -108,7 +155,17 @@ const Main: React.FC = ({ route }) => { const uri = recording.getURI(); console.log("Recording stopped and stored at", uri); if (ws && uri) { - ws.send(uri); + const response = await fetch(uri); + const blob = await response.blob(); + const reader = new FileReader(); + reader.readAsArrayBuffer(blob); + reader.onloadend = () => { + const audioBytes = reader.result; + if (audioBytes) { + ws.send(audioBytes); + console.log("sent audio bytes to WebSocket"); + } + }; } } }; diff --git a/software/source/server/server.py b/software/source/server/server.py index c4dd036..444298d 100644 --- a/software/source/server/server.py +++ b/software/source/server/server.py @@ -20,6 +20,7 @@ from interpreter import interpreter from ..utils.accumulator import Accumulator from .utils.logs import setup_logging from .utils.logs import logger +import base64 from ..utils.print_markdown import print_markdown @@ -194,13 +195,24 @@ async def receive_messages(websocket: WebSocket): async def send_messages(websocket: WebSocket): while True: message = await to_device.get() - # print(f"Sending to the device: {type(message)} {str(message)[:100]}") try: if isinstance(message, dict): + print(f"Sending to the device: {type(message)} {str(message)[:100]}") await websocket.send_json(message) elif isinstance(message, bytes): - await websocket.send_bytes(message) + message = base64.b64encode(message) + str_bytes = str(message) + json_bytes = { + "role": "assistant", + "type": "audio", + "format": "message", + "content": str_bytes, + } + print( + f"Sending to the device: {type(json_bytes)} {str(json_bytes)[:100]}" + ) + await websocket.send_json(json_bytes) else: raise TypeError("Message must be a dict or bytes") except: @@ -286,7 +298,7 @@ async def listener(): logger.debug("Got chunk:", chunk) # Send it to the user - await to_device.put(chunk) + # await to_device.put(chunk) # Yield to the event loop, so you actually send it out await asyncio.sleep(0.01) diff --git a/software/source/server/tunnel.py b/software/source/server/tunnel.py index 809db08..0e0ad17 100644 --- a/software/source/server/tunnel.py +++ b/software/source/server/tunnel.py @@ -100,7 +100,7 @@ def create_tunnel( # If ngrok is installed, start it on the specified port # process = subprocess.Popen(f'ngrok http {server_port} --log=stdout', shell=True, stdout=subprocess.PIPE) process = subprocess.Popen( - f"ngrok http {server_port} --scheme http,https --domain=marten-advanced-dragon.ngrok-free.app --log=stdout", + f"ngrok http {server_port} --scheme http,https --domain=sterling-snail-conversely.ngrok-free.app --log=stdout", shell=True, stdout=subprocess.PIPE, ) diff --git a/software/source/utils/accumulator.py b/software/source/utils/accumulator.py index 37912b5..9f66e89 100644 --- a/software/source/utils/accumulator.py +++ b/software/source/utils/accumulator.py @@ -44,4 +44,6 @@ class Accumulator: if "content" not in self.message or type(self.message["content"]) != bytes: self.message["content"] = b"" self.message["content"] += chunk - return None + self.message["type"] = "audio" + self.message["format"] = "bytes.wav" + return self.message