add binary audio parsing

pull/256/head
Ben Xu 9 months ago
parent 625f02c9ce
commit e397f8819f

@ -17,12 +17,14 @@
"expo-status-bar": "~1.11.1", "expo-status-bar": "~1.11.1",
"react": "18.2.0", "react": "18.2.0",
"react-native": "0.73.4", "react-native": "0.73.4",
"react-native-base64": "^0.2.1",
"react-native-safe-area-context": "4.8.2", "react-native-safe-area-context": "4.8.2",
"react-native-screens": "~3.29.0" "react-native-screens": "~3.29.0"
}, },
"devDependencies": { "devDependencies": {
"@babel/core": "^7.20.0", "@babel/core": "^7.20.0",
"@types/react": "~18.2.45", "@types/react": "~18.2.45",
"@types/react-native-base64": "^0.2.2",
"typescript": "^5.1.3" "typescript": "^5.1.3"
} }
}, },
@ -6102,6 +6104,12 @@
"csstype": "^3.0.2" "csstype": "^3.0.2"
} }
}, },
"node_modules/@types/react-native-base64": {
"version": "0.2.2",
"resolved": "https://registry.npmjs.org/@types/react-native-base64/-/react-native-base64-0.2.2.tgz",
"integrity": "sha512-obr+/L9Jaxdr+xCVS/IQcYgreg5xtnui4Wqw/G1acBUtW2CnqVJj6lK6F/5F3+5d2oZEo5xDDLqy8GVn2HbEmw==",
"dev": true
},
"node_modules/@types/scheduler": { "node_modules/@types/scheduler": {
"version": "0.16.8", "version": "0.16.8",
"resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.8.tgz", "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.8.tgz",
@ -11492,6 +11500,11 @@
"react": "18.2.0" "react": "18.2.0"
} }
}, },
"node_modules/react-native-base64": {
"version": "0.2.1",
"resolved": "https://registry.npmjs.org/react-native-base64/-/react-native-base64-0.2.1.tgz",
"integrity": "sha512-eHgt/MA8y5ZF0aHfZ1aTPcIkDWxza9AaEk4GcpIX+ZYfZ04RcaNahO+527KR7J44/mD3efYfM23O2C1N44ByWA=="
},
"node_modules/react-native-safe-area-context": { "node_modules/react-native-safe-area-context": {
"version": "4.8.2", "version": "4.8.2",
"resolved": "https://registry.npmjs.org/react-native-safe-area-context/-/react-native-safe-area-context-4.8.2.tgz", "resolved": "https://registry.npmjs.org/react-native-safe-area-context/-/react-native-safe-area-context-4.8.2.tgz",

@ -13,18 +13,20 @@
"@react-navigation/native": "^6.1.14", "@react-navigation/native": "^6.1.14",
"@react-navigation/native-stack": "^6.9.22", "@react-navigation/native-stack": "^6.9.22",
"expo": "~50.0.8", "expo": "~50.0.8",
"expo-av": "~13.10.5",
"expo-barcode-scanner": "~12.9.3",
"expo-camera": "~14.0.5", "expo-camera": "~14.0.5",
"expo-status-bar": "~1.11.1", "expo-status-bar": "~1.11.1",
"react": "18.2.0", "react": "18.2.0",
"react-native": "0.73.4", "react-native": "0.73.4",
"react-native-base64": "^0.2.1",
"react-native-safe-area-context": "4.8.2", "react-native-safe-area-context": "4.8.2",
"react-native-screens": "~3.29.0", "react-native-screens": "~3.29.0"
"expo-barcode-scanner": "~12.9.3",
"expo-av": "~13.10.5"
}, },
"devDependencies": { "devDependencies": {
"@babel/core": "^7.20.0", "@babel/core": "^7.20.0",
"@types/react": "~18.2.45", "@types/react": "~18.2.45",
"@types/react-native-base64": "^0.2.2",
"typescript": "^5.1.3" "typescript": "^5.1.3"
}, },
"ios": { "ios": {

@ -1,6 +1,9 @@
import React, { useState, useEffect } from "react"; import React, { useState, useEffect } from "react";
import { View, Text, TouchableOpacity, StyleSheet } from "react-native"; import { View, Text, TouchableOpacity, StyleSheet } from "react-native";
import { Audio } from "expo-av"; import * as FileSystem from 'expo-file-system';
import { AVPlaybackStatus, Audio } from "expo-av";
import { Buffer } from "buffer";
import base64 from 'react-native-base64';
interface MainProps { interface MainProps {
route: { route: {
@ -18,39 +21,83 @@ const Main: React.FC<MainProps> = ({ route }) => {
const [ws, setWs] = useState<WebSocket | null>(null); const [ws, setWs] = useState<WebSocket | null>(null);
const [recording, setRecording] = useState<Audio.Recording | null>(null); const [recording, setRecording] = useState<Audio.Recording | null>(null);
const [audioQueue, setAudioQueue] = useState<string[]>([]); const [audioQueue, setAudioQueue] = useState<string[]>([]);
const [isPlaying, setIsPlaying] = useState<boolean>(false);
const Buffer = require('buffer/').Buffer;
const constructTempFilePath = async (buffer: Buffer) => {
const tempFilePath = `${FileSystem.cacheDirectory}${Date.now()}` + "speech.mp3";
await FileSystem.writeAsStringAsync(
tempFilePath,
buffer.toString("base64"),
{
encoding: FileSystem.EncodingType.Base64,
}
);
useEffect(() => { return tempFilePath;
const playNextAudio = async () => { };
if (audioQueue.length > 0) {
const uri = audioQueue.shift(); const playNextAudio = async () => {
const { sound } = await Audio.Sound.createAsync( console.log("in playNextAudio audioQueue is", audioQueue);
{ uri: uri! }, console.log("isPlaying is", isPlaying);
{ shouldPlay: true }
); if (audioQueue.length > 0) {
sound.setOnPlaybackStatusUpdate(async (status) => { const uri = audioQueue.shift() as string;
if (status.didJustFinish && !status.isLooping) { console.log("load audio from", uri);
await sound.unloadAsync(); setIsPlaying(true);
playNextAudio();
} try {
}); const { sound } = await Audio.Sound.createAsync({ uri });
await sound.playAsync();
console.log("playing audio from", uri);
sound.setOnPlaybackStatusUpdate(_onPlaybackStatusUpdate);
} catch (error){
console.log("Error playing audio", error);
setIsPlaying(false);
playNextAudio();
} }
};
}
};
const _onPlaybackStatusUpdate = (status: AVPlaybackStatus) => {
if (status.isLoaded && status.didJustFinish) {
setIsPlaying(false);
playNextAudio();
}
};
useEffect(() => {
let websocket: WebSocket; let websocket: WebSocket;
try { try {
console.log("Connecting to WebSocket at " + scannedData); console.log("Connecting to WebSocket at " + scannedData);
websocket = new WebSocket(scannedData); websocket = new WebSocket(scannedData);
websocket.binaryType = "blob";
websocket.onopen = () => { websocket.onopen = () => {
setConnectionStatus(`Connected to ${scannedData}`); setConnectionStatus(`Connected to ${scannedData}`);
console.log("WebSocket connected"); console.log("WebSocket connected");
}; };
websocket.onmessage = async (e) => { websocket.onmessage = async (e) => {
console.log("Received message: ", e.data); const message = JSON.parse(e.data);
setAudioQueue((prevQueue) => [...prevQueue, e.data]);
if (audioQueue.length === 1) { if (message.content) {
const parsedMessage = message.content.replace(/^b'|['"]|['"]$/g, "");
const buffer = Buffer.from(parsedMessage, 'base64')
console.log("parsed message", buffer.toString());
const uri = await constructTempFilePath(buffer);
setAudioQueue((prevQueue) => [...prevQueue, uri]);
}
if (message.format === "bytes.raw" && message.end) {
console.log("calling playNextAudio");
playNextAudio(); playNextAudio();
} }
}; };
websocket.onerror = (error) => { websocket.onerror = (error) => {
@ -74,7 +121,7 @@ const Main: React.FC<MainProps> = ({ route }) => {
websocket.close(); websocket.close();
} }
}; };
}, [scannedData, audioQueue]); }, [scannedData]);
const startRecording = async () => { const startRecording = async () => {
if (recording) { if (recording) {
@ -108,7 +155,17 @@ const Main: React.FC<MainProps> = ({ route }) => {
const uri = recording.getURI(); const uri = recording.getURI();
console.log("Recording stopped and stored at", uri); console.log("Recording stopped and stored at", uri);
if (ws && uri) { if (ws && uri) {
ws.send(uri); const response = await fetch(uri);
const blob = await response.blob();
const reader = new FileReader();
reader.readAsArrayBuffer(blob);
reader.onloadend = () => {
const audioBytes = reader.result;
if (audioBytes) {
ws.send(audioBytes);
console.log("sent audio bytes to WebSocket");
}
};
} }
} }
}; };

@ -20,6 +20,7 @@ from interpreter import interpreter
from ..utils.accumulator import Accumulator from ..utils.accumulator import Accumulator
from .utils.logs import setup_logging from .utils.logs import setup_logging
from .utils.logs import logger from .utils.logs import logger
import base64
from ..utils.print_markdown import print_markdown from ..utils.print_markdown import print_markdown
@ -194,13 +195,24 @@ async def receive_messages(websocket: WebSocket):
async def send_messages(websocket: WebSocket): async def send_messages(websocket: WebSocket):
while True: while True:
message = await to_device.get() message = await to_device.get()
# print(f"Sending to the device: {type(message)} {str(message)[:100]}")
try: try:
if isinstance(message, dict): if isinstance(message, dict):
print(f"Sending to the device: {type(message)} {str(message)[:100]}")
await websocket.send_json(message) await websocket.send_json(message)
elif isinstance(message, bytes): elif isinstance(message, bytes):
await websocket.send_bytes(message) message = base64.b64encode(message)
str_bytes = str(message)
json_bytes = {
"role": "assistant",
"type": "audio",
"format": "message",
"content": str_bytes,
}
print(
f"Sending to the device: {type(json_bytes)} {str(json_bytes)[:100]}"
)
await websocket.send_json(json_bytes)
else: else:
raise TypeError("Message must be a dict or bytes") raise TypeError("Message must be a dict or bytes")
except: except:
@ -286,7 +298,7 @@ async def listener():
logger.debug("Got chunk:", chunk) logger.debug("Got chunk:", chunk)
# Send it to the user # Send it to the user
await to_device.put(chunk) # await to_device.put(chunk)
# Yield to the event loop, so you actually send it out # Yield to the event loop, so you actually send it out
await asyncio.sleep(0.01) await asyncio.sleep(0.01)

@ -100,7 +100,7 @@ def create_tunnel(
# If ngrok is installed, start it on the specified port # If ngrok is installed, start it on the specified port
# process = subprocess.Popen(f'ngrok http {server_port} --log=stdout', shell=True, stdout=subprocess.PIPE) # process = subprocess.Popen(f'ngrok http {server_port} --log=stdout', shell=True, stdout=subprocess.PIPE)
process = subprocess.Popen( process = subprocess.Popen(
f"ngrok http {server_port} --scheme http,https --domain=marten-advanced-dragon.ngrok-free.app --log=stdout", f"ngrok http {server_port} --scheme http,https --domain=sterling-snail-conversely.ngrok-free.app --log=stdout",
shell=True, shell=True,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
) )

@ -44,4 +44,6 @@ class Accumulator:
if "content" not in self.message or type(self.message["content"]) != bytes: if "content" not in self.message or type(self.message["content"]) != bytes:
self.message["content"] = b"" self.message["content"] = b""
self.message["content"] += chunk self.message["content"] += chunk
return None self.message["type"] = "audio"
self.message["format"] = "bytes.wav"
return self.message

Loading…
Cancel
Save