diff --git a/.gitignore b/.gitignore index aeaed36..c0e95ec 100644 --- a/.gitignore +++ b/.gitignore @@ -169,3 +169,7 @@ cython_debug/ _.aifs software/output_audio.wav .DS_Store + +# ignore node modules and .expo files +node_modules/ +.expo/ diff --git a/README.md b/README.md index a95c808..c29099b 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,13 @@
Preorder the Light‎ ‎ |‎ ‎ Get Updates‎ ‎ |‎ ‎ Documentation

+
+ + | [日本語](docs/README_JA.md) | [English](README.md) | + +
+ +
![OI-O1-BannerDemo-2](https://www.openinterpreter.com/OI-O1-BannerDemo-3.jpg) @@ -19,7 +26,7 @@ We want to help you build. [Apply for 1-on-1 support.](https://0ggfznkwh4j.typef > [!IMPORTANT] > This experimental project is under rapid development and lacks basic safeguards. Until a stable `1.0` release, only run this repository on devices without sensitive information or access to paid services. > -> **A substantial rewrite to address these concerns and more is occurring [here](https://github.com/KillianLucas/01-rewrite/tree/main).** +> **A substantial rewrite to address these concerns and more, including the addition of [RealtimeTTS](https://github.com/KoljaB/RealtimeTTS) and [RealtimeSTT](https://github.com/KoljaB/RealtimeSTT), is occurring [here](https://github.com/KillianLucas/01-rewrite/tree/main).**
@@ -51,6 +58,8 @@ poetry run 01 # Runs the 01 Light simulator (hold your spacebar, speak, release)
+**The [RealtimeTTS](https://github.com/KoljaB/RealtimeTTS) and [RealtimeSTT](https://github.com/KoljaB/RealtimeSTT) libraries in the incoming 01-rewrite are thanks to the state-of-the-art voice interface work of [Kolja Beigel](https://github.com/KoljaB). Please star those repos and consider contributing to / utilizing those projects!** + # Hardware - The **01 Light** is an ESP32-based voice interface. Build instructions are [here](https://github.com/OpenInterpreter/01/tree/main/hardware/light). A list of what to buy [here](https://github.com/OpenInterpreter/01/blob/main/hardware/light/BOM.md). diff --git a/docs/README_JA.md b/docs/README_JA.md new file mode 100644 index 0000000..868dae8 --- /dev/null +++ b/docs/README_JA.md @@ -0,0 +1,155 @@ +

+ +

+ Discord +
+
+ オープンソースの言語モデルコンピュータ。
+
Light の予約‎ ‎ |‎ ‎ 最新情報‎ ‎ |‎ ‎ ドキュメント
+

+ +
+ +![OI-O1-BannerDemo-2](https://www.openinterpreter.com/OI-O1-BannerDemo-3.jpg) + +あなたのビルドをサポートします。[1対1のサポートを申し込む。](https://0ggfznkwh4j.typeform.com/to/kkStE8WF) + +
+ +> [!IMPORTANT] +> この実験的なプロジェクトは急速に開発が進んでおり、基本的な安全策が欠けています。安定した `1.0` リリースまでは、機密情報や有料サービスへのアクセスがないデバイスでのみこのリポジトリを実行してください。 +> +> **これらの懸念やその他の懸念に対処するための大幅な書き換えが[ここ](https://github.com/KillianLucas/01-rewrite/tree/main)で行われています。** + +
+ +**01 プロジェクト** は、AI 機器のためのオープンソースのエコシステムを構築しています。 + +私たちの主力オペレーティングシステムは、Rabbit R1、Humane Pin、[Star Trek computer](https://www.youtube.com/watch?v=1ZXugicgn6U) のような会話デバイスを動かすことができます。 + +私たちは、オープンでモジュラーでフリーであり続けることで、この分野の GNU/Linux になるつもりです。 + +
+ +# ソフトウェア + +```shell +git clone https://github.com/OpenInterpreter/01 # リポジトリのクローン +cd 01/software # CD でソースディレクトリに移動 +``` + + + +```shell +brew install portaudio ffmpeg cmake # Mac OSXの依存関係のインストール +poetry install # Pythonの依存関係のインストール +export OPENAI_API_KEY=sk... # または、`poetry run 01 --local` を実行し、ローカルですべてを実行 +poetry run 01 # 01 Light シミュレーターを作動させる(スペースバーを押しながら話し、放す) +``` + + + +
+ +# ハードウェア + +- **01 Light** は ESP32 ベースの音声インターフェースです。ビルド手順は[こちら](https://github.com/OpenInterpreter/01/tree/main/hardware/light)。買うべきもののリストは[こちら](https://github.com/OpenInterpreter/01/blob/main/hardware/light/BOM.md)。 +- ご自宅のコンピューターで動作している **01 サーバー**([下記のセットアップガイド](https://github.com/OpenInterpreter/01/blob/main/README.md#01-server))と連動して動作します。 +- **Mac OSX** と **Ubuntu** は `poetry run 01` を実行することでサポートされます(**Windows** は実験的にサポートされている)。これはスペースキーを使って 01 Light をシミュレートします。 +- (近日発表) **01 Heavy** は、ローカルですべてを実行するスタンドアローンデバイスです。 + +**より多くのハードウェアをサポートし、構築するためには、皆さんの協力が必要です。** 01 は、入力(マイク、キーボードなど)、出力(スピーカー、スクリーン、モーターなど)、インターネット接続(またはローカルですべてを実行するのに十分な計算能力)があれば、どのようなデバイスでも実行できるはずです。[コントリビューションガイド →](https://github.com/OpenInterpreter/01/blob/main/CONTRIBUTING.md) + +
+ +# 何をするのか? + +01 は、`localhost:10001` で音声対音声(speech-to-speech)ウェブソケットを公開しています。 + +生のオーディオバイトを[ストリーミング LMC フォーマット](https://docs.openinterpreter.com/guides/streaming-response)で `/` にストリーミングすると、同じフォーマットで応答を受け取ります。 + +[Andrej Karpathy の LLM OS](https://twitter.com/karpathy/status/1723140519554105733) に一部インスパイアされ、[コード解釈言語モデル](https://github.com/OpenInterpreter/open-interpreter)を実行し、コンピュータの[カーネル](https://github.com/OpenInterpreter/01/blob/main/software/source/server/utils/kernel.py)で特定のイベントが発生したときにそれを呼び出します。 + +01 はこれを音声インターフェースで包んでいます: + +
+ +LMC

+ +# プロトコル + +## LMC メッセージ + +このシステムのさまざまなコンポーネントと通信するために、[LMC メッセージ](https://docs.openinterpreter.com/protocols/lmc-messages)フォーマットを導入します。これは、OpenAI のメッセージフォーマットを拡張し、"computer" の役割を含むようにしたものです: + +https://github.com/OpenInterpreter/01/assets/63927363/8621b075-e052-46ba-8d2e-d64b9f2a5da9 + +## ダイナミックシステムメッセージ + +ダイナミックシステムメッセージは、LLM のシステムメッセージが AI に表示される一瞬前に、その中でコードを実行することを可能にします。 + +```python +# i.py の以下の設定を編集 +interpreter.system_message = r" The time is {{time.time()}}. " # 二重括弧の中は Python として実行されます +interpreter.chat("What time is it?") # ツール/API を呼び出すことなく、次のことが分かります +``` + +# ガイド + +## 01 サーバー + +デスクトップ上でサーバーを起動し、01 Light に接続するには、以下のコマンドを実行します: + +```shell +brew install ngrok/ngrok/ngrok +ngrok authtoken ... # ngrok authtoken を使用 +poetry run 01 --server --expose +``` + +最後のコマンドは、サーバーの URL を表示します。これを 01 Light のキャプティブ WiFi ポータルに入力すると、01 Server に接続できます。 + +## ローカルモード + +``` +poetry run 01 --local +``` + +Whisper を使ってローカル音声合成を実行したい場合、Rust をインストールする必要があります。[こちら](https://www.rust-lang.org/tools/install)の指示に従ってください。 + +## カスタマイズ + +システムの動作をカスタマイズするには、`i.py` 内の[システムメッセージ、モデル、スキルライブラリのパス](https://docs.openinterpreter.com/settings/all-settings)などを編集します。このファイルはインタープリターをセットアップするもので、Open Interpreter によって動作します。 + +## Ubuntu 依存関係 + +```bash +sudo apt-get install portaudio19-dev ffmpeg cmake +``` + +# コントリビューター + +[![01 project contributors](https://contrib.rocks/image?repo=OpenInterpreter/01&max=2000)](https://github.com/OpenInterpreter/01/graphs/contributors) + +参加方法の詳細については、[コントリビューションガイド](/CONTRIBUTING.md)をご覧ください。 + +
+ +# ロードマップ + +01 の未来を見るには、[私達のロードマップ](/ROADMAP.md)をご覧ください。 + +
+ +## バックグラウンド + +### [コンテキスト ↗](https://github.com/KillianLucas/01/blob/main/CONTEXT.md) + +01 以前のデバイスの物語。 + +### [インスピレーション ↗](https://github.com/KillianLucas/01/tree/main/INSPIRATION.md) + +素晴らしいアイデアは盗みたいと思うもの。 + +
+ +○ diff --git a/docs/video_documentation/collection.md b/docs/video_documentation/collection.md new file mode 100644 index 0000000..c0390c4 --- /dev/null +++ b/docs/video_documentation/collection.md @@ -0,0 +1,83 @@ + +## For End Users +[Announcement video](https://www.youtube.com/watch?v=jWr-WeXAdeI) +[Wes Roth](https://www.youtube.com/@WesRoth) + +
+Details + +No technical coverage + +
+ +--- + +[Announcement video](https://www.youtube.com/watch?v=JaBFT3fF2fk) +[TheAIGRID](https://www.youtube.com/@TheAiGrid) + +
+Details + +[here](https://youtu.be/JaBFT3fF2fk?si=8zPGO-U6WdLNnISw&t=656) +mentions the current lack of windows support + +
+ +--- + +[Announcement video](https://www.youtube.com/watch?v=Q_p82HtBqoc) +[Matt Berman](https://www.youtube.com/@matthew_berman) + +
+Details + +[here](https://youtu.be/Q_p82HtBqoc?si=aAxjWZnBdwBbaOUr&t=579) +Berman shows an install of 01 using conda and python 3.9 +in.. looks like linux.. shows how to get openai keys. + +
+ +--- + +[Announcement video](https://www.youtube.com/watch?v=q0dJ7T7au2Y) +[WorldofAI](https://www.youtube.com/@intheworldofai) + +
+Details + + + +
+ +--- + +[Breakdown video](https://www.youtube.com/watch?v=W-VwN0n4d9Y) +[Mervin Praison](https://www.youtube.com/@MervinPraison) +
+Details +- uses conda to install 01 and uses python 3.11 on linux.. maybe mac +- 0:00 Introduction to Open Interpreter +- 0:47 Creating Apps and Summarizing Documents +- 1:20 Image Modifications and Game Creation +- 2:55 Exploratory Data Analysis and Charting +- 4:00 Server Log Analysis +- 5:01 Image and Video Editing +- 6:00 Composing Music with AI +- 7:18 Calendar Management and Email Automation +- 9:01 Integrating with Fast API and LM Studio + +
+ +--- + +[Breakdown video](https://www.youtube.com/watch?v=uyfoHQVgeY0) +[Gary Explains](https://www.youtube.com/@GaryExplains) +
for **open interpreter** not **01** +
+Details +- 3:45 states that it will run on mac/linux and windows and requires python 3.10 +
+ +## For Developers +
+Coming soon \ No newline at end of file diff --git a/software/source/clients/base_device.py b/software/source/clients/base_device.py index 3bf900e..087ca55 100644 --- a/software/source/clients/base_device.py +++ b/software/source/clients/base_device.py @@ -3,6 +3,7 @@ from dotenv import load_dotenv load_dotenv() # take environment variables from .env. import os +import sys import asyncio import threading import pyaudio @@ -58,7 +59,16 @@ CAMERA_WARMUP_SECONDS = float(os.getenv("CAMERA_WARMUP_SECONDS", 0)) # Specify OS current_platform = get_system_info() -is_win10 = lambda: platform.system() == "Windows" and "10" in platform.version() + +def is_win11(): + return sys.getwindowsversion().build >= 22000 + +def is_win10(): + try: + return platform.system() == "Windows" and "10" in platform.version() and not is_win11() + except: + return False + # Initialize PyAudio p = pyaudio.PyAudio() @@ -72,6 +82,7 @@ class Device: self.captured_images = [] self.audiosegments = [] self.server_url = "" + self.ctrl_pressed = False def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX): """Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list.""" @@ -256,23 +267,39 @@ class Device: def on_press(self, key): """Detect spacebar press and Ctrl+C combination.""" self.pressed_keys.add(key) # Add the pressed key to the set + if keyboard.Key.space in self.pressed_keys: self.toggle_recording(True) - elif {keyboard.Key.ctrl, keyboard.KeyCode.from_char("c")} <= self.pressed_keys: + elif {keyboard.Key.ctrl, keyboard.KeyCode.from_char('c')} <= self.pressed_keys: logger.info("Ctrl+C pressed. Exiting...") kill_process_tree() os._exit(0) + + # Windows alternative to the above + if key == keyboard.Key.ctrl_l: + self.ctrl_pressed = True + + try: + if key.vk == 67 and self.ctrl_pressed: + logger.info("Ctrl+C pressed. 
Exiting...") + kill_process_tree() + os._exit(0) + # For non-character keys + except: + pass + + def on_release(self, key): """Detect spacebar release and 'c' key press for camera, and handle key release.""" - self.pressed_keys.discard( - key - ) # Remove the released key from the key press tracking set + self.pressed_keys.discard(key) # Remove the released key from the key press tracking set + if key == keyboard.Key.ctrl_l: + self.ctrl_pressed = False if key == keyboard.Key.space: self.toggle_recording(False) - elif CAMERA_ENABLED and key == keyboard.KeyCode.from_char("c"): + elif CAMERA_ENABLED and key == keyboard.KeyCode.from_char('c'): self.fetch_image_from_camera() async def message_sender(self, websocket): @@ -342,7 +369,7 @@ class Device: code = message["content"] result = interpreter.computer.run(language, code) send_queue.put(result) - + if is_win10(): logger.info("Windows 10 detected") # Workaround for Windows 10 not latching to the websocket server. diff --git a/software/source/clients/ios/README.md b/software/source/clients/ios/README.md deleted file mode 100644 index fbb5280..0000000 --- a/software/source/clients/ios/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# iOS/Android Client - -[WORK IN PROGRESS] - -This repository contains the source code for the 01 iOS/Android app. Work in progress, we will continue to improve this application to get it working properly. - -Feel free to improve this and make a pull request! - -If you want to run it on your own, you will need expo. - -1. Install dependencies `npm install` -2. Run the app `npx expo start` -3. 
Open the app in your simulator or on your device with the expo app by scanning the QR code diff --git a/software/source/clients/ios/react-native/App.tsx b/software/source/clients/ios/react-native/App.tsx deleted file mode 100644 index 7881ba3..0000000 --- a/software/source/clients/ios/react-native/App.tsx +++ /dev/null @@ -1,22 +0,0 @@ -import * as React from "react"; -import { NavigationContainer } from "@react-navigation/native"; -import { createNativeStackNavigator } from "@react-navigation/native-stack"; -import HomeScreen from "./src/screens/HomeScreen"; -import CameraScreen from "./src/screens/Camera"; -import Main from "./src/screens/Main"; - -const Stack = createNativeStackNavigator(); - -function App() { - return ( - - - - - - - - ); -} - -export default App; diff --git a/software/source/clients/ios/react-native/src/screens/Main.tsx b/software/source/clients/ios/react-native/src/screens/Main.tsx deleted file mode 100644 index 3823c43..0000000 --- a/software/source/clients/ios/react-native/src/screens/Main.tsx +++ /dev/null @@ -1,171 +0,0 @@ -import React, { useState, useEffect } from "react"; -import { View, Text, TouchableOpacity, StyleSheet } from "react-native"; -import { Audio } from "expo-av"; - -interface MainProps { - route: { - params: { - scannedData: string; - }; - }; -} - -const Main: React.FC = ({ route }) => { - const { scannedData } = route.params; - - const [connectionStatus, setConnectionStatus] = - useState("Connecting..."); - const [ws, setWs] = useState(null); - const [recording, setRecording] = useState(null); - const [audioQueue, setAudioQueue] = useState([]); - - useEffect(() => { - const playNextAudio = async () => { - if (audioQueue.length > 0) { - const uri = audioQueue.shift(); - const { sound } = await Audio.Sound.createAsync( - { uri: uri! 
}, - { shouldPlay: true } - ); - sound.setOnPlaybackStatusUpdate(async (status) => { - if (status.didJustFinish && !status.isLooping) { - await sound.unloadAsync(); - playNextAudio(); - } - }); - } - }; - - let websocket: WebSocket; - try { - console.log("Connecting to WebSocket at " + scannedData); - websocket = new WebSocket(scannedData); - - websocket.onopen = () => { - setConnectionStatus(`Connected to ${scannedData}`); - console.log("WebSocket connected"); - }; - websocket.onmessage = async (e) => { - console.log("Received message: ", e.data); - setAudioQueue((prevQueue) => [...prevQueue, e.data]); - if (audioQueue.length === 1) { - playNextAudio(); - } - }; - - websocket.onerror = (error) => { - setConnectionStatus("Error connecting to WebSocket."); - console.error("WebSocket error: ", error); - }; - - websocket.onclose = () => { - setConnectionStatus("Disconnected."); - console.log("WebSocket disconnected"); - }; - - setWs(websocket); - } catch (error) { - console.log(error); - setConnectionStatus("Error creating WebSocket."); - } - - return () => { - if (websocket) { - websocket.close(); - } - }; - }, [scannedData, audioQueue]); - - const startRecording = async () => { - if (recording) { - console.log("A recording is already in progress."); - return; - } - - try { - console.log("Requesting permissions.."); - await Audio.requestPermissionsAsync(); - await Audio.setAudioModeAsync({ - allowsRecordingIOS: true, - playsInSilentModeIOS: true, - }); - console.log("Starting recording.."); - const { recording: newRecording } = await Audio.Recording.createAsync( - Audio.RECORDING_OPTIONS_PRESET_HIGH_QUALITY - ); - setRecording(newRecording); - console.log("Recording started"); - } catch (err) { - console.error("Failed to start recording", err); - } - }; - - const stopRecording = async () => { - console.log("Stopping recording.."); - setRecording(null); - if (recording) { - await recording.stopAndUnloadAsync(); - const uri = recording.getURI(); - 
console.log("Recording stopped and stored at", uri); - if (ws && uri) { - ws.send(uri); - } - } - }; - - return ( - - - {connectionStatus} - - - - Record - - - - ); -}; - -const styles = StyleSheet.create({ - container: { - flex: 1, - justifyContent: "center", - alignItems: "center", - backgroundColor: "#fff", - }, - circle: { - width: 100, - height: 100, - borderRadius: 50, - backgroundColor: "black", - justifyContent: "center", - alignItems: "center", - }, - button: { - width: 100, - height: 100, - borderRadius: 50, - justifyContent: "center", - alignItems: "center", - }, - buttonText: { - color: "white", - fontSize: 16, - }, - statusText: { - marginBottom: 20, - fontSize: 16, - }, -}); - -export default Main; diff --git a/software/source/clients/mobile/README.md b/software/source/clients/mobile/README.md new file mode 100644 index 0000000..cf5f856 --- /dev/null +++ b/software/source/clients/mobile/README.md @@ -0,0 +1,32 @@ +# iOS/Android Client + +**_WORK IN PROGRESS_** + +This repository contains the source code for the 01 iOS/Android app. Work in progress, we will continue to improve this application to get it working properly. + +Feel free to improve this and make a pull request! + +If you want to run it on your own, you will need to install Expo Go on your mobile device. 
+ +## Setup Instructions + +Follow the **[software setup steps](https://github.com/OpenInterpreter/01?tab=readme-ov-file#software)** in the main repo's README first before you read this + +```shell +cd software/source/clients/mobile/react-native # cd into `react-native` +npm install # install dependencies +npx expo start # start local development server +``` + +In **Expo Go** select _Scan QR code_ to scan the QR code produced by the `npx expo start` command + +## Using the App + +```shell +cd software # cd into `software` +poetry run 01 --mobile # exposes QR code for 01 Light server +``` + +In the app, select _Scan Code_ to scan the QR code produced by the `poetry run 01 --mobile` command + +Press and hold the button to speak, release to make the request. To rescan the QR code, swipe left on the screen to go back. diff --git a/software/source/clients/mobile/react-native/App.tsx b/software/source/clients/mobile/react-native/App.tsx new file mode 100644 index 0000000..171108e --- /dev/null +++ b/software/source/clients/mobile/react-native/App.tsx @@ -0,0 +1,31 @@ +import * as React from "react"; +import { NavigationContainer } from "@react-navigation/native"; +import { createNativeStackNavigator } from "@react-navigation/native-stack"; +import HomeScreen from "./src/screens/HomeScreen"; +import CameraScreen from "./src/screens/Camera"; +import Main from "./src/screens/Main"; +import { StatusBar } from "expo-status-bar"; + +const Stack = createNativeStackNavigator(); + +function App() { + return ( + <> + + + + + + + + + + ); +} + +export default App; diff --git a/software/source/clients/ios/react-native/app.json b/software/source/clients/mobile/react-native/app.json similarity index 100% rename from software/source/clients/ios/react-native/app.json rename to software/source/clients/mobile/react-native/app.json diff --git a/software/source/clients/ios/react-native/assets/adaptive-icon.png b/software/source/clients/mobile/react-native/assets/adaptive-icon.png similarity 
index 100% rename from software/source/clients/ios/react-native/assets/adaptive-icon.png rename to software/source/clients/mobile/react-native/assets/adaptive-icon.png diff --git a/software/source/clients/ios/react-native/assets/favicon.png b/software/source/clients/mobile/react-native/assets/favicon.png similarity index 100% rename from software/source/clients/ios/react-native/assets/favicon.png rename to software/source/clients/mobile/react-native/assets/favicon.png diff --git a/software/source/clients/ios/react-native/assets/icon.png b/software/source/clients/mobile/react-native/assets/icon.png similarity index 100% rename from software/source/clients/ios/react-native/assets/icon.png rename to software/source/clients/mobile/react-native/assets/icon.png diff --git a/software/source/clients/mobile/react-native/assets/pip.mp3 b/software/source/clients/mobile/react-native/assets/pip.mp3 new file mode 100644 index 0000000..bc15afa Binary files /dev/null and b/software/source/clients/mobile/react-native/assets/pip.mp3 differ diff --git a/software/source/clients/mobile/react-native/assets/pop.mp3 b/software/source/clients/mobile/react-native/assets/pop.mp3 new file mode 100644 index 0000000..dedc4d1 Binary files /dev/null and b/software/source/clients/mobile/react-native/assets/pop.mp3 differ diff --git a/software/source/clients/ios/react-native/assets/splash.png b/software/source/clients/mobile/react-native/assets/splash.png similarity index 100% rename from software/source/clients/ios/react-native/assets/splash.png rename to software/source/clients/mobile/react-native/assets/splash.png diff --git a/software/source/clients/mobile/react-native/assets/yay.wav b/software/source/clients/mobile/react-native/assets/yay.wav new file mode 100644 index 0000000..6754870 Binary files /dev/null and b/software/source/clients/mobile/react-native/assets/yay.wav differ diff --git a/software/source/clients/ios/react-native/babel.config.js 
b/software/source/clients/mobile/react-native/babel.config.js similarity index 100% rename from software/source/clients/ios/react-native/babel.config.js rename to software/source/clients/mobile/react-native/babel.config.js diff --git a/software/source/clients/ios/react-native/package-lock.json b/software/source/clients/mobile/react-native/package-lock.json similarity index 98% rename from software/source/clients/ios/react-native/package-lock.json rename to software/source/clients/mobile/react-native/package-lock.json index 81e2031..47618dc 100644 --- a/software/source/clients/ios/react-native/package-lock.json +++ b/software/source/clients/mobile/react-native/package-lock.json @@ -14,15 +14,22 @@ "expo-av": "~13.10.5", "expo-barcode-scanner": "~12.9.3", "expo-camera": "~14.0.5", + "expo-haptics": "~12.8.1", + "expo-permissions": "^14.4.0", "expo-status-bar": "~1.11.1", "react": "18.2.0", "react-native": "0.73.4", + "react-native-base64": "^0.2.1", + "react-native-polyfill-globals": "^3.1.0", "react-native-safe-area-context": "4.8.2", - "react-native-screens": "~3.29.0" + "react-native-screens": "~3.29.0", + "text-encoding": "^0.7.0", + "zustand": "^4.5.2" }, "devDependencies": { "@babel/core": "^7.20.0", "@types/react": "~18.2.45", + "@types/react-native-base64": "^0.2.2", "typescript": "^5.1.3" } }, @@ -6089,24 +6096,30 @@ "version": "15.7.11", "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.11.tgz", "integrity": "sha512-ga8y9v9uyeiLdpKddhxYQkxNDrfvuPrlFb0N1qnZZByvcElJaXthF1UhvCh9TLWJBEHeNtdnbysW7Y6Uq8CVng==", - "dev": true + "devOptional": true }, "node_modules/@types/react": { "version": "18.2.63", "resolved": "https://registry.npmjs.org/@types/react/-/react-18.2.63.tgz", "integrity": "sha512-ppaqODhs15PYL2nGUOaOu2RSCCB4Difu4UFrP4I3NHLloXC/ESQzQMi9nvjfT1+rudd0d2L3fQPJxRSey+rGlQ==", - "dev": true, + "devOptional": true, "dependencies": { "@types/prop-types": "*", "@types/scheduler": "*", "csstype": "^3.0.2" } }, + 
"node_modules/@types/react-native-base64": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/@types/react-native-base64/-/react-native-base64-0.2.2.tgz", + "integrity": "sha512-obr+/L9Jaxdr+xCVS/IQcYgreg5xtnui4Wqw/G1acBUtW2CnqVJj6lK6F/5F3+5d2oZEo5xDDLqy8GVn2HbEmw==", + "dev": true + }, "node_modules/@types/scheduler": { "version": "0.16.8", "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.8.tgz", "integrity": "sha512-WZLiwShhwLRmeV6zH+GkbOFT6Z6VklCItrDioxUnv+u4Ll+8vKeFySoFyK/0ctcRpOmwAicELfmys1sDc/Rw+A==", - "dev": true + "devOptional": true }, "node_modules/@types/stack-utils": { "version": "2.0.3", @@ -6484,6 +6497,12 @@ "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" }, + "node_modules/base-64": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/base-64/-/base-64-1.0.0.tgz", + "integrity": "sha512-kwDPIFCGx0NZHog36dj+tHiwP4QMzsZ3AgMViUBKI0+V5n4U0ufTCUMhnQ04diaRI8EX/QcPfql7zlhZ7j4zgg==", + "peer": true + }, "node_modules/base64-js": { "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", @@ -7227,7 +7246,7 @@ "version": "3.1.3", "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==", - "dev": true + "devOptional": true }, "node_modules/dag-map": { "version": "1.0.2", @@ -7702,6 +7721,14 @@ "expo": "*" } }, + "node_modules/expo-haptics": { + "version": "12.8.1", + "resolved": "https://registry.npmjs.org/expo-haptics/-/expo-haptics-12.8.1.tgz", + "integrity": "sha512-ntLsHkfle8K8w9MW8pZEw92ZN3sguaGUSSIxv30fPKNeQFu7Cq/h47Qv3tONv2MO3wU48N9FbKnant6XlfptpA==", + "peerDependencies": { + "expo": "*" + } + }, "node_modules/expo-image-loader": { "version": "4.6.0", "resolved": 
"https://registry.npmjs.org/expo-image-loader/-/expo-image-loader-4.6.0.tgz", @@ -7839,11 +7866,25 @@ "invariant": "^2.2.4" } }, + "node_modules/expo-permissions": { + "version": "14.4.0", + "resolved": "https://registry.npmjs.org/expo-permissions/-/expo-permissions-14.4.0.tgz", + "integrity": "sha512-oAcnJ7dlZhpBydK73cwomA2xofizayVUz+FW5REl7dMu7MYyeN/3aqhlpZ3mYddrxvG161bqu97MQr01UixUnw==", + "peerDependencies": { + "expo": "*" + } + }, "node_modules/expo-status-bar": { "version": "1.11.1", "resolved": "https://registry.npmjs.org/expo-status-bar/-/expo-status-bar-1.11.1.tgz", "integrity": "sha512-ddQEtCOgYHTLlFUe/yH67dDBIoct5VIULthyT3LRJbEwdpzAgueKsX2FYK02ldh440V87PWKCamh7R9evk1rrg==" }, + "node_modules/fast-base64-decode": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fast-base64-decode/-/fast-base64-decode-1.0.0.tgz", + "integrity": "sha512-qwaScUgUGBYeDNRnbc/KyllVU88Jk1pRHPStuF/lO7B0/RTRLj7U0lkdTAutlBblY08rwZDff6tNU9cjv6j//Q==", + "peer": true + }, "node_modules/fast-deep-equal": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", @@ -10774,6 +10815,15 @@ "os-tmpdir": "^1.0.0" } }, + "node_modules/p-defer": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/p-defer/-/p-defer-3.0.0.tgz", + "integrity": "sha512-ugZxsxmtTln604yeYd29EGrNhazN2lywetzpKhfmQjW/VJmhpDmWbiX+h0zL8V91R0UXkhb3KtPmyq9PZw3aYw==", + "peer": true, + "engines": { + "node": ">=8" + } + }, "node_modules/p-finally": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz", @@ -11492,6 +11542,45 @@ "react": "18.2.0" } }, + "node_modules/react-native-base64": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/react-native-base64/-/react-native-base64-0.2.1.tgz", + "integrity": "sha512-eHgt/MA8y5ZF0aHfZ1aTPcIkDWxza9AaEk4GcpIX+ZYfZ04RcaNahO+527KR7J44/mD3efYfM23O2C1N44ByWA==" + }, + "node_modules/react-native-fetch-api": { + "version": "3.0.0", + "resolved": 
"https://registry.npmjs.org/react-native-fetch-api/-/react-native-fetch-api-3.0.0.tgz", + "integrity": "sha512-g2rtqPjdroaboDKTsJCTlcmtw54E25OjyaunUP0anOZn4Fuo2IKs8BVfe02zVggA/UysbmfSnRJIqtNkAgggNA==", + "peer": true, + "dependencies": { + "p-defer": "^3.0.0" + } + }, + "node_modules/react-native-get-random-values": { + "version": "1.11.0", + "resolved": "https://registry.npmjs.org/react-native-get-random-values/-/react-native-get-random-values-1.11.0.tgz", + "integrity": "sha512-4BTbDbRmS7iPdhYLRcz3PGFIpFJBwNZg9g42iwa2P6FOv9vZj/xJc678RZXnLNZzd0qd7Q3CCF6Yd+CU2eoXKQ==", + "peer": true, + "dependencies": { + "fast-base64-decode": "^1.0.0" + }, + "peerDependencies": { + "react-native": ">=0.56" + } + }, + "node_modules/react-native-polyfill-globals": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/react-native-polyfill-globals/-/react-native-polyfill-globals-3.1.0.tgz", + "integrity": "sha512-6ACmV1SjXvZP2LN6J2yK58yNACKddcvoiKLrSQdISx32IdYStfdmGXrbAfpd+TANrTlIaZ2SLoFXohNwhnqm/w==", + "peerDependencies": { + "base-64": "*", + "react-native-fetch-api": "*", + "react-native-get-random-values": "*", + "react-native-url-polyfill": "*", + "text-encoding": "*", + "web-streams-polyfill": "*" + } + }, "node_modules/react-native-safe-area-context": { "version": "4.8.2", "resolved": "https://registry.npmjs.org/react-native-safe-area-context/-/react-native-safe-area-context-4.8.2.tgz", @@ -11514,6 +11603,18 @@ "react-native": "*" } }, + "node_modules/react-native-url-polyfill": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/react-native-url-polyfill/-/react-native-url-polyfill-2.0.0.tgz", + "integrity": "sha512-My330Do7/DvKnEvwQc0WdcBnFPploYKp9CYlefDXzIdEaA+PAhDYllkvGeEroEzvc4Kzzj2O4yVdz8v6fjRvhA==", + "peer": true, + "dependencies": { + "whatwg-url-without-unicode": "8.0.0-3" + }, + "peerDependencies": { + "react-native": "*" + } + }, "node_modules/react-native/node_modules/ansi-styles": { "version": "4.3.0", "resolved": 
"https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", @@ -12576,6 +12677,12 @@ "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==" }, + "node_modules/text-encoding": { + "version": "0.7.0", + "resolved": "https://registry.npmjs.org/text-encoding/-/text-encoding-0.7.0.tgz", + "integrity": "sha512-oJQ3f1hrOnbRLOcwKz0Liq2IcrvDeZRHXhd9RgLrsT+DjWY/nty1Hi7v3dtkaEYbPYe0mUoOfzRrMwfXXwgPUA==", + "deprecated": "no longer maintained" + }, "node_modules/text-table": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", @@ -12868,6 +12975,14 @@ "react": ">=16.8" } }, + "node_modules/use-sync-external-store": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.2.0.tgz", + "integrity": "sha512-eEgnFxGQ1Ife9bzYs6VLi8/4X6CObHMw9Qr9tPY43iKwsPw8xE8+EFsf/2cFZ5S3esXgpWgtSCtLNS41F+sKPA==", + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + } + }, "node_modules/util-deprecate": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", @@ -12936,6 +13051,15 @@ "defaults": "^1.0.3" } }, + "node_modules/web-streams-polyfill": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0.tgz", + "integrity": "sha512-0zJXHRAYEjM2tUfZ2DiSOHAa2aw1tisnnhU3ufD57R8iefL+DcdJyRBRyJpG+NUimDgbTI/lH+gAE1PAvV3Cgw==", + "peer": true, + "engines": { + "node": ">= 8" + } + }, "node_modules/webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", @@ -13204,6 +13328,33 @@ "funding": { "url": "https://github.com/sponsors/sindresorhus" } + }, + "node_modules/zustand": { + "version": "4.5.2", + "resolved": "https://registry.npmjs.org/zustand/-/zustand-4.5.2.tgz", + 
"integrity": "sha512-2cN1tPkDVkwCy5ickKrI7vijSjPksFRfqS6237NzT0vqSsztTNnQdHw9mmN7uBdk3gceVXU0a+21jFzFzAc9+g==", + "dependencies": { + "use-sync-external-store": "1.2.0" + }, + "engines": { + "node": ">=12.7.0" + }, + "peerDependencies": { + "@types/react": ">=16.8", + "immer": ">=9.0.6", + "react": ">=16.8" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "immer": { + "optional": true + }, + "react": { + "optional": true + } + } } } } diff --git a/software/source/clients/ios/react-native/package.json b/software/source/clients/mobile/react-native/package.json similarity index 76% rename from software/source/clients/ios/react-native/package.json rename to software/source/clients/mobile/react-native/package.json index c031609..1b0d566 100644 --- a/software/source/clients/ios/react-native/package.json +++ b/software/source/clients/mobile/react-native/package.json @@ -13,18 +13,25 @@ "@react-navigation/native": "^6.1.14", "@react-navigation/native-stack": "^6.9.22", "expo": "~50.0.8", + "expo-av": "~13.10.5", + "expo-barcode-scanner": "~12.9.3", "expo-camera": "~14.0.5", + "expo-haptics": "~12.8.1", + "expo-permissions": "^14.4.0", "expo-status-bar": "~1.11.1", "react": "18.2.0", "react-native": "0.73.4", + "react-native-base64": "^0.2.1", + "react-native-polyfill-globals": "^3.1.0", "react-native-safe-area-context": "4.8.2", "react-native-screens": "~3.29.0", - "expo-barcode-scanner": "~12.9.3", - "expo-av": "~13.10.5" + "text-encoding": "^0.7.0", + "zustand": "^4.5.2" }, "devDependencies": { "@babel/core": "^7.20.0", "@types/react": "~18.2.45", + "@types/react-native-base64": "^0.2.2", "typescript": "^5.1.3" }, "ios": { diff --git a/software/source/clients/ios/react-native/src/screens/Camera.tsx b/software/source/clients/mobile/react-native/src/screens/Camera.tsx similarity index 83% rename from software/source/clients/ios/react-native/src/screens/Camera.tsx rename to software/source/clients/mobile/react-native/src/screens/Camera.tsx 
index 401c7f8..9a5d902 100644 --- a/software/source/clients/ios/react-native/src/screens/Camera.tsx +++ b/software/source/clients/mobile/react-native/src/screens/Camera.tsx @@ -3,9 +3,11 @@ import { StyleSheet, Text, TouchableOpacity, View } from "react-native"; import { Camera } from "expo-camera"; import { useNavigation } from "@react-navigation/native"; import { BarCodeScanner } from "expo-barcode-scanner"; +// import useSoundEffect from "../lib/useSoundEffect"; export default function CameraScreen() { const [permission, requestPermission] = Camera.useCameraPermissions(); + // const playYay = useSoundEffect(require("../../assets/yay.wav")); const [scanned, setScanned] = useState(false); const navigation = useNavigation(); @@ -31,18 +33,20 @@ export default function CameraScreen() { // setFacing((current) => (current === "back" ? "front" : "back")); // } - const handleBarCodeScanned = ({ + const handleBarCodeScanned = async ({ type, data, }: { type: string; data: string; }) => { + // await playYay(); setScanned(true); console.log( `Bar code with type ${type} and data ${data} has been scanned!` ); - alert(`Scanned URL: ${data}`); + // alert(`Scanned URL: ${data}`); + navigation.navigate("Main", { scannedData: data }); }; return ( @@ -64,7 +68,9 @@ export default function CameraScreen() { onPress={() => setScanned(false)} style={styles.button} > - Scan Again + + Scan Again + )} @@ -78,6 +84,7 @@ const styles = StyleSheet.create({ flex: 1, flexDirection: "column", justifyContent: "flex-end", + position: "relative", }, camera: { flex: 1, @@ -85,18 +92,22 @@ const styles = StyleSheet.create({ buttonContainer: { backgroundColor: "transparent", flexDirection: "row", - margin: 20, + margin: 2, }, button: { + position: "absolute", + top: 44, + left: 4, flex: 0.1, alignSelf: "flex-end", alignItems: "center", backgroundColor: "#000", borderRadius: 10, - padding: 15, + paddingHorizontal: 8, + paddingVertical: 6, }, text: { - fontSize: 18, + fontSize: 14, color: "white", }, 
}); diff --git a/software/source/clients/ios/react-native/src/screens/HomeScreen.tsx b/software/source/clients/mobile/react-native/src/screens/HomeScreen.tsx similarity index 88% rename from software/source/clients/ios/react-native/src/screens/HomeScreen.tsx rename to software/source/clients/mobile/react-native/src/screens/HomeScreen.tsx index cb0644b..270fdbc 100644 --- a/software/source/clients/ios/react-native/src/screens/HomeScreen.tsx +++ b/software/source/clients/mobile/react-native/src/screens/HomeScreen.tsx @@ -7,7 +7,7 @@ const HomeScreen = () => { return ( - + {/* */} navigation.navigate("Camera")} @@ -23,23 +23,23 @@ const styles = StyleSheet.create({ flex: 1, justifyContent: "center", alignItems: "center", - backgroundColor: "#fff", + backgroundColor: "#000", }, circle: { width: 100, height: 100, borderRadius: 50, - backgroundColor: "black", + backgroundColor: "#fff", marginBottom: 20, }, button: { - backgroundColor: "black", + backgroundColor: "#fff", paddingHorizontal: 20, paddingVertical: 10, borderRadius: 5, }, buttonText: { - color: "white", + color: "#000", fontSize: 16, }, }); diff --git a/software/source/clients/mobile/react-native/src/screens/Main.tsx b/software/source/clients/mobile/react-native/src/screens/Main.tsx new file mode 100644 index 0000000..5574eb3 --- /dev/null +++ b/software/source/clients/mobile/react-native/src/screens/Main.tsx @@ -0,0 +1,285 @@ +import React, { useState, useEffect, useCallback, useRef } from "react"; +import { + View, + Text, + TouchableOpacity, + StyleSheet, + BackHandler, +} from "react-native"; +import * as FileSystem from "expo-file-system"; +import { Audio } from "expo-av"; +import { polyfill as polyfillEncoding } from "react-native-polyfill-globals/src/encoding"; +import { Animated } from "react-native"; +import useSoundEffect from "../utils/useSoundEffect"; +import RecordButton from "../utils/RecordButton"; +import { useNavigation } from "@react-navigation/core"; + +interface MainProps { + route: { + 
params: { + scannedData: string; + }; + }; +} + +const Main: React.FC = ({ route }) => { + const { scannedData } = route.params; + const [connectionStatus, setConnectionStatus] = + useState("Connecting..."); + const [ws, setWs] = useState(null); + const [wsUrl, setWsUrl] = useState(""); + const [rescan, setRescan] = useState(false); + const [isPressed, setIsPressed] = useState(false); + const [recording, setRecording] = useState(null); + const audioQueueRef = useRef([]); + const soundRef = useRef(null); + const [soundUriMap, setSoundUriMap] = useState>( + new Map() + ); + const audioDir = FileSystem.documentDirectory + "01/audio/"; + const [permissionResponse, requestPermission] = Audio.usePermissions(); + polyfillEncoding(); + const backgroundColorAnim = useRef(new Animated.Value(0)).current; + const buttonBackgroundColorAnim = useRef(new Animated.Value(0)).current; + const playPip = useSoundEffect(require("../../assets/pip.mp3")); + const playPop = useSoundEffect(require("../../assets/pop.mp3")); + const navigation = useNavigation(); + const backgroundColor = backgroundColorAnim.interpolate({ + inputRange: [0, 1], + outputRange: ["black", "white"], + }); + const buttonBackgroundColor = backgroundColorAnim.interpolate({ + inputRange: [0, 1], + outputRange: ["white", "black"], + }); + + const constructTempFilePath = async (buffer: string) => { + try { + await dirExists(); + if (!buffer) { + console.log("Buffer is undefined or empty."); + return null; + } + const tempFilePath = `${audioDir}${Date.now()}.wav`; + + await FileSystem.writeAsStringAsync(tempFilePath, buffer, { + encoding: FileSystem.EncodingType.Base64, + }); + + return tempFilePath; + } catch (error) { + console.log("Failed to construct temp file path:", error); + return null; // Return null to prevent crashing, error is logged + } + }; + + async function dirExists() { + /** + * Checks if audio directory exists in device storage, if not creates it. 
+ */ + try { + const dirInfo = await FileSystem.getInfoAsync(audioDir); + if (!dirInfo.exists) { + console.error("audio directory doesn't exist, creating..."); + await FileSystem.makeDirectoryAsync(audioDir, { intermediates: true }); + } + } catch (error) { + console.error("Error checking or creating directory:", error); + } + } + + const playNextAudio = useCallback(async () => { + if (audioQueueRef.current.length > 0 && soundRef.current == null) { + const uri = audioQueueRef.current.at(0) as string; + + try { + const { sound: newSound } = await Audio.Sound.createAsync({ uri }); + soundRef.current = newSound; + setSoundUriMap(new Map(soundUriMap.set(newSound, uri))); + await newSound.playAsync(); + newSound.setOnPlaybackStatusUpdate(_onPlayBackStatusUpdate); + } catch (error) { + console.log("Error playing audio", error); + } + } else { + // audioQueue is empty or sound is not null + return; + } + },[]); + + const _onPlayBackStatusUpdate = useCallback( + async (status: any) => { + if (status.didJustFinish) { + audioQueueRef.current.shift(); + await soundRef.current?.unloadAsync(); + if (soundRef.current) { + soundUriMap.delete(soundRef.current); + setSoundUriMap(new Map(soundUriMap)); + } + soundRef.current = null; + playNextAudio(); + } + },[]); + + useEffect(() => { + const backAction = () => { + navigation.navigate("Home"); // Always navigate back to Home + return true; // Prevent default action + }; + + // Add event listener for hardware back button on Android + const backHandler = BackHandler.addEventListener( + "hardwareBackPress", + backAction + ); + + return () => backHandler.remove(); + }, [navigation]); + + useEffect(() => { + let websocket: WebSocket; + try { + // console.log("Connecting to WebSocket at " + scannedData); + setWsUrl(scannedData); + websocket = new WebSocket(scannedData); + websocket.binaryType = "blob"; + + websocket.onopen = () => { + setConnectionStatus(`Connected`); + }; + + websocket.onmessage = async (e) => { + try { + const message 
= JSON.parse(e.data); + + if (message.content && message.type == "audio") { + const buffer = message.content; + if (buffer && buffer.length > 0) { + const filePath = await constructTempFilePath(buffer); + if (filePath !== null) { + audioQueueRef.current.push(filePath); + + if (audioQueueRef.current.length == 1) { + playNextAudio(); + } + } else { + console.error("Failed to create file path"); + } + } else { + console.error("Received message is empty or undefined"); + } + } + } catch (error) { + console.error("Error handling WebSocket message:", error); + } + }; + + websocket.onerror = (error) => { + setConnectionStatus("Error connecting to WebSocket."); + console.error("WebSocket error: ", error); + }; + + websocket.onclose = () => { + setConnectionStatus("Disconnected."); + }; + + setWs(websocket); + } catch (error) { + console.log(error); + setConnectionStatus("Error creating WebSocket."); + } + + return () => { + if (websocket) { + websocket.close(); + } + }; + }, [scannedData, rescan]); + + return ( + + + + { + setRescan(!rescan); + }} + > + + {connectionStatus} + + + + + ); +}; + +const styles = StyleSheet.create({ + container: { + flex: 1, + position: "relative", + }, + middle: { + flex: 1, + justifyContent: "center", + alignItems: "center", + padding: 10, + position: "relative", + }, + circle: { + width: 100, + height: 100, + borderRadius: 50, + justifyContent: "center", + alignItems: "center", + }, + qr: { + position: "absolute", + top: 30, + left: 10, + padding: 10, + zIndex: 100, + }, + icon: { + height: 40, + width: 40, + }, + topBar: { + height: 40, + backgroundColor: "#000", + paddingTop: 50, + }, + + statusText: { + fontSize: 12, + fontWeight: "bold", + }, + statusButton: { + position: "absolute", + bottom: 20, + alignSelf: "center", + }, +}); + +export default Main; diff --git a/software/source/clients/mobile/react-native/src/utils/RecordButton.tsx b/software/source/clients/mobile/react-native/src/utils/RecordButton.tsx new file mode 100644 index 
0000000..ffdaeb0 --- /dev/null +++ b/software/source/clients/mobile/react-native/src/utils/RecordButton.tsx @@ -0,0 +1,151 @@ +import React, { useEffect, useCallback } from "react"; +import { TouchableOpacity, StyleSheet } from "react-native"; +import { Audio } from "expo-av"; +import { Animated } from "react-native"; +import * as Haptics from "expo-haptics"; + +interface RecordButtonProps { + playPip: () => void; + playPop: () => void; + recording: Audio.Recording | null; + setRecording: (recording: Audio.Recording | null) => void; + ws: WebSocket | null; + buttonBackgroundColorAnim: Animated.Value; + backgroundColorAnim: Animated.Value; + backgroundColor: Animated.AnimatedInterpolation; + buttonBackgroundColor: Animated.AnimatedInterpolation; + setIsPressed: (isPressed: boolean) => void; +} + +const styles = StyleSheet.create({ + circle: { + width: 100, + height: 100, + borderRadius: 50, + justifyContent: "center", + alignItems: "center", + }, + button: { + width: 100, + height: 100, + borderRadius: 50, + justifyContent: "center", + alignItems: "center", + }, +}); + +const RecordButton: React.FC = ({ + playPip, + playPop, + recording, + setRecording, + ws, + backgroundColorAnim, + buttonBackgroundColorAnim, + backgroundColor, + buttonBackgroundColor, + setIsPressed, +}: RecordButtonProps) => { + const [permissionResponse, requestPermission] = Audio.usePermissions(); + + useEffect(() => { + if (permissionResponse?.status !== "granted") { + requestPermission(); + } + }, []); + + const startRecording = useCallback(async () => { + if (recording) { + console.log("A recording is already in progress."); + return; + } + + try { + if ( + permissionResponse !== null && + permissionResponse.status !== `granted` + ) { + await requestPermission(); + } + + await Audio.setAudioModeAsync({ + allowsRecordingIOS: true, + playsInSilentModeIOS: true, + }); + + const newRecording = new Audio.Recording(); + await newRecording.prepareToRecordAsync( + 
Audio.RecordingOptionsPresets.HIGH_QUALITY + ); + await newRecording.startAsync(); + + setRecording(newRecording); + } catch (err) { + console.error("Failed to start recording", err); + } + }, []); + + const stopRecording = useCallback(async () => { + if (recording) { + await recording.stopAndUnloadAsync(); + await Audio.setAudioModeAsync({ + allowsRecordingIOS: false, + }); + const uri = recording.getURI(); + setRecording(null); + + if (ws && uri) { + const response = await fetch(uri); + const blob = await response.blob(); + + const reader = new FileReader(); + reader.readAsArrayBuffer(blob); + reader.onloadend = () => { + const audioBytes = reader.result; + if (audioBytes) { + ws.send(audioBytes); + } + }; + } + } + }, [recording]); + + const toggleRecording = (shouldPress: boolean) => { + Animated.timing(backgroundColorAnim, { + toValue: shouldPress ? 1 : 0, + duration: 400, + useNativeDriver: false, + }).start(); + Animated.timing(buttonBackgroundColorAnim, { + toValue: shouldPress ? 
1 : 0, + duration: 400, + useNativeDriver: false, + }).start(); + }; + + return ( + { + playPip(); + setIsPressed(true); + toggleRecording(true); + startRecording(); + Haptics.impactAsync(Haptics.ImpactFeedbackStyle.Heavy); + }} + onPressOut={() => { + playPop(); + setIsPressed(false); + toggleRecording(false); + stopRecording(); + Haptics.impactAsync(Haptics.ImpactFeedbackStyle.Heavy); + }} + > + + + ); +}; + +export default RecordButton; diff --git a/software/source/clients/mobile/react-native/src/utils/state.ts b/software/source/clients/mobile/react-native/src/utils/state.ts new file mode 100644 index 0000000..2047977 --- /dev/null +++ b/software/source/clients/mobile/react-native/src/utils/state.ts @@ -0,0 +1,10 @@ +// store.js +import { create } from "zustand"; + +const useStore = create((set: any) => ({ + count: 0, + increase: () => set((state: any) => ({ count: state.count + 1 })), + decrease: () => set((state: any) => ({ count: state.count - 1 })), +})); + +export default useStore; diff --git a/software/source/clients/mobile/react-native/src/utils/useSoundEffect.ts b/software/source/clients/mobile/react-native/src/utils/useSoundEffect.ts new file mode 100644 index 0000000..5e73fec --- /dev/null +++ b/software/source/clients/mobile/react-native/src/utils/useSoundEffect.ts @@ -0,0 +1,29 @@ +import { useEffect, useState } from "react"; +import { Audio } from "expo-av"; + +const useSoundEffect = (soundFile: any) => { + const [sound, setSound] = useState(null); // Explicitly set initial state to null + + useEffect(() => { + const loadSound = async () => { + const { sound: newSound } = await Audio.Sound.createAsync(soundFile); + setSound(newSound); + }; + + loadSound(); + + return () => { + sound?.unloadAsync(); + }; + }, [soundFile, sound]); // Include sound in the dependency array + + const playSound = async () => { + if (sound) { + await sound.playAsync(); + } + }; + + return playSound; +}; + +export default useSoundEffect; diff --git 
a/software/source/clients/ios/react-native/tsconfig.json b/software/source/clients/mobile/react-native/tsconfig.json similarity index 100% rename from software/source/clients/ios/react-native/tsconfig.json rename to software/source/clients/mobile/react-native/tsconfig.json diff --git a/software/source/server/server.py b/software/source/server/server.py index c4dd036..a906ba9 100644 --- a/software/source/server/server.py +++ b/software/source/server/server.py @@ -20,7 +20,8 @@ from interpreter import interpreter from ..utils.accumulator import Accumulator from .utils.logs import setup_logging from .utils.logs import logger - +import base64 +import shutil from ..utils.print_markdown import print_markdown os.environ["STT_RUNNER"] = "server" @@ -38,7 +39,7 @@ print("") setup_logging() -accumulator = Accumulator() +accumulator_global = Accumulator() app = FastAPI() @@ -194,12 +195,13 @@ async def receive_messages(websocket: WebSocket): async def send_messages(websocket: WebSocket): while True: message = await to_device.get() - # print(f"Sending to the device: {type(message)} {str(message)[:100]}") try: if isinstance(message, dict): + # print(f"Sending to the device: {type(message)} {str(message)[:100]}") await websocket.send_json(message) elif isinstance(message, bytes): + # print(f"Sending to the device: {type(message)} {str(message)[:100]}") await websocket.send_bytes(message) else: raise TypeError("Message must be a dict or bytes") @@ -209,9 +211,12 @@ async def send_messages(websocket: WebSocket): raise -async def listener(): +async def listener(mobile: bool): while True: try: + if mobile: + accumulator_mobile = Accumulator() + while True: if not from_user.empty(): chunk = await from_user.get() @@ -221,7 +226,11 @@ async def listener(): break await asyncio.sleep(1) - message = accumulator.accumulate(chunk) + if mobile: + message = accumulator_mobile.accumulate_mobile(chunk) + else: + message = accumulator_global.accumulate(chunk) + if message == None: # Will be None 
until we have a full message ready continue @@ -241,7 +250,9 @@ async def listener(): # Convert bytes to audio file # Format will be bytes.wav or bytes.opus mime_type = "audio/" + message["format"].split(".")[1] + # print("input audio file content", message["content"][:100]) audio_file_path = bytes_to_wav(message["content"], mime_type) + # print("Audio file path:", audio_file_path) # For microphone debugging: if False: @@ -287,6 +298,7 @@ async def listener(): # Send it to the user await to_device.put(chunk) + # Yield to the event loop, so you actually send it out await asyncio.sleep(0.01) @@ -309,11 +321,11 @@ async def listener(): if is_full_sentence(sentences[-1]): for sentence in sentences: - await stream_tts_to_device(sentence) + await stream_tts_to_device(sentence, mobile) accumulated_text = "" else: for sentence in sentences[:-1]: - await stream_tts_to_device(sentence) + await stream_tts_to_device(sentence, mobile) accumulated_text = sentences[-1] # If we're going to speak, say we're going to stop sending text. @@ -343,7 +355,7 @@ async def listener(): json.dump(interpreter.messages, file, indent=4) # TODO: is triggering seemingly randomly - # logger.info("New user message recieved. Breaking.") + # logger.info("New user message received. Breaking.") # break # Also check if there's any new computer messages @@ -351,13 +363,13 @@ async def listener(): with open(conversation_history_path, "w") as file: json.dump(interpreter.messages, file, indent=4) - logger.info("New computer message recieved. Breaking.") + logger.info("New computer message received. 
Breaking.") break except: traceback.print_exc() -async def stream_tts_to_device(sentence): +async def stream_tts_to_device(sentence, mobile: bool): force_task_completion_responses = [ "the task is done", "the task is impossible", @@ -366,26 +378,44 @@ async def stream_tts_to_device(sentence): if sentence.lower().strip().strip(".!?").strip() in force_task_completion_responses: return - for chunk in stream_tts(sentence): + for chunk in stream_tts(sentence, mobile): await to_device.put(chunk) -def stream_tts(sentence): - audio_file = tts(sentence) +def stream_tts(sentence, mobile: bool): + audio_file = tts(sentence, mobile) + # Read the entire WAV file with open(audio_file, "rb") as f: audio_bytes = f.read() - os.remove(audio_file) - file_type = "bytes.raw" - chunk_size = 1024 + if mobile: + file_type = "audio/wav" + + os.remove(audio_file) + + # stream the audio as a single sentence + yield { + "role": "assistant", + "type": "audio", + "format": file_type, + "content": base64.b64encode(audio_bytes).decode("utf-8"), + "start": True, + "end": True, + } + + else: + # stream the audio in chunk sizes + os.remove(audio_file) + + file_type = "bytes.raw" + chunk_size = 1024 - # Stream the audio - yield {"role": "assistant", "type": "audio", "format": file_type, "start": True} - for i in range(0, len(audio_bytes), chunk_size): - chunk = audio_bytes[i : i + chunk_size] - yield chunk - yield {"role": "assistant", "type": "audio", "format": file_type, "end": True} + yield {"role": "assistant", "type": "audio", "format": file_type, "start": True} + for i in range(0, len(audio_bytes), chunk_size): + chunk = audio_bytes[i : i + chunk_size] + yield chunk + yield {"role": "assistant", "type": "audio", "format": file_type, "end": True} from uvicorn import Config, Server @@ -422,6 +452,7 @@ async def main( temperature, tts_service, stt_service, + mobile, ): global HOST global PORT @@ -473,7 +504,7 @@ async def main( interpreter.llm.completions = llm # Start listening - 
asyncio.create_task(listener()) + asyncio.create_task(listener(mobile)) # Start watching the kernel if it's your job to do that if True: # in the future, code can run on device. for now, just server. diff --git a/software/source/server/services/stt/local-whisper/stt.py b/software/source/server/services/stt/local-whisper/stt.py index 1c2743b..3426665 100644 --- a/software/source/server/services/stt/local-whisper/stt.py +++ b/software/source/server/services/stt/local-whisper/stt.py @@ -125,7 +125,7 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str: def run_command(command): result = subprocess.run( - command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True ) return result.stdout, result.stderr @@ -156,7 +156,7 @@ def stt_wav(service_directory, wav_file_path: str): temp_dir, f"output_stt_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav" ) ffmpeg.input(wav_file_path).output( - output_path, acodec="pcm_s16le", ac=1, ar="16k" + output_path, acodec="pcm_s16le", ac=1, ar="16k", loglevel="panic" ).run() try: transcript = get_transcription_file(service_directory, output_path) diff --git a/software/source/server/services/stt/openai/stt.py b/software/source/server/services/stt/openai/stt.py index 4cb1e4b..32dc9e2 100644 --- a/software/source/server/services/stt/openai/stt.py +++ b/software/source/server/services/stt/openai/stt.py @@ -70,7 +70,7 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str: def run_command(command): result = subprocess.run( - command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True ) return result.stdout, result.stderr diff --git a/software/source/server/services/tts/openai/tts.py b/software/source/server/services/tts/openai/tts.py index 07e1eec..27deaf6 100644 --- a/software/source/server/services/tts/openai/tts.py +++ 
b/software/source/server/services/tts/openai/tts.py @@ -25,7 +25,7 @@ class Tts: def __init__(self, config): pass - def tts(self, text): + def tts(self, text, mobile): response = client.audio.speech.create( model="tts-1", voice=os.getenv("OPENAI_VOICE_NAME", "alloy"), @@ -36,9 +36,15 @@ class Tts: response.stream_to_file(temp_file.name) # TODO: hack to format audio correctly for device - outfile = tempfile.gettempdir() + "/" + "raw.dat" - ffmpeg.input(temp_file.name).output( - outfile, f="s16le", ar="16000", ac="1", loglevel="panic" - ).run() + if mobile: + outfile = tempfile.gettempdir() + "/" + "output.wav" + ffmpeg.input(temp_file.name).output( + outfile, f="wav", ar="16000", ac="1", loglevel="panic" + ).run() + else: + outfile = tempfile.gettempdir() + "/" + "raw.dat" + ffmpeg.input(temp_file.name).output( + outfile, f="s16le", ar="16000", ac="1", loglevel="panic" + ).run() return outfile diff --git a/software/source/server/services/tts/piper/tts.py b/software/source/server/services/tts/piper/tts.py index 8daa158..bd40441 100644 --- a/software/source/server/services/tts/piper/tts.py +++ b/software/source/server/services/tts/piper/tts.py @@ -12,7 +12,7 @@ class Tts: self.piper_directory = "" self.install(config["service_directory"]) - def tts(self, text): + def tts(self, text, mobile): with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file: output_file = temp_file.name piper_dir = self.piper_directory @@ -34,10 +34,16 @@ class Tts: ) # TODO: hack to format audio correctly for device - outfile = tempfile.gettempdir() + "/" + "raw.dat" - ffmpeg.input(temp_file.name).output( - outfile, f="s16le", ar="16000", ac="1", loglevel="panic" - ).run() + if mobile: + outfile = tempfile.gettempdir() + "/" + "output.wav" + ffmpeg.input(temp_file.name).output( + outfile, f="wav", ar="16000", ac="1", loglevel="panic" + ).run() + else: + outfile = tempfile.gettempdir() + "/" + "raw.dat" + ffmpeg.input(temp_file.name).output( + outfile, f="s16le", ar="16000", 
ac="1", loglevel="panic" + ).run() return outfile diff --git a/software/source/server/tunnel.py b/software/source/server/tunnel.py index 809db08..f25a0b3 100644 --- a/software/source/server/tunnel.py +++ b/software/source/server/tunnel.py @@ -100,7 +100,7 @@ def create_tunnel( # If ngrok is installed, start it on the specified port # process = subprocess.Popen(f'ngrok http {server_port} --log=stdout', shell=True, stdout=subprocess.PIPE) process = subprocess.Popen( - f"ngrok http {server_port} --scheme http,https --domain=marten-advanced-dragon.ngrok-free.app --log=stdout", + f"ngrok http {server_port} --scheme http,https --log=stdout", shell=True, stdout=subprocess.PIPE, ) diff --git a/software/source/server/utils/bytes_to_wav.py b/software/source/server/utils/bytes_to_wav.py index a189257..286ae4d 100644 --- a/software/source/server/utils/bytes_to_wav.py +++ b/software/source/server/utils/bytes_to_wav.py @@ -36,7 +36,7 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str: output_path = os.path.join( temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav" ) - print(mime_type, input_path, output_path) + # print(mime_type, input_path, output_path) if mime_type == "audio/raw": ffmpeg.input( input_path, @@ -57,7 +57,7 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str: def run_command(command): result = subprocess.run( - command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True ) return result.stdout, result.stderr diff --git a/software/source/server/utils/kernel.py b/software/source/server/utils/kernel.py index fcca107..d3ce75c 100644 --- a/software/source/server/utils/kernel.py +++ b/software/source/server/utils/kernel.py @@ -5,12 +5,17 @@ load_dotenv() # take environment variables from .env. 
import asyncio import subprocess import platform +import os +import shutil from .logs import setup_logging from .logs import logger setup_logging() +# dmesg process created at boot time +dmesg_proc = None + def get_kernel_messages(): """ @@ -25,12 +30,37 @@ def get_kernel_messages(): output, _ = process.communicate() return output.decode("utf-8") elif current_platform == "Linux": - with open("/var/log/dmesg", "r") as file: + log_path = get_dmesg_log_path() + with open(log_path, 'r') as file: return file.read() else: logger.info("Unsupported platform.") +def get_dmesg_log_path(): + """ + Check for the existence of a readable dmesg log file and return its path. + Create an accessible path if not found. + """ + if os.access('/var/log/dmesg', os.F_OK | os.R_OK): + return '/var/log/dmesg' + + global dmesg_proc + dmesg_log_path = '/tmp/dmesg' + if dmesg_proc: + return dmesg_log_path + + logger.info("Created /tmp/dmesg.") + subprocess.run(['touch', dmesg_log_path]) + dmesg_path = shutil.which('dmesg') + if dmesg_path: + logger.info(f"Writing to {dmesg_log_path} from dmesg.") + dmesg_proc = subprocess.Popen([dmesg_path, '--follow'], text=True, stdout=subprocess.PIPE) + subprocess.Popen(['tee', dmesg_log_path], text=True, stdin=dmesg_proc.stdout, stdout=subprocess.DEVNULL) + + return dmesg_log_path + + def custom_filter(message): # Check for {TO_INTERPRETER{ message here }TO_INTERPRETER} pattern if "{TO_INTERPRETER{" in message and "}TO_INTERPRETER}" in message: diff --git a/software/source/server/utils/process_utils.py b/software/source/server/utils/process_utils.py index 586e4c6..5337bae 100644 --- a/software/source/server/utils/process_utils.py +++ b/software/source/server/utils/process_utils.py @@ -7,7 +7,11 @@ def kill_process_tree(): pid = os.getpid() # Get the current process ID try: # Send SIGTERM to the entire process group to ensure all processes are targeted - os.killpg(os.getpgid(pid), signal.SIGKILL) + try: + os.killpg(os.getpgid(pid), signal.SIGKILL) + # 
Windows implementation + except AttributeError: + os.kill(pid, signal.SIGTERM) parent = psutil.Process(pid) children = parent.children(recursive=True) for child in children: diff --git a/software/source/utils/accumulator.py b/software/source/utils/accumulator.py index 37912b5..d4715e1 100644 --- a/software/source/utils/accumulator.py +++ b/software/source/utils/accumulator.py @@ -45,3 +45,49 @@ class Accumulator: self.message["content"] = b"" self.message["content"] += chunk return None + + def accumulate_mobile(self, chunk): + # print(str(chunk)[:100]) + if type(chunk) == dict: + if "format" in chunk and chunk["format"] == "active_line": + # We don't do anything with these + return None + + if "start" in chunk: + self.message = chunk + self.message.pop("start") + return None + + if "content" in chunk: + if any( + self.message[key] != chunk[key] + for key in self.message + if key != "content" + ): + self.message = chunk + if "content" not in self.message: + self.message["content"] = chunk["content"] + else: + if type(chunk["content"]) == dict: + # dict concatenation cannot happen, so we see if chunk is a dict + self.message["content"]["content"] += chunk["content"][ + "content" + ] + else: + self.message["content"] += chunk["content"] + return None + + if "end" in chunk: + # We will proceed + message = self.message + self.message = self.template + return message + + if type(chunk) == bytes: + if "content" not in self.message or type(self.message["content"]) != bytes: + self.message["content"] = b"" + self.message["content"] += chunk + + self.message["type"] = "audio" + self.message["format"] = "bytes.wav" + return self.message diff --git a/software/start.py b/software/start.py index 4f3377f..3b143d9 100644 --- a/software/start.py +++ b/software/start.py @@ -72,13 +72,16 @@ def run( False, "--local", help="Use recommended local services for LLM, STT, and TTS" ), qr: bool = typer.Option(False, "--qr", help="Print the QR code for the server URL"), + mobile: bool = 
typer.Option( + False, "--mobile", help="Toggle server to support mobile app" + ), ): _run( - server=server, + server=server or mobile, server_host=server_host, server_port=server_port, tunnel_service=tunnel_service, - expose=expose, + expose=expose or mobile, client=client, server_url=server_url, client_type=client_type, @@ -92,7 +95,8 @@ def run( tts_service=tts_service, stt_service=stt_service, local=local, - qr=qr, + qr=qr or mobile, + mobile=mobile, ) @@ -116,12 +120,17 @@ def _run( stt_service: str = "openai", local: bool = False, qr: bool = False, + mobile: bool = False, ): if local: tts_service = "piper" # llm_service = "llamafile" stt_service = "local-whisper" select_local_model() + + system_type = platform.system() + if system_type == "Windows": + server_host = "localhost" if not server_url: server_url = f"{server_host}:{server_port}" @@ -129,6 +138,8 @@ def _run( if not server and not client: server = True client = True + + def handle_exit(signum, frame): os._exit(0) @@ -136,6 +147,7 @@ def _run( signal.signal(signal.SIGINT, handle_exit) if server: + # print(f"Starting server with mobile = {mobile}") loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) server_thread = threading.Thread( @@ -153,6 +165,7 @@ def _run( temperature, tts_service, stt_service, + mobile, ), ), )