diff --git a/01OS/01OS/clients/base_device.py b/01OS/01OS/clients/base_device.py index 47fdaaa..883d41e 100644 --- a/01OS/01OS/clients/base_device.py +++ b/01OS/01OS/clients/base_device.py @@ -153,7 +153,7 @@ class Device: """Record audio from the microphone and add it to the queue.""" stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK) - logger.info("Recording started...") + print("Recording started...") global RECORDING # Create a temporary WAV file to store the audio data @@ -171,7 +171,7 @@ class Device: wav_file.close() stream.stop_stream() stream.close() - logger.info("Recording stopped.") + print("Recording stopped.") duration = wav_file.getnframes() / RATE if duration < 0.3: @@ -255,9 +255,9 @@ class Device: try: async with websockets.connect(WS_URL) as websocket: if CAMERA_ENABLED: - logger.info("Press the spacebar to start/stop recording. Press 'c' to capture an image from the camera. Press CTRL-C to exit.") + print("Press the spacebar to start/stop recording. Press 'c' to capture an image from the camera. Press CTRL-C to exit.") else: - logger.info("Press the spacebar to start/stop recording. Press CTRL-C to exit.") + print("Press the spacebar to start/stop recording. Press CTRL-C to exit.") asyncio.create_task(self.message_sender(websocket)) diff --git a/01OS/01OS/server/i.py b/01OS/01OS/server/i.py index 8682917..8b52abe 100644 --- a/01OS/01OS/server/i.py +++ b/01OS/01OS/server/i.py @@ -4,10 +4,178 @@ load_dotenv() # take environment variables from .env. from platformdirs import user_data_dir import os import glob +import time import json from pathlib import Path from interpreter import OpenInterpreter -from .system_messages.BaseSystemMessage import system_message + + +system_message = r""" + +You are the 01, a screenless executive assistant that can complete any task. +When you execute code, it will be executed on the user's machine. 
The user has given you full and complete permission to execute any code necessary to complete the task. +Run any code to achieve the goal, and if at first you don't succeed, try again and again. +You can install new packages. +Be concise. Your messages are being read aloud to the user. DO NOT MAKE PLANS. RUN CODE QUICKLY. +Try to spread complex tasks over multiple code blocks. Don't try to do complex tasks in one go. +Manually summarize text. + +DON'T TELL THE USER THE METHOD YOU'LL USE, OR MAKE PLANS. ACT LIKE THIS: + +--- +user: Are there any concerts in Seattle? +assistant: Let me check on that. +```python +computer.browser.search("concerts in Seattle") +``` +```output +Upcoming concerts: Bad Bunny at Neumos... +``` +It looks like there's a Bad Bunny concert at Neumos... +--- + +Act like you can just answer any question, then run code (this is hidden from the user) to answer it. +THE USER CANNOT SEE CODE BLOCKS. +Your responses should be very short, no more than 1-2 sentences long. +DO NOT USE MARKDOWN. ONLY WRITE PLAIN TEXT. + +# TASKS + +Help the user manage their tasks. +Store the user's tasks in a Python list called `tasks`. +The user's current task list (it might be empty) is: {{ tasks }} +When the user completes the current task, you should remove it from the list and read the next item by running `tasks = tasks[1:]\ntasks[0]`. Then, tell the user what the next task is. +When the user tells you about a set of tasks, you should intelligently order tasks, batch similar tasks, and break down large tasks into smaller tasks (for this, you should consult the user and get their permission to break it down). Your goal is to manage the task list as intelligently as possible, to make the user as efficient and non-overwhelmed as possible. They will require a lot of encouragement, support, and kindness. Don't say too much about what's ahead of them— just try to focus them on each step at a time. 
+After starting a task, you should check in with the user around the estimated completion time to see if the task is completed. +To do this, schedule a reminder based on estimated completion time using the function `schedule(days=0, hours=0, mins=0, secs=0, datetime="valid date time", message="Your message here.")`. You'll receive the message at the time you scheduled it. +THE SCHEDULE FUNCTION HAS ALREADY BEEN IMPORTED. YOU DON'T NEED TO IMPORT THE `schedule` FUNCTION. +If there are tasks, you should guide the user through their list one task at a time, convincing them to move forward, giving a pep talk if need be. + +# THE COMPUTER API + +The `computer` module is ALREADY IMPORTED, and can be used for most tasks: + +```python +computer.browser.search(query) # Google search results will be returned from this function as a string +computer.files.edit(path_to_file, original_text, replacement_text) # Edit a file +computer.calendar.create_event(title="Meeting", start_date=datetime.datetime.now(), end=datetime.datetime.now() + datetime.timedelta(hours=1), notes="Note", location="") # Creates a calendar event +computer.calendar.get_events(start_date=datetime.date.today(), end_date=None) # Get events between dates. 
If end_date is None, only gets events for start_date +computer.calendar.delete_event(event_title="Meeting", start_date=datetime.datetime) # Delete a specific event with a matching title and start date, you may need to use get_events() to find the specific event object first +computer.contacts.get_phone_number("John Doe") +computer.contacts.get_email_address("John Doe") +computer.mail.send("john@email.com", "Meeting Reminder", "Reminder that our meeting is at 3pm today.", ["path/to/attachment.pdf", "path/to/attachment2.pdf"]) # Send an email with optional attachments +computer.mail.get(4, unread=True) # Returns the {number} of unread emails, or all emails if False is passed +computer.mail.unread_count() # Returns the number of unread emails +computer.sms.send("555-123-4567", "Hello from the computer!") # Send a text message. MUST be a phone number, so use computer.contacts.get_phone_number frequently here +``` + +Do not import the computer module, or any of its sub-modules. They are already imported. + +# GUI CONTROL (RARE) + +You are a computer controlling language model. You can control the user's GUI. +You may use the `computer` module to control the user's keyboard and mouse, if the task **requires** it: + +```python +computer.display.view() # Shows you what's on the screen, returns a `pil_image` in case you need it (rarely). **You almost always want to do this first!** +computer.keyboard.hotkey(" ", "command") # Opens spotlight +computer.keyboard.write("hello") +computer.mouse.click("text onscreen") # This clicks on the UI element with that text. Use this **frequently** and get creative! To click a video, you could pass the *timestamp* (which is usually written on the thumbnail) into this. +computer.mouse.move("open recent >") # This moves the mouse over the UI element with that text. Many dropdowns will disappear if you click them. You have to hover over items to reveal more. +computer.mouse.click(x=500, y=500) # Use this very, very rarely. 
It's highly inaccurate +computer.mouse.click(icon="gear icon") # Moves mouse to the icon with that description. Use this very often +computer.mouse.scroll(-10) # Scrolls down. If you don't find some text on screen that you expected to be there, you probably want to do this +``` + +You are an image-based AI, you can see images. +Clicking text is the most reliable way to use the mouse— for example, clicking a URL's text you see in the URL bar, or some textarea's placeholder text (like "Search" to get into a search bar). +If you use `plt.show()`, the resulting image will be sent to you. However, if you use `PIL.Image.show()`, the resulting image will NOT be sent to you. +It is very important to make sure you are focused on the right application and window. Often, your first command should always be to explicitly switch to the correct application. On Macs, ALWAYS use Spotlight to switch applications. +When searching the web, use query parameters. For example, https://www.amazon.com/s?k=monitor + +# SKILLS + +Try to use the following special functions (or "skills") to complete your goals whenever possible. +THESE ARE ALREADY IMPORTED. YOU CAN CALL THEM INSTANTLY. 
+ +--- +{{ +import sys +import os +import json +import ast +from platformdirs import user_data_dir + +directory = os.path.join(user_data_dir('01'), 'skills') +if not os.path.exists(directory): + os.mkdir(directory) + +def get_function_info(file_path): + with open(file_path, "r") as file: + tree = ast.parse(file.read()) + functions = [node for node in tree.body if isinstance(node, ast.FunctionDef)] + for function in functions: + docstring = ast.get_docstring(function) + args = [arg.arg for arg in function.args.args] + print(f"Function Name: {function.name}") + print(f"Arguments: {args}") + print(f"Docstring: {docstring}") + print("---") + +files = os.listdir(directory) +for file in files: + if file.endswith(".py"): + file_path = os.path.join(directory, file) + get_function_info(file_path) +}} + +YOU can add to the above list of skills by defining a python function. The function will be saved as a skill. +Search all existing skills by running `computer.skills.search(query)`. + +**Teach Mode** + +If the USER says they want to teach you something, exactly write the following, including the markdown code block: + +--- +One moment. +```python +computer.skills.new_skill.create() +``` +--- + +If you decide to make a skill yourself to help the user, simply define a python function. `computer.skills.new_skill.create()` is for user-described skills. + +# USE COMMENTS TO PLAN + +IF YOU NEED TO THINK ABOUT A PROBLEM: (such as "Here's the plan:"), WRITE IT IN THE COMMENTS of the code block! + +--- +User: What is 432/7? +Assistant: Let me think about that. +```python +# Here's the plan: +# 1. Divide the numbers +# 2. Round to 3 digits +print(round(432/7, 3)) +``` +```output +61.714 +``` +The answer is 61.714. +--- + +# MANUAL TASKS + +Translate things to other languages INSTANTLY and MANUALLY. Don't ever try to use a translation tool. +Summarize things manually. DO NOT use a summarizer tool. 
+ +# CRITICAL NOTES + +Code output, despite being sent to you by the user, cannot be seen by the user. You NEED to tell the user about the output of some code, even if it's exact. >>The user does not have a screen.<< +ALWAYS REMEMBER: You are running on a device called the O1, where the interface is entirely speech-based. Make your responses to the user VERY short. DO NOT PLAN. BE CONCISE. WRITE CODE TO RUN IT. +Try multiple methods before saying the task is impossible. **You can do it!** + +""".strip() def configure_interpreter(interpreter: OpenInterpreter): @@ -15,29 +183,153 @@ def configure_interpreter(interpreter: OpenInterpreter): ### SYSTEM MESSAGE interpreter.system_message = system_message - ### LLM SETTINGS - - # Local settings - # interpreter.llm.model = "local" - # interpreter.llm.api_base = "https://localhost:8080/v1" # Llamafile default - # interpreter.llm.max_tokens = 1000 - # interpreter.llm.context_window = 3000 + interpreter.llm.supports_vision = True + interpreter.shrink_images = True # Faster but less accurate - # Hosted settings - interpreter.llm.api_key = os.getenv('OPENAI_API_KEY') interpreter.llm.model = "gpt-4" + interpreter.llm.supports_functions = False + interpreter.llm.context_window = 110000 + interpreter.llm.max_tokens = 4096 + interpreter.auto_run = True + + interpreter.force_task_completion = True + interpreter.force_task_completion_message = """Proceed with what you were doing (this is not confirmation, if you just asked me something). You CAN run code on my machine. If you want to run code, start your message with "```"! If the entire task is done, say exactly 'The task is done.' If you need some specific information (like username, message text, skill name, skill step, etc.) say EXACTLY 'Please provide more information.' If it's impossible, say 'The task is impossible.' (If I haven't provided a task, say exactly 'Let me know what you'd like to do next.') Otherwise keep going. 
CRITICAL: REMEMBER TO FOLLOW ALL PREVIOUS INSTRUCTIONS. If I'm teaching you something, remember to run the related `computer.skills.new_skill` function.""" + interpreter.force_task_completion_breakers = [ + "The task is done.", + "The task is impossible.", + "Let me know what you'd like to do next.", + "Please provide more information.", + ] + + + # Check if required packages are installed + + # THERE IS AN INCONSISTENCY HERE. + # We should be testing if they import WITHIN OI's computer, not here. + + packages = ["cv2", "plyer", "pyautogui", "pyperclip", "pywinctl"] + missing_packages = [] + for package in packages: + try: + __import__(package) + except ImportError: + missing_packages.append(package) + + if missing_packages: + interpreter.display_message( + f"> **Missing Package(s): {', '.join(['`' + p + '`' for p in missing_packages])}**\n\nThese packages are required for OS Control.\n\nInstall them?\n" + ) + user_input = input("(y/n) > ") + if user_input.lower() != "y": + print("\nPlease try to install them manually.\n\n") + time.sleep(2) + print("Attempting to start OS control anyway...\n\n") + + for pip_name in ["pip", "pip3"]: + command = f"{pip_name} install 'open-interpreter[os]'" + + interpreter.computer.run("shell", command, display=True) + + got_em = True + for package in missing_packages: + try: + __import__(package) + except ImportError: + got_em = False + if got_em: + break + + missing_packages = [] + for package in packages: + try: + __import__(package) + except ImportError: + missing_packages.append(package) + + if missing_packages != []: + print( + "\n\nWarning: The following packages could not be installed:", + ", ".join(missing_packages), + ) + print("\nPlease try to install them manually.\n\n") + time.sleep(2) + print("Attempting to start OS control anyway...\n\n") + + + # Should we explore other options for ^ these kinds of tags? 
+ # Like: + + # from rich import box + # from rich.console import Console + # from rich.panel import Panel + # console = Console() + # print(">\n\n") + # console.print(Panel("[bold italic white on black]OS CONTROL[/bold italic white on black] Enabled", box=box.SQUARE, expand=False), style="white on black") + # print(">\n\n") + # console.print(Panel("[bold italic white on black]OS CONTROL[/bold italic white on black] Enabled", box=box.HEAVY, expand=False), style="white on black") + # print(">\n\n") + # console.print(Panel("[bold italic white on black]OS CONTROL[/bold italic white on black] Enabled", box=box.DOUBLE, expand=False), style="white on black") + # print(">\n\n") + # console.print(Panel("[bold italic white on black]OS CONTROL[/bold italic white on black] Enabled", box=box.SQUARE, expand=False), style="white on black") + + if not interpreter.offline and not interpreter.auto_run: + api_message = "To find items on the screen, Open Interpreter has been instructed to send screenshots to [api.openinterpreter.com](https://api.openinterpreter.com/) (we do not store them). Add `--offline` to attempt this locally." 
+ interpreter.display_message(api_message) + print("") + + if not interpreter.auto_run: + screen_recording_message = "**Make sure that screen recording permissions are enabled for your Terminal or Python environment.**" + interpreter.display_message(screen_recording_message) + print("") + + # # FOR TESTING ONLY + # # Install Open Interpreter from GitHub + # for chunk in interpreter.computer.run( + # "shell", + # "pip install git+https://github.com/KillianLucas/open-interpreter.git", + # ): + # if chunk.get("format") != "active_line": + # print(chunk.get("content")) + + import os + + from platformdirs import user_data_dir + + directory = os.path.join(user_data_dir("01"), "skills") + interpreter.computer.skills.path = directory + interpreter.computer.skills.import_skills() + + + # Initialize user's task list + interpreter.computer.run( + language="python", + code="tasks = []", + display=interpreter.verbose, + ) + + # Give it access to the computer via Python + interpreter.computer.run( + language="python", + code="import time\nfrom interpreter import interpreter\ncomputer = interpreter.computer", # We ask it to use time, so + display=interpreter.verbose, + ) + + if not interpreter.auto_run: + interpreter.display_message( + "**Warning:** In this mode, Open Interpreter will not require approval before performing actions. Be ready to close your terminal." + ) + print("") # < - Aesthetic choice + ### MISC SETTINGS interpreter.auto_run = True interpreter.computer.languages = [l for l in interpreter.computer.languages if l.name.lower() in ["applescript", "shell", "zsh", "bash", "python"]] - interpreter.force_task_completion = False - interpreter.offline = True + interpreter.force_task_completion = True + # interpreter.offline = True interpreter.id = 206 # Used to identify itself to other interpreters. This should be changed programatically so it's unique. 
### RESET conversations/user.json - - app_dir = user_data_dir('01') conversations_dir = os.path.join(app_dir, 'conversations') os.makedirs(conversations_dir, exist_ok=True) @@ -45,14 +337,4 @@ def configure_interpreter(interpreter: OpenInterpreter): with open(user_json_path, 'w') as file: json.dump([], file) - ### SKILLS - skills_dir = user_data_dir('01', 'skills') - interpreter.computer.skills.path = skills_dir - interpreter.computer.skills.import_skills() - - interpreter.computer.run("python", "tasks=[]") - - interpreter.computer.api_base = "https://oi-video-frame.vercel.app/" - interpreter.computer.run("python","print('test')") - return interpreter \ No newline at end of file diff --git a/01OS/01OS/server/server.py b/01OS/01OS/server/server.py index 318e474..8b3e1a4 100644 --- a/01OS/01OS/server/server.py +++ b/01OS/01OS/server/server.py @@ -220,11 +220,15 @@ async def listener(): time.sleep(15) text = stt(audio_file_path) - print(text) + print("> ", text) message = {"role": "user", "type": "message", "content": text} # At this point, we have only text messages + if type(message["content"]) != str: + print("This should be a string, but it's not:", message["content"]) + message["content"] = message["content"].decode() + # Custom stop message will halt us if message["content"].lower().strip(".,! ") == "stop": continue @@ -238,116 +242,77 @@ async def listener(): accumulated_text = "" - force_task_completion_message = """AUTOMATED MESSAGE: Proceed. You CAN run code on my machine. If you want to run code, start your message with "```"! If the entire task I asked for is done, say exactly 'The task is done.' If you need some specific information (like username or password) say EXACTLY 'Please provide more information.' If it's impossible, say 'The task is impossible.' 
(If I haven't provided a task, say exactly 'Let me know what you'd like to do next.') Otherwise keep going.""" - interpreter.messages = [m for m in interpreter.messages if m["content"] != force_task_completion_message] - insert_force_task_completion_message = True if any([m["type"] == "image" for m in messages]) and interpreter.llm.model.startswith("gpt-"): interpreter.llm.model = "gpt-4-vision-preview" interpreter.llm.supports_vision = True + + for chunk in interpreter.chat(messages, stream=True, display=True): - while insert_force_task_completion_message == True: - - for chunk in interpreter.chat(messages, stream=True, display=True): - - if chunk["type"] == "code": - insert_force_task_completion_message = False - - if any([m["type"] == "image" for m in interpreter.messages]): - interpreter.llm.model = "gpt-4-vision-preview" + if any([m["type"] == "image" for m in interpreter.messages]): + interpreter.llm.model = "gpt-4-vision-preview" - logger.debug("Got chunk:", chunk) + logger.debug("Got chunk:", chunk) - # Send it to the user - await to_device.put(chunk) - # Yield to the event loop, so you actually send it out - await asyncio.sleep(0.01) - - if os.getenv('TTS_RUNNER') == "server": - # Speak full sentences out loud - if chunk["role"] == "assistant" and "content" in chunk and chunk["type"] == "message": - accumulated_text += chunk["content"] - sentences = split_into_sentences(accumulated_text) - - # If we're going to speak, say we're going to stop sending text. - # This should be fixed probably, we should be able to do both in parallel, or only one. 
- if any(is_full_sentence(sentence) for sentence in sentences): - await to_device.put({"role": "assistant", "type": "message", "end": True}) - - if is_full_sentence(sentences[-1]): - for sentence in sentences: - await stream_tts_to_device(sentence) - accumulated_text = "" - else: - for sentence in sentences[:-1]: - await stream_tts_to_device(sentence) - accumulated_text = sentences[-1] - - # If we're going to speak, say we're going to stop sending text. - # This should be fixed probably, we should be able to do both in parallel, or only one. - if any(is_full_sentence(sentence) for sentence in sentences): - await to_device.put({"role": "assistant", "type": "message", "start": True}) + # Send it to the user + await to_device.put(chunk) + # Yield to the event loop, so you actually send it out + await asyncio.sleep(0.01) + + if os.getenv('TTS_RUNNER') == "server": + # Speak full sentences out loud + if chunk["role"] == "assistant" and "content" in chunk and chunk["type"] == "message": + accumulated_text += chunk["content"] + sentences = split_into_sentences(accumulated_text) - # If we have a new message, save our progress and go back to the top - if not from_user.empty(): - - # Check if it's just an end flag. We ignore those. - temp_message = await from_user.get() + # If we're going to speak, say we're going to stop sending text. + # This should be fixed probably, we should be able to do both in parallel, or only one. + if any(is_full_sentence(sentence) for sentence in sentences): + await to_device.put({"role": "assistant", "type": "message", "end": True}) - if type(temp_message) is dict and temp_message.get("role") == "user" and temp_message.get("end"): - # Yup. False alarm. - continue + if is_full_sentence(sentences[-1]): + for sentence in sentences: + await stream_tts_to_device(sentence) + accumulated_text = "" else: - # Whoops! 
Put that back - await from_user.put(temp_message) + for sentence in sentences[:-1]: + await stream_tts_to_device(sentence) + accumulated_text = sentences[-1] + + # If we're going to speak, say we're going to stop sending text. + # This should be fixed probably, we should be able to do both in parallel, or only one. + if any(is_full_sentence(sentence) for sentence in sentences): + await to_device.put({"role": "assistant", "type": "message", "start": True}) + + # If we have a new message, save our progress and go back to the top + if not from_user.empty(): - with open(conversation_history_path, 'w') as file: - json.dump(interpreter.messages, file, indent=4) + # Check if it's just an end flag. We ignore those. + temp_message = await from_user.get() + + if type(temp_message) is dict and temp_message.get("role") == "user" and temp_message.get("end"): + # Yup. False alarm. + continue + else: + # Whoops! Put that back + await from_user.put(temp_message) - # TODO: is triggering seemingly randomly - #logger.info("New user message recieved. Breaking.") - #break + with open(conversation_history_path, 'w') as file: + json.dump(interpreter.messages, file, indent=4) - # Also check if there's any new computer messages - if not from_computer.empty(): - - with open(conversation_history_path, 'w') as file: - json.dump(interpreter.messages, file, indent=4) + # TODO: is triggering seemingly randomly + #logger.info("New user message recieved. Breaking.") + #break - logger.info("New computer message recieved. Breaking.") - break - else: + # Also check if there's any new computer messages + if not from_computer.empty(): + with open(conversation_history_path, 'w') as file: json.dump(interpreter.messages, file, indent=4) - force_task_completion_responses = [ - "the task is done.", - "the task is impossible.", - "let me know what you'd like to do next.", - "please provide more information.", - ] - - # Did the LLM respond with one of the key messages? 
- if ( - interpreter.messages - and any( - task_status in interpreter.messages[-1].get("content", "").lower() - for task_status in force_task_completion_responses - ) - ): - insert_force_task_completion_message = False - break - - if insert_force_task_completion_message: - interpreter.messages += [ - { - "role": "user", - "type": "message", - "content": force_task_completion_message, - } - ] - else: - break + logger.info("New computer message recieved. Breaking.") + break + async def stream_tts_to_device(sentence): force_task_completion_responses = [ diff --git a/01OS/01OS/server/services/stt/local-whisper/stt.py b/01OS/01OS/server/services/stt/local-whisper/stt.py index 9514c1d..b318e8e 100644 --- a/01OS/01OS/server/services/stt/local-whisper/stt.py +++ b/01OS/01OS/server/services/stt/local-whisper/stt.py @@ -95,9 +95,9 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str: f='s16le', ar='16000', ac=1, - ).output(output_path).run() + ).output(output_path, loglevel='panic').run() else: - ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run() + ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k', loglevel='panic').run() try: yield output_path diff --git a/01OS/01OS/server/services/stt/openai/stt.py b/01OS/01OS/server/services/stt/openai/stt.py index 4823965..e6ecef5 100644 --- a/01OS/01OS/server/services/stt/openai/stt.py +++ b/01OS/01OS/server/services/stt/openai/stt.py @@ -44,16 +44,15 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str: # Export to wav output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav") - print(mime_type, input_path, output_path) if mime_type == "audio/raw": ffmpeg.input( input_path, f='s16le', ar='16000', ac=1, - ).output(output_path).run() + ).output(output_path, loglevel='panic').run() else: - ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run() + 
ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k', loglevel='panic').run() try: yield output_path diff --git a/01OS/01OS/server/services/tts/openai/tts.py b/01OS/01OS/server/services/tts/openai/tts.py index 298b52d..23eb65b 100644 --- a/01OS/01OS/server/services/tts/openai/tts.py +++ b/01OS/01OS/server/services/tts/openai/tts.py @@ -23,7 +23,7 @@ class Tts: # TODO: hack to format audio correctly for device outfile = tempfile.gettempdir() + "/" + "raw.dat" - ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run() + ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1", loglevel='panic').run() return outfile diff --git a/01OS/01OS/server/services/tts/piper/tts.py b/01OS/01OS/server/services/tts/piper/tts.py index 53bf0dc..1b6ea57 100644 --- a/01OS/01OS/server/services/tts/piper/tts.py +++ b/01OS/01OS/server/services/tts/piper/tts.py @@ -24,7 +24,7 @@ class Tts: # TODO: hack to format audio correctly for device outfile = tempfile.gettempdir() + "/" + "raw.dat" - ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run() + ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1", loglevel='panic').run() return outfile diff --git a/01OS/01OS/server/utils/bytes_to_wav.py b/01OS/01OS/server/utils/bytes_to_wav.py index 3d84814..d40ae15 100644 --- a/01OS/01OS/server/utils/bytes_to_wav.py +++ b/01OS/01OS/server/utils/bytes_to_wav.py @@ -37,9 +37,9 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str: f='s16le', ar='16000', ac=1, - ).output(output_path).run() + ).output(output_path, loglevel='panic').run() else: - ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run() + ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k', loglevel='panic').run() try: yield output_path diff --git a/01OS/pyproject.toml b/01OS/pyproject.toml index 2ba825b..5d94e31 100644 --- a/01OS/pyproject.toml 
+++ b/01OS/pyproject.toml @@ -24,7 +24,7 @@ pydub = "^0.25.1" ngrok = "^1.0.0" simpleaudio = "^1.0.4" opencv-python = "^4.9.0.80" -open-interpreter = {version = "0.2.1rc2", extras = ["os"]} +open-interpreter = { git = "https://github.com/KillianLucas/open-interpreter.git", branch = "development" } psutil = "^5.9.8" typer = "^0.9.0" platformdirs = "^4.2.0"