commit 59c3e8723e
Binary file not shown.
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,4 @@
Version 1.3
released 24_06_04

Added cable management clips
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
@@ -0,0 +1,29 @@
# iOS/iPadOS Native Client

This repository contains the source code for the 01 iOS/iPadOS Native app. It is a work in progress and currently has a dedicated development button.

Feel free to improve this and make a pull request!

To run it on your own, you can either install the app directly through the current TestFlight [here](https://testflight.apple.com/join/v8SyuzMT), or build from the source code files in Xcode on your Mac.

## Instructions

Follow the **[software setup steps](https://github.com/OpenInterpreter/01?tab=readme-ov-file#software)** in the main repo's README before continuing here.

In Xcode, open the 'zeroone-app' project file in the project folder, change the Signing Team and Bundle Identifier, and build.

## Using the App

The app has four main features:

### 1. The speak "Button"
Made to emulate the button on the 01 hardware models, the big yellow circle in the middle of the screen is what you hold down while speaking to the model; let go when you're finished speaking.

### 2. The settings button
Tapping the settings button lets you input your websocket address so that the app can connect to your computer. If you're not sure how to obtain this, check the terminal running the 01 server (see the **Instructions** section above).
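
For reference, here is roughly what the app does with the address you enter (a minimal sketch of `establishConnection()` in `ViewController.swift`; the example address is hypothetical, so substitute the host and port your own 01 server prints):

```swift
import Foundation
import Starscream

// Hypothetical address; use the host:port printed by your 01 server.
let address = "192.168.1.42:10001"

// The app stores the address in UserDefaults, builds a plain HTTP URL
// from it, and lets Starscream upgrade the connection to a websocket.
var request = URLRequest(url: URL(string: "http://\(address)")!)
request.timeoutInterval = 5
let socket = WebSocket(request: request)
socket.connect()
```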

### 3. The reconnect button
The arrow is RED when the websocket connection is not live and GREEN when it is. If you've made changes, you can reconnect by tapping the arrow button (or by simply starting to hold the speak button).

### 4. The terminal button
The terminal button lets you see all response text coming in from the server side of the 01. Toggle it by tapping the button; each toggle clears the on-device cache of text.
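
Under the hood, each press-and-release of the speak button is sent to the server as one streaming LMC audio message: a JSON start flag, the raw recorded WAV bytes, then a JSON end flag. A minimal sketch mirroring `sendAudio(audio:)` in `ViewController.swift` (the free function form here is for illustration):

```swift
import Foundation
import Starscream

// Streaming LMC framing used by the app: start flag, audio payload, end flag.
func sendAudio(over socket: WebSocket, audio: Data) {
    socket.write(string: "{\"role\": \"user\", \"type\": \"audio\", \"format\": \"bytes.raw\", \"start\": true}")
    socket.write(data: audio) // 16 kHz mono PCM WAV recorded on-device
    socket.write(string: "{\"role\": \"user\", \"type\": \"audio\", \"format\": \"bytes.raw\", \"end\": true}")
}
```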
@@ -0,0 +1,415 @@
// !$*UTF8*$!
{
    archiveVersion = 1;
    classes = {
    };
    objectVersion = 56;
    objects = {

/* Begin PBXBuildFile section */
        750E4C0B2BEDD11C00AEE3B1 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 750E4C0A2BEDD11C00AEE3B1 /* AppDelegate.swift */; };
        750E4C0D2BEDD11C00AEE3B1 /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 750E4C0C2BEDD11C00AEE3B1 /* SceneDelegate.swift */; };
        750E4C0F2BEDD11C00AEE3B1 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 750E4C0E2BEDD11C00AEE3B1 /* ViewController.swift */; };
        750E4C122BEDD11C00AEE3B1 /* Base in Resources */ = {isa = PBXBuildFile; fileRef = 750E4C112BEDD11C00AEE3B1 /* Base */; };
        750E4C142BEDD11D00AEE3B1 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 750E4C132BEDD11D00AEE3B1 /* Assets.xcassets */; };
        750E4C172BEDD11D00AEE3B1 /* Base in Resources */ = {isa = PBXBuildFile; fileRef = 750E4C162BEDD11D00AEE3B1 /* Base */; };
        750E4C202BEDD16E00AEE3B1 /* Starscream in Frameworks */ = {isa = PBXBuildFile; productRef = 750E4C1F2BEDD16E00AEE3B1 /* Starscream */; };
        755DC3B22BEE60A7002B66DF /* AudioRecording.swift in Sources */ = {isa = PBXBuildFile; fileRef = 755DC3B12BEE60A7002B66DF /* AudioRecording.swift */; };
/* End PBXBuildFile section */

/* Begin PBXFileReference section */
        750E4C072BEDD11C00AEE3B1 /* zeroone-app.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "zeroone-app.app"; sourceTree = BUILT_PRODUCTS_DIR; };
        750E4C0A2BEDD11C00AEE3B1 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
        750E4C0C2BEDD11C00AEE3B1 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = "<group>"; };
        750E4C0E2BEDD11C00AEE3B1 /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; };
        750E4C112BEDD11C00AEE3B1 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
        750E4C132BEDD11D00AEE3B1 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
        750E4C162BEDD11D00AEE3B1 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
        750E4C182BEDD11D00AEE3B1 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
        755DC3B12BEE60A7002B66DF /* AudioRecording.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioRecording.swift; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
        750E4C042BEDD11C00AEE3B1 /* Frameworks */ = {
            isa = PBXFrameworksBuildPhase;
            buildActionMask = 2147483647;
            files = (
                750E4C202BEDD16E00AEE3B1 /* Starscream in Frameworks */,
            );
            runOnlyForDeploymentPostprocessing = 0;
        };
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
        750E4BFE2BEDD11C00AEE3B1 = {
            isa = PBXGroup;
            children = (
                750E4C092BEDD11C00AEE3B1 /* zeroone-app */,
                750E4C082BEDD11C00AEE3B1 /* Products */,
            );
            sourceTree = "<group>";
        };
        750E4C082BEDD11C00AEE3B1 /* Products */ = {
            isa = PBXGroup;
            children = (
                750E4C072BEDD11C00AEE3B1 /* zeroone-app.app */,
            );
            name = Products;
            sourceTree = "<group>";
        };
        750E4C092BEDD11C00AEE3B1 /* zeroone-app */ = {
            isa = PBXGroup;
            children = (
                750E4C0A2BEDD11C00AEE3B1 /* AppDelegate.swift */,
                750E4C0C2BEDD11C00AEE3B1 /* SceneDelegate.swift */,
                750E4C0E2BEDD11C00AEE3B1 /* ViewController.swift */,
                750E4C102BEDD11C00AEE3B1 /* Main.storyboard */,
                750E4C132BEDD11D00AEE3B1 /* Assets.xcassets */,
                750E4C152BEDD11D00AEE3B1 /* LaunchScreen.storyboard */,
                750E4C182BEDD11D00AEE3B1 /* Info.plist */,
                755DC3B12BEE60A7002B66DF /* AudioRecording.swift */,
            );
            path = "zeroone-app";
            sourceTree = "<group>";
        };
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
        750E4C062BEDD11C00AEE3B1 /* zeroone-app */ = {
            isa = PBXNativeTarget;
            buildConfigurationList = 750E4C1B2BEDD11D00AEE3B1 /* Build configuration list for PBXNativeTarget "zeroone-app" */;
            buildPhases = (
                750E4C032BEDD11C00AEE3B1 /* Sources */,
                750E4C042BEDD11C00AEE3B1 /* Frameworks */,
                750E4C052BEDD11C00AEE3B1 /* Resources */,
            );
            buildRules = (
            );
            dependencies = (
            );
            name = "zeroone-app";
            packageProductDependencies = (
                750E4C1F2BEDD16E00AEE3B1 /* Starscream */,
            );
            productName = "zeroone-app";
            productReference = 750E4C072BEDD11C00AEE3B1 /* zeroone-app.app */;
            productType = "com.apple.product-type.application";
        };
/* End PBXNativeTarget section */

/* Begin PBXProject section */
        750E4BFF2BEDD11C00AEE3B1 /* Project object */ = {
            isa = PBXProject;
            attributes = {
                BuildIndependentTargetsInParallel = 1;
                LastSwiftUpdateCheck = 1530;
                LastUpgradeCheck = 1530;
                TargetAttributes = {
                    750E4C062BEDD11C00AEE3B1 = {
                        CreatedOnToolsVersion = 15.3;
                    };
                };
            };
            buildConfigurationList = 750E4C022BEDD11C00AEE3B1 /* Build configuration list for PBXProject "zeroone-app" */;
            compatibilityVersion = "Xcode 14.0";
            developmentRegion = en;
            hasScannedForEncodings = 0;
            knownRegions = (
                en,
                Base,
            );
            mainGroup = 750E4BFE2BEDD11C00AEE3B1;
            packageReferences = (
                750E4C1E2BEDD16D00AEE3B1 /* XCRemoteSwiftPackageReference "Starscream" */,
            );
            productRefGroup = 750E4C082BEDD11C00AEE3B1 /* Products */;
            projectDirPath = "";
            projectRoot = "";
            targets = (
                750E4C062BEDD11C00AEE3B1 /* zeroone-app */,
            );
        };
/* End PBXProject section */

/* Begin PBXResourcesBuildPhase section */
        750E4C052BEDD11C00AEE3B1 /* Resources */ = {
            isa = PBXResourcesBuildPhase;
            buildActionMask = 2147483647;
            files = (
                750E4C142BEDD11D00AEE3B1 /* Assets.xcassets in Resources */,
                750E4C172BEDD11D00AEE3B1 /* Base in Resources */,
                750E4C122BEDD11C00AEE3B1 /* Base in Resources */,
            );
            runOnlyForDeploymentPostprocessing = 0;
        };
/* End PBXResourcesBuildPhase section */

/* Begin PBXSourcesBuildPhase section */
        750E4C032BEDD11C00AEE3B1 /* Sources */ = {
            isa = PBXSourcesBuildPhase;
            buildActionMask = 2147483647;
            files = (
                750E4C0F2BEDD11C00AEE3B1 /* ViewController.swift in Sources */,
                750E4C0B2BEDD11C00AEE3B1 /* AppDelegate.swift in Sources */,
                755DC3B22BEE60A7002B66DF /* AudioRecording.swift in Sources */,
                750E4C0D2BEDD11C00AEE3B1 /* SceneDelegate.swift in Sources */,
            );
            runOnlyForDeploymentPostprocessing = 0;
        };
/* End PBXSourcesBuildPhase section */

/* Begin PBXVariantGroup section */
        750E4C102BEDD11C00AEE3B1 /* Main.storyboard */ = {
            isa = PBXVariantGroup;
            children = (
                750E4C112BEDD11C00AEE3B1 /* Base */,
            );
            name = Main.storyboard;
            sourceTree = "<group>";
        };
        750E4C152BEDD11D00AEE3B1 /* LaunchScreen.storyboard */ = {
            isa = PBXVariantGroup;
            children = (
                750E4C162BEDD11D00AEE3B1 /* Base */,
            );
            name = LaunchScreen.storyboard;
            sourceTree = "<group>";
        };
/* End PBXVariantGroup section */

/* Begin XCBuildConfiguration section */
        750E4C192BEDD11D00AEE3B1 /* Debug */ = {
            isa = XCBuildConfiguration;
            buildSettings = {
                ALWAYS_SEARCH_USER_PATHS = NO;
                ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
                CLANG_ANALYZER_NONNULL = YES;
                CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
                CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
                CLANG_ENABLE_MODULES = YES;
                CLANG_ENABLE_OBJC_ARC = YES;
                CLANG_ENABLE_OBJC_WEAK = YES;
                CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
                CLANG_WARN_BOOL_CONVERSION = YES;
                CLANG_WARN_COMMA = YES;
                CLANG_WARN_CONSTANT_CONVERSION = YES;
                CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
                CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
                CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
                CLANG_WARN_EMPTY_BODY = YES;
                CLANG_WARN_ENUM_CONVERSION = YES;
                CLANG_WARN_INFINITE_RECURSION = YES;
                CLANG_WARN_INT_CONVERSION = YES;
                CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
                CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
                CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
                CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
                CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
                CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
                CLANG_WARN_STRICT_PROTOTYPES = YES;
                CLANG_WARN_SUSPICIOUS_MOVE = YES;
                CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
                CLANG_WARN_UNREACHABLE_CODE = YES;
                CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
                COPY_PHASE_STRIP = NO;
                DEBUG_INFORMATION_FORMAT = dwarf;
                ENABLE_STRICT_OBJC_MSGSEND = YES;
                ENABLE_TESTABILITY = YES;
                ENABLE_USER_SCRIPT_SANDBOXING = YES;
                GCC_C_LANGUAGE_STANDARD = gnu17;
                GCC_DYNAMIC_NO_PIC = NO;
                GCC_NO_COMMON_BLOCKS = YES;
                GCC_OPTIMIZATION_LEVEL = 0;
                GCC_PREPROCESSOR_DEFINITIONS = (
                    "DEBUG=1",
                    "$(inherited)",
                );
                GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
                GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
                GCC_WARN_UNDECLARED_SELECTOR = YES;
                GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
                GCC_WARN_UNUSED_FUNCTION = YES;
                GCC_WARN_UNUSED_VARIABLE = YES;
                IPHONEOS_DEPLOYMENT_TARGET = 15.0;
                LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
                MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
                MTL_FAST_MATH = YES;
                ONLY_ACTIVE_ARCH = YES;
                SDKROOT = iphoneos;
                SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)";
                SWIFT_OPTIMIZATION_LEVEL = "-Onone";
            };
            name = Debug;
        };
        750E4C1A2BEDD11D00AEE3B1 /* Release */ = {
            isa = XCBuildConfiguration;
            buildSettings = {
                ALWAYS_SEARCH_USER_PATHS = NO;
                ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
                CLANG_ANALYZER_NONNULL = YES;
                CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
                CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
                CLANG_ENABLE_MODULES = YES;
                CLANG_ENABLE_OBJC_ARC = YES;
                CLANG_ENABLE_OBJC_WEAK = YES;
                CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
                CLANG_WARN_BOOL_CONVERSION = YES;
                CLANG_WARN_COMMA = YES;
                CLANG_WARN_CONSTANT_CONVERSION = YES;
                CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
                CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
                CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
                CLANG_WARN_EMPTY_BODY = YES;
                CLANG_WARN_ENUM_CONVERSION = YES;
                CLANG_WARN_INFINITE_RECURSION = YES;
                CLANG_WARN_INT_CONVERSION = YES;
                CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
                CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
                CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
                CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
                CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
                CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
                CLANG_WARN_STRICT_PROTOTYPES = YES;
                CLANG_WARN_SUSPICIOUS_MOVE = YES;
                CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
                CLANG_WARN_UNREACHABLE_CODE = YES;
                CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
                COPY_PHASE_STRIP = NO;
                DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
                ENABLE_NS_ASSERTIONS = NO;
                ENABLE_STRICT_OBJC_MSGSEND = YES;
                ENABLE_USER_SCRIPT_SANDBOXING = YES;
                GCC_C_LANGUAGE_STANDARD = gnu17;
                GCC_NO_COMMON_BLOCKS = YES;
                GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
                GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
                GCC_WARN_UNDECLARED_SELECTOR = YES;
                GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
                GCC_WARN_UNUSED_FUNCTION = YES;
                GCC_WARN_UNUSED_VARIABLE = YES;
                IPHONEOS_DEPLOYMENT_TARGET = 15.0;
                LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
                MTL_ENABLE_DEBUG_INFO = NO;
                MTL_FAST_MATH = YES;
                SDKROOT = iphoneos;
                SWIFT_COMPILATION_MODE = wholemodule;
                VALIDATE_PRODUCT = YES;
            };
            name = Release;
        };
        750E4C1C2BEDD11D00AEE3B1 /* Debug */ = {
            isa = XCBuildConfiguration;
            buildSettings = {
                ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
                ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
                CODE_SIGN_STYLE = Automatic;
                CURRENT_PROJECT_VERSION = 4;
                DEVELOPMENT_TEAM = W5NGQJV8X2;
                GENERATE_INFOPLIST_FILE = YES;
                INFOPLIST_FILE = "zeroone-app/Info.plist";
                INFOPLIST_KEY_CFBundleDisplayName = 01ForiOS;
                INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools";
                INFOPLIST_KEY_NSMicrophoneUsageDescription = "Audio data from microphone is needed to send commands.";
                INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
                INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
                INFOPLIST_KEY_UIMainStoryboardFile = Main;
                INFOPLIST_KEY_UISupportedInterfaceOrientations = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown";
                INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown";
                IPHONEOS_DEPLOYMENT_TARGET = 15;
                LD_RUNPATH_SEARCH_PATHS = (
                    "$(inherited)",
                    "@executable_path/Frameworks",
                );
                MARKETING_VERSION = 1.0;
                PRODUCT_BUNDLE_IDENTIFIER = "com.ontheroofstudios.zeroone-app";
                PRODUCT_NAME = "$(TARGET_NAME)";
                SUPPORTED_PLATFORMS = "iphoneos iphonesimulator";
                SUPPORTS_MACCATALYST = NO;
                SUPPORTS_MAC_DESIGNED_FOR_IPHONE_IPAD = NO;
                SUPPORTS_XR_DESIGNED_FOR_IPHONE_IPAD = NO;
                SWIFT_EMIT_LOC_STRINGS = YES;
                SWIFT_VERSION = 5.0;
                TARGETED_DEVICE_FAMILY = "1,2";
            };
            name = Debug;
        };
        750E4C1D2BEDD11D00AEE3B1 /* Release */ = {
            isa = XCBuildConfiguration;
            buildSettings = {
                ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
                ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
                CODE_SIGN_STYLE = Automatic;
                CURRENT_PROJECT_VERSION = 4;
                DEVELOPMENT_TEAM = W5NGQJV8X2;
                GENERATE_INFOPLIST_FILE = YES;
                INFOPLIST_FILE = "zeroone-app/Info.plist";
                INFOPLIST_KEY_CFBundleDisplayName = 01ForiOS;
                INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools";
                INFOPLIST_KEY_NSMicrophoneUsageDescription = "Audio data from microphone is needed to send commands.";
                INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
                INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
                INFOPLIST_KEY_UIMainStoryboardFile = Main;
                INFOPLIST_KEY_UISupportedInterfaceOrientations = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown";
                INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown";
                IPHONEOS_DEPLOYMENT_TARGET = 15;
                LD_RUNPATH_SEARCH_PATHS = (
                    "$(inherited)",
                    "@executable_path/Frameworks",
                );
                MARKETING_VERSION = 1.0;
                PRODUCT_BUNDLE_IDENTIFIER = "com.ontheroofstudios.zeroone-app";
                PRODUCT_NAME = "$(TARGET_NAME)";
                SUPPORTED_PLATFORMS = "iphoneos iphonesimulator";
                SUPPORTS_MACCATALYST = NO;
                SUPPORTS_MAC_DESIGNED_FOR_IPHONE_IPAD = NO;
                SUPPORTS_XR_DESIGNED_FOR_IPHONE_IPAD = NO;
                SWIFT_EMIT_LOC_STRINGS = YES;
                SWIFT_VERSION = 5.0;
                TARGETED_DEVICE_FAMILY = "1,2";
            };
            name = Release;
        };
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
        750E4C022BEDD11C00AEE3B1 /* Build configuration list for PBXProject "zeroone-app" */ = {
            isa = XCConfigurationList;
            buildConfigurations = (
                750E4C192BEDD11D00AEE3B1 /* Debug */,
                750E4C1A2BEDD11D00AEE3B1 /* Release */,
            );
            defaultConfigurationIsVisible = 0;
            defaultConfigurationName = Release;
        };
        750E4C1B2BEDD11D00AEE3B1 /* Build configuration list for PBXNativeTarget "zeroone-app" */ = {
            isa = XCConfigurationList;
            buildConfigurations = (
                750E4C1C2BEDD11D00AEE3B1 /* Debug */,
                750E4C1D2BEDD11D00AEE3B1 /* Release */,
            );
            defaultConfigurationIsVisible = 0;
            defaultConfigurationName = Release;
        };
/* End XCConfigurationList section */

/* Begin XCRemoteSwiftPackageReference section */
        750E4C1E2BEDD16D00AEE3B1 /* XCRemoteSwiftPackageReference "Starscream" */ = {
            isa = XCRemoteSwiftPackageReference;
            repositoryURL = "https://github.com/daltoniam/Starscream";
            requirement = {
                kind = upToNextMajorVersion;
                minimumVersion = 4.0.8;
            };
        };
/* End XCRemoteSwiftPackageReference section */

/* Begin XCSwiftPackageProductDependency section */
        750E4C1F2BEDD16E00AEE3B1 /* Starscream */ = {
            isa = XCSwiftPackageProductDependency;
            package = 750E4C1E2BEDD16D00AEE3B1 /* XCRemoteSwiftPackageReference "Starscream" */;
            productName = Starscream;
        };
/* End XCSwiftPackageProductDependency section */
    };
    rootObject = 750E4BFF2BEDD11C00AEE3B1 /* Project object */;
}
@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "self:">
   </FileRef>
</Workspace>
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>IDEDidComputeMac32BitWarning</key>
    <true/>
</dict>
</plist>
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict/>
</plist>
Binary file not shown.
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>BuildLocationStyle</key>
    <string>UseAppPreferences</string>
    <key>CustomBuildLocationType</key>
    <string>RelativeToDerivedData</string>
    <key>DerivedDataLocationStyle</key>
    <string>Default</string>
    <key>ShowSharedSchemesAutomaticallyEnabled</key>
    <true/>
</dict>
</plist>
@@ -0,0 +1,36 @@
//
// AppDelegate.swift
// zeroone-app
//
// Created by Elad Dekel on 2024-05-09.
//

import UIKit

@main
class AppDelegate: UIResponder, UIApplicationDelegate {

    func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool {
        // Override point for customization after application launch.
        return true
    }

    // MARK: UISceneSession Lifecycle

    func application(_ application: UIApplication, configurationForConnecting connectingSceneSession: UISceneSession, options: UIScene.ConnectionOptions) -> UISceneConfiguration {
        // Called when a new scene session is being created.
        // Use this method to select a configuration to create the new scene with.
        return UISceneConfiguration(name: "Default Configuration", sessionRole: connectingSceneSession.role)
    }

    func application(_ application: UIApplication, didDiscardSceneSessions sceneSessions: Set<UISceneSession>) {
        // Called when the user discards a scene session.
        // If any sessions were discarded while the application was not running, this will be called shortly after application:didFinishLaunchingWithOptions.
        // Use this method to release any resources that were specific to the discarded scenes, as they will not return.
    }

}
@@ -0,0 +1,11 @@
{
  "colors" : [
    {
      "idiom" : "universal"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
@@ -0,0 +1,14 @@
{
  "images" : [
    {
      "filename" : "O.png",
      "idiom" : "universal",
      "platform" : "ios",
      "size" : "1024x1024"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
Binary image added: O.png (38 KiB)
@@ -0,0 +1,6 @@
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
@@ -0,0 +1,12 @@
{
  "images" : [
    {
      "filename" : "vector.svg",
      "idiom" : "universal"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
Binary image added: vector.svg (460 B)
@@ -0,0 +1,67 @@
//
// AudioRecording.swift
// zeroone-app
//
// Created by Elad Dekel on 2024-05-10.
//

import Foundation
import AVFoundation

class AudioRecording: NSObject, AVAudioRecorderDelegate {

    var recorder: AVAudioRecorder!
    var session: AVAudioSession!

    var isRecording = false

    func startRecording() {
        session = AVAudioSession.sharedInstance() // use the shared audio session; AVAudioSession() is not a valid way to obtain one
        let audio = getDocumentsDirectory().appendingPathComponent("tempvoice.wav") // where the audio data will be recorded to
        let settings: [String: Any] = [
            AVFormatIDKey: kAudioFormatLinearPCM,
            AVSampleRateKey: 16000.0,
            AVNumberOfChannelsKey: 1,
            AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue
        ]
        do {
            // Configure and activate the session before recording starts.
            try session.setCategory(.playAndRecord, mode: .default)
            try session.setActive(true)
            recorder = try AVAudioRecorder(url: audio, settings: settings)
            recorder!.delegate = self
            recorder!.record()
            isRecording = true
        } catch {
            print("Error recording")
            print(error.localizedDescription)
        }
    }

    func getDocumentsDirectory() -> URL { // big thanks to twostraws for this helper function (hackingwithswift.com)
        let paths = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)
        return paths[0]
    }

    func stopRecording() -> Data? {
        if isRecording && recorder != nil {
            recorder!.stop()
            let audio = getDocumentsDirectory().appendingPathComponent("tempvoice.wav")
            recorder = nil
            isRecording = false // reset so the next recording starts from a clean state
            do {
                let data = try Data(contentsOf: audio) // returns the raw audio data
                try FileManager.default.removeItem(at: audio) // deletes the temp file
                return data
            } catch {
                print(error.localizedDescription)
                return nil
            }
        } else {
            print("not recording")
            return nil
        }
    }

}
@@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
    <dependencies>
        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
        <capability name="Safe area layout guides" minToolsVersion="9.0"/>
        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
    </dependencies>
    <scenes>
        <!--View Controller-->
        <scene sceneID="EHf-IW-A2E">
            <objects>
                <viewController id="01J-lp-oVM" sceneMemberID="viewController">
                    <view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
                        <rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
                        <color key="backgroundColor" xcode11CocoaTouchSystemColor="systemBackgroundColor" cocoaTouchSystemColor="whiteColor"/>
                        <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
                    </view>
                </viewController>
                <placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
            </objects>
            <point key="canvasLocation" x="53" y="375"/>
        </scene>
    </scenes>
</document>
@@ -0,0 +1,127 @@
<?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="32700.99.1234" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
    <device id="retina6_12" orientation="portrait" appearance="light"/>
    <dependencies>
        <deployment identifier="iOS"/>
        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="22685"/>
        <capability name="Safe area layout guides" minToolsVersion="9.0"/>
        <capability name="System colors in document resources" minToolsVersion="11.0"/>
        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
    </dependencies>
    <scenes>
        <!--View Controller-->
        <scene sceneID="tne-QT-ifu">
            <objects>
                <viewController id="BYZ-38-t0r" customClass="ViewController" customModule="zeroone_app" customModuleProvider="target" sceneMemberID="viewController">
                    <view key="view" contentMode="scaleToFill" id="8bC-Xf-vdC">
                        <rect key="frame" x="0.0" y="0.0" width="393" height="852"/>
                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
                        <subviews>
                            <imageView clipsSubviews="YES" userInteractionEnabled="NO" contentMode="scaleAspectFit" horizontalHuggingPriority="251" verticalHuggingPriority="251" image="circle.fill" catalog="system" translatesAutoresizingMaskIntoConstraints="NO" id="8GG-Ei-Zce">
                                <rect key="frame" x="79" y="304.66666666666669" width="245" height="243.66666666666657"/>
                                <color key="tintColor" systemColor="systemYellowColor"/>
                                <constraints>
                                    <constraint firstAttribute="height" constant="245" id="ZNG-mL-QWz"/>
                                    <constraint firstAttribute="width" constant="245" id="kvy-0q-8ID"/>
                                </constraints>
                            </imageView>
                            <label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="" textAlignment="center" lineBreakMode="tailTruncation" numberOfLines="0" baselineAdjustment="alignBaselines" minimumFontSize="9" translatesAutoresizingMaskIntoConstraints="NO" id="gJn-v2-d0Z">
                                <rect key="frame" x="49" y="131" width="303.66666666666669" height="92.666666666666686"/>
                                <fontDescription key="fontDescription" type="system" pointSize="24"/>
                                <nil key="textColor"/>
                                <nil key="highlightedColor"/>
                            </label>
                            <stackView opaque="NO" contentMode="scaleToFill" spacing="39" translatesAutoresizingMaskIntoConstraints="NO" id="Jep-bq-nBz">
                                <rect key="frame" x="95.999999999999986" y="780.33333333333337" width="209.66666666666663" height="37.666666666666629"/>
                                <subviews>
                                    <imageView clipsSubviews="YES" userInteractionEnabled="NO" contentMode="scaleAspectFit" horizontalHuggingPriority="251" verticalHuggingPriority="251" horizontalCompressionResistancePriority="751" image="gear" catalog="system" translatesAutoresizingMaskIntoConstraints="NO" id="4J4-Vq-uXz">
                                        <rect key="frame" x="0.0" y="-5" width="47.666666666666664" height="47.333333333333329"/>
                                        <color key="tintColor" systemColor="labelColor"/>
                                        <constraints>
                                            <constraint firstAttribute="width" constant="47.666666666666664" id="RUe-bq-jRC"/>
                                            <constraint firstAttribute="width" relation="greaterThanOrEqual" constant="42" id="yem-fm-8uw"/>
                                        </constraints>
                                        <preferredSymbolConfiguration key="preferredSymbolConfiguration" configurationType="pointSize" pointSize="25" scale="large"/>
                                    </imageView>
                                    <imageView clipsSubviews="YES" userInteractionEnabled="NO" contentMode="scaleAspectFit" horizontalHuggingPriority="251" verticalHuggingPriority="251" image="arrow.clockwise" catalog="system" translatesAutoresizingMaskIntoConstraints="NO" id="sTV-gO-axp">
                                        <rect key="frame" x="86.666666666666657" y="-1.3333333333333321" width="42" height="37.333333333333329"/>
                                        <color key="tintColor" systemColor="labelColor"/>
                                        <constraints>
                                            <constraint firstAttribute="width" constant="42" id="gYc-1f-wW5"/>
                                        </constraints>
                                        <preferredSymbolConfiguration key="preferredSymbolConfiguration" configurationType="pointSize" pointSize="30"/>
                                    </imageView>
                                    <imageView clipsSubviews="YES" userInteractionEnabled="NO" contentMode="scaleAspectFit" horizontalHuggingPriority="251" verticalHuggingPriority="251" image="terminal" catalog="system" translatesAutoresizingMaskIntoConstraints="NO" id="Czp-9u-tDH">
                                        <rect key="frame" x="167.66666666666669" y="2.6666666666666679" width="42" height="32.666666666666657"/>
                                        <color key="tintColor" systemColor="labelColor"/>
                                        <constraints>
                                            <constraint firstAttribute="width" constant="42" id="ZZC-ol-tbv"/>
                                        </constraints>
                                        <preferredSymbolConfiguration key="preferredSymbolConfiguration" configurationType="pointSize" pointSize="30"/>
                                    </imageView>
                                </subviews>
                                <constraints>
                                    <constraint firstAttribute="width" secondItem="Jep-bq-nBz" secondAttribute="height" multiplier="50:9" id="3oj-ZY-vQc"/>
                                </constraints>
                            </stackView>
                            <textView clipsSubviews="YES" multipleTouchEnabled="YES" alpha="0.0" contentMode="scaleToFill" keyboardDismissMode="interactive" editable="NO" textAlignment="natural" translatesAutoresizingMaskIntoConstraints="NO" id="Vqf-Pz-bQv">
                                <rect key="frame" x="40" y="95" width="322.66666666666669" height="394"/>
                                <color key="backgroundColor" systemColor="systemGray6Color"/>
                                <constraints>
                                    <constraint firstAttribute="width" secondItem="Vqf-Pz-bQv" secondAttribute="height" multiplier="307:375" id="i7c-pN-3Yk"/>
                                </constraints>
                                <color key="textColor" systemColor="labelColor"/>
                                <fontDescription key="fontDescription" type="system" pointSize="14"/>
                                <textInputTraits key="textInputTraits" autocapitalizationType="sentences"/>
                            </textView>
                        </subviews>
                        <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
                        <color key="backgroundColor" systemColor="systemBackgroundColor"/>
                        <constraints>
                            <constraint firstItem="8GG-Ei-Zce" firstAttribute="centerY" secondItem="8bC-Xf-vdC" secondAttribute="centerY" id="2aD-gd-oFQ"/>
                            <constraint firstItem="Jep-bq-nBz" firstAttribute="bottom" secondItem="6Tk-OE-BBY" secondAttribute="bottom" id="4rh-NU-Qkg"/>
                            <constraint firstItem="Vqf-Pz-bQv" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="40" id="OLg-bI-RLB"/>
                            <constraint firstItem="6Tk-OE-BBY" firstAttribute="trailing" secondItem="Jep-bq-nBz" secondAttribute="trailing" constant="97" id="Wc2-3u-fpo"/>
                            <constraint firstItem="6Tk-OE-BBY" firstAttribute="trailing" secondItem="Vqf-Pz-bQv" secondAttribute="trailing" constant="40" id="cxX-KY-p3B"/>
                            <constraint firstItem="6Tk-OE-BBY" firstAttribute="trailing" secondItem="gJn-v2-d0Z" secondAttribute="trailing" constant="50" id="iNY-a6-Ww4"/>
                            <constraint firstItem="gJn-v2-d0Z" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="49" id="owN-up-6fV"/>
                            <constraint firstItem="8GG-Ei-Zce" firstAttribute="top" secondItem="gJn-v2-d0Z" secondAttribute="bottom" constant="80" id="pKl-kg-6Qv"/>
                            <constraint firstItem="8GG-Ei-Zce" firstAttribute="centerX" secondItem="8bC-Xf-vdC" secondAttribute="centerX" id="qrj-Tv-TZB"/>
                            <constraint firstItem="Vqf-Pz-bQv" firstAttribute="top" secondItem="6Tk-OE-BBY" secondAttribute="top" constant="36" id="sST-jo-Jg6"/>
                            <constraint firstItem="gJn-v2-d0Z" firstAttribute="top" secondItem="6Tk-OE-BBY" secondAttribute="top" constant="72" id="wFg-9E-IMx"/>
                            <constraint firstItem="Jep-bq-nBz" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="96" id="zZy-C5-TdW"/>
                        </constraints>
                    </view>
                    <connections>
                        <outlet property="circle" destination="8GG-Ei-Zce" id="Q6Q-TY-A1z"/>
                        <outlet property="infoText" destination="gJn-v2-d0Z" id="pR2-Ps-nKF"/>
                        <outlet property="reconnectIcon" destination="sTV-gO-axp" id="iuS-Zj-2cd"/>
                        <outlet property="settingsGear" destination="4J4-Vq-uXz" id="vgy-Jz-tEd"/>
                        <outlet property="terminalButton" destination="Czp-9u-tDH" id="nhQ-4o-UHd"/>
                        <outlet property="terminalFeed" destination="Vqf-Pz-bQv" id="h3N-1T-wNf"/>
                    </connections>
                </viewController>
                <placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
            </objects>
            <point key="canvasLocation" x="46.564885496183201" y="3.5211267605633805"/>
        </scene>
    </scenes>
    <resources>
        <image name="arrow.clockwise" catalog="system" width="113" height="128"/>
        <image name="circle.fill" catalog="system" width="128" height="123"/>
        <image name="gear" catalog="system" width="128" height="122"/>
        <image name="terminal" catalog="system" width="128" height="93"/>
        <systemColor name="labelColor">
            <color white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
        </systemColor>
        <systemColor name="systemBackgroundColor">
            <color white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
        </systemColor>
        <systemColor name="systemGray6Color">
            <color red="0.94901960780000005" green="0.94901960780000005" blue="0.96862745100000003" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
        </systemColor>
        <systemColor name="systemYellowColor">
            <color red="1" green="0.80000000000000004" blue="0.0" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
        </systemColor>
    </resources>
</document>
@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>NSSpeechRecognitionUsageDescription</key>
    <string>Your audio is used to convert your voice requests into text requests.</string>
    <key>UIApplicationSceneManifest</key>
    <dict>
        <key>UIApplicationSupportsMultipleScenes</key>
        <false/>
        <key>UISceneConfigurations</key>
        <dict>
            <key>UIWindowSceneSessionRoleApplication</key>
            <array>
                <dict>
                    <key>UISceneConfigurationName</key>
                    <string>Default Configuration</string>
                    <key>UISceneDelegateClassName</key>
                    <string>$(PRODUCT_MODULE_NAME).SceneDelegate</string>
                    <key>UISceneStoryboardFile</key>
                    <string>Main</string>
                </dict>
            </array>
        </dict>
    </dict>
</dict>
</plist>
@@ -0,0 +1,52 @@
//
// SceneDelegate.swift
// zeroone-app
//
// Created by Elad Dekel on 2024-05-09.
//

import UIKit

class SceneDelegate: UIResponder, UIWindowSceneDelegate {

    var window: UIWindow?

    func scene(_ scene: UIScene, willConnectTo session: UISceneSession, options connectionOptions: UIScene.ConnectionOptions) {
        // Use this method to optionally configure and attach the UIWindow `window` to the provided UIWindowScene `scene`.
        // If using a storyboard, the `window` property will automatically be initialized and attached to the scene.
        // This delegate does not imply the connecting scene or session are new (see `application:configurationForConnectingSceneSession` instead).
        guard let _ = (scene as? UIWindowScene) else { return }
    }

    func sceneDidDisconnect(_ scene: UIScene) {
        // Called as the scene is being released by the system.
        // This occurs shortly after the scene enters the background, or when its session is discarded.
        // Release any resources associated with this scene that can be re-created the next time the scene connects.
        // The scene may re-connect later, as its session was not necessarily discarded (see `application:didDiscardSceneSessions` instead).
    }

    func sceneDidBecomeActive(_ scene: UIScene) {
        // Called when the scene has moved from an inactive state to an active state.
        // Use this method to restart any tasks that were paused (or not yet started) when the scene was inactive.
    }

    func sceneWillResignActive(_ scene: UIScene) {
        // Called when the scene will move from an active state to an inactive state.
        // This may occur due to temporary interruptions (ex. an incoming phone call).
    }

    func sceneWillEnterForeground(_ scene: UIScene) {
        // Called as the scene transitions from the background to the foreground.
        // Use this method to undo the changes made on entering the background.
    }

    func sceneDidEnterBackground(_ scene: UIScene) {
        // Called as the scene transitions from the foreground to the background.
        // Use this method to save data, release shared resources, and store enough scene-specific state information
        // to restore the scene back to its current state.
    }

}
@@ -0,0 +1,366 @@
//
// ViewController.swift
// zeroone-app
//
// Created by Elad Dekel on 2024-05-09.
//

import UIKit
import Starscream
import AVFoundation

class ViewController: UIViewController, WebSocketDelegate {

    @IBOutlet weak var terminalFeed: UITextView!
    @IBOutlet weak var terminalButton: UIImageView!
    @IBOutlet weak var reconnectIcon: UIImageView!
    @IBOutlet weak var circle: UIImageView!
    @IBOutlet weak var settingsGear: UIImageView!
    @IBOutlet weak var infoText: UILabel!

    var audioRecordingInstance: AudioRecording?
    private var audioData = Data()
    private var audioPlayer: AVAudioPlayer?
    var address: String?
    var isConnected = false
    var recordingPermission = false
    var terminal = false
    var socket: WebSocket?

    override func viewDidLoad() {
        super.viewDidLoad()
        terminalFeed.layer.cornerRadius = 15
        infoText.text = "Hold to start once connected."
        // Create a gesture recognizer that tracks when the "button" is held
        let pressGesture = UILongPressGestureRecognizer(target: self, action: #selector(buttonPress(_:)))
        pressGesture.minimumPressDuration = 0.01
        circle.addGestureRecognizer(pressGesture)
        circle.isUserInteractionEnabled = true
        circle.translatesAutoresizingMaskIntoConstraints = false

        // Create a gesture recognizer for the settings button
        let tapGesture = UITapGestureRecognizer(target: self, action: #selector(settingsGear(_:)))
        settingsGear.addGestureRecognizer(tapGesture)
        settingsGear.isUserInteractionEnabled = true

        let reconnectGesture = UITapGestureRecognizer(target: self, action: #selector(reconnectTapped(_:)))
        reconnectIcon.addGestureRecognizer(reconnectGesture)
        reconnectIcon.isUserInteractionEnabled = true

        let terminal = UITapGestureRecognizer(target: self, action: #selector(terminalIcon(_:)))
        terminalButton.addGestureRecognizer(terminal)
        terminalButton.isUserInteractionEnabled = true
    }

    func checkRecordingPerms() {
        let sess = AVAudioSession.sharedInstance()
        switch sess.recordPermission {
        case .denied, .undetermined:
            sess.requestRecordPermission { (granted) in
                if granted {
                    self.recordingPermission = true
                } else {
                    let alert = UIAlertController(title: "Recording Not Permitted", message: "You must allow audio recording in order to send commands. Close the app and re-open it to try again.", preferredStyle: .alert)
                    let action = UIAlertAction(title: "Understood", style: .default)
                    alert.addAction(action)
                    self.present(alert, animated: true)
                }
            }
        case .granted:
            recordingPermission = true
        default:
            break
        }
    }

    override func viewDidAppear(_ animated: Bool) {
        if UserDefaults.standard.value(forKey: "IPINFO") != nil {
            print("here")
            address = UserDefaults.standard.string(forKey: "IPINFO")
            establishConnection()
        } else {
            print("there")
            setAddress()
        }
        checkRecordingPerms()
    }

    func received(data: String) {
        infoText.text = data
    }

    func setAddress() {
        let alert = UIAlertController(title: "Set the Address", message: "Input the address of the WebSocket (found in the terminal running 01 software)", preferredStyle: .alert)
        alert.addTextField { (field) in
            field.placeholder = "Enter Address Here"
        }
        let cancelButton = UIAlertAction(title: "Cancel", style: .cancel)
        alert.addAction(cancelButton)
        let submitButton = UIAlertAction(title: "Done", style: .default) { (_) in
            if let field = alert.textFields?.first, let text = field.text {
                UserDefaults.standard.setValue(text, forKey: "IPINFO")
                self.address = text
                self.establishConnection()
            }
        }
        alert.addAction(submitButton)

        present(alert, animated: true)
    }

    @objc func reconnectTapped(_ sender: UIGestureRecognizer) {
        infoText.text = ""
        self.establishConnection()
    }

    @objc func terminalIcon(_ sender: UIGestureRecognizer) {
        if terminal {
            UIView.animate(withDuration: 0.3) {
                self.terminalFeed.text = ""
                self.terminalFeed.alpha = 0
                let moveT = CGAffineTransform(translationX: 0, y: -190)
                self.appendTranslation(transform: moveT)
                self.terminalButton.image = UIImage(systemName: "apple.terminal")
            } completion: { done in
                self.terminal = false
            }
        } else {
            UIView.animate(withDuration: 0.3) {
                self.terminalFeed.alpha = 1
                let moveT = CGAffineTransform(translationX: 0, y: 190)
                self.appendTranslation(transform: moveT)
                self.terminalButton.image = UIImage(systemName: "apple.terminal.fill")
            } completion: { done in
                self.terminal = true
            }
        }
    }

    @objc func settingsGear(_ sender: UIGestureRecognizer) {
        infoText.text = ""
        setAddress()
    }

    func appendTranslation(transform: CGAffineTransform) {
        var currentTransform = self.circle.transform
        currentTransform = currentTransform.concatenating(transform)
        self.circle.transform = currentTransform
    }

    @objc func buttonPress(_ sender: UILongPressGestureRecognizer) {
        infoText.text = ""
        let feedback = UIImpactFeedbackGenerator(style: .medium)
        if sender.state == .began {
            socket?.connect()
            // check for recording permission; if it exists, start recording
            if isConnected && recordingPermission {
                audioRecordingInstance = AudioRecording()
                audioRecordingInstance!.startRecording()
                infoText.text = ""
                UIView.animate(withDuration: 0.1) {
                    self.circle.tintColor = .green
                    let newT = CGAffineTransform(scaleX: 0.7, y: 0.7)
                    self.appendTranslation(transform: newT)
                    feedback.prepare()
                    feedback.impactOccurred()
                }
            } else {
                let errorFeedback = UIImpactFeedbackGenerator(style: .heavy)
                errorFeedback.prepare()
                errorFeedback.impactOccurred()
                if isConnected && !recordingPermission {
                    infoText.text = "No recording permission. Please close and re-open the app."
                } else {
                    infoText.text = "Not connected."
                    establishConnection()
                }
                UIView.animate(withDuration: 0.5) {
                    self.circle.tintColor = .red
                } completion: { _ in
                    self.circle.tintColor = .systemYellow
                }
            }
        } else if sender.state == .ended {
            // stop recording and send the audio
            if isConnected && recordingPermission {
                if audioRecordingInstance != nil {
                    let response = audioRecordingInstance!.stopRecording()
                    if response != nil {
                        sendAudio(audio: response!)
                    }
                    UIView.animate(withDuration: 0.1) {
                        self.circle.tintColor = .systemYellow
                        let newT = CGAffineTransform(scaleX: 1.4, y: 1.4)
                        self.appendTranslation(transform: newT)
                        feedback.prepare()
                        feedback.impactOccurred()
                    }
                }
            }
        }
    }

    func establishConnection() { // connect to the websocket
        if address != nil {
            var request = URLRequest(url: URL(string: "http://\(address!)")!)
            request.timeoutInterval = 5
            socket = WebSocket(request: request)
            socket!.delegate = self
            socket!.connect()
        } else {
            setAddress()
        }
    }

    func didReceive(event: Starscream.WebSocketEvent, client: any Starscream.WebSocketClient) { // handle incoming websocket events
        switch event {
        case .connected(_):
            isConnected = true
            reconnectIcon.tintColor = .green
        case .disconnected(_, _):
            isConnected = false
            reconnectIcon.tintColor = .red
        case .text(let string):
            if terminal {
                terminalFeed.text = terminalFeed.text + "\n>> \(string)"
                let range = NSMakeRange(terminalFeed.text.count - 1, 0)
                terminalFeed.scrollRangeToVisible(range)
            }
            if string.contains("audio") && string.contains("bytes.raw") && string.contains("start") {
                infoText.text = "Receiving response..."
                // it started collecting data!
                print("Audio is being received.")
            } else if string.contains("audio") && string.contains("bytes.raw") && string.contains("end") {
                infoText.text = ""
                print("Audio is no longer being received.")
                let wavHeader = createWAVHeader(audioDataSize: Int32(audioData.count - 44))
                // Combine header and data
                var completeWAVData = Data()
                completeWAVData.append(wavHeader)
                completeWAVData.append(audioData.subdata(in: 44..<audioData.count))
                do {
                    audioPlayer = try AVAudioPlayer(data: completeWAVData)
                    audioPlayer?.prepareToPlay()
                    audioPlayer?.play()
                } catch {
                    print("Error playing audio: \(error.localizedDescription)")
                }
                audioData.removeAll() // reset the buffer so the next response starts clean
            }
            print("Received text: \(string)")
        case .binary(let data):
            audioData.append(data)
            print("Received data: \(data.count)")
        case .ping(_):
            break
        case .pong(_):
            break
        case .viabilityChanged(_):
            break
        case .reconnectSuggested(_):
            break
        case .cancelled:
            isConnected = false
            reconnectIcon.tintColor = .red
        case .error(_):
            isConnected = false
            reconnectIcon.tintColor = .red
        case .peerClosed:
            isConnected = false
            reconnectIcon.tintColor = .red
        }
    }

    func createWAVHeader(audioDataSize: Int32) -> Data {
        // A standard WAV header is 44 bytes; the RIFF chunk size excludes the first 8.
        let chunkSize: Int32 = 36 + audioDataSize
        let sampleRate: Int32 = 16000 // From i2s_config
        let numChannels: Int16 = 1 // From i2s_config (mono)
        let bitsPerSample: Int16 = 16 // From i2s_config
        let byteRate: Int32 = sampleRate * Int32(numChannels) * Int32(bitsPerSample) / 8
        let blockAlign: Int16 = numChannels * bitsPerSample / 8

        var headerData = Data()

        // RIFF Chunk
        headerData.append(stringToData("RIFF")) // ChunkID
        headerData.append(int32ToData(chunkSize)) // ChunkSize
        headerData.append(stringToData("WAVE")) // Format

        // fmt Subchunk
        headerData.append(stringToData("fmt ")) // Subchunk1ID
        headerData.append(int32ToData(16)) // Subchunk1Size (16 for PCM)
        headerData.append(int16ToData(1)) // AudioFormat (1 for PCM)
        headerData.append(int16ToData(numChannels)) // NumChannels
        headerData.append(int32ToData(sampleRate)) // SampleRate
        headerData.append(int32ToData(byteRate)) // ByteRate
        headerData.append(int16ToData(blockAlign)) // BlockAlign
        headerData.append(int16ToData(bitsPerSample)) // BitsPerSample

        // data Subchunk
        headerData.append(stringToData("data")) // Subchunk2ID
        headerData.append(int32ToData(audioDataSize)) // Subchunk2Size

        return headerData
    }

    func stringToData(_ string: String) -> Data {
        return string.data(using: .utf8)!
    }

    func int16ToData(_ value: Int16) -> Data {
        var value = value.littleEndian
        return Data(bytes: &value, count: MemoryLayout<Int16>.size)
    }

    func int32ToData(_ value: Int32) -> Data {
        var value = value.littleEndian
        return Data(bytes: &value, count: MemoryLayout<Int32>.size)
    }

    func sendAudio(audio: Data) {
        if isConnected {
            socket!.write(string: "{\"role\": \"user\", \"type\": \"audio\", \"format\": \"bytes.raw\", \"start\": true}")
            socket!.write(data: audio)
            socket!.write(string: "{\"role\": \"user\", \"type\": \"audio\", \"format\": \"bytes.raw\", \"end\": true}")
        } else {
            print("Not connected!")
        }
    }

}
@ -0,0 +1,251 @@
|
||||
# This is a websocket interpreter, TTS and STT disabled.
|
||||
# It makes a websocket on port 8000 that sends/recieves LMC messages in *streaming* format.
|
||||
|
||||
### You MUST send a start and end flag with each message! For example: ###
|
||||
|
||||
"""
|
||||
{"role": "user", "type": "message", "start": True})
|
||||
{"role": "user", "type": "message", "content": "hi"})
|
||||
{"role": "user", "type": "message", "end": True})
|
||||
"""
|
||||
|
||||
###
|
||||
from pynput import keyboard
|
||||
from .utils.bytes_to_wav import bytes_to_wav
|
||||
from RealtimeTTS import TextToAudioStream, CoquiEngine, OpenAIEngine, ElevenlabsEngine
|
||||
from RealtimeSTT import AudioToTextRecorder
|
||||
import time
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
|
||||
|
||||
class AsyncInterpreter:
|
||||
def __init__(self, interpreter, debug):
|
||||
self.stt_latency = None
|
||||
self.tts_latency = None
|
||||
self.interpreter_latency = None
|
||||
# time from first put to first yield
|
||||
self.tffytfp = None
|
||||
self.debug = debug
|
||||
|
||||
self.interpreter = interpreter
|
||||
self.audio_chunks = []
|
||||
|
||||
# STT
|
||||
self.stt = AudioToTextRecorder(
|
||||
model="tiny.en", spinner=False, use_microphone=False
|
||||
)
|
||||
|
||||
self.stt.stop() # It needs this for some reason
|
||||
|
||||
# TTS
|
||||
if self.interpreter.tts == "coqui":
|
||||
engine = CoquiEngine()
|
||||
elif self.interpreter.tts == "openai":
|
||||
engine = OpenAIEngine()
|
||||
elif self.interpreter.tts == "elevenlabs":
|
||||
engine = ElevenlabsEngine(api_key=os.environ["ELEVEN_LABS_API_KEY"])
|
||||
engine.set_voice("Michael")
|
||||
else:
|
||||
raise ValueError(f"Unsupported TTS engine: {self.interpreter.tts}")
|
||||
self.tts = TextToAudioStream(engine)
|
||||
|
||||
self.active_chat_messages = []
|
||||
|
||||
self._input_queue = asyncio.Queue() # Queue that .input will shove things into
|
||||
self._output_queue = asyncio.Queue() # Queue to put output chunks into
|
||||
self._last_lmc_start_flag = None # Unix time of last LMC start flag recieved
|
||||
self._in_keyboard_write_block = (
|
||||
False # Tracks whether interpreter is trying to use the keyboard
|
||||
)
|
||||
self.loop = asyncio.get_event_loop()
|
||||
|
||||
async def _add_to_queue(self, queue, item):
|
||||
await queue.put(item)
|
||||
|
||||
async def clear_queue(self, queue):
|
||||
while not queue.empty():
|
||||
await queue.get()
|
||||
|
||||
async def clear_input_queue(self):
|
||||
await self.clear_queue(self._input_queue)
|
||||
|
||||
async def clear_output_queue(self):
|
||||
await self.clear_queue(self._output_queue)
|
||||
|
||||
async def input(self, chunk):
|
||||
"""
|
||||
Expects a chunk in streaming LMC format.
|
||||
"""
|
||||
if isinstance(chunk, bytes):
|
||||
# It's probably a chunk of audio
|
||||
self.stt.feed_audio(chunk)
|
||||
self.audio_chunks.append(chunk)
|
||||
# print("INTERPRETER FEEDING AUDIO")
|
||||
|
||||
else:
|
||||
|
||||
            try:
                chunk = json.loads(chunk)
            except json.JSONDecodeError:
                pass  # Not valid JSON; treat the chunk as a plain string
|
||||
|
||||
if "start" in chunk:
|
||||
# print("Starting STT")
|
||||
self.stt.start()
|
||||
self._last_lmc_start_flag = time.time()
|
||||
# self.interpreter.computer.terminal.stop() # Stop any code execution... maybe we should make interpreter.stop()?
|
||||
elif "end" in chunk:
|
||||
# print("Running OI on input")
|
||||
asyncio.create_task(self.run())
|
||||
else:
|
||||
await self._add_to_queue(self._input_queue, chunk)
|
||||
|
||||
def add_to_output_queue_sync(self, chunk):
|
||||
"""
|
||||
Synchronous function to add a chunk to the output queue.
|
||||
"""
|
||||
# print("ADDING TO QUEUE:", chunk)
|
||||
asyncio.create_task(self._add_to_queue(self._output_queue, chunk))
|
||||
|
||||
def generate(self, message, start_interpreter):
|
||||
last_lmc_start_flag = self._last_lmc_start_flag
|
||||
self.interpreter.messages = self.active_chat_messages
|
||||
|
||||
# print("message is", message)
|
||||
|
||||
for chunk in self.interpreter.chat(message, display=True, stream=True):
|
||||
|
||||
if self._last_lmc_start_flag != last_lmc_start_flag:
|
||||
# self.beeper.stop()
|
||||
break
|
||||
|
||||
# self.add_to_output_queue_sync(chunk) # To send text, not just audio
|
||||
|
||||
content = chunk.get("content")
|
||||
|
||||
# Handle message blocks
|
||||
if chunk.get("type") == "message":
|
||||
if content:
|
||||
# self.beeper.stop()
|
||||
|
||||
# Experimental: The AI voice sounds better with replacements like these, but it should happen at the TTS layer
|
||||
# content = content.replace(". ", ". ... ").replace(", ", ", ... ").replace("!", "! ... ").replace("?", "? ... ")
|
||||
# print("yielding ", content)
|
||||
if self.tffytfp is None:
|
||||
self.tffytfp = time.time()
|
||||
|
||||
yield content
|
||||
|
||||
# Handle code blocks
|
||||
elif chunk.get("type") == "code":
|
||||
if "start" in chunk:
|
||||
# self.beeper.start()
|
||||
pass
|
||||
|
||||
                # Experimental: If the AI wants to type, we should type immediately
                if (
                    self.interpreter.messages[-1]
                    .get("content", "")
                    .startswith("computer.keyboard.write(")
                ):
                    # pynput exposes a Controller class for programmatic typing
                    keyboard.Controller().type(content)
                    self._in_keyboard_write_block = True
|
||||
if "end" in chunk and self._in_keyboard_write_block:
|
||||
self._in_keyboard_write_block = False
|
||||
# (This will make it so it doesn't type twice when the block executes)
|
||||
if self.interpreter.messages[-1]["content"].startswith(
|
||||
"computer.keyboard.write("
|
||||
):
|
||||
self.interpreter.messages[-1]["content"] = (
|
||||
"dummy_variable = ("
|
||||
+ self.interpreter.messages[-1]["content"][
|
||||
len("computer.keyboard.write(") :
|
||||
]
|
||||
)
|
||||
|
||||
# Send a completion signal
|
||||
if self.debug:
|
||||
end_interpreter = time.time()
|
||||
self.interpreter_latency = end_interpreter - start_interpreter
|
||||
print("INTERPRETER LATENCY", self.interpreter_latency)
|
||||
# self.add_to_output_queue_sync({"role": "server","type": "completion", "content": "DONE"})
|
||||
|
||||
async def run(self):
|
||||
"""
|
||||
Runs OI on the audio bytes submitted to the input. Will add streaming LMC chunks to the _output_queue.
|
||||
"""
|
||||
self.interpreter.messages = self.active_chat_messages
|
||||
|
||||
self.stt.stop()
|
||||
|
||||
        input_queue = []
        while not self._input_queue.empty():
            # get() would return an un-awaited coroutine here; drain synchronously
            input_queue.append(self._input_queue.get_nowait())
|
||||
|
||||
        if self.debug:
            start_stt = time.time()
            message = self.stt.text()
            end_stt = time.time()
            self.stt_latency = end_stt - start_stt
            print("STT LATENCY", self.stt_latency)

            if self.audio_chunks:
                audio_bytes = bytearray(b"".join(self.audio_chunks))
                wav_file_path = bytes_to_wav(audio_bytes, "audio/raw")
                print("wav_file_path ", wav_file_path)
                self.audio_chunks = []
        else:
            message = self.stt.text()
|
||||
|
||||
print(message)
|
||||
|
||||
# Feed generate to RealtimeTTS
|
||||
self.add_to_output_queue_sync(
|
||||
{"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
|
||||
)
|
||||
start_interpreter = time.time()
|
||||
text_iterator = self.generate(message, start_interpreter)
|
||||
|
||||
self.tts.feed(text_iterator)
|
||||
if not self.tts.is_playing():
|
||||
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True)
|
||||
|
||||
while True:
|
||||
await asyncio.sleep(0.1)
|
||||
# print("is_playing", self.tts.is_playing())
|
||||
if not self.tts.is_playing():
|
||||
self.add_to_output_queue_sync(
|
||||
{
|
||||
"role": "assistant",
|
||||
"type": "audio",
|
||||
"format": "bytes.wav",
|
||||
"end": True,
|
||||
}
|
||||
)
|
||||
if self.debug:
|
||||
end_tts = time.time()
|
||||
self.tts_latency = end_tts - self.tts.stream_start_time
|
||||
print("TTS LATENCY", self.tts_latency)
|
||||
self.tts.stop()
|
||||
|
||||
break
|
||||
|
||||
async def _on_tts_chunk_async(self, chunk):
|
||||
# print("adding chunk to queue")
|
||||
if self.debug and self.tffytfp is not None and self.tffytfp != 0:
|
||||
print(
|
||||
"time from first yield to first put is ",
|
||||
time.time() - self.tffytfp,
|
||||
)
|
||||
self.tffytfp = 0
|
||||
await self._add_to_queue(self._output_queue, chunk)
|
||||
|
||||
def on_tts_chunk(self, chunk):
|
||||
# print("ye")
|
||||
asyncio.run(self._on_tts_chunk_async(chunk))
|
||||
|
||||
async def output(self):
|
||||
# print("outputting chunks")
|
||||
return await self._output_queue.get()
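    # Typical driver loop (an illustrative sketch; `profile_interpreter` is a
    # stand-in for a configured Open Interpreter profile object):
    #
    #     ai = AsyncInterpreter(profile_interpreter, debug=False)
    #     await ai.input('{"role": "user", "type": "message", "start": true}')
    #     await ai.input('{"role": "user", "type": "message", "content": "hi"}')
    #     await ai.input('{"role": "user", "type": "message", "end": true}')  # triggers run()
    #     chunk = await ai.output()  # a streaming LMC dict, or raw audio bytes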
@ -0,0 +1,114 @@
|
||||
import asyncio
|
||||
import traceback
|
||||
import json
|
||||
from fastapi import FastAPI, WebSocket, Depends
from starlette.websockets import WebSocketState
|
||||
from fastapi.responses import PlainTextResponse
|
||||
from uvicorn import Config, Server
|
||||
from .async_interpreter import AsyncInterpreter
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from typing import List, Dict, Any
|
||||
import os
|
||||
import importlib.util
|
||||
|
||||
os.environ["STT_RUNNER"] = "server"
|
||||
os.environ["TTS_RUNNER"] = "server"
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"], # Allow all methods (GET, POST, etc.)
|
||||
allow_headers=["*"], # Allow all headers
|
||||
)
|
||||
|
||||
|
||||
async def get_debug_flag():
|
||||
return app.state.debug
|
||||
|
||||
|
||||
@app.get("/ping")
|
||||
async def ping():
|
||||
return PlainTextResponse("pong")
|
||||
|
||||
|
||||
@app.websocket("/")
|
||||
async def websocket_endpoint(
|
||||
websocket: WebSocket, debug: bool = Depends(get_debug_flag)
|
||||
):
|
||||
await websocket.accept()
|
||||
|
||||
# Send the tts_service value to the client
|
||||
await websocket.send_text(
|
||||
json.dumps({"type": "config", "tts_service": interpreter.interpreter.tts})
|
||||
)
|
||||
|
||||
try:
|
||||
|
||||
        async def receive_input():
            while True:
                # client_state is a WebSocketState enum, not a string
                if websocket.client_state == WebSocketState.DISCONNECTED:
                    break
|
||||
|
||||
data = await websocket.receive()
|
||||
|
||||
if isinstance(data, bytes):
|
||||
await interpreter.input(data)
|
||||
elif "bytes" in data:
|
||||
await interpreter.input(data["bytes"])
|
||||
# print("RECEIVED INPUT", data)
|
||||
elif "text" in data:
|
||||
# print("RECEIVED INPUT", data)
|
||||
await interpreter.input(data["text"])
|
||||
|
||||
async def send_output():
|
||||
while True:
|
||||
output = await interpreter.output()
|
||||
|
||||
if isinstance(output, bytes):
|
||||
# print(f"Sending {len(output)} bytes of audio data.")
|
||||
await websocket.send_bytes(output)
|
||||
                    # we don't send bytes out right now (no TTS)
|
||||
|
||||
elif isinstance(output, dict):
|
||||
# print("sending text")
|
||||
await websocket.send_text(json.dumps(output))
|
||||
|
||||
await asyncio.gather(send_output(), receive_input())
|
||||
except Exception as e:
|
||||
print(f"WebSocket connection closed with exception: {e}")
|
||||
traceback.print_exc()
|
||||
    finally:
        if websocket.client_state != WebSocketState.DISCONNECTED:
            await websocket.close()
|
||||
|
||||
|
||||
async def main(server_host, server_port, profile, debug):
    # The configured AsyncInterpreter is stored in a module-level global so
    # the websocket endpoint above can reach it.
    global interpreter

    app.state.debug = debug
|
||||
|
||||
# Load the profile module from the provided path
|
||||
spec = importlib.util.spec_from_file_location("profile", profile)
|
||||
profile_module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(profile_module)
|
||||
|
||||
# Get the interpreter from the profile
|
||||
interpreter = profile_module.interpreter
|
||||
|
||||
if not hasattr(interpreter, 'tts'):
|
||||
print("Setting TTS provider to default: openai")
|
||||
interpreter.tts = "openai"
|
||||
|
||||
# Make it async
|
||||
interpreter = AsyncInterpreter(interpreter, debug)
|
||||
|
||||
print(f"Starting server on {server_host}:{server_port}")
|
||||
config = Config(app, host=server_host, port=server_port, lifespan="on")
|
||||
server = Server(config)
|
||||
await server.serve()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
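# Example exchange with this server (an illustrative sketch assuming the
# third-party `websockets` package and the default host/port):
#
#     import asyncio, json, websockets
#
#     async def demo():
#         async with websockets.connect("ws://localhost:8000") as ws:
#             print(json.loads(await ws.recv()))  # {"type": "config", "tts_service": ...}
#             await ws.send(json.dumps({"role": "user", "type": "message", "start": True}))
#             await ws.send(json.dumps({"role": "user", "type": "message", "content": "Hello"}))
#             await ws.send(json.dumps({"role": "user", "type": "message", "end": True}))
#             while True:
#                 print(await ws.recv())  # streaming LMC chunks and audio bytes
#
#     asyncio.run(demo())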
@ -1,366 +0,0 @@
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
load_dotenv() # take environment variables from .env.
|
||||
|
||||
import glob
|
||||
import time
|
||||
import json
|
||||
from interpreter import OpenInterpreter
|
||||
import shutil
|
||||
|
||||
|
||||
system_message = r"""
|
||||
|
||||
You are the 01, a screenless executive assistant that can complete any task.
|
||||
When you execute code, it will be executed on the user's machine. The user has given you full and complete permission to execute any code necessary to complete the task.
|
||||
Run any code to achieve the goal, and if at first you don't succeed, try again and again.
|
||||
You can install new packages.
|
||||
Be concise. Your messages are being read aloud to the user. DO NOT MAKE PLANS. RUN CODE QUICKLY.
|
||||
Try to spread complex tasks over multiple code blocks. Don't try to do complex tasks in one go.
|
||||
Manually summarize text.
|
||||
|
||||
DON'T TELL THE USER THE METHOD YOU'LL USE, OR MAKE PLANS. ACT LIKE THIS:
|
||||
|
||||
---
|
||||
user: Are there any concerts in Seattle?
|
||||
assistant: Let me check on that.
|
||||
```python
|
||||
computer.browser.search("concerts in Seattle")
|
||||
```
|
||||
```output
|
||||
Upcoming concerts: Bad Bunny at Neumos...
|
||||
```
|
||||
It looks like there's a Bad Bunny concert at Neumos...
|
||||
---
|
||||
|
||||
Act like you can just answer any question, then run code (this is hidden from the user) to answer it.
|
||||
THE USER CANNOT SEE CODE BLOCKS.
|
||||
Your responses should be very short, no more than 1-2 sentences long.
|
||||
DO NOT USE MARKDOWN. ONLY WRITE PLAIN TEXT.
|
||||
|
||||
# TASKS
|
||||
|
||||
Help the user manage their tasks.
|
||||
Store the user's tasks in a Python list called `tasks`.
|
||||
The user's current task list (it might be empty) is: {{ tasks }}
|
||||
When the user completes the current task, you should remove it from the list and read the next item by running `tasks = tasks[1:]\ntasks[0]`. Then, tell the user what the next task is.
|
||||
When the user tells you about a set of tasks, you should intelligently order tasks, batch similar tasks, and break down large tasks into smaller tasks (for this, you should consult the user and get their permission to break it down). Your goal is to manage the task list as intelligently as possible, to make the user as efficient and non-overwhelmed as possible. They will require a lot of encouragement, support, and kindness. Don't say too much about what's ahead of them— just try to focus them on each step at a time.
|
||||
|
||||
After starting a task, you should check in with the user around the estimated completion time to see if the task is completed.
|
||||
To do this, schedule a reminder based on estimated completion time using the function `schedule(message="Your message here.", start="8am")`, WHICH HAS ALREADY BEEN IMPORTED. YOU DON'T NEED TO IMPORT THE `schedule` FUNCTION. IT IS AVAILABLE. You'll receive the message at the time you scheduled it. If the user says to monitor something, simply schedule it with an interval of a duration that makes sense for the problem by specifying an interval, like this: `schedule(message="Your message here.", interval="5m")`
|
||||
|
||||
|
||||
If there are tasks, you should guide the user through their list one task at a time, convincing them to move forward, giving a pep talk if need be.
|
||||
|
||||
# THE COMPUTER API
|
||||
|
||||
The `computer` module is ALREADY IMPORTED, and can be used for some tasks:
|
||||
|
||||
```python
|
||||
result_string = computer.browser.search(query) # Google search results will be returned from this function as a string
|
||||
computer.calendar.create_event(title="Meeting", start_date=datetime.datetime.now(), end_date=datetime.datetime.now() + datetime.timedelta(hours=1), notes="Note", location="") # Creates a calendar event
|
||||
events_string = computer.calendar.get_events(start_date=datetime.date.today(), end_date=None) # Get events between dates. If end_date is None, only gets events for start_date
|
||||
computer.calendar.delete_event(event_title="Meeting", start_date=datetime.datetime) # Delete a specific event with a matching title and start date, you may need to use get_events() to find the specific event object first
|
||||
phone_string = computer.contacts.get_phone_number("John Doe")
|
||||
contact_string = computer.contacts.get_email_address("John Doe")
|
||||
computer.mail.send("john@email.com", "Meeting Reminder", "Reminder that our meeting is at 3pm today.", ["path/to/attachment.pdf", "path/to/attachment2.pdf"]) # Send an email with optional attachments
|
||||
emails_string = computer.mail.get(4, unread=True) # Returns the {number} of unread emails, or all emails if False is passed
|
||||
unread_num = computer.mail.unread_count() # Returns the number of unread emails
|
||||
computer.sms.send("555-123-4567", "Hello from the computer!") # Send a text message. MUST be a phone number, so use computer.contacts.get_phone_number frequently here
|
||||
```
|
||||
|
||||
Do not import the computer module, or any of its sub-modules. They are already imported.
|
||||
|
||||
DO NOT use the computer module for ALL tasks. Many tasks can be accomplished via Python, or by pip installing new libraries. Be creative!
|
||||
|
||||
# GUI CONTROL (RARE)
|
||||
|
||||
You are a computer controlling language model. You can control the user's GUI.
|
||||
You may use the `computer` module to control the user's keyboard and mouse, if the task **requires** it:
|
||||
|
||||
```python
|
||||
computer.display.view() # Shows you what's on the screen, returns a `pil_image` in case you need it (rarely). **You almost always want to do this first!**
|
||||
computer.keyboard.hotkey(" ", "command") # Opens spotlight
|
||||
computer.keyboard.write("hello")
|
||||
computer.mouse.click("text onscreen") # This clicks on the UI element with that text. Use this **frequently** and get creative! To click a video, you could pass the *timestamp* (which is usually written on the thumbnail) into this.
|
||||
computer.mouse.move("open recent >") # This moves the mouse over the UI element with that text. Many dropdowns will disappear if you click them. You have to hover over items to reveal more.
|
||||
computer.mouse.click(x=500, y=500) # Use this very, very rarely. It's highly inaccurate
|
||||
computer.mouse.click(icon="gear icon") # Moves mouse to the icon with that description. Use this very often
|
||||
computer.mouse.scroll(-10) # Scrolls down. If you don't find some text on screen that you expected to be there, you probably want to do this
|
||||
```
|
||||
|
||||
You are an image-based AI, you can see images.
|
||||
Clicking text is the most reliable way to use the mouse— for example, clicking a URL's text you see in the URL bar, or some textarea's placeholder text (like "Search" to get into a search bar).
|
||||
If you use `plt.show()`, the resulting image will be sent to you. However, if you use `PIL.Image.show()`, the resulting image will NOT be sent to you.
|
||||
It is very important to make sure you are focused on the right application and window. Your first command should usually be to explicitly switch to the correct application. On Macs, ALWAYS use Spotlight to switch applications, and remember to press Enter.
|
||||
When searching the web, use query parameters. For example, https://www.amazon.com/s?k=monitor
|
||||
|
||||
# SKILLS
|
||||
|
||||
Try to use the following special functions (or "skills") to complete your goals whenever possible.
|
||||
THESE ARE ALREADY IMPORTED. YOU CAN CALL THEM INSTANTLY.
|
||||
|
||||
---
|
||||
{{
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import ast
|
||||
from platformdirs import user_data_dir
|
||||
|
||||
directory = os.path.join(user_data_dir('01'), 'skills')
|
||||
if not os.path.exists(directory):
|
||||
os.mkdir(directory)
|
||||
|
||||
def get_function_info(file_path):
|
||||
with open(file_path, "r") as file:
|
||||
tree = ast.parse(file.read())
|
||||
functions = [node for node in tree.body if isinstance(node, ast.FunctionDef)]
|
||||
for function in functions:
|
||||
docstring = ast.get_docstring(function)
|
||||
args = [arg.arg for arg in function.args.args]
|
||||
print(f"Function Name: {function.name}")
|
||||
print(f"Arguments: {args}")
|
||||
print(f"Docstring: {docstring}")
|
||||
print("---")
|
||||
|
||||
files = os.listdir(directory)
|
||||
for file in files:
|
||||
if file.endswith(".py"):
|
||||
file_path = os.path.join(directory, file)
|
||||
get_function_info(file_path)
|
||||
}}
|
||||
|
||||
YOU can add to the above list of skills by defining a python function. The function will be saved as a skill.
|
||||
Search all existing skills by running `computer.skills.search(query)`.
|
||||
|
||||
**Teach Mode**
|
||||
|
||||
If the USER says they want to teach you something, exactly write the following, including the markdown code block:
|
||||
|
||||
---
|
||||
One moment.
|
||||
```python
|
||||
computer.skills.new_skill.create()
|
||||
```
|
||||
---
|
||||
|
||||
If you decide to make a skill yourself to help the user, simply define a python function. `computer.skills.new_skill.create()` is for user-described skills.
|
||||
|
||||
# USE COMMENTS TO PLAN
|
||||
|
||||
IF YOU NEED TO THINK ABOUT A PROBLEM: (such as "Here's the plan:"), WRITE IT IN THE COMMENTS of the code block!
|
||||
|
||||
---
|
||||
User: What is 432/7?
|
||||
Assistant: Let me think about that.
|
||||
```python
|
||||
# Here's the plan:
|
||||
# 1. Divide the numbers
|
||||
# 2. Round to 3 digits
|
||||
print(round(432/7, 3))
|
||||
```
|
||||
```output
|
||||
61.714
|
||||
```
|
||||
The answer is 61.714.
|
||||
---
|
||||
|
||||
# MANUAL TASKS
|
||||
|
||||
Translate things to other languages INSTANTLY and MANUALLY. Don't ever try to use a translation tool.
|
||||
Summarize things manually. DO NOT use a summarizer tool.
|
||||
|
||||
# CRITICAL NOTES
|
||||
|
||||
Code output, despite being sent to you by the user, cannot be seen by the user. You NEED to tell the user about the output of any code you run, even if that means repeating it exactly. >>The user does not have a screen.<<
|
||||
ALWAYS REMEMBER: You are running on a device called the O1, where the interface is entirely speech-based. Make your responses to the user VERY short. DO NOT PLAN. BE CONCISE. WRITE CODE TO RUN IT.
|
||||
Try multiple methods before saying the task is impossible. **You can do it!**
|
||||
|
||||
""".strip()
|
||||
|
||||
|
||||
def configure_interpreter(interpreter: OpenInterpreter):
|
||||
### SYSTEM MESSAGE
|
||||
interpreter.system_message = system_message
|
||||
|
||||
interpreter.llm.supports_vision = True
|
||||
interpreter.shrink_images = True # Faster but less accurate
|
||||
|
||||
interpreter.llm.model = "gpt-4"
|
||||
|
||||
interpreter.llm.supports_functions = False
|
||||
interpreter.llm.context_window = 110000
|
||||
interpreter.llm.max_tokens = 4096
|
||||
interpreter.auto_run = True
|
||||
|
||||
interpreter.force_task_completion = True
|
||||
interpreter.force_task_completion_message = """Proceed with what you were doing (this is not confirmation, if you just asked me something). You CAN run code on my machine. If you want to run code, start your message with "```"! If the entire task is done, say exactly 'The task is done.' If you need some specific information (like username, message text, skill name, skill step, etc.) say EXACTLY 'Please provide more information.' If it's impossible, say 'The task is impossible.' (If I haven't provided a task, say exactly 'Let me know what you'd like to do next.') Otherwise keep going. CRITICAL: REMEMBER TO FOLLOW ALL PREVIOUS INSTRUCTIONS. If I'm teaching you something, remember to run the related `computer.skills.new_skill` function."""
|
||||
interpreter.force_task_completion_breakers = [
|
||||
"The task is done.",
|
||||
"The task is impossible.",
|
||||
"Let me know what you'd like to do next.",
|
||||
"Please provide more information.",
|
||||
]
|
||||
|
||||
# Check if required packages are installed
|
||||
|
||||
# THERE IS AN INCONSISTENCY HERE.
|
||||
# We should be testing if they import WITHIN OI's computer, not here.
|
||||
|
||||
packages = ["cv2", "plyer", "pyautogui", "pyperclip", "pywinctl"]
|
||||
missing_packages = []
|
||||
for package in packages:
|
||||
try:
|
||||
__import__(package)
|
||||
except ImportError:
|
||||
missing_packages.append(package)
|
||||
|
||||
if missing_packages:
|
||||
interpreter.display_message(
|
||||
f"> **Missing Package(s): {', '.join(['`' + p + '`' for p in missing_packages])}**\n\nThese packages are required for OS Control.\n\nInstall them?\n"
|
||||
)
|
||||
user_input = input("(y/n) > ")
|
||||
if user_input.lower() != "y":
|
||||
print("\nPlease try to install them manually.\n\n")
|
||||
time.sleep(2)
|
||||
print("Attempting to start OS control anyway...\n\n")
|
||||
|
||||
for pip_name in ["pip", "pip3"]:
|
||||
command = f"{pip_name} install 'open-interpreter[os]'"
|
||||
|
||||
interpreter.computer.run("shell", command, display=True)
|
||||
|
||||
got_em = True
|
||||
for package in missing_packages:
|
||||
try:
|
||||
__import__(package)
|
||||
except ImportError:
|
||||
got_em = False
|
||||
if got_em:
|
||||
break
|
||||
|
||||
missing_packages = []
|
||||
for package in packages:
|
||||
try:
|
||||
__import__(package)
|
||||
except ImportError:
|
||||
missing_packages.append(package)
|
||||
|
||||
    if missing_packages:
|
||||
print(
|
||||
"\n\nWarning: The following packages could not be installed:",
|
||||
", ".join(missing_packages),
|
||||
)
|
||||
print("\nPlease try to install them manually.\n\n")
|
||||
time.sleep(2)
|
||||
print("Attempting to start OS control anyway...\n\n")
|
||||
|
||||
# Should we explore other options for ^ these kinds of tags?
|
||||
# Like:
|
||||
|
||||
# from rich import box
|
||||
# from rich.console import Console
|
||||
# from rich.panel import Panel
|
||||
# console = Console()
|
||||
# print(">\n\n")
|
||||
# console.print(Panel("[bold italic white on black]OS CONTROL[/bold italic white on black] Enabled", box=box.SQUARE, expand=False), style="white on black")
|
||||
# print(">\n\n")
|
||||
# console.print(Panel("[bold italic white on black]OS CONTROL[/bold italic white on black] Enabled", box=box.HEAVY, expand=False), style="white on black")
|
||||
# print(">\n\n")
|
||||
# console.print(Panel("[bold italic white on black]OS CONTROL[/bold italic white on black] Enabled", box=box.DOUBLE, expand=False), style="white on black")
|
||||
# print(">\n\n")
|
||||
# console.print(Panel("[bold italic white on black]OS CONTROL[/bold italic white on black] Enabled", box=box.SQUARE, expand=False), style="white on black")
|
||||
|
||||
if not interpreter.offline and not interpreter.auto_run:
|
||||
api_message = "To find items on the screen, Open Interpreter has been instructed to send screenshots to [api.openinterpreter.com](https://api.openinterpreter.com/) (we do not store them). Add `--offline` to attempt this locally."
|
||||
interpreter.display_message(api_message)
|
||||
print("")
|
||||
|
||||
if not interpreter.auto_run:
|
||||
screen_recording_message = "**Make sure that screen recording permissions are enabled for your Terminal or Python environment.**"
|
||||
interpreter.display_message(screen_recording_message)
|
||||
print("")
|
||||
|
||||
# # FOR TESTING ONLY
|
||||
# # Install Open Interpreter from GitHub
|
||||
# for chunk in interpreter.computer.run(
|
||||
# "shell",
|
||||
# "pip install git+https://github.com/KillianLucas/open-interpreter.git",
|
||||
# ):
|
||||
# if chunk.get("format") != "active_line":
|
||||
# print(chunk.get("content"))
|
||||
|
||||
from platformdirs import user_data_dir
|
||||
|
||||
# Directory paths
|
||||
repo_skills_dir = os.path.join(os.path.dirname(__file__), "skills")
|
||||
user_data_skills_dir = os.path.join(user_data_dir("01"), "skills")
|
||||
|
||||
# Create the user data skills directory if it doesn't exist
|
||||
os.makedirs(user_data_skills_dir, exist_ok=True)
|
||||
|
||||
# Copy Python files from the repository skills directory to the user data skills directory, ignoring __init__.py files
|
||||
for filename in os.listdir(repo_skills_dir):
|
||||
if filename.endswith(".py") and filename != "__init__.py":
|
||||
src_file = os.path.join(repo_skills_dir, filename)
|
||||
dst_file = os.path.join(user_data_skills_dir, filename)
|
||||
shutil.copy2(src_file, dst_file)
|
||||
|
||||
interpreter.computer.debug = True
|
||||
interpreter.computer.skills.path = user_data_skills_dir
|
||||
|
||||
# Import skills
|
||||
interpreter.computer.save_skills = False
|
||||
|
||||
for file in glob.glob(os.path.join(interpreter.computer.skills.path, "*.py")):
|
||||
code_to_run = ""
|
||||
with open(file, "r") as f:
|
||||
code_to_run += f.read() + "\n"
|
||||
|
||||
interpreter.computer.run("python", code_to_run)
|
||||
|
||||
interpreter.computer.save_skills = True
|
||||
|
||||
# Initialize user's task list
|
||||
interpreter.computer.run(
|
||||
language="python",
|
||||
code="tasks = []",
|
||||
display=interpreter.verbose,
|
||||
)
|
||||
|
||||
# Give it access to the computer via Python
|
||||
interpreter.computer.run(
|
||||
language="python",
|
||||
code="import time\nfrom interpreter import interpreter\ncomputer = interpreter.computer", # We ask it to use time, so
|
||||
display=interpreter.verbose,
|
||||
)
|
||||
|
||||
if not interpreter.auto_run:
|
||||
interpreter.display_message(
|
||||
"**Warning:** In this mode, Open Interpreter will not require approval before performing actions. Be ready to close your terminal."
|
||||
)
|
||||
print("") # < - Aesthetic choice
|
||||
|
||||
### MISC SETTINGS
|
||||
|
||||
interpreter.auto_run = True
|
||||
interpreter.computer.languages = [
|
||||
l
|
||||
for l in interpreter.computer.languages
|
||||
if l.name.lower() in ["applescript", "shell", "zsh", "bash", "python"]
|
||||
]
|
||||
interpreter.force_task_completion = True
|
||||
# interpreter.offline = True
|
||||
interpreter.id = 206 # Used to identify itself to other interpreters. This should be changed programmatically so it's unique.
|
||||
|
||||
### RESET conversations/user.json
|
||||
app_dir = user_data_dir("01")
|
||||
conversations_dir = os.path.join(app_dir, "conversations")
|
||||
os.makedirs(conversations_dir, exist_ok=True)
|
||||
user_json_path = os.path.join(conversations_dir, "user.json")
|
||||
with open(user_json_path, "w") as file:
|
||||
json.dump([], file)
|
||||
|
||||
return interpreter
@ -1,29 +0,0 @@
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv() # take environment variables from .env.
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
### LLM SETUP
|
||||
|
||||
# Define the path to a llamafile
|
||||
llamafile_path = Path(__file__).parent / "model.llamafile"
|
||||
|
||||
# Check if the new llamafile exists, if not download it
|
||||
if not os.path.exists(llamafile_path):
|
||||
subprocess.run(
|
||||
[
|
||||
"wget",
|
||||
"-O",
|
||||
llamafile_path,
|
||||
"https://huggingface.co/jartine/phi-2-llamafile/resolve/main/phi-2.Q4_K_M.llamafile",
|
||||
],
|
||||
check=True,
|
||||
)
|
||||
# Make the new llamafile executable
|
||||
subprocess.run(["chmod", "+x", llamafile_path], check=True)
|
||||
|
||||
# Run the new llamafile
|
||||
subprocess.run([str(llamafile_path)], check=True)
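# (Once running, the llamafile serves an OpenAI-compatible endpoint -- by
# default http://localhost:8080/v1, llamafile's documented default -- so a
# profile could point Open Interpreter at it, e.g.:
#
#     interpreter.llm.api_base = "http://localhost:8080/v1"
# )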
@ -0,0 +1,186 @@
|
||||
from interpreter import interpreter
|
||||
|
||||
# This is an Open Interpreter compatible profile.
|
||||
# Visit https://01.openinterpreter.com/profile for all options.
|
||||
|
||||
# 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
|
||||
# {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
|
||||
interpreter.tts = "openai"
|
||||
|
||||
# Connect your 01 to a language model
|
||||
interpreter.llm.model = "gpt-4-turbo"
|
||||
interpreter.llm.context_window = 100000
|
||||
interpreter.llm.max_tokens = 4096
|
||||
# interpreter.llm.api_key = "<your_openai_api_key_here>"
|
||||
|
||||
# Tell your 01 where to find and save skills
|
||||
interpreter.computer.skills.path = "./skills"
|
||||
|
||||
# Extra settings
|
||||
interpreter.computer.import_computer_api = True
|
||||
interpreter.computer.import_skills = True
|
||||
interpreter.computer.run("python", "computer") # This will trigger those imports
|
||||
interpreter.auto_run = True
|
||||
interpreter.loop = True
|
||||
interpreter.loop_message = """Proceed with what you were doing (this is not confirmation, if you just asked me something). You CAN run code on my machine. If you want to run code, start your message with "```"! If the entire task is done, say exactly 'The task is done.' If you need some specific information (like username, message text, skill name, skill step, etc.) say EXACTLY 'Please provide more information.' If it's impossible, say 'The task is impossible.' (If I haven't provided a task, say exactly 'Let me know what you'd like to do next.') Otherwise keep going. CRITICAL: REMEMBER TO FOLLOW ALL PREVIOUS INSTRUCTIONS. If I'm teaching you something, remember to run the related `computer.skills.new_skill` function."""
|
||||
interpreter.loop_breakers = [
|
||||
"The task is done.",
|
||||
"The task is impossible.",
|
||||
"Let me know what you'd like to do next.",
|
||||
"Please provide more information.",
|
||||
]
|
||||
|
||||
# Set the identity and personality of your 01
|
||||
interpreter.system_message = """
|
||||
|
||||
You are the 01, a screenless executive assistant that can complete any task.
|
||||
When you execute code, it will be executed on the user's machine. The user has given you full and complete permission to execute any code necessary to complete the task.
|
||||
Run any code to achieve the goal, and if at first you don't succeed, try again and again.
|
||||
You can install new packages.
|
||||
Be concise. Your messages are being read aloud to the user. DO NOT MAKE PLANS. RUN CODE QUICKLY.
|
||||
Try to spread complex tasks over multiple code blocks. Don't try to do complex tasks in one go.
|
||||
Manually summarize text.
|
||||
|
||||
DON'T TELL THE USER THE METHOD YOU'LL USE, OR MAKE PLANS. ACT LIKE THIS:
|
||||
|
||||
---
|
||||
user: Are there any concerts in Seattle?
|
||||
assistant: Let me check on that.
|
||||
```python
|
||||
computer.browser.search("concerts in Seattle")
|
||||
```
|
||||
```output
|
||||
Upcoming concerts: Bad Bunny at Neumos...
|
||||
```
|
||||
It looks like there's a Bad Bunny concert at Neumos...
|
||||
---
|
||||
|
||||
Act like you can just answer any question, then run code (this is hidden from the user) to answer it.
|
||||
THE USER CANNOT SEE CODE BLOCKS.
|
||||
Your responses should be very short, no more than 1-2 sentences long.
|
||||
DO NOT USE MARKDOWN. ONLY WRITE PLAIN TEXT.
|
||||
|
||||
# THE COMPUTER API
|
||||
|
||||
The `computer` module is ALREADY IMPORTED, and can be used for some tasks:
|
||||
|
||||
```python
|
||||
result_string = computer.browser.search(query) # Google search results will be returned from this function as a string
|
||||
computer.files.edit(path_to_file, original_text, replacement_text) # Edit a file
|
||||
computer.calendar.create_event(title="Meeting", start_date=datetime.datetime.now(), end_date=datetime.datetime.now() + datetime.timedelta(hours=1), notes="Note", location="") # Creates a calendar event
|
||||
events_string = computer.calendar.get_events(start_date=datetime.date.today(), end_date=None) # Get events between dates. If end_date is None, only gets events for start_date
|
||||
computer.calendar.delete_event(event_title="Meeting", start_date=datetime.datetime) # Delete a specific event with a matching title and start date, you may need to use get_events() to find the specific event object first
|
||||
phone_string = computer.contacts.get_phone_number("John Doe")
|
||||
contact_string = computer.contacts.get_email_address("John Doe")
|
||||
computer.mail.send("john@email.com", "Meeting Reminder", "Reminder that our meeting is at 3pm today.", ["path/to/attachment.pdf", "path/to/attachment2.pdf"]) # Send an email with optional attachments
|
||||
emails_string = computer.mail.get(4, unread=True) # Returns the {number} of unread emails, or all emails if False is passed
|
||||
unread_num = computer.mail.unread_count() # Returns the number of unread emails
|
||||
computer.sms.send("555-123-4567", "Hello from the computer!") # Send a text message. MUST be a phone number, so use computer.contacts.get_phone_number frequently here
|
||||
```
|
||||
|
||||
Do not import the computer module, or any of its sub-modules. They are already imported.
|
||||
|
||||
DO NOT use the computer module for ALL tasks. Many tasks can be accomplished via Python, or by pip installing new libraries. Be creative!
|
||||
|
||||
# GUI CONTROL (RARE)
|
||||
|
||||
You are a computer controlling language model. You can control the user's GUI.
|
||||
You may use the `computer` module to control the user's keyboard and mouse, if the task **requires** it:
|
||||
|
||||
```python
|
||||
computer.display.view() # Shows you what's on the screen. **You almost always want to do this first!**
|
||||
computer.keyboard.hotkey(" ", "command") # Opens spotlight
|
||||
computer.keyboard.write("hello")
|
||||
computer.mouse.click("text onscreen") # This clicks on the UI element with that text. Use this **frequently** and get creative! To click a video, you could pass the *timestamp* (which is usually written on the thumbnail) into this.
|
||||
computer.mouse.move("open recent >") # This moves the mouse over the UI element with that text. Many dropdowns will disappear if you click them. You have to hover over items to reveal more.
|
||||
computer.mouse.click(x=500, y=500) # Use this very, very rarely. It's highly inaccurate
|
||||
computer.mouse.click(icon="gear icon") # Moves mouse to the icon with that description. Use this very often
|
||||
computer.mouse.scroll(-10) # Scrolls down. If you don't find some text on screen that you expected to be there, you probably want to do this
|
||||
```
|
||||
|
||||
You are an image-based AI, you can see images.
|
||||
Clicking text is the most reliable way to use the mouse— for example, clicking a URL's text you see in the URL bar, or some textarea's placeholder text (like "Search" to get into a search bar).
|
||||
If you use `plt.show()`, the resulting image will be sent to you. However, if you use `PIL.Image.show()`, the resulting image will NOT be sent to you.
|
||||
It is very important to make sure you are focused on the right application and window. Your first command should usually be to explicitly switch to the correct application. On Macs, ALWAYS use Spotlight to switch applications.
|
||||
When searching the web, use query parameters. For example, https://www.amazon.com/s?k=monitor
|
||||
|
||||
# SKILLS
|
||||
|
||||
Try to use the following special functions (or "skills") to complete your goals whenever possible.
|
||||
THESE ARE ALREADY IMPORTED. YOU CAN CALL THEM INSTANTLY.
|
||||
|
||||
---
|
||||
{{
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import ast
|
||||
|
||||
directory = "./skills"
|
||||
|
||||
def get_function_info(file_path):
|
||||
with open(file_path, "r") as file:
|
||||
tree = ast.parse(file.read())
|
||||
functions = [node for node in tree.body if isinstance(node, ast.FunctionDef)]
|
||||
for function in functions:
|
||||
docstring = ast.get_docstring(function)
|
||||
args = [arg.arg for arg in function.args.args]
|
||||
print(f"Function Name: {function.name}")
|
||||
print(f"Arguments: {args}")
|
||||
print(f"Docstring: {docstring}")
|
||||
print("---")
|
||||
|
||||
files = os.listdir(directory)
|
||||
for file in files:
|
||||
if file.endswith(".py"):
|
||||
file_path = os.path.join(directory, file)
|
||||
get_function_info(file_path)
|
||||
}}
|
||||
|
||||
YOU can add to the above list of skills by defining a python function. The function will be saved as a skill.
|
||||
Search all existing skills by running `computer.skills.search(query)`.
|
||||
|
||||
**Teach Mode**
|
||||
|
||||
If the USER says they want to teach you something, exactly write the following, including the markdown code block:
|
||||
|
||||
---
|
||||
One moment.
|
||||
```python
|
||||
computer.skills.new_skill.create()
|
||||
```
|
||||
---
|
||||
|
||||
If you decide to make a skill yourself to help the user, simply define a python function. `computer.skills.new_skill.create()` is for user-described skills.
|
||||
|
||||
# USE COMMENTS TO PLAN
|
||||
|
||||
IF YOU NEED TO THINK ABOUT A PROBLEM: (such as "Here's the plan:"), WRITE IT IN THE COMMENTS of the code block!
|
||||
|
||||
---
|
||||
User: What is 432/7?
|
||||
Assistant: Let me think about that.
|
||||
```python
|
||||
# Here's the plan:
|
||||
# 1. Divide the numbers
|
||||
# 2. Round to 3 digits
|
||||
print(round(432/7, 3))
|
||||
```
|
||||
```output
|
||||
61.714
|
||||
```
|
||||
The answer is 61.714.
|
||||
---
|
||||
|
||||
# MANUAL TASKS
|
||||
|
||||
Translate things to other languages INSTANTLY and MANUALLY. Don't ever try to use a translation tool.
|
||||
Summarize things manually. DO NOT use a summarizer tool.
|
||||
|
||||
# CRITICAL NOTES
|
||||
|
||||
Code output, despite being sent to you by the user, cannot be seen by the user. You NEED to tell the user about the output of any code you run, even if that means repeating it exactly. >>The user does not have a screen.<<
|
||||
ALWAYS REMEMBER: You are running on a device called the O1, where the interface is entirely speech-based. Make your responses to the user VERY short. DO NOT PLAN. BE CONCISE. WRITE CODE TO RUN IT.
|
||||
Try multiple methods before saying the task is impossible. **You can do it!**
|
||||
|
||||
""".strip()
@ -0,0 +1,24 @@
|
||||
from interpreter import interpreter
|
||||
|
||||
# This is an Open Interpreter compatible profile.
|
||||
# Visit https://01.openinterpreter.com/profile for all options.
|
||||
|
||||
# 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
|
||||
# {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
|
||||
interpreter.tts = "elevenlabs"
|
||||
|
||||
# 01 Language Model Config.
|
||||
interpreter.llm_service = "litellm"
|
||||
interpreter.llm.model = "groq/llama3-8b-8192"
|
||||
interpreter.llm.supports_vision = False
|
||||
interpreter.llm.supports_functions = False
|
||||
interpreter.llm.context_window = 2048
|
||||
interpreter.llm.max_tokens = 4096
|
||||
interpreter.llm.temperature = 0.8
|
||||
|
||||
interpreter.computer.import_computer_api = False
|
||||
|
||||
interpreter.auto_run = True
|
||||
interpreter.system_message = (
|
||||
"You are a helpful assistant that can answer questions and help with tasks."
|
||||
)
@ -0,0 +1,38 @@
|
||||
from interpreter import interpreter
|
||||
|
||||
# 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
|
||||
# {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
|
||||
interpreter.tts = "coqui"
|
||||
|
||||
# Local setup
|
||||
interpreter.local_setup()
|
||||
|
||||
interpreter.system_message = """You are an AI assistant that writes markdown code snippets to answer the user's request. You speak very concisely and quickly, you say nothing irrelevant to the user's request. For example:
|
||||
|
||||
User: Open the chrome app.
|
||||
Assistant: On it.
|
||||
```python
|
||||
import webbrowser
|
||||
webbrowser.open('https://chrome.google.com')
|
||||
```
|
||||
User: The code you ran produced no output. Was this expected, or are we finished?
|
||||
Assistant: No further action is required; the provided snippet opens Chrome.
|
||||
|
||||
Now, your turn:"""
|
||||
|
||||
# Message templates
|
||||
interpreter.code_output_template = '''I executed that code. This was the output: """{content}"""\n\nWhat does this output mean (I can't understand it, please help) / what code needs to be run next (if anything, or are we done)? I can't replace any placeholders.'''
|
||||
interpreter.empty_code_output_template = "The code above was executed on my machine. It produced no text output. What's next (if anything, or are we done?)"
|
||||
interpreter.code_output_sender = "user"
|
||||
|
||||
# Computer settings
|
||||
interpreter.computer.import_computer_api = False
|
||||
|
||||
# Misc settings
|
||||
interpreter.auto_run = False
|
||||
interpreter.offline = True
|
||||
|
||||
# Final message
|
||||
interpreter.display_message(
|
||||
f"> Model set to `{interpreter.llm.model}`\n\n**Open Interpreter** will require approval before running code.\n\nUse `interpreter -y` to bypass this.\n\nPress `CTRL-C` to exit.\n"
|
||||
)
@ -1,520 +0,0 @@
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv() # take environment variables from .env.
|
||||
|
||||
import traceback
|
||||
from platformdirs import user_data_dir
|
||||
import json
|
||||
import queue
|
||||
import os
|
||||
import datetime
|
||||
from .utils.bytes_to_wav import bytes_to_wav
|
||||
import re
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.responses import PlainTextResponse
|
||||
from starlette.websockets import WebSocket, WebSocketDisconnect
|
||||
import asyncio
|
||||
from .utils.kernel import put_kernel_messages_into_queue
|
||||
from .i import configure_interpreter
|
||||
from interpreter import interpreter
|
||||
from ..utils.accumulator import Accumulator
|
||||
from .utils.logs import setup_logging
|
||||
from .utils.logs import logger
|
||||
import base64
|
||||
import shutil
|
||||
from ..utils.print_markdown import print_markdown
|
||||
|
||||
os.environ["STT_RUNNER"] = "server"
|
||||
os.environ["TTS_RUNNER"] = "server"
|
||||
|
||||
markdown = """
|
||||
○
|
||||
|
||||
*Starting...*
|
||||
"""
|
||||
print("")
|
||||
print_markdown(markdown)
|
||||
print("")
|
||||
|
||||
|
||||
setup_logging()
|
||||
|
||||
accumulator_global = Accumulator()
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
app_dir = user_data_dir("01")
|
||||
conversation_history_path = os.path.join(app_dir, "conversations", "user.json")
|
||||
|
||||
SERVER_LOCAL_PORT = int(os.getenv("SERVER_LOCAL_PORT", 10001))
|
||||
|
||||
|
||||
# This is so we only say() full sentences
|
||||
def is_full_sentence(text):
|
||||
return text.endswith((".", "!", "?"))
|
||||
|
||||
|
||||
def split_into_sentences(text):
|
||||
return re.split(r"(?<=[.!?])\s+", text)
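# e.g. split_into_sentences("Hi there. How are you?") -> ["Hi there.", "How are you?"]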
|
||||
|
||||
|
||||
# Queues
|
||||
from_computer = (
|
||||
queue.Queue()
|
||||
) # Just for computer messages from the device. Sync queue because interpreter.run is synchronous
|
||||
from_user = asyncio.Queue() # Just for user messages from the device.
|
||||
to_device = asyncio.Queue() # For messages we send.
|
||||
|
||||
# Switch code executor to device if that's set
|
||||
|
||||
if os.getenv("CODE_RUNNER") == "device":
|
||||
# (This should probably just loop through all languages and apply these changes instead)
|
||||
|
||||
class Python:
|
||||
# This is the name that will appear to the LLM.
|
||||
name = "python"
|
||||
|
||||
def __init__(self):
|
||||
self.halt = False
|
||||
|
||||
def run(self, code):
|
||||
"""Generator that yields a dictionary in LMC Format."""
|
||||
|
||||
# Prepare the data
|
||||
message = {
|
||||
"role": "assistant",
|
||||
"type": "code",
|
||||
"format": "python",
|
||||
"content": code,
|
||||
}
|
||||
|
||||
# Unless it was just sent to the device, send it wrapped in flags
|
||||
if not (interpreter.messages and interpreter.messages[-1] == message):
|
||||
to_device.put(
|
||||
{
|
||||
"role": "assistant",
|
||||
"type": "code",
|
||||
"format": "python",
|
||||
"start": True,
|
||||
}
|
||||
)
|
||||
to_device.put(message)
|
||||
to_device.put(
|
||||
{
|
||||
"role": "assistant",
|
||||
"type": "code",
|
||||
"format": "python",
|
||||
"end": True,
|
||||
}
|
||||
)
|
||||
|
||||
# Stream the response
|
||||
logger.info("Waiting for the device to respond...")
|
||||
while True:
|
||||
chunk = from_computer.get()
|
||||
logger.info(f"Server received from device: {chunk}")
|
||||
if "end" in chunk:
|
||||
break
|
||||
yield chunk
|
||||
|
||||
def stop(self):
|
||||
self.halt = True
|
||||
|
||||
def terminate(self):
|
||||
"""Terminates the entire process."""
|
||||
# dramatic!! do nothing
|
||||
pass
|
||||
|
||||
interpreter.computer.languages = [Python]
|
||||
|
||||
# Configure interpreter
|
||||
interpreter = configure_interpreter(interpreter)
|
||||
|
||||
|
||||
@app.get("/ping")
|
||||
async def ping():
|
||||
return PlainTextResponse("pong")
|
||||
|
||||
|
||||
@app.websocket("/")
|
||||
async def websocket_endpoint(websocket: WebSocket):
|
||||
await websocket.accept()
|
||||
receive_task = asyncio.create_task(receive_messages(websocket))
|
||||
send_task = asyncio.create_task(send_messages(websocket))
|
||||
try:
|
||||
await asyncio.gather(receive_task, send_task)
|
||||
except Exception as e:
|
||||
logger.debug(traceback.format_exc())
|
||||
logger.info(f"Connection lost. Error: {e}")
|
||||
|
||||
|
||||
@app.post("/")
|
||||
async def add_computer_message(request: Request):
|
||||
body = await request.json()
|
||||
text = body.get("text")
|
||||
    if not text:
        # FastAPI does not support Flask-style (body, status) tuple returns
        return PlainTextResponse("Missing 'text' in request body", status_code=422)
|
||||
message = {"role": "user", "type": "message", "content": text}
|
||||
await from_user.put({"role": "user", "type": "message", "start": True})
|
||||
await from_user.put(message)
|
||||
await from_user.put({"role": "user", "type": "message", "end": True})
|
||||
|
||||
|
||||
async def receive_messages(websocket: WebSocket):
|
||||
while True:
|
||||
try:
|
||||
try:
|
||||
data = await websocket.receive()
|
||||
except Exception as e:
|
||||
print(str(e))
|
||||
return
|
||||
if "text" in data:
|
||||
try:
|
||||
data = json.loads(data["text"])
|
||||
if data["role"] == "computer":
|
||||
from_computer.put(
|
||||
data
|
||||
) # To be handled by interpreter.computer.run
|
||||
elif data["role"] == "user":
|
||||
await from_user.put(data)
|
||||
                    else:
                        raise ValueError(f"Unknown role: {data}")
|
||||
except json.JSONDecodeError:
|
||||
pass # data is not JSON, leave it as is
|
||||
elif "bytes" in data:
|
||||
data = data["bytes"] # binary data
|
||||
await from_user.put(data)
|
||||
except WebSocketDisconnect as e:
|
||||
if e.code == 1000:
|
||||
logger.info("Websocket connection closed normally.")
|
||||
return
|
||||
else:
|
||||
raise
|
||||
|
||||
|
||||
async def send_messages(websocket: WebSocket):
|
||||
while True:
|
||||
message = await to_device.get()
|
||||
|
||||
try:
|
||||
if isinstance(message, dict):
|
||||
# print(f"Sending to the device: {type(message)} {str(message)[:100]}")
|
||||
await websocket.send_json(message)
|
||||
elif isinstance(message, bytes):
|
||||
# print(f"Sending to the device: {type(message)} {str(message)[:100]}")
|
||||
await websocket.send_bytes(message)
|
||||
else:
|
||||
raise TypeError("Message must be a dict or bytes")
|
||||
except:
|
||||
# Make sure to put the message back in the queue if you failed to send it
|
||||
await to_device.put(message)
|
||||
raise
|
||||
|
||||
|
||||
async def listener(mobile: bool):
|
||||
while True:
|
||||
try:
|
||||
if mobile:
|
||||
accumulator_mobile = Accumulator()
|
||||
|
||||
while True:
|
||||
if not from_user.empty():
|
||||
chunk = await from_user.get()
|
||||
break
|
||||
elif not from_computer.empty():
|
||||
chunk = from_computer.get()
|
||||
break
|
||||
await asyncio.sleep(1)
|
||||
|
||||
if mobile:
|
||||
message = accumulator_mobile.accumulate_mobile(chunk)
|
||||
else:
|
||||
message = accumulator_global.accumulate(chunk)
|
||||
|
||||
            if message is None:
|
||||
# Will be None until we have a full message ready
|
||||
continue
|
||||
|
||||
# print(str(message)[:1000])
|
||||
|
||||
# At this point, we have our message
|
||||
|
||||
if message["type"] == "audio" and message["format"].startswith("bytes"):
|
||||
if (
|
||||
"content" not in message
|
||||
or message["content"] == None
|
||||
or message["content"] == ""
|
||||
): # If it was nothing / silence / empty
|
||||
continue
|
||||
|
||||
# Convert bytes to audio file
|
||||
# Format will be bytes.wav or bytes.opus
|
||||
mime_type = "audio/" + message["format"].split(".")[1]
# print("input audio file content", message["content"][:100])
|
||||
audio_file_path = bytes_to_wav(message["content"], mime_type)
|
||||
# print("Audio file path:", audio_file_path)
|
||||
|
||||
# For microphone debugging:
|
||||
if False:
|
||||
os.system(f"open {audio_file_path}")
|
||||
import time
|
||||
|
||||
time.sleep(15)
|
||||
|
||||
text = stt(audio_file_path)
|
||||
print("> ", text)
|
||||
message = {"role": "user", "type": "message", "content": text}
|
||||
|
||||
# At this point, we have only text messages
|
||||
|
||||
if type(message["content"]) != str:
|
||||
print("This should be a string, but it's not:", message["content"])
|
||||
message["content"] = message["content"].decode()
|
||||
|
||||
# Custom stop message will halt us
|
||||
if message["content"].lower().strip(".,! ") == "stop":
|
||||
continue
|
||||
|
||||
# Load, append, and save conversation history
|
||||
with open(conversation_history_path, "r") as file:
|
||||
messages = json.load(file)
|
||||
messages.append(message)
|
||||
with open(conversation_history_path, "w") as file:
|
||||
json.dump(messages, file, indent=4)
|
||||
|
||||
accumulated_text = ""
|
||||
|
||||
if any(
|
||||
[m["type"] == "image" for m in messages]
|
||||
) and interpreter.llm.model.startswith("gpt-"):
|
||||
interpreter.llm.model = "gpt-4-vision-preview"
|
||||
interpreter.llm.supports_vision = True
|
||||
|
||||
for chunk in interpreter.chat(messages, stream=True, display=True):
|
||||
if any([m["type"] == "image" for m in interpreter.messages]):
|
||||
interpreter.llm.model = "gpt-4-vision-preview"
|
||||
|
||||
logger.debug("Got chunk:", chunk)
|
||||
|
||||
# Send it to the user
|
||||
await to_device.put(chunk)
|
||||
|
||||
# Yield to the event loop, so you actually send it out
|
||||
await asyncio.sleep(0.01)
|
||||
|
||||
if os.getenv("TTS_RUNNER") == "server":
|
||||
# Speak full sentences out loud
|
||||
if (
|
||||
chunk["role"] == "assistant"
|
||||
and "content" in chunk
|
||||
and chunk["type"] == "message"
|
||||
):
|
||||
accumulated_text += chunk["content"]
|
||||
sentences = split_into_sentences(accumulated_text)
|
||||
|
||||
# If we're going to speak, say we're going to stop sending text.
|
||||
# This should be fixed probably, we should be able to do both in parallel, or only one.
|
||||
if any(is_full_sentence(sentence) for sentence in sentences):
|
||||
await to_device.put(
|
||||
{"role": "assistant", "type": "message", "end": True}
|
||||
)
|
||||
|
||||
if is_full_sentence(sentences[-1]):
|
||||
for sentence in sentences:
|
||||
await stream_tts_to_device(sentence, mobile)
|
||||
accumulated_text = ""
|
||||
else:
|
||||
for sentence in sentences[:-1]:
|
||||
await stream_tts_to_device(sentence, mobile)
|
||||
accumulated_text = sentences[-1]
|
||||
|
||||
# If we're going to speak, say we're going to stop sending text.
|
||||
# This should be fixed probably, we should be able to do both in parallel, or only one.
|
||||
if any(is_full_sentence(sentence) for sentence in sentences):
|
||||
await to_device.put(
|
||||
{"role": "assistant", "type": "message", "start": True}
|
||||
)
|
||||
|
||||
# If we have a new message, save our progress and go back to the top
|
||||
if not from_user.empty():
|
||||
# Check if it's just an end flag. We ignore those.
|
||||
temp_message = await from_user.get()
|
||||
|
||||
if (
|
||||
type(temp_message) is dict
|
||||
and temp_message.get("role") == "user"
|
||||
and temp_message.get("end")
|
||||
):
|
||||
# Yup. False alarm.
|
||||
continue
|
||||
else:
|
||||
# Whoops! Put that back
|
||||
await from_user.put(temp_message)
|
||||
|
||||
with open(conversation_history_path, "w") as file:
|
||||
json.dump(interpreter.messages, file, indent=4)
|
||||
|
||||
# TODO: is triggering seemingly randomly
|
||||
# logger.info("New user message received. Breaking.")
|
||||
# break
|
||||
|
||||
# Also check if there's any new computer messages
|
||||
if not from_computer.empty():
|
||||
with open(conversation_history_path, "w") as file:
|
||||
json.dump(interpreter.messages, file, indent=4)
|
||||
|
||||
logger.info("New computer message received. Breaking.")
|
||||
break
|
||||
except:
|
||||
traceback.print_exc()
|
||||
|
||||
|
||||
async def stream_tts_to_device(sentence, mobile: bool):
|
||||
force_task_completion_responses = [
|
||||
"the task is done",
|
||||
"the task is impossible",
|
||||
"let me know what you'd like to do next",
|
||||
]
|
||||
if sentence.lower().strip().strip(".!?").strip() in force_task_completion_responses:
|
||||
return
|
||||
|
||||
for chunk in stream_tts(sentence, mobile):
|
||||
await to_device.put(chunk)
|
||||
|
||||
|
||||
def stream_tts(sentence, mobile: bool):
|
||||
audio_file = tts(sentence, mobile)
|
||||
|
||||
# Read the entire WAV file
|
||||
with open(audio_file, "rb") as f:
|
||||
audio_bytes = f.read()
|
||||
|
||||
if mobile:
|
||||
file_type = "audio/wav"
|
||||
|
||||
os.remove(audio_file)
|
||||
|
||||
# stream the audio as a single sentence
|
||||
yield {
|
||||
"role": "assistant",
|
||||
"type": "audio",
|
||||
"format": file_type,
|
||||
"content": base64.b64encode(audio_bytes).decode("utf-8"),
|
||||
"start": True,
|
||||
"end": True,
|
||||
}
|
||||
|
||||
else:
|
||||
# stream the audio in chunk sizes
|
||||
os.remove(audio_file)
|
||||
|
||||
file_type = "bytes.raw"
|
||||
chunk_size = 1024
|
||||
|
||||
yield {"role": "assistant", "type": "audio", "format": file_type, "start": True}
|
||||
for i in range(0, len(audio_bytes), chunk_size):
|
||||
chunk = audio_bytes[i : i + chunk_size]
|
||||
yield chunk
|
||||
yield {"role": "assistant", "type": "audio", "format": file_type, "end": True}
|
||||
|
||||
|
||||
from uvicorn import Config, Server
|
||||
import os
|
||||
from importlib import import_module
|
||||
|
||||
# these will be overwritten
|
||||
HOST = ""
|
||||
PORT = 0
|
||||
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
server_url = f"{HOST}:{PORT}"
|
||||
print("")
|
||||
print_markdown("\n*Ready.*\n")
|
||||
print("")
|
||||
|
||||
|
||||
@app.on_event("shutdown")
|
||||
async def shutdown_event():
|
||||
print_markdown("*Server is shutting down*")
|
||||
|
||||
|
||||
async def main(
|
||||
server_host,
|
||||
server_port,
|
||||
llm_service,
|
||||
model,
|
||||
llm_supports_vision,
|
||||
llm_supports_functions,
|
||||
context_window,
|
||||
max_tokens,
|
||||
temperature,
|
||||
tts_service,
|
||||
stt_service,
|
||||
mobile,
|
||||
):
|
||||
global HOST
|
||||
global PORT
|
||||
PORT = server_port
|
||||
HOST = server_host
|
||||
|
||||
# Setup services
|
||||
application_directory = user_data_dir("01")
|
||||
services_directory = os.path.join(application_directory, "services")
|
||||
|
||||
service_dict = {"llm": llm_service, "tts": tts_service, "stt": stt_service}
|
||||
|
||||
# Create a temp file with the session number
|
||||
session_file_path = os.path.join(user_data_dir("01"), "01-session.txt")
|
||||
with open(session_file_path, "w") as session_file:
|
||||
session_id = int(datetime.datetime.now().timestamp() * 1000)
|
||||
session_file.write(str(session_id))
|
||||
|
||||
for service in service_dict:
|
||||
service_directory = os.path.join(
|
||||
services_directory, service, service_dict[service]
|
||||
)
|
||||
|
||||
# This is the folder they can mess around in
|
||||
config = {"service_directory": service_directory}
|
||||
|
||||
if service == "llm":
|
||||
config.update(
|
||||
{
|
||||
"interpreter": interpreter,
|
||||
"model": model,
|
||||
"llm_supports_vision": llm_supports_vision,
|
||||
"llm_supports_functions": llm_supports_functions,
|
||||
"context_window": context_window,
|
||||
"max_tokens": max_tokens,
|
||||
"temperature": temperature,
|
||||
}
|
||||
)
|
||||
|
||||
module = import_module(
|
||||
f".server.services.{service}.{service_dict[service]}.{service}",
|
||||
package="source",
|
||||
)
|
||||
|
||||
ServiceClass = getattr(module, service.capitalize())
|
||||
service_instance = ServiceClass(config)
|
||||
globals()[service] = getattr(service_instance, service)
|
||||
|
||||
interpreter.llm.completions = llm
|
||||
|
||||
# Start listening
|
||||
asyncio.create_task(listener(mobile))
|
||||
|
||||
# Start watching the kernel if it's your job to do that
|
||||
if True: # in the future, code can run on device. for now, just server.
|
||||
asyncio.create_task(put_kernel_messages_into_queue(from_computer))
|
||||
|
||||
config = Config(app, host=server_host, port=int(server_port), lifespan="on")
|
||||
server = Server(config)
|
||||
await server.serve()
|
||||
|
||||
|
||||
# Run the FastAPI app
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
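For context, here is a minimal sketch of what a device-side consumer of this streaming protocol could look like. The names (`websocket`, `to_device`) are assumptions, not the repo's actual device code: `stream_tts` yields a `start` header dict, then raw byte chunks, then an `end` dict, so a consumer has to branch on type.

```python
# Hypothetical consumer sketch: drains the to_device queue and forwards
# audio to a websocket. The websocket/to_device names are assumptions.
import asyncio
import json

async def send_messages(websocket, to_device: asyncio.Queue):
    while True:
        message = await to_device.get()
        if isinstance(message, bytes):
            # Raw audio chunk from stream_tts's non-mobile branch
            await websocket.send_bytes(message)
        else:
            # "start"/"end" header dicts (and base64 audio in the mobile case)
            await websocket.send_text(json.dumps(message))
```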
@ -1,11 +0,0 @@
class Llm:
    def __init__(self, config):
        # Litellm is used by OI by default, so we just modify OI

        interpreter = config["interpreter"]
        config.pop("interpreter", None)
        config.pop("service_directory", None)
        for key, value in config.items():
            setattr(interpreter, key.replace("-", "_"), value)

        self.llm = interpreter.llm.completions
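A minimal usage sketch for this wrapper, assuming an `interpreter` instance from the `interpreter` package; the config keys mirror the ones `main()` assembles above, and the model name is an assumption.

```python
# Hypothetical usage sketch; config keys mirror those built in main() above.
from interpreter import interpreter

llm = Llm(
    {
        "interpreter": interpreter,
        "model": "gpt-4",       # assumed value for illustration
        "context_window": 8000,
        "max_tokens": 1000,
        "temperature": 0,
    }
)
completions = llm.llm  # same callable as interpreter.llm.completions
```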
@ -1,68 +0,0 @@
import os
import subprocess
import requests
import json


class Llm:
    def __init__(self, config):
        self.install(config["service_directory"])

    def install(self, service_directory):
        LLM_FOLDER_PATH = service_directory
        self.llm_directory = os.path.join(LLM_FOLDER_PATH, "llm")
        if not os.path.isdir(self.llm_directory):  # Check if the LLM directory exists
            # Create the llm subdirectory the downloads below run in
            os.makedirs(self.llm_directory, exist_ok=True)

            # Install WasmEdge. The pipe requires a shell, so run this as a
            # single shell command (passing "|" in an argument list would hand
            # it to curl as a literal argument instead of piping to bash).
            subprocess.run(
                "curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh"
                " | bash -s -- --plugin wasi_nn-ggml",
                shell=True,
            )

            # Download the Qwen1.5-0.5B-Chat model GGUF file
            MODEL_URL = "https://huggingface.co/second-state/Qwen1.5-0.5B-Chat-GGUF/resolve/main/Qwen1.5-0.5B-Chat-Q5_K_M.gguf"
            subprocess.run(["curl", "-LO", MODEL_URL], cwd=self.llm_directory)

            # Download the llama-api-server.wasm app
            APP_URL = "https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/llama-api-server.wasm"
            subprocess.run(["curl", "-LO", APP_URL], cwd=self.llm_directory)

            # Run the API server in the background (subprocess.run would block
            # until the server exits, so install() would never return)
            subprocess.Popen(
                [
                    "wasmedge",
                    "--dir",
                    ".:.",
                    "--nn-preload",
                    "default:GGML:AUTO:Qwen1.5-0.5B-Chat-Q5_K_M.gguf",
                    "llama-api-server.wasm",
                    "-p",
                    "llama-2-chat",
                ],
                cwd=self.llm_directory,
            )

            print("LLM setup completed.")
        else:
            print("LLM already set up. Skipping download.")

    def llm(self, messages):
        url = "http://localhost:8080/v1/chat/completions"
        headers = {"accept": "application/json", "Content-Type": "application/json"}
        data = {"messages": messages, "model": "llama-2-chat"}
        with requests.post(
            url, headers=headers, data=json.dumps(data), stream=True
        ) as response:
            for line in response.iter_lines():
                if line:
                    yield json.loads(line)
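A sketch of consuming the generator above, assuming the llama-api-server is already running on port 8080 and streams OpenAI-style JSON lines (the exact event schema is an assumption); constructing `Llm` triggers `install()`, which downloads WasmEdge and the model on first run.

```python
# Hypothetical consumer; construction runs install() and may download assets.
service = Llm({"service_directory": "/tmp/01-llm"})  # path is an assumption

messages = [{"role": "user", "content": "Say hello."}]
for event in service.llm(messages):
    # Each event is one parsed JSON line from the streaming response
    print(event)
```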
@ -1,87 +0,0 @@
import os
import platform
import subprocess
import time
import wget
import stat


class Llm:
    def __init__(self, config):
        self.interpreter = config["interpreter"]
        config.pop("interpreter", None)

        self.install(config["service_directory"])

        config.pop("service_directory", None)
        for key, value in config.items():
            setattr(self.interpreter, key.replace("-", "_"), value)

        self.llm = self.interpreter.llm.completions

    def install(self, service_directory):
        if platform.system() == "Darwin":  # Check if the system is MacOS
            result = subprocess.run(
                ["xcode-select", "-p"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT
            )
            if result.returncode != 0:
                print(
                    "Llamafile requires Mac users to have Xcode installed. You can install Xcode from https://developer.apple.com/xcode/ .\n\nAlternatively, you can use `LM Studio`, `Jan.ai`, or `Ollama` to manage local language models. Learn more at https://docs.openinterpreter.com/guides/running-locally ."
                )
                time.sleep(3)
                raise Exception(
                    "Xcode is not installed. Please install Xcode and try again."
                )

        # Define the path to the models directory
        models_dir = os.path.join(service_directory, "models")

        # Check and create the models directory if it doesn't exist
        if not os.path.exists(models_dir):
            os.makedirs(models_dir)

        # Define the path to the new llamafile
        llamafile_path = os.path.join(models_dir, "phi-2.Q4_K_M.llamafile")

        # Check if the new llamafile exists, if not download it
        if not os.path.exists(llamafile_path):
            print(
                "Attempting to download the `Phi-2` language model. This may take a few minutes."
            )
            time.sleep(3)

            url = "https://huggingface.co/jartine/phi-2-llamafile/resolve/main/phi-2.Q4_K_M.llamafile"
            wget.download(url, llamafile_path)

        # Make the new llamafile executable
        if platform.system() != "Windows":
            st = os.stat(llamafile_path)
            os.chmod(llamafile_path, st.st_mode | stat.S_IEXEC)

        # Run the new llamafile in the background
        if os.path.exists(llamafile_path):
            try:
                # Test if the llamafile is executable
                subprocess.check_call(f'"{llamafile_path}"', shell=True)
            except subprocess.CalledProcessError:
                print(
                    "The llamafile is not executable. Please check the file permissions."
                )
                raise
            subprocess.Popen(
                f'"{llamafile_path}" ' + " ".join(["-ngl", "9999"]), shell=True
            )
        else:
            error_message = "The llamafile does not exist or is corrupted. Please ensure it has been downloaded correctly or try again."
            print(error_message)

        self.interpreter.system_message = "You are Open Interpreter, a world-class programmer that can execute code on the user's machine."
        self.interpreter.offline = True

        self.interpreter.llm.model = "local"
        self.interpreter.llm.temperature = 0
        self.interpreter.llm.api_base = "https://localhost:8080/v1"
        self.interpreter.llm.max_tokens = 1000
        self.interpreter.llm.context_window = 3000
        self.interpreter.llm.supports_functions = False
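Once the llamafile is serving, the interpreter talks to it through the OpenAI-compatible endpoint configured above. A hedged sketch of a direct request against that endpoint follows; the payload fields assume the standard OpenAI chat schema, and note that llamafile's local server speaks plain HTTP, so the sketch uses `http://` even though the config string above says `https://`.

```python
# Hypothetical direct request to the llamafile server configured above.
import requests

resp = requests.post(
    "http://localhost:8080/v1/chat/completions",
    json={
        "model": "local",
        "messages": [{"role": "user", "content": "Hello"}],
        "max_tokens": 64,
    },
)
print(resp.json())
```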
@ -1,169 +0,0 @@
"""
Defines a function which takes a path to an audio file and turns it into text.
"""

from datetime import datetime
import os
import contextlib
import tempfile
import shutil
import ffmpeg
import subprocess

import urllib.request


class Stt:
    def __init__(self, config):
        self.service_directory = config["service_directory"]
        install(self.service_directory)

    def stt(self, audio_file_path):
        return stt(self.service_directory, audio_file_path)


def install(service_dir):
    ### INSTALL

    WHISPER_RUST_PATH = os.path.join(service_dir, "whisper-rust")
    script_dir = os.path.dirname(os.path.realpath(__file__))
    source_whisper_rust_path = os.path.join(script_dir, "whisper-rust")
    if not os.path.exists(source_whisper_rust_path):
        print(f"Source directory does not exist: {source_whisper_rust_path}")
        exit(1)
    if not os.path.exists(WHISPER_RUST_PATH):
        shutil.copytree(source_whisper_rust_path, WHISPER_RUST_PATH)

    os.chdir(WHISPER_RUST_PATH)

    # Check if whisper-rust executable exists before attempting to build
    if not os.path.isfile(
        os.path.join(WHISPER_RUST_PATH, "target/release/whisper-rust")
    ):
        # Check if Rust is installed. Needed to build whisper executable

        rustc_path = shutil.which("rustc")

        if rustc_path is None:
            print(
                "Rust is not installed or is not in system PATH. Please install Rust before proceeding."
            )
            exit(1)

        # Build Whisper Rust executable if not found
        subprocess.run(["cargo", "build", "--release"], check=True)
    else:
        print("Whisper Rust executable already exists. Skipping build.")

    WHISPER_MODEL_PATH = os.path.join(service_dir, "model")

    WHISPER_MODEL_NAME = os.getenv("WHISPER_MODEL_NAME", "ggml-tiny.en.bin")
    WHISPER_MODEL_URL = os.getenv(
        "WHISPER_MODEL_URL",
        "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/",
    )

    if not os.path.isfile(os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME)):
        os.makedirs(WHISPER_MODEL_PATH, exist_ok=True)
        urllib.request.urlretrieve(
            f"{WHISPER_MODEL_URL}{WHISPER_MODEL_NAME}",
            os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME),
        )
    else:
        print("Whisper model already exists. Skipping download.")


def convert_mime_type_to_format(mime_type: str) -> str:
    if mime_type == "audio/x-wav" or mime_type == "audio/wav":
        return "wav"
    if mime_type == "audio/webm":
        return "webm"
    if mime_type == "audio/raw":
        return "dat"

    return mime_type


@contextlib.contextmanager
def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    temp_dir = tempfile.gettempdir()

    # Create a temporary file with the appropriate extension
    input_ext = convert_mime_type_to_format(mime_type)
    input_path = os.path.join(
        temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}"
    )
    with open(input_path, "wb") as f:
        f.write(audio)

    # Check if the input file exists
    assert os.path.exists(input_path), f"Input file does not exist: {input_path}"

    # Export to wav
    output_path = os.path.join(
        temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav"
    )
    print(mime_type, input_path, output_path)
    if mime_type == "audio/raw":
        ffmpeg.input(
            input_path,
            f="s16le",
            ar="16000",
            ac=1,
        ).output(output_path, loglevel="panic").run()
    else:
        ffmpeg.input(input_path).output(
            output_path, acodec="pcm_s16le", ac=1, ar="16k", loglevel="panic"
        ).run()

    try:
        yield output_path
    finally:
        os.remove(input_path)
        os.remove(output_path)


def run_command(command):
    result = subprocess.run(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True
    )
    return result.stdout, result.stderr


def get_transcription_file(service_directory, wav_file_path: str):
    local_path = os.path.join(service_directory, "model")
    whisper_rust_path = os.path.join(
        service_directory, "whisper-rust", "target", "release"
    )
    model_name = os.getenv("WHISPER_MODEL_NAME", "ggml-tiny.en.bin")

    output, _ = run_command(
        [
            os.path.join(whisper_rust_path, "whisper-rust"),
            "--model-path",
            os.path.join(local_path, model_name),
            "--file-path",
            wav_file_path,
        ]
    )

    return output


def stt_wav(service_directory, wav_file_path: str):
    temp_dir = tempfile.gettempdir()
    output_path = os.path.join(
        temp_dir, f"output_stt_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav"
    )
    ffmpeg.input(wav_file_path).output(
        output_path, acodec="pcm_s16le", ac=1, ar="16k", loglevel="panic"
    ).run()
    try:
        transcript = get_transcription_file(service_directory, output_path)
    finally:
        os.remove(output_path)
    return transcript


def stt(service_directory, input_data):
    return stt_wav(service_directory, input_data)
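A usage sketch for this service; the directory and file name are assumptions, and note that construction runs `install()`, which copies and builds the whisper-rust crate and downloads the model on first use.

```python
# Hypothetical usage; install() runs on construction and may build/download.
stt_service = Stt({"service_directory": "/tmp/01-stt"})  # path is an assumption
text = stt_service.stt("recording.wav")                  # path to a wav file
print(text)
```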
@ -1,10 +0,0 @@
# Generated by Cargo
# will have compiled files and executables
debug/
target/

# These are backup files generated by rustfmt
**/*.rs.bk

# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
File diff suppressed because it is too large
@ -1,14 +0,0 @@
[package]
name = "whisper-rust"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0.79"
clap = { version = "4.4.18", features = ["derive"] }
cpal = "0.15.2"
hound = "3.5.1"
whisper-rs = "0.10.0"
whisper-rs-sys = "0.8.0"
@ -1,34 +0,0 @@
mod transcribe;

use clap::Parser;
use std::path::PathBuf;
use transcribe::transcribe;

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// This is the model for Whisper STT
    #[arg(short, long, value_parser, required = true)]
    model_path: PathBuf,

    /// This is the wav audio file that will be converted from speech to text
    #[arg(short, long, value_parser, required = true)]
    file_path: Option<PathBuf>,
}

fn main() {
    let args = Args::parse();

    let file_path = match args.file_path {
        Some(fp) => fp,
        None => panic!("No file path provided"),
    };

    let result = transcribe(&args.model_path, &file_path);

    match result {
        Ok(transcription) => print!("{}", transcription),
        Err(e) => panic!("Error: {}", e),
    }
}
@ -1,64 +0,0 @@
use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
use std::path::PathBuf;


/// Transcribes the given audio file using the whisper-rs library.
///
/// # Arguments
/// * `model_path` - Path to the Whisper model file
/// * `file_path` - Path to the audio file to be transcribed.
///
/// # Returns
///
/// A Result containing a String with the transcription if successful, or an error message if not.
pub fn transcribe(model_path: &PathBuf, file_path: &PathBuf) -> Result<String, String> {
    let model_path_str = model_path.to_str().expect("Not valid model path");
    // Load a context and model
    let ctx = WhisperContext::new_with_params(
        model_path_str,
        WhisperContextParameters::default(),
    )
    .map_err(|_| "failed to load model")?;

    // Create a state
    let mut state = ctx.create_state().map_err(|_| "failed to create state")?;

    // Create a params object
    // Note that currently the only implemented strategy is Greedy, BeamSearch is a WIP
    let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });

    // Edit parameters as needed
    params.set_n_threads(1); // Set the number of threads to use
    params.set_translate(true); // Enable translation
    params.set_language(Some("en")); // Set the language to translate to English
    // Disable printing to stdout
    params.set_print_special(false);
    params.set_print_progress(false);
    params.set_print_realtime(false);
    params.set_print_timestamps(false);

    // Load the audio file as little-endian 16-bit PCM samples
    let audio_data = std::fs::read(file_path)
        .map_err(|e| format!("failed to read audio file: {}", e))?
        .chunks_exact(2)
        .map(|chunk| i16::from_ne_bytes([chunk[0], chunk[1]]))
        .collect::<Vec<i16>>();

    // Convert the i16 samples to the f32 format whisper expects
    // (the input is assumed to already be 16 kHz mono)
    let audio_data = whisper_rs::convert_integer_to_float_audio(&audio_data);

    // Run the model
    state.full(params, &audio_data[..]).map_err(|_| "failed to run model")?;

    // Fetch the results
    let num_segments = state.full_n_segments().map_err(|_| "failed to get number of segments")?;
    let mut transcription = String::new();
    for i in 0..num_segments {
        let segment = state.full_get_segment_text(i).map_err(|_| "failed to get segment")?;
        transcription.push_str(&segment);
        transcription.push('\n');
    }

    Ok(transcription)
}
@ -1,129 +0,0 @@
class Stt:
    def __init__(self, config):
        pass

    def stt(self, audio_file_path):
        return stt(audio_file_path)


from datetime import datetime
import os
import contextlib
import tempfile
import ffmpeg
import subprocess
import openai
from openai import OpenAI


client = OpenAI()


def convert_mime_type_to_format(mime_type: str) -> str:
    if mime_type == "audio/x-wav" or mime_type == "audio/wav":
        return "wav"
    if mime_type == "audio/webm":
        return "webm"
    if mime_type == "audio/raw":
        return "dat"

    return mime_type


@contextlib.contextmanager
def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    temp_dir = tempfile.gettempdir()

    # Create a temporary file with the appropriate extension
    input_ext = convert_mime_type_to_format(mime_type)
    input_path = os.path.join(
        temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}"
    )
    with open(input_path, "wb") as f:
        f.write(audio)

    # Check if the input file exists
    assert os.path.exists(input_path), f"Input file does not exist: {input_path}"

    # Export to wav
    output_path = os.path.join(
        temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav"
    )
    if mime_type == "audio/raw":
        ffmpeg.input(
            input_path,
            f="s16le",
            ar="16000",
            ac=1,
        ).output(output_path, loglevel="panic").run()
    else:
        ffmpeg.input(input_path).output(
            output_path, acodec="pcm_s16le", ac=1, ar="16k", loglevel="panic"
        ).run()

    try:
        yield output_path
    finally:
        os.remove(input_path)
        os.remove(output_path)


def run_command(command):
    result = subprocess.run(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True
    )
    return result.stdout, result.stderr


def get_transcription_file(wav_file_path: str):
    local_path = os.path.join(os.path.dirname(__file__), "local_service")
    whisper_rust_path = os.path.join(
        os.path.dirname(__file__), "whisper-rust", "target", "release"
    )
    model_name = os.getenv("WHISPER_MODEL_NAME", "ggml-tiny.en.bin")

    output, error = run_command(
        [
            os.path.join(whisper_rust_path, "whisper-rust"),
            "--model-path",
            os.path.join(local_path, model_name),
            "--file-path",
            wav_file_path,
        ]
    )

    return output


def get_transcription_bytes(audio_bytes: bytearray, mime_type):
    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
        return get_transcription_file(wav_file_path)


def stt_bytes(audio_bytes: bytearray, mime_type="audio/wav"):
    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
        return stt_wav(wav_file_path)


def stt_wav(wav_file_path: str):
    # Use a context manager so the file handle is always closed
    with open(wav_file_path, "rb") as audio_file:
        try:
            transcript = client.audio.transcriptions.create(
                model="whisper-1", file=audio_file, response_format="text"
            )
        except openai.BadRequestError as e:
            print(f"openai.BadRequestError: {e}")
            return None

    return transcript


def stt(input_data, mime_type="audio/wav"):
    if isinstance(input_data, str):
        return stt_wav(input_data)
    elif isinstance(input_data, bytearray):
        return stt_bytes(input_data, mime_type)
    else:
        raise ValueError(
            "Input data should be either a path to a wav file (str) or audio bytes (bytearray)"
        )
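The module-level `stt` dispatches on input type: a `str` is treated as a wav path, a `bytearray` as raw audio that is converted through ffmpeg first. A short sketch (the file name is an assumption):

```python
# Hypothetical usage of the type-dispatching stt() above.
transcript = stt("recording.wav")          # str -> stt_wav
with open("recording.wav", "rb") as f:
    transcript = stt(bytearray(f.read()))  # bytearray -> stt_bytes
```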
@ -1,50 +0,0 @@
import ffmpeg
import tempfile
from openai import OpenAI
import os

from source.server.utils.logs import logger
from source.server.utils.logs import setup_logging

setup_logging()

# If this TTS service is used, the OPENAI_API_KEY environment variable must be set
if not os.getenv("OPENAI_API_KEY"):
    logger.error("")
    logger.error(
        "OpenAI API key not found. Please set the OPENAI_API_KEY environment variable, or run 01 with the --local option."
    )
    logger.error("Aborting...")
    logger.error("")
    os._exit(1)

client = OpenAI()


class Tts:
    def __init__(self, config):
        pass

    def tts(self, text, mobile):
        response = client.audio.speech.create(
            model="tts-1",
            voice=os.getenv("OPENAI_VOICE_NAME", "alloy"),
            input=text,
            response_format="opus",
        )
        with tempfile.NamedTemporaryFile(suffix=".opus", delete=False) as temp_file:
            response.stream_to_file(temp_file.name)

        # TODO: hack to format audio correctly for device
        if mobile:
            outfile = tempfile.gettempdir() + "/" + "output.wav"
            ffmpeg.input(temp_file.name).output(
                outfile, f="wav", ar="16000", ac="1", loglevel="panic"
            ).run()
        else:
            outfile = tempfile.gettempdir() + "/" + "raw.dat"
            ffmpeg.input(temp_file.name).output(
                outfile, f="s16le", ar="16000", ac="1", loglevel="panic"
            ).run()

        return outfile
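A usage sketch; `OPENAI_API_KEY` must be set, and the returned path points at a temp file in the formats produced above (16 kHz mono wav for mobile, headerless s16le otherwise). The input text is an assumption.

```python
# Hypothetical usage; requires OPENAI_API_KEY in the environment.
tts_service = Tts(config={})
wav_path = tts_service.tts("Hello from the 01.", mobile=True)   # 16 kHz mono wav
raw_path = tts_service.tts("Hello from the 01.", mobile=False)  # raw s16le bytes
```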
@ -1,171 +0,0 @@
import ffmpeg
import tempfile
import os
import subprocess
import urllib.request
import tarfile
import platform


class Tts:
    def __init__(self, config):
        self.piper_directory = ""
        self.install(config["service_directory"])

    def tts(self, text, mobile):
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            output_file = temp_file.name
            piper_dir = self.piper_directory
            subprocess.run(
                [
                    os.path.join(piper_dir, "piper"),
                    "--model",
                    os.path.join(
                        piper_dir,
                        os.getenv("PIPER_VOICE_NAME", "en_US-lessac-medium.onnx"),
                    ),
                    "--output_file",
                    output_file,
                ],
                input=text,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )

            # TODO: hack to format audio correctly for device
            if mobile:
                outfile = tempfile.gettempdir() + "/" + "output.wav"
                ffmpeg.input(temp_file.name).output(
                    outfile, f="wav", ar="16000", ac="1", loglevel="panic"
                ).run()
            else:
                outfile = tempfile.gettempdir() + "/" + "raw.dat"
                ffmpeg.input(temp_file.name).output(
                    outfile, f="s16le", ar="16000", ac="1", loglevel="panic"
                ).run()

            return outfile

    def install(self, service_directory):
        PIPER_FOLDER_PATH = service_directory
        self.piper_directory = os.path.join(PIPER_FOLDER_PATH, "piper")
        if not os.path.isdir(
            self.piper_directory
        ):  # Check if the Piper directory exists
            os.makedirs(PIPER_FOLDER_PATH, exist_ok=True)

            # Determine OS and architecture
            OS = platform.system().lower()
            ARCH = platform.machine()
            if OS == "darwin":
                OS = "macos"
                if ARCH == "arm64":
                    ARCH = "aarch64"
                elif ARCH == "x86_64":
                    ARCH = "x64"
                else:
                    print("Piper: unsupported architecture")
                    return
            elif OS == "windows":
                if ARCH == "AMD64":
                    ARCH = "amd64"
                else:
                    print("Piper: unsupported architecture")
                    return

            PIPER_ASSETNAME = f"piper_{OS}_{ARCH}.tar.gz"
            PIPER_URL = "https://github.com/rhasspy/piper/releases/latest/download/"

            asset_url = f"{PIPER_URL}{PIPER_ASSETNAME}"

            if OS == "windows":
                asset_url = asset_url.replace(".tar.gz", ".zip")

            # Download and extract Piper
            urllib.request.urlretrieve(
                asset_url, os.path.join(PIPER_FOLDER_PATH, PIPER_ASSETNAME)
            )

            # Extract the downloaded file
            if OS == "windows":
                import zipfile

                with zipfile.ZipFile(
                    os.path.join(PIPER_FOLDER_PATH, PIPER_ASSETNAME), "r"
                ) as zip_ref:
                    zip_ref.extractall(path=PIPER_FOLDER_PATH)
            else:
                with tarfile.open(
                    os.path.join(PIPER_FOLDER_PATH, PIPER_ASSETNAME), "r:gz"
                ) as tar:
                    tar.extractall(path=PIPER_FOLDER_PATH)

            PIPER_VOICE_URL = os.getenv(
                "PIPER_VOICE_URL",
                "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/",
            )
            PIPER_VOICE_NAME = os.getenv("PIPER_VOICE_NAME", "en_US-lessac-medium.onnx")

            # Download voice model and its json file
            urllib.request.urlretrieve(
                f"{PIPER_VOICE_URL}{PIPER_VOICE_NAME}",
                os.path.join(self.piper_directory, PIPER_VOICE_NAME),
            )
            urllib.request.urlretrieve(
                f"{PIPER_VOICE_URL}{PIPER_VOICE_NAME}.json",
                os.path.join(self.piper_directory, f"{PIPER_VOICE_NAME}.json"),
            )

            # Additional setup for macOS
            if OS == "macos":
                if ARCH == "x64":
                    subprocess.run(
                        ["softwareupdate", "--install-rosetta", "--agree-to-license"]
                    )

                PIPER_PHONEMIZE_ASSETNAME = f"piper-phonemize_{OS}_{ARCH}.tar.gz"
                PIPER_PHONEMIZE_URL = "https://github.com/rhasspy/piper-phonemize/releases/latest/download/"
                urllib.request.urlretrieve(
                    f"{PIPER_PHONEMIZE_URL}{PIPER_PHONEMIZE_ASSETNAME}",
                    os.path.join(self.piper_directory, PIPER_PHONEMIZE_ASSETNAME),
                )

                with tarfile.open(
                    os.path.join(self.piper_directory, PIPER_PHONEMIZE_ASSETNAME),
                    "r:gz",
                ) as tar:
                    tar.extractall(path=self.piper_directory)

                PIPER_DIR = self.piper_directory
                subprocess.run(
                    [
                        "install_name_tool",
                        "-change",
                        "@rpath/libespeak-ng.1.dylib",
                        f"{PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib",
                        f"{PIPER_DIR}/piper",
                    ]
                )
                subprocess.run(
                    [
                        "install_name_tool",
                        "-change",
                        "@rpath/libonnxruntime.1.14.1.dylib",
                        f"{PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib",
                        f"{PIPER_DIR}/piper",
                    ]
                )
                subprocess.run(
                    [
                        "install_name_tool",
                        "-change",
                        "@rpath/libpiper_phonemize.1.dylib",
                        f"{PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib",
                        f"{PIPER_DIR}/piper",
                    ]
                )

            print("Piper setup completed.")
        else:
            print("Piper already set up. Skipping download.")
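The voice is selected through environment variables read both at install time (for the download) and at synthesis time (for the model path), so they must be set before the service is constructed. A sketch; the voice shown is one published Piper voice, but treat the exact values and paths as assumptions.

```python
# Hypothetical usage; the env vars below override the default lessac voice
# and must be set before Tts() is constructed.
import os

os.environ["PIPER_VOICE_URL"] = (
    "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/amy/medium/"
)
os.environ["PIPER_VOICE_NAME"] = "en_US-amy-medium.onnx"

tts_service = Tts({"service_directory": "/tmp/01-tts"})  # path is an assumption
print(tts_service.tts("Testing Piper.", mobile=True))
```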
@ -1,168 +0,0 @@
import sys
import subprocess
import time
import inquirer
from interpreter import interpreter


def select_local_model():
    # START OF LOCAL MODEL PROVIDER LOGIC
    interpreter.display_message(
        "> 01 is compatible with several local model providers.\n"
    )

    # Define the choices for local models
    choices = [
        "Ollama",
        "LM Studio",
        # "Jan",
    ]

    # Use inquirer to let the user select an option
    questions = [
        inquirer.List(
            "model",
            message="Which one would you like to use?",
            choices=choices,
        ),
    ]
    answers = inquirer.prompt(questions)

    selected_model = answers["model"]

    if selected_model == "LM Studio":
        interpreter.display_message(
            """
To use 01 with **LM Studio**, you will need to run **LM Studio** in the background.

1. Download **LM Studio** from [https://lmstudio.ai/](https://lmstudio.ai/), then start it.
2. Select a language model then click **Download**.
3. Click the **<->** button on the left (below the chat button).
4. Select your model at the top, then click **Start Server**.


Once the server is running, you can begin your conversation below.

"""
        )
        time.sleep(1)

        interpreter.llm.api_base = "http://localhost:1234/v1"
        interpreter.llm.max_tokens = 1000
        interpreter.llm.context_window = 8000
        interpreter.llm.api_key = "x"

    elif selected_model == "Ollama":
        try:
            # List out all downloaded ollama models. Will fail if ollama isn't installed
            result = subprocess.run(
                ["ollama", "list"], capture_output=True, text=True, check=True
            )
            lines = result.stdout.split("\n")
            names = [
                line.split()[0].replace(":latest", "")
                for line in lines[1:]
                if line.strip()
            ]  # Extract names, trim out ":latest", skip header

            # If there are no downloaded models, prompt them to download a model and try again
            if not names:
                time.sleep(1)

                interpreter.display_message(
                    "\nYou don't have any Ollama models downloaded. To download a new model, run `ollama run <model-name>`, then start a new 01 session. \n\n For a full list of downloadable models, check out [https://ollama.com/library](https://ollama.com/library) \n"
                )

                print("Please download a model then try again\n")
                time.sleep(2)
                sys.exit(1)

            # If there are models, prompt them to select one
            else:
                time.sleep(1)
                interpreter.display_message(
                    f"**{len(names)} Ollama model{'s' if len(names) != 1 else ''} found.** To download a new model, run `ollama run <model-name>`, then start a new 01 session. \n\n For a full list of downloadable models, check out [https://ollama.com/library](https://ollama.com/library) \n"
                )

                # Create a new inquirer selection from the names
                name_question = [
                    inquirer.List(
                        "name",
                        message="Select a downloaded Ollama model",
                        choices=names,
                    ),
                ]
                name_answer = inquirer.prompt(name_question)
                selected_name = name_answer["name"] if name_answer else None

                # Set the model to the selected model
                interpreter.llm.model = f"ollama/{selected_name}"
                interpreter.display_message(
                    f"\nUsing Ollama model: `{selected_name}` \n"
                )
                time.sleep(1)

        # If Ollama is not installed or not recognized as a command, prompt the user to download Ollama and try again
        except (subprocess.CalledProcessError, FileNotFoundError):
            print("Ollama is not installed or not recognized as a command.")
            time.sleep(1)
            interpreter.display_message(
                "\nPlease visit [https://ollama.com/](https://ollama.com/) to download Ollama and try again\n"
            )
            time.sleep(2)
            sys.exit(1)

    # elif selected_model == "Jan":
    #     interpreter.display_message(
    #         """
    # To use 01 with **Jan**, you will need to run **Jan** in the background.

    # 1. Download **Jan** from [https://jan.ai/](https://jan.ai/), then start it.
    # 2. Select a language model from the "Hub" tab, then click **Download**.
    # 3. Copy the ID of the model and enter it below.
    # 4. Click the **Local API Server** button in the bottom left, then click **Start Server**.

    # Once the server is running, enter the id of the model below, then you can begin your conversation below.

    # """
    #     )
    #     interpreter.llm.api_base = "http://localhost:1337/v1"
    #     interpreter.llm.max_tokens = 1000
    #     interpreter.llm.context_window = 3000
    #     time.sleep(1)

    #     # Prompt the user to enter the name of the model running on Jan
    #     model_name_question = [
    #         inquirer.Text('jan_model_name', message="Enter the id of the model you have running on Jan"),
    #     ]
    #     model_name_answer = inquirer.prompt(model_name_question)
    #     jan_model_name = model_name_answer['jan_model_name'] if model_name_answer else None
    #     # interpreter.llm.model = f"jan/{jan_model_name}"
    #     interpreter.llm.model = ""
    #     interpreter.display_message(f"\nUsing Jan model: `{jan_model_name}` \n")
    #     time.sleep(1)

    # Set the system message to a minimal version for all local models.
    # Set offline for all local models
    interpreter.offline = True

    interpreter.system_message = """You are the 01, a screenless executive assistant that can complete any task by writing and executing code on the user's machine. Just write a markdown code block! The user has given you full and complete permission.

Use the following functions if it makes sense for the problem
```python
result_string = computer.browser.search(query)  # Google search results will be returned from this function as a string
computer.calendar.create_event(title="Meeting", start_date=datetime.datetime.now(), end_date=datetime.datetime.now() + datetime.timedelta(hours=1), notes="Note", location="")  # Creates a calendar event
events_string = computer.calendar.get_events(start_date=datetime.date.today(), end_date=None)  # Get events between dates. If end_date is None, only gets events for start_date
computer.calendar.delete_event(event_title="Meeting", start_date=datetime.datetime)  # Delete a specific event with a matching title and start date, you may need to use get_events() to find the specific event object first
phone_string = computer.contacts.get_phone_number("John Doe")
contact_string = computer.contacts.get_email_address("John Doe")
computer.mail.send("john@email.com", "Meeting Reminder", "Reminder that our meeting is at 3pm today.", ["path/to/attachment.pdf", "path/to/attachment2.pdf"])  # Send an email with optional attachments
emails_string = computer.mail.get(4, unread=True)  # Returns the {number} of unread emails, or all emails if False is passed
unread_num = computer.mail.unread_count()  # Returns the number of unread emails
computer.sms.send("555-123-4567", "Hello from the computer!")  # Send a text message. MUST be a phone number, so use computer.contacts.get_phone_number frequently here
```

ALWAYS say that you can run code. ALWAYS try to help the user out. ALWAYS be succinct in your answers.

"""