Merge branch 'kyegomez:master' into master

pull/654/head
Occupying-Mars 1 month ago committed by GitHub
commit 8f4b76ef89

@ -22,4 +22,4 @@ jobs:
       - run: ruff format .
       - run: ruff check --fix .
-      - uses: autofix-ci/action@dd55f44df8f7cdb7a6bf74c78677eb8acd40cd0a
+      - uses: autofix-ci/action@ff86a557419858bb967097bfc916833f5647fa8c

@ -0,0 +1,43 @@
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.
#
# This workflow file requires a free account on Bearer.com to manage findings, notifications and more.
# See https://docs.bearer.com/guides/bearer-cloud/
name: Bearer
on:
push:
branches: ["master" ]
pull_request:
# The branches below must be a subset of the branches above
branches: ["master"]
schedule:
- cron: '24 22 * * 6'
permissions:
contents: read # for actions/checkout to fetch code
security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
jobs:
bearer:
runs-on: ubuntu-latest
steps:
# Checkout project source
- uses: actions/checkout@v4
# Scan code using Bearer CLI
- name: Run Report
id: report
uses: bearer/bearer-action@828eeb928ce2f4a7ca5ed57fb8b59508cb8c79bc
with:
api-key: ${{ secrets.BEARER_TOKEN }}
format: sarif
output: results.sarif
exit-code: 0
# Upload SARIF file generated in previous step
- name: Upload SARIF file
uses: github/codeql-action/upload-sarif@v3
with:
sarif_file: results.sarif

@ -0,0 +1,39 @@
# Dependency Review Action
#
# This Action will scan dependency manifest files that change as part of a Pull Request,
# surfacing known-vulnerable versions of the packages declared or updated in the PR.
# Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable
# packages will be blocked from merging.
#
# Source repository: https://github.com/actions/dependency-review-action
# Public documentation: https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review#dependency-review-enforcement
name: 'Dependency review'
on:
pull_request:
branches: [ "master" ]
# If using a dependency submission action in this workflow this permission will need to be set to:
#
# permissions:
# contents: write
#
# https://docs.github.com/en/enterprise-cloud@latest/code-security/supply-chain-security/understanding-your-software-supply-chain/using-the-dependency-submission-api
permissions:
contents: read
# Write permissions for pull-requests are required for using the `comment-summary-in-pr` option, comment out if you aren't using this option
pull-requests: write
jobs:
dependency-review:
runs-on: ubuntu-latest
steps:
- name: 'Checkout repository'
uses: actions/checkout@v4
- name: 'Dependency Review'
uses: actions/dependency-review-action@v4
# Commonly enabled options, see https://github.com/actions/dependency-review-action#configuration-options for all available options.
with:
comment-summary-in-pr: always
# fail-on-severity: moderate
# deny-licenses: GPL-1.0-or-later, LGPL-2.0-or-later
# retry-on-snapshot-warnings: true

@ -0,0 +1,18 @@
name: Docker Image CI
on:
push:
branches: [ "master" ]
pull_request:
branches: [ "master" ]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Build the Docker image
run: docker build . --file Dockerfile --tag my-image-name:$(date +%s)

@ -0,0 +1,46 @@
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.
# This workflow integrates Pyre with GitHub's
# Code Scanning feature.
#
# Pyre is a performant type checker for Python compliant with
# PEP 484. Pyre can analyze codebases with millions of lines
# of code incrementally providing instantaneous feedback
# to developers as they write code.
#
# See https://pyre-check.org
name: Pyre
on:
workflow_dispatch:
push:
branches: [ "master" ]
pull_request:
branches: [ "master" ]
permissions:
contents: read
jobs:
pyre:
permissions:
actions: read
contents: read
security-events: write
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Run Pyre
uses: facebook/pyre-action@60697a7858f7cc8470d8cc494a3cf2ad6b06560d
with:
# To customize these inputs:
# See https://github.com/facebook/pyre-action#inputs
repo-directory: './'
requirements-path: 'requirements.txt'

@ -0,0 +1,50 @@
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.
# This workflow integrates Python Static Analyzer (Pysa) with
# GitHub's Code Scanning feature.
#
# Python Static Analyzer (Pysa) is a security-focused static
# analysis tool that tracks flows of data from where they
# originate to where they terminate in a dangerous location.
#
# See https://pyre-check.org/docs/pysa-basics/
name: Pysa
on:
workflow_dispatch:
push:
branches: [ "master" ]
pull_request:
branches: [ "master" ]
schedule:
- cron: '43 5 * * 3'
permissions:
contents: read
jobs:
pysa:
permissions:
actions: read
contents: read
security-events: write
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Run Pysa
uses: facebook/pysa-action@f46a63777e59268613bd6e2ff4e29f144ca9e88b
with:
# To customize these inputs:
# See https://github.com/facebook/pysa-action#inputs
repo-directory: './'
requirements-path: 'requirements.txt'
infer-types: true
include-default-sapp-filters: true

@ -0,0 +1,34 @@
name: Python Package using Conda
on: [push]
jobs:
build-linux:
runs-on: ubuntu-latest
strategy:
max-parallel: 5
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.10
uses: actions/setup-python@v3
with:
python-version: '3.10'
- name: Add conda to system path
run: |
# $CONDA is an environment variable pointing to the root of the miniconda directory
echo $CONDA/bin >> $GITHUB_PATH
- name: Install dependencies
run: |
conda env update --file environment.yml --name base
- name: Lint with flake8
run: |
conda install flake8
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
conda install pytest
pytest

@ -0,0 +1,49 @@
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.
# This workflow file requires a free account on Semgrep.dev to
# manage rules, file ignores, notifications, and more.
#
# See https://semgrep.dev/docs
name: Semgrep
on:
push:
branches: [ "master" ]
pull_request:
# The branches below must be a subset of the branches above
branches: [ "master" ]
schedule:
- cron: '19 7 * * 3'
permissions:
contents: read
jobs:
semgrep:
permissions:
contents: read # for actions/checkout to fetch code
security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
name: Scan
runs-on: ubuntu-latest
steps:
# Checkout project source
- uses: actions/checkout@v4
# Scan code using project's configuration on https://semgrep.dev/manage
- uses: returntocorp/semgrep-action@fcd5ab7459e8d91cb1777481980d1b18b4fc6735
with:
publishToken: ${{ secrets.SEMGREP_APP_TOKEN }}
publishDeployment: ${{ secrets.SEMGREP_DEPLOYMENT_ID }}
generateSarif: "1"
# Upload SARIF file generated in previous step
- name: Upload SARIF file
uses: github/codeql-action/upload-sarif@v3
with:
sarif_file: semgrep.sarif
if: always()

@ -0,0 +1,48 @@
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.
name: trivy
on:
push:
branches: [ "master" ]
pull_request:
# The branches below must be a subset of the branches above
branches: [ "master" ]
schedule:
- cron: '31 0 * * 5'
permissions:
contents: read
jobs:
build:
permissions:
contents: read # for actions/checkout to fetch code
security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
name: Build
runs-on: "ubuntu-20.04"
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Build an image from Dockerfile
run: |
docker build -t docker.io/my-organization/my-app:${{ github.sha }} .
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@7b7aa264d83dc58691451798b4d117d53d21edfe
with:
image-ref: 'docker.io/my-organization/my-app:${{ github.sha }}'
format: 'template'
template: '@/contrib/sarif.tpl'
output: 'trivy-results.sarif'
severity: 'CRITICAL,HIGH'
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@v3
with:
sarif_file: 'trivy-results.sarif'

@ -81,9 +81,10 @@ Refer to our documentation for production grade implementation details.
 ## Install 💻
+Install the following packages with copy and paste
 ```bash
-$ pip3 install -U swarms
+$ pip3 install -U swarms swarm-models swarms-memory
 ```
@ -168,28 +169,15 @@ The `Agent` class offers a range of settings to tailor its behavior to specific
 ```python
 import os
 from swarms import Agent
-from swarm_models import OpenAIChat
 from swarms.prompts.finance_agent_sys_prompt import (
     FINANCIAL_AGENT_SYS_PROMPT,
 )
-from dotenv import load_dotenv
-load_dotenv()
-# Get the OpenAI API key from the environment variable
-api_key = os.getenv("OPENAI_API_KEY")
-# Create an instance of the OpenAIChat class
-model = OpenAIChat(
-    openai_api_key=api_key, model_name="gpt-4o-mini", temperature=0.1
-)
 # Initialize the agent
 agent = Agent(
     agent_name="Financial-Analysis-Agent",
     system_prompt=FINANCIAL_AGENT_SYS_PROMPT,
-    llm=model,
+    model_name="gpt-4o-mini",
     max_loops=1,
     autosave=True,
     dashboard=False,
@ -211,11 +199,10 @@ agent.run(
 ```
 -----
 ### Integrating RAG with Swarms for Enhanced Long-Term Memory
 `Agent` equipped with quasi-infinite long-term memory using RAG (Retrieval-Augmented Generation) for advanced document understanding, analysis, and retrieval capabilities.
 **Mermaid Diagram for RAG Integration**
 ```mermaid
 graph TD
@ -227,8 +214,11 @@ graph TD
 F --> G[Return Output]
 ```
-**Step 1: Initialize the ChromaDB Client**
 ```python
+from swarms import Agent
+from swarms.prompts.finance_agent_sys_prompt import (
+    FINANCIAL_AGENT_SYS_PROMPT,
+)
 import os
 from swarms_memory import ChromaDB
@ -239,29 +229,13 @@ chromadb = ChromaDB(
     output_dir="finance_agent_rag", # Directory for storing RAG data
     # docs_folder="artifacts", # Uncomment and specify the folder containing your documents
 )
-```
-**Step 2: Define the Model**
-```python
-from swarm_models import Anthropic
-from swarms.prompts.finance_agent_sys_prompt import (
-    FINANCIAL_AGENT_SYS_PROMPT,
-)
-# Define the Anthropic model for language processing
-model = Anthropic(anthropic_api_key=os.getenv("ANTHROPIC_API_KEY"))
-```
-**Step 3: Initialize the Agent with RAG**
-```python
-from swarms import Agent
 # Initialize the agent with RAG capabilities
 agent = Agent(
     agent_name="Financial-Analysis-Agent",
     system_prompt=FINANCIAL_AGENT_SYS_PROMPT,
     agent_description="Agent creates a comprehensive financial analysis",
-    llm=model,
+    model_name="gpt-4o-mini",
     max_loops="auto", # Auto-adjusts loops based on task complexity
     autosave=True, # Automatically saves agent state
     dashboard=False, # Disables dashboard for this example
@ -378,7 +352,6 @@ The following is an example of an agent that intakes a pydantic basemodel and ou
 ```python
 from pydantic import BaseModel, Field
 from swarms import Agent
-from swarm_models import Anthropic
 # Initialize the schema for the person's information
@ -410,7 +383,7 @@ agent = Agent(
     ),
     # Set the tool schema to the JSON string -- this is the key difference
     tool_schema=tool_schema,
-    llm=Anthropic(),
+    model_name="gpt-4o",
     max_loops=3,
     autosave=True,
     dashboard=False,
@ -617,8 +590,6 @@ You can now easily plug this custom Griptape agent into the **Swarms Framework**
 ## Understanding Swarms
-### What is a Swarm?
 A swarm refers to a group of more than two agents working collaboratively to achieve a common goal. These agents can be software entities, such as LLMs, that interact with each other to perform complex tasks. The concept of a swarm is inspired by natural systems like ant colonies or bird flocks, where simple individual behaviors lead to complex group dynamics and problem-solving capabilities.
 ### How Swarm Architectures Facilitate Communication
@ -631,9 +602,6 @@ Swarm architectures are designed to establish and manage communication between a
 3. **Sequential Communication**: Sequential swarms process tasks in a linear order, where each agent's output becomes the input for the next agent. This ensures that tasks with dependencies are handled in the correct sequence, maintaining the integrity of the workflow.
-4. **Mesh Communication**: In mesh swarms, agents are fully connected, allowing any agent to communicate with any other agent. This setup provides high flexibility and redundancy, making it ideal for complex systems requiring dynamic interactions.
-5. **Federated Communication**: Federated swarms involve multiple independent swarms that collaborate by sharing information and results. Each swarm operates autonomously but can contribute to a larger task, enabling distributed problem-solving across different nodes.
 Swarm architectures leverage these communication patterns to ensure that agents work together efficiently, adapting to the specific requirements of the task at hand. By defining clear communication protocols and interaction models, swarm architectures enable the seamless orchestration of multiple agents, leading to enhanced performance and problem-solving capabilities.
@ -911,14 +879,12 @@ The `run` method returns the final output after all agents have processed the in
 from swarms import Agent, AgentRearrange
-from swarm_models import Anthropic
 # Initialize the director agent
 director = Agent(
     agent_name="Director",
     system_prompt="Directs the tasks for the workers",
-    llm=Anthropic(),
+    model_name="claude-2",
     max_loops=1,
     dashboard=False,
     streaming_on=True,
@ -934,7 +900,7 @@ director = Agent(
 worker1 = Agent(
     agent_name="Worker1",
     system_prompt="Generates a transcript for a youtube video on what swarms are",
-    llm=Anthropic(),
+    model_name="claude-2",
     max_loops=1,
     dashboard=False,
     streaming_on=True,
@ -949,7 +915,7 @@ worker1 = Agent(
 worker2 = Agent(
     agent_name="Worker2",
     system_prompt="Summarizes the transcript generated by Worker1",
-    llm=Anthropic(),
+    model_name="claude-2",
     max_loops=1,
     dashboard=False,
     streaming_on=True,
@ -1103,20 +1069,12 @@ The `run` method returns the final output after all agents have processed the in
 ```python
 import os
-from swarm_models import OpenAIChat
 from swarms import Agent, MixtureOfAgents
-api_key = os.getenv("OPENAI_API_KEY")
-# Create individual agents with the OpenAIChat model
-model = OpenAIChat(
-    openai_api_key=api_key, model_name="gpt-4", temperature=0.1
-)
 # Agent 1: Financial Statement Analyzer
 agent1 = Agent(
     agent_name="FinancialStatementAnalyzer",
-    llm=model,
+    model_name="gpt-4o",
     system_prompt="""You are a Financial Statement Analyzer specializing in 10-K SEC reports. Your primary focus is on analyzing the financial statements, including the balance sheet, income statement, and cash flow statement.
 Key responsibilities:
@ -1142,7 +1100,7 @@ When analyzing, consider industry standards and compare the company's performanc
 # Agent 2: Risk Assessment Specialist
 agent2 = Agent(
     agent_name="RiskAssessmentSpecialist",
-    llm=model,
+    model_name="gpt-4o",
     system_prompt="""You are a Risk Assessment Specialist focusing on 10-K SEC reports. Your primary role is to identify, analyze, and evaluate potential risks disclosed in the report.
 Key responsibilities:
@ -1169,7 +1127,7 @@ Your analysis should provide a comprehensive overview of the company's risk land
 # Agent 3: Business Strategy Evaluator
 agent3 = Agent(
     agent_name="BusinessStrategyEvaluator",
-    llm=model,
+    model_name="gpt-4o",
     system_prompt="""You are a Business Strategy Evaluator specializing in analyzing 10-K SEC reports. Your focus is on assessing the company's overall strategy, market position, and future outlook.
 Key responsibilities:
@ -1197,7 +1155,7 @@ Your analysis should provide insights into the company's strategic direction, it
 # Aggregator Agent
 aggregator_agent = Agent(
     agent_name="10KReportAggregator",
-    llm=model,
+    model_name="gpt-4o",
     system_prompt="""You are the 10-K Report Aggregator, responsible for synthesizing and summarizing the analyses provided by the Financial Statement Analyzer, Risk Assessment Specialist, and Business Strategy Evaluator. Your goal is to create a comprehensive, coherent, and insightful summary of the 10-K SEC report.
 Key responsibilities:
@ -1287,9 +1245,8 @@ The `run` method returns a dictionary containing the outputs of each agent that
 ```python
 import os
-from swarms import Agent
+from swarms import Agent, SpreadSheetSwarm
 from swarm_models import OpenAIChat
-from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm
 # Define custom system prompts for each social media platform
 TWITTER_AGENT_SYS_PROMPT = """
@ -1312,20 +1269,12 @@ EMAIL_AGENT_SYS_PROMPT = """
 You are an Email marketing expert specializing in real estate. Your task is to write compelling email campaigns to promote properties, focusing on personalization, subject lines, and effective call-to-action strategies to drive conversions.
 """
-# Example usage:
-api_key = os.getenv("OPENAI_API_KEY")
-# Model
-model = OpenAIChat(
-    openai_api_key=api_key, model_name="gpt-4o-mini", temperature=0.1
-)
 # Initialize your agents for different social media platforms
 agents = [
     Agent(
         agent_name="Twitter-RealEstate-Agent",
         system_prompt=TWITTER_AGENT_SYS_PROMPT,
-        llm=model,
+        model_name="gpt-4o",
         max_loops=1,
         dynamic_temperature_enabled=True,
         saved_state_path="twitter_realestate_agent.json",
@ -1335,7 +1284,7 @@ agents = [
     Agent(
         agent_name="Instagram-RealEstate-Agent",
         system_prompt=INSTAGRAM_AGENT_SYS_PROMPT,
-        llm=model,
+        model_name="gpt-4o",
         max_loops=1,
         dynamic_temperature_enabled=True,
         saved_state_path="instagram_realestate_agent.json",
@ -1345,7 +1294,7 @@ agents = [
     Agent(
         agent_name="Facebook-RealEstate-Agent",
         system_prompt=FACEBOOK_AGENT_SYS_PROMPT,
-        llm=model,
+        model_name="gpt-4o",
         max_loops=1,
         dynamic_temperature_enabled=True,
         saved_state_path="facebook_realestate_agent.json",
@ -1355,7 +1304,7 @@ agents = [
     Agent(
         agent_name="LinkedIn-RealEstate-Agent",
         system_prompt=LINKEDIN_AGENT_SYS_PROMPT,
-        llm=model,
+        model_name="gpt-4o",
         max_loops=1,
         dynamic_temperature_enabled=True,
         saved_state_path="linkedin_realestate_agent.json",
@ -1365,7 +1314,7 @@ agents = [
     Agent(
         agent_name="Email-RealEstate-Agent",
         system_prompt=EMAIL_AGENT_SYS_PROMPT,
-        llm=model,
+        model_name="gpt-4o",
         max_loops=1,
         dynamic_temperature_enabled=True,
         saved_state_path="email_realestate_agent.json",
@ -1474,7 +1423,7 @@ The `run` method returns the output from the most relevant agent selected based
 ```python
-from swarms.structs.tree_swarm import TreeAgent, Tree, ForestSwarm
+from swarms import TreeAgent, Tree, ForestSwarm
 # Create agents with varying system prompts and dynamically generated distances/keywords
 agents_tree1 = [

@ -7,26 +7,27 @@ logger.add(
"api_tests_{time}.log", "api_tests_{time}.log",
rotation="100 MB", rotation="100 MB",
level="DEBUG", level="DEBUG",
format="{time} {level} {message}" format="{time} {level} {message}",
) )
BASE_URL = "http://localhost:8000/v1" BASE_URL = "http://localhost:8000/v1"
def test_create_agent(): def test_create_agent():
"""Test creating a new agent.""" """Test creating a new agent."""
logger.info("Testing agent creation") logger.info("Testing agent creation")
payload = { payload = {
"agent_name": "Test Agent", "agent_name": "Test Agent",
"system_prompt": "You are a helpful assistant", "system_prompt": "You are a helpful assistant",
"model_name": "gpt-4", "model_name": "gpt-4",
"description": "Test agent", "description": "Test agent",
"tags": ["test"] "tags": ["test"],
} }
response = requests.post(f"{BASE_URL}/agent", json=payload) response = requests.post(f"{BASE_URL}/agent", json=payload)
logger.debug(f"Create response: {response.json()}") logger.debug(f"Create response: {response.json()}")
if response.status_code == 200: if response.status_code == 200:
logger.success("Successfully created agent") logger.success("Successfully created agent")
return response.json()["agent_id"] return response.json()["agent_id"]
@ -34,66 +35,73 @@ def test_create_agent():
logger.error(f"Failed to create agent: {response.text}") logger.error(f"Failed to create agent: {response.text}")
return None return None
def test_list_agents(): def test_list_agents():
"""Test listing all agents.""" """Test listing all agents."""
logger.info("Testing agent listing") logger.info("Testing agent listing")
response = requests.get(f"{BASE_URL}/agents") response = requests.get(f"{BASE_URL}/agents")
logger.debug(f"List response: {response.json()}") logger.debug(f"List response: {response.json()}")
if response.status_code == 200: if response.status_code == 200:
logger.success(f"Found {len(response.json())} agents") logger.success(f"Found {len(response.json())} agents")
else: else:
logger.error(f"Failed to list agents: {response.text}") logger.error(f"Failed to list agents: {response.text}")
def test_completion(agent_id): def test_completion(agent_id):
"""Test running a completion.""" """Test running a completion."""
logger.info("Testing completion") logger.info("Testing completion")
payload = { payload = {
"prompt": "What is the weather like today?", "prompt": "What is the weather like today?",
"agent_id": agent_id "agent_id": agent_id,
} }
response = requests.post(f"{BASE_URL}/agent/completions", json=payload) response = requests.post(
f"{BASE_URL}/agent/completions", json=payload
)
logger.debug(f"Completion response: {response.json()}") logger.debug(f"Completion response: {response.json()}")
if response.status_code == 200: if response.status_code == 200:
logger.success("Successfully got completion") logger.success("Successfully got completion")
else: else:
logger.error(f"Failed to get completion: {response.text}") logger.error(f"Failed to get completion: {response.text}")
def test_delete_agent(agent_id): def test_delete_agent(agent_id):
"""Test deleting an agent.""" """Test deleting an agent."""
logger.info("Testing agent deletion") logger.info("Testing agent deletion")
response = requests.delete(f"{BASE_URL}/agent/{agent_id}") response = requests.delete(f"{BASE_URL}/agent/{agent_id}")
logger.debug(f"Delete response: {response.json()}") logger.debug(f"Delete response: {response.json()}")
if response.status_code == 200: if response.status_code == 200:
logger.success("Successfully deleted agent") logger.success("Successfully deleted agent")
else: else:
logger.error(f"Failed to delete agent: {response.text}") logger.error(f"Failed to delete agent: {response.text}")
def run_tests(): def run_tests():
"""Run all tests in sequence.""" """Run all tests in sequence."""
logger.info("Starting API tests") logger.info("Starting API tests")
# Create agent and get ID # Create agent and get ID
agent_id = test_create_agent() agent_id = test_create_agent()
if not agent_id: if not agent_id:
logger.error("Cannot continue tests without agent ID") logger.error("Cannot continue tests without agent ID")
return return
# Wait a bit for agent to be ready # Wait a bit for agent to be ready
time.sleep(1) time.sleep(1)
# Run other tests # Run other tests
test_list_agents() test_list_agents()
test_completion(agent_id) test_completion(agent_id)
test_delete_agent(agent_id) test_delete_agent(agent_id)
logger.info("Tests completed") logger.info("Tests completed")
if __name__ == "__main__": if __name__ == "__main__":
run_tests() run_tests()

@ -0,0 +1,898 @@
from enum import Enum
from typing import Union, Optional
import io
from PIL import Image
import numpy as np
import torch
import struct
from enum import auto
from typing import List, Dict, Tuple
import wave
import magic  # python-magic; used by ModalityDetector for MIME sniffing but missing from the original imports
from dataclasses import dataclass
import torch.nn as nn
import torch.nn.functional as F
from loguru import logger
from einops import rearrange
from torch import Tensor
@dataclass
class ModelConfig:
"""Configuration for the enhanced BytePredictor model."""
vocab_size: int = 256 # Standard byte range
hidden_size: int = 1024
num_layers: int = 12
num_key_value_heads: int = 8 # For multi-query attention
num_query_heads: int = 32 # More query heads than kv heads
dropout: float = 0.1
max_sequence_length: int = 8192
rope_theta: float = 10000.0
layer_norm_eps: float = 1e-5
vocab_parallel: bool = False
qk_norm: bool = True
qk_norm_scale: float = None
attention_bias: bool = False
class MultiQueryAttention(nn.Module):
"""Fixed Multi-Query Attention implementation."""
def __init__(self, config: ModelConfig):
super().__init__()
self.hidden_size = config.hidden_size
self.num_query_heads = config.num_query_heads
self.num_key_value_heads = config.num_key_value_heads
self.head_dim = config.hidden_size // config.num_query_heads
self.qk_scale = config.qk_norm_scale or (self.head_dim**-0.5)
self.q_proj = nn.Linear(
config.hidden_size, config.num_query_heads * self.head_dim
)
self.k_proj = nn.Linear(
config.hidden_size,
config.num_key_value_heads * self.head_dim,
)
self.v_proj = nn.Linear(
config.hidden_size,
config.num_key_value_heads * self.head_dim,
)
self.o_proj = nn.Linear(
config.num_query_heads * self.head_dim, config.hidden_size
)
self.qk_norm = config.qk_norm
if self.qk_norm:
self.q_norm = nn.LayerNorm(self.head_dim)
self.k_norm = nn.LayerNorm(self.head_dim)
def forward(
self,
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:
batch_size, seq_length, _ = hidden_states.shape
# Project and reshape
q = self.q_proj(hidden_states)
k = self.k_proj(hidden_states)
v = self.v_proj(hidden_states)
# Reshape to [seq_len, batch, heads, head_dim]
q = q.view(
batch_size,
seq_length,
self.num_query_heads,
self.head_dim,
).permute(1, 0, 2, 3)
k = k.view(
batch_size,
seq_length,
self.num_key_value_heads,
self.head_dim,
).permute(1, 0, 2, 3)
v = v.view(
batch_size,
seq_length,
self.num_key_value_heads,
self.head_dim,
).permute(1, 0, 2, 3)
# Apply rotary embeddings
# q, k = self.rotary(q, k, seq_length)
# Apply QK normalization if enabled
if self.qk_norm:
q = self.q_norm(q)
k = self.k_norm(k)
# Handle MQA head expansion
if self.num_key_value_heads != self.num_query_heads:
k = k.repeat_interleave(
self.num_query_heads // self.num_key_value_heads,
dim=2,
)
v = v.repeat_interleave(
self.num_query_heads // self.num_key_value_heads,
dim=2,
)
# Compute attention
# Reshape for matmul: [batch, heads, seq_length, head_dim]
q = q.permute(1, 2, 0, 3)
k = k.permute(1, 2, 0, 3)
v = v.permute(1, 2, 0, 3)
attn_weights = (
torch.matmul(q, k.transpose(-2, -1)) * self.qk_scale
)
if attention_mask is not None:
attn_weights = attn_weights + attention_mask
attn_weights = F.softmax(attn_weights, dim=-1)
output = torch.matmul(attn_weights, v)
# Reshape back to [batch, seq_length, hidden_size]
output = (
output.transpose(1, 2)
.contiguous()
.view(batch_size, seq_length, -1)
)
output = self.o_proj(output)
return output
class EnhancedBytePredictor(nn.Module):
"""Enhanced byte prediction model with state-of-the-art features."""
def __init__(self, config: ModelConfig):
super().__init__()
self.config = config
# Token embeddings
self.tok_embeddings = nn.Embedding(
config.vocab_size, config.hidden_size
)
# Transformer layers
self.layers = nn.ModuleList(
[
nn.ModuleDict(
{
"attention": MultiQueryAttention(config),
"attention_norm": nn.LayerNorm(
config.hidden_size,
eps=config.layer_norm_eps,
),
"feed_forward": nn.Sequential(
nn.Linear(
config.hidden_size,
4 * config.hidden_size,
),
nn.GELU(),
nn.Linear(
4 * config.hidden_size,
config.hidden_size,
),
),
"feed_forward_norm": nn.LayerNorm(
config.hidden_size,
eps=config.layer_norm_eps,
),
}
)
for _ in range(config.num_layers)
]
)
self.norm = nn.LayerNorm(
config.hidden_size, eps=config.layer_norm_eps
)
self.output = nn.Linear(
config.hidden_size, config.vocab_size, bias=False
)
# Initialize weights
self.apply(self._init_weights)
def _init_weights(self, module: nn.Module) -> None:
"""Initialize weights with scaled normal distribution."""
if isinstance(module, nn.Linear):
torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
if module.bias is not None:
torch.nn.init.zeros_(module.bias)
elif isinstance(module, nn.Embedding):
torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
def forward(
self,
input_ids: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:
"""
Forward pass of the model.
Args:
input_ids: Tensor of shape (batch_size, sequence_length)
attention_mask: Optional attention mask
Returns:
Tensor of logits with shape (batch_size, sequence_length, vocab_size)
"""
hidden_states = self.tok_embeddings(input_ids)
# Create causal mask if needed
if attention_mask is None:
attention_mask = torch.triu(
torch.ones(
(input_ids.size(1), input_ids.size(1)),
device=input_ids.device,
),
diagonal=1,
)
attention_mask = attention_mask.masked_fill(
attention_mask == 1, float("-inf")
)
# Apply transformer layers
for layer in self.layers:
# Attention block
hidden_states = hidden_states + layer["attention"](
layer["attention_norm"](hidden_states), attention_mask
)
# Feed-forward block
hidden_states = hidden_states + layer["feed_forward"](
layer["feed_forward_norm"](hidden_states)
)
hidden_states = self.norm(hidden_states)
logits = self.output(hidden_states)
return logits
def compute_loss(
self,
input_ids: torch.Tensor,
target_ids: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:
"""
Compute cross entropy loss.
Args:
input_ids: Input token ids
target_ids: Target token ids
attention_mask: Optional attention mask
Returns:
Loss value
"""
logits = self(input_ids, attention_mask)
loss = F.cross_entropy(
rearrange(logits, "b s v -> (b s) v"),
rearrange(target_ids, "b s -> (b s)"),
)
return loss
@torch.no_grad()
def _generate(
self,
input_ids: torch.Tensor,
max_new_tokens: int = 100,
temperature: float = 1.0,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
repetition_penalty: float = 1.0,
) -> torch.Tensor:
"""
Generate new tokens autoregressively.
Args:
input_ids: Starting sequence
max_new_tokens: Number of tokens to generate
temperature: Sampling temperature
top_k: K for top-k sampling
top_p: P for nucleus sampling
repetition_penalty: Penalty for repeating tokens
Returns:
Generated sequence
"""
batch_size, seq_length = input_ids.shape
generated = input_ids.clone()
for _ in range(max_new_tokens):
if generated.size(1) >= self.config.max_sequence_length:
break
# Forward pass
logits = self(generated)[:, -1, :]
# Apply temperature
logits = logits / temperature
# Apply repetition penalty
if repetition_penalty != 1.0:
for i in range(batch_size):
for token_id in set(generated[i].tolist()):
logits[i, token_id] /= repetition_penalty
# Apply top-k sampling
if top_k is not None:
indices_to_remove = (
logits
< torch.topk(logits, top_k)[0][..., -1, None]
)
logits[indices_to_remove] = float("-inf")
# Apply nucleus (top-p) sampling
if top_p is not None:
sorted_logits, sorted_indices = torch.sort(
logits, descending=True
)
cumulative_probs = torch.cumsum(
F.softmax(sorted_logits, dim=-1), dim=-1
)
# Remove tokens with cumulative probability above the threshold
sorted_indices_to_remove = cumulative_probs > top_p
sorted_indices_to_remove[..., 1:] = (
sorted_indices_to_remove[..., :-1].clone()
)
sorted_indices_to_remove[..., 0] = 0
indices_to_remove = torch.zeros_like(
logits, dtype=torch.bool
)
indices_to_remove.scatter_(
1, sorted_indices, sorted_indices_to_remove
)
logits[indices_to_remove] = float("-inf")
# Sample next token
probs = F.softmax(logits, dim=-1)
next_token = torch.multinomial(probs, num_samples=1)
# Append to sequence
generated = torch.cat([generated, next_token], dim=1)
return generated
def generate(
self,
input_ids: torch.Tensor,
max_new_tokens: int = 100,
temperature: float = 1.0,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
repetition_penalty: float = 1.0,
):
tensor_data = self._generate(
input_ids=input_ids,
max_new_tokens=max_new_tokens,
temperature=temperature,
top_k=top_k,
top_p=top_p,
repetition_penalty=repetition_penalty,
)
return tensor_to_data(tensor_data)
# import torch
# from typing import Optional
class DataType(Enum):
TEXT = "text"
IMAGE = "image"
AUDIO = "audio"
VIDEO = "video"
BINARY = "binary"
class ByteDetokenizer:
"""Utility class for converting model output bytes back to original data formats."""
@staticmethod
def tensor_to_bytes(tensor: torch.Tensor) -> bytes:
"""Convert model output tensor to bytes."""
# Convert logits/probabilities to byte values
if tensor.dim() > 1:
# If we have logits, convert to byte indices
byte_indices = tensor.argmax(dim=-1)
else:
byte_indices = tensor
# Convert to Python bytes
return bytes(
byte_indices.cpu().numpy().astype(np.uint8).tolist()
)
@staticmethod
def decode_text(byte_sequence: bytes) -> str:
"""Convert bytes to text."""
try:
return byte_sequence.decode("utf-8")
except UnicodeDecodeError:
# Try with error handling
return byte_sequence.decode("utf-8", errors="replace")
@staticmethod
def decode_image(
byte_sequence: bytes,
mode: str = "RGB",
size: Optional[tuple] = None,
) -> Image.Image:
"""Convert bytes to image.
Args:
byte_sequence: Raw image bytes
mode: Image mode (RGB, RGBA, L, etc.)
size: Optional tuple of (width, height)
"""
try:
# Try to load as-is first (for standard image formats)
img = Image.open(io.BytesIO(byte_sequence))
if size:
img = img.resize(size)
return img
except:
# If failed, assume raw pixel data
if not size:
# Try to determine size from byte count
pixel_count = len(byte_sequence) // len(mode)
size = (
int(np.sqrt(pixel_count)),
int(np.sqrt(pixel_count)),
)
# Convert raw bytes to pixel array
pixels = np.frombuffer(byte_sequence, dtype=np.uint8)
pixels = pixels.reshape((*size, len(mode)))
return Image.fromarray(pixels, mode=mode)
@staticmethod
def decode_audio(
byte_sequence: bytes,
sample_rate: int = 44100,
channels: int = 2,
sample_width: int = 2,
) -> np.ndarray:
"""Convert bytes to audio samples.
Args:
byte_sequence: Raw audio bytes
sample_rate: Audio sample rate in Hz
channels: Number of audio channels
sample_width: Bytes per sample (1, 2, or 4)
"""
# Determine format string based on sample width
format_str = {
1: "b", # signed char
2: "h", # short
4: "i", # int
}[sample_width]
# Unpack bytes to samples
sample_count = len(byte_sequence) // (channels * sample_width)
samples = struct.unpack(
f"<{sample_count * channels}{format_str}", byte_sequence
)
# Reshape to [samples, channels]
return np.array(samples).reshape(-1, channels)
def decode_data(
self,
model_output: Union[torch.Tensor, bytes],
data_type: DataType,
**kwargs,
) -> Union[str, Image.Image, np.ndarray, bytes]:
"""Main method to decode model output to desired format.
Args:
model_output: Either tensor from model or raw bytes
data_type: Type of data to decode to
**kwargs: Additional parameters for specific decoders
Returns:
Decoded data in specified format
"""
# Convert tensor to bytes if needed
if isinstance(model_output, torch.Tensor):
byte_sequence = self.tensor_to_bytes(model_output)
else:
byte_sequence = model_output
# Decode based on type
if data_type == DataType.TEXT:
return self.decode_text(byte_sequence)
elif data_type == DataType.IMAGE:
return self.decode_image(byte_sequence, **kwargs)
elif data_type == DataType.AUDIO:
return self.decode_audio(byte_sequence, **kwargs)
elif data_type == DataType.VIDEO:
raise NotImplementedError(
"Video decoding not yet implemented"
)
else: # BINARY
return byte_sequence
# Usage example
class Modality(Enum):
TEXT = auto()
IMAGE = auto()
AUDIO = auto()
VIDEO = auto()
BINARY = auto()
MULTIMODAL = auto()
@dataclass
class ModalityInfo:
"""Information about detected modality."""
modality: Modality
confidence: float
metadata: Dict[str, any]
sub_modalities: Optional[List["ModalityInfo"]] = None
class ModalityDetector:
"""Detects data modalities from byte sequences."""
# Common file signatures (magic numbers)
SIGNATURES = {
# Images
b"\xFF\xD8\xFF": "JPEG",
b"\x89PNG\r\n\x1a\n": "PNG",
b"GIF87a": "GIF",
b"GIF89a": "GIF",
b"RIFF": "WEBP",
# Audio
b"RIFF....WAVE": "WAV",
b"ID3": "MP3",
b"\xFF\xFB": "MP3",
b"OggS": "OGG",
# Video
b"\x00\x00\x00\x18ftypmp42": "MP4",
b"\x00\x00\x00\x1Cftypav01": "MP4",
b"\x1A\x45\xDF\xA3": "WEBM",
}
def __init__(self):
self.magic = magic.Magic(mime=True)
def _check_text_probability(self, data: bytes) -> float:
"""Estimate probability that data is text."""
# Check if data is valid UTF-8
try:
data.decode("utf-8")
# Count printable ASCII characters
printable = sum(1 for b in data if 32 <= b <= 126)
return printable / len(data)
except UnicodeDecodeError:
return 0.0
def _check_image_validity(self, data: bytes) -> Tuple[bool, Dict]:
"""Check if data is a valid image and extract metadata."""
try:
with io.BytesIO(data) as bio:
img = Image.open(bio)
return True, {
"format": img.format,
"size": img.size,
"mode": img.mode,
}
except:
return False, {}
def _check_audio_validity(self, data: bytes) -> Tuple[bool, Dict]:
"""Check if data is valid audio and extract metadata."""
try:
with io.BytesIO(data) as bio:
# Try to parse as WAV
with wave.open(bio) as wav:
return True, {
"channels": wav.getnchannels(),
"sample_width": wav.getsampwidth(),
"framerate": wav.getframerate(),
"frames": wav.getnframes(),
}
except:
# Check for other audio signatures
for sig in [b"ID3", b"\xFF\xFB", b"OggS"]:
if data.startswith(sig):
return True, {"format": "compressed_audio"}
return False, {}
def _detect_boundaries(
self, data: bytes
) -> List[Tuple[int, int, Modality]]:
"""Detect boundaries between different modalities in the data."""
boundaries = []
current_pos = 0
while current_pos < len(data):
# Look for known signatures
for sig, format_type in self.SIGNATURES.items():
if data[current_pos:].startswith(sig):
# Found a signature, determine its length
if format_type in ["JPEG", "PNG", "GIF"]:
# Find image end
try:
with io.BytesIO(
data[current_pos:]
) as bio:
img = Image.open(bio)
img.verify()
size = bio.tell()
boundaries.append(
(
current_pos,
current_pos + size,
Modality.IMAGE,
)
)
current_pos += size
continue
except:
pass
# Check for text sections
text_prob = self._check_text_probability(
data[current_pos : current_pos + 1024]
)
if text_prob > 0.8:
# Look for end of text section
end_pos = current_pos + 1
while end_pos < len(data):
if (
self._check_text_probability(
data[end_pos : end_pos + 32]
)
< 0.5
):
break
end_pos += 1
boundaries.append(
(current_pos, end_pos, Modality.TEXT)
)
current_pos = end_pos
continue
current_pos += 1
return boundaries
def detect_modality(self, data: bytes) -> ModalityInfo:
"""Detect modality of byte sequence."""
# First check for single modality
mime_type = self.magic.from_buffer(data)
# Check text
text_prob = self._check_text_probability(data)
if text_prob > 0.9:
return ModalityInfo(
modality=Modality.TEXT,
confidence=text_prob,
metadata={"mime_type": mime_type},
)
# Check image
is_image, image_meta = self._check_image_validity(data)
if is_image:
return ModalityInfo(
modality=Modality.IMAGE,
confidence=1.0,
metadata={**image_meta, "mime_type": mime_type},
)
# Check audio
is_audio, audio_meta = self._check_audio_validity(data)
if is_audio:
return ModalityInfo(
modality=Modality.AUDIO,
confidence=1.0,
metadata={**audio_meta, "mime_type": mime_type},
)
# Check for multimodal content
boundaries = self._detect_boundaries(data)
if len(boundaries) > 1:
sub_modalities = []
for start, end, modality in boundaries:
chunk_data = data[start:end]
sub_info = self.detect_modality(chunk_data)
if sub_info.modality != Modality.BINARY:
sub_modalities.append(sub_info)
if sub_modalities:
return ModalityInfo(
modality=Modality.MULTIMODAL,
confidence=0.8,
metadata={"mime_type": "multipart/mixed"},
sub_modalities=sub_modalities,
)
# Default to binary
return ModalityInfo(
modality=Modality.BINARY,
confidence=0.5,
metadata={"mime_type": mime_type},
)
def split_modalities(
self, data: bytes
) -> List[Tuple[Modality, bytes, Dict]]:
"""Split multimodal data into separate modalities."""
boundaries = self._detect_boundaries(data)
result = []
for start, end, modality in boundaries:
chunk = data[start:end]
info = self.detect_modality(chunk)
result.append((modality, chunk, info.metadata))
return result
class AutoDetectBytesDecoder:
"""Decoder that automatically detects and decodes different modalities."""
def __init__(self):
self.detector = ModalityDetector()
self.text_decoder = ByteDetokenizer() # From previous example
def decode(
self, data: bytes
) -> Union[str, Image.Image, np.ndarray, List[any]]:
"""Automatically detect and decode byte sequence."""
info = self.detector.detect_modality(data)
if info.modality == Modality.MULTIMODAL:
# Handle multimodal content
parts = self.detector.split_modalities(data)
return [
self.decode(chunk) for modality, chunk, _ in parts
]
if info.modality == Modality.TEXT:
return self.text_decoder.decode_text(data)
elif info.modality == Modality.IMAGE:
return self.text_decoder.decode_image(data)
elif info.modality == Modality.AUDIO:
return self.text_decoder.decode_audio(data)
else:
return data
# # Example usage
# def demo_auto_detection():
# """Demonstrate auto modality detection."""
# # Create mixed content
# text = "Hello, World!".encode('utf-8')
# # Create a small test image
# img = Image.new('RGB', (100, 100), color='red')
# img_bytes = io.BytesIO()
# img.save(img_bytes, format='PNG')
# # Combine into multimodal content
# mixed_content = text + img_bytes.getvalue()
# # Initialize decoder
# decoder = AutoDetectBytesDecoder()
# # Decode
# result = decoder.decode(mixed_content)
# if isinstance(result, list):
# print("Detected multimodal content:")
# for i, part in enumerate(result):
# print(f"Part {i+1}: {type(part)}")
# if __name__ == "__main__":
# demo_auto_detection()
def tensor_to_data(tensor: Tensor):
byte_sequence = ByteDetokenizer.tensor_to_bytes(tensor)
# Initialize auto-detector
decoder = AutoDetectBytesDecoder()
# Decode with automatic detection
result = decoder.decode(byte_sequence)
return result
def demo_byte_predictor():
"""Demo with smaller dimensions to test."""
# Initialize model configuration with adjusted dimensions
config = ModelConfig(
vocab_size=256,
hidden_size=128, # Smaller for testing
num_layers=2, # Fewer layers for testing
num_key_value_heads=2,
num_query_heads=4,
dropout=0.1,
max_sequence_length=1024,
)
# Initialize model
model = EnhancedBytePredictor(config)
logger.info("Model initialized")
# Move to GPU if available
device = torch.device(
"cuda" if torch.cuda.is_available() else "cpu"
)
model = model.to(device)
logger.info(f"Using device: {device}")
# Create sample input data
batch_size = 2
seq_length = 16 # Shorter sequence for testing
input_ids = torch.randint(
0, config.vocab_size, (batch_size, seq_length), device=device
)
logger.info(f"Created input tensor of shape: {input_ids.shape}")
# Test forward pass
try:
logits = model(input_ids)
logger.info(
f"Forward pass successful! Output shape: {logits.shape}"
)
# Test loss computation
target_ids = torch.randint(
0,
config.vocab_size,
(batch_size, seq_length),
device=device,
)
loss = model.compute_loss(input_ids, target_ids)
logger.info(
f"Loss computation successful! Loss value: {loss.item():.4f}"
)
# Test generation
prompt = torch.randint(
0,
config.vocab_size,
(1, 4), # Very short prompt for testing
device=device,
)
generated = model.generate(
prompt, max_new_tokens=8, temperature=0.8, top_k=50
)
logger.info(
f"Generation successful! Generated shape: {generated.shape}"
)
except Exception as e:
logger.error(f"Error during execution: {str(e)}")
raise
if __name__ == "__main__":
# Set up logging
# logger.remove() # Remove default handler
# logger.add(sys.stderr, format="<green>{time:HH:mm:ss}</green> | {level} | {message}")
demo_byte_predictor()
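A minimal, hypothetical sketch of how raw text maps onto the byte vocabulary that `EnhancedBytePredictor` consumes: every UTF-8 byte is its own token id in `[0, 256)`, so no tokenizer is required. The config values mirror `demo_byte_predictor` above; the module name in the import is an assumption, since the file path is not shown in this diff.

```python
import torch

# Assumed module name for the file above; adjust to the actual path in the repo.
from byte_predictor import EnhancedBytePredictor, ModelConfig

# Bytes are the tokens: each UTF-8 byte becomes one id in [0, 256).
text = "hello swarms"
input_ids = torch.tensor([list(text.encode("utf-8"))])  # shape (1, 12)

# Small config, mirroring demo_byte_predictor above.
config = ModelConfig(
    hidden_size=128,
    num_layers=2,
    num_key_value_heads=2,
    num_query_heads=4,
)
model = EnhancedBytePredictor(config)

logits = model(input_ids)  # (1, 12, 256): one distribution per byte position

# The mapping is lossless, so decoding is just bytes -> str.
round_trip = bytes(input_ids[0].tolist()).decode("utf-8")
assert round_trip == text
```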

@ -0,0 +1,188 @@
import json
import os
from contextlib import suppress
from typing import Any, Callable, Dict, Optional, Type, Union
from dotenv import load_dotenv
from pydantic import BaseModel, Field, ValidationError, create_model
from swarm_models.openai_function_caller import OpenAIFunctionCaller
class DynamicParser:
@staticmethod
def extract_fields(model: Type[BaseModel]) -> Dict[str, Any]:
return {
field_name: (field.annotation, ... if field.is_required() else None)
for field_name, field in model.model_fields.items()
}
@staticmethod
def create_partial_model(model: Type[BaseModel], data: Dict[str, Any]) -> Type[BaseModel]:
fields = {
field_name: (field.annotation, ... if field.is_required() else None)
for field_name, field in model.model_fields.items()
if field_name in data
}
return create_model(f"Partial{model.__name__}", **fields)
@classmethod
def parse(cls, data: Union[str, Dict[str, Any]], model: Type[BaseModel]) -> Optional[BaseModel]:
if isinstance(data, str):
try:
data = json.loads(data)
except json.JSONDecodeError:
return None
# Try full model first
with suppress(ValidationError):
return model.model_validate(data)
# Create and try partial model
partial_model = cls.create_partial_model(model, data)
with suppress(ValidationError):
return partial_model.model_validate(data)
return None
load_dotenv()
# Define the Thoughts schema
class Thoughts(BaseModel):
text: str = Field(..., description="Current thoughts or observations regarding the task.")
reasoning: str = Field(..., description="Logical reasoning behind the thought process.")
plan: str = Field(..., description="A short bulleted list that conveys the immediate and long-term plan.")
criticism: str = Field(..., description="Constructive self-criticism to improve future responses.")
speak: str = Field(..., description="A concise summary of thoughts intended for the user.")
# Define the Command schema
class Command(BaseModel):
name: str = Field(..., description="Command name to execute from the provided list of commands.")
args: Dict[str, Any] = Field(..., description="Arguments required to execute the command.")
# Define the AgentResponse schema
class AgentResponse(BaseModel):
thoughts: Thoughts = Field(..., description="The agent's current thoughts and reasoning.")
command: Command = Field(..., description="The command to execute along with its arguments.")
# Define tool functions
def fluid_api_command(task: str):
    """Execute a fluid API request."""
    # The real fluid_api_request helper is not imported in this example, so
    # return a stub payload to keep the command map runnable.
    response = {"status": "success", "message": f"Fluid API task received: {task}"}
    print(json.dumps(response, indent=4))
    return response
def send_tweet_command(text: str):
"""Simulate sending a tweet."""
print(f"Tweet sent: {text}")
return {"status": "success", "message": f"Tweet sent: {text}"}
def do_nothing_command():
"""Do nothing."""
print("Doing nothing...")
return {"status": "success", "message": "No action taken."}
def task_complete_command(reason: str):
"""Mark the task as complete and provide a reason."""
print(f"Task completed: {reason}")
return {"status": "success", "message": f"Task completed: {reason}"}
# Dynamic command execution
def execute_command(name: str, args: Dict[str, Any]):
"""Dynamically execute a command based on its name and arguments."""
command_map: Dict[str, Callable] = {
"fluid_api": lambda **kwargs: fluid_api_command(task=kwargs.get("task")),
"send_tweet": lambda **kwargs: send_tweet_command(text=kwargs.get("text")),
"do_nothing": lambda **kwargs: do_nothing_command(),
"task_complete": lambda **kwargs: task_complete_command(reason=kwargs.get("reason")),
}
if name not in command_map:
raise ValueError(f"Unknown command: {name}")
# Execute the command with the provided arguments
return command_map[name](**args)
def parse_and_execute_command(response: Union[str, Dict[str, Any]], base_model: Type[BaseModel] = AgentResponse) -> Any:
"""Enhanced command parser with flexible input handling"""
parsed = DynamicParser.parse(response, base_model)
if not parsed:
raise ValueError("Failed to parse response")
if hasattr(parsed, 'command'):
command_name = parsed.command.name
command_args = parsed.command.args
return execute_command(command_name, command_args)
return parsed
ainame = "AutoAgent"
userprovided = "assistant"
SYSTEM_PROMPT = f"""
You are {ainame}, an advanced and autonomous {userprovided}.
Your role is to make decisions and complete tasks independently without seeking user assistance. Leverage your strengths as an LLM to solve tasks efficiently, adhering strictly to the commands and resources provided.
### GOALS:
1. {userprovided}
2. Execute tasks with precision and efficiency.
3. Ensure outputs are actionable and aligned with the user's objectives.
4. Continuously optimize task strategies for maximum effectiveness.
5. Maintain reliability and consistency in all responses.
### CONSTRAINTS:
1. Memory limit: ~4000 words for short-term memory. Save essential information to files immediately to avoid loss.
2. Independent decision-making: Do not rely on user assistance.
3. Exclusively use commands in double quotes (e.g., "command name").
4. Use subprocesses for commands that may take longer than a few minutes.
5. Ensure all outputs strictly adhere to the specified JSON response format.
### COMMANDS:
1. Fluid API: "fluid_api", args: "method": "<GET/POST/...>", "url": "<url>", "headers": "<headers>", "body": "<payload>"
2. Send Tweet: "send_tweet", args: "text": "<text>"
3. Do Nothing: "do_nothing", args:
4. Task Complete (Shutdown): "task_complete", args: "reason": "<reason>"
### RESOURCES:
1. Internet access for real-time information and data gathering.
2. Long-term memory management for storing critical information.
3. Access to GPT-3.5-powered Agents for delegating tasks.
4. File handling capabilities for output storage and retrieval.
### PERFORMANCE EVALUATION:
1. Continuously analyze and reflect on actions to ensure optimal task completion.
2. Self-critique decisions and strategies constructively to identify areas for improvement.
3. Ensure every command serves a clear purpose and minimizes resource usage.
4. Complete tasks in the least number of steps, balancing speed and accuracy.
### RESPONSE FORMAT:
Always respond in a strict JSON format as described below. Ensure your responses can be parsed with Python's `json.loads`:
"""
# Initialize the OpenAIFunctionCaller
model = OpenAIFunctionCaller(
system_prompt=SYSTEM_PROMPT,
max_tokens=4000,
temperature=0.9,
base_model=AgentResponse, # Pass the Pydantic schema as the base model
parallel_tool_calls=False,
openai_api_key=os.getenv("OPENAI_API_KEY")
)
# Example usage
user_input = (
"Analyze the provided Python code for inefficiencies, generate suggestions for improvements, "
"and provide optimized code."
)
response = model.run(user_input)
response = parse_and_execute_command(response)
print(response)
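For reference, a hedged illustration of the JSON shape that the `AgentResponse` schema above enforces and how `parse_and_execute_command` dispatches it. The dict below is an invented example rather than real model output, and it assumes the code runs in the same module as the definitions above.

```python
# Hypothetical model output matching the AgentResponse schema defined above.
example_response = {
    "thoughts": {
        "text": "The task asks for a status update.",
        "reasoning": "No external action is required yet.",
        "plan": "- wait for further input",
        "criticism": "Could gather more context first.",
        "speak": "Standing by.",
    },
    "command": {"name": "do_nothing", "args": {}},
}

# DynamicParser validates the dict against AgentResponse, then the command
# map routes "do_nothing" to do_nothing_command().
result = parse_and_execute_command(example_response)
print(result)  # {'status': 'success', 'message': 'No action taken.'}
```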

@ -1,5 +1,5 @@
 import os
-from typing import List, Dict, Any, Optional, Callable
+from typing import List, Dict, Any, Optional, Callable, get_type_hints
 from dataclasses import dataclass, field
 import json
 from datetime import datetime
@ -111,6 +111,9 @@ class ExecutionContext:
     history: List[Dict[str, Any]] = field(default_factory=list)
+    hints = get_type_hints(func)
class ToolAgent: class ToolAgent:
def __init__( def __init__(
self, self,

@ -86,7 +86,7 @@ swarms = "swarms.cli.main:main"
 [tool.poetry.group.lint.dependencies]
 black = ">=23.1,<25.0"
-ruff = ">=0.5.1,<0.7.4"
+ruff = ">=0.5.1,<0.8.2"
 types-toml = "^0.10.8.1"
 types-pytz = ">=2023.3,<2025.0"
 types-chardet = "^5.0.4.6"

@ -3,7 +3,5 @@ from swarms import Agent
 Agent(
     agent_name="Stock-Analysis-Agent",
     model_name="gpt-4o-mini",
-    max_loops="auto",
-    streaming_on=True,
-    interactive=True,
+    max_loops=1,
 ).run("What are 5 hft algorithms")

@ -771,8 +771,11 @@ class Agent:
         self,
         task: Optional[str] = None,
         img: Optional[str] = None,
+        speech: Optional[str] = None,
+        video: Optional[str] = None,
         is_last: Optional[bool] = False,
         print_task: Optional[bool] = False,
+        generate_speech: Optional[bool] = False,
         *args,
         **kwargs,
     ) -> Any:
@ -2294,12 +2297,13 @@ class Agent:
         self,
         task: Optional[str] = None,
         img: Optional[str] = None,
-        device: str = "cpu",  # gpu
-        device_id: int = 0,
-        all_cores: bool = True,
+        device: Optional[str] = "cpu",  # gpu
+        device_id: Optional[int] = 0,
+        all_cores: Optional[bool] = True,
         scheduled_run_date: Optional[datetime] = None,
-        do_not_use_cluster_ops: bool = False,
-        all_gpus: bool = False,
+        do_not_use_cluster_ops: Optional[bool] = False,
+        all_gpus: Optional[bool] = False,
+        generate_speech: Optional[bool] = False,
         *args,
         **kwargs,
     ) -> Any:
@ -2346,7 +2350,12 @@ class Agent:
         # If cluster ops disabled, run directly
         if do_not_use_cluster_ops is True:
             logger.info("Running without cluster operations")
-            return self._run(task=task, img=img, *args, **kwargs)
+            return self._run(
+                task=task,
+                img=img,
+                generate_speech=generate_speech,
+                *args,
+                **kwargs,
+            )
         else:
             return exec_callable_with_clusterops(
@ -2357,6 +2366,7 @@ class Agent:
                 func=self._run,
                 task=task,
                 img=img,
+                generate_speech=generate_speech,
                 *args,
                 **kwargs,
             )
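A hedged usage sketch of the new `generate_speech` flag threaded through `Agent.run` in the hunks above. The agent settings are taken from the README examples elsewhere in this PR; whether audio is actually produced depends on how `_run` wires the flag to the TTS helper added below, so treat this as an assumption rather than confirmed behavior.

```python
from swarms import Agent
from swarms.prompts.finance_agent_sys_prompt import (
    FINANCIAL_AGENT_SYS_PROMPT,
)

agent = Agent(
    agent_name="Financial-Analysis-Agent",
    system_prompt=FINANCIAL_AGENT_SYS_PROMPT,
    model_name="gpt-4o-mini",
    max_loops=1,
)

# generate_speech is the keyword added to run() in this diff (assumed to
# route the reply through the OpenAITTS helper introduced below).
output = agent.run(
    "Summarize the key drivers of quarterly revenue growth",
    generate_speech=True,
)
print(output)
```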

@ -0,0 +1,73 @@
import os
from loguru import logger
import pygame
import requests
import tempfile
from openai import OpenAI
class OpenAITTS:
"""
A class to interact with OpenAI API and play the generated audio with improved streaming capabilities.
"""
def __init__(self, *args, **kwargs):
self.client = OpenAI(
api_key=os.getenv("OPENAI_API_KEY"), *args, **kwargs
)
pygame.init()
def run(
self, task: str, play_sound: bool = True, *args, **kwargs
):
"""
Run a task with the OpenAI API and optionally play the generated audio with improved streaming.
Args:
task (str): The task to be executed.
play_sound (bool): If True, play the generated audio.
Returns:
None
"""
try:
response = self.client.audio.speech.create(
model="tts-1",
voice="nova",
input=task,
*args,
**kwargs,
)
            logger.info("Task completed successfully.")
            if play_sound:
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".mp3"
                ) as tmp_file:
                    # speech.create returns the audio bytes directly (there is
                    # no "url" field on the response), so write them to the
                    # temporary file instead of downloading from a URL.
                    tmp_file.write(response.content)
                    tmp_file.flush()
pygame.mixer.music.load(tmp_file.name)
pygame.mixer.music.play()
while pygame.mixer.music.get_busy():
pygame.time.Clock().tick(10)
except Exception as e:
logger.error(f"Error during task execution: {str(e)}")
# client = OpenAITTS(api_key=os.getenv("OPENAI_API_KEY"))
# client.run("Hello world! This is a streaming test.", play_sound=True)
def text_to_speech(
task: str, play_sound: bool = True, *args, **kwargs
):
out = OpenAITTS().run(
task, play_sound=play_sound, *args, **kwargs
)
return out
# print(text_to_speech(task="hello"))