diff --git a/.github/workflows/autofix.yml b/.github/workflows/autofix.yml index 21129735..be346103 100644 --- a/.github/workflows/autofix.yml +++ b/.github/workflows/autofix.yml @@ -22,4 +22,4 @@ jobs: - run: ruff format . - run: ruff check --fix . - - uses: autofix-ci/action@dd55f44df8f7cdb7a6bf74c78677eb8acd40cd0a + - uses: autofix-ci/action@ff86a557419858bb967097bfc916833f5647fa8c diff --git a/.github/workflows/bearer.yml b/.github/workflows/bearer.yml new file mode 100644 index 00000000..be0fb591 --- /dev/null +++ b/.github/workflows/bearer.yml @@ -0,0 +1,43 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. +# +# This workflow file requires a free account on Bearer.com to manage findings, notifications and more. +# See https://docs.bearer.com/guides/bearer-cloud/ +name: Bearer + +on: + push: + branches: ["master" ] + pull_request: + # The branches below must be a subset of the branches above + branches: ["master"] + schedule: + - cron: '24 22 * * 6' + +permissions: + contents: read # for actions/checkout to fetch code + security-events: write # for github/codeql-action/upload-sarif to upload SARIF results + actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status + +jobs: + bearer: + runs-on: ubuntu-latest + steps: + # Checkout project source + - uses: actions/checkout@v4 + # Scan code using Bearer CLI + - name: Run Report + id: report + uses: bearer/bearer-action@828eeb928ce2f4a7ca5ed57fb8b59508cb8c79bc + with: + api-key: ${{ secrets.BEARER_TOKEN }} + format: sarif + output: results.sarif + exit-code: 0 + # Upload SARIF file generated in previous step + - name: Upload SARIF file + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: results.sarif diff --git a/.github/workflows/dependency-review.yml 
b/.github/workflows/dependency-review.yml new file mode 100644 index 00000000..9bbf3ba2 --- /dev/null +++ b/.github/workflows/dependency-review.yml @@ -0,0 +1,39 @@ +# Dependency Review Action +# +# This Action will scan dependency manifest files that change as part of a Pull Request, +# surfacing known-vulnerable versions of the packages declared or updated in the PR. +# Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable +# packages will be blocked from merging. +# +# Source repository: https://github.com/actions/dependency-review-action +# Public documentation: https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review#dependency-review-enforcement +name: 'Dependency review' +on: + pull_request: + branches: [ "master" ] + +# If using a dependency submission action in this workflow this permission will need to be set to: +# +# permissions: +# contents: write +# +# https://docs.github.com/en/enterprise-cloud@latest/code-security/supply-chain-security/understanding-your-software-supply-chain/using-the-dependency-submission-api +permissions: + contents: read + # Write permissions for pull-requests are required for using the `comment-summary-in-pr` option, comment out if you aren't using this option + pull-requests: write + +jobs: + dependency-review: + runs-on: ubuntu-latest + steps: + - name: 'Checkout repository' + uses: actions/checkout@v4 + - name: 'Dependency Review' + uses: actions/dependency-review-action@v4 + # Commonly enabled options, see https://github.com/actions/dependency-review-action#configuration-options for all available options. 
+ with: + comment-summary-in-pr: always + # fail-on-severity: moderate + # deny-licenses: GPL-1.0-or-later, LGPL-2.0-or-later + # retry-on-snapshot-warnings: true diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml new file mode 100644 index 00000000..793d8e0e --- /dev/null +++ b/.github/workflows/docker-image.yml @@ -0,0 +1,18 @@ +name: Docker Image CI + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Build the Docker image + run: docker build . --file Dockerfile --tag my-image-name:$(date +%s) diff --git a/.github/workflows/pyre.yml b/.github/workflows/pyre.yml new file mode 100644 index 00000000..2e4713d3 --- /dev/null +++ b/.github/workflows/pyre.yml @@ -0,0 +1,46 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# This workflow integrates Pyre with GitHub's +# Code Scanning feature. +# +# Pyre is a performant type checker for Python compliant with +# PEP 484. Pyre can analyze codebases with millions of lines +# of code incrementally – providing instantaneous feedback +# to developers as they write code. 
+# +# See https://pyre-check.org + +name: Pyre + +on: + workflow_dispatch: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +permissions: + contents: read + +jobs: + pyre: + permissions: + actions: read + contents: read + security-events: write + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: true + + - name: Run Pyre + uses: facebook/pyre-action@60697a7858f7cc8470d8cc494a3cf2ad6b06560d + with: + # To customize these inputs: + # See https://github.com/facebook/pyre-action#inputs + repo-directory: './' + requirements-path: 'requirements.txt' diff --git a/.github/workflows/pysa.yml b/.github/workflows/pysa.yml new file mode 100644 index 00000000..6c301e80 --- /dev/null +++ b/.github/workflows/pysa.yml @@ -0,0 +1,50 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# This workflow integrates Python Static Analyzer (Pysa) with +# GitHub's Code Scanning feature. +# +# Python Static Analyzer (Pysa) is a security-focused static +# analysis tool that tracks flows of data from where they +# originate to where they terminate in a dangerous location. 
+# +# See https://pyre-check.org/docs/pysa-basics/ + +name: Pysa + +on: + workflow_dispatch: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + schedule: + - cron: '43 5 * * 3' + +permissions: + contents: read + +jobs: + pysa: + permissions: + actions: read + contents: read + security-events: write + + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: true + + - name: Run Pysa + uses: facebook/pysa-action@f46a63777e59268613bd6e2ff4e29f144ca9e88b + with: + # To customize these inputs: + # See https://github.com/facebook/pysa-action#inputs + repo-directory: './' + requirements-path: 'requirements.txt' + infer-types: true + include-default-sapp-filters: true diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml new file mode 100644 index 00000000..f3586044 --- /dev/null +++ b/.github/workflows/python-package-conda.yml @@ -0,0 +1,34 @@ +name: Python Package using Conda + +on: [push] + +jobs: + build-linux: + runs-on: ubuntu-latest + strategy: + max-parallel: 5 + + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: '3.10' + - name: Add conda to system path + run: | + # $CONDA is an environment variable pointing to the root of the miniconda directory + echo $CONDA/bin >> $GITHUB_PATH + - name: Install dependencies + run: | + conda env update --file environment.yml --name base + - name: Lint with flake8 + run: | + conda install flake8 + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + conda install pytest + pytest diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml new file mode 100644 index 00000000..1e78a687 --- /dev/null +++ b/.github/workflows/semgrep.yml @@ -0,0 +1,49 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# This workflow file requires a free account on Semgrep.dev to +# manage rules, file ignores, notifications, and more. +# +# See https://semgrep.dev/docs + +name: Semgrep + +on: + push: + branches: [ "master" ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ "master" ] + schedule: + - cron: '19 7 * * 3' + +permissions: + contents: read + +jobs: + semgrep: + permissions: + contents: read # for actions/checkout to fetch code + security-events: write # for github/codeql-action/upload-sarif to upload SARIF results + actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status + name: Scan + runs-on: ubuntu-latest + steps: + # Checkout project source + - uses: actions/checkout@v4 + + # Scan code using project's configuration on https://semgrep.dev/manage + - uses: returntocorp/semgrep-action@fcd5ab7459e8d91cb1777481980d1b18b4fc6735 + with: + publishToken: ${{ secrets.SEMGREP_APP_TOKEN }} + publishDeployment: ${{ secrets.SEMGREP_DEPLOYMENT_ID }} + generateSarif: "1" + + # Upload SARIF file generated in previous step + - name: Upload SARIF file + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: semgrep.sarif + if: always() diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml new file mode 100644 index 00000000..d9e6c82b --- /dev/null +++ b/.github/workflows/trivy.yml @@ -0,0 +1,48 @@ +# This workflow uses actions 
that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +name: trivy + +on: + push: + branches: [ "master" ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ "master" ] + schedule: + - cron: '31 0 * * 5' + +permissions: + contents: read + +jobs: + build: + permissions: + contents: read # for actions/checkout to fetch code + security-events: write # for github/codeql-action/upload-sarif to upload SARIF results + actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status + name: Build + runs-on: "ubuntu-20.04" + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Build an image from Dockerfile + run: | + docker build -t docker.io/my-organization/my-app:${{ github.sha }} . + + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@7b7aa264d83dc58691451798b4d117d53d21edfe + with: + image-ref: 'docker.io/my-organization/my-app:${{ github.sha }}' + format: 'template' + template: '@/contrib/sarif.tpl' + output: 'trivy-results.sarif' + severity: 'CRITICAL,HIGH' + + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: 'trivy-results.sarif' diff --git a/README.md b/README.md index 6469883d..dee89a6b 100644 --- a/README.md +++ b/README.md @@ -81,9 +81,10 @@ Refer to our documentation for production grade implementation details. 
## Install 💻 +Install the following packages with copy and paste ```bash -$ pip3 install -U swarms +$ pip3 install -U swarms swarm-models swarms-memory ``` @@ -168,28 +169,15 @@ The `Agent` class offers a range of settings to tailor its behavior to specific ```python import os from swarms import Agent -from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) -from dotenv import load_dotenv - -load_dotenv() - -# Get the OpenAI API key from the environment variable -api_key = os.getenv("OPENAI_API_KEY") - -# Create an instance of the OpenAIChat class -model = OpenAIChat( - openai_api_key=api_key, model_name="gpt-4o-mini", temperature=0.1 -) - # Initialize the agent agent = Agent( agent_name="Financial-Analysis-Agent", system_prompt=FINANCIAL_AGENT_SYS_PROMPT, - llm=model, + model_name="gpt-4o-mini", max_loops=1, autosave=True, dashboard=False, @@ -211,11 +199,10 @@ agent.run( ``` ----- + ### Integrating RAG with Swarms for Enhanced Long-Term Memory `Agent` equipped with quasi-infinite long term memory using RAG (Relational Agent Graph) for advanced document understanding, analysis, and retrieval capabilities. 
- - **Mermaid Diagram for RAG Integration** ```mermaid graph TD @@ -227,8 +214,11 @@ graph TD F --> G[Return Output] ``` -**Step 1: Initialize the ChromaDB Client** ```python +from swarms import Agent +from swarms.prompts.finance_agent_sys_prompt import ( + FINANCIAL_AGENT_SYS_PROMPT, +) import os from swarms_memory import ChromaDB @@ -239,29 +229,13 @@ chromadb = ChromaDB( output_dir="finance_agent_rag", # Directory for storing RAG data # docs_folder="artifacts", # Uncomment and specify the folder containing your documents ) -``` - -**Step 2: Define the Model** -```python -from swarm_models import Anthropic -from swarms.prompts.finance_agent_sys_prompt import ( - FINANCIAL_AGENT_SYS_PROMPT, -) - -# Define the Anthropic model for language processing -model = Anthropic(anthropic_api_key=os.getenv("ANTHROPIC_API_KEY")) -``` - -**Step 3: Initialize the Agent with RAG** -```python -from swarms import Agent # Initialize the agent with RAG capabilities agent = Agent( agent_name="Financial-Analysis-Agent", system_prompt=FINANCIAL_AGENT_SYS_PROMPT, agent_description="Agent creates a comprehensive financial analysis", - llm=model, + model_name="gpt-4o-mini", max_loops="auto", # Auto-adjusts loops based on task complexity autosave=True, # Automatically saves agent state dashboard=False, # Disables dashboard for this example @@ -378,7 +352,6 @@ The following is an example of an agent that intakes a pydantic basemodel and ou ```python from pydantic import BaseModel, Field from swarms import Agent -from swarm_models import Anthropic # Initialize the schema for the person's information @@ -410,7 +383,7 @@ agent = Agent( ), # Set the tool schema to the JSON string -- this is the key difference tool_schema=tool_schema, - llm=Anthropic(), + model_name="gpt-4o", max_loops=3, autosave=True, dashboard=False, @@ -617,8 +590,6 @@ You can now easily plug this custom Griptape agent into the **Swarms Framework** ## Understanding Swarms -### What is a Swarm? 
- A swarm refers to a group of more than two agents working collaboratively to achieve a common goal. These agents can be software entities, such as llms that interact with each other to perform complex tasks. The concept of a swarm is inspired by natural systems like ant colonies or bird flocks, where simple individual behaviors lead to complex group dynamics and problem-solving capabilities. ### How Swarm Architectures Facilitate Communication @@ -631,9 +602,6 @@ Swarm architectures are designed to establish and manage communication between a 3. **Sequential Communication**: Sequential swarms process tasks in a linear order, where each agent's output becomes the input for the next agent. This ensures that tasks with dependencies are handled in the correct sequence, maintaining the integrity of the workflow. -4. **Mesh Communication**: In mesh swarms, agents are fully connected, allowing any agent to communicate with any other agent. This setup provides high flexibility and redundancy, making it ideal for complex systems requiring dynamic interactions. - -5. **Federated Communication**: Federated swarms involve multiple independent swarms that collaborate by sharing information and results. Each swarm operates autonomously but can contribute to a larger task, enabling distributed problem-solving across different nodes. Swarm architectures leverage these communication patterns to ensure that agents work together efficiently, adapting to the specific requirements of the task at hand. By defining clear communication protocols and interaction models, swarm architectures enable the seamless orchestration of multiple agents, leading to enhanced performance and problem-solving capabilities. 
@@ -911,14 +879,12 @@ The `run` method returns the final output after all agents have processed the in from swarms import Agent, AgentRearrange -from swarm_models import Anthropic - # Initialize the director agent director = Agent( agent_name="Director", system_prompt="Directs the tasks for the workers", - llm=Anthropic(), + model_name="claude-2", max_loops=1, dashboard=False, streaming_on=True, @@ -934,7 +900,7 @@ director = Agent( worker1 = Agent( agent_name="Worker1", system_prompt="Generates a transcript for a youtube video on what swarms are", - llm=Anthropic(), + model_name="claude-2", max_loops=1, dashboard=False, streaming_on=True, @@ -949,7 +915,7 @@ worker1 = Agent( worker2 = Agent( agent_name="Worker2", system_prompt="Summarizes the transcript generated by Worker1", - llm=Anthropic(), + model_name="claude-2", max_loops=1, dashboard=False, streaming_on=True, @@ -1103,20 +1069,12 @@ The `run` method returns the final output after all agents have processed the in ```python import os -from swarm_models import OpenAIChat from swarms import Agent, MixtureOfAgents -api_key = os.getenv("OPENAI_API_KEY") - -# Create individual agents with the OpenAIChat model -model = OpenAIChat( - openai_api_key=api_key, model_name="gpt-4", temperature=0.1 -) - # Agent 1: Financial Statement Analyzer agent1 = Agent( agent_name="FinancialStatementAnalyzer", - llm=model, + model_name="gpt-4o", system_prompt="""You are a Financial Statement Analyzer specializing in 10-K SEC reports. Your primary focus is on analyzing the financial statements, including the balance sheet, income statement, and cash flow statement. Key responsibilities: @@ -1142,7 +1100,7 @@ When analyzing, consider industry standards and compare the company's performanc # Agent 2: Risk Assessment Specialist agent2 = Agent( agent_name="RiskAssessmentSpecialist", - llm=model, + model_name="gpt-4o", system_prompt="""You are a Risk Assessment Specialist focusing on 10-K SEC reports. 
Your primary role is to identify, analyze, and evaluate potential risks disclosed in the report. Key responsibilities: @@ -1169,7 +1127,7 @@ Your analysis should provide a comprehensive overview of the company's risk land # Agent 3: Business Strategy Evaluator agent3 = Agent( agent_name="BusinessStrategyEvaluator", - llm=model, + model_name="gpt-4o", system_prompt="""You are a Business Strategy Evaluator specializing in analyzing 10-K SEC reports. Your focus is on assessing the company's overall strategy, market position, and future outlook. Key responsibilities: @@ -1197,7 +1155,7 @@ Your analysis should provide insights into the company's strategic direction, it # Aggregator Agent aggregator_agent = Agent( agent_name="10KReportAggregator", - llm=model, + model_name="gpt-4o", system_prompt="""You are the 10-K Report Aggregator, responsible for synthesizing and summarizing the analyses provided by the Financial Statement Analyzer, Risk Assessment Specialist, and Business Strategy Evaluator. Your goal is to create a comprehensive, coherent, and insightful summary of the 10-K SEC report. Key responsibilities: @@ -1287,9 +1245,8 @@ The `run` method returns a dictionary containing the outputs of each agent that ```python import os -from swarms import Agent +from swarms import Agent, SpreadSheetSwarm from swarm_models import OpenAIChat -from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm # Define custom system prompts for each social media platform TWITTER_AGENT_SYS_PROMPT = """ @@ -1312,20 +1269,12 @@ EMAIL_AGENT_SYS_PROMPT = """ You are an Email marketing expert specializing in real estate. Your task is to write compelling email campaigns to promote properties, focusing on personalization, subject lines, and effective call-to-action strategies to drive conversions. 
""" -# Example usage: -api_key = os.getenv("OPENAI_API_KEY") - -# Model -model = OpenAIChat( - openai_api_key=api_key, model_name="gpt-4o-mini", temperature=0.1 -) - # Initialize your agents for different social media platforms agents = [ Agent( agent_name="Twitter-RealEstate-Agent", system_prompt=TWITTER_AGENT_SYS_PROMPT, - llm=model, + model_name="gpt-4o", max_loops=1, dynamic_temperature_enabled=True, saved_state_path="twitter_realestate_agent.json", @@ -1335,7 +1284,7 @@ agents = [ Agent( agent_name="Instagram-RealEstate-Agent", system_prompt=INSTAGRAM_AGENT_SYS_PROMPT, - llm=model, + model_name="gpt-4o", max_loops=1, dynamic_temperature_enabled=True, saved_state_path="instagram_realestate_agent.json", @@ -1345,7 +1294,7 @@ agents = [ Agent( agent_name="Facebook-RealEstate-Agent", system_prompt=FACEBOOK_AGENT_SYS_PROMPT, - llm=model, + model_name="gpt-4o", max_loops=1, dynamic_temperature_enabled=True, saved_state_path="facebook_realestate_agent.json", @@ -1355,7 +1304,7 @@ agents = [ Agent( agent_name="LinkedIn-RealEstate-Agent", system_prompt=LINKEDIN_AGENT_SYS_PROMPT, - llm=model, + model_name="gpt-4o", max_loops=1, dynamic_temperature_enabled=True, saved_state_path="linkedin_realestate_agent.json", @@ -1365,7 +1314,7 @@ agents = [ Agent( agent_name="Email-RealEstate-Agent", system_prompt=EMAIL_AGENT_SYS_PROMPT, - llm=model, + model_name="gpt-4o", max_loops=1, dynamic_temperature_enabled=True, saved_state_path="email_realestate_agent.json", @@ -1474,7 +1423,7 @@ The `run` method returns the output from the most relevant agent selected based ```python -from swarms.structs.tree_swarm import TreeAgent, Tree, ForestSwarm +from swarms import TreeAgent, Tree, ForestSwarm # Create agents with varying system prompts and dynamically generated distances/keywords agents_tree1 = [ diff --git a/api/agent_api_test.py b/api/agent_api_test.py index 74f519d0..066efc4f 100644 --- a/api/agent_api_test.py +++ b/api/agent_api_test.py @@ -7,26 +7,27 @@ logger.add( 
"api_tests_{time}.log", rotation="100 MB", level="DEBUG", - format="{time} {level} {message}" + format="{time} {level} {message}", ) BASE_URL = "http://localhost:8000/v1" + def test_create_agent(): """Test creating a new agent.""" logger.info("Testing agent creation") - + payload = { "agent_name": "Test Agent", "system_prompt": "You are a helpful assistant", "model_name": "gpt-4", "description": "Test agent", - "tags": ["test"] + "tags": ["test"], } - + response = requests.post(f"{BASE_URL}/agent", json=payload) logger.debug(f"Create response: {response.json()}") - + if response.status_code == 200: logger.success("Successfully created agent") return response.json()["agent_id"] @@ -34,66 +35,73 @@ def test_create_agent(): logger.error(f"Failed to create agent: {response.text}") return None + def test_list_agents(): """Test listing all agents.""" logger.info("Testing agent listing") - + response = requests.get(f"{BASE_URL}/agents") logger.debug(f"List response: {response.json()}") - + if response.status_code == 200: logger.success(f"Found {len(response.json())} agents") else: logger.error(f"Failed to list agents: {response.text}") + def test_completion(agent_id): """Test running a completion.""" logger.info("Testing completion") - + payload = { "prompt": "What is the weather like today?", - "agent_id": agent_id + "agent_id": agent_id, } - - response = requests.post(f"{BASE_URL}/agent/completions", json=payload) + + response = requests.post( + f"{BASE_URL}/agent/completions", json=payload + ) logger.debug(f"Completion response: {response.json()}") - + if response.status_code == 200: logger.success("Successfully got completion") else: logger.error(f"Failed to get completion: {response.text}") + def test_delete_agent(agent_id): """Test deleting an agent.""" logger.info("Testing agent deletion") - + response = requests.delete(f"{BASE_URL}/agent/{agent_id}") logger.debug(f"Delete response: {response.json()}") - + if response.status_code == 200: 
logger.success("Successfully deleted agent") else: logger.error(f"Failed to delete agent: {response.text}") + def run_tests(): """Run all tests in sequence.""" logger.info("Starting API tests") - + # Create agent and get ID agent_id = test_create_agent() if not agent_id: logger.error("Cannot continue tests without agent ID") return - + # Wait a bit for agent to be ready time.sleep(1) - + # Run other tests test_list_agents() test_completion(agent_id) test_delete_agent(agent_id) - + logger.info("Tests completed") + if __name__ == "__main__": - run_tests() \ No newline at end of file + run_tests() diff --git a/byte.py b/byte.py new file mode 100644 index 00000000..d0a5a92f --- /dev/null +++ b/byte.py @@ -0,0 +1,898 @@ +from enum import Enum +from typing import Union, Optional +import io +from PIL import Image +import numpy as np +import torch +import struct + + +from enum import auto +from typing import List, Dict, Tuple +import wave +from dataclasses import dataclass +import torch.nn as nn +import torch.nn.functional as F +from loguru import logger +from einops import rearrange +from torch import Tensor + + +@dataclass +class ModelConfig: + """Configuration for the enhanced BytePredictor model.""" + + vocab_size: int = 256 # Standard byte range + hidden_size: int = 1024 + num_layers: int = 12 + num_key_value_heads: int = 8 # For multi-query attention + num_query_heads: int = 32 # More query heads than kv heads + dropout: float = 0.1 + max_sequence_length: int = 8192 + rope_theta: float = 10000.0 + layer_norm_eps: float = 1e-5 + vocab_parallel: bool = False + qk_norm: bool = True + qk_norm_scale: float = None + attention_bias: bool = False + + +class MultiQueryAttention(nn.Module): + """Fixed Multi-Query Attention implementation.""" + + def __init__(self, config: ModelConfig): + super().__init__() + self.hidden_size = config.hidden_size + self.num_query_heads = config.num_query_heads + self.num_key_value_heads = config.num_key_value_heads + self.head_dim = 
config.hidden_size // config.num_query_heads + self.qk_scale = config.qk_norm_scale or (self.head_dim**-0.5) + + self.q_proj = nn.Linear( + config.hidden_size, config.num_query_heads * self.head_dim + ) + self.k_proj = nn.Linear( + config.hidden_size, + config.num_key_value_heads * self.head_dim, + ) + self.v_proj = nn.Linear( + config.hidden_size, + config.num_key_value_heads * self.head_dim, + ) + self.o_proj = nn.Linear( + config.num_query_heads * self.head_dim, config.hidden_size + ) + + self.qk_norm = config.qk_norm + if self.qk_norm: + self.q_norm = nn.LayerNorm(self.head_dim) + self.k_norm = nn.LayerNorm(self.head_dim) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + batch_size, seq_length, _ = hidden_states.shape + + # Project and reshape + q = self.q_proj(hidden_states) + k = self.k_proj(hidden_states) + v = self.v_proj(hidden_states) + + # Reshape to [seq_len, batch, heads, head_dim] + q = q.view( + batch_size, + seq_length, + self.num_query_heads, + self.head_dim, + ).permute(1, 0, 2, 3) + k = k.view( + batch_size, + seq_length, + self.num_key_value_heads, + self.head_dim, + ).permute(1, 0, 2, 3) + v = v.view( + batch_size, + seq_length, + self.num_key_value_heads, + self.head_dim, + ).permute(1, 0, 2, 3) + + # Apply rotary embeddings + # q, k = self.rotary(q, k, seq_length) + + # Apply QK normalization if enabled + if self.qk_norm: + q = self.q_norm(q) + k = self.k_norm(k) + + # Handle MQA head expansion + if self.num_key_value_heads != self.num_query_heads: + k = k.repeat_interleave( + self.num_query_heads // self.num_key_value_heads, + dim=2, + ) + v = v.repeat_interleave( + self.num_query_heads // self.num_key_value_heads, + dim=2, + ) + + # Compute attention + # Reshape for matmul: [batch, heads, seq_length, head_dim] + q = q.permute(1, 2, 0, 3) + k = k.permute(1, 2, 0, 3) + v = v.permute(1, 2, 0, 3) + + attn_weights = ( + torch.matmul(q, k.transpose(-2, -1)) * 
self.qk_scale + ) + + if attention_mask is not None: + attn_weights = attn_weights + attention_mask + + attn_weights = F.softmax(attn_weights, dim=-1) + + output = torch.matmul(attn_weights, v) + + # Reshape back to [batch, seq_length, hidden_size] + output = ( + output.transpose(1, 2) + .contiguous() + .view(batch_size, seq_length, -1) + ) + output = self.o_proj(output) + + return output + + +class EnhancedBytePredictor(nn.Module): + """Enhanced byte prediction model with state-of-the-art features.""" + + def __init__(self, config: ModelConfig): + super().__init__() + self.config = config + + # Token embeddings + self.tok_embeddings = nn.Embedding( + config.vocab_size, config.hidden_size + ) + + # Transformer layers + self.layers = nn.ModuleList( + [ + nn.ModuleDict( + { + "attention": MultiQueryAttention(config), + "attention_norm": nn.LayerNorm( + config.hidden_size, + eps=config.layer_norm_eps, + ), + "feed_forward": nn.Sequential( + nn.Linear( + config.hidden_size, + 4 * config.hidden_size, + ), + nn.GELU(), + nn.Linear( + 4 * config.hidden_size, + config.hidden_size, + ), + ), + "feed_forward_norm": nn.LayerNorm( + config.hidden_size, + eps=config.layer_norm_eps, + ), + } + ) + for _ in range(config.num_layers) + ] + ) + + self.norm = nn.LayerNorm( + config.hidden_size, eps=config.layer_norm_eps + ) + self.output = nn.Linear( + config.hidden_size, config.vocab_size, bias=False + ) + + # Initialize weights + self.apply(self._init_weights) + + def _init_weights(self, module: nn.Module) -> None: + """Initialize weights with scaled normal distribution.""" + if isinstance(module, nn.Linear): + torch.nn.init.normal_(module.weight, mean=0.0, std=0.02) + if module.bias is not None: + torch.nn.init.zeros_(module.bias) + elif isinstance(module, nn.Embedding): + torch.nn.init.normal_(module.weight, mean=0.0, std=0.02) + + def forward( + self, + input_ids: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + """ + Forward pass of the 
model. + + Args: + input_ids: Tensor of shape (batch_size, sequence_length) + attention_mask: Optional attention mask + + Returns: + Tensor of logits with shape (batch_size, sequence_length, vocab_size) + """ + hidden_states = self.tok_embeddings(input_ids) + + # Create causal mask if needed + if attention_mask is None: + attention_mask = torch.triu( + torch.ones( + (input_ids.size(1), input_ids.size(1)), + device=input_ids.device, + dtype=torch.bool, + ), + diagonal=1, + ) + attention_mask = attention_mask.masked_fill( + attention_mask == 1, float("-inf") + ) + + # Apply transformer layers + for layer in self.layers: + # Attention block + hidden_states = hidden_states + layer["attention"]( + layer["attention_norm"](hidden_states), attention_mask + ) + + # Feed-forward block + hidden_states = hidden_states + layer["feed_forward"]( + layer["feed_forward_norm"](hidden_states) + ) + + hidden_states = self.norm(hidden_states) + logits = self.output(hidden_states) + + return logits + + def compute_loss( + self, + input_ids: torch.Tensor, + target_ids: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + """ + Compute cross entropy loss. + + Args: + input_ids: Input token ids + target_ids: Target token ids + attention_mask: Optional attention mask + + Returns: + Loss value + """ + logits = self(input_ids, attention_mask) + loss = F.cross_entropy( + rearrange(logits, "b s v -> (b s) v"), + rearrange(target_ids, "b s -> (b s)"), + ) + return loss + + @torch.no_grad() + def _generate( + self, + input_ids: torch.Tensor, + max_new_tokens: int = 100, + temperature: float = 1.0, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + repetition_penalty: float = 1.0, + ) -> torch.Tensor: + """ + Generate new tokens autoregressively. 
+ + Args: + input_ids: Starting sequence + max_new_tokens: Number of tokens to generate + temperature: Sampling temperature + top_k: K for top-k sampling + top_p: P for nucleus sampling + repetition_penalty: Penalty for repeating tokens + + Returns: + Generated sequence + """ + batch_size, seq_length = input_ids.shape + generated = input_ids.clone() + + for _ in range(max_new_tokens): + if generated.size(1) >= self.config.max_sequence_length: + break + + # Forward pass + logits = self(generated)[:, -1, :] + + # Apply temperature + logits = logits / temperature + + # Apply repetition penalty + if repetition_penalty != 1.0: + for i in range(batch_size): + for token_id in set(generated[i].tolist()): + logits[i, token_id] /= repetition_penalty + + # Apply top-k sampling + if top_k is not None: + indices_to_remove = ( + logits + < torch.topk(logits, top_k)[0][..., -1, None] + ) + logits[indices_to_remove] = float("-inf") + + # Apply nucleus (top-p) sampling + if top_p is not None: + sorted_logits, sorted_indices = torch.sort( + logits, descending=True + ) + cumulative_probs = torch.cumsum( + F.softmax(sorted_logits, dim=-1), dim=-1 + ) + + # Remove tokens with cumulative probability above the threshold + sorted_indices_to_remove = cumulative_probs > top_p + sorted_indices_to_remove[..., 1:] = ( + sorted_indices_to_remove[..., :-1].clone() + ) + sorted_indices_to_remove[..., 0] = 0 + + indices_to_remove = torch.zeros_like( + logits, dtype=torch.bool + ) + indices_to_remove.scatter_( + 1, sorted_indices, sorted_indices_to_remove + ) + logits[indices_to_remove] = float("-inf") + + # Sample next token + probs = F.softmax(logits, dim=-1) + next_token = torch.multinomial(probs, num_samples=1) + + # Append to sequence + generated = torch.cat([generated, next_token], dim=1) + + return generated + + def generate( + self, + input_ids: torch.Tensor, + max_new_tokens: int = 100, + temperature: float = 1.0, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + 
repetition_penalty: float = 1.0, + ): + tensor_data = self._generate( + input_ids=input_ids, + max_new_tokens=max_new_tokens, + temperature=temperature, + top_k=top_k, + top_p=top_p, + repetition_penalty=repetition_penalty, + ) + + return tensor_to_data(tensor_data) + + +# import torch +# from typing import Optional + + +class DataType(Enum): + TEXT = "text" + IMAGE = "image" + AUDIO = "audio" + VIDEO = "video" + BINARY = "binary" + + +class ByteDetokenizer: + """Utility class for converting model output bytes back to original data formats.""" + + @staticmethod + def tensor_to_bytes(tensor: torch.Tensor) -> bytes: + """Convert model output tensor to bytes.""" + # Convert logits/probabilities to byte values + if tensor.dim() > 1: + # If we have logits, convert to byte indices + byte_indices = tensor.argmax(dim=-1) + else: + byte_indices = tensor + + # Convert to Python bytes + return bytes( + byte_indices.cpu().numpy().astype(np.uint8).tolist() + ) + + @staticmethod + def decode_text(byte_sequence: bytes) -> str: + """Convert bytes to text.""" + try: + return byte_sequence.decode("utf-8") + except UnicodeDecodeError: + # Try with error handling + return byte_sequence.decode("utf-8", errors="replace") + + @staticmethod + def decode_image( + byte_sequence: bytes, + mode: str = "RGB", + size: Optional[tuple] = None, + ) -> Image.Image: + """Convert bytes to image. + + Args: + byte_sequence: Raw image bytes + mode: Image mode (RGB, RGBA, L, etc.) 
+ size: Optional tuple of (width, height) + """ + try: + # Try to load as-is first (for standard image formats) + img = Image.open(io.BytesIO(byte_sequence)) + if size: + img = img.resize(size) + return img + except: + # If failed, assume raw pixel data + if not size: + # Try to determine size from byte count + pixel_count = len(byte_sequence) // len(mode) + size = ( + int(np.sqrt(pixel_count)), + int(np.sqrt(pixel_count)), + ) + + # Convert raw bytes to pixel array + pixels = np.frombuffer(byte_sequence, dtype=np.uint8) + pixels = pixels.reshape((*size, len(mode))) + + return Image.fromarray(pixels, mode=mode) + + @staticmethod + def decode_audio( + byte_sequence: bytes, + sample_rate: int = 44100, + channels: int = 2, + sample_width: int = 2, + ) -> np.ndarray: + """Convert bytes to audio samples. + + Args: + byte_sequence: Raw audio bytes + sample_rate: Audio sample rate in Hz + channels: Number of audio channels + sample_width: Bytes per sample (1, 2, or 4) + """ + # Determine format string based on sample width + format_str = { + 1: "b", # signed char + 2: "h", # short + 4: "i", # int + }[sample_width] + + # Unpack bytes to samples + sample_count = len(byte_sequence) // (channels * sample_width) + samples = struct.unpack( + f"<{sample_count * channels}{format_str}", byte_sequence + ) + + # Reshape to [samples, channels] + return np.array(samples).reshape(-1, channels) + + def decode_data( + self, + model_output: Union[torch.Tensor, bytes], + data_type: DataType, + **kwargs, + ) -> Union[str, Image.Image, np.ndarray, bytes]: + """Main method to decode model output to desired format. 
+ + Args: + model_output: Either tensor from model or raw bytes + data_type: Type of data to decode to + **kwargs: Additional parameters for specific decoders + + Returns: + Decoded data in specified format + """ + # Convert tensor to bytes if needed + if isinstance(model_output, torch.Tensor): + byte_sequence = self.tensor_to_bytes(model_output) + else: + byte_sequence = model_output + + # Decode based on type + if data_type == DataType.TEXT: + return self.decode_text(byte_sequence) + elif data_type == DataType.IMAGE: + return self.decode_image(byte_sequence, **kwargs) + elif data_type == DataType.AUDIO: + return self.decode_audio(byte_sequence, **kwargs) + elif data_type == DataType.VIDEO: + raise NotImplementedError( + "Video decoding not yet implemented" + ) + else: # BINARY + return byte_sequence + + +# Usage example + + +class Modality(Enum): + TEXT = auto() + IMAGE = auto() + AUDIO = auto() + VIDEO = auto() + BINARY = auto() + MULTIMODAL = auto() + + +@dataclass +class ModalityInfo: + """Information about detected modality.""" + + modality: Modality + confidence: float + metadata: Dict[str, any] + sub_modalities: Optional[List["ModalityInfo"]] = None + + +class ModalityDetector: + """Detects data modalities from byte sequences.""" + + # Common file signatures (magic numbers) + SIGNATURES = { + # Images + b"\xFF\xD8\xFF": "JPEG", + b"\x89PNG\r\n\x1a\n": "PNG", + b"GIF87a": "GIF", + b"GIF89a": "GIF", + b"RIFF": "WEBP", + # Audio + b"RIFF....WAVE": "WAV", + b"ID3": "MP3", + b"\xFF\xFB": "MP3", + b"OggS": "OGG", + # Video + b"\x00\x00\x00\x18ftypmp42": "MP4", + b"\x00\x00\x00\x1Cftypav01": "MP4", + b"\x1A\x45\xDF\xA3": "WEBM", + } + + def __init__(self): + self.magic = magic.Magic(mime=True) + + def _check_text_probability(self, data: bytes) -> float: + """Estimate probability that data is text.""" + # Check if data is valid UTF-8 + try: + data.decode("utf-8") + # Count printable ASCII characters + printable = sum(1 for b in data if 32 <= b <= 126) + return 
printable / len(data) + except UnicodeDecodeError: + return 0.0 + + def _check_image_validity(self, data: bytes) -> Tuple[bool, Dict]: + """Check if data is a valid image and extract metadata.""" + try: + with io.BytesIO(data) as bio: + img = Image.open(bio) + return True, { + "format": img.format, + "size": img.size, + "mode": img.mode, + } + except: + return False, {} + + def _check_audio_validity(self, data: bytes) -> Tuple[bool, Dict]: + """Check if data is valid audio and extract metadata.""" + try: + with io.BytesIO(data) as bio: + # Try to parse as WAV + with wave.open(bio) as wav: + return True, { + "channels": wav.getnchannels(), + "sample_width": wav.getsampwidth(), + "framerate": wav.getframerate(), + "frames": wav.getnframes(), + } + except: + # Check for other audio signatures + for sig in [b"ID3", b"\xFF\xFB", b"OggS"]: + if data.startswith(sig): + return True, {"format": "compressed_audio"} + return False, {} + + def _detect_boundaries( + self, data: bytes + ) -> List[Tuple[int, int, Modality]]: + """Detect boundaries between different modalities in the data.""" + boundaries = [] + current_pos = 0 + + while current_pos < len(data): + # Look for known signatures + for sig, format_type in self.SIGNATURES.items(): + if data[current_pos:].startswith(sig): + # Found a signature, determine its length + if format_type in ["JPEG", "PNG", "GIF"]: + # Find image end + try: + with io.BytesIO( + data[current_pos:] + ) as bio: + img = Image.open(bio) + img.verify() + size = bio.tell() + boundaries.append( + ( + current_pos, + current_pos + size, + Modality.IMAGE, + ) + ) + current_pos += size + continue + except: + pass + + # Check for text sections + text_prob = self._check_text_probability( + data[current_pos : current_pos + 1024] + ) + if text_prob > 0.8: + # Look for end of text section + end_pos = current_pos + 1 + while end_pos < len(data): + if ( + self._check_text_probability( + data[end_pos : end_pos + 32] + ) + < 0.5 + ): + break + end_pos += 1 + 
boundaries.append( + (current_pos, end_pos, Modality.TEXT) + ) + current_pos = end_pos + continue + + current_pos += 1 + + return boundaries + + def detect_modality(self, data: bytes) -> ModalityInfo: + """Detect modality of byte sequence.""" + # First check for single modality + mime_type = self.magic.from_buffer(data) + + # Check text + text_prob = self._check_text_probability(data) + if text_prob > 0.9: + return ModalityInfo( + modality=Modality.TEXT, + confidence=text_prob, + metadata={"mime_type": mime_type}, + ) + + # Check image + is_image, image_meta = self._check_image_validity(data) + if is_image: + return ModalityInfo( + modality=Modality.IMAGE, + confidence=1.0, + metadata={**image_meta, "mime_type": mime_type}, + ) + + # Check audio + is_audio, audio_meta = self._check_audio_validity(data) + if is_audio: + return ModalityInfo( + modality=Modality.AUDIO, + confidence=1.0, + metadata={**audio_meta, "mime_type": mime_type}, + ) + + # Check for multimodal content + boundaries = self._detect_boundaries(data) + if len(boundaries) > 1: + sub_modalities = [] + for start, end, modality in boundaries: + chunk_data = data[start:end] + sub_info = self.detect_modality(chunk_data) + if sub_info.modality != Modality.BINARY: + sub_modalities.append(sub_info) + + if sub_modalities: + return ModalityInfo( + modality=Modality.MULTIMODAL, + confidence=0.8, + metadata={"mime_type": "multipart/mixed"}, + sub_modalities=sub_modalities, + ) + + # Default to binary + return ModalityInfo( + modality=Modality.BINARY, + confidence=0.5, + metadata={"mime_type": mime_type}, + ) + + def split_modalities( + self, data: bytes + ) -> List[Tuple[Modality, bytes, Dict]]: + """Split multimodal data into separate modalities.""" + boundaries = self._detect_boundaries(data) + result = [] + + for start, end, modality in boundaries: + chunk = data[start:end] + info = self.detect_modality(chunk) + result.append((modality, chunk, info.metadata)) + + return result + + +class 
AutoDetectBytesDecoder: + """Decoder that automatically detects and decodes different modalities.""" + + def __init__(self): + self.detector = ModalityDetector() + self.text_decoder = ByteDetokenizer() # From previous example + + def decode( + self, data: bytes + ) -> Union[str, Image.Image, np.ndarray, List[any]]: + """Automatically detect and decode byte sequence.""" + info = self.detector.detect_modality(data) + + if info.modality == Modality.MULTIMODAL: + # Handle multimodal content + parts = self.detector.split_modalities(data) + return [ + self.decode(chunk) for modality, chunk, _ in parts + ] + + if info.modality == Modality.TEXT: + return self.text_decoder.decode_text(data) + elif info.modality == Modality.IMAGE: + return self.text_decoder.decode_image(data) + elif info.modality == Modality.AUDIO: + return self.text_decoder.decode_audio(data) + else: + return data + + +# # Example usage +# def demo_auto_detection(): +# """Demonstrate auto modality detection.""" +# # Create mixed content +# text = "Hello, World!".encode('utf-8') + +# # Create a small test image +# img = Image.new('RGB', (100, 100), color='red') +# img_bytes = io.BytesIO() +# img.save(img_bytes, format='PNG') + +# # Combine into multimodal content +# mixed_content = text + img_bytes.getvalue() + +# # Initialize decoder +# decoder = AutoDetectBytesDecoder() + +# # Decode +# result = decoder.decode(mixed_content) + +# if isinstance(result, list): +# print("Detected multimodal content:") +# for i, part in enumerate(result): +# print(f"Part {i+1}: {type(part)}") + +# if __name__ == "__main__": +# demo_auto_detection() + + +def tensor_to_data(tensor: Tensor): + byte_sequence = ByteDetokenizer.tensor_to_bytes(tensor) + + # Initialize auto-detector + decoder = AutoDetectBytesDecoder() + + # Decode with automatic detection + result = decoder.decode(byte_sequence) + + return result + + +def demo_byte_predictor(): + """Demo with smaller dimensions to test.""" + # Initialize model configuration with 
adjusted dimensions + config = ModelConfig( + vocab_size=256, + hidden_size=128, # Smaller for testing + num_layers=2, # Fewer layers for testing + num_key_value_heads=2, + num_query_heads=4, + dropout=0.1, + max_sequence_length=1024, + ) + + # Initialize model + model = EnhancedBytePredictor(config) + logger.info("Model initialized") + + # Move to GPU if available + device = torch.device( + "cuda" if torch.cuda.is_available() else "cpu" + ) + model = model.to(device) + logger.info(f"Using device: {device}") + + # Create sample input data + batch_size = 2 + seq_length = 16 # Shorter sequence for testing + input_ids = torch.randint( + 0, config.vocab_size, (batch_size, seq_length), device=device + ) + logger.info(f"Created input tensor of shape: {input_ids.shape}") + + # Test forward pass + try: + logits = model(input_ids) + logger.info( + f"Forward pass successful! Output shape: {logits.shape}" + ) + + # Test loss computation + target_ids = torch.randint( + 0, + config.vocab_size, + (batch_size, seq_length), + device=device, + ) + loss = model.compute_loss(input_ids, target_ids) + logger.info( + f"Loss computation successful! Loss value: {loss.item():.4f}" + ) + + # Test generation + prompt = torch.randint( + 0, + config.vocab_size, + (1, 4), # Very short prompt for testing + device=device, + ) + generated = model.generate( + prompt, max_new_tokens=8, temperature=0.8, top_k=50 + ) + logger.info( + f"Generation successful! 
Generated shape: {generated.shape}" + ) + + except Exception as e: + logger.error(f"Error during execution: {str(e)}") + raise + + +if __name__ == "__main__": + # Set up logging + # logger.remove() # Remove default handler + # logger.add(sys.stderr, format="{time:HH:mm:ss} | {level} | {message}") + + demo_byte_predictor() diff --git a/new_features_examples/auto_agent.py b/new_features_examples/auto_agent.py new file mode 100644 index 00000000..712be089 --- /dev/null +++ b/new_features_examples/auto_agent.py @@ -0,0 +1,188 @@ +import json +import os +from contextlib import suppress +from typing import Any, Callable, Dict, Optional, Type, Union + +from dotenv import load_dotenv +from pydantic import BaseModel, Field, ValidationError, create_model +from swarm_models.openai_function_caller import OpenAIFunctionCaller + + +class DynamicParser: + @staticmethod + def extract_fields(model: Type[BaseModel]) -> Dict[str, Any]: + return { + field_name: (field.annotation, ... if field.is_required() else None) + for field_name, field in model.model_fields.items() + } + + @staticmethod + def create_partial_model(model: Type[BaseModel], data: Dict[str, Any]) -> Type[BaseModel]: + fields = { + field_name: (field.annotation, ... 
if field.is_required() else None) + for field_name, field in model.model_fields.items() + if field_name in data + } + return create_model(f"Partial{model.__name__}", **fields) + + @classmethod + def parse(cls, data: Union[str, Dict[str, Any]], model: Type[BaseModel]) -> Optional[BaseModel]: + if isinstance(data, str): + try: + data = json.loads(data) + except json.JSONDecodeError: + return None + + # Try full model first + with suppress(ValidationError): + return model.model_validate(data) + + # Create and try partial model + partial_model = cls.create_partial_model(model, data) + with suppress(ValidationError): + return partial_model.model_validate(data) + + return None + + +load_dotenv() + +# Define the Thoughts schema +class Thoughts(BaseModel): + text: str = Field(..., description="Current thoughts or observations regarding the task.") + reasoning: str = Field(..., description="Logical reasoning behind the thought process.") + plan: str = Field(..., description="A short bulleted list that conveys the immediate and long-term plan.") + criticism: str = Field(..., description="Constructive self-criticism to improve future responses.") + speak: str = Field(..., description="A concise summary of thoughts intended for the user.") + +# Define the Command schema +class Command(BaseModel): + name: str = Field(..., description="Command name to execute from the provided list of commands.") + args: Dict[str, Any] = Field(..., description="Arguments required to execute the command.") + +# Define the AgentResponse schema +class AgentResponse(BaseModel): + thoughts: Thoughts = Field(..., description="The agent's current thoughts and reasoning.") + command: Command = Field(..., description="The command to execute along with its arguments.") + + + +# Define tool functions +def fluid_api_command(task: str): + """Execute a fluid API request.""" + # response = fluid_api_request(task) + print(response.model_dump_json(indent=4)) + return response + + +def send_tweet_command(text: 
str): + """Simulate sending a tweet.""" + print(f"Tweet sent: {text}") + return {"status": "success", "message": f"Tweet sent: {text}"} + + +def do_nothing_command(): + """Do nothing.""" + print("Doing nothing...") + return {"status": "success", "message": "No action taken."} + + +def task_complete_command(reason: str): + """Mark the task as complete and provide a reason.""" + print(f"Task completed: {reason}") + return {"status": "success", "message": f"Task completed: {reason}"} + + +# Dynamic command execution +def execute_command(name: str, args: Dict[str, Any]): + """Dynamically execute a command based on its name and arguments.""" + command_map: Dict[str, Callable] = { + "fluid_api": lambda **kwargs: fluid_api_command(task=kwargs.get("task")), + "send_tweet": lambda **kwargs: send_tweet_command(text=kwargs.get("text")), + "do_nothing": lambda **kwargs: do_nothing_command(), + "task_complete": lambda **kwargs: task_complete_command(reason=kwargs.get("reason")), + } + + if name not in command_map: + raise ValueError(f"Unknown command: {name}") + + # Execute the command with the provided arguments + return command_map[name](**args) + + +def parse_and_execute_command(response: Union[str, Dict[str, Any]], base_model: Type[BaseModel] = AgentResponse) -> Any: + """Enhanced command parser with flexible input handling""" + parsed = DynamicParser.parse(response, base_model) + if not parsed: + raise ValueError("Failed to parse response") + + if hasattr(parsed, 'command'): + command_name = parsed.command.name + command_args = parsed.command.args + return execute_command(command_name, command_args) + + return parsed + + +ainame = "AutoAgent" +userprovided = "assistant" + +SYSTEM_PROMPT = f""" +You are {ainame}, an advanced and autonomous {userprovided}. +Your role is to make decisions and complete tasks independently without seeking user assistance. Leverage your strengths as an LLM to solve tasks efficiently, adhering strictly to the commands and resources provided. 
+
+### GOALS:
+1. {userprovided}
+2. Execute tasks with precision and efficiency.
+3. Ensure outputs are actionable and aligned with the user's objectives.
+4. Continuously optimize task strategies for maximum effectiveness.
+5. Maintain reliability and consistency in all responses.
+
+### CONSTRAINTS:
+1. Memory limit: ~4000 words for short-term memory. Save essential information to files immediately to avoid loss.
+2. Independent decision-making: Do not rely on user assistance.
+3. Exclusively use commands in double quotes (e.g., "command name").
+4. Use subprocesses for commands that may take longer than a few minutes.
+5. Ensure all outputs strictly adhere to the specified JSON response format.
+
+### COMMANDS:
+1. Fluid API: "fluid_api", args: "method": "", "url": "", "headers": "", "body": ""
+2. Send Tweet: "send_tweet", args: "text": ""
+3. Do Nothing: "do_nothing", args:
+4. Task Complete (Shutdown): "task_complete", args: "reason": ""
+
+### RESOURCES:
+1. Internet access for real-time information and data gathering.
+2. Long-term memory management for storing critical information.
+3. Access to GPT-3.5-powered Agents for delegating tasks.
+4. File handling capabilities for output storage and retrieval.
+
+### PERFORMANCE EVALUATION:
+1. Continuously analyze and reflect on actions to ensure optimal task completion.
+2. Self-critique decisions and strategies constructively to identify areas for improvement.
+3. Ensure every command serves a clear purpose and minimizes resource usage.
+4. Complete tasks in the least number of steps, balancing speed and accuracy.
+
+### RESPONSE FORMAT:
+Always respond in a strict JSON format as described below.
Ensure your responses can be parsed with Python's `json.loads`:
+"""
+
+# Initialize the OpenAIFunctionCaller
+model = OpenAIFunctionCaller(
+    system_prompt=SYSTEM_PROMPT,
+    max_tokens=4000,
+    temperature=0.9,
+    base_model=AgentResponse,  # Pass the Pydantic schema as the base model
+    parallel_tool_calls=False,
+    openai_api_key=os.getenv("OPENAI_API_KEY")
+)
+
+# Example usage
+user_input = (
+    "Analyze the provided Python code for inefficiencies, generate suggestions for improvements, "
+    "and provide optimized code."
+)
+
+response = model.run(user_input)
+response = parse_and_execute_command(response)
+print(response)
diff --git a/new_features_examples/multi_tool_usage_agent.py b/new_features_examples/multi_tool_usage_agent.py
index 44577528..1af421e2 100644
--- a/new_features_examples/multi_tool_usage_agent.py
+++ b/new_features_examples/multi_tool_usage_agent.py
@@ -1,5 +1,5 @@
 import os
-from typing import List, Dict, Any, Optional, Callable
+from typing import List, Dict, Any, Optional, Callable, get_type_hints
 from dataclasses import dataclass, field
 import json
 from datetime import datetime
diff --git a/pyproject.toml b/pyproject.toml
index 5102f0d2..fc04ecc5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -86,7 +86,7 @@ swarms = "swarms.cli.main:main"
 
 [tool.poetry.group.lint.dependencies]
 black = ">=23.1,<25.0"
-ruff = ">=0.5.1,<0.7.4"
+ruff = ">=0.5.1,<0.8.2"
 types-toml = "^0.10.8.1"
 types-pytz = ">=2023.3,<2025.0"
 types-chardet = "^5.0.4.6"
diff --git a/simple_example.py b/simple_example.py
index 3521c677..2fcbb8f9 100644
--- a/simple_example.py
+++ b/simple_example.py
@@ -3,7 +3,5 @@ from swarms import Agent
 Agent(
     agent_name="Stock-Analysis-Agent",
     model_name="gpt-4o-mini",
-    max_loops="auto",
-    streaming_on=True,
-    interactive=True,
+    max_loops=1,
 ).run("What are 5 hft
algorithms")
diff --git a/swarms/structs/agent.py b/swarms/structs/agent.py
index ca2ac120..48c4ff63 100644
--- a/swarms/structs/agent.py
+++ b/swarms/structs/agent.py
@@ -771,8 +771,11 @@ class Agent:
         self,
         task: Optional[str] = None,
         img: Optional[str] = None,
+        speech: Optional[str] = None,
+        video: Optional[str] = None,
         is_last: Optional[bool] = False,
         print_task: Optional[bool] = False,
+        generate_speech: Optional[bool] = False,
         *args,
         **kwargs,
     ) -> Any:
@@ -2294,12 +2297,13 @@ class Agent:
         self,
         task: Optional[str] = None,
         img: Optional[str] = None,
-        device: str = "cpu",  # gpu
-        device_id: int = 0,
-        all_cores: bool = True,
+        device: Optional[str] = "cpu",  # gpu
+        device_id: Optional[int] = 0,
+        all_cores: Optional[bool] = True,
         scheduled_run_date: Optional[datetime] = None,
-        do_not_use_cluster_ops: bool = False,
-        all_gpus: bool = False,
+        do_not_use_cluster_ops: Optional[bool] = False,
+        all_gpus: Optional[bool] = False,
+        generate_speech: Optional[bool] = False,
         *args,
         **kwargs,
     ) -> Any:
@@ -2346,7 +2350,13 @@ class Agent:
         # If cluster ops disabled, run directly
         if do_not_use_cluster_ops is True:
             logger.info("Running without cluster operations")
-            return self._run(task=task, img=img, *args, **kwargs)
+            return self._run(
+                task=task,
+                img=img,
+                generate_speech=generate_speech,
+                *args,
+                **kwargs,
+            )
 
         else:
             return exec_callable_with_clusterops(
@@ -2357,6 +2367,7 @@ class Agent:
                 func=self._run,
                 task=task,
                 img=img,
+                generate_speech=generate_speech,
                 *args,
                 **kwargs,
             )
diff --git a/swarms/utils/openai_tts.py b/swarms/utils/openai_tts.py
new file mode 100644
index 00000000..3cfcbd05
--- /dev/null
+++ b/swarms/utils/openai_tts.py
@@ -0,0 +1,73 @@
+import os
+from loguru import logger
+import pygame
+import requests
+import tempfile
+from openai import OpenAI
+
+
+class OpenAITTS:
+    """
+    A class to interact with OpenAI API and play the generated audio with improved streaming capabilities.
+    """
+
+    def __init__(self, *args, **kwargs):
+        self.client = OpenAI(
+            api_key=os.getenv("OPENAI_API_KEY"), *args, **kwargs
+        )
+        pygame.init()
+
+    def run(
+        self, task: str, play_sound: bool = True, *args, **kwargs
+    ):
+        """
+        Run a task with the OpenAI API and optionally play the generated audio with improved streaming.
+
+        Args:
+            task (str): The task to be executed.
+            play_sound (bool): If True, play the generated audio.
+
+        Returns:
+            None
+        """
+        try:
+            response = self.client.audio.speech.create(
+                model="tts-1",
+                voice="nova",
+                input=task,
+                *args,
+                **kwargs,
+            )
+            # The speech endpoint returns the audio bytes directly;
+            # the response object exposes no "url" field.
+            logger.info("Task completed successfully.")
+
+            if play_sound:
+                with tempfile.NamedTemporaryFile(
+                    delete=False, suffix=".mp3"
+                ) as tmp_file:
+                    tmp_file.write(response.content)
+                pygame.mixer.music.load(tmp_file.name)
+                pygame.mixer.music.play()
+                clock = pygame.time.Clock()
+                while pygame.mixer.music.get_busy():
+                    # Poll at ~10 Hz until playback finishes
+                    clock.tick(10)
+        except Exception as e:
+            logger.error(f"Error during task execution: {str(e)}")
+
+
+# client = OpenAITTS(api_key=os.getenv("OPENAI_API_KEY"))
+# client.run("Hello world! This is a streaming test.", play_sound=True)
+
+
+def text_to_speech(
+    task: str, play_sound: bool = True, *args, **kwargs
+):
+    out = OpenAITTS().run(
+        task, play_sound=play_sound, *args, **kwargs
+    )
+    return out
+
+
+# print(text_to_speech(task="hello"))