diff --git a/README.md b/README.md index 51d389bb..4a1a229a 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,8 @@ Features: ```python import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) @@ -179,7 +180,8 @@ agent.run( An LLM equipped with long term memory and tools, a full stack agent capable of automating all and any digital tasks given a good prompt. ```python -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms_memory import ChromaDB import subprocess import os @@ -848,7 +850,8 @@ An all-new swarm architecture that makes it easy to manage and oversee the outpu ```python import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm # Define custom system prompts for each social media platform diff --git a/agent_with_rag_and_tools.py b/agent_with_rag_and_tools.py index 0200b19e..f278c173 100644 --- a/agent_with_rag_and_tools.py +++ b/agent_with_rag_and_tools.py @@ -1,4 +1,5 @@ -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms_memory import ChromaDB import subprocess import os diff --git a/company_swarm_example 2.py b/company_swarm_example 2.py deleted file mode 100644 index 213d39c4..00000000 --- a/company_swarm_example 2.py +++ /dev/null @@ -1,100 +0,0 @@ -import os - -from swarms import Agent, OpenAIChat -from swarms.structs.company import Company - -# Get the OpenAI API key from the environment variable -api_key = os.getenv("OPENAI_API_KEY") - -# Create an instance of the OpenAIChat class -model = OpenAIChat( - api_key=api_key, model_name="gpt-4o-mini", temperature=0.1 -) - - -# Initialize the boss agent (Director) -boss_agent = Agent( - agent_name="BossAgent", - system_prompt=""" - You are the BossAgent responsible for managing and overseeing a swarm of agents analyzing company expenses. - Your job is to dynamically assign tasks, prioritize their execution, and ensure that all agents collaborate efficiently. - After receiving a report on the company's expenses, you will break down the work into smaller tasks, - assigning specific tasks to each agent, such as detecting recurring high costs, categorizing expenditures, - and identifying unnecessary transactions. Ensure the results are communicated back in a structured way - so the finance team can take actionable steps to cut off unproductive spending. You also monitor and - dynamically adapt the swarm to optimize their performance. Finally, you summarize their findings - into a coherent report. - """, - llm=model, - max_loops=1, - dashboard=False, - streaming_on=True, - verbose=True, - stopping_token="", - state_save_file_type="json", - saved_state_path="boss_agent.json", -) - -# Initialize worker 1: Expense Analyzer -worker1 = Agent( - agent_name="ExpenseAnalyzer", - system_prompt=""" - Your task is to carefully analyze the company's expense data provided to you. - You will focus on identifying high-cost recurring transactions, categorizing expenditures - (e.g., marketing, operations, utilities, etc.), and flagging areas where there seems to be excessive spending. - You will provide a detailed breakdown of each category, along with specific recommendations for cost-cutting. 
- Pay close attention to monthly recurring subscriptions, office supplies, and non-essential expenditures. - """, - llm=model, - max_loops=1, - dashboard=False, - streaming_on=True, - verbose=True, - stopping_token="", - state_save_file_type="json", - saved_state_path="worker1.json", -) - -# Initialize worker 2: Summary Generator -worker2 = Agent( - agent_name="SummaryGenerator", - system_prompt=""" - After receiving the detailed breakdown from the ExpenseAnalyzer, - your task is to create a concise summary of the findings. You will focus on the most actionable insights, - such as highlighting the specific transactions that can be immediately cut off and summarizing the areas - where the company is overspending. Your summary will be used by the BossAgent to generate the final report. - Be clear and to the point, emphasizing the urgency of cutting unnecessary expenses. - """, - llm=model, - max_loops=1, - dashboard=False, - streaming_on=True, - verbose=True, - stopping_token="", - state_save_file_type="json", - saved_state_path="worker2.json", -) - -# Swarm-Level Prompt (Collaboration Prompt) -swarm_prompt = """ - As a swarm, your collective goal is to analyze the company's expenses and identify transactions that should be cut off. - You will work collaboratively to break down the entire process of expense analysis into manageable steps. - The BossAgent will direct the flow and assign tasks dynamically to the agents. The ExpenseAnalyzer will first - focus on breaking down the expense report, identifying high-cost recurring transactions, categorizing them, - and providing recommendations for potential cost reduction. After the analysis, the SummaryGenerator will then - consolidate all the findings into an actionable summary that the finance team can use to immediately cut off unnecessary expenses. - Together, your collaboration is essential to streamlining and improving the company’s financial health. -""" - -# Create a list of agents -agents = [boss_agent, worker1, worker2] - - -# Create an organization chart -org_chart = [[boss_agent], [worker1, worker2]] - -# Create a company -company = Company(org_chart=org_chart) - -# Run the company -company.run() diff --git a/company_swarm_example.py b/company_swarm_example.py index 213d39c4..dd8ac1c6 100644 --- a/company_swarm_example.py +++ b/company_swarm_example.py @@ -1,6 +1,7 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.company import Company # Get the OpenAI API key from the environment variable diff --git a/docs/applications/discord.md b/docs/applications/discord.md index dd7de16c..e2d0be5b 100644 --- a/docs/applications/discord.md +++ b/docs/applications/discord.md @@ -63,7 +63,7 @@ Starts the bot using the `DISCORD_TOKEN` from the `.env` file. 
Initialize the `llm` (Language Learning Model) with your OpenAI API key: ```python -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat llm = OpenAIChat( openai_api_key="Your_OpenAI_API_Key", diff --git a/docs/swarms/models/anthropic.md b/docs/swarms/models/anthropic.md index 438adfbe..f8fa037e 100644 --- a/docs/swarms/models/anthropic.md +++ b/docs/swarms/models/anthropic.md @@ -69,7 +69,7 @@ class Anthropic: ```python # Import necessary modules and classes -from swarms.models import Anthropic +from swarm_models import Anthropic # Initialize an instance of the Anthropic class model = Anthropic(anthropic_api_key="") diff --git a/docs/swarms/models/base_llm.md b/docs/swarms/models/base_llm.md index 0c678165..c3ec89ce 100644 --- a/docs/swarms/models/base_llm.md +++ b/docs/swarms/models/base_llm.md @@ -164,7 +164,7 @@ To demonstrate how to use the `BaseLLM` interface, let's create an example using ```python # Import the BaseLLM class -from swarms.models import BaseLLM +from swarm_models import BaseLLM # Create an instance of the language model language_model = BaseLLM( diff --git a/docs/swarms/models/base_multimodal_model.md b/docs/swarms/models/base_multimodal_model.md index c1a8373d..fb0f45ae 100644 --- a/docs/swarms/models/base_multimodal_model.md +++ b/docs/swarms/models/base_multimodal_model.md @@ -31,7 +31,7 @@ pip install swarms To get started with Swarms, you'll need to import the library and create an instance of the `BaseMultiModalModel` class. This class serves as the foundation for running multimodal models. ```python -from swarms.models import BaseMultiModalModel +from swarm_models import BaseMultiModalModel model = BaseMultiModalModel( model_name="your_model_name", @@ -138,7 +138,7 @@ Let's explore some usage examples of the MultiModalAI library: ```python # Import the library -from swarms.models import BaseMultiModalModel +from swarm_models import BaseMultiModalModel # Create an instance of the model model = BaseMultiModalModel( @@ -159,7 +159,7 @@ print(response) ```python # Import the library -from swarms.models import BaseMultiModalModel +from swarm_models import BaseMultiModalModel # Create an instance of the model model = BaseMultiModalModel( @@ -184,7 +184,7 @@ for response in responses: ```python # Import the library -from swarms.models import BaseMultiModalModel +from swarm_models import BaseMultiModalModel # Create an instance of the model model = BaseMultiModalModel( @@ -209,7 +209,7 @@ for response in responses: ### Example 4: Inheriting `BaseMultiModalModel` for it's prebuilt classes ```python -from swarms.models import BaseMultiModalModel +from swarm_models import BaseMultiModalModel class CustomMultiModalModel(BaseMultiModalModel): diff --git a/docs/swarms/models/dalle3.md b/docs/swarms/models/dalle3.md index 346489c7..e847ef04 100644 --- a/docs/swarms/models/dalle3.md +++ b/docs/swarms/models/dalle3.md @@ -36,7 +36,7 @@ pip install swarms Let's get started with a quick example of using the Dalle3 library to generate an image from a text prompt: ```python -from swarms.models.dalle3 import Dalle3 +from swarm_models.dalle3 import Dalle3 # Create an instance of the Dalle3 class dalle = Dalle3() @@ -97,7 +97,7 @@ Returns: ### Example 1: Basic Image Generation ```python -from swarms.models.dalle3 import Dalle3 +from swarm_models.dalle3 import Dalle3 # Create an instance of the Dalle3 class dalle3 = Dalle3() @@ -115,7 +115,7 @@ print(image_url) ### Example 2: Creating Image Variations ```python -from swarms.models.dalle3 import Dalle3 +from 
swarm_models.dalle3 import Dalle3 # Create an instance of the Dalle3 class dalle3 = Dalle3() @@ -137,7 +137,7 @@ Certainly! Here are additional examples that cover various edge cases and method You can customize the size of the generated image by specifying the `size` parameter when creating an instance of the `Dalle3` class. Here's how to generate a smaller image: ```python -from swarms.models.dalle3 import Dalle3 +from swarm_models.dalle3 import Dalle3 # Create an instance of the Dalle3 class with a custom image size dalle3 = Dalle3(size="512x512") @@ -157,7 +157,7 @@ print(image_url) You can adjust the maximum number of API request retries using the `max_retries` parameter. Here's how to increase the retry limit: ```python -from swarms.models.dalle3 import Dalle3 +from swarm_models.dalle3 import Dalle3 # Create an instance of the Dalle3 class with a higher retry limit dalle3 = Dalle3(max_retries=5) @@ -177,7 +177,7 @@ print(image_url) To create variations of an existing image, you can use the `create_variations` method. Here's an example: ```python -from swarms.models.dalle3 import Dalle3 +from swarm_models.dalle3 import Dalle3 # Create an instance of the Dalle3 class dalle3 = Dalle3() @@ -197,7 +197,7 @@ print(variations_url) The Dalle3 library provides error handling for API-related issues. Here's how to handle and display API errors: ```python -from swarms.models.dalle3 import Dalle3 +from swarm_models.dalle3 import Dalle3 # Create an instance of the Dalle3 class dalle3 = Dalle3() @@ -218,7 +218,7 @@ except Exception as e: You can customize the quality of the generated image by specifying the `quality` parameter. Here's how to generate a high-quality image: ```python -from swarms.models.dalle3 import Dalle3 +from swarm_models.dalle3 import Dalle3 # Create an instance of the Dalle3 class with high quality dalle3 = Dalle3(quality="high") diff --git a/docs/swarms/models/distilled_whisperx.md b/docs/swarms/models/distilled_whisperx.md index 79c8c2ea..2718eb71 100644 --- a/docs/swarms/models/distilled_whisperx.md +++ b/docs/swarms/models/distilled_whisperx.md @@ -23,7 +23,7 @@ The `DistilWhisperModel` class is initialized with the following parameters: Example of initialization: ```python -from swarms.models import DistilWhisperModel +from swarm_models import DistilWhisperModel # Initialize with default model model_wrapper = DistilWhisperModel() diff --git a/docs/swarms/models/fuyu.md b/docs/swarms/models/fuyu.md index e54a4a22..fd90f79a 100644 --- a/docs/swarms/models/fuyu.md +++ b/docs/swarms/models/fuyu.md @@ -37,7 +37,7 @@ To use Fuyu, follow these steps: 1. 
Initialize the Fuyu instance: ```python -from swarms.models.fuyu import Fuyu +from swarm_models.fuyu import Fuyu fuyu = Fuyu() ``` @@ -54,7 +54,7 @@ output_text = fuyu(text, img_path) ### Example 2 - Text Generation ```python -from swarms.models.fuyu import Fuyu +from swarm_models.fuyu import Fuyu fuyu = Fuyu() diff --git a/docs/swarms/models/gemini.md b/docs/swarms/models/gemini.md index d5b1b44a..012bc7dc 100644 --- a/docs/swarms/models/gemini.md +++ b/docs/swarms/models/gemini.md @@ -78,7 +78,7 @@ class Gemini(BaseMultiModalModel): **Examples**: ```python - from swarms.models import Gemini + from swarm_models import Gemini # Initialize the Gemini model gemini = Gemini() @@ -128,7 +128,7 @@ class Gemini(BaseMultiModalModel): **Examples**: ```python - from swarms.models.gemini import Gemini + from swarm_models.gemini import Gemini # Initialize the Gemini model gemini = Gemini() diff --git a/docs/swarms/models/gpt4v.md b/docs/swarms/models/gpt4v.md index 5ad80cd9..4240fe3b 100644 --- a/docs/swarms/models/gpt4v.md +++ b/docs/swarms/models/gpt4v.md @@ -53,7 +53,7 @@ When initializing the `GPT4VisionAPI` class, you have the option to provide the Here's how you can initialize the `GPT4VisionAPI` class: ```python -from swarms.models import GPT4VisionAPI +from swarm_models import GPT4VisionAPI # Initialize with default API key and max_tokens api = GPT4VisionAPI() @@ -129,7 +129,7 @@ Let's explore some usage examples of the `GPT4VisionAPI` module to better unders In this example, we'll use the module with the default API key and maximum tokens to analyze an image. ```python -from swarms.models import GPT4VisionAPI +from swarm_models import GPT4VisionAPI # Initialize with default API key and max_tokens api = GPT4VisionAPI() @@ -150,7 +150,7 @@ print(response) If you have a custom API key, you can initialize the module with it as shown in this example. ```python -from swarms.models import GPT4VisionAPI +from swarm_models import GPT4VisionAPI # Initialize with custom API key and max_tokens custom_api_key = "your_custom_api_key" @@ -172,7 +172,7 @@ print(response) You can also customize the maximum token limit when initializing the module. In this example, we set it to 1000 tokens. 
```python -from swarms.models import GPT4VisionAPI +from swarm_models import GPT4VisionAPI # Initialize with default API key and custom max_tokens api = GPT4VisionAPI(max_tokens=1000) diff --git a/docs/swarms/models/huggingface.md b/docs/swarms/models/huggingface.md index 50aaa2a1..45c9b535 100644 --- a/docs/swarms/models/huggingface.md +++ b/docs/swarms/models/huggingface.md @@ -93,7 +93,7 @@ Here are three ways to use the `HuggingfaceLLM` class: #### Example 1: Basic Usage ```python -from swarms.models import HuggingfaceLLM +from swarm_models import HuggingfaceLLM # Initialize the HuggingfaceLLM instance with a model ID model_id = "NousResearch/Nous-Hermes-2-Vision-Alpha" @@ -108,7 +108,7 @@ print(generated_text) #### Example 2: Custom Configuration ```python -from swarms.models import HuggingfaceLLM +from swarm_models import HuggingfaceLLM # Initialize with custom configuration custom_config = { @@ -129,7 +129,7 @@ print(generated_text) #### Example 3: Distributed Processing ```python -from swarms.models import HuggingfaceLLM +from swarm_models import HuggingfaceLLM # Initialize for distributed processing inference = HuggingfaceLLM(model_id="gpt2-medium", distributed=True) diff --git a/docs/swarms/models/idefics.md b/docs/swarms/models/idefics.md index 57125038..30ad0b2e 100644 --- a/docs/swarms/models/idefics.md +++ b/docs/swarms/models/idefics.md @@ -28,7 +28,7 @@ To use Idefics, follow these steps: 1. Initialize the Idefics instance: ```python -from swarms.models import Idefics +from swarm_models import Idefics model = Idefics() ``` @@ -46,7 +46,7 @@ print(response) ### Example 1 - Image Questioning ```python -from swarms.models import Idefics +from swarm_models import Idefics model = Idefics() prompts = [ @@ -59,7 +59,7 @@ print(response) ### Example 2 - Bidirectional Conversation ```python -from swarms.models import Idefics +from swarm_models import Idefics model = Idefics() user_input = "User: What is in this image? https://upload.wikimedia.org/wikipedia/commons/8/86/Id%C3%A9fix.JPG" diff --git a/docs/swarms/models/kosmos.md b/docs/swarms/models/kosmos.md index a19ea791..6631e94e 100644 --- a/docs/swarms/models/kosmos.md +++ b/docs/swarms/models/kosmos.md @@ -22,7 +22,7 @@ To use Kosmos, follow these steps: 1. 
Initialize the Kosmos instance: ```python -from swarms.models.kosmos_two import Kosmos +from swarm_models.kosmos_two import Kosmos kosmos = Kosmos() ``` @@ -38,7 +38,7 @@ kosmos.multimodal_grounding( ### Example 1 - Multimodal Grounding ```python -from swarms.models.kosmos_two import Kosmos +from swarm_models.kosmos_two import Kosmos kosmos = Kosmos() @@ -58,7 +58,7 @@ kosmos.referring_expression_comprehension( ### Example 2 - Referring Expression Comprehension ```python -from swarms.models.kosmos_two import Kosmos +from swarm_models.kosmos_two import Kosmos kosmos = Kosmos() @@ -78,7 +78,7 @@ kosmos.referring_expression_generation( ### Example 3 - Referring Expression Generation ```python -from swarms.models.kosmos_two import Kosmos +from swarm_models.kosmos_two import Kosmos kosmos = Kosmos() @@ -96,7 +96,7 @@ kosmos.grounded_vqa("What is the color of the car?", "https://example.com/car.jp ### Example 4 - Grounded Visual Question Answering ```python -from swarms.models.kosmos_two import Kosmos +from swarm_models.kosmos_two import Kosmos kosmos = Kosmos() @@ -112,7 +112,7 @@ kosmos.grounded_image_captioning("https://example.com/beach.jpg") ### Example 5 - Grounded Image Captioning ```python -from swarms.models.kosmos_two import Kosmos +from swarm_models.kosmos_two import Kosmos kosmos = Kosmos() @@ -128,7 +128,7 @@ kosmos.grounded_image_captioning_detailed("https://example.com/beach.jpg") ### Example 6 - Detailed Grounded Image Captioning ```python -from swarms.models.kosmos_two import Kosmos +from swarm_models.kosmos_two import Kosmos kosmos = Kosmos() @@ -149,7 +149,7 @@ kosmos.draw_entity_boxes_on_image(image, entities, show=True) ### Example 7 - Drawing Entity Boxes on Image ```python -from swarms.models.kosmos_two import Kosmos +from swarm_models.kosmos_two import Kosmos kosmos = Kosmos() @@ -176,7 +176,7 @@ image = kosmos.generate_boxes( ### Example 8 - Generating Boxes for Entities ```python -from swarms.models.kosmos_two import Kosmos +from swarm_models.kosmos_two import Kosmos kosmos = Kosmos() entities = [ diff --git a/docs/swarms/models/layoutlm_document_qa.md b/docs/swarms/models/layoutlm_document_qa.md index 4c6169d0..89c66644 100644 --- a/docs/swarms/models/layoutlm_document_qa.md +++ b/docs/swarms/models/layoutlm_document_qa.md @@ -39,7 +39,7 @@ To use LayoutLMDocumentQA, follow these steps: 1. Initialize the LayoutLMDocumentQA instance: ```python -from swarms.models import LayoutLMDocumentQA +from swarm_models import LayoutLMDocumentQA layout_lm_doc_qa = LayoutLMDocumentQA() ``` diff --git a/docs/swarms/models/llama3.md b/docs/swarms/models/llama3.md index 4ae0f1ef..da1df781 100644 --- a/docs/swarms/models/llama3.md +++ b/docs/swarms/models/llama3.md @@ -4,7 +4,7 @@ ```python from transformers import AutoTokenizer, AutoModelForCausalLM import torch -from swarms.models.base_llm import BaseLLM +from swarm_models.base_llm import BaseLLM class Llama3(BaseLLM): diff --git a/docs/swarms/models/models_available_overview.md b/docs/swarms/models/models_available_overview.md index db2c9bdd..21ce54a7 100644 --- a/docs/swarms/models/models_available_overview.md +++ b/docs/swarms/models/models_available_overview.md @@ -50,7 +50,7 @@ The Anthropic model is one of the many models supported by Swarms. 
Here's how yo ```python import os -from swarms.models import Anthropic +from swarm_models import Anthropic # Load the environment variables anthropic_api_key = os.getenv("ANTHROPIC_API_KEY") @@ -73,7 +73,7 @@ print(response) HuggingfaceLLM allows you to use models from Hugging Face's vast repository. Here's an example: ```python -from swarms.models import HuggingfaceLLM +from swarm_models import HuggingfaceLLM # Define the model ID model_id = "NousResearch/Yarn-Mistral-7b-128k" @@ -97,7 +97,7 @@ The OpenAIChat model is designed for conversational tasks. Here's how to use it: ```python import os -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat # Load the environment variables openai_api_key = os.getenv("OPENAI_API_KEY") diff --git a/docs/swarms/models/nougat.md b/docs/swarms/models/nougat.md index 217990a1..6749ce74 100644 --- a/docs/swarms/models/nougat.md +++ b/docs/swarms/models/nougat.md @@ -43,7 +43,7 @@ To use Nougat, follow these steps: 1. Initialize the Nougat instance: ```python -from swarms.models import Nougat +from swarm_models import Nougat nougat = Nougat() ``` diff --git a/docs/swarms/models/openai.md b/docs/swarms/models/openai.md index ae547631..39980b4d 100644 --- a/docs/swarms/models/openai.md +++ b/docs/swarms/models/openai.md @@ -151,7 +151,7 @@ Here are the key attributes and their descriptions for the `BaseOpenAI` and `Ope ```python # Import the OpenAI class -from swarms.models import OpenAI +from swarm_models import OpenAI # Set your OpenAI API key api_key = "YOUR_API_KEY" diff --git a/docs/swarms/models/openai_chat.md b/docs/swarms/models/openai_chat.md index d7d9b2eb..6cdde532 100644 --- a/docs/swarms/models/openai_chat.md +++ b/docs/swarms/models/openai_chat.md @@ -125,7 +125,7 @@ Here are the key attributes and their descriptions for the `OpenAIChat` class: ### Example 1: Initializing `OpenAIChat` ```python -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat # Initialize OpenAIChat with model name and API key openai_chat = OpenAIChat(model_name="gpt-3.5-turbo", openai_api_key="YOUR_API_KEY") diff --git a/docs/swarms/models/openai_function_caller.md b/docs/swarms/models/openai_function_caller.md index bb952ff1..16fb6f5b 100644 --- a/docs/swarms/models/openai_function_caller.md +++ b/docs/swarms/models/openai_function_caller.md @@ -89,7 +89,7 @@ Here are three examples demonstrating different ways to use the `OpenAIFunctionC ```python import openai -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller from swarms.artifacts.main_artifact import Artifact @@ -120,7 +120,7 @@ print(out) ### Example 2: Prompt Generator ```python -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller from pydantic import BaseModel, Field from typing import Sequence @@ -181,7 +181,7 @@ print(out) ### Example 3: Sentiment Analysis ```python -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller from pydantic import BaseModel, Field diff --git a/docs/swarms/models/openai_tts.md b/docs/swarms/models/openai_tts.md index b2996312..1f797a69 100644 --- a/docs/swarms/models/openai_tts.md +++ b/docs/swarms/models/openai_tts.md @@ -38,7 +38,7 @@ pip install swarms requests wave To use the `OpenAITTS` module, you need to initialize an instance of the `OpenAITTS` class. 
Here's how you can do it: ```python -from swarms.models.openai_tts import OpenAITTS +from swarm_models.openai_tts import OpenAITTS # Initialize the OpenAITTS instance tts = OpenAITTS( @@ -95,7 +95,7 @@ speech_data = tts.run_and_save("Hello, world!") Here's a basic example of how to use the `OpenAITTS` module to generate speech from text: ```python -from swarms.models.openai_tts import OpenAITTS +from swarm_models.openai_tts import OpenAITTS # Initialize the OpenAITTS instance tts = OpenAITTS( diff --git a/docs/swarms/models/vilt.md b/docs/swarms/models/vilt.md index 2cb56b22..8436ea42 100644 --- a/docs/swarms/models/vilt.md +++ b/docs/swarms/models/vilt.md @@ -25,7 +25,7 @@ To use the Vilt model, follow these steps: 1. Initialize the Vilt model: ```python -from swarms.models import Vilt +from swarm_models import Vilt model = Vilt() ``` diff --git a/docs/swarms/structs/agent.md b/docs/swarms/structs/agent.md index 212df914..c268c8b2 100644 --- a/docs/swarms/structs/agent.md +++ b/docs/swarms/structs/agent.md @@ -135,7 +135,8 @@ And, then now you can get started with the following: ```python import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) @@ -189,7 +190,8 @@ To integrate tools with the Swarm Agent, you can pass a list of callable functio - with doc strings ```python -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms_memory import ChromaDB import subprocess import os diff --git a/docs/swarms/structs/index.md b/docs/swarms/structs/index.md index c95b5411..af362fcf 100644 --- a/docs/swarms/structs/index.md +++ b/docs/swarms/structs/index.md @@ -45,7 +45,8 @@ import os from dotenv import load_dotenv # Import the OpenAIChat model and the Agent struct -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat # Load the environment variables load_dotenv() @@ -71,7 +72,8 @@ agent.run("Generate a 10,000 word blog on health and wellness.") `Agent` equipped with quasi-infinite long term memory. Great for long document understanding, analysis, and retrieval. ```python -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms_memory import ChromaDB # Copy and paste the code and put it in your own local directory. 
# Making an instance of the ChromaDB class @@ -327,7 +329,7 @@ import os from dotenv import load_dotenv -from swarms.models.gpt4_vision_api import GPT4VisionAPI +from swarm_models.gpt4_vision_api import GPT4VisionAPI from swarms.structs import Agent # Load the environment variables diff --git a/docs/swarms/structs/spreadsheet_swarm.md b/docs/swarms/structs/spreadsheet_swarm.md index 9cdb919c..06101128 100644 --- a/docs/swarms/structs/spreadsheet_swarm.md +++ b/docs/swarms/structs/spreadsheet_swarm.md @@ -189,7 +189,8 @@ swarm._save_to_csv() ```python import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) @@ -241,7 +242,8 @@ swarm.run( ```python import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm # Define custom system prompts for QR code generation @@ -308,7 +310,8 @@ swarm.run( ```python import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm # Define custom system prompts for each social media platform diff --git a/docs/swarms/structs/task.md b/docs/swarms/structs/task.md index 3bdb461c..157ac95e 100644 --- a/docs/swarms/structs/task.md +++ b/docs/swarms/structs/task.md @@ -46,7 +46,7 @@ Executes the task by calling the agent or model with the specified arguments and ```python >>> from swarms.structs import Task, Agent ->>> from swarms.models import OpenAIChat +>>> from swarm_models import OpenAIChat >>> agent = Agent(llm=OpenAIChat(openai_api_key=""), max_loops=1, dashboard=False) >>> task = Task(description="What's the weather in Miami?", agent=agent) >>> task.run() diff --git a/docs/swarms/tools/build_tool.md b/docs/swarms/tools/build_tool.md index fb680de6..d9aa97b8 100644 --- a/docs/swarms/tools/build_tool.md +++ b/docs/swarms/tools/build_tool.md @@ -470,7 +470,8 @@ import os import requests -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat # Get the OpenAI API key from the environment variable api_key = os.getenv("OPENAI_API_KEY") diff --git a/example.py b/example.py index d2d1b3d2..aaf45b83 100644 --- a/example.py +++ b/example.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/agents/agent_settings.py b/examples/agents/agent_settings.py index e21d820b..8b963026 100644 --- a/examples/agents/agent_settings.py +++ b/examples/agents/agent_settings.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/agents/easy_example.py b/examples/agents/easy_example.py index bebdb11a..c78cb6be 100644 --- a/examples/agents/easy_example.py +++ b/examples/agents/easy_example.py @@ -1,4 +1,5 @@ -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat ## Initialize the workflow agent = Agent( diff --git a/examples/agents/memory/agents_and_memory/agent_with_longterm_memory.py b/examples/agents/memory/agents_and_memory/agent_with_longterm_memory.py index 36e32081..00a63bf5 100644 
--- a/examples/agents/memory/agents_and_memory/agent_with_longterm_memory.py +++ b/examples/agents/memory/agents_and_memory/agent_with_longterm_memory.py @@ -3,7 +3,8 @@ import os from dotenv import load_dotenv # Import the OpenAIChat model and the Agent struct -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms_memory import ChromaDB # Load the environment variables diff --git a/examples/agents/o1_preview 2.py b/examples/agents/o1_preview 2.py index c62c9f0b..67d84d35 100644 --- a/examples/agents/o1_preview 2.py +++ b/examples/agents/o1_preview 2.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/agents/o1_preview.py b/examples/agents/o1_preview.py index c62c9f0b..67d84d35 100644 --- a/examples/agents/o1_preview.py +++ b/examples/agents/o1_preview.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/agents/settings/monitoring/agent_ops_tools.py b/examples/agents/settings/monitoring/agent_ops_tools.py index 9256071d..1799459a 100644 --- a/examples/agents/settings/monitoring/agent_ops_tools.py +++ b/examples/agents/settings/monitoring/agent_ops_tools.py @@ -24,7 +24,8 @@ sys.path.insert(0, os.getcwd()) ################ Adding project root to PYTHONPATH ################################ -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from agentops import record_function diff --git a/examples/agents/settings/various_models/custom_model_with_agent.py b/examples/agents/settings/various_models/custom_model_with_agent.py index c0511bec..dd46076d 100644 --- a/examples/agents/settings/various_models/custom_model_with_agent.py +++ b/examples/agents/settings/various_models/custom_model_with_agent.py @@ -1,5 +1,5 @@ from swarms import Agent -from swarms.models.base_llm import BaseLLM +from swarm_models.base_llm import BaseLLM # Define a custom LLM class diff --git a/examples/agents/tools/function_calling/agent_spec_func_calling.py b/examples/agents/tools/function_calling/agent_spec_func_calling.py index e59fd29a..ccc19b36 100644 --- a/examples/agents/tools/function_calling/agent_spec_func_calling.py +++ b/examples/agents/tools/function_calling/agent_spec_func_calling.py @@ -1,5 +1,5 @@ import json -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller from pydantic import BaseModel, Field from typing import List from swarms import Agent diff --git a/examples/agents/tools/function_calling/audience_generator_agent.py b/examples/agents/tools/function_calling/audience_generator_agent.py index 73244577..aaf91e2a 100644 --- a/examples/agents/tools/function_calling/audience_generator_agent.py +++ b/examples/agents/tools/function_calling/audience_generator_agent.py @@ -1,5 +1,5 @@ import os -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller from pydantic import BaseModel, Field from typing import List diff --git a/examples/agents/tools/function_calling/claude_artifacts_example.py b/examples/agents/tools/function_calling/claude_artifacts_example.py index 
12a809ce..52832c36 100644 --- a/examples/agents/tools/function_calling/claude_artifacts_example.py +++ b/examples/agents/tools/function_calling/claude_artifacts_example.py @@ -1,4 +1,4 @@ -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller from pydantic import BaseModel, Field diff --git a/examples/agents/tools/function_calling/idea_generator_agent.py b/examples/agents/tools/function_calling/idea_generator_agent.py index da19822c..3283edeb 100644 --- a/examples/agents/tools/function_calling/idea_generator_agent.py +++ b/examples/agents/tools/function_calling/idea_generator_agent.py @@ -1,4 +1,4 @@ -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller from pydantic import BaseModel, Field from typing import List import json diff --git a/examples/agents/tools/function_calling/openai_function_caller_agent_rearrange.py b/examples/agents/tools/function_calling/openai_function_caller_agent_rearrange.py index 165d831e..c08bdb26 100644 --- a/examples/agents/tools/function_calling/openai_function_caller_agent_rearrange.py +++ b/examples/agents/tools/function_calling/openai_function_caller_agent_rearrange.py @@ -1,4 +1,4 @@ -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller from pydantic import BaseModel diff --git a/examples/agents/tools/function_calling/openai_function_caller_example.py b/examples/agents/tools/function_calling/openai_function_caller_example.py index c0a8f0a7..22e1169e 100644 --- a/examples/agents/tools/function_calling/openai_function_caller_example.py +++ b/examples/agents/tools/function_calling/openai_function_caller_example.py @@ -1,4 +1,4 @@ -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller from pydantic import BaseModel diff --git a/examples/agents/tools/function_calling/prompt_generator_agent.py b/examples/agents/tools/function_calling/prompt_generator_agent.py index cc5c2e0e..3ff9ebe5 100644 --- a/examples/agents/tools/function_calling/prompt_generator_agent.py +++ b/examples/agents/tools/function_calling/prompt_generator_agent.py @@ -1,4 +1,4 @@ -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller from pydantic import BaseModel, Field from typing import Sequence diff --git a/examples/agents/tools/function_calling/react_agent.py b/examples/agents/tools/function_calling/react_agent.py index a810c15e..885e6f7c 100644 --- a/examples/agents/tools/function_calling/react_agent.py +++ b/examples/agents/tools/function_calling/react_agent.py @@ -1,4 +1,4 @@ -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller from pydantic import BaseModel, Field from typing import List diff --git a/examples/agents/tools/function_calling/sentiment_analysis_function_calling.py b/examples/agents/tools/function_calling/sentiment_analysis_function_calling.py index fcc8a311..3e66605f 100644 --- a/examples/agents/tools/function_calling/sentiment_analysis_function_calling.py +++ b/examples/agents/tools/function_calling/sentiment_analysis_function_calling.py @@ -1,4 +1,4 @@ -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller 
import OpenAIFunctionCaller from pydantic import BaseModel, Field diff --git a/examples/agents/use_cases/browser/multion/multion_example.ipynb b/examples/agents/use_cases/browser/multion/multion_example.ipynb index a2941aa6..66da08a5 100644 --- a/examples/agents/use_cases/browser/multion/multion_example.ipynb +++ b/examples/agents/use_cases/browser/multion/multion_example.ipynb @@ -11,7 +11,7 @@ "from multion.client import MultiOn\n", "from swarms import Agent\n", "import os\n", - "from swarms.models.base_llm import BaseLLM\n", + "from swarm_models.base_llm import BaseLLM\n", "\n", "def check_multion_api_key():\n", " \"\"\"\n", diff --git a/examples/agents/use_cases/code_gen/ai_research_team/json_output_v.py b/examples/agents/use_cases/code_gen/ai_research_team/json_output_v.py index 6c90c32b..e7f76867 100644 --- a/examples/agents/use_cases/code_gen/ai_research_team/json_output_v.py +++ b/examples/agents/use_cases/code_gen/ai_research_team/json_output_v.py @@ -1,4 +1,4 @@ -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller from pydantic import BaseModel, Field from swarms import create_file_in_folder from swarms.utils.loguru_logger import logger diff --git a/examples/agents/use_cases/code_gen/ai_research_team/multi_agent_hf.py b/examples/agents/use_cases/code_gen/ai_research_team/multi_agent_hf.py index 771b51d3..65bbd02f 100644 --- a/examples/agents/use_cases/code_gen/ai_research_team/multi_agent_hf.py +++ b/examples/agents/use_cases/code_gen/ai_research_team/multi_agent_hf.py @@ -1,4 +1,4 @@ -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller from pydantic import BaseModel, Field from swarms.utils.loguru_logger import logger import threading diff --git a/examples/agents/use_cases/code_gen/ai_research_team/novel_pytorch_code_generator.py b/examples/agents/use_cases/code_gen/ai_research_team/novel_pytorch_code_generator.py index de87dbd5..d22b9064 100644 --- a/examples/agents/use_cases/code_gen/ai_research_team/novel_pytorch_code_generator.py +++ b/examples/agents/use_cases/code_gen/ai_research_team/novel_pytorch_code_generator.py @@ -1,4 +1,4 @@ -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller from pydantic import BaseModel, Field from swarms import create_file_in_folder from swarms.tools.prebuilt.code_executor import CodeExecutor diff --git a/examples/agents/use_cases/code_gen/amazon_review_agent.py b/examples/agents/use_cases/code_gen/amazon_review_agent.py index 3fb3bc40..f0c1a695 100644 --- a/examples/agents/use_cases/code_gen/amazon_review_agent.py +++ b/examples/agents/use_cases/code_gen/amazon_review_agent.py @@ -1,4 +1,5 @@ -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat ## Initialize the workflow agent = Agent( diff --git a/examples/agents/use_cases/code_gen/api_requester_agent.py b/examples/agents/use_cases/code_gen/api_requester_agent.py index ae7bd5f9..502e6285 100644 --- a/examples/agents/use_cases/code_gen/api_requester_agent.py +++ b/examples/agents/use_cases/code_gen/api_requester_agent.py @@ -1,4 +1,5 @@ -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat agent = Agent( agent_name="API Requester", diff --git a/examples/agents/use_cases/code_gen/code_interpreter_agent.py 
b/examples/agents/use_cases/code_gen/code_interpreter_agent.py index b49c4099..7b46bc78 100644 --- a/examples/agents/use_cases/code_gen/code_interpreter_agent.py +++ b/examples/agents/use_cases/code_gen/code_interpreter_agent.py @@ -1,4 +1,4 @@ -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller from pydantic import BaseModel, Field from swarms.tools.prebuilt.code_executor import CodeExecutor from swarms.structs.concat import concat_strings diff --git a/examples/agents/use_cases/code_gen/sql_agent.py b/examples/agents/use_cases/code_gen/sql_agent.py index bdfd9966..f8dc06ce 100644 --- a/examples/agents/use_cases/code_gen/sql_agent.py +++ b/examples/agents/use_cases/code_gen/sql_agent.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat # Get the OpenAI API key from the environment variable api_key = os.getenv("OPENAI_API_KEY") diff --git a/examples/agents/use_cases/finance/estate_planning_agent.py b/examples/agents/use_cases/finance/estate_planning_agent.py index a9f0c8eb..16c89d35 100644 --- a/examples/agents/use_cases/finance/estate_planning_agent.py +++ b/examples/agents/use_cases/finance/estate_planning_agent.py @@ -1,6 +1,7 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat # Get the OpenAI API key from the environment variable api_key = os.getenv("OPENAI_API_KEY") diff --git a/examples/agents/use_cases/finance/financial_agent_gpt4o_mini.py b/examples/agents/use_cases/finance/financial_agent_gpt4o_mini.py index 52d8329c..cd4bed51 100644 --- a/examples/agents/use_cases/finance/financial_agent_gpt4o_mini.py +++ b/examples/agents/use_cases/finance/financial_agent_gpt4o_mini.py @@ -1,6 +1,7 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/agents/use_cases/finance/main.py b/examples/agents/use_cases/finance/main.py index 3e5c1445..fed00f5b 100644 --- a/examples/agents/use_cases/finance/main.py +++ b/examples/agents/use_cases/finance/main.py @@ -2,7 +2,8 @@ import os import requests -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat # Get the OpenAI API key from the environment variable api_key = os.getenv("OPENAI_API_KEY") diff --git a/examples/agents/use_cases/finance/plaid_api_tool.py b/examples/agents/use_cases/finance/plaid_api_tool.py index 84cd10b0..1b8b56f4 100644 --- a/examples/agents/use_cases/finance/plaid_api_tool.py +++ b/examples/agents/use_cases/finance/plaid_api_tool.py @@ -12,7 +12,8 @@ from plaid.model.transactions_get_response import ( TransactionsGetResponse, ) -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/agents/use_cases/kyle_hackathon.py b/examples/agents/use_cases/kyle_hackathon.py index 36fcfcd2..b1c5c493 100644 --- a/examples/agents/use_cases/kyle_hackathon.py +++ b/examples/agents/use_cases/kyle_hackathon.py @@ -2,7 +2,8 @@ import os from dotenv import load_dotenv -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.agents.multion_agent import MultiOnAgent from swarms_memory import ChromaDB from swarms 
import tool diff --git a/examples/agents/use_cases/law/alberto_agent 2.py b/examples/agents/use_cases/law/alberto_agent 2.py index 77c8c028..74fc62ee 100644 --- a/examples/agents/use_cases/law/alberto_agent 2.py +++ b/examples/agents/use_cases/law/alberto_agent 2.py @@ -10,7 +10,7 @@ from typing import Optional from pydantic import BaseModel, Field -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller PROBABE_SYS_PROMPT = """ diff --git a/examples/agents/use_cases/law/alberto_agent.py b/examples/agents/use_cases/law/alberto_agent.py index 77c8c028..74fc62ee 100644 --- a/examples/agents/use_cases/law/alberto_agent.py +++ b/examples/agents/use_cases/law/alberto_agent.py @@ -10,7 +10,7 @@ from typing import Optional from pydantic import BaseModel, Field -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller PROBABE_SYS_PROMPT = """ diff --git a/examples/agents/use_cases/multi_modal/multi_modal_rag_agent.py b/examples/agents/use_cases/multi_modal/multi_modal_rag_agent.py index c309d60a..0d31924c 100644 --- a/examples/agents/use_cases/multi_modal/multi_modal_rag_agent.py +++ b/examples/agents/use_cases/multi_modal/multi_modal_rag_agent.py @@ -3,7 +3,8 @@ import os from dotenv import load_dotenv -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms_memory import ChromaDB from swarms.prompts.visual_cot import VISUAL_CHAIN_OF_THOUGHT from swarms import tool diff --git a/examples/agents/use_cases/multi_modal/new_agent_tool_system.py b/examples/agents/use_cases/multi_modal/new_agent_tool_system.py index 62f46678..18958770 100644 --- a/examples/agents/use_cases/multi_modal/new_agent_tool_system.py +++ b/examples/agents/use_cases/multi_modal/new_agent_tool_system.py @@ -13,7 +13,8 @@ import os from dotenv import load_dotenv # Import the OpenAIChat model and the Agent struct -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat # Load the environment variables load_dotenv() diff --git a/examples/agents/use_cases/research/new_perplexity_agent.py b/examples/agents/use_cases/research/new_perplexity_agent.py index 272041de..e51e340a 100644 --- a/examples/agents/use_cases/research/new_perplexity_agent.py +++ b/examples/agents/use_cases/research/new_perplexity_agent.py @@ -1,5 +1,5 @@ from swarms import Agent -from swarms.models.llama3_hosted import llama3Hosted +from swarm_models.llama3_hosted import llama3Hosted from swarms_memory import ChromaDB from swarms.tools.prebuilt.bing_api import fetch_web_articles_bing_api diff --git a/examples/agents/use_cases/research/perplexity_agent.py b/examples/agents/use_cases/research/perplexity_agent.py index 0faab2cf..b26c1b30 100644 --- a/examples/agents/use_cases/research/perplexity_agent.py +++ b/examples/agents/use_cases/research/perplexity_agent.py @@ -9,7 +9,8 @@ $ pip install swarms - """ -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms_memory import ChromaDB from swarms.tools.prebuilt.bing_api import fetch_web_articles_bing_api import os diff --git a/examples/agents/use_cases/security/perimeter_defense_agent.py b/examples/agents/use_cases/security/perimeter_defense_agent.py index d235fa22..f92cbdee 100644 --- a/examples/agents/use_cases/security/perimeter_defense_agent.py +++ 
b/examples/agents/use_cases/security/perimeter_defense_agent.py @@ -3,7 +3,7 @@ import os from dotenv import load_dotenv import swarms.prompts.security_team as stsp -from swarms.models import GPT4VisionAPI +from swarm_models import GPT4VisionAPI from swarms.structs import Agent # Load environment variables and initialize the Vision API diff --git a/examples/collabs/swarms_example.ipynb b/examples/collabs/swarms_example.ipynb index c0f52ed1..1e30e61e 100644 --- a/examples/collabs/swarms_example.ipynb +++ b/examples/collabs/swarms_example.ipynb @@ -1091,7 +1091,7 @@ "\n", "from dotenv import load_dotenv\n", "\n", - "from swarms.models import GPT4VisionAPI\n", + "from swarm_models import GPT4VisionAPI\n", "from swarms.prompts.logistics import (\n", " Efficiency_Agent_Prompt,\n", " Health_Security_Agent_Prompt,\n", @@ -1211,7 +1211,7 @@ "\n", "from dotenv import load_dotenv\n", "\n", - "from swarms.models.gpt4_vision_api import GPT4VisionAPI\n", + "from swarm_models.gpt4_vision_api import GPT4VisionAPI\n", "from swarms.structs import Agent\n", "\n", "# Load the environment variables\n", diff --git a/examples/demos/accountant_team/account_team2_example.py b/examples/demos/accountant_team/account_team2_example.py index 5a5aafd3..29c6d9b9 100644 --- a/examples/demos/accountant_team/account_team2_example.py +++ b/examples/demos/accountant_team/account_team2_example.py @@ -2,7 +2,7 @@ import os from dotenv import load_dotenv -from swarms.models import Anthropic, OpenAIChat +from swarm_models import Anthropic, OpenAIChat from swarms.prompts.accountant_swarm_prompts import ( DECISION_MAKING_PROMPT, DOC_ANALYZER_AGENT_PROMPT, diff --git a/examples/demos/ad_gen/ad_gen_example.py b/examples/demos/ad_gen/ad_gen_example.py index 978ab502..8ee79706 100644 --- a/examples/demos/ad_gen/ad_gen_example.py +++ b/examples/demos/ad_gen/ad_gen_example.py @@ -3,8 +3,8 @@ import random from dotenv import load_dotenv -from swarms.models import OpenAIChat -from swarms.models.stable_diffusion import StableDiffusion +from swarm_models import OpenAIChat +from swarm_models import StableDiffusion from swarms.structs import Agent load_dotenv() diff --git a/examples/demos/ai_acceleerated_learning/main.py b/examples/demos/ai_acceleerated_learning/main.py index 69840da4..50e6674c 100644 --- a/examples/demos/ai_acceleerated_learning/main.py +++ b/examples/demos/ai_acceleerated_learning/main.py @@ -1,6 +1,7 @@ import concurrent import csv -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms_memory import ChromaDB from dotenv import load_dotenv from swarms.utils.parse_code import extract_code_from_markdown diff --git a/examples/demos/ai_research_team/main_example.py b/examples/demos/ai_research_team/main_example.py index 96f2e417..0dc804f9 100644 --- a/examples/demos/ai_research_team/main_example.py +++ b/examples/demos/ai_research_team/main_example.py @@ -2,7 +2,7 @@ import os from dotenv import load_dotenv -from swarms.models import Anthropic, OpenAIChat +from swarm_models import Anthropic, OpenAIChat from swarms.prompts.ai_research_team import ( PAPER_IMPLEMENTOR_AGENT_PROMPT, PAPER_SUMMARY_ANALYZER, diff --git a/examples/demos/assembly/assembly_example.py b/examples/demos/assembly/assembly_example.py index 7ac97ab0..700a5143 100644 --- a/examples/demos/assembly/assembly_example.py +++ b/examples/demos/assembly/assembly_example.py @@ -1,4 +1,4 @@ -from swarms.models.gpt4_vision_api import GPT4VisionAPI +from swarm_models.gpt4_vision_api import GPT4VisionAPI from 
swarms.structs import Agent llm = GPT4VisionAPI() diff --git a/examples/demos/autoswarm/autoswarm.py b/examples/demos/autoswarm/autoswarm.py index 309c88ea..79346529 100644 --- a/examples/demos/autoswarm/autoswarm.py +++ b/examples/demos/autoswarm/autoswarm.py @@ -1,6 +1,6 @@ import os from dotenv import load_dotenv -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat from swarms.structs import Agent import swarms.prompts.autoswarm as sdsp diff --git a/examples/demos/autotemp/autotemp_example.py b/examples/demos/autotemp/autotemp_example.py index f086f112..2a744857 100644 --- a/examples/demos/autotemp/autotemp_example.py +++ b/examples/demos/autotemp/autotemp_example.py @@ -1,6 +1,6 @@ import re -from swarms.models.openai_models import OpenAIChat +from swarm_models.openai_models import OpenAIChat class AutoTemp: diff --git a/examples/demos/autotemp/blog_gen_example.py b/examples/demos/autotemp/blog_gen_example.py index fe2a2317..d9c7eae0 100644 --- a/examples/demos/autotemp/blog_gen_example.py +++ b/examples/demos/autotemp/blog_gen_example.py @@ -3,7 +3,7 @@ import os from autotemp import AutoTemp from termcolor import colored -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat from swarms.structs import SequentialWorkflow diff --git a/examples/demos/developer_swarm/main_example.py b/examples/demos/developer_swarm/main_example.py index 0a2e2a95..8f8c51fb 100644 --- a/examples/demos/developer_swarm/main_example.py +++ b/examples/demos/developer_swarm/main_example.py @@ -19,7 +19,7 @@ import os from dotenv import load_dotenv -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat from swarms.prompts.programming import DOCUMENTATION_SOP, TEST_SOP from swarms.structs import Agent diff --git a/examples/demos/education/education_example.py b/examples/demos/education/education_example.py index 31c08f0d..32fe6761 100644 --- a/examples/demos/education/education_example.py +++ b/examples/demos/education/education_example.py @@ -4,7 +4,7 @@ from dotenv import load_dotenv import swarms.prompts.education as edu_prompts from swarms import Agent, SequentialWorkflow -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat # Load environment variables load_dotenv() diff --git a/examples/demos/gemini_benchmarking/gemini_chat_example.py b/examples/demos/gemini_benchmarking/gemini_chat_example.py index 2ea6a900..f19e6983 100644 --- a/examples/demos/gemini_benchmarking/gemini_chat_example.py +++ b/examples/demos/gemini_benchmarking/gemini_chat_example.py @@ -2,7 +2,7 @@ import os from dotenv import load_dotenv -from swarms.models.gemini import Gemini +from swarm_models.gemini import Gemini from swarms.prompts.react import react_prompt load_dotenv() diff --git a/examples/demos/gemini_benchmarking/gemini_react_example.py b/examples/demos/gemini_benchmarking/gemini_react_example.py index 37765baf..725b84a3 100644 --- a/examples/demos/gemini_benchmarking/gemini_react_example.py +++ b/examples/demos/gemini_benchmarking/gemini_react_example.py @@ -2,7 +2,7 @@ import os from dotenv import load_dotenv -from swarms.models.gemini import Gemini +from swarm_models.gemini import Gemini from swarms.prompts.react import react_prompt load_dotenv() diff --git a/examples/demos/gemini_benchmarking/gemini_vcot_example.py b/examples/demos/gemini_benchmarking/gemini_vcot_example.py index 0a5c4c99..4b10f905 100644 --- a/examples/demos/gemini_benchmarking/gemini_vcot_example.py +++ b/examples/demos/gemini_benchmarking/gemini_vcot_example.py @@ -2,7 +2,7 
@@ import os from dotenv import load_dotenv -from swarms.models import Gemini +from swarm_models import Gemini from swarms.prompts.visual_cot import VISUAL_CHAIN_OF_THOUGHT # Load the environment variables diff --git a/examples/demos/grupa/app_example.py b/examples/demos/grupa/app_example.py index ff5fc27d..acf13635 100644 --- a/examples/demos/grupa/app_example.py +++ b/examples/demos/grupa/app_example.py @@ -3,7 +3,7 @@ import os from dotenv import load_dotenv from termcolor import colored -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat from swarms.prompts.code_interpreter import CODE_INTERPRETER from swarms.prompts.programming import DOCUMENTATION_SOP, TEST_SOP from swarms.structs import Agent diff --git a/examples/demos/jarvis_multi_modal_auto_agent/jarvis_example.py b/examples/demos/jarvis_multi_modal_auto_agent/jarvis_example.py index cce61fba..df36e7db 100644 --- a/examples/demos/jarvis_multi_modal_auto_agent/jarvis_example.py +++ b/examples/demos/jarvis_multi_modal_auto_agent/jarvis_example.py @@ -1,4 +1,4 @@ -from swarms.models.gpt4_vision_api import GPT4VisionAPI +from swarm_models.gpt4_vision_api import GPT4VisionAPI from swarms.prompts.multi_modal_autonomous_instruction_prompt import ( MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1, ) diff --git a/examples/demos/llm_with_conversation/main_example.py b/examples/demos/llm_with_conversation/main_example.py index a9e6c42a..70596b7e 100644 --- a/examples/demos/llm_with_conversation/main_example.py +++ b/examples/demos/llm_with_conversation/main_example.py @@ -3,7 +3,7 @@ import os from dotenv import load_dotenv # Import the OpenAIChat model and the Agent struct -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat # Load the environment variables load_dotenv() diff --git a/examples/demos/logistics/logistics_example.py b/examples/demos/logistics/logistics_example.py index 48d8b9ce..08fb9448 100644 --- a/examples/demos/logistics/logistics_example.py +++ b/examples/demos/logistics/logistics_example.py @@ -2,7 +2,7 @@ import os from dotenv import load_dotenv -from swarms.models import GPT4VisionAPI +from swarm_models import GPT4VisionAPI from swarms.prompts.logistics import ( Efficiency_Agent_Prompt, Health_Security_Agent_Prompt, diff --git a/examples/demos/multi_modal_autonomous_agents/multi_modal_auto_agent_example.py b/examples/demos/multi_modal_autonomous_agents/multi_modal_auto_agent_example.py index 007776ac..fc7d7cb8 100644 --- a/examples/demos/multi_modal_autonomous_agents/multi_modal_auto_agent_example.py +++ b/examples/demos/multi_modal_autonomous_agents/multi_modal_auto_agent_example.py @@ -1,4 +1,4 @@ -from swarms.models.gpt4_vision_api import GPT4VisionAPI +from swarm_models.gpt4_vision_api import GPT4VisionAPI from swarms.structs import Agent llm = GPT4VisionAPI() diff --git a/examples/demos/multi_modal_chain_of_thought/vcot_example.py b/examples/demos/multi_modal_chain_of_thought/vcot_example.py index 24bf60aa..f92d8de2 100644 --- a/examples/demos/multi_modal_chain_of_thought/vcot_example.py +++ b/examples/demos/multi_modal_chain_of_thought/vcot_example.py @@ -2,7 +2,7 @@ import os from dotenv import load_dotenv -from swarms.models.gpt4_vision_api import GPT4VisionAPI +from swarm_models.gpt4_vision_api import GPT4VisionAPI from swarms.prompts.visual_cot import VISUAL_CHAIN_OF_THOUGHT from swarms.structs import Agent diff --git a/examples/demos/multimodal_tot/idea2img_example.py b/examples/demos/multimodal_tot/idea2img_example.py index 4a6c1da3..186d31fb 100644 --- 
a/examples/demos/multimodal_tot/idea2img_example.py +++ b/examples/demos/multimodal_tot/idea2img_example.py @@ -4,9 +4,9 @@ import os import streamlit as st from dotenv import load_dotenv -from swarms.models import OpenAIChat -from swarms.models.gpt4_vision_api import GPT4VisionAPI -from swarms.models.stable_diffusion import StableDiffusion +from swarm_models import OpenAIChat +from swarm_models.gpt4_vision_api import GPT4VisionAPI +from swarm_models.stable_diffusion import StableDiffusion from swarms.structs import Agent # Load environment variables diff --git a/examples/demos/multimodal_tot/main_example.py b/examples/demos/multimodal_tot/main_example.py index 2a0494dc..815a326c 100644 --- a/examples/demos/multimodal_tot/main_example.py +++ b/examples/demos/multimodal_tot/main_example.py @@ -20,8 +20,8 @@ import os from dotenv import load_dotenv from termcolor import colored -from swarms.models.gpt4_vision_api import GPT4VisionAPI -from swarms.models.stable_diffusion import StableDiffusion +from swarm_models.gpt4_vision_api import GPT4VisionAPI +from swarm_models.stable_diffusion import StableDiffusion # Load the environment variables load_dotenv() diff --git a/examples/demos/nutrition/nutrition_example.py b/examples/demos/nutrition/nutrition_example.py index b4331db6..66542b6e 100644 --- a/examples/demos/nutrition/nutrition_example.py +++ b/examples/demos/nutrition/nutrition_example.py @@ -4,7 +4,7 @@ import os import requests from dotenv import load_dotenv -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat from swarms.structs import Agent # Load environment variables diff --git a/examples/demos/octomology_swarm/api.py b/examples/demos/octomology_swarm/api.py index cccf4dfe..d33238a6 100644 --- a/examples/demos/octomology_swarm/api.py +++ b/examples/demos/octomology_swarm/api.py @@ -2,8 +2,8 @@ import os from dotenv import load_dotenv from swarms import Agent -from swarms.models import OpenAIChat -from swarms.models.gpt4_vision_api import GPT4VisionAPI +from swarm_models import OpenAIChat +from swarm_models.gpt4_vision_api import GPT4VisionAPI from swarms.structs.rearrange import AgentRearrange # Load the environment variables diff --git a/examples/demos/optimize_llm_stack/vllm_example.py b/examples/demos/optimize_llm_stack/vllm_example.py index b032709d..31452c03 100644 --- a/examples/demos/optimize_llm_stack/vllm_example.py +++ b/examples/demos/optimize_llm_stack/vllm_example.py @@ -1,4 +1,4 @@ -from swarms.models import vLLM +from swarm_models import vLLM # Initialize vLLM with custom model and parameters custom_vllm = vLLM( diff --git a/examples/demos/optimize_llm_stack/vortex_example.py b/examples/demos/optimize_llm_stack/vortex_example.py index 5badb2fd..a95e876e 100644 --- a/examples/demos/optimize_llm_stack/vortex_example.py +++ b/examples/demos/optimize_llm_stack/vortex_example.py @@ -2,7 +2,7 @@ import os from dotenv import load_dotenv -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat from swarms.structs import Agent # import modal diff --git a/examples/demos/patient_question_assist/main.py b/examples/demos/patient_question_assist/main.py index 45b31cb4..69224f12 100644 --- a/examples/demos/patient_question_assist/main.py +++ b/examples/demos/patient_question_assist/main.py @@ -1,4 +1,5 @@ -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from typing import List from swarms_memory import ChromaDB diff --git a/examples/demos/personal_stylist/personal_stylist_example.py 
b/examples/demos/personal_stylist/personal_stylist_example.py index dde64cb7..5c3f745c 100644 --- a/examples/demos/personal_stylist/personal_stylist_example.py +++ b/examples/demos/personal_stylist/personal_stylist_example.py @@ -2,7 +2,7 @@ import os from dotenv import load_dotenv -from swarms.models import GPT4VisionAPI +from swarm_models import GPT4VisionAPI from swarms.prompts.personal_stylist import ( ACCESSORIES_STYLIST_AGENT_PROMPT, BEARD_STYLIST_AGENT_PROMPT, diff --git a/examples/demos/plant_biologist_swarm/agricultural_swarm.py b/examples/demos/plant_biologist_swarm/agricultural_swarm.py index 9664e6ed..e388d88f 100644 --- a/examples/demos/plant_biologist_swarm/agricultural_swarm.py +++ b/examples/demos/plant_biologist_swarm/agricultural_swarm.py @@ -18,7 +18,7 @@ from examples.demos.plant_biologist_swarm.prompts import ( treatment_recommender_agent, ) from swarms import Agent -from swarms.models.gpt_o import GPT4VisionAPI +from swarm_models.gpt_o import GPT4VisionAPI # Load the OpenAI API key from the .env file load_dotenv() diff --git a/examples/demos/plant_biologist_swarm/using_concurrent_workflow.py b/examples/demos/plant_biologist_swarm/using_concurrent_workflow.py index 84bfbc55..78bd0f0f 100644 --- a/examples/demos/plant_biologist_swarm/using_concurrent_workflow.py +++ b/examples/demos/plant_biologist_swarm/using_concurrent_workflow.py @@ -10,7 +10,7 @@ from examples.demos.plant_biologist_swarm.prompts import ( ) from swarms import Agent, ConcurrentWorkflow -from swarms.models.gpt_o import GPT4VisionAPI +from swarm_models.gpt_o import GPT4VisionAPI # Load the OpenAI API key from the .env file diff --git a/examples/demos/positive_med/positive_med_example.py b/examples/demos/positive_med/positive_med_example.py index 09cbb411..d10526da 100644 --- a/examples/demos/positive_med/positive_med_example.py +++ b/examples/demos/positive_med/positive_med_example.py @@ -25,7 +25,7 @@ import os from termcolor import colored -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat from swarms.prompts.autobloggen import ( AUTOBLOG_REVIEW_PROMPT, DRAFT_AGENT_SYSTEM_PROMPT, diff --git a/examples/demos/security_team/security_team_example.py b/examples/demos/security_team/security_team_example.py index d391fe32..00c9b649 100644 --- a/examples/demos/security_team/security_team_example.py +++ b/examples/demos/security_team/security_team_example.py @@ -4,7 +4,7 @@ from dotenv import load_dotenv from termcolor import colored import swarms.prompts.security_team as stsp -from swarms.models import GPT4VisionAPI +from swarm_models import GPT4VisionAPI from swarms.structs import Agent # Load environment variables and initialize the Vision API diff --git a/examples/demos/social_media_content_generators_swarm/agents.py b/examples/demos/social_media_content_generators_swarm/agents.py index 0ee20cff..958a51f7 100644 --- a/examples/demos/social_media_content_generators_swarm/agents.py +++ b/examples/demos/social_media_content_generators_swarm/agents.py @@ -10,7 +10,8 @@ Example: """ -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat llm = OpenAIChat(max_tokens=4000) diff --git a/examples/demos/society_of_agents/hallucination_swarm.py b/examples/demos/society_of_agents/hallucination_swarm.py index 3f6764ba..64f0fe65 100644 --- a/examples/demos/society_of_agents/hallucination_swarm.py +++ b/examples/demos/society_of_agents/hallucination_swarm.py @@ -1,4 +1,5 @@ -from swarms import Agent, OpenAIChat +from swarms import Agent +from 
swarm_models import OpenAIChat # # model = HuggingfaceLLM(model_id="openai-community/gpt2", max_length=1000) diff --git a/examples/demos/society_of_agents/probate_agent.py b/examples/demos/society_of_agents/probate_agent.py index 04660860..f85d2a79 100644 --- a/examples/demos/society_of_agents/probate_agent.py +++ b/examples/demos/society_of_agents/probate_agent.py @@ -6,7 +6,8 @@ extract forms from images """ -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat model = OpenAIChat(max_tokens=4000) diff --git a/examples/demos/swarm_of_complaince/compliance_swarm.py b/examples/demos/swarm_of_complaince/compliance_swarm.py index 63cee018..4c7d3874 100644 --- a/examples/demos/swarm_of_complaince/compliance_swarm.py +++ b/examples/demos/swarm_of_complaince/compliance_swarm.py @@ -10,7 +10,7 @@ Todo [Improvements] """ from swarms import Agent -from swarms.models.llama3_hosted import llama3Hosted +from swarm_models.llama3_hosted import llama3Hosted from swarms_memory import ChromaDB diff --git a/examples/demos/swarm_of_mma_manufacturing/main_example.py b/examples/demos/swarm_of_mma_manufacturing/main_example.py index 0dce5781..ffb31e76 100644 --- a/examples/demos/swarm_of_mma_manufacturing/main_example.py +++ b/examples/demos/swarm_of_mma_manufacturing/main_example.py @@ -19,7 +19,7 @@ import os from dotenv import load_dotenv from termcolor import colored -from swarms.models import GPT4VisionAPI +from swarm_models import GPT4VisionAPI from swarms.structs import Agent load_dotenv() diff --git a/examples/demos/urban_planning/urban_planning_example.py b/examples/demos/urban_planning/urban_planning_example.py index 2a52ced7..11e5efad 100644 --- a/examples/demos/urban_planning/urban_planning_example.py +++ b/examples/demos/urban_planning/urban_planning_example.py @@ -3,7 +3,7 @@ import os from dotenv import load_dotenv import swarms.prompts.urban_planning as upp -from swarms.models import GPT4VisionAPI, OpenAIChat +from swarm_models import GPT4VisionAPI, OpenAIChat from swarms.structs import Agent, SequentialWorkflow # Load environment variables diff --git a/examples/demos/xray/xray_example.py b/examples/demos/xray/xray_example.py index 4e69c0af..54406f2c 100644 --- a/examples/demos/xray/xray_example.py +++ b/examples/demos/xray/xray_example.py @@ -2,7 +2,7 @@ import os from dotenv import load_dotenv -from swarms.models import GPT4VisionAPI, OpenAIChat +from swarm_models import GPT4VisionAPI, OpenAIChat from swarms.prompts.xray_swarm_prompt import ( TREATMENT_PLAN_PROMPT, XRAY_ANALYSIS_PROMPT, diff --git a/examples/models/anthropic_example.py b/examples/models/anthropic_example.py deleted file mode 100644 index 22dc6c00..00000000 --- a/examples/models/anthropic_example.py +++ /dev/null @@ -1,9 +0,0 @@ -import os - -from swarms.models import Anthropic - -model = Anthropic(anthropic_api_key=os.getenv("ANTHROPIC_API_KEY")) - -task = "What is quantum field theory? What are 3 books on the field?" 
- -print(model(task)) diff --git a/examples/models/azure_openai.py b/examples/models/azure_openai.py deleted file mode 100644 index 2e216d96..00000000 --- a/examples/models/azure_openai.py +++ /dev/null @@ -1,10 +0,0 @@ -from swarms.models import AzureOpenAI - -# Initialize Azure OpenAI -model = AzureOpenAI() - -# Run the model -model( - "Create a youtube script for a video on how to use the swarms" - " framework" -) diff --git a/examples/models/azure_openai_example.py b/examples/models/azure_openai_example.py deleted file mode 100644 index 6bba72f9..00000000 --- a/examples/models/azure_openai_example.py +++ /dev/null @@ -1,25 +0,0 @@ -import os -from dotenv import load_dotenv -from swarms import AzureOpenAI - -# Load the environment variables -load_dotenv() - -# Create an instance of the AzureOpenAI class -model = AzureOpenAI( - azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), - deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT"), - openai_api_version=os.getenv("OPENAI_API_VERSION"), - openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"), - azure_ad_token=os.getenv("AZURE_OPENAI_AD_TOKEN"), -) - -# Define the prompt -prompt = ( - "Analyze this load document and assess it for any risks and" - " create a table in markdwon format." -) - -# Generate a response -response = model(prompt) -print(response) diff --git a/examples/models/cohere_example.py b/examples/models/cohere_example.py deleted file mode 100644 index de128a9f..00000000 --- a/examples/models/cohere_example.py +++ /dev/null @@ -1,5 +0,0 @@ -from swarms.models import Cohere - -cohere = Cohere(model="command-light", cohere_api_key="") - -out = cohere("Hello, how are you?") diff --git a/examples/models/dalle3.jpeg b/examples/models/dalle3.jpeg deleted file mode 100644 index 39753795..00000000 Binary files a/examples/models/dalle3.jpeg and /dev/null differ diff --git a/examples/models/dalle3_concurrent_example.py b/examples/models/dalle3_concurrent_example.py deleted file mode 100644 index e31f1cd8..00000000 --- a/examples/models/dalle3_concurrent_example.py +++ /dev/null @@ -1,23 +0,0 @@ -""" - -User task ->> GPT4 for prompt enrichment ->> Dalle3V for image generation -->> GPT4Vision for image captioning ->> Dalle3 better image - -""" - -import os - -from swarms.models.dalle3 import Dalle3 - -api_key = os.environ["OPENAI_API_KEY"] - -dalle3 = Dalle3(openai_api_key=api_key, n=1) - -# task = "Swarm of robots working super industrial ambience concept art" - -# image_url = dalle3(task) - -tasks = ["A painting of a dog", "A painting of a cat"] -results = dalle3.process_batch_concurrently(tasks) - -# print(results) diff --git a/examples/models/dalle3_example.py b/examples/models/dalle3_example.py deleted file mode 100644 index ac9ba760..00000000 --- a/examples/models/dalle3_example.py +++ /dev/null @@ -1,6 +0,0 @@ -from swarms.models.dalle3 import Dalle3 - -model = Dalle3() - -task = "A painting of a dog" -img = model(task) diff --git a/examples/models/example_gpt4vison.py b/examples/models/example_gpt4vison.py deleted file mode 100644 index 01026171..00000000 --- a/examples/models/example_gpt4vison.py +++ /dev/null @@ -1,17 +0,0 @@ -from swarms import GPT4VisionAPI - -# Initialize with default API key and custom max_tokens -api = GPT4VisionAPI(max_tokens=1000) - -# Define the task and image URL -task = "Describe the scene in the image." 
-img = ( - "/home/kye/.swarms/swarms/examples/Screenshot from 2024-02-20" - " 05-55-34.png" -) - -# Run the GPT-4 Vision model -response = api.run(task, img) - -# Print the model's response -print(response) diff --git a/examples/models/example_idefics.py b/examples/models/example_idefics.py deleted file mode 100644 index ea36ba77..00000000 --- a/examples/models/example_idefics.py +++ /dev/null @@ -1,33 +0,0 @@ -# Import the idefics model from the swarms.models module -from swarms.models import Idefics - -# Create an instance of the idefics model -model = Idefics() - -# Define user input with an image URL and chat with the model -user_input = ( - "User: What is in this image?" - " https://upload.wikimedia.org/wikipedia/commons/8/86/Id%C3%A9fix.JPG" -) -response = model.chat(user_input) -print(response) - -# Define another user input with an image URL and chat with the model -user_input = ( - "User: And who is that?" - " https://static.wikia.nocookie.net/asterix/images/2/25/R22b.gif/revision/latest?cb=20110815073052" -) -response = model.chat(user_input) -print(response) - -# Set the checkpoint of the model to "new_checkpoint" -model.set_checkpoint("new_checkpoint") - -# Set the device of the model to "cpu" -model.set_device("cpu") - -# Set the maximum length of the chat to 200 -model.set_max_length(200) - -# Clear the chat history of the model -model.clear_chat_history() diff --git a/examples/models/example_kosmos.py b/examples/models/example_kosmos.py deleted file mode 100644 index dbfd108f..00000000 --- a/examples/models/example_kosmos.py +++ /dev/null @@ -1,10 +0,0 @@ -from swarms import Kosmos - -# Initialize the model -model = Kosmos() - -# Generate -out = model.run("Analyze the reciepts in this image", "docs.jpg") - -# Print the output -print(out) diff --git a/examples/models/example_qwenvlmultimodal.py b/examples/models/example_qwenvlmultimodal.py deleted file mode 100644 index 561b6f88..00000000 --- a/examples/models/example_qwenvlmultimodal.py +++ /dev/null @@ -1,16 +0,0 @@ -from swarms import QwenVLMultiModal - -# Instantiate the QwenVLMultiModal model -model = QwenVLMultiModal( - model_name="Qwen/Qwen-VL-Chat", - device="cuda", - quantize=True, -) - -# Run the model -response = model( - "Hello, how are you?", "https://example.com/image.jpg" -) - -# Print the response -print(response) diff --git a/examples/models/fire_works.py b/examples/models/fire_works.py deleted file mode 100644 index 114557c4..00000000 --- a/examples/models/fire_works.py +++ /dev/null @@ -1,13 +0,0 @@ -from swarms.models.popular_llms import Fireworks -import os - -# Initialize the model -llm = Fireworks( - temperature=0.2, - max_tokens=3500, - openai_api_key=os.getenv("FIREWORKS_API_KEY"), -) - -# Run the model -response = llm("What is the meaning of life?") -print(response) diff --git a/examples/models/fuyu_example.py b/examples/models/fuyu_example.py deleted file mode 100644 index 537de25a..00000000 --- a/examples/models/fuyu_example.py +++ /dev/null @@ -1,7 +0,0 @@ -from swarms.models.fuyu import Fuyu - -fuyu = Fuyu() - -# This is the default image, you can change it to any image you want -out = fuyu("What is this image?", "images/swarms.jpeg") -print(out) diff --git a/examples/models/gemini_example.py b/examples/models/gemini_example.py deleted file mode 100644 index 75553bfc..00000000 --- a/examples/models/gemini_example.py +++ /dev/null @@ -1,20 +0,0 @@ -import os - -from dotenv import load_dotenv - -from swarms.models.gemini import Gemini - -load_dotenv() - -api_key = os.environ["GEMINI_API_KEY"] - -# 
Initialize the model -model = Gemini(gemini_api_key=api_key) - -# Establish the prompt and image -task = "What is your name" -img = "images/github-banner-swarms.png" - -# Run the model -out = model.run("What is your name?", img=img) -print(out) diff --git a/examples/models/gpt4_v_example.py b/examples/models/gpt4_v_example.py deleted file mode 100644 index b434f257..00000000 --- a/examples/models/gpt4_v_example.py +++ /dev/null @@ -1,35 +0,0 @@ -import os # Import the os module for working with the operating system - -from dotenv import ( - load_dotenv, # Import the load_dotenv function from the dotenv module -) - -from swarms import ( - GPT4VisionAPI, # Import the GPT4VisionAPI class from the swarms module -) - -# Load the environment variables -load_dotenv() - -# Get the API key from the environment variables -api_key = os.getenv("OPENAI_API_KEY") - -# Create an instance of the GPT4VisionAPI class with the API key and model name -gpt4vision = GPT4VisionAPI( - openai_api_key=api_key, - model_name="gpt-4o", - max_tokens=1000, - openai_proxy="https://api.openai.com/v1/chat/completions", -) - -# Define the URL of the image to analyze -img = "ear.png" - -# Define the task to perform on the image -task = "What is this image" - -# Run the GPT4VisionAPI on the image with the specified task -answer = gpt4vision.run(task, img, return_json=True) - -# Print the answer -print(answer) diff --git a/examples/models/gpt_4o_mini.py b/examples/models/gpt_4o_mini.py deleted file mode 100644 index c21f3a5c..00000000 --- a/examples/models/gpt_4o_mini.py +++ /dev/null @@ -1,16 +0,0 @@ -from swarms import OpenAIChat -import os - -# Get the OpenAI API key from the environment variable -api_key = os.getenv("OPENAI_API_KEY") - -# Create an instance of the OpenAIChat class -model = OpenAIChat(openai_api_key=api_key, model_name="gpt-4o-mini") - -# Query the model with a question -out = model( - "What is the best state to register a business in the US for the least amount of taxes?" 
-) - -# Print the model's response -print(out) diff --git a/examples/models/groq_model_exampole 2.py b/examples/models/groq_model_exampole 2.py deleted file mode 100644 index 5fde99f0..00000000 --- a/examples/models/groq_model_exampole 2.py +++ /dev/null @@ -1,18 +0,0 @@ -import os -from swarms import OpenAIChat - - -# Example usage: -api_key = os.getenv("GROQ_API_KEY") - -# Model -model = OpenAIChat( - openai_api_base="https://api.groq.com/openai/v1", - openai_api_key=api_key, - model_name="llama-3.1-70b-versatile", - temperature=0.1, -) - - -out = model("What is the essence of quantum field theory?") -print(out) diff --git a/examples/models/groq_model_exampole.py b/examples/models/groq_model_exampole.py deleted file mode 100644 index 5fde99f0..00000000 --- a/examples/models/groq_model_exampole.py +++ /dev/null @@ -1,18 +0,0 @@ -import os -from swarms import OpenAIChat - - -# Example usage: -api_key = os.getenv("GROQ_API_KEY") - -# Model -model = OpenAIChat( - openai_api_base="https://api.groq.com/openai/v1", - openai_api_key=api_key, - model_name="llama-3.1-70b-versatile", - temperature=0.1, -) - - -out = model("What is the essence of quantum field theory?") -print(out) diff --git a/examples/models/hf/llama3 2 b/examples/models/hf/llama3 2 deleted file mode 100644 index 1e00018c..00000000 --- a/examples/models/hf/llama3 2 +++ /dev/null @@ -1,71 +0,0 @@ -from swarms import Agent -from swarms.prompts.finance_agent_sys_prompt import ( - FINANCIAL_AGENT_SYS_PROMPT, -) -import torch -from swarms import BaseLLM -from transformers import AutoTokenizer, LlamaForCausalLM - - -class NvidiaLlama31B(BaseLLM): - # Load the tokenizer and model - def __init__(self, max_tokens: int = 2048): - self.max_tokens = max_tokens - model_path = "nvidia/Llama-3.1-Minitron-4B-Width-Base" - self.tokenizer = AutoTokenizer.from_pretrained(model_path) - - device = "cuda" - dtype = torch.bfloat16 - self.model = LlamaForCausalLM.from_pretrained( - model_path, torch_dtype=dtype, device_map=device - ) - - def run(self, task: str): - # Prepare the input text - inputs = self.tokenizer.encode(task, return_tensors="pt").to( - self.model.device - ) - - # Generate the output - outputs = self.model.generate( - inputs, max_length=self.max_tokens - ) - - # Decode and print the output - output_text = self.tokenizer.decode(outputs[0]) - print(output_text) - - return output_text - - -# # Example usage: -# model = NvidiaLlama31B() -# out = model.run("What is the essence of quantum field theory?") -# print(out) - -model = NvidiaLlama31B() - -# Initialize the agent -agent = Agent( - agent_name="Financial-Analysis-Agent_sas_chicken_eej", - system_prompt=FINANCIAL_AGENT_SYS_PROMPT, - llm=model, - max_loops=2, - autosave=True, - dashboard=False, - verbose=True, - dynamic_temperature_enabled=True, - saved_state_path="finance_agent.json", - user_name="swarms_corp", - retry_attempts=1, - context_length=200000, - return_step_meta=True, - disable_print_every_step=True, - output_type="json", -) - - -out = agent.run( - "How can I establish a ROTH IRA to buy stocks and get a tax break? 
What are the criteria" -) -print(out) diff --git a/examples/models/hf/llama3.py b/examples/models/hf/llama3.py deleted file mode 100644 index 1e00018c..00000000 --- a/examples/models/hf/llama3.py +++ /dev/null @@ -1,71 +0,0 @@ -from swarms import Agent -from swarms.prompts.finance_agent_sys_prompt import ( - FINANCIAL_AGENT_SYS_PROMPT, -) -import torch -from swarms import BaseLLM -from transformers import AutoTokenizer, LlamaForCausalLM - - -class NvidiaLlama31B(BaseLLM): - # Load the tokenizer and model - def __init__(self, max_tokens: int = 2048): - self.max_tokens = max_tokens - model_path = "nvidia/Llama-3.1-Minitron-4B-Width-Base" - self.tokenizer = AutoTokenizer.from_pretrained(model_path) - - device = "cuda" - dtype = torch.bfloat16 - self.model = LlamaForCausalLM.from_pretrained( - model_path, torch_dtype=dtype, device_map=device - ) - - def run(self, task: str): - # Prepare the input text - inputs = self.tokenizer.encode(task, return_tensors="pt").to( - self.model.device - ) - - # Generate the output - outputs = self.model.generate( - inputs, max_length=self.max_tokens - ) - - # Decode and print the output - output_text = self.tokenizer.decode(outputs[0]) - print(output_text) - - return output_text - - -# # Example usage: -# model = NvidiaLlama31B() -# out = model.run("What is the essence of quantum field theory?") -# print(out) - -model = NvidiaLlama31B() - -# Initialize the agent -agent = Agent( - agent_name="Financial-Analysis-Agent_sas_chicken_eej", - system_prompt=FINANCIAL_AGENT_SYS_PROMPT, - llm=model, - max_loops=2, - autosave=True, - dashboard=False, - verbose=True, - dynamic_temperature_enabled=True, - saved_state_path="finance_agent.json", - user_name="swarms_corp", - retry_attempts=1, - context_length=200000, - return_step_meta=True, - disable_print_every_step=True, - output_type="json", -) - - -out = agent.run( - "How can I establish a ROTH IRA to buy stocks and get a tax break? What are the criteria" -) -print(out) diff --git a/examples/models/huggingface_example.py b/examples/models/huggingface_example.py deleted file mode 100644 index 73b9cb41..00000000 --- a/examples/models/huggingface_example.py +++ /dev/null @@ -1,8 +0,0 @@ -from swarms.models import HuggingfaceLLM - -model_id = "NousResearch/Yarn-Mistral-7b-128k" -inference = HuggingfaceLLM(model_id=model_id) - -task = "Once upon a time" -generated_text = inference(task) -print(generated_text) diff --git a/examples/models/idefics_example.py b/examples/models/idefics_example.py deleted file mode 100644 index ea36ba77..00000000 --- a/examples/models/idefics_example.py +++ /dev/null @@ -1,33 +0,0 @@ -# Import the idefics model from the swarms.models module -from swarms.models import Idefics - -# Create an instance of the idefics model -model = Idefics() - -# Define user input with an image URL and chat with the model -user_input = ( - "User: What is in this image?" - " https://upload.wikimedia.org/wikipedia/commons/8/86/Id%C3%A9fix.JPG" -) -response = model.chat(user_input) -print(response) - -# Define another user input with an image URL and chat with the model -user_input = ( - "User: And who is that?" 
- " https://static.wikia.nocookie.net/asterix/images/2/25/R22b.gif/revision/latest?cb=20110815073052" -) -response = model.chat(user_input) -print(response) - -# Set the checkpoint of the model to "new_checkpoint" -model.set_checkpoint("new_checkpoint") - -# Set the device of the model to "cpu" -model.set_device("cpu") - -# Set the maximum length of the chat to 200 -model.set_max_length(200) - -# Clear the chat history of the model -model.clear_chat_history() diff --git a/examples/models/kosmos_example.py b/examples/models/kosmos_example.py deleted file mode 100644 index dbfd108f..00000000 --- a/examples/models/kosmos_example.py +++ /dev/null @@ -1,10 +0,0 @@ -from swarms import Kosmos - -# Initialize the model -model = Kosmos() - -# Generate -out = model.run("Analyze the reciepts in this image", "docs.jpg") - -# Print the output -print(out) diff --git a/examples/models/layout_documentxlm_example.py b/examples/models/layout_documentxlm_example.py deleted file mode 100644 index 281938fd..00000000 --- a/examples/models/layout_documentxlm_example.py +++ /dev/null @@ -1,8 +0,0 @@ -from swarms.models import LayoutLMDocumentQA - -model = LayoutLMDocumentQA() - -# Place an image of a financial document -out = model("What is the total amount?", "images/swarmfest.png") - -print(out) diff --git a/examples/models/llama_3_hosted.py b/examples/models/llama_3_hosted.py deleted file mode 100644 index 8d4d7de2..00000000 --- a/examples/models/llama_3_hosted.py +++ /dev/null @@ -1,7 +0,0 @@ -from swarms import llama3Hosted - -llama3 = llama3Hosted() - -task = "What is the capital of France?" -response = llama3.run(task) -print(response) diff --git a/examples/models/llama_function_caller_example.py b/examples/models/llama_function_caller_example.py deleted file mode 100644 index 201009a8..00000000 --- a/examples/models/llama_function_caller_example.py +++ /dev/null @@ -1,37 +0,0 @@ -from swarms.models.llama_function_caller import LlamaFunctionCaller - -llama_caller = LlamaFunctionCaller() - - -# Add a custom function -def get_weather(location: str, format: str) -> str: - # This is a placeholder for the actual implementation - return f"Weather at {location} in {format} format." 
- - -llama_caller.add_func( - name="get_weather", - function=get_weather, - description="Get the weather at a location", - arguments=[ - { - "name": "location", - "type": "string", - "description": "Location for the weather", - }, - { - "name": "format", - "type": "string", - "description": "Format of the weather data", - }, - ], -) - -# Call the function -result = llama_caller.call_function( - "get_weather", location="Paris", format="Celsius" -) -print(result) - -# Stream a user prompt -llama_caller("Tell me about the tallest mountain in the world.") diff --git a/examples/models/llava_example.py b/examples/models/llava_example.py deleted file mode 100644 index 561b6f88..00000000 --- a/examples/models/llava_example.py +++ /dev/null @@ -1,16 +0,0 @@ -from swarms import QwenVLMultiModal - -# Instantiate the QwenVLMultiModal model -model = QwenVLMultiModal( - model_name="Qwen/Qwen-VL-Chat", - device="cuda", - quantize=True, -) - -# Run the model -response = model( - "Hello, how are you?", "https://example.com/image.jpg" -) - -# Print the response -print(response) diff --git a/examples/models/nougat_example.py b/examples/models/nougat_example.py deleted file mode 100644 index 97e1f1a3..00000000 --- a/examples/models/nougat_example.py +++ /dev/null @@ -1,5 +0,0 @@ -from swarms.models.nougat import Nougat - -nougat = Nougat() - -out = nougat("large.png") diff --git a/examples/models/openai_model_example.py b/examples/models/openai_model_example.py deleted file mode 100644 index 1a58770c..00000000 --- a/examples/models/openai_model_example.py +++ /dev/null @@ -1,10 +0,0 @@ -import os -from swarms.models import OpenAIChat - -# Load doten -openai = OpenAIChat( - openai_api_key=os.getenv("OPENAI_API_KEY"), verbose=False -) - -chat = openai("What are quantum fields?") -print(chat) diff --git a/examples/models/palm_example.py b/examples/models/palm_example.py deleted file mode 100644 index 5a2348ad..00000000 --- a/examples/models/palm_example.py +++ /dev/null @@ -1,5 +0,0 @@ -from swarms.models import Palm - -palm = Palm() - -out = palm("what's your name") diff --git a/examples/models/ssd_example.py b/examples/models/ssd_example.py deleted file mode 100644 index 2234b9c8..00000000 --- a/examples/models/ssd_example.py +++ /dev/null @@ -1,9 +0,0 @@ -from swarms.models.ssd_1b import SSD1B - -model = SSD1B() - -task = "A painting of a dog" -neg_prompt = "ugly, blurry, poor quality" - -image_url = model(task, neg_prompt) -print(image_url) diff --git a/examples/models/swarms_cloud_api_example.py b/examples/models/swarms_cloud_api_example.py deleted file mode 100644 index 914ca9f5..00000000 --- a/examples/models/swarms_cloud_api_example.py +++ /dev/null @@ -1,31 +0,0 @@ -from dotenv import load_dotenv -from openai import OpenAI -import os - -load_dotenv() - -openai_api_key = os.getenv("SWARMS_API_KEY") -openai_api_base = "https://api.swarms.world" -model = "gpt-4o" - -client = OpenAI(api_key=openai_api_key, base_url=openai_api_base) -# Note that this model expects the image to come before the main text -chat_response = client.chat.completions.create( - model=model, - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://home-cdn.reolink.us/wp-content/uploads/2022/04/010345091648784709.4253.jpg", - }, - }, - {"type": "text", "text": "What's in this image?"}, - ], - } - ], - temperature=0.1, -) -print("Chat response:", chat_response) diff --git a/examples/models/together_example.py b/examples/models/together_example.py deleted file mode 100644 index 
f730f72f..00000000 --- a/examples/models/together_example.py +++ /dev/null @@ -1,12 +0,0 @@ -from swarms import TogetherLLM - -# Initialize the model with your parameters -model = TogetherLLM( - model_name="mistralai/Mixtral-8x7B-Instruct-v0.1", - max_tokens=1000, -) - -# Run the model -model.run( - "Generate a blog post about the best way to make money online." -) diff --git a/examples/models/tts_speech_example.py b/examples/models/tts_speech_example.py deleted file mode 100644 index 6c33f944..00000000 --- a/examples/models/tts_speech_example.py +++ /dev/null @@ -1,16 +0,0 @@ -import os - -from dotenv import load_dotenv - -from swarms import OpenAITTS - -load_dotenv() - -tts = OpenAITTS( - model_name="tts-1-1106", - voice="onyx", - openai_api_key=os.getenv("OPENAI_API_KEY"), -) - -out = tts.run_and_save("Dammmmmm those tacos were good") -print(out) diff --git a/examples/models/vilt_example.py b/examples/models/vilt_example.py deleted file mode 100644 index 8e40f59d..00000000 --- a/examples/models/vilt_example.py +++ /dev/null @@ -1,8 +0,0 @@ -from swarms.models.vilt import Vilt - -model = Vilt() - -output = model( - "What is this image", - "http://images.cocodataset.org/val2017/000000039769.jpg", -) diff --git a/examples/structs/swarms/agent_registry/agent_registry.py b/examples/structs/swarms/agent_registry/agent_registry.py index cf8b6c99..dd746d5d 100644 --- a/examples/structs/swarms/agent_registry/agent_registry.py +++ b/examples/structs/swarms/agent_registry/agent_registry.py @@ -1,6 +1,6 @@ from swarms.structs.agent_registry import AgentRegistry from swarms import Agent -from swarms.models import Anthropic +from swarm_models import Anthropic # Initialize the agents diff --git a/examples/structs/swarms/automate_docs.py b/examples/structs/swarms/automate_docs.py index f3268fdb..80e0427d 100644 --- a/examples/structs/swarms/automate_docs.py +++ b/examples/structs/swarms/automate_docs.py @@ -4,7 +4,8 @@ import threading from typing import Callable, List from swarms.prompts.documentation import DOCUMENTATION_WRITER_SOP -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.utils.loguru_logger import logger import concurrent diff --git a/examples/structs/swarms/different_architectures/a_star_swarm_example.py b/examples/structs/swarms/different_architectures/a_star_swarm_example.py index 1995b16e..01fa59a8 100644 --- a/examples/structs/swarms/different_architectures/a_star_swarm_example.py +++ b/examples/structs/swarms/different_architectures/a_star_swarm_example.py @@ -1,6 +1,7 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/structs/swarms/different_architectures/circular_swarm.py b/examples/structs/swarms/different_architectures/circular_swarm.py index 8fdfaff5..b0c8e7fb 100644 --- a/examples/structs/swarms/different_architectures/circular_swarm.py +++ b/examples/structs/swarms/different_architectures/circular_swarm.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/structs/swarms/different_architectures/star_swarm.py b/examples/structs/swarms/different_architectures/star_swarm.py index 2b3ec2a3..e08963fc 100644 --- a/examples/structs/swarms/different_architectures/star_swarm.py 
+++ b/examples/structs/swarms/different_architectures/star_swarm.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/structs/swarms/example_logistics.py b/examples/structs/swarms/example_logistics.py index 9de44346..88750016 100644 --- a/examples/structs/swarms/example_logistics.py +++ b/examples/structs/swarms/example_logistics.py @@ -1,7 +1,7 @@ from swarms.structs import Agent import os from dotenv import load_dotenv -from swarms.models import GPT4VisionAPI +from swarm_models import GPT4VisionAPI from swarms.prompts.logistics import ( Health_Security_Agent_Prompt, Quality_Control_Agent_Prompt, diff --git a/examples/structs/swarms/hiearchical_swarm/agent_creator 2.py b/examples/structs/swarms/hiearchical_swarm/agent_creator 2.py index 307d6640..5023c979 100644 --- a/examples/structs/swarms/hiearchical_swarm/agent_creator 2.py +++ b/examples/structs/swarms/hiearchical_swarm/agent_creator 2.py @@ -11,7 +11,7 @@ from swarms import ( OpenAIChat, SpreadSheetSwarm, ) -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller agent_pool = [] diff --git a/examples/structs/swarms/hiearchical_swarm/agent_creator.py b/examples/structs/swarms/hiearchical_swarm/agent_creator.py index 307d6640..5023c979 100644 --- a/examples/structs/swarms/hiearchical_swarm/agent_creator.py +++ b/examples/structs/swarms/hiearchical_swarm/agent_creator.py @@ -11,7 +11,7 @@ from swarms import ( OpenAIChat, SpreadSheetSwarm, ) -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller agent_pool = [] diff --git a/examples/structs/swarms/mixture_of_agents/mixture_of_agents.py b/examples/structs/swarms/mixture_of_agents/mixture_of_agents.py index f594eddb..ec4ecbd3 100644 --- a/examples/structs/swarms/mixture_of_agents/mixture_of_agents.py +++ b/examples/structs/swarms/mixture_of_agents/mixture_of_agents.py @@ -1,4 +1,5 @@ -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.mixture_of_agents import MixtureOfAgents # Initialize the director agent diff --git a/examples/structs/swarms/mixture_of_agents/mixture_of_agents_nividia_10k 2.py b/examples/structs/swarms/mixture_of_agents/mixture_of_agents_nividia_10k 2.py index 9cbf7cf1..4ed21d69 100644 --- a/examples/structs/swarms/mixture_of_agents/mixture_of_agents_nividia_10k 2.py +++ b/examples/structs/swarms/mixture_of_agents/mixture_of_agents_nividia_10k 2.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.mixture_of_agents import MixtureOfAgents diff --git a/examples/structs/swarms/mixture_of_agents/mixture_of_agents_nividia_10k.py b/examples/structs/swarms/mixture_of_agents/mixture_of_agents_nividia_10k.py index 9cbf7cf1..4ed21d69 100644 --- a/examples/structs/swarms/mixture_of_agents/mixture_of_agents_nividia_10k.py +++ b/examples/structs/swarms/mixture_of_agents/mixture_of_agents_nividia_10k.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.mixture_of_agents import MixtureOfAgents diff --git a/examples/structs/swarms/mixture_of_agents/moa_from_scratch 2.py 
b/examples/structs/swarms/mixture_of_agents/moa_from_scratch 2.py index 8b233065..74226740 100644 --- a/examples/structs/swarms/mixture_of_agents/moa_from_scratch 2.py +++ b/examples/structs/swarms/mixture_of_agents/moa_from_scratch 2.py @@ -6,7 +6,8 @@ from typing import List from pydantic import BaseModel -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/structs/swarms/mixture_of_agents/moa_from_scratch.py b/examples/structs/swarms/mixture_of_agents/moa_from_scratch.py index 8b233065..74226740 100644 --- a/examples/structs/swarms/mixture_of_agents/moa_from_scratch.py +++ b/examples/structs/swarms/mixture_of_agents/moa_from_scratch.py @@ -6,7 +6,8 @@ from typing import List from pydantic import BaseModel -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/structs/swarms/movers_swarm.py b/examples/structs/swarms/movers_swarm.py index c4625876..3fb17461 100644 --- a/examples/structs/swarms/movers_swarm.py +++ b/examples/structs/swarms/movers_swarm.py @@ -9,7 +9,8 @@ $ pip install swarms - """ -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms_memory import ChromaDB from swarms.tools.prebuilt.bing_api import fetch_web_articles_bing_api import os diff --git a/examples/structs/swarms/multi_agent_collaboration/agent_delegation.py b/examples/structs/swarms/multi_agent_collaboration/agent_delegation.py index 91ce1eb3..89c2612e 100644 --- a/examples/structs/swarms/multi_agent_collaboration/agent_delegation.py +++ b/examples/structs/swarms/multi_agent_collaboration/agent_delegation.py @@ -1,4 +1,5 @@ -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat def calculate_profit(revenue: float, expenses: float): diff --git a/examples/structs/swarms/multi_agent_collaboration/company_example.py b/examples/structs/swarms/multi_agent_collaboration/company_example.py index abdee607..933bf0f6 100644 --- a/examples/structs/swarms/multi_agent_collaboration/company_example.py +++ b/examples/structs/swarms/multi_agent_collaboration/company_example.py @@ -2,7 +2,8 @@ import os from dotenv import load_dotenv -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.company import Company load_dotenv() diff --git a/examples/structs/swarms/multi_agent_collaboration/mixture_of_agents/moa_with_scp.py b/examples/structs/swarms/multi_agent_collaboration/mixture_of_agents/moa_with_scp.py index e61d1536..e530dc5c 100644 --- a/examples/structs/swarms/multi_agent_collaboration/mixture_of_agents/moa_with_scp.py +++ b/examples/structs/swarms/multi_agent_collaboration/mixture_of_agents/moa_with_scp.py @@ -1,4 +1,5 @@ -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.mixture_of_agents import MixtureOfAgents from swarms_memory import ChromaDB diff --git a/examples/structs/swarms/multi_agent_collaboration/round_robin_swarm_example.py b/examples/structs/swarms/multi_agent_collaboration/round_robin_swarm_example.py index f3a463ad..a6e0e77c 100644 --- a/examples/structs/swarms/multi_agent_collaboration/round_robin_swarm_example.py +++ 
b/examples/structs/swarms/multi_agent_collaboration/round_robin_swarm_example.py @@ -1,5 +1,6 @@ from swarms.structs.round_robin import RoundRobinSwarm -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat # Initialize the LLM diff --git a/examples/structs/swarms/queue_swarm/queue_swarm_2 2.py b/examples/structs/swarms/queue_swarm/queue_swarm_2 2.py index e87770bc..dcfcd022 100644 --- a/examples/structs/swarms/queue_swarm/queue_swarm_2 2.py +++ b/examples/structs/swarms/queue_swarm/queue_swarm_2 2.py @@ -8,7 +8,8 @@ from typing import List from swarms.utils.calculate_func_metrics import profile_func from pydantic import BaseModel -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/structs/swarms/queue_swarm/queue_swarm_2 .py b/examples/structs/swarms/queue_swarm/queue_swarm_2 .py index e87770bc..dcfcd022 100644 --- a/examples/structs/swarms/queue_swarm/queue_swarm_2 .py +++ b/examples/structs/swarms/queue_swarm/queue_swarm_2 .py @@ -8,7 +8,8 @@ from typing import List from swarms.utils.calculate_func_metrics import profile_func from pydantic import BaseModel -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/structs/swarms/queue_swarm/queue_swarm_example.py b/examples/structs/swarms/queue_swarm/queue_swarm_example.py index fa8d187d..f722dcc5 100644 --- a/examples/structs/swarms/queue_swarm/queue_swarm_example.py +++ b/examples/structs/swarms/queue_swarm/queue_swarm_example.py @@ -1,7 +1,8 @@ import os from swarms.structs.queue_swarm import TaskQueueSwarm -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/structs/swarms/round_of_robin_swarm/rob_swarm_1 2 b/examples/structs/swarms/round_of_robin_swarm/rob_swarm_1 2 index ac1faec9..c9ae8252 100644 --- a/examples/structs/swarms/round_of_robin_swarm/rob_swarm_1 2 +++ b/examples/structs/swarms/round_of_robin_swarm/rob_swarm_1 2 @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.round_robin import RoundRobinSwarm SEC_DATA = """ diff --git a/examples/structs/swarms/round_of_robin_swarm/rob_swarm_1.py b/examples/structs/swarms/round_of_robin_swarm/rob_swarm_1.py index ac1faec9..c9ae8252 100644 --- a/examples/structs/swarms/round_of_robin_swarm/rob_swarm_1.py +++ b/examples/structs/swarms/round_of_robin_swarm/rob_swarm_1.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.round_robin import RoundRobinSwarm SEC_DATA = """ diff --git a/examples/structs/swarms/search_arena/search_agents.py b/examples/structs/swarms/search_arena/search_agents.py index 5ea84f53..32f10880 100644 --- a/examples/structs/swarms/search_arena/search_agents.py +++ b/examples/structs/swarms/search_arena/search_agents.py @@ -5,7 +5,8 @@ import requests import tavily from dotenv import load_dotenv -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.tools.prebuilt.bing_api import fetch_web_articles_bing_api load_dotenv() diff 
--git a/examples/structs/swarms/spreadsheet_swarm/dfs_example.py b/examples/structs/swarms/spreadsheet_swarm/dfs_example.py index 5586a1d3..049bbbb2 100644 --- a/examples/structs/swarms/spreadsheet_swarm/dfs_example.py +++ b/examples/structs/swarms/spreadsheet_swarm/dfs_example.py @@ -1,6 +1,7 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.dfs_search_swarm import DFSSwarm # Get the OpenAI API key from the environment variable diff --git a/examples/structs/swarms/spreadsheet_swarm/real_estate_swarm 2.py b/examples/structs/swarms/spreadsheet_swarm/real_estate_swarm 2.py index eb093199..8948465a 100644 --- a/examples/structs/swarms/spreadsheet_swarm/real_estate_swarm 2.py +++ b/examples/structs/swarms/spreadsheet_swarm/real_estate_swarm 2.py @@ -1,6 +1,7 @@ import uuid import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm # Define custom system prompts for each social media platform diff --git a/examples/structs/swarms/spreadsheet_swarm/real_estate_swarm.py b/examples/structs/swarms/spreadsheet_swarm/real_estate_swarm.py index eb093199..8948465a 100644 --- a/examples/structs/swarms/spreadsheet_swarm/real_estate_swarm.py +++ b/examples/structs/swarms/spreadsheet_swarm/real_estate_swarm.py @@ -1,6 +1,7 @@ import uuid import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm # Define custom system prompts for each social media platform diff --git a/examples/structs/swarms/spreadsheet_swarm/social_media_marketing_spreesheet_swarm 2.py b/examples/structs/swarms/spreadsheet_swarm/social_media_marketing_spreesheet_swarm 2.py index ddc23a69..59896dd8 100644 --- a/examples/structs/swarms/spreadsheet_swarm/social_media_marketing_spreesheet_swarm 2.py +++ b/examples/structs/swarms/spreadsheet_swarm/social_media_marketing_spreesheet_swarm 2.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm # Define custom system prompts for each social media platform diff --git a/examples/structs/swarms/spreadsheet_swarm/social_media_marketing_spreesheet_swarm.py b/examples/structs/swarms/spreadsheet_swarm/social_media_marketing_spreesheet_swarm.py index ddc23a69..59896dd8 100644 --- a/examples/structs/swarms/spreadsheet_swarm/social_media_marketing_spreesheet_swarm.py +++ b/examples/structs/swarms/spreadsheet_swarm/social_media_marketing_spreesheet_swarm.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm # Define custom system prompts for each social media platform diff --git a/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/csvs/README.md b/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/csvs/README.md index 0dbf5e0d..2ea3ea68 100644 --- a/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/csvs/README.md +++ b/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/csvs/README.md @@ -82,7 +82,8 @@ Features: ```python import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from 
swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) @@ -179,7 +180,8 @@ agent.run( An LLM equipped with long term memory and tools, a full stack agent capable of automating all and any digital tasks given a good prompt. ```python -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms_memory import ChromaDB import subprocess import os @@ -851,7 +853,8 @@ An all-new swarm architecuture, with SpreadSheetSwarm you can manage and oversee ```python import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm # Define custom system prompts for each social media platform diff --git a/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/qr_code_generative_spreedsheet_swarm 2.py b/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/qr_code_generative_spreedsheet_swarm 2.py index d0b6a3cb..4419c5cd 100644 --- a/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/qr_code_generative_spreedsheet_swarm 2.py +++ b/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/qr_code_generative_spreedsheet_swarm 2.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm # Define custom system prompts for QR code generation diff --git a/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/qr_code_generative_spreedsheet_swarm.py b/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/qr_code_generative_spreedsheet_swarm.py index d0b6a3cb..4419c5cd 100644 --- a/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/qr_code_generative_spreedsheet_swarm.py +++ b/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/qr_code_generative_spreedsheet_swarm.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm # Define custom system prompts for QR code generation diff --git a/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/spread_sheet_example 2.py b/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/spread_sheet_example 2.py index 35b8353b..8aab2c80 100644 --- a/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/spread_sheet_example 2.py +++ b/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/spread_sheet_example 2.py @@ -1,6 +1,7 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/spread_sheet_example.py b/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/spread_sheet_example.py index 35b8353b..8aab2c80 100644 --- a/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/spread_sheet_example.py +++ b/examples/structs/swarms/spreadsheet_swarm/spreadsheet_swarm_examples/spread_sheet_example.py @@ -1,6 +1,7 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git 
a/examples/swarms/rearrange/example.py b/examples/swarms/rearrange/example.py index 930188db..15c6a688 100644 --- a/examples/swarms/rearrange/example.py +++ b/examples/swarms/rearrange/example.py @@ -4,7 +4,7 @@ from dotenv import load_dotenv load_dotenv() from swarms.structs import Agent -from swarms.models import Anthropic +from swarm_models import Anthropic from swarms.structs.rearrange import AgentRearrange llm = Anthropic(anthropic_api_key=os.getenv("ANTHROPIC_API_KEY"), streaming=True) diff --git a/examples/tasks/task_example 2.py b/examples/tasks/task_example 2.py index b8b81fe5..3074104b 100644 --- a/examples/tasks/task_example 2.py +++ b/examples/tasks/task_example 2.py @@ -1,7 +1,8 @@ import os from datetime import datetime, timedelta -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/tasks/task_example.py b/examples/tasks/task_example.py index b8b81fe5..3074104b 100644 --- a/examples/tasks/task_example.py +++ b/examples/tasks/task_example.py @@ -1,7 +1,8 @@ import os from datetime import datetime, timedelta -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/examples/utils/agent_table 2.py b/examples/utils/agent_table 2.py index be3c9054..e357fe6a 100644 --- a/examples/utils/agent_table 2.py +++ b/examples/utils/agent_table 2.py @@ -5,7 +5,8 @@ from swarms.utils.pandas_utils import ( dict_to_dataframe, pydantic_model_to_dataframe, ) -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat # Create an instance of the OpenAIChat class llm = OpenAIChat( diff --git a/examples/utils/agent_table.py b/examples/utils/agent_table.py index be3c9054..e357fe6a 100644 --- a/examples/utils/agent_table.py +++ b/examples/utils/agent_table.py @@ -5,7 +5,8 @@ from swarms.utils.pandas_utils import ( dict_to_dataframe, pydantic_model_to_dataframe, ) -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat # Create an instance of the OpenAIChat class llm = OpenAIChat( diff --git a/examples/workshops/aug_10/book_generator_swarm.py b/examples/workshops/aug_10/book_generator_swarm.py index 640491aa..36e767d0 100644 --- a/examples/workshops/aug_10/book_generator_swarm.py +++ b/examples/workshops/aug_10/book_generator_swarm.py @@ -1,4 +1,4 @@ -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller from pydantic import BaseModel, Field from typing import Sequence diff --git a/examples/workshops/spreadsheet_workspace/aug_31_workshop/groq_api_spreadsheet_marketing_swarm 2.py b/examples/workshops/spreadsheet_workspace/aug_31_workshop/groq_api_spreadsheet_marketing_swarm 2.py index 6c81c7fa..b0a3cbf0 100644 --- a/examples/workshops/spreadsheet_workspace/aug_31_workshop/groq_api_spreadsheet_marketing_swarm 2.py +++ b/examples/workshops/spreadsheet_workspace/aug_31_workshop/groq_api_spreadsheet_marketing_swarm 2.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm diff --git a/examples/workshops/spreadsheet_workspace/aug_31_workshop/groq_api_spreadsheet_marketing_swarm.py 
b/examples/workshops/spreadsheet_workspace/aug_31_workshop/groq_api_spreadsheet_marketing_swarm.py index 6c81c7fa..b0a3cbf0 100644 --- a/examples/workshops/spreadsheet_workspace/aug_31_workshop/groq_api_spreadsheet_marketing_swarm.py +++ b/examples/workshops/spreadsheet_workspace/aug_31_workshop/groq_api_spreadsheet_marketing_swarm.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm diff --git a/examples/workshops/spreadsheet_workspace/aug_31_workshop/hierarchical_img_gen_swarm 2.py b/examples/workshops/spreadsheet_workspace/aug_31_workshop/hierarchical_img_gen_swarm 2.py index be7d524e..93c982e7 100644 --- a/examples/workshops/spreadsheet_workspace/aug_31_workshop/hierarchical_img_gen_swarm 2.py +++ b/examples/workshops/spreadsheet_workspace/aug_31_workshop/hierarchical_img_gen_swarm 2.py @@ -17,7 +17,7 @@ from pydantic import BaseModel, Field from transformers import T5EncoderModel from swarms import Agent -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller hf_token = os.getenv("HF_TOKEN") diff --git a/examples/workshops/spreadsheet_workspace/aug_31_workshop/hierarchical_img_gen_swarm.py b/examples/workshops/spreadsheet_workspace/aug_31_workshop/hierarchical_img_gen_swarm.py index be7d524e..93c982e7 100644 --- a/examples/workshops/spreadsheet_workspace/aug_31_workshop/hierarchical_img_gen_swarm.py +++ b/examples/workshops/spreadsheet_workspace/aug_31_workshop/hierarchical_img_gen_swarm.py @@ -17,7 +17,7 @@ from pydantic import BaseModel, Field from transformers import T5EncoderModel from swarms import Agent -from swarms.models.openai_function_caller import OpenAIFunctionCaller +from swarm_models.openai_function_caller import OpenAIFunctionCaller hf_token = os.getenv("HF_TOKEN") diff --git a/examples/workshops/spreadsheet_workspace/swarms_promotion_spreadsheet_swarm 2.py b/examples/workshops/spreadsheet_workspace/swarms_promotion_spreadsheet_swarm 2.py index 22859ed4..86d3fe49 100644 --- a/examples/workshops/spreadsheet_workspace/swarms_promotion_spreadsheet_swarm 2.py +++ b/examples/workshops/spreadsheet_workspace/swarms_promotion_spreadsheet_swarm 2.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm diff --git a/examples/workshops/spreadsheet_workspace/swarms_promotion_spreadsheet_swarm.py b/examples/workshops/spreadsheet_workspace/swarms_promotion_spreadsheet_swarm.py index 22859ed4..86d3fe49 100644 --- a/examples/workshops/spreadsheet_workspace/swarms_promotion_spreadsheet_swarm.py +++ b/examples/workshops/spreadsheet_workspace/swarms_promotion_spreadsheet_swarm.py @@ -1,5 +1,6 @@ import os -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm diff --git a/pyproject.toml b/pyproject.toml index f09574b6..4a793d17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,7 @@ swarms-memory = "*" black = "*" swarms-cloud = "*" aiofiles = "*" +swarm-models = "*" diff --git a/requirements.txt b/requirements.txt index 8530930b..b1e85932 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,4 +31,5 @@ fastapi>=0.110.1 networkx swarms-memory pre-commit -aiofiles \ No newline at end of file +aiofiles +swarm-models 
\ No newline at end of file diff --git a/sample_rearrange 2.py b/sample_rearrange 2.py deleted file mode 100644 index 5829c79e..00000000 --- a/sample_rearrange 2.py +++ /dev/null @@ -1,112 +0,0 @@ -import os - -from swarms import Agent, AgentRearrange, OpenAIChat - -# Get the OpenAI API key from the environment variable -api_key = os.getenv("OPENAI_API_KEY") - -# Create an instance of the OpenAIChat class -model = OpenAIChat( - api_key=api_key, model_name="gpt-4o-mini", temperature=0.1 -) - - -# Initialize the boss agent (Director) -boss_agent = Agent( - agent_name="BossAgent", - system_prompt=""" - You are the BossAgent responsible for managing and overseeing a swarm of agents analyzing company expenses. - Your job is to dynamically assign tasks, prioritize their execution, and ensure that all agents collaborate efficiently. - After receiving a report on the company's expenses, you will break down the work into smaller tasks, - assigning specific tasks to each agent, such as detecting recurring high costs, categorizing expenditures, - and identifying unnecessary transactions. Ensure the results are communicated back in a structured way - so the finance team can take actionable steps to cut off unproductive spending. You also monitor and - dynamically adapt the swarm to optimize their performance. Finally, you summarize their findings - into a coherent report. - """, - llm=model, - max_loops=1, - dashboard=False, - streaming_on=True, - verbose=True, - stopping_token="", - state_save_file_type="json", - saved_state_path="boss_agent.json", -) - -# Initialize worker 1: Expense Analyzer -worker1 = Agent( - agent_name="ExpenseAnalyzer", - system_prompt=""" - Your task is to carefully analyze the company's expense data provided to you. - You will focus on identifying high-cost recurring transactions, categorizing expenditures - (e.g., marketing, operations, utilities, etc.), and flagging areas where there seems to be excessive spending. - You will provide a detailed breakdown of each category, along with specific recommendations for cost-cutting. - Pay close attention to monthly recurring subscriptions, office supplies, and non-essential expenditures. - """, - llm=model, - max_loops=1, - dashboard=False, - streaming_on=True, - verbose=True, - stopping_token="", - state_save_file_type="json", - saved_state_path="worker1.json", -) - -# Initialize worker 2: Summary Generator -worker2 = Agent( - agent_name="SummaryGenerator", - system_prompt=""" - After receiving the detailed breakdown from the ExpenseAnalyzer, - your task is to create a concise summary of the findings. You will focus on the most actionable insights, - such as highlighting the specific transactions that can be immediately cut off and summarizing the areas - where the company is overspending. Your summary will be used by the BossAgent to generate the final report. - Be clear and to the point, emphasizing the urgency of cutting unnecessary expenses. - """, - llm=model, - max_loops=1, - dashboard=False, - streaming_on=True, - verbose=True, - stopping_token="", - state_save_file_type="json", - saved_state_path="worker2.json", -) - -# Swarm-Level Prompt (Collaboration Prompt) -swarm_prompt = """ - As a swarm, your collective goal is to analyze the company's expenses and identify transactions that should be cut off. - You will work collaboratively to break down the entire process of expense analysis into manageable steps. - The BossAgent will direct the flow and assign tasks dynamically to the agents. 
The ExpenseAnalyzer will first - focus on breaking down the expense report, identifying high-cost recurring transactions, categorizing them, - and providing recommendations for potential cost reduction. After the analysis, the SummaryGenerator will then - consolidate all the findings into an actionable summary that the finance team can use to immediately cut off unnecessary expenses. - Together, your collaboration is essential to streamlining and improving the company’s financial health. -""" - -# Create a list of agents -agents = [boss_agent, worker1, worker2] - -# Define the flow pattern for the swarm -flow = "BossAgent -> ExpenseAnalyzer -> SummaryGenerator" - -# Using AgentRearrange class to manage the swarm -agent_system = AgentRearrange( - agents=agents, flow=flow, return_json=True -) - -# Input task for the swarm -task = f""" - - {swarm_prompt} - - The company has been facing a rising number of unnecessary expenses, and the finance team needs a detailed - analysis of recent transactions to identify which expenses can be cut off to improve profitability. - Analyze the provided transaction data and create a detailed report on cost-cutting opportunities, - focusing on recurring transactions and non-essential expenditures. -""" - -# Run the swarm system with the task -output = agent_system.run(task) -print(output) diff --git a/swarms/__init__.py b/swarms/__init__.py index c6ed0908..2624b560 100644 --- a/swarms/__init__.py +++ b/swarms/__init__.py @@ -9,7 +9,7 @@ with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: from swarms.agents import * # noqa: E402, F403 from swarms.artifacts import * # noqa: E402, F403 -from swarms.models import * # noqa: E402, F403 +from swarm_models import * # noqa: E402, F403 from swarms.prompts import * # noqa: E402, F403 from swarms.structs import * # noqa: E402, F403 from swarms.telemetry import * # noqa: E402, F403 diff --git a/swarms/cli/create_agent 2.py b/swarms/cli/create_agent 2.py index 9d68f6a4..e7b29d52 100644 --- a/swarms/cli/create_agent 2.py +++ b/swarms/cli/create_agent 2.py @@ -1,6 +1,6 @@ import os from swarms.structs.agent import Agent -from swarms.models.popular_llms import OpenAIChat +from swarm_models.popular_llms import OpenAIChat from swarms.structs.agent_registry import AgentRegistry # Get the OpenAI API key from the environment variable diff --git a/swarms/cli/create_agent.py b/swarms/cli/create_agent.py index 9d68f6a4..e7b29d52 100644 --- a/swarms/cli/create_agent.py +++ b/swarms/cli/create_agent.py @@ -1,6 +1,6 @@ import os from swarms.structs.agent import Agent -from swarms.models.popular_llms import OpenAIChat +from swarm_models.popular_llms import OpenAIChat from swarms.structs.agent_registry import AgentRegistry # Get the OpenAI API key from the environment variable diff --git a/swarms/cli/parse_yaml.py b/swarms/cli/parse_yaml.py index e7ba841f..de8e936d 100644 --- a/swarms/cli/parse_yaml.py +++ b/swarms/cli/parse_yaml.py @@ -5,7 +5,7 @@ from typing import List, Optional import json from swarms.structs.agent_registry import AgentRegistry from swarms.structs.agent import Agent -from swarms.models.popular_llms import OpenAIChat +from swarm_models.popular_llms import OpenAIChat class AgentInput(BaseModel): diff --git a/swarms/models/__init__.py b/swarms/models/__init__.py deleted file mode 100644 index 1a0c847f..00000000 --- a/swarms/models/__init__.py +++ /dev/null @@ -1,80 +0,0 @@ -from swarms.models.base_embedding_model import BaseEmbeddingModel -from swarms.models.base_llm import BaseLLM # noqa: E402 -from 
swarms.models.base_multimodal_model import BaseMultiModalModel -from swarms.models.fuyu import Fuyu # noqa: E402 -from swarms.models.gpt4_vision_api import GPT4VisionAPI # noqa: E402 -from swarms.models.huggingface import HuggingfaceLLM # noqa: E402 -from swarms.models.idefics import Idefics # noqa: E402 -from swarms.models.kosmos_two import Kosmos # noqa: E402 -from swarms.models.layoutlm_document_qa import LayoutLMDocumentQA -from swarms.models.llama3_hosted import llama3Hosted -from swarms.models.llava import LavaMultiModal # noqa: E402 -from swarms.models.nougat import Nougat # noqa: E402 -from swarms.models.openai_embeddings import OpenAIEmbeddings -from swarms.models.openai_tts import OpenAITTS # noqa: E402 -from swarms.models.palm import GooglePalm as Palm # noqa: E402 -from swarms.models.popular_llms import Anthropic as Anthropic -from swarms.models.popular_llms import ( - AzureOpenAILLM as AzureOpenAI, -) -from swarms.models.popular_llms import ( - CohereChat as Cohere, -) -from swarms.models.popular_llms import OctoAIChat -from swarms.models.popular_llms import ( - OpenAIChatLLM as OpenAIChat, -) -from swarms.models.popular_llms import ( - OpenAILLM as OpenAI, -) -from swarms.models.popular_llms import ReplicateChat as Replicate -from swarms.models.qwen import QwenVLMultiModal # noqa: E402 -from swarms.models.sampling_params import SamplingParams, SamplingType -from swarms.models.together import TogetherLLM # noqa: E402 -from swarms.models.model_types import ( # noqa: E402 - AudioModality, - ImageModality, - MultimodalData, - TextModality, - VideoModality, -) -from swarms.models.vilt import Vilt # noqa: E402 -from swarms.models.popular_llms import FireWorksAI -from swarms.models.openai_function_caller import OpenAIFunctionCaller - -__all__ = [ - "BaseEmbeddingModel", - "BaseLLM", - "BaseMultiModalModel", - "Fuyu", - "GPT4VisionAPI", - "HuggingfaceLLM", - "Idefics", - "Kosmos", - "LayoutLMDocumentQA", - "LavaMultiModal", - "Nougat", - "Palm", - "OpenAITTS", - "Anthropic", - "AzureOpenAI", - "Cohere", - "OpenAIChat", - "OpenAI", - "OctoAIChat", - "QwenVLMultiModal", - "Replicate", - "SamplingParams", - "SamplingType", - "TogetherLLM", - "AudioModality", - "ImageModality", - "MultimodalData", - "TextModality", - "VideoModality", - "Vilt", - "OpenAIEmbeddings", - "llama3Hosted", - "FireWorksAI", - "OpenAIFunctionCaller", -] diff --git a/swarms/models/base_embedding_model.py b/swarms/models/base_embedding_model.py deleted file mode 100644 index 5bcf2a12..00000000 --- a/swarms/models/base_embedding_model.py +++ /dev/null @@ -1,71 +0,0 @@ -from __future__ import annotations - -from abc import ABC, abstractmethod -from dataclasses import dataclass - -import numpy as np -from typing import Callable -from swarms.artifacts.text_artifact import TextArtifact -from swarms.utils.exponential_backoff import ExponentialBackoffMixin - - -@dataclass -class BaseEmbeddingModel( - ExponentialBackoffMixin, - ABC, - # SerializableMixin -): - """ - Attributes: - model: The name of the model to use. - tokenizer: An instance of `BaseTokenizer` to use when calculating tokens. 
- """ - - model: str = None - tokenizer: Callable = None - chunker: Callable = None - - def embed_text_artifact( - self, artifact: TextArtifact - ) -> list[float]: - return self.embed_string(artifact.to_text()) - - def embed_string(self, string: str) -> list[float]: - for attempt in self.retrying(): - with attempt: - if ( - self.tokenizer - and self.tokenizer.count_tokens(string) - > self.tokenizer.max_tokens - ): - return self._embed_long_string(string) - else: - return self.try_embed_chunk(string) - - else: - raise RuntimeError("Failed to embed string.") - - @abstractmethod - def try_embed_chunk(self, chunk: str) -> list[float]: ... - - def _embed_long_string(self, string: str) -> list[float]: - """Embeds a string that is too long to embed in one go.""" - chunks = self.chunker.chunk(string) - - embedding_chunks = [] - length_chunks = [] - for chunk in chunks: - embedding_chunks.append(self.try_embed_chunk(chunk.value)) - length_chunks.append(len(chunk)) - - # generate weighted averages - embedding_chunks = np.average( - embedding_chunks, axis=0, weights=length_chunks - ) - - # normalize length to 1 - embedding_chunks = embedding_chunks / np.linalg.norm( - embedding_chunks - ) - - return embedding_chunks.tolist() diff --git a/swarms/models/base_llm.py b/swarms/models/base_llm.py deleted file mode 100644 index c9e47388..00000000 --- a/swarms/models/base_llm.py +++ /dev/null @@ -1,415 +0,0 @@ -import asyncio -import logging -import os -import time -from abc import abstractmethod -from typing import List, Optional -from swarms.structs.base_structure import BaseStructure - - -class BaseLLM(BaseStructure): - """Abstract Language Model that defines the interface for all language models - - Args: - model_name (Optional[str], optional): _description_. Defaults to None. - max_tokens (Optional[int], optional): _description_. Defaults to None. - max_length (Optional[int], optional): _description_. Defaults to None. - temperature (Optional[float], optional): _description_. Defaults to None. - top_k (Optional[float], optional): _description_. Defaults to None. - top_p (Optional[float], optional): _description_. Defaults to None. - system_prompt (Optional[str], optional): _description_. Defaults to None. - beam_width (Optional[int], optional): _description_. Defaults to None. - num_return_sequences (Optional[int], optional): _description_. Defaults to None. - seed (Optional[int], optional): _description_. Defaults to None. - frequency_penalty (Optional[float], optional): _description_. Defaults to None. - presence_penalty (Optional[float], optional): _description_. Defaults to None. - stop_token (Optional[str], optional): _description_. Defaults to None. - length_penalty (Optional[float], optional): _description_. Defaults to None. - role (Optional[str], optional): _description_. Defaults to None. - do_sample (Optional[bool], optional): _description_. Defaults to None. - early_stopping (Optional[bool], optional): _description_. Defaults to None. - num_beams (Optional[int], optional): _description_. Defaults to None. - repition_penalty (Optional[float], optional): _description_. Defaults to None. - pad_token_id (Optional[int], optional): _description_. Defaults to None. - eos_token_id (Optional[int], optional): _description_. Defaults to None. - bos_token_id (Optional[int], optional): _description_. Defaults to None. - device (Optional[str], optional): _description_. Defaults to None. 
- *args: _description_ - **kwargs: _description_ - - - """ - - def __init__( - self, - model_id: Optional[str] = None, - model_name: Optional[str] = None, - max_tokens: Optional[int] = None, - max_length: Optional[int] = None, - temperature: Optional[float] = None, - top_k: Optional[float] = None, - top_p: Optional[float] = None, - system_prompt: Optional[str] = None, - beam_width: Optional[int] = None, - num_return_sequences: Optional[int] = None, - seed: Optional[int] = None, - frequency_penalty: Optional[float] = None, - presence_penalty: Optional[float] = None, - stop_token: Optional[str] = None, - length_penalty: Optional[float] = None, - role: Optional[str] = None, - do_sample: Optional[bool] = None, - early_stopping: Optional[bool] = None, - num_beams: Optional[int] = None, - repition_penalty: Optional[float] = None, - pad_token_id: Optional[int] = None, - eos_token_id: Optional[int] = None, - bos_token_id: Optional[int] = None, - device: Optional[str] = None, - freq_penalty: Optional[float] = None, - stop_token_id: Optional[int] = None, - *args, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.model_id = model_id - self.model_name = model_name - self.max_tokens = max_tokens - self.temperature = temperature - self.top_k = top_k - self.top_p = top_p - self.system_prompt = system_prompt - self.beam_width = beam_width - self.num_return_sequences = num_return_sequences - self.seed = seed - self.frequency_penalty = frequency_penalty - self.presence_penalty = presence_penalty - self.stop_token = stop_token - self.length_penalty = length_penalty - self.role = role - self.max_length = max_length - self.do_sample = do_sample - self.early_stopping = early_stopping - self.num_beams = num_beams - self.repition_penalty = repition_penalty - self.pad_token_id = pad_token_id - self.eos_token_id = eos_token_id - self.bos_token_id = bos_token_id - self.device = device - self.frequency_penalty = freq_penalty - self.stop_token_id = stop_token_id - - # Attributes - self.history = "" - self.start_time = None - self.end_time = None - self.history = [] - - @abstractmethod - def run(self, task: Optional[str] = None, *args, **kwargs) -> str: - """generate text using language model""" - - async def arun(self, task: Optional[str] = None, *args, **kwargs): - """Asynchronous run - - Args: - task (Optional[str], optional): _description_. Defaults to None. 
- """ - loop = asyncio.get_event_loop() - result = await loop.run_in_executor(None, self.run, task) - return result - - def batch_run(self, tasks: List[str], *args, **kwargs): - """Batch run with language model - - Args: - tasks (List[str]): _description_ - - Returns: - _type_: _description_ - """ - return [self.run(task) for task in tasks] - - async def abatch_run(self, tasks: List[str], *args, **kwargs): - """Asynchronous batch run with language model - - Args: - tasks (List[str]): _description_ - - Returns: - _type_: _description_ - """ - return await asyncio.gather( - *(self.arun(task) for task in tasks) - ) - - def chat(self, task: str, history: str = "") -> str: - """Chat with the model""" - complete_task = ( - task + " | " + history - ) # Delimiter for clarity - return self.run(complete_task) - - def __call__(self, task: str) -> str: - """Call the model""" - return self.run(task) - - def _tokens_per_second(self) -> float: - """Tokens per second""" - elapsed_time = self.end_time - self.start_time - if elapsed_time == 0: - return float("inf") - return self._num_tokens() / elapsed_time - - # def _num_tokens(self, text: str) -> int: - # """Number of tokens""" - # tokenizer = self.tokenizer - # return count_tokens(text) - - def _time_for_generation(self, task: str) -> float: - """Time for Generation""" - self.start_time = time.time() - self.run(task) - self.end_time = time.time() - return self.end_time - self.start_time - - def generate_summary(self, text: str) -> str: - """Generate Summary""" - - def set_temperature(self, value: float): - """Set Temperature""" - self.temperature = value - - def set_max_tokens(self, value: int): - """Set new max tokens""" - self.max_tokens = value - - def clear_history(self): - """Clear history""" - self.history = [] - - def enable_logging(self, log_file: str = "model.log"): - """Initialize logging for the model.""" - logging.basicConfig(filename=log_file, level=logging.INFO) - self.log_file = log_file - - def log_event(self, message: str): - """Log an event.""" - logging.info( - f"{time.strftime('%Y-%m-%d %H:%M:%S')} - {message}" - ) - - def save_checkpoint(self, checkpoint_dir: str = "checkpoints"): - """Save the model state.""" - # This is a placeholder for actual checkpointing logic. - if not os.path.exists(checkpoint_dir): - os.makedirs(checkpoint_dir) - checkpoint_path = os.path.join( - checkpoint_dir, - f'checkpoint_{time.strftime("%Y%m%d-%H%M%S")}.ckpt', - ) - # Save model state to checkpoint_path - self.log_event(f"Model checkpoint saved at {checkpoint_path}") - - def load_checkpoint(self, checkpoint_path: str): - """Load the model state from a checkpoint.""" - # This is a placeholder for actual loading logic. - # Load model state from checkpoint_path - self.log_event(f"Model state loaded from {checkpoint_path}") - - def toggle_creative_mode(self, enable: bool): - """Toggle creative mode for the model.""" - self.creative_mode = enable - self.log_event( - f"Creative mode {'enabled' if enable else 'disabled'}." - ) - - def track_resource_utilization(self): - """Track and report resource utilization.""" - # This is a placeholder for actual tracking logic. - # Logic to track CPU, memory, etc. 
- utilization_report = "Resource utilization report here" - return utilization_report - - def get_generation_time(self) -> float: - """Get generation time""" - if self.start_time and self.end_time: - return self.end_time - self.start_time - return 0 - - def set_max_length(self, max_length: int): - """Set max length - - Args: - max_length (int): _description_ - """ - self.max_length = max_length - - def set_model_name(self, model_name: str): - """Set model name - - Args: - model_name (str): _description_ - """ - self.model_name = model_name - - def set_frequency_penalty(self, frequency_penalty: float): - """Set frequency penalty - - Args: - frequency_penalty (float): _description_ - """ - self.frequency_penalty = frequency_penalty - - def set_presence_penalty(self, presence_penalty: float): - """Set presence penalty - - Args: - presence_penalty (float): _description_ - """ - self.presence_penalty = presence_penalty - - def set_stop_token(self, stop_token: str): - """Set stop token - - Args: - stop_token (str): _description_ - """ - self.stop_token = stop_token - - def set_length_penalty(self, length_penalty: float): - """Set length penalty - - Args: - length_penalty (float): _description_ - """ - self.length_penalty = length_penalty - - def set_role(self, role: str): - """Set role - - Args: - role (str): _description_ - """ - self.role = role - - def set_top_k(self, top_k: int): - """Set top k - - Args: - top_k (int): _description_ - """ - self.top_k = top_k - - def set_top_p(self, top_p: float): - """Set top p - - Args: - top_p (float): _description_ - """ - self.top_p = top_p - - def set_num_beams(self, num_beams: int): - """Set num beams - - Args: - num_beams (int): _description_ - """ - self.num_beams = num_beams - - def set_do_sample(self, do_sample: bool): - """set do sample - - - Args: - do_sample (bool): _description_ - """ - self.do_sample = do_sample - - def set_early_stopping(self, early_stopping: bool): - """set early stopping - - Args: - early_stopping (bool): _description_ - """ - self.early_stopping = early_stopping - - def set_seed(self, seed: int): - """Set seed - - Args: - seed ([type]): [description] - """ - self.seed = seed - - def set_device(self, device: str): - """Set device - - Args: - device (str): _description_ - """ - self.device = device - - def metrics(self) -> str: - """ - Metrics - - Returns: - str: _description_ - """ - _sec_to_first_token = self._sec_to_first_token() - _tokens_per_second = self._tokens_per_second() - _num_tokens = self._num_tokens(self.history) - _time_for_generation = self._time_for_generation(self.history) - - return f""" - SEC TO FIRST TOKEN: {_sec_to_first_token} - TOKENS/SEC: {_tokens_per_second} - TOKENS: {_num_tokens} - Tokens/SEC: {_time_for_generation} - """ - - def time_to_first_token(self, prompt: str) -> float: - """Time to first token - - Args: - prompt (str): _description_ - - Returns: - float: _description_ - """ - start_time = time.time() - self.track_resource_utilization( - prompt - ) # assuming `generate` is a method that generates tokens - first_token_time = time.time() - return first_token_time - start_time - - def generation_latency(self, prompt: str) -> float: - """generation latency - - Args: - prompt (str): _description_ - - Returns: - float: _description_ - """ - start_time = time.time() - self.run(prompt) - end_time = time.time() - return end_time - start_time - - def throughput(self, prompts: List[str]) -> float: - """throughput - - Args: - prompts (): _description_ - - Returns: - float: _description_ - """ - 
start_time = time.time() - for prompt in prompts: - self.run(prompt) - end_time = time.time() - return len(prompts) / (end_time - start_time) diff --git a/swarms/models/base_multimodal_model.py b/swarms/models/base_multimodal_model.py deleted file mode 100644 index 96b63002..00000000 --- a/swarms/models/base_multimodal_model.py +++ /dev/null @@ -1,329 +0,0 @@ -import asyncio -import base64 -import concurrent.futures -import time -from abc import abstractmethod -from concurrent.futures import ThreadPoolExecutor -from io import BytesIO -from typing import List, Optional, Tuple -from swarms.structs.base_structure import BaseStructure -import requests -from PIL import Image -from termcolor import colored - - -class BaseMultiModalModel(BaseStructure): - """ - Base class for multimodal models - - - Args: - model_name (Optional[str], optional): Model name. Defaults to None. - temperature (Optional[int], optional): Temperature. Defaults to 0.5. - max_tokens (Optional[int], optional): Max tokens. Defaults to 500. - max_workers (Optional[int], optional): Max workers. Defaults to 10. - top_p (Optional[int], optional): Top p. Defaults to 1. - top_k (Optional[int], optional): Top k. Defaults to 50. - beautify (Optional[bool], optional): Beautify. Defaults to False. - device (Optional[str], optional): Device. Defaults to "cuda". - max_new_tokens (Optional[int], optional): Max new tokens. Defaults to 500. - retries (Optional[int], optional): Retries. Defaults to 3. - - Examples: - >>> from swarms.models.base_multimodal_model import BaseMultiModalModel - >>> model = BaseMultiModalModel() - >>> model.run("Generate a summary of this text") - >>> model.run("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png") - >>> model.run_batch(["Generate a summary of this text", "Generate a summary of this text"]) - >>> model.run_batch([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")]) - >>> model.run_batch_async(["Generate a summary of this text", "Generate a summary of this text"]) - >>> model.run_batch_async([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")]) - >>> model.run_batch_async_with_retries(["Generate a summary of this text", "Generate a summary of this text"]) - >>> model.run_batch_async_with_retries([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")]) - >>> model.generate_summary("Generate a summary of this text") - >>> model.set_temperature(0.5) - >>> model.set_max_tokens(500) - >>> model.get_generation_time() - >>> model.get_chat_history() - >>> model.get_unique_chat_history() - >>> model.get_chat_history_length() - >>> model.get_unique_chat_history_length() - >>> model.get_chat_history_tokens() - >>> model.print_beautiful("Print this beautifully") - >>> model.stream("Stream this") - >>> model.unique_chat_history() - >>> model.clear_chat_history() - >>> model.get_img_from_web("https://www.google.com/images/branding/googlelogo/") - - """ - - def __init__( - self, 
- model_name: Optional[str] = None, - temperature: Optional[int] = 0.5, - max_tokens: Optional[int] = 500, - max_workers: Optional[int] = 10, - top_p: Optional[int] = 1, - top_k: Optional[int] = 50, - beautify: Optional[bool] = False, - device: Optional[str] = "cuda", - max_new_tokens: Optional[int] = 500, - retries: Optional[int] = 3, - system_prompt: Optional[str] = None, - meta_prompt: Optional[str] = None, - *args, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.model_name = model_name - self.temperature = temperature - self.max_tokens = max_tokens - self.max_workers = max_workers - self.top_p = top_p - self.top_k = top_k - self.beautify = beautify - self.device = device - self.max_new_tokens = max_new_tokens - self.retries = retries - self.system_prompt = system_prompt - self.meta_prompt = meta_prompt - self.chat_history = [] - - @abstractmethod - def run( - self, - task: Optional[str] = None, - img: Optional[str] = None, - *args, - **kwargs, - ): - """Run the model""" - - def __call__( - self, - task: Optional[str] = None, - img: Optional[str] = None, - *args, - **kwargs, - ): - """Call the model - - Args: - task (str): _description_ - img (str): _description_ - - Returns: - _type_: _description_ - """ - return self.run(task, img, *args, **kwargs) - - async def arun(self, task: str, img: str, *args, **kwargs): - """Run the model asynchronously""" - - def get_img_from_web(self, img: str, *args, **kwargs): - """Get the image from the web""" - try: - response = requests.get(img) - response.raise_for_status() - image_pil = Image.open(BytesIO(response.content)) - return image_pil - except requests.RequestException as error: - print( - f"Error fetching image from {img} and error: {error}" - ) - return None - - def encode_img(self, img: str): - """Encode the image to base64""" - with open(img, "rb") as image_file: - return base64.b64encode(image_file.read()).decode("utf-8") - - def get_img(self, img: str): - """Get the image from the path""" - image_pil = Image.open(img) - return image_pil - - def clear_chat_history(self): - """Clear the chat history""" - self.chat_history = [] - - def run_many( - self, tasks: List[str], imgs: List[str], *args, **kwargs - ): - """ - Run the model on multiple tasks and images all at once using concurrent - - Args: - tasks (List[str]): List of tasks - imgs (List[str]): List of image paths - - Returns: - List[str]: List of responses - - - """ - # Instantiate the thread pool executor - with ThreadPoolExecutor( - max_workers=self.max_workers - ) as executor: - results = executor.map(self.run, tasks, imgs) - - # Print the results for debugging - for result in results: - print(result) - - def run_batch( - self, tasks_images: List[Tuple[str, str]] - ) -> List[str]: - """Process a batch of tasks and images""" - with concurrent.futures.ThreadPoolExecutor() as executor: - futures = [ - executor.submit(self.run, task, img) - for task, img in tasks_images - ] - results = [future.result() for future in futures] - return results - - async def run_batch_async( - self, tasks_images: List[Tuple[str, str]] - ) -> List[str]: - """Process a batch of tasks and images asynchronously""" - loop = asyncio.get_event_loop() - futures = [ - loop.run_in_executor(None, self.run, task, img) - for task, img in tasks_images - ] - return await asyncio.gather(*futures) - - async def run_batch_async_with_retries( - self, tasks_images: List[Tuple[str, str]] - ) -> List[str]: - """Process a batch of tasks and images asynchronously with retries""" - loop = asyncio.get_event_loop() - 
futures = [ - loop.run_in_executor( - None, self.run_with_retries, task, img - ) - for task, img in tasks_images - ] - return await asyncio.gather(*futures) - - def unique_chat_history(self): - """Get the unique chat history""" - return list(set(self.chat_history)) - - def run_with_retries(self, task: str, img: str): - """Run the model with retries""" - for i in range(self.retries): - try: - return self.run(task, img) - except Exception as error: - print(f"Error with the request {error}") - continue - - def run_batch_with_retries( - self, tasks_images: List[Tuple[str, str]] - ): - """Run the model with retries""" - for i in range(self.retries): - try: - return self.run_batch(tasks_images) - except Exception as error: - print(f"Error with the request {error}") - continue - - def _tokens_per_second(self) -> float: - """Tokens per second""" - elapsed_time = self.end_time - self.start_time - if elapsed_time == 0: - return float("inf") - return self._num_tokens() / elapsed_time - - def _time_for_generation(self, task: str) -> float: - """Time for Generation""" - self.start_time = time.time() - self.run(task) - self.end_time = time.time() - return self.end_time - self.start_time - - @abstractmethod - def generate_summary(self, text: str) -> str: - """Generate Summary""" - - def set_temperature(self, value: float): - """Set Temperature""" - self.temperature = value - - def set_max_tokens(self, value: int): - """Set new max tokens""" - self.max_tokens = value - - def get_generation_time(self) -> float: - """Get generation time""" - if self.start_time and self.end_time: - return self.end_time - self.start_time - return 0 - - def get_chat_history(self): - """Get the chat history""" - return self.chat_history - - def get_unique_chat_history(self): - """Get the unique chat history""" - return list(set(self.chat_history)) - - def get_chat_history_length(self): - """Get the chat history length""" - return len(self.chat_history) - - def get_unique_chat_history_length(self): - """Get the unique chat history length""" - return len(list(set(self.chat_history))) - - def get_chat_history_tokens(self): - """Get the chat history tokens""" - return self._num_tokens() - - def print_beautiful(self, content: str, color: str = "cyan"): - """Print Beautifully with termcolor""" - content = colored(content, color) - print(content) - - def stream_response(self, text: str): - """Stream the output - - Args: - content (str): _description_ - """ - for chunk in text: - print(chunk) - - def meta_prompt(self): - """Meta Prompt - - Returns: - _type_: _description_ - """ - META_PROMPT = """ - For any labels or markings on an image that you reference in your response, please - enclose them in square brackets ([]) and list them explicitly. Do not use ranges; for - example, instead of '1 - 4', list as '[1], [2], [3], [4]'. These labels could be - numbers or letters and typically correspond to specific segments or parts of the image. - """ - return META_PROMPT - - def set_device(self, device): - """ - Changes the device used for inference. - - Parameters - ---------- - device : str - The new device to use for inference. 
- """ - self.device = device - self.model.to(self.device) - - def set_max_length(self, max_length): - """Set max_length""" - self.max_length = max_length diff --git a/swarms/models/base_tts.py b/swarms/models/base_tts.py deleted file mode 100644 index a92a3bb7..00000000 --- a/swarms/models/base_tts.py +++ /dev/null @@ -1,89 +0,0 @@ -import wave -from abc import abstractmethod -from typing import Optional - -from swarms.models.base_llm import BaseLLM - - -class BaseTTSModel(BaseLLM): - """Base class for all TTS models. - - Args: - BaseLLM (_type_): _description_ - model_name (_type_): _description_ - voice (_type_): _description_ - chunk_size (_type_): _description_ - save_to_file (bool, optional): _description_. Defaults to False. - saved_filepath (Optional[str], optional): _description_. Defaults to None. - - Raises: - NotImplementedError: _description_ - - Methods: - save: save the model to a file. - load: load the model from a file. - run: run the model on the given task. - __call__: call the model on the given task. - save_to_file: save the speech data to a file. - - """ - - def __init__( - self, - model_name, - voice, - chunk_size, - save_to_file: bool = False, - saved_filepath: Optional[str] = None, - ): - self.model_name = model_name - self.voice = voice - self.chunk_size = chunk_size - self.save_to_file = save_to_file - self.saved_filepath = saved_filepath - - def save(self, filepath: Optional[str] = None): - """Save the model to a file. - - Args: - filepath (Optional[str], optional): _description_. Defaults to None. - """ - - def load(self, filepath: Optional[str] = None): - """Load the model from a file. - - Args: - filepath (Optional[str], optional): _description_. Defaults to None. - """ - - @abstractmethod - def run(self, task: str, *args, **kwargs): - """Run the model on the given task. - - Args: - task (str): _description_ - """ - - def __call__(self, task: str, *args, **kwargs): - """Call the model on the given task. - - Args: - task (str): _description_ - - Returns: - _type_: _description_ - """ - return self.run(task, *args, **kwargs) - - def save_to_file(self, speech_data, filename): - """Save the speech data to a file. - - Args: - speech_data (bytes): The speech data. - filename (str): The path to the file where the speech will be saved. - """ - with wave.open(filename, "wb") as file: - file.setnchannels(1) - file.setsampwidth(2) - file.setframerate(22050) - file.writeframes(speech_data) diff --git a/swarms/models/base_ttv.py b/swarms/models/base_ttv.py deleted file mode 100644 index 00052ba5..00000000 --- a/swarms/models/base_ttv.py +++ /dev/null @@ -1,117 +0,0 @@ -import asyncio -from abc import abstractmethod -from concurrent.futures import ThreadPoolExecutor -from typing import List, Optional - -from diffusers.utils import export_to_video - -from swarms.models.base_llm import BaseLLM - - -class BaseTextToVideo(BaseLLM): - """BaseTextToVideo class represents prebuilt text-to-video models.""" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - @abstractmethod - def run(self, *args, **kwargs): - pass - - def __call__( - self, - task: Optional[str] = None, - img: Optional[str] = None, - *args, - **kwargs, - ): - """ - Performs forward pass on the input task and returns the path of the generated video. - - Args: - task (str): The task to perform. - - Returns: - str: The path of the generated video. 
- """ - return self.run(task, img, *args, **kwargs) - - def save_video_path( - self, video_path: Optional[str] = None, *args, **kwargs - ): - """Saves the generated video to the specified path. - - Args: - video_path (Optional[str], optional): _description_. Defaults to None. - - Returns: - str: The path of the generated video. - """ - return export_to_video(video_path, *args, **kwargs) - - def run_batched( - self, - tasks: List[str] = None, - imgs: List[str] = None, - *args, - **kwargs, - ): - # TODO: Implement batched inference - tasks = tasks or [] - imgs = imgs or [] - if len(tasks) != len(imgs): - raise ValueError( - "The number of tasks and images should be the same." - ) - return [ - self.run(task, img, *args, **kwargs) - for task, img in zip(tasks, imgs) - ] - - def run_concurrent_batched( - self, - tasks: List[str] = None, - imgs: List[str] = None, - *args, - **kwargs, - ): - tasks = tasks or [] - imgs = imgs or [] - if len(tasks) != len(imgs): - raise ValueError( - "The number of tasks and images should be the same." - ) - with ThreadPoolExecutor(max_workers=4) as executor: - loop = asyncio.get_event_loop() - tasks = [ - loop.run_in_executor( - executor, self.run, task, img, *args, **kwargs - ) - for task, img in zip(tasks, imgs) - ] - return loop.run_until_complete(asyncio.gather(*tasks)) - - # Run the model in async mode - def arun( - self, - task: Optional[str] = None, - img: Optional[str] = None, - *args, - **kwargs, - ): - loop = asyncio.get_event_loop() - return loop.run_until_complete( - self.run(task, img, *args, **kwargs) - ) - - def arun_batched( - self, - tasks: List[str] = None, - imgs: List[str] = None, - *args, - **kwargs, - ): - loop = asyncio.get_event_loop() - return loop.run_until_complete( - self.run_batched(tasks, imgs, *args, **kwargs) - ) diff --git a/swarms/models/cog_vlm.py b/swarms/models/cog_vlm.py deleted file mode 100644 index ff78936c..00000000 --- a/swarms/models/cog_vlm.py +++ /dev/null @@ -1,528 +0,0 @@ -import base64 -import os -import time -from io import BytesIO -from typing import List, Literal, Optional, Tuple, Union - -import torch -from PIL import Image -from pydantic import BaseModel, Field -from transformers import ( - AutoModelForCausalLM, - LlamaTokenizer, - TextIteratorStreamer, -) - -from swarms.models.base_multimodal_model import BaseMultiModalModel -from swarms.utils.loguru_logger import logger - -MODEL_PATH = "THUDM/cogvlm-chat-hf" -TOKENIZER_PATH = "lmsys/vicuna-7b-v1.5" -DEVICE = "cuda" if torch.cuda.is_available() else "cpu" -QUANT_ENABLED = False - - -class ImageUrl(BaseModel): - url: str - - -class TextContent(BaseModel): - type: Literal["text"] - text: str - - -class ImageUrlContent(BaseModel): - type: Literal["image_url"] - image_url: ImageUrl - - -ContentItem = Union[TextContent, ImageUrlContent] - - -class ChatMessageInput(BaseModel): - role: Literal["user", "assistant", "system"] - content: Union[str, List[ContentItem]] - name: Optional[str] = None - - -class ChatMessageResponse(BaseModel): - role: Literal["assistant"] - content: str = None - name: Optional[str] = None - - -class DeltaMessage(BaseModel): - role: Optional[Literal["user", "assistant", "system"]] = None - content: Optional[str] = None - - -class ChatCompletionRequest(BaseModel): - model: str - messages: List[ChatMessageInput] - temperature: Optional[float] = 0.8 - top_p: Optional[float] = 0.8 - max_tokens: Optional[int] = None - stream: Optional[bool] = False - # Additional parameters - repetition_penalty: Optional[float] = 1.0 - - -class 
ChatCompletionResponseChoice(BaseModel): - index: int - message: ChatMessageResponse - - -class ChatCompletionResponseStreamChoice(BaseModel): - index: int - delta: DeltaMessage - - -class UsageInfo(BaseModel): - prompt_tokens: int = 0 - total_tokens: int = 0 - completion_tokens: Optional[int] = 0 - - -class ChatCompletionResponse(BaseModel): - model: str - object: Literal["chat.completion", "chat.completion.chunk"] - choices: List[ - Union[ - ChatCompletionResponseChoice, - ChatCompletionResponseStreamChoice, - ] - ] - created: Optional[int] = Field( - default_factory=lambda: int(time.time()) - ) - usage: Optional[UsageInfo] = None - - -# async def create_chat_completion(request: ChatCompletionRequest): -# global model, tokenizer - -# gen_params = dict( -# messages=request.messages, -# temperature=request.temperature, -# top_p=request.top_p, -# max_tokens=request.max_tokens or 1024, -# echo=False, -# stream=request.stream, -# ) - -# # if request.stream: -# # predict(request.model, gen_params) -# # response = generate_cogvlm(model, tokenizer, gen_params) - -# usage = UsageInfo() - -# message = ChatMessageResponse( -# role="assistant", -# content=response["text"], -# ) -# logger.debug(f"==== message ====\n{message}") -# choice_data = ChatCompletionResponseChoice( -# index=0, -# message=message, -# ) -# task_usage = UsageInfo.model_validate(response["usage"]) -# for usage_key, usage_value in task_usage.model_dump().items(): -# setattr( -# usage, usage_key, getattr(usage, usage_key) + usage_value -# ) -# return ChatCompletionResponse( -# model=request.model, -# choices=[choice_data], -# object="chat.completion", -# usage=usage, -# ) - - -class CogVLMMultiModal(BaseMultiModalModel): - """ - Initializes the CogVLM model. - - Args: - model_name (str): The path or name of the pre-trained model. - tokenizer (str): The path or name of the tokenizer. - device (str): The device to run the model on. - quantize (bool): Whether to enable quantization. - torch_type (str): The torch data type to use. - temperature (float): The temperature for sampling. - top_p (float): The top-p value for sampling. - max_tokens (int): The maximum number of tokens to generate. - echo (bool): Whether to echo the input text. - stream (bool): Whether to stream the output. - repetition_penalty (float): The repetition penalty for sampling. - do_sample (bool): Whether to use sampling during generation. - *args: Additional positional arguments. - **kwargs: Additional keyword arguments. - - Methods: - run: Generates a response using the CogVLM model. - generate_stream_cogvlm: Generates a stream of responses using the CogVLM model in inference mode. - process_history_and_images: Processes history messages to extract text, identify the last user query, and convert base64 encoded image URLs to PIL images. 
- - Example: - >>> model = CogVLMMultiModal() - >>> response = model("Describe this image with meticlous details.", "https://example.com/image.jpg") - >>> print(response) - """ - - def __init__( - self, - model_name: str = MODEL_PATH, - tokenizer: str = TOKENIZER_PATH, - device: str = DEVICE, - quantize: bool = QUANT_ENABLED, - torch_type: str = "float16", - temperature: float = 0.5, - top_p: float = 0.9, - max_tokens: int = 3500, - echo: bool = False, - stream: bool = False, - repetition_penalty: float = 1.0, - do_sample: bool = True, - *args, - **kwargs, - ): - super().__init__() - self.model_name = model_name - self.device = device - self.tokenizer = tokenizer - self.device = device - self.quantize = quantize - self.torch_type = torch_type - self.temperature = temperature - self.top_p = top_p - self.max_tokens = max_tokens - self.echo = echo - self.stream = stream - self.repetition_penalty = repetition_penalty - self.do_sample = do_sample - - if os.environ.get("QUANT_ENABLED"): - pass - else: - with torch.cuda.device(device): - __, total_bytes = torch.cuda.mem_get_info() - total_gb = total_bytes / (1 << 30) - if total_gb < 40: - pass - - torch.cuda.empty_cache() - - self.tokenizer = LlamaTokenizer.from_pretrained( - tokenizer, trust_remote_code=True - ) - - if ( - torch.cuda.is_available() - and torch.cuda.get_device_capability()[0] >= 8 - ): - torch_type = torch.bfloat16 - else: - torch_type = torch.float16 - - print( - f"========Use torch type as:{torch_type} with" - f" device:{device}========\n\n" - ) - - if "cuda" in device: - if QUANT_ENABLED: - self.model = AutoModelForCausalLM.from_pretrained( - model_name, - load_in_4bit=True, - trust_remote_code=True, - torch_dtype=torch_type, - low_cpu_mem_usage=True, - *args, - **kwargs, - ).eval() - else: - self.model = ( - AutoModelForCausalLM.from_pretrained( - model_name, - load_in_4bit=False, - trust_remote_code=True, - torch_dtype=torch_type, - low_cpu_mem_usage=True, - *args, - **kwargs, - ) - .to(device) - .eval() - ) - - else: - self.model = ( - AutoModelForCausalLM.from_pretrained( - model_name, - trust_remote_code=True, - *args, - **kwargs, - ) - .float() - .to(device) - .eval() - ) - - def run(self, task: str, img: str, *args, **kwargs): - """ - Generates a response using the CogVLM model. It processes the chat history and image data, if any, - and then invokes the model to generate a response. - """ - messages = [task] - - params = dict( - messages=messages, - temperature=self.temperature, - repitition_penalty=self.repetition_penalty, - top_p=self.top_p, - max_new_tokens=self.max_tokens, - ) - - for response in self.generate_stream_cogvlm(params): - pass - - return response - - @torch.inference_mode() - def generate_stream_cogvlm( - self, - params: dict, - ): - """ - Generates a stream of responses using the CogVLM model in inference mode. - It's optimized to handle continuous input-output interactions with the model in a streaming manner. 
- """ - messages = params["messages"] - temperature = float(params.get("temperature", 1.0)) - repetition_penalty = float( - params.get("repetition_penalty", 1.0) - ) - top_p = float(params.get("top_p", 1.0)) - max_new_tokens = int(params.get("max_tokens", 256)) - query, history, image_list = self.process_history_and_images( - messages - ) - - logger.debug(f"==== request ====\n{query}") - - input_by_model = self.model.build_conversation_input_ids( - self.tokenizer, - query=query, - history=history, - images=[image_list[-1]], - ) - inputs = { - "input_ids": ( - input_by_model["input_ids"] - .unsqueeze(0) - .to(self.device) - ), - "token_type_ids": ( - input_by_model["token_type_ids"] - .unsqueeze(0) - .to(self.device) - ), - "attention_mask": ( - input_by_model["attention_mask"] - .unsqueeze(0) - .to(self.device) - ), - "images": [ - [ - input_by_model["images"][0] - .to(self.device) - .to(self.torch_type) - ] - ], - } - if ( - "cross_images" in input_by_model - and input_by_model["cross_images"] - ): - inputs["cross_images"] = [ - [ - input_by_model["cross_images"][0] - .to(self.device) - .to(self.torch_type) - ] - ] - - input_echo_len = len(inputs["input_ids"][0]) - streamer = TextIteratorStreamer( - tokenizer=self.tokenizer, - timeout=60.0, - skip_promptb=True, - skip_special_tokens=True, - ) - gen_kwargs = { - "repetition_penalty": repetition_penalty, - "max_new_tokens": max_new_tokens, - "do_sample": True if temperature > 1e-5 else False, - "top_p": top_p if temperature > 1e-5 else 0, - "streamer": streamer, - } - if temperature > 1e-5: - gen_kwargs["temperature"] = temperature - - total_len = 0 - generated_text = "" - with torch.no_grad(): - self.model.generate(**inputs, **gen_kwargs) - for next_text in streamer: - generated_text += next_text - yield { - "text": generated_text, - "usage": { - "prompt_tokens": input_echo_len, - "completion_tokens": ( - total_len - input_echo_len - ), - "total_tokens": total_len, - }, - } - ret = { - "text": generated_text, - "usage": { - "prompt_tokens": input_echo_len, - "completion_tokens": total_len - input_echo_len, - "total_tokens": total_len, - }, - } - yield ret - - def process_history_and_images( - self, - messages: List[ChatMessageInput], - ) -> Tuple[ - Optional[str], - Optional[List[Tuple[str, str]]], - Optional[List[Image.Image]], - ]: - """ - Process history messages to extract text, identify the last user query, - and convert base64 encoded image URLs to PIL images. - - Args: - messages(List[ChatMessageInput]): List of ChatMessageInput objects. - return: A tuple of three elements: - - The last user query as a string. - - Text history formatted as a list of tuples for the model. - - List of PIL Image objects extracted from the messages. 
- """ - formatted_history = [] - image_list = [] - last_user_query = "" - - for i, message in enumerate(messages): - role = message.role - content = message.content - - # Extract text content - if isinstance(content, list): # text - text_content = " ".join( - item.text - for item in content - if isinstance(item, TextContent) - ) - else: - text_content = content - - # Extract image data - if isinstance(content, list): # image - for item in content: - if isinstance(item, ImageUrlContent): - image_url = item.image_url.url - if image_url.startswith( - "data:image/jpeg;base64," - ): - base64_encoded_image = image_url.split( - "data:image/jpeg;base64," - )[1] - image_data = base64.b64decode( - base64_encoded_image - ) - image = Image.open( - BytesIO(image_data) - ).convert("RGB") - image_list.append(image) - - # Format history - if role == "user": - if i == len(messages) - 1: - last_user_query = text_content - else: - formatted_history.append((text_content, "")) - elif role == "assistant": - if formatted_history: - if formatted_history[-1][1] != "": - raise AssertionError( - "the last query is answered. answer" - f" again. {formatted_history[-1][0]}," - f" {formatted_history[-1][1]}," - f" {text_content}" - ) - formatted_history[-1] = ( - formatted_history[-1][0], - text_content, - ) - else: - raise AssertionError( - "assistant reply before user" - ) - else: - raise AssertionError(f"unrecognized role: {role}") - - return last_user_query, formatted_history, image_list - - async def predict(self, params: dict): - """ - Handle streaming predictions. It continuously generates responses for a given input stream. - This is particularly useful for real-time, continuous interactions with the model. - """ - - choice_data = ChatCompletionResponseStreamChoice( - index=0, - delta=DeltaMessage(role="assistant"), - finish_reason=None, - ) - chunk = ChatCompletionResponse( - model=self.model_name, - choices=[choice_data], - object="chat.completion.chunk", - ) - yield f"{chunk.model_dump_json(exclude_unset=True)}" - - previous_text = "" - for new_response in self.generate_stream_cogvlm(params): - decoded_unicode = new_response["text"] - delta_text = decoded_unicode[len(previous_text) :] - previous_text = decoded_unicode - delta = DeltaMessage( - content=delta_text, - role="assistant", - ) - choice_data = ChatCompletionResponseStreamChoice( - index=0, - delta=delta, - ) - chunk = ChatCompletionResponse( - model=self.model_name, - choices=[choice_data], - object="chat.completion.chunk", - ) - yield f"{chunk.model_dump_json(exclude_unset=True)}" - choice_data = ChatCompletionResponseStreamChoice( - index=0, - delta=DeltaMessage(), - ) - chunk = ChatCompletionResponse( - model=self.model_name, - choices=[choice_data], - object="chat.completion.chunk", - ) - yield f"{chunk.model_dump_json(exclude_unset=True)}" diff --git a/swarms/models/dalle3.py b/swarms/models/dalle3.py deleted file mode 100644 index 0e02c3d6..00000000 --- a/swarms/models/dalle3.py +++ /dev/null @@ -1,367 +0,0 @@ -import concurrent.futures -import logging -import os -import uuid -from dataclasses import dataclass -from io import BytesIO -from typing import List - -import backoff -import openai -import requests -from cachetools import TTLCache -from dotenv import load_dotenv -from openai import OpenAI -from PIL import Image -from pydantic import field_validator -from termcolor import colored - -load_dotenv() - -# Configure Logging -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -def handle_errors(self, function): 
- def wrapper(*args, **kwargs): - try: - return function(*args, **kwargs) - except Exception as error: - logger.error(error) - raise - - return wrapper - - -@dataclass -class Dalle3: - """ - Dalle3 model class - - Attributes: - ----------- - image_url: str - The image url generated by the Dalle3 API - - Methods: - -------- - __call__(self, task: str) -> Dalle3: - Makes a call to the Dalle3 API and returns the image url - - Example: - -------- - >>> dalle3 = Dalle3() - >>> task = "A painting of a dog" - >>> image_url = dalle3(task) - >>> print(image_url) - https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png - - """ - - model: str = "dall-e-3" - img: str = None - size: str = "1024x1024" - max_retries: int = 3 - quality: str = "standard" - openai_api_key: str = None or os.getenv("OPENAI_API_KEY") - n: int = 1 - save_path: str = "images" - max_time_seconds: int = 60 - save_folder: str = "images" - image_format: str = "png" - client = OpenAI( - api_key=openai_api_key, - ) - cache = TTLCache(maxsize=100, ttl=3600) - dashboard: bool = False - - def __post_init__(self): - """Post init method""" - if self.openai_api_key is None: - raise ValueError("Please provide an openai api key") - if self.img is not None: - self.img = self.convert_to_bytesio(self.img) - - os.makedirs(self.save_path, exist_ok=True) - - class Config: - """Config class for the Dalle3 model""" - - arbitrary_types_allowed = True - - @field_validator("max_retries", "time_seconds") - @classmethod - def must_be_positive(cls, value): - if value <= 0: - raise ValueError("Must be positive") - return value - - def read_img(self, img: str): - """Read the image using pil""" - img = Image.open(img) - return img - - def set_width_height(self, img: str, width: int, height: int): - """Set the width and height of the image""" - img = self.read_img(img) - img = img.resize((width, height)) - return img - - def convert_to_bytesio(self, img: str, format: str = "PNG"): - """Convert the image to an bytes io object""" - byte_stream = BytesIO() - img.save(byte_stream, format=format) - byte_array = byte_stream.getvalue() - return byte_array - - @backoff.on_exception( - backoff.expo, Exception, max_time=max_time_seconds - ) - def __call__(self, task: str): - """ - Text to image conversion using the Dalle3 API - - Parameters: - ----------- - task: str - The task to be converted to an image - - Returns: - -------- - Dalle3: - An instance of the Dalle3 class with the image url generated by the Dalle3 API - - Example: - -------- - >>> dalle3 = Dalle3() - >>> task = "A painting of a dog" - >>> image_url = dalle3(task) - >>> print(image_url) - https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png - """ - if self.dashboard: - self.print_dashboard() - if task in self.cache: - return self.cache[task] - try: - # Making a call to the the Dalle3 API - response = self.client.images.generate( - model=self.model, - prompt=task, - size=self.size, - quality=self.quality, - n=self.n, - ) - # Extracting the image url from the response - img = response.data[0].url - - filename = f"{self._generate_uuid()}.{self.image_format}" - - # Download and save the image - self._download_image(img, filename) - - img_path = os.path.join(self.save_path, filename) - self.cache[task] = img_path - - return img_path - except openai.OpenAIError as error: - # Handling exceptions and printing the errors details - print( - colored( - ( - f"Error running Dalle3: {error} try" - " optimizing your api key and or try again" - ), - "red", - ) - ) - raise error - - def 
_generate_image_name(self, task: str): - """Generate a sanitized file name based on the task""" - sanitized_task = "".join( - char for char in task if char.isalnum() or char in " _ -" - ).rstrip() - return f"{sanitized_task}.{self.image_format}" - - def _download_image(self, img_url: str, filename: str): - """ - Download the image from the given URL and save it to a specified filename within self.save_path. - - Args: - img_url (str): URL of the image to download. - filename (str): Filename to save the image. - """ - full_path = os.path.join(self.save_path, filename) - response = requests.get(img_url) - if response.status_code == 200: - with open(full_path, "wb") as file: - file.write(response.content) - else: - raise ValueError( - f"Failed to download image from {img_url}" - ) - - def create_variations(self, img: str): - """ - Create variations of an image using the Dalle3 API - - Parameters: - ----------- - img: str - The image to be used for the API request - - Returns: - -------- - img: str - The image url generated by the Dalle3 API - - Example: - -------- - >>> dalle3 = Dalle3() - >>> img = "https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png" - >>> img = dalle3.create_variations(img) - >>> print(img) - - - """ - try: - response = self.client.images.create_variation( - img=open(img, "rb"), n=self.n, size=self.size - ) - img = response.data[0].url - - return img - except (Exception, openai.OpenAIError) as error: - print( - colored( - ( - f"Error running Dalle3: {error} try" - " optimizing your api key and or try again" - ), - "red", - ) - ) - print( - colored( - f"Error running Dalle3: {error.http_status}", - "red", - ) - ) - print( - colored(f"Error running Dalle3: {error.error}", "red") - ) - raise error - - def print_dashboard(self): - """Print the Dalle3 dashboard""" - print( - colored( - f"""Dalle3 Dashboard: - -------------------- - - Model: {self.model} - Image: {self.img} - Size: {self.size} - Max Retries: {self.max_retries} - Quality: {self.quality} - N: {self.n} - Save Path: {self.save_path} - Time Seconds: {self.time_seconds} - Save Folder: {self.save_folder} - Image Format: {self.image_format} - -------------------- - - - """, - "green", - ) - ) - - def process_batch_concurrently( - self, tasks: List[str], max_workers: int = 5 - ): - """ - - Process a batch of tasks concurrently - - Args: - tasks (List[str]): A list of tasks to be processed - max_workers (int): The maximum number of workers to use for the concurrent processing - - Returns: - -------- - results (List[str]): A list of image urls generated by the Dalle3 API - - Example: - -------- - >>> dalle3 = Dalle3() - >>> tasks = ["A painting of a dog", "A painting of a cat"] - >>> results = dalle3.process_batch_concurrently(tasks) - >>> print(results) - ['https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png', - - """ - with concurrent.futures.ThreadPoolExecutor( - max_workers=max_workers - ) as executor: - future_to_task = { - executor.submit(self, task): task for task in tasks - } - results = [] - for future in concurrent.futures.as_completed( - future_to_task - ): - task = future_to_task[future] - try: - img = future.result() - results.append(img) - - print(f"Task {task} completed: {img}") - except Exception as error: - print( - colored( - ( - f"Error running Dalle3: {error} try" - " optimizing your api key and or try" - " again" - ), - "red", - ) - ) - print( - colored( - ( - "Error running Dalle3:" - f" {error.http_status}" - ), - "red", - ) - ) - print( - colored( - f"Error running 
Dalle3: {error.error}", - "red", - ) - ) - raise error - - def _generate_uuid(self): - """Generate a uuid""" - return str(uuid.uuid4()) - - def __repr__(self): - """Repr method for the Dalle3 class""" - return f"Dalle3(image_url={self.image_url})" - - def __str__(self): - """Str method for the Dalle3 class""" - return f"Dalle3(image_url={self.image_url})" - - @backoff.on_exception( - backoff.expo, Exception, max_tries=max_retries - ) - def rate_limited_call(self, task: str): - """Rate limited call to the Dalle3 API""" - return self.__call__(task) diff --git a/swarms/models/embeddings_base.py b/swarms/models/embeddings_base.py deleted file mode 100644 index e91c415f..00000000 --- a/swarms/models/embeddings_base.py +++ /dev/null @@ -1,26 +0,0 @@ -"""Interface for embedding models.""" - -from abc import ABC, abstractmethod -from typing import List - - -class Embeddings(ABC): - """Interface for embedding models.""" - - @abstractmethod - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Embed search docs.""" - - @abstractmethod - def embed_query(self, text: str) -> List[float]: - """Embed query text.""" - - async def aembed_documents( - self, texts: List[str] - ) -> List[List[float]]: - """Embed search docs.""" - raise NotImplementedError - - async def aembed_query(self, text: str) -> List[float]: - """Embed query text.""" - raise NotImplementedError diff --git a/swarms/models/fuyu.py b/swarms/models/fuyu.py deleted file mode 100644 index e02e53a5..00000000 --- a/swarms/models/fuyu.py +++ /dev/null @@ -1,107 +0,0 @@ -from PIL import Image -from termcolor import colored -from transformers import ( - AutoTokenizer, - FuyuForCausalLM, - FuyuImageProcessor, - FuyuProcessor, -) - -from swarms.models.base_multimodal_model import BaseMultiModalModel - - -class Fuyu(BaseMultiModalModel): - """ - Fuyu model by Adept - - - Args: - BaseMultiModalModel (BaseMultiModalModel): [description] - model_name (str, optional): [description]. Defaults to "adept/fuyu-8b". - device_map (str, optional): [description]. Defaults to "auto". - max_new_tokens (int, optional): [description]. Defaults to 500. 
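A minimal sketch of a concrete subclass of the `Embeddings` interface shown above. The import path is the module being removed in this diff, so it is purely illustrative, and `HashEmbeddings` is a hypothetical toy implementation, not part of the library:

```python
from typing import List

# Import path is the module deleted above; shown only to illustrate subclassing.
from swarms.models.embeddings_base import Embeddings


class HashEmbeddings(Embeddings):
    """Toy implementation: buckets whitespace tokens into a fixed-size vector."""

    def __init__(self, dim: int = 8):
        self.dim = dim

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [self.embed_query(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        vec = [0.0] * self.dim
        for token in text.split():
            vec[hash(token) % self.dim] += 1.0  # hash() is per-process seeded; toy only
        return vec


print(HashEmbeddings().embed_query("hello world"))
```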
- *args: [description] - **kwargs: [description] - - - - Examples: - >>> from swarms.models import Fuyu - >>> model = Fuyu() - >>> model.run("Hello, world!", "https://upload.wikimedia.org/wikipedia/commons/8/86/Id%C3%A9fix.JPG") - """ - - def __init__( - self, - model_name: str = "adept/fuyu-8b", - device_map: str = "auto", - max_new_tokens: int = 500, - *args, - **kwargs, - ): - super().__init__(model_name=model_name, *args, **kwargs) - self.model_name = model_name - self.device_map = device_map - self.max_new_tokens = max_new_tokens - - self.tokenizer = AutoTokenizer.from_pretrained(model_name) - self.image_processor = FuyuImageProcessor() - self.processor = FuyuProcessor( - image_processor=self.image_processor, - tokenizer=self.tokenizer, - ) - self.model = FuyuForCausalLM.from_pretrained( - model_name, - device_map=device_map, - *args, - **kwargs, - ) - - def get_img(self, img: str): - """Get the image from the path""" - image_pil = Image.open(img) - return image_pil - - def run(self, text: str = None, img: str = None, *args, **kwargs): - """Run the pipeline - - Args: - text (str): _description_ - img (str): _description_ - - Returns: - _type_: _description_ - """ - try: - img = self.get_img(img) - model_inputs = self.processor( - text=text, - images=[img], - device=self.device_map, - ) - - for k, v in model_inputs.items(): - model_inputs[k] = v.to(self.device_map) - - output = self.model.generate( - max_new_tokens=self.max_new_tokens, - *args, - **model_inputs, - **kwargs, - ) - text = self.processor.batch_decode( - output[:, -7:], - skip_special_tokens=True, - ) - return print(str(text)) - except Exception as error: - print( - colored( - ( - "Error in" - f" {self.__class__.__name__} pipeline:" - f" {error}" - ), - "red", - ) - ) diff --git a/swarms/models/gemini.py b/swarms/models/gemini.py deleted file mode 100644 index fc7b4439..00000000 --- a/swarms/models/gemini.py +++ /dev/null @@ -1,277 +0,0 @@ -import os -import subprocess as sp -from pathlib import Path - -from dotenv import load_dotenv -from PIL import Image - -from swarms.models.base_multimodal_model import BaseMultiModalModel - -try: - import google.generativeai as genai - from google.generativeai.types import GenerationConfig -except ImportError as error: - print(f"Error importing google.generativeai: {error}") - print("Please install the google.generativeai package") - print("pip install google-generativeai") - sp.run(["pip", "install", "--upgrade", "google-generativeai"]) - - -load_dotenv() - - -# Helpers -def get_gemini_api_key_env(): - """Get the Gemini API key from the environment - - Raises: - ValueError: _description_ - - Returns: - _type_: _description_ - """ - key = os.getenv("GEMINI_API_KEY") - if key is None: - raise ValueError("Please provide a Gemini API key") - return str(key) - - -# Main class -class Gemini(BaseMultiModalModel): - """Gemini model - - Args: - model_name (str, optional): _description_. Defaults to "gemini-pro". - gemini_api_key (str, optional): _description_. Defaults to get_gemini_api_key_env. - return_safety (bool, optional): _description_. Defaults to False. - candidates (bool, optional): _description_. Defaults to False. - stream (bool, optional): _description_. Defaults to False. - candidate_count (int, optional): _description_. Defaults to 1. - stop_sequence ([type], optional): _description_. Defaults to ['x']. - max_tokens (int, optional): _description_. Defaults to 100. - temperature (float, optional): _description_. Defaults to 0.9. 
- - Methods: - run: Run the Gemini model - process_img: Process the image - chat: Chat with the Gemini model - list_models: List the Gemini models - stream_tokens: Stream the tokens - process_img_pil: Process img - - - - Examples: - >>> from swarms.models import Gemini - >>> gemini = Gemini() - >>> gemini.run( - task="A dog", - img="dog.png", - ) - """ - - def __init__( - self, - model_name: str = "gemini-pro-vision", - gemini_api_key: str = get_gemini_api_key_env, - return_safety: bool = False, - candidates: bool = False, - stream: bool = False, - candidate_count: int = 1, - transport: str = "rest", - stop_sequence=["x"], - max_tokens: int = 100, - temperature: float = 0.9, - system_prompt: str = None, - *args, - **kwargs, - ): - super().__init__(model_name, *args, **kwargs) - self.model_name = model_name - self.gemini_api_key = gemini_api_key - self.safety = return_safety - self.candidates = candidates - self.stream = stream - self.candidate_count = candidate_count - self.stop_sequence = stop_sequence - self.max_tokens = max_tokens - self.temperature = temperature - self.system_prompt = system_prompt - - # Configure the API key - genai.configure( - api_key=gemini_api_key, - transport=transport, - *args, - **kwargs, - ) - - # Prepare the generation config - self.generation_config = GenerationConfig( - candidate_count=candidate_count, - # stop_sequence=stop_sequence, - max_output_tokens=max_tokens, - temperature=temperature, - *args, - **kwargs, - ) - - # Initialize the model - self.model = genai.GenerativeModel( - model_name, *args, **kwargs - ) - - # Check for the key - if self.gemini_api_key is None: - raise ValueError("Please provide a Gemini API key") - - def system_prompt_prep( - self, - task: str = None, - *args, - **kwargs, - ): - """System prompt - - Args: - system_prompt (str, optional): _description_. Defaults to None. - """ - PROMPT = f""" - - {self.system_prompt} - - ###### - - {task} - - """ - return PROMPT - - def run( - self, - task: str = None, - img: str = None, - *args, - **kwargs, - ) -> str: - """Run the Gemini model - - Args: - task (str, optional): textual task. Defaults to None. - img (str, optional): img. Defaults to None. - - Returns: - str: output from the model - """ - try: - prepare_prompt = self.system_prompt_prep(task) - if img: - # process_img = self.process_img(img, *args, **kwargs) - process_img = self.process_img_pil(img) - response = self.model.generate_content( - contents=[prepare_prompt, process_img], - generation_config=self.generation_config, - stream=self.stream, - *args, - **kwargs, - ) - return response.text - else: - response = self.model.generate_content( - prepare_prompt, - stream=self.stream, - *args, - **kwargs, - ) - return response.text - except Exception as error: - print(f"Error running Gemini model: {error}") - print(f"Please check the task and image: {task}, {img}") - raise error - - def process_img( - self, - img: str = None, - type: str = "image/png", - *args, - **kwargs, - ): - """Process the image - - Args: - img (str, optional): _description_. Defaults to None. - type (str, optional): _description_. Defaults to "image/png". 
- - Raises: - ValueError: _description_ - ValueError: _description_ - ValueError: _description_ - """ - try: - if img is None: - raise ValueError("Please provide an image to process") - if type is None: - raise ValueError("Please provide the image type") - if self.gemini_api_key is None: - raise ValueError("Please provide a Gemini API key") - - # Load the image - img = [ - {"mime_type": type, "data": Path(img).read_bytes()} - ] - except Exception as error: - print(f"Error processing image: {error}") - - def chat( - self, - task: str = None, - img: str = None, - *args, - **kwargs, - ) -> str: - """Chat with the Gemini model - - Args: - task (str, optional): _description_. Defaults to None. - img (str, optional): _description_. Defaults to None. - - Returns: - str: _description_ - """ - chat = self.model.start_chat() - response = chat.send_message(task, *args, **kwargs) - response1 = response.text - print(response1) - response = chat.send_message(img, *args, **kwargs) - - def list_models(self) -> str: - """List the Gemini models - - Returns: - str: _description_ - """ - for m in genai.list_models(): - if "generateContent" in m.supported_generation_methods: - print(m.name) - - def stream_tokens(self, content: str = None): - """Stream the tokens - - Args: - content (t, optional): _description_. Defaults to None. - """ - for chunk in content: - print(chunk.text) - print("_" * 80) - - def process_img_pil(self, img: str = None): - """Process img - - Args: - img (str, optional): _description_. Defaults to None. - - Returns: - _type_: _description_ - """ - img = Image.open(img) - return img diff --git a/swarms/models/gpt4_vision_api.py b/swarms/models/gpt4_vision_api.py deleted file mode 100644 index e6b07204..00000000 --- a/swarms/models/gpt4_vision_api.py +++ /dev/null @@ -1,377 +0,0 @@ -import base64 -import json -import logging -import os -from typing import Optional - -import aiohttp -import requests -from dotenv import load_dotenv -from termcolor import colored -from swarms.utils.loguru_logger import logger -from swarms.models.base_multimodal_model import BaseMultiModalModel - -# Load environment variables -load_dotenv() -openai_api_key = os.getenv("OPENAI_API_KEY") - - -gpt4_vision_system_prompt = """ -You are an multi-modal autonomous agent. You are given a task and an image. You must generate a response to the task and image. - -""" - - -class GPT4VisionAPI(BaseMultiModalModel): - """ - GPT-4 Vision API - - This class is a wrapper for the OpenAI API. It is used to run the GPT-4 Vision model. - - Parameters - ---------- - openai_api_key : str - The OpenAI API key. Defaults to the OPENAI_API_KEY environment variable. - max_tokens : int - The maximum number of tokens to generate. Defaults to 300. - - - Methods - ------- - encode_image(img: str) - Encode image to base64. - run(task: str, img: str) - Run the model. - __call__(task: str, img: str) - Run the model. - - Examples: - --------- - >>> from swarms.models import GPT4VisionAPI - >>> llm = GPT4VisionAPI() - >>> task = "What is the color of the object?" 
- >>> img = "https://i.imgur.com/2M2ZGwC.jpeg" - >>> llm.run(task, img) - - - """ - - def __init__( - self, - openai_api_key: str = openai_api_key, - model_name: str = "gpt-4-vision-preview", - logging_enabled: bool = False, - max_workers: int = 10, - max_tokens: str = 300, - openai_proxy: str = "https://api.openai.com/v1/chat/completions", - beautify: bool = False, - streaming_enabled: Optional[bool] = False, - meta_prompt: Optional[bool] = False, - system_prompt: Optional[str] = gpt4_vision_system_prompt, - *args, - **kwargs, - ): - super(GPT4VisionAPI).__init__(*args, **kwargs) - self.openai_api_key = openai_api_key - self.logging_enabled = logging_enabled - self.model_name = model_name - self.max_workers = max_workers - self.max_tokens = max_tokens - self.openai_proxy = openai_proxy - self.beautify = beautify - self.streaming_enabled = streaming_enabled - self.meta_prompt = meta_prompt - self.system_prompt = system_prompt - - if self.logging_enabled: - logging.basicConfig(level=logging.DEBUG) - else: - # Disable debug logs for requests and urllib3 - logging.getLogger("requests").setLevel(logging.WARNING) - logging.getLogger("urllib3").setLevel(logging.WARNING) - - if self.meta_prompt: - self.system_prompt = self.meta_prompt_init() - - def encode_image(self, img: str): - """Encode image to base64.""" - if not os.path.exists(img): - print(f"Image file not found: {img}") - return None - - with open(img, "rb") as image_file: - return base64.b64encode(image_file.read()).decode("utf-8") - - def download_img_then_encode(self, img: str): - """Download image from URL then encode image to base64 using requests""" - if not os.path.exists(img): - print(f"Image file not found: {img}") - return None - - response = requests.get(img) - return base64.b64encode(response.content).decode("utf-8") - - # Function to handle vision tasks - def run( - self, - task: str = None, - img: str = None, - multi_imgs: list = None, - return_json: bool = False, - *args, - **kwargs, - ): - """Run the model.""" - try: - base64_image = self.encode_image(img) - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {self.openai_api_key}", - } - payload = { - "model": self.model_name, - "messages": [ - { - "role": "system", - "content": [self.system_prompt], - }, - { - "role": "user", - "content": [ - {"type": "text", "text": task}, - { - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{base64_image}" - }, - }, - ], - }, - ], - "max_tokens": self.max_tokens, - **kwargs, - } - response = requests.post(headers=headers, json=payload) - - # Get the response as a JSON object - response_json = response.json() - - # Return the JSON object if return_json is True - if return_json is True: - print(response_json) - return response_json - else: - return response_json - - except Exception as error: - logger.error( - f"Error with the request: {error}, make sure you" - " double check input types and positions" - ) - raise error - - def video_prompt(self, frames): - """ - SystemPrompt is a class that generates a prompt for the user to respond to. - The prompt is generated based on the current state of the system. 
- - Parameters - ---------- - frames : list - A list of base64 frames - - Returns - ------- - PROMPT : str - The system prompt - - Examples - -------- - - >>> from swarms.models import GPT4VisionAPI - >>> llm = GPT4VisionAPI() - >>> video = "video.mp4" - >>> base64_frames = llm.process_video(video) - >>> prompt = llm.video_prompt(base64_frames) - >>> print(prompt) - - """ - PROMPT = f""" - These are frames from a video that I want to upload. Generate a compelling description that I can upload along with the video: - - {frames} - """ - return PROMPT - - def stream_response(self, content: str): - """Stream the response of the output - - Args: - content (str): _description_ - """ - for chunk in content: - print(chunk) - - def __call__( - self, - task: Optional[str] = None, - img: Optional[str] = None, - *args, - **kwargs, - ): - """Call the model - - Args: - task (Optional[str], optional): _description_. Defaults to None. - img (Optional[str], optional): _description_. Defaults to None. - - Raises: - error: _description_ - """ - try: - base64_image = self.encode_image(img) - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {openai_api_key}", - } - payload = { - "model": self.model_name, - "messages": [ - { - "role": "system", - "content": [self.system_prompt], - }, - { - "role": "user", - "content": [ - {"type": "text", "text": task}, - { - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{base64_image}" - }, - }, - ], - }, - ], - "max_tokens": self.max_tokens, - } - response = requests.post( - self.openai_proxy, - headers=headers, - json=payload, - ) - - out = response.json() - content = out["choices"][0]["message"]["content"] - - if self.streaming_enabled: - content = self.stream_response(content) - - if self.beautify: - content = colored(content, "cyan") - print(content) - else: - print(content) - - except Exception as error: - print(f"Error with the request: {error}") - raise error - - async def arun( - self, - task: Optional[str] = None, - img: Optional[str] = None, - ): - """ - Asynchronously run the model - - Overview: - --------- - This method is used to asynchronously run the model. It is used to run the model - on a single task and image. - - Parameters: - ---------- - task : str - The task to run the model on. 
- img : str - The image to run the task on - - """ - try: - base64_image = self.encode_image(img) - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {openai_api_key}", - } - payload = { - "model": "gpt-4-vision-preview", - "messages": [ - { - "role": "user", - "content": [ - {"type": "text", "text": task}, - { - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{base64_image}" - }, - }, - ], - } - ], - "max_tokens": self.max_tokens, - } - async with aiohttp.ClientSession() as session: - async with session.post( - self.openai_proxy, - headers=headers, - data=json.dumps(payload), - ) as response: - out = await response.json() - content = out["choices"][0]["message"]["content"] - print(content) - except Exception as error: - print(f"Error with the request {error}") - raise error - - def health_check(self): - """Health check for the GPT4Vision model""" - try: - response = requests.get( - "https://api.openai.com/v1/engines" - ) - return response.status_code == 200 - except requests.RequestException as error: - print(f"Health check failed: {error}") - return False - - def print_dashboard(self): - dashboard = print( - colored( - f""" - GPT4Vision Dashboard - ------------------- - Model: {self.model_name} - Max Workers: {self.max_workers} - OpenAIProxy: {self.openai_proxy} - """, - "green", - ) - ) - return dashboard - - # def meta_prompt_init(self): - # """Meta Prompt - - # Returns: - # _type_: _description_ - # """ - # META_PROMPT = """ - # For any labels or markings on an image that you reference in your response, please - # enclose them in square brackets ([]) and list them explicitly. Do not use ranges; for - # example, instead of '1 - 4', list as '[1], [2], [3], [4]'. These labels could be - # numbers or letters and typically correspond to specific segments or parts of the image. - # """ - # return META_PROMPT diff --git a/swarms/models/huggingface.py b/swarms/models/huggingface.py deleted file mode 100644 index 84df4cfb..00000000 --- a/swarms/models/huggingface.py +++ /dev/null @@ -1,420 +0,0 @@ -import asyncio -import concurrent.futures -import logging -from typing import List, Tuple - -import torch -from termcolor import colored -from transformers import ( - AutoModelForCausalLM, - AutoTokenizer, - BitsAndBytesConfig, -) - -from swarms.models.base_llm import BaseLLM - - -class HuggingfaceLLM(BaseLLM): - """ - A class for running inference on a given model. - - Attributes: - model_id (str): The ID of the model. - device (str): The device to run the model on (either 'cuda' or 'cpu'). - max_length (int): The maximum length of the output sequence. - quantize (bool, optional): Whether to use quantization. Defaults to False. - quantization_config (dict, optional): The configuration for quantization. - verbose (bool, optional): Whether to print verbose logs. Defaults to False. - logger (logging.Logger, optional): The logger to use. Defaults to a basic logger. - - Methods: - run(task: str, max_length: int = 500) -> str: - Generate a response based on the prompt text. - - __call__(task: str, max_length: int = 500) -> str: - Generate a response based on the prompt text. - - save_model(path: str): - Save the model to a given path. - - gpu_available() -> bool: - Check if GPU is available. - - memory_consumption() -> dict: - Get the memory consumption of the GPU. - - print_dashboard(task: str): - Print dashboard. - - set_device(device: str): - Changes the device used for inference. - - set_max_length(max_length: int): - Set max_length. 
- - set_verbose(verbose: bool): - Set verbose. - - set_distributed(distributed: bool): - Set distributed. - - set_decoding(decoding: bool): - Set decoding. - - set_max_workers(max_workers: int): - Set max_workers. - - set_repitition_penalty(repitition_penalty: float): - Set repitition_penalty. - - set_no_repeat_ngram_size(no_repeat_ngram_size: int): - Set no_repeat_ngram_size. - - set_temperature(temperature: float): - Set temperature. - - set_top_k(top_k: int): - Set top_k. - - set_top_p(top_p: float): - Set top_p. - - set_quantize(quantize: bool): - Set quantize. - - set_quantization_config(quantization_config: dict): - Set quantization_config. - - set_model_id(model_id: str): - Set model_id. - - set_model(model): - Set model. - - set_tokenizer(tokenizer): - Set tokenizer. - - set_logger(logger): - Set logger. - - - Examples: - >>> llm = HuggingfaceLLM( - ... model_id="EleutherAI/gpt-neo-2.7B", - ... device="cuda", - ... max_length=500, - ... quantize=True, - ... quantization_config={ - ... "load_in_4bit": True, - ... "bnb_4bit_use_double_quant": True, - ... "bnb_4bit_quant_type": "nf4", - ... "bnb_4bit_compute_dtype": torch.bfloat16, - ... }, - ... ) - >>> llm("Generate a 10,000 word blog on mental clarity and the benefits of meditation.") - 'Generate a 10,000 word - """ - - def __init__( - self, - model_id: str, - device: str = None, - max_length: int = 500, - quantize: bool = False, - quantization_config: dict = None, - verbose=False, - distributed=False, - decoding=False, - max_workers: int = 5, - repitition_penalty: float = 1.3, - no_repeat_ngram_size: int = 5, - temperature: float = 0.7, - top_k: int = 40, - top_p: float = 0.8, - dtype=torch.bfloat16, - *args, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.logger = logging.getLogger(__name__) - self.device = ( - device - if device - else ("cuda" if torch.cuda.is_available() else "cpu") - ) - self.model_id = model_id - self.max_length = max_length - self.verbose = verbose - self.distributed = distributed - self.decoding = decoding - self.quantize = quantize - self.quantization_config = quantization_config - self.max_workers = max_workers - self.repitition_penalty = repitition_penalty - self.no_repeat_ngram_size = no_repeat_ngram_size - self.temperature = temperature - self.top_k = top_k - self.top_p = top_p - self.dtype = dtype - - if self.distributed: - assert ( - torch.cuda.device_count() > 1 - ), "You need more than 1 gpu for distributed processing" - - bnb_config = None - if quantize: - if not quantization_config: - quantization_config = { - "load_in_4bit": True, - "bnb_4bit_use_double_quant": True, - "bnb_4bit_quant_type": "nf4", - "bnb_4bit_compute_dtype": dtype, - } - bnb_config = BitsAndBytesConfig(**quantization_config) - - self.tokenizer = AutoTokenizer.from_pretrained(self.model_id) - - if quantize: - self.model = AutoModelForCausalLM.from_pretrained( - self.model_id, - quantization_config=bnb_config, - *args, - **kwargs, - ) - else: - self.model = AutoModelForCausalLM.from_pretrained( - self.model_id, *args, **kwargs - ).to(self.device) - - def print_error(self, error: str): - """Print error""" - print(colored(f"Error: {error}", "red")) - - async def async_run(self, task: str): - """Ashcnronous generate text for a given prompt""" - return await asyncio.to_thread(self.run, task) - - def concurrent_run(self, tasks: List[str], max_workers: int = 5): - """Concurrently generate text for a list of prompts.""" - with concurrent.futures.ThreadPoolExecutor( - max_workers=max_workers - ) as executor: - results = 
list(executor.map(self.run, tasks)) - return results - - def run_batch( - self, tasks_images: List[Tuple[str, str]] - ) -> List[str]: - """Process a batch of tasks and images""" - with concurrent.futures.ThreadPoolExecutor() as executor: - futures = [ - executor.submit(self.run, task, img) - for task, img in tasks_images - ] - results = [future.result() for future in futures] - return results - - def run(self, task: str, *args, **kwargs): - """ - Generate a response based on the prompt text. - - Args: - - task (str): Text to prompt the model. - - max_length (int): Maximum length of the response. - - Returns: - - Generated text (str). - """ - try: - inputs = self.tokenizer.encode(task, return_tensors="pt") - - if self.decoding: - with torch.no_grad(): - for _ in range(self.max_length): - output_sequence = [] - - outputs = self.model.generate( - inputs, - max_length=len(inputs) + 1, - do_sample=True, - ) - output_tokens = outputs[0][-1] - output_sequence.append(output_tokens.item()) - - # print token in real-time - print( - self.tokenizer.decode( - [output_tokens], - skip_special_tokens=True, - ), - end="", - flush=True, - ) - inputs = outputs - else: - with torch.no_grad(): - outputs = self.model.generate( - inputs, - max_length=self.max_length, - do_sample=True, - *args, - **kwargs, - ) - - return self.tokenizer.decode( - outputs[0], skip_special_tokens=True - ) - except Exception as e: - print( - colored( - ( - "HuggingfaceLLM could not generate text" - f" because of error: {e}, try optimizing your" - " arguments" - ), - "red", - ) - ) - raise - - def __call__(self, task: str, *args, **kwargs): - return self.run(task, *args, **kwargs) - - async def __call_async__(self, task: str, *args, **kwargs) -> str: - """Call the model asynchronously""" "" - return await self.run_async(task, *args, **kwargs) - - def save_model(self, path: str): - """Save the model to a given path""" - self.model.save_pretrained(path) - self.tokenizer.save_pretrained(path) - - def gpu_available(self) -> bool: - """Check if GPU is available""" - return torch.cuda.is_available() - - def memory_consumption(self) -> dict: - """Get the memory consumption of the GPU""" - if self.gpu_available(): - torch.cuda.synchronize() - allocated = torch.cuda.memory_allocated() - reserved = torch.cuda.memory_reserved() - return {"allocated": allocated, "reserved": reserved} - else: - return {"error": "GPU not available"} - - def print_dashboard(self, task: str): - """Print dashboard""" - - dashboard = print( - colored( - f""" - HuggingfaceLLM Dashboard - -------------------------------------------- - Model Name: {self.model_id} - Tokenizer: {self.tokenizer} - Model MaxLength: {self.max_length} - Model Device: {self.device} - Model Quantization: {self.quantize} - Model Quantization Config: {self.quantization_config} - Model Verbose: {self.verbose} - Model Distributed: {self.distributed} - Model Decoding: {self.decoding} - - ---------------------------------------- - Metadata: - Task Memory Consumption: {self.memory_consumption()} - GPU Available: {self.gpu_available()} - ---------------------------------------- - - Task Environment: - Task: {task} - - """, - "red", - ) - ) - - print(dashboard) - - def set_device(self, device): - """ - Changes the device used for inference. - - Parameters - ---------- - device : str - The new device to use for inference. 
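The `HuggingfaceLLM` constructor above falls back to a fixed 4-bit `BitsAndBytesConfig` when `quantize=True` and no `quantization_config` is supplied. A minimal standalone sketch of the same settings, assuming the `bitsandbytes` package and a CUDA GPU are available; the model ID is the one used in the docstring example above:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# The same 4-bit defaults the constructor above builds when quantize=True and no
# quantization_config is supplied. Requires the bitsandbytes package and a CUDA GPU.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model_id = "EleutherAI/gpt-neo-2.7B"  # model used in the docstring example above
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config)

inputs = tokenizer("Hello, world!", return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```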
- """ - self.device = device - if self.model is not None: - self.model.to(self.device) - - def set_max_length(self, max_length): - """Set max_length""" - self.max_length = max_length - - def clear_chat_history(self): - """Clear chat history""" - self.chat_history = [] - - def set_verbose(self, verbose): - """Set verbose""" - self.verbose = verbose - - def set_distributed(self, distributed): - """Set distributed""" - self.distributed = distributed - - def set_decoding(self, decoding): - """Set decoding""" - self.decoding = decoding - - def set_max_workers(self, max_workers): - """Set max_workers""" - self.max_workers = max_workers - - def set_repitition_penalty(self, repitition_penalty): - """Set repitition_penalty""" - self.repitition_penalty = repitition_penalty - - def set_no_repeat_ngram_size(self, no_repeat_ngram_size): - """Set no_repeat_ngram_size""" - self.no_repeat_ngram_size = no_repeat_ngram_size - - def set_temperature(self, temperature): - """Set temperature""" - self.temperature = temperature - - def set_top_k(self, top_k): - """Set top_k""" - self.top_k = top_k - - def set_top_p(self, top_p): - """Set top_p""" - self.top_p = top_p - - def set_quantize(self, quantize): - """Set quantize""" - self.quantize = quantize - - def set_quantization_config(self, quantization_config): - """Set quantization_config""" - self.quantization_config = quantization_config - - def set_model_id(self, model_id): - """Set model_id""" - self.model_id = model_id - - def set_model(self, model): - """Set model""" - self.model = model - - def set_tokenizer(self, tokenizer): - """Set tokenizer""" - self.tokenizer = tokenizer - - def set_logger(self, logger): - """Set logger""" - self.logger = logger diff --git a/swarms/models/huggingface_pipeline.py b/swarms/models/huggingface_pipeline.py deleted file mode 100644 index 118766a0..00000000 --- a/swarms/models/huggingface_pipeline.py +++ /dev/null @@ -1,72 +0,0 @@ -from abc import abstractmethod - -import torch -from termcolor import colored - -from swarms.models.base_llm import BaseLLM -from transformers.pipelines import pipeline - - -class HuggingfacePipeline(BaseLLM): - """HuggingfacePipeline - - Args: - BaseLLM (BaseLLM): [description] - task (str, optional): [description]. Defaults to "text-generation". - model_name (str, optional): [description]. Defaults to None. - use_fp8 (bool, optional): [description]. Defaults to False. 
- *args: [description] - **kwargs: [description] - - Raises: - - """ - - def __init__( - self, - task_type: str = "text-generation", - model_name: str = None, - use_fp8: bool = False, - *args, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.task_type = task_type - self.model_name = model_name - self.use_fp8 = use_fp8 - - if torch.cuda.is_available(): - self.use_fp8 = True - else: - self.use_fp8 = False - - self.pipe = pipeline( - task_type, model_name, use_fp8=use_fp8 * args, **kwargs - ) - - @abstractmethod - def run(self, task: str, *args, **kwargs) -> str: - """Run the pipeline - - Args: - task (str): [description] - *args: [description] - **kwargs: [description] - - Returns: - _type_: _description_ - """ - try: - out = self.pipeline(task, *args, **kwargs) - return out - except Exception as error: - print( - colored( - ( - "Error in" - f" {self.__class__.__name__} pipeline:" - f" {error}" - ), - "red", - ) - ) diff --git a/swarms/models/idefics.py b/swarms/models/idefics.py deleted file mode 100644 index cc654221..00000000 --- a/swarms/models/idefics.py +++ /dev/null @@ -1,189 +0,0 @@ -from typing import Callable, Optional - -import torch -from termcolor import colored -from transformers import AutoProcessor, IdeficsForVisionText2Text - -from swarms.models.base_multimodal_model import BaseMultiModalModel - - -def autodetect_device(): - """ - Autodetects the device to use for inference. - - Returns - ------- - str - The device to use for inference. - """ - return "cuda" if torch.cuda.is_available() else "cpu" - - -class Idefics(BaseMultiModalModel): - """ - - A class for multimodal inference using pre-trained models from the Hugging Face Hub. - - Attributes - ---------- - device : str - The device to use for inference. - model_name : str, optional - The name of the pre-trained model model_name (default is "HuggingFaceM4/idefics-9b-instruct"). - processor : transformers.PreTrainedProcessor - The pre-trained processor. - max_length : int - The maximum length of the generated text. - chat_history : list - The chat history. - - Methods - ------- - infer(prompts, batched_mode=True) - Generates text based on the provided prompts. - chat(user_input) - Engages in a continuous bidirectional conversation based on the user input. - set_model_name(model_name) - Changes the model model_name. - set_device(device) - Changes the device used for inference. - set_max_length(max_length) - Changes the maximum length of the generated text. - clear_chat_history() - Clears the chat history. - - - # Usage - ``` - from swarms.models import idefics - - model = idefics() - - user_input = "User: What is in this image? https://upload.wikimedia.org/wikipedia/commons/8/86/Id%C3%A9fix.JPG" - response = model.chat(user_input) - print(response) - - user_input = "User: And who is that? 
https://static.wikia.nocookie.net/asterix/images/2/25/R22b.gif/revision/latest?cb=20110815073052" - response = model.chat(user_input) - print(response) - - model.set_model_name("new_model_name") - model.set_device("cpu") - model.set_max_length(200) - model.clear_chat_history() - ``` - - """ - - def __init__( - self, - model_name: Optional[ - str - ] = "HuggingFaceM4/idefics-9b-instruct", - device: Callable = autodetect_device, - torch_dtype=torch.bfloat16, - max_length: int = 100, - batched_mode: bool = True, - *args, - **kwargs, - ): - # Initialize the parent class - super().__init__(*args, **kwargs) - self.model_name = model_name - self.device = device - self.max_length = max_length - self.batched_mode = batched_mode - - self.chat_history = [] - self.device = ( - device - if device - else ("cuda" if torch.cuda.is_available() else "cpu") - ) - self.model = IdeficsForVisionText2Text.from_pretrained( - model_name, torch_dtype=torch_dtype, *args, **kwargs - ).to(self.device) - - self.processor = AutoProcessor.from_pretrained( - model_name, *args, **kwargs - ) - - def run( - self, task: str = None, img: str = None, *args, **kwargs - ) -> str: - """ - Generates text based on the provided prompts. - - Parameters - ---------- - task : str - the task to perform - batched_mode : bool, optional - Whether to process the prompts in batched mode. If True, all prompts are - processed together. If False, only the first prompt is processed (default is True). - - Returns - ------- - list - A list of generated text strings. - """ - try: - inputs = ( - self.processor( - task, - add_end_of_utterance_token=False, - return_tensors="pt", - *args, - **kwargs, - ).to(self.device) - if self.batched_mode - else self.processor(task, return_tensors="pt").to( - self.device - ) - ) - - exit_condition = self.processor.tokenizer( - "", add_special_tokens=False - ).input_ids - - bad_words_ids = self.processor.tokenizer( - ["", " x4 or y2 < y3 or y1 > y4) - - -class Kosmos(BaseMultiModalModel): - """A class representing the Kosmos model. - - This model is used for multi-modal tasks such as grounding, referring expression comprehension, - referring expression generation, grounded VQA, grounded image captioning, and more. - - Args: - model_name (str): The name or path of the pre-trained model. - max_new_tokens (int): The maximum number of new tokens to generate. - verbose (bool): Whether to print verbose output. - *args: Variable length argument list. - **kwargs: Arbitrary keyword arguments. - - Attributes: - max_new_tokens (int): The maximum number of new tokens to generate. - model (AutoModelForVision2Seq): The pre-trained model for vision-to-sequence tasks. - processor (AutoProcessor): The pre-trained processor for vision-to-sequence tasks. - """ - - def __init__( - self, - model_name="ydshieh/kosmos-2-patch14-224", - max_new_tokens: int = 64, - verbose: bool = False, - *args, - **kwargs, - ): - super().__init__(*args, **kwargs) - - self.max_new_tokens = max_new_tokens - - self.model = AutoModelForVision2Seq.from_pretrained( - model_name, trust_remote_code=True, *args, **kwargs - ) - self.processor = AutoProcessor.from_pretrained( - model_name, trust_remote_code=True, *args, **kwargs - ) - - def get_image(self, url: str): - """Get image from url - - Args: - url (str): The URL of the image. - - Returns: - PIL.Image: The image object. - """ - return Image.open(requests.get(url, stream=True).raw) - - def run(self, task: str, image: str, *args, **kwargs): - """Run the model - - Args: - task (str): The task to run. 
- image (str): The URL of the image. - """ - inputs = self.processor( - text=task, images=image, return_tensors="pt" - ) - generated_ids = self.model.generate( - pixel_values=inputs["pixel_values"], - input_ids=inputs["input_ids"][:, :-1], - attention_mask=inputs["attention_mask"][:, :-1], - image_embeds=None, - img_attn_mask=inputs["img_attn_mask"][:, :-1], - use_cache=True, - max_new_tokens=self.max_new_tokens, - ) - - generated_texts = self.processor.batch_decode( - generated_ids, - skip_special_tokens=True, - )[0] - - processed_text, entities = ( - self.processor.post_process_generation(generated_texts) - ) - - return processed_text, entities - - # tasks - def multimodal_grounding(self, phrase, image_url): - task = f" {phrase} " - self.run(task, image_url) - - def referring_expression_comprehension(self, phrase, image_url): - task = f" {phrase} " - self.run(task, image_url) - - def referring_expression_generation(self, phrase, image_url): - task = ( - "" - " It is" - ) - self.run(task, image_url) - - def grounded_vqa(self, question, image_url): - task = f" Question: {question} Answer:" - self.run(task, image_url) - - def grounded_image_captioning(self, image_url): - task = " An image of" - self.run(task, image_url) - - def grounded_image_captioning_detailed(self, image_url): - task = " Describe this image in detail" - self.run(task, image_url) - - def generate_boxees(self, task, image_url): - image = self.get_image(image_url) - processed_text, entities = self.process_task(task, image) - self.draw_entity_boxes_on_image(image, entities, show=True) diff --git a/swarms/models/layoutlm_document_qa.py b/swarms/models/layoutlm_document_qa.py deleted file mode 100644 index 09aa9a1a..00000000 --- a/swarms/models/layoutlm_document_qa.py +++ /dev/null @@ -1,51 +0,0 @@ -""" -LayoutLMDocumentQA is a multimodal good for -visual question answering on real world docs lik invoice, pdfs, etc -""" - -from transformers import pipeline - -from swarms.models.base_multimodal_model import BaseMultiModalModel - - -class LayoutLMDocumentQA(BaseMultiModalModel): - """ - LayoutLMDocumentQA for document question answering: - - Args: - model_name (str, optional): [description]. Defaults to "impira/layoutlm-document-qa". - task (str, optional): [description]. Defaults to "document-question-answering". - - Usage: - >>> from swarms.models import LayoutLMDocumentQA - >>> model = LayoutLMDocumentQA() - >>> out = model("What is the total amount?", "path/to/img.png") - >>> print(out) - - """ - - def __init__( - self, - model_name: str = "impira/layoutlm-document-qa", - task_type: str = "document-question-answering", - *args, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.model_name = model_name - self.task_type = task_type - self.pipeline = pipeline(task_type, model=model_name) - - def __call__(self, task: str, img_path: str, *args, **kwargs): - """Call the LayoutLMDocumentQA model - - Args: - task (str): _description_ - img_path (str): _description_ - - Returns: - _type_: _description_ - """ - out = self.pipeline(img_path, task) - out = str(out) - return out diff --git a/swarms/models/llama3_hosted.py b/swarms/models/llama3_hosted.py deleted file mode 100644 index 88a9979f..00000000 --- a/swarms/models/llama3_hosted.py +++ /dev/null @@ -1,82 +0,0 @@ -import requests -import json -from swarms.models.base_llm import BaseLLM - - -class llama3Hosted(BaseLLM): - """ - A class representing a hosted version of the Llama3 model. - - Args: - model (str): The name or path of the Llama3 model to use. 
- temperature (float): The temperature parameter for generating responses. - max_tokens (int): The maximum number of tokens in the generated response. - system_prompt (str): The system prompt to use for generating responses. - *args: Variable length argument list. - **kwargs: Arbitrary keyword arguments. - - Attributes: - model (str): The name or path of the Llama3 model. - temperature (float): The temperature parameter for generating responses. - max_tokens (int): The maximum number of tokens in the generated response. - system_prompt (str): The system prompt for generating responses. - - Methods: - run(task, *args, **kwargs): Generates a response for the given task. - - """ - - def __init__( - self, - model: str = "meta-llama/Meta-Llama-3-8B-Instruct", - temperature: float = 0.8, - max_tokens: int = 4000, - system_prompt: str = "You are a helpful assistant.", - base_url: str = "http://34.204.8.31:30001/v1/chat/completions", - *args, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.model = model - self.temperature = temperature - self.max_tokens = max_tokens - self.system_prompt = system_prompt - self.base_url = base_url - - def run(self, task: str, *args, **kwargs) -> str: - """ - Generates a response for the given task. - - Args: - task (str): The user's task or input. - - Returns: - str: The generated response from the Llama3 model. - - """ - - payload = json.dumps( - { - "model": self.model, - "messages": [ - {"role": "system", "content": self.system_prompt}, - {"role": "user", "content": task}, - ], - "stop_token_ids": [128009, 128001], - "temperature": self.temperature, - "max_tokens": self.max_tokens, - } - ) - - headers = {"Content-Type": "application/json"} - - response = requests.request( - "POST", self.base_url, headers=headers, data=payload - ) - - response_json = response.json() - assistant_message = response_json["choices"][0]["message"][ - "content" - ] - - return assistant_message diff --git a/swarms/models/llama_function_caller.py b/swarms/models/llama_function_caller.py deleted file mode 100644 index 0f175edb..00000000 --- a/swarms/models/llama_function_caller.py +++ /dev/null @@ -1,230 +0,0 @@ -# !pip install accelerate -# !pip install torch -# !pip install transformers -# !pip install bitsandbytes - -from typing import Callable, Dict, List - -import torch -from transformers import ( - AutoModelForCausalLM, - AutoTokenizer, - BitsAndBytesConfig, - TextStreamer, -) -from swarms.models.base_llm import BaseLLM - - -class LlamaFunctionCaller(BaseLLM): - """ - A class to manage and execute Llama functions. - - Attributes: - ----------- - model: transformers.AutoModelForCausalLM - The loaded Llama model. - tokenizer: transformers.AutoTokenizer - The tokenizer for the Llama model. - functions: Dict[str, Callable] - A dictionary of functions available for execution. - - Methods: - -------- - __init__(self, model_id: str, cache_dir: str, runtime: str) - Initializes the LlamaFunctionCaller with the specified model. - add_func(self, name: str, function: Callable, description: str, arguments: List[Dict]) - Adds a new function to the LlamaFunctionCaller. - call_function(self, name: str, **kwargs) - Calls the specified function with given arguments. - stream(self, user_prompt: str) - Streams a user prompt to the model and prints the response. 
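`llama3Hosted.run()` above is a thin wrapper over a chat-completions POST; its default `base_url` is a hard-coded IP that may no longer be reachable. A sketch of the same request with plain `requests`, where `LLAMA3_BASE_URL` is an assumed environment variable rather than anything defined by the library:

```python
import json
import os

import requests

# Same request shape as llama3Hosted.run() above. LLAMA3_BASE_URL is an assumed
# environment variable; the class's hard-coded default endpoint may no longer exist.
base_url = os.getenv(
    "LLAMA3_BASE_URL", "http://localhost:8000/v1/chat/completions"
)
payload = {
    "model": "meta-llama/Meta-Llama-3-8B-Instruct",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write one sentence about swarm agents."},
    ],
    "stop_token_ids": [128009, 128001],
    "temperature": 0.8,
    "max_tokens": 4000,
}
response = requests.post(
    base_url,
    headers={"Content-Type": "application/json"},
    data=json.dumps(payload),
    timeout=60,
)
print(response.json()["choices"][0]["message"]["content"])
```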
- - - Example: - - # Example usage - model_id = "Your-Model-ID" - cache_dir = "Your-Cache-Directory" - runtime = "cuda" # or 'cpu' - - llama_caller = LlamaFunctionCaller(model_id, cache_dir, runtime) - - - # Add a custom function - def get_weather(location: str, format: str) -> str: - # This is a placeholder for the actual implementation - return f"Weather at {location} in {format} format." - - - llama_caller.add_func( - name="get_weather", - function=get_weather, - description="Get the weather at a location", - arguments=[ - { - "name": "location", - "type": "string", - "description": "Location for the weather", - }, - { - "name": "format", - "type": "string", - "description": "Format of the weather data", - }, - ], - ) - - # Call the function - result = llama_caller.call_function("get_weather", location="Paris", format="Celsius") - print(result) - - # Stream a user prompt - llama_caller("Tell me about the tallest mountain in the world.") - - """ - - def __init__( - self, - model_id: str = "Trelis/Llama-2-7b-chat-hf-function-calling-v2", - cache_dir: str = "llama_cache", - runtime: str = "auto", - max_tokens: int = 500, - streaming: bool = False, - *args, - **kwargs, - ): - self.model_id = model_id - self.cache_dir = cache_dir - self.runtime = runtime - self.max_tokens = max_tokens - self.streaming = streaming - - # Load the model and tokenizer - self.model = self._load_model() - self.tokenizer = AutoTokenizer.from_pretrained( - model_id, cache_dir=cache_dir, use_fast=True - ) - self.functions = {} - - def _load_model(self): - # Configuration for loading the model - bnb_config = BitsAndBytesConfig( - load_in_4bit=True, - bnb_4bit_use_double_quant=True, - bnb_4bit_quant_type="nf4", - bnb_4bit_compute_dtype=torch.bfloat16, - ) - return AutoModelForCausalLM.from_pretrained( - self.model_id, - quantization_config=bnb_config, - device_map=self.runtime, - trust_remote_code=True, - cache_dir=self.cache_dir, - ) - - def add_func( - self, - name: str, - function: Callable, - description: str, - arguments: List[Dict], - ): - """ - Adds a new function to the LlamaFunctionCaller. - - Args: - name (str): The name of the function. - function (Callable): The function to execute. - description (str): Description of the function. - arguments (List[Dict]): List of argument specifications. - """ - self.functions[name] = { - "function": function, - "description": description, - "arguments": arguments, - } - - def call_function(self, name: str, **kwargs): - """ - Calls the specified function with given arguments. - - Args: - name (str): The name of the function to call. - **kwargs: Keyword arguments for the function call. - - Returns: - The result of the function call. - """ - if name not in self.functions: - raise ValueError(f"Function {name} not found.") - - func_info = self.functions[name] - return func_info["function"](**kwargs) - - def __call__(self, task: str, **kwargs): - """ - Streams a user prompt to the model and prints the response. - - Args: - task (str): The user prompt to stream. 
- """ - # Format the prompt - prompt = f"{task}\n\n" - - # Encode and send to the model - inputs = self.tokenizer([prompt], return_tensors="pt").to( - self.runtime - ) - - streamer = TextStreamer(self.tokenizer) - - if self.streaming: - out = self.model.generate( - **inputs, - streamer=streamer, - max_new_tokens=self.max_tokens, - **kwargs, - ) - - return out - else: - out = self.model.generate( - **inputs, max_length=self.max_tokens, **kwargs - ) - # return self.tokenizer.decode(out[0], skip_special_tokens=True) - return out - - -# llama_caller = LlamaFunctionCaller() - - -# # Add a custom function -# def get_weather(location: str, format: str) -> str: -# # This is a placeholder for the actual implementation -# return f"Weather at {location} in {format} format." - - -# llama_caller.add_func( -# name="get_weather", -# function=get_weather, -# description="Get the weather at a location", -# arguments=[ -# { -# "name": "location", -# "type": "string", -# "description": "Location for the weather", -# }, -# { -# "name": "format", -# "type": "string", -# "description": "Format of the weather data", -# }, -# ], -# ) - -# # Call the function -# result = llama_caller.call_function("get_weather", location="Paris", format="Celsius") -# print(result) - -# # Stream a user prompt -# llama_caller("Tell me about the tallest mountain in the world.") diff --git a/swarms/models/llava.py b/swarms/models/llava.py deleted file mode 100644 index 5aa4681f..00000000 --- a/swarms/models/llava.py +++ /dev/null @@ -1,84 +0,0 @@ -from io import BytesIO -from typing import Tuple, Union - -import requests -from PIL import Image -from transformers import AutoProcessor, LlavaForConditionalGeneration - -from swarms.models.base_multimodal_model import BaseMultiModalModel - - -class LavaMultiModal(BaseMultiModalModel): - """ - A class to handle multi-modal inputs (text and image) using the Llava model for conditional generation. - - Attributes: - model_name (str): The name or path of the pre-trained model. - max_length (int): The maximum length of the generated sequence. - - Args: - model_name (str): The name of the pre-trained model. - max_length (int): The maximum length of the generated sequence. - *args: Additional positional arguments. - **kwargs: Additional keyword arguments. - - Examples: - >>> model = LavaMultiModal() - >>> model.run("A cat", "https://example.com/cat.jpg") - - """ - - def __init__( - self, - model_name: str = "llava-hf/llava-1.5-7b-hf", - max_length: int = 30, - *args, - **kwargs, - ) -> None: - super().__init__(*args, **kwargs) - self.model_name = model_name - self.max_length = max_length - - self.model = LlavaForConditionalGeneration.from_pretrained( - model_name, *args, **kwargs - ) - self.processor = AutoProcessor.from_pretrained(model_name) - - def run( - self, text: str, img: str, *args, **kwargs - ) -> Union[str, Tuple[None, str]]: - """ - Processes the input text and image, and generates a response. - - Args: - text (str): The input text for the model. - img (str): The URL of the image to process. - max_length (int): The maximum length of the generated sequence. - - Returns: - Union[str, Tuple[None, str]]: The generated response string or a tuple (None, error message) in case of an error. 
- """ - try: - response = requests.get(img, stream=True) - response.raise_for_status() - image = Image.open(BytesIO(response.content)) - - inputs = self.processor( - text=text, images=image, return_tensors="pt" - ) - - # Generate - generate_ids = self.model.generate( - **inputs, max_length=self.max_length, **kwargs - ) - return self.processor.batch_decode( - generate_ids, - skip_special_tokens=True, - clean_up_tokenization_spaces=False, - *args, - )[0] - - except requests.RequestException as e: - return None, f"Error fetching image: {str(e)}" - except Exception as e: - return None, f"Error during model processing: {str(e)}" diff --git a/swarms/models/model_router.py b/swarms/models/model_router.py deleted file mode 100644 index c695c20e..00000000 --- a/swarms/models/model_router.py +++ /dev/null @@ -1,359 +0,0 @@ -from typing import List, Union - -from swarms.models.base_embedding_model import BaseEmbeddingModel -from swarms.models.base_llm import BaseLLM -from swarms.models.base_multimodal_model import BaseMultiModalModel -from swarms.models.fuyu import Fuyu # noqa: E402 -from swarms.models.gpt4_vision_api import GPT4VisionAPI # noqa: E402 -from swarms.models.huggingface import HuggingfaceLLM # noqa: E402 -from swarms.models.idefics import Idefics # noqa: E402 -from swarms.models.kosmos_two import Kosmos # noqa: E402 -from swarms.models.layoutlm_document_qa import LayoutLMDocumentQA -from swarms.models.llama3_hosted import llama3Hosted -from swarms.models.llava import LavaMultiModal # noqa: E402 -from swarms.models.nougat import Nougat # noqa: E402 -from swarms.models.openai_embeddings import OpenAIEmbeddings -from swarms.models.openai_function_caller import OpenAIFunctionCaller -from swarms.models.openai_tts import OpenAITTS # noqa: E402 -from swarms.models.palm import GooglePalm as Palm # noqa: E402 -from swarms.models.popular_llms import Anthropic as Anthropic -from swarms.models.popular_llms import ( - AzureOpenAILLM as AzureOpenAI, -) -from swarms.models.popular_llms import ( - CohereChat as Cohere, -) -from swarms.models.popular_llms import FireWorksAI, OctoAIChat -from swarms.models.popular_llms import ( - OpenAIChatLLM as OpenAIChat, -) -from swarms.models.popular_llms import ( - OpenAILLM as OpenAI, -) -from swarms.models.popular_llms import ReplicateChat as Replicate -from swarms.models.qwen import QwenVLMultiModal # noqa: E402 -from swarms.models.sampling_params import SamplingParams -from swarms.models.together import TogetherLLM # noqa: E402 -from swarms.models.vilt import Vilt # noqa: E402 -from swarms.structs.base_structure import BaseStructure -from swarms.utils.loguru_logger import logger - -# New type BaseLLM and BaseEmbeddingModel and BaseMultimodalModel -omni_model_type = Union[ - BaseLLM, BaseEmbeddingModel, BaseMultiModalModel, callable -] -list_of_omni_model_type = List[omni_model_type] - - -models = [ - BaseLLM, - BaseEmbeddingModel, - BaseMultiModalModel, - Fuyu, - GPT4VisionAPI, - HuggingfaceLLM, - Idefics, - Kosmos, - LayoutLMDocumentQA, - llama3Hosted, - LavaMultiModal, - Nougat, - OpenAIEmbeddings, - OpenAITTS, - Palm, - Anthropic, - AzureOpenAI, - Cohere, - OctoAIChat, - OpenAIChat, - OpenAI, - Replicate, - QwenVLMultiModal, - SamplingParams, - TogetherLLM, - Vilt, - FireWorksAI, - OpenAIFunctionCaller, -] - - -class ModelRouter(BaseStructure): - """ - A router for managing multiple models. - - Attributes: - model_router_id (str): The ID of the model router. - model_router_description (str): The description of the model router. 
- model_pool (List[omni_model_type]): The list of models in the model pool. - - Methods: - check_for_models(): Checks if there are any models in the model pool. - add_model(model: omni_model_type): Adds a model to the model pool. - add_models(models: List[omni_model_type]): Adds multiple models to the model pool. - get_model_by_name(model_name: str) -> omni_model_type: Retrieves a model from the model pool by its name. - get_multiple_models_by_name(model_names: List[str]) -> List[omni_model_type]: Retrieves multiple models from the model pool by their names. - get_model_pool() -> List[omni_model_type]: Retrieves the entire model pool. - get_model_by_index(index: int) -> omni_model_type: Retrieves a model from the model pool by its index. - get_model_by_id(model_id: str) -> omni_model_type: Retrieves a model from the model pool by its ID. - dict() -> dict: Returns a dictionary representation of the model router. - - """ - - def __init__( - self, - model_router_id: str = "model_router", - model_router_description: str = "A router for managing multiple models.", - model_pool: List[omni_model_type] = models, - verbose: bool = False, - *args, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.model_router_id = model_router_id - self.model_router_description = model_router_description - self.model_pool = model_pool - self.verbose = verbose - - self.check_for_models() - # self.refactor_model_class_if_invoke() - - def check_for_models(self): - """ - Checks if there are any models in the model pool. - - Returns: - None - - Raises: - ValueError: If no models are found in the model pool. - """ - if len(self.model_pool) == 0: - raise ValueError("No models found in model pool.") - - def add_model(self, model: omni_model_type): - """ - Adds a model to the model pool. - - Args: - model (omni_model_type): The model to be added. - - Returns: - str: A success message indicating that the model has been added to the model pool. - """ - logger.info(f"Adding model {model.name} to model pool.") - self.model_pool.append(model) - return "Model successfully added to model pool." - - def add_models(self, models: List[omni_model_type]): - """ - Adds multiple models to the model pool. - - Args: - models (List[omni_model_type]): The models to be added. - - Returns: - str: A success message indicating that the models have been added to the model pool. - """ - logger.info("Adding models to model pool.") - self.model_pool.extend(models) - return "Models successfully added to model pool." - - # def query_model_from_langchain(self, model_name: str, *args, **kwargs): - # """ - # Query a model from langchain community. - - # Args: - # model_name (str): The name of the model. - # *args: Additional positional arguments to be passed to the model. - # **kwargs: Additional keyword arguments to be passed to the model. - - # Returns: - # omni_model_type: The model object. - - # Raises: - # ValueError: If the model with the given name is not found in the model pool. - # """ - # from langchain_community.llms import __getattr__ - - # logger.info( - # f"Querying model {model_name} from langchain community." - # ) - # model = __getattr__(model_name)(*args, **kwargs) - # model = self.refactor_model_class_if_invoke_class(model) - - # return model - - def get_model_by_name(self, model_name: str) -> omni_model_type: - """ - Retrieves a model from the model pool by its name. - - Args: - model_name (str): The name of the model. - - Returns: - omni_model_type: The model object. 
- - Raises: - ValueError: If the model with the given name is not found in the model pool. - """ - logger.info(f"Retrieving model {model_name} from model pool.") - for model in self.model_pool: - if model_name in [ - model.name, - model.model_id, - model.model_name, - ]: - return model - raise ValueError( - f"Model {model_name} not found in model pool." - ) - - def get_multiple_models_by_name( - self, model_names: List[str] - ) -> List[omni_model_type]: - """ - Retrieves multiple models from the model pool by their names. - - Args: - model_names (List[str]): The names of the models. - - Returns: - List[omni_model_type]: The list of model objects. - - Raises: - ValueError: If any of the models with the given names are not found in the model pool. - """ - logger.info( - f"Retrieving multiple models {model_names} from model pool." - ) - models = [] - for model_name in model_names: - models.append(self.get_model_by_name(model_name)) - return models - - def get_model_pool(self) -> List[omni_model_type]: - """ - Retrieves the entire model pool. - - Returns: - List[omni_model_type]: The list of model objects in the model pool. - """ - return self.model_pool - - def get_model_by_index(self, index: int) -> omni_model_type: - """ - Retrieves a model from the model pool by its index. - - Args: - index (int): The index of the model in the model pool. - - Returns: - omni_model_type: The model object. - - Raises: - IndexError: If the index is out of range. - """ - return self.model_pool[index] - - def get_model_by_id(self, model_id: str) -> omni_model_type: - """ - Retrieves a model from the model pool by its ID. - - Args: - model_id (str): The ID of the model. - - Returns: - omni_model_type: The model object. - - Raises: - ValueError: If the model with the given ID is not found in the model pool. - """ - name = model_id - for model in self.model_pool: - if ( - hasattr(model, "model_id") - and name == model.model_id - or hasattr(model, "model_name") - and name == model.model_name - or hasattr(model, "name") - and name == model.name - or hasattr(model, "model") - and name == model.model - ): - return model - raise ValueError(f"Model {model_id} not found in model pool.") - - def refactor_model_class_if_invoke(self): - """ - Refactors the model class if it has an 'invoke' method. - - Checks to see if the model pool has a model with an 'invoke' method and refactors it to have a 'run' method and '__call__' method. - - Returns: - str: A success message indicating that the model classes have been refactored. - """ - for model in self.model_pool: - if hasattr(model, "invoke"): - model.run = model.invoke - model.__call__ = model.invoke - logger.info( - f"Refactored model {model.name} to have run and __call__ methods." - ) - - # Update the model in the model pool - self.model_pool[self.model_pool.index(model)] = model - - return "Model classes successfully refactored." - - def refactor_model_class_if_invoke_class( - self, model: callable, *args, **kwargs - ) -> callable: - """ - Refactors the model class if it has an 'invoke' method. - - Checks to see if the model pool has a model with an 'invoke' method and refactors it to have a 'run' method and '__call__' method. - - Returns: - str: A success message indicating that the model classes have been refactored. - """ - if hasattr(model, "invoke"): - model.run = model.invoke - model.__call__ = model.invoke - logger.info( - f"Refactored model {model.name} to have run and __call__ methods." 
- ) - - return model - - def find_model_by_name_and_run( - self, - model_name: str = None, - task: str = None, - *args, - **kwargs, - ) -> str: - """ - Finds a model by its name and runs a task on it. - - Args: - model_name (str): The name of the model. - task (str): The task to be run on the model. - *args: Additional positional arguments to be passed to the task. - **kwargs: Additional keyword arguments to be passed to the task. - - Returns: - str: The result of running the task on the model. - - Raises: - ValueError: If the model with the given name is not found in the model pool. - """ - model = self.get_model_by_name(model_name) - return model.run(task, *args, **kwargs) - - -# model = ModelRouter() -# print(model.to_dict()) -# print(model.get_model_pool()) -# print(model.get_model_by_index(0)) -# print(model.get_model_by_id("stability-ai/stable-diffusion:")) -# # print(model.get_multiple_models_by_name(["gpt-4o", "gpt-4"])) diff --git a/swarms/models/model_types.py b/swarms/models/model_types.py deleted file mode 100644 index 49b1ed9d..00000000 --- a/swarms/models/model_types.py +++ /dev/null @@ -1,29 +0,0 @@ -from typing import List, Optional - -from pydantic import BaseModel - - -class TextModality(BaseModel): - content: str - - -class ImageModality(BaseModel): - url: str - alt_text: Optional[str] = None - - -class AudioModality(BaseModel): - url: str - transcript: Optional[str] = None - - -class VideoModality(BaseModel): - url: str - transcript: Optional[str] = None - - -class MultimodalData(BaseModel): - text: Optional[List[TextModality]] = None - images: Optional[List[ImageModality]] = None - audio: Optional[List[AudioModality]] = None - video: Optional[List[VideoModality]] = None diff --git a/swarms/models/moondream_mm.py b/swarms/models/moondream_mm.py deleted file mode 100644 index c1db54fc..00000000 --- a/swarms/models/moondream_mm.py +++ /dev/null @@ -1,63 +0,0 @@ -from PIL import Image -from transformers import AutoModelForCausalLM, AutoTokenizer - -from swarms.models.base_multimodal_model import BaseMultiModalModel - - -class MoonDream(BaseMultiModalModel): - """ - MoonDream is a multi-modal model that combines text and image inputs to generate descriptive answers for images. - - Args: - model_name (str): The name or path of the pre-trained model to be used. - revision (str): The specific revision of the pre-trained model to be used. - - Attributes: - model_name (str): The name or path of the pre-trained model. - revision (str): The specific revision of the pre-trained model. - model (AutoModelForCausalLM): The pre-trained model for generating answers. - tokenizer (AutoTokenizer): The tokenizer for processing text inputs. - - """ - - def __init__( - self, - model_name: str = "vikhyatk/moondream2", - revision: str = "2024-03-04", - system_prompt: str = None, - *args, - **kwargs, - ): - super().__init__() - self.model_name = model_name - self.revision = revision - self.system_prompt = system_prompt - - self.model = AutoModelForCausalLM.from_pretrained( - model_name, - trust_remote_code=True, - revision=revision, - *args, - **kwargs, - ) - self.tokenizer = AutoTokenizer.from_pretrained( - model_name, revision=revision - ) - - def run(self, task: str, img: str): - """ - Runs the MoonDream model to generate a descriptive answer for the given image. - - Args: - task (str): The task or question related to the image. - img (str): The path or URL of the image file. - - Returns: - str: The descriptive answer generated by the MoonDream model. 
- - """ - image = Image.open(img) - enc_image = self.model.encode_image(image) - return self.model.answer_question( - enc_image, f"{self.system_propmpt} {task}", self.tokenizer - ) diff --git a/swarms/models/nougat.py b/swarms/models/nougat.py deleted file mode 100644 index 9cba23f2..00000000 --- a/swarms/models/nougat.py +++ /dev/null @@ -1,106 +0,0 @@ -""" -Nougat by Meta - -Good for: -- transcribe Scientific PDFs into an easy to use markdown -format -- Extracting information from PDFs -- Extracting metadata from pdfs - -""" - -import re - -import torch -from PIL import Image -from transformers import NougatProcessor, VisionEncoderDecoderModel - - -class Nougat: - """ - Nougat - - Args: - model_name_or_path: str, default="facebook/nougat-base" - min_length: int, default=1 - max_new_tokens: int, default=30 - - Usage: - >>> from swarms.models.nougat import Nougat - >>> nougat = Nougat() - >>> nougat("path/to/image.png") - - - """ - - def __init__( - self, - model_name_or_path="facebook/nougat-base", - min_length: int = 1, - max_new_tokens: int = 5000, - ): - self.model_name_or_path = model_name_or_path - self.min_length = min_length - self.max_new_tokens = max_new_tokens - - self.processor = NougatProcessor.from_pretrained( - self.model_name_or_path - ) - self.model = VisionEncoderDecoderModel.from_pretrained( - self.model_name_or_path - ) - self.device = "cuda" if torch.cuda.is_available() else "cpu" - self.model.to(self.device) - - def get_image(self, img: str): - """Get an image from a path""" - img = Image.open(img) - - if img.mode == "L": - img = img.convert("RGB") - return img - - def __call__(self, img: str, *args, **kwargs): - """Call the model with an image_path str as an input""" - image = Image.open(img) - pixel_values = self.processor( - image, return_tensors="pt" - ).pixel_values - - # Generate transcriptions, here we only generate 30 tokens - outputs = self.model.generate( - pixel_values.to(self.device), - min_length=self.min_length, - max_new_tokens=self.max_new_tokens, - *args, - **kwargs, - ) - - sequence = self.processor.batch_decode( - outputs, skip_special_tokens=True - )[0] - sequence = self.processor.post_process_generation( - sequence, fix_markdown=False - ) - - out = print(sequence) - return out - - def clean_nougat_output(raw_output): - """Clean the output from nougat to be more readable""" - # Define the pattern to extract the relevant data - daily_balance_pattern = ( - r"\*\*(\d{2}/\d{2}/\d{4})\*\*\n\n\*\*([\d,]+\.\d{2})\*\*" - ) - - # Find all matches of the pattern - matches = re.findall(daily_balance_pattern, raw_output) - - # Convert the matches to a readable format - cleaned_data = [ - f"Date: {date}, Amount: {amount.replace(',', '')}" - for date, amount in matches - ] - - # Join the cleaned data with new lines for readability - return "\n".join(cleaned_data) diff --git a/swarms/models/ollama_model 2.py b/swarms/models/ollama_model 2.py deleted file mode 100644 index 1604ee5d..00000000 --- a/swarms/models/ollama_model 2.py +++ /dev/null @@ -1,135 +0,0 @@ -from loguru import logger -import subprocess -from pydantic import BaseModel, Field, ValidationError -from typing import List, Optional, Union - - -try: - import ollama -except ImportError: - logger.error("Failed to import ollama") - subprocess.run(["pip", "install", "ollama"]) - import ollama - - -class Message(BaseModel): - role: str = Field( - ..., - regex="^(user|system|assistant)$", - description="The role of the message sender.", - ) - content: str = Field( - ..., min_length=1, description="The 
content of the message." - ) - - -class OllamaModel: - def __init__( - self, - model_name: str, - host: Optional[str] = None, - timeout: int = 30, - stream: bool = False, - ): - """ - Initializes the OllamaModel with the model name and optional parameters. - - Args: - model_name (str): The name of the model to interact with (e.g., 'llama3.1'). - host (str, optional): The Ollama host to connect to. Defaults to None. - timeout (int, optional): Timeout for the requests. Defaults to 30 seconds. - stream (bool, optional): Enable streaming for responses. Defaults to False. - """ - self.model_name = model_name - self.host = host - self.timeout = timeout - self.stream = stream - - self.client = ollama.Client(host=host) if host else None - - def validate_messages( - self, messages: List[Message] - ) -> List[dict]: - """ - Validates the list of messages using Pydantic schema. - - Args: - messages (List[Message]): List of messages to validate. - - Returns: - List[dict]: Validated messages in dictionary format. - """ - try: - return [message.dict() for message in messages] - except ValidationError as e: - print(f"Validation error: {e}") - return [] - - def chat( - self, messages: List[Message], *args, **kwargs - ) -> Union[str, None]: - """Executes the chat task.""" - validated_messages = self.validate_messages(messages) - if not validated_messages: - return None - - if self.stream: - stream = ollama.chat( - model=self.model_name, - messages=validated_messages, - stream=True, - *args, - **kwargs, - ) - for chunk in stream: - print(chunk["message"]["content"], end="", flush=True) - else: - response = ollama.chat( - model=self.model_name, messages=validated_messages - ) - return response["message"]["content"] - - def generate(self, prompt: str) -> Optional[str]: - """Generates text based on a prompt.""" - if len(prompt) == 0: - print("Prompt cannot be empty.") - return None - - response = ollama.generate( - model=self.model_name, prompt=prompt - ) - return response.get("message", {}).get("content", None) - - def list_models(self) -> List[str]: - """Lists available models.""" - return ollama.list() - - def show_model(self) -> dict: - """Shows details of the current model.""" - return ollama.show(self.model_name) - - def create_model(self, modelfile: str) -> dict: - """Creates a new model from a modelfile.""" - return ollama.create( - model=self.model_name, modelfile=modelfile - ) - - def delete_model(self) -> bool: - """Deletes the current model.""" - try: - ollama.delete(self.model_name) - return True - except ollama.ResponseError as e: - print(f"Error deleting model: {e}") - return False - - def run(self, task: str, *args, **kwargs): - """ - Executes the task based on the task string. - - Args: - task (str): The task to execute, such as 'chat', 'generate', etc. 
- """ - return ollama.generate( - model=self.model_name, prompt=task, *args, **kwargs - ) diff --git a/swarms/models/ollama_model.py b/swarms/models/ollama_model.py deleted file mode 100644 index 1604ee5d..00000000 --- a/swarms/models/ollama_model.py +++ /dev/null @@ -1,135 +0,0 @@ -from loguru import logger -import subprocess -from pydantic import BaseModel, Field, ValidationError -from typing import List, Optional, Union - - -try: - import ollama -except ImportError: - logger.error("Failed to import ollama") - subprocess.run(["pip", "install", "ollama"]) - import ollama - - -class Message(BaseModel): - role: str = Field( - ..., - regex="^(user|system|assistant)$", - description="The role of the message sender.", - ) - content: str = Field( - ..., min_length=1, description="The content of the message." - ) - - -class OllamaModel: - def __init__( - self, - model_name: str, - host: Optional[str] = None, - timeout: int = 30, - stream: bool = False, - ): - """ - Initializes the OllamaModel with the model name and optional parameters. - - Args: - model_name (str): The name of the model to interact with (e.g., 'llama3.1'). - host (str, optional): The Ollama host to connect to. Defaults to None. - timeout (int, optional): Timeout for the requests. Defaults to 30 seconds. - stream (bool, optional): Enable streaming for responses. Defaults to False. - """ - self.model_name = model_name - self.host = host - self.timeout = timeout - self.stream = stream - - self.client = ollama.Client(host=host) if host else None - - def validate_messages( - self, messages: List[Message] - ) -> List[dict]: - """ - Validates the list of messages using Pydantic schema. - - Args: - messages (List[Message]): List of messages to validate. - - Returns: - List[dict]: Validated messages in dictionary format. - """ - try: - return [message.dict() for message in messages] - except ValidationError as e: - print(f"Validation error: {e}") - return [] - - def chat( - self, messages: List[Message], *args, **kwargs - ) -> Union[str, None]: - """Executes the chat task.""" - validated_messages = self.validate_messages(messages) - if not validated_messages: - return None - - if self.stream: - stream = ollama.chat( - model=self.model_name, - messages=validated_messages, - stream=True, - *args, - **kwargs, - ) - for chunk in stream: - print(chunk["message"]["content"], end="", flush=True) - else: - response = ollama.chat( - model=self.model_name, messages=validated_messages - ) - return response["message"]["content"] - - def generate(self, prompt: str) -> Optional[str]: - """Generates text based on a prompt.""" - if len(prompt) == 0: - print("Prompt cannot be empty.") - return None - - response = ollama.generate( - model=self.model_name, prompt=prompt - ) - return response.get("message", {}).get("content", None) - - def list_models(self) -> List[str]: - """Lists available models.""" - return ollama.list() - - def show_model(self) -> dict: - """Shows details of the current model.""" - return ollama.show(self.model_name) - - def create_model(self, modelfile: str) -> dict: - """Creates a new model from a modelfile.""" - return ollama.create( - model=self.model_name, modelfile=modelfile - ) - - def delete_model(self) -> bool: - """Deletes the current model.""" - try: - ollama.delete(self.model_name) - return True - except ollama.ResponseError as e: - print(f"Error deleting model: {e}") - return False - - def run(self, task: str, *args, **kwargs): - """ - Executes the task based on the task string. 
- - Args: - task (str): The task to execute, such as 'chat', 'generate', etc. - """ - return ollama.generate( - model=self.model_name, prompt=task, *args, **kwargs - ) diff --git a/swarms/models/open_dalle.py b/swarms/models/open_dalle.py deleted file mode 100644 index 57e8846b..00000000 --- a/swarms/models/open_dalle.py +++ /dev/null @@ -1,67 +0,0 @@ -from typing import Any, Optional - -import torch -from diffusers import AutoPipelineForText2Image - -from swarms.models.base_multimodal_model import BaseMultiModalModel - - -class OpenDalle(BaseMultiModalModel): - """OpenDalle model class - - Attributes: - model_name (str): The name or path of the model to be used. Defaults to "dataautogpt3/OpenDalleV1.1". - torch_dtype (torch.dtype): The torch data type to be used. Defaults to torch.float16. - device (str): The device to be used for computation. Defaults to "cuda". - - Examples: - >>> from swarms.models.open_dalle import OpenDalle - >>> od = OpenDalle() - >>> od.run("A picture of a cat") - - """ - - def __init__( - self, - model_name: str = "dataautogpt3/OpenDalleV1.1", - torch_dtype: Any = torch.float16, - device: str = "cuda", - *args, - **kwargs, - ): - """ - Initializes the OpenDalle model. - - Args: - model_name (str, optional): The name or path of the model to be used. Defaults to "dataautogpt3/OpenDalleV1.1". - torch_dtype (torch.dtype, optional): The torch data type to be used. Defaults to torch.float16. - device (str, optional): The device to be used for computation. Defaults to "cuda". - *args: Variable length argument list. - **kwargs: Arbitrary keyword arguments. - """ - self.pipeline = AutoPipelineForText2Image.from_pretrained( - model_name, torch_dtype=torch_dtype, *args, **kwargs - ).to(device) - - def run(self, task: Optional[str] = None, *args, **kwargs): - """Run the OpenDalle model - - Args: - task (str, optional): The task to be performed. Defaults to None. - *args: Variable length argument list. - **kwargs: Arbitrary keyword arguments. - - Returns: - [type]: [description] - """ - try: - if task is None: - raise ValueError("Task cannot be None") - if not isinstance(task, str): - raise TypeError("Task must be a string") - if len(task) < 1: - raise ValueError("Task cannot be empty") - return self.pipeline(task, *args, **kwargs).images[0] - except Exception as error: - print(f"[ERROR][OpenDalle] {error}") - raise error diff --git a/swarms/models/open_router.py b/swarms/models/open_router.py deleted file mode 100644 index 4140b736..00000000 --- a/swarms/models/open_router.py +++ /dev/null @@ -1,75 +0,0 @@ -from swarms.models.base_llm import BaseLLM -from pydantic import BaseModel -from typing import List, Dict -import openai - - -class OpenRouterRequest(BaseModel): - model: str - messages: List[Dict[str, str]] = [] - - -class OpenRouterChat(BaseLLM): - """ - A class representing an OpenRouter chat model. - - Args: - model_name (str): The name of the OpenRouter model. - base_url (str, optional): The base URL for the OpenRouter API. Defaults to "https://openrouter.ai/api/v1/chat/completions". - openrouter_api_key (str, optional): The API key for accessing the OpenRouter API. Defaults to None. - system_prompt (str, optional): The system prompt for the chat model. Defaults to None. - *args: Variable length argument list. - **kwargs: Arbitrary keyword arguments. - - Attributes: - model_name (str): The name of the OpenRouter model. - base_url (str): The base URL for the OpenRouter API. - openrouter_api_key (str): The API key for accessing the OpenRouter API. 
- system_prompt (str): The system prompt for the chat model. - - Methods: - run(task, *args, **kwargs): Runs the chat model with the given task. - - """ - - def __init__( - self, - model_name: str, - base_url: str = "https://openrouter.ai/api/v1/chat/completions", - openrouter_api_key: str = None, - system_prompt: str = None, - *args, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.model_name = model_name - self.base_url = base_url - self.openrouter_api_key = openrouter_api_key - self.system_prompt = system_prompt - - openai.api_base = "https://openrouter.ai/api/v1" - openai.api_key = openrouter_api_key - - def run(self, task: str, *args, **kwargs) -> str: - """ - Runs the chat model with the given task. - - Args: - task (str): The user's task for the chat model. - *args: Variable length argument list. - **kwargs: Arbitrary keyword arguments. - - Returns: - str: The response generated by the chat model. - - """ - response = openai.ChatCompletion.create( - model=self.model_name, - messages=[ - {"role": "system", "content": self.system_prompt}, - {"role": "user", "content": task}, - ] - * args, - **kwargs, - ) - return response.choices[0].message.text diff --git a/swarms/models/openai_embeddings.py b/swarms/models/openai_embeddings.py deleted file mode 100644 index 0193f0cc..00000000 --- a/swarms/models/openai_embeddings.py +++ /dev/null @@ -1,5 +0,0 @@ -from langchain_community.embeddings.openai import OpenAIEmbeddings - -__all__ = [ - "OpenAIEmbeddings", -] diff --git a/swarms/models/openai_function_caller.py b/swarms/models/openai_function_caller.py deleted file mode 100644 index ba77829f..00000000 --- a/swarms/models/openai_function_caller.py +++ /dev/null @@ -1,179 +0,0 @@ -import openai -from pydantic import BaseModel -import os -from swarms.utils.loguru_logger import logger -from swarms.models.base_llm import BaseLLM -from typing import List - - -class OpenAIFunctionCaller(BaseLLM): - """ - A class that represents a caller for OpenAI chat completions. - - Args: - system_prompt (str): The system prompt to be used in the chat completion. - model_name (str): The name of the OpenAI model to be used. - max_tokens (int): The maximum number of tokens in the generated completion. - temperature (float): The temperature parameter for randomness in the completion. - base_model (BaseModel): The base model to be used for the completion. - openai_api_key (str): The API key for accessing the OpenAI service. - parallel_tool_calls (bool): Whether to make parallel tool calls. - top_p (float): The top-p parameter for nucleus sampling in the completion. - - Attributes: - system_prompt (str): The system prompt to be used in the chat completion. - model_name (str): The name of the OpenAI model to be used. - max_tokens (int): The maximum number of tokens in the generated completion. - temperature (float): The temperature parameter for randomness in the completion. - base_model (BaseModel): The base model to be used for the completion. - parallel_tool_calls (bool): Whether to make parallel tool calls. - top_p (float): The top-p parameter for nucleus sampling in the completion. - client (openai.OpenAI): The OpenAI client for making API calls. - - Methods: - check_api_key: Checks if the API key is provided and retrieves it from the environment if not. - run: Runs the chat completion with the given task and returns the generated completion. 
- - """ - - def __init__( - self, - system_prompt: str = None, - model_name: str = "gpt-4o-2024-08-06", - max_tokens: int = 4000, - temperature: float = 0.4, - base_model: BaseModel = None, - openai_api_key: str = None, - parallel_tool_calls: bool = False, - top_p: float = 0.9, - *args, - **kwargs, - ): - super().__init__() - self.system_prompt = system_prompt - self.model_name = model_name - self.max_tokens = max_tokens - self.temperature = temperature - self.openai_api_key = openai_api_key - self.base_model = base_model - self.parallel_tool_calls = parallel_tool_calls - self.top_p = top_p - self.client = openai.OpenAI(api_key=self.check_api_key()) - - def check_api_key(self) -> str: - """ - Checks if the API key is provided and retrieves it from the environment if not. - - Returns: - str: The API key. - - """ - if self.openai_api_key is None: - self.openai_api_key = os.getenv("OPENAI_API_KEY") - - return self.openai_api_key - - def run(self, task: str, *args, **kwargs) -> dict: - """ - Runs the chat completion with the given task and returns the generated completion. - - Args: - task (str): The user's task for the chat completion. - *args: Additional positional arguments to be passed to the OpenAI API. - **kwargs: Additional keyword arguments to be passed to the OpenAI API. - - Returns: - str: The generated completion. - - """ - try: - completion = self.client.beta.chat.completions.parse( - model=self.model_name, - messages=[ - {"role": "system", "content": self.system_prompt}, - {"role": "user", "content": task}, - ], - max_tokens=self.max_tokens, - temperature=self.temperature, - response_format=self.base_model, - parallel_tool_calls=self.parallel_tool_calls, - tools=( - [openai.pydantic_function_tool(self.base_model)] - ), - *args, - **kwargs, - ) - - out = ( - completion.choices[0] - .message.tool_calls[0] - .function.arguments - ) - - # Conver str to dict - # print(out) - out = eval(out) - return out - except Exception as error: - logger.error( - f"Error in running OpenAI chat completion: {error}" - ) - return None - - def convert_to_dict_from_base_model( - self, base_model: BaseModel - ) -> dict: - return openai.pydantic_function_tool(base_model) - - def convert_list_of_base_models( - self, base_models: List[BaseModel] - ): - """ - Converts a list of BaseModels to a list of dictionaries. - - Args: - base_models (List[BaseModel]): A list of BaseModels to be converted. - - Returns: - List[Dict]: A list of dictionaries representing the converted BaseModels. 
- """ - return [ - self.convert_to_dict_from_base_model(base_model) - for base_model in base_models - ] - - -# def agents_list( -# agents: List[Agent] = None, -# ) -> str: -# responses = [] - -# for agent in agents: -# name = agent.agent_name -# description = agent.description -# response = f"Agent Name {name}: Description {description}" -# responses.append(response) - -# return concat_strings(responses) - - -# class HierarchicalOrderCall(BaseModel): -# agent_name: str -# task: str - - -# # Example usage: -# # Initialize the function caller -# function_caller = OpenAIFunctionCaller( -# system_prompt="You are a helpful assistant.", -# openai_api_key=""," -# max_tokens=500, -# temperature=0.5, -# base_model=HierarchicalOrderCall, -# ) - -# # Run the function caller -# response = function_caller.run( -# "Send an order to the financial agent twice" -# ) -# print(response) diff --git a/swarms/models/openai_tts.py b/swarms/models/openai_tts.py deleted file mode 100644 index f3e8b850..00000000 --- a/swarms/models/openai_tts.py +++ /dev/null @@ -1,124 +0,0 @@ -import os -import subprocess -import sys - -import requests -from dotenv import load_dotenv - -from swarms.models.base_llm import BaseLLM - -try: - import wave -except ImportError as error: - print(f"Import Error: {error} - Please install pyaudio") - subprocess.check_call( - [sys.executable, "-m", "pip", "install", "pyaudio"] - ) - - -# Load .env file -load_dotenv() - - -# OpenAI API Key env -def openai_api_key_env(): - openai_api_key = os.getenv("OPENAI_API_KEY") - return openai_api_key - - -class OpenAITTS(BaseLLM): - """OpenAI TTS model - - Attributes: - model_name (str): _description_ - proxy_url (str): _description_ - openai_api_key (str): _description_ - voice (str): _description_ - chunk_size (_type_): _description_ - - Methods: - run: _description_ - - - Examples: - >>> from swarms.models.openai_tts import OpenAITTS - >>> tts = OpenAITTS( - ... model_name = "tts-1-1106", - ... proxy_url = "https://api.openai.com/v1/audio/speech", - ... openai_api_key = openai_api_key_env, - ... voice = "onyx", - ... ) - >>> tts.run("Hello world") - - """ - - def __init__( - self, - model_name: str = "tts-1-1106", - proxy_url: str = "https://api.openai.com/v1/audio/speech", - openai_api_key: str = openai_api_key_env, - voice: str = "onyx", - chunk_size=1024 * 1024, - autosave: bool = False, - saved_filepath: str = None, - *args, - **kwargs, - ): - super().__init__() - self.model_name = model_name - self.proxy_url = proxy_url - self.openai_api_key = openai_api_key - self.voice = voice - self.chunk_size = chunk_size - self.autosave = autosave - self.saved_filepath = saved_filepath - - self.saved_filepath = "runs/tts_speech.wav" - - def run(self, task: str, *args, **kwargs): - """Run the tts model - - Args: - task (str): _description_ - - Returns: - _type_: _description_ - """ - response = requests.post( - self.proxy_url, - headers={ - "Authorization": f"Bearer {self.openai_api_key}", - }, - json={ - "model": self.model_name, - "input": task, - "voice": self.voice, - }, - ) - - audio = b"" - for chunk in response.iter_content(chunk_size=1024 * 1024): - audio += chunk - return audio - - def run_and_save(self, task: str = None, *args, **kwargs): - """Run the TTS model and save the output to a file. - - Args: - task (str): The text to be converted to speech. - filename (str): The path to the file where the speech will be saved. - - Returns: - bytes: The speech data. - """ - # Run the TTS model. 
- speech_data = self.run(task) - - # Save the speech data to a file. - with wave.open(self.saved_filepath, "wb") as file: - file.setnchannels(1) - file.setsampwidth(2) - file.setframerate(22050) - file.writeframes(speech_data) - - return speech_data diff --git a/swarms/models/palm.py b/swarms/models/palm.py deleted file mode 100644 index 301ce1bd..00000000 --- a/swarms/models/palm.py +++ /dev/null @@ -1,5 +0,0 @@ -from langchain_community.llms.google_palm import GooglePalm - -__all__ = [ - "GooglePalm", -] diff --git a/swarms/models/popular_llms.py b/swarms/models/popular_llms.py deleted file mode 100644 index 852c56b4..00000000 --- a/swarms/models/popular_llms.py +++ /dev/null @@ -1,92 +0,0 @@ -from langchain_community.chat_models.azure_openai import ( - AzureChatOpenAI, -) -from langchain_community.chat_models.openai import ( - ChatOpenAI as OpenAIChat, -) -from langchain_community.llms.anthropic import Anthropic -from langchain_community.llms.cohere import Cohere -from langchain_community.llms.mosaicml import MosaicML -from langchain_community.llms.openai import ( - OpenAI, -) # , OpenAIChat, AzureOpenAI -from langchain_community.llms.octoai_endpoint import OctoAIEndpoint -from langchain_community.llms.replicate import Replicate -from langchain_community.llms.fireworks import Fireworks # noqa: F401 - - -class Anthropic(Anthropic): - def __call__(self, *args, **kwargs): - return self.invoke(*args, **kwargs) - - def run(self, *args, **kwargs): - return self.invoke(*args, **kwargs) - - -class CohereChat(Cohere): - def __call__(self, *args, **kwargs): - return self.invoke(*args, **kwargs) - - def run(self, *args, **kwargs): - return self.invoke(*args, **kwargs) - - -class MosaicMLChat(MosaicML): - def __call__(self, *args, **kwargs): - return self.invoke(*args, **kwargs) - - def run(self, *args, **kwargs): - return self.invoke(*args, **kwargs) - - -class OpenAILLM(OpenAI): - def __call__(self, *args, **kwargs): - return self.invoke(*args, **kwargs) - - def run(self, *args, **kwargs): - return self.invoke(*args, **kwargs) - - -class ReplicateChat(Replicate): - def __call__(self, *args, **kwargs): - return self.invoke(*args, **kwargs) - - def run(self, *args, **kwargs): - return self.invoke(*args, **kwargs) - - -class AzureOpenAILLM(AzureChatOpenAI): - def __call__(self, *args, **kwargs): - return self.invoke(*args, **kwargs) - - def run(self, *args, **kwargs): - return self.invoke(*args, **kwargs) - - -class OpenAIChatLLM(OpenAIChat): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def __call__(self, *args, **kwargs): - out = self.invoke(*args, **kwargs) - return out.content.strip() - - def run(self, *args, **kwargs): - out = self.invoke(*args, **kwargs) - return out.content.strip() - - -class OctoAIChat(OctoAIEndpoint): - def __call__(self, *args, **kwargs): - return self.invoke(*args, **kwargs) - - def run(self, *args, **kwargs): - return self.invoke(*args, **kwargs) - - -class FireWorksAI(Fireworks): - def __call__(self, *args, **kwargs): - return self.invoke(*args, **kwargs) - - def run(self, *args, **kwargs): - return self.invoke(*args, **kwargs) diff --git a/swarms/models/qwen.py b/swarms/models/qwen.py deleted file mode 100644 index b5a4ed1a..00000000 --- a/swarms/models/qwen.py +++ /dev/null @@ -1,144 +0,0 @@ -from dataclasses import dataclass, field -from typing import Optional, Tuple - -from PIL import Image -from transformers import AutoModelForCausalLM, AutoTokenizer - -from swarms.models.base_multimodal_model import BaseMultiModalModel - - 
-@dataclass -class QwenVLMultiModal(BaseMultiModalModel): - """ - QwenVLMultiModal is a class that represents a multi-modal model for Qwen chatbot. - It inherits from the BaseMultiModalModel class. - - - Args: - model_name (str): The name of the model to be used. - device (str): The device to run the model on. - args (tuple): Additional positional arguments. - kwargs (dict): Additional keyword arguments. - quantize (bool): A flag to indicate whether to quantize the model. - return_bounding_boxes (bool): A flag to indicate whether to return bounding boxes for the image. - - - Examples: - >>> qwen = QwenVLMultiModal() - >>> response = qwen.run("Hello", "https://example.com/image.jpg") - >>> print(response) - """ - - model_name: str = "Qwen/Qwen-VL" - device: str = "cuda" - args: tuple = field(default_factory=tuple) - kwargs: dict = field(default_factory=dict) - quantize: bool = False - return_bounding_boxes: bool = False - - def __post_init__(self): - """ - Initializes the QwenVLMultiModal object. - It initializes the tokenizer and the model for the Qwen chatbot. - """ - - if self.quantize: - self.model_name = "Qwen/Qwen-VL-Chat-Int4" - - self.tokenizer = AutoTokenizer.from_pretrained( - self.model_name, trust_remote_code=True - ) - self.model = AutoModelForCausalLM.from_pretrained( - self.model_name, - device_map=self.device, - trust_remote_code=True, - ).eval() - - def run( - self, text: str, img: str, *args, **kwargs - ) -> Tuple[Optional[str], Optional[Image.Image]]: - """ - Runs the Qwen chatbot model on the given text and image inputs. - - Args: - text (str): The input text for the chatbot. - img (str): The input image for the chatbot. - *args: Additional positional arguments. - **kwargs: Additional keyword arguments. - - Returns: - Tuple[Optional[str], Optional[Image.Image]]: A tuple containing the response generated by the chatbot - and the image associated with the response (if any). - """ - try: - if self.return_bounding_boxes: - query = self.tokenizer.from_list_format( - [ - {"image": img, "text": text}, - ] - ) - - inputs = self.tokenizer(query, return_tensors="pt") - inputs = inputs.to(self.model.device) - pred = self.model.generate(**inputs) - response = self.tokenizer.decode( - pred.cpu()[0], skip_special_tokens=False - ) - - image_bb = self.tokenizer.draw_bbox_on_latest_picture( - response - ) - - if image_bb: - image_bb.save("output.jpg") - else: - print("No bounding boxes found in the image.") - - return response, image_bb - else: - query = self.tokenizer.from_list_format( - [ - {"image": img, "text": text}, - ] - ) - - inputs = self.tokenizer(query, return_tensors="pt") - inputs = inputs.to(self.model.device) - pred = self.model.generate(**inputs) - response = self.tokenizer.decode( - pred.cpu()[0], skip_special_tokens=False - ) - return response - except Exception as error: - print(f"[ERROR]: [QwenVLMultiModal]: {error}") - - def chat( - self, text: str, img: str, *args, **kwargs - ) -> tuple[str, list]: - """ - Chat with the model using text and image inputs. - - Args: - text (str): The text input for the chat. - img (str): The image input for the chat. - *args: Additional positional arguments. - **kwargs: Additional keyword arguments. - - Returns: - tuple[str, list]: A tuple containing the response and chat history. - - Raises: - Exception: If an error occurs during the chat. 
- - """ - try: - response, history = self.model.chat( - self.tokenizer, - query=f"{img}这是什么", - history=None, - ) - return response, history - except Exception as e: - raise Exception( - "An error occurred during the chat." - ) from e diff --git a/swarms/models/sam.py b/swarms/models/sam.py deleted file mode 100644 index f47d5a89..00000000 --- a/swarms/models/sam.py +++ /dev/null @@ -1,108 +0,0 @@ -from typing import List - -import requests -import torch -from PIL import Image -from transformers import SamModel, SamProcessor - -device = "cuda" if torch.cuda.is_available() else "cpu" - - -class SAM: - """ - Class representing the SAM (Segmentation and Masking) model. - - Args: - model_name (str): The name of the pre-trained SAM model. Default is "facebook/sam-vit-huge". - device (torch.device): The device to run the model on. Default is the current device. - input_points (List[List[int]]): The 2D location of a window in the image to segment. Default is [[450, 600]]. - *args: Additional positional arguments. - **kwargs: Additional keyword arguments. - - Attributes: - model_name (str): The name of the pre-trained SAM model. - device (torch.device): The device to run the model on. - input_points (List[List[int]]): The 2D location of a window in the image to segment. - model (SamModel): The pre-trained SAM model. - processor (SamProcessor): The processor for the SAM model. - - Methods: - run(task=None, img=None, *args, **kwargs): Runs the SAM model on the given image and returns the segmentation scores and masks. - process_img(img: str = None, *args, **kwargs): Processes the input image and returns the processed image. - - """ - - def __init__( - self, - model_name: str = "facebook/sam-vit-huge", - device=device, - input_points: List[List[int]] = [[450, 600]], - *args, - **kwargs, - ): - self.model_name = model_name - self.device = device - self.input_points = input_points - - self.model = SamModel.from_pretrained( - model_name, *args, **kwargs - ).to(device) - - self.processor = SamProcessor.from_pretrained(model_name) - - def run(self, task: str = None, img: str = None, *args, **kwargs): - """ - Runs the SAM model on the given image and returns the segmentation scores and masks. - - Args: - task: The task to perform. Not used in this method. - img: The input image to segment. - *args: Additional positional arguments. - **kwargs: Additional keyword arguments. - - Returns: - Tuple: A tuple containing the segmentation scores and masks. - - """ - img = self.process_img(img) - - # Specify the points of the mask to segment - input_points = [ - self.input_points - ] # 2D location of a window in the image - - # Preprocess the image - inputs = self.processor( - img, input_points=input_points, return_tensors="pt" - ).to(device) - - with torch.no_grad(): - outputs = self.model(**inputs) # noqa: E999 - - masks = self.processor.image_processor.post_process_masks( - outputs.pred_masks.cpu(), - inputs["original_sizes"].cpu(), - inputs["reshaped_input_sizes"].cpu(), - ) - scores = outputs.iou_scores - - return scores, masks - - def process_img(self, img: str = None, *args, **kwargs): - """ - Processes the input image and returns the processed image. - - Args: - img (str): The URL or file path of the input image. - *args: Additional positional arguments. - **kwargs: Additional keyword arguments. - - Returns: - Image: The processed image. 
- - """ - raw_image = Image.open( - requests.get(img, stream=True, *args, **kwargs).raw - ).convert("RGB") - - return raw_image diff --git a/swarms/models/sampling_params.py b/swarms/models/sampling_params.py deleted file mode 100644 index d231c295..00000000 --- a/swarms/models/sampling_params.py +++ /dev/null @@ -1,300 +0,0 @@ -"""Sampling parameters for text generation.""" - -from enum import IntEnum -from functools import cached_property -from typing import Callable, List, Optional, Union - -import torch - -_SAMPLING_EPS = 1e-5 - - -class SamplingType(IntEnum): - GREEDY = 0 - RANDOM = 1 - BEAM = 2 - - -LogitsProcessor = Callable[[List[int], torch.Tensor], torch.Tensor] -"""LogitsProcessor is a function that takes a list of previously generated -tokens and a tensor of the logits for the next token, and returns a modified -tensor of logits to sample from.""" - - -class SamplingParams: - """Sampling parameters for text generation. - - Overall, we follow the sampling parameters from the OpenAI text completion - API (https://platform.openai.com/docs/api-reference/completions/create). - In addition, we support beam search, which is not supported by OpenAI. - - Args: - n: Number of output sequences to return for the given prompt. - best_of: Number of output sequences that are generated from the prompt. - From these `best_of` sequences, the top `n` sequences are returned. - `best_of` must be greater than or equal to `n`. This is treated as - the beam width when `use_beam_search` is True. By default, `best_of` - is set to `n`. - presence_penalty: Float that penalizes new tokens based on whether they - appear in the generated text so far. Values > 0 encourage the model - to use new tokens, while values < 0 encourage the model to repeat - tokens. - frequency_penalty: Float that penalizes new tokens based on their - frequency in the generated text so far. Values > 0 encourage the - model to use new tokens, while values < 0 encourage the model to - repeat tokens. - repetition_penalty: Float that penalizes new tokens based on whether - they appear in the prompt and the generated text so far. Values > 1 - encourage the model to use new tokens, while values < 1 encourage - the model to repeat tokens. - temperature: Float that controls the randomness of the sampling. Lower - values make the model more deterministic, while higher values make - the model more random. Zero means greedy sampling. - top_p: Float that controls the cumulative probability of the top tokens - to consider. Must be in (0, 1]. Set to 1 to consider all tokens. - top_k: Integer that controls the number of top tokens to consider. Set - to -1 to consider all tokens. - min_p: Float that represents the minimum probability for a token to be - considered, relative to the probability of the most likely token. - Must be in [0, 1]. Set to 0 to disable this. - use_beam_search: Whether to use beam search instead of sampling. - length_penalty: Float that penalizes sequences based on their length. - Used in beam search. - early_stopping: Controls the stopping condition for beam search. It - accepts the following values: `True`, where the generation stops as - soon as there are `best_of` complete candidates; `False`, where an - heuristic is applied and the generation stops when is it very - unlikely to find better candidates; `"never"`, where the beam search - procedure only stops when there cannot be better candidates - (canonical beam search algorithm). - stop: List of strings that stop the generation when they are generated. 
- The returned output will not contain the stop strings. - stop_token_ids: List of tokens that stop the generation when they are - generated. The returned output will contain the stop tokens unless - the stop tokens are special tokens. - include_stop_str_in_output: Whether to include the stop strings in output - text. Defaults to False. - ignore_eos: Whether to ignore the EOS token and continue generating - tokens after the EOS token is generated. - max_tokens: Maximum number of tokens to generate per output sequence. - logprobs: Number of log probabilities to return per output token. - Note that the implementation follows the OpenAI API: The return - result includes the log probabilities on the `logprobs` most likely - tokens, as well the chosen tokens. The API will always return the - log probability of the sampled token, so there may be up to - `logprobs+1` elements in the response. - prompt_logprobs: Number of log probabilities to return per prompt token. - skip_special_tokens: Whether to skip special tokens in the output. - spaces_between_special_tokens: Whether to add spaces between special - tokens in the output. Defaults to True. - logits_processors: List of functions that modify logits based on - previously generated tokens. - """ - - def __init__( - self, - n: int = 1, - best_of: Optional[int] = None, - presence_penalty: float = 0.0, - frequency_penalty: float = 0.0, - repetition_penalty: float = 1.0, - temperature: float = 1.0, - top_p: float = 1.0, - top_k: int = -1, - min_p: float = 0.0, - use_beam_search: bool = False, - length_penalty: float = 1.0, - early_stopping: Union[bool, str] = False, - stop: Union[str, List[str], None] = None, - stop_token_ids: Optional[List[int]] = None, - include_stop_str_in_output: bool = False, - ignore_eos: bool = False, - max_tokens: Optional[int] = 16, - logprobs: Optional[int] = None, - prompt_logprobs: Optional[int] = None, - skip_special_tokens: bool = True, - spaces_between_special_tokens: bool = True, - logits_processors: Optional[List[LogitsProcessor]] = None, - ) -> None: - self.n = n - self.best_of = best_of if best_of is not None else n - self.presence_penalty = presence_penalty - self.frequency_penalty = frequency_penalty - self.repetition_penalty = repetition_penalty - self.temperature = temperature - self.top_p = top_p - self.top_k = top_k - self.min_p = min_p - self.use_beam_search = use_beam_search - self.length_penalty = length_penalty - self.early_stopping = early_stopping - if stop is None: - self.stop = [] - elif isinstance(stop, str): - self.stop = [stop] - else: - self.stop = list(stop) - if stop_token_ids is None: - self.stop_token_ids = [] - else: - self.stop_token_ids = list(stop_token_ids) - self.ignore_eos = ignore_eos - self.max_tokens = max_tokens - self.logprobs = logprobs - self.prompt_logprobs = prompt_logprobs - self.skip_special_tokens = skip_special_tokens - self.spaces_between_special_tokens = ( - spaces_between_special_tokens - ) - self.logits_processors = logits_processors - self.include_stop_str_in_output = include_stop_str_in_output - self._verify_args() - if self.use_beam_search: - self._verify_beam_search() - else: - self._verify_non_beam_search() - if self.temperature < _SAMPLING_EPS: - # Zero temperature means greedy sampling. 
- self.top_p = 1.0 - self.top_k = -1 - self.min_p = 0.0 - self._verify_greedy_sampling() - - def _verify_args(self) -> None: - if self.n < 1: - raise ValueError(f"n must be at least 1, got {self.n}.") - if self.best_of < self.n: - raise ValueError( - "best_of must be greater than or equal to n, " - f"got n={self.n} and best_of={self.best_of}." - ) - if not -2.0 <= self.presence_penalty <= 2.0: - raise ValueError( - "presence_penalty must be in [-2, 2], got " - f"{self.presence_penalty}." - ) - if not -2.0 <= self.frequency_penalty <= 2.0: - raise ValueError( - "frequency_penalty must be in [-2, 2], got " - f"{self.frequency_penalty}." - ) - if not 0.0 < self.repetition_penalty <= 2.0: - raise ValueError( - "repetition_penalty must be in (0, 2], got " - f"{self.repetition_penalty}." - ) - if self.temperature < 0.0: - raise ValueError( - "temperature must be non-negative, got" - f" {self.temperature}." - ) - if not 0.0 < self.top_p <= 1.0: - raise ValueError( - f"top_p must be in (0, 1], got {self.top_p}." - ) - if self.top_k < -1 or self.top_k == 0: - raise ValueError( - "top_k must be -1 (disable), or at least 1, " - f"got {self.top_k}." - ) - if not 0.0 <= self.min_p <= 1.0: - raise ValueError( - f"min_p must be in [0, 1], got {self.min_p}." - ) - if self.max_tokens is not None and self.max_tokens < 1: - raise ValueError( - "max_tokens must be at least 1, got" - f" {self.max_tokens}." - ) - if self.logprobs is not None and self.logprobs < 0: - raise ValueError( - f"logprobs must be non-negative, got {self.logprobs}." - ) - if ( - self.prompt_logprobs is not None - and self.prompt_logprobs < 0 - ): - raise ValueError( - "prompt_logprobs must be non-negative, got " - f"{self.prompt_logprobs}." - ) - - def _verify_beam_search(self) -> None: - if self.best_of == 1: - raise ValueError( - "best_of must be greater than 1 when using beam " - f"search. Got {self.best_of}." - ) - if self.temperature > _SAMPLING_EPS: - raise ValueError( - "temperature must be 0 when using beam search." - ) - if self.top_p < 1.0 - _SAMPLING_EPS: - raise ValueError( - "top_p must be 1 when using beam search." - ) - if self.top_k != -1: - raise ValueError( - "top_k must be -1 when using beam search." - ) - if self.early_stopping not in [True, False, "never"]: - raise ValueError( - "early_stopping must be True, False, or 'never', " - f"got {self.early_stopping}." - ) - - def _verify_non_beam_search(self) -> None: - if self.early_stopping is not False: - raise ValueError( - "early_stopping is not effective and must be " - "False when not using beam search." - ) - if ( - self.length_penalty < 1.0 - _SAMPLING_EPS - or self.length_penalty > 1.0 + _SAMPLING_EPS - ): - raise ValueError( - "length_penalty is not effective and must be the " - "default value of 1.0 when not using beam search." - ) - - def _verify_greedy_sampling(self) -> None: - if self.best_of > 1: - raise ValueError( - "best_of must be 1 when using greedy sampling." - f"Got {self.best_of}." 
- ) - - @cached_property - def sampling_type(self) -> SamplingType: - if self.use_beam_search: - return SamplingType.BEAM - if self.temperature < _SAMPLING_EPS: - return SamplingType.GREEDY - return SamplingType.RANDOM - - def __repr__(self) -> str: - return ( - f"SamplingParams(n={self.n}, " - f"best_of={self.best_of}, " - f"presence_penalty={self.presence_penalty}, " - f"frequency_penalty={self.frequency_penalty}, " - f"repetition_penalty={self.repetition_penalty}, " - f"temperature={self.temperature}, " - f"top_p={self.top_p}, " - f"top_k={self.top_k}, " - f"min_p={self.min_p}, " - f"use_beam_search={self.use_beam_search}, " - f"length_penalty={self.length_penalty}, " - f"early_stopping={self.early_stopping}, " - f"stop={self.stop}, " - f"stop_token_ids={self.stop_token_ids}, " - f"include_stop_str_in_output={self.include_stop_str_in_output}, " - f"ignore_eos={self.ignore_eos}, " - f"max_tokens={self.max_tokens}, " - f"logprobs={self.logprobs}, " - f"prompt_logprobs={self.prompt_logprobs}, " - f"skip_special_tokens={self.skip_special_tokens}, " - "spaces_between_special_tokens=" - f"{self.spaces_between_special_tokens})" - ) diff --git a/swarms/models/ssd_1b.py b/swarms/models/ssd_1b.py deleted file mode 100644 index 3042d1ab..00000000 --- a/swarms/models/ssd_1b.py +++ /dev/null @@ -1,280 +0,0 @@ -import concurrent.futures -import os -import uuid -from dataclasses import dataclass -from io import BytesIO -from typing import List - -import backoff -import torch -from cachetools import TTLCache -from diffusers import StableDiffusionXLPipeline -from PIL import Image -from pydantic import field_validator -from termcolor import colored - - -@dataclass -class SSD1B: - """ - SSD1B model class - - Attributes: - ----------- - image_url: str - The image url generated by the SSD1B API - - Methods: - -------- - __call__(self, task: str) -> SSD1B: - Makes a call to the SSD1B API and returns the image url - - Example: - -------- - model = SSD1B() - task = "A painting of a dog" - neg_prompt = "ugly, blurry, poor quality" - image_url = model(task, neg_prompt) - print(image_url) - """ - - model: str = "dall-e-3" - img: str = None - size: str = "1024x1024" - max_retries: int = 3 - quality: str = "standard" - model_name: str = "segment/SSD-1B" - n: int = 1 - save_path: str = "images" - max_time_seconds: int = 60 - save_folder: str = "images" - image_format: str = "png" - device: str = "cuda" - dashboard: bool = False - cache = TTLCache(maxsize=100, ttl=3600) - pipe = StableDiffusionXLPipeline.from_pretrained( - "segmind/SSD-1B", - torch_dtype=torch.float16, - use_safetensors=True, - variant="fp16", - ).to(device) - - def __post_init__(self): - """Post init method""" - - if self.img is not None: - self.img = self.convert_to_bytesio(self.img) - - os.makedirs(self.save_path, exist_ok=True) - - class Config: - """Config class for the SSD1B model""" - - arbitrary_types_allowed = True - - @field_validator("max_retries", "time_seconds") - @classmethod - def must_be_positive(cls, value): - if value <= 0: - raise ValueError("Must be positive") - return value - - def read_img(self, img: str): - """Read the image using pil""" - img = Image.open(img) - return img - - def set_width_height(self, img: str, width: int, height: int): - """Set the width and height of the image""" - img = self.read_img(img) - img = img.resize((width, height)) - return img - - def convert_to_bytesio(self, img: str, format: str = "PNG"): - """Convert the image to an bytes io object""" - byte_stream = BytesIO() - img.save(byte_stream, 
format=format) - byte_array = byte_stream.getvalue() - return byte_array - - @backoff.on_exception( - backoff.expo, Exception, max_time=max_time_seconds - ) - def __call__(self, task: str, neg_prompt: str): - """ - Text to image conversion using the SSD1B API - - Parameters: - ----------- - task: str - The task to be converted to an image - - Returns: - -------- - SSD1B: - An instance of the SSD1B class with the image url generated by the SSD1B API - - Example: - -------- - >>> dalle3 = SSD1B() - >>> task = "A painting of a dog" - >>> image_url = dalle3(task) - >>> print(image_url) - https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png - """ - if self.dashboard: - self.print_dashboard() - if task in self.cache: - return self.cache[task] - try: - img = self.pipe( - prompt=task, neg_prompt=neg_prompt - ).images[0] - - # Generate a unique filename for the image - img_name = f"{uuid.uuid4()}.{self.image_format}" - img_path = os.path.join(self.save_path, img_name) - - # Save the image - img.save(img_path, self.image_format) - self.cache[task] = img_path - - return img_path - - except Exception as error: - # Handling exceptions and printing the errors details - print( - colored( - ( - f"Error running SSD1B: {error} try optimizing" - " your api key and or try again" - ), - "red", - ) - ) - raise error - - def _generate_image_name(self, task: str): - """Generate a sanitized file name based on the task""" - sanitized_task = "".join( - char for char in task if char.isalnum() or char in " _ -" - ).rstrip() - return f"{sanitized_task}.{self.image_format}" - - def _download_image(self, img: Image, filename: str): - """ - Save the PIL Image object to a file. - """ - full_path = os.path.join(self.save_path, filename) - img.save(full_path, self.image_format) - - def print_dashboard(self): - """Print the SSD1B dashboard""" - print( - colored( - f"""SSD1B Dashboard: - -------------------- - - Model: {self.model} - Image: {self.img} - Size: {self.size} - Max Retries: {self.max_retries} - Quality: {self.quality} - N: {self.n} - Save Path: {self.save_path} - Time Seconds: {self.time_seconds} - Save Folder: {self.save_folder} - Image Format: {self.image_format} - -------------------- - - - """, - "green", - ) - ) - - def process_batch_concurrently( - self, tasks: List[str], max_workers: int = 5 - ): - """ - - Process a batch of tasks concurrently - - Args: - tasks (List[str]): A list of tasks to be processed - max_workers (int): The maximum number of workers to use for the concurrent processing - - Returns: - -------- - results (List[str]): A list of image urls generated by the SSD1B API - - Example: - -------- - >>> model = SSD1B() - >>> tasks = ["A painting of a dog", "A painting of a cat"] - >>> results = model.process_batch_concurrently(tasks) - >>> print(results) - - """ - with concurrent.futures.ThreadPoolExecutor( - max_workers=max_workers - ) as executor: - future_to_task = { - executor.submit(self, task): task for task in tasks - } - results = [] - for future in concurrent.futures.as_completed( - future_to_task - ): - task = future_to_task[future] - try: - img = future.result() - results.append(img) - - print(f"Task {task} completed: {img}") - except Exception as error: - print( - colored( - ( - f"Error running SSD1B: {error} try" - " optimizing your api key and or try" - " again" - ), - "red", - ) - ) - print( - colored( - ( - "Error running SSD1B:" - f" {error.http_status}" - ), - "red", - ) - ) - print( - colored( - f"Error running SSD1B: {error.error}", - "red", - ) - ) - raise error 
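For reference, the batch method removed above relies on the standard `concurrent.futures` submit/as-completed idiom. A minimal standalone sketch of that pattern follows; the `generate` function here is a hypothetical stand-in for the model call (e.g. `SSD1B.__call__`), not part of the removed code.

```python
import concurrent.futures
from typing import List


def generate(task: str) -> str:
    # Hypothetical stand-in for the image-generation call.
    return f"image-for-{task}.png"


def process_batch_concurrently(tasks: List[str], max_workers: int = 5) -> List[str]:
    """Run generate() over tasks in a thread pool, collecting results as they finish."""
    results = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each future back to its originating task so failures can be reported per task.
        future_to_task = {executor.submit(generate, task): task for task in tasks}
        for future in concurrent.futures.as_completed(future_to_task):
            task = future_to_task[future]
            try:
                results.append(future.result())
            except Exception as error:
                print(f"Task {task!r} failed: {error}")
    return results


if __name__ == "__main__":
    print(process_batch_concurrently(["A painting of a dog", "A painting of a cat"]))
```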
- - def _generate_uuid(self): - """Generate a uuid""" - return str(uuid.uuid4()) - - def __repr__(self): - """Repr method for the SSD1B class""" - return f"SSD1B(image_url={self.image_url})" - - def __str__(self): - """Str method for the SSD1B class""" - return f"SSD1B(image_url={self.image_url})" - - @backoff.on_exception( - backoff.expo, Exception, max_tries=max_retries - ) - def rate_limited_call(self, task: str): - """Rate limited call to the SSD1B API""" - return self.__call__(task) diff --git a/swarms/models/tiktoken_wrapper.py b/swarms/models/tiktoken_wrapper.py deleted file mode 100644 index c114200b..00000000 --- a/swarms/models/tiktoken_wrapper.py +++ /dev/null @@ -1,101 +0,0 @@ -import tiktoken - -import concurrent.futures -from typing import List - - -class TikTokenizer: - def __init__( - self, - model_name: str = "o200k_base", - ): - """ - Initializes a TikTokenizer object. - - Args: - model_name (str, optional): The name of the model to use for tokenization. Defaults to "gpt-4o". - """ - try: - self.model_name = model_name - self.encoding = tiktoken.get_encoding(model_name) - except Exception as e: - raise ValueError( - f"Failed to initialize tokenizer with model '{model_name}': {str(e)}" - ) - - def encode(self, string: str) -> str: - """ - Tokenizes a text string. - - Args: - string (str): The input text string. - - Returns: - str: The tokenized text string. - """ - return self.encoding.encode(string) - - def decode(self, tokens: List[int]) -> str: - """ - Detokenizes a text string. - - Args: - string (str): The input tokenized text string. - - Returns: - str: The detokenized text string. - """ - return self.encoding.decode(tokens) - - def count_tokens(self, string: str) -> int: - """ - Returns the number of tokens in a text string. - - Args: - string (str): The input text string. - - Returns: - int: The number of tokens in the text string. - """ - num_tokens = 0 - - def count_tokens_in_chunk(chunk): - nonlocal num_tokens - num_tokens += len(self.encoding.encode(chunk)) - - # Split the string into chunks for parallel processing - chunks = [ - string[i : i + 1000] for i in range(0, len(string), 1000) - ] - - # Create a ThreadPoolExecutor with maximum threads - with concurrent.futures.ThreadPoolExecutor( - max_workers=10 - ) as executor: - # Submit each chunk for processing - futures = [ - executor.submit(count_tokens_in_chunk, chunk) - for chunk in chunks - ] - - # Wait for all futures to complete - concurrent.futures.wait(futures) - - return num_tokens - - -# # Path: swarms/models/tiktoken_wrapper.py -# # Example -# # Initialize the TikTokenizer object with the default model -# tokenizer = TikTokenizer() - -# # Tokenize a text string - -# text = "Hello, how are you doing today?" 
-# tokens = tokenizer.encode(text) - -# print(f"Tokens: {tokens}") - -# # Count the number of tokens in the text string -# num_tokens = tokenizer.count_tokens(text) -# print(f"Number of tokens: {num_tokens}") diff --git a/swarms/models/together.py b/swarms/models/together.py deleted file mode 100644 index fbb5ae51..00000000 --- a/swarms/models/together.py +++ /dev/null @@ -1,137 +0,0 @@ -import logging -import os -from typing import Optional - -import requests -from dotenv import load_dotenv - -from swarms.models.base_llm import BaseLLM - -# Load environment variables -load_dotenv() - - -def together_api_key_env(): - """Get the API key from the environment.""" - return os.getenv("TOGETHER_API_KEY") - - -class TogetherLLM(BaseLLM): - """ - GPT-4 Vision API - - This class is a wrapper for the OpenAI API. It is used to run the GPT-4 Vision model. - - Parameters - ---------- - together_api_key : str - The OpenAI API key. Defaults to the together_api_key environment variable. - max_tokens : int - The maximum number of tokens to generate. Defaults to 300. - - - Methods - ------- - encode_image(img: str) - Encode image to base64. - run(task: str, img: str) - Run the model. - __call__(task: str, img: str) - Run the model. - - Examples: - --------- - >>> from swarms.models import GPT4VisionAPI - >>> llm = GPT4VisionAPI() - >>> task = "What is the color of the object?" - >>> img = "https://i.imgur.com/2M2ZGwC.jpeg" - >>> llm.run(task, img) - - - """ - - def __init__( - self, - together_api_key: str = together_api_key_env, - model_name: str = "mistralai/Mixtral-8x7B-Instruct-v0.1", - logging_enabled: bool = False, - max_workers: int = 10, - max_tokens: str = 300, - api_endpoint: str = "https://api.together.xyz", - beautify: bool = False, - streaming_enabled: Optional[bool] = False, - meta_prompt: Optional[bool] = False, - system_prompt: Optional[str] = None, - *args, - **kwargs, - ): - super(TogetherLLM).__init__(*args, **kwargs) - self.together_api_key = together_api_key - self.logging_enabled = logging_enabled - self.model_name = model_name - self.max_workers = max_workers - self.max_tokens = max_tokens - self.api_endpoint = api_endpoint - self.beautify = beautify - self.streaming_enabled = streaming_enabled - self.meta_prompt = meta_prompt - self.system_prompt = system_prompt - - if self.logging_enabled: - logging.basicConfig(level=logging.DEBUG) - else: - # Disable debug logs for requests and urllib3 - logging.getLogger("requests").setLevel(logging.WARNING) - logging.getLogger("urllib3").setLevel(logging.WARNING) - - if self.meta_prompt: - self.system_prompt = self.meta_prompt_init() - - # Function to handle vision tasks - def run(self, task: str = None, *args, **kwargs): - """Run the model.""" - try: - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {self.together_api_key}", - } - payload = { - "model": self.model_name, - "messages": [ - { - "role": "system", - "content": [self.system_prompt], - }, - { - "role": "user", - "content": task, - }, - ], - "max_tokens": self.max_tokens, - **kwargs, - } - response = requests.post( - self.api_endpoint, - headers=headers, - json=payload, - *args, - **kwargs, - ) - - out = response.json() - content = ( - out["choices"][0] - .get("message", {}) - .get("content", None) - ) - if self.streaming_enabled: - content = self.stream_response(content) - - return content - - except Exception as error: - print( - f"Error with the request: {error}, make sure you" - " double check input types and positions" - ) - return None diff --git 
a/swarms/models/vilt.py b/swarms/models/vilt.py deleted file mode 100644 index 60425e52..00000000 --- a/swarms/models/vilt.py +++ /dev/null @@ -1,57 +0,0 @@ -import requests -from PIL import Image -from transformers import ViltForQuestionAnswering, ViltProcessor - -from swarms.models.base_multimodal_model import BaseMultiModalModel - - -class Vilt(BaseMultiModalModel): - """ - Vision-and-Language Transformer (ViLT) model fine-tuned on VQAv2. - It was introduced in the paper ViLT: Vision-and-Language Transformer Without - Convolution or Region Supervision by Kim et al. and first released in this repository. - - Disclaimer: The team releasing ViLT did not write a model card for this model - so this model card has been written by the Hugging Face team. - - https://huggingface.co/dandelin/vilt-b32-finetuned-vqa - - - Example: - >>> model = Vilt() - >>> output = model("What is this image", "http://images.cocodataset.org/val2017/000000039769.jpg") - - """ - - def __init__( - self, - model_name: str = "dandelin/vilt-b32-finetuned-vqa", - *args, - **kwargs, - ): - super().__init__(model_name, *args, **kwargs) - self.processor = ViltProcessor.from_pretrained( - model_name, *args, **kwargs - ) - self.model = ViltForQuestionAnswering.from_pretrained( - model_name, *args, **kwargs - ) - - def run(self, task: str = None, img: str = None, *args, **kwargs): - """ - Run the model - - - Args: - - """ - # Download the image - image = Image.open(requests.get(img, stream=True).raw) - - encoding = self.processor(image, task, return_tensors="pt") - - # Forward pass - outputs = self.model(**encoding) - logits = outputs.logits - idx = logits.argmax(-1).item() - print("Predicted Answer:", self.model.config.id2label[idx]) diff --git a/swarms/models/vip_llava.py b/swarms/models/vip_llava.py deleted file mode 100644 index db532913..00000000 --- a/swarms/models/vip_llava.py +++ /dev/null @@ -1,94 +0,0 @@ -from io import BytesIO - -import requests -import torch -from PIL import Image -from transformers import ( - AutoProcessor, - VipLlavaForConditionalGeneration, -) - -from swarms.models.base_multimodal_model import BaseMultiModalModel - - -class VipLlavaMultiModal(BaseMultiModalModel): - """ - A multi-modal model for VIP-LLAVA. - - Args: - model_name (str): The name or path of the pre-trained model. - max_new_tokens (int): The maximum number of new tokens to generate. - device_map (str): The device mapping for the model. - torch_dtype: The torch data type for the model. - *args: Additional positional arguments. - **kwargs: Additional keyword arguments. - """ - - def __init__( - self, - model_name: str = "llava-hf/vip-llava-7b-hf", - max_new_tokens: int = 500, - device_map: str = "auto", - torch_dtype=torch.float16, - *args, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.model_name = model_name - self.max_new_tokens = max_new_tokens - self.device_map = device_map - self.torch_dtype = torch_dtype - - self.model = VipLlavaForConditionalGeneration.from_pretrained( - model_name, - device_map=device_map, - torch_dtype=torch_dtype, - *args, - **kwargs, - ) - self.processor = AutoProcessor.from_pretrained( - model_name, *args, **kwargs - ) - - def run(self, text: str, img: str, *args, **kwargs): - """ - Run the VIP-LLAVA model. - - Args: - text (str): The input text. - img (str): The URL of the input image. - *args: Additional positional arguments. - **kwargs: Additional keyword arguments. - - Returns: - str: The generated output text. - tuple: A tuple containing None and the error message if an error occurs. 
- """ - try: - response = requests.get(img, stream=True) - response.raise_for_status() - image = Image.open(BytesIO(response.content)) - - inputs = self.processor( - text=text, - images=image, - return_tensors="pt", - *args, - **kwargs, - ).to(0, self.torch_dtype) - - # Generate - generate_ids = self.model.generate( - **inputs, max_new_tokens=self.max_new_tokens, **kwargs - ) - - return self.processor.decode( - generate_ids[0][len(inputs["input_ids"][0]) :], - skip_special_tokens=True, - ) - - except requests.RequestException as error: - return None, f"Error fetching image: {error}" - - except Exception as error: - return None, f"Error during model inference: {error}" diff --git a/swarms/models/zeroscope.py b/swarms/models/zeroscope.py deleted file mode 100644 index 01e578fa..00000000 --- a/swarms/models/zeroscope.py +++ /dev/null @@ -1,101 +0,0 @@ -import torch -from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler -from diffusers.utils import export_to_video - - -class ZeroscopeTTV: - """ - ZeroscopeTTV class represents a zero-shot video generation model. - - Args: - model_name (str): The name of the pre-trained model to use. - torch_dtype (torch.dtype): The torch data type to use for computations. - chunk_size (int): The size of chunks for forward chunking. - dim (int): The dimension along which to split the input for forward chunking. - num_inference_steps (int): The number of inference steps to perform. - height (int): The height of the video frames. - width (int): The width of the video frames. - num_frames (int): The number of frames in the video. - - Attributes: - model_name (str): The name of the pre-trained model. - torch_dtype (torch.dtype): The torch data type used for computations. - chunk_size (int): The size of chunks for forward chunking. - dim (int): The dimension along which the input is split for forward chunking. - num_inference_steps (int): The number of inference steps to perform. - height (int): The height of the video frames. - width (int): The width of the video frames. - num_frames (int): The number of frames in the video. - pipe (DiffusionPipeline): The diffusion pipeline for video generation. - - Methods: - forward(task: str = None, *args, **kwargs) -> str: - Performs forward pass on the input task and returns the path of the generated video. - - Examples: - >>> from swarms.models - >>> zeroscope = ZeroscopeTTV() - >>> task = "A person is walking on the street." - >>> video_path = zeroscope(task) - - """ - - def __init__( - self, - model_name: str = "cerspense/zeroscope_v2_576w", - torch_dtype=torch.float16, - chunk_size: int = 1, - dim: int = 1, - num_inference_steps: int = 40, - height: int = 320, - width: int = 576, - num_frames: int = 36, - *args, - **kwargs, - ): - self.model_name = model_name - self.torch_dtype = torch_dtype - self.chunk_size = chunk_size - self.dim = dim - self.num_inference_steps = num_inference_steps - self.height = height - self.width = width - self.num_frames = num_frames - - self.pipe = DiffusionPipeline.from_pretrained( - model_name, torch_dtype=torch_dtype, *args, **kwargs - ) - self.pipe.scheduler = DPMSolverMultistepScheduler( - self.pipe.scheduler.config, - ) - self.pipe_enable_model_cpu_offload() - self.pipe.enable_vae_slicing() - self.pipe.unet.enable_forward_chunking( - chunk_size=chunk_size, dim=dim - ) - - def run(self, task: str = None, *args, **kwargs): - """ - Performs a forward pass on the input task and returns the path of the generated video. - - Args: - task (str): The input task for video generation. 
- - Returns: - str: The path of the generated video. - """ - try: - video_frames = self.pipe( - task, - num_inference_steps=self.num_inference_steps, - height=self.height, - width=self.width, - num_frames=self.num_frames, - *args, - **kwargs, - ).frames - video_path = export_to_video(video_frames) - return video_path - except Exception as error: - print(f"Error in [ZeroscopeTTV.forward]: {error}") - raise error diff --git a/swarms/prompts/autoswarm.py b/swarms/prompts/autoswarm.py index 0d76d020..8ded2027 100644 --- a/swarms/prompts/autoswarm.py +++ b/swarms/prompts/autoswarm.py @@ -49,7 +49,7 @@ Output Format: A complete Python script that is ready for copy/paste to GitHub a Here is an example of a a working swarm script that you can use as a rough template for the logic: import os from dotenv import load_dotenv -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat from swarms.structs import Agent import swarms.prompts.swarm_daddy as sdsp diff --git a/swarms/structs/agent.py b/swarms/structs/agent.py index 8f07a4ed..3b96d86b 100644 --- a/swarms/structs/agent.py +++ b/swarms/structs/agent.py @@ -26,7 +26,7 @@ from pydantic import BaseModel from swarms_memory import BaseVectorDatabase from termcolor import colored -from swarms.models.tiktoken_wrapper import TikTokenizer +from swarm_models.tiktoken_wrapper import TikTokenizer from swarms.prompts.agent_system_prompts import AGENT_SYSTEM_PROMPT_3 from swarms.prompts.multi_modal_autonomous_instruction_prompt import ( MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1, @@ -164,7 +164,7 @@ class Agent: Examples: - >>> from swarms.models import OpenAIChat + >>> from swarm_models import OpenAIChat >>> from swarms.structs import Agent >>> llm = OpenAIChat() >>> agent = Agent(llm=llm, max_loops=1) @@ -885,22 +885,6 @@ class Agent: ) raise error -<<<<<<< HEAD - async def astream_events( - self, task: str = None, img: str = None, *args, **kwargs - ): - """ - Run the Agent with LangChain's astream_events API. - Only works with LangChain-based models. - """ - try: - async for evt in self.llm.astream_events(task, version="v1"): - yield evt - except Exception as e: - print(f"Error streaming events: {e}") - - def __call__(self, task: str = None, img: str = None, *args, **kwargs): -======= # @run_on_cpu # def run( # self, @@ -1104,7 +1088,6 @@ class Agent: def __call__( self, task: str = None, img: str = None, *args, **kwargs ): ->>>>>>> ce359f5e ([5.6.8]) """Call the agent Args: diff --git a/swarms/structs/base_workflow.py b/swarms/structs/base_workflow.py index 053bf2ba..b5deb916 100644 --- a/swarms/structs/base_workflow.py +++ b/swarms/structs/base_workflow.py @@ -180,7 +180,7 @@ class BaseWorkflow(BaseStructure): ValueError: If the task is not found in the workflow. Examples: - >>> from swarms.models import OpenAIChat + >>> from swarm_models import OpenAIChat >>> from swarms.structs import SequentialWorkflow >>> llm = OpenAIChat(openai_api_key="") >>> workflow = SequentialWorkflow(max_loops=1) @@ -218,7 +218,7 @@ class BaseWorkflow(BaseStructure): ValueError: If the task is not found in the workflow. Examples: - >>> from swarms.models import OpenAIChat + >>> from swarm_models import OpenAIChat >>> from swarms.structs import SequentialWorkflow >>> llm = OpenAIChat(openai_api_key="") >>> workflow = SequentialWorkflow(max_loops=1) @@ -257,7 +257,7 @@ class BaseWorkflow(BaseStructure): filepath (str): The path to save the workflow state to. 
Examples: - >>> from swarms.models import OpenAIChat + >>> from swarm_models import OpenAIChat >>> from swarms.structs import SequentialWorkflow >>> llm = OpenAIChat(openai_api_key="") >>> workflow = SequentialWorkflow(max_loops=1) @@ -329,7 +329,7 @@ class BaseWorkflow(BaseStructure): filepath (str): The path to load the workflow state from. Examples: - >>> from swarms.models import OpenAIChat + >>> from swarm_models import OpenAIChat >>> from swarms.structs import SequentialWorkflow >>> llm = OpenAIChat(openai_api_key="") >>> workflow = SequentialWorkflow(max_loops=1) @@ -372,7 +372,7 @@ class BaseWorkflow(BaseStructure): **kwargs: Additional keyword arguments to pass to the dashboard. Examples: - >>> from swarms.models import OpenAIChat + >>> from swarm_models import OpenAIChat >>> from swarms.structs import SequentialWorkflow >>> llm = OpenAIChat(openai_api_key="") >>> workflow = SequentialWorkflow(max_loops=1) diff --git a/swarms/structs/dfs_search_swarm.py b/swarms/structs/dfs_search_swarm.py index 655dc097..a47dca3e 100644 --- a/swarms/structs/dfs_search_swarm.py +++ b/swarms/structs/dfs_search_swarm.py @@ -1,5 +1,6 @@ # import os -# from swarms import Agent, OpenAIChat +# from swarms import Agent +from swarm_models import OpenAIChat # from typing import List # class DepthFirstSearchSwarm: @@ -79,7 +80,8 @@ # #################### # import os -# from swarms import Agent, OpenAIChat +# from swarms import Agent +from swarm_models import OpenAIChat # class DFSSwarm: # def __init__(self, agents): diff --git a/swarms/structs/graph_workflow.py b/swarms/structs/graph_workflow.py index 2e665e33..66cbaf63 100644 --- a/swarms/structs/graph_workflow.py +++ b/swarms/structs/graph_workflow.py @@ -225,7 +225,8 @@ class GraphWorkflow(BaseModel): # # Example usage # if __name__ == "__main__": -# from swarms import Agent, OpenAIChat +# from swarms import Agent +from swarm_models import OpenAIChat # import os # from dotenv import load_dotenv diff --git a/swarms/structs/hiearchical_swarm.py b/swarms/structs/hiearchical_swarm.py index ae44c714..82fa6ba2 100644 --- a/swarms/structs/hiearchical_swarm.py +++ b/swarms/structs/hiearchical_swarm.py @@ -6,7 +6,7 @@ from swarms.structs.base_swarm import BaseSwarm from swarms.structs.agent import Agent from swarms.structs.concat import concat_strings from swarms.structs.agent_registry import AgentRegistry -from swarms.models.base_llm import BaseLLM +from swarm_models.base_llm import BaseLLM from swarms.structs.conversation import Conversation diff --git a/swarms/structs/monte_carlo_swarm.py b/swarms/structs/monte_carlo_swarm.py index 2908aa23..a2b558ba 100644 --- a/swarms/structs/monte_carlo_swarm.py +++ b/swarms/structs/monte_carlo_swarm.py @@ -200,7 +200,8 @@ def average_aggregator(results: List[float]) -> float: # import os -# from swarms import Agent, OpenAIChat +# from swarms import Agent +from swarm_models import OpenAIChat # from typing import List, Union, Callable # from collections import Counter diff --git a/swarms/structs/multi_agent_collab.py b/swarms/structs/multi_agent_collab.py index 2914787d..9f99f0f8 100644 --- a/swarms/structs/multi_agent_collab.py +++ b/swarms/structs/multi_agent_collab.py @@ -85,7 +85,7 @@ class MultiAgentCollaboration(BaseSwarm): Usage: - >>> from swarms.models import OpenAIChat + >>> from swarm_models import OpenAIChat >>> from swarms.structs import Agent >>> from swarms.swarms.multi_agent_collab import MultiAgentCollaboration >>> diff --git a/swarms/structs/omni_agent_types.py b/swarms/structs/omni_agent_types.py index 
e2c310cf..9a0f3f6a 100644 --- a/swarms/structs/omni_agent_types.py +++ b/swarms/structs/omni_agent_types.py @@ -4,8 +4,8 @@ from typing import ( Sequence, Union, ) -from swarms.models.base_llm import BaseLLM -from swarms.models.base_multimodal_model import BaseMultiModalModel +from swarm_models.base_llm import BaseLLM +from swarm_models.base_multimodal_model import BaseMultiModalModel from swarms.structs.agent import Agent # Unified type for agent diff --git a/swarms/structs/pulsar_swarm 2.py b/swarms/structs/pulsar_swarm 2.py index 0cbd7b38..199bdefe 100644 --- a/swarms/structs/pulsar_swarm 2.py +++ b/swarms/structs/pulsar_swarm 2.py @@ -3,7 +3,8 @@ import sys import datetime from typing import List, Dict, Any, Optional -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/swarms/structs/pulsar_swarm.py b/swarms/structs/pulsar_swarm.py index 0cbd7b38..199bdefe 100644 --- a/swarms/structs/pulsar_swarm.py +++ b/swarms/structs/pulsar_swarm.py @@ -3,7 +3,8 @@ import sys import datetime from typing import List, Dict, Any, Optional -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/swarms/structs/recursive_workflow.py b/swarms/structs/recursive_workflow.py index 286e8810..455522b5 100644 --- a/swarms/structs/recursive_workflow.py +++ b/swarms/structs/recursive_workflow.py @@ -21,7 +21,7 @@ class RecursiveWorkflow(BaseStructure): stop_token (Any): The token that indicates when to stop the workflow. Examples: - >>> from swarms.models import OpenAIChat + >>> from swarm_models import OpenAIChat >>> from swarms.structs import RecursiveWorkflow, Task >>> llm = OpenAIChat(openai_api_key="") >>> task = Task(llm, "What's the weather in miami") diff --git a/swarms/structs/run_agents_in_parallel_async_multiprocess.py b/swarms/structs/run_agents_in_parallel_async_multiprocess.py index a6630cc0..c3a4d87f 100644 --- a/swarms/structs/run_agents_in_parallel_async_multiprocess.py +++ b/swarms/structs/run_agents_in_parallel_async_multiprocess.py @@ -1,6 +1,7 @@ import os import asyncio -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat import uvloop from multiprocessing import cpu_count from swarms.utils.calculate_func_metrics import profile_func diff --git a/swarms/structs/task.py b/swarms/structs/task.py index 049ee458..70293426 100644 --- a/swarms/structs/task.py +++ b/swarms/structs/task.py @@ -46,7 +46,7 @@ class Task(BaseModel): Examples: >>> from swarms.structs import Task, Agent - >>> from swarms.models import OpenAIChat + >>> from swarm_models import OpenAIChat >>> agent = Agent(llm=OpenAIChat(openai_api_key=""), max_loops=1, dashboard=False) >>> task = Task(description="What's the weather in miami", agent=agent) >>> task.run() @@ -106,7 +106,7 @@ class Task(BaseModel): Examples: >>> from swarms.structs import Task, Agent - >>> from swarms.models import OpenAIChat + >>> from swarm_models import OpenAIChat >>> agent = Agent(llm=OpenAIChat(openai_api_key=""), max_loops=1, dashboard=False) >>> task = Task(description="What's the weather in miami", agent=agent) >>> task.run() diff --git a/swarms/tools/json_former.py b/swarms/tools/json_former.py index 8e5e041b..01d608a5 100644 --- a/swarms/tools/json_former.py +++ b/swarms/tools/json_former.py @@ -9,7 +9,7 @@ from 
swarms.tools.logits_processor import ( OutputNumbersTokens, StringStoppingCriteria, ) -from swarms.models.base_llm import BaseLLM +from swarm_models.base_llm import BaseLLM GENERATION_MARKER = "|GENERATION|" diff --git a/swarms/tools/prebuilt/code_executor.py b/swarms/tools/prebuilt/code_executor.py index 115f141a..4ea7597e 100644 --- a/swarms/tools/prebuilt/code_executor.py +++ b/swarms/tools/prebuilt/code_executor.py @@ -1,7 +1,7 @@ import os import subprocess from loguru import logger -from swarms.models.tiktoken_wrapper import TikTokenizer +from swarm_models.tiktoken_wrapper import TikTokenizer class CodeExecutor: diff --git a/tests/models/test_anthropic.py b/tests/models/test_anthropic.py index cc48479a..1f583889 100644 --- a/tests/models/test_anthropic.py +++ b/tests/models/test_anthropic.py @@ -3,7 +3,7 @@ from unittest.mock import Mock, patch import pytest -from swarms.models.anthropic import Anthropic +from swarm_models.anthropic import Anthropic # Mock the Anthropic API client for testing diff --git a/tests/models/test_fuyu.py b/tests/models/test_fuyu.py index e76e11bb..60044de2 100644 --- a/tests/models/test_fuyu.py +++ b/tests/models/test_fuyu.py @@ -5,7 +5,7 @@ import torch from PIL import Image from transformers import FuyuImageProcessor, FuyuProcessor -from swarms.models.fuyu import Fuyu +from swarm_models.fuyu import Fuyu # Basic test to ensure instantiation of class. diff --git a/tests/models/test_gemini.py b/tests/models/test_gemini.py index a61d1676..91e7c0ac 100644 --- a/tests/models/test_gemini.py +++ b/tests/models/test_gemini.py @@ -2,7 +2,7 @@ from unittest.mock import Mock, patch import pytest -from swarms.models.gemini import Gemini +from swarm_models.gemini import Gemini # Define test fixtures diff --git a/tests/models/test_gpt4_vision_api.py b/tests/models/test_gpt4_vision_api.py index ac797280..3a67f8ee 100644 --- a/tests/models/test_gpt4_vision_api.py +++ b/tests/models/test_gpt4_vision_api.py @@ -7,7 +7,7 @@ from aiohttp import ClientResponseError from dotenv import load_dotenv from requests.exceptions import RequestException -from swarms.models.gpt4_vision_api import GPT4VisionAPI +from swarm_models.gpt4_vision_api import GPT4VisionAPI load_dotenv() diff --git a/tests/models/test_hf.py b/tests/models/test_hf.py index ad56699c..65e52712 100644 --- a/tests/models/test_hf.py +++ b/tests/models/test_hf.py @@ -4,7 +4,7 @@ from unittest.mock import patch import pytest import torch -from swarms.models.huggingface import HuggingfaceLLM +from swarm_models.huggingface import HuggingfaceLLM # Fixture for the class instance diff --git a/tests/models/test_hf_pipeline.py b/tests/models/test_hf_pipeline.py index 8580dd56..98490623 100644 --- a/tests/models/test_hf_pipeline.py +++ b/tests/models/test_hf_pipeline.py @@ -3,7 +3,7 @@ from unittest.mock import patch import pytest import torch -from swarms.models.huggingface_pipeline import HuggingfacePipeline +from swarm_models.huggingface_pipeline import HuggingfacePipeline @pytest.fixture diff --git a/tests/models/test_idefics.py b/tests/models/test_idefics.py index 3bfee679..f381d41b 100644 --- a/tests/models/test_idefics.py +++ b/tests/models/test_idefics.py @@ -3,7 +3,7 @@ from unittest.mock import patch import pytest import torch -from swarms.models.idefics import ( +from swarm_models.idefics import ( AutoProcessor, Idefics, IdeficsForVisionText2Text, diff --git a/tests/models/test_imports.py b/tests/models/test_imports.py index 2da66f21..bdca4350 100644 --- a/tests/models/test_imports.py +++ 
b/tests/models/test_imports.py @@ -1,4 +1,4 @@ -from swarms.models import __all__ +from swarm_models import __all__ EXPECTED_ALL = [ "Anthropic", diff --git a/tests/models/test_kosmos.py b/tests/models/test_kosmos.py index 1219f895..ce7c36d6 100644 --- a/tests/models/test_kosmos.py +++ b/tests/models/test_kosmos.py @@ -4,7 +4,7 @@ import pytest import requests # This will be your project directory -from swarms.models.kosmos_two import Kosmos, is_overlapping +from swarm_models.kosmos_two import Kosmos, is_overlapping # A placeholder image URL for testing TEST_IMAGE_URL = "https://images.unsplash.com/photo-1673267569891-ca4246caafd7?auto=format&fit=crop&q=60&w=400&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHx0b3BpYy1mZWVkfDM1fEpwZzZLaWRsLUhrfHxlbnwwfHx8fHw%3D" diff --git a/tests/models/test_nougat.py b/tests/models/test_nougat.py index 858845a6..2c7f6361 100644 --- a/tests/models/test_nougat.py +++ b/tests/models/test_nougat.py @@ -6,7 +6,7 @@ import torch from PIL import Image from transformers import NougatProcessor, VisionEncoderDecoderModel -from swarms.models.nougat import Nougat +from swarm_models.nougat import Nougat @pytest.fixture diff --git a/tests/models/test_open_dalle.py b/tests/models/test_open_dalle.py index 4ff14e10..4dfd200c 100644 --- a/tests/models/test_open_dalle.py +++ b/tests/models/test_open_dalle.py @@ -1,7 +1,7 @@ import pytest import torch -from swarms.models.open_dalle import OpenDalle +from swarm_models.open_dalle import OpenDalle def test_init(): diff --git a/tests/models/test_openaitts.py b/tests/models/test_openaitts.py index 42745284..03e1e9c4 100644 --- a/tests/models/test_openaitts.py +++ b/tests/models/test_openaitts.py @@ -2,7 +2,7 @@ from unittest.mock import MagicMock, patch import pytest -from swarms.models.openai_tts import OpenAITTS +from swarm_models.openai_tts import OpenAITTS def test_openaitts_initialization(): diff --git a/tests/models/test_qwen.py b/tests/models/test_qwen.py index a920256c..3e5c937e 100644 --- a/tests/models/test_qwen.py +++ b/tests/models/test_qwen.py @@ -1,6 +1,6 @@ from unittest.mock import Mock, patch -from swarms.models.qwen import QwenVLMultiModal +from swarm_models.qwen import QwenVLMultiModal def test_post_init(): diff --git a/tests/models/test_ssd_1b.py b/tests/models/test_ssd_1b.py index f658f853..86a7e94a 100644 --- a/tests/models/test_ssd_1b.py +++ b/tests/models/test_ssd_1b.py @@ -1,7 +1,7 @@ import pytest from PIL import Image -from swarms.models.ssd_1b import SSD1B +from swarm_models.ssd_1b import SSD1B # Create fixtures if needed diff --git a/tests/models/test_timm_model.py b/tests/models/test_timm_model.py index c4e37126..5fdaac5a 100644 --- a/tests/models/test_timm_model.py +++ b/tests/models/test_timm_model.py @@ -3,7 +3,7 @@ from unittest.mock import Mock, patch import pytest import torch -from swarms.models import TimmModel +from swarm_models import TimmModel def test_timm_model_init(): diff --git a/tests/models/test_togther.py b/tests/models/test_togther.py index dd2a2f89..c7a0421c 100644 --- a/tests/models/test_togther.py +++ b/tests/models/test_togther.py @@ -4,7 +4,7 @@ from unittest.mock import Mock, patch import pytest import requests -from swarms.models.together import TogetherLLM +from swarm_models.together import TogetherLLM @pytest.fixture diff --git a/tests/models/test_vilt.py b/tests/models/test_vilt.py index d849f98e..8e222637 100644 --- a/tests/models/test_vilt.py +++ b/tests/models/test_vilt.py @@ -2,7 +2,7 @@ from unittest.mock import Mock, patch import pytest -from swarms.models.vilt import Image, 
Vilt, requests +from swarm_models.vilt import Image, Vilt, requests # Fixture for Vilt instance diff --git a/tests/models/test_zeroscope.py b/tests/models/test_zeroscope.py index 25a4c597..c8809cd1 100644 --- a/tests/models/test_zeroscope.py +++ b/tests/models/test_zeroscope.py @@ -2,7 +2,7 @@ from unittest.mock import MagicMock, patch import pytest -from swarms.models.zeroscope import ZeroscopeTTV +from swarm_models.zeroscope import ZeroscopeTTV @patch("swarms.models.zeroscope.DiffusionPipeline") diff --git "a/tests/profiling_agent 2.py\\" "b/tests/profiling_agent 2.py\\" index 6ef5487f..8f1b0220 100644 --- "a/tests/profiling_agent 2.py\\" +++ "b/tests/profiling_agent 2.py\\" @@ -4,7 +4,8 @@ start_time = time.time() import os import uuid -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/tests/profiling_agent.py b/tests/profiling_agent.py index 6ef5487f..8f1b0220 100644 --- a/tests/profiling_agent.py +++ b/tests/profiling_agent.py @@ -4,7 +4,8 @@ start_time = time.time() import os import uuid -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.prompts.finance_agent_sys_prompt import ( FINANCIAL_AGENT_SYS_PROMPT, ) diff --git a/tests/structs/test_agent.py b/tests/structs/test_agent.py index c3a50a63..4a145029 100644 --- a/tests/structs/test_agent.py +++ b/tests/structs/test_agent.py @@ -6,7 +6,7 @@ from unittest.mock import MagicMock, patch import pytest from dotenv import load_dotenv -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat from swarms.structs.agent import Agent, stop_when_repeats from swarms.utils.loguru_logger import logger diff --git a/tests/structs/test_base_workflow.py b/tests/structs/test_base_workflow.py index ccb7a563..fbb8d710 100644 --- a/tests/structs/test_base_workflow.py +++ b/tests/structs/test_base_workflow.py @@ -4,7 +4,7 @@ import os import pytest from dotenv import load_dotenv -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat from swarms.structs import BaseWorkflow load_dotenv() diff --git a/tests/structs/test_groupchat.py b/tests/structs/test_groupchat.py index e8096d9c..99222365 100644 --- a/tests/structs/test_groupchat.py +++ b/tests/structs/test_groupchat.py @@ -1,7 +1,7 @@ import pytest -from swarms.models import OpenAIChat -from swarms.models.anthropic import Anthropic +from swarm_models import OpenAIChat +from swarm_models.anthropic import Anthropic from swarms.structs.agent import Agent from swarms.structs.groupchat import GroupChat, GroupChatManager diff --git a/tests/structs/test_multi_agent_collab.py b/tests/structs/test_multi_agent_collab.py index 3f7d27b6..db06c9c0 100644 --- a/tests/structs/test_multi_agent_collab.py +++ b/tests/structs/test_multi_agent_collab.py @@ -4,7 +4,8 @@ from unittest.mock import Mock import pytest -from swarms import Agent, OpenAIChat +from swarms import Agent +from swarm_models import OpenAIChat from swarms.structs.multi_agent_collab import MultiAgentCollaboration # Initialize the director agent diff --git a/tests/structs/test_recursive_workflow.py b/tests/structs/test_recursive_workflow.py index 5b24f921..75cd5145 100644 --- a/tests/structs/test_recursive_workflow.py +++ b/tests/structs/test_recursive_workflow.py @@ -2,7 +2,7 @@ from unittest.mock import Mock, create_autospec import pytest -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat from 
swarms.structs import RecursiveWorkflow, Task diff --git a/tests/structs/test_sequential_workflow.py b/tests/structs/test_sequential_workflow.py index d92c4928..1327d0ae 100644 --- a/tests/structs/test_sequential_workflow.py +++ b/tests/structs/test_sequential_workflow.py @@ -4,7 +4,7 @@ from unittest.mock import patch import pytest -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat from swarms.structs.agent import Agent from swarms.structs.sequential_workflow import ( SequentialWorkflow, diff --git a/tests/structs/test_task.py b/tests/structs/test_task.py index 64724bcb..32fc9803 100644 --- a/tests/structs/test_task.py +++ b/tests/structs/test_task.py @@ -5,7 +5,7 @@ from unittest.mock import Mock import pytest from dotenv import load_dotenv -from swarms.models.gpt4_vision_api import GPT4VisionAPI +from swarm_models.gpt4_vision_api import GPT4VisionAPI from swarms.prompts.multi_modal_autonomous_instruction_prompt import ( MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1, ) diff --git a/tests/structs/test_team.py b/tests/structs/test_team.py index 2628789b..df92fe95 100644 --- a/tests/structs/test_team.py +++ b/tests/structs/test_team.py @@ -1,7 +1,7 @@ import json import unittest -from swarms.models import OpenAIChat +from swarm_models import OpenAIChat from swarms.structs import Agent, Task from swarms.structs.team import Team
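
For readers following this migration, below is a minimal before/after sketch of the single pattern the diff applies across the examples, prompts, structs, tools, and tests above: model classes move out of `swarms.models` (and out of the top-level `swarms` import) into the separate `swarm_models` package, while `Agent` and the structs remain in `swarms`. The `OPENAI_API_KEY` environment variable and the agent configuration are illustrative assumptions, not part of this diff, and the sketch assumes the relocated `swarm_models.tiktoken_wrapper` keeps the same `TikTokenizer` interface as the copy deleted earlier in this patch.

```python
import os

# Old (pre-migration) imports, removed throughout this diff:
#   from swarms import Agent, OpenAIChat
#   from swarms.models import OpenAIChat
#   from swarms.models.tiktoken_wrapper import TikTokenizer

# New (post-migration) imports used by the updated files:
from swarms import Agent
from swarm_models import OpenAIChat
from swarm_models.tiktoken_wrapper import TikTokenizer

# Model construction is unchanged; only the import path moved.
# Assumes OPENAI_API_KEY is set in the environment.
llm = OpenAIChat(openai_api_key=os.getenv("OPENAI_API_KEY"))

# Agent construction is untouched by the migration.
agent = Agent(llm=llm, max_loops=1)

# Tokenizer utilities relocated with the models; usage stays the same.
tokenizer = TikTokenizer()
print(tokenizer.count_tokens("Hello, how are you doing today?"))
```

The practical effect of the change is that downstream code only needs its import statements rewritten; constructor arguments, `Agent(llm=...)` wiring, and tokenizer calls are untouched, which is why the hunks above are almost entirely one-line import swaps.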