diff --git a/README.md b/README.md
index 64db98ae..aa8822e8 100644
--- a/README.md
+++ b/README.md
@@ -824,6 +824,115 @@ out = llm.run(task=task, img=img)
 print(out)
 ```
 
+### `GPT4Vision`
+```python
+from swarms import GPT4VisionAPI
+
+# Initialize with default API key and custom max_tokens
+api = GPT4VisionAPI(max_tokens=1000)
+
+# Define the task and image URL
+task = "Describe the scene in the image."
+img = "https://i.imgur.com/4P4ZRxU.jpeg"
+
+# Run the GPT-4 Vision model
+response = api.run(task, img)
+
+# Print the model's response
+print(response)
+```
+
+### `QwenVLMultiModal`
+A radically simple interface for QwenVLMultiModal that comes complete with quantization. To turn it on, just set `quantize=True`!
+
+```python
+from swarms import QwenVLMultiModal
+
+# Instantiate the QwenVLMultiModal model
+model = QwenVLMultiModal(
+    model_name="Qwen/Qwen-VL-Chat",
+    device="cuda",
+    quantize=True,
+)
+
+# Run the model
+response = model(
+    "Hello, how are you?", "https://example.com/image.jpg"
+)
+
+# Print the response
+print(response)
+
+
+```
+
+
+### `Kosmos`
+- Multi-modal model from Microsoft!
+
+```python
+from swarms import Kosmos
+
+# Initialize the model
+model = Kosmos()
+
+# Generate a response for the given task and image
+out = model.run("Analyze the receipts in this image", "docs.jpg")
+
+# Print the output
+print(out)
+
+```
+
+
+### `Idefics`
+- Multi-modal model from the Hugging Face team!
+
+```python
+# Import the Idefics model from the swarms.models module
+from swarms.models import Idefics
+
+# Create an instance of the Idefics model
+model = Idefics()
+
+# Define user input with an image URL and chat with the model
+user_input = (
+    "User: What is in this image?"
+    " https://upload.wikimedia.org/wikipedia/commons/8/86/Id%C3%A9fix.JPG"
+)
+response = model.chat(user_input)
+print(response)
+
+# Define another user input with an image URL and chat with the model
+user_input = (
+    "User: And who is that?"
+    " https://static.wikia.nocookie.net/asterix/images/2/25/R22b.gif/revision/latest?cb=20110815073052"
+)
+response = model.chat(user_input)
+print(response)
+
+# Set the checkpoint of the model to "new_checkpoint"
+model.set_checkpoint("new_checkpoint")
+
+# Set the device of the model to "cpu"
+model.set_device("cpu")
+
+# Set the maximum length of the chat to 200
+model.set_max_length(200)
+
+# Clear the chat history of the model
+model.clear_chat_history()
+
+
+```
+
+## Radically Simple AI Model APIs
+We provide a vast array of language and multi-modal model APIs for you to generate text, images, music, speech, and even videos. Get started below:
+
+
+
+-----
+
 ### `Anthropic`
 
 ```python
@@ -900,23 +1009,6 @@ print(image_url)
 ```
 
-### `GPT4Vision`
-```python
-from swarms import GPT4VisionAPI
-
-# Initialize with default API key and custom max_tokens
-api = GPT4VisionAPI(max_tokens=1000)
-
-# Define the task and image URL
-task = "Describe the scene in the image."
-img = "https://i.imgur.com/4P4ZRxU.jpeg"
-
-# Run the GPT-4 Vision model
-response = api.run(task, img)
-
-# Print the model's response
-print(response)
-```
-
 
 ### Text to Video with `ZeroscopeTTV`
 
@@ -938,7 +1030,7 @@ print(video_path)
 ```
 
-### ModelScope
+
 
-### `QwenVLMultiModal`
-A radically simple interface for QwenVLMultiModal comes complete with Quantization to turn it on just set quantize to true!
-
-```python
-from swarms import QwenVLMultiModal
-
-# Instantiate the QwenVLMultiModal model
-model = QwenVLMultiModal(
-    model_name="Qwen/Qwen-VL-Chat",
-    device="cuda",
-    quantize=True,
-)
-
-# Run the model
-response = model(
-    "Hello, how are you?", "https://example.com/image.jpg"
-)
-
-# Print the response
-print(response)
-
-
-```
---
diff --git a/playground/models/idefics.py b/playground/models/idefics.py
index 39d6f4eb..ea36ba77 100644
--- a/playground/models/idefics.py
+++ b/playground/models/idefics.py
@@ -1,7 +1,10 @@
-from swarms.models import idefics
+# Import the Idefics model from the swarms.models module
+from swarms.models import Idefics
 
-model = idefics()
+# Create an instance of the Idefics model
+model = Idefics()
 
+# Define user input with an image URL and chat with the model
 user_input = (
     "User: What is in this image?"
     " https://upload.wikimedia.org/wikipedia/commons/8/86/Id%C3%A9fix.JPG"
@@ -9,6 +12,7 @@ user_input = (
 response = model.chat(user_input)
 print(response)
 
+# Define another user input with an image URL and chat with the model
 user_input = (
     "User: And who is that?"
     " https://static.wikia.nocookie.net/asterix/images/2/25/R22b.gif/revision/latest?cb=20110815073052"
@@ -16,7 +20,14 @@ user_input = (
 response = model.chat(user_input)
 print(response)
 
+# Set the checkpoint of the model to "new_checkpoint"
 model.set_checkpoint("new_checkpoint")
+
+# Set the device of the model to "cpu"
 model.set_device("cpu")
+
+# Set the maximum length of the chat to 200
 model.set_max_length(200)
+
+# Clear the chat history of the model
 model.clear_chat_history()
diff --git a/playground/models/kosmos.py b/playground/models/kosmos.py
new file mode 100644
index 00000000..3d0f1dd2
--- /dev/null
+++ b/playground/models/kosmos.py
@@ -0,0 +1,10 @@
+from swarms import Kosmos
+
+# Initialize the model
+model = Kosmos()
+
+# Generate a response for the given task and image
+out = model.run("Analyze the receipts in this image", "docs.jpg")
+
+# Print the output
+print(out)
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 7a45a177..cd5f9f74 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "swarms"
-version = "3.8.1"
+version = "3.8.2"
 description = "Swarms - Pytorch"
 license = "MIT"
 authors = ["Kye Gomez <kye@apac.ai>"]
diff --git a/swarms/models/__init__.py b/swarms/models/__init__.py
index fcd67dc6..a8fb119a 100644
--- a/swarms/models/__init__.py
+++ b/swarms/models/__init__.py
@@ -48,6 +48,8 @@ from swarms.models.vip_llava import VipLlavaMultiModal  # noqa: E402
 from swarms.models.llava import LavaMultiModal  # noqa: E402
 from swarms.models.qwen import QwenVLMultiModal  # noqa: E402
 from swarms.models.clipq import CLIPQ  # noqa: E402
+from swarms.models.kosmos_two import Kosmos  # noqa: E402
+from swarms.models.fuyu import Fuyu  # noqa: E402
 
 # from swarms.models.dalle3 import Dalle3
 # from swarms.models.distilled_whisperx import DistilWhisperModel  # noqa: E402
@@ -79,7 +81,6 @@ __all__ = [
     "Zephyr",
     "BaseMultiModalModel",
     "Idefics",
-    # "Kosmos",
     "Vilt",
     "Nougat",
     "LayoutLMDocumentQA",
@@ -102,9 +103,6 @@ __all__ = [
     "AudioModality",
     "VideoModality",
     "MultimodalData",
-    # "CogAgent",
-    # "ModelScopePipeline",
-    # "ModelScopeAutoModel",
     "TogetherLLM",
     "TimmModel",
     "UltralyticsModel",
@@ -112,4 +110,6 @@ __all__ = [
     "LavaMultiModal",
     "QwenVLMultiModal",
     "CLIPQ",
+    "Kosmos",
+    "Fuyu",
 ]