From 37ea8cc58dcc612f9e35048a7ad454835b681a68 Mon Sep 17 00:00:00 2001
From: Kye
Date: Mon, 5 Feb 2024 09:59:28 -0800
Subject: [PATCH] [BUFG][Mistral]

---
 playground/models/miqu.py  | 13 +++++++
 pyproject.toml             |  2 +-
 swarms/__init__.py         |  5 ---
 swarms/memory/chroma_db.py |  4 +--
 swarms/models/mistral.py   | 73 +++++++++----------------------------
 5 files changed, 33 insertions(+), 64 deletions(-)
 create mode 100644 playground/models/miqu.py

diff --git a/playground/models/miqu.py b/playground/models/miqu.py
new file mode 100644
index 00000000..f6518a5f
--- /dev/null
+++ b/playground/models/miqu.py
@@ -0,0 +1,13 @@
+from swarms import Mistral
+
+
+# Initialize the model
+model = Mistral(
+    model_name="mistralai/Mistral-7B-v0.1",
+    max_length=500,
+    use_flash_attention=True,
+    load_in_4bit=True
+)
+
+# Run the model
+result = model.run("What is the meaning of life?")
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 6e3cdc7f..5128ab33 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "swarms"
-version = "4.0.5"
+version = "4.0.9"
 description = "Swarms - Pytorch"
 license = "MIT"
 authors = ["Kye Gomez "]
diff --git a/swarms/__init__.py b/swarms/__init__.py
index 2a682f5b..3a28d980 100644
--- a/swarms/__init__.py
+++ b/swarms/__init__.py
@@ -1,13 +1,8 @@
 # from swarms.telemetry.main import Telemetry  # noqa: E402, F403
 from swarms.telemetry.bootup import bootup  # noqa: E402, F403
-from swarms.telemetry.user_utils import (
-    get_user_device_data,
-)  # noqa: E402, F403
 
 bootup()
 
-get_user_device_data()
-
 from swarms.agents import *  # noqa: E402, F403
 from swarms.structs import *  # noqa: E402, F403
 from swarms.models import *  # noqa: E402, F403
diff --git a/swarms/memory/chroma_db.py b/swarms/memory/chroma_db.py
index 3d355b4f..155acf43 100644
--- a/swarms/memory/chroma_db.py
+++ b/swarms/memory/chroma_db.py
@@ -5,7 +5,7 @@ from typing import Optional, Callable, List
 
 import chromadb
 from dotenv import load_dotenv
-from chromadb.utils.data_loaders import ImageLoader
+# from chromadb.utils.data import ImageLoader
 from chromadb.utils.embedding_functions import (
     OpenCLIPEmbeddingFunction,
 )
@@ -75,7 +75,7 @@ class ChromaDB:
         if data_loader:
             self.data_loader = data_loader
         else:
-            self.data_loader = ImageLoader()
+            self.data_loader = None
 
         # Embedding model
         if embedding_function:
diff --git a/swarms/models/mistral.py b/swarms/models/mistral.py
index aeeb37a8..d0146ef5 100644
--- a/swarms/models/mistral.py
+++ b/swarms/models/mistral.py
@@ -2,9 +2,9 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from swarms.structs.message import Message
+from swarms.models.base_llm import AbstractLLM
 
-
-class Mistral:
+class Mistral(AbstractLLM):
     """
     Mistral is an all-new llm
 
@@ -38,7 +38,10 @@ class Mistral:
         temperature: float = 1.0,
         max_length: int = 100,
         do_sample: bool = True,
+        *args,
+        **kwargs
     ):
+        super().__init__()
         self.ai_name = ai_name
         self.system_prompt = system_prompt
         self.model_name = model_name
@@ -46,6 +49,7 @@ class Mistral:
         self.use_flash_attention = use_flash_attention
         self.temperature = temperature
         self.max_length = max_length
+        self.do_sample = do_sample
 
         # Check if the specified device is available
         if not torch.cuda.is_available() and device == "cuda":
@@ -54,49 +58,18 @@ class Mistral:
             raise ValueError(
                 "CUDA is not available. Please choose a different"
                 " device."
             )
 
-        # Load the model and tokenizer
-        self.model = None
-        self.tokenizer = None
-        self.load_model()
-
         self.history = []
 
-    def load_model(self):
-        try:
-            self.model = AutoModelForCausalLM.from_pretrained(
-                self.model_name
-            )
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                self.model_name
-            )
-            self.model.to(self.device)
-        except Exception as e:
-            raise ValueError(
-                f"Error loading the Mistral model: {str(e)}"
-            )
+        self.model = AutoModelForCausalLM.from_pretrained(
+            self.model_name, *args, **kwargs
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            self.model_name, *args, **kwargs
+        )
+
+        self.model.to(self.device)
 
-    def run(self, task: str):
-        """Run the model on a given task."""
-
-        try:
-            model_inputs = self.tokenizer(
-                [task], return_tensors="pt"
-            ).to(self.device)
-            generated_ids = self.model.generate(
-                **model_inputs,
-                max_length=self.max_length,
-                do_sample=self.do_sample,
-                temperature=self.temperature,
-                max_new_tokens=self.max_length,
-            )
-            output_text = self.tokenizer.batch_decode(generated_ids)[
-                0
-            ]
-            return output_text
-        except Exception as e:
-            raise ValueError(f"Error running the model: {str(e)}")
-
-    def __call__(self, task: str):
+    def run(self, task: str, *args, **kwargs):
         """Run the model on a given task."""
 
         try:
@@ -109,6 +82,7 @@ class Mistral:
                 do_sample=self.do_sample,
                 temperature=self.temperature,
                 max_new_tokens=self.max_length,
+                **kwargs
             )
             output_text = self.tokenizer.batch_decode(generated_ids)[
                 0
             ]
@@ -158,17 +132,4 @@ class Mistral:
 
             # add error to history
             self.history.append(Message("Agent", error_message))
-            return error_message
-
-    def _stream_response(self, response: str = None):
-        """
-        Yield the response token by token (word by word)
-
-        Usage:
-        --------------
-        for token in _stream_response(response):
-            print(token)
-
-        """
-        for token in response.split():
-            yield token
+            return error_message
\ No newline at end of file
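-- 
A quick smoke-test sketch for the patched wrapper (a reviewer note, not part
of the commit). It uses the top-level re-export already exercised by
playground/models/miqu.py; the model name, device, and generation kwargs
below are illustrative assumptions, not values mandated by the patch:

    from swarms import Mistral

    # __init__ now accepts *args/**kwargs and forwards them to
    # AutoModelForCausalLM.from_pretrained / AutoTokenizer.from_pretrained.
    model = Mistral(
        model_name="mistralai/Mistral-7B-v0.1",
        device="cpu",  # sidesteps the CUDA availability check in __init__
        max_length=200,
    )

    # run() forwards **kwargs to model.generate(), so standard transformers
    # sampling options can be supplied per call.
    output = model.run("What is the meaning of life?", top_p=0.9)
    print(output)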