[BUFG][Mistral]

pull/377/merge
Kye 11 months ago
parent 1562d25c43
commit 37ea8cc58d

@@ -0,0 +1,13 @@
+from swarms import Mistral
+
+# Initialize the model
+model = Mistral(
+    model_name="mistralai/Mistral-7B-v0.1",
+    max_length=500,
+    use_flash_attention=True,
+    load_in_4bit=True
+)
+
+# Run the model
+result = model.run("What is the meaning of life?")
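
Note on the example above: `load_in_4bit` is not a named `Mistral` parameter; per the constructor change further down, extra keyword arguments are captured by `**kwargs` and forwarded to Hugging Face `from_pretrained`. A minimal sketch of the equivalent direct call (assumes `transformers` plus `bitsandbytes` and a CUDA device for the 4-bit path):

# Sketch only: what the **kwargs forwarding amounts to.
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    load_in_4bit=True,  # quantized weights via bitsandbytes
)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")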

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "swarms"
-version = "4.0.5"
+version = "4.0.9"
description = "Swarms - Pytorch"
license = "MIT"
authors = ["Kye Gomez <kye@apac.ai>"]

@@ -1,13 +1,8 @@
# from swarms.telemetry.main import Telemetry  # noqa: E402, F403
from swarms.telemetry.bootup import bootup  # noqa: E402, F403
-from swarms.telemetry.user_utils import (
-    get_user_device_data,
-)  # noqa: E402, F403

bootup()
-get_user_device_data()

from swarms.agents import *  # noqa: E402, F403
from swarms.structs import *  # noqa: E402, F403
from swarms.models import *  # noqa: E402, F403
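
This hunk removes the implicit telemetry call: importing `swarms` now runs only `bootup()`, not `get_user_device_data()`. Callers that still want the device snapshot can invoke it explicitly (a sketch, assuming the helper remains in `swarms.telemetry.user_utils`):

from swarms.telemetry.user_utils import get_user_device_data

device_data = get_user_device_data()  # now opt-in rather than on-import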

@@ -5,7 +5,7 @@ from typing import Optional, Callable, List

import chromadb
from dotenv import load_dotenv
-from chromadb.utils.data_loaders import ImageLoader
+# from chromadb.utils.data import ImageLoader
from chromadb.utils.embedding_functions import (
    OpenCLIPEmbeddingFunction,
)
@@ -75,7 +75,7 @@ class ChromaDB:
        if data_loader:
            self.data_loader = data_loader
        else:
-            self.data_loader = ImageLoader()
+            self.data_loader = None

        # Embedding model
        if embedding_function:
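
With the import commented out and the default changed, `ChromaDB` no longer constructs an `ImageLoader` itself; multimodal callers must build and pass one. A usage sketch (the `ImageLoader` path and the `swarms` import location are assumptions for this chromadb/swarms version):

from chromadb.utils.data_loaders import ImageLoader
from swarms.memory import ChromaDB  # hypothetical import path

multimodal_db = ChromaDB(data_loader=ImageLoader())
text_only_db = ChromaDB()  # data_loader stays None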

@@ -2,9 +2,9 @@ import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from swarms.structs.message import Message
+from swarms.models.base_llm import AbstractLLM


-class Mistral:
+class Mistral(AbstractLLM):
    """
    Mistral is an all-new llm
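
Subclassing `AbstractLLM` hooks `Mistral` into the shared base-class machinery, which is why the constructor below gains a `super().__init__()` call. The pattern in miniature (the exact `AbstractLLM` interface lives in `swarms.models.base_llm`; treating `run(task)` as the required method is an assumption):

from swarms.models.base_llm import AbstractLLM

class EchoLLM(AbstractLLM):
    # Toy subclass illustrating the contract: implement run(task).
    def run(self, task: str, *args, **kwargs) -> str:
        return f"echo: {task}"
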
@@ -38,7 +38,10 @@ class Mistral:
        temperature: float = 1.0,
        max_length: int = 100,
        do_sample: bool = True,
+        *args,
+        **kwargs
    ):
+        super().__init__()
        self.ai_name = ai_name
        self.system_prompt = system_prompt
        self.model_name = model_name
@@ -46,6 +49,7 @@
        self.use_flash_attention = use_flash_attention
        self.temperature = temperature
        self.max_length = max_length
+        self.do_sample = do_sample

        # Check if the specified device is available
        if not torch.cuda.is_available() and device == "cuda":
@@ -54,49 +58,18 @@ class Mistral:
                " device."
            )

        # Load the model and tokenizer
-        self.model = None
-        self.tokenizer = None
-        self.load_model()
+        self.model = AutoModelForCausalLM.from_pretrained(
+            self.model_name, *args, **kwargs
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            self.model_name, *args, **kwargs
+        )
+        self.model.to(self.device)

        self.history = []

-    def load_model(self):
-        try:
-            self.model = AutoModelForCausalLM.from_pretrained(
-                self.model_name
-            )
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                self.model_name
-            )
-            self.model.to(self.device)
-        except Exception as e:
-            raise ValueError(
-                f"Error loading the Mistral model: {str(e)}"
-            )
-
-    def run(self, task: str):
-        """Run the model on a given task."""
-        try:
-            model_inputs = self.tokenizer(
-                [task], return_tensors="pt"
-            ).to(self.device)
-            generated_ids = self.model.generate(
-                **model_inputs,
-                max_length=self.max_length,
-                do_sample=self.do_sample,
-                temperature=self.temperature,
-                max_new_tokens=self.max_length,
-            )
-            output_text = self.tokenizer.batch_decode(generated_ids)[
-                0
-            ]
-            return output_text
-        except Exception as e:
-            raise ValueError(f"Error running the model: {str(e)}")
-
-    def __call__(self, task: str):
+    def run(self, task: str, *args, **kwargs):
        """Run the model on a given task."""
        try:
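
A caveat on the inlined loading: with `load_in_4bit=True` (as in the example file above), recent `transformers` releases place the quantized weights via accelerate and raise an error if you then call `.to(device)` on the model. A defensive variant of the loader (a sketch under that assumption, not what this commit ships):

from transformers import AutoModelForCausalLM

def load_causal_lm(name: str, device: str = "cuda", **kwargs):
    # Skip the explicit move when bitsandbytes quantization is requested.
    model = AutoModelForCausalLM.from_pretrained(name, **kwargs)
    if not (kwargs.get("load_in_4bit") or kwargs.get("load_in_8bit")):
        model = model.to(device)
    return model
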
@@ -109,6 +82,7 @@ class Mistral:
                do_sample=self.do_sample,
                temperature=self.temperature,
                max_new_tokens=self.max_length,
+                **kwargs
            )
            output_text = self.tokenizer.batch_decode(generated_ids)[
                0
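
Note that `generate` is still given both `max_length` and `max_new_tokens`, both derived from `self.max_length`; in `transformers` the two conflict, `max_new_tokens` takes precedence, and a warning is emitted. A standalone sketch of the cleaner call with a single length control:

from transformers import AutoModelForCausalLM, AutoTokenizer

name = "mistralai/Mistral-7B-v0.1"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForCausalLM.from_pretrained(name)

inputs = tokenizer(["What is the meaning of life?"], return_tensors="pt")
generated_ids = model.generate(
    **inputs,
    do_sample=True,
    temperature=1.0,
    max_new_tokens=100,  # one length control instead of two
)
print(tokenizer.batch_decode(generated_ids)[0])
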
@@ -158,17 +132,4 @@ class Mistral:
            # add error to history
            self.history.append(Message("Agent", error_message))
            return error_message
-
-    def _stream_response(self, response: str = None):
-        """
-        Yield the response token by token (word by word)
-
-        Usage:
-        --------------
-        for token in _stream_response(response):
-            print(token)
-
-        """
-        for token in response.split():
-            yield token
-            return error_message
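
The deleted `_stream_response` helper also carried a stray `return error_message` after the `yield` — dead code referencing a name from the surrounding method. For anyone who still wants word-by-word streaming, a corrected standalone version (a sketch, not part of this commit):

def stream_response(response: str):
    # Yield the response word by word.
    for token in response.split():
        yield token

for token in stream_response("hello streaming world"):
    print(token)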