commit
cf93dd5b1b
@ -0,0 +1,97 @@
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
# Import the models, structs, and telemetry modules
|
||||
from swarms import (
|
||||
Gemini,
|
||||
GPT4VisionAPI,
|
||||
Mixtral,
|
||||
OpenAI,
|
||||
ToolAgent,
|
||||
BlocksList,
|
||||
)
|
||||
|
||||
# Read settings from a local .env file into the process environment
load_dotenv()

# API credentials for the hosted models (None when a variable is unset)
openai_api_key = os.environ.get("OPENAI_API_KEY")
gemini_api_key = os.environ.get("GEMINI_API_KEY")

# --- Tool agent -----------------------------------------------------------
# A local causal LM plus its tokenizer, constrained by a JSON schema.
model = AutoModelForCausalLM.from_pretrained("databricks/dolly-v2-12b")
tokenizer = AutoTokenizer.from_pretrained("databricks/dolly-v2-12b")
json_schema = dict(
    type="object",
    properties=dict(
        name=dict(type="string"),
        age=dict(type="number"),
        is_student=dict(type="boolean"),
        courses=dict(type="array", items=dict(type="string")),
    ),
)
toolagent = ToolAgent(
    model=model,
    tokenizer=tokenizer,
    json_schema=json_schema,
)

# --- Blocks list ----------------------------------------------------------
# A BlocksList groups arbitrary classes or functions into a custom swarm.
swarm = BlocksList(
    "SocialMediaSwarm",
    "A swarm of social media agents",
    [
        OpenAI(openai_api_key=openai_api_key),
        Mixtral(),
        GPT4VisionAPI(openai_api_key=openai_api_key),
        Gemini(gemini_api_key=gemini_api_key),
    ],
)

# --- Mutating the swarm ---------------------------------------------------
# Register the tool agent as an additional block
swarm.add(toolagent)

# Take the same block out again
swarm.remove(toolagent)

# Update the block in place
# NOTE(review): the tool agent was removed on the previous line and is still
# used by update/get_by_id/run_block below -- confirm BlocksList accepts that.
swarm.update(toolagent)

# --- Querying the swarm ---------------------------------------------------
# Positional lookup
block_at_index = swarm.get(0)

# Every registered block
all_blocks = swarm.get_all()

# Lookup by block name
openai_blocks = swarm.get_by_name("OpenAI")

# Lookup by block type
gpt4_blocks = swarm.get_by_type("GPT4VisionAPI")

# Lookup by block ID
block_by_id = swarm.get_by_id(toolagent.id)

# Lookups keyed on the parent: object, ID, name, type and description
blocks_by_parent = swarm.get_by_parent(swarm)
blocks_by_parent_id = swarm.get_by_parent_id(swarm.id)
blocks_by_parent_name = swarm.get_by_parent_name(swarm.name)
blocks_by_parent_type = swarm.get_by_parent_type(type(swarm).__name__)
blocks_by_parent_description = swarm.get_by_parent_description(swarm.description)

# --- Running a block ------------------------------------------------------
# Execute a single block through the swarm and show its output
inference = swarm.run_block(toolagent, "Hello World")
print(inference)
|
@ -0,0 +1,129 @@
|
||||
import torch
|
||||
from PIL import Image
|
||||
from modelscope import AutoModelForCausalLM, AutoTokenizer
|
||||
from swarms.models.base_multimodal_model import BaseMultiModalModel
|
||||
|
||||
# Default device for model and inputs: the GPU when CUDA is usable, else CPU.
device_check = "cpu"
if torch.cuda.is_available():
    device_check = "cuda"
|
||||
|
||||
|
||||
class CogAgent(BaseMultiModalModel):
    """CogAgent

    Multi-modal conversational agent that can be used to chat with
    images and text. It is based on the CogAgent model from the
    ModelScope library.

    Attributes:
        model_name (str): The name of the model to be used
        tokenizer_name (str): The name of the tokenizer to be used
        dtype (torch.dtype): The data type used for the weights and images
        low_cpu_mem_usage (bool): Whether to use low CPU memory
        load_in_4bit (bool): Whether to load in 4-bit
        trust_remote_code (bool): Whether to trust remote code
        device (str): The device to be used
        model: The loaded model, switched to eval mode
        tokenizer: The loaded tokenizer

    Examples:
        >>> from swarms.models.cog_agent import CogAgent
        >>> cog_agent = CogAgent()
        >>> response = cog_agent.run("How are you?", "images/1.jpg")
    """

    def __init__(
        self,
        model_name: str = "ZhipuAI/cogagent-chat",
        tokenizer_name: str = "I-ModelScope/vicuna-7b-v1.5",
        dtype=torch.bfloat16,
        low_cpu_mem_usage: bool = True,
        load_in_4bit: bool = True,
        trust_remote_code: bool = True,
        device=device_check,
        *args,
        **kwargs,
    ):
        """Load the model and tokenizer.

        Args:
            model_name (str): Model identifier passed to ``from_pretrained``
            tokenizer_name (str): Tokenizer identifier passed to
                ``from_pretrained``
            dtype (torch.dtype): Passed as ``torch_dtype`` and used to cast
                image tensors in :meth:`run`
            low_cpu_mem_usage (bool): Forwarded to ``from_pretrained``
            load_in_4bit (bool): Forwarded to ``from_pretrained``
            trust_remote_code (bool): Forwarded to ``from_pretrained``
            device (str): Device the inputs (and a non-quantized model) are
                moved to
            *args: Extra positional arguments for the model's
                ``from_pretrained``
            **kwargs: Extra keyword arguments for the model's
                ``from_pretrained``
        """
        super().__init__()
        self.model_name = model_name
        self.tokenizer_name = tokenizer_name
        self.dtype = dtype
        self.low_cpu_mem_usage = low_cpu_mem_usage
        self.load_in_4bit = load_in_4bit
        self.trust_remote_code = trust_remote_code
        self.device = device

        model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            *args,
            torch_dtype=self.dtype,
            low_cpu_mem_usage=self.low_cpu_mem_usage,
            load_in_4bit=self.load_in_4bit,
            trust_remote_code=self.trust_remote_code,
            **kwargs,
        )
        # A 4-bit (bitsandbytes) model is placed on its device while loading
        # and rejects a later ``.to()``; only move full-precision models.
        if not self.load_in_4bit:
            model = model.to(self.device)
        self.model = model.eval()

        self.tokenizer = AutoTokenizer.from_pretrained(
            self.tokenizer_name
        )

    def run(self, task: str, img: str, *args, **kwargs):
        """Run the model

        Args:
            task (str): The task to be performed
            img (str): The image path
            *args: Unused; accepted for interface compatibility
            **kwargs: Forwarded to ``self.model.generate``

        Returns:
            str: The decoded response, cut at the first ``</s>`` marker.
                The response is also printed.
        """
        # Close the file handle once the pixel data has been converted.
        with Image.open(img) as raw_image:
            image = raw_image.convert("RGB")

        input_by_model = self.model.build_conversation_input_ids(
            self.tokenizer,
            query=task,
            history=[],
            images=[image],
        )

        # Add a batch dimension to the token-level tensors and move them to
        # the target device.
        inputs = {
            key: input_by_model[key].unsqueeze(0).to(self.device)
            for key in ("input_ids", "token_type_ids", "attention_mask")
        }
        inputs["images"] = [
            [input_by_model["images"][0].to(self.device).to(self.dtype)]
        ]
        if (
            "cross_images" in input_by_model
            and input_by_model["cross_images"]
        ):
            inputs["cross_images"] = [
                [
                    input_by_model["cross_images"][0]
                    .to(self.device)
                    .to(self.dtype)
                ]
            ]

        with torch.no_grad():
            # generate() returns token ids; a plain forward call would not.
            outputs = self.model.generate(**inputs, **kwargs)
            # Drop the prompt tokens so only the newly generated part remains.
            outputs = outputs[:, inputs["input_ids"].shape[1] :]
            response = self.tokenizer.decode(outputs[0])
            response = response.split("</s>")[0]

        print(response)
        return response
|
@ -0,0 +1,8 @@
|
||||
from swarms.utils.disable_logging import disable_logging
|
||||
from swarms.telemetry.auto_upgrade_swarms import auto_update
|
||||
|
||||
|
||||
def bootup():
    """Run the start-up steps: disable logging, then run the auto-update."""
    for startup_step in (disable_logging, auto_update):
        startup_step()
|
Loading…
Reference in new issue