pull/469/head
Kye Gomez 7 months ago
parent 692bd0789e
commit b8381a8d45

@ -407,56 +407,6 @@ print(f"Generated data: {generated_data}")
```
### `Worker`
The `Worker` is a simple all-in-one agent equipped with an LLM, tools, and RAG for low-level tasks.
✅ Plug-and-Play LLM: Use any LLM from any provider or framework.
✅ Reliable RAG: Uses FAISS for efficient retrieval, but it is modular, so you can use any DB.
✅ Multi-Step Parallel Function Calling: Use any tool
```python
# Importing necessary modules
import os
from dotenv import load_dotenv
from swarms import OpenAIChat, Worker, tool
# Loading environment variables from .env file
load_dotenv()
# Retrieving the OpenAI API key from environment variables
api_key = os.getenv("OPENAI_API_KEY")
# Create a tool
@tool
def search_api(query: str):
pass
# Creating a Worker instance
worker = Worker(
name="My Worker",
role="Worker",
human_in_the_loop=False,
tools=[search_api],
temperature=0.5,
llm=OpenAIChat(openai_api_key=api_key),
)
# Running the worker with a prompt
out = worker.run("Hello, how are you? Create an image of how your are doing!")
# Printing the output
print(out)
```
------
@ -1189,6 +1139,7 @@ Apache License
# Citation
Please cite Swarms in your paper or project if you find it beneficial in any way. We appreciate it!
```bibtex
@misc{swarms,
author = {Gomez, Kye},

@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "swarms"
version = "5.1.5"
version = "5.1.6"
description = "Swarms - Pytorch"
license = "MIT"
authors = ["Kye Gomez <kye@apac.ai>"]

@ -1,3 +1,4 @@
import requests
from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor
@ -13,6 +14,23 @@ def is_overlapping(rect1, rect2):
class Kosmos(BaseMultiModalModel):
"""A class representing the Kosmos model.
This model is used for multi-modal tasks such as grounding, referring expression comprehension,
referring expression generation, grounded VQA, grounded image captioning, and more.
Args:
model_name (str): The name or path of the pre-trained model.
max_new_tokens (int): The maximum number of new tokens to generate.
verbose (bool): Whether to print verbose output.
*args: Variable length argument list.
**kwargs: Arbitrary keyword arguments.
Attributes:
max_new_tokens (int): The maximum number of new tokens to generate.
model (AutoModelForVision2Seq): The pre-trained model for vision-to-sequence tasks.
processor (AutoProcessor): The pre-trained processor for vision-to-sequence tasks.
"""
def __init__(
self,
@ -37,10 +55,10 @@ class Kosmos(BaseMultiModalModel):
"""Get image from url
Args:
url (str): url of image
url (str): The URL of the image.
Returns:
_type_: _description_
PIL.Image: The image object.
"""
return Image.open(requests.get(url, stream=True).raw)
@ -48,8 +66,8 @@ class Kosmos(BaseMultiModalModel):
"""Run the model
Args:
task (str): task to run
image (str): img url
task (str): The task to run.
image (str): The URL of the image.
"""
inputs = self.processor(
text=task, images=image, return_tensors="pt"

Loading…
Cancel
Save