diff --git a/README.md b/README.md
index 217c363c..6da93e39 100644
--- a/README.md
+++ b/README.md
@@ -407,56 +407,6 @@
 print(f"Generated data: {generated_data}")
 ```
 
-### `Worker`
-The `Worker` is a simple all-in-one agent equipped with an LLM, tools, and RAG for low level tasks.
-
-✅ Plug in and Play LLM. Utilize any LLM from anywhere and any framework
-
-✅ Reliable RAG: Utilizes FAISS for efficient RAG but it's modular so you can use any DB.
-
-✅ Multi-Step Parallel Function Calling: Use any tool
-
-```python
-# Importing necessary modules
-import os
-
-from dotenv import load_dotenv
-
-from swarms import OpenAIChat, Worker, tool
-
-# Loading environment variables from .env file
-load_dotenv()
-
-# Retrieving the OpenAI API key from environment variables
-api_key = os.getenv("OPENAI_API_KEY")
-
-
-# Create a tool
-@tool
-def search_api(query: str):
-    pass
-
-
-# Creating a Worker instance
-worker = Worker(
-    name="My Worker",
-    role="Worker",
-    human_in_the_loop=False,
-    tools=[search_api],
-    temperature=0.5,
-    llm=OpenAIChat(openai_api_key=api_key),
-)
-
-# Running the worker with a prompt
-out = worker.run("Hello, how are you? Create an image of how your are doing!")
-
-# Printing the output
-print(out)
-```
-
-------
-
-
 
 
 
@@ -1189,6 +1139,7 @@ Apache License
 
 # Citation
 Please cite Swarms in your paper or your project if you found it beneficial in any way! Appreciate you.
+
 ```bibtex
 @misc{swarms,
     author = {Gomez, Kye},
diff --git a/pyproject.toml b/pyproject.toml
index d6cf6396..f148b92c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "swarms"
-version = "5.1.5"
+version = "5.1.6"
 description = "Swarms - Pytorch"
 license = "MIT"
 authors = ["Kye Gomez "]
diff --git a/swarms/models/kosmos_two.py b/swarms/models/kosmos_two.py
index ba03bc54..c8c6b819 100644
--- a/swarms/models/kosmos_two.py
+++ b/swarms/models/kosmos_two.py
@@ -1,3 +1,4 @@
+
 import requests
 from PIL import Image
 from transformers import AutoModelForVision2Seq, AutoProcessor
@@ -13,6 +14,23 @@ def is_overlapping(rect1, rect2):
 
 
 class Kosmos(BaseMultiModalModel):
+    """A class representing the Kosmos model.
+
+    This model is used for multi-modal tasks such as grounding, referring expression comprehension,
+    referring expression generation, grounded VQA, grounded image captioning, and more.
+
+    Args:
+        model_name (str): The name or path of the pre-trained model.
+        max_new_tokens (int): The maximum number of new tokens to generate.
+        verbose (bool): Whether to print verbose output.
+        *args: Variable length argument list.
+        **kwargs: Arbitrary keyword arguments.
+
+    Attributes:
+        max_new_tokens (int): The maximum number of new tokens to generate.
+        model (AutoModelForVision2Seq): The pre-trained model for vision-to-sequence tasks.
+        processor (AutoProcessor): The pre-trained processor for vision-to-sequence tasks.
+    """
 
     def __init__(
         self,
@@ -37,10 +55,10 @@ class Kosmos(BaseMultiModalModel):
         """Get image from url
 
         Args:
-            url (str): url of image
+            url (str): The URL of the image.
 
         Returns:
-            _type_: _description_
+            PIL.Image: The image object.
         """
         return Image.open(requests.get(url, stream=True).raw)
 
@@ -48,8 +66,8 @@ class Kosmos(BaseMultiModalModel):
         """Run the model
 
         Args:
-            task (str): task to run
-            image (str): img url
+            task (str): The task to run.
+            image (str): The URL of the image.
         """
         inputs = self.processor(
            text=task, images=image, return_tensors="pt"
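
As context for the docstrings added to `swarms/models/kosmos_two.py` above, here is a minimal usage sketch of the `Kosmos` class, following only the signatures visible in this diff. The checkpoint name and image URL are placeholder assumptions, and whether `run` accepts a raw URL (as its docstring states) or a pre-fetched `PIL.Image` via `get_image` should be checked against the full source:

```python
# Minimal usage sketch for the Kosmos class documented in this diff.
# Assumptions (not confirmed by the diff): the default checkpoint name,
# the example URL, and that `run` accepts the image URL directly as the
# new docstring states.
from swarms.models.kosmos_two import Kosmos

# Constructor arguments follow the Args section of the new class docstring.
model = Kosmos(
    model_name="microsoft/kosmos-2-patch14-224",  # assumed checkpoint
    max_new_tokens=64,
    verbose=True,
)

# run(task, image) takes a task prompt and an image URL per the docstring;
# get_image(url) is available if a PIL.Image is needed instead.
image_url = "https://example.com/snowman.jpg"  # placeholder URL
out = model.run("<grounding> Describe this image:", image_url)
print(out)
```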