anthropic + kosmo2 + fastvit

Former-commit-id: 6b4c2d45d3
dockerize
Kye 1 year ago
parent da120e1aef
commit 0c4dd88f98

@ -1,7 +1,6 @@
# !pip install --upgrade swarms==2.0.6 # !pip install --upgrade swarms==2.0.6
from swarms.models import OpenAIChat from swarms.models import OpenAIChat
from swarms.models.nougat import Nougat from swarms.models.nougat import Nougat
from swarms.structs import Flow from swarms.structs import Flow
@ -11,7 +10,10 @@ from swarms.structs.sequential_workflow import SequentialWorkflow
IMAGE_OF_FINANCIAL_DOC_URL = "bank_statement_2.jpg" IMAGE_OF_FINANCIAL_DOC_URL = "bank_statement_2.jpg"
# Example usage # Example usage
api_key = "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" # Your actual API key here api_key = (
"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" # Your actual API key here
)
# Initialize the OCR model # Initialize the OCR model
def ocr_model(img: str): def ocr_model(img: str):
@ -19,12 +21,14 @@ def ocr_model(img: str):
analyze_finance_docs = ocr(img) analyze_finance_docs = ocr(img)
return str(analyze_finance_docs) return str(analyze_finance_docs)
# Initialize the language flow # Initialize the language flow
llm = OpenAIChat( llm = OpenAIChat(
openai_api_key=api_key, openai_api_key=api_key,
temperature=0.5, temperature=0.5,
) )
# Create a prompt for the language model # Create a prompt for the language model
def summary_agent_prompt(analyzed_doc: str): def summary_agent_prompt(analyzed_doc: str):
analyzed_doc = ocr_model(img=analyzed_doc) analyzed_doc = ocr_model(img=analyzed_doc)
@ -36,6 +40,7 @@ def summary_agent_prompt(analyzed_doc: str):
{analyzed_doc} {analyzed_doc}
""" """
# Initialize the Flow with the language flow # Initialize the Flow with the language flow
flow1 = Flow(llm=llm, max_loops=1, dashboard=False) flow1 = Flow(llm=llm, max_loops=1, dashboard=False)
@ -49,7 +54,10 @@ workflow = SequentialWorkflow(max_loops=1)
workflow.add(summary_agent_prompt(IMAGE_OF_FINANCIAL_DOC_URL), flow1) workflow.add(summary_agent_prompt(IMAGE_OF_FINANCIAL_DOC_URL), flow1)
# Suppose the next task takes the output of the first task as input # Suppose the next task takes the output of the first task as input
workflow.add("Provide an actionable step by step plan on how to cut costs from the analyzed financial document.", flow2) workflow.add(
"Provide an actionable step by step plan on how to cut costs from the analyzed financial document.",
flow2,
)
# Run the workflow # Run the workflow
workflow.run() workflow.run()

@ -1,9 +1,7 @@
from swarms.models.anthropic import Anthropic from swarms.models.anthropic import Anthropic
model = Anthropic( model = Anthropic(anthropic_api_key="")
anthropic_api_key=""
)
task = "Say hello to" task = "Say hello to"

File diff suppressed because it is too large Load Diff

@ -0,0 +1,80 @@
import json
import os
from typing import List
import numpy as np
import timm
import torch
from PIL import Image
from pydantic import BaseModel, StrictFloat, StrictInt, validator
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the classes for image classification
with open(os.path.join(os.path.dirname(__file__), "fast_vit_classes.json")) as f:
FASTVIT_IMAGENET_1K_CLASSES = json.load(f)
class ClassificationResult(BaseModel):
class_id: List[StrictInt]
confidence: List[StrictFloat]
@validator("class_id", "confidence", pre=True, each_item=True)
def check_list_contents(cls, v):
assert isinstance(v, int) or isinstance(v, float), "must be integer or float"
return v
class FastViT:
"""
FastViT model for image classification
Args:
img (str): path to the input image
confidence_threshold (float): confidence threshold for the model's predictions
Returns:
ClassificationResult: a pydantic BaseModel containing the class ids and confidences of the model's predictions
Example:
>>> fastvit = FastViT()
>>> result = fastvit(img="path_to_image.jpg", confidence_threshold=0.5)
To use, create a json file called: fast_vit_classes.json
"""
def __init__(self):
self.model = timm.create_model(
"hf_hub:timm/fastvit_s12.apple_in1k", pretrained=True
).to(DEVICE)
data_config = timm.data.resolve_model_data_config(self.model)
self.transforms = timm.data.create_transform(**data_config, is_training=False)
self.model.eval()
def __call__(
self, img: str, confidence_threshold: float = 0.5
) -> ClassificationResult:
"""classifies the input image and returns the top k classes and their probabilities"""
img = Image.open(img).convert("RGB")
img_tensor = self.transforms(img).unsqueeze(0).to(DEVICE)
with torch.no_grad():
output = self.model(img_tensor)
probabilities = torch.nn.functional.softmax(output, dim=1)
# Get top k classes and their probabilities
top_probs, top_classes = torch.topk(
probabilities, k=FASTVIT_IMAGENET_1K_CLASSES
)
# Filter by confidence threshold
mask = top_probs > confidence_threshold
top_probs, top_classes = top_probs[mask], top_classes[mask]
# Convert to Python lists and map class indices to labels if needed
top_probs = top_probs.cpu().numpy().tolist()
top_classes = top_classes.cpu().numpy().tolist()
# top_class_labels = [FASTVIT_IMAGENET_1K_CLASSES[i] for i in top_classes] # Uncomment if class labels are needed
return ClassificationResult(class_id=top_classes, confidence=top_probs)

@ -0,0 +1,100 @@
from typing import List, Tuple
import numpy as np
from PIL import Image
from pydantic import BaseModel, root_validator, validator
from transformers import AutoModelForVision2Seq, AutoProcessor
# Assuming the Detections class represents the output of the model prediction
class Detections(BaseModel):
xyxy: List[Tuple[float, float, float, float]]
class_id: List[int]
confidence: List[float]
@root_validator
def check_length(cls, values):
assert (
len(values.get("xyxy"))
== len(values.get("class_id"))
== len(values.get("confidence"))
), "All fields must have the same length."
return values
@validator("xyxy", "class_id", "confidence", pre=True, each_item=True)
def check_not_empty(cls, v):
if isinstance(v, list) and len(v) == 0:
raise ValueError("List must not be empty")
return v
@classmethod
def empty(cls):
return cls(xyxy=[], class_id=[], confidence=[])
class Kosmos2(BaseModel):
model: AutoModelForVision2Seq
processor: AutoProcessor
@classmethod
def initialize(cls):
model = AutoModelForVision2Seq.from_pretrained(
"ydshieh/kosmos-2-patch14-224", trust_remote_code=True
)
processor = AutoProcessor.from_pretrained(
"ydshieh/kosmos-2-patch14-224", trust_remote_code=True
)
return cls(model=model, processor=processor)
def __call__(self, img: str) -> Detections:
image = Image.open(img)
prompt = "<grounding>An image of"
inputs = self.processor(text=prompt, images=image, return_tensors="pt")
outputs = self.model.generate(**inputs, use_cache=True, max_new_tokens=64)
generated_text = self.processor.batch_decode(outputs, skip_special_tokens=True)[
0
]
# The actual processing of generated_text to entities would go here
# For the purpose of this example, assume a mock function 'extract_entities' exists:
entities = self.extract_entities(generated_text)
# Convert entities to detections format
detections = self.process_entities_to_detections(entities, image)
return detections
def extract_entities(
self, text: str
) -> List[Tuple[str, Tuple[float, float, float, float]]]:
# Placeholder function for entity extraction
# This should be replaced with the actual method of extracting entities
return []
def process_entities_to_detections(
self,
entities: List[Tuple[str, Tuple[float, float, float, float]]],
image: Image.Image,
) -> Detections:
if not entities:
return Detections.empty()
class_ids = [0] * len(entities) # Replace with actual class ID extraction logic
xyxys = [
(
e[1][0] * image.width,
e[1][1] * image.height,
e[1][2] * image.width,
e[1][3] * image.height,
)
for e in entities
]
confidences = [1.0] * len(entities) # Placeholder confidence
return Detections(xyxy=xyxys, class_id=class_ids, confidence=confidences)
# Usage:
# kosmos2 = Kosmos2.initialize()
# detections = kosmos2(img="path_to_image.jpg")

@ -286,12 +286,14 @@ class SequentialWorkflow:
) )
) )
task = Task(description=task, flow=kwargs["flow"], args=list(kwargs["args"]), kwargs=kwargs["kwargs"]) task = Task(
description=task,
flow=kwargs["flow"],
args=list(kwargs["args"]),
kwargs=kwargs["kwargs"],
)
self.tasks.append(task) self.tasks.append(task)
def load_workflow_state(self, filepath: str = None, **kwargs) -> None: def load_workflow_state(self, filepath: str = None, **kwargs) -> None:
""" """
Loads the workflow state from a json file and restores the workflow state. Loads the workflow state from a json file and restores the workflow state.

Loading…
Cancel
Save