from swarms.models.vilt import Vilt
# Example: run a single text + image query through the Vilt wrapper.
# NOTE(review): presumably this is a visual-question-answering model and the
# second argument is fetched over the network -- confirm against
# swarms.models.vilt before relying on either assumption.

# Inputs for the call: a text prompt and the URL of the image it refers to.
question = "What is this image"
image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"

# Build the model with its default constructor arguments.
model = Vilt()

# Invoke the model positionally as (text, image URL); the result is kept in
# `output` and is not printed or otherwise consumed here.
output = model(question, image_url)