From b54626f98c3c4c52f05d0fd69fe495b7f431f109 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 17 Dec 2023 21:30:01 -0500 Subject: [PATCH] [DEMO][Gemini VCOT] --- .../gemini_vcot.py | 29 +++++++++++++++++++ swarms/models/gemini.py | 8 ++--- 2 files changed, 33 insertions(+), 4 deletions(-) create mode 100644 playground/demos/multi_modal_chain_of_thought/gemini_vcot.py diff --git a/playground/demos/multi_modal_chain_of_thought/gemini_vcot.py b/playground/demos/multi_modal_chain_of_thought/gemini_vcot.py new file mode 100644 index 00000000..1690d8fe --- /dev/null +++ b/playground/demos/multi_modal_chain_of_thought/gemini_vcot.py @@ -0,0 +1,29 @@ +import os + +from dotenv import load_dotenv + +from swarms.models import Gemini +from swarms.prompts.visual_cot import VISUAL_CHAIN_OF_THOUGHT + +# Load the environment variables +load_dotenv() + +# Get the API key from the environment +api_key = os.environ.get("GEMINI_API_KEY") + +# Initialize the language model +llm = Gemini( + gemini_api_key=api_key, + temperature=0.5, + max_tokens=1000, + system_prompt=VISUAL_CHAIN_OF_THOUGHT + +) + +# Initialize the task +task = "This is an eye test. What do you see?" +img = "playground/demos/multi_modal_chain_of_thought/eyetest.jpg" + +# Run the workflow on a task +out = llm.run(task=task, img=img) +print(out) diff --git a/swarms/models/gemini.py b/swarms/models/gemini.py index 9e8ea8f9..9ac38576 100644 --- a/swarms/models/gemini.py +++ b/swarms/models/gemini.py @@ -49,7 +49,7 @@ class Gemini(BaseMultiModalModel): stream (bool, optional): _description_. Defaults to False. candidate_count (int, optional): _description_. Defaults to 1. stop_sequence ([type], optional): _description_. Defaults to ['x']. - max_output_tokens (int, optional): _description_. Defaults to 100. + max_tokens (int, optional): _description_. Defaults to 100. temperature (float, optional): _description_. Defaults to 0.9. 
Methods: @@ -80,7 +80,7 @@ class Gemini(BaseMultiModalModel): stream: bool = False, candidate_count: int = 1, stop_sequence=["x"], - max_output_tokens: int = 100, + max_tokens: int = 100, temperature: float = 0.9, system_prompt: str = None, *args, @@ -94,7 +94,7 @@ class Gemini(BaseMultiModalModel): self.stream = stream self.candidate_count = candidate_count self.stop_sequence = stop_sequence - self.max_output_tokens = max_output_tokens + self.max_tokens = max_tokens self.temperature = temperature self.system_prompt = system_prompt @@ -102,7 +102,7 @@ class Gemini(BaseMultiModalModel): self.generation_config = GenerationConfig( candidate_count=candidate_count, # stop_sequence=stop_sequence, - max_output_tokens=max_output_tokens, + max_output_tokens=max_tokens, temperature=temperature, *args, **kwargs,