diff --git a/example.py b/example.py
index 6c27bceb..74e79812 100644
--- a/example.py
+++ b/example.py
@@ -1,6 +1,8 @@
 from swarms.models import OpenAIChat
 from swarms.structs import Flow
+from langchain.schema.messages import ChatMessage
 
+message = ChatMessage(role="user", content="Translate the following English text to French: Hello World")
 api_key = ""
 
 # Initialize the language model, this model can be swapped out with Anthropic, ETC, Huggingface Models like Mistral, ETC
@@ -8,6 +10,7 @@ llm = OpenAIChat(
     # model_name="gpt-4"
     openai_api_key=api_key,
     temperature=0.5,
+    message=message,
     # max_tokens=100,
 )
 
@@ -15,7 +18,7 @@ llm = OpenAIChat(
 ## Initialize the workflow
 flow = Flow(
     llm=llm,
-    max_loops=5,
+    max_loops=2,
     dashboard=True,
     # tools = [search_api, slack, ]
     # stopping_condition=None,  # You can define a stopping condition as needed.
@@ -29,9 +32,7 @@ flow = Flow(
 # out = flow.load_state("flow_state.json")
 # temp = flow.dynamic_temperature()
 # filter = flow.add_response_filter("Trump")
-out = flow.run(
-    "Generate a 10,000 word blog on mental clarity and the benefits of meditation."
-)
+out = flow.run(message)
 # out = flow.validate_response(out)
 # out = flow.analyze_feedback(out)
 # out = flow.print_history_and_memory()
diff --git a/playground/posmed/__pycache__/PosMedPrompts.cpython-310.pyc b/playground/posmed/__pycache__/PosMedPrompts.cpython-310.pyc
deleted file mode 100644
index 4bcfbb74..00000000
Binary files a/playground/posmed/__pycache__/PosMedPrompts.cpython-310.pyc and /dev/null differ
diff --git a/swarms/models/__init__.py b/swarms/models/__init__.py
index 30a37a57..a381f5f8 100644
--- a/swarms/models/__init__.py
+++ b/swarms/models/__init__.py
@@ -1,13 +1,14 @@
 import sys
 
-log_file = open("errors.txt", "w")
-sys.stderr = log_file
+# log_file = open("errors.txt", "w")
+# sys.stderr = log_file
+
 
 # LLMs
 from swarms.models.anthropic import Anthropic
 from swarms.models.petals import Petals
 from swarms.models.mistral import Mistral
-from swarms.models.openai_models import OpenAI, AzureOpenAI, OpenAIChat
+from swarms.models.openai_models import OpenAIChat, AzureOpenAI, OpenAI
 from swarms.models.zephyr import Zephyr
 from swarms.models.biogpt import BioGPT
 from swarms.models.huggingface import HuggingfaceLLM
diff --git a/swarms/models/openai_models.py b/swarms/models/openai_models.py
index 6793d0fa..b227cc07 100644
--- a/swarms/models/openai_models.py
+++ b/swarms/models/openai_models.py
@@ -1,6 +1,6 @@
-"""OpenAI chat wrapper."""
 from __future__ import annotations
 
+"""OpenAI chat wrapper."""
 import logging
 import os
 import sys
@@ -72,7 +72,7 @@ def _import_tiktoken() -> Any:
 
 
 def _create_retry_decorator(
-    llm: ChatOpenAI,
+    llm: OpenAIChat,
     run_manager: Optional[
         Union[AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun]
     ] = None,
@@ -92,7 +92,7 @@ def _create_retry_decorator(
 
 
 async def acompletion_with_retry(
-    llm: ChatOpenAI,
+    llm: OpenAIChat,
     run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
     **kwargs: Any,
 ) -> Any:
@@ -139,8 +139,91 @@ def _convert_delta_to_message_chunk(
     else:
         return default_class(content=content)
 
 
+class OpenAI(BaseChatModel):
+    """OpenAI large language models.
+
+    To use, you should have the ``openai`` python package installed, and the
+    environment variable ``OPENAI_API_KEY`` set with your API key.
+
+    Any parameters that are valid to be passed to the openai.create call can be passed
+    in, even if not explicitly saved on this class.
+
+    Example:
+        .. code-block:: python
+
+            from swarms.models import OpenAI
+            openai = OpenAI(model_name="text-davinci-003")
+            openai("What is the report on the 2022 Olympic Games?")
+    """
+
+    @property
+    def _invocation_params(self) -> Dict[str, Any]:
+        return {**{"model": self.model_name}, **super()._invocation_params}
+
+
+class AzureOpenAI(BaseChatModel):
+    """Azure-specific OpenAI large language models.
+
+    To use, you should have the ``openai`` python package installed, and the
+    environment variable ``OPENAI_API_KEY`` set with your API key.
+
+    Any parameters that are valid to be passed to the openai.create call can be passed
+    in, even if not explicitly saved on this class.
+
+    Example:
+        .. code-block:: python
+
+            from swarms.models import AzureOpenAI
+            openai = AzureOpenAI(model_name="text-davinci-003")
+    """
+
+    deployment_name: str = ""
+    """Deployment name to use."""
+    openai_api_type: str = ""
+    openai_api_version: str = ""
+
+    @root_validator()
+    def validate_azure_settings(cls, values: Dict) -> Dict:
+        values["openai_api_version"] = get_from_dict_or_env(
+            values,
+            "openai_api_version",
+            "OPENAI_API_VERSION",
+        )
+        values["openai_api_type"] = get_from_dict_or_env(
+            values, "openai_api_type", "OPENAI_API_TYPE", "azure"
+        )
+        return values
+
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        return {
+            **{"deployment_name": self.deployment_name},
+            **super()._identifying_params,
+        }
+
+    @property
+    def _invocation_params(self) -> Dict[str, Any]:
+        openai_params = {
+            "engine": self.deployment_name,
+            "api_type": self.openai_api_type,
+            "api_version": self.openai_api_version,
+        }
+        return {**openai_params, **super()._invocation_params}
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of llm."""
+        return "azure"
+
+    @property
+    def lc_attributes(self) -> Dict[str, Any]:
+        return {
+            "openai_api_type": self.openai_api_type,
+            "openai_api_version": self.openai_api_version,
+        }
+
-class ChatOpenAI(BaseChatModel):
+class OpenAIChat(BaseChatModel):
     """`OpenAI` Chat large language models API.
 
     To use, you should have the ``openai`` python package installed, and the
diff --git a/swarms/structs/flow.py b/swarms/structs/flow.py
index 6e0a0c50..0cb1d5d3 100644
--- a/swarms/structs/flow.py
+++ b/swarms/structs/flow.py
@@ -517,7 +517,7 @@ class Flow:
     def _run(self, **kwargs: Any) -> str:
         """Generate a result using the provided keyword args."""
         task = self.format_prompt(**kwargs)
-        response, history = self._generate(task, task)
+        response, history = self.llm._generate([task, task])
         logging.info(f"Message history: {history}")
         return response
 
diff --git a/swarms/structs/self._generate b/swarms/structs/self._generate
new file mode 100644
index 00000000..e69de29b
diff --git a/swarms/tools/autogpt.py b/swarms/tools/autogpt.py
deleted file mode 100644
index cf5450e6..00000000
--- a/swarms/tools/autogpt.py
+++ /dev/null
@@ -1,188 +0,0 @@
-import asyncio
-import os
-from contextlib import contextmanager
-from typing import Optional
-
-import pandas as pd
-import torch
-from langchain.agents import tool
-from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent
-from langchain.chains.qa_with_sources.loading import (
-    BaseCombineDocumentsChain,
-)
-from langchain.docstore.document import Document
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.tools import BaseTool
-from PIL import Image
-from pydantic import Field
-from transformers import (
-    BlipForQuestionAnswering,
-    BlipProcessor,
-)
-
-from swarms.utils.logger import logger
-
-ROOT_DIR = "./data/"
-
-
-@contextmanager
-def pushd(new_dir):
-    """Context manager for changing the current working directory."""
-    prev_dir = os.getcwd()
-    os.chdir(new_dir)
-    try:
-        yield
-    finally:
-        os.chdir(prev_dir)
-
-
-@tool
-def process_csv(
-    llm, csv_file_path: str, instructions: str, output_path: Optional[str] = None
-) -> str:
-    """Process a CSV by with pandas in a limited REPL.\
-    Only use this after writing data to disk as a csv file.\
-    Any figures must be saved to disk to be viewed by the human.\
-    Instructions should be written in natural language, not code. Assume the dataframe is already loaded."""
-    with pushd(ROOT_DIR):
-        try:
-            df = pd.read_csv(csv_file_path)
-        except Exception as e:
-            return f"Error: {e}"
-        agent = create_pandas_dataframe_agent(llm, df, max_iterations=30, verbose=False)
-        if output_path is not None:
-            instructions += f" Save output to disk at {output_path}"
-        try:
-            result = agent.run(instructions)
-            return result
-        except Exception as e:
-            return f"Error: {e}"
-
-
-async def async_load_playwright(url: str) -> str:
-    """Load the specified URLs using Playwright and parse using BeautifulSoup."""
-    from bs4 import BeautifulSoup
-    from playwright.async_api import async_playwright
-
-    results = ""
-    async with async_playwright() as p:
-        browser = await p.chromium.launch(headless=True)
-        try:
-            page = await browser.new_page()
-            await page.goto(url)
-
-            page_source = await page.content()
-            soup = BeautifulSoup(page_source, "html.parser")
-
-            for script in soup(["script", "style"]):
-                script.extract()
-
-            text = soup.get_text()
-            lines = (line.strip() for line in text.splitlines())
-            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
-            results = "\n".join(chunk for chunk in chunks if chunk)
-        except Exception as e:
-            results = f"Error: {e}"
-        await browser.close()
-    return results
-
-
-def run_async(coro):
-    event_loop = asyncio.get_event_loop()
-    return event_loop.run_until_complete(coro)
-
-
-@tool
-def browse_web_page(url: str) -> str:
-    """Verbose way to scrape a whole webpage. Likely to cause issues parsing."""
-    return run_async(async_load_playwright(url))
-
-
-def _get_text_splitter():
-    return RecursiveCharacterTextSplitter(
-        # Set a really small chunk size, just to show.
-        chunk_size=500,
-        chunk_overlap=20,
-        length_function=len,
-    )
-
-
-class WebpageQATool(BaseTool):
-    name = "query_webpage"
-    description = (
-        "Browse a webpage and retrieve the information relevant to the question."
-    )
-    text_splitter: RecursiveCharacterTextSplitter = Field(
-        default_factory=_get_text_splitter
-    )
-    qa_chain: BaseCombineDocumentsChain
-
-    def _run(self, url: str, question: str) -> str:
-        """Useful for browsing websites and scraping the text information."""
-        result = browse_web_page.run(url)
-        docs = [Document(page_content=result, metadata={"source": url})]
-        web_docs = self.text_splitter.split_documents(docs)
-        results = []
-        # TODO: Handle this with a MapReduceChain
-        for i in range(0, len(web_docs), 4):
-            input_docs = web_docs[i : i + 4]
-            window_result = self.qa_chain(
-                {"input_documents": input_docs, "question": question},
-                return_only_outputs=True,
-            )
-            results.append(f"Response from window {i} - {window_result}")
-        results_docs = [
-            Document(page_content="\n".join(results), metadata={"source": url})
-        ]
-        return self.qa_chain(
-            {"input_documents": results_docs, "question": question},
-            return_only_outputs=True,
-        )
-
-    async def _arun(self, url: str, question: str) -> str:
-        raise NotImplementedError
-
-
-class EdgeGPTTool:
-    # Initialize the custom tool
-    def __init__(
-        self,
-        model,
-        name="EdgeGPTTool",
-        description="Tool that uses EdgeGPTModel to generate responses",
-    ):
-        super().__init__(name=name, description=description)
-        self.model = model
-
-    def _run(self, prompt):
-        return self.model.__call__(prompt)
-
-
-@tool
-def VQAinference(self, inputs):
-    """
-    Answer Question About The Image, VQA Multi-Modal Worker agent
-    description="useful when you need an answer for a question based on an image. "
-    "like: what is the background color of the last image, how many cats in this figure, what is in this figure. "
-    "The input to this tool should be a comma separated string of two, representing the image_path and the question",
-
-    """
-    device = "cuda:0"
-    torch_dtype = torch.float16 if "cuda" in device else torch.float32
-    processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
-    model = BlipForQuestionAnswering.from_pretrained(
-        "Salesforce/blip-vqa-base", torch_dtype=torch_dtype
-    ).to(device)
-
-    image_path, question = inputs.split(",")
-    raw_image = Image.open(image_path).convert("RGB")
-    inputs = processor(raw_image, question, return_tensors="pt").to(device, torch_dtype)
-    out = model.generate(**inputs)
-    answer = processor.decode(out[0], skip_special_tokens=True)
-
-    logger.debug(
-        f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input"
-        f" Question: {question}, Output Answer: {answer}"
-    )
-
-    return answer
diff --git a/swarms/tools/mm_models.py b/swarms/tools/mm_models.py
deleted file mode 100644
index 58fe11e5..00000000
--- a/swarms/tools/mm_models.py
+++ /dev/null
@@ -1,271 +0,0 @@
-import os
-import uuid
-
-import numpy as np
-import torch
-from diffusers import (
-    EulerAncestralDiscreteScheduler,
-    StableDiffusionInpaintPipeline,
-    StableDiffusionInstructPix2PixPipeline,
-    StableDiffusionPipeline,
-)
-from PIL import Image
-from transformers import (
-    BlipForConditionalGeneration,
-    BlipForQuestionAnswering,
-    BlipProcessor,
-    CLIPSegForImageSegmentation,
-    CLIPSegProcessor,
-)
-
-from swarms.prompts.prebuild.multi_modal_prompts import IMAGE_PROMPT
-from swarms.tools.tool import tool
-from swarms.utils.logger import logger
-from swarms.utils.main import BaseHandler, get_new_image_name
-
-
-class MaskFormer:
-    def __init__(self, device):
-        print("Initializing MaskFormer to %s" % device)
-        self.device = device
-        self.processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
-        self.model = CLIPSegForImageSegmentation.from_pretrained(
-            "CIDAS/clipseg-rd64-refined"
-        ).to(device)
-
-    def inference(self, image_path, text):
-        threshold = 0.5
-        min_area = 0.02
-        padding = 20
-        original_image = Image.open(image_path)
-        image = original_image.resize((512, 512))
-        inputs = self.processor(
-            text=text, images=image, padding="max_length", return_tensors="pt"
-        ).to(self.device)
-        with torch.no_grad():
-            outputs = self.model(**inputs)
-            mask = torch.sigmoid(outputs[0]).squeeze().cpu().numpy() > threshold
-        area_ratio = len(np.argwhere(mask)) / (mask.shape[0] * mask.shape[1])
-        if area_ratio < min_area:
-            return None
-        true_indices = np.argwhere(mask)
-        mask_array = np.zeros_like(mask, dtype=bool)
-        for idx in true_indices:
-            padded_slice = tuple(
-                slice(max(0, i - padding), i + padding + 1) for i in idx
-            )
-            mask_array[padded_slice] = True
-        visual_mask = (mask_array * 255).astype(np.uint8)
-        image_mask = Image.fromarray(visual_mask)
-        return image_mask.resize(original_image.size)
-
-
-class ImageEditing:
-    def __init__(self, device):
-        print("Initializing ImageEditing to %s" % device)
-        self.device = device
-        self.mask_former = MaskFormer(device=self.device)
-        self.revision = "fp16" if "cuda" in device else None
-        self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
-        self.inpaint = StableDiffusionInpaintPipeline.from_pretrained(
-            "runwayml/stable-diffusion-inpainting",
-            revision=self.revision,
-            torch_dtype=self.torch_dtype,
-        ).to(device)
-
-    @tool(
-        name="Remove Something From The Photo",
-        description=(
-            "useful when you want to remove and object or something from the photo "
-            "from its description or location. "
-            "The input to this tool should be a comma separated string of two, "
-            "representing the image_path and the object need to be removed. "
-        ),
-    )
-    def inference_remove(self, inputs):
-        image_path, to_be_removed_txt = inputs.split(",")
-        return self.inference_replace(f"{image_path},{to_be_removed_txt},background")
-
-    @tool(
-        name="Replace Something From The Photo",
-        description=(
-            "useful when you want to replace an object from the object description or"
-            " location with another object from its description. The input to this tool"
-            " should be a comma separated string of three, representing the image_path,"
-            " the object to be replaced, the object to be replaced with "
-        ),
-    )
-    def inference_replace(self, inputs):
-        image_path, to_be_replaced_txt, replace_with_txt = inputs.split(",")
-        original_image = Image.open(image_path)
-        original_size = original_image.size
-        mask_image = self.mask_former.inference(image_path, to_be_replaced_txt)
-        updated_image = self.inpaint(
-            prompt=replace_with_txt,
-            image=original_image.resize((512, 512)),
-            mask_image=mask_image.resize((512, 512)),
-        ).images[0]
-        updated_image_path = get_new_image_name(
-            image_path, func_name="replace-something"
-        )
-        updated_image = updated_image.resize(original_size)
-        updated_image.save(updated_image_path)
-
-        logger.debug(
-            f"\nProcessed ImageEditing, Input Image: {image_path}, Replace"
-            f" {to_be_replaced_txt} to {replace_with_txt}, Output Image:"
-            f" {updated_image_path}"
-        )
-
-        return updated_image_path
-
-
-class InstructPix2Pix:
-    def __init__(self, device):
-        print("Initializing InstructPix2Pix to %s" % device)
-        self.device = device
-        self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
-        self.pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
-            "timbrooks/instruct-pix2pix",
-            safety_checker=None,
-            torch_dtype=self.torch_dtype,
-        ).to(device)
-        self.pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
-            self.pipe.scheduler.config
-        )
-
-    @tool(
-        name="Instruct Image Using Text",
-        description=(
-            "useful when you want to the style of the image to be like the text. "
-            "like: make it look like a painting. or make it like a robot. "
-            "The input to this tool should be a comma separated string of two, "
-            "representing the image_path and the text. "
-        ),
-    )
-    def inference(self, inputs):
-        """Change style of image."""
-        logger.debug("===> Starting InstructPix2Pix Inference")
-        image_path, text = inputs.split(",")[0], ",".join(inputs.split(",")[1:])
-        original_image = Image.open(image_path)
-        image = self.pipe(
-            text, image=original_image, num_inference_steps=40, image_guidance_scale=1.2
-        ).images[0]
-        updated_image_path = get_new_image_name(image_path, func_name="pix2pix")
-        image.save(updated_image_path)
-
-        logger.debug(
-            f"\nProcessed InstructPix2Pix, Input Image: {image_path}, Instruct Text:"
-            f" {text}, Output Image: {updated_image_path}"
-        )
-
-        return updated_image_path
-
-
-class Text2Image:
-    def __init__(self, device):
-        print("Initializing Text2Image to %s" % device)
-        self.device = device
-        self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
-        self.pipe = StableDiffusionPipeline.from_pretrained(
-            "runwayml/stable-diffusion-v1-5", torch_dtype=self.torch_dtype
-        )
-        self.pipe.to(device)
-        self.a_prompt = "best quality, extremely detailed"
-        self.n_prompt = (
-            "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, "
-            "fewer digits, cropped, worst quality, low quality"
-        )
-
-    @tool(
-        name="Generate Image From User Input Text",
-        description=(
-            "useful when you want to generate an image from a user input text and save"
-            " it to a file. like: generate an image of an object or something, or"
-            " generate an image that includes some objects. The input to this tool"
-            " should be a string, representing the text used to generate image. "
-        ),
-    )
-    def inference(self, text):
-        image_filename = os.path.join("image", str(uuid.uuid4())[0:8] + ".png")
-        prompt = text + ", " + self.a_prompt
-        image = self.pipe(prompt, negative_prompt=self.n_prompt).images[0]
-        image.save(image_filename)
-
-        logger.debug(
-            f"\nProcessed Text2Image, Input Text: {text}, Output Image:"
-            f" {image_filename}"
-        )
-
-        return image_filename
-
-
-class VisualQuestionAnswering:
-    def __init__(self, device):
-        print("Initializing VisualQuestionAnswering to %s" % device)
-        self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
-        self.device = device
-        self.processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
-        self.model = BlipForQuestionAnswering.from_pretrained(
-            "Salesforce/blip-vqa-base", torch_dtype=self.torch_dtype
-        ).to(self.device)
-
-    @tool(
-        name="Answer Question About The Image",
-        description=(
-            "useful when you need an answer for a question based on an image. like:"
-            " what is the background color of the last image, how many cats in this"
-            " figure, what is in this figure. The input to this tool should be a comma"
-            " separated string of two, representing the image_path and the question"
-        ),
-    )
-    def inference(self, inputs):
-        image_path, question = inputs.split(",")
-        raw_image = Image.open(image_path).convert("RGB")
-        inputs = self.processor(raw_image, question, return_tensors="pt").to(
-            self.device, self.torch_dtype
-        )
-        out = self.model.generate(**inputs)
-        answer = self.processor.decode(out[0], skip_special_tokens=True)
-
-        logger.debug(
-            f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input"
-            f" Question: {question}, Output Answer: {answer}"
-        )
-
-        return answer
-
-
-class ImageCaptioning(BaseHandler):
-    def __init__(self, device):
-        print("Initializing ImageCaptioning to %s" % device)
-        self.device = device
-        self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
-        self.processor = BlipProcessor.from_pretrained(
-            "Salesforce/blip-image-captioning-base"
-        )
-        self.model = BlipForConditionalGeneration.from_pretrained(
-            "Salesforce/blip-image-captioning-base", torch_dtype=self.torch_dtype
-        ).to(self.device)
-
-    def handle(self, filename: str):
-        img = Image.open(filename)
-        width, height = img.size
-        ratio = min(512 / width, 512 / height)
-        width_new, height_new = (round(width * ratio), round(height * ratio))
-        img = img.resize((width_new, height_new))
-        img = img.convert("RGB")
-        img.save(filename, "PNG")
-        print(f"Resize image form {width}x{height} to {width_new}x{height_new}")
-
-        inputs = self.processor(Image.open(filename), return_tensors="pt").to(
-            self.device, self.torch_dtype
-        )
-        out = self.model.generate(**inputs)
-        description = self.processor.decode(out[0], skip_special_tokens=True)
-        print(
-            f"\nProcessed ImageCaptioning, Input Image: {filename}, Output Text:"
-            f" {description}"
-        )
-
-        return IMAGE_PROMPT.format(filename=filename, description=description)
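
Taken together, the example.py changes in this patch amount to the usage pattern sketched below. This is a minimal sketch, assuming the swarms and langchain APIs exactly as they appear in the diff above (in particular, that OpenAIChat accepts a message keyword argument and that Flow.run accepts a ChatMessage object); it is not a verified end-to-end script.

    from langchain.schema.messages import ChatMessage
    from swarms.models import OpenAIChat
    from swarms.structs import Flow

    # Build the prompt as a structured ChatMessage instead of a raw string.
    message = ChatMessage(
        role="user",
        content="Translate the following English text to French: Hello World",
    )

    # Assumes OpenAIChat forwards the `message` keyword as shown in the patch above.
    llm = OpenAIChat(
        openai_api_key="",  # or read the key from the OPENAI_API_KEY environment variable
        temperature=0.5,
        message=message,
    )

    flow = Flow(llm=llm, max_loops=2, dashboard=True)

    # Flow.run receives the ChatMessage directly; per this patch, Flow._run now
    # delegates to self.llm._generate([task, task]) internally.
    out = flow.run(message)
    print(out)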