From 7f48e29fb7965c7b2e3a774147ae528619e47611 Mon Sep 17 00:00:00 2001 From: Kye Date: Tue, 27 Jun 2023 07:08:35 -0400 Subject: [PATCH] tool clean up --- swarms/agents/workers/auto_agent.py | 112 ++-------------------- swarms/tools/main.py | 144 ++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 105 deletions(-) diff --git a/swarms/agents/workers/auto_agent.py b/swarms/agents/workers/auto_agent.py index e794e9fe..f4b0bec6 100644 --- a/swarms/agents/workers/auto_agent.py +++ b/swarms/agents/workers/auto_agent.py @@ -22,7 +22,7 @@ from langchain.tools.file_management.write import WriteFileTool ROOT_DIR = "./data/" -from langchain.t1ools import BaseTool, DuckDuckGoSearchRun +from langchain.tools import BaseTool, DuckDuckGoSearchRun from langchain.text_splitter import RecursiveCharacterTextSplitter from pydantic import Field @@ -38,7 +38,7 @@ from langchain.tools.human.tool import HumanInputRun from swarms.agents.workers.auto_agent import MultiModalVisualAgent -from swarms.tools.main import Terminal, CodeWriter, CodeEditor +from swarms.tools.main import Terminal, CodeWriter, CodeEditor, process_csv, WebpageQATool class MultiModalVisualAgentTool(BaseTool): name = "multi_visual_agent" @@ -61,124 +61,26 @@ vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), { -@contextmanager -def pushd(new_dir): - """Context manager for changing the current working directory.""" - prev_dir = os.getcwd() - os.chdir(new_dir) - try: - yield - finally: - os.chdir(prev_dir) - -@tool -def process_csv( - csv_file_path: str, instructions: str, output_path: Optional[str] = None -) -> str: - """Process a CSV by with pandas in a limited REPL.\ - Only use this after writing data to disk as a csv file.\ - Any figures must be saved to disk to be viewed by the human.\ - Instructions should be written in natural language, not code. Assume the dataframe is already loaded.""" - with pushd(ROOT_DIR): - try: - df = pd.read_csv(csv_file_path) - except Exception as e: - return f"Error: {e}" - agent = create_pandas_dataframe_agent(llm, df, max_iterations=30, verbose=True) - if output_path is not None: - instructions += f" Save output to disk at {output_path}" - try: - result = agent.run(instructions) - return result - except Exception as e: - return f"Error: {e}" - - -async def async_load_playwright(url: str) -> str: - """Load the specified URLs using Playwright and parse using BeautifulSoup.""" - from bs4 import BeautifulSoup - from playwright.async_api import async_playwright - - results = "" - async with async_playwright() as p: - browser = await p.chromium.launch(headless=True) - try: - page = await browser.new_page() - await page.goto(url) - - page_source = await page.content() - soup = BeautifulSoup(page_source, "html.parser") - - for script in soup(["script", "style"]): - script.extract() - - text = soup.get_text() - lines = (line.strip() for line in text.splitlines()) - chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) - results = "\n".join(chunk for chunk in chunks if chunk) - except Exception as e: - results = f"Error: {e}" - await browser.close() - return results - -def run_async(coro): - event_loop = asyncio.get_event_loop() - return event_loop.run_until_complete(coro) - -@tool -def browse_web_page(url: str) -> str: - """Verbose way to scrape a whole webpage. Likely to cause issues parsing.""" - return run_async(async_load_playwright(url)) - - -def _get_text_splitter(): - return RecursiveCharacterTextSplitter( - # Set a really small chunk size, just to show. - chunk_size = 500, - chunk_overlap = 20, - length_function = len, - ) - - -class WebpageQATool(BaseTool): - name = "query_webpage" - description = "Browse a webpage and retrieve the information relevant to the question." - text_splitter: RecursiveCharacterTextSplitter = Field(default_factory=_get_text_splitter) - qa_chain: BaseCombineDocumentsChain - - def _run(self, url: str, question: str) -> str: - """Useful for browsing websites and scraping the text information.""" - result = browse_web_page.run(url) - docs = [Document(page_content=result, metadata={"source": url})] - web_docs = self.text_splitter.split_documents(docs) - results = [] - # TODO: Handle this with a MapReduceChain - for i in range(0, len(web_docs), 4): - input_docs = web_docs[i:i+4] - window_result = self.qa_chain({"input_documents": input_docs, "question": question}, return_only_outputs=True) - results.append(f"Response from window {i} - {window_result}") - results_docs = [Document(page_content="\n".join(results), metadata={"source": url})] - return self.qa_chain({"input_documents": results_docs, "question": question}, return_only_outputs=True) - - async def _arun(self, url: str, question: str) -> str: - raise NotImplementedError - - query_website_tool = WebpageQATool(qa_chain=load_qa_with_sources_chain(llm)) # !pip install duckduckgo_search web_search = DuckDuckGoSearchRun() + +#MM CHILD AGENT multimodal_agent = MultiModalVisualAgent() # multimodal_agent_tool = MultiModalVisualAgentTool(MultiModalVisualAgent) tools = [ + web_search, WriteFileTool(root_dir="./data"), ReadFileTool(root_dir="./data"), process_csv, + + query_website_tool, Terminal, CodeWriter, diff --git a/swarms/tools/main.py b/swarms/tools/main.py index ab7b7395..77b46e31 100644 --- a/swarms/tools/main.py +++ b/swarms/tools/main.py @@ -1420,3 +1420,147 @@ class BrowserActionTool(BaseTool): return f"Action {action_type} completed successfully." except Exception as e: return f"Error: {e}" + + +#--------------------------------------> END + + + + + +#--------------------------------------> AUTO GPT TOOLS + +# General +import os +import pandas as pd + +from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent +from langchain.docstore.document import Document +import asyncio +import nest_asyncio + +# Tools +import os +from contextlib import contextmanager +from typing import Optional +from langchain.agents import tool +from langchain.tools.file_management.read import ReadFileTool +from langchain.tools.file_management.write import WriteFileTool + +ROOT_DIR = "./data/" + +from langchain.tools import BaseTool, DuckDuckGoSearchRun +from langchain.text_splitter import RecursiveCharacterTextSplitter + +from pydantic import Field +from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain, BaseCombineDocumentsChain + + + +@contextmanager +def pushd(new_dir): + """Context manager for changing the current working directory.""" + prev_dir = os.getcwd() + os.chdir(new_dir) + try: + yield + finally: + os.chdir(prev_dir) + +@tool +def process_csv( + csv_file_path: str, instructions: str, output_path: Optional[str] = None +) -> str: + """Process a CSV by with pandas in a limited REPL.\ + Only use this after writing data to disk as a csv file.\ + Any figures must be saved to disk to be viewed by the human.\ + Instructions should be written in natural language, not code. Assume the dataframe is already loaded.""" + with pushd(ROOT_DIR): + try: + df = pd.read_csv(csv_file_path) + except Exception as e: + return f"Error: {e}" + agent = create_pandas_dataframe_agent(llm, df, max_iterations=30, verbose=True) + if output_path is not None: + instructions += f" Save output to disk at {output_path}" + try: + result = agent.run(instructions) + return result + except Exception as e: + return f"Error: {e}" + + +async def async_load_playwright(url: str) -> str: + """Load the specified URLs using Playwright and parse using BeautifulSoup.""" + from bs4 import BeautifulSoup + from playwright.async_api import async_playwright + + results = "" + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + try: + page = await browser.new_page() + await page.goto(url) + + page_source = await page.content() + soup = BeautifulSoup(page_source, "html.parser") + + for script in soup(["script", "style"]): + script.extract() + + text = soup.get_text() + lines = (line.strip() for line in text.splitlines()) + chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) + results = "\n".join(chunk for chunk in chunks if chunk) + except Exception as e: + results = f"Error: {e}" + await browser.close() + return results + +def run_async(coro): + event_loop = asyncio.get_event_loop() + return event_loop.run_until_complete(coro) + +@tool +def browse_web_page(url: str) -> str: + """Verbose way to scrape a whole webpage. Likely to cause issues parsing.""" + return run_async(async_load_playwright(url)) + + +def _get_text_splitter(): + return RecursiveCharacterTextSplitter( + # Set a really small chunk size, just to show. + chunk_size = 500, + chunk_overlap = 20, + length_function = len, + ) + + +class WebpageQATool(BaseTool): + name = "query_webpage" + description = "Browse a webpage and retrieve the information relevant to the question." + text_splitter: RecursiveCharacterTextSplitter = Field(default_factory=_get_text_splitter) + qa_chain: BaseCombineDocumentsChain + + def _run(self, url: str, question: str) -> str: + """Useful for browsing websites and scraping the text information.""" + result = browse_web_page.run(url) + docs = [Document(page_content=result, metadata={"source": url})] + web_docs = self.text_splitter.split_documents(docs) + results = [] + # TODO: Handle this with a MapReduceChain + for i in range(0, len(web_docs), 4): + input_docs = web_docs[i:i+4] + window_result = self.qa_chain({"input_documents": input_docs, "question": question}, return_only_outputs=True) + results.append(f"Response from window {i} - {window_result}") + results_docs = [Document(page_content="\n".join(results), metadata={"source": url})] + return self.qa_chain({"input_documents": results_docs, "question": question}, return_only_outputs=True) + + async def _arun(self, url: str, question: str) -> str: + raise NotImplementedError + + +query_website_tool = WebpageQATool(qa_chain=load_qa_with_sources_chain(llm)) + +# !pip install duckduckgo_search +web_search = DuckDuckGoSearchRun()