From 7ee4fe323bc426a55a2af78016d05aa5ee4867e9 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 19 Nov 2023 16:41:56 -0800 Subject: [PATCH] clean up of pdf to text and accountant swarm --- .../demos/accountant_team/accountant_team.py | 42 +++-- playground/worker/ultranode_example.py | 15 -- playground/worker/worker.py | 17 -- playground/worker/worker_auto.py | 15 -- playground/worker/worker_ultra.py | 25 --- pyproject.toml | 3 +- requirements.txt | 1 + swarms/structs/flow.py | 176 ++++++++++-------- swarms/utils/__init__.py | 2 + swarms/utils/pdf_to_text.py | 44 +++++ 10 files changed, 177 insertions(+), 163 deletions(-) delete mode 100644 playground/worker/ultranode_example.py delete mode 100644 playground/worker/worker.py delete mode 100644 playground/worker/worker_auto.py delete mode 100644 playground/worker/worker_ultra.py create mode 100644 swarms/utils/pdf_to_text.py diff --git a/playground/demos/accountant_team/accountant_team.py b/playground/demos/accountant_team/accountant_team.py index 0c1dd6eb..1401ef32 100644 --- a/playground/demos/accountant_team/accountant_team.py +++ b/playground/demos/accountant_team/accountant_team.py @@ -1,13 +1,27 @@ -from swarms.models.nougat import Nougat -from swarms.structs import Flow -from swarms.models import OpenAIChat, Anthropic +import os from typing import List +from dotenv import load_dotenv + +from swarms.models import Anthropic, OpenAIChat +from swarms.structs import Flow +from swarms.utils.pdf_to_text import pdf_to_text + + +# Environment variables +load_dotenv() +anthropic_api_key = os.getenv("ANTHROPIC_API_KEY") +openai_api_key = os.getenv("OPENAI_API_KEY") + # Base llms -llm1 = OpenAIChat() -llm2 = Anthropic() -nougat = Nougat() +llm1 = OpenAIChat( + openai_api_key=openai_api_key, +) + +llm2 = Anthropic( + anthropic_api_key=anthropic_api_key, +) # Prompts for each agent @@ -67,23 +81,27 @@ class AccountantSwarms: def __init__( self, - financial_document_img: str, - financial_document_list_img: List[str] = None, +
pdf_path: str, + list_pdfs: List[str] = None, fraud_detection_instructions: str = None, summary_agent_instructions: str = None, decision_making_support_agent_instructions: str = None, ): super().__init__() - self.financial_document_img = financial_document_img + self.pdf_path = pdf_path + self.list_pdfs = list_pdfs self.fraud_detection_instructions = fraud_detection_instructions self.summary_agent_instructions = summary_agent_instructions + self.decision_making_support_agent_instructions = ( + decision_making_support_agent_instructions + ) def run(self): - # Extract text from the image - analyzed_doc = self.nougat(self.financial_document_img) + # Transform the pdf to text + pdf_text = pdf_to_text(self.pdf_path) # Detect fraud in the document - fraud_detection_agent_output = self.fraud_detection_agent(analyzed_doc) + fraud_detection_agent_output = self.fraud_detection_agent(pdf_text) # Generate an actionable summary of the document summary_agent_output = self.summary_agent(fraud_detection_agent_output) diff --git a/playground/worker/ultranode_example.py b/playground/worker/ultranode_example.py deleted file mode 100644 index 4bd1d80c..00000000 --- a/playground/worker/ultranode_example.py +++ /dev/null @@ -1,15 +0,0 @@ -from swarms import WorkerUltraUltraNode - -# Define an objective -objective = """ -Please make a web GUI for using HTTP API server. -The name of it is Swarms. -You can check the server code at ./main.py. -The server is served on localhost:8000. -Users should be able to write text input as 'query' and url array as 'files', and check the response. -Users input form should be delivered in JSON format. -I want it to have neumorphism-style. Serve it on port 4500. 
-""" - -node = WorkerUltraUltraNode(objective) -result = node.execute() diff --git a/playground/worker/worker.py b/playground/worker/worker.py deleted file mode 100644 index 00f15f1a..00000000 --- a/playground/worker/worker.py +++ /dev/null @@ -1,17 +0,0 @@ -from langchain.models import OpenAIChat -from swarms import Worker - -llm = OpenAIChat(model_name="gpt-4", openai_api_key="api-key", temperature=0.5) - -node = Worker( - llm=llm, - ai_name="Optimus Prime", - ai_role="Worker in a swarm", - external_tools=None, - human_in_the_loop=False, - temperature=0.5, -) - -task = "What were the winning boston marathon times for the past 5 years (ending in 2022)? Generate a table of the year, name, country of origin, and times." -response = node.run(task) -print(response) diff --git a/playground/worker/worker_auto.py b/playground/worker/worker_auto.py deleted file mode 100644 index 3b7e0c16..00000000 --- a/playground/worker/worker_auto.py +++ /dev/null @@ -1,15 +0,0 @@ -from swarms import worker_node - -# Your OpenAI API key -api_key = "sksdsds" - -# Initialize a WorkerNode with your API key -node = worker_node(api_key) - -# Define an objective -objective = "Please make a web GUI for using HTTP API server..." - -# Run the task -task = node.run(objective) - -print(task) diff --git a/playground/worker/worker_ultra.py b/playground/worker/worker_ultra.py deleted file mode 100644 index 69da3f30..00000000 --- a/playground/worker/worker_ultra.py +++ /dev/null @@ -1,25 +0,0 @@ -import os -from swarms.swarms.swarms import WorkerUltra - -api_key = os.getenv("OPENAI_API_KEY") - -# Define an objective -objective = """ -Please make a web GUI for using HTTP API server. -The name of it is Swarms. -You can check the server code at ./main.py. -The server is served on localhost:8000. -Users should be able to write text input as 'query' and url array as 'files', and check the response. -Users input form should be delivered in JSON format. -I want it to have neumorphism-style. 
Serve it on port 4500. - -""" - -# Create an instance of WorkerUltra -worker = WorkerUltra(objective, api_key) - -# Execute the task -result = worker.execute() - -# Print the result -print(result) diff --git a/pyproject.toml b/pyproject.toml index df0e31ab..c96f5119 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "swarms" -version = "2.3.6" +version = "2.3.7" description = "Swarms - Pytorch" license = "MIT" authors = ["Kye Gomez "] @@ -39,6 +39,7 @@ backoff = "*" marshmallow = "*" datasets = "*" diffusers = "*" +PyPDF2 = "*" accelerate = "*" sentencepiece = "*" wget = "*" diff --git a/requirements.txt b/requirements.txt index 8dad8dc6..b6a1d69a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,6 +27,7 @@ huggingface-hub google-generativeai sentencepiece duckduckgo-search +PyPDF2 agent-protocol accelerate chromadb diff --git a/swarms/structs/flow.py b/swarms/structs/flow.py index ba060b8b..171cafc9 100644 --- a/swarms/structs/flow.py +++ b/swarms/structs/flow.py @@ -151,10 +151,11 @@ class Flow: interactive: bool = False, dashboard: bool = False, agent_name: str = " Autonomous Agent XYZ1B", + agent_description: str = None, system_prompt: str = FLOW_SYSTEM_PROMPT, # tools: List[Any] = None, dynamic_temperature: bool = False, - SOP: str = None, + sop: str = None, saved_state_path: Optional[str] = "flow_state.json", autosave: bool = False, context_length: int = 8192, @@ -180,13 +181,14 @@ class Flow: self.user_name = user_name self.context_length = context_length # SOPS to inject into the system prompt - self.SOP = SOP + self.sop = sop # The max_loops will be set dynamically if the dynamic_loop if self.dynamic_loops: self.max_loops = "auto" # self.tools = tools or [] self.system_prompt = system_prompt self.agent_name = agent_name + self.agent_description = agent_description self.saved_state_path = saved_state_path self.autosave = autosave self.response_filters = [] @@ -402,77 
+404,81 @@ class Flow: 5. Repeat until stopping condition is met or max_loops is reached """ - # dynamic_prompt = self.construct_dynamic_prompt() - # combined_prompt = f"{dynamic_prompt}\n{task}" - - # Activate Autonomous agent message - self.activate_autonomous_agent() - - response = task # or combined_prompt - history = [f"{self.user_name}: {task}"] - - # If dashboard = True then print the dashboard - if self.dashboard: - self.print_dashboard(task) - - loop_count = 0 - # for i in range(self.max_loops): - while self.max_loops == "auto" or loop_count < self.max_loops: - loop_count += 1 - print(colored(f"\nLoop {loop_count} of {self.max_loops}", "blue")) - print("\n") - - if self.stopping_token: - if self._check_stopping_condition(response) or parse_done_token( - response - ): - break - - # Adjust temperature, comment if no work - if self.dynamic_temperature: - self.dynamic_temperature() - - # Preparing the prompt - task = self.agent_history_prompt(FLOW_SYSTEM_PROMPT, response) - - attempt = 0 - while attempt < self.retry_attempts: - try: - response = self.llm( - task, - **kwargs, - ) - # If there are any tools then parse and execute them - # if self.tools: - # self.parse_and_execute_tools(response) - - if self.interactive: - print(f"AI: {response}") - history.append(f"AI: {response}") - response = input("You: ") - history.append(f"Human: {response}") - else: - print(f"AI: {response}") - history.append(f"AI: {response}") - print(response) - break - except Exception as e: - logging.error(f"Error generating response: {e}") - attempt += 1 - time.sleep(self.retry_interval) - history.append(response) - time.sleep(self.loop_interval) - self.memory.append(history) - - if self.autosave: - save_path = self.saved_state_path or "flow_state.json" - print(colored(f"Autosaving flow state to {save_path}", "green")) - self.save_state(save_path) - - if self.return_history: - return response, history + try: + # dynamic_prompt = self.construct_dynamic_prompt() + # combined_prompt = 
f"{dynamic_prompt}\n{task}" + + # Activate Autonomous agent message + self.activate_autonomous_agent() + + response = task # or combined_prompt + history = [f"{self.user_name}: {task}"] + + # If dashboard = True then print the dashboard + if self.dashboard: + self.print_dashboard(task) + + loop_count = 0 + # for i in range(self.max_loops): + while self.max_loops == "auto" or loop_count < self.max_loops: + loop_count += 1 + print(colored(f"\nLoop {loop_count} of {self.max_loops}", "blue")) + print("\n") + + if self.stopping_token: + if self._check_stopping_condition(response) or parse_done_token( + response + ): + break + + # Adjust temperature, comment if no work + if self.dynamic_temperature: + self.dynamic_temperature() + + # Preparing the prompt + task = self.agent_history_prompt(FLOW_SYSTEM_PROMPT, response) + + attempt = 0 + while attempt < self.retry_attempts: + try: + response = self.llm( + task, + **kwargs, + ) + # If there are any tools then parse and execute them + # if self.tools: + # self.parse_and_execute_tools(response) + + if self.interactive: + print(f"AI: {response}") + history.append(f"AI: {response}") + response = input("You: ") + history.append(f"Human: {response}") + else: + print(f"AI: {response}") + history.append(f"AI: {response}") + print(response) + break + except Exception as e: + logging.error(f"Error generating response: {e}") + attempt += 1 + time.sleep(self.retry_interval) + history.append(response) + time.sleep(self.loop_interval) + self.memory.append(history) + + if self.autosave: + save_path = self.saved_state_path or "flow_state.json" + print(colored(f"Autosaving flow state to {save_path}", "green")) + self.save_state(save_path) + + if self.return_history: + return response, history - return response + return response + except Exception as error: + print(f"Error running flow: {error}") + raise async def arun(self, task: str, **kwargs): """ @@ -572,13 +578,27 @@ class Flow: Returns: str: The agent history prompt """ - system_prompt 
= system_prompt or self.system_prompt - agent_history_prompt = f""" - SYSTEM_PROMPT: {system_prompt} + if self.sop: + system_prompt = system_prompt or self.system_prompt + agent_history_prompt = f""" + SYSTEM_PROMPT: {system_prompt} + + Follow this standard operating procedure (SOP) to complete tasks: + {self.sop} + + ----------------- + History of conversations between yourself and your user {self.user_name}: {history} + """ + return agent_history_prompt + else: + system_prompt = system_prompt or self.system_prompt + agent_history_prompt = f""" + SYSTEM_PROMPT: {system_prompt} - History: {history} - """ - return agent_history_prompt + + History: {history} + """ + return agent_history_prompt async def run_concurrent(self, tasks: List[str], **kwargs): """ diff --git a/swarms/utils/__init__.py b/swarms/utils/__init__.py index da323121..d5ce3583 100644 --- a/swarms/utils/__init__.py +++ b/swarms/utils/__init__.py @@ -2,10 +2,12 @@ from swarms.utils.display_markdown import display_markdown_message from swarms.utils.futures import execute_futures_dict from swarms.utils.code_interpreter import SubprocessCodeInterpreter from swarms.utils.parse_code import extract_code_in_backticks_in_string +from swarms.utils.pdf_to_text import pdf_to_text __all__ = [ "display_markdown_message", "execute_futures_dict", "SubprocessCodeInterpreter", "extract_code_in_backticks_in_string", + "pdf_to_text", ] diff --git a/swarms/utils/pdf_to_text.py b/swarms/utils/pdf_to_text.py new file mode 100644 index 00000000..9d0f97b8 --- /dev/null +++ b/swarms/utils/pdf_to_text.py @@ -0,0 +1,44 @@ +import sys +import os + +try: + import PyPDF2 +except ImportError: + print("PyPDF2 not installed. Please install it using: pip install PyPDF2") + sys.exit(1) + + + +def pdf_to_text(pdf_path): + """ + Converts a PDF file to a string of text. + + Args: + pdf_path (str): The path to the PDF file to be converted. + + Returns: + str: The text extracted from the PDF. 
+ + Raises: + FileNotFoundError: If the PDF file is not found at the specified path. + Exception: If there is an error in reading the PDF file. + """ + try: + # Open the PDF file + with open(pdf_path, 'rb') as file: + pdf_reader = PyPDF2.PdfReader(file) + text = "" + + # Iterate through each page and extract text + for page in pdf_reader.pages: + text += page.extract_text() + "\n" + + return text + except FileNotFoundError: + raise FileNotFoundError(f"The file at {pdf_path} was not found.") + except Exception as e: + raise Exception(f"An error occurred while reading the PDF file: {e}") + +# Example usage +# text = pdf_to_text("test.pdf") +# print(text) \ No newline at end of file