From 1dc1e8f270b2317749c4833eabf3df68e167dd6f Mon Sep 17 00:00:00 2001 From: Kye Date: Thu, 9 Nov 2023 12:04:13 -0500 Subject: [PATCH] account swarm + layout document fix --- .gitignore | 1 + CONTRIBUTING.md | 2 +- demos/accountant_team/accountant_team.py | 55 +++++++++--------------- example.py | 2 +- pyproject.toml | 2 +- sequential_workflow_example.py | 2 +- swarms/__init__.py | 12 +++--- swarms/models/layoutlm_document_qa.py | 4 +- swarms/models/nougat.py | 17 +++++++- 9 files changed, 47 insertions(+), 50 deletions(-) diff --git a/.gitignore b/.gitignore index a336e116..767abb9d 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ error.txt # C extensions *.so +.ruff_cache errors.txt diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index be04abaa..04f0f593 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -111,7 +111,7 @@ You can learn more about mkdocs on the [mkdocs website](https://www.mkdocs.org/) 3. Make sure the script has execute permissions: ```sh - chmod +x quality.sh + chmod +x code_quality.sh ``` 4. Run the script: diff --git a/demos/accountant_team/accountant_team.py b/demos/accountant_team/accountant_team.py index 06f89684..7eadec96 100644 --- a/demos/accountant_team/accountant_team.py +++ b/demos/accountant_team/accountant_team.py @@ -1,50 +1,35 @@ -# !pip install --upgrade swarms==2.0.6 - -from swarms.models import BioGPT +import re from swarms.models.nougat import Nougat from swarms.structs import Flow +from swarms.models import OpenAIChat +from swarms.models import LayoutLMDocumentQA # # URL of the image of the financial document IMAGE_OF_FINANCIAL_DOC_URL = "bank_statement_2.jpg" # Example usage -api_key = "" # Your actual API key here - -# Initialize the OCR model - +api_key = "" # Initialize the language flow -llm = BioGPT() - - -# Create a prompt for the language model -def summary_agent_prompt(analyzed_doc: str): - model = Nougat( - max_new_tokens=5000, - ) - - out = model(analyzed_doc) - - return f""" - Generate an actionable summary of this financial document, provide bulletpoints: +llm = OpenAIChat( + openai_api_key=api_key, +) - Here is the Analyzed Document: - --- - {out} - """ +# LayoutLM Document QA +pdf_analyzer = LayoutLMDocumentQA() +question = "What is the total amount of expenses?" +answer = pdf_analyzer( + question, + IMAGE_OF_FINANCIAL_DOC_URL, +) # Initialize the Flow with the language flow -flow1 = Flow(llm=llm, max_loops=1, dashboard=False) - -# Create another Flow for a different task -flow2 = Flow(llm=llm, max_loops=1, dashboard=False) - +agent = Flow(llm=llm) +SUMMARY_AGENT_PROMPT = f""" +Generate an actionable summary of this financial document be very specific and precise, provide bulletpoints be very specific provide methods of lowering expenses: {answer}" +""" # Add tasks to the workflow -summary_agent = flow1.run(summary_agent_prompt(IMAGE_OF_FINANCIAL_DOC_URL)) - -# Suppose the next task takes the output of the first task as input -out = flow2.run( - f"Provide an actionable step by step plan on how to cut costs from the analyzed financial document. {summary_agent}" -) +summary_agent = agent.run(SUMMARY_AGENT_PROMPT) +print(summary_agent) diff --git a/example.py b/example.py index c84448a8..6c27bceb 100644 --- a/example.py +++ b/example.py @@ -1,7 +1,7 @@ from swarms.models import OpenAIChat from swarms.structs import Flow -api_key = "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" +api_key = "" # Initialize the language model, this model can be swapped out with Anthropic, ETC, Huggingface Models like Mistral, ETC llm = OpenAIChat( diff --git a/pyproject.toml b/pyproject.toml index bad710f4..4ea6bffb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ [tool.poetry.dependencies] python = "^3.8.1" transformers = "*" -openai = "*" +openai = "0.28.1" langchain = "*" asyncio = "*" nest_asyncio = "*" diff --git a/sequential_workflow_example.py b/sequential_workflow_example.py index feb6c748..51a48df2 100644 --- a/sequential_workflow_example.py +++ b/sequential_workflow_example.py @@ -3,7 +3,7 @@ from swarms.structs import Flow from swarms.structs.sequential_workflow import SequentialWorkflow # Example usage -api_key = "" # Your actual API key here +api_key = "" # Initialize the language flow llm = OpenAIChat( diff --git a/swarms/__init__.py b/swarms/__init__.py index 8f0dfc26..5de7829b 100644 --- a/swarms/__init__.py +++ b/swarms/__init__.py @@ -1,9 +1,3 @@ -from swarms.agents import * -from swarms.swarms import * -from swarms.structs import * -from swarms.models import * # import * only works when __all__ = [] is defined in __init__.py -from swarms.chunkers import * -from swarms.workers import * import os import warnings @@ -12,3 +6,9 @@ warnings.filterwarnings("ignore", category=UserWarning) # disable tensorflow warnings os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" +from swarms.agents import * +from swarms.swarms import * +from swarms.structs import * +from swarms.models import * # import * only works when __all__ = [] is defined in __init__.py +from swarms.chunkers import * +from swarms.workers import * diff --git a/swarms/models/layoutlm_document_qa.py b/swarms/models/layoutlm_document_qa.py index 6fe83210..26734a25 100644 --- a/swarms/models/layoutlm_document_qa.py +++ b/swarms/models/layoutlm_document_qa.py @@ -3,10 +3,8 @@ LayoutLMDocumentQA is a multimodal good for visual question answering on real world docs lik invoice, pdfs, etc """ from transformers import pipeline -from swarms.models.base import AbstractModel - -class LayoutLMDocumentQA(AbstractModel): +class LayoutLMDocumentQA: """ LayoutLMDocumentQA for document question answering: diff --git a/swarms/models/nougat.py b/swarms/models/nougat.py index a362f94f..9dee7d1b 100644 --- a/swarms/models/nougat.py +++ b/swarms/models/nougat.py @@ -8,7 +8,7 @@ format - Extracting metadata from pdfs """ - +import re import torch from PIL import Image from transformers import NougatProcessor, VisionEncoderDecoderModel @@ -70,5 +70,18 @@ class Nougat: sequence = self.processor.post_process_generation(sequence, fix_markdown=False) - out = print(repr(sequence)) + out = print(sequence) return out + + def clean_nougat_output(raw_output): + # Define the pattern to extract the relevant data + daily_balance_pattern = r"\*\*(\d{2}/\d{2}/\d{4})\*\*\n\n\*\*([\d,]+\.\d{2})\*\*" + + # Find all matches of the pattern + matches = re.findall(daily_balance_pattern, raw_output) + + # Convert the matches to a readable format + cleaned_data = ["Date: {}, Amount: {}".format(date, amount.replace(',', '')) for date, amount in matches] + + # Join the cleaned data with new lines for readability + return '\n'.join(cleaned_data)