You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
198 lines
7.4 KiB
198 lines
7.4 KiB
import os
|
|
from typing import List
|
|
|
|
from PyPDF2 import PdfReader
|
|
|
|
from swarms import Agent, OpenAIChat, SpreadSheetSwarm
|
|
|
|
|
|
def chunk_text(text: str, chunk_size: int = 400) -> List[str]:
|
|
"""
|
|
Splits the input text into chunks of the specified size.
|
|
|
|
Args:
|
|
text (str): The text to be chunked.
|
|
chunk_size (int): The size of each chunk.
|
|
|
|
Returns:
|
|
List[str]: A list of text chunks.
|
|
"""
|
|
return [
|
|
text[i : i + chunk_size]
|
|
for i in range(0, len(text), chunk_size)
|
|
]
|
|
|
|
|
|
def select_central_chunks(
|
|
chunks: List[str], keep_ratio: float = 0.7
|
|
) -> List[str]:
|
|
"""
|
|
Selects the central portion of the chunks based on the keep_ratio.
|
|
|
|
Args:
|
|
chunks (List[str]): List of text chunks.
|
|
keep_ratio (float): The ratio of chunks to keep, centered in the middle of the list.
|
|
|
|
Returns:
|
|
List[str]: A list of selected central chunks.
|
|
"""
|
|
total_chunks = len(chunks)
|
|
keep_count = int(total_chunks * keep_ratio)
|
|
|
|
# Determine start and end index to keep the central portion
|
|
start_index = (total_chunks - keep_count) // 2
|
|
end_index = start_index + keep_count
|
|
|
|
return chunks[start_index:end_index]
|
|
|
|
|
|
def convert_pdf_to_text(
|
|
file_path: str, chunk_size: int = 300, keep_ratio: float = 0.5
|
|
) -> str:
|
|
"""
|
|
Converts a PDF into text, splits the text into chunks, selects the central chunks, and returns a single string.
|
|
|
|
Args:
|
|
file_path (str): The path to the PDF file.
|
|
chunk_size (int): The size of each text chunk.
|
|
keep_ratio (float): The ratio of the central chunks to keep.
|
|
|
|
Returns:
|
|
str: A string containing the selected central portion of the text.
|
|
"""
|
|
with open(file_path, "rb") as pdf_file_obj:
|
|
pdf_reader = PdfReader(pdf_file_obj)
|
|
text = ""
|
|
for page in pdf_reader.pages:
|
|
text += page.extract_text()
|
|
|
|
chunks = chunk_text(text, chunk_size)
|
|
central_chunks = select_central_chunks(chunks, keep_ratio)
|
|
|
|
# Concatenate the selected chunks into a single string
|
|
return " ".join(central_chunks)
|
|
|
|
|
|
# Define custom system prompts for each agent
|
|
|
|
FINANCIAL_ANALYSIS_AGENT_SYS_PROMPT = """
|
|
You are an expert financial analyst specializing in interpreting corporate financial statements. Your task is to analyze the income statement, balance sheet, and cash flow statement from Nvidia's 10-K report. Identify key financial metrics, trends, and ratios that are critical for executive decision-making. Focus on revenue growth, profitability, liquidity, and solvency.
|
|
"""
|
|
|
|
RISK_ASSESSMENT_AGENT_SYS_PROMPT = """
|
|
You are an expert in risk management and financial compliance. Your task is to thoroughly review the risk factors outlined in Nvidia's 10-K report. Identify the most significant risks that could impact the company's financial performance, including market, operational, regulatory, and competitive risks. Provide an executive summary of the potential impact and mitigation strategies.
|
|
"""
|
|
|
|
MARKET_TRENDS_AGENT_SYS_PROMPT = """
|
|
You are a market analyst specializing in the semiconductor industry. Your task is to analyze market trends, competitive positioning, and external economic factors mentioned in Nvidia's 10-K report. Identify key opportunities and threats in the market, and provide insights on how these could influence Nvidia's strategic decisions.
|
|
"""
|
|
|
|
STRATEGIC_RECOMMENDATIONS_AGENT_SYS_PROMPT = """
|
|
You are a strategic consultant for executive leadership. Your task is to synthesize the analyses of financial performance, risks, and market trends from Nvidia's 10-K report. Based on these insights, provide actionable strategic recommendations for Nvidia's leadership team to enhance growth, manage risks, and capitalize on market opportunities.
|
|
"""
|
|
|
|
PDF_PARSING_AGENT_SYS_PROMPT = """
|
|
You are an expert in parsing and extracting data from complex documents. Your task is to extract the necessary financial statements, risk factors, and market analysis sections from Nvidia's 10-K report PDF. Provide the relevant text and data in a structured format for further analysis by other agents.
|
|
"""
|
|
|
|
REVENUE_MAXIMIZATION_AGENT_SYS_PROMPT = """
|
|
You are a revenue maximization specialist. Your task is to identify potential revenue growth opportunities for Nvidia based on the information provided in the 10-K report. Focus on innovative strategies, new markets, and product offerings that can drive revenue growth and enhance profitability.
|
|
"""
|
|
|
|
EXPENSES_MINIMIZATION_AGENT_SYS_PROMPT = """
|
|
You are an expert in minimizing expenses and optimizing cost structures. Your task is to analyze Nvidia's cost structure and operating expenses outlined in the 10-K report. Identify areas where cost savings can be achieved, operational efficiencies can be improved, and expenses can be minimized to enhance profitability.
|
|
"""
|
|
|
|
# Initialize the OpenAI model
|
|
api_key = os.getenv("OPENAI_API_KEY")
|
|
model = OpenAIChat(
|
|
openai_api_key=api_key,
|
|
model_name="gpt-4o-mini",
|
|
temperature=0.0,
|
|
max_tokens=4000,
|
|
)
|
|
|
|
# Initialize the agents for analyzing the 10-K report
|
|
agents = [
|
|
Agent(
|
|
agent_name="PDF-Parsing-Agent",
|
|
system_prompt=PDF_PARSING_AGENT_SYS_PROMPT,
|
|
llm=model,
|
|
max_loops=1,
|
|
saved_state_path="pdf_parsing_agent.json",
|
|
user_name="swarms_corp",
|
|
retry_attempts=1,
|
|
),
|
|
Agent(
|
|
agent_name="Financial-Analysis-Agent",
|
|
system_prompt=FINANCIAL_ANALYSIS_AGENT_SYS_PROMPT,
|
|
llm=model,
|
|
max_loops=1,
|
|
saved_state_path="financial_analysis_agent.json",
|
|
user_name="swarms_corp",
|
|
retry_attempts=1,
|
|
),
|
|
Agent(
|
|
agent_name="Risk-Assessment-Agent",
|
|
system_prompt=RISK_ASSESSMENT_AGENT_SYS_PROMPT,
|
|
llm=model,
|
|
max_loops=1,
|
|
saved_state_path="risk_assessment_agent.json",
|
|
user_name="swarms_corp",
|
|
retry_attempts=1,
|
|
),
|
|
Agent(
|
|
agent_name="Market-Trends-Agent",
|
|
system_prompt=MARKET_TRENDS_AGENT_SYS_PROMPT,
|
|
llm=model,
|
|
max_loops=1,
|
|
saved_state_path="market_trends_agent.json",
|
|
user_name="swarms_corp",
|
|
retry_attempts=1,
|
|
),
|
|
Agent(
|
|
agent_name="Strategic-Recommendations-Agent",
|
|
system_prompt=STRATEGIC_RECOMMENDATIONS_AGENT_SYS_PROMPT,
|
|
llm=model,
|
|
max_loops=1,
|
|
saved_state_path="strategic_recommendations_agent.json",
|
|
user_name="swarms_corp",
|
|
retry_attempts=1,
|
|
),
|
|
Agent(
|
|
agent_name="Revenue-Maximization-Agent",
|
|
system_prompt=REVENUE_MAXIMIZATION_AGENT_SYS_PROMPT,
|
|
llm=model,
|
|
max_loops=1,
|
|
saved_state_path="strategic_recommendations_agent.json",
|
|
user_name="swarms_corp",
|
|
retry_attempts=1,
|
|
),
|
|
Agent(
|
|
agent_name="Expenses-Minimization-Agent",
|
|
system_prompt=EXPENSES_MINIMIZATION_AGENT_SYS_PROMPT,
|
|
llm=model,
|
|
max_loops=1,
|
|
saved_state_path="strategic_recommendations_agent.json",
|
|
user_name="swarms_corp",
|
|
retry_attempts=1,
|
|
),
|
|
]
|
|
|
|
# Create a Swarm with the list of agents
|
|
swarm = SpreadSheetSwarm(
|
|
name="Nvidia-10K-Analysis-Swarm",
|
|
description="A swarm that analyzes Nvidia's 10-K report to provide executive-level financial insights and strategic recommendations.",
|
|
agents=agents,
|
|
autosave_on=True,
|
|
run_all_agents=False,
|
|
max_loops=1,
|
|
workspace_dir="spreadsheet_workspace",
|
|
)
|
|
|
|
# Run the swarm to analyze the 10-K report
|
|
out = swarm.run(
|
|
task=f"Analyze Nvidia's 10-K report and provide your analysis: {convert_pdf_to_text("10k.pdf", 200)}"
|
|
)
|
|
print(out)
|