You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
68 lines
2.4 KiB
68 lines
2.4 KiB
|
|
from swarms.structs.agent import Agent
|
|
from swarms.utils.pdf_to_text import pdf_to_text
|
|
import asyncio
|
|
|
|
class DocumentProcessingPipeline:
    """Send the text of a PDF document to three specialist agents.

    The pipeline owns a document analyzer, a legal reviewer and a data
    extractor. ``process_document`` converts a PDF to text and submits that
    text to all three agents through ``asyncio.gather``.
    """

    _ANALYZER_PROMPT = (
        "You are an expert document analyzer specializing in:\n"
        "1. Complex Document Structure Analysis\n"
        "2. Key Information Extraction\n"
        "3. Compliance Verification\n"
        "4. Document Classification\n"
        "5. Content Validation"
    )

    _LEGAL_PROMPT = (
        "You are a legal review expert focusing on:\n"
        "1. Regulatory Compliance Check\n"
        "2. Legal Risk Assessment\n"
        "3. Contractual Obligation Analysis\n"
        "4. Privacy Requirement Verification\n"
        "5. Legal Term Extraction"
    )

    _EXTRACTOR_PROMPT = (
        "You are a data extraction expert specializing in:\n"
        "1. Named Entity Recognition\n"
        "2. Relationship Extraction\n"
        "3. Tabular Data Processing\n"
        "4. Metadata Extraction\n"
        "5. Data Standardization"
    )

    def __init__(self, model_name: str = "gpt-4", max_loops: int = 2):
        """Create the three specialist agents.

        Args:
            model_name: Model identifier passed to every agent.
            max_loops: ``max_loops`` value passed to every agent.
        """
        self.document_analyzer = self._build_agent(
            "Document-Analyzer",
            "Enterprise document analysis specialist",
            self._ANALYZER_PROMPT,
            model_name,
            max_loops,
        )
        self.legal_reviewer = self._build_agent(
            "Legal-Reviewer",
            "Legal compliance and risk assessment specialist",
            self._LEGAL_PROMPT,
            model_name,
            max_loops,
        )
        self.data_extractor = self._build_agent(
            "Data-Extractor",
            "Structured data extraction specialist",
            self._EXTRACTOR_PROMPT,
            model_name,
            max_loops,
        )

    @staticmethod
    def _build_agent(name, description, system_prompt, model_name, max_loops):
        """Construct one ``Agent`` with the settings shared by all three."""
        return Agent(
            agent_name=name,
            agent_description=description,
            system_prompt=system_prompt,
            max_loops=max_loops,
            model_name=model_name,
        )

    async def process_document(self, document_path):
        """Convert a PDF to text and collect all three agents' outputs.

        Args:
            document_path: Path of the PDF, passed unchanged to
                ``pdf_to_text``.

        Returns:
            A dict with the keys ``"document_analysis"``, ``"legal_review"``
            and ``"extracted_data"``, each holding whatever the matching
            agent's ``arun`` call produced.
        """
        # pdf_to_text is a plain synchronous call; run it in the default
        # executor so the event loop is not blocked while the PDF is read.
        loop = asyncio.get_running_loop()
        document_text = await loop.run_in_executor(
            None, pdf_to_text, document_path
        )

        # gather() returns results in argument order, so the unpacking below
        # lines up with the three calls.
        analysis, legal_review, extracted = await asyncio.gather(
            self.document_analyzer.arun(
                f"Analyze this document: {document_text}"
            ),
            self.legal_reviewer.arun(
                f"Review legal aspects: {document_text}"
            ),
            self.data_extractor.arun(
                f"Extract structured data: {document_text}"
            ),
        )

        return {
            "document_analysis": analysis,
            "legal_review": legal_review,
            "extracted_data": extracted,
        }
|
|
|
|
# Usage: instantiate the pipeline; its constructor creates the three agents.
|
|
processor = DocumentProcessingPipeline()
|