swarms/examples/document_processing/enterprise_doc_processor.py

import asyncio

from swarms.structs.agent import Agent
from swarms.utils.pdf_to_text import pdf_to_text


class DocumentProcessingPipeline:
    """Fans a document out to three specialist agents and gathers their results."""

    def __init__(self):
        # Structural analysis, extraction, and classification of the document
        self.document_analyzer = Agent(
            agent_name="Document-Analyzer",
            agent_description="Enterprise document analysis specialist",
            system_prompt="""You are an expert document analyzer specializing in:
            1. Complex Document Structure Analysis
            2. Key Information Extraction
            3. Compliance Verification
            4. Document Classification
            5. Content Validation""",
            max_loops=2,
            model_name="gpt-4",
        )

        # Regulatory, risk, and contractual review
        self.legal_reviewer = Agent(
            agent_name="Legal-Reviewer",
            agent_description="Legal compliance and risk assessment specialist",
            system_prompt="""You are a legal review expert focusing on:
            1. Regulatory Compliance Check
            2. Legal Risk Assessment
            3. Contractual Obligation Analysis
            4. Privacy Requirement Verification
            5. Legal Term Extraction""",
            max_loops=2,
            model_name="gpt-4",
        )

        # Entity, relationship, and tabular data extraction
        self.data_extractor = Agent(
            agent_name="Data-Extractor",
            agent_description="Structured data extraction specialist",
            system_prompt="""You are a data extraction expert specializing in:
            1. Named Entity Recognition
            2. Relationship Extraction
            3. Tabular Data Processing
            4. Metadata Extraction
            5. Data Standardization""",
            max_loops=2,
            model_name="gpt-4",
        )

    async def process_document(self, document_path):
        # Convert the PDF document to plain text
        document_text = pdf_to_text(document_path)

        # Run all three agents concurrently over the same text
        tasks = [
            self.document_analyzer.arun(f"Analyze this document: {document_text}"),
            self.legal_reviewer.arun(f"Review legal aspects: {document_text}"),
            self.data_extractor.arun(f"Extract structured data: {document_text}"),
        ]
        results = await asyncio.gather(*tasks)

        return {
            "document_analysis": results[0],
            "legal_review": results[1],
            "extracted_data": results[2],
        }
# Usage
if __name__ == "__main__":
    processor = DocumentProcessingPipeline()
    # "contract.pdf" is a placeholder path; substitute a real PDF
    report = asyncio.run(processor.process_document("contract.pdf"))
    print(report)
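
Because process_document is a coroutine, the same fan-out pattern scales to whole batches of documents. A minimal sketch, assuming the pipeline class above is in scope and the PDF filenames are placeholders:

async def process_batch(paths):
    # One pipeline instance shared across documents; one coroutine per document
    processor = DocumentProcessingPipeline()
    results = await asyncio.gather(
        *(processor.process_document(path) for path in paths)
    )
    return dict(zip(paths, results))

# Example (placeholder filenames):
# asyncio.run(process_batch(["invoice.pdf", "nda.pdf"]))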