parent
2b4be2bef7
commit
4a9a0ba3ef
@ -1,141 +0,0 @@
|
||||
from typing import Any, List, Optional, Union
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
from doc_master import doc_master
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from tenacity import retry, stop_after_attempt, wait_exponential
|
||||
|
||||
|
||||
@retry(
|
||||
stop=stop_after_attempt(3),
|
||||
wait=wait_exponential(multiplier=1, min=4, max=10),
|
||||
)
|
||||
def _process_document(doc_path: Union[str, Path]) -> str:
|
||||
"""Safely process a single document with retries.
|
||||
|
||||
Args:
|
||||
doc_path: Path to the document to process
|
||||
|
||||
Returns:
|
||||
Processed document text
|
||||
|
||||
Raises:
|
||||
Exception: If document processing fails after retries
|
||||
"""
|
||||
try:
|
||||
return doc_master(
|
||||
file_path=str(doc_path), output_type="string"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error processing document {doc_path}: {str(e)}"
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
def handle_input_docs(
|
||||
agents: Any,
|
||||
docs: Optional[List[Union[str, Path]]] = None,
|
||||
doc_folder: Optional[Union[str, Path]] = None,
|
||||
max_workers: int = 4,
|
||||
chunk_size: int = 1000000,
|
||||
) -> Any:
|
||||
"""
|
||||
Add document content to agent prompts with improved reliability and performance.
|
||||
|
||||
Args:
|
||||
agents: Dictionary mapping agent names to Agent objects
|
||||
docs: List of document paths
|
||||
doc_folder: Path to folder containing documents
|
||||
max_workers: Maximum number of parallel document processing workers
|
||||
chunk_size: Maximum characters to process at once to avoid memory issues
|
||||
|
||||
Raises:
|
||||
ValueError: If neither docs nor doc_folder is provided
|
||||
RuntimeError: If document processing fails
|
||||
"""
|
||||
if not agents:
|
||||
logger.warning(
|
||||
"No agents provided, skipping document distribution"
|
||||
)
|
||||
return
|
||||
|
||||
if not docs and not doc_folder:
|
||||
logger.warning(
|
||||
"No documents or folder provided, skipping document distribution"
|
||||
)
|
||||
return
|
||||
|
||||
logger.info("Starting document distribution to agents")
|
||||
|
||||
try:
|
||||
processed_docs = []
|
||||
|
||||
# Process individual documents in parallel
|
||||
if docs:
|
||||
with ThreadPoolExecutor(
|
||||
max_workers=max_workers
|
||||
) as executor:
|
||||
future_to_doc = {
|
||||
executor.submit(_process_document, doc): doc
|
||||
for doc in docs
|
||||
}
|
||||
|
||||
for future in as_completed(future_to_doc):
|
||||
doc = future_to_doc[future]
|
||||
try:
|
||||
processed_docs.append(future.result())
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to process document {doc}: {str(e)}"
|
||||
)
|
||||
raise RuntimeError(
|
||||
f"Document processing failed: {str(e)}"
|
||||
)
|
||||
|
||||
# Process folder if specified
|
||||
elif doc_folder:
|
||||
try:
|
||||
folder_content = doc_master(
|
||||
folder_path=str(doc_folder), output_type="string"
|
||||
)
|
||||
processed_docs.append(folder_content)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to process folder {doc_folder}: {str(e)}"
|
||||
)
|
||||
raise RuntimeError(
|
||||
f"Folder processing failed: {str(e)}"
|
||||
)
|
||||
|
||||
# Combine and chunk the processed documents
|
||||
combined_data = "\n".join(processed_docs)
|
||||
|
||||
# Update agent prompts in chunks to avoid memory issues
|
||||
for agent in agents.values():
|
||||
try:
|
||||
for i in range(0, len(combined_data), chunk_size):
|
||||
chunk = combined_data[i : i + chunk_size]
|
||||
if i == 0:
|
||||
agent.system_prompt += (
|
||||
"\nDocuments:\n" + chunk
|
||||
)
|
||||
else:
|
||||
agent.system_prompt += chunk
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to update agent prompt: {str(e)}"
|
||||
)
|
||||
raise RuntimeError(
|
||||
f"Agent prompt update failed: {str(e)}"
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Successfully added documents to {len(agents)} agents"
|
||||
)
|
||||
|
||||
return agents
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Document distribution failed: {str(e)}")
|
||||
raise RuntimeError(f"Document distribution failed: {str(e)}")
|
@ -1,102 +0,0 @@
|
||||
from typing import Union, Dict, List, Tuple, Any
|
||||
|
||||
|
||||
def any_to_str(data: Union[str, Dict, List, Tuple, Any]) -> str:
|
||||
"""Convert any input data type to a nicely formatted string.
|
||||
|
||||
This function handles conversion of various Python data types into a clean string representation.
|
||||
It recursively processes nested data structures and handles None values gracefully.
|
||||
|
||||
Args:
|
||||
data: Input data of any type to convert to string. Can be:
|
||||
- Dictionary
|
||||
- List/Tuple
|
||||
- String
|
||||
- None
|
||||
- Any other type that can be converted via str()
|
||||
|
||||
Returns:
|
||||
str: A formatted string representation of the input data.
|
||||
- Dictionaries are formatted as "key: value" pairs separated by commas
|
||||
- Lists/tuples are comma-separated
|
||||
- None returns empty string
|
||||
- Other types are converted using str()
|
||||
|
||||
Examples:
|
||||
>>> any_to_str({'a': 1, 'b': 2})
|
||||
'a: 1, b: 2'
|
||||
>>> any_to_str([1, 2, 3])
|
||||
'1, 2, 3'
|
||||
>>> any_to_str(None)
|
||||
''
|
||||
"""
|
||||
try:
|
||||
if isinstance(data, dict):
|
||||
# Format dictionary with newlines and indentation
|
||||
items = []
|
||||
for k, v in data.items():
|
||||
value = any_to_str(v)
|
||||
items.append(f"{k}: {value}")
|
||||
return "\n".join(items)
|
||||
|
||||
elif isinstance(data, (list, tuple)):
|
||||
# Format sequences with brackets and proper spacing
|
||||
items = [any_to_str(x) for x in data]
|
||||
if len(items) == 0:
|
||||
return "[]" if isinstance(data, list) else "()"
|
||||
return (
|
||||
f"[{', '.join(items)}]"
|
||||
if isinstance(data, list)
|
||||
else f"({', '.join(items)})"
|
||||
)
|
||||
|
||||
elif data is None:
|
||||
return "None"
|
||||
|
||||
else:
|
||||
# Handle strings and other types
|
||||
if isinstance(data, str):
|
||||
return f'"{data}"'
|
||||
return str(data)
|
||||
|
||||
except Exception as e:
|
||||
return f"Error converting data: {str(e)}"
|
||||
|
||||
|
||||
def main():
|
||||
# Example 1: Dictionary
|
||||
print("Dictionary:")
|
||||
print(
|
||||
any_to_str(
|
||||
{
|
||||
"name": "John",
|
||||
"age": 30,
|
||||
"hobbies": ["reading", "hiking"],
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
print("\nNested Dictionary:")
|
||||
print(
|
||||
any_to_str(
|
||||
{
|
||||
"user": {
|
||||
"id": 123,
|
||||
"details": {"city": "New York", "active": True},
|
||||
},
|
||||
"data": [1, 2, 3],
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
print("\nList and Tuple:")
|
||||
print(any_to_str([1, "text", None, (1, 2)]))
|
||||
print(any_to_str((True, False, None)))
|
||||
|
||||
print("\nEmpty Collections:")
|
||||
print(any_to_str([]))
|
||||
print(any_to_str({}))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
Loading…
Reference in new issue