You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
swarms/swarms/utils/data_to_text.py

78 lines
1.8 KiB

import csv
import json
import os
from swarms.utils.pdf_to_text import pdf_to_text
def csv_to_text(file):
    """
    Reads a CSV file and returns its rows as a string.

    Args:
        file (str): The path to the CSV file.

    Returns:
        str: ``str()`` of the parsed rows, where the rows form a list and
            each row is a list of string fields.
    """
    # newline="" leaves line-ending handling to the csv module, as its
    # documentation requires. Without it, universal-newline translation
    # rewrites "\r\n" sequences embedded in quoted fields before the
    # reader ever sees them.
    with open(file, newline="") as handle:
        rows = list(csv.reader(handle))
    return str(rows)
def json_to_text(file):
    """
    Loads a JSON file and returns it re-serialized as a JSON string.

    Args:
        file (str): The path to the JSON file.

    Returns:
        str: The parsed document dumped back out with ``json.dumps``
            defaults.
    """
    with open(file) as handle:
        parsed = json.load(handle)
    serialized = json.dumps(parsed)
    return serialized
def txt_to_text(file):
    """
    Returns the full contents of a plain-text file.

    Args:
        file (str): The path to the text file.

    Returns:
        str: Everything read from the file.
    """
    with open(file) as handle:
        return handle.read()
def md_to_text(file):
    """
    Returns the raw contents of a Markdown file.

    Args:
        file (str): The path to the Markdown file.

    Returns:
        str: The file contents, unmodified (no Markdown rendering).

    Raises:
        FileNotFoundError: If nothing exists at ``file``.
    """
    if os.path.exists(file):
        with open(file) as handle:
            return handle.read()
    raise FileNotFoundError(f"No such file or directory: '{file}'")
def data_to_text(file):
    """
    Converts the given data file to text format.

    The converter is picked from the file extension, compared
    case-insensitively: ``.csv``, ``.json``, ``.txt``, ``.pdf`` and ``.md``
    are handed to their dedicated helpers; any other extension is read as
    plain text.

    Args:
        file (str): The path to the data file.

    Returns:
        str: The text representation of the data file.

    Raises:
        FileNotFoundError: If the file does not exist.
        OSError: If anything fails while reading or converting the file;
            the underlying exception is chained as ``__cause__``.

    Examples:
        >>> data_to_text("notes.txt")  # doctest: +SKIP
        'contents of notes.txt'
    """
    if not os.path.exists(file):
        raise FileNotFoundError(f"File not found: {file}")

    try:
        # Lowercase the extension so ".CSV" and ".csv" are treated alike.
        ext = os.path.splitext(file)[1].lower()

        if ext == ".csv":
            return csv_to_text(file)
        if ext == ".json":
            return json_to_text(file)
        if ext == ".txt":
            return txt_to_text(file)
        if ext == ".pdf":
            return pdf_to_text(file)
        if ext == ".md":
            return md_to_text(file)

        # Unknown extension: fall back to reading the file as plain text.
        # The handle gets its own name so `file` still holds the path when
        # the error message below is formatted.
        with open(file) as handle:
            return handle.read()
    except Exception as e:
        raise OSError(f"Error reading file: {file}") from e