You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
78 lines
1.8 KiB
78 lines
1.8 KiB
import csv
|
|
import json
|
|
import os
|
|
|
|
from swarms.utils.pdf_to_text import pdf_to_text
|
|
|
|
|
|
def csv_to_text(file):
    """
    Reads a CSV file and returns its rows as a string.

    Args:
        file (str): The path to the CSV file.

    Returns:
        str: The ``str()`` of a list of rows, where each row is a list of
            the string fields parsed by ``csv.reader``.
    """
    # newline="" lets the csv module do its own line-ending handling, so
    # line breaks embedded in quoted fields are preserved exactly instead
    # of being rewritten by universal-newline translation.
    with open(file, newline="") as handle:
        rows = list(csv.reader(handle))
    return str(rows)
|
|
|
|
|
|
def json_to_text(file):
    """
    Parses a JSON file and returns it re-serialized as a JSON string.

    Args:
        file (str): The path to the JSON file.

    Returns:
        str: The parsed document serialized again with ``json.dumps``
            using its default settings.
    """
    with open(file) as handle:
        return json.dumps(json.load(handle))
|
|
|
|
|
|
def txt_to_text(file):
    """
    Returns the full contents of a plain-text file.

    Args:
        file (str): The path to the text file.

    Returns:
        str: Everything read from the file.
    """
    with open(file) as handle:
        contents = handle.read()
    return contents
|
|
|
|
|
|
def md_to_text(file):
    """
    Returns the raw contents of a Markdown file.

    Args:
        file (str): The path to the Markdown file.

    Returns:
        str: Everything read from the file, with no Markdown processing.

    Raises:
        FileNotFoundError: If the path does not exist.
    """
    if os.path.exists(file):
        with open(file) as handle:
            return handle.read()
    raise FileNotFoundError(f"No such file or directory: '{file}'")
|
|
|
|
|
|
def data_to_text(file):
    """
    Converts the given data file to text format.

    The converter is picked from the file extension (compared
    case-insensitively): ``.csv``, ``.json``, ``.txt``, ``.pdf`` and ``.md``
    are routed to their dedicated helpers; any other extension is read as
    plain text.

    Args:
        file (str): The path to the data file.

    Returns:
        str: The text representation of the data file.

    Raises:
        FileNotFoundError: If the file does not exist.
        IOError: If there is an error reading the file. Any exception raised
            while converting is wrapped, with the original chained as the
            cause.

    Examples:
        >>> data_to_text("data.csv")  # doctest: +SKIP
        "[['name', 'age'], ['Ada', '36']]"
    """
    if not os.path.exists(file):
        raise FileNotFoundError(f"File not found: {file}")

    try:
        _, ext = os.path.splitext(file)
        ext = ext.lower()  # case-insensitive extension comparison
        if ext == ".csv":
            return csv_to_text(file)
        elif ext == ".json":
            return json_to_text(file)
        elif ext == ".txt":
            return txt_to_text(file)
        elif ext == ".pdf":
            return pdf_to_text(file)
        elif ext == ".md":
            return md_to_text(file)
        else:
            # Use a separate name for the handle: rebinding ``file`` here
            # would make the error message below show the file object's
            # repr instead of the path when the read fails.
            with open(file) as handle:
                return handle.read()
    except Exception as e:
        raise OSError(f"Error reading file: {file}") from e
|