From 4480ead6aaa10819f573d5b465a2dbfcf06a7899 Mon Sep 17 00:00:00 2001
From: Kye
Date: Sun, 15 Oct 2023 01:53:05 -0400
Subject: [PATCH] exa error

---
 apps/paper_swarm.py                          | 66 ++++++++++++++++++++
 images/404.txt                               |  5 ++
 images/Swarms.md                             |  2 -
 pyproject.toml                               |  3 +-
 swarms/prompts/multi_modal_visual_prompts.py |  2 +-
 swarms/tools/autogpt.py                      |  1 -
 6 files changed, 73 insertions(+), 6 deletions(-)
 create mode 100644 apps/paper_swarm.py
 create mode 100644 images/404.txt
 delete mode 100644 images/Swarms.md

diff --git a/apps/paper_swarm.py b/apps/paper_swarm.py
new file mode 100644
index 00000000..203d79e4
--- /dev/null
+++ b/apps/paper_swarm.py
@@ -0,0 +1,66 @@
+"""
+Paper Swarm
+1. Scrape https://huggingface.co/papers for all papers by searching for every link containing /papers/, then follow each link and collect the header, the abstract,
+and any other links, and write them to a txt file for each paper on https://huggingface.co/papers
+
+2. Feed prompts iteratively into Anthropic for summarization plus a value score on impact, reliability, novelty, and other paper ranking mechanisms
+
+3. Store papers in a database with metadata. Agents can use retrieval
+
+4. Discord Bot // Twitter Bot
+"""
+
+
+import requests
+from bs4 import BeautifulSoup
+import os
+
+class Paper:
+    def __init__(self, title, date, authors, abstract):
+        self.title = title
+        self.date = date
+        self.authors = authors
+        self.abstract = abstract
+
+class Scraper:
+    def __init__(self, url):
+        self.url = url
+
+    def get_paper_links(self):
+        response = requests.get(self.url)
+        soup = BeautifulSoup(response.text, 'html.parser')
+        links = [a['href'] for a in soup.find_all('a', href=True) if '/papers/' in a['href']]
+        return links
+
+    def get_paper_details(self, link):
+        response = requests.get(self.url + link)
+        soup = BeautifulSoup(response.text, 'html.parser')
+        title = soup.find('h1').text
+        date_tag = soup.find('time')
+        date = date_tag.text if date_tag else 'Unknown'
+        authors = [author.text for author in soup.find_all('span', class_='author')]
+        abstract_tag = soup.find('div', class_='abstract')
+        abstract = abstract_tag.text if abstract_tag else 'Abstract not found'
+        return Paper(title, date, authors, abstract)
+
+class FileWriter:
+    def __init__(self, directory):
+        self.directory = directory
+
+    def write_paper(self, paper):
+        with open(os.path.join(self.directory, paper.title + '.txt'), 'w') as f:
+            f.write(f"h1: {paper.title}\n")
+            f.write(f"Published on {paper.date}\n")
+            f.write("Authors:\n")
+            for author in paper.authors:
+                f.write(f"{author}\n")
+            f.write("Abstract\n")
+            f.write(paper.abstract)
+
+scraper = Scraper('https://huggingface.co/papers')
+file_writer = FileWriter('images')
+
+links = scraper.get_paper_links()
+for link in links:
+    paper = scraper.get_paper_details(link)
+    file_writer.write_paper(paper)
\ No newline at end of file
diff --git a/images/404.txt b/images/404.txt
new file mode 100644
index 00000000..2fcf39ac
--- /dev/null
+++ b/images/404.txt
@@ -0,0 +1,5 @@
+h1: 404
+Published on Unknown
+Authors:
+Abstract
+Abstract not found
\ No newline at end of file
diff --git a/images/Swarms.md b/images/Swarms.md
deleted file mode 100644
index afd306fe..00000000
--- a/images/Swarms.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# Agents
-* Agents are the fundamental building blocks of a swarm, they are indivi
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 4ac04b5b..0480a343 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "swarms"
-version = "1.8.1"
+version = "1.8.2"
 description = "Swarms - Pytorch"
 license = "MIT"
 authors = ["Kye Gomez "]
@@ -46,7 +46,6 @@ redis = "*"
 Pillow = "*"
 chromadb = "*"
 agent-protocol = "*"
-exxa = "*"
 open-interpreter = "*"
 tabulate = "*"
 termcolor = "*"
diff --git a/swarms/prompts/multi_modal_visual_prompts.py b/swarms/prompts/multi_modal_visual_prompts.py
index 5bf2da2b..e6c70c1a 100644
--- a/swarms/prompts/multi_modal_visual_prompts.py
+++ b/swarms/prompts/multi_modal_visual_prompts.py
@@ -45,4 +45,4 @@ New input: {input}
 Since Worker Multi-Modal Agent is a text language model, Worker Multi-Modal Agent must use tools to observe images rather than imagination.
 The thoughts and observations are only visible for Worker Multi-Modal Agent, Worker Multi-Modal Agent should remember to repeat important information in the final response for Human.
 Thought: Do I need to use a tool? {agent_scratchpad} Let's think step by step.
-"""
\ No newline at end of file
+"""
diff --git a/swarms/tools/autogpt.py b/swarms/tools/autogpt.py
index d93211d1..80d280bc 100644
--- a/swarms/tools/autogpt.py
+++ b/swarms/tools/autogpt.py
@@ -146,7 +146,6 @@ class WebpageQATool(BaseTool):
         raise NotImplementedError
 
 
-
 @tool
 def VQAinference(self, inputs):
     """
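
Review note on apps/paper_swarm.py: Scraper.get_paper_details() requests self.url + link, but self.url is already "https://huggingface.co/papers" and the scraped hrefs themselves begin with "/papers/", so the fetched URLs look like https://huggingface.co/papers/papers/<id> and land on the 404 page, which is consistent with the images/404.txt file added by this commit. The sketch below shows one way to build the detail URL correctly; it assumes the listing's hrefs are root-relative paths and that the "span.author" / "div.abstract" selectors from the committed code still match the page. fetch_paper_details and BASE_URL are illustrative names, not part of this patch.

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

BASE_URL = "https://huggingface.co"  # join against the site root, not the /papers listing URL


def fetch_paper_details(link):
    # urljoin("https://huggingface.co", "/papers/<id>") -> "https://huggingface.co/papers/<id>",
    # avoiding the duplicated "/papers/papers/<id>" path that produced the 404 page.
    response = requests.get(urljoin(BASE_URL, link))
    response.raise_for_status()  # fail loudly instead of silently scraping an error page
    soup = BeautifulSoup(response.text, "html.parser")

    title_tag = soup.find("h1")
    title = title_tag.text.strip() if title_tag else "Unknown"
    date_tag = soup.find("time")
    date = date_tag.text.strip() if date_tag else "Unknown"
    authors = [a.text.strip() for a in soup.find_all("span", class_="author")]
    abstract_tag = soup.find("div", class_="abstract")
    abstract = abstract_tag.text.strip() if abstract_tag else "Abstract not found"
    return {"title": title, "date": date, "authors": authors, "abstract": abstract}

If this replaces the body of Scraper.get_paper_details (returning a Paper instead of a dict), the hard-coded FileWriter('images') output directory is also worth revisiting, since paper txt files currently land next to the repository's image assets.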