You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
224 lines
9.6 KiB
224 lines
9.6 KiB
import requests
|
|
import json
|
|
from datetime import date, datetime, timedelta
|
|
import os
|
|
from ..tool import Tool
|
|
from typing import Optional, List, Dict, Any
|
|
from serpapi import GoogleSearch
|
|
|
|
|
|
def build_tool(config) -> Tool:
|
|
tool = Tool(
|
|
"Google Scholar Info",
|
|
"Look up google scholar information",
|
|
name_for_model="Google_Scholar",
|
|
description_for_model="Plugin for look up Google Scholar information",
|
|
logo_url="https://your-app-url.com/.well-known/logo.png",
|
|
contact_email="hello@contact.com",
|
|
legal_info_url="hello@legal.com"
|
|
)
|
|
|
|
KEY = config["subscription_key"]
|
|
|
|
@tool.get("/search_google_scholar")
|
|
def search_google_scholar(
|
|
query: str,
|
|
engine: str = "google_scholar",
|
|
cites: Optional[str] = None,
|
|
as_ylo: Optional[int] = None,
|
|
as_yhi: Optional[int] = None,
|
|
scisbd: Optional[int] = None,
|
|
cluster: Optional[str] = None,
|
|
hl: Optional[str] = None,
|
|
lr: Optional[str] = None,
|
|
start: Optional[int] = None,
|
|
num: Optional[int] = None,
|
|
as_sdt: Optional[str] = None,
|
|
safe: Optional[str] = None,
|
|
filter: Optional[str] = None,
|
|
as_vis: Optional[str] = None,
|
|
):
|
|
"""
|
|
Search for scholarly articles based on a query according to the google scholar
|
|
:param query: The query to search for.
|
|
:param engine: The search engine to use, default is "google_scholar"
|
|
:param cites: The unique ID of an article for triggering "Cited By" searches
|
|
:param as_ylo: The starting year for results (e.g., if as_ylo=2018, results before this year will be omitted)
|
|
:param as_yhi: The ending year for results (e.g., if as_yhi=2018, results after this year will be omitted)
|
|
:param scisbd: Defines articles added in the last year, sorted by date. It can be set to 1 to include only abstracts, or 2 to include everything
|
|
:param cluster: The unique ID of an article for triggering "All Versions" searches
|
|
:param hl: The language to use for the Google Scholar search
|
|
:param lr: One or multiple languages to limit the search to
|
|
:param start: The result offset for pagination (0 is the first page of results, 10 is the 2nd page, etc.)
|
|
:param num: The maximum number of results to return, limited to 20
|
|
:param as_sdt: Can be used either as a search type or a filter
|
|
:param safe: The level of filtering for adult content
|
|
:param filter: Defines if the filters for 'Similar Results' and 'Omitted Results' are on or off
|
|
:param as_vis: Defines whether to include citations or not
|
|
:return: Return a list of dictionaries of the papers
|
|
"""
|
|
params = {
|
|
"q": query,
|
|
"engine": engine,
|
|
"api_key": KEY,
|
|
"cites": cites,
|
|
"as_ylo": as_ylo,
|
|
"as_yhi": as_yhi,
|
|
"scisbd": scisbd,
|
|
"cluster": cluster,
|
|
"hl": hl,
|
|
"lr": lr,
|
|
"start": start,
|
|
"num": num,
|
|
"as_sdt": as_sdt,
|
|
"safe": safe,
|
|
"filter": filter,
|
|
"as_vis": as_vis
|
|
}
|
|
|
|
search = GoogleSearch(params)
|
|
results = search.get_dict()
|
|
organic_results = results["organic_results"]
|
|
|
|
return organic_results
|
|
|
|
@tool.get("/search_author")
|
|
def search_author(
|
|
author_id: str,
|
|
hl: Optional[str] = None,
|
|
view_op: Optional[str] = None,
|
|
sort: Optional[str] = None,
|
|
citation_id: Optional[str] = None,
|
|
start: Optional[int] = None,
|
|
num: Optional[int] = None,
|
|
no_cache: Optional[bool] = None,
|
|
async_req: Optional[bool] = None,
|
|
output: Optional[str] = None
|
|
):
|
|
"""
|
|
Search for an author using the Google Scholar Author API.
|
|
:param author_id: Required. The ID of an author.
|
|
:param hl: Optional. The language to use for the Google Scholar Author search. Default is 'en'.
|
|
:param view_op: Optional. Used for viewing specific parts of a page.
|
|
:param sort: Optional. Used for sorting and refining articles.
|
|
:param citation_id: Optional. Used for retrieving individual article citation.
|
|
:param start: Optional. Defines the result offset. Default is 0.
|
|
:param num: Optional. Defines the number of results to return. Default is 20.
|
|
:param no_cache: Optional. Forces SerpApi to fetch the results even if a cached version is already present. Default is False.
|
|
:param async_req: Optional. Defines the way you want to submit your search to SerpApi. Default is False.
|
|
:param output: Optional. Defines the final output you want. Default is 'json'.
|
|
:return: Returns the search results of the author basic information.
|
|
"""
|
|
params = {
|
|
"engine": "google_scholar_author",
|
|
"author_id": author_id,
|
|
"api_key": KEY,
|
|
"hl": hl,
|
|
"view_op": view_op,
|
|
"sort": sort,
|
|
"citation_id": citation_id,
|
|
"start": start,
|
|
"num": num,
|
|
"no_cache": no_cache,
|
|
"async": async_req,
|
|
"output": output
|
|
}
|
|
|
|
|
|
search = GoogleSearch(params)
|
|
results = search.get_dict()
|
|
author = results["author"]
|
|
|
|
return author
|
|
|
|
@tool.get("/get_citation")
|
|
def get_citation(
|
|
q: str,
|
|
no_cache: Optional[bool] = None,
|
|
async_: Optional[bool] = None,
|
|
output: Optional[str] = 'json') -> Dict[str, Any]:
|
|
"""
|
|
Function to get citation results from the Google Scholar organic results using the Google Scholar Cite API.
|
|
|
|
Parameters:
|
|
q (str): ID of an individual Google Scholar organic search result.
|
|
engine (str, optional): Set to 'google_scholar_cite' to use the Google Scholar API engine. Defaults to 'google_scholar_cite'.
|
|
no_cache (bool, optional): If set to True, will force SerpApi to fetch the Google Scholar Cite results even if a cached version is already present. Defaults to None.
|
|
async_ (bool, optional): If set to True, will submit search to SerpApi and retrieve results later. Defaults to None.
|
|
api_key (str): SerpApi private key to use.
|
|
output (str, optional): Final output format. Set to 'json' to get a structured JSON of the results, or 'html' to get the raw html retrieved. Defaults to 'json'.
|
|
|
|
Returns:
|
|
Dict[str, Any]: Returns the search results in the specified format.
|
|
"""
|
|
|
|
params = {
|
|
"q": q,
|
|
"engine": 'google_scholar_cite',
|
|
"no_cache": no_cache,
|
|
"async": async_,
|
|
"api_key": KEY,
|
|
"output": output
|
|
}
|
|
|
|
search = GoogleSearch(params)
|
|
results = search.get_dict()
|
|
citation = results["citations"]
|
|
|
|
return citation
|
|
|
|
@tool.get("/get_profile")
|
|
def get_profile(self,
|
|
mauthors: str,
|
|
hl: Optional[str] = 'en',
|
|
after_author: Optional[str] = None,
|
|
before_author: Optional[str] = None,
|
|
no_cache: Optional[bool] = False,
|
|
_async: Optional[bool] = False,
|
|
output: Optional[str] = 'json'
|
|
) -> Dict:
|
|
"""
|
|
The getProfile function is used to scrape profile results from the Google Scholar Profiles search page.
|
|
|
|
Args:
|
|
mauthors (str): Defines the author you want to search for.
|
|
hl (str, optional): Defines the language to use for the Google Scholar Profiles search.
|
|
It's a two-letter language code. (e.g., 'en' for English, 'es' for Spanish, or 'fr' for French).
|
|
Defaults to 'en'.
|
|
after_author (str, optional): Defines the next page token.
|
|
It is used for retrieving the next page results.
|
|
The parameter has the precedence over before_author parameter. Defaults to None.
|
|
before_author (str, optional): Defines the previous page token.
|
|
It is used for retrieving the previous page results. Defaults to None.
|
|
no_cache (bool, optional): Will force SerpApi to fetch the Google Scholar Profiles results even if a cached version is already present.
|
|
Defaults to False.
|
|
_async (bool, optional): Defines the way you want to submit your search to SerpApi. Defaults to False.
|
|
api_key (str): Defines the SerpApi private key to use.
|
|
output (str, optional): Defines the final output you want.
|
|
It can be set to 'json' (default) to get a structured JSON of the results,
|
|
or 'html' to get the raw html retrieved. Defaults to 'json'.
|
|
|
|
Returns:
|
|
Dict: The Google Scholar Profiles search results.
|
|
"""
|
|
params = {
|
|
'mauthors': mauthors,
|
|
'engine':'google_scholar_profiles',
|
|
'hl': hl,
|
|
'after_author': after_author,
|
|
'before_author': before_author,
|
|
'engine': 'google_scholar_profiles',
|
|
'no_cache': no_cache,
|
|
'async': _async,
|
|
'api_key': KEY,
|
|
'output': output
|
|
}
|
|
|
|
search = GoogleSearch(params)
|
|
results = search.get_dict()
|
|
profile = results['profiles']
|
|
|
|
return profile
|
|
|
|
return tool
|