diff --git a/docs/dev/engines/online/astrophysics_data_system.rst b/docs/dev/engines/online/astrophysics_data_system.rst new file mode 100644 index 000000000..30690fcd9 --- /dev/null +++ b/docs/dev/engines/online/astrophysics_data_system.rst @@ -0,0 +1,8 @@ +.. _astrophysics_data_system engine: + +============================== +Astrophysics Data System (ADS) +============================== + +.. automodule:: searx.engines.astrophysics_data_system + :members: diff --git a/searx/engines/astrophysics_data_system.py b/searx/engines/astrophysics_data_system.py index a1d942b50..59efa226a 100644 --- a/searx/engines/astrophysics_data_system.py +++ b/searx/engines/astrophysics_data_system.py @@ -1,93 +1,163 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""".. sidebar:: info +"""The Astrophysics Data System (ADS_) is a digital library portal for +researchers in astronomy and physics, operated by the Smithsonian Astrophysical +Observatory (SAO) under a NASA grant. The ADS_ is a solr instance, but not with +the standard API paths. -The Astrophysics Data System (ADS) is a digital library portal for researchers in astronomy and physics, -operated by the Smithsonian Astrophysical Observatory (SAO) under a NASA grant. -The engine is adapted from the solr engine. +.. note:: + The ADS_ engine requires an :py:obj:`API key <api_key>`. + +This engine uses the `search/query`_ API endpoint. Since the user's search term +is passed through, the `search syntax`_ of ADS can be used (at least to some +extent). + +.. _ADS: https://ui.adsabs.harvard.edu +.. _search/query: https://ui.adsabs.harvard.edu/help/api/api-docs.html#get-/search/query +.. _search syntax: https://ui.adsabs.harvard.edu/help/search/search-syntax + + +Configuration +============= + +The engine has the following additional settings: + +- :py:obj:`api_key` +- :py:obj:`ads_sort` + +.. code:: yaml + + - name: astrophysics data system + api_key: "..." 
+ inactive: false + + +Implementations +=============== """ -# pylint: disable=global-statement +import typing as t from datetime import datetime -from json import loads from urllib.parse import urlencode + +from searx.utils import html_to_text from searx.exceptions import SearxEngineAPIException +from searx.result_types import EngineResults + +if t.TYPE_CHECKING: + from searx.extended_types import SXNG_Response + from searx.search.processors import OnlineParams about = { - "website": 'https://ui.adsabs.harvard.edu/', - "wikidata_id": 'Q752099', - "official_api_documentation": 'https://ui.adsabs.harvard.edu/help/api/api-docs.html', + "website": "https://ui.adsabs.harvard.edu/", + "wikidata_id": "Q752099", + "official_api_documentation": "https://ui.adsabs.harvard.edu/help/api/api-docs.html", "use_official_api": True, "require_api_key": True, - "results": 'JSON', + "results": "JSON", } -base_url = 'https://api.adsabs.harvard.edu/v1/search' -result_base_url = 'https://ui.adsabs.harvard.edu/abs/' -rows = 10 -sort = '' # sorting: asc or desc -field_list = ['bibcode', 'author', 'title', 'abstract', 'doi', 'date'] # list of field names to display on the UI -default_fields = '' # default field to query -query_fields = '' # query fields +categories = ["science", "scientific publications"] paging = True -api_key = 'unset' +base_url = "https://api.adsabs.harvard.edu/v1/search/query" + +api_key = "unset" +"""Get an API token as described in https://ui.adsabs.harvard.edu/help/api""" + +ads_field_list = [ + "abstract", + "author", + "bibcode", + "comment", + "date", + "doi", + "isbn", + "issn", + "keyword", + "page", + "page_count", + "page_range", + "pub", + "pubdate", + "pubnote", + "read_count", + "title", + "volume", + "year", +] +"""Set of fields to return in the response from ADS.""" + +ads_rows = 10 +"""How many records to return for the ADS request.""" + +ads_sort = "read_count desc" +"""The format is 'field' + 'direction' where direction is one of 'asc' or 'desc' +and 
field is any of the valid indexes.""" -def init(_): - if api_key == 'unset': - raise SearxEngineAPIException('missing ADS API key') +def setup(engine_settings: dict[str, t.Any]) -> bool: + """Initialization of the ADS_ engine, checks whether the :py:obj:`api_key` + is set, otherwise the engine is inactive. + """ + key: str = engine_settings.get("api_key", "") + if key and key not in ("unset", "unknown", "..."): + return True + logger.error("Astrophysics Data System (ADS) API key is not set or invalid.") + return False -def request(query, params): - query_params = {'q': query, 'rows': rows} - if field_list: - query_params['fl'] = ','.join(field_list) - if query_fields: - query_params['qf'] = ','.join(query_fields) - if default_fields: - query_params['df'] = default_fields - if sort: - query_params['sort'] = sort +def request(query: str, params: "OnlineParams") -> None: - query_params['start'] = rows * (params['pageno'] - 1) + args: dict[str, str | int] = { + "q": query, + "fl": ",".join(ads_field_list), + "rows": ads_rows, + "start": ads_rows * (params["pageno"] - 1), + } + if ads_sort: + args["sort"] = ads_sort - params['headers']['Authorization'] = f'Bearer {api_key}' - params['url'] = f"{base_url}/query?{urlencode(query_params)}" - - return params + params["headers"]["Authorization"] = f"Bearer {api_key}" + params["url"] = f"{base_url}?{urlencode(args)}" -def response(resp): - try: - resp_json = loads(resp.text) - except Exception as e: - raise SearxEngineAPIException("failed to parse response") from e +def response(resp: "SXNG_Response") -> EngineResults: - if 'error' in resp_json: - raise SearxEngineAPIException(resp_json['error']['msg']) + res = EngineResults() + json_data: dict[str, dict[str, t.Any]] = resp.json() - resp_json = resp_json["response"] - result_len = resp_json["numFound"] - results = [] + if "error" in json_data: + raise SearxEngineAPIException(json_data["error"]["msg"]) - for res in resp_json["docs"]: - author = res.get("author") + def _str(k: 
str) -> str: + return str(doc.get(k, "")) - if author: - author = author[0] + ' et al.' + def _list(k: str) -> list[str]: + return doc.get(k, []) - results.append( - { - 'url': result_base_url + res.get("bibcode") + "/", - 'title': res.get("title")[0], - 'author': author, - 'content': res.get("abstract"), - 'doi': res.get("doi"), - 'publishedDate': datetime.fromisoformat(res.get("date")), - } + for doc in json_data["response"]["docs"]: + authors: list[str] = doc["author"] + if len(authors) > 15: + # There are articles with hundreds of authors + authors = authors[:15] + ["et al."] + + paper = res.types.Paper( + url=f"https://ui.adsabs.harvard.edu/abs/{doc.get('bibcode')}/", + title=html_to_text(_list("title")[0]), + authors=authors, + content=html_to_text(_str("abstract")), + doi=_list("doi")[0], + issn=_list("issn"), + isbn=_list("isbn"), + tags=_list("keyword"), + pages=",".join(_list("page")), + publisher=_str("pub") + " " + _str("year"), + publishedDate=datetime.fromisoformat(_str("date")), + volume=_str("volume"), + views=_str("read_count"), + comments=" / ".join(_list("pubnote")), ) + res.add(paper) - results.append({'number_of_results': result_len}) - - return results + return res diff --git a/searx/settings.yml b/searx/settings.yml index 217658088..9b85692fc 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -407,13 +407,12 @@ engines: require_api_key: false results: JSON - # - name: astrophysics data system - # engine: astrophysics_data_system - # sort: asc - # weight: 5 - # categories: [science] - # api_key: your-new-key - # shortcut: ads + - name: astrophysics data system + engine: astrophysics_data_system + shortcut: ads + # read https://docs.searxng.org/dev/engines/online/astrophysics_data_system.html + api_key: "" + inactive: true - name: alpine linux packages engine: alpinelinux