[mod] ADS engine: revision of the engine (Paper result)

Revision of the Astrophysics Data System (ADS) engine: the engine now uses the
result type Paper and adds type annotations.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2025-09-15 09:02:54 +02:00 committed by Markus Heiser
parent f8f7adce6b
commit 1520a8d545
3 changed files with 146 additions and 69 deletions

View File

@ -0,0 +1,8 @@
.. _astrophysics_data_system engine:
==============================
Astrophysics Data System (ADS)
==============================
.. automodule:: searx.engines.astrophysics_data_system
:members:

View File

@ -1,93 +1,163 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
""".. sidebar:: info """The Astrophysics Data System (ADS_) is a digital library portal for
researchers in astronomy and physics, operated by the Smithsonian Astrophysical
Observatory (SAO) under a NASA grant. The ADS_ is a Solr instance, but it does
not use the standard Solr API paths.
The Astrophysics Data System (ADS) is a digital library portal for researchers in astronomy and physics, .. note::
operated by the Smithsonian Astrophysical Observatory (SAO) under a NASA grant.
The engine is adapted from the solr engine.
The ADS_ engine requires an :py:obj:`API key <api_key>`.
This engine uses the `search/query`_ API endpoint. Since the user's search term
is passed through, the `search syntax`_ of ADS can be used (at least to some
extent).
.. _ADS: https://ui.adsabs.harvard.edu
.. _search/query: https://ui.adsabs.harvard.edu/help/api/api-docs.html#get-/search/query
.. _search syntax: https://ui.adsabs.harvard.edu/help/search/search-syntax
Configuration
=============
The engine has the following additional settings:
- :py:obj:`api_key`
- :py:obj:`ads_sort`
.. code:: yaml
- name: astrophysics data system
api_key: "..."
inactive: false
Implementations
===============
""" """
# pylint: disable=global-statement import typing as t
from datetime import datetime from datetime import datetime
from json import loads
from urllib.parse import urlencode from urllib.parse import urlencode
from searx.utils import html_to_text
from searx.exceptions import SearxEngineAPIException from searx.exceptions import SearxEngineAPIException
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
about = { about = {
"website": 'https://ui.adsabs.harvard.edu/', "website": "https://ui.adsabs.harvard.edu/",
"wikidata_id": 'Q752099', "wikidata_id": "Q752099",
"official_api_documentation": 'https://ui.adsabs.harvard.edu/help/api/api-docs.html', "official_api_documentation": "https://ui.adsabs.harvard.edu/help/api/api-docs.html",
"use_official_api": True, "use_official_api": True,
"require_api_key": True, "require_api_key": True,
"results": 'JSON', "results": "JSON",
} }
base_url = 'https://api.adsabs.harvard.edu/v1/search' categories = ["science", "scientific publications"]
result_base_url = 'https://ui.adsabs.harvard.edu/abs/'
rows = 10
sort = '' # sorting: asc or desc
field_list = ['bibcode', 'author', 'title', 'abstract', 'doi', 'date'] # list of field names to display on the UI
default_fields = '' # default field to query
query_fields = '' # query fields
paging = True paging = True
api_key = 'unset' base_url = "https://api.adsabs.harvard.edu/v1/search/query"
api_key = "unset"
"""Get an API token as described in https://ui.adsabs.harvard.edu/help/api"""
ads_field_list = [
"abstract",
"author",
"bibcode",
"comment",
"date",
"doi",
"isbn",
"issn",
"keyword",
"page",
"page_count",
"page_range",
"pub",
"pubdate",
"pubnote",
"read_count",
"title",
"volume",
"year",
]
"""Set of fields to return in the response from ADS."""
ads_rows = 10
"""How many records to return for the ADS request."""
ads_sort = "read_count desc"
"""The format is 'field' + 'direction' where direction is one of 'asc' or 'desc'
and field is any of the valid indexes."""
def init(_): def setup(engine_settings: dict[str, t.Any]) -> bool:
if api_key == 'unset': """Initialization of the ADS_ engine, checks whether the :py:obj:`api_key`
raise SearxEngineAPIException('missing ADS API key') is set, otherwise the engine is inactive.
"""
key: str = engine_settings.get("api_key", "")
if key and key not in ("unset", "unknown", "..."):
return True
logger.error("Astrophysics Data System (ADS) API key is not set or invalid.")
return False
def request(query, params): def request(query: str, params: "OnlineParams") -> None:
query_params = {'q': query, 'rows': rows}
if field_list:
query_params['fl'] = ','.join(field_list)
if query_fields:
query_params['qf'] = ','.join(query_fields)
if default_fields:
query_params['df'] = default_fields
if sort:
query_params['sort'] = sort
query_params['start'] = rows * (params['pageno'] - 1) args: dict[str, str | int] = {
"q": query,
"fl": ",".join(ads_field_list),
"rows": ads_rows,
"start": ads_rows * (params["pageno"] - 1),
}
if ads_sort:
args["sort"] = ads_sort
params['headers']['Authorization'] = f'Bearer {api_key}' params["headers"]["Authorization"] = f"Bearer {api_key}"
params['url'] = f"{base_url}/query?{urlencode(query_params)}" params["url"] = f"{base_url}?{urlencode(args)}"
return params
def response(resp): def response(resp: "SXNG_Response") -> EngineResults:
try:
resp_json = loads(resp.text)
except Exception as e:
raise SearxEngineAPIException("failed to parse response") from e
if 'error' in resp_json: res = EngineResults()
raise SearxEngineAPIException(resp_json['error']['msg']) json_data: dict[str, dict[str, t.Any]] = resp.json()
resp_json = resp_json["response"] if "error" in json_data:
result_len = resp_json["numFound"] raise SearxEngineAPIException(json_data["error"]["msg"])
results = []
for res in resp_json["docs"]: def _str(k: str) -> str:
author = res.get("author") return str(doc.get(k, ""))
if author: def _list(k: str) -> list[str]:
author = author[0] + ' et al.' return doc.get(k, [])
results.append( for doc in json_data["response"]["docs"]:
{ authors: list[str] = doc["author"]
'url': result_base_url + res.get("bibcode") + "/", if len(authors) > 15:
'title': res.get("title")[0], # There are articles with hundreds of authors
'author': author, authors = authors[:15] + ["et al."]
'content': res.get("abstract"),
'doi': res.get("doi"), paper = res.types.Paper(
'publishedDate': datetime.fromisoformat(res.get("date")), url=f"https://ui.adsabs.harvard.edu/abs/{doc.get('bibcode')}/",
} title=html_to_text(_list("title")[0]),
authors=authors,
content=html_to_text(_str("abstract")),
doi=_list("doi")[0],
issn=_list("issn"),
isbn=_list("isbn"),
tags=_list("keyword"),
pages=",".join(_list("page")),
publisher=_str("pub") + " " + _str("year"),
publishedDate=datetime.fromisoformat(_str("date")),
volume=_str("volume"),
views=_str("read_count"),
comments=" / ".join(_list("pubnote")),
) )
res.add(paper)
results.append({'number_of_results': result_len}) return res
return results

View File

@ -407,13 +407,12 @@ engines:
require_api_key: false require_api_key: false
results: JSON results: JSON
# - name: astrophysics data system - name: astrophysics data system
# engine: astrophysics_data_system engine: astrophysics_data_system
# sort: asc shortcut: ads
# weight: 5 # read https://docs.searxng.org/dev/engines/online/astrophysics_data_system.html
# categories: [science] api_key: ""
# api_key: your-new-key inactive: true
# shortcut: ads
- name: alpine linux packages - name: alpine linux packages
engine: alpinelinux engine: alpinelinux