mirror of
https://github.com/searxng/searxng.git
synced 2025-09-29 15:30:51 -04:00
Revision of the Astrophysics Data System (ADS) engine / use of the result type Paper as well as other typifications. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
164 lines
4.4 KiB
Python
164 lines
4.4 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
"""The Astrophysics Data System (ADS_) is a digital library portal for
|
|
researchers in astronomy and physics, operated by the Smithsonian Astrophysical
|
|
Observatory (SAO) under a NASA grant. The ADS_ is a Solr instance, but not with
|
|
the standard API paths.
|
|
|
|
.. note::
|
|
|
|
The ADS_ engine requires an :py:obj:`API key <api_key>`.
|
|
|
|
This engine uses the `search/query`_ API endpoint. Since the user's search term
|
|
is passed through, the `search syntax`_ of ADS can be used (at least to some
|
|
extent).
|
|
|
|
.. _ADS: https://ui.adsabs.harvard.edu
|
|
.. _search/query: https://ui.adsabs.harvard.edu/help/api/api-docs.html#get-/search/query
|
|
.. _search syntax: https://ui.adsabs.harvard.edu/help/search/search-syntax
|
|
|
|
|
|
Configuration
|
|
=============
|
|
|
|
The engine has the following additional settings:
|
|
|
|
- :py:obj:`api_key`
|
|
- :py:obj:`ads_sort`
|
|
|
|
.. code:: yaml
|
|
|
|
- name: astrophysics data system
|
|
api_key: "..."
|
|
inactive: false
|
|
|
|
|
|
Implementations
|
|
===============
|
|
"""
|
|
|
|
import typing as t
|
|
|
|
from datetime import datetime
|
|
from urllib.parse import urlencode
|
|
|
|
from searx.utils import html_to_text
|
|
from searx.exceptions import SearxEngineAPIException
|
|
from searx.result_types import EngineResults
|
|
|
|
if t.TYPE_CHECKING:
|
|
from searx.extended_types import SXNG_Response
|
|
from searx.search.processors import OnlineParams
|
|
|
|
# Engine metadata: where the service lives, where its API is documented, and
# that results are fetched as JSON from the official API using an API key.
about = {
    "website": "https://ui.adsabs.harvard.edu/",
    "wikidata_id": "Q752099",
    "official_api_documentation": "https://ui.adsabs.harvard.edu/help/api/api-docs.html",
    "use_official_api": True,
    "require_api_key": True,
    "results": "JSON",
}
|
|
|
|
# Categories under which the engine is listed.
categories = ["science", "scientific publications"]

# The engine supports paging: request() turns the page number into the
# ``start`` offset of the query.
paging = True

# Endpoint of the ADS search/query API.
base_url = "https://api.adsabs.harvard.edu/v1/search/query"
|
|
|
|
api_key = "unset"
"""API token, sent as ``Bearer`` token in the ``Authorization`` header of each
request.  Get a token as described in https://ui.adsabs.harvard.edu/help/api

The default ``"unset"`` is only a placeholder, :py:obj:`setup` rejects it."""
|
|
|
|
ads_field_list = [
    "abstract",
    "author",
    "bibcode",
    "comment",
    "date",
    "doi",
    "isbn",
    "issn",
    "keyword",
    "page",
    "page_count",
    "page_range",
    "pub",
    "pubdate",
    "pubnote",
    "read_count",
    "title",
    "volume",
    "year",
]
"""List of the fields requested from ADS; the names are joined by commas and
sent as the ``fl`` argument of the query."""
|
|
|
|
ads_rows = 10
"""Number of records requested from ADS per page (``rows`` argument); it is
also the step width used to compute the ``start`` offset of a page."""
|
|
|
|
ads_sort = "read_count desc"
"""Sort order of the results (``sort`` argument).  The format is
``<field> <direction>``, where the direction is either ``asc`` or ``desc`` and
the field is any of the valid indexes.  With an empty value no ``sort``
argument is sent."""
|
|
|
|
|
|
def setup(engine_settings: dict[str, t.Any]) -> bool:
|
|
"""Initialization of the ADS_ engine, checks whether the :py:obj:`api_key`
|
|
is set, otherwise the engine is inactive.
|
|
"""
|
|
key: str = engine_settings.get("api_key", "")
|
|
if key and key not in ("unset", "unknown", "..."):
|
|
return True
|
|
logger.error("Astrophysics Data System (ADS) API key is not set or invalid.")
|
|
return False
|
|
|
|
|
|
def request(query: str, params: "OnlineParams") -> None:
    """Assemble the request for the ADS_ ``search/query`` endpoint.

    The user's query is passed through unchanged as ``q``.  The page number in
    ``params["pageno"]`` is translated into the ``start`` offset, the ``sort``
    argument is only sent if :py:obj:`ads_sort` is set.  The URL and the
    ``Authorization`` header (``Bearer`` token from :py:obj:`api_key`) are
    stored in ``params``.
    """
    offset = ads_rows * (params["pageno"] - 1)
    query_args: dict[str, str | int] = {
        "q": query,
        "fl": ",".join(ads_field_list),
        "rows": ads_rows,
        "start": offset,
    }
    if ads_sort:
        query_args["sort"] = ads_sort

    params["headers"]["Authorization"] = f"Bearer {api_key}"
    params["url"] = f"{base_url}?{urlencode(query_args)}"
|
|
|
|
|
|
def response(resp: "SXNG_Response") -> EngineResults:
    """Build ``Paper`` results from the JSON answer of the ADS_ API.

    One ``Paper`` is added per entry in ``response.docs``.  None of the fields
    of a record is assumed to be present: a missing field is replaced by an
    empty value, so that a single incomplete record (for example one without a
    DOI or without authors) does not abort the whole result list.

    :param resp: HTTP response of the request built in :py:obj:`request`.
    :return: the collected results.
    :raises SearxEngineAPIException: if the answer contains an ``error`` member.
    """

    res = EngineResults()
    json_data: dict[str, t.Any] = resp.json()

    if "error" in json_data:
        error = json_data["error"]
        # The error is expected to be an object with a "msg" member.  Fall back
        # to the raw value, so that what is raised here is always the API
        # exception and never a KeyError / TypeError.
        msg = error.get("msg", error) if isinstance(error, dict) else error
        raise SearxEngineAPIException(str(msg))

    def _str(doc: dict[str, t.Any], k: str) -> str:
        # Missing (or null) fields become an empty string, not "None".
        value = doc.get(k)
        return "" if value is None else str(value)

    def _list(doc: dict[str, t.Any], k: str) -> list[str]:
        # Always a list of strings: [] for a missing field, a one-item list for
        # a scalar value.
        value = doc.get(k)
        if value is None:
            return []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value]
        return [str(value)]

    def _first(doc: dict[str, t.Any], k: str) -> str:
        # First item of a multi-valued field or "" if there is none.
        items = _list(doc, k)
        return items[0] if items else ""

    def _published(doc: dict[str, t.Any]) -> t.Optional[datetime]:
        # Parse the "date" field; None if it is missing or not ISO formatted.
        raw = _str(doc, "date")
        if not raw:
            return None
        if raw.endswith("Z"):
            # datetime.fromisoformat() accepts the "Z" (UTC) suffix only since
            # Python 3.11, the numeric offset is understood by all versions.
            raw = raw[:-1] + "+00:00"
        try:
            return datetime.fromisoformat(raw)
        except ValueError:
            return None

    for doc in json_data["response"]["docs"]:
        authors = _list(doc, "author")
        if len(authors) > 15:
            # There are articles with hundreds of authors
            authors = authors[:15] + ["et al."]

        paper = res.types.Paper(
            url=f"https://ui.adsabs.harvard.edu/abs/{doc.get('bibcode')}/",
            title=html_to_text(_first(doc, "title")),
            authors=authors,
            content=html_to_text(_str(doc, "abstract")),
            doi=_first(doc, "doi"),
            issn=_list(doc, "issn"),
            isbn=_list(doc, "isbn"),
            tags=_list(doc, "keyword"),
            pages=",".join(_list(doc, "page")),
            # strip(): no stray blank if the journal or the year is missing
            publisher=f"{_str(doc, 'pub')} {_str(doc, 'year')}".strip(),
            # NOTE(review): assumes Paper accepts None for an unknown
            # publishedDate -- confirm against the result type.
            publishedDate=_published(doc),
            volume=_str(doc, "volume"),
            views=_str(doc, "read_count"),
            comments=" / ".join(_list(doc, "pubnote")),
        )
        res.add(paper)

    return res
|