mirror of
https://github.com/searxng/searxng.git
synced 2025-09-29 15:30:51 -04:00
[mod] ADS engine: revision of the engine (Paper result)
Revision of the Astrophysics Data System (ADS) engine / use of the result type Paper as well as other typifications. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
f8f7adce6b
commit
1520a8d545
8
docs/dev/engines/online/astrophysics_data_system.rst
Normal file
8
docs/dev/engines/online/astrophysics_data_system.rst
Normal file
@ -0,0 +1,8 @@
|
||||
.. _astrophysics_data_system engine:
|
||||
|
||||
==============================
|
||||
Astrophysics Data System (ADS)
|
||||
==============================
|
||||
|
||||
.. automodule:: searx.engines.astrophysics_data_system
|
||||
:members:
|
@ -1,93 +1,163 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
""".. sidebar:: info
|
||||
"""The Astrophysics Data System (ADS_) is a digital library portal for
|
||||
researchers in astronomy and physics, operated by the Smithsonian Astrophysical
|
||||
Observatory (SAO) under a NASA grant. ADS_ is backed by a Solr instance, but it
|
||||
does not expose the standard Solr API paths.
|
||||
|
||||
The Astrophysics Data System (ADS) is a digital library portal for researchers in astronomy and physics,
|
||||
operated by the Smithsonian Astrophysical Observatory (SAO) under a NASA grant.
|
||||
The engine is adapted from the solr engine.
|
||||
.. note::
|
||||
|
||||
The ADS_ engine requires an :py:obj:`API key <api_key>`.
|
||||
|
||||
This engine uses the `search/query`_ API endpoint. Since the user's search term
|
||||
is passed through, the `search syntax`_ of ADS can be used (at least to some
|
||||
extent).
|
||||
|
||||
.. _ADS: https://ui.adsabs.harvard.edu
|
||||
.. _search/query: https://ui.adsabs.harvard.edu/help/api/api-docs.html#get-/search/query
|
||||
.. _search syntax: https://ui.adsabs.harvard.edu/help/search/search-syntax
|
||||
|
||||
|
||||
Configuration
|
||||
=============
|
||||
|
||||
The engine has the following additional settings:
|
||||
|
||||
- :py:obj:`api_key`
|
||||
- :py:obj:`ads_sort`
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
- name: astrophysics data system
|
||||
api_key: "..."
|
||||
inactive: false
|
||||
|
||||
|
||||
Implementations
|
||||
===============
|
||||
"""
|
||||
|
||||
# pylint: disable=global-statement
|
||||
import typing as t
|
||||
|
||||
from datetime import datetime
|
||||
from json import loads
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from searx.utils import html_to_text
|
||||
from searx.exceptions import SearxEngineAPIException
|
||||
from searx.result_types import EngineResults
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from searx.extended_types import SXNG_Response
|
||||
from searx.search.processors import OnlineParams
|
||||
|
||||
about = {
|
||||
"website": 'https://ui.adsabs.harvard.edu/',
|
||||
"wikidata_id": 'Q752099',
|
||||
"official_api_documentation": 'https://ui.adsabs.harvard.edu/help/api/api-docs.html',
|
||||
"website": "https://ui.adsabs.harvard.edu/",
|
||||
"wikidata_id": "Q752099",
|
||||
"official_api_documentation": "https://ui.adsabs.harvard.edu/help/api/api-docs.html",
|
||||
"use_official_api": True,
|
||||
"require_api_key": True,
|
||||
"results": 'JSON',
|
||||
"results": "JSON",
|
||||
}
|
||||
|
||||
base_url = 'https://api.adsabs.harvard.edu/v1/search'
|
||||
result_base_url = 'https://ui.adsabs.harvard.edu/abs/'
|
||||
rows = 10
|
||||
sort = '' # sorting: asc or desc
|
||||
field_list = ['bibcode', 'author', 'title', 'abstract', 'doi', 'date'] # list of field names to display on the UI
|
||||
default_fields = '' # default field to query
|
||||
query_fields = '' # query fields
|
||||
categories = ["science", "scientific publications"]
|
||||
paging = True
|
||||
api_key = 'unset'
|
||||
base_url = "https://api.adsabs.harvard.edu/v1/search/query"
|
||||
|
||||
api_key = "unset"
|
||||
"""Get an API token as described in https://ui.adsabs.harvard.edu/help/api"""
|
||||
|
||||
ads_field_list = [
|
||||
"abstract",
|
||||
"author",
|
||||
"bibcode",
|
||||
"comment",
|
||||
"date",
|
||||
"doi",
|
||||
"isbn",
|
||||
"issn",
|
||||
"keyword",
|
||||
"page",
|
||||
"page_count",
|
||||
"page_range",
|
||||
"pub",
|
||||
"pubdate",
|
||||
"pubnote",
|
||||
"read_count",
|
||||
"title",
|
||||
"volume",
|
||||
"year",
|
||||
]
|
||||
"""Set of fields to return in the response from ADS."""
|
||||
|
||||
ads_rows = 10
|
||||
"""How many records to return for the ADS request."""
|
||||
|
||||
ads_sort = "read_count desc"
|
||||
"""The format is 'field' + 'direction' where direction is one of 'asc' or 'desc'
|
||||
and field is any of the valid indexes."""
|
||||
|
||||
|
||||
def init(_):
|
||||
if api_key == 'unset':
|
||||
raise SearxEngineAPIException('missing ADS API key')
|
||||
def setup(engine_settings: dict[str, t.Any]) -> bool:
|
||||
"""Initialization of the ADS_ engine, checks whether the :py:obj:`api_key`
|
||||
is set, otherwise the engine is inactive.
|
||||
"""
|
||||
key: str = engine_settings.get("api_key", "")
|
||||
if key and key not in ("unset", "unknown", "..."):
|
||||
return True
|
||||
logger.error("Astrophysics Data System (ADS) API key is not set or invalid.")
|
||||
return False
|
||||
|
||||
|
||||
def request(query: str, params: "OnlineParams") -> None:
    """Prepare the HTTP request for the ADS ``search/query`` endpoint.

    The user's query is passed through unchanged as ``q``.  The requested
    fields, page size, offset of the current page and (when configured) the
    sort order are added, then the URL and the bearer token are written into
    ``params``.
    """
    offset = ads_rows * (params["pageno"] - 1)

    args: dict[str, str | int] = dict(
        q=query,
        fl=",".join(ads_field_list),
        rows=ads_rows,
        start=offset,
    )
    # An empty ``ads_sort`` means: do not send a ``sort`` argument at all.
    if ads_sort:
        args["sort"] = ads_sort

    params["headers"]["Authorization"] = f"Bearer {api_key}"
    params["url"] = f"{base_url}?{urlencode(args)}"
|
||||
|
||||
|
||||
def _ads_list(doc: dict[str, t.Any], key: str) -> list[str]:
    """Return field ``key`` of ``doc`` as a list of strings (empty if absent)."""
    value = doc.get(key)
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return [str(item) for item in value]
    # A scalar where a list was expected: wrap it instead of iterating its characters.
    return [str(value)]


def _ads_first(doc: dict[str, t.Any], key: str) -> str:
    """Return the first value of the list field ``key`` or ``""`` if there is none."""
    values = _ads_list(doc, key)
    return values[0] if values else ""


def _ads_date(doc: dict[str, t.Any]) -> t.Optional[datetime]:
    """Parse the ``date`` field of ``doc``; ``None`` if it is missing or malformed."""
    value = str(doc.get("date") or "")
    if not value:
        return None
    # ``datetime.fromisoformat`` accepts a trailing "Z" only since Python 3.11;
    # rewrite it to the equivalent explicit UTC offset.
    if value.endswith("Z"):
        value = value[:-1] + "+00:00"
    try:
        return datetime.fromisoformat(value)
    except ValueError:
        return None


def response(resp: "SXNG_Response") -> EngineResults:
    """Convert the JSON reply of the ADS ``search/query`` endpoint into results.

    Every entry of ``response.docs`` becomes one ``Paper`` result.  Fields that
    a document does not carry (``author``, ``title``, ``doi``, ``date``, ...)
    are treated as empty, so one sparse record no longer aborts the whole
    result page.  Documents without a ``bibcode`` are skipped, since no result
    URL can be built for them.

    :raises SearxEngineAPIException: if the reply carries an ``error`` object.
    """
    res = EngineResults()
    json_data: dict[str, dict[str, t.Any]] = resp.json()

    if "error" in json_data:
        raise SearxEngineAPIException(json_data["error"]["msg"])

    for doc in json_data["response"]["docs"]:
        bibcode = doc.get("bibcode")
        if not bibcode:
            continue

        authors = _ads_list(doc, "author")
        if len(authors) > 15:
            # There are articles with hundreds of authors
            authors = authors[:15] + ["et al."]

        # Join only the parts that exist, to avoid a lone " " as publisher.
        publisher = " ".join(part for part in (str(doc.get("pub", "")), str(doc.get("year", ""))) if part)

        # NOTE(review): assumes ``Paper.publishedDate`` accepts ``None`` -- confirm
        # against searx.result_types.
        paper = res.types.Paper(
            url=f"https://ui.adsabs.harvard.edu/abs/{bibcode}/",
            title=html_to_text(_ads_first(doc, "title")),
            authors=authors,
            content=html_to_text(str(doc.get("abstract", ""))),
            doi=_ads_first(doc, "doi"),
            issn=_ads_list(doc, "issn"),
            isbn=_ads_list(doc, "isbn"),
            tags=_ads_list(doc, "keyword"),
            pages=",".join(_ads_list(doc, "page")),
            publisher=publisher,
            publishedDate=_ads_date(doc),
            volume=str(doc.get("volume", "")),
            views=str(doc.get("read_count", "")),
            comments=" / ".join(_ads_list(doc, "pubnote")),
        )
        res.add(paper)

    return res
|
||||
|
@ -407,13 +407,12 @@ engines:
|
||||
require_api_key: false
|
||||
results: JSON
|
||||
|
||||
# - name: astrophysics data system
|
||||
# engine: astrophysics_data_system
|
||||
# sort: asc
|
||||
# weight: 5
|
||||
# categories: [science]
|
||||
# api_key: your-new-key
|
||||
# shortcut: ads
|
||||
- name: astrophysics data system
|
||||
engine: astrophysics_data_system
|
||||
shortcut: ads
|
||||
# read https://docs.searxng.org/dev/engines/online/astrophysics_data_system.html
|
||||
api_key: ""
|
||||
inactive: true
|
||||
|
||||
- name: alpine linux packages
|
||||
engine: alpinelinux
|
||||
|
Loading…
x
Reference in New Issue
Block a user