[mod] Springer Nature engine: revision of the engine (Paper result)

Revision of the engine / use of the result type Paper as well as other
typifications.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2025-09-10 16:47:31 +02:00 committed by Markus Heiser
parent 4b4bf0ecaf
commit 4c42704c80
3 changed files with 168 additions and 56 deletions

View File

@ -0,0 +1,8 @@
.. _springer engine:
===============
Springer Nature
===============
.. automodule:: searx.engines.springer
:members:

View File

@ -1,71 +1,175 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Springer Nature (science) """`Springer Nature`_ is a global publisher dedicated to providing service to
the research community, with an official Springer-API_ (API-Playground_).
.. note::
The Springer engine requires an API key, which can be obtained via the
`Springer subscription`_.
Since the search term is passed 1:1 to the API, SearXNG users can use the
`Supported Query Parameters`_.
- ``!springer (doi:10.1007/s10948-025-07019-1 OR doi:10.1007/s10948-025-07035-1)``
- ``!springer keyword:ybco``
However, please note that the available options depend on the subscription type.
For example, the ``year:`` filter requires a *Premium Plan* subscription.
- ``!springer keyword:ybco year:2024``
The engine uses the REST Meta-API_ `v2` endpoint, but there is also a `Python
API Wrapper`_.
.. _Python API Wrapper: https://pypi.org/project/springernature-api-client/
.. _Springer Nature: https://www.springernature.com/
.. _Springer subscription: https://dev.springernature.com/subscription/
.. _Springer-API: https://dev.springernature.com/docs/introduction/
.. _API-Playground: https://dev.springernature.com/docs/live-documentation/
.. _Meta-API: https://dev.springernature.com/docs/api-endpoints/meta-api/
.. _Supported Query Parameters: https://dev.springernature.com/docs/supported-query-params/
Configuration
=============
The engine has the following additional settings:
- :py:obj:`api_key`
.. code:: yaml
- name: springer nature
api_key: "..."
inactive: false
Implementations
===============
""" """
import typing as t
from datetime import datetime from datetime import datetime
from json import loads
from urllib.parse import urlencode from urllib.parse import urlencode
from searx.exceptions import SearxEngineAPIException from searx.network import raise_for_httperror
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
about = { about = {
"website": 'https://www.springernature.com/', "website": "https://www.springernature.com/",
"wikidata_id": 'Q21096327', "wikidata_id": "Q21096327",
"official_api_documentation": 'https://dev.springernature.com/', "official_api_documentation": "https://dev.springernature.com/docs/live-documentation/",
"use_official_api": True, "use_official_api": True,
"require_api_key": True, "require_api_key": True,
"results": 'JSON', "results": "JSON",
} }
categories = ['science', 'scientific publications'] categories = ["science", "scientific publications"]
paging = True paging = True
nb_per_page = 10 nb_per_page = 10
api_key = 'unset' """Number of results to return in the request, see `Pagination and Limits`_ for
more details.
base_url = 'https://api.springernature.com/metadata/json?' .. _Pagination and Limits:
https://dev.springernature.com/docs/advanced-querying/pagination-limits/
"""
api_key = ""
"""Key used for the Meta-API_. Get your API key from: `Springer subscription`_"""
base_url = "https://api.springernature.com/meta/v2/json"
"""An enhanced endpoint with additional metadata fields and optimized queries
for more efficient and comprehensive retrieval (Meta-API_ `v2`).
"""
def request(query, params): def setup(engine_settings: dict[str, t.Any]) -> bool:
if api_key == 'unset': """Initialization of the Springer engine, checks whether the
raise SearxEngineAPIException('missing Springer-Nature API key') :py:obj:`api_key` is set, otherwise the engine is inactive.
args = urlencode({'q': query, 's': nb_per_page * (params['pageno'] - 1), 'p': nb_per_page, 'api_key': api_key}) """
params['url'] = base_url + args key: str = engine_settings.get("api_key", "")
logger.debug("query_url --> %s", params['url']) try:
return params # Springer's API key is a hex value
int(key, 16)
return True
except ValueError:
logger.error("Springer's API key is not set or invalid.")
return False
def response(resp): def request(query: str, params: "OnlineParams") -> None:
results = [] args = {
json_data = loads(resp.text) "api_key": api_key,
"q": query,
"s": nb_per_page * (params["pageno"] - 1),
"p": nb_per_page,
}
params["url"] = f"{base_url}?{urlencode(args)}"
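# Some query options (for example, the ``year:`` filter) require a *Premium
# Plan* subscription and are answered with HTTP 403; the automatic HTTP error
# check is disabled here so that response() can handle that case itself.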
params["raise_for_httperror"] = False
for record in json_data['records']:
published = datetime.strptime(record['publicationDate'], '%Y-%m-%d') def response(resp: "SXNG_Response") -> EngineResults:
authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']]
tags = record.get('genre') res = EngineResults()
if isinstance(tags, str): json_data = resp.json()
tags = [tags]
results.append( if (
{ resp.status_code == 403
'template': 'paper.html', and json_data["status"].lower() == "fail"
'url': record['url'][0]['value'].replace('http://', 'https://', 1), and "premium feature" in json_data["message"].lower()
'title': record['title'], ):
'content': record['abstract'], return res
'comments': record['publicationName'], raise_for_httperror(resp)
'tags': tags,
'publishedDate': published, def field(k: str) -> str:
'type': record.get('contentType'), return str(record.get(k, ""))
'authors': authors,
# 'editor': '', for record in json_data["records"]:
'publisher': record.get('publisher'), published = datetime.strptime(record["publicationDate"], "%Y-%m-%d")
'journal': record.get('publicationName'), authors: list[str] = [" ".join(author["creator"].split(", ")[::-1]) for author in record["creators"]]
'volume': record.get('volume') or None,
'pages': '-'.join([x for x in [record.get('startingPage'), record.get('endingPage')] if x]), pdf_url = ""
'number': record.get('number') or None, html_url = ""
'doi': record.get('doi'), url_list: list[dict[str, str]] = record["url"]
'issn': [x for x in [record.get('issn')] if x],
'isbn': [x for x in [record.get('isbn')] if x], for item in url_list:
# 'pdf_url' : '' if item["platform"] != "web":
} continue
val = item["value"].replace("http://", "https://", 1)
if item["format"] == "html":
html_url = val
elif item["format"] == "pdf":
pdf_url = val
paper = res.types.Paper(
url=html_url,
# html_url=html_url,
pdf_url=pdf_url,
title=field("title"),
content=field("abstract"),
comments=field("publicationName"),
tags=record.get("keyword", []),
publishedDate=published,
type=field("contentType"),
authors=authors,
publisher=field("publisher"),
journal=field("publicationName"),
volume=field("volume"),
pages="-".join([x for x in [field("startingPage"), field("endingPage")] if x]),
number=field("number"),
doi=field("doi"),
issn=[x for x in [field("issn")] if x],
isbn=[x for x in [field("isbn")] if x],
) )
return results res.add(paper)
return res

View File

@ -1984,13 +1984,13 @@ engines:
# query_fields: '' # query fields # query_fields: '' # query fields
# enable_http: true # enable_http: true
# - name: springer nature - name: springer nature
# engine: springer engine: springer
# # get your API key from: https://dev.springernature.com/signup shortcut: springer
# # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601" timeout: 5
# api_key: 'unset' # read https://docs.searxng.org/dev/engines/online/springer.html
# shortcut: springer api_key: ""
# timeout: 15.0 inactive: true
- name: startpage - name: startpage
engine: startpage engine: startpage