[mod] Springer Nature engine: revision of the engine (Paper result)

Revision of the engine / use of the result type Paper as well as other typifications. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2025-12-01 10:45:08 -05:00 · 2025-09-10 16:47:31 +02:00 · 2025-09-10 16:47:31 +02:00 · 4c42704c80
commit 4c42704c80
parent 4b4bf0ecaf
3 changed files with 168 additions and 56 deletions
--- a/docs/dev/engines/online/springer.rst
+++ b/docs/dev/engines/online/springer.rst
@ -0,0 +1,8 @@
 .. _springer engine:
 ===============
 Springer Nature
 ===============
 .. automodule:: searx.engines.springer
   :members:
--- a/searx/engines/springer.py
+++ b/searx/engines/springer.py
@ -1,71 +1,175 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""Springer Nature (science)
+"""`Springer Nature`_ is a global publisher dedicated to serving the research
 community.  This engine uses the official Springer-API_ (API-Playground_).
 .. note::
   The Springer engine requires an API key, which can be obtained via the
   `Springer subscription`_.
 Since the search term is passed 1:1 to the API, SearXNG users can use the
 `Supported Query Parameters`_.
 - ``!springer (doi:10.1007/s10948-025-07019-1 OR doi:10.1007/s10948-025-07035-1)``
 - ``!springer keyword:ybco``
 However, please note that the available options depend on the subscription type.
 For example, the ``year:`` filter requires a *Premium Plan* subscription.
 - ``!springer keyword:ybco year:2024``
 The engine uses the REST Meta-API_ `v2` endpoint, but there is also a `Python
 API Wrapper`_.
 .. _Python API Wrapper: https://pypi.org/project/springernature-api-client/
 .. _Springer Nature: https://www.springernature.com/
 .. _Springer subscription:  https://dev.springernature.com/subscription/
 .. _Springer-API: https://dev.springernature.com/docs/introduction/
 .. _API-Playground: https://dev.springernature.com/docs/live-documentation/
 .. _Meta-API: https://dev.springernature.com/docs/api-endpoints/meta-api/
 .. _Supported Query Parameters: https://dev.springernature.com/docs/supported-query-params/
 Configuration
 =============
 The engine has the following additional settings:
 - :py:obj:`api_key`
 .. code:: yaml
  - name: springer nature
    api_key: "..."
    inactive: false
 Implementations
 ===============
 """
 import typing as t
 from datetime import datetime
 from json import loads
 from urllib.parse import urlencode
-from searx.exceptions import SearxEngineAPIException
+from searx.network import raise_for_httperror
 from searx.result_types import EngineResults
 if t.TYPE_CHECKING:
    from searx.extended_types import SXNG_Response
    from searx.search.processors import OnlineParams
 about = {
-    "website": 'https://www.springernature.com/',
+    "website": "https://www.springernature.com/",
-    "wikidata_id": 'Q21096327',
+    "wikidata_id": "Q21096327",
-    "official_api_documentation": 'https://dev.springernature.com/',
+    "official_api_documentation": "https://dev.springernature.com/docs/live-documentation/",
    "use_official_api": True,
    "require_api_key": True,
-    "results": 'JSON',
+    "results": "JSON",
 }
-categories = ['science', 'scientific publications']
+categories = ["science", "scientific publications"]
 paging = True
 nb_per_page = 10
-api_key = 'unset'
+"""Number of results to return in the request, see `Pagination and Limits`_ for
 more details.
-base_url = 'https://api.springernature.com/metadata/json?'
+.. _Pagination and Limits:
    https://dev.springernature.com/docs/advanced-querying/pagination-limits/
 """
 api_key = ""
 """Key used for the Meta-API_.  Get your API key from: `Springer subscription`_"""
 base_url = "https://api.springernature.com/meta/v2/json"
 """An enhanced endpoint with additional metadata fields and optimized queries
 for more efficient and comprehensive retrieval (Meta-API_ `v2`).
 """
-def request(query, params):
+def setup(engine_settings: dict[str, t.Any]) -> bool:
-    if api_key == 'unset':
+    """Initialization of the Springer engine, checks whether the
-        raise SearxEngineAPIException('missing Springer-Nature API key')
+    :py:obj:`api_key` is set, otherwise the engine is inactive.
-    args = urlencode({'q': query, 's': nb_per_page * (params['pageno'] - 1), 'p': nb_per_page, 'api_key': api_key})
+    """
-    params['url'] = base_url + args
+    key: str = engine_settings.get("api_key", "")
-    logger.debug("query_url --> %s", params['url'])
+    try:
-    return params
+        # Springer's API key is a hex string: an empty or non-hex key raises ValueError
        int(key, 16)
        return True
    except ValueError:
        logger.error("Springer's API key is not set or invalid.")
        return False
-def response(resp):
+def request(query: str, params: "OnlineParams") -> None:
-    results = []
+    args = {
-    json_data = loads(resp.text)
+        "api_key": api_key,
        "q": query,
        "s": nb_per_page * (params["pageno"] - 1),
        "p": nb_per_page,
    }
    params["url"] = f"{base_url}?{urlencode(args)}"
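    # Do not raise on HTTP errors automatically: for example, the ``year:``
    # filter requires a *Premium Plan* subscription, and response() inspects
    # 403 "premium feature" replies itself before raising for other errors.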
    params["raise_for_httperror"] = False
-    for record in json_data['records']:
+
-        published = datetime.strptime(record['publicationDate'], '%Y-%m-%d')
+def response(resp: "SXNG_Response") -> EngineResults:
-        authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']]
+
-        tags = record.get('genre')
+    res = EngineResults()
-        if isinstance(tags, str):
+    json_data = resp.json()
-            tags = [tags]
+
-        results.append(
+    if (
-            {
+        resp.status_code == 403
-                'template': 'paper.html',
+        and json_data["status"].lower() == "fail"
-                'url': record['url'][0]['value'].replace('http://', 'https://', 1),
+        and "premium feature" in json_data["message"].lower()
-                'title': record['title'],
+    ):
-                'content': record['abstract'],
+        return res
-                'comments': record['publicationName'],
+    raise_for_httperror(resp)
-                'tags': tags,
+
-                'publishedDate': published,
+    def field(k: str) -> str:
-                'type': record.get('contentType'),
+        return str(record.get(k, ""))
-                'authors': authors,
+
-                # 'editor': '',
+    for record in json_data["records"]:
-                'publisher': record.get('publisher'),
+        published = datetime.strptime(record["publicationDate"], "%Y-%m-%d")
-                'journal': record.get('publicationName'),
+        authors: list[str] = [" ".join(author["creator"].split(", ")[::-1]) for author in record["creators"]]
-                'volume': record.get('volume') or None,
+
-                'pages': '-'.join([x for x in [record.get('startingPage'), record.get('endingPage')] if x]),
+        pdf_url = ""
-                'number': record.get('number') or None,
+        html_url = ""
-                'doi': record.get('doi'),
+        url_list: list[dict[str, str]] = record["url"]
-                'issn': [x for x in [record.get('issn')] if x],
+
-                'isbn': [x for x in [record.get('isbn')] if x],
+        for item in url_list:
-                # 'pdf_url' : ''
+            if item["platform"] != "web":
-            }
+                continue
            val = item["value"].replace("http://", "https://", 1)
            if item["format"] == "html":
                html_url = val
            elif item["format"] == "pdf":
                pdf_url = val
        paper = res.types.Paper(
            url=html_url,
            # html_url=html_url,
            pdf_url=pdf_url,
            title=field("title"),
            content=field("abstract"),
            comments=field("publicationName"),
            tags=record.get("keyword", []),
            publishedDate=published,
            type=field("contentType"),
            authors=authors,
            publisher=field("publisher"),
            journal=field("publicationName"),
            volume=field("volume"),
            pages="-".join([x for x in [field("startingPage"), field("endingPage")] if x]),
            number=field("number"),
            doi=field("doi"),
            issn=[x for x in [field("issn")] if x],
            isbn=[x for x in [field("isbn")] if x],
        )
-    return results
+        res.add(paper)
    return res
--- a/searx/settings.yml
+++ b/searx/settings.yml
@ -1984,13 +1984,13 @@ engines:
  #   query_fields: '' # query fields
  #   enable_http: true
-  # - name: springer nature
+  - name: springer nature
-  #   engine: springer
+    engine: springer
-  #   # get your API key from: https://dev.springernature.com/signup
+    shortcut: springer
-  #   # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601"
+    timeout: 5
-  #   api_key: 'unset'
+    # read https://docs.searxng.org/dev/engines/online/springer.html
-  #   shortcut: springer
+    api_key: ""
-  #   timeout: 15.0
+    inactive: true
  - name: startpage
    engine: startpage