# SPDX-License-Identifier: AGPL-3.0-or-later
"""`Springer Nature`_ is a global publisher serving the research community.
This engine queries it through the official Springer-API_ (API-Playground_).

.. note::

   The Springer engine requires an API key, which can be obtained via the
   `Springer subscription`_.

Since the search term is passed 1:1 to the API, SearXNG users can use the
`Supported Query Parameters`_.

- ``!springer (doi:10.1007/s10948-025-07019-1 OR doi:10.1007/s10948-025-07035-1)``
- ``!springer keyword:ybco``

However, please note that the available options depend on the subscription type.

For example, the ``year:`` filter requires a *Premium Plan* subscription.

- ``!springer keyword:ybco year:2024``

The engine uses the REST Meta-API_ `v2` endpoint, but there is also a `Python
API Wrapper`_.

.. _Python API Wrapper: https://pypi.org/project/springernature-api-client/
.. _Springer Nature: https://www.springernature.com/
.. _Springer subscription: https://dev.springernature.com/subscription/
.. _Springer-API: https://dev.springernature.com/docs/introduction/
.. _API-Playground: https://dev.springernature.com/docs/live-documentation/
.. _Meta-API: https://dev.springernature.com/docs/api-endpoints/meta-api/
.. _Supported Query Parameters: https://dev.springernature.com/docs/supported-query-params/


Configuration
=============

The engine has the following additional settings:

- :py:obj:`api_key`

.. code:: yaml

  - name: springer nature
    api_key: "..."
    inactive: false


Implementations
===============

"""

import typing as t

from datetime import datetime
from urllib.parse import urlencode

from searx.network import raise_for_httperror
from searx.result_types import EngineResults

if t.TYPE_CHECKING:
    from searx.extended_types import SXNG_Response
    from searx.search.processors import OnlineParams

about = {
    "website": "https://www.springernature.com/",
    "wikidata_id": "Q21096327",
    "official_api_documentation": "https://dev.springernature.com/docs/live-documentation/",
    "use_official_api": True,
    "require_api_key": True,
    "results": "JSON",
}

categories = ["science", "scientific publications"]

paging = True
nb_per_page = 10
"""Number of results to return in the request, see `Pagination and Limits`_ for
more details.

.. _Pagination and Limits:
    https://dev.springernature.com/docs/advanced-querying/pagination-limits/
"""

api_key = ""
"""Key used for the Meta-API_.  Get your API key from: `Springer subscription`_"""

base_url = "https://api.springernature.com/meta/v2/json"
"""An enhanced endpoint with additional metadata fields and optimized queries
for more efficient and comprehensive retrieval (Meta-API_ `v2`).
"""


def _is_hex(value: object) -> bool:
    """Return ``True`` if ``value`` is a string that parses as a base-16 number."""
    # ``int(x, 16)`` raises TypeError (not ValueError) for non-strings such as
    # ``None`` or an int produced by an unquoted YAML value, so reject those
    # before parsing.
    if not isinstance(value, str):
        return False
    try:
        int(value, 16)
    except ValueError:
        return False
    return True


def setup(engine_settings: dict[str, t.Any]) -> bool:
    """Initialization of the Springer engine, checks whether the
    :py:obj:`api_key` is set, otherwise the engine is inactive.

    :param engine_settings: the engine's settings; only ``api_key`` is read.
    :return: ``True`` if ``api_key`` is a hex string, ``False`` (after logging
        an error) if it is missing, empty, not a string or not hexadecimal.
    """
    # Springer's API key is a hex value
    if _is_hex(engine_settings.get("api_key")):
        return True
    logger.error("Springer's API key is not set or invalid.")
    return False


def request(query: str, params: "OnlineParams") -> None:
    """Build the Meta-API request URL for ``query`` and store it in ``params``.

    The query is passed to the API unmodified.  The offset ``s`` is derived from
    ``params["pageno"]`` (1-based) and :py:obj:`nb_per_page`.
    """
    args = {
        "api_key": api_key,
        "q": query,
        "s": nb_per_page * (params["pageno"] - 1),
        "p": nb_per_page,
    }
    params["url"] = f"{base_url}?{urlencode(args)}"

    # HTTP errors are evaluated in response(): a 403 is not necessarily a
    # failure of the engine.  For example, the ``year:`` filter requires a
    # *Premium Plan* subscription.
    params["raise_for_httperror"] = False


def _is_premium_denial(json_data: t.Any) -> bool:
    """Return ``True`` if the payload reports a rejected premium-only feature."""
    if not isinstance(json_data, dict):
        return False
    status = str(json_data.get("status", "")).lower()
    message = str(json_data.get("message", "")).lower()
    return status == "fail" and "premium feature" in message


def _field(record: dict[str, t.Any], key: str) -> str:
    """Return ``record[key]`` as a string; missing or ``null`` values give ``""``."""
    value = record.get(key)
    return "" if value is None else str(value)


def _published(record: dict[str, t.Any]) -> t.Optional[datetime]:
    """Parse ``publicationDate`` (``YYYY-MM-DD``); ``None`` if absent or malformed."""
    try:
        return datetime.strptime(record["publicationDate"], "%Y-%m-%d")
    except (KeyError, TypeError, ValueError):
        return None


def _web_urls(record: dict[str, t.Any]) -> tuple[str, str]:
    """Return ``(html_url, pdf_url)`` of the record's ``web`` platform links.

    ``http://`` links are upgraded to ``https://``.  A format that is not
    present yields an empty string.
    """
    html_url = ""
    pdf_url = ""
    for item in record.get("url") or []:
        if item.get("platform") != "web":
            continue
        val = str(item.get("value") or "").replace("http://", "https://", 1)
        fmt = item.get("format")
        if fmt == "html":
            html_url = val
        elif fmt == "pdf":
            pdf_url = val
    return html_url, pdf_url


def response(resp: "SXNG_Response") -> EngineResults:
    """Convert the Meta-API JSON response into ``Paper`` results.

    :param resp: the HTTP response of the URL built by :py:func:`request`.
    :return: the collected results; empty if the API rejected the query
        because it uses a premium-only feature.
    :raises: whatever :py:func:`searx.network.raise_for_httperror` raises for
        any other HTTP error status.
    """

    res = EngineResults()

    try:
        json_data = resp.json()
    except ValueError:
        # The body is not JSON (e.g. an HTML error page).  Automatic HTTP error
        # handling is disabled in request(), so report the HTTP error here and
        # only fall back to the decode error if the status was fine.
        raise_for_httperror(resp)
        raise

    if resp.status_code == 403 and _is_premium_denial(json_data):
        # The query uses a filter the subscription does not cover: not an
        # engine error, there are just no results.
        return res
    raise_for_httperror(resp)

    records = json_data.get("records") if isinstance(json_data, dict) else None

    for record in records or []:
        # "Last, First" --> "First Last"
        authors: list[str] = [
            " ".join(str(author["creator"]).split(", ")[::-1])
            for author in record.get("creators") or []
            if author.get("creator")
        ]

        tags = record.get("keyword") or []
        if isinstance(tags, str):
            tags = [tags]

        html_url, pdf_url = _web_urls(record)
        doi = _field(record, "doi")
        # A result needs a link: prefer the HTML page, then the PDF, and as a
        # last resort the DOI resolver.
        url = html_url or pdf_url or (f"https://doi.org/{doi}" if doi else "")

        # NOTE(review): ``html_url`` is left unset on the Paper, presumably
        # because it would only duplicate ``url`` -- confirm.
        paper = res.types.Paper(
            url=url,
            pdf_url=pdf_url,
            title=_field(record, "title"),
            content=_field(record, "abstract"),
            comments=_field(record, "publicationName"),
            tags=tags,
            publishedDate=_published(record),
            type=_field(record, "contentType"),
            authors=authors,
            publisher=_field(record, "publisher"),
            journal=_field(record, "publicationName"),
            volume=_field(record, "volume"),
            pages="-".join([x for x in [_field(record, "startingPage"), _field(record, "endingPage")] if x]),
            number=_field(record, "number"),
            doi=doi,
            issn=[x for x in [_field(record, "issn")] if x],
            isbn=[x for x in [_field(record, "isbn")] if x],
        )
        res.add(paper)

    return res