mirror of
https://github.com/searxng/searxng.git
synced 2025-12-01 10:45:08 -05:00
[mod] Springer Nature engine: revision of the engine (Paper result)
Revision of the engine / use of the result type Paper as well as other typifications. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
4b4bf0ecaf
commit
4c42704c80
8
docs/dev/engines/online/springer.rst
Normal file
8
docs/dev/engines/online/springer.rst
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
.. _springer engine:
|
||||||
|
|
||||||
|
===============
|
||||||
|
Springer Nature
|
||||||
|
===============
|
||||||
|
|
||||||
|
.. automodule:: searx.engines.springer
|
||||||
|
:members:
|
||||||
@ -1,71 +1,175 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
"""Springer Nature (science)
|
"""`Springer Nature`_ is a global publisher dedicated to providing service to
|
||||||
|
the research community, with an official Springer-API_ (API-Playground_).
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
The Springer engine requires an API key, which can be obtained via the
|
||||||
|
`Springer subscription`_.
|
||||||
|
|
||||||
|
Since the search term is passed 1:1 to the API, SearXNG users can use the
|
||||||
|
`Supported Query Parameters`_.
|
||||||
|
|
||||||
|
- ``!springer (doi:10.1007/s10948-025-07019-1 OR doi:10.1007/s10948-025-07035-1)``
|
||||||
|
- ``!springer keyword:ybco``
|
||||||
|
|
||||||
|
However, please note that the available options depend on the subscription type.
|
||||||
|
|
||||||
|
For example, the ``year:`` filter requires a *Premium Plan* subscription.
|
||||||
|
|
||||||
|
- ``!springer keyword:ybco year:2024``
|
||||||
|
|
||||||
|
The engine uses the REST Meta-API_ `v2` endpoint, but there is also a `Python
|
||||||
|
API Wrapper`_.
|
||||||
|
|
||||||
|
.. _Python API Wrapper: https://pypi.org/project/springernature-api-client/
|
||||||
|
.. _Springer Nature: https://www.springernature.com/
|
||||||
|
.. _Springer subscription: https://dev.springernature.com/subscription/
|
||||||
|
.. _Springer-API: https://dev.springernature.com/docs/introduction/
|
||||||
|
.. _API-Playground: https://dev.springernature.com/docs/live-documentation/
|
||||||
|
.. _Meta-API: https://dev.springernature.com/docs/api-endpoints/meta-api/
|
||||||
|
.. _Supported Query Parameters: https://dev.springernature.com/docs/supported-query-params/
|
||||||
|
|
||||||
|
|
||||||
|
Configuration
|
||||||
|
=============
|
||||||
|
|
||||||
|
The engine has the following additional settings:
|
||||||
|
|
||||||
|
- :py:obj:`api_key`
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
- name: springer nature
|
||||||
|
api_key: "..."
|
||||||
|
inactive: false
|
||||||
|
|
||||||
|
|
||||||
|
Implementations
|
||||||
|
===============
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import typing as t
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from json import loads
|
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
from searx.exceptions import SearxEngineAPIException
|
from searx.network import raise_for_httperror
|
||||||
|
from searx.result_types import EngineResults
|
||||||
|
|
||||||
|
if t.TYPE_CHECKING:
|
||||||
|
from searx.extended_types import SXNG_Response
|
||||||
|
from searx.search.processors import OnlineParams
|
||||||
|
|
||||||
about = {
|
about = {
|
||||||
"website": 'https://www.springernature.com/',
|
"website": "https://www.springernature.com/",
|
||||||
"wikidata_id": 'Q21096327',
|
"wikidata_id": "Q21096327",
|
||||||
"official_api_documentation": 'https://dev.springernature.com/',
|
"official_api_documentation": "https://dev.springernature.com/docs/live-documentation/",
|
||||||
"use_official_api": True,
|
"use_official_api": True,
|
||||||
"require_api_key": True,
|
"require_api_key": True,
|
||||||
"results": 'JSON',
|
"results": "JSON",
|
||||||
}
|
}
|
||||||
|
|
||||||
categories = ['science', 'scientific publications']
|
categories = ["science", "scientific publications"]
|
||||||
|
|
||||||
paging = True
|
paging = True
|
||||||
nb_per_page = 10
|
nb_per_page = 10
|
||||||
api_key = 'unset'
|
"""Number of results to return in the request, see `Pagination and Limits`_ for
|
||||||
|
more details.
|
||||||
|
|
||||||
base_url = 'https://api.springernature.com/metadata/json?'
|
.. _Pagination and Limits:
|
||||||
|
https://dev.springernature.com/docs/advanced-querying/pagination-limits/
|
||||||
|
"""
|
||||||
|
|
||||||
|
api_key = ""
|
||||||
|
"""Key used for the Meta-API_. Get your API key from: `Springer subscription`_"""
|
||||||
|
|
||||||
|
base_url = "https://api.springernature.com/meta/v2/json"
|
||||||
|
"""An enhanced endpoint with additional metadata fields and optimized queries
|
||||||
|
for more efficient and comprehensive retrieval (Meta-API_ `v2`).
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def setup(engine_settings: dict[str, t.Any]) -> bool:
|
||||||
if api_key == 'unset':
|
"""Initialization of the Springer engine, checks whether the
|
||||||
raise SearxEngineAPIException('missing Springer-Nature API key')
|
:py:obj:`api_key` is set, otherwise the engine is inactive.
|
||||||
args = urlencode({'q': query, 's': nb_per_page * (params['pageno'] - 1), 'p': nb_per_page, 'api_key': api_key})
|
"""
|
||||||
params['url'] = base_url + args
|
key: str = engine_settings.get("api_key", "")
|
||||||
logger.debug("query_url --> %s", params['url'])
|
try:
|
||||||
return params
|
# Springer's API key is a hex value
|
||||||
|
int(key, 16)
|
||||||
|
return True
|
||||||
|
except ValueError:
|
||||||
|
logger.error("Springer's API key is not set or invalid.")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
def request(query: str, params: "OnlineParams") -> None:
|
||||||
results = []
|
args = {
|
||||||
json_data = loads(resp.text)
|
"api_key": api_key,
|
||||||
|
"q": query,
|
||||||
|
"s": nb_per_page * (params["pageno"] - 1),
|
||||||
|
"p": nb_per_page,
|
||||||
|
}
|
||||||
|
params["url"] = f"{base_url}?{urlencode(args)}"
|
||||||
|
# Do not raise on HTTP errors here: a 403 for a premium-only feature (for example, the ``year:`` filter requires a *Premium Plan* subscription) is handled in response().
|
||||||
|
params["raise_for_httperror"] = False
|
||||||
|
|
||||||
for record in json_data['records']:
|
|
||||||
published = datetime.strptime(record['publicationDate'], '%Y-%m-%d')
|
def response(resp: "SXNG_Response") -> EngineResults:
|
||||||
authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']]
|
|
||||||
tags = record.get('genre')
|
res = EngineResults()
|
||||||
if isinstance(tags, str):
|
json_data = resp.json()
|
||||||
tags = [tags]
|
|
||||||
results.append(
|
if (
|
||||||
{
|
resp.status_code == 403
|
||||||
'template': 'paper.html',
|
and json_data["status"].lower() == "fail"
|
||||||
'url': record['url'][0]['value'].replace('http://', 'https://', 1),
|
and "premium feature" in json_data["message"].lower()
|
||||||
'title': record['title'],
|
):
|
||||||
'content': record['abstract'],
|
return res
|
||||||
'comments': record['publicationName'],
|
raise_for_httperror(resp)
|
||||||
'tags': tags,
|
|
||||||
'publishedDate': published,
|
def field(k: str) -> str:
|
||||||
'type': record.get('contentType'),
|
return str(record.get(k, ""))
|
||||||
'authors': authors,
|
|
||||||
# 'editor': '',
|
for record in json_data["records"]:
|
||||||
'publisher': record.get('publisher'),
|
published = datetime.strptime(record["publicationDate"], "%Y-%m-%d")
|
||||||
'journal': record.get('publicationName'),
|
authors: list[str] = [" ".join(author["creator"].split(", ")[::-1]) for author in record["creators"]]
|
||||||
'volume': record.get('volume') or None,
|
|
||||||
'pages': '-'.join([x for x in [record.get('startingPage'), record.get('endingPage')] if x]),
|
pdf_url = ""
|
||||||
'number': record.get('number') or None,
|
html_url = ""
|
||||||
'doi': record.get('doi'),
|
url_list: list[dict[str, str]] = record["url"]
|
||||||
'issn': [x for x in [record.get('issn')] if x],
|
|
||||||
'isbn': [x for x in [record.get('isbn')] if x],
|
for item in url_list:
|
||||||
# 'pdf_url' : ''
|
if item["platform"] != "web":
|
||||||
}
|
continue
|
||||||
|
val = item["value"].replace("http://", "https://", 1)
|
||||||
|
if item["format"] == "html":
|
||||||
|
html_url = val
|
||||||
|
elif item["format"] == "pdf":
|
||||||
|
pdf_url = val
|
||||||
|
|
||||||
|
paper = res.types.Paper(
|
||||||
|
url=html_url,
|
||||||
|
# html_url=html_url,
|
||||||
|
pdf_url=pdf_url,
|
||||||
|
title=field("title"),
|
||||||
|
content=field("abstract"),
|
||||||
|
comments=field("publicationName"),
|
||||||
|
tags=record.get("keyword", []),
|
||||||
|
publishedDate=published,
|
||||||
|
type=field("contentType"),
|
||||||
|
authors=authors,
|
||||||
|
publisher=field("publisher"),
|
||||||
|
journal=field("publicationName"),
|
||||||
|
volume=field("volume"),
|
||||||
|
pages="-".join([x for x in [field("startingPage"), field("endingPage")] if x]),
|
||||||
|
number=field("number"),
|
||||||
|
doi=field("doi"),
|
||||||
|
issn=[x for x in [field("issn")] if x],
|
||||||
|
isbn=[x for x in [field("isbn")] if x],
|
||||||
)
|
)
|
||||||
return results
|
res.add(paper)
|
||||||
|
|
||||||
|
return res
|
||||||
|
|||||||
@ -1984,13 +1984,13 @@ engines:
|
|||||||
# query_fields: '' # query fields
|
# query_fields: '' # query fields
|
||||||
# enable_http: true
|
# enable_http: true
|
||||||
|
|
||||||
# - name: springer nature
|
- name: springer nature
|
||||||
# engine: springer
|
engine: springer
|
||||||
# # get your API key from: https://dev.springernature.com/signup
|
shortcut: springer
|
||||||
# # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601"
|
timeout: 5
|
||||||
# api_key: 'unset'
|
# read https://docs.searxng.org/dev/engines/online/springer.html
|
||||||
# shortcut: springer
|
api_key: ""
|
||||||
# timeout: 15.0
|
inactive: true
|
||||||
|
|
||||||
- name: startpage
|
- name: startpage
|
||||||
engine: startpage
|
engine: startpage
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user