mirror of
https://github.com/searxng/searxng.git
synced 2025-12-01 10:45:08 -05:00
[mod] Springer Nature engine: revision of the engine (Paper result)
Revision of the engine / use of the result type Paper as well as other typifications. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
4b4bf0ecaf
commit
4c42704c80
8
docs/dev/engines/online/springer.rst
Normal file
8
docs/dev/engines/online/springer.rst
Normal file
@ -0,0 +1,8 @@
|
||||
.. _springer engine:
|
||||
|
||||
===============
|
||||
Springer Nature
|
||||
===============
|
||||
|
||||
.. automodule:: searx.engines.springer
|
||||
:members:
|
||||
@ -1,71 +1,175 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Springer Nature (science)
|
||||
"""`Springer Nature`_ is a global publisher dedicated to providing service to
|
||||
the research community.  This engine uses the official Springer-API_ (API-Playground_).
|
||||
|
||||
.. note::
|
||||
|
||||
The Springer engine requires an API key, which can be obtained via the
|
||||
`Springer subscription`_.
|
||||
|
||||
Since the search term is passed 1:1 to the API, SearXNG users can use the
|
||||
`Supported Query Parameters`_.
|
||||
|
||||
- ``!springer (doi:10.1007/s10948-025-07019-1 OR doi:10.1007/s10948-025-07035-1)``
|
||||
- ``!springer keyword:ybco``
|
||||
|
||||
However, please note that the available options depend on the subscription type.
|
||||
|
||||
For example, the ``year:`` filter requires a *Premium Plan* subscription.
|
||||
|
||||
- ``!springer keyword:ybco year:2024``
|
||||
|
||||
The engine uses the REST Meta-API_ `v2` endpoint, but there is also a `Python
|
||||
API Wrapper`_.
|
||||
|
||||
.. _Python API Wrapper: https://pypi.org/project/springernature-api-client/
|
||||
.. _Springer Nature: https://www.springernature.com/
|
||||
.. _Springer subscription: https://dev.springernature.com/subscription/
|
||||
.. _Springer-API: https://dev.springernature.com/docs/introduction/
|
||||
.. _API-Playground: https://dev.springernature.com/docs/live-documentation/
|
||||
.. _Meta-API: https://dev.springernature.com/docs/api-endpoints/meta-api/
|
||||
.. _Supported Query Parameters: https://dev.springernature.com/docs/supported-query-params/
|
||||
|
||||
|
||||
Configuration
|
||||
=============
|
||||
|
||||
The engine has the following additional settings:
|
||||
|
||||
- :py:obj:`api_key`
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
- name: springer nature
|
||||
api_key: "..."
|
||||
inactive: false
|
||||
|
||||
|
||||
Implementations
|
||||
===============
|
||||
|
||||
"""
|
||||
|
||||
import typing as t
|
||||
|
||||
from datetime import datetime
|
||||
from json import loads
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from searx.exceptions import SearxEngineAPIException
|
||||
from searx.network import raise_for_httperror
|
||||
from searx.result_types import EngineResults
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from searx.extended_types import SXNG_Response
|
||||
from searx.search.processors import OnlineParams
|
||||
|
||||
about = {
|
||||
"website": 'https://www.springernature.com/',
|
||||
"wikidata_id": 'Q21096327',
|
||||
"official_api_documentation": 'https://dev.springernature.com/',
|
||||
"website": "https://www.springernature.com/",
|
||||
"wikidata_id": "Q21096327",
|
||||
"official_api_documentation": "https://dev.springernature.com/docs/live-documentation/",
|
||||
"use_official_api": True,
|
||||
"require_api_key": True,
|
||||
"results": 'JSON',
|
||||
"results": "JSON",
|
||||
}
|
||||
|
||||
categories = ['science', 'scientific publications']
|
||||
categories = ["science", "scientific publications"]
|
||||
|
||||
paging = True
|
||||
nb_per_page = 10
|
||||
api_key = 'unset'
|
||||
"""Number of results to return in the request, see `Pagination and Limits`_ for
|
||||
more details.
|
||||
|
||||
base_url = 'https://api.springernature.com/metadata/json?'
|
||||
.. _Pagination and Limits:
|
||||
https://dev.springernature.com/docs/advanced-querying/pagination-limits/
|
||||
"""
|
||||
|
||||
api_key = ""
|
||||
"""Key used for the Meta-API_. Get your API key from: `Springer subscription`_"""
|
||||
|
||||
base_url = "https://api.springernature.com/meta/v2/json"
|
||||
"""An enhanced endpoint with additional metadata fields and optimized queries
|
||||
for more efficient and comprehensive retrieval (Meta-API_ `v2`).
|
||||
"""
|
||||
|
||||
|
||||
def request(query, params):
    """Build the request URL for the Springer metadata API.

    Raises :py:obj:`SearxEngineAPIException` when no API key has been
    configured (``api_key`` still has its ``'unset'`` placeholder value).
    The assembled URL is stored in ``params['url']`` and ``params`` is
    returned.
    """
    if api_key == 'unset':
        raise SearxEngineAPIException('missing Springer-Nature API key')

    # start offset of the requested page, derived from the page number
    offset = nb_per_page * (params['pageno'] - 1)
    query_string = urlencode(
        {
            'q': query,
            's': offset,
            'p': nb_per_page,
            'api_key': api_key,
        }
    )

    params['url'] = base_url + query_string
    logger.debug("query_url --> %s", params['url'])
    return params
|
||||
def setup(engine_settings: dict[str, t.Any]) -> bool:
|
||||
"""Initialization of the Springer engine, checks whether the
|
||||
:py:obj:`api_key` is set, otherwise the engine is inactive.
|
||||
"""
|
||||
key: str = engine_settings.get("api_key", "")
|
||||
try:
|
||||
# Springer's API key is a hex value
|
||||
int(key, 16)
|
||||
return True
|
||||
except ValueError:
|
||||
logger.error("Springer's API key is not set or invalid.")
|
||||
return False
|
||||
|
||||
|
||||
def response(resp):
|
||||
results = []
|
||||
json_data = loads(resp.text)
|
||||
def request(query: str, params: "OnlineParams") -> None:
    """Assemble the Meta-API request for ``query`` and store it in ``params``.

    The search term is passed unchanged as ``q``; ``s`` (start offset) and
    ``p`` (page size) are derived from ``params["pageno"]`` and
    :py:obj:`nb_per_page`.
    """
    # NOTE(review): confirm whether the API's ``s`` start index is 0- or
    # 1-based; the offset below is 0 for the first page.
    offset = nb_per_page * (params["pageno"] - 1)
    query_string = urlencode(
        {
            "api_key": api_key,
            "q": query,
            "s": offset,
            "p": nb_per_page,
        }
    )
    params["url"] = base_url + "?" + query_string

    # Do not raise on HTTP errors automatically: some query options depend on
    # the subscription type (for example, the ``year:`` filter requires a
    # *Premium Plan* subscription) and the resulting error response is
    # evaluated in the response function instead.
    params["raise_for_httperror"] = False
|
||||
|
||||
for record in json_data['records']:
|
||||
published = datetime.strptime(record['publicationDate'], '%Y-%m-%d')
|
||||
authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']]
|
||||
tags = record.get('genre')
|
||||
if isinstance(tags, str):
|
||||
tags = [tags]
|
||||
results.append(
|
||||
{
|
||||
'template': 'paper.html',
|
||||
'url': record['url'][0]['value'].replace('http://', 'https://', 1),
|
||||
'title': record['title'],
|
||||
'content': record['abstract'],
|
||||
'comments': record['publicationName'],
|
||||
'tags': tags,
|
||||
'publishedDate': published,
|
||||
'type': record.get('contentType'),
|
||||
'authors': authors,
|
||||
# 'editor': '',
|
||||
'publisher': record.get('publisher'),
|
||||
'journal': record.get('publicationName'),
|
||||
'volume': record.get('volume') or None,
|
||||
'pages': '-'.join([x for x in [record.get('startingPage'), record.get('endingPage')] if x]),
|
||||
'number': record.get('number') or None,
|
||||
'doi': record.get('doi'),
|
||||
'issn': [x for x in [record.get('issn')] if x],
|
||||
'isbn': [x for x in [record.get('isbn')] if x],
|
||||
# 'pdf_url' : ''
|
||||
}
|
||||
|
||||
def _record_field(record: dict[str, t.Any], key: str) -> str:
    """Return ``record[key]`` as a string, or ``""`` if missing or ``null``."""
    value = record.get(key)
    return "" if value is None else str(value)


def _record_published(record: dict[str, t.Any]) -> t.Optional[datetime]:
    """Parse ``publicationDate`` (``YYYY-MM-DD``); ``None`` if absent or malformed."""
    try:
        return datetime.strptime(record.get("publicationDate") or "", "%Y-%m-%d")
    except (TypeError, ValueError):
        return None


def _record_urls(record: dict[str, t.Any]) -> tuple[str, str]:
    """Return ``(url, pdf_url)`` for a record, both upgraded to ``https://``.

    The ``web`` / ``html`` entry is preferred as the result URL; if there is
    none, fall back to the PDF link, then to the first URL listed, then to a
    DOI link.  ``url`` is ``""`` only when the record offers none of these.
    """
    html_url = ""
    pdf_url = ""
    first_url = ""

    for item in record.get("url") or []:
        val = str(item.get("value") or "").replace("http://", "https://", 1)
        if not val:
            continue
        first_url = first_url or val
        if item.get("platform") != "web":
            continue
        if item.get("format") == "html":
            html_url = val
        elif item.get("format") == "pdf":
            pdf_url = val

    url = html_url or pdf_url or first_url
    doi = _record_field(record, "doi")
    if not url and doi:
        url = f"https://doi.org/{doi}"
    return url, pdf_url


def response(resp: "SXNG_Response") -> EngineResults:
    """Convert the Meta-API JSON response into ``Paper`` results.

    A ``403`` answer whose JSON body reports a failed *premium feature*
    yields an empty result list instead of an error; every other HTTP error
    is raised via :py:obj:`raise_for_httperror`.  Records with missing or
    malformed optional fields are still returned with those fields left
    empty; records without any usable URL are skipped.
    """
    res = EngineResults()

    try:
        json_data = resp.json()
    except ValueError:
        # An error page is not necessarily JSON: report the HTTP error rather
        # than the decode error.  If the status is fine, the decode error is
        # the real problem and is re-raised.
        raise_for_httperror(resp)
        raise

    if (
        resp.status_code == 403
        and str(json_data.get("status") or "").lower() == "fail"
        and "premium feature" in str(json_data.get("message") or "").lower()
    ):
        return res
    raise_for_httperror(resp)

    for record in json_data.get("records") or []:
        url, pdf_url = _record_urls(record)
        if not url:
            # nothing to link to
            continue

        # "Family, Given" --> "Given Family"
        authors: list[str] = [
            " ".join(author["creator"].split(", ")[::-1])
            for author in record.get("creators") or []
            if author.get("creator")
        ]

        tags = record.get("keyword") or []
        if isinstance(tags, str):
            tags = [tags]

        pages = "-".join(
            x for x in (_record_field(record, "startingPage"), _record_field(record, "endingPage")) if x
        )

        paper = res.types.Paper(
            url=url,
            pdf_url=pdf_url,
            title=_record_field(record, "title"),
            content=_record_field(record, "abstract"),
            comments=_record_field(record, "publicationName"),
            tags=tags,
            publishedDate=_record_published(record),
            type=_record_field(record, "contentType"),
            authors=authors,
            publisher=_record_field(record, "publisher"),
            journal=_record_field(record, "publicationName"),
            volume=_record_field(record, "volume"),
            pages=pages,
            number=_record_field(record, "number"),
            doi=_record_field(record, "doi"),
            issn=[x for x in [_record_field(record, "issn")] if x],
            isbn=[x for x in [_record_field(record, "isbn")] if x],
        )
        res.add(paper)

    return res
|
||||
|
||||
@ -1984,13 +1984,13 @@ engines:
|
||||
# query_fields: '' # query fields
|
||||
# enable_http: true
|
||||
|
||||
# - name: springer nature
|
||||
# engine: springer
|
||||
# # get your API key from: https://dev.springernature.com/signup
|
||||
# # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601"
|
||||
# api_key: 'unset'
|
||||
# shortcut: springer
|
||||
# timeout: 15.0
|
||||
- name: springer nature
|
||||
engine: springer
|
||||
shortcut: springer
|
||||
timeout: 5
|
||||
# read https://docs.searxng.org/dev/engines/online/springer.html
|
||||
api_key: ""
|
||||
inactive: true
|
||||
|
||||
- name: startpage
|
||||
engine: startpage
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user