[mod] CORE engine: revision of the engine (Paper result)

Revision of the engine: use the result type Paper and add type annotations
throughout.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2025-09-10 16:34:30 +02:00 committed by Markus Heiser
parent 22e73727c0
commit 3ec6d65f9b
2 changed files with 85 additions and 75 deletions

View File

@ -5,6 +5,10 @@ research from repositories and journals.
.. _CORE: https://core.ac.uk/about .. _CORE: https://core.ac.uk/about
.. note::
The CORE engine requires an :py:obj:`API key <api_key>`.
.. _core engine config: .. _core engine config:
Configuration Configuration
@ -17,135 +21,141 @@ The engine has the following additional settings:
.. code:: yaml .. code:: yaml
- name: core.ac.uk - name: core.ac.uk
engine: core
categories: science
shortcut: cor
api_key: "..." api_key: "..."
timeout: 5 inactive: false
Implementations Implementations
=============== ===============
""" """
# pylint: disable=too-many-branches
import typing as t
from datetime import datetime from datetime import datetime
from urllib.parse import urlencode from urllib.parse import urlencode
from searx.exceptions import SearxEngineAPIException from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
about = { about = {
"website": 'https://core.ac.uk', "website": "https://core.ac.uk",
"wikidata_id": 'Q22661180', "wikidata_id": "Q22661180",
"official_api_documentation": 'https://api.core.ac.uk/docs/v3', "official_api_documentation": "https://api.core.ac.uk/docs/v3",
"use_official_api": True, "use_official_api": True,
"require_api_key": True, "require_api_key": True,
"results": 'JSON', "results": "JSON",
} }
api_key = 'unset' api_key = ""
"""For an API key register at https://core.ac.uk/services/api and insert """For an API key register at https://core.ac.uk/services/api and insert
the API key in the engine :ref:`core engine config`.""" the API key in the engine :ref:`core engine config`."""
categories = ['science', 'scientific publications'] categories = ["science", "scientific publications"]
paging = True paging = True
nb_per_page = 10 nb_per_page = 10
base_url = 'https://api.core.ac.uk/v3/search/works/' base_url = "https://api.core.ac.uk/v3/search/works/"
def request(query, params): def setup(engine_settings: dict[str, t.Any]) -> bool:
if api_key == 'unset': """Initialization of the CORE_ engine, checks whether the :py:obj:`api_key`
raise SearxEngineAPIException('missing CORE API key') is set, otherwise the engine is inactive.
"""
key: str = engine_settings.get("api_key", "")
if key and key not in ("unset", "unknown", "..."):
return True
logger.error("CORE's API key is not set or invalid.")
return False
def request(query: str, params: "OnlineParams") -> None:
# API v3 uses different parameters # API v3 uses different parameters
search_params = { search_params = {
'q': query, "q": query,
'offset': (params['pageno'] - 1) * nb_per_page, "offset": (params["pageno"] - 1) * nb_per_page,
'limit': nb_per_page, "limit": nb_per_page,
'sort': 'relevance', "sort": "relevance",
} }
params['url'] = base_url + '?' + urlencode(search_params) params["url"] = base_url + "?" + urlencode(search_params)
params['headers'] = {'Authorization': f'Bearer {api_key}'} params["headers"] = {"Authorization": f"Bearer {api_key}"}
return params
def response(resp): def response(resp: "SXNG_Response") -> EngineResults:
results = [] # pylint: disable=too-many-branches
res = EngineResults()
json_data = resp.json() json_data = resp.json()
for result in json_data.get('results', []): for result in json_data.get("results", []):
# Get title # Get title
if not result.get('title'): if not result.get("title"):
continue continue
# Get URL - try different options # Get URL - try different options
url = None url: str | None = None
# Try DOI first # Try DOI first
doi = result.get('doi') doi: str = result.get("doi")
if doi: if doi:
url = f'https://doi.org/{doi}' url = f"https://doi.org/{doi}"
if url is None and result.get('doi'): if url is None and result.get("doi"):
# use the DOI reference # use the DOI reference
url = 'https://doi.org/' + str(result['doi']) url = "https://doi.org/" + str(result["doi"])
elif result.get('id'): elif result.get("id"):
url = 'https://core.ac.uk/works/' + str(result['id']) url = "https://core.ac.uk/works/" + str(result["id"])
elif result.get('downloadUrl'): elif result.get("downloadUrl"):
url = result['downloadUrl'] url = result["downloadUrl"]
elif result.get('sourceFulltextUrls'): elif result.get("sourceFulltextUrls"):
url = result['sourceFulltextUrls'] url = result["sourceFulltextUrls"]
else: else:
continue continue
# Published date # Published date
published_date = None published_date = None
raw_date = result.get('publishedDate') or result.get('depositedDate') raw_date = result.get("publishedDate") or result.get("depositedDate")
if raw_date: if raw_date:
try: try:
published_date = datetime.fromisoformat(result['publishedDate'].replace('Z', '+00:00')) published_date = datetime.fromisoformat(result["publishedDate"].replace("Z", "+00:00"))
except (ValueError, AttributeError): except (ValueError, AttributeError):
pass pass
# Handle journals # Handle journals
journals = [] journals = []
if result.get('journals'): if result.get("journals"):
journals = [j.get('title') for j in result['journals'] if j.get('title')] journals = [j.get("title") for j in result["journals"] if j.get("title")]
# Handle publisher # Handle publisher
publisher = result.get('publisher', '').strip("'") publisher = result.get("publisher", "").strip("'")
if publisher:
publisher = publisher.strip("'")
# Handle authors # Handle authors
authors = set() authors: set[str] = set()
for i in result.get('authors', []): for i in result.get("authors", []):
name = i.get("name") name: str | None = i.get("name")
if name: if name:
authors.add(name) authors.add(name)
results.append( res.add(
{ res.types.Paper(
'template': 'paper.html', title=result.get("title"),
'title': result.get('title'), url=url,
'url': url, content=result.get("fullText", "") or "",
'content': result.get('fullText', '') or '', tags=result.get("fieldOfStudy", []),
# 'comments': '', publishedDate=published_date,
'tags': result.get('fieldOfStudy', []), type=result.get("documentType", "") or "",
'publishedDate': published_date, authors=authors,
'type': result.get('documentType', '') or '', editor=", ".join(result.get("contributors", [])),
'authors': authors, publisher=publisher,
'editor': ', '.join(result.get('contributors', [])), journal=", ".join(journals),
'publisher': publisher, doi=result.get("doi"),
'journal': ', '.join(journals), pdf_url=result.get("downloadUrl", {}) or result.get("sourceFulltextUrls", {}),
'doi': result.get('doi'), )
# 'issn' : ''
# 'isbn' : ''
'pdf_url': result.get('downloadUrl', {}) or result.get("sourceFulltextUrls", {}),
}
) )
return results return res

View File

@ -659,12 +659,12 @@ engines:
timeout: 30 timeout: 30
disabled: true disabled: true
# - name: core.ac.uk - name: core.ac.uk
# engine: core engine: core
# categories: science shortcut: cor
# shortcut: cor # read https://docs.searxng.org/dev/engines/online/core.html
# # get your API key from: https://core.ac.uk/api-keys/register/ api_key: ""
# api_key: 'unset' inactive: true
- name: cppreference - name: cppreference
engine: cppreference engine: cppreference