mirror of
https://github.com/searxng/searxng.git
synced 2025-10-25 15:52:31 -04:00
Revision of the engine / use of the result type Paper as well as other typifications. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
162 lines
4.4 KiB
Python
162 lines
4.4 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||
"""CORE_ (COnnecting REpositories) provides a comprehensive bibliographic
|
||
database of the world’s scholarly literature, collecting and indexing
|
||
research from repositories and journals.
|
||
|
||
.. _CORE: https://core.ac.uk/about
|
||
|
||
.. note::
|
||
|
||
The CORE engine requires an :py:obj:`API key <api_key>`.
|
||
|
||
.. _core engine config:
|
||
|
||
Configuration
|
||
=============
|
||
|
||
The engine has the following additional settings:
|
||
|
||
- :py:obj:`api_key`
|
||
|
||
.. code:: yaml
|
||
|
||
- name: core.ac.uk
|
||
api_key: "..."
|
||
inactive: false
|
||
|
||
Implementations
|
||
===============
|
||
|
||
"""
|
||
|
||
import typing as t
|
||
|
||
from datetime import datetime
|
||
from urllib.parse import urlencode
|
||
|
||
from searx.result_types import EngineResults
|
||
|
||
if t.TYPE_CHECKING:
|
||
from searx.extended_types import SXNG_Response
|
||
from searx.search.processors import OnlineParams
|
||
|
||
|
||
about = {
|
||
"website": "https://core.ac.uk",
|
||
"wikidata_id": "Q22661180",
|
||
"official_api_documentation": "https://api.core.ac.uk/docs/v3",
|
||
"use_official_api": True,
|
||
"require_api_key": True,
|
||
"results": "JSON",
|
||
}
|
||
|
||
api_key = ""
|
||
"""For an API key register at https://core.ac.uk/services/api and insert
|
||
the API key in the engine :ref:`core engine config`."""
|
||
|
||
categories = ["science", "scientific publications"]
|
||
paging = True
|
||
nb_per_page = 10
|
||
base_url = "https://api.core.ac.uk/v3/search/works/"
|
||
|
||
|
||
def setup(engine_settings: dict[str, t.Any]) -> bool:
|
||
"""Initialization of the CORE_ engine, checks whether the :py:obj:`api_key`
|
||
is set, otherwise the engine is inactive.
|
||
"""
|
||
|
||
key: str = engine_settings.get("api_key", "")
|
||
if key and key not in ("unset", "unknown", "..."):
|
||
return True
|
||
logger.error("CORE's API key is not set or invalid.")
|
||
return False
|
||
|
||
|
||
def request(query: str, params: "OnlineParams") -> None:
|
||
|
||
# API v3 uses different parameters
|
||
search_params = {
|
||
"q": query,
|
||
"offset": (params["pageno"] - 1) * nb_per_page,
|
||
"limit": nb_per_page,
|
||
"sort": "relevance",
|
||
}
|
||
|
||
params["url"] = base_url + "?" + urlencode(search_params)
|
||
params["headers"] = {"Authorization": f"Bearer {api_key}"}
|
||
|
||
|
||
def response(resp: "SXNG_Response") -> EngineResults:
|
||
# pylint: disable=too-many-branches
|
||
res = EngineResults()
|
||
json_data = resp.json()
|
||
|
||
for result in json_data.get("results", []):
|
||
# Get title
|
||
if not result.get("title"):
|
||
continue
|
||
|
||
# Get URL - try different options
|
||
url: str | None = None
|
||
|
||
# Try DOI first
|
||
doi: str = result.get("doi")
|
||
if doi:
|
||
url = f"https://doi.org/{doi}"
|
||
|
||
if url is None and result.get("doi"):
|
||
# use the DOI reference
|
||
url = "https://doi.org/" + str(result["doi"])
|
||
elif result.get("id"):
|
||
url = "https://core.ac.uk/works/" + str(result["id"])
|
||
elif result.get("downloadUrl"):
|
||
url = result["downloadUrl"]
|
||
elif result.get("sourceFulltextUrls"):
|
||
url = result["sourceFulltextUrls"]
|
||
else:
|
||
continue
|
||
|
||
# Published date
|
||
published_date = None
|
||
|
||
raw_date = result.get("publishedDate") or result.get("depositedDate")
|
||
if raw_date:
|
||
try:
|
||
published_date = datetime.fromisoformat(result["publishedDate"].replace("Z", "+00:00"))
|
||
except (ValueError, AttributeError):
|
||
pass
|
||
|
||
# Handle journals
|
||
journals = []
|
||
if result.get("journals"):
|
||
journals = [j.get("title") for j in result["journals"] if j.get("title")]
|
||
|
||
# Handle publisher
|
||
publisher = result.get("publisher", "").strip("'")
|
||
|
||
# Handle authors
|
||
authors: set[str] = set()
|
||
for i in result.get("authors", []):
|
||
name: str | None = i.get("name")
|
||
if name:
|
||
authors.add(name)
|
||
|
||
res.add(
|
||
res.types.Paper(
|
||
title=result.get("title"),
|
||
url=url,
|
||
content=result.get("fullText", "") or "",
|
||
tags=result.get("fieldOfStudy", []),
|
||
publishedDate=published_date,
|
||
type=result.get("documentType", "") or "",
|
||
authors=authors,
|
||
editor=", ".join(result.get("contributors", [])),
|
||
publisher=publisher,
|
||
journal=", ".join(journals),
|
||
doi=result.get("doi"),
|
||
pdf_url=result.get("downloadUrl", {}) or result.get("sourceFulltextUrls", {}),
|
||
)
|
||
)
|
||
|
||
return res
|