mirror of
https://github.com/searxng/searxng.git
synced 2025-11-19 13:03:08 -05:00
The size of the full-size images from ``thumbnail.url`` is usually several MB. By reducing the full-size image to 80 pixels, the data size for a thumb is reduced from MB to a few KB. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
138 lines
3.8 KiB
Python
138 lines
3.8 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
"""Reuters_ (news) is an international news agency.
|
|
|
|
.. _Reuters: https://www.reuters.com
|
|
|
|
Configuration
|
|
=============
|
|
|
|
The engine has the following additional settings:
|
|
|
|
- :py:obj:`sort_order`
|
|
|
|
.. code:: yaml
|
|
|
|
- name: reuters
|
|
engine: reuters
|
|
shortcut: reu
|
|
sort_order: "relevance"
|
|
|
|
Implementations
|
|
===============
|
|
|
|
"""
|
|
|
|
from json import dumps
|
|
from urllib.parse import quote_plus
|
|
from datetime import datetime, timedelta
|
|
from dateutil import parser
|
|
|
|
from searx.result_types import EngineResults
|
|
|
|
# Metadata describing the engine and the upstream service it queries.
about = {
    "website": "https://www.reuters.com",
    "wikidata_id": "Q130879",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}

# Engine capabilities: news category, supports time range filters and paging.
categories = ["news"]
time_range_support = True
paging = True

# Prefix of the search API URL and of the article URLs built from the results.
base_url = "https://www.reuters.com"

# Number of articles requested per page; also used to compute the page offset.
results_per_page = 20
sort_order = "relevance"
"""Sort order, one of ``relevance``, ``display_date:desc`` or ``display_date:asc``."""

# Maps the name of a time range to the number of days it spans; the request
# uses it to compute the ``start_date`` of the search.
time_range_duration_map = {
    "day": 1,
    "week": 7,
    "month": 30,
    "year": 365,
}
|
|
|
|
|
|
def request(query, params):
    """Build the URL of Reuters' article search and store it in ``params["url"]``.

    The search arguments (keyword, page offset, sort order, page size and the
    optional start date) are serialized to JSON and passed URL-quoted in the
    ``query`` argument of the request.  The modified *params* is returned.
    """
    search_args = {
        "keyword": query,
        "offset": (params["pageno"] - 1) * results_per_page,
        "orderby": sort_order,
        "size": results_per_page,
        "website": "reuters",
    }

    time_range = params["time_range"]
    if time_range:
        # limit the search to articles not older than the selected time range
        since = datetime.now() - timedelta(days=time_range_duration_map[time_range])
        search_args["start_date"] = since.isoformat()

    encoded_args = quote_plus(dumps(search_args))
    params["url"] = f"{base_url}/pf/api/v3/content/fetch/articles-by-search-v2?query={encoded_args}"
    return params
|
|
|
|
|
|
def response(resp) -> EngineResults:
    """Convert the JSON response of the article search into engine results.

    An empty result list is returned when the response has no (or an empty)
    ``result`` field.  Otherwise one main result is added for each item of
    ``result.articles``.
    """
    results = EngineResults()

    payload = resp.json().get("result")
    if not payload:
        return results

    for article in payload.get("articles", []):
        item = results.types.MainResult(
            url=base_url + article["canonical_url"],
            title=article["web"],
            content=article["description"],
            # request a small (80 pixels high) image instead of the full-size one
            thumbnail=resize_url(article.get("thumbnail", {}), height=80),
            metadata=article.get("kicker", {}).get("name"),
            publishedDate=parser.isoparse(article["display_time"]),
        )
        results.add(item)

    return results
|
|
|
|
|
|
def resize_url(thumbnail: dict[str, str], width: int = 0, height: int = 0) -> str:
    """Generates a URL for Reuters' thumbnail with the dimensions *width* and
    *height*.  If no URL can be generated from the *thumbnail data* (missing,
    empty or ``None`` data, or data without a ``resizer_url``), an empty string
    will be returned.

    width: default is *unset* (``0``)
      Image width in pixels (zero and negative values are ignored).  If only
      width is specified, the height matches the original aspect ratio.

    height: default is *unset* (``0``)
      Image height in pixels (zero and negative values are ignored).  If only
      height is specified, the width matches the original aspect ratio.

    The file size of a full-size image is usually several MB; when reduced to a
    height of, for example, 80 pixels, only a few KB remain!

    Fields of the *thumbnail data* (``result.articles.[<int>].thumbnail``):

    thumbnail.url:
      Is a full-size image (>MB).

    thumbnail.width & .height:
      Dimensions of the full-size image.

    thumbnail.resizer_url:
      Reuters has a *resizer* `REST-API for the images`_, this is the URL of the
      service.  This URL includes the ``&auth`` argument, other arguments are
      ``&width=<int>`` and ``&height=<int>``.

    .. _REST-API for the images:
        https://dev.arcxp.com/photo-center/image-resizer/resizer-v2-how-to-transform-images/#query-parameters
    """

    # The field may be missing or an explicit JSON ``null``: no thumbnail then.
    if not thumbnail:
        return ""

    url = thumbnail.get("resizer_url")
    if not url:
        return ""

    dimensions = [
        f"{name}={int(value)}"
        for name, value in (("width", width), ("height", height))
        if int(value) > 0
    ]
    if not dimensions:
        return url

    # The resizer URL normally already carries a query string (``auth``); fall
    # back to starting one if it does not, so the result is always a valid URL.
    separator = "&" if "?" in url else "?"
    return url + separator + "&".join(dimensions)
|