mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-31 10:37:06 -04:00 
			
		
		
		
	Fixes the publishedDate format in the Reuters engine to accept ISO 8601 timestamps both with and without milliseconds.
Why is this change important?
Previously, the engine would sometimes fail saying:
2025-08-12 21:13:23,091 ERROR:searx.engines.reuters: exception : time data '2024-04-15T19:08:30.833Z' does not match format '%Y-%m-%dT%H:%M:%SZ'
Traceback (most recent call last):
...
  File "/usr/local/searxng/searx/engines/reuters.py", line 87, in response
    publishedDate=datetime.strptime(result["display_time"], "%Y-%m-%dT%H:%M:%SZ"),
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
...
Note that most queries seem to work with Reuters, but there are some results that have the additional milliseconds and fail. Regardless, the change is backwards compatible as both the formats (with and without the ms) should now parse correctly.
		
	
			
		
			
				
	
	
		
			91 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			91 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # SPDX-License-Identifier: AGPL-3.0-or-later
 | |
| """Reuters_ (news) is an international news agency.
 | |
| 
 | |
| .. _Reuters: https://www.reuters.com
 | |
| 
 | |
| Configuration
 | |
| =============
 | |
| 
 | |
| The engine has the following additional settings:
 | |
| 
 | |
| - :py:obj:`sort_order`
 | |
| 
 | |
| .. code:: yaml
 | |
| 
 | |
|    - name: reuters
 | |
|      engine: reuters
 | |
|      shortcut: reu
 | |
|      sort_order: "relevance"
 | |
| 
 | |
| 
 | |
| Implementations
 | |
| ===============
 | |
| 
 | |
| """
 | |
| 
 | |
| from json import dumps
 | |
| from urllib.parse import quote_plus
 | |
| from datetime import datetime, timedelta
 | |
| 
 | |
| from searx.result_types import EngineResults
 | |
| 
 | |
# Engine metadata shown in searxng's engine overview / about section.
about = {
    "website": "https://www.reuters.com",
    "wikidata_id": "Q130879",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}

categories = ["news"]
time_range_support = True
paging = True

base_url = "https://www.reuters.com"

# Number of results requested from the Reuters API per page.
results_per_page = 20
sort_order = "relevance"
"""Sort order, one of ``relevance``, ``display_date:desc`` or ``display_data:asc``."""

# Maps a searx time-range keyword to the look-back window in days used to
# compute the ``start_date`` filter in :py:obj:`request`.
time_range_duration_map = {
    "day": 1,
    "week": 7,
    "month": 30,
    "year": 365,
}
 | |
def request(query, params):
    """Build the Reuters articles-by-search API request URL for *query*.

    The API expects a single JSON document, URL-encoded into the ``query``
    parameter.  Paging is expressed as an item offset, and an optional
    ``start_date`` narrows results to the selected time range.
    """
    item_offset = (params["pageno"] - 1) * results_per_page
    query_args = {
        "keyword": query,
        "offset": item_offset,
        "orderby": sort_order,
        "size": results_per_page,
        "website": "reuters",
    }

    selected_range = params["time_range"]
    if selected_range:
        lookback_days = time_range_duration_map[selected_range]
        since = datetime.now() - timedelta(days=lookback_days)
        query_args["start_date"] = since.isoformat()

    encoded_query = quote_plus(dumps(query_args))
    params["url"] = f"{base_url}/pf/api/v3/content/fetch/articles-by-search-v2?query={encoded_query}"
    return params
 | |
| 
 | |
| 
 | |
def response(resp) -> EngineResults:
    """Parse the JSON answer of the Reuters search API into engine results.

    :param resp: HTTP response whose body is the JSON document returned by
        the ``articles-by-search-v2`` endpoint.
    :returns: an :py:obj:`EngineResults` container with one
        ``MainResult`` per article.
    """
    res = EngineResults()

    for result in resp.json().get("result", {}).get("articles", []):
        # ``display_time`` is ISO 8601, with or without fractional seconds
        # (e.g. ``2024-04-15T19:08:30.833Z``).  ``datetime.fromisoformat``
        # only accepts a trailing ``Z`` from Python 3.11 onward, so
        # normalize it to an explicit UTC offset for older interpreters;
        # on 3.11+ the result is identical.
        display_time = result["display_time"].replace("Z", "+00:00")
        res.add(
            res.types.MainResult(
                url=base_url + result["canonical_url"],
                title=result["web"],
                content=result["description"],
                thumbnail=result.get("thumbnail", {}).get("url", ""),
                metadata=result.get("kicker", {}).get("name"),
                publishedDate=datetime.fromisoformat(display_time),
            )
        )
    return res
 |