diff --git a/docs/dev/engines/engine_overview.rst b/docs/dev/engines/engine_overview.rst index 145773007..76741851e 100644 --- a/docs/dev/engines/engine_overview.rst +++ b/docs/dev/engines/engine_overview.rst @@ -144,9 +144,9 @@ parameters with default value can be redefined for special purposes. ====================== ============== ======================================================================== url str ``''`` method str ``'GET'`` - headers set ``{}`` - data set ``{}`` - cookies set ``{}`` + headers dict ``{}`` + data dict ``{}`` + cookies dict ``{}`` verify bool ``True`` headers.User-Agent str a random User-Agent category str current category, like ``'general'`` @@ -226,9 +226,9 @@ following parameters can be used to specify a search request: =================== =========== ========================================================================== url str requested url method str HTTP request method - headers set HTTP header information - data set HTTP data information - cookies set HTTP cookies + headers dict HTTP header information + data dict HTTP data information + cookies dict HTTP cookies verify bool Performing SSL-Validity check allow_redirects bool Follow redirects max_redirects int maximum redirects, hard limit @@ -249,6 +249,3 @@ by templates. 
For more details read section: - :ref:`simple theme templates` - :ref:`result types` - - - diff --git a/searx/data/currencies.py b/searx/data/currencies.py index 3378a5022..a328789e3 100644 --- a/searx/data/currencies.py +++ b/searx/data/currencies.py @@ -1,22 +1,23 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """Simple implementation to store currencies data in a SQL database.""" - __all__ = ["CurrenciesDB"] +import typing as t import json import pathlib from .core import get_cache, log +@t.final class CurrenciesDB: # pylint: disable=missing-class-docstring - ctx_names = "data_currencies_names" - ctx_iso4217 = "data_currencies_iso4217" + ctx_names: str = "data_currencies_names" + ctx_iso4217: str = "data_currencies_iso4217" - json_file = pathlib.Path(__file__).parent / "currencies.json" + json_file: pathlib.Path = pathlib.Path(__file__).parent / "currencies.json" def __init__(self): self.cache = get_cache() @@ -33,23 +34,27 @@ class CurrenciesDB: def load(self): log.debug("init searx.data.CURRENCIES") with open(self.json_file, encoding="utf-8") as f: - data_dict = json.load(f) + data_dict: dict[str, dict[str, str]] = json.load(f) for key, value in data_dict["names"].items(): self.cache.set(key=key, value=value, ctx=self.ctx_names, expire=None) for key, value in data_dict["iso4217"].items(): self.cache.set(key=key, value=value, ctx=self.ctx_iso4217, expire=None) - def name_to_iso4217(self, name): + def name_to_iso4217(self, name: str) -> str | None: self.init() - ret_val = self.cache.get(key=name, default=name, ctx=self.ctx_names) + ret_val: str | list[str] | None = self.cache.get(key=name, default=None, ctx=self.ctx_names) if isinstance(ret_val, list): # if more alternatives, use the last in the list ret_val = ret_val[-1] return ret_val - def iso4217_to_name(self, iso4217, language): + def iso4217_to_name(self, iso4217: str, language: str) -> str | None: self.init() - iso4217_languages: dict = self.cache.get(key=iso4217, default={}, ctx=self.ctx_iso4217) - 
return iso4217_languages.get(language, iso4217) + iso4217_languages: dict[str, str] = self.cache.get(key=iso4217, default={}, ctx=self.ctx_iso4217) + return iso4217_languages.get(language) + + def is_iso4217(self, iso4217: str) -> bool: + item = self.cache.get(key=iso4217, default={}, ctx=self.ctx_iso4217) + return bool(item) diff --git a/searx/enginelib/__init__.py b/searx/enginelib/__init__.py index a78981561..9d864e622 100644 --- a/searx/enginelib/__init__.py +++ b/searx/enginelib/__init__.py @@ -39,6 +39,7 @@ if t.TYPE_CHECKING: from searx.enginelib.traits import EngineTraits from searx.extended_types import SXNG_Response from searx.result_types import EngineResults + from searx.search.processors import OfflineParamTypes, OnlineParamTypes ENGINES_CACHE: ExpireCacheSQLite = ExpireCacheSQLite.build_cache( ExpireCacheCfg( @@ -195,6 +196,10 @@ class Engine(abc.ABC): # pylint: disable=too-few-public-methods paging: bool """Engine supports multiple pages.""" + max_page: int = 0 + """If the engine supports paging, then this is the value for the last page + that is still supported. ``0`` means unlimited numbers of pages.""" + time_range_support: bool """Engine supports search time range.""" @@ -304,14 +309,49 @@ class Engine(abc.ABC): # pylint: disable=too-few-public-methods weight: int """Weighting of the results of this engine (:ref:`weight `).""" - def init(self, engine_settings: dict[str, t.Any]) -> None: # pyright: ignore[reportUnusedParameter] - """Initialization of the engine. If no initialization is needed, drop - this init function.""" + def setup(self, engine_settings: dict[str, t.Any]) -> bool: # pylint: disable=unused-argument + """Dynamic setup of the engine settings. + + With this method, the engine's setup is carried out. For example, to + check or dynamically adapt the values handed over in the parameter + ``engine_settings``. The return value (True/False) indicates whether + the setup was successful and the engine can be built or rejected. 
+ + The method is optional and is called synchronously as part of the + initialization of the service and is therefore only suitable for simple + (local) exams/changes at the engine setting. The :py:obj:`Engine.init` + method must be used for longer tasks in which values of a remote must be + determined, for example. + """ + return True + + def init(self, engine_settings: dict[str, t.Any]) -> bool | None: # pylint: disable=unused-argument + """Initialization of the engine. + + The method is optional and asynchronous (in a thread). It is suitable, + for example, for setting up a cache (for the engine) or for querying + values (required by the engine) from a remote. + + Whether the initialization was successful can be indicated by the return + value ``True`` or even ``False``. + + - If no return value is given from this init method (``None``), this is + equivalent to ``True``. + + - If an exception is thrown as part of the initialization, this is + equivalent to ``False``. + """ + return True @abc.abstractmethod - def request(self, query: str, params: dict[str, t.Any]) -> None: - """Build up the params for the online request.""" + def search(self, query: str, params: "OfflineParamTypes") -> "EngineResults": + """Search method of the ``offline`` engines""" + + @abc.abstractmethod + def request(self, query: str, params: "OnlineParamTypes") -> None: + """Method to build the parameters for the request of an ``online`` + engine.""" @abc.abstractmethod def response(self, resp: "SXNG_Response") -> "EngineResults": - """Parse out the result items from the response.""" + """Method to parse the response of an ``online`` engine.""" diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index b1e24aea2..30ef7fd75 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -51,7 +51,10 @@ ENGINE_DEFAULT_ARGS: dict[str, int | str | list[t.Any] | dict[str, t.Any] | bool DEFAULT_CATEGORY = 'other' categories: "dict[str, list[Engine|types.ModuleType]]" = 
{'general': []} + engines: "dict[str, Engine | types.ModuleType]" = {} +"""Global registered engine instances.""" + engine_shortcuts = {} """Simple map of registered *shortcuts* to name of the engine (or ``None``). @@ -144,6 +147,9 @@ def load_engine(engine_data: dict[str, t.Any]) -> "Engine | types.ModuleType | N set_loggers(engine, engine_name) + if not call_engine_setup(engine, engine_data): + return None + if not any(cat in settings['categories_as_tabs'] for cat in engine.categories): engine.categories.append(DEFAULT_CATEGORY) @@ -223,6 +229,25 @@ def is_engine_active(engine: "Engine | types.ModuleType"): return True +def call_engine_setup(engine: "Engine | types.ModuleType", engine_data: dict[str, t.Any]) -> bool: + setup_ok = False + setup_func = getattr(engine, "setup", None) + + if setup_func is None: + setup_ok = True + elif not callable(setup_func): + logger.error("engine's setup method isn't a callable (is of type: %s)", type(setup_func)) + else: + try: + setup_ok = engine.setup(engine_data) + except Exception as e: # pylint: disable=broad-except + logger.exception('exception : {0}'.format(e)) + + if not setup_ok: + logger.error("%s: Engine setup was not successful, engine is set to inactive.", engine.name) + return setup_ok + + def register_engine(engine: "Engine | types.ModuleType"): if engine.name in engines: logger.error('Engine config error: ambiguous name: {0}'.format(engine.name)) diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index c4c757e3f..0b9b339a9 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -1,53 +1,58 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -"""Currency convert (DuckDuckGo) -""" +"""Currency convert (DuckDuckGo)""" +import typing as t import json from searx.result_types import EngineResults +if t.TYPE_CHECKING: + from searx.search.processors import OnlineCurrenciesParams + from searx.extended_types import SXNG_Response + # about about = { - "website": 
'https://duckduckgo.com/', - "wikidata_id": 'Q12805', - "official_api_documentation": 'https://duckduckgo.com/api', + "website": "https://duckduckgo.com/", + "wikidata_id": "Q12805", + "official_api_documentation": "https://duckduckgo.com/api", "use_official_api": False, "require_api_key": False, - "results": 'JSONP', + "results": "JSONP", "description": "Service from DuckDuckGo.", } -engine_type = 'online_currency' -categories = [] -base_url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}' +engine_type = "online_currency" +categories = ["currency", "general"] + +base_url = "https://duckduckgo.com/js/spice/currency/1/%(from_iso4217)s/%(to_iso4217)s" +ddg_link_url = "https://duckduckgo.com/?q=%(from_iso4217)s+to+%(to_iso4217)s" + weight = 100 -https_support = True + +def request(query: str, params: "OnlineCurrenciesParams") -> None: # pylint: disable=unused-argument + params["url"] = base_url % params -def request(_query, params): - params['url'] = base_url.format(params['from'], params['to']) - return params - - -def response(resp) -> EngineResults: +def response(resp: "SXNG_Response") -> EngineResults: res = EngineResults() # remove first and last lines to get only json - json_resp = resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2] + json_resp = resp.text[resp.text.find("\n") + 1 : resp.text.rfind("\n") - 2] try: conversion_rate = float(json.loads(json_resp)["to"][0]["mid"]) except IndexError: return res - answer = '{0} {1} = {2} {3}, 1 {1} ({5}) = {4} {3} ({6})'.format( - resp.search_params['amount'], - resp.search_params['from'], - resp.search_params['amount'] * conversion_rate, - resp.search_params['to'], - conversion_rate, - resp.search_params['from_name'], - resp.search_params['to_name'], - ) - url = f"https://duckduckgo.com/?q={resp.search_params['from']}+to+{resp.search_params['to']}" + params: OnlineCurrenciesParams = resp.search_params # pyright: ignore[reportAssignmentType] + answer = "{0} {1} = {2} {3} (1 {5} : {4} {6})".format( + 
params["amount"], + params["from_iso4217"], + params["amount"] * conversion_rate, + params["to_iso4217"], + conversion_rate, + params["from_name"], + params["to_name"], + ) + url = ddg_link_url % params res.add(res.types.Answer(answer=answer, url=url)) return res diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index bda056edd..d393eae92 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -24,7 +24,6 @@ engine_type = 'online_dictionary' categories = ['general', 'translate'] base_url = "https://dictzone.com" weight = 100 -https_support = True def request(query, params): # pylint: disable=unused-argument diff --git a/searx/engines/metacpan.py b/searx/engines/metacpan.py index 50608bc11..32bc55b89 100644 --- a/searx/engines/metacpan.py +++ b/searx/engines/metacpan.py @@ -3,7 +3,6 @@ """ from urllib.parse import urlunparse -from json import dumps # about about = { @@ -56,7 +55,7 @@ def request(query, params): query_data = query_data_template query_data["query"]["multi_match"]["query"] = query query_data["from"] = (params["pageno"] - 1) * number_of_results - params["data"] = dumps(query_data) + params["json"] = query_data return params diff --git a/searx/engines/translated.py b/searx/engines/translated.py index cffb6eda3..08808cfd2 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -22,7 +22,6 @@ categories = ['general', 'translate'] api_url = "https://api.mymemory.translated.net" web_url = "https://mymemory.translated.net" weight = 100 -https_support = True api_key = '' diff --git a/searx/exceptions.py b/searx/exceptions.py index 4743c8d56..6b150929e 100644 --- a/searx/exceptions.py +++ b/searx/exceptions.py @@ -74,9 +74,9 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException): """ if suspended_time is None: suspended_time = self._get_default_suspended_time() - super().__init__(message + ', suspended_time=' + str(suspended_time)) + self.message: str = f"{message} 
(suspended_time={suspended_time})" self.suspended_time: int = suspended_time - self.message: str = message + super().__init__(self.message) def _get_default_suspended_time(self) -> int: from searx import get_setting # pylint: disable=C0415 diff --git a/searx/extended_types.py b/searx/extended_types.py index 36efecddc..059ad947e 100644 --- a/searx/extended_types.py +++ b/searx/extended_types.py @@ -30,6 +30,7 @@ import httpx if typing.TYPE_CHECKING: import searx.preferences import searx.results + from searx.search.processors import ParamTypes class SXNG_Request(flask.Request): @@ -78,6 +79,8 @@ class SXNG_Response(httpx.Response): response = typing.cast(SXNG_Response, response) if response.ok: ... + query_was = search_params["query"] """ ok: bool + search_params: "ParamTypes" diff --git a/searx/metrics/error_recorder.py b/searx/metrics/error_recorder.py index e653bbf2f..c0666383d 100644 --- a/searx/metrics/error_recorder.py +++ b/searx/metrics/error_recorder.py @@ -24,17 +24,6 @@ LogParametersType = tuple[str, ...] 
class ErrorContext: # pylint: disable=missing-class-docstring - __slots__ = ( - 'filename', - 'function', - 'line_no', - 'code', - 'exception_classname', - 'log_message', - 'log_parameters', - 'secondary', - ) - def __init__( # pylint: disable=too-many-arguments self, filename: str, @@ -159,7 +148,7 @@ def get_messages(exc, filename) -> tuple[str, ...]: # pylint: disable=too-many- return () -def get_exception_classname(exc: Exception) -> str: +def get_exception_classname(exc: BaseException) -> str: exc_class = exc.__class__ exc_name = exc_class.__qualname__ exc_module = exc_class.__module__ @@ -182,7 +171,7 @@ def get_error_context( return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary) -def count_exception(engine_name: str, exc: Exception, secondary: bool = False) -> None: +def count_exception(engine_name: str, exc: BaseException, secondary: bool = False) -> None: if not settings['general']['enable_metrics']: return framerecords = inspect.trace() diff --git a/searx/network/__init__.py b/searx/network/__init__.py index 070388d2e..3a3b93d08 100644 --- a/searx/network/__init__.py +++ b/searx/network/__init__.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # pylint: disable=missing-module-docstring, global-statement -__all__ = ["initialize", "check_network_configuration", "raise_for_httperror"] +__all__ = ["get_network", "initialize", "check_network_configuration", "raise_for_httperror"] import typing as t @@ -22,6 +22,8 @@ from .network import get_network, initialize, check_network_configuration # pyl from .client import get_loop from .raise_for_httperror import raise_for_httperror +if t.TYPE_CHECKING: + from searx.network.network import Network THREADLOCAL = threading.local() """Thread-local data is data for thread specific values.""" @@ -31,7 +33,7 @@ def reset_time_for_thread(): THREADLOCAL.total_time = 0 -def get_time_for_thread(): +def get_time_for_thread() -> float | None: """returns 
thread's total time or None""" return THREADLOCAL.__dict__.get('total_time') @@ -45,7 +47,7 @@ def set_context_network_name(network_name: str): THREADLOCAL.network = get_network(network_name) -def get_context_network(): +def get_context_network() -> "Network": """If set return thread's network. If unset, return value from :py:obj:`get_network`. @@ -68,7 +70,7 @@ def _record_http_time(): THREADLOCAL.total_time += time_after_request - time_before_request -def _get_timeout(start_time: float, kwargs): +def _get_timeout(start_time: float, kwargs: t.Any) -> float: # pylint: disable=too-many-branches timeout: float | None @@ -91,7 +93,7 @@ def _get_timeout(start_time: float, kwargs): return timeout -def request(method, url, **kwargs) -> SXNG_Response: +def request(method: str, url: str, **kwargs: t.Any) -> SXNG_Response: """same as requests/requests/api.py request(...)""" with _record_http_time() as start_time: network = get_context_network() @@ -183,15 +185,15 @@ def head(url: str, **kwargs: t.Any) -> SXNG_Response: return request('head', url, **kwargs) -def post(url: str, data=None, **kwargs: t.Any) -> SXNG_Response: +def post(url: str, data: dict[str, t.Any] | None = None, **kwargs: t.Any) -> SXNG_Response: return request('post', url, data=data, **kwargs) -def put(url: str, data=None, **kwargs: t.Any) -> SXNG_Response: +def put(url: str, data: dict[str, t.Any] | None = None, **kwargs: t.Any) -> SXNG_Response: return request('put', url, data=data, **kwargs) -def patch(url: str, data=None, **kwargs: t.Any) -> SXNG_Response: +def patch(url: str, data: dict[str, t.Any] | None = None, **kwargs: t.Any) -> SXNG_Response: return request('patch', url, data=data, **kwargs) @@ -250,7 +252,7 @@ def _close_response_method(self): continue -def stream(method: str, url: str, **kwargs: t.Any) -> tuple[httpx.Response, Iterable[bytes]]: +def stream(method: str, url: str, **kwargs: t.Any) -> tuple[SXNG_Response, Iterable[bytes]]: """Replace httpx.stream. 
Usage: diff --git a/searx/network/client.py b/searx/network/client.py index 8e69a9d46..bd21bc9b5 100644 --- a/searx/network/client.py +++ b/searx/network/client.py @@ -138,7 +138,7 @@ def get_transport_for_socks_proxy( password=proxy_password, rdns=rdns, loop=get_loop(), - verify=_verify, + verify=_verify, # pyright: ignore[reportArgumentType] http2=http2, local_address=local_address, limits=limit, diff --git a/searx/network/network.py b/searx/network/network.py index f52d9f87e..c5987bfff 100644 --- a/searx/network/network.py +++ b/searx/network/network.py @@ -1,8 +1,12 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # pylint: disable=global-statement # pylint: disable=missing-module-docstring, missing-class-docstring + +__all__ = ["get_network"] + import typing as t -from collections.abc import Generator, AsyncIterator +from collections.abc import Generator + import atexit import asyncio @@ -74,7 +78,7 @@ class Network: using_tor_proxy: bool = False, local_addresses: str | list[str] | None = None, retries: int = 0, - retry_on_http_error: None = None, + retry_on_http_error: bool = False, max_redirects: int = 30, logger_name: str = None, # pyright: ignore[reportArgumentType] ): @@ -232,14 +236,14 @@ class Network: return kwargs_clients @staticmethod - def extract_do_raise_for_httperror(kwargs): + def extract_do_raise_for_httperror(kwargs: dict[str, t.Any]): do_raise_for_httperror = True if 'raise_for_httperror' in kwargs: do_raise_for_httperror = kwargs['raise_for_httperror'] del kwargs['raise_for_httperror'] return do_raise_for_httperror - def patch_response(self, response: httpx.Response | SXNG_Response, do_raise_for_httperror: bool) -> SXNG_Response: + def patch_response(self, response: httpx.Response, do_raise_for_httperror: bool) -> SXNG_Response: if isinstance(response, httpx.Response): response = t.cast(SXNG_Response, response) # requests compatibility (response is not streamed) @@ -255,7 +259,7 @@ class Network: raise return response - def 
is_valid_response(self, response: SXNG_Response): + def is_valid_response(self, response: httpx.Response): # pylint: disable=too-many-boolean-expressions if ( (self.retry_on_http_error is True and 400 <= response.status_code <= 599) @@ -265,9 +269,7 @@ class Network: return False return True - async def call_client( - self, stream: bool, method: str, url: str, **kwargs: t.Any - ) -> AsyncIterator[SXNG_Response] | None: + async def call_client(self, stream: bool, method: str, url: str, **kwargs: t.Any) -> SXNG_Response: retries = self.retries was_disconnected = False do_raise_for_httperror = Network.extract_do_raise_for_httperror(kwargs) @@ -278,9 +280,9 @@ class Network: client.cookies = httpx.Cookies(cookies) try: if stream: - response = client.stream(method, url, **kwargs) # pyright: ignore[reportAny] + response = client.stream(method, url, **kwargs) else: - response = await client.request(method, url, **kwargs) # pyright: ignore[reportAny] + response = await client.request(method, url, **kwargs) if self.is_valid_response(response) or retries <= 0: return self.patch_response(response, do_raise_for_httperror) except httpx.RemoteProtocolError as e: @@ -298,7 +300,7 @@ class Network: raise e retries -= 1 - async def request(self, method: str, url: str, **kwargs): + async def request(self, method: str, url: str, **kwargs: t.Any) -> SXNG_Response: return await self.call_client(False, method, url, **kwargs) async def stream(self, method: str, url: str, **kwargs): @@ -358,7 +360,7 @@ def initialize( 'proxies': settings_outgoing['proxies'], 'max_redirects': settings_outgoing['max_redirects'], 'retries': settings_outgoing['retries'], - 'retry_on_http_error': None, + 'retry_on_http_error': False, } def new_network(params: dict[str, t.Any], logger_name: str | None = None): diff --git a/searx/search/__init__.py b/searx/search/__init__.py index 3ea33ff12..62539579c 100644 --- a/searx/search/__init__.py +++ b/searx/search/__init__.py @@ -1,8 +1,7 @@ # SPDX-License-Identifier: 
AGPL-3.0-or-later # pylint: disable=missing-module-docstring, too-few-public-methods -# the public namespace has not yet been finally defined .. -# __all__ = [..., ] +__all__ = ["SearchWithPlugins"] import typing as t @@ -22,7 +21,7 @@ from searx.metrics import initialize as initialize_metrics, counter_inc from searx.network import initialize as initialize_network, check_network_configuration from searx.results import ResultContainer from searx.search.checker import initialize as initialize_checker -from searx.search.processors import PROCESSORS, initialize as initialize_processors +from searx.search.processors import PROCESSORS if t.TYPE_CHECKING: @@ -44,7 +43,7 @@ def initialize( if check_network: check_network_configuration() initialize_metrics([engine['name'] for engine in settings_engines], enable_metrics) - initialize_processors(settings_engines) + PROCESSORS.init(settings_engines) if enable_checker: initialize_checker() @@ -52,8 +51,6 @@ def initialize( class Search: """Search information container""" - __slots__ = "search_query", "result_container", "start_time", "actual_timeout" # type: ignore - def __init__(self, search_query: "SearchQuery"): """Initialize the Search""" # init vars @@ -185,8 +182,6 @@ class Search: class SearchWithPlugins(Search): """Inherit from the Search class, add calls to the plugins.""" - __slots__ = 'user_plugins', 'request' - def __init__(self, search_query: "SearchQuery", request: "SXNG_Request", user_plugins: list[str]): super().__init__(search_query) self.user_plugins = user_plugins diff --git a/searx/search/models.py b/searx/search/models.py index 62424390f..6d14a9657 100644 --- a/searx/search/models.py +++ b/searx/search/models.py @@ -24,42 +24,29 @@ class EngineRef: return hash((self.name, self.category)) +@typing.final class SearchQuery: """container for all the search parameters (query, language, etc...)""" - __slots__ = ( - 'query', - 'engineref_list', - 'lang', - 'locale', - 'safesearch', - 'pageno', - 'time_range', - 
'timeout_limit', - 'external_bang', - 'engine_data', - 'redirect_to_first_result', - ) - def __init__( self, query: str, - engineref_list: typing.List[EngineRef], + engineref_list: list[EngineRef], lang: str = 'all', - safesearch: int = 0, + safesearch: typing.Literal[0, 1, 2] = 0, pageno: int = 1, - time_range: typing.Optional[str] = None, - timeout_limit: typing.Optional[float] = None, - external_bang: typing.Optional[str] = None, - engine_data: typing.Optional[typing.Dict[str, str]] = None, - redirect_to_first_result: typing.Optional[bool] = None, + time_range: typing.Literal["day", "week", "month", "year"] | None = None, + timeout_limit: float | None = None, + external_bang: str | None = None, + engine_data: dict[str, dict[str, str]] | None = None, + redirect_to_first_result: bool | None = None, ): # pylint:disable=too-many-arguments self.query = query self.engineref_list = engineref_list self.lang = lang - self.safesearch = safesearch + self.safesearch: typing.Literal[0, 1, 2] = safesearch self.pageno = pageno - self.time_range = time_range + self.time_range: typing.Literal["day", "week", "month", "year"] | None = time_range self.timeout_limit = timeout_limit self.external_bang = external_bang self.engine_data = engine_data or {} diff --git a/searx/search/processors/__init__.py b/searx/search/processors/__init__.py index 760513253..5e896c711 100644 --- a/searx/search/processors/__init__.py +++ b/searx/search/processors/__init__.py @@ -2,83 +2,95 @@ """Implement request processors used by engine-types.""" __all__ = [ - 'EngineProcessor', - 'OfflineProcessor', - 'OnlineProcessor', - 'OnlineDictionaryProcessor', - 'OnlineCurrencyProcessor', - 'OnlineUrlSearchProcessor', - 'PROCESSORS', + "OfflineParamTypes", + "OnlineCurrenciesParams", + "OnlineDictParams", + "OnlineParamTypes", + "OnlineParams", + "OnlineUrlSearchParams", + "PROCESSORS", + "ParamTypes", + "RequestParams", ] import typing as t -import threading - from searx import logger from searx import engines 
-from .online import OnlineProcessor +from .abstract import EngineProcessor, RequestParams from .offline import OfflineProcessor -from .online_dictionary import OnlineDictionaryProcessor -from .online_currency import OnlineCurrencyProcessor -from .online_url_search import OnlineUrlSearchProcessor -from .abstract import EngineProcessor +from .online import OnlineProcessor, OnlineParams +from .online_dictionary import OnlineDictionaryProcessor, OnlineDictParams +from .online_currency import OnlineCurrencyProcessor, OnlineCurrenciesParams +from .online_url_search import OnlineUrlSearchProcessor, OnlineUrlSearchParams -if t.TYPE_CHECKING: - from searx.enginelib import Engine +logger = logger.getChild("search.processors") -logger = logger.getChild('search.processors') -PROCESSORS: dict[str, EngineProcessor] = {} -"""Cache request processors, stored by *engine-name* (:py:func:`initialize`) +OnlineParamTypes: t.TypeAlias = OnlineParams | OnlineDictParams | OnlineCurrenciesParams | OnlineUrlSearchParams +OfflineParamTypes: t.TypeAlias = RequestParams +ParamTypes: t.TypeAlias = OfflineParamTypes | OnlineParamTypes + + +class ProcessorMap(dict[str, EngineProcessor]): + """Class to manage :py:obj:`EngineProcessor` instances in a key/value map + (instances stored by *engine-name*).""" + + processor_types: dict[str, type[EngineProcessor]] = { + OnlineProcessor.engine_type: OnlineProcessor, + OfflineProcessor.engine_type: OfflineProcessor, + OnlineDictionaryProcessor.engine_type: OnlineDictionaryProcessor, + OnlineCurrencyProcessor.engine_type: OnlineCurrencyProcessor, + OnlineUrlSearchProcessor.engine_type: OnlineUrlSearchProcessor, + } + + def init(self, engine_list: list[dict[str, t.Any]]): + """Initialize all engines and registers a processor for each engine.""" + + for eng_settings in engine_list: + eng_name: str = eng_settings["name"] + + if eng_settings.get("inactive", False) is True: + logger.info("Engine of name '%s' is inactive.", eng_name) + continue + + eng_obj = 
engines.engines.get(eng_name) + if eng_obj is None: + logger.warning("Engine of name '%s' does not exists.", eng_name) + continue + + eng_type = getattr(eng_obj, "engine_type", "online") + proc_cls = self.processor_types.get(eng_type) + if proc_cls is None: + logger.error("Engine '%s' is of unknown engine_type: %s", eng_type) + continue + + # initialize (and register) the engine + eng_proc = proc_cls(eng_obj) + eng_proc.initialize(self.register_processor) + + def register_processor(self, eng_proc: EngineProcessor, eng_proc_ok: bool) -> bool: + """Register the :py:obj:`EngineProcessor`. + + This method is usually passed as a callback to the initialization of the + :py:obj:`EngineProcessor`. + + The value (true/false) passed in ``eng_proc_ok`` indicates whether the + initialization of the :py:obj:`EngineProcessor` was successful; if this + is not the case, the processor is not registered. + """ + + if eng_proc_ok: + self[eng_proc.engine.name] = eng_proc + # logger.debug("registered engine processor: %s", eng_proc.engine.name) + else: + logger.error("init method of engine %s failed (%s).", eng_proc.engine.name) + + return eng_proc_ok + + +PROCESSORS = ProcessorMap() +"""Global :py:obj:`ProcessorMap`. 
:meta hide-value: """ - - -def get_processor_class(engine_type: str) -> type[EngineProcessor] | None: - """Return processor class according to the ``engine_type``""" - for c in [ - OnlineProcessor, - OfflineProcessor, - OnlineDictionaryProcessor, - OnlineCurrencyProcessor, - OnlineUrlSearchProcessor, - ]: - if c.engine_type == engine_type: - return c - return None - - -def get_processor(engine: "Engine | ModuleType", engine_name: str) -> EngineProcessor | None: - """Return processor instance that fits to ``engine.engine.type``""" - engine_type = getattr(engine, 'engine_type', 'online') - processor_class = get_processor_class(engine_type) - if processor_class is not None: - return processor_class(engine, engine_name) - return None - - -def initialize_processor(processor: EngineProcessor): - """Initialize one processor - - Call the init function of the engine - """ - if processor.has_initialize_function: - _t = threading.Thread(target=processor.initialize, daemon=True) - _t.start() - - -def initialize(engine_list: list[dict[str, t.Any]]): - """Initialize all engines and store a processor for each engine in - :py:obj:`PROCESSORS`.""" - for engine_data in engine_list: - engine_name: str = engine_data['name'] - engine = engines.engines.get(engine_name) - if engine: - processor = get_processor(engine, engine_name) - if processor is None: - engine.logger.error('Error get processor for engine %s', engine_name) - else: - initialize_processor(processor) - PROCESSORS[engine_name] = processor diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index 2dd56855a..ec94ed3bf 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -1,7 +1,5 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -"""Abstract base classes for engine request processors. 
- -""" +"""Abstract base classes for all engine processors.""" import typing as t @@ -10,25 +8,75 @@ import threading from abc import abstractmethod, ABC from timeit import default_timer -from searx import settings, logger +from searx import get_setting +from searx import logger from searx.engines import engines from searx.network import get_time_for_thread, get_network from searx.metrics import histogram_observe, counter_inc, count_exception, count_error -from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException +from searx.exceptions import SearxEngineAccessDeniedException from searx.utils import get_engine_from_settings if t.TYPE_CHECKING: + import types from searx.enginelib import Engine + from searx.search.models import SearchQuery + from searx.results import ResultContainer + from searx.result_types import Result, LegacyResult # pyright: ignore[reportPrivateLocalImportUsage] -logger = logger.getChild('searx.search.processor') -SUSPENDED_STATUS: dict[int | str, 'SuspendedStatus'] = {} + +logger = logger.getChild("searx.search.processor") +SUSPENDED_STATUS: dict[int | str, "SuspendedStatus"] = {} + + +class RequestParams(t.TypedDict): + """Basic quantity of the Request parameters of all engine types.""" + + query: str + """Search term, stripped of search syntax arguments.""" + + category: str + """Current category, like ``general``. + + .. hint:: + + This field is deprecated, don't use it in further implementations. + + This field is currently *arbitrarily* filled with the name of "one"" + category (the name of the first category of the engine). In practice, + however, it is not clear what this "one" category should be; in principle, + multiple categories can also be activated in a search. 
+ """ + + pageno: int + """Current page number, where the first page is ``1``.""" + + safesearch: t.Literal[0, 1, 2] + """Safe-Search filter (0:normal, 1:moderate, 2:strict).""" + + time_range: t.Literal["day", "week", "month", "year"] | None + """Time-range filter.""" + + engine_data: dict[str, str] + """Allows the transfer of (engine specific) data to the next request of the + client. In the case of the ``online`` engines, this data is delivered to + the client via the HTML ``
`` in response.
+
+    If the client then sends this form back to the server with the next request,
+    this data will be available.
+
+    This makes it possible to carry data from one request to the next without a
+    session context, but this feature (is fragile) and should only be used in
+    exceptional cases. See also :ref:`engine_data`."""
+
+    searxng_locale: str
+    """Language / locale filter from the search request, a string like 'all',
+    'en', 'en-US', 'zh-HK' .. and others, for more details see
+    :py:obj:`searx.locales`."""
 
 
 class SuspendedStatus:
     """Class to handle suspend state."""
 
-    __slots__ = 'suspend_end_time', 'suspend_reason', 'continuous_errors', 'lock'
-
     def __init__(self):
         self.lock: threading.Lock = threading.Lock()
         self.continuous_errors: int = 0
@@ -39,18 +87,18 @@ class SuspendedStatus:
     def is_suspended(self):
         return self.suspend_end_time >= default_timer()
 
-    def suspend(self, suspended_time: int, suspend_reason: str):
+    def suspend(self, suspended_time: int | None, suspend_reason: str):
         with self.lock:
             # update continuous_errors / suspend_end_time
             self.continuous_errors += 1
             if suspended_time is None:
-                suspended_time = min(
-                    settings['search']['max_ban_time_on_fail'],
-                    self.continuous_errors * settings['search']['ban_time_on_fail'],
-                )
+                max_ban: int = get_setting("search.max_ban_time_on_fail")
+                ban_fail: int = get_setting("search.ban_time_on_fail")
+                suspended_time = min(max_ban, self.continuous_errors * ban_fail)
+
             self.suspend_end_time = default_timer() + suspended_time
             self.suspend_reason = suspend_reason
-            logger.debug('Suspend for %i seconds', suspended_time)
+            logger.debug("Suspend for %i seconds", suspended_time)
 
     def resume(self):
         with self.lock:
@@ -63,31 +111,63 @@ class EngineProcessor(ABC):
     """Base classes used for all types of request processors."""
 
-    __slots__ = 'engine', 'engine_name', 'suspended_status', 'logger'
+    engine_type: str
 
-    def __init__(self, engine: "Engine|ModuleType", engine_name: str):
-        self.engine: "Engine" = engine
-        
self.engine_name: str = engine_name - self.logger: logging.Logger = engines[engine_name].logger - key = get_network(self.engine_name) - key = id(key) if key else self.engine_name + def __init__(self, engine: "Engine|types.ModuleType"): + self.engine: "Engine" = engine # pyright: ignore[reportAttributeAccessIssue] + self.logger: logging.Logger = engines[engine.name].logger + key = get_network(self.engine.name) + key = id(key) if key else self.engine.name self.suspended_status: SuspendedStatus = SUSPENDED_STATUS.setdefault(key, SuspendedStatus()) - def initialize(self): + def initialize(self, callback: t.Callable[["EngineProcessor", bool], bool]): + """Initialization of *this* :py:obj:`EngineProcessor`. + + If processor's engine has an ``init`` method, it is called first. + Engine's ``init`` method is executed in a thread, meaning that the + *registration* (the ``callback``) may occur later and is not already + established by the return from this registration method. + + Registration only takes place if the ``init`` method is not available or + is successfully run through. 
+ """ + + if not hasattr(self.engine, "init"): + callback(self, True) + return + + if not callable(self.engine.init): + logger.error("Engine's init method isn't a callable (is of type: %s).", type(self.engine.init)) + callback(self, False) + return + + def __init_processor_thread(): + eng_ok = self.init_engine() + callback(self, eng_ok) + + # set up and start a thread + threading.Thread(target=__init_processor_thread, daemon=True).start() + + def init_engine(self) -> bool: + eng_setting = get_engine_from_settings(self.engine.name) + init_ok: bool | None = False try: - self.engine.init(get_engine_from_settings(self.engine_name)) - except SearxEngineResponseException as exc: - self.logger.warning('Fail to initialize // %s', exc) + init_ok = self.engine.init(eng_setting) except Exception: # pylint: disable=broad-except - self.logger.exception('Fail to initialize') - else: - self.logger.debug('Initialized') + logger.exception("Init method of engine %s failed due to an exception.", self.engine.name) + init_ok = False + # In older engines, None is returned from the init method, which is + # equivalent to indicating that the initialization was successful. 
+ if init_ok is None: + init_ok = True + return init_ok - @property - def has_initialize_function(self): - return hasattr(self.engine, 'init') - - def handle_exception(self, result_container, exception_or_message, suspend=False): + def handle_exception( + self, + result_container: "ResultContainer", + exception_or_message: BaseException | str, + suspend: bool = False, + ): # update result_container if isinstance(exception_or_message, BaseException): exception_class = exception_or_message.__class__ @@ -96,13 +176,13 @@ class EngineProcessor(ABC): error_message = module_name + exception_class.__qualname__ else: error_message = exception_or_message - result_container.add_unresponsive_engine(self.engine_name, error_message) + result_container.add_unresponsive_engine(self.engine.name, error_message) # metrics - counter_inc('engine', self.engine_name, 'search', 'count', 'error') + counter_inc('engine', self.engine.name, 'search', 'count', 'error') if isinstance(exception_or_message, BaseException): - count_exception(self.engine_name, exception_or_message) + count_exception(self.engine.name, exception_or_message) else: - count_error(self.engine_name, exception_or_message) + count_error(self.engine.name, exception_or_message) # suspend the engine ? 
if suspend: suspended_time = None @@ -110,51 +190,63 @@ class EngineProcessor(ABC): suspended_time = exception_or_message.suspended_time self.suspended_status.suspend(suspended_time, error_message) # pylint: disable=no-member - def _extend_container_basic(self, result_container, start_time, search_results): + def _extend_container_basic( + self, + result_container: "ResultContainer", + start_time: float, + search_results: "list[Result | LegacyResult]", + ): # update result_container - result_container.extend(self.engine_name, search_results) + result_container.extend(self.engine.name, search_results) engine_time = default_timer() - start_time page_load_time = get_time_for_thread() - result_container.add_timing(self.engine_name, engine_time, page_load_time) + result_container.add_timing(self.engine.name, engine_time, page_load_time) # metrics - counter_inc('engine', self.engine_name, 'search', 'count', 'successful') - histogram_observe(engine_time, 'engine', self.engine_name, 'time', 'total') + counter_inc('engine', self.engine.name, 'search', 'count', 'successful') + histogram_observe(engine_time, 'engine', self.engine.name, 'time', 'total') if page_load_time is not None: - histogram_observe(page_load_time, 'engine', self.engine_name, 'time', 'http') + histogram_observe(page_load_time, 'engine', self.engine.name, 'time', 'http') - def extend_container(self, result_container, start_time, search_results): + def extend_container( + self, + result_container: "ResultContainer", + start_time: float, + search_results: "list[Result | LegacyResult]|None", + ): if getattr(threading.current_thread(), '_timeout', False): # the main thread is not waiting anymore - self.handle_exception(result_container, 'timeout', None) + self.handle_exception(result_container, 'timeout', False) else: # check if the engine accepted the request if search_results is not None: self._extend_container_basic(result_container, start_time, search_results) self.suspended_status.resume() - def 
extend_container_if_suspended(self, result_container): + def extend_container_if_suspended(self, result_container: "ResultContainer") -> bool: if self.suspended_status.is_suspended: result_container.add_unresponsive_engine( - self.engine_name, self.suspended_status.suspend_reason, suspended=True + self.engine.name, self.suspended_status.suspend_reason, suspended=True ) return True return False - def get_params(self, search_query, engine_category) -> dict[str, t.Any]: - """Returns a set of (see :ref:`request params `) or - ``None`` if request is not supported. + def get_params(self, search_query: "SearchQuery", engine_category: str) -> RequestParams | None: + """Returns a dictionary with the :ref:`request parameters ` (:py:obj:`RequestParams`), if the search condition + is not supported by the engine, ``None`` is returned: - Not supported conditions (``None`` is returned): + - *time range* filter in search conditions, but the engine does not have + a corresponding filter + - page number > 1 when engine does not support paging + - page number > ``max_page`` - - A page-number > 1 when engine does not support paging. - - A time range when the engine does not support time range. 
""" # if paging is not supported, skip if search_query.pageno > 1 and not self.engine.paging: return None # if max page is reached, skip - max_page = self.engine.max_page or settings['search']['max_page'] + max_page = self.engine.max_page or get_setting("search.max_page") if max_page and max_page < search_query.pageno: return None @@ -162,39 +254,45 @@ class EngineProcessor(ABC): if search_query.time_range and not self.engine.time_range_support: return None - params = {} - params["query"] = search_query.query - params['category'] = engine_category - params['pageno'] = search_query.pageno - params['safesearch'] = search_query.safesearch - params['time_range'] = search_query.time_range - params['engine_data'] = search_query.engine_data.get(self.engine_name, {}) - params['searxng_locale'] = search_query.lang + params: RequestParams = { + "query": search_query.query, + "category": engine_category, + "pageno": search_query.pageno, + "safesearch": search_query.safesearch, + "time_range": search_query.time_range, + "engine_data": search_query.engine_data.get(self.engine.name, {}), + "searxng_locale": search_query.lang, + } - # deprecated / vintage --> use params['searxng_locale'] + # deprecated / vintage --> use params["searxng_locale"] # # Conditions related to engine's traits are implemented in engine.traits - # module. Don't do 'locale' decisions here in the abstract layer of the + # module. Don't do "locale" decisions here in the abstract layer of the # search processor, just pass the value from user's choice unchanged to # the engine request. 
- if hasattr(self.engine, 'language') and self.engine.language: - params['language'] = self.engine.language + if hasattr(self.engine, "language") and self.engine.language: + params["language"] = self.engine.language # pyright: ignore[reportGeneralTypeIssues] else: - params['language'] = search_query.lang + params["language"] = search_query.lang # pyright: ignore[reportGeneralTypeIssues] return params @abstractmethod - def search(self, query, params, result_container, start_time, timeout_limit): + def search( + self, + query: str, + params: RequestParams, + result_container: "ResultContainer", + start_time: float, + timeout_limit: float, + ): pass def get_tests(self): - tests = getattr(self.engine, 'tests', None) - if tests is None: - tests = getattr(self.engine, 'additional_tests', {}) - tests.update(self.get_default_tests()) - return tests + # deprecated! + return {} def get_default_tests(self): + # deprecated! return {} diff --git a/searx/search/processors/offline.py b/searx/search/processors/offline.py index 8835bfbf2..32e7164bf 100644 --- a/searx/search/processors/offline.py +++ b/searx/search/processors/offline.py @@ -1,26 +1,32 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -"""Processors for engine-type: ``offline`` +"""Processors for engine-type: ``offline``""" -""" +import typing as t +from .abstract import EngineProcessor, RequestParams -from .abstract import EngineProcessor +if t.TYPE_CHECKING: + from searx.results import ResultContainer class OfflineProcessor(EngineProcessor): - """Processor class used by ``offline`` engines""" + """Processor class used by ``offline`` engines.""" - engine_type = 'offline' + engine_type: str = "offline" - def _search_basic(self, query, params): - return self.engine.search(query, params) - - def search(self, query, params, result_container, start_time, timeout_limit): + def search( + self, + query: str, + params: RequestParams, + result_container: "ResultContainer", + start_time: float, + timeout_limit: float, + ): try: 
- search_results = self._search_basic(query, params) + search_results = self.engine.search(query, params) self.extend_container(result_container, start_time, search_results) except ValueError as e: # do not record the error - self.logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e)) + self.logger.exception('engine {0} : invalid input : {1}'.format(self.engine.name, e)) except Exception as e: # pylint: disable=broad-except self.handle_exception(result_container, e) - self.logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) + self.logger.exception('engine {0} : exception : {1}'.format(self.engine.name, e)) diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index 778b4ac4d..23bb7fda0 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -1,8 +1,9 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -"""Processors for engine-type: ``online`` +"""Processor used for ``online`` engines.""" -""" -# pylint: disable=use-dict-literal +__all__ = ["OnlineProcessor", "OnlineParams"] + +import typing as t from timeit import default_timer import asyncio @@ -17,50 +18,132 @@ from searx.exceptions import ( SearxEngineTooManyRequestsException, ) from searx.metrics.error_recorder import count_error -from .abstract import EngineProcessor +from .abstract import EngineProcessor, RequestParams + +if t.TYPE_CHECKING: + from searx.search.models import SearchQuery + from searx.results import ResultContainer + from searx.result_types import EngineResults -def default_request_params(): +class HTTPParams(t.TypedDict): + """HTTP request parameters""" + + method: t.Literal["GET", "POST"] + """HTTP request method.""" + + headers: dict[str, str] + """HTTP header information.""" + + data: dict[str, str] + """Sending `form encoded data`_. + + .. 
_form encoded data:
+       https://www.python-httpx.org/quickstart/#sending-form-encoded-data
+    """
+
+    json: dict[str, t.Any]
+    """Sending `JSON encoded data`_.
+
+    .. _JSON encoded data:
+       https://www.python-httpx.org/quickstart/#sending-json-encoded-data
+    """
+
+    content: bytes
+    """Sending `binary request data`_.
+
+    .. _binary request data:
+       https://www.python-httpx.org/quickstart/#sending-binary-request-data
+    """
+
+    url: str
+    """Requested url."""
+
+    cookies: dict[str, str]
+    """HTTP cookies."""
+
+    allow_redirects: bool
+    """Follow redirects"""
+
+    max_redirects: int
+    """Maximum redirects, hard limit."""
+
+    soft_max_redirects: int
+    """Maximum redirects, soft limit. Record an error but don't stop the engine."""
+
+    verify: None | t.Literal[False] | str  # not sure str really works
+    """If not ``None``, it overrides the verify value defined in the network. Use
+    ``False`` to accept any server certificate and use a path to file to specify a
+    server certificate"""
+
+    auth: str | None
+    """An authentication to use when sending requests."""
+
+    raise_for_httperror: bool
+    """Raise an exception if the `HTTP response status code`_ is ``>= 300``.
+
+    .. 
_HTTP response status code: + https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status + """ + + +class OnlineParams(HTTPParams, RequestParams): + """Request parameters of a ``online`` engine.""" + + +def default_request_params() -> HTTPParams: """Default request parameters for ``online`` engines.""" return { - # fmt: off - 'method': 'GET', - 'headers': {}, - 'data': {}, - 'url': '', - 'cookies': {}, - 'auth': None - # fmt: on + "method": "GET", + "headers": {}, + "data": {}, + "json": {}, + "content": b"", + "url": "", + "cookies": {}, + "allow_redirects": False, + "max_redirects": 0, + "soft_max_redirects": 0, + "auth": None, + "verify": None, + "raise_for_httperror": True, } class OnlineProcessor(EngineProcessor): """Processor class for ``online`` engines.""" - engine_type = 'online' + engine_type: str = "online" - def initialize(self): + def init_engine(self) -> bool: + """This method is called in a thread, and before the base method is + called, the network must be set up for the ``online`` engines.""" + self.init_network_in_thread(start_time=default_timer(), timeout_limit=self.engine.timeout) + return super().init_engine() + + def init_network_in_thread(self, start_time: float, timeout_limit: float): # set timeout for all HTTP requests - searx.network.set_timeout_for_thread(self.engine.timeout, start_time=default_timer()) + searx.network.set_timeout_for_thread(timeout_limit, start_time=start_time) # reset the HTTP total time searx.network.reset_time_for_thread() # set the network - searx.network.set_context_network_name(self.engine_name) - super().initialize() + searx.network.set_context_network_name(self.engine.name) - def get_params(self, search_query, engine_category): - """Returns a set of :ref:`request params ` or ``None`` - if request is not supported. 
- """ - params = super().get_params(search_query, engine_category) - if params is None: - return None + def get_params(self, search_query: "SearchQuery", engine_category: str) -> OnlineParams | None: + """Returns a dictionary with the :ref:`request params ` (:py:obj:`OnlineParams`), if the search condition is not + supported by the engine, ``None`` is returned.""" - # add default params - params.update(default_request_params()) + base_params: RequestParams | None = super().get_params(search_query, engine_category) + if base_params is None: + return base_params + + params: OnlineParams = {**default_request_params(), **base_params} + + headers = params["headers"] # add an user agent - params['headers']['User-Agent'] = gen_useragent() + headers["User-Agent"] = gen_useragent() # add Accept-Language header if self.engine.send_accept_language_header and search_query.locale: @@ -71,73 +154,77 @@ class OnlineProcessor(EngineProcessor): search_query.locale.territory, search_query.locale.language, ) - params['headers']['Accept-Language'] = ac_lang + headers["Accept-Language"] = ac_lang - self.logger.debug('HTTP Accept-Language: %s', params['headers'].get('Accept-Language', '')) + self.logger.debug("HTTP Accept-Language: %s", headers.get("Accept-Language", "")) return params - def _send_http_request(self, params): - # create dictionary which contain all - # information about the request - request_args = dict(headers=params['headers'], cookies=params['cookies'], auth=params['auth']) + def _send_http_request(self, params: OnlineParams): - # verify - # if not None, it overrides the verify value defined in the network. 
- # use False to accept any server certificate - # use a path to file to specify a server certificate - verify = params.get('verify') + # create dictionary which contain all information about the request + request_args: dict[str, t.Any] = { + "headers": params["headers"], + "cookies": params["cookies"], + "auth": params["auth"], + } + + verify = params.get("verify") if verify is not None: - request_args['verify'] = params['verify'] + request_args["verify"] = verify # max_redirects - max_redirects = params.get('max_redirects') + max_redirects = params.get("max_redirects") if max_redirects: - request_args['max_redirects'] = max_redirects + request_args["max_redirects"] = max_redirects # allow_redirects - if 'allow_redirects' in params: - request_args['allow_redirects'] = params['allow_redirects'] + if "allow_redirects" in params: + request_args["allow_redirects"] = params["allow_redirects"] # soft_max_redirects - soft_max_redirects = params.get('soft_max_redirects', max_redirects or 0) + soft_max_redirects: int = params.get("soft_max_redirects", max_redirects or 0) # raise_for_status - request_args['raise_for_httperror'] = params.get('raise_for_httperror', True) + request_args["raise_for_httperror"] = params.get("raise_for_httperror", True) # specific type of request (GET or POST) - if params['method'] == 'GET': + if params["method"] == "GET": req = searx.network.get else: req = searx.network.post - - request_args['data'] = params['data'] + if params["data"]: + request_args["data"] = params["data"] + if params["json"]: + request_args["json"] = params["json"] + if params["content"]: + request_args["content"] = params["content"] # send the request - response = req(params['url'], **request_args) + response = req(params["url"], **request_args) # check soft limit of the redirect count if len(response.history) > soft_max_redirects: # unexpected redirect : record an error # but the engine might still return valid results. 
- status_code = str(response.status_code or '') - reason = response.reason_phrase or '' + status_code = str(response.status_code or "") + reason = response.reason_phrase or "" hostname = response.url.host count_error( - self.engine_name, - '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects), + self.engine.name, + "{} redirects, maximum: {}".format(len(response.history), soft_max_redirects), (status_code, reason, hostname), secondary=True, ) return response - def _search_basic(self, query, params): + def _search_basic(self, query: str, params: OnlineParams) -> "EngineResults|None": # update request parameters dependent on # search-engine (contained in engines folder) self.engine.request(query, params) # ignoring empty urls - if not params['url']: + if not params["url"]: return None # send request @@ -147,13 +234,15 @@ class OnlineProcessor(EngineProcessor): response.search_params = params return self.engine.response(response) - def search(self, query, params, result_container, start_time, timeout_limit): - # set timeout for all HTTP requests - searx.network.set_timeout_for_thread(timeout_limit, start_time=start_time) - # reset the HTTP total time - searx.network.reset_time_for_thread() - # set the network - searx.network.set_context_network_name(self.engine_name) + def search( # pyright: ignore[reportIncompatibleMethodOverride] + self, + query: str, + params: OnlineParams, + result_container: "ResultContainer", + start_time: float, + timeout_limit: float, + ): + self.init_network_in_thread(start_time, timeout_limit) try: # send requests and parse the results @@ -162,7 +251,7 @@ class OnlineProcessor(EngineProcessor): except ssl.SSLError as e: # requests timeout (connect or read) self.handle_exception(result_container, e, suspend=True) - self.logger.error("SSLError {}, verify={}".format(e, searx.network.get_network(self.engine_name).verify)) + self.logger.error("SSLError {}, verify={}".format(e, 
searx.network.get_network(self.engine.name).verify)) except (httpx.TimeoutException, asyncio.TimeoutError) as e: # requests timeout (connect or read) self.handle_exception(result_container, e, suspend=True) @@ -179,55 +268,13 @@ class OnlineProcessor(EngineProcessor): default_timer() - start_time, timeout_limit, e ) ) - except SearxEngineCaptchaException as e: + except ( + SearxEngineCaptchaException, + SearxEngineTooManyRequestsException, + SearxEngineAccessDeniedException, + ) as e: self.handle_exception(result_container, e, suspend=True) - self.logger.exception('CAPTCHA') - except SearxEngineTooManyRequestsException as e: - self.handle_exception(result_container, e, suspend=True) - self.logger.exception('Too many requests') - except SearxEngineAccessDeniedException as e: - self.handle_exception(result_container, e, suspend=True) - self.logger.exception('SearXNG is blocked') + self.logger.exception(e.message) except Exception as e: # pylint: disable=broad-except self.handle_exception(result_container, e) - self.logger.exception('exception : {0}'.format(e)) - - def get_default_tests(self): - tests = {} - - tests['simple'] = { - 'matrix': {'query': ('life', 'computer')}, - 'result_container': ['not_empty'], - } - - if getattr(self.engine, 'paging', False): - tests['paging'] = { - 'matrix': {'query': 'time', 'pageno': (1, 2, 3)}, - 'result_container': ['not_empty'], - 'test': ['unique_results'], - } - if 'general' in self.engine.categories: - # avoid documentation about HTML tags (