[mod] addition of various type hints / engine processors

Continuation of #5147 — adding type annotations to the engine processors.

BTW:

- removed obsolete engine property https_support
- fixed & improved currency_convert
- engine instances can now implement an engine.setup method

[#5147] https://github.com/searxng/searxng/pull/5147

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2025-09-11 19:10:27 +02:00 committed by Markus Heiser
parent 23257bddce
commit 8f8343dc0d
28 changed files with 814 additions and 522 deletions

View File

@ -144,9 +144,9 @@ parameters with default value can be redefined for special purposes.
====================== ============== ========================================================================
url str ``''``
method str ``'GET'``
headers set ``{}``
data set ``{}``
cookies set ``{}``
headers dict ``{}``
data dict ``{}``
cookies dict ``{}``
verify bool ``True``
headers.User-Agent str a random User-Agent
category str current category, like ``'general'``
@ -226,9 +226,9 @@ following parameters can be used to specify a search request:
=================== =========== ==========================================================================
url str requested url
method str HTTP request method
headers set HTTP header information
data set HTTP data information
cookies set HTTP cookies
headers dict HTTP header information
data dict HTTP data information
cookies dict HTTP cookies
verify bool Performing SSL-Validity check
allow_redirects bool Follow redirects
max_redirects int maximum redirects, hard limit
@ -249,6 +249,3 @@ by templates. For more details read section:
- :ref:`simple theme templates`
- :ref:`result types`

View File

@ -1,22 +1,23 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Simple implementation to store currencies data in a SQL database."""
__all__ = ["CurrenciesDB"]
import typing as t
import json
import pathlib
from .core import get_cache, log
@t.final
class CurrenciesDB:
# pylint: disable=missing-class-docstring
ctx_names = "data_currencies_names"
ctx_iso4217 = "data_currencies_iso4217"
ctx_names: str = "data_currencies_names"
ctx_iso4217: str = "data_currencies_iso4217"
json_file = pathlib.Path(__file__).parent / "currencies.json"
json_file: pathlib.Path = pathlib.Path(__file__).parent / "currencies.json"
def __init__(self):
self.cache = get_cache()
@ -33,23 +34,27 @@ class CurrenciesDB:
def load(self):
log.debug("init searx.data.CURRENCIES")
with open(self.json_file, encoding="utf-8") as f:
data_dict = json.load(f)
data_dict: dict[str, dict[str, str]] = json.load(f)
for key, value in data_dict["names"].items():
self.cache.set(key=key, value=value, ctx=self.ctx_names, expire=None)
for key, value in data_dict["iso4217"].items():
self.cache.set(key=key, value=value, ctx=self.ctx_iso4217, expire=None)
def name_to_iso4217(self, name):
def name_to_iso4217(self, name: str) -> str | None:
self.init()
ret_val = self.cache.get(key=name, default=name, ctx=self.ctx_names)
ret_val: str | list[str] | None = self.cache.get(key=name, default=None, ctx=self.ctx_names)
if isinstance(ret_val, list):
# if more alternatives, use the last in the list
ret_val = ret_val[-1]
return ret_val
def iso4217_to_name(self, iso4217, language):
def iso4217_to_name(self, iso4217: str, language: str) -> str | None:
self.init()
iso4217_languages: dict = self.cache.get(key=iso4217, default={}, ctx=self.ctx_iso4217)
return iso4217_languages.get(language, iso4217)
iso4217_languages: dict[str, str] = self.cache.get(key=iso4217, default={}, ctx=self.ctx_iso4217)
return iso4217_languages.get(language)
def is_iso4217(self, iso4217: str) -> bool:
    """Return ``True`` if *iso4217* is a known ISO 4217 currency code.

    The code is looked up in the ``iso4217`` context of the cache; a
    non-empty entry means the code is known.
    """
    # consistency fix: the sibling lookup methods (name_to_iso4217,
    # iso4217_to_name) call self.init() before reading the cache -- without
    # it the cache may not be populated yet on first use.
    self.init()
    item = self.cache.get(key=iso4217, default={}, ctx=self.ctx_iso4217)
    return bool(item)

View File

@ -39,6 +39,7 @@ if t.TYPE_CHECKING:
from searx.enginelib.traits import EngineTraits
from searx.extended_types import SXNG_Response
from searx.result_types import EngineResults
from searx.search.processors import OfflineParamTypes, OnlineParamTypes
ENGINES_CACHE: ExpireCacheSQLite = ExpireCacheSQLite.build_cache(
ExpireCacheCfg(
@ -195,6 +196,10 @@ class Engine(abc.ABC): # pylint: disable=too-few-public-methods
paging: bool
"""Engine supports multiple pages."""
max_page: int = 0
"""If the engine supports paging, then this is the value for the last page
that is still supported. ``0`` means unlimited numbers of pages."""
time_range_support: bool
"""Engine supports search time range."""
@ -304,14 +309,49 @@ class Engine(abc.ABC): # pylint: disable=too-few-public-methods
weight: int
"""Weighting of the results of this engine (:ref:`weight <settings engines>`)."""
def init(self, engine_settings: dict[str, t.Any]) -> None: # pyright: ignore[reportUnusedParameter]
"""Initialization of the engine. If no initialization is needed, drop
this init function."""
def setup(self, engine_settings: dict[str, t.Any]) -> bool: # pylint: disable=unused-argument
"""Dynamic setup of the engine settings.
With this method, the engine's setup is carried out. For example, to
check or dynamically adapt the values handed over in the parameter
``engine_settings``. The return value (True/False) indicates whether
the setup was successful and the engine can be built or rejected.
The method is optional and is called synchronously as part of the
initialization of the service and is therefore only suitable for simple
(local) exams/changes at the engine setting. The :py:obj:`Engine.init`
method must be used for longer tasks in which values of a remote must be
determined, for example.
"""
return True
def init(self, engine_settings: dict[str, t.Any]) -> bool | None: # pylint: disable=unused-argument
"""Initialization of the engine.
The method is optional and asynchronous (in a thread). It is suitable,
for example, for setting up a cache (for the engine) or for querying
values (required by the engine) from a remote.
Whether the initialization was successful can be indicated by the return
value ``True`` or even ``False``.
- If no return value is given from this init method (``None``), this is
equivalent to ``True``.
- If an exception is thrown as part of the initialization, this is
equivalent to ``False``.
"""
return True
@abc.abstractmethod
def request(self, query: str, params: dict[str, t.Any]) -> None:
"""Build up the params for the online request."""
def search(self, query: str, params: "OfflineParamTypes") -> "EngineResults":
"""Search method of the ``offline`` engines"""
@abc.abstractmethod
def request(self, query: str, params: "OnlineParamTypes") -> None:
"""Method to build the parameters for the request of an ``online``
engine."""
@abc.abstractmethod
def response(self, resp: "SXNG_Response") -> "EngineResults":
"""Parse out the result items from the response."""
"""Method to parse the response of an ``online`` engine."""

View File

@ -51,7 +51,10 @@ ENGINE_DEFAULT_ARGS: dict[str, int | str | list[t.Any] | dict[str, t.Any] | bool
DEFAULT_CATEGORY = 'other'
categories: "dict[str, list[Engine|types.ModuleType]]" = {'general': []}
engines: "dict[str, Engine | types.ModuleType]" = {}
"""Global registered engine instances."""
engine_shortcuts = {}
"""Simple map of registered *shortcuts* to name of the engine (or ``None``).
@ -144,6 +147,9 @@ def load_engine(engine_data: dict[str, t.Any]) -> "Engine | types.ModuleType | N
set_loggers(engine, engine_name)
if not call_engine_setup(engine, engine_data):
return None
if not any(cat in settings['categories_as_tabs'] for cat in engine.categories):
engine.categories.append(DEFAULT_CATEGORY)
@ -223,6 +229,25 @@ def is_engine_active(engine: "Engine | types.ModuleType"):
return True
def call_engine_setup(engine: "Engine | types.ModuleType", engine_data: dict[str, t.Any]) -> bool:
    """Call the (optional) ``setup`` method of *engine*.

    Returns ``True`` when the engine can be built: either the engine does
    not implement a ``setup`` method, or its ``setup`` method returned a
    truthy value.  Returns ``False`` when ``setup`` is not callable,
    returned a falsy value, or raised an exception.
    """
    setup_ok = False
    setup_func = getattr(engine, "setup", None)
    if setup_func is None:
        # a setup method is optional; nothing to check
        setup_ok = True
    elif not callable(setup_func):
        logger.error("engine's setup method isn't a callable (is of type: %s)", type(setup_func))
    else:
        try:
            # call the function fetched above instead of looking it up a
            # second time; coerce to the declared bool return type
            setup_ok = bool(setup_func(engine_data))
        except Exception:  # pylint: disable=broad-except
            # logger.exception records the traceback itself -- no need to
            # interpolate the exception into the message by hand
            logger.exception("exception while calling setup of engine %s", engine.name)
    if not setup_ok:
        logger.error("%s: Engine setup was not successful, engine is set to inactive.", engine.name)
    return setup_ok
def register_engine(engine: "Engine | types.ModuleType"):
if engine.name in engines:
logger.error('Engine config error: ambiguous name: {0}'.format(engine.name))

View File

@ -1,53 +1,58 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Currency convert (DuckDuckGo)
"""
"""Currency convert (DuckDuckGo)"""
import typing as t
import json
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.search.processors import OnlineCurrenciesParams
from searx.extended_types import SXNG_Response
# about
about = {
"website": 'https://duckduckgo.com/',
"wikidata_id": 'Q12805',
"official_api_documentation": 'https://duckduckgo.com/api',
"website": "https://duckduckgo.com/",
"wikidata_id": "Q12805",
"official_api_documentation": "https://duckduckgo.com/api",
"use_official_api": False,
"require_api_key": False,
"results": 'JSONP',
"results": "JSONP",
"description": "Service from DuckDuckGo.",
}
engine_type = 'online_currency'
categories = []
base_url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
engine_type = "online_currency"
categories = ["currency", "general"]
base_url = "https://duckduckgo.com/js/spice/currency/1/%(from_iso4217)s/%(to_iso4217)s"
ddg_link_url = "https://duckduckgo.com/?q=%(from_iso4217)s+to+%(to_iso4217)s"
weight = 100
https_support = True
def request(query: str, params: "OnlineCurrenciesParams") -> None: # pylint: disable=unused-argument
params["url"] = base_url % params
def request(_query, params):
params['url'] = base_url.format(params['from'], params['to'])
return params
def response(resp) -> EngineResults:
def response(resp: "SXNG_Response") -> EngineResults:
res = EngineResults()
# remove first and last lines to get only json
json_resp = resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2]
json_resp = resp.text[resp.text.find("\n") + 1 : resp.text.rfind("\n") - 2]
try:
conversion_rate = float(json.loads(json_resp)["to"][0]["mid"])
except IndexError:
return res
answer = '{0} {1} = {2} {3}, 1 {1} ({5}) = {4} {3} ({6})'.format(
resp.search_params['amount'],
resp.search_params['from'],
resp.search_params['amount'] * conversion_rate,
resp.search_params['to'],
conversion_rate,
resp.search_params['from_name'],
resp.search_params['to_name'],
)
url = f"https://duckduckgo.com/?q={resp.search_params['from']}+to+{resp.search_params['to']}"
params: OnlineCurrenciesParams = resp.search_params # pyright: ignore[reportAssignmentType]
answer = "{0} {1} = {2} {3} (1 {5} : {4} {6})".format(
params["amount"],
params["from_iso4217"],
params["amount"] * conversion_rate,
params["to_iso4217"],
conversion_rate,
params["from_name"],
params["to_name"],
)
url = ddg_link_url % params
res.add(res.types.Answer(answer=answer, url=url))
return res

View File

@ -24,7 +24,6 @@ engine_type = 'online_dictionary'
categories = ['general', 'translate']
base_url = "https://dictzone.com"
weight = 100
https_support = True
def request(query, params): # pylint: disable=unused-argument

View File

@ -3,7 +3,6 @@
"""
from urllib.parse import urlunparse
from json import dumps
# about
about = {
@ -56,7 +55,7 @@ def request(query, params):
query_data = query_data_template
query_data["query"]["multi_match"]["query"] = query
query_data["from"] = (params["pageno"] - 1) * number_of_results
params["data"] = dumps(query_data)
params["json"] = query_data
return params

View File

@ -22,7 +22,6 @@ categories = ['general', 'translate']
api_url = "https://api.mymemory.translated.net"
web_url = "https://mymemory.translated.net"
weight = 100
https_support = True
api_key = ''

View File

@ -74,9 +74,9 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException):
"""
if suspended_time is None:
suspended_time = self._get_default_suspended_time()
super().__init__(message + ', suspended_time=' + str(suspended_time))
self.message: str = f"{message} (suspended_time={suspended_time})"
self.suspended_time: int = suspended_time
self.message: str = message
super().__init__(self.message)
def _get_default_suspended_time(self) -> int:
from searx import get_setting # pylint: disable=C0415

View File

@ -30,6 +30,7 @@ import httpx
if typing.TYPE_CHECKING:
import searx.preferences
import searx.results
from searx.search.processors import ParamTypes
class SXNG_Request(flask.Request):
@ -78,6 +79,8 @@ class SXNG_Response(httpx.Response):
response = typing.cast(SXNG_Response, response)
if response.ok:
...
query_was = search_params["query"]
"""
ok: bool
search_params: "ParamTypes"

View File

@ -24,17 +24,6 @@ LogParametersType = tuple[str, ...]
class ErrorContext: # pylint: disable=missing-class-docstring
__slots__ = (
'filename',
'function',
'line_no',
'code',
'exception_classname',
'log_message',
'log_parameters',
'secondary',
)
def __init__( # pylint: disable=too-many-arguments
self,
filename: str,
@ -159,7 +148,7 @@ def get_messages(exc, filename) -> tuple[str, ...]: # pylint: disable=too-many-
return ()
def get_exception_classname(exc: Exception) -> str:
def get_exception_classname(exc: BaseException) -> str:
exc_class = exc.__class__
exc_name = exc_class.__qualname__
exc_module = exc_class.__module__
@ -182,7 +171,7 @@ def get_error_context(
return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary)
def count_exception(engine_name: str, exc: Exception, secondary: bool = False) -> None:
def count_exception(engine_name: str, exc: BaseException, secondary: bool = False) -> None:
if not settings['general']['enable_metrics']:
return
framerecords = inspect.trace()

View File

@ -1,7 +1,7 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, global-statement
__all__ = ["initialize", "check_network_configuration", "raise_for_httperror"]
__all__ = ["get_network", "initialize", "check_network_configuration", "raise_for_httperror"]
import typing as t
@ -22,6 +22,8 @@ from .network import get_network, initialize, check_network_configuration # pyl
from .client import get_loop
from .raise_for_httperror import raise_for_httperror
if t.TYPE_CHECKING:
from searx.network.network import Network
THREADLOCAL = threading.local()
"""Thread-local data is data for thread specific values."""
@ -31,7 +33,7 @@ def reset_time_for_thread():
THREADLOCAL.total_time = 0
def get_time_for_thread():
def get_time_for_thread() -> float | None:
"""returns thread's total time or None"""
return THREADLOCAL.__dict__.get('total_time')
@ -45,7 +47,7 @@ def set_context_network_name(network_name: str):
THREADLOCAL.network = get_network(network_name)
def get_context_network():
def get_context_network() -> "Network":
"""If set return thread's network.
If unset, return value from :py:obj:`get_network`.
@ -68,7 +70,7 @@ def _record_http_time():
THREADLOCAL.total_time += time_after_request - time_before_request
def _get_timeout(start_time: float, kwargs):
def _get_timeout(start_time: float, kwargs: t.Any) -> float:
# pylint: disable=too-many-branches
timeout: float | None
@ -91,7 +93,7 @@ def _get_timeout(start_time: float, kwargs):
return timeout
def request(method, url, **kwargs) -> SXNG_Response:
def request(method: str, url: str, **kwargs: t.Any) -> SXNG_Response:
"""same as requests/requests/api.py request(...)"""
with _record_http_time() as start_time:
network = get_context_network()
@ -183,15 +185,15 @@ def head(url: str, **kwargs: t.Any) -> SXNG_Response:
return request('head', url, **kwargs)
def post(url: str, data=None, **kwargs: t.Any) -> SXNG_Response:
def post(url: str, data: dict[str, t.Any] | None = None, **kwargs: t.Any) -> SXNG_Response:
return request('post', url, data=data, **kwargs)
def put(url: str, data=None, **kwargs: t.Any) -> SXNG_Response:
def put(url: str, data: dict[str, t.Any] | None = None, **kwargs: t.Any) -> SXNG_Response:
return request('put', url, data=data, **kwargs)
def patch(url: str, data=None, **kwargs: t.Any) -> SXNG_Response:
def patch(url: str, data: dict[str, t.Any] | None = None, **kwargs: t.Any) -> SXNG_Response:
return request('patch', url, data=data, **kwargs)
@ -250,7 +252,7 @@ def _close_response_method(self):
continue
def stream(method: str, url: str, **kwargs: t.Any) -> tuple[httpx.Response, Iterable[bytes]]:
def stream(method: str, url: str, **kwargs: t.Any) -> tuple[SXNG_Response, Iterable[bytes]]:
"""Replace httpx.stream.
Usage:

View File

@ -138,7 +138,7 @@ def get_transport_for_socks_proxy(
password=proxy_password,
rdns=rdns,
loop=get_loop(),
verify=_verify,
verify=_verify, # pyright: ignore[reportArgumentType]
http2=http2,
local_address=local_address,
limits=limit,

View File

@ -1,8 +1,12 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=global-statement
# pylint: disable=missing-module-docstring, missing-class-docstring
__all__ = ["get_network"]
import typing as t
from collections.abc import Generator, AsyncIterator
from collections.abc import Generator
import atexit
import asyncio
@ -74,7 +78,7 @@ class Network:
using_tor_proxy: bool = False,
local_addresses: str | list[str] | None = None,
retries: int = 0,
retry_on_http_error: None = None,
retry_on_http_error: bool = False,
max_redirects: int = 30,
logger_name: str = None, # pyright: ignore[reportArgumentType]
):
@ -232,14 +236,14 @@ class Network:
return kwargs_clients
@staticmethod
def extract_do_raise_for_httperror(kwargs):
def extract_do_raise_for_httperror(kwargs: dict[str, t.Any]):
do_raise_for_httperror = True
if 'raise_for_httperror' in kwargs:
do_raise_for_httperror = kwargs['raise_for_httperror']
del kwargs['raise_for_httperror']
return do_raise_for_httperror
def patch_response(self, response: httpx.Response | SXNG_Response, do_raise_for_httperror: bool) -> SXNG_Response:
def patch_response(self, response: httpx.Response, do_raise_for_httperror: bool) -> SXNG_Response:
if isinstance(response, httpx.Response):
response = t.cast(SXNG_Response, response)
# requests compatibility (response is not streamed)
@ -255,7 +259,7 @@ class Network:
raise
return response
def is_valid_response(self, response: SXNG_Response):
def is_valid_response(self, response: httpx.Response):
# pylint: disable=too-many-boolean-expressions
if (
(self.retry_on_http_error is True and 400 <= response.status_code <= 599)
@ -265,9 +269,7 @@ class Network:
return False
return True
async def call_client(
self, stream: bool, method: str, url: str, **kwargs: t.Any
) -> AsyncIterator[SXNG_Response] | None:
async def call_client(self, stream: bool, method: str, url: str, **kwargs: t.Any) -> SXNG_Response:
retries = self.retries
was_disconnected = False
do_raise_for_httperror = Network.extract_do_raise_for_httperror(kwargs)
@ -278,9 +280,9 @@ class Network:
client.cookies = httpx.Cookies(cookies)
try:
if stream:
response = client.stream(method, url, **kwargs) # pyright: ignore[reportAny]
response = client.stream(method, url, **kwargs)
else:
response = await client.request(method, url, **kwargs) # pyright: ignore[reportAny]
response = await client.request(method, url, **kwargs)
if self.is_valid_response(response) or retries <= 0:
return self.patch_response(response, do_raise_for_httperror)
except httpx.RemoteProtocolError as e:
@ -298,7 +300,7 @@ class Network:
raise e
retries -= 1
async def request(self, method: str, url: str, **kwargs):
async def request(self, method: str, url: str, **kwargs: t.Any) -> SXNG_Response:
return await self.call_client(False, method, url, **kwargs)
async def stream(self, method: str, url: str, **kwargs):
@ -358,7 +360,7 @@ def initialize(
'proxies': settings_outgoing['proxies'],
'max_redirects': settings_outgoing['max_redirects'],
'retries': settings_outgoing['retries'],
'retry_on_http_error': None,
'retry_on_http_error': False,
}
def new_network(params: dict[str, t.Any], logger_name: str | None = None):

View File

@ -1,8 +1,7 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, too-few-public-methods
# the public namespace has not yet been finally defined ..
# __all__ = [..., ]
__all__ = ["SearchWithPlugins"]
import typing as t
@ -22,7 +21,7 @@ from searx.metrics import initialize as initialize_metrics, counter_inc
from searx.network import initialize as initialize_network, check_network_configuration
from searx.results import ResultContainer
from searx.search.checker import initialize as initialize_checker
from searx.search.processors import PROCESSORS, initialize as initialize_processors
from searx.search.processors import PROCESSORS
if t.TYPE_CHECKING:
@ -44,7 +43,7 @@ def initialize(
if check_network:
check_network_configuration()
initialize_metrics([engine['name'] for engine in settings_engines], enable_metrics)
initialize_processors(settings_engines)
PROCESSORS.init(settings_engines)
if enable_checker:
initialize_checker()
@ -52,8 +51,6 @@ def initialize(
class Search:
"""Search information container"""
__slots__ = "search_query", "result_container", "start_time", "actual_timeout" # type: ignore
def __init__(self, search_query: "SearchQuery"):
"""Initialize the Search"""
# init vars
@ -185,8 +182,6 @@ class Search:
class SearchWithPlugins(Search):
"""Inherit from the Search class, add calls to the plugins."""
__slots__ = 'user_plugins', 'request'
def __init__(self, search_query: "SearchQuery", request: "SXNG_Request", user_plugins: list[str]):
super().__init__(search_query)
self.user_plugins = user_plugins

View File

@ -24,42 +24,29 @@ class EngineRef:
return hash((self.name, self.category))
@typing.final
class SearchQuery:
"""container for all the search parameters (query, language, etc...)"""
__slots__ = (
'query',
'engineref_list',
'lang',
'locale',
'safesearch',
'pageno',
'time_range',
'timeout_limit',
'external_bang',
'engine_data',
'redirect_to_first_result',
)
def __init__(
self,
query: str,
engineref_list: typing.List[EngineRef],
engineref_list: list[EngineRef],
lang: str = 'all',
safesearch: int = 0,
safesearch: typing.Literal[0, 1, 2] = 0,
pageno: int = 1,
time_range: typing.Optional[str] = None,
timeout_limit: typing.Optional[float] = None,
external_bang: typing.Optional[str] = None,
engine_data: typing.Optional[typing.Dict[str, str]] = None,
redirect_to_first_result: typing.Optional[bool] = None,
time_range: typing.Literal["day", "week", "month", "year"] | None = None,
timeout_limit: float | None = None,
external_bang: str | None = None,
engine_data: dict[str, dict[str, str]] | None = None,
redirect_to_first_result: bool | None = None,
): # pylint:disable=too-many-arguments
self.query = query
self.engineref_list = engineref_list
self.lang = lang
self.safesearch = safesearch
self.safesearch: typing.Literal[0, 1, 2] = safesearch
self.pageno = pageno
self.time_range = time_range
self.time_range: typing.Literal["day", "week", "month", "year"] | None = time_range
self.timeout_limit = timeout_limit
self.external_bang = external_bang
self.engine_data = engine_data or {}

View File

@ -2,83 +2,95 @@
"""Implement request processors used by engine-types."""
__all__ = [
'EngineProcessor',
'OfflineProcessor',
'OnlineProcessor',
'OnlineDictionaryProcessor',
'OnlineCurrencyProcessor',
'OnlineUrlSearchProcessor',
'PROCESSORS',
"OfflineParamTypes",
"OnlineCurrenciesParams",
"OnlineDictParams",
"OnlineParamTypes",
"OnlineParams",
"OnlineUrlSearchParams",
"PROCESSORS",
"ParamTypes",
"RequestParams",
]
import typing as t
import threading
from searx import logger
from searx import engines
from .online import OnlineProcessor
from .abstract import EngineProcessor, RequestParams
from .offline import OfflineProcessor
from .online_dictionary import OnlineDictionaryProcessor
from .online_currency import OnlineCurrencyProcessor
from .online_url_search import OnlineUrlSearchProcessor
from .abstract import EngineProcessor
from .online import OnlineProcessor, OnlineParams
from .online_dictionary import OnlineDictionaryProcessor, OnlineDictParams
from .online_currency import OnlineCurrencyProcessor, OnlineCurrenciesParams
from .online_url_search import OnlineUrlSearchProcessor, OnlineUrlSearchParams
if t.TYPE_CHECKING:
from searx.enginelib import Engine
logger = logger.getChild("search.processors")
logger = logger.getChild('search.processors')
PROCESSORS: dict[str, EngineProcessor] = {}
"""Cache request processors, stored by *engine-name* (:py:func:`initialize`)
OnlineParamTypes: t.TypeAlias = OnlineParams | OnlineDictParams | OnlineCurrenciesParams | OnlineUrlSearchParams
OfflineParamTypes: t.TypeAlias = RequestParams
ParamTypes: t.TypeAlias = OfflineParamTypes | OnlineParamTypes
class ProcessorMap(dict[str, EngineProcessor]):
    """Class to manage :py:obj:`EngineProcessor` instances in a key/value map
    (instances stored by *engine-name*)."""

    # map of the (string) engine_type to the processor class implementing it
    processor_types: dict[str, type[EngineProcessor]] = {
        OnlineProcessor.engine_type: OnlineProcessor,
        OfflineProcessor.engine_type: OfflineProcessor,
        OnlineDictionaryProcessor.engine_type: OnlineDictionaryProcessor,
        OnlineCurrencyProcessor.engine_type: OnlineCurrencyProcessor,
        OnlineUrlSearchProcessor.engine_type: OnlineUrlSearchProcessor,
    }

    def init(self, engine_list: list[dict[str, t.Any]]):
        """Initialize all engines and registers a processor for each engine."""
        for eng_settings in engine_list:
            eng_name: str = eng_settings["name"]
            if eng_settings.get("inactive", False) is True:
                logger.info("Engine of name '%s' is inactive.", eng_name)
                continue
            eng_obj = engines.engines.get(eng_name)
            if eng_obj is None:
                logger.warning("Engine of name '%s' does not exists.", eng_name)
                continue
            eng_type = getattr(eng_obj, "engine_type", "online")
            proc_cls = self.processor_types.get(eng_type)
            if proc_cls is None:
                # BUG fix: the format string has two placeholders, but only
                # one argument was passed (the logging call itself failed
                # instead of emitting the intended message)
                logger.error("Engine '%s' is of unknown engine_type: %s", eng_name, eng_type)
                continue
            # initialize (and register) the engine
            eng_proc = proc_cls(eng_obj)
            eng_proc.initialize(self.register_processor)

    def register_processor(self, eng_proc: EngineProcessor, eng_proc_ok: bool) -> bool:
        """Register the :py:obj:`EngineProcessor`.

        This method is usually passed as a callback to the initialization of
        the :py:obj:`EngineProcessor`.

        The value (true/false) passed in ``eng_proc_ok`` indicates whether the
        initialization of the :py:obj:`EngineProcessor` was successful; if this
        is not the case, the processor is not registered.
        """
        if eng_proc_ok:
            self[eng_proc.engine.name] = eng_proc
        else:
            # BUG fix: the message had a stray second "(%s)" placeholder
            # without a matching argument
            logger.error("init method of engine %s failed.", eng_proc.engine.name)
        return eng_proc_ok
PROCESSORS = ProcessorMap()
"""Global :py:obj:`ProcessorMap`.
:meta hide-value:
"""
def get_processor_class(engine_type: str) -> type[EngineProcessor] | None:
"""Return processor class according to the ``engine_type``"""
for c in [
OnlineProcessor,
OfflineProcessor,
OnlineDictionaryProcessor,
OnlineCurrencyProcessor,
OnlineUrlSearchProcessor,
]:
if c.engine_type == engine_type:
return c
return None
def get_processor(engine: "Engine | ModuleType", engine_name: str) -> EngineProcessor | None:
"""Return processor instance that fits to ``engine.engine.type``"""
engine_type = getattr(engine, 'engine_type', 'online')
processor_class = get_processor_class(engine_type)
if processor_class is not None:
return processor_class(engine, engine_name)
return None
def initialize_processor(processor: EngineProcessor):
"""Initialize one processor
Call the init function of the engine
"""
if processor.has_initialize_function:
_t = threading.Thread(target=processor.initialize, daemon=True)
_t.start()
def initialize(engine_list: list[dict[str, t.Any]]):
"""Initialize all engines and store a processor for each engine in
:py:obj:`PROCESSORS`."""
for engine_data in engine_list:
engine_name: str = engine_data['name']
engine = engines.engines.get(engine_name)
if engine:
processor = get_processor(engine, engine_name)
if processor is None:
engine.logger.error('Error get processor for engine %s', engine_name)
else:
initialize_processor(processor)
PROCESSORS[engine_name] = processor

View File

@ -1,7 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Abstract base classes for engine request processors.
"""
"""Abstract base classes for all engine processors."""
import typing as t
@ -10,25 +8,75 @@ import threading
from abc import abstractmethod, ABC
from timeit import default_timer
from searx import settings, logger
from searx import get_setting
from searx import logger
from searx.engines import engines
from searx.network import get_time_for_thread, get_network
from searx.metrics import histogram_observe, counter_inc, count_exception, count_error
from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException
from searx.exceptions import SearxEngineAccessDeniedException
from searx.utils import get_engine_from_settings
if t.TYPE_CHECKING:
import types
from searx.enginelib import Engine
from searx.search.models import SearchQuery
from searx.results import ResultContainer
from searx.result_types import Result, LegacyResult # pyright: ignore[reportPrivateLocalImportUsage]
logger = logger.getChild('searx.search.processor')
SUSPENDED_STATUS: dict[int | str, 'SuspendedStatus'] = {}
logger = logger.getChild("searx.search.processor")
SUSPENDED_STATUS: dict[int | str, "SuspendedStatus"] = {}
class RequestParams(t.TypedDict):
    """Basic set of request parameters common to all engine types."""

    query: str
    """Search term, stripped of search syntax arguments."""

    category: str
    """Current category, like ``general``.

    .. hint::

       This field is deprecated, don't use it in further implementations.

       This field is currently *arbitrarily* filled with the name of "one"
       category (the name of the first category of the engine).  In practice,
       however, it is not clear what this "one" category should be; in
       principle, multiple categories can also be activated in a search.
    """

    pageno: int
    """Current page number, where the first page is ``1``."""

    safesearch: t.Literal[0, 1, 2]
    """Safe-Search filter (0:normal, 1:moderate, 2:strict)."""

    time_range: t.Literal["day", "week", "month", "year"] | None
    """Time-range filter."""

    engine_data: dict[str, str]
    """Allows the transfer of (engine specific) data to the next request of the
    client.  In the case of the ``online`` engines, this data is delivered to
    the client via the HTML ``<form>`` in the response.

    If the client then sends this form back to the server with the next
    request, this data will be available.

    This makes it possible to carry data from one request to the next without
    a session context, but this feature is fragile and should only be used in
    exceptional cases.  See also :ref:`engine_data`."""

    searxng_locale: str
    """Language / locale filter from the search request, a string like 'all',
    'en', 'en-US', 'zh-HK' .. and others, for more details see
    :py:obj:`searx.locales`."""
class SuspendedStatus:
"""Class to handle suspend state."""
__slots__ = 'suspend_end_time', 'suspend_reason', 'continuous_errors', 'lock'
def __init__(self):
self.lock: threading.Lock = threading.Lock()
self.continuous_errors: int = 0
@ -39,18 +87,18 @@ class SuspendedStatus:
def is_suspended(self):
return self.suspend_end_time >= default_timer()
def suspend(self, suspended_time: int, suspend_reason: str):
def suspend(self, suspended_time: int | None, suspend_reason: str):
    """Suspend the engine for *suspended_time* seconds.

    When *suspended_time* is ``None``, the ban time is derived from the
    settings: it grows with the number of continuous errors and is capped
    at ``search.max_ban_time_on_fail``.
    """
    with self.lock:
        # update continuous_errors / suspend_end_time
        self.continuous_errors += 1
        if suspended_time is None:
            max_ban: int = get_setting("search.max_ban_time_on_fail")
            ban_fail: int = get_setting("search.ban_time_on_fail")
            # BUG fix: the ban time scales with the number of continuous
            # errors (as in the pre-refactoring implementation); without the
            # multiplication every failure got the same short, fixed ban.
            suspended_time = min(max_ban, self.continuous_errors * ban_fail)
        self.suspend_end_time = default_timer() + suspended_time
        self.suspend_reason = suspend_reason
        logger.debug("Suspend for %i seconds", suspended_time)
def resume(self):
with self.lock:
@ -63,31 +111,63 @@ class SuspendedStatus:
class EngineProcessor(ABC):
"""Base classes used for all types of request processors."""
__slots__ = 'engine', 'engine_name', 'suspended_status', 'logger'
engine_type: str
def __init__(self, engine: "Engine|ModuleType", engine_name: str):
self.engine: "Engine" = engine
self.engine_name: str = engine_name
self.logger: logging.Logger = engines[engine_name].logger
key = get_network(self.engine_name)
key = id(key) if key else self.engine_name
def __init__(self, engine: "Engine|types.ModuleType"):
self.engine: "Engine" = engine # pyright: ignore[reportAttributeAccessIssue]
self.logger: logging.Logger = engines[engine.name].logger
key = get_network(self.engine.name)
key = id(key) if key else self.engine.name
self.suspended_status: SuspendedStatus = SUSPENDED_STATUS.setdefault(key, SuspendedStatus())
def initialize(self):
def initialize(self, callback: t.Callable[["EngineProcessor", bool], bool]):
"""Initialization of *this* :py:obj:`EngineProcessor`.
If processor's engine has an ``init`` method, it is called first.
Engine's ``init`` method is executed in a thread, meaning that the
*registration* (the ``callback``) may occur later and is not already
established by the return from this registration method.
Registration only takes place if the ``init`` method is not available or
is successfully run through.
"""
if not hasattr(self.engine, "init"):
callback(self, True)
return
if not callable(self.engine.init):
logger.error("Engine's init method isn't a callable (is of type: %s).", type(self.engine.init))
callback(self, False)
return
def __init_processor_thread():
eng_ok = self.init_engine()
callback(self, eng_ok)
# set up and start a thread
threading.Thread(target=__init_processor_thread, daemon=True).start()
def init_engine(self) -> bool:
eng_setting = get_engine_from_settings(self.engine.name)
init_ok: bool | None = False
try:
self.engine.init(get_engine_from_settings(self.engine_name))
except SearxEngineResponseException as exc:
self.logger.warning('Fail to initialize // %s', exc)
init_ok = self.engine.init(eng_setting)
except Exception: # pylint: disable=broad-except
self.logger.exception('Fail to initialize')
else:
self.logger.debug('Initialized')
logger.exception("Init method of engine %s failed due to an exception.", self.engine.name)
init_ok = False
# In older engines, None is returned from the init method, which is
# equivalent to indicating that the initialization was successful.
if init_ok is None:
init_ok = True
return init_ok
@property
def has_initialize_function(self):
return hasattr(self.engine, 'init')
def handle_exception(self, result_container, exception_or_message, suspend=False):
def handle_exception(
self,
result_container: "ResultContainer",
exception_or_message: BaseException | str,
suspend: bool = False,
):
# update result_container
if isinstance(exception_or_message, BaseException):
exception_class = exception_or_message.__class__
@ -96,13 +176,13 @@ class EngineProcessor(ABC):
error_message = module_name + exception_class.__qualname__
else:
error_message = exception_or_message
result_container.add_unresponsive_engine(self.engine_name, error_message)
result_container.add_unresponsive_engine(self.engine.name, error_message)
# metrics
counter_inc('engine', self.engine_name, 'search', 'count', 'error')
counter_inc('engine', self.engine.name, 'search', 'count', 'error')
if isinstance(exception_or_message, BaseException):
count_exception(self.engine_name, exception_or_message)
count_exception(self.engine.name, exception_or_message)
else:
count_error(self.engine_name, exception_or_message)
count_error(self.engine.name, exception_or_message)
# suspend the engine ?
if suspend:
suspended_time = None
@ -110,51 +190,63 @@ class EngineProcessor(ABC):
suspended_time = exception_or_message.suspended_time
self.suspended_status.suspend(suspended_time, error_message) # pylint: disable=no-member
def _extend_container_basic(self, result_container, start_time, search_results):
def _extend_container_basic(
self,
result_container: "ResultContainer",
start_time: float,
search_results: "list[Result | LegacyResult]",
):
# update result_container
result_container.extend(self.engine_name, search_results)
result_container.extend(self.engine.name, search_results)
engine_time = default_timer() - start_time
page_load_time = get_time_for_thread()
result_container.add_timing(self.engine_name, engine_time, page_load_time)
result_container.add_timing(self.engine.name, engine_time, page_load_time)
# metrics
counter_inc('engine', self.engine_name, 'search', 'count', 'successful')
histogram_observe(engine_time, 'engine', self.engine_name, 'time', 'total')
counter_inc('engine', self.engine.name, 'search', 'count', 'successful')
histogram_observe(engine_time, 'engine', self.engine.name, 'time', 'total')
if page_load_time is not None:
histogram_observe(page_load_time, 'engine', self.engine_name, 'time', 'http')
histogram_observe(page_load_time, 'engine', self.engine.name, 'time', 'http')
def extend_container(self, result_container, start_time, search_results):
def extend_container(
self,
result_container: "ResultContainer",
start_time: float,
search_results: "list[Result | LegacyResult]|None",
):
if getattr(threading.current_thread(), '_timeout', False):
# the main thread is not waiting anymore
self.handle_exception(result_container, 'timeout', None)
self.handle_exception(result_container, 'timeout', False)
else:
# check if the engine accepted the request
if search_results is not None:
self._extend_container_basic(result_container, start_time, search_results)
self.suspended_status.resume()
def extend_container_if_suspended(self, result_container):
def extend_container_if_suspended(self, result_container: "ResultContainer") -> bool:
if self.suspended_status.is_suspended:
result_container.add_unresponsive_engine(
self.engine_name, self.suspended_status.suspend_reason, suspended=True
self.engine.name, self.suspended_status.suspend_reason, suspended=True
)
return True
return False
def get_params(self, search_query, engine_category) -> dict[str, t.Any]:
"""Returns a set of (see :ref:`request params <engine request arguments>`) or
``None`` if request is not supported.
def get_params(self, search_query: "SearchQuery", engine_category: str) -> RequestParams | None:
"""Returns a dictionary with the :ref:`request parameters <engine
request arguments>` (:py:obj:`RequestParams`), if the search condition
is not supported by the engine, ``None`` is returned:
Not supported conditions (``None`` is returned):
- *time range* filter in search conditions, but the engine does not have
a corresponding filter
- page number > 1 when engine does not support paging
- page number > ``max_page``
- A page-number > 1 when engine does not support paging.
- A time range when the engine does not support time range.
"""
# if paging is not supported, skip
if search_query.pageno > 1 and not self.engine.paging:
return None
# if max page is reached, skip
max_page = self.engine.max_page or settings['search']['max_page']
max_page = self.engine.max_page or get_setting("search.max_page")
if max_page and max_page < search_query.pageno:
return None
@ -162,39 +254,45 @@ class EngineProcessor(ABC):
if search_query.time_range and not self.engine.time_range_support:
return None
params = {}
params["query"] = search_query.query
params['category'] = engine_category
params['pageno'] = search_query.pageno
params['safesearch'] = search_query.safesearch
params['time_range'] = search_query.time_range
params['engine_data'] = search_query.engine_data.get(self.engine_name, {})
params['searxng_locale'] = search_query.lang
params: RequestParams = {
"query": search_query.query,
"category": engine_category,
"pageno": search_query.pageno,
"safesearch": search_query.safesearch,
"time_range": search_query.time_range,
"engine_data": search_query.engine_data.get(self.engine.name, {}),
"searxng_locale": search_query.lang,
}
# deprecated / vintage --> use params['searxng_locale']
# deprecated / vintage --> use params["searxng_locale"]
#
# Conditions related to engine's traits are implemented in engine.traits
# module. Don't do 'locale' decisions here in the abstract layer of the
# module. Don't do "locale" decisions here in the abstract layer of the
# search processor, just pass the value from user's choice unchanged to
# the engine request.
if hasattr(self.engine, 'language') and self.engine.language:
params['language'] = self.engine.language
if hasattr(self.engine, "language") and self.engine.language:
params["language"] = self.engine.language # pyright: ignore[reportGeneralTypeIssues]
else:
params['language'] = search_query.lang
params["language"] = search_query.lang # pyright: ignore[reportGeneralTypeIssues]
return params
@abstractmethod
def search(self, query, params, result_container, start_time, timeout_limit):
def search(
self,
query: str,
params: RequestParams,
result_container: "ResultContainer",
start_time: float,
timeout_limit: float,
):
pass
def get_tests(self):
tests = getattr(self.engine, 'tests', None)
if tests is None:
tests = getattr(self.engine, 'additional_tests', {})
tests.update(self.get_default_tests())
return tests
# deprecated!
return {}
def get_default_tests(self):
# deprecated!
return {}

View File

@ -1,26 +1,32 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Processors for engine-type: ``offline``
"""Processors for engine-type: ``offline``"""
"""
import typing as t
from .abstract import EngineProcessor, RequestParams
from .abstract import EngineProcessor
if t.TYPE_CHECKING:
from searx.results import ResultContainer
class OfflineProcessor(EngineProcessor):
"""Processor class used by ``offline`` engines"""
"""Processor class used by ``offline`` engines."""
engine_type = 'offline'
engine_type: str = "offline"
def _search_basic(self, query, params):
return self.engine.search(query, params)
def search(self, query, params, result_container, start_time, timeout_limit):
def search(
self,
query: str,
params: RequestParams,
result_container: "ResultContainer",
start_time: float,
timeout_limit: float,
):
try:
search_results = self._search_basic(query, params)
search_results = self.engine.search(query, params)
self.extend_container(result_container, start_time, search_results)
except ValueError as e:
# do not record the error
self.logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e))
self.logger.exception('engine {0} : invalid input : {1}'.format(self.engine.name, e))
except Exception as e: # pylint: disable=broad-except
self.handle_exception(result_container, e)
self.logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))
self.logger.exception('engine {0} : exception : {1}'.format(self.engine.name, e))

View File

@ -1,8 +1,9 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Processors for engine-type: ``online``
"""Processor used for ``online`` engines."""
"""
# pylint: disable=use-dict-literal
__all__ = ["OnlineProcessor", "OnlineParams"]
import typing as t
from timeit import default_timer
import asyncio
@ -17,50 +18,132 @@ from searx.exceptions import (
SearxEngineTooManyRequestsException,
)
from searx.metrics.error_recorder import count_error
from .abstract import EngineProcessor
from .abstract import EngineProcessor, RequestParams
if t.TYPE_CHECKING:
from searx.search.models import SearchQuery
from searx.results import ResultContainer
from searx.result_types import EngineResults
def default_request_params():
class HTTPParams(t.TypedDict):
"""HTTP request parameters"""
method: t.Literal["GET", "POST"]
"""HTTP request method."""
headers: dict[str, str]
"""HTTP header information."""
data: dict[str, str]
"""Sending `form encoded data`_.
.. _form encoded data:
https://www.python-httpx.org/quickstart/#sending-form-encoded-data
"""
json: dict[str, t.Any]
"""`Sending `JSON encoded data`_.
.. _JSON encoded data:
https://www.python-httpx.org/quickstart/#sending-json-encoded-data
"""
content: bytes
"""`Sending `binary request data`_.
.. _binary request data:
https://www.python-httpx.org/quickstart/#sending-json-encoded-data
"""
url: str
"""Requested url."""
cookies: dict[str, str]
"""HTTP cookies."""
allow_redirects: bool
"""Follow redirects"""
max_redirects: int
"""Maximum redirects, hard limit."""
soft_max_redirects: int
"""Maximum redirects, soft limit. Record an error but don't stop the engine."""
verify: None | t.Literal[False] | str # not sure str really works
"""If not ``None``, it overrides the verify value defined in the network. Use
``False`` to accept any server certificate and use a path to file to specify a
server certificate"""
auth: str | None
"""An authentication to use when sending requests."""
raise_for_httperror: bool
"""Raise an exception if the `HTTP response status code`_ is ``>= 300``.
.. _HTTP response status code:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status
"""
class OnlineParams(HTTPParams, RequestParams):
"""Request parameters of a ``online`` engine."""
def default_request_params() -> HTTPParams:
"""Default request parameters for ``online`` engines."""
return {
# fmt: off
'method': 'GET',
'headers': {},
'data': {},
'url': '',
'cookies': {},
'auth': None
# fmt: on
"method": "GET",
"headers": {},
"data": {},
"json": {},
"content": b"",
"url": "",
"cookies": {},
"allow_redirects": False,
"max_redirects": 0,
"soft_max_redirects": 0,
"auth": None,
"verify": None,
"raise_for_httperror": True,
}
class OnlineProcessor(EngineProcessor):
"""Processor class for ``online`` engines."""
engine_type = 'online'
engine_type: str = "online"
def initialize(self):
def init_engine(self) -> bool:
"""This method is called in a thread, and before the base method is
called, the network must be set up for the ``online`` engines."""
self.init_network_in_thread(start_time=default_timer(), timeout_limit=self.engine.timeout)
return super().init_engine()
def init_network_in_thread(self, start_time: float, timeout_limit: float):
# set timeout for all HTTP requests
searx.network.set_timeout_for_thread(self.engine.timeout, start_time=default_timer())
searx.network.set_timeout_for_thread(timeout_limit, start_time=start_time)
# reset the HTTP total time
searx.network.reset_time_for_thread()
# set the network
searx.network.set_context_network_name(self.engine_name)
super().initialize()
searx.network.set_context_network_name(self.engine.name)
def get_params(self, search_query, engine_category):
"""Returns a set of :ref:`request params <engine request online>` or ``None``
if request is not supported.
"""
params = super().get_params(search_query, engine_category)
if params is None:
return None
def get_params(self, search_query: "SearchQuery", engine_category: str) -> OnlineParams | None:
"""Returns a dictionary with the :ref:`request params <engine request
online>` (:py:obj:`OnlineParams`), if the search condition is not
supported by the engine, ``None`` is returned."""
# add default params
params.update(default_request_params())
base_params: RequestParams | None = super().get_params(search_query, engine_category)
if base_params is None:
return base_params
params: OnlineParams = {**default_request_params(), **base_params}
headers = params["headers"]
# add an user agent
params['headers']['User-Agent'] = gen_useragent()
headers["User-Agent"] = gen_useragent()
# add Accept-Language header
if self.engine.send_accept_language_header and search_query.locale:
@ -71,73 +154,77 @@ class OnlineProcessor(EngineProcessor):
search_query.locale.territory,
search_query.locale.language,
)
params['headers']['Accept-Language'] = ac_lang
headers["Accept-Language"] = ac_lang
self.logger.debug('HTTP Accept-Language: %s', params['headers'].get('Accept-Language', ''))
self.logger.debug("HTTP Accept-Language: %s", headers.get("Accept-Language", ""))
return params
def _send_http_request(self, params):
# create dictionary which contain all
# information about the request
request_args = dict(headers=params['headers'], cookies=params['cookies'], auth=params['auth'])
def _send_http_request(self, params: OnlineParams):
# verify
# if not None, it overrides the verify value defined in the network.
# use False to accept any server certificate
# use a path to file to specify a server certificate
verify = params.get('verify')
# create dictionary which contain all information about the request
request_args: dict[str, t.Any] = {
"headers": params["headers"],
"cookies": params["cookies"],
"auth": params["auth"],
}
verify = params.get("verify")
if verify is not None:
request_args['verify'] = params['verify']
request_args["verify"] = verify
# max_redirects
max_redirects = params.get('max_redirects')
max_redirects = params.get("max_redirects")
if max_redirects:
request_args['max_redirects'] = max_redirects
request_args["max_redirects"] = max_redirects
# allow_redirects
if 'allow_redirects' in params:
request_args['allow_redirects'] = params['allow_redirects']
if "allow_redirects" in params:
request_args["allow_redirects"] = params["allow_redirects"]
# soft_max_redirects
soft_max_redirects = params.get('soft_max_redirects', max_redirects or 0)
soft_max_redirects: int = params.get("soft_max_redirects", max_redirects or 0)
# raise_for_status
request_args['raise_for_httperror'] = params.get('raise_for_httperror', True)
request_args["raise_for_httperror"] = params.get("raise_for_httperror", True)
# specific type of request (GET or POST)
if params['method'] == 'GET':
if params["method"] == "GET":
req = searx.network.get
else:
req = searx.network.post
request_args['data'] = params['data']
if params["data"]:
request_args["data"] = params["data"]
if params["json"]:
request_args["json"] = params["json"]
if params["content"]:
request_args["content"] = params["content"]
# send the request
response = req(params['url'], **request_args)
response = req(params["url"], **request_args)
# check soft limit of the redirect count
if len(response.history) > soft_max_redirects:
# unexpected redirect : record an error
# but the engine might still return valid results.
status_code = str(response.status_code or '')
reason = response.reason_phrase or ''
status_code = str(response.status_code or "")
reason = response.reason_phrase or ""
hostname = response.url.host
count_error(
self.engine_name,
'{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
self.engine.name,
"{} redirects, maximum: {}".format(len(response.history), soft_max_redirects),
(status_code, reason, hostname),
secondary=True,
)
return response
def _search_basic(self, query, params):
def _search_basic(self, query: str, params: OnlineParams) -> "EngineResults|None":
# update request parameters dependent on
# search-engine (contained in engines folder)
self.engine.request(query, params)
# ignoring empty urls
if not params['url']:
if not params["url"]:
return None
# send request
@ -147,13 +234,15 @@ class OnlineProcessor(EngineProcessor):
response.search_params = params
return self.engine.response(response)
def search(self, query, params, result_container, start_time, timeout_limit):
# set timeout for all HTTP requests
searx.network.set_timeout_for_thread(timeout_limit, start_time=start_time)
# reset the HTTP total time
searx.network.reset_time_for_thread()
# set the network
searx.network.set_context_network_name(self.engine_name)
def search( # pyright: ignore[reportIncompatibleMethodOverride]
self,
query: str,
params: OnlineParams,
result_container: "ResultContainer",
start_time: float,
timeout_limit: float,
):
self.init_network_in_thread(start_time, timeout_limit)
try:
# send requests and parse the results
@ -162,7 +251,7 @@ class OnlineProcessor(EngineProcessor):
except ssl.SSLError as e:
# requests timeout (connect or read)
self.handle_exception(result_container, e, suspend=True)
self.logger.error("SSLError {}, verify={}".format(e, searx.network.get_network(self.engine_name).verify))
self.logger.error("SSLError {}, verify={}".format(e, searx.network.get_network(self.engine.name).verify))
except (httpx.TimeoutException, asyncio.TimeoutError) as e:
# requests timeout (connect or read)
self.handle_exception(result_container, e, suspend=True)
@ -179,55 +268,13 @@ class OnlineProcessor(EngineProcessor):
default_timer() - start_time, timeout_limit, e
)
)
except SearxEngineCaptchaException as e:
except (
SearxEngineCaptchaException,
SearxEngineTooManyRequestsException,
SearxEngineAccessDeniedException,
) as e:
self.handle_exception(result_container, e, suspend=True)
self.logger.exception('CAPTCHA')
except SearxEngineTooManyRequestsException as e:
self.handle_exception(result_container, e, suspend=True)
self.logger.exception('Too many requests')
except SearxEngineAccessDeniedException as e:
self.handle_exception(result_container, e, suspend=True)
self.logger.exception('SearXNG is blocked')
self.logger.exception(e.message)
except Exception as e: # pylint: disable=broad-except
self.handle_exception(result_container, e)
self.logger.exception('exception : {0}'.format(e))
def get_default_tests(self):
tests = {}
tests['simple'] = {
'matrix': {'query': ('life', 'computer')},
'result_container': ['not_empty'],
}
if getattr(self.engine, 'paging', False):
tests['paging'] = {
'matrix': {'query': 'time', 'pageno': (1, 2, 3)},
'result_container': ['not_empty'],
'test': ['unique_results'],
}
if 'general' in self.engine.categories:
# avoid documentation about HTML tags (<time> and <input type="time">)
tests['paging']['matrix']['query'] = 'news'
if getattr(self.engine, 'time_range', False):
tests['time_range'] = {
'matrix': {'query': 'news', 'time_range': (None, 'day')},
'result_container': ['not_empty'],
'test': ['unique_results'],
}
if getattr(self.engine, 'traits', False):
tests['lang_fr'] = {
'matrix': {'query': 'paris', 'lang': 'fr'},
'result_container': ['not_empty', ('has_language', 'fr')],
}
tests['lang_en'] = {
'matrix': {'query': 'paris', 'lang': 'en'},
'result_container': ['not_empty', ('has_language', 'en')],
}
if getattr(self.engine, 'safesearch', False):
tests['safesearch'] = {'matrix': {'query': 'porn', 'safesearch': (0, 2)}, 'test': ['unique_results']}
return tests
self.logger.exception("exception : {0}".format(e))

View File

@ -1,42 +1,71 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Processors for engine-type: ``online_currency``
"""Processor used for ``online_currency`` engines."""
"""
import typing as t
import unicodedata
import re
import flask_babel
import babel
from searx.data import CURRENCIES
from .online import OnlineProcessor
from .online import OnlineProcessor, OnlineParams
parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
if t.TYPE_CHECKING:
from .abstract import EngineProcessor
from searx.search.models import SearchQuery
def normalize_name(name: str):
name = name.strip()
name = name.lower().replace('-', ' ').rstrip('s')
name = re.sub(' +', ' ', name)
return unicodedata.normalize('NFKD', name).lower()
search_syntax = re.compile(r".*?(\d+(?:\.\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)", re.I)
"""Search syntax used for from/to currency (e.g. ``10 usd to eur``)"""
class CurrenciesParams(t.TypedDict):
"""Currencies request parameters."""
amount: float
"""Currency amount to be converted"""
to_iso4217: str
"""ISO_4217_ alpha code of the currency used as the basis for conversion.
.. _ISO_4217: https://en.wikipedia.org/wiki/ISO_4217
"""
from_iso4217: str
"""ISO_4217_ alpha code of the currency to be converted."""
from_name: str
"""Name of the currency used as the basis for conversion."""
to_name: str
"""Name of the currency of the currency to be converted."""
class OnlineCurrenciesParams(CurrenciesParams, OnlineParams): # pylint: disable=duplicate-bases
"""Request parameters of a ``online_currency`` engine."""
class OnlineCurrencyProcessor(OnlineProcessor):
"""Processor class used by ``online_currency`` engines."""
engine_type = 'online_currency'
engine_type: str = "online_currency"
def initialize(self):
def initialize(self, callback: t.Callable[["EngineProcessor", bool], bool]):
CURRENCIES.init()
super().initialize()
super().initialize(callback)
def get_params(self, search_query, engine_category):
"""Returns a set of :ref:`request params <engine request online_currency>`
or ``None`` if search query does not match to :py:obj:`parser_re`."""
def get_params(self, search_query: "SearchQuery", engine_category: str) -> OnlineCurrenciesParams | None:
"""Returns a dictionary with the :ref:`request params <engine request
online_currency>` (:py:obj:`OnlineCurrenciesParams`). ``None`` is
returned if the search query does not match :py:obj:`search_syntax`."""
params = super().get_params(search_query, engine_category)
if params is None:
online_params: OnlineParams | None = super().get_params(search_query, engine_category)
if online_params is None:
return None
m = parser_re.match(search_query.query)
m = search_syntax.match(search_query.query)
if not m:
return None
@ -46,22 +75,46 @@ class OnlineCurrencyProcessor(OnlineProcessor):
except ValueError:
return None
from_currency = CURRENCIES.name_to_iso4217(normalize_name(from_currency))
to_currency = CURRENCIES.name_to_iso4217(normalize_name(to_currency))
# most often $ stands for USD
if from_currency == "$":
from_currency = "$ us"
params['amount'] = amount
params['from'] = from_currency
params['to'] = to_currency
params['from_name'] = CURRENCIES.iso4217_to_name(from_currency, "en")
params['to_name'] = CURRENCIES.iso4217_to_name(to_currency, "en")
return params
if to_currency == "$":
to_currency = "$ us"
def get_default_tests(self):
tests = {}
from_iso4217 = from_currency
if not CURRENCIES.is_iso4217(from_iso4217):
from_iso4217 = CURRENCIES.name_to_iso4217(_normalize_name(from_currency))
tests['currency'] = {
'matrix': {'query': '1337 usd in rmb'},
'result_container': ['has_answer'],
to_iso4217 = to_currency
if not CURRENCIES.is_iso4217(to_iso4217):
to_iso4217 = CURRENCIES.name_to_iso4217(_normalize_name(to_currency))
if from_iso4217 is None or to_iso4217 is None:
return None
ui_locale = flask_babel.get_locale() or babel.Locale.parse("en")
from_name: str = CURRENCIES.iso4217_to_name(
from_iso4217, ui_locale.language
) # pyright: ignore[reportAssignmentType]
to_name: str = CURRENCIES.iso4217_to_name(
to_iso4217, ui_locale.language
) # pyright: ignore[reportAssignmentType]
params: OnlineCurrenciesParams = {
**online_params,
"amount": amount,
"from_iso4217": from_iso4217,
"to_iso4217": to_iso4217,
"from_name": from_name,
"to_name": to_name,
}
return tests
return params
def _normalize_name(name: str):
name = name.strip()
name = name.lower().replace("-", " ")
name = re.sub(" +", " ", name)
return unicodedata.normalize("NFKD", name).lower()

View File

@ -1,60 +1,102 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Processors for engine-type: ``online_dictionary``
"""
"""Processor used for ``online_dictionary`` engines."""
import typing as t
import re
from searx.utils import is_valid_lang
from .online import OnlineProcessor
from searx.sxng_locales import sxng_locales
from .online import OnlineProcessor, OnlineParams
parser_re = re.compile('.*?([a-z]+)-([a-z]+) (.+)$', re.I)
if t.TYPE_CHECKING:
from searx.search.models import SearchQuery
search_syntax = re.compile(r".*?([a-z]+)-([a-z]+) (.+)$", re.I)
"""Search syntax used for from/to language (e.g. ``en-de``)"""
FromToType: t.TypeAlias = tuple[bool, str, str]
"""Type of a language descriptions in the context of a ``online_dictionary``."""
class DictParams(t.TypedDict):
"""Dictionary request parameters."""
from_lang: FromToType
"""Language from which is to be translated."""
to_lang: FromToType
"""Language to translate into."""
query: str
"""Search term, cleaned of search syntax (*from-to* has been removed)."""
class OnlineDictParams(DictParams, OnlineParams): # pylint: disable=duplicate-bases
"""Request parameters of a ``online_dictionary`` engine."""
class OnlineDictionaryProcessor(OnlineProcessor):
"""Processor class used by ``online_dictionary`` engines."""
"""Processor class for ``online_dictionary`` engines."""
engine_type = 'online_dictionary'
engine_type: str = "online_dictionary"
def get_params(self, search_query, engine_category):
"""Returns a set of :ref:`request params <engine request online_dictionary>` or
``None`` if search query does not match to :py:obj:`parser_re`.
"""
params = super().get_params(search_query, engine_category)
if params is None:
def get_params(self, search_query: "SearchQuery", engine_category: str) -> OnlineDictParams | None:
"""Returns a dictionary with the :ref:`request params <engine request
online_dictionary>` (:py:obj:`OnlineDictParams`). ``None`` is returned
if the search query does not match :py:obj:`search_syntax`."""
online_params: OnlineParams | None = super().get_params(search_query, engine_category)
if online_params is None:
return None
m = parser_re.match(search_query.query)
m = search_syntax.match(search_query.query)
if not m:
return None
from_lang, to_lang, query = m.groups()
from_lang = is_valid_lang(from_lang)
to_lang = is_valid_lang(to_lang)
from_lang = _get_lang_descr(from_lang)
to_lang = _get_lang_descr(to_lang)
if not from_lang or not to_lang:
return None
params['from_lang'] = from_lang
params['to_lang'] = to_lang
params['query'] = query
params: OnlineDictParams = {
**online_params,
"from_lang": from_lang,
"to_lang": to_lang,
"query": query,
}
return params
def get_default_tests(self):
tests = {}
if getattr(self.engine, 'paging', False):
tests['translation_paging'] = {
'matrix': {'query': 'en-es house', 'pageno': (1, 2, 3)},
'result_container': ['not_empty', ('one_title_contains', 'house')],
'test': ['unique_results'],
}
else:
tests['translation'] = {
'matrix': {'query': 'en-es house'},
'result_container': ['not_empty', ('one_title_contains', 'house')],
}
def _get_lang_descr(lang: str) -> FromToType | None:
"""Returns language's code and language's english name if argument ``lang``
describes a language known by SearXNG, otherwise ``None``.
return tests
Examples:
.. code:: python
>>> _get_lang_descr("zz")
None
>>> _get_lang_descr("uk")
(True, "uk", "ukrainian")
>>> _get_lang_descr(b"uk")
(True, "uk", "ukrainian")
>>> _get_lang_descr("en")
(True, "en", "english")
>>> _get_lang_descr("Español")
(True, "es", "spanish")
>>> _get_lang_descr("Spanish")
(True, "es", "spanish")
"""
lang = lang.lower()
is_abbr = len(lang) == 2
if is_abbr:
for l in sxng_locales:
if l[0][:2] == lang:
return (True, l[0][:2], l[3].lower())
return None
for l in sxng_locales:
if l[1].lower() == lang or l[3].lower() == lang:
return (True, l[0][:2], l[3].lower())
return None

View File

@ -1,45 +1,64 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Processors for engine-type: ``online_url_search``
"""
"""Processor used for ``online_url_search`` engines."""
import typing as t
import re
from .online import OnlineProcessor
re_search_urls = {
'http': re.compile(r'https?:\/\/[^ ]*'),
'ftp': re.compile(r'ftps?:\/\/[^ ]*'),
'data:image': re.compile('data:image/[^; ]*;base64,[^ ]*'),
from .online import OnlineProcessor, OnlineParams
if t.TYPE_CHECKING:
from .abstract import EngineProcessor
from searx.search.models import SearchQuery
search_syntax = {
"http": re.compile(r"https?:\/\/[^ ]*"),
"ftp": re.compile(r"ftps?:\/\/[^ ]*"),
"data:image": re.compile("data:image/[^; ]*;base64,[^ ]*"),
}
"""Search syntax used for a URL search."""
class UrlParams(t.TypedDict):
"""URL request parameters."""
search_urls: dict[str, str | None]
class OnlineUrlSearchParams(UrlParams, OnlineParams): # pylint: disable=duplicate-bases
"""Request parameters of a ``online_url_search`` engine."""
class OnlineUrlSearchProcessor(OnlineProcessor):
"""Processor class used by ``online_url_search`` engines."""
engine_type = 'online_url_search'
engine_type: str = "online_url_search"
def get_params(self, search_query, engine_category):
"""Returns a set of :ref:`request params <engine request online>` or ``None`` if
search query does not match to :py:obj:`re_search_urls`.
"""
def get_params(self, search_query: "SearchQuery", engine_category: str) -> OnlineUrlSearchParams | None:
"""Returns a dictionary with the :ref:`request params <engine request
online_currency>` (:py:obj:`OnlineUrlSearchParams`). ``None`` is
returned if the search query does not match :py:obj:`search_syntax`."""
params = super().get_params(search_query, engine_category)
if params is None:
online_params: OnlineParams | None = super().get_params(search_query, engine_category)
if online_params is None:
return None
url_match = False
search_urls = {}
search_urls: dict[str, str | None] = {}
has_match: bool = False
for k, v in re_search_urls.items():
m = v.search(search_query.query)
v = None
for url_schema, url_re in search_syntax.items():
search_urls[url_schema] = None
m = url_re.search(search_query.query)
if m:
url_match = True
v = m[0]
search_urls[k] = v
has_match = True
search_urls[url_schema] = m[0]
if not url_match:
if not has_match:
return None
params['search_urls'] = search_urls
params: OnlineUrlSearchParams = {
**online_params,
"search_urls": search_urls,
}
return params

View File

@ -718,7 +718,6 @@ engines:
- name: currency
engine: currency_convert
categories: general
shortcut: cc
- name: deezer

View File

@ -410,38 +410,6 @@ def int_or_zero(num: list[str] | str) -> int:
return convert_str_to_int(num)
def is_valid_lang(lang: str | bytes) -> tuple[bool, str, str] | None:
    """Return ``(True, code, name)`` if *lang* describes a language, else ``None``.

    *lang* may be a two-letter language code, an English language name or a
    native language name; ``bytes`` input is decoded to ``str`` first.
    Matching is case-insensitive.

    Examples:

    >>> is_valid_lang('zz')
    None
    >>> is_valid_lang('uk')
    (True, 'uk', 'ukrainian')
    >>> is_valid_lang(b'uk')
    (True, 'uk', 'ukrainian')
    >>> is_valid_lang('en')
    (True, 'en', 'english')
    >>> searx.utils.is_valid_lang('Español')
    (True, 'es', 'spanish')
    >>> searx.utils.is_valid_lang('Spanish')
    (True, 'es', 'spanish')
    """
    if isinstance(lang, bytes):
        lang = lang.decode()
    # length is checked before lowercasing; lowering can't change the length
    is_abbr = len(lang) == 2
    lang = lang.lower()
    if is_abbr:
        # two-letter input: compare against the territory-less language code
        for locale in sxng_locales:
            if locale[0][:2] == lang:
                return (True, locale[0][:2], locale[3].lower())
        return None
    # full-name input: compare against English and native language names
    for locale in sxng_locales:
        if locale[1].lower() == lang or locale[3].lower() == lang:
            return (True, locale[0][:2], locale[3].lower())
    return None
def load_module(filename: str, module_dir: str) -> types.ModuleType:
modname = splitext(filename)[0]
modpath = join(module_dir, filename)

View File

@ -48,6 +48,7 @@ import searx.query
import searx.search
import searx.search.models
import searx.webadapter
from searx.search.processors import PROCESSORS
EngineCategoriesVar = Optional[List[str]]
@ -172,7 +173,7 @@ if __name__ == '__main__':
searx.search.initialize_network(settings_engines, searx.settings['outgoing'])
searx.search.check_network_configuration()
searx.search.initialize_metrics([engine['name'] for engine in settings_engines])
searx.search.initialize_processors(settings_engines)
PROCESSORS.init(settings_engines)
search_q = get_search_query(prog_args, engine_categories=engine_cs)
res_dict = to_dict(search_q)
sys.stdout.write(dumps(res_dict, sort_keys=True, indent=4, ensure_ascii=False, default=json_serial))

View File

@ -20,7 +20,7 @@ class TestOnlineProcessor(SearxTestCase):
def test_get_params_default_params(self):
engine = engines.engines[TEST_ENGINE_NAME]
online_processor = online.OnlineProcessor(engine, TEST_ENGINE_NAME)
online_processor = online.OnlineProcessor(engine)
search_query = SearchQuery('test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None)
params = self._get_params(online_processor, search_query, 'general')
self.assertIn('method', params)
@ -32,7 +32,7 @@ class TestOnlineProcessor(SearxTestCase):
def test_get_params_useragent(self):
engine = engines.engines[TEST_ENGINE_NAME]
online_processor = online.OnlineProcessor(engine, TEST_ENGINE_NAME)
online_processor = online.OnlineProcessor(engine)
search_query = SearchQuery('test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None)
params = self._get_params(online_processor, search_query, 'general')
self.assertIn('User-Agent', params['headers'])

View File

@ -24,7 +24,7 @@ class ViewsTestCase(SearxTestCase): # pylint: disable=too-many-public-methods
def dummy(*args, **kwargs): # pylint: disable=unused-argument
pass
self.setattr4test(searx.search.processors, 'initialize_processor', dummy)
self.setattr4test(searx.search.processors.PROCESSORS, 'init', dummy)
# set some defaults
test_results = [