[mod] addition of various type hints / engine processors

Continuation of #5147 .. typification of the engine processors.

BTW:

- removed obsolete engine property https_support
- fixed & improved currency_convert
- engine instances can now implement an engine.setup method

[#5147] https://github.com/searxng/searxng/pull/5147

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2025-09-11 19:10:27 +02:00 committed by Markus Heiser
parent 23257bddce
commit 8f8343dc0d
28 changed files with 814 additions and 522 deletions

View File

@ -144,9 +144,9 @@ parameters with default value can be redefined for special purposes.
====================== ============== ======================================================================== ====================== ============== ========================================================================
url str ``''`` url str ``''``
method str ``'GET'`` method str ``'GET'``
headers set ``{}`` headers dict ``{}``
data set ``{}`` data dict ``{}``
cookies set ``{}`` cookies dict ``{}``
verify bool ``True`` verify bool ``True``
headers.User-Agent str a random User-Agent headers.User-Agent str a random User-Agent
category str current category, like ``'general'`` category str current category, like ``'general'``
@ -226,9 +226,9 @@ following parameters can be used to specify a search request:
=================== =========== ========================================================================== =================== =========== ==========================================================================
url str requested url url str requested url
method str HTTP request method method str HTTP request method
headers set HTTP header information headers dict HTTP header information
data set HTTP data information data dict HTTP data information
cookies set HTTP cookies cookies dict HTTP cookies
verify bool Performing SSL-Validity check verify bool Performing SSL-Validity check
allow_redirects bool Follow redirects allow_redirects bool Follow redirects
max_redirects int maximum redirects, hard limit max_redirects int maximum redirects, hard limit
@ -249,6 +249,3 @@ by templates. For more details read section:
- :ref:`simple theme templates` - :ref:`simple theme templates`
- :ref:`result types` - :ref:`result types`

View File

@ -1,22 +1,23 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Simple implementation to store currencies data in a SQL database.""" """Simple implementation to store currencies data in a SQL database."""
__all__ = ["CurrenciesDB"] __all__ = ["CurrenciesDB"]
import typing as t
import json import json
import pathlib import pathlib
from .core import get_cache, log from .core import get_cache, log
@t.final
class CurrenciesDB: class CurrenciesDB:
# pylint: disable=missing-class-docstring # pylint: disable=missing-class-docstring
ctx_names = "data_currencies_names" ctx_names: str = "data_currencies_names"
ctx_iso4217 = "data_currencies_iso4217" ctx_iso4217: str = "data_currencies_iso4217"
json_file = pathlib.Path(__file__).parent / "currencies.json" json_file: pathlib.Path = pathlib.Path(__file__).parent / "currencies.json"
def __init__(self): def __init__(self):
self.cache = get_cache() self.cache = get_cache()
@ -33,23 +34,27 @@ class CurrenciesDB:
def load(self): def load(self):
log.debug("init searx.data.CURRENCIES") log.debug("init searx.data.CURRENCIES")
with open(self.json_file, encoding="utf-8") as f: with open(self.json_file, encoding="utf-8") as f:
data_dict = json.load(f) data_dict: dict[str, dict[str, str]] = json.load(f)
for key, value in data_dict["names"].items(): for key, value in data_dict["names"].items():
self.cache.set(key=key, value=value, ctx=self.ctx_names, expire=None) self.cache.set(key=key, value=value, ctx=self.ctx_names, expire=None)
for key, value in data_dict["iso4217"].items(): for key, value in data_dict["iso4217"].items():
self.cache.set(key=key, value=value, ctx=self.ctx_iso4217, expire=None) self.cache.set(key=key, value=value, ctx=self.ctx_iso4217, expire=None)
def name_to_iso4217(self, name): def name_to_iso4217(self, name: str) -> str | None:
self.init() self.init()
ret_val = self.cache.get(key=name, default=name, ctx=self.ctx_names) ret_val: str | list[str] | None = self.cache.get(key=name, default=None, ctx=self.ctx_names)
if isinstance(ret_val, list): if isinstance(ret_val, list):
# if more alternatives, use the last in the list # if more alternatives, use the last in the list
ret_val = ret_val[-1] ret_val = ret_val[-1]
return ret_val return ret_val
def iso4217_to_name(self, iso4217, language): def iso4217_to_name(self, iso4217: str, language: str) -> str | None:
self.init() self.init()
iso4217_languages: dict = self.cache.get(key=iso4217, default={}, ctx=self.ctx_iso4217) iso4217_languages: dict[str, str] = self.cache.get(key=iso4217, default={}, ctx=self.ctx_iso4217)
return iso4217_languages.get(language, iso4217) return iso4217_languages.get(language)
def is_iso4217(self, iso4217: str) -> bool:
item = self.cache.get(key=iso4217, default={}, ctx=self.ctx_iso4217)
return bool(item)

View File

@ -39,6 +39,7 @@ if t.TYPE_CHECKING:
from searx.enginelib.traits import EngineTraits from searx.enginelib.traits import EngineTraits
from searx.extended_types import SXNG_Response from searx.extended_types import SXNG_Response
from searx.result_types import EngineResults from searx.result_types import EngineResults
from searx.search.processors import OfflineParamTypes, OnlineParamTypes
ENGINES_CACHE: ExpireCacheSQLite = ExpireCacheSQLite.build_cache( ENGINES_CACHE: ExpireCacheSQLite = ExpireCacheSQLite.build_cache(
ExpireCacheCfg( ExpireCacheCfg(
@ -195,6 +196,10 @@ class Engine(abc.ABC): # pylint: disable=too-few-public-methods
paging: bool paging: bool
"""Engine supports multiple pages.""" """Engine supports multiple pages."""
max_page: int = 0
"""If the engine supports paging, then this is the value for the last page
that is still supported. ``0`` means unlimited numbers of pages."""
time_range_support: bool time_range_support: bool
"""Engine supports search time range.""" """Engine supports search time range."""
@ -304,14 +309,49 @@ class Engine(abc.ABC): # pylint: disable=too-few-public-methods
weight: int weight: int
"""Weighting of the results of this engine (:ref:`weight <settings engines>`).""" """Weighting of the results of this engine (:ref:`weight <settings engines>`)."""
def init(self, engine_settings: dict[str, t.Any]) -> None: # pyright: ignore[reportUnusedParameter] def setup(self, engine_settings: dict[str, t.Any]) -> bool: # pylint: disable=unused-argument
"""Initialization of the engine. If no initialization is needed, drop """Dynamic setup of the engine settings.
this init function."""
With this method, the engine's setup is carried out. For example, to
check or dynamically adapt the values handed over in the parameter
``engine_settings``. The return value (True/False) indicates whether
the setup was successful and the engine can be built or rejected.
The method is optional and is called synchronously as part of the
initialization of the service; it is therefore only suitable for simple
(local) checks/changes of the engine settings. The :py:obj:`Engine.init`
method must be used for longer tasks, for example when values must be
determined from a remote.
"""
return True
def init(self, engine_settings: dict[str, t.Any]) -> bool | None: # pylint: disable=unused-argument
"""Initialization of the engine.
The method is optional and asynchronous (in a thread). It is suitable,
for example, for setting up a cache (for the engine) or for querying
values (required by the engine) from a remote.
Whether the initialization was successful can be indicated by the return
value ``True`` or ``False``.
- If no return value is given from this init method (``None``), this is
equivalent to ``True``.
- If an exception is thrown as part of the initialization, this is
equivalent to ``False``.
"""
return True
@abc.abstractmethod @abc.abstractmethod
def request(self, query: str, params: dict[str, t.Any]) -> None: def search(self, query: str, params: "OfflineParamTypes") -> "EngineResults":
"""Build up the params for the online request.""" """Search method of the ``offline`` engines"""
@abc.abstractmethod
def request(self, query: str, params: "OnlineParamTypes") -> None:
"""Method to build the parameters for the request of an ``online``
engine."""
@abc.abstractmethod @abc.abstractmethod
def response(self, resp: "SXNG_Response") -> "EngineResults": def response(self, resp: "SXNG_Response") -> "EngineResults":
"""Parse out the result items from the response.""" """Method to parse the response of an ``online`` engine."""

View File

@ -51,7 +51,10 @@ ENGINE_DEFAULT_ARGS: dict[str, int | str | list[t.Any] | dict[str, t.Any] | bool
DEFAULT_CATEGORY = 'other' DEFAULT_CATEGORY = 'other'
categories: "dict[str, list[Engine|types.ModuleType]]" = {'general': []} categories: "dict[str, list[Engine|types.ModuleType]]" = {'general': []}
engines: "dict[str, Engine | types.ModuleType]" = {} engines: "dict[str, Engine | types.ModuleType]" = {}
"""Global registered engine instances."""
engine_shortcuts = {} engine_shortcuts = {}
"""Simple map of registered *shortcuts* to name of the engine (or ``None``). """Simple map of registered *shortcuts* to name of the engine (or ``None``).
@ -144,6 +147,9 @@ def load_engine(engine_data: dict[str, t.Any]) -> "Engine | types.ModuleType | N
set_loggers(engine, engine_name) set_loggers(engine, engine_name)
if not call_engine_setup(engine, engine_data):
return None
if not any(cat in settings['categories_as_tabs'] for cat in engine.categories): if not any(cat in settings['categories_as_tabs'] for cat in engine.categories):
engine.categories.append(DEFAULT_CATEGORY) engine.categories.append(DEFAULT_CATEGORY)
@ -223,6 +229,25 @@ def is_engine_active(engine: "Engine | types.ModuleType"):
return True return True
def call_engine_setup(engine: "Engine | types.ModuleType", engine_data: dict[str, t.Any]) -> bool:
setup_ok = False
setup_func = getattr(engine, "setup", None)
if setup_func is None:
setup_ok = True
elif not callable(setup_func):
logger.error("engine's setup method isn't a callable (is of type: %s)", type(setup_func))
else:
try:
setup_ok = engine.setup(engine_data)
except Exception as e: # pylint: disable=broad-except
logger.exception('exception : {0}'.format(e))
if not setup_ok:
logger.error("%s: Engine setup was not successful, engine is set to inactive.", engine.name)
return setup_ok
def register_engine(engine: "Engine | types.ModuleType"): def register_engine(engine: "Engine | types.ModuleType"):
if engine.name in engines: if engine.name in engines:
logger.error('Engine config error: ambiguous name: {0}'.format(engine.name)) logger.error('Engine config error: ambiguous name: {0}'.format(engine.name))

View File

@ -1,53 +1,58 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Currency convert (DuckDuckGo) """Currency convert (DuckDuckGo)"""
"""
import typing as t
import json import json
from searx.result_types import EngineResults from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.search.processors import OnlineCurrenciesParams
from searx.extended_types import SXNG_Response
# about # about
about = { about = {
"website": 'https://duckduckgo.com/', "website": "https://duckduckgo.com/",
"wikidata_id": 'Q12805', "wikidata_id": "Q12805",
"official_api_documentation": 'https://duckduckgo.com/api', "official_api_documentation": "https://duckduckgo.com/api",
"use_official_api": False, "use_official_api": False,
"require_api_key": False, "require_api_key": False,
"results": 'JSONP', "results": "JSONP",
"description": "Service from DuckDuckGo.", "description": "Service from DuckDuckGo.",
} }
engine_type = 'online_currency' engine_type = "online_currency"
categories = [] categories = ["currency", "general"]
base_url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
base_url = "https://duckduckgo.com/js/spice/currency/1/%(from_iso4217)s/%(to_iso4217)s"
ddg_link_url = "https://duckduckgo.com/?q=%(from_iso4217)s+to+%(to_iso4217)s"
weight = 100 weight = 100
https_support = True
def request(query: str, params: "OnlineCurrenciesParams") -> None: # pylint: disable=unused-argument
params["url"] = base_url % params
def request(_query, params): def response(resp: "SXNG_Response") -> EngineResults:
params['url'] = base_url.format(params['from'], params['to'])
return params
def response(resp) -> EngineResults:
res = EngineResults() res = EngineResults()
# remove first and last lines to get only json # remove first and last lines to get only json
json_resp = resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2] json_resp = resp.text[resp.text.find("\n") + 1 : resp.text.rfind("\n") - 2]
try: try:
conversion_rate = float(json.loads(json_resp)["to"][0]["mid"]) conversion_rate = float(json.loads(json_resp)["to"][0]["mid"])
except IndexError: except IndexError:
return res return res
answer = '{0} {1} = {2} {3}, 1 {1} ({5}) = {4} {3} ({6})'.format(
resp.search_params['amount'],
resp.search_params['from'],
resp.search_params['amount'] * conversion_rate,
resp.search_params['to'],
conversion_rate,
resp.search_params['from_name'],
resp.search_params['to_name'],
)
url = f"https://duckduckgo.com/?q={resp.search_params['from']}+to+{resp.search_params['to']}" params: OnlineCurrenciesParams = resp.search_params # pyright: ignore[reportAssignmentType]
answer = "{0} {1} = {2} {3} (1 {5} : {4} {6})".format(
params["amount"],
params["from_iso4217"],
params["amount"] * conversion_rate,
params["to_iso4217"],
conversion_rate,
params["from_name"],
params["to_name"],
)
url = ddg_link_url % params
res.add(res.types.Answer(answer=answer, url=url)) res.add(res.types.Answer(answer=answer, url=url))
return res return res

View File

@ -24,7 +24,6 @@ engine_type = 'online_dictionary'
categories = ['general', 'translate'] categories = ['general', 'translate']
base_url = "https://dictzone.com" base_url = "https://dictzone.com"
weight = 100 weight = 100
https_support = True
def request(query, params): # pylint: disable=unused-argument def request(query, params): # pylint: disable=unused-argument

View File

@ -3,7 +3,6 @@
""" """
from urllib.parse import urlunparse from urllib.parse import urlunparse
from json import dumps
# about # about
about = { about = {
@ -56,7 +55,7 @@ def request(query, params):
query_data = query_data_template query_data = query_data_template
query_data["query"]["multi_match"]["query"] = query query_data["query"]["multi_match"]["query"] = query
query_data["from"] = (params["pageno"] - 1) * number_of_results query_data["from"] = (params["pageno"] - 1) * number_of_results
params["data"] = dumps(query_data) params["json"] = query_data
return params return params

View File

@ -22,7 +22,6 @@ categories = ['general', 'translate']
api_url = "https://api.mymemory.translated.net" api_url = "https://api.mymemory.translated.net"
web_url = "https://mymemory.translated.net" web_url = "https://mymemory.translated.net"
weight = 100 weight = 100
https_support = True
api_key = '' api_key = ''

View File

@ -74,9 +74,9 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException):
""" """
if suspended_time is None: if suspended_time is None:
suspended_time = self._get_default_suspended_time() suspended_time = self._get_default_suspended_time()
super().__init__(message + ', suspended_time=' + str(suspended_time)) self.message: str = f"{message} (suspended_time={suspended_time})"
self.suspended_time: int = suspended_time self.suspended_time: int = suspended_time
self.message: str = message super().__init__(self.message)
def _get_default_suspended_time(self) -> int: def _get_default_suspended_time(self) -> int:
from searx import get_setting # pylint: disable=C0415 from searx import get_setting # pylint: disable=C0415

View File

@ -30,6 +30,7 @@ import httpx
if typing.TYPE_CHECKING: if typing.TYPE_CHECKING:
import searx.preferences import searx.preferences
import searx.results import searx.results
from searx.search.processors import ParamTypes
class SXNG_Request(flask.Request): class SXNG_Request(flask.Request):
@ -78,6 +79,8 @@ class SXNG_Response(httpx.Response):
response = typing.cast(SXNG_Response, response) response = typing.cast(SXNG_Response, response)
if response.ok: if response.ok:
... ...
query_was = search_params["query"]
""" """
ok: bool ok: bool
search_params: "ParamTypes"

View File

@ -24,17 +24,6 @@ LogParametersType = tuple[str, ...]
class ErrorContext: # pylint: disable=missing-class-docstring class ErrorContext: # pylint: disable=missing-class-docstring
__slots__ = (
'filename',
'function',
'line_no',
'code',
'exception_classname',
'log_message',
'log_parameters',
'secondary',
)
def __init__( # pylint: disable=too-many-arguments def __init__( # pylint: disable=too-many-arguments
self, self,
filename: str, filename: str,
@ -159,7 +148,7 @@ def get_messages(exc, filename) -> tuple[str, ...]: # pylint: disable=too-many-
return () return ()
def get_exception_classname(exc: Exception) -> str: def get_exception_classname(exc: BaseException) -> str:
exc_class = exc.__class__ exc_class = exc.__class__
exc_name = exc_class.__qualname__ exc_name = exc_class.__qualname__
exc_module = exc_class.__module__ exc_module = exc_class.__module__
@ -182,7 +171,7 @@ def get_error_context(
return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary) return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary)
def count_exception(engine_name: str, exc: Exception, secondary: bool = False) -> None: def count_exception(engine_name: str, exc: BaseException, secondary: bool = False) -> None:
if not settings['general']['enable_metrics']: if not settings['general']['enable_metrics']:
return return
framerecords = inspect.trace() framerecords = inspect.trace()

View File

@ -1,7 +1,7 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, global-statement # pylint: disable=missing-module-docstring, global-statement
__all__ = ["initialize", "check_network_configuration", "raise_for_httperror"] __all__ = ["get_network", "initialize", "check_network_configuration", "raise_for_httperror"]
import typing as t import typing as t
@ -22,6 +22,8 @@ from .network import get_network, initialize, check_network_configuration # pyl
from .client import get_loop from .client import get_loop
from .raise_for_httperror import raise_for_httperror from .raise_for_httperror import raise_for_httperror
if t.TYPE_CHECKING:
from searx.network.network import Network
THREADLOCAL = threading.local() THREADLOCAL = threading.local()
"""Thread-local data is data for thread specific values.""" """Thread-local data is data for thread specific values."""
@ -31,7 +33,7 @@ def reset_time_for_thread():
THREADLOCAL.total_time = 0 THREADLOCAL.total_time = 0
def get_time_for_thread(): def get_time_for_thread() -> float | None:
"""returns thread's total time or None""" """returns thread's total time or None"""
return THREADLOCAL.__dict__.get('total_time') return THREADLOCAL.__dict__.get('total_time')
@ -45,7 +47,7 @@ def set_context_network_name(network_name: str):
THREADLOCAL.network = get_network(network_name) THREADLOCAL.network = get_network(network_name)
def get_context_network(): def get_context_network() -> "Network":
"""If set return thread's network. """If set return thread's network.
If unset, return value from :py:obj:`get_network`. If unset, return value from :py:obj:`get_network`.
@ -68,7 +70,7 @@ def _record_http_time():
THREADLOCAL.total_time += time_after_request - time_before_request THREADLOCAL.total_time += time_after_request - time_before_request
def _get_timeout(start_time: float, kwargs): def _get_timeout(start_time: float, kwargs: t.Any) -> float:
# pylint: disable=too-many-branches # pylint: disable=too-many-branches
timeout: float | None timeout: float | None
@ -91,7 +93,7 @@ def _get_timeout(start_time: float, kwargs):
return timeout return timeout
def request(method, url, **kwargs) -> SXNG_Response: def request(method: str, url: str, **kwargs: t.Any) -> SXNG_Response:
"""same as requests/requests/api.py request(...)""" """same as requests/requests/api.py request(...)"""
with _record_http_time() as start_time: with _record_http_time() as start_time:
network = get_context_network() network = get_context_network()
@ -183,15 +185,15 @@ def head(url: str, **kwargs: t.Any) -> SXNG_Response:
return request('head', url, **kwargs) return request('head', url, **kwargs)
def post(url: str, data=None, **kwargs: t.Any) -> SXNG_Response: def post(url: str, data: dict[str, t.Any] | None = None, **kwargs: t.Any) -> SXNG_Response:
return request('post', url, data=data, **kwargs) return request('post', url, data=data, **kwargs)
def put(url: str, data=None, **kwargs: t.Any) -> SXNG_Response: def put(url: str, data: dict[str, t.Any] | None = None, **kwargs: t.Any) -> SXNG_Response:
return request('put', url, data=data, **kwargs) return request('put', url, data=data, **kwargs)
def patch(url: str, data=None, **kwargs: t.Any) -> SXNG_Response: def patch(url: str, data: dict[str, t.Any] | None = None, **kwargs: t.Any) -> SXNG_Response:
return request('patch', url, data=data, **kwargs) return request('patch', url, data=data, **kwargs)
@ -250,7 +252,7 @@ def _close_response_method(self):
continue continue
def stream(method: str, url: str, **kwargs: t.Any) -> tuple[httpx.Response, Iterable[bytes]]: def stream(method: str, url: str, **kwargs: t.Any) -> tuple[SXNG_Response, Iterable[bytes]]:
"""Replace httpx.stream. """Replace httpx.stream.
Usage: Usage:

View File

@ -138,7 +138,7 @@ def get_transport_for_socks_proxy(
password=proxy_password, password=proxy_password,
rdns=rdns, rdns=rdns,
loop=get_loop(), loop=get_loop(),
verify=_verify, verify=_verify, # pyright: ignore[reportArgumentType]
http2=http2, http2=http2,
local_address=local_address, local_address=local_address,
limits=limit, limits=limit,

View File

@ -1,8 +1,12 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=global-statement # pylint: disable=global-statement
# pylint: disable=missing-module-docstring, missing-class-docstring # pylint: disable=missing-module-docstring, missing-class-docstring
__all__ = ["get_network"]
import typing as t import typing as t
from collections.abc import Generator, AsyncIterator from collections.abc import Generator
import atexit import atexit
import asyncio import asyncio
@ -74,7 +78,7 @@ class Network:
using_tor_proxy: bool = False, using_tor_proxy: bool = False,
local_addresses: str | list[str] | None = None, local_addresses: str | list[str] | None = None,
retries: int = 0, retries: int = 0,
retry_on_http_error: None = None, retry_on_http_error: bool = False,
max_redirects: int = 30, max_redirects: int = 30,
logger_name: str = None, # pyright: ignore[reportArgumentType] logger_name: str = None, # pyright: ignore[reportArgumentType]
): ):
@ -232,14 +236,14 @@ class Network:
return kwargs_clients return kwargs_clients
@staticmethod @staticmethod
def extract_do_raise_for_httperror(kwargs): def extract_do_raise_for_httperror(kwargs: dict[str, t.Any]):
do_raise_for_httperror = True do_raise_for_httperror = True
if 'raise_for_httperror' in kwargs: if 'raise_for_httperror' in kwargs:
do_raise_for_httperror = kwargs['raise_for_httperror'] do_raise_for_httperror = kwargs['raise_for_httperror']
del kwargs['raise_for_httperror'] del kwargs['raise_for_httperror']
return do_raise_for_httperror return do_raise_for_httperror
def patch_response(self, response: httpx.Response | SXNG_Response, do_raise_for_httperror: bool) -> SXNG_Response: def patch_response(self, response: httpx.Response, do_raise_for_httperror: bool) -> SXNG_Response:
if isinstance(response, httpx.Response): if isinstance(response, httpx.Response):
response = t.cast(SXNG_Response, response) response = t.cast(SXNG_Response, response)
# requests compatibility (response is not streamed) # requests compatibility (response is not streamed)
@ -255,7 +259,7 @@ class Network:
raise raise
return response return response
def is_valid_response(self, response: SXNG_Response): def is_valid_response(self, response: httpx.Response):
# pylint: disable=too-many-boolean-expressions # pylint: disable=too-many-boolean-expressions
if ( if (
(self.retry_on_http_error is True and 400 <= response.status_code <= 599) (self.retry_on_http_error is True and 400 <= response.status_code <= 599)
@ -265,9 +269,7 @@ class Network:
return False return False
return True return True
async def call_client( async def call_client(self, stream: bool, method: str, url: str, **kwargs: t.Any) -> SXNG_Response:
self, stream: bool, method: str, url: str, **kwargs: t.Any
) -> AsyncIterator[SXNG_Response] | None:
retries = self.retries retries = self.retries
was_disconnected = False was_disconnected = False
do_raise_for_httperror = Network.extract_do_raise_for_httperror(kwargs) do_raise_for_httperror = Network.extract_do_raise_for_httperror(kwargs)
@ -278,9 +280,9 @@ class Network:
client.cookies = httpx.Cookies(cookies) client.cookies = httpx.Cookies(cookies)
try: try:
if stream: if stream:
response = client.stream(method, url, **kwargs) # pyright: ignore[reportAny] response = client.stream(method, url, **kwargs)
else: else:
response = await client.request(method, url, **kwargs) # pyright: ignore[reportAny] response = await client.request(method, url, **kwargs)
if self.is_valid_response(response) or retries <= 0: if self.is_valid_response(response) or retries <= 0:
return self.patch_response(response, do_raise_for_httperror) return self.patch_response(response, do_raise_for_httperror)
except httpx.RemoteProtocolError as e: except httpx.RemoteProtocolError as e:
@ -298,7 +300,7 @@ class Network:
raise e raise e
retries -= 1 retries -= 1
async def request(self, method: str, url: str, **kwargs): async def request(self, method: str, url: str, **kwargs: t.Any) -> SXNG_Response:
return await self.call_client(False, method, url, **kwargs) return await self.call_client(False, method, url, **kwargs)
async def stream(self, method: str, url: str, **kwargs): async def stream(self, method: str, url: str, **kwargs):
@ -358,7 +360,7 @@ def initialize(
'proxies': settings_outgoing['proxies'], 'proxies': settings_outgoing['proxies'],
'max_redirects': settings_outgoing['max_redirects'], 'max_redirects': settings_outgoing['max_redirects'],
'retries': settings_outgoing['retries'], 'retries': settings_outgoing['retries'],
'retry_on_http_error': None, 'retry_on_http_error': False,
} }
def new_network(params: dict[str, t.Any], logger_name: str | None = None): def new_network(params: dict[str, t.Any], logger_name: str | None = None):

View File

@ -1,8 +1,7 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, too-few-public-methods # pylint: disable=missing-module-docstring, too-few-public-methods
# the public namespace has not yet been finally defined .. __all__ = ["SearchWithPlugins"]
# __all__ = [..., ]
import typing as t import typing as t
@ -22,7 +21,7 @@ from searx.metrics import initialize as initialize_metrics, counter_inc
from searx.network import initialize as initialize_network, check_network_configuration from searx.network import initialize as initialize_network, check_network_configuration
from searx.results import ResultContainer from searx.results import ResultContainer
from searx.search.checker import initialize as initialize_checker from searx.search.checker import initialize as initialize_checker
from searx.search.processors import PROCESSORS, initialize as initialize_processors from searx.search.processors import PROCESSORS
if t.TYPE_CHECKING: if t.TYPE_CHECKING:
@ -44,7 +43,7 @@ def initialize(
if check_network: if check_network:
check_network_configuration() check_network_configuration()
initialize_metrics([engine['name'] for engine in settings_engines], enable_metrics) initialize_metrics([engine['name'] for engine in settings_engines], enable_metrics)
initialize_processors(settings_engines) PROCESSORS.init(settings_engines)
if enable_checker: if enable_checker:
initialize_checker() initialize_checker()
@ -52,8 +51,6 @@ def initialize(
class Search: class Search:
"""Search information container""" """Search information container"""
__slots__ = "search_query", "result_container", "start_time", "actual_timeout" # type: ignore
def __init__(self, search_query: "SearchQuery"): def __init__(self, search_query: "SearchQuery"):
"""Initialize the Search""" """Initialize the Search"""
# init vars # init vars
@ -185,8 +182,6 @@ class Search:
class SearchWithPlugins(Search): class SearchWithPlugins(Search):
"""Inherit from the Search class, add calls to the plugins.""" """Inherit from the Search class, add calls to the plugins."""
__slots__ = 'user_plugins', 'request'
def __init__(self, search_query: "SearchQuery", request: "SXNG_Request", user_plugins: list[str]): def __init__(self, search_query: "SearchQuery", request: "SXNG_Request", user_plugins: list[str]):
super().__init__(search_query) super().__init__(search_query)
self.user_plugins = user_plugins self.user_plugins = user_plugins

View File

@ -24,42 +24,29 @@ class EngineRef:
return hash((self.name, self.category)) return hash((self.name, self.category))
@typing.final
class SearchQuery: class SearchQuery:
"""container for all the search parameters (query, language, etc...)""" """container for all the search parameters (query, language, etc...)"""
__slots__ = (
'query',
'engineref_list',
'lang',
'locale',
'safesearch',
'pageno',
'time_range',
'timeout_limit',
'external_bang',
'engine_data',
'redirect_to_first_result',
)
def __init__( def __init__(
self, self,
query: str, query: str,
engineref_list: typing.List[EngineRef], engineref_list: list[EngineRef],
lang: str = 'all', lang: str = 'all',
safesearch: int = 0, safesearch: typing.Literal[0, 1, 2] = 0,
pageno: int = 1, pageno: int = 1,
time_range: typing.Optional[str] = None, time_range: typing.Literal["day", "week", "month", "year"] | None = None,
timeout_limit: typing.Optional[float] = None, timeout_limit: float | None = None,
external_bang: typing.Optional[str] = None, external_bang: str | None = None,
engine_data: typing.Optional[typing.Dict[str, str]] = None, engine_data: dict[str, dict[str, str]] | None = None,
redirect_to_first_result: typing.Optional[bool] = None, redirect_to_first_result: bool | None = None,
): # pylint:disable=too-many-arguments ): # pylint:disable=too-many-arguments
self.query = query self.query = query
self.engineref_list = engineref_list self.engineref_list = engineref_list
self.lang = lang self.lang = lang
self.safesearch = safesearch self.safesearch: typing.Literal[0, 1, 2] = safesearch
self.pageno = pageno self.pageno = pageno
self.time_range = time_range self.time_range: typing.Literal["day", "week", "month", "year"] | None = time_range
self.timeout_limit = timeout_limit self.timeout_limit = timeout_limit
self.external_bang = external_bang self.external_bang = external_bang
self.engine_data = engine_data or {} self.engine_data = engine_data or {}

View File

@ -2,83 +2,95 @@
"""Implement request processors used by engine-types.""" """Implement request processors used by engine-types."""
__all__ = [ __all__ = [
'EngineProcessor', "OfflineParamTypes",
'OfflineProcessor', "OnlineCurrenciesParams",
'OnlineProcessor', "OnlineDictParams",
'OnlineDictionaryProcessor', "OnlineParamTypes",
'OnlineCurrencyProcessor', "OnlineParams",
'OnlineUrlSearchProcessor', "OnlineUrlSearchParams",
'PROCESSORS', "PROCESSORS",
"ParamTypes",
"RequestParams",
] ]
import typing as t import typing as t
import threading
from searx import logger from searx import logger
from searx import engines from searx import engines
from .online import OnlineProcessor from .abstract import EngineProcessor, RequestParams
from .offline import OfflineProcessor from .offline import OfflineProcessor
from .online_dictionary import OnlineDictionaryProcessor from .online import OnlineProcessor, OnlineParams
from .online_currency import OnlineCurrencyProcessor from .online_dictionary import OnlineDictionaryProcessor, OnlineDictParams
from .online_url_search import OnlineUrlSearchProcessor from .online_currency import OnlineCurrencyProcessor, OnlineCurrenciesParams
from .abstract import EngineProcessor from .online_url_search import OnlineUrlSearchProcessor, OnlineUrlSearchParams
if t.TYPE_CHECKING: logger = logger.getChild("search.processors")
from searx.enginelib import Engine
logger = logger.getChild('search.processors') OnlineParamTypes: t.TypeAlias = OnlineParams | OnlineDictParams | OnlineCurrenciesParams | OnlineUrlSearchParams
PROCESSORS: dict[str, EngineProcessor] = {} OfflineParamTypes: t.TypeAlias = RequestParams
"""Cache request processors, stored by *engine-name* (:py:func:`initialize`) ParamTypes: t.TypeAlias = OfflineParamTypes | OnlineParamTypes
class ProcessorMap(dict[str, EngineProcessor]):
"""Class to manage :py:obj:`EngineProcessor` instances in a key/value map
(instances stored by *engine-name*)."""
processor_types: dict[str, type[EngineProcessor]] = {
OnlineProcessor.engine_type: OnlineProcessor,
OfflineProcessor.engine_type: OfflineProcessor,
OnlineDictionaryProcessor.engine_type: OnlineDictionaryProcessor,
OnlineCurrencyProcessor.engine_type: OnlineCurrencyProcessor,
OnlineUrlSearchProcessor.engine_type: OnlineUrlSearchProcessor,
}
def init(self, engine_list: list[dict[str, t.Any]]):
"""Initialize all engines and registers a processor for each engine."""
for eng_settings in engine_list:
eng_name: str = eng_settings["name"]
if eng_settings.get("inactive", False) is True:
logger.info("Engine of name '%s' is inactive.", eng_name)
continue
eng_obj = engines.engines.get(eng_name)
if eng_obj is None:
logger.warning("Engine of name '%s' does not exists.", eng_name)
continue
eng_type = getattr(eng_obj, "engine_type", "online")
proc_cls = self.processor_types.get(eng_type)
if proc_cls is None:
logger.error("Engine '%s' is of unknown engine_type: %s", eng_type)
continue
# initialize (and register) the engine
eng_proc = proc_cls(eng_obj)
eng_proc.initialize(self.register_processor)
def register_processor(self, eng_proc: EngineProcessor, eng_proc_ok: bool) -> bool:
"""Register the :py:obj:`EngineProcessor`.
This method is usually passed as a callback to the initialization of the
:py:obj:`EngineProcessor`.
The value (true/false) passed in ``eng_proc_ok`` indicates whether the
initialization of the :py:obj:`EngineProcessor` was successful; if this
is not the case, the processor is not registered.
"""
if eng_proc_ok:
self[eng_proc.engine.name] = eng_proc
# logger.debug("registered engine processor: %s", eng_proc.engine.name)
else:
logger.error("init method of engine %s failed (%s).", eng_proc.engine.name)
return eng_proc_ok
PROCESSORS = ProcessorMap()
"""Global :py:obj:`ProcessorMap`.
:meta hide-value: :meta hide-value:
""" """
def get_processor_class(engine_type: str) -> type[EngineProcessor] | None:
"""Return processor class according to the ``engine_type``"""
for c in [
OnlineProcessor,
OfflineProcessor,
OnlineDictionaryProcessor,
OnlineCurrencyProcessor,
OnlineUrlSearchProcessor,
]:
if c.engine_type == engine_type:
return c
return None
def get_processor(engine: "Engine | ModuleType", engine_name: str) -> EngineProcessor | None:
"""Return processor instance that fits to ``engine.engine.type``"""
engine_type = getattr(engine, 'engine_type', 'online')
processor_class = get_processor_class(engine_type)
if processor_class is not None:
return processor_class(engine, engine_name)
return None
def initialize_processor(processor: EngineProcessor):
"""Initialize one processor
Call the init function of the engine
"""
if processor.has_initialize_function:
_t = threading.Thread(target=processor.initialize, daemon=True)
_t.start()
def initialize(engine_list: list[dict[str, t.Any]]):
"""Initialize all engines and store a processor for each engine in
:py:obj:`PROCESSORS`."""
for engine_data in engine_list:
engine_name: str = engine_data['name']
engine = engines.engines.get(engine_name)
if engine:
processor = get_processor(engine, engine_name)
if processor is None:
engine.logger.error('Error get processor for engine %s', engine_name)
else:
initialize_processor(processor)
PROCESSORS[engine_name] = processor

View File

@ -1,7 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Abstract base classes for engine request processors. """Abstract base classes for all engine processors."""
"""
import typing as t import typing as t
@ -10,25 +8,75 @@ import threading
from abc import abstractmethod, ABC from abc import abstractmethod, ABC
from timeit import default_timer from timeit import default_timer
from searx import settings, logger from searx import get_setting
from searx import logger
from searx.engines import engines from searx.engines import engines
from searx.network import get_time_for_thread, get_network from searx.network import get_time_for_thread, get_network
from searx.metrics import histogram_observe, counter_inc, count_exception, count_error from searx.metrics import histogram_observe, counter_inc, count_exception, count_error
from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException from searx.exceptions import SearxEngineAccessDeniedException
from searx.utils import get_engine_from_settings from searx.utils import get_engine_from_settings
if t.TYPE_CHECKING: if t.TYPE_CHECKING:
import types
from searx.enginelib import Engine from searx.enginelib import Engine
from searx.search.models import SearchQuery
from searx.results import ResultContainer
from searx.result_types import Result, LegacyResult # pyright: ignore[reportPrivateLocalImportUsage]
logger = logger.getChild('searx.search.processor')
SUSPENDED_STATUS: dict[int | str, 'SuspendedStatus'] = {} logger = logger.getChild("searx.search.processor")
SUSPENDED_STATUS: dict[int | str, "SuspendedStatus"] = {}
class RequestParams(t.TypedDict):
"""Basic quantity of the Request parameters of all engine types."""
query: str
"""Search term, stripped of search syntax arguments."""
category: str
"""Current category, like ``general``.
.. hint::
This field is deprecated, don't use it in further implementations.
This field is currently *arbitrarily* filled with the name of "one""
category (the name of the first category of the engine). In practice,
however, it is not clear what this "one" category should be; in principle,
multiple categories can also be activated in a search.
"""
pageno: int
"""Current page number, where the first page is ``1``."""
safesearch: t.Literal[0, 1, 2]
"""Safe-Search filter (0:normal, 1:moderate, 2:strict)."""
time_range: t.Literal["day", "week", "month", "year"] | None
"""Time-range filter."""
engine_data: dict[str, str]
"""Allows the transfer of (engine specific) data to the next request of the
client. In the case of the ``online`` engines, this data is delivered to
the client via the HTML ``<form>`` in response.
If the client then sends this form back to the server with the next request,
this data will be available.
This makes it possible to carry data from one request to the next without a
session context, but this feature (is fragile) and should only be used in
exceptional cases. See also :ref:`engine_data`."""
searxng_locale: str
"""Language / locale filter from the search request, a string like 'all',
'en', 'en-US', 'zh-HK' .. and others, for more details see
:py:obj:`searx.locales`."""
class SuspendedStatus: class SuspendedStatus:
"""Class to handle suspend state.""" """Class to handle suspend state."""
__slots__ = 'suspend_end_time', 'suspend_reason', 'continuous_errors', 'lock'
def __init__(self): def __init__(self):
self.lock: threading.Lock = threading.Lock() self.lock: threading.Lock = threading.Lock()
self.continuous_errors: int = 0 self.continuous_errors: int = 0
@ -39,18 +87,18 @@ class SuspendedStatus:
def is_suspended(self): def is_suspended(self):
return self.suspend_end_time >= default_timer() return self.suspend_end_time >= default_timer()
def suspend(self, suspended_time: int, suspend_reason: str): def suspend(self, suspended_time: int | None, suspend_reason: str):
with self.lock: with self.lock:
# update continuous_errors / suspend_end_time # update continuous_errors / suspend_end_time
self.continuous_errors += 1 self.continuous_errors += 1
if suspended_time is None: if suspended_time is None:
suspended_time = min( max_ban: int = get_setting("search.max_ban_time_on_fail")
settings['search']['max_ban_time_on_fail'], ban_fail: int = get_setting("search.ban_time_on_fail")
self.continuous_errors * settings['search']['ban_time_on_fail'], suspended_time = min(max_ban, ban_fail)
)
self.suspend_end_time = default_timer() + suspended_time self.suspend_end_time = default_timer() + suspended_time
self.suspend_reason = suspend_reason self.suspend_reason = suspend_reason
logger.debug('Suspend for %i seconds', suspended_time) logger.debug("Suspend for %i seconds", suspended_time)
def resume(self): def resume(self):
with self.lock: with self.lock:
@ -63,31 +111,63 @@ class SuspendedStatus:
class EngineProcessor(ABC): class EngineProcessor(ABC):
"""Base classes used for all types of request processors.""" """Base classes used for all types of request processors."""
__slots__ = 'engine', 'engine_name', 'suspended_status', 'logger' engine_type: str
def __init__(self, engine: "Engine|ModuleType", engine_name: str): def __init__(self, engine: "Engine|types.ModuleType"):
self.engine: "Engine" = engine self.engine: "Engine" = engine # pyright: ignore[reportAttributeAccessIssue]
self.engine_name: str = engine_name self.logger: logging.Logger = engines[engine.name].logger
self.logger: logging.Logger = engines[engine_name].logger key = get_network(self.engine.name)
key = get_network(self.engine_name) key = id(key) if key else self.engine.name
key = id(key) if key else self.engine_name
self.suspended_status: SuspendedStatus = SUSPENDED_STATUS.setdefault(key, SuspendedStatus()) self.suspended_status: SuspendedStatus = SUSPENDED_STATUS.setdefault(key, SuspendedStatus())
def initialize(self): def initialize(self, callback: t.Callable[["EngineProcessor", bool], bool]):
"""Initialization of *this* :py:obj:`EngineProcessor`.
If processor's engine has an ``init`` method, it is called first.
Engine's ``init`` method is executed in a thread, meaning that the
*registration* (the ``callback``) may occur later and is not already
established by the return from this registration method.
Registration only takes place if the ``init`` method is not available or
is successfully run through.
"""
if not hasattr(self.engine, "init"):
callback(self, True)
return
if not callable(self.engine.init):
logger.error("Engine's init method isn't a callable (is of type: %s).", type(self.engine.init))
callback(self, False)
return
def __init_processor_thread():
eng_ok = self.init_engine()
callback(self, eng_ok)
# set up and start a thread
threading.Thread(target=__init_processor_thread, daemon=True).start()
def init_engine(self) -> bool:
eng_setting = get_engine_from_settings(self.engine.name)
init_ok: bool | None = False
try: try:
self.engine.init(get_engine_from_settings(self.engine_name)) init_ok = self.engine.init(eng_setting)
except SearxEngineResponseException as exc:
self.logger.warning('Fail to initialize // %s', exc)
except Exception: # pylint: disable=broad-except except Exception: # pylint: disable=broad-except
self.logger.exception('Fail to initialize') logger.exception("Init method of engine %s failed due to an exception.", self.engine.name)
else: init_ok = False
self.logger.debug('Initialized') # In older engines, None is returned from the init method, which is
# equivalent to indicating that the initialization was successful.
if init_ok is None:
init_ok = True
return init_ok
@property def handle_exception(
def has_initialize_function(self): self,
return hasattr(self.engine, 'init') result_container: "ResultContainer",
exception_or_message: BaseException | str,
def handle_exception(self, result_container, exception_or_message, suspend=False): suspend: bool = False,
):
# update result_container # update result_container
if isinstance(exception_or_message, BaseException): if isinstance(exception_or_message, BaseException):
exception_class = exception_or_message.__class__ exception_class = exception_or_message.__class__
@ -96,13 +176,13 @@ class EngineProcessor(ABC):
error_message = module_name + exception_class.__qualname__ error_message = module_name + exception_class.__qualname__
else: else:
error_message = exception_or_message error_message = exception_or_message
result_container.add_unresponsive_engine(self.engine_name, error_message) result_container.add_unresponsive_engine(self.engine.name, error_message)
# metrics # metrics
counter_inc('engine', self.engine_name, 'search', 'count', 'error') counter_inc('engine', self.engine.name, 'search', 'count', 'error')
if isinstance(exception_or_message, BaseException): if isinstance(exception_or_message, BaseException):
count_exception(self.engine_name, exception_or_message) count_exception(self.engine.name, exception_or_message)
else: else:
count_error(self.engine_name, exception_or_message) count_error(self.engine.name, exception_or_message)
# suspend the engine ? # suspend the engine ?
if suspend: if suspend:
suspended_time = None suspended_time = None
@ -110,51 +190,63 @@ class EngineProcessor(ABC):
suspended_time = exception_or_message.suspended_time suspended_time = exception_or_message.suspended_time
self.suspended_status.suspend(suspended_time, error_message) # pylint: disable=no-member self.suspended_status.suspend(suspended_time, error_message) # pylint: disable=no-member
def _extend_container_basic(self, result_container, start_time, search_results): def _extend_container_basic(
self,
result_container: "ResultContainer",
start_time: float,
search_results: "list[Result | LegacyResult]",
):
# update result_container # update result_container
result_container.extend(self.engine_name, search_results) result_container.extend(self.engine.name, search_results)
engine_time = default_timer() - start_time engine_time = default_timer() - start_time
page_load_time = get_time_for_thread() page_load_time = get_time_for_thread()
result_container.add_timing(self.engine_name, engine_time, page_load_time) result_container.add_timing(self.engine.name, engine_time, page_load_time)
# metrics # metrics
counter_inc('engine', self.engine_name, 'search', 'count', 'successful') counter_inc('engine', self.engine.name, 'search', 'count', 'successful')
histogram_observe(engine_time, 'engine', self.engine_name, 'time', 'total') histogram_observe(engine_time, 'engine', self.engine.name, 'time', 'total')
if page_load_time is not None: if page_load_time is not None:
histogram_observe(page_load_time, 'engine', self.engine_name, 'time', 'http') histogram_observe(page_load_time, 'engine', self.engine.name, 'time', 'http')
def extend_container(self, result_container, start_time, search_results): def extend_container(
self,
result_container: "ResultContainer",
start_time: float,
search_results: "list[Result | LegacyResult]|None",
):
if getattr(threading.current_thread(), '_timeout', False): if getattr(threading.current_thread(), '_timeout', False):
# the main thread is not waiting anymore # the main thread is not waiting anymore
self.handle_exception(result_container, 'timeout', None) self.handle_exception(result_container, 'timeout', False)
else: else:
# check if the engine accepted the request # check if the engine accepted the request
if search_results is not None: if search_results is not None:
self._extend_container_basic(result_container, start_time, search_results) self._extend_container_basic(result_container, start_time, search_results)
self.suspended_status.resume() self.suspended_status.resume()
def extend_container_if_suspended(self, result_container): def extend_container_if_suspended(self, result_container: "ResultContainer") -> bool:
if self.suspended_status.is_suspended: if self.suspended_status.is_suspended:
result_container.add_unresponsive_engine( result_container.add_unresponsive_engine(
self.engine_name, self.suspended_status.suspend_reason, suspended=True self.engine.name, self.suspended_status.suspend_reason, suspended=True
) )
return True return True
return False return False
def get_params(self, search_query, engine_category) -> dict[str, t.Any]: def get_params(self, search_query: "SearchQuery", engine_category: str) -> RequestParams | None:
"""Returns a set of (see :ref:`request params <engine request arguments>`) or """Returns a dictionary with the :ref:`request parameters <engine
``None`` if request is not supported. request arguments>` (:py:obj:`RequestParams`), if the search condition
is not supported by the engine, ``None`` is returned:
Not supported conditions (``None`` is returned): - *time range* filter in search conditions, but the engine does not have
a corresponding filter
- page number > 1 when engine does not support paging
- page number > ``max_page``
- A page-number > 1 when engine does not support paging.
- A time range when the engine does not support time range.
""" """
# if paging is not supported, skip # if paging is not supported, skip
if search_query.pageno > 1 and not self.engine.paging: if search_query.pageno > 1 and not self.engine.paging:
return None return None
# if max page is reached, skip # if max page is reached, skip
max_page = self.engine.max_page or settings['search']['max_page'] max_page = self.engine.max_page or get_setting("search.max_page")
if max_page and max_page < search_query.pageno: if max_page and max_page < search_query.pageno:
return None return None
@ -162,39 +254,45 @@ class EngineProcessor(ABC):
if search_query.time_range and not self.engine.time_range_support: if search_query.time_range and not self.engine.time_range_support:
return None return None
params = {} params: RequestParams = {
params["query"] = search_query.query "query": search_query.query,
params['category'] = engine_category "category": engine_category,
params['pageno'] = search_query.pageno "pageno": search_query.pageno,
params['safesearch'] = search_query.safesearch "safesearch": search_query.safesearch,
params['time_range'] = search_query.time_range "time_range": search_query.time_range,
params['engine_data'] = search_query.engine_data.get(self.engine_name, {}) "engine_data": search_query.engine_data.get(self.engine.name, {}),
params['searxng_locale'] = search_query.lang "searxng_locale": search_query.lang,
}
# deprecated / vintage --> use params['searxng_locale'] # deprecated / vintage --> use params["searxng_locale"]
# #
# Conditions related to engine's traits are implemented in engine.traits # Conditions related to engine's traits are implemented in engine.traits
# module. Don't do 'locale' decisions here in the abstract layer of the # module. Don't do "locale" decisions here in the abstract layer of the
# search processor, just pass the value from user's choice unchanged to # search processor, just pass the value from user's choice unchanged to
# the engine request. # the engine request.
if hasattr(self.engine, 'language') and self.engine.language: if hasattr(self.engine, "language") and self.engine.language:
params['language'] = self.engine.language params["language"] = self.engine.language # pyright: ignore[reportGeneralTypeIssues]
else: else:
params['language'] = search_query.lang params["language"] = search_query.lang # pyright: ignore[reportGeneralTypeIssues]
return params return params
@abstractmethod @abstractmethod
def search(self, query, params, result_container, start_time, timeout_limit): def search(
self,
query: str,
params: RequestParams,
result_container: "ResultContainer",
start_time: float,
timeout_limit: float,
):
pass pass
def get_tests(self): def get_tests(self):
tests = getattr(self.engine, 'tests', None) # deprecated!
if tests is None: return {}
tests = getattr(self.engine, 'additional_tests', {})
tests.update(self.get_default_tests())
return tests
def get_default_tests(self): def get_default_tests(self):
# deprecated!
return {} return {}

View File

@ -1,26 +1,32 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Processors for engine-type: ``offline`` """Processors for engine-type: ``offline``"""
""" import typing as t
from .abstract import EngineProcessor, RequestParams
from .abstract import EngineProcessor if t.TYPE_CHECKING:
from searx.results import ResultContainer
class OfflineProcessor(EngineProcessor): class OfflineProcessor(EngineProcessor):
"""Processor class used by ``offline`` engines""" """Processor class used by ``offline`` engines."""
engine_type = 'offline' engine_type: str = "offline"
def _search_basic(self, query, params): def search(
return self.engine.search(query, params) self,
query: str,
def search(self, query, params, result_container, start_time, timeout_limit): params: RequestParams,
result_container: "ResultContainer",
start_time: float,
timeout_limit: float,
):
try: try:
search_results = self._search_basic(query, params) search_results = self.engine.search(query, params)
self.extend_container(result_container, start_time, search_results) self.extend_container(result_container, start_time, search_results)
except ValueError as e: except ValueError as e:
# do not record the error # do not record the error
self.logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e)) self.logger.exception('engine {0} : invalid input : {1}'.format(self.engine.name, e))
except Exception as e: # pylint: disable=broad-except except Exception as e: # pylint: disable=broad-except
self.handle_exception(result_container, e) self.handle_exception(result_container, e)
self.logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) self.logger.exception('engine {0} : exception : {1}'.format(self.engine.name, e))

View File

@ -1,8 +1,9 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Processors for engine-type: ``online`` """Processor used for ``online`` engines."""
""" __all__ = ["OnlineProcessor", "OnlineParams"]
# pylint: disable=use-dict-literal
import typing as t
from timeit import default_timer from timeit import default_timer
import asyncio import asyncio
@ -17,50 +18,132 @@ from searx.exceptions import (
SearxEngineTooManyRequestsException, SearxEngineTooManyRequestsException,
) )
from searx.metrics.error_recorder import count_error from searx.metrics.error_recorder import count_error
from .abstract import EngineProcessor from .abstract import EngineProcessor, RequestParams
if t.TYPE_CHECKING:
from searx.search.models import SearchQuery
from searx.results import ResultContainer
from searx.result_types import EngineResults
def default_request_params(): class HTTPParams(t.TypedDict):
"""HTTP request parameters"""
method: t.Literal["GET", "POST"]
"""HTTP request method."""
headers: dict[str, str]
"""HTTP header information."""
data: dict[str, str]
"""Sending `form encoded data`_.
.. _form encoded data:
https://www.python-httpx.org/quickstart/#sending-form-encoded-data
"""
json: dict[str, t.Any]
"""`Sending `JSON encoded data`_.
.. _JSON encoded data:
https://www.python-httpx.org/quickstart/#sending-json-encoded-data
"""
content: bytes
"""`Sending `binary request data`_.
.. _binary request data:
https://www.python-httpx.org/quickstart/#sending-json-encoded-data
"""
url: str
"""Requested url."""
cookies: dict[str, str]
"""HTTP cookies."""
allow_redirects: bool
"""Follow redirects"""
max_redirects: int
"""Maximum redirects, hard limit."""
soft_max_redirects: int
"""Maximum redirects, soft limit. Record an error but don't stop the engine."""
verify: None | t.Literal[False] | str # not sure str really works
"""If not ``None``, it overrides the verify value defined in the network. Use
``False`` to accept any server certificate and use a path to file to specify a
server certificate"""
auth: str | None
"""An authentication to use when sending requests."""
raise_for_httperror: bool
"""Raise an exception if the `HTTP response status code`_ is ``>= 300``.
.. _HTTP response status code:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status
"""
class OnlineParams(HTTPParams, RequestParams):
"""Request parameters of a ``online`` engine."""
def default_request_params() -> HTTPParams:
"""Default request parameters for ``online`` engines.""" """Default request parameters for ``online`` engines."""
return { return {
# fmt: off "method": "GET",
'method': 'GET', "headers": {},
'headers': {}, "data": {},
'data': {}, "json": {},
'url': '', "content": b"",
'cookies': {}, "url": "",
'auth': None "cookies": {},
# fmt: on "allow_redirects": False,
"max_redirects": 0,
"soft_max_redirects": 0,
"auth": None,
"verify": None,
"raise_for_httperror": True,
} }
class OnlineProcessor(EngineProcessor): class OnlineProcessor(EngineProcessor):
"""Processor class for ``online`` engines.""" """Processor class for ``online`` engines."""
engine_type = 'online' engine_type: str = "online"
def initialize(self): def init_engine(self) -> bool:
"""This method is called in a thread, and before the base method is
called, the network must be set up for the ``online`` engines."""
self.init_network_in_thread(start_time=default_timer(), timeout_limit=self.engine.timeout)
return super().init_engine()
def init_network_in_thread(self, start_time: float, timeout_limit: float):
# set timeout for all HTTP requests # set timeout for all HTTP requests
searx.network.set_timeout_for_thread(self.engine.timeout, start_time=default_timer()) searx.network.set_timeout_for_thread(timeout_limit, start_time=start_time)
# reset the HTTP total time # reset the HTTP total time
searx.network.reset_time_for_thread() searx.network.reset_time_for_thread()
# set the network # set the network
searx.network.set_context_network_name(self.engine_name) searx.network.set_context_network_name(self.engine.name)
super().initialize()
def get_params(self, search_query, engine_category): def get_params(self, search_query: "SearchQuery", engine_category: str) -> OnlineParams | None:
"""Returns a set of :ref:`request params <engine request online>` or ``None`` """Returns a dictionary with the :ref:`request params <engine request
if request is not supported. online>` (:py:obj:`OnlineParams`), if the search condition is not
""" supported by the engine, ``None`` is returned."""
params = super().get_params(search_query, engine_category)
if params is None:
return None
# add default params base_params: RequestParams | None = super().get_params(search_query, engine_category)
params.update(default_request_params()) if base_params is None:
return base_params
params: OnlineParams = {**default_request_params(), **base_params}
headers = params["headers"]
# add an user agent # add an user agent
params['headers']['User-Agent'] = gen_useragent() headers["User-Agent"] = gen_useragent()
# add Accept-Language header # add Accept-Language header
if self.engine.send_accept_language_header and search_query.locale: if self.engine.send_accept_language_header and search_query.locale:
@ -71,73 +154,77 @@ class OnlineProcessor(EngineProcessor):
search_query.locale.territory, search_query.locale.territory,
search_query.locale.language, search_query.locale.language,
) )
params['headers']['Accept-Language'] = ac_lang headers["Accept-Language"] = ac_lang
self.logger.debug('HTTP Accept-Language: %s', params['headers'].get('Accept-Language', '')) self.logger.debug("HTTP Accept-Language: %s", headers.get("Accept-Language", ""))
return params return params
def _send_http_request(self, params): def _send_http_request(self, params: OnlineParams):
# create dictionary which contain all
# information about the request
request_args = dict(headers=params['headers'], cookies=params['cookies'], auth=params['auth'])
# verify # create dictionary which contain all information about the request
# if not None, it overrides the verify value defined in the network. request_args: dict[str, t.Any] = {
# use False to accept any server certificate "headers": params["headers"],
# use a path to file to specify a server certificate "cookies": params["cookies"],
verify = params.get('verify') "auth": params["auth"],
}
verify = params.get("verify")
if verify is not None: if verify is not None:
request_args['verify'] = params['verify'] request_args["verify"] = verify
# max_redirects # max_redirects
max_redirects = params.get('max_redirects') max_redirects = params.get("max_redirects")
if max_redirects: if max_redirects:
request_args['max_redirects'] = max_redirects request_args["max_redirects"] = max_redirects
# allow_redirects # allow_redirects
if 'allow_redirects' in params: if "allow_redirects" in params:
request_args['allow_redirects'] = params['allow_redirects'] request_args["allow_redirects"] = params["allow_redirects"]
# soft_max_redirects # soft_max_redirects
soft_max_redirects = params.get('soft_max_redirects', max_redirects or 0) soft_max_redirects: int = params.get("soft_max_redirects", max_redirects or 0)
# raise_for_status # raise_for_status
request_args['raise_for_httperror'] = params.get('raise_for_httperror', True) request_args["raise_for_httperror"] = params.get("raise_for_httperror", True)
# specific type of request (GET or POST) # specific type of request (GET or POST)
if params['method'] == 'GET': if params["method"] == "GET":
req = searx.network.get req = searx.network.get
else: else:
req = searx.network.post req = searx.network.post
if params["data"]:
request_args['data'] = params['data'] request_args["data"] = params["data"]
if params["json"]:
request_args["json"] = params["json"]
if params["content"]:
request_args["content"] = params["content"]
# send the request # send the request
response = req(params['url'], **request_args) response = req(params["url"], **request_args)
# check soft limit of the redirect count # check soft limit of the redirect count
if len(response.history) > soft_max_redirects: if len(response.history) > soft_max_redirects:
# unexpected redirect : record an error # unexpected redirect : record an error
# but the engine might still return valid results. # but the engine might still return valid results.
status_code = str(response.status_code or '') status_code = str(response.status_code or "")
reason = response.reason_phrase or '' reason = response.reason_phrase or ""
hostname = response.url.host hostname = response.url.host
count_error( count_error(
self.engine_name, self.engine.name,
'{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects), "{} redirects, maximum: {}".format(len(response.history), soft_max_redirects),
(status_code, reason, hostname), (status_code, reason, hostname),
secondary=True, secondary=True,
) )
return response return response
def _search_basic(self, query, params): def _search_basic(self, query: str, params: OnlineParams) -> "EngineResults|None":
# update request parameters dependent on # update request parameters dependent on
# search-engine (contained in engines folder) # search-engine (contained in engines folder)
self.engine.request(query, params) self.engine.request(query, params)
# ignoring empty urls # ignoring empty urls
if not params['url']: if not params["url"]:
return None return None
# send request # send request
@ -147,13 +234,15 @@ class OnlineProcessor(EngineProcessor):
response.search_params = params response.search_params = params
return self.engine.response(response) return self.engine.response(response)
def search(self, query, params, result_container, start_time, timeout_limit): def search( # pyright: ignore[reportIncompatibleMethodOverride]
# set timeout for all HTTP requests self,
searx.network.set_timeout_for_thread(timeout_limit, start_time=start_time) query: str,
# reset the HTTP total time params: OnlineParams,
searx.network.reset_time_for_thread() result_container: "ResultContainer",
# set the network start_time: float,
searx.network.set_context_network_name(self.engine_name) timeout_limit: float,
):
self.init_network_in_thread(start_time, timeout_limit)
try: try:
# send requests and parse the results # send requests and parse the results
@ -162,7 +251,7 @@ class OnlineProcessor(EngineProcessor):
except ssl.SSLError as e: except ssl.SSLError as e:
# requests timeout (connect or read) # requests timeout (connect or read)
self.handle_exception(result_container, e, suspend=True) self.handle_exception(result_container, e, suspend=True)
self.logger.error("SSLError {}, verify={}".format(e, searx.network.get_network(self.engine_name).verify)) self.logger.error("SSLError {}, verify={}".format(e, searx.network.get_network(self.engine.name).verify))
except (httpx.TimeoutException, asyncio.TimeoutError) as e: except (httpx.TimeoutException, asyncio.TimeoutError) as e:
# requests timeout (connect or read) # requests timeout (connect or read)
self.handle_exception(result_container, e, suspend=True) self.handle_exception(result_container, e, suspend=True)
@ -179,55 +268,13 @@ class OnlineProcessor(EngineProcessor):
default_timer() - start_time, timeout_limit, e default_timer() - start_time, timeout_limit, e
) )
) )
except SearxEngineCaptchaException as e: except (
SearxEngineCaptchaException,
SearxEngineTooManyRequestsException,
SearxEngineAccessDeniedException,
) as e:
self.handle_exception(result_container, e, suspend=True) self.handle_exception(result_container, e, suspend=True)
self.logger.exception('CAPTCHA') self.logger.exception(e.message)
except SearxEngineTooManyRequestsException as e:
self.handle_exception(result_container, e, suspend=True)
self.logger.exception('Too many requests')
except SearxEngineAccessDeniedException as e:
self.handle_exception(result_container, e, suspend=True)
self.logger.exception('SearXNG is blocked')
except Exception as e: # pylint: disable=broad-except except Exception as e: # pylint: disable=broad-except
self.handle_exception(result_container, e) self.handle_exception(result_container, e)
self.logger.exception('exception : {0}'.format(e)) self.logger.exception("exception : {0}".format(e))
def get_default_tests(self):
tests = {}
tests['simple'] = {
'matrix': {'query': ('life', 'computer')},
'result_container': ['not_empty'],
}
if getattr(self.engine, 'paging', False):
tests['paging'] = {
'matrix': {'query': 'time', 'pageno': (1, 2, 3)},
'result_container': ['not_empty'],
'test': ['unique_results'],
}
if 'general' in self.engine.categories:
# avoid documentation about HTML tags (<time> and <input type="time">)
tests['paging']['matrix']['query'] = 'news'
if getattr(self.engine, 'time_range', False):
tests['time_range'] = {
'matrix': {'query': 'news', 'time_range': (None, 'day')},
'result_container': ['not_empty'],
'test': ['unique_results'],
}
if getattr(self.engine, 'traits', False):
tests['lang_fr'] = {
'matrix': {'query': 'paris', 'lang': 'fr'},
'result_container': ['not_empty', ('has_language', 'fr')],
}
tests['lang_en'] = {
'matrix': {'query': 'paris', 'lang': 'en'},
'result_container': ['not_empty', ('has_language', 'en')],
}
if getattr(self.engine, 'safesearch', False):
tests['safesearch'] = {'matrix': {'query': 'porn', 'safesearch': (0, 2)}, 'test': ['unique_results']}
return tests

View File

@ -1,42 +1,71 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Processors for engine-type: ``online_currency`` """Processor used for ``online_currency`` engines."""
""" import typing as t
import unicodedata import unicodedata
import re import re
import flask_babel
import babel
from searx.data import CURRENCIES from searx.data import CURRENCIES
from .online import OnlineProcessor from .online import OnlineProcessor, OnlineParams
parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) if t.TYPE_CHECKING:
from .abstract import EngineProcessor
from searx.search.models import SearchQuery
def normalize_name(name: str): search_syntax = re.compile(r".*?(\d+(?:\.\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)", re.I)
name = name.strip() """Search syntax used for from/to currency (e.g. ``10 usd to eur``)"""
name = name.lower().replace('-', ' ').rstrip('s')
name = re.sub(' +', ' ', name)
return unicodedata.normalize('NFKD', name).lower() class CurrenciesParams(t.TypedDict):
"""Currencies request parameters."""
amount: float
"""Currency amount to be converted"""
to_iso4217: str
"""ISO_4217_ alpha code of the currency used as the basis for conversion.
.. _ISO_4217: https://en.wikipedia.org/wiki/ISO_4217
"""
from_iso4217: str
"""ISO_4217_ alpha code of the currency to be converted."""
from_name: str
"""Name of the currency used as the basis for conversion."""
to_name: str
"""Name of the currency of the currency to be converted."""
class OnlineCurrenciesParams(CurrenciesParams, OnlineParams): # pylint: disable=duplicate-bases
"""Request parameters of a ``online_currency`` engine."""
class OnlineCurrencyProcessor(OnlineProcessor): class OnlineCurrencyProcessor(OnlineProcessor):
"""Processor class used by ``online_currency`` engines.""" """Processor class used by ``online_currency`` engines."""
engine_type = 'online_currency' engine_type: str = "online_currency"
def initialize(self): def initialize(self, callback: t.Callable[["EngineProcessor", bool], bool]):
CURRENCIES.init() CURRENCIES.init()
super().initialize() super().initialize(callback)
def get_params(self, search_query, engine_category): def get_params(self, search_query: "SearchQuery", engine_category: str) -> OnlineCurrenciesParams | None:
"""Returns a set of :ref:`request params <engine request online_currency>` """Returns a dictionary with the :ref:`request params <engine request
or ``None`` if search query does not match to :py:obj:`parser_re`.""" online_currency>` (:py:obj:`OnlineCurrenciesParams`). ``None`` is
returned if the search query does not match :py:obj:`search_syntax`."""
params = super().get_params(search_query, engine_category) online_params: OnlineParams | None = super().get_params(search_query, engine_category)
if params is None:
if online_params is None:
return None return None
m = search_syntax.match(search_query.query)
m = parser_re.match(search_query.query)
if not m: if not m:
return None return None
@ -46,22 +75,46 @@ class OnlineCurrencyProcessor(OnlineProcessor):
except ValueError: except ValueError:
return None return None
from_currency = CURRENCIES.name_to_iso4217(normalize_name(from_currency)) # most often $ stands for USD
to_currency = CURRENCIES.name_to_iso4217(normalize_name(to_currency)) if from_currency == "$":
from_currency = "$ us"
params['amount'] = amount if to_currency == "$":
params['from'] = from_currency to_currency = "$ us"
params['to'] = to_currency
params['from_name'] = CURRENCIES.iso4217_to_name(from_currency, "en")
params['to_name'] = CURRENCIES.iso4217_to_name(to_currency, "en")
return params
def get_default_tests(self): from_iso4217 = from_currency
tests = {} if not CURRENCIES.is_iso4217(from_iso4217):
from_iso4217 = CURRENCIES.name_to_iso4217(_normalize_name(from_currency))
tests['currency'] = { to_iso4217 = to_currency
'matrix': {'query': '1337 usd in rmb'}, if not CURRENCIES.is_iso4217(to_iso4217):
'result_container': ['has_answer'], to_iso4217 = CURRENCIES.name_to_iso4217(_normalize_name(to_currency))
if from_iso4217 is None or to_iso4217 is None:
return None
ui_locale = flask_babel.get_locale() or babel.Locale.parse("en")
from_name: str = CURRENCIES.iso4217_to_name(
from_iso4217, ui_locale.language
) # pyright: ignore[reportAssignmentType]
to_name: str = CURRENCIES.iso4217_to_name(
to_iso4217, ui_locale.language
) # pyright: ignore[reportAssignmentType]
params: OnlineCurrenciesParams = {
**online_params,
"amount": amount,
"from_iso4217": from_iso4217,
"to_iso4217": to_iso4217,
"from_name": from_name,
"to_name": to_name,
} }
return tests return params
def _normalize_name(name: str):
name = name.strip()
name = name.lower().replace("-", " ")
name = re.sub(" +", " ", name)
return unicodedata.normalize("NFKD", name).lower()

View File

@ -1,60 +1,102 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Processors for engine-type: ``online_dictionary`` """Processor used for ``online_dictionary`` engines."""
"""
import typing as t
import re import re
from searx.utils import is_valid_lang from searx.sxng_locales import sxng_locales
from .online import OnlineProcessor from .online import OnlineProcessor, OnlineParams
parser_re = re.compile('.*?([a-z]+)-([a-z]+) (.+)$', re.I) if t.TYPE_CHECKING:
from searx.search.models import SearchQuery
search_syntax = re.compile(r".*?([a-z]+)-([a-z]+) (.+)$", re.I)
"""Search syntax used for from/to language (e.g. ``en-de``)"""
FromToType: t.TypeAlias = tuple[bool, str, str]
"""Type of a language descriptions in the context of a ``online_dictionary``."""
class DictParams(t.TypedDict):
"""Dictionary request parameters."""
from_lang: FromToType
"""Language from which is to be translated."""
to_lang: FromToType
"""Language to translate into."""
query: str
"""Search term, cleaned of search syntax (*from-to* has been removed)."""
class OnlineDictParams(DictParams, OnlineParams): # pylint: disable=duplicate-bases
"""Request parameters of a ``online_dictionary`` engine."""
class OnlineDictionaryProcessor(OnlineProcessor): class OnlineDictionaryProcessor(OnlineProcessor):
"""Processor class used by ``online_dictionary`` engines.""" """Processor class for ``online_dictionary`` engines."""
engine_type = 'online_dictionary' engine_type: str = "online_dictionary"
def get_params(self, search_query, engine_category): def get_params(self, search_query: "SearchQuery", engine_category: str) -> OnlineDictParams | None:
"""Returns a set of :ref:`request params <engine request online_dictionary>` or """Returns a dictionary with the :ref:`request params <engine request
``None`` if search query does not match to :py:obj:`parser_re`. online_dictionary>` (:py:obj:`OnlineDictParams`). ``None`` is returned
""" if the search query does not match :py:obj:`search_syntax`."""
params = super().get_params(search_query, engine_category)
if params is None: online_params: OnlineParams | None = super().get_params(search_query, engine_category)
if online_params is None:
return None return None
m = search_syntax.match(search_query.query)
m = parser_re.match(search_query.query)
if not m: if not m:
return None return None
from_lang, to_lang, query = m.groups() from_lang, to_lang, query = m.groups()
from_lang = _get_lang_descr(from_lang)
from_lang = is_valid_lang(from_lang) to_lang = _get_lang_descr(to_lang)
to_lang = is_valid_lang(to_lang)
if not from_lang or not to_lang: if not from_lang or not to_lang:
return None return None
params['from_lang'] = from_lang params: OnlineDictParams = {
params['to_lang'] = to_lang **online_params,
params['query'] = query "from_lang": from_lang,
"to_lang": to_lang,
"query": query,
}
return params return params
def get_default_tests(self):
tests = {}
if getattr(self.engine, 'paging', False): def _get_lang_descr(lang: str) -> FromToType | None:
tests['translation_paging'] = { """Returns language's code and language's english name if argument ``lang``
'matrix': {'query': 'en-es house', 'pageno': (1, 2, 3)}, describes a language known by SearXNG, otherwise ``None``.
'result_container': ['not_empty', ('one_title_contains', 'house')],
'test': ['unique_results'],
}
else:
tests['translation'] = {
'matrix': {'query': 'en-es house'},
'result_container': ['not_empty', ('one_title_contains', 'house')],
}
return tests Examples:
.. code:: python
>>> _get_lang_descr("zz")
None
>>> _get_lang_descr("uk")
(True, "uk", "ukrainian")
>>> _get_lang_descr(b"uk")
(True, "uk", "ukrainian")
>>> _get_lang_descr("en")
(True, "en", "english")
>>> _get_lang_descr("Español")
(True, "es", "spanish")
>>> _get_lang_descr("Spanish")
(True, "es", "spanish")
"""
lang = lang.lower()
is_abbr = len(lang) == 2
if is_abbr:
for l in sxng_locales:
if l[0][:2] == lang:
return (True, l[0][:2], l[3].lower())
return None
for l in sxng_locales:
if l[1].lower() == lang or l[3].lower() == lang:
return (True, l[0][:2], l[3].lower())
return None

View File

@ -1,45 +1,64 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Processors for engine-type: ``online_url_search`` """Processor used for ``online_url_search`` engines."""
"""
import typing as t
import re import re
from .online import OnlineProcessor
re_search_urls = { from .online import OnlineProcessor, OnlineParams
'http': re.compile(r'https?:\/\/[^ ]*'),
'ftp': re.compile(r'ftps?:\/\/[^ ]*'), if t.TYPE_CHECKING:
'data:image': re.compile('data:image/[^; ]*;base64,[^ ]*'), from .abstract import EngineProcessor
from searx.search.models import SearchQuery
search_syntax = {
"http": re.compile(r"https?:\/\/[^ ]*"),
"ftp": re.compile(r"ftps?:\/\/[^ ]*"),
"data:image": re.compile("data:image/[^; ]*;base64,[^ ]*"),
} }
"""Search syntax used for a URL search."""
class UrlParams(t.TypedDict):
"""URL request parameters."""
search_urls: dict[str, str | None]
class OnlineUrlSearchParams(UrlParams, OnlineParams): # pylint: disable=duplicate-bases
"""Request parameters of a ``online_url_search`` engine."""
class OnlineUrlSearchProcessor(OnlineProcessor): class OnlineUrlSearchProcessor(OnlineProcessor):
"""Processor class used by ``online_url_search`` engines.""" """Processor class used by ``online_url_search`` engines."""
engine_type = 'online_url_search' engine_type: str = "online_url_search"
def get_params(self, search_query, engine_category): def get_params(self, search_query: "SearchQuery", engine_category: str) -> OnlineUrlSearchParams | None:
"""Returns a set of :ref:`request params <engine request online>` or ``None`` if """Returns a dictionary with the :ref:`request params <engine request
search query does not match to :py:obj:`re_search_urls`. online_currency>` (:py:obj:`OnlineUrlSearchParams`). ``None`` is
""" returned if the search query does not match :py:obj:`search_syntax`."""
params = super().get_params(search_query, engine_category) online_params: OnlineParams | None = super().get_params(search_query, engine_category)
if params is None: if online_params is None:
return None return None
url_match = False search_urls: dict[str, str | None] = {}
search_urls = {} has_match: bool = False
for k, v in re_search_urls.items(): for url_schema, url_re in search_syntax.items():
m = v.search(search_query.query) search_urls[url_schema] = None
v = None m = url_re.search(search_query.query)
if m: if m:
url_match = True has_match = True
v = m[0] search_urls[url_schema] = m[0]
search_urls[k] = v
if not url_match: if not has_match:
return None return None
params['search_urls'] = search_urls params: OnlineUrlSearchParams = {
**online_params,
"search_urls": search_urls,
}
return params return params

View File

@ -718,7 +718,6 @@ engines:
- name: currency - name: currency
engine: currency_convert engine: currency_convert
categories: general
shortcut: cc shortcut: cc
- name: deezer - name: deezer

View File

@ -410,38 +410,6 @@ def int_or_zero(num: list[str] | str) -> int:
return convert_str_to_int(num) return convert_str_to_int(num)
def is_valid_lang(lang: str) -> tuple[bool, str, str] | None:
"""Return language code and name if lang describe a language.
Examples:
>>> is_valid_lang('zz')
None
>>> is_valid_lang('uk')
(True, 'uk', 'ukrainian')
>>> is_valid_lang(b'uk')
(True, 'uk', 'ukrainian')
>>> is_valid_lang('en')
(True, 'en', 'english')
>>> searx.utils.is_valid_lang('Español')
(True, 'es', 'spanish')
>>> searx.utils.is_valid_lang('Spanish')
(True, 'es', 'spanish')
"""
if isinstance(lang, bytes):
lang = lang.decode()
is_abbr = len(lang) == 2
lang = lang.lower()
if is_abbr:
for l in sxng_locales:
if l[0][:2] == lang:
return (True, l[0][:2], l[3].lower())
return None
for l in sxng_locales:
if l[1].lower() == lang or l[3].lower() == lang:
return (True, l[0][:2], l[3].lower())
return None
def load_module(filename: str, module_dir: str) -> types.ModuleType: def load_module(filename: str, module_dir: str) -> types.ModuleType:
modname = splitext(filename)[0] modname = splitext(filename)[0]
modpath = join(module_dir, filename) modpath = join(module_dir, filename)

View File

@ -48,6 +48,7 @@ import searx.query
import searx.search import searx.search
import searx.search.models import searx.search.models
import searx.webadapter import searx.webadapter
from searx.search.processors import PROCESSORS
EngineCategoriesVar = Optional[List[str]] EngineCategoriesVar = Optional[List[str]]
@ -172,7 +173,7 @@ if __name__ == '__main__':
searx.search.initialize_network(settings_engines, searx.settings['outgoing']) searx.search.initialize_network(settings_engines, searx.settings['outgoing'])
searx.search.check_network_configuration() searx.search.check_network_configuration()
searx.search.initialize_metrics([engine['name'] for engine in settings_engines]) searx.search.initialize_metrics([engine['name'] for engine in settings_engines])
searx.search.initialize_processors(settings_engines) PROCESSORS.init(settings_engines)
search_q = get_search_query(prog_args, engine_categories=engine_cs) search_q = get_search_query(prog_args, engine_categories=engine_cs)
res_dict = to_dict(search_q) res_dict = to_dict(search_q)
sys.stdout.write(dumps(res_dict, sort_keys=True, indent=4, ensure_ascii=False, default=json_serial)) sys.stdout.write(dumps(res_dict, sort_keys=True, indent=4, ensure_ascii=False, default=json_serial))

View File

@ -20,7 +20,7 @@ class TestOnlineProcessor(SearxTestCase):
def test_get_params_default_params(self): def test_get_params_default_params(self):
engine = engines.engines[TEST_ENGINE_NAME] engine = engines.engines[TEST_ENGINE_NAME]
online_processor = online.OnlineProcessor(engine, TEST_ENGINE_NAME) online_processor = online.OnlineProcessor(engine)
search_query = SearchQuery('test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None) search_query = SearchQuery('test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None)
params = self._get_params(online_processor, search_query, 'general') params = self._get_params(online_processor, search_query, 'general')
self.assertIn('method', params) self.assertIn('method', params)
@ -32,7 +32,7 @@ class TestOnlineProcessor(SearxTestCase):
def test_get_params_useragent(self): def test_get_params_useragent(self):
engine = engines.engines[TEST_ENGINE_NAME] engine = engines.engines[TEST_ENGINE_NAME]
online_processor = online.OnlineProcessor(engine, TEST_ENGINE_NAME) online_processor = online.OnlineProcessor(engine)
search_query = SearchQuery('test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None) search_query = SearchQuery('test', [EngineRef(TEST_ENGINE_NAME, 'general')], 'all', 0, 1, None, None, None)
params = self._get_params(online_processor, search_query, 'general') params = self._get_params(online_processor, search_query, 'general')
self.assertIn('User-Agent', params['headers']) self.assertIn('User-Agent', params['headers'])

View File

@ -24,7 +24,7 @@ class ViewsTestCase(SearxTestCase): # pylint: disable=too-many-public-methods
def dummy(*args, **kwargs): # pylint: disable=unused-argument def dummy(*args, **kwargs): # pylint: disable=unused-argument
pass pass
self.setattr4test(searx.search.processors, 'initialize_processor', dummy) self.setattr4test(searx.search.processors.PROCESSORS, 'init', dummy)
# set some defaults # set some defaults
test_results = [ test_results = [