mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-25 15:52:31 -04:00 
			
		
		
		
	Merge pull request #94 from return42/pylint-network
Pylint searx.network
This commit is contained in:
		
						commit
						7c76cef746
					
				| @ -1,4 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| # lint: pylint | ||||
| # pylint: disable=missing-module-docstring, missing-function-docstring, global-statement | ||||
| 
 | ||||
| import asyncio | ||||
| import threading | ||||
| @ -31,29 +33,33 @@ except ImportError: | ||||
|             self._count.release() | ||||
| 
 | ||||
|         def get(self): | ||||
|             if not self._count.acquire(True): | ||||
|             if not self._count.acquire(True):  #pylint: disable=consider-using-with | ||||
|                 raise Empty | ||||
|             return self._queue.popleft() | ||||
| 
 | ||||
| 
 | ||||
| THREADLOCAL = threading.local() | ||||
| 
 | ||||
| """Thread-local data is data for thread specific values.""" | ||||
| 
 | ||||
| def reset_time_for_thread(): | ||||
|     global THREADLOCAL | ||||
|     THREADLOCAL.total_time = 0 | ||||
| 
 | ||||
| 
 | ||||
| def get_time_for_thread(): | ||||
|     """returns thread's total time or None""" | ||||
|     global THREADLOCAL | ||||
|     return THREADLOCAL.__dict__.get('total_time') | ||||
| 
 | ||||
| 
 | ||||
| def set_timeout_for_thread(timeout, start_time=None): | ||||
|     global THREADLOCAL | ||||
|     THREADLOCAL.timeout = timeout | ||||
|     THREADLOCAL.start_time = start_time | ||||
| 
 | ||||
| 
 | ||||
| def set_context_network_name(network_name): | ||||
|     global THREADLOCAL | ||||
|     THREADLOCAL.network = get_network(network_name) | ||||
| 
 | ||||
| 
 | ||||
| @ -62,11 +68,13 @@ def get_context_network(): | ||||
| 
 | ||||
|     If unset, return value from :py:obj:`get_network`. | ||||
|     """ | ||||
|     global THREADLOCAL | ||||
|     return THREADLOCAL.__dict__.get('network') or get_network() | ||||
| 
 | ||||
| 
 | ||||
| def request(method, url, **kwargs): | ||||
|     """same as requests/requests/api.py request(...)""" | ||||
|     global THREADLOCAL | ||||
|     time_before_request = default_timer() | ||||
| 
 | ||||
|     # timeout (httpx) | ||||
| @ -153,18 +161,17 @@ def patch(url, data=None, **kwargs): | ||||
| def delete(url, **kwargs): | ||||
|     return request('delete', url, **kwargs) | ||||
| 
 | ||||
| 
 | ||||
| async def stream_chunk_to_queue(network, q, method, url, **kwargs): | ||||
| async def stream_chunk_to_queue(network, queue, method, url, **kwargs): | ||||
|     try: | ||||
|         async with network.stream(method, url, **kwargs) as response: | ||||
|             q.put(response) | ||||
|             queue.put(response) | ||||
|             async for chunk in response.aiter_bytes(65536): | ||||
|                 if len(chunk) > 0: | ||||
|                     q.put(chunk) | ||||
|                     queue.put(chunk) | ||||
|     except (httpx.HTTPError, OSError, h2.exceptions.ProtocolError) as e: | ||||
|         q.put(e) | ||||
|         queue.put(e) | ||||
|     finally: | ||||
|         q.put(None) | ||||
|         queue.put(None) | ||||
| 
 | ||||
| 
 | ||||
| def stream(method, url, **kwargs): | ||||
| @ -179,13 +186,15 @@ def stream(method, url, **kwargs): | ||||
|     httpx.Client.stream requires to write the httpx.HTTPTransport version of the | ||||
|     the httpx.AsyncHTTPTransport declared above. | ||||
|     """ | ||||
|     q = SimpleQueue() | ||||
|     future = asyncio.run_coroutine_threadsafe(stream_chunk_to_queue(get_network(), q, method, url, **kwargs), | ||||
|                                               get_loop()) | ||||
|     chunk_or_exception = q.get() | ||||
|     queue = SimpleQueue() | ||||
|     future = asyncio.run_coroutine_threadsafe( | ||||
|         stream_chunk_to_queue(get_network(), queue, method, url, **kwargs), | ||||
|         get_loop() | ||||
|     ) | ||||
|     chunk_or_exception = queue.get() | ||||
|     while chunk_or_exception is not None: | ||||
|         if isinstance(chunk_or_exception, Exception): | ||||
|             raise chunk_or_exception | ||||
|         yield chunk_or_exception | ||||
|         chunk_or_exception = q.get() | ||||
|         chunk_or_exception = queue.get() | ||||
|     return future.result() | ||||
|  | ||||
| @ -1,14 +1,19 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| # lint: pylint | ||||
| # pylint: disable=missing-module-docstring, missing-function-docstring, global-statement | ||||
| 
 | ||||
| import asyncio | ||||
| import logging | ||||
| import threading | ||||
| 
 | ||||
| import httpcore | ||||
| import httpx | ||||
| from httpx_socks import AsyncProxyTransport | ||||
| from python_socks import parse_proxy_url | ||||
| import python_socks._errors | ||||
| from python_socks import ( | ||||
|     parse_proxy_url, | ||||
|     ProxyConnectionError, | ||||
|     ProxyTimeoutError, | ||||
|     ProxyError | ||||
| ) | ||||
| 
 | ||||
| from searx import logger | ||||
| 
 | ||||
| @ -30,7 +35,11 @@ TRANSPORT_KWARGS = { | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| async def close_connections_for_url(connection_pool: httpcore.AsyncConnectionPool, url: httpcore._utils.URL): | ||||
| # pylint: disable=protected-access | ||||
| async def close_connections_for_url( | ||||
|         connection_pool: httpcore.AsyncConnectionPool, | ||||
|         url: httpcore._utils.URL ): | ||||
| 
 | ||||
|     origin = httpcore._utils.url_to_origin(url) | ||||
|     logger.debug('Drop connections for %r', origin) | ||||
|     connections_to_close = connection_pool._connections_for_origin(origin) | ||||
| @ -40,6 +49,7 @@ async def close_connections_for_url(connection_pool: httpcore.AsyncConnectionPoo | ||||
|             await connection.aclose() | ||||
|         except httpcore.NetworkError as e: | ||||
|             logger.warning('Error closing an existing connection', exc_info=e) | ||||
| # pylint: enable=protected-access | ||||
| 
 | ||||
| 
 | ||||
| def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http2=False): | ||||
| @ -80,9 +90,7 @@ class AsyncProxyTransportFixed(AsyncProxyTransport): | ||||
|             retry -= 1 | ||||
|             try: | ||||
|                 return await super().arequest(method, url, headers, stream, ext) | ||||
|             except (python_socks._errors.ProxyConnectionError, | ||||
|                     python_socks._errors.ProxyTimeoutError, | ||||
|                     python_socks._errors.ProxyError) as e: | ||||
|             except (ProxyConnectionError, ProxyTimeoutError, ProxyError) as e: | ||||
|                 raise httpcore.ProxyError(e) | ||||
|             except OSError as e: | ||||
|                 # socket.gaierror when DNS resolution fails | ||||
| @ -114,7 +122,7 @@ class AsyncHTTPTransportFixed(httpx.AsyncHTTPTransport): | ||||
|             except httpcore.CloseError as e: | ||||
|                 # httpcore.CloseError: [Errno 104] Connection reset by peer | ||||
|                 # raised by _keepalive_sweep() | ||||
|                 #   from https://github.com/encode/httpcore/blob/4b662b5c42378a61e54d673b4c949420102379f5/httpcore/_backends/asyncio.py#L198  # noqa | ||||
|                 #   from https://github.com/encode/httpcore/blob/4b662b5c42378a61e54d673b4c949420102379f5/httpcore/_backends/asyncio.py#L198  # pylint: disable=line-too-long | ||||
|                 await close_connections_for_url(self._pool, url) | ||||
|                 logger.warning('httpcore.CloseError: retry', exc_info=e) | ||||
|                 # retry | ||||
| @ -129,6 +137,7 @@ class AsyncHTTPTransportFixed(httpx.AsyncHTTPTransport): | ||||
| 
 | ||||
| 
 | ||||
| def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit, retries): | ||||
|     global TRANSPORT_KWARGS | ||||
|     # support socks5h (requests compatibility): | ||||
|     # https://requests.readthedocs.io/en/master/user/advanced/#socks | ||||
|     # socks5://   hostname is resolved on client side | ||||
| @ -141,7 +150,8 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit | ||||
| 
 | ||||
|     proxy_type, proxy_host, proxy_port, proxy_username, proxy_password = parse_proxy_url(proxy_url) | ||||
|     verify = get_sslcontexts(proxy_url, None, True, False, http2) if verify is True else verify | ||||
|     return AsyncProxyTransportFixed(proxy_type=proxy_type, proxy_host=proxy_host, proxy_port=proxy_port, | ||||
|     return AsyncProxyTransportFixed( | ||||
|         proxy_type=proxy_type, proxy_host=proxy_host, proxy_port=proxy_port, | ||||
|         username=proxy_username, password=proxy_password, | ||||
|         rdns=rdns, | ||||
|         loop=get_loop(), | ||||
| @ -152,18 +162,23 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit | ||||
|         max_keepalive_connections=limit.max_keepalive_connections, | ||||
|         keepalive_expiry=limit.keepalive_expiry, | ||||
|         retries=retries, | ||||
|                                     **TRANSPORT_KWARGS) | ||||
|         **TRANSPORT_KWARGS | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| def get_transport(verify, http2, local_address, proxy_url, limit, retries): | ||||
|     global TRANSPORT_KWARGS | ||||
|     verify = get_sslcontexts(None, None, True, False, http2) if verify is True else verify | ||||
|     return AsyncHTTPTransportFixed(verify=verify, | ||||
|     return AsyncHTTPTransportFixed( | ||||
|         # pylint: disable=protected-access | ||||
|         verify=verify, | ||||
|         http2=http2, | ||||
|         local_address=local_address, | ||||
|         proxy=httpx._config.Proxy(proxy_url) if proxy_url else None, | ||||
|         limits=limit, | ||||
|         retries=retries, | ||||
|                                    **TRANSPORT_KWARGS) | ||||
|         **TRANSPORT_KWARGS | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| def iter_proxies(proxies): | ||||
| @ -175,24 +190,32 @@ def iter_proxies(proxies): | ||||
|             yield pattern, proxy_url | ||||
| 
 | ||||
| 
 | ||||
| def new_client(enable_http, verify, enable_http2, | ||||
| def new_client( | ||||
|         # pylint: disable=too-many-arguments | ||||
|         enable_http, verify, enable_http2, | ||||
|         max_connections, max_keepalive_connections, keepalive_expiry, | ||||
|                proxies, local_address, retries, max_redirects): | ||||
|     limit = httpx.Limits(max_connections=max_connections, | ||||
|         proxies, local_address, retries, max_redirects  ): | ||||
|     limit = httpx.Limits( | ||||
|         max_connections=max_connections, | ||||
|         max_keepalive_connections=max_keepalive_connections, | ||||
|                          keepalive_expiry=keepalive_expiry) | ||||
|         keepalive_expiry=keepalive_expiry | ||||
|     ) | ||||
|     # See https://www.python-httpx.org/advanced/#routing | ||||
|     mounts = {} | ||||
|     for pattern, proxy_url in iter_proxies(proxies): | ||||
|         if not enable_http and (pattern == 'http' or pattern.startswith('http://')): | ||||
|             continue | ||||
|         if proxy_url.startswith('socks4://') \ | ||||
|            or proxy_url.startswith('socks5://') \ | ||||
|            or proxy_url.startswith('socks5h://'): | ||||
|             mounts[pattern] = get_transport_for_socks_proxy(verify, enable_http2, local_address, proxy_url, limit, | ||||
|                                                             retries) | ||||
|         if (proxy_url.startswith('socks4://') | ||||
|            or proxy_url.startswith('socks5://') | ||||
|             or proxy_url.startswith('socks5h://') | ||||
|         ): | ||||
|             mounts[pattern] = get_transport_for_socks_proxy( | ||||
|                 verify, enable_http2, local_address, proxy_url, limit, retries | ||||
|             ) | ||||
|         else: | ||||
|             mounts[pattern] = get_transport(verify, enable_http2, local_address, proxy_url, limit, retries) | ||||
|             mounts[pattern] = get_transport( | ||||
|                 verify, enable_http2, local_address, proxy_url, limit, retries | ||||
|             ) | ||||
| 
 | ||||
|     if not enable_http: | ||||
|         mounts['http://'] = AsyncHTTPTransportNoHttp() | ||||
| @ -217,12 +240,12 @@ def init(): | ||||
|         LOOP = asyncio.new_event_loop() | ||||
|         LOOP.run_forever() | ||||
| 
 | ||||
|     th = threading.Thread( | ||||
|     thread = threading.Thread( | ||||
|         target=loop_thread, | ||||
|         name='asyncio_loop', | ||||
|         daemon=True, | ||||
|     ) | ||||
|     th.start() | ||||
|     thread.start() | ||||
| 
 | ||||
| 
 | ||||
| init() | ||||
|  | ||||
| @ -1,4 +1,7 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| # lint: pylint | ||||
| # pylint: disable=global-statement | ||||
| # pylint: disable=missing-module-docstring, missing-class-docstring, missing-function-docstring | ||||
| 
 | ||||
| import atexit | ||||
| import asyncio | ||||
| @ -34,12 +37,16 @@ ADDRESS_MAPPING = { | ||||
| 
 | ||||
| class Network: | ||||
| 
 | ||||
|     __slots__ = ('enable_http', 'verify', 'enable_http2', | ||||
|     __slots__ = ( | ||||
|         'enable_http', 'verify', 'enable_http2', | ||||
|         'max_connections', 'max_keepalive_connections', 'keepalive_expiry', | ||||
|         'local_addresses', 'proxies', 'max_redirects', 'retries', 'retry_on_http_error', | ||||
|                  '_local_addresses_cycle', '_proxies_cycle', '_clients') | ||||
|         '_local_addresses_cycle', '_proxies_cycle', '_clients' | ||||
|     ) | ||||
| 
 | ||||
|     def __init__(self, | ||||
|     def __init__( | ||||
|             # pylint: disable=too-many-arguments | ||||
|             self, | ||||
|             enable_http=True, | ||||
|             verify=True, | ||||
|             enable_http2=False, | ||||
| @ -50,7 +57,8 @@ class Network: | ||||
|             local_addresses=None, | ||||
|             retries=0, | ||||
|             retry_on_http_error=None, | ||||
|                  max_redirects=30): | ||||
|             max_redirects=30 ): | ||||
| 
 | ||||
|         self.enable_http = enable_http | ||||
|         self.verify = verify | ||||
|         self.enable_http2 = enable_http2 | ||||
| @ -81,7 +89,7 @@ class Network: | ||||
|         local_addresses = self.local_addresses | ||||
|         if not local_addresses: | ||||
|             return | ||||
|         elif isinstance(local_addresses, str): | ||||
|         if isinstance(local_addresses, str): | ||||
|             local_addresses = [local_addresses] | ||||
|         for address in local_addresses: | ||||
|             yield address | ||||
| @ -119,6 +127,7 @@ class Network: | ||||
|         for pattern, proxy_urls in self.iter_proxies(): | ||||
|             proxy_settings[pattern] = cycle(proxy_urls) | ||||
|         while True: | ||||
|             # pylint: disable=stop-iteration-return | ||||
|             yield tuple((pattern, next(proxy_url_cycle)) for pattern, proxy_url_cycle in proxy_settings.items()) | ||||
| 
 | ||||
|     def get_client(self, verify=None, max_redirects=None): | ||||
| @ -128,7 +137,8 @@ class Network: | ||||
|         proxies = next(self._proxies_cycle)  # is a tuple so it can be part of the key | ||||
|         key = (verify, max_redirects, local_address, proxies) | ||||
|         if key not in self._clients or self._clients[key].is_closed: | ||||
|             self._clients[key] = new_client(self.enable_http, | ||||
|             self._clients[key] = new_client( | ||||
|                 self.enable_http, | ||||
|                 verify, | ||||
|                 self.enable_http2, | ||||
|                 self.max_connections, | ||||
| @ -137,7 +147,8 @@ class Network: | ||||
|                 dict(proxies), | ||||
|                 local_address, | ||||
|                 0, | ||||
|                                             max_redirects) | ||||
|                 max_redirects | ||||
|             ) | ||||
|         return self._clients[key] | ||||
| 
 | ||||
|     async def aclose(self): | ||||
| @ -158,9 +169,11 @@ class Network: | ||||
|         return kwargs_clients | ||||
| 
 | ||||
|     def is_valid_respones(self, response): | ||||
|         if (self.retry_on_http_error is True and 400 <= response.status_code <= 599) \ | ||||
|            or (isinstance(self.retry_on_http_error, list) and response.status_code in self.retry_on_http_error) \ | ||||
|            or (isinstance(self.retry_on_http_error, int) and response.status_code == self.retry_on_http_error): | ||||
|         # pylint: disable=too-many-boolean-expressions | ||||
|         if ((self.retry_on_http_error is True and 400 <= response.status_code <= 599) | ||||
|             or (isinstance(self.retry_on_http_error, list) and response.status_code in self.retry_on_http_error) | ||||
|             or (isinstance(self.retry_on_http_error, int) and response.status_code == self.retry_on_http_error) | ||||
|         ): | ||||
|             return False | ||||
|         return True | ||||
| 
 | ||||
| @ -194,6 +207,7 @@ class Network: | ||||
| 
 | ||||
|     @classmethod | ||||
|     async def aclose_all(cls): | ||||
|         global NETWORKS | ||||
|         await asyncio.gather(*[network.aclose() for network in NETWORKS.values()], return_exceptions=False) | ||||
| 
 | ||||
| 
 | ||||
| @ -203,8 +217,10 @@ def get_network(name=None): | ||||
| 
 | ||||
| 
 | ||||
| def initialize(settings_engines=None, settings_outgoing=None): | ||||
|     # pylint: disable=import-outside-toplevel) | ||||
|     from searx.engines import engines | ||||
|     from searx import settings | ||||
|     # pylint: enable=import-outside-toplevel) | ||||
| 
 | ||||
|     global NETWORKS | ||||
| 
 | ||||
| @ -212,7 +228,7 @@ def initialize(settings_engines=None, settings_outgoing=None): | ||||
|     settings_outgoing = settings_outgoing or settings.get('outgoing') | ||||
| 
 | ||||
|     # default parameters for AsyncHTTPTransport | ||||
|     # see https://github.com/encode/httpx/blob/e05a5372eb6172287458b37447c30f650047e1b8/httpx/_transports/default.py#L108-L121  # noqa | ||||
|     # see https://github.com/encode/httpx/blob/e05a5372eb6172287458b37447c30f650047e1b8/httpx/_transports/default.py#L108-L121  # pylint: disable=line-too-long | ||||
|     default_params = { | ||||
|         'enable_http': False, | ||||
|         'verify': True, | ||||
| @ -290,6 +306,7 @@ def done(): | ||||
|     Note: since Network.aclose has to be async, it is not possible to call this method on Network.__del__ | ||||
|     So Network.aclose is called here using atexit.register | ||||
|     """ | ||||
|     global NETWORKS | ||||
|     try: | ||||
|         loop = get_loop() | ||||
|         if loop: | ||||
|  | ||||
| @ -1,17 +1,23 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
| Raise exception for an HTTP response is an error. | ||||
| """ | ||||
| from searx.exceptions import (SearxEngineCaptchaException, SearxEngineTooManyRequestsException, | ||||
|                               SearxEngineAccessDeniedException) | ||||
| # lint: pylint | ||||
| # pylint: disable=missing-function-docstring | ||||
| """Raise exception for an HTTP response is an error. | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
| from searx.exceptions import ( | ||||
|     SearxEngineCaptchaException, | ||||
|     SearxEngineTooManyRequestsException, | ||||
|     SearxEngineAccessDeniedException, | ||||
| ) | ||||
| 
 | ||||
| def is_cloudflare_challenge(resp): | ||||
|     if resp.status_code in [429, 503]: | ||||
|         if ('__cf_chl_jschl_tk__=' in resp.text)\ | ||||
|         if (('__cf_chl_jschl_tk__=' in resp.text) | ||||
|             or ('/cdn-cgi/challenge-platform/' in resp.text | ||||
|                 and 'orchestrate/jsch/v1' in resp.text | ||||
|                and 'window._cf_chl_enter(' in resp.text): | ||||
|                 and 'window._cf_chl_enter(' in resp.text | ||||
|             )): | ||||
|             return True | ||||
|     if resp.status_code == 403 and '__cf_chl_captcha_tk__=' in resp.text: | ||||
|         return True | ||||
| @ -27,15 +33,21 @@ def raise_for_cloudflare_captcha(resp): | ||||
|         if is_cloudflare_challenge(resp): | ||||
|             # https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha- | ||||
|             # suspend for 2 weeks | ||||
|             raise SearxEngineCaptchaException(message='Cloudflare CAPTCHA', suspended_time=3600 * 24 * 15) | ||||
|             raise SearxEngineCaptchaException( | ||||
|                 message='Cloudflare CAPTCHA', | ||||
|                 suspended_time=3600 * 24 * 15 | ||||
|             ) | ||||
| 
 | ||||
|         if is_cloudflare_firewall(resp): | ||||
|             raise SearxEngineAccessDeniedException(message='Cloudflare Firewall', suspended_time=3600 * 24) | ||||
|             raise SearxEngineAccessDeniedException( | ||||
|                 message='Cloudflare Firewall', suspended_time=3600 * 24 | ||||
|             ) | ||||
| 
 | ||||
| 
 | ||||
| def raise_for_recaptcha(resp): | ||||
|     if resp.status_code == 503 \ | ||||
|        and '"https://www.google.com/recaptcha/' in resp.text: | ||||
|     if (resp.status_code == 503 | ||||
|         and '"https://www.google.com/recaptcha/' in resp.text | ||||
|     ): | ||||
|         raise SearxEngineCaptchaException(message='ReCAPTCHA', suspended_time=3600 * 24 * 7) | ||||
| 
 | ||||
| 
 | ||||
| @ -59,8 +71,10 @@ def raise_for_httperror(resp): | ||||
|     if resp.status_code and resp.status_code >= 400: | ||||
|         raise_for_captcha(resp) | ||||
|         if resp.status_code in (402, 403): | ||||
|             raise SearxEngineAccessDeniedException(message='HTTP error ' + str(resp.status_code), | ||||
|                                                    suspended_time=3600 * 24) | ||||
|             raise SearxEngineAccessDeniedException( | ||||
|                 message='HTTP error ' + str(resp.status_code), | ||||
|                 suspended_time=3600 * 24 | ||||
|             ) | ||||
|         if resp.status_code == 429: | ||||
|             raise SearxEngineTooManyRequestsException() | ||||
|         resp.raise_for_status() | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user