mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-25 15:52:31 -04:00 
			
		
		
		
	[mod] botdetection - improve ip_limit and link_token methods
- counting requests in LONG_WINDOW and BURST_WINDOW is not needed when the request is validated by the link_token method [1]
- renew a ping-key on validation [2]; this is needed for infinite scrolling, where no new token (CSS) is loaded. / This does not fix the BURST_MAX issue in the vanilla limiter.
- normalize the counter names of the ip_limit method to 'ip_limit.*'
- just integrate the ip_limit method straightforwardly in the limiter plugin / no intermediate code --> ip_limit now returns None or a werkzeug.Response object that can be passed by the plugin to the flask application / no intermediate code that returns a tuple

[1] https://github.com/searxng/searxng/pull/2357#issuecomment-1566113277
[2] https://github.com/searxng/searxng/pull/2357#discussion_r1208542206
[3] https://github.com/searxng/searxng/pull/2357#issuecomment-1566125979

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									52f1452c09
								
							
						
					
					
						commit
						b8c7c2c9aa
					
				| @ -9,18 +9,4 @@ The methods implemented in this python package are use by the :ref:`limiter src` | |||||||
| 
 | 
 | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| import flask | from ._helpers import dump_request | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def dump_request(request: flask.Request): |  | ||||||
|     return ( |  | ||||||
|         "%s: '%s'" % (request.headers.get('X-Forwarded-For'), request.path) |  | ||||||
|         + " || form: %s" % request.form |  | ||||||
|         + " || Accept: %s" % request.headers.get('Accept') |  | ||||||
|         + " || Accept-Language: %s" % request.headers.get('Accept-Language') |  | ||||||
|         + " || Accept-Encoding: %s" % request.headers.get('Accept-Encoding') |  | ||||||
|         + " || Content-Type: %s" % request.headers.get('Content-Type') |  | ||||||
|         + " || Content-Length: %s" % request.headers.get('Content-Length') |  | ||||||
|         + " || Connection: %s" % request.headers.get('Connection') |  | ||||||
|         + " || User-Agent: %s" % request.headers.get('User-Agent') |  | ||||||
|     ) |  | ||||||
|  | |||||||
							
								
								
									
										93
									
								
								searx/botdetection/_helpers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										93
									
								
								searx/botdetection/_helpers.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,93 @@ | |||||||
|  | # SPDX-License-Identifier: AGPL-3.0-or-later | ||||||
|  | # lint: pylint | ||||||
|  | # pylint: disable=missing-module-docstring, invalid-name | ||||||
|  | 
 | ||||||
|  | from typing import Optional | ||||||
|  | import flask | ||||||
|  | import werkzeug | ||||||
|  | 
 | ||||||
|  | from searx import logger | ||||||
|  | 
 | ||||||
|  | logger = logger.getChild('botdetection') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def dump_request(request: flask.Request): | ||||||
|  |     return ( | ||||||
|  |         "%s: %s" % (get_real_ip(request), request.path) | ||||||
|  |         + " || X-Forwarded-For: %s" % request.headers.get('X-Forwarded-For') | ||||||
|  |         + " || X-Real-IP: %s" % request.headers.get('X-Real-IP') | ||||||
|  |         + " || form: %s" % request.form | ||||||
|  |         + " || Accept: %s" % request.headers.get('Accept') | ||||||
|  |         + " || Accept-Language: %s" % request.headers.get('Accept-Language') | ||||||
|  |         + " || Accept-Encoding: %s" % request.headers.get('Accept-Encoding') | ||||||
|  |         + " || Content-Type: %s" % request.headers.get('Content-Type') | ||||||
|  |         + " || Content-Length: %s" % request.headers.get('Content-Length') | ||||||
|  |         + " || Connection: %s" % request.headers.get('Connection') | ||||||
|  |         + " || User-Agent: %s" % request.headers.get('User-Agent') | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def too_many_requests(request: flask.Request, log_msg: str) -> Optional[werkzeug.Response]: | ||||||
|  |     log_prefix = 'BLOCK %s: ' % get_real_ip(request) | ||||||
|  |     logger.debug(log_prefix + log_msg) | ||||||
|  |     return flask.make_response(('Too Many Requests', 429)) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def get_real_ip(request: flask.Request) -> str: | ||||||
|  |     """Returns real IP of the request.  Since not all proxies set all the HTTP | ||||||
|  |     headers and incoming headers can be faked it may happen that the IP cannot | ||||||
|  |     be determined correctly. | ||||||
|  | 
 | ||||||
|  |     .. sidebar:: :py:obj:`flask.Request.remote_addr` | ||||||
|  | 
 | ||||||
|  |        SearXNG uses Werkzeug's ProxyFix_ (with it default ``x_for=1``). | ||||||
|  | 
 | ||||||
|  |     This function tries to get the remote IP in the order listed below, | ||||||
|  |     additional some tests are done and if inconsistencies or errors are | ||||||
|  |     detected, they are logged. | ||||||
|  | 
 | ||||||
|  |     The remote IP of the request is taken from (first match): | ||||||
|  | 
 | ||||||
|  |     - X-Forwarded-For_ header | ||||||
|  |     - `X-real-IP header <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__ | ||||||
|  |     - :py:obj:`flask.Request.remote_addr` | ||||||
|  | 
 | ||||||
|  |     .. _ProxyFix: | ||||||
|  |        https://werkzeug.palletsprojects.com/middleware/proxy_fix/ | ||||||
|  | 
 | ||||||
|  |     .. _X-Forwarded-For: | ||||||
|  |       https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     forwarded_for = request.headers.get("X-Forwarded-For") | ||||||
|  |     real_ip = request.headers.get('X-Real-IP') | ||||||
|  |     remote_addr = request.remote_addr | ||||||
|  |     logger.debug("X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr) | ||||||
|  | 
 | ||||||
|  |     if not forwarded_for: | ||||||
|  |         logger.error("X-Forwarded-For header is not set!") | ||||||
|  |     else: | ||||||
|  |         from .limiter import get_cfg  # pylint: disable=import-outside-toplevel, cyclic-import | ||||||
|  | 
 | ||||||
|  |         forwarded_for = [x.strip() for x in forwarded_for.split(',')] | ||||||
|  |         x_for: int = get_cfg()['real_ip.x_for'] | ||||||
|  |         forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)] | ||||||
|  | 
 | ||||||
|  |     if not real_ip: | ||||||
|  |         logger.error("X-Real-IP header is not set!") | ||||||
|  | 
 | ||||||
|  |     if forwarded_for and real_ip and forwarded_for != real_ip: | ||||||
|  |         logger.warning("IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)", real_ip, forwarded_for) | ||||||
|  | 
 | ||||||
|  |     if forwarded_for and remote_addr and forwarded_for != remote_addr: | ||||||
|  |         logger.warning( | ||||||
|  |             "IP from WSGI environment (%s) is not equal to IP from X-Forwarded-For (%s)", remote_addr, forwarded_for | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |     if real_ip and remote_addr and real_ip != remote_addr: | ||||||
|  |         logger.warning("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", remote_addr, real_ip) | ||||||
|  | 
 | ||||||
|  |     request_ip = forwarded_for or real_ip or remote_addr or '0.0.0.0' | ||||||
|  |     logger.debug("get_real_ip() -> %s", request_ip) | ||||||
|  |     return request_ip | ||||||
| @ -15,13 +15,15 @@ Accept_ header .. | |||||||
| """ | """ | ||||||
| # pylint: disable=unused-argument | # pylint: disable=unused-argument | ||||||
| 
 | 
 | ||||||
| from typing import Optional, Tuple | from typing import Optional | ||||||
| import flask | import flask | ||||||
|  | import werkzeug | ||||||
| 
 | 
 | ||||||
| from searx.tools import config | from searx.tools import config | ||||||
|  | from ._helpers import too_many_requests | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]: | def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]: | ||||||
|     if 'text/html' not in request.accept_mimetypes: |     if 'text/html' not in request.accept_mimetypes: | ||||||
|         return 429, "bot detected, HTTP header Accept did not contain text/html" |         return too_many_requests(request, "HTTP header Accept did not contain text/html") | ||||||
|     return None |     return None | ||||||
|  | |||||||
| @ -16,14 +16,16 @@ bot if the Accept-Encoding_ header .. | |||||||
| """ | """ | ||||||
| # pylint: disable=unused-argument | # pylint: disable=unused-argument | ||||||
| 
 | 
 | ||||||
| from typing import Optional, Tuple | from typing import Optional | ||||||
| import flask | import flask | ||||||
|  | import werkzeug | ||||||
| 
 | 
 | ||||||
| from searx.tools import config | from searx.tools import config | ||||||
|  | from ._helpers import too_many_requests | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]: | def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]: | ||||||
|     accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')] |     accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')] | ||||||
|     if not ('gzip' in accept_list or 'deflate' in accept_list): |     if not ('gzip' in accept_list or 'deflate' in accept_list): | ||||||
|         return 429, "bot detected, HTTP header Accept-Encoding did not contain gzip nor deflate" |         return too_many_requests(request, "HTTP header Accept-Encoding did not contain gzip nor deflate") | ||||||
|     return None |     return None | ||||||
|  | |||||||
| @ -13,13 +13,15 @@ if the Accept-Language_ header is unset. | |||||||
| """ | """ | ||||||
| # pylint: disable=unused-argument | # pylint: disable=unused-argument | ||||||
| 
 | 
 | ||||||
| from typing import Optional, Tuple | from typing import Optional | ||||||
| import flask | import flask | ||||||
|  | import werkzeug | ||||||
| 
 | 
 | ||||||
| from searx.tools import config | from searx.tools import config | ||||||
|  | from ._helpers import too_many_requests | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]: | def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]: | ||||||
|     if request.headers.get('Accept-Language', '').strip() == '': |     if request.headers.get('Accept-Language', '').strip() == '': | ||||||
|         return 429, "bot detected, missing HTTP header Accept-Language" |         return too_many_requests(request, "missing HTTP header Accept-Language") | ||||||
|     return None |     return None | ||||||
|  | |||||||
| @ -13,13 +13,15 @@ the Connection_ header is set to ``close``. | |||||||
| """ | """ | ||||||
| # pylint: disable=unused-argument | # pylint: disable=unused-argument | ||||||
| 
 | 
 | ||||||
| from typing import Optional, Tuple | from typing import Optional | ||||||
| import flask | import flask | ||||||
|  | import werkzeug | ||||||
| 
 | 
 | ||||||
| from searx.tools import config | from searx.tools import config | ||||||
|  | from ._helpers import too_many_requests | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]: | def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]: | ||||||
|     if request.headers.get('Connection', '').strip() == 'close': |     if request.headers.get('Connection', '').strip() == 'close': | ||||||
|         return 429, "bot detected, HTTP header 'Connection=close'" |         return too_many_requests(request, "HTTP header 'Connection=close") | ||||||
|     return None |     return None | ||||||
|  | |||||||
| @ -14,11 +14,13 @@ the User-Agent_ header is unset or matches the regular expression | |||||||
| """ | """ | ||||||
| # pylint: disable=unused-argument | # pylint: disable=unused-argument | ||||||
| 
 | 
 | ||||||
| from typing import Optional, Tuple | from typing import Optional | ||||||
| import re | import re | ||||||
| import flask | import flask | ||||||
|  | import werkzeug | ||||||
| 
 | 
 | ||||||
| from searx.tools import config | from searx.tools import config | ||||||
|  | from ._helpers import too_many_requests | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| USER_AGENT = ( | USER_AGENT = ( | ||||||
| @ -48,11 +50,8 @@ def regexp_user_agent(): | |||||||
|     return _regexp |     return _regexp | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]: | def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]: | ||||||
|     user_agent = request.headers.get('User-Agent', 'unknown') |     user_agent = request.headers.get('User-Agent', 'unknown') | ||||||
|     if regexp_user_agent().match(user_agent): |     if regexp_user_agent().match(user_agent): | ||||||
|         return ( |         return too_many_requests(request, f"bot detected, HTTP header User-Agent: {user_agent}") | ||||||
|             429, |  | ||||||
|             f"bot detected, HTTP header User-Agent: {user_agent}", |  | ||||||
|         ) |  | ||||||
|     return None |     return None | ||||||
|  | |||||||
| @ -1,3 +1,5 @@ | |||||||
|  | # SPDX-License-Identifier: AGPL-3.0-or-later | ||||||
|  | # lint: pylint | ||||||
| """.. _botdetection.ip_limit: | """.. _botdetection.ip_limit: | ||||||
| 
 | 
 | ||||||
| Method ``ip_limit`` | Method ``ip_limit`` | ||||||
| @ -37,16 +39,18 @@ droped. | |||||||
| 
 | 
 | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| from typing import Optional, Tuple | from typing import Optional | ||||||
| import flask | import flask | ||||||
|  | import werkzeug | ||||||
| from searx.tools import config | from searx.tools import config | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| from searx import redisdb | from searx import redisdb | ||||||
| from searx import logger | from searx import logger | ||||||
| from searx.redislib import incr_sliding_window, drop_counter | from searx.redislib import incr_sliding_window, drop_counter | ||||||
| 
 | 
 | ||||||
| from . import link_token | from . import link_token | ||||||
|  | from ._helpers import too_many_requests | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| logger = logger.getChild('botdetection.ip_limit') | logger = logger.getChild('botdetection.ip_limit') | ||||||
| 
 | 
 | ||||||
| @ -81,50 +85,51 @@ SUSPICIOUS_IP_MAX = 3 | |||||||
| """Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`.""" | """Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`.""" | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]: | def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]: | ||||||
|  |     # pylint: disable=too-many-return-statements | ||||||
|     redis_client = redisdb.client() |     redis_client = redisdb.client() | ||||||
| 
 | 
 | ||||||
|     x_forwarded_for = request.headers.get('X-Forwarded-For', '') |     client_ip = request.headers.get('X-Forwarded-For', '') | ||||||
|     if not x_forwarded_for: |     if not client_ip: | ||||||
|         logger.error("missing HTTP header X-Forwarded-For") |         logger.error("missing HTTP header X-Forwarded-For") | ||||||
| 
 | 
 | ||||||
|     if request.args.get('format', 'html') != 'html': |     if request.args.get('format', 'html') != 'html': | ||||||
|         c = incr_sliding_window(redis_client, 'IP limit - API_WONDOW:' + x_forwarded_for, API_WONDOW) |         c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + client_ip, API_WONDOW) | ||||||
|         if c > API_MAX: |         if c > API_MAX: | ||||||
|             return 429, "BLOCK %s: API limit exceeded" |             return too_many_requests(request, "too many request in API_WINDOW") | ||||||
| 
 |  | ||||||
|     suspicious = False |  | ||||||
|     suspicious_ip_counter = 'IP limit - SUSPICIOUS_IP_WINDOW:' + x_forwarded_for |  | ||||||
| 
 | 
 | ||||||
|     if cfg['botdetection.ip_limit.link_token']: |     if cfg['botdetection.ip_limit.link_token']: | ||||||
|         suspicious = link_token.is_suspicious(request) |  | ||||||
| 
 | 
 | ||||||
|     if suspicious: |         suspicious = link_token.is_suspicious(request, True) | ||||||
|  | 
 | ||||||
|  |         if not suspicious: | ||||||
|  |             # this IP is no longer suspicious: release ip again / delete the counter of this IP | ||||||
|  |             drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + client_ip) | ||||||
|  |             return None | ||||||
| 
 | 
 | ||||||
|         # this IP is suspicious: count requests from this IP |         # this IP is suspicious: count requests from this IP | ||||||
|         c = incr_sliding_window(redis_client, suspicious_ip_counter, SUSPICIOUS_IP_WINDOW) |         c = incr_sliding_window(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + client_ip, SUSPICIOUS_IP_WINDOW) | ||||||
|         if c > SUSPICIOUS_IP_MAX: |         if c > SUSPICIOUS_IP_MAX: | ||||||
|             return 429, f"bot detected, too many request from {x_forwarded_for} in SUSPICIOUS_IP_WINDOW" |             logger.error("BLOCK: too many request from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", client_ip) | ||||||
|  |             return flask.redirect(flask.url_for('index'), code=302) | ||||||
| 
 | 
 | ||||||
|         c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW) |         c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + client_ip, BURST_WINDOW) | ||||||
|         if c > BURST_MAX_SUSPICIOUS: |         if c > BURST_MAX_SUSPICIOUS: | ||||||
|             return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX_SUSPICIOUS" |             return too_many_requests(request, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)") | ||||||
| 
 | 
 | ||||||
|         c = incr_sliding_window(redis_client, 'IP limit - LONG_WINDOW:' + x_forwarded_for, LONG_WINDOW) |         c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + client_ip, LONG_WINDOW) | ||||||
|         if c > LONG_MAX_SUSPICIOUS: |         if c > LONG_MAX_SUSPICIOUS: | ||||||
|             return 429, f"bot detected, too many request from {x_forwarded_for} in LONG_MAX_SUSPICIOUS" |             return too_many_requests(request, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)") | ||||||
|  | 
 | ||||||
|  |         return None | ||||||
|  | 
 | ||||||
|  |     # vanilla limiter without extensions counts BURST_MAX and LONG_MAX | ||||||
|  |     c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + client_ip, BURST_WINDOW) | ||||||
|  |     if c > BURST_MAX: | ||||||
|  |         return too_many_requests(request, "too many request in BURST_WINDOW (BURST_MAX)") | ||||||
|  | 
 | ||||||
|  |     c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + client_ip, LONG_WINDOW) | ||||||
|  |     if c > LONG_MAX: | ||||||
|  |         return too_many_requests(request, "too many request in LONG_WINDOW (LONG_MAX)") | ||||||
| 
 | 
 | ||||||
|     else: |  | ||||||
| 
 |  | ||||||
|         if cfg['botdetection.ip_limit.link_token']: |  | ||||||
|             # this IP is no longer suspicious: release ip again / delete the counter of this IP |  | ||||||
|             drop_counter(redis_client, suspicious_ip_counter) |  | ||||||
| 
 |  | ||||||
|         c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW) |  | ||||||
|         if c > BURST_MAX: |  | ||||||
|             return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX" |  | ||||||
| 
 |  | ||||||
|         c = incr_sliding_window(redis_client, 'IP limit - LONG_WINDOW:' + x_forwarded_for, LONG_WINDOW) |  | ||||||
|         if c > LONG_MAX: |  | ||||||
|             return 429, f"bot detected, too many request from {x_forwarded_for} in LONG_MAX" |  | ||||||
|     return None |     return None | ||||||
|  | |||||||
| @ -42,6 +42,7 @@ from pathlib import Path | |||||||
| import flask | import flask | ||||||
| import pytomlpp as toml | import pytomlpp as toml | ||||||
| 
 | 
 | ||||||
|  | from searx import logger | ||||||
| from searx.tools import config | from searx.tools import config | ||||||
| from searx.botdetection import ( | from searx.botdetection import ( | ||||||
|     http_accept, |     http_accept, | ||||||
| @ -62,7 +63,13 @@ CFG_DEPRECATED = { | |||||||
|     # "dummy.old.foo": "config 'dummy.old.foo' exists only for tests.  Don't use it in your real project config." |     # "dummy.old.foo": "config 'dummy.old.foo' exists only for tests.  Don't use it in your real project config." | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| CFG = config.Config({}, {}) | CFG = None | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def get_cfg() -> config.Config: | ||||||
|  |     if CFG is None: | ||||||
|  |         init_cfg(logger) | ||||||
|  |     return CFG | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def init_cfg(log): | def init_cfg(log): | ||||||
| @ -73,7 +80,7 @@ def init_cfg(log): | |||||||
|         log.warning("missing config file: %s", LIMITER_CFG) |         log.warning("missing config file: %s", LIMITER_CFG) | ||||||
|         return |         return | ||||||
| 
 | 
 | ||||||
|     log.warning("load config file: %s", LIMITER_CFG) |     log.info("load config file: %s", LIMITER_CFG) | ||||||
|     try: |     try: | ||||||
|         upd_cfg = toml.load(LIMITER_CFG) |         upd_cfg = toml.load(LIMITER_CFG) | ||||||
|     except toml.DecodeError as exc: |     except toml.DecodeError as exc: | ||||||
|  | |||||||
| @ -47,15 +47,24 @@ from searx.redislib import secret_hash | |||||||
| TOKEN_LIVE_TIME = 600 | TOKEN_LIVE_TIME = 600 | ||||||
| """Livetime (sec) of limiter's CSS token.""" | """Livetime (sec) of limiter's CSS token.""" | ||||||
| 
 | 
 | ||||||
|  | PING_LIVE_TIME = 3600 | ||||||
|  | """Livetime (sec) of the ping-key from a client (request)""" | ||||||
|  | 
 | ||||||
| PING_KEY = 'SearXNG_limiter.ping' | PING_KEY = 'SearXNG_limiter.ping' | ||||||
|  | """Prefix of all ping-keys generated by :py:obj:`get_ping_key`""" | ||||||
|  | 
 | ||||||
| TOKEN_KEY = 'SearXNG_limiter.token' | TOKEN_KEY = 'SearXNG_limiter.token' | ||||||
|  | """Key for which the current token is stored in the DB""" | ||||||
| 
 | 
 | ||||||
| logger = logger.getChild('botdetection.link_token') | logger = logger.getChild('botdetection.link_token') | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def is_suspicious(request: flask.Request): | def is_suspicious(request: flask.Request, renew: bool = False): | ||||||
|     """Checks if there is a valid ping for this request, if not this request is |     """Checks if there is a valid ping for this request, if not this request is | ||||||
|     rated as *suspicious*""" |     rated as *suspicious*.  If a valid ping exists and argument ``renew`` is | ||||||
|  |     ``True`` the expire time of this ping is reset to :py:obj:`PING_LIVE_TIME`. | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|     redis_client = redisdb.client() |     redis_client = redisdb.client() | ||||||
|     if not redis_client: |     if not redis_client: | ||||||
|         return False |         return False | ||||||
| @ -69,12 +78,19 @@ def is_suspicious(request: flask.Request): | |||||||
|         ) |         ) | ||||||
|         return True |         return True | ||||||
| 
 | 
 | ||||||
|     logger.debug("found ping for this request: %s", ping_key) |     if renew: | ||||||
|  |         redis_client.set(ping_key, 1, ex=PING_LIVE_TIME) | ||||||
|  | 
 | ||||||
|  |     logger.debug("found ping for client request: %s", ping_key) | ||||||
|     return False |     return False | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def ping(request: flask.Request, token: str): | def ping(request: flask.Request, token: str): | ||||||
|     """This function is called by a request to URL ``/client<token>.css``""" |     """This function is called by a request to URL ``/client<token>.css``.  If | ||||||
|  |     ``token`` is valid a :py:obj:`PING_KEY` for the client is stored in the DB. | ||||||
|  |     The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`. | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|     redis_client = redisdb.client() |     redis_client = redisdb.client() | ||||||
|     if not redis_client: |     if not redis_client: | ||||||
|         return |         return | ||||||
| @ -82,20 +98,25 @@ def ping(request: flask.Request, token: str): | |||||||
|         return |         return | ||||||
|     ping_key = get_ping_key(request) |     ping_key = get_ping_key(request) | ||||||
|     logger.debug("store ping for: %s", ping_key) |     logger.debug("store ping for: %s", ping_key) | ||||||
|     redis_client.set(ping_key, 1, ex=TOKEN_LIVE_TIME) |     redis_client.set(ping_key, 1, ex=PING_LIVE_TIME) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def get_ping_key(request: flask.Request): | def get_ping_key(request: flask.Request): | ||||||
|     """Generates a hashed key that fits (more or less) to a request.  At least |     """Generates a hashed key that fits (more or less) to a client (request). | ||||||
|     X-Forwarded-For_ is needed to be able to assign the request to an IP. |     At least X-Forwarded-For_ is needed to be able to assign the request to an | ||||||
|  |     IP. | ||||||
| 
 | 
 | ||||||
|     """ |     """ | ||||||
|     return secret_hash( |     return ( | ||||||
|         PING_KEY |         PING_KEY | ||||||
|         + request.headers.get('X-Forwarded-For', '') |         + "[" | ||||||
|  |         + secret_hash( | ||||||
|  |             request.headers.get('X-Forwarded-For', '') | ||||||
|             + request.headers.get('Accept-Language', '') |             + request.headers.get('Accept-Language', '') | ||||||
|             + request.headers.get('User-Agent', '') |             + request.headers.get('User-Agent', '') | ||||||
|         ) |         ) | ||||||
|  |         + "]" | ||||||
|  |     ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def token_is_valid(token) -> bool: | def token_is_valid(token) -> bool: | ||||||
|  | |||||||
| @ -20,16 +20,10 @@ logger = logger.getChild('limiter') | |||||||
| 
 | 
 | ||||||
| def pre_request(): | def pre_request(): | ||||||
|     """See :ref:`flask.Flask.before_request`""" |     """See :ref:`flask.Flask.before_request`""" | ||||||
| 
 |     ret_val = limiter.filter_request(flask.request) | ||||||
|     val = limiter.filter_request(flask.request) |     if ret_val is None: | ||||||
|     if val is not None: |  | ||||||
|         http_status, msg = val |  | ||||||
|         client_ip = flask.request.headers.get('X-Forwarded-For', '<unknown>') |  | ||||||
|         logger.error("BLOCK (IP %s): %s" % (client_ip, msg)) |  | ||||||
|         return 'Too Many Requests', http_status |  | ||||||
| 
 |  | ||||||
|         logger.debug("OK: %s" % dump_request(flask.request)) |         logger.debug("OK: %s" % dump_request(flask.request)) | ||||||
|     return None |     return ret_val | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def init(app: flask.Flask, settings) -> bool: | def init(app: flask.Flask, settings) -> bool: | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user