mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-03 19:17:07 -05:00 
			
		
		
		
	[mod] botdetection: HTTP Fetch Metadata Request Headers
HTTP Fetch Metadata Request Headers [1][2] are used to detect bot requests. Bots with invalid *Fetch Metadata* will be redirected to the intro (`index`) page. [1] https://www.w3.org/TR/fetch-metadata/ [2] https://developer.mozilla.org/en-US/docs/Glossary/Fetch_metadata_request_header Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									8ef5fbca4e
								
							
						
					
					
						commit
						fe08bb1d90
					
				@ -53,6 +53,9 @@ Probe HTTP headers
 | 
				
			|||||||
.. automodule:: searx.botdetection.http_user_agent
 | 
					.. automodule:: searx.botdetection.http_user_agent
 | 
				
			||||||
  :members:
 | 
					  :members:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.. automodule:: searx.botdetection.sec_fetch
 | 
				
			||||||
 | 
					  :members:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.. _botdetection config:
 | 
					.. _botdetection config:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Config
 | 
					Config
 | 
				
			||||||
 | 
				
			|||||||
@ -34,6 +34,9 @@ def dump_request(request: SXNG_Request):
 | 
				
			|||||||
        + " || Content-Length: %s" % request.headers.get('Content-Length')
 | 
					        + " || Content-Length: %s" % request.headers.get('Content-Length')
 | 
				
			||||||
        + " || Connection: %s" % request.headers.get('Connection')
 | 
					        + " || Connection: %s" % request.headers.get('Connection')
 | 
				
			||||||
        + " || User-Agent: %s" % request.headers.get('User-Agent')
 | 
					        + " || User-Agent: %s" % request.headers.get('User-Agent')
 | 
				
			||||||
 | 
					        + " || Sec-Fetch-Site: %s" % request.headers.get('Sec-Fetch-Site')
 | 
				
			||||||
 | 
					        + " || Sec-Fetch-Mode: %s" % request.headers.get('Sec-Fetch-Mode')
 | 
				
			||||||
 | 
					        + " || Sec-Fetch-Dest: %s" % request.headers.get('Sec-Fetch-Dest')
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										61
									
								
								searx/botdetection/http_sec_fetch.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								searx/botdetection/http_sec_fetch.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,61 @@
 | 
				
			|||||||
 | 
					# SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					Method ``http_sec_fetch``
 | 
				
			||||||
 | 
					-------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The ``http_sec_fetch`` method protects resources from web attacks with `Fetch
 | 
				
			||||||
 | 
					Metadata`_.  A request is filtered out in case of:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- http header Sec-Fetch-Mode_ is invalid
 | 
				
			||||||
 | 
					- http header Sec-Fetch-Dest_ is invalid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.. _Fetch Metadata:
 | 
				
			||||||
 | 
					   https://developer.mozilla.org/en-US/docs/Glossary/Fetch_metadata_request_header
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.. _Sec-Fetch-Dest:
 | 
				
			||||||
 | 
					   https://developer.mozilla.org/en-US/docs/Web/API/Request/destination
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.. _Sec-Fetch-Mode:
 | 
				
			||||||
 | 
					   https://developer.mozilla.org/en-US/docs/Web/API/Request/mode
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					# pylint: disable=unused-argument
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from __future__ import annotations
 | 
				
			||||||
 | 
					from ipaddress import (
 | 
				
			||||||
 | 
					    IPv4Network,
 | 
				
			||||||
 | 
					    IPv6Network,
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import flask
 | 
				
			||||||
 | 
					import werkzeug
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from searx.extended_types import SXNG_Request
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from . import config
 | 
				
			||||||
 | 
					from ._helpers import logger
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def filter_request(
					    network: IPv4Network | IPv6Network,
					    request: SXNG_Request,
					    cfg: config.Config,
					) -> werkzeug.Response | None:
					    """Redirect requests with invalid *Fetch Metadata* headers to ``index``.

					    The headers are checked in this order and the first failing check
					    ends the evaluation:

					    - ``Sec-Fetch-Mode`` must be ``navigate``
					    - ``Sec-Fetch-Site`` must be ``same-origin``, ``same-site`` or ``none``
					    - ``Sec-Fetch-Dest`` must be ``document``

					    A missing header is read as an empty string and therefore fails its
					    check.

					    :param network: not used by this method
					    :param request: the request whose HTTP headers are inspected
					    :param cfg: not used by this method
					    :return: a ``302`` redirect to the ``index`` endpoint when one of the
					        checks fails, otherwise ``None``
					    """

					    val = request.headers.get("Sec-Fetch-Mode", "")
					    if val != "navigate":
					        logger.debug("invalid Sec-Fetch-Mode '%s'", val)
					        return flask.redirect(flask.url_for('index'), code=302)

					    val = request.headers.get("Sec-Fetch-Site", "")
					    if val not in ('same-origin', 'same-site', 'none'):
					        logger.debug("invalid Sec-Fetch-Site '%s'", val)
					        # The redirect has to be returned; building the response without
					        # returning it discards it and the request is not filtered.
					        return flask.redirect(flask.url_for('index'), code=302)

					    val = request.headers.get("Sec-Fetch-Dest", "")
					    if val != "document":
					        logger.debug("invalid Sec-Fetch-Dest '%s'", val)
					        # Same as above: return the redirect instead of dropping it.
					        return flask.redirect(flask.url_for('index'), code=302)

					    return None
 | 
				
			||||||
@ -112,6 +112,7 @@ from searx.botdetection import (
 | 
				
			|||||||
    http_accept_encoding,
 | 
					    http_accept_encoding,
 | 
				
			||||||
    http_accept_language,
 | 
					    http_accept_language,
 | 
				
			||||||
    http_user_agent,
 | 
					    http_user_agent,
 | 
				
			||||||
 | 
					    http_sec_fetch,
 | 
				
			||||||
    ip_limit,
 | 
					    ip_limit,
 | 
				
			||||||
    ip_lists,
 | 
					    ip_lists,
 | 
				
			||||||
    get_network,
 | 
					    get_network,
 | 
				
			||||||
@ -179,16 +180,17 @@ def filter_request(request: SXNG_Request) -> werkzeug.Response | None:
 | 
				
			|||||||
        logger.error("BLOCK %s: matched BLOCKLIST - %s", network.compressed, msg)
 | 
					        logger.error("BLOCK %s: matched BLOCKLIST - %s", network.compressed, msg)
 | 
				
			||||||
        return flask.make_response(('IP is on BLOCKLIST - %s' % msg, 429))
 | 
					        return flask.make_response(('IP is on BLOCKLIST - %s' % msg, 429))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # methods applied on /
 | 
					    # methods applied on all requests
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for func in [
 | 
					    for func in [
 | 
				
			||||||
        http_user_agent,
 | 
					        http_user_agent,
 | 
				
			||||||
    ]:
 | 
					    ]:
 | 
				
			||||||
        val = func.filter_request(network, request, cfg)
 | 
					        val = func.filter_request(network, request, cfg)
 | 
				
			||||||
        if val is not None:
 | 
					        if val is not None:
 | 
				
			||||||
 | 
					            logger.debug(f"NOT OK ({func.__name__}): {network}: %s", dump_request(sxng_request))
 | 
				
			||||||
            return val
 | 
					            return val
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # methods applied on /search
 | 
					    # methods applied on /search requests
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if request.path == '/search':
 | 
					    if request.path == '/search':
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -197,11 +199,14 @@ def filter_request(request: SXNG_Request) -> werkzeug.Response | None:
 | 
				
			|||||||
            http_accept_encoding,
 | 
					            http_accept_encoding,
 | 
				
			||||||
            http_accept_language,
 | 
					            http_accept_language,
 | 
				
			||||||
            http_user_agent,
 | 
					            http_user_agent,
 | 
				
			||||||
 | 
					            http_sec_fetch,
 | 
				
			||||||
            ip_limit,
 | 
					            ip_limit,
 | 
				
			||||||
        ]:
 | 
					        ]:
 | 
				
			||||||
            val = func.filter_request(network, request, cfg)
 | 
					            val = func.filter_request(network, request, cfg)
 | 
				
			||||||
            if val is not None:
 | 
					            if val is not None:
 | 
				
			||||||
 | 
					                logger.debug(f"NOT OK ({func.__name__}): {network}: %s", dump_request(sxng_request))
 | 
				
			||||||
                return val
 | 
					                return val
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    logger.debug(f"OK {network}: %s", dump_request(sxng_request))
 | 
					    logger.debug(f"OK {network}: %s", dump_request(sxng_request))
 | 
				
			||||||
    return None
 | 
					    return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user