mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-04 03:27:06 -05:00 
			
		
		
		
	[mod] limiter: blocklist and passlist (ip_lists)
A blocklist and a passlist can be configured in /etc/searxng/limiter.toml::
    [botdetection.ip_lists]
    pass_ip = [
      '51.15.252.168',  # IPv4 of check.searx.space
    ]
    block_ip = [
      '93.184.216.34',  # IPv4 of example.org
    ]
Closes: https://github.com/searxng/searxng/issues/2127
Closes: https://github.com/searxng/searxng/pull/2129
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
			
			
This commit is contained in:
		
							parent
							
								
									de2f396e50
								
							
						
					
					
						commit
						f3763d73ad
					
				@ -15,6 +15,9 @@ Bot Detection
 | 
				
			|||||||
.. automodule:: searx.botdetection.limiter
 | 
					.. automodule:: searx.botdetection.limiter
 | 
				
			||||||
  :members:
 | 
					  :members:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.. automodule:: searx.botdetection.ip_lists
 | 
				
			||||||
 | 
					  :members:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Rate limit
 | 
					Rate limit
 | 
				
			||||||
==========
 | 
					==========
 | 
				
			||||||
 | 
				
			|||||||
@ -6,8 +6,8 @@ from __future__ import annotations
 | 
				
			|||||||
from ipaddress import (
 | 
					from ipaddress import (
 | 
				
			||||||
    IPv4Network,
 | 
					    IPv4Network,
 | 
				
			||||||
    IPv6Network,
 | 
					    IPv6Network,
 | 
				
			||||||
 | 
					    IPv4Address,
 | 
				
			||||||
    IPv6Address,
 | 
					    IPv6Address,
 | 
				
			||||||
    ip_address,
 | 
					 | 
				
			||||||
    ip_network,
 | 
					    ip_network,
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
import flask
 | 
					import flask
 | 
				
			||||||
@ -46,11 +46,10 @@ def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkz
 | 
				
			|||||||
    return flask.make_response(('Too Many Requests', 429))
 | 
					    return flask.make_response(('Too Many Requests', 429))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_network(real_ip: str, cfg: config.Config) -> IPv4Network | IPv6Network:
 | 
					def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
 | 
				
			||||||
    """Returns the (client) network that the real_ip is part of."""
 | 
					    """Returns the (client) network that the real_ip is part of."""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ip = ip_address(real_ip)
 | 
					    if real_ip.version == 6:
 | 
				
			||||||
    if isinstance(ip, IPv6Address):
 | 
					 | 
				
			||||||
        prefix = cfg['real_ip.ipv6_prefix']
 | 
					        prefix = cfg['real_ip.ipv6_prefix']
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        prefix = cfg['real_ip.ipv4_prefix']
 | 
					        prefix = cfg['real_ip.ipv4_prefix']
 | 
				
			||||||
@ -99,7 +98,7 @@ def get_real_ip(request: flask.Request) -> str:
 | 
				
			|||||||
        from .limiter import get_cfg  # pylint: disable=import-outside-toplevel, cyclic-import
 | 
					        from .limiter import get_cfg  # pylint: disable=import-outside-toplevel, cyclic-import
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        forwarded_for = [x.strip() for x in forwarded_for.split(',')]
 | 
					        forwarded_for = [x.strip() for x in forwarded_for.split(',')]
 | 
				
			||||||
        x_for: int = get_cfg()['real_ip.x_for']
 | 
					        x_for: int = get_cfg()['real_ip.x_for']  # type: ignore
 | 
				
			||||||
        forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
 | 
					        forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if not real_ip:
 | 
					    if not real_ip:
 | 
				
			||||||
 | 
				
			|||||||
@ -49,14 +49,16 @@ import werkzeug
 | 
				
			|||||||
from searx.tools import config
 | 
					from searx.tools import config
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from searx import redisdb
 | 
					from searx import redisdb
 | 
				
			||||||
from searx import logger
 | 
					 | 
				
			||||||
from searx.redislib import incr_sliding_window, drop_counter
 | 
					from searx.redislib import incr_sliding_window, drop_counter
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from . import link_token
 | 
					from . import link_token
 | 
				
			||||||
from ._helpers import too_many_requests
 | 
					from ._helpers import (
 | 
				
			||||||
 | 
					    too_many_requests,
 | 
				
			||||||
 | 
					    logger,
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
logger = logger.getChild('botdetection.ip_limit')
 | 
					logger = logger.getChild('ip_limit')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
BURST_WINDOW = 20
 | 
					BURST_WINDOW = 20
 | 
				
			||||||
"""Time (sec) before sliding window for *burst* requests expires."""
 | 
					"""Time (sec) before sliding window for *burst* requests expires."""
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										85
									
								
								searx/botdetection/ip_lists.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										85
									
								
								searx/botdetection/ip_lists.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,85 @@
 | 
				
			|||||||
 | 
					# SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					# lint: pylint
 | 
				
			||||||
 | 
					""".. _botdetection.ip_lists:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Method ``ip_lists``
 | 
				
			||||||
 | 
					-------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and
 | 
				
			||||||
 | 
					:py:obj:`pass-lists <pass_ip>`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.. code:: toml
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   [botdetection.ip_lists]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   pass_ip = [
 | 
				
			||||||
 | 
					    '140.238.172.132', # IPv4 of check.searx.space
 | 
				
			||||||
 | 
					    '192.168.0.0/16',  # IPv4 private network
 | 
				
			||||||
 | 
					    'fe80::/10'        # IPv6 linklocal
 | 
				
			||||||
 | 
					   ]
 | 
				
			||||||
 | 
					   block_ip = [
 | 
				
			||||||
 | 
					      '93.184.216.34', # IPv4 of example.org
 | 
				
			||||||
 | 
					      '257.1.1.1',     # invalid IP --> will be ignored, logged in ERROR class
 | 
				
			||||||
 | 
					   ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					# pylint: disable=unused-argument
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from __future__ import annotations
 | 
				
			||||||
 | 
					from typing import Tuple
 | 
				
			||||||
 | 
					from ipaddress import (
 | 
				
			||||||
 | 
					    ip_network,
 | 
				
			||||||
 | 
					    IPv4Address,
 | 
				
			||||||
 | 
					    IPv6Address,
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from searx.tools import config
 | 
				
			||||||
 | 
					from ._helpers import logger
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Child logger for this module.  The name matches the module (``ip_lists``)
					# so that its records are not attributed to the ``ip_limit`` method.
					logger = logger.getChild('ip_lists')

					SEARXNG_ORG = [
					    # https://github.com/searxng/searxng/pull/2484#issuecomment-1576639195
					    '140.238.172.132',  # IPv4 check.searx.space
					    '2603:c022:0:4900::/56',  # IPv6 check.searx.space
					]
					"""Passlist of IPs from the SearXNG organization, e.g. `check.searx.space`."""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def pass_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
					    """Tell whether ``real_ip`` is covered by a pass-list.

					    Unless ``botdetection.ip_lists.pass_searxng_org`` is switched off in
					    ``cfg`` (it defaults to ``True``), the hard-coded :py:obj:`SEARXNG_ORG`
					    networks are consulted first.  If none of them contains the IP, the
					    result of the lookup in the configured
					    ``botdetection.ip_lists.pass_ip`` list is returned.

					    :return: a tuple ``(matched, message)``
					    """

					    use_searxng_org = cfg.get('botdetection.ip_lists.pass_searxng_org', default=True)
					    if use_searxng_org:
					        for entry in SEARXNG_ORG:
					            org_net = ip_network(entry, strict=False)
					            # an IPv4 address can never be a member of an IPv6 network (and
					            # vice versa)
					            if real_ip.version != org_net.version:
					                continue
					            if real_ip in org_net:
					                return True, f"IP matches {org_net.compressed} in SEARXNG_ORG list."

					    return ip_is_subnet_of_member_in_list(real_ip, 'botdetection.ip_lists.pass_ip', cfg)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def block_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
					    """Tell whether ``real_ip`` is covered by the configured
					    ``botdetection.ip_lists.block_ip`` list.

					    :return: a tuple ``(matched, message)``; on a match the message is
					        extended by a hint on how to get the IP removed from the list.
					    """

					    hit, reason = ip_is_subnet_of_member_in_list(real_ip, 'botdetection.ip_lists.block_ip', cfg)
					    if not hit:
					        return hit, reason
					    return hit, reason + " To remove IP from list, please contact the maintainer of the service."
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def ip_is_subnet_of_member_in_list(
					    real_ip: IPv4Address | IPv6Address, list_name: str, cfg: config.Config
					) -> Tuple[bool, str]:
					    """Check ``real_ip`` against the networks listed under ``list_name``.

					    The items of the list are read from ``cfg`` (a missing list is treated
					    as empty) and parsed by :py:obj:`ipaddress.ip_network` with
					    ``strict=False``, so single addresses as well as networks are accepted.
					    Items that can not be parsed are logged as error and skipped.

					    :param real_ip: address of the client
					    :param list_name: name of the configuration option holding the list
					    :param cfg: configuration the list is read from
					    :return: a tuple ``(matched, message)``; ``matched`` is ``True`` for the
					        first item of the list that contains ``real_ip``
					    """

					    for item in cfg.get(list_name, default=[]):
					        try:
					            net = ip_network(item, strict=False)
					        except ValueError:
					            logger.error("invalid IP %s in %s", item, list_name)
					            continue
					        # an IPv4 address can never be a member of an IPv6 network (and vice
					        # versa), the version is compared before the membership test
					        if real_ip.version == net.version and real_ip in net:
					            return True, f"IP matches {net.compressed} in {list_name}."
					    return False, f"IP is not a member of an item in the {list_name} list"
 | 
				
			||||||
@ -40,6 +40,7 @@ and set the redis-url connection. Check the value, it depends on your redis DB
 | 
				
			|||||||
from __future__ import annotations
 | 
					from __future__ import annotations
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from pathlib import Path
 | 
					from pathlib import Path
 | 
				
			||||||
 | 
					from ipaddress import ip_address
 | 
				
			||||||
import flask
 | 
					import flask
 | 
				
			||||||
import werkzeug
 | 
					import werkzeug
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -53,6 +54,7 @@ from . import (
 | 
				
			|||||||
    http_connection,
 | 
					    http_connection,
 | 
				
			||||||
    http_user_agent,
 | 
					    http_user_agent,
 | 
				
			||||||
    ip_limit,
 | 
					    ip_limit,
 | 
				
			||||||
 | 
					    ip_lists,
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ._helpers import (
 | 
					from ._helpers import (
 | 
				
			||||||
@ -84,16 +86,41 @@ def get_cfg() -> config.Config:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def filter_request(request: flask.Request) -> werkzeug.Response | None:
 | 
					def filter_request(request: flask.Request) -> werkzeug.Response | None:
 | 
				
			||||||
 | 
					    # pylint: disable=too-many-return-statements
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cfg = get_cfg()
 | 
					    cfg = get_cfg()
 | 
				
			||||||
    real_ip = get_real_ip(request)
 | 
					    real_ip = ip_address(get_real_ip(request))
 | 
				
			||||||
    network = get_network(real_ip, cfg)
 | 
					    network = get_network(real_ip, cfg)
 | 
				
			||||||
    if network.is_link_local:
 | 
					 | 
				
			||||||
        return None
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if request.path == '/healthz':
 | 
					    if request.path == '/healthz':
 | 
				
			||||||
        return None
 | 
					        return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # link-local
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if network.is_link_local:
 | 
				
			||||||
 | 
					        return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # block- & pass- lists
 | 
				
			||||||
 | 
					    #
 | 
				
			||||||
 | 
					    # 1. The IP of the request is first checked against the pass-list; if the IP
 | 
				
			||||||
 | 
					    #    matches an entry in the list, the request is not blocked.
 | 
				
			||||||
 | 
					    # 2. If no matching entry is found in the pass-list, then a check is made against
 | 
				
			||||||
 | 
					    #    the block list; if the IP matches an entry in the list, the request is
 | 
				
			||||||
 | 
					    #    blocked.
 | 
				
			||||||
 | 
					    # 3. If the IP is not in either list, the request is not blocked.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    match, msg = ip_lists.pass_ip(real_ip, cfg)
 | 
				
			||||||
 | 
					    if match:
 | 
				
			||||||
 | 
					        logger.warning("PASS %s: matched PASSLIST - %s", network.compressed, msg)
 | 
				
			||||||
 | 
					        return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    match, msg = ip_lists.block_ip(real_ip, cfg)
 | 
				
			||||||
 | 
					    if match:
 | 
				
			||||||
 | 
					        logger.error("BLOCK %s: matched BLOCKLIST - %s", network.compressed, msg)
 | 
				
			||||||
 | 
					        return flask.make_response(('IP is on BLOCKLIST - %s' % msg, 429))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # methods applied on /
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for func in [
 | 
					    for func in [
 | 
				
			||||||
        http_user_agent,
 | 
					        http_user_agent,
 | 
				
			||||||
    ]:
 | 
					    ]:
 | 
				
			||||||
@ -101,6 +128,8 @@ def filter_request(request: flask.Request) -> werkzeug.Response | None:
 | 
				
			|||||||
        if val is not None:
 | 
					        if val is not None:
 | 
				
			||||||
            return val
 | 
					            return val
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # methods applied on /search
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if request.path == '/search':
 | 
					    if request.path == '/search':
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for func in [
 | 
					        for func in [
 | 
				
			||||||
 | 
				
			|||||||
@ -16,7 +16,25 @@ ipv6_prefix = 48
 | 
				
			|||||||
# (networks) are not monitored by the ip_limit
 | 
					# (networks) are not monitored by the ip_limit
 | 
				
			||||||
filter_link_local = false
 | 
					filter_link_local = false
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# acrivate link_token method in the ip_limit method
 | 
					# activate link_token method in the ip_limit method
 | 
				
			||||||
link_token = false
 | 
					link_token = false
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[botdetection.ip_lists]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# In the limiter, the ip_lists method has priority over all other methods -> if
 | 
				
			||||||
 | 
					# an IP is in the pass_ip list, it has unrestricted access and it is also not
 | 
				
			||||||
 | 
					# checked if e.g. the "user agent" suggests a bot (e.g. curl).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					block_ip = [
 | 
				
			||||||
 | 
					  # '93.184.216.34',  # IPv4 of example.org
 | 
				
			||||||
 | 
					  # '257.1.1.1',      # invalid IP --> will be ignored, logged in ERROR class
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pass_ip = [
 | 
				
			||||||
 | 
					  # '192.168.0.0/16',      # IPv4 private network
 | 
				
			||||||
 | 
					  # 'fe80::/10'            # IPv6 linklocal / wins over botdetection.ip_limit.filter_link_local
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Activate passlist of (hardcoded) IPs from the SearXNG organization,
 | 
				
			||||||
 | 
					# e.g. `check.searx.space`.
 | 
				
			||||||
 | 
					pass_searxng_org = true
 | 
				
			||||||
@ -39,6 +39,7 @@ from __future__ import annotations
 | 
				
			|||||||
from ipaddress import (
 | 
					from ipaddress import (
 | 
				
			||||||
    IPv4Network,
 | 
					    IPv4Network,
 | 
				
			||||||
    IPv6Network,
 | 
					    IPv6Network,
 | 
				
			||||||
 | 
					    ip_address,
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import string
 | 
					import string
 | 
				
			||||||
@ -107,7 +108,7 @@ def ping(request: flask.Request, token: str):
 | 
				
			|||||||
        return
 | 
					        return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cfg = limiter.get_cfg()
 | 
					    cfg = limiter.get_cfg()
 | 
				
			||||||
    real_ip = get_real_ip(request)
 | 
					    real_ip = ip_address(get_real_ip(request))
 | 
				
			||||||
    network = get_network(real_ip, cfg)
 | 
					    network = get_network(real_ip, cfg)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ping_key = get_ping_key(network, request)
 | 
					    ping_key = get_ping_key(network, request)
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user