mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-03 19:17:07 -05:00 
			
		
		
		
	add Ahmia filter plugin for onion results
This commit is contained in:
		
							parent
							
								
									c3daa08537
								
							
						
					
					
						commit
						32957cdf49
					
				
							
								
								
									
										16177
									
								
								searx/data/ahmia_blacklist.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16177
									
								
								searx/data/ahmia_blacklist.txt
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@ -28,6 +28,7 @@ from searx import logger, settings, static_path
 | 
				
			|||||||
logger = logger.getChild('plugins')
 | 
					logger = logger.getChild('plugins')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from searx.plugins import (oa_doi_rewrite,
 | 
					from searx.plugins import (oa_doi_rewrite,
 | 
				
			||||||
 | 
					                           ahmia_filter,
 | 
				
			||||||
                           hash_plugin,
 | 
					                           hash_plugin,
 | 
				
			||||||
                           https_rewrite,
 | 
					                           https_rewrite,
 | 
				
			||||||
                           infinite_scroll,
 | 
					                           infinite_scroll,
 | 
				
			||||||
@ -181,3 +182,7 @@ if 'enabled_plugins' in settings:
 | 
				
			|||||||
            plugin.default_on = True
 | 
					            plugin.default_on = True
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            plugin.default_on = False
 | 
					            plugin.default_on = False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# load tor specific plugins
 | 
				
			||||||
 | 
					if settings['outgoing'].get('using_tor_proxy'):
 | 
				
			||||||
 | 
					    plugins.register(ahmia_filter)
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										36
									
								
								searx/plugins/ahmia_filter.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								searx/plugins/ahmia_filter.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,36 @@
 | 
				
			|||||||
 | 
					'''
 | 
				
			||||||
 | 
					 SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from hashlib import md5
 | 
				
			||||||
 | 
					from os.path import join
 | 
				
			||||||
 | 
					from urllib.parse import urlparse
 | 
				
			||||||
 | 
					from searx import searx_dir
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					name = "Ahmia blacklist"
 | 
				
			||||||
 | 
					description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)"
 | 
				
			||||||
 | 
					default_on = True
 | 
				
			||||||
 | 
					preference_section = 'onions'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ahmia_blacklist = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_ahmia_blacklist():
					    """Return the Ahmia blacklist entries, loading them from disk on first use.

					    The entries are read from ``<searx_dir>/data/ahmia_blacklist.txt`` (split
					    on whitespace) and cached in the module-level ``ahmia_blacklist`` so the
					    file is read only once per process.

					    Returns:
					        set: the blacklist entries as strings.  A set is used because the
					        blacklist is large and is only queried with ``in``; membership tests
					        are O(1) instead of a linear scan per result.
					    """
					    global ahmia_blacklist
					    # Compare against None (not truthiness) so that an empty blacklist file is
					    # cached too instead of being re-read on every call.
					    if ahmia_blacklist is None:
					        with open(join(searx_dir, "data", "ahmia_blacklist.txt"), 'r', encoding='utf-8') as f:
					            ahmia_blacklist = set(f.read().split())
					    return ahmia_blacklist
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def not_blacklisted(result):
					    """Tell whether *result* may be kept in the result list.

					    Args:
					        result (dict): a search result; the keys ``is_onion`` and ``url`` are
					            read.

					    Returns:
					        bool: ``True`` for non-onion results, for onion results without a
					        usable hostname, and for onion results whose hostname hash is not in
					        the Ahmia blacklist; ``False`` when the hash is blacklisted.
					    """
					    if not result.get('is_onion'):
					        return True
					    url = result.get('url')
					    # urlparse() yields hostname=None for a missing/host-less URL; such a
					    # result cannot be matched against the blacklist, and calling .encode()
					    # on None would raise and abort filtering of every other result.
					    hostname = urlparse(url).hostname if url else None
					    if not hostname:
					        return True
					    # The blacklist is compared against the MD5 hex digest of the hostname.
					    result_hash = md5(hostname.encode()).hexdigest()
					    return result_hash not in get_ahmia_blacklist()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def post_search(request, search):
					    """Plugin hook: drop blacklisted onion results from the merged results.

					    Replaces ``search.result_container._merged_results`` with a new list
					    holding only the results accepted by ``not_blacklisted``.  ``request`` is
					    not used.  Always returns ``True``.
					    """
					    container = search.result_container
					    container._merged_results = [
					        result for result in container._merged_results if not_blacklisted(result)
					    ]
					    return True
 | 
				
			||||||
@ -258,6 +258,7 @@
 | 
				
			|||||||
                <fieldset>
 | 
					                <fieldset>
 | 
				
			||||||
                    <div class="container-fluid">
 | 
					                    <div class="container-fluid">
 | 
				
			||||||
                        {% for plugin in plugins %}
 | 
					                        {% for plugin in plugins %}
 | 
				
			||||||
 | 
					                        {% if plugin.preference_section != 'onions' %}
 | 
				
			||||||
                        <div class="panel panel-default">
 | 
					                        <div class="panel panel-default">
 | 
				
			||||||
                            <div class="panel-heading">
 | 
					                            <div class="panel-heading">
 | 
				
			||||||
                                <h3 class="panel-title">{{ _(plugin.name) }}</h3>
 | 
					                                <h3 class="panel-title">{{ _(plugin.name) }}</h3>
 | 
				
			||||||
@ -271,6 +272,7 @@
 | 
				
			|||||||
                                </div>
 | 
					                                </div>
 | 
				
			||||||
                            </div>
 | 
					                            </div>
 | 
				
			||||||
                        </div>
 | 
					                        </div>
 | 
				
			||||||
 | 
					                        {% endif %}
 | 
				
			||||||
                        {% endfor %}
 | 
					                        {% endfor %}
 | 
				
			||||||
                    </div>
 | 
					                    </div>
 | 
				
			||||||
                </fieldset>
 | 
					                </fieldset>
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										33
									
								
								utils/fetch_ahmia_blacklist.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										33
									
								
								utils/fetch_ahmia_blacklist.py
									
									
									
									
									
										Executable file
									
								
							@ -0,0 +1,33 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# This script saves Ahmia's blacklist for onion sites.
 | 
				
			||||||
 | 
					# More info in https://ahmia.fi/blacklist/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# set path
 | 
				
			||||||
 | 
					from sys import path
 | 
				
			||||||
 | 
					from os.path import realpath, dirname, join
 | 
				
			||||||
 | 
					path.append(realpath(dirname(realpath(__file__)) + '/../'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					import requests
 | 
				
			||||||
 | 
					from searx import searx_dir
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					URL = 'https://ahmia.fi/blacklist/banned/'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def fetch_ahmia_blacklist():
					    """Download the Ahmia blacklist from ``URL`` and return its entries.

					    Returns:
					        list: the response body split on whitespace, one string per entry.

					    Raises:
					        Exception: if the response status code is not 200.
					    """
					    resp = requests.get(URL, timeout=3.0)
					    if resp.status_code != 200:
					        # status_code is an integer: convert it explicitly, otherwise the
					        # str + int concatenation raises TypeError and hides the real error.
					        raise Exception("Error fetching Ahmia blacklist, HTTP code " + str(resp.status_code))
					    return resp.text.split()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_ahmia_blacklist_filename():
					    """Return the path of ``ahmia_blacklist.txt`` inside searx's ``data`` directory."""
					    return join(searx_dir, "data", "ahmia_blacklist.txt")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Fetch the current blacklist and overwrite the data file with it,
					# one entry per line (no trailing newline).
					blacklist = fetch_ahmia_blacklist()
					blacklist_text = '\n'.join(blacklist)
					with open(get_ahmia_blacklist_filename(), "w") as f:
					    f.write(blacklist_text)
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user