Mirror of https://github.com/searxng/searxng.git (synced 2025-11-02 18:47:05 -05:00)

Commit db703a0283
				
							
								
								
									
.gitignore | 1 line changed (vendored)
@@ -15,6 +15,7 @@ setup.cfg
 *.pyc
 */*.pyc
 *~
+*.swp
 
 /node_modules
 
							
								
								
									
searx/data/ahmia_blacklist.txt | 16177 lines (new file; diff suppressed because it is too large)
@@ -142,6 +142,17 @@ def load_engine(engine_data):
         engine.stats['page_load_time'] = 0
         engine.stats['page_load_count'] = 0
 
+    # tor related settings
+    if settings['outgoing'].get('using_tor_proxy'):
+        # use onion url if using tor.
+        if hasattr(engine, 'onion_url'):
+            engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
+    elif 'onions' in engine.categories:
+        # exclude onion engines if not using tor.
+        return None
+
+    engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)
+
     for category_name in engine.categories:
         categories.setdefault(category_name, []).append(engine)
 
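
Note on the hunk above (the engine loader, presumably searx/engines/__init__.py, since it defines load_engine): load_engine() now drops any engine in the 'onions' category unless outgoing.using_tor_proxy is set, rewrites search_url from onion_url when Tor is in use, and pads every timeout with extra_proxy_timeout. A minimal standalone sketch of that gating, with a stand-in settings dict and a hypothetical engine (nothing here beyond what the hunk itself shows):

    # stand-in settings; values are hypothetical
    settings = {'outgoing': {'using_tor_proxy': False, 'extra_proxy_timeout': 10.0}}

    class Engine:
        categories = ['onions']
        timeout = 3.0
        onion_url = 'http://example.onion'  # hypothetical onion address

    def gate_engine(engine):
        # mirrors load_engine(): rewrite the URL under Tor, drop onion engines otherwise
        if settings['outgoing'].get('using_tor_proxy'):
            if hasattr(engine, 'onion_url'):
                engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
        elif 'onions' in engine.categories:
            return None
        engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)
        return engine

    print(gate_engine(Engine()))  # None here, because using_tor_proxy is False
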
@@ -252,8 +263,9 @@ def get_engines_stats(preferences):
 
 
 def load_engines(engine_list):
-    global engines
+    global engines, engine_shortcuts
     engines.clear()
+    engine_shortcuts.clear()
     for engine_data in engine_list:
         engine = load_engine(engine_data)
         if engine is not None:
							
								
								
									
searx/engines/ahmia.py | 82 lines (new file)

@@ -0,0 +1,82 @@
+"""
+ Ahmia (Onions)
+
+ @website      http://msydqstlz2kzerdg.onion
+ @provides-api no
+
+ @using-api    no
+ @results      HTML
+ @stable       no
+ @parse        url, title, content
+"""
+
+from urllib.parse import urlencode, urlparse, parse_qs
+from lxml.html import fromstring
+from searx.engines.xpath import extract_url, extract_text
+
+# engine config
+categories = ['onions']
+paging = True
+page_size = 10
+
+# search url
+search_url = 'http://msydqstlz2kzerdg.onion/search/?{query}'
+time_range_support = True
+time_range_dict = {'day': 1,
+                   'week': 7,
+                   'month': 30}
+
+# xpaths
+results_xpath = '//li[@class="result"]'
+url_xpath = './h4/a/@href'
+title_xpath = './h4/a[1]'
+content_xpath = './/p[1]'
+correction_xpath = '//*[@id="didYouMean"]//a'
+number_of_results_xpath = '//*[@id="totalResults"]'
+
+
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'q': query}))
+
+    if params['time_range'] in time_range_dict:
+        params['url'] += '&' + urlencode({'d': time_range_dict[params['time_range']]})
+
+    return params
+
+
+def response(resp):
+    results = []
+    dom = fromstring(resp.text)
+
+    # trim results so there's not way too many at once
+    first_result_index = page_size * (resp.search_params.get('pageno', 1) - 1)
+    all_results = dom.xpath(results_xpath)
+    trimmed_results = all_results[first_result_index:first_result_index + page_size]
+
+    # get results
+    for result in trimmed_results:
+        # remove ahmia url and extract the actual url for the result
+        raw_url = extract_url(result.xpath(url_xpath), search_url)
+        cleaned_url = parse_qs(urlparse(raw_url).query).get('redirect_url', [''])[0]
+
+        title = extract_text(result.xpath(title_xpath))
+        content = extract_text(result.xpath(content_xpath))
+
+        results.append({'url': cleaned_url,
+                        'title': title,
+                        'content': content,
+                        'is_onion': True})
+
+    # get spelling corrections
+    for correction in dom.xpath(correction_xpath):
+        results.append({'correction': extract_text(correction)})
+
+    # get number of results
+    number_of_results = dom.xpath(number_of_results_xpath)
+    if number_of_results:
+        try:
+            results.append({'number_of_results': int(extract_text(number_of_results))})
+        except:
+            pass
+
+    return results
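
A note on the parser above: Ahmia wraps each hit in a redirect link, so response() recovers the destination from the redirect_url query parameter, and paging is done client-side by slicing page_size results out of the single page Ahmia returns. A standalone sketch of the URL cleanup, with a hypothetical wrapped URL:

    # the wrapped URL below is hypothetical, shaped like Ahmia's redirects
    from urllib.parse import urlparse, parse_qs

    raw_url = 'http://msydqstlz2kzerdg.onion/search/redirect?redirect_url=http%3A%2F%2Fexample.onion%2F'
    cleaned_url = parse_qs(urlparse(raw_url).query).get('redirect_url', [''])[0]
    print(cleaned_url)  # -> http://example.onion/
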
							
								
								
									
searx/engines/not_evil.py | 64 lines (new file)

@@ -0,0 +1,64 @@
+"""
+ not Evil (Onions)
+
+ @website     http://hss3uro2hsxfogfq.onion
+ @provide-api yes (http://hss3uro2hsxfogfq.onion/api.htm)
+
+ @using-api   no
+ @results     HTML
+ @stable      no
+ @parse       url, title, content
+"""
+
+from urllib.parse import urlencode
+from lxml import html
+from searx.engines.xpath import extract_text
+
+# engine dependent config
+categories = ['onions']
+paging = True
+page_size = 20
+
+# search-url
+base_url = 'http://hss3uro2hsxfogfq.onion/'
+search_url = 'index.php?{query}&hostLimit=20&start={pageno}&numRows={page_size}'
+
+# specific xpath variables
+results_xpath = '//*[@id="content"]/div/p'
+url_xpath = './span[1]'
+title_xpath = './a[1]'
+content_xpath = './text()'
+
+
+# do search-request
+def request(query, params):
+    offset = (params['pageno'] - 1) * page_size
+
+    params['url'] = base_url + search_url.format(pageno=offset,
+                                                 query=urlencode({'q': query}),
+                                                 page_size=page_size)
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    # needed because otherwise requests guesses wrong encoding
+    resp.encoding = 'utf8'
+    dom = html.fromstring(resp.text)
+
+    # parse results
+    for result in dom.xpath(results_xpath):
+        url = extract_text(result.xpath(url_xpath)[0])
+        title = extract_text(result.xpath(title_xpath)[0])
+        content = extract_text(result.xpath(content_xpath))
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'content': content,
+                        'is_onion': True})
+
+    return results
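
The request() builder above maps searx's 1-based pageno onto not Evil's row offset: start = (pageno - 1) * page_size. For example, the URL it would build for page 3 of a hypothetical query:

    from urllib.parse import urlencode

    base_url = 'http://hss3uro2hsxfogfq.onion/'
    search_url = 'index.php?{query}&hostLimit=20&start={pageno}&numRows={page_size}'
    page_size = 20
    offset = (3 - 1) * page_size  # page 3 -> start=40
    print(base_url + search_url.format(pageno=offset,
                                       query=urlencode({'q': 'example'}),
                                       page_size=page_size))
    # -> http://hss3uro2hsxfogfq.onion/index.php?q=example&hostLimit=20&start=40&numRows=20
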
@@ -10,6 +10,8 @@ thumbnail_xpath = False
 paging = False
 suggestion_xpath = ''
 results_xpath = ''
+cached_xpath = ''
+cached_url = ''
 
 # parameters for engines with paging support
 #
@@ -36,6 +38,8 @@ def request(query, params):
 def response(resp):
     results = []
     dom = html.fromstring(resp.text)
+    is_onion = True if 'onions' in categories else False
+
     if results_xpath:
         for result in eval_xpath(dom, results_xpath):
             url = extract_url(eval_xpath(result, url_xpath), search_url)
@@ -49,15 +53,33 @@ def response(resp):
                 if len(thumbnail_xpath_result) > 0:
                     tmp_result['img_src'] = extract_url(thumbnail_xpath_result, search_url)
+
+            # add alternative cached url if available
+            if cached_xpath:
+                tmp_result['cached_url'] = cached_url + extract_text(result.xpath(cached_xpath))
+
+            if is_onion:
+                tmp_result['is_onion'] = True
+
             results.append(tmp_result)
     else:
-        for url, title, content in zip(
-            (extract_url(x, search_url) for
-             x in eval_xpath(dom, url_xpath)),
-            map(extract_text, eval_xpath(dom, title_xpath)),
-            map(extract_text, eval_xpath(dom, content_xpath))
-        ):
-            results.append({'url': url, 'title': title, 'content': content})
+        if cached_xpath:
+            for url, title, content, cached in zip(
+                (extract_url(x, search_url) for
+                 x in dom.xpath(url_xpath)),
+                map(extract_text, dom.xpath(title_xpath)),
+                map(extract_text, dom.xpath(content_xpath)),
+                map(extract_text, dom.xpath(cached_xpath))
+            ):
+                results.append({'url': url, 'title': title, 'content': content,
+                                'cached_url': cached_url + cached, 'is_onion': is_onion})
+        else:
+            for url, title, content in zip(
+                (extract_url(x, search_url) for
+                 x in dom.xpath(url_xpath)),
+                map(extract_text, dom.xpath(title_xpath)),
+                map(extract_text, dom.xpath(content_xpath))
+            ):
+                results.append({'url': url, 'title': title, 'content': content, 'is_onion': is_onion})
+
     if not suggestion_xpath:
         return results
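
In the fallback branch above (no results_xpath container), the engine walks parallel node lists: three for url/title/content, plus a fourth for cache links when cached_xpath is set. Two things worth noting: the rewritten branch calls dom.xpath() directly where the old code used eval_xpath(), and zip() stops at the shortest list, so a page where the lists disagree in length silently drops trailing entries. A self-contained sketch of that zip pattern over hypothetical HTML:

    from lxml import html

    dom = html.fromstring('<div><a href="https://a.example/">A</a><p>first</p>'
                          '<a href="https://b.example/">B</a><p>second</p></div>')
    # three parallel node lists, paired up positionally
    for url, title, content in zip(dom.xpath('//a/@href'),
                                   (e.text for e in dom.xpath('//a')),
                                   (e.text for e in dom.xpath('//p'))):
        print({'url': url, 'title': title, 'content': content})
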
@@ -28,6 +28,7 @@ from searx import logger, settings, static_path
 logger = logger.getChild('plugins')
 
 from searx.plugins import (oa_doi_rewrite,
+                           ahmia_filter,
                            hash_plugin,
                            https_rewrite,
                            infinite_scroll,

@@ -181,3 +182,7 @@ if 'enabled_plugins' in settings:
             plugin.default_on = True
         else:
             plugin.default_on = False
+
+# load tor specific plugins
+if settings['outgoing'].get('using_tor_proxy'):
+    plugins.register(ahmia_filter)
							
								
								
									
searx/plugins/ahmia_filter.py | 36 lines (new file)

@@ -0,0 +1,36 @@
+'''
+ SPDX-License-Identifier: AGPL-3.0-or-later
+'''
+
+from hashlib import md5
+from os.path import join
+from urllib.parse import urlparse
+from searx import searx_dir
+
+name = "Ahmia blacklist"
+description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)"
+default_on = True
+preference_section = 'onions'
+
+ahmia_blacklist = None
+
+
+def get_ahmia_blacklist():
+    global ahmia_blacklist
+    if not ahmia_blacklist:
+        with open(join(join(searx_dir, "data"), "ahmia_blacklist.txt"), 'r') as f:
+            ahmia_blacklist = f.read().split()
+    return ahmia_blacklist
+
+
+def not_blacklisted(result):
+    if not result.get('is_onion'):
+        return True
+    result_hash = md5(urlparse(result.get('url')).hostname.encode()).hexdigest()
+    return result_hash not in get_ahmia_blacklist()
+
+
+def post_search(request, search):
+    filtered_results = list(filter(not_blacklisted, search.result_container._merged_results))
+    search.result_container._merged_results = filtered_results
+    return True
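
The filter above never handles onion hostnames in the clear: ahmia_blacklist.txt holds MD5 digests, and not_blacklisted() hashes each result's hostname before the membership test. A sketch with one hypothetical blacklisted host:

    from hashlib import md5
    from urllib.parse import urlparse

    # ahmia_blacklist.txt holds digests like this; the hostname is hypothetical
    blacklist = {md5(b'blocked.onion').hexdigest()}
    url = 'http://blocked.onion/page'
    result_hash = md5(urlparse(url).hostname.encode()).hexdigest()
    print(result_hash in blacklist)  # True -> the result would be filtered out

Since get_ahmia_blacklist() returns a list, each lookup is a linear scan over roughly 16000 entries; loading the file into a set would make the per-result test constant-time.
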
@@ -60,8 +60,10 @@ outgoing: # communication with search engines
 # see http://docs.python-requests.org/en/latest/user/advanced/#proxies
 # SOCKS proxies are also supported: see http://requests.readthedocs.io/en/master/user/advanced/#socks
 #    proxies :
-#        http : http://127.0.0.1:8080
-#        https: http://127.0.0.1:8080
+#        http : socks5h://127.0.0.1:9050
+#        https: socks5h://127.0.0.1:9050
+#    using_tor_proxy : True
+#    extra_proxy_timeout : 10.0 # Extra seconds to add in order to account for the time taken by the proxy
 # uncomment below section only if you have more than one network interface
 # which can be the source of outgoing search requests
 #    source_ips:
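
The commented example switches the proxy template from a plain HTTP proxy to Tor's SOCKS port. The socks5h scheme matters: it resolves hostnames through the proxy, which is the only way .onion names resolve at all. A sketch of what an outgoing request looks like under this config (assumes the SOCKS extra is installed, e.g. pip install requests[socks]; the onion address is Ahmia's, from this same commit):

    import requests

    proxies = {'http': 'socks5h://127.0.0.1:9050',
               'https': 'socks5h://127.0.0.1:9050'}
    # DNS resolution happens inside Tor because of the socks5h scheme
    resp = requests.get('http://msydqstlz2kzerdg.onion/', proxies=proxies, timeout=10.0)
    print(resp.status_code)
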
@@ -89,6 +91,12 @@ engines:
     shortcut: apkm
     disabled: True
 
+# Requires Tor
+  - name : ahmia
+    engine : ahmia
+    categories : onions
+    shortcut : ah
+
   - name : arch linux wiki
     engine : archlinux
     shortcut : al
@@ -185,7 +193,7 @@ engines:
   - name : deviantart
     engine : deviantart
     shortcut : da
-    timeout: 3.0
+    timeout : 3.0
 
   - name : ddg definitions
     engine : duckduckgo_definitions
@@ -514,6 +522,11 @@ engines:
     timeout: 5.0
     shortcut : npm
 
+# Requires Tor
+  - name : not evil
+    engine : not_evil
+    shortcut : ne
+
   - name : nyaa
     engine : nyaa
     shortcut : nt
@@ -698,6 +711,18 @@ engines:
     url: https://torrentz2.eu/
     timeout : 3.0
 
+# Requires Tor
+  - name : torch
+    engine : xpath
+    paging : True
+    search_url : http://xmh57jrknzkhv6y3ls3ubitzfqnkrwxhopf5aygthi7d6rplyvk3noyd.onion/cgi-bin/omega/omega?P={query}&DEFAULTOP=and
+    results_xpath : //table//tr
+    url_xpath : ./td[2]/a
+    title_xpath : ./td[2]/b
+    content_xpath : ./td[2]/small
+    categories : onions
+    shortcut : tch
+
   - name : twitter
     engine : twitter
     shortcut : tw
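
Unlike ahmia and not_evil, torch needs no Python module of its own: it is the generic xpath engine driven purely by configuration. Each engines: entry is parsed into a dict and handed to load_engine(engine_data) (see the loader hunk earlier), so the YAML block above presumably arrives as something like:

    # sketch of the engine_data dict the torch block becomes; keys mirror the YAML
    engine_data = {
        'name': 'torch',
        'engine': 'xpath',
        'paging': True,
        'search_url': 'http://xmh57jrknzkhv6y3ls3ubitzfqnkrwxhopf5aygthi7d6rplyvk3noyd.onion/cgi-bin/omega/omega?P={query}&DEFAULTOP=and',
        'results_xpath': '//table//tr',
        'url_xpath': './td[2]/a',
        'title_xpath': './td[2]/b',
        'content_xpath': './td[2]/small',
        'categories': 'onions',
        'shortcut': 'tch',
    }
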
@@ -1,6 +1,11 @@
 <div class="result {{ result.class }}{% for e in result.engines %} {{ e }}{% endfor %}">
     <h3 class="result_title">{% if "icon_"~result.engine~".ico" in favicons %}<img width="14" height="14" class="favicon" src="{{ url_for('static', filename='img/icons/icon_'+result.engine+'.ico') }}" alt="{{result.engine}}" />{% endif %}<a href="{{ result.url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ result.title|safe }}</a></h3>
-    <p class="url">{{ result.pretty_url }}‎ <a class="cache_link" href="https://web.archive.org/web/{{ result.url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('cached') }}</a>
+    <p class="url">{{ result.pretty_url }}‎
+    {% if result.cached_url %}
+        <a class="cache_link" href="{{ result.cached_url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('cached') }}</a>
+    {% elif not result.is_onion %}
+        <a class="cache_link" href="https://web.archive.org/web/{{ result.url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('cached') }}</a>
+    {% endif %}
     {% if result.publishedDate %}<span class="published_date">{{ result.publishedDate }}</span>{% endif %}</p>
     <p class="content">{% if result.img_src %}<img src="{{ image_proxify(result.img_src) }}" class="image" />{% endif %}{% if result.content %}{{ result.content|safe }}<br class="last"/>{% endif %}</p>
 </div>
@@ -32,7 +32,11 @@
             <span class="label label-default">{{ engine }}</span>
         {%- endfor -%}
         {%- if result.url -%}
-        <small>{{ result_link("https://web.archive.org/web/" + result.url, icon('link') + _('cached'), "text-info", id) }}</small>
+            {% if result.cached_url %}
+            <small>{{ result_link(result.cached_url, icon('link') + _('cached'), "text-info", id) }}</small>
+            {% elif not result.is_onion %}
+            <small>{{ result_link("https://web.archive.org/web/" + result.url, icon('link') + _('cached'), "text-info", id) }}</small>
+            {% endif %}
         {%- endif -%}
         {%- if proxify -%}
         <small>{{ result_link(proxify(result.url), icon('sort') + _('proxied'), "text-info", id) }}</small>

@@ -50,7 +54,11 @@
         <span class="label label-default">{{ engine }}</span>
     {%- endfor %}
     {%- if result.url -%}
-    <small>{{ result_link("https://web.archive.org/web/" + result.url, icon('link') + _('cached'), "text-info", id) }}</small>
+        {% if result.cached_url %}
+        <small>{{ result_link(result.cached_url, icon('link') + _('cached'), "text-info", id) }}</small>
+        {% elif not result.is_onion %}
+        <small>{{ result_link("https://web.archive.org/web/" + result.url, icon('link') + _('cached'), "text-info", id) }}</small>
+        {% endif %}
     {%- endif -%}
     {% if proxify -%}
    <small>{{ result_link(proxify(result.url), icon('sort') + _('proxied'), "text-info", id) }}</small>
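
Both template changes (this one and the legacy one above) implement the same precedence for the cached link: prefer an engine-supplied cached_url, fall back to the Wayback Machine, and show nothing for onion results, which archive.org cannot reach. As a plain-Python sketch of that logic:

    def cache_link(result):
        if result.get('cached_url'):      # engine-provided cache wins
            return result['cached_url']
        if not result.get('is_onion'):    # archive.org cannot reach onion sites
            return 'https://web.archive.org/web/' + result['url']
        return None                       # onion result without a cache: no link
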
@@ -258,6 +258,7 @@
                 <fieldset>
                     <div class="container-fluid">
                         {% for plugin in plugins %}
+                        {% if plugin.preference_section != 'onions' %}
                         <div class="panel panel-default">
                             <div class="panel-heading">
                                 <h3 class="panel-title">{{ _(plugin.name) }}</h3>

@@ -271,6 +272,7 @@
                                 </div>
                             </div>
                         </div>
+                        {% endif %}
                         {% endfor %}
                     </div>
                 </fieldset>
@@ -146,6 +146,7 @@ _category_names = (gettext('files'),
                    gettext('it'),
                    gettext('news'),
                    gettext('map'),
+                   gettext('onions'),
                    gettext('science'))
 
 outgoing_proxies = settings['outgoing'].get('proxies') or None
							
								
								
									
tests/unit/engines/test_xpath.py | 121 lines (new file)

@@ -0,0 +1,121 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import xpath
+from searx.testing import SearxTestCase
+
+
+class TestXpathEngine(SearxTestCase):
+
+    def test_request(self):
+        xpath.search_url = 'https://url.com/{query}'
+        xpath.categories = []
+        xpath.paging = False
+        query = 'test_query'
+        dicto = defaultdict(dict)
+        params = xpath.request(query, dicto)
+        self.assertIn('url', params)
+        self.assertEquals('https://url.com/test_query', params['url'])
+
+        xpath.search_url = 'https://url.com/q={query}&p={pageno}'
+        xpath.paging = True
+        query = 'test_query'
+        dicto = defaultdict(dict)
+        dicto['pageno'] = 1
+        params = xpath.request(query, dicto)
+        self.assertIn('url', params)
+        self.assertEquals('https://url.com/q=test_query&p=1', params['url'])
+
+    def test_response(self):
+        # without results_xpath
+        xpath.url_xpath = '//div[@class="search_result"]//a[@class="result"]/@href'
+        xpath.title_xpath = '//div[@class="search_result"]//a[@class="result"]'
+        xpath.content_xpath = '//div[@class="search_result"]//p[@class="content"]'
+
+        self.assertRaises(AttributeError, xpath.response, None)
+        self.assertRaises(AttributeError, xpath.response, [])
+        self.assertRaises(AttributeError, xpath.response, '')
+        self.assertRaises(AttributeError, xpath.response, '[]')
+
+        response = mock.Mock(text='<html></html>')
+        self.assertEqual(xpath.response(response), [])
+
+        html = u"""
+        <div>
+            <div class="search_result">
+                <a class="result" href="https://result1.com">Result 1</a>
+                <p class="content">Content 1</p>
+                <a class="cached" href="https://cachedresult1.com">Cache</a>
+            </div>
+            <div class="search_result">
+                <a class="result" href="https://result2.com">Result 2</a>
+                <p class="content">Content 2</p>
+                <a class="cached" href="https://cachedresult2.com">Cache</a>
+            </div>
+        </div>
+        """
+        response = mock.Mock(text=html)
+        results = xpath.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertEqual(results[0]['title'], 'Result 1')
+        self.assertEqual(results[0]['url'], 'https://result1.com/')
+        self.assertEqual(results[0]['content'], 'Content 1')
+        self.assertEqual(results[1]['title'], 'Result 2')
+        self.assertEqual(results[1]['url'], 'https://result2.com/')
+        self.assertEqual(results[1]['content'], 'Content 2')
+
+        # with cached urls, without results_xpath
+        xpath.cached_xpath = '//div[@class="search_result"]//a[@class="cached"]/@href'
+        results = xpath.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertEqual(results[0]['cached_url'], 'https://cachedresult1.com')
+        self.assertEqual(results[1]['cached_url'], 'https://cachedresult2.com')
+        self.assertFalse(results[0].get('is_onion', False))
+
+        # results are onion urls (no results_xpath)
+        xpath.categories = ['onions']
+        results = xpath.response(response)
+        self.assertTrue(results[0]['is_onion'])
+
+        # with results_xpath
+        xpath.results_xpath = '//div[@class="search_result"]'
+        xpath.url_xpath = './/a[@class="result"]/@href'
+        xpath.title_xpath = './/a[@class="result"]'
+        xpath.content_xpath = './/p[@class="content"]'
+        xpath.cached_xpath = None
+        xpath.categories = []
+
+        self.assertRaises(AttributeError, xpath.response, None)
+        self.assertRaises(AttributeError, xpath.response, [])
+        self.assertRaises(AttributeError, xpath.response, '')
+        self.assertRaises(AttributeError, xpath.response, '[]')
+
+        response = mock.Mock(text='<html></html>')
+        self.assertEqual(xpath.response(response), [])
+
+        response = mock.Mock(text=html)
+        results = xpath.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertEqual(results[0]['title'], 'Result 1')
+        self.assertEqual(results[0]['url'], 'https://result1.com/')
+        self.assertEqual(results[0]['content'], 'Content 1')
+        self.assertEqual(results[1]['title'], 'Result 2')
+        self.assertEqual(results[1]['url'], 'https://result2.com/')
+        self.assertEqual(results[1]['content'], 'Content 2')
+
+        # with cached urls, with results_xpath
+        xpath.cached_xpath = './/a[@class="cached"]/@href'
+        results = xpath.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertEqual(results[0]['cached_url'], 'https://cachedresult1.com')
+        self.assertEqual(results[1]['cached_url'], 'https://cachedresult2.com')
+        self.assertFalse(results[0].get('is_onion', False))
+
+        # results are onion urls (with results_xpath)
+        xpath.categories = ['onions']
+        results = xpath.response(response)
+        self.assertTrue(results[0]['is_onion'])
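
The suite leans on one stub pattern: response() only ever reads resp.text, so mock.Mock(text=...) stands in for a real HTTP response, and the module-level xpath settings are reassigned between cases. A minimal sketch of that pattern (assumes the module defaults shown in the xpath.py hunks above, with hypothetical xpath expressions):

    import mock
    from searx.engines import xpath

    xpath.categories = []
    xpath.url_xpath = '//a/@href'
    xpath.title_xpath = '//a'
    xpath.content_xpath = '//p'
    resp = mock.Mock(text='<html></html>')  # a stub response; only .text is read
    print(xpath.response(resp))  # -> []
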
							
								
								
									
tests/unit/test_engines_init.py | 44 lines (new file)

@@ -0,0 +1,44 @@
+from searx.testing import SearxTestCase
+from searx import settings, engines
+
+
+class TestEnginesInit(SearxTestCase):
+
+    @classmethod
+    def tearDownClass(cls):
+        settings['outgoing']['using_tor_proxy'] = False
+        settings['outgoing']['extra_proxy_timeout'] = 0
+
+    def test_initialize_engines_default(self):
+        engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1'},
+                       {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2'}]
+
+        engines.initialize_engines(engine_list)
+        self.assertEqual(len(engines.engines), 2)
+        self.assertIn('engine1', engines.engines)
+        self.assertIn('engine2', engines.engines)
+
+    def test_initialize_engines_exclude_onions(self):
+        settings['outgoing']['using_tor_proxy'] = False
+        engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1', 'categories': 'general'},
+                       {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}]
+
+        engines.initialize_engines(engine_list)
+        self.assertEqual(len(engines.engines), 1)
+        self.assertIn('engine1', engines.engines)
+        self.assertNotIn('onions', engines.categories)
+
+    def test_initialize_engines_include_onions(self):
+        settings['outgoing']['using_tor_proxy'] = True
+        settings['outgoing']['extra_proxy_timeout'] = 100.0
+        engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1', 'categories': 'general',
+                        'timeout': 20.0, 'onion_url': 'http://engine1.onion'},
+                       {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}]
+
+        engines.initialize_engines(engine_list)
+        self.assertEqual(len(engines.engines), 2)
+        self.assertIn('engine1', engines.engines)
+        self.assertIn('engine2', engines.engines)
+        self.assertIn('onions', engines.categories)
+        self.assertIn('http://engine1.onion', engines.engines['engine1'].search_url)
+        self.assertEqual(engines.engines['engine1'].timeout, 120.0)
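
The final assertion is just the loader arithmetic from the engines hunk earlier: engine1's configured timeout of 20.0 plus the extra_proxy_timeout of 100.0 gives 120.0.
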
							
								
								
									
utils/fetch_ahmia_blacklist.py | 33 lines (new executable file)

@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+
+# This script saves Ahmia's blacklist for onion sites.
+# More info in https://ahmia.fi/blacklist/
+
+# set path
+from sys import path
+from os.path import realpath, dirname, join
+path.append(realpath(dirname(realpath(__file__)) + '/../'))
+
+#
+import requests
+from searx import searx_dir
+
+URL = 'https://ahmia.fi/blacklist/banned/'
+
+
+def fetch_ahmia_blacklist():
+    resp = requests.get(URL, timeout=3.0)
+    if resp.status_code != 200:
+        raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code)
+    else:
+        blacklist = resp.text.split()
+        return blacklist
+
+
+def get_ahmia_blacklist_filename():
+    return join(join(searx_dir, "data"), "ahmia_blacklist.txt")
+
+
+blacklist = fetch_ahmia_blacklist()
+with open(get_ahmia_blacklist_filename(), "w") as f:
+    f.write('\n'.join(blacklist))
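
One caveat in the error path above: resp.status_code is an int, so on a non-200 response the string concatenation in the raise would itself fail with a TypeError before the intended message appears. A minimal fix sketch:

    # convert the status code so the intended Exception is actually raised
    raise Exception("Error fetching Ahmia blacklist, HTTP code " + str(resp.status_code))
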