	Merge pull request #30 from matejc/smallissues
fix: robot fw, entry points, some flake8, package searx egg
Commit ffc93ba256
							
								
								
									
.gitignore (20 changes)
@@ -1,20 +1,24 @@
-env
-engines.cfg
-.installed.cfg
 .coverage
-coverage/
+.installed.cfg
+engines.cfg
+env
+robot_log.html
+robot_output.xml
+robot_report.html
 setup.cfg
 
 *.pyc
 */*.pyc
 
 bin/
+build/
+covearge/
+develop-eggs/
+dist/
+eggs/
 include/
 lib/
-build/
-develop-eggs/
-eggs/
 local/
-searx.egg-info/
 parts/
+searx.egg-info/
 var/

Makefile (12 changes)
@@ -21,11 +21,7 @@ $(python):
 tests: .installed.cfg
 	@bin/test
 
-enginescfg:
-	@test -f ./engines.cfg || echo "Copying engines.cfg ..."
-	@cp --no-clobber engines.cfg_sample engines.cfg
-
-robot: .installed.cfg enginescfg
+robot: .installed.cfg
 	@bin/robot
 
 flake8: .installed.cfg
@@ -37,18 +33,18 @@ coverage: .installed.cfg
 	@bin/coverage report --show-missing
 	@bin/coverage html --directory ./coverage
 
-production: bin/buildout production.cfg setup.py enginescfg
+production: bin/buildout production.cfg setup.py
 	bin/buildout -c production.cfg $(options)
 	@echo "* Please modify `readlink --canonicalize-missing ./searx/settings.py`"
 	@echo "* Hint 1: on production, disable debug mode and change secret_key"
 	@echo "* Hint 2: searx will be executed at server startup by crontab"
 	@echo "* Hint 3: to run immediatley, execute 'bin/supervisord'"
 
-minimal: bin/buildout minimal.cfg setup.py enginescfg
+minimal: bin/buildout minimal.cfg setup.py
 	bin/buildout -c minimal.cfg $(options)
 
 clean:
 	@rm -rf .installed.cfg .mr.developer.cfg bin parts develop-eggs \
 		searx.egg-info lib include .coverage coverage
 
-.PHONY: all tests enginescfg robot flake8 coverage production minimal clean
+.PHONY: all tests robot flake8 coverage production minimal clean

@@ -16,8 +16,6 @@ recipe = zc.recipe.egg:script
 eggs = ${buildout:eggs}
 interpreter = py
 dependent-scripts = true
-entry-points =
-    searx-run=searx.webapp:run
 
 
 [robot]

@@ -13,5 +13,3 @@ parts +=
 recipe = zc.recipe.egg:script
 eggs = ${buildout:eggs}
 interpreter = py
-entry-points =
-    searx-run=searx.webapp:run

@@ -15,8 +15,6 @@ parts +=
 recipe = zc.recipe.egg:script
 eggs = ${buildout:eggs}
 interpreter = py
-entry-points =
-    searx-run=searx.webapp:run
 
 
 [supervisor]

@@ -1,5 +1,5 @@
 from os import environ
-from os.path import realpath, dirname, join
+from os.path import realpath, dirname, join, abspath
 try:
     from yaml import load
 except:
@@ -7,8 +7,7 @@ except:
     stderr.write('[E] install pyyaml\n')
     exit(2)
 
-
-searx_dir  = realpath(dirname(realpath(__file__))+'/../')
+searx_dir = abspath(dirname(__file__))
 engine_dir = dirname(realpath(__file__))
 
 if 'SEARX_SETTINGS_PATH' in environ:
@@ -19,4 +18,3 @@ else:
 
 with open(settings_path) as settings_yaml:
     settings = load(settings_yaml)
-

@@ -35,6 +35,7 @@ engines = {}
 
 categories = {'general': []}
 
+
 def load_module(filename):
     modname = splitext(filename)[0]
     if modname in sys.modules:
@@ -50,7 +51,7 @@ if not 'engines' in settings or not settings['engines']:
 
 for engine_data in settings['engines']:
     engine_name = engine_data['engine']
-    engine = load_module(engine_name+'.py')
+    engine = load_module(engine_name + '.py')
     for param_name in engine_data:
         if param_name == 'engine':
             continue
@@ -58,38 +59,50 @@ for engine_data in settings['engines']:
             if engine_data['categories'] == 'none':
                 engine.categories = []
             else:
-                engine.categories = map(str.strip, engine_data['categories'].split(','))
+                engine.categories = map(
+                    str.strip, engine_data['categories'].split(','))
             continue
         setattr(engine, param_name, engine_data[param_name])
     for engine_attr in dir(engine):
         if engine_attr.startswith('_'):
             continue
         if getattr(engine, engine_attr) == None:
-            print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr)
+            print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr)  # noqa
             sys.exit(1)
     engines[engine.name] = engine
-    engine.stats = {'result_count': 0, 'search_count': 0, 'page_load_time': 0, 'score_count': 0, 'errors': 0}
+    engine.stats = {
+        'result_count': 0,
+        'search_count': 0,
+        'page_load_time': 0,
+        'score_count': 0,
+        'errors': 0
+    }
     if hasattr(engine, 'categories'):
         for category_name in engine.categories:
             categories.setdefault(category_name, []).append(engine)
     else:
         categories['general'].append(engine)
 
+
 def default_request_params():
-    return {'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}}
+    return {
+        'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}}
+
 
 def make_callback(engine_name, results, suggestions, callback, params):
     # creating a callback wrapper for the search engine results
     def process_callback(response, **kwargs):
         cb_res = []
         response.search_params = params
-        engines[engine_name].stats['page_load_time'] += (datetime.now() - params['started']).total_seconds()
+        engines[engine_name].stats['page_load_time'] += \
+            (datetime.now() - params['started']).total_seconds()
         try:
             search_results = callback(response)
         except Exception, e:
             engines[engine_name].stats['errors'] += 1
             results[engine_name] = cb_res
-            print '[E] Error with engine "{0}":\n\t{1}'.format(engine_name, str(e))
+            print '[E] Error with engine "{0}":\n\t{1}'.format(
+                engine_name, str(e))
             return
         for result in search_results:
             result['engine'] = engine_name
@@ -101,23 +114,25 @@ def make_callback(engine_name, results, suggestions, callback, params):
         results[engine_name] = cb_res
     return process_callback
 
+
 def score_results(results):
-    flat_res = filter(None, chain.from_iterable(izip_longest(*results.values())))
+    flat_res = filter(
+        None, chain.from_iterable(izip_longest(*results.values())))
     flat_len = len(flat_res)
     engines_len = len(results)
     results = []
     # deduplication + scoring
-    for i,res in enumerate(flat_res):
+    for i, res in enumerate(flat_res):
         res['parsed_url'] = urlparse(res['url'])
         res['engines'] = [res['engine']]
         weight = 1.0
         if hasattr(engines[res['engine']], 'weight'):
             weight = float(engines[res['engine']].weight)
-        score = int((flat_len - i)/engines_len)*weight+1
+        score = int((flat_len - i) / engines_len) * weight + 1
         duplicated = False
         for new_res in results:
-            p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path
-            p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path
+            p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
+            p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa
             if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\
                p1 == p2 and\
               res['parsed_url'].query == new_res['parsed_url'].query and\
@@ -125,7 +140,7 @@ def score_results(results):
                 duplicated = new_res
                 break
         if duplicated:
-            if len(res.get('content', '')) > len(duplicated.get('content', '')):
+            if len(res.get('content', '')) > len(duplicated.get('content', '')):  # noqa
                 duplicated['content'] = res['content']
             duplicated['score'] += score
             duplicated['engines'].append(res['engine'])
@@ -139,6 +154,7 @@ def score_results(results):
             results.append(res)
     return sorted(results, key=itemgetter('score'), reverse=True)
 
+
 def search(query, request, selected_engines):
     global engines, categories, number_of_searches
     requests = []
@@ -160,13 +176,20 @@ def search(query, request, selected_engines):
         request_params['started'] = datetime.now()
         request_params = engine.request(query, request_params)
 
-        callback = make_callback(selected_engine['name'], results, suggestions, engine.response, request_params)
+        callback = make_callback(
+            selected_engine['name'],
+            results,
+            suggestions,
+            engine.response,
+            request_params
+        )
 
-        request_args = dict(headers = request_params['headers']
-                           ,hooks   = dict(response=callback)
-                           ,cookies = request_params['cookies']
-                           ,timeout = settings['server']['request_timeout']
-                           )
+        request_args = dict(
+            headers=request_params['headers'],
+            hooks=dict(response=callback),
+            cookies=request_params['cookies'],
+            timeout=settings['server']['request_timeout']
+        )
 
         if request_params['method'] == 'GET':
             req = grequests.get
@@ -180,7 +203,7 @@ def search(query, request, selected_engines):
 
         requests.append(req(request_params['url'], **request_args))
     grequests.map(requests)
-    for engine_name,engine_results in results.items():
+    for engine_name, engine_results in results.items():
         engines[engine_name].stats['search_count'] += 1
         engines[engine_name].stats['result_count'] += len(engine_results)
 
@@ -192,6 +215,7 @@ def search(query, request, selected_engines):
 
     return results, suggestions
 
+
 def get_engines_stats():
     # TODO refactor
     pageloads = []
@@ -200,14 +224,15 @@ def get_engines_stats():
     errors = []
     scores_per_result = []
 
-    max_pageload = max_results = max_score = max_errors = max_score_per_result = 0
+    max_pageload = max_results = max_score = max_errors = max_score_per_result = 0  # noqa
     for engine in engines.values():
         if engine.stats['search_count'] == 0:
             continue
-        results_num = engine.stats['result_count']/float(engine.stats['search_count'])
-        load_times  = engine.stats['page_load_time']/float(engine.stats['search_count'])
+        results_num = \
+            engine.stats['result_count'] / float(engine.stats['search_count'])
+        load_times = engine.stats['page_load_time'] / float(engine.stats['search_count'])  # noqa
         if results_num:
-            score = engine.stats['score_count'] / float(engine.stats['search_count'])
+            score = engine.stats['score_count'] / float(engine.stats['search_count'])  # noqa
             score_per_result = score / results_num
         else:
             score = score_per_result = 0.0
@@ -220,30 +245,39 @@ def get_engines_stats():
         results.append({'avg': results_num, 'name': engine.name})
         scores.append({'avg': score, 'name': engine.name})
         errors.append({'avg': engine.stats['errors'], 'name': engine.name})
-        scores_per_result.append({'avg': score_per_result, 'name': engine.name})
+        scores_per_result.append({
+            'avg': score_per_result,
+            'name': engine.name
+        })
 
     for engine in pageloads:
-        engine['percentage'] = int(engine['avg']/max_pageload*100)
+        engine['percentage'] = int(engine['avg'] / max_pageload * 100)
 
     for engine in results:
-        engine['percentage'] = int(engine['avg']/max_results*100)
+        engine['percentage'] = int(engine['avg'] / max_results * 100)
 
     for engine in scores:
-        engine['percentage'] = int(engine['avg']/max_score*100)
+        engine['percentage'] = int(engine['avg'] / max_score * 100)
 
     for engine in scores_per_result:
-        engine['percentage'] = int(engine['avg']/max_score_per_result*100)
+        engine['percentage'] = int(engine['avg'] / max_score_per_result * 100)
 
     for engine in errors:
         if max_errors:
-            engine['percentage'] = int(float(engine['avg'])/max_errors*100)
+            engine['percentage'] = int(float(engine['avg']) / max_errors * 100)
         else:
             engine['percentage'] = 0
 
-    return [('Page loads (sec)', sorted(pageloads, key=itemgetter('avg')))
-           ,('Number of results', sorted(results, key=itemgetter('avg'), reverse=True))
-           ,('Scores', sorted(scores, key=itemgetter('avg'), reverse=True))
-           ,('Scores per result', sorted(scores_per_result, key=itemgetter('avg'), reverse=True))
-           ,('Errors', sorted(errors, key=itemgetter('avg'), reverse=True))
-           ]
+    return [
+        ('Page loads (sec)', sorted(pageloads, key=itemgetter('avg'))),
+        (
+            'Number of results',
+            sorted(results, key=itemgetter('avg'), reverse=True)
+        ),
+        ('Scores', sorted(scores, key=itemgetter('avg'), reverse=True)),
+        (
+            'Scores per result',
+            sorted(scores_per_result, key=itemgetter('avg'), reverse=True)
+        ),
+        ('Errors', sorted(errors, key=itemgetter('avg'), reverse=True)),
+    ]

@@ -4,11 +4,12 @@ from cgi import escape
 
 base_url = 'http://www.bing.com/'
 search_string = 'search?{query}'
-locale = 'en-US' # see http://msdn.microsoft.com/en-us/library/dd251064.aspx
+locale = 'en-US'  # see http://msdn.microsoft.com/en-us/library/dd251064.aspx
 
+
 def request(query, params):
-    search_path = search_string.format(query=urlencode({'q': query, 'setmkt': locale}))
+    search_path = search_string.format(
+        query=urlencode({'q': query, 'setmkt': locale}))
     #if params['category'] == 'images':
     #    params['url'] = base_url + 'images/' + search_path
     params['url'] = base_url + search_path

@@ -7,6 +7,7 @@ weight = 100
 
 parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I)
 
+
 def request(query, params):
     m = parser_re.match(query)
     if not m:
@@ -19,7 +20,7 @@ def request(query, params):
         # wrong params
         return params
 
-    q = (from_currency+to_currency).upper()
+    q = (from_currency + to_currency).upper()
 
     params['url'] = url.format(query=q)
     params['ammount'] = ammount
@@ -33,25 +34,27 @@ def response(resp):
     global base_url
     results = []
     try:
-        _,conversion_rate,_ = resp.text.split(',', 2)
+        _, conversion_rate, _ = resp.text.split(',', 2)
         conversion_rate = float(conversion_rate)
     except:
         return results
 
-    title = '{0} {1} in {2} is {3}'.format(resp.search_params['ammount']
-                                          ,resp.search_params['from']
-                                          ,resp.search_params['to']
-                                          ,resp.search_params['ammount']*conversion_rate
-                                          )
+    title = '{0} {1} in {2} is {3}'.format(
+        resp.search_params['ammount'],
+        resp.search_params['from'],
+        resp.search_params['to'],
+        resp.search_params['ammount'] * conversion_rate
+    )
 
     content = '1 {0} is {1} {2}'.format(resp.search_params['from'], conversion_rate, resp.search_params['to'])
     now_date = datetime.now().strftime('%Y%m%d')
     url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html'
-    url = url.format(now_date
-                    ,resp.search_params['ammount']
-                    ,resp.search_params['from'].lower()
-                    ,resp.search_params['to'].lower()
-                    )
+    url = url.format(
+        now_date,
+        resp.search_params['ammount'],
+        resp.search_params['from'].lower(),
+        resp.search_params['to'].lower()
+    )
     results.append({'title': title, 'content': content, 'url': url})
 
     return results

@@ -8,9 +8,11 @@ locale = 'en_US'
 # see http://www.dailymotion.com/doc/api/obj-video.html
 search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}'
 
+
 def request(query, params):
     global search_url
-    params['url'] = search_url.format(query=urlencode({'search': query, 'localization': locale }))
+    params['url'] = search_url.format(
+        query=urlencode({'search': query, 'localization': locale}))
     return params
 
+
@@ -32,6 +34,7 @@ def response(resp):
         results.append({'url': url, 'title': title, 'content': content})
     return results
 
+
 def text_content_from_html(html_string):
     desc_html = html.fragment_fromstring(html_string, create_parent=True)
     return desc_html.text_content()

@@ -1,16 +0,0 @@
-
-port = 11111
-
-secret_key = "ultrasecretkey" # change this!
-
-debug = False
-
-request_timeout = 5.0 # seconds
-
-weights = {} # 'search_engine_name': float(weight) | default is 1.0
-
-blacklist = [] # search engine blacklist
-
-categories = {} # custom search engine categories
-
-base_url = None # "https://your.domain.tld/" or None (to use request parameters)

searx/settings_robot.yml (new file, 107 lines)
@@ -0,0 +1,107 @@
+server:
+    port : 11111
+    secret_key : "ultrasecretkey" # change this!
+    debug : False
+    request_timeout : 3.0 # seconds
+    base_url: False
+
+engines:
+  - name : wikipedia
+    engine : mediawiki
+    url    : https://en.wikipedia.org/
+    number_of_results : 1
+
+  - name : bing
+    engine : bing
+    locale : en-US
+
+  - name : currency
+    engine : currency_convert
+    categories : general
+
+  - name : deviantart
+    engine : deviantart
+    categories : images
+
+  - name : ddg definitions
+    engine : duckduckgo_definitions
+
+  - name : duckduckgo
+    engine : duckduckgo
+    locale : en-us
+
+  - name : filecrop
+    engine : filecrop
+    categories : files
+
+  - name : flickr
+    engine : flickr
+    categories : images
+
+  - name : github
+    engine : github
+    categories : it
+
+  - name : google
+    engine        : json_engine
+    search_url    : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query}
+    categories    : general
+    url_query     : /responseData/results/unescapedUrl
+    content_query : /responseData/results/content
+    title_query   : /responseData/results/titleNoFormatting
+
+  - name : google images
+    engine : google_images
+    categories : images
+
+  - name : piratebay
+    engine : piratebay
+    categories : videos, music, files
+
+  - name : soundcloud
+    engine : soundcloud
+    categories : music
+
+  - name : stackoverflow
+    engine : stackoverflow
+    categories : it
+
+  - name : startpage
+    engine : startpage
+
+  - name : twitter
+    engine : twitter
+    categories : social media
+
+  - name : urbandictionary
+    engine        : xpath
+    search_url    : http://www.urbandictionary.com/define.php?term={query}
+    url_xpath     : //div[@class="word"]//a/@href
+    title_xpath   : //div[@class="word"]//a
+    content_xpath : //div[@class="definition"]
+
+  - name : yahoo
+    engine           : xpath
+    search_url       : http://search.yahoo.com/search?p={query}
+    results_xpath    : //div[@class="res"]
+    url_xpath        : .//h3/a/@href
+    title_xpath      : .//h3/a
+    content_xpath    : .//div[@class="abstr"]
+    suggestion_xpath : //div[@id="satat"]//a
+
+  - name : youtube
+    engine : youtube
+    categories : videos
+
+  - name : dailymotion
+    engine : dailymotion
+    locale : en_US
+    categories : videos
+
+  - name : vimeo
+    engine : vimeo
+    categories : videos
+    results_xpath : //div[@id="browse_content"]/ol/li
+    url_xpath : ./a/@href
+    title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
+    content_xpath : ./a/img/@src

@@ -7,10 +7,10 @@ from unittest2 import TestCase
 
 import os
 import subprocess
-import sys
 
 
 class SearxTestLayer:
+    """Base layer for non-robot tests."""
 
     __name__ = u'SearxTestLayer'
 
@@ -36,24 +36,37 @@ class SearxRobotLayer(Layer):
 
     def setUp(self):
         os.setpgrp()  # create new process group, become its leader
+
+        # get program paths
         webapp = os.path.join(
             os.path.abspath(os.path.dirname(os.path.realpath(__file__))),
             'webapp.py'
         )
         exe = os.path.abspath(os.path.dirname(__file__) + '/../bin/py')
+
+        # set robot settings path
+        os.environ['SEARX_SETTINGS_PATH'] = os.path.abspath(
+            os.path.dirname(__file__) + '/settings_robot.yml')
+
+        # run the server
         self.server = subprocess.Popen(
-            [exe, webapp, 'settings_robot'],
+            [exe, webapp],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT
         )
 
     def tearDown(self):
-        # TERM all processes in my group
+        # send TERM signal to all processes in my group, to stop subprocesses
         os.killpg(os.getpgid(self.server.pid), 15)
 
+        # remove previously set environment variable
+        del os.environ['SEARX_SETTINGS_PATH']
+
 
 SEARXROBOTLAYER = SearxRobotLayer()
 
+
 class SearxTestCase(TestCase):
+    """Base test case for non-robot tests."""
 
     layer = SearxTestLayer

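The robot test layer above now selects its configuration through the SEARX_SETTINGS_PATH environment variable, which searx/__init__.py reads when the package is imported. A minimal sketch of the same mechanism outside the test suite, assuming searx is importable and using an illustrative path:

```python
import os

# Point searx at an alternative settings file (illustrative path).
# The variable must be set before the first import of searx, because
# searx/__init__.py reads SEARX_SETTINGS_PATH at import time.
os.environ['SEARX_SETTINGS_PATH'] = '/path/to/settings_robot.yml'

from searx import settings

print settings['server']['port']  # 11111 with the robot settings above
```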
@@ -5,10 +5,12 @@ import codecs
 import cStringIO
 import re
 
+
 def gen_useragent():
     # TODO
     return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
 
+
 def highlight_content(content, query):
 
     if not content:
@@ -34,10 +36,11 @@ def highlight_content(content, query):
 
     return content
 
+
 class HTMLTextExtractor(HTMLParser):
     def __init__(self):
         HTMLParser.__init__(self)
-        self.result = [ ]
+        self.result = []
 
     def handle_data(self, d):
         self.result.append(d)
@@ -54,6 +57,7 @@ class HTMLTextExtractor(HTMLParser):
     def get_text(self):
         return u''.join(self.result)
 
+
 def html_to_text(html):
     s = HTMLTextExtractor()
     s.feed(html)

@@ -17,13 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 (C) 2013- by Adam Tauber, <asciimoo@gmail.com>
 '''
 
-import os
-import sys
-if __name__ == "__main__":
-    sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))+'/../'))
-
 from searx import settings
-
 from flask import Flask, request, render_template, url_for, Response, make_response, redirect
 from searx.engines import search, categories, engines, get_engines_stats
 import json
@@ -33,11 +27,17 @@ from flask import send_from_directory
 from searx.utils import highlight_content, html_to_text
 
 
+import os
+
+
+app = Flask(
+    __name__,
+    static_folder=os.path.join(os.path.dirname(__file__), 'static'),
+    template_folder=os.path.join(os.path.dirname(__file__), 'templates')
+)
+
-app = Flask(__name__)
 app.secret_key = settings['server']['secret_key']
 
-
 #TODO configurable via settings.yml
 favicons = ['wikipedia', 'youtube', 'vimeo', 'soundcloud',
             'twitter', 'stackoverflow', 'github']
@@ -81,6 +81,7 @@ def render(template_name, **kwargs):
             kwargs['selected_categories'] = ['general']
     return render_template(template_name, **kwargs)
 
+
 def parse_query(query):
     query_engines = []
     query_parts = query.split()
@@ -94,7 +95,7 @@ def parse_query(query):
 def index():
     global categories
 
-    if request.method=='POST':
+    if request.method == 'POST':
         request_data = request.form
     else:
         request_data = request.args
@@ -106,7 +107,7 @@ def index():
     query, selected_engines = parse_query(request_data['q'].encode('utf-8'))
 
     if not len(selected_engines):
-        for pd_name,pd in request_data.items():
+        for pd_name, pd in request_data.items():
             if pd_name.startswith('category_'):
                 category = pd_name[9:]
                 if not category in categories:
@@ -159,23 +160,24 @@ def index():
         response.headers.add('Content-Disposition', 'attachment;Filename=searx_-_{0}.csv'.format('_'.join(query.split())))
         return response
     elif request_data.get('format') == 'rss':
-        response_rss = render('opensearch_response_rss.xml'
-                              ,results=results
-                              ,q=request_data['q']
-                              ,number_of_results=len(results)
-                              ,base_url=get_base_url()
-                              )
+        response_rss = render(
+            'opensearch_response_rss.xml',
+            results=results,
+            q=request_data['q'],
+            number_of_results=len(results),
+            base_url=get_base_url()
+        )
         return Response(response_rss, mimetype='text/xml')
 
-    return render('results.html'
-                 ,results=results
-                 ,q=request_data['q']
-                 ,selected_categories=selected_categories
-                 ,number_of_results=len(results)+len(featured_results)
-                 ,featured_results=featured_results
-                 ,suggestions=suggestions
-                 )
+    return render(
+        'results.html',
+        results=results,
+        q=request_data['q'],
+        selected_categories=selected_categories,
+        number_of_results=len(results) + len(featured_results),
+        featured_results=featured_results,
+        suggestions=suggestions
+    )
 
 
 @app.route('/about', methods=['GET'])
@@ -192,9 +194,9 @@ def list_engines():
 @app.route('/preferences', methods=['GET', 'POST'])
 def preferences():
 
-    if request.method=='POST':
+    if request.method == 'POST':
         selected_categories = []
-        for pd_name,pd in request.form.items():
+        for pd_name, pd in request.form.items():
             if pd_name.startswith('category_'):
                 category = pd_name[9:]
                 if not category in categories:
@@ -203,7 +205,10 @@ def preferences():
         if selected_categories:
             resp = make_response(redirect('/'))
             # cookie max age: 4 weeks
-            resp.set_cookie('categories', ','.join(selected_categories), max_age=60*60*24*7*4)
+            resp.set_cookie(
+                'categories', ','.join(selected_categories),
+                max_age=60 * 60 * 24 * 7 * 4
+            )
             return resp
     return render('preferences.html')
 
@@ -238,6 +243,7 @@ def opensearch():
                 mimetype="application/xml")
     return resp
 
+
 @app.route('/favicon.ico')
 def favicon():
     return send_from_directory(os.path.join(app.root_path, 'static/img'),
@@ -248,10 +254,11 @@ def run():
     from gevent import monkey
     monkey.patch_all()
 
-    app.run(debug        = settings['server']['debug']
-           ,use_debugger = settings['server']['debug']
-           ,port         = settings['server']['port']
-           )
+    app.run(
+        debug=settings['server']['debug'],
+        use_debugger=settings['server']['debug'],
+        port=settings['server']['port']
+    )
 
 
 if __name__ == "__main__":

setup.py (15 changes)
@@ -49,4 +49,19 @@ setup(
             'zope.testrunner',
         ]
     },
+    entry_points={
+        'console_scripts': [
+            'searx-run = searx.webapp:run'
+        ]
+    },
+    package_data={
+        'searx': [
+            'settings.yml',
+            '../README.md',
+            'static/*/*',
+            'templates/*.html',
+            'templates/result_templates/*.html',
+        ],
+    },
+
 )

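With the buildout entry-points removed and the egg packaged via setup.py, the `searx-run` command now comes from the console_scripts entry point above and resolves to searx.webapp:run. A rough, hand-written equivalent of the wrapper that setuptools generates at install time (a sketch; it assumes searx is importable):

```python
# Sketch of what the generated `searx-run` console script amounts to:
# import the run() function declared in the entry point and call it.
from searx.webapp import run

if __name__ == '__main__':
    run()
```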