Mirror of https://github.com/searxng/searxng.git (synced 2025-10-31 10:37:06 -04:00)
	Merge pull request #30 from matejc/smallissues
fix: robot fw, entry points, some flake8, package searx egg
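Context for the robot-framework part of this change: the old settings_robot command-line argument is replaced by the SEARX_SETTINGS_PATH environment variable. SearxRobotLayer.setUp() now exports that variable before launching webapp.py, and searx/__init__.py reads it when loading the YAML settings. A minimal sketch of the same flow run by hand, assuming it is executed from the repository root (this sketch is illustrative and not part of the commit):

# Illustrative only: mirrors what SearxRobotLayer.setUp() does in this commit.
import os
import subprocess

# point searx at the robot test settings; searx/__init__.py checks this variable
os.environ['SEARX_SETTINGS_PATH'] = os.path.abspath('searx/settings_robot.yml')

# start the web app with the buildout interpreter, then run the robot suite
server = subprocess.Popen(['bin/py', 'searx/webapp.py'],
                          stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
subprocess.call(['bin/robot'])  # equivalent of "make robot"
server.terminate()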
commit ffc93ba256

.gitignore (vendored) | 20 changes
| @@ -1,20 +1,24 @@ | ||||
| env | ||||
| engines.cfg | ||||
| .installed.cfg | ||||
| .coverage | ||||
| coverage/ | ||||
| .installed.cfg | ||||
| engines.cfg | ||||
| env | ||||
| robot_log.html | ||||
| robot_output.xml | ||||
| robot_report.html | ||||
| setup.cfg | ||||
| 
 | ||||
| *.pyc | ||||
| */*.pyc | ||||
| 
 | ||||
| bin/ | ||||
| build/ | ||||
| covearge/ | ||||
| develop-eggs/ | ||||
| dist/ | ||||
| eggs/ | ||||
| include/ | ||||
| lib/ | ||||
| build/ | ||||
| develop-eggs/ | ||||
| eggs/ | ||||
| local/ | ||||
| searx.egg-info/ | ||||
| parts/ | ||||
| searx.egg-info/ | ||||
| var/ | ||||
|  | ||||
Makefile | 12 changes
| @@ -21,11 +21,7 @@ $(python): | ||||
| tests: .installed.cfg | ||||
| 	@bin/test | ||||
| 
 | ||||
| enginescfg: | ||||
| 	@test -f ./engines.cfg || echo "Copying engines.cfg ..." | ||||
| 	@cp --no-clobber engines.cfg_sample engines.cfg | ||||
| 
 | ||||
| robot: .installed.cfg enginescfg | ||||
| robot: .installed.cfg | ||||
| 	@bin/robot | ||||
| 
 | ||||
| flake8: .installed.cfg | ||||
| @@ -37,18 +33,18 @@ coverage: .installed.cfg | ||||
| 	@bin/coverage report --show-missing | ||||
| 	@bin/coverage html --directory ./coverage | ||||
| 
 | ||||
| production: bin/buildout production.cfg setup.py enginescfg | ||||
| production: bin/buildout production.cfg setup.py | ||||
| 	bin/buildout -c production.cfg $(options) | ||||
| 	@echo "* Please modify `readlink --canonicalize-missing ./searx/settings.py`" | ||||
| 	@echo "* Hint 1: on production, disable debug mode and change secret_key" | ||||
| 	@echo "* Hint 2: searx will be executed at server startup by crontab" | ||||
| 	@echo "* Hint 3: to run immediatley, execute 'bin/supervisord'" | ||||
| 
 | ||||
| minimal: bin/buildout minimal.cfg setup.py enginescfg | ||||
| minimal: bin/buildout minimal.cfg setup.py | ||||
| 	bin/buildout -c minimal.cfg $(options) | ||||
| 
 | ||||
| clean: | ||||
| 	@rm -rf .installed.cfg .mr.developer.cfg bin parts develop-eggs \
 | ||||
| 		searx.egg-info lib include .coverage coverage | ||||
| 
 | ||||
| .PHONY: all tests enginescfg robot flake8 coverage production minimal clean | ||||
| .PHONY: all tests robot flake8 coverage production minimal clean | ||||
|  | ||||
| @@ -16,8 +16,6 @@ recipe = zc.recipe.egg:script | ||||
| eggs = ${buildout:eggs} | ||||
| interpreter = py | ||||
| dependent-scripts = true | ||||
| entry-points = | ||||
|     searx-run=searx.webapp:run | ||||
| 
 | ||||
| 
 | ||||
| [robot] | ||||
|  | ||||
| @@ -13,5 +13,3 @@ parts += | ||||
| recipe = zc.recipe.egg:script | ||||
| eggs = ${buildout:eggs} | ||||
| interpreter = py | ||||
| entry-points = | ||||
|     searx-run=searx.webapp:run | ||||
|  | ||||
| @@ -15,8 +15,6 @@ parts += | ||||
| recipe = zc.recipe.egg:script | ||||
| eggs = ${buildout:eggs} | ||||
| interpreter = py | ||||
| entry-points = | ||||
|     searx-run=searx.webapp:run | ||||
| 
 | ||||
| 
 | ||||
| [supervisor] | ||||
|  | ||||
| @@ -1,5 +1,5 @@ | ||||
| from os import environ | ||||
| from os.path import realpath, dirname, join | ||||
| from os.path import realpath, dirname, join, abspath | ||||
| try: | ||||
|     from yaml import load | ||||
| except: | ||||
| @@ -7,8 +7,7 @@ except: | ||||
|     stderr.write('[E] install pyyaml\n') | ||||
|     exit(2) | ||||
| 
 | ||||
| 
 | ||||
| searx_dir  = realpath(dirname(realpath(__file__))+'/../') | ||||
| searx_dir = abspath(dirname(__file__)) | ||||
| engine_dir = dirname(realpath(__file__)) | ||||
| 
 | ||||
| if 'SEARX_SETTINGS_PATH' in environ: | ||||
| @@ -19,4 +18,3 @@ else: | ||||
| 
 | ||||
| with open(settings_path) as settings_yaml: | ||||
|     settings = load(settings_yaml) | ||||
| 
 | ||||
|  | ||||
| @@ -35,6 +35,7 @@ engines = {} | ||||
| 
 | ||||
| categories = {'general': []} | ||||
| 
 | ||||
| 
 | ||||
| def load_module(filename): | ||||
|     modname = splitext(filename)[0] | ||||
|     if modname in sys.modules: | ||||
| @@ -50,7 +51,7 @@ if not 'engines' in settings or not settings['engines']: | ||||
| 
 | ||||
| for engine_data in settings['engines']: | ||||
|     engine_name = engine_data['engine'] | ||||
|     engine = load_module(engine_name+'.py') | ||||
|     engine = load_module(engine_name + '.py') | ||||
|     for param_name in engine_data: | ||||
|         if param_name == 'engine': | ||||
|             continue | ||||
| @@ -58,38 +59,50 @@ for engine_data in settings['engines']: | ||||
|             if engine_data['categories'] == 'none': | ||||
|                 engine.categories = [] | ||||
|             else: | ||||
|                 engine.categories = map(str.strip, engine_data['categories'].split(',')) | ||||
|                 engine.categories = map( | ||||
|                     str.strip, engine_data['categories'].split(',')) | ||||
|             continue | ||||
|         setattr(engine, param_name, engine_data[param_name]) | ||||
|     for engine_attr in dir(engine): | ||||
|         if engine_attr.startswith('_'): | ||||
|             continue | ||||
|         if getattr(engine, engine_attr) == None: | ||||
|             print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr) | ||||
|             print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr)  # noqa | ||||
|             sys.exit(1) | ||||
|     engines[engine.name] = engine | ||||
|     engine.stats = {'result_count': 0, 'search_count': 0, 'page_load_time': 0, 'score_count': 0, 'errors': 0} | ||||
|     engine.stats = { | ||||
|         'result_count': 0, | ||||
|         'search_count': 0, | ||||
|         'page_load_time': 0, | ||||
|         'score_count': 0, | ||||
|         'errors': 0 | ||||
|     } | ||||
|     if hasattr(engine, 'categories'): | ||||
|         for category_name in engine.categories: | ||||
|             categories.setdefault(category_name, []).append(engine) | ||||
|     else: | ||||
|         categories['general'].append(engine) | ||||
| 
 | ||||
| 
 | ||||
| def default_request_params(): | ||||
|     return {'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}} | ||||
|     return { | ||||
|         'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}} | ||||
| 
 | ||||
| 
 | ||||
| def make_callback(engine_name, results, suggestions, callback, params): | ||||
|     # creating a callback wrapper for the search engine results | ||||
|     def process_callback(response, **kwargs): | ||||
|         cb_res = [] | ||||
|         response.search_params = params | ||||
|         engines[engine_name].stats['page_load_time'] += (datetime.now() - params['started']).total_seconds() | ||||
|         engines[engine_name].stats['page_load_time'] += \ | ||||
|             (datetime.now() - params['started']).total_seconds() | ||||
|         try: | ||||
|             search_results = callback(response) | ||||
|         except Exception, e: | ||||
|             engines[engine_name].stats['errors'] += 1 | ||||
|             results[engine_name] = cb_res | ||||
|             print '[E] Error with engine "{0}":\n\t{1}'.format(engine_name, str(e)) | ||||
|             print '[E] Error with engine "{0}":\n\t{1}'.format( | ||||
|                 engine_name, str(e)) | ||||
|             return | ||||
|         for result in search_results: | ||||
|             result['engine'] = engine_name | ||||
| @@ -101,23 +114,25 @@ def make_callback(engine_name, results, suggestions, callback, params): | ||||
|         results[engine_name] = cb_res | ||||
|     return process_callback | ||||
| 
 | ||||
| 
 | ||||
| def score_results(results): | ||||
|     flat_res = filter(None, chain.from_iterable(izip_longest(*results.values()))) | ||||
|     flat_res = filter( | ||||
|         None, chain.from_iterable(izip_longest(*results.values()))) | ||||
|     flat_len = len(flat_res) | ||||
|     engines_len = len(results) | ||||
|     results = [] | ||||
|     # deduplication + scoring | ||||
|     for i,res in enumerate(flat_res): | ||||
|     for i, res in enumerate(flat_res): | ||||
|         res['parsed_url'] = urlparse(res['url']) | ||||
|         res['engines'] = [res['engine']] | ||||
|         weight = 1.0 | ||||
|         if hasattr(engines[res['engine']], 'weight'): | ||||
|             weight = float(engines[res['engine']].weight) | ||||
|         score = int((flat_len - i)/engines_len)*weight+1 | ||||
|         score = int((flat_len - i) / engines_len) * weight + 1 | ||||
|         duplicated = False | ||||
|         for new_res in results: | ||||
|             p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path | ||||
|             p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path | ||||
|             p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa | ||||
|             p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa | ||||
|             if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\ | ||||
|                p1 == p2 and\ | ||||
|                res['parsed_url'].query == new_res['parsed_url'].query and\ | ||||
| @@ -125,7 +140,7 @@ def score_results(results): | ||||
|                 duplicated = new_res | ||||
|                 break | ||||
|         if duplicated: | ||||
|             if len(res.get('content', '')) > len(duplicated.get('content', '')): | ||||
|             if len(res.get('content', '')) > len(duplicated.get('content', '')):  # noqa | ||||
|                 duplicated['content'] = res['content'] | ||||
|             duplicated['score'] += score | ||||
|             duplicated['engines'].append(res['engine']) | ||||
| @@ -139,6 +154,7 @@ def score_results(results): | ||||
|             results.append(res) | ||||
|     return sorted(results, key=itemgetter('score'), reverse=True) | ||||
| 
 | ||||
| 
 | ||||
| def search(query, request, selected_engines): | ||||
|     global engines, categories, number_of_searches | ||||
|     requests = [] | ||||
| @@ -160,13 +176,20 @@ def search(query, request, selected_engines): | ||||
|         request_params['started'] = datetime.now() | ||||
|         request_params = engine.request(query, request_params) | ||||
| 
 | ||||
|         callback = make_callback(selected_engine['name'], results, suggestions, engine.response, request_params) | ||||
|         callback = make_callback( | ||||
|             selected_engine['name'], | ||||
|             results, | ||||
|             suggestions, | ||||
|             engine.response, | ||||
|             request_params | ||||
|         ) | ||||
| 
 | ||||
|         request_args = dict(headers = request_params['headers'] | ||||
|                            ,hooks   = dict(response=callback) | ||||
|                            ,cookies = request_params['cookies'] | ||||
|                            ,timeout = settings['server']['request_timeout'] | ||||
|                            ) | ||||
|         request_args = dict( | ||||
|             headers=request_params['headers'], | ||||
|             hooks=dict(response=callback), | ||||
|             cookies=request_params['cookies'], | ||||
|             timeout=settings['server']['request_timeout'] | ||||
|         ) | ||||
| 
 | ||||
|         if request_params['method'] == 'GET': | ||||
|             req = grequests.get | ||||
| @@ -180,7 +203,7 @@ def search(query, request, selected_engines): | ||||
| 
 | ||||
|         requests.append(req(request_params['url'], **request_args)) | ||||
|     grequests.map(requests) | ||||
|     for engine_name,engine_results in results.items(): | ||||
|     for engine_name, engine_results in results.items(): | ||||
|         engines[engine_name].stats['search_count'] += 1 | ||||
|         engines[engine_name].stats['result_count'] += len(engine_results) | ||||
| 
 | ||||
| @@ -192,6 +215,7 @@ | ||||
| 
 | ||||
|     return results, suggestions | ||||
| 
 | ||||
| 
 | ||||
| def get_engines_stats(): | ||||
|     # TODO refactor | ||||
|     pageloads = [] | ||||
| @@ -200,14 +224,15 @@ def get_engines_stats(): | ||||
|     errors = [] | ||||
|     scores_per_result = [] | ||||
| 
 | ||||
|     max_pageload = max_results = max_score = max_errors = max_score_per_result = 0 | ||||
|     max_pageload = max_results = max_score = max_errors = max_score_per_result = 0  # noqa | ||||
|     for engine in engines.values(): | ||||
|         if engine.stats['search_count'] == 0: | ||||
|             continue | ||||
|         results_num = engine.stats['result_count']/float(engine.stats['search_count']) | ||||
|         load_times  = engine.stats['page_load_time']/float(engine.stats['search_count']) | ||||
|         results_num = \ | ||||
|             engine.stats['result_count'] / float(engine.stats['search_count']) | ||||
|         load_times = engine.stats['page_load_time'] / float(engine.stats['search_count'])  # noqa | ||||
|         if results_num: | ||||
|             score = engine.stats['score_count'] / float(engine.stats['search_count']) | ||||
|             score = engine.stats['score_count'] / float(engine.stats['search_count'])  # noqa | ||||
|             score_per_result = score / results_num | ||||
|         else: | ||||
|             score = score_per_result = 0.0 | ||||
| @@ -220,30 +245,39 @@ def get_engines_stats(): | ||||
|         results.append({'avg': results_num, 'name': engine.name}) | ||||
|         scores.append({'avg': score, 'name': engine.name}) | ||||
|         errors.append({'avg': engine.stats['errors'], 'name': engine.name}) | ||||
|         scores_per_result.append({'avg': score_per_result, 'name': engine.name}) | ||||
|         scores_per_result.append({ | ||||
|             'avg': score_per_result, | ||||
|             'name': engine.name | ||||
|         }) | ||||
| 
 | ||||
|     for engine in pageloads: | ||||
|         engine['percentage'] = int(engine['avg']/max_pageload*100) | ||||
|         engine['percentage'] = int(engine['avg'] / max_pageload * 100) | ||||
| 
 | ||||
|     for engine in results: | ||||
|         engine['percentage'] = int(engine['avg']/max_results*100) | ||||
|         engine['percentage'] = int(engine['avg'] / max_results * 100) | ||||
| 
 | ||||
|     for engine in scores: | ||||
|         engine['percentage'] = int(engine['avg']/max_score*100) | ||||
|         engine['percentage'] = int(engine['avg'] / max_score * 100) | ||||
| 
 | ||||
|     for engine in scores_per_result: | ||||
|         engine['percentage'] = int(engine['avg']/max_score_per_result*100) | ||||
|         engine['percentage'] = int(engine['avg'] / max_score_per_result * 100) | ||||
| 
 | ||||
|     for engine in errors: | ||||
|         if max_errors: | ||||
|             engine['percentage'] = int(float(engine['avg'])/max_errors*100) | ||||
|             engine['percentage'] = int(float(engine['avg']) / max_errors * 100) | ||||
|         else: | ||||
|             engine['percentage'] = 0 | ||||
| 
 | ||||
| 
 | ||||
|     return [('Page loads (sec)', sorted(pageloads, key=itemgetter('avg'))) | ||||
|            ,('Number of results', sorted(results, key=itemgetter('avg'), reverse=True)) | ||||
|            ,('Scores', sorted(scores, key=itemgetter('avg'), reverse=True)) | ||||
|            ,('Scores per result', sorted(scores_per_result, key=itemgetter('avg'), reverse=True)) | ||||
|            ,('Errors', sorted(errors, key=itemgetter('avg'), reverse=True)) | ||||
|            ] | ||||
|     return [ | ||||
|         ('Page loads (sec)', sorted(pageloads, key=itemgetter('avg'))), | ||||
|         ( | ||||
|             'Number of results', | ||||
|             sorted(results, key=itemgetter('avg'), reverse=True) | ||||
|         ), | ||||
|         ('Scores', sorted(scores, key=itemgetter('avg'), reverse=True)), | ||||
|         ( | ||||
|             'Scores per result', | ||||
|             sorted(scores_per_result, key=itemgetter('avg'), reverse=True) | ||||
|         ), | ||||
|         ('Errors', sorted(errors, key=itemgetter('avg'), reverse=True)), | ||||
|     ] | ||||
|  | ||||
| @@ -4,11 +4,12 @@ from cgi import escape | ||||
| 
 | ||||
| base_url = 'http://www.bing.com/' | ||||
| search_string = 'search?{query}' | ||||
| locale = 'en-US' # see http://msdn.microsoft.com/en-us/library/dd251064.aspx | ||||
| locale = 'en-US'  # see http://msdn.microsoft.com/en-us/library/dd251064.aspx | ||||
| 
 | ||||
| 
 | ||||
| def request(query, params): | ||||
|     search_path = search_string.format(query=urlencode({'q': query, 'setmkt': locale})) | ||||
|     search_path = search_string.format( | ||||
|         query=urlencode({'q': query, 'setmkt': locale})) | ||||
|     #if params['category'] == 'images': | ||||
|     #    params['url'] = base_url + 'images/' + search_path | ||||
|     params['url'] = base_url + search_path | ||||
|  | ||||
| @@ -7,6 +7,7 @@ weight = 100 | ||||
| 
 | ||||
| parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I) | ||||
| 
 | ||||
| 
 | ||||
| def request(query, params): | ||||
|     m = parser_re.match(query) | ||||
|     if not m: | ||||
| @@ -19,7 +20,7 @@ def request(query, params): | ||||
|         # wrong params | ||||
|         return params | ||||
| 
 | ||||
|     q = (from_currency+to_currency).upper() | ||||
|     q = (from_currency + to_currency).upper() | ||||
| 
 | ||||
|     params['url'] = url.format(query=q) | ||||
|     params['ammount'] = ammount | ||||
| @@ -33,25 +34,27 @@ def response(resp): | ||||
|     global base_url | ||||
|     results = [] | ||||
|     try: | ||||
|         _,conversion_rate,_ = resp.text.split(',', 2) | ||||
|         _, conversion_rate, _ = resp.text.split(',', 2) | ||||
|         conversion_rate = float(conversion_rate) | ||||
|     except: | ||||
|         return results | ||||
| 
 | ||||
|     title = '{0} {1} in {2} is {3}'.format(resp.search_params['ammount'] | ||||
|                                           ,resp.search_params['from'] | ||||
|                                           ,resp.search_params['to'] | ||||
|                                           ,resp.search_params['ammount']*conversion_rate | ||||
|                                           ) | ||||
|     title = '{0} {1} in {2} is {3}'.format( | ||||
|         resp.search_params['ammount'], | ||||
|         resp.search_params['from'], | ||||
|         resp.search_params['to'], | ||||
|         resp.search_params['ammount'] * conversion_rate | ||||
|     ) | ||||
| 
 | ||||
|     content = '1 {0} is {1} {2}'.format(resp.search_params['from'], conversion_rate, resp.search_params['to']) | ||||
|     now_date = datetime.now().strftime('%Y%m%d') | ||||
|     url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html' | ||||
|     url = url.format(now_date | ||||
|                     ,resp.search_params['ammount'] | ||||
|                     ,resp.search_params['from'].lower() | ||||
|                     ,resp.search_params['to'].lower() | ||||
|                     ) | ||||
|     url = url.format( | ||||
|         now_date, | ||||
|         resp.search_params['ammount'], | ||||
|         resp.search_params['from'].lower(), | ||||
|         resp.search_params['to'].lower() | ||||
|     ) | ||||
|     results.append({'title': title, 'content': content, 'url': url}) | ||||
| 
 | ||||
|     return results | ||||
|  | ||||
| @@ -8,9 +8,11 @@ locale = 'en_US' | ||||
| # see http://www.dailymotion.com/doc/api/obj-video.html | ||||
| search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}' | ||||
| 
 | ||||
| 
 | ||||
| def request(query, params): | ||||
|     global search_url | ||||
|     params['url'] = search_url.format(query=urlencode({'search': query, 'localization': locale })) | ||||
|     params['url'] = search_url.format( | ||||
|         query=urlencode({'search': query, 'localization': locale})) | ||||
|     return params | ||||
| 
 | ||||
| 
 | ||||
| @@ -32,6 +34,7 @@ def response(resp): | ||||
|         results.append({'url': url, 'title': title, 'content': content}) | ||||
|     return results | ||||
| 
 | ||||
| 
 | ||||
| def text_content_from_html(html_string): | ||||
|     desc_html = html.fragment_fromstring(html_string, create_parent=True) | ||||
|     return desc_html.text_content() | ||||
|  | ||||
| @@ -1,16 +0,0 @@ | ||||
| 
 | ||||
| port = 11111 | ||||
| 
 | ||||
| secret_key = "ultrasecretkey" # change this! | ||||
| 
 | ||||
| debug = False | ||||
| 
 | ||||
| request_timeout = 5.0 # seconds | ||||
| 
 | ||||
| weights = {} # 'search_engine_name': float(weight) | default is 1.0 | ||||
| 
 | ||||
| blacklist = [] # search engine blacklist | ||||
| 
 | ||||
| categories = {} # custom search engine categories | ||||
| 
 | ||||
| base_url = None # "https://your.domain.tld/" or None (to use request parameters) | ||||
searx/settings_robot.yml | 107 additions (new file)
| @@ -0,0 +1,107 @@ | ||||
| server: | ||||
|     port : 11111 | ||||
|     secret_key : "ultrasecretkey" # change this! | ||||
|     debug : False | ||||
|     request_timeout : 3.0 # seconds | ||||
|     base_url: False | ||||
| 
 | ||||
| engines: | ||||
|   - name : wikipedia | ||||
|     engine : mediawiki | ||||
|     url    : https://en.wikipedia.org/ | ||||
|     number_of_results : 1 | ||||
| 
 | ||||
|   - name : bing | ||||
|     engine : bing | ||||
|     locale : en-US | ||||
| 
 | ||||
|   - name : currency | ||||
|     engine : currency_convert | ||||
|     categories : general | ||||
| 
 | ||||
|   - name : deviantart | ||||
|     engine : deviantart | ||||
|     categories : images | ||||
| 
 | ||||
|   - name : ddg definitions | ||||
|     engine : duckduckgo_definitions | ||||
| 
 | ||||
|   - name : duckduckgo | ||||
|     engine : duckduckgo | ||||
|     locale : en-us | ||||
| 
 | ||||
|   - name : filecrop | ||||
|     engine : filecrop | ||||
|     categories : files | ||||
| 
 | ||||
|   - name : flickr | ||||
|     engine : flickr | ||||
|     categories : images | ||||
| 
 | ||||
|   - name : github | ||||
|     engine : github | ||||
|     categories : it | ||||
| 
 | ||||
|   - name : google | ||||
|     engine        : json_engine | ||||
|     search_url    : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query} | ||||
|     categories    : general | ||||
|     url_query     : /responseData/results/unescapedUrl | ||||
|     content_query : /responseData/results/content | ||||
|     title_query   : /responseData/results/titleNoFormatting | ||||
| 
 | ||||
|   - name : google images | ||||
|     engine : google_images | ||||
|     categories : images | ||||
| 
 | ||||
|   - name : piratebay | ||||
|     engine : piratebay | ||||
|     categories : videos, music, files | ||||
| 
 | ||||
|   - name : soundcloud | ||||
|     engine : soundcloud | ||||
|     categories : music | ||||
| 
 | ||||
|   - name : stackoverflow | ||||
|     engine : stackoverflow | ||||
|     categories : it | ||||
| 
 | ||||
|   - name : startpage | ||||
|     engine : startpage | ||||
| 
 | ||||
|   - name : twitter | ||||
|     engine : twitter | ||||
|     categories : social media | ||||
| 
 | ||||
|   - name : urbandictionary | ||||
|     engine        : xpath | ||||
|     search_url    : http://www.urbandictionary.com/define.php?term={query} | ||||
|     url_xpath     : //div[@class="word"]//a/@href | ||||
|     title_xpath   : //div[@class="word"]//a | ||||
|     content_xpath : //div[@class="definition"] | ||||
| 
 | ||||
|   - name : yahoo | ||||
|     engine           : xpath | ||||
|     search_url       : http://search.yahoo.com/search?p={query} | ||||
|     results_xpath    : //div[@class="res"] | ||||
|     url_xpath        : .//h3/a/@href | ||||
|     title_xpath      : .//h3/a | ||||
|     content_xpath    : .//div[@class="abstr"] | ||||
|     suggestion_xpath : //div[@id="satat"]//a | ||||
| 
 | ||||
|   - name : youtube | ||||
|     engine : youtube | ||||
|     categories : videos | ||||
| 
 | ||||
|   - name : dailymotion | ||||
|     engine : dailymotion | ||||
|     locale : en_US | ||||
|     categories : videos | ||||
| 
 | ||||
|   - name : vimeo | ||||
|     engine : vimeo | ||||
|     categories : videos | ||||
|     results_xpath : //div[@id="browse_content"]/ol/li | ||||
|     url_xpath : ./a/@href | ||||
|     title_xpath : ./a/div[@class="data"]/p[@class="title"]/text() | ||||
|     content_xpath : ./a/img/@src | ||||
| @@ -7,10 +7,10 @@ from unittest2 import TestCase | ||||
| 
 | ||||
| import os | ||||
| import subprocess | ||||
| import sys | ||||
| 
 | ||||
| 
 | ||||
| class SearxTestLayer: | ||||
|     """Base layer for non-robot tests.""" | ||||
| 
 | ||||
|     __name__ = u'SearxTestLayer' | ||||
| 
 | ||||
| @@ -36,24 +36,37 @@ class SearxRobotLayer(Layer): | ||||
| 
 | ||||
|     def setUp(self): | ||||
|         os.setpgrp()  # create new process group, become its leader | ||||
| 
 | ||||
|         # get program paths | ||||
|         webapp = os.path.join( | ||||
|             os.path.abspath(os.path.dirname(os.path.realpath(__file__))), | ||||
|             'webapp.py' | ||||
|         ) | ||||
|         exe = os.path.abspath(os.path.dirname(__file__) + '/../bin/py') | ||||
| 
 | ||||
|         # set robot settings path | ||||
|         os.environ['SEARX_SETTINGS_PATH'] = os.path.abspath( | ||||
|             os.path.dirname(__file__) + '/settings_robot.yml') | ||||
| 
 | ||||
|         # run the server | ||||
|         self.server = subprocess.Popen( | ||||
|             [exe, webapp, 'settings_robot'], | ||||
|             [exe, webapp], | ||||
|             stdout=subprocess.PIPE, | ||||
|             stderr=subprocess.STDOUT | ||||
|         ) | ||||
| 
 | ||||
|     def tearDown(self): | ||||
|         # TERM all processes in my group | ||||
|         # send TERM signal to all processes in my group, to stop subprocesses | ||||
|         os.killpg(os.getpgid(self.server.pid), 15) | ||||
| 
 | ||||
|         # remove previously set environment variable | ||||
|         del os.environ['SEARX_SETTINGS_PATH'] | ||||
| 
 | ||||
| 
 | ||||
| SEARXROBOTLAYER = SearxRobotLayer() | ||||
| 
 | ||||
| 
 | ||||
| class SearxTestCase(TestCase): | ||||
|     """Base test case for non-robot tests.""" | ||||
| 
 | ||||
|     layer = SearxTestLayer | ||||
|  | ||||
| @@ -5,10 +5,12 @@ import codecs | ||||
| import cStringIO | ||||
| import re | ||||
| 
 | ||||
| 
 | ||||
| def gen_useragent(): | ||||
|     # TODO | ||||
|     return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0" | ||||
| 
 | ||||
| 
 | ||||
| def highlight_content(content, query): | ||||
| 
 | ||||
|     if not content: | ||||
| @@ -34,10 +36,11 @@ def highlight_content(content, query): | ||||
| 
 | ||||
|     return content | ||||
| 
 | ||||
| 
 | ||||
| class HTMLTextExtractor(HTMLParser): | ||||
|     def __init__(self): | ||||
|         HTMLParser.__init__(self) | ||||
|         self.result = [ ] | ||||
|         self.result = [] | ||||
| 
 | ||||
|     def handle_data(self, d): | ||||
|         self.result.append(d) | ||||
| @@ -54,6 +57,7 @@ class HTMLTextExtractor(HTMLParser): | ||||
|     def get_text(self): | ||||
|         return u''.join(self.result) | ||||
| 
 | ||||
| 
 | ||||
| def html_to_text(html): | ||||
|     s = HTMLTextExtractor() | ||||
|     s.feed(html) | ||||
|  | ||||
| @@ -17,13 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. | ||||
| (C) 2013- by Adam Tauber, <asciimoo@gmail.com> | ||||
| ''' | ||||
| 
 | ||||
| import os | ||||
| import sys | ||||
| if __name__ == "__main__": | ||||
|     sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))+'/../')) | ||||
| 
 | ||||
| from searx import settings | ||||
| 
 | ||||
| from flask import Flask, request, render_template, url_for, Response, make_response, redirect | ||||
| from searx.engines import search, categories, engines, get_engines_stats | ||||
| import json | ||||
| @@ -33,11 +27,17 @@ from flask import send_from_directory | ||||
| from searx.utils import highlight_content, html_to_text | ||||
| 
 | ||||
| 
 | ||||
| import os | ||||
| 
 | ||||
| 
 | ||||
| app = Flask( | ||||
|     __name__, | ||||
|     static_folder=os.path.join(os.path.dirname(__file__), 'static'), | ||||
|     template_folder=os.path.join(os.path.dirname(__file__), 'templates') | ||||
| ) | ||||
| 
 | ||||
| app = Flask(__name__) | ||||
| app.secret_key = settings['server']['secret_key'] | ||||
| 
 | ||||
| 
 | ||||
| #TODO configurable via settings.yml | ||||
| favicons = ['wikipedia', 'youtube', 'vimeo', 'soundcloud', | ||||
|             'twitter', 'stackoverflow', 'github'] | ||||
| @@ -81,6 +81,7 @@ def render(template_name, **kwargs): | ||||
|             kwargs['selected_categories'] = ['general'] | ||||
|     return render_template(template_name, **kwargs) | ||||
| 
 | ||||
| 
 | ||||
| def parse_query(query): | ||||
|     query_engines = [] | ||||
|     query_parts = query.split() | ||||
| @@ -94,7 +95,7 @@ def parse_query(query): | ||||
| def index(): | ||||
|     global categories | ||||
| 
 | ||||
|     if request.method=='POST': | ||||
|     if request.method == 'POST': | ||||
|         request_data = request.form | ||||
|     else: | ||||
|         request_data = request.args | ||||
| @@ -106,7 +107,7 @@ def index(): | ||||
|     query, selected_engines = parse_query(request_data['q'].encode('utf-8')) | ||||
| 
 | ||||
|     if not len(selected_engines): | ||||
|         for pd_name,pd in request_data.items(): | ||||
|         for pd_name, pd in request_data.items(): | ||||
|             if pd_name.startswith('category_'): | ||||
|                 category = pd_name[9:] | ||||
|                 if not category in categories: | ||||
| @@ -159,23 +160,24 @@ def index(): | ||||
|         response.headers.add('Content-Disposition', 'attachment;Filename=searx_-_{0}.csv'.format('_'.join(query.split()))) | ||||
|         return response | ||||
|     elif request_data.get('format') == 'rss': | ||||
|         response_rss = render('opensearch_response_rss.xml' | ||||
|                               ,results=results | ||||
|                               ,q=request_data['q'] | ||||
|                               ,number_of_results=len(results) | ||||
|                               ,base_url=get_base_url() | ||||
|                               ) | ||||
|         response_rss = render( | ||||
|             'opensearch_response_rss.xml', | ||||
|             results=results, | ||||
|             q=request_data['q'], | ||||
|             number_of_results=len(results), | ||||
|             base_url=get_base_url() | ||||
|         ) | ||||
|         return Response(response_rss, mimetype='text/xml') | ||||
| 
 | ||||
| 
 | ||||
|     return render('results.html' | ||||
|                  ,results=results | ||||
|                  ,q=request_data['q'] | ||||
|                  ,selected_categories=selected_categories | ||||
|                  ,number_of_results=len(results)+len(featured_results) | ||||
|                  ,featured_results=featured_results | ||||
|                  ,suggestions=suggestions | ||||
|                  ) | ||||
|     return render( | ||||
|         'results.html', | ||||
|         results=results, | ||||
|         q=request_data['q'], | ||||
|         selected_categories=selected_categories, | ||||
|         number_of_results=len(results) + len(featured_results), | ||||
|         featured_results=featured_results, | ||||
|         suggestions=suggestions | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| @app.route('/about', methods=['GET']) | ||||
| @@ -192,9 +194,9 @@ def list_engines(): | ||||
| @app.route('/preferences', methods=['GET', 'POST']) | ||||
| def preferences(): | ||||
| 
 | ||||
|     if request.method=='POST': | ||||
|     if request.method == 'POST': | ||||
|         selected_categories = [] | ||||
|         for pd_name,pd in request.form.items(): | ||||
|         for pd_name, pd in request.form.items(): | ||||
|             if pd_name.startswith('category_'): | ||||
|                 category = pd_name[9:] | ||||
|                 if not category in categories: | ||||
| @@ -203,7 +205,10 @@ def preferences(): | ||||
|         if selected_categories: | ||||
|             resp = make_response(redirect('/')) | ||||
|             # cookie max age: 4 weeks | ||||
|             resp.set_cookie('categories', ','.join(selected_categories), max_age=60*60*24*7*4) | ||||
|             resp.set_cookie( | ||||
|                 'categories', ','.join(selected_categories), | ||||
|                 max_age=60 * 60 * 24 * 7 * 4 | ||||
|             ) | ||||
|             return resp | ||||
|     return render('preferences.html') | ||||
| 
 | ||||
| @@ -238,6 +243,7 @@ def opensearch(): | ||||
|                 mimetype="application/xml") | ||||
|     return resp | ||||
| 
 | ||||
| 
 | ||||
| @app.route('/favicon.ico') | ||||
| def favicon(): | ||||
|     return send_from_directory(os.path.join(app.root_path, 'static/img'), | ||||
| @@ -248,10 +254,11 @@ def run(): | ||||
|     from gevent import monkey | ||||
|     monkey.patch_all() | ||||
| 
 | ||||
|     app.run(debug        = settings['server']['debug'] | ||||
|            ,use_debugger = settings['server']['debug'] | ||||
|            ,port         = settings['server']['port'] | ||||
|            ) | ||||
|     app.run( | ||||
|         debug=settings['server']['debug'], | ||||
|         use_debugger=settings['server']['debug'], | ||||
|         port=settings['server']['port'] | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|  | ||||