mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-03 19:17:07 -05:00 
			
		
		
		
	
							parent
							
								
									e4d46d21c7
								
							
						
					
					
						commit
						805fb02ed1
					
				
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							@ -20,6 +20,7 @@ from searx.utils import html_to_text
 | 
				
			|||||||
categories = None
 | 
					categories = None
 | 
				
			||||||
paging = True
 | 
					paging = True
 | 
				
			||||||
language_support = True
 | 
					language_support = True
 | 
				
			||||||
 | 
					supported_languages_url = 'https://qwant.com/region'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
category_to_keyword = {'general': 'web',
 | 
					category_to_keyword = {'general': 'web',
 | 
				
			||||||
                       'images': 'images',
 | 
					                       'images': 'images',
 | 
				
			||||||
@ -46,6 +47,13 @@ def request(query, params):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    # add language tag if specified
 | 
					    # add language tag if specified
 | 
				
			||||||
    if params['language'] != 'all':
 | 
					    if params['language'] != 'all':
 | 
				
			||||||
 | 
					        if params['language'].find('-') < 0:
 | 
				
			||||||
 | 
					            # tries to get a country code from language
 | 
				
			||||||
 | 
					            for lang in supported_languages:
 | 
				
			||||||
 | 
					                lc = lang.split('-')
 | 
				
			||||||
 | 
					                if params['language'] == lc[0]:
 | 
				
			||||||
 | 
					                    params['language'] = lang
 | 
				
			||||||
 | 
					                    break
 | 
				
			||||||
        params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
 | 
					        params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return params
 | 
					    return params
 | 
				
			||||||
@ -96,5 +104,21 @@ def response(resp):
 | 
				
			|||||||
                            'publishedDate': published_date,
 | 
					                            'publishedDate': published_date,
 | 
				
			||||||
                            'content': content})
 | 
					                            'content': content})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # return results
 | 
					 | 
				
			||||||
    return results
 | 
					    return results
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# get supported languages from their site
 | 
				
			||||||
 | 
					def _fetch_supported_languages(resp):
 | 
				
			||||||
 | 
					    # list of regions is embedded in page as a js object
 | 
				
			||||||
 | 
					    response_text = resp.text
 | 
				
			||||||
 | 
					    response_text = response_text[response_text.find('regionalisation'):]
 | 
				
			||||||
 | 
					    response_text = response_text[response_text.find('{'):response_text.find(');')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    regions_json = loads(response_text)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    supported_languages = []
 | 
				
			||||||
 | 
					    for lang in regions_json['languages'].values():
 | 
				
			||||||
 | 
					        for country in lang['countries']:
 | 
				
			||||||
 | 
					            supported_languages.append(lang['code'] + '-' + country)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return supported_languages
 | 
				
			||||||
 | 
				
			|||||||
@ -5,9 +5,6 @@
 | 
				
			|||||||
language_codes = (
 | 
					language_codes = (
 | 
				
			||||||
    (u"ar-SA", u"العربية", u"", u"Arabic"),
 | 
					    (u"ar-SA", u"العربية", u"", u"Arabic"),
 | 
				
			||||||
    (u"bg-BG", u"Български", u"", u"Bulgarian"),
 | 
					    (u"bg-BG", u"Български", u"", u"Bulgarian"),
 | 
				
			||||||
    (u"ca", u"Català", u"", u"Catalan"),
 | 
					 | 
				
			||||||
    (u"ca-CT", u"Català", u"", u"Catalan"),
 | 
					 | 
				
			||||||
    (u"ca-ES", u"Català", u"Espanya", u"Catalan"),
 | 
					 | 
				
			||||||
    (u"cs-CZ", u"Čeština", u"", u"Czech"),
 | 
					    (u"cs-CZ", u"Čeština", u"", u"Czech"),
 | 
				
			||||||
    (u"da-DK", u"Dansk", u"", u"Danish"),
 | 
					    (u"da-DK", u"Dansk", u"", u"Danish"),
 | 
				
			||||||
    (u"de", u"Deutsch", u"", u"German"),
 | 
					    (u"de", u"Deutsch", u"", u"German"),
 | 
				
			||||||
@ -18,7 +15,9 @@ language_codes = (
 | 
				
			|||||||
    (u"en", u"English", u"", u"English"),
 | 
					    (u"en", u"English", u"", u"English"),
 | 
				
			||||||
    (u"en-AU", u"English", u"Australia", u"English"),
 | 
					    (u"en-AU", u"English", u"Australia", u"English"),
 | 
				
			||||||
    (u"en-CA", u"English", u"Canada", u"English"),
 | 
					    (u"en-CA", u"English", u"Canada", u"English"),
 | 
				
			||||||
 | 
					    (u"en-CY", u"English", u"Cyprus", u"English"),
 | 
				
			||||||
    (u"en-GB", u"English", u"United Kingdom", u"English"),
 | 
					    (u"en-GB", u"English", u"United Kingdom", u"English"),
 | 
				
			||||||
 | 
					    (u"en-GD", u"English", u"Grenada", u"English"),
 | 
				
			||||||
    (u"en-ID", u"English", u"Indonesia", u"English"),
 | 
					    (u"en-ID", u"English", u"Indonesia", u"English"),
 | 
				
			||||||
    (u"en-IE", u"English", u"Ireland", u"English"),
 | 
					    (u"en-IE", u"English", u"Ireland", u"English"),
 | 
				
			||||||
    (u"en-IN", u"English", u"India", u"English"),
 | 
					    (u"en-IN", u"English", u"India", u"English"),
 | 
				
			||||||
@ -54,10 +53,10 @@ language_codes = (
 | 
				
			|||||||
    (u"ko-KR", u"한국어", u"", u"Korean"),
 | 
					    (u"ko-KR", u"한국어", u"", u"Korean"),
 | 
				
			||||||
    (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
 | 
					    (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
 | 
				
			||||||
    (u"lv-LV", u"Latviešu", u"", u"Latvian"),
 | 
					    (u"lv-LV", u"Latviešu", u"", u"Latvian"),
 | 
				
			||||||
 | 
					    (u"ms-MY", u"Bahasa Melayu", u"", u"Malay"),
 | 
				
			||||||
    (u"nl", u"Nederlands", u"", u"Dutch"),
 | 
					    (u"nl", u"Nederlands", u"", u"Dutch"),
 | 
				
			||||||
    (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
 | 
					    (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
 | 
				
			||||||
    (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
 | 
					    (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
 | 
				
			||||||
    (u"no-NO", u"Norsk", u"", u"Norwegian"),
 | 
					 | 
				
			||||||
    (u"pl-PL", u"Polski", u"", u"Polish"),
 | 
					    (u"pl-PL", u"Polski", u"", u"Polish"),
 | 
				
			||||||
    (u"pt", u"Português", u"", u"Portuguese"),
 | 
					    (u"pt", u"Português", u"", u"Portuguese"),
 | 
				
			||||||
    (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
 | 
					    (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
 | 
				
			||||||
@ -69,7 +68,6 @@ language_codes = (
 | 
				
			|||||||
    (u"sv-SE", u"Svenska", u"", u"Swedish"),
 | 
					    (u"sv-SE", u"Svenska", u"", u"Swedish"),
 | 
				
			||||||
    (u"th-TH", u"ไทย", u"", u"Thai"),
 | 
					    (u"th-TH", u"ไทย", u"", u"Thai"),
 | 
				
			||||||
    (u"tr-TR", u"Türkçe", u"", u"Turkish"),
 | 
					    (u"tr-TR", u"Türkçe", u"", u"Turkish"),
 | 
				
			||||||
    (u"uk-UA", u"Українська", u"", u"Ukrainian"),
 | 
					 | 
				
			||||||
    (u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"),
 | 
					    (u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"),
 | 
				
			||||||
    (u"zh", u"中文", u"", u"Chinese"),
 | 
					    (u"zh", u"中文", u"", u"Chinese"),
 | 
				
			||||||
    (u"zh-CN", u"中文", u"中国", u"Chinese"),
 | 
					    (u"zh-CN", u"中文", u"中国", u"Chinese"),
 | 
				
			||||||
 | 
				
			|||||||
@ -25,6 +25,11 @@ class TestQwantEngine(SearxTestCase):
 | 
				
			|||||||
        self.assertFalse('fr' in params['url'])
 | 
					        self.assertFalse('fr' in params['url'])
 | 
				
			||||||
        self.assertIn('news', params['url'])
 | 
					        self.assertIn('news', params['url'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        qwant.supported_languages = ['en', 'fr-FR', 'fr-CA']
 | 
				
			||||||
 | 
					        dicto['language'] = 'fr'
 | 
				
			||||||
 | 
					        params = qwant.request(query, dicto)
 | 
				
			||||||
 | 
					        self.assertIn('fr_fr', params['url'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_response(self):
 | 
					    def test_response(self):
 | 
				
			||||||
        self.assertRaises(AttributeError, qwant.response, None)
 | 
					        self.assertRaises(AttributeError, qwant.response, None)
 | 
				
			||||||
        self.assertRaises(AttributeError, qwant.response, [])
 | 
					        self.assertRaises(AttributeError, qwant.response, [])
 | 
				
			||||||
@ -315,3 +320,19 @@ class TestQwantEngine(SearxTestCase):
 | 
				
			|||||||
        results = qwant.response(response)
 | 
					        results = qwant.response(response)
 | 
				
			||||||
        self.assertEqual(type(results), list)
 | 
					        self.assertEqual(type(results), list)
 | 
				
			||||||
        self.assertEqual(len(results), 0)
 | 
					        self.assertEqual(len(results), 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_fetch_supported_languages(self):
 | 
				
			||||||
 | 
					        page = """some code...
 | 
				
			||||||
 | 
					        config_set('project.regionalisation', {"continents":{},"languages":
 | 
				
			||||||
 | 
					        {"de":{"code":"de","name":"Deutsch","countries":["DE","CH","AT"]},
 | 
				
			||||||
 | 
					        "it":{"code":"it","name":"Italiano","countries":["IT","CH"]}}});
 | 
				
			||||||
 | 
					        some more code..."""
 | 
				
			||||||
 | 
					        response = mock.Mock(text=page)
 | 
				
			||||||
 | 
					        languages = qwant._fetch_supported_languages(response)
 | 
				
			||||||
 | 
					        self.assertEqual(type(languages), list)
 | 
				
			||||||
 | 
					        self.assertEqual(len(languages), 5)
 | 
				
			||||||
 | 
					        self.assertIn('de-DE', languages)
 | 
				
			||||||
 | 
					        self.assertIn('de-CH', languages)
 | 
				
			||||||
 | 
					        self.assertIn('de-AT', languages)
 | 
				
			||||||
 | 
					        self.assertIn('it-IT', languages)
 | 
				
			||||||
 | 
					        self.assertIn('it-CH', languages)
 | 
				
			||||||
 | 
				
			|||||||
@ -14,7 +14,8 @@ from json import loads, dumps
 | 
				
			|||||||
import io
 | 
					import io
 | 
				
			||||||
from sys import path
 | 
					from sys import path
 | 
				
			||||||
path.append('../searx')  # noqa
 | 
					path.append('../searx')  # noqa
 | 
				
			||||||
from searx.engines import engines
 | 
					from searx import settings
 | 
				
			||||||
 | 
					from searx.engines import initialize_engines, engines
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Geonames API for country names.
 | 
					# Geonames API for country names.
 | 
				
			||||||
geonames_user = ''  # ADD USER NAME HERE
 | 
					geonames_user = ''  # ADD USER NAME HERE
 | 
				
			||||||
@ -77,6 +78,7 @@ def get_country_name(locale):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
# Fetchs supported languages for each engine and writes json file with those.
 | 
					# Fetchs supported languages for each engine and writes json file with those.
 | 
				
			||||||
def fetch_supported_languages():
 | 
					def fetch_supported_languages():
 | 
				
			||||||
 | 
					    initialize_engines(settings['engines'])
 | 
				
			||||||
    for engine_name in engines:
 | 
					    for engine_name in engines:
 | 
				
			||||||
        if hasattr(engines[engine_name], 'fetch_supported_languages'):
 | 
					        if hasattr(engines[engine_name], 'fetch_supported_languages'):
 | 
				
			||||||
            try:
 | 
					            try:
 | 
				
			||||||
@ -117,7 +119,7 @@ def join_language_lists():
 | 
				
			|||||||
                    languages[lang]['counter'].append(engine_name)
 | 
					                    languages[lang]['counter'].append(engine_name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # filter list to include only languages supported by most engines
 | 
					    # filter list to include only languages supported by most engines
 | 
				
			||||||
    min_supported_engines = int(0.75 * len(engines_languages))
 | 
					    min_supported_engines = int(0.70 * len(engines_languages))
 | 
				
			||||||
    languages = {code: lang for code, lang
 | 
					    languages = {code: lang for code, lang
 | 
				
			||||||
                 in languages.iteritems()
 | 
					                 in languages.iteritems()
 | 
				
			||||||
                 if len(lang.get('counter', [])) >= min_supported_engines or
 | 
					                 if len(lang.get('counter', [])) >= min_supported_engines or
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user