mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-31 10:37:06 -04:00 
			
		
		
		
	Merge pull request #1061 from a01200356/bing
[fix] Language support for Bing Images and Videos
This commit is contained in:
		
						commit
						c8a66a090a
					
				| @ -13,7 +13,7 @@ python: | |||||||
| before_install: | before_install: | ||||||
|   - "export DISPLAY=:99.0" |   - "export DISPLAY=:99.0" | ||||||
|   - "sh -e /etc/init.d/xvfb start" |   - "sh -e /etc/init.d/xvfb start" | ||||||
|   - npm install less less-plugin-clean-css grunt-cli |   - npm install less@2.7 less-plugin-clean-css grunt-cli | ||||||
|   - export PATH=`pwd`/node_modules/.bin:$PATH |   - export PATH=`pwd`/node_modules/.bin:$PATH | ||||||
|   - ./manage.sh install_geckodriver ~/drivers |   - ./manage.sh install_geckodriver ~/drivers | ||||||
|   - export PATH=~/drivers:$PATH |   - export PATH=~/drivers:$PATH | ||||||
|  | |||||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @ -18,7 +18,6 @@ | |||||||
| from lxml import html | from lxml import html | ||||||
| from json import loads | from json import loads | ||||||
| import re | import re | ||||||
| from searx.engines.bing import _fetch_supported_languages, supported_languages_url |  | ||||||
| from searx.url_utils import urlencode | from searx.url_utils import urlencode | ||||||
| 
 | 
 | ||||||
| # engine dependent config | # engine dependent config | ||||||
| @ -26,6 +25,8 @@ categories = ['images'] | |||||||
| paging = True | paging = True | ||||||
| safesearch = True | safesearch = True | ||||||
| time_range_support = True | time_range_support = True | ||||||
|  | language_support = True | ||||||
|  | supported_languages_url = 'https://www.bing.com/account/general' | ||||||
| 
 | 
 | ||||||
| # search-url | # search-url | ||||||
| base_url = 'https://www.bing.com/' | base_url = 'https://www.bing.com/' | ||||||
| @ -45,23 +46,41 @@ safesearch_types = {2: 'STRICT', | |||||||
| _quote_keys_regex = re.compile('({|,)([a-z][a-z0-9]*):(")', re.I | re.U) | _quote_keys_regex = re.compile('({|,)([a-z][a-z0-9]*):(")', re.I | re.U) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | # get supported region code | ||||||
|  | def get_region_code(lang, lang_list=None): | ||||||
|  |     region = None | ||||||
|  |     if lang in (lang_list or supported_languages): | ||||||
|  |         region = lang | ||||||
|  |     elif lang.startswith('no'): | ||||||
|  |         region = 'nb-NO' | ||||||
|  |     else: | ||||||
|  |         # try to get a supported country code with language | ||||||
|  |         lang = lang.split('-')[0] | ||||||
|  |         for lc in (lang_list or supported_languages): | ||||||
|  |             if lang == lc.split('-')[0]: | ||||||
|  |                 region = lc | ||||||
|  |                 break | ||||||
|  |     if region: | ||||||
|  |         return region.lower() | ||||||
|  |     else: | ||||||
|  |         return 'en-us' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| # do search-request | # do search-request | ||||||
| def request(query, params): | def request(query, params): | ||||||
|     offset = (params['pageno'] - 1) * 10 + 1 |     offset = (params['pageno'] - 1) * 10 + 1 | ||||||
| 
 | 
 | ||||||
|     # required for cookie |  | ||||||
|     if params['language'] == 'all': |  | ||||||
|         language = 'en-US' |  | ||||||
|     else: |  | ||||||
|         language = params['language'] |  | ||||||
| 
 |  | ||||||
|     search_path = search_string.format( |     search_path = search_string.format( | ||||||
|         query=urlencode({'q': query}), |         query=urlencode({'q': query}), | ||||||
|         offset=offset) |         offset=offset) | ||||||
| 
 | 
 | ||||||
|  |     language = get_region_code(params['language']) | ||||||
|  | 
 | ||||||
|     params['cookies']['SRCHHPGUSR'] = \ |     params['cookies']['SRCHHPGUSR'] = \ | ||||||
|         'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0] +\ |         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') | ||||||
|         '&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') | 
 | ||||||
|  |     params['cookies']['_EDGE_S'] = 'mkt=' + language +\ | ||||||
|  |         '&ui=' + language + '&F=1' | ||||||
| 
 | 
 | ||||||
|     params['url'] = base_url + search_path |     params['url'] = base_url + search_path | ||||||
|     if params['time_range'] in time_range_dict: |     if params['time_range'] in time_range_dict: | ||||||
| @ -106,3 +125,22 @@ def response(resp): | |||||||
| 
 | 
 | ||||||
|     # return results |     # return results | ||||||
|     return results |     return results | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # get supported languages from their site | ||||||
|  | def _fetch_supported_languages(resp): | ||||||
|  |     supported_languages = [] | ||||||
|  |     dom = html.fromstring(resp.text) | ||||||
|  | 
 | ||||||
|  |     regions_xpath = '//div[@id="region-section-content"]' \ | ||||||
|  |                     + '//ul[@class="b_vList"]/li/a/@href' | ||||||
|  | 
 | ||||||
|  |     regions = dom.xpath(regions_xpath) | ||||||
|  |     for region in regions: | ||||||
|  |         code = re.search('setmkt=[^\&]+', region).group()[7:] | ||||||
|  |         if code == 'nb-NO': | ||||||
|  |             code = 'no-NO' | ||||||
|  | 
 | ||||||
|  |         supported_languages.append(code) | ||||||
|  | 
 | ||||||
|  |     return supported_languages | ||||||
|  | |||||||
| @ -12,6 +12,7 @@ | |||||||
| 
 | 
 | ||||||
| from json import loads | from json import loads | ||||||
| from lxml import html | from lxml import html | ||||||
|  | from searx.engines.bing_images import _fetch_supported_languages, supported_languages_url, get_region_code | ||||||
| from searx.engines.xpath import extract_text | from searx.engines.xpath import extract_text | ||||||
| from searx.url_utils import urlencode | from searx.url_utils import urlencode | ||||||
| 
 | 
 | ||||||
| @ -21,6 +22,7 @@ paging = True | |||||||
| safesearch = True | safesearch = True | ||||||
| time_range_support = True | time_range_support = True | ||||||
| number_of_results = 10 | number_of_results = 10 | ||||||
|  | language_support = True | ||||||
| 
 | 
 | ||||||
| search_url = 'https://www.bing.com/videos/asyncv2?{query}&async=content&'\ | search_url = 'https://www.bing.com/videos/asyncv2?{query}&async=content&'\ | ||||||
|              'first={offset}&count={number_of_results}&CW=1366&CH=25&FORM=R5VR5' |              'first={offset}&count={number_of_results}&CW=1366&CH=25&FORM=R5VR5' | ||||||
| @ -45,7 +47,8 @@ def request(query, params): | |||||||
|         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') |         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') | ||||||
| 
 | 
 | ||||||
|     # language cookie |     # language cookie | ||||||
|     params['cookies']['_EDGE_S'] = 'mkt=' + params['language'].lower() + '&F=1' |     region = get_region_code(params['language'], lang_list=supported_languages) | ||||||
|  |     params['cookies']['_EDGE_S'] = 'mkt=' + region + '&F=1' | ||||||
| 
 | 
 | ||||||
|     # query and paging |     # query and paging | ||||||
|     params['url'] = search_url.format(query=urlencode({'q': query}), |     params['url'] = search_url.format(query=urlencode({'q': query}), | ||||||
|  | |||||||
| @ -134,4 +134,4 @@ def _fetch_supported_languages(resp): | |||||||
|     regions_json = loads(response_page) |     regions_json = loads(response_page) | ||||||
|     supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys()) |     supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys()) | ||||||
| 
 | 
 | ||||||
|     return supported_languages |     return list(supported_languages) | ||||||
|  | |||||||
| @ -118,7 +118,7 @@ def _fetch_supported_languages(resp): | |||||||
|     dom = fromstring(resp.text) |     dom = fromstring(resp.text) | ||||||
|     options = dom.xpath('//div[@id="regions-popup"]//ul/li/a') |     options = dom.xpath('//div[@id="regions-popup"]//ul/li/a') | ||||||
|     for option in options: |     for option in options: | ||||||
|         code = option.xpath('./@data-val')[0] |         code = option.xpath('./@data-search-language')[0] | ||||||
|         if code.startswith('nb-'): |         if code.startswith('nb-'): | ||||||
|             code = code.replace('nb', 'no', 1) |             code = code.replace('nb', 'no', 1) | ||||||
|         supported_languages.append(code) |         supported_languages.append(code) | ||||||
|  | |||||||
| @ -5,6 +5,11 @@ | |||||||
| language_codes = ( | language_codes = ( | ||||||
|     (u"ar-SA", u"العربية", u"", u"Arabic"), |     (u"ar-SA", u"العربية", u"", u"Arabic"), | ||||||
|     (u"bg-BG", u"Български", u"", u"Bulgarian"), |     (u"bg-BG", u"Български", u"", u"Bulgarian"), | ||||||
|  |     (u"ca", u"Català", u"", u"Catalan"), | ||||||
|  |     (u"ca-AD", u"Català", u"Andorra", u"Catalan"), | ||||||
|  |     (u"ca-CT", u"Català", u"", u"Catalan"), | ||||||
|  |     (u"ca-ES", u"Català", u"Espanya", u"Catalan"), | ||||||
|  |     (u"ca-FR", u"Català", u"França", u"Catalan"), | ||||||
|     (u"cs-CZ", u"Čeština", u"", u"Czech"), |     (u"cs-CZ", u"Čeština", u"", u"Czech"), | ||||||
|     (u"da-DK", u"Dansk", u"", u"Danish"), |     (u"da-DK", u"Dansk", u"", u"Danish"), | ||||||
|     (u"de", u"Deutsch", u"", u"German"), |     (u"de", u"Deutsch", u"", u"German"), | ||||||
| @ -15,9 +20,7 @@ language_codes = ( | |||||||
|     (u"en", u"English", u"", u"English"), |     (u"en", u"English", u"", u"English"), | ||||||
|     (u"en-AU", u"English", u"Australia", u"English"), |     (u"en-AU", u"English", u"Australia", u"English"), | ||||||
|     (u"en-CA", u"English", u"Canada", u"English"), |     (u"en-CA", u"English", u"Canada", u"English"), | ||||||
|     (u"en-CY", u"English", u"Cyprus", u"English"), |  | ||||||
|     (u"en-GB", u"English", u"United Kingdom", u"English"), |     (u"en-GB", u"English", u"United Kingdom", u"English"), | ||||||
|     (u"en-GD", u"English", u"Grenada", u"English"), |  | ||||||
|     (u"en-ID", u"English", u"Indonesia", u"English"), |     (u"en-ID", u"English", u"Indonesia", u"English"), | ||||||
|     (u"en-IE", u"English", u"Ireland", u"English"), |     (u"en-IE", u"English", u"Ireland", u"English"), | ||||||
|     (u"en-IN", u"English", u"India", u"English"), |     (u"en-IN", u"English", u"India", u"English"), | ||||||
| @ -28,6 +31,7 @@ language_codes = ( | |||||||
|     (u"en-US", u"English", u"United States", u"English"), |     (u"en-US", u"English", u"United States", u"English"), | ||||||
|     (u"en-ZA", u"English", u"South Africa", u"English"), |     (u"en-ZA", u"English", u"South Africa", u"English"), | ||||||
|     (u"es", u"Español", u"", u"Spanish"), |     (u"es", u"Español", u"", u"Spanish"), | ||||||
|  |     (u"es-AD", u"Español", u"Andorra", u"Spanish"), | ||||||
|     (u"es-AR", u"Español", u"Argentina", u"Spanish"), |     (u"es-AR", u"Español", u"Argentina", u"Spanish"), | ||||||
|     (u"es-CL", u"Español", u"Chile", u"Spanish"), |     (u"es-CL", u"Español", u"Chile", u"Spanish"), | ||||||
|     (u"es-CO", u"Español", u"Colombia", u"Spanish"), |     (u"es-CO", u"Español", u"Colombia", u"Spanish"), | ||||||
| @ -38,38 +42,32 @@ language_codes = ( | |||||||
|     (u"et-EE", u"Eesti", u"", u"Estonian"), |     (u"et-EE", u"Eesti", u"", u"Estonian"), | ||||||
|     (u"fi-FI", u"Suomi", u"", u"Finnish"), |     (u"fi-FI", u"Suomi", u"", u"Finnish"), | ||||||
|     (u"fr", u"Français", u"", u"French"), |     (u"fr", u"Français", u"", u"French"), | ||||||
|  |     (u"fr-AD", u"Français", u"Andorre", u"French"), | ||||||
|     (u"fr-BE", u"Français", u"Belgique", u"French"), |     (u"fr-BE", u"Français", u"Belgique", u"French"), | ||||||
|     (u"fr-CA", u"Français", u"Canada", u"French"), |     (u"fr-CA", u"Français", u"Canada", u"French"), | ||||||
|     (u"fr-CH", u"Français", u"Suisse", u"French"), |     (u"fr-CH", u"Français", u"Suisse", u"French"), | ||||||
|     (u"fr-FR", u"Français", u"France", u"French"), |     (u"fr-FR", u"Français", u"France", u"French"), | ||||||
|     (u"he-IL", u"עברית", u"", u"Hebrew"), |     (u"he-IL", u"עברית", u"", u"Hebrew"), | ||||||
|     (u"hr-HR", u"Hrvatski", u"", u"Croatian"), |  | ||||||
|     (u"hu-HU", u"Magyar", u"", u"Hungarian"), |     (u"hu-HU", u"Magyar", u"", u"Hungarian"), | ||||||
|     (u"id-ID", u"Bahasa Indonesia", u"", u"Indonesian"), |  | ||||||
|     (u"it", u"Italiano", u"", u"Italian"), |     (u"it", u"Italiano", u"", u"Italian"), | ||||||
|     (u"it-CH", u"Italiano", u"Svizzera", u"Italian"), |     (u"it-CH", u"Italiano", u"Svizzera", u"Italian"), | ||||||
|     (u"it-IT", u"Italiano", u"Italia", u"Italian"), |     (u"it-IT", u"Italiano", u"Italia", u"Italian"), | ||||||
|     (u"ja-JP", u"日本語", u"", u"Japanese"), |     (u"ja-JP", u"日本語", u"", u"Japanese"), | ||||||
|     (u"ko-KR", u"한국어", u"", u"Korean"), |     (u"ko-KR", u"한국어", u"", u"Korean"), | ||||||
|     (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"), |  | ||||||
|     (u"lv-LV", u"Latviešu", u"", u"Latvian"), |  | ||||||
|     (u"ms-MY", u"Bahasa Melayu", u"", u"Malay"), |  | ||||||
|     (u"nl", u"Nederlands", u"", u"Dutch"), |     (u"nl", u"Nederlands", u"", u"Dutch"), | ||||||
|     (u"nl-BE", u"Nederlands", u"België", u"Dutch"), |     (u"nl-BE", u"Nederlands", u"België", u"Dutch"), | ||||||
|     (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"), |     (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"), | ||||||
|     (u"no-NO", u"Norsk", u"", u"Norwegian"), |     (u"no-NO", u"Norsk", u"", u"Norwegian"), | ||||||
|     (u"pl-PL", u"Polski", u"", u"Polish"), |     (u"pl-PL", u"Polski", u"", u"Polish"), | ||||||
|     (u"pt", u"Português", u"", u"Portuguese"), |     (u"pt", u"Português", u"", u"Portuguese"), | ||||||
|  |     (u"pt-AD", u"Português", u"Andorra", u"Portuguese"), | ||||||
|     (u"pt-BR", u"Português", u"Brasil", u"Portuguese"), |     (u"pt-BR", u"Português", u"Brasil", u"Portuguese"), | ||||||
|     (u"pt-PT", u"Português", u"Portugal", u"Portuguese"), |     (u"pt-PT", u"Português", u"Portugal", u"Portuguese"), | ||||||
|     (u"ro-RO", u"Română", u"", u"Romanian"), |     (u"ro-RO", u"Română", u"", u"Romanian"), | ||||||
|     (u"ru-RU", u"Русский", u"", u"Russian"), |     (u"ru-RU", u"Русский", u"", u"Russian"), | ||||||
|     (u"sk-SK", u"Slovenčina", u"", u"Slovak"), |  | ||||||
|     (u"sl", u"Slovenščina", u"", u"Slovenian"), |  | ||||||
|     (u"sv-SE", u"Svenska", u"", u"Swedish"), |     (u"sv-SE", u"Svenska", u"", u"Swedish"), | ||||||
|     (u"th-TH", u"ไทย", u"", u"Thai"), |     (u"th-TH", u"ไทย", u"", u"Thai"), | ||||||
|     (u"tr-TR", u"Türkçe", u"", u"Turkish"), |     (u"tr-TR", u"Türkçe", u"", u"Turkish"), | ||||||
|     (u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"), |  | ||||||
|     (u"zh", u"中文", u"", u"Chinese"), |     (u"zh", u"中文", u"", u"Chinese"), | ||||||
|     (u"zh-CN", u"中文", u"中国", u"Chinese"), |     (u"zh-CN", u"中文", u"中国", u"Chinese"), | ||||||
|     (u"zh-HK", u"中文", u"香港", u"Chinese"), |     (u"zh-HK", u"中文", u"香港", u"Chinese"), | ||||||
|  | |||||||
| @ -8,10 +8,12 @@ from searx.testing import SearxTestCase | |||||||
| class TestBingImagesEngine(SearxTestCase): | class TestBingImagesEngine(SearxTestCase): | ||||||
| 
 | 
 | ||||||
|     def test_request(self): |     def test_request(self): | ||||||
|  |         bing_images.supported_languages = ['fr-FR', 'en-US'] | ||||||
|  | 
 | ||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         dicto['pageno'] = 1 |         dicto['pageno'] = 1 | ||||||
|         dicto['language'] = 'fr_FR' |         dicto['language'] = 'fr-FR' | ||||||
|         dicto['safesearch'] = 1 |         dicto['safesearch'] = 1 | ||||||
|         dicto['time_range'] = '' |         dicto['time_range'] = '' | ||||||
|         params = bing_images.request(query, dicto) |         params = bing_images.request(query, dicto) | ||||||
| @ -19,12 +21,19 @@ class TestBingImagesEngine(SearxTestCase): | |||||||
|         self.assertTrue(query in params['url']) |         self.assertTrue(query in params['url']) | ||||||
|         self.assertTrue('bing.com' in params['url']) |         self.assertTrue('bing.com' in params['url']) | ||||||
|         self.assertTrue('SRCHHPGUSR' in params['cookies']) |         self.assertTrue('SRCHHPGUSR' in params['cookies']) | ||||||
|         self.assertTrue('fr' in params['cookies']['SRCHHPGUSR']) |         self.assertTrue('DEMOTE' in params['cookies']['SRCHHPGUSR']) | ||||||
|  |         self.assertTrue('_EDGE_S' in params['cookies']) | ||||||
|  |         self.assertTrue('fr-fr' in params['cookies']['_EDGE_S']) | ||||||
|  | 
 | ||||||
|  |         dicto['language'] = 'fr' | ||||||
|  |         params = bing_images.request(query, dicto) | ||||||
|  |         self.assertTrue('_EDGE_S' in params['cookies']) | ||||||
|  |         self.assertTrue('fr-fr' in params['cookies']['_EDGE_S']) | ||||||
| 
 | 
 | ||||||
|         dicto['language'] = 'all' |         dicto['language'] = 'all' | ||||||
|         params = bing_images.request(query, dicto) |         params = bing_images.request(query, dicto) | ||||||
|         self.assertIn('SRCHHPGUSR', params['cookies']) |         self.assertTrue('_EDGE_S' in params['cookies']) | ||||||
|         self.assertIn('en', params['cookies']['SRCHHPGUSR']) |         self.assertTrue('en-us' in params['cookies']['_EDGE_S']) | ||||||
| 
 | 
 | ||||||
|     def test_response(self): |     def test_response(self): | ||||||
|         self.assertRaises(AttributeError, bing_images.response, None) |         self.assertRaises(AttributeError, bing_images.response, None) | ||||||
| @ -82,3 +91,28 @@ class TestBingImagesEngine(SearxTestCase): | |||||||
|         self.assertEqual(results[0]['content'], '') |         self.assertEqual(results[0]['content'], '') | ||||||
|         self.assertEqual(results[0]['thumbnail_src'], 'thumb_url') |         self.assertEqual(results[0]['thumbnail_src'], 'thumb_url') | ||||||
|         self.assertEqual(results[0]['img_src'], 'img_url') |         self.assertEqual(results[0]['img_src'], 'img_url') | ||||||
|  | 
 | ||||||
|  |     def test_fetch_supported_languages(self): | ||||||
|  |         html = """ | ||||||
|  |         <div> | ||||||
|  |             <div id="region-section-content"> | ||||||
|  |                 <ul class="b_vList"> | ||||||
|  |                     <li> | ||||||
|  |                         <a href="https://bing...&setmkt=de-DE&s...">Germany</a> | ||||||
|  |                         <a href="https://bing...&setmkt=nb-NO&s...">Norway</a> | ||||||
|  |                     </li> | ||||||
|  |                 </ul> | ||||||
|  |                 <ul class="b_vList"> | ||||||
|  |                     <li> | ||||||
|  |                         <a href="https://bing...&setmkt=es-AR&s...">Argentina</a> | ||||||
|  |                     </li> | ||||||
|  |                 </ul> | ||||||
|  |             </div> | ||||||
|  |         </div> | ||||||
|  |         """ | ||||||
|  |         response = mock.Mock(text=html) | ||||||
|  |         languages = list(bing_images._fetch_supported_languages(response)) | ||||||
|  |         self.assertEqual(len(languages), 3) | ||||||
|  |         self.assertIn('de-DE', languages) | ||||||
|  |         self.assertIn('no-NO', languages) | ||||||
|  |         self.assertIn('es-AR', languages) | ||||||
|  | |||||||
| @ -8,6 +8,8 @@ from searx.testing import SearxTestCase | |||||||
| class TestBingVideosEngine(SearxTestCase): | class TestBingVideosEngine(SearxTestCase): | ||||||
| 
 | 
 | ||||||
|     def test_request(self): |     def test_request(self): | ||||||
|  |         bing_videos.supported_languages = ['fr-FR', 'en-US'] | ||||||
|  | 
 | ||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         dicto['pageno'] = 1 |         dicto['pageno'] = 1 | ||||||
|  | |||||||
| @ -139,9 +139,9 @@ class TestSwisscowsEngine(SearxTestCase): | |||||||
|             <div id="regions-popup"> |             <div id="regions-popup"> | ||||||
|                 <div> |                 <div> | ||||||
|                     <ul> |                     <ul> | ||||||
|                         <li><a data-val="browser"></a></li> |                         <li><a data-search-language="browser"></a></li> | ||||||
|                         <li><a data-val="de-CH"></a></li> |                         <li><a data-search-language="de-CH"></a></li> | ||||||
|                         <li><a data-val="fr-CH"></a></li> |                         <li><a data-search-language="fr-CH"></a></li> | ||||||
|                     </ul> |                     </ul> | ||||||
|                 </div> |                 </div> | ||||||
|             </div> |             </div> | ||||||
|  | |||||||
| @ -8,13 +8,13 @@ | |||||||
| # are written in current directory to avoid overwriting in case something goes wrong. | # are written in current directory to avoid overwriting in case something goes wrong. | ||||||
| 
 | 
 | ||||||
| from requests import get | from requests import get | ||||||
| from urllib import urlencode |  | ||||||
| from lxml.html import fromstring | from lxml.html import fromstring | ||||||
| from json import loads, dumps | from json import loads, dump | ||||||
| import io | import io | ||||||
| from sys import path | from sys import path | ||||||
| path.append('../searx')  # noqa | path.append('../searx')  # noqa | ||||||
| from searx import settings | from searx import settings | ||||||
|  | from searx.url_utils import urlencode | ||||||
| from searx.engines import initialize_engines, engines | from searx.engines import initialize_engines, engines | ||||||
| 
 | 
 | ||||||
| # Geonames API for country names. | # Geonames API for country names. | ||||||
| @ -70,7 +70,7 @@ def get_country_name(locale): | |||||||
|     json = loads(response.text) |     json = loads(response.text) | ||||||
|     content = json.get('geonames', None) |     content = json.get('geonames', None) | ||||||
|     if content is None or len(content) != 1: |     if content is None or len(content) != 1: | ||||||
|         print "No country name found for " + locale[0] + "-" + locale[1] |         print("No country name found for " + locale[0] + "-" + locale[1]) | ||||||
|         return '' |         return '' | ||||||
| 
 | 
 | ||||||
|     return content[0].get('countryName', '') |     return content[0].get('countryName', '') | ||||||
| @ -84,11 +84,11 @@ def fetch_supported_languages(): | |||||||
|             try: |             try: | ||||||
|                 engines_languages[engine_name] = engines[engine_name].fetch_supported_languages() |                 engines_languages[engine_name] = engines[engine_name].fetch_supported_languages() | ||||||
|             except Exception as e: |             except Exception as e: | ||||||
|                 print e |                 print(e) | ||||||
| 
 | 
 | ||||||
|     # write json file |     # write json file | ||||||
|     with io.open(engines_languages_file, "w", encoding="utf-8") as f: |     with io.open(engines_languages_file, "w", encoding="utf-8") as f: | ||||||
|         f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8"))) |         dump(engines_languages, f, ensure_ascii=False) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # Join all language lists. | # Join all language lists. | ||||||
| @ -97,7 +97,7 @@ def join_language_lists(): | |||||||
|     global languages |     global languages | ||||||
|     # include wikipedia first for more accurate language names |     # include wikipedia first for more accurate language names | ||||||
|     languages = {code: lang for code, lang |     languages = {code: lang for code, lang | ||||||
|                  in engines_languages['wikipedia'].iteritems() |                  in engines_languages['wikipedia'].items() | ||||||
|                  if valid_code(code)} |                  if valid_code(code)} | ||||||
| 
 | 
 | ||||||
|     for engine_name in engines_languages: |     for engine_name in engines_languages: | ||||||
| @ -121,7 +121,7 @@ def join_language_lists(): | |||||||
|     # filter list to include only languages supported by most engines |     # filter list to include only languages supported by most engines | ||||||
|     min_supported_engines = int(0.70 * len(engines_languages)) |     min_supported_engines = int(0.70 * len(engines_languages)) | ||||||
|     languages = {code: lang for code, lang |     languages = {code: lang for code, lang | ||||||
|                  in languages.iteritems() |                  in languages.items() | ||||||
|                  if len(lang.get('counter', [])) >= min_supported_engines or |                  if len(lang.get('counter', [])) >= min_supported_engines or | ||||||
|                  len(languages.get(code.split('-')[0], {}).get('counter', [])) >= min_supported_engines} |                  len(languages.get(code.split('-')[0], {}).get('counter', [])) >= min_supported_engines} | ||||||
| 
 | 
 | ||||||
| @ -165,7 +165,7 @@ def filter_single_country_languages(): | |||||||
| 
 | 
 | ||||||
| # Write languages.py. | # Write languages.py. | ||||||
| def write_languages_file(): | def write_languages_file(): | ||||||
|     new_file = open(languages_file, 'w') |     new_file = open(languages_file, 'wb') | ||||||
|     file_content = '# -*- coding: utf-8 -*-\n'\ |     file_content = '# -*- coding: utf-8 -*-\n'\ | ||||||
|                    + '# list of language codes\n'\ |                    + '# list of language codes\n'\ | ||||||
|                    + '# this file is generated automatically by utils/update_search_languages.py\n'\ |                    + '# this file is generated automatically by utils/update_search_languages.py\n'\ | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user