mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-30 18:22:31 -04:00 
			
		
		
		
	Merge pull request #1446 from MarcAbonce/language_aliases_fix
[fix] Fix queries in Hebrew and Norwegian so they give results in the right language
This commit is contained in:
		
						commit
						491792c1a5
					
				
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -113,7 +113,6 @@ def load_engine(engine_data): | ||||
|                iso_lang not in getattr(engine, 'supported_languages'): | ||||
|                 language_aliases[iso_lang] = engine_lang | ||||
| 
 | ||||
|         if language_aliases: | ||||
|         setattr(engine, 'language_aliases', language_aliases) | ||||
| 
 | ||||
|     # assign language fetching method if auxiliary method exists | ||||
|  | ||||
| @ -55,7 +55,7 @@ def request(query, params): | ||||
|         query=urlencode({'q': query}), | ||||
|         offset=offset) | ||||
| 
 | ||||
|     language = match_language(params['language'], supported_languages).lower() | ||||
|     language = match_language(params['language'], supported_languages, language_aliases).lower() | ||||
| 
 | ||||
|     params['cookies']['SRCHHPGUSR'] = \ | ||||
|         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') | ||||
|  | ||||
| @ -48,7 +48,7 @@ def request(query, params): | ||||
|         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') | ||||
| 
 | ||||
|     # language cookie | ||||
|     language = match_language(params['language'], supported_languages).lower() | ||||
|     language = match_language(params['language'], supported_languages, language_aliases).lower() | ||||
|     params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1' | ||||
| 
 | ||||
|     # query and paging | ||||
|  | ||||
| @ -166,7 +166,7 @@ def extract_text_from_dom(result, xpath): | ||||
| def request(query, params): | ||||
|     offset = (params['pageno'] - 1) * 10 | ||||
| 
 | ||||
|     language = match_language(params['language'], supported_languages) | ||||
|     language = match_language(params['language'], supported_languages, language_aliases) | ||||
|     language_array = language.split('-') | ||||
|     if params['language'].find('-') > 0: | ||||
|         country = params['language'].split('-')[1] | ||||
| @ -381,10 +381,10 @@ def attributes_to_html(attributes): | ||||
| def _fetch_supported_languages(resp): | ||||
|     supported_languages = {} | ||||
|     dom = html.fromstring(resp.text) | ||||
|     options = dom.xpath('//table//td/font/label/span') | ||||
|     options = dom.xpath('//*[@id="langSec"]//input[@name="lr"]') | ||||
|     for option in options: | ||||
|         code = option.xpath('./@id')[0][1:] | ||||
|         name = option.text.title() | ||||
|         code = option.xpath('./@value')[0].split('_')[-1] | ||||
|         name = option.xpath('./@data-name')[0].title() | ||||
|         supported_languages[code] = {"name": name} | ||||
| 
 | ||||
|     return supported_languages | ||||
|  | ||||
| @ -51,7 +51,7 @@ def request(query, params): | ||||
|     params['url'] = search_url.format(query=urlencode({'q': query}), | ||||
|                                       search_options=urlencode(search_options)) | ||||
| 
 | ||||
|     language = match_language(params['language'], supported_languages).split('-')[0] | ||||
|     language = match_language(params['language'], supported_languages, language_aliases).split('-')[0] | ||||
|     if language: | ||||
|         params['url'] += '&lr=lang_' + language | ||||
| 
 | ||||
|  | ||||
| @ -46,7 +46,7 @@ def request(query, params): | ||||
|                                    offset=offset) | ||||
| 
 | ||||
|     # add language tag | ||||
|     language = match_language(params['language'], supported_languages) | ||||
|     language = match_language(params['language'], supported_languages, language_aliases) | ||||
|     params['url'] += '&locale=' + language.replace('-', '_').lower() | ||||
| 
 | ||||
|     return params | ||||
|  | ||||
| @ -36,7 +36,7 @@ regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=') | ||||
| 
 | ||||
| # do search-request | ||||
| def request(query, params): | ||||
|     region = match_language(params['language'], supported_languages) | ||||
|     region = match_language(params['language'], supported_languages, language_aliases) | ||||
|     ui_language = region.split('-')[0] | ||||
| 
 | ||||
|     search_path = search_string.format( | ||||
|  | ||||
| @ -68,7 +68,7 @@ def response(resp): | ||||
|     html = fromstring(resp.text) | ||||
|     search_results = html.xpath(wikidata_ids_xpath) | ||||
| 
 | ||||
|     language = match_language(resp.search_params['language'], supported_languages).split('-')[0] | ||||
|     language = match_language(resp.search_params['language'], supported_languages, language_aliases).split('-')[0] | ||||
| 
 | ||||
|     # TODO: make requests asynchronous to avoid timeout when result_count > 1 | ||||
|     for search_result in search_results[:result_count]: | ||||
|  | ||||
| @ -31,7 +31,7 @@ supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' | ||||
| 
 | ||||
| # set language in base_url | ||||
| def url_lang(lang): | ||||
|     return match_language(lang, supported_languages).split('-')[0] | ||||
|     return match_language(lang, supported_languages, language_aliases).split('-')[0] | ||||
| 
 | ||||
| 
 | ||||
| # do search-request | ||||
|  | ||||
| @ -9,6 +9,7 @@ class TestBingImagesEngine(SearxTestCase): | ||||
| 
 | ||||
|     def test_request(self): | ||||
|         bing_images.supported_languages = ['fr-FR', 'en-US'] | ||||
|         bing_images.language_aliases = {} | ||||
|         query = 'test_query' | ||||
|         dicto = defaultdict(dict) | ||||
|         dicto['pageno'] = 1 | ||||
|  | ||||
| @ -9,6 +9,7 @@ class TestBingVideosEngine(SearxTestCase): | ||||
| 
 | ||||
|     def test_request(self): | ||||
|         bing_videos.supported_languages = ['fr-FR', 'en-US'] | ||||
|         bing_videos.language_aliases = {} | ||||
|         query = 'test_query' | ||||
|         dicto = defaultdict(dict) | ||||
|         dicto['pageno'] = 1 | ||||
|  | ||||
| @ -15,7 +15,8 @@ class TestGoogleEngine(SearxTestCase): | ||||
|         return response | ||||
| 
 | ||||
|     def test_request(self): | ||||
|         google.supported_languages = ['en', 'fr', 'zh-CN'] | ||||
|         google.supported_languages = ['en', 'fr', 'zh-CN', 'iw'] | ||||
|         google.language_aliases = {'he': 'iw'} | ||||
| 
 | ||||
|         query = 'test_query' | ||||
|         dicto = defaultdict(dict) | ||||
| @ -41,6 +42,12 @@ class TestGoogleEngine(SearxTestCase): | ||||
|         self.assertIn('zh-CN', params['url']) | ||||
|         self.assertIn('zh-CN', params['headers']['Accept-Language']) | ||||
| 
 | ||||
|         dicto['language'] = 'he' | ||||
|         params = google.request(query, dicto) | ||||
|         self.assertIn('google.com', params['url']) | ||||
|         self.assertIn('iw', params['url']) | ||||
|         self.assertIn('iw', params['headers']['Accept-Language']) | ||||
| 
 | ||||
|     def test_response(self): | ||||
|         self.assertRaises(AttributeError, google.response, None) | ||||
|         self.assertRaises(AttributeError, google.response, []) | ||||
| @ -198,29 +205,13 @@ class TestGoogleEngine(SearxTestCase): | ||||
|         html = u""" | ||||
|         <html> | ||||
|             <body> | ||||
|                 <table> | ||||
|                     <tbody> | ||||
|                         <tr> | ||||
|                             <td> | ||||
|                                 <font> | ||||
|                                     <label> | ||||
|                                         <span id="ten">English</span> | ||||
|                                     </label> | ||||
|                                 </font> | ||||
|                             </td> | ||||
|                             <td> | ||||
|                                 <font> | ||||
|                                     <label> | ||||
|                                         <span id="tzh-CN">中文 (简体)</span> | ||||
|                                     </label> | ||||
|                                     <label> | ||||
|                                         <span id="tzh-TW">中文 (繁體)</span> | ||||
|                                     </label> | ||||
|                                 </font> | ||||
|                             </td> | ||||
|                         </tr> | ||||
|                     </tbody> | ||||
|                 </table> | ||||
|                 <div id="langSec"> | ||||
|                     <div> | ||||
|                         <input name="lr" data-name="english" value="lang_en" /> | ||||
|                         <input name="lr" data-name="中文 (简体)" value="lang_zh-CN" /> | ||||
|                         <input name="lr" data-name="中文 (繁體)" value="lang_zh-TW" /> | ||||
|                     </div> | ||||
|                 </div> | ||||
|             </body> | ||||
|         </html> | ||||
|         """ | ||||
|  | ||||
| @ -10,6 +10,7 @@ class TestGoogleNewsEngine(SearxTestCase): | ||||
| 
 | ||||
|     def test_request(self): | ||||
|         google_news.supported_languages = ['en-US', 'fr-FR'] | ||||
|         google_news.language_aliases = {} | ||||
|         query = 'test_query' | ||||
|         dicto = defaultdict(dict) | ||||
|         dicto['pageno'] = 1 | ||||
|  | ||||
| @ -8,6 +8,7 @@ class TestQwantEngine(SearxTestCase): | ||||
| 
 | ||||
|     def test_request(self): | ||||
|         qwant.supported_languages = ['en-US', 'fr-CA', 'fr-FR'] | ||||
|         qwant.language_aliases = {} | ||||
|         query = 'test_query' | ||||
|         dicto = defaultdict(dict) | ||||
|         dicto['pageno'] = 0 | ||||
|  | ||||
| @ -8,6 +8,7 @@ class TestSwisscowsEngine(SearxTestCase): | ||||
| 
 | ||||
|     def test_request(self): | ||||
|         swisscows.supported_languages = ['de-AT', 'de-DE'] | ||||
|         swisscows.language_aliases = {} | ||||
|         query = 'test_query' | ||||
|         dicto = defaultdict(dict) | ||||
|         dicto['pageno'] = 1 | ||||
|  | ||||
| @ -27,6 +27,7 @@ class TestWikidataEngine(SearxTestCase): | ||||
|         self.assertRaises(AttributeError, wikidata.response, '[]') | ||||
| 
 | ||||
|         wikidata.supported_languages = ['en', 'es'] | ||||
|         wikidata.language_aliases = {} | ||||
|         response = mock.Mock(text='<html></html>', search_params={"language": "en"}) | ||||
|         self.assertEqual(wikidata.response(response), []) | ||||
| 
 | ||||
|  | ||||
| @ -8,7 +8,8 @@ from searx.testing import SearxTestCase | ||||
| class TestWikipediaEngine(SearxTestCase): | ||||
| 
 | ||||
|     def test_request(self): | ||||
|         wikipedia.supported_languages = ['fr', 'en'] | ||||
|         wikipedia.supported_languages = ['fr', 'en', 'no'] | ||||
|         wikipedia.language_aliases = {'nb': 'no'} | ||||
| 
 | ||||
|         query = 'test_query' | ||||
|         dicto = defaultdict(dict) | ||||
| @ -25,9 +26,13 @@ class TestWikipediaEngine(SearxTestCase): | ||||
|         self.assertIn('Test_Query', params['url']) | ||||
|         self.assertNotIn('test_query', params['url']) | ||||
| 
 | ||||
|         dicto['language'] = 'nb' | ||||
|         params = wikipedia.request(query, dicto) | ||||
|         self.assertIn('no.wikipedia.org', params['url']) | ||||
| 
 | ||||
|         dicto['language'] = 'xx' | ||||
|         params = wikipedia.request(query, dicto) | ||||
|         self.assertIn('en', params['url']) | ||||
|         self.assertIn('en.wikipedia.org', params['url']) | ||||
| 
 | ||||
|     def test_response(self): | ||||
|         dicto = defaultdict(dict) | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user