mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-31 02:27:06 -04:00 
			
		
		
		
	Merge pull request #1446 from MarcAbonce/language_aliases_fix
[fix] Fix queries in Hebrew and Norwegian so they give results in the right language
This commit is contained in:
		
						commit
						491792c1a5
					
				
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -113,7 +113,6 @@ def load_engine(engine_data): | |||||||
|                iso_lang not in getattr(engine, 'supported_languages'): |                iso_lang not in getattr(engine, 'supported_languages'): | ||||||
|                 language_aliases[iso_lang] = engine_lang |                 language_aliases[iso_lang] = engine_lang | ||||||
| 
 | 
 | ||||||
|         if language_aliases: |  | ||||||
|         setattr(engine, 'language_aliases', language_aliases) |         setattr(engine, 'language_aliases', language_aliases) | ||||||
| 
 | 
 | ||||||
|     # assign language fetching method if auxiliary method exists |     # assign language fetching method if auxiliary method exists | ||||||
|  | |||||||
| @ -55,7 +55,7 @@ def request(query, params): | |||||||
|         query=urlencode({'q': query}), |         query=urlencode({'q': query}), | ||||||
|         offset=offset) |         offset=offset) | ||||||
| 
 | 
 | ||||||
|     language = match_language(params['language'], supported_languages).lower() |     language = match_language(params['language'], supported_languages, language_aliases).lower() | ||||||
| 
 | 
 | ||||||
|     params['cookies']['SRCHHPGUSR'] = \ |     params['cookies']['SRCHHPGUSR'] = \ | ||||||
|         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') |         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') | ||||||
|  | |||||||
| @ -48,7 +48,7 @@ def request(query, params): | |||||||
|         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') |         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') | ||||||
| 
 | 
 | ||||||
|     # language cookie |     # language cookie | ||||||
|     language = match_language(params['language'], supported_languages).lower() |     language = match_language(params['language'], supported_languages, language_aliases).lower() | ||||||
|     params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1' |     params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1' | ||||||
| 
 | 
 | ||||||
|     # query and paging |     # query and paging | ||||||
|  | |||||||
| @ -166,7 +166,7 @@ def extract_text_from_dom(result, xpath): | |||||||
| def request(query, params): | def request(query, params): | ||||||
|     offset = (params['pageno'] - 1) * 10 |     offset = (params['pageno'] - 1) * 10 | ||||||
| 
 | 
 | ||||||
|     language = match_language(params['language'], supported_languages) |     language = match_language(params['language'], supported_languages, language_aliases) | ||||||
|     language_array = language.split('-') |     language_array = language.split('-') | ||||||
|     if params['language'].find('-') > 0: |     if params['language'].find('-') > 0: | ||||||
|         country = params['language'].split('-')[1] |         country = params['language'].split('-')[1] | ||||||
| @ -381,10 +381,10 @@ def attributes_to_html(attributes): | |||||||
| def _fetch_supported_languages(resp): | def _fetch_supported_languages(resp): | ||||||
|     supported_languages = {} |     supported_languages = {} | ||||||
|     dom = html.fromstring(resp.text) |     dom = html.fromstring(resp.text) | ||||||
|     options = dom.xpath('//table//td/font/label/span') |     options = dom.xpath('//*[@id="langSec"]//input[@name="lr"]') | ||||||
|     for option in options: |     for option in options: | ||||||
|         code = option.xpath('./@id')[0][1:] |         code = option.xpath('./@value')[0].split('_')[-1] | ||||||
|         name = option.text.title() |         name = option.xpath('./@data-name')[0].title() | ||||||
|         supported_languages[code] = {"name": name} |         supported_languages[code] = {"name": name} | ||||||
| 
 | 
 | ||||||
|     return supported_languages |     return supported_languages | ||||||
|  | |||||||
| @ -51,7 +51,7 @@ def request(query, params): | |||||||
|     params['url'] = search_url.format(query=urlencode({'q': query}), |     params['url'] = search_url.format(query=urlencode({'q': query}), | ||||||
|                                       search_options=urlencode(search_options)) |                                       search_options=urlencode(search_options)) | ||||||
| 
 | 
 | ||||||
|     language = match_language(params['language'], supported_languages).split('-')[0] |     language = match_language(params['language'], supported_languages, language_aliases).split('-')[0] | ||||||
|     if language: |     if language: | ||||||
|         params['url'] += '&lr=lang_' + language |         params['url'] += '&lr=lang_' + language | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -46,7 +46,7 @@ def request(query, params): | |||||||
|                                    offset=offset) |                                    offset=offset) | ||||||
| 
 | 
 | ||||||
|     # add language tag |     # add language tag | ||||||
|     language = match_language(params['language'], supported_languages) |     language = match_language(params['language'], supported_languages, language_aliases) | ||||||
|     params['url'] += '&locale=' + language.replace('-', '_').lower() |     params['url'] += '&locale=' + language.replace('-', '_').lower() | ||||||
| 
 | 
 | ||||||
|     return params |     return params | ||||||
|  | |||||||
| @ -36,7 +36,7 @@ regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=') | |||||||
| 
 | 
 | ||||||
| # do search-request | # do search-request | ||||||
| def request(query, params): | def request(query, params): | ||||||
|     region = match_language(params['language'], supported_languages) |     region = match_language(params['language'], supported_languages, language_aliases) | ||||||
|     ui_language = region.split('-')[0] |     ui_language = region.split('-')[0] | ||||||
| 
 | 
 | ||||||
|     search_path = search_string.format( |     search_path = search_string.format( | ||||||
|  | |||||||
| @ -68,7 +68,7 @@ def response(resp): | |||||||
|     html = fromstring(resp.text) |     html = fromstring(resp.text) | ||||||
|     search_results = html.xpath(wikidata_ids_xpath) |     search_results = html.xpath(wikidata_ids_xpath) | ||||||
| 
 | 
 | ||||||
|     language = match_language(resp.search_params['language'], supported_languages).split('-')[0] |     language = match_language(resp.search_params['language'], supported_languages, language_aliases).split('-')[0] | ||||||
| 
 | 
 | ||||||
|     # TODO: make requests asynchronous to avoid timeout when result_count > 1 |     # TODO: make requests asynchronous to avoid timeout when result_count > 1 | ||||||
|     for search_result in search_results[:result_count]: |     for search_result in search_results[:result_count]: | ||||||
|  | |||||||
| @ -31,7 +31,7 @@ supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' | |||||||
| 
 | 
 | ||||||
| # set language in base_url | # set language in base_url | ||||||
| def url_lang(lang): | def url_lang(lang): | ||||||
|     return match_language(lang, supported_languages).split('-')[0] |     return match_language(lang, supported_languages, language_aliases).split('-')[0] | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # do search-request | # do search-request | ||||||
|  | |||||||
| @ -9,6 +9,7 @@ class TestBingImagesEngine(SearxTestCase): | |||||||
| 
 | 
 | ||||||
|     def test_request(self): |     def test_request(self): | ||||||
|         bing_images.supported_languages = ['fr-FR', 'en-US'] |         bing_images.supported_languages = ['fr-FR', 'en-US'] | ||||||
|  |         bing_images.language_aliases = {} | ||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         dicto['pageno'] = 1 |         dicto['pageno'] = 1 | ||||||
|  | |||||||
| @ -9,6 +9,7 @@ class TestBingVideosEngine(SearxTestCase): | |||||||
| 
 | 
 | ||||||
|     def test_request(self): |     def test_request(self): | ||||||
|         bing_videos.supported_languages = ['fr-FR', 'en-US'] |         bing_videos.supported_languages = ['fr-FR', 'en-US'] | ||||||
|  |         bing_videos.language_aliases = {} | ||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         dicto['pageno'] = 1 |         dicto['pageno'] = 1 | ||||||
|  | |||||||
| @ -15,7 +15,8 @@ class TestGoogleEngine(SearxTestCase): | |||||||
|         return response |         return response | ||||||
| 
 | 
 | ||||||
|     def test_request(self): |     def test_request(self): | ||||||
|         google.supported_languages = ['en', 'fr', 'zh-CN'] |         google.supported_languages = ['en', 'fr', 'zh-CN', 'iw'] | ||||||
|  |         google.language_aliases = {'he': 'iw'} | ||||||
| 
 | 
 | ||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
| @ -41,6 +42,12 @@ class TestGoogleEngine(SearxTestCase): | |||||||
|         self.assertIn('zh-CN', params['url']) |         self.assertIn('zh-CN', params['url']) | ||||||
|         self.assertIn('zh-CN', params['headers']['Accept-Language']) |         self.assertIn('zh-CN', params['headers']['Accept-Language']) | ||||||
| 
 | 
 | ||||||
|  |         dicto['language'] = 'he' | ||||||
|  |         params = google.request(query, dicto) | ||||||
|  |         self.assertIn('google.com', params['url']) | ||||||
|  |         self.assertIn('iw', params['url']) | ||||||
|  |         self.assertIn('iw', params['headers']['Accept-Language']) | ||||||
|  | 
 | ||||||
|     def test_response(self): |     def test_response(self): | ||||||
|         self.assertRaises(AttributeError, google.response, None) |         self.assertRaises(AttributeError, google.response, None) | ||||||
|         self.assertRaises(AttributeError, google.response, []) |         self.assertRaises(AttributeError, google.response, []) | ||||||
| @ -198,29 +205,13 @@ class TestGoogleEngine(SearxTestCase): | |||||||
|         html = u""" |         html = u""" | ||||||
|         <html> |         <html> | ||||||
|             <body> |             <body> | ||||||
|                 <table> |                 <div id="langSec"> | ||||||
|                     <tbody> |                     <div> | ||||||
|                         <tr> |                         <input name="lr" data-name="english" value="lang_en" /> | ||||||
|                             <td> |                         <input name="lr" data-name="中文 (简体)" value="lang_zh-CN" /> | ||||||
|                                 <font> |                         <input name="lr" data-name="中文 (繁體)" value="lang_zh-TW" /> | ||||||
|                                     <label> |                     </div> | ||||||
|                                         <span id="ten">English</span> |                 </div> | ||||||
|                                     </label> |  | ||||||
|                                 </font> |  | ||||||
|                             </td> |  | ||||||
|                             <td> |  | ||||||
|                                 <font> |  | ||||||
|                                     <label> |  | ||||||
|                                         <span id="tzh-CN">中文 (简体)</span> |  | ||||||
|                                     </label> |  | ||||||
|                                     <label> |  | ||||||
|                                         <span id="tzh-TW">中文 (繁體)</span> |  | ||||||
|                                     </label> |  | ||||||
|                                 </font> |  | ||||||
|                             </td> |  | ||||||
|                         </tr> |  | ||||||
|                     </tbody> |  | ||||||
|                 </table> |  | ||||||
|             </body> |             </body> | ||||||
|         </html> |         </html> | ||||||
|         """ |         """ | ||||||
|  | |||||||
| @ -10,6 +10,7 @@ class TestGoogleNewsEngine(SearxTestCase): | |||||||
| 
 | 
 | ||||||
|     def test_request(self): |     def test_request(self): | ||||||
|         google_news.supported_languages = ['en-US', 'fr-FR'] |         google_news.supported_languages = ['en-US', 'fr-FR'] | ||||||
|  |         google_news.language_aliases = {} | ||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         dicto['pageno'] = 1 |         dicto['pageno'] = 1 | ||||||
|  | |||||||
| @ -8,6 +8,7 @@ class TestQwantEngine(SearxTestCase): | |||||||
| 
 | 
 | ||||||
|     def test_request(self): |     def test_request(self): | ||||||
|         qwant.supported_languages = ['en-US', 'fr-CA', 'fr-FR'] |         qwant.supported_languages = ['en-US', 'fr-CA', 'fr-FR'] | ||||||
|  |         qwant.language_aliases = {} | ||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         dicto['pageno'] = 0 |         dicto['pageno'] = 0 | ||||||
|  | |||||||
| @ -8,6 +8,7 @@ class TestSwisscowsEngine(SearxTestCase): | |||||||
| 
 | 
 | ||||||
|     def test_request(self): |     def test_request(self): | ||||||
|         swisscows.supported_languages = ['de-AT', 'de-DE'] |         swisscows.supported_languages = ['de-AT', 'de-DE'] | ||||||
|  |         swisscows.language_aliases = {} | ||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|         dicto['pageno'] = 1 |         dicto['pageno'] = 1 | ||||||
|  | |||||||
| @ -27,6 +27,7 @@ class TestWikidataEngine(SearxTestCase): | |||||||
|         self.assertRaises(AttributeError, wikidata.response, '[]') |         self.assertRaises(AttributeError, wikidata.response, '[]') | ||||||
| 
 | 
 | ||||||
|         wikidata.supported_languages = ['en', 'es'] |         wikidata.supported_languages = ['en', 'es'] | ||||||
|  |         wikidata.language_aliases = {} | ||||||
|         response = mock.Mock(text='<html></html>', search_params={"language": "en"}) |         response = mock.Mock(text='<html></html>', search_params={"language": "en"}) | ||||||
|         self.assertEqual(wikidata.response(response), []) |         self.assertEqual(wikidata.response(response), []) | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -8,7 +8,8 @@ from searx.testing import SearxTestCase | |||||||
| class TestWikipediaEngine(SearxTestCase): | class TestWikipediaEngine(SearxTestCase): | ||||||
| 
 | 
 | ||||||
|     def test_request(self): |     def test_request(self): | ||||||
|         wikipedia.supported_languages = ['fr', 'en'] |         wikipedia.supported_languages = ['fr', 'en', 'no'] | ||||||
|  |         wikipedia.language_aliases = {'nb': 'no'} | ||||||
| 
 | 
 | ||||||
|         query = 'test_query' |         query = 'test_query' | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
| @ -25,9 +26,13 @@ class TestWikipediaEngine(SearxTestCase): | |||||||
|         self.assertIn('Test_Query', params['url']) |         self.assertIn('Test_Query', params['url']) | ||||||
|         self.assertNotIn('test_query', params['url']) |         self.assertNotIn('test_query', params['url']) | ||||||
| 
 | 
 | ||||||
|  |         dicto['language'] = 'nb' | ||||||
|  |         params = wikipedia.request(query, dicto) | ||||||
|  |         self.assertIn('no.wikipedia.org', params['url']) | ||||||
|  | 
 | ||||||
|         dicto['language'] = 'xx' |         dicto['language'] = 'xx' | ||||||
|         params = wikipedia.request(query, dicto) |         params = wikipedia.request(query, dicto) | ||||||
|         self.assertIn('en', params['url']) |         self.assertIn('en.wikipedia.org', params['url']) | ||||||
| 
 | 
 | ||||||
|     def test_response(self): |     def test_response(self): | ||||||
|         dicto = defaultdict(dict) |         dicto = defaultdict(dict) | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user