Merge pull request #1260 from MarcAbonce/engine-fixes

[fix] Engine fixes

commit e5def5b019
@@ -68,8 +68,8 @@ def response(resp):
     for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'):
         try:
             r = {
-                'url': result.xpath('.//a[@class="l _PMs"]')[0].attrib.get("href"),
-                'title': ''.join(result.xpath('.//a[@class="l _PMs"]//text()')),
+                'url': result.xpath('.//a[@class="l lLrAF"]')[0].attrib.get("href"),
+                'title': ''.join(result.xpath('.//a[@class="l lLrAF"]//text()')),
                 'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
             }
         except:
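The hunk above only swaps the obsolete Google result-anchor class "l _PMs" for "l lLrAF"; the extraction logic itself is unchanged. As a sanity check, here is a minimal standalone sketch of that XPath extraction with lxml, run against made-up result markup that uses the new class (the HTML sample is hypothetical, not captured from Google):

from lxml.html import fromstring

# Hypothetical page fragment using the new "l lLrAF" anchor class.
dom = fromstring(
    '<html><body>'
    '<div class="g">'
    '<a class="l lLrAF" href="https://example.com/">Example title</a>'
    '<div class="st">Example content</div>'
    '</div>'
    '</body></html>'
)

for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'):
    print({
        'url': result.xpath('.//a[@class="l lLrAF"]')[0].attrib.get('href'),
        'title': ''.join(result.xpath('.//a[@class="l lLrAF"]//text()')),
        'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
    })
# -> {'url': 'https://example.com/', 'title': 'Example title', 'content': 'Example content'}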
@@ -27,7 +27,7 @@ result_count = 1
 # urls
 wikidata_host = 'https://www.wikidata.org'
 url_search = wikidata_host \
-    + '/wiki/Special:ItemDisambiguation?{query}'
+    + '/w/index.php?{query}'
 
 wikidata_api = wikidata_host + '/w/api.php'
 url_detail = wikidata_api\
@@ -40,7 +40,7 @@ url_map = 'https://www.openstreetmap.org/'\
 url_image = 'https://commons.wikimedia.org/wiki/Special:FilePath/{filename}?width=500&height=400'
 
 # xpaths
-wikidata_ids_xpath = '//div/ul[@class="wikibase-disambiguation"]/li/a/@title'
+wikidata_ids_xpath = '//ul[@class="mw-search-results"]/li//a/@href'
 title_xpath = '//*[contains(@class,"wikibase-title-label")]'
 description_xpath = '//div[contains(@class,"wikibase-entitytermsview-heading-description")]'
 property_xpath = '//div[@id="{propertyid}"]'
@@ -57,22 +57,21 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
 
 
 def request(query, params):
-    language = match_language(params['language'], supported_languages).split('-')[0]
-
     params['url'] = url_search.format(
-        query=urlencode({'label': query, 'language': language}))
+        query=urlencode({'search': query}))
     return params
 
 
 def response(resp):
     results = []
     html = fromstring(resp.text)
-    wikidata_ids = html.xpath(wikidata_ids_xpath)
+    search_results = html.xpath(wikidata_ids_xpath)
 
     language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
 
     # TODO: make requests asynchronous to avoid timeout when result_count > 1
-    for wikidata_id in wikidata_ids[:result_count]:
+    for search_result in search_results[:result_count]:
+        wikidata_id = search_result.split('/')[-1]
         url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
         htmlresponse = get(url)
         jsonresponse = loads(htmlresponse.text)
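Taken together, the wikidata hunks above replace the removed Special:ItemDisambiguation page with a plain full-text search on /w/index.php and then recover each entity ID from the href of a search hit, so the language only matters later when fetching entity details. A rough standalone sketch of that flow follows; the sample href is a made-up value, and plain urllib is used here where searx at the time went through its own url_utils wrapper:

from urllib.parse import urlencode

wikidata_host = 'https://www.wikidata.org'
url_search = wikidata_host + '/w/index.php?{query}'

# Build the search URL the way the new request() does.
search_url = url_search.format(query=urlencode({'search': 'test query'}))
# -> https://www.wikidata.org/w/index.php?search=test+query

# The new wikidata_ids_xpath yields hrefs from the result list; a hit might look
# like '/wiki/Q42' (hypothetical value), and the entity ID is its last path
# segment, exactly as the new response() loop derives it.
sample_href = '/wiki/Q42'
wikidata_id = sample_href.split('/')[-1]
print(search_url)   # https://www.wikidata.org/w/index.php?search=test+query
print(wikidata_id)  # Q42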
@@ -53,7 +53,7 @@ def extract_url(xpath_results, search_url):
     if url.startswith('//'):
         # add http or https to this kind of url //example.com/
         parsed_search_url = urlparse(search_url)
-        url = u'{0}:{1}'.format(parsed_search_url.scheme, url)
+        url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
     elif url.startswith('/'):
         # fix relative url to the search engine
         url = urljoin(search_url, url)
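The extract_url change only affects protocol-relative results such as //example.com/: if the search URL itself carries no scheme, the fixed branch now falls back to http instead of producing a URL that starts with a bare colon. A small self-contained sketch of just that branch (not the real searx helper, whose surrounding handling is omitted):

from urllib.parse import urlparse

def complete_protocol_relative(url, search_url):
    # Mirrors the fixed branch: reuse the search URL's scheme, else fall back to http.
    parsed_search_url = urlparse(search_url)
    return '{0}:{1}'.format(parsed_search_url.scheme or 'http', url)

print(complete_protocol_relative('//example.com/', 'https://searx.example/search'))
# https://example.com/
print(complete_protocol_relative('//example.com/', 'searx.example/search'))
# http://example.com/  (before the fix this case produced '://example.com/')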
@@ -174,6 +174,7 @@ engines:
   - name : wikidata
     engine : wikidata
     shortcut : wd
+    timeout : 3.0
     weight : 2
 
   - name : duckduckgo
@@ -42,7 +42,7 @@ class TestGoogleNewsEngine(SearxTestCase):
                 <div class="ts _JGs _JHs _tJs _KGs _jHs">
                     <div class="_hJs">
                         <h3 class="r _gJs">
-                            <a class="l _PMs" href="https://example.com/" onmousedown="return rwt(this,'','','','11','AFQjCNEyehpzD5cJK1KUfXBx9RmsbqqG9g','','0ahUKEwjB58OR54HWAhWnKJoKHSQhAMY4ChCpAggiKAAwAA','','',event)">Example title</a>
+                            <a class="l lLrAF" href="https://example.com/" onmousedown="return rwt(this,'','','','11','AFQjCNEyehpzD5cJK1KUfXBx9RmsbqqG9g','','0ahUKEwjB58OR54HWAhWnKJoKHSQhAMY4ChCpAggiKAAwAA','','',event)">Example title</a>
                         </h3>
                         <div class="slp">
                             <span class="_OHs _PHs">
@@ -63,7 +63,7 @@ class TestGoogleNewsEngine(SearxTestCase):
                     </a>
                     <div class="_hJs">
                         <h3 class="r _gJs">
-                            <a class="l _PMs" href="https://example2.com/" onmousedown="return rwt(this,'','','','12','AFQjCNHObfH7sYmLWI1SC-YhWXKZFRzRjw','','0ahUKEwjB58OR54HWAhWnKJoKHSQhAMY4ChCpAgglKAAwAQ','','',event)">Example title 2</a>
+                            <a class="l lLrAF" href="https://example2.com/" onmousedown="return rwt(this,'','','','12','AFQjCNHObfH7sYmLWI1SC-YhWXKZFRzRjw','','0ahUKEwjB58OR54HWAhWnKJoKHSQhAMY4ChCpAgglKAAwAQ','','',event)">Example title 2</a>
                         </h3>
                         <div class="slp">
                             <span class="_OHs _PHs">
@@ -9,20 +9,15 @@ from searx.testing import SearxTestCase
 class TestWikidataEngine(SearxTestCase):
 
     def test_request(self):
-        wikidata.supported_languages = ['en', 'es']
         query = 'test_query'
         dicto = defaultdict(dict)
-        dicto['language'] = 'en-US'
         params = wikidata.request(query, dicto)
         self.assertIn('url', params)
         self.assertIn(query, params['url'])
         self.assertIn('wikidata.org', params['url'])
-        self.assertIn('en', params['url'])
 
-        dicto['language'] = 'es-ES'
         params = wikidata.request(query, dicto)
         self.assertIn(query, params['url'])
-        self.assertIn('es', params['url'])
 
     # successful cases are not tested here to avoid sending additional requests
     def test_response(self):
@@ -31,6 +26,7 @@ class TestWikidataEngine(SearxTestCase):
         self.assertRaises(AttributeError, wikidata.response, '')
         self.assertRaises(AttributeError, wikidata.response, '[]')
 
+        wikidata.supported_languages = ['en', 'es']
         response = mock.Mock(text='<html></html>', search_params={"language": "en"})
         self.assertEqual(wikidata.response(response), [])
 