mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-31 10:37:06 -04:00 
			
		
		
		
	If no language is specified, bing returns results with multiple languages for one query which isn't really useful. Setting english as default insted if nothing.
		
			
				
	
	
		
			100 lines
		
	
	
		
			2.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			100 lines
		
	
	
		
			2.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """
 | |
|  Bing (Web)
 | |
| 
 | |
|  @website     https://www.bing.com
 | |
|  @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
 | |
|               max. 5000 query/month
 | |
| 
 | |
|  @using-api   no (because of query limit)
 | |
|  @results     HTML (using search portal)
 | |
|  @stable      no (HTML can change)
 | |
|  @parse       url, title, content
 | |
| 
 | |
|  @todo        publishedDate
 | |
| """
 | |
| 
 | |
| from urllib import urlencode
 | |
| from lxml import html
 | |
| from searx.engines.xpath import extract_text
 | |
| 
 | |
| # engine dependent config
 | |
| categories = ['general']
 | |
| paging = True
 | |
| language_support = True
 | |
| supported_languages_url = 'https://www.bing.com/account/general'
 | |
| 
 | |
| # search-url
 | |
| base_url = 'https://www.bing.com/'
 | |
| search_string = 'search?{query}&first={offset}'
 | |
| 
 | |
| 
 | |
| # do search-request
 | |
| def request(query, params):
 | |
|     offset = (params['pageno'] - 1) * 10 + 1
 | |
| 
 | |
|     if params['language'] != 'all':
 | |
|         lang = params['language'].split('-')[0].upper()
 | |
|     else:
 | |
|         lang = 'EN'
 | |
| 
 | |
|     query = u'language:{} {}'.format(lang, query.decode('utf-8')).encode('utf-8')
 | |
| 
 | |
|     search_path = search_string.format(
 | |
|         query=urlencode({'q': query}),
 | |
|         offset=offset)
 | |
| 
 | |
|     params['url'] = base_url + search_path
 | |
|     return params
 | |
| 
 | |
| 
 | |
| # get response from search-request
 | |
| def response(resp):
 | |
|     results = []
 | |
| 
 | |
|     dom = html.fromstring(resp.text)
 | |
| 
 | |
|     try:
 | |
|         results.append({'number_of_results': int(dom.xpath('//span[@class="sb_count"]/text()')[0]
 | |
|                                                  .split()[0].replace(',', ''))})
 | |
|     except:
 | |
|         pass
 | |
| 
 | |
|     # parse results
 | |
|     for result in dom.xpath('//div[@class="sa_cc"]'):
 | |
|         link = result.xpath('.//h3/a')[0]
 | |
|         url = link.attrib.get('href')
 | |
|         title = extract_text(link)
 | |
|         content = extract_text(result.xpath('.//p'))
 | |
| 
 | |
|         # append result
 | |
|         results.append({'url': url,
 | |
|                         'title': title,
 | |
|                         'content': content})
 | |
| 
 | |
|     # parse results again if nothing is found yet
 | |
|     for result in dom.xpath('//li[@class="b_algo"]'):
 | |
|         link = result.xpath('.//h2/a')[0]
 | |
|         url = link.attrib.get('href')
 | |
|         title = extract_text(link)
 | |
|         content = extract_text(result.xpath('.//p'))
 | |
| 
 | |
|         # append result
 | |
|         results.append({'url': url,
 | |
|                         'title': title,
 | |
|                         'content': content})
 | |
| 
 | |
|     # return results
 | |
|     return results
 | |
| 
 | |
| 
 | |
| # get supported languages from their site
 | |
| def _fetch_supported_languages(resp):
 | |
|     supported_languages = []
 | |
|     dom = html.fromstring(resp.text)
 | |
|     options = dom.xpath('//div[@id="limit-languages"]//input')
 | |
|     for option in options:
 | |
|         code = option.xpath('./@id')[0].replace('_', '-')
 | |
|         supported_languages.append(code)
 | |
| 
 | |
|     return supported_languages
 |