mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-03 19:17:07 -05:00 
			
		
		
		
	[fix] Bing-Web engine: XPath to get the Wikipedia result
Modify the XPath selector to get the Wikipedia result, plus small fixes. About the result content: a result (especially the Wikipedia result) can contain several paragraph elements; only the first paragraph is now taken and displayed in the search result.
This commit is contained in:
		
							parent
							
								
									4e355564d2
								
							
						
					
					
						commit
						9ee99423fe
					
				@ -4,6 +4,7 @@
 | 
			
		||||
 | 
			
		||||
- https://github.com/searx/searx/issues/2019#issuecomment-648227442
 | 
			
		||||
"""
 | 
			
		||||
# pylint: disable=too-many-branches
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
from urllib.parse import urlencode, urlparse, parse_qs
 | 
			
		||||
@ -74,7 +75,6 @@ def request(query, params):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def response(resp):
 | 
			
		||||
 | 
			
		||||
    results = []
 | 
			
		||||
    result_len = 0
 | 
			
		||||
 | 
			
		||||
@ -84,12 +84,20 @@ def response(resp):
 | 
			
		||||
 | 
			
		||||
    url_to_resolve = []
 | 
			
		||||
    url_to_resolve_index = []
 | 
			
		||||
    for i, result in enumerate(eval_xpath_list(dom, '//li[@class="b_algo"]')):
 | 
			
		||||
    for i, result in enumerate(eval_xpath_list(dom, '//li[contains(@class, "b_algo")]')):
 | 
			
		||||
 | 
			
		||||
        link = eval_xpath(result, './/h2/a')[0]
 | 
			
		||||
        url = link.attrib.get('href')
 | 
			
		||||
        title = extract_text(link)
 | 
			
		||||
        content = extract_text(eval_xpath(result, './/p'))
 | 
			
		||||
 | 
			
		||||
        # Make sure that the element is free of <a href> links and <span class='algoSlug_icon'>
 | 
			
		||||
        content = eval_xpath(result, '(.//p)[1]')
 | 
			
		||||
        for p in content:
 | 
			
		||||
            for e in p.xpath('.//a'):
 | 
			
		||||
                e.getparent().remove(e)
 | 
			
		||||
            for e in p.xpath('.//span[@class="algoSlug_icon"]'):
 | 
			
		||||
                e.getparent().remove(e)
 | 
			
		||||
        content = extract_text(content)
 | 
			
		||||
 | 
			
		||||
        # get the real URL either using the URL shown to user or following the Bing URL
 | 
			
		||||
        if url.startswith('https://www.bing.com/ck/a?'):
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user