mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-03 19:17:07 -05:00 
			
		
		
		
	Merge pull request #249 from dalf/master
[fix] update yahoo engine according to the web site changes
This commit is contained in:
		
						commit
						7f7f10bb6f
					
				@ -24,11 +24,11 @@ base_url = 'https://search.yahoo.com/'
 | 
			
		||||
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
 | 
			
		||||
 | 
			
		||||
# specific xpath variables
 | 
			
		||||
results_xpath = '//div[@class="res"]'
 | 
			
		||||
results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
 | 
			
		||||
url_xpath = './/h3/a/@href'
 | 
			
		||||
title_xpath = './/h3/a'
 | 
			
		||||
content_xpath = './/div[@class="abstr"]'
 | 
			
		||||
suggestion_xpath = '//div[@id="satat"]//a'
 | 
			
		||||
content_xpath = './/div[@class="compText aAbs"]'
 | 
			
		||||
suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# remove yahoo-specific tracking-url
 | 
			
		||||
@ -91,11 +91,12 @@ def response(resp):
 | 
			
		||||
                        'content': content})
 | 
			
		||||
 | 
			
		||||
    # if no suggestion found, return results
 | 
			
		||||
    if not dom.xpath(suggestion_xpath):
 | 
			
		||||
    suggestions = dom.xpath(suggestion_xpath)
 | 
			
		||||
    if not suggestions:
 | 
			
		||||
        return results
 | 
			
		||||
 | 
			
		||||
    # parse suggestion
 | 
			
		||||
    for suggestion in dom.xpath(suggestion_xpath):
 | 
			
		||||
    for suggestion in suggestions:
 | 
			
		||||
        # append suggestion
 | 
			
		||||
        results.append({'suggestion': extract_text(suggestion)})
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -55,86 +55,83 @@ class TestYahooEngine(SearxTestCase):
 | 
			
		||||
        self.assertEqual(yahoo.response(response), [])
 | 
			
		||||
 | 
			
		||||
        html = """
 | 
			
		||||
        <div class="res">
 | 
			
		||||
            <div>
 | 
			
		||||
                <h3>
 | 
			
		||||
                <a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA;
 | 
			
		||||
                    _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
 | 
			
		||||
                    /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1">
 | 
			
		||||
                    <b>This</b> is the title
 | 
			
		||||
                </a>
 | 
			
		||||
<ol class="reg mb-15 searchCenterMiddle">
 | 
			
		||||
    <li class="first">
 | 
			
		||||
        <div class="dd algo fst Sr">
 | 
			
		||||
            <div class="compTitle">
 | 
			
		||||
                <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;
 | 
			
		||||
                     _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
 | 
			
		||||
                     /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"
 | 
			
		||||
                     target="_blank" data-bid="54e712e13671c">
 | 
			
		||||
                     <b><b>This is the title</b></b></a>
 | 
			
		||||
                </h3>
 | 
			
		||||
            </div>
 | 
			
		||||
            <span class="url" dir="ltr">www.<b>test</b>.com</span>
 | 
			
		||||
            <div class="abstr">
 | 
			
		||||
                <b>This</b> is the content
 | 
			
		||||
            <div class="compText aAbs">
 | 
			
		||||
                <p class="lh-18"><b><b>This is the </b>content</b>
 | 
			
		||||
                </p>
 | 
			
		||||
            </div>
 | 
			
		||||
        </div>
 | 
			
		||||
        <div id="satat"  data-bns="Yahoo" data-bk="124.1">
 | 
			
		||||
            <h2>Also Try</h2>
 | 
			
		||||
            <table>
 | 
			
		||||
                <tbody>
 | 
			
		||||
                    <tr>
 | 
			
		||||
                        <td>
 | 
			
		||||
                            <a id="srpnat0" class="" href="https://search.yahoo.com/search=rs-bottom" >
 | 
			
		||||
                                <span>
 | 
			
		||||
                                    <b></b>This is <b>the suggestion</b>
 | 
			
		||||
                                </span>
 | 
			
		||||
                            </a>
 | 
			
		||||
                        </td>
 | 
			
		||||
                    </tr>
 | 
			
		||||
                </tbody>
 | 
			
		||||
            </table>
 | 
			
		||||
    </li>
 | 
			
		||||
    <li>
 | 
			
		||||
        <div class="dd algo lst Sr">
 | 
			
		||||
            <div class="compTitle">
 | 
			
		||||
                <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=AwrBT7zgEudUW.wAe2ZXNyoA;
 | 
			
		||||
                     _ylu=X3oDMTBybGY3bmpvBGNvbG8DYmYxBHBvcwMyBHZ0aWQDBHNlYwNzcg--/RV=2\/RE=1424458593/RO=10
 | 
			
		||||
                     /RU=https%3a%2f%2fthis.is.the.second.url%2f/RK=0/RS=jIctjj_cBH1Efj88GCgHKp3__Qk-"
 | 
			
		||||
                     target="_blank" data-bid="54e712e136926">
 | 
			
		||||
                     This is the second <b><b>title</b></b></a>
 | 
			
		||||
                </h3>
 | 
			
		||||
            </div>
 | 
			
		||||
            <div class="compText aAbs">
 | 
			
		||||
                <p class="lh-18">This is the second content</p>
 | 
			
		||||
            </div>
 | 
			
		||||
        </div>
 | 
			
		||||
    </li>
 | 
			
		||||
</ol>
 | 
			
		||||
<div class="dd assist fst lst AlsoTry" data-bid="54e712e138d04">
 | 
			
		||||
    <div class="compTitle mb-4 h-17">
 | 
			
		||||
        <h3 class="title">Also Try</h3> </div>
 | 
			
		||||
    <table class="compTable m-0 ac-1st td-u fz-ms">
 | 
			
		||||
        <tbody>
 | 
			
		||||
            <tr>
 | 
			
		||||
                <td class="w-50p pr-28"><a href="https://search.yahoo.com/"><B>This is the </B>suggestion<B></B></a>
 | 
			
		||||
                </td>
 | 
			
		||||
            </tr>
 | 
			
		||||
    </table>
 | 
			
		||||
</div>
 | 
			
		||||
        """
 | 
			
		||||
        response = mock.Mock(text=html)
 | 
			
		||||
        results = yahoo.response(response)
 | 
			
		||||
        print results
 | 
			
		||||
        self.assertEqual(type(results), list)
 | 
			
		||||
        self.assertEqual(len(results), 2)
 | 
			
		||||
        self.assertEqual(len(results), 3)
 | 
			
		||||
        self.assertEqual(results[0]['title'], 'This is the title')
 | 
			
		||||
        self.assertEqual(results[0]['url'], 'https://this.is.the.url/')
 | 
			
		||||
        self.assertEqual(results[0]['content'], 'This is the content')
 | 
			
		||||
        self.assertEqual(results[1]['suggestion'], 'This is the suggestion')
 | 
			
		||||
        self.assertEqual(results[1]['title'], 'This is the second title')
 | 
			
		||||
        self.assertEqual(results[1]['url'], 'https://this.is.the.second.url/')
 | 
			
		||||
        self.assertEqual(results[1]['content'], 'This is the second content')
 | 
			
		||||
        self.assertEqual(results[2]['suggestion'], 'This is the suggestion')
 | 
			
		||||
 | 
			
		||||
        html = """
 | 
			
		||||
        <div class="res">
 | 
			
		||||
            <div>
 | 
			
		||||
                <h3>
 | 
			
		||||
                <a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA;
 | 
			
		||||
                    _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
 | 
			
		||||
                    /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1">
 | 
			
		||||
                    <b>This</b> is the title
 | 
			
		||||
                </a>
 | 
			
		||||
<ol class="reg mb-15 searchCenterMiddle">
 | 
			
		||||
    <li class="first">
 | 
			
		||||
        <div class="dd algo fst Sr">
 | 
			
		||||
            <div class="compTitle">
 | 
			
		||||
                <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;
 | 
			
		||||
                     _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
 | 
			
		||||
                     /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"
 | 
			
		||||
                     target="_blank" data-bid="54e712e13671c">
 | 
			
		||||
                  <b><b>This is the title</b></b></a>
 | 
			
		||||
                </h3>
 | 
			
		||||
            </div>
 | 
			
		||||
            <span class="url" dir="ltr">www.<b>test</b>.com</span>
 | 
			
		||||
            <div class="abstr">
 | 
			
		||||
                <b>This</b> is the content
 | 
			
		||||
            </div>
 | 
			
		||||
        </div>
 | 
			
		||||
        <div class="res">
 | 
			
		||||
            <div>
 | 
			
		||||
                <h3>
 | 
			
		||||
                <a id="link-1" class="yschttl spt">
 | 
			
		||||
                    <b>This</b> is the title
 | 
			
		||||
                </a>
 | 
			
		||||
                </h3>
 | 
			
		||||
            </div>
 | 
			
		||||
            <span class="url" dir="ltr">www.<b>test</b>.com</span>
 | 
			
		||||
            <div class="abstr">
 | 
			
		||||
                <b>This</b> is the content
 | 
			
		||||
            </div>
 | 
			
		||||
        </div>
 | 
			
		||||
        <div class="res">
 | 
			
		||||
            <div>
 | 
			
		||||
                <h3>
 | 
			
		||||
                </h3>
 | 
			
		||||
            </div>
 | 
			
		||||
            <span class="url" dir="ltr">www.<b>test</b>.com</span>
 | 
			
		||||
            <div class="abstr">
 | 
			
		||||
                <b>This</b> is the content
 | 
			
		||||
            <div class="compText aAbs">
 | 
			
		||||
                <p class="lh-18"><b><b>This is the </b>content</b>
 | 
			
		||||
                </p>
 | 
			
		||||
            </div>
 | 
			
		||||
        </div>
 | 
			
		||||
    </li>
 | 
			
		||||
</ol>
 | 
			
		||||
        """
 | 
			
		||||
        response = mock.Mock(text=html)
 | 
			
		||||
        results = yahoo.response(response)
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user