mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-03 19:17:07 -05:00 
			
		
		
		
	Merge branch 'master' into simple
This commit is contained in:
		
						commit
						ac59f5e168
					
				
							
								
								
									
										96
									
								
								searx/engines/bing_videos.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										96
									
								
								searx/engines/bing_videos.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,96 @@
 | 
			
		||||
"""
 | 
			
		||||
 Bing (Videos)
 | 
			
		||||
 | 
			
		||||
 @website     https://www.bing.com/videos
 | 
			
		||||
 @provide-api yes (http://datamarket.azure.com/dataset/bing/search)
 | 
			
		||||
 | 
			
		||||
 @using-api   no
 | 
			
		||||
 @results     HTML
 | 
			
		||||
 @stable      no
 | 
			
		||||
 @parse       url, title, content, thumbnail
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
from json import loads
 | 
			
		||||
from lxml import html
 | 
			
		||||
from searx.engines.xpath import extract_text
 | 
			
		||||
from searx.url_utils import urlencode
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Module-level engine settings.
categories = ['videos']
paging = True
safesearch = True
time_range_support = True
# Page size: sent to Bing as ``count`` and used by response() as the cap on
# the number of results returned.
number_of_results = 10

# URL template of the video search endpoint.  ``{query}`` receives an already
# url-encoded ``q=...`` pair, ``{offset}`` the index of the first result.
search_url = 'https://www.bing.com/videos/asyncv2?{query}&async=content&'\
             'first={offset}&count={number_of_results}&CW=1366&CH=25&FORM=R5VR5'
# Suffix appended to the URL when a known time range is requested.
time_range_string = '&qft=+filterui:videoage-lt{interval}'
# Time-range name -> ``videoage-lt`` value.  The values equal the number of
# minutes in 1, 7, 30 and 365 days (presumably Bing reads them as minutes).
time_range_dict = {'day': '1440',
                   'week': '10080',
                   'month': '43200',
                   'year': '525600'}

# safesearch definitions: safesearch level -> value of the ``ADLT`` cookie field
safesearch_types = {2: 'STRICT',
                    1: 'DEMOTE',
                    0: 'OFF'}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# do search-request
 | 
			
		||||
def request(query, params):
    """Fill ``params`` with the URL and cookies of a Bing video search.

    :param query: search terms; url-encoded here as the ``q`` parameter.
    :param params: request dict.  ``pageno``, ``safesearch``, ``language``
        and ``time_range`` are read; ``url`` and the ``SRCHHPGUSR`` and
        ``_EDGE_S`` entries of ``cookies`` are written.
    :returns: the same ``params`` object, modified in place.
    """
    # Page 1 starts at result 1, page 2 at number_of_results + 1, and so on.
    # Derive the step from number_of_results so it stays in sync with the
    # ``count`` value sent in the URL.
    offset = (params['pageno'] - 1) * number_of_results + 1

    # safesearch cookie; unknown levels fall back to 'DEMOTE'
    params['cookies']['SRCHHPGUSR'] = \
        'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')

    # language cookie
    params['cookies']['_EDGE_S'] = 'mkt=' + params['language'].lower() + '&F=1'

    # query and paging
    params['url'] = search_url.format(query=urlencode({'q': query}),
                                      offset=offset,
                                      number_of_results=number_of_results)

    # time range: only appended for the names known to time_range_dict
    if params['time_range'] in time_range_dict:
        params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])

    return params
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# get response from search-request
 | 
			
		||||
def _extract_url(result):
    """Return the video URL of one ``dg_u`` result node, or ``None``.

    The ``purl`` field of the JSON ``data-eventpayload`` attribute is
    preferred; the anchor's ``href`` is the fallback.
    """
    payload = result.xpath('.//div[@class="sa_wrapper"]/@data-eventpayload')
    if payload:
        try:
            return loads(payload[0])['purl']
        except (ValueError, KeyError, TypeError):
            # malformed payload or no 'purl' field: try the anchor instead
            pass

    hrefs = result.xpath('./a/@href')
    if not hrefs:
        return None

    # discard results that do not return an external url
    # very recent results sometimes don't return the video's url
    if hrefs[0].startswith('/videos/search?'):
        return None

    return hrefs[0]


def response(resp):
    """Parse the HTML of a Bing video search into a list of result dicts.

    :param resp: response object; only its ``text`` attribute is read.
    :returns: list of dicts with the keys ``url``, ``title``, ``content``,
        ``thumbnail`` and ``template``; at most ``number_of_results`` entries.
    """
    results = []

    dom = html.fromstring(resp.text)

    for result in dom.xpath('//div[@class="dg_u"]'):
        url = _extract_url(result)
        if url is None:
            continue

        thumbnails = result.xpath('.//div[@class="vthumb"]/img/@src')
        if not thumbnails:
            # skip the incomplete entry instead of failing the whole page
            continue

        results.append({'url': url,
                        'title': extract_text(result.xpath('./a//div[@class="tl"]')),
                        'content': extract_text(result.xpath('.//div[@class="pubInfo"]')),
                        'thumbnail': thumbnails[0],
                        'template': 'videos.html'})

        # first page ignores requested number of results
        if len(results) >= number_of_results:
            break

    return results
 | 
			
		||||
@ -81,6 +81,10 @@ engines:
 | 
			
		||||
    engine : bing_news
 | 
			
		||||
    shortcut : bin
 | 
			
		||||
 | 
			
		||||
  - name : bing videos
 | 
			
		||||
    engine : bing_videos
 | 
			
		||||
    shortcut : biv
 | 
			
		||||
 | 
			
		||||
  - name : bitbucket
 | 
			
		||||
    engine : xpath
 | 
			
		||||
    paging : True
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										131
									
								
								tests/unit/engines/test_bing_videos.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										131
									
								
								tests/unit/engines/test_bing_videos.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,131 @@
 | 
			
		||||
# -*- coding: utf-8 -*-
 | 
			
		||||
from collections import defaultdict
 | 
			
		||||
import mock
 | 
			
		||||
from searx.engines import bing_videos
 | 
			
		||||
from searx.testing import SearxTestCase
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestBingVideosEngine(SearxTestCase):
    """Unit tests for ``bing_videos.request`` and ``bing_videos.response``."""

    def test_request(self):
        # first page, no time range, safesearch off
        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['pageno'] = 1
        dicto['language'] = 'fr-FR'
        dicto['safesearch'] = 0
        dicto['time_range'] = ''
        params = bing_videos.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('bing.com', params['url'])
        self.assertIn('SRCHHPGUSR', params['cookies'])
        self.assertIn('OFF', params['cookies']['SRCHHPGUSR'])
        self.assertIn('_EDGE_S', params['cookies'])
        self.assertIn('fr-fr', params['cookies']['_EDGE_S'])

        # second page, one-day time range, strict safesearch
        dicto['pageno'] = 2
        dicto['time_range'] = 'day'
        dicto['safesearch'] = 2
        params = bing_videos.request(query, dicto)
        self.assertIn('first=11', params['url'])
        self.assertIn('1440', params['url'])
        self.assertIn('SRCHHPGUSR', params['cookies'])
        self.assertIn('STRICT', params['cookies']['SRCHHPGUSR'])

    def test_response(self):
        # objects without a ``text`` attribute are rejected
        self.assertRaises(AttributeError, bing_videos.response, None)
        self.assertRaises(AttributeError, bing_videos.response, [])
        self.assertRaises(AttributeError, bing_videos.response, '')
        self.assertRaises(AttributeError, bing_videos.response, '[]')

        # a page without result nodes yields no results
        response = mock.Mock(text='<html></html>')
        self.assertEqual(bing_videos.response(response), [])

        # the url is taken from the JSON payload of the sa_wrapper node, even
        # though the anchor itself only carries an internal search link
        html = """
        <div>
            <div class="dg_u">
                <a class="dv_i" href="/videos/search?abcde">
                    <div class="vthblock">
                        <div class="vthumb">
                            <img src="thumb_1.jpg" />
                        </div>
                        <div>
                            <div class="tl">
                                Title 1
                            </div>
                        </div>
                    </div>
                    <div class="videoInfoPanel">
                        <div class="pubInfo">
                            <div>Content 1</div>
                        </div>
                    </div>
                </a>
                <div class="sa_wrapper"
                    data-eventpayload="{&quot;purl&quot;: &quot;https://url.com/1&quot;}">
                </div>
            </div>
        </div>
        """
        response = mock.Mock(text=html)
        results = bing_videos.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['title'], 'Title 1')
        self.assertEqual(results[0]['url'], 'https://url.com/1')
        self.assertEqual(results[0]['content'], 'Content 1')
        self.assertEqual(results[0]['thumbnail'], 'thumb_1.jpg')

        # without a payload the anchor href is used; the second entry only
        # has an internal search link and must be dropped
        html = """
        <div>
            <div class="dg_u">
                <a class="dv_i" href="https://url.com/1">
                    <div class="vthblock">
                        <div class="vthumb">
                            <img src="thumb_1.jpg" />
                        </div>
                        <div>
                            <div class="tl">
                                Title 1
                            </div>
                        </div>
                    </div>
                    <div class="videoInfoPanel">
                        <div class="pubInfo">
                            <div>Content 1</div>
                        </div>
                    </div>
                </a>
            </div>
            <div class="dg_u">
                <a class="dv_i" href="/videos/search?abcde">
                    <div class="vthblock">
                        <div class="vthumb">
                            <img src="thumb_2.jpg" />
                        </div>
                        <div>
                            <div class="tl">
                                Title 2
                            </div>
                        </div>
                    </div>
                    <div class="videoInfoPanel">
                        <div class="pubInfo">
                            <div>Content 2</div>
                        </div>
                    </div>
                </a>
            </div>
        </div>
        """
        response = mock.Mock(text=html)
        results = bing_videos.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['title'], 'Title 1')
        self.assertEqual(results[0]['url'], 'https://url.com/1')
        self.assertEqual(results[0]['content'], 'Content 1')
        self.assertEqual(results[0]['thumbnail'], 'thumb_1.jpg')
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user