Mirror of https://github.com/searxng/searxng.git (synced 2025-10-30 18:22:31 -04:00)
			
		
		
		
	[fix] bing images
This commit is contained in:
		
parent: c0bb89fd46
commit: 9ee8e552da
					
				| @ -31,7 +31,6 @@ time_range_support = True | ||||
| base_url = 'https://www.bing.com/' | ||||
| search_string = 'images/search?{query}&count=10&first={offset}' | ||||
| time_range_string = '&qft=+filterui:age-lt{interval}' | ||||
| thumb_url = "https://www.bing.com/th?id={ihk}" | ||||
| time_range_dict = {'day': '1440', | ||||
|                    'week': '10080', | ||||
|                    'month': '43200', | ||||
| @ -78,30 +77,32 @@ def response(resp): | ||||
|     dom = html.fromstring(resp.text) | ||||
| 
 | ||||
|     # parse results | ||||
|     for result in dom.xpath('//div[@class="dg_u"]/div'): | ||||
|     for result in dom.xpath('//div[@id="mmComponent_images_1"]/ul/li/div/div[@class="imgpt"]'): | ||||
|         link = result.xpath('./a')[0] | ||||
| 
 | ||||
|         # TODO find actual title | ||||
|         title = link.xpath('.//img/@alt')[0] | ||||
| 
 | ||||
|         # parse json-data (it is required to add a space, to make it parsable) | ||||
|         json_data = loads(_quote_keys_regex.sub(r'\1"\2": \3', link.attrib.get('m'))) | ||||
| 
 | ||||
|         title = link.attrib.get('t1') | ||||
|         ihk = link.attrib.get('ihk') | ||||
|         url = json_data.get('purl') | ||||
|         img_src = json_data.get('murl') | ||||
| 
 | ||||
|         # url = 'http://' + link.attrib.get('t3') | ||||
|         url = json_data.get('surl') | ||||
|         img_src = json_data.get('imgurl') | ||||
|         thumb_json_data = loads(_quote_keys_regex.sub(r'\1"\2": \3', link.attrib.get('mad'))) | ||||
|         thumbnail = thumb_json_data.get('turl') | ||||
| 
 | ||||
|         # append result | ||||
|         results.append({'template': 'images.html', | ||||
|                         'url': url, | ||||
|                         'title': title, | ||||
|                         'content': '', | ||||
|                         'thumbnail_src': thumb_url.format(ihk=ihk), | ||||
|                         'thumbnail_src': thumbnail, | ||||
|                         'img_src': img_src}) | ||||
| 
 | ||||
|         # TODO stop parsing if 10 images are found | ||||
|         if len(results) >= 10: | ||||
|             break | ||||
|         # if len(results) >= 10: | ||||
|         #     break | ||||
| 
 | ||||
|     # return results | ||||
|     return results | ||||
|  | ||||
| @ -39,85 +39,46 @@ class TestBingImagesEngine(SearxTestCase): | ||||
|         self.assertEqual(bing_images.response(response), []) | ||||
| 
 | ||||
|         html = """ | ||||
|         <div class="dg_u" style="width:178px;height:144px;left:17px;top:0px"><div> | ||||
|             <a href="/images/search?q=south&view=detailv2&&id=7E92863981CCFB89FBDD55205C742DFDA3290CF6&selectedIndex=9&ccid=vzvIfv5u&simid=608055786735667000&thid=OIP.Mbf3bc87efe6e0e476be8cc34bf6cd80eH0" ihk="OIP.Mbf3bc87efe6e0e476be8cc34bf6cd80eH0" t1="South Carolina" t2="747 x 589 · 29 kB · gif" t3="www.digital-topo-maps.com/county-map/south-carolina.shtml" hh="236" hw="300" m='{ns:"images",k:"5117",mid:"7E92863981CCFB89FBDD55205C742DFDA3290CF6",md5:"bf3bc87efe6e0e476be8cc34bf6cd80e",surl:"http://www.digital-topo-maps.com/county-map/south-carolina.shtml",imgurl:"http://www.digital-topo-maps.com/county-map/south-carolina-county-map.gif",tid:"OIP.Mbf3bc87efe6e0e476be8cc34bf6cd80eH0",ow:"480",docid:"608055786735667000",oh:"378",tft:"45"}' mid="7E92863981CCFB89FBDD55205C742DFDA3290CF6" h="ID=images,5117.1"> | ||||
|                 <img class="img_hid" src2="https://tse4.mm.bing.net/th?id=OIP.Mbf3bc87efe6e0e476be8cc34bf6cd80eH0&w=210&h=154&c=7&rs=1&qlt=90&o=4&pid=1.1" style="width:210px;height:154px;" width="210" height="154"> | ||||
|             </a> | ||||
| 
 | ||||
|         </div></div> | ||||
|         """  # noqa | ||||
|         html = html.replace('\r\n', '').replace('\n', '').replace('\r', '') | ||||
|         response = mock.Mock(text=html) | ||||
|         results = bing_images.response(response) | ||||
|         self.assertEqual(type(results), list) | ||||
|         self.assertEqual(len(results), 1) | ||||
|         self.assertEqual(results[0]['title'], 'South Carolina') | ||||
|         self.assertEqual(results[0]['url'], | ||||
|                          'http://www.digital-topo-maps.com/county-map/south-carolina.shtml') | ||||
|         self.assertEqual(results[0]['content'], '') | ||||
|         self.assertEqual(results[0]['thumbnail_src'], | ||||
|                          'https://www.bing.com/th?id=OIP.Mbf3bc87efe6e0e476be8cc34bf6cd80eH0') | ||||
|         self.assertEqual(results[0]['img_src'], | ||||
|                          'http://www.digital-topo-maps.com/county-map/south-carolina-county-map.gif') | ||||
| 
 | ||||
|         html = """ | ||||
|         <a href="#" ihk="HN.608003696942779811" | ||||
|             m="{ns:"images",k:"5045", | ||||
|             mid:"59EB92C317974F34517A1CCAEBEF76A578E08DEE", | ||||
|             surl:"http://www.page.url/", | ||||
|             imgurl:"http://test.url/Test%20Query.jpg",oh:"238", | ||||
|             tft:"0",oi:"http://www.image.url/Images/Test%20Query.jpg"}" | ||||
|             mid="59EB92C317974F34517A1CCAEBEF76A578E08DEE" onclick="return false;" | ||||
|             t1="Test Query" t2="650 x 517 · 31 kB · jpeg" t3="www.short.url" h="ID=images,5045.1"> | ||||
|             <img src="https://tse4.mm.bing.net/th?id=HN.608003696942779811&o=4&pid=1.7" | ||||
|             style="height:144px;" width="178" height="144"/> | ||||
|         </a> | ||||
|         """ | ||||
|         response = mock.Mock(text=html) | ||||
|         results = bing_images.response(response) | ||||
|         self.assertEqual(type(results), list) | ||||
|         self.assertEqual(len(results), 0) | ||||
| 
 | ||||
|         html = """ | ||||
|         <div class="dg_u" style="width:178px;height:144px;left:17px;top:0px"><div> | ||||
|             <a href="#" ihk="HN.608003696942779811" | ||||
|                 m="{ns:"images",k:"5045", | ||||
| mid:"659EB92C317974F34517A1CCAEBEF76A578E08DEE", | ||||
| surl:"http://www.page.url/",imgurl:"http://test.url/Test%20Query.jpg", | ||||
| oh:"238",tft:"0",oi:"http://www.image.url/Images/Test%20Query.jpg"}" | ||||
|                 mid="59EB92C317974F34517A1CCAEBEF76A578E08DEE" onclick="return false;" | ||||
|                 t1="Test Query" t2="650 x 517 · 31 kB · jpeg" t3="www.short.url" h="ID=images,5045.1"> | ||||
|                 <img src="https://tse4.mm.bing.net/th?id=HN.608003696942779811&o=4&pid=1.7" | ||||
|                 style="height:144px;" width="178" height="144"/> | ||||
|             </a> | ||||
|         </div></div> | ||||
|         <div class="dg_u" style="width:178px;height:144px;left:17px;top:0px"><div> | ||||
|             <a href="#" ihk="HN.608003696942779811" | ||||
|                 m="{ns:"images",k:"5045", | ||||
| mid:"659EB92C317974F34517A1CCAEBEF76A578E08DEE", | ||||
| surl:"http://www.page.url/",imgurl:"http://test.url/Test%20Query.jpg", | ||||
| oh:"238",tft:"0",oi:"http://www.image.url/Images/Test%20Query.jpg"}" | ||||
|                 mid="59EB92C317974F34517A1CCAEBEF76A578E08DEE" onclick="return false;" | ||||
|                 t1="Test Query" t2="650 x 517 · 31 kB · jpeg" t3="www.short.url" h="ID=images,5045.1"> | ||||
|                 <img src="https://tse4.mm.bing.net/th?id=HN.608003696942779811&o=4&pid=1.7" | ||||
|                 style="height:144px;" width="178" height="144"/> | ||||
|             </a> | ||||
|         </div></div> | ||||
|         <div class="dg_u" style="width:178px;height:144px;left:17px;top:0px"><div> | ||||
|             <a href="#" ihk="HN.608003696942779811" | ||||
|                 m="{ns:"images",k:"5045", | ||||
| mid:"659EB92C317974F34517A1CCAEBEF76A578E08DEE", | ||||
| surl:"http://www.page.url/",imgurl:"http://test.url/Test%20Query.jpg", | ||||
| oh:"238",tft:"0",oi:"http://www.image.url/Images/Test%20Query.jpg"}" | ||||
|                 mid="59EB92C317974F34517A1CCAEBEF76A578E08DEE" onclick="return false;" | ||||
|                 t1="Test Query" t2="650 x 517 · 31 kB · jpeg" t3="www.short.url" h="ID=images,5045.1"> | ||||
|                 <img src="https://tse4.mm.bing.net/th?id=HN.608003696942779811&o=4&pid=1.7" | ||||
|                 style="height:144px;" width="178" height="144"/> | ||||
|             </a> | ||||
|         </div></div> | ||||
|         <div id="mmComponent_images_1"> | ||||
|             <ul> | ||||
|                 <li> | ||||
|                     <div> | ||||
|                         <div class="imgpt"> | ||||
|                             <a m='{"purl":"page_url","murl":"img_url"}' mad='{"turl":"thumb_url"}'> | ||||
|                                 <img src="" alt="alt text" /> | ||||
|                             </a> | ||||
|                         </div> | ||||
|                         <div></div> | ||||
|                     </div> | ||||
|                     <div> | ||||
|                         <div class="imgpt"> | ||||
|                             <a m='{"purl":"page_url2","murl":"img_url2"}' mad='{"turl":"thumb_url2"}'> | ||||
|                                 <img src="" alt="alt text 2" /> | ||||
|                             </a> | ||||
|                         </div> | ||||
|                     </div> | ||||
|                 </li> | ||||
|             </ul> | ||||
|             <ul> | ||||
|                 <li> | ||||
|                     <div> | ||||
|                         <div class="imgpt"> | ||||
|                             <a m='{"purl":"page_url3","murl":"img_url3"}' mad='{"turl":"thumb_url3"}'> | ||||
|                                 <img src="" alt="alt text 3" /> | ||||
|                             </a> | ||||
|                         </div> | ||||
|                     </div> | ||||
|                 </li> | ||||
|             </ul> | ||||
|         </div> | ||||
|         """ | ||||
|         html = html.replace('\r\n', '').replace('\n', '').replace('\r', '') | ||||
|         response = mock.Mock(text=html) | ||||
|         results = bing_images.response(response) | ||||
|         self.assertEqual(type(results), list) | ||||
|         self.assertEqual(len(results), 3) | ||||
|         self.assertEqual(results[0]['title'], 'alt text') | ||||
|         self.assertEqual(results[0]['url'], 'page_url') | ||||
|         self.assertEqual(results[0]['content'], '') | ||||
|         self.assertEqual(results[0]['thumbnail_src'], 'thumb_url') | ||||
|         self.assertEqual(results[0]['img_src'], 'img_url') | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user