mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-31 02:27:06 -04:00 
			
		
		
		
	Add Nyaa.se search engine
This commit is contained in:
		
							parent
							
								
									d748b8419a
								
							
						
					
					
						commit
						e5677ae6b6
					
				
							
								
								
									
										115
									
								
								searx/engines/nyaa.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								searx/engines/nyaa.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,115 @@ | |||||||
|  | """ | ||||||
|  |  Nyaa.se (Anime Bittorrent tracker) | ||||||
|  | 
 | ||||||
|  |  @website      http://www.nyaa.se/ | ||||||
|  |  @provide-api  no | ||||||
|  |  @using-api    no | ||||||
|  |  @results      HTML | ||||||
|  |  @stable       no (HTML can change) | ||||||
|  |  @parse        url, title, content, seed, leech, torrentfile | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | from cgi import escape | ||||||
|  | from urllib import urlencode | ||||||
|  | from lxml import html | ||||||
|  | from searx.engines.xpath import extract_text | ||||||
|  | 
 | ||||||
|  | # engine dependent config: category list and paging flag of this engine | ||||||
|  | categories = ['files', 'images', 'videos', 'music'] | ||||||
|  | paging = True | ||||||
|  | 
 | ||||||
|  | # search-url: request() fills {query} with the urlencoded 'term' parameter and {offset} with params['pageno'] | ||||||
|  | base_url = 'http://www.nyaa.se/' | ||||||
|  | search_url = base_url + '?page=search&{query}&offset={offset}' | ||||||
|  | 
 | ||||||
|  | # xpath queries: xpath_results is run on the whole document in response(); the others are run on each matched row | ||||||
|  | xpath_results = '//table[@class="tlist"]//tr[contains(@class, "tlistrow")]' | ||||||
|  | xpath_category = './/td[@class="tlisticon"]/a' | ||||||
|  | xpath_title = './/td[@class="tlistname"]/a' | ||||||
|  | xpath_torrent_file = './/td[@class="tlistdownload"]/a' | ||||||
|  | xpath_filesize = './/td[@class="tlistsize"]/text()' | ||||||
|  | xpath_seeds = './/td[@class="tlistsn"]/text()' | ||||||
|  | xpath_leeches = './/td[@class="tlistln"]/text()' | ||||||
|  | xpath_downloads = './/td[@class="tlistdn"]/text()' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # convert a variable to integer or return 0 if it's not a number | ||||||
def int_or_zero(num):
    """Convert ``num`` to a non-negative integer, falling back to 0.

    ``num`` is either a string or a list of strings (such as the result of
    an xpath ``text()`` query); for a list only the first item is used and
    an empty list yields 0.

    Surrounding whitespace is ignored.  Anything that is not made up
    entirely of digits (empty string, signs, decimals, words) yields 0.
    """
    if isinstance(num, list):
        if not num:
            return 0
        num = num[0]

    # text nodes frequently carry padding/newlines around the number
    num = num.strip()
    if not num.isdigit():
        return 0

    try:
        return int(num)
    except ValueError:
        # isdigit() also accepts characters such as superscript digits
        # that int() refuses to parse
        return 0
|  | 
 | ||||||
|  | 
 | ||||||
|  | # do search-request | ||||||
def request(query, params):
    """Build the search URL for ``query`` and store it in ``params['url']``.

    The query is urlencoded as the ``term`` parameter and the requested
    page number (``params['pageno']``) is used as the ``offset`` value.
    Returns the same ``params`` object that was passed in.
    """
    encoded_term = urlencode({'term': query})
    page = params['pageno']
    params['url'] = search_url.format(query=encoded_term, offset=page)
    return params
|  | 
 | ||||||
|  | 
 | ||||||
# binary unit suffix (upper-cased) -> number of bytes per unit
_FILESIZE_MULTIPLIERS = {
    'KIB': 1024,
    'MIB': 1024 ** 2,
    'GIB': 1024 ** 3,
    'TIB': 1024 ** 4,
}


def _filesize_to_bytes(size_nodes):
    """Convert the size cell text (e.g. ``10 MiB``) to a number of bytes.

    ``size_nodes`` is the list returned by the ``xpath_filesize`` query.
    Returns an int, or None when the cell is missing or cannot be parsed.
    """
    try:
        # split() without argument tolerates padded or repeated whitespace
        amount, suffix = size_nodes[0].split()
        return int(float(amount) * _FILESIZE_MULTIPLIERS[suffix.upper()])
    except (IndexError, KeyError, ValueError, OverflowError):
        # IndexError: no size text; ValueError: not exactly two parts or a
        # non-numeric amount; KeyError: unknown unit; OverflowError: "inf"
        return None


# get response from search-request
def response(resp):
    """Parse a search result page into a list of result dicts.

    ``resp.text`` is parsed with lxml and every row matched by
    ``xpath_results`` produces one dict using the ``torrent.html`` template
    with the keys url, title, content, seed, leech, filesize and
    torrentfile.  ``filesize`` is None when the size cannot be parsed.

    Raises IndexError when a row lacks the category, title or download link.
    """
    results = []

    dom = html.fromstring(resp.text)

    for result in dom.xpath(xpath_results):
        # category in which our torrent belongs
        category = result.xpath(xpath_category)[0].attrib.get('title')

        # torrent title
        page_a = result.xpath(xpath_title)[0]
        title = escape(extract_text(page_a))

        # link to the page
        href = page_a.attrib.get('href')

        # link to the torrent file
        torrent_link = result.xpath(xpath_torrent_file)[0].attrib.get('href')

        # torrent size in bytes (None if unknown)
        file_size = _filesize_to_bytes(result.xpath(xpath_filesize))

        # seed count
        seed = int_or_zero(result.xpath(xpath_seeds))

        # leech count
        leech = int_or_zero(result.xpath(xpath_leeches))

        # torrent downloads count
        downloads = int_or_zero(result.xpath(xpath_downloads))

        # content string contains all information not included into template
        content = 'Category: "{category}". Downloaded {downloads} times.'
        content = content.format(category=category, downloads=downloads)
        content = escape(content)

        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'seed': seed,
                        'leech': leech,
                        'filesize': file_size,
                        'torrentfile': torrent_link,
                        'template': 'torrent.html'})

    return results
| @ -175,6 +175,10 @@ engines: | |||||||
|     engine : mixcloud |     engine : mixcloud | ||||||
|     shortcut : mc |     shortcut : mc | ||||||
| 
 | 
 | ||||||
|  |   - name : nyaa | ||||||
|  |     engine : nyaa | ||||||
|  |     shortcut : nt | ||||||
|  | 
 | ||||||
|   - name : openstreetmap |   - name : openstreetmap | ||||||
|     engine : openstreetmap |     engine : openstreetmap | ||||||
|     shortcut : osm |     shortcut : osm | ||||||
|  | |||||||
							
								
								
									
										66
									
								
								tests/unit/engines/test_nyaa.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								tests/unit/engines/test_nyaa.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,66 @@ | |||||||
|  | from collections import defaultdict | ||||||
|  | import mock | ||||||
|  | from searx.engines import nyaa | ||||||
|  | from searx.testing import SearxTestCase | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
class TestNyaaEngine(SearxTestCase):
    """Unit tests for the request() and response() functions of the nyaa engine."""

    def test_request(self):
        # the generated URL must contain the query and point at nyaa.se
        query = 'test_query'
        dic = defaultdict(dict)
        dic['pageno'] = 1
        params = nyaa.request(query, dic)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('nyaa.se', params['url'])

    def test_response(self):
        # a page without a result table yields no results
        resp = mock.Mock(text='<html></html>')
        self.assertEqual(nyaa.response(resp), [])

        # a result table with a single torrent row
        html = """
        <table class="tlist">
          <tbody>
            <tr class="trusted tlistrow">
              <td class="tlisticon">
                <a href="//www.nyaa.se" title="English-translated Anime">
                   <img src="//files.nyaa.se" alt="English-translated Anime">
                </a>
              </td>
              <td class="tlistname">
                <a href="//www.nyaa.se/?page3">
                  Sample torrent title
                </a>
              </td>
              <td class="tlistdownload">
                <a href="//www.nyaa.se/?page_dl" title="Download">
                  <img src="//files.nyaa.se/www-dl.png" alt="DL">
                </a>
              </td>
              <td class="tlistsize">10 MiB</td>
              <td class="tlistsn">1</td>
              <td class="tlistln">3</td>
              <td class="tlistdn">666</td>
              <td class="tlistmn">0</td>
            </tr>
          </tbody>
        </table>
        """

        resp = mock.Mock(text=html)
        results = nyaa.response(resp)

        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)

        r = results[0]
        # assertIn reports both operands on failure, unlike assertTrue
        self.assertIn('www.nyaa.se/?page3', r['url'])
        self.assertIn('www.nyaa.se/?page_dl', r['torrentfile'])
        self.assertIn('English-translated Anime', r['content'])
        self.assertIn('Downloaded 666 times.', r['content'])

        self.assertEqual(r['title'], 'Sample torrent title')
        self.assertEqual(r['seed'], 1)
        self.assertEqual(r['leech'], 3)
        self.assertEqual(r['filesize'], 10 * 1024 * 1024)
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user