mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-31 10:37:06 -04:00 
			
		
		
		
	
						commit
						80df181575
					
				
							
								
								
									
										72
									
								
								searx/engines/framalibre.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								searx/engines/framalibre.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,72 @@ | ||||
| """ | ||||
|  FramaLibre (It) | ||||
| 
 | ||||
|  @website     https://framalibre.org/ | ||||
|  @provide-api no | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, content, thumbnail, img_src | ||||
| """ | ||||
| 
 | ||||
| from urlparse import urljoin | ||||
| from cgi import escape | ||||
| from urllib import urlencode | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from dateutil import parser | ||||
| 
 | ||||
# engine dependent config
# `categories` lists the searx categories this engine is registered under;
# `paging` is True because request() reads params['pageno'].
categories = ['it']
paging = True

# search-url
# `{query}` receives the url-encoded `keys=...` pair and `{offset}` the page
# index computed in request().
base_url = 'https://framalibre.org/'
search_url = base_url + 'recherche-par-crit-res?{query}&page={offset}'

# specific xpath variables
# results_xpath selects one node per search hit; the remaining expressions are
# evaluated relative to such a node (note the leading `.`).
results_xpath = '//div[@class="nodes-list-row"]/div[contains(@typeof,"sioc:Item")]'
link_xpath = './/h3[@class="node-title"]/a[@href]'
thumbnail_xpath = './/img[@class="media-object img-responsive"]/@src'
content_xpath = './/div[@class="content"]//p'
| 
 | ||||
| 
 | ||||
| # do search-request | ||||
def request(query, params):
    """Build the FramaLibre search URL and store it in ``params['url']``.

    :param query: search terms; url-encoded as the ``keys`` parameter.
    :param params: request parameters; ``params['pageno']`` is read and
        ``params['url']`` is written.
    :returns: the same ``params`` object, updated in place.
    """
    # the ``page`` parameter sent to the site is the searx page number minus one
    page_index = params['pageno'] - 1
    encoded_query = urlencode({'keys': query})

    params['url'] = search_url.format(query=encoded_query, offset=page_index)
    return params
| 
 | ||||
| 
 | ||||
| # get response from search-request | ||||
def response(resp):
    """Parse a FramaLibre result page into a list of searx result dicts.

    :param resp: HTTP response object; only ``resp.text`` (the HTML body)
        is read.
    :returns: a list of dicts with the keys ``url``, ``title``,
        ``thumbnail``, ``img_src`` and ``content``.  ``thumbnail`` and
        ``img_src`` carry the same value and are ``None`` when the result
        has no usable image.
    :raises AttributeError: if ``resp`` has no ``text`` attribute.
    """
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        links = result.xpath(link_xpath)
        if not links:
            # a row without a title link cannot become a result; skip it
            # instead of letting an IndexError discard the whole page
            continue
        link = links[0]

        href = urljoin(base_url, link.attrib.get('href'))
        # there's also a span (class="rdf-meta element-hidden" property="dc:title")'s content property for this...
        title = escape(extract_text(link))

        thumbnail = None
        thumbnail_tags = result.xpath(thumbnail_xpath)
        if thumbnail_tags:
            thumbnail_src = extract_text(thumbnail_tags[0])
            if thumbnail_src:
                # urljoin leaves absolute URLs untouched and resolves
                # site-relative ones without doubling the slash that
                # base_url already ends with
                thumbnail = urljoin(base_url, thumbnail_src)

        content = escape(extract_text(result.xpath(content_xpath)))

        # append result
        results.append({'url': href,
                        'title': title,
                        'thumbnail': thumbnail,
                        'img_src': thumbnail,
                        'content': content})

    # return results
    return results
| @ -465,6 +465,11 @@ engines: | ||||
|     shortcut : scc | ||||
|     disabled : True | ||||
| 
 | ||||
|   - name : framalibre | ||||
|     engine : framalibre | ||||
|     shortcut : frl | ||||
|     disabled : True | ||||
| 
 | ||||
| #  - name : searx | ||||
| #    engine : searx_engine | ||||
| #    shortcut : se | ||||
|  | ||||
							
								
								
									
										103
									
								
								tests/unit/engines/test_framalibre.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								tests/unit/engines/test_framalibre.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,103 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from collections import defaultdict | ||||
| import mock | ||||
| from searx.engines import framalibre | ||||
| from searx.testing import SearxTestCase | ||||
| 
 | ||||
| 
 | ||||
class TestFramalibreEngine(SearxTestCase):
    """Unit tests for the FramaLibre engine's request() and response()."""

    def test_request(self):
        """request() must build a framalibre.org URL carrying query and page."""
        query = 'test_query'
        dicto = defaultdict(dict)
        # searx page numbers start at 1; the engine sends pageno - 1 as ``page``
        dicto['pageno'] = 1
        params = framalibre.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('framalibre.org', params['url'])
        self.assertIn('page=0', params['url'])

    def test_response(self):
        """response() must reject non-responses and parse a result node."""
        # objects without a ``text`` attribute are not valid responses
        self.assertRaises(AttributeError, framalibre.response, None)
        self.assertRaises(AttributeError, framalibre.response, [])
        self.assertRaises(AttributeError, framalibre.response, '')
        self.assertRaises(AttributeError, framalibre.response, '[]')

        # bodies without any result node yield an empty result list
        response = mock.Mock(text='{}')
        self.assertEqual(framalibre.response(response), [])

        response = mock.Mock(text='{"data": []}')
        self.assertEqual(framalibre.response(response), [])

        html = u"""
        <div class="nodes-list-row">
          <div id="node-431"
              class="node node-logiciel-annuaires node-promoted node-teaser node-teaser node-sheet clearfix nodes-list"
              about="/content/gogs" typeof="sioc:Item foaf:Document">
            <header class="media">
              <div class="media-left">
                <div class="field field-name-field-logo field-type-image field-label-hidden">
                  <div class="field-items">
                    <div class="field-item even">
                      <a href="/content/gogs">
                        <img class="media-object img-responsive" typeof="foaf:Image"
 src="https://framalibre.org/sites/default/files/styles/teaser_logo/public/leslogos/gogs-lg.png?itok=rrCxKKBy"
 width="70" height="70" alt="" />
                      </a>
                    </div>
                  </div>
                </div>
              </div>
              <div class="media-body">
                <h3 class="node-title"><a href="/content/gogs">Gogs</a></h3>
                <span property="dc:title" content="Gogs" class="rdf-meta element-hidden"></span>
                <div class="field field-name-field-annuaires field-type-taxonomy-term-reference field-label-hidden">
                  <div class="field-items">
                    <div class="field-item even">
                      <a href="/annuaires/cloudwebapps"
 typeof="skos:Concept" property="rdfs:label skos:prefLabel"
 datatype="" class="label label-primary">Cloud/webApps</a>
                    </div>
                  </div>
                </div>
              </div>
            </header>
            <div class="content">
              <div class="field field-name-field-votre-appr-ciation field-type-fivestar field-label-hidden">
                <div class="field-items">
                  <div class="field-item even">
                  </div>
                </div>
              </div>
              <div class="field field-name-body field-type-text-with-summary field-label-hidden">
                <div class="field-items">
                  <div class="field-item even" property="content:encoded">
                    <p>Gogs est une interface web basée sur git et une bonne alternative à GitHub.</p>
                  </div>
                </div>
              </div>
            </div>
            <footer>
              <a href="/content/gogs" class="read-more btn btn-default btn-sm">Voir la notice</a>
              <div class="field field-name-field-lien-officiel field-type-link-field field-label-hidden">
                <div class="field-items">
                  <div class="field-item even">
                    <a href="https://gogs.io/" target="_blank" title="Voir le site officiel">
                      <span class="glyphicon glyphicon-globe"></span>
                      <span class="sr-only">Lien officiel</span>
                    </a>
                  </div>
                </div>
              </div>
            </footer>
          </div>
        </div>
        """
        response = mock.Mock(text=html)
        results = framalibre.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['title'], 'Gogs')
        self.assertEqual(results[0]['url'],
                         'https://framalibre.org/content/gogs')
        self.assertEqual(results[0]['content'],
                         u"Gogs est une interface web basée sur git et une bonne alternative à GitHub.")

        # the logo URL is already absolute and must be passed through as-is,
        # under both image keys
        logo_url = ('https://framalibre.org/sites/default/files/styles/teaser_logo'
                    '/public/leslogos/gogs-lg.png?itok=rrCxKKBy')
        self.assertEqual(results[0]['thumbnail'], logo_url)
        self.assertEqual(results[0]['img_src'], logo_url)
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user