mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-03 19:17:07 -05:00 
			
		
		
		
	
						commit
						80df181575
					
				
							
								
								
									
										72
									
								
								searx/engines/framalibre.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								searx/engines/framalibre.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,72 @@
 | 
				
			|||||||
 | 
					"""
 | 
				
			||||||
 | 
					 FramaLibre (It)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 @website     https://framalibre.org/
 | 
				
			||||||
 | 
					 @provide-api no
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 @using-api   no
 | 
				
			||||||
 | 
					 @results     HTML
 | 
				
			||||||
 | 
					 @stable      no (HTML can change)
 | 
				
			||||||
 | 
					 @parse       url, title, content, thumbnail, img_src
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from urlparse import urljoin
 | 
				
			||||||
 | 
					from cgi import escape
 | 
				
			||||||
 | 
					from urllib import urlencode
 | 
				
			||||||
 | 
					from lxml import html
 | 
				
			||||||
 | 
					from searx.engines.xpath import extract_text
 | 
				
			||||||
 | 
					from dateutil import parser
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# --- engine settings -------------------------------------------------
					# category list this engine is registered under, and paging support flag
					categories = ['it']
					paging = True

					# --- URLs --------------------------------------------------------------
					# {query} receives an already url-encoded "keys=..." pair,
					# {offset} receives the page value computed in request()
					base_url = 'https://framalibre.org/'
					search_url = base_url + 'recherche-par-crit-res?{query}&page={offset}'

					# --- XPath expressions used by response() ------------------------------
					# one node per search hit
					results_xpath = '//div[@class="nodes-list-row"]/div[contains(@typeof,"sioc:Item")]'
					# the following expressions are evaluated relative to a single hit
					link_xpath = './/h3[@class="node-title"]/a[@href]'
					thumbnail_xpath = './/img[@class="media-object img-responsive"]/@src'
					content_xpath = './/div[@class="content"]//p'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# do search-request
 | 
				
			||||||
 | 
					def request(query, params):
					    """Build the search URL for ``query`` and store it in ``params['url']``.

					    The ``page`` URL parameter is ``params['pageno']`` minus one; the query
					    is url-encoded under the ``keys`` parameter.  The same ``params``
					    mapping is returned after being updated.
					    """
					    page_index = params['pageno'] - 1
					    encoded_query = urlencode({'keys': query})

					    params['url'] = search_url.format(query=encoded_query, offset=page_index)
					    return params
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# get response from search-request
 | 
				
			||||||
 | 
					def response(resp):
					    """Parse a FramaLibre result page into a list of result dicts.

					    ``resp.text`` is parsed as HTML.  Every node matched by
					    ``results_xpath`` yields one dict with the keys ``url``, ``title``,
					    ``thumbnail``, ``img_src`` and ``content``.  ``thumbnail`` and
					    ``img_src`` always carry the same value, which is ``None`` when the
					    node has no usable image source.

					    Nodes that lack a title link are skipped, so a single malformed hit
					    does not raise ``IndexError`` and discard the whole page.
					    """
					    results = []

					    dom = html.fromstring(resp.text)

					    # parse results
					    for result in dom.xpath(results_xpath):
					        links = result.xpath(link_xpath)
					        if not links:
					            # without a title link there is no URL to point at
					            continue
					        link = links[0]

					        href = urljoin(base_url, link.attrib.get('href'))
					        # there's also a span (class="rdf-meta element-hidden" property="dc:title")'s content property for this...
					        title = escape(extract_text(link))

					        thumbnail = None
					        thumbnail_tags = result.xpath(thumbnail_xpath)
					        if thumbnail_tags:
					            src = extract_text(thumbnail_tags[0])
					            if src:
					                # base_url already ends with '/', so plain concatenation
					                # with a '/'-prefixed path would produce '//'; urljoin
					                # resolves relative, protocol-relative and absolute
					                # sources the same way the result link is resolved
					                thumbnail = urljoin(base_url, src)

					        content = escape(extract_text(result.xpath(content_xpath)))

					        # append result
					        results.append({'url': href,
					                        'title': title,
					                        'thumbnail': thumbnail,
					                        'img_src': thumbnail,
					                        'content': content})

					    # return results
					    return results
 | 
				
			||||||
@ -465,6 +465,11 @@ engines:
 | 
				
			|||||||
    shortcut : scc
 | 
					    shortcut : scc
 | 
				
			||||||
    disabled : True
 | 
					    disabled : True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  - name : framalibre
 | 
				
			||||||
 | 
					    engine : framalibre
 | 
				
			||||||
 | 
					    shortcut : frl
 | 
				
			||||||
 | 
					    disabled : True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#  - name : searx
 | 
					#  - name : searx
 | 
				
			||||||
#    engine : searx_engine
 | 
					#    engine : searx_engine
 | 
				
			||||||
#    shortcut : se
 | 
					#    shortcut : se
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										103
									
								
								tests/unit/engines/test_framalibre.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								tests/unit/engines/test_framalibre.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,103 @@
 | 
				
			|||||||
 | 
					# -*- coding: utf-8 -*-
 | 
				
			||||||
 | 
					from collections import defaultdict
 | 
				
			||||||
 | 
					import mock
 | 
				
			||||||
 | 
					from searx.engines import framalibre
 | 
				
			||||||
 | 
					from searx.testing import SearxTestCase
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class TestFramalibreEngine(SearxTestCase):
					    """Unit tests for the ``framalibre`` engine's request/response functions."""

					    def test_request(self):
					        """The generated URL targets framalibre.org and carries query and page."""
					        query = 'test_query'
					        dicto = defaultdict(dict)
					        # request() subtracts one from pageno, so page 1 must map to page=0
					        dicto['pageno'] = 1
					        params = framalibre.request(query, dicto)
					        self.assertIn('url', params)
					        self.assertIn(query, params['url'])
					        self.assertIn('framalibre.org', params['url'])
					        self.assertIn('keys=test_query', params['url'])
					        self.assertIn('page=0', params['url'])

					    def test_response(self):
					        """Invalid inputs raise, empty pages give [], a real hit is parsed."""
					        # objects without a ``text`` attribute are rejected
					        self.assertRaises(AttributeError, framalibre.response, None)
					        self.assertRaises(AttributeError, framalibre.response, [])
					        self.assertRaises(AttributeError, framalibre.response, '')
					        self.assertRaises(AttributeError, framalibre.response, '[]')

					        # bodies without any result node produce an empty list
					        response = mock.Mock(text='{}')
					        self.assertEqual(framalibre.response(response), [])

					        response = mock.Mock(text='{"data": []}')
					        self.assertEqual(framalibre.response(response), [])

					        html = u"""
					        <div class="nodes-list-row">
					          <div id="node-431"
					              class="node node-logiciel-annuaires node-promoted node-teaser node-teaser node-sheet clearfix nodes-list"
					              about="/content/gogs" typeof="sioc:Item foaf:Document">
					            <header class="media">
					              <div class="media-left">
					                <div class="field field-name-field-logo field-type-image field-label-hidden">
					                  <div class="field-items">
					                    <div class="field-item even">
					                      <a href="/content/gogs">
					                        <img class="media-object img-responsive" typeof="foaf:Image"
					 src="https://framalibre.org/sites/default/files/styles/teaser_logo/public/leslogos/gogs-lg.png?itok=rrCxKKBy"
					 width="70" height="70" alt="" />
					                      </a>
					                    </div>
					                  </div>
					                </div>
					              </div>
					              <div class="media-body">
					                <h3 class="node-title"><a href="/content/gogs">Gogs</a></h3>
					                <span property="dc:title" content="Gogs" class="rdf-meta element-hidden"></span>
					                <div class="field field-name-field-annuaires field-type-taxonomy-term-reference field-label-hidden">
					                  <div class="field-items">
					                    <div class="field-item even">
					                      <a href="/annuaires/cloudwebapps"
					 typeof="skos:Concept" property="rdfs:label skos:prefLabel"
					 datatype="" class="label label-primary">Cloud/webApps</a>
					                    </div>
					                  </div>
					                </div>
					              </div>
					            </header>
					            <div class="content">
					              <div class="field field-name-field-votre-appr-ciation field-type-fivestar field-label-hidden">
					                <div class="field-items">
					                  <div class="field-item even">
					                  </div>
					                </div>
					              </div>
					              <div class="field field-name-body field-type-text-with-summary field-label-hidden">
					                <div class="field-items">
					                  <div class="field-item even" property="content:encoded">
					                    <p>Gogs est une interface web basée sur git et une bonne alternative à GitHub.</p>
					                  </div>
					                </div>
					              </div>
					            </div>
					            <footer>
					              <a href="/content/gogs" class="read-more btn btn-default btn-sm">Voir la notice</a>
					              <div class="field field-name-field-lien-officiel field-type-link-field field-label-hidden">
					                <div class="field-items">
					                  <div class="field-item even">
					                    <a href="https://gogs.io/" target="_blank" title="Voir le site officiel">
					                      <span class="glyphicon glyphicon-globe"></span>
					                      <span class="sr-only">Lien officiel</span>
					                    </a>
					                  </div>
					                </div>
					              </div>
					            </footer>
					          </div>
					        </div>
					        """
					        response = mock.Mock(text=html)
					        results = framalibre.response(response)
					        self.assertIsInstance(results, list)
					        self.assertEqual(len(results), 1)
					        self.assertEqual(results[0]['title'], 'Gogs')
					        self.assertEqual(results[0]['url'],
					                         'https://framalibre.org/content/gogs')
					        self.assertEqual(results[0]['content'],
					                         u"Gogs est une interface web basée sur git et une bonne alternative à GitHub.")
					        # the logo is exposed under both image keys with an unchanged absolute URL
					        logo = ('https://framalibre.org/sites/default/files/styles/teaser_logo'
					                '/public/leslogos/gogs-lg.png?itok=rrCxKKBy')
					        self.assertEqual(results[0]['thumbnail'], logo)
					        self.assertEqual(results[0]['img_src'], logo)
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user