mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-04 03:27:06 -05:00 
			
		
		
		
	Merge branch 'master' into patch-2
This commit is contained in:
		
						commit
						c2da901afa
					
				@ -1,6 +1,8 @@
 | 
				
			|||||||
 | 
					import hashlib
 | 
				
			||||||
import random
 | 
					import random
 | 
				
			||||||
import string
 | 
					import string
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
 | 
					import uuid
 | 
				
			||||||
from flask_babel import gettext
 | 
					from flask_babel import gettext
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# required answerer attribute
 | 
					# required answerer attribute
 | 
				
			||||||
@ -16,9 +18,13 @@ else:
 | 
				
			|||||||
    random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
 | 
					    random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def random_characters():
 | 
				
			||||||
 | 
					    return [random.choice(random_string_letters)
 | 
				
			||||||
 | 
					            for _ in range(random.randint(8, 32))]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def random_string():
 | 
					def random_string():
 | 
				
			||||||
    return u''.join(random.choice(random_string_letters)
 | 
					    return u''.join(random_characters())
 | 
				
			||||||
                    for _ in range(random.randint(8, 32)))
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def random_float():
 | 
					def random_float():
 | 
				
			||||||
@ -29,9 +35,21 @@ def random_int():
 | 
				
			|||||||
    return unicode(random.randint(-random_int_max, random_int_max))
 | 
					    return unicode(random.randint(-random_int_max, random_int_max))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def random_sha256():
 | 
				
			||||||
 | 
					    m = hashlib.sha256()
 | 
				
			||||||
 | 
					    m.update(b''.join(random_characters()))
 | 
				
			||||||
 | 
					    return unicode(m.hexdigest())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def random_uuid():
 | 
				
			||||||
 | 
					    return unicode(uuid.uuid4())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
random_types = {b'string': random_string,
 | 
					random_types = {b'string': random_string,
 | 
				
			||||||
                b'int': random_int,
 | 
					                b'int': random_int,
 | 
				
			||||||
                b'float': random_float}
 | 
					                b'float': random_float,
 | 
				
			||||||
 | 
					                b'sha256': random_sha256,
 | 
				
			||||||
 | 
					                b'uuid': random_uuid}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# required answerer function
 | 
					# required answerer function
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										76
									
								
								searx/engines/duden.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								searx/engines/duden.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,76 @@
 | 
				
			|||||||
 | 
					"""
 | 
				
			||||||
 | 
					 Duden
 | 
				
			||||||
 | 
					 @website     https://www.duden.de
 | 
				
			||||||
 | 
					 @provide-api no
 | 
				
			||||||
 | 
					 @using-api   no
 | 
				
			||||||
 | 
					 @results     HTML (using search portal)
 | 
				
			||||||
 | 
					 @stable      no (HTML can change)
 | 
				
			||||||
 | 
					 @parse       url, title, content
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from lxml import html, etree
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					from searx.engines.xpath import extract_text
 | 
				
			||||||
 | 
					from searx.url_utils import quote
 | 
				
			||||||
 | 
					from searx import logger
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					categories = ['general']
 | 
				
			||||||
 | 
					paging = True
 | 
				
			||||||
 | 
					language_support = False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# search-url
 | 
				
			||||||
 | 
					base_url = 'https://www.duden.de/'
 | 
				
			||||||
 | 
					search_url = base_url + 'suchen/dudenonline/{query}?page={offset}'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def request(query, params):
 | 
				
			||||||
 | 
					    '''pre-request callback
 | 
				
			||||||
 | 
					    params<dict>:
 | 
				
			||||||
 | 
					      method  : POST/GET
 | 
				
			||||||
 | 
					      headers : {}
 | 
				
			||||||
 | 
					      data    : {} # if method == POST
 | 
				
			||||||
 | 
					      url     : ''
 | 
				
			||||||
 | 
					      category: 'search category'
 | 
				
			||||||
 | 
					      pageno  : 1 # number of the requested page
 | 
				
			||||||
 | 
					    '''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    offset = (params['pageno'] - 1)
 | 
				
			||||||
 | 
					    params['url'] = search_url.format(offset=offset, query=quote(query))
 | 
				
			||||||
 | 
					    return params
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def response(resp):
 | 
				
			||||||
 | 
					    '''post-response callback
 | 
				
			||||||
 | 
					    resp: requests response object
 | 
				
			||||||
 | 
					    '''
 | 
				
			||||||
 | 
					    results = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    dom = html.fromstring(resp.text)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        number_of_results_string = re.sub('[^0-9]', '', dom.xpath(
 | 
				
			||||||
 | 
					            '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()')[0]
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        results.append({'number_of_results': int(number_of_results_string)})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    except:
 | 
				
			||||||
 | 
					        logger.debug("Couldn't read number of results.")
 | 
				
			||||||
 | 
					        pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for result in dom.xpath('//section[@class="wide" and not(contains(@style,"overflow:hidden"))]'):
 | 
				
			||||||
 | 
					        try:
 | 
				
			||||||
 | 
					            logger.debug("running for %s" % str(result))
 | 
				
			||||||
 | 
					            link = result.xpath('.//h2/a')[0]
 | 
				
			||||||
 | 
					            url = link.attrib.get('href')
 | 
				
			||||||
 | 
					            title = result.xpath('string(.//h2/a)')
 | 
				
			||||||
 | 
					            content = extract_text(result.xpath('.//p'))
 | 
				
			||||||
 | 
					            # append result
 | 
				
			||||||
 | 
					            results.append({'url': url,
 | 
				
			||||||
 | 
					                            'title': title,
 | 
				
			||||||
 | 
					                            'content': content})
 | 
				
			||||||
 | 
					        except:
 | 
				
			||||||
 | 
					            logger.debug('result parse error in:\n%s', etree.tostring(result, pretty_print=True))
 | 
				
			||||||
 | 
					            continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return results
 | 
				
			||||||
@ -714,6 +714,11 @@ engines:
 | 
				
			|||||||
    shortcut : 1337x
 | 
					    shortcut : 1337x
 | 
				
			||||||
    disabled : True
 | 
					    disabled : True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  - name : Duden
 | 
				
			||||||
 | 
					    engine : duden
 | 
				
			||||||
 | 
					    shortcut : du
 | 
				
			||||||
 | 
					    disabled : True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#  - name : yacy
 | 
					#  - name : yacy
 | 
				
			||||||
#    engine : yacy
 | 
					#    engine : yacy
 | 
				
			||||||
#    shortcut : ya
 | 
					#    shortcut : ya
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										41
									
								
								tests/unit/engines/test_duden.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								tests/unit/engines/test_duden.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,41 @@
 | 
				
			|||||||
 | 
					from collections import defaultdict
 | 
				
			||||||
 | 
					import mock
 | 
				
			||||||
 | 
					from searx.engines import duden
 | 
				
			||||||
 | 
					from searx.testing import SearxTestCase
 | 
				
			||||||
 | 
					from datetime import datetime
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class TestDudenEngine(SearxTestCase):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_request(self):
 | 
				
			||||||
 | 
					        query = 'Haus'
 | 
				
			||||||
 | 
					        dic = defaultdict(dict)
 | 
				
			||||||
 | 
					        dic['pageno'] = 1
 | 
				
			||||||
 | 
					        params = duden.request(query, dic)
 | 
				
			||||||
 | 
					        self.assertTrue('url' in params)
 | 
				
			||||||
 | 
					        self.assertTrue(query in params['url'])
 | 
				
			||||||
 | 
					        self.assertTrue('duden.de' in params['url'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_response(self):
 | 
				
			||||||
 | 
					        resp = mock.Mock(text='<html></html>')
 | 
				
			||||||
 | 
					        self.assertEqual(duden.response(resp), [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        html = """
 | 
				
			||||||
 | 
					        <section class="wide">
 | 
				
			||||||
 | 
					        <h2><a href="https://this.is.the.url/" class="hidden-link"><strong>This is the title</strong> also here</a></h2>
 | 
				
			||||||
 | 
					        <p>This is the <strong>content</strong></p>
 | 
				
			||||||
 | 
					        <a href="https://this.is.the.url/">Zum vollständigen Artikel</a>
 | 
				
			||||||
 | 
					        </section>
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        resp = mock.Mock(text=html)
 | 
				
			||||||
 | 
					        results = duden.response(resp)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.assertEqual(len(results), 1)
 | 
				
			||||||
 | 
					        self.assertEqual(type(results), list)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # testing result (dictionary entry)
 | 
				
			||||||
 | 
					        r = results[0]
 | 
				
			||||||
 | 
					        self.assertEqual(r['url'], 'https://this.is.the.url/')
 | 
				
			||||||
 | 
					        self.assertEqual(r['title'], 'This is the title also here')
 | 
				
			||||||
 | 
					        self.assertEqual(r['content'], 'This is the content')
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user