	Merge pull request #540 from a01200356/wikipedia_infobox
[enh] wikipedia infobox
This commit is contained in:
commit f46057feb2
@@ -43,3 +43,4 @@ generally made searx better:
 - Kang-min Liu
 - Kirill Isakov
 - Guilhem Bonnefille
+- Marc Abonce Seguin
@@ -1,5 +1,6 @@
 import json
 from urllib import urlencode
+from re import compile, sub
 from lxml import html
 from searx.utils import html_to_text
 from searx.engines.xpath import extract_text
@@ -7,6 +8,8 @@ from searx.engines.xpath import extract_text
 url = 'https://api.duckduckgo.com/'\
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
 
+http_regex = compile(r'^http:')
+
 
 def result_to_text(url, text, htmlResult):
     # TODO : remove result ending with "Meaning" or "Category"
@@ -19,8 +22,8 @@ def result_to_text(url, text, htmlResult):
 
 
 def request(query, params):
-    # TODO add kl={locale}
     params['url'] = url.format(query=urlencode({'q': query}))
+    params['headers']['Accept-Language'] = params['language']
     return params
 
 
@@ -103,6 +106,10 @@ def response(resp):
         urls.append({'title': search_res.get('DefinitionSource'),
                     'url': definitionURL})
 
+    # to merge with wikidata's infobox
+    if infobox_id:
+        infobox_id = http_regex.sub('https:', infobox_id)
+
     # entity
     entity = search_res.get('Entity', None)
     # TODO continent / country / department / location / waterfall /
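A minimal standalone sketch (not part of the patch) of what the new normalization does: DuckDuckGo's definition URL is forced to https so its infobox id matches wikidata's id when the two infoboxes are merged. The sample value mirrors the updated tests further down.

    from re import compile

    http_regex = compile(r'^http:')

    infobox_id = 'http://definition.url'
    infobox_id = http_regex.sub('https:', infobox_id)
    # infobox_id is now 'https://definition.url' and can match wikidata's https id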
@@ -86,15 +86,15 @@ def getDetail(jsonresponse, wikidata_id, language, locale):
         results.append({'title': title, 'url': official_website})
 
     wikipedia_link_count = 0
-    if language != 'en':
-        wikipedia_link_count += add_url(urls,
-                                        'Wikipedia (' + language + ')',
-                                        get_wikilink(result, language +
-                                                     'wiki'))
-    wikipedia_en_link = get_wikilink(result, 'enwiki')
+    wikipedia_link = get_wikilink(result, language + 'wiki')
     wikipedia_link_count += add_url(urls,
-                                    'Wikipedia (en)',
-                                    wikipedia_en_link)
+                                    'Wikipedia (' + language + ')',
+                                    wikipedia_link)
+    if language != 'en':
+        wikipedia_en_link = get_wikilink(result, 'enwiki')
+        wikipedia_link_count += add_url(urls,
+                                        'Wikipedia (en)',
+                                        wikipedia_en_link)
     if wikipedia_link_count == 0:
         misc_language = get_wiki_firstlanguage(result, 'wiki')
         if misc_language is not None:
@@ -188,7 +188,7 @@ def getDetail(jsonresponse, wikidata_id, language, locale):
     else:
         results.append({
                        'infobox': title,
-                       'id': wikipedia_en_link,
+                       'id': wikipedia_link,
                        'content': description,
                        'attributes': attributes,
                        'urls': urls
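Illustration only, with a stubbed lookup table instead of searx's wikidata result object: after this change the localized article supplies the infobox id and the first listed URL, and the English article is appended as an extra URL only when the interface language is not English.

    def pick_wikipedia_urls(language, wikilinks):
        # wikilinks stands in for get_wikilink(result, ...) lookups
        urls = []
        wikipedia_link = wikilinks.get(language + 'wiki')
        if wikipedia_link:
            urls.append(('Wikipedia (' + language + ')', wikipedia_link))
        if language != 'en' and wikilinks.get('enwiki'):
            urls.append(('Wikipedia (en)', wikilinks['enwiki']))
        return wikipedia_link, urls

    # for a French query both links are kept, but the fr article becomes the id
    infobox_id, urls = pick_wikipedia_urls('fr', {
        'frwiki': 'https://fr.wikipedia.org/wiki/Example',
        'enwiki': 'https://en.wikipedia.org/wiki/Example'})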
							
								
								
									
searx/engines/wikipedia.py (Normal file, 114 lines)
@@ -0,0 +1,114 @@
+"""
+ Wikipedia (Web)
+
+ @website     https://{language}.wikipedia.org
+ @provide-api yes
+
+ @using-api   yes
+ @results     JSON
+ @stable      yes
+ @parse       url, infobox
+"""
+
+from json import loads
+from urllib import urlencode, quote
+
+# search-url
+base_url = 'https://{language}.wikipedia.org/'
+search_postfix = 'w/api.php?'\
+    'action=query'\
+    '&format=json'\
+    '&{query}'\
+    '&prop=extracts|pageimages'\
+    '&exintro'\
+    '&explaintext'\
+    '&pithumbsize=300'\
+    '&redirects'
+
+
+# set language in base_url
+def url_lang(lang):
+    if lang == 'all':
+        language = 'en'
+    else:
+        language = lang.split('_')[0]
+
+    return base_url.format(language=language)
+
+
+# do search-request
+def request(query, params):
+    if query.islower():
+        query += '|' + query.title()
+
+    params['url'] = url_lang(params['language']) \
+        + search_postfix.format(query=urlencode({'titles': query}))
+
+    return params
+
+
+# get first meaningful paragraph
+# this should filter out disambiguation pages and notes above first paragraph
+# "magic numbers" were obtained by fine tuning
+def extract_first_paragraph(content, title, image):
+    first_paragraph = None
+
+    failed_attempts = 0
+    for paragraph in content.split('\n'):
+
+        starts_with_title = paragraph.lower().find(title.lower(), 0, len(title) + 35)
+        length = len(paragraph)
+
+        if length >= 200 or (starts_with_title >= 0 and (image or length >= 150)):
+            first_paragraph = paragraph
+            break
+
+        failed_attempts += 1
+        if failed_attempts > 3:
+            return None
+
+    return first_paragraph
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    search_result = loads(resp.content)
+
+    # wikipedia article's unique id
+    # first valid id is assumed to be the requested article
+    for article_id in search_result['query']['pages']:
+        page = search_result['query']['pages'][article_id]
+        if int(article_id) > 0:
+            break
+
+    if int(article_id) < 0:
+        return []
+
+    title = page.get('title')
+
+    image = page.get('thumbnail')
+    if image:
+        image = image.get('source')
+
+    extract = page.get('extract')
+
+    summary = extract_first_paragraph(extract, title, image)
+    if not summary:
+        return []
+
+    # link to wikipedia article
+    # parenthesis are not quoted to make infobox mergeable with wikidata's
+    wikipedia_link = url_lang(resp.search_params['language']) \
+        + 'wiki/' + quote(title.replace(' ', '_').encode('utf8')).replace('%28', '(').replace('%29', ')')
+
+    results.append({'url': wikipedia_link, 'title': title})
+
+    results.append({'infobox': title,
+                    'id': wikipedia_link,
+                    'content': summary,
+                    'img_src': image,
+                    'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]})
+
+    return results
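A worked example (not searx code) of the URL the new engine builds for a lowercase query with a French locale; the lowercase/titlecase pair and the fr.wikipedia.org host are what test_request below checks for.

    from urllib import urlencode  # Python 2, matching the engine

    query = 'test_query'
    if query.islower():
        query += '|' + query.title()   # 'test_query|Test_Query'

    url = ('https://fr.wikipedia.org/w/api.php?action=query&format=json&'
           + urlencode({'titles': query})
           + '&prop=extracts|pageimages&exintro&explaintext'
           + '&pithumbsize=300&redirects')
    # .../w/api.php?action=query&format=json&titles=test_query%7CTest_Query&prop=...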
@@ -37,7 +37,7 @@ def merge_two_infoboxes(infobox1, infobox2):
         urls1 = infobox1.get('urls', None)
         if urls1 is None:
             urls1 = []
-            infobox1.set('urls', urls1)
+            infobox1['urls'] = urls1
 
         urlSet = set()
         for url in infobox1.get('urls', []):
@@ -47,11 +47,17 @@ def merge_two_infoboxes(infobox1, infobox2):
             if url.get('url', None) not in urlSet:
                 urls1.append(url)
 
+    if 'img_src' in infobox2:
+        img1 = infobox1.get('img_src', None)
+        img2 = infobox2.get('img_src')
+        if img1 is None:
+            infobox1['img_src'] = img2
+
     if 'attributes' in infobox2:
         attributes1 = infobox1.get('attributes', None)
         if attributes1 is None:
             attributes1 = []
-            infobox1.set('attributes', attributes1)
+            infobox1['attributes'] = attributes1
 
         attributeSet = set()
         for attribute in infobox1.get('attributes', []):
@@ -68,7 +74,7 @@ def merge_two_infoboxes(infobox1, infobox2):
             if result_content_len(content2) > result_content_len(content1):
                 infobox1['content'] = content2
         else:
-            infobox1.set('content', content2)
+            infobox1['content'] = content2
 
 
 def result_score(result):
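A toy example with plain dicts (not searx result objects): the new img_src branch copies an image into the first infobox only when it has none of its own, so an existing image is never overwritten during the merge.

    infobox1 = {'infobox': 'The Title', 'urls': []}
    infobox2 = {'infobox': 'The Title', 'img_src': 'img_src.jpg'}

    if 'img_src' in infobox2:
        if infobox1.get('img_src', None) is None:
            infobox1['img_src'] = infobox2['img_src']

    # infobox1['img_src'] is now 'img_src.jpg'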
@@ -43,10 +43,9 @@ engines:
     shortcut : bs
 
   - name : wikipedia
-    engine : mediawiki
+    engine : wikipedia
     shortcut : wp
     base_url : 'https://{language}.wikipedia.org/'
-    number_of_results : 1
 
   - name : bing
     engine : bing
@@ -93,6 +92,7 @@ engines:
   - name : ddg definitions
     engine : duckduckgo_definitions
     shortcut : ddd
+    disabled : True
 
   - name : digg
     engine : digg
@@ -1,8 +1,9 @@
 <div class="panel panel-default infobox">
     <div class="panel-heading">
-        <h4 class="panel-title infobox_part">{{ infobox.infobox }}</h4>
+        <bdi><h4 class="panel-title infobox_part">{{ infobox.infobox }}</h4></bdi>
     </div>
     <div class="panel-body">
+        <bdi>
         {% if infobox.img_src %}<img class="img-responsive center-block infobox_part" src="{{ image_proxify(infobox.img_src) }}" alt="{{ infobox.infobox }}" />{% endif %}
         {% if infobox.content %}<p class="infobox_part">{{ infobox.content }}</p>{% endif %}
 
@@ -28,5 +29,6 @@
             {% endfor %}
         </div>
         {% endif %}
+        </bdi>
     </div>
 </div>
@@ -123,7 +123,7 @@ class TestDDGDefinitionsEngine(SearxTestCase):
         self.assertEqual(results[1]['url'], 'result first url')
         self.assertEqual(results[2]['suggestion'], 'text')
         self.assertEqual(results[3]['infobox'], 'heading')
-        self.assertEqual(results[3]['id'], 'http://definition.url')
+        self.assertEqual(results[3]['id'], 'https://definition.url')
         self.assertEqual(results[3]['entity'], 'Entity')
         self.assertIn('abstract', results[3]['content'])
         self.assertIn('this is the definition', results[3]['content'])
@@ -240,7 +240,7 @@ class TestDDGDefinitionsEngine(SearxTestCase):
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 1)
         self.assertEqual(results[0]['infobox'], 'heading')
-        self.assertEqual(results[0]['id'], 'http://definition.url')
+        self.assertEqual(results[0]['id'], 'https://definition.url')
         self.assertEqual(results[0]['entity'], 'Entity')
         self.assertIn('abstract', results[0]['content'])
         self.assertIn('this is the definition', results[0]['content'])
							
								
								
									
tests/unit/engines/test_wikipedia.py (Normal file, 160 lines)
@@ -0,0 +1,160 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import wikipedia
+from searx.testing import SearxTestCase
+
+
+class TestWikipediaEngine(SearxTestCase):
+
+    def test_request(self):
+        query = 'test_query'
+        dicto = defaultdict(dict)
+        dicto['language'] = 'fr_FR'
+        params = wikipedia.request(query, dicto)
+        self.assertIn('url', params)
+        self.assertIn(query, params['url'])
+        self.assertIn('test_query', params['url'])
+        self.assertIn('Test_Query', params['url'])
+        self.assertIn('fr.wikipedia.org', params['url'])
+
+        query = 'Test_Query'
+        params = wikipedia.request(query, dicto)
+        self.assertIn('Test_Query', params['url'])
+        self.assertNotIn('test_query', params['url'])
+
+        dicto['language'] = 'all'
+        params = wikipedia.request(query, dicto)
+        self.assertIn('en', params['url'])
+
+    def test_response(self):
+        dicto = defaultdict(dict)
+        dicto['language'] = 'fr'
+
+        self.assertRaises(AttributeError, wikipedia.response, None)
+        self.assertRaises(AttributeError, wikipedia.response, [])
+        self.assertRaises(AttributeError, wikipedia.response, '')
+        self.assertRaises(AttributeError, wikipedia.response, '[]')
+
+        # page not found
+        json = """
+        {
+            "batchcomplete": "",
+            "query": {
+                "normalized": [],
+                "pages": {
+                    "-1": {
+                        "ns": 0,
+                        "title": "",
+                        "missing": ""
+                    }
+                }
+            }
+        }"""
+        response = mock.Mock(content=json, search_params=dicto)
+        self.assertEqual(wikipedia.response(response), [])
+
+        # normal case
+        json = """
+        {
+            "batchcomplete": "",
+            "query": {
+                "normalized": [],
+                "pages": {
+                    "12345": {
+                        "pageid": 12345,
+                        "ns": 0,
+                        "title": "The Title",
+                        "extract": "The Title is...",
+                        "thumbnail": {
+                            "source": "img_src.jpg"
+                        },
+                        "pageimage": "img_name.jpg"
+                    }
+                }
+            }
+        }"""
+        response = mock.Mock(content=json, search_params=dicto)
+        results = wikipedia.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertEqual(results[0]['title'], u'The Title')
+        self.assertIn('fr.wikipedia.org/wiki/The_Title', results[0]['url'])
+        self.assertEqual(results[1]['infobox'], u'The Title')
+        self.assertIn('fr.wikipedia.org/wiki/The_Title', results[1]['id'])
+        self.assertIn('The Title is...', results[1]['content'])
+        self.assertEqual(results[1]['img_src'], 'img_src.jpg')
+
+        # disambiguation page
+        json = """
+        {
+            "batchcomplete": "",
+            "query": {
+                "normalized": [],
+                "pages": {
+                    "12345": {
+                        "pageid": 12345,
+                        "ns": 0,
+                        "title": "The Title",
+                        "extract": "The Title can be:\\nThe Title 1\\nThe Title 2\\nThe Title 3\\nThe Title 4......................................................................................................................................." """  # noqa
+        json += """
+                    }
+                }
+            }
+        }"""
+        response = mock.Mock(content=json, search_params=dicto)
+        results = wikipedia.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 0)
+
+        # no image
+        json = """
+        {
+            "batchcomplete": "",
+            "query": {
+                "normalized": [],
+                "pages": {
+                    "12345": {
+                        "pageid": 12345,
+                        "ns": 0,
+                        "title": "The Title",
+                        "extract": "The Title is......................................................................................................................................................................................." """  # noqa
+        json += """
+                    }
+                }
+            }
+        }"""
+        response = mock.Mock(content=json, search_params=dicto)
+        results = wikipedia.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertIn('The Title is...', results[1]['content'])
+        self.assertEqual(results[1]['img_src'], None)
+
+        # title not in first paragraph
+        json = u"""
+        {
+            "batchcomplete": "",
+            "query": {
+                "normalized": [],
+                "pages": {
+                    "12345": {
+                        "pageid": 12345,
+                        "ns": 0,
+                        "title": "披頭四樂隊",
+                        "extract": "披头士乐队....................................................................................................................................................................................................\\n披頭四樂隊...", """  # noqa
+        json += """
+                        "thumbnail": {
+                            "source": "img_src.jpg"
+                        },
+                        "pageimage": "img_name.jpg"
+                    }
+                }
+            }
+        }"""
+        response = mock.Mock(content=json, search_params=dicto)
+        results = wikipedia.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
+        self.assertIn(u'披头士乐队...', results[1]['content'])