Mirror of https://github.com/searxng/searxng.git
[mod] do not escape html content in engines

parent 28f12ef5a0
commit 16bdc0baf4
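
Every hunk below makes the same change: engines stop passing titles and contents through cgi.escape() and return the raw extracted text instead, so HTML escaping is presumably applied once by the result-rendering layer rather than by each engine. A minimal sketch of the before/after pattern (hypothetical make_result_* helpers for illustration, not code from this diff):

from cgi import escape  # Python 2 stdlib helper that each hunk below stops importing


def make_result_old(title, content):
    # before this commit: every engine HTML-escaped its own fields
    return {'title': escape(title), 'content': escape(content)}


def make_result_new(title, content):
    # after this commit: engines return the extracted text untouched
    return {'title': title, 'content': content}


r_old = make_result_old('R&D <video>', 'AT&T')
print(r_old['title'])    # R&amp;D &lt;video&gt;
r_new = make_result_new('R&D <video>', 'AT&T')
print(r_new['title'])    # R&D <video>

Centralizing the escaping also lets engines keep deliberate markup, such as the "<br />".join(content.split("\n")) line that survives unescaped in one hunk below.
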
@@ -12,7 +12,6 @@
 """
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
@@ -135,7 +134,7 @@ def response(resp):
     for result in dom.xpath(xpath_results):
         link = result.xpath(xpath_link)[0]
         href = urljoin(base_url, link.attrib.get('href'))
-        title = escape(extract_text(link))
+        title = extract_text(link)
 
         results.append({'url': href,
                         'title': title})
@@ -16,7 +16,6 @@
 from lxml import etree
 from urllib import urlencode
 from searx.utils import searx_useragent
-from cgi import escape
 from datetime import datetime
 import re
 
@@ -94,7 +93,7 @@ def response(resp):
                 url = item.text
 
             elif item.attrib["name"] == "dcdescription":
-                content = escape(item.text[:300])
+                content = item.text[:300]
                 if len(item.text) > 300:
                     content += "..."
 
@@ -14,7 +14,6 @@
 """
 
 from urllib import urlencode
-from cgi import escape
 from lxml import html
 from searx.engines.xpath import extract_text
 
@@ -61,7 +60,7 @@ def response(resp):
         link = result.xpath('.//h3/a')[0]
         url = link.attrib.get('href')
         title = extract_text(link)
-        content = escape(extract_text(result.xpath('.//p')))
+        content = extract_text(result.xpath('.//p'))
 
         # append result
         results.append({'url': url,
@@ -73,7 +72,7 @@ def response(resp):
         link = result.xpath('.//h2/a')[0]
         url = link.attrib.get('href')
         title = extract_text(link)
-        content = escape(extract_text(result.xpath('.//p')))
+        content = extract_text(result.xpath('.//p'))
 
         # append result
         results.append({'url': url,
@@ -11,7 +11,6 @@
 """
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import quote
 from lxml import html
 from operator import itemgetter
@@ -51,8 +50,8 @@ def response(resp):
     for result in search_res:
         link = result.xpath('.//td[@class="torrent_name"]//a')[0]
         href = urljoin(url, link.attrib.get('href'))
-        title = escape(extract_text(link))
-        content = escape(extract_text(result.xpath('.//pre[@class="snippet"]')[0]))
+        title = extract_text(link)
+        content = extract_text(result.xpath('.//pre[@class="snippet"]')[0])
         content = "<br />".join(content.split("\n"))
 
         filesize = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[0]
@@ -14,7 +14,6 @@
 
 from urllib import urlencode
 from json import loads
-from cgi import escape
 from datetime import datetime
 
 # engine dependent config
@@ -57,7 +56,7 @@ def response(resp):
     for res in search_res['list']:
         title = res['title']
         url = res['url']
-        content = escape(res['description'])
+        content = res['description']
         thumbnail = res['thumbnail_360_url']
         publishedDate = datetime.fromtimestamp(res['created_time'], None)
         embedded = embedded_url.format(videoid=res['id'])
@@ -51,10 +51,11 @@ def response(resp):
             if url.startswith('http://'):
                 url = 'https' + url[4:]
 
-            content = result['artist']['name'] +\
-                " • " +\
-                result['album']['title'] +\
-                " • " + result['title']
+            content = '{} - {} - {}'.format(
+                result['artist']['name'],
+                result['album']['title'],
+                result['title'])
+
             embedded = embedded_url.format(audioid=result['id'])
 
             # append result
@@ -12,7 +12,6 @@
 import re
 from urlparse import urljoin
 from lxml import html
-from cgi import escape
 from searx.utils import is_valid_lang
 
 categories = ['general']
@@ -62,8 +61,8 @@ def response(resp):
 
         results.append({
             'url': urljoin(resp.url, '?%d' % k),
-            'title': escape(from_result.text_content()),
-            'content': escape('; '.join(to_results))
+            'title': from_result.text_content(),
+            'content': '; '.join(to_results)
         })
 
     return results
@@ -13,7 +13,6 @@
 from urllib import quote_plus
 from json import loads
 from lxml import html
-from cgi import escape
 from dateutil import parser
 
 # engine dependent config
@@ -56,7 +55,7 @@ def response(resp):
         url = result.attrib.get('data-contenturl')
         thumbnail = result.xpath('.//img')[0].attrib.get('src')
         title = ''.join(result.xpath(title_xpath))
-        content = escape(''.join(result.xpath(content_xpath)))
+        content = ''.join(result.xpath(content_xpath))
         pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
         publishedDate = parser.parse(pubdate)
 
@@ -9,7 +9,6 @@
  @parse        url, title, content
 """
 
-from cgi import escape
 from urllib import urlencode
 from searx.engines.xpath import extract_text
 from lxml import html
@@ -43,7 +42,7 @@ def response(resp):
         img_src = app.xpath('.//img/@src')[0]
 
         content = extract_text(app.xpath('./p')[0])
-        content = escape(content.replace(title, '', 1).strip())
+        content = content.replace(title, '', 1).strip()
 
         results.append({'url': url,
                         'title': title,
@@ -77,21 +77,13 @@ def response(resp):
 
         url = build_flickr_url(photo['owner'], photo['id'])
 
-        title = photo['title']
-
-        content = '<span class="photo-author">' +\
-                  photo['ownername'] +\
-                  '</span><br />' +\
-                  '<span class="description">' +\
-                  photo['description']['_content'] +\
-                  '</span>'
-
         # append result
         results.append({'url': url,
-                        'title': title,
+                        'title': photo['title'],
                         'img_src': img_src,
                         'thumbnail_src': thumbnail_src,
-                        'content': content,
+                        'content': photo['description']['_content'],
+                        'author': photo['ownername'],
                         'template': 'images.html'})
 
     # return results
@@ -102,16 +102,15 @@ def response(resp):
 
         title = photo.get('title', '')
 
-        content = '<span class="photo-author">' +\
-                  photo['username'] +\
-                  '</span><br />'
+        author = photo['username']
 
         # append result
         results.append({'url': url,
                         'title': title,
                         'img_src': img_src,
                         'thumbnail_src': thumbnail_src,
-                        'content': content,
+                        'content': '',
+                        'author': author,
                         'template': 'images.html'})
 
     return results
@@ -10,7 +10,6 @@
  @parse       url, title, content
 """
 
-from cgi import escape
 from json import loads
 from random import randint
 from time import time
@@ -78,8 +77,8 @@ def response(resp):
     for result in response_json['results']:
         # append result
         results.append({'url': result['url'],
-                        'title': escape(result['title']),
-                        'content': escape(result['sum'])})
+                        'title': result['title'],
+                        'content': result['sum']})
 
     # return results
     return results
@@ -12,7 +12,6 @@
 
 from urllib import urlencode
 from json import loads
-from cgi import escape
 
 # engine dependent config
 categories = ['it']
@@ -48,7 +47,7 @@ def response(resp):
         url = res['html_url']
 
         if res['description']:
-            content = escape(res['description'][:500])
+            content = res['description'][:500]
         else:
             content = ''
 
@@ -9,7 +9,6 @@
 # @parse       url, title, content, suggestion
 
 import re
-from cgi import escape
 from urllib import urlencode
 from urlparse import urlparse, parse_qsl
 from lxml import html, etree
@@ -155,7 +154,7 @@ def parse_url(url_string, google_hostname):
 def extract_text_from_dom(result, xpath):
     r = result.xpath(xpath)
     if len(r) > 0:
-        return escape(extract_text(r[0]))
+        return extract_text(r[0])
     return None
 
@@ -264,7 +263,7 @@ def response(resp):
     # parse suggestion
     for suggestion in dom.xpath(suggestion_xpath):
         # append suggestion
-        results.append({'suggestion': escape(extract_text(suggestion))})
+        results.append({'suggestion': extract_text(suggestion)})
 
     # return results
     return results
@@ -11,7 +11,6 @@
 """
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import quote
 from lxml import html
 from operator import itemgetter
@@ -57,7 +56,7 @@ def response(resp):
         link = result.xpath('.//a[@class="cellMainLink"]')[0]
         href = urljoin(url, link.attrib['href'])
         title = extract_text(link)
-        content = escape(extract_text(result.xpath(content_xpath)))
+        content = extract_text(result.xpath(content_xpath))
         seed = extract_text(result.xpath('.//td[contains(@class, "green")]'))
         leech = extract_text(result.xpath('.//td[contains(@class, "red")]'))
         filesize_info = extract_text(result.xpath('.//td[contains(@class, "nobr")]'))
@@ -9,7 +9,6 @@
  @parse        url, title, content, seed, leech, torrentfile
 """
 
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
@@ -78,7 +77,7 @@ def response(resp):
 
         # torrent title
         page_a = result.xpath(xpath_title)[0]
-        title = escape(extract_text(page_a))
+        title = extract_text(page_a)
 
         # link to the page
         href = page_a.attrib.get('href')
@@ -90,7 +89,7 @@ def response(resp):
         try:
             file_size, suffix = result.xpath(xpath_filesize)[0].split(' ')
             file_size = int(float(file_size) * get_filesize_mul(suffix))
-        except Exception as e:
+        except:
             file_size = None
 
         # seed count
@@ -105,7 +104,6 @@ def response(resp):
         # content string contains all information not included into template
         content = 'Category: "{category}". Downloaded {downloads} times.'
         content = content.format(category=category, downloads=downloads)
-        content = escape(content)
 
         results.append({'url': href,
                         'title': title,
@@ -9,7 +9,6 @@
 # @parse       url, title, content, seed, leech, magnetlink
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import quote
 from lxml import html
 from operator import itemgetter
@@ -62,7 +61,7 @@ def response(resp):
         link = result.xpath('.//div[@class="detName"]//a')[0]
         href = urljoin(url, link.attrib.get('href'))
         title = extract_text(link)
-        content = escape(extract_text(result.xpath(content_xpath)))
+        content = extract_text(result.xpath(content_xpath))
         seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
 
         # convert seed to int if possible
@@ -11,7 +11,6 @@
 """
 
 import json
-from cgi import escape
 from urllib import urlencode
 from urlparse import urlparse, urljoin
 from datetime import datetime
@@ -68,7 +67,7 @@ def response(resp):
             img_results.append(params)
         else:
             created = datetime.fromtimestamp(data['created_utc'])
-            content = escape(data['selftext'])
+            content = data['selftext']
             if len(content) > 500:
                 content = content[:500] + '...'
             params['content'] = content
@@ -44,20 +44,12 @@ def response(resp):
     # parse results
     for result in search_results.get('results', []):
         href = result['url']
-        title = "[" + result['type'] + "] " +\
-                result['namespace'] +\
-                " " + result['name']
-        content = '<span class="highlight">[' +\
-                  result['type'] + "] " +\
-                  result['name'] + " " +\
-                  result['synopsis'] +\
-                  "</span><br />" +\
-                  result['description']
+        title = "[{}] {} {}".format(result['type'], result['namespace'], result['name'])
 
         # append result
         results.append({'url': href,
                         'title': title,
-                        'content': content})
+                        'content': result['description']})
 
     # return results
     return results
@@ -9,7 +9,6 @@
 # @parse       url, title, content, seed, leech, magnetlink
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import quote
 from lxml import html
 from operator import itemgetter
@@ -46,10 +46,11 @@ def response(resp):
         if result['type'] == 'track':
             title = result['name']
             url = result['external_urls']['spotify']
-            content = result['artists'][0]['name'] +\
-                " • " +\
-                result['album']['name'] +\
-                " • " + result['name']
+            content = '{} - {} - {}'.format(
+                result['artists'][0]['name'],
+                result['album']['name'],
+                result['name'])
+
             embedded = embedded_url.format(audioid=result['id'])
 
             # append result
@@ -11,7 +11,6 @@
 """
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
@@ -48,8 +47,8 @@ def response(resp):
     for result in dom.xpath(results_xpath):
         link = result.xpath(link_xpath)[0]
         href = urljoin(url, link.attrib.get('href'))
-        title = escape(extract_text(link))
-        content = escape(extract_text(result.xpath(content_xpath)))
+        title = extract_text(link)
+        content = extract_text(result.xpath(content_xpath))
 
         # append result
         results.append({'url': href,
@@ -11,7 +11,6 @@
 # @todo        paging
 
 from lxml import html
-from cgi import escape
 from dateutil import parser
 from datetime import datetime, timedelta
 import re
@@ -79,10 +78,10 @@ def response(resp):
         if re.match(r"^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
             continue
 
-        title = escape(extract_text(link))
+        title = extract_text(link)
 
         if result.xpath('./p[@class="desc clk"]'):
-            content = escape(extract_text(result.xpath('./p[@class="desc clk"]')))
+            content = extract_text(result.xpath('./p[@class="desc clk"]'))
         else:
             content = ''
 
@@ -10,7 +10,6 @@
  @parse       url, title, content
 """
 
-from cgi import escape
 from urllib import quote_plus
 from lxml import html
 from searx.languages import language_codes
@@ -59,7 +58,7 @@ def response(resp):
         elif search_lang:
             href = href + search_lang + '/'
 
-        title = escape(extract_text(link))
+        title = extract_text(link)
 
         content = extract_text(result.xpath('.//div[contains(@class,"red")]'))
         content = content + " - "
@@ -75,7 +74,7 @@ def response(resp):
         # append result
         results.append({'url': href,
                         'title': title,
-                        'content': escape(content)})
+                        'content': content})
 
     # return results
     return results
@@ -10,7 +10,6 @@
  @parse       url, title, content
 """
 
-from cgi import escape
 from json import loads
 from urllib import urlencode, unquote
 import re
@@ -78,7 +77,7 @@ def response(resp):
 
             # append result
             results.append({'url': result['SourceUrl'],
-                            'title': escape(result['Title']),
+                            'title': result['Title'],
                             'content': '',
                             'img_src': img_url,
                             'template': 'images.html'})
@@ -90,8 +89,8 @@ def response(resp):
 
             # append result
             results.append({'url': result_url,
-                            'title': escape(result_title),
-                            'content': escape(result_content)})
+                            'title': result_title,
+                            'content': result_content})
 
     # parse images
     for result in json.get('Images', []):
@@ -100,7 +99,7 @@ def response(resp):
 
         # append result
         results.append({'url': result['SourceUrl'],
-                        'title': escape(result['Title']),
+                        'title': result['Title'],
                         'content': '',
                         'img_src': img_url,
                         'template': 'images.html'})
@@ -11,7 +11,6 @@
 """
 
 import re
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
@@ -12,7 +12,6 @@
 """
 
 import re
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
@@ -9,7 +9,6 @@
  @parse       url, title, content
 """
 import re
-from cgi import escape
 from searx.utils import is_valid_lang
 
 categories = ['general']
@@ -52,14 +51,14 @@ def request(query, params):
 def response(resp):
     results = []
     results.append({
-        'url': escape(web_url.format(
+        'url': web_url.format(
             from_lang=resp.search_params['from_lang'][2],
             to_lang=resp.search_params['to_lang'][2],
-            query=resp.search_params['query'])),
-        'title': escape('[{0}-{1}] {2}'.format(
+            query=resp.search_params['query']),
+        'title': '[{0}-{1}] {2}'.format(
             resp.search_params['from_lang'][1],
             resp.search_params['to_lang'][1],
-            resp.search_params['query'])),
-        'content': escape(resp.json()['responseData']['translatedText'])
+            resp.search_params['query']),
+        'content': resp.json()['responseData']['translatedText']
     })
     return results
@@ -8,7 +8,6 @@
 # @stable      no
 # @parse       url, infobox
 
-from cgi import escape
 from json import loads
 from time import time
 from urllib import urlencode
@@ -9,7 +9,6 @@
  @parse       url, title, content
 """
 
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.search import logger
@@ -52,8 +51,8 @@ def response(resp):
     for result in dom.xpath(results_xpath):
         try:
             res = {'url': result.xpath(url_xpath)[0],
-                   'title': escape(''.join(result.xpath(title_xpath))),
-                   'content': escape(''.join(result.xpath(content_xpath)))}
+                   'title': ''.join(result.xpath(title_xpath)),
+                   'content': ''.join(result.xpath(content_xpath))}
         except:
             logger.exception('yandex parse crash')
             continue