mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-04 03:27:06 -05:00 
			
		
		
		
	Merge pull request #1866 from return42/fix-news
bugfix: google-news and bing-news has changed the language parameter
This commit is contained in:
		
						commit
						a5d3585a0c
					
				
							
								
								
									
										13
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								Makefile
									
									
									
									
									
								
							@ -27,6 +27,7 @@ help:
 | 
				
			|||||||
	@echo  '  uninstall - uninstall (./local)'
 | 
						@echo  '  uninstall - uninstall (./local)'
 | 
				
			||||||
	@echo  '  gh-pages  - build docs & deploy on gh-pages branch'
 | 
						@echo  '  gh-pages  - build docs & deploy on gh-pages branch'
 | 
				
			||||||
	@echo  '  clean     - drop builds and environments'
 | 
						@echo  '  clean     - drop builds and environments'
 | 
				
			||||||
 | 
						@echo  '  project   - re-build generic files of the searx project'
 | 
				
			||||||
	@echo  ''
 | 
						@echo  ''
 | 
				
			||||||
	@$(MAKE) -s -f utils/makefile.include make-help
 | 
						@$(MAKE) -s -f utils/makefile.include make-help
 | 
				
			||||||
	@echo  ''
 | 
						@echo  ''
 | 
				
			||||||
@ -67,6 +68,18 @@ docs-live:  pyenvinstall sphinx-live
 | 
				
			|||||||
$(GH_PAGES)::
 | 
					$(GH_PAGES)::
 | 
				
			||||||
	@echo "doc available at --> $(DOCS_URL)"
 | 
						@echo "doc available at --> $(DOCS_URL)"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# update project files
 | 
				
			||||||
 | 
					# --------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					PHONY += project engines-languages
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					project: searx/data/engines_languages.json
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					searx/data/engines_languages.json:  pyenvinstall
 | 
				
			||||||
 | 
						$(PY_ENV_ACT); python utils/fetch_languages.py
 | 
				
			||||||
 | 
						mv engines_languages.json searx/data/engines_languages.json
 | 
				
			||||||
 | 
						mv languages.py searx/languages.py
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# test
 | 
					# test
 | 
				
			||||||
# ----
 | 
					# ----
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -5,6 +5,7 @@ mock==2.0.0
 | 
				
			|||||||
nose2[coverage_plugin]
 | 
					nose2[coverage_plugin]
 | 
				
			||||||
cov-core==1.15.0
 | 
					cov-core==1.15.0
 | 
				
			||||||
pep8==1.7.0
 | 
					pep8==1.7.0
 | 
				
			||||||
 | 
					pylint
 | 
				
			||||||
plone.testing==5.0.0
 | 
					plone.testing==5.0.0
 | 
				
			||||||
splinter==0.11.0
 | 
					splinter==0.11.0
 | 
				
			||||||
transifex-client==0.12.2
 | 
					transifex-client==0.12.2
 | 
				
			||||||
 | 
				
			|||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@ -110,13 +110,18 @@ def response(resp):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
# get supported languages from their site
 | 
					# get supported languages from their site
 | 
				
			||||||
def _fetch_supported_languages(resp):
 | 
					def _fetch_supported_languages(resp):
 | 
				
			||||||
    supported_languages = []
 | 
					    lang_tags = set()
 | 
				
			||||||
    dom = html.fromstring(resp.text)
 | 
					 | 
				
			||||||
    options = eval_xpath(dom, '//div[@id="limit-languages"]//input')
 | 
					 | 
				
			||||||
    for option in options:
 | 
					 | 
				
			||||||
        code = eval_xpath(option, './@id')[0].replace('_', '-')
 | 
					 | 
				
			||||||
        if code == 'nb':
 | 
					 | 
				
			||||||
            code = 'no'
 | 
					 | 
				
			||||||
        supported_languages.append(code)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return supported_languages
 | 
					    setmkt = re.compile('setmkt=([^&]*)')
 | 
				
			||||||
 | 
					    dom = html.fromstring(resp.text)
 | 
				
			||||||
 | 
					    lang_links = eval_xpath(dom, "//li/a[contains(@href, 'setmkt')]")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for a in lang_links:
 | 
				
			||||||
 | 
					        href = eval_xpath(a, './@href')[0]
 | 
				
			||||||
 | 
					        match = setmkt.search(href)
 | 
				
			||||||
 | 
					        l_tag = match.groups()[0]
 | 
				
			||||||
 | 
					        _lang, _nation = l_tag.split('-', 1)
 | 
				
			||||||
 | 
					        l_tag = _lang.lower() + '-' + _nation.upper()
 | 
				
			||||||
 | 
					        lang_tags.add(l_tag)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return list(lang_tags)
 | 
				
			||||||
 | 
				
			|||||||
@ -18,6 +18,8 @@ import re
 | 
				
			|||||||
from searx.url_utils import urlencode
 | 
					from searx.url_utils import urlencode
 | 
				
			||||||
from searx.utils import match_language
 | 
					from searx.utils import match_language
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# engine dependent config
 | 
					# engine dependent config
 | 
				
			||||||
categories = ['images']
 | 
					categories = ['images']
 | 
				
			||||||
paging = True
 | 
					paging = True
 | 
				
			||||||
@ -103,22 +105,3 @@ def response(resp):
 | 
				
			|||||||
            continue
 | 
					            continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return results
 | 
					    return results
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# get supported languages from their site
 | 
					 | 
				
			||||||
def _fetch_supported_languages(resp):
 | 
					 | 
				
			||||||
    supported_languages = []
 | 
					 | 
				
			||||||
    dom = html.fromstring(resp.text)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    regions_xpath = '//div[@id="region-section-content"]' \
 | 
					 | 
				
			||||||
                    + '//ul[@class="b_vList"]/li/a/@href'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    regions = dom.xpath(regions_xpath)
 | 
					 | 
				
			||||||
    for region in regions:
 | 
					 | 
				
			||||||
        code = re.search('setmkt=[^\&]+', region).group()[7:]
 | 
					 | 
				
			||||||
        if code == 'nb-NO':
 | 
					 | 
				
			||||||
            code = 'no-NO'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        supported_languages.append(code)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return supported_languages
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -15,9 +15,10 @@ from datetime import datetime
 | 
				
			|||||||
from dateutil import parser
 | 
					from dateutil import parser
 | 
				
			||||||
from lxml import etree
 | 
					from lxml import etree
 | 
				
			||||||
from searx.utils import list_get, match_language
 | 
					from searx.utils import list_get, match_language
 | 
				
			||||||
from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
 | 
					 | 
				
			||||||
from searx.url_utils import urlencode, urlparse, parse_qsl
 | 
					from searx.url_utils import urlencode, urlparse, parse_qsl
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# engine dependent config
 | 
					# engine dependent config
 | 
				
			||||||
categories = ['news']
 | 
					categories = ['news']
 | 
				
			||||||
paging = True
 | 
					paging = True
 | 
				
			||||||
@ -58,6 +59,7 @@ def _get_url(query, language, offset, time_range):
 | 
				
			|||||||
            offset=offset,
 | 
					            offset=offset,
 | 
				
			||||||
            interval=time_range_dict[time_range])
 | 
					            interval=time_range_dict[time_range])
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
 | 
					        # e.g. setmkt=de-de&setlang=de
 | 
				
			||||||
        search_path = search_string.format(
 | 
					        search_path = search_string.format(
 | 
				
			||||||
            query=urlencode({'q': query, 'setmkt': language}),
 | 
					            query=urlencode({'q': query, 'setmkt': language}),
 | 
				
			||||||
            offset=offset)
 | 
					            offset=offset)
 | 
				
			||||||
 | 
				
			|||||||
@ -12,10 +12,10 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
from json import loads
 | 
					from json import loads
 | 
				
			||||||
from lxml import html
 | 
					from lxml import html
 | 
				
			||||||
from searx.engines.bing_images import _fetch_supported_languages, supported_languages_url
 | 
					 | 
				
			||||||
from searx.url_utils import urlencode
 | 
					from searx.url_utils import urlencode
 | 
				
			||||||
from searx.utils import match_language
 | 
					from searx.utils import match_language
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
 | 
				
			||||||
 | 
					
 | 
				
			||||||
categories = ['videos']
 | 
					categories = ['videos']
 | 
				
			||||||
paging = True
 | 
					paging = True
 | 
				
			||||||
@ -67,6 +67,10 @@ def request(query, params):
 | 
				
			|||||||
    if params['time_range'] in time_range_dict:
 | 
					    if params['time_range'] in time_range_dict:
 | 
				
			||||||
        params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
 | 
					        params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # bing videos did not like "older" versions < 70.0.1 when selectin other
 | 
				
			||||||
 | 
					    # languages then 'en' .. very strange ?!?!
 | 
				
			||||||
 | 
					    params['headers']['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:73.0.1) Gecko/20100101 Firefox/73.0.1'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return params
 | 
					    return params
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -54,7 +54,7 @@ def request(query, params):
 | 
				
			|||||||
    if params['language'] != 'all':
 | 
					    if params['language'] != 'all':
 | 
				
			||||||
        language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
 | 
					        language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
 | 
				
			||||||
        if language:
 | 
					        if language:
 | 
				
			||||||
            params['url'] += '&lr=lang_' + language
 | 
					            params['url'] += '&hl=' + language
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return params
 | 
					    return params
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -3,9 +3,11 @@
 | 
				
			|||||||
# this file is generated automatically by utils/update_search_languages.py
 | 
					# this file is generated automatically by utils/update_search_languages.py
 | 
				
			||||||
 | 
					
 | 
				
			||||||
language_codes = (
 | 
					language_codes = (
 | 
				
			||||||
 | 
					    (u"af-NA", u"Afrikaans", u"", u"Afrikaans"),
 | 
				
			||||||
    (u"ar-SA", u"العربية", u"", u"Arabic"),
 | 
					    (u"ar-SA", u"العربية", u"", u"Arabic"),
 | 
				
			||||||
 | 
					    (u"be-BY", u"Беларуская", u"", u"Belarusian"),
 | 
				
			||||||
    (u"bg-BG", u"Български", u"", u"Bulgarian"),
 | 
					    (u"bg-BG", u"Български", u"", u"Bulgarian"),
 | 
				
			||||||
    (u"ca-ES", u"Català", u"", u"Catalan"),
 | 
					    (u"ca-AD", u"Català", u"", u"Catalan"),
 | 
				
			||||||
    (u"cs-CZ", u"Čeština", u"", u"Czech"),
 | 
					    (u"cs-CZ", u"Čeština", u"", u"Czech"),
 | 
				
			||||||
    (u"da-DK", u"Dansk", u"", u"Danish"),
 | 
					    (u"da-DK", u"Dansk", u"", u"Danish"),
 | 
				
			||||||
    (u"de", u"Deutsch", u"", u"German"),
 | 
					    (u"de", u"Deutsch", u"", u"German"),
 | 
				
			||||||
@ -17,11 +19,15 @@ language_codes = (
 | 
				
			|||||||
    (u"en-AU", u"English", u"Australia", u"English"),
 | 
					    (u"en-AU", u"English", u"Australia", u"English"),
 | 
				
			||||||
    (u"en-CA", u"English", u"Canada", u"English"),
 | 
					    (u"en-CA", u"English", u"Canada", u"English"),
 | 
				
			||||||
    (u"en-GB", u"English", u"United Kingdom", u"English"),
 | 
					    (u"en-GB", u"English", u"United Kingdom", u"English"),
 | 
				
			||||||
 | 
					    (u"en-IE", u"English", u"Ireland", u"English"),
 | 
				
			||||||
    (u"en-IN", u"English", u"India", u"English"),
 | 
					    (u"en-IN", u"English", u"India", u"English"),
 | 
				
			||||||
    (u"en-MY", u"English", u"Malaysia", u"English"),
 | 
					    (u"en-NZ", u"English", u"New Zealand", u"English"),
 | 
				
			||||||
 | 
					    (u"en-PH", u"English", u"Philippines", u"English"),
 | 
				
			||||||
 | 
					    (u"en-SG", u"English", u"Singapore", u"English"),
 | 
				
			||||||
    (u"en-US", u"English", u"United States", u"English"),
 | 
					    (u"en-US", u"English", u"United States", u"English"),
 | 
				
			||||||
    (u"es", u"Español", u"", u"Spanish"),
 | 
					    (u"es", u"Español", u"", u"Spanish"),
 | 
				
			||||||
    (u"es-AR", u"Español", u"Argentina", u"Spanish"),
 | 
					    (u"es-AR", u"Español", u"Argentina", u"Spanish"),
 | 
				
			||||||
 | 
					    (u"es-CL", u"Español", u"Chile", u"Spanish"),
 | 
				
			||||||
    (u"es-ES", u"Español", u"España", u"Spanish"),
 | 
					    (u"es-ES", u"Español", u"España", u"Spanish"),
 | 
				
			||||||
    (u"es-MX", u"Español", u"México", u"Spanish"),
 | 
					    (u"es-MX", u"Español", u"México", u"Spanish"),
 | 
				
			||||||
    (u"et-EE", u"Eesti", u"", u"Estonian"),
 | 
					    (u"et-EE", u"Eesti", u"", u"Estonian"),
 | 
				
			||||||
@ -35,6 +41,7 @@ language_codes = (
 | 
				
			|||||||
    (u"he-IL", u"עברית", u"", u"Hebrew"),
 | 
					    (u"he-IL", u"עברית", u"", u"Hebrew"),
 | 
				
			||||||
    (u"hr-HR", u"Hrvatski", u"", u"Croatian"),
 | 
					    (u"hr-HR", u"Hrvatski", u"", u"Croatian"),
 | 
				
			||||||
    (u"hu-HU", u"Magyar", u"", u"Hungarian"),
 | 
					    (u"hu-HU", u"Magyar", u"", u"Hungarian"),
 | 
				
			||||||
 | 
					    (u"hy-AM", u"Հայերեն", u"", u"Armenian"),
 | 
				
			||||||
    (u"id-ID", u"Indonesia", u"", u"Indonesian"),
 | 
					    (u"id-ID", u"Indonesia", u"", u"Indonesian"),
 | 
				
			||||||
    (u"is-IS", u"Íslenska", u"", u"Icelandic"),
 | 
					    (u"is-IS", u"Íslenska", u"", u"Icelandic"),
 | 
				
			||||||
    (u"it-IT", u"Italiano", u"", u"Italian"),
 | 
					    (u"it-IT", u"Italiano", u"", u"Italian"),
 | 
				
			||||||
@ -42,7 +49,7 @@ language_codes = (
 | 
				
			|||||||
    (u"ko-KR", u"한국어", u"", u"Korean"),
 | 
					    (u"ko-KR", u"한국어", u"", u"Korean"),
 | 
				
			||||||
    (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
 | 
					    (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
 | 
				
			||||||
    (u"lv-LV", u"Latviešu", u"", u"Latvian"),
 | 
					    (u"lv-LV", u"Latviešu", u"", u"Latvian"),
 | 
				
			||||||
    (u"ms-MY", u"Bahasa Melayu", u"", u"Malay"),
 | 
					    (u"ms-MY", u"Melayu", u"", u"Malay"),
 | 
				
			||||||
    (u"nb-NO", u"Norsk Bokmål", u"", u"Norwegian Bokmål"),
 | 
					    (u"nb-NO", u"Norsk Bokmål", u"", u"Norwegian Bokmål"),
 | 
				
			||||||
    (u"nl", u"Nederlands", u"", u"Dutch"),
 | 
					    (u"nl", u"Nederlands", u"", u"Dutch"),
 | 
				
			||||||
    (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
 | 
					    (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
 | 
				
			||||||
@ -55,8 +62,9 @@ language_codes = (
 | 
				
			|||||||
    (u"ru-RU", u"Русский", u"", u"Russian"),
 | 
					    (u"ru-RU", u"Русский", u"", u"Russian"),
 | 
				
			||||||
    (u"sk-SK", u"Slovenčina", u"", u"Slovak"),
 | 
					    (u"sk-SK", u"Slovenčina", u"", u"Slovak"),
 | 
				
			||||||
    (u"sl-SI", u"Slovenščina", u"", u"Slovenian"),
 | 
					    (u"sl-SI", u"Slovenščina", u"", u"Slovenian"),
 | 
				
			||||||
    (u"sr-RS", u"Српски", u"", u"Serbian"),
 | 
					    (u"sr-RS", u"Srpski", u"", u"Serbian"),
 | 
				
			||||||
    (u"sv-SE", u"Svenska", u"", u"Swedish"),
 | 
					    (u"sv-SE", u"Svenska", u"", u"Swedish"),
 | 
				
			||||||
 | 
					    (u"sw-KE", u"Kiswahili", u"", u"Swahili"),
 | 
				
			||||||
    (u"th-TH", u"ไทย", u"", u"Thai"),
 | 
					    (u"th-TH", u"ไทย", u"", u"Thai"),
 | 
				
			||||||
    (u"tr-TR", u"Türkçe", u"", u"Turkish"),
 | 
					    (u"tr-TR", u"Türkçe", u"", u"Turkish"),
 | 
				
			||||||
    (u"uk-UA", u"Українська", u"", u"Ukrainian"),
 | 
					    (u"uk-UA", u"Українська", u"", u"Ukrainian"),
 | 
				
			||||||
 | 
				
			|||||||
@ -5,7 +5,7 @@
 | 
				
			|||||||
# Output files (engines_languages.json and languages.py)
 | 
					# Output files (engines_languages.json and languages.py)
 | 
				
			||||||
# are written in current directory to avoid overwriting in case something goes wrong.
 | 
					# are written in current directory to avoid overwriting in case something goes wrong.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from json import dump
 | 
					import json
 | 
				
			||||||
import io
 | 
					import io
 | 
				
			||||||
from sys import path
 | 
					from sys import path
 | 
				
			||||||
from babel import Locale, UnknownLocaleError
 | 
					from babel import Locale, UnknownLocaleError
 | 
				
			||||||
@ -22,19 +22,22 @@ languages_file = 'languages.py'
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
# Fetchs supported languages for each engine and writes json file with those.
 | 
					# Fetchs supported languages for each engine and writes json file with those.
 | 
				
			||||||
def fetch_supported_languages():
 | 
					def fetch_supported_languages():
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    engines_languages = {}
 | 
					    engines_languages = {}
 | 
				
			||||||
    for engine_name in engines:
 | 
					    names = list(engines)
 | 
				
			||||||
 | 
					    names.sort()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for engine_name in names:
 | 
				
			||||||
 | 
					        print("fetching languages of engine %s" % engine_name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if hasattr(engines[engine_name], 'fetch_supported_languages'):
 | 
					        if hasattr(engines[engine_name], 'fetch_supported_languages'):
 | 
				
			||||||
            try:
 | 
					 | 
				
			||||||
            engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
 | 
					            engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
 | 
				
			||||||
            if type(engines_languages[engine_name]) == list:
 | 
					            if type(engines_languages[engine_name]) == list:
 | 
				
			||||||
                engines_languages[engine_name] = sorted(engines_languages[engine_name])
 | 
					                engines_languages[engine_name] = sorted(engines_languages[engine_name])
 | 
				
			||||||
            except Exception as e:
 | 
					 | 
				
			||||||
                print(e)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # write json file
 | 
					    # write json file
 | 
				
			||||||
    with io.open(engines_languages_file, "w", encoding="utf-8") as f:
 | 
					    with open(engines_languages_file, 'w', encoding='utf-8') as f:
 | 
				
			||||||
        dump(engines_languages, f, ensure_ascii=False, indent=4, separators=(',', ': '))
 | 
					        json.dump(engines_languages, f, indent=2, sort_keys=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return engines_languages
 | 
					    return engines_languages
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user