mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-31 02:27:06 -04:00 
			
		
		
		
	Merge pull request #1560 from return42/http-accept-language
[mod] add 'Accept-Language' HTTP header to online processors
This commit is contained in:
		
						commit
						7c9c112484
					
				| @ -440,6 +440,7 @@ engine is shown.  Most of the options have a default value or even are optional. | ||||
|      engine: example | ||||
|      shortcut: demo | ||||
|      base_url: 'https://{language}.example.com/' | ||||
|      send_accept_language_header: false | ||||
|      categories: general | ||||
|      timeout: 3.0 | ||||
|      api_key: 'apikey' | ||||
| @ -488,6 +489,13 @@ engine is shown.  Most of the options have a default value or even are optional. | ||||
|   use multiple sites using only one engine, or updating the site URL without | ||||
|   touching the code. | ||||
| 
 | ||||
| ``send_accept_language_header`` : | ||||
|   Several engines that support languages (or regions) deal with the HTTP header | ||||
|   ``Accept-Language`` to build a response that fits the locale.  When this | ||||
|   option is activated, the language (locale) that is selected by the user is used | ||||
|   to build and send an ``Accept-Language`` header in the request to the origin | ||||
|   search engine. | ||||
| 
 | ||||
| ``categories`` : optional | ||||
|   Define in which categories this engine will be active.  Most of the time, it is | ||||
|   defined in the code of the engine, but in a few cases it is useful, like when | ||||
|  | ||||
| @ -44,6 +44,7 @@ ENGINE_DEFAULT_ARGS = { | ||||
|     "enable_http": False, | ||||
|     "using_tor_proxy": False, | ||||
|     "display_error_messages": True, | ||||
|     "send_accept_language_header": False, | ||||
|     "tokens": [], | ||||
|     "about": {}, | ||||
| } | ||||
|  | ||||
| @ -25,6 +25,7 @@ categories = ['general', 'web'] | ||||
| paging = True | ||||
| time_range_support = False | ||||
| safesearch = False | ||||
| send_accept_language_header = True | ||||
| supported_languages_url = 'https://www.bing.com/account/general' | ||||
| language_aliases = {} | ||||
| 
 | ||||
| @ -68,7 +69,6 @@ def request(query, params): | ||||
|         logger.debug("headers.Referer --> %s", referer) | ||||
| 
 | ||||
|     params['url'] = base_url + search_path | ||||
|     params['headers']['Accept-Language'] = "en-US,en;q=0.5" | ||||
|     params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' | ||||
|     return params | ||||
| 
 | ||||
|  | ||||
| @ -31,6 +31,7 @@ categories = ['images', 'web'] | ||||
| paging = True | ||||
| safesearch = True | ||||
| time_range_support = True | ||||
| send_accept_language_header = True | ||||
| supported_languages_url = 'https://www.bing.com/account/general' | ||||
| number_of_results = 28 | ||||
| 
 | ||||
|  | ||||
| @ -34,6 +34,7 @@ about = { | ||||
| categories = ['news'] | ||||
| paging = True | ||||
| time_range_support = True | ||||
| send_accept_language_header = True | ||||
| 
 | ||||
| # search-url | ||||
| base_url = 'https://www.bing.com/' | ||||
|  | ||||
| @ -30,6 +30,7 @@ categories = ['videos', 'web'] | ||||
| paging = True | ||||
| safesearch = True | ||||
| time_range_support = True | ||||
| send_accept_language_header = True | ||||
| number_of_results = 28 | ||||
| 
 | ||||
| base_url = 'https://www.bing.com/' | ||||
| @ -70,10 +71,6 @@ def request(query, params): | ||||
|     if params['time_range'] in time_range_dict: | ||||
|         params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']]) | ||||
| 
 | ||||
|     # bing videos did not like "older" versions < 70.0.1 when selecting other | ||||
|     # languages than 'en' .. very strange ?!?! | ||||
|     params['headers']['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:73.0.1) Gecko/20100101 Firefox/73.0.1' | ||||
| 
 | ||||
|     return params | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -20,6 +20,7 @@ from json import loads | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| engine_type = 'online' | ||||
| send_accept_language_header = True | ||||
| categories = ['general'] | ||||
| disabled = True | ||||
| timeout = 2.0 | ||||
|  | ||||
| @ -31,6 +31,7 @@ categories = ['general', 'web'] | ||||
| paging = True | ||||
| supported_languages_url = 'https://duckduckgo.com/util/u588.js' | ||||
| time_range_support = True | ||||
| send_accept_language_header = True | ||||
| 
 | ||||
| language_aliases = { | ||||
|     'ar-SA': 'ar-XA', | ||||
|  | ||||
| @ -27,6 +27,8 @@ about = { | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| send_accept_language_header = True | ||||
| 
 | ||||
| URL = 'https://api.duckduckgo.com/' + '?{query}&format=json&pretty=0&no_redirect=1&d=1' | ||||
| 
 | ||||
| WIKIDATA_PREFIX = ['http://www.wikidata.org/entity/', 'https://www.wikidata.org/entity/'] | ||||
| @ -62,7 +64,6 @@ def request(query, params): | ||||
|     params['url'] = URL.format(query=urlencode({'q': query})) | ||||
|     language = match_language(params['language'], supported_languages, language_aliases) | ||||
|     language = language.split('-')[0] | ||||
|     params['headers']['Accept-Language'] = language | ||||
|     return params | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -30,6 +30,7 @@ about = { | ||||
| categories = ['images', 'web'] | ||||
| paging = True | ||||
| safesearch = True | ||||
| send_accept_language_header = True | ||||
| 
 | ||||
| # search-url | ||||
| images_url = 'https://duckduckgo.com/i.js?{query}&s={offset}&p={safesearch}&o=json&vqd={vqd}' | ||||
|  | ||||
| @ -45,6 +45,7 @@ categories = ['general', 'web'] | ||||
| paging = True | ||||
| time_range_support = True | ||||
| safesearch = True | ||||
| send_accept_language_header = True | ||||
| use_mobile_ui = False | ||||
| supported_languages_url = 'https://www.google.com/preferences?#languages' | ||||
| 
 | ||||
| @ -241,16 +242,6 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): | ||||
|         # language. | ||||
|         ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language) | ||||
| 
 | ||||
|         # Accept-Language: fr-CH, fr;q=0.8, en;q=0.6, *;q=0.5 | ||||
|         ret_val['headers']['Accept-Language'] = ','.join( | ||||
|             [ | ||||
|                 lang_country, | ||||
|                 language + ';q=0.8,', | ||||
|                 'en;q=0.6', | ||||
|                 '*;q=0.5', | ||||
|             ] | ||||
|         ) | ||||
| 
 | ||||
|     return ret_val | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -51,6 +51,7 @@ paging = False | ||||
| use_locale_domain = True | ||||
| time_range_support = True | ||||
| safesearch = True | ||||
| send_accept_language_header = True | ||||
| 
 | ||||
| filter_mapping = {0: 'images', 1: 'active', 2: 'active'} | ||||
| 
 | ||||
| @ -125,7 +126,6 @@ def request(query, params): | ||||
|     """Google-Video search request""" | ||||
| 
 | ||||
|     lang_info = get_lang_info(params, supported_languages, language_aliases, False) | ||||
|     logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) | ||||
| 
 | ||||
|     query_url = ( | ||||
|         'https://' | ||||
|  | ||||
| @ -70,13 +70,13 @@ time_range_support = True | ||||
| # | ||||
| #  safesearch : results are identical for safesearch=0 and safesearch=2 | ||||
| safesearch = False | ||||
| send_accept_language_header = True | ||||
| 
 | ||||
| 
 | ||||
| def request(query, params): | ||||
|     """Google-News search request""" | ||||
| 
 | ||||
|     lang_info = get_lang_info(params, supported_languages, language_aliases, False) | ||||
|     logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) | ||||
| 
 | ||||
|     # google news has only one domain | ||||
|     lang_info['subdomain'] = 'news.google.com' | ||||
|  | ||||
| @ -22,6 +22,8 @@ about = { | ||||
| } | ||||
| 
 | ||||
| categories = ["files", "apps"] | ||||
| send_accept_language_header = True | ||||
| 
 | ||||
| search_url = "https://play.google.com/store/search?{query}&c=apps" | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -52,6 +52,7 @@ language_support = True | ||||
| use_locale_domain = True | ||||
| time_range_support = True | ||||
| safesearch = False | ||||
| send_accept_language_header = True | ||||
| 
 | ||||
| 
 | ||||
| def time_range_url(params): | ||||
| @ -75,7 +76,6 @@ def request(query, params): | ||||
| 
 | ||||
|     offset = (params['pageno'] - 1) * 10 | ||||
|     lang_info = get_lang_info(params, supported_languages, language_aliases, False) | ||||
|     logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) | ||||
| 
 | ||||
|     # subdomain is: scholar.google.xy | ||||
|     lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.") | ||||
|  | ||||
| @ -60,6 +60,7 @@ language_support = True | ||||
| use_locale_domain = True | ||||
| time_range_support = True | ||||
| safesearch = True | ||||
| send_accept_language_header = True | ||||
| 
 | ||||
| RE_CACHE = {} | ||||
| 
 | ||||
| @ -111,7 +112,6 @@ def request(query, params): | ||||
|     """Google-Video search request""" | ||||
| 
 | ||||
|     lang_info = get_lang_info(params, supported_languages, language_aliases, False) | ||||
|     logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) | ||||
| 
 | ||||
|     query_url = ( | ||||
|         'https://' | ||||
|  | ||||
| @ -30,6 +30,7 @@ about = { | ||||
| categories = ['map'] | ||||
| paging = False | ||||
| language_support = True | ||||
| send_accept_language_header = True | ||||
| 
 | ||||
| # search-url | ||||
| base_url = 'https://nominatim.openstreetmap.org/' | ||||
| @ -142,9 +143,8 @@ def request(query, params): | ||||
|     params['url'] = base_url + search_string.format(query=urlencode({'q': query})) | ||||
|     params['route'] = route_re.match(query) | ||||
|     params['headers']['User-Agent'] = searx_useragent() | ||||
| 
 | ||||
|     accept_language = 'en' if params['language'] == 'all' else params['language'] | ||||
|     params['headers']['Accept-Language'] = accept_language | ||||
|     if 'Accept-Language' not in params['headers']: | ||||
|         params['headers']['Accept-Language'] = 'en' | ||||
|     return params | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -19,6 +19,9 @@ about = { | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| send_accept_language_header = True | ||||
| 
 | ||||
| # search-url | ||||
| search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}' | ||||
| supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' | ||||
| @ -41,9 +44,6 @@ def request(query, params): | ||||
|     language = url_lang(params['language']) | ||||
|     params['url'] = search_url.format(title=quote(query), language=language) | ||||
| 
 | ||||
|     if params['language'].lower() in language_variants.get(language, []): | ||||
|         params['headers']['Accept-Language'] = params['language'].lower() | ||||
| 
 | ||||
|     params['headers']['User-Agent'] = searx_useragent() | ||||
|     params['raise_for_httperror'] = False | ||||
|     params['soft_max_redirects'] = 2 | ||||
|  | ||||
| @ -1,6 +1,7 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| 
 | ||||
| import typing | ||||
| import babel | ||||
| 
 | ||||
| 
 | ||||
| class EngineRef: | ||||
| @ -29,6 +30,7 @@ class SearchQuery: | ||||
|         'query', | ||||
|         'engineref_list', | ||||
|         'lang', | ||||
|         'locale', | ||||
|         'safesearch', | ||||
|         'pageno', | ||||
|         'time_range', | ||||
| @ -59,6 +61,13 @@ class SearchQuery: | ||||
|         self.external_bang = external_bang | ||||
|         self.engine_data = engine_data or {} | ||||
| 
 | ||||
|         self.locale = None | ||||
|         if self.lang: | ||||
|             try: | ||||
|                 self.locale = babel.Locale.parse(self.lang, sep='-') | ||||
|             except babel.core.UnknownLocaleError: | ||||
|                 pass | ||||
| 
 | ||||
|     @property | ||||
|     def categories(self): | ||||
|         return list(set(map(lambda engineref: engineref.category, self.engineref_list))) | ||||
|  | ||||
| @ -138,6 +138,13 @@ class EngineProcessor(ABC): | ||||
|         return False | ||||
| 
 | ||||
|     def get_params(self, search_query, engine_category): | ||||
|         """Returns a set of *request params* or ``None`` if request is not supported. | ||||
| 
 | ||||
|         Not supported conditions (``None`` is returned): | ||||
| 
 | ||||
|         - A page-number > 1 when engine does not support paging. | ||||
|         - A time range when the engine does not support time range. | ||||
|         """ | ||||
|         # if paging is not supported, skip | ||||
|         if search_query.pageno > 1 and not self.engine.paging: | ||||
|             return None | ||||
|  | ||||
| @ -60,6 +60,17 @@ class OnlineProcessor(EngineProcessor): | ||||
|         # add a user agent | ||||
|         params['headers']['User-Agent'] = gen_useragent() | ||||
| 
 | ||||
|         # add Accept-Language header | ||||
|         if self.engine.send_accept_language_header and search_query.locale: | ||||
|             ac_lang = search_query.locale.language | ||||
|             if search_query.locale.territory: | ||||
|                 ac_lang = "%s-%s,%s;q=0.9,*;q=0.5" % ( | ||||
|                     search_query.locale.language, | ||||
|                     search_query.locale.territory, | ||||
|                     search_query.locale.language, | ||||
|                 ) | ||||
|             params['headers']['Accept-Language'] = ac_lang | ||||
| 
 | ||||
|         return params | ||||
| 
 | ||||
|     def _send_http_request(self, params): | ||||
|  | ||||
| @ -38,6 +38,9 @@ class OnlineCurrencyProcessor(OnlineProcessor): | ||||
|     engine_type = 'online_currency' | ||||
| 
 | ||||
|     def get_params(self, search_query, engine_category): | ||||
|         """Returns a set of *request params* or ``None`` if search query does not match | ||||
|         :py:obj:`parser_re`.""" | ||||
| 
 | ||||
|         params = super().get_params(search_query, engine_category) | ||||
|         if params is None: | ||||
|             return None | ||||
|  | ||||
| @ -18,6 +18,8 @@ class OnlineDictionaryProcessor(OnlineProcessor): | ||||
|     engine_type = 'online_dictionary' | ||||
| 
 | ||||
|     def get_params(self, search_query, engine_category): | ||||
|         """Returns a set of *request params* or ``None`` if search query does not match | ||||
|         :py:obj:`parser_re`.""" | ||||
|         params = super().get_params(search_query, engine_category) | ||||
|         if params is None: | ||||
|             return None | ||||
|  | ||||
| @ -20,6 +20,9 @@ class OnlineUrlSearchProcessor(OnlineProcessor): | ||||
|     engine_type = 'online_url_search' | ||||
| 
 | ||||
|     def get_params(self, search_query, engine_category): | ||||
|         """Returns a set of *request params* or ``None`` if search query does not match | ||||
|         at least one of :py:obj:`re_search_urls`. | ||||
|         """ | ||||
|         params = super().get_params(search_query, engine_category) | ||||
|         if params is None: | ||||
|             return None | ||||
|  | ||||
| @ -748,6 +748,7 @@ engines: | ||||
| 
 | ||||
|   - name: google play movies | ||||
|     engine: xpath | ||||
|     send_accept_language_header: true | ||||
|     search_url: https://play.google.com/store/search?q={query}&c=movies | ||||
|     results_xpath: '//div[@class="ImZGtf mpg5gc"]' | ||||
|     title_xpath: './/div[@class="RZEgze"]//div[@class="kCSSQe"]//a' | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user