mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-30 10:12:31 -04:00 
			
		
		
		
	
						commit
						d669da81fb
					
				| @ -1,8 +0,0 @@ | |||||||
| .. _autodetect search language: |  | ||||||
| 
 |  | ||||||
| ====================== |  | ||||||
| Search language plugin |  | ||||||
| ====================== |  | ||||||
| 
 |  | ||||||
| .. automodule:: searx.plugins.autodetect_search_language |  | ||||||
|   :members: |  | ||||||
| @ -1,97 +0,0 @@ | |||||||
| # SPDX-License-Identifier: AGPL-3.0-or-later |  | ||||||
| # lint: pylint |  | ||||||
| """Plugin to detect the search language from the search query. |  | ||||||
| 
 |  | ||||||
| The language detection is done by using the fastText_ library (`python |  | ||||||
| fasttext`_). fastText_ distributes the `language identification model`_, for |  | ||||||
| reference: |  | ||||||
| 
 |  | ||||||
| - `FastText.zip: Compressing text classification models`_ |  | ||||||
| - `Bag of Tricks for Efficient Text Classification`_ |  | ||||||
| 
 |  | ||||||
| The `language identification model`_ supports the language codes (ISO-639-3):: |  | ||||||
| 
 |  | ||||||
|    af als am an ar arz as ast av az azb ba bar bcl be bg bh bn bo bpy br bs bxr |  | ||||||
|    ca cbk ce ceb ckb co cs cv cy da de diq dsb dty dv el eml en eo es et eu fa |  | ||||||
|    fi fr frr fy ga gd gl gn gom gu gv he hi hif hr hsb ht hu hy ia id ie ilo io |  | ||||||
|    is it ja jbo jv ka kk km kn ko krc ku kv kw ky la lb lez li lmo lo lrc lt lv |  | ||||||
|    mai mg mhr min mk ml mn mr mrj ms mt mwl my myv mzn nah nap nds ne new nl nn |  | ||||||
|    no oc or os pa pam pfl pl pms pnb ps pt qu rm ro ru rue sa sah sc scn sco sd |  | ||||||
|    sh si sk sl so sq sr su sv sw ta te tg th tk tl tr tt tyv ug uk ur uz vec vep |  | ||||||
|    vi vls vo wa war wuu xal xmf yi yo yue zh |  | ||||||
| 
 |  | ||||||
| The `language identification model`_ is harmonized with the SearXNG's language |  | ||||||
| (locale) model.  General conditions of SearXNG's locale model are: |  | ||||||
| 
 |  | ||||||
| a. SearXNG's locale of a query is passed to the |  | ||||||
|    :py:obj:`searx.locales.get_engine_locale` to get a language and/or region |  | ||||||
|    code that is used by an engine. |  | ||||||
| 
 |  | ||||||
| b. SearXNG and most of the engines do not support all the languages from |  | ||||||
|    language model and there might be also a discrepancy in the ISO-639-3 and |  | ||||||
|    ISO-639-2 handling (:py:obj:`searx.locales.get_engine_locale`).  Furthermore, |  | ||||||
|    in SearXNG the locales like ``zh-TW`` (``zh-CN``) are mapped to |  | ||||||
|    ``zh_Hant`` (``zh_Hans``). |  | ||||||
| 
 |  | ||||||
| Conclusion: This plugin does only auto-detect the languages a user can select in |  | ||||||
| the language menu (:py:obj:`supported_langs`). |  | ||||||
| 
 |  | ||||||
| SearXNG's locale of a query comes from (*highest wins*): |  | ||||||
| 
 |  | ||||||
| 1. The ``Accept-Language`` header from user's HTTP client. |  | ||||||
| 2. The user selects a locale in the preferences. |  | ||||||
| 3. The user selects a locale from the menu in the query form (e.g. ``:zh-TW``) |  | ||||||
| 4. This plugin is activated in the preferences and the locale (only the language |  | ||||||
|    code / no region code) comes from fastText's language detection. |  | ||||||
| 
 |  | ||||||
| Conclusion: There is a conflict between the language selected by the user and |  | ||||||
| the language from language detection of this plugin.  For example, the user |  | ||||||
| explicitly selects the German locale via the search syntax to search for a term |  | ||||||
| that is identified as an English term (try ``:de-DE thermomix``, for example). |  | ||||||
| 
 |  | ||||||
| .. hint:: |  | ||||||
| 
 |  | ||||||
|    To SearXNG maintainers; please take into account: under some circumstances |  | ||||||
|    the auto-detection of the language of this plugin could be detrimental to |  | ||||||
|    users' expectations.  It's not recommended to activate this plugin by |  | ||||||
|    default. It should always be the user's decision whether to activate this |  | ||||||
|    plugin or not. |  | ||||||
| 
 |  | ||||||
| .. _fastText: https://fasttext.cc/ |  | ||||||
| .. _python fasttext: https://pypi.org/project/fasttext/ |  | ||||||
| .. _language identification model: https://fasttext.cc/docs/en/language-identification.html |  | ||||||
| .. _Bag of Tricks for Efficient Text Classification: https://arxiv.org/abs/1607.01759 |  | ||||||
| .. _`FastText.zip: Compressing text classification models`: https://arxiv.org/abs/1612.03651 |  | ||||||
| 
 |  | ||||||
| """ |  | ||||||
| 
 |  | ||||||
| from flask_babel import gettext |  | ||||||
| import babel |  | ||||||
| 
 |  | ||||||
| from searx.utils import detect_language |  | ||||||
| from searx.languages import language_codes |  | ||||||
| 
 |  | ||||||
| name = gettext('Autodetect search language') |  | ||||||
| description = gettext('Automatically detect the query search language and switch to it.') |  | ||||||
| preference_section = 'general' |  | ||||||
| default_on = False |  | ||||||
| 
 |  | ||||||
| supported_langs = set() |  | ||||||
| """Languages supported by most searxng engines (:py:obj:`searx.languages.language_codes`).""" |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def pre_search(request, search):  # pylint: disable=unused-argument |  | ||||||
|     lang = detect_language(search.search_query.query, min_probability=0) |  | ||||||
|     if lang in supported_langs: |  | ||||||
|         search.search_query.lang = lang |  | ||||||
|         try: |  | ||||||
|             search.search_query.locale = babel.Locale.parse(lang) |  | ||||||
|         except babel.core.UnknownLocaleError: |  | ||||||
|             pass |  | ||||||
|     return True |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def init(app, settings):  # pylint: disable=unused-argument |  | ||||||
|     for searxng_locale in language_codes: |  | ||||||
|         supported_langs.add(searxng_locale[0].split('-')[0]) |  | ||||||
|     return True |  | ||||||
| @ -154,7 +154,7 @@ class SearchLanguageSetting(EnumStringSetting): | |||||||
|     """Available choices may change, so user's value may not be in choices anymore""" |     """Available choices may change, so user's value may not be in choices anymore""" | ||||||
| 
 | 
 | ||||||
|     def _validate_selection(self, selection): |     def _validate_selection(self, selection): | ||||||
|         if selection != '' and not VALID_LANGUAGE_CODE.match(selection): |         if selection != '' and selection != 'auto' and not VALID_LANGUAGE_CODE.match(selection): | ||||||
|             raise ValidationException('Invalid language code: "{0}"'.format(selection)) |             raise ValidationException('Invalid language code: "{0}"'.format(selection)) | ||||||
| 
 | 
 | ||||||
|     def parse(self, data: str): |     def parse(self, data: str): | ||||||
|  | |||||||
| @ -104,7 +104,7 @@ class LanguageParser(QueryPartParser): | |||||||
|                     break |                     break | ||||||
| 
 | 
 | ||||||
|         # user may set a valid, yet not selectable language |         # user may set a valid, yet not selectable language | ||||||
|         if VALID_LANGUAGE_CODE.match(value): |         if VALID_LANGUAGE_CODE.match(value) or value == 'auto': | ||||||
|             lang_parts = value.split('-') |             lang_parts = value.split('-') | ||||||
|             if len(lang_parts) > 1: |             if len(lang_parts) > 1: | ||||||
|                 value = lang_parts[0].lower() + '-' + lang_parts[1].upper() |                 value = lang_parts[0].lower() + '-' + lang_parts[1].upper() | ||||||
|  | |||||||
| @ -3,10 +3,12 @@ | |||||||
| # pylint: disable=missing-module-docstring, too-few-public-methods | # pylint: disable=missing-module-docstring, too-few-public-methods | ||||||
| 
 | 
 | ||||||
| import threading | import threading | ||||||
|  | from copy import copy | ||||||
| from timeit import default_timer | from timeit import default_timer | ||||||
| from uuid import uuid4 | from uuid import uuid4 | ||||||
| 
 | 
 | ||||||
| import flask | import flask | ||||||
|  | import babel | ||||||
| 
 | 
 | ||||||
| from searx import settings | from searx import settings | ||||||
| from searx.answerers import ask | from searx.answerers import ask | ||||||
| @ -20,6 +22,7 @@ from searx.network import initialize as initialize_network, check_network_config | |||||||
| from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time | from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time | ||||||
| from searx.search.processors import PROCESSORS, initialize as initialize_processors | from searx.search.processors import PROCESSORS, initialize as initialize_processors | ||||||
| from searx.search.checker import initialize as initialize_checker | from searx.search.checker import initialize as initialize_checker | ||||||
|  | from searx.utils import detect_language | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| logger = logger.getChild('search') | logger = logger.getChild('search') | ||||||
| @ -37,18 +40,57 @@ def initialize(settings_engines=None, enable_checker=False, check_network=False, | |||||||
|         initialize_checker() |         initialize_checker() | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def replace_auto_language(search_query: SearchQuery): | ||||||
|  |     """ | ||||||
|  |     Do nothing except if `search_query.lang` is "auto". | ||||||
|  |     In this case: | ||||||
|  |     * the value "auto" is replaced by the detected language of the query. | ||||||
|  |       The default value is "all" when no language is detected. | ||||||
|  |     * `search_query.locale` is updated accordingly | ||||||
|  | 
 | ||||||
|  |     Use :py:obj:`searx.utils.detect_language` with `only_search_languages=True` to keep | ||||||
|  |     only languages supported by the engines. | ||||||
|  |     """ | ||||||
|  |     if search_query.lang != 'auto': | ||||||
|  |         return | ||||||
|  | 
 | ||||||
|  |     detected_lang = detect_language(search_query.query, threshold=0.0, only_search_languages=True) | ||||||
|  |     if detected_lang is None: | ||||||
|  |         # fallback to 'all' if no language has been detected | ||||||
|  |         search_query.lang = 'all' | ||||||
|  |         search_query.locale = None | ||||||
|  |         return | ||||||
|  |     search_query.lang = detected_lang | ||||||
|  |     try: | ||||||
|  |         search_query.locale = babel.Locale.parse(search_query.lang) | ||||||
|  |     except babel.core.UnknownLocaleError: | ||||||
|  |         search_query.locale = None | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| class Search: | class Search: | ||||||
|     """Search information container""" |     """Search information container""" | ||||||
| 
 | 
 | ||||||
|     __slots__ = "search_query", "result_container", "start_time", "actual_timeout" |     __slots__ = "search_query", "result_container", "start_time", "actual_timeout" | ||||||
| 
 | 
 | ||||||
|     def __init__(self, search_query: SearchQuery): |     def __init__(self, search_query: SearchQuery): | ||||||
|  |         """Initialize the Search | ||||||
|  | 
 | ||||||
|  |         search_query is copied | ||||||
|  |         """ | ||||||
|         # init vars |         # init vars | ||||||
|         super().__init__() |         super().__init__() | ||||||
|         self.search_query = search_query |  | ||||||
|         self.result_container = ResultContainer() |         self.result_container = ResultContainer() | ||||||
|         self.start_time = None |         self.start_time = None | ||||||
|         self.actual_timeout = None |         self.actual_timeout = None | ||||||
|  |         self.search_query = copy(search_query) | ||||||
|  |         self.update_search_query(self.search_query) | ||||||
|  | 
 | ||||||
|  |     def update_search_query(self, search_query: SearchQuery): | ||||||
|  |         """Update search_query. | ||||||
|  | 
 | ||||||
|  |         call replace_auto_language to replace the "auto" language | ||||||
|  |         """ | ||||||
|  |         replace_auto_language(search_query) | ||||||
| 
 | 
 | ||||||
|     def search_external_bang(self): |     def search_external_bang(self): | ||||||
|         """ |         """ | ||||||
|  | |||||||
| @ -109,3 +109,16 @@ class SearchQuery: | |||||||
|                 self.external_bang, |                 self.external_bang, | ||||||
|             ) |             ) | ||||||
|         ) |         ) | ||||||
|  | 
 | ||||||
|  |     def __copy__(self): | ||||||
|  |         return SearchQuery( | ||||||
|  |             self.query, | ||||||
|  |             self.engineref_list, | ||||||
|  |             self.lang, | ||||||
|  |             self.safesearch, | ||||||
|  |             self.pageno, | ||||||
|  |             self.time_range, | ||||||
|  |             self.timeout_limit, | ||||||
|  |             self.external_bang, | ||||||
|  |             self.engine_data, | ||||||
|  |         ) | ||||||
|  | |||||||
| @ -18,7 +18,7 @@ searx_dir = abspath(dirname(__file__)) | |||||||
| 
 | 
 | ||||||
| logger = logging.getLogger('searx') | logger = logging.getLogger('searx') | ||||||
| OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss'] | OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss'] | ||||||
| LANGUAGE_CODES = ['all'] + list(l[0] for l in languages) | LANGUAGE_CODES = ['all', 'auto'] + list(l[0] for l in languages) | ||||||
| SIMPLE_STYLE = ('auto', 'light', 'dark') | SIMPLE_STYLE = ('auto', 'light', 'dark') | ||||||
| CATEGORIES_AS_TABS = { | CATEGORIES_AS_TABS = { | ||||||
|     'general': {}, |     'general': {}, | ||||||
|  | |||||||
| @ -1,5 +1,9 @@ | |||||||
| <select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}} | <select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}} | ||||||
| 	<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option> | 	<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option> | ||||||
|  | 	<option value="auto" {% if current_language == 'auto' %}selected="selected"{% endif %}> | ||||||
|  | 		{{- _('Auto-detect') -}} | ||||||
|  | 		{%- if current_language == 'auto' %} ({{ search_language }}){%- endif -%} | ||||||
|  | 	</option> | ||||||
| 	{%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%} | 	{%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%} | ||||||
| 	<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}> | 	<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}> | ||||||
| 		{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %} | 		{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %} | ||||||
|  | |||||||
| @ -116,12 +116,15 @@ | |||||||
|       <p class="value">{{- '' -}} |       <p class="value">{{- '' -}} | ||||||
|         <select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}} |         <select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}} | ||||||
|           <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option> |           <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option> | ||||||
|  |           <option value="auto" {% if current_language == 'auto' %}selected="selected"{% endif %}>{{ _('Auto-detect') }}</option> | ||||||
|           {%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%} |           {%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%} | ||||||
|           <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}</option> |           <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}</option> | ||||||
|           {%- endfor -%} |           {%- endfor -%} | ||||||
|         </select>{{- '' -}} |         </select>{{- '' -}} | ||||||
|       </p> |       </p> | ||||||
|       <div class="description" id="desc_language">{{ _('What language do you prefer for search?') }}</div> |       <div class="description" id="desc_language"> | ||||||
|  |         {{- _('What language do you prefer for search?') }} {{ _('Choose Auto-detect to let SearXNG detect the language of your query.') -}} | ||||||
|  |       </div> | ||||||
|     </fieldset> |     </fieldset> | ||||||
|     {% endif %} |     {% endif %} | ||||||
|     {% if 'autocomplete' not in locked_preferences %} |     {% if 'autocomplete' not in locked_preferences %} | ||||||
|  | |||||||
| @ -53,6 +53,9 @@ _LANG_TO_LC_CACHE: Dict[str, Dict[str, str]] = {} | |||||||
| _FASTTEXT_MODEL: Optional["fasttext.FastText._FastText"] = None | _FASTTEXT_MODEL: Optional["fasttext.FastText._FastText"] = None | ||||||
| """fasttext model to predict language of a search term""" | """fasttext model to predict language of a search term""" | ||||||
| 
 | 
 | ||||||
|  | SEARCH_LANGUAGE_CODES = frozenset([searxng_locale[0].split('-')[0] for searxng_locale in language_codes]) | ||||||
|  | """Languages supported by most searxng engines (:py:obj:`searx.languages.language_codes`).""" | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| class _NotSetClass:  # pylint: disable=too-few-public-methods | class _NotSetClass:  # pylint: disable=too-few-public-methods | ||||||
|     """Internal class for this module, do not create instance of this class. |     """Internal class for this module, do not create instance of this class. | ||||||
| @ -637,11 +640,72 @@ def _get_fasttext_model() -> "fasttext.FastText._FastText": | |||||||
|     return _FASTTEXT_MODEL |     return _FASTTEXT_MODEL | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def detect_language(text: str, threshold: float = 0.3, min_probability: float = 0.5) -> Optional[str]: | def detect_language(text: str, threshold: float = 0.3, only_search_languages: bool = False) -> Optional[str]: | ||||||
|     """https://fasttext.cc/docs/en/language-identification.html""" |     """Detect the language of the ``text`` parameter. | ||||||
|  | 
 | ||||||
|  |     :param str text: The string whose language is to be detected. | ||||||
|  | 
 | ||||||
|  |     :param float threshold: Threshold filters the returned labels by a threshold | ||||||
|  |         on probability.  A choice of 0.3 will return labels with at least 0.3 | ||||||
|  |         probability. | ||||||
|  | 
 | ||||||
|  |     :param bool only_search_languages: If ``True``, returns only supported | ||||||
|  |         SearXNG search languages.  see :py:obj:`searx.languages` | ||||||
|  | 
 | ||||||
|  |     :rtype: str, None | ||||||
|  |     :returns: | ||||||
|  |         The detected language code or ``None``. See below. | ||||||
|  | 
 | ||||||
|  |     :raises ValueError: If ``text`` is not a string. | ||||||
|  | 
 | ||||||
|  |     The language detection is done by using `a fork`_ of the fastText_ library | ||||||
|  |     (`python fasttext`_). fastText_ distributes the `language identification | ||||||
|  |     model`_, for reference: | ||||||
|  | 
 | ||||||
|  |     - `FastText.zip: Compressing text classification models`_ | ||||||
|  |     - `Bag of Tricks for Efficient Text Classification`_ | ||||||
|  | 
 | ||||||
|  |     The `language identification model`_ supports the language codes | ||||||
|  |     (ISO-639-3):: | ||||||
|  | 
 | ||||||
|  |         af als am an ar arz as ast av az azb ba bar bcl be bg bh bn bo bpy br bs | ||||||
|  |         bxr ca cbk ce ceb ckb co cs cv cy da de diq dsb dty dv el eml en eo es | ||||||
|  |         et eu fa fi fr frr fy ga gd gl gn gom gu gv he hi hif hr hsb ht hu hy ia | ||||||
|  |         id ie ilo io is it ja jbo jv ka kk km kn ko krc ku kv kw ky la lb lez li | ||||||
|  |         lmo lo lrc lt lv mai mg mhr min mk ml mn mr mrj ms mt mwl my myv mzn nah | ||||||
|  |         nap nds ne new nl nn no oc or os pa pam pfl pl pms pnb ps pt qu rm ro ru | ||||||
|  |         rue sa sah sc scn sco sd sh si sk sl so sq sr su sv sw ta te tg th tk tl | ||||||
|  |         tr tt tyv ug uk ur uz vec vep vi vls vo wa war wuu xal xmf yi yo yue zh | ||||||
|  | 
 | ||||||
|  |     By using ``only_search_languages=True`` the `language identification model`_ | ||||||
|  |     is harmonized with the SearXNG's language (locale) model.  General | ||||||
|  |     conditions of SearXNG's locale model are: | ||||||
|  | 
 | ||||||
|  |     a. SearXNG's locale of a query is passed to the | ||||||
|  |        :py:obj:`searx.locales.get_engine_locale` to get a language and/or region | ||||||
|  |        code that is used by an engine. | ||||||
|  | 
 | ||||||
|  |     b. Most of SearXNG's engines do not support all the languages from `language | ||||||
|  |        identification model`_ and there is also a discrepancy in the ISO-639-3 | ||||||
|  |        (fastText) and ISO-639-2 (SearXNG) handling.  Furthermore, in SearXNG the | ||||||
|  |        locales like ``zh-TW`` (``zh-CN``) are mapped to ``zh_Hant`` | ||||||
|  |        (``zh_Hans``) while the `language identification model`_ reduces both to | ||||||
|  |        ``zh``. | ||||||
|  | 
 | ||||||
|  |     .. _a fork: https://github.com/searxng/fasttext-predict | ||||||
|  |     .. _fastText: https://fasttext.cc/ | ||||||
|  |     .. _python fasttext: https://pypi.org/project/fasttext/ | ||||||
|  |     .. _language identification model: https://fasttext.cc/docs/en/language-identification.html | ||||||
|  |     .. _Bag of Tricks for Efficient Text Classification: https://arxiv.org/abs/1607.01759 | ||||||
|  |     .. _`FastText.zip: Compressing text classification models`: https://arxiv.org/abs/1612.03651 | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|     if not isinstance(text, str): |     if not isinstance(text, str): | ||||||
|         raise ValueError('text must a str') |         raise ValueError('text must a str') | ||||||
|     r = _get_fasttext_model().predict(text.replace('\n', ' '), k=1, threshold=threshold) |     r = _get_fasttext_model().predict(text.replace('\n', ' '), k=1, threshold=threshold) | ||||||
|     if isinstance(r, tuple) and len(r) == 2 and len(r[0]) > 0 and len(r[1]) > 0 and r[1][0] > min_probability: |     if isinstance(r, tuple) and len(r) == 2 and len(r[0]) > 0 and len(r[1]) > 0: | ||||||
|         return r[0][0].split('__label__')[1] |         language = r[0][0].split('__label__')[1] | ||||||
|  |         if only_search_languages and language not in SEARCH_LANGUAGE_CODES: | ||||||
|  |             return None | ||||||
|  |         return language | ||||||
|     return None |     return None | ||||||
|  | |||||||
| @ -63,7 +63,7 @@ def parse_lang(preferences: Preferences, form: Dict[str, str], raw_text_query: R | |||||||
|         query_lang = preferences.get_value('language') |         query_lang = preferences.get_value('language') | ||||||
| 
 | 
 | ||||||
|     # check language |     # check language | ||||||
|     if not VALID_LANGUAGE_CODE.match(query_lang): |     if not VALID_LANGUAGE_CODE.match(query_lang) and query_lang != 'auto': | ||||||
|         raise SearxParameterException('language', query_lang) |         raise SearxParameterException('language', query_lang) | ||||||
| 
 | 
 | ||||||
|     return query_lang |     return query_lang | ||||||
|  | |||||||
| @ -810,6 +810,9 @@ def search(): | |||||||
|         ) |         ) | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
|  |     # search_query.lang contains the user choice (all, auto, en, ...) | ||||||
|  |     # when the user choice is "auto", search.search_query.lang contains the detected language | ||||||
|  |     # otherwise it is equal to search_query.lang | ||||||
|     return render( |     return render( | ||||||
|         # fmt: off |         # fmt: off | ||||||
|         'results.html', |         'results.html', | ||||||
| @ -834,6 +837,11 @@ def search(): | |||||||
|             settings['search']['languages'], |             settings['search']['languages'], | ||||||
|             fallback=request.preferences.get_value("language") |             fallback=request.preferences.get_value("language") | ||||||
|         ), |         ), | ||||||
|  |         search_language = match_language( | ||||||
|  |             search.search_query.lang, | ||||||
|  |             settings['search']['languages'], | ||||||
|  |             fallback=request.preferences.get_value("language") | ||||||
|  |         ), | ||||||
|         timeout_limit = request.form.get('timeout_limit', None) |         timeout_limit = request.form.get('timeout_limit', None) | ||||||
|         # fmt: on |         # fmt: on | ||||||
|     ) |     ) | ||||||
|  | |||||||
| @ -91,6 +91,17 @@ class TestLanguageParser(SearxTestCase): | |||||||
|         self.assertIn('all', query.languages) |         self.assertIn('all', query.languages) | ||||||
|         self.assertFalse(query.specific) |         self.assertFalse(query.specific) | ||||||
| 
 | 
 | ||||||
|  |     def test_auto_language_code(self): | ||||||
|  |         language = 'auto' | ||||||
|  |         query_text = 'una consulta' | ||||||
|  |         full_query = ':' + language + ' ' + query_text | ||||||
|  |         query = RawTextQuery(full_query, []) | ||||||
|  | 
 | ||||||
|  |         self.assertEqual(query.getFullQuery(), full_query) | ||||||
|  |         self.assertEqual(len(query.query_parts), 1) | ||||||
|  |         self.assertIn('auto', query.languages) | ||||||
|  |         self.assertFalse(query.specific) | ||||||
|  | 
 | ||||||
|     def test_invalid_language_code(self): |     def test_invalid_language_code(self): | ||||||
|         language = 'not_a_language' |         language = 'not_a_language' | ||||||
|         query_text = 'the query' |         query_text = 'the query' | ||||||
|  | |||||||
| @ -1,5 +1,7 @@ | |||||||
| # -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||||
| 
 | 
 | ||||||
|  | from copy import copy | ||||||
|  | 
 | ||||||
| import searx.search | import searx.search | ||||||
| from searx.search import SearchQuery, EngineRef | from searx.search import SearchQuery, EngineRef | ||||||
| from searx import settings | from searx import settings | ||||||
| @ -34,6 +36,11 @@ class SearchQueryTestCase(SearxTestCase): | |||||||
|         self.assertEqual(s, s) |         self.assertEqual(s, s) | ||||||
|         self.assertNotEqual(s, t) |         self.assertNotEqual(s, t) | ||||||
| 
 | 
 | ||||||
|  |     def test_copy(self): | ||||||
|  |         s = SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, None, None, None) | ||||||
|  |         t = copy(s) | ||||||
|  |         self.assertEqual(s, t) | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| class SearchTestCase(SearxTestCase): | class SearchTestCase(SearxTestCase): | ||||||
|     @classmethod |     @classmethod | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user