mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-25 07:49:02 -04:00 
			
		
		
		
	Merge pull request #683 from return42/fix-doc
Document & Pylint scripts in searxng_extra/update
This commit is contained in:
		
						commit
						160f3e022e
					
				| @ -1,14 +1,15 @@ | ||||
| .. _searxng_extra: | ||||
| 
 | ||||
| ====================================================== | ||||
| Tooling box ``searxng_extra`` for developers and users | ||||
| ====================================================== | ||||
| ============================= | ||||
| Tooling box ``searxng_extra`` | ||||
| ============================= | ||||
| 
 | ||||
| In the folder :origin:`searxng_extra/` we maintain some tools useful for | ||||
| In the folder :origin:`searxng_extra/` we maintain some tools useful for CI and | ||||
| developers. | ||||
| 
 | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
|    :caption: Contents | ||||
| 
 | ||||
|    update | ||||
|    standalone_searx.py | ||||
|  | ||||
							
								
								
									
										88
									
								
								docs/dev/searxng_extra/update.rst
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								docs/dev/searxng_extra/update.rst
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,88 @@ | ||||
| ========================= | ||||
| ``searxng_extra/update/`` | ||||
| ========================= | ||||
| 
 | ||||
| :origin:`[source] <searxng_extra/update/__init__.py>` | ||||
| 
 | ||||
| Scripts to update static data in :origin:`searx/data/` | ||||
| 
 | ||||
| .. _update_ahmia_blacklist.py: | ||||
| 
 | ||||
| ``update_ahmia_blacklist.py`` | ||||
| ============================= | ||||
| 
 | ||||
| :origin:`[source] <searxng_extra/update/update_ahmia_blacklist.py>` | ||||
| 
 | ||||
| .. automodule:: searxng_extra.update.update_ahmia_blacklist | ||||
|   :members: | ||||
| 
 | ||||
| 
 | ||||
| ``update_currencies.py`` | ||||
| ======================== | ||||
| 
 | ||||
| :origin:`[source] <searxng_extra/update/update_currencies.py>` | ||||
| 
 | ||||
| .. automodule:: searxng_extra.update.update_currencies | ||||
|   :members: | ||||
| 
 | ||||
| ``update_engine_descriptions.py`` | ||||
| ================================= | ||||
| 
 | ||||
| :origin:`[source] <searxng_extra/update/update_engine_descriptions.py>` | ||||
| 
 | ||||
| .. automodule:: searxng_extra.update.update_engine_descriptions | ||||
|   :members: | ||||
| 
 | ||||
| 
 | ||||
| ``update_external_bangs.py`` | ||||
| ============================ | ||||
| 
 | ||||
| :origin:`[source] <searxng_extra/update/update_external_bangs.py>` | ||||
| 
 | ||||
| .. automodule:: searxng_extra.update.update_external_bangs | ||||
|   :members: | ||||
| 
 | ||||
| 
 | ||||
| ``update_firefox_version.py`` | ||||
| ============================= | ||||
| 
 | ||||
| :origin:`[source] <searxng_extra/update/update_firefox_version.py>` | ||||
| 
 | ||||
| .. automodule:: searxng_extra.update.update_firefox_version | ||||
|   :members: | ||||
| 
 | ||||
| 
 | ||||
| ``update_languages.py`` | ||||
| ======================= | ||||
| 
 | ||||
| :origin:`[source] <searxng_extra/update/update_languages.py>` | ||||
| 
 | ||||
| .. automodule:: searxng_extra.update.update_languages | ||||
|   :members: | ||||
| 
 | ||||
| 
 | ||||
| ``update_osm_keys_tags.py`` | ||||
| =========================== | ||||
| 
 | ||||
| :origin:`[source] <searxng_extra/update/update_osm_keys_tags.py>` | ||||
| 
 | ||||
| .. automodule:: searxng_extra.update.update_osm_keys_tags | ||||
|   :members: | ||||
| 
 | ||||
| 
 | ||||
| ``update_pygments.py`` | ||||
| ====================== | ||||
| 
 | ||||
| :origin:`[source] <searxng_extra/update/update_pygments.py>` | ||||
| 
 | ||||
| .. automodule:: searxng_extra.update.update_pygments | ||||
|   :members: | ||||
| 
 | ||||
| 
 | ||||
| ``update_wikidata_units.py`` | ||||
| ============================ | ||||
| 
 | ||||
| :origin:`[source] <searxng_extra/update/update_wikidata_units.py>` | ||||
| 
 | ||||
| .. automodule:: searxng_extra.update.update_wikidata_units | ||||
|   :members: | ||||
| @ -1,10 +1,15 @@ | ||||
| #!/usr/bin/env python | ||||
| # lint: pylint | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """This script saves `Ahmia's blacklist`_ for onion sites. | ||||
| 
 | ||||
| # This script saves Ahmia's blacklist for onion sites. | ||||
| # More info in https://ahmia.fi/blacklist/ | ||||
| Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data | ||||
| ...  <.github/workflows/data-update.yml>`). | ||||
| 
 | ||||
| .. _Ahmia's blacklist: https://ahmia.fi/blacklist/ | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
| # set path | ||||
| from os.path import join | ||||
| 
 | ||||
| import requests | ||||
| @ -17,15 +22,14 @@ def fetch_ahmia_blacklist(): | ||||
|     resp = requests.get(URL, timeout=3.0) | ||||
|     if resp.status_code != 200: | ||||
|         raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code) | ||||
|     else: | ||||
|         blacklist = resp.text.split() | ||||
|         return blacklist | ||||
|     return resp.text.split() | ||||
| 
 | ||||
| 
 | ||||
| def get_ahmia_blacklist_filename(): | ||||
|     return join(join(searx_dir, "data"), "ahmia_blacklist.txt") | ||||
| 
 | ||||
| 
 | ||||
| blacklist = fetch_ahmia_blacklist() | ||||
| with open(get_ahmia_blacklist_filename(), "w") as f: | ||||
| if __name__ == '__main__': | ||||
|     blacklist = fetch_ahmia_blacklist() | ||||
|     with open(get_ahmia_blacklist_filename(), "w", encoding='utf-8') as f: | ||||
|         f.write('\n'.join(blacklist)) | ||||
|  | ||||
| @ -1,13 +1,22 @@ | ||||
| #!/usr/bin/env python | ||||
| # lint: pylint | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| 
 | ||||
| """Fetch currencies from :origin:`searx/engines/wikidata.py` engine. | ||||
| 
 | ||||
| Output file: :origin:`searx/data/currencies.json` (:origin:`CI Update data ... | ||||
| <.github/workflows/data-update.yml>`). | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
| # pylint: disable=invalid-name | ||||
| 
 | ||||
| import re | ||||
| import unicodedata | ||||
| import json | ||||
| 
 | ||||
| # set path | ||||
| from sys import path | ||||
| from os.path import realpath, dirname, join | ||||
| from os.path import join | ||||
| 
 | ||||
| from searx import searx_dir | ||||
| from searx.locales import LOCALE_NAMES | ||||
|  | ||||
| @ -1,6 +1,16 @@ | ||||
| #!/usr/bin/env python | ||||
| # lint: pylint | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| 
 | ||||
| """Fetch website description from websites and from | ||||
| :origin:`searx/engines/wikidata.py` engine. | ||||
| 
 | ||||
| Output file: :origin:`searx/data/engine_descriptions.json`. | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
| # pylint: disable=invalid-name, global-statement | ||||
| 
 | ||||
| import json | ||||
| from urllib.parse import urlparse | ||||
| from os.path import join | ||||
| @ -102,7 +112,7 @@ def get_wikipedia_summary(lang, pageid): | ||||
|         response.raise_for_status() | ||||
|         api_result = json.loads(response.text) | ||||
|         return api_result.get('extract') | ||||
|     except: | ||||
|     except Exception:  # pylint: disable=broad-except | ||||
|         return None | ||||
| 
 | ||||
| 
 | ||||
| @ -134,7 +144,7 @@ def get_website_description(url, lang1, lang2=None): | ||||
|     try: | ||||
|         response = searx.network.get(url, headers=headers, timeout=10) | ||||
|         response.raise_for_status() | ||||
|     except Exception: | ||||
|     except Exception:  # pylint: disable=broad-except | ||||
|         return (None, None) | ||||
| 
 | ||||
|     try: | ||||
|  | ||||
| @ -1,17 +1,20 @@ | ||||
| #!/usr/bin/env python | ||||
| # lint: pylint | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
| Update searx/data/external_bangs.json using the duckduckgo bangs. | ||||
| """Update :origin:`searx/data/external_bangs.json` using the duckduckgo bangs | ||||
| (:origin:`CI Update data ... <.github/workflows/data-update.yml>`). | ||||
| 
 | ||||
| https://duckduckgo.com/newbang loads: | ||||
| 
 | ||||
| https://duckduckgo.com/newbang loads | ||||
| * a javascript which provides the bang version ( https://duckduckgo.com/bv1.js ) | ||||
| * a JSON file which contains the bangs ( https://duckduckgo.com/bang.v260.js for example ) | ||||
| 
 | ||||
| This script loads the javascript, then the bangs. | ||||
| 
 | ||||
| The javascript URL may change in the future ( for example https://duckduckgo.com/bv2.js ), | ||||
| but most probably it will requires to update RE_BANG_VERSION | ||||
| The javascript URL may change in the future ( for example | ||||
| https://duckduckgo.com/bv2.js ), but most probably it will require an update of | ||||
| RE_BANG_VERSION | ||||
| 
 | ||||
| """ | ||||
| # pylint: disable=C0116 | ||||
| 
 | ||||
|  | ||||
| @ -1,21 +1,30 @@ | ||||
| #!/usr/bin/env python | ||||
| # lint: pylint | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| 
 | ||||
| """Fetch firefox useragent signatures | ||||
| 
 | ||||
| Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ... | ||||
| <.github/workflows/data-update.yml>`). | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
| import json | ||||
| import requests | ||||
| import re | ||||
| from os.path import dirname, join | ||||
| from os.path import join | ||||
| from urllib.parse import urlparse, urljoin | ||||
| from distutils.version import LooseVersion, StrictVersion | ||||
| from distutils.version import LooseVersion | ||||
| 
 | ||||
| import requests | ||||
| from lxml import html | ||||
| from searx import searx_dir | ||||
| 
 | ||||
| URL = 'https://ftp.mozilla.org/pub/firefox/releases/' | ||||
| RELEASE_PATH = '/pub/firefox/releases/' | ||||
| 
 | ||||
| NORMAL_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?$') | ||||
| # BETA_REGEX = re.compile('.*[0-9]b([0-9\-a-z]+)$') | ||||
| # ESR_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?esr$') | ||||
| NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?$') | ||||
| # BETA_REGEX = re.compile(r'.*[0-9]b([0-9\-a-z]+)$') | ||||
| # ESR_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?esr$') | ||||
| 
 | ||||
| # | ||||
| useragents = { | ||||
| @ -32,7 +41,6 @@ def fetch_firefox_versions(): | ||||
|     resp = requests.get(URL, timeout=2.0) | ||||
|     if resp.status_code != 200: | ||||
|         raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code) | ||||
|     else: | ||||
|     dom = html.fromstring(resp.text) | ||||
|     versions = [] | ||||
| 
 | ||||
| @ -66,6 +74,7 @@ def get_useragents_filename(): | ||||
|     return join(join(searx_dir, "data"), "useragents.json") | ||||
| 
 | ||||
| 
 | ||||
| useragents["versions"] = fetch_firefox_last_versions() | ||||
| with open(get_useragents_filename(), "w") as f: | ||||
| if __name__ == '__main__': | ||||
|     useragents["versions"] = fetch_firefox_last_versions() | ||||
|     with open(get_useragents_filename(), "w", encoding='utf-8') as f: | ||||
|         json.dump(useragents, f, indent=4, ensure_ascii=False) | ||||
|  | ||||
| @ -1,9 +1,17 @@ | ||||
| #!/usr/bin/env python | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| # lint: pylint | ||||
| 
 | ||||
| # This script generates languages.py from intersecting each engine's supported languages. | ||||
| # | ||||
| # Output files: searx/data/engines_languages.json and searx/languages.py | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """This script generates languages.py by intersecting each engine's supported | ||||
| languages. | ||||
| 
 | ||||
| Output files: :origin:`searx/data/engines_languages.json` and | ||||
| :origin:`searx/languages.py` (:origin:`CI Update data ... | ||||
| <.github/workflows/data-update.yml>`). | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
| # pylint: disable=invalid-name | ||||
| 
 | ||||
| import json | ||||
| from pathlib import Path | ||||
| @ -24,7 +32,7 @@ languages_file = Path(searx_dir) / 'languages.py' | ||||
| def fetch_supported_languages(): | ||||
|     set_timeout_for_thread(10.0) | ||||
| 
 | ||||
|     engines_languages = dict() | ||||
|     engines_languages = {} | ||||
|     names = list(engines) | ||||
|     names.sort() | ||||
| 
 | ||||
| @ -32,7 +40,7 @@ def fetch_supported_languages(): | ||||
|         if hasattr(engines[engine_name], 'fetch_supported_languages'): | ||||
|             engines_languages[engine_name] = engines[engine_name].fetch_supported_languages() | ||||
|             print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name)) | ||||
|             if type(engines_languages[engine_name]) == list: | ||||
|             if type(engines_languages[engine_name]) == list:  # pylint: disable=unidiomatic-typecheck | ||||
|                 engines_languages[engine_name] = sorted(engines_languages[engine_name]) | ||||
| 
 | ||||
|     print("fetched languages from %s engines" % len(engines_languages)) | ||||
| @ -55,7 +63,7 @@ def get_locale(lang_code): | ||||
| 
 | ||||
| # Join all language lists. | ||||
| def join_language_lists(engines_languages): | ||||
|     language_list = dict() | ||||
|     language_list = {} | ||||
|     for engine_name in engines_languages: | ||||
|         for lang_code in engines_languages[engine_name]: | ||||
| 
 | ||||
| @ -91,7 +99,7 @@ def join_language_lists(engines_languages): | ||||
|                     'name': language_name, | ||||
|                     'english_name': english_name, | ||||
|                     'counter': set(), | ||||
|                     'countries': dict(), | ||||
|                     'countries': {}, | ||||
|                 } | ||||
| 
 | ||||
|             # add language with country if not in list | ||||
| @ -119,6 +127,7 @@ def join_language_lists(engines_languages): | ||||
| def filter_language_list(all_languages): | ||||
|     min_engines_per_lang = 13 | ||||
|     min_engines_per_country = 7 | ||||
|     # pylint: disable=consider-using-dict-items, consider-iterating-dictionary | ||||
|     main_engines = [ | ||||
|         engine_name | ||||
|         for engine_name in engines.keys() | ||||
| @ -138,7 +147,7 @@ def filter_language_list(all_languages): | ||||
|     } | ||||
| 
 | ||||
|     def _copy_lang_data(lang, country_name=None): | ||||
|         new_dict = dict() | ||||
|         new_dict = {} | ||||
|         new_dict['name'] = all_languages[lang]['name'] | ||||
|         new_dict['english_name'] = all_languages[lang]['english_name'] | ||||
|         if country_name: | ||||
| @ -146,10 +155,10 @@ def filter_language_list(all_languages): | ||||
|         return new_dict | ||||
| 
 | ||||
|     # for each language get country codes supported by most engines or at least one country code | ||||
|     filtered_languages_with_countries = dict() | ||||
|     filtered_languages_with_countries = {} | ||||
|     for lang, lang_data in filtered_languages.items(): | ||||
|         countries = lang_data['countries'] | ||||
|         filtered_countries = dict() | ||||
|         filtered_countries = {} | ||||
| 
 | ||||
|         # get language's country codes with enough supported engines | ||||
|         for lang_country, country_data in countries.items(): | ||||
| @ -211,7 +220,7 @@ def write_languages_file(languages): | ||||
| 
 | ||||
|     language_codes = tuple(language_codes) | ||||
| 
 | ||||
|     with open(languages_file, 'w') as new_file: | ||||
|     with open(languages_file, 'w', encoding='utf-8') as new_file: | ||||
|         file_content = "{file_headers} {language_codes},\n)\n".format( | ||||
|             # fmt: off | ||||
|             file_headers = '\n'.join(file_headers), | ||||
| @ -224,7 +233,7 @@ def write_languages_file(languages): | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     load_engines(settings['engines']) | ||||
|     engines_languages = fetch_supported_languages() | ||||
|     all_languages = join_language_lists(engines_languages) | ||||
|     filtered_languages = filter_language_list(all_languages) | ||||
|     write_languages_file(filtered_languages) | ||||
|     _engines_languages = fetch_supported_languages() | ||||
|     _all_languages = join_language_lists(_engines_languages) | ||||
|     _filtered_languages = filter_language_list(_all_languages) | ||||
|     write_languages_file(_filtered_languages) | ||||
|  | ||||
| @ -5,7 +5,10 @@ | ||||
| 
 | ||||
| To get the i18n names, the script uses `Wikidata Query Service`_ instead of for | ||||
| example `OSM tags API`_ (sidenote: the actual change log from | ||||
| map.atownsend.org.uk_ might be useful to normalize OSM tags) | ||||
| map.atownsend.org.uk_ might be useful to normalize OSM tags). | ||||
| 
 | ||||
| Output file: :origin:`searx/data/osm_keys_tags` (:origin:`CI Update data ... | ||||
| <.github/workflows/data-update.yml>`). | ||||
| 
 | ||||
| .. _Wikidata Query Service: https://query.wikidata.org/ | ||||
| .. _OSM tags API: https://taginfo.openstreetmap.org/taginfo/apidoc | ||||
|  | ||||
| @ -3,6 +3,13 @@ | ||||
| # lint: pylint | ||||
| # pylint: disable=missing-module-docstring | ||||
| 
 | ||||
| """Fetch units from :origin:`searx/engines/wikidata.py` engine. | ||||
| 
 | ||||
| Output file: :origin:`searx/data/wikidata_units.json` (:origin:`CI Update data | ||||
| ...  <.github/workflows/data-update.yml>`). | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
| import json | ||||
| import collections | ||||
| 
 | ||||
| @ -54,5 +61,6 @@ def get_wikidata_units_filename(): | ||||
|     return join(join(searx_dir, "data"), "wikidata_units.json") | ||||
| 
 | ||||
| 
 | ||||
| with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f: | ||||
| if __name__ == '__main__': | ||||
|     with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f: | ||||
|         json.dump(get_data(), f, indent=4, ensure_ascii=False) | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user