mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-03 19:17:07 -05:00 
			
		
		
		
	Merge pull request #2246 from dalf/mod-searx-data
[mod] Add searx.data module
This commit is contained in:
		
						commit
						8b278cbfad
					
				
							
								
								
									
										21
									
								
								searx/data/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								searx/data/__init__.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,21 @@
 | 
				
			|||||||
 | 
					import json
 | 
				
			||||||
 | 
					from pathlib import Path
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__all__ = ['ENGINES_LANGUAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader']  # public API of searx.data
 | 
				
			||||||
 | 
					data_dir = Path(__file__).parent
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def load(filename):
 | 
				
			||||||
 | 
					    # wrap the path in str(...): on Python 3.5 open() does not accept pathlib.Path objects
 | 
				
			||||||
 | 
					    with open(str(data_dir / filename), encoding='utf-8') as fd:
 | 
				
			||||||
 | 
					        return json.load(fd)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def bangs_loader():
 | 
				
			||||||
 | 
					    return load('bangs.json')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ENGINES_LANGUAGES = load('engines_languages.json')
 | 
				
			||||||
 | 
					CURRENCIES = load('currencies.json')
 | 
				
			||||||
 | 
					USER_AGENTS = load('useragents.json')
 | 
				
			||||||
@ -19,13 +19,12 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 | 
				
			|||||||
import sys
 | 
					import sys
 | 
				
			||||||
import threading
 | 
					import threading
 | 
				
			||||||
from os.path import realpath, dirname
 | 
					from os.path import realpath, dirname
 | 
				
			||||||
from io import open
 | 
					 | 
				
			||||||
from babel.localedata import locale_identifiers
 | 
					from babel.localedata import locale_identifiers
 | 
				
			||||||
from flask_babel import gettext
 | 
					from flask_babel import gettext
 | 
				
			||||||
from operator import itemgetter
 | 
					from operator import itemgetter
 | 
				
			||||||
from json import loads
 | 
					 | 
				
			||||||
from searx import settings
 | 
					from searx import settings
 | 
				
			||||||
from searx import logger
 | 
					from searx import logger
 | 
				
			||||||
 | 
					from searx.data import ENGINES_LANGUAGES
 | 
				
			||||||
from searx.poolrequests import get
 | 
					from searx.poolrequests import get
 | 
				
			||||||
from searx.utils import load_module, match_language, get_engine_from_settings
 | 
					from searx.utils import load_module, match_language, get_engine_from_settings
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -38,7 +37,6 @@ engines = {}
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
categories = {'general': []}
 | 
					categories = {'general': []}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
languages = loads(open(engine_dir + '/../data/engines_languages.json', 'r', encoding='utf-8').read())
 | 
					 | 
				
			||||||
babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
 | 
					babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
 | 
				
			||||||
               for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())]
 | 
					               for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -108,8 +106,8 @@ def load_engine(engine_data):
 | 
				
			|||||||
            sys.exit(1)
 | 
					            sys.exit(1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # assign supported languages from json file
 | 
					    # assign supported languages from json file
 | 
				
			||||||
    if engine_data['name'] in languages:
 | 
					    if engine_data['name'] in ENGINES_LANGUAGES:
 | 
				
			||||||
        setattr(engine, 'supported_languages', languages[engine_data['name']])
 | 
					        setattr(engine, 'supported_languages', ENGINES_LANGUAGES[engine_data['name']])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # find custom aliases for non standard language codes
 | 
					    # find custom aliases for non standard language codes
 | 
				
			||||||
    if hasattr(engine, 'supported_languages'):
 | 
					    if hasattr(engine, 'supported_languages'):
 | 
				
			||||||
 | 
				
			|||||||
@ -1,11 +1,11 @@
 | 
				
			|||||||
import json
 | 
					import json
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
import unicodedata
 | 
					import unicodedata
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from io import open
 | 
					 | 
				
			||||||
from datetime import datetime
 | 
					from datetime import datetime
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from searx.data import CURRENCIES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
categories = []
 | 
					categories = []
 | 
				
			||||||
url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
 | 
					url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
 | 
				
			||||||
@ -13,8 +13,6 @@ weight = 100
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
 | 
					parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
db = 1
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
def normalize_name(name):
 | 
					def normalize_name(name):
 | 
				
			||||||
    name = name.lower().replace('-', ' ').rstrip('s')
 | 
					    name = name.lower().replace('-', ' ').rstrip('s')
 | 
				
			||||||
@ -23,17 +21,17 @@ def normalize_name(name):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def name_to_iso4217(name):
 | 
					def name_to_iso4217(name):
 | 
				
			||||||
    global db
 | 
					    global CURRENCIES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    name = normalize_name(name)
 | 
					    name = normalize_name(name)
 | 
				
			||||||
    currencies = db['names'].get(name, [name])
 | 
					    currency = CURRENCIES['names'].get(name, [name])
 | 
				
			||||||
    return currencies[0]
 | 
					    return currency[0]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def iso4217_to_name(iso4217, language):
 | 
					def iso4217_to_name(iso4217, language):
 | 
				
			||||||
    global db
 | 
					    global CURRENCIES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return db['iso4217'].get(iso4217, {}).get(language, iso4217)
 | 
					    return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def request(query, params):
 | 
					def request(query, params):
 | 
				
			||||||
@ -82,15 +80,3 @@ def response(resp):
 | 
				
			|||||||
    results.append({'answer': answer, 'url': url})
 | 
					    results.append({'answer': answer, 'url': url})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return results
 | 
					    return results
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def load():
 | 
					 | 
				
			||||||
    global db
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    current_dir = os.path.dirname(os.path.realpath(__file__))
 | 
					 | 
				
			||||||
    json_data = open(current_dir + "/../data/currencies.json", 'r', encoding='utf-8').read()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    db = json.loads(json_data)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
load()
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -1,7 +1,4 @@
 | 
				
			|||||||
import json
 | 
					from searx.data import bangs_loader
 | 
				
			||||||
from os.path import join
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from searx import searx_dir
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
# bangs data coming from the following url convert to json with
 | 
					# bangs data coming from the following url convert to json with
 | 
				
			||||||
# https://raw.githubusercontent.com/jivesearch/jivesearch/master/bangs/bangs.toml
 | 
					# https://raw.githubusercontent.com/jivesearch/jivesearch/master/bangs/bangs.toml
 | 
				
			||||||
@ -9,10 +6,9 @@ from searx import searx_dir
 | 
				
			|||||||
# NOTE only use the get_bang_url
 | 
					# NOTE only use the get_bang_url
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bangs_data = {}
 | 
					bangs_data = {}
 | 
				
			||||||
with open(join(searx_dir, 'data/bangs.json'), encoding='utf-8') as json_file:
 | 
					for bang in bangs_loader()['bang']:
 | 
				
			||||||
    for bang in json.load(json_file)['bang']:
 | 
					    for trigger in bang["triggers"]:
 | 
				
			||||||
        for trigger in bang["triggers"]:
 | 
					        bangs_data[trigger] = {x: y for x, y in bang.items() if x != "triggers"}
 | 
				
			||||||
            bangs_data[trigger] = {x: y for x, y in bang.items() if x != "triggers"}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_bang_url(search_query):
 | 
					def get_bang_url(search_query):
 | 
				
			||||||
 | 
				
			|||||||
@ -1,13 +1,10 @@
 | 
				
			|||||||
# -*- coding: utf-8 -*-
 | 
					# -*- coding: utf-8 -*-
 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
import json
 | 
					 | 
				
			||||||
import importlib
 | 
					import importlib
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from numbers import Number
 | 
					from numbers import Number
 | 
				
			||||||
from os.path import splitext, join
 | 
					from os.path import splitext, join
 | 
				
			||||||
from io import open
 | 
					 | 
				
			||||||
from random import choice
 | 
					from random import choice
 | 
				
			||||||
from html.parser import HTMLParser
 | 
					from html.parser import HTMLParser
 | 
				
			||||||
from urllib.parse import urljoin, urlparse, unquote
 | 
					from urllib.parse import urljoin, urlparse, unquote
 | 
				
			||||||
@ -18,6 +15,7 @@ from babel.core import get_global
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from searx import settings
 | 
					from searx import settings
 | 
				
			||||||
 | 
					from searx.data import USER_AGENTS
 | 
				
			||||||
from searx.version import VERSION_STRING
 | 
					from searx.version import VERSION_STRING
 | 
				
			||||||
from searx.languages import language_codes
 | 
					from searx.languages import language_codes
 | 
				
			||||||
from searx import logger
 | 
					from searx import logger
 | 
				
			||||||
@ -31,9 +29,6 @@ blocked_tags = ('script',
 | 
				
			|||||||
ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
 | 
					ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
 | 
				
			||||||
ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)
 | 
					ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__))
 | 
					 | 
				
			||||||
                             + "/data/useragents.json", 'r', encoding='utf-8').read())
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
xpath_cache = dict()
 | 
					xpath_cache = dict()
 | 
				
			||||||
lang_to_lc_cache = dict()
 | 
					lang_to_lc_cache = dict()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -50,7 +45,7 @@ def gen_useragent(os=None):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    See searx/data/useragents.json
 | 
					    See searx/data/useragents.json
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    return str(useragents['ua'].format(os=os or choice(useragents['os']), version=choice(useragents['versions'])))
 | 
					    return str(USER_AGENTS['ua'].format(os=os or choice(USER_AGENTS['os']), version=choice(USER_AGENTS['versions'])))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class HTMLTextExtractorException(Exception):
 | 
					class HTMLTextExtractorException(Exception):
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user