mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-31 02:27:06 -04:00 
			
		
		
		
	[mod] multithreading only in searx.search.* packages
It prepares the new architecture change: everything about multithreading is moved into the searx.search.* packages. Previously, the call to the "init" function of the engines was done in searx.engines, which had two drawbacks: * the network was not set (requests were not sent using the defined proxy) * it required monkey-patching the code to avoid HTTP requests during the tests
This commit is contained in:
		
							parent
							
								
									d36adfa59f
								
							
						
					
					
						commit
						8c1a65d32f
					
				| @ -167,26 +167,3 @@ def load_engines(engine_list): | |||||||
|         if engine is not None: |         if engine is not None: | ||||||
|             engines[engine.name] = engine |             engines[engine.name] = engine | ||||||
|     return engines |     return engines | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def initialize_engines(engine_list): |  | ||||||
|     load_engines(engine_list) |  | ||||||
|     initialize_network(engine_list, settings['outgoing']) |  | ||||||
| 
 |  | ||||||
|     def engine_init(engine_name, init_fn): |  | ||||||
|         try: |  | ||||||
|             set_context_network_name(engine_name) |  | ||||||
|             init_fn(get_engine_from_settings(engine_name)) |  | ||||||
|         except SearxEngineResponseException as exc: |  | ||||||
|             logger.warn('%s engine: Fail to initialize // %s', engine_name, exc) |  | ||||||
|         except Exception: |  | ||||||
|             logger.exception('%s engine: Fail to initialize', engine_name) |  | ||||||
|         else: |  | ||||||
|             logger.debug('%s engine: Initialized', engine_name) |  | ||||||
| 
 |  | ||||||
|     for engine_name, engine in engines.items(): |  | ||||||
|         if hasattr(engine, 'init'): |  | ||||||
|             init_fn = getattr(engine, 'init') |  | ||||||
|             if init_fn: |  | ||||||
|                 logger.debug('%s engine: Starting background initialization', engine_name) |  | ||||||
|                 threading.Thread(target=engine_init, args=(engine_name, init_fn)).start() |  | ||||||
|  | |||||||
| @ -29,9 +29,11 @@ from searx.results import ResultContainer | |||||||
| from searx import logger | from searx import logger | ||||||
| from searx.plugins import plugins | from searx.plugins import plugins | ||||||
| from searx.search.models import EngineRef, SearchQuery | from searx.search.models import EngineRef, SearchQuery | ||||||
| from searx.search.processors import processors, initialize as initialize_processors | from searx.engines import load_engines | ||||||
| from searx.search.checker import initialize as initialize_checker | from searx.network import initialize as initialize_network | ||||||
| from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time | from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time | ||||||
|  | from searx.search.processors import PROCESSORS, initialize as initialize_processors | ||||||
|  | from searx.search.checker import initialize as initialize_checker | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| logger = logger.getChild('search') | logger = logger.getChild('search') | ||||||
| @ -50,8 +52,10 @@ else: | |||||||
| 
 | 
 | ||||||
| def initialize(settings_engines=None, enable_checker=False): | def initialize(settings_engines=None, enable_checker=False): | ||||||
|     settings_engines = settings_engines or settings['engines'] |     settings_engines = settings_engines or settings['engines'] | ||||||
|     initialize_processors(settings_engines) |     load_engines(settings_engines) | ||||||
|  |     initialize_network(settings_engines, settings['outgoing']) | ||||||
|     initialize_metrics([engine['name'] for engine in settings_engines]) |     initialize_metrics([engine['name'] for engine in settings_engines]) | ||||||
|  |     initialize_processors(settings_engines) | ||||||
|     if enable_checker: |     if enable_checker: | ||||||
|         initialize_checker() |         initialize_checker() | ||||||
| 
 | 
 | ||||||
| @ -106,7 +110,7 @@ class Search: | |||||||
| 
 | 
 | ||||||
|         # start search-reqest for all selected engines |         # start search-reqest for all selected engines | ||||||
|         for engineref in self.search_query.engineref_list: |         for engineref in self.search_query.engineref_list: | ||||||
|             processor = processors[engineref.name] |             processor = PROCESSORS[engineref.name] | ||||||
| 
 | 
 | ||||||
|             # stop the request now if the engine is suspend |             # stop the request now if the engine is suspend | ||||||
|             if processor.extend_container_if_suspended(self.result_container): |             if processor.extend_container_if_suspended(self.result_container): | ||||||
| @ -152,7 +156,7 @@ class Search: | |||||||
| 
 | 
 | ||||||
|         for engine_name, query, request_params in requests: |         for engine_name, query, request_params in requests: | ||||||
|             th = threading.Thread( |             th = threading.Thread( | ||||||
|                 target=processors[engine_name].search, |                 target=PROCESSORS[engine_name].search, | ||||||
|                 args=(query, request_params, self.result_container, self.start_time, self.actual_timeout), |                 args=(query, request_params, self.result_container, self.start_time, self.actual_timeout), | ||||||
|                 name=search_id, |                 name=search_id, | ||||||
|             ) |             ) | ||||||
|  | |||||||
| @ -8,7 +8,7 @@ import logging | |||||||
| 
 | 
 | ||||||
| import searx.search | import searx.search | ||||||
| import searx.search.checker | import searx.search.checker | ||||||
| from searx.search import processors | from searx.search import PROCESSORS | ||||||
| from searx.engines import engine_shortcuts | from searx.engines import engine_shortcuts | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -41,13 +41,13 @@ def iter_processor(engine_name_list): | |||||||
|     if len(engine_name_list) > 0: |     if len(engine_name_list) > 0: | ||||||
|         for name in engine_name_list: |         for name in engine_name_list: | ||||||
|             name = engine_shortcuts.get(name, name) |             name = engine_shortcuts.get(name, name) | ||||||
|             processor = processors.get(name) |             processor = PROCESSORS.get(name) | ||||||
|             if processor is not None: |             if processor is not None: | ||||||
|                 yield name, processor |                 yield name, processor | ||||||
|             else: |             else: | ||||||
|                 stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}{RED}Engine does not exist{RESET_SEQ}') |                 stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}{RED}Engine does not exist{RESET_SEQ}') | ||||||
|     else: |     else: | ||||||
|         for name, processor in searx.search.processors.items(): |         for name, processor in searx.search.PROCESSORS.items(): | ||||||
|             yield name, processor |             yield name, processor | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -9,7 +9,7 @@ import signal | |||||||
| 
 | 
 | ||||||
| from searx import logger, settings, searx_debug | from searx import logger, settings, searx_debug | ||||||
| from searx.exceptions import SearxSettingsException | from searx.exceptions import SearxSettingsException | ||||||
| from searx.search.processors import processors | from searx.search.processors import PROCESSORS | ||||||
| from searx.search.checker import Checker | from searx.search.checker import Checker | ||||||
| from searx.shared import schedule, storage | from searx.shared import schedule, storage | ||||||
| 
 | 
 | ||||||
| @ -55,7 +55,7 @@ def run(): | |||||||
|             'status': 'ok', |             'status': 'ok', | ||||||
|             'engines': {} |             'engines': {} | ||||||
|         } |         } | ||||||
|         for name, processor in processors.items(): |         for name, processor in PROCESSORS.items(): | ||||||
|             logger.debug('Checking %s engine', name) |             logger.debug('Checking %s engine', name) | ||||||
|             checker = Checker(processor) |             checker = Checker(processor) | ||||||
|             checker.run() |             checker.run() | ||||||
|  | |||||||
| @ -11,9 +11,11 @@ __all__ = [ | |||||||
|     'OnlineProcessor', |     'OnlineProcessor', | ||||||
|     'OnlineDictionaryProcessor', |     'OnlineDictionaryProcessor', | ||||||
|     'OnlineCurrencyProcessor', |     'OnlineCurrencyProcessor', | ||||||
|     'processors', |     'PROCESSORS', | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
|  | import threading | ||||||
|  | 
 | ||||||
| from searx import logger | from searx import logger | ||||||
| import searx.engines as engines | import searx.engines as engines | ||||||
| 
 | 
 | ||||||
| @ -24,7 +26,7 @@ from .online_currency import OnlineCurrencyProcessor | |||||||
| from .abstract import EngineProcessor | from .abstract import EngineProcessor | ||||||
| 
 | 
 | ||||||
| logger = logger.getChild('search.processors') | logger = logger.getChild('search.processors') | ||||||
| processors = {} | PROCESSORS = {} | ||||||
| """Cache request processores, stored by *engine-name* (:py:func:`initialize`)""" | """Cache request processores, stored by *engine-name* (:py:func:`initialize`)""" | ||||||
| 
 | 
 | ||||||
| def get_processor_class(engine_type): | def get_processor_class(engine_type): | ||||||
| @ -34,6 +36,7 @@ def get_processor_class(engine_type): | |||||||
|             return c |             return c | ||||||
|     return None |     return None | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| def get_processor(engine, engine_name): | def get_processor(engine, engine_name): | ||||||
|     """Return processor instance that fits to ``engine.engine.type``)""" |     """Return processor instance that fits to ``engine.engine.type``)""" | ||||||
|     engine_type = getattr(engine, 'engine_type', 'online') |     engine_type = getattr(engine, 'engine_type', 'online') | ||||||
| @ -42,12 +45,26 @@ def get_processor(engine, engine_name): | |||||||
|         return processor_class(engine, engine_name) |         return processor_class(engine, engine_name) | ||||||
|     return None |     return None | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
|  | def initialize_processor(processor): | ||||||
|  |     """Initialize one processor | ||||||
|  | 
 | ||||||
|  |     Call the init function of the engine | ||||||
|  |     """ | ||||||
|  |     if processor.has_initialize_function: | ||||||
|  |         t = threading.Thread(target=processor.initialize, daemon=True) | ||||||
|  |         t.start() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def initialize(engine_list): | def initialize(engine_list): | ||||||
|     """Initialize all engines and store a processor for each engine in :py:obj:`processors`.""" |     """Initialize all engines and store a processor for each engine in :py:obj:`PROCESSORS`.""" | ||||||
|     engines.initialize_engines(engine_list) |     for engine_data in engine_list: | ||||||
|     for engine_name, engine in engines.engines.items(): |         engine_name = engine_data['name'] | ||||||
|  |         engine = engines.engines.get(engine_name) | ||||||
|  |         if engine: | ||||||
|             processor = get_processor(engine, engine_name) |             processor = get_processor(engine, engine_name) | ||||||
|  |             initialize_processor(processor) | ||||||
|             if processor is None: |             if processor is None: | ||||||
|                 logger.error('Error get processor for engine %s', engine_name) |                 logger.error('Error get processor for engine %s', engine_name) | ||||||
|             else: |             else: | ||||||
|             processors[engine_name] = processor |                 PROCESSORS[engine_name] = processor | ||||||
|  | |||||||
| @ -13,7 +13,8 @@ from searx import logger | |||||||
| from searx.engines import settings | from searx.engines import settings | ||||||
| from searx.network import get_time_for_thread, get_network | from searx.network import get_time_for_thread, get_network | ||||||
| from searx.metrics import histogram_observe, counter_inc, count_exception, count_error | from searx.metrics import histogram_observe, counter_inc, count_exception, count_error | ||||||
| from searx.exceptions import SearxEngineAccessDeniedException | from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException | ||||||
|  | from searx.utils import get_engine_from_settings | ||||||
| 
 | 
 | ||||||
| logger = logger.getChild('searx.search.processor') | logger = logger.getChild('searx.search.processor') | ||||||
| SUSPENDED_STATUS = {} | SUSPENDED_STATUS = {} | ||||||
| @ -66,6 +67,20 @@ class EngineProcessor(ABC): | |||||||
|         key = id(key) if key else self.engine_name |         key = id(key) if key else self.engine_name | ||||||
|         self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus()) |         self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus()) | ||||||
| 
 | 
 | ||||||
|  |     def initialize(self): | ||||||
|  |         try: | ||||||
|  |             self.engine.init(get_engine_from_settings(self.engine_name)) | ||||||
|  |         except SearxEngineResponseException as exc: | ||||||
|  |             logger.warn('%s engine: Fail to initialize // %s', self.engine_name, exc) | ||||||
|  |         except Exception:  # pylint: disable=broad-except | ||||||
|  |             logger.exception('%s engine: Fail to initialize', self.engine_name) | ||||||
|  |         else: | ||||||
|  |             logger.debug('%s engine: Initialized', self.engine_name) | ||||||
|  | 
 | ||||||
|  |     @property | ||||||
|  |     def has_initialize_function(self): | ||||||
|  |         return hasattr(self.engine, 'init') | ||||||
|  | 
 | ||||||
|     def handle_exception(self, result_container, exception_or_message, suspend=False): |     def handle_exception(self, result_container, exception_or_message, suspend=False): | ||||||
|         # update result_container |         # update result_container | ||||||
|         if isinstance(exception_or_message, BaseException): |         if isinstance(exception_or_message, BaseException): | ||||||
|  | |||||||
| @ -5,7 +5,7 @@ | |||||||
| 
 | 
 | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| from time import time | from timeit import default_timer | ||||||
| import asyncio | import asyncio | ||||||
| import httpx | import httpx | ||||||
| 
 | 
 | ||||||
| @ -40,6 +40,15 @@ class OnlineProcessor(EngineProcessor): | |||||||
| 
 | 
 | ||||||
|     engine_type = 'online' |     engine_type = 'online' | ||||||
| 
 | 
 | ||||||
|  |     def initialize(self): | ||||||
|  |         # set timeout for all HTTP requests | ||||||
|  |         searx.network.set_timeout_for_thread(self.engine.timeout, start_time=default_timer()) | ||||||
|  |         # reset the HTTP total time | ||||||
|  |         searx.network.reset_time_for_thread() | ||||||
|  |         # set the network | ||||||
|  |         searx.network.set_context_network_name(self.engine_name) | ||||||
|  |         super().initialize() | ||||||
|  | 
 | ||||||
|     def get_params(self, search_query, engine_category): |     def get_params(self, search_query, engine_category): | ||||||
|         params = super().get_params(search_query, engine_category) |         params = super().get_params(search_query, engine_category) | ||||||
|         if params is None: |         if params is None: | ||||||
| @ -139,7 +148,7 @@ class OnlineProcessor(EngineProcessor): | |||||||
|             self.handle_exception(result_container, e, suspend=True) |             self.handle_exception(result_container, e, suspend=True) | ||||||
|             logger.error("engine {0} : HTTP requests timeout" |             logger.error("engine {0} : HTTP requests timeout" | ||||||
|                          "(search duration : {1} s, timeout: {2} s) : {3}" |                          "(search duration : {1} s, timeout: {2} s) : {3}" | ||||||
|                          .format(self.engine_name, time() - start_time, |                          .format(self.engine_name, default_timer() - start_time, | ||||||
|                                  timeout_limit, |                                  timeout_limit, | ||||||
|                                  e.__class__.__name__)) |                                  e.__class__.__name__)) | ||||||
|         except (httpx.HTTPError, httpx.StreamError) as e: |         except (httpx.HTTPError, httpx.StreamError) as e: | ||||||
| @ -147,7 +156,7 @@ class OnlineProcessor(EngineProcessor): | |||||||
|             self.handle_exception(result_container, e, suspend=True) |             self.handle_exception(result_container, e, suspend=True) | ||||||
|             logger.exception("engine {0} : requests exception" |             logger.exception("engine {0} : requests exception" | ||||||
|                              "(search duration : {1} s, timeout: {2} s) : {3}" |                              "(search duration : {1} s, timeout: {2} s) : {3}" | ||||||
|                              .format(self.engine_name, time() - start_time, |                              .format(self.engine_name, default_timer() - start_time, | ||||||
|                                      timeout_limit, |                                      timeout_limit, | ||||||
|                                      e)) |                                      e)) | ||||||
|         except SearxEngineCaptchaException as e: |         except SearxEngineCaptchaException as e: | ||||||
|  | |||||||
| @ -23,7 +23,7 @@ class TestEnginesInit(SearxTestCase): | |||||||
|         engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1', 'categories': 'general'}, |         engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1', 'categories': 'general'}, | ||||||
|                        {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}] |                        {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}] | ||||||
| 
 | 
 | ||||||
|         engines.initialize_engines(engine_list) |         engines.load_engines(engine_list) | ||||||
|         self.assertEqual(len(engines.engines), 1) |         self.assertEqual(len(engines.engines), 1) | ||||||
|         self.assertIn('engine1', engines.engines) |         self.assertIn('engine1', engines.engines) | ||||||
|         self.assertNotIn('onions', engines.categories) |         self.assertNotIn('onions', engines.categories) | ||||||
| @ -35,7 +35,7 @@ class TestEnginesInit(SearxTestCase): | |||||||
|                         'timeout': 20.0, 'onion_url': 'http://engine1.onion'}, |                         'timeout': 20.0, 'onion_url': 'http://engine1.onion'}, | ||||||
|                        {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}] |                        {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}] | ||||||
| 
 | 
 | ||||||
|         engines.initialize_engines(engine_list) |         engines.load_engines(engine_list) | ||||||
|         self.assertEqual(len(engines.engines), 2) |         self.assertEqual(len(engines.engines), 2) | ||||||
|         self.assertIn('engine1', engines.engines) |         self.assertIn('engine1', engines.engines) | ||||||
|         self.assertIn('engine2', engines.engines) |         self.assertIn('engine2', engines.engines) | ||||||
|  | |||||||
| @ -1,11 +1,8 @@ | |||||||
| from mock import patch | from searx import settings | ||||||
| 
 | from searx.engines import load_engines | ||||||
| from searx.search import initialize |  | ||||||
| from searx.query import RawTextQuery | from searx.query import RawTextQuery | ||||||
| from searx.testing import SearxTestCase | from searx.testing import SearxTestCase | ||||||
| 
 | 
 | ||||||
| import searx.engines |  | ||||||
| 
 |  | ||||||
| 
 | 
 | ||||||
| TEST_ENGINES = [ | TEST_ENGINES = [ | ||||||
|     { |     { | ||||||
| @ -241,7 +238,7 @@ class TestBang(SearxTestCase): | |||||||
|     THE_QUERY = 'the query' |     THE_QUERY = 'the query' | ||||||
| 
 | 
 | ||||||
|     def test_bang(self): |     def test_bang(self): | ||||||
|         initialize(TEST_ENGINES) |         load_engines(TEST_ENGINES) | ||||||
| 
 | 
 | ||||||
|         for bang in TestBang.SPECIFIC_BANGS + TestBang.NOT_SPECIFIC_BANGS: |         for bang in TestBang.SPECIFIC_BANGS + TestBang.NOT_SPECIFIC_BANGS: | ||||||
|             with self.subTest(msg="Check bang", bang=bang): |             with self.subTest(msg="Check bang", bang=bang): | ||||||
| @ -267,12 +264,12 @@ class TestBang(SearxTestCase): | |||||||
|                 self.assertFalse(query.specific) |                 self.assertFalse(query.specific) | ||||||
| 
 | 
 | ||||||
|     def test_bang_not_found(self): |     def test_bang_not_found(self): | ||||||
|         initialize(TEST_ENGINES) |         load_engines(TEST_ENGINES) | ||||||
|         query = RawTextQuery('the query !bang_not_found', []) |         query = RawTextQuery('the query !bang_not_found', []) | ||||||
|         self.assertEqual(query.getFullQuery(), 'the query !bang_not_found') |         self.assertEqual(query.getFullQuery(), 'the query !bang_not_found') | ||||||
| 
 | 
 | ||||||
|     def test_bang_autocomplete(self): |     def test_bang_autocomplete(self): | ||||||
|         initialize(TEST_ENGINES) |         load_engines(TEST_ENGINES) | ||||||
|         query = RawTextQuery('the query !dum', []) |         query = RawTextQuery('the query !dum', []) | ||||||
|         self.assertEqual(query.autocomplete_list, ['!dummy_engine']) |         self.assertEqual(query.autocomplete_list, ['!dummy_engine']) | ||||||
| 
 | 
 | ||||||
| @ -281,8 +278,7 @@ class TestBang(SearxTestCase): | |||||||
|         self.assertEqual(query.getQuery(), '!dum the query') |         self.assertEqual(query.getQuery(), '!dum the query') | ||||||
| 
 | 
 | ||||||
|     def test_bang_autocomplete_empty(self): |     def test_bang_autocomplete_empty(self): | ||||||
|         with patch.object(searx.engines, 'initialize_engines', searx.engines.load_engines): |         load_engines(settings['engines']) | ||||||
|             initialize() |  | ||||||
|         query = RawTextQuery('the query !', []) |         query = RawTextQuery('the query !', []) | ||||||
|         self.assertEqual(query.autocomplete_list, ['!images', '!wikipedia', '!osm']) |         self.assertEqual(query.autocomplete_list, ['!images', '!wikipedia', '!osm']) | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -5,14 +5,16 @@ from urllib.parse import ParseResult | |||||||
| from mock import Mock | from mock import Mock | ||||||
| from searx.testing import SearxTestCase | from searx.testing import SearxTestCase | ||||||
| from searx.search import Search | from searx.search import Search | ||||||
| import searx.engines | import searx.search.processors | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class ViewsTestCase(SearxTestCase): | class ViewsTestCase(SearxTestCase): | ||||||
| 
 | 
 | ||||||
|     def setUp(self): |     def setUp(self): | ||||||
|         # skip init function (no external HTTP request) |         # skip init function (no external HTTP request) | ||||||
|         self.setattr4test(searx.engines, 'initialize_engines', searx.engines.load_engines) |         def dummy(*args, **kwargs): | ||||||
|  |             pass | ||||||
|  |         self.setattr4test(searx.search.processors, 'initialize_processor', dummy) | ||||||
| 
 | 
 | ||||||
|         from searx import webapp  # pylint disable=import-outside-toplevel |         from searx import webapp  # pylint disable=import-outside-toplevel | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user