mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-03 19:17:07 -05:00 
			
		
		
		
	[enh] use longest title and test get_ordered_results()
This commit is contained in:
		
							parent
							
								
									94aafc83a6
								
							
						
					
					
						commit
						6948689d2a
					
				@ -12,7 +12,6 @@ from searx import logger
 | 
				
			|||||||
from searx.engines import engines
 | 
					from searx.engines import engines
 | 
				
			||||||
from searx.metrics import histogram_observe, counter_add, count_error
 | 
					from searx.metrics import histogram_observe, counter_add, count_error
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
 | 
					CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
 | 
				
			||||||
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
 | 
					WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -133,7 +132,7 @@ def result_score(result, priority):
 | 
				
			|||||||
    weight = 1.0
 | 
					    weight = 1.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for result_engine in result['engines']:
 | 
					    for result_engine in result['engines']:
 | 
				
			||||||
        if hasattr(engines[result_engine], 'weight'):
 | 
					        if hasattr(engines.get(result_engine), 'weight'):
 | 
				
			||||||
            weight *= float(engines[result_engine].weight)
 | 
					            weight *= float(engines[result_engine].weight)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    weight *= len(result['positions'])
 | 
					    weight *= len(result['positions'])
 | 
				
			||||||
@ -332,10 +331,14 @@ class ResultContainer:
 | 
				
			|||||||
        return None
 | 
					        return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __merge_duplicated_http_result(self, duplicated, result, position):
 | 
					    def __merge_duplicated_http_result(self, duplicated, result, position):
 | 
				
			||||||
        # using content with more text
 | 
					        # use content with more text
 | 
				
			||||||
        if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')):
 | 
					        if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')):
 | 
				
			||||||
            duplicated['content'] = result['content']
 | 
					            duplicated['content'] = result['content']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # use title with more text
 | 
				
			||||||
 | 
					        if result_content_len(result.get('title', '')) > len(duplicated.get('title', '')):
 | 
				
			||||||
 | 
					            duplicated['title'] = result['title']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # merge all result's parameters not found in duplicate
 | 
					        # merge all result's parameters not found in duplicate
 | 
				
			||||||
        for key in result.keys():
 | 
					        for key in result.keys():
 | 
				
			||||||
            if not duplicated.get(key):
 | 
					            if not duplicated.get(key):
 | 
				
			||||||
@ -347,7 +350,7 @@ class ResultContainer:
 | 
				
			|||||||
        # add engine to list of result-engines
 | 
					        # add engine to list of result-engines
 | 
				
			||||||
        duplicated['engines'].add(result['engine'])
 | 
					        duplicated['engines'].add(result['engine'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # using https if possible
 | 
					        # use https if possible
 | 
				
			||||||
        if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
 | 
					        if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
 | 
				
			||||||
            duplicated['url'] = result['parsed_url'].geturl()
 | 
					            duplicated['url'] = result['parsed_url'].geturl()
 | 
				
			||||||
            duplicated['parsed_url'] = result['parsed_url']
 | 
					            duplicated['parsed_url'] = result['parsed_url']
 | 
				
			||||||
 | 
				
			|||||||
@ -2,9 +2,26 @@
 | 
				
			|||||||
# pylint: disable=missing-module-docstring
 | 
					# pylint: disable=missing-module-docstring
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from searx.results import ResultContainer
 | 
					from searx.results import ResultContainer
 | 
				
			||||||
 | 
					from searx.engines import load_engines
 | 
				
			||||||
from tests import SearxTestCase
 | 
					from tests import SearxTestCase
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def make_test_engine_dict(**kwargs) -> dict:
 | 
				
			||||||
 | 
					    test_engine = {
 | 
				
			||||||
 | 
					        # fmt: off
 | 
				
			||||||
 | 
					        'name': None,
 | 
				
			||||||
 | 
					        'engine': None,
 | 
				
			||||||
 | 
					        'categories': 'general',
 | 
				
			||||||
 | 
					        'shortcut': 'dummy',
 | 
				
			||||||
 | 
					        'timeout': 3.0,
 | 
				
			||||||
 | 
					        'tokens': [],
 | 
				
			||||||
 | 
					        # fmt: on
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    test_engine.update(**kwargs)
 | 
				
			||||||
 | 
					    return test_engine
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', engine='wikipedia', **kwargs):
 | 
					def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', engine='wikipedia', **kwargs):
 | 
				
			||||||
    result = {
 | 
					    result = {
 | 
				
			||||||
        # fmt: off
 | 
					        # fmt: off
 | 
				
			||||||
@ -19,23 +36,41 @@ def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', eng
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class ResultContainerTestCase(SearxTestCase):  # pylint: disable=missing-class-docstring
 | 
					class ResultContainerTestCase(SearxTestCase):  # pylint: disable=missing-class-docstring
 | 
				
			||||||
 | 
					    def setUp(self) -> None:
 | 
				
			||||||
 | 
					        stract_engine = make_test_engine_dict(name="stract", engine="stract", shortcut="stra")
 | 
				
			||||||
 | 
					        duckduckgo_engine = make_test_engine_dict(name="duckduckgo", engine="duckduckgo", shortcut="ddg")
 | 
				
			||||||
 | 
					        mojeek_engine = make_test_engine_dict(name="mojeek", engine="mojeek", shortcut="mjk")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        load_engines([stract_engine, duckduckgo_engine, mojeek_engine])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.container = ResultContainer()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def tearDown(self):
 | 
				
			||||||
 | 
					        load_engines([])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_empty(self):
 | 
					    def test_empty(self):
 | 
				
			||||||
        c = ResultContainer()
 | 
					        self.assertEqual(self.container.get_ordered_results(), [])
 | 
				
			||||||
        self.assertEqual(c.get_ordered_results(), [])
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_one_result(self):
 | 
					    def test_one_result(self):
 | 
				
			||||||
        c = ResultContainer()
 | 
					        self.container.extend('wikipedia', [fake_result()])
 | 
				
			||||||
        c.extend('wikipedia', [fake_result()])
 | 
					
 | 
				
			||||||
        self.assertEqual(c.results_length(), 1)
 | 
					        self.assertEqual(self.container.results_length(), 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_one_suggestion(self):
 | 
					    def test_one_suggestion(self):
 | 
				
			||||||
        c = ResultContainer()
 | 
					        self.container.extend('wikipedia', [fake_result(suggestion=True)])
 | 
				
			||||||
        c.extend('wikipedia', [fake_result(suggestion=True)])
 | 
					
 | 
				
			||||||
        self.assertEqual(len(c.suggestions), 1)
 | 
					        self.assertEqual(len(self.container.suggestions), 1)
 | 
				
			||||||
        self.assertEqual(c.results_length(), 0)
 | 
					        self.assertEqual(self.container.results_length(), 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_result_merge(self):
 | 
					    def test_result_merge(self):
 | 
				
			||||||
        c = ResultContainer()
 | 
					        self.container.extend('wikipedia', [fake_result()])
 | 
				
			||||||
        c.extend('wikipedia', [fake_result()])
 | 
					        self.container.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])
 | 
				
			||||||
        c.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])
 | 
					
 | 
				
			||||||
        self.assertEqual(c.results_length(), 2)
 | 
					        self.assertEqual(self.container.results_length(), 2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_result_merge_by_title(self):
 | 
				
			||||||
 | 
					        self.container.extend('stract', [fake_result(engine='stract', title='short title')])
 | 
				
			||||||
 | 
					        self.container.extend('duckduckgo', [fake_result(engine='duckduckgo', title='normal title')])
 | 
				
			||||||
 | 
					        self.container.extend('mojeek', [fake_result(engine='mojeek', title='this long long title')])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.assertEqual(self.container.get_ordered_results()[0].get('title', ''), 'this long long title')
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user