mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-01 19:17:07 -04:00 
			
		
		
		
	[enh] use longest title and test get_ordered_results()
This commit is contained in:
		
							parent
							
								
									94aafc83a6
								
							
						
					
					
						commit
						6948689d2a
					
				| @ -12,7 +12,6 @@ from searx import logger | ||||
| from searx.engines import engines | ||||
| from searx.metrics import histogram_observe, counter_add, count_error | ||||
| 
 | ||||
| 
 | ||||
# Punctuation and spacing characters that are removed before measuring the
# length of a text.  The hyphen is placed last in the character class so it is
# a literal '-': written as ``)-_`` it forms a range (U+0029..U+005F) that also
# swallows digits, uppercase letters and several other symbols.
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?./\\ ()_-]', re.M | re.U)
# One or more consecutive spaces, tabs or newlines.
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
| 
 | ||||
| @ -133,7 +132,7 @@ def result_score(result, priority): | ||||
|     weight = 1.0 | ||||
| 
 | ||||
|     for result_engine in result['engines']: | ||||
|         if hasattr(engines[result_engine], 'weight'): | ||||
|         if hasattr(engines.get(result_engine), 'weight'): | ||||
|             weight *= float(engines[result_engine].weight) | ||||
| 
 | ||||
|     weight *= len(result['positions']) | ||||
| @ -332,10 +331,14 @@ class ResultContainer: | ||||
|         return None | ||||
| 
 | ||||
|     def __merge_duplicated_http_result(self, duplicated, result, position): | ||||
|         # using content with more text | ||||
|         # use content with more text | ||||
|         if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')): | ||||
|             duplicated['content'] = result['content'] | ||||
| 
 | ||||
|         # use title with more text | ||||
|         if result_content_len(result.get('title', '')) > len(duplicated.get('title', '')): | ||||
|             duplicated['title'] = result['title'] | ||||
| 
 | ||||
|         # merge all result's parameters not found in duplicate | ||||
|         for key in result.keys(): | ||||
|             if not duplicated.get(key): | ||||
| @ -347,7 +350,7 @@ class ResultContainer: | ||||
|         # add engine to list of result-engines | ||||
|         duplicated['engines'].add(result['engine']) | ||||
| 
 | ||||
|         # using https if possible | ||||
|         # use https if possible | ||||
|         if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https': | ||||
|             duplicated['url'] = result['parsed_url'].geturl() | ||||
|             duplicated['parsed_url'] = result['parsed_url'] | ||||
|  | ||||
| @ -2,9 +2,26 @@ | ||||
| # pylint: disable=missing-module-docstring | ||||
| 
 | ||||
| from searx.results import ResultContainer | ||||
| from searx.engines import load_engines | ||||
| from tests import SearxTestCase | ||||
| 
 | ||||
| 
 | ||||
def make_test_engine_dict(**kwargs) -> dict:
    """Return an engine settings dict for use in tests.

    A fresh dict of default settings is created on every call; any keyword
    argument overrides the default with the same key or is added as a new key.
    """
    test_engine = {
        # fmt: off
        'name': None,
        'engine': None,
        'categories': 'general',
        'shortcut': 'dummy',
        'timeout': 3.0,
        'tokens': [],
        # fmt: on
    }

    test_engine.update(**kwargs)
    return test_engine
| 
 | ||||
| 
 | ||||
| def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', engine='wikipedia', **kwargs): | ||||
|     result = { | ||||
|         # fmt: off | ||||
| @ -19,23 +36,41 @@ def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', eng | ||||
| 
 | ||||
| 
 | ||||
class ResultContainerTestCase(SearxTestCase):
    """Tests for adding results to a ``ResultContainer`` and reading them back."""

    def setUp(self) -> None:
        """Load three test engine definitions and create an empty container."""
        stract_engine = make_test_engine_dict(name="stract", engine="stract", shortcut="stra")
        duckduckgo_engine = make_test_engine_dict(name="duckduckgo", engine="duckduckgo", shortcut="ddg")
        mojeek_engine = make_test_engine_dict(name="mojeek", engine="mojeek", shortcut="mjk")

        load_engines([stract_engine, duckduckgo_engine, mojeek_engine])

        self.container = ResultContainer()

    def tearDown(self):
        """Call ``load_engines`` with an empty list after each test."""
        # NOTE(review): presumably this clears the engines loaded in setUp so
        # they do not leak into other test cases -- confirm against load_engines.
        load_engines([])

    def test_empty(self):
        """A container that received no results yields an empty ordered list."""
        self.assertEqual(self.container.get_ordered_results(), [])

    def test_one_result(self):
        """A single result is counted once."""
        self.container.extend('wikipedia', [fake_result()])

        self.assertEqual(self.container.results_length(), 1)

    def test_one_suggestion(self):
        """An item flagged as suggestion is counted as suggestion, not as result."""
        self.container.extend('wikipedia', [fake_result(suggestion=True)])

        self.assertEqual(len(self.container.suggestions), 1)
        self.assertEqual(self.container.results_length(), 0)

    def test_result_merge(self):
        """The same default result sent by two engines is counted only once."""
        self.container.extend('wikipedia', [fake_result()])
        self.container.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])

        self.assertEqual(self.container.results_length(), 2)

    def test_result_merge_by_title(self):
        """When engines return the same URL, the longest title is the one kept."""
        self.container.extend('stract', [fake_result(engine='stract', title='short title')])
        self.container.extend('duckduckgo', [fake_result(engine='duckduckgo', title='normal title')])
        self.container.extend('mojeek', [fake_result(engine='mojeek', title='this long long title')])

        self.assertEqual(self.container.get_ordered_results()[0].get('title', ''), 'this long long title')
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user