mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-25 07:49:02 -04:00 
			
		
		
		
	[fix] hardening against arguments of type None, where str or dict is expected
On a long-running server, the tracebacks below can be found (albeit rarely),
which indicate problems with NoneType where a string or another data type is
expected.
result.img_src::
    File "/usr/local/searxng/searxng-src/searx/templates/simple/result_templates/images.html", line 13, in top-level template code
      <img src="" data-src="{{ image_proxify(result.img_src) }}" alt="{{ result.title|striptags }}">{{- "" -}}
      ^
    File "/usr/local/searxng/searxng-src/searx/webapp.py", line 284, in image_proxify
      if url.startswith('//'):
         ^^^^^^^^^^^^^^
    AttributeError: 'NoneType' object has no attribute 'startswith'
result.content::
    File "/usr/local/searxng/searxng-src/searx/result_types/_base.py", line 105, in _normalize_text_fields
      result.content = WHITESPACE_REGEX.sub(" ", result.content).strip()
                       ~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^
    TypeError: expected string or bytes-like object, got 'NoneType'
html_to_text, when html_str is None::
    File "/usr/local/searxng/searxng-src/searx/engines/wikipedia.py", line 190, in response
      title = utils.html_to_text(api_result.get('titles', {}).get('display') or api_result.get('title'))
    File "/usr/local/searxng/searxng-src/searx/utils.py", line 158, in html_to_text
      html_str = html_str.replace('\n', ' ').replace('\r', ' ')
                 ^^^^^^^^^^^^^^^^
    AttributeError: 'NoneType' object has no attribute 'replace'
presearch engine, when json_results (the value of json_resp.get('results')) is None::
    File "/usr/local/searxng/searxng-src/searx/engines/presearch.py", line 221, in response
      results = parse_search_query(json_resp.get('results'))
    File "/usr/local/searxng/searxng-src/searx/engines/presearch.py", line 161, in parse_search_query
      for item in json_results.get('specialSections', {}).get('topStoriesCompact', {}).get('data', []):
                  ^^^^^^^^^^^^^^^^
    AttributeError: 'NoneType' object has no attribute 'get'
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
			
			
This commit is contained in:
		
							parent
							
								
									08885d0614
								
							
						
					
					
						commit
						e6308b8167
					
				| @ -184,6 +184,8 @@ def _fix_title(title, url): | |||||||
| 
 | 
 | ||||||
| def parse_search_query(json_results): | def parse_search_query(json_results): | ||||||
|     results = [] |     results = [] | ||||||
|  |     if not json_results: | ||||||
|  |         return results | ||||||
| 
 | 
 | ||||||
|     for item in json_results.get('specialSections', {}).get('topStoriesCompact', {}).get('data', []): |     for item in json_results.get('specialSections', {}).get('topStoriesCompact', {}).get('data', []): | ||||||
|         result = { |         result = { | ||||||
| @ -245,7 +247,7 @@ def response(resp): | |||||||
|     json_resp = resp.json() |     json_resp = resp.json() | ||||||
| 
 | 
 | ||||||
|     if search_type == 'search': |     if search_type == 'search': | ||||||
|         results = parse_search_query(json_resp.get('results')) |         results = parse_search_query(json_resp.get('results', {})) | ||||||
| 
 | 
 | ||||||
|     elif search_type == 'images': |     elif search_type == 'images': | ||||||
|         for item in json_resp.get('images', []): |         for item in json_resp.get('images', []): | ||||||
|  | |||||||
| @ -103,8 +103,10 @@ def _normalize_text_fields(result: MainResult | LegacyResult): | |||||||
|         result.content = str(result) |         result.content = str(result) | ||||||
| 
 | 
 | ||||||
|     # normalize title and content |     # normalize title and content | ||||||
|     result.title = WHITESPACE_REGEX.sub(" ", result.title).strip() |     if result.title: | ||||||
|     result.content = WHITESPACE_REGEX.sub(" ", result.content).strip() |         result.title = WHITESPACE_REGEX.sub(" ", result.title).strip() | ||||||
|  |     if result.content: | ||||||
|  |         result.content = WHITESPACE_REGEX.sub(" ", result.content).strip() | ||||||
|     if result.content == result.title: |     if result.content == result.title: | ||||||
|         # avoid duplicate content between the content and title fields |         # avoid duplicate content between the content and title fields | ||||||
|         result.content = "" |         result.content = "" | ||||||
|  | |||||||
| @ -154,6 +154,8 @@ def html_to_text(html_str: str) -> str: | |||||||
|         >>> html_to_text(r'regexp: (?<![a-zA-Z]') |         >>> html_to_text(r'regexp: (?<![a-zA-Z]') | ||||||
|         'regexp: (?<![a-zA-Z]' |         'regexp: (?<![a-zA-Z]' | ||||||
|     """ |     """ | ||||||
|  |     if not html_str: | ||||||
|  |         return "" | ||||||
|     html_str = html_str.replace('\n', ' ').replace('\r', ' ') |     html_str = html_str.replace('\n', ' ').replace('\r', ' ') | ||||||
|     html_str = ' '.join(html_str.split()) |     html_str = ' '.join(html_str.split()) | ||||||
|     s = _HTMLTextExtractor() |     s = _HTMLTextExtractor() | ||||||
|  | |||||||
| @ -265,6 +265,9 @@ def custom_url_for(endpoint: str, **values): | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def morty_proxify(url: str): | def morty_proxify(url: str): | ||||||
|  |     if not url: | ||||||
|  |         return url | ||||||
|  | 
 | ||||||
|     if url.startswith('//'): |     if url.startswith('//'): | ||||||
|         url = 'https:' + url |         url = 'https:' + url | ||||||
| 
 | 
 | ||||||
| @ -280,6 +283,8 @@ def morty_proxify(url: str): | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def image_proxify(url: str): | def image_proxify(url: str): | ||||||
|  |     if not url: | ||||||
|  |         return url | ||||||
| 
 | 
 | ||||||
|     if url.startswith('//'): |     if url.startswith('//'): | ||||||
|         url = 'https:' + url |         url = 'https:' + url | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user