settings.yml:
* outgoing.networks (a sketch follows this list):
   * can contain network definitions
   * properties: enable_http, verify, http2, max_connections, max_keepalive_connections,
     keepalive_expiry, local_addresses, support_ipv4, support_ipv6, proxies, max_redirects, retries
   * retries: 0 by default; the number of times searx retries the HTTP request (using a different IP & proxy each time)
   * local_addresses can be a CIDR range such as "192.168.0.1/24" (IPv6 is supported as well)
   * support_ipv4 & support_ipv6: both True by default,
     see https://github.com/searx/searx/pull/1034
* each engine can define a "network" section:
   * either a full network definition
   * or a reference to an existing network
* all HTTP requests of an engine use the same HTTP configuration (this was not the case before; see the proxy configuration in master)
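A minimal sketch of what this could look like in settings.yml. The property names come from the list above; the network name "tor", the engine entries, and all values are illustrative assumptions, not taken from the repository:

    outgoing:
      networks:
        tor:                                # hypothetical network name
          enable_http: false
          verify: true
          http2: true
          max_connections: 100
          max_keepalive_connections: 10
          keepalive_expiry: 5.0
          local_addresses: 192.168.0.1/24   # CIDR notation; IPv6 also accepted
          support_ipv4: true
          support_ipv6: true
          proxies: socks5://127.0.0.1:9050
          max_redirects: 30
          retries: 2                        # retry twice, different IP & proxy each time

    engines:
      - name: seznam
        network: tor                        # reference an existing network by name
      - name: example
        network:                            # or define a full network inline
          http2: false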
searx/engines/seznam.py (Python, 67 lines, 1.8 KiB):
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Seznam
"""

from urllib.parse import urlencode
from lxml import html
from searx.network import get
from searx.exceptions import SearxEngineAccessDeniedException
from searx.utils import (
    extract_text,
    eval_xpath_list,
    eval_xpath_getindex,
    eval_xpath,
)

# about
about = {
    "website": "https://www.seznam.cz/",
    "wikidata_id": "Q3490485",
    "official_api_documentation": "https://api.sklik.cz/",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

base_url = 'https://search.seznam.cz/'


def request(query, params):
    # Fetch the homepage first: the search form carries hidden inputs
    # (and sets cookies) that must be sent back along with the query.
    response_index = get(base_url, headers=params['headers'], raise_for_httperror=True)
    dom = html.fromstring(response_index.text)

    url_params = {
        'q': query,
        'oq': query,
    }
    for e in eval_xpath_list(dom, '//input[@type="hidden"]'):
        name = e.get('name')
        value = e.get('value')
        url_params[name] = value

    params['url'] = base_url + '?' + urlencode(url_params)
    params['cookies'] = response_index.cookies
    return params


def response(resp):
    # A redirect to a /verify path means Seznam's bot detection kicked in.
    if resp.url.path.startswith('/verify'):
        raise SearxEngineAccessDeniedException()

    results = []

    dom = html.fromstring(resp.content.decode())
    for result_element in eval_xpath_list(dom, '//div[@data-dot="results"]/div'):
        result_data = eval_xpath_getindex(result_element, './/div[contains(@class, "Result")]', 0, default=None)
        if result_data is None:
            continue
        title_element = eval_xpath_getindex(result_element, './/h3/a', 0)
        results.append({
            'url': title_element.get('href'),
            'title': extract_text(title_element),
            'content': extract_text(eval_xpath(result_data, './/p[@class="Result-description"]')),
        })

    return results
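A note on the design, as read from the code above: request() is two-phased because search.seznam.cz expects the hidden form fields and cookies from its homepage to accompany the query, so the engine fetches base_url once per search before building the final URL; response() treats a redirect to a /verify path as bot detection and raises SearxEngineAccessDeniedException rather than returning empty results.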