mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-26 00:02:31 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			77 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			77 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # SPDX-License-Identifier: AGPL-3.0-or-later
 | |
| """Baidu_
 | |
| 
 | |
| .. _Baidu: https://www.baidu.com
 | |
| """
 | |
| 
 | |
| # There exists a https://github.com/ohblue/baidu-serp-api/
 | |
| # but we don't use it here (maybe we can learn from it).
 | |
| 
 | |
| from urllib.parse import urlencode
 | |
| from datetime import datetime
 | |
| 
 | |
| from searx.exceptions import SearxEngineAPIException
 | |
| 
 | |
# Engine settings.
paging = True
categories = ["general"]

# Endpoint the encoded query string is appended to in ``request``.
base_url = "https://www.baidu.com/s"

# Sent to Baidu as ``rn`` and used to derive the ``pn`` offset in ``request``.
results_per_page = 10

# Descriptive metadata about the upstream service.
about = {
    "website": "https://www.baidu.com",
    "wikidata_id": "Q14772",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
    "language": "zh",
}
 | |
| 
 | |
| 
 | |
def request(query, params):
    """Build the Baidu search URL and store it in ``params["url"]``.

    The query is stripped of surrounding whitespace and sent as ``wd``;
    ``rn`` carries ``results_per_page`` and ``pn`` the zero-based result
    offset computed from ``params["pageno"]``.  ``tn=json`` is always sent.

    :param query: search terms.
    :param params: request parameters; must contain ``"pageno"``.
    :returns: the same ``params`` mapping, with ``"url"`` set.
    """
    search_term = query.strip()
    offset = (params["pageno"] - 1) * results_per_page

    encoded_args = urlencode(
        {
            "wd": search_term,
            "rn": results_per_page,
            "pn": offset,
            "tn": "json",
        }
    )

    params["url"] = f"{base_url}?{encoded_args}"
    return params
 | |
| 
 | |
| 
 | |
def response(resp):
    """Convert Baidu's JSON answer into a list of result dicts.

    The payload is expected to look like ``{"feed": {"entry": [...]}}``.
    Entries that are not objects, or that lack a non-empty ``title`` or
    ``url``, are skipped.  Each result has the keys ``title``, ``url``,
    ``content`` (from ``abs``, default ``""``) and ``publishedDate`` (a
    ``datetime`` built from ``time``, or ``None``).

    :param resp: response object exposing a ``json()`` method.
    :returns: list of result dicts, possibly empty.
    :raises SearxEngineAPIException: if the body is not valid JSON or does
        not have the expected ``feed``/``entry`` structure.
    """
    try:
        data = resp.json()
    except Exception as e:
        raise SearxEngineAPIException(f"Invalid response: {e}") from e

    # Validate the structure with type checks: a ``null`` body, a ``null``
    # feed or a ``null`` entry list must be reported as an API error rather
    # than leaking a TypeError from ``in`` / iteration.
    feed = data.get("feed") if isinstance(data, dict) else None
    entries = feed.get("entry") if isinstance(feed, dict) else None
    if not isinstance(entries, list):
        raise SearxEngineAPIException("Invalid response")

    results = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        if not entry.get("title") or not entry.get("url"):
            continue

        published_date = None
        if entry.get("time"):
            try:
                published_date = datetime.fromtimestamp(entry["time"])
            except (ValueError, TypeError, OverflowError, OSError):
                # Out-of-range or malformed timestamps must not discard the
                # whole page; the result is kept without a date.
                published_date = None

        results.append(
            {
                "title": entry["title"],
                "url": entry["url"],
                "content": entry.get("abs", ""),
                "publishedDate": published_date,
            }
        )

    return results
 |