mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-25 15:52:31 -04:00 
			
		
		
		
	This patch fixes some quirks and issues related to the engines and the network. Each engine has its own network and this network was broken for the following engines[1]: - archlinux - bing - dailymotion - duckduckgo - google - peertube - startpage - wikipedia Since the files have been touched anyway, the type annotations of the engine modules have also been completed so that error messages from the type checker are no longer reported. Related and (partially) fixed issues: - [1] https://github.com/searxng/searxng/issues/762#issuecomment-1605323861 - [2] https://github.com/searxng/searxng/issues/2513 - [3] https://github.com/searxng/searxng/issues/2515 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
		
			
				
	
	
		
			187 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			187 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # SPDX-License-Identifier: AGPL-3.0-or-later
 | |
| # lint: pylint
 | |
| """Peertube and :py:obj:`SepiaSearch <searx.engines.sepiasearch>` do share
 | |
| (more or less) the same REST API and the schema of the JSON result is identical.
 | |
| 
 | |
| """
 | |
| 
 | |
| import re
 | |
| from urllib.parse import urlencode
 | |
| from datetime import datetime
 | |
| from dateutil.parser import parse
 | |
| from dateutil.relativedelta import relativedelta
 | |
| 
 | |
| import babel
 | |
| 
 | |
| from searx.network import get  # see https://github.com/searxng/searxng/issues/762
 | |
| from searx.locales import language_tag
 | |
| from searx.utils import html_to_text
 | |
| from searx.enginelib.traits import EngineTraits
 | |
| 
 | |
# Declared for type checkers only: no value is assigned in this module.
# ``request()`` calls ``traits.get_language()``, so the object has to be
# set from outside before a request is built -- presumably by the engine
# loader (TODO confirm).
traits: EngineTraits
 | |
| 
 | |
# Descriptive metadata of the engine (project site, Wikidata item, API docs
# and the kind of results the API delivers).
about = dict(
    # pylint: disable=line-too-long
    website='https://joinpeertube.org',
    wikidata_id='Q50938515',
    official_api_documentation='https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos',
    use_official_api=True,
    require_api_key=False,
    results='JSON',
)
 | |
| 
 | |
# engine dependent config
categories = ['videos']
paging = True

base_url = 'https://peer.tube'
"""Base URL of the Peertube instance.  A list of instances is available at:

- https://instances.joinpeertube.org/instances
"""

# Offsets that ``request()`` adds to today's date to compute the ``startDate``
# argument; 'day' is a zero offset, i.e. the start date is today.
time_range_support = True
time_range_table = {
    'day': relativedelta(),
    'week': relativedelta(weeks=-1),
    'month': relativedelta(months=-1),
    'year': relativedelta(years=-1),
}

# Maps the safesearch level found in the request params to the value sent in
# the ``nsfw`` URL argument.
safesearch = True
safesearch_table = {
    0: 'both',
    1: 'false',
    2: 'false',
}
 | |
| 
 | |
| 
 | |
def minute_to_hm(minute):
    """Format an integer as ``<quotient>:<remainder>`` of a division by 60.

    The remainder is zero-padded to two digits, e.g. ``125`` becomes
    ``"2:05"``.  Any value that is not an ``int`` yields ``None``.

    In this module the function is fed with the ``duration`` field of a
    search result (presumably a number of seconds -- verify against the API).
    """
    if not isinstance(minute, int):
        return None

    quotient, remainder = divmod(minute, 60)
    return "%d:%02d" % (quotient, remainder)
 | |
| 
 | |
| 
 | |
def request(query, params):
    """Assemble request for the Peertube API.

    Stores the search URL in ``params['url']`` and returns ``params``.  When
    ``query`` is empty nothing is assembled and ``False`` is returned.

    The values read from ``params`` are ``searxng_locale``, ``pageno``,
    ``safesearch`` and ``time_range``.
    """
    if not query:
        return False

    eng_lang = traits.get_language(params['searxng_locale'], None)

    endpoint = base_url.rstrip("/") + "/api/v1/search/videos?"
    api_args = {
        'search': query,
        'searchTarget': 'search-index',  # Vidiversum
        'resultType': 'videos',
        # ten results per page, ``pageno`` starts at 1
        'start': (params['pageno'] - 1) * 10,
        'count': 10,
        'sort': '-match',  # sort by *match descending*
        'nsfw': safesearch_table[params['safesearch']],
    }
    params['url'] = endpoint + urlencode(api_args)

    # The language filter is only added when the engine knows the language of
    # the selected locale.
    if eng_lang is not None:
        params['url'] += ''.join(('&languageOneOf[]=', eng_lang, '&boostLanguages[]=', eng_lang))

    selected_range = params['time_range']
    if selected_range in time_range_table:
        start_date = datetime.now().date() + time_range_table[selected_range]
        params['url'] += '&startDate=' + start_date.isoformat()

    return params
 | |
| 
 | |
| 
 | |
def response(resp):
    """Parse the engine's HTTP response by delegating to :py:obj:`video_response`."""
    results = video_response(resp)
    return results
 | |
| 
 | |
| 
 | |
def video_response(resp):
    """Parse video response from SepiaSearch and Peertube instances.

    ``resp.json()`` is expected to return a mapping; when it has no ``data``
    key an empty list is returned.  Every item of ``data`` is converted into
    a result dict for the ``videos.html`` template.

    ``url``, ``name`` and ``publishedAt`` are mandatory in an item (a missing
    key raises ``KeyError``).  All other fields are optional: a missing or
    ``null`` ``channel``, ``account`` or ``tags`` field no longer aborts the
    parsing of the whole response.
    """
    json_data = resp.json()

    if 'data' not in json_data:
        return []

    results = []

    for result in json_data['data']:
        # ``or {}`` / ``or []`` also cover fields that are present but null.
        channel = result.get('channel') or {}
        account = result.get('account') or {}
        tags = result.get('tags') or []

        # Build the ``name@host`` handle only when both parts are available;
        # concatenating a missing part (None) would raise a TypeError.
        channel_name = channel.get('name')
        channel_host = channel.get('host')
        channel_handle = None
        if channel_name and channel_host:
            channel_handle = channel_name + '@' + channel_host

        # Empty entries are dropped before they are joined for display.
        metadata = [x for x in (channel.get('displayName'), channel_handle, ', '.join(tags)) if x]

        results.append(
            {
                'url': result['url'],
                'title': result['name'],
                'content': html_to_text(result.get('description') or ''),
                'author': account.get('displayName'),
                'length': minute_to_hm(result.get('duration')),
                'template': 'videos.html',
                'publishedDate': parse(result['publishedAt']),
                'iframe_src': result.get('embedUrl'),
                'thumbnail': result.get('thumbnailUrl') or result.get('previewUrl'),
                'metadata': ' | '.join(metadata),
            }
        )

    return results
 | |
| 
 | |
| 
 | |
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages from peertube's search-index source code.

    The language list is scraped from the ``videoLanguages`` section of the
    ``Filters.vue`` component and stored in ``engine_traits.languages``
    (SearXNG language tag --> Peertube language id).  Problems are reported
    on stdout; on a failed download or an unparsable file the function
    returns without touching ``engine_traits``.

    See videoLanguages_ in commit `8ed5c729 - Refactor and redesign client`_

    .. _8ed5c729 - Refactor and redesign client:
       https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729
    .. _videoLanguages:
       https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291
    """

    filters_vue = get(
        'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue',
        # the response from search-index repository is very slow
        timeout=60,
    )
    if not filters_vue.ok:  # type: ignore
        print("ERROR: response from peertube is not OK.")
        return

    # Everything between the ``videoLanguages ()`` line and the next ``]``.
    lang_section = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", filters_vue.text, re.DOTALL)  # type: ignore
    if not lang_section:
        print("ERROR: can't determine languages from peertube")
        return

    for entry in re.finditer(r"\{ id: '([a-z]+)', label:", lang_section.group(1)):
        eng_tag = entry.group(1)

        if eng_tag == 'oc':
            # Occitan is not known by babel, its closest relative is Catalan
            # but 'ca' is already in the list of engine_traits.languages -->
            # 'oc' will be ignored.
            continue

        try:
            sxng_tag = language_tag(babel.Locale.parse(eng_tag))
        except babel.UnknownLocaleError:
            print("ERROR: %s is unknown by babel" % eng_tag)
            continue

        # The first engine tag registered for a SearXNG tag wins; a different
        # engine tag for the same SearXNG tag is only reported.
        registered = engine_traits.languages.get(sxng_tag)
        if not registered:
            engine_traits.languages[sxng_tag] = eng_tag
        elif registered != eng_tag:
            print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, registered, eng_tag))

    # Both Chinese script variants are mapped to the single engine tag 'zh'.
    engine_traits.languages['zh_Hans'] = 'zh'
    engine_traits.languages['zh_Hant'] = 'zh'
 |