mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-31 02:27:06 -04:00 
			
		
		
		
	[mod] Peertube: re-engineered & upgrade to data_type: traits_v1
- fetch_traits(): Fetch languages from peertube's search-index source code.
  [mod] Include migration of the request method from 'supported_languages'
        to 'traits' (EngineTraits) object.
  [fix] old supported_languages_url is no longer valid since the sources
        have been moved to a different path.
- fixed code to pass pylint
- request(): complete re-implementation based on the API docs [1]
- response(): complete re-implementation, adds several fields missed before
- add source code documentation
[1] https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
			
			
This commit is contained in:
		
							parent
							
								
									6e5f22e558
								
							
						
					
					
						commit
						a7fe22770a
					
				
							
								
								
									
										19
									
								
								docs/src/searx.engines.peertube.rst
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								docs/src/searx.engines.peertube.rst
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,19 @@ | |||||||
|  | .. _peertube engines: | ||||||
|  | 
 | ||||||
|  | ================ | ||||||
|  | Peertube Engines | ||||||
|  | ================ | ||||||
|  | 
 | ||||||
|  | .. contents:: Contents | ||||||
|  |    :depth: 2 | ||||||
|  |    :local: | ||||||
|  |    :backlinks: entry | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | .. _peertube video engine: | ||||||
|  | 
 | ||||||
|  | Peertube Video | ||||||
|  | ============== | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.engines.peertube | ||||||
|  |   :members: | ||||||
| @ -1468,31 +1468,32 @@ | |||||||
|   "peertube": { |   "peertube": { | ||||||
|     "all_locale": null, |     "all_locale": null, | ||||||
|     "custom": {}, |     "custom": {}, | ||||||
|     "data_type": "supported_languages", |     "data_type": "traits_v1", | ||||||
|     "languages": {}, |     "languages": { | ||||||
|  |       "ca": "ca", | ||||||
|  |       "cs": "cs", | ||||||
|  |       "de": "de", | ||||||
|  |       "el": "el", | ||||||
|  |       "en": "en", | ||||||
|  |       "eo": "eo", | ||||||
|  |       "es": "es", | ||||||
|  |       "eu": "eu", | ||||||
|  |       "fi": "fi", | ||||||
|  |       "fr": "fr", | ||||||
|  |       "gd": "gd", | ||||||
|  |       "it": "it", | ||||||
|  |       "ja": "ja", | ||||||
|  |       "nl": "nl", | ||||||
|  |       "pl": "pl", | ||||||
|  |       "pt": "pt", | ||||||
|  |       "ru": "ru", | ||||||
|  |       "sv": "sv", | ||||||
|  |       "zh": "zh", | ||||||
|  |       "zh_Hans": "zh", | ||||||
|  |       "zh_Hant": "zh" | ||||||
|  |     }, | ||||||
|     "regions": {}, |     "regions": {}, | ||||||
|     "supported_languages": [ |     "supported_languages": {} | ||||||
|       "ca", |  | ||||||
|       "cs", |  | ||||||
|       "de", |  | ||||||
|       "el", |  | ||||||
|       "en", |  | ||||||
|       "eo", |  | ||||||
|       "es", |  | ||||||
|       "eu", |  | ||||||
|       "fi", |  | ||||||
|       "fr", |  | ||||||
|       "gd", |  | ||||||
|       "it", |  | ||||||
|       "ja", |  | ||||||
|       "nl", |  | ||||||
|       "oc", |  | ||||||
|       "pl", |  | ||||||
|       "pt", |  | ||||||
|       "ru", |  | ||||||
|       "sv", |  | ||||||
|       "zh" |  | ||||||
|     ] |  | ||||||
|   }, |   }, | ||||||
|   "qwant": { |   "qwant": { | ||||||
|     "all_locale": null, |     "all_locale": null, | ||||||
|  | |||||||
| @ -1,18 +1,30 @@ | |||||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | # SPDX-License-Identifier: AGPL-3.0-or-later | ||||||
| """ | # lint: pylint | ||||||
|  peertube (Videos) | """Peertube and :py:obj:`SepiaSearch <searx.engines.sepiasearch>` do share | ||||||
|  | (more or less) the same REST API and the schema of the JSON result is identical. | ||||||
|  | 
 | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| from json import loads | import re | ||||||
| from datetime import datetime |  | ||||||
| from urllib.parse import urlencode | from urllib.parse import urlencode | ||||||
| from searx.utils import html_to_text | from datetime import datetime | ||||||
|  | from dateutil.parser import parse | ||||||
|  | from dateutil.relativedelta import relativedelta | ||||||
|  | 
 | ||||||
|  | import babel | ||||||
|  | 
 | ||||||
|  | from searx import network | ||||||
|  | from searx.locales import language_tag | ||||||
|  | from searx.utils import html_to_text | ||||||
|  | from searx.enginelib.traits import EngineTraits | ||||||
|  | 
 | ||||||
|  | traits: EngineTraits | ||||||
| 
 | 
 | ||||||
| # about |  | ||||||
| about = { | about = { | ||||||
|  |     # pylint: disable=line-too-long | ||||||
|     "website": 'https://joinpeertube.org', |     "website": 'https://joinpeertube.org', | ||||||
|     "wikidata_id": 'Q50938515', |     "wikidata_id": 'Q50938515', | ||||||
|     "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html', |     "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos', | ||||||
|     "use_official_api": True, |     "use_official_api": True, | ||||||
|     "require_api_key": False, |     "require_api_key": False, | ||||||
|     "results": 'JSON', |     "results": 'JSON', | ||||||
| @ -22,66 +34,155 @@ about = { | |||||||
| categories = ["videos"] | categories = ["videos"] | ||||||
| paging = True | paging = True | ||||||
| base_url = "https://peer.tube" | base_url = "https://peer.tube" | ||||||
| supported_languages_url = 'https://peer.tube/api/v1/videos/languages' | """Base URL of the Peertube instance.  A list of instances is available at: | ||||||
|  | 
 | ||||||
|  | - https://instances.joinpeertube.org/instances | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | time_range_support = True | ||||||
|  | time_range_table = { | ||||||
|  |     'day': relativedelta(), | ||||||
|  |     'week': relativedelta(weeks=-1), | ||||||
|  |     'month': relativedelta(months=-1), | ||||||
|  |     'year': relativedelta(years=-1), | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | safesearch = True | ||||||
|  | safesearch_table = {0: 'both', 1: 'false', 2: 'false'} | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def minute_to_hm(minute): | ||||||
|  |     if isinstance(minute, int): | ||||||
|  |         return "%d:%02d" % (divmod(minute, 60)) | ||||||
|  |     return None | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # do search-request |  | ||||||
| def request(query, params): | def request(query, params): | ||||||
|     sanitized_url = base_url.rstrip("/") |     """Assemble request for the Peertube API""" | ||||||
|     pageno = (params["pageno"] - 1) * 15 | 
 | ||||||
|     search_url = sanitized_url + "/api/v1/search/videos/?pageno={pageno}&{query}" |     if not query: | ||||||
|     query_dict = {"search": query} |         return False | ||||||
|     language = params["language"].split("-")[0] | 
 | ||||||
|     if "all" != language and language in supported_languages: |     # eng_region = traits.get_region(params['searxng_locale'], 'en_US') | ||||||
|         query_dict["languageOneOf"] = language |     eng_lang = traits.get_language(params['searxng_locale'], None) | ||||||
|     params["url"] = search_url.format(query=urlencode(query_dict), pageno=pageno) | 
 | ||||||
|  |     params['url'] = ( | ||||||
|  |         base_url.rstrip("/") | ||||||
|  |         + "/api/v1/search/videos?" | ||||||
|  |         + urlencode( | ||||||
|  |             { | ||||||
|  |                 'search': query, | ||||||
|  |                 'searchTarget': 'search-index',  # Vidiversum | ||||||
|  |                 'resultType': 'videos', | ||||||
|  |                 'start': (params['pageno'] - 1) * 10, | ||||||
|  |                 'count': 10, | ||||||
|  |                 # -createdAt: sort by date ascending / createdAt: date descending | ||||||
|  |                 'sort': '-match',  # sort by *match descending* | ||||||
|  |                 'nsfw': safesearch_table[params['safesearch']], | ||||||
|  |             } | ||||||
|  |         ) | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|  |     if eng_lang is not None: | ||||||
|  |         params['url'] += '&languageOneOf[]=' + eng_lang | ||||||
|  |         params['url'] += '&boostLanguages[]=' + eng_lang | ||||||
|  | 
 | ||||||
|  |     if params['time_range'] in time_range_table: | ||||||
|  |         time = datetime.now().date() + time_range_table[params['time_range']] | ||||||
|  |         params['url'] += '&startDate=' + time.isoformat() | ||||||
|  | 
 | ||||||
|     return params |     return params | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _get_offset_from_pageno(pageno): |  | ||||||
|     return (pageno - 1) * 15 + 1 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # get response from search-request |  | ||||||
| def response(resp): | def response(resp): | ||||||
|     sanitized_url = base_url.rstrip("/") |     return video_response(resp) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def video_response(resp): | ||||||
|  |     """Parse video response from SepiaSearch and Peertube instances.""" | ||||||
|     results = [] |     results = [] | ||||||
| 
 | 
 | ||||||
|     search_res = loads(resp.text) |     json_data = resp.json() | ||||||
| 
 | 
 | ||||||
|     # return empty array if there are no results |     if 'data' not in json_data: | ||||||
|     if "data" not in search_res: |  | ||||||
|         return [] |         return [] | ||||||
| 
 | 
 | ||||||
|     # parse results |     for result in json_data['data']: | ||||||
|     for res in search_res["data"]: |         metadata = [ | ||||||
|         title = res["name"] |             x | ||||||
|         url = sanitized_url + "/videos/watch/" + res["uuid"] |             for x in [ | ||||||
|         description = res["description"] |                 result.get('channel', {}).get('displayName'), | ||||||
|         if description: |                 result.get('channel', {}).get('name') + '@' + result.get('channel', {}).get('host'), | ||||||
|             content = html_to_text(res["description"]) |                 ', '.join(result.get('tags', [])), | ||||||
|         else: |             ] | ||||||
|             content = "" |             if x | ||||||
|         thumbnail = sanitized_url + res["thumbnailPath"] |         ] | ||||||
|         publishedDate = datetime.strptime(res["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ") |  | ||||||
| 
 | 
 | ||||||
|         results.append( |         results.append( | ||||||
|             { |             { | ||||||
|                 "template": "videos.html", |                 'url': result['url'], | ||||||
|                 "url": url, |                 'title': result['name'], | ||||||
|                 "title": title, |                 'content': html_to_text(result.get('description') or ''), | ||||||
|                 "content": content, |                 'author': result.get('account', {}).get('displayName'), | ||||||
|                 "publishedDate": publishedDate, |                 'length': minute_to_hm(result.get('duration')), | ||||||
|                 "iframe_src": sanitized_url + res["embedPath"], |                 'template': 'videos.html', | ||||||
|                 "thumbnail": thumbnail, |                 'publishedDate': parse(result['publishedAt']), | ||||||
|  |                 'iframe_src': result.get('embedUrl'), | ||||||
|  |                 'thumbnail': result.get('thumbnailUrl') or result.get('previewUrl'), | ||||||
|  |                 'metadata': ' | '.join(metadata), | ||||||
|             } |             } | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|     # return results |  | ||||||
|     return results |     return results | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _fetch_supported_languages(resp): | def fetch_traits(engine_traits: EngineTraits): | ||||||
|     videolanguages = resp.json() |     """Fetch languages from peertube's search-index source code. | ||||||
|     peertube_languages = list(videolanguages.keys()) | 
 | ||||||
|     return peertube_languages |     See videoLanguages_ in commit `8ed5c729 - Refactor and redesign client`_ | ||||||
|  | 
 | ||||||
|  |     .. _8ed5c729 - Refactor and redesign client: | ||||||
|  |        https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729 | ||||||
|  |     .. _videoLanguages: | ||||||
|  |        https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291 | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     resp = network.get( | ||||||
|  |         'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue', | ||||||
|  |         # the response from search-index repository is very slow | ||||||
|  |         timeout=60, | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|  |     if not resp.ok: | ||||||
|  |         print("ERROR: response from peertube is not OK.") | ||||||
|  |         return | ||||||
|  | 
 | ||||||
|  |     js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL) | ||||||
|  |     if not js_lang: | ||||||
|  |         print("ERROR: can't determine languages from peertube") | ||||||
|  |         return | ||||||
|  | 
 | ||||||
|  |     for lang in re.finditer(r"\{ id: '([a-z]+)', label:", js_lang.group(1)): | ||||||
|  |         try: | ||||||
|  |             eng_tag = lang.group(1) | ||||||
|  |             if eng_tag == 'oc': | ||||||
|  |                 # Occitan is not known by babel, its closest relative is Catalan | ||||||
|  |                 # but 'ca' is already in the list of engine_traits.languages --> | ||||||
|  |                 # 'oc' will be ignored. | ||||||
|  |                 continue | ||||||
|  | 
 | ||||||
|  |             sxng_tag = language_tag(babel.Locale.parse(eng_tag)) | ||||||
|  | 
 | ||||||
|  |         except babel.UnknownLocaleError: | ||||||
|  |             print("ERROR: %s is unknown by babel" % eng_tag) | ||||||
|  |             continue | ||||||
|  | 
 | ||||||
|  |         conflict = engine_traits.languages.get(sxng_tag) | ||||||
|  |         if conflict: | ||||||
|  |             if conflict != eng_tag: | ||||||
|  |                 print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) | ||||||
|  |             continue | ||||||
|  |         engine_traits.languages[sxng_tag] = eng_tag | ||||||
|  | 
 | ||||||
|  |     engine_traits.languages['zh_Hans'] = 'zh' | ||||||
|  |     engine_traits.languages['zh_Hant'] = 'zh' | ||||||
|  | |||||||
| @ -1758,9 +1758,8 @@ engines: | |||||||
|     engine: peertube |     engine: peertube | ||||||
|     shortcut: ptb |     shortcut: ptb | ||||||
|     paging: true |     paging: true | ||||||
|     # https://instances.joinpeertube.org/instances |     # alternatives see: https://instances.joinpeertube.org/instances | ||||||
|     base_url: https://peertube.biz/ |     # base_url: https://tube.4aem.com | ||||||
|     # base_url: https://tube.tardis.world/ |  | ||||||
|     categories: videos |     categories: videos | ||||||
|     disabled: true |     disabled: true | ||||||
|     timeout: 6.0 |     timeout: 6.0 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user