mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-26 08:12:30 -04:00 
			
		
		
		
	Tube Archivist [1] is a self-hosted project which archives youtube videos on your own local server. This engine connects with Tube Archivist's search API to allow searching from SearXNG into your own hosted videos. [1] https://www.tubearchivist.com/ Signed-off-by: Robert M. Clabough <robert@claobugh.tech> Co-authored-by: Bnyro <bnyro@tutanota.com> Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
		
			
				
	
	
		
			188 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			188 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # SPDX-License-Identifier: AGPL-3.0-or-later
 | |
| """`Tube Archivist`_ - *Your self hosted YouTube media server.*
 | |
| 
 | |
| .. _Tube Archivist: https://www.tubearchivist.com
 | |
| 
 | |
| This engine connects with a self-hosted instance of `Tube Archivist`_ to allow
 | |
| searching for your hosted videos.
 | |
| 
 | |
| `Tube Archivist`_ (TA) requires authentication for all image loads via cookie
 | |
| authentication.  What this means is that by default, SearXNG will have no way to
 | |
| pull images from TA (as there is no way to pass cookies in a URL string only).
 | |
| 
 | |
| In the meantime while work is done on the TA side, this can be worked around by
 | |
| bypassing auth for images in TA by altering the default TA nginx file.
 | |
| 
 | |
| This is located in the main tubearchivist docker container at::
 | |
| 
 | |
|   /etc/nginx/sites-available/default
 | |
| 
 | |
| It is **strongly** recommended first setting up the intial connection and
 | |
| verying searching works first with broken images, and then attempting this
 | |
| change.  This will limit any debugging to only images, rather than
 | |
| tokens/networking.
 | |
| 
 | |
| Steps to enable **unauthenticated** metadata access for channels and videos:
 | |
| 
 | |
| #. Perform any backups of TA before editing core configurations.
 | |
| 
 | |
| #. Copy the contents of the file ``/etc/nginx/sites-available/default`` in the
 | |
|    TA docker container
 | |
| 
 | |
| #. Edit ``location /cache/videos`` and ``location /cache/channels``.  Comment
 | |
|    out the line ``auth_request /api/ping/;`` to ``# auth_request /api/ping/;``.
 | |
| 
 | |
| #. Save the file to wherever you normally store your docker configuration.
 | |
| 
 | |
| #. Mount this new configuration over the default configuration.  With ``docker
 | |
|    run``, this would be::
 | |
| 
 | |
|      -v ./your-new-config.yml:/etc/nginx/sites-available/default
 | |
| 
 | |
|    With ``docker compose``, this would be::
 | |
| 
 | |
|      - "./your-new-config.yml:/etc/nginx/sites-available/default:ro"
 | |
| 
 | |
| #. Start the TA container.
 | |
| 
 | |
| After these steps, double check that TA works as normal (nothing should be
 | |
| different on the TA side).  Searching again should now show images.
 | |
| 
 | |
| 
 | |
| Configuration
 | |
| =============
 | |
| 
 | |
| The engine has the following required settings:
 | |
| 
 | |
| - :py:obj:`base_url`
 | |
| - :py:obj:`ta_token`
 | |
| 
 | |
| Optional settings:
 | |
| 
 | |
| - :py:obj:`ta_link_to_mp4`
 | |
| 
 | |
| .. code:: yaml
 | |
| 
 | |
|   - name: tubearchivist
 | |
|     engine: tubearchivist
 | |
|     shortcut: tuba
 | |
|     base_url:
 | |
|     ta_token:
 | |
|     ta_link_to_mp4: true
 | |
| 
 | |
| Implementations
 | |
| ===============
 | |
| """
 | |
| 
 | |
| from __future__ import annotations
 | |
| 
 | |
| from urllib.parse import urlencode
 | |
| from dateutil.parser import parse
 | |
| from searx.utils import html_to_text, humanize_number
 | |
| from searx.result_types import EngineResults
 | |
| 
 | |
| about = {
 | |
|     # pylint: disable=line-too-long
 | |
|     "website": 'https://www.tubearchivist.com',
 | |
|     "official_api_documentation": 'https://docs.tubearchivist.com/api/introduction/',
 | |
|     "use_official_api": True,
 | |
|     "require_api_key": False,
 | |
|     "results": 'JSON',
 | |
| }
 | |
| 
 | |
| # engine dependent config
 | |
| categories = ["videos"]
 | |
| paging = True
 | |
| 
 | |
| base_url = ""
 | |
| """Base URL of the Tube Archivist instance.  Fill this in with your own
 | |
| Tube Archivist URL (``http://your-instance:port``)."""
 | |
| 
 | |
| ta_token: str = ""
 | |
| """The API key to use for Authorization_ header.  Can be found under:
 | |
| 
 | |
|   :menuselection:`Settings --> User --> Admin Interface`.
 | |
| 
 | |
| .. _Authorization: https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Authorization
 | |
| """
 | |
| 
 | |
| ta_link_to_mp4: bool = False
 | |
| """Optional, if true SearXNG will link directly to the mp4 of the video to play
 | |
| in the browser.  The default behavior is to link into TubeArchivist's interface
 | |
| directly."""
 | |
| 
 | |
| 
 | |
| def absolute_url(relative_url):
 | |
|     return f'{base_url.rstrip("/")}{relative_url}'
 | |
| 
 | |
| 
 | |
| def init(_):
 | |
|     if not base_url:
 | |
|         raise ValueError('tubearchivist engine: base_url is unset')
 | |
|     if not ta_token:
 | |
|         raise ValueError('tubearchivist engine: ta_token is unset')
 | |
| 
 | |
| 
 | |
| def request(query, params):
 | |
|     if not query:
 | |
|         return False
 | |
| 
 | |
|     args = {'query': query}
 | |
|     params['url'] = f"{base_url.rstrip('/')}/api/search?{urlencode(args)}"
 | |
|     params['headers']['Authorization'] = f'Token {ta_token}'
 | |
| 
 | |
|     return params
 | |
| 
 | |
| 
 | |
| def response(resp) -> EngineResults:
 | |
|     results = EngineResults()
 | |
|     video_response(resp, results)
 | |
|     return results
 | |
| 
 | |
| 
 | |
| def video_response(resp, results: EngineResults) -> None:
 | |
|     """Parse video response from Tubearchivist instances."""
 | |
| 
 | |
|     json_data = resp.json()
 | |
| 
 | |
|     if 'results' not in json_data:
 | |
|         return
 | |
| 
 | |
|     for channel_result in json_data['results']['channel_results']:
 | |
|         channel_url = absolute_url(f'/channel/{channel_result["channel_id"]}')
 | |
| 
 | |
|         res = results.types.MainResult(
 | |
|             url=channel_url,
 | |
|             title=channel_result['channel_name'],
 | |
|             content=html_to_text(channel_result['channel_description']),
 | |
|             author=channel_result['channel_name'],
 | |
|             views=humanize_number(channel_result['channel_subs']),
 | |
|             thumbnail=f'{absolute_url(channel_result["channel_thumb_url"])}?auth={ta_token}',
 | |
|         )
 | |
| 
 | |
|         results.add(result=res)
 | |
| 
 | |
|     for video_result in json_data['results']['video_results']:
 | |
|         metadata = list(filter(None, [video_result['channel']['channel_name'], *video_result.get('tags', [])]))[:5]
 | |
|         if ta_link_to_mp4:
 | |
|             url = f'{base_url.rstrip("/")}{video_result["media_url"]}'
 | |
|         else:
 | |
|             url = f'{base_url.rstrip("/")}/?videoId={video_result["youtube_id"]}'
 | |
| 
 | |
|         # a type for the video.html template is not yet implemented
 | |
|         # --> using LegacyResult
 | |
| 
 | |
|         kwargs = {
 | |
|             'template': 'videos.html',
 | |
|             'url': url,
 | |
|             'title': video_result['title'],
 | |
|             'content': html_to_text(video_result['description']),
 | |
|             'author': video_result['channel']['channel_name'],
 | |
|             'length': video_result['player']['duration_str'],
 | |
|             'views': humanize_number(video_result['stats']['view_count']),
 | |
|             'publishedDate': parse(video_result['published']),
 | |
|             'thumbnail': f'{absolute_url(video_result["vid_thumb_url"])}?auth={ta_token}',
 | |
|             'metadata': ' | '.join(metadata),
 | |
|         }
 | |
|         results.add(results.types.LegacyResult(**kwargs))
 |