mirror of
https://github.com/searxng/searxng.git
synced 2025-07-08 02:34:21 -04:00
[feat] engines: add Tube Archivist engine (#4889)
Tube Archivist [1] is a self-hosted project which archives youtube videos on your own local server. This engine connects with Tube Archivist's search API to allow searching from SearXNG into your own hosted videos. [1] https://www.tubearchivist.com/ Signed-off-by: Robert M. Clabough <robert@claobugh.tech> Co-authored-by: Bnyro <bnyro@tutanota.com> Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
cc61d0833c
commit
ec892d3836
@ -177,3 +177,4 @@ features or generally made searx better:
|
|||||||
- Daniel Mowitz `<https://daniel.mowitz.rocks>`
|
- Daniel Mowitz `<https://daniel.mowitz.rocks>`
|
||||||
- `Bearz314 <https://github.com/bearz314>`_
|
- `Bearz314 <https://github.com/bearz314>`_
|
||||||
- Tommaso Colella `<https://github.com/gioleppe>`
|
- Tommaso Colella `<https://github.com/gioleppe>`
|
||||||
|
- @AgentScrubbles
|
||||||
|
8
docs/dev/engines/online/tubearchivist.rst
Normal file
8
docs/dev/engines/online/tubearchivist.rst
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
.. _tubearchivist engine:
|
||||||
|
|
||||||
|
==============
|
||||||
|
Tube Archivist
|
||||||
|
==============
|
||||||
|
|
||||||
|
.. automodule:: searx.engines.tubearchivist
|
||||||
|
:members:
|
187
searx/engines/tubearchivist.py
Normal file
187
searx/engines/tubearchivist.py
Normal file
@ -0,0 +1,187 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""`Tube Archivist`_ - *Your self hosted YouTube media server.*
|
||||||
|
|
||||||
|
.. _Tube Archivist: https://www.tubearchivist.com
|
||||||
|
|
||||||
|
This engine connects with a self-hosted instance of `Tube Archivist`_ to allow
|
||||||
|
searching for your hosted videos.
|
||||||
|
|
||||||
|
`Tube Archivist`_ (TA) requires authentication for all image loads via cookie
|
||||||
|
authentication. What this means is that by default, SearXNG will have no way to
|
||||||
|
pull images from TA (as there is no way to pass cookies in a URL string only).
|
||||||
|
|
||||||
|
In the meantime while work is done on the TA side, this can be worked around by
|
||||||
|
bypassing auth for images in TA by altering the default TA nginx file.
|
||||||
|
|
||||||
|
This is located in the main tubearchivist docker container at::
|
||||||
|
|
||||||
|
/etc/nginx/sites-available/default
|
||||||
|
|
||||||
|
It is **strongly** recommended first setting up the initial connection and
|
||||||
|
verifying searching works first with broken images, and then attempting this
|
||||||
|
change. This will limit any debugging to only images, rather than
|
||||||
|
tokens/networking.
|
||||||
|
|
||||||
|
Steps to enable **unauthenticated** metadata access for channels and videos:
|
||||||
|
|
||||||
|
#. Perform any backups of TA before editing core configurations.
|
||||||
|
|
||||||
|
#. Copy the contents of the file ``/etc/nginx/sites-available/default`` in the
|
||||||
|
TA docker container
|
||||||
|
|
||||||
|
#. Edit ``location /cache/videos`` and ``location /cache/channels``. Comment
|
||||||
|
out the line ``auth_request /api/ping/;`` to ``# auth_request /api/ping/;``.
|
||||||
|
|
||||||
|
#. Save the file to wherever you normally store your docker configuration.
|
||||||
|
|
||||||
|
#. Mount this new configuration over the default configuration. With ``docker
|
||||||
|
run``, this would be::
|
||||||
|
|
||||||
|
-v ./your-new-config.yml:/etc/nginx/sites-available/default
|
||||||
|
|
||||||
|
With ``docker compose``, this would be::
|
||||||
|
|
||||||
|
- "./your-new-config.yml:/etc/nginx/sites-available/default:ro"
|
||||||
|
|
||||||
|
#. Start the TA container.
|
||||||
|
|
||||||
|
After these steps, double check that TA works as normal (nothing should be
|
||||||
|
different on the TA side). Searching again should now show images.
|
||||||
|
|
||||||
|
|
||||||
|
Configuration
|
||||||
|
=============
|
||||||
|
|
||||||
|
The engine has the following required settings:
|
||||||
|
|
||||||
|
- :py:obj:`base_url`
|
||||||
|
- :py:obj:`ta_token`
|
||||||
|
|
||||||
|
Optional settings:
|
||||||
|
|
||||||
|
- :py:obj:`ta_link_to_mp4`
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
- name: tubearchivist
|
||||||
|
engine: tubearchivist
|
||||||
|
shortcut: tuba
|
||||||
|
base_url:
|
||||||
|
ta_token:
|
||||||
|
ta_link_to_mp4: true
|
||||||
|
|
||||||
|
Implementations
|
||||||
|
===============
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
from dateutil.parser import parse
|
||||||
|
from searx.utils import html_to_text, humanize_number
|
||||||
|
from searx.result_types import EngineResults
|
||||||
|
|
||||||
|
# Engine metadata.
about = {
    # pylint: disable=line-too-long
    "website": 'https://www.tubearchivist.com',
    "official_api_documentation": 'https://docs.tubearchivist.com/api/introduction/',
    "use_official_api": True,
    # init() raises ValueError when ``ta_token`` is unset, i.e. the engine
    # cannot be used without an API token.
    "require_api_key": True,
    "results": 'JSON',
}
|
||||||
|
|
||||||
|
# engine dependent config
categories = ["videos"]
paging = True

base_url = ""
"""Root URL of your own Tube Archivist instance, for example
``http://your-instance:port``.  Required."""

ta_token: str = ""
"""API token sent in the Authorization_ header of every search request.
Required.  Look it up in Tube Archivist under:

:menuselection:`Settings --> User --> Admin Interface`.

.. _Authorization: https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Authorization
"""

ta_link_to_mp4: bool = False
"""Optional.  When true, video results link straight to the mp4 file so it
plays in the browser; otherwise (the default) they link into the Tube
Archivist interface."""
|
||||||
|
|
||||||
|
|
||||||
|
def absolute_url(relative_url):
    """Return ``relative_url`` prefixed with the configured ``base_url``.

    Trailing slashes are stripped from ``base_url``; ``relative_url`` is
    appended unchanged.
    """
    root = base_url.rstrip("/")
    return f'{root}{relative_url}'
|
||||||
|
|
||||||
|
|
||||||
|
def init(_):
    """Validate the required engine settings.

    The argument is ignored.

    :raises ValueError: if ``base_url`` or ``ta_token`` is empty
        (``base_url`` is checked first).
    """
    required = (
        (base_url, 'tubearchivist engine: base_url is unset'),
        (ta_token, 'tubearchivist engine: ta_token is unset'),
    )
    for value, message in required:
        if not value:
            raise ValueError(message)
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
    """Fill ``params`` with the search request for the Tube Archivist API.

    Sets ``params['url']`` to ``<base_url>/api/search?query=...`` and adds an
    ``Authorization: Token <ta_token>`` header.

    :returns: the updated ``params``, or ``False`` (with ``params`` left
        untouched) when ``query`` is empty.
    """
    if query:
        endpoint = f"{base_url.rstrip('/')}/api/search"
        query_string = urlencode({'query': query})
        params['url'] = f"{endpoint}?{query_string}"
        params['headers']['Authorization'] = f'Token {ta_token}'
        return params

    return False
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp) -> EngineResults:
    """Build the result list for one Tube Archivist search response.

    Creates an empty :py:obj:`EngineResults`, lets :py:obj:`video_response`
    populate it from ``resp`` and returns it.
    """
    collected = EngineResults()
    video_response(resp, collected)
    return collected
|
||||||
|
|
||||||
|
|
||||||
|
def video_response(resp, results: EngineResults) -> None:
    """Parse video response from Tubearchivist instances.

    Reads the JSON body of ``resp`` and appends to ``results``:

    - one ``MainResult`` per entry of ``results.channel_results``, then
    - one ``LegacyResult`` (template ``videos.html``) per entry of
      ``results.video_results``.

    Nothing is added when the payload has no ``results`` key.  A missing or
    empty channel / video list is skipped instead of raising ``KeyError``.
    """

    json_data = resp.json()

    if 'results' not in json_data:
        return

    found = json_data['results']

    # NOTE(review): the thumbnail URLs built below carry ``ta_token`` as a
    # query parameter, so the API token becomes part of the result data --
    # confirm this exposure is intended.

    for channel_result in found.get('channel_results') or []:
        channel_url = absolute_url(f'/channel/{channel_result["channel_id"]}')

        res = results.types.MainResult(
            url=channel_url,
            title=channel_result['channel_name'],
            content=html_to_text(channel_result['channel_description']),
            author=channel_result['channel_name'],
            views=humanize_number(channel_result['channel_subs']),
            thumbnail=f'{absolute_url(channel_result["channel_thumb_url"])}?auth={ta_token}',
        )

        results.add(result=res)

    for video_result in found.get('video_results') or []:
        # Channel name first, then the tags; empty entries are dropped and
        # the list is capped at five items.  ``tags`` may be absent or null.
        tags = video_result.get('tags') or []
        metadata = list(filter(None, [video_result['channel']['channel_name'], *tags]))[:5]

        # Link either to the media file itself or to the video's page in the
        # Tube Archivist interface; both go through the shared URL helper.
        if ta_link_to_mp4:
            url = absolute_url(video_result["media_url"])
        else:
            url = absolute_url(f'/?videoId={video_result["youtube_id"]}')

        # a type for the video.html template is not yet implemented
        # --> using LegacyResult

        kwargs = {
            'template': 'videos.html',
            'url': url,
            'title': video_result['title'],
            'content': html_to_text(video_result['description']),
            'author': video_result['channel']['channel_name'],
            'length': video_result['player']['duration_str'],
            'views': humanize_number(video_result['stats']['view_count']),
            'publishedDate': parse(video_result['published']),
            'thumbnail': f'{absolute_url(video_result["vid_thumb_url"])}?auth={ta_token}',
            'metadata': ' | '.join(metadata),
        }
        results.add(results.types.LegacyResult(**kwargs))
|
@ -2036,6 +2036,16 @@ engines:
|
|||||||
enable_http: true
|
enable_http: true
|
||||||
shortcut: tch
|
shortcut: tch
|
||||||
|
|
||||||
|
# Tube Archivist is self-hosted YouTube archiving software.
|
||||||
|
# https://docs.searxng.org/dev/engines/online/tubearchivist.html
|
||||||
|
#
|
||||||
|
# - name: tubearchivist
|
||||||
|
# engine: tubearchivist
|
||||||
|
# shortcut: tuba
|
||||||
|
# base_url:
|
||||||
|
# ta_token:
|
||||||
|
# ta_link_to_mp4: false
|
||||||
|
|
||||||
# torznab engine lets you query any torznab compatible indexer. Using this
|
# torznab engine lets you query any torznab compatible indexer. Using this
|
||||||
# engine in combination with Jackett opens the possibility to query a lot of
|
# engine in combination with Jackett opens the possibility to query a lot of
|
||||||
# public and private indexers directly from SearXNG. More details at:
|
# public and private indexers directly from SearXNG. More details at:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user