mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-04 03:27:06 -05:00 
			
		
		
		
	This patch brings two major changes:
- ``Result.filter_urls(..)`` to pass a filter function for URL fields
- The ``enabled_plugins:`` section in SearXNG's settings no longer exists.
To understand plugin development compile documentation:
    $ make docs.clean docs.live
and read http://0.0.0.0:8000/dev/plugins/development.html
There is no longer a distinction between built-in and external plugins; all
plugins are registered via the settings in the ``plugins:`` section.
In SearXNG, plugins can be registered via a fully qualified class name.  A
configuration (`PluginCfg`) can be transferred to the plugin, e.g. to activate
it by default / *opt-in* or *opt-out* from user's point of view.
built-in plugins
================
The built-in plugins are all located in the namespace `searx.plugins`.
.. code:: yaml
    plugins:
      searx.plugins.calculator.SXNGPlugin:
        active: true
      searx.plugins.hash_plugin.SXNGPlugin:
        active: true
      searx.plugins.self_info.SXNGPlugin:
        active: true
      searx.plugins.tracker_url_remover.SXNGPlugin:
        active: true
      searx.plugins.unit_converter.SXNGPlugin:
        active: true
      searx.plugins.ahmia_filter.SXNGPlugin:
        active: true
      searx.plugins.hostnames.SXNGPlugin:
        active: true
      searx.plugins.oa_doi_rewrite.SXNGPlugin:
        active: false
      searx.plugins.tor_check.SXNGPlugin:
        active: false
external plugins
================
SearXNG supports *external plugins* / there is no need to install one, SearXNG
runs out of the box.
- Only show green hosted results: https://github.com/return42/tgwf-searx-plugins/
To get a developer installation in a SearXNG developer environment:
.. code:: sh
   $ git clone git@github.com:return42/tgwf-searx-plugins.git
   $ ./manage pyenv.cmd python -m \
         pip install -e tgwf-searx-plugins
To register the plugin in SearXNG add ``only_show_green_results.SXNGPlugin`` to
the ``plugins:`` section of the settings:
.. code:: yaml
    plugins:
      # ...
      only_show_green_results.SXNGPlugin:
        active: false
Result.filter_urls(..)
======================
The ``Result.filter_urls(..)`` can be used to filter and/or modify URL fields.
In the following example, the filter function ``my_url_filter``:
.. code:: python
   def my_url_filter(result, field_name, url_src) -> bool | str:
       if "google" in url_src:
           return False              # remove URL field from result
       if "facebook" in url_src:
           new_url = url_src.replace("facebook", "fb-dummy")
           return new_url            # return modified URL
       return True                   # leave URL in field unchanged
is applied to all URL fields in the :py:obj:`Plugin.on_result` hook:
.. code:: python
   class MyUrlFilter(Plugin):
       ...
       def on_result(self, request, search, result) -> bool:
           result.filter_urls(my_url_filter)
           return True
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
		
	
			
		
			
				
	
	
		
			305 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			305 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# SPDX-License-Identifier: AGPL-3.0-or-later
 | 
						|
# pylint: disable=too-few-public-methods,missing-module-docstring
 | 
						|
 | 
						|
from __future__ import annotations
 | 
						|
 | 
						|
__all__ = ["PluginInfo", "Plugin", "PluginCfg", "PluginStorage"]
 | 
						|
 | 
						|
import abc
 | 
						|
import importlib
 | 
						|
import inspect
 | 
						|
import logging
 | 
						|
import re
 | 
						|
import typing
 | 
						|
 | 
						|
from dataclasses import dataclass, field
 | 
						|
 | 
						|
from searx.extended_types import SXNG_Request
 | 
						|
from searx.result_types import Result
 | 
						|
 | 
						|
if typing.TYPE_CHECKING:
 | 
						|
    from searx.search import SearchWithPlugins
 | 
						|
    import flask
 | 
						|
 | 
						|
log: logging.Logger = logging.getLogger("searx.plugins")
 | 
						|
 | 
						|
 | 
						|
@dataclass
class PluginInfo:
    """Descriptive data of a *plugin* as it is presented to the user in the
    preferences.

    To make the texts translatable they have to be written in English and
    passed through :py:obj:`flask_babel.gettext`.
    """

    id: str
    """ID of the plugin, used as the ID-selector in HTML/CSS (`#<id>`)."""

    name: str
    """Human readable name of the *plugin*."""

    description: str
    """Short description of the *plugin*."""

    preference_section: typing.Literal["general", "ui", "privacy", "query"] | None = "general"
    """Section (tab/group) of the preferences in which the plugin is listed.

    ``query`` is reserved for plugins that are triggered by a *keyword* in the
    search query, see:

    - :py:obj:`PluginInfo.examples`
    - :py:obj:`Plugin.keywords`

    Such plugins are listed in the preferences tab *Special Queries*.
    """

    examples: list[str] = field(default_factory=list)
    """Short usage examples / example query terms."""

    keywords: list[str] = field(default_factory=list)
    """See :py:obj:`Plugin.keywords`"""
 | 
						|
 | 
						|
 | 
						|
# Pattern for plugin IDs.  As written it requires a lowercase ASCII letter
# followed by a lowercase ASCII letter or digit; the trailing ``.*`` accepts any
# further characters.
ID_REGXP = re.compile(r"[a-z][a-z0-9].*")
 | 
						|
 | 
						|
 | 
						|
class Plugin(abc.ABC):
    """Abstract base class of all Plugins.

    The constructor copies the values of the :py:obj:`PluginCfg` to the
    instance, validates :py:obj:`Plugin.id` and sets up a logger.  The hooks
    :py:obj:`Plugin.init`, :py:obj:`Plugin.pre_search`,
    :py:obj:`Plugin.on_result` and :py:obj:`Plugin.post_search` are no-ops in
    the base class and can be overwritten in the inheritances.
    """

    id: str = ""
    """The ID (suffix) in the HTML form."""

    active: typing.ClassVar[bool]
    """Plugin is enabled/disabled by default (:py:obj:`PluginCfg.active`)."""

    keywords: list[str] = []
    """Keywords in the search query that activate the plugin.  The *keyword* is
    the first word in a search query.  If a plugin should be executed regardless
    of the search query, the list of keywords should be empty (which is also the
    default in the base class for Plugins)."""

    log: logging.Logger
    """A logger object, is automatically initialized when calling the
    constructor (if not already set in the subclass)."""

    info: PluginInfo
    """Informations about the *plugin*, see :py:obj:`PluginInfo`."""

    fqn: str = ""
    """Name identifying the implementation.  If not set by the subclass, the
    constructor sets it to the name of the module the plugin class is defined
    in."""

    def __init__(self, plg_cfg: PluginCfg) -> None:
        """Set up the plugin from its configuration.

        :param plg_cfg: every attribute of this object is copied to the plugin
            instance (same name, same value).
        :raises NotImplementedError: if a mandatory attribute is ``None``.
        :raises ValueError: if :py:obj:`Plugin.id` does not match ``ID_REGXP``.
        """
        super().__init__()
        plg_class = self.__class__
        if not self.fqn:
            self.fqn = plg_class.__module__

        # names from the configuration
        for n, v in plg_cfg.__dict__.items():
            setattr(self, n, v)

        # names that must be set by the plugin implementation
        for attr in [
            "id",
        ]:
            if getattr(self, attr, None) is None:
                raise NotImplementedError(f"plugin {self} is missing attribute {attr}")

        if not ID_REGXP.match(self.id):
            raise ValueError(f"plugin ID {self.id} contains invalid character (use lowercase ASCII)")

        if not getattr(self, "log", None):
            # inspect.getmodule() may return None (module can't be determined),
            # in that case derive the package from the module name of the class
            mod = inspect.getmodule(plg_class)
            if mod is not None:
                pkg_name = mod.__package__
            else:
                pkg_name = plg_class.__module__.rpartition(".")[0]
            self.log = logging.getLogger(f"{pkg_name}.{self.id}")

    def __hash__(self) -> int:
        """The hash value is used in :py:obj:`set`, for example, when an object
        is added to the set.  The hash value is also used in other contexts,
        e.g. when checking for equality to identify identical plugins from
        different sources (name collisions)."""

        return id(self)

    def __eq__(self, other):
        """:py:obj:`Plugin` objects are equal if the hash values of the two
        objects are equal.  For objects that are not a :py:obj:`Plugin`,
        ``NotImplemented`` is returned; this way a comparison with an unhashable
        object (e.g. a list) evaluates to ``False`` instead of raising a
        :py:obj:`TypeError`."""

        if not isinstance(other, Plugin):
            return NotImplemented
        return hash(self) == hash(other)

    def init(self, app: "flask.Flask") -> bool:  # pylint: disable=unused-argument
        """Initialization of the plugin, the return value decides whether this
        plugin is active or not.  Initialization only takes place once, at the
        time the WEB application is set up.  The base method always returns
        ``True``, the method can be overwritten in the inheritances,

        - ``True`` plugin is active
        - ``False`` plugin is inactive
        """
        return True

    # pylint: disable=unused-argument
    def pre_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> bool:
        """Runs BEFORE the search request and returns a boolean:

        - ``True`` to continue the search
        - ``False`` to stop the search
        """
        return True

    def on_result(self, request: SXNG_Request, search: "SearchWithPlugins", result: Result) -> bool:
        """Runs for each result of each engine and returns a boolean:

        - ``True`` to keep the result
        - ``False`` to remove the result from the result list

        The ``result`` can be modified to the needs.

        .. hint::

           If :py:obj:`Result.url <searx.result_types._base.Result.url>` is modified,
           :py:obj:`Result.parsed_url <searx.result_types._base.Result.parsed_url>` must
           be changed accordingly:

           .. code:: python

              result["parsed_url"] = urlparse(result["url"])
        """
        return True

    def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None | typing.Sequence[Result]:
        """Runs AFTER the search request.  Can return a list of
        :py:obj:`Result <searx.result_types._base.Result>` objects to be added to the
        final result list.  The base method returns ``None``."""
        return None
 | 
						|
 | 
						|
 | 
						|
@dataclass
class PluginCfg:
    """Configuration of a single plugin as given in the settings.

    .. code:: yaml

       mypackage.mymodule.MyPlugin:
         active: true
    """

    active: bool = False
    """If ``True``, the plugin is active by default and the user can *opt-out*
    in the preferences."""
 | 
						|
 | 
						|
 | 
						|
class PluginStorage:
    """A storage for managing the *plugins* of SearXNG."""

    plugin_list: set[Plugin]
    """The set of :py:obj:`Plugin` objects in this storage."""

    def __init__(self):
        self.plugin_list = set()

    def __iter__(self):
        yield from self.plugin_list

    def __len__(self):
        return len(self.plugin_list)

    @property
    def info(self) -> list[PluginInfo]:
        """The :py:obj:`PluginInfo` of each plugin in this storage."""

        return [p.info for p in self.plugin_list]

    def _user_plugins(self, search: "SearchWithPlugins") -> list[Plugin]:
        """Plugins of this storage whose ID is listed in ``search.user_plugins``."""

        return [p for p in self.plugin_list if p.id in search.user_plugins]

    def load_settings(self, cfg: dict[str, dict]):
        """Load plugins configured in SearXNG's settings :ref:`settings
        plugins`.

        :param cfg: maps the fully qualified class name of a plugin
            (``<module>.<class>``) to the keyword arguments of its
            :py:obj:`PluginCfg`.  An empty (``None``) entry is treated like an
            empty mapping.
        :raises ValueError: if the class of a plugin can't be loaded.
        """

        for fqn, plg_settings in cfg.items():
            cls = None
            # rpartition() does not fail on a name without a dot; in that case
            # mod_name is empty and the plugin is reported as not implemented
            mod_name, _, cls_name = fqn.rpartition('.')
            if mod_name:
                try:
                    mod = importlib.import_module(mod_name)
                    cls = getattr(mod, cls_name, None)
                except Exception as exc:  # pylint: disable=broad-exception-caught
                    log.exception(exc)

            if cls is None:
                msg = f"plugin {fqn} is not implemented"
                raise ValueError(msg)
            plg = cls(PluginCfg(**(plg_settings or {})))
            self.register(plg)

    def register(self, plugin: Plugin):
        """Register a :py:obj:`Plugin`.  In case of name collision (if two
        plugins have same ID) a :py:obj:`KeyError` exception is raised.
        Registering an object that is already in the storage is a no-op.
        """

        # compare the IDs (not the plugin object with the IDs) and ignore the
        # plugin itself, the set already handles a repeated registration
        if any(p.id == plugin.id for p in self.plugin_list if p is not plugin):
            msg = f"name collision '{plugin.id}'"
            plugin.log.critical(msg)
            raise KeyError(msg)

        self.plugin_list.add(plugin)
        plugin.log.debug("plugin has been loaded")

    def init(self, app: "flask.Flask") -> None:
        """Calls the method :py:obj:`Plugin.init` of each plugin in this
        storage.  Depending on its return value, the plugin is removed from
        *this* storage or not."""

        # iterate over a copy, the set is modified inside the loop
        for plg in self.plugin_list.copy():
            if not plg.init(app):
                self.plugin_list.remove(plg)

    def pre_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> bool:
        """Calls :py:obj:`Plugin.pre_search` of the plugins listed in
        ``search.user_plugins``.  Returns ``False`` as soon as one plugin
        returns a false value, otherwise ``True``.  A plugin that raises an
        exception is logged and skipped."""

        for plugin in self._user_plugins(search):
            try:
                ret = bool(plugin.pre_search(request=request, search=search))
            except Exception:  # pylint: disable=broad-except
                plugin.log.exception("Exception while calling pre_search")
                continue
            if not ret:
                # skip this search on the first False from a plugin
                return False
        return True

    def on_result(self, request: SXNG_Request, search: "SearchWithPlugins", result: Result) -> bool:
        """Calls :py:obj:`Plugin.on_result` of the plugins listed in
        ``search.user_plugins``.  Returns ``False`` as soon as one plugin
        returns a false value, otherwise ``True``.  A plugin that raises an
        exception is logged and skipped."""

        for plugin in self._user_plugins(search):
            try:
                ret = bool(plugin.on_result(request=request, search=search, result=result))
            except Exception:  # pylint: disable=broad-except
                plugin.log.exception("Exception while calling on_result")
                continue
            if not ret:
                # ignore this result item on the first False from a plugin
                return False
        return True

    def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None:
        """Extend :py:obj:`search.result_container
        <searx.results.ResultContainer>` with result items from plugins listed
        in :py:obj:`search.user_plugins <SearchWithPlugins.user_plugins>`.
        """

        # the keyword is the first word of the query (None for an empty query)
        words = search.search_query.query.split()
        keyword = words[0] if words else None

        for plugin in self._user_plugins(search):

            # plugin with keywords: skip plugin if no keyword match
            if plugin.keywords and keyword and keyword not in plugin.keywords:
                continue

            try:
                results = plugin.post_search(request=request, search=search) or []
            except Exception:  # pylint: disable=broad-except
                plugin.log.exception("Exception while calling post_search")
                continue

            # In case of *plugins* prefix ``plugin:`` is set, see searx.result_types.Result
            search.result_container.extend(f"plugin: {plugin.id}", results)
 |