[mod] typification of SearXNG: add new result type Paper

This patch adds a new result type, Paper, consisting of: the Python class (searx/result_types/paper.py), the Jinja template (searx/templates/simple/result_templates/paper.html) and the CSS (less) styles (client/simple/src/less/result_types/paper.less). Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2025-09-29 15:30:51 -04:00 · 2025-09-10 16:10:42 +02:00 · 2025-09-10 16:10:42 +02:00 · 7eedd44f5f
commit 7eedd44f5f
parent 57ef342ad1
10 changed files with 318 additions and 46 deletions
--- a/client/simple/src/less/result_types/paper.less
+++ b/client/simple/src/less/result_types/paper.less
@ -0,0 +1,72 @@
 // SPDX-License-Identifier: AGPL-3.0-or-later
 /*
  Layout of the Paper result class
 */
 // Default (wide viewport) layout of a Paper result.
 .result-paper {
  // The attribute list is laid out as a table: every <div> is a row and
  // every <span> inside it is a cell.
  .attributes {
    display: table;
    border-spacing: 0.125rem;
    div {
      display: table-row;
      span {
        font-size: 0.9rem;
        margin-top: 0.25rem;
        display: table-cell;
        // keep <time> elements at the same size as the surrounding cell text
        time {
          font-size: 0.9rem;
        }
      }
      // first cell of a row: min-width keeps this column at least 10rem wide
      span:first-child {
        color: var(--color-base-font);
        min-width: 10rem;
      }
      // second cell of a row: uses the publish-date font color
      span:nth-child(2) {
        color: var(--color-result-publishdate-font);
      }
    }
  }
  .content {
    margin-top: 0.25rem;
  }
  // free-text comments: smaller, italic, long words are allowed to wrap
  .comments {
    font-size: 0.9rem;
    margin: 0.25rem 0 0 0;
    padding: 0;
    word-wrap: break-word;
    line-height: 1.24;
    font-style: italic;
  }
 }
// Viewports up to the @phone width: drop the table layout of the attribute
// list, rows become blocks and the two cells flow inline on one line.
@media screen and (max-width: @phone) {
  .result-paper {
    .attributes {
      display: block;
      div {
        display: block;
        span {
          display: inline;
        }
        // without a column layout, the first cell is set in bold instead
        span:first-child {
          font-weight: bold;
        }
        span:nth-child(2) {
          // NOTE(review): mixin is defined outside this file; presumably a
          // text-direction aware left margin -- confirm.
          .ltr-margin-left(0.5rem);
        }
      }
    }
  }
 }
--- a/client/simple/src/less/style.less
+++ b/client/simple/src/less/style.less
@ -309,11 +309,11 @@ article[data-vim-selected].category-social {
  }
 }
 .result-paper,
 .result-packages {
  .attributes {
    display: table;
    border-spacing: 0.125rem;
    margin-top: 0.3rem;
    div {
      display: table-row;
@ -353,12 +353,6 @@ article[data-vim-selected].category-social {
  }
 }
 .result-packages {
  .attributes {
    margin-top: 0.3rem;
  }
 }
 .template_group_images {
  display: flex;
  flex-wrap: wrap;
@ -1118,7 +1112,6 @@ summary.title {
    display: none;
  }
  .result-paper,
  .result-packages {
    .attributes {
      display: block;
@ -1164,3 +1157,4 @@ pre code {
 // import layouts of the Result types
@import "result_types/keyvalue.less";
@import "result_types/paper.less";
--- a/docs/dev/result_types/main/paper.rst
+++ b/docs/dev/result_types/main/paper.rst
@ -0,0 +1,7 @@
 .. _result_types.paper:
 =============
 Paper Results
 =============
 .. automodule:: searx.result_types.paper
--- a/docs/dev/result_types/main_result.rst
+++ b/docs/dev/result_types/main_result.rst
@ -16,6 +16,7 @@ following types have been implemented so far ..
   main/mainresult
   main/keyvalue
   main/code
   main/paper
 The :ref:`LegacyResult <LegacyResult>` is used internally for the results that
 have not yet been typed.  The templates can be used as orientation until the
@ -26,7 +27,6 @@ final typing is complete.
 - :ref:`template videos`
 - :ref:`template torrent`
 - :ref:`template map`
 - :ref:`template paper`
 - :ref:`template packages`
 - :ref:`template files`
 - :ref:`template products`
--- a/searx/result_types/__init__.py
+++ b/searx/result_types/__init__.py
@ -22,6 +22,7 @@ __all__ = [
    "Translations",
    "WeatherAnswer",
    "Code",
    "Paper",
 ]
 import typing as t
@ -31,6 +32,7 @@ from ._base import Result, MainResult, LegacyResult
 from .answer import AnswerSet, Answer, Translations, WeatherAnswer
 from .keyvalue import KeyValue
 from .code import Code
 from .paper import Paper
 class ResultList(list[Result | LegacyResult], abc.ABC):
@ -44,6 +46,7 @@ class ResultList(list[Result | LegacyResult], abc.ABC):
        Answer = Answer
        KeyValue = KeyValue
        Code = Code
        Paper = Paper
        MainResult = MainResult
        Result = Result
        Translations = Translations
--- a/searx/result_types/_base.py
+++ b/searx/result_types/_base.py
@ -362,7 +362,11 @@ class MainResult(Result):  # pylint: disable=missing-class-docstring
    """The date on which the object was published."""
    pubdate: str = ""
-    """String representation of :py:obj:`MainResult.publishedDate`"""
+    """String representation of :py:obj:`MainResult.publishedDate`
    Deprecated: it is still partially used in the templates, but will one day be
    completely eliminated.
    """
    length: time.struct_time | None = None
    """Playing duration in seconds."""
--- a/searx/result_types/paper.py
+++ b/searx/result_types/paper.py
@ -0,0 +1,96 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Typification of the *paper* results.
 .. _BibTeX field types: https://en.wikipedia.org/wiki/BibTeX#Field_types
 .. _BibTeX format: https://www.bibtex.com/g/bibtex-format/
 Results of this type are rendered in the :origin:`paper.html
 <searx/templates/simple/result_templates/paper.html>` template.
 Related topics:
 - `BibTeX field types`_
 - `BibTeX format`_
 ----
 .. autoclass:: Paper
   :members:
   :show-inheritance:
 """
 # pylint: disable=too-few-public-methods, disable=invalid-name
 from __future__ import annotations
 __all__ = ["Paper"]
 import typing as t
 from searx.weather import DateTime
 from ._base import MainResult
@t.final
class Paper(MainResult, kw_only=True):
    """Main result describing a scientific paper or a comparable document.

    Instances are rendered by the ``paper.html`` result template.
    """

    template: str = "paper.html"

    date_of_publication: DateTime | None = None
    """Publication date of the document."""

    content: str = ""
    """Abstract of the document or an excerpt from it."""

    comments: str = ""
    """Free text, displayed in italics below the content."""

    tags: list[str] = []
    """List of free-form tags."""

    type: str = ""
    """Short name of the medium type, e.g. *book*, *pdf* or *html* ..."""

    authors: list[str] | set[str] = []
    """Authors of the work.  Mind the plural "s": the singular "author" lives
    in :py:obj:`MainResult.author`."""

    editor: str = ""
    """Editor of the book/paper."""

    publisher: str = ""
    """Name of the publisher."""

    journal: str = ""
    """Name of the journal or magazine in which the article was published."""

    volume: str | int = ""
    """Volume number."""

    pages: str = ""
    """Page range of the article."""

    number: str = ""
    """Number of the report, or the issue number of a journal article."""

    doi: str = ""
    """DOI number (like ``10.1038/d41586-018-07848-2``)."""

    issn: list[str] = []
    """List of ISSN numbers like ``1476-4687``"""

    isbn: list[str] = []
    """List of ISBN numbers like ``9780201896831``"""

    pdf_url: str = ""
    """URL of the full article as PDF."""

    html_url: str = ""
    """URL of the full article as HTML."""

    def __post_init__(self):
        """Run the base class initialization, then fill an unset
        ``date_of_publication`` from ``publishedDate`` (if that one is set)."""
        super().__post_init__()

        # an explicitly given date of publication always wins
        if self.date_of_publication is not None:
            return

        published = self.publishedDate
        if published is not None:
            self.date_of_publication = DateTime(published)
--- a/searx/templates/simple/result_templates/paper.html
+++ b/searx/templates/simple/result_templates/paper.html
@ -1,34 +1,92 @@
 {% from 'simple/macros.html' import result_header, result_sub_header, result_sub_footer, result_footer, result_link with context %}
-{{ result_header(result, favicons, image_proxify) -}}
+{{ result_header(result, favicons, image_proxify) }}
 <div class="attributes">
-  {%- if result.publishedDate %}<div class="result_publishedDate"><span>{{ _("Published date") }}:</span><span><time class="published_date" datetime="{{ result.pubdate }}" >{{ result.publishedDate }}</time></span></div>{% endif -%}
+  {%- if result.date_of_publication %}
-  {%- if result.authors %}<div class="result_authors"><span>{{ _("Author") }}:</span><span>{{ result.authors | join(", ") }}</span></div>{% endif -%}
+    <div>
      <span>{{ _("Published date") }}:</span>
      <span>{{ result.date_of_publication.l10n_date("long", "UI") }}</span>
    </div>
  {% endif -%}
  {%- if result.authors %}
    <div>
      <span>{{ _("Author") }}:</span>
      <span>{{ result.authors | join(", ") }}</span>
    </div>
  {% endif -%}
  {%- if result.journal -%}
-    <div class="result_journal">
+    <div>
-      <span>{{- _("Journal") }}:</span><span>{{ result.journal -}}
+      <span>{{- _("Journal") }}:</span>
-      {%- if result.volume -%}
+      <span>{{ result.journal -}}
-        &nbsp;{{- result.volume -}}
+        {%- if result.volume -%}
-        {%- if result.number -%}
+          &nbsp;{{- result.volume -}}
-          .{{- result.number -}}
+          {%- if result.number -%}.{{- result.number -}}{%- endif -%}
        {%- endif -%}
-      {%- endif -%}
+        {%- if result.pages -%}&nbsp;{{- result.pages -}}{%- endif -%}
      {%- if result.pages -%}
        &nbsp;{{- result.pages -}}
      {%- endif -%}
      </span>
    </div>
  {%- endif %}
-  {%- if result.editor %}<div class="result_editor"><span>{{ _("Editor") }}:</span><span>{{ result.editor }}</span></div>{% endif -%}
+  {%- if result.editor %}
-  {%- if result.publisher %}<div class="result_publisher"><span>{{ _("Publisher") }}:</span><span>{{ result.publisher }}</span></div>{% endif -%}
+    <div>
-  {%- if result.type %}<div class="result_type"><span>{{ _("Type") }}:</span><span>{{ result.type }}</span></div>{% endif -%}
+      <span>{{ _("Editor") }}:</span>
-  {%- if result.tags %}<div class="result_tags"><span>{{ _("Tags") }}:</span><span>{{ result.tags | join(", ")}}</span></div>{%- endif -%}
+      <span>{{ result.editor }}</span>
-  {%- if result.doi %}<div class="result_doi"><span>{{ _("DOI") }}:</span><span>{{ result_link(doi_resolver + result.doi, result.doi) }}</span></div>{% endif -%}
+    </div>
-  {%- if result.issn %}<div class="result_issn"><span>{{ _("ISSN") }}:</span><span>{{ result.issn | join(", ") }}</span></div>{% endif -%}
+  {% endif -%}
-  {%- if result.isbn %}<div class="result_isbn"><span>{{ _("ISBN") }}:</span><span>{{ result.isbn | join(", ") }}</span></div>{% endif -%}
+  {%- if result.publisher %}
    <div>
      <span>{{ _("Publisher") }}:</span>
      <span>{{ result.publisher }}</span>
    </div>
  {% endif -%}
  {%- if result.type %}
    <div>
      <span>{{ _("Type") }}:</span>
      <span>{{ result.type }}</span>
    </div>
  {% endif -%}
  {%- if result.tags %}
    <div>
      <span>{{ _("Tags") }}:</span>
      <span>{{ result.tags | join(", ")}}</span>
    </div>
  {%- endif -%}
  {%- if result.doi %}
    <div>
      <span>{{ _("DOI") }}:</span>
      <span>{{ result_link(doi_resolver + result.doi, result.doi) }}</span>
    </div>
  {% endif -%}
  {%- if result.issn %}
    <div>
      <span>{{ _("ISSN") }}:</span>
      <span>{{ result.issn | join(", ") }}</span>
    </div>
  {% endif -%}
  {%- if result.isbn %}
    <div class="result_isbn">
      <span>{{ _("ISBN") }}:</span>
      <span>{{ result.isbn | join(", ") }}</span>
    </div>
  {% endif -%}
  {%- if result.views %}
    <div>
      <span>{{ _('Views') }}:</span>
      <span>{{ result.views }}</span>
    </div>
  {% endif -%}
 </div>
-{%- if result.content -%}<p class="content">{{- result.content | safe -}}</p>{%- endif -%}
+{%- if result.content -%}
-{%- if result.comments -%}<p class="comments">{{- result.comments -}}</p>{%- endif -%}
+  <p class="content">{{- result.content | safe -}}</p>
 {%- endif -%}
 {%- if result.comments -%}
  <p class="comments">{{- result.comments -}}</p>
 {%- endif -%}
 {%- if result.metadata %}
  <div class="highlight">{{ result.metadata|safe }}</div>
 {% endif -%}
 <p class="altlink">
  {%- if result.pdf_url -%}{{ result_link(result.pdf_url, _('PDF')) }}{%- endif -%}
  {%- if result.html_url -%}{{ result_link(result.html_url, _('HTML')) }}{%- endif -%}
--- a/searx/utils.py
+++ b/searx/utils.py
@ -21,7 +21,8 @@ from datetime import timedelta
 from markdown_it import MarkdownIt
 from lxml import html
-from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError
+from lxml.etree import XPath, XPathError, XPathSyntaxError
 from lxml.etree import ElementBase, _Element  # pyright: ignore[reportPrivateUsage]
 from searx import settings
 from searx.data import USER_AGENTS, data_dir
@ -40,6 +41,9 @@ XPathSpecType: t.TypeAlias = str | XPath
 """Type alias used by :py:obj:`searx.utils.get_xpath`,
 :py:obj:`searx.utils.eval_xpath` and other XPath selectors."""
 ElementType: t.TypeAlias = ElementBase | _Element
 _BLOCKED_TAGS = ('script', 'style')
 _ECMA_UNESCAPE4_RE = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
@ -204,15 +208,23 @@ def markdown_to_text(markdown_str: str) -> str:
 def extract_text(
-    xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None,
+    xpath_results: list[ElementType] | ElementType | str | Number | bool | None,
    allow_none: bool = False,
 ) -> str | None:
    """Extract text from a lxml result
-    * if xpath_results is list, extract the text from each result and concat the list
+    - If ``xpath_results`` is a list of :py:obj:`ElementType` objects, extract
-    * if xpath_results is a xml element, extract all the text node from it
+      the text from each result and concatenate the list in a string.
-      ( text_content() method from lxml )
+
-    * if xpath_results is a string element, then it's already done
+    - If ``xpath_results`` is a :py:obj:`ElementType` object, extract all the
      text node from it ( :py:obj:`lxml.html.tostring`, ``method="text"`` )
    - If ``xpath_results`` is of type :py:obj:`str`, :py:obj:`Number` or
      :py:obj:`bool`, its string value is returned.
    - If ``xpath_results`` is ``None``, a :py:obj:`ValueError` is raised,
      unless ``allow_none`` is ``True``, in which case ``None`` is returned.
    """
    if isinstance(xpath_results, list):
        # it's list of result : concat everything using recursive call
@ -220,7 +232,7 @@ def extract_text(
        for e in xpath_results:
            result = result + (extract_text(e) or '')
        return result.strip()
-    if isinstance(xpath_results, ElementBase):
+    if isinstance(xpath_results, ElementType):
        # it's a element
        text: str = html.tostring(  # type: ignore
            xpath_results,  # pyright: ignore[reportArgumentType]
@ -289,7 +301,7 @@ def normalize_url(url: str, base_url: str) -> str:
    return url
-def extract_url(xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None, base_url: str) -> str:
+def extract_url(xpath_results: list[ElementType] | ElementType | str | Number | bool | None, base_url: str) -> str:
    """Extract and normalize URL from lxml Element
    Example:
@ -520,7 +532,7 @@ def get_xpath(xpath_spec: XPathSpecType) -> XPath:
    raise TypeError('xpath_spec must be either a str or a lxml.etree.XPath')  # pyright: ignore[reportUnreachable]
-def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any:
+def eval_xpath(element: ElementType, xpath_spec: XPathSpecType) -> t.Any:
    """Equivalent of ``element.xpath(xpath_str)`` but compile ``xpath_str`` into
    a :py:obj:`lxml.etree.XPath` object once for all.  The return value of
    ``xpath(..)`` is complex, read `XPath return values`_ for more details.
@ -548,12 +560,12 @@ def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any:
        raise SearxEngineXPathException(xpath_spec, arg) from e
-def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]:
+def eval_xpath_list(element: ElementType, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]:
    """Same as :py:obj:`searx.utils.eval_xpath`, but additionally ensures the
    return value is a :py:obj:`list`.  The minimum length of the list is also
    checked (if ``min_len`` is set)."""
-    result = eval_xpath(element, xpath_spec)
+    result: list[t.Any] = eval_xpath(element, xpath_spec)
    if not isinstance(result, list):
        raise SearxEngineXPathException(xpath_spec, 'the result is not a list')
    if min_len is not None and min_len > len(result):
@ -562,7 +574,7 @@ def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: in
 def eval_xpath_getindex(
-    element: ElementBase,
+    element: ElementType,
    xpath_spec: XPathSpecType,
    index: int,
    default: t.Any = _NOTSET,
--- a/searx/weather.py
+++ b/searx/weather.py
@ -27,6 +27,7 @@ import babel
 import babel.numbers
 import babel.dates
 import babel.languages
 import flask_babel
 from searx import network
 from searx.cache import ExpireCache, ExpireCacheCfg
@ -197,6 +198,7 @@ class GeoLocation:
 DateTimeFormats = typing.Literal["full", "long", "medium", "short"]
 DateTimeLocaleTypes = typing.Literal["UI"]
@typing.final
@ -205,6 +207,13 @@ class DateTime:
    conveniently combines :py:obj:`datetime.datetime` and
    :py:obj:`babel.dates.format_datetime`.  A conversion of time zones is not
    provided (in the current version).
    The localized string representation can be obtained via the
    :py:obj:`DateTime.l10n` and :py:obj:`DateTime.l10n_date` methods, where the
    ``locale`` parameter defaults to the search language.  Alternatively, a
    :py:obj:`GeoLocation` or a :py:obj:`babel.Locale` instance can be passed
    directly. If the UI language is to be used, the string ``UI`` can be passed
    as the value for the ``locale``.
    """
    def __init__(self, time: datetime.datetime):
@ -216,15 +225,32 @@ class DateTime:
    def l10n(
        self,
        fmt: DateTimeFormats | str = "medium",
-        locale: babel.Locale | GeoLocation | None = None,
+        locale: DateTimeLocaleTypes | babel.Locale | GeoLocation | None = None,
    ) -> str:
        """Localized representation of date & time."""
-        if isinstance(locale, GeoLocation):
+        if isinstance(locale, str) and locale == "UI":
            locale = flask_babel.get_locale()
        elif isinstance(locale, GeoLocation):
            locale = locale.locale()
        elif locale is None:
            locale = babel.Locale.parse(_get_sxng_locale_tag(), sep='-')
        return babel.dates.format_datetime(self.datetime, format=fmt, locale=locale)
    def l10n_date(
        self,
        fmt: DateTimeFormats | str = "medium",
        locale: DateTimeLocaleTypes | babel.Locale | GeoLocation | None = None,
    ) -> str:
        """Localized representation of the date.

        :param fmt: Format passed on to :py:obj:`babel.dates.format_date`.
        :param locale: The string ``"UI"`` selects the locale returned by
            :py:obj:`flask_babel.get_locale`, a :py:obj:`GeoLocation` is
            asked for its ``locale()`` and ``None`` (the default) falls back
            to the locale parsed from ``_get_sxng_locale_tag()``.  Any other
            value is handed to babel unchanged.
        :return: The string formatted by :py:obj:`babel.dates.format_date`.
        """
        if isinstance(locale, str) and locale == "UI":
            babel_locale = flask_babel.get_locale()
        elif isinstance(locale, GeoLocation):
            babel_locale = locale.locale()
        elif locale is None:
            babel_locale = babel.Locale.parse(_get_sxng_locale_tag(), sep='-')
        else:
            # neither a shortcut nor a default: use the value as given
            babel_locale = locale

        return babel.dates.format_date(
            self.datetime,
            format=fmt,
            locale=babel_locale,
        )
@typing.final
 class Temperature: