[mod] typification of SearXNG: add new result type Paper

This patch adds a new result type: Paper

- Python class: searx/result_types/paper.py
- Jinja template: searx/templates/simple/result_templates/paper.html
- CSS (less): client/simple/src/less/result_types/paper.less

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2025-09-29 15:30:51 -04:00 · 2025-09-10 16:10:42 +02:00 · 2025-09-10 16:10:42 +02:00 · 7eedd44f5f
commit 7eedd44f5f
parent 57ef342ad1
10 changed files with 318 additions and 46 deletions
--- a/client/simple/src/less/result_types/paper.less
+++ b/client/simple/src/less/result_types/paper.less
@ -0,0 +1,72 @@
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+/*
+  Layout of the Paper result class
+*/
+
+.result-paper {
+  .attributes { // label/value pairs laid out as a two-column table
+    display: table;
+    border-spacing: 0.125rem;
+
+    div { // one table row per attribute
+      display: table-row;
+
+      span { // a cell of the row
+        font-size: 0.9rem;
+        margin-top: 0.25rem;
+        display: table-cell;
+
+        time {
+          font-size: 0.9rem;
+        }
+      }
+
+      span:first-child { // label cell (e.g. "Author:")
+        color: var(--color-base-font);
+        min-width: 10rem; // minimum width of the label column
+      }
+
+      span:nth-child(2) { // value cell
+        color: var(--color-result-publishdate-font);
+      }
+    }
+  }
+
+  .content {
+    margin-top: 0.25rem;
+  }
+
+  .comments { // free-text note rendered below the content, set in italics
+    font-size: 0.9rem;
+    margin: 0.25rem 0 0 0;
+    padding: 0;
+    word-wrap: break-word;
+    line-height: 1.24;
+    font-style: italic;
+  }
+}
+
+@media screen and (max-width: @phone) { // viewports up to @phone wide: drop the table layout
+  .result-paper {
+    .attributes {
+      display: block;
+
+      div {
+        display: block;
+
+        span {
+          display: inline; // label and value flow inline within their row
+        }
+
+        span:first-child { // label
+          font-weight: bold;
+        }
+
+        span:nth-child(2) { // value
+          .ltr-margin-left(0.5rem); // mixin defined elsewhere; presumably a direction-aware left margin - confirm
+        }
+      }
+    }
+  }
+}
--- a/client/simple/src/less/style.less
+++ b/client/simple/src/less/style.less
@ -309,11 +309,11 @@ article[data-vim-selected].category-social {
  }
 }

-.result-paper,
 .result-packages {
  .attributes {
    display: table;
    border-spacing: 0.125rem;
+    margin-top: 0.3rem;

    div {
      display: table-row;
@ -353,12 +353,6 @@ article[data-vim-selected].category-social {
  }
 }

-.result-packages {
-  .attributes {
-    margin-top: 0.3rem;
-  }
-}
-
 .template_group_images {
  display: flex;
  flex-wrap: wrap;
@ -1118,7 +1112,6 @@ summary.title {
    display: none;
  }

-  .result-paper,
  .result-packages {
    .attributes {
      display: block;
@ -1164,3 +1157,4 @@ pre code {

 // import layouts of the Result types
@import "result_types/keyvalue.less";
+@import "result_types/paper.less";
--- a/docs/dev/result_types/main/paper.rst
+++ b/docs/dev/result_types/main/paper.rst
@ -0,0 +1,7 @@
+.. _result_types.paper:
+
+=============
+Paper Results
+=============
+
+.. automodule:: searx.result_types.paper
--- a/docs/dev/result_types/main_result.rst
+++ b/docs/dev/result_types/main_result.rst
@ -16,6 +16,7 @@ following types have been implemented so far ..
   main/mainresult
   main/keyvalue
   main/code
+   main/paper

 The :ref:`LegacyResult <LegacyResult>` is used internally for the results that
 have not yet been typed.  The templates can be used as orientation until the
@ -26,7 +27,6 @@ final typing is complete.
 - :ref:`template videos`
 - :ref:`template torrent`
 - :ref:`template map`
- :ref:`template paper`
 - :ref:`template packages`
 - :ref:`template files`
 - :ref:`template products`
--- a/searx/result_types/init.py
+++ b/searx/result_types/init.py
@ -22,6 +22,7 @@ __all__ = [
    "Translations",
    "WeatherAnswer",
    "Code",
+    "Paper",
 ]

 import typing as t
@ -31,6 +32,7 @@ from ._base import Result, MainResult, LegacyResult
 from .answer import AnswerSet, Answer, Translations, WeatherAnswer
 from .keyvalue import KeyValue
 from .code import Code
+from .paper import Paper


 class ResultList(list[Result | LegacyResult], abc.ABC):
@ -44,6 +46,7 @@ class ResultList(list[Result | LegacyResult], abc.ABC):
        Answer = Answer
        KeyValue = KeyValue
        Code = Code
+        Paper = Paper
        MainResult = MainResult
        Result = Result
        Translations = Translations
--- a/searx/result_types/_base.py
+++ b/searx/result_types/_base.py
@ -362,7 +362,11 @@ class MainResult(Result):  # pylint: disable=missing-class-docstring
    """The date on which the object was published."""

    pubdate: str = ""
-    """String representation of :py:obj:`MainResult.publishedDate`"""
+    """String representation of :py:obj:`MainResult.publishedDate`
+
+    Deprecated: it is still partially used in the templates, but will one day be
+    completely eliminated.
+    """

    length: time.struct_time | None = None
    """Playing duration in seconds."""
--- a/searx/result_types/paper.py
+++ b/searx/result_types/paper.py
@ -0,0 +1,96 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Typification of the *paper* results.
+
+.. _BibTeX field types: https://en.wikipedia.org/wiki/BibTeX#Field_types
+.. _BibTeX format: https://www.bibtex.com/g/bibtex-format/
+
+Results of this type are rendered in the :origin:`paper.html
+<searx/templates/simple/result_templates/paper.html>` template.
+
+Related topics:
+
+- `BibTeX field types`_
+- `BibTeX format`_
+
+----
+
+.. autoclass:: Paper
+   :members:
+   :show-inheritance:
+
+"""
+# pylint: disable=too-few-public-methods, disable=invalid-name
+
+from __future__ import annotations
+
+__all__ = ["Paper"]
+
+import typing as t
+
+from searx.weather import DateTime
+from ._base import MainResult
+
+
+@t.final
+class Paper(MainResult, kw_only=True):
+    """Result type suitable for displaying scientific papers and other
+    documents."""
+
+    template: str = "paper.html"
+
+    date_of_publication: DateTime | None = None
+    """Date the document was published."""
+
+    content: str = ""
+    """An abstract or excerpt from the document."""
+
+    comments: str = ""
+    """Free text display in italic below the content."""
+
+    tags: list[str] = []
+    """Free tag list."""
+
+    type: str = ""
+    """Short description of medium type, e.g. *book*, *pdf* or *html* ..."""
+
+    authors: list[str] | set[str] = []
+    """List of authors of the work (authors with a "s" suffix, the "author" is
+    in the :py:obj:`MainResult.author`)."""
+
+    editor: str = ""
+    """Editor of the book/paper."""
+
+    publisher: str = ""
+    """Name of the publisher."""
+
+    journal: str = ""
+    """Name of the journal or magazine the article was published in."""
+
+    volume: str | int = ""
+    """Volume number."""
+
+    pages: str = ""
+    """Page range where the article is."""
+
+    number: str = ""
+    """Number of the report or the issue number for a journal article."""
+
+    doi: str = ""
+    """DOI number (like ``10.1038/d41586-018-07848-2``)."""
+
+    issn: list[str] = []
+    """List of ISSN numbers like ``1476-4687``"""
+
+    isbn: list[str] = []
+    """List of ISBN numbers like ``9780201896831``"""
+
+    pdf_url: str = ""
+    """URL to the full article, the PDF version"""
+
+    html_url: str = ""
+    """URL to full article, HTML version"""
+
+    def __post_init__(self):
+        super().__post_init__()
+        if self.date_of_publication is None and self.publishedDate is not None:
+            self.date_of_publication = DateTime(self.publishedDate)
--- a/searx/templates/simple/result_templates/paper.html
+++ b/searx/templates/simple/result_templates/paper.html
@ -1,34 +1,92 @@
 {% from 'simple/macros.html' import result_header, result_sub_header, result_sub_footer, result_footer, result_link with context %}

-{{ result_header(result, favicons, image_proxify) -}}
+{{ result_header(result, favicons, image_proxify) }}
+
 <div class="attributes">
-  {%- if result.publishedDate %}<div class="result_publishedDate"><span>{{ _("Published date") }}:</span><span><time class="published_date" datetime="{{ result.pubdate }}" >{{ result.publishedDate }}</time></span></div>{% endif -%}
-  {%- if result.authors %}<div class="result_authors"><span>{{ _("Author") }}:</span><span>{{ result.authors | join(", ") }}</span></div>{% endif -%}
+  {%- if result.date_of_publication %}
+    <div>
+      <span>{{ _("Published date") }}:</span>
+      <span>{{ result.date_of_publication.l10n_date("long", "UI") }}</span>
+    </div>
+  {% endif -%}
+  {%- if result.authors %}
+    <div>
+      <span>{{ _("Author") }}:</span>
+      <span>{{ result.authors | join(", ") }}</span>
+    </div>
+  {% endif -%}
  {%- if result.journal -%}
-    <div class="result_journal">
-      <span>{{- _("Journal") }}:</span><span>{{ result.journal -}}
+    <div>
+      <span>{{- _("Journal") }}:</span>
+      <span>{{ result.journal -}}
        {%- if result.volume -%}
          &nbsp;{{- result.volume -}}
-        {%- if result.number -%}
-          .{{- result.number -}}
-        {%- endif -%}
-      {%- endif -%}
-      {%- if result.pages -%}
-        &nbsp;{{- result.pages -}}
+          {%- if result.number -%}.{{- result.number -}}{%- endif -%}
        {%- endif -%}
+        {%- if result.pages -%}&nbsp;{{- result.pages -}}{%- endif -%}
      </span>
    </div>
  {%- endif %}
-  {%- if result.editor %}<div class="result_editor"><span>{{ _("Editor") }}:</span><span>{{ result.editor }}</span></div>{% endif -%}
-  {%- if result.publisher %}<div class="result_publisher"><span>{{ _("Publisher") }}:</span><span>{{ result.publisher }}</span></div>{% endif -%}
-  {%- if result.type %}<div class="result_type"><span>{{ _("Type") }}:</span><span>{{ result.type }}</span></div>{% endif -%}
-  {%- if result.tags %}<div class="result_tags"><span>{{ _("Tags") }}:</span><span>{{ result.tags | join(", ")}}</span></div>{%- endif -%}
-  {%- if result.doi %}<div class="result_doi"><span>{{ _("DOI") }}:</span><span>{{ result_link(doi_resolver + result.doi, result.doi) }}</span></div>{% endif -%}
-  {%- if result.issn %}<div class="result_issn"><span>{{ _("ISSN") }}:</span><span>{{ result.issn | join(", ") }}</span></div>{% endif -%}
-  {%- if result.isbn %}<div class="result_isbn"><span>{{ _("ISBN") }}:</span><span>{{ result.isbn | join(", ") }}</span></div>{% endif -%}
+  {%- if result.editor %}
+    <div>
+      <span>{{ _("Editor") }}:</span>
+      <span>{{ result.editor }}</span>
    </div>
-{%- if result.content -%}<p class="content">{{- result.content | safe -}}</p>{%- endif -%}
-{%- if result.comments -%}<p class="comments">{{- result.comments -}}</p>{%- endif -%}
+  {% endif -%}
+  {%- if result.publisher %}
+    <div>
+      <span>{{ _("Publisher") }}:</span>
+      <span>{{ result.publisher }}</span>
+    </div>
+  {% endif -%}
+  {%- if result.type %}
+    <div>
+      <span>{{ _("Type") }}:</span>
+      <span>{{ result.type }}</span>
+    </div>
+  {% endif -%}
+  {%- if result.tags %}
+    <div>
+      <span>{{ _("Tags") }}:</span>
+      <span>{{ result.tags | join(", ")}}</span>
+    </div>
+  {%- endif -%}
+  {%- if result.doi %}
+    <div>
+      <span>{{ _("DOI") }}:</span>
+      <span>{{ result_link(doi_resolver + result.doi, result.doi) }}</span>
+    </div>
+  {% endif -%}
+  {%- if result.issn %}
+    <div>
+      <span>{{ _("ISSN") }}:</span>
+      <span>{{ result.issn | join(", ") }}</span>
+    </div>
+  {% endif -%}
+  {%- if result.isbn %}
+    <div class="result_isbn">
+      <span>{{ _("ISBN") }}:</span>
+      <span>{{ result.isbn | join(", ") }}</span>
+    </div>
+  {% endif -%}
+  {%- if result.views %}
+    <div>
+      <span>{{ _('Views') }}:</span>
+      <span>{{ result.views }}</span>
+    </div>
+  {% endif -%}
+</div>
+{%- if result.content -%}
+  <p class="content">{{- result.content | safe -}}</p>
+{%- endif -%}
+{%- if result.comments -%}
+  <p class="comments">{{- result.comments -}}</p>
+{%- endif -%}
+
+{%- if result.metadata %}
+  <div class="highlight">{{ result.metadata|safe }}</div>
+{% endif -%}
+
 <p class="altlink">
  {%- if result.pdf_url -%}{{ result_link(result.pdf_url, _('PDF')) }}{%- endif -%}
  {%- if result.html_url -%}{{ result_link(result.html_url, _('HTML')) }}{%- endif -%}
--- a/searx/utils.py
+++ b/searx/utils.py
@ -21,7 +21,8 @@ from datetime import timedelta
 from markdown_it import MarkdownIt

 from lxml import html
-from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError
+from lxml.etree import XPath, XPathError, XPathSyntaxError
+from lxml.etree import ElementBase, _Element  # pyright: ignore[reportPrivateUsage]

 from searx import settings
 from searx.data import USER_AGENTS, data_dir
@ -40,6 +41,9 @@ XPathSpecType: t.TypeAlias = str | XPath
 """Type alias used by :py:obj:`searx.utils.get_xpath`,
 :py:obj:`searx.utils.eval_xpath` and other XPath selectors."""

+ElementType: t.TypeAlias = ElementBase | _Element  # lxml element types accepted by the text/XPath helpers
+
+
 _BLOCKED_TAGS = ('script', 'style')

 _ECMA_UNESCAPE4_RE = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
@ -204,15 +208,23 @@ def markdown_to_text(markdown_str: str) -> str:


 def extract_text(
-    xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None,
+    xpath_results: list[ElementType] | ElementType | str | Number | bool | None,
    allow_none: bool = False,
 ) -> str | None:
    """Extract text from a lxml result

-    * if xpath_results is list, extract the text from each result and concat the list
-    * if xpath_results is a xml element, extract all the text node from it
-      ( text_content() method from lxml )
-    * if xpath_results is a string element, then it's already done
+    - If ``xpath_results`` is a list of :py:obj:`ElementType` objects, extract
+      the text from each result and concatenate the results into one string.
+
+    - If ``xpath_results`` is a :py:obj:`ElementType` object, extract all the
+      text nodes from it ( :py:obj:`lxml.html.tostring`, ``method="text"`` )
+
+    - If ``xpath_results`` is of type :py:obj:`str`, :py:obj:`Number` or
+      :py:obj:`bool`, the string value is returned.
+
+    - If ``xpath_results`` is ``None``, a :py:obj:`ValueError` is raised,
+      unless ``allow_none`` is ``True``, in which case ``None`` is returned.
+
    """
    if isinstance(xpath_results, list):
        # it's list of result : concat everything using recursive call
@ -220,7 +232,7 @@ def extract_text(
        for e in xpath_results:
            result = result + (extract_text(e) or '')
        return result.strip()
-    if isinstance(xpath_results, ElementBase):
+    if isinstance(xpath_results, ElementType):
        # it's a element
        text: str = html.tostring(  # type: ignore
            xpath_results,  # pyright: ignore[reportArgumentType]
@ -289,7 +301,7 @@ def normalize_url(url: str, base_url: str) -> str:
    return url


-def extract_url(xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None, base_url: str) -> str:
+def extract_url(xpath_results: list[ElementType] | ElementType | str | Number | bool | None, base_url: str) -> str:
    """Extract and normalize URL from lxml Element

    Example:
@ -520,7 +532,7 @@ def get_xpath(xpath_spec: XPathSpecType) -> XPath:
    raise TypeError('xpath_spec must be either a str or a lxml.etree.XPath')  # pyright: ignore[reportUnreachable]


-def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any:
+def eval_xpath(element: ElementType, xpath_spec: XPathSpecType) -> t.Any:
    """Equivalent of ``element.xpath(xpath_str)`` but compile ``xpath_str`` into
    a :py:obj:`lxml.etree.XPath` object once for all.  The return value of
    ``xpath(..)`` is complex, read `XPath return values`_ for more details.
@ -548,12 +560,12 @@ def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any:
        raise SearxEngineXPathException(xpath_spec, arg) from e


-def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]:
+def eval_xpath_list(element: ElementType, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]:
    """Same as :py:obj:`searx.utils.eval_xpath`, but additionally ensures the
    return value is a :py:obj:`list`.  The minimum length of the list is also
    checked (if ``min_len`` is set)."""

-    result = eval_xpath(element, xpath_spec)
+    result: list[t.Any] = eval_xpath(element, xpath_spec)
    if not isinstance(result, list):
        raise SearxEngineXPathException(xpath_spec, 'the result is not a list')
    if min_len is not None and min_len > len(result):
@ -562,7 +574,7 @@ def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: in


 def eval_xpath_getindex(
-    element: ElementBase,
+    element: ElementType,
    xpath_spec: XPathSpecType,
    index: int,
    default: t.Any = _NOTSET,
--- a/searx/weather.py
+++ b/searx/weather.py
@ -27,6 +27,7 @@ import babel
 import babel.numbers
 import babel.dates
 import babel.languages
+import flask_babel

 from searx import network
 from searx.cache import ExpireCache, ExpireCacheCfg
@ -197,6 +198,7 @@ class GeoLocation:


 DateTimeFormats = typing.Literal["full", "long", "medium", "short"]
+DateTimeLocaleTypes = typing.Literal["UI"]


@typing.final
@ -205,6 +207,13 @@ class DateTime:
    conveniently combines :py:obj:`datetime.datetime` and
    :py:obj:`babel.dates.format_datetime`.  A conversion of time zones is not
    provided (in the current version).
+
+    The localized string representation can be obtained via the
+    :py:obj:`DateTime.l10n` and :py:obj:`DateTime.l10n_date` methods, where the
+    ``locale`` parameter defaults to the search language.  Alternatively, a
+    :py:obj:`GeoLocation` or a :py:obj:`babel.Locale` instance can be passed
+    directly. If the UI language is to be used, the string ``UI`` can be passed
+    as the value for the ``locale``.
    """

    def __init__(self, time: datetime.datetime):
@ -216,15 +225,32 @@ class DateTime:
    def l10n(
        self,
        fmt: DateTimeFormats | str = "medium",
-        locale: babel.Locale | GeoLocation | None = None,
+        locale: DateTimeLocaleTypes | babel.Locale | GeoLocation | None = None,
    ) -> str:
        """Localized representation of date & time."""
-        if isinstance(locale, GeoLocation):
+        if isinstance(locale, str) and locale == "UI":  # "UI": use the UI locale from flask_babel
+            locale = flask_babel.get_locale()
        elif isinstance(locale, GeoLocation):
            locale = locale.locale()
        elif locale is None:
            locale = babel.Locale.parse(_get_sxng_locale_tag(), sep='-')
        return babel.dates.format_datetime(self.datetime, format=fmt, locale=locale)

+    def l10n_date(
+        self,
+        fmt: DateTimeFormats | str = "medium",
+        locale: DateTimeLocaleTypes | babel.Locale | GeoLocation | None = None,
+    ) -> str:
+        """Localized representation of date."""
+
+        if isinstance(locale, str) and locale == "UI":
+            locale = flask_babel.get_locale()
+        elif isinstance(locale, GeoLocation):
+            locale = locale.locale()
+        elif locale is None:
+            locale = babel.Locale.parse(_get_sxng_locale_tag(), sep='-')
+        return babel.dates.format_date(self.datetime, format=fmt, locale=locale)
+

@typing.final
 class Temperature: