[mod] typification of SearXNG: add new result type Paper

This patch adds a new result type: Paper

- Python class:   searx/result_types/paper.py
- Jinja template: searx/templates/simple/result_templates/paper.html
- CSS (less):     client/simple/src/less/result_types/paper.less

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2025-09-10 16:10:42 +02:00 committed by Markus Heiser
parent 57ef342ad1
commit 7eedd44f5f
10 changed files with 318 additions and 46 deletions

View File

@ -0,0 +1,72 @@
// SPDX-License-Identifier: AGPL-3.0-or-later
/*
Layout of the Paper result class
*/
.result-paper {
  // The attribute list is laid out as a two-column table: one row per
  // attribute (div), label in the first cell, value in the second one.
  .attributes {
    border-spacing: 0.125rem;
    display: table;

    div {
      display: table-row;

      span {
        display: table-cell;
        margin-top: 0.25rem;
        font-size: 0.9rem;

        time {
          font-size: 0.9rem;
        }

        // label cell
        &:first-child {
          min-width: 10rem;
          color: var(--color-base-font);
        }

        // value cell
        &:nth-child(2) {
          color: var(--color-result-publishdate-font);
        }
      }
    }
  }

  .content {
    margin-top: 0.25rem;
  }

  // free text in italic
  .comments {
    margin: 0.25rem 0 0 0;
    padding: 0;
    font-size: 0.9rem;
    font-style: italic;
    line-height: 1.24;
    word-wrap: break-word;
  }
}
// Up to the @phone breakpoint the table layout of the attributes is replaced
// by a flowing "label value" layout.
@media screen and (max-width: @phone) {
  .result-paper .attributes {
    display: block;

    div {
      display: block;

      span {
        display: inline;

        // label
        &:first-child {
          font-weight: bold;
        }

        // value, separated from its label by a small margin
        &:nth-child(2) {
          .ltr-margin-left(0.5rem);
        }
      }
    }
  }
}

View File

@ -309,11 +309,11 @@ article[data-vim-selected].category-social {
}
}
.result-paper,
.result-packages {
.attributes {
display: table;
border-spacing: 0.125rem;
margin-top: 0.3rem;
div {
display: table-row;
@ -353,12 +353,6 @@ article[data-vim-selected].category-social {
}
}
.result-packages {
.attributes {
margin-top: 0.3rem;
}
}
.template_group_images {
display: flex;
flex-wrap: wrap;
@ -1118,7 +1112,6 @@ summary.title {
display: none;
}
.result-paper,
.result-packages {
.attributes {
display: block;
@ -1164,3 +1157,4 @@ pre code {
// import layouts of the Result types
@import "result_types/keyvalue.less";
@import "result_types/paper.less";

View File

@ -0,0 +1,7 @@
.. _result_types.paper:
=============
Paper Results
=============
.. automodule:: searx.result_types.paper

View File

@ -16,6 +16,7 @@ following types have been implemented so far ..
main/mainresult
main/keyvalue
main/code
main/paper
The :ref:`LegacyResult <LegacyResult>` is used internally for the results that
have not yet been typed. The templates can be used as orientation until the
@ -26,7 +27,6 @@ final typing is complete.
- :ref:`template videos`
- :ref:`template torrent`
- :ref:`template map`
- :ref:`template paper`
- :ref:`template packages`
- :ref:`template files`
- :ref:`template products`

View File

@ -22,6 +22,7 @@ __all__ = [
"Translations",
"WeatherAnswer",
"Code",
"Paper",
]
import typing as t
@ -31,6 +32,7 @@ from ._base import Result, MainResult, LegacyResult
from .answer import AnswerSet, Answer, Translations, WeatherAnswer
from .keyvalue import KeyValue
from .code import Code
from .paper import Paper
class ResultList(list[Result | LegacyResult], abc.ABC):
@ -44,6 +46,7 @@ class ResultList(list[Result | LegacyResult], abc.ABC):
Answer = Answer
KeyValue = KeyValue
Code = Code
Paper = Paper
MainResult = MainResult
Result = Result
Translations = Translations

View File

@ -362,7 +362,11 @@ class MainResult(Result): # pylint: disable=missing-class-docstring
"""The date on which the object was published."""
pubdate: str = ""
"""String representation of :py:obj:`MainResult.publishedDate`"""
"""String representation of :py:obj:`MainResult.publishedDate`
Deprecated: it is still partially used in the templates, but will one day be
completely eliminated.
"""
length: time.struct_time | None = None
"""Playing duration in seconds."""

View File

@ -0,0 +1,96 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Typification of the *paper* results.
.. _BibTeX field types: https://en.wikipedia.org/wiki/BibTeX#Field_types
.. _BibTeX format: https://www.bibtex.com/g/bibtex-format/
Results of this type are rendered in the :origin:`paper.html
<searx/templates/simple/result_templates/paper.html>` template.
Related topics:
- `BibTeX field types`_
- `BibTeX format`_
----
.. autoclass:: Paper
:members:
:show-inheritance:
"""
# pylint: disable=too-few-public-methods, disable=invalid-name
from __future__ import annotations
__all__ = ["Paper"]
import typing as t
from searx.weather import DateTime
from ._base import MainResult
@t.final
class Paper(MainResult, kw_only=True):
    """Result type for scientific papers and other documents; rendered with
    the ``paper.html`` template."""

    template: str = "paper.html"

    date_of_publication: DateTime | None = None
    """Publication date of the document.  If left unset, it is derived from
    :py:obj:`MainResult.publishedDate` (when that one is available)."""

    content: str = ""
    """Abstract of the document or an excerpt from it."""

    comments: str = ""
    """Free text, shown in italic beneath the content."""

    tags: list[str] = []
    """Arbitrary list of tags."""

    type: str = ""
    """Brief label for the kind of medium, e.g. *book*, *pdf* or *html* ..."""

    authors: list[str] | set[str] = []
    """Authors of the work.  Mind the plural: the singular *author* field is
    :py:obj:`MainResult.author`."""

    editor: str = ""
    """Editor of the book or paper."""

    publisher: str = ""
    """Publisher's name."""

    journal: str = ""
    """Journal or magazine in which the article appeared."""

    volume: str | int = ""
    """Number of the volume."""

    pages: str = ""
    """Range of pages covered by the article."""

    number: str = ""
    """Report number, or the issue number in case of a journal article."""

    doi: str = ""
    """DOI of the document (e.g. ``10.1038/d41586-018-07848-2``)."""

    issn: list[str] = []
    """ISSN numbers, e.g. ``1476-4687``."""

    isbn: list[str] = []
    """ISBN numbers, e.g. ``9780201896831``."""

    pdf_url: str = ""
    """Link to the PDF version of the full article."""

    html_url: str = ""
    """Link to the HTML version of the full article."""

    def __post_init__(self):
        """Fill :py:obj:`Paper.date_of_publication` from
        :py:obj:`MainResult.publishedDate` if only the latter is given."""
        super().__post_init__()

        # Nothing to derive: either the date is already set explicitly or
        # there is no published date to take it from.
        if self.date_of_publication is not None or self.publishedDate is None:
            return
        self.date_of_publication = DateTime(self.publishedDate)

View File

@ -1,34 +1,92 @@
{% from 'simple/macros.html' import result_header, result_sub_header, result_sub_footer, result_footer, result_link with context %}
{{ result_header(result, favicons, image_proxify) -}}
{{ result_header(result, favicons, image_proxify) }}
<div class="attributes">
{%- if result.publishedDate %}<div class="result_publishedDate"><span>{{ _("Published date") }}:</span><span><time class="published_date" datetime="{{ result.pubdate }}" >{{ result.publishedDate }}</time></span></div>{% endif -%}
{%- if result.authors %}<div class="result_authors"><span>{{ _("Author") }}:</span><span>{{ result.authors | join(", ") }}</span></div>{% endif -%}
{%- if result.date_of_publication %}
<div>
<span>{{ _("Published date") }}:</span>
<span>{{ result.date_of_publication.l10n_date("long", "UI") }}</span>
</div>
{% endif -%}
{%- if result.authors %}
<div>
<span>{{ _("Author") }}:</span>
<span>{{ result.authors | join(", ") }}</span>
</div>
{% endif -%}
{%- if result.journal -%}
<div class="result_journal">
<span>{{- _("Journal") }}:</span><span>{{ result.journal -}}
<div>
<span>{{- _("Journal") }}:</span>
<span>{{ result.journal -}}
{%- if result.volume -%}
&nbsp;{{- result.volume -}}
{%- if result.number -%}
.{{- result.number -}}
{%- endif -%}
{%- endif -%}
{%- if result.pages -%}
&nbsp;{{- result.pages -}}
{%- if result.number -%}.{{- result.number -}}{%- endif -%}
{%- endif -%}
{%- if result.pages -%}&nbsp;{{- result.pages -}}{%- endif -%}
</span>
</div>
{%- endif %}
{%- if result.editor %}<div class="result_editor"><span>{{ _("Editor") }}:</span><span>{{ result.editor }}</span></div>{% endif -%}
{%- if result.publisher %}<div class="result_publisher"><span>{{ _("Publisher") }}:</span><span>{{ result.publisher }}</span></div>{% endif -%}
{%- if result.type %}<div class="result_type"><span>{{ _("Type") }}:</span><span>{{ result.type }}</span></div>{% endif -%}
{%- if result.tags %}<div class="result_tags"><span>{{ _("Tags") }}:</span><span>{{ result.tags | join(", ")}}</span></div>{%- endif -%}
{%- if result.doi %}<div class="result_doi"><span>{{ _("DOI") }}:</span><span>{{ result_link(doi_resolver + result.doi, result.doi) }}</span></div>{% endif -%}
{%- if result.issn %}<div class="result_issn"><span>{{ _("ISSN") }}:</span><span>{{ result.issn | join(", ") }}</span></div>{% endif -%}
{%- if result.isbn %}<div class="result_isbn"><span>{{ _("ISBN") }}:</span><span>{{ result.isbn | join(", ") }}</span></div>{% endif -%}
{%- if result.editor %}
<div>
<span>{{ _("Editor") }}:</span>
<span>{{ result.editor }}</span>
</div>
{%- if result.content -%}<p class="content">{{- result.content | safe -}}</p>{%- endif -%}
{%- if result.comments -%}<p class="comments">{{- result.comments -}}</p>{%- endif -%}
{% endif -%}
{%- if result.publisher %}
<div>
<span>{{ _("Publisher") }}:</span>
<span>{{ result.publisher }}</span>
</div>
{% endif -%}
{%- if result.type %}
<div>
<span>{{ _("Type") }}:</span>
<span>{{ result.type }}</span>
</div>
{% endif -%}
{%- if result.tags %}
<div>
<span>{{ _("Tags") }}:</span>
<span>{{ result.tags | join(", ")}}</span>
</div>
{%- endif -%}
{%- if result.doi %}
<div>
<span>{{ _("DOI") }}:</span>
<span>{{ result_link(doi_resolver + result.doi, result.doi) }}</span>
</div>
{% endif -%}
{%- if result.issn %}
<div>
<span>{{ _("ISSN") }}:</span>
<span>{{ result.issn | join(", ") }}</span>
</div>
{% endif -%}
{%- if result.isbn %}
<div class="result_isbn">
<span>{{ _("ISBN") }}:</span>
<span>{{ result.isbn | join(", ") }}</span>
</div>
{% endif -%}
{%- if result.views %}
<div>
<span>{{ _('Views') }}:</span>
<span>{{ result.views }}</span>
</div>
{% endif -%}
</div>
{%- if result.content -%}
<p class="content">{{- result.content | safe -}}</p>
{%- endif -%}
{%- if result.comments -%}
<p class="comments">{{- result.comments -}}</p>
{%- endif -%}
{%- if result.metadata %}
<div class="highlight">{{ result.metadata|safe }}</div>
{% endif -%}
<p class="altlink">
{%- if result.pdf_url -%}{{ result_link(result.pdf_url, _('PDF')) }}{%- endif -%}
{%- if result.html_url -%}{{ result_link(result.html_url, _('HTML')) }}{%- endif -%}

View File

@ -21,7 +21,8 @@ from datetime import timedelta
from markdown_it import MarkdownIt
from lxml import html
from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError
from lxml.etree import XPath, XPathError, XPathSyntaxError
from lxml.etree import ElementBase, _Element # pyright: ignore[reportPrivateUsage]
from searx import settings
from searx.data import USER_AGENTS, data_dir
@ -40,6 +41,9 @@ XPathSpecType: t.TypeAlias = str | XPath
"""Type alias used by :py:obj:`searx.utils.get_xpath`,
:py:obj:`searx.utils.eval_xpath` and other XPath selectors."""
ElementType: t.TypeAlias = ElementBase | _Element
_BLOCKED_TAGS = ('script', 'style')
_ECMA_UNESCAPE4_RE = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
@ -204,15 +208,23 @@ def markdown_to_text(markdown_str: str) -> str:
def extract_text(
xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None,
xpath_results: list[ElementType] | ElementType | str | Number | bool | None,
allow_none: bool = False,
) -> str | None:
"""Extract text from a lxml result
* if xpath_results is list, extract the text from each result and concat the list
* if xpath_results is a xml element, extract all the text node from it
( text_content() method from lxml )
* if xpath_results is a string element, then it's already done
- If ``xpath_results`` is a list of :py:obj:`ElementType` objects, extract
the text from each result and concatenate the list in a string.
- If ``xpath_results`` is a :py:obj:`ElementType` object, extract all the
text node from it ( :py:obj:`lxml.html.tostring`, ``method="text"`` )
- If ``xpath_results`` is of type :py:obj:`str` or :py:obj:`Number`,
:py:obj:`bool` the string value is returned.
- If ``xpath_results`` is of type ``None`` a :py:obj:`ValueError` is raised,
except ``allow_none`` is ``True`` where ``None`` is returned.
"""
if isinstance(xpath_results, list):
# it's list of result : concat everything using recursive call
@ -220,7 +232,7 @@ def extract_text(
for e in xpath_results:
result = result + (extract_text(e) or '')
return result.strip()
if isinstance(xpath_results, ElementBase):
if isinstance(xpath_results, ElementType):
# it's a element
text: str = html.tostring( # type: ignore
xpath_results, # pyright: ignore[reportArgumentType]
@ -289,7 +301,7 @@ def normalize_url(url: str, base_url: str) -> str:
return url
def extract_url(xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None, base_url: str) -> str:
def extract_url(xpath_results: list[ElementType] | ElementType | str | Number | bool | None, base_url: str) -> str:
"""Extract and normalize URL from lxml Element
Example:
@ -520,7 +532,7 @@ def get_xpath(xpath_spec: XPathSpecType) -> XPath:
raise TypeError('xpath_spec must be either a str or a lxml.etree.XPath') # pyright: ignore[reportUnreachable]
def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any:
def eval_xpath(element: ElementType, xpath_spec: XPathSpecType) -> t.Any:
"""Equivalent of ``element.xpath(xpath_str)`` but compile ``xpath_str`` into
a :py:obj:`lxml.etree.XPath` object once for all. The return value of
``xpath(..)`` is complex, read `XPath return values`_ for more details.
@ -548,12 +560,12 @@ def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any:
raise SearxEngineXPathException(xpath_spec, arg) from e
def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]:
def eval_xpath_list(element: ElementType, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]:
"""Same as :py:obj:`searx.utils.eval_xpath`, but additionally ensures the
return value is a :py:obj:`list`. The minimum length of the list is also
checked (if ``min_len`` is set)."""
result = eval_xpath(element, xpath_spec)
result: list[t.Any] = eval_xpath(element, xpath_spec)
if not isinstance(result, list):
raise SearxEngineXPathException(xpath_spec, 'the result is not a list')
if min_len is not None and min_len > len(result):
@ -562,7 +574,7 @@ def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: in
def eval_xpath_getindex(
element: ElementBase,
element: ElementType,
xpath_spec: XPathSpecType,
index: int,
default: t.Any = _NOTSET,

View File

@ -27,6 +27,7 @@ import babel
import babel.numbers
import babel.dates
import babel.languages
import flask_babel
from searx import network
from searx.cache import ExpireCache, ExpireCacheCfg
@ -197,6 +198,7 @@ class GeoLocation:
DateTimeFormats = typing.Literal["full", "long", "medium", "short"]
DateTimeLocaleTypes = typing.Literal["UI"]
@typing.final
@ -205,6 +207,13 @@ class DateTime:
conveniently combines :py:obj:`datetime.datetime` and
:py:obj:`babel.dates.format_datetime`. A conversion of time zones is not
provided (in the current version).
The localized string representation can be obtained via the
:py:obj:`DateTime.l10n` and :py:obj:`DateTime.l10n_date` methods, where the
``locale`` parameter defaults to the search language. Alternatively, a
:py:obj:`GeoLocation` or a :py:obj:`babel.Locale` instance can be passed
directly. If the UI language is to be used, the string ``UI`` can be passed
as the value for the ``locale``.
"""
def __init__(self, time: datetime.datetime):
@ -216,15 +225,32 @@ class DateTime:
def l10n(
self,
fmt: DateTimeFormats | str = "medium",
locale: babel.Locale | GeoLocation | None = None,
locale: DateTimeLocaleTypes | babel.Locale | GeoLocation | None = None,
) -> str:
"""Localized representation of date & time."""
if isinstance(locale, GeoLocation):
if isinstance(locale, str) and locale == "UI":
locale = flask_babel.get_locale()
elif isinstance(locale, GeoLocation):
locale = locale.locale()
elif locale is None:
locale = babel.Locale.parse(_get_sxng_locale_tag(), sep='-')
return babel.dates.format_datetime(self.datetime, format=fmt, locale=locale)
def l10n_date(
    self,
    fmt: DateTimeFormats | str = "medium",
    locale: DateTimeLocaleTypes | babel.Locale | GeoLocation | None = None,
) -> str:
    """Localized representation of the date (without the time of day).

    :param fmt: Format passed on to :py:obj:`babel.dates.format_date`.
    :param locale: ``None`` selects the locale parsed from the SearXNG
        locale tag, the string ``"UI"`` selects the locale reported by
        ``flask_babel.get_locale()`` and a :py:obj:`GeoLocation` is mapped
        by its ``locale()`` method.  Any other value is handed over to
        babel unchanged.
    """
    if locale is None:
        target = babel.Locale.parse(_get_sxng_locale_tag(), sep='-')
    elif isinstance(locale, str) and locale == "UI":
        target = flask_babel.get_locale()
    elif isinstance(locale, GeoLocation):
        target = locale.locale()
    else:
        target = locale
    return babel.dates.format_date(self.datetime, format=fmt, locale=target)
@typing.final
class Temperature: