[mod] typification of SearXNG: add new result type Paper

This patch adds a new result type: Paper

- Python class:   searx/result_types/paper.py
- Jinja template: searx/templates/simple/result_templates/paper.html
- CSS (less):     client/simple/src/less/result_types/paper.less

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2025-09-10 16:10:42 +02:00 committed by Markus Heiser
parent 57ef342ad1
commit 7eedd44f5f
10 changed files with 318 additions and 46 deletions

View File

@ -0,0 +1,72 @@
// SPDX-License-Identifier: AGPL-3.0-or-later
/*
Layout of the Paper result class
*/
.result-paper {
  // Bibliographic attributes are laid out as a two-column table: every
  // <div> is one row, its <span> children are the cells.
  .attributes {
    display: table;
    border-spacing: 0.125rem;

    div {
      display: table-row;

      span {
        display: table-cell;
        margin-top: 0.25rem;
        font-size: 0.9rem;

        time {
          font-size: 0.9rem;
        }

        // first cell of a row
        &:first-child {
          min-width: 10rem;
          color: var(--color-base-font);
        }

        // second cell of a row
        &:nth-child(2) {
          color: var(--color-result-publishdate-font);
        }
      }
    }
  }

  .content {
    margin-top: 0.25rem;
  }

  // Free text shown in italic
  .comments {
    margin: 0.25rem 0 0 0;
    padding: 0;
    font-size: 0.9rem;
    font-style: italic;
    line-height: 1.24;
    word-wrap: break-word;
  }
}
// Up to the @phone width the attribute table is dissolved: rows become
// plain blocks and the two cells flow inline next to each other.
@media screen and (max-width: @phone) {
  .result-paper {
    .attributes {
      display: block;

      div {
        display: block;

        span {
          display: inline;

          &:first-child {
            font-weight: bold;
          }

          &:nth-child(2) {
            .ltr-margin-left(0.5rem);
          }
        }
      }
    }
  }
}

View File

@ -309,11 +309,11 @@ article[data-vim-selected].category-social {
} }
} }
.result-paper,
.result-packages { .result-packages {
.attributes { .attributes {
display: table; display: table;
border-spacing: 0.125rem; border-spacing: 0.125rem;
margin-top: 0.3rem;
div { div {
display: table-row; display: table-row;
@ -353,12 +353,6 @@ article[data-vim-selected].category-social {
} }
} }
.result-packages {
.attributes {
margin-top: 0.3rem;
}
}
.template_group_images { .template_group_images {
display: flex; display: flex;
flex-wrap: wrap; flex-wrap: wrap;
@ -1118,7 +1112,6 @@ summary.title {
display: none; display: none;
} }
.result-paper,
.result-packages { .result-packages {
.attributes { .attributes {
display: block; display: block;
@ -1164,3 +1157,4 @@ pre code {
// import layouts of the Result types // import layouts of the Result types
@import "result_types/keyvalue.less"; @import "result_types/keyvalue.less";
@import "result_types/paper.less";

View File

@ -0,0 +1,7 @@
.. _result_types.paper:
=============
Paper Results
=============
.. automodule:: searx.result_types.paper

View File

@ -16,6 +16,7 @@ following types have been implemented so far ..
main/mainresult main/mainresult
main/keyvalue main/keyvalue
main/code main/code
main/paper
The :ref:`LegacyResult <LegacyResult>` is used internally for the results that The :ref:`LegacyResult <LegacyResult>` is used internally for the results that
have not yet been typed. The templates can be used as orientation until the have not yet been typed. The templates can be used as orientation until the
@ -26,7 +27,6 @@ final typing is complete.
- :ref:`template videos` - :ref:`template videos`
- :ref:`template torrent` - :ref:`template torrent`
- :ref:`template map` - :ref:`template map`
- :ref:`template paper`
- :ref:`template packages` - :ref:`template packages`
- :ref:`template files` - :ref:`template files`
- :ref:`template products` - :ref:`template products`

View File

@ -22,6 +22,7 @@ __all__ = [
"Translations", "Translations",
"WeatherAnswer", "WeatherAnswer",
"Code", "Code",
"Paper",
] ]
import typing as t import typing as t
@ -31,6 +32,7 @@ from ._base import Result, MainResult, LegacyResult
from .answer import AnswerSet, Answer, Translations, WeatherAnswer from .answer import AnswerSet, Answer, Translations, WeatherAnswer
from .keyvalue import KeyValue from .keyvalue import KeyValue
from .code import Code from .code import Code
from .paper import Paper
class ResultList(list[Result | LegacyResult], abc.ABC): class ResultList(list[Result | LegacyResult], abc.ABC):
@ -44,6 +46,7 @@ class ResultList(list[Result | LegacyResult], abc.ABC):
Answer = Answer Answer = Answer
KeyValue = KeyValue KeyValue = KeyValue
Code = Code Code = Code
Paper = Paper
MainResult = MainResult MainResult = MainResult
Result = Result Result = Result
Translations = Translations Translations = Translations

View File

@ -362,7 +362,11 @@ class MainResult(Result): # pylint: disable=missing-class-docstring
"""The date on which the object was published.""" """The date on which the object was published."""
pubdate: str = "" pubdate: str = ""
"""String representation of :py:obj:`MainResult.publishedDate`""" """String representation of :py:obj:`MainResult.publishedDate`
Deprecated: it is still partially used in the templates, but will one day be
completely eliminated.
"""
length: time.struct_time | None = None length: time.struct_time | None = None
"""Playing duration in seconds.""" """Playing duration in seconds."""

View File

@ -0,0 +1,96 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Typification of the *paper* results.
.. _BibTeX field types: https://en.wikipedia.org/wiki/BibTeX#Field_types
.. _BibTeX format: https://www.bibtex.com/g/bibtex-format/
Results of this type are rendered in the :origin:`paper.html
<searx/templates/simple/result_templates/paper.html>` template.
Related topics:
- `BibTeX field types`_
- `BibTeX format`_
----
.. autoclass:: Paper
:members:
:show-inheritance:
"""
# pylint: disable=too-few-public-methods, disable=invalid-name
from __future__ import annotations
__all__ = ["Paper"]
import typing as t
from searx.weather import DateTime
from ._base import MainResult
@t.final
class Paper(MainResult, kw_only=True):
    """Result type for scientific papers and comparable documents.

    Instances are rendered by the ``paper.html`` result template.  All fields
    are optional; unset fields keep their empty default.
    """

    # Name of the result template used to render this type.
    template: str = "paper.html"

    # NOTE(review): the ``[]`` defaults below presumably rely on the base class
    # copying mutable defaults per instance -- confirm against ``MainResult``.

    date_of_publication: DateTime | None = None
    """Publication date of the document.  If left at ``None`` while
    ``publishedDate`` is set, it is derived from ``publishedDate`` in
    :py:obj:`Paper.__post_init__`."""

    content: str = ""
    """Abstract of the document or an excerpt from it."""

    comments: str = ""
    """Free text, displayed in italic below the content."""

    tags: list[str] = []
    """Free list of tags."""

    type: str = ""
    """Short description of the medium type, e.g. *book*, *pdf* or *html* ..."""

    authors: list[str] | set[str] = []
    """Authors of the work.  Mind the plural: this is the *authors* collection,
    the singular *author* field is :py:obj:`MainResult.author`."""

    editor: str = ""
    """Editor of the book/paper."""

    publisher: str = ""
    """Name of the publisher."""

    journal: str = ""
    """Name of the journal or magazine in which the article was published."""

    volume: str | int = ""
    """Volume number."""

    pages: str = ""
    """Page range of the article."""

    number: str = ""
    """Number of the report, or the issue number for a journal article."""

    doi: str = ""
    """DOI number (like ``10.1038/d41586-018-07848-2``)."""

    issn: list[str] = []
    """List of ISSN numbers like ``1476-4687``."""

    isbn: list[str] = []
    """List of ISBN numbers like ``9780201896831``."""

    pdf_url: str = ""
    """URL of the full article, PDF version."""

    html_url: str = ""
    """URL of the full article, HTML version."""

    def __post_init__(self):
        """Run the base class hook, then fill ``date_of_publication`` from
        ``publishedDate`` when only the latter is available."""
        super().__post_init__()

        # Nothing to derive: either an explicit date was passed or there is no
        # ``publishedDate`` to fall back on.
        if self.date_of_publication is not None or self.publishedDate is None:
            return

        self.date_of_publication = DateTime(self.publishedDate)

View File

@ -1,34 +1,92 @@
{% from 'simple/macros.html' import result_header, result_sub_header, result_sub_footer, result_footer, result_link with context %} {% from 'simple/macros.html' import result_header, result_sub_header, result_sub_footer, result_footer, result_link with context %}
{{ result_header(result, favicons, image_proxify) -}} {{ result_header(result, favicons, image_proxify) }}
<div class="attributes"> <div class="attributes">
{%- if result.publishedDate %}<div class="result_publishedDate"><span>{{ _("Published date") }}:</span><span><time class="published_date" datetime="{{ result.pubdate }}" >{{ result.publishedDate }}</time></span></div>{% endif -%} {%- if result.date_of_publication %}
{%- if result.authors %}<div class="result_authors"><span>{{ _("Author") }}:</span><span>{{ result.authors | join(", ") }}</span></div>{% endif -%} <div>
<span>{{ _("Published date") }}:</span>
<span>{{ result.date_of_publication.l10n_date("long", "UI") }}</span>
</div>
{% endif -%}
{%- if result.authors %}
<div>
<span>{{ _("Author") }}:</span>
<span>{{ result.authors | join(", ") }}</span>
</div>
{% endif -%}
{%- if result.journal -%} {%- if result.journal -%}
<div class="result_journal"> <div>
<span>{{- _("Journal") }}:</span><span>{{ result.journal -}} <span>{{- _("Journal") }}:</span>
{%- if result.volume -%} <span>{{ result.journal -}}
&nbsp;{{- result.volume -}} {%- if result.volume -%}
{%- if result.number -%} &nbsp;{{- result.volume -}}
.{{- result.number -}} {%- if result.number -%}.{{- result.number -}}{%- endif -%}
{%- endif -%} {%- endif -%}
{%- endif -%} {%- if result.pages -%}&nbsp;{{- result.pages -}}{%- endif -%}
{%- if result.pages -%}
&nbsp;{{- result.pages -}}
{%- endif -%}
</span> </span>
</div> </div>
{%- endif %} {%- endif %}
{%- if result.editor %}<div class="result_editor"><span>{{ _("Editor") }}:</span><span>{{ result.editor }}</span></div>{% endif -%} {%- if result.editor %}
{%- if result.publisher %}<div class="result_publisher"><span>{{ _("Publisher") }}:</span><span>{{ result.publisher }}</span></div>{% endif -%} <div>
{%- if result.type %}<div class="result_type"><span>{{ _("Type") }}:</span><span>{{ result.type }}</span></div>{% endif -%} <span>{{ _("Editor") }}:</span>
{%- if result.tags %}<div class="result_tags"><span>{{ _("Tags") }}:</span><span>{{ result.tags | join(", ")}}</span></div>{%- endif -%} <span>{{ result.editor }}</span>
{%- if result.doi %}<div class="result_doi"><span>{{ _("DOI") }}:</span><span>{{ result_link(doi_resolver + result.doi, result.doi) }}</span></div>{% endif -%} </div>
{%- if result.issn %}<div class="result_issn"><span>{{ _("ISSN") }}:</span><span>{{ result.issn | join(", ") }}</span></div>{% endif -%} {% endif -%}
{%- if result.isbn %}<div class="result_isbn"><span>{{ _("ISBN") }}:</span><span>{{ result.isbn | join(", ") }}</span></div>{% endif -%} {%- if result.publisher %}
<div>
<span>{{ _("Publisher") }}:</span>
<span>{{ result.publisher }}</span>
</div>
{% endif -%}
{%- if result.type %}
<div>
<span>{{ _("Type") }}:</span>
<span>{{ result.type }}</span>
</div>
{% endif -%}
{%- if result.tags %}
<div>
<span>{{ _("Tags") }}:</span>
<span>{{ result.tags | join(", ")}}</span>
</div>
{%- endif -%}
{%- if result.doi %}
<div>
<span>{{ _("DOI") }}:</span>
<span>{{ result_link(doi_resolver + result.doi, result.doi) }}</span>
</div>
{% endif -%}
{%- if result.issn %}
<div>
<span>{{ _("ISSN") }}:</span>
<span>{{ result.issn | join(", ") }}</span>
</div>
{% endif -%}
{%- if result.isbn %}
<div class="result_isbn">
<span>{{ _("ISBN") }}:</span>
<span>{{ result.isbn | join(", ") }}</span>
</div>
{% endif -%}
{%- if result.views %}
<div>
<span>{{ _('Views') }}:</span>
<span>{{ result.views }}</span>
</div>
{% endif -%}
</div> </div>
{%- if result.content -%}<p class="content">{{- result.content | safe -}}</p>{%- endif -%} {%- if result.content -%}
{%- if result.comments -%}<p class="comments">{{- result.comments -}}</p>{%- endif -%} <p class="content">{{- result.content | safe -}}</p>
{%- endif -%}
{%- if result.comments -%}
<p class="comments">{{- result.comments -}}</p>
{%- endif -%}
{%- if result.metadata %}
<div class="highlight">{{ result.metadata|safe }}</div>
{% endif -%}
<p class="altlink"> <p class="altlink">
{%- if result.pdf_url -%}{{ result_link(result.pdf_url, _('PDF')) }}{%- endif -%} {%- if result.pdf_url -%}{{ result_link(result.pdf_url, _('PDF')) }}{%- endif -%}
{%- if result.html_url -%}{{ result_link(result.html_url, _('HTML')) }}{%- endif -%} {%- if result.html_url -%}{{ result_link(result.html_url, _('HTML')) }}{%- endif -%}

View File

@ -21,7 +21,8 @@ from datetime import timedelta
from markdown_it import MarkdownIt from markdown_it import MarkdownIt
from lxml import html from lxml import html
from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError from lxml.etree import XPath, XPathError, XPathSyntaxError
from lxml.etree import ElementBase, _Element # pyright: ignore[reportPrivateUsage]
from searx import settings from searx import settings
from searx.data import USER_AGENTS, data_dir from searx.data import USER_AGENTS, data_dir
@ -40,6 +41,9 @@ XPathSpecType: t.TypeAlias = str | XPath
"""Type alias used by :py:obj:`searx.utils.get_xpath`, """Type alias used by :py:obj:`searx.utils.get_xpath`,
:py:obj:`searx.utils.eval_xpath` and other XPath selectors.""" :py:obj:`searx.utils.eval_xpath` and other XPath selectors."""
ElementType: t.TypeAlias = ElementBase | _Element
_BLOCKED_TAGS = ('script', 'style') _BLOCKED_TAGS = ('script', 'style')
_ECMA_UNESCAPE4_RE = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE) _ECMA_UNESCAPE4_RE = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
@ -204,15 +208,23 @@ def markdown_to_text(markdown_str: str) -> str:
def extract_text( def extract_text(
xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None, xpath_results: list[ElementType] | ElementType | str | Number | bool | None,
allow_none: bool = False, allow_none: bool = False,
) -> str | None: ) -> str | None:
"""Extract text from a lxml result """Extract text from a lxml result
* if xpath_results is list, extract the text from each result and concat the list - If ``xpath_results`` is a list of :py:obj:`ElementType` objects, extract
* if xpath_results is a xml element, extract all the text node from it the text from each result and concatenate the list in a string.
( text_content() method from lxml )
* if xpath_results is a string element, then it's already done - If ``xpath_results`` is a :py:obj:`ElementType` object, extract all the
text node from it ( :py:obj:`lxml.html.tostring`, ``method="text"`` )
- If ``xpath_results`` is of type :py:obj:`str`, :py:obj:`Number` or
:py:obj:`bool`, the string value is returned.
- If ``xpath_results`` is ``None``, a :py:obj:`ValueError` is raised,
unless ``allow_none`` is ``True``, in which case ``None`` is returned.
""" """
if isinstance(xpath_results, list): if isinstance(xpath_results, list):
# it's list of result : concat everything using recursive call # it's list of result : concat everything using recursive call
@ -220,7 +232,7 @@ def extract_text(
for e in xpath_results: for e in xpath_results:
result = result + (extract_text(e) or '') result = result + (extract_text(e) or '')
return result.strip() return result.strip()
if isinstance(xpath_results, ElementBase): if isinstance(xpath_results, ElementType):
# it's a element # it's a element
text: str = html.tostring( # type: ignore text: str = html.tostring( # type: ignore
xpath_results, # pyright: ignore[reportArgumentType] xpath_results, # pyright: ignore[reportArgumentType]
@ -289,7 +301,7 @@ def normalize_url(url: str, base_url: str) -> str:
return url return url
def extract_url(xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None, base_url: str) -> str: def extract_url(xpath_results: list[ElementType] | ElementType | str | Number | bool | None, base_url: str) -> str:
"""Extract and normalize URL from lxml Element """Extract and normalize URL from lxml Element
Example: Example:
@ -520,7 +532,7 @@ def get_xpath(xpath_spec: XPathSpecType) -> XPath:
raise TypeError('xpath_spec must be either a str or a lxml.etree.XPath') # pyright: ignore[reportUnreachable] raise TypeError('xpath_spec must be either a str or a lxml.etree.XPath') # pyright: ignore[reportUnreachable]
def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any: def eval_xpath(element: ElementType, xpath_spec: XPathSpecType) -> t.Any:
"""Equivalent of ``element.xpath(xpath_str)`` but compile ``xpath_str`` into """Equivalent of ``element.xpath(xpath_str)`` but compile ``xpath_str`` into
a :py:obj:`lxml.etree.XPath` object once for all. The return value of a :py:obj:`lxml.etree.XPath` object once for all. The return value of
``xpath(..)`` is complex, read `XPath return values`_ for more details. ``xpath(..)`` is complex, read `XPath return values`_ for more details.
@ -548,12 +560,12 @@ def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any:
raise SearxEngineXPathException(xpath_spec, arg) from e raise SearxEngineXPathException(xpath_spec, arg) from e
def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]: def eval_xpath_list(element: ElementType, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]:
"""Same as :py:obj:`searx.utils.eval_xpath`, but additionally ensures the """Same as :py:obj:`searx.utils.eval_xpath`, but additionally ensures the
return value is a :py:obj:`list`. The minimum length of the list is also return value is a :py:obj:`list`. The minimum length of the list is also
checked (if ``min_len`` is set).""" checked (if ``min_len`` is set)."""
result = eval_xpath(element, xpath_spec) result: list[t.Any] = eval_xpath(element, xpath_spec)
if not isinstance(result, list): if not isinstance(result, list):
raise SearxEngineXPathException(xpath_spec, 'the result is not a list') raise SearxEngineXPathException(xpath_spec, 'the result is not a list')
if min_len is not None and min_len > len(result): if min_len is not None and min_len > len(result):
@ -562,7 +574,7 @@ def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: in
def eval_xpath_getindex( def eval_xpath_getindex(
element: ElementBase, element: ElementType,
xpath_spec: XPathSpecType, xpath_spec: XPathSpecType,
index: int, index: int,
default: t.Any = _NOTSET, default: t.Any = _NOTSET,

View File

@ -27,6 +27,7 @@ import babel
import babel.numbers import babel.numbers
import babel.dates import babel.dates
import babel.languages import babel.languages
import flask_babel
from searx import network from searx import network
from searx.cache import ExpireCache, ExpireCacheCfg from searx.cache import ExpireCache, ExpireCacheCfg
@ -197,6 +198,7 @@ class GeoLocation:
DateTimeFormats = typing.Literal["full", "long", "medium", "short"] DateTimeFormats = typing.Literal["full", "long", "medium", "short"]
DateTimeLocaleTypes = typing.Literal["UI"]
@typing.final @typing.final
@ -205,6 +207,13 @@ class DateTime:
conveniently combines :py:obj:`datetime.datetime` and conveniently combines :py:obj:`datetime.datetime` and
:py:obj:`babel.dates.format_datetime`. A conversion of time zones is not :py:obj:`babel.dates.format_datetime`. A conversion of time zones is not
provided (in the current version). provided (in the current version).
The localized string representation can be obtained via the
:py:obj:`DateTime.l10n` and :py:obj:`DateTime.l10n_date` methods, where the
``locale`` parameter defaults to the search language. Alternatively, a
:py:obj:`GeoLocation` or a :py:obj:`babel.Locale` instance can be passed
directly. If the UI language is to be used, the string ``UI`` can be passed
as the value for the ``locale``.
""" """
def __init__(self, time: datetime.datetime): def __init__(self, time: datetime.datetime):
@ -216,15 +225,32 @@ class DateTime:
def l10n( def l10n(
self, self,
fmt: DateTimeFormats | str = "medium", fmt: DateTimeFormats | str = "medium",
locale: babel.Locale | GeoLocation | None = None, locale: DateTimeLocaleTypes | babel.Locale | GeoLocation | None = None,
) -> str: ) -> str:
"""Localized representation of date & time.""" """Localized representation of date & time."""
if isinstance(locale, GeoLocation): if isinstance(locale, str) and locale == "UI":
locale = flask_babel.get_locale()
elif isinstance(locale, GeoLocation):
locale = locale.locale() locale = locale.locale()
elif locale is None: elif locale is None:
locale = babel.Locale.parse(_get_sxng_locale_tag(), sep='-') locale = babel.Locale.parse(_get_sxng_locale_tag(), sep='-')
return babel.dates.format_datetime(self.datetime, format=fmt, locale=locale) return babel.dates.format_datetime(self.datetime, format=fmt, locale=locale)
def l10n_date(
    self,
    fmt: DateTimeFormats | str = "medium",
    locale: DateTimeLocaleTypes | babel.Locale | GeoLocation | None = None,
) -> str:
    """Localized representation of the date (without the time of day).

    :param fmt: Format passed on to :py:obj:`babel.dates.format_date`.
    :param locale: ``None`` selects the locale parsed from
        ``_get_sxng_locale_tag()``, the string ``"UI"`` selects the locale
        reported by :py:obj:`flask_babel.get_locale`, a :py:obj:`GeoLocation`
        contributes its own ``locale()``; any other value is handed to babel
        unchanged.
    """
    if locale is None:
        target_locale = babel.Locale.parse(_get_sxng_locale_tag(), sep='-')
    elif isinstance(locale, GeoLocation):
        target_locale = locale.locale()
    elif isinstance(locale, str) and locale == "UI":
        target_locale = flask_babel.get_locale()
    else:
        # Already something babel can work with, pass it through as-is.
        target_locale = locale

    return babel.dates.format_date(self.datetime, format=fmt, locale=target_locale)
@typing.final @typing.final
class Temperature: class Temperature: