[mod] Z-Library engine: revision of the engine (Paper result)

Revision of the engine: use of the ``Paper`` result type, along with other
type annotations.

The engine has been set to inactive because no service is currently
available, or at least none is known to the SearXNG community [1].

[1] https://github.com/searxng/searxng/issues/3610

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2025-09-10 17:17:58 +02:00 committed by Markus Heiser
parent 4c42704c80
commit f8f7adce6b
2 changed files with 45 additions and 38 deletions

View File

@ -39,15 +39,16 @@ from urllib.parse import quote
from lxml import html
from flask_babel import gettext # pyright: ignore[reportUnknownVariableType]
from searx.utils import extract_text, eval_xpath, eval_xpath_list
from searx.utils import extract_text, eval_xpath, eval_xpath_list, ElementType
from searx.enginelib.traits import EngineTraits
from searx.data import ENGINE_TRAITS
from searx.exceptions import SearxException
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
# about
about: dict[str, t.Any] = {
"website": "https://zlibrary-global.se",
"wikidata_id": "Q104863992",
@ -57,7 +58,7 @@ about: dict[str, t.Any] = {
"results": "HTML",
}
categories: list[str] = ["files"]
categories: list[str] = ["files", "books"]
paging: bool = True
base_url: str = "https://zlibrary-global.se"
@ -74,8 +75,12 @@ zlib_ext: str = ""
``PDF`` and ``EPUB``.
"""
i18n_language = gettext("Language")
i18n_book_rating = gettext("Book rating")
i18n_file_quality = gettext("File quality")
def init(engine_settings: dict[str, t.Any] | None = None) -> None: # pylint: disable=unused-argument
def setup(engine_settings: dict[str, t.Any]) -> bool: # pylint: disable=unused-argument
"""Check of engine's settings."""
traits: EngineTraits = EngineTraits(**ENGINE_TRAITS["z-library"])
@ -85,10 +90,11 @@ def init(engine_settings: dict[str, t.Any] | None = None) -> None: # pylint: di
raise ValueError(f"invalid setting year_from: {zlib_year_from}")
if zlib_year_to and zlib_year_to not in traits.custom["year_to"]:
raise ValueError(f"invalid setting year_to: {zlib_year_to}")
return True
def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]:
lang: str = traits.get_language(params["language"], traits.all_locale) # type: ignore
def request(query: str, params: "OnlineParams") -> None:
lang: str | None = traits.get_language(params["searxng_locale"], traits.all_locale)
search_url: str = (
base_url
+ "/s/{search_query}/?page={pageno}"
@ -106,41 +112,35 @@ def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]:
zlib_ext=zlib_ext,
)
params["verify"] = False
return params
def domain_is_seized(dom):
return bool(dom.xpath('//title') and "seized" in dom.xpath('//title')[0].text.lower())
def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]:
results: list[dict[str, t.Any]] = []
def response(resp: "SXNG_Response") -> EngineResults:
res = EngineResults()
dom = html.fromstring(resp.text)
if domain_is_seized(dom):
raise SearxException(f"zlibrary domain is seized: {base_url}")
for item in dom.xpath('//div[@id="searchResultBox"]//div[contains(@class, "resItemBox")]'):
results.append(_parse_result(item))
kwargs = _parse_result(item)
res.add(res.types.Paper(**kwargs))
return results
return res
def _text(item, selector: str) -> str | None:
def domain_is_seized(dom: ElementType):
return bool(dom.xpath('//title') and "seized" in dom.xpath('//title')[0].text.lower())
def _text(item: ElementType, selector: str) -> str | None:
return extract_text(eval_xpath(item, selector))
i18n_language = gettext("Language")
i18n_book_rating = gettext("Book rating")
i18n_file_quality = gettext("File quality")
def _parse_result(item) -> dict[str, t.Any]:
def _parse_result(item: ElementType) -> dict[str, t.Any]:
author_elements = eval_xpath_list(item, './/div[@class="authors"]//a[@itemprop="author"]')
result = {
"template": "paper.html",
"url": base_url + item.xpath('(.//a[starts-with(@href, "/book/")])[1]/@href')[0],
"title": _text(item, './/*[@itemprop="name"]'),
"authors": [extract_text(author) for author in author_elements],
@ -148,15 +148,15 @@ def _parse_result(item) -> dict[str, t.Any]:
"type": _text(item, './/div[contains(@class, "property__file")]//div[contains(@class, "property_value")]'),
}
thumbnail: str = _text(item, './/img[contains(@class, "cover")]/@data-src')
if not thumbnail.startswith('/'):
thumbnail = _text(item, './/img[contains(@class, "cover")]/@data-src')
if thumbnail and not thumbnail.startswith('/'):
result["thumbnail"] = thumbnail
year = _text(item, './/div[contains(@class, "property_year")]//div[contains(@class, "property_value")]')
if year:
result["publishedDate"] = datetime.strptime(year, '%Y')
content = []
content: list[str] = []
language = _text(item, './/div[contains(@class, "property_language")]//div[contains(@class, "property_value")]')
if language:
content.append(f"{i18n_language}: {language.capitalize()}")
@ -173,9 +173,10 @@ def _parse_result(item) -> dict[str, t.Any]:
def fetch_traits(engine_traits: EngineTraits) -> None:
"""Fetch languages and other search arguments from zlibrary's search form."""
# pylint: disable=import-outside-toplevel, too-many-branches
# pylint: disable=import-outside-toplevel, too-many-branches, too-many-statements
import babel
import babel.core
import httpx
from searx.network import get # see https://github.com/searxng/searxng/issues/762
@ -197,7 +198,7 @@ def fetch_traits(engine_traits: EngineTraits) -> None:
if not resp.ok:
raise RuntimeError("Response from zlibrary's search page is not OK.")
dom = html.fromstring(resp.text) # type: ignore
dom = html.fromstring(resp.text)
if domain_is_seized(dom):
print(f"ERROR: zlibrary domain is seized: {base_url}")
@ -206,25 +207,30 @@ def fetch_traits(engine_traits: EngineTraits) -> None:
engine_traits.all_locale = ""
engine_traits.custom["ext"] = []
engine_traits.custom["year_from"] = []
engine_traits.custom["year_to"] = []
l: list[str]
# years_from
l = []
for year in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_yearFrom']/option"):
engine_traits.custom["year_from"].append(year.get("value"))
l.append(year.get("value") or "")
engine_traits.custom["year_from"] = l
# years_to
l = []
for year in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_yearTo']/option"):
engine_traits.custom["year_to"].append(year.get("value"))
l.append(year.get("value") or "")
engine_traits.custom["year_to"] = l
# ext (file extensions)
l = []
for ext in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_extensions']/option"):
value: str | None = ext.get("value")
if value is None:
value = ""
engine_traits.custom["ext"].append(value)
l.append(ext.get("value") or "")
engine_traits.custom["ext"] = l
# Handle languages
# Z-library uses English names for languages, so we need to map them to their respective locales
language_name_locale_map: dict[str, babel.Locale] = {}
for locale in babel.core.localedata.locale_identifiers(): # type: ignore
for locale in babel.core.localedata.locale_identifiers():
# Create a Locale object for the current locale
loc = babel.Locale.parse(locale)
if loc.english_name is None:

View File

@ -1254,9 +1254,10 @@ engines:
- name: z-library
engine: zlibrary
shortcut: zlib
categories: files
timeout: 7.0
disabled: true
# https://github.com/searxng/searxng/issues/3610
inactive: true
- name: library of congress
engine: loc