diff --git a/searx/engines/zlibrary.py b/searx/engines/zlibrary.py index a1729861f..233f776f3 100644 --- a/searx/engines/zlibrary.py +++ b/searx/engines/zlibrary.py @@ -39,15 +39,16 @@ from urllib.parse import quote from lxml import html from flask_babel import gettext # pyright: ignore[reportUnknownVariableType] -from searx.utils import extract_text, eval_xpath, eval_xpath_list +from searx.utils import extract_text, eval_xpath, eval_xpath_list, ElementType from searx.enginelib.traits import EngineTraits from searx.data import ENGINE_TRAITS from searx.exceptions import SearxException +from searx.result_types import EngineResults if t.TYPE_CHECKING: from searx.extended_types import SXNG_Response + from searx.search.processors import OnlineParams -# about about: dict[str, t.Any] = { "website": "https://zlibrary-global.se", "wikidata_id": "Q104863992", @@ -57,7 +58,7 @@ about: dict[str, t.Any] = { "results": "HTML", } -categories: list[str] = ["files"] +categories: list[str] = ["files", "books"] paging: bool = True base_url: str = "https://zlibrary-global.se" @@ -74,8 +75,12 @@ zlib_ext: str = "" ``PDF`` and ``EPUB``. """ +i18n_language = gettext("Language") +i18n_book_rating = gettext("Book rating") +i18n_file_quality = gettext("File quality") -def init(engine_settings: dict[str, t.Any] | None = None) -> None: # pylint: disable=unused-argument + +def setup(engine_settings: dict[str, t.Any]) -> bool: # pylint: disable=unused-argument """Check of engine's settings.""" traits: EngineTraits = EngineTraits(**ENGINE_TRAITS["z-library"]) @@ -85,10 +90,11 @@ def init(engine_settings: dict[str, t.Any] | None = None) -> None: # pylint: di raise ValueError(f"invalid setting year_from: {zlib_year_from}") if zlib_year_to and zlib_year_to not in traits.custom["year_to"]: raise ValueError(f"invalid setting year_to: {zlib_year_to}") + return True -def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]: - lang: str = traits.get_language(params["language"], traits.all_locale) # type: ignore +def request(query: str, params: "OnlineParams") -> None: + lang: str | None = traits.get_language(params["searxng_locale"], traits.all_locale) search_url: str = ( base_url + "/s/{search_query}/?page={pageno}" @@ -106,41 +112,35 @@ def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]: zlib_ext=zlib_ext, ) params["verify"] = False - return params -def domain_is_seized(dom): - return bool(dom.xpath('//title') and "seized" in dom.xpath('//title')[0].text.lower()) - - -def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]: - results: list[dict[str, t.Any]] = [] +def response(resp: "SXNG_Response") -> EngineResults: + res = EngineResults() dom = html.fromstring(resp.text) if domain_is_seized(dom): raise SearxException(f"zlibrary domain is seized: {base_url}") for item in dom.xpath('//div[@id="searchResultBox"]//div[contains(@class, "resItemBox")]'): - results.append(_parse_result(item)) + kwargs = _parse_result(item) + res.add(res.types.Paper(**kwargs)) - return results + return res -def _text(item, selector: str) -> str | None: +def domain_is_seized(dom: ElementType): + return bool(dom.xpath('//title') and "seized" in dom.xpath('//title')[0].text.lower()) + + +def _text(item: ElementType, selector: str) -> str | None: return extract_text(eval_xpath(item, selector)) -i18n_language = gettext("Language") -i18n_book_rating = gettext("Book rating") -i18n_file_quality = gettext("File quality") - - -def _parse_result(item) -> dict[str, t.Any]: +def _parse_result(item: ElementType) -> dict[str, t.Any]: author_elements = eval_xpath_list(item, './/div[@class="authors"]//a[@itemprop="author"]') result = { - "template": "paper.html", "url": base_url + item.xpath('(.//a[starts-with(@href, "/book/")])[1]/@href')[0], "title": _text(item, './/*[@itemprop="name"]'), "authors": [extract_text(author) for author in author_elements], @@ -148,15 +148,15 @@ def _parse_result(item) -> dict[str, t.Any]: "type": _text(item, './/div[contains(@class, "property__file")]//div[contains(@class, "property_value")]'), } - thumbnail: str = _text(item, './/img[contains(@class, "cover")]/@data-src') - if not thumbnail.startswith('/'): + thumbnail = _text(item, './/img[contains(@class, "cover")]/@data-src') + if thumbnail and not thumbnail.startswith('/'): result["thumbnail"] = thumbnail year = _text(item, './/div[contains(@class, "property_year")]//div[contains(@class, "property_value")]') if year: result["publishedDate"] = datetime.strptime(year, '%Y') - content = [] + content: list[str] = [] language = _text(item, './/div[contains(@class, "property_language")]//div[contains(@class, "property_value")]') if language: content.append(f"{i18n_language}: {language.capitalize()}") @@ -173,9 +173,10 @@ def _parse_result(item) -> dict[str, t.Any]: def fetch_traits(engine_traits: EngineTraits) -> None: """Fetch languages and other search arguments from zlibrary's search form.""" - # pylint: disable=import-outside-toplevel, too-many-branches + # pylint: disable=import-outside-toplevel, too-many-branches, too-many-statements import babel + import babel.core import httpx from searx.network import get # see https://github.com/searxng/searxng/issues/762 @@ -197,7 +198,7 @@ def fetch_traits(engine_traits: EngineTraits) -> None: if not resp.ok: raise RuntimeError("Response from zlibrary's search page is not OK.") - dom = html.fromstring(resp.text) # type: ignore + dom = html.fromstring(resp.text) if domain_is_seized(dom): print(f"ERROR: zlibrary domain is seized: {base_url}") @@ -206,25 +207,30 @@ def fetch_traits(engine_traits: EngineTraits) -> None: engine_traits.all_locale = "" engine_traits.custom["ext"] = [] - engine_traits.custom["year_from"] = [] - engine_traits.custom["year_to"] = [] + l: list[str] + # years_from + l = [] for year in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_yearFrom']/option"): - engine_traits.custom["year_from"].append(year.get("value")) + l.append(year.get("value") or "") + engine_traits.custom["year_from"] = l + # years_to + l = [] for year in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_yearTo']/option"): - engine_traits.custom["year_to"].append(year.get("value")) + l.append(year.get("value") or "") + engine_traits.custom["year_to"] = l + # ext (file extensions) + l = [] for ext in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_extensions']/option"): - value: str | None = ext.get("value") - if value is None: - value = "" - engine_traits.custom["ext"].append(value) + l.append(ext.get("value") or "") + engine_traits.custom["ext"] = l # Handle languages # Z-library uses English names for languages, so we need to map them to their respective locales language_name_locale_map: dict[str, babel.Locale] = {} - for locale in babel.core.localedata.locale_identifiers(): # type: ignore + for locale in babel.core.localedata.locale_identifiers(): # Create a Locale object for the current locale loc = babel.Locale.parse(locale) if loc.english_name is None: diff --git a/searx/settings.yml b/searx/settings.yml index 200f96de9..217658088 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1254,9 +1254,10 @@ engines: - name: z-library engine: zlibrary shortcut: zlib - categories: files timeout: 7.0 disabled: true + # https://github.com/searxng/searxng/issues/3610 + inactive: true - name: library of congress engine: loc