mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-25 07:49:02 -04:00 
			
		
		
		
	[mod] Z-Library engine: revision of the engine (Paper result)
Revision of the engine: use of the result type Paper as well as other type annotations. The engine has been set to inactive because no working service is currently available, or at least none is known in the SearXNG community [1]. [1] https://github.com/searxng/searxng/issues/3610 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									4c42704c80
								
							
						
					
					
						commit
						f8f7adce6b
					
				| @ -39,15 +39,16 @@ from urllib.parse import quote | |||||||
| from lxml import html | from lxml import html | ||||||
| from flask_babel import gettext  # pyright: ignore[reportUnknownVariableType] | from flask_babel import gettext  # pyright: ignore[reportUnknownVariableType] | ||||||
| 
 | 
 | ||||||
| from searx.utils import extract_text, eval_xpath, eval_xpath_list | from searx.utils import extract_text, eval_xpath, eval_xpath_list, ElementType | ||||||
| from searx.enginelib.traits import EngineTraits | from searx.enginelib.traits import EngineTraits | ||||||
| from searx.data import ENGINE_TRAITS | from searx.data import ENGINE_TRAITS | ||||||
| from searx.exceptions import SearxException | from searx.exceptions import SearxException | ||||||
|  | from searx.result_types import EngineResults | ||||||
| 
 | 
 | ||||||
| if t.TYPE_CHECKING: | if t.TYPE_CHECKING: | ||||||
|     from searx.extended_types import SXNG_Response |     from searx.extended_types import SXNG_Response | ||||||
|  |     from searx.search.processors import OnlineParams | ||||||
| 
 | 
 | ||||||
| # about |  | ||||||
| about: dict[str, t.Any] = { | about: dict[str, t.Any] = { | ||||||
|     "website": "https://zlibrary-global.se", |     "website": "https://zlibrary-global.se", | ||||||
|     "wikidata_id": "Q104863992", |     "wikidata_id": "Q104863992", | ||||||
| @ -57,7 +58,7 @@ about: dict[str, t.Any] = { | |||||||
|     "results": "HTML", |     "results": "HTML", | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| categories: list[str] = ["files"] | categories: list[str] = ["files", "books"] | ||||||
| paging: bool = True | paging: bool = True | ||||||
| base_url: str = "https://zlibrary-global.se" | base_url: str = "https://zlibrary-global.se" | ||||||
| 
 | 
 | ||||||
| @ -74,8 +75,12 @@ zlib_ext: str = "" | |||||||
| ``PDF`` and ``EPUB``. | ``PDF`` and ``EPUB``. | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
|  | i18n_language = gettext("Language") | ||||||
|  | i18n_book_rating = gettext("Book rating") | ||||||
|  | i18n_file_quality = gettext("File quality") | ||||||
| 
 | 
 | ||||||
| def init(engine_settings: dict[str, t.Any] | None = None) -> None:  # pylint: disable=unused-argument | 
 | ||||||
|  | def setup(engine_settings: dict[str, t.Any]) -> bool:  # pylint: disable=unused-argument | ||||||
|     """Check of engine's settings.""" |     """Check of engine's settings.""" | ||||||
|     traits: EngineTraits = EngineTraits(**ENGINE_TRAITS["z-library"]) |     traits: EngineTraits = EngineTraits(**ENGINE_TRAITS["z-library"]) | ||||||
| 
 | 
 | ||||||
| @ -85,10 +90,11 @@ def init(engine_settings: dict[str, t.Any] | None = None) -> None:  # pylint: di | |||||||
|         raise ValueError(f"invalid setting year_from: {zlib_year_from}") |         raise ValueError(f"invalid setting year_from: {zlib_year_from}") | ||||||
|     if zlib_year_to and zlib_year_to not in traits.custom["year_to"]: |     if zlib_year_to and zlib_year_to not in traits.custom["year_to"]: | ||||||
|         raise ValueError(f"invalid setting year_to: {zlib_year_to}") |         raise ValueError(f"invalid setting year_to: {zlib_year_to}") | ||||||
|  |     return True | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]: | def request(query: str, params: "OnlineParams") -> None: | ||||||
|     lang: str = traits.get_language(params["language"], traits.all_locale)  # type: ignore |     lang: str | None = traits.get_language(params["searxng_locale"], traits.all_locale) | ||||||
|     search_url: str = ( |     search_url: str = ( | ||||||
|         base_url |         base_url | ||||||
|         + "/s/{search_query}/?page={pageno}" |         + "/s/{search_query}/?page={pageno}" | ||||||
| @ -106,41 +112,35 @@ def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]: | |||||||
|         zlib_ext=zlib_ext, |         zlib_ext=zlib_ext, | ||||||
|     ) |     ) | ||||||
|     params["verify"] = False |     params["verify"] = False | ||||||
|     return params |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def domain_is_seized(dom): | def response(resp: "SXNG_Response") -> EngineResults: | ||||||
|     return bool(dom.xpath('//title') and "seized" in dom.xpath('//title')[0].text.lower()) |     res = EngineResults() | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]: |  | ||||||
|     results: list[dict[str, t.Any]] = [] |  | ||||||
|     dom = html.fromstring(resp.text) |     dom = html.fromstring(resp.text) | ||||||
| 
 | 
 | ||||||
|     if domain_is_seized(dom): |     if domain_is_seized(dom): | ||||||
|         raise SearxException(f"zlibrary domain is seized: {base_url}") |         raise SearxException(f"zlibrary domain is seized: {base_url}") | ||||||
| 
 | 
 | ||||||
|     for item in dom.xpath('//div[@id="searchResultBox"]//div[contains(@class, "resItemBox")]'): |     for item in dom.xpath('//div[@id="searchResultBox"]//div[contains(@class, "resItemBox")]'): | ||||||
|         results.append(_parse_result(item)) |         kwargs = _parse_result(item) | ||||||
|  |         res.add(res.types.Paper(**kwargs)) | ||||||
| 
 | 
 | ||||||
|     return results |     return res | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _text(item, selector: str) -> str | None: | def domain_is_seized(dom: ElementType): | ||||||
|  |     return bool(dom.xpath('//title') and "seized" in dom.xpath('//title')[0].text.lower()) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def _text(item: ElementType, selector: str) -> str | None: | ||||||
|     return extract_text(eval_xpath(item, selector)) |     return extract_text(eval_xpath(item, selector)) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| i18n_language = gettext("Language") | def _parse_result(item: ElementType) -> dict[str, t.Any]: | ||||||
| i18n_book_rating = gettext("Book rating") |  | ||||||
| i18n_file_quality = gettext("File quality") |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def _parse_result(item) -> dict[str, t.Any]: |  | ||||||
| 
 | 
 | ||||||
|     author_elements = eval_xpath_list(item, './/div[@class="authors"]//a[@itemprop="author"]') |     author_elements = eval_xpath_list(item, './/div[@class="authors"]//a[@itemprop="author"]') | ||||||
| 
 | 
 | ||||||
|     result = { |     result = { | ||||||
|         "template": "paper.html", |  | ||||||
|         "url": base_url + item.xpath('(.//a[starts-with(@href, "/book/")])[1]/@href')[0], |         "url": base_url + item.xpath('(.//a[starts-with(@href, "/book/")])[1]/@href')[0], | ||||||
|         "title": _text(item, './/*[@itemprop="name"]'), |         "title": _text(item, './/*[@itemprop="name"]'), | ||||||
|         "authors": [extract_text(author) for author in author_elements], |         "authors": [extract_text(author) for author in author_elements], | ||||||
| @ -148,15 +148,15 @@ def _parse_result(item) -> dict[str, t.Any]: | |||||||
|         "type": _text(item, './/div[contains(@class, "property__file")]//div[contains(@class, "property_value")]'), |         "type": _text(item, './/div[contains(@class, "property__file")]//div[contains(@class, "property_value")]'), | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     thumbnail: str = _text(item, './/img[contains(@class, "cover")]/@data-src') |     thumbnail = _text(item, './/img[contains(@class, "cover")]/@data-src') | ||||||
|     if not thumbnail.startswith('/'): |     if thumbnail and not thumbnail.startswith('/'): | ||||||
|         result["thumbnail"] = thumbnail |         result["thumbnail"] = thumbnail | ||||||
| 
 | 
 | ||||||
|     year = _text(item, './/div[contains(@class, "property_year")]//div[contains(@class, "property_value")]') |     year = _text(item, './/div[contains(@class, "property_year")]//div[contains(@class, "property_value")]') | ||||||
|     if year: |     if year: | ||||||
|         result["publishedDate"] = datetime.strptime(year, '%Y') |         result["publishedDate"] = datetime.strptime(year, '%Y') | ||||||
| 
 | 
 | ||||||
|     content = [] |     content: list[str] = [] | ||||||
|     language = _text(item, './/div[contains(@class, "property_language")]//div[contains(@class, "property_value")]') |     language = _text(item, './/div[contains(@class, "property_language")]//div[contains(@class, "property_value")]') | ||||||
|     if language: |     if language: | ||||||
|         content.append(f"{i18n_language}: {language.capitalize()}") |         content.append(f"{i18n_language}: {language.capitalize()}") | ||||||
| @ -173,9 +173,10 @@ def _parse_result(item) -> dict[str, t.Any]: | |||||||
| 
 | 
 | ||||||
| def fetch_traits(engine_traits: EngineTraits) -> None: | def fetch_traits(engine_traits: EngineTraits) -> None: | ||||||
|     """Fetch languages and other search arguments from zlibrary's search form.""" |     """Fetch languages and other search arguments from zlibrary's search form.""" | ||||||
|     # pylint: disable=import-outside-toplevel, too-many-branches |     # pylint: disable=import-outside-toplevel, too-many-branches, too-many-statements | ||||||
| 
 | 
 | ||||||
|     import babel |     import babel | ||||||
|  |     import babel.core | ||||||
|     import httpx |     import httpx | ||||||
| 
 | 
 | ||||||
|     from searx.network import get  # see https://github.com/searxng/searxng/issues/762 |     from searx.network import get  # see https://github.com/searxng/searxng/issues/762 | ||||||
| @ -197,7 +198,7 @@ def fetch_traits(engine_traits: EngineTraits) -> None: | |||||||
| 
 | 
 | ||||||
|     if not resp.ok: |     if not resp.ok: | ||||||
|         raise RuntimeError("Response from zlibrary's search page is not OK.") |         raise RuntimeError("Response from zlibrary's search page is not OK.") | ||||||
|     dom = html.fromstring(resp.text)  # type: ignore |     dom = html.fromstring(resp.text) | ||||||
| 
 | 
 | ||||||
|     if domain_is_seized(dom): |     if domain_is_seized(dom): | ||||||
|         print(f"ERROR: zlibrary domain is seized: {base_url}") |         print(f"ERROR: zlibrary domain is seized: {base_url}") | ||||||
| @ -206,25 +207,30 @@ def fetch_traits(engine_traits: EngineTraits) -> None: | |||||||
| 
 | 
 | ||||||
|     engine_traits.all_locale = "" |     engine_traits.all_locale = "" | ||||||
|     engine_traits.custom["ext"] = [] |     engine_traits.custom["ext"] = [] | ||||||
|     engine_traits.custom["year_from"] = [] |  | ||||||
|     engine_traits.custom["year_to"] = [] |  | ||||||
| 
 | 
 | ||||||
|  |     l: list[str] | ||||||
|  |     # years_from | ||||||
|  |     l = [] | ||||||
|     for year in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_yearFrom']/option"): |     for year in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_yearFrom']/option"): | ||||||
|         engine_traits.custom["year_from"].append(year.get("value")) |         l.append(year.get("value") or "") | ||||||
|  |     engine_traits.custom["year_from"] = l | ||||||
| 
 | 
 | ||||||
|  |     # years_to | ||||||
|  |     l = [] | ||||||
|     for year in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_yearTo']/option"): |     for year in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_yearTo']/option"): | ||||||
|         engine_traits.custom["year_to"].append(year.get("value")) |         l.append(year.get("value") or "") | ||||||
|  |     engine_traits.custom["year_to"] = l | ||||||
| 
 | 
 | ||||||
|  |     # ext (file extensions) | ||||||
|  |     l = [] | ||||||
|     for ext in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_extensions']/option"): |     for ext in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_extensions']/option"): | ||||||
|         value: str | None = ext.get("value") |         l.append(ext.get("value") or "") | ||||||
|         if value is None: |     engine_traits.custom["ext"] = l | ||||||
|             value = "" |  | ||||||
|         engine_traits.custom["ext"].append(value) |  | ||||||
| 
 | 
 | ||||||
|     # Handle languages |     # Handle languages | ||||||
|     # Z-library uses English names for languages, so we need to map them to their respective locales |     # Z-library uses English names for languages, so we need to map them to their respective locales | ||||||
|     language_name_locale_map: dict[str, babel.Locale] = {} |     language_name_locale_map: dict[str, babel.Locale] = {} | ||||||
|     for locale in babel.core.localedata.locale_identifiers():  # type: ignore |     for locale in babel.core.localedata.locale_identifiers(): | ||||||
|         # Create a Locale object for the current locale |         # Create a Locale object for the current locale | ||||||
|         loc = babel.Locale.parse(locale) |         loc = babel.Locale.parse(locale) | ||||||
|         if loc.english_name is None: |         if loc.english_name is None: | ||||||
|  | |||||||
| @ -1254,9 +1254,10 @@ engines: | |||||||
|   - name: z-library |   - name: z-library | ||||||
|     engine: zlibrary |     engine: zlibrary | ||||||
|     shortcut: zlib |     shortcut: zlib | ||||||
|     categories: files |  | ||||||
|     timeout: 7.0 |     timeout: 7.0 | ||||||
|     disabled: true |     disabled: true | ||||||
|  |     # https://github.com/searxng/searxng/issues/3610 | ||||||
|  |     inactive: true | ||||||
| 
 | 
 | ||||||
|   - name: library of congress |   - name: library of congress | ||||||
|     engine: loc |     engine: loc | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user