From 4fc67c81a4858d490d88a26e2c84f43b8a3983c8 Mon Sep 17 00:00:00 2001 From: josdion Date: Sun, 21 Feb 2021 12:55:40 +0200 Subject: [PATCH] Fix biblio store Update html parser to be consistent with the current version of the site. --- .../gui2/store/stores/biblio_plugin.py | 100 +++++++++++++----- 1 file changed, 72 insertions(+), 28 deletions(-) diff --git a/src/calibre/gui2/store/stores/biblio_plugin.py b/src/calibre/gui2/store/stores/biblio_plugin.py index e43f64376e..fa2f874065 100644 --- a/src/calibre/gui2/store/stores/biblio_plugin.py +++ b/src/calibre/gui2/store/stores/biblio_plugin.py @@ -1,58 +1,102 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function, unicode_literals -store_version = 1 # Needed for dynamic plugin loading +store_version = 2 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2012, Alex Stanev ' __docformat__ = 'restructuredtext en' -import re +try: + from urllib.parse import quote_plus +except ImportError: + from urllib import quote_plus +from calibre import browser +from calibre.gui2 import open_url from calibre.gui2.store.basic_config import BasicStoreConfig -from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore +from calibre.gui2.store import StorePlugin from calibre.gui2.store.search_result import SearchResult +from calibre.gui2.store.web_store_dialog import WebStoreDialog +from contextlib import closing +from lxml import html -class BiblioStore(BasicStoreConfig, OpenSearchOPDSStore): +class BiblioStore(BasicStoreConfig, StorePlugin): - open_search_url = 'http://biblio.bg/feed.opds.php' - web_url = 'http://biblio.bg/' + web_url = 'https://biblio.bg' + + def open(self, parent=None, detail_item=None, external=False): + if external or self.config.get('open_external', False): + open_url(detail_item) + else: + d = WebStoreDialog(self.gui, self.web_url, parent, detail_item) + d.setWindowTitle(self.name) + d.set_tags(self.config.get('tags', '')) + d.exec_() def search(self, query, max_results=10, timeout=60): - # check for cyrillic symbols before performing search if isinstance(query, bytes): query = query.decode('utf-8') - uquery = query.strip() - reObj = re.search(u'^[а-яА-Я\\d\\s]{3,}$', uquery) - if not reObj: + + if len(query) < 3: return - for s in OpenSearchOPDSStore.search(self, query, max_results, timeout): - yield s + # do keyword search + url = '{}/книги?query={}&search_by=0'.format(self.web_url, quote_plus(query)) + yield from self._do_search(url, max_results, timeout) + def get_details(self, search_result, timeout): - # get format and DRM status - from calibre import browser - from contextlib import closing - from lxml import html - br = browser() with closing(br.open(search_result.detail_item, timeout=timeout)) as nf: idata = html.fromstring(nf.read()) search_result.formats = '' - if idata.xpath('.//span[@class="format epub"]'): - search_result.formats = 'EPUB' + search_result.drm = SearchResult.DRM_LOCKED - if idata.xpath('.//span[@class="format pdf"]'): - if search_result.formats == '': - search_result.formats = 'PDF' - else: - search_result.formats.join(', PDF') + for option in idata.xpath('//ul[@class="order_product_options"]/li'): + option_type = option.text.strip() if option.text else '' + if option_type.startswith('Формат:'): + search_result.formats = ''.join(option.xpath('.//b/text()')).strip() + if option_type.startswith('Защита:'): + if ''.join(option.xpath('.//b/text()')).strip() == 'няма': + search_result.drm = SearchResult.DRM_UNLOCKED - if idata.xpath('.//span[@class="format nodrm-icon"]'): - search_result.drm = SearchResult.DRM_UNLOCKED - else: - search_result.drm = SearchResult.DRM_LOCKED + if not search_result.author: + search_result.author = ', '.join(idata.xpath('//div[@class="row product_info"]/div/div/div[@class="item-author"]/a/text()')).strip(', ') return True + + + def _do_search(self, url, max_results, timeout): + br = browser() + with closing(br.open(url, timeout=timeout)) as f: + page = f.read().decode('utf-8') + doc = html.fromstring(page) + + for data in doc.xpath('//ul[contains(@class,"book_list")]/li'): + if max_results <= 0: + break + + s = SearchResult() + s.detail_item = ''.join(data.xpath('.//a[@class="th"]/@href')).strip() + if not id: + continue + + s.cover_url = ''.join(data.xpath('.//a[@class="th"]/img/@data-original')).strip() + s.title = ''.join(data.xpath('.//div[@class="item-title"]/a/text()')).strip() + s.author = ', '.join(data.xpath('.//div[@class="item-author"]/a/text()')).strip(', ') + + price_list = data.xpath('.//div[@class="item-price"]') + for price_item in price_list: + if price_item.text.startswith('е-книга:'): + s.price = ''.join(price_item.xpath('.//span/text()')) + break + + s.price = '0.00 лв.' if not s.price and not price_list else s.price + if not s.price: + # no e-book available + continue + + max_results -= 1 + yield s