mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Fix biblio store
Update html parser to be consistent with the current version of the site.
This commit is contained in:
parent
7d9ebf412d
commit
4fc67c81a4
@ -1,58 +1,102 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
store_version = 1 # Needed for dynamic plugin loading
|
store_version = 2 # Needed for dynamic plugin loading
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2012, Alex Stanev <alex@stanev.org>'
|
__copyright__ = '2012, Alex Stanev <alex@stanev.org>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re
|
try:
|
||||||
|
from urllib.parse import quote_plus
|
||||||
|
except ImportError:
|
||||||
|
from urllib import quote_plus
|
||||||
|
|
||||||
|
from calibre import browser
|
||||||
|
from calibre.gui2 import open_url
|
||||||
from calibre.gui2.store.basic_config import BasicStoreConfig
|
from calibre.gui2.store.basic_config import BasicStoreConfig
|
||||||
from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore
|
from calibre.gui2.store import StorePlugin
|
||||||
from calibre.gui2.store.search_result import SearchResult
|
from calibre.gui2.store.search_result import SearchResult
|
||||||
|
from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
||||||
|
from contextlib import closing
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
|
||||||
class BiblioStore(BasicStoreConfig, OpenSearchOPDSStore):
|
class BiblioStore(BasicStoreConfig, StorePlugin):
|
||||||
|
|
||||||
open_search_url = 'http://biblio.bg/feed.opds.php'
|
web_url = 'https://biblio.bg'
|
||||||
web_url = 'http://biblio.bg/'
|
|
||||||
|
def open(self, parent=None, detail_item=None, external=False):
    """Show the store, or one specific book page, to the user.

    :param parent: parent widget handed to the embedded store dialog.
    :param detail_item: URL of the page to show; ``None`` (the default)
        means no specific page was requested.
    :param external: when true, hand the URL to ``open_url`` instead of
        showing the embedded ``WebStoreDialog``. The ``open_external``
        config option forces the same behaviour.
    """
    if external or self.config.get('open_external', False):
        # detail_item is None when the store is opened without a specific
        # book; fall back to the store front page instead of passing None.
        open_url(detail_item or self.web_url)
        return

    d = WebStoreDialog(self.gui, self.web_url, parent, detail_item)
    d.setWindowTitle(self.name)
    d.set_tags(self.config.get('tags', ''))
    d.exec_()
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
# check for cyrillic symbols before performing search
|
|
||||||
if isinstance(query, bytes):
|
if isinstance(query, bytes):
|
||||||
query = query.decode('utf-8')
|
query = query.decode('utf-8')
|
||||||
uquery = query.strip()
|
|
||||||
reObj = re.search(u'^[а-яА-Я\\d\\s]{3,}$', uquery)
|
if len(query) < 3:
|
||||||
if not reObj:
|
|
||||||
return
|
return
|
||||||
|
|
||||||
for s in OpenSearchOPDSStore.search(self, query, max_results, timeout):
|
# do keyword search
|
||||||
yield s
|
url = '{}/книги?query={}&search_by=0'.format(self.web_url, quote_plus(query))
|
||||||
|
yield from self._do_search(url, max_results, timeout)
|
||||||
|
|
||||||
|
|
||||||
def get_details(self, search_result, timeout):
|
def get_details(self, search_result, timeout):
|
||||||
# get format and DRM status
|
|
||||||
from calibre import browser
|
|
||||||
from contextlib import closing
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
br = browser()
|
br = browser()
|
||||||
with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
|
with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
|
||||||
idata = html.fromstring(nf.read())
|
idata = html.fromstring(nf.read())
|
||||||
search_result.formats = ''
|
search_result.formats = ''
|
||||||
if idata.xpath('.//span[@class="format epub"]'):
|
|
||||||
search_result.formats = 'EPUB'
|
|
||||||
|
|
||||||
if idata.xpath('.//span[@class="format pdf"]'):
|
|
||||||
if search_result.formats == '':
|
|
||||||
search_result.formats = 'PDF'
|
|
||||||
else:
|
|
||||||
search_result.formats.join(', PDF')
|
|
||||||
|
|
||||||
if idata.xpath('.//span[@class="format nodrm-icon"]'):
|
|
||||||
search_result.drm = SearchResult.DRM_UNLOCKED
|
|
||||||
else:
|
|
||||||
search_result.drm = SearchResult.DRM_LOCKED
|
search_result.drm = SearchResult.DRM_LOCKED
|
||||||
|
|
||||||
|
for option in idata.xpath('//ul[@class="order_product_options"]/li'):
|
||||||
|
option_type = option.text.strip() if option.text else ''
|
||||||
|
if option_type.startswith('Формат:'):
|
||||||
|
search_result.formats = ''.join(option.xpath('.//b/text()')).strip()
|
||||||
|
if option_type.startswith('Защита:'):
|
||||||
|
if ''.join(option.xpath('.//b/text()')).strip() == 'няма':
|
||||||
|
search_result.drm = SearchResult.DRM_UNLOCKED
|
||||||
|
|
||||||
|
if not search_result.author:
|
||||||
|
search_result.author = ', '.join(idata.xpath('//div[@class="row product_info"]/div/div/div[@class="item-author"]/a/text()')).strip(', ')
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def _do_search(self, url, max_results, timeout):
    """Fetch a listing page and yield a SearchResult per e-book found.

    :param url: fully built search URL to download.
    :param max_results: maximum number of results to yield.
    :param timeout: timeout passed to the browser's ``open`` call.

    Entries without a detail link, and entries whose price blocks contain
    no 'е-книга:' price, are skipped. An entry with no price block at all
    is reported with the price '0.00 лв.'.
    """
    br = browser()
    # Read the whole response up front so the connection is closed before
    # any result is yielded to the caller.
    with closing(br.open(url, timeout=timeout)) as f:
        page = f.read().decode('utf-8')
    doc = html.fromstring(page)

    for data in doc.xpath('//ul[contains(@class,"book_list")]/li'):
        if max_results <= 0:
            break

        s = SearchResult()
        s.detail_item = ''.join(data.xpath('.//a[@class="th"]/@href')).strip()
        # The original tested the builtin ``id`` here, which is always
        # truthy; the intent is to skip entries that have no detail link.
        if not s.detail_item:
            continue

        s.cover_url = ''.join(data.xpath('.//a[@class="th"]/img/@data-original')).strip()
        s.title = ''.join(data.xpath('.//div[@class="item-title"]/a/text()')).strip()
        s.author = ', '.join(data.xpath('.//div[@class="item-author"]/a/text()')).strip(', ')

        price_list = data.xpath('.//div[@class="item-price"]')
        for price_item in price_list:
            # lxml gives ``None`` for .text when the element has no leading
            # text, so guard before calling startswith.
            if (price_item.text or '').startswith('е-книга:'):
                s.price = ''.join(price_item.xpath('.//span/text()'))
                break

        # No price block at all is reported as a zero price.
        # NOTE(review): assumes SearchResult() starts with a falsy price - confirm.
        if not s.price and not price_list:
            s.price = '0.00 лв.'
        if not s.price:
            # no e-book available
            continue

        max_results -= 1
        yield s
|
||||||
|
Loading…
x
Reference in New Issue
Block a user