This commit is contained in:
Kovid Goyal 2016-06-15 09:17:02 +05:30
parent 23fd53cf1b
commit e622cee5ed

View File

@ -1,116 +1,65 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function) from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading store_version = 3 # Needed for dynamic plugin loading
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2011-2013, Roman Mukhin <ramses_ru at hotmail.com>' __copyright__ = '2011-2013, Roman Mukhin <ramses_ru at hotmail.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import random import urllib
import re
import urllib2
from contextlib import closing from contextlib import closing
from lxml import etree, html
from PyQt5.Qt import QUrl from PyQt5.Qt import QUrl
import html5lib
from calibre import browser, url_slash_cleaner from calibre import browser, url_slash_cleaner
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.gui2 import open_url from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog from calibre.gui2.store.web_store_dialog import WebStoreDialog
class OzonRUStore(BasicStoreConfig, StorePlugin): shop_url = 'http://www.ozon.ru'
shop_url = 'http://www.ozon.ru'
def search(query, max_results=15, timeout=60):
url = 'http://www.ozon.ru/?context=search&text=%s&store=1,0&group=div_book' % urllib.quote_plus(query)
counter = max_results
br = browser()
with closing(br.open(url, timeout=timeout)) as f:
raw = xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True)[0]
root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
for tile in root.xpath('//*[@class="bShelfTile inline"]'):
if counter <= 0:
break
counter -= 1
s = SearchResult(store_name='OZON.ru')
s.detail_item = shop_url + tile.xpath('descendant::a[@class="eShelfTile_Link"]/@href')[0]
s.title = tile.xpath('descendant::span[@class="eShelfTile_ItemNameText"]/@title')[0]
s.author = tile.xpath('descendant::span[@class="eShelfTile_ItemPerson"]/@title')[0]
s.price = ''.join(tile.xpath('descendant::div[contains(@class, "eShelfTile_Price")]/text()'))
s.cover_url = 'http:' + tile.xpath('descendant::img/@data-original')[0]
s.price = format_price_in_RUR(s.price)
yield s
class OzonRUStore(StorePlugin):
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
url = detail_item or shop_url
aff_id = '?partner=romuk'
# Use Kovid's affiliate id 30% of the time.
if random.randint(1, 10) in (1, 2, 3):
aff_id = '?partner=kovidgoyal'
url = self.shop_url + aff_id
detail_url = None
if detail_item:
# http://www.ozon.ru/context/detail/id/3037277/
detail_url = self.shop_url + '/context/detail/id/' + urllib2.quote(detail_item) + aff_id
if external or self.config.get('open_external', False): if external or self.config.get('open_external', False):
open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url))) open_url(QUrl(url_slash_cleaner(url)))
else: else:
d = WebStoreDialog(self.gui, url, parent, detail_url) d = WebStoreDialog(self.gui, shop_url, parent, url)
d.setWindowTitle(self.name) d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', '')) d.set_tags(self.config.get('tags', ''))
d.exec_() d.exec_()
def search(self, query, max_results=15, timeout=60): def search(self, query, max_results=15, timeout=60):
search_url = self.shop_url + '/webservice/webservice.asmx/SearchWebService?'\ for s in search(query, max_results=max_results, timeout=timeout):
'searchText=%s&searchContext=ebook' % urllib2.quote(query) yield s
search_urls = [ search_url ]
xp_template = 'normalize-space(./*[local-name() = "{0}"]/text())'
counter = max_results
br = browser()
for url in search_urls:
with closing(br.open(url, timeout=timeout)) as f:
raw = xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True)[0]
doc = etree.fromstring(raw)
for data in doc.xpath('//*[local-name()="SearchItems" or local-name()="ItemDetail"]'):
if counter <= 0:
break
counter -= 1
s = SearchResult()
s.detail_item = data.xpath(xp_template.format('ID'))
s.title = data.xpath(xp_template.format('Name'))
s.author = data.xpath(xp_template.format('Author'))
s.price = data.xpath(xp_template.format('Price'))
s.cover_url = data.xpath(xp_template.format('Picture'))
s.price = format_price_in_RUR(s.price)
yield s
def get_details(self, search_result, timeout=60):
url = self.shop_url + '/context/detail/id/' + urllib2.quote(search_result.detail_item)
br = browser()
result = False
with closing(br.open(url, timeout=timeout)) as f:
raw = xml_to_unicode(f.read(), verbose=True)[0]
doc = html.fromstring(raw)
# example where we are going to find formats
# <div class="l">
# <p>
# Доступно:
# </p>
# </div>
# <div class="l">
# <p>.epub, .fb2.zip, .pdf</p>
# </div>
xpt = u'normalize-space(//div[contains(@id, "saleBlock")]//*[contains(normalize-space(text()), "Доступ")]/ancestor-or-self::div[1]/following-sibling::div[1]/*[1])'
formats = doc.xpath(xpt)
if formats:
result = True
search_result.drm = SearchResult.DRM_UNLOCKED
search_result.formats = ', '.join(_parse_ebook_formats(formats))
# unfortunately no direct links to download books (only buy link)
# search_result.downloads['BF2'] = self.shop_url + '/order/digitalorder.aspx?id=' + + urllib2.quote(search_result.detail_item)
#<p class="main-cost"><span class="main">215</span><span class="submain">00</span> руб.</p>
#<span itemprop="price" class="hidden">215.00</span>
#<meta itemprop="priceCurrency" content="RUR " />
# if the price not in the search result (the ID search case)
if not search_result.price:
price = doc.xpath(u'normalize-space(//*[@itemprop="price"]/text())')
search_result.price = format_price_in_RUR(price)
return result
def format_price_in_RUR(price): def format_price_in_RUR(price):
''' '''
@ -119,37 +68,10 @@ def format_price_in_RUR(price):
@return: formatted price if possible otherwise original value @return: formatted price if possible otherwise original value
@rtype: unicode @rtype: unicode
''' '''
if price and re.match("^\d*?\.\d*?$", price): price = price.replace('\xa0', '').replace(',', '.').strip() + ' py6'
try:
price = u'{:,.2F} руб.'.format(float(price))
price = price.replace(',', ' ').replace('.', ',', 1)
except:
pass
return price return price
def _parse_ebook_formats(formatsStr): if __name__ == '__main__':
''' import sys
Creates a list with displayable names of the formats for r in search(sys.argv[-1]):
print(r)
:param formatsStr: string with comma separated book formats
as it provided by ozon.ru
:return: a list with displayable book formats
'''
formatsUnstruct = formatsStr.lower()
formats = []
if 'epub' in formatsUnstruct:
formats.append('ePub')
if 'pdf' in formatsUnstruct:
formats.append('PDF')
if 'fb2' in formatsUnstruct:
formats.append('FB2')
if 'rtf' in formatsUnstruct:
formats.append('RTF')
if 'txt' in formatsUnstruct:
formats.append('TXT')
if 'djvu' in formatsUnstruct:
formats.append('DjVu')
if 'doc' in formatsUnstruct:
formats.append('DOC')
return formats