Get Books: Update Baen store plugin for website changes

This commit is contained in:
Kovid Goyal 2017-05-01 10:13:19 +05:30
parent 5e33507ce4
commit a00ec726ab
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -1,21 +1,21 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function) from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 1 # Needed for dynamic plugin loading store_version = 2 # Needed for dynamic plugin loading
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re from urllib import urlencode
import urllib2
from contextlib import closing from contextlib import closing
from lxml import html from lxml import html
from PyQt5.Qt import QUrl from PyQt5.Qt import QUrl
from calibre import browser, url_slash_cleaner from calibre import browser
from calibre.ebooks.metadata import authors_to_string
from calibre.gui2 import open_url from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig from calibre.gui2.store.basic_config import BasicStoreConfig
@@ -23,70 +23,73 @@ from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog from calibre.gui2.store.web_store_dialog import WebStoreDialog
def search(query, max_results=10, timeout=60):
    """Search the Baen catalog and yield a SearchResult for each book found.

    A single results page is fetched from www.baen.com and parsed with lxml;
    every field of every result is read from that one page.

    :param query: Search terms; lower-cased before being sent.
    :param max_results: Maximum number of results to yield.
    :param timeout: Timeout handed to the browser's ``open_novisit`` call.
    :return: Generator of ``SearchResult`` objects.
    """
    # On Python 2, urlencode() calls str() on every value, which raises
    # UnicodeEncodeError for non-ASCII unicode text. Hand it UTF-8 bytes.
    terms = query.lower()
    if not isinstance(terms, bytes):
        terms = terms.encode('utf-8')
    url = 'http://www.baen.com/catalogsearch/result/?' + urlencode(
        {'q': terms, 'dir': 'desc', 'order': 'relevance'})

    br = browser()
    with closing(br.open_novisit(url, timeout=timeout)) as f:
        raw = f.read()
    root = html.fromstring(raw)

    remaining = max_results
    for data in root.xpath('//div[@id="productMatches"]//table[@id="authorTable"]//tr[contains(@class, "IDCell")]'):
        if remaining <= 0:
            break

        # A row without a link or a title is not usable as a result: skip it.
        try:
            book_url = data.xpath('./td[1]/a/@href[1]')[0]
            title = data.xpath('./td[2]/a[1]/text()')[0].strip()
        except IndexError:
            continue

        # The cover is optional; fall back to an empty URL.
        try:
            cover_url = data.xpath('./td[1]//img[1]/@src')[0]
        except IndexError:
            cover_url = ''

        # Author names are read from the text that trails each <br> in the
        # second cell, keeping only the pieces that start with 'by '.
        tails = [(node.tail or '').strip() for node in data.xpath('./td[2]/br')]
        authors = [x[2:].strip() for x in tails if x.startswith('by ')]
        author = authors_to_string(authors)

        # Keep the price from the first '$' onwards; with no '$' present the
        # price ends up empty.
        price_text = ''.join(data.xpath('.//span[@class="variantprice"]/text()'))
        price = ''.join(price_text.partition('$')[1:])

        remaining -= 1

        s = SearchResult()
        s.cover_url = cover_url
        s.title = title.strip()
        s.author = author.strip()
        s.price = price
        s.detail_item = book_url.strip()
        s.drm = SearchResult.DRM_UNLOCKED
        s.formats = 'RB, MOBI, EPUB, LIT, LRF, RTF, HTML'

        yield s
class BaenWebScriptionStore(BasicStoreConfig, StorePlugin): class BaenWebScriptionStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
url = 'http://www.baenebooks.com/' url = 'http://www.baenebooks.com/'
if external or self.config.get('open_external', False): if external or self.config.get('open_external', False):
if detail_item: open_url(QUrl(detail_item or url))
url = url + detail_item
open_url(QUrl(url_slash_cleaner(url)))
else: else:
detail_url = None d = WebStoreDialog(self.gui, url, parent, detail_item or url)
if detail_item:
detail_url = url + detail_item
d = WebStoreDialog(self.gui, url, parent, detail_url)
d.setWindowTitle(self.name) d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', '')) d.set_tags(self.config.get('tags', ''))
d.exec_() d.exec_()
def search(self, query, max_results=10, timeout=60): def search(self, query, max_results=10, timeout=60):
url = 'http://www.baenebooks.com/searchadv.aspx?IsSubmit=true&SearchTerm=' + urllib2.quote(query) for result in search(query, max_results, timeout):
yield result
br = browser()
counter = max_results if __name__ == '__main__':
with closing(br.open(url, timeout=timeout)) as f: import sys
doc = html.fromstring(f.read()) for result in search(' '.join(sys.argv[1:])):
for data in doc.xpath('//table//table//table//table//tr'): print(result)
if counter <= 0:
break
id = ''.join(data.xpath('./td[1]/a/@href'))
if not id or not id.startswith('p-'):
continue
title = ''.join(data.xpath('./td[1]/a/text()'))
author = ''
cover_url = ''
price = ''
with closing(br.open('http://www.baenebooks.com/' + id.strip(), timeout=timeout/4)) as nf:
idata = html.fromstring(nf.read())
author = ''.join(idata.xpath('//span[@class="ProductNameText"]/../b/text()'))
author = author.split('by ')[-1]
price = ''.join(idata.xpath('//span[@class="variantprice"]/text()'))
a, b, price = price.partition('$')
price = b + price
pnum = ''
mo = re.search(r'p-(?P<num>\d+)-', id.strip())
if mo:
pnum = mo.group('num')
if pnum:
cover_url = 'http://www.baenebooks.com/' + ''.join(idata.xpath('//img[@id="ProductPic%s"]/@src' % pnum))
counter -= 1
s = SearchResult()
s.cover_url = cover_url
s.title = title.strip()
s.author = author.strip()
s.price = price
s.detail_item = id.strip()
s.drm = SearchResult.DRM_UNLOCKED
s.formats = 'RB, MOBI, EPUB, LIT, LRF, RTF, HTML'
yield s