Get Books: Fix Baen Webscription and O'Reilly stores. Fix price detection for Google Books

This commit is contained in:
Kovid Goyal 2012-02-19 00:12:05 +05:30
commit 228a619c18
4 changed files with 22 additions and 30 deletions

View File

@@ -1217,7 +1217,7 @@ class StoreArchiveOrgStore(StoreBase):
formats = ['DAISY', 'DJVU', 'EPUB', 'MOBI', 'PDF', 'TXT'] formats = ['DAISY', 'DJVU', 'EPUB', 'MOBI', 'PDF', 'TXT']
class StoreBaenWebScriptionStore(StoreBase): class StoreBaenWebScriptionStore(StoreBase):
name = 'Baen WebScription' name = 'Baen Ebooks'
description = u'Sci-Fi & Fantasy brought to you by Jim Baen.' description = u'Sci-Fi & Fantasy brought to you by Jim Baen.'
actual_plugin = 'calibre.gui2.store.stores.baen_webscription_plugin:BaenWebScriptionStore' actual_plugin = 'calibre.gui2.store.stores.baen_webscription_plugin:BaenWebScriptionStore'

View File

@@ -24,7 +24,7 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
class BaenWebScriptionStore(BasicStoreConfig, StorePlugin): class BaenWebScriptionStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
url = 'http://www.webscription.net/' url = 'http://www.baenebooks.com/'
if external or self.config.get('open_external', False): if external or self.config.get('open_external', False):
if detail_item: if detail_item:
@@ -40,19 +40,19 @@ class BaenWebScriptionStore(BasicStoreConfig, StorePlugin):
d.exec_() d.exec_()
def search(self, query, max_results=10, timeout=60): def search(self, query, max_results=10, timeout=60):
url = 'http://www.webscription.net/searchadv.aspx?IsSubmit=true&SearchTerm=' + urllib2.quote(query) url = 'http://www.baenebooks.com/searchadv.aspx?IsSubmit=true&SearchTerm=' + urllib2.quote(query)
br = browser() br = browser()
counter = max_results counter = max_results
with closing(br.open(url, timeout=timeout)) as f: with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read()) doc = html.fromstring(f.read())
for data in doc.xpath('//table/tr/td/img[@src="skins/Skin_1/images/matchingproducts.gif"]/..//tr'): for data in doc.xpath('//table//table//table//table//tr'):
if counter <= 0: if counter <= 0:
break break
id = ''.join(data.xpath('./td[1]/a/@href')) id = ''.join(data.xpath('./td[1]/a/@href'))
if not id: if not id or not id.startswith('p-'):
continue continue
title = ''.join(data.xpath('./td[1]/a/text()')) title = ''.join(data.xpath('./td[1]/a/text()'))
@@ -61,7 +61,7 @@ class BaenWebScriptionStore(BasicStoreConfig, StorePlugin):
cover_url = '' cover_url = ''
price = '' price = ''
with closing(br.open('http://www.webscription.net/' + id.strip(), timeout=timeout/4)) as nf: with closing(br.open('http://www.baenebooks.com/' + id.strip(), timeout=timeout/4)) as nf:
idata = html.fromstring(nf.read()) idata = html.fromstring(nf.read())
author = ''.join(idata.xpath('//span[@class="ProductNameText"]/../b/text()')) author = ''.join(idata.xpath('//span[@class="ProductNameText"]/../b/text()'))
author = author.split('by ')[-1] author = author.split('by ')[-1]
@@ -74,7 +74,7 @@ class BaenWebScriptionStore(BasicStoreConfig, StorePlugin):
if mo: if mo:
pnum = mo.group('num') pnum = mo.group('num')
if pnum: if pnum:
cover_url = 'http://www.webscription.net/' + ''.join(idata.xpath('//img[@id="ProductPic%s"]/@src' % pnum)) cover_url = 'http://www.baenebooks.com/' + ''.join(idata.xpath('//img[@id="ProductPic%s"]/@src' % pnum))
counter -= 1 counter -= 1

View File

@@ -93,16 +93,13 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin):
search_result.cover_url = ''.join(doc.xpath('//div[@class="sidebarcover"]//img/@src')) search_result.cover_url = ''.join(doc.xpath('//div[@class="sidebarcover"]//img/@src'))
# Try to get the set price. # Try to get the set price.
price = ''.join(doc.xpath('//div[@class="buy-price-container"]/span[contains(@class, "buy-price")]/text()')) price = ''.join(doc.xpath('//div[@id="gb-get-book-container"]//a/text()'))
# Try to get the price inside of a buy button. if 'read' in price.lower():
if not price.strip():
price = ''.join(doc.xpath('//div[@class="buy-container"]/a/text()'))
price = price.split('-')[-1]
if 'view' in price.lower():
price = 'Unknown' price = 'Unknown'
# No price set for this book. elif 'free' in price.lower() or not price.strip():
if not price.strip():
price = '$0.00' price = '$0.00'
elif '-' in price:
a, b, price = price.partition(' - ')
search_result.price = price.strip() search_result.price = price.strip()
search_result.formats = ', '.join(doc.xpath('//div[contains(@class, "download-panel-div")]//a/text()')).upper() search_result.formats = ', '.join(doc.xpath('//div[contains(@class, "download-panel-div")]//a/text()')).upper()

View File

@@ -26,9 +26,6 @@ class OReillyStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
url = 'http://oreilly.com/ebooks/' url = 'http://oreilly.com/ebooks/'
if detail_item:
detail_item = 'https://epoch.oreilly.com/shop/cart.orm?prod=%s.EBOOK&p=CALIBRE' % detail_item
if external or self.config.get('open_external', False): if external or self.config.get('open_external', False):
open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url))) open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
else: else:
@@ -49,11 +46,11 @@ class OReillyStore(BasicStoreConfig, StorePlugin):
if counter <= 0: if counter <= 0:
break break
full_id = ''.join(data.xpath('./div[@class="book_text"]//p[@class="title"]/a/@href')) ebook = ' '.join(data.xpath('.//p[@class="note"]/text()'))
mo = re.search('\d+', full_id) if 'ebook' not in ebook.lower():
if not mo:
continue continue
id = mo.group()
id = ''.join(data.xpath('./div[@class="book_text"]//p[@class="title"]/a/@href'))
cover_url = ''.join(data.xpath('./a/img[1]/@src')) cover_url = ''.join(data.xpath('./a/img[1]/@src'))
@@ -62,16 +59,14 @@ class OReillyStore(BasicStoreConfig, StorePlugin):
author = author.split('By ')[-1].strip() author = author.split('By ')[-1].strip()
# Get the detail here because we need to get the ebook id for the detail_item. # Get the detail here because we need to get the ebook id for the detail_item.
with closing(br.open(full_id, timeout=timeout)) as nf: with closing(br.open(id, timeout=timeout)) as nf:
idoc = html.fromstring(nf.read()) idoc = html.fromstring(nf.read())
price = ''.join(idoc.xpath('(//span[@class="price"])[1]/span//text()')) for td in idoc.xpath('//td[@class="optionsTd"]'):
formats = ', '.join(idoc.xpath('//div[@class="ebook_formats"]//a/text()')) if 'ebook' in ''.join(td.xpath('.//text()')).lower():
price = ''.join(td.xpath('.//span[@class="price"]/text()')).strip()
eid = ''.join(idoc.xpath('(//a[@class="product_buy_link" and contains(@href, ".EBOOK")])[1]/@href')).strip() formats = ''.join(td.xpath('.//a[@id="availableFormats"]/text()')).strip()
mo = re.search('\d+', eid) break
if mo:
id = mo.group()
counter -= 1 counter -= 1