mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Get Books: Updates to various store plugins to deal with website changes: Amazon Europe, Waterstones, Foyles, B&N, Kobo and Empik
This commit is contained in:
commit
2b60b652fa
@ -1538,6 +1538,7 @@ class StoreWaterstonesUKStore(StoreBase):
|
|||||||
|
|
||||||
headquarters = 'UK'
|
headquarters = 'UK'
|
||||||
formats = ['EPUB', 'PDF']
|
formats = ['EPUB', 'PDF']
|
||||||
|
affiliate = True
|
||||||
|
|
||||||
class StoreWeightlessBooksStore(StoreBase):
|
class StoreWeightlessBooksStore(StoreBase):
|
||||||
name = 'Weightless Books'
|
name = 'Weightless Books'
|
||||||
@ -1557,15 +1558,6 @@ class StoreWHSmithUKStore(StoreBase):
|
|||||||
headquarters = 'UK'
|
headquarters = 'UK'
|
||||||
formats = ['EPUB', 'PDF']
|
formats = ['EPUB', 'PDF']
|
||||||
|
|
||||||
class StoreWizardsTowerBooksStore(StoreBase):
|
|
||||||
name = 'Wizards Tower Books'
|
|
||||||
description = u'A science fiction and fantasy publisher. Concentrates mainly on making out-of-print works available once more as e-books, and helping other small presses exploit the e-book market. Also publishes a small number of limited-print-run anthologies with a view to encouraging diversity in the science fiction and fantasy field.'
|
|
||||||
actual_plugin = 'calibre.gui2.store.stores.wizards_tower_books_plugin:WizardsTowerBooksStore'
|
|
||||||
|
|
||||||
drm_free_only = True
|
|
||||||
headquarters = 'UK'
|
|
||||||
formats = ['EPUB', 'MOBI']
|
|
||||||
|
|
||||||
class StoreWoblinkStore(StoreBase):
|
class StoreWoblinkStore(StoreBase):
|
||||||
name = 'Woblink'
|
name = 'Woblink'
|
||||||
author = u'Tomasz Długosz'
|
author = u'Tomasz Długosz'
|
||||||
@ -1636,7 +1628,6 @@ plugins += [
|
|||||||
StoreWaterstonesUKStore,
|
StoreWaterstonesUKStore,
|
||||||
StoreWeightlessBooksStore,
|
StoreWeightlessBooksStore,
|
||||||
StoreWHSmithUKStore,
|
StoreWHSmithUKStore,
|
||||||
StoreWizardsTowerBooksStore,
|
|
||||||
StoreWoblinkStore,
|
StoreWoblinkStore,
|
||||||
XinXiiStore,
|
XinXiiStore,
|
||||||
StoreZixoStore
|
StoreZixoStore
|
||||||
|
@ -5,4 +5,3 @@ or asked not to be included in the store integration.
|
|||||||
* Indigo (http://www.chapters.indigo.ca/).
|
* Indigo (http://www.chapters.indigo.ca/).
|
||||||
* Libreria Rizzoli (http://libreriarizzoli.corriere.it/).
|
* Libreria Rizzoli (http://libreriarizzoli.corriere.it/).
|
||||||
* EPubBuy DE: reason: too much traffic for too few sales
|
* EPubBuy DE: reason: too much traffic for too few sales
|
||||||
* Empik (http://empik.com.pl).
|
|
||||||
|
@ -41,7 +41,9 @@ class AmazonDEKindleStore(StorePlugin):
|
|||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
||||||
|
# Apparently amazon Europe is responding in UTF-8 now
|
||||||
|
doc = html.fromstring(f.read())
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
||||||
format_xpath = './/span[@class="format"]/text()'
|
format_xpath = './/span[@class="format"]/text()'
|
||||||
@ -65,8 +67,8 @@ class AmazonDEKindleStore(StorePlugin):
|
|||||||
|
|
||||||
cover_url = ''.join(data.xpath(cover_xpath))
|
cover_url = ''.join(data.xpath(cover_xpath))
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
|
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
|
||||||
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
|
price = ''.join(data.xpath('.//span[@class="price"]/text()'))
|
||||||
|
|
||||||
author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
|
author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
|
||||||
if author.startswith('von '):
|
if author.startswith('von '):
|
||||||
|
@ -37,7 +37,9 @@ class AmazonESKindleStore(StorePlugin):
|
|||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
||||||
|
# Apparently amazon Europe is responding in UTF-8 now
|
||||||
|
doc = html.fromstring(f.read())
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
||||||
format_xpath = './/span[@class="format"]/text()'
|
format_xpath = './/span[@class="format"]/text()'
|
||||||
@ -61,8 +63,8 @@ class AmazonESKindleStore(StorePlugin):
|
|||||||
|
|
||||||
cover_url = ''.join(data.xpath(cover_xpath))
|
cover_url = ''.join(data.xpath(cover_xpath))
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
|
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
|
||||||
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
|
price = ''.join(data.xpath('.//span[@class="price"]/text()'))
|
||||||
author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
|
author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
|
||||||
if author.startswith('de '):
|
if author.startswith('de '):
|
||||||
author = author[3:]
|
author = author[3:]
|
||||||
|
@ -39,7 +39,7 @@ class AmazonFRKindleStore(StorePlugin):
|
|||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
||||||
# Apparently amazon.fr is responding in UTF-8 now
|
# Apparently amazon Europe is responding in UTF-8 now
|
||||||
doc = html.fromstring(f.read())
|
doc = html.fromstring(f.read())
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
||||||
@ -64,8 +64,8 @@ class AmazonFRKindleStore(StorePlugin):
|
|||||||
|
|
||||||
cover_url = ''.join(data.xpath(cover_xpath))
|
cover_url = ''.join(data.xpath(cover_xpath))
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
|
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
|
||||||
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
|
price = ''.join(data.xpath('.//span[@class="price"]/text()'))
|
||||||
author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
|
author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
|
||||||
if author.startswith('de '):
|
if author.startswith('de '):
|
||||||
author = author[3:]
|
author = author[3:]
|
||||||
|
@ -37,7 +37,9 @@ class AmazonITKindleStore(StorePlugin):
|
|||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
||||||
|
# Apparently amazon Europe is responding in UTF-8 now
|
||||||
|
doc = html.fromstring(f.read())
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
||||||
format_xpath = './/span[@class="format"]/text()'
|
format_xpath = './/span[@class="format"]/text()'
|
||||||
@ -61,8 +63,8 @@ class AmazonITKindleStore(StorePlugin):
|
|||||||
|
|
||||||
cover_url = ''.join(data.xpath(cover_xpath))
|
cover_url = ''.join(data.xpath(cover_xpath))
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
|
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
|
||||||
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
|
price = ''.join(data.xpath('.//span[@class="price"]/text()'))
|
||||||
author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
|
author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
|
||||||
if author.startswith('di '):
|
if author.startswith('di '):
|
||||||
author = author[3:]
|
author = author[3:]
|
||||||
|
@ -38,7 +38,8 @@ class AmazonUKKindleStore(StorePlugin):
|
|||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
# Apparently amazon Europe is responding in UTF-8 now
|
||||||
|
doc = html.fromstring(f.read())
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
||||||
format_xpath = './/span[@class="format"]/text()'
|
format_xpath = './/span[@class="format"]/text()'
|
||||||
@ -62,8 +63,8 @@ class AmazonUKKindleStore(StorePlugin):
|
|||||||
|
|
||||||
cover_url = ''.join(data.xpath(cover_xpath))
|
cover_url = ''.join(data.xpath(cover_xpath))
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
|
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
|
||||||
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
|
price = ''.join(data.xpath('.//span[@class="price"]/text()'))
|
||||||
|
|
||||||
author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
|
author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
|
||||||
if author.startswith('by '):
|
if author.startswith('by '):
|
||||||
|
@ -62,7 +62,7 @@ class BNStore(BasicStoreConfig, StorePlugin):
|
|||||||
|
|
||||||
title = ''.join(data.xpath('.//p[@class="title"]//span[@class="name"]/text()'))
|
title = ''.join(data.xpath('.//p[@class="title"]//span[@class="name"]/text()'))
|
||||||
author = ', '.join(data.xpath('.//ul[@class="contributors"]//li[position()>1]//a/text()'))
|
author = ', '.join(data.xpath('.//ul[@class="contributors"]//li[position()>1]//a/text()'))
|
||||||
price = ''.join(data.xpath('.//table[@class="displayed-formats"]//a[@class="subtle"]/text()'))
|
price = ''.join(data.xpath('.//table[@class="displayed-formats"]//a[contains(@class, "bn-price")]/text()'))
|
||||||
|
|
||||||
counter -= 1
|
counter -= 1
|
||||||
|
|
||||||
|
@ -7,7 +7,8 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import random
|
import random
|
||||||
import urllib2
|
import re
|
||||||
|
import urllib
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
@ -33,7 +34,7 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
|
|||||||
|
|
||||||
detail_url = None
|
detail_url = None
|
||||||
if detail_item:
|
if detail_item:
|
||||||
detail_url = url + detail_item + aff_id
|
detail_url = detail_item + aff_id
|
||||||
url = url + aff_id
|
url = url + aff_id
|
||||||
|
|
||||||
if external or self.config.get('open_external', False):
|
if external or self.config.get('open_external', False):
|
||||||
@ -45,33 +46,36 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
|
|||||||
d.exec_()
|
d.exec_()
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
url = 'http://www.diesel-ebooks.com/index.php?page=seek&id[m]=&id[c]=scope%253Dinventory&id[q]=' + urllib2.quote(query)
|
url = 'http://www.diesel-ebooks.com/index.php?page=seek&id[m]=&id[c]=scope%253Dinventory&id[q]=' + urllib.quote_plus(query)
|
||||||
|
|
||||||
br = browser()
|
br = browser()
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read())
|
doc = html.fromstring(f.read())
|
||||||
for data in doc.xpath('//div[@class="item clearfix"]'):
|
for data in doc.xpath('//div[contains(@class, "item")]'):
|
||||||
data = html.fromstring(html.tostring(data))
|
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
break
|
break
|
||||||
|
|
||||||
id = ''.join(data.xpath('div[@class="cover"]/a/@href'))
|
id = ''.join(data.xpath('div[@class="cover"]/a/@href'))
|
||||||
if not id or '/item/' not in id:
|
if not id or '/item/' not in id:
|
||||||
continue
|
continue
|
||||||
a, b, id = id.partition('/item/')
|
|
||||||
|
|
||||||
cover_url = ''.join(data.xpath('div[@class="cover"]//img/@src'))
|
cover_url = ''.join(data.xpath('div[@class="cover"]//img/@src'))
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//div[@class="content"]//h2/text()'))
|
title = ''.join(data.xpath('.//div[@class="content"]//h2/a/text()'))
|
||||||
author = ''.join(data.xpath('//div[@class="content"]//div[@class="author"]/a/text()'))
|
author = ''.join(data.xpath('.//div[@class="content"]/span//a/text()'))
|
||||||
price = ''
|
price = ''
|
||||||
price_elem = data.xpath('//td[@class="price"]/text()')
|
price_elem = data.xpath('.//div[@class="price_fat"]//h1/text()')
|
||||||
if price_elem:
|
if price_elem:
|
||||||
price = price_elem[0]
|
price = price_elem[0]
|
||||||
|
|
||||||
formats = ', '.join(data.xpath('.//td[@class="format"]/text()'))
|
formats = ', '.join(data.xpath('.//div[@class="book-info"]//text()')).strip()
|
||||||
|
a, b, formats = formats.partition('Format:')
|
||||||
|
drm = SearchResult.DRM_LOCKED
|
||||||
|
if 'drm free' not in formats.lower():
|
||||||
|
drm = SearchResult.DRM_UNLOCKED
|
||||||
|
|
||||||
|
|
||||||
counter -= 1
|
counter -= 1
|
||||||
|
|
||||||
@ -80,19 +84,8 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
|
|||||||
s.title = title.strip()
|
s.title = title.strip()
|
||||||
s.author = author.strip()
|
s.author = author.strip()
|
||||||
s.price = price.strip()
|
s.price = price.strip()
|
||||||
s.detail_item = '/item/' + id.strip()
|
s.detail_item = id.strip()
|
||||||
s.formats = formats
|
s.formats = formats
|
||||||
|
s.drm = drm
|
||||||
|
|
||||||
yield s
|
yield s
|
||||||
|
|
||||||
def get_details(self, search_result, timeout):
|
|
||||||
url = 'http://www.diesel-ebooks.com/item/'
|
|
||||||
|
|
||||||
br = browser()
|
|
||||||
with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
|
|
||||||
idata = html.fromstring(nf.read())
|
|
||||||
if idata.xpath('boolean(//table[@class="format-info"]//tr[contains(th, "DRM") and contains(td, "No")])'):
|
|
||||||
search_result.drm = SearchResult.DRM_UNLOCKED
|
|
||||||
else:
|
|
||||||
search_result.drm = SearchResult.DRM_LOCKED
|
|
||||||
return True
|
|
||||||
|
@ -60,10 +60,6 @@ class FoylesUKStore(BasicStoreConfig, StorePlugin):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
cover_url = ''.join(data.xpath('.//a[@class="Jacket"]/img/@src'))
|
cover_url = ''.join(data.xpath('.//a[@class="Jacket"]/img/@src'))
|
||||||
if cover_url:
|
|
||||||
cover_url = 'http://www.foyles.co.uk' + cover_url
|
|
||||||
#print(cover_url)
|
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//a[@class="Title"]/text()'))
|
title = ''.join(data.xpath('.//a[@class="Title"]/text()'))
|
||||||
author = ', '.join(data.xpath('.//span[@class="Author"]/text()'))
|
author = ', '.join(data.xpath('.//span[@class="Author"]/text()'))
|
||||||
price = ''.join(data.xpath('./ul/li[@class="Strong"]/text()'))
|
price = ''.join(data.xpath('./ul/li[@class="Strong"]/text()'))
|
||||||
|
@ -68,7 +68,7 @@ class KoboStore(BasicStoreConfig, StorePlugin):
|
|||||||
cover_url = ''.join(data.xpath('.//div[@class="SearchImageContainer"]//img[1]/@src'))
|
cover_url = ''.join(data.xpath('.//div[@class="SearchImageContainer"]//img[1]/@src'))
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//div[@class="SCItemHeader"]/h1/a[1]/text()'))
|
title = ''.join(data.xpath('.//div[@class="SCItemHeader"]/h1/a[1]/text()'))
|
||||||
author = ''.join(data.xpath('.//div[@class="SCItemSummary"]/span/a[1]/text()'))
|
author = ', '.join(data.xpath('.//div[@class="SCItemSummary"]//span//a/text()'))
|
||||||
drm = data.xpath('boolean(.//span[@class="SCAvailibilityFormatsText" and contains(text(), "DRM")])')
|
drm = data.xpath('boolean(.//span[@class="SCAvailibilityFormatsText" and contains(text(), "DRM")])')
|
||||||
|
|
||||||
counter -= 1
|
counter -= 1
|
||||||
|
@ -57,7 +57,7 @@ class WaterstonesUKStore(BasicStoreConfig, StorePlugin):
|
|||||||
cover_url = ''.join(data.xpath('.//div[@class="image"]/a/img/@src'))
|
cover_url = ''.join(data.xpath('.//div[@class="image"]/a/img/@src'))
|
||||||
title = ''.join(data.xpath('./div/div/h2/a/text()'))
|
title = ''.join(data.xpath('./div/div/h2/a/text()'))
|
||||||
author = ', '.join(data.xpath('.//p[@class="byAuthor"]/a/text()'))
|
author = ', '.join(data.xpath('.//p[@class="byAuthor"]/a/text()'))
|
||||||
price = ''.join(data.xpath('.//p[@class="price"]/span[@class="priceStandard"]/text()'))
|
price = ''.join(data.xpath('.//p[@class="price"]/span[@class="priceRed2"]/text()'))
|
||||||
drm = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "DRM")])')
|
drm = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "DRM")])')
|
||||||
pdf = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "PDF")])')
|
pdf = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "PDF")])')
|
||||||
epub = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "EPUB")])')
|
epub = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "EPUB")])')
|
||||||
|
@ -1,118 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
|
||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
import urllib
|
|
||||||
from contextlib import closing
|
|
||||||
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
from PyQt4.Qt import QUrl
|
|
||||||
|
|
||||||
from calibre import browser, url_slash_cleaner
|
|
||||||
from calibre.gui2 import open_url
|
|
||||||
from calibre.gui2.store import StorePlugin
|
|
||||||
from calibre.gui2.store.basic_config import BasicStoreConfig
|
|
||||||
from calibre.gui2.store.search_result import SearchResult
|
|
||||||
from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
|
||||||
|
|
||||||
class WizardsTowerBooksStore(BasicStoreConfig, StorePlugin):
|
|
||||||
|
|
||||||
url = 'http://www.wizardstowerbooks.com/'
|
|
||||||
|
|
||||||
def open(self, parent=None, detail_item=None, external=False):
|
|
||||||
if detail_item:
|
|
||||||
detail_item = self.url + detail_item
|
|
||||||
|
|
||||||
if external or self.config.get('open_external', False):
|
|
||||||
open_url(QUrl(url_slash_cleaner(detail_item)))
|
|
||||||
else:
|
|
||||||
d = WebStoreDialog(self.gui, self.url, parent, detail_item)
|
|
||||||
d.setWindowTitle(self.name)
|
|
||||||
d.set_tags(self.config.get('tags', ''))
|
|
||||||
d.exec_()
|
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
|
||||||
url = 'http://www.wizardstowerbooks.com/search.html?for=' + urllib.quote(query)
|
|
||||||
|
|
||||||
br = browser()
|
|
||||||
|
|
||||||
counter = max_results
|
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
|
||||||
doc = html.fromstring(f.read())
|
|
||||||
if 'search.html' in f.geturl():
|
|
||||||
for data in doc.xpath('//table[@class="gridp"]//td'):
|
|
||||||
if counter <= 0:
|
|
||||||
break
|
|
||||||
|
|
||||||
id = ''.join(data.xpath('.//span[@class="prti"]/a/@href'))
|
|
||||||
id = id.strip()
|
|
||||||
if not id:
|
|
||||||
continue
|
|
||||||
|
|
||||||
cover_url = ''.join(data.xpath('.//div[@class="prim"]/a/img/@src'))
|
|
||||||
cover_url = url_slash_cleaner(self.url + cover_url.strip())
|
|
||||||
|
|
||||||
price = ''.join(data.xpath('.//font[@class="selling_price"]//text()'))
|
|
||||||
price = price.strip()
|
|
||||||
if not price:
|
|
||||||
continue
|
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//span[@class="prti"]/a/b/text()'))
|
|
||||||
author = ''.join(data.xpath('.//p[@class="last"]/text()'))
|
|
||||||
a, b, author = author.partition(' by ')
|
|
||||||
|
|
||||||
counter -= 1
|
|
||||||
|
|
||||||
s = SearchResult()
|
|
||||||
s.cover_url = cover_url
|
|
||||||
s.title = title.strip()
|
|
||||||
s.author = author.strip()
|
|
||||||
s.price = price.strip()
|
|
||||||
s.detail_item = id.strip()
|
|
||||||
s.drm = SearchResult.DRM_UNLOCKED
|
|
||||||
|
|
||||||
yield s
|
|
||||||
# Exact match brought us to the books detail page.
|
|
||||||
else:
|
|
||||||
s = SearchResult()
|
|
||||||
|
|
||||||
cover_url = ''.join(doc.xpath('//div[@id="image"]/a/img[@title="Zoom"]/@src')).strip()
|
|
||||||
s.cover_url = url_slash_cleaner(self.url + cover_url.strip())
|
|
||||||
|
|
||||||
s.title = ''.join(doc.xpath('//form[@name="details"]/h1/text()')).strip()
|
|
||||||
|
|
||||||
authors = doc.xpath('//p[contains(., "Author:")]//text()')
|
|
||||||
author_index = None
|
|
||||||
for i, a in enumerate(authors):
|
|
||||||
if 'author' in a.lower():
|
|
||||||
author_index = i + 1
|
|
||||||
break
|
|
||||||
if author_index is not None and len(authors) > author_index:
|
|
||||||
a = authors[author_index]
|
|
||||||
a = a.replace(u'\xa0', '')
|
|
||||||
s.author = a.strip()
|
|
||||||
|
|
||||||
s.price = ''.join(doc.xpath('//span[@id="price_selling"]//text()')).strip()
|
|
||||||
s.detail_item = f.geturl().replace(self.url, '').strip()
|
|
||||||
s.formats = ', '.join(doc.xpath('//select[@id="N1_"]//option//text()'))
|
|
||||||
s.drm = SearchResult.DRM_UNLOCKED
|
|
||||||
|
|
||||||
yield s
|
|
||||||
|
|
||||||
def get_details(self, search_result, timeout):
|
|
||||||
if search_result.formats:
|
|
||||||
return False
|
|
||||||
|
|
||||||
br = browser()
|
|
||||||
with closing(br.open(url_slash_cleaner(self.url + search_result.detail_item), timeout=timeout)) as nf:
|
|
||||||
idata = html.fromstring(nf.read())
|
|
||||||
|
|
||||||
formats = ', '.join(idata.xpath('//select[@id="N1_"]//option//text()'))
|
|
||||||
search_result.formats = formats.upper()
|
|
||||||
|
|
||||||
return True
|
|
Loading…
x
Reference in New Issue
Block a user