Get Books: Fix ebooks.com website change causing title and author to not be fetched

This commit is contained in:
Kovid Goyal 2012-06-14 19:51:15 +05:30
parent afb9d18597
commit 07203f86bd
2 changed files with 20 additions and 27 deletions

View File

@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
import random import random
import re import re
import urllib2 import urllib
from contextlib import closing from contextlib import closing
from lxml import html from lxml import html
@ -32,7 +32,7 @@ class EbookscomStore(BasicStoreConfig, StorePlugin):
if random.randint(1, 10) in (1, 2, 3): if random.randint(1, 10) in (1, 2, 3):
h_click = 'click-4913808-10364500' h_click = 'click-4913808-10364500'
d_click = 'click-4913808-10281551' d_click = 'click-4913808-10281551'
url = m_url + h_click url = m_url + h_click
detail_url = None detail_url = None
if detail_item: if detail_item:
@ -47,10 +47,10 @@ class EbookscomStore(BasicStoreConfig, StorePlugin):
d.exec_() d.exec_()
def search(self, query, max_results=10, timeout=60): def search(self, query, max_results=10, timeout=60):
url = 'http://www.ebooks.com/SearchApp/SearchResults.net?term=' + urllib2.quote(query) url = 'http://www.ebooks.com/SearchApp/SearchResults.net?term=' + urllib.quote_plus(query)
br = browser() br = browser()
counter = max_results counter = max_results
with closing(br.open(url, timeout=timeout)) as f: with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read()) doc = html.fromstring(f.read())
@ -63,32 +63,29 @@ class EbookscomStore(BasicStoreConfig, StorePlugin):
if not mo: if not mo:
continue continue
id = mo.group() id = mo.group()
cover_url = ''.join(data.xpath('.//div[@class="img"]//img/@src')) cover_url = ''.join(data.xpath('.//div[@class="img"]//img/@src'))
title = '' title = ''.join(data.xpath(
author = '' 'descendant::span[@class="book-title"]/a/text()')).strip()
header_parts = data.xpath('.//div[@class="descr"]/h4//a//text()') author = ''.join(data.xpath(
if header_parts: 'descendant::span[@class="author"]/a/text()')).strip()
title = header_parts[0] if not title or not author:
header_parts = header_parts[1:] continue
if header_parts:
author = ', '.join(header_parts)
counter -= 1 counter -= 1
s = SearchResult() s = SearchResult()
s.cover_url = cover_url s.cover_url = cover_url
s.title = title.strip() s.title = title.strip()
s.author = author.strip() s.author = author.strip()
s.detail_item = '?url=http://www.ebooks.com/cj.asp?IID=' + id.strip() + '&cjsku=' + id.strip() s.detail_item = '?url=http://www.ebooks.com/cj.asp?IID=' + id.strip() + '&cjsku=' + id.strip()
yield s yield s
def get_details(self, search_result, timeout): def get_details(self, search_result, timeout):
url = 'http://www.ebooks.com/ebooks/book_display.asp?IID=' url = 'http://www.ebooks.com/ebooks/book_display.asp?IID='
mo = re.search(r'\?IID=(?P<id>\d+)', search_result.detail_item) mo = re.search(r'\?IID=(?P<id>\d+)', search_result.detail_item)
if mo: if mo:
id = mo.group('id') id = mo.group('id')
@ -99,17 +96,17 @@ class EbookscomStore(BasicStoreConfig, StorePlugin):
br = browser() br = browser()
with closing(br.open(url + id, timeout=timeout)) as nf: with closing(br.open(url + id, timeout=timeout)) as nf:
pdoc = html.fromstring(nf.read()) pdoc = html.fromstring(nf.read())
price_l = pdoc.xpath('//span[@class="price"]/text()') price_l = pdoc.xpath('//span[@class="price"]/text()')
if price_l: if price_l:
price = price_l[0] price = price_l[0]
search_result.price = price.strip() search_result.price = price.strip()
search_result.drm = SearchResult.DRM_UNLOCKED search_result.drm = SearchResult.DRM_UNLOCKED
permissions = ' '.join(pdoc.xpath('//div[@class="permissions-items"]//text()')) permissions = ' '.join(pdoc.xpath('//div[@class="permissions-items"]//text()'))
if 'off' in permissions: if 'off' in permissions:
search_result.drm = SearchResult.DRM_LOCKED search_result.drm = SearchResult.DRM_LOCKED
fdata = pdoc.xpath('//div[contains(@class, "more-links") and contains(@class, "more-links-info")]/div//span/text()') fdata = pdoc.xpath('//div[contains(@class, "more-links") and contains(@class, "more-links-info")]/div//span/text()')
if len(fdata) > 1: if len(fdata) > 1:
search_result.formats = ', '.join(fdata[1:]) search_result.formats = ', '.join(fdata[1:])

View File

@ -48,13 +48,9 @@ class SonyStore(BasicStoreConfig, StorePlugin):
break break
curr = ''.join(item.xpath('descendant::div[@class="pricing"]/descendant::*[@class="currency"]/@title')).strip() curr = ''.join(item.xpath('descendant::div[@class="pricing"]/descendant::*[@class="currency"]/@title')).strip()
if not curr:
curr = 'USD'
amt = ''.join(item.xpath('descendant::div[@class="pricing"]/descendant::*[@class="amount"]/text()')).strip() amt = ''.join(item.xpath('descendant::div[@class="pricing"]/descendant::*[@class="amount"]/text()')).strip()
if not amt:
amt = '0'
s = SearchResult() s = SearchResult()
s.price = curr+' '+amt s.price = (curr+' '+amt) if (curr and amt) else _('Not Available')
title = item.xpath('descendant::h3[@class="item"]') title = item.xpath('descendant::h3[@class="item"]')
if not title: continue if not title: continue
title = etree.tostring(title[0], method='text', title = etree.tostring(title[0], method='text',