mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
f7886289d9
@ -1,6 +1,7 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
#from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from urllib import quote
|
||||
import re
|
||||
|
||||
class SportsIllustratedRecipe(BasicNewsRecipe) :
|
||||
__author__ = 'kwetal'
|
||||
@ -15,65 +16,52 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
|
||||
INDEX = 'http://sportsillustrated.cnn.com/'
|
||||
INDEX = 'http://sportsillustrated.cnn.com/vault/cover/home/index.htm'
|
||||
|
||||
def parse_index(self):
|
||||
answer = []
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
# Find the link to the current issue on the front page. SI Cover
|
||||
cover = soup.find('img', attrs = {'alt' : 'Read All Articles', 'style' : 'vertical-align:bottom;'})
|
||||
if cover:
|
||||
currentIssue = cover.parent['href']
|
||||
if currentIssue:
|
||||
# Open the index of current issue
|
||||
|
||||
index = self.index_to_soup(currentIssue)
|
||||
self.log('\tLooking for current issue in: ' + currentIssue)
|
||||
# Now let us see if they updated their frontpage
|
||||
nav = index.find('div', attrs = {'class': 'siv_trav_top'})
|
||||
if nav:
|
||||
img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_next_v2.jpg'})
|
||||
if img:
|
||||
parent = img.parent
|
||||
if parent.name == 'a':
|
||||
# They didn't update their frontpage; Load the next issue from here
|
||||
href = self.INDEX + parent['href']
|
||||
index = self.index_to_soup(href)
|
||||
self.log('\tLooking for current issue in: ' + href)
|
||||
#Loop through all of the "latest" covers until we find one that actually has articles
|
||||
for item in soup.findAll('div', attrs={'id': re.compile("ecomthumb_latest_*")}):
|
||||
regex = re.compile('ecomthumb_latest_(\d*)')
|
||||
result = regex.search(str(item))
|
||||
current_issue_number = str(result.group(1))
|
||||
current_issue_link = 'http://sportsillustrated.cnn.com/vault/cover/toc/' + current_issue_number + '/index.htm'
|
||||
self.log('Checking this link for a TOC: ', current_issue_link)
|
||||
|
||||
index = self.index_to_soup(current_issue_link)
|
||||
if index:
|
||||
if index.find('div', 'siv_noArticleMessage'):
|
||||
nav = index.find('div', attrs = {'class': 'siv_trav_top'})
|
||||
if nav:
|
||||
# Their frontpage points to an issue without any articles; Use the previous issue
|
||||
img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_previous_v2.jpg'})
|
||||
if img:
|
||||
parent = img.parent
|
||||
if parent.name == 'a':
|
||||
href = self.INDEX + parent['href']
|
||||
index = self.index_to_soup(href)
|
||||
self.log('\tLooking for current issue in: ' + href)
|
||||
self.log('No TOC for this one. Skipping...')
|
||||
else:
|
||||
self.log('Found a TOC... Using this link')
|
||||
break
|
||||
|
||||
# Find all articles.
|
||||
list = index.find('div', attrs = {'class' : 'siv_artList'})
|
||||
if list:
|
||||
self.log ('found siv_artList')
|
||||
articles = []
|
||||
# Get all the artcles ready for calibre.
|
||||
counter = 0
|
||||
for headline in list.findAll('div', attrs = {'class' : 'headline'}):
|
||||
counter = counter + 1
|
||||
title = self.tag_to_string(headline.a) + '\n' + self.tag_to_string(headline.findNextSibling('div', attrs = {'class' : 'info'}))
|
||||
url = self.INDEX + headline.a['href']
|
||||
description = self.tag_to_string(headline.findNextSibling('a').div)
|
||||
article = {'title' : title, 'date' : u'', 'url' : url, 'description' : description}
|
||||
articles.append(article)
|
||||
if counter > 5:
|
||||
break
|
||||
|
||||
# Find all articles.
|
||||
list = index.find('div', attrs = {'class' : 'siv_artList'})
|
||||
if list:
|
||||
articles = []
|
||||
# Get all the artcles ready for calibre.
|
||||
for headline in list.findAll('div', attrs = {'class' : 'headline'}):
|
||||
title = self.tag_to_string(headline.a) + '\n' + self.tag_to_string(headline.findNextSibling('div', attrs = {'class' : 'info'}))
|
||||
url = self.INDEX + headline.a['href']
|
||||
description = self.tag_to_string(headline.findNextSibling('a').div)
|
||||
article = {'title' : title, 'date' : u'', 'url' : url, 'description' : description}
|
||||
# See if we can find a meaningfull title
|
||||
feedTitle = 'Current Issue'
|
||||
hasTitle = index.find('div', attrs = {'class' : 'siv_imageText_head'})
|
||||
if hasTitle :
|
||||
feedTitle = self.tag_to_string(hasTitle.h1)
|
||||
|
||||
articles.append(article)
|
||||
|
||||
# See if we can find a meaningfull title
|
||||
feedTitle = 'Current Issue'
|
||||
hasTitle = index.find('div', attrs = {'class' : 'siv_imageText_head'})
|
||||
if hasTitle :
|
||||
feedTitle = self.tag_to_string(hasTitle.h1)
|
||||
|
||||
answer.append([feedTitle, articles])
|
||||
answer.append([feedTitle, articles])
|
||||
|
||||
return answer
|
||||
|
||||
@ -82,6 +70,7 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
|
||||
# This is the url and the parameters that work to get the print version.
|
||||
printUrl = 'http://si.printthis.clickability.com/pt/printThis?clickMap=printThis'
|
||||
printUrl += '&fb=Y&partnerID=2356&url=' + quote(url)
|
||||
self.log('PrintURL: ' , printUrl)
|
||||
|
||||
return printUrl
|
||||
|
||||
@ -116,4 +105,3 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
|
||||
|
||||
return homeMadeSoup
|
||||
'''
|
||||
|
||||
|
@ -98,7 +98,7 @@ class ANDROID(USBMS):
|
||||
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
|
||||
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
|
||||
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
|
||||
'7', 'A956']
|
||||
'7', 'A956', 'A955']
|
||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||
'A70S', 'A101IT', '7']
|
||||
|
@ -154,17 +154,16 @@ def get_metadata(br, asin, mi):
|
||||
return False
|
||||
if root.xpath('//*[@id="errorMessage"]'):
|
||||
return False
|
||||
ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
|
||||
|
||||
ratings = root.xpath('//div[@class="jumpBar"]/descendant::span[@class="asinReviewsSummary"]')
|
||||
pat = re.compile(r'([0-9.]+) out of (\d+) stars')
|
||||
if ratings:
|
||||
pat = re.compile(r'([0-9.]+) out of (\d+) stars')
|
||||
r = ratings[0]
|
||||
for elem in r.xpath('descendant::*[@title]'):
|
||||
t = elem.get('title')
|
||||
for elem in ratings[0].xpath('descendant::*[@title]'):
|
||||
t = elem.get('title').strip()
|
||||
m = pat.match(t)
|
||||
if m is not None:
|
||||
try:
|
||||
mi.rating = float(m.group(1))/float(m.group(2)) * 5
|
||||
break
|
||||
except:
|
||||
pass
|
||||
|
||||
@ -216,6 +215,7 @@ def main(args=sys.argv):
|
||||
print 'Failed to downlaod social metadata for', title
|
||||
return 1
|
||||
#print '\n\n', time.time() - st, '\n\n'
|
||||
print mi
|
||||
print '\n'
|
||||
|
||||
return 0
|
||||
|
@ -227,6 +227,11 @@ class Metadata(object):
|
||||
if val:
|
||||
identifiers[typ] = val
|
||||
|
||||
def has_identifier(self, typ):
    '''
    Return True if an identifier of type ``typ`` is stored on this object,
    False otherwise.

    :param typ: identifier type, used as a key into the identifiers mapping
    '''
    # _data is read through object.__getattribute__, i.e. with the default
    # attribute lookup rather than whatever lookup this class defines.
    data = object.__getattribute__(self, '_data')
    known = data['identifiers']
    return typ in known
|
||||
|
||||
# field-oriented interface. Intended to be the same as in LibraryDatabase
|
||||
|
||||
def standard_field_keys(self):
|
||||
@ -633,10 +638,6 @@ class Metadata(object):
|
||||
fmt('Publisher', self.publisher)
|
||||
if getattr(self, 'book_producer', False):
|
||||
fmt('Book Producer', self.book_producer)
|
||||
if self.comments:
|
||||
fmt('Comments', self.comments)
|
||||
if self.isbn:
|
||||
fmt('ISBN', self.isbn)
|
||||
if self.tags:
|
||||
fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
|
||||
if self.series:
|
||||
@ -651,6 +652,12 @@ class Metadata(object):
|
||||
fmt('Published', isoformat(self.pubdate))
|
||||
if self.rights is not None:
|
||||
fmt('Rights', unicode(self.rights))
|
||||
if self.identifiers:
|
||||
fmt('Identifiers', u', '.join(['%s:%s'%(k, v) for k, v in
|
||||
self.identifiers.iteritems()]))
|
||||
if self.comments:
|
||||
fmt('Comments', self.comments)
|
||||
|
||||
for key in self.custom_field_keys():
|
||||
val = self.get(key, None)
|
||||
if val:
|
||||
|
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import socket, time
|
||||
import socket, time, re
|
||||
from urllib import urlencode
|
||||
from threading import Thread
|
||||
|
||||
@ -18,14 +18,23 @@ from calibre.ebooks.metadata import check_isbn
|
||||
from calibre.ebooks.metadata.sources.base import Source
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.library.comments import sanitize_comments_html
|
||||
from calibre.utils.date import parse_date
|
||||
|
||||
class Worker(Thread):
|
||||
class Worker(Thread): # {{{
|
||||
|
||||
'''
|
||||
Get book details from amazons book page in a separate thread
|
||||
'''
|
||||
|
||||
def __init__(self, url, result_queue, browser, log, timeout=20):
|
||||
Thread.__init__(self)
|
||||
self.daemon = True
|
||||
self.url, self.result_queue = url, result_queue
|
||||
self.log, self.timeout = log, timeout
|
||||
self.browser = browser.clone_browser()
|
||||
self.cover_url = self.amazon_id = None
|
||||
self.cover_url = self.amazon_id = self.isbn = None
|
||||
|
||||
def run(self):
|
||||
try:
|
||||
@ -53,6 +62,7 @@ class Worker(Thread):
|
||||
|
||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||
resolve_entities=True)[0]
|
||||
# open('/t/t.html', 'wb').write(raw)
|
||||
|
||||
if '<title>404 - ' in raw:
|
||||
self.log.error('URL malformed: %r'%self.url)
|
||||
@ -75,8 +85,181 @@ class Worker(Thread):
|
||||
self.parse_details(root)
|
||||
|
||||
def parse_details(self, root):
    '''
    Build a Metadata object from a parsed book details page and put it on
    ``self.result_queue``.

    ASIN, title and authors are mandatory: if any of them is missing the
    failure is logged and nothing is queued. Every other field is parsed on
    a best-effort basis; an error while parsing one field is logged and the
    remaining fields are still processed.

    Also records ``self.amazon_id``, ``self.cover_url`` and ``self.isbn``
    when the corresponding values are found.

    :param root: parsed details page; only its ``xpath()`` method is used
                 here, the rest is delegated to the ``parse_*`` helpers
    '''
    try:
        asin = self.parse_asin(root)
    except Exception:
        self.log.exception('Error parsing asin for url: %r'%self.url)
        asin = None

    try:
        title = self.parse_title(root)
    except Exception:
        self.log.exception('Error parsing title for url: %r'%self.url)
        title = None

    try:
        authors = self.parse_authors(root)
    except Exception:
        self.log.exception('Error parsing authors for url: %r'%self.url)
        authors = []

    # Without these three there is nothing useful to report
    if not title or not authors or not asin:
        self.log.error('Could not find title/authors/asin for %r'%self.url)
        self.log.error('ASIN: %r Title: %r Authors: %r'%(asin, title,
            authors))
        return

    mi = Metadata(title, authors)
    mi.set_identifier('amazon', asin)
    self.amazon_id = asin

    try:
        mi.rating = self.parse_rating(root)
    except Exception:
        self.log.exception('Error parsing ratings for url: %r'%self.url)

    try:
        mi.comments = self.parse_comments(root)
    except Exception:
        self.log.exception('Error parsing comments for url: %r'%self.url)

    try:
        self.cover_url = self.parse_cover(root)
    except Exception:
        self.log.exception('Error parsing cover for url: %r'%self.url)
    mi.has_cover = bool(self.cover_url)

    # ISBN, publisher, publication date and language are all read from the
    # "Product Details" box
    pd = root.xpath('//h2[text()="Product Details"]/../div[@class="content"]')
    if pd:
        pd = pd[0]

        try:
            isbn = self.parse_isbn(pd)
            if isbn:
                self.isbn = mi.isbn = isbn
        except Exception:
            self.log.exception('Error parsing ISBN for url: %r'%self.url)

        try:
            mi.publisher = self.parse_publisher(pd)
        except Exception:
            self.log.exception('Error parsing publisher for url: %r'%self.url)

        try:
            mi.pubdate = self.parse_pubdate(pd)
        except Exception:
            self.log.exception('Error parsing publish date for url: %r'%self.url)

        try:
            lang = self.parse_language(pd)
            if lang:
                mi.language = lang
        except Exception:
            self.log.exception('Error parsing language for url: %r'%self.url)

    else:
        self.log.warning('Failed to find product description for url: %r'%self.url)

    self.result_queue.put(mi)
|
||||
|
||||
def parse_asin(self, root):
    '''
    Return the last path component of the page's canonical link, or None
    if the page has no ``<link rel="canonical" href=...>`` element.

    :param root: parsed details page supporting ``xpath()``
    '''
    canonical = root.xpath('//link[@rel="canonical" and @href]')
    if not canonical:
        return None
    href = canonical[0].get('href')
    # Everything after the final '/' of the canonical URL
    return href.rpartition('/')[-1]
|
||||
|
||||
def parse_title(self, root):
    '''
    Return the book title as text, with any parenthesised or bracketed
    part removed.

    The text is taken from the ``btAsinTitle`` element inside the
    ``parseasinTitle`` heading when present, otherwise from the heading
    itself. Raises IndexError if the page has no such heading.

    :param root: parsed details page supporting ``xpath()``
    '''
    heading = root.xpath('//h1[@class="parseasinTitle"]')[0]
    inner = heading.xpath('descendant::*[@id="btAsinTitle"]')
    node = inner[0] if inner else heading
    text = tostring(node, encoding=unicode, method='text').strip()
    # Drop everything from the first opening ( or [ to the last closing ) or ]
    return re.sub(r'[(\[].*[)\]]', '', text).strip()
|
||||
|
||||
def parse_authors(self, root):
    '''
    Return the list of author names found next to the title heading.

    Names are read from the links and ``contributorNameTrigger`` spans that
    are children of the spans following the ``parseasinTitle`` heading.

    :param root: parsed details page supporting ``xpath()``
    '''
    query = ('//h1[@class="parseasinTitle"]/following-sibling::span/'
             '*[(name()="a" and @href) or (name()="span" and '
             '@class="contributorNameTrigger")]')
    nodes = root.xpath(query)
    # Clear the trailing text of every node first so that it does not end
    # up in the serialized names below
    for node in nodes:
        node.tail = ''
    names = []
    for node in nodes:
        names.append(tostring(node, encoding=unicode, method='text').strip())
    return names
|
||||
|
||||
def parse_rating(self, root):
    '''
    Return the review rating rescaled to a 0-5 range, or None when no
    "X out of Y stars" title is found in the first reviews summary.

    :param root: parsed details page supporting ``xpath()``
    '''
    summaries = root.xpath('//div[@class="jumpBar"]/descendant::span[@class="asinReviewsSummary"]')
    stars = re.compile(r'([0-9.]+) out of (\d+) stars')
    if not summaries:
        return None
    for node in summaries[0].xpath('descendant::*[@title]'):
        found = stars.match(node.get('title').strip())
        if found is None:
            continue
        score, scale = found.group(1), found.group(2)
        return float(score)/float(scale) * 5
    return None
|
||||
|
||||
def parse_comments(self, root):
    '''
    Return the product description as sanitized HTML, or None when the
    page has no ``productDescription`` content block.

    :param root: parsed details page supporting ``xpath()``
    '''
    found = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
    if not found:
        return None
    block = found[0]
    unwanted = block.xpath('descendant::*[@class="seeAll" or'
            ' @class="emptyClear" or @href]')
    for node in unwanted:
        node.getparent().remove(node)
    html = tostring(block, method='html', encoding=unicode).strip()

    cleanups = (
        # remove all attributes from tags
        (r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>'),
        # Remove the notice about text referring to out of print editions
        (r'(?s)<em>--This text ref.*?</em>', ''),
        # Remove comments
        (r'(?s)<!--.*?-->', ''),
    )
    for pattern, replacement in cleanups:
        html = re.sub(pattern, replacement, html)
    return sanitize_comments_html(html)
|
||||
|
||||
def parse_cover(self, root):
    '''
    Return the URL of the cover image with the modifiers between
    underscores removed from the file name, or None when no usable image
    is found.

    None is returned when the page has no ``prodImage`` element, when the
    image is the "no image available" placeholder, or when the URL has
    three or fewer ``/``-separated parts.

    :param root: parsed details page supporting ``xpath()``
    '''
    imgs = root.xpath('//img[@id="prodImage" and @src]')
    if not imgs:
        return None
    src = imgs[0].get('src')
    if '/no-image-avail' in src:
        return None
    parts = src.split('/')
    if len(parts) <= 3:
        return None
    bn = parts[-1]
    sparts = bn.split('_')
    if len(sparts) > 2:
        # 'name._SL500_AA300_.jpg' -> 'name.' + '.jpg'. Joining the two
        # pieces leaves a doubled dot before the extension; collapse it so
        # the result is 'name.jpg' rather than 'name..jpg'.
        bn = re.sub(r'\.\.(\w+)$', r'.\1', sparts[0] + sparts[-1])
    return ('/'.join(parts[:-1])) + '/' + bn
|
||||
|
||||
def parse_isbn(self, pd):
    '''
    Return the first value accepted by check_isbn() among the texts that
    follow the "ISBN..." labels, scanning the labels from last to first.
    Returns None if no label yields an accepted value.

    :param pd: product details element supporting ``xpath()``
    '''
    labels = pd.xpath('descendant::*[starts-with(text(), "ISBN")]')
    for label in reversed(labels):
        tail = label.tail
        if not tail:
            continue
        isbn = check_isbn(tail.strip())
        if isbn:
            return isbn
    return None
|
||||
|
||||
def parse_publisher(self, pd):
    '''
    Return the publisher name taken from the text following the last
    "Publisher:" label that has trailing text, or None if there is none.

    The name is whatever precedes the first ';' and the first '(' of that
    text, stripped of surrounding whitespace.

    :param pd: product details element supporting ``xpath()``
    '''
    labels = pd.xpath('descendant::*[starts-with(text(), "Publisher:")]')
    for label in reversed(labels):
        tail = label.tail
        if not tail:
            continue
        before_semicolon = tail.split(';', 1)[0]
        return before_semicolon.split('(', 1)[0].strip()
    return None
|
||||
|
||||
def parse_pubdate(self, pd):
    '''
    Return the publication date parsed from the text following the last
    "Publisher:" label that has trailing text, or None if there is none.

    The date string is whatever follows the first '(' of that text, with
    every ')' removed, and is handed to parse_date() with assume_utc=True.

    :param pd: product details element supporting ``xpath()``
    '''
    labels = pd.xpath('descendant::*[starts-with(text(), "Publisher:")]')
    for label in reversed(labels):
        tail = label.tail
        if not tail:
            continue
        after_paren = tail.partition('(')[-1]
        when = after_paren.replace(')', '').strip()
        return parse_date(when, assume_utc=True)
    return None
|
||||
|
||||
def parse_language(self, pd):
    '''
    Return 'en' if the text following any "Language:" label is exactly
    'English' (ignoring surrounding whitespace), otherwise None.

    :param pd: product details element supporting ``xpath()``
    '''
    labels = pd.xpath('descendant::*[starts-with(text(), "Language:")]')
    for label in reversed(labels):
        tail = label.tail
        if tail and tail.strip() == 'English':
            return 'en'
    return None
|
||||
# }}}
|
||||
|
||||
class Amazon(Source):
|
||||
|
||||
@ -84,7 +267,8 @@ class Amazon(Source):
|
||||
description = _('Downloads metadata from Amazon')
|
||||
|
||||
capabilities = frozenset(['identify'])
|
||||
touched_fields = frozenset(['title', 'authors', 'isbn', 'pubdate', 'comments'])
|
||||
touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
|
||||
'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate'])
|
||||
|
||||
AMAZON_DOMAINS = {
|
||||
'com': _('US'),
|
||||
@ -92,7 +276,7 @@ class Amazon(Source):
|
||||
'de' : _('Germany'),
|
||||
}
|
||||
|
||||
def create_query(self, log, title=None, authors=None, identifiers={}):
|
||||
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
|
||||
domain = self.prefs.get('domain', 'com')
|
||||
|
||||
# See the amazon detailed search page to get all options
|
||||
@ -135,9 +319,14 @@ class Amazon(Source):
|
||||
url = 'http://www.amazon.%s/s/?'%domain + urlencode(utf8q)
|
||||
return url
|
||||
|
||||
# }}}
|
||||
|
||||
def identify(self, log, result_queue, abort, title=None, authors=None,
|
||||
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
|
||||
identifiers={}, timeout=20):
|
||||
'''
|
||||
Note this method will retry without identifiers automatically if no
|
||||
match is found with identifiers.
|
||||
'''
|
||||
query = self.create_query(log, title=title, authors=authors,
|
||||
identifiers=identifiers)
|
||||
if query is None:
|
||||
@ -165,37 +354,45 @@ class Amazon(Source):
|
||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||
resolve_entities=True)[0]
|
||||
|
||||
if '<title>404 - ' in raw:
|
||||
log.error('No matches found for query: %r'%query)
|
||||
return
|
||||
|
||||
try:
|
||||
root = soupparser.fromstring(clean_ascii_chars(raw))
|
||||
except:
|
||||
msg = 'Failed to parse amazon page for query: %r'%query
|
||||
log.exception(msg)
|
||||
return msg
|
||||
|
||||
errmsg = root.xpath('//*[@id="errorMessage"]')
|
||||
if errmsg:
|
||||
msg = tostring(errmsg, method='text', encoding=unicode).strip()
|
||||
log.error(msg)
|
||||
# The error is almost always a not found error
|
||||
return
|
||||
|
||||
matches = []
|
||||
for div in root.xpath(r'//div[starts-with(@id, "result_")]'):
|
||||
for a in div.xpath(r'descendant::a[@class="title" and @href]'):
|
||||
title = tostring(a, method='text', encoding=unicode).lower()
|
||||
if 'bulk pack' not in title:
|
||||
matches.append(a.get('href'))
|
||||
break
|
||||
found = '<title>404 - ' not in raw
|
||||
|
||||
if found:
|
||||
try:
|
||||
root = soupparser.fromstring(clean_ascii_chars(raw))
|
||||
except:
|
||||
msg = 'Failed to parse amazon page for query: %r'%query
|
||||
log.exception(msg)
|
||||
return msg
|
||||
|
||||
errmsg = root.xpath('//*[@id="errorMessage"]')
|
||||
if errmsg:
|
||||
msg = tostring(errmsg, method='text', encoding=unicode).strip()
|
||||
log.error(msg)
|
||||
# The error is almost always a not found error
|
||||
found = False
|
||||
|
||||
if found:
|
||||
for div in root.xpath(r'//div[starts-with(@id, "result_")]'):
|
||||
for a in div.xpath(r'descendant::a[@class="title" and @href]'):
|
||||
title = tostring(a, method='text', encoding=unicode).lower()
|
||||
if 'bulk pack' not in title:
|
||||
matches.append(a.get('href'))
|
||||
break
|
||||
|
||||
# Keep only the top 5 matches as the matches are sorted by relevance by
|
||||
# Amazon so lower matches are not likely to be very relevant
|
||||
matches = matches[:5]
|
||||
|
||||
if abort.is_set():
|
||||
return
|
||||
|
||||
if not matches:
|
||||
if identifiers and title and authors:
|
||||
log('No matches found with identifiers, retrying using only'
|
||||
' title and authors')
|
||||
return self.identify(log, result_queue, abort, title=title,
|
||||
authors=authors, timeout=timeout)
|
||||
log.error('No matches found with query: %r'%query)
|
||||
return
|
||||
|
||||
@ -217,21 +414,63 @@ class Amazon(Source):
|
||||
if not a_worker_is_alive:
|
||||
break
|
||||
|
||||
for w in workers:
|
||||
if w.amazon_id:
|
||||
if w.isbn:
|
||||
self.cache_isbn_to_identifier(w.isbn, w.amazon_id)
|
||||
if w.cover_url:
|
||||
self.cache_identifier_to_cover_url(w.amazon_id,
|
||||
w.cover_url)
|
||||
|
||||
return None
|
||||
# }}}
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# To run these test use: calibre-debug -e
|
||||
# src/calibre/ebooks/metadata/sources/amazon.py
|
||||
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
|
||||
title_test)
|
||||
title_test, authors_test)
|
||||
test_identify_plugin(Amazon.name,
|
||||
[
|
||||
|
||||
(
|
||||
{'identifiers':{'isbn': '0743273567'}},
|
||||
[title_test('The great gatsby', exact=True)]
|
||||
( # An e-book ISBN not on Amazon, one of the authors is
|
||||
# unknown to Amazon, so no popup wrapper
|
||||
{'identifiers':{'isbn': '0307459671'},
|
||||
'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
|
||||
[title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
|
||||
exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]
|
||||
|
||||
),
|
||||
|
||||
( # This isbn not on amazon
|
||||
{'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
|
||||
'authors':['Lutz']},
|
||||
[title_test('Learning Python: Powerful Object-Oriented Programming',
|
||||
exact=True), authors_test(['Mark Lutz'])
|
||||
]
|
||||
|
||||
),
|
||||
|
||||
( # Sophisticated comment formatting
|
||||
{'identifiers':{'isbn': '9781416580829'}},
|
||||
[title_test('Angels & Demons - Movie Tie-In: A Novel',
|
||||
exact=True), authors_test(['Dan Brown'])]
|
||||
),
|
||||
|
||||
( # No specific problems
|
||||
{'identifiers':{'isbn': '0743273567'}},
|
||||
[title_test('The great gatsby', exact=True),
|
||||
authors_test(['F. Scott Fitzgerald'])]
|
||||
),
|
||||
|
||||
( # A newer book
|
||||
{'identifiers':{'isbn': '9780316044981'}},
|
||||
[title_test('The Heroes', exact=True),
|
||||
authors_test(['Joe Abercrombie'])]
|
||||
|
||||
),
|
||||
|
||||
])
|
||||
|
||||
|
||||
|
@ -35,6 +35,7 @@ class Source(Plugin):
|
||||
def __init__(self, *args, **kwargs):
|
||||
Plugin.__init__(self, *args, **kwargs)
|
||||
self._isbn_to_identifier_cache = {}
|
||||
self._identifier_to_cover_url_cache = {}
|
||||
self.cache_lock = threading.RLock()
|
||||
self._config_obj = None
|
||||
self._browser = None
|
||||
@ -68,6 +69,14 @@ class Source(Plugin):
|
||||
with self.cache_lock:
|
||||
return self._isbn_to_identifier_cache.get(isbn, None)
|
||||
|
||||
def cache_identifier_to_cover_url(self, id_, url):
    '''
    Remember ``url`` as the cover URL for the identifier ``id_``,
    replacing any URL stored for it earlier. The cache is updated while
    holding ``self.cache_lock``.
    '''
    with self.cache_lock:
        cover_urls = self._identifier_to_cover_url_cache
        cover_urls[id_] = url
|
||||
|
||||
def cached_identifier_to_cover_url(self, id_):
    '''
    Return the cover URL cached for the identifier ``id_``, or None if
    nothing has been cached for it. The cache is read while holding
    ``self.cache_lock``.
    '''
    with self.cache_lock:
        cover_urls = self._identifier_to_cover_url_cache
        return cover_urls.get(id_)
|
||||
|
||||
def get_author_tokens(self, authors, only_first_author=True):
|
||||
'''
|
||||
Take a list of authors and return a list of tokens useful for an
|
||||
|
@ -42,7 +42,7 @@ subject = XPath('descendant::dc:subject')
|
||||
description = XPath('descendant::dc:description')
|
||||
language = XPath('descendant::dc:language')
|
||||
|
||||
def get_details(browser, url, timeout):
|
||||
def get_details(browser, url, timeout): # {{{
|
||||
try:
|
||||
raw = browser.open_novisit(url, timeout=timeout).read()
|
||||
except Exception as e:
|
||||
@ -54,8 +54,9 @@ def get_details(browser, url, timeout):
|
||||
raw = browser.open_novisit(url, timeout=timeout).read()
|
||||
|
||||
return raw
|
||||
# }}}
|
||||
|
||||
def to_metadata(browser, log, entry_, timeout):
|
||||
def to_metadata(browser, log, entry_, timeout): # {{{
|
||||
|
||||
def get_text(extra, x):
|
||||
try:
|
||||
@ -94,12 +95,6 @@ def to_metadata(browser, log, entry_, timeout):
|
||||
#mi.language = get_text(extra, language)
|
||||
mi.publisher = get_text(extra, publisher)
|
||||
|
||||
# Author sort
|
||||
for x in creator(extra):
|
||||
for key, val in x.attrib.items():
|
||||
if key.endswith('file-as') and val and val.strip():
|
||||
mi.author_sort = val
|
||||
break
|
||||
# ISBN
|
||||
isbns = []
|
||||
for x in identifier(extra):
|
||||
@ -137,7 +132,7 @@ def to_metadata(browser, log, entry_, timeout):
|
||||
|
||||
|
||||
return mi
|
||||
|
||||
# }}}
|
||||
|
||||
class GoogleBooks(Source):
|
||||
|
||||
@ -145,10 +140,11 @@ class GoogleBooks(Source):
|
||||
description = _('Downloads metadata from Google Books')
|
||||
|
||||
capabilities = frozenset(['identify'])
|
||||
touched_fields = frozenset(['title', 'authors', 'isbn', 'tags', 'pubdate',
|
||||
'comments', 'publisher', 'author_sort']) # language currently disabled
|
||||
touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
|
||||
'comments', 'publisher', 'identifier:isbn',
|
||||
'identifier:google']) # language currently disabled
|
||||
|
||||
def create_query(self, log, title=None, authors=None, identifiers={}):
|
||||
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
|
||||
BASE_URL = 'http://books.google.com/books/feeds/volumes?'
|
||||
isbn = check_isbn(identifiers.get('isbn', None))
|
||||
q = ''
|
||||
@ -176,6 +172,7 @@ class GoogleBooks(Source):
|
||||
'start-index':1,
|
||||
'min-viewability':'none',
|
||||
})
|
||||
# }}}
|
||||
|
||||
def cover_url_from_identifiers(self, identifiers):
|
||||
goog = identifiers.get('google', None)
|
||||
@ -208,11 +205,11 @@ class GoogleBooks(Source):
|
||||
if abort.is_set():
|
||||
break
|
||||
|
||||
def identify(self, log, result_queue, abort, title=None, authors=None,
|
||||
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
|
||||
identifiers={}, timeout=20):
|
||||
query = self.create_query(log, title=title, authors=authors,
|
||||
identifiers=identifiers)
|
||||
br = self.browser()
|
||||
br = self.browser
|
||||
try:
|
||||
raw = br.open_novisit(query, timeout=timeout).read()
|
||||
except Exception, e:
|
||||
@ -233,6 +230,7 @@ class GoogleBooks(Source):
|
||||
self.get_all_details(br, log, entries, abort, result_queue, timeout)
|
||||
|
||||
return None
|
||||
# }}}
|
||||
|
||||
if __name__ == '__main__':
|
||||
# To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
|
||||
|
@ -37,6 +37,15 @@ def title_test(title, exact=False):
|
||||
|
||||
return test
|
||||
|
||||
def authors_test(authors):
    '''
    Build a test function that checks the authors of a result.

    The returned function takes a metadata object and returns True when
    the set of its ``authors`` equals the set of expected ``authors``,
    comparing names case-insensitively and ignoring order and duplicates.

    :param authors: iterable of expected author names
    '''
    expected = set(name.lower() for name in authors)

    def test(mi):
        found = set(name.lower() for name in mi.authors)
        return found == expected

    return test
|
||||
|
||||
def test_identify_plugin(name, tests):
|
||||
'''
|
||||
:param name: Plugin name
|
||||
@ -102,6 +111,16 @@ def test_identify_plugin(name, tests):
|
||||
prints('Log saved to', lf)
|
||||
raise SystemExit(1)
|
||||
|
||||
for key in plugin.touched_fields:
|
||||
if key.startswith('identifier:'):
|
||||
key = key.partition(':')[-1]
|
||||
if not match_found.has_identifier(key):
|
||||
prints('Failed to find identifier:', key)
|
||||
raise SystemExit(1)
|
||||
elif match_found.is_null(key):
|
||||
prints('Failed to find', key)
|
||||
raise SystemExit(1)
|
||||
|
||||
prints('Average time per query', sum(times)/len(times))
|
||||
|
||||
if os.stat(lf).st_size > 10:
|
||||
|
@ -807,7 +807,7 @@ class Textile(object):
|
||||
|
||||
for qtag in qtags:
|
||||
pattern = re.compile(r"""
|
||||
(?:^|(?<=[\s>%(pnct)s])|([\]}]))
|
||||
(?:^|(?<=[\s>%(pnct)s])|\[|([\]}]))
|
||||
(%(qtag)s)(?!%(qtag)s)
|
||||
(%(c)s)
|
||||
(?::(\S+))?
|
||||
|
@ -6,7 +6,7 @@
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>767</width>
|
||||
<width>792</width>
|
||||
<height>575</height>
|
||||
</rect>
|
||||
</property>
|
||||
@ -44,7 +44,7 @@
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>469</width>
|
||||
<width>486</width>
|
||||
<height>504</height>
|
||||
</rect>
|
||||
</property>
|
||||
|
@ -20,7 +20,8 @@ Usage: %prog [options]
|
||||
Launch the Graphical User Interface
|
||||
'''):
|
||||
parser = OptionParser(usage)
|
||||
parser.add_option('--redirect-console-output', default=False, action='store_true', dest='redirect',
|
||||
# The b is required because of a regression in optparse.py in python 2.7.0
|
||||
parser.add_option(b'--redirect-console-output', default=False, action='store_true', dest='redirect',
|
||||
help=_('Redirect console output to a dialog window (both stdout and stderr). Useful on windows where GUI apps do not have a output streams.'))
|
||||
return parser
|
||||
|
||||
|
@ -225,6 +225,12 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
|
||||
self.action_quit.setShortcuts(qs)
|
||||
self.connect(self.action_quit, SIGNAL('triggered(bool)'),
|
||||
lambda x:QApplication.instance().quit())
|
||||
self.action_focus_search = QAction(self)
|
||||
self.addAction(self.action_focus_search)
|
||||
self.action_focus_search.setShortcuts([Qt.Key_Slash,
|
||||
QKeySequence(QKeySequence.Find)])
|
||||
self.action_focus_search.triggered.connect(lambda x:
|
||||
self.search.setFocus(Qt.OtherFocusReason))
|
||||
self.action_copy.setDisabled(True)
|
||||
self.action_metadata.setCheckable(True)
|
||||
self.action_metadata.setShortcut(Qt.CTRL+Qt.Key_I)
|
||||
@ -293,6 +299,9 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
|
||||
ca.setShortcut(QKeySequence.Copy)
|
||||
self.addAction(ca)
|
||||
self.open_history_menu = QMenu()
|
||||
self.clear_recent_history_action = QAction(
|
||||
_('Clear list of recently opened books'), self)
|
||||
self.clear_recent_history_action.triggered.connect(self.clear_recent_history)
|
||||
self.build_recent_menu()
|
||||
self.action_open_ebook.setMenu(self.open_history_menu)
|
||||
self.open_history_menu.triggered[QAction].connect(self.open_recent)
|
||||
@ -301,11 +310,19 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
|
||||
|
||||
self.restore_state()
|
||||
|
||||
def clear_recent_history(self, *args):
    '''
    Empty the stored list of recently opened books and rebuild the recent
    books menu. Any positional arguments are accepted and ignored.
    '''
    no_history = []
    vprefs.set('viewer_open_history', no_history)
    self.build_recent_menu()
|
||||
|
||||
def build_recent_menu(self):
|
||||
m = self.open_history_menu
|
||||
m.clear()
|
||||
recent = vprefs.get('viewer_open_history', [])
|
||||
if recent:
|
||||
m.addAction(self.clear_recent_history_action)
|
||||
m.addSeparator()
|
||||
count = 0
|
||||
for path in vprefs.get('viewer_open_history', []):
|
||||
for path in recent:
|
||||
if count > 9:
|
||||
break
|
||||
if os.path.exists(path):
|
||||
@ -494,12 +511,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
|
||||
if self.view.search(text, backwards=backwards):
|
||||
self.scrolled(self.view.scroll_fraction)
|
||||
|
||||
def keyPressEvent(self, event):
|
||||
if event.key() == Qt.Key_Slash:
|
||||
self.search.setFocus(Qt.OtherFocusReason)
|
||||
else:
|
||||
return MainWindow.keyPressEvent(self, event)
|
||||
|
||||
def internal_link_clicked(self, frac):
|
||||
self.history.add(self.pos.value())
|
||||
|
||||
|
@ -12,7 +12,7 @@ import cherrypy
|
||||
|
||||
from calibre.constants import filesystem_encoding
|
||||
from calibre import isbytestring, force_unicode, fit_image, \
|
||||
prepare_string_for_xml as xml
|
||||
prepare_string_for_xml
|
||||
from calibre.utils.ordered_dict import OrderedDict
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.utils.config import prefs, tweaks
|
||||
@ -23,6 +23,10 @@ from calibre.library.server import custom_fields_to_display
|
||||
from calibre.library.field_metadata import category_icon_map
|
||||
from calibre.library.server.utils import quote, unquote
|
||||
|
||||
def xml(*args, **kwargs):
    '''
    Call prepare_string_for_xml() with the given arguments and return its
    result with every ``&apos;`` entity replaced by the numeric character
    reference ``&#39;``.
    '''
    ans = prepare_string_for_xml(*args, **kwargs)
    # NOTE(review): &apos; is defined by XML but is not an HTML 4 entity,
    # which is presumably why the numeric form is emitted - confirm.
    return ans.replace('&apos;', '&#39;')
|
||||
|
||||
def render_book_list(ids, prefix, suffix=''): # {{{
|
||||
pages = []
|
||||
num = len(ids)
|
||||
|
@ -508,9 +508,9 @@ You have two choices:
|
||||
1. Create a patch by hacking on |app| and send it to me for review and inclusion. See `Development <http://calibre-ebook.com/get-involved>`_.
|
||||
2. `Open a ticket <http://bugs.calibre-ebook.com/newticket>`_ (you have to register and login first). Remember that |app| development is done by volunteers, so if you get no response to your feature request, it means no one feels like implementing it.
|
||||
|
||||
Can I include |app| on a CD to be distributed with my product/magazine?
|
||||
How is |app| licensed?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|app| is licensed under the GNU General Public License v3 (an open source license). This means that you are free to redistribute |app| as long as you make the source code available. So if you want to put |app| on a CD with your product, you must also put the |app| source code on the CD. The source code is available for download `from googlecode <http://code.google.com/p/calibre-ebook/downloads/list>`_.
|
||||
|app| is licensed under the GNU General Public License v3 (an open source license). This means that you are free to redistribute |app| as long as you make the source code available. So if you want to put |app| on a CD with your product, you must also put the |app| source code on the CD. The source code is available for download `from googlecode <http://code.google.com/p/calibre-ebook/downloads/list>`_. You are free to use the results of conversions from |app| however you want. You cannot use code or libraries from |app| in your software without making your software open source. For details, see `The GNU GPL v3 <http://www.gnu.org/licenses/gpl.html>`_.
|
||||
|
||||
How do I run calibre from my USB stick?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
Loading…
x
Reference in New Issue
Block a user