Merge from main branch

This commit is contained in:
Tom Scholl 2011-04-05 23:00:13 +00:00
commit b5550fc481
15 changed files with 239 additions and 107 deletions

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
perfil.com perfil.com
''' '''
@ -39,9 +39,9 @@ class Perfil(BasicNewsRecipe):
dict(name=['iframe','embed','object','base','meta','link']) dict(name=['iframe','embed','object','base','meta','link'])
,dict(name='a', attrs={'href':'#comentarios'}) ,dict(name='a', attrs={'href':'#comentarios'})
,dict(name='div', attrs={'class':'foto3'}) ,dict(name='div', attrs={'class':'foto3'})
,dict(name='img', attrs={'alt':'ampliar'}) ,dict(name='img', attrs={'alt':['ampliar','Ampliar']})
] ]
keep_only_tags=[dict(attrs={'class':['bd468a','cuerpoSuperior']})] keep_only_tags=[dict(attrs={'class':['articulo','cuerpoSuperior']})]
remove_attributes=['onload','lang','width','height','border'] remove_attributes=['onload','lang','width','height','border']
feeds = [ feeds = [

View File

@ -281,16 +281,17 @@ def get_parsed_proxy(typ='http', debug=True):
def random_user_agent(): def random_user_agent():
choices = [ choices = [
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)', 'Mozilla/5.0 (Windows NT 5.2; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11', 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.1 Safari/525.19', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.1 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11', 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en; rv:1.8.1.14) Gecko/20080409 Camino/1.6 (like Firefox/2.0.0.14)',
'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.0.1) Gecko/20060118 Camino/1.0b2+',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.78 Safari/532.5', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.78 Safari/532.5',
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)', 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
] ]
#return choices[-1]
return choices[random.randint(0, len(choices)-1)] return choices[random.randint(0, len(choices)-1)]

View File

@ -62,24 +62,22 @@ class OEB2HTML(object):
self.links[aid] = 'calibre_link-%s' % len(self.links.keys()) self.links[aid] = 'calibre_link-%s' % len(self.links.keys())
return self.links[aid] return self.links[aid]
def rewrite_links(self, tag, attribs, page): def rewrite_link(self, tag, attribs, page):
# Rewrite ids. # Rewrite ids.
if 'id' in attribs: if 'id' in attribs:
attribs['id'] = self.get_link_id(page.href, attribs['id']) attribs['id'] = self.get_link_id(page.href, attribs['id'])
# Rewrite links. # Rewrite links.
if tag == 'a': if tag == 'a' and 'href' in attribs:
href = attribs['href'] href = page.abshref(attribs['href'])
href = page.abshref(href)
if self.url_is_relative(href): if self.url_is_relative(href):
if '#' not in href: id = ''
href += '#' if '#' in href:
if href not in self.links: href, n, id = href.partition('#')
self.links[href] = 'calibre_link-%s' % len(self.links.keys()) href = '#%s' % self.get_link_id(href, id)
href = '#%s' % self.links[href]
attribs['href'] = href attribs['href'] = href
return attribs return attribs
def rewrite_images(self, tag, attribs, page): def rewrite_image(self, tag, attribs, page):
if tag == 'img': if tag == 'img':
src = attribs.get('src', None) src = attribs.get('src', None)
if src: if src:
@ -131,6 +129,10 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
tags = [] tags = []
tag = barename(elem.tag) tag = barename(elem.tag)
attribs = elem.attrib attribs = elem.attrib
attribs = self.rewrite_link(tag, attribs, page)
attribs = self.rewrite_image(tag, attribs, page)
if tag == 'body': if tag == 'body':
tag = 'div' tag = 'div'
attribs['id'] = self.get_link_id(page.href, '') attribs['id'] = self.get_link_id(page.href, '')
@ -147,9 +149,6 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
if 'style' in attribs: if 'style' in attribs:
del attribs['style'] del attribs['style']
attribs = self.rewrite_links(tag, attribs, page)
attribs = self.rewrite_images(tag, attribs, page)
# Turn the rest of the attributes into a string we can write with the tag. # Turn the rest of the attributes into a string we can write with the tag.
at = '' at = ''
for k, v in attribs.items(): for k, v in attribs.items():
@ -219,6 +218,9 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
tag = barename(elem.tag) tag = barename(elem.tag)
attribs = elem.attrib attribs = elem.attrib
attribs = self.rewrite_link(tag, attribs, page)
attribs = self.rewrite_image(tag, attribs, page)
style_a = '%s' % style style_a = '%s' % style
if tag == 'body': if tag == 'body':
tag = 'div' tag = 'div'
@ -233,9 +235,6 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
if 'style' in attribs: if 'style' in attribs:
del attribs['style'] del attribs['style']
attribs = self.rewrite_links(tag, attribs, page)
attribs = self.rewrite_images(tag, attribs, page)
# Turn the rest of the attributes into a string we can write with the tag. # Turn the rest of the attributes into a string we can write with the tag.
at = '' at = ''
for k, v in attribs.items(): for k, v in attribs.items():
@ -312,6 +311,9 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
tag = barename(elem.tag) tag = barename(elem.tag)
attribs = elem.attrib attribs = elem.attrib
attribs = self.rewrite_link(tag, attribs, page)
attribs = self.rewrite_image(tag, attribs, page)
if tag == 'body': if tag == 'body':
tag = 'div' tag = 'div'
attribs['id'] = self.get_link_id(page.href, '') attribs['id'] = self.get_link_id(page.href, '')
@ -321,9 +323,6 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
if 'style' in attribs: if 'style' in attribs:
del attribs['style'] del attribs['style']
attribs = self.rewrite_links(tag, attribs, page)
attribs = self.rewrite_images(tag, attribs, page)
# Turn the rest of the attributes into a string we can write with the tag. # Turn the rest of the attributes into a string we can write with the tag.
at = '' at = ''
for k, v in attribs.items(): for k, v in attribs.items():

View File

@ -218,6 +218,9 @@ class Worker(Thread): # Get details {{{
' @class="emptyClear" or @href]'): ' @class="emptyClear" or @href]'):
c.getparent().remove(c) c.getparent().remove(c)
desc = tostring(desc, method='html', encoding=unicode).strip() desc = tostring(desc, method='html', encoding=unicode).strip()
# Encoding bug in Amazon data U+fffd (replacement char)
# in some examples it is present in place of '
desc = desc.replace('\ufffd', "'")
# remove all attributes from tags # remove all attributes from tags
desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc) desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
# Collapse whitespace # Collapse whitespace
@ -410,6 +413,18 @@ class Amazon(Source):
if 'bulk pack' not in title: if 'bulk pack' not in title:
matches.append(a.get('href')) matches.append(a.get('href'))
break break
if not matches:
# This can happen for some user agents that Amazon thinks are
# mobile/less capable
log('Trying alternate results page markup')
for td in root.xpath(
r'//div[@id="Results"]/descendant::td[starts-with(@id, "search:Td:")]'):
for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'):
title = tostring(a, method='text', encoding=unicode).lower()
if 'bulk pack' not in title:
matches.append(a.get('href'))
break
# Keep only the top 5 matches as the matches are sorted by relevance by # Keep only the top 5 matches as the matches are sorted by relevance by
# Amazon so lower matches are not likely to be very relevant # Amazon so lower matches are not likely to be very relevant

View File

@ -17,10 +17,10 @@ from calibre.utils.config import JSONConfig
from calibre.utils.titlecase import titlecase from calibre.utils.titlecase import titlecase
from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata import check_isbn
msprefs = JSONConfig('metadata_sources.json') msprefs = JSONConfig('metadata_sources/global.json')
msprefs.defaults['txt_comments'] = False msprefs.defaults['txt_comments'] = False
msprefs.defaults['ignore_fields'] = [] msprefs.defaults['ignore_fields'] = []
msprefs.defaults['max_tags'] = 10 msprefs.defaults['max_tags'] = 20
msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
def create_log(ostream=None): def create_log(ostream=None):
@ -95,7 +95,7 @@ class InternalMetadataCompareKeyGen(object):
def get_cached_cover_urls(mi): def get_cached_cover_urls(mi):
from calibre.customize.ui import metadata_plugins from calibre.customize.ui import metadata_plugins
plugins = list(metadata_plugins['identify']) plugins = list(metadata_plugins(['identify']))
for p in plugins: for p in plugins:
url = p.get_cached_cover_url(mi.identifiers) url = p.get_cached_cover_url(mi.identifiers)
if url: if url:

View File

@ -34,10 +34,12 @@ class Worker(Thread):
self.log = create_log(self.buf) self.log = create_log(self.buf)
def run(self): def run(self):
start = time.time()
try: try:
self.plugin.identify(self.log, self.rq, self.abort, **self.kwargs) self.plugin.identify(self.log, self.rq, self.abort, **self.kwargs)
except: except:
self.log.exception('Plugin', self.plugin.name, 'failed') self.log.exception('Plugin', self.plugin.name, 'failed')
self.plugin.dl_time_spent = time.time() - start
def is_worker_alive(workers): def is_worker_alive(workers):
for w in workers: for w in workers:
@ -57,13 +59,13 @@ class ISBNMerge(object):
def isbn_in_pool(self, isbn): def isbn_in_pool(self, isbn):
if isbn: if isbn:
for p in self.pools: for isbns, pool in self.pools.iteritems():
if isbn in p: if isbn in isbns:
return p return pool
return None return None
def pool_has_result_from_same_source(self, pool, result): def pool_has_result_from_same_source(self, pool, result):
results = self.pools[pool][1] results = pool[1]
for r in results: for r in results:
if r.identify_plugin is result.identify_plugin: if r.identify_plugin is result.identify_plugin:
return True return True
@ -77,7 +79,7 @@ class ISBNMerge(object):
isbns, min_year = xisbn.get_isbn_pool(isbn) isbns, min_year = xisbn.get_isbn_pool(isbn)
if not isbns: if not isbns:
isbns = frozenset([isbn]) isbns = frozenset([isbn])
self.pool[isbns] = pool = (min_year, []) self.pools[isbns] = pool = (min_year, [])
if not self.pool_has_result_from_same_source(pool, result): if not self.pool_has_result_from_same_source(pool, result):
pool[1].append(result) pool[1].append(result)
@ -102,7 +104,7 @@ class ISBNMerge(object):
def merge_isbn_results(self): def merge_isbn_results(self):
self.results = [] self.results = []
for min_year, results in self.pool.itervalues(): for min_year, results in self.pools.itervalues():
if results: if results:
self.results.append(self.merge(results, min_year)) self.results.append(self.merge(results, min_year))
@ -169,11 +171,11 @@ class ISBNMerge(object):
min_date = datetime(min_year, 1, 2, tzinfo=utc_tz) min_date = datetime(min_year, 1, 2, tzinfo=utc_tz)
ans.pubdate = min_date ans.pubdate = min_date
else: else:
min_date = datetime(10000, 1, 1, tzinfo=utc_tz) min_date = datetime(3001, 1, 1, tzinfo=utc_tz)
for r in results: for r in results:
if r.pubdate is not None and r.pubdate < min_date: if r.pubdate is not None and r.pubdate < min_date:
min_date = r.pubdate min_date = r.pubdate
if min_date.year < 10000: if min_date.year < 3000:
ans.pubdate = min_date ans.pubdate = min_date
# Identifiers # Identifiers
@ -183,7 +185,7 @@ class ISBNMerge(object):
# Merge any other fields with no special handling (random merge) # Merge any other fields with no special handling (random merge)
touched_fields = set() touched_fields = set()
for r in results: for r in results:
touched_fields |= r.plugin.touched_fields touched_fields |= r.identify_plugin.touched_fields
for f in touched_fields: for f in touched_fields:
if f.startswith('identifier:') or not ans.is_null(f): if f.startswith('identifier:') or not ans.is_null(f):
@ -208,9 +210,9 @@ def merge_identify_results(result_map, log):
# }}} # }}}
def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30): def identify(log, abort, title=None, authors=None, identifiers={}, timeout=30):
start_time = time.time() start_time = time.time()
plugins = list(metadata_plugins['identify']) plugins = list(metadata_plugins(['identify']))
kwargs = { kwargs = {
'title': title, 'title': title,
@ -222,14 +224,17 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
log('Running identify query with parameters:') log('Running identify query with parameters:')
log(kwargs) log(kwargs)
log('Using plugins:', ', '.join([p.name for p in plugins])) log('Using plugins:', ', '.join([p.name for p in plugins]))
log('The log (if any) from individual plugins is below') log('The log from individual plugins is below')
workers = [Worker(p, kwargs, abort) for p in plugins] workers = [Worker(p, kwargs, abort) for p in plugins]
for w in workers: for w in workers:
w.start() w.start()
first_result_at = None first_result_at = None
results = dict.fromkeys(plugins, []) results = {}
for p in plugins:
results[p] = []
logs = dict([(w.plugin, w.buf) for w in workers])
def get_results(): def get_results():
found = False found = False
@ -253,34 +258,50 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
if not is_worker_alive(workers): if not is_worker_alive(workers):
break break
if (first_result_at is not None and time.time() - first_result_at < if (first_result_at is not None and time.time() - first_result_at >
wait_time): wait_time):
log('Not waiting any longer for more results') log('Not waiting any longer for more results')
abort.set() abort.set()
break break
get_results() while not abort.is_set() and get_results():
pass
sort_kwargs = dict(kwargs) sort_kwargs = dict(kwargs)
for k in list(sort_kwargs.iterkeys()): for k in list(sort_kwargs.iterkeys()):
if k not in ('title', 'authors', 'identifiers'): if k not in ('title', 'authors', 'identifiers'):
sort_kwargs.pop(k) sort_kwargs.pop(k)
for plugin, results in results.iteritems(): longest, lp = -1, ''
results.sort(key=plugin.identify_results_keygen(**sort_kwargs)) for plugin, presults in results.iteritems():
plog = plugin.buf.getvalue().strip() presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))
plog = logs[plugin].getvalue().strip()
log('\n'+'*'*30, plugin.name, '*'*30)
log('Request extra headers:', plugin.browser.addheaders)
log('Found %d results'%len(presults))
time_spent = getattr(plugin, 'dl_time_spent', None)
if time_spent is None:
log('Downloading was aborted')
longest, lp = -1, plugin.name
else:
log('Downloading from', plugin.name, 'took', time_spent)
if time_spent > longest:
longest, lp = time_spent, plugin.name
for r in presults:
log('\n\n---')
log(unicode(r))
if plog: if plog:
log('\n'+'*'*35, plugin.name, '*'*35)
log('Found %d results'%len(results))
log(plog) log(plog)
log('\n'+'*'*80) log('\n'+'*'*80)
for i, result in enumerate(results): for i, result in enumerate(presults):
result.relevance_in_source = i result.relevance_in_source = i
result.has_cached_cover_url = \ result.has_cached_cover_url = \
plugin.get_cached_cover_url(result.identifiers) is not None plugin.get_cached_cover_url(result.identifiers) is not None
result.identify_plugin = plugin result.identify_plugin = plugin
log('The identify phase took %.2f seconds'%(time.time() - start_time)) log('The identify phase took %.2f seconds'%(time.time() - start_time))
log('The longest time (%f) was taken by:'%longest, lp)
log('Merging results from different sources and finding earliest', log('Merging results from different sources and finding earliest',
'publication dates') 'publication dates')
start_time = time.time() start_time = time.time()
@ -295,8 +316,8 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
dummy = Metadata(_('Unknown')) dummy = Metadata(_('Unknown'))
max_tags = msprefs['max_tags'] max_tags = msprefs['max_tags']
for f in msprefs['ignore_fields']:
for r in results: for r in results:
for f in msprefs['ignore_fields']:
setattr(r, f, getattr(dummy, f)) setattr(r, f, getattr(dummy, f))
r.tags = r.tags[:max_tags] r.tags = r.tags[:max_tags]
@ -307,8 +328,7 @@ if __name__ == '__main__': # tests {{{
# src/calibre/ebooks/metadata/sources/identify.py # src/calibre/ebooks/metadata/sources/identify.py
from calibre.ebooks.metadata.sources.test import (test_identify, from calibre.ebooks.metadata.sources.test import (test_identify,
title_test, authors_test) title_test, authors_test)
test_identify( tests = [
[
( # An e-book ISBN not on Amazon, one of the authors is ( # An e-book ISBN not on Amazon, one of the authors is
# unknown to Amazon # unknown to Amazon
@ -319,10 +339,10 @@ if __name__ == '__main__': # tests {{{
), ),
( # This isbn not on amazon ( # Test absence of identifiers
{'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python', {'title':'Learning Python',
'authors':['Lutz']}, 'authors':['Lutz']},
[title_test('Learning Python, 3rd Edition', [title_test('Learning Python',
exact=True), authors_test(['Mark Lutz']) exact=True), authors_test(['Mark Lutz'])
] ]
@ -330,14 +350,14 @@ if __name__ == '__main__': # tests {{{
( # Sophisticated comment formatting ( # Sophisticated comment formatting
{'identifiers':{'isbn': '9781416580829'}}, {'identifiers':{'isbn': '9781416580829'}},
[title_test('Angels & Demons - Movie Tie-In: A Novel', [title_test('Angels & Demons',
exact=True), authors_test(['Dan Brown'])] exact=True), authors_test(['Dan Brown'])]
), ),
( # No specific problems ( # No specific problems
{'identifiers':{'isbn': '0743273567'}}, {'identifiers':{'isbn': '0743273567'}},
[title_test('The great gatsby', exact=True), [title_test('The great gatsby', exact=True),
authors_test(['F. Scott Fitzgerald'])] authors_test(['Francis Scott Fitzgerald'])]
), ),
( # A newer book ( # A newer book
@ -347,6 +367,8 @@ if __name__ == '__main__': # tests {{{
), ),
]) ]
#test_identify(tests[1:2])
test_identify(tests)
# }}} # }}}

View File

@ -0,0 +1,40 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.metadata.sources.base import Source
class ISBNDB(Source):
    '''
    Metadata source plugin for isbndb.com.

    Declares the ``identify`` capability and the metadata fields this source
    can fill in. On construction it performs a one-time, best-effort
    migration of an access key from the global plugin customization store
    into this plugin's own preferences, and exposes the resulting key as
    ``self.isbndb_key`` (``None`` when no key is available).
    '''

    name = 'ISBNDB'
    description = _('Downloads metadata from isbndb.com')

    capabilities = frozenset(['identify'])
    touched_fields = frozenset(['title', 'authors',
        'identifier:isbn', 'comments', 'publisher'])
    supports_gzip_transfer_encoding = True

    def __init__(self, *args, **kwargs):
        '''
        Initialise the base ``Source`` and load (migrating if needed) the key.

        All positional and keyword arguments are forwarded unchanged to
        ``Source.__init__``.
        '''
        Source.__init__(self, *args, **kwargs)

        prefs = self.prefs
        prefs.defaults['key_migrated'] = False
        prefs.defaults['isbndb_key'] = None

        if not prefs['key_migrated']:
            # Set the flag before attempting the migration so that it is
            # tried at most once, even when the attempt below fails.
            prefs['key_migrated'] = True
            try:
                from calibre.customize.ui import config
                # NOTE(review): presumably the key entered for the older
                # 'IsbnDB' plugin -- confirm against the legacy plugin.
                key = config['plugin_customization']['IsbnDB']
                prefs['isbndb_key'] = key
            except Exception:
                # Best effort: a missing entry or unavailable config simply
                # leaves the key at its default. Catching Exception rather
                # than using a bare except lets SystemExit and
                # KeyboardInterrupt propagate.
                pass

        self.isbndb_key = prefs['isbndb_key']

View File

@ -64,10 +64,14 @@ def test_identify(tests): # {{{
from calibre.ebooks.metadata.sources.identify import identify from calibre.ebooks.metadata.sources.identify import identify
tdir, lf, log, abort = init_test('Full Identify') tdir, lf, log, abort = init_test('Full Identify')
prints('Log saved to', lf)
times = [] times = []
for kwargs, test_funcs in tests: for kwargs, test_funcs in tests:
log('#'*80)
log('### Running test with:', kwargs)
log('#'*80)
prints('Running test with:', kwargs) prints('Running test with:', kwargs)
args = (log, abort) args = (log, abort)
start_time = time.time() start_time = time.time()
@ -107,10 +111,11 @@ def test_identify(tests): # {{{
prints('Most relevant result failed the tests') prints('Most relevant result failed the tests')
raise SystemExit(1) raise SystemExit(1)
log('\n\n')
prints('Average time per query', sum(times)/len(times)) prints('Average time per query', sum(times)/len(times))
if os.stat(lf).st_size > 10: prints('Full log is at:', lf)
prints('There were some errors/warnings, see log', lf)
# }}} # }}}
@ -129,6 +134,7 @@ def test_identify_plugin(name, tests): # {{{
plugin = x plugin = x
break break
prints('Testing the identify function of', plugin.name) prints('Testing the identify function of', plugin.name)
prints('Using extra headers:', plugin.browser.addheaders)
tdir, lf, log, abort = init_test(plugin.name) tdir, lf, log, abort = init_test(plugin.name)
prints('Log saved to', lf) prints('Log saved to', lf)

View File

@ -73,7 +73,11 @@ class xISBN(object):
def get_isbn_pool(self, isbn): def get_isbn_pool(self, isbn):
data = self.get_data(isbn) data = self.get_data(isbn)
isbns = frozenset([x.get('isbn') for x in data if 'isbn' in x]) raw = tuple(x.get('isbn') for x in data if 'isbn' in x)
isbns = []
for x in raw:
isbns += x
isbns = frozenset(isbns)
min_year = 100000 min_year = 100000
for x in data: for x in data:
try: try:

View File

@ -282,8 +282,8 @@ class Serializer(object):
buffer.write('="') buffer.write('="')
self.serialize_text(val, quot=True) self.serialize_text(val, quot=True)
buffer.write('"') buffer.write('"')
if elem.text or len(elem) > 0:
buffer.write('>') buffer.write('>')
if elem.text or len(elem) > 0:
if elem.text: if elem.text:
self.anchor_offset = None self.anchor_offset = None
self.serialize_text(elem.text) self.serialize_text(elem.text)
@ -293,8 +293,6 @@ class Serializer(object):
self.anchor_offset = None self.anchor_offset = None
self.serialize_text(child.tail) self.serialize_text(child.tail)
buffer.write('</%s>' % tag) buffer.write('</%s>' % tag)
else:
buffer.write('/>')
def serialize_text(self, text, quot=False): def serialize_text(self, text, quot=False):
text = text.replace('&', '&amp;') text = text.replace('&', '&amp;')

View File

@ -37,7 +37,7 @@ class MarkdownMLizer(object):
if not self.opts.keep_links: if not self.opts.keep_links:
html = re.sub(r'<\s*/*\s*a[^>]*>', '', html) html = re.sub(r'<\s*/*\s*a[^>]*>', '', html)
if not self.opts.keep_image_references: if not self.opts.keep_image_references:
html = re.sub(r'<\s*img[^>]*>', '', html)\ html = re.sub(r'<\s*img[^>]*>', '', html)
text = html2text(html) text = html2text(html)

View File

@ -1,5 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
@ -7,10 +9,10 @@ __docformat__ = 'restructuredtext en'
import textwrap, re, os import textwrap, re, os
from PyQt4.Qt import Qt, QDateEdit, QDate, \ from PyQt4.Qt import (Qt, QDateEdit, QDate,
QIcon, QToolButton, QWidget, QLabel, QGridLayout, \ QIcon, QToolButton, QWidget, QLabel, QGridLayout,
QDoubleSpinBox, QListWidgetItem, QSize, QPixmap, \ QDoubleSpinBox, QListWidgetItem, QSize, QPixmap,
QPushButton, QSpinBox, QLineEdit QPushButton, QSpinBox, QLineEdit, QSizePolicy)
from calibre.gui2.widgets import EnLineEdit, FormatList, ImageView from calibre.gui2.widgets import EnLineEdit, FormatList, ImageView
from calibre.gui2.complete import MultiCompleteLineEdit, MultiCompleteComboBox from calibre.gui2.complete import MultiCompleteLineEdit, MultiCompleteComboBox
@ -22,7 +24,7 @@ from calibre.ebooks.metadata.meta import get_metadata
from calibre.gui2 import file_icon_provider, UNDEFINED_QDATE, UNDEFINED_DATE, \ from calibre.gui2 import file_icon_provider, UNDEFINED_QDATE, UNDEFINED_DATE, \
choose_files, error_dialog, choose_images, question_dialog choose_files, error_dialog, choose_images, question_dialog
from calibre.utils.date import local_tz, qt_to_dt from calibre.utils.date import local_tz, qt_to_dt
from calibre import strftime from calibre import strftime, fit_image
from calibre.ebooks import BOOK_EXTENSIONS from calibre.ebooks import BOOK_EXTENSIONS
from calibre.customize.ui import run_plugins_on_import from calibre.customize.ui import run_plugins_on_import
from calibre.utils.date import utcfromtimestamp from calibre.utils.date import utcfromtimestamp
@ -480,6 +482,7 @@ class FormatsManager(QWidget): # {{{
def initialize(self, db, id_): def initialize(self, db, id_):
self.changed = False self.changed = False
self.formats.clear()
exts = db.formats(id_, index_is_id=True) exts = db.formats(id_, index_is_id=True)
self.original_val = set([]) self.original_val = set([])
if exts: if exts:
@ -638,6 +641,23 @@ class Cover(ImageView): # {{{
self.trim_cover_button, self.download_cover_button, self.trim_cover_button, self.download_cover_button,
self.generate_cover_button] self.generate_cover_button]
self.frame_size = (300, 400)
self.setSizePolicy(QSizePolicy(QSizePolicy.Preferred,
QSizePolicy.Preferred))
def frame_resized(self, ev):
    '''
    Remember the space available for the cover after a resize.

    ``ev`` must provide ``size()`` returning an object with ``width()`` and
    ``height()``. The stored ``frame_size`` is one third of that width
    (floor division) and the full height.
    '''
    new_size = ev.size()
    width_share = new_size.width()//3
    self.frame_size = (width_share, new_size.height())
def sizeHint(self):
    '''
    Return the preferred size, limited by the recorded ``frame_size``.

    Starts from the hint computed by ``ImageView.sizeHint`` and passes its
    dimensions, together with ``frame_size``, to ``fit_image``. When
    ``fit_image`` reports that a resize happened, a new ``QSize`` built from
    the returned dimensions is used; otherwise the base hint is returned
    unchanged.
    '''
    hint = ImageView.sizeHint(self)
    max_w, max_h = self.frame_size[0], self.frame_size[1]
    # fit_image yields (was_resized, new_width, new_height)
    scaled, new_w, new_h = fit_image(hint.width(), hint.height(),
            max_w, max_h)
    return QSize(new_w, new_h) if scaled else hint
def select_cover(self, *args): def select_cover(self, *args):
files = choose_images(self, 'change cover dialog', files = choose_images(self, 'change cover dialog',
_('Choose cover for ') + _('Choose cover for ') +
@ -882,8 +902,11 @@ class TagsEdit(MultiCompleteLineEdit): # {{{
# }}} # }}}
class ISBNEdit(QLineEdit): # {{{ class IdentifiersEdit(QLineEdit): # {{{
LABEL = _('IS&BN:') LABEL = _('I&ds:')
BASE_TT = _('Edit the identifiers for this book. '
'For example: \n\n%s')%(
'isbn:1565927249, doi:10.1000/182, amazon:1565927249')
def __init__(self, parent): def __init__(self, parent):
QLineEdit.__init__(self, parent) QLineEdit.__init__(self, parent)
@ -893,32 +916,44 @@ class ISBNEdit(QLineEdit): # {{{
@dynamic_property @dynamic_property
def current_val(self): def current_val(self):
def fget(self): def fget(self):
return self.pat.sub('', unicode(self.text()).strip()) raw = unicode(self.text()).strip()
parts = [x.strip() for x in raw.split(',')]
ans = {}
for x in parts:
c = x.split(':')
if len(c) == 2:
ans[c[0]] = c[1]
return ans
def fset(self, val): def fset(self, val):
if not val: if not val:
val = '' val = {}
self.setText(val.strip()) txt = ', '.join(['%s:%s'%(k, v) for k, v in val.iteritems()])
self.setText(txt.strip())
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
def initialize(self, db, id_): def initialize(self, db, id_):
self.current_val = db.isbn(id_, index_is_id=True) self.current_val = db.get_identifiers(id_, index_is_id=True)
self.original_val = self.current_val self.original_val = self.current_val
def commit(self, db, id_): def commit(self, db, id_):
db.set_isbn(id_, self.current_val, notify=False, commit=False) if self.original_val != self.current_val:
db.set_identifiers(id_, self.current_val, notify=False, commit=False)
return True return True
def validate(self, *args): def validate(self, *args):
isbn = self.current_val identifiers = self.current_val
tt = _('This ISBN number is valid') isbn = identifiers.get('isbn', '')
tt = self.BASE_TT
extra = ''
if not isbn: if not isbn:
col = 'rgba(0,255,0,0%)' col = 'rgba(0,255,0,0%)'
elif check_isbn(isbn) is not None: elif check_isbn(isbn) is not None:
col = 'rgba(0,255,0,20%)' col = 'rgba(0,255,0,20%)'
extra = '\n\n'+_('This ISBN number is valid')
else: else:
col = 'rgba(255,0,0,20%)' col = 'rgba(255,0,0,20%)'
tt = _('This ISBN number is invalid') extra = '\n\n' + _('This ISBN number is invalid')
self.setToolTip(tt) self.setToolTip(tt+extra)
self.setStyleSheet('QLineEdit { background-color: %s }'%col) self.setStyleSheet('QLineEdit { background-color: %s }'%col)
# }}} # }}}

View File

@ -1,5 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
@ -8,17 +10,17 @@ __docformat__ = 'restructuredtext en'
import os import os
from functools import partial from functools import partial
from PyQt4.Qt import Qt, QVBoxLayout, QHBoxLayout, QWidget, QPushButton, \ from PyQt4.Qt import (Qt, QVBoxLayout, QHBoxLayout, QWidget, QPushButton,
QGridLayout, pyqtSignal, QDialogButtonBox, QScrollArea, QFont, \ QGridLayout, pyqtSignal, QDialogButtonBox, QScrollArea, QFont,
QTabWidget, QIcon, QToolButton, QSplitter, QGroupBox, QSpacerItem, \ QTabWidget, QIcon, QToolButton, QSplitter, QGroupBox, QSpacerItem,
QSizePolicy, QPalette, QFrame, QSize, QKeySequence QSizePolicy, QPalette, QFrame, QSize, QKeySequence)
from calibre.ebooks.metadata import authors_to_string, string_to_authors from calibre.ebooks.metadata import authors_to_string, string_to_authors
from calibre.gui2 import ResizableDialog, error_dialog, gprefs from calibre.gui2 import ResizableDialog, error_dialog, gprefs
from calibre.gui2.metadata.basic_widgets import TitleEdit, AuthorsEdit, \ from calibre.gui2.metadata.basic_widgets import (TitleEdit, AuthorsEdit,
AuthorSortEdit, TitleSortEdit, SeriesEdit, SeriesIndexEdit, ISBNEdit, \ AuthorSortEdit, TitleSortEdit, SeriesEdit, SeriesIndexEdit, IdentifiersEdit,
RatingEdit, PublisherEdit, TagsEdit, FormatsManager, Cover, CommentsEdit, \ RatingEdit, PublisherEdit, TagsEdit, FormatsManager, Cover, CommentsEdit,
BuddyLabel, DateEdit, PubdateEdit BuddyLabel, DateEdit, PubdateEdit)
from calibre.gui2.custom_column_widgets import populate_metadata_page from calibre.gui2.custom_column_widgets import populate_metadata_page
from calibre.utils.config import tweaks from calibre.utils.config import tweaks
@ -145,8 +147,8 @@ class MetadataSingleDialogBase(ResizableDialog):
self.tags_editor_button.clicked.connect(self.tags_editor) self.tags_editor_button.clicked.connect(self.tags_editor)
self.basic_metadata_widgets.append(self.tags) self.basic_metadata_widgets.append(self.tags)
self.isbn = ISBNEdit(self) self.identifiers = IdentifiersEdit(self)
self.basic_metadata_widgets.append(self.isbn) self.basic_metadata_widgets.append(self.identifiers)
self.publisher = PublisherEdit(self) self.publisher = PublisherEdit(self)
self.basic_metadata_widgets.append(self.publisher) self.basic_metadata_widgets.append(self.publisher)
@ -280,8 +282,8 @@ class MetadataSingleDialogBase(ResizableDialog):
self.publisher.current_val = mi.publisher self.publisher.current_val = mi.publisher
if not mi.is_null('tags'): if not mi.is_null('tags'):
self.tags.current_val = mi.tags self.tags.current_val = mi.tags
if not mi.is_null('isbn'): if not mi.is_null('identifiers'):
self.isbn.current_val = mi.isbn self.identifiers.current_val = mi.identifiers
if not mi.is_null('pubdate'): if not mi.is_null('pubdate'):
self.pubdate.current_val = mi.pubdate self.pubdate.current_val = mi.pubdate
if not mi.is_null('series') and mi.series.strip(): if not mi.is_null('series') and mi.series.strip():
@ -385,6 +387,14 @@ class MetadataSingleDialogBase(ResizableDialog):
disconnect(x.clicked) disconnect(x.clicked)
# }}} # }}}
class Splitter(QSplitter):
    '''
    A QSplitter that announces its own resize events.

    The ``frame_resized`` signal is emitted with the resize event object
    every time ``resizeEvent`` is called, before the default QSplitter
    handling runs.
    '''

    frame_resized = pyqtSignal(object)

    def resizeEvent(self, ev):
        # Tell listeners first, then defer to the stock implementation.
        self.frame_resized.emit(ev)
        result = QSplitter.resizeEvent(self, ev)
        return result
class MetadataSingleDialog(MetadataSingleDialogBase): # {{{ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
def do_layout(self): def do_layout(self):
@ -437,8 +447,9 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
tl.addWidget(self.formats_manager, 0, 6, 3, 1) tl.addWidget(self.formats_manager, 0, 6, 3, 1)
self.splitter = QSplitter(Qt.Horizontal, self) self.splitter = Splitter(Qt.Horizontal, self)
self.splitter.addWidget(self.cover) self.splitter.addWidget(self.cover)
self.splitter.frame_resized.connect(self.cover.frame_resized)
l.addWidget(self.splitter) l.addWidget(self.splitter)
self.tabs[0].gb = gb = QGroupBox(_('Change cover'), self) self.tabs[0].gb = gb = QGroupBox(_('Change cover'), self)
gb.l = l = QGridLayout() gb.l = l = QGridLayout()
@ -475,9 +486,9 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
create_row2(1, self.rating) create_row2(1, self.rating)
sto(self.rating, self.tags) sto(self.rating, self.tags)
create_row2(2, self.tags, self.tags_editor_button) create_row2(2, self.tags, self.tags_editor_button)
sto(self.tags_editor_button, self.isbn) sto(self.tags_editor_button, self.identifiers)
create_row2(3, self.isbn) create_row2(3, self.identifiers)
sto(self.isbn, self.timestamp) sto(self.identifiers, self.timestamp)
create_row2(4, self.timestamp, self.timestamp.clear_button) create_row2(4, self.timestamp, self.timestamp.clear_button)
sto(self.timestamp.clear_button, self.pubdate) sto(self.timestamp.clear_button, self.pubdate)
create_row2(5, self.pubdate, self.pubdate.clear_button) create_row2(5, self.pubdate, self.pubdate.clear_button)
@ -562,9 +573,9 @@ class MetadataSingleDialogAlt(MetadataSingleDialogBase): # {{{
create_row(8, self.pubdate, self.publisher, create_row(8, self.pubdate, self.publisher,
button=self.pubdate.clear_button, icon='trash.png') button=self.pubdate.clear_button, icon='trash.png')
create_row(9, self.publisher, self.timestamp) create_row(9, self.publisher, self.timestamp)
create_row(10, self.timestamp, self.isbn, create_row(10, self.timestamp, self.identifiers,
button=self.timestamp.clear_button, icon='trash.png') button=self.timestamp.clear_button, icon='trash.png')
create_row(11, self.isbn, self.comments) create_row(11, self.identifiers, self.comments)
tl.addItem(QSpacerItem(1, 1, QSizePolicy.Fixed, QSizePolicy.Expanding), tl.addItem(QSpacerItem(1, 1, QSizePolicy.Fixed, QSizePolicy.Expanding),
12, 1, 1 ,1) 12, 1, 1 ,1)
@ -580,7 +591,7 @@ class MetadataSingleDialogAlt(MetadataSingleDialogBase): # {{{
sr.setWidget(w) sr.setWidget(w)
gbl.addWidget(sr) gbl.addWidget(sr)
self.tabs[0].l.addWidget(gb, 0, 1, 1, 1) self.tabs[0].l.addWidget(gb, 0, 1, 1, 1)
sto(self.isbn, gb) sto(self.identifiers, gb)
w = QGroupBox(_('&Comments'), tab0) w = QGroupBox(_('&Comments'), tab0)
sp = QSizePolicy() sp = QSizePolicy()

View File

@ -312,6 +312,7 @@ class ImageView(QWidget, ImageDropMixin):
p.setPen(pen) p.setPen(pen)
if self.draw_border: if self.draw_border:
p.drawRect(target) p.drawRect(target)
#p.drawRect(self.rect())
p.end() p.end()
class CoverView(QGraphicsView, ImageDropMixin): class CoverView(QGraphicsView, ImageDropMixin):