From f89d0efa1f216018aeab84be2be53ab15012e41a Mon Sep 17 00:00:00 2001 From: John Schember Date: Mon, 4 Apr 2011 19:47:59 -0400 Subject: [PATCH 01/27] HTMLZ Output: Use urldefrag instead of doing it ourself. --- src/calibre/ebooks/htmlz/oeb2html.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/htmlz/oeb2html.py b/src/calibre/ebooks/htmlz/oeb2html.py index 827e57b932..af5867356a 100644 --- a/src/calibre/ebooks/htmlz/oeb2html.py +++ b/src/calibre/ebooks/htmlz/oeb2html.py @@ -12,7 +12,7 @@ Transform OEB content into a single (more or less) HTML file. import os -from urlparse import urlparse +from urlparse import urlparse, urldefrag from calibre import prepare_string_for_xml from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace @@ -70,9 +70,7 @@ class OEB2HTML(object): if tag == 'a': href = page.abshref(attribs['href']) if self.url_is_relative(href): - id = '' - if '#' in href: - href, n, id = href.partition('#') + href, id = urldefrag(href) href = '#%s' % self.get_link_id(href, id) attribs['href'] = href return attribs From cc0f8f4323a788f7736c02ae2685c3e7ddb760d9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 5 Apr 2011 17:21:16 -0600 Subject: [PATCH 02/27] Switch fetch-ebook-metadata to use the new metadata download framework --- src/calibre/ebooks/metadata/sources/cli.py | 79 +++++++++++++++++++ src/calibre/ebooks/metadata/sources/google.py | 3 + src/calibre/linux.py | 4 +- 3 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 src/calibre/ebooks/metadata/sources/cli.py diff --git a/src/calibre/ebooks/metadata/sources/cli.py b/src/calibre/ebooks/metadata/sources/cli.py new file mode 100644 index 0000000000..d2cc1648f9 --- /dev/null +++ b/src/calibre/ebooks/metadata/sources/cli.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' 
+__copyright__ = '2011, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import sys, textwrap +from io import BytesIO +from threading import Event + +from calibre import prints +from calibre.utils.config import OptionParser +from calibre.ebooks.metadata import string_to_authors +from calibre.ebooks.metadata.opf2 import metadata_to_opf +from calibre.ebooks.metadata.sources.base import create_log +from calibre.ebooks.metadata.sources.identify import identify + +def option_parser(): + parser = OptionParser(textwrap.dedent( + '''\ + %prog [options] + + Fetch book metadata from online sources. You must specify at least one + of title, authors or ISBN. + ''' + )) + parser.add_option('-t', '--title', help='Book title') + parser.add_option('-a', '--authors', help='Book author(s)') + parser.add_option('-i', '--isbn', help='Book ISBN') + parser.add_option('-v', '--verbose', default=False, action='store_true', + help='Print the log to the console (stderr)') + parser.add_option('-o', '--opf', help='Output the metadata in OPF format') + parser.add_option('-d', '--timeout', default='30', + help='Timeout in seconds. 
Default is 30') + + return parser + +def main(args=sys.argv): + parser = option_parser() + opts, args = parser.parse_args(args) + + buf = BytesIO() + log = create_log(buf) + abort = Event() + + authors = [] + if opts.authors: + authors = string_to_authors(opts.authors) + + identifiers = {} + if opts.isbn: + identifiers['isbn'] = opts.isbn + + results = identify(log, abort, title=opts.title, authors=authors, + identifiers=identifiers, timeout=int(opts.timeout)) + + log = buf.getvalue() + + if not results: + print (log, file=sys.stderr) + prints('No results found', file=sys.stderr) + raise SystemExit(1) + + result = results[0] + result = (metadata_to_opf(result) if opts.opf else + unicode(result).encode('utf-8')) + + if opts.verbose: + print (log, file=sys.stderr) + + print (result) + + return 0 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py index 21c99fdf46..c4e2f9fe24 100644 --- a/src/calibre/ebooks/metadata/sources/google.py +++ b/src/calibre/ebooks/metadata/sources/google.py @@ -271,6 +271,9 @@ class GoogleBooks(Source): identifiers={}, timeout=30): query = self.create_query(log, title=title, authors=authors, identifiers=identifiers) + if not query: + log.error('Insufficient metadata to construct query') + return br = self.browser try: raw = br.open_novisit(query, timeout=timeout).read() diff --git a/src/calibre/linux.py b/src/calibre/linux.py index 64c363b8ba..5c80df20df 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -30,7 +30,7 @@ entry_points = { 'calibre-customize = calibre.customize.ui:main', 'calibre-complete = calibre.utils.complete:main', 'pdfmanipulate = calibre.ebooks.pdf.manipulate.cli:main', - 'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main', + 'fetch-ebook-metadata = calibre.ebooks.metadata.sources.cli:main', 'epub-fix = calibre.ebooks.epub.fix.main:main', 'calibre-smtp = calibre.utils.smtp:main', ], @@ -183,7 
+183,7 @@ class PostInstall: from calibre.ebooks.lrf.lrfparser import option_parser as lrf2lrsop from calibre.gui2.lrf_renderer.main import option_parser as lrfviewerop from calibre.gui2.viewer.main import option_parser as viewer_op - from calibre.ebooks.metadata.fetch import option_parser as fem_op + from calibre.ebooks.metadata.sources.cli import option_parser as fem_op from calibre.gui2.main import option_parser as guiop from calibre.utils.smtp import option_parser as smtp_op from calibre.library.server.main import option_parser as serv_op From 265eabf1a613fcdc3651631fd3f9589bf7d4e7be Mon Sep 17 00:00:00 2001 From: John Schember Date: Tue, 5 Apr 2011 21:59:11 -0400 Subject: [PATCH 03/27] HTMLZ Output: Rewrite links via oeb.base.rewrite_links function. --- src/calibre/ebooks/htmlz/oeb2html.py | 112 +++++++++++++++------------ 1 file changed, 64 insertions(+), 48 deletions(-) diff --git a/src/calibre/ebooks/htmlz/oeb2html.py b/src/calibre/ebooks/htmlz/oeb2html.py index af5867356a..7d915bcfcb 100644 --- a/src/calibre/ebooks/htmlz/oeb2html.py +++ b/src/calibre/ebooks/htmlz/oeb2html.py @@ -12,10 +12,13 @@ Transform OEB content into a single (more or less) HTML file. import os -from urlparse import urlparse, urldefrag +from functools import partial +from lxml import html +from urlparse import urldefrag from calibre import prepare_string_for_xml -from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace +from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace,\ + OEB_IMAGES, XLINK, rewrite_links from calibre.ebooks.oeb.stylizer import Stylizer from calibre.utils.logging import default_log @@ -40,6 +43,8 @@ class OEB2HTML(object): self.opts = opts self.links = {} self.images = {} + self.base_hrefs = [item.href for item in oeb_book.spine] + self.map_resources(oeb_book) return self.mlize_spine(oeb_book) @@ -47,6 +52,8 @@ class OEB2HTML(object): output = [u''] for item in oeb_book.spine: self.log.debug('Converting %s to HTML...' 
% item.href) + self.rewrite_ids(item.data, item) + rewrite_links(item.data, partial(self.rewrite_link, page=item)) stylizer = Stylizer(item.data, item.href, oeb_book, self.opts) output += self.dump_text(item.data.find(XHTML('body')), stylizer, item) output.append('\n\n') @@ -56,41 +63,61 @@ class OEB2HTML(object): def dump_text(self, elem, stylizer, page): raise NotImplementedError - def get_link_id(self, href, aid): - aid = '%s#%s' % (href, aid) - if aid not in self.links: - self.links[aid] = 'calibre_link-%s' % len(self.links.keys()) - return self.links[aid] + def get_link_id(self, href, id=''): + if id: + href += '#%s' % id + if href not in self.links: + self.links[href] = '#calibre_link-%s' % len(self.links.keys()) + return self.links[href] - def rewrite_link(self, tag, attribs, page): - # Rewrite ids. - if 'id' in attribs: - attribs['id'] = self.get_link_id(page.href, attribs['id']) - # Rewrite links. - if tag == 'a': - href = page.abshref(attribs['href']) - if self.url_is_relative(href): - href, id = urldefrag(href) - href = '#%s' % self.get_link_id(href, id) - attribs['href'] = href - return attribs - - def rewrite_image(self, tag, attribs, page): - if tag == 'img': - src = attribs.get('src', None) - if src: - src = page.abshref(src) - if src not in self.images: - ext = os.path.splitext(src)[1] + def map_resources(self, oeb_book): + for item in oeb_book.manifest: + if item.media_type in OEB_IMAGES: + if item.href not in self.images: + ext = os.path.splitext(item.href)[1] fname = '%s%s' % (len(self.images), ext) fname = fname.zfill(10) - self.images[src] = fname - attribs['src'] = 'images/%s' % self.images[src] - return attribs - - def url_is_relative(self, url): - o = urlparse(url) - return False if o.scheme else True + self.images[item.href] = fname + if item in oeb_book.spine: + self.get_link_id(item.href) + root = item.data.find(XHTML('body')) + link_attrs = set(html.defs.link_attrs) + link_attrs.add(XLINK('href')) + for el in root.iter(): + attribs = 
el.attrib + try: + if not isinstance(el.tag, basestring): + continue + except UnicodeDecodeError: + continue + for attr in attribs: + if attr in link_attrs: + href = item.abshref(attribs[attr]) + href, id = urldefrag(href) + if href in self.base_hrefs: + self.get_link_id(href, id) + + def rewrite_link(self, url, page=None): + if not page: + return url + abs_url = page.abshref(url) + if abs_url in self.images: + return 'images/%s' % self.images[abs_url] + if abs_url in self.links: + return self.links[abs_url] + return url + + def rewrite_ids(self, root, page): + for el in root.iter(): + try: + tag = el.tag + except UnicodeDecodeError: + continue + if tag == XHTML('body'): + el.attrib['id'] = self.get_link_id(page.href)[1:] + continue + if 'id' in el.attrib: + el.attrib['id'] = self.get_link_id(page.href, el.attrib['id'])[1:] def get_css(self, oeb_book): css = u'' @@ -127,13 +154,9 @@ class OEB2HTMLNoCSSizer(OEB2HTML): tags = [] tag = barename(elem.tag) attribs = elem.attrib - - attribs = self.rewrite_link(tag, attribs, page) - attribs = self.rewrite_image(tag, attribs, page) - + if tag == 'body': tag = 'div' - attribs['id'] = self.get_link_id(page.href, '') tags.append(tag) # Ignore anything that is set to not be displayed. @@ -215,14 +238,10 @@ class OEB2HTMLInlineCSSizer(OEB2HTML): tags = [] tag = barename(elem.tag) attribs = elem.attrib - - attribs = self.rewrite_link(tag, attribs, page) - attribs = self.rewrite_image(tag, attribs, page) style_a = '%s' % style if tag == 'body': tag = 'div' - attribs['id'] = self.get_link_id(page.href, '') if not style['page-break-before'] == 'always': style_a = 'page-break-before: always;' + ' ' if style_a else '' + style_a tags.append(tag) @@ -277,6 +296,8 @@ class OEB2HTMLClassCSSizer(OEB2HTML): output = [] for item in oeb_book.spine: self.log.debug('Converting %s to HTML...' 
% item.href) + self.rewrite_ids(item.data, item) + rewrite_links(item.data, partial(self.rewrite_link, page=item)) stylizer = Stylizer(item.data, item.href, oeb_book, self.opts) output += self.dump_text(item.data.find(XHTML('body')), stylizer, item) output.append('\n\n') @@ -304,17 +325,12 @@ class OEB2HTMLClassCSSizer(OEB2HTML): # Setup our variables. text = [''] - #style = stylizer.style(elem) tags = [] tag = barename(elem.tag) attribs = elem.attrib - attribs = self.rewrite_link(tag, attribs, page) - attribs = self.rewrite_image(tag, attribs, page) - if tag == 'body': tag = 'div' - attribs['id'] = self.get_link_id(page.href, '') tags.append(tag) # Remove attributes we won't want. From 739609210ef60dc4d0bb15fa0253d0c1b7940081 Mon Sep 17 00:00:00 2001 From: John Schember Date: Tue, 5 Apr 2011 22:12:50 -0400 Subject: [PATCH 04/27] ... --- src/calibre/ebooks/htmlz/oeb2html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/htmlz/oeb2html.py b/src/calibre/ebooks/htmlz/oeb2html.py index 7d915bcfcb..b8a6362a99 100644 --- a/src/calibre/ebooks/htmlz/oeb2html.py +++ b/src/calibre/ebooks/htmlz/oeb2html.py @@ -88,7 +88,7 @@ class OEB2HTML(object): try: if not isinstance(el.tag, basestring): continue - except UnicodeDecodeError: + except: continue for attr in attribs: if attr in link_attrs: From 458727a5600af8683101e9362eca9c9a003462f8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 5 Apr 2011 21:35:26 -0600 Subject: [PATCH 05/27] ... 
--- src/calibre/ebooks/metadata/book/base.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 328ab7be26..ff22cd3608 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -125,7 +125,10 @@ class Metadata(object): _data = object.__getattribute__(self, '_data') if field in TOP_LEVEL_IDENTIFIERS: field, val = self._clean_identifier(field, val) - _data['identifiers'].update({field: val}) + identifiers = _data['identifiers'] + identifiers.pop(field, None) + if val: + identifiers[field] = val elif field == 'identifiers': if not val: val = copy.copy(NULL_VALUES.get('identifiers', None)) @@ -224,8 +227,7 @@ class Metadata(object): identifiers = object.__getattribute__(self, '_data')['identifiers'] - if not val and typ in identifiers: - identifiers.pop(typ) + identifiers.pop(typ, None) if val: identifiers[typ] = val @@ -647,7 +649,7 @@ class Metadata(object): fmt('Tags', u', '.join([unicode(t) for t in self.tags])) if self.series: fmt('Series', self.series + ' #%s'%self.format_series_index()) - if self.language: + if not self.is_null('language'): fmt('Language', self.language) if self.rating is not None: fmt('Rating', self.rating) From afebdabbf140c14a9ee61dd935b659db0dc5e59e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 5 Apr 2011 23:11:43 -0600 Subject: [PATCH 06/27] save_cover_data_to: Fix return_data returning Image object instead of bytes when an Image object is passed in --- src/calibre/utils/magick/draw.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/utils/magick/draw.py b/src/calibre/utils/magick/draw.py index 42659d70cc..fdce30177a 100644 --- a/src/calibre/utils/magick/draw.py +++ b/src/calibre/utils/magick/draw.py @@ -92,12 +92,12 @@ def save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None, ret = None if return_data: ret = data - if changed: + 
if changed or isinstance(ret, Image): if hasattr(img, 'set_compression_quality') and fmt == 'jpg': img.set_compression_quality(compression_quality) ret = img.export(fmt) else: - if changed: + if changed or isinstance(ret, Image): if hasattr(img, 'set_compression_quality') and fmt == 'jpg': img.set_compression_quality(compression_quality) img.save(path) From 2828ba527699ef3911281f378ea608248c79a52e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 5 Apr 2011 23:12:18 -0600 Subject: [PATCH 07/27] ... --- src/calibre/ebooks/metadata/sources/identify.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index b04a697ed8..77391bac6b 100644 --- a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -354,10 +354,10 @@ if __name__ == '__main__': # tests {{{ exact=True), authors_test(['Dan Brown'])] ), - ( # No specific problems - {'identifiers':{'isbn': '0743273567'}}, - [title_test('The great gatsby', exact=True), - authors_test(['Francis Scott Fitzgerald'])] + ( # No ISBN + {'title':'Justine', 'authors':['Durrel']}, + [title_test('Justine', exact=True), + authors_test(['Lawrence Durrel'])] ), ( # A newer book From 6773cf71af98e80ea04d951f043b08f9eae508ab Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 5 Apr 2011 23:16:59 -0600 Subject: [PATCH 08/27] Add cover downloading to the new fetch-ebook-metadata --- src/calibre/ebooks/metadata/sources/amazon.py | 5 +- src/calibre/ebooks/metadata/sources/base.py | 9 +- src/calibre/ebooks/metadata/sources/cli.py | 25 ++- src/calibre/ebooks/metadata/sources/covers.py | 178 ++++++++++++++++++ src/calibre/ebooks/metadata/sources/google.py | 20 +- .../ebooks/metadata/sources/openlibrary.py | 2 +- 6 files changed, 224 insertions(+), 15 deletions(-) create mode 100644 src/calibre/ebooks/metadata/sources/covers.py diff --git 
a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index d1c8f24da6..d48f502c29 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -279,7 +279,7 @@ class Worker(Thread): # Get details {{{ class Amazon(Source): - name = 'Amazon' + name = 'Amazon Metadata' description = _('Downloads metadata from Amazon') capabilities = frozenset(['identify', 'cover']) @@ -493,9 +493,10 @@ class Amazon(Source): if abort.is_set(): return br = self.browser + log('Downloading cover from:', cached_url) try: cdata = br.open_novisit(cached_url, timeout=timeout).read() - result_queue.put(cdata) + result_queue.put((self, cdata)) except: log.exception('Failed to download cover from:', cached_url) # }}} diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index 30b804a76e..33232f25ab 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -22,6 +22,12 @@ msprefs.defaults['txt_comments'] = False msprefs.defaults['ignore_fields'] = [] msprefs.defaults['max_tags'] = 20 msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds +msprefs.defaults['wait_after_first_cover_result'] = 60 # seconds + +# Google covers are often poor quality (scans/errors) but they have high +# resolution, so they trump covers from better sources. So make sure they +# are only used if no other covers are found. +msprefs.defaults['cover_priorities'] = {'Google':2} def create_log(ostream=None): log = ThreadSafeLog(level=ThreadSafeLog.DEBUG) @@ -340,7 +346,8 @@ class Source(Plugin): title=None, authors=None, identifiers={}, timeout=30): ''' Download a cover and put it into result_queue. The parameters all have - the same meaning as for :meth:`identify`. + the same meaning as for :meth:`identify`. Put (self, cover_data) into + result_queue. 
This method should use cached cover URLs for efficiency whenever possible. When cached data is not present, most plugins simply call diff --git a/src/calibre/ebooks/metadata/sources/cli.py b/src/calibre/ebooks/metadata/sources/cli.py index d2cc1648f9..b39da07d53 100644 --- a/src/calibre/ebooks/metadata/sources/cli.py +++ b/src/calibre/ebooks/metadata/sources/cli.py @@ -13,10 +13,13 @@ from threading import Event from calibre import prints from calibre.utils.config import OptionParser +from calibre.utils.magick.draw import save_cover_data_to from calibre.ebooks.metadata import string_to_authors from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.ebooks.metadata.sources.base import create_log from calibre.ebooks.metadata.sources.identify import identify +from calibre.ebooks.metadata.sources.covers import download_cover + def option_parser(): parser = OptionParser(textwrap.dedent( @@ -33,6 +36,8 @@ def option_parser(): parser.add_option('-v', '--verbose', default=False, action='store_true', help='Print the log to the console (stderr)') parser.add_option('-o', '--opf', help='Output the metadata in OPF format') + parser.add_option('-c', '--cover', + help='Specify a filename. The cover, if available, will be saved to it') parser.add_option('-d', '--timeout', default='30', help='Timeout in seconds. 
Default is 30') @@ -57,14 +62,26 @@ def main(args=sys.argv): results = identify(log, abort, title=opts.title, authors=authors, identifiers=identifiers, timeout=int(opts.timeout)) - log = buf.getvalue() - if not results: print (log, file=sys.stderr) prints('No results found', file=sys.stderr) raise SystemExit(1) - result = results[0] + + cf = None + if opts.cover and results: + cover = download_cover(log, title=opts.title, authors=authors, + identifiers=result.identifiers, timeout=int(opts.timeout)) + if cover is None: + prints('No cover found', file=sys.stderr) + else: + save_cover_data_to(cover[-1], opts.cover) + result.cover = cf = opts.cover + + + log = buf.getvalue() + + result = (metadata_to_opf(result) if opts.opf else unicode(result).encode('utf-8')) @@ -72,6 +89,8 @@ def main(args=sys.argv): print (log, file=sys.stderr) print (result) + if not opts.opf: + prints('Cover :', cf) return 0 diff --git a/src/calibre/ebooks/metadata/sources/covers.py b/src/calibre/ebooks/metadata/sources/covers.py new file mode 100644 index 0000000000..46b278397c --- /dev/null +++ b/src/calibre/ebooks/metadata/sources/covers.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import time +from Queue import Queue, Empty +from threading import Thread, Event +from io import BytesIO + +from calibre.customize.ui import metadata_plugins +from calibre.ebooks.metadata.sources.base import msprefs, create_log +from calibre.utils.magick.draw import Image, save_cover_data_to + +class Worker(Thread): + + def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq): + Thread.__init__(self) + self.daemon = True + + self.plugin = plugin + self.abort = abort + self.buf = BytesIO() + self.log = create_log(self.buf) + self.title, self.authors, 
self.identifiers = (title, authors, + identifiers) + self.timeout, self.rq = timeout, rq + self.time_spent = None + + def run(self): + start_time = time.time() + if not self.abort.is_set(): + try: + self.plugin.download_cover(self.log, self.rq, self.abort, + title=self.title, authors=self.authors, + identifiers=self.identifiers, timeout=self.timeout) + except: + self.log.exception('Failed to download cover from', + self.plugin.name) + self.time_spent = time.time() - start_time + +def is_worker_alive(workers): + for w in workers: + if w.is_alive(): + return True + return False + +def process_result(log, result): + plugin, data = result + try: + im = Image() + im.load(data) + im.trim(10) + width, height = im.size + fmt = im.format + + if width < 50 or height < 50: + raise ValueError('Image too small') + data = save_cover_data_to(im, '/cover.jpg', return_data=True) + except: + log.exception('Invalid cover from', plugin.name) + return None + return (plugin, width, height, fmt, data) + +def run_download(log, results, abort, + title=None, authors=None, identifiers={}, timeout=30): + ''' + Run the cover download, putting results into the queue :param:`results`. 
+ + Each result is a tuple of the form: + + (plugin, width, height, fmt, bytes) + + ''' + plugins = list(metadata_plugins(['cover'])) + + rq = Queue() + workers = [Worker(p, abort, title, authors, identifiers, timeout, rq) for p + in plugins] + for w in workers: + w.start() + + first_result_at = None + wait_time = msprefs['wait_after_first_cover_result'] + found_results = {} + + while True: + time.sleep(0.1) + try: + x = rq.get_nowait() + result = process_result(log, x) + if result is not None: + results.put(result) + found_results[result[0]] = result + if first_result_at is None: + first_result_at = time.time() + except Empty: + pass + + if not is_worker_alive(workers): + break + + if first_result_at is not None and time.time() - first_result_at > wait_time: + log('Not waiting for any more results') + abort.set() + + if abort.is_set(): + break + + while True: + try: + x = rq.get_nowait() + result = process_result(log, x) + if result is not None: + results.put(result) + found_results[result[0]] = result + except Empty: + break + + for w in workers: + wlog = w.buf.getvalue().strip() + log('\n'+'*'*30, w.plugin.name, 'Covers', '*'*30) + log('Request extra headers:', w.plugin.browser.addheaders) + if w.plugin in found_results: + result = found_results[w.plugin] + log('Downloaded cover:', '%dx%d'%(result[1], result[2])) + else: + log('Failed to download valid cover') + if w.time_spent is None: + log('Download aborted') + else: + log('Took', w.time_spent, 'seconds') + if wlog: + log(wlog) + log('\n'+'*'*80) + + +def download_cover(log, + title=None, authors=None, identifiers={}, timeout=30): + ''' + Synchronous cover download. Returns the "best" cover as per user + prefs/cover resolution. + + Return cover is a tuple: (plugin, width, height, fmt, data) + + Returns None if no cover is found. 
+ ''' + rq = Queue() + abort = Event() + + run_download(log, rq, abort, title=title, authors=authors, + identifiers=identifiers, timeout=timeout) + + results = [] + + while True: + try: + results.append(rq.get_nowait()) + except Empty: + break + + cp = msprefs['cover_priorities'] + + def keygen(result): + plugin, width, height, fmt, data = result + return (cp.get(plugin.name, 1), 1/(width*height)) + + results.sort(key=keygen) + + return results[0] if results else None + + + + diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py index c4e2f9fe24..47cfb823bb 100644 --- a/src/calibre/ebooks/metadata/sources/google.py +++ b/src/calibre/ebooks/metadata/sources/google.py @@ -145,15 +145,18 @@ def to_metadata(browser, log, entry_, timeout): # {{{ log.exception('Failed to parse rating') # Cover - mi.has_google_cover = len(extra.xpath( - '//*[@rel="http://schemas.google.com/books/2008/thumbnail"]')) > 0 + mi.has_google_cover = None + for x in extra.xpath( + '//*[@href and @rel="http://schemas.google.com/books/2008/thumbnail"]'): + mi.has_google_cover = x.get('href') + break return mi # }}} class GoogleBooks(Source): - name = 'Google Books' + name = 'Google' description = _('Downloads metadata from Google Books') capabilities = frozenset(['identify', 'cover']) @@ -213,7 +216,7 @@ class GoogleBooks(Source): results.sort(key=self.identify_results_keygen( title=title, authors=authors, identifiers=identifiers)) for mi in results: - cached_url = self.cover_url_from_identifiers(mi.identifiers) + cached_url = self.get_cached_cover_url(mi.identifiers) if cached_url is not None: break if cached_url is None: @@ -223,9 +226,10 @@ class GoogleBooks(Source): if abort.is_set(): return br = self.browser + log('Downloading cover from:', cached_url) try: cdata = br.open_novisit(cached_url, timeout=timeout).read() - result_queue.put(cdata) + result_queue.put((self, cdata)) except: log.exception('Failed to download cover from:', cached_url) 
@@ -254,9 +258,9 @@ class GoogleBooks(Source): goog = ans.identifiers['google'] for isbn in getattr(ans, 'all_isbns', []): self.cache_isbn_to_identifier(isbn, goog) - if ans.has_google_cover: - self.cache_identifier_to_cover_url(goog, - self.GOOGLE_COVER%goog) + if ans.has_google_cover: + self.cache_identifier_to_cover_url(goog, + self.GOOGLE_COVER%goog) self.clean_downloaded_metadata(ans) result_queue.put(ans) except: diff --git a/src/calibre/ebooks/metadata/sources/openlibrary.py b/src/calibre/ebooks/metadata/sources/openlibrary.py index 1fcb33e35f..19b8747265 100644 --- a/src/calibre/ebooks/metadata/sources/openlibrary.py +++ b/src/calibre/ebooks/metadata/sources/openlibrary.py @@ -26,7 +26,7 @@ class OpenLibrary(Source): br = self.browser try: ans = br.open_novisit(self.OPENLIBRARY%isbn, timeout=timeout).read() - result_queue.put(ans) + result_queue.put((self, ans)) except Exception as e: if callable(getattr(e, 'getcode', None)) and e.getcode() == 404: log.error('No cover for ISBN: %r found'%isbn) From 62b1ae917608c47571180ff644ba81bdd4438509 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 00:04:27 -0600 Subject: [PATCH 09/27] identify(): Merge results with identical title and authors that aren't matched by xISBN --- src/calibre/ebooks/metadata/sources/cli.py | 2 +- .../ebooks/metadata/sources/identify.py | 53 ++++++++++++++++--- 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/cli.py b/src/calibre/ebooks/metadata/sources/cli.py index b39da07d53..58042da2bf 100644 --- a/src/calibre/ebooks/metadata/sources/cli.py +++ b/src/calibre/ebooks/metadata/sources/cli.py @@ -89,7 +89,7 @@ def main(args=sys.argv): print (log, file=sys.stderr) print (result) - if not opts.opf: + if not opts.opf and opts.cover: prints('Cover :', cf) return 0 diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index 77391bac6b..cbc12b6167 100644 --- 
a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -20,6 +20,7 @@ from calibre.ebooks.metadata.xisbn import xisbn from calibre.ebooks.metadata.book.base import Metadata from calibre.utils.date import utc_tz from calibre.utils.html2text import html2text +from calibre.utils.icu import lower # Download worker {{{ class Worker(Thread): @@ -97,11 +98,45 @@ class ISBNMerge(object): if has_isbn_result: self.merge_isbn_results() else: - self.results = sorted(self.isbnless_results, + results = sorted(self.isbnless_results, key=attrgetter('relevance_in_source')) + # Pick only the most relevant result from each source + self.results = [] + seen = set() + for result in results: + if result.identify_plugin not in seen: + seen.add(result.identify_plugin) + self.results.append(result) + result.average_source_relevance = \ + result.relevance_in_source + + self.merge_metadata_results() return self.results + def merge_metadata_results(self): + ' Merge results with identical title and authors ' + groups = {} + for result in self.results: + title = lower(result.title if result.title else '') + key = (title, tuple([lower(x) for x in result.authors])) + if key not in groups: + groups[key] = [] + groups[key].append(result) + + if len(groups) != len(self.results): + self.results = [] + for rgroup in groups.itervalues(): + rel = [r.average_source_relevance for r in rgroup] + if len(rgroup) > 1: + result = self.merge(rgroup, None, do_asr=False) + result.average_source_relevance = sum(rel)/len(rel) + else: + result = rgroup[0] + self.results.append(result) + + self.results.sort(key=attrgetter('average_source_relevance')) + def merge_isbn_results(self): self.results = [] for min_year, results in self.pools.itervalues(): @@ -122,7 +157,7 @@ class ISBNMerge(object): values = [getattr(x, attr) for x in results if not x.is_null(attr)] return values[0] if values else null_value - def merge(self, results, min_year): + def merge(self, results, 
min_year, do_asr=True): ans = Metadata(_('Unknown')) # We assume the shortest title has the least cruft in it @@ -185,7 +220,8 @@ class ISBNMerge(object): # Merge any other fields with no special handling (random merge) touched_fields = set() for r in results: - touched_fields |= r.identify_plugin.touched_fields + if hasattr(r, 'identify_plugin'): + touched_fields |= r.identify_plugin.touched_fields for f in touched_fields: if f.startswith('identifier:') or not ans.is_null(f): @@ -193,9 +229,10 @@ class ISBNMerge(object): setattr(ans, f, self.random_merge(f, results, null_value=getattr(ans, f))) - avg = [x.relevance_in_source for x in results] - avg = sum(avg)/len(avg) - ans.average_source_relevance = avg + if do_asr: + avg = [x.relevance_in_source for x in results] + avg = sum(avg)/len(avg) + ans.average_source_relevance = avg return ans @@ -210,7 +247,8 @@ def merge_identify_results(result_map, log): # }}} -def identify(log, abort, title=None, authors=None, identifiers={}, timeout=30): +def identify(log, abort, # {{{ + title=None, authors=None, identifiers={}, timeout=30): start_time = time.time() plugins = list(metadata_plugins(['identify'])) @@ -322,6 +360,7 @@ def identify(log, abort, title=None, authors=None, identifiers={}, timeout=30): r.tags = r.tags[:max_tags] return results +# }}} if __name__ == '__main__': # tests {{{ # To run these test use: calibre-debug -e From 6059a77d86de5f4433bf4a23fb595a4bf9df9113 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 00:14:44 -0600 Subject: [PATCH 10/27] Fix author name casing algorithm --- src/calibre/ebooks/metadata/sources/base.py | 26 +++++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index 33232f25ab..77cc8eaba8 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -15,6 +15,7 @@ from calibre.customize import Plugin from 
calibre.utils.logging import ThreadSafeLog, FileStream from calibre.utils.config import JSONConfig from calibre.utils.titlecase import titlecase +from calibre.utils.icu import capitalize, lower from calibre.ebooks.metadata import check_isbn msprefs = JSONConfig('metadata_sources/global.json') @@ -107,6 +108,25 @@ def get_cached_cover_urls(mi): if url: yield (p, url) +def cap_author_token(token): + if lower(token) in ('von', 'de', 'el', 'van'): + return lower(token) + return capitalize(token) + +def fixauthors(authors): + if not authors: + return authors + ans = [] + for x in authors: + ans.append(' '.join(map(cap_author_token, x.split()))) + return ans + +def fixcase(x): + if x: + x = titlecase(x) + return x + + class Source(Plugin): @@ -259,13 +279,9 @@ class Source(Plugin): before putting the Metadata object into result_queue. You can of course, use a custom algorithm suited to your metadata source. ''' - def fixcase(x): - if x: - x = titlecase(x) - return x if mi.title: mi.title = fixcase(mi.title) - mi.authors = list(map(fixcase, mi.authors)) + mi.authors = fixauthors(mi.authors) mi.tags = list(map(fixcase, mi.tags)) mi.isbn = check_isbn(mi.isbn) From ea2a5c7537457bc4daf8cddfd9eadbc899374dde Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 00:15:27 -0600 Subject: [PATCH 11/27] ... 
--- src/calibre/ebooks/metadata/sources/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index 77cc8eaba8..ac95860f66 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -109,7 +109,7 @@ def get_cached_cover_urls(mi): yield (p, url) def cap_author_token(token): - if lower(token) in ('von', 'de', 'el', 'van'): + if lower(token) in ('von', 'de', 'el', 'van', 'le'): return lower(token) return capitalize(token) From 41815e218ae32d7c8faa60244773e9078219e5da Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 00:26:46 -0600 Subject: [PATCH 12/27] Normalize author names with run together initials --- src/calibre/ebooks/metadata/sources/base.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index ac95860f66..fe57124cae 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -109,8 +109,12 @@ def get_cached_cover_urls(mi): yield (p, url) def cap_author_token(token): - if lower(token) in ('von', 'de', 'el', 'van', 'le'): - return lower(token) + lt = lower(token) + if lt in ('von', 'de', 'el', 'van', 'le'): + return lt + if re.match(r'([a-z]\.){2,}$', lt) is not None: + parts = token.split('.') + return '. '.join(map(capitalize, parts)).strip() return capitalize(token) def fixauthors(authors): From 67eb873eab8b2ef2c5e512f9cc6aef519b47994e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 00:28:24 -0600 Subject: [PATCH 13/27] ... 
--- src/calibre/ebooks/metadata/sources/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index fe57124cae..faa7420081 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -113,6 +113,7 @@ def cap_author_token(token): if lt in ('von', 'de', 'el', 'van', 'le'): return lt if re.match(r'([a-z]\.){2,}$', lt) is not None: + # Normalize tokens of the form J.K. to J. K. parts = token.split('.') return '. '.join(map(capitalize, parts)).strip() return capitalize(token) From 97c5b041a3fcaa2632f42973b278cb1f42e78118 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Wed, 6 Apr 2011 08:11:27 +0100 Subject: [PATCH 14/27] When drag & dropping onto the tag browser, set the current node to the one dropped upon. --- src/calibre/gui2/tag_view.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py index f86e261443..73f423981a 100644 --- a/src/calibre/gui2/tag_view.py +++ b/src/calibre/gui2/tag_view.py @@ -985,6 +985,7 @@ class TagsModel(QAbstractItemModel): # {{{ def do_drop_from_library(self, md, action, row, column, parent): idx = parent if idx.isValid(): + self.tags_view.setCurrentIndex(idx) node = self.data(idx, Qt.UserRole) if node.type == TagTreeItem.TAG: fm = self.db.metadata_for_field(node.tag.category) From 049776de273cd8bb77fd81887cad0eeb008bc930 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 08:24:25 -0600 Subject: [PATCH 15/27] ... --- src/calibre/manual/faq.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 97ef32e9d4..f48fa9dc16 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -99,7 +99,8 @@ We just need some information from you: device. 
Once you send us the output for a particular operating system, support for the device in that operating system -will appear in the next release of |app|. +will appear in the next release of |app|. To send us the output, open a bug report and attach the output to it. +See `http://calibre-ebook.com/bugs`_. My device is not being detected by |app|? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 593f3aaf0a6f08bbab384a66d0f4af9bf074d397 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 08:44:50 -0600 Subject: [PATCH 16/27] Support for Motorola Atrix --- src/calibre/devices/android/driver.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 54e4979524..7702a7caf0 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -36,7 +36,9 @@ class ANDROID(USBMS): # Motorola 0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100], 0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216], - 0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216] }, + 0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216], + 0x7086 : [0x0226], + }, # Sony Ericsson 0xfce : { 0xd12e : [0x0100]}, @@ -101,7 +103,8 @@ class ANDROID(USBMS): 'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE', 'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H', 'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD', - '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2'] + '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2', + 'MB860'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S', 'A101IT', '7'] From 504ef950568ab8fcdd0b04c7af5de78ffd4ab0a1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 10:03:46 -0600 Subject: [PATCH 17/27] When dealing with ZIP/RAR archives, use the file header rather than the file extension 
to determine the file type, when possible. This fixes the common case of CBZ files being actually cbr files and vice versa --- src/calibre/__init__.py | 21 ++++++++++++++++----- src/calibre/customize/builtins.py | 8 ++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 1799072045..2f457bf2bc 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -217,14 +217,25 @@ def filename_to_utf8(name): return name.decode(codec, 'replace').encode('utf8') def extract(path, dir): - ext = os.path.splitext(path)[1][1:].lower() extractor = None - if ext in ['zip', 'cbz', 'epub', 'oebzip']: - from calibre.libunzip import extract as zipextract - extractor = zipextract - elif ext in ['cbr', 'rar']: + # First use the file header to identify its type + with open(path, 'rb') as f: + id_ = f.read(3) + if id_ == b'Rar': from calibre.libunrar import extract as rarextract extractor = rarextract + elif id_.startswith(b'PK'): + from calibre.libunzip import extract as zipextract + extractor = zipextract + if extractor is None: + # Fallback to file extension + ext = os.path.splitext(path)[1][1:].lower() + if ext in ['zip', 'cbz', 'epub', 'oebzip']: + from calibre.libunzip import extract as zipextract + extractor = zipextract + elif ext in ['cbr', 'rar']: + from calibre.libunrar import extract as rarextract + extractor = rarextract if extractor is None: raise Exception('Unknown archive type') extractor(path, dir) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 1e40a8e5ff..91abfacc95 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -166,6 +166,14 @@ class ComicMetadataReader(MetadataReaderPlugin): description = _('Extract cover from comic files') def get_metadata(self, stream, ftype): + if hasattr(stream, 'seek') and hasattr(stream, 'tell'): + pos = stream.tell() + id_ = stream.read(3) + stream.seek(pos) + if id_ == b'Rar': + 
ftype = 'cbr' + elif id_.startswith(b'PK'): + ftype = 'cbz' if ftype == 'cbr': from calibre.libunrar import extract_first_alphabetically as extract_first extract_first From ddf6bd19f557a8f546422da0c10c667dd623fc18 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 10:50:55 -0600 Subject: [PATCH 18/27] Add a 'plugin tweak' test_eight_code which if set to True will cause calibre to use code intended for the 0.8.x series. Note that this code is in heavy development so only set this tweak if you are OK with having parts of calibre broken. --- src/calibre/customize/builtins.py | 26 ++++++++++++++++---------- src/calibre/utils/config.py | 1 + 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 91abfacc95..93cdfe50d9 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -10,6 +10,7 @@ from calibre.constants import numeric_version from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.ebooks.oeb.base import OEB_IMAGES +from calibre.utils.config import test_eight_code # To archive plugins {{{ class HTML2ZIP(FileTypePlugin): @@ -612,20 +613,25 @@ from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG from calibre.devices.kobo.driver import KOBO from calibre.devices.bambook.driver import BAMBOOK -from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \ - KentDistrictLibrary -from calibre.ebooks.metadata.douban import DoubanBooks -from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers -from calibre.ebooks.metadata.covers import OpenLibraryCovers, \ - AmazonCovers, DoubanCovers from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX from calibre.ebooks.epub.fix.unmanifested import Unmanifested from calibre.ebooks.epub.fix.epubcheck import Epubcheck -plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, 
ArchiveExtract, GoogleBooks, ISBNDB, Amazon, - KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested, - Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers, - NiceBooksCovers] +plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested, + Epubcheck, ] + +if not test_eight_code: + from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \ + KentDistrictLibrary + from calibre.ebooks.metadata.douban import DoubanBooks + from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers + from calibre.ebooks.metadata.covers import OpenLibraryCovers, \ + AmazonCovers, DoubanCovers + + plugins += [GoogleBooks, ISBNDB, Amazon, + OpenLibraryCovers, AmazonCovers, DoubanCovers, + NiceBooksCovers, KentDistrictLibrary, DoubanBooks, NiceBooks] + plugins += [ ComicInput, EPUBInput, diff --git a/src/calibre/utils/config.py b/src/calibre/utils/config.py index d5a489acf1..66316d051b 100644 --- a/src/calibre/utils/config.py +++ b/src/calibre/utils/config.py @@ -784,6 +784,7 @@ def write_tweaks(raw): tweaks = read_tweaks() +test_eight_code = tweaks.get('test_eight_code', False) def migrate(): if hasattr(os, 'geteuid') and os.geteuid() == 0: From 086a2959173f56fc27d9d55008dc66c4cba8d0bb Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Wed, 6 Apr 2011 17:56:54 +0100 Subject: [PATCH 19/27] Fix #751950: make content server ignore non-existent search restrictions. 
--- src/calibre/library/server/base.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/calibre/library/server/base.py b/src/calibre/library/server/base.py index dba6abbfa5..eea28469a9 100644 --- a/src/calibre/library/server/base.py +++ b/src/calibre/library/server/base.py @@ -24,6 +24,8 @@ from calibre.library.server.xml import XMLServer from calibre.library.server.opds import OPDSServer from calibre.library.server.cache import Cache from calibre.library.server.browse import BrowseServer +from calibre.utils.search_query_parser import saved_searches +from calibre import prints class DispatchController(object): # {{{ @@ -178,7 +180,12 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, def set_search_restriction(self, restriction): self.search_restriction_name = restriction if restriction: - self.search_restriction = 'search:"%s"'%restriction + if restriction not in saved_searches().names(): + prints('WARNING: Content server: search restriction ', + restriction, ' does not exist') + self.search_restriction = '' + else: + self.search_restriction = 'search:"%s"'%restriction else: self.search_restriction = '' self.reset_caches() From 261df5b15d1bb9636f2adf6fb982708fb3c35f91 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 11:09:50 -0600 Subject: [PATCH 20/27] Use test_eight_code in fetch-ebook-metadata --- src/calibre/customize/builtins.py | 18 ++++++++++-------- src/calibre/ebooks/metadata/sources/cli.py | 9 ++++++++- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 93cdfe50d9..298799daa5 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -620,7 +620,16 @@ from calibre.ebooks.epub.fix.epubcheck import Epubcheck plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested, Epubcheck, ] -if not test_eight_code: +if test_eight_code: 
+# New metadata download plugins {{{ + from calibre.ebooks.metadata.sources.google import GoogleBooks + from calibre.ebooks.metadata.sources.amazon import Amazon + from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary + + plugins += [GoogleBooks, Amazon, OpenLibrary] + +# }}} +else: from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \ KentDistrictLibrary from calibre.ebooks.metadata.douban import DoubanBooks @@ -1069,11 +1078,4 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions, #}}} -# New metadata download plugins {{{ -from calibre.ebooks.metadata.sources.google import GoogleBooks -from calibre.ebooks.metadata.sources.amazon import Amazon -from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary -plugins += [GoogleBooks, Amazon, OpenLibrary] - -# }}} diff --git a/src/calibre/ebooks/metadata/sources/cli.py b/src/calibre/ebooks/metadata/sources/cli.py index 58042da2bf..cb422f939d 100644 --- a/src/calibre/ebooks/metadata/sources/cli.py +++ b/src/calibre/ebooks/metadata/sources/cli.py @@ -19,9 +19,13 @@ from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.ebooks.metadata.sources.base import create_log from calibre.ebooks.metadata.sources.identify import identify from calibre.ebooks.metadata.sources.covers import download_cover - +from calibre.utils.config import test_eight_code def option_parser(): + if not test_eight_code: + from calibre.ebooks.metadata.fetch import option_parser + return option_parser() + parser = OptionParser(textwrap.dedent( '''\ %prog [options] @@ -44,6 +48,9 @@ def option_parser(): return parser def main(args=sys.argv): + if not test_eight_code: + from calibre.ebooks.metadata.fetch import main + return main(args) parser = option_parser() opts, args = parser.parse_args(args) From d63d47a9f53ffa73017351802c454cd10d010062 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 12:28:07 -0600 Subject: [PATCH 21/27] Use new edit metadata dialog when 
test_eight_code is True --- src/calibre/gui2/actions/edit_metadata.py | 50 +++++++++++++++++----- src/calibre/gui2/metadata/basic_widgets.py | 5 +-- src/calibre/gui2/metadata/single.py | 22 ++++++++-- 3 files changed, 60 insertions(+), 17 deletions(-) diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index 3f053e5223..c3ceb27e7e 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -17,6 +17,7 @@ from calibre.gui2.dialogs.confirm_delete import confirm from calibre.gui2.dialogs.tag_list_editor import TagListEditor from calibre.gui2.actions import InterfaceAction from calibre.utils.icu import sort_key +from calibre.utils.config import test_eight_code class EditMetadataAction(InterfaceAction): @@ -133,8 +134,6 @@ class EditMetadataAction(InterfaceAction): row_list = [r.row() for r in rows] current_row = 0 - changed = set([]) - db = self.gui.library_view.model().db if len(row_list) == 1: cr = row_list[0] @@ -142,6 +141,24 @@ class EditMetadataAction(InterfaceAction): list(range(self.gui.library_view.model().rowCount(QModelIndex()))) current_row = row_list.index(cr) + if test_eight_code: + changed = self.do_edit_metadata(row_list, current_row) + else: + changed = self.do_edit_metadata_old(row_list, current_row) + + if changed: + self.gui.library_view.model().refresh_ids(list(changed)) + current = self.gui.library_view.currentIndex() + m = self.gui.library_view.model() + if self.gui.cover_flow: + self.gui.cover_flow.dataChanged() + m.current_changed(current, previous) + self.gui.tags_view.recount() + + def do_edit_metadata_old(self, row_list, current_row): + changed = set([]) + db = self.gui.library_view.model().db + while True: prev = next_ = None if current_row > 0: @@ -167,15 +184,28 @@ class EditMetadataAction(InterfaceAction): self.gui.library_view.set_current_row(current_row) self.gui.library_view.scroll_to_row(current_row) + def do_edit_metadata(self, row_list, 
current_row): + from calibre.gui2.metadata.single import edit_metadata + db = self.gui.library_view.model().db + changed, rows_to_refresh = edit_metadata(db, row_list, current_row, + parent=self.gui, view_slot=self.view_format_callback, + set_current_callback=self.set_current_callback) + return changed + + def set_current_callback(self, id_): + db = self.gui.library_view.model().db + current_row = db.row(id_) + self.gui.library_view.set_current_row(current_row) + self.gui.library_view.scroll_to_row(current_row) + + def view_format_callback(self, id_, fmt): + view = self.gui.iactions['View'] + if id_ is None: + view._view_file(fmt) + else: + db = self.gui.library_view.model().db + view.view_format(db.row(id_), fmt) - if changed: - self.gui.library_view.model().refresh_ids(list(changed)) - current = self.gui.library_view.currentIndex() - m = self.gui.library_view.model() - if self.gui.cover_flow: - self.gui.cover_flow.dataChanged() - m.current_changed(current, previous) - self.gui.tags_view.recount() def edit_bulk_metadata(self, checked): ''' diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py index bab9073588..0b7d96c07c 100644 --- a/src/calibre/gui2/metadata/basic_widgets.py +++ b/src/calibre/gui2/metadata/basic_widgets.py @@ -428,7 +428,7 @@ class Format(QListWidgetItem): # {{{ if timestamp is not None: ts = timestamp.astimezone(local_tz) t = strftime('%a, %d %b %Y [%H:%M:%S]', ts.timetuple()) - text = _('Last modified: %s')%t + text = _('Last modified: %s\n\nDouble click to view')%t self.setToolTip(text) self.setStatusTip(text) @@ -577,8 +577,7 @@ class FormatsManager(QWidget): # {{{ self.changed = True def show_format(self, item, *args): - fmt = item.ext - self.dialog.view_format.emit(fmt) + self.dialog.do_view_format.emit(item.path, item.ext) def get_selected_format_metadata(self, db, id_): old = prefs['read_file_metadata'] diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py 
index 70307eb3b1..bba8528573 100644 --- a/src/calibre/gui2/metadata/single.py +++ b/src/calibre/gui2/metadata/single.py @@ -26,7 +26,7 @@ from calibre.utils.config import tweaks class MetadataSingleDialogBase(ResizableDialog): - view_format = pyqtSignal(object) + view_format = pyqtSignal(object, object) cc_two_column = tweaks['metadata_single_use_2_cols_for_custom_fields'] one_line_comments_toolbar = False @@ -194,6 +194,13 @@ class MetadataSingleDialogBase(ResizableDialog): pass # Do something # }}} + def do_view_format(self, path, fmt): + if path: + self.view_format.emit(None, path) + else: + self.view_format.emit(self.book_id, fmt) + + def do_layout(self): raise NotImplementedError() @@ -204,6 +211,8 @@ class MetadataSingleDialogBase(ResizableDialog): widget.initialize(self.db, id_) for widget in getattr(self, 'custom_metadata_widgets', []): widget.initialize(id_) + if callable(self.set_current_callback): + self.set_current_callback(id_) # Commented out as it doesn't play nice with Next, Prev buttons #self.fetch_metadata_button.setFocus(Qt.OtherFocusReason) @@ -339,11 +348,13 @@ class MetadataSingleDialogBase(ResizableDialog): gprefs['metasingle_window_geometry3'] = bytearray(self.saveGeometry()) # Dialog use methods {{{ - def start(self, row_list, current_row, view_slot=None): + def start(self, row_list, current_row, view_slot=None, + set_current_callback=None): self.row_list = row_list self.current_row = current_row if view_slot is not None: self.view_format.connect(view_slot) + self.set_current_callback = set_current_callback self.do_one(apply_changes=False) ret = self.exec_() self.break_cycles() @@ -375,6 +386,7 @@ class MetadataSingleDialogBase(ResizableDialog): def break_cycles(self): # Break any reference cycles that could prevent python # from garbage collecting this dialog + self.set_current_callback = None def disconnect(signal): try: signal.disconnect() @@ -643,9 +655,11 @@ class MetadataSingleDialogAlt(MetadataSingleDialogBase): # {{{ # }}} -def 
edit_metadata(db, row_list, current_row, parent=None, view_slot=None): +def edit_metadata(db, row_list, current_row, parent=None, view_slot=None, + set_current_callback=None): d = MetadataSingleDialog(db, parent) - d.start(row_list, current_row, view_slot=view_slot) + d.start(row_list, current_row, view_slot=view_slot, + set_current_callback=set_current_callback) return d.changed, d.rows_to_refresh if __name__ == '__main__': From 901960ec044b8689a5cdf9062a69cdeae8306940 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 12:29:29 -0600 Subject: [PATCH 22/27] ... --- src/calibre/gui2/metadata/basic_widgets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py index 0b7d96c07c..b0b7115ca1 100644 --- a/src/calibre/gui2/metadata/basic_widgets.py +++ b/src/calibre/gui2/metadata/basic_widgets.py @@ -577,7 +577,7 @@ class FormatsManager(QWidget): # {{{ self.changed = True def show_format(self, item, *args): - self.dialog.do_view_format.emit(item.path, item.ext) + self.dialog.do_view_format(item.path, item.ext) def get_selected_format_metadata(self, db, id_): old = prefs['read_file_metadata'] From 67a467107ea387042880d2257c8c61a063b80b4f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 12:35:01 -0600 Subject: [PATCH 23/27] ... 
--- src/calibre/gui2/metadata/single.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py index bba8528573..4f66e0d2ba 100644 --- a/src/calibre/gui2/metadata/single.py +++ b/src/calibre/gui2/metadata/single.py @@ -32,9 +32,9 @@ class MetadataSingleDialogBase(ResizableDialog): def __init__(self, db, parent=None): self.db = db - self.changed = set([]) - self.books_to_refresh = set([]) - self.rows_to_refresh = set([]) + self.changed = set() + self.books_to_refresh = set() + self.rows_to_refresh = set() ResizableDialog.__init__(self, parent) def setupUi(self, *args): # {{{ @@ -386,7 +386,7 @@ class MetadataSingleDialogBase(ResizableDialog): def break_cycles(self): # Break any reference cycles that could prevent python # from garbage collecting this dialog - self.set_current_callback = None + self.set_current_callback = self.db = None def disconnect(signal): try: signal.disconnect() From 2e08bc51712079312a96c18d0dbb0481bed56bc0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 12:41:10 -0600 Subject: [PATCH 24/27] Fix #752464 ("Kommersant" recipe is broken) --- recipes/kommersant.recipe | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/recipes/kommersant.recipe b/recipes/kommersant.recipe index f24a5da909..09fb8f8ad8 100644 --- a/recipes/kommersant.recipe +++ b/recipes/kommersant.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' +__copyright__ = '2010-2011, Darko Miletic ' ''' www.kommersant.ru ''' @@ -20,7 +20,13 @@ class Kommersant_ru(BasicNewsRecipe): language = 'ru' publication_type = 'newspaper' masthead_url = 'http://www.kommersant.ru/CorpPics/logo_daily_1.gif' - extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Arial, sans1, sans-serif} span#ctl00_ContentPlaceHolderStyle_LabelSubTitle{margin-bottom: 1em; 
display: block} .author{margin-bottom: 1em; display: block} .paragraph{margin-bottom: 1em; display: block} .vvodka{font-weight: bold; margin-bottom: 1em} ' + extra_css = """ + @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} + body{font-family: Tahoma, Arial, Helvetica, sans1, sans-serif} + .title{font-size: x-large; font-weight: bold; margin-bottom: 1em} + .subtitle{font-size: large; margin-bottom: 1em} + .document_vvodka{font-weight: bold; margin-bottom: 1em} + """ conversion_options = { 'comment' : description @@ -29,14 +35,11 @@ class Kommersant_ru(BasicNewsRecipe): , 'language' : language } - keep_only_tags = [ - dict(attrs={'id':'ctl00_ContentPlaceHolderStyle_PanelHeader'}) - ,dict(attrs={'class':['vvodka','paragraph','author']}) - ] - remove_tags = [dict(name=['iframe','object','link','img','base'])] + keep_only_tags = [dict(attrs={'class':['document','document_vvodka','document_text','document_authors vblock']})] + remove_tags = [dict(name=['iframe','object','link','img','base','meta'])] feeds = [(u'Articles', u'http://feeds.kommersant.ru/RSS_Export/RU/daily.xml')] - def print_version(self, url): - return url.replace('doc-rss.aspx','doc.aspx') + '&print=true' - + def print_version(self, url): + return url.replace('/doc-rss/','/Doc/') + '/Print' + \ No newline at end of file From cad3b71b324ffb280b90c309bfdbe7ea376a1430 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 13:54:10 -0600 Subject: [PATCH 25/27] ... --- src/calibre/manual/gui.rst | 3 ++- src/calibre/manual/index.rst | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/calibre/manual/gui.rst b/src/calibre/manual/gui.rst index 9307ff30f6..7b6e60c93a 100644 --- a/src/calibre/manual/gui.rst +++ b/src/calibre/manual/gui.rst @@ -71,7 +71,7 @@ Edit metadata |emii| The :guilabel:`Edit metadata` action has six variations, which can be accessed by clicking the down arrow on the right side of the button. - 1. 
**Edit metadata individually**: This allows you to edit the metadata of books one-by-one, with the option of fetching metadata, including covers from the internet. It also allows you to add/remove particular ebook formats from a book. For more detail see :ref:`metadata`. + 1. **Edit metadata individually**: This allows you to edit the metadata of books one-by-one, with the option of fetching metadata, including covers from the internet. It also allows you to add/remove particular ebook formats from a book. 2. **Edit metadata in bulk**: This allows you to edit common metadata fields for large numbers of books simulataneously. It operates on all the books you have selected in the :ref:`Library view `. 3. **Download metadata and covers**: Downloads metadata and covers (if available), for the books that are selected in the book list. 4. **Download only metadata**: Downloads only metadata (if available), for the books that are selected in the book list. @@ -79,6 +79,7 @@ Edit metadata 6. **Download only social metadata**: Downloads only social metadata such as tags and reviews (if available), for the books that are selected in the book list. 7. **Merge Book Records**: Gives you the capability of merging the metadata and formats of two or more book records together. You can choose to either delete or keep the records that were not clicked first. +For more details see :ref:`metadata`. .. _convert_ebooks: diff --git a/src/calibre/manual/index.rst b/src/calibre/manual/index.rst index 996a1de382..e54882dda0 100644 --- a/src/calibre/manual/index.rst +++ b/src/calibre/manual/index.rst @@ -70,7 +70,7 @@ Customizing |app|'s e-book conversion .. toctree:: :maxdepth: 2 - viewer + conversion Editing e-book metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -78,7 +78,7 @@ Editing e-book metadata .. 
toctree:: :maxdepth: 2 - viewer + metadata Frequently Asked Questions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 3e230ac838eab493f7125534fb024f1f01eaefb9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 15:46:51 -0600 Subject: [PATCH 26/27] ... --- src/calibre/manual/conf.py | 2 +- src/calibre/manual/faq.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/manual/conf.py b/src/calibre/manual/conf.py index fc8962bcfd..d2b3a91d8d 100644 --- a/src/calibre/manual/conf.py +++ b/src/calibre/manual/conf.py @@ -126,7 +126,7 @@ html_use_modindex = False html_use_index = False # If true, the reST sources are included in the HTML build as _sources/. -html_copy_source = False +html_copy_source = True # Output file base name for HTML help builder. htmlhelp_basename = 'calibredoc' diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index f48fa9dc16..f8b257fd75 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -100,7 +100,7 @@ We just need some information from you: Once you send us the output for a particular operating system, support for the device in that operating system will appear in the next release of |app|. To send us the output, open a bug report and attach the output to it. -See `http://calibre-ebook.com/bugs`_. +See `calibre bugs <http://calibre-ebook.com/bugs>`_. My device is not being detected by |app|? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 58899e65ef4e532976f59fb6da1c1484a9a5ad4d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 16:23:18 -0600 Subject: [PATCH 27/27] ...
---
 src/calibre/gui2/metadata/single_download.py | 64 ++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 src/calibre/gui2/metadata/single_download.py

diff --git a/src/calibre/gui2/metadata/single_download.py b/src/calibre/gui2/metadata/single_download.py
new file mode 100644
index 0000000000..ace4133d7a
--- /dev/null
+++ b/src/calibre/gui2/metadata/single_download.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF,
+        QStyle, QApplication)
+
+class RichTextDelegate(QStyledItemDelegate): # {{{
+    '''
+    Item delegate that draws each item's data as rich text, by loading it
+    as HTML into a QTextDocument.
+    '''
+
+    def __init__(self, parent=None):
+        QStyledItemDelegate.__init__(self, parent)
+
+    def to_doc(self, index):
+        '''
+        Return a new QTextDocument whose HTML content is the string form
+        of ``index.data()``.
+        '''
+        doc = QTextDocument()
+        doc.setHtml(index.data().toString())
+        return doc
+
+    def sizeHint(self, option, index):
+        '''
+        Return the size of the document built for ``index``, with 10
+        added to its height. ``option`` is not used.
+        '''
+        ans = self.to_doc(index).size().toSize()
+        # NOTE(review): the extra 10 looks like vertical padding -- confirm
+        ans.setHeight(ans.height()+10)
+        return ans
+
+    def paint(self, painter, option, index):
+        '''
+        Draw the item at ``index`` with ``painter``, clipped to
+        ``option.rect``.
+        '''
+        # The clip and translation applied below are undone by the
+        # matching restore() at the end of this method
+        painter.save()
+        painter.setClipRect(QRectF(option.rect))
+        # Let the application style draw the item when QStyle has
+        # CE_ItemViewItem; otherwise only fill selected items with the
+        # palette's highlight brush
+        # NOTE(review): presumably CE_ItemViewItem is missing on older
+        # Qt/PyQt builds -- confirm
+        if hasattr(QStyle, 'CE_ItemViewItem'):
+            QApplication.style().drawControl(QStyle.CE_ItemViewItem, option, painter)
+        elif option.state & QStyle.State_Selected:
+            painter.fillRect(option.rect, option.palette.highlight())
+        # Move the painter's origin to the top-left corner of the item's
+        # rectangle before drawing the document
+        painter.translate(option.rect.topLeft())
+        self.to_doc(index).drawContents(painter)
+        painter.restore()
+# }}}
+