From 18d0f6a6ef79bc80b23a3a58a1c7c607169fe662 Mon Sep 17 00:00:00 2001 From: John Schember Date: Tue, 12 Apr 2011 18:58:54 -0400 Subject: [PATCH 01/15] Add HTMLZ as a book extension. Use HTML icon for HTMLZ. --- src/calibre/ebooks/__init__.py | 2 +- src/calibre/gui2/__init__.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index 7776be5e28..a56abb907e 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -26,7 +26,7 @@ class ParserError(ValueError): pass BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm', - 'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc', + 'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc', 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb'] diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 22aaabf592..e39427021e 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -357,6 +357,7 @@ class FileIconProvider(QFileIconProvider): 'bmp' : 'bmp', 'svg' : 'svg', 'html' : 'html', + 'htmlz' : 'html', 'htm' : 'html', 'xhtml' : 'html', 'xhtm' : 'html', From d5119f0c2f0bad0220122e7771cbb6388d22a21a Mon Sep 17 00:00:00 2001 From: John Schember Date: Tue, 12 Apr 2011 19:11:52 -0400 Subject: [PATCH 02/15] HTMLZ Output: Handle SVG data returned as lxml.etree._Element properly. 
--- src/calibre/ebooks/htmlz/output.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/htmlz/output.py b/src/calibre/ebooks/htmlz/output.py index 7cdf04bcdb..03fe12c89e 100644 --- a/src/calibre/ebooks/htmlz/output.py +++ b/src/calibre/ebooks/htmlz/output.py @@ -12,7 +12,7 @@ from lxml import etree from calibre.customize.conversion import OutputFormatPlugin, \ OptionRecommendation -from calibre.ebooks.oeb.base import OEB_IMAGES +from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME from calibre.ptempfile import TemporaryDirectory from calibre.utils.zipfile import ZipFile @@ -71,9 +71,13 @@ class HTMLZOutput(OutputFormatPlugin): os.makedirs(os.path.join(tdir, 'images')) for item in oeb_book.manifest: if item.media_type in OEB_IMAGES and item.href in images: + if item.media_type == SVG_MIME: + data = unicode(etree.tostring(item.data, encoding=unicode)) + else: + data = item.data fname = os.path.join(tdir, 'images', images[item.href]) with open(fname, 'wb') as img: - img.write(item.data) + img.write(data) # Metadata with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf: From 1d6521aa5e34fc04902130680b0e73a1979ae0c7 Mon Sep 17 00:00:00 2001 From: John Schember Date: Tue, 12 Apr 2011 19:53:04 -0400 Subject: [PATCH 03/15] extZ metadata: Read and write first opf file found in archive. 
--- src/calibre/ebooks/metadata/extz.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/metadata/extz.py b/src/calibre/ebooks/metadata/extz.py index 0ecdbe9ea6..b49f3f6ddd 100644 --- a/src/calibre/ebooks/metadata/extz.py +++ b/src/calibre/ebooks/metadata/extz.py @@ -25,14 +25,30 @@ def get_metadata(stream, extract_cover=True): with TemporaryDirectory('_untxtz_mdata') as tdir: try: - zf = ZipFile(stream) - zf.extract('metadata.opf', tdir) - with open(os.path.join(tdir, 'metadata.opf'), 'rb') as opff: - mi = OPF(opff).to_book_metadata() + with ZipFile(stream) as zf: + opf_name = get_first_opf_name(stream) + opf_stream = StringIO(zf.read(opf_name)) + mi = OPF(opf_stream).to_book_metadata() except: return mi return mi def set_metadata(stream, mi): opf = StringIO(metadata_to_opf(mi)) - safe_replace(stream, 'metadata.opf', opf) + try: + opf_name = get_first_opf_name(stream) + except: + opf_name = 'metadata.opf' + safe_replace(stream, opf_name, opf) + +def get_first_opf_name(stream): + with ZipFile(stream) as zf: + names = zf.namelist() + opfs = [] + for n in names: + if n.endswith('.opf') and '/' not in n: + opfs.append(n) + if not opfs: + raise Exception('No OPF found') + opfs.sort() + return opfs[0] From f3beb13b6221aebb0366dfc1044fdfd959646f2f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 12 Apr 2011 18:06:18 -0600 Subject: [PATCH 04/15] Bulk metadata download works again. 
More testing of corner cases needed --- src/calibre/ebooks/metadata/sources/covers.py | 2 +- src/calibre/gui2/actions/edit_metadata.py | 2 +- src/calibre/gui2/metadata/bulk_download2.py | 29 +++++++++++++++---- src/calibre/gui2/threaded_jobs.py | 6 +++- 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/covers.py b/src/calibre/ebooks/metadata/sources/covers.py index cf6ec90c54..44f902eeee 100644 --- a/src/calibre/ebooks/metadata/sources/covers.py +++ b/src/calibre/ebooks/metadata/sources/covers.py @@ -145,7 +145,7 @@ def download_cover(log, Synchronous cover download. Returns the "best" cover as per user prefs/cover resolution. - Return cover is a tuple: (plugin, width, height, fmt, data) + Returned cover is a tuple: (plugin, width, height, fmt, data) Returns None if no cover is found. ''' diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index 9f2cacb177..18a73fb282 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -94,7 +94,7 @@ class EditMetadataAction(InterfaceAction): def bulk_metadata_downloaded(self, job): if job.failed: - self.job_exception(job, dialog_title=_('Failed to download metadata')) + self.gui.job_exception(job, dialog_title=_('Failed to download metadata')) return from calibre.gui2.metadata.bulk_download2 import proceed proceed(self.gui, job) diff --git a/src/calibre/gui2/metadata/bulk_download2.py b/src/calibre/gui2/metadata/bulk_download2.py index 19cd3df9d4..05c61f6037 100644 --- a/src/calibre/gui2/metadata/bulk_download2.py +++ b/src/calibre/gui2/metadata/bulk_download2.py @@ -54,6 +54,8 @@ def start_download(gui, ids, callback, identify, covers): _('Download metadata for %d books')%len(ids), download, (ids, gui.current_db, identify, covers), {}, callback) gui.job_manager.run_threaded_job(job) + gui.status_bar.show_message(_('Metadata download started'), 3000) + class ViewLog(QDialog): # {{{ 
@@ -110,11 +112,12 @@ class ApplyDialog(QDialog): self.bb.accepted.connect(self.accept) l.addWidget(self.bb) - self.db = gui.current_db + self.gui = gui self.id_map = list(id_map.iteritems()) self.current_idx = 0 self.failures = [] + self.ids = [] self.canceled = False QTimer.singleShot(20, self.do_one) @@ -124,11 +127,13 @@ class ApplyDialog(QDialog): if self.canceled: return i, mi = self.id_map[self.current_idx] + db = self.gui.current_db try: set_title = not mi.is_null('title') set_authors = not mi.is_null('authors') - self.db.set_metadata(i, mi, commit=False, set_title=set_title, + db.set_metadata(i, mi, commit=False, set_title=set_title, set_authors=set_authors) + self.ids.append(i) except: import traceback self.failures.append((i, traceback.format_exc())) @@ -156,9 +161,10 @@ class ApplyDialog(QDialog): return if self.failures: msg = [] + db = self.gui.current_db for i, tb in self.failures: - title = self.db.title(i, index_is_id=True) - authors = self.db.authors(i, index_is_id=True) + title = db.title(i, index_is_id=True) + authors = db.authors(i, index_is_id=True) if authors: authors = [x.replace('|', ',') for x in authors.split(',')] title += ' - ' + authors_to_string(authors) @@ -169,6 +175,12 @@ class ApplyDialog(QDialog): ' in your library. 
Click "Show Details" to see ' 'details.'), det_msg='\n\n'.join(msg), show=True) self.accept() + if self.ids: + cr = self.gui.library_view.currentIndex().row() + self.gui.library_view.model().refresh_ids( + self.ids, cr) + if self.gui.cover_flow: + self.gui.cover_flow.dataChanged() _amd = None def apply_metadata(job, gui, q, result): @@ -209,6 +221,7 @@ def apply_metadata(job, gui, q, result): _amd = ApplyDialog(id_map, gui) def proceed(gui, job): + gui.status_bar.show_message(_('Metadata download completed'), 3000) id_map, failed_ids = job.result fmsg = det_msg = '' if failed_ids: @@ -242,6 +255,10 @@ def merge_result(oldmi, newmi): if (not newmi.is_null(f) and getattr(newmi, f) == getattr(oldmi, f)): setattr(newmi, f, getattr(dummy, f)) + newmi.last_modified = oldmi.last_modified + + return newmi + def download(ids, db, do_identify, covers, log=None, abort=None, notifications=None): ids = list(ids) @@ -271,9 +288,9 @@ def download(ids, db, do_identify, covers, if covers: cdata = download_cover(log, title=title, authors=authors, identifiers=identifiers) - if cdata: + if cdata is not None: with PersistentTemporaryFile('.jpg', 'downloaded-cover-') as f: - f.write(cdata) + f.write(cdata[-1]) mi.cover = f.name ans[i] = mi count += 1 diff --git a/src/calibre/gui2/threaded_jobs.py b/src/calibre/gui2/threaded_jobs.py index f98488da79..9c791c5b0d 100644 --- a/src/calibre/gui2/threaded_jobs.py +++ b/src/calibre/gui2/threaded_jobs.py @@ -189,7 +189,11 @@ class ThreadedJobServer(Thread): def run(self): while self.keep_going: - self.run_once() + try: + self.run_once() + except: + import traceback + traceback.print_exc() time.sleep(0.1) def run_once(self): From 184692b587e67d79ef35edc04ff5b97c0c27654d Mon Sep 17 00:00:00 2001 From: John Schember Date: Tue, 12 Apr 2011 20:39:01 -0400 Subject: [PATCH 05/15] extZ metadata: Get cover, update OPF without losing other data such as spine, and guide. 
--- src/calibre/ebooks/metadata/extz.py | 34 +++++++++++++++++------------ 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/src/calibre/ebooks/metadata/extz.py b/src/calibre/ebooks/metadata/extz.py index b49f3f6ddd..338c4dd91d 100644 --- a/src/calibre/ebooks/metadata/extz.py +++ b/src/calibre/ebooks/metadata/extz.py @@ -7,13 +7,10 @@ __copyright__ = '2011, John Schember ' Read meta information from extZ (TXTZ, HTMLZ...) files. ''' -import os - from cStringIO import StringIO from calibre.ebooks.metadata import MetaInformation -from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf -from calibre.ptempfile import TemporaryDirectory +from calibre.ebooks.metadata.opf2 import OPF from calibre.utils.zipfile import ZipFile, safe_replace def get_metadata(stream, extract_cover=True): @@ -23,23 +20,32 @@ def get_metadata(stream, extract_cover=True): mi = MetaInformation(_('Unknown'), [_('Unknown')]) stream.seek(0) - with TemporaryDirectory('_untxtz_mdata') as tdir: - try: - with ZipFile(stream) as zf: - opf_name = get_first_opf_name(stream) - opf_stream = StringIO(zf.read(opf_name)) - mi = OPF(opf_stream).to_book_metadata() - except: - return mi + try: + with ZipFile(stream) as zf: + opf_name = get_first_opf_name(stream) + opf_stream = StringIO(zf.read(opf_name)) + opf = OPF(opf_stream) + mi = opf.to_book_metadata() + if extract_cover: + cover_name = opf.raster_cover + if cover_name: + mi.cover_data = ('jpg', zf.read(cover_name)) + except: + return mi return mi def set_metadata(stream, mi): - opf = StringIO(metadata_to_opf(mi)) try: opf_name = get_first_opf_name(stream) + with ZipFile(stream) as zf: + opf_stream = StringIO(zf.read(opf_name)) + opf = OPF(opf_stream) except: opf_name = 'metadata.opf' - safe_replace(stream, opf_name, opf) + opf = OPF(StringIO()) + opf.smart_update(mi, replace_metadata=True) + newopf = StringIO(opf.render()) + safe_replace(stream, opf_name, newopf) def get_first_opf_name(stream): with ZipFile(stream) as zf: From 
15f638784d2829d6b96e55a475a36cfdcacfba97 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 12 Apr 2011 18:39:12 -0600 Subject: [PATCH 06/15] ... --- src/calibre/gui2/metadata/bulk_download2.py | 39 +++++++++++++++------ 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/src/calibre/gui2/metadata/bulk_download2.py b/src/calibre/gui2/metadata/bulk_download2.py index 05c61f6037..5f0af1b316 100644 --- a/src/calibre/gui2/metadata/bulk_download2.py +++ b/src/calibre/gui2/metadata/bulk_download2.py @@ -77,7 +77,7 @@ class ViewLog(QDialog): # {{{ self.copy_button.clicked.connect(self.copy_to_clipboard) l.addWidget(self.bb) self.setModal(False) - self.resize(QSize(500, 400)) + self.resize(QSize(700, 500)) self.setWindowTitle(_('Download log')) self.setWindowIcon(QIcon(I('debug.png'))) self.show() @@ -121,7 +121,6 @@ class ApplyDialog(QDialog): self.canceled = False QTimer.singleShot(20, self.do_one) - self.exec_() def do_one(self): if self.canceled: @@ -189,7 +188,7 @@ def apply_metadata(job, gui, q, result): q.finished.disconnect() if result != q.Accepted: return - id_map, failed_ids = job.result + id_map, failed_ids, failed_covers, title_map = job.result id_map = dict([(k, v) for k, v in id_map.iteritems() if k not in failed_ids]) if not id_map: @@ -219,24 +218,32 @@ def apply_metadata(job, gui, q, result): return _amd = ApplyDialog(id_map, gui) + _amd.exec_() def proceed(gui, job): gui.status_bar.show_message(_('Metadata download completed'), 3000) - id_map, failed_ids = job.result + id_map, failed_ids, failed_covers, title_map = job.result fmsg = det_msg = '' - if failed_ids: - fmsg = _('Could not download metadata for %d of the books. Click' + if failed_ids or failed_covers: + fmsg = '

'+_('Could not download metadata and/or covers for %d of the books. Click' ' "Show details" to see which books.')%len(failed_ids) - det_msg = '\n'.join([id_map[i].title for i in failed_ids]) + det_msg = [] + for i in failed_ids | failed_covers: + title = title_map[i] + if i in failed_ids: + title += (' ' + _('(Failed metadata)')) + if i in failed_covers: + title += (' ' + _('(Failed cover)')) + det_msg.append(title) msg = '

' + _('Finished downloading metadata for %d book(s). ' 'Proceed with updating the metadata in your library?')%len(id_map) q = MessageBox(MessageBox.QUESTION, _('Download complete'), - msg + fmsg, det_msg=det_msg, show_copy_button=bool(failed_ids), + msg + fmsg, det_msg='\n'.join(det_msg), show_copy_button=bool(failed_ids), parent=gui) q.vlb = q.bb.addButton(_('View log'), q.bb.ActionRole) q.vlb.setIcon(QIcon(I('debug.png'))) q.vlb.clicked.connect(partial(view_log, job, q)) - q.det_msg_toggle.setVisible(bool(failed_ids)) + q.det_msg_toggle.setVisible(bool(failed_ids | failed_covers)) q.setModal(False) q.show() q.finished.connect(partial(apply_metadata, job, gui, q)) @@ -265,6 +272,8 @@ def download(ids, db, do_identify, covers, metadata = [db.get_metadata(i, index_is_id=True, get_user_categories=False) for i in ids] failed_ids = set() + failed_covers = set() + title_map = {} ans = {} count = 0 for i, mi in izip(ids, metadata): @@ -272,6 +281,7 @@ def download(ids, db, do_identify, covers, log.error('Aborting...') break title, authors, identifiers = mi.title, mi.authors, mi.identifiers + title_map[i] = title if do_identify: results = [] try: @@ -282,9 +292,14 @@ def download(ids, db, do_identify, covers, if results: mi = merge_result(mi, results[0]) identifiers = mi.identifiers + if not mi.is_null('rating'): + # set_metadata expects a rating out of 10 + mi.rating *= 2 else: log.error('Failed to download metadata for', title) - failed_ids.add(mi) + failed_ids.add(i) + # We don't want set_metadata operating on anything but covers + mi = merge_result(mi, mi) if covers: cdata = download_cover(log, title=title, authors=authors, identifiers=identifiers) @@ -292,12 +307,14 @@ def download(ids, db, do_identify, covers, with PersistentTemporaryFile('.jpg', 'downloaded-cover-') as f: f.write(cdata[-1]) mi.cover = f.name + else: + failed_covers.add(i) ans[i] = mi count += 1 notifications.put((count/len(ids), _('Downloaded %d of %d')%(count, len(ids)))) log('Download complete, 
with %d failures'%len(failed_ids)) - return (ans, failed_ids) + return (ans, failed_ids, failed_covers, title_map) From 40d01c5aeace074eb1700fc4a3b9a62a120d9beb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 12 Apr 2011 18:45:16 -0600 Subject: [PATCH 07/15] MOBI Output: Fix bug that would cause conversion to unnecessarily abort when malformed hyperlinks are present in the input document. Fixes #759313 (Private bug) --- src/calibre/ebooks/mobi/writer.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 5f4c47cdf3..89ef9fcd82 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -310,10 +310,11 @@ class Serializer(object): if href not in id_offsets: self.logger.warn('Hyperlink target %r not found' % href) href, _ = urldefrag(href) - ioff = self.id_offsets[href] - for hoff in hoffs: - buffer.seek(hoff) - buffer.write('%010d' % ioff) + else: + ioff = self.id_offsets[href] + for hoff in hoffs: + buffer.seek(hoff) + buffer.write('%010d' % ioff) class MobiWriter(object): COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+') From 5e75259355e47a19c45554d12711af9df907e727 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 12 Apr 2011 18:54:06 -0600 Subject: [PATCH 08/15] ... 
--- src/calibre/ebooks/mobi/writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 89ef9fcd82..fc47b26c02 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -310,7 +310,7 @@ class Serializer(object): if href not in id_offsets: self.logger.warn('Hyperlink target %r not found' % href) href, _ = urldefrag(href) - else: + if href in self.id_offsets: ioff = self.id_offsets[href] for hoff in hoffs: buffer.seek(hoff) From fbde96b7a1b947f349fa3c71d1c5b6e090418fd9 Mon Sep 17 00:00:00 2001 From: John Schember Date: Tue, 12 Apr 2011 20:54:14 -0400 Subject: [PATCH 09/15] extZ metadata: Set cover. --- src/calibre/ebooks/metadata/extz.py | 52 ++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/metadata/extz.py b/src/calibre/ebooks/metadata/extz.py index 338c4dd91d..18c5a25671 100644 --- a/src/calibre/ebooks/metadata/extz.py +++ b/src/calibre/ebooks/metadata/extz.py @@ -7,10 +7,14 @@ __copyright__ = '2011, John Schember ' Read meta information from extZ (TXTZ, HTMLZ...) files. ''' +import os +import posixpath + from cStringIO import StringIO from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata.opf2 import OPF +from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.zipfile import ZipFile, safe_replace def get_metadata(stream, extract_cover=True): @@ -35,17 +39,50 @@ def get_metadata(stream, extract_cover=True): return mi def set_metadata(stream, mi): + replacements = {} + + # Get the OPF in the archive. try: - opf_name = get_first_opf_name(stream) + opf_path = get_first_opf_name(stream) with ZipFile(stream) as zf: - opf_stream = StringIO(zf.read(opf_name)) + opf_stream = StringIO(zf.read(opf_path)) opf = OPF(opf_stream) except: - opf_name = 'metadata.opf' + opf_path = 'metadata.opf' opf = OPF(StringIO()) + + # Cover. 
+ new_cdata = None + try: + new_cdata = mi.cover_data[1] + if not new_cdata: + raise Exception('no cover') + except: + try: + new_cdata = open(mi.cover, 'rb').read() + except: + pass + if new_cdata: + raster_cover = opf.raster_cover + if not raster_cover: + raster_cover = 'cover.jpg' + cpath = posixpath.join(posixpath.dirname(opf_path), raster_cover) + new_cover = _write_new_cover(new_cdata, cpath) + replacements[cpath] = open(new_cover.name, 'rb') + + # Update the metadata. opf.smart_update(mi, replace_metadata=True) newopf = StringIO(opf.render()) - safe_replace(stream, opf_name, newopf) + safe_replace(stream, opf_path, newopf, extra_replacements=replacements) + + # Cleanup temporary files. + try: + if cpath is not None: + replacements[cpath].close() + os.remove(replacements[cpath].name) + except: + pass + def get_first_opf_name(stream): with ZipFile(stream) as zf: @@ -58,3 +95,10 @@ def get_first_opf_name(stream): raise Exception('No OPF found') opfs.sort() return opfs[0] + +def _write_new_cover(new_cdata, cpath): + from calibre.utils.magick.draw import save_cover_data_to + new_cover = PersistentTemporaryFile(suffix=os.path.splitext(cpath)[1]) + new_cover.close() + save_cover_data_to(new_cdata, new_cover.name) + return new_cover From 5b82c42e4bc5b96ee242f61bc30d0be3d8ecf703 Mon Sep 17 00:00:00 2001 From: John Schember Date: Tue, 12 Apr 2011 20:55:06 -0400 Subject: [PATCH 10/15] ... --- src/calibre/ebooks/metadata/extz.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/extz.py b/src/calibre/ebooks/metadata/extz.py index 18c5a25671..6d41f7819d 100644 --- a/src/calibre/ebooks/metadata/extz.py +++ b/src/calibre/ebooks/metadata/extz.py @@ -83,7 +83,6 @@ def set_metadata(stream, mi): except: pass - def get_first_opf_name(stream): with ZipFile(stream) as zf: names = zf.namelist() From eecf3ec73e8e4a33600d67c4c3d9b8235e2ec6b5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 12 Apr 2011 19:38:31 -0600 Subject: [PATCH 11/15] ... 
--- src/calibre/ebooks/metadata/sources/covers.py | 5 +++ .../ebooks/metadata/sources/identify.py | 4 ++ src/calibre/ebooks/metadata/sources/isbndb.py | 42 ++++++++++++++++++- 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/covers.py b/src/calibre/ebooks/metadata/sources/covers.py index 44f902eeee..d28ce146c6 100644 --- a/src/calibre/ebooks/metadata/sources/covers.py +++ b/src/calibre/ebooks/metadata/sources/covers.py @@ -76,6 +76,11 @@ def run_download(log, results, abort, (plugin, width, height, fmt, bytes) ''' + if title == _('Unknown'): + title = None + if authors == [_('Unknown')]: + authors = None + plugins = [p for p in metadata_plugins(['cover']) if p.is_configured()] rq = Queue() diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index fad810c26e..b494e05e1a 100644 --- a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -253,6 +253,10 @@ def merge_identify_results(result_map, log): def identify(log, abort, # {{{ title=None, authors=None, identifiers={}, timeout=30): + if title == _('Unknown'): + title = None + if authors == [_('Unknown')]: + authors = None start_time = time.time() plugins = [p for p in metadata_plugins(['identify']) if p.is_configured()] diff --git a/src/calibre/ebooks/metadata/sources/isbndb.py b/src/calibre/ebooks/metadata/sources/isbndb.py index ab9342c6cb..af192227c1 100644 --- a/src/calibre/ebooks/metadata/sources/isbndb.py +++ b/src/calibre/ebooks/metadata/sources/isbndb.py @@ -7,7 +7,13 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' -from calibre.ebooks.metadata.sources.base import Source +from urllib import quote + +from calibre.ebooks.metadata import check_isbn +from calibre.ebooks.metadata.sources.base import Source, Option + +BASE_URL = 
'http://isbndb.com/api/books.xml?access_key=%s&page_number=1&results=subjects,authors,texts&' + class ISBNDB(Source): @@ -18,6 +24,14 @@ class ISBNDB(Source): touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'comments', 'publisher']) supports_gzip_transfer_encoding = True + # Shortcut, since we have no cached cover URLS + cached_cover_url_is_reliable = False + + options = ( + Option('isbndb_key', 'string', None, _('IsbnDB key:'), + _('To use isbndb.com you have to sign up for a free account' + 'at isbndb.com and get an access key.')), + ) def __init__(self, *args, **kwargs): Source.__init__(self, *args, **kwargs) @@ -35,9 +49,33 @@ class ISBNDB(Source): except: pass - self.isbndb_key = prefs['isbndb_key'] + @property + def isbndb_key(self): + return self.prefs['isbndb_key'] def is_configured(self): return self.isbndb_key is not None + def create_query(self, log, title=None, authors=None, identifiers={}): # {{{ + base_url = BASE_URL%self.isbndb_key + isbn = check_isbn(identifiers.get('isbn', None)) + q = '' + if isbn is not None: + q = 'index1=isbn&value1='+isbn + elif title or authors: + tokens = [] + title_tokens = list(self.get_title_tokens(title)) + tokens += title_tokens + author_tokens = self.get_author_tokens(authors, + only_first_author=True) + tokens += author_tokens + tokens = [quote(t) for t in tokens] + q = '+'.join(tokens) + q = 'index1=combined&value1='+q + + if not q: + return None + if isinstance(q, unicode): + q = q.encode('utf-8') + return base_url + q From 2bdc0c48a48125db99b1a76c853fe94f2fd48f13 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 12 Apr 2011 22:39:38 -0600 Subject: [PATCH 12/15] Complete migration of ISBNDB plugin. However, I'm not enabling it, as it seems to provide largely useless results anyway. 
--- src/calibre/ebooks/metadata/sources/isbndb.py | 140 +++++++++++++++++- src/calibre/manual/server.rst | 2 + 2 files changed, 141 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/sources/isbndb.py b/src/calibre/ebooks/metadata/sources/isbndb.py index af192227c1..361554ad9c 100644 --- a/src/calibre/ebooks/metadata/sources/isbndb.py +++ b/src/calibre/ebooks/metadata/sources/isbndb.py @@ -9,8 +9,14 @@ __docformat__ = 'restructuredtext en' from urllib import quote +from lxml import etree + from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata.sources.base import Source, Option +from calibre.ebooks.chardet import xml_to_unicode +from calibre.utils.cleantext import clean_ascii_chars +from calibre.utils.icu import lower +from calibre.ebooks.metadata.book.base import Metadata BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%s&page_number=1&results=subjects,authors,texts&' @@ -56,7 +62,7 @@ class ISBNDB(Source): def is_configured(self): return self.isbndb_key is not None - def create_query(self, log, title=None, authors=None, identifiers={}): # {{{ + def create_query(self, title=None, authors=None, identifiers={}): # {{{ base_url = BASE_URL%self.isbndb_key isbn = check_isbn(identifiers.get('isbn', None)) q = '' @@ -78,4 +84,136 @@ class ISBNDB(Source): if isinstance(q, unicode): q = q.encode('utf-8') return base_url + q + # }}} + def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ + identifiers={}, timeout=30): + if not self.is_configured(): + return + query = self.create_query(title=title, authors=authors, + identifiers=identifiers) + if not query: + err = 'Insufficient metadata to construct query' + log.error(err) + return err + + results = [] + try: + results = self.make_query(query, abort, title=title, authors=authors, + identifiers=identifiers, timeout=timeout) + except: + err = 'Failed to make query to ISBNDb, aborting.' 
+ log.exception(err) + return err + + if not results and identifiers.get('isbn', False) and title and authors and \ + not abort.is_set(): + return self.identify(log, result_queue, abort, title=title, + authors=authors, timeout=timeout) + + for result in results: + self.clean_downloaded_metadata(result) + result_queue.put(result) + + def parse_feed(self, feed, seen, orig_title, orig_authors, identifiers): + + def tostring(x): + if x is None: + return '' + return etree.tostring(x, method='text', encoding=unicode).strip() + + orig_isbn = identifiers.get('isbn', None) + title_tokens = self.get_title_tokens(orig_title) + author_tokens = self.get_author_tokens(orig_authors) + results = [] + + def ismatch(title, authors): + authors = lower(' '.join(authors)) + title = lower(title) + match = False + for t in title_tokens: + if lower(t) in title: + match = True + break + if not title_tokens: match = True + amatch = False + for a in author_tokens: + if a in authors: + amatch = True + break + if not author_tokens: amatch = True + return match and amatch + + bl = feed.find('BookList') + if bl is None: + err = tostring(etree.find('errormessage')) + raise ValueError('ISBNDb query failed:' + err) + total_results = int(bl.get('total_results')) + shown_results = int(bl.get('shown_results')) + for bd in bl.xpath('.//BookData'): + isbn = check_isbn(bd.get('isbn13', bd.get('isbn', None))) + if not isbn: + continue + if orig_isbn and isbn != orig_isbn: + continue + title = tostring(bd.find('Title')) + if not title: + continue + authors = [] + for au in bd.xpath('.//Authors/Person'): + au = tostring(au) + if au: + if ',' in au: + ln, _, fn = au.partition(',') + au = fn.strip() + ' ' + ln.strip() + authors.append(au) + if not authors: + continue + id_ = (title, tuple(authors)) + if id_ in seen: + continue + seen.add(id_) + if not ismatch(title, authors): + continue + publisher = tostring(bd.find('PublisherText')) + if not publisher: publisher = None + comments = 
tostring(bd.find('Summary')) + if not comments: comments = None + mi = Metadata(title, authors) + mi.isbn = isbn + mi.publisher = publisher + mi.comments = comments + results.append(mi) + return total_results, shown_results, results + + def make_query(self, q, abort, title=None, authors=None, identifiers={}, + max_pages=10, timeout=30): + page_num = 1 + parser = etree.XMLParser(recover=True, no_network=True) + br = self.browser + + seen = set() + + candidates = [] + total_found = 0 + while page_num <= max_pages and not abort.is_set(): + url = q.replace('&page_number=1&', '&page_number=%d&'%page_num) + page_num += 1 + raw = br.open_novisit(url, timeout=timeout).read() + feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw), + strip_encoding_pats=True)[0], parser=parser) + total, found, results = self.parse_feed( + feed, seen, title, authors, identifiers) + total_found += found + if results or total_found >= total: + candidates += results + break + + return candidates + # }}} + +if __name__ == '__main__': + s = ISBNDB(None) + t, a = 'great gatsby', ['fitzgerald'] + q = s.create_query(title=t, authors=a) + s.make_query(q, title=t, authors=a) diff --git a/src/calibre/manual/server.rst b/src/calibre/manual/server.rst index 82ec5c2927..aa98ba57df 100644 --- a/src/calibre/manual/server.rst +++ b/src/calibre/manual/server.rst @@ -22,6 +22,8 @@ First start the |app| content server as shown below:: calibre-server --url-prefix /calibre --port 8080 +The key parameter here is ``--url-prefix /calibre``. This causes the content server to serve all URLs prefixed by calibre. To see this in action, visit ``http://localhost:8080/calibre`` in your browser. You should see the normal content server website, but now it will run under /calibre. + Now suppose you are using Apache as your main server. 
First enable the proxy modules in apache, by adding the following to :file:`httpd.conf`:: LoadModule proxy_module modules/mod_proxy.so From cf675d79d862b26928083f6dd622064baddc7692 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 12 Apr 2011 22:42:47 -0600 Subject: [PATCH 13/15] ... --- src/calibre/ebooks/metadata/sources/isbndb.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/isbndb.py b/src/calibre/ebooks/metadata/sources/isbndb.py index 361554ad9c..18d797ba71 100644 --- a/src/calibre/ebooks/metadata/sources/isbndb.py +++ b/src/calibre/ebooks/metadata/sources/isbndb.py @@ -169,6 +169,11 @@ class ISBNDB(Source): authors.append(au) if not authors: continue + comments = tostring(bd.find('Summary')) + if not comments: + # Require comments, since without them the result is useless + # anyway + continue id_ = (title, tuple(authors)) if id_ in seen: continue @@ -177,8 +182,6 @@ class ISBNDB(Source): continue publisher = tostring(bd.find('PublisherText')) if not publisher: publisher = None - comments = tostring(bd.find('Summary')) - if not comments: comments = None mi = Metadata(title, authors) mi.isbn = isbn mi.publisher = publisher @@ -213,7 +216,8 @@ class ISBNDB(Source): # }}} if __name__ == '__main__': + from threading import Event s = ISBNDB(None) t, a = 'great gatsby', ['fitzgerald'] q = s.create_query(title=t, authors=a) - s.make_query(q, title=t, authors=a) + s.make_query(q, Event(), title=t, authors=a) From ec583f232d0611f9ba48e7f3e4f61f71717390e5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 12 Apr 2011 23:01:55 -0600 Subject: [PATCH 14/15] On second thoughts enable the ISBNDB plugin by default --- src/calibre/customize/builtins.py | 3 +- src/calibre/ebooks/metadata/sources/isbndb.py | 43 +++++++++++++------ 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 8f6c597ee5..d5957eb70a 100644 --- 
a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -625,8 +625,9 @@ if test_eight_code: from calibre.ebooks.metadata.sources.google import GoogleBooks from calibre.ebooks.metadata.sources.amazon import Amazon from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary + from calibre.ebooks.metadata.sources.isbndb import ISBNDB - plugins += [GoogleBooks, Amazon, OpenLibrary] + plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB] # }}} else: diff --git a/src/calibre/ebooks/metadata/sources/isbndb.py b/src/calibre/ebooks/metadata/sources/isbndb.py index 18d797ba71..a2a10708fb 100644 --- a/src/calibre/ebooks/metadata/sources/isbndb.py +++ b/src/calibre/ebooks/metadata/sources/isbndb.py @@ -123,22 +123,21 @@ class ISBNDB(Source): return etree.tostring(x, method='text', encoding=unicode).strip() orig_isbn = identifiers.get('isbn', None) - title_tokens = self.get_title_tokens(orig_title) - author_tokens = self.get_author_tokens(orig_authors) + title_tokens = list(self.get_title_tokens(orig_title)) + author_tokens = list(self.get_author_tokens(orig_authors)) results = [] def ismatch(title, authors): authors = lower(' '.join(authors)) title = lower(title) - match = False + match = not title_tokens for t in title_tokens: if lower(t) in title: match = True break - if not title_tokens: match = True - amatch = False + amatch = not author_tokens for a in author_tokens: - if a in authors: + if lower(a) in authors: amatch = True break if not author_tokens: amatch = True @@ -182,6 +181,8 @@ class ISBNDB(Source): continue publisher = tostring(bd.find('PublisherText')) if not publisher: publisher = None + if publisher and 'audio' in publisher.lower(): + continue mi = Metadata(title, authors) mi.isbn = isbn mi.publisher = publisher @@ -208,16 +209,32 @@ class ISBNDB(Source): total, found, results = self.parse_feed( feed, seen, title, authors, identifiers) total_found += found - if results or total_found >= total: - candidates += results + 
candidates += results + if total_found >= total or len(candidates) > 9: break return candidates # }}} if __name__ == '__main__': - from threading import Event - s = ISBNDB(None) - t, a = 'great gatsby', ['fitzgerald'] - q = s.create_query(title=t, authors=a) - s.make_query(q, Event(), title=t, authors=a) + # To run these test use: + # calibre-debug -e src/calibre/ebooks/metadata/sources/isbndb.py + from calibre.ebooks.metadata.sources.test import (test_identify_plugin, + title_test, authors_test) + test_identify_plugin(ISBNDB.name, + [ + + + ( + {'title':'Great Gatsby', + 'authors':['Fitzgerald']}, + [title_test('The great gatsby', exact=True), + authors_test(['F. Scott Fitzgerald'])] + ), + + ( + {'title': 'Flatland', 'authors':['Abbott']}, + [title_test('Flatland', exact=False)] + ), + ]) + From 9bd44ed078134b034da2d82cbe52dbbd7e04622f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 12 Apr 2011 23:24:55 -0600 Subject: [PATCH 15/15] ... --- src/calibre/ebooks/metadata/sources/base.py | 4 ++++ src/calibre/ebooks/metadata/sources/isbndb.py | 6 ++++++ src/calibre/gui2/metadata/config.py | 9 +++++++-- src/calibre/gui2/preferences/metadata_sources.py | 9 ++++++++- src/calibre/gui2/preferences/metadata_sources.ui | 10 ++++++++++ 5 files changed, 35 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index 5089d8951b..d9144fdf34 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -181,6 +181,10 @@ class Source(Plugin): #: construct the configuration widget for this plugin options = () + #: A string that is displayed at the top of the config widget for this + #: plugin + config_help_message = None + def __init__(self, *args, **kwargs): Plugin.__init__(self, *args, **kwargs) diff --git a/src/calibre/ebooks/metadata/sources/isbndb.py b/src/calibre/ebooks/metadata/sources/isbndb.py index a2a10708fb..b8deea56df 100644 --- 
a/src/calibre/ebooks/metadata/sources/isbndb.py +++ b/src/calibre/ebooks/metadata/sources/isbndb.py @@ -39,6 +39,12 @@ class ISBNDB(Source): 'at isbndb.com and get an access key.')), ) + config_help_message = '

'+_('To use metadata from isbndb.com you must sign' + ' up for a free account and get an isbndb key and enter it below.' + ' Instructions to get the key are ' + 'here.') + + def __init__(self, *args, **kwargs): Source.__init__(self, *args, **kwargs) diff --git a/src/calibre/gui2/metadata/config.py b/src/calibre/gui2/metadata/config.py index 68c935061d..abb45faa46 100644 --- a/src/calibre/gui2/metadata/config.py +++ b/src/calibre/gui2/metadata/config.py @@ -56,7 +56,12 @@ class ConfigWidget(QWidget): self.setLayout(l) self.gb = QGroupBox(_('Downloaded metadata fields'), self) - l.addWidget(self.gb, 0, 0, 1, 2) + if plugin.config_help_message: + self.pchm = QLabel(plugin.config_help_message) + self.pchm.setWordWrap(True) + self.pchm.setOpenExternalLinks(True) + l.addWidget(self.pchm, 0, 0, 1, 2) + l.addWidget(self.gb, l.rowCount(), 0, 1, 2) self.gb.l = QGridLayout() self.gb.setLayout(self.gb.l) self.fields_view = v = QListView(self) @@ -81,7 +86,7 @@ class ConfigWidget(QWidget): widget.setValue(val) elif opt.type == 'string': widget = QLineEdit(self) - widget.setText(val) + widget.setText(val if val else '') elif opt.type == 'bool': widget = QCheckBox(opt.label, self) widget.setChecked(bool(val)) diff --git a/src/calibre/gui2/preferences/metadata_sources.py b/src/calibre/gui2/preferences/metadata_sources.py index 4500a03b30..17a70bcc33 100644 --- a/src/calibre/gui2/preferences/metadata_sources.py +++ b/src/calibre/gui2/preferences/metadata_sources.py @@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en' from operator import attrgetter from PyQt4.Qt import (QAbstractTableModel, Qt, QAbstractListModel, QWidget, - pyqtSignal, QVBoxLayout, QDialogButtonBox, QFrame, QLabel) + pyqtSignal, QVBoxLayout, QDialogButtonBox, QFrame, QLabel, QIcon) from calibre.gui2.preferences import ConfigWidgetBase, test_widget from calibre.gui2.preferences.metadata_sources_ui import Ui_Form @@ -67,6 +67,13 @@ class SourcesModel(QAbstractTableModel): # {{{ return 
self.enabled_overrides.get(plugin, orig) elif role == Qt.UserRole: return plugin + elif (role == Qt.DecorationRole and col == 0 and not + plugin.is_configured()): + return QIcon(I('list_remove.png')) + elif role == Qt.ToolTipRole: + if plugin.is_configured(): + return _('This source is configured and ready to go') + return _('This source needs configuration') return NONE def setData(self, index, val, role): diff --git a/src/calibre/gui2/preferences/metadata_sources.ui b/src/calibre/gui2/preferences/metadata_sources.ui index 546120f628..b515f13ba1 100644 --- a/src/calibre/gui2/preferences/metadata_sources.ui +++ b/src/calibre/gui2/preferences/metadata_sources.ui @@ -48,6 +48,16 @@ + + + + Sources with a red X next to their names must be configured before they will be used. + + + true + + +