From 1663af9bd0faf5c71f04aef0d0caae907a0ff6ca Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 12 Feb 2013 16:25:31 +0530 Subject: [PATCH] Update metadata when polishing --- src/calibre/ebooks/metadata/epub.py | 49 ++++++++++++---------- src/calibre/ebooks/oeb/polish/container.py | 5 +++ src/calibre/ebooks/oeb/polish/main.py | 44 ++++++++++++++----- 3 files changed, 65 insertions(+), 33 deletions(-) diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py index e216610ad5..177158b48c 100644 --- a/src/calibre/ebooks/metadata/epub.py +++ b/src/calibre/ebooks/metadata/epub.py @@ -249,6 +249,30 @@ def _write_new_cover(new_cdata, cpath): save_cover_data_to(new_cdata, new_cover.name) return new_cover +def update_metadata(opf, mi, apply_null=False, update_timestamp=False): + for x in ('guide', 'toc', 'manifest', 'spine'): + setattr(mi, x, None) + if mi.languages: + langs = [] + for lc in mi.languages: + lc2 = lang_as_iso639_1(lc) + if lc2: lc = lc2 + langs.append(lc) + mi.languages = langs + + opf.smart_update(mi) + if getattr(mi, 'uuid', None): + opf.application_id = mi.uuid + if apply_null: + if not getattr(mi, 'series', None): + opf.series = None + if not getattr(mi, 'tags', []): + opf.tags = [] + if not getattr(mi, 'isbn', None): + opf.isbn = None + if update_timestamp and mi.timestamp is not None: + opf.timestamp = mi.timestamp + def set_metadata(stream, mi, apply_null=False, update_timestamp=False): stream.seek(0) reader = get_zip_reader(stream, root=os.getcwdu()) @@ -279,29 +303,8 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False): import traceback traceback.print_exc() - for x in ('guide', 'toc', 'manifest', 'spine'): - setattr(mi, x, None) - if mi.languages: - langs = [] - for lc in mi.languages: - lc2 = lang_as_iso639_1(lc) - if lc2: lc = lc2 - langs.append(lc) - mi.languages = langs - - - reader.opf.smart_update(mi) - if getattr(mi, 'uuid', None): - reader.opf.application_id = mi.uuid - if apply_null: - if not getattr(mi, 'series', None): - reader.opf.series = None - if not getattr(mi, 'tags', []): - reader.opf.tags = [] - if not getattr(mi, 'isbn', None): - reader.opf.isbn = None - if update_timestamp and mi.timestamp is not None: - reader.opf.timestamp = mi.timestamp + update_metadata(reader.opf, mi, apply_null=apply_null, + update_timestamp=update_timestamp) newopf = StringIO(reader.opf.render()) if isinstance(reader.archive, LocalZipFile): diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py index 08fd53158f..7b1b32ddf7 100644 --- a/src/calibre/ebooks/oeb/polish/container.py +++ b/src/calibre/ebooks/oeb/polish/container.py @@ -371,8 +371,13 @@ class Container(object): f.write(data) def open(self, name, mode='rb'): + ''' Open the file pointed to by name for direct read/write. Note that + this will commit the file if it is dirtied and remove it from the parse + cache. You must finish with this file before accessing the parsed + version of it again, or bad things will happen. ''' if name in self.dirtied: self.commit_item(name) + self.parsed_cache.pop(name, False) path = self.name_to_abspath(name) base = os.path.dirname(path) if not os.path.exists(base): diff --git a/src/calibre/ebooks/oeb/polish/main.py b/src/calibre/ebooks/oeb/polish/main.py index 3e17f0ebe5..b789c18198 100644 --- a/src/calibre/ebooks/oeb/polish/main.py +++ b/src/calibre/ebooks/oeb/polish/main.py @@ -7,7 +7,7 @@ __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import re, sys +import re, sys, os, time from collections import namedtuple from functools import partial @@ -72,8 +72,25 @@ def hfix(name, raw): CLI_HELP = {x:hfix(x, re.sub('<.*?>', '', y)) for x, y in HELP.iteritems()} # }}} +def update_metadata(ebook, new_opf): + from calibre.ebooks.metadata.opf2 import OPF + from calibre.ebooks.metadata.epub import update_metadata + opfpath = ebook.name_to_abspath(ebook.opf_name) + with ebook.open(ebook.opf_name, 'r+b') as stream, open(new_opf, 'rb') as ns: + opf = OPF(stream, basedir=os.path.dirname(opfpath), populate_spine=False, + unquote_urls=False) + mi = OPF(ns, unquote_urls=False, + populate_spine=False).to_book_metadata() + mi.cover, mi.cover_data = None, (None, None) + + update_metadata(opf, mi, apply_null=True, update_timestamp=True) + stream.seek(0) + stream.truncate() + stream.write(opf.render()) + def polish(file_map, opts, log, report): rt = lambda x: report('\n### ' + x) + st = time.time() for inbook, outbook in file_map.iteritems(): report('Polishing: %s'%(inbook.rpartition('.')[-1].upper())) ebook = get_container(inbook, log) @@ -81,6 +98,11 @@ def polish(file_map, opts, log, report): if opts.subset: stats = StatsCollector(ebook) + if opts.opf: + rt('Updating metadata') + update_metadata(ebook, opts.opf) + report('Metadata updated\n') + if opts.subset: rt('Subsetting embedded fonts') subset_all_fonts(ebook, stats.font_stats, report) @@ -92,6 +114,9 @@ def polish(file_map, opts, log, report): report('') ebook.commit(outbook) + report('Polishing took: %.1f seconds'%(time.time()-st)) + +REPORT = '{0} REPORT {0}'.format('-'*30) def gui_polish(data): files = data.pop('files') @@ -106,7 +131,8 @@ def gui_polish(data): log = Log(level=Log.DEBUG) report = [] polish(file_map, opts, log, report.append) - log('\n', '-'*30, ' REPORT ', '-'*30) + log('') + log(REPORT) for msg in report: log(msg) @@ -121,6 +147,9 @@ def option_parser(): a('--cover', '-c', help=_( 'Path to a cover image. Changes the cover specified in the ebook. ' 'If no cover is present, or the cover is not properly identified, inserts a new cover.')) + a('--opf', '-o', help=_( + 'Path to an OPF file. The metadata in the book is updated from the OPF file.')) + o('--verbose', help=_('Produce more verbose output, useful for debugging.')) return parser @@ -151,19 +180,14 @@ def main(args=None): O = namedtuple('Options', ' '.join(popts.iterkeys())) popts = O(**popts) report = [] - something = False - for name in ALL_OPTS: - if name not in {'opf', }: - if getattr(popts, name): - something = True - - if not something: + if not tuple(filter(None, (getattr(popts, name) for name in ALL_OPTS))): parser.print_help() log.error(_('You must specify at least one action to perform')) raise SystemExit(1) polish({inbook:outbook}, popts, log, report.append) - log('\n', '-'*30, ' REPORT ', '-'*30) + log('') + log(REPORT) for msg in report: log(msg)