From 24e5133c3b4346db346213655898e5a69827fa0f Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Tue, 16 Dec 2008 18:55:44 -0500 Subject: [PATCH] Handle recoverable OPF errors more gracefully, with logging. Change LIT-writing logging to share logger with OEB-processing. --- src/calibre/ebooks/lit/oeb.py | 43 ++++++++++++++++++++++++++------ src/calibre/ebooks/lit/writer.py | 36 +++++++++++++++----------- 2 files changed, 57 insertions(+), 22 deletions(-) diff --git a/src/calibre/ebooks/lit/oeb.py b/src/calibre/ebooks/lit/oeb.py index 6378c99219..339783f350 100644 --- a/src/calibre/ebooks/lit/oeb.py +++ b/src/calibre/ebooks/lit/oeb.py @@ -13,7 +13,9 @@ from types import StringTypes from itertools import izip, count from urlparse import urldefrag, urlparse, urlunparse from urllib import unquote as urlunquote +import logging from lxml import etree +from calibre import LoggingInterface XML_PARSER = etree.XMLParser(recover=True, resolve_entities=False) XML_NS = 'http://www.w3.org/XML/1998/namespace' @@ -82,6 +84,13 @@ def urlnormalize(href): return urlunparse(parts) +class FauxLogger(object): + def __getattr__(self, name): + return self + def __call__(self, message): + print message + + class AbstractContainer(object): def read_xml(self, path): return etree.fromstring( @@ -102,6 +111,10 @@ class DirContainer(AbstractContainer): with open(urlunquote(path), 'wb') as f: return f.write(data) + def exists(self, path): + path = os.path.join(self.rootdir, path) + return os.path.isfile(path) + class Metadata(object): TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description', @@ -287,7 +300,7 @@ class Manifest(object): yield id, items def __contains__(self, key): - return id in self.items + return key in self.items def to_opf1(self, parent=None): elem = element(parent, 'manifest') @@ -473,13 +486,14 @@ class TOC(object): node.to_ncx(point, playorder, depth+1) return parent - + class OEBBook(object): - def __init__(self, opfpath, container=None): + def __init__(self, opfpath, container=None, logger=FauxLogger()): if not container: container = DirContainer(os.path.dirname(opfpath)) opfpath = os.path.basename(opfpath) self.container = container + self.logger = logger opf = self._read_opf(opfpath) self._all_from_opf(opf) @@ -533,17 +547,28 @@ class OEBBook(object): if item.id == uid: self.uid = item break + else: + self.logger.log_warn(u'Unique-identifier %r not found.' % uid) + self.uid = metadata.identifier[0] def _manifest_from_opf(self, opf): self.manifest = manifest = Manifest(self) for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'): - manifest.add(elem.get('id'), elem.get('href'), - elem.get('media-type'), elem.get('fallback')) + href = elem.get('href') + if not self.container.exists(href): + self.logger.log_warn(u'Manifest item %r not found.' % href) + continue + manifest.add(elem.get('id'), href, elem.get('media-type'), + elem.get('fallback')) def _spine_from_opf(self, opf): self.spine = spine = Spine(self) for elem in xpath(opf, '/o2:package/o2:spine/o2:itemref'): - item = self.manifest[elem.get('idref')] + idref = elem.get('idref') + if idref not in self.manifest: + self.logger.log_warn(u'Spine item %r not found.' % idref) + continue + item = self.manifest[idref] spine.add(item, elem.get('linear')) extras = [] for item in self.manifest.values(): @@ -557,7 +582,11 @@ class OEBBook(object): def _guide_from_opf(self, opf): self.guide = guide = Guide(self) for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'): - guide.add(elem.get('type'), elem.get('title'), elem.get('href')) + href = elem.get('href') + if href not in self.manifest.hrefs: + self.logger.log_warn(u'Guide reference %r not found' % href) + continue + guide.add(elem.get('type'), elem.get('title'), href) def _toc_from_navpoint(self, toc, navpoint): children = xpath(navpoint, 'ncx:navPoint') diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py index 12e0d8b718..4d8a76fff2 100644 --- a/src/calibre/ebooks/lit/writer.py +++ b/src/calibre/ebooks/lit/writer.py @@ -26,10 +26,11 @@ import calibre.ebooks.lit.maps as maps from calibre.ebooks.lit.oeb import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \ CSS_MIME, OPF_MIME, XML_NS, XML from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize, xpath -from calibre.ebooks.lit.oeb import OEBBook +from calibre.ebooks.lit.oeb import FauxLogger, OEBBook from calibre.ebooks.lit.stylizer import Stylizer from calibre.ebooks.lit.lzx import Compressor import calibre +from calibre import LoggingInterface from calibre import plugins msdes, msdeserror = plugins['msdes'] import calibre.ebooks.lit.mssha1 as mssha1 @@ -141,9 +142,9 @@ def warn(x): class ReBinary(object): NSRMAP = {'': None, XML_NS: 'xml'} - def __init__(self, root, path, oeb, map=HTML_MAP, warn=warn): + def __init__(self, root, path, oeb, map=HTML_MAP, logger=FauxLogger()): self.path = path - self.log_warn = warn + self.logger = logger self.dir = os.path.dirname(path) self.manifest = oeb.manifest self.tags, self.tattrs = map @@ -272,7 +273,7 @@ class ReBinary(object): def build_ahc(self): if len(self.anchors) > 6: - self.log_warn("More than six anchors in file %r. " \ + self.logger.log_warn("More than six anchors in file %r. " \ "Some links may not work properly." % self.path) data = StringIO() data.write(unichr(len(self.anchors)).encode('utf-8')) @@ -296,11 +297,10 @@ def preserve(function): functools.update_wrapper(wrapper, function) return wrapper -class LitWriter(object, calibre.LoggingInterface): - def __init__(self, oeb, verbose=0): - calibre.LoggingInterface.__init__(self, logging.getLogger('oeb2lit')) - self.setup_cli_handler(verbose) +class LitWriter(object): + def __init__(self, oeb, logger=FauxLogger()): self._oeb = oeb + self._logger = logger self._litize_oeb() def _litize_oeb(self): @@ -325,7 +325,7 @@ class LitWriter(object, calibre.LoggingInterface): if type not in oeb.guide: oeb.guide.add(type, title, cover.href) else: - self.log_warn('No suitable cover image found.') + self._logger.log_warn('No suitable cover image found.') def dump(self, stream): self._stream = stream @@ -467,7 +467,7 @@ class LitWriter(object, calibre.LoggingInterface): self._add_folder('/data') for item in self._oeb.manifest.values(): if item.media_type not in LIT_MIMES: - self.log_warn("File %r of unknown media-type %r " \ + self._logger.log_warn("File %r of unknown media-type %r " \ "excluded from output." % (item.href, item.media_type)) continue name = '/data/' + item.id @@ -475,7 +475,8 @@ class LitWriter(object, calibre.LoggingInterface): secnum = 0 if not isinstance(data, basestring): self._add_folder(name) - rebin = ReBinary(data, item.href, self._oeb, warn=self.log_warn) + rebin = ReBinary(data, item.href, self._oeb, map=HTML_MAP, + logger=self._logger) self._add_file(name + '/ahc', rebin.ahc, 0) self._add_file(name + '/aht', rebin.aht, 0) item.page_breaks = rebin.page_breaks @@ -554,7 +555,8 @@ class LitWriter(object, calibre.LoggingInterface): meta.attrib['ms--minimum_level'] = '0' meta.attrib['ms--attr5'] = '1' meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper() - rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP, warn=self.log_warn) + rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP, + logger=self._logger) meta = rebin.content self._meta = meta self._add_file('/meta', meta) @@ -713,19 +715,23 @@ def option_parser(): parser.add_option( '-o', '--output', default=None, help=_('Output file. Default is derived from input filename.')) + parser.add_option( + '--verbose', default=False, action='store_true', + help=_('Useful for debugging.')) return parser def oeb2lit(opts, opfpath): + logger = LoggingInterface(logging.getLogger('oeb2lit')) + logger.setup_cli_handler(opts.verbose) litpath = opts.output if litpath is None: litpath = os.path.basename(opfpath) litpath = os.path.splitext(litpath)[0] + '.lit' litpath = os.path.abspath(litpath) - lit = LitWriter(OEBBook(opfpath), opts.verbose) + lit = LitWriter(OEBBook(opfpath)) with open(litpath, 'wb') as f: lit.dump(f) - logger = logging.getLogger('oeb2lit') - logger.info(_('Output written to ')+litpath) + logger.log_info(_('Output written to ')+litpath) def main(argv=sys.argv):