This commit is contained in:
Kovid Goyal 2008-12-16 16:32:30 -08:00
commit 839d4baf4c
2 changed files with 57 additions and 22 deletions

View File

@ -13,7 +13,9 @@ from types import StringTypes
from itertools import izip, count from itertools import izip, count
from urlparse import urldefrag, urlparse, urlunparse from urlparse import urldefrag, urlparse, urlunparse
from urllib import unquote as urlunquote from urllib import unquote as urlunquote
import logging
from lxml import etree from lxml import etree
from calibre import LoggingInterface
XML_PARSER = etree.XMLParser(recover=True, resolve_entities=False) XML_PARSER = etree.XMLParser(recover=True, resolve_entities=False)
XML_NS = 'http://www.w3.org/XML/1998/namespace' XML_NS = 'http://www.w3.org/XML/1998/namespace'
@ -82,6 +84,13 @@ def urlnormalize(href):
return urlunparse(parts) return urlunparse(parts)
class FauxLogger(object):
    """Minimal stand-in logger that prints every message to stdout.

    Any ordinary attribute that is not found resolves to the instance
    itself, so arbitrary method-style calls such as
    ``logger.log_warn(msg)`` all end up in ``__call__``.
    """

    def __getattr__(self, name):
        """Return ``self`` for unknown ordinary names.

        Raises ``AttributeError`` for unknown dunder names.
        """
        # __getattr__ only runs when normal lookup fails.  Dunder probes
        # must keep failing: copy.deepcopy(), for instance, looks up
        # ``__deepcopy__`` on the instance and would otherwise call this
        # object as the copier (printing the memo dict, returning None).
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        return self

    def __call__(self, message, *args, **kwargs):
        """Print ``message`` to stdout.

        Extra positional arguments are %-interpolated into ``message``
        (the stdlib ``logging`` call convention); keyword arguments are
        accepted and ignored.
        """
        if args:
            message = message % args
        # Parenthesised single-argument form prints the same text under
        # Python 2 and also parses under Python 3.
        print(message)
class AbstractContainer(object): class AbstractContainer(object):
def read_xml(self, path): def read_xml(self, path):
return etree.fromstring( return etree.fromstring(
@ -102,6 +111,10 @@ class DirContainer(AbstractContainer):
with open(urlunquote(path), 'wb') as f: with open(urlunquote(path), 'wb') as f:
return f.write(data) return f.write(data)
def exists(self, path):
    """Return True if ``path``, resolved against ``self.rootdir``, is a file.

    ``path`` may be URL-quoted (e.g. ``a%20b.html``); it is unquoted
    before the filesystem is consulted.
    """
    # The sibling write code opens ``urlunquote(path)``; apply the same
    # unquoting here so both agree on which on-disk file a name refers to.
    path = urlunquote(os.path.join(self.rootdir, path))
    return os.path.isfile(path)
class Metadata(object): class Metadata(object):
TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description', TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description',
@ -287,7 +300,7 @@ class Manifest(object):
yield id, items yield id, items
def __contains__(self, key): def __contains__(self, key):
return id in self.items return key in self.items
def to_opf1(self, parent=None): def to_opf1(self, parent=None):
elem = element(parent, 'manifest') elem = element(parent, 'manifest')
@ -475,11 +488,12 @@ class TOC(object):
class OEBBook(object): class OEBBook(object):
def __init__(self, opfpath, container=None): def __init__(self, opfpath, container=None, logger=FauxLogger()):
if not container: if not container:
container = DirContainer(os.path.dirname(opfpath)) container = DirContainer(os.path.dirname(opfpath))
opfpath = os.path.basename(opfpath) opfpath = os.path.basename(opfpath)
self.container = container self.container = container
self.logger = logger
opf = self._read_opf(opfpath) opf = self._read_opf(opfpath)
self._all_from_opf(opf) self._all_from_opf(opf)
@ -533,17 +547,28 @@ class OEBBook(object):
if item.id == uid: if item.id == uid:
self.uid = item self.uid = item
break break
else:
self.logger.log_warn(u'Unique-identifier %r not found.' % uid)
self.uid = metadata.identifier[0]
def _manifest_from_opf(self, opf):
    """Populate ``self.manifest`` from the ``<item>`` elements of ``opf``.

    Items whose href the container reports as non-existent are left out
    of the manifest and a warning is logged for each of them.
    """
    manifest = Manifest(self)
    self.manifest = manifest
    for node in xpath(opf, '/o2:package/o2:manifest/o2:item'):
        target = node.get('href')
        if self.container.exists(target):
            manifest.add(node.get('id'), target, node.get('media-type'),
                         node.get('fallback'))
        else:
            self.logger.log_warn(u'Manifest item %r not found.' % target)
def _spine_from_opf(self, opf): def _spine_from_opf(self, opf):
self.spine = spine = Spine(self) self.spine = spine = Spine(self)
for elem in xpath(opf, '/o2:package/o2:spine/o2:itemref'): for elem in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
item = self.manifest[elem.get('idref')] idref = elem.get('idref')
if idref not in self.manifest:
self.logger.log_warn(u'Spine item %r not found.' % idref)
continue
item = self.manifest[idref]
spine.add(item, elem.get('linear')) spine.add(item, elem.get('linear'))
extras = [] extras = []
for item in self.manifest.values(): for item in self.manifest.values():
@ -557,7 +582,11 @@ class OEBBook(object):
def _guide_from_opf(self, opf):
    """Populate ``self.guide`` from the ``<reference>`` elements of ``opf``.

    A reference is kept only when the document part of its href (the
    href with any ``#fragment`` removed) is present in
    ``self.manifest.hrefs``; otherwise a warning is logged and the
    reference is skipped.  The href is handed to the guide unchanged,
    fragment included.
    """
    self.guide = guide = Guide(self)
    for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
        href = elem.get('href')
        # Guide references may point at an anchor inside a document
        # ("toc.html#start").  Compare only the document part, otherwise
        # such references would be reported missing.
        # NOTE(review): assumes manifest.hrefs is keyed by fragment-less
        # hrefs -- confirm against Manifest.
        path = urldefrag(href)[0] if href else href
        if path not in self.manifest.hrefs:
            self.logger.log_warn(u'Guide reference %r not found' % href)
            continue
        guide.add(elem.get('type'), elem.get('title'), href)
def _toc_from_navpoint(self, toc, navpoint): def _toc_from_navpoint(self, toc, navpoint):
children = xpath(navpoint, 'ncx:navPoint') children = xpath(navpoint, 'ncx:navPoint')

View File

@ -26,10 +26,11 @@ import calibre.ebooks.lit.maps as maps
from calibre.ebooks.lit.oeb import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \ from calibre.ebooks.lit.oeb import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \
CSS_MIME, OPF_MIME, XML_NS, XML CSS_MIME, OPF_MIME, XML_NS, XML
from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize, xpath from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize, xpath
from calibre.ebooks.lit.oeb import OEBBook from calibre.ebooks.lit.oeb import FauxLogger, OEBBook
from calibre.ebooks.lit.stylizer import Stylizer from calibre.ebooks.lit.stylizer import Stylizer
from calibre.ebooks.lit.lzx import Compressor from calibre.ebooks.lit.lzx import Compressor
import calibre import calibre
from calibre import LoggingInterface
from calibre import plugins from calibre import plugins
msdes, msdeserror = plugins['msdes'] msdes, msdeserror = plugins['msdes']
import calibre.ebooks.lit.mssha1 as mssha1 import calibre.ebooks.lit.mssha1 as mssha1
@ -141,9 +142,9 @@ def warn(x):
class ReBinary(object): class ReBinary(object):
NSRMAP = {'': None, XML_NS: 'xml'} NSRMAP = {'': None, XML_NS: 'xml'}
def __init__(self, root, path, oeb, map=HTML_MAP, warn=warn): def __init__(self, root, path, oeb, map=HTML_MAP, logger=FauxLogger()):
self.path = path self.path = path
self.log_warn = warn self.logger = logger
self.dir = os.path.dirname(path) self.dir = os.path.dirname(path)
self.manifest = oeb.manifest self.manifest = oeb.manifest
self.tags, self.tattrs = map self.tags, self.tattrs = map
@ -272,7 +273,7 @@ class ReBinary(object):
def build_ahc(self): def build_ahc(self):
if len(self.anchors) > 6: if len(self.anchors) > 6:
self.log_warn("More than six anchors in file %r. " \ self.logger.log_warn("More than six anchors in file %r. " \
"Some links may not work properly." % self.path) "Some links may not work properly." % self.path)
data = StringIO() data = StringIO()
data.write(unichr(len(self.anchors)).encode('utf-8')) data.write(unichr(len(self.anchors)).encode('utf-8'))
@ -296,11 +297,10 @@ def preserve(function):
functools.update_wrapper(wrapper, function) functools.update_wrapper(wrapper, function)
return wrapper return wrapper
class LitWriter(object, calibre.LoggingInterface): class LitWriter(object):
def __init__(self, oeb, verbose=0): def __init__(self, oeb, logger=FauxLogger()):
calibre.LoggingInterface.__init__(self, logging.getLogger('oeb2lit'))
self.setup_cli_handler(verbose)
self._oeb = oeb self._oeb = oeb
self._logger = logger
self._litize_oeb() self._litize_oeb()
def _litize_oeb(self): def _litize_oeb(self):
@ -325,7 +325,7 @@ class LitWriter(object, calibre.LoggingInterface):
if type not in oeb.guide: if type not in oeb.guide:
oeb.guide.add(type, title, cover.href) oeb.guide.add(type, title, cover.href)
else: else:
self.log_warn('No suitable cover image found.') self._logger.log_warn('No suitable cover image found.')
def dump(self, stream): def dump(self, stream):
self._stream = stream self._stream = stream
@ -467,7 +467,7 @@ class LitWriter(object, calibre.LoggingInterface):
self._add_folder('/data') self._add_folder('/data')
for item in self._oeb.manifest.values(): for item in self._oeb.manifest.values():
if item.media_type not in LIT_MIMES: if item.media_type not in LIT_MIMES:
self.log_warn("File %r of unknown media-type %r " \ self._logger.log_warn("File %r of unknown media-type %r " \
"excluded from output." % (item.href, item.media_type)) "excluded from output." % (item.href, item.media_type))
continue continue
name = '/data/' + item.id name = '/data/' + item.id
@ -475,7 +475,8 @@ class LitWriter(object, calibre.LoggingInterface):
secnum = 0 secnum = 0
if not isinstance(data, basestring): if not isinstance(data, basestring):
self._add_folder(name) self._add_folder(name)
rebin = ReBinary(data, item.href, self._oeb, warn=self.log_warn) rebin = ReBinary(data, item.href, self._oeb, map=HTML_MAP,
logger=self._logger)
self._add_file(name + '/ahc', rebin.ahc, 0) self._add_file(name + '/ahc', rebin.ahc, 0)
self._add_file(name + '/aht', rebin.aht, 0) self._add_file(name + '/aht', rebin.aht, 0)
item.page_breaks = rebin.page_breaks item.page_breaks = rebin.page_breaks
@ -554,7 +555,8 @@ class LitWriter(object, calibre.LoggingInterface):
meta.attrib['ms--minimum_level'] = '0' meta.attrib['ms--minimum_level'] = '0'
meta.attrib['ms--attr5'] = '1' meta.attrib['ms--attr5'] = '1'
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper() meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP, warn=self.log_warn) rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP,
logger=self._logger)
meta = rebin.content meta = rebin.content
self._meta = meta self._meta = meta
self._add_file('/meta', meta) self._add_file('/meta', meta)
@ -713,19 +715,23 @@ def option_parser():
parser.add_option( parser.add_option(
'-o', '--output', default=None, '-o', '--output', default=None,
help=_('Output file. Default is derived from input filename.')) help=_('Output file. Default is derived from input filename.'))
parser.add_option(
'--verbose', default=False, action='store_true',
help=_('Useful for debugging.'))
return parser return parser
def oeb2lit(opts, opfpath):
    """Convert the OEB book described by ``opfpath`` into a LIT file.

    ``opts.output`` names the output file; when it is None the name is
    derived from the basename of ``opfpath`` with a ``.lit`` extension.
    ``opts.verbose`` is forwarded to the logger's CLI handler setup.
    """
    logger = LoggingInterface(logging.getLogger('oeb2lit'))
    logger.setup_cli_handler(opts.verbose)
    litpath = opts.output
    if litpath is None:
        litpath = os.path.basename(opfpath)
        litpath = os.path.splitext(litpath)[0] + '.lit'
    litpath = os.path.abspath(litpath)
    # Hand the configured logger to both stages.  Without it OEBBook and
    # LitWriter fall back to their FauxLogger() defaults, and their
    # warnings bypass the handler set up above.
    lit = LitWriter(OEBBook(opfpath, logger=logger), logger=logger)
    with open(litpath, 'wb') as f:
        lit.dump(f)
    logger.log_info(_('Output written to ')+litpath)
def main(argv=sys.argv): def main(argv=sys.argv):