This commit is contained in:
Kovid Goyal 2008-12-16 16:32:30 -08:00
commit 839d4baf4c
2 changed files with 57 additions and 22 deletions

View File

@ -13,7 +13,9 @@ from types import StringTypes
from itertools import izip, count
from urlparse import urldefrag, urlparse, urlunparse
from urllib import unquote as urlunquote
import logging
from lxml import etree
from calibre import LoggingInterface
XML_PARSER = etree.XMLParser(recover=True, resolve_entities=False)
XML_NS = 'http://www.w3.org/XML/1998/namespace'
@ -82,6 +84,13 @@ def urlnormalize(href):
return urlunparse(parts)
class FauxLogger(object):
    """Minimal stand-in used as the default ``logger`` argument.

    Looking up any attribute that is not otherwise defined yields the
    instance itself, and calling the instance prints the message, so an
    expression such as ``logger.log_warn(msg)`` simply prints ``msg``.
    """

    def __getattr__(self, name):
        # Only reached when normal lookup fails; whatever method name the
        # caller asks for, hand back this same callable object.
        return self

    def __call__(self, message):
        # Single-argument form: behaves the same as the print statement.
        print(message)
class AbstractContainer(object):
def read_xml(self, path):
return etree.fromstring(
@ -102,6 +111,10 @@ class DirContainer(AbstractContainer):
with open(urlunquote(path), 'wb') as f:
return f.write(data)
def exists(self, path):
    """Return True if `path` names a regular file under the root directory.

    `path` is an href relative to ``self.rootdir`` and may be URL-quoted.
    It is unquoted before touching the filesystem, in line with the
    sibling method above that opens ``urlunquote(path)``; otherwise an
    href such as ``my%20file.html`` would never match ``my file.html``
    on disk. Only the href is unquoted, not the root directory, so a
    literal ``%`` in ``rootdir`` is left alone.
    """
    path = os.path.join(self.rootdir, urlunquote(path))
    return os.path.isfile(path)
class Metadata(object):
TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description',
@ -287,7 +300,7 @@ class Manifest(object):
yield id, items
def __contains__(self, key):
    """Return True if `key` is a key of ``self.items``.

    The membership test must use the `key` parameter; testing the
    builtin ``id`` function instead would never match a manifest entry.
    """
    return key in self.items
def to_opf1(self, parent=None):
elem = element(parent, 'manifest')
@ -473,13 +486,14 @@ class TOC(object):
node.to_ncx(point, playorder, depth+1)
return parent
class OEBBook(object):
def __init__(self, opfpath, container=None):
def __init__(self, opfpath, container=None, logger=FauxLogger()):
if not container:
container = DirContainer(os.path.dirname(opfpath))
opfpath = os.path.basename(opfpath)
self.container = container
self.logger = logger
opf = self._read_opf(opfpath)
self._all_from_opf(opf)
@ -533,17 +547,28 @@ class OEBBook(object):
if item.id == uid:
self.uid = item
break
else:
self.logger.log_warn(u'Unique-identifier %r not found.' % uid)
self.uid = metadata.identifier[0]
def _manifest_from_opf(self, opf):
    """Build ``self.manifest`` from the ``<manifest>`` items of `opf`.

    Each ``o2:item`` is added only once, and only if the container
    reports that its href exists. Items whose file is missing (or which
    carry no ``href`` attribute at all) are skipped with a warning.
    """
    self.manifest = manifest = Manifest(self)
    for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'):
        href = elem.get('href')
        # Check before adding: an entry pointing at a non-existent file
        # must not end up in the manifest. A missing attribute is treated
        # the same way and never passed to the container.
        if href is None or not self.container.exists(href):
            self.logger.log_warn(u'Manifest item %r not found.' % href)
            continue
        manifest.add(elem.get('id'), href, elem.get('media-type'),
                     elem.get('fallback'))
def _spine_from_opf(self, opf):
self.spine = spine = Spine(self)
for elem in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
item = self.manifest[elem.get('idref')]
idref = elem.get('idref')
if idref not in self.manifest:
self.logger.log_warn(u'Spine item %r not found.' % idref)
continue
item = self.manifest[idref]
spine.add(item, elem.get('linear'))
extras = []
for item in self.manifest.values():
@ -557,7 +582,11 @@ class OEBBook(object):
def _guide_from_opf(self, opf):
    """Build ``self.guide`` from the ``<guide>`` references of `opf`.

    A reference is added only if the file it points at is known to the
    manifest; otherwise a warning is logged and the reference dropped.
    """
    self.guide = guide = Guide(self)
    for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
        href = elem.get('href')
        # Guide hrefs may carry a '#fragment'. Look up only the file
        # part so 'toc.html#start' is matched against 'toc.html', while
        # the full href (fragment included) is what gets stored.
        path = urldefrag(href)[0] if href else href
        if path not in self.manifest.hrefs:
            self.logger.log_warn(u'Guide reference %r not found' % href)
            continue
        guide.add(elem.get('type'), elem.get('title'), href)
def _toc_from_navpoint(self, toc, navpoint):
children = xpath(navpoint, 'ncx:navPoint')

View File

@ -26,10 +26,11 @@ import calibre.ebooks.lit.maps as maps
from calibre.ebooks.lit.oeb import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \
CSS_MIME, OPF_MIME, XML_NS, XML
from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize, xpath
from calibre.ebooks.lit.oeb import OEBBook
from calibre.ebooks.lit.oeb import FauxLogger, OEBBook
from calibre.ebooks.lit.stylizer import Stylizer
from calibre.ebooks.lit.lzx import Compressor
import calibre
from calibre import LoggingInterface
from calibre import plugins
msdes, msdeserror = plugins['msdes']
import calibre.ebooks.lit.mssha1 as mssha1
@ -141,9 +142,9 @@ def warn(x):
class ReBinary(object):
NSRMAP = {'': None, XML_NS: 'xml'}
def __init__(self, root, path, oeb, map=HTML_MAP, warn=warn):
def __init__(self, root, path, oeb, map=HTML_MAP, logger=FauxLogger()):
self.path = path
self.log_warn = warn
self.logger = logger
self.dir = os.path.dirname(path)
self.manifest = oeb.manifest
self.tags, self.tattrs = map
@ -272,7 +273,7 @@ class ReBinary(object):
def build_ahc(self):
if len(self.anchors) > 6:
self.log_warn("More than six anchors in file %r. " \
self.logger.log_warn("More than six anchors in file %r. " \
"Some links may not work properly." % self.path)
data = StringIO()
data.write(unichr(len(self.anchors)).encode('utf-8'))
@ -296,11 +297,10 @@ def preserve(function):
functools.update_wrapper(wrapper, function)
return wrapper
class LitWriter(object, calibre.LoggingInterface):
def __init__(self, oeb, verbose=0):
calibre.LoggingInterface.__init__(self, logging.getLogger('oeb2lit'))
self.setup_cli_handler(verbose)
class LitWriter(object):
def __init__(self, oeb, logger=FauxLogger()):
    """Store the book and logger, then run ``_litize_oeb``.

    `oeb` is kept as ``self._oeb`` and `logger` as ``self._logger``
    before ``self._litize_oeb()`` is invoked.
    """
    self._oeb, self._logger = oeb, logger
    self._litize_oeb()
def _litize_oeb(self):
@ -325,7 +325,7 @@ class LitWriter(object, calibre.LoggingInterface):
if type not in oeb.guide:
oeb.guide.add(type, title, cover.href)
else:
self.log_warn('No suitable cover image found.')
self._logger.log_warn('No suitable cover image found.')
def dump(self, stream):
self._stream = stream
@ -467,7 +467,7 @@ class LitWriter(object, calibre.LoggingInterface):
self._add_folder('/data')
for item in self._oeb.manifest.values():
if item.media_type not in LIT_MIMES:
self.log_warn("File %r of unknown media-type %r " \
self._logger.log_warn("File %r of unknown media-type %r " \
"excluded from output." % (item.href, item.media_type))
continue
name = '/data/' + item.id
@ -475,7 +475,8 @@ class LitWriter(object, calibre.LoggingInterface):
secnum = 0
if not isinstance(data, basestring):
self._add_folder(name)
rebin = ReBinary(data, item.href, self._oeb, warn=self.log_warn)
rebin = ReBinary(data, item.href, self._oeb, map=HTML_MAP,
logger=self._logger)
self._add_file(name + '/ahc', rebin.ahc, 0)
self._add_file(name + '/aht', rebin.aht, 0)
item.page_breaks = rebin.page_breaks
@ -554,7 +555,8 @@ class LitWriter(object, calibre.LoggingInterface):
meta.attrib['ms--minimum_level'] = '0'
meta.attrib['ms--attr5'] = '1'
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP, warn=self.log_warn)
rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP,
logger=self._logger)
meta = rebin.content
self._meta = meta
self._add_file('/meta', meta)
@ -713,19 +715,23 @@ def option_parser():
parser.add_option(
'-o', '--output', default=None,
help=_('Output file. Default is derived from input filename.'))
parser.add_option(
'--verbose', default=False, action='store_true',
help=_('Useful for debugging.'))
return parser
def oeb2lit(opts, opfpath):
    """Convert the OEB book described by `opfpath` into a LIT file.

    `opts` supplies ``output`` (destination path, or None) and
    ``verbose`` (passed to the logger's CLI handler setup). When no
    output path is given, the input's base name with its extension
    replaced by ``.lit`` is used. The final path is made absolute and
    reported through the logger once written.
    """
    logger = LoggingInterface(logging.getLogger('oeb2lit'))
    logger.setup_cli_handler(opts.verbose)
    litpath = opts.output
    if litpath is None:
        stem = os.path.splitext(os.path.basename(opfpath))[0]
        litpath = stem + '.lit'
    litpath = os.path.abspath(litpath)
    # Pass the configured logger to both stages; left at their defaults
    # they would use a FauxLogger and bypass the handler set up above.
    lit = LitWriter(OEBBook(opfpath, logger=logger), logger=logger)
    with open(litpath, 'wb') as f:
        lit.dump(f)
    # `logger` stays the LoggingInterface wrapper (it is not rebound to
    # a plain logging.Logger), so log_info is the call to use here.
    logger.log_info(_('Output written to ')+litpath)
def main(argv=sys.argv):