From 89a025ed76caad6767856109125e5c7c8f3126d0 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Fri, 12 Dec 2008 09:07:30 -0500 Subject: [PATCH] Changes to LIT-writing support: - Locate and associate cover images in metadata. - Clean up warnings and language. - Shift to modifying OEBBook object instead of generated OPF. --- src/calibre/ebooks/lit/oeb.py | 9 +++--- src/calibre/ebooks/lit/writer.py | 53 ++++++++++++++++++++++++++------ 2 files changed, 48 insertions(+), 14 deletions(-) diff --git a/src/calibre/ebooks/lit/oeb.py b/src/calibre/ebooks/lit/oeb.py index 2a553006ab..39b0c286e5 100644 --- a/src/calibre/ebooks/lit/oeb.py +++ b/src/calibre/ebooks/lit/oeb.py @@ -113,15 +113,14 @@ class Metadata(object): class Item(object): def __init__(self, term, value, fq_attrib={}): + self.fq_attrib = dict(fq_attrib) if term == OPF('meta') and not value: - fq_attrib = dict(fq_attrib) - term = fq_attrib.pop('name') - value = fq_attrib.pop('content') + term = self.fq_attrib.pop('name') + value = self.fq_attrib.pop('content') elif term in Metadata.TERMS and not namespace(term): term = DC(term) self.term = term self.value = value - self.fq_attrib = dict(fq_attrib) self.attrib = attrib = {} for fq_attr in fq_attrib: attr = barename(fq_attr) @@ -171,7 +170,7 @@ class Metadata(object): self.oeb = oeb self.items = defaultdict(list) - def add(self, term, value, attrib): + def add(self, term, value, attrib={}): item = self.Item(term, value, attrib) items = self.items[barename(term)] items.append(item) diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py index 2777fceba1..75e6c68adc 100644 --- a/src/calibre/ebooks/lit/writer.py +++ b/src/calibre/ebooks/lit/writer.py @@ -20,11 +20,12 @@ import functools from urlparse import urldefrag from urllib import unquote as urlunquote from lxml import etree +from calibre.ebooks.lit import LitError from calibre.ebooks.lit.reader import msguid, DirectoryEntry import calibre.ebooks.lit.maps as maps from calibre.ebooks.lit.oeb import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \ CSS_MIME, XHTML_MIME, OPF_MIME, XML_NS, XML -from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize +from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize, xpath from calibre.ebooks.lit.oeb import OEBBook from calibre.ebooks.lit.stylizer import Stylizer from calibre.ebooks.lit.lzx import Compressor @@ -38,6 +39,14 @@ __all__ = ['LitWriter'] LIT_IMAGES = set(['image/png', 'image/jpeg', 'image/gif']) LIT_MIMES = OEB_DOCS | OEB_STYLES | LIT_IMAGES +MS_COVER_TYPE = 'other.ms-coverimage-standard' +ALL_MS_COVER_TYPES = [ + (MS_COVER_TYPE, 'Standard cover image'), + ('other.ms-thumbimage-standard', 'Standard thumbnail image'), + ('other.ms-coverimage', 'PocketPC cover image'), + ('other.ms-thumbimage', 'PocketPC thumbnail image'), + ] + def invert_tag_map(tag_map): tags, dattrs, tattrs = tag_map tags = dict((tags[i], i) for i in xrange(len(tags))) @@ -130,6 +139,7 @@ class ReBinary(object): NSRMAP = {'': None, XML_NS: 'xml'} def __init__(self, root, path, oeb, map=HTML_MAP): + self.path = path self.dir = os.path.dirname(path) self.manifest = oeb.manifest self.tags, self.tattrs = map @@ -140,8 +150,8 @@ class ReBinary(object): self.stylizer = Stylizer(root, path, oeb) if is_html else None self.tree_to_binary(root) self.content = self.buf.getvalue() - self.ahc = self.build_ahc() - self.aht = self.build_aht() + self.ahc = self.build_ahc() if is_html else None + self.aht = self.build_aht() if is_html else None def write(self, *values): for value in values: @@ -257,6 +267,9 @@ class ReBinary(object): self.page_breaks.append((self.buf.tell(), list(parents))) def build_ahc(self): + if len(self.anchors) > 6: + print "calibre: warning: More than six anchors in file %r. " \ + "Some links may not work properly." % self.path data = StringIO() data.write(unichr(len(self.anchors)).encode('utf-8')) for anchor, offset in self.anchors: @@ -282,6 +295,31 @@ def preserve(function): class LitWriter(object): def __init__(self, oeb): self._oeb = oeb + self._litize_oeb() + + def _litize_oeb(self): + oeb = self._oeb + oeb.metadata.add('calibre-oeb2lit-version', calibre.__version__) + cover = None + if oeb.metadata.cover: + id = str(oeb.metadata.cover[0]) + cover = oeb.manifest[id] + elif MS_COVER_TYPE in oeb.guide: + href = oeb.guide[MS_COVER_TYPE].href + cover = oeb.manifest.hrefs[href] + else: + html = oeb.spine[0].data + imgs = xpath(html, '//img[position()=1]') + href = imgs[0].get('src') if imgs else None + cover = oeb.manifest.hrefs[href] if href else None + if cover: + if not oeb.metadata.cover: + oeb.metadata.add('cover', cover.id) + for type, title in ALL_MS_COVER_TYPES: + if type not in oeb.guide: + oeb.guide.add(type, title, cover.href) + else: + print "calibre: warning: No suitable cover image found." def dump(self, stream): self._stream = stream @@ -423,7 +461,8 @@ class LitWriter(object): self._add_folder('/data') for item in self._oeb.manifest.values(): if item.media_type not in LIT_MIMES: - print "WARNING: excluding item %r" % item.href + print "calibre: warning: File %r of unknown media-type %r " \ + "excluded from output." % (item.href, item.media_type) continue name = '/data/' + item.id data = item.data @@ -506,10 +545,6 @@ class LitWriter(object): def _build_meta(self): _, meta = self._oeb.to_opf1()[OPF_MIME] - xmetadata, = meta.xpath('/package/metadata/x-metadata') - etree.SubElement(xmetadata, 'meta', attrib={ - 'name': 'calibre-oeb2lit-version', - 'content': calibre.__version__}) meta.attrib['ms--minimum_level'] = '0' meta.attrib['ms--attr5'] = '1' meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper() @@ -519,7 +554,7 @@ class LitWriter(object): self._add_file('/meta', meta) def _build_drm_storage(self): - drmsource = u'Fuck Microsoft\0'.encode('utf-16-le') + drmsource = u'Free as in freedom\0'.encode('utf-16-le') self._add_file('/DRMStorage/DRMSource', drmsource) tempkey = self._calculate_deskey([self._meta, drmsource]) msdes.deskey(tempkey, msdes.EN0)