Changes to LIT-writing support:

- Locate and associate cover images in metadata.
- Clean up warnings and language.
- Shift to modifying OEBBook object instead of generated OPF.
This commit is contained in:
Marshall T. Vandegrift 2008-12-12 09:07:30 -05:00
parent 052657e6af
commit 89a025ed76
2 changed files with 48 additions and 14 deletions

View File

@@ -113,15 +113,14 @@ class Metadata(object):
class Item(object): class Item(object):
def __init__(self, term, value, fq_attrib={}): def __init__(self, term, value, fq_attrib={}):
self.fq_attrib = dict(fq_attrib)
if term == OPF('meta') and not value: if term == OPF('meta') and not value:
fq_attrib = dict(fq_attrib) term = self.fq_attrib.pop('name')
term = fq_attrib.pop('name') value = self.fq_attrib.pop('content')
value = fq_attrib.pop('content')
elif term in Metadata.TERMS and not namespace(term): elif term in Metadata.TERMS and not namespace(term):
term = DC(term) term = DC(term)
self.term = term self.term = term
self.value = value self.value = value
self.fq_attrib = dict(fq_attrib)
self.attrib = attrib = {} self.attrib = attrib = {}
for fq_attr in fq_attrib: for fq_attr in fq_attrib:
attr = barename(fq_attr) attr = barename(fq_attr)
@@ -171,7 +170,7 @@ class Metadata(object):
self.oeb = oeb self.oeb = oeb
self.items = defaultdict(list) self.items = defaultdict(list)
def add(self, term, value, attrib): def add(self, term, value, attrib={}):
item = self.Item(term, value, attrib) item = self.Item(term, value, attrib)
items = self.items[barename(term)] items = self.items[barename(term)]
items.append(item) items.append(item)

View File

@@ -20,11 +20,12 @@ import functools
from urlparse import urldefrag from urlparse import urldefrag
from urllib import unquote as urlunquote from urllib import unquote as urlunquote
from lxml import etree from lxml import etree
from calibre.ebooks.lit import LitError
from calibre.ebooks.lit.reader import msguid, DirectoryEntry from calibre.ebooks.lit.reader import msguid, DirectoryEntry
import calibre.ebooks.lit.maps as maps import calibre.ebooks.lit.maps as maps
from calibre.ebooks.lit.oeb import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \ from calibre.ebooks.lit.oeb import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \
CSS_MIME, XHTML_MIME, OPF_MIME, XML_NS, XML CSS_MIME, XHTML_MIME, OPF_MIME, XML_NS, XML
from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize, xpath
from calibre.ebooks.lit.oeb import OEBBook from calibre.ebooks.lit.oeb import OEBBook
from calibre.ebooks.lit.stylizer import Stylizer from calibre.ebooks.lit.stylizer import Stylizer
from calibre.ebooks.lit.lzx import Compressor from calibre.ebooks.lit.lzx import Compressor
@@ -38,6 +39,14 @@ __all__ = ['LitWriter']
LIT_IMAGES = set(['image/png', 'image/jpeg', 'image/gif']) LIT_IMAGES = set(['image/png', 'image/jpeg', 'image/gif'])
LIT_MIMES = OEB_DOCS | OEB_STYLES | LIT_IMAGES LIT_MIMES = OEB_DOCS | OEB_STYLES | LIT_IMAGES
MS_COVER_TYPE = 'other.ms-coverimage-standard'
ALL_MS_COVER_TYPES = [
(MS_COVER_TYPE, 'Standard cover image'),
('other.ms-thumbimage-standard', 'Standard thumbnail image'),
('other.ms-coverimage', 'PocketPC cover image'),
('other.ms-thumbimage', 'PocketPC thumbnail image'),
]
def invert_tag_map(tag_map): def invert_tag_map(tag_map):
tags, dattrs, tattrs = tag_map tags, dattrs, tattrs = tag_map
tags = dict((tags[i], i) for i in xrange(len(tags))) tags = dict((tags[i], i) for i in xrange(len(tags)))
@@ -130,6 +139,7 @@ class ReBinary(object):
NSRMAP = {'': None, XML_NS: 'xml'} NSRMAP = {'': None, XML_NS: 'xml'}
def __init__(self, root, path, oeb, map=HTML_MAP): def __init__(self, root, path, oeb, map=HTML_MAP):
self.path = path
self.dir = os.path.dirname(path) self.dir = os.path.dirname(path)
self.manifest = oeb.manifest self.manifest = oeb.manifest
self.tags, self.tattrs = map self.tags, self.tattrs = map
@@ -140,8 +150,8 @@ class ReBinary(object):
self.stylizer = Stylizer(root, path, oeb) if is_html else None self.stylizer = Stylizer(root, path, oeb) if is_html else None
self.tree_to_binary(root) self.tree_to_binary(root)
self.content = self.buf.getvalue() self.content = self.buf.getvalue()
self.ahc = self.build_ahc() self.ahc = self.build_ahc() if is_html else None
self.aht = self.build_aht() self.aht = self.build_aht() if is_html else None
def write(self, *values): def write(self, *values):
for value in values: for value in values:
@@ -257,6 +267,9 @@ class ReBinary(object):
self.page_breaks.append((self.buf.tell(), list(parents))) self.page_breaks.append((self.buf.tell(), list(parents)))
def build_ahc(self): def build_ahc(self):
if len(self.anchors) > 6:
print "calibre: warning: More than six anchors in file %r. " \
"Some links may not work properly." % self.path
data = StringIO() data = StringIO()
data.write(unichr(len(self.anchors)).encode('utf-8')) data.write(unichr(len(self.anchors)).encode('utf-8'))
for anchor, offset in self.anchors: for anchor, offset in self.anchors:
@@ -282,6 +295,31 @@ def preserve(function):
class LitWriter(object): class LitWriter(object):
def __init__(self, oeb): def __init__(self, oeb):
self._oeb = oeb self._oeb = oeb
self._litize_oeb()
def _litize_oeb(self):
oeb = self._oeb
oeb.metadata.add('calibre-oeb2lit-version', calibre.__version__)
cover = None
if oeb.metadata.cover:
id = str(oeb.metadata.cover[0])
cover = oeb.manifest[id]
elif MS_COVER_TYPE in oeb.guide:
href = oeb.guide[MS_COVER_TYPE].href
cover = oeb.manifest.hrefs[href]
else:
html = oeb.spine[0].data
imgs = xpath(html, '//img[position()=1]')
href = imgs[0].get('src') if imgs else None
cover = oeb.manifest.hrefs[href] if href else None
if cover:
if not oeb.metadata.cover:
oeb.metadata.add('cover', cover.id)
for type, title in ALL_MS_COVER_TYPES:
if type not in oeb.guide:
oeb.guide.add(type, title, cover.href)
else:
print "calibre: warning: No suitable cover image found."
def dump(self, stream): def dump(self, stream):
self._stream = stream self._stream = stream
@@ -423,7 +461,8 @@ class LitWriter(object):
self._add_folder('/data') self._add_folder('/data')
for item in self._oeb.manifest.values(): for item in self._oeb.manifest.values():
if item.media_type not in LIT_MIMES: if item.media_type not in LIT_MIMES:
print "WARNING: excluding item %r" % item.href print "calibre: warning: File %r of unknown media-type %r " \
"excluded from output." % (item.href, item.media_type)
continue continue
name = '/data/' + item.id name = '/data/' + item.id
data = item.data data = item.data
@@ -506,10 +545,6 @@ class LitWriter(object):
def _build_meta(self): def _build_meta(self):
_, meta = self._oeb.to_opf1()[OPF_MIME] _, meta = self._oeb.to_opf1()[OPF_MIME]
xmetadata, = meta.xpath('/package/metadata/x-metadata')
etree.SubElement(xmetadata, 'meta', attrib={
'name': 'calibre-oeb2lit-version',
'content': calibre.__version__})
meta.attrib['ms--minimum_level'] = '0' meta.attrib['ms--minimum_level'] = '0'
meta.attrib['ms--attr5'] = '1' meta.attrib['ms--attr5'] = '1'
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper() meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
@@ -519,7 +554,7 @@ class LitWriter(object):
self._add_file('/meta', meta) self._add_file('/meta', meta)
def _build_drm_storage(self): def _build_drm_storage(self):
drmsource = u'Fuck Microsoft\0'.encode('utf-16-le') drmsource = u'Free as in freedom\0'.encode('utf-16-le')
self._add_file('/DRMStorage/DRMSource', drmsource) self._add_file('/DRMStorage/DRMSource', drmsource)
tempkey = self._calculate_deskey([self._meta, drmsource]) tempkey = self._calculate_deskey([self._meta, drmsource])
msdes.deskey(tempkey, msdes.EN0) msdes.deskey(tempkey, msdes.EN0)