mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-11 09:13:57 -04:00
More fixes and improvements. Etc etc etc.
This commit is contained in:
parent
c02491eddc
commit
a7753d3420
@ -19,6 +19,8 @@ from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
|
||||
MBP_NS = 'http://mobipocket.com/ns/mbp'
|
||||
def MBP(name):
    """Return *name* qualified with the Mobipocket namespace.

    The result has the form ``'{<MBP_NS>}<name>'``.
    """
    qualified = '{%s}%s' % (MBP_NS, name)
    return qualified
|
||||
|
||||
MOBI_NSMAP = {None: XHTML_NS, 'mbp': MBP_NS}
|
||||
|
||||
HEADER_TAGS = set(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
|
||||
NESTABLE_TAGS = set(['ol', 'ul', 'li', 'table', 'tr', 'td', 'th'])
|
||||
TABLE_TAGS = set(['table', 'tr', 'td', 'th'])
|
||||
@ -77,26 +79,34 @@ class FormatState(object):
|
||||
|
||||
|
||||
class MobiMLizer(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def transform(self, oeb, context):
    """Convert the XHTML content of *oeb* to Mobipocket markup.

    Stores *oeb* and ``context.dest`` on the instance, builds the font
    lookup tables from the destination profile, drops the HTML cover and
    finally rewrites every spine item.
    """
    oeb.logger.info('Converting XHTML to Mobipocket markup...')
    self.oeb = oeb
    dest = context.dest
    self.profile = dest
    # Swap keys and values of the profile's ``fnums`` mapping.
    inverted = dict((value, key) for key, value in dest.fnums.items())
    self.fnums = inverted
    # The profile's ``fbase`` is passed as both of KeyMapper's first two
    # arguments, together with the keys of the inverted table.
    self.fmap = KeyMapper(dest.fbase, dest.fbase, inverted.keys())
    self.remove_html_cover()
    self.mobimlize_spine()
|
||||
|
||||
def remove_html_cover(self):
    """Drop the guide's 'cover' reference and the manifest item it names.

    Nothing happens unless the book has cover metadata *and* a 'cover'
    entry in the guide.

    Raises:
        KeyError: if the referenced file is not in ``oeb.manifest.hrefs``.
    """
    oeb = self.oeb
    if not oeb.metadata.cover or 'cover' not in oeb.guide:
        return
    href = oeb.guide['cover'].href
    # Guide hrefs may carry a '#fragment' while the manifest is keyed by
    # the bare path, so strip the fragment before the lookup.
    path = href.split('#', 1)[0]
    # Resolve the item first so a failed lookup leaves the guide untouched.
    item = oeb.manifest.hrefs[path]
    del oeb.guide['cover']
    oeb.manifest.remove(item)
|
||||
|
||||
def mobimlize_spine(self):
    """Replace each spine item's document with a freshly built tree.

    For every item a new ``html`` root (using ``MOBI_NSMAP``) with an empty
    ``body`` is created, ``mobimlize_elem`` fills that body from the
    original one, and the new root becomes ``item.data``.
    """
    for item in self.oeb.spine:
        # The stylizer is built from the original document, before
        # item.data is swapped out below.
        stylizer = Stylizer(item.data, item.href, self.oeb, self.profile)
        body = item.data.find(XHTML('body'))
        nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
        nbody = etree.SubElement(nroot, XHTML('body'))
        self.mobimlize_elem(body, stylizer, BlockState(nbody),
                            [FormatState()])
        # nbody must stay attached to nroot: moving it into the old tree
        # would leave the new document without a body.
        item.data = nroot
|
||||
|
||||
def mobimlize_font(self, ptsize):
    """Return the font number for *ptsize*.

    *ptsize* is first mapped through ``self.fmap``; the mapped value is
    then used as the key into ``self.fnums``.
    """
    mapped = self.fmap[ptsize]
    return self.fnums[mapped]
|
||||
@ -116,7 +126,7 @@ class MobiMLizer(object):
|
||||
lines = text.split('\n')
|
||||
result = lines[:1]
|
||||
for line in lines[1:]:
|
||||
result.append(etree.Element('br'))
|
||||
result.append(etree.Element(XHTML('br')))
|
||||
if line:
|
||||
result.append(line)
|
||||
return result
|
||||
@ -134,7 +144,7 @@ class MobiMLizer(object):
|
||||
bstate.pbreak = False
|
||||
if istate.ids:
|
||||
for id in istate.ids:
|
||||
etree.SubElement(body, 'a', attrib={'id': id})
|
||||
etree.SubElement(body, XHTML('a'), attrib={'id': id})
|
||||
istate.ids.clear()
|
||||
bstate.istate = None
|
||||
bstate.anchor = None
|
||||
@ -147,22 +157,22 @@ class MobiMLizer(object):
|
||||
elif indent != 0 and abs(indent) < self.profile.fbase:
|
||||
indent = (indent / abs(indent)) * self.profile.fbase
|
||||
if tag in NESTABLE_TAGS:
|
||||
para = wrapper = etree.SubElement(parent, tag)
|
||||
para = wrapper = etree.SubElement(parent, XHTML(tag))
|
||||
bstate.nested.append(para)
|
||||
if tag == 'li' and len(istates) > 1:
|
||||
istates[-2].list_num += 1
|
||||
para.attrib['value'] = str(istates[-2].list_num)
|
||||
elif left > 0 and indent >= 0:
|
||||
para = wrapper = etree.SubElement(parent, 'blockquote')
|
||||
para = wrapper = etree.SubElement(parent, XHTML('blockquote'))
|
||||
para = wrapper
|
||||
emleft = int(round(left / self.profile.fbase)) - 1
|
||||
emleft = min((emleft, 10))
|
||||
while emleft > 0:
|
||||
para = etree.SubElement(para, 'blockquote')
|
||||
para = etree.SubElement(para, XHTML('blockquote'))
|
||||
emleft -= 1
|
||||
else:
|
||||
ptag = tag if tag in HEADER_TAGS else 'p'
|
||||
para = wrapper = etree.SubElement(parent, ptag)
|
||||
para = wrapper = etree.SubElement(parent, XHTML(ptag))
|
||||
bstate.inline = bstate.para = para
|
||||
vspace = bstate.vpadding + bstate.vmargin
|
||||
bstate.vpadding = bstate.vmargin = 0
|
||||
@ -174,7 +184,7 @@ class MobiMLizer(object):
|
||||
vspace = int(round(vspace / self.profile.fbase))
|
||||
index = max((0, len(body) - 1))
|
||||
while vspace > 0:
|
||||
body.insert(index, etree.Element('br'))
|
||||
body.insert(index, etree.Element(XHTML('br')))
|
||||
vspace -= 1
|
||||
if istate.halign != 'auto':
|
||||
para.attrib['align'] = istate.halign
|
||||
@ -182,7 +192,7 @@ class MobiMLizer(object):
|
||||
if tag in CONTENT_TAGS:
|
||||
bstate.inline = para
|
||||
pstate = bstate.istate = None
|
||||
etree.SubElement(para, tag, attrib=istate.attrib)
|
||||
etree.SubElement(para, XHTML(tag), attrib=istate.attrib)
|
||||
elif tag in TABLE_TAGS:
|
||||
para.attrib['valign'] = 'top'
|
||||
if not text:
|
||||
@ -197,20 +207,21 @@ class MobiMLizer(object):
|
||||
elif pstate and pstate.href == href:
|
||||
inline = bstate.anchor
|
||||
else:
|
||||
inline = etree.SubElement(inline, 'a', href=href)
|
||||
inline = etree.SubElement(inline, XHTML('a'), href=href)
|
||||
bstate.anchor = inline
|
||||
if valign == 'super':
|
||||
inline = etree.SubElement(inline, 'sup')
|
||||
inline = etree.SubElement(inline, XHTML('sup'))
|
||||
elif valign == 'sub':
|
||||
inline = etree.SubElement(inline, 'sub')
|
||||
inline = etree.SubElement(inline, XHTML('sub'))
|
||||
if istate.family == 'monospace':
|
||||
inline = etree.SubElement(inline, 'tt')
|
||||
inline = etree.SubElement(inline, XHTML('tt'))
|
||||
if fsize != 3:
|
||||
inline = etree.SubElement(inline, 'font', size=str(fsize))
|
||||
inline = etree.SubElement(inline, XHTML('font'),
|
||||
size=str(fsize))
|
||||
if istate.italic:
|
||||
inline = etree.SubElement(inline, 'i')
|
||||
inline = etree.SubElement(inline, XHTML('i'))
|
||||
if istate.bold:
|
||||
inline = etree.SubElement(inline, 'b')
|
||||
inline = etree.SubElement(inline, XHTML('b'))
|
||||
bstate.inline = inline
|
||||
bstate.istate = istate
|
||||
inline = bstate.inline
|
||||
@ -353,7 +364,7 @@ class MobiMLizer(object):
|
||||
if isblock:
|
||||
para = bstate.para
|
||||
if para is not None and para.text == u'\xa0':
|
||||
para.getparent().replace(para, etree.Element('br'))
|
||||
para.getparent().replace(para, etree.Element(XHTML('br')))
|
||||
bstate.para = None
|
||||
bstate.istate = None
|
||||
vmargin = asfloat(style['margin-bottom'])
|
||||
|
@ -26,6 +26,7 @@ from calibre.ebooks.oeb.base import FauxLogger, OEBBook
|
||||
from calibre.ebooks.oeb.profile import Context
|
||||
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
|
||||
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
|
||||
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
|
||||
from calibre.ebooks.mobi.palmdoc import compress_doc
|
||||
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||
from calibre.ebooks.mobi.mobiml import MBP_NS, MBP, MobiMLizer
|
||||
@ -66,23 +67,28 @@ def encode(data):
|
||||
return data.encode('utf-8')
|
||||
|
||||
# Almost like the one for MS LIT, but not quite.
DECINT_FORWARD = 0
DECINT_BACKWARD = 1


def decint(value, direction):
    """Encode a non-negative integer as a string of 7-bit groups.

    The groups are emitted most-significant first, one per character.
    Exactly one character gets its high bit (0x80) set:

    * ``DECINT_FORWARD``  -- the last character (least-significant group);
    * ``DECINT_BACKWARD`` -- the first character (most-significant group).

    NOTE(review): presumably the flag marks where the integer ends for a
    reader scanning in the named direction -- confirm against the format
    notes.

    Raises:
        ValueError: if *value* is negative (the shift loop would never
            reach zero) or *direction* is not one of the two constants
            (no character would be flagged).
    """
    if value < 0:
        raise ValueError('decint() cannot encode negative value %r' % (value,))
    if direction not in (DECINT_FORWARD, DECINT_BACKWARD):
        raise ValueError('unknown decint direction %r' % (direction,))
    # Collected least-significant group first; reversed on output.
    septets = []
    while True:
        septets.append(value & 0x7f)
        value >>= 7
        if value == 0:
            break
    if direction == DECINT_FORWARD:
        septets[0] |= 0x80
    else:
        septets[-1] |= 0x80
    return ''.join(chr(b) for b in reversed(septets))
|
||||
|
||||
|
||||
class Serializer(object):
|
||||
NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}
|
||||
|
||||
def __init__(self, oeb, images):
|
||||
oeb.logger.info('Serializing markup content...')
|
||||
self.oeb = oeb
|
||||
self.images = images
|
||||
self.id_offsets = {}
|
||||
@ -238,22 +244,11 @@ class MobiWriter(object):
|
||||
self._oeb = oeb
|
||||
self._stream = stream
|
||||
self._records = [None]
|
||||
self._remove_html_cover()
|
||||
self._generate_content()
|
||||
self._generate_record0()
|
||||
self._write_header()
|
||||
self._write_content()
|
||||
|
||||
def _remove_html_cover(self):
|
||||
oeb = self._oeb
|
||||
if not oeb.metadata.cover \
|
||||
or 'cover' not in oeb.guide:
|
||||
return
|
||||
href = oeb.guide['cover'].href
|
||||
del oeb.guide['cover']
|
||||
item = oeb.manifest.hrefs[href]
|
||||
oeb.manifest.remove(item)
|
||||
|
||||
def _generate_content(self):
|
||||
self._map_image_names()
|
||||
self._generate_text()
|
||||
@ -318,11 +313,17 @@ class MobiWriter(object):
|
||||
running = offset
|
||||
while breaks and (breaks[0] - offset) < RECORD_SIZE:
|
||||
pbreak = (breaks.pop(0) - running) >> 3
|
||||
encoded = decint(pbreak)
|
||||
encoded = decint(pbreak, DECINT_FORWARD)
|
||||
record.write(encoded)
|
||||
running += pbreak << 3
|
||||
nextra += len(encoded)
|
||||
record.write(decint(nextra + 1))
|
||||
lsize = 1
|
||||
while True:
|
||||
size = decint(nextra + lsize, DECINT_BACKWARD)
|
||||
if len(size) == lsize:
|
||||
break
|
||||
lsize += 1
|
||||
record.write(size)
|
||||
self._records.append(record.getvalue())
|
||||
nrecords += 1
|
||||
offset += RECORD_SIZE
|
||||
@ -479,10 +480,12 @@ def main(argv=sys.argv):
|
||||
flattener = CSSFlattener(fbase=fbase, fkey=fkey, unfloat=True,
|
||||
untable=True)
|
||||
rasterizer = SVGRasterizer()
|
||||
trimmer = ManifestTrimmer()
|
||||
mobimlizer = MobiMLizer()
|
||||
#flattener.transform(oeb, context)
|
||||
flattener.transform(oeb, context)
|
||||
rasterizer.transform(oeb, context)
|
||||
#mobimlizer.transform(oeb, context)
|
||||
mobimlizer.transform(oeb, context)
|
||||
trimmer.transform(oeb, context)
|
||||
writer.dump(oeb, outpath)
|
||||
return 0
|
||||
|
||||
|
@ -125,12 +125,17 @@ def urlnormalize(href):
|
||||
class OEBError(Exception):
    """Exception subclass with no behaviour of its own beyond Exception."""
|
||||
|
||||
|
||||
class FauxLogger(object):
    """Stand-in logger that prints every message.

    Any ordinary attribute (``info``, ``warn``, ...) resolves to the logger
    itself, and calling the logger prints the message, so
    ``logger.<anything>(message)`` works without further setup.
    """

    def __getattr__(self, name):
        # Only reached for names normal lookup did not find. Refuse
        # double-underscore names so explicit protocol probes (getattr /
        # hasattr for special methods) are not handed a bogus callable.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        return self

    def __call__(self, message):
        # Parenthesised single-argument form prints the same thing under
        # the Python 2 print statement and the Python 3 print function.
        print(message)
|
||||
|
||||
class Logger(LoggingInterface, object):
    """LoggingInterface subclass exposing ``log_<name>`` attributes as ``<name>``."""

    def __getattr__(self, name):
        # Called only when normal lookup fails: retry with the 'log_'
        # prefix, going through object.__getattribute__ so this hook is
        # not re-entered if the prefixed name is missing as well.
        prefixed = 'log_' + name
        return object.__getattribute__(self, prefixed)
|
||||
|
||||
|
||||
class AbstractContainer(object):
|
||||
def read_xml(self, path):
|
||||
@ -745,19 +750,19 @@ class OEBBook(object):
|
||||
self.uid = item
|
||||
break
|
||||
else:
|
||||
self.logger.log_warn(u'Unique-identifier %r not found.' % uid)
|
||||
self.logger.warn(u'Unique-identifier %r not found.' % uid)
|
||||
for ident in metadata.identifier:
|
||||
if 'id' in ident.attrib:
|
||||
self.uid = metadata.identifier[0]
|
||||
break
|
||||
if not metadata.language:
|
||||
self.logger.log_warn(u'Language not specified.')
|
||||
self.logger.warn(u'Language not specified.')
|
||||
metadata.add('language', 'en')
|
||||
if not metadata.creator:
|
||||
self.logger.log_warn(u'Creator not specified.')
|
||||
self.logger.warn(u'Creator not specified.')
|
||||
metadata.add('creator', 'Unknown')
|
||||
if not metadata.title:
|
||||
self.logger.log_warn(u'Title not specified.')
|
||||
self.logger.warn(u'Title not specified.')
|
||||
metadata.add('title', 'Unknown')
|
||||
|
||||
def _manifest_from_opf(self, opf):
|
||||
@ -765,7 +770,7 @@ class OEBBook(object):
|
||||
for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'):
|
||||
href = elem.get('href')
|
||||
if not self.container.exists(href):
|
||||
self.logger.log_warn(u'Manifest item %r not found.' % href)
|
||||
self.logger.warn(u'Manifest item %r not found.' % href)
|
||||
continue
|
||||
manifest.add(elem.get('id'), href, elem.get('media-type'),
|
||||
elem.get('fallback'))
|
||||
@ -775,7 +780,7 @@ class OEBBook(object):
|
||||
for elem in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
|
||||
idref = elem.get('idref')
|
||||
if idref not in self.manifest:
|
||||
self.logger.log_warn(u'Spine item %r not found.' % idref)
|
||||
self.logger.warn(u'Spine item %r not found.' % idref)
|
||||
continue
|
||||
item = self.manifest[idref]
|
||||
spine.add(item, elem.get('linear'))
|
||||
@ -794,7 +799,7 @@ class OEBBook(object):
|
||||
href = elem.get('href')
|
||||
path, frag = urldefrag(href)
|
||||
if path not in self.manifest.hrefs:
|
||||
self.logger.log_warn(u'Guide reference %r not found' % href)
|
||||
self.logger.warn(u'Guide reference %r not found' % href)
|
||||
continue
|
||||
guide.add(elem.get('type'), elem.get('title'), href)
|
||||
|
||||
@ -993,7 +998,6 @@ class OEBBook(object):
|
||||
NCX_MIME: (href, ncx)}
|
||||
|
||||
|
||||
|
||||
def main(argv=sys.argv):
|
||||
for arg in argv[1:]:
|
||||
oeb = OEBBook(arg)
|
||||
|
@ -88,6 +88,7 @@ class CSSFlattener(object):
|
||||
self.untable = untable
|
||||
|
||||
def transform(self, oeb, context):
|
||||
oeb.logger.info('Flattening CSS and remapping font sizes...')
|
||||
self.oeb = oeb
|
||||
self.context = context
|
||||
self.stylize_spine()
|
||||
|
@ -33,6 +33,7 @@ class SVGRasterizer(object):
|
||||
QApplication([])
|
||||
|
||||
def transform(self, oeb, context):
|
||||
oeb.logger.info('Rasterizing SVG images...')
|
||||
self.oeb = oeb
|
||||
self.profile = context.dest
|
||||
self.images = {}
|
||||
@ -143,6 +144,9 @@ class SVGRasterizer(object):
|
||||
if key in self.images:
|
||||
href = self.images[key]
|
||||
else:
|
||||
logger = self.oeb.logger
|
||||
logger.info('Rasterizing %r to %dx%d'
|
||||
% (svgitem.href, size.width(), size.height()))
|
||||
image = QImage(size, QImage.Format_ARGB32_Premultiplied)
|
||||
image.fill(QColor("white").rgb())
|
||||
painter = QPainter(image)
|
||||
|
40
src/calibre/ebooks/oeb/transforms/trimmanifest.py
Normal file
40
src/calibre/ebooks/oeb/transforms/trimmanifest.py
Normal file
@ -0,0 +1,40 @@
|
||||
'''
|
||||
OPF manifest trimming transform.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys
|
||||
import os
|
||||
from lxml import etree
|
||||
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME
|
||||
|
||||
# Compiled XPath selectors, one per expression, each yielding attribute
# values from a document; the 'h' and 'xl' prefixes come from XPNSMAP.
LINK_SELECTORS = [
    etree.XPath(expr, namespaces=XPNSMAP)
    for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data',
                 '//*/@xl:href')
]
|
||||
|
||||
class ManifestTrimmer(object):
    """Transform that removes manifest items nothing is seen to reference."""

    def transform(self, oeb, context):
        """Trim *oeb*'s manifest down to the items found to be in use.

        An item counts as used when it is in the spine, when a spine
        document references it through one of ``LINK_SELECTORS``, or when a
        metadata value equals its href or its manifest id. Everything else
        is logged and removed. *context* is accepted but not used.
        """
        oeb.logger.info('Trimming unused files from manifest...')
        used = set()
        for item in oeb.spine:
            used.add(item.href)
            for selector in LINK_SELECTORS:
                for href in selector(item.data):
                    used.add(item.abshref(href))
            # TODO: Things mentioned in CSS
            # TODO: Things mentioned in SVG
        # Who knows what people will do...
        for term in oeb.metadata:
            for item in oeb.metadata[term]:
                if item.value in oeb.manifest.hrefs:
                    used.add(item.value)
                elif item.value in oeb.manifest.ids:
                    used.add(oeb.manifest.ids[item.value].href)
        # Iterate over a snapshot: items are removed from the manifest
        # inside the loop.
        for item in list(oeb.manifest.values()):
            if item.href not in used:
                oeb.logger.info('Trimming %r from manifest' % item.href)
                oeb.manifest.remove(item)
|
Loading…
x
Reference in New Issue
Block a user