mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
MOBI Output: Command line support for converting to MOBI via the command any2mobi (thanks to Marshall T. Vandegrift)
This commit is contained in:
commit
329fd4866f
2
setup.py
2
setup.py
@ -166,7 +166,7 @@ if __name__ == '__main__':
|
||||
metadata_sqlite = 'library/metadata_sqlite.sql',
|
||||
jquery = 'gui2/viewer/jquery.js',
|
||||
jquery_scrollTo = 'gui2/viewer/jquery_scrollTo.js',
|
||||
html_css = 'ebooks/lit/html.css',
|
||||
html_css = 'ebooks/oeb/html.css',
|
||||
)
|
||||
|
||||
DEST = os.path.join('src', APPNAME, 'resources.py')
|
||||
|
@ -798,8 +798,9 @@ class Processor(Parser):
|
||||
if face is not None:
|
||||
faces = []
|
||||
for face in face.split(','):
|
||||
if ' ' in face:
|
||||
face = "%s" % face
|
||||
face = face.strip()
|
||||
if ' ' in face and not (face[0] == face[-1] == '"'):
|
||||
face = '"%s"' % face.replace('"', r'\"')
|
||||
faces.append(face)
|
||||
for generic in ('serif', 'sans-serif', 'monospace'):
|
||||
if generic in faces:
|
||||
|
@ -15,7 +15,7 @@ from lxml import etree
|
||||
from calibre.ebooks.lit import LitError
|
||||
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
|
||||
import calibre.ebooks.lit.mssha1 as mssha1
|
||||
from calibre.ebooks.lit.oeb import urlnormalize
|
||||
from calibre.ebooks.oeb.base import urlnormalize
|
||||
from calibre.ebooks import DRMError
|
||||
from calibre import plugins
|
||||
lzx, lxzerror = plugins['lzx']
|
||||
|
@ -23,14 +23,20 @@ from urllib import unquote as urlunquote
|
||||
from lxml import etree
|
||||
from calibre.ebooks.lit.reader import DirectoryEntry
|
||||
import calibre.ebooks.lit.maps as maps
|
||||
from calibre.ebooks.lit.oeb import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \
|
||||
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \
|
||||
CSS_MIME, OPF_MIME, XML_NS, XML
|
||||
from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize, xpath
|
||||
from calibre.ebooks.lit.oeb import FauxLogger, OEBBook
|
||||
from calibre.ebooks.lit.stylizer import Stylizer
|
||||
from calibre.ebooks.oeb.base import namespace, barename, prefixname, \
|
||||
urlnormalize, xpath
|
||||
from calibre.ebooks.oeb.base import Logger, OEBBook
|
||||
from calibre.ebooks.oeb.profile import Context
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
|
||||
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
|
||||
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
|
||||
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
|
||||
from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
|
||||
from calibre.ebooks.lit.lzx import Compressor
|
||||
import calibre
|
||||
from calibre import LoggingInterface
|
||||
from calibre import plugins
|
||||
msdes, msdeserror = plugins['msdes']
|
||||
import calibre.ebooks.lit.mssha1 as mssha1
|
||||
@ -116,12 +122,6 @@ LZXC_CONTROL = \
|
||||
|
||||
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
|
||||
|
||||
def prefixname(name, nsrmap):
|
||||
prefix = nsrmap[namespace(name)]
|
||||
if not prefix:
|
||||
return barename(name)
|
||||
return ':'.join((prefix, barename(name)))
|
||||
|
||||
def decint(value):
|
||||
bytes = []
|
||||
while True:
|
||||
@ -143,9 +143,9 @@ def warn(x):
|
||||
class ReBinary(object):
|
||||
NSRMAP = {'': None, XML_NS: 'xml'}
|
||||
|
||||
def __init__(self, root, item, oeb, map=HTML_MAP, logger=FauxLogger()):
|
||||
def __init__(self, root, path, oeb, map=HTML_MAP):
|
||||
self.item = item
|
||||
self.logger = logger
|
||||
self.logger = oeb.logger
|
||||
self.manifest = oeb.manifest
|
||||
self.tags, self.tattrs = map
|
||||
self.buf = StringIO()
|
||||
@ -300,10 +300,9 @@ def preserve(function):
|
||||
return wrapper
|
||||
|
||||
class LitWriter(object):
|
||||
def __init__(self, oeb, logger=FauxLogger()):
|
||||
self._oeb = oeb
|
||||
self._logger = logger
|
||||
self._litize_oeb()
|
||||
def __init__(self):
|
||||
# Wow, no options
|
||||
pass
|
||||
|
||||
def _litize_oeb(self):
|
||||
oeb = self._oeb
|
||||
@ -312,32 +311,27 @@ class LitWriter(object):
|
||||
if oeb.metadata.cover:
|
||||
id = str(oeb.metadata.cover[0])
|
||||
cover = oeb.manifest[id]
|
||||
elif MS_COVER_TYPE in oeb.guide:
|
||||
href = oeb.guide[MS_COVER_TYPE].href
|
||||
cover = oeb.manifest.hrefs[href]
|
||||
elif 'cover' in oeb.guide:
|
||||
href = oeb.guide['cover'].href
|
||||
cover = oeb.manifest.hrefs[href]
|
||||
else:
|
||||
html = oeb.spine[0].data
|
||||
imgs = xpath(html, '//img[position()=1]')
|
||||
href = imgs[0].get('src') if imgs else None
|
||||
cover = oeb.manifest.hrefs[href] if href else None
|
||||
if cover:
|
||||
if not oeb.metadata.cover:
|
||||
oeb.metadata.add('cover', cover.id)
|
||||
for type, title in ALL_MS_COVER_TYPES:
|
||||
if type not in oeb.guide:
|
||||
oeb.guide.add(type, title, cover.href)
|
||||
else:
|
||||
self._logger.log_warn('No suitable cover image found.')
|
||||
self._logger.warn('No suitable cover image found.')
|
||||
|
||||
def dump(self, stream):
|
||||
def dump(self, oeb, path):
|
||||
if hasattr(path, 'write'):
|
||||
return self._dump_stream(oeb, path)
|
||||
with open(path, 'w+b') as stream:
|
||||
return self._dump_stream(oeb, stream)
|
||||
|
||||
def _dump_stream(self, oeb, stream):
|
||||
self._oeb = oeb
|
||||
self._logger = oeb.logger
|
||||
self._stream = stream
|
||||
self._sections = [StringIO() for i in xrange(4)]
|
||||
self._directory = []
|
||||
self._meta = None
|
||||
self._dump()
|
||||
self._litize_oeb()
|
||||
self._write_content()
|
||||
|
||||
def _write(self, *data):
|
||||
for datum in data:
|
||||
@ -351,7 +345,7 @@ class LitWriter(object):
|
||||
def _tell(self):
|
||||
return self._stream.tell()
|
||||
|
||||
def _dump(self):
|
||||
def _write_content(self):
|
||||
# Build content sections
|
||||
self._build_sections()
|
||||
|
||||
@ -480,8 +474,7 @@ class LitWriter(object):
|
||||
secnum = 0
|
||||
if not isinstance(data, basestring):
|
||||
self._add_folder(name)
|
||||
rebin = ReBinary(data, item, self._oeb, map=HTML_MAP,
|
||||
logger=self._logger)
|
||||
rebin = ReBinary(data, item, self._oeb, map=HTML_MAP)
|
||||
self._add_file(name + '/ahc', rebin.ahc, 0)
|
||||
self._add_file(name + '/aht', rebin.aht, 0)
|
||||
item.page_breaks = rebin.page_breaks
|
||||
@ -560,8 +553,7 @@ class LitWriter(object):
|
||||
meta.attrib['ms--minimum_level'] = '0'
|
||||
meta.attrib['ms--attr5'] = '1'
|
||||
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
|
||||
rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP,
|
||||
logger=self._logger)
|
||||
rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP)
|
||||
meta = rebin.content
|
||||
self._meta = meta
|
||||
self._add_file('/meta', meta)
|
||||
@ -721,23 +713,35 @@ def option_parser():
|
||||
'-o', '--output', default=None,
|
||||
help=_('Output file. Default is derived from input filename.'))
|
||||
parser.add_option(
|
||||
'--verbose', default=False, action='store_true',
|
||||
'-v', '--verbose', default=0, action='count',
|
||||
help=_('Useful for debugging.'))
|
||||
return parser
|
||||
|
||||
def oeb2lit(opts, opfpath):
|
||||
logger = LoggingInterface(logging.getLogger('oeb2lit'))
|
||||
def oeb2lit(opts, inpath):
|
||||
logger = Logger(logging.getLogger('oeb2lit'))
|
||||
logger.setup_cli_handler(opts.verbose)
|
||||
litpath = opts.output
|
||||
if litpath is None:
|
||||
litpath = os.path.basename(opfpath)
|
||||
litpath = os.path.splitext(litpath)[0] + '.lit'
|
||||
litpath = os.path.abspath(litpath)
|
||||
lit = LitWriter(OEBBook(opfpath, logger=logger), logger=logger)
|
||||
with open(litpath, 'wb') as f:
|
||||
lit.dump(f)
|
||||
run_plugins_on_postprocess(litpath, 'lit')
|
||||
logger.log_info(_('Output written to ')+litpath)
|
||||
outpath = opts.output
|
||||
if outpath is None:
|
||||
outpath = os.path.basename(inpath)
|
||||
outpath = os.path.splitext(outpath)[0] + '.lit'
|
||||
outpath = os.path.abspath(outpath)
|
||||
context = Context('Firefox', 'MSReader')
|
||||
oeb = OEBBook(inpath, logger=logger)
|
||||
tocadder = HTMLTOCAdder()
|
||||
tocadder.transform(oeb, context)
|
||||
mangler = CaseMangler()
|
||||
mangler.transform(oeb, context)
|
||||
fbase = context.dest.fbase
|
||||
flattener = CSSFlattener(fbase=fbase, unfloat=True, untable=True)
|
||||
flattener.transform(oeb, context)
|
||||
rasterizer = SVGRasterizer()
|
||||
rasterizer.transform(oeb, context)
|
||||
trimmer = ManifestTrimmer()
|
||||
trimmer.transform(oeb, context)
|
||||
lit = LitWriter()
|
||||
lit.dump(oeb, outpath)
|
||||
run_plugins_on_postprocess(outpath, 'lit')
|
||||
logger.info(_('Output written to ') + outpath)
|
||||
|
||||
|
||||
def main(argv=sys.argv):
|
||||
@ -746,8 +750,8 @@ def main(argv=sys.argv):
|
||||
if len(args) != 1:
|
||||
parser.print_help()
|
||||
return 1
|
||||
opfpath = args[0]
|
||||
oeb2lit(opts, opfpath)
|
||||
inpath = args[0]
|
||||
oeb2lit(opts, inpath)
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
67
src/calibre/ebooks/mobi/from_any.py
Normal file
67
src/calibre/ebooks/mobi/from_any.py
Normal file
@ -0,0 +1,67 @@
|
||||
'''
|
||||
Convert any ebook format to Mobipocket.
|
||||
'''
|
||||
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net ' \
|
||||
'and Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys, os, glob, logging
|
||||
|
||||
from calibre.ebooks.epub.from_any import any2epub, formats, USAGE
|
||||
from calibre.ebooks.epub import config as common_config
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.ebooks.mobi.writer import oeb2mobi, add_mobi_options
|
||||
|
||||
def config(defaults=None):
    '''
    Build the configuration object for Mobipocket conversion.

    Delegates to the common EPUB ``config`` factory, passing `defaults`
    through unchanged and using 'mobi' as the configuration name.
    '''
    mobi_config = common_config(name='mobi', defaults=defaults)
    return mobi_config
|
||||
|
||||
def option_parser(usage=USAGE):
    '''
    Return the command line option parser for any2mobi.

    `usage` is a %-format template with two slots; they are filled with the
    target format name ('Mobipocket') and the value returned by `formats()`.
    The Mobipocket-specific options are added to the parser before it is
    returned.
    '''
    filled_usage = usage % ('Mobipocket', formats())
    mobi_parser = config().option_parser(usage=filled_usage)
    add_mobi_options(mobi_parser)
    return mobi_parser
|
||||
|
||||
def any2mobi(opts, path):
    '''
    Convert the e-book at `path` into a Mobipocket file.

    The input is first run through `any2epub` into a temporary directory
    (with ``create_epub=False`` and ``extract_to`` pointing at that
    directory), then the first ``*.opf`` found there is handed to
    `oeb2mobi`.

    :param opts: Parsed options object. ``opts.output`` is filled in (and made
                 absolute) when it is None. ``opts.profile`` and
                 ``opts.dont_split_on_page_breaks`` are overwritten, as in the
                 original implementation.
    :param path: Path of the input file; it must have a file extension.
    :raises ValueError: If `path` has no extension.
    :raises IndexError: If the intermediate conversion left no OPF file in
                        the temporary directory.
    '''
    if not os.path.splitext(path)[1]:
        raise ValueError('Unknown file type: '+path)

    if opts.output is None:
        opts.output = os.path.splitext(os.path.basename(path))[0]+'.mobi'

    opts.output = os.path.abspath(opts.output)
    orig_output = opts.output

    with TemporaryDirectory('_any2mobi') as tdir:
        oebdir = os.path.join(tdir, 'oeb')
        os.mkdir(oebdir)
        opts.profile = 'None'
        opts.dont_split_on_page_breaks = True
        orig_bfs = opts.base_font_size2
        # The intermediate conversion writes to a throw-away location and runs
        # with base_font_size2 forced to 0.
        opts.output = os.path.join(tdir, 'dummy.epub')
        opts.base_font_size2 = 0
        try:
            any2epub(opts, path, create_epub=False, oeb_cover=True,
                     extract_to=oebdir)
        finally:
            # Always put the caller's values back, even when the intermediate
            # conversion fails; otherwise opts would keep pointing at a path
            # inside the (soon to be deleted) temporary directory.
            opts.base_font_size2 = orig_bfs
            opts.output = orig_output
        opf = glob.glob(os.path.join(oebdir, '*.opf'))[0]
        logging.getLogger('html2epub').info(_('Creating Mobipocket file from EPUB...'))
        oeb2mobi(opts, opf)
|
||||
|
||||
|
||||
def main(args=sys.argv):
|
||||
parser = option_parser()
|
||||
opts, args = parser.parse_args(args)
|
||||
if len(args) < 2:
|
||||
parser.print_help()
|
||||
print 'No input file specified.'
|
||||
return 1
|
||||
any2mobi(opts, args[1])
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
@ -3,6 +3,8 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from struct import pack
|
||||
|
||||
main_language = {
|
||||
0 : "NEUTRAL",
|
||||
54 : "AFRIKAANS",
|
||||
@ -155,5 +157,170 @@ sub_language = {
|
||||
2 : "SWEDISH_FINLAND",
|
||||
1 : "UZBEK_LATIN",
|
||||
2 : "UZBEK_CYRILLIC",
|
||||
|
||||
}
|
||||
|
||||
# Maps a lower-case language subtag to a dict of region subtag -> code pair.
# Each pair is (language code, sub-language code); the None key of an inner
# dict is the pair used when no region subtag matches. The outer None key is
# the neutral (0, 0) entry used when the language itself is unknown.
IANA_MOBI = \
    {None: {None: (0, 0)},
     'af': {None: (54, 0)},
     'ar': {None: (1, 0),
            'AE': (1, 56),
            'BH': (1, 60),
            'DZ': (1, 20),
            'EG': (1, 12),
            'JO': (1, 44),
            'KW': (1, 52),
            'LB': (1, 48),
            'MA': (1, 24),
            'OM': (1, 32),
            'QA': (1, 64),
            'SA': (1, 4),
            'SY': (1, 40),
            'TN': (1, 28),
            'YE': (1, 36)},
     'as': {None: (77, 0)},
     'az': {None: (44, 0)},
     'be': {None: (35, 0)},
     'bg': {None: (2, 0)},
     'bn': {None: (69, 0)},
     'ca': {None: (3, 0)},
     'cs': {None: (5, 0)},
     'da': {None: (6, 0)},
     'de': {None: (7, 0),
            'AT': (7, 12),
            'CH': (7, 8),
            'LI': (7, 20),
            'LU': (7, 16)},
     'el': {None: (8, 0)},
     'en': {None: (9, 0),
            'AU': (9, 12),
            'BZ': (9, 40),
            'CA': (9, 16),
            'GB': (9, 8),
            'IE': (9, 24),
            'JM': (9, 32),
            'NZ': (9, 20),
            'PH': (9, 52),
            'TT': (9, 44),
            'US': (9, 4),
            'ZA': (9, 28),
            'ZW': (9, 48)},
     'es': {None: (10, 0),
            'AR': (10, 44),
            'BO': (10, 64),
            'CL': (10, 52),
            'CO': (10, 36),
            'CR': (10, 20),
            'DO': (10, 28),
            'EC': (10, 48),
            'ES': (10, 4),
            'GT': (10, 16),
            'HN': (10, 72),
            'MX': (10, 8),
            'NI': (10, 76),
            'PA': (10, 24),
            'PE': (10, 40),
            'PR': (10, 80),
            'PY': (10, 60),
            'SV': (10, 68),
            'UY': (10, 56),
            'VE': (10, 32)},
     'et': {None: (37, 0)},
     'eu': {None: (45, 0)},
     'fa': {None: (41, 0)},
     'fi': {None: (11, 0)},
     'fo': {None: (56, 0)},
     'fr': {None: (12, 0),
            'BE': (12, 8),
            'CA': (12, 12),
            'CH': (12, 16),
            'FR': (12, 4),
            'LU': (12, 20),
            'MC': (12, 24)},
     'gu': {None: (71, 0)},
     'he': {None: (13, 0)},
     'hi': {None: (57, 0)},
     'hr': {None: (26, 0)},
     'hu': {None: (14, 0)},
     'hy': {None: (43, 0)},
     'id': {None: (33, 0)},
     'is': {None: (15, 0)},
     'it': {None: (16, 0),
            'CH': (16, 8),
            'IT': (16, 4)},
     'ja': {None: (17, 0)},
     'ka': {None: (55, 0)},
     'kk': {None: (63, 0)},
     'kn': {None: (75, 0)},
     'ko': {None: (18, 0)},
     'kok': {None: (87, 0)},
     'lt': {None: (39, 0)},
     'lv': {None: (38, 0)},
     'mk': {None: (47, 0)},
     'ml': {None: (76, 0)},
     'mr': {None: (78, 0)},
     'ms': {None: (62, 0)},
     'mt': {None: (58, 0)},
     'ne': {None: (97, 0)},
     'nl': {None: (19, 0),
            'BE': (19, 8)},
     'no': {None: (20, 0)},
     'or': {None: (72, 0)},
     'pa': {None: (70, 0)},
     'pl': {None: (21, 0)},
     'pt': {None: (22, 0),
            'BR': (22, 4),
            'PT': (22, 8)},
     'rm': {None: (23, 0)},
     'ro': {None: (24, 0)},
     'ru': {None: (25, 0)},
     'sa': {None: (79, 0)},
     'se': {None: (59, 0)},
     'sk': {None: (27, 0)},
     'sl': {None: (36, 0)},
     'sq': {None: (28, 0)},
     'sr': {None: (26, 12),
            'RS': (26, 12)},
     'st': {None: (48, 0)},
     'sv': {None: (29, 0),
            'FI': (29, 8)},
     'sw': {None: (65, 0)},
     'ta': {None: (73, 0)},
     'te': {None: (74, 0)},
     'th': {None: (30, 0)},
     'tn': {None: (50, 0)},
     'tr': {None: (31, 0)},
     'ts': {None: (49, 0)},
     'tt': {None: (68, 0)},
     'uk': {None: (34, 0)},
     'ur': {None: (32, 0)},
     'uz': {None: (67, 0),
            'UZ': (67, 8)},
     'vi': {None: (42, 0)},
     'wen': {None: (46, 0)},
     'xh': {None: (52, 0)},
     'zh': {None: (4, 0),
           'CN': (4, 8),
           'HK': (4, 12),
           'SG': (4, 16),
           'TW': (4, 4)},
     'zu': {None: (53, 0)}}

def iana2mobi(icode):
    '''
    Translate a '-'-separated language tag into a packed 4-byte language code.

    The leading subtags are scanned (case-insensitively) until one is a known
    language in `IANA_MOBI`; the remaining subtags are then scanned for a
    region known for that language, trying each subtag as given, title-cased
    and upper-cased. Unknown languages yield the neutral (0, 0) pair and
    unknown regions yield the language's default pair.

    :param icode: Language tag such as 'en', 'en-US' or 'zh-Hant-TW'. May be
                  None or empty, in which case the neutral code is returned
                  (previously None raised AttributeError).
    :return: ``pack('>HBB', 0, sub_language, language)``.
    '''
    langdict = IANA_MOBI[None]
    # A missing language must not crash the conversion: treat it as neutral.
    subtags = list(icode.split('-')) if icode else []
    while len(subtags) > 0:
        lang = subtags.pop(0).lower()
        if lang in IANA_MOBI:
            langdict = IANA_MOBI[lang]
            break
    mcode = langdict[None]
    # Only subtags that follow the recognised language are region candidates.
    while len(subtags) > 0:
        subtag = subtags.pop(0)
        if subtag not in langdict:
            subtag = subtag.title()
        if subtag not in langdict:
            subtag = subtag.upper()
        if subtag in langdict:
            mcode = langdict[subtag]
            break
    # Note the order: the sub-language byte precedes the language byte.
    return pack('>HBB', 0, mcode[1], mcode[0])
|
||||
|
379
src/calibre/ebooks/mobi/mobiml.py
Normal file
379
src/calibre/ebooks/mobi/mobiml.py
Normal file
@ -0,0 +1,379 @@
|
||||
'''
|
||||
Transform XHTML/OPS-ish content into Mobipocket HTML 3.2.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
# E-mail domain corrected from the misspelled "gmail.cam".
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys
|
||||
import os
|
||||
import copy
|
||||
import re
|
||||
from lxml import etree
|
||||
from calibre.ebooks.oeb.base import namespace, barename
|
||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
|
||||
|
||||
MBP_NS = 'http://mobipocket.com/ns/mbp'

def MBP(name):
    '''Return `name` qualified with the Mobipocket namespace: '{namespace}name'.'''
    parts = {'ns': MBP_NS, 'name': name}
    return '{%(ns)s}%(name)s' % parts
|
||||
|
||||
# Namespace map given to the generated <html> root: XHTML is the default
# namespace and 'mbp' is the prefix for Mobipocket-specific elements.
MOBI_NSMAP = {None: XHTML_NS, 'mbp': MBP_NS}

# Heading tag names. NOTE(review): not referenced by the code visible in this
# module -- confirm whether it is used elsewhere.
HEADER_TAGS = set(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
# Tags that are re-created under their own name in the output and pushed on
# BlockState.nested so following blocks are attached inside them.
NESTABLE_TAGS = set(['ol', 'ul', 'li', 'table', 'tr', 'td', 'th'])
# Tags that get valign="top" and never receive height/width attributes.
TABLE_TAGS = set(['table', 'tr', 'td', 'th'])
# Tags which, when they carry no text, are attached to the current paragraph
# (or directly to the body) instead of starting a new paragraph.
SPECIAL_TAGS = set(['hr', 'br'])
# Tags emitted as elements of their own even when they carry no text.
CONTENT_TAGS = set(['img', 'hr', 'br'])

# Values of page-break-before / page-break-after that request a page break.
PAGE_BREAKS = set(['always', 'odd', 'even'])

# Matches a run of whitespace; used to collapse such runs to a single space.
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
|
||||
|
||||
def asfloat(value):
    '''
    Coerce a style value to float.

    Plain numbers (int, long, float -- which includes bool) are converted
    with float(); anything else, such as a keyword string or None, yields 0.0.
    '''
    if isinstance(value, (int, long, float)):
        return float(value)
    return 0.0
|
||||
|
||||
class BlockState(object):
    '''
    Mutable block-level bookkeeping shared across one body conversion.

    Holds the output body element, the stack of open nestable containers,
    the current paragraph / inline / anchor elements, the pending vertical
    margin and padding, the pending page-break flag, the format state last
    rendered and whether any content has been emitted yet.
    '''
    def __init__(self, body):
        self.body = body
        self.nested = []
        # Nothing has been opened or rendered yet.
        self.para = self.inline = self.anchor = self.istate = None
        # Vertical space accumulated since the last paragraph.
        self.vpadding = self.vmargin = 0.0
        self.pbreak = self.content = False
|
||||
|
||||
class FormatState(object):
    '''
    Formatting in effect for one element during conversion.

    Two states compare equal when the attributes that decide the inline
    wrapper elements agree: font size, italic, bold, href, vertical
    alignment, whitespace preservation and font family. Layout attributes
    (left, indent, halign), pending ids, the list counter and the attribute
    dict take no part in the comparison.
    '''

    # Attributes compared by __eq__, in comparison order.
    _INLINE_ATTRS = ('fsize', 'italic', 'bold', 'href', 'valign',
                     'preserve', 'family')

    def __init__(self):
        # Block layout.
        self.left = 0.
        self.indent = 0.
        self.halign = 'auto'
        # Inline formatting.
        self.fsize = 3
        self.italic = False
        self.bold = False
        self.valign = 'baseline'
        self.preserve = False
        self.family = 'serif'
        self.href = None
        # Bookkeeping.
        self.ids = set()
        self.list_num = 0
        self.attrib = {}

    def __eq__(self, other):
        for attr in self._INLINE_ATTRS:
            if not (getattr(self, attr) == getattr(other, attr)):
                return False
        return True

    def __ne__(self, other):
        same = self.__eq__(other)
        return not same
|
||||
|
||||
|
||||
class MobiMLizer(object):
    '''
    Transform that rewrites every spine document of an OEB book into the
    flat, HTML 3.2 style markup used for Mobipocket output.

    Each source <body> is walked recursively; computed CSS is read through a
    Stylizer and expressed with <p>, <blockquote>, <font>, <i>, <b>, <tt>,
    <sup>, <sub>, <a>, <br> and <mbp:pagebreak> elements plus
    height/width/align attributes in a freshly built tree.
    '''

    def transform(self, oeb, context):
        '''
        Entry point: convert all spine items of `oeb` in place.

        `context.dest` is used as the output profile.
        '''
        oeb.logger.info('Converting XHTML to Mobipocket markup...')
        self.oeb = oeb
        self.profile = profile = context.dest
        # Inverse of profile.fnums (value -> key).
        self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items())
        # NOTE(review): presumably maps an arbitrary point size onto the
        # nearest size the profile knows about -- confirm against KeyMapper.
        self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys())
        self.remove_html_cover()
        self.mobimlize_spine()

    def remove_html_cover(self):
        '''
        Drop the guide's 'cover' entry and its manifest item.

        Does nothing unless the book has cover metadata and the guide has a
        'cover' reference.
        '''
        oeb = self.oeb
        if not oeb.metadata.cover \
           or 'cover' not in oeb.guide:
            return
        href = oeb.guide['cover'].href
        del oeb.guide['cover']
        # NOTE(review): assumes the guide href is present in manifest.hrefs
        # exactly as written -- confirm that no lookup failure is possible.
        item = oeb.manifest.hrefs[href]
        oeb.manifest.remove(item)

    def mobimlize_spine(self):
        '''Replace the data of every spine item with a converted tree.'''
        for item in self.oeb.spine:
            stylizer = Stylizer(item.data, item.href, self.oeb, self.profile)
            body = item.data.find(XHTML('body'))
            # Build a fresh <html><body/></html> to receive the output.
            nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
            nbody = etree.SubElement(nroot, XHTML('body'))
            self.mobimlize_elem(body, stylizer, BlockState(nbody),
                                [FormatState()])
            item.data = nroot

    def mobimlize_font(self, ptsize):
        '''Look up `ptsize` via self.fmap, then the result in self.fnums.'''
        return self.fnums[self.fmap[ptsize]]

    def mobimlize_measure(self, ptsize):
        '''
        Format a length for a height/width attribute.

        Strings are returned unchanged. Numbers whose rounded value is below
        the profile's base font size become whole points ("Npt"); larger
        ones become whole multiples of the base font size ("Nem").
        '''
        if isinstance(ptsize, basestring):
            return ptsize
        embase = self.profile.fbase
        if round(ptsize) < embase:
            return "%dpt" % int(round(ptsize))
        return "%dem" % int(round(ptsize / embase))

    def preize_text(self, text):
        '''
        Prepare whitespace-preserved text for output.

        Spaces become no-break spaces and all newline styles are normalized;
        the result is a list in which every line break is a <br> element and
        non-empty lines are strings.
        '''
        text = unicode(text).replace(u' ', u'\xa0')
        text = text.replace('\r\n', '\n')
        text = text.replace('\r', '\n')
        lines = text.split('\n')
        # The first line is always kept, even when empty.
        result = lines[:1]
        for line in lines[1:]:
            result.append(etree.Element(XHTML('br')))
            if line:
                result.append(line)
        return result

    def mobimlize_content(self, tag, text, bstate, istates):
        '''
        Emit `text` (and/or the content element `tag`) into the output tree.

        Opens a new paragraph-level element when none is current, then makes
        sure the inline wrappers match the format state on top of `istates`
        before appending the text.
        '''
        bstate.content = True
        istate = istates[-1]
        para = bstate.para
        if tag in SPECIAL_TAGS and not text:
            # A bare <hr>/<br> goes into the current paragraph, or straight
            # into the body when there is none.
            para = para if para is not None else bstate.body
        elif para is None:
            body = bstate.body
            if bstate.pbreak:
                etree.SubElement(body, MBP('pagebreak'))
                bstate.pbreak = False
            if istate.ids:
                # Emit pending ids as empty anchors ahead of the new block.
                for id in istate.ids:
                    etree.SubElement(body, XHTML('a'), attrib={'id': id})
                istate.ids.clear()
            bstate.istate = None
            bstate.anchor = None
            parent = bstate.nested[-1] if bstate.nested else bstate.body
            indent = istate.indent
            left = istate.left
            if indent < 0 and abs(indent) < left:
                # A negative indent smaller than the left margin is absorbed
                # into the margin.
                left += indent
                indent = 0
            elif indent != 0 and abs(indent) < self.profile.fbase:
                # Other non-zero indents are at least one base font size,
                # keeping their sign.
                indent = (indent / abs(indent)) * self.profile.fbase
            if tag in NESTABLE_TAGS:
                para = wrapper = etree.SubElement(parent, XHTML(tag))
                bstate.nested.append(para)
                if tag == 'li' and len(istates) > 1:
                    # The list counter lives on the parent's format state.
                    istates[-2].list_num += 1
                    para.attrib['value'] = str(istates[-2].list_num)
            elif left > 0 and indent >= 0:
                # Left margin is expressed as nested blockquotes: one level
                # per base font size, at most 10 levels inside the wrapper.
                para = wrapper = etree.SubElement(parent, XHTML('blockquote'))
                para = wrapper
                emleft = int(round(left / self.profile.fbase)) - 1
                emleft = min((emleft, 10))
                while emleft > 0:
                    para = etree.SubElement(para, XHTML('blockquote'))
                    emleft -= 1
            else:
                para = wrapper = etree.SubElement(parent, XHTML('p'))
            bstate.inline = bstate.para = para
            # Consume the vertical space accumulated since the last block.
            vspace = bstate.vpadding + bstate.vmargin
            bstate.vpadding = bstate.vmargin = 0
            if tag not in TABLE_TAGS:
                wrapper.attrib['height'] = self.mobimlize_measure(vspace)
                para.attrib['width'] = self.mobimlize_measure(indent)
            elif tag == 'table' and vspace > 0:
                # Tables get their leading space as <br> elements inserted
                # into the body instead of a height attribute.
                body = bstate.body
                vspace = int(round(vspace / self.profile.fbase))
                index = max((0, len(body) - 1))
                while vspace > 0:
                    body.insert(index, etree.Element(XHTML('br')))
                    vspace -= 1
            if istate.halign != 'auto':
                para.attrib['align'] = istate.halign
        pstate = bstate.istate
        if tag in CONTENT_TAGS:
            # Emit the element itself and force new inline wrappers for
            # whatever text follows.
            bstate.inline = para
            pstate = bstate.istate = None
            etree.SubElement(para, XHTML(tag), attrib=istate.attrib)
        elif tag in TABLE_TAGS:
            para.attrib['valign'] = 'top'
        if not text:
            return
        if not pstate or istate != pstate:
            # Formatting changed: build a new chain of inline wrappers.
            inline = para
            valign = istate.valign
            fsize = istate.fsize
            href = istate.href
            if not href:
                bstate.anchor = None
            elif pstate and pstate.href == href:
                # Still inside the same link: keep using the open anchor.
                inline = bstate.anchor
            else:
                inline = etree.SubElement(inline, XHTML('a'), href=href)
                bstate.anchor = inline
            # Super/subscript take precedence over an explicit font size;
            # 3 is the size that needs no <font> wrapper.
            if valign == 'super':
                inline = etree.SubElement(inline, XHTML('sup'))
            elif valign == 'sub':
                inline = etree.SubElement(inline, XHTML('sub'))
            elif fsize != 3:
                inline = etree.SubElement(inline, XHTML('font'),
                                          size=str(fsize))
            if istate.family == 'monospace':
                inline = etree.SubElement(inline, XHTML('tt'))
            if istate.italic:
                inline = etree.SubElement(inline, XHTML('i'))
            if istate.bold:
                inline = etree.SubElement(inline, XHTML('b'))
            bstate.inline = inline
        bstate.istate = istate
        inline = bstate.inline
        content = self.preize_text(text) if istate.preserve else [text]
        for item in content:
            if isinstance(item, basestring):
                # Text goes after the last child when there is one,
                # otherwise into the element's own text.
                if len(inline) == 0:
                    inline.text = (inline.text or '') + item
                else:
                    last = inline[-1]
                    last.tail = (last.tail or '') + item
            else:
                inline.append(item)

    def mobimlize_elem(self, elem, stylizer, bstate, istates):
        '''
        Convert `elem` and, recursively, its children.

        A copy of the current format state is pushed on `istates` for the
        duration of the element and popped on exit. Non-XHTML nodes and
        elements that are not displayed are skipped.
        '''
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            return
        style = stylizer.style(elem)
        # <mbp:frame-set/> does not exist lalalala
        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
            return
        tag = barename(elem.tag)
        # copy.copy is shallow: the `ids` set and `attrib` dict are shared
        # with the parent state.
        istate = copy.copy(istates[-1])
        istate.list_num = 0
        istates.append(istate)
        left = 0
        display = style['display']
        # Block-level means: not inline*, not floated, and not <br>.
        isblock = not display.startswith('inline')
        isblock = isblock and style['float'] == 'none'
        isblock = isblock and tag != 'br'
        if isblock:
            bstate.para = None
            istate.halign = style['text-align']
            istate.indent = style['text-indent']
            if style['margin-left'] == 'auto' \
               and style['margin-right'] == 'auto':
                istate.halign = 'center'
            margin = asfloat(style['margin-left'])
            padding = asfloat(style['padding-left'])
            # The body's own margin and padding do not count as indentation.
            if tag != 'body':
                left = margin + padding
            istate.left += left
            # Adjacent margins collapse (the larger one wins); padding
            # terminates the collapsing and adds up.
            vmargin = asfloat(style['margin-top'])
            bstate.vmargin = max((bstate.vmargin, vmargin))
            vpadding = asfloat(style['padding-top'])
            if vpadding > 0:
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
        elif not istate.href:
            # Inline elements outside links: horizontal margin and padding
            # are approximated with no-break spaces added to the source text.
            margin = asfloat(style['margin-left'])
            padding = asfloat(style['padding-left'])
            lspace = margin + padding
            if lspace > 0:
                spaces = int(round((lspace * 3) / style['font-size']))
                elem.text = (u'\xa0' * spaces) + (elem.text or '')
            margin = asfloat(style['margin-right'])
            padding = asfloat(style['padding-right'])
            rspace = margin + padding
            if rspace > 0:
                spaces = int(round((rspace * 3) / style['font-size']))
                if len(elem) == 0:
                    elem.text = (elem.text or '') + (u'\xa0' * spaces)
                else:
                    # NOTE(review): this appends to the last child's *text*,
                    # which places the spaces inside that child; the end of
                    # this element's content would be the child's tail.
                    # Confirm which is intended.
                    last = elem[-1]
                    last.text = (last.text or '') + (u'\xa0' * spaces)
        # A leading page break is only recorded once some content exists.
        if bstate.content and style['page-break-before'] in PAGE_BREAKS:
            bstate.pbreak = True
        istate.fsize = self.mobimlize_font(style['font-size'])
        istate.italic = True if style['font-style'] == 'italic' else False
        weight = style['font-weight']
        istate.bold = weight in ('bold', 'bolder') or asfloat(weight) > 400
        istate.preserve = (style['white-space'] in ('pre', 'pre-wrap'))
        if 'monospace' in style['font-family']:
            istate.family = 'monospace'
        elif 'sans-serif' in style['font-family']:
            istate.family = 'sans-serif'
        else:
            istate.family = 'serif'
        valign = style['vertical-align']
        # Keyword or positive numeric offset -> superscript; 'sub' or a
        # negative offset -> subscript.
        if valign in ('super', 'text-top') or asfloat(valign) > 0:
            istate.valign = 'super'
        elif valign == 'sub' or asfloat(valign) < 0:
            istate.valign = 'sub'
        else:
            istate.valign = 'baseline'
        # Both id and name attributes are collected as pending anchor ids.
        if 'id' in elem.attrib:
            istate.ids.add(elem.attrib['id'])
        if 'name' in elem.attrib:
            istate.ids.add(elem.attrib['name'])
        if tag == 'a' and 'href' in elem.attrib:
            istate.href = elem.attrib['href']
        istate.attrib.clear()
        if tag == 'img' and 'src' in elem.attrib:
            istate.attrib['src'] = elem.attrib['src']
            istate.attrib['align'] = 'baseline'
            for prop in ('width', 'height'):
                if style[prop] != 'auto':
                    value = style[prop]
                    # A dimension equal to the profile's becomes 100%;
                    # anything else is given in multiples of the base font.
                    if value == getattr(self.profile, prop):
                        result = '100%'
                    else:
                        ems = int(round(value / self.profile.fbase))
                        result = "%dem" % ems
                    istate.attrib[prop] = result
        elif tag == 'hr' and asfloat(style['width']) > 0:
            # Rule width as a percentage of the profile width.
            prop = style['width'] / self.profile.width
            istate.attrib['width'] = "%d%%" % int(round(prop * 100))
        elif display == 'table':
            # Elements styled as table parts are emitted as real table tags.
            tag = 'table'
        elif display == 'table-row':
            tag = 'tr'
        elif display == 'table-cell':
            tag = 'td'
        text = None
        if elem.text:
            if istate.preserve:
                text = elem.text
            elif len(elem) > 0 and elem.text.isspace():
                # Whitespace-only text ahead of child elements is dropped.
                text = None
            else:
                text = COLLAPSE.sub(' ', elem.text)
        if text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS:
            self.mobimlize_content(tag, text, bstate, istates)
        for child in elem:
            self.mobimlize_elem(child, stylizer, bstate, istates)
            tail = None
            if child.tail:
                if istate.preserve:
                    tail = child.tail
                elif bstate.para is None and child.tail.isspace():
                    # Whitespace between blocks does not open a paragraph.
                    tail = None
                else:
                    tail = COLLAPSE.sub(' ', child.tail)
            if tail:
                self.mobimlize_content(tag, tail, bstate, istates)
        if bstate.content and style['page-break-after'] in PAGE_BREAKS:
            bstate.pbreak = True
        if isblock:
            para = bstate.para
            # A paragraph holding just one no-break space becomes a <br>.
            if para is not None and para.text == u'\xa0':
                para.getparent().replace(para, etree.Element(XHTML('br')))
            bstate.para = None
            bstate.istate = None
            # Bottom margin/padding follow the same rule as the top ones.
            vmargin = asfloat(style['margin-bottom'])
            bstate.vmargin = max((bstate.vmargin, vmargin))
            vpadding = asfloat(style['padding-bottom'])
            if vpadding > 0:
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
        if tag in NESTABLE_TAGS and bstate.nested:
            bstate.nested.pop()
        istates.pop()
|
@ -2,7 +2,11 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
|
||||
'and Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
from cStringIO import StringIO
|
||||
from struct import pack
|
||||
|
||||
COUNT_BITS = 3
|
||||
|
||||
@ -32,3 +36,53 @@ def decompress_doc(data):
|
||||
|
||||
return ''.join([chr(i) for i in res])
|
||||
|
||||
def compress_doc(data):
    '''
    Compress the byte string `data` and return the encoded byte string.

    The input is consumed left to right and each step emits one of:

    * a two-byte back-reference to an earlier occurrence of the next 3-10
      bytes, when one exists at most 2047 bytes back;
    * a single byte standing for a space followed by a byte in 0x40-0x7f;
    * a literal byte (0x00 or 0x09-0x7f) copied as is;
    * a length byte (1-8) followed by that many bytes that are not literals.
    '''
    out = StringIO()
    i = 0
    ldata = len(data)
    while i < ldata:
        # Back-references are only tried away from both ends of the data.
        if i > 10 and (ldata - i) > 10:
            chunk = ''
            match = -1
            # Try the longest candidate first: 10 bytes down to 3.
            for j in xrange(10, 2, -1):
                chunk = data[i:i+j]
                try:
                    # Last occurrence that ends at or before position i.
                    match = data.rindex(chunk, 0, i)
                except ValueError:
                    continue
                if (i - match) <= 2047:
                    break
                # Too far back to encode; try a shorter chunk.
                match = -1
            if match >= 0:
                n = len(chunk)
                m = i - match
                # Bit 15 set, distance in bits 3-13, (length - 3) in bits 0-2.
                code = 0x8000 + ((m << 3) & 0x3ff8) + (n - 3)
                out.write(pack('>H', code))
                i += n
                continue
        ch = data[i]
        och = ord(ch)
        i += 1
        # Space followed by a byte in 0x40-0x7f: a single byte with the high
        # bit set encodes both.
        if ch == ' ' and (i + 1) < ldata:
            onch = ord(data[i])
            if onch >= 0x40 and onch < 0x80:
                out.write(pack('>B', onch ^ 0x80))
                i += 1
                continue
        if och == 0 or (och > 8 and och < 0x80):
            # Literal byte, copied unchanged.
            out.write(ch)
        else:
            # Collect up to 8 consecutive non-literal bytes and write them
            # behind a length byte.
            j = i
            binseq = [ch]
            while j < ldata and len(binseq) < 8:
                ch = data[j]
                och = ord(ch)
                if och == 0 or (och > 8 and och < 0x80):
                    break
                binseq.append(ch)
                j += 1
            out.write(pack('>B', len(binseq)))
            out.write(''.join(binseq))
            # i already points past the first byte of the run.
            i += len(binseq) - 1
    return out.getvalue()
|
||||
|
||||
|
583
src/calibre/ebooks/mobi/writer.py
Normal file
583
src/calibre/ebooks/mobi/writer.py
Normal file
@ -0,0 +1,583 @@
|
||||
'''
|
||||
Write content to Mobipocket books.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>'
|
||||
|
||||
import sys
|
||||
import os
|
||||
from struct import pack
|
||||
import functools
|
||||
import time
|
||||
import random
|
||||
from cStringIO import StringIO
|
||||
import re
|
||||
from itertools import izip, count
|
||||
from collections import defaultdict
|
||||
from urlparse import urldefrag
|
||||
import logging
|
||||
from lxml import etree
|
||||
from PIL import Image
|
||||
from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \
|
||||
OEB_RASTER_IMAGES
|
||||
from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname
|
||||
from calibre.ebooks.oeb.base import Logger, OEBBook
|
||||
from calibre.ebooks.oeb.profile import Context
|
||||
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
|
||||
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
|
||||
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
|
||||
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
|
||||
from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
|
||||
from calibre.ebooks.mobi.palmdoc import compress_doc
|
||||
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||
from calibre.ebooks.mobi.mobiml import MBP_NS, MBP, MobiMLizer
|
||||
from calibre.customize.ui import run_plugins_on_postprocess
|
||||
from calibre.utils.config import OptionParser
|
||||
from optparse import OptionGroup
|
||||
|
||||
# TODO:
|
||||
# - Allow override CSS (?)
|
||||
# - Generate index records
|
||||
# - Optionally rasterize tables
|
||||
|
||||
EXTH_CODES = {
|
||||
'creator': 100,
|
||||
'publisher': 101,
|
||||
'description': 103,
|
||||
'identifier': 104,
|
||||
'subject': 105,
|
||||
'date': 106,
|
||||
'review': 107,
|
||||
'contributor': 108,
|
||||
'rights': 109,
|
||||
'type': 111,
|
||||
'source': 112,
|
||||
'title': 503,
|
||||
}
|
||||
|
||||
RECORD_SIZE = 0x1000
|
||||
|
||||
UNCOMPRESSED = 1
|
||||
PALMDOC = 2
|
||||
HUFFDIC = 17480
|
||||
|
||||
PALM_MAX_IMAGE_SIZE = 63 * 1024
|
||||
OTHER_MAX_IMAGE_SIZE = 10 * 1024 * 1024
|
||||
MAX_THUMB_SIZE = 16 * 1024
|
||||
MAX_THUMB_DIMEN = (180, 240)
|
||||
|
||||
def encode(data):
    """Return the text *data* encoded as UTF-8."""
    utf8 = data.encode('utf-8')
    return utf8
|
||||
|
||||
# Almost like the one for MS LIT, but not quite.
|
||||
DECINT_FORWARD = 0
DECINT_BACKWARD = 1


def decint(value, direction):
    """Encode the non-negative integer *value* as a variable-width string.

    The value is split into 7-bit groups and written most-significant
    group first.  *direction* selects which byte gets its high bit set:
    the last byte for DECINT_FORWARD, the first byte for DECINT_BACKWARD.
    Any other *direction* leaves every high bit clear.
    """
    # Groups are collected least-significant first.
    groups = []
    while True:
        groups.append(value & 0x7f)
        value >>= 7
        if value == 0:
            break
    if direction == DECINT_FORWARD:
        groups[0] |= 0x80
    elif direction == DECINT_BACKWARD:
        groups[-1] |= 0x80
    groups.reverse()
    return ''.join(map(chr, groups))
|
||||
|
||||
|
||||
class Serializer(object):
    """Flatten the spine of an OEB book into a single markup byte string.

    Constructing an instance performs the whole serialization.  The
    results are left on the instance:

    text         -- the serialized markup
    breaks       -- buffer offsets recorded just before each non-linear
                    spine item
    id_offsets   -- maps 'itemid' and 'itemid#anchor' to the buffer offset
                    where that item or anchor starts
    href_offsets -- maps a link target (same key format as id_offsets) to
                    the buffer offsets of its 10-digit filepos placeholders
    """
    # Namespace URI -> prefix used when writing names; namespaces that are
    # not listed are dropped from the output.
    NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}

    def __init__(self, oeb, images):
        """Serialize *oeb*; *images* maps image hrefs to record indices."""
        self.oeb = oeb
        self.images = images
        self.id_offsets = {}
        self.href_offsets = defaultdict(list)
        self.breaks = []
        buffer = self.buffer = StringIO()
        buffer.write('<html>')
        self.serialize_head()
        self.serialize_body()
        buffer.write('</html>')
        # Link placeholders can only be filled in once every target
        # offset is known.
        self.fixup_links()
        self.text = buffer.getvalue()

    def serialize_head(self):
        """Write the <head> element, including the guide when non-empty."""
        buffer = self.buffer
        buffer.write('<head>')
        if len(self.oeb.guide) > 0:
            self.serialize_guide()
        buffer.write('</head>')

    def serialize_guide(self):
        """Write a <guide> with one <reference> per guide entry that
        points at a document in the manifest."""
        buffer = self.buffer
        hrefs = self.oeb.manifest.hrefs
        buffer.write('<guide>')
        for ref in self.oeb.guide.values():
            path, frag = urldefrag(ref.href)
            # Skip references to files missing from the manifest instead
            # of failing with a KeyError.
            if path not in hrefs or hrefs[path].media_type not in OEB_DOCS:
                continue
            buffer.write('<reference type="')
            self.serialize_text(ref.type, quot=True)
            buffer.write('" ')
            if ref.title is not None:
                buffer.write('title="')
                self.serialize_text(ref.title, quot=True)
                buffer.write('" ')
            self.serialize_href(ref.href)
            # Space required or won't work, I kid you not
            buffer.write(' />')
        buffer.write('</guide>')

    def serialize_href(self, href, base=None):
        """Write a 'filepos=' placeholder for the link *href*.

        *base* is the manifest item containing the link; it is used to
        resolve relative paths and fragment-only links.  Returns True
        when a placeholder was written, False when the target is not a
        spine item (nothing is written in that case).
        """
        hrefs = self.oeb.manifest.hrefs
        path, frag = urldefrag(href)
        if path and base:
            path = base.abshref(path)
        if path and path not in hrefs:
            return False
        buffer = self.buffer
        item = hrefs[path] if path else None
        if item and item.spine_position is None:
            return False
        if not item and base is None:
            # Fragment-only link with no containing item to resolve it.
            return False
        id = item.id if item else base.id
        href = '#'.join((id, frag)) if frag else id
        buffer.write('filepos=')
        self.href_offsets[href].append(buffer.tell())
        # Ten digits, overwritten by fixup_links().
        buffer.write('0000000000')
        return True

    def serialize_body(self):
        """Write <body>: linear spine items first, then non-linear ones."""
        buffer = self.buffer
        buffer.write('<body>')
        # CybookG3 'Start Reading' link
        if 'text' in self.oeb.guide:
            href = self.oeb.guide['text'].href
            buffer.write('<a ')
            self.serialize_href(href)
            buffer.write(' />')
        spine = [item for item in self.oeb.spine if item.linear]
        spine.extend([item for item in self.oeb.spine if not item.linear])
        for item in spine:
            self.serialize_item(item)
        buffer.write('</body>')

    def serialize_item(self, item):
        """Write the body content of one spine item plus a page break."""
        buffer = self.buffer
        if not item.linear:
            self.breaks.append(buffer.tell() - 1)
        self.id_offsets[item.id] = buffer.tell()
        for elem in item.data.find(XHTML('body')):
            self.serialize_elem(elem, item)
        buffer.write('<mbp:pagebreak/>')

    def serialize_elem(self, elem, item, nsrmap=NSRMAP):
        """Write *elem* (from *item*) and its descendants.

        Elements and attributes in namespaces absent from *nsrmap* are
        dropped.  'name' and 'id' attributes are recorded in id_offsets
        and removed from the element; 'href' becomes a filepos
        placeholder and 'src' a recindex when the target is known.
        """
        buffer = self.buffer
        if not isinstance(elem.tag, basestring) \
            or namespace(elem.tag) not in nsrmap:
            return
        tag = prefixname(elem.tag, nsrmap)
        for attr in ('name', 'id'):
            if attr in elem.attrib:
                id = '#'.join((item.id, elem.attrib[attr]))
                self.id_offsets[id] = buffer.tell()
                del elem.attrib[attr]
        # An anchor left with no attributes and no content only existed
        # to carry the id removed above.
        if tag == 'a' and not elem.attrib \
            and not len(elem) and not elem.text:
            return
        buffer.write('<')
        buffer.write(tag)
        if elem.attrib:
            for attr, val in elem.attrib.items():
                if namespace(attr) not in nsrmap:
                    continue
                attr = prefixname(attr, nsrmap)
                buffer.write(' ')
                if attr == 'href':
                    if self.serialize_href(val, item):
                        continue
                elif attr == 'src':
                    href = item.abshref(val)
                    # Only images that were assigned a record index can
                    # be referenced by recindex; anything else keeps its
                    # literal src attribute.
                    if href in self.images:
                        index = self.images[href]
                        buffer.write('recindex="%05d"' % index)
                        continue
                buffer.write(attr)
                buffer.write('="')
                self.serialize_text(val, quot=True)
                buffer.write('"')
        if elem.text or len(elem) > 0:
            buffer.write('>')
            if elem.text:
                self.serialize_text(elem.text)
            for child in elem:
                self.serialize_elem(child, item, nsrmap)
                if child.tail:
                    self.serialize_text(child.tail)
            buffer.write('</%s>' % tag)
        else:
            buffer.write('/>')

    def serialize_text(self, text, quot=False):
        """Write *text* UTF-8 encoded with markup characters escaped.

        '&', '<' and '>' are always escaped; '"' is escaped as well when
        *quot* is true (for attribute values).
        """
        # '&' must be handled first so the entities added below are not
        # escaped a second time.
        text = text.replace('&', '&amp;')
        text = text.replace('<', '&lt;')
        text = text.replace('>', '&gt;')
        if quot:
            text = text.replace('"', '&quot;')
        self.buffer.write(encode(text))

    def fixup_links(self):
        """Overwrite every filepos placeholder with its target offset.

        A link whose anchor was never seen falls back to the start of
        the item containing it; if that is unknown too the placeholder
        is left as zeros.
        """
        buffer = self.buffer
        id_offsets = self.id_offsets
        for href, hoffs in self.href_offsets.items():
            target = href
            if target not in id_offsets:
                self.oeb.logger.warn(
                    'Hyperlink target %r not found' % href)
                target = href.split('#', 1)[0]
            if target not in id_offsets:
                continue
            ioff = id_offsets[target]
            for hoff in hoffs:
                buffer.seek(hoff)
                buffer.write('%010d' % ioff)
|
||||
|
||||
|
||||
class MobiWriter(object):
    """Write an OEB book to a Mobipocket file.

    Records are collected in self._records (index 0 is the header
    record, followed by text records, then image records) and written
    out behind a record-offset table by dump().
    """
    COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')

    def __init__(self, compression=None, imagemax=None):
        """*compression* is UNCOMPRESSED (default) or PALMDOC; *imagemax*
        is the maximum size in bytes of an image record (default
        OTHER_MAX_IMAGE_SIZE)."""
        self._compression = compression or UNCOMPRESSED
        self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE

    def dump(self, oeb, path):
        """Write *oeb* to *path*, which is either a filesystem path or an
        object with a write() method."""
        if hasattr(path, 'write'):
            return self._dump_stream(oeb, path)
        with open(path, 'w+b') as stream:
            return self._dump_stream(oeb, stream)

    def _write(self, *data):
        """Write each string in *data* to the output stream, in order."""
        for datum in data:
            self._stream.write(datum)

    def _tell(self):
        """Return the current position in the output stream."""
        return self._stream.tell()

    def _dump_stream(self, oeb, stream):
        """Generate all records for *oeb* and write them to *stream*."""
        self._oeb = oeb
        self._stream = stream
        # Slot 0 is reserved for the header record, which can only be
        # built after the content records are known.
        self._records = [None]
        self._generate_content()
        self._generate_record0()
        self._write_header()
        self._write_content()

    def _generate_content(self):
        """Build the text and image records."""
        self._map_image_names()
        self._generate_text()
        self._generate_images()

    def _map_image_names(self):
        """Assign a 1-based index to every raster image in the manifest,
        stored in self._images keyed by href."""
        index = 1
        self._images = images = {}
        for item in self._oeb.manifest.values():
            if item.media_type in OEB_RASTER_IMAGES:
                images[item.href] = index
                index += 1

    def _read_text_record(self, text):
        """Read the next record's worth of data from the stream *text*.

        Returns (data, overlap).  *data* is up to RECORD_SIZE bytes from
        the current position.  *overlap* holds the bytes immediately
        after *data* needed to complete a UTF-8 sequence that *data*
        ends in the middle of (empty when it ends on a character
        boundary).  The stream is left positioned at the end of *data*.
        """
        pos = text.tell()
        text.seek(0, 2)
        npos = min((pos + RECORD_SIZE, text.tell()))
        # Step backwards from the record end until the tail contains at
        # least one complete or partial character that decodes.
        last = ''
        while not last.decode('utf-8', 'ignore'):
            size = len(last) + 1
            text.seek(npos - size)
            last = text.read(size)
        extra = 0
        try:
            last.decode('utf-8')
        except UnicodeDecodeError:
            # The record ends inside a multibyte sequence: extend the
            # tail forwards one byte at a time until it decodes cleanly.
            prev = len(last)
            while True:
                text.seek(npos - prev)
                last = text.read(len(last) + 1)
                try:
                    last.decode('utf-8')
                except UnicodeDecodeError:
                    pass
                else:
                    break
            extra = len(last) - prev
        text.seek(pos)
        data = text.read(RECORD_SIZE)
        overlap = text.read(extra)
        text.seek(npos)
        return data, overlap

    def _generate_text(self):
        """Serialize the book markup and split it into text records.

        Each record holds the (optionally compressed) data followed by
        two trailing entries: the multibyte overlap bytes with a one
        byte length, and the encoded break offsets with their total
        size.  Sets self._text_length and self._text_nrecords.
        """
        self._oeb.logger.info('Serializing markup content...')
        serializer = Serializer(self._oeb, self._images)
        breaks = serializer.breaks
        text = serializer.text
        self._text_length = len(text)
        text = StringIO(text)
        nrecords = 0
        offset = 0
        if self._compression != UNCOMPRESSED:
            self._oeb.logger.info('Compressing markup content...')
        data, overlap = self._read_text_record(text)
        while len(data) > 0:
            if self._compression == PALMDOC:
                data = compress_doc(data)
            record = StringIO()
            record.write(data)
            record.write(overlap)
            record.write(pack('>B', len(overlap)))
            nextra = 0
            pbreak = 0
            running = offset
            # Emit every break that falls inside this record as a delta
            # (in units of 8) from the previous break.
            while breaks and (breaks[0] - offset) < RECORD_SIZE:
                pbreak = (breaks.pop(0) - running) >> 3
                encoded = decint(pbreak, DECINT_FORWARD)
                record.write(encoded)
                running += pbreak << 3
                nextra += len(encoded)
            # The size field counts its own bytes, so grow it until the
            # encoded length matches the width that was assumed.
            lsize = 1
            while True:
                size = decint(nextra + lsize, DECINT_BACKWARD)
                if len(size) == lsize:
                    break
                lsize += 1
            record.write(size)
            self._records.append(record.getvalue())
            nrecords += 1
            offset += RECORD_SIZE
            data, overlap = self._read_text_record(text)
        self._text_nrecords = nrecords

    def _rescale_image(self, data, maxsizeb, dimen=None):
        """Return image *data* converted to fit within *maxsizeb* bytes.

        Images that are neither JPEG nor GIF are re-encoded (GIF when
        the area is at most 40000 pixels, otherwise JPEG).  If *dimen*
        is given the image is first shrunk to fit those (width, height)
        bounds.  When the result is still too large, JPEG quality and
        then dimensions are reduced step by step; the last attempt is
        returned even if it does not fit.
        """
        image = Image.open(StringIO(data))
        format = image.format
        changed = False
        if image.format not in ('JPEG', 'GIF'):
            width, height = image.size
            area = width * height
            format = 'GIF' if area <= 40000 else 'JPEG'
            changed = True
        if dimen is not None:
            image.thumbnail(dimen, Image.ANTIALIAS)
            changed = True
        if changed:
            data = StringIO()
            image.save(data, format)
            data = data.getvalue()
        if len(data) <= maxsizeb:
            return data
        image = image.convert('RGBA')
        for quality in xrange(95, -1, -1):
            data = StringIO()
            image.save(data, 'JPEG', quality=quality)
            data = data.getvalue()
            if len(data) <= maxsizeb:
                return data
        width, height = image.size
        for scale in xrange(99, 0, -1):
            scale = scale / 100.
            data = StringIO()
            scaled = image.copy()
            # Both dimensions must be integers.
            size = (int(width * scale), int(height * scale))
            scaled.thumbnail(size, Image.ANTIALIAS)
            scaled.save(data, 'JPEG', quality=0)
            data = data.getvalue()
            if len(data) <= maxsizeb:
                return data
        # Well, we tried?
        return data

    def _generate_images(self):
        """Append one record per mapped image, in index order."""
        self._oeb.logger.info('Serializing images...')
        images = sorted((index, href) for href, index in self._images.items())
        for _, href in images:
            item = self._oeb.manifest.hrefs[href]
            data = self._rescale_image(item.data, self._imagemax)
            self._records.append(data)

    def _generate_record0(self):
        """Build the header record and store it in self._records[0]."""
        metadata = self._oeb.metadata
        exth = self._build_exth()
        record0 = StringIO()
        record0.write(pack('>HHIHHHH', self._compression, 0,
            self._text_length, self._text_nrecords, RECORD_SIZE, 0, 0))
        uid = random.randint(0, 0xffffffff)
        title = str(metadata.title[0])
        record0.write('MOBI')
        # 65001 presumably selects UTF-8 text encoding -- TODO confirm
        # against the format documentation.
        record0.write(pack('>IIIII', 0xe8, 2, 65001, uid, 6))
        record0.write('\xff' * 40)
        record0.write(pack('>I', self._text_nrecords + 1))
        # Offset of the title (after the 16 leading bytes, the 0xe8-byte
        # MOBI block and the EXTH block) and its length.
        record0.write(pack('>II', 0xe8 + 16 + len(exth), len(title)))
        record0.write(iana2mobi(str(metadata.language[0])))
        record0.write('\0' * 8)
        record0.write(pack('>II', 6, self._text_nrecords + 1))
        record0.write('\0' * 16)
        record0.write(pack('>I', 0x50))
        record0.write('\0' * 32)
        record0.write(pack('>IIII', 0xffffffff, 0xffffffff, 0, 0))
        # The '5' is a bitmask of extra record data at the end:
        #   - 0x1: <extra multibyte bytes><size> (?)
        #   - 0x4: <uncrossable breaks><size>
        # Of course, the formats aren't quite the same.
        # TODO: What the hell are the rest of these fields?
        record0.write(pack('>IIIIIIIIIIIIIIIII',
            0, 0, 0, 0xffffffff, 0, 0xffffffff, 0, 0xffffffff, 0, 0xffffffff,
            0, 0xffffffff, 0, 0xffffffff, 0xffffffff, 5, 0xffffffff))
        record0.write(exth)
        record0.write(title)
        record0 = record0.getvalue()
        # Pad with NULs up to 2452 bytes (no padding if already longer).
        self._records[0] = record0 + ('\0' * (2452 - len(record0)))

    def _build_exth(self):
        """Return the EXTH block built from the book metadata.

        One entry is written for every metadata value whose term is in
        EXTH_CODES.  When the book has a cover, three more entries
        (codes 0xc9, 0xcb, 0xca) are added and a thumbnail record is
        created.
        """
        oeb = self._oeb
        exth = StringIO()
        nrecs = 0
        for term in oeb.metadata:
            if term not in EXTH_CODES: continue
            code = EXTH_CODES[term]
            for item in oeb.metadata[term]:
                data = self.COLLAPSE_RE.sub(' ', unicode(item))
                data = data.encode('utf-8')
                # Entry length includes the 8-byte code/length prefix.
                exth.write(pack('>II', code, len(data) + 8))
                exth.write(data)
                nrecs += 1
        if oeb.metadata.cover:
            id = str(oeb.metadata.cover[0])
            item = oeb.manifest.ids[id]
            href = item.href
            # Image indices are written zero-based here.
            index = self._images[href] - 1
            exth.write(pack('>III', 0xc9, 0x0c, index))
            exth.write(pack('>III', 0xcb, 0x0c, 0))
            index = self._add_thumbnail(item) - 1
            exth.write(pack('>III', 0xca, 0x0c, index))
            nrecs += 3
        exth = exth.getvalue()
        # Pad the entries to a multiple of four bytes.
        trail = len(exth) % 4
        pad = '' if not trail else '\0' * (4 - trail)
        exth = ['EXTH', pack('>II', len(exth) + 12, nrecs), exth, pad]
        return ''.join(exth)

    def _add_thumbnail(self, item):
        """Create a thumbnail of image *item*, add it to the manifest and
        the record list, and return its 1-based image index."""
        data = self._rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN)
        manifest = self._oeb.manifest
        id, href = manifest.generate('thumbnail', 'thumbnail.jpeg')
        manifest.add(id, href, 'image/jpeg', data=data)
        index = len(self._images) + 1
        self._images[href] = index
        self._records.append(data)
        return index

    def _write_header(self):
        """Write the file header and the record offset table."""
        title = str(self._oeb.metadata.title[0])
        # The name field is 32 bytes: sanitized title, NUL padded.
        title = re.sub('[^-A-Za-z0-9]+', '_', title)[:32]
        title = title + ('\0' * (32 - len(title)))
        now = int(time.time())
        nrecords = len(self._records)
        self._write(title, pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0),
            'BOOK', 'MOBI', pack('>IIH', nrecords, 0, nrecords))
        # Records start after the table (8 bytes per record) and the
        # two trailing NUL bytes.
        offset = self._tell() + (8 * nrecords) + 2
        for id, record in izip(count(), self._records):
            # Offset, one NUL byte, then the low three bytes of the id.
            self._write(pack('>I', offset), '\0', pack('>I', id)[1:])
            offset += len(record)
        self._write('\0\0')

    def _write_content(self):
        """Write every record to the output stream, in order."""
        for record in self._records:
            self._write(record)
|
||||
|
||||
|
||||
def add_mobi_options(parser):
    """Add the 'Mobipocket' and 'Profiles' option groups to *parser*."""
    profiles = ', '.join(sorted(Context.PROFILES.keys()))

    mobi = OptionGroup(parser, _('Mobipocket'),
        _('Mobipocket-specific options.'))
    mobi.add_option(
        '-c', '--compress', default=False, action='store_true',
        help=_('Compress file text using PalmDOC compression.'))
    mobi.add_option(
        '-r', '--rescale-images', default=False, action='store_true',
        help=_('Modify images to meet Palm device size limitations.'))
    parser.add_option_group(mobi)

    description = _('Device renderer profiles.  '
        'Affects conversion of default font sizes and rasterization '
        'resolution.  Valid profiles are: %s.') % profiles
    renderer = OptionGroup(parser, _('Profiles'), description)
    renderer.add_option(
        '--source-profile', default='Browser', metavar='PROFILE',
        help=_("Source renderer profile. Default is 'Browser'."))
    renderer.add_option(
        '--dest-profile', default='CybookG3', metavar='PROFILE',
        help=_("Destination renderer profile. Default is 'CybookG3'."))
    parser.add_option_group(renderer)
|
||||
|
||||
def option_parser():
    """Build and return the command line option parser."""
    result = OptionParser(usage=_('%prog [options] OPFFILE'))
    result.add_option(
        '-o', '--output', default=None,
        help=_('Output file. Default is derived from input filename.'))
    result.add_option(
        '-v', '--verbose', default=0, action='count',
        help=_('Useful for debugging.'))
    # Mobipocket and renderer-profile groups.
    add_mobi_options(result)
    return result
|
||||
|
||||
def oeb2mobi(opts, inpath):
    """Convert the OEB book at *inpath* to a Mobipocket file.

    *opts* carries the parsed command line options.  When no output
    path is given, the input file's base name with a '.mobi' extension
    is used.  Returns 1 if a profile name is unknown, otherwise None
    after the file has been written.
    """
    logger = Logger(logging.getLogger('oeb2mobi'))
    logger.setup_cli_handler(opts.verbose)

    outpath = opts.output
    if outpath is None:
        stem = os.path.splitext(os.path.basename(inpath))[0]
        outpath = stem + '.mobi'

    source = opts.source_profile
    if source not in Context.PROFILES:
        logger.error(_('Unknown source profile %r') % source)
        return 1
    dest = opts.dest_profile
    if dest not in Context.PROFILES:
        logger.error(_('Unknown destination profile %r') % dest)
        return 1

    if opts.compress:
        compression = PALMDOC
    else:
        compression = UNCOMPRESSED
    imagemax = None
    if opts.rescale_images:
        imagemax = PALM_MAX_IMAGE_SIZE

    context = Context(source, dest)
    oeb = OEBBook(inpath, logger=logger)

    # The transforms are applied in this fixed order.
    HTMLTOCAdder().transform(oeb, context)
    CaseMangler().transform(oeb, context)
    flattener = CSSFlattener(
        fbase=context.dest.fbase, fkey=context.dest.fnums.values(),
        unfloat=True, untable=True)
    flattener.transform(oeb, context)
    SVGRasterizer().transform(oeb, context)
    ManifestTrimmer().transform(oeb, context)
    MobiMLizer().transform(oeb, context)

    writer = MobiWriter(compression=compression, imagemax=imagemax)
    writer.dump(oeb, outpath)
    run_plugins_on_postprocess(outpath, 'mobi')
    logger.info(_('Output written to ') + outpath)
|
||||
|
||||
def main(argv=sys.argv):
    """Command line entry point; expects exactly one input path.

    Prints the help text and returns 1 when the number of positional
    arguments is not one, otherwise returns the result of oeb2mobi().
    """
    parser = option_parser()
    opts, args = parser.parse_args(argv[1:])
    if len(args) == 1:
        return oeb2mobi(opts, args[0])
    parser.print_help()
    return 1
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
2
src/calibre/ebooks/oeb/__init__.py
Normal file
2
src/calibre/ebooks/oeb/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
@ -14,10 +14,14 @@ from itertools import izip, count
|
||||
from urlparse import urldefrag, urlparse, urlunparse
|
||||
from urllib import unquote as urlunquote
|
||||
import logging
|
||||
import re
|
||||
import htmlentitydefs
|
||||
import uuid
|
||||
import copy
|
||||
from lxml import etree
|
||||
from calibre import LoggingInterface
|
||||
|
||||
XML_PARSER = etree.XMLParser(recover=True, resolve_entities=False)
|
||||
XML_PARSER = etree.XMLParser(recover=True)
|
||||
XML_NS = 'http://www.w3.org/XML/1998/namespace'
|
||||
XHTML_NS = 'http://www.w3.org/1999/xhtml'
|
||||
OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/'
|
||||
@ -28,25 +32,48 @@ DC11_NS = 'http://purl.org/dc/elements/1.1/'
|
||||
XSI_NS = 'http://www.w3.org/2001/XMLSchema-instance'
|
||||
DCTERMS_NS = 'http://purl.org/dc/terms/'
|
||||
NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/'
|
||||
SVG_NS = 'http://www.w3.org/2000/svg'
|
||||
XLINK_NS = 'http://www.w3.org/1999/xlink'
|
||||
XPNSMAP = {'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS,
|
||||
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
|
||||
'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS}
|
||||
'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS,
|
||||
'svg': SVG_NS, 'xl': XLINK_NS}
|
||||
|
||||
def XML(name): return '{%s}%s' % (XML_NS, name)
|
||||
def XHTML(name): return '{%s}%s' % (XHTML_NS, name)
|
||||
def OPF(name): return '{%s}%s' % (OPF2_NS, name)
|
||||
def DC(name): return '{%s}%s' % (DC11_NS, name)
|
||||
def NCX(name): return '{%s}%s' % (NCX_NS, name)
|
||||
def SVG(name): return '{%s}%s' % (SVG_NS, name)
|
||||
def XLINK(name): return '{%s}%s' % (XLINK_NS, name)
|
||||
|
||||
EPUB_MIME = 'application/epub+zip'
|
||||
XHTML_MIME = 'application/xhtml+xml'
|
||||
CSS_MIME = 'text/css'
|
||||
NCX_MIME = 'application/x-dtbncx+xml'
|
||||
OPF_MIME = 'application/oebps-package+xml'
|
||||
OEB_DOC_MIME = 'text/x-oeb1-document'
|
||||
OEB_CSS_MIME = 'text/x-oeb1-css'
|
||||
OPENTYPE_MIME = 'font/opentype'
|
||||
GIF_MIME = 'image/gif'
|
||||
JPEG_MIME = 'image/jpeg'
|
||||
PNG_MIME = 'image/png'
|
||||
SVG_MIME = 'image/svg+xml'
|
||||
|
||||
OEB_STYLES = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css'])
|
||||
OEB_DOCS = set([XHTML_MIME, 'text/html', OEB_DOC_MIME, 'text/x-oeb-document'])
|
||||
OEB_RASTER_IMAGES = set([GIF_MIME, JPEG_MIME, PNG_MIME])
|
||||
OEB_IMAGES = set([GIF_MIME, JPEG_MIME, PNG_MIME, SVG_MIME])
|
||||
|
||||
MS_COVER_TYPE = 'other.ms-coverimage-standard'
|
||||
|
||||
recode = lambda s: s.decode('iso-8859-1').encode('ascii', 'xmlcharrefreplace')
|
||||
ENTITYDEFS = dict((k, recode(v)) for k, v in htmlentitydefs.entitydefs.items())
|
||||
del ENTITYDEFS['lt']
|
||||
del ENTITYDEFS['gt']
|
||||
del ENTITYDEFS['quot']
|
||||
del ENTITYDEFS['amp']
|
||||
del recode
|
||||
|
||||
|
||||
def element(parent, *args, **kwargs):
|
||||
@ -64,10 +91,23 @@ def barename(name):
|
||||
return name.split('}', 1)[1]
|
||||
return name
|
||||
|
||||
def prefixname(name, nsrmap):
    """Return *name* as 'prefix:local', or just 'local' when the prefix
    that *nsrmap* gives for the name's namespace is empty or None."""
    prefix = nsrmap[namespace(name)]
    local = barename(name)
    if prefix:
        return ':'.join((prefix, local))
    return local
|
||||
|
||||
def xpath(elem, expr):
|
||||
return elem.xpath(expr, namespaces=XPNSMAP)
|
||||
|
||||
URL_UNSAFE = r"""`!@#$%^&*[](){}?+=;:'",<>\| """
|
||||
def xml2str(root):
|
||||
return etree.tostring(root, encoding='utf-8', xml_declaration=True)
|
||||
|
||||
ASCII_CHARS = set(chr(x) for x in xrange(128))
|
||||
URL_SAFE = set(u'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
||||
u'abcdefghijklmnopqrstuvwxyz'
|
||||
u'0123456789' u'_.-/~')
|
||||
URL_UNSAFE = ASCII_CHARS - URL_SAFE
|
||||
def urlquote(href):
|
||||
result = []
|
||||
for char in href:
|
||||
@ -84,12 +124,20 @@ def urlnormalize(href):
|
||||
return urlunparse(parts)
|
||||
|
||||
|
||||
class OEBError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class FauxLogger(object):
|
||||
def __getattr__(self, name):
|
||||
return self
|
||||
def __call__(self, message):
|
||||
print message
|
||||
|
||||
class Logger(LoggingInterface, object):
|
||||
def __getattr__(self, name):
|
||||
return object.__getattribute__(self, 'log_' + name)
|
||||
|
||||
|
||||
class AbstractContainer(object):
|
||||
def read_xml(self, path):
|
||||
@ -108,25 +156,45 @@ class DirContainer(AbstractContainer):
|
||||
|
||||
def write(self, path, data):
    """Write *data* to the URL-quoted *path* below the root directory.

    Missing parent directories are created.  Returns whatever the file
    object's write() returns.
    """
    # Unquote before deriving the directory so that the directory that
    # gets created is the one the file is actually opened in.
    path = urlunquote(os.path.join(self.rootdir, path))
    directory = os.path.dirname(path)
    if directory and not os.path.isdir(directory):
        os.makedirs(directory)
    with open(path, 'wb') as f:
        return f.write(data)
|
||||
|
||||
def exists(self, path):
|
||||
path = os.path.join(self.rootdir, path)
|
||||
return os.path.isfile(path)
|
||||
return os.path.isfile(urlunquote(path))
|
||||
|
||||
class DirWriter(object):
    """Write an OEB book out as loose files in a directory."""

    def __init__(self, version=2.0):
        """*version* selects the OPF flavour: 2 for OPF 2, anything else
        for OPF 1."""
        self.version = version

    def dump(self, oeb, path):
        """Write every manifest item of *oeb*, then its package metadata,
        into the directory *path* (created if it does not exist)."""
        if not os.path.isdir(path):
            os.mkdir(path)
        output = DirContainer(path)
        for item in oeb.manifest.values():
            output.write(item.href, str(item))
        if self.version == 2:
            metadata = oeb.to_opf2()
        else:
            metadata = oeb.to_opf1()
        for entry in metadata.values():
            href, data = entry
            output.write(href, xml2str(data))
|
||||
|
||||
|
||||
class Metadata(object):
|
||||
TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description',
|
||||
'format', 'identifier', 'language', 'publisher', 'relation',
|
||||
'rights', 'source', 'subject', 'title', 'type'])
|
||||
ATTRS = set(['role', 'file-as', 'scheme'])
|
||||
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
|
||||
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
|
||||
'xsi': XSI_NS}
|
||||
|
||||
class Item(object):
|
||||
def __init__(self, term, value, fq_attrib={}):
|
||||
self.fq_attrib = dict(fq_attrib)
|
||||
def __init__(self, term, value, fq_attrib={}, **kwargs):
|
||||
self.fq_attrib = fq_attrib = dict(fq_attrib)
|
||||
fq_attrib.update(kwargs)
|
||||
if term == OPF('meta') and not value:
|
||||
term = self.fq_attrib.pop('name')
|
||||
value = self.fq_attrib.pop('content')
|
||||
@ -136,6 +204,11 @@ class Metadata(object):
|
||||
self.value = value
|
||||
self.attrib = attrib = {}
|
||||
for fq_attr in fq_attrib:
|
||||
if fq_attr in Metadata.ATTRS:
|
||||
attr = fq_attr
|
||||
fq_attr = OPF2(fq_attr)
|
||||
fq_attrib[fq_attr] = fq_attrib.pop(attr)
|
||||
else:
|
||||
attr = barename(fq_attr)
|
||||
attrib[attr] = fq_attrib[fq_attr]
|
||||
|
||||
@ -153,7 +226,7 @@ class Metadata(object):
|
||||
% (barename(self.term), self.value, self.attrib)
|
||||
|
||||
def __str__(self):
|
||||
return str(self.value)
|
||||
return unicode(self.value).encode('ascii', 'xmlcharrefreplace')
|
||||
|
||||
def __unicode__(self):
|
||||
return unicode(self.value)
|
||||
@ -183,8 +256,8 @@ class Metadata(object):
|
||||
self.oeb = oeb
|
||||
self.items = defaultdict(list)
|
||||
|
||||
def add(self, term, value, attrib={}):
|
||||
item = self.Item(term, value, attrib)
|
||||
def add(self, term, value, attrib={}, **kwargs):
|
||||
item = self.Item(term, value, attrib, **kwargs)
|
||||
items = self.items[barename(item.term)]
|
||||
items.append(item)
|
||||
return item
|
||||
@ -225,7 +298,11 @@ class Metadata(object):
|
||||
|
||||
class Manifest(object):
|
||||
class Item(object):
|
||||
def __init__(self, id, href, media_type, fallback=None, loader=str):
|
||||
ENTITY_RE = re.compile(r'&([a-zA-Z_:][a-zA-Z0-9.-_:]+);')
|
||||
NUM_RE = re.compile('^(.*)([0-9][0-9.]*)(?=[.]|$)')
|
||||
|
||||
def __init__(self, id, href, media_type,
|
||||
fallback=None, loader=str, data=None):
|
||||
self.id = id
|
||||
self.href = self.path = urlnormalize(href)
|
||||
self.media_type = media_type
|
||||
@ -233,26 +310,32 @@ class Manifest(object):
|
||||
self.spine_position = None
|
||||
self.linear = True
|
||||
self._loader = loader
|
||||
self._data = None
|
||||
self._data = data
|
||||
|
||||
def __repr__(self):
|
||||
return 'Item(id=%r, href=%r, media_type=%r)' \
|
||||
% (self.id, self.href, self.media_type)
|
||||
|
||||
def data():
|
||||
def fget(self):
|
||||
if self._data:
|
||||
return self._data
|
||||
data = self._loader(self.href)
|
||||
if self.media_type == XHTML_MIME:
|
||||
def _force_xhtml(self, data):
|
||||
repl = lambda m: ENTITYDEFS.get(m.group(1), m.group(0))
|
||||
data = self.ENTITY_RE.sub(repl, data)
|
||||
data = etree.fromstring(data, parser=XML_PARSER)
|
||||
if namespace(data.tag) != XHTML_NS:
|
||||
data.attrib['xmlns'] = XHTML_NS
|
||||
data = etree.tostring(data)
|
||||
data = etree.fromstring(data, parser=XML_PARSER)
|
||||
elif self.media_type.startswith('application/') \
|
||||
and self.media_type.endswith('+xml'):
|
||||
return data
|
||||
|
||||
def data():
|
||||
def fget(self):
|
||||
if self._data is not None:
|
||||
return self._data
|
||||
data = self._loader(self.href)
|
||||
if self.media_type in OEB_DOCS:
|
||||
data = self._force_xhtml(data)
|
||||
elif self.media_type[-4:] in ('+xml', '/xml'):
|
||||
data = etree.fromstring(data, parser=XML_PARSER)
|
||||
self._data = data
|
||||
return data
|
||||
def fset(self, value):
|
||||
self._data = value
|
||||
@ -261,11 +344,47 @@ class Manifest(object):
|
||||
return property(fget, fset, fdel)
|
||||
data = data()
|
||||
|
||||
def __str__(self):
|
||||
data = self.data
|
||||
if isinstance(data, etree._Element):
|
||||
return xml2str(data)
|
||||
return str(data)
|
||||
|
||||
def __eq__(self, other):
|
||||
return id(self) == id(other)
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self.__eq__(other)
|
||||
|
||||
def __cmp__(self, other):
|
||||
result = cmp(self.spine_position, other.spine_position)
|
||||
if result != 0:
|
||||
return result
|
||||
return cmp(self.id, other.id)
|
||||
smatch = self.NUM_RE.search(self.href)
|
||||
sref = smatch.group(1) if smatch else self.href
|
||||
snum = float(smatch.group(2)) if smatch else 0.0
|
||||
skey = (sref, snum, self.id)
|
||||
omatch = self.NUM_RE.search(other.href)
|
||||
oref = omatch.group(1) if omatch else other.href
|
||||
onum = float(omatch.group(2)) if omatch else 0.0
|
||||
okey = (oref, onum, other.id)
|
||||
return cmp(skey, okey)
|
||||
|
||||
def relhref(self, href):
    """Return *href* rewritten relative to this item's directory.

    *href* is returned unchanged when this item's own href has no
    directory part.  Any fragment on *href* is preserved.
    """
    if '/' not in self.href:
        return href
    base = os.path.dirname(self.href).split('/')
    target, frag = urldefrag(href)
    target = target.split('/')
    # Count the leading path components the two have in common.
    shared = 0
    for ours, theirs in zip(base, target):
        if ours != theirs:
            break
        shared += 1
    # Climb out of the unshared part of our directory, then descend.
    parts = ['..'] * (len(base) - shared)
    parts.extend(target[shared:])
    result = '/'.join(parts)
    if frag:
        result = '#'.join((result, frag))
    return result
|
||||
|
||||
def abshref(self, href):
|
||||
if '/' not in self.href:
|
||||
@ -277,42 +396,60 @@ class Manifest(object):
|
||||
|
||||
def __init__(self, oeb):
|
||||
self.oeb = oeb
|
||||
self.items = {}
|
||||
self.ids = {}
|
||||
self.hrefs = {}
|
||||
|
||||
def add(self, id, href, media_type, fallback=None):
|
||||
def add(self, id, href, media_type, fallback=None, loader=None, data=None):
|
||||
loader = loader or self.oeb.container.read
|
||||
item = self.Item(
|
||||
id, href, media_type, fallback, self.oeb.container.read)
|
||||
self.items[item.id] = item
|
||||
id, href, media_type, fallback, loader, data)
|
||||
self.ids[item.id] = item
|
||||
self.hrefs[item.href] = item
|
||||
return item
|
||||
|
||||
def remove(self, item):
    """Remove `item` from the manifest and, if present, from the spine.

    `item` may be either a manifest item or the id of one.
    """
    if item in self.ids:
        # Called with an id: resolve it to the item object.
        item = self.ids[item]
    del self.ids[item.id]
    del self.hrefs[item.href]
    if item in self.oeb.spine:
        self.oeb.spine.remove(item)
|
||||
|
||||
def generate(self, id, href):
    """Return an ``(id, href)`` pair not yet used in this manifest.

    The requested values are returned as-is when free (the href after
    ``urlnormalize``). On a clash, 1, 2, 3... is appended to the id, or
    inserted before the href's extension, until an unused value is found.
    """
    href = urlnormalize(href)
    id_base = id
    suffix = 1
    while id in self.ids:
        id = id_base + str(suffix)
        suffix += 1
    href_base, ext = os.path.splitext(href)
    suffix = 1
    while href in self.hrefs:
        href = href_base + str(suffix) + ext
        suffix += 1
    return id, href
|
||||
|
||||
def __iter__(self):
    """Iterate over the manifest ids."""
    for id in self.ids:
        yield id

def __getitem__(self, id):
    """Return the item registered under `id`; raises KeyError if absent."""
    return self.ids[id]

def values(self):
    """Iterate over the manifest items."""
    for item in self.ids.values():
        yield item

def items(self):
    """Iterate over ``(id, item)`` pairs."""
    for id, item in self.ids.items():
        yield id, item

def __contains__(self, key):
    """Return True when `key` is a registered manifest id."""
    return key in self.ids
|
||||
|
||||
def to_opf1(self, parent=None):
|
||||
elem = element(parent, 'manifest')
|
||||
for item in self.items.values():
|
||||
for item in self.ids.values():
|
||||
media_type = item.media_type
|
||||
if media_type == XHTML_MIME:
|
||||
media_type = OEB_DOC_MIME
|
||||
@ -327,7 +464,7 @@ class Manifest(object):
|
||||
|
||||
def to_opf2(self, parent=None):
|
||||
elem = element(parent, OPF('manifest'))
|
||||
for item in self.items.values():
|
||||
for item in self.ids.values():
|
||||
attrib = {'id': item.id, 'href': item.href,
|
||||
'media-type': item.media_type}
|
||||
if item.fallback:
|
||||
@ -341,18 +478,35 @@ class Spine(object):
|
||||
self.oeb = oeb
|
||||
self.items = []
|
||||
|
||||
def _linear(self, linear):
    """Normalise a spine `linear` value.

    None, 'yes' and 'true' (any case) become True; 'no' and 'false'
    become False; anything else is returned as given (lower-cased if it
    was a string).
    """
    if isinstance(linear, StringTypes):
        linear = linear.lower()
    if linear is None or linear in ('yes', 'true'):
        linear = True
    elif linear in ('no', 'false'):
        linear = False
    return linear

def add(self, item, linear=None):
    """Append `item` to the spine and return it.

    Sets ``item.linear`` (normalised) and ``item.spine_position``.
    """
    item.linear = self._linear(linear)
    item.spine_position = len(self.items)
    self.items.append(item)
    return item
|
||||
|
||||
def insert(self, index, item, linear):
    """Insert `item` into the spine before `index` and return it.

    `index` follows ``list.insert`` semantics, so negative and
    out-of-range values are accepted.
    """
    item.linear = self._linear(linear)
    self.items.insert(index, item)
    # list.insert clamps/wraps the index, so the requested `index` may not
    # be where the item landed. Renumber from the actual list order rather
    # than trusting `index`.
    for position, entry in enumerate(self.items):
        entry.spine_position = position
    return item
|
||||
|
||||
def remove(self, item):
    """Remove `item` from the spine and renumber the items after it."""
    index = item.spine_position
    self.items.pop(index)
    for position in range(index, len(self.items)):
        self.items[position].spine_position = position
    # The item is no longer in the spine; do not leave a stale position
    # behind for later comparisons or removals.
    # NOTE(review): assumes None is the "not in spine" marker -- confirm
    # against Manifest.Item.
    item.spine_position = None
|
||||
|
||||
def __iter__(self):
    """Iterate over the spine items in reading order."""
    for item in self.items:
        yield item
|
||||
@ -385,7 +539,27 @@ class Spine(object):
|
||||
|
||||
class Guide(object):
|
||||
class Reference(object):
|
||||
# Known guide reference types with their default titles, in display order.
_TYPES_TITLES = [('cover', 'Cover'), ('title-page', 'Title Page'),
                 ('toc', 'Table of Contents'), ('index', 'Index'),
                 ('glossary', 'Glossary'), ('acknowledgements', 'Acknowledgements'),
                 ('bibliography', 'Bibliography'), ('colophon', 'Colophon'),
                 ('copyright-page', 'Copyright'), ('dedication', 'Dedication'),
                 ('epigraph', 'Epigraph'), ('foreword', 'Foreword'),
                 ('loi', 'List of Illustrations'), ('lot', 'List of Tables'),
                 ('notes', 'Notes'), ('preface', 'Preface'),
                 ('text', 'Main Text')]
# Set of recognised type names.
TYPES = set(t for t, _ in _TYPES_TITLES)
# type -> default title.
TITLES = dict(_TYPES_TITLES)
# type -> position in _TYPES_TITLES, used as the sort key for references.
ORDER = dict((t, i) for i, (t, _) in enumerate(_TYPES_TITLES))
|
||||
|
||||
def __init__(self, type, title, href):
    """Create a guide reference.

    A `type` matching a known type case-insensitively is lower-cased.
    Any other type is prefixed with 'other.' unless it already carries
    that prefix. A missing `title` falls back to the default title for
    the type (None for unknown types). `href` is stored normalised.
    """
    if type.lower() in self.TYPES:
        type = type.lower()
    elif not type.startswith('other.'):
        # Every entry in TYPES is lower-case, so a type whose lower-cased
        # form is unknown cannot itself be in TYPES; no second check needed.
        type = 'other.' + type
    if not title:
        title = self.TITLES.get(type, None)
    self.type = type
    self.title = title
    self.href = urlnormalize(href)
|
||||
@ -394,6 +568,17 @@ class Guide(object):
|
||||
return 'Reference(type=%r, title=%r, href=%r)' \
|
||||
% (self.type, self.title, self.href)
|
||||
|
||||
@property
def _order(self):
    """Sort key: the index of a known type, else the type string itself."""
    return self.ORDER.get(self.type, self.type)

def __cmp__(self, other):
    """Compare two references by `_order`.

    Returns NotImplemented when `other` is not a Guide.Reference.
    """
    if not isinstance(other, Guide.Reference):
        return NotImplemented
    return cmp(self._order, other._order)
|
||||
|
||||
def __init__(self, oeb):
|
||||
self.oeb = oeb
|
||||
self.refs = {}
|
||||
@ -403,28 +588,32 @@ class Guide(object):
|
||||
self.refs[type] = ref
|
||||
return ref
|
||||
|
||||
def by_type(self, type):
    """Return the reference registered under `type`.

    Raises KeyError when no such reference exists.
    """
    # References are stored in `self.refs`; this class never defines a
    # `ref_types` attribute.
    return self.refs[type]
|
||||
|
||||
def iterkeys(self):
    """Iterate over the reference types present in the guide."""
    for type in self.refs:
        yield type
__iter__ = iterkeys

def values(self):
    """Return the references as a sorted list (ordered by their own
    comparison)."""
    return sorted(self.refs.values())

def items(self):
    """Iterate over ``(type, reference)`` pairs."""
    for type, ref in self.refs.items():
        yield type, ref

def __getitem__(self, key):
    """Return the reference of type `key`; raises KeyError if absent."""
    return self.refs[key]

def __delitem__(self, key):
    """Delete the reference of type `key`; raises KeyError if absent."""
    del self.refs[key]

def __contains__(self, key):
    """Return True when a reference of type `key` exists."""
    return key in self.refs

def __len__(self):
    """Return the number of references."""
    return len(self.refs)
|
||||
|
||||
def to_opf1(self, parent=None):
|
||||
elem = element(parent, 'guide')
|
||||
for ref in self.refs.values():
|
||||
@ -457,6 +646,12 @@ class TOC(object):
|
||||
self.nodes.append(node)
|
||||
return node
|
||||
|
||||
def iterdescendants(self):
    """Yield every node below this one, depth-first, parents first."""
    for node in self.nodes:
        yield node
        for child in node.iterdescendants():
            yield child
|
||||
|
||||
def __iter__(self):
    """Iterate over the direct child nodes."""
    for node in self.nodes:
        yield node
|
||||
@ -464,6 +659,15 @@ class TOC(object):
|
||||
def __getitem__(self, index):
    """Return the direct child node at position `index`."""
    return self.nodes[index]
|
||||
|
||||
def autolayer(self):
    """Nest consecutive entries that point into the same document.

    Walking the direct children in order, a node whose href (ignoring the
    fragment) equals that of the most recent kept node is moved under that
    node; otherwise it is kept and becomes the new anchor.
    """
    kept = []
    prev = None
    for node in self.nodes:
        # Compare with `is not None`, not truthiness: nodes are
        # containers and could evaluate false when childless.
        if prev is not None and \
                urldefrag(prev.href)[0] == urldefrag(node.href)[0]:
            prev.nodes.append(node)
        else:
            kept.append(node)
            prev = node
    # Replace contents in place so other references to the list stay valid.
    self.nodes[:] = kept
|
||||
|
||||
def depth(self, level=0):
|
||||
if self.nodes:
|
||||
return self.nodes[0].depth(level+1)
|
||||
@ -496,23 +700,33 @@ class TOC(object):
|
||||
|
||||
|
||||
class OEBBook(object):
|
||||
def __init__(self, opfpath=None, container=None, logger=FauxLogger()):
    """Create a book, optionally loading it from an OPF file.

    With `opfpath` and no `container`, a DirContainer rooted at the OPF's
    directory is created and `opfpath` is reduced to its basename. With
    neither argument, nothing is read and the book is left unpopulated.
    """
    if opfpath and not container:
        container = DirContainer(os.path.dirname(opfpath))
        opfpath = os.path.basename(opfpath)
    self.container = container
    self.logger = logger
    if opfpath or container:
        opf = self._read_opf(opfpath)
        self._all_from_opf(opf)
|
||||
|
||||
def _convert_opf1(self, opf):
|
||||
# Seriously, seriously wrong
|
||||
if namespace(opf.tag) == OPF1_NS:
|
||||
opf.tag = barename(opf.tag)
|
||||
for elem in opf.iterdescendants():
|
||||
if isinstance(elem.tag, basestring) \
|
||||
and namespace(elem.tag) == OPF1_NS:
|
||||
elem.tag = barename(elem.tag)
|
||||
attrib = dict(opf.attrib)
|
||||
attrib['version'] = '2.0'
|
||||
nroot = etree.Element(OPF('package'),
|
||||
nsmap={None: OPF2_NS}, version="2.0", **dict(opf.attrib))
|
||||
nsmap={None: OPF2_NS}, attrib=attrib)
|
||||
metadata = etree.SubElement(nroot, OPF('metadata'),
|
||||
nsmap={'opf': OPF2_NS, 'dc': DC11_NS,
|
||||
'xsi': XSI_NS, 'dcterms': DCTERMS_NS})
|
||||
for prefix in ('d11', 'd10', 'd09'):
|
||||
elements = xpath(opf, 'metadata/dc-metadata/%s:*' % prefix)
|
||||
elements = xpath(opf, 'metadata//%s:*' % prefix)
|
||||
if elements: break
|
||||
for element in elements:
|
||||
if not element.text: continue
|
||||
@ -524,7 +738,7 @@ class OEBBook(object):
|
||||
element.attrib[nsname] = element.attrib[name]
|
||||
del element.attrib[name]
|
||||
metadata.append(element)
|
||||
for element in opf.xpath('metadata/x-metadata/meta'):
|
||||
for element in opf.xpath('metadata//meta'):
|
||||
metadata.append(element)
|
||||
for item in opf.xpath('manifest/item'):
|
||||
media_type = item.attrib['media-type'].lower()
|
||||
@ -541,30 +755,56 @@ class OEBBook(object):
|
||||
def _read_opf(self, opfpath):
    """Read the OPF at `opfpath` from the container and return its root.

    The document is passed through ``_convert_opf1`` unless its root is in
    the OPF 2 namespace *and* declares version >= 2.0 (a missing version
    counts as 1.0).

    Raises OEBError when the root element is in an unexpected namespace.
    """
    opf = self.container.read_xml(opfpath)
    version = float(opf.get('version', 1.0))
    ns = namespace(opf.tag)
    if ns not in ('', OPF1_NS, OPF2_NS):
        raise OEBError('Invalid namespace %r for OPF document' % ns)
    if ns != OPF2_NS or version < 2.0:
        opf = self._convert_opf1(opf)
    return opf
|
||||
|
||||
def _metadata_from_opf(self, opf):
    """Populate ``self.metadata`` and ``self.uid`` from the package `opf`.

    Every descendant of the metadata element that has text or attributes
    is added, except the dc-metadata/x-metadata container elements.
    A urn:uuid identifier with id 'calibre-uuid' is generated when needed,
    and 'en' / 'Unknown' defaults are supplied (with a warning) for a
    missing language, creator or title.
    """
    uid = opf.get('unique-identifier', 'calibre-uuid')
    self.uid = None
    self.metadata = metadata = Metadata(self)
    ignored = (OPF('dc-metadata'), OPF('x-metadata'))
    for elem in xpath(opf, '/o2:package/o2:metadata//*'):
        if elem.tag not in ignored and (elem.text or elem.attrib):
            metadata.add(elem.tag, elem.text, elem.attrib)
    haveuuid = haveid = False
    for ident in metadata.identifier:
        if unicode(ident).startswith('urn:uuid:'):
            haveuuid = True
        if 'id' in ident.attrib:
            haveid = True
    # Generate an identifier unless the book already has both a UUID and an
    # id-carrying identifier. `not haveuuid and haveid` skipped generation
    # exactly when no identifier had an id, i.e. when one was most needed.
    if not (haveuuid and haveid):
        bookid = "urn:uuid:%s" % str(uuid.uuid4())
        metadata.add('identifier', bookid, id='calibre-uuid')
    for item in metadata.identifier:
        if item.id == uid:
            self.uid = item
            break
    else:
        self.logger.warn(u'Unique-identifier %r not found.' % uid)
        # Fall back to the first identifier that actually carries an id
        # (not blindly to identifier[0], which may have none).
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                self.uid = ident
                break
    if not metadata.language:
        self.logger.warn(u'Language not specified.')
        metadata.add('language', 'en')
    if not metadata.creator:
        self.logger.warn(u'Creator not specified.')
        metadata.add('creator', 'Unknown')
    if not metadata.title:
        self.logger.warn(u'Title not specified.')
        metadata.add('title', 'Unknown')
|
||||
|
||||
def _manifest_from_opf(self, opf):
|
||||
self.manifest = manifest = Manifest(self)
|
||||
for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'):
|
||||
href = elem.get('href')
|
||||
if not self.container.exists(href):
|
||||
self.logger.log_warn(u'Manifest item %r not found.' % href)
|
||||
self.logger.warn(u'Manifest item %r not found.' % href)
|
||||
continue
|
||||
manifest.add(elem.get('id'), href, elem.get('media-type'),
|
||||
elem.get('fallback'))
|
||||
@ -574,7 +814,7 @@ class OEBBook(object):
|
||||
for elem in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
|
||||
idref = elem.get('idref')
|
||||
if idref not in self.manifest:
|
||||
self.logger.log_warn(u'Spine item %r not found.' % idref)
|
||||
self.logger.warn(u'Spine item %r not found.' % idref)
|
||||
continue
|
||||
item = self.manifest[idref]
|
||||
spine.add(item, elem.get('linear'))
|
||||
@ -593,7 +833,7 @@ class OEBBook(object):
|
||||
href = elem.get('href')
|
||||
path, frag = urldefrag(href)
|
||||
if path not in self.manifest.hrefs:
|
||||
self.logger.log_warn(u'Guide reference %r not found' % href)
|
||||
self.logger.warn(u'Guide reference %r not found' % href)
|
||||
continue
|
||||
guide.add(elem.get('type'), elem.get('title'), href)
|
||||
|
||||
@ -696,12 +936,39 @@ class OEBBook(object):
|
||||
if self._toc_from_html(opf): return
|
||||
self._toc_from_spine(opf)
|
||||
|
||||
def _ensure_cover_image(self):
    """Record a cover item in the metadata when none is declared.

    Candidates, in order: the existing metadata cover, the guide reference
    of type MS_COVER_TYPE, then -- from the first spine document -- the
    first <img>, the first <object>, or a copy of the first inline <svg>
    (added to the manifest as a new SVG item).
    """
    cover = None
    if self.metadata.cover:
        cover_id = str(self.metadata.cover[0])
        cover = self.manifest.ids[cover_id]
    elif MS_COVER_TYPE in self.guide:
        href = self.guide[MS_COVER_TYPE].href
        cover = self.manifest.hrefs[href]
    elif self.spine.items:
        # Only touch the first spine document when it is needed; a book
        # with an empty spine has nothing to search.
        spine0 = self.spine.items[0]
        html = spine0.data
        for expr, attr in (('//h:img[position()=1]', 'src'),
                           ('//h:object[position()=1]', 'data')):
            found = xpath(html, expr)
            if found:
                href = spine0.abshref(found[0].get(attr))
                cover = self.manifest.hrefs[href]
                break
        else:
            found = xpath(html, '//svg:svg[position()=1]')
            if found:
                svg = copy.deepcopy(found[0])
                href = os.path.splitext(spine0.href)[0] + '.svg'
                new_id, href = self.manifest.generate(spine0.id, href)
                cover = self.manifest.add(new_id, href, SVG_MIME, data=svg)
    if cover is not None and not self.metadata.cover:
        self.metadata.add('cover', cover.id)
|
||||
|
||||
def _all_from_opf(self, opf):
    """Load every part of the book from the parsed package `opf`.

    Runs the loaders in a fixed order -- metadata, manifest, spine, guide,
    TOC -- and then makes sure a cover image is recorded.
    """
    self._metadata_from_opf(opf)
    self._manifest_from_opf(opf)
    self._spine_from_opf(opf)
    self._guide_from_opf(opf)
    self._toc_from_opf(opf)
    self._ensure_cover_image()
|
||||
|
||||
def to_opf1(self):
|
||||
package = etree.Element('package',
|
@ -35,7 +35,8 @@
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
@namespace url(http://www.w3.org/1999/xhtml); /* set default namespace to HTML */
|
||||
@namespace url(http://www.w3.org/1999/xhtml);
|
||||
@namespace svg url(http://www.w3.org/2000/svg);
|
||||
|
||||
/* blocks */
|
||||
|
||||
@ -45,7 +46,6 @@ html, div, map, dt, isindex, form {
|
||||
|
||||
body {
|
||||
display: block;
|
||||
margin: 8px;
|
||||
}
|
||||
|
||||
p, dl, multicol {
|
||||
@ -59,7 +59,7 @@ dd {
|
||||
|
||||
blockquote {
|
||||
display: block;
|
||||
margin: 1em 40px;
|
||||
margin: 1em;
|
||||
}
|
||||
|
||||
address {
|
||||
@ -74,7 +74,7 @@ center {
|
||||
|
||||
blockquote[type=cite] {
|
||||
display: block;
|
||||
margin: 1em 0px;
|
||||
margin: 1em 0em;
|
||||
border-color: blue;
|
||||
border-width: thin;
|
||||
}
|
||||
@ -234,14 +234,6 @@ th {
|
||||
|
||||
/* inlines */
|
||||
|
||||
q:before {
|
||||
content: open-quote;
|
||||
}
|
||||
|
||||
q:after {
|
||||
content: close-quote;
|
||||
}
|
||||
|
||||
b, strong {
|
||||
font-weight: bolder;
|
||||
}
|
||||
@ -392,22 +384,6 @@ spacer {
|
||||
float: none ! important;
|
||||
}
|
||||
|
||||
/* focusable content: anything w/ tabindex >=0 is focusable */
|
||||
abbr:focus, acronym:focus, address:focus, applet:focus, b:focus,
|
||||
base:focus, big:focus, blockquote:focus, br:focus, canvas:focus, caption:focus,
|
||||
center:focus, cite:focus, code:focus, col:focus, colgroup:focus, dd:focus,
|
||||
del:focus, dfn:focus, dir:focus, div:focus, dl:focus, dt:focus, em:focus,
|
||||
fieldset:focus, font:focus, form:focus, h1:focus, h2:focus, h3:focus, h4:focus,
|
||||
h5:focus, h6:focus, hr:focus, i:focus, img:focus, ins:focus,
|
||||
kbd:focus, label:focus, legend:focus, li:focus, link:focus, menu:focus,
|
||||
object:focus, ol:focus, p:focus, pre:focus, q:focus, s:focus, samp:focus,
|
||||
small:focus, span:focus, strike:focus, strong:focus, sub:focus, sup:focus,
|
||||
table:focus, tbody:focus, td:focus, tfoot:focus, th:focus, thead:focus,
|
||||
tr:focus, tt:focus, u:focus, ul:focus, var:focus {
|
||||
/* Don't specify the outline-color, we should always use initial value. */
|
||||
outline: 1px dotted;
|
||||
}
|
||||
|
||||
/* hidden elements */
|
||||
area, base, basefont, head, meta, script, style, title,
|
||||
noembed, param, link {
|
||||
@ -424,3 +400,8 @@ br {
|
||||
display: block;
|
||||
}
|
||||
|
||||
/* Images, embedded object, and SVG size defaults */
|
||||
img, object, svg|svg {
|
||||
width: auto;
|
||||
height: auto;
|
||||
}
|
75
src/calibre/ebooks/oeb/profile.py
Normal file
75
src/calibre/ebooks/oeb/profile.py
Normal file
@ -0,0 +1,75 @@
|
||||
'''
|
||||
Device profiles.
|
||||
'''
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
from itertools import izip
|
||||
|
||||
# (CSS keyword, numeric size step) pairs, smallest first. None marks a
# slot with no keyword or no numeric step.
FONT_SIZES = [('xx-small', 1),
              ('x-small',  None),
              ('small',    2),
              ('medium',   3),
              ('large',    4),
              ('x-large',  5),
              ('xx-large', 6),
              (None,       7)]


class Profile(object):
    """Screen geometry and font-size table of one device.

    `width` and `height` are given in pixels and stored in points
    (pixels / dpi * 72). `fsizes` supplies one size per FONT_SIZES slot,
    in order; entries beyond the last slot are ignored.
    """

    def __init__(self, width, height, dpi, fbase, fsizes):
        self.width = (float(width) / dpi) * 72.
        self.height = (float(height) / dpi) * 72.
        self.dpi = float(dpi)
        self.fbase = float(fbase)
        # (keyword, numeric step, size) triples in FONT_SIZES order.
        self.fsizes = []
        for (name, num), size in zip(FONT_SIZES, fsizes):
            self.fsizes.append((name, num, float(size)))
        # Lookup tables by keyword and by numeric step; None slots skipped.
        self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name)
        self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num)
|
||||
|
||||
|
||||
# Known device profiles, keyed by device name.
PROFILES = {
    'PRS505':
        Profile(width=584, height=754, dpi=168.451, fbase=12,
                fsizes=[7.5, 9, 10, 12, 15.5, 20, 22, 24]),

    'MSReader':
        Profile(width=480, height=652, dpi=96, fbase=13,
                fsizes=[10, 11, 13, 16, 18, 20, 22, 26]),

    # Not really, but let's pretend
    'Mobipocket':
        Profile(width=600, height=800, dpi=96, fbase=18,
                fsizes=[14, 14, 16, 18, 20, 22, 24, 26]),

    # No clue on usable screen size; DPI should be good
    'HanlinV3':
        Profile(width=584, height=754, dpi=168.451, fbase=16,
                fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),

    'CybookG3':
        Profile(width=600, height=800, dpi=168.451, fbase=16,
                fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),

    'Kindle':
        Profile(width=525, height=640, dpi=168.451, fbase=16,
                fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),

    # NOTE(review): nine sizes are listed here but every other profile
    # gives eight -- confirm which value is the stray one.
    'Browser':
        Profile(width=800, height=600, dpi=100.0, fbase=12,
                fsizes=[5, 7, 9, 12, 13.5, 17, 20, 22, 24])
    }
|
||||
|
||||
|
||||
class Context(object):
    """A source/destination pair of device profiles.

    `source` and `dest` may each be a key of PROFILES, which is replaced
    by the corresponding profile, or any other object, which is stored
    unchanged.
    """
    PROFILES = PROFILES

    def __init__(self, source, dest):
        self.source = PROFILES.get(source, source)
        self.dest = PROFILES.get(dest, dest)
|
@ -16,16 +16,20 @@ import itertools
|
||||
import types
|
||||
import re
|
||||
import copy
|
||||
from itertools import izip
|
||||
import cssutils
|
||||
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
|
||||
CSSValueList, cssproperties
|
||||
from lxml import etree
|
||||
from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES
|
||||
from calibre.ebooks.lit.oeb import barename, urlnormalize
|
||||
from lxml.cssselect import css_to_xpath, ExpressionError
|
||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
|
||||
from calibre.ebooks.oeb.base import XPNSMAP, xpath, barename, urlnormalize
|
||||
from calibre.ebooks.oeb.profile import PROFILES
|
||||
from calibre.resources import html_css
|
||||
|
||||
XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
|
||||
HTML_CSS_STYLESHEET = cssutils.parseString(html_css)
|
||||
XHTML_CSS_NAMESPACE = "@namespace url(http://www.w3.org/1999/xhtml);\n"
|
||||
HTML_CSS_STYLESHEET.namespaces['h'] = XHTML_NS
|
||||
|
||||
INHERITED = set(['azimuth', 'border-collapse', 'border-spacing',
|
||||
'caption-side', 'color', 'cursor', 'direction', 'elevation',
|
||||
@ -72,7 +76,7 @@ DEFAULTS = {'azimuth': 'center', 'background-attachment': 'scroll',
|
||||
'50', 'right': 'auto', 'speak': 'normal', 'speak-header': 'once',
|
||||
'speak-numeral': 'continuous', 'speak-punctuation': 'none',
|
||||
'speech-rate': 'medium', 'stress': '50', 'table-layout': 'auto',
|
||||
'text-align': 'left', 'text-decoration': 'none', 'text-indent':
|
||||
'text-align': 'auto', 'text-decoration': 'none', 'text-indent':
|
||||
0, 'text-transform': 'none', 'top': 'auto', 'unicode-bidi':
|
||||
'normal', 'vertical-align': 'baseline', 'visibility': 'visible',
|
||||
'voice-family': 'default', 'volume': 'medium', 'white-space':
|
||||
@ -82,42 +86,30 @@ DEFAULTS = {'azimuth': 'center', 'background-attachment': 'scroll',
|
||||
FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
|
||||
'x-large', 'xx-large'])
|
||||
|
||||
FONT_SIZE_LIST = [('xx-small', 1, 6.),
|
||||
('x-small', None, 7.),
|
||||
('small', 2, 8.),
|
||||
('medium', 3, 9.),
|
||||
('large', 4, 11.),
|
||||
('x-large', 5, 13.),
|
||||
('xx-large', 6, 15.),
|
||||
(None, 7, 17.)]
|
||||
|
||||
FONT_SIZE_BY_NAME = {}
|
||||
FONT_SIZE_BY_NUM = {}
|
||||
for name, num, size in FONT_SIZE_LIST:
|
||||
FONT_SIZE_BY_NAME[name] = size
|
||||
FONT_SIZE_BY_NUM[num] = size
|
||||
class CSSSelector(etree.XPath):
|
||||
MIN_SPACE_RE = re.compile(r' *([>~+]) *')
|
||||
LOCAL_NAME_RE = re.compile(r"(?<!local-)name[(][)] *= *'[^:]+:")
|
||||
|
||||
XPNSMAP = {'h': XHTML_NS,}
|
||||
def xpath(elem, expr):
|
||||
return elem.xpath(expr, namespaces=XPNSMAP)
|
||||
def __init__(self, css, namespaces=XPNSMAP):
    """Compile the CSS selector `css` into an XPath expression.

    Whitespace around the '>', '~' and '+' combinators is removed before
    translation, and the generated ``name() = 'prefix:`` tests are
    rewritten to ``local-name() = '`` so the prefix is not part of the
    comparison. The (whitespace-normalised) selector is kept in
    ``self.css``.
    """
    css = self.MIN_SPACE_RE.sub(r'\1', css)
    path = css_to_xpath(css)
    path = self.LOCAL_NAME_RE.sub(r"local-name() = '", path)
    etree.XPath.__init__(self, path, namespaces=namespaces)
    self.css = css
|
||||
|
||||
|
||||
class Page(object):
|
||||
def __init__(self, width, height, dpi):
|
||||
self.width = float(width)
|
||||
self.height = float(height)
|
||||
self.dpi = float(dpi)
|
||||
|
||||
class Profiles(object):
|
||||
PRS500 = Page(584, 754, 168.451)
|
||||
PRS505 = PRS500
|
||||
def __repr__(self):
    """Return ``<ClassName hex-id for 'selector'>``."""
    # '%x' gives the same digits as hex()[2:] without the trailing 'L'
    # that hex() appends to long integers.
    return '<%s %s for %r>' % (
        self.__class__.__name__,
        '%x' % abs(id(self)),
        self.css)
|
||||
|
||||
|
||||
class Stylizer(object):
|
||||
STYLESHEETS = {}
|
||||
|
||||
def __init__(self, tree, path, oeb, page=Profiles.PRS505):
|
||||
self.page = page
|
||||
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
|
||||
self.profile = profile
|
||||
base = os.path.dirname(path)
|
||||
basename = os.path.basename(path)
|
||||
cssname = os.path.splitext(basename)[0] + '.css'
|
||||
@ -126,12 +118,13 @@ class Stylizer(object):
|
||||
parser = cssutils.CSSParser()
|
||||
parser.setFetcher(lambda path: ('utf-8', oeb.container.read(path)))
|
||||
for elem in head:
|
||||
tag = barename(elem.tag)
|
||||
if tag == 'style':
|
||||
text = ''.join(elem.text)
|
||||
if elem.tag == XHTML('style') and elem.text \
|
||||
and elem.get('type', CSS_MIME) in OEB_STYLES:
|
||||
text = XHTML_CSS_NAMESPACE + elem.text
|
||||
stylesheet = parser.parseString(text, href=cssname)
|
||||
stylesheet.namespaces['h'] = XHTML_NS
|
||||
stylesheets.append(stylesheet)
|
||||
elif tag == 'link' \
|
||||
elif elem.tag == XHTML('link') and elem.get('href') \
|
||||
and elem.get('rel', 'stylesheet') == 'stylesheet' \
|
||||
and elem.get('type', CSS_MIME) in OEB_STYLES:
|
||||
href = urlnormalize(elem.attrib['href'])
|
||||
@ -143,11 +136,13 @@ class Stylizer(object):
|
||||
data = XHTML_CSS_NAMESPACE
|
||||
data += oeb.manifest.hrefs[path].data
|
||||
stylesheet = parser.parseString(data, href=path)
|
||||
stylesheet.namespaces['h'] = XHTML_NS
|
||||
self.STYLESHEETS[path] = stylesheet
|
||||
stylesheets.append(stylesheet)
|
||||
rules = []
|
||||
index = 0
|
||||
self.stylesheets = set()
|
||||
self.page_rule = {}
|
||||
for stylesheet in stylesheets:
|
||||
href = stylesheet.href
|
||||
self.stylesheets.add(href)
|
||||
@ -157,6 +152,15 @@ class Stylizer(object):
|
||||
rules.sort()
|
||||
self.rules = rules
|
||||
self._styles = {}
|
||||
for _, _, cssdict, text, _ in rules:
|
||||
try:
|
||||
selector = CSSSelector(text)
|
||||
except ExpressionError, e:
|
||||
continue
|
||||
for elem in selector(tree):
|
||||
self.style(elem)._update_cssdict(cssdict)
|
||||
for elem in xpath(tree, '//h:*[@style]'):
|
||||
self.style(elem)._apply_style_attr()
|
||||
|
||||
def flatten_rule(self, rule, href, index):
|
||||
results = []
|
||||
@ -169,7 +173,7 @@ class Stylizer(object):
|
||||
results.append((specificity, selector, style, text, href))
|
||||
elif isinstance(rule, CSSPageRule):
|
||||
style = self.flatten_style(rule.style)
|
||||
results.append(((0, 0, 0, 0), [], style, '@page', href))
|
||||
self.page_rule.update(style)
|
||||
return results
|
||||
|
||||
def flatten_style(self, cssstyle):
|
||||
@ -186,7 +190,7 @@ class Stylizer(object):
|
||||
size = style['font-size']
|
||||
if size == 'normal': size = 'medium'
|
||||
if size in FONT_SIZE_NAMES:
|
||||
style['font-size'] = "%dpt" % FONT_SIZE_BY_NAME[size]
|
||||
style['font-size'] = "%dpt" % self.profile.fnames[size]
|
||||
return style
|
||||
|
||||
def _normalize_edge(self, cssvalue, name):
|
||||
@ -233,8 +237,9 @@ class Stylizer(object):
|
||||
return style
|
||||
|
||||
def style(self, element):
    """Return the Style for `element`, creating it on first request."""
    try:
        return self._styles[element]
    except KeyError:
        # Catch only the cache miss; a bare except would also hide real
        # errors.
        return Style(element, self)
|
||||
|
||||
def stylesheet(self, name, font_scale=None):
|
||||
@ -250,79 +255,36 @@ class Stylizer(object):
|
||||
rules.append('%s {\n %s;\n}' % (selector, style))
|
||||
return '\n'.join(rules)
|
||||
|
||||
|
||||
class Style(object):
|
||||
def __init__(self, element, stylizer):
    """Create an empty style for `element` and register it with `stylizer`.

    The declarations start empty and are filled in afterwards through
    ``_update_cssdict`` / ``_apply_style_attr``.
    """
    self._element = element
    self._profile = stylizer.profile
    self._stylizer = stylizer
    self._style = {}
    # Lazily computed values, cached by the properties of the same name.
    self._fontSize = None
    self._width = None
    self._height = None
    self._lineHeight = None
    stylizer._styles[element] = self
|
||||
|
||||
def _assemble_style(self, element, stylizer):
|
||||
result = {}
|
||||
rules = stylizer.rules
|
||||
for _, selector, style, _, _ in rules:
|
||||
if self._selects_element(element, selector):
|
||||
result.update(style)
|
||||
try:
|
||||
style = CSSStyleDeclaration(element.attrib['style'])
|
||||
result.update(stylizer.flatten_style(style))
|
||||
except KeyError:
|
||||
pass
|
||||
return result
|
||||
def _update_cssdict(self, cssdict):
    """Merge the declarations in `cssdict` into this style; later calls
    override earlier values for the same property."""
    self._style.update(cssdict)
|
||||
|
||||
def _selects_element(self, element, selector):
|
||||
def _selects_element(element, items, index):
|
||||
if index == -1:
|
||||
return True
|
||||
item = items[index]
|
||||
if item.type == 'universal':
|
||||
pass
|
||||
elif item.type == 'type-selector':
|
||||
name1 = ("{%s}%s" % item.value).lower()
|
||||
name2 = element.tag.lower()
|
||||
if name1 != name2:
|
||||
return False
|
||||
elif item.type == 'id':
|
||||
name1 = item.value[1:]
|
||||
name2 = element.get('id', '')
|
||||
if name1 != name2:
|
||||
return False
|
||||
elif item.type == 'class':
|
||||
name = item.value[1:].lower()
|
||||
classes = element.get('class', '').lower().split()
|
||||
if name not in classes:
|
||||
return False
|
||||
elif item.type == 'child':
|
||||
parent = element.getparent()
|
||||
if parent is None:
|
||||
return False
|
||||
element = parent
|
||||
elif item.type == 'descendant':
|
||||
element = element.getparent()
|
||||
while element is not None:
|
||||
if _selects_element(element, items, index - 1):
|
||||
return True
|
||||
element = element.getparent()
|
||||
return False
|
||||
elif item.type == 'pseudo-class':
|
||||
if item.value == ':first-child':
|
||||
e = element.getprevious()
|
||||
if e is not None:
|
||||
return False
|
||||
else:
|
||||
return False
|
||||
elif item.type == 'pseudo-element':
|
||||
return False
|
||||
else:
|
||||
return False
|
||||
return _selects_element(element, items, index - 1)
|
||||
return _selects_element(element, selector, len(selector) - 1)
|
||||
def _apply_style_attr(self):
    """Merge the element's inline ``style`` attribute, if any, into this
    style, after flattening it through the stylizer."""
    attrib = self._element.attrib
    if 'style' not in attrib:
        return
    declaration = CSSStyleDeclaration(attrib['style'])
    self._style.update(self._stylizer.flatten_style(declaration))
|
||||
|
||||
def _has_parent(self):
    """Return True when the underlying element has a parent element."""
    return self._element.getparent() is not None
|
||||
|
||||
def _get_parent(self):
    """Return the Style of the parent element, or None at the root."""
    elem = self._element.getparent()
    if elem is None:
        return None
    return self._stylizer.style(elem)
|
||||
|
||||
def __getitem__(self, name):
|
||||
domname = cssproperties._toDOMname(name)
|
||||
@ -337,8 +299,8 @@ class Style(object):
|
||||
if (result == 'inherit'
|
||||
or (result is None and name in INHERITED
|
||||
and self._has_parent())):
|
||||
styles = self._stylizer._styles
|
||||
result = styles[self._element.getparent()]._get(name)
|
||||
stylizer = self._stylizer
|
||||
result = stylizer.style(self._element.getparent())._get(name)
|
||||
if result is None:
|
||||
result = DEFAULTS[name]
|
||||
return result
|
||||
@ -361,7 +323,7 @@ class Style(object):
|
||||
base = base or self.width
|
||||
result = (value / 100.0) * base
|
||||
elif unit == 'px':
|
||||
result = value * 72.0 / self._page.dpi
|
||||
result = value * 72.0 / self._profile.dpi
|
||||
elif unit == 'in':
|
||||
result = value * 72.0
|
||||
elif unit == 'pt':
|
||||
@ -379,22 +341,22 @@ class Style(object):
|
||||
|
||||
@property
|
||||
def fontSize(self):
|
||||
def normalize_fontsize(value, base=None):
|
||||
def normalize_fontsize(value, base):
|
||||
result = None
|
||||
factor = None
|
||||
if value == 'inherit':
|
||||
value = 'medium'
|
||||
value = base
|
||||
if value in FONT_SIZE_NAMES:
|
||||
result = FONT_SIZE_BY_NAME[value]
|
||||
result = self._profile.fnames[value]
|
||||
elif value == 'smaller':
|
||||
factor = 1.0/1.2
|
||||
for _, _, size in FONT_SIZE_LIST:
|
||||
for _, _, size in self._profile.fsizes:
|
||||
if base <= size: break
|
||||
factor = None
|
||||
result = size
|
||||
elif value == 'larger':
|
||||
factor = 1.2
|
||||
for _, _, size in reversed(FONT_SIZE_LIST):
|
||||
for _, _, size in reversed(self._profile.fsizes):
|
||||
if base >= size: break
|
||||
factor = None
|
||||
result = size
|
||||
@ -405,40 +367,108 @@ class Style(object):
|
||||
if factor:
|
||||
result = factor * base
|
||||
return result
|
||||
if self._fontSize is None:
|
||||
result = None
|
||||
if self._has_parent():
|
||||
styles = self._stylizer._styles
|
||||
base = styles[self._element.getparent()].fontSize
|
||||
parent = self._get_parent()
|
||||
if parent is not None:
|
||||
base = parent.fontSize
|
||||
else:
|
||||
base = normalize_fontsize(DEFAULTS['font-size'])
|
||||
base = self._profile.fbase
|
||||
if 'font-size' in self._style:
|
||||
size = self._style['font-size']
|
||||
result = normalize_fontsize(size, base)
|
||||
else:
|
||||
result = base
|
||||
self.__dict__['fontSize'] = result
|
||||
return result
|
||||
self._fontSize = result
|
||||
return self._fontSize
|
||||
|
||||
@property
def width(self):
    """Width of the element, computed once and cached.

    The base is the parent's width, or the profile width at the root.
    A ``width`` attribute on the element takes precedence over the CSS
    ``width`` property; a missing or 'auto' value yields the base.
    """
    if self._width is None:
        width = None
        parent = self._get_parent()
        if parent is not None:
            base = parent.width
        else:
            base = self._profile.width
        # Membership test: `'width' is attrib` compared object identity and
        # was never true, so the attribute was silently ignored.
        if 'width' in self._element.attrib:
            width = self._element.attrib['width']
        elif 'width' in self._style:
            width = self._style['width']
        if not width or width == 'auto':
            result = base
        else:
            result = self._unit_convert(width, base=base)
        self._width = result
    return self._width
|
||||
|
||||
@property
def height(self):
    """Height of the element, computed once and cached.

    The base is the parent's height, or the profile height at the root.
    A ``height`` attribute on the element takes precedence over the CSS
    ``height`` property; a missing or 'auto' value yields the base.
    """
    if self._height is None:
        height = None
        parent = self._get_parent()
        if parent is not None:
            base = parent.height
        else:
            base = self._profile.height
        # Membership test: `'height' is attrib` compared object identity
        # and was never true, so the attribute was silently ignored.
        if 'height' in self._element.attrib:
            height = self._element.attrib['height']
        elif 'height' in self._style:
            height = self._style['height']
        if not height or height == 'auto':
            result = base
        else:
            result = self._unit_convert(height, base=base)
        self._height = result
    return self._height
|
||||
|
||||
@property
def lineHeight(self):
    """Return the computed line height, cached in ``self._lineHeight``.

    A ``line-height`` that parses as a bare number is treated as a
    multiplier of ``self.fontSize``; any other value is converted with
    ``self._unit_convert`` using the font size as base.  Without an own
    ``line-height`` the parent's value is reused, and without a parent the
    result is 1.2 times the font size.
    """
    if self._lineHeight is None:
        # Use the same parent accessor as width/height/fontSize;
        # the previous spelling (_getparent) matched no sibling usage.
        parent = self._get_parent()
        if 'line-height' in self._style:
            lineh = self._style['line-height']
            try:
                multiplier = float(lineh)
            except ValueError:
                result = self._unit_convert(lineh, base=self.fontSize)
            else:
                result = multiplier * self.fontSize
        elif parent is not None:
            # TODO: proper inheritance
            result = parent.lineHeight
        else:
            result = 1.2 * self.fontSize
        self._lineHeight = result
    return self._lineHeight
|
||||
|
||||
@property
def marginTop(self):
    """Return ``margin-top`` converted by ``_unit_convert`` with the height as base."""
    raw = self._get('margin-top')
    return self._unit_convert(raw, base=self.height)
|
||||
|
||||
@property
def marginBottom(self):
    """Return ``margin-bottom`` converted by ``_unit_convert`` with the height as base."""
    raw = self._get('margin-bottom')
    return self._unit_convert(raw, base=self.height)
|
||||
|
||||
@property
def paddingTop(self):
    """Return ``padding-top`` converted by ``_unit_convert`` with the height as base."""
    raw = self._get('padding-top')
    return self._unit_convert(raw, base=self.height)
|
||||
|
||||
@property
def paddingBottom(self):
    """Return ``padding-bottom`` converted by ``_unit_convert`` with the height as base."""
    raw = self._get('padding-bottom')
    return self._unit_convert(raw, base=self.height)
|
||||
|
||||
def __str__(self):
    """Return the style as ``'name: value'`` pairs joined by ``'; '``.

    Pairs are ordered by sorting the ``(name, value)`` tuples, so the
    output is stable regardless of the mapping's iteration order.
    """
    # sorted() works whether items() yields a list or a view; calling
    # .sort() on the result only works when it is a list.
    pairs = sorted(self._style.items())
    return '; '.join("%s: %s" % (key, val) for key, val in pairs)
|
||||
|
||||
def cssdict(self):
    """Return a new plain ``dict`` holding a shallow copy of ``self._style``."""
    snapshot = {}
    snapshot.update(self._style)
    return snapshot
|
0
src/calibre/ebooks/oeb/transforms/__init__.py
Normal file
0
src/calibre/ebooks/oeb/transforms/__init__.py
Normal file
270
src/calibre/ebooks/oeb/transforms/flatcss.py
Normal file
270
src/calibre/ebooks/oeb/transforms/flatcss.py
Normal file
@ -0,0 +1,270 @@
|
||||
'''
|
||||
CSS flattening transform.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import operator
|
||||
import math
|
||||
from itertools import chain
|
||||
from collections import defaultdict
|
||||
from lxml import etree
|
||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
|
||||
from calibre.ebooks.oeb.base import CSS_MIME, OEB_STYLES
|
||||
from calibre.ebooks.oeb.base import namespace, barename
|
||||
from calibre.ebooks.oeb.base import OEBBook
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
|
||||
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
|
||||
STRIPNUM = re.compile(r'[-0-9]+$')
|
||||
|
||||
class KeyMapper(object):
    """Map a source font size to one of a fixed list of destination sizes.

    Each destination size in ``dkey`` is scored relative to ``dbase`` with
    ``relate``; a source size is scored relative to ``sbase`` the same way
    and mapped to the destination size whose score is closest.  Results
    are memoized in ``self.cache``.
    """

    def __init__(self, sbase, dbase, dkey):
        self.sbase = float(sbase)
        self.dprop = []
        for entry in dkey:
            self.dprop.append((self.relate(entry, dbase), float(entry)))
        self.cache = {}

    @staticmethod
    def relate(size, base):
        """Return a signed logarithmic score of ``size`` relative to ``base``.

        Sizes within 0.1 of the base score 0; smaller sizes score negative
        and larger sizes positive.
        """
        size, base = float(size), float(base)
        if abs(size - base) < 0.1:
            return 0
        if size < base:
            sign, endp = -1, 0
        else:
            sign, endp = 1, 36
        diff = (abs(base - size) * 3) + ((36 - size) / 100)
        logb = abs(base - endp)
        return sign * math.log(diff, logb)

    def __getitem__(self, ssize):
        """Return the destination size for ``ssize``, computing it once."""
        try:
            return self.cache[ssize]
        except KeyError:
            pass
        dsize = self.map(ssize)
        self.cache[ssize] = dsize
        return dsize

    def map(self, ssize):
        """Return the destination size whose score is nearest to that of ``ssize``."""
        prop = self.relate(ssize, self.sbase)
        nearest = min((abs(prop - p), s) for p, s in self.dprop)
        return nearest[1]
|
||||
|
||||
class ScaleMapper(object):
    """Map font sizes by the constant ratio ``dbase / sbase``."""

    def __init__(self, sbase, dbase):
        target = float(dbase)
        source = float(sbase)
        self.dscale = target / source

    def __getitem__(self, ssize):
        """Return ``ssize`` multiplied by the destination/source ratio."""
        return ssize * self.dscale
|
||||
|
||||
class NullMapper(object):
    """Identity mapper: every font size maps to itself."""

    def __init__(self):
        """Take no arguments and keep no state."""

    def __getitem__(self, ssize):
        """Return ``ssize`` unchanged."""
        return ssize
|
||||
|
||||
def FontMapper(sbase=None, dbase=None, dkey=None):
    """Return the font-size mapper matching the arguments supplied.

    Without both a source and a destination base size no mapping is
    possible, so a ``NullMapper`` is returned.  With both bases and a
    destination size key a ``KeyMapper`` is returned, otherwise a
    ``ScaleMapper``.
    """
    if not (sbase and dbase):
        return NullMapper()
    if dkey:
        return KeyMapper(sbase, dbase, dkey)
    return ScaleMapper(sbase, dbase)
|
||||
|
||||
|
||||
class CSSFlattener(object):
    """Transform that flattens all styling of the spine into one stylesheet.

    Every XHTML element in the spine gets a generated ``class`` whose rule
    carries that element's computed CSS; the original stylesheets are
    dropped from the manifest and document heads.  Font sizes are remapped
    through a ``FontMapper`` when ``fbase`` is given.

    fbase   -- destination base font size; remapping is skipped when falsy
    fkey    -- optional list of destination font sizes to snap to
    lineh   -- destination line height; when set, vertical margins and
               padding are rounded to multiples of it and a default
               ``line-height`` is emitted
    unfloat -- drop ``display`` from floated elements
    untable -- turn ``display: table*`` values into block/inline
    """

    def __init__(self, fbase=None, fkey=None, lineh=None, unfloat=False,
                 untable=False):
        self.fbase = fbase
        self.fkey = fkey
        self.lineh = lineh
        self.unfloat = unfloat
        self.untable = untable

    def transform(self, oeb, context):
        """Run the transform on ``oeb`` using the profiles in ``context``."""
        oeb.logger.info('Flattening CSS and remapping font sizes...')
        self.oeb = oeb
        self.context = context
        self.stylize_spine()
        self.sbase = self.baseline_spine() if self.fbase else None
        self.fmap = FontMapper(self.sbase, self.fbase, self.fkey)
        self.flatten_spine()

    def stylize_spine(self):
        """Build a ``Stylizer`` (source profile) for every spine item."""
        self.stylizers = {}
        profile = self.context.source
        for item in self.oeb.spine:
            html = item.data
            stylizer = Stylizer(html, item.href, self.oeb, profile)
            self.stylizers[item] = stylizer

    def baseline_node(self, node, stylizer, sizes, csize):
        """Add the text length found under ``node`` to ``sizes``.

        ``sizes`` maps a computed font size to the number of characters
        (after whitespace collapsing) rendered at that size.  The incoming
        ``csize`` is kept for interface compatibility only: it is replaced
        by the node's own computed ``font-size`` straight away.
        """
        csize = stylizer.style(node)['font-size']
        if node.text:
            sizes[csize] += len(COLLAPSE.sub(' ', node.text))
        for child in node:
            self.baseline_node(child, stylizer, sizes, csize)
            # Tail text belongs to the parent, so count it at this size.
            if child.tail:
                sizes[csize] += len(COLLAPSE.sub(' ', child.tail))

    def baseline_spine(self):
        """Return the font size carrying the most text across the spine."""
        sizes = defaultdict(float)
        for item in self.oeb.spine:
            html = item.data
            stylizer = self.stylizers[item]
            body = html.find(XHTML('body'))
            fsize = self.context.source.fbase
            self.baseline_node(body, stylizer, sizes, fsize)
        sbase = max(sizes.items(), key=operator.itemgetter(1))[0]
        self.oeb.logger.info(
            "Source base font size is %0.05fpt" % sbase)
        return sbase

    def clean_edges(self, cssdict, style, fsize):
        """Snap vertical margins/padding in ``cssdict`` to line multiples.

        Non-percentage, non-zero values up to one source line (1.26 times
        the source base size) become exactly one destination line; larger
        values are rounded to a whole number of destination lines.  Values
        are rewritten in ems relative to ``fsize``.
        """
        slineh = self.sbase * 1.26
        dlineh = self.lineh
        for kind in ('margin', 'padding'):
            for edge in ('bottom', 'top'):
                name = "%s-%s" % (kind, edge)
                if name not in cssdict:
                    continue
                if '%' in cssdict[name]:
                    continue
                value = style[name]
                if value == 0:
                    continue
                elif value <= slineh:
                    cssdict[name] = "%0.5fem" % (dlineh / fsize)
                else:
                    value = round(value / slineh) * dlineh
                    cssdict[name] = "%0.5fem" % (value / fsize)

    def flatten_node(self, node, stylizer, names, styles, psize, left=0):
        """Replace the styling of ``node`` and its subtree by classes.

        node     -- element to process; non-XHTML nodes are ignored
        stylizer -- stylizer of the containing document
        names    -- per-class-stem counter used to generate unique names
        styles   -- maps flattened CSS text to its generated class name
        psize    -- font size in effect for the parent element
        left     -- accumulated left margin of the ancestors
        """
        if not isinstance(node.tag, basestring) \
           or namespace(node.tag) != XHTML_NS:
            return
        tag = barename(node.tag)
        style = stylizer.style(node)
        cssdict = style.cssdict()
        # Fold presentational attributes into the CSS.
        if 'align' in node.attrib:
            cssdict['text-align'] = node.attrib['align']
            del node.attrib['align']
        if node.tag == XHTML('font'):
            node.tag = XHTML('span')
            if 'size' in node.attrib:
                size = node.attrib['size'].strip()
                if size:
                    fnums = self.context.source.fnums
                    if size[0] in ('+', '-'):
                        # Oh, the warcrimes
                        cssdict['font-size'] = fnums[3+int(size)]
                    else:
                        cssdict['font-size'] = fnums[int(size)]
                del node.attrib['size']
        if 'color' in node.attrib:
            cssdict['color'] = node.attrib['color']
            del node.attrib['color']
        if 'bgcolor' in node.attrib:
            cssdict['background-color'] = node.attrib['bgcolor']
            del node.attrib['bgcolor']
        if cssdict:
            if 'font-size' in cssdict:
                fsize = self.fmap[style['font-size']]
                cssdict['font-size'] = "%0.5fem" % (fsize / psize)
                psize = fsize
            if self.lineh and self.fbase and tag != 'body':
                self.clean_edges(cssdict, style, psize)
            margin = style['margin-left']
            left += margin if isinstance(margin, float) else 0
            if (left + style['text-indent']) < 0:
                percent = (margin - style['text-indent']) / style['width']
                cssdict['margin-left'] = "%d%%" % (percent * 100)
                left -= style['text-indent']
            if 'display' in cssdict and cssdict['display'] == 'in-line':
                cssdict['display'] = 'inline'
            if self.unfloat and 'float' in cssdict \
               and cssdict.get('display', 'none') != 'none':
                del cssdict['display']
            if self.untable and 'display' in cssdict \
               and cssdict['display'].startswith('table'):
                display = cssdict['display']
                if display == 'table-cell':
                    cssdict['display'] = 'inline'
                else:
                    cssdict['display'] = 'block'
            if 'vertical-align' in cssdict \
               and cssdict['vertical-align'] == 'sup':
                cssdict['vertical-align'] = 'super'
        if self.lineh and 'line-height' not in cssdict:
            lineh = self.lineh / psize
            cssdict['line-height'] = "%0.5fem" % lineh
        if cssdict:
            items = sorted(cssdict.items())
            css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
            # An empty/blank class attribute has no first token, and an
            # all-digit class strips to nothing; fall back to the default
            # stem in both cases instead of failing or emitting ".".
            tokens = node.get('class', '').split()
            klass = STRIPNUM.sub('', tokens[0]) if tokens else ''
            klass = klass or 'calibre'
            if css in styles:
                match = styles[css]
            else:
                match = klass + str(names[klass] or '')
                styles[css] = match
                names[klass] += 1
            node.attrib['class'] = match
        elif 'class' in node.attrib:
            del node.attrib['class']
        if 'style' in node.attrib:
            del node.attrib['style']
        for child in node:
            self.flatten_node(child, stylizer, names, styles, psize, left)

    def flatten_head(self, item, stylizer, href):
        """Point the head of ``item`` at the flattened stylesheet ``href``.

        Existing stylesheet links and ``<style>`` elements are removed, a
        link to ``href`` is appended, and the stylizer's ``@page`` rule (if
        any) is re-emitted as an inline ``<style>``.
        """
        html = item.data
        head = html.find(XHTML('head'))
        # Iterate over a snapshot: children are removed inside the loop.
        for node in list(head):
            if node.tag == XHTML('link') \
               and node.get('rel', 'stylesheet') == 'stylesheet' \
               and node.get('type', CSS_MIME) in OEB_STYLES:
                head.remove(node)
            elif node.tag == XHTML('style') \
                 and node.get('type', CSS_MIME) in OEB_STYLES:
                head.remove(node)
        href = item.relhref(href)
        etree.SubElement(head, XHTML('link'),
            rel='stylesheet', type=CSS_MIME, href=href)
        if stylizer.page_rule:
            items = sorted(stylizer.page_rule.items())
            css = '; '.join("%s: %s" % (key, val) for key, val in items)
            style = etree.SubElement(head, XHTML('style'), type=CSS_MIME)
            style.text = "@page { %s; }" % css

    def replace_css(self, css):
        """Replace every stylesheet in the manifest by one holding ``css``.

        Returns the href of the newly added stylesheet.
        """
        manifest = self.oeb.manifest
        id, href = manifest.generate('css', 'stylesheet.css')
        # Snapshot the items: the manifest is modified inside the loop.
        for item in list(manifest.values()):
            if item.media_type in OEB_STYLES:
                manifest.remove(item)
        manifest.add(id, href, CSS_MIME, data=css)
        return href

    def flatten_spine(self):
        """Flatten every spine document and install the shared stylesheet."""
        names = defaultdict(int)
        styles = {}
        for item in self.oeb.spine:
            html = item.data
            stylizer = self.stylizers[item]
            body = html.find(XHTML('body'))
            fsize = self.context.dest.fbase
            self.flatten_node(body, stylizer, names, styles, fsize)
        # styles maps css text -> class name; emit rules ordered by name.
        items = sorted((key, val) for (val, key) in styles.items())
        css = ''.join(".%s {\n%s;\n}\n\n" % (key, val) for key, val in items)
        href = self.replace_css(css)
        for item in self.oeb.spine:
            stylizer = self.stylizers[item]
            self.flatten_head(item, stylizer, href)
|
87
src/calibre/ebooks/oeb/transforms/htmltoc.py
Normal file
87
src/calibre/ebooks/oeb/transforms/htmltoc.py
Normal file
@ -0,0 +1,87 @@
|
||||
'''
|
||||
HTML-TOC-adding transform.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys
|
||||
import os
|
||||
from lxml import etree
|
||||
from calibre.ebooks.oeb.base import XML, XHTML, XHTML_NS
|
||||
from calibre.ebooks.oeb.base import XHTML_MIME, CSS_MIME
|
||||
from calibre.ebooks.oeb.base import element
|
||||
|
||||
STYLE_CSS = {
|
||||
'nested': """
|
||||
.calibre_toc_header {
|
||||
text-align: center;
|
||||
}
|
||||
.calibre_toc_block {
|
||||
margin-left: 1.2em;
|
||||
text-indent: -1.2em;
|
||||
}
|
||||
.calibre_toc_block .calibre_toc_block {
|
||||
margin-left: 2.4em;
|
||||
}
|
||||
.calibre_toc_block .calibre_toc_block .calibre_toc_block {
|
||||
margin-left: 3.6em;
|
||||
}
|
||||
""",
|
||||
|
||||
'centered': """
|
||||
.calibre_toc_header {
|
||||
text-align: center;
|
||||
}
|
||||
.calibre_toc_block {
|
||||
text-align: center;
|
||||
}
|
||||
body > .calibre_toc_block {
|
||||
margin-top: 1.2em;
|
||||
}
|
||||
"""
|
||||
}
|
||||
|
||||
class HTMLTOCAdder(object):
    """Transform that adds an in-line HTML table of contents to a book.

    ``style`` selects one of the stylesheets in ``STYLE_CSS``; an unknown
    name is reported and replaced by ``'nested'`` when the transform runs.
    """

    def __init__(self, style='nested'):
        self.style = style

    def transform(self, oeb, context):
        """Generate the TOC document unless the guide already has a 'toc'.

        Adds the TOC stylesheet and a ``contents.xhtml`` document to the
        manifest, appends the document to the spine as non-linear and
        registers it in the guide under ``'toc'``.
        """
        if 'toc' in oeb.guide:
            return
        logger = oeb.logger
        manifest = oeb.manifest
        logger.info('Generating in-line TOC...')
        style = self.style
        if style not in STYLE_CSS:
            logger.error('Unknown TOC style %r' % style)
            style = 'nested'
        id, css_href = manifest.generate('tocstyle', 'tocstyle.css')
        manifest.add(id, css_href, CSS_MIME, data=STYLE_CSS[style])
        language = str(oeb.metadata.language[0])
        # Document skeleton: <html><head><title/><link/></head><body/></html>
        contents = element(None, XHTML('html'), nsmap={None: XHTML_NS},
                           attrib={XML('lang'): language})
        head = element(contents, XHTML('head'))
        title = element(head, XHTML('title'))
        title.text = 'Table of Contents'
        element(head, XHTML('link'), rel='stylesheet', type=CSS_MIME,
                href=css_href)
        body_attrib = {'class': 'calibre_toc'}
        body = element(contents, XHTML('body'), attrib=body_attrib)
        header_attrib = {'class': 'calibre_toc_header'}
        h1 = element(body, XHTML('h1'), attrib=header_attrib)
        h1.text = 'Table of Contents'
        self.add_toc_level(body, oeb.toc)
        id, href = manifest.generate('contents', 'contents.xhtml')
        item = manifest.add(id, href, XHTML_MIME, data=contents)
        oeb.spine.add(item, linear=False)
        oeb.guide.add('toc', 'Table of Contents', href)

    def add_toc_level(self, elem, toc):
        """Append one linked block per entry of ``toc`` to ``elem``, recursively."""
        for node in toc:
            block_attrib = {'class': 'calibre_toc_block'}
            block = element(elem, XHTML('div'), attrib=block_attrib)
            link_attrib = {'href': node.href, 'class': 'calibre_toc_line'}
            line = element(block, XHTML('a'), attrib=link_attrib)
            line.text = node.title
            self.add_toc_level(block, node)
|
112
src/calibre/ebooks/oeb/transforms/manglecase.py
Normal file
112
src/calibre/ebooks/oeb/transforms/manglecase.py
Normal file
@ -0,0 +1,112 @@
|
||||
'''
|
||||
CSS case-mangling transform.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import operator
|
||||
import math
|
||||
from itertools import chain
|
||||
from collections import defaultdict
|
||||
from lxml import etree
|
||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
|
||||
from calibre.ebooks.oeb.base import CSS_MIME
|
||||
from calibre.ebooks.oeb.base import namespace
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
|
||||
CASE_MANGLER_CSS = """
|
||||
.calibre_lowercase {
|
||||
font-variant: normal;
|
||||
font-size: 0.65em;
|
||||
}
|
||||
"""
|
||||
|
||||
TEXT_TRANSFORMS = set(['capitalize', 'uppercase', 'lowercase'])
|
||||
|
||||
class CaseMangler(object):
    """Transform that bakes CSS case changes into the document text.

    ``text-transform`` values are applied directly to the text, and
    ``font-variant: small-caps`` is emulated by upper-casing lower-case
    runs inside ``<span class="calibre_lowercase">`` elements styled by
    ``CASE_MANGLER_CSS``.
    """

    def transform(self, oeb, context):
        """Run the transform on ``oeb`` using the source profile of ``context``."""
        oeb.logger.info('Applying case-transforming CSS...')
        self.oeb = oeb
        self.profile = context.source
        self.mangle_spine()

    def mangle_spine(self):
        """Add the helper stylesheet and mangle the body of each spine item."""
        id, href = self.oeb.manifest.generate('manglecase', 'manglecase.css')
        self.oeb.manifest.add(id, href, CSS_MIME, data=CASE_MANGLER_CSS)
        for item in self.oeb.spine:
            html = item.data
            relhref = item.relhref(href)
            etree.SubElement(html.find(XHTML('head')), XHTML('link'),
                             rel='stylesheet', href=relhref, type=CSS_MIME)
            stylizer = Stylizer(html, item.href, self.oeb, self.profile)
            self.mangle_elem(html.find(XHTML('body')), stylizer)

    def text_transform(self, transform, text):
        """Return ``text`` with the CSS ``text-transform`` value applied.

        Unknown transform values return ``text`` unchanged.
        """
        if transform == 'capitalize':
            return text.title()
        if transform == 'uppercase':
            return text.upper()
        if transform == 'lowercase':
            return text.lower()
        return text

    def split_text(self, text):
        """Split ``text`` into maximal runs of upper-case / other characters.

        The runs concatenate back to ``text``.  An empty string yields an
        empty list (it previously raised IndexError).
        """
        # Local import: the module only imports ``chain`` from itertools.
        from itertools import groupby
        return [''.join(run)
                for _, run in groupby(text, lambda char: char.isupper())]

    def smallcaps_elem(self, elem, attr):
        """Emulate small-caps for the ``attr`` ('text' or 'tail') of ``elem``.

        Upper-case runs stay as plain text; every other run is upper-cased
        and wrapped in a ``calibre_lowercase`` span inserted right after
        the previously emitted piece.
        """
        texts = self.split_text(getattr(elem, attr))
        setattr(elem, attr, None)
        # ``last`` is the node after which the next piece goes; None means
        # "at the very start of elem's content".
        last = elem if attr == 'tail' else None
        attrib = {'class': 'calibre_lowercase'}
        for text in texts:
            if text.isupper():
                if last is None:
                    elem.text = text
                else:
                    last.tail = text
            else:
                child = etree.Element(XHTML('span'), attrib=attrib)
                child.text = text.upper()
                if last is None:
                    elem.insert(0, child)
                else:
                    # addnext() moves the tail for some reason
                    tail = last.tail
                    last.addnext(child)
                    last.tail = tail
                    child.tail = None
                last = child

    def mangle_elem(self, elem, stylizer):
        """Apply case mangling to ``elem`` and, recursively, its children.

        Nodes whose tag is not a string or not in the XHTML namespace are
        skipped.  Child tails are processed with the style of ``elem``.
        """
        if not isinstance(elem.tag, basestring) or \
           namespace(elem.tag) != XHTML_NS:
            return
        # Snapshot the children: smallcaps_elem() inserts new spans.
        children = list(elem)
        style = stylizer.style(elem)
        transform = style['text-transform']
        variant = style['font-variant']
        if elem.text:
            if transform in TEXT_TRANSFORMS:
                elem.text = self.text_transform(transform, elem.text)
            if variant == 'small-caps':
                self.smallcaps_elem(elem, 'text')
        for child in children:
            self.mangle_elem(child, stylizer)
            if child.tail:
                if transform in TEXT_TRANSFORMS:
                    child.tail = self.text_transform(transform, child.tail)
                if variant == 'small-caps':
                    self.smallcaps_elem(child, 'tail')
|
190
src/calibre/ebooks/oeb/transforms/rasterize.py
Normal file
190
src/calibre/ebooks/oeb/transforms/rasterize.py
Normal file
@ -0,0 +1,190 @@
|
||||
'''
|
||||
SVG rasterization transform.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys
|
||||
import os
|
||||
from urlparse import urldefrag
|
||||
import base64
|
||||
from lxml import etree
|
||||
from PyQt4.QtCore import Qt
|
||||
from PyQt4.QtCore import QByteArray
|
||||
from PyQt4.QtCore import QBuffer
|
||||
from PyQt4.QtCore import QIODevice
|
||||
from PyQt4.QtGui import QColor
|
||||
from PyQt4.QtGui import QImage
|
||||
from PyQt4.QtGui import QPainter
|
||||
from PyQt4.QtSvg import QSvgRenderer
|
||||
from PyQt4.QtGui import QApplication
|
||||
from calibre.ebooks.oeb.base import XHTML_NS, XHTML, SVG_NS, SVG, XLINK
|
||||
from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME, JPEG_MIME
|
||||
from calibre.ebooks.oeb.base import xml2str, xpath, namespace, barename
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
|
||||
IMAGE_TAGS = set([XHTML('img'), XHTML('object')])
|
||||
KEEP_ATTRS = set(['class', 'style', 'width', 'height', 'align'])
|
||||
|
||||
class SVGRasterizer(object):
    """Transform that replaces SVG images in a book by rendered PNGs.

    Handles SVG files referenced from ``<img>``/``<object>``, inline
    ``<svg>`` elements in spine documents, and an SVG cover.
    """

    def __init__(self):
        # The Qt classes used for rendering need an application object;
        # create one only if none exists yet.
        if QApplication.instance() is None:
            QApplication([])

    def transform(self, oeb, context):
        """Run the transform on ``oeb`` using the destination profile."""
        oeb.logger.info('Rasterizing SVG images...')
        self.oeb = oeb
        self.profile = context.dest
        # Cache: (svg href, width, height) -> href of the rendered PNG.
        self.images = {}
        self.dataize_manifest()
        self.rasterize_spine()
        self.rasterize_cover()

    def _render(self, svg, size, format='PNG'):
        """Paint ``svg`` on a white image of ``size``; return the encoded bytes."""
        image = QImage(size, QImage.Format_ARGB32_Premultiplied)
        image.fill(QColor("white").rgb())
        painter = QPainter(image)
        svg.render(painter)
        painter.end()
        array = QByteArray()
        buffer = QBuffer(array)
        buffer.open(QIODevice.WriteOnly)
        image.save(buffer, format)
        return str(array)

    def rasterize_svg(self, elem, width=0, height=0, format='PNG'):
        """Render the SVG element ``elem`` and return the image data.

        When the renderer reports a 100x100 default size and the element
        has a ``viewBox``, the size is taken from the view box instead.  If
        ``width`` or ``height`` is given the size is scaled to fit them
        while keeping the aspect ratio.
        """
        data = QByteArray(xml2str(elem))
        svg = QSvgRenderer(data)
        size = svg.defaultSize()
        if size.width() == 100 and size.height() == 100 \
           and 'viewBox' in elem.attrib:
            box = [float(x) for x in elem.attrib['viewBox'].split()]
            size.setWidth(box[2] - box[0])
            size.setHeight(box[3] - box[1])
        if width or height:
            size.scale(width, height, Qt.KeepAspectRatio)
        logger = self.oeb.logger
        logger.info('Rasterizing %r to %dx%d'
                    % (elem, size.width(), size.height()))
        return self._render(svg, size, format)

    def dataize_manifest(self):
        """Inline linked resources into every SVG item of the manifest."""
        for item in self.oeb.manifest.values():
            if item.media_type == SVG_MIME:
                self.dataize_svg(item)

    def dataize_svg(self, item, svg=None):
        """Rewrite ``xlink:href`` references in ``svg`` as ``data:`` URIs.

        ``svg`` defaults to ``item.data``.  Only references that resolve to
        a manifest item are rewritten; fragment-only and unknown references
        are left alone.  Returns the (modified) ``svg`` tree.
        """
        if svg is None:
            svg = item.data
        hrefs = self.oeb.manifest.hrefs
        for elem in xpath(svg, '//svg:*[@xl:href]'):
            href = elem.attrib[XLINK('href')]
            path, frag = urldefrag(href)
            if not path:
                continue
            abshref = item.abshref(path)
            if abshref not in hrefs:
                continue
            linkee = hrefs[abshref]
            data = base64.encodestring(str(linkee))
            data = "data:%s;base64,%s" % (linkee.media_type, data)
            elem.attrib[XLINK('href')] = data
        return svg

    def rasterize_spine(self):
        """Rasterize the SVG content of every spine document."""
        for item in self.oeb.spine:
            html = item.data
            stylizer = Stylizer(html, item.href, self.oeb, self.profile)
            self.rasterize_item(item, stylizer)

    def rasterize_item(self, item, stylizer):
        """Rasterize external and inline SVG images found in ``item``."""
        html = item.data
        hrefs = self.oeb.manifest.hrefs
        for elem in xpath(html, '//h:img'):
            src = elem.get('src', None)
            image = hrefs.get(item.abshref(src), None) if src else None
            if image and image.media_type == SVG_MIME:
                style = stylizer.style(elem)
                self.rasterize_external(elem, style, item, image)
        for elem in xpath(html, '//h:object[@type="%s"]' % SVG_MIME):
            data = elem.get('data', None)
            image = hrefs.get(item.abshref(data), None) if data else None
            if image and image.media_type == SVG_MIME:
                style = stylizer.style(elem)
                self.rasterize_external(elem, style, item, image)
        for elem in xpath(html, '//svg:svg'):
            style = stylizer.style(elem)
            self.rasterize_inline(elem, style, item)

    def rasterize_inline(self, elem, style, item):
        """Replace the inline ``<svg>`` element ``elem`` by an ``<img>``.

        The rendered PNG is added to the manifest next to ``item``; the
        ``width``/``height`` attributes of the SVG element are carried
        over to the new image element.
        """
        width = style['width']
        height = style['height']
        # Scale by dpi/72 (presumably points to device pixels -- confirm).
        width = (width / 72) * self.profile.dpi
        height = (height / 72) * self.profile.dpi
        elem = self.dataize_svg(item, elem)
        data = self.rasterize_svg(elem, width, height)
        manifest = self.oeb.manifest
        href = os.path.splitext(item.href)[0] + '.png'
        id, href = manifest.generate(item.id, href)
        manifest.add(id, href, PNG_MIME, data=data)
        img = etree.Element(XHTML('img'), src=item.relhref(href))
        elem.getparent().replace(elem, img)
        for prop in ('width', 'height'):
            if prop in elem.attrib:
                img.attrib[prop] = elem.attrib[prop]

    def rasterize_external(self, elem, style, item, svgitem):
        """Turn ``elem`` into an ``<img>`` showing a PNG rendering of ``svgitem``.

        Renderings are cached per (href, width, height) in ``self.images``
        so the same SVG at the same size is rendered only once.
        """
        width = style['width']
        height = style['height']
        width = (width / 72) * self.profile.dpi
        height = (height / 72) * self.profile.dpi
        data = QByteArray(str(svgitem))
        svg = QSvgRenderer(data)
        size = svg.defaultSize()
        size.scale(width, height, Qt.KeepAspectRatio)
        key = (svgitem.href, size.width(), size.height())
        if key in self.images:
            href = self.images[key]
        else:
            logger = self.oeb.logger
            logger.info('Rasterizing %r to %dx%d'
                        % (svgitem.href, size.width(), size.height()))
            data = self._render(svg, size, 'PNG')
            manifest = self.oeb.manifest
            href = os.path.splitext(svgitem.href)[0] + '.png'
            id, href = manifest.generate(svgitem.id, href)
            manifest.add(id, href, PNG_MIME, data=data)
            self.images[key] = href
        elem.tag = XHTML('img')
        # Work on snapshots: attributes and children are deleted from the
        # very element being walked.
        for attr in list(elem.attrib):
            if attr not in KEEP_ATTRS:
                del elem.attrib[attr]
        elem.attrib['src'] = item.relhref(href)
        if elem.text:
            elem.attrib['alt'] = elem.text
            elem.text = None
        for child in list(elem):
            elem.remove(child)

    def rasterize_cover(self):
        """Replace an SVG cover by a PNG rendered at the profile's page size.

        Does nothing when the metadata has no cover or the cover is not an
        SVG item.
        """
        covers = self.oeb.metadata.cover
        if not covers:
            return
        cover = self.oeb.manifest.ids[str(covers[0])]
        if cover.media_type != SVG_MIME:
            return
        width = (self.profile.width / 72) * self.profile.dpi
        height = (self.profile.height / 72) * self.profile.dpi
        data = self.rasterize_svg(cover.data, width, height)
        href = os.path.splitext(cover.href)[0] + '.png'
        id, href = self.oeb.manifest.generate(cover.id, href)
        self.oeb.manifest.add(id, href, PNG_MIME, data=data)
        covers[0].value = id
|
68
src/calibre/ebooks/oeb/transforms/trimmanifest.py
Normal file
68
src/calibre/ebooks/oeb/transforms/trimmanifest.py
Normal file
@ -0,0 +1,68 @@
|
||||
'''
|
||||
OPF manifest trimming transform.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys
|
||||
import os
|
||||
from itertools import chain
|
||||
from urlparse import urldefrag
|
||||
from lxml import etree
|
||||
import cssutils
|
||||
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
|
||||
|
||||
LINK_SELECTORS = []
|
||||
for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data',
|
||||
'//*/@xl:href'):
|
||||
LINK_SELECTORS.append(etree.XPath(expr, namespaces=XPNSMAP))
|
||||
|
||||
class ManifestTrimmer(object):
|
||||
def transform(self, oeb, context):
|
||||
oeb.logger.info('Trimming unused files from manifest...')
|
||||
used = set()
|
||||
hrefs = oeb.manifest.hrefs
|
||||
for term in oeb.metadata:
|
||||
for item in oeb.metadata[term]:
|
||||
if item.value in oeb.manifest.hrefs:
|
||||
used.add(oeb.manifest.hrefs[item.value])
|
||||
elif item.value in oeb.manifest.ids:
|
||||
used.add(oeb.manifest.ids[item.value])
|
||||
for ref in oeb.guide.values():
|
||||
path, _ = urldefrag(ref.href)
|
||||
if path in oeb.manifest.hrefs:
|
||||
used.add(oeb.manifest.hrefs[path])
|
||||
# TOC items are required to be in the spine
|
||||
for item in oeb.spine:
|
||||
used.add(item)
|
||||
unchecked = used
|
||||
while unchecked:
|
||||
new = set()
|
||||
for item in unchecked:
|
||||
if item.media_type in OEB_DOCS or \
|
||||
item.media_type[-4:] in ('/xml', '+xml'):
|
||||
hrefs = [sel(item.data) for sel in LINK_SELECTORS]
|
||||
for href in chain(*hrefs):
|
||||
href = item.abshref(href)
|
||||
if href in oeb.manifest.hrefs:
|
||||
found = oeb.manifest.hrefs[href]
|
||||
if found not in used:
|
||||
new.add(found)
|
||||
elif item.media_type == CSS_MIME:
|
||||
def replacer(uri):
|
||||
absuri = item.abshref(uri)
|
||||
if absuri in oeb.manifest.hrefs:
|
||||
found = oeb.manifest.hrefs[href]
|
||||
if found not in used:
|
||||
new.add(found)
|
||||
return uri
|
||||
sheet = cssutils.parseString(item.data, href=item.href)
|
||||
cssutils.replaceUrls(sheet, replacer)
|
||||
used.update(new)
|
||||
unchecked = new
|
||||
for item in oeb.manifest.values():
|
||||
if item not in used:
|
||||
oeb.logger.info('Trimming %r from manifest' % item.href)
|
||||
oeb.manifest.remove(item)
|
@ -48,12 +48,14 @@ entry_points = {
|
||||
'any2lrf = calibre.ebooks.lrf.any.convert_from:main',
|
||||
'any2epub = calibre.ebooks.epub.from_any:main',
|
||||
'any2lit = calibre.ebooks.lit.from_any:main',
|
||||
'any2mobi = calibre.ebooks.mobi.from_any:main',
|
||||
'lrf2lrs = calibre.ebooks.lrf.lrfparser:main',
|
||||
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
|
||||
'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
|
||||
'isbndb = calibre.ebooks.metadata.isbndb:main',
|
||||
'librarything = calibre.ebooks.metadata.library_thing:main',
|
||||
'mobi2oeb = calibre.ebooks.mobi.reader:main',
|
||||
'oeb2mobi = calibre.ebooks.mobi.writer:main',
|
||||
'lrf2html = calibre.ebooks.lrf.html.convert_to:main',
|
||||
'lit2oeb = calibre.ebooks.lit.reader:main',
|
||||
'oeb2lit = calibre.ebooks.lit.writer:main',
|
||||
@ -190,6 +192,8 @@ def setup_completion(fatal_errors):
|
||||
from calibre.ebooks.epub.from_any import option_parser as any2epub
|
||||
from calibre.ebooks.lit.from_any import option_parser as any2lit
|
||||
from calibre.ebooks.epub.from_comic import option_parser as comic2epub
|
||||
from calibre.ebooks.mobi.from_any import option_parser as any2mobi
|
||||
from calibre.ebooks.mobi.writer import option_parser as oeb2mobi
|
||||
from calibre.gui2.main import option_parser as guiop
|
||||
any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
|
||||
'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt']
|
||||
@ -214,6 +218,8 @@ def setup_completion(fatal_errors):
|
||||
f.write(opts_and_exts('calibre', guiop, any_formats))
|
||||
f.write(opts_and_exts('any2epub', any2epub, any_formats))
|
||||
f.write(opts_and_exts('any2lit', any2lit, any_formats))
|
||||
f.write(opts_and_exts('any2mobi', any2mobi, any_formats))
|
||||
f.write(opts_and_exts('oeb2mobi', oeb2mobi, ['mobi', 'prc']))
|
||||
f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf']))
|
||||
f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
|
||||
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
|
||||
|
Loading…
x
Reference in New Issue
Block a user