mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
MOBI Output: Command line support for converting to MOBI via the command any2mobi (thanks to Marshall T. Vandegrift)
This commit is contained in:
commit
329fd4866f
2
setup.py
2
setup.py
@ -166,7 +166,7 @@ if __name__ == '__main__':
|
|||||||
metadata_sqlite = 'library/metadata_sqlite.sql',
|
metadata_sqlite = 'library/metadata_sqlite.sql',
|
||||||
jquery = 'gui2/viewer/jquery.js',
|
jquery = 'gui2/viewer/jquery.js',
|
||||||
jquery_scrollTo = 'gui2/viewer/jquery_scrollTo.js',
|
jquery_scrollTo = 'gui2/viewer/jquery_scrollTo.js',
|
||||||
html_css = 'ebooks/lit/html.css',
|
html_css = 'ebooks/oeb/html.css',
|
||||||
)
|
)
|
||||||
|
|
||||||
DEST = os.path.join('src', APPNAME, 'resources.py')
|
DEST = os.path.join('src', APPNAME, 'resources.py')
|
||||||
|
@ -798,8 +798,9 @@ class Processor(Parser):
|
|||||||
if face is not None:
|
if face is not None:
|
||||||
faces = []
|
faces = []
|
||||||
for face in face.split(','):
|
for face in face.split(','):
|
||||||
if ' ' in face:
|
face = face.strip()
|
||||||
face = "%s" % face
|
if ' ' in face and not (face[0] == face[-1] == '"'):
|
||||||
|
face = '"%s"' % face.replace('"', r'\"')
|
||||||
faces.append(face)
|
faces.append(face)
|
||||||
for generic in ('serif', 'sans-serif', 'monospace'):
|
for generic in ('serif', 'sans-serif', 'monospace'):
|
||||||
if generic in faces:
|
if generic in faces:
|
||||||
|
@ -15,7 +15,7 @@ from lxml import etree
|
|||||||
from calibre.ebooks.lit import LitError
|
from calibre.ebooks.lit import LitError
|
||||||
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
|
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
|
||||||
import calibre.ebooks.lit.mssha1 as mssha1
|
import calibre.ebooks.lit.mssha1 as mssha1
|
||||||
from calibre.ebooks.lit.oeb import urlnormalize
|
from calibre.ebooks.oeb.base import urlnormalize
|
||||||
from calibre.ebooks import DRMError
|
from calibre.ebooks import DRMError
|
||||||
from calibre import plugins
|
from calibre import plugins
|
||||||
lzx, lxzerror = plugins['lzx']
|
lzx, lxzerror = plugins['lzx']
|
||||||
|
@ -23,14 +23,20 @@ from urllib import unquote as urlunquote
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
from calibre.ebooks.lit.reader import DirectoryEntry
|
from calibre.ebooks.lit.reader import DirectoryEntry
|
||||||
import calibre.ebooks.lit.maps as maps
|
import calibre.ebooks.lit.maps as maps
|
||||||
from calibre.ebooks.lit.oeb import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \
|
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \
|
||||||
CSS_MIME, OPF_MIME, XML_NS, XML
|
CSS_MIME, OPF_MIME, XML_NS, XML
|
||||||
from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize, xpath
|
from calibre.ebooks.oeb.base import namespace, barename, prefixname, \
|
||||||
from calibre.ebooks.lit.oeb import FauxLogger, OEBBook
|
urlnormalize, xpath
|
||||||
from calibre.ebooks.lit.stylizer import Stylizer
|
from calibre.ebooks.oeb.base import Logger, OEBBook
|
||||||
|
from calibre.ebooks.oeb.profile import Context
|
||||||
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
|
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
|
||||||
|
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
|
||||||
|
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
|
||||||
|
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
|
||||||
|
from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
|
||||||
from calibre.ebooks.lit.lzx import Compressor
|
from calibre.ebooks.lit.lzx import Compressor
|
||||||
import calibre
|
import calibre
|
||||||
from calibre import LoggingInterface
|
|
||||||
from calibre import plugins
|
from calibre import plugins
|
||||||
msdes, msdeserror = plugins['msdes']
|
msdes, msdeserror = plugins['msdes']
|
||||||
import calibre.ebooks.lit.mssha1 as mssha1
|
import calibre.ebooks.lit.mssha1 as mssha1
|
||||||
@ -116,12 +122,6 @@ LZXC_CONTROL = \
|
|||||||
|
|
||||||
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
|
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
|
||||||
|
|
||||||
def prefixname(name, nsrmap):
|
|
||||||
prefix = nsrmap[namespace(name)]
|
|
||||||
if not prefix:
|
|
||||||
return barename(name)
|
|
||||||
return ':'.join((prefix, barename(name)))
|
|
||||||
|
|
||||||
def decint(value):
|
def decint(value):
|
||||||
bytes = []
|
bytes = []
|
||||||
while True:
|
while True:
|
||||||
@ -143,9 +143,9 @@ def warn(x):
|
|||||||
class ReBinary(object):
|
class ReBinary(object):
|
||||||
NSRMAP = {'': None, XML_NS: 'xml'}
|
NSRMAP = {'': None, XML_NS: 'xml'}
|
||||||
|
|
||||||
def __init__(self, root, item, oeb, map=HTML_MAP, logger=FauxLogger()):
|
def __init__(self, root, path, oeb, map=HTML_MAP):
|
||||||
self.item = item
|
self.item = item
|
||||||
self.logger = logger
|
self.logger = oeb.logger
|
||||||
self.manifest = oeb.manifest
|
self.manifest = oeb.manifest
|
||||||
self.tags, self.tattrs = map
|
self.tags, self.tattrs = map
|
||||||
self.buf = StringIO()
|
self.buf = StringIO()
|
||||||
@ -300,10 +300,9 @@ def preserve(function):
|
|||||||
return wrapper
|
return wrapper
|
||||||
|
|
||||||
class LitWriter(object):
|
class LitWriter(object):
|
||||||
def __init__(self, oeb, logger=FauxLogger()):
|
def __init__(self):
|
||||||
self._oeb = oeb
|
# Wow, no options
|
||||||
self._logger = logger
|
pass
|
||||||
self._litize_oeb()
|
|
||||||
|
|
||||||
def _litize_oeb(self):
|
def _litize_oeb(self):
|
||||||
oeb = self._oeb
|
oeb = self._oeb
|
||||||
@ -312,32 +311,27 @@ class LitWriter(object):
|
|||||||
if oeb.metadata.cover:
|
if oeb.metadata.cover:
|
||||||
id = str(oeb.metadata.cover[0])
|
id = str(oeb.metadata.cover[0])
|
||||||
cover = oeb.manifest[id]
|
cover = oeb.manifest[id]
|
||||||
elif MS_COVER_TYPE in oeb.guide:
|
|
||||||
href = oeb.guide[MS_COVER_TYPE].href
|
|
||||||
cover = oeb.manifest.hrefs[href]
|
|
||||||
elif 'cover' in oeb.guide:
|
|
||||||
href = oeb.guide['cover'].href
|
|
||||||
cover = oeb.manifest.hrefs[href]
|
|
||||||
else:
|
|
||||||
html = oeb.spine[0].data
|
|
||||||
imgs = xpath(html, '//img[position()=1]')
|
|
||||||
href = imgs[0].get('src') if imgs else None
|
|
||||||
cover = oeb.manifest.hrefs[href] if href else None
|
|
||||||
if cover:
|
|
||||||
if not oeb.metadata.cover:
|
|
||||||
oeb.metadata.add('cover', cover.id)
|
|
||||||
for type, title in ALL_MS_COVER_TYPES:
|
for type, title in ALL_MS_COVER_TYPES:
|
||||||
if type not in oeb.guide:
|
if type not in oeb.guide:
|
||||||
oeb.guide.add(type, title, cover.href)
|
oeb.guide.add(type, title, cover.href)
|
||||||
else:
|
else:
|
||||||
self._logger.log_warn('No suitable cover image found.')
|
self._logger.warn('No suitable cover image found.')
|
||||||
|
|
||||||
def dump(self, stream):
|
def dump(self, oeb, path):
|
||||||
|
if hasattr(path, 'write'):
|
||||||
|
return self._dump_stream(oeb, path)
|
||||||
|
with open(path, 'w+b') as stream:
|
||||||
|
return self._dump_stream(oeb, stream)
|
||||||
|
|
||||||
|
def _dump_stream(self, oeb, stream):
|
||||||
|
self._oeb = oeb
|
||||||
|
self._logger = oeb.logger
|
||||||
self._stream = stream
|
self._stream = stream
|
||||||
self._sections = [StringIO() for i in xrange(4)]
|
self._sections = [StringIO() for i in xrange(4)]
|
||||||
self._directory = []
|
self._directory = []
|
||||||
self._meta = None
|
self._meta = None
|
||||||
self._dump()
|
self._litize_oeb()
|
||||||
|
self._write_content()
|
||||||
|
|
||||||
def _write(self, *data):
|
def _write(self, *data):
|
||||||
for datum in data:
|
for datum in data:
|
||||||
@ -351,7 +345,7 @@ class LitWriter(object):
|
|||||||
def _tell(self):
|
def _tell(self):
|
||||||
return self._stream.tell()
|
return self._stream.tell()
|
||||||
|
|
||||||
def _dump(self):
|
def _write_content(self):
|
||||||
# Build content sections
|
# Build content sections
|
||||||
self._build_sections()
|
self._build_sections()
|
||||||
|
|
||||||
@ -480,8 +474,7 @@ class LitWriter(object):
|
|||||||
secnum = 0
|
secnum = 0
|
||||||
if not isinstance(data, basestring):
|
if not isinstance(data, basestring):
|
||||||
self._add_folder(name)
|
self._add_folder(name)
|
||||||
rebin = ReBinary(data, item, self._oeb, map=HTML_MAP,
|
rebin = ReBinary(data, item, self._oeb, map=HTML_MAP)
|
||||||
logger=self._logger)
|
|
||||||
self._add_file(name + '/ahc', rebin.ahc, 0)
|
self._add_file(name + '/ahc', rebin.ahc, 0)
|
||||||
self._add_file(name + '/aht', rebin.aht, 0)
|
self._add_file(name + '/aht', rebin.aht, 0)
|
||||||
item.page_breaks = rebin.page_breaks
|
item.page_breaks = rebin.page_breaks
|
||||||
@ -560,8 +553,7 @@ class LitWriter(object):
|
|||||||
meta.attrib['ms--minimum_level'] = '0'
|
meta.attrib['ms--minimum_level'] = '0'
|
||||||
meta.attrib['ms--attr5'] = '1'
|
meta.attrib['ms--attr5'] = '1'
|
||||||
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
|
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
|
||||||
rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP,
|
rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP)
|
||||||
logger=self._logger)
|
|
||||||
meta = rebin.content
|
meta = rebin.content
|
||||||
self._meta = meta
|
self._meta = meta
|
||||||
self._add_file('/meta', meta)
|
self._add_file('/meta', meta)
|
||||||
@ -721,23 +713,35 @@ def option_parser():
|
|||||||
'-o', '--output', default=None,
|
'-o', '--output', default=None,
|
||||||
help=_('Output file. Default is derived from input filename.'))
|
help=_('Output file. Default is derived from input filename.'))
|
||||||
parser.add_option(
|
parser.add_option(
|
||||||
'--verbose', default=False, action='store_true',
|
'-v', '--verbose', default=0, action='count',
|
||||||
help=_('Useful for debugging.'))
|
help=_('Useful for debugging.'))
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
def oeb2lit(opts, opfpath):
|
def oeb2lit(opts, inpath):
|
||||||
logger = LoggingInterface(logging.getLogger('oeb2lit'))
|
logger = Logger(logging.getLogger('oeb2lit'))
|
||||||
logger.setup_cli_handler(opts.verbose)
|
logger.setup_cli_handler(opts.verbose)
|
||||||
litpath = opts.output
|
outpath = opts.output
|
||||||
if litpath is None:
|
if outpath is None:
|
||||||
litpath = os.path.basename(opfpath)
|
outpath = os.path.basename(inpath)
|
||||||
litpath = os.path.splitext(litpath)[0] + '.lit'
|
outpath = os.path.splitext(outpath)[0] + '.lit'
|
||||||
litpath = os.path.abspath(litpath)
|
outpath = os.path.abspath(outpath)
|
||||||
lit = LitWriter(OEBBook(opfpath, logger=logger), logger=logger)
|
context = Context('Firefox', 'MSReader')
|
||||||
with open(litpath, 'wb') as f:
|
oeb = OEBBook(inpath, logger=logger)
|
||||||
lit.dump(f)
|
tocadder = HTMLTOCAdder()
|
||||||
run_plugins_on_postprocess(litpath, 'lit')
|
tocadder.transform(oeb, context)
|
||||||
logger.log_info(_('Output written to ')+litpath)
|
mangler = CaseMangler()
|
||||||
|
mangler.transform(oeb, context)
|
||||||
|
fbase = context.dest.fbase
|
||||||
|
flattener = CSSFlattener(fbase=fbase, unfloat=True, untable=True)
|
||||||
|
flattener.transform(oeb, context)
|
||||||
|
rasterizer = SVGRasterizer()
|
||||||
|
rasterizer.transform(oeb, context)
|
||||||
|
trimmer = ManifestTrimmer()
|
||||||
|
trimmer.transform(oeb, context)
|
||||||
|
lit = LitWriter()
|
||||||
|
lit.dump(oeb, outpath)
|
||||||
|
run_plugins_on_postprocess(outpath, 'lit')
|
||||||
|
logger.info(_('Output written to ') + outpath)
|
||||||
|
|
||||||
|
|
||||||
def main(argv=sys.argv):
|
def main(argv=sys.argv):
|
||||||
@ -746,8 +750,8 @@ def main(argv=sys.argv):
|
|||||||
if len(args) != 1:
|
if len(args) != 1:
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
return 1
|
return 1
|
||||||
opfpath = args[0]
|
inpath = args[0]
|
||||||
oeb2lit(opts, opfpath)
|
oeb2lit(opts, inpath)
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
67
src/calibre/ebooks/mobi/from_any.py
Normal file
67
src/calibre/ebooks/mobi/from_any.py
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
'''
|
||||||
|
Convert any ebook format to Mobipocket.
|
||||||
|
'''
|
||||||
|
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net ' \
|
||||||
|
'and Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import sys, os, glob, logging
|
||||||
|
|
||||||
|
from calibre.ebooks.epub.from_any import any2epub, formats, USAGE
|
||||||
|
from calibre.ebooks.epub import config as common_config
|
||||||
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
from calibre.ebooks.mobi.writer import oeb2mobi, add_mobi_options
|
||||||
|
|
||||||
|
def config(defaults=None):
    '''
    Build the configuration object used for Mobipocket conversion.

    This simply delegates to the EPUB ``config`` factory (imported here as
    ``common_config``), asking for the settings stored under the name
    ``'mobi'``.

    :param defaults: Passed through unchanged to ``common_config``.
    :return: Whatever ``common_config`` returns.
    '''
    mobi_config = common_config(defaults=defaults, name='mobi')
    return mobi_config
|
||||||
|
|
||||||
|
def option_parser(usage=USAGE):
    '''
    Create the command line option parser for any2mobi.

    :param usage: Usage template with two ``%s`` placeholders; they are
                  filled with the output format name and the value
                  returned by ``formats()``.
    :return: The parser obtained from ``config()``, extended with the
             options registered by ``add_mobi_options``.
    '''
    formatted_usage = usage % ('Mobipocket', formats())
    mobi_parser = config().option_parser(usage=formatted_usage)
    add_mobi_options(mobi_parser)
    return mobi_parser
|
||||||
|
|
||||||
|
def any2mobi(opts, path):
    '''
    Convert the e-book at `path` to a Mobipocket file.

    The input is first exploded into an OEB directory (inside a temporary
    directory) by ``any2epub``; the OPF found there is then handed to
    ``oeb2mobi``, which writes ``opts.output``.

    :param opts: Parsed options. ``opts.output`` is derived from the input
                 file name when it is ``None`` and is always made absolute.
                 ``opts.profile`` and ``opts.dont_split_on_page_breaks``
                 are overwritten for the intermediate conversion.
    :param path: Path of the input file; it must have an extension.
    :raises ValueError: if `path` has no file extension.
    :raises IndexError: if the intermediate conversion produced no OPF file.
    '''
    if not os.path.splitext(path)[1]:
        raise ValueError('Unknown file type: '+path)

    if opts.output is None:
        opts.output = os.path.splitext(os.path.basename(path))[0]+'.mobi'

    opts.output = os.path.abspath(opts.output)
    orig_output = opts.output

    with TemporaryDirectory('_any2mobi') as tdir:
        oebdir = os.path.join(tdir, 'oeb')
        os.mkdir(oebdir)
        opts.profile = 'None'
        opts.dont_split_on_page_breaks = True
        orig_bfs = opts.base_font_size2
        # any2epub is only used to explode the book into `oebdir`; point
        # its output at a throw-away name and disable font rescaling.
        opts.output = os.path.join(tdir, 'dummy.epub')
        opts.base_font_size2 = 0
        try:
            any2epub(opts, path, create_epub=False, oeb_cover=True,
                     extract_to=oebdir)
        finally:
            # Restore the caller's settings even when the intermediate
            # conversion fails, so `opts` never keeps pointing into the
            # (about to be deleted) temporary directory.
            opts.base_font_size2 = orig_bfs
            opts.output = orig_output
        opfs = glob.glob(os.path.join(oebdir, '*.opf'))
        if not opfs:
            # Same exception type as the former bare `[0]` lookup, but
            # with a message that names the actual problem.
            raise IndexError('No OPF file found in ' + oebdir)
        opf = opfs[0]
        logging.getLogger('html2epub').info(_('Creating Mobipocket file from EPUB...'))
        oeb2mobi(opts, opf)
|
||||||
|
|
||||||
|
|
||||||
|
def main(args=sys.argv):
    '''
    Command line entry point for any2mobi.

    :param args: Full argument vector, program name included.
    :return: 0 after a conversion was run, 1 when no input file was given
             (the help text and a short message are printed in that case).
    '''
    parser = option_parser()
    opts, positional = parser.parse_args(args)
    # positional[0] is the program name, positional[1] the input file.
    if len(positional) >= 2:
        any2mobi(opts, positional[1])
        return 0
    parser.print_help()
    print('No input file specified.')
    return 1
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
sys.exit(main())
|
@ -3,6 +3,8 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from struct import pack
|
||||||
|
|
||||||
main_language = {
|
main_language = {
|
||||||
0 : "NEUTRAL",
|
0 : "NEUTRAL",
|
||||||
54 : "AFRIKAANS",
|
54 : "AFRIKAANS",
|
||||||
@ -155,5 +157,170 @@ sub_language = {
|
|||||||
2 : "SWEDISH_FINLAND",
|
2 : "SWEDISH_FINLAND",
|
||||||
1 : "UZBEK_LATIN",
|
1 : "UZBEK_LATIN",
|
||||||
2 : "UZBEK_CYRILLIC",
|
2 : "UZBEK_CYRILLIC",
|
||||||
|
}
|
||||||
}
|
|
||||||
|
IANA_MOBI = \
|
||||||
|
{None: {None: (0, 0)},
|
||||||
|
'af': {None: (54, 0)},
|
||||||
|
'ar': {None: (1, 0),
|
||||||
|
'AE': (1, 56),
|
||||||
|
'BH': (1, 60),
|
||||||
|
'DZ': (1, 20),
|
||||||
|
'EG': (1, 12),
|
||||||
|
'JO': (1, 44),
|
||||||
|
'KW': (1, 52),
|
||||||
|
'LB': (1, 48),
|
||||||
|
'MA': (1, 24),
|
||||||
|
'OM': (1, 32),
|
||||||
|
'QA': (1, 64),
|
||||||
|
'SA': (1, 4),
|
||||||
|
'SY': (1, 40),
|
||||||
|
'TN': (1, 28),
|
||||||
|
'YE': (1, 36)},
|
||||||
|
'as': {None: (77, 0)},
|
||||||
|
'az': {None: (44, 0)},
|
||||||
|
'be': {None: (35, 0)},
|
||||||
|
'bg': {None: (2, 0)},
|
||||||
|
'bn': {None: (69, 0)},
|
||||||
|
'ca': {None: (3, 0)},
|
||||||
|
'cs': {None: (5, 0)},
|
||||||
|
'da': {None: (6, 0)},
|
||||||
|
'de': {None: (7, 0),
|
||||||
|
'AT': (7, 12),
|
||||||
|
'CH': (7, 8),
|
||||||
|
'LI': (7, 20),
|
||||||
|
'LU': (7, 16)},
|
||||||
|
'el': {None: (8, 0)},
|
||||||
|
'en': {None: (9, 0),
|
||||||
|
'AU': (9, 12),
|
||||||
|
'BZ': (9, 40),
|
||||||
|
'CA': (9, 16),
|
||||||
|
'GB': (9, 8),
|
||||||
|
'IE': (9, 24),
|
||||||
|
'JM': (9, 32),
|
||||||
|
'NZ': (9, 20),
|
||||||
|
'PH': (9, 52),
|
||||||
|
'TT': (9, 44),
|
||||||
|
'US': (9, 4),
|
||||||
|
'ZA': (9, 28),
|
||||||
|
'ZW': (9, 48)},
|
||||||
|
'es': {None: (10, 0),
|
||||||
|
'AR': (10, 44),
|
||||||
|
'BO': (10, 64),
|
||||||
|
'CL': (10, 52),
|
||||||
|
'CO': (10, 36),
|
||||||
|
'CR': (10, 20),
|
||||||
|
'DO': (10, 28),
|
||||||
|
'EC': (10, 48),
|
||||||
|
'ES': (10, 4),
|
||||||
|
'GT': (10, 16),
|
||||||
|
'HN': (10, 72),
|
||||||
|
'MX': (10, 8),
|
||||||
|
'NI': (10, 76),
|
||||||
|
'PA': (10, 24),
|
||||||
|
'PE': (10, 40),
|
||||||
|
'PR': (10, 80),
|
||||||
|
'PY': (10, 60),
|
||||||
|
'SV': (10, 68),
|
||||||
|
'UY': (10, 56),
|
||||||
|
'VE': (10, 32)},
|
||||||
|
'et': {None: (37, 0)},
|
||||||
|
'eu': {None: (45, 0)},
|
||||||
|
'fa': {None: (41, 0)},
|
||||||
|
'fi': {None: (11, 0)},
|
||||||
|
'fo': {None: (56, 0)},
|
||||||
|
'fr': {None: (12, 0),
|
||||||
|
'BE': (12, 8),
|
||||||
|
'CA': (12, 12),
|
||||||
|
'CH': (12, 16),
|
||||||
|
'FR': (12, 4),
|
||||||
|
'LU': (12, 20),
|
||||||
|
'MC': (12, 24)},
|
||||||
|
'gu': {None: (71, 0)},
|
||||||
|
'he': {None: (13, 0)},
|
||||||
|
'hi': {None: (57, 0)},
|
||||||
|
'hr': {None: (26, 0)},
|
||||||
|
'hu': {None: (14, 0)},
|
||||||
|
'hy': {None: (43, 0)},
|
||||||
|
'id': {None: (33, 0)},
|
||||||
|
'is': {None: (15, 0)},
|
||||||
|
'it': {None: (16, 0),
|
||||||
|
'CH': (16, 8),
|
||||||
|
'IT': (16, 4)},
|
||||||
|
'ja': {None: (17, 0)},
|
||||||
|
'ka': {None: (55, 0)},
|
||||||
|
'kk': {None: (63, 0)},
|
||||||
|
'kn': {None: (75, 0)},
|
||||||
|
'ko': {None: (18, 0)},
|
||||||
|
'kok': {None: (87, 0)},
|
||||||
|
'lt': {None: (39, 0)},
|
||||||
|
'lv': {None: (38, 0)},
|
||||||
|
'mk': {None: (47, 0)},
|
||||||
|
'ml': {None: (76, 0)},
|
||||||
|
'mr': {None: (78, 0)},
|
||||||
|
'ms': {None: (62, 0)},
|
||||||
|
'mt': {None: (58, 0)},
|
||||||
|
'ne': {None: (97, 0)},
|
||||||
|
'nl': {None: (19, 0),
|
||||||
|
'BE': (19, 8)},
|
||||||
|
'no': {None: (20, 0)},
|
||||||
|
'or': {None: (72, 0)},
|
||||||
|
'pa': {None: (70, 0)},
|
||||||
|
'pl': {None: (21, 0)},
|
||||||
|
'pt': {None: (22, 0),
|
||||||
|
'BR': (22, 4),
|
||||||
|
'PT': (22, 8)},
|
||||||
|
'rm': {None: (23, 0)},
|
||||||
|
'ro': {None: (24, 0)},
|
||||||
|
'ru': {None: (25, 0)},
|
||||||
|
'sa': {None: (79, 0)},
|
||||||
|
'se': {None: (59, 0)},
|
||||||
|
'sk': {None: (27, 0)},
|
||||||
|
'sl': {None: (36, 0)},
|
||||||
|
'sq': {None: (28, 0)},
|
||||||
|
'sr': {None: (26, 12),
|
||||||
|
'RS': (26, 12)},
|
||||||
|
'st': {None: (48, 0)},
|
||||||
|
'sv': {None: (29, 0),
|
||||||
|
'FI': (29, 8)},
|
||||||
|
'sw': {None: (65, 0)},
|
||||||
|
'ta': {None: (73, 0)},
|
||||||
|
'te': {None: (74, 0)},
|
||||||
|
'th': {None: (30, 0)},
|
||||||
|
'tn': {None: (50, 0)},
|
||||||
|
'tr': {None: (31, 0)},
|
||||||
|
'ts': {None: (49, 0)},
|
||||||
|
'tt': {None: (68, 0)},
|
||||||
|
'uk': {None: (34, 0)},
|
||||||
|
'ur': {None: (32, 0)},
|
||||||
|
'uz': {None: (67, 0),
|
||||||
|
'UZ': (67, 8)},
|
||||||
|
'vi': {None: (42, 0)},
|
||||||
|
'wen': {None: (46, 0)},
|
||||||
|
'xh': {None: (52, 0)},
|
||||||
|
'zh': {None: (4, 0),
|
||||||
|
'CN': (4, 8),
|
||||||
|
'HK': (4, 12),
|
||||||
|
'SG': (4, 16),
|
||||||
|
'TW': (4, 4)},
|
||||||
|
'zu': {None: (53, 0)}}
|
||||||
|
|
||||||
|
def iana2mobi(icode):
|
||||||
|
subtags = list(icode.split('-'))
|
||||||
|
langdict = IANA_MOBI[None]
|
||||||
|
while len(subtags) > 0:
|
||||||
|
lang = subtags.pop(0).lower()
|
||||||
|
if lang in IANA_MOBI:
|
||||||
|
langdict = IANA_MOBI[lang]
|
||||||
|
break
|
||||||
|
mcode = langdict[None]
|
||||||
|
while len(subtags) > 0:
|
||||||
|
subtag = subtags.pop(0)
|
||||||
|
if subtag not in langdict:
|
||||||
|
subtag = subtag.title()
|
||||||
|
if subtag not in langdict:
|
||||||
|
subtag = subtag.upper()
|
||||||
|
if subtag in langdict:
|
||||||
|
mcode = langdict[subtag]
|
||||||
|
break
|
||||||
|
return pack('>HBB', 0, mcode[1], mcode[0])
|
||||||
|
379
src/calibre/ebooks/mobi/mobiml.py
Normal file
379
src/calibre/ebooks/mobi/mobiml.py
Normal file
@ -0,0 +1,379 @@
|
|||||||
|
'''
|
||||||
|
Transform XHTML/OPS-ish content into Mobipocket HTML 3.2.
|
||||||
|
'''
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
# Contact address corrected from "gmail.cam" to "gmail.com".
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import copy
|
||||||
|
import re
|
||||||
|
from lxml import etree
|
||||||
|
from calibre.ebooks.oeb.base import namespace, barename
|
||||||
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
|
||||||
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
|
from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
|
||||||
|
|
||||||
|
# Namespace URI used for Mobipocket-specific elements.
MBP_NS = 'http://mobipocket.com/ns/mbp'


def MBP(name):
    '''
    Qualify `name` with the Mobipocket namespace.

    :param name: Local element or attribute name, e.g. ``'pagebreak'``.
    :return: The name in ``{namespace-uri}local`` form.
    '''
    qualified = '{%s}%s' % (MBP_NS, name)
    return qualified
|
||||||
|
|
||||||
|
MOBI_NSMAP = {None: XHTML_NS, 'mbp': MBP_NS}
|
||||||
|
|
||||||
|
HEADER_TAGS = set(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
|
||||||
|
NESTABLE_TAGS = set(['ol', 'ul', 'li', 'table', 'tr', 'td', 'th'])
|
||||||
|
TABLE_TAGS = set(['table', 'tr', 'td', 'th'])
|
||||||
|
SPECIAL_TAGS = set(['hr', 'br'])
|
||||||
|
CONTENT_TAGS = set(['img', 'hr', 'br'])
|
||||||
|
|
||||||
|
PAGE_BREAKS = set(['always', 'odd', 'even'])
|
||||||
|
|
||||||
|
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
|
||||||
|
|
||||||
|
def asfloat(value):
    '''
    Coerce a style value to a float.

    :param value: Any object; in this module it is a value looked up in a
                  computed style, which may be a number or a keyword
                  string such as ``'auto'``.
    :return: ``float(value)`` for integers and floats, ``0.0`` for
             everything else.
    '''
    try:
        numeric_types = (int, long, float)
    except NameError:
        # Python 3 has no separate `long` type; `int` covers it.
        numeric_types = (int, float)
    if isinstance(value, numeric_types):
        return float(value)
    return 0.0
|
||||||
|
|
||||||
|
class BlockState(object):
    '''
    Mutable bookkeeping for the block-level output being generated.

    One instance is created per spine item and threaded through the
    ``mobimlize_*`` methods of ``MobiMLizer``.
    '''

    def __init__(self, body):
        '''
        :param body: Output ``<body>`` element that receives the generated
                     markup.
        '''
        self.body = body
        # Stack of currently open nestable elements (lists, tables, ...).
        self.nested = []
        # Current paragraph-level element, the element text is appended
        # to, and the currently open <a href> element; None when unset.
        self.para = self.inline = self.anchor = None
        # Vertical padding / margin collected for the next block.
        self.vpadding = 0.
        self.vmargin = 0.
        # True while a page break is waiting to be emitted.
        self.pbreak = False
        # Format state the current inline element was built for.
        self.istate = None
        # Set once any content has been written.
        self.content = False
|
||||||
|
|
||||||
|
class FormatState(object):
    '''
    Formatting in effect for one element while walking the source tree.

    Two states compare equal when the attributes that influence the
    generated inline markup agree; layout attributes (``left``,
    ``halign``, ``indent``), ``ids``, ``list_num`` and ``attrib`` are
    ignored by the comparison.
    '''

    def __init__(self):
        # Block layout: accumulated left offset, alignment, text indent.
        self.left = 0.
        self.halign = 'auto'
        self.indent = 0.
        # Font size number; 3 is the size that needs no <font> tag.
        self.fsize = 3
        # Element ids still waiting to be emitted as <a id="..."> anchors.
        self.ids = set()
        self.valign = 'baseline'
        self.italic = False
        self.bold = False
        # True when whitespace in text must be preserved.
        self.preserve = False
        self.family = 'serif'
        # Link target of the enclosing anchor, if any.
        self.href = None
        # Counter used to number <li> children.
        self.list_num = 0
        # Attributes copied onto emitted content tags.
        self.attrib = {}

    def __eq__(self, other):
        '''Return True when `other` produces the same inline markup.'''
        compared = ('fsize', 'italic', 'bold', 'href', 'valign',
                    'preserve', 'family')
        for name in compared:
            if not (getattr(self, name) == getattr(other, name)):
                return False
        return True

    def __ne__(self, other):
        '''Return the negation of ``__eq__``.'''
        if self.__eq__(other):
            return False
        return True
|
||||||
|
|
||||||
|
|
||||||
|
class MobiMLizer(object):
|
||||||
|
def transform(self, oeb, context):
    '''
    Rewrite every spine document of `oeb` as Mobipocket markup.

    :param oeb: The book to transform in place; its ``logger`` receives
                a progress message.
    :param context: Conversion context; ``context.dest`` is used as the
                    output profile.
    '''
    oeb.logger.info('Converting XHTML to Mobipocket markup...')
    self.oeb = oeb
    profile = context.dest
    self.profile = profile
    # Invert the profile's `fnums` table so it can be looked up by value.
    inverted = {}
    for key, value in profile.fnums.items():
        inverted[value] = key
    self.fnums = inverted
    self.fmap = KeyMapper(profile.fbase, profile.fbase, inverted.keys())
    self.remove_html_cover()
    self.mobimlize_spine()
|
||||||
|
|
||||||
|
def remove_html_cover(self):
    '''
    Drop the 'cover' guide entry and the manifest item it refers to.

    Nothing is done unless the book has cover metadata and its guide
    contains a 'cover' reference.
    '''
    book = self.oeb
    if not book.metadata.cover:
        return
    if 'cover' not in book.guide:
        return
    cover_href = book.guide['cover'].href
    del book.guide['cover']
    cover_item = book.manifest.hrefs[cover_href]
    book.manifest.remove(cover_item)
|
||||||
|
|
||||||
|
def mobimlize_spine(self):
    '''
    Replace the tree of each spine item with its Mobipocket equivalent.

    For every item a fresh ``<html><body/></html>`` tree is created, the
    original ``<body>`` is converted into it by ``mobimlize_elem`` and
    the new root is stored back as ``item.data``.
    '''
    book = self.oeb
    for item in book.spine:
        stylizer = Stylizer(item.data, item.href, book, self.profile)
        source_body = item.data.find(XHTML('body'))
        new_root = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
        new_body = etree.SubElement(new_root, XHTML('body'))
        # Start each document with a clean block state and one default
        # format state on the stack.
        block_state = BlockState(new_body)
        format_stack = [FormatState()]
        self.mobimlize_elem(source_body, stylizer, block_state,
                            format_stack)
        item.data = new_root
|
||||||
|
|
||||||
|
def mobimlize_font(self, ptsize):
    '''
    Translate a font size through ``self.fmap`` and then ``self.fnums``.

    :param ptsize: Key looked up in ``self.fmap`` (presumably a size in
                   points -- confirm against the caller).
    :return: The ``self.fnums`` entry for the mapped size.
    '''
    mapped_size = self.fmap[ptsize]
    return self.fnums[mapped_size]
|
||||||
|
|
||||||
|
def mobimlize_measure(self, ptsize):
    '''
    Format a length for use in a Mobipocket attribute.

    :param ptsize: A string, which is returned unchanged, or a number.
    :return: For numbers, ``'<n>pt'`` when the rounded value is smaller
             than the profile's ``fbase``, otherwise ``'<n>em'`` with the
             value divided by ``fbase`` and rounded.
    '''
    if isinstance(ptsize, basestring):
        return ptsize
    embase = self.profile.fbase
    rounded = round(ptsize)
    if not (rounded < embase):
        ems = round(ptsize / embase)
        return "%dem" % int(ems)
    return "%dpt" % int(rounded)
|
||||||
|
|
||||||
|
def preize_text(self, text):
    '''
    Split preformatted text into strings and ``<br>`` elements.

    Spaces become no-break spaces (U+00A0) and ``\\r\\n`` / ``\\r`` line
    endings are normalised to ``\\n`` before the text is split into lines.

    :param text: Text to convert; it is passed through ``unicode()``.
    :return: A list that starts with the first line, followed for every
             further line by a new ``<br>`` element and, when the line is
             not empty, the line itself.
    '''
    normalized = unicode(text).replace(u' ', u'\xa0')
    normalized = normalized.replace('\r\n', '\n').replace('\r', '\n')
    pieces = normalized.split('\n')
    # split() always yields at least one piece.
    output = [pieces[0]]
    for piece in pieces[1:]:
        output.append(etree.Element(XHTML('br')))
        if piece:
            output.append(piece)
    return output
|
||||||
|
|
||||||
|
def mobimlize_content(self, tag, text, bstate, istates):
|
||||||
|
bstate.content = True
|
||||||
|
istate = istates[-1]
|
||||||
|
para = bstate.para
|
||||||
|
if tag in SPECIAL_TAGS and not text:
|
||||||
|
para = para if para is not None else bstate.body
|
||||||
|
elif para is None:
|
||||||
|
body = bstate.body
|
||||||
|
if bstate.pbreak:
|
||||||
|
etree.SubElement(body, MBP('pagebreak'))
|
||||||
|
bstate.pbreak = False
|
||||||
|
if istate.ids:
|
||||||
|
for id in istate.ids:
|
||||||
|
etree.SubElement(body, XHTML('a'), attrib={'id': id})
|
||||||
|
istate.ids.clear()
|
||||||
|
bstate.istate = None
|
||||||
|
bstate.anchor = None
|
||||||
|
parent = bstate.nested[-1] if bstate.nested else bstate.body
|
||||||
|
indent = istate.indent
|
||||||
|
left = istate.left
|
||||||
|
if indent < 0 and abs(indent) < left:
|
||||||
|
left += indent
|
||||||
|
indent = 0
|
||||||
|
elif indent != 0 and abs(indent) < self.profile.fbase:
|
||||||
|
indent = (indent / abs(indent)) * self.profile.fbase
|
||||||
|
if tag in NESTABLE_TAGS:
|
||||||
|
para = wrapper = etree.SubElement(parent, XHTML(tag))
|
||||||
|
bstate.nested.append(para)
|
||||||
|
if tag == 'li' and len(istates) > 1:
|
||||||
|
istates[-2].list_num += 1
|
||||||
|
para.attrib['value'] = str(istates[-2].list_num)
|
||||||
|
elif left > 0 and indent >= 0:
|
||||||
|
para = wrapper = etree.SubElement(parent, XHTML('blockquote'))
|
||||||
|
para = wrapper
|
||||||
|
emleft = int(round(left / self.profile.fbase)) - 1
|
||||||
|
emleft = min((emleft, 10))
|
||||||
|
while emleft > 0:
|
||||||
|
para = etree.SubElement(para, XHTML('blockquote'))
|
||||||
|
emleft -= 1
|
||||||
|
else:
|
||||||
|
para = wrapper = etree.SubElement(parent, XHTML('p'))
|
||||||
|
bstate.inline = bstate.para = para
|
||||||
|
vspace = bstate.vpadding + bstate.vmargin
|
||||||
|
bstate.vpadding = bstate.vmargin = 0
|
||||||
|
if tag not in TABLE_TAGS:
|
||||||
|
wrapper.attrib['height'] = self.mobimlize_measure(vspace)
|
||||||
|
para.attrib['width'] = self.mobimlize_measure(indent)
|
||||||
|
elif tag == 'table' and vspace > 0:
|
||||||
|
body = bstate.body
|
||||||
|
vspace = int(round(vspace / self.profile.fbase))
|
||||||
|
index = max((0, len(body) - 1))
|
||||||
|
while vspace > 0:
|
||||||
|
body.insert(index, etree.Element(XHTML('br')))
|
||||||
|
vspace -= 1
|
||||||
|
if istate.halign != 'auto':
|
||||||
|
para.attrib['align'] = istate.halign
|
||||||
|
pstate = bstate.istate
|
||||||
|
if tag in CONTENT_TAGS:
|
||||||
|
bstate.inline = para
|
||||||
|
pstate = bstate.istate = None
|
||||||
|
etree.SubElement(para, XHTML(tag), attrib=istate.attrib)
|
||||||
|
elif tag in TABLE_TAGS:
|
||||||
|
para.attrib['valign'] = 'top'
|
||||||
|
if not text:
|
||||||
|
return
|
||||||
|
if not pstate or istate != pstate:
|
||||||
|
inline = para
|
||||||
|
valign = istate.valign
|
||||||
|
fsize = istate.fsize
|
||||||
|
href = istate.href
|
||||||
|
if not href:
|
||||||
|
bstate.anchor = None
|
||||||
|
elif pstate and pstate.href == href:
|
||||||
|
inline = bstate.anchor
|
||||||
|
else:
|
||||||
|
inline = etree.SubElement(inline, XHTML('a'), href=href)
|
||||||
|
bstate.anchor = inline
|
||||||
|
if valign == 'super':
|
||||||
|
inline = etree.SubElement(inline, XHTML('sup'))
|
||||||
|
elif valign == 'sub':
|
||||||
|
inline = etree.SubElement(inline, XHTML('sub'))
|
||||||
|
elif fsize != 3:
|
||||||
|
inline = etree.SubElement(inline, XHTML('font'),
|
||||||
|
size=str(fsize))
|
||||||
|
if istate.family == 'monospace':
|
||||||
|
inline = etree.SubElement(inline, XHTML('tt'))
|
||||||
|
if istate.italic:
|
||||||
|
inline = etree.SubElement(inline, XHTML('i'))
|
||||||
|
if istate.bold:
|
||||||
|
inline = etree.SubElement(inline, XHTML('b'))
|
||||||
|
bstate.inline = inline
|
||||||
|
bstate.istate = istate
|
||||||
|
inline = bstate.inline
|
||||||
|
content = self.preize_text(text) if istate.preserve else [text]
|
||||||
|
for item in content:
|
||||||
|
if isinstance(item, basestring):
|
||||||
|
if len(inline) == 0:
|
||||||
|
inline.text = (inline.text or '') + item
|
||||||
|
else:
|
||||||
|
last = inline[-1]
|
||||||
|
last.tail = (last.tail or '') + item
|
||||||
|
else:
|
||||||
|
inline.append(item)
|
||||||
|
|
||||||
|
def mobimlize_elem(self, elem, stylizer, bstate, istates):
    """Recursively convert one XHTML element (and its subtree) to MobiML.

    ``elem`` is skipped entirely when it is not an XHTML element or when its
    computed style hides it.  Otherwise a copy of the current inline state
    (``istates[-1]``) is pushed, updated from the element's computed style,
    used while emitting the element's text, children and tails through
    ``self.mobimlize_content``, and popped again before returning.

    ``stylizer`` supplies the computed style via ``stylizer.style(elem)``;
    ``bstate`` is the block-level state shared across the whole traversal
    and is mutated in place (paragraph, margins, page breaks, nesting).
    """
    # Comments/processing instructions have non-string tags; foreign
    # namespaces are not representable in MobiML.
    if not isinstance(elem.tag, basestring) \
       or namespace(elem.tag) != XHTML_NS:
        return
    style = stylizer.style(elem)
    # <mbp:frame-set/> does not exist lalalala
    if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
       or style['visibility'] == 'hidden':
        return
    tag = barename(elem.tag)
    istate = copy.copy(istates[-1])
    istate.list_num = 0
    istates.append(istate)
    left = 0
    display = style['display']
    # Floats, inline boxes and <br> are all laid out as inline content.
    isblock = not display.startswith('inline')
    isblock = isblock and style['float'] == 'none'
    isblock = isblock and tag != 'br'
    if isblock:
        # A block always starts a fresh paragraph.
        bstate.para = None
        istate.halign = style['text-align']
        istate.indent = style['text-indent']
        if style['margin-left'] == 'auto' \
           and style['margin-right'] == 'auto':
            istate.halign = 'center'
        margin = asfloat(style['margin-left'])
        padding = asfloat(style['padding-left'])
        if tag != 'body':
            left = margin + padding
        istate.left += left
        # Adjacent vertical margins collapse (take the max); padding does
        # not, and it also "locks in" any margin accumulated so far.
        vmargin = asfloat(style['margin-top'])
        bstate.vmargin = max((bstate.vmargin, vmargin))
        vpadding = asfloat(style['padding-top'])
        if vpadding > 0:
            bstate.vpadding += bstate.vmargin
            bstate.vmargin = 0
            bstate.vpadding += vpadding
    elif not istate.href:
        # Inline horizontal margin/padding is approximated with
        # non-breaking spaces (skipped inside hyperlinks).
        margin = asfloat(style['margin-left'])
        padding = asfloat(style['padding-left'])
        lspace = margin + padding
        if lspace > 0:
            spaces = int(round((lspace * 3) / style['font-size']))
            elem.text = (u'\xa0' * spaces) + (elem.text or '')
        margin = asfloat(style['margin-right'])
        padding = asfloat(style['padding-right'])
        rspace = margin + padding
        if rspace > 0:
            spaces = int(round((rspace * 3) / style['font-size']))
            if len(elem) == 0:
                elem.text = (elem.text or '') + (u'\xa0' * spaces)
            else:
                # Trailing space must follow *all* of the element's content,
                # which in lxml terms is the tail of the last child (its
                # .text would sit inside the child, before its own children).
                last = elem[-1]
                last.tail = (last.tail or '') + (u'\xa0' * spaces)
    if bstate.content and style['page-break-before'] in PAGE_BREAKS:
        bstate.pbreak = True
    istate.fsize = self.mobimlize_font(style['font-size'])
    istate.italic = True if style['font-style'] == 'italic' else False
    weight = style['font-weight']
    istate.bold = weight in ('bold', 'bolder') or asfloat(weight) > 400
    istate.preserve = (style['white-space'] in ('pre', 'pre-wrap'))
    if 'monospace' in style['font-family']:
        istate.family = 'monospace'
    elif 'sans-serif' in style['font-family']:
        istate.family = 'sans-serif'
    else:
        istate.family = 'serif'
    valign = style['vertical-align']
    if valign in ('super', 'text-top') or asfloat(valign) > 0:
        istate.valign = 'super'
    elif valign == 'sub' or asfloat(valign) < 0:
        istate.valign = 'sub'
    else:
        istate.valign = 'baseline'
    # Both id= and name= can be link targets.
    if 'id' in elem.attrib:
        istate.ids.add(elem.attrib['id'])
    if 'name' in elem.attrib:
        istate.ids.add(elem.attrib['name'])
    if tag == 'a' and 'href' in elem.attrib:
        istate.href = elem.attrib['href']
    istate.attrib.clear()
    if tag == 'img' and 'src' in elem.attrib:
        istate.attrib['src'] = elem.attrib['src']
        istate.attrib['align'] = 'baseline'
        for prop in ('width', 'height'):
            if style[prop] != 'auto':
                value = style[prop]
                # An image exactly as large as the profile's page is
                # expressed as 100%; anything else is converted to ems.
                if value == getattr(self.profile, prop):
                    result = '100%'
                else:
                    ems = int(round(value / self.profile.fbase))
                    result = "%dem" % ems
                istate.attrib[prop] = result
    elif tag == 'hr' and asfloat(style['width']) > 0:
        prop = style['width'] / self.profile.width
        istate.attrib['width'] = "%d%%" % int(round(prop * 100))
    elif display == 'table':
        tag = 'table'
    elif display == 'table-row':
        tag = 'tr'
    elif display == 'table-cell':
        tag = 'td'
    text = None
    if elem.text:
        if istate.preserve:
            text = elem.text
        elif len(elem) > 0 and elem.text.isspace():
            # Pure inter-element whitespace before the first child.
            text = None
        else:
            text = COLLAPSE.sub(' ', elem.text)
    if text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS:
        self.mobimlize_content(tag, text, bstate, istates)
    for child in elem:
        self.mobimlize_elem(child, stylizer, bstate, istates)
        tail = None
        if child.tail:
            if istate.preserve:
                tail = child.tail
            elif bstate.para is None and child.tail.isspace():
                # Whitespace between blocks carries no content.
                tail = None
            else:
                tail = COLLAPSE.sub(' ', child.tail)
        if tail:
            self.mobimlize_content(tag, tail, bstate, istates)
    if bstate.content and style['page-break-after'] in PAGE_BREAKS:
        bstate.pbreak = True
    if isblock:
        para = bstate.para
        # A paragraph holding only a non-breaking space is a blank line.
        if para is not None and para.text == u'\xa0':
            para.getparent().replace(para, etree.Element(XHTML('br')))
        bstate.para = None
        bstate.istate = None
        vmargin = asfloat(style['margin-bottom'])
        bstate.vmargin = max((bstate.vmargin, vmargin))
        vpadding = asfloat(style['padding-bottom'])
        if vpadding > 0:
            bstate.vpadding += bstate.vmargin
            bstate.vmargin = 0
            bstate.vpadding += vpadding
    if tag in NESTABLE_TAGS and bstate.nested:
        bstate.nested.pop()
    istates.pop()
|
@ -2,7 +2,11 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
|
||||||
|
'and Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
|
||||||
|
from cStringIO import StringIO
|
||||||
|
from struct import pack
|
||||||
|
|
||||||
COUNT_BITS = 3
|
COUNT_BITS = 3
|
||||||
|
|
||||||
@ -31,4 +35,54 @@ def decompress_doc(data):
|
|||||||
res.append(res[j - di+k])
|
res.append(res[j - di+k])
|
||||||
|
|
||||||
return ''.join([chr(i) for i in res])
|
return ''.join([chr(i) for i in res])
|
||||||
|
|
||||||
|
def compress_doc(data):
    """Compress the byte string ``data`` using PalmDOC (LZ77-style) coding.

    Three encodings are produced, tried in this order at each position:

    * a two-byte back-reference (``0x8000 | distance << 3 | length - 3``)
      to an earlier run of 3-10 bytes at most 2047 bytes back;
    * a single byte ``c ^ 0x80`` standing for a space followed by the
      byte ``c`` when ``0x40 <= c < 0x80``;
    * the byte itself when it is NUL or in ``0x09..0x7f``; any other byte
      starts a run of up to 8 such bytes, written as a length byte
      followed by the raw run.

    Returns the compressed byte string.
    """
    out = []
    emit = out.append
    i = 0
    ldata = len(data)
    while i < ldata:
        # Back-references are only attempted away from both ends.
        if i > 10 and (ldata - i) > 10:
            chunk = ''
            match = -1
            # Prefer the longest repeat; fall back to shorter ones.
            for j in range(10, 2, -1):
                chunk = data[i:i+j]
                try:
                    match = data.rindex(chunk, 0, i)
                except ValueError:
                    continue
                if (i - match) <= 2047:
                    break
                # Too far back to encode in 11 bits of distance.
                match = -1
            if match >= 0:
                n = len(chunk)
                m = i - match
                code = 0x8000 + ((m << 3) & 0x3ff8) + (n - 3)
                # Big-endian 16-bit code.
                emit(chr(code >> 8) + chr(code & 0xff))
                i += n
                continue
        ch = data[i]
        och = ord(ch)
        i += 1
        # ``i`` already points at the following byte, so ``i < ldata`` is the
        # correct bound (the previous ``i + 1`` skipped the final pair).
        if ch == ' ' and i < ldata:
            onch = ord(data[i])
            if onch >= 0x40 and onch < 0x80:
                emit(chr(onch ^ 0x80))
                i += 1
                continue
        if och == 0 or (och > 8 and och < 0x80):
            emit(ch)
        else:
            # Bytes 0x01-0x08 and >= 0x80 collide with control codes, so
            # they are escaped as <count><raw bytes>, at most 8 at a time.
            j = i
            binseq = [ch]
            while j < ldata and len(binseq) < 8:
                ch = data[j]
                och = ord(ch)
                if och == 0 or (och > 8 and och < 0x80):
                    break
                binseq.append(ch)
                j += 1
            emit(chr(len(binseq)))
            emit(''.join(binseq))
            i += len(binseq) - 1
    return ''.join(out)
|
||||||
|
|
||||||
|
583
src/calibre/ebooks/mobi/writer.py
Normal file
583
src/calibre/ebooks/mobi/writer.py
Normal file
@ -0,0 +1,583 @@
|
|||||||
|
'''
|
||||||
|
Write content to Mobipocket books.
|
||||||
|
'''
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>'
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
from struct import pack
|
||||||
|
import functools
|
||||||
|
import time
|
||||||
|
import random
|
||||||
|
from cStringIO import StringIO
|
||||||
|
import re
|
||||||
|
from itertools import izip, count
|
||||||
|
from collections import defaultdict
|
||||||
|
from urlparse import urldefrag
|
||||||
|
import logging
|
||||||
|
from lxml import etree
|
||||||
|
from PIL import Image
|
||||||
|
from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \
|
||||||
|
OEB_RASTER_IMAGES
|
||||||
|
from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname
|
||||||
|
from calibre.ebooks.oeb.base import Logger, OEBBook
|
||||||
|
from calibre.ebooks.oeb.profile import Context
|
||||||
|
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
|
||||||
|
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
|
||||||
|
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
|
||||||
|
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
|
||||||
|
from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
|
||||||
|
from calibre.ebooks.mobi.palmdoc import compress_doc
|
||||||
|
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||||
|
from calibre.ebooks.mobi.mobiml import MBP_NS, MBP, MobiMLizer
|
||||||
|
from calibre.customize.ui import run_plugins_on_postprocess
|
||||||
|
from calibre.utils.config import OptionParser
|
||||||
|
from optparse import OptionGroup
|
||||||
|
|
||||||
|
# TODO:
|
||||||
|
# - Allow override CSS (?)
|
||||||
|
# - Generate index records
|
||||||
|
# - Optionally rasterize tables
|
||||||
|
|
||||||
|
# Maps OEB metadata term names to the numeric EXTH record type written for
# them by MobiWriter._build_exth(); terms not listed here are not emitted.
EXTH_CODES = {
    'creator': 100,
    'publisher': 101,
    'description': 103,
    'identifier': 104,
    'subject': 105,
    'date': 106,
    'review': 107,
    'contributor': 108,
    'rights': 109,
    'type': 111,
    'source': 112,
    'title': 503,
    }
|
||||||
|
|
||||||
|
# Number of uncompressed text bytes stored per text record.
RECORD_SIZE = 0x1000

# Compression identifiers written at the start of record 0.
UNCOMPRESSED = 1
PALMDOC = 2
HUFFDIC = 17480

# Upper bounds (in bytes) on an encoded image: the first is used when
# --rescale-images is given, the second is the default.
PALM_MAX_IMAGE_SIZE = 63 * 1024
OTHER_MAX_IMAGE_SIZE = 10 * 1024 * 1024
# Limits applied to the cover thumbnail: encoded size and (width, height).
MAX_THUMB_SIZE = 16 * 1024
MAX_THUMB_DIMEN = (180, 240)
|
||||||
|
|
||||||
|
def encode(data):
    """Return ``data`` encoded as UTF-8."""
    utf8 = data.encode('utf-8')
    return utf8
|
||||||
|
|
||||||
|
# Almost like the one for MS LIT, but not quite.
DECINT_FORWARD = 0
DECINT_BACKWARD = 1
def decint(value, direction):
    """Encode a non-negative integer as a variable-width byte string.

    ``value`` is split into 7-bit groups which are emitted most significant
    group first.  Exactly one byte carries the high (0x80) marker bit:

    * ``DECINT_FORWARD``  -- the last byte (least significant group);
    * ``DECINT_BACKWARD`` -- the first byte (most significant group).

    Any other ``direction`` leaves every byte unmarked.

    Raises ``ValueError`` for a negative ``value``, which would otherwise
    never shift down to zero and loop forever.
    """
    if value < 0:
        raise ValueError('decint() requires a non-negative value, got %r'
                         % (value,))
    # Least significant group first; reversed on output.
    septets = []
    while True:
        septets.append(value & 0x7f)
        value >>= 7
        if value == 0:
            break
    if direction == DECINT_FORWARD:
        septets[0] |= 0x80
    elif direction == DECINT_BACKWARD:
        septets[-1] |= 0x80
    return ''.join(chr(b) for b in reversed(septets))
|
||||||
|
|
||||||
|
|
||||||
|
class Serializer(object):
    """Serialize an OEB book's guide and spine into one MobiML byte string.

    All work happens in the constructor.  Afterwards:

    * ``text``   -- the complete serialized markup;
    * ``breaks`` -- buffer offsets recorded just before each non-linear
      spine item.

    Hyperlinks are written as fixed-width ``filepos=0000000000``
    placeholders while serializing and patched with the real byte offset of
    their target once every target position is known.
    """

    # Namespace URI -> prefix used on output.  Elements and attributes in
    # namespaces missing from this map are not serialized.
    NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}

    def __init__(self, oeb, images):
        """``oeb`` is the book; ``images`` maps image hrefs to record indices."""
        self.oeb = oeb
        self.images = images
        # '<item id>' or '<item id>#<fragment>' -> offset of the target.
        self.id_offsets = {}
        # Same keys -> offsets of the placeholders that must be patched.
        self.href_offsets = defaultdict(list)
        self.breaks = []
        buffer = self.buffer = StringIO()
        buffer.write('<html>')
        self.serialize_head()
        self.serialize_body()
        buffer.write('</html>')
        self.fixup_links()
        self.text = buffer.getvalue()

    def serialize_head(self):
        """Write ``<head>``, containing the guide when the book has one."""
        buffer = self.buffer
        buffer.write('<head>')
        if len(self.oeb.guide) > 0:
            self.serialize_guide()
        buffer.write('</head>')

    def serialize_guide(self):
        """Write a ``<reference>`` for every guide entry targeting a document."""
        buffer = self.buffer
        hrefs = self.oeb.manifest.hrefs
        buffer.write('<guide>')
        for ref in self.oeb.guide.values():
            path, frag = urldefrag(ref.href)
            # Skip references to files missing from the manifest instead of
            # failing with a KeyError, as well as non-document targets.
            if path not in hrefs or hrefs[path].media_type not in OEB_DOCS:
                continue
            buffer.write('<reference type="')
            self.serialize_text(ref.type, quot=True)
            buffer.write('" ')
            if ref.title is not None:
                buffer.write('title="')
                self.serialize_text(ref.title, quot=True)
                buffer.write('" ')
            self.serialize_href(ref.href)
            # Space required or won't work, I kid you not
            buffer.write(' />')
        buffer.write('</guide>')

    def serialize_href(self, href, base=None):
        """Write a ``filepos=`` placeholder for ``href`` and remember it.

        ``base`` is the manifest item containing the link; it resolves
        relative paths and is the target document of fragment-only links.

        Returns True when the placeholder was written, False when ``href``
        cannot be resolved to a spine document (nothing is written then).
        """
        hrefs = self.oeb.manifest.hrefs
        path, frag = urldefrag(href)
        if path and base:
            path = base.abshref(path)
        if path and path not in hrefs:
            return False
        buffer = self.buffer
        item = hrefs[path] if path else None
        if item and item.spine_position is None:
            return False
        if not item and base is None:
            # Fragment-only link with no containing document: unresolvable.
            return False
        id = item.id if item else base.id
        href = '#'.join((id, frag)) if frag else id
        buffer.write('filepos=')
        self.href_offsets[href].append(buffer.tell())
        # Ten digits, overwritten in place by fixup_links().
        buffer.write('0000000000')
        return True

    def serialize_body(self):
        """Write ``<body>``: linear spine items first, then non-linear ones."""
        buffer = self.buffer
        buffer.write('<body>')
        # CybookG3 'Start Reading' link
        if 'text' in self.oeb.guide:
            href = self.oeb.guide['text'].href
            buffer.write('<a ')
            self.serialize_href(href)
            buffer.write(' />')
        spine = [item for item in self.oeb.spine if item.linear]
        spine.extend([item for item in self.oeb.spine if not item.linear])
        for item in spine:
            self.serialize_item(item)
        buffer.write('</body>')

    def serialize_item(self, item):
        """Serialize the body children of one spine item plus a page break."""
        buffer = self.buffer
        if not item.linear:
            self.breaks.append(buffer.tell() - 1)
        self.id_offsets[item.id] = buffer.tell()
        for elem in item.data.find(XHTML('body')):
            self.serialize_elem(elem, item)
        buffer.write('<mbp:pagebreak/>')

    def serialize_elem(self, elem, item, nsrmap=NSRMAP):
        """Recursively write ``elem`` (an element of ``item``'s document).

        ``id``/``name`` attributes are recorded as link targets and removed
        from the element; ``href`` becomes a ``filepos`` placeholder and
        ``src`` an image ``recindex`` whenever they can be resolved.
        """
        buffer = self.buffer
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) not in nsrmap:
            return
        tag = prefixname(elem.tag, nsrmap)
        for attr in ('name', 'id'):
            if attr in elem.attrib:
                id = '#'.join((item.id, elem.attrib[attr]))
                self.id_offsets[id] = buffer.tell()
                del elem.attrib[attr]
        # An anchor that only existed to carry an id/name is now empty.
        if tag == 'a' and not elem.attrib \
           and not len(elem) and not elem.text:
            return
        buffer.write('<')
        buffer.write(tag)
        if elem.attrib:
            for attr, val in elem.attrib.items():
                if namespace(attr) not in nsrmap:
                    continue
                attr = prefixname(attr, nsrmap)
                buffer.write(' ')
                if attr == 'href':
                    if self.serialize_href(val, item):
                        continue
                elif attr == 'src':
                    href = item.abshref(val)
                    # Only known images have a record index; anything else
                    # falls through and keeps its plain src attribute.
                    if href in self.images:
                        index = self.images[href]
                        buffer.write('recindex="%05d"' % index)
                        continue
                buffer.write(attr)
                buffer.write('="')
                self.serialize_text(val, quot=True)
                buffer.write('"')
        if elem.text or len(elem) > 0:
            buffer.write('>')
            if elem.text:
                self.serialize_text(elem.text)
            for child in elem:
                self.serialize_elem(child, item)
                if child.tail:
                    self.serialize_text(child.tail)
            buffer.write('</%s>' % tag)
        else:
            buffer.write('/>')

    def serialize_text(self, text, quot=False):
        """Write ``text`` with markup characters escaped, UTF-8 encoded.

        ``quot=True`` additionally escapes double quotes, for use inside a
        double-quoted attribute value.
        """
        # '&' must be escaped first so later entities are not re-escaped.
        text = text.replace('&', '&amp;')
        text = text.replace('<', '&lt;')
        text = text.replace('>', '&gt;')
        if quot:
            text = text.replace('"', '&quot;')
        self.buffer.write(encode(text))

    def fixup_links(self):
        """Overwrite every ``filepos`` placeholder with its target offset.

        A link whose anchor was never seen is reported and redirected to
        the start of its target document rather than aborting the whole
        serialization with a KeyError.
        """
        buffer = self.buffer
        id_offsets = self.id_offsets
        for id, hoffs in self.href_offsets.items():
            if id not in id_offsets:
                self.oeb.logger.warn('Hyperlink target %r not found' % id)
                # Keys are '<item id>#<fragment>'; retry with the item alone.
                id = id.split('#', 1)[0]
            if id not in id_offsets:
                # Leave the all-zero placeholder in place.
                continue
            ioff = id_offsets[id]
            for hoff in hoffs:
                buffer.seek(hoff)
                buffer.write('%010d' % ioff)
|
||||||
|
|
||||||
|
|
||||||
|
class MobiWriter(object):
    """Write an OEB book (already converted to MobiML) as a Mobipocket file.

    ``compression`` is ``UNCOMPRESSED`` (default) or ``PALMDOC``;
    ``imagemax`` is the maximum encoded size in bytes of each image
    (default ``OTHER_MAX_IMAGE_SIZE``).
    """

    COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')

    def __init__(self, compression=None, imagemax=None):
        self._compression = compression or UNCOMPRESSED
        self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE

    def dump(self, oeb, path):
        """Write ``oeb`` to ``path``, a filename or a writable stream."""
        if hasattr(path, 'write'):
            return self._dump_stream(oeb, path)
        with open(path, 'w+b') as stream:
            return self._dump_stream(oeb, stream)

    def _write(self, *data):
        """Write each byte string in ``data`` to the output stream."""
        for datum in data:
            self._stream.write(datum)

    def _tell(self):
        """Return the current position in the output stream."""
        return self._stream.tell()

    def _dump_stream(self, oeb, stream):
        """Build all records in memory, then write header and records."""
        self._oeb = oeb
        self._stream = stream
        # Slot 0 is reserved for record 0, which is generated last because
        # it needs the text length and record counts.
        self._records = [None]
        self._generate_content()
        self._generate_record0()
        self._write_header()
        self._write_content()

    def _generate_content(self):
        """Generate the text records followed by the image records."""
        self._map_image_names()
        self._generate_text()
        self._generate_images()

    def _map_image_names(self):
        """Assign consecutive 1-based indices to the raster images."""
        index = 1
        self._images = images = {}
        for item in self._oeb.manifest.values():
            if item.media_type in OEB_RASTER_IMAGES:
                images[item.href] = index
                index += 1

    def _read_text_record(self, text):
        """Read the next text record from the stream ``text``.

        Returns ``(data, overlap)``: ``data`` holds up to ``RECORD_SIZE``
        bytes from the current position and ``overlap`` the bytes just past
        the record boundary needed to complete a UTF-8 character that the
        boundary splits.  The stream is left at the boundary, so the
        overlap bytes are read again as the start of the next record.
        """
        pos = text.tell()
        text.seek(0, 2)
        npos = min((pos + RECORD_SIZE, text.tell()))
        # Walk backwards from the boundary until the tail contains at least
        # one decodable character.
        last = ''
        while not last.decode('utf-8', 'ignore'):
            size = len(last) + 1
            text.seek(npos - size)
            last = text.read(size)
        extra = 0
        try:
            last.decode('utf-8')
        except UnicodeDecodeError:
            # The boundary cuts a multi-byte sequence: extend forwards one
            # byte at a time until the tail decodes cleanly.
            prev = len(last)
            while True:
                text.seek(npos - prev)
                last = text.read(len(last) + 1)
                try:
                    last.decode('utf-8')
                except UnicodeDecodeError:
                    pass
                else:
                    break
            extra = len(last) - prev
        text.seek(pos)
        data = text.read(RECORD_SIZE)
        overlap = text.read(extra)
        text.seek(npos)
        return data, overlap

    def _generate_text(self):
        """Serialize the book and split the markup into text records.

        Each record is the (optionally compressed) data followed by the
        trailing entries announced in record 0: the multibyte overlap with
        its one-byte length, then the break offsets with their total size.
        """
        self._oeb.logger.info('Serializing markup content...')
        serializer = Serializer(self._oeb, self._images)
        breaks = serializer.breaks
        text = serializer.text
        self._text_length = len(text)
        text = StringIO(text)
        nrecords = 0
        offset = 0
        if self._compression != UNCOMPRESSED:
            self._oeb.logger.info('Compressing markup content...')
        data, overlap = self._read_text_record(text)
        while len(data) > 0:
            if self._compression == PALMDOC:
                data = compress_doc(data)
            record = StringIO()
            record.write(data)
            record.write(overlap)
            record.write(pack('>B', len(overlap)))
            nextra = 0
            pbreak = 0
            running = offset
            # Breaks inside this record, each stored relative to the
            # previous one in units of 8 bytes.
            while breaks and (breaks[0] - offset) < RECORD_SIZE:
                pbreak = (breaks.pop(0) - running) >> 3
                encoded = decint(pbreak, DECINT_FORWARD)
                record.write(encoded)
                running += pbreak << 3
                nextra += len(encoded)
            # The size field counts itself, so find the smallest width
            # whose encoding is exactly that wide.
            lsize = 1
            while True:
                size = decint(nextra + lsize, DECINT_BACKWARD)
                if len(size) == lsize:
                    break
                lsize += 1
            record.write(size)
            self._records.append(record.getvalue())
            nrecords += 1
            offset += RECORD_SIZE
            data, overlap = self._read_text_record(text)
        self._text_nrecords = nrecords

    def _rescale_image(self, data, maxsizeb, dimen=None):
        """Return image ``data`` as GIF/JPEG of at most ``maxsizeb`` bytes.

        Formats other than JPEG and GIF are re-encoded, and the image is
        shrunk to fit ``dimen`` (width, height) when given.  If the result
        is still too large, JPEG quality is lowered step by step and then
        the image is scaled down.  When nothing fits, the last (smallest)
        attempt is returned anyway.
        """
        image = Image.open(StringIO(data))
        format = image.format
        changed = False
        if image.format not in ('JPEG', 'GIF'):
            width, height = image.size
            area = width * height
            # Small images become GIF, larger ones JPEG.
            format = 'GIF' if area <= 40000 else 'JPEG'
            changed = True
        if dimen is not None:
            image.thumbnail(dimen, Image.ANTIALIAS)
            changed = True
        if changed:
            data = StringIO()
            image.save(data, format)
            data = data.getvalue()
        if len(data) <= maxsizeb:
            return data
        image = image.convert('RGBA')
        for quality in xrange(95, -1, -1):
            data = StringIO()
            image.save(data, 'JPEG', quality=quality)
            data = data.getvalue()
            if len(data) <= maxsizeb:
                return data
        width, height = image.size
        for scale in xrange(99, 0, -1):
            scale = scale / 100.
            data = StringIO()
            scaled = image.copy()
            # Both dimensions must be integers for thumbnail().
            size = (int(width * scale), int(height * scale))
            scaled.thumbnail(size, Image.ANTIALIAS)
            scaled.save(data, 'JPEG', quality=0)
            data = data.getvalue()
            if len(data) <= maxsizeb:
                return data
        # Well, we tried?
        return data

    def _generate_images(self):
        """Append one record per image, in image-index order."""
        self._oeb.logger.info('Serializing images...')
        images = [(index, href) for href, index in self._images.items()]
        images.sort()
        for _, href in images:
            item = self._oeb.manifest.hrefs[href]
            data = self._rescale_image(item.data, self._imagemax)
            self._records.append(data)

    def _generate_record0(self):
        """Build record 0: PalmDOC header, MOBI header, EXTH block, title."""
        metadata = self._oeb.metadata
        exth = self._build_exth()
        record0 = StringIO()
        record0.write(pack('>HHIHHHH', self._compression, 0,
            self._text_length, self._text_nrecords, RECORD_SIZE, 0, 0))
        uid = random.randint(0, 0xffffffff)
        # The header declares codepage 65001 (UTF-8), so the title is
        # stored as UTF-8; str() would fail on non-ASCII titles.
        title = unicode(metadata.title[0]).encode('utf-8')
        record0.write('MOBI')
        record0.write(pack('>IIIII', 0xe8, 2, 65001, uid, 6))
        record0.write('\xff' * 40)
        record0.write(pack('>I', self._text_nrecords + 1))
        record0.write(pack('>II', 0xe8 + 16 + len(exth), len(title)))
        record0.write(iana2mobi(str(metadata.language[0])))
        record0.write('\0' * 8)
        record0.write(pack('>II', 6, self._text_nrecords + 1))
        record0.write('\0' * 16)
        record0.write(pack('>I', 0x50))
        record0.write('\0' * 32)
        record0.write(pack('>IIII', 0xffffffff, 0xffffffff, 0, 0))
        # The '5' is a bitmask of extra record data at the end:
        #   - 0x1: <extra multibyte bytes><size> (?)
        #   - 0x4: <uncrossable breaks><size>
        # Of course, the formats aren't quite the same.
        # TODO: What the hell are the rest of these fields?
        record0.write(pack('>IIIIIIIIIIIIIIIII',
            0, 0, 0, 0xffffffff, 0, 0xffffffff, 0, 0xffffffff, 0, 0xffffffff,
            0, 0xffffffff, 0, 0xffffffff, 0xffffffff, 5, 0xffffffff))
        record0.write(exth)
        record0.write(title)
        record0 = record0.getvalue()
        # Pad with NULs up to 2452 bytes (no-op when already longer).
        self._records[0] = record0 + ('\0' * (2452 - len(record0)))

    def _build_exth(self):
        """Return the EXTH block built from the book's metadata.

        One record is written per value of every term in ``EXTH_CODES``;
        when the book has a cover, three more records reference the cover
        image and a freshly generated thumbnail.
        """
        oeb = self._oeb
        exth = StringIO()
        nrecs = 0
        for term in oeb.metadata:
            if term not in EXTH_CODES: continue
            code = EXTH_CODES[term]
            for item in oeb.metadata[term]:
                data = self.COLLAPSE_RE.sub(' ', unicode(item))
                data = data.encode('utf-8')
                # Record length includes the 8-byte type/length prefix.
                exth.write(pack('>II', code, len(data) + 8))
                exth.write(data)
                nrecs += 1
        if oeb.metadata.cover:
            id = str(oeb.metadata.cover[0])
            item = oeb.manifest.ids[id]
            href = item.href
            # Image indices are 1-based; the EXTH fields are 0-based.
            index = self._images[href] - 1
            exth.write(pack('>III', 0xc9, 0x0c, index))
            exth.write(pack('>III', 0xcb, 0x0c, 0))
            index = self._add_thumbnail(item) - 1
            exth.write(pack('>III', 0xca, 0x0c, index))
            nrecs += 3
        exth = exth.getvalue()
        # Pad to a multiple of four bytes.
        trail = len(exth) % 4
        pad = '' if not trail else '\0' * (4 - trail)
        exth = ['EXTH', pack('>II', len(exth) + 12, nrecs), exth, pad]
        return ''.join(exth)

    def _add_thumbnail(self, item):
        """Add a thumbnail of image ``item``; return its 1-based index."""
        data = self._rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN)
        manifest = self._oeb.manifest
        id, href = manifest.generate('thumbnail', 'thumbnail.jpeg')
        manifest.add(id, href, 'image/jpeg', data=data)
        index = len(self._images) + 1
        self._images[href] = index
        self._records.append(data)
        return index

    def _write_header(self):
        """Write the database header and the record offset table."""
        # Go through unicode so non-ASCII titles do not raise; after the
        # substitution only ASCII characters remain.
        title = unicode(self._oeb.metadata.title[0])
        title = re.sub('[^-A-Za-z0-9]+', '_', title)[:32].encode('ascii')
        title = title + ('\0' * (32 - len(title)))
        now = int(time.time())
        nrecords = len(self._records)
        self._write(title, pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0),
            'BOOK', 'MOBI', pack('>IIH', nrecords, 0, nrecords))
        # Records start after the 8-byte-per-record table and 2 pad bytes.
        offset = self._tell() + (8 * nrecords) + 2
        for id, record in enumerate(self._records):
            # Offset, one zero byte, then the id as three bytes.
            self._write(pack('>I', offset), '\0', pack('>I', id)[1:])
            offset += len(record)
        self._write('\0\0')

    def _write_content(self):
        """Write every record, in order, to the output stream."""
        for record in self._records:
            self._write(record)
|
||||||
|
|
||||||
|
|
||||||
|
def add_mobi_options(parser):
    """Add the 'Mobipocket' and 'Profiles' option groups to ``parser``."""
    profile_names = ', '.join(sorted(Context.PROFILES.keys()))

    mobi = OptionGroup(parser, _('Mobipocket'),
        _('Mobipocket-specific options.'))
    mobi.add_option(
        '-c', '--compress', default=False, action='store_true',
        help=_('Compress file text using PalmDOC compression.'))
    mobi.add_option(
        '-r', '--rescale-images', default=False, action='store_true',
        help=_('Modify images to meet Palm device size limitations.'))
    parser.add_option_group(mobi)

    description = _('Device renderer profiles. '
        'Affects conversion of default font sizes and rasterization '
        'resolution.  Valid profiles are: %s.') % profile_names
    renderers = OptionGroup(parser, _('Profiles'), description)
    renderers.add_option(
        '--source-profile', default='Browser', metavar='PROFILE',
        help=_("Source renderer profile. Default is 'Browser'."))
    renderers.add_option(
        '--dest-profile', default='CybookG3', metavar='PROFILE',
        help=_("Destination renderer profile. Default is 'CybookG3'."))
    parser.add_option_group(renderers)
|
||||||
|
|
||||||
|
def option_parser():
    """Build and return the command-line option parser for this tool."""
    cli = OptionParser(usage=_('%prog [options] OPFFILE'))
    cli.add_option(
        '-o', '--output', default=None,
        help=_('Output file. Default is derived from input filename.'))
    cli.add_option(
        '-v', '--verbose', default=0, action='count',
        help=_('Useful for debugging.'))
    # Mobipocket- and profile-specific groups.
    add_mobi_options(cli)
    return cli
|
||||||
|
|
||||||
|
def oeb2mobi(opts, inpath):
    """Convert the OEB book at ``inpath`` to a Mobipocket file.

    ``opts`` carries the parsed command-line options.  Returns 1 when a
    profile name is unknown and None once the output has been written.
    """
    logger = Logger(logging.getLogger('oeb2mobi'))
    logger.setup_cli_handler(opts.verbose)

    outpath = opts.output
    if outpath is None:
        # Default: input basename with a .mobi extension, in the cwd.
        stem = os.path.splitext(os.path.basename(inpath))[0]
        outpath = stem + '.mobi'

    source = opts.source_profile
    if source not in Context.PROFILES:
        logger.error(_('Unknown source profile %r') % source)
        return 1
    dest = opts.dest_profile
    if dest not in Context.PROFILES:
        logger.error(_('Unknown destination profile %r') % dest)
        return 1

    if opts.compress:
        compression = PALMDOC
    else:
        compression = UNCOMPRESSED
    if opts.rescale_images:
        imagemax = PALM_MAX_IMAGE_SIZE
    else:
        imagemax = None

    context = Context(source, dest)
    oeb = OEBBook(inpath, logger=logger)

    # Transformation pipeline; the order of the steps is significant.
    HTMLTOCAdder().transform(oeb, context)
    CaseMangler().transform(oeb, context)
    fbase = context.dest.fbase
    fkey = context.dest.fnums.values()
    CSSFlattener(
        fbase=fbase, fkey=fkey, unfloat=True, untable=True
        ).transform(oeb, context)
    SVGRasterizer().transform(oeb, context)
    ManifestTrimmer().transform(oeb, context)
    MobiMLizer().transform(oeb, context)

    MobiWriter(compression=compression, imagemax=imagemax).dump(oeb, outpath)
    run_plugins_on_postprocess(outpath, 'mobi')
    logger.info(_('Output written to ') + outpath)
|
||||||
|
|
||||||
|
def main(argv=sys.argv):
    """Command-line entry point; returns the process exit status.

    Exactly one positional argument (the input file) is required;
    otherwise the help text is printed and 1 is returned.
    """
    parser = option_parser()
    opts, args = parser.parse_args(argv[1:])
    if len(args) == 1:
        return oeb2mobi(opts, args[0])
    parser.print_help()
    return 1
|
||||||
|
|
||||||
|
# Run the converter when executed as a script, using main()'s return value
# as the process exit status.
if __name__ == '__main__':
    sys.exit(main())
|
2
src/calibre/ebooks/oeb/__init__.py
Normal file
2
src/calibre/ebooks/oeb/__init__.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
@ -14,10 +14,14 @@ from itertools import izip, count
|
|||||||
from urlparse import urldefrag, urlparse, urlunparse
|
from urlparse import urldefrag, urlparse, urlunparse
|
||||||
from urllib import unquote as urlunquote
|
from urllib import unquote as urlunquote
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
|
import htmlentitydefs
|
||||||
|
import uuid
|
||||||
|
import copy
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from calibre import LoggingInterface
|
from calibre import LoggingInterface
|
||||||
|
|
||||||
XML_PARSER = etree.XMLParser(recover=True, resolve_entities=False)
|
XML_PARSER = etree.XMLParser(recover=True)
|
||||||
XML_NS = 'http://www.w3.org/XML/1998/namespace'
|
XML_NS = 'http://www.w3.org/XML/1998/namespace'
|
||||||
XHTML_NS = 'http://www.w3.org/1999/xhtml'
|
XHTML_NS = 'http://www.w3.org/1999/xhtml'
|
||||||
OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/'
|
OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/'
|
||||||
@ -28,25 +32,48 @@ DC11_NS = 'http://purl.org/dc/elements/1.1/'
|
|||||||
XSI_NS = 'http://www.w3.org/2001/XMLSchema-instance'
|
XSI_NS = 'http://www.w3.org/2001/XMLSchema-instance'
|
||||||
DCTERMS_NS = 'http://purl.org/dc/terms/'
|
DCTERMS_NS = 'http://purl.org/dc/terms/'
|
||||||
NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/'
|
NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/'
|
||||||
|
SVG_NS = 'http://www.w3.org/2000/svg'
|
||||||
|
XLINK_NS = 'http://www.w3.org/1999/xlink'
|
||||||
XPNSMAP = {'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS,
|
XPNSMAP = {'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS,
|
||||||
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
|
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
|
||||||
'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS}
|
'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS,
|
||||||
|
'svg': SVG_NS, 'xl': XLINK_NS}
|
||||||
|
|
||||||
def XML(name): return '{%s}%s' % (XML_NS, name)
|
def XML(name): return '{%s}%s' % (XML_NS, name)
|
||||||
def XHTML(name): return '{%s}%s' % (XHTML_NS, name)
|
def XHTML(name): return '{%s}%s' % (XHTML_NS, name)
|
||||||
def OPF(name): return '{%s}%s' % (OPF2_NS, name)
|
def OPF(name): return '{%s}%s' % (OPF2_NS, name)
|
||||||
def DC(name): return '{%s}%s' % (DC11_NS, name)
|
def DC(name): return '{%s}%s' % (DC11_NS, name)
|
||||||
def NCX(name): return '{%s}%s' % (NCX_NS, name)
|
def NCX(name): return '{%s}%s' % (NCX_NS, name)
|
||||||
|
def SVG(name): return '{%s}%s' % (SVG_NS, name)
|
||||||
|
def XLINK(name): return '{%s}%s' % (XLINK_NS, name)
|
||||||
|
|
||||||
|
EPUB_MIME = 'application/epub+zip'
|
||||||
XHTML_MIME = 'application/xhtml+xml'
|
XHTML_MIME = 'application/xhtml+xml'
|
||||||
CSS_MIME = 'text/css'
|
CSS_MIME = 'text/css'
|
||||||
NCX_MIME = 'application/x-dtbncx+xml'
|
NCX_MIME = 'application/x-dtbncx+xml'
|
||||||
OPF_MIME = 'application/oebps-package+xml'
|
OPF_MIME = 'application/oebps-package+xml'
|
||||||
OEB_DOC_MIME = 'text/x-oeb1-document'
|
OEB_DOC_MIME = 'text/x-oeb1-document'
|
||||||
OEB_CSS_MIME = 'text/x-oeb1-css'
|
OEB_CSS_MIME = 'text/x-oeb1-css'
|
||||||
|
OPENTYPE_MIME = 'font/opentype'
|
||||||
|
GIF_MIME = 'image/gif'
|
||||||
|
JPEG_MIME = 'image/jpeg'
|
||||||
|
PNG_MIME = 'image/png'
|
||||||
|
SVG_MIME = 'image/svg+xml'
|
||||||
|
|
||||||
OEB_STYLES = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css'])
|
OEB_STYLES = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css'])
|
||||||
OEB_DOCS = set([XHTML_MIME, 'text/html', OEB_DOC_MIME, 'text/x-oeb-document'])
|
OEB_DOCS = set([XHTML_MIME, 'text/html', OEB_DOC_MIME, 'text/x-oeb-document'])
|
||||||
|
OEB_RASTER_IMAGES = set([GIF_MIME, JPEG_MIME, PNG_MIME])
|
||||||
|
OEB_IMAGES = set([GIF_MIME, JPEG_MIME, PNG_MIME, SVG_MIME])
|
||||||
|
|
||||||
|
MS_COVER_TYPE = 'other.ms-coverimage-standard'
|
||||||
|
|
||||||
|
recode = lambda s: s.decode('iso-8859-1').encode('ascii', 'xmlcharrefreplace')
|
||||||
|
ENTITYDEFS = dict((k, recode(v)) for k, v in htmlentitydefs.entitydefs.items())
|
||||||
|
del ENTITYDEFS['lt']
|
||||||
|
del ENTITYDEFS['gt']
|
||||||
|
del ENTITYDEFS['quot']
|
||||||
|
del ENTITYDEFS['amp']
|
||||||
|
del recode
|
||||||
|
|
||||||
|
|
||||||
def element(parent, *args, **kwargs):
|
def element(parent, *args, **kwargs):
|
||||||
@ -64,10 +91,23 @@ def barename(name):
|
|||||||
return name.split('}', 1)[1]
|
return name.split('}', 1)[1]
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
def prefixname(name, nsrmap):
|
||||||
|
prefix = nsrmap[namespace(name)]
|
||||||
|
if not prefix:
|
||||||
|
return barename(name)
|
||||||
|
return ':'.join((prefix, barename(name)))
|
||||||
|
|
||||||
def xpath(elem, expr):
|
def xpath(elem, expr):
|
||||||
return elem.xpath(expr, namespaces=XPNSMAP)
|
return elem.xpath(expr, namespaces=XPNSMAP)
|
||||||
|
|
||||||
URL_UNSAFE = r"""`!@#$%^&*[](){}?+=;:'",<>\| """
|
def xml2str(root):
|
||||||
|
return etree.tostring(root, encoding='utf-8', xml_declaration=True)
|
||||||
|
|
||||||
|
ASCII_CHARS = set(chr(x) for x in xrange(128))
|
||||||
|
URL_SAFE = set(u'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
||||||
|
u'abcdefghijklmnopqrstuvwxyz'
|
||||||
|
u'0123456789' u'_.-/~')
|
||||||
|
URL_UNSAFE = ASCII_CHARS - URL_SAFE
|
||||||
def urlquote(href):
|
def urlquote(href):
|
||||||
result = []
|
result = []
|
||||||
for char in href:
|
for char in href:
|
||||||
@ -84,12 +124,20 @@ def urlnormalize(href):
|
|||||||
return urlunparse(parts)
|
return urlunparse(parts)
|
||||||
|
|
||||||
|
|
||||||
|
class OEBError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class FauxLogger(object):
|
class FauxLogger(object):
|
||||||
def __getattr__(self, name):
|
def __getattr__(self, name):
|
||||||
return self
|
return self
|
||||||
def __call__(self, message):
|
def __call__(self, message):
|
||||||
print message
|
print message
|
||||||
|
|
||||||
|
class Logger(LoggingInterface, object):
|
||||||
|
def __getattr__(self, name):
|
||||||
|
return object.__getattribute__(self, 'log_' + name)
|
||||||
|
|
||||||
|
|
||||||
class AbstractContainer(object):
|
class AbstractContainer(object):
|
||||||
def read_xml(self, path):
|
def read_xml(self, path):
|
||||||
@ -108,25 +156,45 @@ class DirContainer(AbstractContainer):
|
|||||||
|
|
||||||
def write(self, path, data):
|
def write(self, path, data):
|
||||||
path = os.path.join(self.rootdir, path)
|
path = os.path.join(self.rootdir, path)
|
||||||
|
dir = os.path.dirname(path)
|
||||||
|
if not os.path.isdir(dir):
|
||||||
|
os.makedirs(dir)
|
||||||
with open(urlunquote(path), 'wb') as f:
|
with open(urlunquote(path), 'wb') as f:
|
||||||
return f.write(data)
|
return f.write(data)
|
||||||
|
|
||||||
def exists(self, path):
|
def exists(self, path):
|
||||||
path = os.path.join(self.rootdir, path)
|
path = os.path.join(self.rootdir, path)
|
||||||
return os.path.isfile(path)
|
return os.path.isfile(urlunquote(path))
|
||||||
|
|
||||||
|
class DirWriter(object):
|
||||||
|
def __init__(self, version=2.0):
|
||||||
|
self.version = version
|
||||||
|
|
||||||
|
def dump(self, oeb, path):
|
||||||
|
if not os.path.isdir(path):
|
||||||
|
os.mkdir(path)
|
||||||
|
output = DirContainer(path)
|
||||||
|
for item in oeb.manifest.values():
|
||||||
|
output.write(item.href, str(item))
|
||||||
|
metadata = oeb.to_opf2() if self.version == 2 else oeb.to_opf1()
|
||||||
|
for href, data in metadata.values():
|
||||||
|
output.write(href, xml2str(data))
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
class Metadata(object):
|
class Metadata(object):
|
||||||
TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description',
|
TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description',
|
||||||
'format', 'identifier', 'language', 'publisher', 'relation',
|
'format', 'identifier', 'language', 'publisher', 'relation',
|
||||||
'rights', 'source', 'subject', 'title', 'type'])
|
'rights', 'source', 'subject', 'title', 'type'])
|
||||||
|
ATTRS = set(['role', 'file-as', 'scheme'])
|
||||||
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
|
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
|
||||||
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
|
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
|
||||||
'xsi': XSI_NS}
|
'xsi': XSI_NS}
|
||||||
|
|
||||||
class Item(object):
|
class Item(object):
|
||||||
def __init__(self, term, value, fq_attrib={}):
|
def __init__(self, term, value, fq_attrib={}, **kwargs):
|
||||||
self.fq_attrib = dict(fq_attrib)
|
self.fq_attrib = fq_attrib = dict(fq_attrib)
|
||||||
|
fq_attrib.update(kwargs)
|
||||||
if term == OPF('meta') and not value:
|
if term == OPF('meta') and not value:
|
||||||
term = self.fq_attrib.pop('name')
|
term = self.fq_attrib.pop('name')
|
||||||
value = self.fq_attrib.pop('content')
|
value = self.fq_attrib.pop('content')
|
||||||
@ -136,7 +204,12 @@ class Metadata(object):
|
|||||||
self.value = value
|
self.value = value
|
||||||
self.attrib = attrib = {}
|
self.attrib = attrib = {}
|
||||||
for fq_attr in fq_attrib:
|
for fq_attr in fq_attrib:
|
||||||
attr = barename(fq_attr)
|
if fq_attr in Metadata.ATTRS:
|
||||||
|
attr = fq_attr
|
||||||
|
fq_attr = OPF2(fq_attr)
|
||||||
|
fq_attrib[fq_attr] = fq_attrib.pop(attr)
|
||||||
|
else:
|
||||||
|
attr = barename(fq_attr)
|
||||||
attrib[attr] = fq_attrib[fq_attr]
|
attrib[attr] = fq_attrib[fq_attr]
|
||||||
|
|
||||||
def __getattr__(self, name):
|
def __getattr__(self, name):
|
||||||
@ -153,7 +226,7 @@ class Metadata(object):
|
|||||||
% (barename(self.term), self.value, self.attrib)
|
% (barename(self.term), self.value, self.attrib)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return str(self.value)
|
return unicode(self.value).encode('ascii', 'xmlcharrefreplace')
|
||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return unicode(self.value)
|
return unicode(self.value)
|
||||||
@ -183,8 +256,8 @@ class Metadata(object):
|
|||||||
self.oeb = oeb
|
self.oeb = oeb
|
||||||
self.items = defaultdict(list)
|
self.items = defaultdict(list)
|
||||||
|
|
||||||
def add(self, term, value, attrib={}):
|
def add(self, term, value, attrib={}, **kwargs):
|
||||||
item = self.Item(term, value, attrib)
|
item = self.Item(term, value, attrib, **kwargs)
|
||||||
items = self.items[barename(item.term)]
|
items = self.items[barename(item.term)]
|
||||||
items.append(item)
|
items.append(item)
|
||||||
return item
|
return item
|
||||||
@ -225,7 +298,11 @@ class Metadata(object):
|
|||||||
|
|
||||||
class Manifest(object):
|
class Manifest(object):
|
||||||
class Item(object):
|
class Item(object):
|
||||||
def __init__(self, id, href, media_type, fallback=None, loader=str):
|
ENTITY_RE = re.compile(r'&([a-zA-Z_:][a-zA-Z0-9.-_:]+);')
|
||||||
|
NUM_RE = re.compile('^(.*)([0-9][0-9.]*)(?=[.]|$)')
|
||||||
|
|
||||||
|
def __init__(self, id, href, media_type,
|
||||||
|
fallback=None, loader=str, data=None):
|
||||||
self.id = id
|
self.id = id
|
||||||
self.href = self.path = urlnormalize(href)
|
self.href = self.path = urlnormalize(href)
|
||||||
self.media_type = media_type
|
self.media_type = media_type
|
||||||
@ -233,26 +310,32 @@ class Manifest(object):
|
|||||||
self.spine_position = None
|
self.spine_position = None
|
||||||
self.linear = True
|
self.linear = True
|
||||||
self._loader = loader
|
self._loader = loader
|
||||||
self._data = None
|
self._data = data
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return 'Item(id=%r, href=%r, media_type=%r)' \
|
return 'Item(id=%r, href=%r, media_type=%r)' \
|
||||||
% (self.id, self.href, self.media_type)
|
% (self.id, self.href, self.media_type)
|
||||||
|
|
||||||
|
def _force_xhtml(self, data):
|
||||||
|
repl = lambda m: ENTITYDEFS.get(m.group(1), m.group(0))
|
||||||
|
data = self.ENTITY_RE.sub(repl, data)
|
||||||
|
data = etree.fromstring(data, parser=XML_PARSER)
|
||||||
|
if namespace(data.tag) != XHTML_NS:
|
||||||
|
data.attrib['xmlns'] = XHTML_NS
|
||||||
|
data = etree.tostring(data)
|
||||||
|
data = etree.fromstring(data, parser=XML_PARSER)
|
||||||
|
return data
|
||||||
|
|
||||||
def data():
|
def data():
|
||||||
def fget(self):
|
def fget(self):
|
||||||
if self._data:
|
if self._data is not None:
|
||||||
return self._data
|
return self._data
|
||||||
data = self._loader(self.href)
|
data = self._loader(self.href)
|
||||||
if self.media_type == XHTML_MIME:
|
if self.media_type in OEB_DOCS:
|
||||||
data = etree.fromstring(data, parser=XML_PARSER)
|
data = self._force_xhtml(data)
|
||||||
if namespace(data.tag) != XHTML_NS:
|
elif self.media_type[-4:] in ('+xml', '/xml'):
|
||||||
data.attrib['xmlns'] = XHTML_NS
|
|
||||||
data = etree.tostring(data)
|
|
||||||
data = etree.fromstring(data, parser=XML_PARSER)
|
|
||||||
elif self.media_type.startswith('application/') \
|
|
||||||
and self.media_type.endswith('+xml'):
|
|
||||||
data = etree.fromstring(data, parser=XML_PARSER)
|
data = etree.fromstring(data, parser=XML_PARSER)
|
||||||
|
self._data = data
|
||||||
return data
|
return data
|
||||||
def fset(self, value):
|
def fset(self, value):
|
||||||
self._data = value
|
self._data = value
|
||||||
@ -260,13 +343,49 @@ class Manifest(object):
|
|||||||
self._data = None
|
self._data = None
|
||||||
return property(fget, fset, fdel)
|
return property(fget, fset, fdel)
|
||||||
data = data()
|
data = data()
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
data = self.data
|
||||||
|
if isinstance(data, etree._Element):
|
||||||
|
return xml2str(data)
|
||||||
|
return str(data)
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
return id(self) == id(other)
|
||||||
|
|
||||||
|
def __ne__(self, other):
|
||||||
|
return not self.__eq__(other)
|
||||||
|
|
||||||
def __cmp__(self, other):
|
def __cmp__(self, other):
|
||||||
result = cmp(self.spine_position, other.spine_position)
|
result = cmp(self.spine_position, other.spine_position)
|
||||||
if result != 0:
|
if result != 0:
|
||||||
return result
|
return result
|
||||||
return cmp(self.id, other.id)
|
smatch = self.NUM_RE.search(self.href)
|
||||||
|
sref = smatch.group(1) if smatch else self.href
|
||||||
|
snum = float(smatch.group(2)) if smatch else 0.0
|
||||||
|
skey = (sref, snum, self.id)
|
||||||
|
omatch = self.NUM_RE.search(other.href)
|
||||||
|
oref = omatch.group(1) if omatch else other.href
|
||||||
|
onum = float(omatch.group(2)) if omatch else 0.0
|
||||||
|
okey = (oref, onum, other.id)
|
||||||
|
return cmp(skey, okey)
|
||||||
|
|
||||||
|
def relhref(self, href):
|
||||||
|
if '/' not in self.href:
|
||||||
|
return href
|
||||||
|
base = os.path.dirname(self.href).split('/')
|
||||||
|
target, frag = urldefrag(href)
|
||||||
|
target = target.split('/')
|
||||||
|
for index in xrange(min(len(base), len(target))):
|
||||||
|
if base[index] != target[index]: break
|
||||||
|
else:
|
||||||
|
index += 1
|
||||||
|
relhref = (['..'] * (len(base) - index)) + target[index:]
|
||||||
|
relhref = '/'.join(relhref)
|
||||||
|
if frag:
|
||||||
|
relhref = '#'.join((relhref, frag))
|
||||||
|
return relhref
|
||||||
|
|
||||||
def abshref(self, href):
|
def abshref(self, href):
|
||||||
if '/' not in self.href:
|
if '/' not in self.href:
|
||||||
return href
|
return href
|
||||||
@ -277,42 +396,60 @@ class Manifest(object):
|
|||||||
|
|
||||||
def __init__(self, oeb):
|
def __init__(self, oeb):
|
||||||
self.oeb = oeb
|
self.oeb = oeb
|
||||||
self.items = {}
|
self.ids = {}
|
||||||
self.hrefs = {}
|
self.hrefs = {}
|
||||||
|
|
||||||
def add(self, id, href, media_type, fallback=None):
|
def add(self, id, href, media_type, fallback=None, loader=None, data=None):
|
||||||
|
loader = loader or self.oeb.container.read
|
||||||
item = self.Item(
|
item = self.Item(
|
||||||
id, href, media_type, fallback, self.oeb.container.read)
|
id, href, media_type, fallback, loader, data)
|
||||||
self.items[item.id] = item
|
self.ids[item.id] = item
|
||||||
self.hrefs[item.href] = item
|
self.hrefs[item.href] = item
|
||||||
return item
|
return item
|
||||||
|
|
||||||
def remove(self, id):
|
def remove(self, item):
|
||||||
href = self.items[id].href
|
if item in self.ids:
|
||||||
del self.items[id]
|
item = self.ids[item]
|
||||||
del self.hrefs[href]
|
del self.ids[item.id]
|
||||||
|
del self.hrefs[item.href]
|
||||||
|
if item in self.oeb.spine:
|
||||||
|
self.oeb.spine.remove(item)
|
||||||
|
|
||||||
|
def generate(self, id, href):
|
||||||
|
href = urlnormalize(href)
|
||||||
|
base = id
|
||||||
|
index = 1
|
||||||
|
while id in self.ids:
|
||||||
|
id = base + str(index)
|
||||||
|
index += 1
|
||||||
|
base, ext = os.path.splitext(href)
|
||||||
|
index = 1
|
||||||
|
while href in self.hrefs:
|
||||||
|
href = base + str(index) + ext
|
||||||
|
index += 1
|
||||||
|
return id, href
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
for id in self.items:
|
for id in self.ids:
|
||||||
yield id
|
yield id
|
||||||
|
|
||||||
def __getitem__(self, id):
|
def __getitem__(self, id):
|
||||||
return self.items[id]
|
return self.ids[id]
|
||||||
|
|
||||||
def values(self):
|
def values(self):
|
||||||
for item in self.items.values():
|
for item in self.ids.values():
|
||||||
yield item
|
yield item
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
for id, item in self.refs.items():
|
for id, item in self.ids.items():
|
||||||
yield id, items
|
yield id, item
|
||||||
|
|
||||||
def __contains__(self, key):
|
def __contains__(self, key):
|
||||||
return key in self.items
|
return key in self.ids
|
||||||
|
|
||||||
def to_opf1(self, parent=None):
|
def to_opf1(self, parent=None):
|
||||||
elem = element(parent, 'manifest')
|
elem = element(parent, 'manifest')
|
||||||
for item in self.items.values():
|
for item in self.ids.values():
|
||||||
media_type = item.media_type
|
media_type = item.media_type
|
||||||
if media_type == XHTML_MIME:
|
if media_type == XHTML_MIME:
|
||||||
media_type = OEB_DOC_MIME
|
media_type = OEB_DOC_MIME
|
||||||
@ -327,7 +464,7 @@ class Manifest(object):
|
|||||||
|
|
||||||
def to_opf2(self, parent=None):
|
def to_opf2(self, parent=None):
|
||||||
elem = element(parent, OPF('manifest'))
|
elem = element(parent, OPF('manifest'))
|
||||||
for item in self.items.values():
|
for item in self.ids.values():
|
||||||
attrib = {'id': item.id, 'href': item.href,
|
attrib = {'id': item.id, 'href': item.href,
|
||||||
'media-type': item.media_type}
|
'media-type': item.media_type}
|
||||||
if item.fallback:
|
if item.fallback:
|
||||||
@ -341,18 +478,35 @@ class Spine(object):
|
|||||||
self.oeb = oeb
|
self.oeb = oeb
|
||||||
self.items = []
|
self.items = []
|
||||||
|
|
||||||
def add(self, item, linear):
|
def _linear(self, linear):
|
||||||
if isinstance(linear, StringTypes):
|
if isinstance(linear, StringTypes):
|
||||||
linear = linear.lower()
|
linear = linear.lower()
|
||||||
if linear is None or linear in ('yes', 'true'):
|
if linear is None or linear in ('yes', 'true'):
|
||||||
linear = True
|
linear = True
|
||||||
elif linear in ('no', 'false'):
|
elif linear in ('no', 'false'):
|
||||||
linear = False
|
linear = False
|
||||||
item.linear = linear
|
return linear
|
||||||
|
|
||||||
|
def add(self, item, linear=None):
|
||||||
|
item.linear = self._linear(linear)
|
||||||
item.spine_position = len(self.items)
|
item.spine_position = len(self.items)
|
||||||
self.items.append(item)
|
self.items.append(item)
|
||||||
return item
|
return item
|
||||||
|
|
||||||
|
def insert(self, index, item, linear):
|
||||||
|
item.linear = self._linear(linear)
|
||||||
|
item.spine_position = index
|
||||||
|
self.items.insert(index, item)
|
||||||
|
for i in xrange(index, len(self.items)):
|
||||||
|
self.items[i].spine_position = i
|
||||||
|
return item
|
||||||
|
|
||||||
|
def remove(self, item):
|
||||||
|
index = item.spine_position
|
||||||
|
self.items.pop(index)
|
||||||
|
for i in xrange(index, len(self.items)):
|
||||||
|
self.items[i].spine_position = i
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
for item in self.items:
|
for item in self.items:
|
||||||
yield item
|
yield item
|
||||||
@ -385,46 +539,81 @@ class Spine(object):
|
|||||||
|
|
||||||
class Guide(object):
|
class Guide(object):
|
||||||
class Reference(object):
|
class Reference(object):
|
||||||
|
_TYPES_TITLES = [('cover', 'Cover'), ('title-page', 'Title Page'),
|
||||||
|
('toc', 'Table of Contents'), ('index', 'Index'),
|
||||||
|
('glossary', 'Glossary'), ('acknowledgements', 'Acknowledgements'),
|
||||||
|
('bibliography', 'Bibliography'), ('colophon', 'Colophon'),
|
||||||
|
('copyright-page', 'Copyright'), ('dedication', 'Dedication'),
|
||||||
|
('epigraph', 'Epigraph'), ('foreword', 'Foreword'),
|
||||||
|
('loi', 'List of Illustrations'), ('lot', 'List of Tables'),
|
||||||
|
('notes', 'Notes'), ('preface', 'Preface'),
|
||||||
|
('text', 'Main Text')]
|
||||||
|
TYPES = set(t for t, _ in _TYPES_TITLES)
|
||||||
|
TITLES = dict(_TYPES_TITLES)
|
||||||
|
ORDER = dict((t, i) for (t, _), i in izip(_TYPES_TITLES, count(0)))
|
||||||
|
|
||||||
def __init__(self, type, title, href):
|
def __init__(self, type, title, href):
|
||||||
|
if type.lower() in self.TYPES:
|
||||||
|
type = type.lower()
|
||||||
|
elif type not in self.TYPES and \
|
||||||
|
not type.startswith('other.'):
|
||||||
|
type = 'other.' + type
|
||||||
|
if not title:
|
||||||
|
title = self.TITLES.get(type, None)
|
||||||
self.type = type
|
self.type = type
|
||||||
self.title = title
|
self.title = title
|
||||||
self.href = urlnormalize(href)
|
self.href = urlnormalize(href)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return 'Reference(type=%r, title=%r, href=%r)' \
|
return 'Reference(type=%r, title=%r, href=%r)' \
|
||||||
% (self.type, self.title, self.href)
|
% (self.type, self.title, self.href)
|
||||||
|
|
||||||
|
def _order():
|
||||||
|
def fget(self):
|
||||||
|
return self.ORDER.get(self.type, self.type)
|
||||||
|
return property(fget=fget)
|
||||||
|
_order = _order()
|
||||||
|
|
||||||
|
def __cmp__(self, other):
|
||||||
|
if not isinstance(other, Guide.Reference):
|
||||||
|
return NotImplemented
|
||||||
|
return cmp(self._order, other._order)
|
||||||
|
|
||||||
def __init__(self, oeb):
|
def __init__(self, oeb):
|
||||||
self.oeb = oeb
|
self.oeb = oeb
|
||||||
self.refs = {}
|
self.refs = {}
|
||||||
|
|
||||||
def add(self, type, title, href):
|
def add(self, type, title, href):
|
||||||
ref = self.Reference(type, title, href)
|
ref = self.Reference(type, title, href)
|
||||||
self.refs[type] = ref
|
self.refs[type] = ref
|
||||||
return ref
|
return ref
|
||||||
|
|
||||||
def by_type(self, type):
|
|
||||||
return self.ref_types[type]
|
|
||||||
|
|
||||||
def iterkeys(self):
|
def iterkeys(self):
|
||||||
for type in self.refs:
|
for type in self.refs:
|
||||||
yield type
|
yield type
|
||||||
__iter__ = iterkeys
|
__iter__ = iterkeys
|
||||||
|
|
||||||
def values(self):
|
def values(self):
|
||||||
for ref in self.refs.values():
|
values = list(self.refs.values())
|
||||||
yield ref
|
values.sort()
|
||||||
|
return values
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
for type, ref in self.refs.items():
|
for type, ref in self.refs.items():
|
||||||
yield type, ref
|
yield type, ref
|
||||||
|
|
||||||
def __getitem__(self, index):
|
def __getitem__(self, key):
|
||||||
return self.refs[index]
|
return self.refs[key]
|
||||||
|
|
||||||
|
def __delitem__(self, key):
|
||||||
|
del self.refs[key]
|
||||||
|
|
||||||
def __contains__(self, key):
|
def __contains__(self, key):
|
||||||
return key in self.refs
|
return key in self.refs
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.refs)
|
||||||
|
|
||||||
def to_opf1(self, parent=None):
|
def to_opf1(self, parent=None):
|
||||||
elem = element(parent, 'guide')
|
elem = element(parent, 'guide')
|
||||||
for ref in self.refs.values():
|
for ref in self.refs.values():
|
||||||
@ -456,6 +645,12 @@ class TOC(object):
|
|||||||
node = TOC(title, href, klass, id)
|
node = TOC(title, href, klass, id)
|
||||||
self.nodes.append(node)
|
self.nodes.append(node)
|
||||||
return node
|
return node
|
||||||
|
|
||||||
|
def iterdescendants(self):
|
||||||
|
for node in self.nodes:
|
||||||
|
yield node
|
||||||
|
for child in node.iterdescendants():
|
||||||
|
yield child
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
for node in self.nodes:
|
for node in self.nodes:
|
||||||
@ -463,6 +658,15 @@ class TOC(object):
|
|||||||
|
|
||||||
def __getitem__(self, index):
|
def __getitem__(self, index):
|
||||||
return self.nodes[index]
|
return self.nodes[index]
|
||||||
|
|
||||||
|
def autolayer(self):
|
||||||
|
prev = None
|
||||||
|
for node in list(self.nodes):
|
||||||
|
if prev and urldefrag(prev.href)[0] == urldefrag(node.href)[0]:
|
||||||
|
self.nodes.remove(node)
|
||||||
|
prev.nodes.append(node)
|
||||||
|
else:
|
||||||
|
prev = node
|
||||||
|
|
||||||
def depth(self, level=0):
|
def depth(self, level=0):
|
||||||
if self.nodes:
|
if self.nodes:
|
||||||
@ -496,23 +700,33 @@ class TOC(object):
|
|||||||
|
|
||||||
|
|
||||||
class OEBBook(object):
|
class OEBBook(object):
|
||||||
def __init__(self, opfpath, container=None, logger=FauxLogger()):
|
def __init__(self, opfpath=None, container=None, logger=FauxLogger()):
|
||||||
if not container:
|
if opfpath and not container:
|
||||||
container = DirContainer(os.path.dirname(opfpath))
|
container = DirContainer(os.path.dirname(opfpath))
|
||||||
opfpath = os.path.basename(opfpath)
|
opfpath = os.path.basename(opfpath)
|
||||||
self.container = container
|
self.container = container
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
opf = self._read_opf(opfpath)
|
if opfpath or container:
|
||||||
self._all_from_opf(opf)
|
opf = self._read_opf(opfpath)
|
||||||
|
self._all_from_opf(opf)
|
||||||
|
|
||||||
def _convert_opf1(self, opf):
|
def _convert_opf1(self, opf):
|
||||||
|
# Seriously, seriously wrong
|
||||||
|
if namespace(opf.tag) == OPF1_NS:
|
||||||
|
opf.tag = barename(opf.tag)
|
||||||
|
for elem in opf.iterdescendants():
|
||||||
|
if isinstance(elem.tag, basestring) \
|
||||||
|
and namespace(elem.tag) == OPF1_NS:
|
||||||
|
elem.tag = barename(elem.tag)
|
||||||
|
attrib = dict(opf.attrib)
|
||||||
|
attrib['version'] = '2.0'
|
||||||
nroot = etree.Element(OPF('package'),
|
nroot = etree.Element(OPF('package'),
|
||||||
nsmap={None: OPF2_NS}, version="2.0", **dict(opf.attrib))
|
nsmap={None: OPF2_NS}, attrib=attrib)
|
||||||
metadata = etree.SubElement(nroot, OPF('metadata'),
|
metadata = etree.SubElement(nroot, OPF('metadata'),
|
||||||
nsmap={'opf': OPF2_NS, 'dc': DC11_NS,
|
nsmap={'opf': OPF2_NS, 'dc': DC11_NS,
|
||||||
'xsi': XSI_NS, 'dcterms': DCTERMS_NS})
|
'xsi': XSI_NS, 'dcterms': DCTERMS_NS})
|
||||||
for prefix in ('d11', 'd10', 'd09'):
|
for prefix in ('d11', 'd10', 'd09'):
|
||||||
elements = xpath(opf, 'metadata/dc-metadata/%s:*' % prefix)
|
elements = xpath(opf, 'metadata//%s:*' % prefix)
|
||||||
if elements: break
|
if elements: break
|
||||||
for element in elements:
|
for element in elements:
|
||||||
if not element.text: continue
|
if not element.text: continue
|
||||||
@ -524,7 +738,7 @@ class OEBBook(object):
|
|||||||
element.attrib[nsname] = element.attrib[name]
|
element.attrib[nsname] = element.attrib[name]
|
||||||
del element.attrib[name]
|
del element.attrib[name]
|
||||||
metadata.append(element)
|
metadata.append(element)
|
||||||
for element in opf.xpath('metadata/x-metadata/meta'):
|
for element in opf.xpath('metadata//meta'):
|
||||||
metadata.append(element)
|
metadata.append(element)
|
||||||
for item in opf.xpath('manifest/item'):
|
for item in opf.xpath('manifest/item'):
|
||||||
media_type = item.attrib['media-type'].lower()
|
media_type = item.attrib['media-type'].lower()
|
||||||
@ -541,30 +755,56 @@ class OEBBook(object):
|
|||||||
def _read_opf(self, opfpath):
|
def _read_opf(self, opfpath):
|
||||||
opf = self.container.read_xml(opfpath)
|
opf = self.container.read_xml(opfpath)
|
||||||
version = float(opf.get('version', 1.0))
|
version = float(opf.get('version', 1.0))
|
||||||
if version < 2.0:
|
ns = namespace(opf.tag)
|
||||||
|
if ns not in ('', OPF1_NS, OPF2_NS):
|
||||||
|
raise OEBError('Invalid namespace %r for OPF document' % ns)
|
||||||
|
if ns != OPF2_NS or version < 2.0:
|
||||||
opf = self._convert_opf1(opf)
|
opf = self._convert_opf1(opf)
|
||||||
return opf
|
return opf
|
||||||
|
|
||||||
def _metadata_from_opf(self, opf):
|
def _metadata_from_opf(self, opf):
|
||||||
uid = opf.attrib['unique-identifier']
|
uid = opf.get('unique-identifier', 'calibre-uuid')
|
||||||
self.metadata = metadata = Metadata(self)
|
self.uid = None
|
||||||
for elem in xpath(opf, '/o2:package/o2:metadata/*'):
|
self.metadata = metadata = Metadata(self)
|
||||||
if elem.text or elem.attrib:
|
ignored = (OPF('dc-metadata'), OPF('x-metadata'))
|
||||||
|
for elem in xpath(opf, '/o2:package/o2:metadata//*'):
|
||||||
|
if elem.tag not in ignored and (elem.text or elem.attrib):
|
||||||
metadata.add(elem.tag, elem.text, elem.attrib)
|
metadata.add(elem.tag, elem.text, elem.attrib)
|
||||||
|
haveuuid = haveid = False
|
||||||
|
for ident in metadata.identifier:
|
||||||
|
if unicode(ident).startswith('urn:uuid:'):
|
||||||
|
haveuuid = True
|
||||||
|
if 'id' in ident.attrib:
|
||||||
|
haveid = True
|
||||||
|
if not haveuuid and haveid:
|
||||||
|
bookid = "urn:uuid:%s" % str(uuid.uuid4())
|
||||||
|
metadata.add('identifier', bookid, id='calibre-uuid')
|
||||||
for item in metadata.identifier:
|
for item in metadata.identifier:
|
||||||
if item.id == uid:
|
if item.id == uid:
|
||||||
self.uid = item
|
self.uid = item
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
self.logger.log_warn(u'Unique-identifier %r not found.' % uid)
|
self.logger.warn(u'Unique-identifier %r not found.' % uid)
|
||||||
self.uid = metadata.identifier[0]
|
for ident in metadata.identifier:
|
||||||
|
if 'id' in ident.attrib:
|
||||||
|
self.uid = metadata.identifier[0]
|
||||||
|
break
|
||||||
|
if not metadata.language:
|
||||||
|
self.logger.warn(u'Language not specified.')
|
||||||
|
metadata.add('language', 'en')
|
||||||
|
if not metadata.creator:
|
||||||
|
self.logger.warn(u'Creator not specified.')
|
||||||
|
metadata.add('creator', 'Unknown')
|
||||||
|
if not metadata.title:
|
||||||
|
self.logger.warn(u'Title not specified.')
|
||||||
|
metadata.add('title', 'Unknown')
|
||||||
|
|
||||||
def _manifest_from_opf(self, opf):
|
def _manifest_from_opf(self, opf):
|
||||||
self.manifest = manifest = Manifest(self)
|
self.manifest = manifest = Manifest(self)
|
||||||
for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'):
|
for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'):
|
||||||
href = elem.get('href')
|
href = elem.get('href')
|
||||||
if not self.container.exists(href):
|
if not self.container.exists(href):
|
||||||
self.logger.log_warn(u'Manifest item %r not found.' % href)
|
self.logger.warn(u'Manifest item %r not found.' % href)
|
||||||
continue
|
continue
|
||||||
manifest.add(elem.get('id'), href, elem.get('media-type'),
|
manifest.add(elem.get('id'), href, elem.get('media-type'),
|
||||||
elem.get('fallback'))
|
elem.get('fallback'))
|
||||||
@ -574,7 +814,7 @@ class OEBBook(object):
|
|||||||
for elem in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
|
for elem in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
|
||||||
idref = elem.get('idref')
|
idref = elem.get('idref')
|
||||||
if idref not in self.manifest:
|
if idref not in self.manifest:
|
||||||
self.logger.log_warn(u'Spine item %r not found.' % idref)
|
self.logger.warn(u'Spine item %r not found.' % idref)
|
||||||
continue
|
continue
|
||||||
item = self.manifest[idref]
|
item = self.manifest[idref]
|
||||||
spine.add(item, elem.get('linear'))
|
spine.add(item, elem.get('linear'))
|
||||||
@ -593,7 +833,7 @@ class OEBBook(object):
|
|||||||
href = elem.get('href')
|
href = elem.get('href')
|
||||||
path, frag = urldefrag(href)
|
path, frag = urldefrag(href)
|
||||||
if path not in self.manifest.hrefs:
|
if path not in self.manifest.hrefs:
|
||||||
self.logger.log_warn(u'Guide reference %r not found' % href)
|
self.logger.warn(u'Guide reference %r not found' % href)
|
||||||
continue
|
continue
|
||||||
guide.add(elem.get('type'), elem.get('title'), href)
|
guide.add(elem.get('type'), elem.get('title'), href)
|
||||||
|
|
||||||
@ -695,6 +935,32 @@ class OEBBook(object):
|
|||||||
if self._toc_from_tour(opf): return
|
if self._toc_from_tour(opf): return
|
||||||
if self._toc_from_html(opf): return
|
if self._toc_from_html(opf): return
|
||||||
self._toc_from_spine(opf)
|
self._toc_from_spine(opf)
|
||||||
|
|
||||||
|
def _ensure_cover_image(self):
    """Record a cover item in the metadata when one can be located.

    Candidates are tried in this order: the existing ``cover`` metadata
    entry, the guide reference of type ``MS_COVER_TYPE``, and then the
    first spine document's ``<img>``, ``<object>`` and inline ``<svg>``
    elements.  An inline SVG is deep-copied into a newly generated
    manifest item.  Metadata is only written when a cover was found and
    no ``cover`` entry exists yet.
    """
    first_doc = self.spine[0]
    markup = first_doc.data
    cover = None
    if self.metadata.cover:
        cover_id = str(self.metadata.cover[0])
        cover = self.manifest.ids[cover_id]
    elif MS_COVER_TYPE in self.guide:
        cover = self.manifest.hrefs[self.guide[MS_COVER_TYPE].href]
    else:
        # Referenced resources first; each query only runs when the
        # previous one matched nothing.
        linked = (('//h:img[position()=1]', 'src'),
                  ('//h:object[position()=1]', 'data'))
        for query, attr in linked:
            matches = xpath(markup, query)
            if matches:
                target = first_doc.abshref(matches[0].get(attr))
                cover = self.manifest.hrefs[target]
                break
        else:
            matches = xpath(markup, '//svg:svg[position()=1]')
            if matches:
                # Inline SVG: copy it out into its own manifest item.
                drawing = copy.deepcopy(matches[0])
                svg_href = os.path.splitext(first_doc.href)[0] + '.svg'
                new_id, svg_href = self.manifest.generate(
                    first_doc.id, svg_href)
                cover = self.manifest.add(
                    new_id, svg_href, SVG_MIME, data=drawing)
    if cover and not self.metadata.cover:
        self.metadata.add('cover', cover.id)
|
||||||
|
|
||||||
def _all_from_opf(self, opf):
|
def _all_from_opf(self, opf):
|
||||||
self._metadata_from_opf(opf)
|
self._metadata_from_opf(opf)
|
||||||
@ -702,6 +968,7 @@ class OEBBook(object):
|
|||||||
self._spine_from_opf(opf)
|
self._spine_from_opf(opf)
|
||||||
self._guide_from_opf(opf)
|
self._guide_from_opf(opf)
|
||||||
self._toc_from_opf(opf)
|
self._toc_from_opf(opf)
|
||||||
|
self._ensure_cover_image()
|
||||||
|
|
||||||
def to_opf1(self):
|
def to_opf1(self):
|
||||||
package = etree.Element('package',
|
package = etree.Element('package',
|
@ -35,7 +35,8 @@
|
|||||||
*
|
*
|
||||||
* ***** END LICENSE BLOCK ***** */
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
@namespace url(http://www.w3.org/1999/xhtml); /* set default namespace to HTML */
|
@namespace url(http://www.w3.org/1999/xhtml);
|
||||||
|
@namespace svg url(http://www.w3.org/2000/svg);
|
||||||
|
|
||||||
/* blocks */
|
/* blocks */
|
||||||
|
|
||||||
@ -45,7 +46,6 @@ html, div, map, dt, isindex, form {
|
|||||||
|
|
||||||
body {
|
body {
|
||||||
display: block;
|
display: block;
|
||||||
margin: 8px;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
p, dl, multicol {
|
p, dl, multicol {
|
||||||
@ -59,7 +59,7 @@ dd {
|
|||||||
|
|
||||||
blockquote {
|
blockquote {
|
||||||
display: block;
|
display: block;
|
||||||
margin: 1em 40px;
|
margin: 1em;
|
||||||
}
|
}
|
||||||
|
|
||||||
address {
|
address {
|
||||||
@ -74,7 +74,7 @@ center {
|
|||||||
|
|
||||||
blockquote[type=cite] {
|
blockquote[type=cite] {
|
||||||
display: block;
|
display: block;
|
||||||
margin: 1em 0px;
|
margin: 1em 0em;
|
||||||
border-color: blue;
|
border-color: blue;
|
||||||
border-width: thin;
|
border-width: thin;
|
||||||
}
|
}
|
||||||
@ -234,14 +234,6 @@ th {
|
|||||||
|
|
||||||
/* inlines */
|
/* inlines */
|
||||||
|
|
||||||
q:before {
|
|
||||||
content: open-quote;
|
|
||||||
}
|
|
||||||
|
|
||||||
q:after {
|
|
||||||
content: close-quote;
|
|
||||||
}
|
|
||||||
|
|
||||||
b, strong {
|
b, strong {
|
||||||
font-weight: bolder;
|
font-weight: bolder;
|
||||||
}
|
}
|
||||||
@ -392,22 +384,6 @@ spacer {
|
|||||||
float: none ! important;
|
float: none ! important;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* focusable content: anything w/ tabindex >=0 is focusable */
|
|
||||||
abbr:focus, acronym:focus, address:focus, applet:focus, b:focus,
|
|
||||||
base:focus, big:focus, blockquote:focus, br:focus, canvas:focus, caption:focus,
|
|
||||||
center:focus, cite:focus, code:focus, col:focus, colgroup:focus, dd:focus,
|
|
||||||
del:focus, dfn:focus, dir:focus, div:focus, dl:focus, dt:focus, em:focus,
|
|
||||||
fieldset:focus, font:focus, form:focus, h1:focus, h2:focus, h3:focus, h4:focus,
|
|
||||||
h5:focus, h6:focus, hr:focus, i:focus, img:focus, ins:focus,
|
|
||||||
kbd:focus, label:focus, legend:focus, li:focus, link:focus, menu:focus,
|
|
||||||
object:focus, ol:focus, p:focus, pre:focus, q:focus, s:focus, samp:focus,
|
|
||||||
small:focus, span:focus, strike:focus, strong:focus, sub:focus, sup:focus,
|
|
||||||
table:focus, tbody:focus, td:focus, tfoot:focus, th:focus, thead:focus,
|
|
||||||
tr:focus, tt:focus, u:focus, ul:focus, var:focus {
|
|
||||||
/* Don't specify the outline-color, we should always use initial value. */
|
|
||||||
outline: 1px dotted;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* hidden elements */
|
/* hidden elements */
|
||||||
area, base, basefont, head, meta, script, style, title,
|
area, base, basefont, head, meta, script, style, title,
|
||||||
noembed, param, link {
|
noembed, param, link {
|
||||||
@ -424,3 +400,8 @@ br {
|
|||||||
display: block;
|
display: block;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Images, embedded object, and SVG size defaults */
|
||||||
|
img, object, svg|svg {
|
||||||
|
width: auto;
|
||||||
|
height: auto;
|
||||||
|
}
|
75
src/calibre/ebooks/oeb/profile.py
Normal file
75
src/calibre/ebooks/oeb/profile.py
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
'''
|
||||||
|
Device profiles.
|
||||||
|
'''
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
|
||||||
|
from itertools import izip
|
||||||
|
|
||||||
|
# (CSS keyword, numeric size) slots, smallest first.  ``None`` marks a slot
# that has no keyword or no number.
FONT_SIZES = [
    ('xx-small', 1),
    ('x-small', None),
    ('small', 2),
    ('medium', 3),
    ('large', 4),
    ('x-large', 5),
    ('xx-large', 6),
    (None, 7),
]


class Profile(object):
    """Screen geometry and font-size table for one device.

    ``width`` and ``height`` are divided by ``dpi`` and multiplied by 72
    before being stored.  ``fsizes`` is matched positionally against
    ``FONT_SIZES``; surplus entries on either side are ignored.

    Attributes set: ``width``, ``height``, ``dpi``, ``fbase`` (floats),
    ``fsizes`` (list of ``(name, num, size)`` tuples), ``fnames`` (keyword
    to size) and ``fnums`` (number to size).
    """

    def __init__(self, width, height, dpi, fbase, fsizes):
        self.width = (float(width) / dpi) * 72.
        self.height = (float(height) / dpi) * 72.
        self.dpi = float(dpi)
        self.fbase = float(fbase)
        self.fsizes = [(name, num, float(size))
                       for (name, num), size in zip(FONT_SIZES, fsizes)]
        # Lookup tables leave out the slots whose key is None.
        self.fnames = {}
        self.fnums = {}
        for name, num, size in self.fsizes:
            if name:
                self.fnames[name] = size
            if num:
                self.fnums[num] = size
|
||||||
|
|
||||||
|
|
||||||
|
# Known device profiles, keyed by name.  Each ``fsizes`` list supplies one
# point size per FONT_SIZES slot, i.e. exactly eight values.
PROFILES = {
    'PRS505':
        Profile(width=584, height=754, dpi=168.451, fbase=12,
                fsizes=[7.5, 9, 10, 12, 15.5, 20, 22, 24]),

    'MSReader':
        Profile(width=480, height=652, dpi=96, fbase=13,
                fsizes=[10, 11, 13, 16, 18, 20, 22, 26]),

    # Not really, but let's pretend
    'Mobipocket':
        Profile(width=600, height=800, dpi=96, fbase=18,
                fsizes=[14, 14, 16, 18, 20, 22, 24, 26]),

    # No clue on usable screen size; DPI should be good
    'HanlinV3':
        Profile(width=584, height=754, dpi=168.451, fbase=16,
                fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),

    'CybookG3':
        Profile(width=600, height=800, dpi=168.451, fbase=16,
                fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),

    'Kindle':
        Profile(width=525, height=640, dpi=168.451, fbase=16,
                fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),

    # This list used to carry a ninth value (24).  Profile pairs sizes with
    # the eight FONT_SIZES slots positionally, so that value was never
    # used; it is dropped here and the effective table is unchanged.
    'Browser':
        Profile(width=800, height=600, dpi=100.0, fbase=12,
                fsizes=[5, 7, 9, 12, 13.5, 17, 20, 22]),
    }
|
||||||
|
|
||||||
|
|
||||||
|
class Context(object):
    """Hold a source profile and a destination profile.

    Either argument may be a key of ``PROFILES``, in which case the
    registered profile is stored; any other value is stored unchanged.
    """
    PROFILES = PROFILES

    def __init__(self, source, dest):
        self.source = PROFILES[source] if source in PROFILES else source
        self.dest = PROFILES[dest] if dest in PROFILES else dest
|
@ -16,16 +16,20 @@ import itertools
|
|||||||
import types
|
import types
|
||||||
import re
|
import re
|
||||||
import copy
|
import copy
|
||||||
|
from itertools import izip
|
||||||
import cssutils
|
import cssutils
|
||||||
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
|
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
|
||||||
CSSValueList, cssproperties
|
CSSValueList, cssproperties
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES
|
from lxml.cssselect import css_to_xpath, ExpressionError
|
||||||
from calibre.ebooks.lit.oeb import barename, urlnormalize
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
|
||||||
|
from calibre.ebooks.oeb.base import XPNSMAP, xpath, barename, urlnormalize
|
||||||
|
from calibre.ebooks.oeb.profile import PROFILES
|
||||||
from calibre.resources import html_css
|
from calibre.resources import html_css
|
||||||
|
|
||||||
|
XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
|
||||||
HTML_CSS_STYLESHEET = cssutils.parseString(html_css)
|
HTML_CSS_STYLESHEET = cssutils.parseString(html_css)
|
||||||
XHTML_CSS_NAMESPACE = "@namespace url(http://www.w3.org/1999/xhtml);\n"
|
HTML_CSS_STYLESHEET.namespaces['h'] = XHTML_NS
|
||||||
|
|
||||||
INHERITED = set(['azimuth', 'border-collapse', 'border-spacing',
|
INHERITED = set(['azimuth', 'border-collapse', 'border-spacing',
|
||||||
'caption-side', 'color', 'cursor', 'direction', 'elevation',
|
'caption-side', 'color', 'cursor', 'direction', 'elevation',
|
||||||
@ -72,7 +76,7 @@ DEFAULTS = {'azimuth': 'center', 'background-attachment': 'scroll',
|
|||||||
'50', 'right': 'auto', 'speak': 'normal', 'speak-header': 'once',
|
'50', 'right': 'auto', 'speak': 'normal', 'speak-header': 'once',
|
||||||
'speak-numeral': 'continuous', 'speak-punctuation': 'none',
|
'speak-numeral': 'continuous', 'speak-punctuation': 'none',
|
||||||
'speech-rate': 'medium', 'stress': '50', 'table-layout': 'auto',
|
'speech-rate': 'medium', 'stress': '50', 'table-layout': 'auto',
|
||||||
'text-align': 'left', 'text-decoration': 'none', 'text-indent':
|
'text-align': 'auto', 'text-decoration': 'none', 'text-indent':
|
||||||
0, 'text-transform': 'none', 'top': 'auto', 'unicode-bidi':
|
0, 'text-transform': 'none', 'top': 'auto', 'unicode-bidi':
|
||||||
'normal', 'vertical-align': 'baseline', 'visibility': 'visible',
|
'normal', 'vertical-align': 'baseline', 'visibility': 'visible',
|
||||||
'voice-family': 'default', 'volume': 'medium', 'white-space':
|
'voice-family': 'default', 'volume': 'medium', 'white-space':
|
||||||
@ -82,42 +86,30 @@ DEFAULTS = {'azimuth': 'center', 'background-attachment': 'scroll',
|
|||||||
FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
|
FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
|
||||||
'x-large', 'xx-large'])
|
'x-large', 'xx-large'])
|
||||||
|
|
||||||
FONT_SIZE_LIST = [('xx-small', 1, 6.),
|
|
||||||
('x-small', None, 7.),
|
|
||||||
('small', 2, 8.),
|
|
||||||
('medium', 3, 9.),
|
|
||||||
('large', 4, 11.),
|
|
||||||
('x-large', 5, 13.),
|
|
||||||
('xx-large', 6, 15.),
|
|
||||||
(None, 7, 17.)]
|
|
||||||
|
|
||||||
FONT_SIZE_BY_NAME = {}
|
|
||||||
FONT_SIZE_BY_NUM = {}
|
|
||||||
for name, num, size in FONT_SIZE_LIST:
|
|
||||||
FONT_SIZE_BY_NAME[name] = size
|
|
||||||
FONT_SIZE_BY_NUM[num] = size
|
|
||||||
|
|
||||||
XPNSMAP = {'h': XHTML_NS,}
|
|
||||||
def xpath(elem, expr):
|
|
||||||
return elem.xpath(expr, namespaces=XPNSMAP)
|
|
||||||
|
|
||||||
|
|
||||||
class Page(object):
|
|
||||||
def __init__(self, width, height, dpi):
|
|
||||||
self.width = float(width)
|
|
||||||
self.height = float(height)
|
|
||||||
self.dpi = float(dpi)
|
|
||||||
|
|
||||||
class Profiles(object):
|
|
||||||
PRS500 = Page(584, 754, 168.451)
|
|
||||||
PRS505 = PRS500
|
|
||||||
|
|
||||||
|
class CSSSelector(etree.XPath):
    """XPath object compiled from a CSS selector string.

    The selector is normalised, translated with ``css_to_xpath`` and the
    resulting expression post-processed before being handed to
    ``etree.XPath``.  The normalised selector text is kept in ``css``.
    """
    MIN_SPACE_RE = re.compile(r' *([>~+]) *')
    LOCAL_NAME_RE = re.compile(r"(?<!local-)name[(][)] *= *'[^:]+:")

    def __init__(self, css, namespaces=XPNSMAP):
        # Strip the spaces surrounding the '>', '~' and '+' combinators.
        compact = self.MIN_SPACE_RE.sub(r'\1', css)
        # Rewrite prefixed "name() = 'prefix:" tests as local-name() tests.
        expression = self.LOCAL_NAME_RE.sub(
            r"local-name() = '", css_to_xpath(compact))
        etree.XPath.__init__(self, expression, namespaces=namespaces)
        self.css = compact

    def __repr__(self):
        identity = hex(abs(id(self)))[2:]
        return '<%s %s for %r>' % (
            self.__class__.__name__, identity, self.css)
|
||||||
|
|
||||||
|
|
||||||
class Stylizer(object):
|
class Stylizer(object):
|
||||||
STYLESHEETS = {}
|
STYLESHEETS = {}
|
||||||
|
|
||||||
def __init__(self, tree, path, oeb, page=Profiles.PRS505):
|
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
|
||||||
self.page = page
|
self.profile = profile
|
||||||
base = os.path.dirname(path)
|
base = os.path.dirname(path)
|
||||||
basename = os.path.basename(path)
|
basename = os.path.basename(path)
|
||||||
cssname = os.path.splitext(basename)[0] + '.css'
|
cssname = os.path.splitext(basename)[0] + '.css'
|
||||||
@ -126,12 +118,13 @@ class Stylizer(object):
|
|||||||
parser = cssutils.CSSParser()
|
parser = cssutils.CSSParser()
|
||||||
parser.setFetcher(lambda path: ('utf-8', oeb.container.read(path)))
|
parser.setFetcher(lambda path: ('utf-8', oeb.container.read(path)))
|
||||||
for elem in head:
|
for elem in head:
|
||||||
tag = barename(elem.tag)
|
if elem.tag == XHTML('style') and elem.text \
|
||||||
if tag == 'style':
|
and elem.get('type', CSS_MIME) in OEB_STYLES:
|
||||||
text = ''.join(elem.text)
|
text = XHTML_CSS_NAMESPACE + elem.text
|
||||||
stylesheet = parser.parseString(text, href=cssname)
|
stylesheet = parser.parseString(text, href=cssname)
|
||||||
|
stylesheet.namespaces['h'] = XHTML_NS
|
||||||
stylesheets.append(stylesheet)
|
stylesheets.append(stylesheet)
|
||||||
elif tag == 'link' \
|
elif elem.tag == XHTML('link') and elem.get('href') \
|
||||||
and elem.get('rel', 'stylesheet') == 'stylesheet' \
|
and elem.get('rel', 'stylesheet') == 'stylesheet' \
|
||||||
and elem.get('type', CSS_MIME) in OEB_STYLES:
|
and elem.get('type', CSS_MIME) in OEB_STYLES:
|
||||||
href = urlnormalize(elem.attrib['href'])
|
href = urlnormalize(elem.attrib['href'])
|
||||||
@ -143,11 +136,13 @@ class Stylizer(object):
|
|||||||
data = XHTML_CSS_NAMESPACE
|
data = XHTML_CSS_NAMESPACE
|
||||||
data += oeb.manifest.hrefs[path].data
|
data += oeb.manifest.hrefs[path].data
|
||||||
stylesheet = parser.parseString(data, href=path)
|
stylesheet = parser.parseString(data, href=path)
|
||||||
|
stylesheet.namespaces['h'] = XHTML_NS
|
||||||
self.STYLESHEETS[path] = stylesheet
|
self.STYLESHEETS[path] = stylesheet
|
||||||
stylesheets.append(stylesheet)
|
stylesheets.append(stylesheet)
|
||||||
rules = []
|
rules = []
|
||||||
index = 0
|
index = 0
|
||||||
self.stylesheets = set()
|
self.stylesheets = set()
|
||||||
|
self.page_rule = {}
|
||||||
for stylesheet in stylesheets:
|
for stylesheet in stylesheets:
|
||||||
href = stylesheet.href
|
href = stylesheet.href
|
||||||
self.stylesheets.add(href)
|
self.stylesheets.add(href)
|
||||||
@ -157,7 +152,16 @@ class Stylizer(object):
|
|||||||
rules.sort()
|
rules.sort()
|
||||||
self.rules = rules
|
self.rules = rules
|
||||||
self._styles = {}
|
self._styles = {}
|
||||||
|
for _, _, cssdict, text, _ in rules:
|
||||||
|
try:
|
||||||
|
selector = CSSSelector(text)
|
||||||
|
except ExpressionError, e:
|
||||||
|
continue
|
||||||
|
for elem in selector(tree):
|
||||||
|
self.style(elem)._update_cssdict(cssdict)
|
||||||
|
for elem in xpath(tree, '//h:*[@style]'):
|
||||||
|
self.style(elem)._apply_style_attr()
|
||||||
|
|
||||||
def flatten_rule(self, rule, href, index):
|
def flatten_rule(self, rule, href, index):
|
||||||
results = []
|
results = []
|
||||||
if isinstance(rule, CSSStyleRule):
|
if isinstance(rule, CSSStyleRule):
|
||||||
@ -169,9 +173,9 @@ class Stylizer(object):
|
|||||||
results.append((specificity, selector, style, text, href))
|
results.append((specificity, selector, style, text, href))
|
||||||
elif isinstance(rule, CSSPageRule):
|
elif isinstance(rule, CSSPageRule):
|
||||||
style = self.flatten_style(rule.style)
|
style = self.flatten_style(rule.style)
|
||||||
results.append(((0, 0, 0, 0), [], style, '@page', href))
|
self.page_rule.update(style)
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def flatten_style(self, cssstyle):
|
def flatten_style(self, cssstyle):
|
||||||
style = {}
|
style = {}
|
||||||
for prop in cssstyle:
|
for prop in cssstyle:
|
||||||
@ -186,7 +190,7 @@ class Stylizer(object):
|
|||||||
size = style['font-size']
|
size = style['font-size']
|
||||||
if size == 'normal': size = 'medium'
|
if size == 'normal': size = 'medium'
|
||||||
if size in FONT_SIZE_NAMES:
|
if size in FONT_SIZE_NAMES:
|
||||||
style['font-size'] = "%dpt" % FONT_SIZE_BY_NAME[size]
|
style['font-size'] = "%dpt" % self.profile.fnames[size]
|
||||||
return style
|
return style
|
||||||
|
|
||||||
def _normalize_edge(self, cssvalue, name):
|
def _normalize_edge(self, cssvalue, name):
|
||||||
@ -233,9 +237,10 @@ class Stylizer(object):
|
|||||||
return style
|
return style
|
||||||
|
|
||||||
def style(self, element):
|
def style(self, element):
|
||||||
try: return self._styles[element]
|
try:
|
||||||
except: pass
|
return self._styles[element]
|
||||||
return Style(element, self)
|
except KeyError:
|
||||||
|
return Style(element, self)
|
||||||
|
|
||||||
def stylesheet(self, name, font_scale=None):
|
def stylesheet(self, name, font_scale=None):
|
||||||
rules = []
|
rules = []
|
||||||
@ -250,86 +255,43 @@ class Stylizer(object):
|
|||||||
rules.append('%s {\n %s;\n}' % (selector, style))
|
rules.append('%s {\n %s;\n}' % (selector, style))
|
||||||
return '\n'.join(rules)
|
return '\n'.join(rules)
|
||||||
|
|
||||||
|
|
||||||
class Style(object):
|
class Style(object):
|
||||||
def __init__(self, element, stylizer):
|
def __init__(self, element, stylizer):
|
||||||
self._element = element
|
self._element = element
|
||||||
self._page = stylizer.page
|
self._profile = stylizer.profile
|
||||||
self._stylizer = stylizer
|
self._stylizer = stylizer
|
||||||
self._style = self._assemble_style(element, stylizer)
|
self._style = {}
|
||||||
|
self._fontSize = None
|
||||||
|
self._width = None
|
||||||
|
self._height = None
|
||||||
|
self._lineHeight = None
|
||||||
stylizer._styles[element] = self
|
stylizer._styles[element] = self
|
||||||
|
|
||||||
|
def _update_cssdict(self, cssdict):
|
||||||
|
self._style.update(cssdict)
|
||||||
|
|
||||||
def _assemble_style(self, element, stylizer):
|
def _apply_style_attr(self):
|
||||||
result = {}
|
attrib = self._element.attrib
|
||||||
rules = stylizer.rules
|
if 'style' in attrib:
|
||||||
for _, selector, style, _, _ in rules:
|
style = CSSStyleDeclaration(attrib['style'])
|
||||||
if self._selects_element(element, selector):
|
self._style.update(self._stylizer.flatten_style(style))
|
||||||
result.update(style)
|
|
||||||
try:
|
|
||||||
style = CSSStyleDeclaration(element.attrib['style'])
|
|
||||||
result.update(stylizer.flatten_style(style))
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
return result
|
|
||||||
|
|
||||||
def _selects_element(self, element, selector):
|
|
||||||
def _selects_element(element, items, index):
|
|
||||||
if index == -1:
|
|
||||||
return True
|
|
||||||
item = items[index]
|
|
||||||
if item.type == 'universal':
|
|
||||||
pass
|
|
||||||
elif item.type == 'type-selector':
|
|
||||||
name1 = ("{%s}%s" % item.value).lower()
|
|
||||||
name2 = element.tag.lower()
|
|
||||||
if name1 != name2:
|
|
||||||
return False
|
|
||||||
elif item.type == 'id':
|
|
||||||
name1 = item.value[1:]
|
|
||||||
name2 = element.get('id', '')
|
|
||||||
if name1 != name2:
|
|
||||||
return False
|
|
||||||
elif item.type == 'class':
|
|
||||||
name = item.value[1:].lower()
|
|
||||||
classes = element.get('class', '').lower().split()
|
|
||||||
if name not in classes:
|
|
||||||
return False
|
|
||||||
elif item.type == 'child':
|
|
||||||
parent = element.getparent()
|
|
||||||
if parent is None:
|
|
||||||
return False
|
|
||||||
element = parent
|
|
||||||
elif item.type == 'descendant':
|
|
||||||
element = element.getparent()
|
|
||||||
while element is not None:
|
|
||||||
if _selects_element(element, items, index - 1):
|
|
||||||
return True
|
|
||||||
element = element.getparent()
|
|
||||||
return False
|
|
||||||
elif item.type == 'pseudo-class':
|
|
||||||
if item.value == ':first-child':
|
|
||||||
e = element.getprevious()
|
|
||||||
if e is not None:
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
elif item.type == 'pseudo-element':
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
return _selects_element(element, items, index - 1)
|
|
||||||
return _selects_element(element, selector, len(selector) - 1)
|
|
||||||
|
|
||||||
def _has_parent(self):
|
def _has_parent(self):
|
||||||
parent = self._element.getparent()
|
return (self._element.getparent() is not None)
|
||||||
return (parent is not None) \
|
|
||||||
and (parent in self._stylizer._styles)
|
def _get_parent(self):
|
||||||
|
elem = self._element.getparent()
|
||||||
|
if elem is None:
|
||||||
|
return None
|
||||||
|
return self._stylizer.style(elem)
|
||||||
|
|
||||||
def __getitem__(self, name):
|
def __getitem__(self, name):
|
||||||
domname = cssproperties._toDOMname(name)
|
domname = cssproperties._toDOMname(name)
|
||||||
if hasattr(self, domname):
|
if hasattr(self, domname):
|
||||||
return getattr(self, domname)
|
return getattr(self, domname)
|
||||||
return self._unit_convert(self._get(name))
|
return self._unit_convert(self._get(name))
|
||||||
|
|
||||||
def _get(self, name):
|
def _get(self, name):
|
||||||
result = None
|
result = None
|
||||||
if name in self._style:
|
if name in self._style:
|
||||||
@ -337,8 +299,8 @@ class Style(object):
|
|||||||
if (result == 'inherit'
|
if (result == 'inherit'
|
||||||
or (result is None and name in INHERITED
|
or (result is None and name in INHERITED
|
||||||
and self._has_parent())):
|
and self._has_parent())):
|
||||||
styles = self._stylizer._styles
|
stylizer = self._stylizer
|
||||||
result = styles[self._element.getparent()]._get(name)
|
result = stylizer.style(self._element.getparent())._get(name)
|
||||||
if result is None:
|
if result is None:
|
||||||
result = DEFAULTS[name]
|
result = DEFAULTS[name]
|
||||||
return result
|
return result
|
||||||
@ -359,9 +321,9 @@ class Style(object):
|
|||||||
unit = m.group(2)
|
unit = m.group(2)
|
||||||
if unit == '%':
|
if unit == '%':
|
||||||
base = base or self.width
|
base = base or self.width
|
||||||
result = (value/100.0) * base
|
result = (value / 100.0) * base
|
||||||
elif unit == 'px':
|
elif unit == 'px':
|
||||||
result = value * 72.0 / self._page.dpi
|
result = value * 72.0 / self._profile.dpi
|
||||||
elif unit == 'in':
|
elif unit == 'in':
|
||||||
result = value * 72.0
|
result = value * 72.0
|
||||||
elif unit == 'pt':
|
elif unit == 'pt':
|
||||||
@ -379,22 +341,22 @@ class Style(object):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def fontSize(self):
|
def fontSize(self):
|
||||||
def normalize_fontsize(value, base=None):
|
def normalize_fontsize(value, base):
|
||||||
result = None
|
result = None
|
||||||
factor = None
|
factor = None
|
||||||
if value == 'inherit':
|
if value == 'inherit':
|
||||||
value = 'medium'
|
value = base
|
||||||
if value in FONT_SIZE_NAMES:
|
if value in FONT_SIZE_NAMES:
|
||||||
result = FONT_SIZE_BY_NAME[value]
|
result = self._profile.fnames[value]
|
||||||
elif value == 'smaller':
|
elif value == 'smaller':
|
||||||
factor = 1.0/1.2
|
factor = 1.0/1.2
|
||||||
for _, _, size in FONT_SIZE_LIST:
|
for _, _, size in self._profile.fsizes:
|
||||||
if base <= size: break
|
if base <= size: break
|
||||||
factor = None
|
factor = None
|
||||||
result = size
|
result = size
|
||||||
elif value == 'larger':
|
elif value == 'larger':
|
||||||
factor = 1.2
|
factor = 1.2
|
||||||
for _, _, size in reversed(FONT_SIZE_LIST):
|
for _, _, size in reversed(self._profile.fsizes):
|
||||||
if base >= size: break
|
if base >= size: break
|
||||||
factor = None
|
factor = None
|
||||||
result = size
|
result = size
|
||||||
@ -405,40 +367,108 @@ class Style(object):
|
|||||||
if factor:
|
if factor:
|
||||||
result = factor * base
|
result = factor * base
|
||||||
return result
|
return result
|
||||||
result = None
|
if self._fontSize is None:
|
||||||
if self._has_parent():
|
result = None
|
||||||
styles = self._stylizer._styles
|
parent = self._get_parent()
|
||||||
base = styles[self._element.getparent()].fontSize
|
if parent is not None:
|
||||||
else:
|
base = parent.fontSize
|
||||||
base = normalize_fontsize(DEFAULTS['font-size'])
|
else:
|
||||||
if 'font-size' in self._style:
|
base = self._profile.fbase
|
||||||
size = self._style['font-size']
|
if 'font-size' in self._style:
|
||||||
result = normalize_fontsize(size, base)
|
size = self._style['font-size']
|
||||||
else:
|
result = normalize_fontsize(size, base)
|
||||||
result = base
|
else:
|
||||||
self.__dict__['fontSize'] = result
|
result = base
|
||||||
return result
|
self._fontSize = result
|
||||||
|
return self._fontSize
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def width(self):
|
def width(self):
|
||||||
result = None
|
if self._width is None:
|
||||||
base = None
|
width = None
|
||||||
if self._has_parent():
|
base = None
|
||||||
styles = self._stylizer._styles
|
parent = self._get_parent()
|
||||||
base = styles[self._element.getparent()].width
|
if parent is not None:
|
||||||
else:
|
base = parent.width
|
||||||
base = self._page.width
|
else:
|
||||||
if 'width' in self._style:
|
base = self._profile.width
|
||||||
width = self._style['width']
|
if 'width' is self._element.attrib:
|
||||||
if width == 'auto':
|
width = self._element.attrib['width']
|
||||||
|
elif 'width' in self._style:
|
||||||
|
width = self._style['width']
|
||||||
|
if not width or width == 'auto':
|
||||||
result = base
|
result = base
|
||||||
else:
|
else:
|
||||||
result = self._unit_convert(width, base=base)
|
result = self._unit_convert(width, base=base)
|
||||||
else:
|
self._width = result
|
||||||
result = base
|
return self._width
|
||||||
self.__dict__['width'] = result
|
|
||||||
return result
|
@property
def height(self):
    """Height of this element, computed once and cached.

    The value comes from the element's ``height`` attribute when present,
    otherwise from the ``height`` style property.  A missing, empty or
    ``'auto'`` value yields the base height: the parent style's height,
    or the profile height when there is no parent.  Any other value is
    passed to ``_unit_convert`` with that base.
    """
    if self._height is None:
        height = None
        parent = self._get_parent()
        if parent is not None:
            base = parent.height
        else:
            base = self._profile.height
        attrib = self._element.attrib
        # Membership test.  The previous identity test (``is``) could
        # never be true, so the attribute was silently ignored.
        if 'height' in attrib:
            height = attrib['height']
        elif 'height' in self._style:
            height = self._style['height']
        if not height or height == 'auto':
            result = base
        else:
            result = self._unit_convert(height, base=base)
        self._height = result
    return self._height
|
||||||
|
|
||||||
|
@property
def lineHeight(self):
    """Line height of this element, computed once and cached.

    A ``line-height`` style value that parses as a number is treated as a
    multiple of the font size; any other value goes through
    ``_unit_convert`` with the font size as base.  Without the property
    the parent's line height is used, and without a parent the result is
    1.2 times the font size.
    """
    if self._lineHeight is None:
        # The lookup method is _get_parent(); the former call to
        # self._getparent() raised AttributeError on every first access.
        parent = self._get_parent()
        if 'line-height' in self._style:
            lineh = self._style['line-height']
            try:
                multiplier = float(lineh)
            except ValueError:
                result = self._unit_convert(lineh, base=self.fontSize)
            else:
                result = multiplier * self.fontSize
        elif parent is not None:
            # TODO: proper inheritance
            result = parent.lineHeight
        else:
            result = 1.2 * self.fontSize
        self._lineHeight = result
    return self._lineHeight
|
||||||
|
|
||||||
|
@property
def marginTop(self):
    """The ``margin-top`` value converted by ``_unit_convert``.

    Percentages are resolved against the width of the containing block
    (the parent style's width, or this style's own width when there is no
    parent), as CSS 2.1 specifies for vertical margins too.  The element
    height was used before.
    """
    parent = self._get_parent()
    base = self.width if parent is None else parent.width
    return self._unit_convert(self._get('margin-top'), base=base)
|
||||||
|
|
||||||
|
@property
def marginBottom(self):
    """The ``margin-bottom`` value converted by ``_unit_convert``.

    Percentages are resolved against the width of the containing block
    (the parent style's width, or this style's own width when there is no
    parent), as CSS 2.1 specifies for vertical margins too.  The element
    height was used before.
    """
    parent = self._get_parent()
    base = self.width if parent is None else parent.width
    return self._unit_convert(self._get('margin-bottom'), base=base)
|
||||||
|
|
||||||
|
@property
def paddingTop(self):
    """The ``padding-top`` value converted by ``_unit_convert``.

    Percentages are resolved against the width of the containing block
    (the parent style's width, or this style's own width when there is no
    parent), as CSS 2.1 specifies for vertical padding too.  The element
    height was used before.
    """
    parent = self._get_parent()
    base = self.width if parent is None else parent.width
    return self._unit_convert(self._get('padding-top'), base=base)
|
||||||
|
|
||||||
|
@property
def paddingBottom(self):
    """The ``padding-bottom`` value converted by ``_unit_convert``.

    Percentages are resolved against the width of the containing block
    (the parent style's width, or this style's own width when there is no
    parent), as CSS 2.1 specifies for vertical padding too.  The element
    height was used before.
    """
    parent = self._get_parent()
    base = self.width if parent is None else parent.width
    return self._unit_convert(self._get('padding-bottom'), base=base)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
items = self._style.items()
|
items = self._style.items()
|
||||||
|
items.sort()
|
||||||
return '; '.join("%s: %s" % (key, val) for key, val in items)
|
return '; '.join("%s: %s" % (key, val) for key, val in items)
|
||||||
|
|
||||||
|
def cssdict(self):
    """Return a new dict holding this style's property/value pairs."""
    snapshot = {}
    snapshot.update(self._style)
    return snapshot
|
0
src/calibre/ebooks/oeb/transforms/__init__.py
Normal file
0
src/calibre/ebooks/oeb/transforms/__init__.py
Normal file
270
src/calibre/ebooks/oeb/transforms/flatcss.py
Normal file
270
src/calibre/ebooks/oeb/transforms/flatcss.py
Normal file
@ -0,0 +1,270 @@
|
|||||||
|
'''
|
||||||
|
CSS flattening transform.
|
||||||
|
'''
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import operator
|
||||||
|
import math
|
||||||
|
from itertools import chain
|
||||||
|
from collections import defaultdict
|
||||||
|
from lxml import etree
|
||||||
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
|
||||||
|
from calibre.ebooks.oeb.base import CSS_MIME, OEB_STYLES
|
||||||
|
from calibre.ebooks.oeb.base import namespace, barename
|
||||||
|
from calibre.ebooks.oeb.base import OEBBook
|
||||||
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
|
|
||||||
|
# One or more consecutive space, tab, CR, LF or vertical-tab characters.
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
# A trailing run of digits and/or hyphens.
STRIPNUM = re.compile(r'[-0-9]+$')
|
||||||
|
|
||||||
|
class KeyMapper(object):
    """Map a source font size onto one size out of a fixed key.

    Every key size is rated against ``dbase`` with ``relate``.  A source
    size is rated against ``sbase`` the same way and mapped to the key
    size whose rating is nearest.  Results are memoised in ``cache``.
    """

    def __init__(self, sbase, dbase, dkey):
        self.sbase = float(sbase)
        self.dprop = [(self.relate(size, dbase), float(size))
                      for size in dkey]
        self.cache = {}

    @staticmethod
    def relate(size, base):
        """Return a signed logarithmic rating of ``size`` against ``base``.

        Sizes within 0.1 of the base rate 0; smaller sizes rate negative
        and larger sizes positive.
        """
        size = float(size)
        base = float(base)
        delta = abs(base - size)
        if delta < 0.1:
            return 0
        if size < base:
            sign, endp = -1, 0
        else:
            sign, endp = 1, 36
        diff = (delta * 3) + ((36 - size) / 100)
        logb = abs(base - endp)
        # NOTE(review): math.log raises when logb is 0 or 1, or when diff
        # is not positive -- confirm callers never reach such sizes.
        return sign * math.log(diff, logb)

    def __getitem__(self, ssize):
        if ssize not in self.cache:
            self.cache[ssize] = self.map(ssize)
        return self.cache[ssize]

    def map(self, ssize):
        """Return the key size rated closest to ``ssize`` (uncached)."""
        prop = self.relate(ssize, self.sbase)
        # Tuples compare on distance first, then on size, so equally close
        # candidates resolve to the smaller size.
        nearest = min((abs(prop - rating), size)
                      for rating, size in self.dprop)
        return nearest[1]
|
||||||
|
|
||||||
|
class ScaleMapper(object):
    """Scale font sizes linearly by the ratio ``dbase / sbase``."""

    def __init__(self, sbase, dbase):
        self.dscale = float(dbase) / float(sbase)

    def __getitem__(self, ssize):
        return ssize * self.dscale
|
||||||
|
|
||||||
|
class NullMapper(object):
    """Identity mapper: every size maps to itself."""

    def __init__(self):
        pass

    def __getitem__(self, ssize):
        return ssize
|
||||||
|
|
||||||
|
def FontMapper(sbase=None, dbase=None, dkey=None):
    """Return the mapper matching the arguments that were supplied.

    Without both base sizes a ``NullMapper`` is returned; with both base
    sizes and a size key, a ``KeyMapper``; with the base sizes alone, a
    ``ScaleMapper``.
    """
    if not (sbase and dbase):
        return NullMapper()
    if dkey:
        return KeyMapper(sbase, dbase, dkey)
    return ScaleMapper(sbase, dbase)
|
||||||
|
|
||||||
|
|
||||||
|
class CSSFlattener(object):
    """OEB transform that flattens computed CSS into one generated stylesheet.

    Every XHTML element of every spine item gets its computed style written
    out as a generated class; identical declarations share a class.  Font
    sizes are remapped through a ``FontMapper`` when a base size is given.
    """

    def __init__(self, fbase=None, fkey=None, lineh=None, unfloat=False,
                 untable=False):
        """Store the transform options.

        ``fbase`` and ``fkey`` are passed to ``FontMapper`` as destination
        base and key, ``lineh`` is the line height written to elements that
        do not set one, and ``unfloat`` / ``untable`` enable the
        corresponding ``display`` rewrites in ``flatten_node()``.
        """
        self.fbase = fbase
        self.fkey = fkey
        self.lineh = lineh
        self.unfloat = unfloat
        self.untable = untable

    def transform(self, oeb, context):
        """Run the transform on ``oeb`` using the profiles in ``context``."""
        oeb.logger.info('Flattening CSS and remapping font sizes...')
        self.oeb = oeb
        self.context = context
        self.stylize_spine()
        # The source base size is only needed when remapping is requested.
        self.sbase = self.baseline_spine() if self.fbase else None
        self.fmap = FontMapper(self.sbase, self.fbase, self.fkey)
        self.flatten_spine()

    def stylize_spine(self):
        """Create a ``Stylizer`` for every spine item, keyed by the item."""
        self.stylizers = {}
        profile = self.context.source
        for item in self.oeb.spine:
            html = item.data
            stylizer = Stylizer(html, item.href, self.oeb, profile)
            self.stylizers[item] = stylizer

    def baseline_node(self, node, stylizer, sizes, csize):
        """Add the text length under ``node`` to ``sizes``, per font size.

        The incoming ``csize`` is ignored: it is replaced by the node's own
        computed ``font-size`` before use.
        """
        csize = stylizer.style(node)['font-size']
        if node.text:
            sizes[csize] += len(COLLAPSE.sub(' ', node.text))
        for child in node:
            self.baseline_node(child, stylizer, sizes, csize)
            # Tail text belongs to this node, so it counts at this size.
            if child.tail:
                sizes[csize] += len(COLLAPSE.sub(' ', child.tail))

    def baseline_spine(self):
        """Return the font size that carries the most text across the spine."""
        sizes = defaultdict(float)
        for item in self.oeb.spine:
            html = item.data
            stylizer = self.stylizers[item]
            body = html.find(XHTML('body'))
            fsize = self.context.source.fbase
            self.baseline_node(body, stylizer, sizes, fsize)
        sbase = max(sizes.items(), key=operator.itemgetter(1))[0]
        self.oeb.logger.info(
            "Source base font size is %0.05fpt" % sbase)
        return sbase

    def clean_edges(self, cssdict, style, fsize):
        """Snap vertical margins and padding to multiples of the line height.

        Only top/bottom properties present in ``cssdict`` and not given as a
        percentage are rewritten, in ems relative to ``fsize``.  Zero values
        are left alone; values up to one source line (``sbase * 1.26``)
        become one destination line; larger values are rounded to a whole
        number of lines.
        """
        slineh = self.sbase * 1.26
        dlineh = self.lineh
        for kind in ('margin', 'padding'):
            for edge in ('bottom', 'top'):
                prop = "%s-%s" % (kind, edge)
                if prop not in cssdict:
                    continue
                if '%' in cssdict[prop]:
                    continue
                value = style[prop]
                if value == 0:
                    continue
                elif value <= slineh:
                    cssdict[prop] = "%0.5fem" % (dlineh / fsize)
                else:
                    value = round(value / slineh) * dlineh
                    cssdict[prop] = "%0.5fem" % (value / fsize)

    def flatten_node(self, node, stylizer, names, styles, psize, left=0):
        """Replace the styling of ``node`` and its descendants with classes.

        ``names`` counts how often each base class name has been used,
        ``styles`` maps a CSS declaration block to its generated class name,
        ``psize`` is the parent's mapped font size and ``left`` the
        accumulated left margin.  Nodes that are not XHTML elements are
        skipped together with their subtree.
        """
        if not isinstance(node.tag, basestring) \
           or namespace(node.tag) != XHTML_NS:
            return
        tag = barename(node.tag)
        style = stylizer.style(node)
        cssdict = style.cssdict()
        # Fold presentational attributes into the CSS declarations.
        if 'align' in node.attrib:
            cssdict['text-align'] = node.attrib['align']
            del node.attrib['align']
        if node.tag == XHTML('font'):
            node.tag = XHTML('span')
            if 'size' in node.attrib:
                size = node.attrib['size'].strip()
                if size:
                    fnums = self.context.source.fnums
                    if size[0] in ('+', '-'):
                        # Oh, the warcrimes
                        cssdict['font-size'] = fnums[3+int(size)]
                    else:
                        cssdict['font-size'] = fnums[int(size)]
                del node.attrib['size']
        # NOTE(review): the nesting of the color/bgcolor handling was
        # reconstructed from flattened text; it is applied to every element
        # here -- confirm against the original file.
        if 'color' in node.attrib:
            cssdict['color'] = node.attrib['color']
            del node.attrib['color']
        if 'bgcolor' in node.attrib:
            cssdict['background-color'] = node.attrib['bgcolor']
            del node.attrib['bgcolor']
        if cssdict:
            if 'font-size' in cssdict:
                fsize = self.fmap[style['font-size']]
                cssdict['font-size'] = "%0.5fem" % (fsize / psize)
                psize = fsize
            if self.lineh and self.fbase and tag != 'body':
                self.clean_edges(cssdict, style, psize)
            margin = style['margin-left']
            left += margin if isinstance(margin, float) else 0
            # A hanging indent that would reach past the accumulated left
            # margin is compensated with a percentage left margin.
            if (left + style['text-indent']) < 0:
                percent = (margin - style['text-indent']) / style['width']
                cssdict['margin-left'] = "%d%%" % (percent * 100)
                left -= style['text-indent']
            if 'display' in cssdict and cssdict['display'] == 'in-line':
                cssdict['display'] = 'inline'
            # NOTE(review): this removes 'display', not 'float', from
            # floated elements -- confirm that is the intent of ``unfloat``.
            if self.unfloat and 'float' in cssdict \
               and cssdict.get('display', 'none') != 'none':
                del cssdict['display']
            if self.untable and 'display' in cssdict \
               and cssdict['display'].startswith('table'):
                display = cssdict['display']
                if display == 'table-cell':
                    cssdict['display'] = 'inline'
                else:
                    cssdict['display'] = 'block'
            if 'vertical-align' in cssdict \
               and cssdict['vertical-align'] == 'sup':
                cssdict['vertical-align'] = 'super'
        if self.lineh and 'line-height' not in cssdict:
            lineh = self.lineh / psize
            cssdict['line-height'] = "%0.5fem" % lineh
        if cssdict:
            items = sorted(cssdict.items())
            css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
            # An empty or whitespace-only class attribute has no first
            # token; fall back to the default name instead of raising
            # IndexError.  The same fallback applies when stripping leaves
            # nothing, which would otherwise yield an invalid selector.
            classes = node.get('class', '').split()
            first = classes[0] if classes else 'calibre'
            klass = STRIPNUM.sub('', first) or 'calibre'
            if css in styles:
                match = styles[css]
            else:
                match = klass + str(names[klass] or '')
                styles[css] = match
                names[klass] += 1
            node.attrib['class'] = match
        elif 'class' in node.attrib:
            del node.attrib['class']
        if 'style' in node.attrib:
            del node.attrib['style']
        for child in node:
            self.flatten_node(child, stylizer, names, styles, psize, left)

    def flatten_head(self, item, stylizer, href):
        """Point the ``<head>`` of ``item`` at the generated stylesheet.

        Existing stylesheet ``<link>`` and ``<style>`` elements are removed,
        a link to ``href`` is appended, and the stylizer's ``@page`` rule,
        if any, is written out as an inline ``<style>``.
        """
        html = item.data
        head = html.find(XHTML('head'))
        # Iterate over a snapshot because children are removed in the loop.
        for node in list(head):
            if node.tag == XHTML('link') \
               and node.get('rel', 'stylesheet') == 'stylesheet' \
               and node.get('type', CSS_MIME) in OEB_STYLES:
                head.remove(node)
            elif node.tag == XHTML('style') \
                 and node.get('type', CSS_MIME) in OEB_STYLES:
                head.remove(node)
        href = item.relhref(href)
        etree.SubElement(head, XHTML('link'),
                         rel='stylesheet', type=CSS_MIME, href=href)
        if stylizer.page_rule:
            items = sorted(stylizer.page_rule.items())
            css = '; '.join("%s: %s" % (key, val) for key, val in items)
            style = etree.SubElement(head, XHTML('style'), type=CSS_MIME)
            style.text = "@page { %s; }" % css

    def replace_css(self, css):
        """Replace all stylesheets in the manifest with ``css``.

        Returns the href of the newly added stylesheet item.
        """
        manifest = self.oeb.manifest
        id, href = manifest.generate('css', 'stylesheet.css')
        # Iterate over a snapshot because items are removed in the loop.
        for item in list(manifest.values()):
            if item.media_type in OEB_STYLES:
                manifest.remove(item)
        manifest.add(id, href, CSS_MIME, data=css)
        return href

    def flatten_spine(self):
        """Flatten every spine item and install the combined stylesheet."""
        names = defaultdict(int)
        styles = {}
        for item in self.oeb.spine:
            html = item.data
            stylizer = self.stylizers[item]
            body = html.find(XHTML('body'))
            fsize = self.context.dest.fbase
            self.flatten_node(body, stylizer, names, styles, fsize)
        # ``styles`` maps declarations to class names; emit rules ordered
        # by class name.
        items = sorted((key, val) for (val, key) in styles.items())
        css = ''.join(".%s {\n%s;\n}\n\n" % (key, val) for key, val in items)
        href = self.replace_css(css)
        for item in self.oeb.spine:
            stylizer = self.stylizers[item]
            self.flatten_head(item, stylizer, href)
|
87
src/calibre/ebooks/oeb/transforms/htmltoc.py
Normal file
87
src/calibre/ebooks/oeb/transforms/htmltoc.py
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
'''
|
||||||
|
HTML-TOC-adding transform.
|
||||||
|
'''
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
from lxml import etree
|
||||||
|
from calibre.ebooks.oeb.base import XML, XHTML, XHTML_NS
|
||||||
|
from calibre.ebooks.oeb.base import XHTML_MIME, CSS_MIME
|
||||||
|
from calibre.ebooks.oeb.base import element
|
||||||
|
|
||||||
|
# Stylesheet text for each supported in-line TOC layout, keyed by style name.
# HTMLTOCAdder.transform() stores the selected entry as the data of the
# generated 'tocstyle.css' manifest item and falls back to 'nested' for
# unknown style names.
STYLE_CSS = {
'nested': """
.calibre_toc_header {
text-align: center;
}
.calibre_toc_block {
margin-left: 1.2em;
text-indent: -1.2em;
}
.calibre_toc_block .calibre_toc_block {
margin-left: 2.4em;
}
.calibre_toc_block .calibre_toc_block .calibre_toc_block {
margin-left: 3.6em;
}
""",

'centered': """
.calibre_toc_header {
text-align: center;
}
.calibre_toc_block {
text-align: center;
}
body > .calibre_toc_block {
margin-top: 1.2em;
}
"""
}
|
||||||
|
|
||||||
|
class HTMLTOCAdder(object):
    """OEB transform that adds an XHTML table-of-contents page to a book."""

    def __init__(self, style='nested'):
        """Remember which ``STYLE_CSS`` entry to use for the TOC page."""
        self.style = style

    def transform(self, oeb, context):
        """Generate the TOC page unless the guide already has a 'toc' entry.

        Adds a stylesheet and a ``contents.xhtml`` document to the manifest,
        appends the document to the spine as non-linear and registers it in
        the guide.
        """
        if 'toc' in oeb.guide:
            return
        oeb.logger.info('Generating in-line TOC...')

        # Resolve the stylesheet, falling back to the nested layout.
        chosen = self.style
        if chosen not in STYLE_CSS:
            oeb.logger.error('Unknown TOC style %r' % chosen)
            chosen = 'nested'
        css_id, css_href = oeb.manifest.generate('tocstyle', 'tocstyle.css')
        oeb.manifest.add(css_id, css_href, CSS_MIME, data=STYLE_CSS[chosen])

        # Build the document skeleton: <html><head/><body><h1/></body></html>
        lang = str(oeb.metadata.language[0])
        root = element(None, XHTML('html'), nsmap={None: XHTML_NS},
                       attrib={XML('lang'): lang})
        head = element(root, XHTML('head'))
        element(head, XHTML('title')).text = 'Table of Contents'
        element(head, XHTML('link'), rel='stylesheet', type=CSS_MIME,
                href=css_href)
        body = element(root, XHTML('body'), attrib={'class': 'calibre_toc'})
        heading = element(body, XHTML('h1'),
                          attrib={'class': 'calibre_toc_header'})
        heading.text = 'Table of Contents'
        self.add_toc_level(body, oeb.toc)

        # Register the finished page with manifest, spine and guide.
        page_id, page_href = oeb.manifest.generate('contents',
                                                   'contents.xhtml')
        page = oeb.manifest.add(page_id, page_href, XHTML_MIME, data=root)
        oeb.spine.add(page, linear=False)
        oeb.guide.add('toc', 'Table of Contents', page_href)

    def add_toc_level(self, elem, toc):
        """Append one ``<div>`` with a link per entry of ``toc``, recursively.

        Children of an entry are nested inside that entry's ``<div>``.
        """
        for entry in toc:
            wrapper = element(elem, XHTML('div'),
                              attrib={'class': 'calibre_toc_block'})
            link_attrs = {'href': entry.href, 'class': 'calibre_toc_line'}
            link = element(wrapper, XHTML('a'), attrib=link_attrs)
            link.text = entry.title
            self.add_toc_level(wrapper, entry)
|
112
src/calibre/ebooks/oeb/transforms/manglecase.py
Normal file
112
src/calibre/ebooks/oeb/transforms/manglecase.py
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
'''
|
||||||
|
CSS case-mangling transform.
|
||||||
|
'''
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import operator
|
||||||
|
import math
|
||||||
|
from itertools import chain
|
||||||
|
from collections import defaultdict
|
||||||
|
from lxml import etree
|
||||||
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
|
||||||
|
from calibre.ebooks.oeb.base import CSS_MIME
|
||||||
|
from calibre.ebooks.oeb.base import namespace
|
||||||
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
|
|
||||||
|
# Stylesheet added to the manifest by CaseMangler.mangle_spine(); the
# 'calibre_lowercase' class is put on the <span> elements that
# CaseMangler.smallcaps_elem() creates.
CASE_MANGLER_CSS = """
.calibre_lowercase {
font-variant: normal;
font-size: 0.65em;
}
"""

# 'text-transform' values that CaseMangler.mangle_elem() applies to text.
TEXT_TRANSFORMS = set(['capitalize', 'uppercase', 'lowercase'])
|
||||||
|
|
||||||
|
class CaseMangler(object):
    """OEB transform that bakes CSS case changes into the document text.

    ``text-transform`` values are applied to the text itself and
    ``font-variant: small-caps`` is emulated with upper-cased text wrapped
    in ``<span class="calibre_lowercase">`` elements.
    """

    def transform(self, oeb, context):
        """Run the transform on ``oeb`` using ``context.source`` as profile."""
        oeb.logger.info('Applying case-transforming CSS...')
        self.oeb = oeb
        self.profile = context.source
        self.mangle_spine()

    def mangle_spine(self):
        """Add the helper stylesheet and mangle the body of each spine item."""
        id, href = self.oeb.manifest.generate('manglecase', 'manglecase.css')
        self.oeb.manifest.add(id, href, CSS_MIME, data=CASE_MANGLER_CSS)
        for item in self.oeb.spine:
            html = item.data
            relhref = item.relhref(href)
            etree.SubElement(html.find(XHTML('head')), XHTML('link'),
                             rel='stylesheet', href=relhref, type=CSS_MIME)
            stylizer = Stylizer(html, item.href, self.oeb, self.profile)
            self.mangle_elem(html.find(XHTML('body')), stylizer)

    @staticmethod
    def _capitalize_words(text):
        """Upper-case the first letter or digit of each whitespace-separated word.

        All other characters are left untouched, as CSS ``capitalize``
        requires; ``str.title()`` would lower-case the rest of each word and
        capitalize after apostrophes ("don't" -> "Don'T").
        """
        pieces = []
        at_word_start = True
        for char in text:
            if char.isspace():
                at_word_start = True
            elif at_word_start and char.isalnum():
                char = char.upper()
                at_word_start = False
            pieces.append(char)
        # Join with an empty string of the input's own type.
        return text[:0].join(pieces)

    def text_transform(self, transform, text):
        """Return ``text`` with the CSS ``text-transform`` value applied.

        Unknown values return ``text`` unchanged.  Each call sees a single
        text segment, so a word that continues from a preceding inline
        element is treated as starting a new word.
        """
        if transform == 'capitalize':
            return self._capitalize_words(text)
        elif transform == 'uppercase':
            return text.upper()
        elif transform == 'lowercase':
            return text.lower()
        return text

    def split_text(self, text):
        """Split non-empty ``text`` into alternating runs by ``isupper()``.

        Consecutive characters whose ``isupper()`` result is the same stay
        together, e.g. ``'Hello WORLD'`` -> ``['H', 'ello ', 'WORLD']``.
        """
        results = ['']
        isupper = text[0].isupper()
        for char in text:
            if char.isupper() == isupper:
                results[-1] += char
            else:
                isupper = not isupper
                results.append(char)
        return results

    def smallcaps_elem(self, elem, attr):
        """Emulate small caps for ``elem.text`` or ``elem.tail``.

        ``attr`` is ``'text'`` or ``'tail'``.  Upper-case runs stay plain
        text; every other run is upper-cased and wrapped in a
        ``calibre_lowercase`` span inserted at the matching position.
        """
        texts = self.split_text(getattr(elem, attr))
        setattr(elem, attr, None)
        # ``last`` is the element whose tail receives the next plain run;
        # None means the next plain run becomes ``elem.text``.
        last = elem if attr == 'tail' else None
        attrib = {'class': 'calibre_lowercase'}
        for text in texts:
            if text.isupper():
                if last is None:
                    elem.text = text
                else:
                    last.tail = text
            else:
                child = etree.Element(XHTML('span'), attrib=attrib)
                child.text = text.upper()
                if last is None:
                    elem.insert(0, child)
                else:
                    # addnext() moves the tail for some reason
                    tail = last.tail
                    last.addnext(child)
                    last.tail = tail
                    child.tail = None
                last = child

    def mangle_elem(self, elem, stylizer):
        """Apply case transforms to ``elem`` and, recursively, its children.

        The element's own style governs its text and the tails of its
        children.  Non-XHTML nodes are skipped together with their subtree.
        """
        if not isinstance(elem.tag, basestring) or \
           namespace(elem.tag) != XHTML_NS:
            return
        # Snapshot the children: smallcaps_elem() inserts new spans.
        children = list(elem)
        style = stylizer.style(elem)
        transform = style['text-transform']
        variant = style['font-variant']
        if elem.text:
            if transform in TEXT_TRANSFORMS:
                elem.text = self.text_transform(transform, elem.text)
            if variant == 'small-caps':
                self.smallcaps_elem(elem, 'text')
        for child in children:
            self.mangle_elem(child, stylizer)
            if child.tail:
                if transform in TEXT_TRANSFORMS:
                    child.tail = self.text_transform(transform, child.tail)
                if variant == 'small-caps':
                    self.smallcaps_elem(child, 'tail')
|
190
src/calibre/ebooks/oeb/transforms/rasterize.py
Normal file
190
src/calibre/ebooks/oeb/transforms/rasterize.py
Normal file
@ -0,0 +1,190 @@
|
|||||||
|
'''
|
||||||
|
SVG rasterization transform.
|
||||||
|
'''
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
from urlparse import urldefrag
|
||||||
|
import base64
|
||||||
|
from lxml import etree
|
||||||
|
from PyQt4.QtCore import Qt
|
||||||
|
from PyQt4.QtCore import QByteArray
|
||||||
|
from PyQt4.QtCore import QBuffer
|
||||||
|
from PyQt4.QtCore import QIODevice
|
||||||
|
from PyQt4.QtGui import QColor
|
||||||
|
from PyQt4.QtGui import QImage
|
||||||
|
from PyQt4.QtGui import QPainter
|
||||||
|
from PyQt4.QtSvg import QSvgRenderer
|
||||||
|
from PyQt4.QtGui import QApplication
|
||||||
|
from calibre.ebooks.oeb.base import XHTML_NS, XHTML, SVG_NS, SVG, XLINK
|
||||||
|
from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME, JPEG_MIME
|
||||||
|
from calibre.ebooks.oeb.base import xml2str, xpath, namespace, barename
|
||||||
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
|
|
||||||
|
# Qualified names of the XHTML <img> and <object> elements.
IMAGE_TAGS = set([XHTML('img'), XHTML('object')])

# Attributes SVGRasterizer.rasterize_external() keeps when it turns an
# element into an <img>; every other attribute is deleted.
KEEP_ATTRS = set(['class', 'style', 'width', 'height', 'align'])
|
||||||
|
|
||||||
|
class SVGRasterizer(object):
    """OEB transform that replaces SVG images with rendered PNG images."""

    def __init__(self):
        """Make sure a ``QApplication`` exists before Qt is used."""
        if QApplication.instance() is None:
            QApplication([])

    def transform(self, oeb, context):
        """Rasterize SVG in the manifest, the spine and the cover of ``oeb``."""
        oeb.logger.info('Rasterizing SVG images...')
        self.oeb = oeb
        self.profile = context.dest
        # Cache of already rendered images: (href, width, height) -> href
        self.images = {}
        self.dataize_manifest()
        self.rasterize_spine()
        self.rasterize_cover()

    @staticmethod
    def _viewbox_size(viewbox):
        """Return ``(width, height)`` from an SVG ``viewBox`` value, or None.

        The value is ``min-x min-y width height`` with numbers separated by
        whitespace and/or commas.  ``None`` is returned when it does not
        consist of exactly four numbers.
        """
        try:
            box = [float(x) for x in viewbox.replace(',', ' ').split()]
        except ValueError:
            return None
        if len(box) != 4:
            return None
        return box[2], box[3]

    def _render(self, svg, size, format='PNG'):
        """Render ``svg`` on a white ``size`` canvas; return the encoded bytes."""
        image = QImage(size, QImage.Format_ARGB32_Premultiplied)
        image.fill(QColor("white").rgb())
        painter = QPainter(image)
        svg.render(painter)
        painter.end()
        array = QByteArray()
        buffer = QBuffer(array)
        buffer.open(QIODevice.WriteOnly)
        image.save(buffer, format)
        return str(array)

    def rasterize_svg(self, elem, width=0, height=0, format='PNG'):
        """Render the SVG element ``elem`` and return the image data.

        When ``width`` or ``height`` is non-zero the image is scaled to fit
        them, keeping the aspect ratio.
        """
        data = QByteArray(xml2str(elem))
        svg = QSvgRenderer(data)
        size = svg.defaultSize()
        # NOTE(review): 100x100 is presumably what the renderer reports
        # when the document declares no size -- confirm.  In that case the
        # dimensions are taken from the viewBox; a malformed viewBox is
        # ignored.
        if size.width() == 100 and size.height() == 100 \
           and 'viewBox' in elem.attrib:
            dims = self._viewbox_size(elem.attrib['viewBox'])
            if dims is not None:
                size.setWidth(dims[0])
                size.setHeight(dims[1])
        if width or height:
            size.scale(width, height, Qt.KeepAspectRatio)
        logger = self.oeb.logger
        logger.info('Rasterizing %r to %dx%d'
                    % (elem, size.width(), size.height()))
        return self._render(svg, size, format)

    def dataize_manifest(self):
        """Inline linked resources into every SVG item of the manifest."""
        for item in self.oeb.manifest.values():
            if item.media_type == SVG_MIME:
                self.dataize_svg(item)

    def dataize_svg(self, item, svg=None):
        """Replace ``xlink:href`` links to manifest items with ``data:`` URIs.

        ``svg`` defaults to ``item.data``; links are resolved relative to
        ``item``.  Fragment-only links and links to files outside the
        manifest are left alone.  Returns the (modified) SVG tree.
        """
        if svg is None:
            svg = item.data
        hrefs = self.oeb.manifest.hrefs
        for elem in xpath(svg, '//svg:*[@xl:href]'):
            href = elem.attrib[XLINK('href')]
            path, frag = urldefrag(href)
            if not path:
                continue
            abshref = item.abshref(path)
            if abshref not in hrefs:
                continue
            linkee = hrefs[abshref]
            data = base64.encodestring(str(linkee))
            data = "data:%s;base64,%s" % (linkee.media_type, data)
            elem.attrib[XLINK('href')] = data
        return svg

    def rasterize_spine(self):
        """Rasterize the SVG images used by every spine item."""
        for item in self.oeb.spine:
            html = item.data
            stylizer = Stylizer(html, item.href, self.oeb, self.profile)
            self.rasterize_item(item, stylizer)

    def rasterize_item(self, item, stylizer):
        """Rasterize SVG referenced by or embedded in one spine document.

        Handles ``<img src>`` and ``<object data>`` references to SVG
        manifest items as well as inline ``<svg>`` elements.
        """
        html = item.data
        hrefs = self.oeb.manifest.hrefs
        for elem in xpath(html, '//h:img'):
            src = elem.get('src', None)
            image = hrefs.get(item.abshref(src), None) if src else None
            if image and image.media_type == SVG_MIME:
                style = stylizer.style(elem)
                self.rasterize_external(elem, style, item, image)
        for elem in xpath(html, '//h:object[@type="%s"]' % SVG_MIME):
            data = elem.get('data', None)
            image = hrefs.get(item.abshref(data), None) if data else None
            if image and image.media_type == SVG_MIME:
                style = stylizer.style(elem)
                self.rasterize_external(elem, style, item, image)
        for elem in xpath(html, '//svg:svg'):
            style = stylizer.style(elem)
            self.rasterize_inline(elem, style, item)

    def rasterize_inline(self, elem, style, item):
        """Replace the inline ``<svg>`` element ``elem`` with an ``<img>``.

        The rendered PNG is added to the manifest and the ``width`` and
        ``height`` attributes of the SVG element are copied to the image.
        """
        width = style['width']
        height = style['height']
        width = (width / 72) * self.profile.dpi
        height = (height / 72) * self.profile.dpi
        elem = self.dataize_svg(item, elem)
        data = self.rasterize_svg(elem, width, height)
        manifest = self.oeb.manifest
        href = os.path.splitext(item.href)[0] + '.png'
        id, href = manifest.generate(item.id, href)
        manifest.add(id, href, PNG_MIME, data=data)
        img = etree.Element(XHTML('img'), src=item.relhref(href))
        elem.getparent().replace(elem, img)
        for prop in ('width', 'height'):
            if prop in elem.attrib:
                img.attrib[prop] = elem.attrib[prop]

    def rasterize_external(self, elem, style, item, svgitem):
        """Turn ``elem`` into an ``<img>`` showing a PNG render of ``svgitem``.

        Renders are cached in ``self.images`` by source href and pixel size,
        so the same SVG at the same size is rendered only once.
        """
        width = style['width']
        height = style['height']
        width = (width / 72) * self.profile.dpi
        height = (height / 72) * self.profile.dpi
        data = QByteArray(str(svgitem))
        svg = QSvgRenderer(data)
        size = svg.defaultSize()
        size.scale(width, height, Qt.KeepAspectRatio)
        key = (svgitem.href, size.width(), size.height())
        if key in self.images:
            href = self.images[key]
        else:
            logger = self.oeb.logger
            logger.info('Rasterizing %r to %dx%d'
                        % (svgitem.href, size.width(), size.height()))
            data = self._render(svg, size, 'PNG')
            manifest = self.oeb.manifest
            href = os.path.splitext(svgitem.href)[0] + '.png'
            id, href = manifest.generate(svgitem.id, href)
            manifest.add(id, href, PNG_MIME, data=data)
            self.images[key] = href
        elem.tag = XHTML('img')
        # Iterate over snapshots: attributes and children are removed here.
        for attr in list(elem.attrib):
            if attr not in KEEP_ATTRS:
                del elem.attrib[attr]
        elem.attrib['src'] = item.relhref(href)
        if elem.text:
            elem.attrib['alt'] = elem.text
            elem.text = None
        for child in list(elem):
            elem.remove(child)

    def rasterize_cover(self):
        """Replace an SVG cover with a PNG sized from the profile dimensions."""
        covers = self.oeb.metadata.cover
        if not covers:
            return
        cover = self.oeb.manifest.ids[str(covers[0])]
        if not cover.media_type == SVG_MIME:
            return
        width = (self.profile.width / 72) * self.profile.dpi
        height = (self.profile.height / 72) * self.profile.dpi
        data = self.rasterize_svg(cover.data, width, height)
        href = os.path.splitext(cover.href)[0] + '.png'
        id, href = self.oeb.manifest.generate(cover.id, href)
        self.oeb.manifest.add(id, href, PNG_MIME, data=data)
        covers[0].value = id
|
68
src/calibre/ebooks/oeb/transforms/trimmanifest.py
Normal file
68
src/calibre/ebooks/oeb/transforms/trimmanifest.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
'''
|
||||||
|
OPF manifest trimming transform.
|
||||||
|
'''
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
from itertools import chain
|
||||||
|
from urlparse import urldefrag
|
||||||
|
from lxml import etree
|
||||||
|
import cssutils
|
||||||
|
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
|
||||||
|
|
||||||
|
# Compiled XPath selectors returning the link attributes of a document:
# stylesheet/link hrefs, image sources, object data and XLink hrefs.
LINK_SELECTORS = [
    etree.XPath(expr, namespaces=XPNSMAP)
    for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data',
                 '//*/@xl:href')]
|
||||||
|
|
||||||
|
class ManifestTrimmer(object):
    """OEB transform that removes manifest items nothing refers to."""

    def transform(self, oeb, context):
        """Remove every manifest item not reachable from the book.

        Items referenced by metadata values, guide references and the spine
        are the roots; links found in XML/XHTML documents and URLs found in
        CSS are followed from there until no new item turns up.
        """
        oeb.logger.info('Trimming unused files from manifest...')
        used = set()
        manifest_hrefs = oeb.manifest.hrefs
        for term in oeb.metadata:
            for item in oeb.metadata[term]:
                if item.value in manifest_hrefs:
                    used.add(manifest_hrefs[item.value])
                elif item.value in oeb.manifest.ids:
                    used.add(oeb.manifest.ids[item.value])
        for ref in oeb.guide.values():
            path, _ = urldefrag(ref.href)
            if path in manifest_hrefs:
                used.add(manifest_hrefs[path])
        # TOC items are required to be in the spine
        for item in oeb.spine:
            used.add(item)
        unchecked = used
        while unchecked:
            new = set()
            for item in unchecked:
                if item.media_type in OEB_DOCS or \
                   item.media_type[-4:] in ('/xml', '+xml'):
                    links = [sel(item.data) for sel in LINK_SELECTORS]
                    for href in chain(*links):
                        href = item.abshref(href)
                        if href in manifest_hrefs:
                            found = manifest_hrefs[href]
                            if found not in used:
                                new.add(found)
                elif item.media_type == CSS_MIME:
                    # replaceUrls() is used only to visit every URL of the
                    # sheet; the callback returns each URL unchanged.
                    def replacer(uri):
                        absuri = item.abshref(uri)
                        if absuri in manifest_hrefs:
                            # Look the item up by the URL just resolved.
                            found = manifest_hrefs[absuri]
                            if found not in used:
                                new.add(found)
                        return uri
                    sheet = cssutils.parseString(item.data, href=item.href)
                    cssutils.replaceUrls(sheet, replacer)
            # ``unchecked`` may be ``used`` itself, so only update the set
            # once the loop over it has finished.
            used.update(new)
            unchecked = new
        # Iterate over a snapshot because items are removed in the loop.
        for item in list(oeb.manifest.values()):
            if item not in used:
                oeb.logger.info('Trimming %r from manifest' % item.href)
                oeb.manifest.remove(item)
|
@ -48,12 +48,14 @@ entry_points = {
|
|||||||
'any2lrf = calibre.ebooks.lrf.any.convert_from:main',
|
'any2lrf = calibre.ebooks.lrf.any.convert_from:main',
|
||||||
'any2epub = calibre.ebooks.epub.from_any:main',
|
'any2epub = calibre.ebooks.epub.from_any:main',
|
||||||
'any2lit = calibre.ebooks.lit.from_any:main',
|
'any2lit = calibre.ebooks.lit.from_any:main',
|
||||||
|
'any2mobi = calibre.ebooks.mobi.from_any:main',
|
||||||
'lrf2lrs = calibre.ebooks.lrf.lrfparser:main',
|
'lrf2lrs = calibre.ebooks.lrf.lrfparser:main',
|
||||||
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
|
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
|
||||||
'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
|
'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
|
||||||
'isbndb = calibre.ebooks.metadata.isbndb:main',
|
'isbndb = calibre.ebooks.metadata.isbndb:main',
|
||||||
'librarything = calibre.ebooks.metadata.library_thing:main',
|
'librarything = calibre.ebooks.metadata.library_thing:main',
|
||||||
'mobi2oeb = calibre.ebooks.mobi.reader:main',
|
'mobi2oeb = calibre.ebooks.mobi.reader:main',
|
||||||
|
'oeb2mobi = calibre.ebooks.mobi.writer:main',
|
||||||
'lrf2html = calibre.ebooks.lrf.html.convert_to:main',
|
'lrf2html = calibre.ebooks.lrf.html.convert_to:main',
|
||||||
'lit2oeb = calibre.ebooks.lit.reader:main',
|
'lit2oeb = calibre.ebooks.lit.reader:main',
|
||||||
'oeb2lit = calibre.ebooks.lit.writer:main',
|
'oeb2lit = calibre.ebooks.lit.writer:main',
|
||||||
@ -190,6 +192,8 @@ def setup_completion(fatal_errors):
|
|||||||
from calibre.ebooks.epub.from_any import option_parser as any2epub
|
from calibre.ebooks.epub.from_any import option_parser as any2epub
|
||||||
from calibre.ebooks.lit.from_any import option_parser as any2lit
|
from calibre.ebooks.lit.from_any import option_parser as any2lit
|
||||||
from calibre.ebooks.epub.from_comic import option_parser as comic2epub
|
from calibre.ebooks.epub.from_comic import option_parser as comic2epub
|
||||||
|
from calibre.ebooks.mobi.from_any import option_parser as any2mobi
|
||||||
|
from calibre.ebooks.mobi.writer import option_parser as oeb2mobi
|
||||||
from calibre.gui2.main import option_parser as guiop
|
from calibre.gui2.main import option_parser as guiop
|
||||||
any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
|
any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
|
||||||
'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt']
|
'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt']
|
||||||
@ -214,6 +218,8 @@ def setup_completion(fatal_errors):
|
|||||||
f.write(opts_and_exts('calibre', guiop, any_formats))
|
f.write(opts_and_exts('calibre', guiop, any_formats))
|
||||||
f.write(opts_and_exts('any2epub', any2epub, any_formats))
|
f.write(opts_and_exts('any2epub', any2epub, any_formats))
|
||||||
f.write(opts_and_exts('any2lit', any2lit, any_formats))
|
f.write(opts_and_exts('any2lit', any2lit, any_formats))
|
||||||
|
f.write(opts_and_exts('any2mobi', any2mobi, any_formats))
|
||||||
|
f.write(opts_and_exts('oeb2mobi', oeb2mobi, ['mobi', 'prc']))
|
||||||
f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf']))
|
f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf']))
|
||||||
f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
|
f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
|
||||||
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
|
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
|
||||||
@ -230,7 +236,7 @@ def setup_completion(fatal_errors):
|
|||||||
f.write(opts_and_exts('lit2oeb', lit2oeb, ['lit']))
|
f.write(opts_and_exts('lit2oeb', lit2oeb, ['lit']))
|
||||||
f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr']))
|
f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr']))
|
||||||
f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr']))
|
f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr']))
|
||||||
f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr']))
|
f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr']))
|
||||||
f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
|
f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
|
||||||
f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
|
f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
|
||||||
f.write(opts_and_words('feeds2lrf', feeds2epub, feed_titles))
|
f.write(opts_and_words('feeds2lrf', feeds2epub, feed_titles))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user