diff --git a/setup.py b/setup.py index 5111d16c45..e7c16e6aa6 100644 --- a/setup.py +++ b/setup.py @@ -166,7 +166,7 @@ if __name__ == '__main__': metadata_sqlite = 'library/metadata_sqlite.sql', jquery = 'gui2/viewer/jquery.js', jquery_scrollTo = 'gui2/viewer/jquery_scrollTo.js', - html_css = 'ebooks/lit/html.css', + html_css = 'ebooks/oeb/html.css', ) DEST = os.path.join('src', APPNAME, 'resources.py') diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py index f14e153057..634963e775 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -798,8 +798,9 @@ class Processor(Parser): if face is not None: faces = [] for face in face.split(','): - if ' ' in face: - face = "%s" % face + face = face.strip() + if ' ' in face and not (face[0] == face[-1] == '"'): + face = '"%s"' % face.replace('"', r'\"') faces.append(face) for generic in ('serif', 'sans-serif', 'monospace'): if generic in faces: diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 90df14e2c0..461c067382 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -15,7 +15,7 @@ from lxml import etree from calibre.ebooks.lit import LitError from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP import calibre.ebooks.lit.mssha1 as mssha1 -from calibre.ebooks.lit.oeb import urlnormalize +from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks import DRMError from calibre import plugins lzx, lxzerror = plugins['lzx'] diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py index cb7780138c..67450fb385 100644 --- a/src/calibre/ebooks/lit/writer.py +++ b/src/calibre/ebooks/lit/writer.py @@ -23,14 +23,20 @@ from urllib import unquote as urlunquote from lxml import etree from calibre.ebooks.lit.reader import DirectoryEntry import calibre.ebooks.lit.maps as maps -from calibre.ebooks.lit.oeb import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \ +from calibre.ebooks.oeb.base import OEB_DOCS, 
OEB_STYLES, OEB_CSS_MIME, \ CSS_MIME, OPF_MIME, XML_NS, XML -from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize, xpath -from calibre.ebooks.lit.oeb import FauxLogger, OEBBook -from calibre.ebooks.lit.stylizer import Stylizer +from calibre.ebooks.oeb.base import namespace, barename, prefixname, \ + urlnormalize, xpath +from calibre.ebooks.oeb.base import Logger, OEBBook +from calibre.ebooks.oeb.profile import Context +from calibre.ebooks.oeb.stylizer import Stylizer +from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener +from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer +from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer +from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder +from calibre.ebooks.oeb.transforms.manglecase import CaseMangler from calibre.ebooks.lit.lzx import Compressor import calibre -from calibre import LoggingInterface from calibre import plugins msdes, msdeserror = plugins['msdes'] import calibre.ebooks.lit.mssha1 as mssha1 @@ -116,12 +122,6 @@ LZXC_CONTROL = \ COLLAPSE = re.compile(r'[ \t\r\n\v]+') -def prefixname(name, nsrmap): - prefix = nsrmap[namespace(name)] - if not prefix: - return barename(name) - return ':'.join((prefix, barename(name))) - def decint(value): bytes = [] while True: @@ -143,9 +143,9 @@ def warn(x): class ReBinary(object): NSRMAP = {'': None, XML_NS: 'xml'} - def __init__(self, root, item, oeb, map=HTML_MAP, logger=FauxLogger()): + def __init__(self, root, path, oeb, map=HTML_MAP): self.item = item - self.logger = logger + self.logger = oeb.logger self.manifest = oeb.manifest self.tags, self.tattrs = map self.buf = StringIO() @@ -300,10 +300,9 @@ def preserve(function): return wrapper class LitWriter(object): - def __init__(self, oeb, logger=FauxLogger()): - self._oeb = oeb - self._logger = logger - self._litize_oeb() + def __init__(self): + # Wow, no options + pass def _litize_oeb(self): oeb = self._oeb @@ -312,32 +311,27 @@ class LitWriter(object): if 
oeb.metadata.cover: id = str(oeb.metadata.cover[0]) cover = oeb.manifest[id] - elif MS_COVER_TYPE in oeb.guide: - href = oeb.guide[MS_COVER_TYPE].href - cover = oeb.manifest.hrefs[href] - elif 'cover' in oeb.guide: - href = oeb.guide['cover'].href - cover = oeb.manifest.hrefs[href] - else: - html = oeb.spine[0].data - imgs = xpath(html, '//img[position()=1]') - href = imgs[0].get('src') if imgs else None - cover = oeb.manifest.hrefs[href] if href else None - if cover: - if not oeb.metadata.cover: - oeb.metadata.add('cover', cover.id) for type, title in ALL_MS_COVER_TYPES: if type not in oeb.guide: oeb.guide.add(type, title, cover.href) else: - self._logger.log_warn('No suitable cover image found.') + self._logger.warn('No suitable cover image found.') - def dump(self, stream): + def dump(self, oeb, path): + if hasattr(path, 'write'): + return self._dump_stream(oeb, path) + with open(path, 'w+b') as stream: + return self._dump_stream(oeb, stream) + + def _dump_stream(self, oeb, stream): + self._oeb = oeb + self._logger = oeb.logger self._stream = stream self._sections = [StringIO() for i in xrange(4)] self._directory = [] self._meta = None - self._dump() + self._litize_oeb() + self._write_content() def _write(self, *data): for datum in data: @@ -351,7 +345,7 @@ class LitWriter(object): def _tell(self): return self._stream.tell() - def _dump(self): + def _write_content(self): # Build content sections self._build_sections() @@ -480,8 +474,7 @@ class LitWriter(object): secnum = 0 if not isinstance(data, basestring): self._add_folder(name) - rebin = ReBinary(data, item, self._oeb, map=HTML_MAP, - logger=self._logger) + rebin = ReBinary(data, item, self._oeb, map=HTML_MAP) self._add_file(name + '/ahc', rebin.ahc, 0) self._add_file(name + '/aht', rebin.aht, 0) item.page_breaks = rebin.page_breaks @@ -560,8 +553,7 @@ class LitWriter(object): meta.attrib['ms--minimum_level'] = '0' meta.attrib['ms--attr5'] = '1' meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper() - 
rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP, - logger=self._logger) + rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP) meta = rebin.content self._meta = meta self._add_file('/meta', meta) @@ -721,23 +713,35 @@ def option_parser(): '-o', '--output', default=None, help=_('Output file. Default is derived from input filename.')) parser.add_option( - '--verbose', default=False, action='store_true', + '-v', '--verbose', default=0, action='count', help=_('Useful for debugging.')) return parser -def oeb2lit(opts, opfpath): - logger = LoggingInterface(logging.getLogger('oeb2lit')) +def oeb2lit(opts, inpath): + logger = Logger(logging.getLogger('oeb2lit')) logger.setup_cli_handler(opts.verbose) - litpath = opts.output - if litpath is None: - litpath = os.path.basename(opfpath) - litpath = os.path.splitext(litpath)[0] + '.lit' - litpath = os.path.abspath(litpath) - lit = LitWriter(OEBBook(opfpath, logger=logger), logger=logger) - with open(litpath, 'wb') as f: - lit.dump(f) - run_plugins_on_postprocess(litpath, 'lit') - logger.log_info(_('Output written to ')+litpath) + outpath = opts.output + if outpath is None: + outpath = os.path.basename(inpath) + outpath = os.path.splitext(outpath)[0] + '.lit' + outpath = os.path.abspath(outpath) + context = Context('Firefox', 'MSReader') + oeb = OEBBook(inpath, logger=logger) + tocadder = HTMLTOCAdder() + tocadder.transform(oeb, context) + mangler = CaseMangler() + mangler.transform(oeb, context) + fbase = context.dest.fbase + flattener = CSSFlattener(fbase=fbase, unfloat=True, untable=True) + flattener.transform(oeb, context) + rasterizer = SVGRasterizer() + rasterizer.transform(oeb, context) + trimmer = ManifestTrimmer() + trimmer.transform(oeb, context) + lit = LitWriter() + lit.dump(oeb, outpath) + run_plugins_on_postprocess(outpath, 'lit') + logger.info(_('Output written to ') + outpath) def main(argv=sys.argv): @@ -746,8 +750,8 @@ def main(argv=sys.argv): if len(args) != 1: parser.print_help() return 1 - opfpath = 
args[0] - oeb2lit(opts, opfpath) + inpath = args[0] + oeb2lit(opts, inpath) return 0 if __name__ == '__main__': diff --git a/src/calibre/ebooks/mobi/from_any.py b/src/calibre/ebooks/mobi/from_any.py new file mode 100644 index 0000000000..9af2e5fe68 --- /dev/null +++ b/src/calibre/ebooks/mobi/from_any.py @@ -0,0 +1,67 @@ +''' +Convert any ebook format to Mobipocket. +''' + +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net ' \ + 'and Marshall T. Vandegrift ' +__docformat__ = 'restructuredtext en' + +import sys, os, glob, logging + +from calibre.ebooks.epub.from_any import any2epub, formats, USAGE +from calibre.ebooks.epub import config as common_config +from calibre.ptempfile import TemporaryDirectory +from calibre.ebooks.mobi.writer import oeb2mobi, add_mobi_options + +def config(defaults=None): + return common_config(defaults=defaults, name='mobi') + +def option_parser(usage=USAGE): + usage = usage % ('Mobipocket', formats()) + parser = config().option_parser(usage=usage) + add_mobi_options(parser) + return parser + +def any2mobi(opts, path): + ext = os.path.splitext(path)[1] + if not ext: + raise ValueError('Unknown file type: '+path) + ext = ext.lower()[1:] + + if opts.output is None: + opts.output = os.path.splitext(os.path.basename(path))[0]+'.mobi' + + opts.output = os.path.abspath(opts.output) + orig_output = opts.output + + with TemporaryDirectory('_any2mobi') as tdir: + oebdir = os.path.join(tdir, 'oeb') + os.mkdir(oebdir) + opts.output = os.path.join(tdir, 'dummy.epub') + opts.profile = 'None' + opts.dont_split_on_page_breaks = True + orig_bfs = opts.base_font_size2 + opts.base_font_size2 = 0 + any2epub(opts, path, create_epub=False, oeb_cover=True, extract_to=oebdir) + opts.base_font_size2 = orig_bfs + opf = glob.glob(os.path.join(oebdir, '*.opf'))[0] + opts.output = orig_output + logging.getLogger('html2epub').info(_('Creating Mobipocket file from EPUB...')) + oeb2mobi(opts, opf) + + 
+def main(args=sys.argv): + parser = option_parser() + opts, args = parser.parse_args(args) + if len(args) < 2: + parser.print_help() + print 'No input file specified.' + return 1 + any2mobi(opts, args[1]) + return 0 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/calibre/ebooks/mobi/langcodes.py b/src/calibre/ebooks/mobi/langcodes.py index c0884e55a8..5df11c4e38 100644 --- a/src/calibre/ebooks/mobi/langcodes.py +++ b/src/calibre/ebooks/mobi/langcodes.py @@ -3,6 +3,8 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' +from struct import pack + main_language = { 0 : "NEUTRAL", 54 : "AFRIKAANS", @@ -155,5 +157,170 @@ sub_language = { 2 : "SWEDISH_FINLAND", 1 : "UZBEK_LATIN", 2 : "UZBEK_CYRILLIC", - - } \ No newline at end of file + } + +IANA_MOBI = \ + {None: {None: (0, 0)}, + 'af': {None: (54, 0)}, + 'ar': {None: (1, 0), + 'AE': (1, 56), + 'BH': (1, 60), + 'DZ': (1, 20), + 'EG': (1, 12), + 'JO': (1, 44), + 'KW': (1, 52), + 'LB': (1, 48), + 'MA': (1, 24), + 'OM': (1, 32), + 'QA': (1, 64), + 'SA': (1, 4), + 'SY': (1, 40), + 'TN': (1, 28), + 'YE': (1, 36)}, + 'as': {None: (77, 0)}, + 'az': {None: (44, 0)}, + 'be': {None: (35, 0)}, + 'bg': {None: (2, 0)}, + 'bn': {None: (69, 0)}, + 'ca': {None: (3, 0)}, + 'cs': {None: (5, 0)}, + 'da': {None: (6, 0)}, + 'de': {None: (7, 0), + 'AT': (7, 12), + 'CH': (7, 8), + 'LI': (7, 20), + 'LU': (7, 16)}, + 'el': {None: (8, 0)}, + 'en': {None: (9, 0), + 'AU': (9, 12), + 'BZ': (9, 40), + 'CA': (9, 16), + 'GB': (9, 8), + 'IE': (9, 24), + 'JM': (9, 32), + 'NZ': (9, 20), + 'PH': (9, 52), + 'TT': (9, 44), + 'US': (9, 4), + 'ZA': (9, 28), + 'ZW': (9, 48)}, + 'es': {None: (10, 0), + 'AR': (10, 44), + 'BO': (10, 64), + 'CL': (10, 52), + 'CO': (10, 36), + 'CR': (10, 20), + 'DO': (10, 28), + 'EC': (10, 48), + 'ES': (10, 4), + 'GT': (10, 16), + 'HN': (10, 72), + 'MX': (10, 8), + 'NI': (10, 76), + 'PA': (10, 24), + 'PE': (10, 40), + 'PR': (10, 80), + 'PY': (10, 
60), + 'SV': (10, 68), + 'UY': (10, 56), + 'VE': (10, 32)}, + 'et': {None: (37, 0)}, + 'eu': {None: (45, 0)}, + 'fa': {None: (41, 0)}, + 'fi': {None: (11, 0)}, + 'fo': {None: (56, 0)}, + 'fr': {None: (12, 0), + 'BE': (12, 8), + 'CA': (12, 12), + 'CH': (12, 16), + 'FR': (12, 4), + 'LU': (12, 20), + 'MC': (12, 24)}, + 'gu': {None: (71, 0)}, + 'he': {None: (13, 0)}, + 'hi': {None: (57, 0)}, + 'hr': {None: (26, 0)}, + 'hu': {None: (14, 0)}, + 'hy': {None: (43, 0)}, + 'id': {None: (33, 0)}, + 'is': {None: (15, 0)}, + 'it': {None: (16, 0), + 'CH': (16, 8), + 'IT': (16, 4)}, + 'ja': {None: (17, 0)}, + 'ka': {None: (55, 0)}, + 'kk': {None: (63, 0)}, + 'kn': {None: (75, 0)}, + 'ko': {None: (18, 0)}, + 'kok': {None: (87, 0)}, + 'lt': {None: (39, 0)}, + 'lv': {None: (38, 0)}, + 'mk': {None: (47, 0)}, + 'ml': {None: (76, 0)}, + 'mr': {None: (78, 0)}, + 'ms': {None: (62, 0)}, + 'mt': {None: (58, 0)}, + 'ne': {None: (97, 0)}, + 'nl': {None: (19, 0), + 'BE': (19, 8)}, + 'no': {None: (20, 0)}, + 'or': {None: (72, 0)}, + 'pa': {None: (70, 0)}, + 'pl': {None: (21, 0)}, + 'pt': {None: (22, 0), + 'BR': (22, 4), + 'PT': (22, 8)}, + 'rm': {None: (23, 0)}, + 'ro': {None: (24, 0)}, + 'ru': {None: (25, 0)}, + 'sa': {None: (79, 0)}, + 'se': {None: (59, 0)}, + 'sk': {None: (27, 0)}, + 'sl': {None: (36, 0)}, + 'sq': {None: (28, 0)}, + 'sr': {None: (26, 12), + 'RS': (26, 12)}, + 'st': {None: (48, 0)}, + 'sv': {None: (29, 0), + 'FI': (29, 8)}, + 'sw': {None: (65, 0)}, + 'ta': {None: (73, 0)}, + 'te': {None: (74, 0)}, + 'th': {None: (30, 0)}, + 'tn': {None: (50, 0)}, + 'tr': {None: (31, 0)}, + 'ts': {None: (49, 0)}, + 'tt': {None: (68, 0)}, + 'uk': {None: (34, 0)}, + 'ur': {None: (32, 0)}, + 'uz': {None: (67, 0), + 'UZ': (67, 8)}, + 'vi': {None: (42, 0)}, + 'wen': {None: (46, 0)}, + 'xh': {None: (52, 0)}, + 'zh': {None: (4, 0), + 'CN': (4, 8), + 'HK': (4, 12), + 'SG': (4, 16), + 'TW': (4, 4)}, + 'zu': {None: (53, 0)}} + +def iana2mobi(icode): + subtags = list(icode.split('-')) + langdict = 
IANA_MOBI[None] + while len(subtags) > 0: + lang = subtags.pop(0).lower() + if lang in IANA_MOBI: + langdict = IANA_MOBI[lang] + break + mcode = langdict[None] + while len(subtags) > 0: + subtag = subtags.pop(0) + if subtag not in langdict: + subtag = subtag.title() + if subtag not in langdict: + subtag = subtag.upper() + if subtag in langdict: + mcode = langdict[subtag] + break + return pack('>HBB', 0, mcode[1], mcode[0]) diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py new file mode 100644 index 0000000000..7a74bd9401 --- /dev/null +++ b/src/calibre/ebooks/mobi/mobiml.py @@ -0,0 +1,379 @@ +''' +Transform XHTML/OPS-ish content into Mobipocket HTML 3.2. +''' +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. Vandegrift ' + +import sys +import os +import copy +import re +from lxml import etree +from calibre.ebooks.oeb.base import namespace, barename +from calibre.ebooks.oeb.base import XHTML, XHTML_NS +from calibre.ebooks.oeb.stylizer import Stylizer +from calibre.ebooks.oeb.transforms.flatcss import KeyMapper + +MBP_NS = 'http://mobipocket.com/ns/mbp' +def MBP(name): return '{%s}%s' % (MBP_NS, name) + +MOBI_NSMAP = {None: XHTML_NS, 'mbp': MBP_NS} + +HEADER_TAGS = set(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']) +NESTABLE_TAGS = set(['ol', 'ul', 'li', 'table', 'tr', 'td', 'th']) +TABLE_TAGS = set(['table', 'tr', 'td', 'th']) +SPECIAL_TAGS = set(['hr', 'br']) +CONTENT_TAGS = set(['img', 'hr', 'br']) + +PAGE_BREAKS = set(['always', 'odd', 'even']) + +COLLAPSE = re.compile(r'[ \t\r\n\v]+') + +def asfloat(value): + if not isinstance(value, (int, long, float)): + return 0.0 + return float(value) + +class BlockState(object): + def __init__(self, body): + self.body = body + self.nested = [] + self.para = None + self.inline = None + self.anchor = None + self.vpadding = 0. + self.vmargin = 0. 
+ self.pbreak = False + self.istate = None + self.content = False + +class FormatState(object): + def __init__(self): + self.left = 0. + self.halign = 'auto' + self.indent = 0. + self.fsize = 3 + self.ids = set() + self.valign = 'baseline' + self.italic = False + self.bold = False + self.preserve = False + self.family = 'serif' + self.href = None + self.list_num = 0 + self.attrib = {} + + def __eq__(self, other): + return self.fsize == other.fsize \ + and self.italic == other.italic \ + and self.bold == other.bold \ + and self.href == other.href \ + and self.valign == other.valign \ + and self.preserve == other.preserve \ + and self.family == other.family + + def __ne__(self, other): + return not self.__eq__(other) + + +class MobiMLizer(object): + def transform(self, oeb, context): + oeb.logger.info('Converting XHTML to Mobipocket markup...') + self.oeb = oeb + self.profile = profile = context.dest + self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items()) + self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys()) + self.remove_html_cover() + self.mobimlize_spine() + + def remove_html_cover(self): + oeb = self.oeb + if not oeb.metadata.cover \ + or 'cover' not in oeb.guide: + return + href = oeb.guide['cover'].href + del oeb.guide['cover'] + item = oeb.manifest.hrefs[href] + oeb.manifest.remove(item) + + def mobimlize_spine(self): + for item in self.oeb.spine: + stylizer = Stylizer(item.data, item.href, self.oeb, self.profile) + body = item.data.find(XHTML('body')) + nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP) + nbody = etree.SubElement(nroot, XHTML('body')) + self.mobimlize_elem(body, stylizer, BlockState(nbody), + [FormatState()]) + item.data = nroot + + def mobimlize_font(self, ptsize): + return self.fnums[self.fmap[ptsize]] + + def mobimlize_measure(self, ptsize): + if isinstance(ptsize, basestring): + return ptsize + embase = self.profile.fbase + if round(ptsize) < embase: + return "%dpt" % int(round(ptsize)) + return "%dem" % 
int(round(ptsize / embase)) + + def preize_text(self, text): + text = unicode(text).replace(u' ', u'\xa0') + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') + lines = text.split('\n') + result = lines[:1] + for line in lines[1:]: + result.append(etree.Element(XHTML('br'))) + if line: + result.append(line) + return result + + def mobimlize_content(self, tag, text, bstate, istates): + bstate.content = True + istate = istates[-1] + para = bstate.para + if tag in SPECIAL_TAGS and not text: + para = para if para is not None else bstate.body + elif para is None: + body = bstate.body + if bstate.pbreak: + etree.SubElement(body, MBP('pagebreak')) + bstate.pbreak = False + if istate.ids: + for id in istate.ids: + etree.SubElement(body, XHTML('a'), attrib={'id': id}) + istate.ids.clear() + bstate.istate = None + bstate.anchor = None + parent = bstate.nested[-1] if bstate.nested else bstate.body + indent = istate.indent + left = istate.left + if indent < 0 and abs(indent) < left: + left += indent + indent = 0 + elif indent != 0 and abs(indent) < self.profile.fbase: + indent = (indent / abs(indent)) * self.profile.fbase + if tag in NESTABLE_TAGS: + para = wrapper = etree.SubElement(parent, XHTML(tag)) + bstate.nested.append(para) + if tag == 'li' and len(istates) > 1: + istates[-2].list_num += 1 + para.attrib['value'] = str(istates[-2].list_num) + elif left > 0 and indent >= 0: + para = wrapper = etree.SubElement(parent, XHTML('blockquote')) + para = wrapper + emleft = int(round(left / self.profile.fbase)) - 1 + emleft = min((emleft, 10)) + while emleft > 0: + para = etree.SubElement(para, XHTML('blockquote')) + emleft -= 1 + else: + para = wrapper = etree.SubElement(parent, XHTML('p')) + bstate.inline = bstate.para = para + vspace = bstate.vpadding + bstate.vmargin + bstate.vpadding = bstate.vmargin = 0 + if tag not in TABLE_TAGS: + wrapper.attrib['height'] = self.mobimlize_measure(vspace) + para.attrib['width'] = self.mobimlize_measure(indent) + elif tag 
== 'table' and vspace > 0: + body = bstate.body + vspace = int(round(vspace / self.profile.fbase)) + index = max((0, len(body) - 1)) + while vspace > 0: + body.insert(index, etree.Element(XHTML('br'))) + vspace -= 1 + if istate.halign != 'auto': + para.attrib['align'] = istate.halign + pstate = bstate.istate + if tag in CONTENT_TAGS: + bstate.inline = para + pstate = bstate.istate = None + etree.SubElement(para, XHTML(tag), attrib=istate.attrib) + elif tag in TABLE_TAGS: + para.attrib['valign'] = 'top' + if not text: + return + if not pstate or istate != pstate: + inline = para + valign = istate.valign + fsize = istate.fsize + href = istate.href + if not href: + bstate.anchor = None + elif pstate and pstate.href == href: + inline = bstate.anchor + else: + inline = etree.SubElement(inline, XHTML('a'), href=href) + bstate.anchor = inline + if valign == 'super': + inline = etree.SubElement(inline, XHTML('sup')) + elif valign == 'sub': + inline = etree.SubElement(inline, XHTML('sub')) + elif fsize != 3: + inline = etree.SubElement(inline, XHTML('font'), + size=str(fsize)) + if istate.family == 'monospace': + inline = etree.SubElement(inline, XHTML('tt')) + if istate.italic: + inline = etree.SubElement(inline, XHTML('i')) + if istate.bold: + inline = etree.SubElement(inline, XHTML('b')) + bstate.inline = inline + bstate.istate = istate + inline = bstate.inline + content = self.preize_text(text) if istate.preserve else [text] + for item in content: + if isinstance(item, basestring): + if len(inline) == 0: + inline.text = (inline.text or '') + item + else: + last = inline[-1] + last.tail = (last.tail or '') + item + else: + inline.append(item) + + def mobimlize_elem(self, elem, stylizer, bstate, istates): + if not isinstance(elem.tag, basestring) \ + or namespace(elem.tag) != XHTML_NS: + return + style = stylizer.style(elem) + # does not exist lalalala + if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \ + or style['visibility'] == 'hidden': + return + 
tag = barename(elem.tag) + istate = copy.copy(istates[-1]) + istate.list_num = 0 + istates.append(istate) + left = 0 + display = style['display'] + isblock = not display.startswith('inline') + isblock = isblock and style['float'] == 'none' + isblock = isblock and tag != 'br' + if isblock: + bstate.para = None + istate.halign = style['text-align'] + istate.indent = style['text-indent'] + if style['margin-left'] == 'auto' \ + and style['margin-right'] == 'auto': + istate.halign = 'center' + margin = asfloat(style['margin-left']) + padding = asfloat(style['padding-left']) + if tag != 'body': + left = margin + padding + istate.left += left + vmargin = asfloat(style['margin-top']) + bstate.vmargin = max((bstate.vmargin, vmargin)) + vpadding = asfloat(style['padding-top']) + if vpadding > 0: + bstate.vpadding += bstate.vmargin + bstate.vmargin = 0 + bstate.vpadding += vpadding + elif not istate.href: + margin = asfloat(style['margin-left']) + padding = asfloat(style['padding-left']) + lspace = margin + padding + if lspace > 0: + spaces = int(round((lspace * 3) / style['font-size'])) + elem.text = (u'\xa0' * spaces) + (elem.text or '') + margin = asfloat(style['margin-right']) + padding = asfloat(style['padding-right']) + rspace = margin + padding + if rspace > 0: + spaces = int(round((rspace * 3) / style['font-size'])) + if len(elem) == 0: + elem.text = (elem.text or '') + (u'\xa0' * spaces) + else: + last = elem[-1] + last.text = (last.text or '') + (u'\xa0' * spaces) + if bstate.content and style['page-break-before'] in PAGE_BREAKS: + bstate.pbreak = True + istate.fsize = self.mobimlize_font(style['font-size']) + istate.italic = True if style['font-style'] == 'italic' else False + weight = style['font-weight'] + istate.bold = weight in ('bold', 'bolder') or asfloat(weight) > 400 + istate.preserve = (style['white-space'] in ('pre', 'pre-wrap')) + if 'monospace' in style['font-family']: + istate.family = 'monospace' + elif 'sans-serif' in style['font-family']: + 
istate.family = 'sans-serif' + else: + istate.family = 'serif' + valign = style['vertical-align'] + if valign in ('super', 'text-top') or asfloat(valign) > 0: + istate.valign = 'super' + elif valign == 'sub' or asfloat(valign) < 0: + istate.valign = 'sub' + else: + istate.valign = 'baseline' + if 'id' in elem.attrib: + istate.ids.add(elem.attrib['id']) + if 'name' in elem.attrib: + istate.ids.add(elem.attrib['name']) + if tag == 'a' and 'href' in elem.attrib: + istate.href = elem.attrib['href'] + istate.attrib.clear() + if tag == 'img' and 'src' in elem.attrib: + istate.attrib['src'] = elem.attrib['src'] + istate.attrib['align'] = 'baseline' + for prop in ('width', 'height'): + if style[prop] != 'auto': + value = style[prop] + if value == getattr(self.profile, prop): + result = '100%' + else: + ems = int(round(value / self.profile.fbase)) + result = "%dem" % ems + istate.attrib[prop] = result + elif tag == 'hr' and asfloat(style['width']) > 0: + prop = style['width'] / self.profile.width + istate.attrib['width'] = "%d%%" % int(round(prop * 100)) + elif display == 'table': + tag = 'table' + elif display == 'table-row': + tag = 'tr' + elif display == 'table-cell': + tag = 'td' + text = None + if elem.text: + if istate.preserve: + text = elem.text + elif len(elem) > 0 and elem.text.isspace(): + text = None + else: + text = COLLAPSE.sub(' ', elem.text) + if text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS: + self.mobimlize_content(tag, text, bstate, istates) + for child in elem: + self.mobimlize_elem(child, stylizer, bstate, istates) + tail = None + if child.tail: + if istate.preserve: + tail = child.tail + elif bstate.para is None and child.tail.isspace(): + tail = None + else: + tail = COLLAPSE.sub(' ', child.tail) + if tail: + self.mobimlize_content(tag, tail, bstate, istates) + if bstate.content and style['page-break-after'] in PAGE_BREAKS: + bstate.pbreak = True + if isblock: + para = bstate.para + if para is not None and para.text == u'\xa0': + 
para.getparent().replace(para, etree.Element(XHTML('br'))) + bstate.para = None + bstate.istate = None + vmargin = asfloat(style['margin-bottom']) + bstate.vmargin = max((bstate.vmargin, vmargin)) + vpadding = asfloat(style['padding-bottom']) + if vpadding > 0: + bstate.vpadding += bstate.vmargin + bstate.vmargin = 0 + bstate.vpadding += vpadding + if tag in NESTABLE_TAGS and bstate.nested: + bstate.nested.pop() + istates.pop() diff --git a/src/calibre/ebooks/mobi/palmdoc.py b/src/calibre/ebooks/mobi/palmdoc.py index 30d0905973..eedab1c88f 100644 --- a/src/calibre/ebooks/mobi/palmdoc.py +++ b/src/calibre/ebooks/mobi/palmdoc.py @@ -2,7 +2,11 @@ # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai __license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' +__copyright__ = '2008, Kovid Goyal ' \ + 'and Marshall T. Vandegrift ' + +from cStringIO import StringIO +from struct import pack COUNT_BITS = 3 @@ -31,4 +35,54 @@ def decompress_doc(data): res.append(res[j - di+k]) return ''.join([chr(i) for i in res]) - \ No newline at end of file + +def compress_doc(data): + out = StringIO() + i = 0 + ldata = len(data) + while i < ldata: + if i > 10 and (ldata - i) > 10: + chunk = '' + match = -1 + for j in xrange(10, 2, -1): + chunk = data[i:i+j] + try: + match = data.rindex(chunk, 0, i) + except ValueError: + continue + if (i - match) <= 2047: + break + match = -1 + if match >= 0: + n = len(chunk) + m = i - match + code = 0x8000 + ((m << 3) & 0x3ff8) + (n - 3) + out.write(pack('>H', code)) + i += n + continue + ch = data[i] + och = ord(ch) + i += 1 + if ch == ' ' and (i + 1) < ldata: + onch = ord(data[i]) + if onch >= 0x40 and onch < 0x80: + out.write(pack('>B', onch ^ 0x80)) + i += 1 + continue + if och == 0 or (och > 8 and och < 0x80): + out.write(ch) + else: + j = i + binseq = [ch] + while j < ldata and len(binseq) < 8: + ch = data[j] + och = ord(ch) + if och == 0 or (och > 8 and och < 0x80): + break + binseq.append(ch) + j += 1 + out.write(pack('>B', len(binseq))) + 
out.write(''.join(binseq)) + i += len(binseq) - 1 + return out.getvalue() + diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py new file mode 100644 index 0000000000..a6dc18110c --- /dev/null +++ b/src/calibre/ebooks/mobi/writer.py @@ -0,0 +1,583 @@ +''' +Write content to Mobipocket books. +''' +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. Vandegrift ' + +import sys +import os +from struct import pack +import functools +import time +import random +from cStringIO import StringIO +import re +from itertools import izip, count +from collections import defaultdict +from urlparse import urldefrag +import logging +from lxml import etree +from PIL import Image +from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \ + OEB_RASTER_IMAGES +from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname +from calibre.ebooks.oeb.base import Logger, OEBBook +from calibre.ebooks.oeb.profile import Context +from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener +from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer +from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer +from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder +from calibre.ebooks.oeb.transforms.manglecase import CaseMangler +from calibre.ebooks.mobi.palmdoc import compress_doc +from calibre.ebooks.mobi.langcodes import iana2mobi +from calibre.ebooks.mobi.mobiml import MBP_NS, MBP, MobiMLizer +from calibre.customize.ui import run_plugins_on_postprocess +from calibre.utils.config import OptionParser +from optparse import OptionGroup + +# TODO: +# - Allow override CSS (?) 
+# - Generate index records +# - Optionally rasterize tables + +EXTH_CODES = { + 'creator': 100, + 'publisher': 101, + 'description': 103, + 'identifier': 104, + 'subject': 105, + 'date': 106, + 'review': 107, + 'contributor': 108, + 'rights': 109, + 'type': 111, + 'source': 112, + 'title': 503, + } + +RECORD_SIZE = 0x1000 + +UNCOMPRESSED = 1 +PALMDOC = 2 +HUFFDIC = 17480 + +PALM_MAX_IMAGE_SIZE = 63 * 1024 +OTHER_MAX_IMAGE_SIZE = 10 * 1024 * 1024 +MAX_THUMB_SIZE = 16 * 1024 +MAX_THUMB_DIMEN = (180, 240) + +def encode(data): + return data.encode('utf-8') + +# Almost like the one for MS LIT, but not quite. +DECINT_FORWARD = 0 +DECINT_BACKWARD = 1 +def decint(value, direction): + bytes = [] + while True: + b = value & 0x7f + value >>= 7 + bytes.append(b) + if value == 0: + break + if direction == DECINT_FORWARD: + bytes[0] |= 0x80 + elif direction == DECINT_BACKWARD: + bytes[-1] |= 0x80 + return ''.join(chr(b) for b in reversed(bytes)) + + +class Serializer(object): + NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'} + + def __init__(self, oeb, images): + self.oeb = oeb + self.images = images + self.id_offsets = {} + self.href_offsets = defaultdict(list) + self.breaks = [] + buffer = self.buffer = StringIO() + buffer.write('') + self.serialize_head() + self.serialize_body() + buffer.write('') + self.fixup_links() + self.text = buffer.getvalue() + + def serialize_head(self): + buffer = self.buffer + buffer.write('') + if len(self.oeb.guide) > 0: + self.serialize_guide() + buffer.write('') + + def serialize_guide(self): + buffer = self.buffer + hrefs = self.oeb.manifest.hrefs + buffer.write('') + for ref in self.oeb.guide.values(): + path, frag = urldefrag(ref.href) + if hrefs[path].media_type not in OEB_DOCS: + continue + buffer.write('') + buffer.write('') + + def serialize_href(self, href, base=None): + hrefs = self.oeb.manifest.hrefs + path, frag = urldefrag(href) + if path and base: + path = base.abshref(path) + if path and path not in hrefs: + return 
False + buffer = self.buffer + item = hrefs[path] if path else None + if item and item.spine_position is None: + return False + id = item.id if item else base.id + href = '#'.join((id, frag)) if frag else id + buffer.write('filepos=') + self.href_offsets[href].append(buffer.tell()) + buffer.write('0000000000') + return True + + def serialize_body(self): + buffer = self.buffer + buffer.write('') + # CybookG3 'Start Reading' link + if 'text' in self.oeb.guide: + href = self.oeb.guide['text'].href + buffer.write('') + spine = [item for item in self.oeb.spine if item.linear] + spine.extend([item for item in self.oeb.spine if not item.linear]) + for item in spine: + self.serialize_item(item) + buffer.write('') + + def serialize_item(self, item): + buffer = self.buffer + if not item.linear: + self.breaks.append(buffer.tell() - 1) + self.id_offsets[item.id] = buffer.tell() + for elem in item.data.find(XHTML('body')): + self.serialize_elem(elem, item) + buffer.write('') + + def serialize_elem(self, elem, item, nsrmap=NSRMAP): + buffer = self.buffer + if not isinstance(elem.tag, basestring) \ + or namespace(elem.tag) not in nsrmap: + return + hrefs = self.oeb.manifest.hrefs + tag = prefixname(elem.tag, nsrmap) + for attr in ('name', 'id'): + if attr in elem.attrib: + id = '#'.join((item.id, elem.attrib[attr])) + self.id_offsets[id] = buffer.tell() + del elem.attrib[attr] + if tag == 'a' and not elem.attrib \ + and not len(elem) and not elem.text: + return + buffer.write('<') + buffer.write(tag) + if elem.attrib: + for attr, val in elem.attrib.items(): + if namespace(attr) not in nsrmap: + continue + attr = prefixname(attr, nsrmap) + buffer.write(' ') + if attr == 'href': + if self.serialize_href(val, item): + continue + elif attr == 'src': + href = item.abshref(val) + if href in hrefs: + index = self.images[href] + buffer.write('recindex="%05d"' % index) + continue + buffer.write(attr) + buffer.write('="') + self.serialize_text(val, quot=True) + buffer.write('"') + if 
elem.text or len(elem) > 0: + buffer.write('>') + if elem.text: + self.serialize_text(elem.text) + for child in elem: + self.serialize_elem(child, item) + if child.tail: + self.serialize_text(child.tail) + buffer.write('' % tag) + else: + buffer.write('/>') + + def serialize_text(self, text, quot=False): + text = text.replace('&', '&') + text = text.replace('<', '<') + text = text.replace('>', '>') + if quot: + text = text.replace('"', '"') + self.buffer.write(encode(text)) + + def fixup_links(self): + buffer = self.buffer + for id, hoffs in self.href_offsets.items(): + ioff = self.id_offsets[id] + for hoff in hoffs: + buffer.seek(hoff) + buffer.write('%010d' % ioff) + + +class MobiWriter(object): + COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+') + + def __init__(self, compression=None, imagemax=None): + self._compression = compression or UNCOMPRESSED + self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE + + def dump(self, oeb, path): + if hasattr(path, 'write'): + return self._dump_stream(oeb, path) + with open(path, 'w+b') as stream: + return self._dump_stream(oeb, stream) + + def _write(self, *data): + for datum in data: + self._stream.write(datum) + + def _tell(self): + return self._stream.tell() + + def _dump_stream(self, oeb, stream): + self._oeb = oeb + self._stream = stream + self._records = [None] + self._generate_content() + self._generate_record0() + self._write_header() + self._write_content() + + def _generate_content(self): + self._map_image_names() + self._generate_text() + self._generate_images() + + def _map_image_names(self): + index = 1 + self._images = images = {} + for item in self._oeb.manifest.values(): + if item.media_type in OEB_RASTER_IMAGES: + images[item.href] = index + index += 1 + + def _read_text_record(self, text): + pos = text.tell() + text.seek(0, 2) + npos = min((pos + RECORD_SIZE, text.tell())) + last = '' + while not last.decode('utf-8', 'ignore'): + size = len(last) + 1 + text.seek(npos - size) + last = text.read(size) + extra = 0 + 
try: + last.decode('utf-8') + except UnicodeDecodeError: + prev = len(last) + while True: + text.seek(npos - prev) + last = text.read(len(last) + 1) + try: + last.decode('utf-8') + except UnicodeDecodeError: + pass + else: + break + extra = len(last) - prev + text.seek(pos) + data = text.read(RECORD_SIZE) + overlap = text.read(extra) + text.seek(npos) + return data, overlap + + def _generate_text(self): + self._oeb.logger.info('Serializing markup content...') + serializer = Serializer(self._oeb, self._images) + breaks = serializer.breaks + text = serializer.text + self._text_length = len(text) + text = StringIO(text) + nrecords = 0 + offset = 0 + if self._compression != UNCOMPRESSED: + self._oeb.logger.info('Compressing markup content...') + data, overlap = self._read_text_record(text) + while len(data) > 0: + if self._compression == PALMDOC: + data = compress_doc(data) + record = StringIO() + record.write(data) + record.write(overlap) + record.write(pack('>B', len(overlap))) + nextra = 0 + pbreak = 0 + running = offset + while breaks and (breaks[0] - offset) < RECORD_SIZE: + pbreak = (breaks.pop(0) - running) >> 3 + encoded = decint(pbreak, DECINT_FORWARD) + record.write(encoded) + running += pbreak << 3 + nextra += len(encoded) + lsize = 1 + while True: + size = decint(nextra + lsize, DECINT_BACKWARD) + if len(size) == lsize: + break + lsize += 1 + record.write(size) + self._records.append(record.getvalue()) + nrecords += 1 + offset += RECORD_SIZE + data, overlap = self._read_text_record(text) + self._text_nrecords = nrecords + + def _rescale_image(self, data, maxsizeb, dimen=None): + image = Image.open(StringIO(data)) + format = image.format + changed = False + if image.format not in ('JPEG', 'GIF'): + width, height = image.size + area = width * height + format = 'GIF' if area <= 40000 else 'JPEG' + changed = True + if dimen is not None: + image.thumbnail(dimen, Image.ANTIALIAS) + changed = True + if changed: + data = StringIO() + image.save(data, format) + data 
= data.getvalue() + if len(data) <= maxsizeb: + return data + image = image.convert('RGBA') + for quality in xrange(95, -1, -1): + data = StringIO() + image.save(data, 'JPEG', quality=quality) + data = data.getvalue() + if len(data) <= maxsizeb: + return data + width, height = image.size + for scale in xrange(99, 0, -1): + scale = scale / 100. + data = StringIO() + scaled = image.copy() + size = (int(width * scale), (height * scale)) + scaled.thumbnail(size, Image.ANTIALIAS) + scaled.save(data, 'JPEG', quality=0) + data = data.getvalue() + if len(data) <= maxsizeb: + return data + # Well, we tried? + return data + + def _generate_images(self): + self._oeb.logger.warn('Serializing images...') + images = [(index, href) for href, index in self._images.items()] + images.sort() + metadata = self._oeb.metadata + coverid = metadata.cover[0] if metadata.cover else None + for _, href in images: + item = self._oeb.manifest.hrefs[href] + data = self._rescale_image(item.data, self._imagemax) + self._records.append(data) + + def _generate_record0(self): + metadata = self._oeb.metadata + exth = self._build_exth() + record0 = StringIO() + record0.write(pack('>HHIHHHH', self._compression, 0, + self._text_length, self._text_nrecords, RECORD_SIZE, 0, 0)) + uid = random.randint(0, 0xffffffff) + title = str(metadata.title[0]) + record0.write('MOBI') + record0.write(pack('>IIIII', 0xe8, 2, 65001, uid, 6)) + record0.write('\xff' * 40) + record0.write(pack('>I', self._text_nrecords + 1)) + record0.write(pack('>II', 0xe8 + 16 + len(exth), len(title))) + record0.write(iana2mobi(str(metadata.language[0]))) + record0.write('\0' * 8) + record0.write(pack('>II', 6, self._text_nrecords + 1)) + record0.write('\0' * 16) + record0.write(pack('>I', 0x50)) + record0.write('\0' * 32) + record0.write(pack('>IIII', 0xffffffff, 0xffffffff, 0, 0)) + # The '5' is a bitmask of extra record data at the end: + # - 0x1: (?) + # - 0x4: + # Of course, the formats aren't quite the same. 
+ # TODO: What the hell are the rest of these fields? + record0.write(pack('>IIIIIIIIIIIIIIIII', + 0, 0, 0, 0xffffffff, 0, 0xffffffff, 0, 0xffffffff, 0, 0xffffffff, + 0, 0xffffffff, 0, 0xffffffff, 0xffffffff, 5, 0xffffffff)) + record0.write(exth) + record0.write(title) + record0 = record0.getvalue() + self._records[0] = record0 + ('\0' * (2452 - len(record0))) + + def _build_exth(self): + oeb = self._oeb + exth = StringIO() + nrecs = 0 + for term in oeb.metadata: + if term not in EXTH_CODES: continue + code = EXTH_CODES[term] + for item in oeb.metadata[term]: + data = self.COLLAPSE_RE.sub(' ', unicode(item)) + data = data.encode('utf-8') + exth.write(pack('>II', code, len(data) + 8)) + exth.write(data) + nrecs += 1 + if oeb.metadata.cover: + id = str(oeb.metadata.cover[0]) + item = oeb.manifest.ids[id] + href = item.href + index = self._images[href] - 1 + exth.write(pack('>III', 0xc9, 0x0c, index)) + exth.write(pack('>III', 0xcb, 0x0c, 0)) + index = self._add_thumbnail(item) - 1 + exth.write(pack('>III', 0xca, 0x0c, index)) + nrecs += 3 + exth = exth.getvalue() + trail = len(exth) % 4 + pad = '' if not trail else '\0' * (4 - trail) + exth = ['EXTH', pack('>II', len(exth) + 12, nrecs), exth, pad] + return ''.join(exth) + + def _add_thumbnail(self, item): + data = self._rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN) + manifest = self._oeb.manifest + id, href = manifest.generate('thumbnail', 'thumbnail.jpeg') + manifest.add(id, href, 'image/jpeg', data=data) + index = len(self._images) + 1 + self._images[href] = index + self._records.append(data) + return index + + def _write_header(self): + title = str(self._oeb.metadata.title[0]) + title = re.sub('[^-A-Za-z0-9]+', '_', title)[:32] + title = title + ('\0' * (32 - len(title))) + now = int(time.time()) + nrecords = len(self._records) + self._write(title, pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0), + 'BOOK', 'MOBI', pack('>IIH', nrecords, 0, nrecords)) + offset = self._tell() + (8 * nrecords) + 2 + for id, 
record in izip(count(), self._records): + self._write(pack('>I', offset), '\0', pack('>I', id)[1:]) + offset += len(record) + self._write('\0\0') + + def _write_content(self): + for record in self._records: + self._write(record) + + +def add_mobi_options(parser): + profiles = Context.PROFILES.keys() + profiles.sort() + profiles = ', '.join(profiles) + group = OptionGroup(parser, _('Mobipocket'), + _('Mobipocket-specific options.')) + group.add_option( + '-c', '--compress', default=False, action='store_true', + help=_('Compress file text using PalmDOC compression.')) + group.add_option( + '-r', '--rescale-images', default=False, action='store_true', + help=_('Modify images to meet Palm device size limitations.')) + parser.add_option_group(group) + group = OptionGroup(parser, _('Profiles'), _('Device renderer profiles. ' + 'Affects conversion of default font sizes and rasterization ' + 'resolution. Valid profiles are: %s.') % profiles) + group.add_option( + '--source-profile', default='Browser', metavar='PROFILE', + help=_("Source renderer profile. Default is 'Browser'.")) + group.add_option( + '--dest-profile', default='CybookG3', metavar='PROFILE', + help=_("Destination renderer profile. Default is 'CybookG3'.")) + parser.add_option_group(group) + return + +def option_parser(): + parser = OptionParser(usage=_('%prog [options] OPFFILE')) + parser.add_option( + '-o', '--output', default=None, + help=_('Output file. 
Default is derived from input filename.')) + parser.add_option( + '-v', '--verbose', default=0, action='count', + help=_('Useful for debugging.')) + add_mobi_options(parser) + return parser + +def oeb2mobi(opts, inpath): + logger = Logger(logging.getLogger('oeb2mobi')) + logger.setup_cli_handler(opts.verbose) + outpath = opts.output + if outpath is None: + outpath = os.path.basename(inpath) + outpath = os.path.splitext(outpath)[0] + '.mobi' + source = opts.source_profile + if source not in Context.PROFILES: + logger.error(_('Unknown source profile %r') % source) + return 1 + dest = opts.dest_profile + if dest not in Context.PROFILES: + logger.error(_('Unknown destination profile %r') % dest) + return 1 + compression = PALMDOC if opts.compress else UNCOMPRESSED + imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None + context = Context(source, dest) + oeb = OEBBook(inpath, logger=logger) + tocadder = HTMLTOCAdder() + tocadder.transform(oeb, context) + mangler = CaseMangler() + mangler.transform(oeb, context) + fbase = context.dest.fbase + fkey = context.dest.fnums.values() + flattener = CSSFlattener( + fbase=fbase, fkey=fkey, unfloat=True, untable=True) + flattener.transform(oeb, context) + rasterizer = SVGRasterizer() + rasterizer.transform(oeb, context) + trimmer = ManifestTrimmer() + trimmer.transform(oeb, context) + mobimlizer = MobiMLizer() + mobimlizer.transform(oeb, context) + writer = MobiWriter(compression=compression, imagemax=imagemax) + writer.dump(oeb, outpath) + run_plugins_on_postprocess(outpath, 'mobi') + logger.info(_('Output written to ') + outpath) + +def main(argv=sys.argv): + parser = option_parser() + opts, args = parser.parse_args(argv[1:]) + if len(args) != 1: + parser.print_help() + return 1 + inpath = args[0] + retval = oeb2mobi(opts, inpath) + return retval + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/calibre/ebooks/oeb/__init__.py b/src/calibre/ebooks/oeb/__init__.py new file mode 100644 index 
0000000000..4f8588535f --- /dev/null +++ b/src/calibre/ebooks/oeb/__init__.py @@ -0,0 +1,2 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. Vandegrift ' diff --git a/src/calibre/ebooks/lit/oeb.py b/src/calibre/ebooks/oeb/base.py similarity index 64% rename from src/calibre/ebooks/lit/oeb.py rename to src/calibre/ebooks/oeb/base.py index eb36f0c2cb..4248657e23 100644 --- a/src/calibre/ebooks/lit/oeb.py +++ b/src/calibre/ebooks/oeb/base.py @@ -14,10 +14,14 @@ from itertools import izip, count from urlparse import urldefrag, urlparse, urlunparse from urllib import unquote as urlunquote import logging +import re +import htmlentitydefs +import uuid +import copy from lxml import etree from calibre import LoggingInterface -XML_PARSER = etree.XMLParser(recover=True, resolve_entities=False) +XML_PARSER = etree.XMLParser(recover=True) XML_NS = 'http://www.w3.org/XML/1998/namespace' XHTML_NS = 'http://www.w3.org/1999/xhtml' OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/' @@ -28,25 +32,48 @@ DC11_NS = 'http://purl.org/dc/elements/1.1/' XSI_NS = 'http://www.w3.org/2001/XMLSchema-instance' DCTERMS_NS = 'http://purl.org/dc/terms/' NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/' +SVG_NS = 'http://www.w3.org/2000/svg' +XLINK_NS = 'http://www.w3.org/1999/xlink' XPNSMAP = {'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS, 'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS, - 'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS} + 'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS, + 'svg': SVG_NS, 'xl': XLINK_NS} def XML(name): return '{%s}%s' % (XML_NS, name) def XHTML(name): return '{%s}%s' % (XHTML_NS, name) def OPF(name): return '{%s}%s' % (OPF2_NS, name) def DC(name): return '{%s}%s' % (DC11_NS, name) def NCX(name): return '{%s}%s' % (NCX_NS, name) +def SVG(name): return '{%s}%s' % (SVG_NS, name) +def XLINK(name): return '{%s}%s' % (XLINK_NS, name) +EPUB_MIME = 'application/epub+zip' XHTML_MIME = 'application/xhtml+xml' CSS_MIME = 'text/css' NCX_MIME = 
'application/x-dtbncx+xml' OPF_MIME = 'application/oebps-package+xml' OEB_DOC_MIME = 'text/x-oeb1-document' OEB_CSS_MIME = 'text/x-oeb1-css' +OPENTYPE_MIME = 'font/opentype' +GIF_MIME = 'image/gif' +JPEG_MIME = 'image/jpeg' +PNG_MIME = 'image/png' +SVG_MIME = 'image/svg+xml' OEB_STYLES = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css']) OEB_DOCS = set([XHTML_MIME, 'text/html', OEB_DOC_MIME, 'text/x-oeb-document']) +OEB_RASTER_IMAGES = set([GIF_MIME, JPEG_MIME, PNG_MIME]) +OEB_IMAGES = set([GIF_MIME, JPEG_MIME, PNG_MIME, SVG_MIME]) + +MS_COVER_TYPE = 'other.ms-coverimage-standard' + +recode = lambda s: s.decode('iso-8859-1').encode('ascii', 'xmlcharrefreplace') +ENTITYDEFS = dict((k, recode(v)) for k, v in htmlentitydefs.entitydefs.items()) +del ENTITYDEFS['lt'] +del ENTITYDEFS['gt'] +del ENTITYDEFS['quot'] +del ENTITYDEFS['amp'] +del recode def element(parent, *args, **kwargs): @@ -64,10 +91,23 @@ def barename(name): return name.split('}', 1)[1] return name +def prefixname(name, nsrmap): + prefix = nsrmap[namespace(name)] + if not prefix: + return barename(name) + return ':'.join((prefix, barename(name))) + def xpath(elem, expr): return elem.xpath(expr, namespaces=XPNSMAP) -URL_UNSAFE = r"""`!@#$%^&*[](){}?+=;:'",<>\| """ +def xml2str(root): + return etree.tostring(root, encoding='utf-8', xml_declaration=True) + +ASCII_CHARS = set(chr(x) for x in xrange(128)) +URL_SAFE = set(u'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + u'abcdefghijklmnopqrstuvwxyz' + u'0123456789' u'_.-/~') +URL_UNSAFE = ASCII_CHARS - URL_SAFE def urlquote(href): result = [] for char in href: @@ -84,12 +124,20 @@ def urlnormalize(href): return urlunparse(parts) +class OEBError(Exception): + pass + + class FauxLogger(object): def __getattr__(self, name): return self def __call__(self, message): print message +class Logger(LoggingInterface, object): + def __getattr__(self, name): + return object.__getattribute__(self, 'log_' + name) + class AbstractContainer(object): def read_xml(self, path): @@ -108,25 +156,45 @@ 
class DirContainer(AbstractContainer): def write(self, path, data): path = os.path.join(self.rootdir, path) + dir = os.path.dirname(path) + if not os.path.isdir(dir): + os.makedirs(dir) with open(urlunquote(path), 'wb') as f: return f.write(data) def exists(self, path): path = os.path.join(self.rootdir, path) - return os.path.isfile(path) + return os.path.isfile(urlunquote(path)) + +class DirWriter(object): + def __init__(self, version=2.0): + self.version = version + + def dump(self, oeb, path): + if not os.path.isdir(path): + os.mkdir(path) + output = DirContainer(path) + for item in oeb.manifest.values(): + output.write(item.href, str(item)) + metadata = oeb.to_opf2() if self.version == 2 else oeb.to_opf1() + for href, data in metadata.values(): + output.write(href, xml2str(data)) + return class Metadata(object): TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description', 'format', 'identifier', 'language', 'publisher', 'relation', 'rights', 'source', 'subject', 'title', 'type']) + ATTRS = set(['role', 'file-as', 'scheme']) OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS} OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS, 'xsi': XSI_NS} class Item(object): - def __init__(self, term, value, fq_attrib={}): - self.fq_attrib = dict(fq_attrib) + def __init__(self, term, value, fq_attrib={}, **kwargs): + self.fq_attrib = fq_attrib = dict(fq_attrib) + fq_attrib.update(kwargs) if term == OPF('meta') and not value: term = self.fq_attrib.pop('name') value = self.fq_attrib.pop('content') @@ -136,7 +204,12 @@ class Metadata(object): self.value = value self.attrib = attrib = {} for fq_attr in fq_attrib: - attr = barename(fq_attr) + if fq_attr in Metadata.ATTRS: + attr = fq_attr + fq_attr = OPF2(fq_attr) + fq_attrib[fq_attr] = fq_attrib.pop(attr) + else: + attr = barename(fq_attr) attrib[attr] = fq_attrib[fq_attr] def __getattr__(self, name): @@ -153,7 +226,7 @@ class Metadata(object): % (barename(self.term), self.value, self.attrib) def 
__str__(self): - return str(self.value) + return unicode(self.value).encode('ascii', 'xmlcharrefreplace') def __unicode__(self): return unicode(self.value) @@ -183,8 +256,8 @@ class Metadata(object): self.oeb = oeb self.items = defaultdict(list) - def add(self, term, value, attrib={}): - item = self.Item(term, value, attrib) + def add(self, term, value, attrib={}, **kwargs): + item = self.Item(term, value, attrib, **kwargs) items = self.items[barename(item.term)] items.append(item) return item @@ -225,7 +298,11 @@ class Metadata(object): class Manifest(object): class Item(object): - def __init__(self, id, href, media_type, fallback=None, loader=str): + ENTITY_RE = re.compile(r'&([a-zA-Z_:][a-zA-Z0-9.-_:]+);') + NUM_RE = re.compile('^(.*)([0-9][0-9.]*)(?=[.]|$)') + + def __init__(self, id, href, media_type, + fallback=None, loader=str, data=None): self.id = id self.href = self.path = urlnormalize(href) self.media_type = media_type @@ -233,26 +310,32 @@ class Manifest(object): self.spine_position = None self.linear = True self._loader = loader - self._data = None + self._data = data def __repr__(self): return 'Item(id=%r, href=%r, media_type=%r)' \ % (self.id, self.href, self.media_type) + def _force_xhtml(self, data): + repl = lambda m: ENTITYDEFS.get(m.group(1), m.group(0)) + data = self.ENTITY_RE.sub(repl, data) + data = etree.fromstring(data, parser=XML_PARSER) + if namespace(data.tag) != XHTML_NS: + data.attrib['xmlns'] = XHTML_NS + data = etree.tostring(data) + data = etree.fromstring(data, parser=XML_PARSER) + return data + def data(): def fget(self): - if self._data: + if self._data is not None: return self._data data = self._loader(self.href) - if self.media_type == XHTML_MIME: - data = etree.fromstring(data, parser=XML_PARSER) - if namespace(data.tag) != XHTML_NS: - data.attrib['xmlns'] = XHTML_NS - data = etree.tostring(data) - data = etree.fromstring(data, parser=XML_PARSER) - elif self.media_type.startswith('application/') \ - and 
self.media_type.endswith('+xml'): + if self.media_type in OEB_DOCS: + data = self._force_xhtml(data) + elif self.media_type[-4:] in ('+xml', '/xml'): data = etree.fromstring(data, parser=XML_PARSER) + self._data = data return data def fset(self, value): self._data = value @@ -260,13 +343,49 @@ class Manifest(object): self._data = None return property(fget, fset, fdel) data = data() - + + def __str__(self): + data = self.data + if isinstance(data, etree._Element): + return xml2str(data) + return str(data) + + def __eq__(self, other): + return id(self) == id(other) + + def __ne__(self, other): + return not self.__eq__(other) + def __cmp__(self, other): result = cmp(self.spine_position, other.spine_position) if result != 0: return result - return cmp(self.id, other.id) + smatch = self.NUM_RE.search(self.href) + sref = smatch.group(1) if smatch else self.href + snum = float(smatch.group(2)) if smatch else 0.0 + skey = (sref, snum, self.id) + omatch = self.NUM_RE.search(other.href) + oref = omatch.group(1) if omatch else other.href + onum = float(omatch.group(2)) if omatch else 0.0 + okey = (oref, onum, other.id) + return cmp(skey, okey) + def relhref(self, href): + if '/' not in self.href: + return href + base = os.path.dirname(self.href).split('/') + target, frag = urldefrag(href) + target = target.split('/') + for index in xrange(min(len(base), len(target))): + if base[index] != target[index]: break + else: + index += 1 + relhref = (['..'] * (len(base) - index)) + target[index:] + relhref = '/'.join(relhref) + if frag: + relhref = '#'.join((relhref, frag)) + return relhref + def abshref(self, href): if '/' not in self.href: return href @@ -277,42 +396,60 @@ class Manifest(object): def __init__(self, oeb): self.oeb = oeb - self.items = {} + self.ids = {} self.hrefs = {} - def add(self, id, href, media_type, fallback=None): + def add(self, id, href, media_type, fallback=None, loader=None, data=None): + loader = loader or self.oeb.container.read item = self.Item( - id, 
href, media_type, fallback, self.oeb.container.read) - self.items[item.id] = item + id, href, media_type, fallback, loader, data) + self.ids[item.id] = item self.hrefs[item.href] = item return item - def remove(self, id): - href = self.items[id].href - del self.items[id] - del self.hrefs[href] + def remove(self, item): + if item in self.ids: + item = self.ids[item] + del self.ids[item.id] + del self.hrefs[item.href] + if item in self.oeb.spine: + self.oeb.spine.remove(item) + + def generate(self, id, href): + href = urlnormalize(href) + base = id + index = 1 + while id in self.ids: + id = base + str(index) + index += 1 + base, ext = os.path.splitext(href) + index = 1 + while href in self.hrefs: + href = base + str(index) + ext + index += 1 + return id, href def __iter__(self): - for id in self.items: + for id in self.ids: yield id def __getitem__(self, id): - return self.items[id] + return self.ids[id] def values(self): - for item in self.items.values(): + for item in self.ids.values(): yield item def items(self): - for id, item in self.refs.items(): - yield id, items + for id, item in self.ids.items(): + yield id, item def __contains__(self, key): - return key in self.items + return key in self.ids def to_opf1(self, parent=None): elem = element(parent, 'manifest') - for item in self.items.values(): + for item in self.ids.values(): media_type = item.media_type if media_type == XHTML_MIME: media_type = OEB_DOC_MIME @@ -327,7 +464,7 @@ class Manifest(object): def to_opf2(self, parent=None): elem = element(parent, OPF('manifest')) - for item in self.items.values(): + for item in self.ids.values(): attrib = {'id': item.id, 'href': item.href, 'media-type': item.media_type} if item.fallback: @@ -341,18 +478,35 @@ class Spine(object): self.oeb = oeb self.items = [] - def add(self, item, linear): + def _linear(self, linear): if isinstance(linear, StringTypes): linear = linear.lower() if linear is None or linear in ('yes', 'true'): linear = True elif linear in ('no', 
'false'): linear = False - item.linear = linear + return linear + + def add(self, item, linear=None): + item.linear = self._linear(linear) item.spine_position = len(self.items) self.items.append(item) return item + def insert(self, index, item, linear): + item.linear = self._linear(linear) + item.spine_position = index + self.items.insert(index, item) + for i in xrange(index, len(self.items)): + self.items[i].spine_position = i + return item + + def remove(self, item): + index = item.spine_position + self.items.pop(index) + for i in xrange(index, len(self.items)): + self.items[i].spine_position = i + def __iter__(self): for item in self.items: yield item @@ -385,46 +539,81 @@ class Spine(object): class Guide(object): class Reference(object): + _TYPES_TITLES = [('cover', 'Cover'), ('title-page', 'Title Page'), + ('toc', 'Table of Contents'), ('index', 'Index'), + ('glossary', 'Glossary'), ('acknowledgements', 'Acknowledgements'), + ('bibliography', 'Bibliography'), ('colophon', 'Colophon'), + ('copyright-page', 'Copyright'), ('dedication', 'Dedication'), + ('epigraph', 'Epigraph'), ('foreword', 'Foreword'), + ('loi', 'List of Illustrations'), ('lot', 'List of Tables'), + ('notes', 'Notes'), ('preface', 'Preface'), + ('text', 'Main Text')] + TYPES = set(t for t, _ in _TYPES_TITLES) + TITLES = dict(_TYPES_TITLES) + ORDER = dict((t, i) for (t, _), i in izip(_TYPES_TITLES, count(0))) + def __init__(self, type, title, href): + if type.lower() in self.TYPES: + type = type.lower() + elif type not in self.TYPES and \ + not type.startswith('other.'): + type = 'other.' 
+ type + if not title: + title = self.TITLES.get(type, None) self.type = type self.title = title self.href = urlnormalize(href) - + def __repr__(self): return 'Reference(type=%r, title=%r, href=%r)' \ % (self.type, self.title, self.href) + + def _order(): + def fget(self): + return self.ORDER.get(self.type, self.type) + return property(fget=fget) + _order = _order() + + def __cmp__(self, other): + if not isinstance(other, Guide.Reference): + return NotImplemented + return cmp(self._order, other._order) def __init__(self, oeb): self.oeb = oeb self.refs = {} - + def add(self, type, title, href): ref = self.Reference(type, title, href) self.refs[type] = ref return ref - - def by_type(self, type): - return self.ref_types[type] - + def iterkeys(self): for type in self.refs: yield type __iter__ = iterkeys - + def values(self): - for ref in self.refs.values(): - yield ref - + values = list(self.refs.values()) + values.sort() + return values + def items(self): for type, ref in self.refs.items(): yield type, ref - def __getitem__(self, index): - return self.refs[index] - + def __getitem__(self, key): + return self.refs[key] + + def __delitem__(self, key): + del self.refs[key] + def __contains__(self, key): return key in self.refs - + + def __len__(self): + return len(self.refs) + def to_opf1(self, parent=None): elem = element(parent, 'guide') for ref in self.refs.values(): @@ -456,6 +645,12 @@ class TOC(object): node = TOC(title, href, klass, id) self.nodes.append(node) return node + + def iterdescendants(self): + for node in self.nodes: + yield node + for child in node.iterdescendants(): + yield child def __iter__(self): for node in self.nodes: @@ -463,6 +658,15 @@ class TOC(object): def __getitem__(self, index): return self.nodes[index] + + def autolayer(self): + prev = None + for node in list(self.nodes): + if prev and urldefrag(prev.href)[0] == urldefrag(node.href)[0]: + self.nodes.remove(node) + prev.nodes.append(node) + else: + prev = node def depth(self, level=0): if 
self.nodes: @@ -496,23 +700,33 @@ class TOC(object): class OEBBook(object): - def __init__(self, opfpath, container=None, logger=FauxLogger()): - if not container: + def __init__(self, opfpath=None, container=None, logger=FauxLogger()): + if opfpath and not container: container = DirContainer(os.path.dirname(opfpath)) opfpath = os.path.basename(opfpath) self.container = container self.logger = logger - opf = self._read_opf(opfpath) - self._all_from_opf(opf) + if opfpath or container: + opf = self._read_opf(opfpath) + self._all_from_opf(opf) def _convert_opf1(self, opf): + # Seriously, seriously wrong + if namespace(opf.tag) == OPF1_NS: + opf.tag = barename(opf.tag) + for elem in opf.iterdescendants(): + if isinstance(elem.tag, basestring) \ + and namespace(elem.tag) == OPF1_NS: + elem.tag = barename(elem.tag) + attrib = dict(opf.attrib) + attrib['version'] = '2.0' nroot = etree.Element(OPF('package'), - nsmap={None: OPF2_NS}, version="2.0", **dict(opf.attrib)) + nsmap={None: OPF2_NS}, attrib=attrib) metadata = etree.SubElement(nroot, OPF('metadata'), nsmap={'opf': OPF2_NS, 'dc': DC11_NS, 'xsi': XSI_NS, 'dcterms': DCTERMS_NS}) for prefix in ('d11', 'd10', 'd09'): - elements = xpath(opf, 'metadata/dc-metadata/%s:*' % prefix) + elements = xpath(opf, 'metadata//%s:*' % prefix) if elements: break for element in elements: if not element.text: continue @@ -524,7 +738,7 @@ class OEBBook(object): element.attrib[nsname] = element.attrib[name] del element.attrib[name] metadata.append(element) - for element in opf.xpath('metadata/x-metadata/meta'): + for element in opf.xpath('metadata//meta'): metadata.append(element) for item in opf.xpath('manifest/item'): media_type = item.attrib['media-type'].lower() @@ -541,30 +755,56 @@ class OEBBook(object): def _read_opf(self, opfpath): opf = self.container.read_xml(opfpath) version = float(opf.get('version', 1.0)) - if version < 2.0: + ns = namespace(opf.tag) + if ns not in ('', OPF1_NS, OPF2_NS): + raise OEBError('Invalid namespace %r 
for OPF document' % ns) + if ns != OPF2_NS or version < 2.0: opf = self._convert_opf1(opf) return opf def _metadata_from_opf(self, opf): - uid = opf.attrib['unique-identifier'] - self.metadata = metadata = Metadata(self) - for elem in xpath(opf, '/o2:package/o2:metadata/*'): - if elem.text or elem.attrib: + uid = opf.get('unique-identifier', 'calibre-uuid') + self.uid = None + self.metadata = metadata = Metadata(self) + ignored = (OPF('dc-metadata'), OPF('x-metadata')) + for elem in xpath(opf, '/o2:package/o2:metadata//*'): + if elem.tag not in ignored and (elem.text or elem.attrib): metadata.add(elem.tag, elem.text, elem.attrib) + haveuuid = haveid = False + for ident in metadata.identifier: + if unicode(ident).startswith('urn:uuid:'): + haveuuid = True + if 'id' in ident.attrib: + haveid = True + if not haveuuid and haveid: + bookid = "urn:uuid:%s" % str(uuid.uuid4()) + metadata.add('identifier', bookid, id='calibre-uuid') for item in metadata.identifier: if item.id == uid: self.uid = item break else: - self.logger.log_warn(u'Unique-identifier %r not found.' % uid) - self.uid = metadata.identifier[0] + self.logger.warn(u'Unique-identifier %r not found.' % uid) + for ident in metadata.identifier: + if 'id' in ident.attrib: + self.uid = metadata.identifier[0] + break + if not metadata.language: + self.logger.warn(u'Language not specified.') + metadata.add('language', 'en') + if not metadata.creator: + self.logger.warn(u'Creator not specified.') + metadata.add('creator', 'Unknown') + if not metadata.title: + self.logger.warn(u'Title not specified.') + metadata.add('title', 'Unknown') def _manifest_from_opf(self, opf): self.manifest = manifest = Manifest(self) for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'): href = elem.get('href') if not self.container.exists(href): - self.logger.log_warn(u'Manifest item %r not found.' % href) + self.logger.warn(u'Manifest item %r not found.' 
% href) continue manifest.add(elem.get('id'), href, elem.get('media-type'), elem.get('fallback')) @@ -574,7 +814,7 @@ class OEBBook(object): for elem in xpath(opf, '/o2:package/o2:spine/o2:itemref'): idref = elem.get('idref') if idref not in self.manifest: - self.logger.log_warn(u'Spine item %r not found.' % idref) + self.logger.warn(u'Spine item %r not found.' % idref) continue item = self.manifest[idref] spine.add(item, elem.get('linear')) @@ -593,7 +833,7 @@ class OEBBook(object): href = elem.get('href') path, frag = urldefrag(href) if path not in self.manifest.hrefs: - self.logger.log_warn(u'Guide reference %r not found' % href) + self.logger.warn(u'Guide reference %r not found' % href) continue guide.add(elem.get('type'), elem.get('title'), href) @@ -695,6 +935,32 @@ class OEBBook(object): if self._toc_from_tour(opf): return if self._toc_from_html(opf): return self._toc_from_spine(opf) + + def _ensure_cover_image(self): + cover = None + spine0 = self.spine[0] + html = spine0.data + if self.metadata.cover: + id = str(self.metadata.cover[0]) + cover = self.manifest.ids[id] + elif MS_COVER_TYPE in self.guide: + href = self.guide[MS_COVER_TYPE].href + cover = self.manifest.hrefs[href] + elif xpath(html, '//h:img[position()=1]'): + img = xpath(html, '//h:img[position()=1]')[0] + href = spine0.abshref(img.get('src')) + cover = self.manifest.hrefs[href] + elif xpath(html, '//h:object[position()=1]'): + object = xpath(html, '//h:object[position()=1]')[0] + href = spine0.abshref(object.get('data')) + cover = self.manifest.hrefs[href] + elif xpath(html, '//svg:svg[position()=1]'): + svg = copy.deepcopy(xpath(html, '//svg:svg[position()=1]')[0]) + href = os.path.splitext(spine0.href)[0] + '.svg' + id, href = self.manifest.generate(spine0.id, href) + cover = self.manifest.add(id, href, SVG_MIME, data=svg) + if cover and not self.metadata.cover: + self.metadata.add('cover', cover.id) def _all_from_opf(self, opf): self._metadata_from_opf(opf) @@ -702,6 +968,7 @@ class 
OEBBook(object): self._spine_from_opf(opf) self._guide_from_opf(opf) self._toc_from_opf(opf) + self._ensure_cover_image() def to_opf1(self): package = etree.Element('package', diff --git a/src/calibre/ebooks/lit/html.css b/src/calibre/ebooks/oeb/html.css similarity index 86% rename from src/calibre/ebooks/lit/html.css rename to src/calibre/ebooks/oeb/html.css index 9401b19cf2..63d57a3e29 100644 --- a/src/calibre/ebooks/lit/html.css +++ b/src/calibre/ebooks/oeb/html.css @@ -35,7 +35,8 @@ * * ***** END LICENSE BLOCK ***** */ -@namespace url(http://www.w3.org/1999/xhtml); /* set default namespace to HTML */ +@namespace url(http://www.w3.org/1999/xhtml); +@namespace svg url(http://www.w3.org/2000/svg); /* blocks */ @@ -45,7 +46,6 @@ html, div, map, dt, isindex, form { body { display: block; - margin: 8px; } p, dl, multicol { @@ -59,7 +59,7 @@ dd { blockquote { display: block; - margin: 1em 40px; + margin: 1em; } address { @@ -74,7 +74,7 @@ center { blockquote[type=cite] { display: block; - margin: 1em 0px; + margin: 1em 0em; border-color: blue; border-width: thin; } @@ -234,14 +234,6 @@ th { /* inlines */ -q:before { - content: open-quote; -} - -q:after { - content: close-quote; -} - b, strong { font-weight: bolder; } @@ -392,22 +384,6 @@ spacer { float: none ! 
important; } -/* focusable content: anything w/ tabindex >=0 is focusable */ -abbr:focus, acronym:focus, address:focus, applet:focus, b:focus, -base:focus, big:focus, blockquote:focus, br:focus, canvas:focus, caption:focus, -center:focus, cite:focus, code:focus, col:focus, colgroup:focus, dd:focus, -del:focus, dfn:focus, dir:focus, div:focus, dl:focus, dt:focus, em:focus, -fieldset:focus, font:focus, form:focus, h1:focus, h2:focus, h3:focus, h4:focus, -h5:focus, h6:focus, hr:focus, i:focus, img:focus, ins:focus, -kbd:focus, label:focus, legend:focus, li:focus, link:focus, menu:focus, -object:focus, ol:focus, p:focus, pre:focus, q:focus, s:focus, samp:focus, -small:focus, span:focus, strike:focus, strong:focus, sub:focus, sup:focus, -table:focus, tbody:focus, td:focus, tfoot:focus, th:focus, thead:focus, -tr:focus, tt:focus, u:focus, ul:focus, var:focus { - /* Don't specify the outline-color, we should always use initial value. */ - outline: 1px dotted; -} - /* hidden elements */ area, base, basefont, head, meta, script, style, title, noembed, param, link { @@ -424,3 +400,8 @@ br { display: block; } +/* Images, embedded object, and SVG size defaults */ +img, object, svg|svg { + width: auto; + height: auto; +} diff --git a/src/calibre/ebooks/oeb/profile.py b/src/calibre/ebooks/oeb/profile.py new file mode 100644 index 0000000000..17408fac78 --- /dev/null +++ b/src/calibre/ebooks/oeb/profile.py @@ -0,0 +1,75 @@ +''' +Device profiles. +''' + +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. Vandegrift ' + +from itertools import izip + +FONT_SIZES = [('xx-small', 1), + ('x-small', None), + ('small', 2), + ('medium', 3), + ('large', 4), + ('x-large', 5), + ('xx-large', 6), + (None, 7)] + + +class Profile(object): + def __init__(self, width, height, dpi, fbase, fsizes): + self.width = (float(width) / dpi) * 72. + self.height = (float(height) / dpi) * 72. 
+ self.dpi = float(dpi) + self.fbase = float(fbase) + self.fsizes = [] + for (name, num), size in izip(FONT_SIZES, fsizes): + self.fsizes.append((name, num, float(size))) + self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name) + self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num) + + +PROFILES = { + 'PRS505': + Profile(width=584, height=754, dpi=168.451, fbase=12, + fsizes=[7.5, 9, 10, 12, 15.5, 20, 22, 24]), + + 'MSReader': + Profile(width=480, height=652, dpi=96, fbase=13, + fsizes=[10, 11, 13, 16, 18, 20, 22, 26]), + + # Not really, but let's pretend + 'Mobipocket': + Profile(width=600, height=800, dpi=96, fbase=18, + fsizes=[14, 14, 16, 18, 20, 22, 24, 26]), + + # No clue on usable screen size; DPI should be good + 'HanlinV3': + Profile(width=584, height=754, dpi=168.451, fbase=16, + fsizes=[12, 12, 14, 16, 18, 20, 22, 24]), + + 'CybookG3': + Profile(width=600, height=800, dpi=168.451, fbase=16, + fsizes=[12, 12, 14, 16, 18, 20, 22, 24]), + + 'Kindle': + Profile(width=525, height=640, dpi=168.451, fbase=16, + fsizes=[12, 12, 14, 16, 18, 20, 22, 24]), + + 'Browser': + Profile(width=800, height=600, dpi=100.0, fbase=12, + fsizes=[5, 7, 9, 12, 13.5, 17, 20, 22, 24]) + } + + +class Context(object): + PROFILES = PROFILES + + def __init__(self, source, dest): + if source in PROFILES: + source = PROFILES[source] + if dest in PROFILES: + dest = PROFILES[dest] + self.source = source + self.dest = dest diff --git a/src/calibre/ebooks/lit/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py similarity index 65% rename from src/calibre/ebooks/lit/stylizer.py rename to src/calibre/ebooks/oeb/stylizer.py index 7a89474d89..8668d89975 100644 --- a/src/calibre/ebooks/lit/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -16,16 +16,20 @@ import itertools import types import re import copy +from itertools import izip import cssutils from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \ CSSValueList, cssproperties from lxml 
import etree -from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES -from calibre.ebooks.lit.oeb import barename, urlnormalize +from lxml.cssselect import css_to_xpath, ExpressionError +from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES +from calibre.ebooks.oeb.base import XPNSMAP, xpath, barename, urlnormalize +from calibre.ebooks.oeb.profile import PROFILES from calibre.resources import html_css +XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS HTML_CSS_STYLESHEET = cssutils.parseString(html_css) -XHTML_CSS_NAMESPACE = "@namespace url(http://www.w3.org/1999/xhtml);\n" +HTML_CSS_STYLESHEET.namespaces['h'] = XHTML_NS INHERITED = set(['azimuth', 'border-collapse', 'border-spacing', 'caption-side', 'color', 'cursor', 'direction', 'elevation', @@ -72,7 +76,7 @@ DEFAULTS = {'azimuth': 'center', 'background-attachment': 'scroll', '50', 'right': 'auto', 'speak': 'normal', 'speak-header': 'once', 'speak-numeral': 'continuous', 'speak-punctuation': 'none', 'speech-rate': 'medium', 'stress': '50', 'table-layout': 'auto', - 'text-align': 'left', 'text-decoration': 'none', 'text-indent': + 'text-align': 'auto', 'text-decoration': 'none', 'text-indent': 0, 'text-transform': 'none', 'top': 'auto', 'unicode-bidi': 'normal', 'vertical-align': 'baseline', 'visibility': 'visible', 'voice-family': 'default', 'volume': 'medium', 'white-space': @@ -82,42 +86,30 @@ DEFAULTS = {'azimuth': 'center', 'background-attachment': 'scroll', FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large']) -FONT_SIZE_LIST = [('xx-small', 1, 6.), - ('x-small', None, 7.), - ('small', 2, 8.), - ('medium', 3, 9.), - ('large', 4, 11.), - ('x-large', 5, 13.), - ('xx-large', 6, 15.), - (None, 7, 17.)] - -FONT_SIZE_BY_NAME = {} -FONT_SIZE_BY_NUM = {} -for name, num, size in FONT_SIZE_LIST: - FONT_SIZE_BY_NAME[name] = size - FONT_SIZE_BY_NUM[num] = size - -XPNSMAP = {'h': XHTML_NS,} -def xpath(elem, expr): - return elem.xpath(expr, 
namespaces=XPNSMAP) - - -class Page(object): - def __init__(self, width, height, dpi): - self.width = float(width) - self.height = float(height) - self.dpi = float(dpi) - -class Profiles(object): - PRS500 = Page(584, 754, 168.451) - PRS505 = PRS500 +class CSSSelector(etree.XPath): + MIN_SPACE_RE = re.compile(r' *([>~+]) *') + LOCAL_NAME_RE = re.compile(r"(?' % ( + self.__class__.__name__, + hex(abs(id(self)))[2:], + self.css) + + class Stylizer(object): STYLESHEETS = {} - def __init__(self, tree, path, oeb, page=Profiles.PRS505): - self.page = page + def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']): + self.profile = profile base = os.path.dirname(path) basename = os.path.basename(path) cssname = os.path.splitext(basename)[0] + '.css' @@ -126,12 +118,13 @@ class Stylizer(object): parser = cssutils.CSSParser() parser.setFetcher(lambda path: ('utf-8', oeb.container.read(path))) for elem in head: - tag = barename(elem.tag) - if tag == 'style': - text = ''.join(elem.text) + if elem.tag == XHTML('style') and elem.text \ + and elem.get('type', CSS_MIME) in OEB_STYLES: + text = XHTML_CSS_NAMESPACE + elem.text stylesheet = parser.parseString(text, href=cssname) + stylesheet.namespaces['h'] = XHTML_NS stylesheets.append(stylesheet) - elif tag == 'link' \ + elif elem.tag == XHTML('link') and elem.get('href') \ and elem.get('rel', 'stylesheet') == 'stylesheet' \ and elem.get('type', CSS_MIME) in OEB_STYLES: href = urlnormalize(elem.attrib['href']) @@ -143,11 +136,13 @@ class Stylizer(object): data = XHTML_CSS_NAMESPACE data += oeb.manifest.hrefs[path].data stylesheet = parser.parseString(data, href=path) + stylesheet.namespaces['h'] = XHTML_NS self.STYLESHEETS[path] = stylesheet stylesheets.append(stylesheet) rules = [] index = 0 self.stylesheets = set() + self.page_rule = {} for stylesheet in stylesheets: href = stylesheet.href self.stylesheets.add(href) @@ -157,7 +152,16 @@ class Stylizer(object): rules.sort() self.rules = rules self._styles = {} - + for _, _, 
cssdict, text, _ in rules: + try: + selector = CSSSelector(text) + except ExpressionError, e: + continue + for elem in selector(tree): + self.style(elem)._update_cssdict(cssdict) + for elem in xpath(tree, '//h:*[@style]'): + self.style(elem)._apply_style_attr() + def flatten_rule(self, rule, href, index): results = [] if isinstance(rule, CSSStyleRule): @@ -169,9 +173,9 @@ class Stylizer(object): results.append((specificity, selector, style, text, href)) elif isinstance(rule, CSSPageRule): style = self.flatten_style(rule.style) - results.append(((0, 0, 0, 0), [], style, '@page', href)) + self.page_rule.update(style) return results - + def flatten_style(self, cssstyle): style = {} for prop in cssstyle: @@ -186,7 +190,7 @@ class Stylizer(object): size = style['font-size'] if size == 'normal': size = 'medium' if size in FONT_SIZE_NAMES: - style['font-size'] = "%dpt" % FONT_SIZE_BY_NAME[size] + style['font-size'] = "%dpt" % self.profile.fnames[size] return style def _normalize_edge(self, cssvalue, name): @@ -233,9 +237,10 @@ class Stylizer(object): return style def style(self, element): - try: return self._styles[element] - except: pass - return Style(element, self) + try: + return self._styles[element] + except KeyError: + return Style(element, self) def stylesheet(self, name, font_scale=None): rules = [] @@ -250,86 +255,43 @@ class Stylizer(object): rules.append('%s {\n %s;\n}' % (selector, style)) return '\n'.join(rules) + class Style(object): def __init__(self, element, stylizer): self._element = element - self._page = stylizer.page + self._profile = stylizer.profile self._stylizer = stylizer - self._style = self._assemble_style(element, stylizer) + self._style = {} + self._fontSize = None + self._width = None + self._height = None + self._lineHeight = None stylizer._styles[element] = self + + def _update_cssdict(self, cssdict): + self._style.update(cssdict) - def _assemble_style(self, element, stylizer): - result = {} - rules = stylizer.rules - for _, selector, 
style, _, _ in rules: - if self._selects_element(element, selector): - result.update(style) - try: - style = CSSStyleDeclaration(element.attrib['style']) - result.update(stylizer.flatten_style(style)) - except KeyError: - pass - return result - - def _selects_element(self, element, selector): - def _selects_element(element, items, index): - if index == -1: - return True - item = items[index] - if item.type == 'universal': - pass - elif item.type == 'type-selector': - name1 = ("{%s}%s" % item.value).lower() - name2 = element.tag.lower() - if name1 != name2: - return False - elif item.type == 'id': - name1 = item.value[1:] - name2 = element.get('id', '') - if name1 != name2: - return False - elif item.type == 'class': - name = item.value[1:].lower() - classes = element.get('class', '').lower().split() - if name not in classes: - return False - elif item.type == 'child': - parent = element.getparent() - if parent is None: - return False - element = parent - elif item.type == 'descendant': - element = element.getparent() - while element is not None: - if _selects_element(element, items, index - 1): - return True - element = element.getparent() - return False - elif item.type == 'pseudo-class': - if item.value == ':first-child': - e = element.getprevious() - if e is not None: - return False - else: - return False - elif item.type == 'pseudo-element': - return False - else: - return False - return _selects_element(element, items, index - 1) - return _selects_element(element, selector, len(selector) - 1) + def _apply_style_attr(self): + attrib = self._element.attrib + if 'style' in attrib: + style = CSSStyleDeclaration(attrib['style']) + self._style.update(self._stylizer.flatten_style(style)) def _has_parent(self): - parent = self._element.getparent() - return (parent is not None) \ - and (parent in self._stylizer._styles) - + return (self._element.getparent() is not None) + + def _get_parent(self): + elem = self._element.getparent() + if elem is None: + return None + 
return self._stylizer.style(elem) + def __getitem__(self, name): domname = cssproperties._toDOMname(name) if hasattr(self, domname): return getattr(self, domname) return self._unit_convert(self._get(name)) - + def _get(self, name): result = None if name in self._style: @@ -337,8 +299,8 @@ class Style(object): if (result == 'inherit' or (result is None and name in INHERITED and self._has_parent())): - styles = self._stylizer._styles - result = styles[self._element.getparent()]._get(name) + stylizer = self._stylizer + result = stylizer.style(self._element.getparent())._get(name) if result is None: result = DEFAULTS[name] return result @@ -359,9 +321,9 @@ class Style(object): unit = m.group(2) if unit == '%': base = base or self.width - result = (value/100.0) * base + result = (value / 100.0) * base elif unit == 'px': - result = value * 72.0 / self._page.dpi + result = value * 72.0 / self._profile.dpi elif unit == 'in': result = value * 72.0 elif unit == 'pt': @@ -379,22 +341,22 @@ class Style(object): @property def fontSize(self): - def normalize_fontsize(value, base=None): + def normalize_fontsize(value, base): result = None factor = None if value == 'inherit': - value = 'medium' + value = base if value in FONT_SIZE_NAMES: - result = FONT_SIZE_BY_NAME[value] + result = self._profile.fnames[value] elif value == 'smaller': factor = 1.0/1.2 - for _, _, size in FONT_SIZE_LIST: + for _, _, size in self._profile.fsizes: if base <= size: break factor = None result = size elif value == 'larger': factor = 1.2 - for _, _, size in reversed(FONT_SIZE_LIST): + for _, _, size in reversed(self._profile.fsizes): if base >= size: break factor = None result = size @@ -405,40 +367,108 @@ class Style(object): if factor: result = factor * base return result - result = None - if self._has_parent(): - styles = self._stylizer._styles - base = styles[self._element.getparent()].fontSize - else: - base = normalize_fontsize(DEFAULTS['font-size']) - if 'font-size' in self._style: - size = 
self._style['font-size'] - result = normalize_fontsize(size, base) - else: - result = base - self.__dict__['fontSize'] = result - return result + if self._fontSize is None: + result = None + parent = self._get_parent() + if parent is not None: + base = parent.fontSize + else: + base = self._profile.fbase + if 'font-size' in self._style: + size = self._style['font-size'] + result = normalize_fontsize(size, base) + else: + result = base + self._fontSize = result + return self._fontSize @property def width(self): - result = None - base = None - if self._has_parent(): - styles = self._stylizer._styles - base = styles[self._element.getparent()].width - else: - base = self._page.width - if 'width' in self._style: - width = self._style['width'] - if width == 'auto': + if self._width is None: + width = None + base = None + parent = self._get_parent() + if parent is not None: + base = parent.width + else: + base = self._profile.width + if 'width' is self._element.attrib: + width = self._element.attrib['width'] + elif 'width' in self._style: + width = self._style['width'] + if not width or width == 'auto': result = base else: result = self._unit_convert(width, base=base) - else: - result = base - self.__dict__['width'] = result - return result + self._width = result + return self._width + + @property + def height(self): + if self._height is None: + height = None + base = None + parent = self._get_parent() + if parent is not None: + base = parent.height + else: + base = self._profile.height + if 'height' is self._element.attrib: + height = self._element.attrib['height'] + elif 'height' in self._style: + height = self._style['height'] + if not height or height == 'auto': + result = base + else: + result = self._unit_convert(height, base=base) + self._height = result + return self._height + + @property + def lineHeight(self): + if self._lineHeight is None: + result = None + parent = self._getparent() + if 'line-height' in self._style: + lineh = self._style['line-height'] + try: 
+ float(lineh) + except ValueError: + result = self._unit_convert(lineh, base=self.fontSize) + else: + result = float(lineh) * self.fontSize + elif parent is not None: + # TODO: proper inheritance + result = parent.lineHeight + else: + result = 1.2 * self.fontSize + self._lineHeight = result + return self._lineHeight + + @property + def marginTop(self): + return self._unit_convert( + self._get('margin-top'), base=self.height) + + @property + def marginBottom(self): + return self._unit_convert( + self._get('margin-bottom'), base=self.height) + + @property + def paddingTop(self): + return self._unit_convert( + self._get('padding-top'), base=self.height) + + @property + def paddingBottom(self): + return self._unit_convert( + self._get('padding-bottom'), base=self.height) def __str__(self): items = self._style.items() + items.sort() return '; '.join("%s: %s" % (key, val) for key, val in items) + + def cssdict(self): + return dict(self._style) diff --git a/src/calibre/ebooks/oeb/transforms/__init__.py b/src/calibre/ebooks/oeb/transforms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py new file mode 100644 index 0000000000..7110c2db2d --- /dev/null +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -0,0 +1,270 @@ +''' +CSS flattening transform. +''' +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. 
Vandegrift ' + +import sys +import os +import re +import operator +import math +from itertools import chain +from collections import defaultdict +from lxml import etree +from calibre.ebooks.oeb.base import XHTML, XHTML_NS +from calibre.ebooks.oeb.base import CSS_MIME, OEB_STYLES +from calibre.ebooks.oeb.base import namespace, barename +from calibre.ebooks.oeb.base import OEBBook +from calibre.ebooks.oeb.stylizer import Stylizer + +COLLAPSE = re.compile(r'[ \t\r\n\v]+') +STRIPNUM = re.compile(r'[-0-9]+$') + +class KeyMapper(object): + def __init__(self, sbase, dbase, dkey): + self.sbase = float(sbase) + self.dprop = [(self.relate(x, dbase), float(x)) for x in dkey] + self.cache = {} + + @staticmethod + def relate(size, base): + size = float(size) + base = float(base) + if abs(size - base) < 0.1: return 0 + sign = -1 if size < base else 1 + endp = 0 if size < base else 36 + diff = (abs(base - size) * 3) + ((36 - size) / 100) + logb = abs(base - endp) + result = sign * math.log(diff, logb) + return result + + def __getitem__(self, ssize): + if ssize in self.cache: + return self.cache[ssize] + dsize = self.map(ssize) + self.cache[ssize] = dsize + return dsize + + def map(self, ssize): + sbase = self.sbase + prop = self.relate(ssize, sbase) + diff = [(abs(prop - p), s) for p, s in self.dprop] + dsize = min(diff)[1] + return dsize + +class ScaleMapper(object): + def __init__(self, sbase, dbase): + self.dscale = float(dbase) / float(sbase) + + def __getitem__(self, ssize): + dsize = ssize * self.dscale + return dsize + +class NullMapper(object): + def __init__(self): + pass + + def __getitem__(self, ssize): + return ssize + +def FontMapper(sbase=None, dbase=None, dkey=None): + if sbase and dbase and dkey: + return KeyMapper(sbase, dbase, dkey) + elif sbase and dbase: + return ScaleMapper(sbase, dbase) + else: + return NullMapper() + + +class CSSFlattener(object): + def __init__(self, fbase=None, fkey=None, lineh=None, unfloat=False, + untable=False): + self.fbase = fbase 
+ self.fkey = fkey + self.lineh = lineh + self.unfloat = unfloat + self.untable = untable + + def transform(self, oeb, context): + oeb.logger.info('Flattening CSS and remapping font sizes...') + self.oeb = oeb + self.context = context + self.stylize_spine() + self.sbase = self.baseline_spine() if self.fbase else None + self.fmap = FontMapper(self.sbase, self.fbase, self.fkey) + self.flatten_spine() + + def stylize_spine(self): + self.stylizers = {} + profile = self.context.source + for item in self.oeb.spine: + html = item.data + stylizer = Stylizer(html, item.href, self.oeb, profile) + self.stylizers[item] = stylizer + + def baseline_node(self, node, stylizer, sizes, csize): + csize = stylizer.style(node)['font-size'] + if node.text: + sizes[csize] += len(COLLAPSE.sub(' ', node.text)) + for child in node: + self.baseline_node(child, stylizer, sizes, csize) + if child.tail: + sizes[csize] += len(COLLAPSE.sub(' ', child.tail)) + + def baseline_spine(self): + sizes = defaultdict(float) + for item in self.oeb.spine: + html = item.data + stylizer = self.stylizers[item] + body = html.find(XHTML('body')) + fsize = self.context.source.fbase + self.baseline_node(body, stylizer, sizes, fsize) + sbase = max(sizes.items(), key=operator.itemgetter(1))[0] + self.oeb.logger.info( + "Source base font size is %0.05fpt" % sbase) + return sbase + + def clean_edges(self, cssdict, style, fsize): + slineh = self.sbase * 1.26 + dlineh = self.lineh + for kind in ('margin', 'padding'): + for edge in ('bottom', 'top'): + property = "%s-%s" % (kind, edge) + if property not in cssdict: continue + if '%' in cssdict[property]: continue + value = style[property] + if value == 0: + continue + elif value <= slineh: + cssdict[property] = "%0.5fem" % (dlineh / fsize) + else: + value = round(value / slineh) * dlineh + cssdict[property] = "%0.5fem" % (value / fsize) + + def flatten_node(self, node, stylizer, names, styles, psize, left=0): + if not isinstance(node.tag, basestring) \ + or 
namespace(node.tag) != XHTML_NS: + return + tag = barename(node.tag) + style = stylizer.style(node) + cssdict = style.cssdict() + if 'align' in node.attrib: + cssdict['text-align'] = node.attrib['align'] + del node.attrib['align'] + if node.tag == XHTML('font'): + node.tag = XHTML('span') + if 'size' in node.attrib: + size = node.attrib['size'].strip() + if size: + fnums = self.context.source.fnums + if size[0] in ('+', '-'): + # Oh, the warcrimes + cssdict['font-size'] = fnums[3+int(size)] + else: + cssdict['font-size'] = fnums[int(size)] + del node.attrib['size'] + if 'color' in node.attrib: + cssdict['color'] = node.attrib['color'] + del node.attrib['color'] + if 'bgcolor' in node.attrib: + cssdict['background-color'] = node.attrib['bgcolor'] + del node.attrib['bgcolor'] + if cssdict: + if 'font-size' in cssdict: + fsize = self.fmap[style['font-size']] + cssdict['font-size'] = "%0.5fem" % (fsize / psize) + psize = fsize + if self.lineh and self.fbase and tag != 'body': + self.clean_edges(cssdict, style, psize) + margin = style['margin-left'] + left += margin if isinstance(margin, float) else 0 + if (left + style['text-indent']) < 0: + percent = (margin - style['text-indent']) / style['width'] + cssdict['margin-left'] = "%d%%" % (percent * 100) + left -= style['text-indent'] + if 'display' in cssdict and cssdict['display'] == 'in-line': + cssdict['display'] = 'inline' + if self.unfloat and 'float' in cssdict \ + and cssdict.get('display', 'none') != 'none': + del cssdict['display'] + if self.untable and 'display' in cssdict \ + and cssdict['display'].startswith('table'): + display = cssdict['display'] + if display == 'table-cell': + cssdict['display'] = 'inline' + else: + cssdict['display'] = 'block' + if 'vertical-align' in cssdict \ + and cssdict['vertical-align'] == 'sup': + cssdict['vertical-align'] = 'super' + if self.lineh and 'line-height' not in cssdict: + lineh = self.lineh / psize + cssdict['line-height'] = "%0.5fem" % lineh + if cssdict: + items = 
cssdict.items() + items.sort() + css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items) + klass = STRIPNUM.sub('', node.get('class', 'calibre').split()[0]) + if css in styles: + match = styles[css] + else: + match = klass + str(names[klass] or '') + styles[css] = match + names[klass] += 1 + node.attrib['class'] = match + elif 'class' in node.attrib: + del node.attrib['class'] + if 'style' in node.attrib: + del node.attrib['style'] + for child in node: + self.flatten_node(child, stylizer, names, styles, psize, left) + + def flatten_head(self, item, stylizer, href): + html = item.data + head = html.find(XHTML('head')) + for node in head: + if node.tag == XHTML('link') \ + and node.get('rel', 'stylesheet') == 'stylesheet' \ + and node.get('type', CSS_MIME) in OEB_STYLES: + head.remove(node) + elif node.tag == XHTML('style') \ + and node.get('type', CSS_MIME) in OEB_STYLES: + head.remove(node) + href = item.relhref(href) + etree.SubElement(head, XHTML('link'), + rel='stylesheet', type=CSS_MIME, href=href) + if stylizer.page_rule: + items = stylizer.page_rule.items() + items.sort() + css = '; '.join("%s: %s" % (key, val) for key, val in items) + style = etree.SubElement(head, XHTML('style'), type=CSS_MIME) + style.text = "@page { %s; }" % css + + def replace_css(self, css): + manifest = self.oeb.manifest + id, href = manifest.generate('css', 'stylesheet.css') + for item in manifest.values(): + if item.media_type in OEB_STYLES: + manifest.remove(item) + item = manifest.add(id, href, CSS_MIME, data=css) + return href + + def flatten_spine(self): + names = defaultdict(int) + styles = {} + for item in self.oeb.spine: + html = item.data + stylizer = self.stylizers[item] + body = html.find(XHTML('body')) + fsize = self.context.dest.fbase + self.flatten_node(body, stylizer, names, styles, fsize) + items = [(key, val) for (val, key) in styles.items()] + items.sort() + css = ''.join(".%s {\n%s;\n}\n\n" % (key, val) for key, val in items) + href = self.replace_css(css) + 
for item in self.oeb.spine: + stylizer = self.stylizers[item] + self.flatten_head(item, stylizer, href) diff --git a/src/calibre/ebooks/oeb/transforms/htmltoc.py b/src/calibre/ebooks/oeb/transforms/htmltoc.py new file mode 100644 index 0000000000..9eaa04d41d --- /dev/null +++ b/src/calibre/ebooks/oeb/transforms/htmltoc.py @@ -0,0 +1,87 @@ +''' +HTML-TOC-adding transform. +''' +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. Vandegrift ' + +import sys +import os +from lxml import etree +from calibre.ebooks.oeb.base import XML, XHTML, XHTML_NS +from calibre.ebooks.oeb.base import XHTML_MIME, CSS_MIME +from calibre.ebooks.oeb.base import element + +STYLE_CSS = { + 'nested': """ +.calibre_toc_header { + text-align: center; +} +.calibre_toc_block { + margin-left: 1.2em; + text-indent: -1.2em; +} +.calibre_toc_block .calibre_toc_block { + margin-left: 2.4em; +} +.calibre_toc_block .calibre_toc_block .calibre_toc_block { + margin-left: 3.6em; +} +""", + + 'centered': """ +.calibre_toc_header { + text-align: center; +} +.calibre_toc_block { + text-align: center; +} +body > .calibre_toc_block { + margin-top: 1.2em; +} +""" + } + +class HTMLTOCAdder(object): + def __init__(self, style='nested'): + self.style = style + + def transform(self, oeb, context): + if 'toc' in oeb.guide: + return + oeb.logger.info('Generating in-line TOC...') + style = self.style + if style not in STYLE_CSS: + oeb.logger.error('Unknown TOC style %r' % style) + style = 'nested' + id, css_href = oeb.manifest.generate('tocstyle', 'tocstyle.css') + oeb.manifest.add(id, css_href, CSS_MIME, data=STYLE_CSS[style]) + language = str(oeb.metadata.language[0]) + contents = element(None, XHTML('html'), nsmap={None: XHTML_NS}, + attrib={XML('lang'): language}) + head = element(contents, XHTML('head')) + title = element(head, XHTML('title')) + title.text = 'Table of Contents' + element(head, XHTML('link'), rel='stylesheet', type=CSS_MIME, + href=css_href) + body 
= element(contents, XHTML('body'), + attrib={'class': 'calibre_toc'}) + h1 = element(body, XHTML('h1'), + attrib={'class': 'calibre_toc_header'}) + h1.text = 'Table of Contents' + self.add_toc_level(body, oeb.toc) + id, href = oeb.manifest.generate('contents', 'contents.xhtml') + item = oeb.manifest.add(id, href, XHTML_MIME, data=contents) + oeb.spine.add(item, linear=False) + oeb.guide.add('toc', 'Table of Contents', href) + + def add_toc_level(self, elem, toc): + for node in toc: + block = element(elem, XHTML('div'), + attrib={'class': 'calibre_toc_block'}) + line = element(block, XHTML('a'), + attrib={'href': node.href, + 'class': 'calibre_toc_line'}) + line.text = node.title + self.add_toc_level(block, node) diff --git a/src/calibre/ebooks/oeb/transforms/manglecase.py b/src/calibre/ebooks/oeb/transforms/manglecase.py new file mode 100644 index 0000000000..3a3d91364f --- /dev/null +++ b/src/calibre/ebooks/oeb/transforms/manglecase.py @@ -0,0 +1,112 @@ +''' +CSS case-mangling transform. +''' +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. 
Vandegrift ' + +import sys +import os +import re +import operator +import math +from itertools import chain +from collections import defaultdict +from lxml import etree +from calibre.ebooks.oeb.base import XHTML, XHTML_NS +from calibre.ebooks.oeb.base import CSS_MIME +from calibre.ebooks.oeb.base import namespace +from calibre.ebooks.oeb.stylizer import Stylizer + +CASE_MANGLER_CSS = """ +.calibre_lowercase { + font-variant: normal; + font-size: 0.65em; +} +""" + +TEXT_TRANSFORMS = set(['capitalize', 'uppercase', 'lowercase']) + +class CaseMangler(object): + def transform(self, oeb, context): + oeb.logger.info('Applying case-transforming CSS...') + self.oeb = oeb + self.profile = context.source + self.mangle_spine() + + def mangle_spine(self): + id, href = self.oeb.manifest.generate('manglecase', 'manglecase.css') + self.oeb.manifest.add(id, href, CSS_MIME, data=CASE_MANGLER_CSS) + for item in self.oeb.spine: + html = item.data + relhref = item.relhref(href) + etree.SubElement(html.find(XHTML('head')), XHTML('link'), + rel='stylesheet', href=relhref, type=CSS_MIME) + stylizer = Stylizer(html, item.href, self.oeb, self.profile) + self.mangle_elem(html.find(XHTML('body')), stylizer) + + def text_transform(self, transform, text): + if transform == 'capitalize': + return text.title() + elif transform == 'uppercase': + return text.upper() + elif transform == 'lowercase': + return text.lower() + return text + + def split_text(self, text): + results = [''] + isupper = text[0].isupper() + for char in text: + if char.isupper() == isupper: + results[-1] += char + else: + isupper = not isupper + results.append(char) + return results + + def smallcaps_elem(self, elem, attr): + texts = self.split_text(getattr(elem, attr)) + setattr(elem, attr, None) + last = elem if attr == 'tail' else None + attrib = {'class': 'calibre_lowercase'} + for text in texts: + if text.isupper(): + if last is None: + elem.text = text + else: + last.tail = text + else: + child = 
etree.Element(XHTML('span'), attrib=attrib) + child.text = text.upper() + if last is None: + elem.insert(0, child) + else: + # addnext() moves the tail for some reason + tail = last.tail + last.addnext(child) + last.tail = tail + child.tail = None + last = child + + def mangle_elem(self, elem, stylizer): + if not isinstance(elem.tag, basestring) or \ + namespace(elem.tag) != XHTML_NS: + return + children = list(elem) + style = stylizer.style(elem) + transform = style['text-transform'] + variant = style['font-variant'] + if elem.text: + if transform in TEXT_TRANSFORMS: + elem.text = self.text_transform(transform, elem.text) + if variant == 'small-caps': + self.smallcaps_elem(elem, 'text') + for child in children: + self.mangle_elem(child, stylizer) + if child.tail: + if transform in TEXT_TRANSFORMS: + child.tail = self.text_transform(transform, child.tail) + if variant == 'small-caps': + self.smallcaps_elem(child, 'tail') diff --git a/src/calibre/ebooks/oeb/transforms/rasterize.py b/src/calibre/ebooks/oeb/transforms/rasterize.py new file mode 100644 index 0000000000..69f1d0d133 --- /dev/null +++ b/src/calibre/ebooks/oeb/transforms/rasterize.py @@ -0,0 +1,190 @@ +''' +SVG rasterization transform. +''' +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. 
class SVGRasterizer(object):
    """OEB transform that replaces SVG images with rasterized PNG images.

    Handles SVG manifest items referenced from ``<img>`` / ``<object>``
    elements, inline ``<svg>`` elements in spine documents, and an SVG
    cover.
    """

    def __init__(self):
        # Make sure a QApplication instance exists before any Qt class is
        # used (presumably required by the Qt rendering classes below).
        if QApplication.instance() is None:
            QApplication([])

    def transform(self, oeb, context):
        """Rasterize all SVG content of *oeb* for ``context.dest``."""
        oeb.logger.info('Rasterizing SVG images...')
        self.oeb = oeb
        self.profile = context.dest
        # Cache: (svg href, pixel width, pixel height) -> generated PNG href
        self.images = {}
        self.dataize_manifest()
        self.rasterize_spine()
        self.rasterize_cover()

    def _to_pixels(self, length):
        """Scale *length* (1/72 inch units) to pixels at the profile's dpi.

        Divides by the float 72.0 so that integer input is not truncated
        by Python 2 floor division.
        """
        return (length / 72.0) * self.profile.dpi

    def _render(self, svg, size, format='PNG'):
        """Paint the QSvgRenderer *svg* on a white image of *size*.

        Returns the encoded image data (in *format*) as a string.
        """
        image = QImage(size, QImage.Format_ARGB32_Premultiplied)
        image.fill(QColor("white").rgb())
        painter = QPainter(image)
        svg.render(painter)
        painter.end()
        array = QByteArray()
        buffer = QBuffer(array)
        buffer.open(QIODevice.WriteOnly)
        image.save(buffer, format)
        return str(array)

    def rasterize_svg(self, elem, width=0, height=0, format='PNG'):
        """Rasterize the SVG element *elem* and return the image data.

        When *width* or *height* is non-zero the default size is scaled to
        fit them while keeping the aspect ratio.
        """
        import re
        data = QByteArray(xml2str(elem))
        svg = QSvgRenderer(data)
        size = svg.defaultSize()
        # A 100x100 default size is taken to mean "no usable size"; fall
        # back to the viewBox in that case.
        if size.width() == 100 and size.height() == 100 \
                and 'viewBox' in elem.attrib:
            # viewBox numbers may be separated by whitespace and/or commas.
            box = [float(x)
                   for x in re.split(r'[\s,]+', elem.attrib['viewBox'])
                   if x]
            # NOTE(review): SVG defines viewBox as "min-x min-y width
            # height"; the subtraction below treats it as two corner
            # points.  Kept as in the original -- confirm intent.
            size.setWidth(box[2] - box[0])
            size.setHeight(box[3] - box[1])
        if width or height:
            size.scale(width, height, Qt.KeepAspectRatio)
        logger = self.oeb.logger
        logger.info('Rasterizing %r to %dx%d'
                    % (elem, size.width(), size.height()))
        return self._render(svg, size, format)

    def dataize_manifest(self):
        """Inline linked resources into every SVG item of the manifest."""
        for item in self.oeb.manifest.values():
            if item.media_type == SVG_MIME:
                self.dataize_svg(item)

    def dataize_svg(self, item, svg=None):
        """Replace ``xlink:href`` references to manifest items by data URIs.

        *svg* defaults to ``item.data``.  References with an empty path
        (pure fragments) or pointing outside the manifest are left alone.
        Returns *svg*.
        """
        if svg is None:
            svg = item.data
        hrefs = self.oeb.manifest.hrefs
        for elem in xpath(svg, '//svg:*[@xl:href]'):
            href = elem.attrib[XLINK('href')]
            path, frag = urldefrag(href)
            if not path:
                continue
            abshref = item.abshref(path)
            if abshref not in hrefs:
                continue
            linkee = hrefs[abshref]
            data = base64.encodestring(str(linkee))
            data = "data:%s;base64,%s" % (linkee.media_type, data)
            elem.attrib[XLINK('href')] = data
        return svg

    def rasterize_spine(self):
        """Rasterize the SVG content of every spine document."""
        for item in self.oeb.spine:
            html = item.data
            stylizer = Stylizer(html, item.href, self.oeb, self.profile)
            self.rasterize_item(item, stylizer)

    def rasterize_item(self, item, stylizer):
        """Rasterize external and inline SVG images of one spine *item*."""
        html = item.data
        hrefs = self.oeb.manifest.hrefs
        # (XPath of candidate elements, attribute holding the reference);
        # all <img> elements are handled before the <object> elements.
        external = (
            ('//h:img', 'src'),
            ('//h:object[@type="%s"]' % SVG_MIME, 'data'),
        )
        for expr, attr in external:
            for elem in xpath(html, expr):
                target = elem.get(attr, None)
                image = hrefs.get(item.abshref(target), None) \
                    if target else None
                if image and image.media_type == SVG_MIME:
                    style = stylizer.style(elem)
                    self.rasterize_external(elem, style, item, image)
        for elem in xpath(html, '//svg:svg'):
            style = stylizer.style(elem)
            self.rasterize_inline(elem, style, item)

    def rasterize_inline(self, elem, style, item):
        """Replace the inline ``<svg>`` *elem* by an ``<img>`` of a new PNG."""
        width = self._to_pixels(style['width'])
        height = self._to_pixels(style['height'])
        elem = self.dataize_svg(item, elem)
        data = self.rasterize_svg(elem, width, height)
        manifest = self.oeb.manifest
        href = os.path.splitext(item.href)[0] + '.png'
        png_id, href = manifest.generate(item.id, href)
        manifest.add(png_id, href, PNG_MIME, data=data)
        img = etree.Element(XHTML('img'), src=item.relhref(href))
        # lxml's replace() keeps the tail text on the removed element, so
        # carry it over explicitly or the text after the image is lost.
        img.tail = elem.tail
        elem.getparent().replace(elem, img)
        for prop in ('width', 'height'):
            if prop in elem.attrib:
                img.attrib[prop] = elem.attrib[prop]

    def rasterize_external(self, elem, style, item, svgitem):
        """Turn *elem* into an ``<img>`` of a PNG rendering of *svgitem*.

        Renderings are cached in ``self.images`` per (href, width, height)
        so the same SVG at the same size is rasterized only once.
        """
        width = self._to_pixels(style['width'])
        height = self._to_pixels(style['height'])
        data = QByteArray(str(svgitem))
        svg = QSvgRenderer(data)
        size = svg.defaultSize()
        size.scale(width, height, Qt.KeepAspectRatio)
        key = (svgitem.href, size.width(), size.height())
        if key in self.images:
            href = self.images[key]
        else:
            logger = self.oeb.logger
            logger.info('Rasterizing %r to %dx%d'
                        % (svgitem.href, size.width(), size.height()))
            data = self._render(svg, size, 'PNG')
            manifest = self.oeb.manifest
            href = os.path.splitext(svgitem.href)[0] + '.png'
            png_id, href = manifest.generate(svgitem.id, href)
            manifest.add(png_id, href, PNG_MIME, data=data)
            self.images[key] = href
        elem.tag = XHTML('img')
        # Iterate over copies: attributes and children are removed inside
        # the loops.
        for attr in list(elem.attrib):
            if attr not in KEEP_ATTRS:
                del elem.attrib[attr]
        elem.attrib['src'] = item.relhref(href)
        if elem.text:
            elem.attrib['alt'] = elem.text
            elem.text = None
        for child in list(elem):
            elem.remove(child)

    def rasterize_cover(self):
        """Rasterize an SVG cover to PNG and point the metadata at it.

        Does nothing when there is no cover entry or the cover is not SVG.
        """
        covers = self.oeb.metadata.cover
        if not covers:
            return
        cover = self.oeb.manifest.ids[str(covers[0])]
        if not cover.media_type == SVG_MIME:
            return
        width = self._to_pixels(self.profile.width)
        height = self._to_pixels(self.profile.height)
        data = self.rasterize_svg(cover.data, width, height)
        href = os.path.splitext(cover.href)[0] + '.png'
        png_id, href = self.oeb.manifest.generate(cover.id, href)
        self.oeb.manifest.add(png_id, href, PNG_MIME, data=data)
        covers[0].value = png_id
class ManifestTrimmer(object):
    """OEB transform that removes unreferenced items from the manifest."""

    def _note_reference(self, manifest_hrefs, item, uri, used, new):
        """Record the manifest item that *uri*, found in *item*, points to.

        *uri* is resolved relative to *item*; when the result names a
        manifest entry that is not yet in *used*, it is added to *new*.
        """
        target = item.abshref(uri)
        if target in manifest_hrefs:
            found = manifest_hrefs[target]
            if found not in used:
                new.add(found)

    def transform(self, oeb, context):
        """Drop every manifest item not reachable from the book.

        Roots are items named by metadata values (as href or id), guide
        references and all spine items.  Links are then followed through
        OEB/XML documents (via LINK_SELECTORS) and CSS stylesheets (via
        their ``url()`` references) until no new items turn up.
        """
        oeb.logger.info('Trimming unused files from manifest...')
        used = set()
        manifest_hrefs = oeb.manifest.hrefs
        manifest_ids = oeb.manifest.ids
        for term in oeb.metadata:
            for entry in oeb.metadata[term]:
                if entry.value in manifest_hrefs:
                    used.add(manifest_hrefs[entry.value])
                elif entry.value in manifest_ids:
                    used.add(manifest_ids[entry.value])
        for ref in oeb.guide.values():
            path, _ = urldefrag(ref.href)
            if path in manifest_hrefs:
                used.add(manifest_hrefs[path])
        # TOC items are required to be in the spine
        for item in oeb.spine:
            used.add(item)
        unchecked = used
        while unchecked:
            new = set()
            for item in unchecked:
                if item.media_type in OEB_DOCS or \
                        item.media_type[-4:] in ('/xml', '+xml'):
                    links = [sel(item.data) for sel in LINK_SELECTORS]
                    for href in chain(*links):
                        self._note_reference(manifest_hrefs, item, href,
                                             used, new)
                elif item.media_type == CSS_MIME:
                    def replacer(uri):
                        # Only used to observe the URLs: resolve the URI
                        # that was passed in (the original looked up a
                        # stale outer `href` here) and hand it back
                        # unchanged.
                        self._note_reference(manifest_hrefs, item, uri,
                                             used, new)
                        return uri
                    sheet = cssutils.parseString(item.data, href=item.href)
                    cssutils.replaceUrls(sheet, replacer)
            # `used` is only grown after the loop, because on the first
            # pass `unchecked` is the very same set object.
            used.update(new)
            unchecked = new
        # Iterate over a copy: items are removed from the manifest inside
        # the loop.
        for item in list(oeb.manifest.values()):
            if item not in used:
                oeb.logger.info('Trimming %r from manifest' % item.href)
                oeb.manifest.remove(item)
f.write(opts_and_exts('any2lit', any2lit, any_formats)) + f.write(opts_and_exts('any2mobi', any2mobi, any_formats)) + f.write(opts_and_exts('oeb2mobi', oeb2mobi, ['mobi', 'prc'])) f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf'])) f.write(opts_and_exts('lrf-meta', metaop, ['lrf'])) f.write(opts_and_exts('rtf-meta', metaop, ['rtf'])) @@ -230,7 +236,7 @@ def setup_completion(fatal_errors): f.write(opts_and_exts('lit2oeb', lit2oeb, ['lit'])) f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr'])) f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr'])) - f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr'])) + f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr'])) f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles)) f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles)) f.write(opts_and_words('feeds2lrf', feeds2epub, feed_titles))