Plugin for LIT Output

This commit is contained in:
Kovid Goyal 2009-05-02 18:23:41 -07:00
parent 538d310bb8
commit aec5beb4c3
5 changed files with 71 additions and 88 deletions

View File

@ -292,6 +292,7 @@ from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.epub.output import EPUBOutput from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.mobi.output import MOBIOutput from calibre.ebooks.mobi.output import MOBIOutput
from calibre.ebooks.lrf.output import LRFOutput from calibre.ebooks.lrf.output import LRFOutput
from calibre.ebooks.lit.output import LITOutput
from calibre.ebooks.txt.output import TXTOutput from calibre.ebooks.txt.output import TXTOutput
from calibre.ebooks.pdf.output import PDFOutput from calibre.ebooks.pdf.output import PDFOutput
from calibre.ebooks.pml.input import PMLInput from calibre.ebooks.pml.input import PMLInput
@ -311,7 +312,7 @@ from calibre.devices.jetbook.driver import JETBOOK
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput, plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput, TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput, FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
PMLOutput, MOBIOutput, LRFOutput] PMLOutput, MOBIOutput, LRFOutput, LITOutput]
plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY, plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY,
EB600, JETBOOK] EB600, JETBOOK]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@ -0,0 +1,33 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.customize.conversion import OutputFormatPlugin
class LITOutput(OutputFormatPlugin):
    """Output format plugin that writes a converted book out as a LIT file."""

    name = 'LIT Output'
    author = 'Marshall T. Vandegrift'
    file_type = 'lit'

    def convert(self, oeb, output_path, input_plugin, opts, log):
        """Apply the LIT preparation transforms to *oeb* and write the result.

        :param oeb: the book object handed to every transform and the writer.
        :param output_path: destination passed straight to ``LitWriter``.
        :param input_plugin: accepted for the plugin interface; not used here.
        :param opts: conversion options forwarded to each transform.
        :param log: logger stored on the plugin instance.
        """
        self.log = log
        self.opts = opts
        self.oeb = oeb

        # Imported lazily so the plugin class itself stays cheap to load.
        from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
        from calibre.ebooks.lit.writer import LitWriter

        # Order matters: TOC is added first, then names are case-mangled,
        # then SVG content is rasterized.
        for transform_class in (HTMLTOCAdder, CaseMangler, SVGRasterizer):
            transform = transform_class()
            transform(oeb, opts)

        writer = LitWriter()
        writer(oeb, output_path)

View File

@ -6,8 +6,6 @@ from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>' __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys
import os
from cStringIO import StringIO from cStringIO import StringIO
from struct import pack from struct import pack
from itertools import izip, count, chain from itertools import izip, count, chain
@ -17,7 +15,6 @@ import re
import copy import copy
import uuid import uuid
import functools import functools
import logging
from urlparse import urldefrag from urlparse import urldefrag
from urllib import unquote as urlunquote from urllib import unquote as urlunquote
from lxml import etree from lxml import etree
@ -25,22 +22,14 @@ from calibre.ebooks.lit.reader import DirectoryEntry
import calibre.ebooks.lit.maps as maps import calibre.ebooks.lit.maps as maps
from calibre.ebooks.oeb.base import OEB_DOCS, XHTML_MIME, OEB_STYLES, \ from calibre.ebooks.oeb.base import OEB_DOCS, XHTML_MIME, OEB_STYLES, \
CSS_MIME, OPF_MIME, XML_NS, XML CSS_MIME, OPF_MIME, XML_NS, XML
from calibre.ebooks.oeb.base import namespace, barename, prefixname, \ from calibre.ebooks.oeb.base import prefixname, \
urlnormalize, xpath urlnormalize
from calibre.ebooks.oeb.base import OEBBook
from calibre.ebooks.oeb.profile import Context
from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
from calibre.ebooks.lit.lzx import Compressor from calibre.ebooks.lit.lzx import Compressor
import calibre import calibre
from calibre import plugins from calibre import plugins
msdes, msdeserror = plugins['msdes'] msdes, msdeserror = plugins['msdes']
import calibre.ebooks.lit.mssha1 as mssha1 import calibre.ebooks.lit.mssha1 as mssha1
from calibre.customize.ui import run_plugins_on_postprocess
__all__ = ['LitWriter'] __all__ = ['LitWriter']
@ -144,7 +133,7 @@ def warn(x):
class ReBinary(object): class ReBinary(object):
NSRMAP = {'': None, XML_NS: 'xml'} NSRMAP = {'': None, XML_NS: 'xml'}
def __init__(self, root, item, oeb, map=HTML_MAP): def __init__(self, root, item, oeb, map=HTML_MAP):
self.item = item self.item = item
self.logger = oeb.logger self.logger = oeb.logger
@ -168,7 +157,7 @@ class ReBinary(object):
def is_block(self, style): def is_block(self, style):
return style['display'] not in ('inline', 'inline-block') return style['display'] not in ('inline', 'inline-block')
def tree_to_binary(self, elem, nsrmap=NSRMAP, parents=[], def tree_to_binary(self, elem, nsrmap=NSRMAP, parents=[],
inhead=False, preserve=False): inhead=False, preserve=False):
if not isinstance(elem.tag, basestring): if not isinstance(elem.tag, basestring):
@ -277,7 +266,7 @@ class ReBinary(object):
def build_ahc(self): def build_ahc(self):
if len(self.anchors) > 6: if len(self.anchors) > 6:
self.logger.log_warn("More than six anchors in file %r. " \ self.logger.warn("More than six anchors in file %r. " \
"Some links may not work properly." % self.item.href) "Some links may not work properly." % self.item.href)
data = StringIO() data = StringIO()
data.write(unichr(len(self.anchors)).encode('utf-8')) data.write(unichr(len(self.anchors)).encode('utf-8'))
@ -300,7 +289,7 @@ def preserve(function):
self._stream.seek(opos) self._stream.seek(opos)
functools.update_wrapper(wrapper, function) functools.update_wrapper(wrapper, function)
return wrapper return wrapper
class LitWriter(object): class LitWriter(object):
def __init__(self): def __init__(self):
# Wow, no options # Wow, no options
@ -308,7 +297,7 @@ class LitWriter(object):
def _litize_oeb(self): def _litize_oeb(self):
oeb = self._oeb oeb = self._oeb
oeb.metadata.add('calibre-oeb2lit-version', calibre.__version__) oeb.metadata.add('calibre-version', calibre.__version__)
cover = None cover = None
if oeb.metadata.cover: if oeb.metadata.cover:
id = str(oeb.metadata.cover[0]) id = str(oeb.metadata.cover[0])
@ -319,12 +308,12 @@ class LitWriter(object):
else: else:
self._logger.warn('No suitable cover image found.') self._logger.warn('No suitable cover image found.')
def dump(self, oeb, path): def __call__(self, oeb, path):
if hasattr(path, 'write'): if hasattr(path, 'write'):
return self._dump_stream(oeb, path) return self._dump_stream(oeb, path)
with open(path, 'w+b') as stream: with open(path, 'w+b') as stream:
return self._dump_stream(oeb, stream) return self._dump_stream(oeb, stream)
def _dump_stream(self, oeb, stream): def _dump_stream(self, oeb, stream):
self._oeb = oeb self._oeb = oeb
self._logger = oeb.logger self._logger = oeb.logger
@ -334,7 +323,7 @@ class LitWriter(object):
self._meta = None self._meta = None
self._litize_oeb() self._litize_oeb()
self._write_content() self._write_content()
def _write(self, *data): def _write(self, *data):
for datum in data: for datum in data:
self._stream.write(datum) self._stream.write(datum)
@ -346,7 +335,7 @@ class LitWriter(object):
def _tell(self): def _tell(self):
return self._stream.tell() return self._stream.tell()
def _write_content(self): def _write_content(self):
# Build content sections # Build content sections
self._build_sections() self._build_sections()
@ -414,13 +403,13 @@ class LitWriter(object):
self._write(cchunk, filler, pack('<H', len(dcounts))) self._write(cchunk, filler, pack('<H', len(dcounts)))
self._writeat(pieces[2], pack('<QQ', self._writeat(pieces[2], pack('<QQ',
piece2_offset, self._tell() - piece2_offset)) piece2_offset, self._tell() - piece2_offset))
# Piece #3: GUID3 # Piece #3: GUID3
piece3_offset = self._tell() piece3_offset = self._tell()
self._write(packguid(PIECE3_GUID)) self._write(packguid(PIECE3_GUID))
self._writeat(pieces[3], pack('<QQ', self._writeat(pieces[3], pack('<QQ',
piece3_offset, self._tell() - piece3_offset)) piece3_offset, self._tell() - piece3_offset))
# Piece #4: GUID4 # Piece #4: GUID4
piece4_offset = self._tell() piece4_offset = self._tell()
self._write(packguid(PIECE4_GUID)) self._write(packguid(PIECE4_GUID))
@ -451,7 +440,7 @@ class LitWriter(object):
def _djoin(self, *names): def _djoin(self, *names):
return '/'.join(names) return '/'.join(names)
def _build_sections(self): def _build_sections(self):
self._add_folder('/', ROOT_OFFSET, ROOT_SIZE) self._add_folder('/', ROOT_OFFSET, ROOT_SIZE)
self._build_data() self._build_data()
@ -468,7 +457,7 @@ class LitWriter(object):
self._add_folder('/data') self._add_folder('/data')
for item in self._oeb.manifest.values(): for item in self._oeb.manifest.values():
if item.media_type not in LIT_MIMES: if item.media_type not in LIT_MIMES:
self._logger.log_warn("File %r of unknown media-type %r " \ self._logger.warn("File %r of unknown media-type %r " \
"excluded from output." % (item.href, item.media_type)) "excluded from output." % (item.href, item.media_type))
continue continue
name = '/data/' + item.id name = '/data/' + item.id
@ -485,6 +474,8 @@ class LitWriter(object):
secnum = 1 secnum = 1
elif isinstance(data, unicode): elif isinstance(data, unicode):
data = data.encode('utf-8') data = data.encode('utf-8')
elif hasattr(data, 'cssText'):
data = str(data)
self._add_file(name, data, secnum) self._add_file(name, data, secnum)
item.size = len(data) item.size = len(data)
@ -561,7 +552,7 @@ class LitWriter(object):
self._add_file('/pb1', pb1.getvalue(), 0) self._add_file('/pb1', pb1.getvalue(), 0)
self._add_file('/pb2', pb2.getvalue(), 0) self._add_file('/pb2', pb2.getvalue(), 0)
self._add_file('/pb3', pb3.getvalue(), 0) self._add_file('/pb3', pb3.getvalue(), 0)
def _build_meta(self): def _build_meta(self):
_, meta = self._oeb.to_opf1()[OPF_MIME] _, meta = self._oeb.to_opf1()[OPF_MIME]
meta.attrib['ms--minimum_level'] = '0' meta.attrib['ms--minimum_level'] = '0'
@ -571,7 +562,7 @@ class LitWriter(object):
meta = rebin.content meta = rebin.content
self._meta = meta self._meta = meta
self._add_file('/meta', meta) self._add_file('/meta', meta)
def _build_drm_storage(self): def _build_drm_storage(self):
drmsource = u'Free as in freedom\0'.encode('utf-16-le') drmsource = u'Free as in freedom\0'.encode('utf-16-le')
self._add_file('/DRMStorage/DRMSource', drmsource) self._add_file('/DRMStorage/DRMSource', drmsource)
@ -641,7 +632,7 @@ class LitWriter(object):
def _build_transforms(self): def _build_transforms(self):
for guid in (LZXCOMPRESS_GUID, DESENCRYPT_GUID): for guid in (LZXCOMPRESS_GUID, DESENCRYPT_GUID):
self._add_folder('::Transform/'+ guid) self._add_folder('::Transform/'+ guid)
def _calculate_deskey(self, hashdata): def _calculate_deskey(self, hashdata):
prepad = 2 prepad = 2
hash = mssha1.new() hash = mssha1.new()
@ -658,7 +649,7 @@ class LitWriter(object):
for i in xrange(0, len(digest)): for i in xrange(0, len(digest)):
key[i % 8] ^= ord(digest[i]) key[i % 8] ^= ord(digest[i])
return ''.join(chr(x) for x in key) return ''.join(chr(x) for x in key)
def _build_dchunks(self): def _build_dchunks(self):
ddata = [] ddata = []
directory = list(self._directory) directory = list(self._directory)
@ -720,53 +711,3 @@ class LitWriter(object):
return dcounts, dchunks, ichunk return dcounts, dchunks, ichunk
def option_parser():
    """Return the command-line parser consumed by ``main()``.

    The parser accepts ``-o/--output`` (output file, defaults to ``None``)
    and ``-v/--verbose`` (a counting flag, defaults to ``0``).
    """
    from calibre.utils.config import OptionParser

    cli = OptionParser(usage=_('%prog [options] OPFFILE'))

    output_help = _('Output file. Default is derived from input filename.')
    cli.add_option('-o', '--output', default=None, help=output_help)

    verbose_help = _('Useful for debugging.')
    cli.add_option('-v', '--verbose', default=0, action='count',
                   help=verbose_help)

    return cli
def oeb2lit(opts, inpath):
    """Convert the book at *inpath* into a LIT file.

    :param opts: parsed options; ``opts.output`` and ``opts.verbose`` are read.
    :param inpath: path handed to ``OEBBook`` (presumably an OPF file, going
        by the usage string in ``option_parser`` -- confirm against callers).

    When ``opts.output`` is ``None`` the output name is the input's base name
    with its extension replaced by ``.lit``; the result is always made
    absolute before writing.
    """
    log = logging.getLogger('oeb2lit')
    log.setup_cli_handler(opts.verbose)

    target = opts.output
    if target is None:
        stem = os.path.splitext(os.path.basename(inpath))[0]
        target = stem + '.lit'
    target = os.path.abspath(target)

    context = Context('Browser', 'MSReader')
    book = OEBBook(inpath, logger=log)

    # The transforms run in this exact sequence; each one mutates the book.
    HTMLTOCAdder().transform(book, context)
    CaseMangler().transform(book, context)
    # The base font size is read only after the first two transforms ran,
    # matching the original statement order.
    base_font = context.dest.fbase
    CSSFlattener(fbase=base_font, unfloat=True,
                 untable=True).transform(book, context)
    SVGRasterizer().transform(book, context)
    ManifestTrimmer().transform(book, context)

    LitWriter().dump(book, target)
    run_plugins_on_postprocess(target, 'lit')
    log.info(_('Output written to ') + target)
def main(argv=sys.argv):
    """Command-line entry point: parse *argv* and convert one input file.

    :param argv: full argument vector; element 0 (the program name) is skipped.
    :return: ``0`` after a conversion was run, ``1`` (after printing help)
        when the number of positional arguments is not exactly one.
    """
    cli = option_parser()
    opts, positional = cli.parse_args(argv[1:])
    if len(positional) == 1:
        oeb2lit(opts, positional[0])
        return 0
    cli.print_help()
    return 1
# Run the converter when this module is executed as a script, using the
# return value of main() as the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)

View File

@ -134,7 +134,7 @@ class EbookIterator(object):
plumber.opts, plumber.input_fmt, self.log, plumber.opts, plumber.input_fmt, self.log,
{}, self.base) {}, self.base)
if hasattr(self.pathtoopf, 'manifest'): if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoebook, self._tdir) self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf)) self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))

View File

@ -11,7 +11,8 @@ import os
class MergeMetadata(object): class MergeMetadata(object):
'Merge in user metadata, including cover' 'Merge in user metadata, including cover'
def __call__(self, oeb, mi, prefer_metadata_cover=False): def __call__(self, oeb, mi, prefer_metadata_cover=False,
prefer_author_sort=False):
from calibre.ebooks.oeb.base import DC from calibre.ebooks.oeb.base import DC
self.oeb, self.log = oeb, oeb.log self.oeb, self.log = oeb, oeb.log
m = self.oeb.metadata m = self.oeb.metadata
@ -23,6 +24,8 @@ class MergeMetadata(object):
if not m.title: if not m.title:
m.add(DC('title'), mi.title_sort) m.add(DC('title'), mi.title_sort)
m.title[0].file_as = mi.title_sort m.title[0].file_as = mi.title_sort
if prefer_author_sort and mi.author_sort:
mi.authors = [mi.author_sort]
if mi.authors: if mi.authors:
m.filter('creator', lambda x : x.role.lower() == 'aut') m.filter('creator', lambda x : x.role.lower() == 'aut')
for a in mi.authors: for a in mi.authors:
@ -64,7 +67,10 @@ class MergeMetadata(object):
for t in mi.tags: for t in mi.tags:
m.add('subject', t) m.add('subject', t)
self.set_cover(mi, prefer_metadata_cover) cover_id = self.set_cover(mi, prefer_metadata_cover)
m.clear('cover')
if cover_id is not None:
m.add('cover', cover_id)
def set_cover(self, mi, prefer_metadata_cover): def set_cover(self, mi, prefer_metadata_cover):
cdata = '' cdata = ''
@ -72,13 +78,15 @@ class MergeMetadata(object):
cdata = open(mi.cover, 'rb').read() cdata = open(mi.cover, 'rb').read()
elif mi.cover_data and mi.cover_data[-1]: elif mi.cover_data and mi.cover_data[-1]:
cdata = mi.cover_data[1] cdata = mi.cover_data[1]
if not cdata: return id = None
if 'cover' in self.oeb.guide: if 'cover' in self.oeb.guide:
if not prefer_metadata_cover: href = self.oeb.guide['cover'].href
href = self.oeb.guide['cover'].href id = self.oeb.manifest.hrefs[href].id
if not prefer_metadata_cover and cdata:
self.oeb.manifest.hrefs[href]._data = cdata self.oeb.manifest.hrefs[href]._data = cdata
else: elif cdata:
id, href = self.oeb.manifest.generate('cover', 'cover.jpg') id, href = self.oeb.manifest.generate('cover', 'cover.jpg')
self.oeb.manifest.add(id, href, 'image/jpeg', data=cdata) self.oeb.manifest.add(id, href, 'image/jpeg', data=cdata)
self.oeb.guide.add('cover', 'Cover', href) self.oeb.guide.add('cover', 'Cover', href)
return id