Conversion pipeline now works for conversion from MOBI to OEB

This commit is contained in:
Kovid Goyal 2009-03-31 18:51:46 -07:00
parent 9aa2fbfbec
commit e624b088d7
10 changed files with 74 additions and 45 deletions

View File

@ -170,7 +170,8 @@ class InputFormatPlugin(Plugin):
if not os.path.exists(options.debug_input):
os.makedirs(options.debug_input)
shutil.rmtree(options.debug_input)
shutil.copytree('.', options.debug_input)
shutil.copytree(output_dir, options.debug_input)
log.info('Input debug saved to:', options.debug_input)
return ret
@ -195,7 +196,14 @@ class OutputFormatPlugin(Plugin):
#: Options shared by all Output format plugins. Do not override
#: in sub-classes. Use :member:`options` instead. Every option must be an
#: instance of :class:`OptionRecommendation`.
common_options = set([])
common_options = set([
OptionRecommendation(name='pretty_print',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('If specified, the output plugin will try to create output '
'that is as human readable as possible. May not have any effect '
'for some output plugins.')
),
])
#: Options to customize the behavior of this plugin. Every option must be an
#: instance of :class:`OptionRecommendation`.

View File

@ -25,9 +25,11 @@ class Plugin(_Plugin):
screen_size = (800, 600)
dpi = 100
def initialize(self):
def __init__(self, *args, **kwargs):
_Plugin.__init__(self, *args, **kwargs)
self.width, self.height = self.screen_size
fsizes = list(self.fsizes)
self.fkey = list(self.fsizes)
self.fsizes = []
for (name, num), size in izip(FONT_SIZES, fsizes):
self.fsizes.append((name, num, float(size)))

View File

@ -92,9 +92,9 @@ def add_input_output_options(parser, plumber):
parser.add_option_group(io)
if output_options:
title = plumber.output_fmt.upper() + ' ' + _('OPTIONS')
title = _('OUTPUT OPTIONS')
oo = OptionGroup(parser, title, _('Options to control the processing'
' of the output %s file')%plumber.input_fmt)
' of the output %s')%plumber.output_fmt)
add_options(oo.add_option, output_options)
parser.add_option_group(oo)

View File

@ -9,6 +9,7 @@ from calibre.customize.conversion import OptionRecommendation
from calibre.customize.ui import input_profiles, output_profiles, \
plugin_for_input_format, plugin_for_output_format
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
from calibre.ptempfile import PersistentTemporaryDirectory
class OptionValues(object):
pass
@ -289,6 +290,8 @@ OptionRecommendation(name='language',
'''
# Setup baseline option values
self.setup_options()
if self.opts.verbose:
self.log.filter_level = self.log.DEBUG
# Run any preprocess plugins
from calibre.customize.ui import run_plugins_on_preprocess
@ -300,9 +303,11 @@ OptionRecommendation(name='language',
from calibre.ebooks.oeb.base import OEBBook
accelerators = {}
tdir = PersistentTemporaryDirectory('_plumber')
opfpath = self.input_plugin(open(self.input, 'rb'), self.opts,
self.input_fmt, self.log,
accelerators)
accelerators, tdir)
html_preprocessor = HTMLPreProcessor()
self.reader = OEBReader()
self.oeb = OEBBook(self.log, html_preprocessor=html_preprocessor)
@ -316,15 +321,16 @@ OptionRecommendation(name='language',
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
fbase = self.opts.base_font_size
if fbase == 0:
fbase = self.opts.dest.fbase
fbase = float(self.opts.dest.fbase)
fkey = self.opts.font_size_mapping
if fkey is None:
fkey = self.opts.dest.fsizes
fkey = self.opts.dest.fkey
else:
fkey = map(float, fkey.split(','))
flattener = CSSFlattener(fbase=fbase, fkey=fkey,
lineh=self.opts.line_height,
untable=self.opts.linearize_tables)
self.log.info('Flattening CSS...')
flattener(self.oeb, self.opts)
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
@ -334,7 +340,7 @@ OptionRecommendation(name='language',
trimmer(self.oeb, self.opts)
self.log.info('Creating %s output...'%self.output_plugin.name)
self.output_plugin(self.oeb, self.output, self.input_plugin, self.opts,
self.output_plugin.convert(self.oeb, self.output, self.input_plugin, self.opts,
self.log)

View File

@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Read data from .mobi files
'''
import struct, os, cStringIO, re, functools, datetime
import struct, os, cStringIO, re, functools, datetime, textwrap
try:
from PIL import Image as PILImage
@ -162,7 +162,7 @@ class MobiReader(object):
self.log = log
self.debug = debug
self.embedded_mi = None
self.base_css_rules = '''
self.base_css_rules = textwrap.dedent('''
blockquote { margin: 0em 0em 0em 1.25em; text-align: justify }
p { margin: 0em; text-align: justify }
@ -174,7 +174,7 @@ class MobiReader(object):
.mbp_pagebreak {
page-break-after: always; margin: 0; display: block
}
'''
''')
self.tag_css_rules = []
if hasattr(filename_or_stream, 'read'):
@ -223,7 +223,7 @@ class MobiReader(object):
processed_records = self.extract_text()
if self.debug is not None:
self.parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
self.add_anchors()
self.processed_html = self.processed_html.decode(self.book_header.codec,
'ignore')
@ -265,7 +265,6 @@ class MobiReader(object):
pass
parse_cache[htmlfile] = root
self.htmlfile = htmlfile
self.log.debug('Creating OPF...')
ncx = cStringIO.StringIO()
opf = self.create_opf(htmlfile, guide, root)
self.created_opf_path = os.path.splitext(htmlfile)[0]+'.opf'
@ -283,8 +282,7 @@ class MobiReader(object):
if self.book_header.exth is not None or self.embedded_mi is not None:
if self.verbose:
print 'Creating OPF...'
self.log.debug('Creating OPF...')
ncx = cStringIO.StringIO()
opf = self.create_opf(htmlfile, guide, root)
opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx)

View File

@ -658,9 +658,9 @@ class Manifest(object):
def _parse_css(self, data):
data = self.oeb.decode(data)
data = self.CSSPreProcessor(data)
data = self.oeb.css_preprocessor(data)
data = XHTML_CSS_NAMESPACE + data
parser = CSSParser(log=self.oeb.logger, loglevel=logging.WARNING,
parser = CSSParser(loglevel=logging.WARNING,
fetcher=self._fetch_css)
data = parser.parseString(data, href=self.href)
data.namespaces['h'] = XHTML_NS

View File

@ -25,7 +25,7 @@ class OEBOutput(OutputFormatPlugin):
with CurrentDir(output_path):
results = oeb_book.to_opf2(page_map=True)
for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME):
href, root = results.pop(key, None)
href, root = results.pop(key, [None, None])
if root is not None:
raw = etree.tostring(root, pretty_print=True,
encoding='utf-8')
@ -33,6 +33,21 @@ class OEBOutput(OutputFormatPlugin):
f.write(raw)
for item in oeb_book.manifest:
print item.href
path = os.path.abspath(item.href)
dir = os.path.dirname(path)
if not os.path.exists(dir):
os.makedirs(dir)
raw = item.data
if not isinstance(raw, basestring):
if hasattr(raw, 'cssText'):
raw = raw.cssText
else:
raw = etree.tostring(raw, encoding='utf-8',
pretty_print=opts.pretty_print)
raw = raw + '<?xml version="1.0" encoding="utf-8" ?>\n'
if isinstance(raw, unicode):
raw = raw.encode('utf-8')
with open(path, 'wb') as f:
f.write(raw)

View File

@ -168,7 +168,7 @@ class OEBReader(object):
data.
'''
bad = []
check = OEB_DOCS+OEB_STYLES
check = OEB_DOCS.union(OEB_STYLES)
for item in list(self.oeb.manifest.values()):
if item.media_type in check:
try:

View File

@ -44,7 +44,7 @@ class KeyMapper(object):
logb = abs(base - endp)
result = sign * math.log(diff, logb)
return result
def __getitem__(self, ssize):
ssize = asfloat(ssize, 0)
if ssize in self.cache:
@ -75,7 +75,7 @@ class NullMapper(object):
def __getitem__(self, ssize):
return ssize
def FontMapper(sbase=None, dbase=None, dkey=None):
if sbase and dbase and dkey:
return KeyMapper(sbase, dbase, dkey)
@ -101,7 +101,7 @@ class CSSFlattener(object):
@classmethod
def generate(cls, opts):
return cls()
def __call__(self, oeb, context):
oeb.logger.info('Flattening CSS and remapping font sizes...')
self.oeb = oeb
@ -127,7 +127,7 @@ class CSSFlattener(object):
self.baseline_node(child, stylizer, sizes, csize)
if child.tail:
sizes[csize] += len(COLLAPSE.sub(' ', child.tail))
def baseline_spine(self):
sizes = defaultdict(float)
for item in self.oeb.spine:
@ -157,7 +157,7 @@ class CSSFlattener(object):
else:
value = round(value / slineh) * dlineh
cssdict[property] = "%0.5fem" % (value / fsize)
def flatten_node(self, node, stylizer, names, styles, psize, left=0):
if not isinstance(node.tag, basestring) \
or namespace(node.tag) != XHTML_NS:
@ -267,7 +267,7 @@ class CSSFlattener(object):
manifest.remove(item)
item = manifest.add(id, href, CSS_MIME, data=css)
return href
def flatten_spine(self):
names = defaultdict(int)
styles = {}

View File

@ -17,18 +17,18 @@ from functools import partial
class Stream(object):
def __init__(self, stream):
from calibre import prints
self._prints = prints
self.stream = stream
def flush(self):
self.stream.flush()
class ANSIStream(Stream):
def __init__(self, stream=sys.stdout):
Stream.__init__(self, stream)
from calibre.utils.terminfo import TerminalController
@ -40,18 +40,18 @@ class ANSIStream(Stream):
ERROR: tc.RED
}
self.normal = tc.NORMAL
def prints(self, level, *args, **kwargs):
self.stream.write(self.color[level])
kwargs['file'] = self.stream
self._prints(*args, **kwargs)
self.stream.write(self.normal)
def flush(self):
self.stream.flush()
class HTMLStream(Stream):
def __init__(self, stream=sys.stdout):
Stream.__init__(self, stream)
self.color = {
@ -61,13 +61,13 @@ class HTMLStream(Stream):
ERROR: '<span style="color:red">'
}
self.normal = '</span>'
def prints(self, level, *args, **kwargs):
self.stream.write(self.color[level])
kwargs['file'] = self.stream
self._prints(*args, **kwargs)
self.stream.write(self.normal)
def flush(self):
self.stream.flush()
@ -77,28 +77,28 @@ class Log(object):
INFO = INFO
WARN = WARN
ERROR = ERROR
def __init__(self, level=INFO):
self.filter_level = level
default_output = ANSIStream()
self.outputs = [default_output]
self.debug = partial(self.prints, DEBUG)
self.debug = partial(self.prints, DEBUG)
self.info = partial(self.prints, INFO)
self.warn = self.warning = partial(self.prints, WARN)
self.error = partial(self.prints, ERROR)
self.error = partial(self.prints, ERROR)
def prints(self, level, *args, **kwargs):
if level < self.filter_level:
return
for output in self.outputs:
output.prints(level, *args, **kwargs)
def exception(self, *args, **kwargs):
limit = kwargs.pop('limit', None)
self.prints(ERROR, *args, **kwargs)
self.prints(DEBUG, traceback.format_exc(limit))
def __call__(self, *args, **kwargs):
self.prints(INFO, *args, **kwargs)
self.prints(INFO, *args, **kwargs)