Conversion pipeline now works for conversion from MOBI to OEB

This commit is contained in:
Kovid Goyal 2009-03-31 18:51:46 -07:00
parent 9aa2fbfbec
commit e624b088d7
10 changed files with 74 additions and 45 deletions

View File

@ -170,7 +170,8 @@ class InputFormatPlugin(Plugin):
if not os.path.exists(options.debug_input): if not os.path.exists(options.debug_input):
os.makedirs(options.debug_input) os.makedirs(options.debug_input)
shutil.rmtree(options.debug_input) shutil.rmtree(options.debug_input)
shutil.copytree('.', options.debug_input) shutil.copytree(output_dir, options.debug_input)
log.info('Input debug saved to:', options.debug_input)
return ret return ret
@ -195,7 +196,14 @@ class OutputFormatPlugin(Plugin):
#: Options shared by all Output format plugins. Do not override #: Options shared by all Output format plugins. Do not override
#: in sub-classes. Use :member:`options` instead. Every option must be an #: in sub-classes. Use :member:`options` instead. Every option must be an
#: instance of :class:`OptionRecommendation`. #: instance of :class:`OptionRecommendation`.
common_options = set([]) common_options = set([
OptionRecommendation(name='pretty_print',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('If specified, the output plugin will try to create output '
'that is as human readable as possible. May not have any effect '
'for some output plugins.')
),
])
#: Options to customize the behavior of this plugin. Every option must be an #: Options to customize the behavior of this plugin. Every option must be an
#: instance of :class:`OptionRecommendation`. #: instance of :class:`OptionRecommendation`.

View File

@ -25,9 +25,11 @@ class Plugin(_Plugin):
screen_size = (800, 600) screen_size = (800, 600)
dpi = 100 dpi = 100
def initialize(self): def __init__(self, *args, **kwargs):
_Plugin.__init__(self, *args, **kwargs)
self.width, self.height = self.screen_size self.width, self.height = self.screen_size
fsizes = list(self.fsizes) fsizes = list(self.fsizes)
self.fkey = list(self.fsizes)
self.fsizes = [] self.fsizes = []
for (name, num), size in izip(FONT_SIZES, fsizes): for (name, num), size in izip(FONT_SIZES, fsizes):
self.fsizes.append((name, num, float(size))) self.fsizes.append((name, num, float(size)))

View File

@ -92,9 +92,9 @@ def add_input_output_options(parser, plumber):
parser.add_option_group(io) parser.add_option_group(io)
if output_options: if output_options:
title = plumber.output_fmt.upper() + ' ' + _('OPTIONS') title = _('OUTPUT OPTIONS')
oo = OptionGroup(parser, title, _('Options to control the processing' oo = OptionGroup(parser, title, _('Options to control the processing'
' of the output %s file')%plumber.input_fmt) ' of the output %s')%plumber.output_fmt)
add_options(oo.add_option, output_options) add_options(oo.add_option, output_options)
parser.add_option_group(oo) parser.add_option_group(oo)

View File

@ -9,6 +9,7 @@ from calibre.customize.conversion import OptionRecommendation
from calibre.customize.ui import input_profiles, output_profiles, \ from calibre.customize.ui import input_profiles, output_profiles, \
plugin_for_input_format, plugin_for_output_format plugin_for_input_format, plugin_for_output_format
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
from calibre.ptempfile import PersistentTemporaryDirectory
class OptionValues(object): class OptionValues(object):
pass pass
@ -289,6 +290,8 @@ OptionRecommendation(name='language',
''' '''
# Setup baseline option values # Setup baseline option values
self.setup_options() self.setup_options()
if self.opts.verbose:
self.log.filter_level = self.log.DEBUG
# Run any preprocess plugins # Run any preprocess plugins
from calibre.customize.ui import run_plugins_on_preprocess from calibre.customize.ui import run_plugins_on_preprocess
@ -300,9 +303,11 @@ OptionRecommendation(name='language',
from calibre.ebooks.oeb.base import OEBBook from calibre.ebooks.oeb.base import OEBBook
accelerators = {} accelerators = {}
tdir = PersistentTemporaryDirectory('_plumber')
opfpath = self.input_plugin(open(self.input, 'rb'), self.opts, opfpath = self.input_plugin(open(self.input, 'rb'), self.opts,
self.input_fmt, self.log, self.input_fmt, self.log,
accelerators) accelerators, tdir)
html_preprocessor = HTMLPreProcessor() html_preprocessor = HTMLPreProcessor()
self.reader = OEBReader() self.reader = OEBReader()
self.oeb = OEBBook(self.log, html_preprocessor=html_preprocessor) self.oeb = OEBBook(self.log, html_preprocessor=html_preprocessor)
@ -316,15 +321,16 @@ OptionRecommendation(name='language',
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
fbase = self.opts.base_font_size fbase = self.opts.base_font_size
if fbase == 0: if fbase == 0:
fbase = self.opts.dest.fbase fbase = float(self.opts.dest.fbase)
fkey = self.opts.font_size_mapping fkey = self.opts.font_size_mapping
if fkey is None: if fkey is None:
fkey = self.opts.dest.fsizes fkey = self.opts.dest.fkey
else:
fkey = map(float, fkey.split(','))
flattener = CSSFlattener(fbase=fbase, fkey=fkey, flattener = CSSFlattener(fbase=fbase, fkey=fkey,
lineh=self.opts.line_height, lineh=self.opts.line_height,
untable=self.opts.linearize_tables) untable=self.opts.linearize_tables)
self.log.info('Flattening CSS...')
flattener(self.oeb, self.opts) flattener(self.oeb, self.opts)
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
@ -334,7 +340,7 @@ OptionRecommendation(name='language',
trimmer(self.oeb, self.opts) trimmer(self.oeb, self.opts)
self.log.info('Creating %s output...'%self.output_plugin.name) self.log.info('Creating %s output...'%self.output_plugin.name)
self.output_plugin(self.oeb, self.output, self.input_plugin, self.opts, self.output_plugin.convert(self.oeb, self.output, self.input_plugin, self.opts,
self.log) self.log)

View File

@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Read data from .mobi files Read data from .mobi files
''' '''
import struct, os, cStringIO, re, functools, datetime import struct, os, cStringIO, re, functools, datetime, textwrap
try: try:
from PIL import Image as PILImage from PIL import Image as PILImage
@ -162,7 +162,7 @@ class MobiReader(object):
self.log = log self.log = log
self.debug = debug self.debug = debug
self.embedded_mi = None self.embedded_mi = None
self.base_css_rules = ''' self.base_css_rules = textwrap.dedent('''
blockquote { margin: 0em 0em 0em 1.25em; text-align: justify } blockquote { margin: 0em 0em 0em 1.25em; text-align: justify }
p { margin: 0em; text-align: justify } p { margin: 0em; text-align: justify }
@ -174,7 +174,7 @@ class MobiReader(object):
.mbp_pagebreak { .mbp_pagebreak {
page-break-after: always; margin: 0; display: block page-break-after: always; margin: 0; display: block
} }
''' ''')
self.tag_css_rules = [] self.tag_css_rules = []
if hasattr(filename_or_stream, 'read'): if hasattr(filename_or_stream, 'read'):
@ -223,7 +223,7 @@ class MobiReader(object):
processed_records = self.extract_text() processed_records = self.extract_text()
if self.debug is not None: if self.debug is not None:
self.parse_cache['calibre_raw_mobi_markup'] = self.mobi_html parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
self.add_anchors() self.add_anchors()
self.processed_html = self.processed_html.decode(self.book_header.codec, self.processed_html = self.processed_html.decode(self.book_header.codec,
'ignore') 'ignore')
@ -265,7 +265,6 @@ class MobiReader(object):
pass pass
parse_cache[htmlfile] = root parse_cache[htmlfile] = root
self.htmlfile = htmlfile self.htmlfile = htmlfile
self.log.debug('Creating OPF...')
ncx = cStringIO.StringIO() ncx = cStringIO.StringIO()
opf = self.create_opf(htmlfile, guide, root) opf = self.create_opf(htmlfile, guide, root)
self.created_opf_path = os.path.splitext(htmlfile)[0]+'.opf' self.created_opf_path = os.path.splitext(htmlfile)[0]+'.opf'
@ -283,8 +282,7 @@ class MobiReader(object):
if self.book_header.exth is not None or self.embedded_mi is not None: if self.book_header.exth is not None or self.embedded_mi is not None:
if self.verbose: self.log.debug('Creating OPF...')
print 'Creating OPF...'
ncx = cStringIO.StringIO() ncx = cStringIO.StringIO()
opf = self.create_opf(htmlfile, guide, root) opf = self.create_opf(htmlfile, guide, root)
opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx) opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx)

View File

@ -658,9 +658,9 @@ class Manifest(object):
def _parse_css(self, data): def _parse_css(self, data):
data = self.oeb.decode(data) data = self.oeb.decode(data)
data = self.CSSPreProcessor(data) data = self.oeb.css_preprocessor(data)
data = XHTML_CSS_NAMESPACE + data data = XHTML_CSS_NAMESPACE + data
parser = CSSParser(log=self.oeb.logger, loglevel=logging.WARNING, parser = CSSParser(loglevel=logging.WARNING,
fetcher=self._fetch_css) fetcher=self._fetch_css)
data = parser.parseString(data, href=self.href) data = parser.parseString(data, href=self.href)
data.namespaces['h'] = XHTML_NS data.namespaces['h'] = XHTML_NS

View File

@ -25,7 +25,7 @@ class OEBOutput(OutputFormatPlugin):
with CurrentDir(output_path): with CurrentDir(output_path):
results = oeb_book.to_opf2(page_map=True) results = oeb_book.to_opf2(page_map=True)
for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME): for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME):
href, root = results.pop(key, None) href, root = results.pop(key, [None, None])
if root is not None: if root is not None:
raw = etree.tostring(root, pretty_print=True, raw = etree.tostring(root, pretty_print=True,
encoding='utf-8') encoding='utf-8')
@ -33,6 +33,21 @@ class OEBOutput(OutputFormatPlugin):
f.write(raw) f.write(raw)
for item in oeb_book.manifest: for item in oeb_book.manifest:
print item.href path = os.path.abspath(item.href)
dir = os.path.dirname(path)
if not os.path.exists(dir):
os.makedirs(dir)
raw = item.data
if not isinstance(raw, basestring):
if hasattr(raw, 'cssText'):
raw = raw.cssText
else:
raw = etree.tostring(raw, encoding='utf-8',
pretty_print=opts.pretty_print)
raw = raw + '<?xml version="1.0" encoding="utf-8" ?>\n'
if isinstance(raw, unicode):
raw = raw.encode('utf-8')
with open(path, 'wb') as f:
f.write(raw)

View File

@ -168,7 +168,7 @@ class OEBReader(object):
data. data.
''' '''
bad = [] bad = []
check = OEB_DOCS+OEB_STYLES check = OEB_DOCS.union(OEB_STYLES)
for item in list(self.oeb.manifest.values()): for item in list(self.oeb.manifest.values()):
if item.media_type in check: if item.media_type in check:
try: try: