mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to pluginize
This commit is contained in:
commit
9aae507c07
@ -290,6 +290,7 @@ from calibre.ebooks.comic.input import ComicInput
|
|||||||
from calibre.web.feeds.input import RecipeInput
|
from calibre.web.feeds.input import RecipeInput
|
||||||
from calibre.ebooks.oeb.output import OEBOutput
|
from calibre.ebooks.oeb.output import OEBOutput
|
||||||
from calibre.ebooks.epub.output import EPUBOutput
|
from calibre.ebooks.epub.output import EPUBOutput
|
||||||
|
from calibre.ebooks.mobi.output import MOBIOutput
|
||||||
from calibre.ebooks.txt.output import TXTOutput
|
from calibre.ebooks.txt.output import TXTOutput
|
||||||
from calibre.ebooks.pdf.output import PDFOutput
|
from calibre.ebooks.pdf.output import PDFOutput
|
||||||
from calibre.ebooks.pml.input import PMLInput
|
from calibre.ebooks.pml.input import PMLInput
|
||||||
@ -309,9 +310,9 @@ from calibre.devices.jetbook.driver import JETBOOK
|
|||||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
|
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
|
||||||
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
|
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
|
||||||
FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
|
FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
|
||||||
PMLOutput]
|
PMLOutput, MOBIOutput]
|
||||||
plugins += [PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY, EB600, \
|
plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY,
|
||||||
JETBOOK]
|
EB600, JETBOOK]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
x.__name__.endswith('MetadataReader')]
|
x.__name__.endswith('MetadataReader')]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
|
@ -149,6 +149,18 @@ class InputFormatPlugin(Plugin):
|
|||||||
'''
|
'''
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
def preprocess_html(self, html):
|
||||||
|
'''
|
||||||
|
This method is called by the conversion pipeline on all HTML before it
|
||||||
|
is parsed. It is meant to be used to do any required preprocessing on
|
||||||
|
the HTML, like removing hard line breaks, etc.
|
||||||
|
|
||||||
|
:param html: A unicode string
|
||||||
|
:return: A unicode string
|
||||||
|
'''
|
||||||
|
return html
|
||||||
|
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log, accelerators):
|
def convert(self, stream, options, file_ext, log, accelerators):
|
||||||
'''
|
'''
|
||||||
This method must be implemented in sub-classes. It must return
|
This method must be implemented in sub-classes. It must return
|
||||||
|
@ -126,9 +126,10 @@ def add_pipeline_options(parser, plumber):
|
|||||||
'STRUCTURE DETECTION' : (
|
'STRUCTURE DETECTION' : (
|
||||||
_('Control auto-detection of document structure.'),
|
_('Control auto-detection of document structure.'),
|
||||||
[
|
[
|
||||||
'dont_split_on_page_breaks', 'chapter', 'chapter_mark',
|
'chapter', 'chapter_mark',
|
||||||
'prefer_metadata_cover', 'remove_first_image',
|
'prefer_metadata_cover', 'remove_first_image',
|
||||||
'insert_metadata', 'page_breaks_before',
|
'insert_metadata', 'page_breaks_before',
|
||||||
|
'preprocess_html',
|
||||||
]
|
]
|
||||||
),
|
),
|
||||||
|
|
||||||
|
@ -131,18 +131,6 @@ OptionRecommendation(name='linearize_tables',
|
|||||||
)
|
)
|
||||||
),
|
),
|
||||||
|
|
||||||
OptionRecommendation(name='dont_split_on_page_breaks',
|
|
||||||
recommended_value=False, level=OptionRecommendation.LOW,
|
|
||||||
help=_('Turn off splitting at page breaks. Normally, input '
|
|
||||||
'files are automatically split at every page break into '
|
|
||||||
'two files. This gives an output ebook that can be '
|
|
||||||
'parsed faster and with less resources. However, '
|
|
||||||
'splitting is slow and if your source file contains a '
|
|
||||||
'very large number of page breaks, you should turn off '
|
|
||||||
'splitting on page breaks.'
|
|
||||||
)
|
|
||||||
),
|
|
||||||
|
|
||||||
OptionRecommendation(name='level1_toc',
|
OptionRecommendation(name='level1_toc',
|
||||||
recommended_value=None, level=OptionRecommendation.LOW,
|
recommended_value=None, level=OptionRecommendation.LOW,
|
||||||
help=_('XPath expression that specifies all tags that '
|
help=_('XPath expression that specifies all tags that '
|
||||||
@ -312,6 +300,14 @@ OptionRecommendation(name='insert_metadata',
|
|||||||
)
|
)
|
||||||
),
|
),
|
||||||
|
|
||||||
|
OptionRecommendation(name='preprocess_html',
|
||||||
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
|
help=_('Attempt to detect and correct hard line breaks and other '
|
||||||
|
'problems in the source file. This may make things worse, so use '
|
||||||
|
'with care.'
|
||||||
|
)
|
||||||
|
),
|
||||||
|
|
||||||
|
|
||||||
OptionRecommendation(name='read_metadata_from_opf',
|
OptionRecommendation(name='read_metadata_from_opf',
|
||||||
recommended_value=None, level=OptionRecommendation.LOW,
|
recommended_value=None, level=OptionRecommendation.LOW,
|
||||||
@ -580,7 +576,8 @@ OptionRecommendation(name='list_recipes',
|
|||||||
self.log('Debug input called, aborting the rest of the pipeline.')
|
self.log('Debug input called, aborting the rest of the pipeline.')
|
||||||
return
|
return
|
||||||
if not hasattr(self.oeb, 'manifest'):
|
if not hasattr(self.oeb, 'manifest'):
|
||||||
self.oeb = create_oebbook(self.log, self.oeb, self.opts)
|
self.oeb = create_oebbook(self.log, self.oeb, self.opts,
|
||||||
|
self.input_plugin)
|
||||||
pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
|
pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
|
||||||
pr(0., _('Running transforms on ebook...'))
|
pr(0., _('Running transforms on ebook...'))
|
||||||
|
|
||||||
@ -619,20 +616,14 @@ OptionRecommendation(name='list_recipes',
|
|||||||
|
|
||||||
flattener = CSSFlattener(fbase=fbase, fkey=fkey,
|
flattener = CSSFlattener(fbase=fbase, fkey=fkey,
|
||||||
lineh=self.opts.line_height,
|
lineh=self.opts.line_height,
|
||||||
untable=self.opts.linearize_tables)
|
untable=self.output_plugin.file_type in ('mobi','lit'),
|
||||||
|
unfloat=self.output_plugin.file_type in ('mobi', 'lit'))
|
||||||
flattener(self.oeb, self.opts)
|
flattener(self.oeb, self.opts)
|
||||||
|
|
||||||
if self.opts.linearize_tables:
|
if self.opts.linearize_tables and \
|
||||||
|
self.output_plugin.file_type not in ('mobi', 'lrf'):
|
||||||
from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables
|
from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables
|
||||||
LinearizeTables()(self.oeb, self.opts)
|
LinearizeTables()(self.oeb, self.opts)
|
||||||
pr(0.7)
|
|
||||||
|
|
||||||
from calibre.ebooks.oeb.transforms.split import Split
|
|
||||||
pbx = accelerators.get('pagebreaks', None)
|
|
||||||
split = Split(not self.opts.dont_split_on_page_breaks,
|
|
||||||
max_flow_size=self.opts.output_profile.flow_size,
|
|
||||||
page_breaks_xpath=pbx)
|
|
||||||
split(self.oeb, self.opts)
|
|
||||||
pr(0.9)
|
pr(0.9)
|
||||||
|
|
||||||
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
|
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
|
||||||
@ -652,13 +643,14 @@ OptionRecommendation(name='list_recipes',
|
|||||||
self.opts, self.log)
|
self.opts, self.log)
|
||||||
self.ui_reporter(1.)
|
self.ui_reporter(1.)
|
||||||
|
|
||||||
def create_oebbook(log, path_or_stream, opts, reader=None):
|
def create_oebbook(log, path_or_stream, opts, input_plugin, reader=None):
|
||||||
'''
|
'''
|
||||||
Create an OEBBook.
|
Create an OEBBook.
|
||||||
'''
|
'''
|
||||||
from calibre.ebooks.oeb.base import OEBBook
|
from calibre.ebooks.oeb.base import OEBBook
|
||||||
html_preprocessor = HTMLPreProcessor()
|
html_preprocessor = HTMLPreProcessor(input_plugin.preprocess_html,
|
||||||
oeb = OEBBook(log, html_preprocessor=html_preprocessor,
|
opts.preprocess_html)
|
||||||
|
oeb = OEBBook(log, html_preprocessor,
|
||||||
pretty_print=opts.pretty_print)
|
pretty_print=opts.pretty_print)
|
||||||
# Read OEB Book into OEBBook
|
# Read OEB Book into OEBBook
|
||||||
log('Parsing all content...')
|
log('Parsing all content...')
|
||||||
|
@ -26,16 +26,16 @@ def sanitize_head(match):
|
|||||||
def chap_head(match):
|
def chap_head(match):
|
||||||
chap = match.group('chap')
|
chap = match.group('chap')
|
||||||
title = match.group('title')
|
title = match.group('title')
|
||||||
if not title:
|
if not title:
|
||||||
return '<h1>'+chap+'</h1><br/>\n'
|
return '<h1>'+chap+'</h1><br/>\n'
|
||||||
else:
|
else:
|
||||||
return '<h1>'+chap+'<br/>\n'+title+'</h1><br/>\n'
|
return '<h1>'+chap+'<br/>\n'+title+'</h1><br/>\n'
|
||||||
|
|
||||||
def wrap_lines(match):
|
def wrap_lines(match):
|
||||||
ital = match.group('ital')
|
ital = match.group('ital')
|
||||||
if not ital:
|
if not ital:
|
||||||
return ' '
|
return ' '
|
||||||
else:
|
else:
|
||||||
return ital+' '
|
return ital+' '
|
||||||
|
|
||||||
def line_length(raw, percent):
|
def line_length(raw, percent):
|
||||||
@ -106,7 +106,7 @@ class HTMLPreProcessor(object):
|
|||||||
(re.compile(u'¨\s*(<br.*?>)*\s*I', re.UNICODE), lambda match: u'Ï'),
|
(re.compile(u'¨\s*(<br.*?>)*\s*I', re.UNICODE), lambda match: u'Ï'),
|
||||||
(re.compile(u'¨\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'ä'),
|
(re.compile(u'¨\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'ä'),
|
||||||
(re.compile(u'¨\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Ä'),
|
(re.compile(u'¨\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Ä'),
|
||||||
|
|
||||||
# Remove page links
|
# Remove page links
|
||||||
(re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
|
(re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
|
||||||
# Remove <hr> tags
|
# Remove <hr> tags
|
||||||
@ -151,6 +151,9 @@ class HTMLPreProcessor(object):
|
|||||||
(re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
|
(re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
|
||||||
lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
|
lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
|
||||||
]
|
]
|
||||||
|
def __init__(self, input_plugin_preprocess, plugin_preprocess):
|
||||||
|
self.input_plugin_preprocess = input_plugin_preprocess
|
||||||
|
self.plugin_preprocess = plugin_preprocess
|
||||||
|
|
||||||
def is_baen(self, src):
|
def is_baen(self, src):
|
||||||
return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"',
|
return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"',
|
||||||
@ -175,7 +178,7 @@ class HTMLPreProcessor(object):
|
|||||||
# Un wrap using punctuation
|
# Un wrap using punctuation
|
||||||
(re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % line_length(html, .4), re.UNICODE), wrap_lines),
|
(re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % line_length(html, .4), re.UNICODE), wrap_lines),
|
||||||
]
|
]
|
||||||
|
|
||||||
rules = self.PDFTOHTML + line_length_rules
|
rules = self.PDFTOHTML + line_length_rules
|
||||||
else:
|
else:
|
||||||
rules = []
|
rules = []
|
||||||
@ -192,5 +195,8 @@ class HTMLPreProcessor(object):
|
|||||||
|
|
||||||
html = XMLDECL_RE.sub('', html)
|
html = XMLDECL_RE.sub('', html)
|
||||||
|
|
||||||
|
if self.plugin_preprocess:
|
||||||
|
html = self.input_plugin_preprocess(html)
|
||||||
|
|
||||||
return html
|
return html
|
||||||
|
|
||||||
|
@ -28,7 +28,21 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
OptionRecommendation(name='extract_to',
|
OptionRecommendation(name='extract_to',
|
||||||
help=_('Extract the contents of the generated EPUB file to the '
|
help=_('Extract the contents of the generated EPUB file to the '
|
||||||
'specified directory. The contents of the directory are first '
|
'specified directory. The contents of the directory are first '
|
||||||
'deleted, so be careful.'))
|
'deleted, so be careful.')),
|
||||||
|
|
||||||
|
OptionRecommendation(name='dont_split_on_page_breaks',
|
||||||
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
|
help=_('Turn off splitting at page breaks. Normally, input '
|
||||||
|
'files are automatically split at every page break into '
|
||||||
|
'two files. This gives an output ebook that can be '
|
||||||
|
'parsed faster and with less resources. However, '
|
||||||
|
'splitting is slow and if your source file contains a '
|
||||||
|
'very large number of page breaks, you should turn off '
|
||||||
|
'splitting on page breaks.'
|
||||||
|
)
|
||||||
|
),
|
||||||
|
|
||||||
|
|
||||||
])
|
])
|
||||||
|
|
||||||
|
|
||||||
@ -88,6 +102,13 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
def convert(self, oeb, output_path, input_plugin, opts, log):
|
def convert(self, oeb, output_path, input_plugin, opts, log):
|
||||||
self.log, self.opts, self.oeb = log, opts, oeb
|
self.log, self.opts, self.oeb = log, opts, oeb
|
||||||
|
|
||||||
|
from calibre.ebooks.oeb.transforms.split import Split
|
||||||
|
split = Split(not self.opts.dont_split_on_page_breaks,
|
||||||
|
max_flow_size=self.opts.output_profile.flow_size
|
||||||
|
)
|
||||||
|
split(self.oeb, self.opts)
|
||||||
|
|
||||||
|
|
||||||
self.workaround_ade_quirks()
|
self.workaround_ade_quirks()
|
||||||
|
|
||||||
from calibre.ebooks.oeb.transforms.rescale import RescaleImages
|
from calibre.ebooks.oeb.transforms.rescale import RescaleImages
|
||||||
|
@ -288,7 +288,7 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
return opfpath
|
return opfpath
|
||||||
|
|
||||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||||
oeb = create_oebbook(log, opfpath, opts)
|
oeb = create_oebbook(log, opfpath, opts, self)
|
||||||
|
|
||||||
from calibre.ebooks.oeb.transforms.package import Package
|
from calibre.ebooks.oeb.transforms.package import Package
|
||||||
Package(os.getcwdu())(oeb, opts)
|
Package(os.getcwdu())(oeb, opts)
|
||||||
|
@ -19,6 +19,6 @@ class LITInput(InputFormatPlugin):
|
|||||||
accelerators):
|
accelerators):
|
||||||
from calibre.ebooks.lit.reader import LitReader
|
from calibre.ebooks.lit.reader import LitReader
|
||||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||||
return create_oebbook(log, stream, options, reader=LitReader)
|
return create_oebbook(log, stream, options, self, reader=LitReader)
|
||||||
|
|
||||||
|
|
||||||
|
@ -80,19 +80,6 @@ class MobiMLizer(object):
|
|||||||
def __init__(self, ignore_tables=False):
|
def __init__(self, ignore_tables=False):
|
||||||
self.ignore_tables = ignore_tables
|
self.ignore_tables = ignore_tables
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def config(cls, cfg):
|
|
||||||
group = cfg.add_group('mobiml', _('Mobipocket markup options.'))
|
|
||||||
group('ignore_tables', ['--ignore-tables'], default=False,
|
|
||||||
help=_('Render HTML tables as blocks of text instead of actual '
|
|
||||||
'tables. This is neccessary if the HTML contains very '
|
|
||||||
'large or complex tables.'))
|
|
||||||
return cfg
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def generate(cls, opts):
|
|
||||||
return cls(ignore_tables=opts.ignore_tables)
|
|
||||||
|
|
||||||
def __call__(self, oeb, context):
|
def __call__(self, oeb, context):
|
||||||
oeb.logger.info('Converting XHTML to Mobipocket markup...')
|
oeb.logger.info('Converting XHTML to Mobipocket markup...')
|
||||||
self.oeb = oeb
|
self.oeb = oeb
|
||||||
|
51
src/calibre/ebooks/mobi/output.py
Normal file
51
src/calibre/ebooks/mobi/output.py
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
|
||||||
|
from calibre.customize.conversion import OutputFormatPlugin
|
||||||
|
from calibre.customize.conversion import OptionRecommendation
|
||||||
|
|
||||||
|
class MOBIOutput(OutputFormatPlugin):
|
||||||
|
|
||||||
|
name = 'MOBI Output'
|
||||||
|
author = 'Marshall T. Vandegrift'
|
||||||
|
file_type = 'mobi'
|
||||||
|
|
||||||
|
options = set([
|
||||||
|
OptionRecommendation(name='rescale_images', recommended_value=False,
|
||||||
|
help=_('Modify images to meet Palm device size limitations.')
|
||||||
|
),
|
||||||
|
OptionRecommendation(name='prefer_author_sort',
|
||||||
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
|
help=_('When present, use author sort field as author.')
|
||||||
|
),
|
||||||
|
OptionRecommendation(name='toc_title', recommended_value=None,
|
||||||
|
help=_('Title for any generated in-line table of contents.')
|
||||||
|
),
|
||||||
|
])
|
||||||
|
|
||||||
|
def convert(self, oeb, output_path, input_plugin, opts, log):
|
||||||
|
self.log, self.opts, self.oeb = log, opts, oeb
|
||||||
|
from calibre.ebooks.mobi.writer import PALM_MAX_IMAGE_SIZE, MobiWriter
|
||||||
|
from calibre.ebooks.mobi.mobiml import MobiMLizer
|
||||||
|
from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
|
||||||
|
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
|
||||||
|
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
|
||||||
|
imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
|
||||||
|
tocadder = HTMLTOCAdder(title=opts.toc_title)
|
||||||
|
tocadder(oeb, opts)
|
||||||
|
mangler = CaseMangler()
|
||||||
|
mangler(oeb, opts)
|
||||||
|
rasterizer = SVGRasterizer()
|
||||||
|
rasterizer(oeb, opts)
|
||||||
|
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
|
||||||
|
mobimlizer(oeb, opts)
|
||||||
|
writer = MobiWriter(imagemax=imagemax,
|
||||||
|
prefer_author_sort=opts.prefer_author_sort)
|
||||||
|
writer(oeb, output_path)
|
||||||
|
|
@ -6,8 +6,6 @@ from __future__ import with_statement
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>'
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>'
|
||||||
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
from struct import pack
|
from struct import pack
|
||||||
import time
|
import time
|
||||||
import random
|
import random
|
||||||
@ -16,24 +14,14 @@ import re
|
|||||||
from itertools import izip, count
|
from itertools import izip, count
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from urlparse import urldefrag
|
from urlparse import urldefrag
|
||||||
import logging
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \
|
from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \
|
||||||
OEB_RASTER_IMAGES
|
OEB_RASTER_IMAGES
|
||||||
from calibre.ebooks.oeb.base import namespace, prefixname
|
from calibre.ebooks.oeb.base import namespace, prefixname
|
||||||
from calibre.ebooks.oeb.base import urlnormalize
|
from calibre.ebooks.oeb.base import urlnormalize
|
||||||
from calibre.ebooks.oeb.base import OEBBook
|
|
||||||
from calibre.ebooks.oeb.profile import Context
|
|
||||||
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
|
|
||||||
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
|
|
||||||
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
|
|
||||||
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
|
|
||||||
from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
|
|
||||||
from calibre.ebooks.mobi.palmdoc import compress_doc
|
from calibre.ebooks.mobi.palmdoc import compress_doc
|
||||||
from calibre.ebooks.mobi.langcodes import iana2mobi
|
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||||
from calibre.ebooks.mobi.mobiml import MBP_NS, MobiMLizer
|
from calibre.ebooks.mobi.mobiml import MBP_NS
|
||||||
from calibre.customize.ui import run_plugins_on_postprocess
|
|
||||||
from calibre.utils.config import Config, StringConfig
|
|
||||||
|
|
||||||
# TODO:
|
# TODO:
|
||||||
# - Allow override CSS (?)
|
# - Allow override CSS (?)
|
||||||
@ -293,58 +281,22 @@ class Serializer(object):
|
|||||||
buffer.write('%010d' % ioff)
|
buffer.write('%010d' % ioff)
|
||||||
|
|
||||||
|
|
||||||
class MobiFlattener(object):
|
|
||||||
def config(self, cfg):
|
|
||||||
return cfg
|
|
||||||
|
|
||||||
def generate(self, opts):
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __call__(self, oeb, context):
|
|
||||||
fbase = context.dest.fbase
|
|
||||||
fkey = context.dest.fnums.values()
|
|
||||||
flattener = CSSFlattener(
|
|
||||||
fbase=fbase, fkey=fkey, unfloat=True, untable=True)
|
|
||||||
return flattener(oeb, context)
|
|
||||||
|
|
||||||
|
|
||||||
class MobiWriter(object):
|
class MobiWriter(object):
|
||||||
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
|
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
|
||||||
|
|
||||||
DEFAULT_PROFILE = 'CybookG3'
|
def __init__(self, compression=PALMDOC, imagemax=None,
|
||||||
|
|
||||||
TRANSFORMS = [HTMLTOCAdder, CaseMangler, MobiFlattener(), SVGRasterizer,
|
|
||||||
ManifestTrimmer, MobiMLizer]
|
|
||||||
|
|
||||||
def __init__(self, compression=None, imagemax=None,
|
|
||||||
prefer_author_sort=False):
|
prefer_author_sort=False):
|
||||||
self._compression = compression or UNCOMPRESSED
|
self._compression = compression or UNCOMPRESSED
|
||||||
self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
|
self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
|
||||||
self._prefer_author_sort = prefer_author_sort
|
self._prefer_author_sort = prefer_author_sort
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def config(cls, cfg):
|
|
||||||
"""Add any book-writing options to the :class:`Config` object
|
|
||||||
:param:`cfg`.
|
|
||||||
"""
|
|
||||||
mobi = cfg.add_group('mobipocket', _('Mobipocket-specific options.'))
|
|
||||||
mobi('compress', ['--compress'], default=False,
|
|
||||||
help=_('Compress file text using PalmDOC compression. '
|
|
||||||
'Results in smaller files, but takes a long time to run.'))
|
|
||||||
mobi('rescale_images', ['--rescale-images'], default=False,
|
|
||||||
help=_('Modify images to meet Palm device size limitations.'))
|
|
||||||
mobi('prefer_author_sort', ['--prefer-author-sort'], default=False,
|
|
||||||
help=_('When present, use the author sorting information for '
|
|
||||||
'generating the Mobipocket author metadata.'))
|
|
||||||
return cfg
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def generate(cls, opts):
|
def generate(cls, opts):
|
||||||
"""Generate a Writer instance from command-line options."""
|
"""Generate a Writer instance from command-line options."""
|
||||||
compression = PALMDOC if opts.compress else UNCOMPRESSED
|
|
||||||
imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
|
imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
|
||||||
prefer_author_sort = opts.prefer_author_sort
|
prefer_author_sort = opts.prefer_author_sort
|
||||||
return cls(compression=compression, imagemax=imagemax,
|
return cls(compression=PALMDOC, imagemax=imagemax,
|
||||||
prefer_author_sort=prefer_author_sort)
|
prefer_author_sort=prefer_author_sort)
|
||||||
|
|
||||||
def __call__(self, oeb, path):
|
def __call__(self, oeb, path):
|
||||||
@ -577,88 +529,4 @@ class MobiWriter(object):
|
|||||||
self._write(record)
|
self._write(record)
|
||||||
|
|
||||||
|
|
||||||
def config(defaults=None):
|
|
||||||
desc = _('Options to control the conversion to MOBI')
|
|
||||||
_profiles = list(sorted(Context.PROFILES.keys()))
|
|
||||||
if defaults is None:
|
|
||||||
c = Config('mobi', desc)
|
|
||||||
else:
|
|
||||||
c = StringConfig(defaults, desc)
|
|
||||||
|
|
||||||
profiles = c.add_group('profiles', _('Device renderer profiles. '
|
|
||||||
'Affects conversion of font sizes, image rescaling and rasterization '
|
|
||||||
'of tables. Valid profiles are: %s.') % ', '.join(_profiles))
|
|
||||||
profiles('source_profile', ['--source-profile'],
|
|
||||||
default='Browser', choices=_profiles,
|
|
||||||
help=_("Source renderer profile. Default is %default."))
|
|
||||||
profiles('dest_profile', ['--dest-profile'],
|
|
||||||
default='CybookG3', choices=_profiles,
|
|
||||||
help=_("Destination renderer profile. Default is %default."))
|
|
||||||
c.add_opt('encoding', ['--encoding'], default=None,
|
|
||||||
help=_('Character encoding for HTML files. Default is to auto detect.'))
|
|
||||||
return c
|
|
||||||
|
|
||||||
|
|
||||||
def option_parser():
|
|
||||||
c = config()
|
|
||||||
parser = c.option_parser(usage='%prog '+_('[options]')+' file.opf')
|
|
||||||
parser.add_option(
|
|
||||||
'-o', '--output', default=None,
|
|
||||||
help=_('Output file. Default is derived from input filename.'))
|
|
||||||
parser.add_option(
|
|
||||||
'-v', '--verbose', default=0, action='count',
|
|
||||||
help=_('Useful for debugging.'))
|
|
||||||
return parser
|
|
||||||
|
|
||||||
def oeb2mobi(opts, inpath):
|
|
||||||
logger = Logger(logging.getLogger('oeb2mobi'))
|
|
||||||
logger.setup_cli_handler(opts.verbose)
|
|
||||||
outpath = opts.output
|
|
||||||
if outpath is None:
|
|
||||||
outpath = os.path.basename(inpath)
|
|
||||||
outpath = os.path.splitext(outpath)[0] + '.mobi'
|
|
||||||
source = opts.source_profile
|
|
||||||
if source not in Context.PROFILES:
|
|
||||||
logger.error(_('Unknown source profile %r') % source)
|
|
||||||
return 1
|
|
||||||
dest = opts.dest_profile
|
|
||||||
if dest not in Context.PROFILES:
|
|
||||||
logger.error(_('Unknown destination profile %r') % dest)
|
|
||||||
return 1
|
|
||||||
compression = PALMDOC if opts.compress else UNCOMPRESSED
|
|
||||||
imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
|
|
||||||
context = Context(source, dest)
|
|
||||||
oeb = OEBBook(inpath, logger=logger, encoding=opts.encoding)
|
|
||||||
tocadder = HTMLTOCAdder(title=opts.toc_title)
|
|
||||||
tocadder.transform(oeb, context)
|
|
||||||
mangler = CaseMangler()
|
|
||||||
mangler.transform(oeb, context)
|
|
||||||
fbase = context.dest.fbase
|
|
||||||
fkey = context.dest.fnums.values()
|
|
||||||
flattener = CSSFlattener(
|
|
||||||
fbase=fbase, fkey=fkey, unfloat=True, untable=True)
|
|
||||||
flattener.transform(oeb, context)
|
|
||||||
rasterizer = SVGRasterizer()
|
|
||||||
rasterizer.transform(oeb, context)
|
|
||||||
trimmer = ManifestTrimmer()
|
|
||||||
trimmer.transform(oeb, context)
|
|
||||||
mobimlizer = MobiMLizer(ignore_tables=opts.ignore_tables)
|
|
||||||
mobimlizer.transform(oeb, context)
|
|
||||||
writer = MobiWriter(compression=compression, imagemax=imagemax,
|
|
||||||
prefer_author_sort=opts.prefer_author_sort)
|
|
||||||
writer.dump(oeb, outpath)
|
|
||||||
run_plugins_on_postprocess(outpath, 'mobi')
|
|
||||||
logger.info(_('Output written to ') + outpath)
|
|
||||||
|
|
||||||
def main(argv=sys.argv):
|
|
||||||
parser = option_parser()
|
|
||||||
opts, args = parser.parse_args(argv[1:])
|
|
||||||
if len(args) != 1:
|
|
||||||
parser.print_help()
|
|
||||||
return 1
|
|
||||||
inpath = args[0]
|
|
||||||
retval = oeb2mobi(opts, inpath)
|
|
||||||
return retval
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
||||||
|
@ -22,8 +22,7 @@ from cssutils import CSSParser
|
|||||||
from calibre.translations.dynamic import translate
|
from calibre.translations.dynamic import translate
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
|
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
|
||||||
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor, \
|
from calibre.ebooks.conversion.preprocess import CSSPreProcessor
|
||||||
CSSPreProcessor
|
|
||||||
|
|
||||||
XML_NS = 'http://www.w3.org/XML/1998/namespace'
|
XML_NS = 'http://www.w3.org/XML/1998/namespace'
|
||||||
XHTML_NS = 'http://www.w3.org/1999/xhtml'
|
XHTML_NS = 'http://www.w3.org/1999/xhtml'
|
||||||
@ -1506,7 +1505,7 @@ class OEBBook(object):
|
|||||||
COVER_OBJECT_XP = XPath('h:body//h:object[@data][position() = 1]')
|
COVER_OBJECT_XP = XPath('h:body//h:object[@data][position() = 1]')
|
||||||
|
|
||||||
def __init__(self, logger,
|
def __init__(self, logger,
|
||||||
html_preprocessor=HTMLPreProcessor(),
|
html_preprocessor,
|
||||||
css_preprocessor=CSSPreProcessor(),
|
css_preprocessor=CSSPreProcessor(),
|
||||||
encoding='utf-8', pretty_print=False):
|
encoding='utf-8', pretty_print=False):
|
||||||
"""Create empty book. Arguments:
|
"""Create empty book. Arguments:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user