Add option to control line length when preprocessing PDF input

This commit is contained in:
Kovid Goyal 2009-06-23 07:42:30 -07:00
parent 22ec9df720
commit 032c2b0fdc
5 changed files with 17 additions and 3 deletions

View File

@ -694,7 +694,7 @@ def create_oebbook(log, path_or_stream, opts, input_plugin, reader=None,
'''
from calibre.ebooks.oeb.base import OEBBook
html_preprocessor = HTMLPreProcessor(input_plugin.preprocess_html,
opts.preprocess_html)
opts.preprocess_html, getattr(opts, 'pdf_line_length', 0.5))
oeb = OEBBook(log, html_preprocessor,
pretty_print=opts.pretty_print, input_encoding=encoding)
if not populate:

View File

@ -159,9 +159,11 @@ class HTMLPreProcessor(object):
(re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
]
def __init__(self, input_plugin_preprocess, plugin_preprocess):
def __init__(self, input_plugin_preprocess, plugin_preprocess,
pdf_line_length):
self.input_plugin_preprocess = input_plugin_preprocess
self.plugin_preprocess = plugin_preprocess
self.pdf_line_length = pdf_line_length
def is_baen(self, src):
return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"',
@ -182,7 +184,7 @@ class HTMLPreProcessor(object):
elif self.is_book_designer(html):
rules = self.BOOK_DESIGNER
elif self.is_pdftohtml(html):
length = line_length(html, .3)
length = line_length(html, self.pdf_line_length)
line_length_rules = []
if length:
line_length_rules = [

View File

@ -261,6 +261,11 @@ class HTMLInput(InputFormatPlugin):
'nasty side effects in the rest of of the conversion pipeline.'
)
),
OptionRecommendation(name='pdf_line_length', recommended_value=0.5,
help=_('Average line length for line breaking if the HTML is from a '
'previous partial conversion of a PDF file.')),
])
def convert(self, stream, opts, file_ext, log,

View File

@ -20,6 +20,8 @@ class PDFInput(InputFormatPlugin):
options = set([
OptionRecommendation(name='no_images', recommended_value=False,
help=_('Do not extract images from the document')),
OptionRecommendation(name='pdf_line_length', recommended_value=0.5,
help=_('Average line length for line breaking')),
])
def convert(self, stream, options, file_ext, log,

View File

@ -140,6 +140,11 @@ sudo calibre_postinstall
</form>
</div>
<hr/>
<h3>Note</h3>
<p>
If your kernel is compiled with CONFIG_SYSFS_DEPRECATED, device detection may not work.
</p>
<hr/>
<h3>Dependencies</h3>
${app} has the following dependencies (the listed version is the minimum version)
<br/><br/>