mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Add option to control line length when preprocessing PDF input
This commit is contained in:
parent
22ec9df720
commit
032c2b0fdc
@ -694,7 +694,7 @@ def create_oebbook(log, path_or_stream, opts, input_plugin, reader=None,
|
||||
'''
|
||||
from calibre.ebooks.oeb.base import OEBBook
|
||||
html_preprocessor = HTMLPreProcessor(input_plugin.preprocess_html,
|
||||
opts.preprocess_html)
|
||||
opts.preprocess_html, getattr(opts, 'pdf_line_length', 0.5))
|
||||
oeb = OEBBook(log, html_preprocessor,
|
||||
pretty_print=opts.pretty_print, input_encoding=encoding)
|
||||
if not populate:
|
||||
|
@ -159,9 +159,11 @@ class HTMLPreProcessor(object):
|
||||
(re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
|
||||
lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
|
||||
]
|
||||
def __init__(self, input_plugin_preprocess, plugin_preprocess):
|
||||
def __init__(self, input_plugin_preprocess, plugin_preprocess,
|
||||
pdf_line_length):
|
||||
self.input_plugin_preprocess = input_plugin_preprocess
|
||||
self.plugin_preprocess = plugin_preprocess
|
||||
self.pdf_line_length = pdf_line_length
|
||||
|
||||
def is_baen(self, src):
|
||||
return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"',
|
||||
@ -182,7 +184,7 @@ class HTMLPreProcessor(object):
|
||||
elif self.is_book_designer(html):
|
||||
rules = self.BOOK_DESIGNER
|
||||
elif self.is_pdftohtml(html):
|
||||
length = line_length(html, .3)
|
||||
length = line_length(html, self.pdf_line_length)
|
||||
line_length_rules = []
|
||||
if length:
|
||||
line_length_rules = [
|
||||
|
@ -261,6 +261,11 @@ class HTMLInput(InputFormatPlugin):
|
||||
'nasty side effects in the rest of of the conversion pipeline.'
|
||||
)
|
||||
),
|
||||
|
||||
OptionRecommendation(name='pdf_line_length', recommended_value=0.5,
|
||||
help=_('Average line length for line breaking if the HTML is from a '
|
||||
'previous partial conversion of a PDF file.')),
|
||||
|
||||
])
|
||||
|
||||
def convert(self, stream, opts, file_ext, log,
|
||||
|
@ -20,6 +20,8 @@ class PDFInput(InputFormatPlugin):
|
||||
options = set([
|
||||
OptionRecommendation(name='no_images', recommended_value=False,
|
||||
help=_('Do not extract images from the document')),
|
||||
OptionRecommendation(name='pdf_line_length', recommended_value=0.5,
|
||||
help=_('Average line length for line breaking')),
|
||||
])
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
|
@ -140,6 +140,11 @@ sudo calibre_postinstall
|
||||
</form>
|
||||
</div>
|
||||
<hr/>
|
||||
<h3>Note</h3>
|
||||
<p>
|
||||
If your kernel is compiled with CONFIG_SYSFS_DEPRECATED, device detection may not work.
|
||||
</p>
|
||||
<hr/>
|
||||
<h3>Dependencies</h3>
|
||||
${app} has the following dependencies (the listed version is the minimum version)
|
||||
<br/><br/>
|
||||
|
Loading…
x
Reference in New Issue
Block a user