mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Add option to control line length when preprocessing PDF input
This commit is contained in:
parent
22ec9df720
commit
032c2b0fdc
@ -694,7 +694,7 @@ def create_oebbook(log, path_or_stream, opts, input_plugin, reader=None,
|
|||||||
'''
|
'''
|
||||||
from calibre.ebooks.oeb.base import OEBBook
|
from calibre.ebooks.oeb.base import OEBBook
|
||||||
html_preprocessor = HTMLPreProcessor(input_plugin.preprocess_html,
|
html_preprocessor = HTMLPreProcessor(input_plugin.preprocess_html,
|
||||||
opts.preprocess_html)
|
opts.preprocess_html, getattr(opts, 'pdf_line_length', 0.5))
|
||||||
oeb = OEBBook(log, html_preprocessor,
|
oeb = OEBBook(log, html_preprocessor,
|
||||||
pretty_print=opts.pretty_print, input_encoding=encoding)
|
pretty_print=opts.pretty_print, input_encoding=encoding)
|
||||||
if not populate:
|
if not populate:
|
||||||
|
@ -159,9 +159,11 @@ class HTMLPreProcessor(object):
|
|||||||
(re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
|
(re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
|
||||||
lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
|
lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
|
||||||
]
|
]
|
||||||
def __init__(self, input_plugin_preprocess, plugin_preprocess):
|
def __init__(self, input_plugin_preprocess, plugin_preprocess,
|
||||||
|
pdf_line_length):
|
||||||
self.input_plugin_preprocess = input_plugin_preprocess
|
self.input_plugin_preprocess = input_plugin_preprocess
|
||||||
self.plugin_preprocess = plugin_preprocess
|
self.plugin_preprocess = plugin_preprocess
|
||||||
|
self.pdf_line_length = pdf_line_length
|
||||||
|
|
||||||
def is_baen(self, src):
|
def is_baen(self, src):
|
||||||
return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"',
|
return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"',
|
||||||
@ -182,7 +184,7 @@ class HTMLPreProcessor(object):
|
|||||||
elif self.is_book_designer(html):
|
elif self.is_book_designer(html):
|
||||||
rules = self.BOOK_DESIGNER
|
rules = self.BOOK_DESIGNER
|
||||||
elif self.is_pdftohtml(html):
|
elif self.is_pdftohtml(html):
|
||||||
length = line_length(html, .3)
|
length = line_length(html, self.pdf_line_length)
|
||||||
line_length_rules = []
|
line_length_rules = []
|
||||||
if length:
|
if length:
|
||||||
line_length_rules = [
|
line_length_rules = [
|
||||||
|
@ -261,6 +261,11 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
'nasty side effects in the rest of of the conversion pipeline.'
|
'nasty side effects in the rest of of the conversion pipeline.'
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
|
|
||||||
|
OptionRecommendation(name='pdf_line_length', recommended_value=0.5,
|
||||||
|
help=_('Average line length for line breaking if the HTML is from a '
|
||||||
|
'previous partial conversion of a PDF file.')),
|
||||||
|
|
||||||
])
|
])
|
||||||
|
|
||||||
def convert(self, stream, opts, file_ext, log,
|
def convert(self, stream, opts, file_ext, log,
|
||||||
|
@ -20,6 +20,8 @@ class PDFInput(InputFormatPlugin):
|
|||||||
options = set([
|
options = set([
|
||||||
OptionRecommendation(name='no_images', recommended_value=False,
|
OptionRecommendation(name='no_images', recommended_value=False,
|
||||||
help=_('Do not extract images from the document')),
|
help=_('Do not extract images from the document')),
|
||||||
|
OptionRecommendation(name='pdf_line_length', recommended_value=0.5,
|
||||||
|
help=_('Average line length for line breaking')),
|
||||||
])
|
])
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
|
@ -140,6 +140,11 @@ sudo calibre_postinstall
|
|||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
<hr/>
|
<hr/>
|
||||||
|
<h3>Note</h3>
|
||||||
|
<p>
|
||||||
|
If your kernel is compiled with CONFIG_SYSFS_DEPRECATED, device detection may not work.
|
||||||
|
</p>
|
||||||
|
<hr/>
|
||||||
<h3>Dependencies</h3>
|
<h3>Dependencies</h3>
|
||||||
${app} has the following dependencies (the listed version is the minimum version)
|
${app} has the following dependencies (the listed version is the minimum version)
|
||||||
<br/><br/>
|
<br/><br/>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user