mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
HTML input: Use the correct name for the PDF line-unwrapping option. Conversion preprocessor: Allow input that was not produced by pdftohtml to use the line-unwrapping code.
This commit is contained in:
parent
26a0327943
commit
f08775dda7
@ -223,16 +223,7 @@ class HTMLPreProcessor(object):
|
|||||||
elif self.is_book_designer(html):
|
elif self.is_book_designer(html):
|
||||||
rules = self.BOOK_DESIGNER
|
rules = self.BOOK_DESIGNER
|
||||||
elif self.is_pdftohtml(html):
|
elif self.is_pdftohtml(html):
|
||||||
end_rules = []
|
rules = self.PDFTOHTML
|
||||||
if getattr(self.extra_opts, 'unwrap_factor', None):
|
|
||||||
length = line_length(html, getattr(self.extra_opts, 'unwrap_factor'))
|
|
||||||
if length:
|
|
||||||
end_rules.append(
|
|
||||||
# Un wrap using punctuation
|
|
||||||
(re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
|
|
||||||
)
|
|
||||||
|
|
||||||
rules = self.PDFTOHTML + end_rules
|
|
||||||
else:
|
else:
|
||||||
rules = []
|
rules = []
|
||||||
|
|
||||||
@ -246,7 +237,16 @@ class HTMLPreProcessor(object):
|
|||||||
(re.compile(getattr(self.extra_opts, 'footer_regex')), lambda match : '')
|
(re.compile(getattr(self.extra_opts, 'footer_regex')), lambda match : '')
|
||||||
)
|
)
|
||||||
|
|
||||||
for rule in self.PREPROCESS + pre_rules + rules:
|
end_rules = []
|
||||||
|
if getattr(self.extra_opts, 'unwrap_factor', None):
|
||||||
|
length = line_length(html, getattr(self.extra_opts, 'unwrap_factor'))
|
||||||
|
if length:
|
||||||
|
end_rules.append(
|
||||||
|
# Un wrap using punctuation
|
||||||
|
(re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
|
||||||
|
)
|
||||||
|
|
||||||
|
for rule in self.PREPROCESS + pre_rules + rules + end_rules:
|
||||||
html = rule[0].sub(rule[1], html)
|
html = rule[0].sub(rule[1], html)
|
||||||
|
|
||||||
# Handle broken XHTML w/ SVG (ugh)
|
# Handle broken XHTML w/ SVG (ugh)
|
||||||
|
@ -262,7 +262,7 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
)
|
)
|
||||||
),
|
),
|
||||||
|
|
||||||
OptionRecommendation(name='pdf_line_length', recommended_value=0.5,
|
OptionRecommendation(name='unwrap_factor', recommended_value=0.5,
|
||||||
help=_('Average line length for line breaking if the HTML is from a '
|
help=_('Average line length for line breaking if the HTML is from a '
|
||||||
'previous partial conversion of a PDF file.')),
|
'previous partial conversion of a PDF file.')),
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user