HTML input: Use the correct name for the PDF line-unwrapping option. Conversion preprocessor: Allow input that was not produced by pdftohtml to use the line-unwrapping code.

This commit is contained in:
John Schember 2009-09-01 17:27:24 -04:00
parent 26a0327943
commit f08775dda7
2 changed files with 12 additions and 12 deletions

View File

@ -223,16 +223,7 @@ class HTMLPreProcessor(object):
elif self.is_book_designer(html):
rules = self.BOOK_DESIGNER
elif self.is_pdftohtml(html):
end_rules = []
if getattr(self.extra_opts, 'unwrap_factor', None):
length = line_length(html, getattr(self.extra_opts, 'unwrap_factor'))
if length:
end_rules.append(
# Un wrap using punctuation
(re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
)
rules = self.PDFTOHTML + end_rules
rules = self.PDFTOHTML
else:
rules = []
@ -246,7 +237,16 @@ class HTMLPreProcessor(object):
(re.compile(getattr(self.extra_opts, 'footer_regex')), lambda match : '')
)
for rule in self.PREPROCESS + pre_rules + rules:
end_rules = []
if getattr(self.extra_opts, 'unwrap_factor', None):
length = line_length(html, getattr(self.extra_opts, 'unwrap_factor'))
if length:
end_rules.append(
# Un wrap using punctuation
(re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
)
for rule in self.PREPROCESS + pre_rules + rules + end_rules:
html = rule[0].sub(rule[1], html)
# Handle broken XHTML w/ SVG (ugh)

View File

@ -262,7 +262,7 @@ class HTMLInput(InputFormatPlugin):
)
),
OptionRecommendation(name='pdf_line_length', recommended_value=0.5,
OptionRecommendation(name='unwrap_factor', recommended_value=0.5,
help=_('Average line length for line breaking if the HTML is from a '
'previous partial conversion of a PDF file.')),