Build the line-unwrap rule outside the pdftohtml-only branch of
HTMLPreProcessor, so that it is appended after the other rules for every
input type whenever the 'unwrap_factor' option is set and line_length()
returns a non-zero length. Also rename the HTML input plugin option
'pdf_line_length' to 'unwrap_factor'.

NOTE(review): this patch arrived collapsed onto a single line; the line
breaks and indentation below were reconstructed so that each hunk matches
the line counts in its header. The unwrap regex arrived as "(?P)?\s*()",
which Python's re module rejects; the angle-bracketed parts appear to have
been stripped in transit. It is restored here as a named group "ital"
matching a closing i/b/u tag, followed by a group matching an opening
paragraph tag. Confirm both against the upstream revision (and against what
wrap_lines reads from the match) before applying.

diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index cb2564ec0a..029b9752e1 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -223,16 +223,7 @@ class HTMLPreProcessor(object):
         elif self.is_book_designer(html):
             rules = self.BOOK_DESIGNER
         elif self.is_pdftohtml(html):
-            end_rules = []
-            if getattr(self.extra_opts, 'unwrap_factor', None):
-                length = line_length(html, getattr(self.extra_opts, 'unwrap_factor'))
-                if length:
-                    end_rules.append(
-                        # Un wrap using punctuation
-                        (re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
-                    )
-
-            rules = self.PDFTOHTML + end_rules
+            rules = self.PDFTOHTML
         else:
             rules = []
 
@@ -246,7 +237,16 @@ class HTMLPreProcessor(object):
                 (re.compile(getattr(self.extra_opts, 'footer_regex')), lambda match : '')
             )
 
-        for rule in self.PREPROCESS + pre_rules + rules:
+        end_rules = []
+        if getattr(self.extra_opts, 'unwrap_factor', None):
+            length = line_length(html, getattr(self.extra_opts, 'unwrap_factor'))
+            if length:
+                end_rules.append(
+                    # Un wrap using punctuation
+                    (re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
+                )
+
+        for rule in self.PREPROCESS + pre_rules + rules + end_rules:
             html = rule[0].sub(rule[1], html)
 
         # Handle broken XHTML w/ SVG (ugh)
diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py
index 92c2df9690..7b7bfdf3aa 100644
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@@ -262,7 +262,7 @@ class HTMLInput(InputFormatPlugin):
             )
         ),
 
-        OptionRecommendation(name='pdf_line_length', recommended_value=0.5,
+        OptionRecommendation(name='unwrap_factor', recommended_value=0.5,
             help=_('Average line length for line breaking if the HTML is from a '
                 'previous partial conversion of a PDF file.')),
 