HTML input: Use the correct name (unwrap_factor) for the PDF line unwrapping option. Conversion preprocessor: Allow input that was not produced by pdftohtml to use the line unwrapping code as well.

2025-07-09 03:04:10 -04:00 · 2009-09-01 17:27:24 -04:00 · 2009-09-01 17:27:24 -04:00 · f08775dda7
commit f08775dda7
parent 26a0327943
2 changed files with 12 additions and 12 deletions
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -223,16 +223,7 @@ class HTMLPreProcessor(object):
        elif self.is_book_designer(html):
            rules = self.BOOK_DESIGNER
        elif self.is_pdftohtml(html):
-            end_rules = []
-            if getattr(self.extra_opts, 'unwrap_factor', None):
-                length = line_length(html, getattr(self.extra_opts, 'unwrap_factor'))
-                if length:
-                    end_rules.append(
-                        # Un wrap using punctuation
-                        (re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
-                    )
-
-            rules = self.PDFTOHTML + end_rules
+            rules = self.PDFTOHTML
        else:
            rules = []

@ -246,7 +237,16 @@ class HTMLPreProcessor(object):
                (re.compile(getattr(self.extra_opts, 'footer_regex')), lambda match : '')
            )

-        for rule in self.PREPROCESS + pre_rules + rules:
+            end_rules = []
+            if getattr(self.extra_opts, 'unwrap_factor', None):
+                length = line_length(html, getattr(self.extra_opts, 'unwrap_factor'))
+                if length:
+                    end_rules.append(
+                        # Un wrap using punctuation
+                        (re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
+                    )
+
+        for rule in self.PREPROCESS + pre_rules + rules + end_rules:
            html = rule[0].sub(rule[1], html)

        # Handle broken XHTML w/ SVG (ugh)
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@ -262,7 +262,7 @@ class HTMLInput(InputFormatPlugin):
                )
        ),

-        OptionRecommendation(name='pdf_line_length', recommended_value=0.5,
+        OptionRecommendation(name='unwrap_factor', recommended_value=0.5,
            help=_('Average line length for line breaking if the HTML is from a '
                'previous partial conversion of a PDF file.')),