Fix #8904 (Lit in Pre tags, extra nbsp paragraph inserted between paragraphs.)

2025-07-09 03:04:10 -04:00 · 2011-02-10 08:55:34 -07:00 · 2011-02-10 08:55:34 -07:00 · 8da5c59f02
commit 8da5c59f02
parent 7a6634d405 740e855547
2 changed files with 4 additions and 5 deletions
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -342,11 +342,9 @@ class HeuristicProcessor(object):
        return content
    def txt_process(self, match):
-        from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
+        from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs_single_line
-        separate_paragraphs_single_line
        content = match.group('text')
        content = separate_paragraphs_single_line(content)
-        content = preserve_spaces(content)
        content = convert_basic(content, epub_split_size_kb=0)
        return content
@@ -356,6 +354,8 @@ class HeuristicProcessor(object):
            self.log.debug("Running Text Processing")
            outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*?)</pre>', re.IGNORECASE|re.DOTALL)
            html = outerhtml.sub(self.txt_process, html)
+            from calibre.ebooks.conversion.preprocess import convert_entities
+            html = re.sub(r'&(\S+?);', convert_entities, html)
        else:
            # Add markup naively
            # TODO - find out if there are cases where there are more than one <pre> tag or
--- a/src/calibre/ebooks/lit/input.py
+++ b/src/calibre/ebooks/lit/input.py
@@ -37,13 +37,12 @@ class LITInput(InputFormatPlugin):
                body = body[0]
                if len(body) == 1 and body[0].tag == XHTML('pre'):
                    pre = body[0]
-                    from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
+                    from calibre.ebooks.txt.processor import convert_basic, \
                        separate_paragraphs_single_line
                    from calibre.ebooks.chardet import xml_to_unicode
                    from lxml import etree
                    import copy
                    html = separate_paragraphs_single_line(pre.text)
-                    html = preserve_spaces(html)
                    html = convert_basic(html).replace('<html>',
                            '<html xmlns="%s">'%XHTML_NS)
                    html = xml_to_unicode(html, strip_encoding_pats=True,