Fix #8904 (LIT content in <pre> tags: extra nbsp paragraph inserted between paragraphs)

This commit is contained in:
Kovid Goyal 2011-02-10 08:55:34 -07:00
commit 8da5c59f02
2 changed files with 4 additions and 5 deletions

View File

@@ -342,11 +342,9 @@ class HeuristicProcessor(object):
return content return content
def txt_process(self, match): def txt_process(self, match):
from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \ from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs_single_line
separate_paragraphs_single_line
content = match.group('text') content = match.group('text')
content = separate_paragraphs_single_line(content) content = separate_paragraphs_single_line(content)
content = preserve_spaces(content)
content = convert_basic(content, epub_split_size_kb=0) content = convert_basic(content, epub_split_size_kb=0)
return content return content
@@ -356,6 +354,8 @@ class HeuristicProcessor(object):
self.log.debug("Running Text Processing") self.log.debug("Running Text Processing")
outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*?)</pre>', re.IGNORECASE|re.DOTALL) outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*?)</pre>', re.IGNORECASE|re.DOTALL)
html = outerhtml.sub(self.txt_process, html) html = outerhtml.sub(self.txt_process, html)
from calibre.ebooks.conversion.preprocess import convert_entities
html = re.sub(r'&(\S+?);', convert_entities, html)
else: else:
# Add markup naively # Add markup naively
# TODO - find out if there are cases where there are more than one <pre> tag or # TODO - find out if there are cases where there are more than one <pre> tag or

View File

@@ -37,13 +37,12 @@ class LITInput(InputFormatPlugin):
body = body[0] body = body[0]
if len(body) == 1 and body[0].tag == XHTML('pre'): if len(body) == 1 and body[0].tag == XHTML('pre'):
pre = body[0] pre = body[0]
from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \ from calibre.ebooks.txt.processor import convert_basic, \
separate_paragraphs_single_line separate_paragraphs_single_line
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from lxml import etree from lxml import etree
import copy import copy
html = separate_paragraphs_single_line(pre.text) html = separate_paragraphs_single_line(pre.text)
html = preserve_spaces(html)
html = convert_basic(html).replace('<html>', html = convert_basic(html).replace('<html>',
'<html xmlns="%s">'%XHTML_NS) '<html xmlns="%s">'%XHTML_NS)
html = xml_to_unicode(html, strip_encoding_pats=True, html = xml_to_unicode(html, strip_encoding_pats=True,