mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #8904 (Lit in Pre tags, extra nbsp paragraph inserted between paragraphs.)
This commit is contained in:
commit
8da5c59f02
@@ -342,11 +342,9 @@ class HeuristicProcessor(object):
|
||||
return content
|
||||
|
||||
def txt_process(self, match):
|
||||
from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
|
||||
separate_paragraphs_single_line
|
||||
from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs_single_line
|
||||
content = match.group('text')
|
||||
content = separate_paragraphs_single_line(content)
|
||||
content = preserve_spaces(content)
|
||||
content = convert_basic(content, epub_split_size_kb=0)
|
||||
return content
|
||||
|
||||
@@ -356,6 +354,8 @@ class HeuristicProcessor(object):
|
||||
self.log.debug("Running Text Processing")
|
||||
outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*?)</pre>', re.IGNORECASE|re.DOTALL)
|
||||
html = outerhtml.sub(self.txt_process, html)
|
||||
from calibre.ebooks.conversion.preprocess import convert_entities
|
||||
html = re.sub(r'&(\S+?);', convert_entities, html)
|
||||
else:
|
||||
# Add markup naively
|
||||
# TODO - find out if there are cases where there are more than one <pre> tag or
|
||||
|
@@ -37,13 +37,12 @@ class LITInput(InputFormatPlugin):
|
||||
body = body[0]
|
||||
if len(body) == 1 and body[0].tag == XHTML('pre'):
|
||||
pre = body[0]
|
||||
from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
|
||||
from calibre.ebooks.txt.processor import convert_basic, \
|
||||
separate_paragraphs_single_line
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from lxml import etree
|
||||
import copy
|
||||
html = separate_paragraphs_single_line(pre.text)
|
||||
html = preserve_spaces(html)
|
||||
html = convert_basic(html).replace('<html>',
|
||||
'<html xmlns="%s">'%XHTML_NS)
|
||||
html = xml_to_unicode(html, strip_encoding_pats=True,
|
||||
|
Loading…
x
Reference in New Issue
Block a user