mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #8904 (Lit in Pre tags, extra nbsp paragraph inserted between paragraphs.)
This commit is contained in:
commit
8da5c59f02
@ -342,11 +342,9 @@ class HeuristicProcessor(object):
|
|||||||
return content
|
return content
|
||||||
|
|
||||||
def txt_process(self, match):
|
def txt_process(self, match):
|
||||||
from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
|
from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs_single_line
|
||||||
separate_paragraphs_single_line
|
|
||||||
content = match.group('text')
|
content = match.group('text')
|
||||||
content = separate_paragraphs_single_line(content)
|
content = separate_paragraphs_single_line(content)
|
||||||
content = preserve_spaces(content)
|
|
||||||
content = convert_basic(content, epub_split_size_kb=0)
|
content = convert_basic(content, epub_split_size_kb=0)
|
||||||
return content
|
return content
|
||||||
|
|
||||||
@ -356,6 +354,8 @@ class HeuristicProcessor(object):
|
|||||||
self.log.debug("Running Text Processing")
|
self.log.debug("Running Text Processing")
|
||||||
outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*?)</pre>', re.IGNORECASE|re.DOTALL)
|
outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*?)</pre>', re.IGNORECASE|re.DOTALL)
|
||||||
html = outerhtml.sub(self.txt_process, html)
|
html = outerhtml.sub(self.txt_process, html)
|
||||||
|
from calibre.ebooks.conversion.preprocess import convert_entities
|
||||||
|
html = re.sub(r'&(\S+?);', convert_entities, html)
|
||||||
else:
|
else:
|
||||||
# Add markup naively
|
# Add markup naively
|
||||||
# TODO - find out if there are cases where there are more than one <pre> tag or
|
# TODO - find out if there are cases where there are more than one <pre> tag or
|
||||||
|
@ -37,13 +37,12 @@ class LITInput(InputFormatPlugin):
|
|||||||
body = body[0]
|
body = body[0]
|
||||||
if len(body) == 1 and body[0].tag == XHTML('pre'):
|
if len(body) == 1 and body[0].tag == XHTML('pre'):
|
||||||
pre = body[0]
|
pre = body[0]
|
||||||
from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
|
from calibre.ebooks.txt.processor import convert_basic, \
|
||||||
separate_paragraphs_single_line
|
separate_paragraphs_single_line
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
import copy
|
import copy
|
||||||
html = separate_paragraphs_single_line(pre.text)
|
html = separate_paragraphs_single_line(pre.text)
|
||||||
html = preserve_spaces(html)
|
|
||||||
html = convert_basic(html).replace('<html>',
|
html = convert_basic(html).replace('<html>',
|
||||||
'<html xmlns="%s">'%XHTML_NS)
|
'<html xmlns="%s">'%XHTML_NS)
|
||||||
html = xml_to_unicode(html, strip_encoding_pats=True,
|
html = xml_to_unicode(html, strip_encoding_pats=True,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user