From c769c35b1d3b6f42c656c87df0857bd6e300dda0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 7 Oct 2008 18:22:01 -0700 Subject: [PATCH] Fix #1140 (text has random underlining) --- src/calibre/ebooks/html.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py index f13f5ee2f5..e3f8f516e1 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -436,11 +436,13 @@ class Parser(PreProcessor, LoggingInterface): ''' Create lxml ElementTree from HTML ''' self.log_info('\tParsing '+os.sep.join(self.htmlfile.path.split(os.sep)[-3:])) src = open(self.htmlfile.path, 'rb').read().decode(self.htmlfile.encoding, 'replace').strip() - src = src[src.find('<'):] src = self.preprocess(src) # lxml chokes on unicode input when it contains encoding declarations for pat in ENCODING_PATS: src = pat.sub('', src) + src = src[src.find('<'):] + # Remove unclosed