From 38e08a7bd39b868ac496f0e0dcbf1bacb7f231b8 Mon Sep 17 00:00:00 2001 From: ldolse Date: Mon, 22 Nov 2010 19:19:47 +0800 Subject: [PATCH] had to include heading tags with the previous change, because lxml/beautifulsoup were equally unhappy when heading tags contained paragraph tags --- src/calibre/ebooks/mobi/reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 972b049643..355fc87c0c 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -477,8 +477,8 @@ class MobiReader(object): self.processed_html = self.processed_html.replace(']*>', '', self.processed_html) # Rearrange various style tags & paragraph tags so that lxml and BeautifulSoup don't get upset - self.processed_html = re.sub(r'(?i)(?P<styletags>(<(i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html) - self.processed_html = re.sub(r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(i|b|u|em|small|big|strong|tt)>\s*){1,})', '\g<styletags>'+'\g<para>', self.processed_html) + self.processed_html = re.sub(r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html) + self.processed_html = re.sub(r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', '\g<styletags>'+'\g<para>', self.processed_html) def remove_random_bytes(self, html): return re.sub('\x14|\x15|\x19|\x1c|\x1d|\xef|\x12|\x13|\xec|\x08',