This commit is contained in:
ldolse 2010-11-22 21:09:16 +08:00
parent b71a2874ff
commit 8c15219933

View File

@@ -477,7 +477,7 @@ class MobiReader(object):
self.processed_html = self.processed_html.replace('<mbp: ', '<mbp:') self.processed_html = self.processed_html.replace('<mbp: ', '<mbp:')
self.processed_html = re.sub(r'<?xml[^>]*>', '', self.processed_html) self.processed_html = re.sub(r'<?xml[^>]*>', '', self.processed_html)
# Swap inline and block level elements, and order block level elements according to priority # Swap inline and block level elements, and order block level elements according to priority
# - lxml and beautifulsoup live in the modern world and expect/assume a specific order # - lxml and beautifulsoup expect/assume a specific order based on xhtml spec
self.processed_html = re.sub(r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html) self.processed_html = re.sub(r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html)
self.processed_html = re.sub(r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', '\g<styletags>'+'\g<para>', self.processed_html) self.processed_html = re.sub(r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', '\g<styletags>'+'\g<para>', self.processed_html)