mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
MOBI Input: Fix handling of numeric entities and convert empty <pre> tags to <div>, as they cause incorrect rendering in most HTML renderers
This commit is contained in:
parent
49c1818125
commit
2cca07250b
@ -186,6 +186,8 @@ class MobiReader(object):
|
|||||||
self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
|
self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
|
||||||
for pat in ENCODING_PATS:
|
for pat in ENCODING_PATS:
|
||||||
self.processed_html = pat.sub('', self.processed_html)
|
self.processed_html = pat.sub('', self.processed_html)
|
||||||
|
self.processed_html = re.sub(r'&(\S+?);', entity_to_unicode,
|
||||||
|
self.processed_html)
|
||||||
self.extract_images(processed_records, output_dir)
|
self.extract_images(processed_records, output_dir)
|
||||||
self.replace_page_breaks()
|
self.replace_page_breaks()
|
||||||
self.cleanup_html()
|
self.cleanup_html()
|
||||||
@ -271,6 +273,8 @@ class MobiReader(object):
|
|||||||
for key in tag.attrib.keys():
|
for key in tag.attrib.keys():
|
||||||
tag.attrib.pop(key)
|
tag.attrib.pop(key)
|
||||||
continue
|
continue
|
||||||
|
if tag.tag == 'pre' and not tag.text:
|
||||||
|
tag.tag = 'div'
|
||||||
styles, attrib = [], tag.attrib
|
styles, attrib = [], tag.attrib
|
||||||
if attrib.has_key('style'):
|
if attrib.has_key('style'):
|
||||||
style = attrib.pop('style').strip()
|
style = attrib.pop('style').strip()
|
||||||
@ -452,6 +456,7 @@ class MobiReader(object):
|
|||||||
pos = end
|
pos = end
|
||||||
self.processed_html += self.mobi_html[pos:]
|
self.processed_html += self.mobi_html[pos:]
|
||||||
|
|
||||||
|
|
||||||
def extract_images(self, processed_records, output_dir):
|
def extract_images(self, processed_records, output_dir):
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
print 'Extracting images...'
|
print 'Extracting images...'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user