Integrated own cleanup patch

This commit is contained in:
Marshall T. Vandegrift 2008-07-11 14:37:27 -04:00
parent 8c5edb39f8
commit da29a58363

View File

@ -13,7 +13,7 @@ except ImportError:
import Image as PILImage
from calibre import __appname__
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.huffcdic import HuffReader
from calibre.ebooks.mobi.palmdoc import decompress_doc
@ -165,13 +165,14 @@ class MobiReader(object):
self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
self.extract_images(processed_records, output_dir)
self.replace_page_breaks()
self.cleanup()
self.cleanup_html()
self.processed_html = re.compile('<head>', re.IGNORECASE).sub(
'<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n',
self.processed_html)
soup = BeautifulSoup(self.processed_html.replace('> <', '>\n<'))
self.cleanup_soup(soup)
guide = soup.find('guide')
for elem in soup.findAll(['metadata', 'guide']):
elem.extract()
@ -192,10 +193,29 @@ class MobiReader(object):
if ncx:
open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
def cleanup(self):
def cleanup_html(self):
self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
self.processed_html = re.sub(r'<([^>]*) height="([^"]*)"', r'<\1 style="margin-top: \2"', self.processed_html)
self.processed_html = re.sub(r'<([^>]*) width="([^"]*)"', r'<\1 style="text-indent: \2"', self.processed_html)
def cleanup_soup(self, soup):
    """Fold ``height``/``width`` attributes into each tag's ``style``.

    Iterates over ``soup.recursiveChildGenerator()`` and, for every
    node that is a ``Tag``:

    * keeps any existing ``style`` attribute value as the first entry,
    * turns a ``height`` attribute into a ``margin-top`` declaration
      and deletes the attribute,
    * turns a ``width`` attribute into a ``text-indent`` declaration
      and deletes the attribute,
    * writes the collected entries back to ``style`` joined by ``'; '``.

    Tags that carry none of these attributes are left untouched.
    The soup is modified in place; nothing is returned.
    """
    # (attribute to remove, CSS property that replaces it); the order
    # here fixes the order of the declarations in the final style.
    attr_to_property = (
        ('height', 'margin-top'),
        ('width', 'text-indent'),
    )
    for node in soup.recursiveChildGenerator():
        if not isinstance(node, Tag):
            # Non-Tag nodes have no attributes to rewrite.
            continue
        declarations = []
        try:
            declarations.append(node['style'])
        except KeyError:
            pass
        for attr_name, css_property in attr_to_property:
            try:
                attr_value = node[attr_name]
            except KeyError:
                continue
            declarations.append('%s: %s' % (css_property, attr_value))
            del node[attr_name]
        if declarations:
            node['style'] = '; '.join(declarations)
def create_opf(self, htmlfile, guide=None):
mi = self.book_header.exth.mi