Implement #870 (Convert Mobipocket proprietary attributes to CSS)

This commit is contained in:
Kovid Goyal 2008-07-11 08:02:40 -07:00
parent 2f1a931bf7
commit b28b734354

View File

@ -13,7 +13,7 @@ except ImportError:
import Image as PILImage import Image as PILImage
from calibre import __appname__ from calibre import __appname__
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.huffcdic import HuffReader from calibre.ebooks.mobi.huffcdic import HuffReader
from calibre.ebooks.mobi.palmdoc import decompress_doc from calibre.ebooks.mobi.palmdoc import decompress_doc
@ -165,13 +165,14 @@ class MobiReader(object):
self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore') self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
self.extract_images(processed_records, output_dir) self.extract_images(processed_records, output_dir)
self.replace_page_breaks() self.replace_page_breaks()
self.cleanup() self.cleanup_html()
self.processed_html = re.compile('<head>', re.IGNORECASE).sub( self.processed_html = re.compile('<head>', re.IGNORECASE).sub(
'<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n', '<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n',
self.processed_html) self.processed_html)
soup = BeautifulSoup(self.processed_html.replace('> <', '>\n<')) soup = BeautifulSoup(self.processed_html.replace('> <', '>\n<'))
self.cleanup_soup(soup)
guide = soup.find('guide') guide = soup.find('guide')
for elem in soup.findAll(['metadata', 'guide']): for elem in soup.findAll(['metadata', 'guide']):
elem.extract() elem.extract()
@ -192,9 +193,30 @@ class MobiReader(object):
if ncx: if ncx:
open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx) open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
def cleanup_html(self):
    """Remove empty zero-height ``<div>`` elements from the raw markup.

    Reads ``self.processed_html`` and stores the result back on the same
    attribute.  Only divs written exactly as ``<div height="0..."></div>``
    (a zero height with an optional ``pt``/``px``/``ex``/``em``/``%`` unit
    and no content) are removed.

    Matching is case-insensitive because HTML tag and attribute names
    are, and the ``<head>`` substitution applied to the same markup is
    case-insensitive as well.
    """
    # Compile with the flag rather than passing flags to re.sub(), so the
    # call also works on Python versions whose re.sub() has no flags argument.
    empty_div = re.compile(
        r'<div height="0(pt|px|ex|em|%)?"></div>', re.IGNORECASE)
    self.processed_html = empty_div.sub('', self.processed_html)
def cleanup_soup(self, soup):
    """Convert Mobipocket ``height``/``width`` attributes to inline CSS.

    Walks every ``Tag`` under ``soup`` and, in place, replaces a
    ``height`` attribute with a ``margin-top`` declaration and a ``width``
    attribute with a ``text-indent`` declaration.  The new declarations are
    appended to any ``style`` attribute the tag already carries.

    Differences from a blind attribute-to-style rewrite:

    * Elements on which ``height``/``width`` are ordinary HTML dimensions
      (images, table parts, rules) are left untouched, so an image keeps
      its size instead of gaining a margin and a text indent.
    * A bare number gets a ``px`` suffix, because CSS only accepts a
      unitless length when it is zero.
      NOTE(review): assumes unitless Mobipocket values mean pixels, as
      they do for HTML dimension attributes -- confirm.
    * Empty attribute values are dropped rather than emitted as an empty
      declaration, and a trailing ``;`` on the existing style does not
      produce ``;;``.

    :param soup: parsed document (or any node offering
                 ``recursiveChildGenerator``); modified in place.
    :return: ``None``
    """
    # Mobipocket attribute -> CSS property it stands for.
    conversions = (('height', 'margin-top'), ('width', 'text-indent'))
    # Tags where height/width carry their normal HTML meaning.
    keep_dimensions = ('img', 'table', 'tr', 'td', 'th',
                       'col', 'colgroup', 'hr')
    bare_number = re.compile(r'^-?\d+(\.\d+)?$')

    for tag in soup.recursiveChildGenerator():
        # The generator also yields text nodes; only elements have attributes.
        if not isinstance(tag, Tag):
            continue
        if tag.name.lower() in keep_dimensions:
            continue

        converted = []
        for attr, prop in conversions:
            try:
                value = tag[attr]
            except KeyError:
                continue
            # The attribute is proprietary on this element: always remove it,
            # even when its value turns out to be unusable.
            del tag[attr]
            value = (value or '').strip()
            if not value:
                continue
            if bare_number.match(value):
                value += 'px'
            converted.append('%s: %s' % (prop, value))

        if not converted:
            # Nothing to add: leave any existing style attribute untouched.
            continue

        existing = (tag.get('style') or '').strip().rstrip(';').strip()
        if existing:
            converted.insert(0, existing)
        tag['style'] = '; '.join(converted)
def create_opf(self, htmlfile, guide=None): def create_opf(self, htmlfile, guide=None):
mi = self.book_header.exth.mi mi = self.book_header.exth.mi
opf = OPFCreator(os.path.dirname(htmlfile), mi) opf = OPFCreator(os.path.dirname(htmlfile), mi)