Implement #870 (Convert Mobipocket proprietary attributes to CSS)

2025-07-09 03:04:10 -04:00 · 2008-07-11 08:02:40 -07:00 · 2008-07-11 08:02:40 -07:00 · b28b734354
commit b28b734354
parent 2f1a931bf7
1 changed files with 25 additions and 3 deletions
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -13,7 +13,7 @@ except ImportError:
    import Image as PILImage
 from calibre import __appname__
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 from calibre.ebooks.mobi import MobiError
 from calibre.ebooks.mobi.huffcdic import HuffReader
 from calibre.ebooks.mobi.palmdoc import decompress_doc
@@ -165,13 +165,14 @@ class MobiReader(object):
        self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
        self.extract_images(processed_records, output_dir)
        self.replace_page_breaks()
-        self.cleanup()
+        self.cleanup_html()
        self.processed_html = re.compile('<head>', re.IGNORECASE).sub(
            '<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n',
                                     self.processed_html)
        soup = BeautifulSoup(self.processed_html.replace('> <', '>\n<'))
        self.cleanup_soup(soup)
        guide = soup.find('guide')
        for elem in soup.findAll(['metadata', 'guide']):
            elem.extract()
@@ -192,9 +193,30 @@ class MobiReader(object):
            if ncx:
                open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
-    def cleanup(self):
+    def cleanup_html(self):
        self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
    def cleanup_soup(self, soup):
        """Fold Mobipocket ``height``/``width`` attributes into inline CSS.

        Walks every node yielded by ``soup.recursiveChildGenerator()`` and,
        for each ``Tag``, rewrites the proprietary attributes as style
        declarations:

        * ``height`` becomes ``margin-top``
        * ``width`` becomes ``text-indent``

        Any ``style`` attribute already on the tag is kept and placed first.
        The converted attributes are deleted from the tag, and the collected
        declarations are joined with ``'; '`` and stored back in ``style``.
        Tags carrying none of the three attributes are left untouched.

        :param soup: parsed document tree; modified in place.
        """
        # (source attribute, CSS declaration template), applied in this order.
        conversions = (
            ('height', 'margin-top: %s'),
            ('width', 'text-indent: %s'),
        )
        for node in soup.recursiveChildGenerator():
            # Text nodes, comments, etc. carry no attributes to convert.
            if isinstance(node, Tag):
                try:
                    declarations = [node['style']]
                except KeyError:
                    declarations = []
                for attr, template in conversions:
                    try:
                        value = node[attr]
                    except KeyError:
                        # Attribute absent on this tag: nothing to convert.
                        continue
                    declarations.append(template % value)
                    del node[attr]
                if declarations:
                    node['style'] = '; '.join(declarations)
    def create_opf(self, htmlfile, guide=None):
        mi = self.book_header.exth.mi
        opf = OPFCreator(os.path.dirname(htmlfile), mi)