mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Integrated own cleanup patch
This commit is contained in:
parent
8c5edb39f8
commit
da29a58363
@ -13,7 +13,7 @@ except ImportError:
|
||||
import Image as PILImage
|
||||
|
||||
from calibre import __appname__
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
from calibre.ebooks.mobi import MobiError
|
||||
from calibre.ebooks.mobi.huffcdic import HuffReader
|
||||
from calibre.ebooks.mobi.palmdoc import decompress_doc
|
||||
@ -165,13 +165,14 @@ class MobiReader(object):
|
||||
self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
|
||||
self.extract_images(processed_records, output_dir)
|
||||
self.replace_page_breaks()
|
||||
self.cleanup()
|
||||
self.cleanup_html()
|
||||
|
||||
self.processed_html = re.compile('<head>', re.IGNORECASE).sub(
|
||||
'<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n',
|
||||
self.processed_html)
|
||||
|
||||
soup = BeautifulSoup(self.processed_html.replace('> <', '>\n<'))
|
||||
self.cleanup_soup(soup)
|
||||
guide = soup.find('guide')
|
||||
for elem in soup.findAll(['metadata', 'guide']):
|
||||
elem.extract()
|
||||
@ -192,10 +193,29 @@ class MobiReader(object):
|
||||
if ncx:
|
||||
open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
|
||||
|
||||
def cleanup(self):
|
||||
def cleanup_html(self):
    """Remove zero-height spacer ``<div>`` elements from the raw markup.

    Operates in place on ``self.processed_html`` (the decoded HTML string)
    and returns nothing.

    Only the textual clean-up that is safe to do with a regular expression
    happens here.  ``height``/``width`` attributes are deliberately left
    untouched: ``cleanup_soup`` converts them on the parsed tree and merges
    them into a single ``style`` attribute.  Rewriting them here with
    regular expressions gave a tag that carried both attributes two separate
    ``style="..."`` attributes, and left nothing for ``cleanup_soup`` to
    merge.
    """
    # Matches <div height="0"></div> with an optional CSS unit after the 0.
    self.processed_html = re.sub(
        r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
|
||||
|
||||
def cleanup_soup(self, soup):
    """Fold ``height``/``width`` attributes into each tag's ``style``.

    Walks every ``Tag`` in *soup* and, for each one that carries a
    ``height`` and/or ``width`` attribute, removes the attribute and appends
    an equivalent CSS declaration (``margin-top`` for ``height``,
    ``text-indent`` for ``width``) to the tag's ``style`` attribute.  Any
    existing ``style`` value is kept and placed first.  Tags without either
    attribute are left exactly as they are.

    :param soup: parsed document tree; modified in place.
    """
    # Source attribute -> CSS property that replaces it, in output order.
    attr_to_css = (('height', 'margin-top'), ('width', 'text-indent'))

    for tag in soup.recursiveChildGenerator():
        # The generator also yields text nodes; only tags have attributes.
        if not isinstance(tag, Tag):
            continue

        try:
            # A valueless attribute may come back as None; treat it as empty.
            existing = tag['style'] or ''
        except KeyError:
            existing = ''
        # Drop a trailing ';' so the join below cannot produce ';;'.
        existing = existing.strip().rstrip(';').rstrip()
        styles = [existing] if existing else []

        converted = False
        for attr, css_property in attr_to_css:
            try:
                value = tag[attr]
            except KeyError:
                continue
            styles.append('%s: %s' % (css_property, value))
            del tag[attr]
            converted = True

        # Only touch ``style`` when something was actually folded into it.
        if converted:
            tag['style'] = '; '.join(styles)
|
||||
|
||||
def create_opf(self, htmlfile, guide=None):
|
||||
mi = self.book_header.exth.mi
|
||||
|
Loading…
x
Reference in New Issue
Block a user