From dc09be0385e6d8e46cd9599962c58ca3d02f531d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 8 Mar 2008 19:10:38 +0000 Subject: [PATCH] Get mobi2oeb to produce nicer looking HTML output --- src/libprs500/ebooks/mobi/reader.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/libprs500/ebooks/mobi/reader.py b/src/libprs500/ebooks/mobi/reader.py index fca79dd8ea..03eaa9ddea 100644 --- a/src/libprs500/ebooks/mobi/reader.py +++ b/src/libprs500/ebooks/mobi/reader.py @@ -26,6 +26,7 @@ except ImportError: import Image as PILImage from libprs500 import __appname__ +from libprs500.ebooks.BeautifulSoup import BeautifulSoup from libprs500.ebooks.mobi import MobiError from libprs500.ebooks.mobi.huffcdic import HuffReader from libprs500.ebooks.mobi.palmdoc import decompress_doc @@ -177,9 +178,12 @@ class MobiReader(object): self.processed_html = re.compile('', re.IGNORECASE).sub( '\n\n', self.processed_html) - + + soup = BeautifulSoup(self.processed_html.replace('> <', '>\n<')) + for elem in soup.findAll(['metadata', 'guide']): + elem.extract() htmlfile = os.path.join(output_dir, self.name+'.html') - open(htmlfile, 'wb').write(self.processed_html.encode('utf8')) + open(htmlfile, 'wb').write(unicode(soup).encode('utf8')) self.htmlfile = htmlfile if self.book_header.exth is not None: