diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index 79f4f7631e..416fe61789 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -59,10 +59,9 @@ class HTMLRenderer(object): def render_html(path_to_html, width=590, height=750): from PyQt4.QtWebKit import QWebPage - from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize, \ - QApplication - if QApplication.instance() is None: - QApplication([]) + from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize + from calibre.gui2 import is_ok_to_use_qt + if not is_ok_to_use_qt(): return None path_to_html = os.path.abspath(path_to_html) with CurrentDir(os.path.dirname(path_to_html)): page = QWebPage() diff --git a/src/calibre/ebooks/epub/input.py b/src/calibre/ebooks/epub/input.py index f134ea6abd..b748429725 100644 --- a/src/calibre/ebooks/epub/input.py +++ b/src/calibre/ebooks/epub/input.py @@ -80,8 +80,10 @@ class EPUBInput(InputFormatPlugin): t.set('href', guide_cover) t.set('title', 'Title Page') from calibre.ebooks import render_html - open('calibre_raster_cover.jpg', 'wb').write( - render_html(guide_cover).data) + renderer = render_html(guide_cover) + if renderer is not None: + open('calibre_raster_cover.jpg', 'wb').write( + renderer.data) def convert(self, stream, options, file_ext, log, accelerators): diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index ff398ca3bb..b787ce7e7c 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -290,14 +290,6 @@ class MobiReader(object): self.replace_page_breaks() self.cleanup_html() - if self.processed_html.startswith('' - self.processed_html = \ - re.compile('', re.IGNORECASE).sub( - '\n\n' - '\t\n', - self.processed_html) - self.log.debug('Parsing HTML...') root = html.fromstring(self.processed_html) if root.xpath('descendant::p/descendant::p'): @@ -305,7 +297,7 @@ class MobiReader(object): self.log.warning('Markup contains unclosed

tags, parsing using', 'BeatifulSoup') root = soupparser.fromstring(self.processed_html) - if root[0].tag != 'html': + if root.tag != 'html': self.log.warn('File does not have opening tag') nroot = html.fromstring('') bod = nroot.find('body') @@ -314,6 +306,35 @@ class MobiReader(object): bod.append(child) root = nroot + htmls = list(root.xpath('//html')) + if len(htmls) > 1: + self.log.warn('Markup contains multiple tags') + # Keep only the largest head and body + bodies, heads = root.xpath('//body'), root.xpath('//head') + def sz(x): return len(list(x.iter())) + def scmp(x, y): return cmp(sz(x), sz(y)) + body = list(sorted(bodies, cmp=scmp)) + head = list(sorted(heads, cmp=scmp)) + for x in root: root.remove(x) + if head: + root.append(head[-1]) + if body: + root.append(body[-1]) + for x in root.xpath('//script'): + x.getparent().remove(x) + + head = root.xpath('//head') + if head: + head = head[0] + else: + head = root.makeelement('head', {}) + root.insert(0, head) + head.text = '\n\t' + link = head.makeelement('link', {'type':'text/css', + 'href':'styles.css'}) + head.insert(0, link) + link.tail = '\n\t' + self.upshift_markup(root) guides = root.xpath('//guide') guide = guides[0] if guides else None diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index 86e60a7784..0cc17bd14f 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -369,13 +369,13 @@ class FlowSplitter(object): for path in ( '//*[re:match(name(), "h[1-6]", "i")]', - '/html/body/div', - '//pre', - '//hr', - '//p', - '//div', - '//br', - '//li', + '/h:html/h:body/h:div', + '//h:pre', + '//h:hr', + '//h:p', + '//h:div', + '//h:br', + '//h:li', ): elems = root.xpath(path, namespaces=NAMESPACES) elem = pick_elem(elems)