Miscellaneous fixes

2025-07-09 03:04:10 -04:00 · 2009-05-09 01:03:53 -07:00 · 2009-05-09 01:03:53 -07:00 · bd26c92eb9
commit bd26c92eb9
parent f8471e4457
4 changed files with 44 additions and 22 deletions
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@@ -59,10 +59,9 @@ class HTMLRenderer(object):

 def render_html(path_to_html, width=590, height=750):
    from PyQt4.QtWebKit import QWebPage
-    from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize, \
-                         QApplication
-    if QApplication.instance() is None:
-        QApplication([])
+    from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize
+    from calibre.gui2 import is_ok_to_use_qt
+    if not is_ok_to_use_qt(): return None
    path_to_html = os.path.abspath(path_to_html)
    with CurrentDir(os.path.dirname(path_to_html)):
        page = QWebPage()
--- a/src/calibre/ebooks/epub/input.py
+++ b/src/calibre/ebooks/epub/input.py
@@ -80,8 +80,10 @@ class EPUBInput(InputFormatPlugin):
        t.set('href', guide_cover)
        t.set('title', 'Title Page')
        from calibre.ebooks import render_html
+        renderer = render_html(guide_cover)
+        if renderer is not None:
            open('calibre_raster_cover.jpg', 'wb').write(
-                render_html(guide_cover).data)
+                renderer.data)


    def convert(self, stream, options, file_ext, log, accelerators):
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -290,14 +290,6 @@ class MobiReader(object):
        self.replace_page_breaks()
        self.cleanup_html()

-        if self.processed_html.startswith('<body'):
-            self.processed_html = '<html><head></head>'+self.processed_html+'</html>'
-        self.processed_html = \
-            re.compile('<head>', re.IGNORECASE).sub(
-                '\n<head>\n'
-                '\t<link type="text/css" href="styles.css" />\n',
-                self.processed_html)
-
        self.log.debug('Parsing HTML...')
        root = html.fromstring(self.processed_html)
        if root.xpath('descendant::p/descendant::p'):
@@ -305,7 +297,7 @@ class MobiReader(object):
            self.log.warning('Markup contains unclosed <p> tags, parsing using',
                    'BeatifulSoup')
            root = soupparser.fromstring(self.processed_html)
-        if root[0].tag != 'html':
+        if root.tag != 'html':
            self.log.warn('File does not have opening <html> tag')
            nroot = html.fromstring('<html><head></head><body></body></html>')
            bod = nroot.find('body')
@@ -314,6 +306,35 @@ class MobiReader(object):
                bod.append(child)
            root = nroot

+        htmls = list(root.xpath('//html'))
+        if len(htmls) > 1:
+            self.log.warn('Markup contains multiple <html> tags')
+            # Keep only the largest head and body
+            bodies, heads = root.xpath('//body'), root.xpath('//head')
+            def sz(x): return len(list(x.iter()))
+            def scmp(x, y): return cmp(sz(x), sz(y))
+            body = list(sorted(bodies, cmp=scmp))
+            head = list(sorted(heads, cmp=scmp))
+            for x in root: root.remove(x)
+            if head:
+                root.append(head[-1])
+            if body:
+                root.append(body[-1])
+        for x in root.xpath('//script'):
+            x.getparent().remove(x)
+
+        head = root.xpath('//head')
+        if head:
+            head = head[0]
+        else:
+            head = root.makeelement('head', {})
+            root.insert(0, head)
+        head.text = '\n\t'
+        link = head.makeelement('link', {'type':'text/css',
+            'href':'styles.css'})
+        head.insert(0, link)
+        link.tail = '\n\t'
+
        self.upshift_markup(root)
        guides = root.xpath('//guide')
        guide = guides[0] if guides else None
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@@ -369,13 +369,13 @@ class FlowSplitter(object):

        for path in (
                     '//*[re:match(name(), "h[1-6]", "i")]',
-                     '/html/body/div',
-                     '//pre',
-                     '//hr',
-                     '//p',
-                     '//div',
-                     '//br',
-                     '//li',
+                     '/h:html/h:body/h:div',
+                     '//h:pre',
+                     '//h:hr',
+                     '//h:p',
+                     '//h:div',
+                     '//h:br',
+                     '//h:li',
                     ):
            elems = root.xpath(path, namespaces=NAMESPACES)
            elem = pick_elem(elems)