Miscellaneous fixes

This commit is contained in:
Kovid Goyal 2009-05-09 01:03:53 -07:00
parent f8471e4457
commit bd26c92eb9
4 changed files with 44 additions and 22 deletions

View File

@ -59,10 +59,9 @@ class HTMLRenderer(object):
def render_html(path_to_html, width=590, height=750):
from PyQt4.QtWebKit import QWebPage
from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize, \
QApplication
if QApplication.instance() is None:
QApplication([])
from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize
from calibre.gui2 import is_ok_to_use_qt
if not is_ok_to_use_qt(): return None
path_to_html = os.path.abspath(path_to_html)
with CurrentDir(os.path.dirname(path_to_html)):
page = QWebPage()

View File

@ -80,8 +80,10 @@ class EPUBInput(InputFormatPlugin):
t.set('href', guide_cover)
t.set('title', 'Title Page')
from calibre.ebooks import render_html
open('calibre_raster_cover.jpg', 'wb').write(
render_html(guide_cover).data)
renderer = render_html(guide_cover)
if renderer is not None:
open('calibre_raster_cover.jpg', 'wb').write(
renderer.data)
def convert(self, stream, options, file_ext, log, accelerators):

View File

@ -290,14 +290,6 @@ class MobiReader(object):
self.replace_page_breaks()
self.cleanup_html()
if self.processed_html.startswith('<body'):
self.processed_html = '<html><head></head>'+self.processed_html+'</html>'
self.processed_html = \
re.compile('<head>', re.IGNORECASE).sub(
'\n<head>\n'
'\t<link type="text/css" href="styles.css" />\n',
self.processed_html)
self.log.debug('Parsing HTML...')
root = html.fromstring(self.processed_html)
if root.xpath('descendant::p/descendant::p'):
@ -305,7 +297,7 @@ class MobiReader(object):
self.log.warning('Markup contains unclosed <p> tags, parsing using',
'BeatifulSoup')
root = soupparser.fromstring(self.processed_html)
if root[0].tag != 'html':
if root.tag != 'html':
self.log.warn('File does not have opening <html> tag')
nroot = html.fromstring('<html><head></head><body></body></html>')
bod = nroot.find('body')
@ -314,6 +306,35 @@ class MobiReader(object):
bod.append(child)
root = nroot
htmls = list(root.xpath('//html'))
if len(htmls) > 1:
self.log.warn('Markup contains multiple <html> tags')
# Keep only the largest head and body
bodies, heads = root.xpath('//body'), root.xpath('//head')
def sz(x): return len(list(x.iter()))
def scmp(x, y): return cmp(sz(x), sz(y))
body = list(sorted(bodies, cmp=scmp))
head = list(sorted(heads, cmp=scmp))
for x in root: root.remove(x)
if head:
root.append(head[-1])
if body:
root.append(body[-1])
for x in root.xpath('//script'):
x.getparent().remove(x)
head = root.xpath('//head')
if head:
head = head[0]
else:
head = root.makeelement('head', {})
root.insert(0, head)
head.text = '\n\t'
link = head.makeelement('link', {'type':'text/css',
'href':'styles.css'})
head.insert(0, link)
link.tail = '\n\t'
self.upshift_markup(root)
guides = root.xpath('//guide')
guide = guides[0] if guides else None

View File

@ -369,13 +369,13 @@ class FlowSplitter(object):
for path in (
'//*[re:match(name(), "h[1-6]", "i")]',
'/html/body/div',
'//pre',
'//hr',
'//p',
'//div',
'//br',
'//li',
'/h:html/h:body/h:div',
'//h:pre',
'//h:hr',
'//h:p',
'//h:div',
'//h:br',
'//h:li',
):
elems = root.xpath(path, namespaces=NAMESPACES)
elem = pick_elem(elems)