EPUB metadata: If parsing of cover file as XHTML fails, retry parsing as plain HTML

This commit is contained in:
Kovid Goyal 2010-12-15 18:54:33 -07:00
parent 5cd412a7d5
commit a7e22ca871

View File

@ -22,6 +22,9 @@ class UnknownFormatError(Exception):
class DRMError(ValueError): class DRMError(ValueError):
pass pass
# Raised by HTMLRenderer when the loaded document contains a 'parsererror'
# element; the exception message is that element's plain text. render_html
# checks for this exception and, if the file was loaded as XHTML, retries
# rendering it as plain HTML.
class ParserError(ValueError):
pass
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm', BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc', 'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
@ -39,6 +42,10 @@ class HTMLRenderer(object):
try: try:
if not ok: if not ok:
raise RuntimeError('Rendering of HTML failed.') raise RuntimeError('Rendering of HTML failed.')
de = self.page.mainFrame().documentElement()
pe = de.findFirst('parsererror')
if not pe.isNull():
raise ParserError(pe.toPlainText())
image = QImage(self.page.viewportSize(), QImage.Format_ARGB32) image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
image.setDotsPerMeterX(96*(100/2.54)) image.setDotsPerMeterX(96*(100/2.54))
image.setDotsPerMeterY(96*(100/2.54)) image.setDotsPerMeterY(96*(100/2.54))
@ -104,7 +111,7 @@ def render_html_svg_workaround(path_to_html, log, width=590, height=750):
return data return data
def render_html(path_to_html, width=590, height=750): def render_html(path_to_html, width=590, height=750, as_xhtml=True):
from PyQt4.QtWebKit import QWebPage from PyQt4.QtWebKit import QWebPage
from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize
from calibre.gui2 import is_ok_to_use_qt from calibre.gui2 import is_ok_to_use_qt
@ -122,15 +129,18 @@ def render_html(path_to_html, width=590, height=750):
renderer = HTMLRenderer(page, loop) renderer = HTMLRenderer(page, loop)
page.connect(page, SIGNAL('loadFinished(bool)'), renderer, page.connect(page, SIGNAL('loadFinished(bool)'), renderer,
Qt.QueuedConnection) Qt.QueuedConnection)
# Can't use load as if the extension of path is not xhtml if as_xhtml:
# then it won't render SVG correctly, so set mimetype page.mainFrame().setContent(open(path_to_html, 'rb').read(),
# explicitly 'application/xhtml+xml', QUrl.fromLocalFile(path_to_html))
page.mainFrame().setContent(open(path_to_html, 'rb').read(), else:
'application/xhtml+xml', QUrl.fromLocalFile(path_to_html)) page.mainFrame().load(QUrl.fromLocalFile(path_to_html))
loop.exec_() loop.exec_()
renderer.loop = renderer.page = None renderer.loop = renderer.page = None
del page del page
del loop del loop
if isinstance(renderer.exception, ParserError) and as_xhtml:
return render_html(path_to_html, width=width, height=height,
as_xhtml=False)
return renderer return renderer
def check_ebook_format(stream, current_guess): def check_ebook_format(stream, current_guess):