HTMLZ Output: Various fixes

HTMLZ Output: Fix <style> tag placed inside <body> instead of <head>.
See #1239530 (Htmlz conversion places <head> in <body>, not before)

HTMLZ Output: Fix inline styles not escaping quotes properly. See #1239527 (Htmlz inline css doesn't single quote fonts & thus destroys html)

HTMLZ Output: Fix incorrect handling of some self-closing tags such as
<br>. See #1239555 (Htmlz conversion incorrectly handles <br/>)

Merge branch 'master' of https://github.com/user-none/calibre
This commit is contained in:
Kovid Goyal 2013-10-15 08:52:33 +05:30
commit 3348817b2e

View File

@ -17,11 +17,13 @@ from lxml import html
from urlparse import urldefrag from urlparse import urldefrag
from calibre import prepare_string_for_xml from calibre import prepare_string_for_xml
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace,\ from calibre.ebooks.oeb.base import (
OEB_IMAGES, XLINK, rewrite_links, urlnormalize XHTML, XHTML_NS, barename, namespace, OEB_IMAGES, XLINK, rewrite_links, urlnormalize)
from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img', 'link', 'meta'}
class OEB2HTML(object): class OEB2HTML(object):
''' '''
Base class. All subclasses should implement dump_text to actually transform Base class. All subclasses should implement dump_text to actually transform
@ -49,7 +51,7 @@ class OEB2HTML(object):
return self.mlize_spine(oeb_book) return self.mlize_spine(oeb_book)
def mlize_spine(self, oeb_book): def mlize_spine(self, oeb_book):
output = [u'<html><body><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /></head>'] output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /></head><body>']
for item in oeb_book.spine: for item in oeb_book.spine:
self.log.debug('Converting %s to HTML...' % item.href) self.log.debug('Converting %s to HTML...' % item.href)
self.rewrite_ids(item.data, item) self.rewrite_ids(item.data, item)
@ -183,7 +185,11 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True)) at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
# Write the tag. # Write the tag.
text.append('<%s%s>' % (tag, at)) text.append('<%s%s' % (tag, at))
if tag in SELF_CLOSING_TAGS:
text.append(' />')
else:
text.append('>')
# Turn styles into tags. # Turn styles into tags.
if style['font-weight'] in ('bold', 'bolder'): if style['font-weight'] in ('bold', 'bolder'):
@ -210,6 +216,7 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
# Close all open tags. # Close all open tags.
tags.reverse() tags.reverse()
for t in tags: for t in tags:
if t not in SELF_CLOSING_TAGS:
text.append('</%s>' % t) text.append('</%s>' % t)
# Add the text that is outside of the tag. # Add the text that is outside of the tag.
@ -267,10 +274,14 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
# Turn style into strings for putting in the tag. # Turn style into strings for putting in the tag.
style_t = '' style_t = ''
if style_a: if style_a:
style_t = ' style="%s"' % style_a style_t = ' style="%s"' % style_a.replace('"', "'")
# Write the tag. # Write the tag.
text.append('<%s%s%s>' % (tag, at, style_t)) text.append('<%s%s%s' % (tag, at, style_t))
if tag in SELF_CLOSING_TAGS:
text.append(' />')
else:
text.append('>')
# Process tags that contain text. # Process tags that contain text.
if hasattr(elem, 'text') and elem.text: if hasattr(elem, 'text') and elem.text:
@ -283,6 +294,7 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
# Close all open tags. # Close all open tags.
tags.reverse() tags.reverse()
for t in tags: for t in tags:
if t not in SELF_CLOSING_TAGS:
text.append('</%s>' % t) text.append('</%s>' % t)
# Add the text that is outside of the tag. # Add the text that is outside of the tag.
@ -312,7 +324,8 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
css = u'<link href="style.css" rel="stylesheet" type="text/css" />' css = u'<link href="style.css" rel="stylesheet" type="text/css" />'
else: else:
css = u'<style type="text/css">' + self.get_css(oeb_book) + u'</style>' css = u'<style type="text/css">' + self.get_css(oeb_book) + u'</style>'
output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />'] + [css] + [u'</head><body>'] + output + [u'</body></html>'] output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />'] + \
[css] + [u'</head><body>'] + output + [u'</body></html>']
return ''.join(output) return ''.join(output)
def dump_text(self, elem, stylizer, page): def dump_text(self, elem, stylizer, page):
@ -350,7 +363,11 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True)) at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
# Write the tag. # Write the tag.
text.append('<%s%s>' % (tag, at)) text.append('<%s%s' % (tag, at))
if tag in SELF_CLOSING_TAGS:
text.append(' />')
else:
text.append('>')
# Process tags that contain text. # Process tags that contain text.
if hasattr(elem, 'text') and elem.text: if hasattr(elem, 'text') and elem.text:
@ -363,6 +380,7 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
# Close all open tags. # Close all open tags.
tags.reverse() tags.reverse()
for t in tags: for t in tags:
if t not in SELF_CLOSING_TAGS:
text.append('</%s>' % t) text.append('</%s>' % t)
# Add the text that is outside of the tag. # Add the text that is outside of the tag.