diff --git a/src/calibre/ebooks/htmlz/oeb2html.py b/src/calibre/ebooks/htmlz/oeb2html.py index b162d9e19c..06ba2b63d3 100644 --- a/src/calibre/ebooks/htmlz/oeb2html.py +++ b/src/calibre/ebooks/htmlz/oeb2html.py @@ -17,11 +17,13 @@ from lxml import html from urlparse import urldefrag from calibre import prepare_string_for_xml -from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace,\ - OEB_IMAGES, XLINK, rewrite_links, urlnormalize +from calibre.ebooks.oeb.base import ( + XHTML, XHTML_NS, barename, namespace, OEB_IMAGES, XLINK, rewrite_links, urlnormalize) from calibre.ebooks.oeb.stylizer import Stylizer from calibre.utils.logging import default_log +SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img', 'link', 'meta'} + class OEB2HTML(object): ''' Base class. All subclasses should implement dump_text to actually transform @@ -49,7 +51,7 @@ class OEB2HTML(object): return self.mlize_spine(oeb_book) def mlize_spine(self, oeb_book): - output = [u'
'] + output = [u''] for item in oeb_book.spine: self.log.debug('Converting %s to HTML...' % item.href) self.rewrite_ids(item.data, item) @@ -183,7 +185,11 @@ class OEB2HTMLNoCSSizer(OEB2HTML): at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True)) # Write the tag. - text.append('<%s%s>' % (tag, at)) + text.append('<%s%s' % (tag, at)) + if tag in SELF_CLOSING_TAGS: + text.append(' />') + else: + text.append('>') # Turn styles into tags. if style['font-weight'] in ('bold', 'bolder'): @@ -210,7 +216,8 @@ class OEB2HTMLNoCSSizer(OEB2HTML): # Close all open tags. tags.reverse() for t in tags: - text.append('%s>' % t) + if t not in SELF_CLOSING_TAGS: + text.append('%s>' % t) # Add the text that is outside of the tag. if hasattr(elem, 'tail') and elem.tail: @@ -267,10 +274,14 @@ class OEB2HTMLInlineCSSizer(OEB2HTML): # Turn style into strings for putting in the tag. style_t = '' if style_a: - style_t = ' style="%s"' % style_a + style_t = ' style="%s"' % style_a.replace('"', "'") # Write the tag. - text.append('<%s%s%s>' % (tag, at, style_t)) + text.append('<%s%s%s' % (tag, at, style_t)) + if tag in SELF_CLOSING_TAGS: + text.append(' />') + else: + text.append('>') # Process tags that contain text. if hasattr(elem, 'text') and elem.text: @@ -283,7 +294,8 @@ class OEB2HTMLInlineCSSizer(OEB2HTML): # Close all open tags. tags.reverse() for t in tags: - text.append('%s>' % t) + if t not in SELF_CLOSING_TAGS: + text.append('%s>' % t) # Add the text that is outside of the tag. if hasattr(elem, 'tail') and elem.tail: @@ -312,7 +324,8 @@ class OEB2HTMLClassCSSizer(OEB2HTML): css = u'' else: css = u'' - output = [u''] + [css] + [u''] + output + [u''] + output = [u''] + \ + [css] + [u''] + output + [u''] return ''.join(output) def dump_text(self, elem, stylizer, page): @@ -350,7 +363,11 @@ class OEB2HTMLClassCSSizer(OEB2HTML): at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True)) # Write the tag. - text.append('<%s%s>' % (tag, at)) + text.append('<%s%s' % (tag, at)) + if tag in SELF_CLOSING_TAGS: + text.append(' />') + else: + text.append('>') # Process tags that contain text. if hasattr(elem, 'text') and elem.text: @@ -363,7 +380,8 @@ class OEB2HTMLClassCSSizer(OEB2HTML): # Close all open tags. tags.reverse() for t in tags: - text.append('%s>' % t) + if t not in SELF_CLOSING_TAGS: + text.append('%s>' % t) # Add the text that is outside of the tag. if hasattr(elem, 'tail') and elem.tail: