From 973ca9abad7f377d2b37b4c653e1b0b3c3b9da5f Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 5 May 2012 10:46:24 +0530
Subject: [PATCH] EPUB Output: Do not self close any container tags to prevent artifacts when EPUBs are viewed using buggy browser based viewers. Fixes #994861 (Code font does not terminate)

---
 src/calibre/ebooks/oeb/base.py | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index a18e528a51..252d5b34b3 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -81,6 +81,23 @@ _css_url_re = re.compile(r'url\s*\([\'"]{0,1}(.*?)[\'"]{0,1}\)', re.I)
 _css_import_re = re.compile(r'@import "(.*?)"')
 _archive_re = re.compile(r'[^ ]+')
 
+# Tags that should not be self closed in epub output
+self_closing_bad_tags = {'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b',
+'bdo', 'blockquote', 'body', 'button', 'cite', 'code', 'dd', 'del', 'details',
+'dfn', 'div', 'dl', 'dt', 'em', 'fieldset', 'figcaption', 'figure', 'footer',
+'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'i', 'ins', 'kbd',
+'label', 'legend', 'li', 'map', 'mark', 'meter', 'nav', 'ol', 'output', 'p',
+'pre', 'progress', 'q', 'rp', 'rt', 'samp', 'section', 'select', 'small',
+'span', 'strong', 'sub', 'summary', 'sup', 'textarea', 'time', 'ul', 'var',
+'video'}
+
+_self_closing_pat = re.compile(
+    r'<(?P<tag>%s)(?=[\s/])(?P<arg>[^>]*)/>'%('|'.join(self_closing_bad_tags)),
+    re.IGNORECASE)
+
+def close_self_closing_tags(raw):
+    return _self_closing_pat.sub(r'<\g<tag>\g<arg>></\g<tag>>', raw)
+
 def iterlinks(root, find_links_in_css=True):
     '''
     Iterate over all links in a OEB Document.
@@ -938,13 +955,10 @@ class Manifest(object):
             if isinstance(data, etree._Element):
                 ans = xml2str(data, pretty_print=self.oeb.pretty_print)
                 if self.media_type in OEB_DOCS:
-                    # Convert self closing div|span|a|video|audio|iframe tags
+                    # Convert self closing div|span|a|video|audio|iframe|etc tags
                     # to normally closed ones, as they are interpreted
                     # incorrectly by some browser based renderers
-                    ans = re.sub(
-                        # tag name followed by either a space or a /
-                        r'<(?P<tag>div|a|span|video|audio|iframe)(?=[\s/])(?P<arg>[^>]*)/>',
-                        r'<\g<tag>\g<arg>></\g<tag>>', ans)
+                    ans = close_self_closing_tags(ans)
                 return ans
             if isinstance(data, unicode):
                 return data.encode('utf-8')