Convert self-closed <div> and <a> tags to empty closed tags, as many browser-based renderers refuse to recognize them as closed without a proper DOCTYPE. Fixes #3095 (Some epub files can only be advanced chapter to chapter)

This commit is contained in:
Kovid Goyal 2009-08-07 19:12:37 -06:00
parent 91b53e3d0c
commit c9f0b74f11
3 changed files with 7 additions and 1 deletions

View File

@ -1010,7 +1010,10 @@ class Manifest(object):
def __str__(self):
data = self.data
if isinstance(data, etree._Element):
return xml2str(data, pretty_print=self.oeb.pretty_print)
ans = xml2str(data, pretty_print=self.oeb.pretty_print)
if self.media_type in OEB_DOCS:
ans = re.sub(r'<(div|a)([^>]*)/>', r'<\1\2></\1>', ans)
return ans
if isinstance(data, unicode):
return data.encode('utf-8')
if hasattr(data, 'cssText'):

View File

@ -78,6 +78,7 @@ class DetectStructure(object):
page_break_after = 'display: block; page-break-after: always'
for item, elem in self.detected_chapters:
text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
text = text.strip()
self.log('\tDetected chapter:', text[:50])
if chapter_mark == 'none':
continue

View File

@ -4,3 +4,5 @@
E-book Conversion
===================
This section is under construction. In the meantime, you can see some documentation of the command-line interface for conversion at :ref:`ebook-convert`.