Fix #4087 (Conversion from .LIT to .EPUB loses a chapter from the book)

This commit is contained in:
Kovid Goyal 2009-11-30 11:25:51 -07:00
parent 7ed11edf44
commit 0bcaa648bd
4 changed files with 10 additions and 1 deletion

View File

@@ -49,6 +49,9 @@
- title: Add 0x0c01 to the list of product ids for HTC Hero (Android) driver
tickets: [4088]
- title: "LIT Input: Remove more invalid markup present in LIT files created by Microsoft Word plugins"
tickets: [4087]
new recipes:
- title: The Economist (no subscription required)
author: Kovid Goyal

View File

@@ -312,7 +312,6 @@ class EPUBOutput(OutputFormatPlugin):
for tag in XPath('//h:center')(root):
tag.tag = XHTML('div')
tag.set('style', 'text-align:center')
# ADE can't handle & in an img url
for tag in XPath('//h:img[@src]')(root):
tag.set('src', tag.get('src', '').replace('&', ''))
@@ -340,6 +339,7 @@ class EPUBOutput(OutputFormatPlugin):
else:
self.oeb.log.warn('No stylesheet found')
def workaround_sony_quirks(self):
'''
Perform toc link transforms to alleviate slow loading.

View File

@@ -866,6 +866,10 @@ class LitContainer(object):
atoms = self._litfile.get_atoms(entry)
unbin = UnBinary(raw, name, manifest, HTML_MAP, atoms)
content = HTML_DECL + str(unbin)
tags = ('personname', 'place', 'city', 'country-region')
pat = r'(?i)</{0,1}st1:(%s)>'%('|'.join(tags))
content = re.sub(pat, '', content)
content = re.sub(r'<(/{0,1})form>', r'<\1div>', content)
else:
internal = '/'.join(('/data', entry.internal))
content = self._litfile.get_file(internal)

View File

@@ -799,6 +799,7 @@ class Manifest(object):
try:
data = etree.fromstring(data)
except etree.XMLSyntaxError, err:
self.log.exception('Initial parse failed:')
repl = lambda m: ENTITYDEFS.get(m.group(1), m.group(0))
data = ENTITY_RE.sub(repl, data)
try:
@@ -843,6 +844,7 @@ class Manifest(object):
# Force into the XHTML namespace
if not namespace(data.tag):
self.oeb.log.warn('Forcing', self.href, 'into XHTML namespace')
data.attrib['xmlns'] = XHTML_NS
data = etree.tostring(data, encoding=unicode)