PML Input: Various fixes

This commit is contained in:
Kovid Goyal 2009-08-01 15:35:04 -06:00
commit 152d52e1d5
3 changed files with 17 additions and 17 deletions

View File

@ -148,21 +148,6 @@ class EPUBOutput(OutputFormatPlugin):
if not pre.text and len(pre) == 0:
pre.tag = 'div'
# Remove hyperlinks with no content as they cause rendering
# artifacts in browser-based renderers. Anchors that carry an id or a
# name are kept, since they may still serve as link targets.
for a in body.xpath('//a[@href]'):
    if a.get('id', None) is None and a.get('name', None) is None \
            and len(a) == 0 and not a.text:
        p = a.getparent()
        # Index of the sibling immediately before the anchor; -1 means
        # the anchor is the first child of its parent.
        idx = p.index(a) - 1
        p.remove(a)
        # The anchor's tail text is re-attached below so it is not lost
        # along with the removed element.
        if a.tail:
            if idx < 0:
                # No preceding sibling: the tail follows the parent's
                # own leading text, which may be None.
                p.text = (p.text or '') + a.tail
            else:
                # Append to the preceding sibling's tail (which may be
                # None) so the text stays in document order.
                prev = p[idx]
                prev.tail = (prev.tail or '') + a.tail
def convert(self, oeb, output_path, input_plugin, opts, log):
self.log, self.opts, self.oeb = log, opts, oeb

View File

@ -904,6 +904,20 @@ class Manifest(object):
if key == 'lang' or key.endswith('}lang'):
body.attrib.pop(key)
# Remove hyperlinks with no content as they cause rendering
# artifacts in browser-based renderers. Anchors that carry an id or a
# name are kept, since they may still serve as link targets.
for a in xpath(data, '//h:a[@href]'):
    if a.get('id', None) is None and a.get('name', None) is None \
            and len(a) == 0 and not a.text:
        p = a.getparent()
        # Index of the sibling immediately before the anchor; -1 means
        # the anchor is the first child of its parent.
        idx = p.index(a) - 1
        p.remove(a)
        # The anchor's tail text is re-attached below so it is not lost
        # along with the removed element.
        if a.tail:
            if idx < 0:
                # No preceding sibling: the tail follows the parent's
                # own leading text, which may be None.
                p.text = (p.text or '') + a.tail
            else:
                # Append to the preceding sibling's tail (which may be
                # None) so the text stays in document order.
                prev = p[idx]
                prev.tail = (prev.tail or '') + a.tail
return data
def _parse_txt(self, data):

View File

@ -43,7 +43,8 @@ PML_HTML_RULES = [
(re.compile(r'\\-'), lambda match: ''),
(re.compile(r'\\Fn="(?P<target>.+?)"(?P<text>.*?)\\Fn'), lambda match: '<a href="#footnote-%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
(re.compile(r'\\Sd="(?P<target>.+?)"(?P<text>.*?)\\Sd'), lambda match: '<a href="#sidebar-%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
(re.compile(r'\\I'), lambda match: ''),
# Just italicize index items as that is how the eReader software renders them.
(re.compile(r'\\I(?P<text>.*?)\\I', re.DOTALL), lambda match: '<i>%s</i>' % match.group('text') if match.group('text') else ''),
# Sidebar and Footnotes
(re.compile(r'<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', re.DOTALL), lambda match: '<div id="sidebar-%s">%s</div>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
@ -55,7 +56,7 @@ PML_HTML_RULES = [
# Remove empty <p>'s.
(re.compile('<p>[ ]*</p>'), lambda match: ''),
# Ensure empty lines carry over.
(re.compile('^$', re.MULTILINE), lambda match: '<br />'),
(re.compile('(\r\n|\n|\r){3}'), lambda match: '<br />'),
# Remove unmatched PML codes.
(re.compile(r'(?<=[^\\])\\[pxcriouvtblBk]'), lambda match: ''),