mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
PML Input: Various fixes
This commit is contained in:
commit
152d52e1d5
@ -148,21 +148,6 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
if not pre.text and len(pre) == 0:
|
if not pre.text and len(pre) == 0:
|
||||||
pre.tag = 'div'
|
pre.tag = 'div'
|
||||||
|
|
||||||
# Remove hyperlinks with no content as they cause rendering
|
|
||||||
# artifacts in browser based renderers
|
|
||||||
for a in body.xpath('//a[@href]'):
|
|
||||||
if a.get('id', None) is None and a.get('name', None) is None \
|
|
||||||
and len(a) == 0 and not a.text:
|
|
||||||
p = a.getparent()
|
|
||||||
idx = p.index(a) -1
|
|
||||||
p.remove(a)
|
|
||||||
if a.tail:
|
|
||||||
if idx <= 0:
|
|
||||||
p.text += a.tail
|
|
||||||
else:
|
|
||||||
p[idx].tail += a.tail
|
|
||||||
|
|
||||||
|
|
||||||
def convert(self, oeb, output_path, input_plugin, opts, log):
|
def convert(self, oeb, output_path, input_plugin, opts, log):
|
||||||
self.log, self.opts, self.oeb = log, opts, oeb
|
self.log, self.opts, self.oeb = log, opts, oeb
|
||||||
|
|
||||||
|
@ -904,6 +904,20 @@ class Manifest(object):
|
|||||||
if key == 'lang' or key.endswith('}lang'):
|
if key == 'lang' or key.endswith('}lang'):
|
||||||
body.attrib.pop(key)
|
body.attrib.pop(key)
|
||||||
|
|
||||||
|
# Remove hyperlinks with no content as they cause rendering
|
||||||
|
# artifacts in browser based renderers
|
||||||
|
for a in xpath(data, '//h:a[@href]'):
|
||||||
|
if a.get('id', None) is None and a.get('name', None) is None \
|
||||||
|
and len(a) == 0 and not a.text:
|
||||||
|
p = a.getparent()
|
||||||
|
idx = p.index(a) -1
|
||||||
|
p.remove(a)
|
||||||
|
if a.tail:
|
||||||
|
if idx <= 0:
|
||||||
|
p.text += a.tail
|
||||||
|
else:
|
||||||
|
p[idx].tail += a.tail
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def _parse_txt(self, data):
|
def _parse_txt(self, data):
|
||||||
|
@ -43,7 +43,8 @@ PML_HTML_RULES = [
|
|||||||
(re.compile(r'\\-'), lambda match: ''),
|
(re.compile(r'\\-'), lambda match: ''),
|
||||||
(re.compile(r'\\Fn="(?P<target>.+?)"(?P<text>.*?)\\Fn'), lambda match: '<a href="#footnote-%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
|
(re.compile(r'\\Fn="(?P<target>.+?)"(?P<text>.*?)\\Fn'), lambda match: '<a href="#footnote-%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
|
||||||
(re.compile(r'\\Sd="(?P<target>.+?)"(?P<text>.*?)\\Sd'), lambda match: '<a href="#sidebar-%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
|
(re.compile(r'\\Sd="(?P<target>.+?)"(?P<text>.*?)\\Sd'), lambda match: '<a href="#sidebar-%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
|
||||||
(re.compile(r'\\I'), lambda match: ''),
|
# Just italicize index items as that is how the eReader software renders them.
|
||||||
|
(re.compile(r'\\I(?P<text>.*?)\\I', re.DOTALL), lambda match: '<i>%s</i>' % match.group('text') if match.group('text') else ''),
|
||||||
|
|
||||||
# Sidebar and Footnotes
|
# Sidebar and Footnotes
|
||||||
(re.compile(r'<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', re.DOTALL), lambda match: '<div id="sidebar-%s">%s</div>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
|
(re.compile(r'<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', re.DOTALL), lambda match: '<div id="sidebar-%s">%s</div>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
|
||||||
@ -55,7 +56,7 @@ PML_HTML_RULES = [
|
|||||||
# Remove empty <p>'s.
|
# Remove empty <p>'s.
|
||||||
(re.compile('<p>[ ]*</p>'), lambda match: ''),
|
(re.compile('<p>[ ]*</p>'), lambda match: ''),
|
||||||
# Ensure empty lines carry over.
|
# Ensure empty lines carry over.
|
||||||
(re.compile('^$', re.MULTILINE), lambda match: '<br />'),
|
(re.compile('(\r\n|\n|\r){3}'), lambda match: '<br />'),
|
||||||
|
|
||||||
# Remove unmatched PML codes.
|
# Remove unmatched PML codes.
|
||||||
(re.compile(r'(?<=[^\\])\\[pxcriouvtblBk]'), lambda match: ''),
|
(re.compile(r'(?<=[^\\])\\[pxcriouvtblBk]'), lambda match: ''),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user