From 81300843beccad12258bf6c9418c186dc458846a Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 1 Aug 2009 17:21:10 -0400 Subject: [PATCH] PML fixes. --- src/calibre/ebooks/epub/output.py | 15 --------------- src/calibre/ebooks/oeb/base.py | 14 ++++++++++++++ src/calibre/ebooks/pml/pmlconverter.py | 5 +++-- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py index d8bab20b1c..2676e664ee 100644 --- a/src/calibre/ebooks/epub/output.py +++ b/src/calibre/ebooks/epub/output.py @@ -148,21 +148,6 @@ class EPUBOutput(OutputFormatPlugin): if not pre.text and len(pre) == 0: pre.tag = 'div' - # Remove hyperlinks with no content as they cause rendering - # artifacts in browser based renderers - for a in body.xpath('//a[@href]'): - if a.get('id', None) is None and a.get('name', None) is None \ - and len(a) == 0 and not a.text: - p = a.getparent() - idx = p.index(a) -1 - p.remove(a) - if a.tail: - if idx <= 0: - p.text += a.tail - else: - p[idx].tail += a.tail - - def convert(self, oeb, output_path, input_plugin, opts, log): self.log, self.opts, self.oeb = log, opts, oeb diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 8941f97304..d5d4b01cee 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -904,6 +904,20 @@ class Manifest(object): if key == 'lang' or key.endswith('}lang'): body.attrib.pop(key) + # Remove hyperlinks with no content as they cause rendering + # artifacts in browser based renderers + for a in xpath(data, '//h:a[@href]'): + if a.get('id', None) is None and a.get('name', None) is None \ + and len(a) == 0 and not a.text: + p = a.getparent() + idx = p.index(a) -1 + p.remove(a) + if a.tail: + if idx <= 0: + p.text += a.tail + else: + p[idx].tail += a.tail + return data def _parse_txt(self, data): diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index 291a1f0f9e..1c3c749f76 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -43,7 +43,8 @@ PML_HTML_RULES = [ (re.compile(r'\\-'), lambda match: ''), (re.compile(r'\\Fn="(?P.+?)"(?P.*?)\\Fn'), lambda match: '%s' % (match.group('target'), match.group('text')) if match.group('text') else ''), (re.compile(r'\\Sd="(?P.+?)"(?P.*?)\\Sd'), lambda match: '%s' % (match.group('target'), match.group('text')) if match.group('text') else ''), - (re.compile(r'\\I'), lambda match: ''), + # Just italicize index items as that is how the eReader software renders them. + (re.compile(r'\\I(?P.*?)\\I', re.DOTALL), lambda match: '%s' % match.group('text') if match.group('text') else ''), # Sidebar and Footnotes (re.compile(r'.+?)">\s*(?P.*?)\s*', re.DOTALL), lambda match: '' % (match.group('target'), match.group('text')) if match.group('text') else ''), @@ -55,7 +56,7 @@ PML_HTML_RULES = [ # Remove empty

's. (re.compile('

[ ]*

'), lambda match: ''), # Ensure empty lines carry over. - (re.compile('^$', re.MULTILINE), lambda match: '
'), + (re.compile('(\r\n|\n|\r){3}'), lambda match: '
'), # Remove unmatched plm codes. (re.compile(r'(?<=[^\\])\\[pxcriouvtblBk]'), lambda match: ''),