PML Input: Various fixes

2025-08-30 23:00:21 -04:00 · 2009-08-01 15:35:04 -06:00 · 2009-08-01 15:35:04 -06:00 · 152d52e1d5
commit 152d52e1d5
parent 929d74f718 81300843be
3 changed files with 17 additions and 17 deletions
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@ -148,21 +148,6 @@ class EPUBOutput(OutputFormatPlugin):
                if not pre.text and len(pre) == 0:
                    pre.tag = 'div'
            # Remove hyperlinks with no content as they cause rendering
            # artifacts in browser based renderers
            for a in body.xpath('//a[@href]'):
                if a.get('id', None) is None and a.get('name', None) is None \
                        and len(a) == 0 and not a.text:
                    p = a.getparent()
                    idx = p.index(a) -1
                    p.remove(a)
                    if a.tail:
                        if idx <= 0:
                            p.text += a.tail
                        else:
                            p[idx].tail += a.tail
    def convert(self, oeb, output_path, input_plugin, opts, log):
        self.log, self.opts, self.oeb = log, opts, oeb
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -904,6 +904,20 @@ class Manifest(object):
                if key == 'lang' or key.endswith('}lang'):
                    body.attrib.pop(key)
            # Remove hyperlinks with no content as they cause rendering
            # artifacts in browser based renderers. Anchors that carry an
            # id or name attribute are kept (presumably because they can be
            # link targets -- confirm against callers).
            for a in xpath(data, '//h:a[@href]'):
                if a.get('id', None) is None and a.get('name', None) is None \
                        and len(a) == 0 and not a.text:
                    p = a.getparent()
                    # Index of the sibling immediately before <a>; -1 means
                    # <a> is the first child of its parent.
                    idx = p.index(a) - 1
                    p.remove(a)
                    if a.tail:
                        # Re-attach the text that followed the removed <a>.
                        # Only idx < 0 means "no previous sibling"; idx == 0
                        # is a real sibling whose tail must receive the text.
                        # text/tail may be None, so fall back to ''.
                        if idx < 0:
                            p.text = (p.text or '') + a.tail
                        else:
                            p[idx].tail = (p[idx].tail or '') + a.tail
            return data
        def _parse_txt(self, data):
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@ -43,7 +43,8 @@ PML_HTML_RULES = [
    (re.compile(r'\\-'), lambda match: ''),
    (re.compile(r'\\Fn="(?P<target>.+?)"(?P<text>.*?)\\Fn'), lambda match: '<a href="#footnote-%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
    (re.compile(r'\\Sd="(?P<target>.+?)"(?P<text>.*?)\\Sd'), lambda match: '<a href="#sidebar-%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
-    (re.compile(r'\\I'), lambda match: ''),
+    # Just italicize index items as that is how the eReader software renders them.
    (re.compile(r'\\I(?P<text>.*?)\\I', re.DOTALL), lambda match: '<i>%s</i>' % match.group('text') if match.group('text') else ''),
    # Sidebar and Footnotes
    (re.compile(r'<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', re.DOTALL), lambda match: '<div id="sidebar-%s">%s</div>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
@ -55,7 +56,7 @@ PML_HTML_RULES = [
    # Remove empty <p>'s.
    (re.compile('<p>[ ]*</p>'), lambda match: ''),
    # Ensure empty lines carry over.
-    (re.compile('^$', re.MULTILINE), lambda match: '<br />'),
+    (re.compile('(\r\n|\n|\r){3}'), lambda match: '<br />'),
    # Remove unmatched pml codes.
    (re.compile(r'(?<=[^\\])\\[pxcriouvtblBk]'), lambda match: ''),