PML Output: Don't write \Cn tags when we are within a heading. Put images in the image subfolder required by DropBook. Remove \Cn tags written inside \x and \Xn tags.

2026-04-30 12:40:47 -04:00 · 2011-02-17 07:40:36 -07:00 · 2011-02-17 07:40:36 -07:00 · fc314b2a9b
commit fc314b2a9b
parent 0c7089cc87 1345a05339
3 changed files with 15 additions and 5 deletions
--- a/src/calibre/ebooks/pml/input.py
+++ b/src/calibre/ebooks/pml/input.py
@ -1,4 +1,3 @@
-import os.path
 # -*- coding: utf-8 -*-

 __license__   = 'GPL v3'
--- a/src/calibre/ebooks/pml/output.py
+++ b/src/calibre/ebooks/pml/output.py
@ -50,7 +50,10 @@ class PMLOutput(OutputFormatPlugin):
            with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
                out.write(pml.encode(opts.pml_output_encoding, 'replace'))

-            self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, tdir, opts)
+            img_path = os.path.join(tdir, 'index_img')
+            if not os.path.exists(img_path):
+                os.makedirs(img_path)
+            self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, img_path, opts)

            log.debug('Compressing output...')
            pmlz = ZipFile(output_path, 'w')
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@ -180,6 +180,9 @@ class PMLMLizer(object):
        links = set(re.findall(r'(?<=\\q="#).+?(?=")', text))
        for unused in anchors.difference(links):
            text = text.replace('\\Q="%s"' % unused, '')
+            
+        # Remove \Cn tags that are within \x and \Xn tags
+        text = re.sub(ur'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)', '\g<t>\g<a>\g<b>\g<t>', text)

        # Replace bad characters.
        text = text.replace(u'\xc2', '')
@ -255,7 +258,12 @@ class PMLMLizer(object):
        # TOC markers.
        toc_name = elem.attrib.get('name', None)
        toc_id = elem.attrib.get('id', None)
-        if (toc_id or toc_name) and tag  not in ('h1', 'h2','h3','h4','h5','h6',):
+        # Only write the TOC marker if the tag isn't a heading and we aren't in one.
+        if (toc_id or toc_name) and tag not in ('h1', 'h2','h3','h4','h5','h6') and \
+            'x' not in tag_stack+tags and 'X0' not in tag_stack+tags and \
+            'X1' not in tag_stack+tags and 'X2' not in tag_stack+tags and \
+            'X3' not in tag_stack+tags and 'X4' not in tag_stack+tags:
+
            toc_page = page.href
            if self.toc.get(toc_page, None):
                for toc_x in (toc_name, toc_id):
@ -264,8 +272,8 @@ class PMLMLizer(object):
                        toc_depth = max(min(toc_depth, 4), 0)
                        text.append('\\C%s="%s"' % (toc_depth, toc_title))

-        # Process style information that needs holds a single tag
-        # Commented out because every page in an OEB book starts with this style
+        # Process style information that needs only a single tag.
+        # Commented out because every page in an OEB book starts with this style.
        if style['page-break-before'] == 'always':
            text.append('\\p')