From f2dc294770d2f77e8156e68317173957607ec55e Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 17 Feb 2011 07:19:08 -0500 Subject: [PATCH 1/2] PML Input: Clean import. PMLZ Output: Put images in DropBook required image subfolder. Remove \Cn tags written inside of \x and \Xn tags. --- src/calibre/ebooks/pml/input.py | 1 - src/calibre/ebooks/pml/output.py | 5 ++++- src/calibre/ebooks/pml/pmlml.py | 3 +++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py index 5453665a55..91cd457c27 100644 --- a/src/calibre/ebooks/pml/input.py +++ b/src/calibre/ebooks/pml/input.py @@ -1,4 +1,3 @@ -import os.path # -*- coding: utf-8 -*- __license__ = 'GPL v3' diff --git a/src/calibre/ebooks/pml/output.py b/src/calibre/ebooks/pml/output.py index 58dc9a2138..9d2ddc6ca6 100644 --- a/src/calibre/ebooks/pml/output.py +++ b/src/calibre/ebooks/pml/output.py @@ -50,7 +50,10 @@ class PMLOutput(OutputFormatPlugin): with open(os.path.join(tdir, 'index.pml'), 'wb') as out: out.write(pml.encode(opts.pml_output_encoding, 'replace')) - self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, tdir, opts) + img_path = os.path.join(tdir, 'index_img') + if not os.path.exists(img_path): + os.makedirs(img_path) + self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, img_path, opts) log.debug('Compressing output...') pmlz = ZipFile(output_path, 'w') diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py index 4b2f924c7d..12f84e5a5b 100644 --- a/src/calibre/ebooks/pml/pmlml.py +++ b/src/calibre/ebooks/pml/pmlml.py @@ -180,6 +180,9 @@ class PMLMLizer(object): links = set(re.findall(r'(?<=\\q="#).+?(?=")', text)) for unused in anchors.difference(links): text = text.replace('\\Q="%s"' % unused, '') + + # Remove \Cn tags that are within \x and \Xn tags + text = re.sub(ur'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)', '\g<t>\g<a>\g<b>\g<t>', text) # Replace bad characters. 
text = text.replace(u'\xc2', '') From 1345a053395532fb243a3e0697f7a14ea2fff0e7 Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 17 Feb 2011 07:45:57 -0500 Subject: [PATCH 2/2] PMLML: Don't write \Cn tags when we are within a heading. --- src/calibre/ebooks/pml/pmlml.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py index 12f84e5a5b..779e75d713 100644 --- a/src/calibre/ebooks/pml/pmlml.py +++ b/src/calibre/ebooks/pml/pmlml.py @@ -258,7 +258,12 @@ class PMLMLizer(object): # TOC markers. toc_name = elem.attrib.get('name', None) toc_id = elem.attrib.get('id', None) - if (toc_id or toc_name) and tag not in ('h1', 'h2','h3','h4','h5','h6',): + # Only write the TOC marker if the tag isn't a heading and we aren't in one. + if (toc_id or toc_name) and tag not in ('h1', 'h2','h3','h4','h5','h6') and \ + 'x' not in tag_stack+tags and 'X0' not in tag_stack+tags and \ + 'X1' not in tag_stack+tags and 'X2' not in tag_stack+tags and \ + 'X3' not in tag_stack+tags and 'X4' not in tag_stack+tags: + toc_page = page.href if self.toc.get(toc_page, None): for toc_x in (toc_name, toc_id): @@ -267,8 +272,8 @@ class PMLMLizer(object): toc_depth = max(min(toc_depth, 4), 0) text.append('\\C%s="%s"' % (toc_depth, toc_title)) - # Process style information that needs holds a single tag - # Commented out because every page in an OEB book starts with this style + # Process style information that needs holds a single tag. + # Commented out because every page in an OEB book starts with this style. if style['page-break-before'] == 'always': text.append('\\p')