From f2dc294770d2f77e8156e68317173957607ec55e Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 17 Feb 2011 07:19:08 -0500 Subject: [PATCH] PML Input: Clean import. PMLZ Output: Put images in DropBook required image subfolder. Remove \Cn tags written inside of \x and \Xn tags. --- src/calibre/ebooks/pml/input.py | 1 - src/calibre/ebooks/pml/output.py | 5 ++++- src/calibre/ebooks/pml/pmlml.py | 3 +++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py index 5453665a55..91cd457c27 100644 --- a/src/calibre/ebooks/pml/input.py +++ b/src/calibre/ebooks/pml/input.py @@ -1,4 +1,3 @@ -import os.path # -*- coding: utf-8 -*- __license__ = 'GPL v3' diff --git a/src/calibre/ebooks/pml/output.py b/src/calibre/ebooks/pml/output.py index 58dc9a2138..9d2ddc6ca6 100644 --- a/src/calibre/ebooks/pml/output.py +++ b/src/calibre/ebooks/pml/output.py @@ -50,7 +50,10 @@ class PMLOutput(OutputFormatPlugin): with open(os.path.join(tdir, 'index.pml'), 'wb') as out: out.write(pml.encode(opts.pml_output_encoding, 'replace')) - self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, tdir, opts) + img_path = os.path.join(tdir, 'index_img') + if not os.path.exists(img_path): + os.makedirs(img_path) + self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, img_path, opts) log.debug('Compressing output...') pmlz = ZipFile(output_path, 'w') diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py index 4b2f924c7d..12f84e5a5b 100644 --- a/src/calibre/ebooks/pml/pmlml.py +++ b/src/calibre/ebooks/pml/pmlml.py @@ -180,6 +180,9 @@ class PMLMLizer(object): links = set(re.findall(r'(?<=\\q="#).+?(?=")', text)) for unused in anchors.difference(links): text = text.replace('\\Q="%s"' % unused, '') + + # Remove \Cn tags that are within \x and \Xn tags + text = re.sub(ur'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)', '\g<t>\g<a>\g<b>\g<t>', text) # Replace bad characters. 
text = text.replace(u'\xc2', '')