PML Input: Clean up import. PMLZ Output: Put images in the image subfolder required by DropBook. Remove \Cn tags written inside \x and \Xn tags.

This commit is contained in:
John Schember 2011-02-17 07:19:08 -05:00
parent 7234c3e5f2
commit f2dc294770
3 changed files with 7 additions and 2 deletions

View File

@ -1,4 +1,3 @@
import os.path
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'

View File

@ -50,7 +50,10 @@ class PMLOutput(OutputFormatPlugin):
with open(os.path.join(tdir, 'index.pml'), 'wb') as out: with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
out.write(pml.encode(opts.pml_output_encoding, 'replace')) out.write(pml.encode(opts.pml_output_encoding, 'replace'))
self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, tdir, opts) img_path = os.path.join(tdir, 'index_img')
if not os.path.exists(img_path):
os.makedirs(img_path)
self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, img_path, opts)
log.debug('Compressing output...') log.debug('Compressing output...')
pmlz = ZipFile(output_path, 'w') pmlz = ZipFile(output_path, 'w')

View File

@ -180,6 +180,9 @@ class PMLMLizer(object):
links = set(re.findall(r'(?<=\\q="#).+?(?=")', text)) links = set(re.findall(r'(?<=\\q="#).+?(?=")', text))
for unused in anchors.difference(links): for unused in anchors.difference(links):
text = text.replace('\\Q="%s"' % unused, '') text = text.replace('\\Q="%s"' % unused, '')
# Remove \Cn tags that are within \x and \Xn tags
text = re.sub(ur'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)', '\g<t>\g<a>\g<b>\g<t>', text)
# Replace bad characters. # Replace bad characters.
text = text.replace(u'\xc2', '') text = text.replace(u'\xc2', '')