PML Output: Add .png to image names. Fix removing excessive newlines from PML output. PMLZ Output: Name images correctly.

2025-11-23 06:53:02 -05:00 · 2009-10-23 19:29:13 -06:00 · 2009-10-23 19:29:13 -06:00 · 5770808fcf
commit 5770808fcf
parent 4b822e5700
3 changed files with 32 additions and 40 deletions
--- a/src/calibre/ebooks/pdb/ereader/__init__.py
+++ b/src/calibre/ebooks/pdb/ereader/__init__.py
@@ -16,11 +16,11 @@ def image_name(name, taken_names=[]):
        cut = len(name) - 32
        names = name[:10]
        namee = name[10+cut:]
-        name = names + namee
+        name = '%s%s.png' % (names, namee)
    
    while name in taken_names:
-        for i in xrange(9999999999999999999999999999999):
-            name = '%s%s' % (name[:-len('%s' % i)], i)
+        for i in xrange(999999999999999999999999999):
+            name = '%s%s.png' % (name[:-len('%s' % i)], i)

    name = name.ljust(32, '\x00')[:32]
    
--- a/src/calibre/ebooks/pml/output.py
+++ b/src/calibre/ebooks/pml/output.py
@@ -18,7 +18,7 @@ from calibre.customize.conversion import OutputFormatPlugin
 from calibre.customize.conversion import OptionRecommendation
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
-from calibre.ebooks.oeb.base import OEB_IMAGES
+from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
 from calibre.ebooks.pml.pmlml import PMLMLizer

 class PMLOutput(OutputFormatPlugin):
@@ -40,28 +40,26 @@ class PMLOutput(OutputFormatPlugin):
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        with TemporaryDirectory('_pmlz_output') as tdir:
            pmlmlizer = PMLMLizer(log)
-            content = pmlmlizer.extract_content(oeb_book, opts)
+            pml = unicode(pmlmlizer.extract_content(oeb_book, opts))
            with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
-                out.write(content.encode(opts.output_encoding, 'replace'))
+                out.write(pml.encode(opts.output_encoding, 'replace'))

-            self.write_images(oeb_book.manifest, tdir)
+            self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, tdir)

            log.debug('Compressing output...')
            pmlz = ZipFile(output_path, 'w')
            pmlz.add_dir(tdir)

-    def write_images(self, manifest, out_dir):
+    def write_images(self, manifest, image_hrefs, out_dir):
        for item in manifest:
-            if item.media_type in OEB_IMAGES:
+            if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys():
                im = Image.open(cStringIO.StringIO(item.data))

                data = cStringIO.StringIO()
                im.save(data, 'PNG')
                data = data.getvalue()

-                name = os.path.splitext(os.path.basename(item.href))[0] + '.png'
-                path = os.path.join(out_dir, name)
+                path = os.path.join(out_dir, image_hrefs[item.href])

                with open(path, 'wb') as out:
                    out.write(data)
-
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'
 Transform OEB content into PML markup
 '''

-import os
 import re

 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
@@ -138,16 +137,13 @@ class PMLMLizer(object):
        aid = self.link_hrefs[aid]
        return u'\\Q="%s"' % aid

+    def remove_newlines(self, text):
+        text = text.replace('\r\n', ' ')
+        text = text.replace('\n', ' ')
+        text = text.replace('\r', ' ')
+        return text
+
    def clean_text(self, text):
-        # Remove excess spaces at beginning and end of lines
-        text = re.sub('(?m)^[ ]+', '', text)
-        text = re.sub('(?m)[ ]+$', '', text)
-
-        # Remove excessive newlines
-        text = re.sub('%s{1,1}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text)
-        text = re.sub('%s{3,}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text)
-        text = re.sub('[ ]{2,}', ' ', text)
-
        # Remove excessive \p tags
        text = re.sub(r'\\p\s*\\p', '', text)

@@ -166,6 +162,17 @@ class PMLMLizer(object):
        # Turn all unicode characters into their PML hex equivelent
        text = re.sub('[^\x00-\x7f]', lambda x: '\\U%04x' % ord(x.group()), text)

+        # Remove excess spaces at beginning and end of lines
+        text = re.sub('(?m)^[ ]+', '', text)
+        text = re.sub('(?m)[ ]+$', '', text)
+
+        # Remove excessive spaces
+        text = re.sub('[ ]{2,}', ' ', text)
+
+        # Remove excessive newlines
+        text = re.sub('\n[ ]+\n', '\n\n', text)
+        text = re.sub('\n\n\n+', '\n\n', text)
+
        return text

    def dump_text(self, elem, stylizer, page, tag_stack=[]):
@@ -197,7 +204,7 @@ class PMLMLizer(object):
                    if len(self.image_hrefs.keys()) == 0:
                        self.image_hrefs[page.abshref(elem.attrib['src'])] = 'cover.png'
                    else:
-                        self.image_hrefs[page.abshref(elem.attrib['src'])] = image_name('%s' % len(self.image_hrefs.keys()), self.image_hrefs.keys()).strip('\x00')
+                        self.image_hrefs[page.abshref(elem.attrib['src'])] = image_name('%s.png' % len(self.image_hrefs.keys()), self.image_hrefs.keys()).strip('\x00')
                text.append('\\m="%s"' % self.image_hrefs[page.abshref(elem.attrib['src'])])
        if tag == 'hr':
            w = '\\w'
@@ -251,7 +258,7 @@ class PMLMLizer(object):

        # Proccess tags that contain text.
        if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
-            text.append(self.elem_text(elem, tag_stack))
+            text.append(self.remove_newlines(elem.text))

        for item in elem:
            text += self.dump_text(item, stylizer, page, tag_stack)
@@ -261,32 +268,19 @@ class PMLMLizer(object):
            close_tag_list.insert(0, tag_stack.pop())
        text += self.close_tags(close_tag_list)
        if tag in SEPARATE_TAGS:
-            text.append(os.linesep + os.linesep)
+            text.append('\n\n')

        if 'block' not in tag_stack:
-            text.append(os.linesep + os.linesep)
+            text.append('\n\n')

        #if style['page-break-after'] == 'always':
        #    text.append('\\p')

        if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
-            text.append(self.elem_tail(elem, tag_stack))
+            text.append(self.remove_newlines(elem.tail))

        return text

-    def elem_text(self, elem, tag_stack):
-        return self.block_text(elem.text, 'block' in tag_stack)
-
-    def elem_tail(self, elem, tag_stack):
-        return self.block_text(elem.tail, 'block' in tag_stack)
-
-    def block_text(self, text, in_block):
-        if in_block:
-            text = text.replace('\n\r', ' ')
-            text = text.replace('\n', ' ')
-            text = text.replace('\r', ' ')
-        return text
-
    def close_tags(self, tags):
        text = [u'']
        for i in range(0, len(tags)):