mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
PML Output: Add .png to image names. Fix removing excessive newlines from PML output. PMLZ Output: Name images correctly.
This commit is contained in:
parent
4b822e5700
commit
5770808fcf
@ -16,11 +16,11 @@ def image_name(name, taken_names=[]):
|
|||||||
cut = len(name) - 32
|
cut = len(name) - 32
|
||||||
names = name[:10]
|
names = name[:10]
|
||||||
namee = name[10+cut:]
|
namee = name[10+cut:]
|
||||||
name = names + namee
|
name = '%s%s.png' % (names, namee)
|
||||||
|
|
||||||
while name in taken_names:
|
while name in taken_names:
|
||||||
for i in xrange(9999999999999999999999999999999):
|
for i in xrange(999999999999999999999999999):
|
||||||
name = '%s%s' % (name[:-len('%s' % i)], i)
|
name = '%s%s.png' % (name[:-len('%s' % i)], i)
|
||||||
|
|
||||||
name = name.ljust(32, '\x00')[:32]
|
name = name.ljust(32, '\x00')[:32]
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ from calibre.customize.conversion import OutputFormatPlugin
|
|||||||
from calibre.customize.conversion import OptionRecommendation
|
from calibre.customize.conversion import OptionRecommendation
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.utils.zipfile import ZipFile
|
from calibre.utils.zipfile import ZipFile
|
||||||
from calibre.ebooks.oeb.base import OEB_IMAGES
|
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
|
||||||
from calibre.ebooks.pml.pmlml import PMLMLizer
|
from calibre.ebooks.pml.pmlml import PMLMLizer
|
||||||
|
|
||||||
class PMLOutput(OutputFormatPlugin):
|
class PMLOutput(OutputFormatPlugin):
|
||||||
@ -40,28 +40,26 @@ class PMLOutput(OutputFormatPlugin):
|
|||||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||||
with TemporaryDirectory('_pmlz_output') as tdir:
|
with TemporaryDirectory('_pmlz_output') as tdir:
|
||||||
pmlmlizer = PMLMLizer(log)
|
pmlmlizer = PMLMLizer(log)
|
||||||
content = pmlmlizer.extract_content(oeb_book, opts)
|
pml = unicode(pmlmlizer.extract_content(oeb_book, opts))
|
||||||
with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
|
with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
|
||||||
out.write(content.encode(opts.output_encoding, 'replace'))
|
out.write(pml.encode(opts.output_encoding, 'replace'))
|
||||||
|
|
||||||
self.write_images(oeb_book.manifest, tdir)
|
self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, tdir)
|
||||||
|
|
||||||
log.debug('Compressing output...')
|
log.debug('Compressing output...')
|
||||||
pmlz = ZipFile(output_path, 'w')
|
pmlz = ZipFile(output_path, 'w')
|
||||||
pmlz.add_dir(tdir)
|
pmlz.add_dir(tdir)
|
||||||
|
|
||||||
def write_images(self, manifest, out_dir):
|
def write_images(self, manifest, image_hrefs, out_dir):
|
||||||
for item in manifest:
|
for item in manifest:
|
||||||
if item.media_type in OEB_IMAGES:
|
if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys():
|
||||||
im = Image.open(cStringIO.StringIO(item.data))
|
im = Image.open(cStringIO.StringIO(item.data))
|
||||||
|
|
||||||
data = cStringIO.StringIO()
|
data = cStringIO.StringIO()
|
||||||
im.save(data, 'PNG')
|
im.save(data, 'PNG')
|
||||||
data = data.getvalue()
|
data = data.getvalue()
|
||||||
|
|
||||||
name = os.path.splitext(os.path.basename(item.href))[0] + '.png'
|
path = os.path.join(out_dir, image_hrefs[item.href])
|
||||||
path = os.path.join(out_dir, name)
|
|
||||||
|
|
||||||
with open(path, 'wb') as out:
|
with open(path, 'wb') as out:
|
||||||
out.write(data)
|
out.write(data)
|
||||||
|
|
||||||
|
@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'
|
|||||||
Transform OEB content into PML markup
|
Transform OEB content into PML markup
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import os
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
||||||
@ -138,16 +137,13 @@ class PMLMLizer(object):
|
|||||||
aid = self.link_hrefs[aid]
|
aid = self.link_hrefs[aid]
|
||||||
return u'\\Q="%s"' % aid
|
return u'\\Q="%s"' % aid
|
||||||
|
|
||||||
|
def remove_newlines(self, text):
|
||||||
|
text = text.replace('\r\n', ' ')
|
||||||
|
text = text.replace('\n', ' ')
|
||||||
|
text = text.replace('\r', ' ')
|
||||||
|
return text
|
||||||
|
|
||||||
def clean_text(self, text):
|
def clean_text(self, text):
|
||||||
# Remove excess spaces at beginning and end of lines
|
|
||||||
text = re.sub('(?m)^[ ]+', '', text)
|
|
||||||
text = re.sub('(?m)[ ]+$', '', text)
|
|
||||||
|
|
||||||
# Remove excessive newlines
|
|
||||||
text = re.sub('%s{1,1}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text)
|
|
||||||
text = re.sub('%s{3,}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text)
|
|
||||||
text = re.sub('[ ]{2,}', ' ', text)
|
|
||||||
|
|
||||||
# Remove excessive \p tags
|
# Remove excessive \p tags
|
||||||
text = re.sub(r'\\p\s*\\p', '', text)
|
text = re.sub(r'\\p\s*\\p', '', text)
|
||||||
|
|
||||||
@ -166,6 +162,17 @@ class PMLMLizer(object):
|
|||||||
# Turn all unicode characters into their PML hex equivalent
|
# Turn all unicode characters into their PML hex equivalent
|
||||||
text = re.sub('[^\x00-\x7f]', lambda x: '\\U%04x' % ord(x.group()), text)
|
text = re.sub('[^\x00-\x7f]', lambda x: '\\U%04x' % ord(x.group()), text)
|
||||||
|
|
||||||
|
# Remove excess spaces at beginning and end of lines
|
||||||
|
text = re.sub('(?m)^[ ]+', '', text)
|
||||||
|
text = re.sub('(?m)[ ]+$', '', text)
|
||||||
|
|
||||||
|
# Remove excessive spaces
|
||||||
|
text = re.sub('[ ]{2,}', ' ', text)
|
||||||
|
|
||||||
|
# Remove excessive newlines
|
||||||
|
text = re.sub('\n[ ]+\n', '\n\n', text)
|
||||||
|
text = re.sub('\n\n\n+', '\n\n', text)
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def dump_text(self, elem, stylizer, page, tag_stack=[]):
|
def dump_text(self, elem, stylizer, page, tag_stack=[]):
|
||||||
@ -197,7 +204,7 @@ class PMLMLizer(object):
|
|||||||
if len(self.image_hrefs.keys()) == 0:
|
if len(self.image_hrefs.keys()) == 0:
|
||||||
self.image_hrefs[page.abshref(elem.attrib['src'])] = 'cover.png'
|
self.image_hrefs[page.abshref(elem.attrib['src'])] = 'cover.png'
|
||||||
else:
|
else:
|
||||||
self.image_hrefs[page.abshref(elem.attrib['src'])] = image_name('%s' % len(self.image_hrefs.keys()), self.image_hrefs.keys()).strip('\x00')
|
self.image_hrefs[page.abshref(elem.attrib['src'])] = image_name('%s.png' % len(self.image_hrefs.keys()), self.image_hrefs.keys()).strip('\x00')
|
||||||
text.append('\\m="%s"' % self.image_hrefs[page.abshref(elem.attrib['src'])])
|
text.append('\\m="%s"' % self.image_hrefs[page.abshref(elem.attrib['src'])])
|
||||||
if tag == 'hr':
|
if tag == 'hr':
|
||||||
w = '\\w'
|
w = '\\w'
|
||||||
@ -251,7 +258,7 @@ class PMLMLizer(object):
|
|||||||
|
|
||||||
# Process tags that contain text.
|
# Process tags that contain text.
|
||||||
if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
|
if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
|
||||||
text.append(self.elem_text(elem, tag_stack))
|
text.append(self.remove_newlines(elem.text))
|
||||||
|
|
||||||
for item in elem:
|
for item in elem:
|
||||||
text += self.dump_text(item, stylizer, page, tag_stack)
|
text += self.dump_text(item, stylizer, page, tag_stack)
|
||||||
@ -261,32 +268,19 @@ class PMLMLizer(object):
|
|||||||
close_tag_list.insert(0, tag_stack.pop())
|
close_tag_list.insert(0, tag_stack.pop())
|
||||||
text += self.close_tags(close_tag_list)
|
text += self.close_tags(close_tag_list)
|
||||||
if tag in SEPARATE_TAGS:
|
if tag in SEPARATE_TAGS:
|
||||||
text.append(os.linesep + os.linesep)
|
text.append('\n\n')
|
||||||
|
|
||||||
if 'block' not in tag_stack:
|
if 'block' not in tag_stack:
|
||||||
text.append(os.linesep + os.linesep)
|
text.append('\n\n')
|
||||||
|
|
||||||
#if style['page-break-after'] == 'always':
|
#if style['page-break-after'] == 'always':
|
||||||
# text.append('\\p')
|
# text.append('\\p')
|
||||||
|
|
||||||
if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
|
if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
|
||||||
text.append(self.elem_tail(elem, tag_stack))
|
text.append(self.remove_newlines(elem.tail))
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def elem_text(self, elem, tag_stack):
|
|
||||||
return self.block_text(elem.text, 'block' in tag_stack)
|
|
||||||
|
|
||||||
def elem_tail(self, elem, tag_stack):
|
|
||||||
return self.block_text(elem.tail, 'block' in tag_stack)
|
|
||||||
|
|
||||||
def block_text(self, text, in_block):
|
|
||||||
if in_block:
|
|
||||||
text = text.replace('\n\r', ' ')
|
|
||||||
text = text.replace('\n', ' ')
|
|
||||||
text = text.replace('\r', ' ')
|
|
||||||
return text
|
|
||||||
|
|
||||||
def close_tags(self, tags):
|
def close_tags(self, tags):
|
||||||
text = [u'']
|
text = [u'']
|
||||||
for i in range(0, len(tags)):
|
for i in range(0, len(tags)):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user