mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
fbdef6b460
@ -142,6 +142,9 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
def convert(self, oeb, output_path, input_plugin, opts, log):
|
def convert(self, oeb, output_path, input_plugin, opts, log):
|
||||||
self.log, self.opts, self.oeb = log, opts, oeb
|
self.log, self.opts, self.oeb = log, opts, oeb
|
||||||
|
|
||||||
|
#from calibre.ebooks.oeb.transforms.filenames import UniqueFilenames
|
||||||
|
#UniqueFilenames()(oeb, opts)
|
||||||
|
|
||||||
self.workaround_ade_quirks()
|
self.workaround_ade_quirks()
|
||||||
self.workaround_webkit_quirks()
|
self.workaround_webkit_quirks()
|
||||||
self.upshift_markup()
|
self.upshift_markup()
|
||||||
|
@ -8,15 +8,11 @@ __docformat__ = 'restructuredtext en'
|
|||||||
Transform OEB content into FB2 markup
|
Transform OEB content into FB2 markup
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import cStringIO
|
|
||||||
from base64 import b64encode
|
from base64 import b64encode
|
||||||
|
from datetime import datetime
|
||||||
|
from mimetypes import types_map
|
||||||
import re
|
import re
|
||||||
|
import uuid
|
||||||
try:
|
|
||||||
from PIL import Image
|
|
||||||
Image
|
|
||||||
except ImportError:
|
|
||||||
import Image
|
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
@ -25,32 +21,7 @@ from calibre.constants import __appname__, __version__
|
|||||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
||||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
|
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
|
||||||
|
from calibre.utils.magick import Image
|
||||||
TAG_MAP = {
|
|
||||||
'b' : 'strong',
|
|
||||||
'i' : 'emphasis',
|
|
||||||
'p' : 'p',
|
|
||||||
'li' : 'p',
|
|
||||||
'div': 'p',
|
|
||||||
}
|
|
||||||
|
|
||||||
TAG_SPACE = []
|
|
||||||
|
|
||||||
TAG_IMAGES = [
|
|
||||||
'img',
|
|
||||||
]
|
|
||||||
|
|
||||||
TAG_LINKS = [
|
|
||||||
]
|
|
||||||
|
|
||||||
BLOCK = [
|
|
||||||
'p',
|
|
||||||
]
|
|
||||||
|
|
||||||
STYLES = [
|
|
||||||
('font-weight', {'bold' : 'strong', 'bolder' : 'strong'}),
|
|
||||||
('font-style', {'italic' : 'emphasis'}),
|
|
||||||
]
|
|
||||||
|
|
||||||
class FB2MLizer(object):
|
class FB2MLizer(object):
|
||||||
'''
|
'''
|
||||||
@ -63,24 +34,32 @@ class FB2MLizer(object):
|
|||||||
def __init__(self, log):
|
def __init__(self, log):
|
||||||
self.log = log
|
self.log = log
|
||||||
self.image_hrefs = {}
|
self.image_hrefs = {}
|
||||||
|
self.reset_state()
|
||||||
|
|
||||||
|
def reset_state(self):
|
||||||
# Used to ensure text and tags are always within <p> and </p>
|
# Used to ensure text and tags are always within <p> and </p>
|
||||||
self.in_p = False
|
self.in_p = False
|
||||||
|
# Mapping of image names. OEB allows for images to have the same name but be stored
|
||||||
|
# in different directories. FB2 images are all in a flat layout so we rename all images
|
||||||
|
# into a sequential numbering system to ensure there are no collisions between image names.
|
||||||
|
self.image_hrefs = {}
|
||||||
|
|
||||||
def extract_content(self, oeb_book, opts):
|
def extract_content(self, oeb_book, opts):
|
||||||
self.log.info('Converting XHTML to FB2 markup...')
|
self.log.info('Converting XHTML to FB2 markup...')
|
||||||
self.oeb_book = oeb_book
|
self.oeb_book = oeb_book
|
||||||
self.opts = opts
|
self.opts = opts
|
||||||
|
|
||||||
return self.fb2mlize_spine()
|
return self.fb2mlize_spine()
|
||||||
|
|
||||||
def fb2mlize_spine(self):
|
def fb2mlize_spine(self):
|
||||||
self.image_hrefs = {}
|
self.reset_state()
|
||||||
self.link_hrefs = {}
|
|
||||||
output = [self.fb2_header()]
|
output = [self.fb2_header()]
|
||||||
output.append(self.get_text())
|
output.append(self.get_text())
|
||||||
output.append(self.fb2_body_footer())
|
|
||||||
output.append(self.fb2mlize_images())
|
output.append(self.fb2mlize_images())
|
||||||
output.append(self.fb2_footer())
|
output.append(self.fb2_footer())
|
||||||
output = self.clean_text(u''.join(output))
|
output = self.clean_text(u''.join(output))
|
||||||
|
|
||||||
if self.opts.pretty_print:
|
if self.opts.pretty_print:
|
||||||
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
|
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
|
||||||
else:
|
else:
|
||||||
@ -97,65 +76,85 @@ class FB2MLizer(object):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
def fb2_header(self):
|
def fb2_header(self):
|
||||||
author_first = u''
|
metadata = {}
|
||||||
author_middle = u''
|
metadata['author_first'] = u''
|
||||||
author_last = u''
|
metadata['author_middle'] = u''
|
||||||
|
metadata['author_last'] = u''
|
||||||
|
metadata['title'] = self.oeb_book.metadata.title[0].value
|
||||||
|
metadata['appname'] = __appname__
|
||||||
|
metadata['version'] = __version__
|
||||||
|
metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year)
|
||||||
|
metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en'
|
||||||
|
metadata['id'] = '%s' % uuid.uuid4()
|
||||||
|
|
||||||
author_parts = self.oeb_book.metadata.creator[0].value.split(' ')
|
author_parts = self.oeb_book.metadata.creator[0].value.split(' ')
|
||||||
|
|
||||||
if len(author_parts) == 1:
|
if len(author_parts) == 1:
|
||||||
author_last = author_parts[0]
|
metadata['author_last'] = author_parts[0]
|
||||||
elif len(author_parts) == 2:
|
elif len(author_parts) == 2:
|
||||||
author_first = author_parts[0]
|
metadata['author_first'] = author_parts[0]
|
||||||
author_last = author_parts[1]
|
metadata['author_last'] = author_parts[1]
|
||||||
else:
|
else:
|
||||||
author_first = author_parts[0]
|
metadata['author_first'] = author_parts[0]
|
||||||
author_middle = ' '.join(author_parts[1:-2])
|
metadata['author_middle'] = ' '.join(author_parts[1:-2])
|
||||||
author_last = author_parts[-1]
|
metadata['author_last'] = author_parts[-1]
|
||||||
|
|
||||||
|
for key, value in metadata.items():
|
||||||
|
metadata[key] = prepare_string_for_xml(value)
|
||||||
|
|
||||||
return u'<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">' \
|
return u'<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">' \
|
||||||
'<description>' \
|
'<description>' \
|
||||||
'<title-info>' \
|
'<title-info>' \
|
||||||
'<genre></genre>' \
|
'<genre>antique</genre>' \
|
||||||
'<author>' \
|
'<author>' \
|
||||||
'<first-name>%s</first-name>' \
|
'<first-name>%(author_first)s</first-name>' \
|
||||||
'<middle-name>%s</middle-name>' \
|
'<middle-name>%(author_middle)s</middle-name>' \
|
||||||
'<last-name>%s</last-name>' \
|
'<last-name>%(author_last)s</last-name>' \
|
||||||
'</author>' \
|
'</author>' \
|
||||||
'<book-title>%s</book-title>' \
|
'<book-title>%(title)s</book-title>' \
|
||||||
'<annotation><p/></annotation>' \
|
'<lang>%(lang)s</lang>' \
|
||||||
'</title-info>' \
|
'</title-info>' \
|
||||||
'<document-info>' \
|
'<document-info>' \
|
||||||
'<program-used>%s %s</program-used>' \
|
'<author>' \
|
||||||
|
'<first-name></first-name>' \
|
||||||
|
'<middle-name></middle-name>' \
|
||||||
|
'<last-name></last-name>' \
|
||||||
|
'</author>' \
|
||||||
|
'<program-used>%(appname)s %(version)s</program-used>' \
|
||||||
|
'<date>%(date)s</date>' \
|
||||||
|
'<id>%(id)s</id>' \
|
||||||
|
'<version>1.0</version>' \
|
||||||
'</document-info>' \
|
'</document-info>' \
|
||||||
'</description><body>' % tuple(map(prepare_string_for_xml, (author_first, author_middle, author_last,
|
'</description>' % metadata
|
||||||
self.oeb_book.metadata.title[0].value, __appname__, __version__)))
|
|
||||||
|
def fb2_footer(self):
|
||||||
|
return u'</FictionBook>'
|
||||||
|
|
||||||
def get_text(self):
|
def get_text(self):
|
||||||
text = []
|
text = ['<body>']
|
||||||
for item in self.oeb_book.spine:
|
for item in self.oeb_book.spine:
|
||||||
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
|
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||||
text.append('<section>')
|
text.append('<section>')
|
||||||
text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
|
text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
|
||||||
text.append('</section>')
|
text.append('</section>')
|
||||||
return ''.join(text)
|
return ''.join(text) + '</body>'
|
||||||
|
|
||||||
def fb2_body_footer(self):
|
|
||||||
return u'</body>'
|
|
||||||
|
|
||||||
def fb2_footer(self):
|
|
||||||
return u'</FictionBook>'
|
|
||||||
|
|
||||||
def fb2mlize_images(self):
|
def fb2mlize_images(self):
|
||||||
|
'''
|
||||||
|
This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function.
|
||||||
|
'''
|
||||||
images = []
|
images = []
|
||||||
for item in self.oeb_book.manifest:
|
for item in self.oeb_book.manifest:
|
||||||
|
# Don't write the image if it's not referenced in the document's text.
|
||||||
|
if item.href not in self.image_hrefs:
|
||||||
|
continue
|
||||||
if item.media_type in OEB_RASTER_IMAGES:
|
if item.media_type in OEB_RASTER_IMAGES:
|
||||||
try:
|
try:
|
||||||
im = Image.open(cStringIO.StringIO(item.data)).convert('RGB')
|
if not item.media_type == types_map['.jpeg'] or not item.media_type == types_map['.jpg']:
|
||||||
data = cStringIO.StringIO()
|
im = Image()
|
||||||
im.save(data, 'JPEG')
|
im.load(item.data)
|
||||||
data = data.getvalue()
|
im.set_compression_quality(70)
|
||||||
|
data = im.export('jpg')
|
||||||
raw_data = b64encode(data)
|
raw_data = b64encode(data)
|
||||||
# Don't put the encoded image on a single line.
|
# Don't put the encoded image on a single line.
|
||||||
data = ''
|
data = ''
|
||||||
@ -166,7 +165,7 @@ class FB2MLizer(object):
|
|||||||
col = 1
|
col = 1
|
||||||
col += 1
|
col += 1
|
||||||
data += char
|
data += char
|
||||||
images.append('<binary id="%s" content-type="%s">%s\n</binary>' % (self.image_hrefs.get(item.href, '_0000.JPEG'), item.media_type, data))
|
images.append('<binary id="%s" content-type="image/jpeg">%s\n</binary>' % (self.image_hrefs[item.href], data))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.log.error('Error: Could not include file %s because ' \
|
self.log.error('Error: Could not include file %s because ' \
|
||||||
'%s.' % (item.href, e))
|
'%s.' % (item.href, e))
|
||||||
@ -178,29 +177,11 @@ class FB2MLizer(object):
|
|||||||
else:
|
else:
|
||||||
self.in_p = True
|
self.in_p = True
|
||||||
return ['<p>'], ['p']
|
return ['<p>'], ['p']
|
||||||
|
|
||||||
def insert_empty_line(self, tags):
|
|
||||||
if self.in_p:
|
|
||||||
text = ['']
|
|
||||||
closed_tags = []
|
|
||||||
tags.reverse()
|
|
||||||
for t in tags:
|
|
||||||
text.append('</%s>' % t)
|
|
||||||
closed_tags.append(t)
|
|
||||||
if t == 'p':
|
|
||||||
break
|
|
||||||
text.append('<empty-line />')
|
|
||||||
closed_tags.reverse()
|
|
||||||
for t in closed_tags:
|
|
||||||
text.append('<%s>' % t)
|
|
||||||
return text
|
|
||||||
else:
|
|
||||||
return ['<empty-line />']
|
|
||||||
|
|
||||||
def close_open_p(self, tags):
|
def close_open_p(self, tags):
|
||||||
text = ['']
|
text = ['']
|
||||||
added_p = False
|
added_p = False
|
||||||
|
|
||||||
if self.in_p:
|
if self.in_p:
|
||||||
# Close all up to p. Close p. Reopen all closed tags including p.
|
# Close all up to p. Close p. Reopen all closed tags including p.
|
||||||
closed_tags = []
|
closed_tags = []
|
||||||
@ -217,86 +198,128 @@ class FB2MLizer(object):
|
|||||||
text.append('<p>')
|
text.append('<p>')
|
||||||
added_p = True
|
added_p = True
|
||||||
self.in_p = True
|
self.in_p = True
|
||||||
|
|
||||||
return text, added_p
|
return text, added_p
|
||||||
|
|
||||||
def dump_text(self, elem, stylizer, page, tag_stack=[]):
|
def handle_simple_tag(self, tag, tags):
|
||||||
if not isinstance(elem.tag, basestring) \
|
s_out = []
|
||||||
or namespace(elem.tag) != XHTML_NS:
|
s_tags = []
|
||||||
|
if tag not in tags:
|
||||||
|
p_out, p_tags = self.ensure_p()
|
||||||
|
s_out += p_out
|
||||||
|
s_tags += p_tags
|
||||||
|
s_out.append('<%s>' % tag)
|
||||||
|
s_tags.append(tag)
|
||||||
|
return s_out, s_tags
|
||||||
|
|
||||||
|
def dump_text(self, elem_tree, stylizer, page, tag_stack=[]):
|
||||||
|
'''
|
||||||
|
This function is intended to be used in a recursive manner. dump_text will
|
||||||
|
run through all elements in the elem_tree and call itself on each element.
|
||||||
|
|
||||||
|
self.image_hrefs will be populated by calling this function.
|
||||||
|
|
||||||
|
@param elem_tree: etree representation of XHTML content to be transformed.
|
||||||
|
@param stylizer: Used to track the style of elements within the tree.
|
||||||
|
@param page: OEB page used to determine absolute urls.
|
||||||
|
@param tag_stack: List of open FB2 tags to take into account.
|
||||||
|
|
||||||
|
@return: List of strings representing the XHTML converted to FB2 markup.
|
||||||
|
'''
|
||||||
|
# Ensure what we are converting is not a string and that the first tag is part of the XHTML namespace.
|
||||||
|
if not isinstance(elem_tree.tag, basestring) or namespace(elem_tree.tag) != XHTML_NS:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
style = stylizer.style(elem)
|
style = stylizer.style(elem_tree)
|
||||||
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
|
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') or style['visibility'] == 'hidden':
|
||||||
or style['visibility'] == 'hidden':
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
fb2_text = []
|
# FB2 generated output.
|
||||||
|
fb2_out = []
|
||||||
|
# FB2 tags in the order they are opened. This will be used to close the tags.
|
||||||
tags = []
|
tags = []
|
||||||
|
# First tag in tree
|
||||||
|
tag = barename(elem_tree.tag)
|
||||||
|
|
||||||
tag = barename(elem.tag)
|
# Process the XHTML tag if it needs to be converted to an FB2 tag.
|
||||||
|
|
||||||
if tag in TAG_IMAGES:
|
|
||||||
if elem.attrib.get('src', None):
|
|
||||||
if page.abshref(elem.attrib['src']) not in self.image_hrefs.keys():
|
|
||||||
self.image_hrefs[page.abshref(elem.attrib['src'])] = '_%s.jpg' % len(self.image_hrefs.keys())
|
|
||||||
p_txt, p_tag = self.ensure_p()
|
|
||||||
fb2_text += p_txt
|
|
||||||
tags += p_tag
|
|
||||||
fb2_text.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem.attrib['src'])])
|
|
||||||
|
|
||||||
if tag == 'h1' and self.opts.h1_to_title or tag == 'h2' and self.opts.h2_to_title or tag == 'h3' and self.opts.h3_to_title:
|
if tag == 'h1' and self.opts.h1_to_title or tag == 'h2' and self.opts.h2_to_title or tag == 'h3' and self.opts.h3_to_title:
|
||||||
fb2_text.append('<title>')
|
fb2_out.append('<title>')
|
||||||
tags.append('title')
|
tags.append('title')
|
||||||
if tag == 'br':
|
if tag == 'img':
|
||||||
fb2_text += self.insert_empty_line(tag_stack+tags)
|
if elem_tree.attrib.get('src', None):
|
||||||
|
# Only write the image tag if it is in the manifest.
|
||||||
fb2_tag = TAG_MAP.get(tag, None)
|
if page.abshref(elem_tree.attrib['src']) in self.oeb_book.manifest.hrefs.keys():
|
||||||
if fb2_tag == 'p':
|
if page.abshref(elem_tree.attrib['src']) not in self.image_hrefs.keys():
|
||||||
|
self.image_hrefs[page.abshref(elem_tree.attrib['src'])] = '_%s.jpg' % len(self.image_hrefs.keys())
|
||||||
|
p_txt, p_tag = self.ensure_p()
|
||||||
|
fb2_out += p_txt
|
||||||
|
tags += p_tag
|
||||||
|
fb2_out.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem_tree.attrib['src'])])
|
||||||
|
elif tag == 'br':
|
||||||
|
if self.in_p:
|
||||||
|
closed_tags = []
|
||||||
|
open_tags = tag_stack+tags
|
||||||
|
open_tags.reverse()
|
||||||
|
for t in open_tags:
|
||||||
|
fb2_out.append('</%s>' % t)
|
||||||
|
closed_tags.append(t)
|
||||||
|
if t == 'p':
|
||||||
|
break
|
||||||
|
fb2_out.append('<empty-line />')
|
||||||
|
closed_tags.reverse()
|
||||||
|
for t in closed_tags:
|
||||||
|
fb2_out.append('<%s>' % t)
|
||||||
|
else:
|
||||||
|
fb2_out.append('<empty-line />')
|
||||||
|
elif tag in ('div', 'li', 'p'):
|
||||||
p_text, added_p = self.close_open_p(tag_stack+tags)
|
p_text, added_p = self.close_open_p(tag_stack+tags)
|
||||||
fb2_text += p_text
|
fb2_out += p_text
|
||||||
if added_p:
|
if added_p:
|
||||||
tags.append('p')
|
tags.append('p')
|
||||||
elif fb2_tag and fb2_tag not in tag_stack+tags:
|
elif tag == 'b':
|
||||||
p_text, p_tags = self.ensure_p()
|
s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags)
|
||||||
fb2_text += p_text
|
fb2_out += s_out
|
||||||
tags += p_tags
|
tags += s_tags
|
||||||
fb2_text.append('<%s>' % fb2_tag)
|
elif tag == 'i':
|
||||||
tags.append(fb2_tag)
|
s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags)
|
||||||
|
fb2_out += s_out
|
||||||
|
tags += s_tags
|
||||||
|
|
||||||
# Processes style information
|
# Processes style information.
|
||||||
for s in STYLES:
|
if style['font-style'] == 'italic':
|
||||||
style_tag = s[1].get(style[s[0]], None)
|
s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags)
|
||||||
if style_tag and style_tag not in tag_stack+tags:
|
fb2_out += s_out
|
||||||
p_text, p_tags = self.ensure_p()
|
tags += s_tags
|
||||||
fb2_text += p_text
|
elif style['font-weight'] in ('bold', 'bolder'):
|
||||||
tags += p_tags
|
s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags)
|
||||||
fb2_text.append('<%s>' % style_tag)
|
fb2_out += s_out
|
||||||
tags.append(style_tag)
|
tags += s_tags
|
||||||
|
|
||||||
if tag in TAG_SPACE:
|
# Process element text.
|
||||||
fb2_text.append(' ')
|
if hasattr(elem_tree, 'text') and elem_tree.text:
|
||||||
|
|
||||||
if hasattr(elem, 'text') and elem.text:
|
|
||||||
if not self.in_p:
|
if not self.in_p:
|
||||||
fb2_text.append('<p>')
|
fb2_out.append('<p>')
|
||||||
fb2_text.append(prepare_string_for_xml(elem.text))
|
fb2_out.append(prepare_string_for_xml(elem_tree.text))
|
||||||
if not self.in_p:
|
if not self.in_p:
|
||||||
fb2_text.append('</p>')
|
fb2_out.append('</p>')
|
||||||
|
|
||||||
for item in elem:
|
# Process sub-elements.
|
||||||
fb2_text += self.dump_text(item, stylizer, page, tag_stack+tags)
|
for item in elem_tree:
|
||||||
|
fb2_out += self.dump_text(item, stylizer, page, tag_stack+tags)
|
||||||
|
|
||||||
|
# Close open FB2 tags.
|
||||||
tags.reverse()
|
tags.reverse()
|
||||||
fb2_text += self.close_tags(tags)
|
fb2_out += self.close_tags(tags)
|
||||||
|
|
||||||
if hasattr(elem, 'tail') and elem.tail:
|
# Process element text that comes after the close of the XHTML tag but before the next XHTML tag.
|
||||||
|
if hasattr(elem_tree, 'tail') and elem_tree.tail:
|
||||||
if not self.in_p:
|
if not self.in_p:
|
||||||
fb2_text.append('<p>')
|
fb2_out.append('<p>')
|
||||||
fb2_text.append(prepare_string_for_xml(elem.tail))
|
fb2_out.append(prepare_string_for_xml(elem_tree.tail))
|
||||||
if not self.in_p:
|
if not self.in_p:
|
||||||
fb2_text.append('</p>')
|
fb2_out.append('</p>')
|
||||||
|
|
||||||
return fb2_text
|
return fb2_out
|
||||||
|
|
||||||
def close_tags(self, tags):
|
def close_tags(self, tags):
|
||||||
text = []
|
text = []
|
||||||
|
@ -29,6 +29,14 @@ class FB2Output(OutputFormatPlugin):
|
|||||||
|
|
||||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||||
from calibre.ebooks.oeb.transforms.jacket import linearize_jacket
|
from calibre.ebooks.oeb.transforms.jacket import linearize_jacket
|
||||||
|
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
|
||||||
|
|
||||||
|
try:
|
||||||
|
rasterizer = SVGRasterizer()
|
||||||
|
rasterizer(oeb_book, opts)
|
||||||
|
except Unavailable:
|
||||||
|
self.log.warn('SVG rasterizer unavailable, SVG will not be converted')
|
||||||
|
|
||||||
linearize_jacket(oeb_book)
|
linearize_jacket(oeb_book)
|
||||||
|
|
||||||
fb2mlizer = FB2MLizer(log)
|
fb2mlizer = FB2MLizer(log)
|
||||||
|
@ -775,6 +775,7 @@ class Manifest(object):
|
|||||||
return u'Item(id=%r, href=%r, media_type=%r)' \
|
return u'Item(id=%r, href=%r, media_type=%r)' \
|
||||||
% (self.id, self.href, self.media_type)
|
% (self.id, self.href, self.media_type)
|
||||||
|
|
||||||
|
# Parsing {{{
|
||||||
def _parse_xml(self, data):
|
def _parse_xml(self, data):
|
||||||
data = xml_to_unicode(data, strip_encoding_pats=True,
|
data = xml_to_unicode(data, strip_encoding_pats=True,
|
||||||
assume_utf8=True, resolve_entities=True)[0]
|
assume_utf8=True, resolve_entities=True)[0]
|
||||||
@ -1035,6 +1036,8 @@ class Manifest(object):
|
|||||||
data = item.data.cssText
|
data = item.data.cssText
|
||||||
return ('utf-8', data)
|
return ('utf-8', data)
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
@dynamic_property
|
@dynamic_property
|
||||||
def data(self):
|
def data(self):
|
||||||
doc = """Provides MIME type sensitive access to the manifest
|
doc = """Provides MIME type sensitive access to the manifest
|
||||||
|
130
src/calibre/ebooks/oeb/transforms/filenames.py
Normal file
130
src/calibre/ebooks/oeb/transforms/filenames.py
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import posixpath
|
||||||
|
from urlparse import urldefrag
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
import cssutils
|
||||||
|
|
||||||
|
from calibre.ebooks.oeb.base import rewrite_links, urlnormalize
|
||||||
|
|
||||||
|
class RenameFiles(object):
|
||||||
|
|
||||||
|
'''
|
||||||
|
Rename files and adjust all links pointing to them. Note that the spine
|
||||||
|
and manifest are not touched by this transform.
|
||||||
|
'''
|
||||||
|
|
||||||
|
def __init__(self, rename_map):
|
||||||
|
self.rename_map = rename_map
|
||||||
|
|
||||||
|
def __call__(self, oeb, opts):
|
||||||
|
self.log = oeb.logger
|
||||||
|
self.opts = opts
|
||||||
|
self.oeb = oeb
|
||||||
|
|
||||||
|
for item in oeb.manifest.items:
|
||||||
|
self.current_item = item
|
||||||
|
if etree.iselement(item.data):
|
||||||
|
rewrite_links(self.current_item.data, self.url_replacer)
|
||||||
|
elif hasattr(item.data, 'cssText'):
|
||||||
|
cssutils.replaceUrls(item.data, self.url_replacer)
|
||||||
|
|
||||||
|
if self.oeb.guide:
|
||||||
|
for ref in self.oeb.guide.values():
|
||||||
|
href = urlnormalize(ref.href)
|
||||||
|
href, frag = urldefrag(href)
|
||||||
|
replacement = self.rename_map.get(href, None)
|
||||||
|
if replacement is not None:
|
||||||
|
nhref = replacement
|
||||||
|
if frag:
|
||||||
|
nhref += '#' + frag
|
||||||
|
ref.href = nhref
|
||||||
|
|
||||||
|
if self.oeb.toc:
|
||||||
|
self.fix_toc_entry(self.oeb.toc)
|
||||||
|
|
||||||
|
|
||||||
|
def fix_toc_entry(self, toc):
|
||||||
|
if toc.href:
|
||||||
|
href = urlnormalize(toc.href)
|
||||||
|
href, frag = urldefrag(href)
|
||||||
|
replacement = self.rename_map.get(href, None)
|
||||||
|
|
||||||
|
if replacement is not None:
|
||||||
|
nhref = replacement
|
||||||
|
if frag:
|
||||||
|
nhref = '#'.join((nhref, frag))
|
||||||
|
toc.href = nhref
|
||||||
|
|
||||||
|
for x in toc:
|
||||||
|
self.fix_toc_entry(x)
|
||||||
|
|
||||||
|
def url_replacer(self, orig_url):
|
||||||
|
url = urlnormalize(orig_url)
|
||||||
|
path, frag = urldefrag(url)
|
||||||
|
href = self.current_item.abshref(path)
|
||||||
|
replacement = self.rename_map.get(href, None)
|
||||||
|
if replacement is None:
|
||||||
|
return orig_url
|
||||||
|
replacement = self.current_item.relhref(replacement)
|
||||||
|
if frag:
|
||||||
|
replacement += '#' + frag
|
||||||
|
return replacement
|
||||||
|
|
||||||
|
class UniqueFilenames(object):
|
||||||
|
|
||||||
|
'Ensure that every item in the manifest has a unique filename'
|
||||||
|
|
||||||
|
def __call__(self, oeb, opts):
|
||||||
|
self.log = oeb.logger
|
||||||
|
self.opts = opts
|
||||||
|
self.oeb = oeb
|
||||||
|
|
||||||
|
self.seen_filenames = set([])
|
||||||
|
self.rename_map = {}
|
||||||
|
|
||||||
|
for item in list(oeb.manifest.items):
|
||||||
|
fname = posixpath.basename(item.href)
|
||||||
|
if fname in self.seen_filenames:
|
||||||
|
suffix = self.unique_suffix(fname)
|
||||||
|
data = item.data
|
||||||
|
base, ext = posixpath.splitext(item.href)
|
||||||
|
nhref = base + suffix + ext
|
||||||
|
nhref = oeb.manifest.generate(href=nhref)[1]
|
||||||
|
nitem = oeb.manifest.add(item.id, nhref, item.media_type, data=data,
|
||||||
|
fallback=item.fallback)
|
||||||
|
self.seen_filenames.add(posixpath.basename(nhref))
|
||||||
|
self.rename_map[item.href] = nhref
|
||||||
|
if item.spine_position is not None:
|
||||||
|
oeb.spine.insert(item.spine_position, nitem, item.linear)
|
||||||
|
oeb.spine.remove(item)
|
||||||
|
oeb.manifest.remove(item)
|
||||||
|
else:
|
||||||
|
self.seen_filenames.add(fname)
|
||||||
|
|
||||||
|
if self.rename_map:
|
||||||
|
self.log('Found non-unique filenames, renaming to support broken'
|
||||||
|
' EPUB readers like FBReader, Aldiko and Stanza...')
|
||||||
|
from pprint import pformat
|
||||||
|
self.log.debug(pformat(self.rename_map))
|
||||||
|
|
||||||
|
renamer = RenameFiles(self.rename_map)
|
||||||
|
renamer(oeb, opts)
|
||||||
|
|
||||||
|
|
||||||
|
def unique_suffix(self, fname):
|
||||||
|
base, ext = posixpath.splitext(fname)
|
||||||
|
c = 0
|
||||||
|
while True:
|
||||||
|
c += 1
|
||||||
|
suffix = '_u%d'%c
|
||||||
|
candidate = base + suffix + ext
|
||||||
|
if candidate not in self.seen_filenames:
|
||||||
|
return suffix
|
||||||
|
|
@ -114,27 +114,27 @@ Montag
|
|||||||
Dienstag
|
Dienstag
|
||||||
Januar
|
Januar
|
||||||
Februar
|
Februar
|
||||||
März
|
März
|
||||||
Fuße
|
Fuße
|
||||||
Fluße
|
Fluße
|
||||||
Flusse
|
Flusse
|
||||||
flusse
|
flusse
|
||||||
fluße
|
fluße
|
||||||
flüße
|
flüße
|
||||||
flüsse
|
flüsse
|
||||||
'''
|
'''
|
||||||
german_good = '''
|
german_good = '''
|
||||||
Dienstag
|
Dienstag
|
||||||
Februar
|
Februar
|
||||||
flusse
|
flusse
|
||||||
Flusse
|
Flusse
|
||||||
fluße
|
fluße
|
||||||
Fluße
|
Fluße
|
||||||
flüsse
|
flüsse
|
||||||
flüße
|
flüße
|
||||||
Fuße
|
Fuße
|
||||||
Januar
|
Januar
|
||||||
März
|
März
|
||||||
Montag
|
Montag
|
||||||
Sonntag'''
|
Sonntag'''
|
||||||
french = '''
|
french = '''
|
||||||
@ -142,49 +142,49 @@ dimanche
|
|||||||
lundi
|
lundi
|
||||||
mardi
|
mardi
|
||||||
janvier
|
janvier
|
||||||
février
|
février
|
||||||
mars
|
mars
|
||||||
déjÃ
|
déjà
|
||||||
Meme
|
Meme
|
||||||
deja
|
deja
|
||||||
même
|
même
|
||||||
dejÃ
|
dejà
|
||||||
bpef
|
bpef
|
||||||
bœg
|
bœg
|
||||||
Boef
|
Boef
|
||||||
Mémé
|
Mémé
|
||||||
bœf
|
bœf
|
||||||
boef
|
boef
|
||||||
bnef
|
bnef
|
||||||
pêche
|
pêche
|
||||||
pèché
|
pèché
|
||||||
pêché
|
pêché
|
||||||
pêche
|
pêche
|
||||||
pêché'''
|
pêché'''
|
||||||
french_good = '''
|
french_good = '''
|
||||||
bnef
|
bnef
|
||||||
boef
|
boef
|
||||||
Boef
|
Boef
|
||||||
bœf
|
bœf
|
||||||
bœg
|
bœg
|
||||||
bpef
|
bpef
|
||||||
deja
|
deja
|
||||||
dejÃ
|
dejà
|
||||||
déjÃ
|
déjà
|
||||||
dimanche
|
dimanche
|
||||||
février
|
février
|
||||||
janvier
|
janvier
|
||||||
lundi
|
lundi
|
||||||
mardi
|
mardi
|
||||||
mars
|
mars
|
||||||
Meme
|
Meme
|
||||||
Mémé
|
Mémé
|
||||||
même
|
même
|
||||||
pèché
|
pèché
|
||||||
pêche
|
pêche
|
||||||
pêche
|
pêche
|
||||||
pêché
|
pêché
|
||||||
pêché'''
|
pêché'''
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def create(l):
|
def create(l):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user