diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py index 17f6eb9f46..c5d11edc2b 100644 --- a/src/calibre/ebooks/epub/output.py +++ b/src/calibre/ebooks/epub/output.py @@ -142,6 +142,9 @@ class EPUBOutput(OutputFormatPlugin): def convert(self, oeb, output_path, input_plugin, opts, log): self.log, self.opts, self.oeb = log, opts, oeb + #from calibre.ebooks.oeb.transforms.filenames import UniqueFilenames + #UniqueFilenames()(oeb, opts) + self.workaround_ade_quirks() self.workaround_webkit_quirks() self.upshift_markup() diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 252453d25e..46861357e6 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -8,15 +8,11 @@ __docformat__ = 'restructuredtext en' Transform OEB content into FB2 markup ''' -import cStringIO from base64 import b64encode +from datetime import datetime +from mimetypes import types_map import re - -try: - from PIL import Image - Image -except ImportError: - import Image +import uuid from lxml import etree @@ -25,32 +21,7 @@ from calibre.constants import __appname__, __version__ from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES - -TAG_MAP = { - 'b' : 'strong', - 'i' : 'emphasis', - 'p' : 'p', - 'li' : 'p', - 'div': 'p', -} - -TAG_SPACE = [] - -TAG_IMAGES = [ - 'img', -] - -TAG_LINKS = [ -] - -BLOCK = [ - 'p', -] - -STYLES = [ - ('font-weight', {'bold' : 'strong', 'bolder' : 'strong'}), - ('font-style', {'italic' : 'emphasis'}), -] +from calibre.utils.magick import Image class FB2MLizer(object): ''' @@ -63,24 +34,32 @@ class FB2MLizer(object): def __init__(self, log): self.log = log self.image_hrefs = {} + self.reset_state() + + def reset_state(self): # Used to ensure text and tags are always within
and
self.in_p = False + # Mapping of image names. OEB allows for images to have the same name but be stored + # in different directories. FB2 images are all in a flat layout so we rename all images + # into a sequential numbering system to ensure there are no collisions between image names. + self.image_hrefs = {} def extract_content(self, oeb_book, opts): self.log.info('Converting XHTML to FB2 markup...') self.oeb_book = oeb_book self.opts = opts + return self.fb2mlize_spine() def fb2mlize_spine(self): - self.image_hrefs = {} - self.link_hrefs = {} + self.reset_state() + output = [self.fb2_header()] output.append(self.get_text()) - output.append(self.fb2_body_footer()) output.append(self.fb2mlize_images()) output.append(self.fb2_footer()) output = self.clean_text(u''.join(output)) + if self.opts.pretty_print: return u'\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True) else: @@ -97,65 +76,85 @@ class FB2MLizer(object): return text def fb2_header(self): - author_first = u'' - author_middle = u'' - author_last = u'' + metadata = {} + metadata['author_first'] = u'' + metadata['author_middle'] = u'' + metadata['author_last'] = u'' + metadata['title'] = self.oeb_book.metadata.title[0].value + metadata['appname'] = __appname__ + metadata['version'] = __version__ + metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year) + metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en' + metadata['id'] = '%s' % uuid.uuid4() + author_parts = self.oeb_book.metadata.creator[0].value.split(' ') - if len(author_parts) == 1: - author_last = author_parts[0] + metadata['author_last'] = author_parts[0] elif len(author_parts) == 2: - author_first = author_parts[0] - author_last = author_parts[1] + metadata['author_first'] = author_parts[0] + metadata['author_last'] = author_parts[1] else: - author_first = author_parts[0] - author_middle = ' '.join(author_parts[1:-2]) - 
author_last = author_parts[-1] + metadata['author_first'] = author_parts[0] + metadata['author_middle'] = ' '.join(author_parts[1:-2]) + metadata['author_last'] = author_parts[-1] + + for key, value in metadata.items(): + metadata[key] = prepare_string_for_xml(value) return u''], ['p']
-
- def insert_empty_line(self, tags):
- if self.in_p:
- text = ['']
- closed_tags = []
- tags.reverse()
- for t in tags:
- text.append('%s>' % t)
- closed_tags.append(t)
- if t == 'p':
- break
- text.append('
')
added_p = True
self.in_p = True
-
+
return text, added_p
- def dump_text(self, elem, stylizer, page, tag_stack=[]):
- if not isinstance(elem.tag, basestring) \
- or namespace(elem.tag) != XHTML_NS:
+ def handle_simple_tag(self, tag, tags):
+ s_out = []
+ s_tags = []
+ if tag not in tags:
+ p_out, p_tags = self.ensure_p()
+ s_out += p_out
+ s_tags += p_tags
+ s_out.append('<%s>' % tag)
+ s_tags.append(tag)
+ return s_out, s_tags
+
+ def dump_text(self, elem_tree, stylizer, page, tag_stack=[]):
+ '''
+ This function is intended to be used in a recursive manner. dump_text will
+ run through all elements in the elem_tree and call itself on each element.
+
+ self.image_hrefs will be populated by calling this function.
+
+ @param elem_tree: etree representation of XHTML content to be transformed.
+ @param stylizer: Used to track the style of elements within the tree.
+ @param page: OEB page used to determine absolute urls.
+ @param tag_stack: List of open FB2 tags to take into account.
+
+ @return: List of strings representing the XHTML converted to FB2 markup.
+ '''
+ # Only convert elements whose tag is a string and is part of the XHTML namespace; anything else is skipped.
+ if not isinstance(elem_tree.tag, basestring) or namespace(elem_tree.tag) != XHTML_NS:
return []
- style = stylizer.style(elem)
- if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
- or style['visibility'] == 'hidden':
+ style = stylizer.style(elem_tree)
+ if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') or style['visibility'] == 'hidden':
return []
- fb2_text = []
+ # FB2 generated output.
+ fb2_out = []
+ # FB2 tags in the order they are opened. This will be used to close the tags.
tags = []
+ # First tag in tree
+ tag = barename(elem_tree.tag)
- tag = barename(elem.tag)
-
- if tag in TAG_IMAGES:
- if elem.attrib.get('src', None):
- if page.abshref(elem.attrib['src']) not in self.image_hrefs.keys():
- self.image_hrefs[page.abshref(elem.attrib['src'])] = '_%s.jpg' % len(self.image_hrefs.keys())
- p_txt, p_tag = self.ensure_p()
- fb2_text += p_txt
- tags += p_tag
- fb2_text.append('
') - fb2_text.append(prepare_string_for_xml(elem.text)) + fb2_out.append('
') + fb2_out.append(prepare_string_for_xml(elem_tree.text)) if not self.in_p: - fb2_text.append('
') + fb2_out.append('') - for item in elem: - fb2_text += self.dump_text(item, stylizer, page, tag_stack+tags) + # Process sub-elements. + for item in elem_tree: + fb2_out += self.dump_text(item, stylizer, page, tag_stack+tags) + # Close open FB2 tags. tags.reverse() - fb2_text += self.close_tags(tags) + fb2_out += self.close_tags(tags) - if hasattr(elem, 'tail') and elem.tail: + # Process element text that comes after the close of the XHTML tag but before the next XHTML tag. + if hasattr(elem_tree, 'tail') and elem_tree.tail: if not self.in_p: - fb2_text.append('') - fb2_text.append(prepare_string_for_xml(elem.tail)) + fb2_out.append('
') + fb2_out.append(prepare_string_for_xml(elem_tree.tail)) if not self.in_p: - fb2_text.append('
') + fb2_out.append('') - return fb2_text + return fb2_out def close_tags(self, tags): text = [] diff --git a/src/calibre/ebooks/fb2/output.py b/src/calibre/ebooks/fb2/output.py index 88508b83e0..33714c6e6e 100644 --- a/src/calibre/ebooks/fb2/output.py +++ b/src/calibre/ebooks/fb2/output.py @@ -29,6 +29,14 @@ class FB2Output(OutputFormatPlugin): def convert(self, oeb_book, output_path, input_plugin, opts, log): from calibre.ebooks.oeb.transforms.jacket import linearize_jacket + from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable + + try: + rasterizer = SVGRasterizer() + rasterizer(oeb_book, opts) + except Unavailable: + self.log.warn('SVG rasterizer unavailable, SVG will not be converted') + linearize_jacket(oeb_book) fb2mlizer = FB2MLizer(log) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 2e480a9941..a077fb0225 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -775,6 +775,7 @@ class Manifest(object): return u'Item(id=%r, href=%r, media_type=%r)' \ % (self.id, self.href, self.media_type) + # Parsing {{{ def _parse_xml(self, data): data = xml_to_unicode(data, strip_encoding_pats=True, assume_utf8=True, resolve_entities=True)[0] @@ -1035,6 +1036,8 @@ class Manifest(object): data = item.data.cssText return ('utf-8', data) + # }}} + @dynamic_property def data(self): doc = """Provides MIME type sensitive access to the manifest diff --git a/src/calibre/ebooks/oeb/transforms/filenames.py b/src/calibre/ebooks/oeb/transforms/filenames.py new file mode 100644 index 0000000000..2b22474d30 --- /dev/null +++ b/src/calibre/ebooks/oeb/transforms/filenames.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal