From f8642e8eb39ec21f86ac096e78791d81d3d5c656 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 13 May 2009 15:16:08 -0400
Subject: [PATCH 1/2] FB2 Output

---
Note (review): this copy was restored from a whitespace-collapsed, tag-stripped
extraction. Line structure follows the '+'/'-' markers and hunk counts; the
FB2/XML markup inside string literals, the author address, and context-line
indentation were reconstructed and should be confirmed against the repository.

 src/calibre/customize/builtins.py |   3 +-
 src/calibre/ebooks/fb2/fb2ml.py   | 134 ++++++++++++++++++++++++++++++
 src/calibre/ebooks/fb2/output.py  |  37 +++++++++
 3 files changed, 173 insertions(+), 1 deletion(-)
 create mode 100644 src/calibre/ebooks/fb2/fb2ml.py
 create mode 100644 src/calibre/ebooks/fb2/output.py

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 24d960f4c7..1205775922 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -294,6 +294,7 @@ from calibre.ebooks.pdf.input import PDFInput
 from calibre.ebooks.txt.input import TXTInput
 from calibre.ebooks.lit.input import LITInput
 from calibre.ebooks.fb2.input import FB2Input
+from calibre.ebooks.fb2.output import FB2Output
 from calibre.ebooks.odt.input import ODTInput
 from calibre.ebooks.rtf.input import RTFInput
 from calibre.ebooks.html.input import HTMLInput
@@ -324,7 +325,7 @@ from calibre.devices.bebook.driver import BEBOOK, BEBOOK_MINI
 
 plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
            TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
-           FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
+           FB2Input, FB2Output, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
            PMLOutput, MOBIOutput, PDBOutput, LRFOutput, LITOutput]
 plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY,
         EB600, JETBOOK, BEBOOK, BEBOOK_MINI]
diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
new file mode 100644
index 0000000000..be220ebd38
--- /dev/null
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -0,0 +1,134 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Transform OEB content into FB2 markup
+'''
+
+import os
+from base64 import b64encode
+
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
+from calibre.ebooks.oeb.stylizer import Stylizer
+from calibre.ebooks.oeb.base import OEB_IMAGES
+from calibre.constants import __appname__, __version__
+
+from BeautifulSoup import BeautifulSoup
+from lxml import etree
+
+TAG_MAP = {
+    'b' : 'strong',
+    'i' : 'emphasis',
+    'p' : 'p',
+    'div' : 'p',
+}
+
+STYLE_MAP = {
+    'bold' : 'strong',
+    'bolder' : 'strong',
+    'italic' : 'emphasis',
+}
+
+STYLES = [
+    'font-weight',
+    'font-style',
+]
+
+class FB2MLizer(object):
+    def __init__(self, ignore_tables=False):
+        self.ignore_tables = ignore_tables
+
+    def extract_content(self, oeb_book, opts):
+        oeb_book.logger.info('Converting XHTML to FB2 markup...')
+        self.oeb_book = oeb_book
+        self.opts = opts
+        return self.fb2mlize_spine()
+
+    def fb2mlize_spine(self):
+        output = self.fb2_header()
+        for item in self.oeb_book.spine:
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+            output += self.dump_text(item.data.find(XHTML('body')), stylizer)
+        output += self.fb2_body_footer()
+        output += self.fb2mlize_images()
+        output += self.fb2_footer()
+        output = self.clean_text(output)
+        return BeautifulSoup(output.encode('utf-8')).prettify()
+
+    def fb2_header(self):
+        return u'<?xml version="1.0" encoding="utf-8"?> ' \
+        '<FictionBook xmlns:xlink="http://www.w3.org/1999/xlink" ' \
+        'xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"> ' \
+        '<description><title-info><book-title>%s</book-title> ' \
+        '</title-info><document-info> ' \
+        '<program-used>%s - %s</program-used></document-info> ' \
+        '</description><body><section>' % (self.oeb_book.metadata.title[0].value, __appname__, __version__)
+
+    def fb2_body_footer(self):
+        return u'</section></body>'
+
+    def fb2_footer(self):
+        return u'</FictionBook>'
+
+    def fb2mlize_images(self):
+        images = u''
+        for item in self.oeb_book.manifest:
+            if item.media_type in OEB_IMAGES:
+                data = b64encode(item.data)
+                images += '<binary id="%s" content-type="%s">%s</binary>' % (os.path.basename(item.href), item.media_type, data)
+        return images
+
+    def clean_text(self, text):
+        return text.replace('&', '')
+
+    def dump_text(self, elem, stylizer, tag_stack=[]):
+        if not isinstance(elem.tag, basestring) \
+           or namespace(elem.tag) != XHTML_NS:
+            return u''
+
+        fb2_text = u''
+        style = stylizer.style(elem)
+
+        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
+           or style['visibility'] == 'hidden':
+            return u''
+
+        tag = barename(elem.tag)
+        if tag == 'img':
+            fb2_text += '<image xlink:href="#%s" />' % os.path.basename(elem.attrib['src'])
+
+        tag_count = 0
+        if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
+            fb2_tag = TAG_MAP.get(tag, 'p')
+            if fb2_tag and fb2_tag not in tag_stack:
+                tag_count += 1
+                fb2_text += '<%s>' % fb2_tag
+                tag_stack.append(fb2_tag)
+
+            for s in STYLES:
+                style_tag = STYLE_MAP.get(style[s], None)
+                if style_tag:
+                    tag_count += 1
+                    fb2_text += '<%s>' % style_tag
+                    tag_stack.append(style_tag)
+
+            fb2_text += elem.text
+
+        if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
+            if 'p' not in tag_stack:
+                fb2_text += '<p>%s</p>' % elem.tail
+            else:
+                fb2_text += elem.tail
+
+        for item in elem:
+            fb2_text += self.dump_text(item, stylizer, tag_stack)
+
+        for i in range(0, tag_count):
+            fb2_tag = tag_stack.pop()
+            fb2_text += '</%s>' % fb2_tag
+
+        return fb2_text
+
diff --git a/src/calibre/ebooks/fb2/output.py b/src/calibre/ebooks/fb2/output.py
new file mode 100644
index 0000000000..67ee9f468e
--- /dev/null
+++ b/src/calibre/ebooks/fb2/output.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.customize.conversion import OutputFormatPlugin
+from calibre.ebooks.fb2.fb2ml import FB2MLizer
+
+class FB2Output(OutputFormatPlugin):
+
+    name = 'FB2 Output'
+    author = 'John Schember'
+    file_type = 'fb2'
+
+    def convert(self, oeb_book, output_path, input_plugin, opts, log):
+        fb2mlizer = FB2MLizer(ignore_tables=opts.linearize_tables)
+        fb2_content = fb2mlizer.extract_content(oeb_book, opts)
+
+        close = False
+        if not hasattr(output_path, 'write'):
+            close = True
+            if not os.path.exists(os.path.dirname(output_path)) and os.path.dirname(output_path) != '':
+                os.makedirs(os.path.dirname(output_path))
+            out_stream = open(output_path, 'wb')
+        else:
+            out_stream = output_path
+
+        out_stream.seek(0)
+        out_stream.truncate()
+        out_stream.write(fb2_content)
+
+        if close:
+            out_stream.close()
+
From f14a3a8601487521c5c5ea22a177bc3d7a3cf780 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 13 May 2009 15:18:49 -0400
Subject: [PATCH 2/2] remove unused import

---
 src/calibre/ebooks/fb2/fb2ml.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index be220ebd38..e328c3744b 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -17,7 +17,6 @@ from calibre.ebooks.oeb.base import OEB_IMAGES
 from calibre.constants import __appname__, __version__
 
 from BeautifulSoup import BeautifulSoup
-from lxml import etree
 
 TAG_MAP = {
     'b' : 'strong',