FB2 Output

2025-08-30 23:00:21 -04:00 · 2009-05-13 15:16:08 -04:00 · 2009-05-13 15:16:08 -04:00 · f8642e8eb3
commit f8642e8eb3
parent 8577e979aa
3 changed files with 173 additions and 1 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -294,6 +294,7 @@ from calibre.ebooks.pdf.input import PDFInput
 from calibre.ebooks.txt.input import TXTInput
 from calibre.ebooks.lit.input import LITInput
 from calibre.ebooks.fb2.input import FB2Input
+from calibre.ebooks.fb2.output import FB2Output
 from calibre.ebooks.odt.input import ODTInput
 from calibre.ebooks.rtf.input import RTFInput
 from calibre.ebooks.html.input import HTMLInput
@ -324,7 +325,7 @@ from calibre.devices.bebook.driver import BEBOOK, BEBOOK_MINI

+# Registry of built-in plugins: the format conversion input/output plugins
+# come first, the device drivers are appended by the ``plugins +=`` below.
 plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
        TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
-        FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
+        FB2Input, FB2Output, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
        PMLOutput, MOBIOutput, PDBOutput, LRFOutput, LITOutput]
 plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY,
        EB600, JETBOOK, BEBOOK, BEBOOK_MINI]
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -0,0 +1,134 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Transform OEB content into FB2 markup
+'''
+
+import os
+from base64 import b64encode
+
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
+from calibre.ebooks.oeb.stylizer import Stylizer
+from calibre.ebooks.oeb.base import OEB_IMAGES
+from calibre.constants import __appname__, __version__
+
+from BeautifulSoup import BeautifulSoup
+from lxml import etree
+
+TAG_MAP = {
+    'b' : 'strong',
+    'i' : 'emphasis',
+    'p' : 'p',
+    'div' : 'p',
+}
+
+STYLE_MAP = {
+    'bold'   : 'strong',
+    'bolder' : 'strong',
+    'italic' : 'emphasis',
+}
+
+STYLES = [
+    'font-weight',
+    'font-style',
+]
+
+class FB2MLizer(object):
+    def __init__(self, ignore_tables=False):
+        self.ignore_tables = ignore_tables
+        
+    def extract_content(self, oeb_book, opts):
+        oeb_book.logger.info('Converting XHTML to FB2 markup...')
+        self.oeb_book = oeb_book
+        self.opts = opts
+        return self.fb2mlize_spine()
+        
+    def fb2mlize_spine(self):
+        output = self.fb2_header()
+        for item in self.oeb_book.spine:
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+            output += self.dump_text(item.data.find(XHTML('body')), stylizer)
+        output += self.fb2_body_footer()
+        output += self.fb2mlize_images()
+        output += self.fb2_footer()
+        output = self.clean_text(output)
+        return BeautifulSoup(output.encode('utf-8')).prettify()
+
+    def fb2_header(self):
+        return u'<?xml version="1.0" encoding="utf-8"?> ' \
+        '<FictionBook xmlns:xlink="http://www.w3.org/1999/xlink" ' \
+        'xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"> ' \
+        '<description><title-info><book-title>%s</book-title> ' \
+        '</title-info><document-info> ' \
+        '<program-used>%s - %s</program-used></document-info> ' \
+        '</description><body><section>' % (self.oeb_book.metadata.title[0].value, __appname__, __version__)
+        
+    def fb2_body_footer(self):
+        return u'</section></body>'
+        
+    def fb2_footer(self):
+        return u'</FictionBook>'
+
+    def fb2mlize_images(self):
+        images = u''
+        for item in self.oeb_book.manifest:
+            if item.media_type in OEB_IMAGES:
+                data = b64encode(item.data)
+                images += '<binary id="%s" content-type="%s">%s</binary>' % (os.path.basename(item.href),  item.media_type, data)
+        return images
+
+    def clean_text(self, text):
+        return text.replace('&', '')
+
+    def dump_text(self, elem, stylizer, tag_stack=[]):
+        if not isinstance(elem.tag, basestring) \
+           or namespace(elem.tag) != XHTML_NS:
+            return u''
+            
+        fb2_text = u''
+        style = stylizer.style(elem)
+
+        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
+           or style['visibility'] == 'hidden':
+            return u''
+        
+        tag = barename(elem.tag)
+        if tag == 'img':
+            fb2_text += '<image xlink:herf="#%s" />' % os.path.basename(elem.attrib['src'])
+        
+        tag_count = 0
+        if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
+            fb2_tag = TAG_MAP.get(tag, 'p')
+            if fb2_tag and fb2_tag not in tag_stack:
+                tag_count += 1
+                fb2_text += '<%s>' % fb2_tag
+                tag_stack.append(fb2_tag)
+        
+            for s in STYLES:
+                style_tag = STYLE_MAP.get(style[s], None)
+                if style_tag:
+                    tag_count += 1
+                    fb2_text += '<%s>' % style_tag
+                    tag_stack.append(style_tag)
+
+            fb2_text += elem.text
+
+        if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
+            if 'p' not in tag_stack:
+                fb2_text += '<p>%s</p>' % elem.tail
+            else:
+                fb2_text += elem.tail
+        
+        for item in elem:
+            fb2_text += self.dump_text(item, stylizer, tag_stack)
+
+        for i in range(0, tag_count):
+            fb2_tag = tag_stack.pop()
+            fb2_text += '</%s>' % fb2_tag
+            
+        return fb2_text
+
--- a/src/calibre/ebooks/fb2/output.py
+++ b/src/calibre/ebooks/fb2/output.py
@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.customize.conversion import OutputFormatPlugin
+from calibre.ebooks.fb2.fb2ml import FB2MLizer
+
+class FB2Output(OutputFormatPlugin):
+
+    name = 'FB2 Output'
+    author = 'John Schember'
+    file_type = 'fb2'
+
+    def convert(self, oeb_book, output_path, input_plugin, opts, log):    
+        fb2mlizer = FB2MLizer(ignore_tables=opts.linearize_tables)
+        fb2_content = fb2mlizer.extract_content(oeb_book, opts)
+
+        close = False
+        if not hasattr(output_path, 'write'):
+            close = True
+            if not os.path.exists(os.path.dirname(output_path)) and os.path.dirname(output_path) != '':
+                os.makedirs(os.path.dirname(output_path))
+            out_stream = open(output_path, 'wb')
+        else:
+            out_stream = output_path
+        
+        out_stream.seek(0)
+        out_stream.truncate()
+        out_stream.write(fb2_content)
+        
+        if close:
+            out_stream.close()
+