FB2 Output

This commit is contained in:
John Schember 2009-05-13 15:16:08 -04:00
parent 8577e979aa
commit f8642e8eb3
3 changed files with 173 additions and 1 deletions

View File

@ -294,6 +294,7 @@ from calibre.ebooks.pdf.input import PDFInput
from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lit.input import LITInput
from calibre.ebooks.fb2.input import FB2Input
from calibre.ebooks.fb2.output import FB2Output
from calibre.ebooks.odt.input import ODTInput
from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.html.input import HTMLInput
@ -324,7 +325,7 @@ from calibre.devices.bebook.driver import BEBOOK, BEBOOK_MINI
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
FB2Input, FB2Output, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
PMLOutput, MOBIOutput, PDBOutput, LRFOutput, LITOutput]
plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY,
EB600, JETBOOK, BEBOOK, BEBOOK_MINI]

View File

@ -0,0 +1,134 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Transform OEB content into FB2 markup
'''
import os
from base64 import b64encode
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import OEB_IMAGES
from calibre.constants import __appname__, __version__
from BeautifulSoup import BeautifulSoup
from lxml import etree
TAG_MAP = {
'b' : 'strong',
'i' : 'emphasis',
'p' : 'p',
'div' : 'p',
}
STYLE_MAP = {
'bold' : 'strong',
'bolder' : 'strong',
'italic' : 'emphasis',
}
STYLES = [
'font-weight',
'font-style',
]
class FB2MLizer(object):
def __init__(self, ignore_tables=False):
self.ignore_tables = ignore_tables
def extract_content(self, oeb_book, opts):
oeb_book.logger.info('Converting XHTML to FB2 markup...')
self.oeb_book = oeb_book
self.opts = opts
return self.fb2mlize_spine()
def fb2mlize_spine(self):
output = self.fb2_header()
for item in self.oeb_book.spine:
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
output += self.fb2_body_footer()
output += self.fb2mlize_images()
output += self.fb2_footer()
output = self.clean_text(output)
return BeautifulSoup(output.encode('utf-8')).prettify()
def fb2_header(self):
return u'<?xml version="1.0" encoding="utf-8"?> ' \
'<FictionBook xmlns:xlink="http://www.w3.org/1999/xlink" ' \
'xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"> ' \
'<description><title-info><book-title>%s</book-title> ' \
'</title-info><document-info> ' \
'<program-used>%s - %s</program-used></document-info> ' \
'</description><body><section>' % (self.oeb_book.metadata.title[0].value, __appname__, __version__)
def fb2_body_footer(self):
return u'</section></body>'
def fb2_footer(self):
return u'</FictionBook>'
def fb2mlize_images(self):
images = u''
for item in self.oeb_book.manifest:
if item.media_type in OEB_IMAGES:
data = b64encode(item.data)
images += '<binary id="%s" content-type="%s">%s</binary>' % (os.path.basename(item.href), item.media_type, data)
return images
def clean_text(self, text):
return text.replace('&', '')
def dump_text(self, elem, stylizer, tag_stack=[]):
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:
return u''
fb2_text = u''
style = stylizer.style(elem)
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
or style['visibility'] == 'hidden':
return u''
tag = barename(elem.tag)
if tag == 'img':
fb2_text += '<image xlink:herf="#%s" />' % os.path.basename(elem.attrib['src'])
tag_count = 0
if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
fb2_tag = TAG_MAP.get(tag, 'p')
if fb2_tag and fb2_tag not in tag_stack:
tag_count += 1
fb2_text += '<%s>' % fb2_tag
tag_stack.append(fb2_tag)
for s in STYLES:
style_tag = STYLE_MAP.get(style[s], None)
if style_tag:
tag_count += 1
fb2_text += '<%s>' % style_tag
tag_stack.append(style_tag)
fb2_text += elem.text
if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
if 'p' not in tag_stack:
fb2_text += '<p>%s</p>' % elem.tail
else:
fb2_text += elem.tail
for item in elem:
fb2_text += self.dump_text(item, stylizer, tag_stack)
for i in range(0, tag_count):
fb2_tag = tag_stack.pop()
fb2_text += '</%s>' % fb2_tag
return fb2_text

View File

@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import OutputFormatPlugin
from calibre.ebooks.fb2.fb2ml import FB2MLizer
class FB2Output(OutputFormatPlugin):
name = 'FB2 Output'
author = 'John Schember'
file_type = 'fb2'
def convert(self, oeb_book, output_path, input_plugin, opts, log):
fb2mlizer = FB2MLizer(ignore_tables=opts.linearize_tables)
fb2_content = fb2mlizer.extract_content(oeb_book, opts)
close = False
if not hasattr(output_path, 'write'):
close = True
if not os.path.exists(os.path.dirname(output_path)) and os.path.dirname(output_path) != '':
os.makedirs(os.path.dirname(output_path))
out_stream = open(output_path, 'wb')
else:
out_stream = output_path
out_stream.seek(0)
out_stream.truncate()
out_stream.write(fb2_content)
if close:
out_stream.close()