mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
FB2 Output
This commit is contained in:
parent
8577e979aa
commit
f8642e8eb3
@ -294,6 +294,7 @@ from calibre.ebooks.pdf.input import PDFInput
|
||||
from calibre.ebooks.txt.input import TXTInput
|
||||
from calibre.ebooks.lit.input import LITInput
|
||||
from calibre.ebooks.fb2.input import FB2Input
|
||||
from calibre.ebooks.fb2.output import FB2Output
|
||||
from calibre.ebooks.odt.input import ODTInput
|
||||
from calibre.ebooks.rtf.input import RTFInput
|
||||
from calibre.ebooks.html.input import HTMLInput
|
||||
@ -324,7 +325,7 @@ from calibre.devices.bebook.driver import BEBOOK, BEBOOK_MINI
|
||||
|
||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
|
||||
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
|
||||
FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
|
||||
FB2Input, FB2Output, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
|
||||
PMLOutput, MOBIOutput, PDBOutput, LRFOutput, LITOutput]
|
||||
plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY,
|
||||
EB600, JETBOOK, BEBOOK, BEBOOK_MINI]
|
||||
|
134
src/calibre/ebooks/fb2/fb2ml.py
Normal file
134
src/calibre/ebooks/fb2/fb2ml.py
Normal file
@ -0,0 +1,134 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Transform OEB content into FB2 markup
|
||||
'''
|
||||
|
||||
import os
|
||||
from base64 import b64encode
|
||||
|
||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
from calibre.ebooks.oeb.base import OEB_IMAGES
|
||||
from calibre.constants import __appname__, __version__
|
||||
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
from lxml import etree
|
||||
|
||||
# XHTML tag name -> FB2 tag name. dump_text() looks tags up here and falls
# back to 'p' for any tag that is not listed.
TAG_MAP = dict(
    b='strong',
    i='emphasis',
    p='p',
    div='p',
)
|
||||
|
||||
# Computed CSS property value -> FB2 tag name. dump_text() looks up the value
# of each property named in STYLES; values not listed here produce no tag.
STYLE_MAP = dict(
    bold='strong',
    bolder='strong',
    italic='emphasis',
)
|
||||
|
||||
# CSS properties whose values are translated through STYLE_MAP, in the order
# dump_text() inspects them.
STYLES = ['font-weight', 'font-style']
|
||||
|
||||
class FB2MLizer(object):
    '''
    Serialize the spine of an OEB book as FictionBook 2 (FB2) markup.

    The usual entry point is :meth:`extract_content`, which stores the book
    and conversion options on the instance and returns the finished markup.
    '''

    def __init__(self, ignore_tables=False):
        '''
        :param ignore_tables: Stored on the instance as ``ignore_tables``.
                              No method of this class reads it.
        '''
        self.ignore_tables = ignore_tables

    def extract_content(self, oeb_book, opts):
        '''
        Convert ``oeb_book`` to FB2 markup.

        :param oeb_book: Book to convert. Its ``logger``, ``spine``,
                         ``manifest`` and ``metadata`` attributes are used.
        :param opts: Conversion options; ``opts.output_profile`` is passed
                     to the Stylizer.
        :return: The value returned by :meth:`fb2mlize_spine`.
        '''
        oeb_book.logger.info('Converting XHTML to FB2 markup...')
        self.oeb_book = oeb_book
        self.opts = opts
        return self.fb2mlize_spine()

    def fb2mlize_spine(self):
        '''
        Build the complete document: header, every spine item's body text,
        the body footer, the embedded images and the document footer.

        :return: The markup after :meth:`clean_text`, encoded as UTF-8 and
                 run through ``BeautifulSoup(...).prettify()``.
        '''
        parts = [self.fb2_header()]
        for item in self.oeb_book.spine:
            body = item.data.find(XHTML('body'))
            if body is None:
                # Nothing to serialize; dump_text() would fail on None.
                continue
            stylizer = Stylizer(item.data, item.href, self.oeb_book,
                                self.opts.output_profile)
            parts.append(self.dump_text(body, stylizer))
        parts.append(self.fb2_body_footer())
        parts.append(self.fb2mlize_images())
        parts.append(self.fb2_footer())
        output = self.clean_text(u''.join(parts))
        return BeautifulSoup(output.encode('utf-8')).prettify()

    def fb2_header(self):
        '''
        Return the XML declaration, the opening ``<FictionBook>`` tag, the
        ``<description>`` block and the opening ``<body><section>`` tags.

        The first title in the book metadata and the application name and
        version are interpolated into the description.
        '''
        # NOTE(review): the title is inserted without XML escaping.
        template = (
            u'<?xml version="1.0" encoding="utf-8"?> '
            '<FictionBook xmlns:xlink="http://www.w3.org/1999/xlink" '
            'xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"> '
            '<description><title-info><book-title>%s</book-title> '
            '</title-info><document-info> '
            '<program-used>%s - %s</program-used></document-info> '
            '</description><body><section>'
        )
        return template % (self.oeb_book.metadata.title[0].value,
                           __appname__, __version__)

    def fb2_body_footer(self):
        '''Return the tags closing the section and body opened by the header.'''
        return u'</section></body>'

    def fb2_footer(self):
        '''Return the tag closing the document root.'''
        return u'</FictionBook>'

    def fb2mlize_images(self):
        '''
        Return one ``<binary>`` element per manifest item whose media type
        is in ``OEB_IMAGES``.

        Each element's id is the base name of the item's href, which is the
        same name :meth:`dump_text` uses when it references an image. The
        element content is the base64 encoding of the item's data.
        '''
        binaries = []
        for item in self.oeb_book.manifest:
            if item.media_type in OEB_IMAGES:
                data = b64encode(item.data)
                binaries.append(
                    '<binary id="%s" content-type="%s">%s</binary>'
                    % (os.path.basename(item.href), item.media_type, data))
        return u''.join(binaries)

    def clean_text(self, text):
        '''Return ``text`` with every ``&`` character removed.'''
        # NOTE(review): element text is inserted into the markup unescaped;
        # dropping '&' appears to be a stop-gap against malformed entities.
        return text.replace('&', '')

    def dump_text(self, elem, stylizer, tag_stack=None):
        '''
        Recursively serialize ``elem`` and its descendants as FB2 markup.

        :param elem: Element to serialize. Anything whose tag is not a
                     string in the XHTML namespace yields an empty string.
        :param stylizer: Object whose ``style(elem)`` result is indexed by
                         CSS property name.
        :param tag_stack: List of FB2 tags currently open. It is shared
                          with the recursive calls and mutated in place; a
                          new list is created when omitted.
        :return: The markup for ``elem``, followed by its tail text.
        '''
        # A fresh list per top-level call: a shared default list would carry
        # stale open tags into later calls if one call ever raised.
        if tag_stack is None:
            tag_stack = []

        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            return u''

        style = stylizer.style(elem)

        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
            return u''

        fb2_text = []
        tag = barename(elem.tag)

        if tag == 'img':
            src = elem.attrib.get('src')
            if src:
                # The fragment must match the id written by fb2mlize_images().
                fb2_text.append('<image xlink:href="#%s" />'
                                % os.path.basename(src))

        # Number of tags this call opened, so exactly that many get closed.
        tag_count = 0
        text = getattr(elem, 'text', None)
        if text is not None and text.strip() != '':
            fb2_tag = TAG_MAP.get(tag, 'p')
            if fb2_tag and fb2_tag not in tag_stack:
                tag_count += 1
                fb2_text.append('<%s>' % fb2_tag)
                tag_stack.append(fb2_tag)

            for s in STYLES:
                style_tag = STYLE_MAP.get(style[s], None)
                # Skip a style tag that is already open (e.g. <b> that is
                # also font-weight: bold) rather than nesting a duplicate.
                if style_tag and style_tag not in tag_stack:
                    tag_count += 1
                    fb2_text.append('<%s>' % style_tag)
                    tag_stack.append(style_tag)

            fb2_text.append(text)

        for item in elem:
            fb2_text.append(self.dump_text(item, stylizer, tag_stack))

        for _ in range(tag_count):
            fb2_text.append('</%s>' % tag_stack.pop())

        # The tail is the text that FOLLOWS this element's end tag, so it is
        # written only after the children and after this element's own tags
        # have been closed.
        tail = getattr(elem, 'tail', None)
        if tail is not None and tail.strip() != '':
            if 'p' not in tag_stack:
                fb2_text.append('<p>%s</p>' % tail)
            else:
                fb2_text.append(tail)

        return u''.join(fb2_text)
|
||||
|
37
src/calibre/ebooks/fb2/output.py
Normal file
37
src/calibre/ebooks/fb2/output.py
Normal file
@ -0,0 +1,37 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin
|
||||
from calibre.ebooks.fb2.fb2ml import FB2MLizer
|
||||
|
||||
class FB2Output(OutputFormatPlugin):
    '''
    Output plugin that converts an OEB book with FB2MLizer and writes the
    resulting markup to a path or an already open stream.
    '''

    name = 'FB2 Output'
    author = 'John Schember'
    file_type = 'fb2'

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write ``oeb_book`` as FB2 to ``output_path``.

        :param oeb_book: Book handed to ``FB2MLizer.extract_content``.
        :param output_path: Either an object with a ``write`` method, which
                            is rewound, truncated and written to but left
                            open, or a filesystem path, which is opened in
                            binary mode and closed when writing finishes.
                            Missing parent directories are created.
        :param input_plugin: Not used by this method.
        :param opts: Conversion options; ``opts.linearize_tables`` is passed
                     to FB2MLizer as ``ignore_tables``.
        :param log: Not used by this method.
        '''
        fb2mlizer = FB2MLizer(ignore_tables=opts.linearize_tables)
        fb2_content = fb2mlizer.extract_content(oeb_book, opts)

        # Only close streams this method opened itself.
        close = False
        if not hasattr(output_path, 'write'):
            close = True
            out_dir = os.path.dirname(output_path)
            # An empty dirname means the current directory; nothing to create.
            if out_dir != '' and not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_stream = open(output_path, 'wb')
        else:
            out_stream = output_path

        try:
            out_stream.seek(0)
            out_stream.truncate()
            out_stream.write(fb2_content)
        finally:
            # Release the file handle even if writing fails.
            if close:
                out_stream.close()
|
||||
|
Loading…
x
Reference in New Issue
Block a user