Basic RTF output.

This commit is contained in:
John Schember 2009-05-25 21:31:51 -04:00
parent f35ceec77c
commit b92c2dc002
5 changed files with 211 additions and 2 deletions

View File

@ -337,6 +337,7 @@ from calibre.ebooks.pdb.output import PDBOutput
from calibre.ebooks.pdf.output import PDFOutput from calibre.ebooks.pdf.output import PDFOutput
from calibre.ebooks.pml.output import PMLOutput from calibre.ebooks.pml.output import PMLOutput
from calibre.ebooks.rb.output import RBOutput from calibre.ebooks.rb.output import RBOutput
from calibre.ebooks.rtf.output import RTFOutput
from calibre.ebooks.txt.output import TXTOutput from calibre.ebooks.txt.output import TXTOutput
from calibre.customize.profiles import input_profiles, output_profiles from calibre.customize.profiles import input_profiles, output_profiles
@ -382,6 +383,7 @@ plugins += [
PDFOutput, PDFOutput,
PMLOutput, PMLOutput,
RBOutput, RBOutput,
RTFOutput,
TXTOutput, TXTOutput,
] ]
plugins += [ plugins += [

View File

@ -6,7 +6,8 @@ __docformat__ = 'restructuredtext en'
import os import os
import Image, cStringIO import Image
import cStringIO
from calibre.customize.conversion import OutputFormatPlugin from calibre.customize.conversion import OutputFormatPlugin
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory

View File

@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from calibre.ebooks.rtf.rtfml import RTFMLizer
from calibre.customize.conversion import OutputFormatPlugin
class RTFOutput(OutputFormatPlugin):
    '''
    Output format plugin that serialises an OEB book to an RTF file.
    '''

    name = 'RTF Output'
    author = 'John Schember'
    file_type = 'rtf'

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Render `oeb_book` as RTF markup and write it to `output_path`.

        :param oeb_book: Book handed to `RTFMLizer.extract_content`.
        :param output_path: Either a filesystem path or an object with a
            `write` method. A path is opened in binary mode (creating the
            parent directory if needed) and closed again before returning.
            A stream is rewound and truncated but left open for the caller.
        :param input_plugin: Unused here.
        :param opts: Conversion options; `linearize_tables` is read here and
            the whole object is forwarded to the RTFMLizer.
        :param log: Unused here.
        '''
        rtfmlitzer = RTFMLizer(ignore_tables=opts.linearize_tables)
        content = rtfmlitzer.extract_content(oeb_book, opts)

        # Only streams opened here are closed here.
        close = not hasattr(output_path, 'write')
        if close:
            out_dir = os.path.dirname(output_path)
            # An empty dirname means the current directory; nothing to create.
            if out_dir != '' and not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_stream = open(output_path, 'wb')
        else:
            out_stream = output_path

        try:
            out_stream.seek(0)
            out_stream.truncate()
            # The RTF header declares \ansicpg1252, so the bytes must be
            # cp1252; characters outside that code page become '?'.
            out_stream.write(content.encode('cp1252', 'replace'))
        finally:
            # Release the file handle even if writing failed.
            if close:
                out_stream.close()

View File

@ -0,0 +1,171 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Transform OEB content into RTF markup
'''
import os
import re
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
# Control-word runs that recur in the tables below.
_PARAGRAPH = '\\par \\pard \\hyphpar \\keep'
_HEADING = '\\b ' + _PARAGRAPH
_INDENTED_PARAGRAPH = _PARAGRAPH + ' \t'
_LINE_BREAK = '{\\line }'

# XHTML tag name -> RTF control words emitted at the start of a group that
# wraps the element's content.
TAGS = {
    'b': '\\b',
    'del': '\\deleted',
    'li': _INDENTED_PARAGRAPH,
    'p': _INDENTED_PARAGRAPH,
    #'ol': '\\pn \\pnrestart \\pnlvlblt',
    'sub': '\\sub',
    'sup': '\\super',
    'u': '\\ul',
    #'ul': '\\pn \\pnrestart \\pndec',
}
# Every heading level gets the same bold paragraph treatment.
TAGS.update(dict.fromkeys(('h1', 'h2', 'h3', 'h4', 'h5', 'h6'), _HEADING))

# XHTML tag name -> complete, self-closed RTF snippet emitted once for the
# element.
SINGLE_TAGS = {
    'br': _LINE_BREAK,
    'div': _LINE_BREAK,
}

# (CSS property, {property value: RTF control words}) pairs, consulted in
# this order for every element.
STYLES = [
    ('display', {'block': _PARAGRAPH}),
    ('font-weight', {'bold': '\\b', 'bolder': '\\b'}),
    ('font-style', {'italic': '\\i'}),
    # ('page-break-before', {'always': '\\pagebb '}),
    ('text-align', {
        'center': '\\qc',
        'left': '\\ql',
        'right': '\\qr',
        'justify': '\\qj',
    }),
    ('text-decoration', {'line-through': '\\strike', 'underline': '\\ul'}),
]

# Tags whose content counts as being inside a paragraph block.
BLOCK_TAGS = ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li']

# CSS `display` values that count as a paragraph block.
BLOCK_STYLES = ['block']
'''
TODO:
* Tables
* Images
* Fonts
'''
class RTFMLizer(object):
    '''
    Turn the spine of an OEB book into a single string of RTF markup.
    '''

    def __init__(self, ignore_tables=False):
        '''
        :param ignore_tables: Stored on the instance; nothing in this class
            reads it yet (tables are still on the TODO list).
        '''
        self.ignore_tables = ignore_tables

    def extract_content(self, oeb_book, opts):
        '''
        Remember `oeb_book` and `opts` on the instance and return the RTF
        markup for the whole book.
        '''
        oeb_book.logger.info('Converting XHTML to RTF markup...')
        self.oeb_book = oeb_book
        self.opts = opts
        return self.mlize_spine()

    def mlize_spine(self):
        '''
        Return header + markup of every spine item's <body> + footer, passed
        through `clean_text`.
        '''
        parts = [self.header()]
        for item in self.oeb_book.spine:
            stylizer = Stylizer(item.data, item.href, self.oeb_book,
                    self.opts.output_profile)
            parts.append(self.dump_text(item.data.find(XHTML('body')), stylizer))
        parts.append(self.footer())
        return self.clean_text(u''.join(parts))

    def header(self):
        '''
        Return the opening of the RTF document group.
        '''
        return u'{\\rtf1\\ansi\\ansicpg1252\\deff0\\deflang1033'

    def footer(self):
        '''
        Return the text that closes the group opened by `header`.
        '''
        return ' }'

    @staticmethod
    def _escape_rtf(text):
        '''
        Escape the characters RTF treats as syntax (backslash and braces) so
        that document text cannot open or close groups or start control words.
        '''
        # Backslash first, otherwise the escapes added for braces would be
        # escaped a second time.
        return text.replace('\\', '\\\\').replace('{', '\\{').replace('}', '\\}')

    def clean_text(self, text):
        '''
        Collapse redundant whitespace and redundant line-break groups in
        generated markup and return the result.
        '''
        # Remove excess spaces at beginning and end of lines
        text = re.sub('(?m)^[ ]+', '', text)
        text = re.sub('(?m)[ ]+$', '', text)
        # Remove excessive newlines. The separator is grouped so that the
        # quantifier covers the whole of a multi-character separator such as
        # '\r\n' rather than only its last character.
        text = re.sub('(?:%s){3,}' % os.linesep,
                '%s%s' % (os.linesep, os.linesep), text)
        # Remove excessive spaces
        text = re.sub('[ ]{2,}', ' ', text)
        # At most two consecutive line breaks, and none directly in front of
        # a paragraph group.
        text = re.sub(r'(\{\\line \}){3,}', r'{\\line }{\\line }', text)
        text = re.sub(r'(\{\\line \})+\{\\par', r'{\\par', text)
        return text

    def dump_text(self, elem, stylizer, tag_stack=None):
        '''
        Return the RTF markup for `elem`, its descendants and its tail text.

        :param elem: Element to convert. Non-XHTML elements and elements
            styled as hidden yield an empty string (tail included).
        :param stylizer: Object whose `style(elem)` result is indexed by CSS
            property name.
        :param tag_stack: Control words (plus the marker 'block') currently
            open in enclosing elements. Leave unset on the outermost call.
        '''
        # A fresh list per top-level call: a shared default list would keep
        # stale entries if an earlier call had been aborted by an exception.
        if tag_stack is None:
            tag_stack = []

        if not isinstance(elem.tag, basestring) \
                or namespace(elem.tag) != XHTML_NS:
            return u''

        style = stylizer.style(elem)
        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
                or style['visibility'] == 'hidden':
            return u''

        text = u''
        tag = barename(elem.tag)
        # Number of entries this element pushes on tag_stack; exactly that
        # many are popped again once its children are done.
        tag_count = 0

        # Are we in a paragraph block?
        if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
            if 'block' not in tag_stack:
                tag_count += 1
                tag_stack.append('block')

        single_tag = SINGLE_TAGS.get(tag, None)
        if single_tag:
            text += single_tag

        # Open a group for the tag unless an enclosing element already has
        # the same control words open.
        rtf_tag = TAGS.get(tag, None)
        if rtf_tag and rtf_tag not in tag_stack:
            tag_count += 1
            text += '{%s\n' % rtf_tag
            tag_stack.append(rtf_tag)

        # Process style information
        for prop, mapping in STYLES:
            style_tag = mapping.get(style[prop], None)
            if style_tag and style_tag not in tag_stack:
                tag_count += 1
                text += '{%s\n' % style_tag
                tag_stack.append(style_tag)

        # Process tags that contain text.
        elem_text = getattr(elem, 'text', None)
        if elem_text is not None and elem_text.strip() != '':
            text += self._escape_rtf(elem_text)

        for item in elem:
            text += self.dump_text(item, stylizer, tag_stack)

        # Close every group this element opened; the 'block' marker is
        # bookkeeping only and has no group of its own.
        for _ in range(tag_count):
            end_tag = tag_stack.pop()
            if end_tag != 'block':
                text += u'}'

        elem_tail = getattr(elem, 'tail', None)
        if elem_tail is not None and elem_tail.strip() != '':
            elem_tail = self._escape_rtf(elem_tail)
            if 'block' in tag_stack:
                text += '%s ' % elem_tail
            else:
                # Tail text outside any block gets a paragraph of its own.
                text += '{\\par \\pard \\hyphpar \\keep %s}' % elem_tail

        return text

View File

@ -9,7 +9,6 @@ import os
from calibre.customize.conversion import OutputFormatPlugin, \ from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation OptionRecommendation
from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines
from calibre.ebooks.metadata import authors_to_string
class TXTOutput(OutputFormatPlugin): class TXTOutput(OutputFormatPlugin):