Basic RTF output.

This commit is contained in:
John Schember 2009-05-25 21:31:51 -04:00
parent f35ceec77c
commit b92c2dc002
5 changed files with 211 additions and 2 deletions

View File

@ -337,6 +337,7 @@ from calibre.ebooks.pdb.output import PDBOutput
from calibre.ebooks.pdf.output import PDFOutput
from calibre.ebooks.pml.output import PMLOutput
from calibre.ebooks.rb.output import RBOutput
from calibre.ebooks.rtf.output import RTFOutput
from calibre.ebooks.txt.output import TXTOutput
from calibre.customize.profiles import input_profiles, output_profiles
@ -382,6 +383,7 @@ plugins += [
PDFOutput,
PMLOutput,
RBOutput,
RTFOutput,
TXTOutput,
]
plugins += [

View File

@ -6,7 +6,8 @@ __docformat__ = 'restructuredtext en'
import os
import Image, cStringIO
import Image
import cStringIO
from calibre.customize.conversion import OutputFormatPlugin
from calibre.ptempfile import TemporaryDirectory

View File

@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from calibre.ebooks.rtf.rtfml import RTFMLizer
from calibre.customize.conversion import OutputFormatPlugin
class RTFOutput(OutputFormatPlugin):
    '''
    Output plugin that writes an OEB book as an RTF document.

    The RTF markup itself is produced by RTFMLizer; this class only
    takes care of getting the resulting text onto disk (or into an
    already open stream).
    '''

    name = 'RTF Output'
    author = 'John Schember'
    file_type = 'rtf'

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Render ``oeb_book`` as RTF and write it to ``output_path``.

        :param oeb_book: The book to convert; handed to RTFMLizer.
        :param output_path: Either a filesystem path or an object with a
            ``write`` method. A stream is rewound and truncated before
            writing and is left open; a path is created (including any
            missing parent directory) and closed when done.
        :param input_plugin: Unused here.
        :param opts: Conversion options; ``opts.linearize_tables`` is
            forwarded to RTFMLizer as ``ignore_tables``.
        :param log: Unused here.
        '''
        rtfmlizer = RTFMLizer(ignore_tables=opts.linearize_tables)
        content = rtfmlizer.extract_content(oeb_book, opts)
        # The RTF header declares \ansicpg1252, so encode to match it.
        # Characters outside cp1252 are substituted rather than raising.
        data = content.encode('cp1252', 'replace')

        if hasattr(output_path, 'write'):
            # Caller supplied a stream: overwrite its contents but leave
            # closing it to the caller.
            out_stream = output_path
            out_stream.seek(0)
            out_stream.truncate()
            out_stream.write(data)
            return

        out_dir = os.path.dirname(output_path)
        if out_dir != '' and not os.path.exists(out_dir):
            os.makedirs(out_dir)

        out_stream = open(output_path, 'wb')
        try:
            out_stream.write(data)
        finally:
            # Always release the handle, even if the write fails.
            out_stream.close()

View File

@ -0,0 +1,171 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Transform OEB content into RTF markup
'''
import os
import re
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
# RTF control sequences shared by several of the lookup tables below.
# Starts a new paragraph with default paragraph formatting.
_PARAGRAPH = '\\par \\pard \\hyphpar \\keep'
# Same paragraph start, followed by a tab.
_TABBED_PARAGRAPH = '\\par \\pard \\hyphpar \\keep \t'
# Bold text followed by a paragraph start; used for every heading level.
_HEADING = '\\b \\par \\pard \\hyphpar \\keep'
# Forced line break emitted as its own RTF group.
_LINE_BREAK = '{\\line }'

# XHTML tag name -> RTF control sequence that opens a group for that tag.
# Not enabled yet:
#   'ol': '\\pn \\pnrestart \\pnlvlblt'
#   'ul': '\\pn \\pnrestart \\pndec'
TAGS = {
    'b': '\\b',
    'del': '\\deleted',
    'sub': '\\sub',
    'sup': '\\super',
    'u': '\\ul',
    'h1': _HEADING,
    'h2': _HEADING,
    'h3': _HEADING,
    'h4': _HEADING,
    'h5': _HEADING,
    'h6': _HEADING,
    'li': _TABBED_PARAGRAPH,
    'p': _TABBED_PARAGRAPH,
}

# XHTML tag name -> complete RTF fragment emitted once when the tag is seen.
SINGLE_TAGS = dict.fromkeys(('br', 'div'), _LINE_BREAK)

# Ordered (CSS property, {CSS value: RTF control sequence}) pairs.
# Not enabled yet:
#   ('page-break-before', {'always': '\\pagebb '})
STYLES = [
    ('display', {'block': _PARAGRAPH}),
    ('font-weight', {'bold': '\\b', 'bolder': '\\b'}),
    ('font-style', {'italic': '\\i'}),
    ('text-align', {
        'center': '\\qc',
        'left': '\\ql',
        'right': '\\qr',
        'justify': '\\qj',
    }),
    ('text-decoration', {
        'line-through': '\\strike',
        'underline': '\\ul',
    }),
]

# Tag names that are treated as paragraph blocks.
BLOCK_TAGS = ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li']

# CSS ``display`` values that are treated as paragraph blocks.
BLOCK_STYLES = ['block']
'''
TODO:
* Tables
* Images
* Fonts
'''
class RTFMLizer(object):
    '''
    Convert the XHTML documents in an OEB book's spine into RTF markup.

    Typical use is ``RTFMLizer().extract_content(oeb_book, opts)``, which
    returns the whole book as a single RTF string.
    '''

    def __init__(self, ignore_tables=False):
        '''
        :param ignore_tables: Stored on the instance; it is not read
            anywhere else in this class.
        '''
        self.ignore_tables = ignore_tables

    def extract_content(self, oeb_book, opts):
        '''
        Convert ``oeb_book`` to RTF markup and return it as a string.

        :param oeb_book: Book whose ``spine`` items are converted; its
            ``logger`` is used for a progress message.
        :param opts: Conversion options; ``opts.output_profile`` is
            passed to the Stylizer for every spine item.
        '''
        oeb_book.logger.info('Converting XHTML to RTF markup...')
        self.oeb_book = oeb_book
        self.opts = opts
        return self.mlize_spine()

    def mlize_spine(self):
        '''
        Return header + markup of every spine item + footer, cleaned up
        by ``clean_text``.
        '''
        parts = [self.header()]
        for item in self.oeb_book.spine:
            stylizer = Stylizer(item.data, item.href, self.oeb_book,
                                self.opts.output_profile)
            body = item.data.find(XHTML('body'))
            parts.append(self.dump_text(body, stylizer))
        parts.append(self.footer())
        return self.clean_text(u''.join(parts))

    def header(self):
        '''Return the opening of the RTF document group.'''
        return u'{\\rtf1\\ansi\\ansicpg1252\\deff0\\deflang1033'

    def footer(self):
        '''Return the text that closes the RTF document group.'''
        return ' }'

    def clean_text(self, text):
        '''
        Collapse redundant whitespace and line breaks in generated markup.

        :param text: RTF markup as produced by ``dump_text``.
        :return: The tidied markup.
        '''
        # Remove excess spaces at beginning and end of lines
        text = re.sub('(?m)^[ ]+', '', text)
        text = re.sub('(?m)[ ]+$', '', text)

        # Remove excessive newlines. The separator is escaped and grouped
        # so that the {3,} quantifier applies to the whole line separator
        # and not just to its last character when it is more than one
        # character long (e.g. '\r\n').
        text = re.sub('(?:%s){3,}' % re.escape(os.linesep),
                      '%s%s' % (os.linesep, os.linesep), text)

        # Remove excessive spaces
        text = re.sub('[ ]{2,}', ' ', text)

        # Never keep more than two consecutive forced line breaks, and
        # drop line breaks that directly precede a new paragraph group.
        text = re.sub(r'(\{\\line \}){3,}', r'{\\line }{\\line }', text)
        text = re.sub(r'(\{\\line \})+\{\\par', r'{\\par', text)

        return text

    def _escape_text(self, text):
        '''
        Escape the characters that have syntactic meaning in RTF.

        Backslash starts a control word and braces delimit groups, so
        literal occurrences in document text must be backslash-escaped.
        '''
        # Backslash is handled first so that the backslashes introduced
        # for the braces are not themselves doubled.
        return text.replace('\\', '\\\\').replace('{', '\\{').replace('}', '\\}')

    def dump_text(self, elem, stylizer, tag_stack=None):
        '''
        Recursively convert ``elem`` and its children to RTF markup.

        :param elem: Element to convert. Anything whose tag is not a
            string in the XHTML namespace yields an empty string.
        :param stylizer: Object whose ``style(elem)`` result is indexed
            by CSS property name.
        :param tag_stack: List of RTF control sequences (plus the marker
            ``'block'``) that are currently open. It is shared with the
            recursive calls; top-level callers can omit it.
        :return: The RTF markup for ``elem``, including its tail text.
        '''
        # A fresh list per top-level call; a mutable default would be
        # shared between unrelated calls.
        if tag_stack is None:
            tag_stack = []

        if not isinstance(elem.tag, basestring) \
                or namespace(elem.tag) != XHTML_NS:
            return u''

        style = stylizer.style(elem)

        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
                or style['visibility'] == 'hidden':
            return u''

        parts = []
        tag = barename(elem.tag)
        # Number of entries this call pushed on tag_stack, so that exactly
        # those entries are popped again below.
        tag_count = 0

        # Are we in a paragraph block?
        if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
            if 'block' not in tag_stack:
                tag_count += 1
                tag_stack.append('block')

        single_tag = SINGLE_TAGS.get(tag, None)
        if single_tag:
            parts.append(single_tag)

        # Open a group for the tag itself, unless the same control
        # sequence is already open further up the tree.
        rtf_tag = TAGS.get(tag, None)
        if rtf_tag and rtf_tag not in tag_stack:
            tag_count += 1
            parts.append('{%s\n' % rtf_tag)
            tag_stack.append(rtf_tag)

        # Process style information
        for prop, value_map in STYLES:
            style_tag = value_map.get(style[prop], None)
            if style_tag and style_tag not in tag_stack:
                tag_count += 1
                parts.append('{%s\n' % style_tag)
                tag_stack.append(style_tag)

        # Process tags that contain text.
        elem_text = getattr(elem, 'text', None)
        if elem_text is not None and elem_text.strip() != '':
            parts.append(self._escape_text(elem_text))

        for item in elem:
            parts.append(self.dump_text(item, stylizer, tag_stack))

        # Close every group opened above. The 'block' marker did not emit
        # an opening brace, so it gets no closing brace either.
        for _ in range(tag_count):
            end_tag = tag_stack.pop()
            if end_tag != 'block':
                parts.append(u'}')

        elem_tail = getattr(elem, 'tail', None)
        if elem_tail is not None and elem_tail.strip() != '':
            tail = self._escape_text(elem_tail)
            if 'block' in tag_stack:
                parts.append('%s ' % tail)
            else:
                # Tail text outside any block gets a paragraph of its own.
                parts.append('{\\par \\pard \\hyphpar \\keep %s}' % tail)

        return u''.join(parts)

View File

@ -9,7 +9,6 @@ import os
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines
from calibre.ebooks.metadata import authors_to_string
class TXTOutput(OutputFormatPlugin):