RTF Input: Speed up processing and add support for small-caps and colors

This commit is contained in:
Kovid Goyal 2009-09-15 01:35:48 -06:00
parent 4c6bcf512f
commit 2fafc08735
2 changed files with 83 additions and 116 deletions

View File

@ -1,27 +1,10 @@
#########################################################################
# #
# #
# copyright 2002 Paul Henry Tremblay #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
# General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program; if not, write to the Free Software #
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA #
# 02111-1307 USA #
# #
# #
#########################################################################
xhtml = '''\
<?xml version="1.0"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:html="http://www.w3.org/1999/xhtml"
xmlns:rtf="http://rtf2xml.sourceforge.net/"
xmlns:c="calibre"
extension-element-prefixes="c"
exclude-result-prefixes="rtf"
>
@ -147,7 +130,7 @@ xhtml = '''\
<xsl:text>generator</xsl:text>
</xsl:attribute>
<xsl:attribute name="content">
<xsl:text>http://rtf2xml.sourceforge.net/</xsl:text>
<xsl:text>http://calibre-ebook.com</xsl:text>
</xsl:attribute>
</xsl:element>
@ -233,9 +216,7 @@ xhtml = '''\
<xsl:text>span.italic-bold{font-style:italic;font-weight:bold}&#xA;</xsl:text>
<xsl:text>span.italic-underline{font-style:italic;text-decoration:underline}&#xA;</xsl:text>
<xsl:text>span.bold-underline{font-weight:bold;text-decoration:underline}&#xA;</xsl:text>
<xsl:for-each select="//rtf:inline">
<xsl:call-template name="parse-inline"/>
</xsl:for-each>
<xsl:text>&#xA;</xsl:text>
</xsl:document>
</xsl:template>
@ -287,52 +268,6 @@ xhtml = '''\
</xsl:if>
</xsl:template>
<xsl:template name="parse-inline">
<xsl:variable name="num-attrs" select="count(@*)"/>
<xsl:choose>
<xsl:when test="$num-attrs = 1 and @italics"/>
<xsl:when test="$num-attrs = 1 and @bold"/>
<xsl:when test="$num-attrs = 1 and @underline"/>
<xsl:when test="$num-attrs = 2 and @italics and @bold"/>
<xsl:when test="$num-attrs = 2 and @italcs and @underline"/>
<xsl:when test="$num-attrs = 2 and @bold and @underline"/>
<xsl:otherwise>
<xsl:text>span.</xsl:text>
<xsl:value-of select="generate-id(.)"/>
<xsl:text>{</xsl:text>
<xsl:if test="@italics = 'true'">
<xsl:text>font-style:italic;</xsl:text>
</xsl:if>
<xsl:if test="@italics = 'false'">
<xsl:text>font-style:normal;</xsl:text>
</xsl:if>
<xsl:if test="@bold = 'true'">
<xsl:text>font-weight:bold;</xsl:text>
</xsl:if>
<xsl:if test="@bold = 'false'">
<xsl:text>font-weight:normal;</xsl:text>
</xsl:if>
<xsl:if test="@underline and @underline != 'false'">
<xsl:text>text-decoration:underline;</xsl:text>
</xsl:if>
<xsl:if test="@underline= 'false'">
<xsl:text>text-decoration:none;</xsl:text>
</xsl:if>
<xsl:if test="@strike-through = 'true'">
<xsl:text>text-decoration:line-through;</xsl:text>
</xsl:if>
<xsl:if test="@strike-through = 'false'">
<xsl:text>text-decoration:none;</xsl:text>
</xsl:if>
<xsl:if test="@font-size">
<xsl:text>font-size:</xsl:text>
<xsl:value-of select="@font-size"/>
<xsl:text>pt;</xsl:text>
</xsl:if>
<xsl:text>}</xsl:text>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
<xsl:template match="rtf:inline">
<xsl:variable name="num-attrs" select="count(@*)"/>
@ -345,45 +280,7 @@ xhtml = '''\
<xsl:otherwise>
<xsl:element name="span">
<xsl:attribute name="class">
<xsl:choose>
<xsl:when test="$num-attrs=1 and @italics='true'">
<xsl:text>italic</xsl:text>
</xsl:when>
<xsl:when test="$num-attrs=1 and @italics='false'">
<xsl:text>no-italic</xsl:text>
</xsl:when>
<xsl:when test="$num-attrs=1 and @bold='true'">
<xsl:text>bold</xsl:text>
</xsl:when>
<xsl:when test="$num-attrs=1 and @bold='true'">
<xsl:text>bold</xsl:text>
</xsl:when>
<xsl:when test="$num-attrs=1 and @bold='false'">
<xsl:text>no-bold</xsl:text>
</xsl:when>
<xsl:when test="$num-attrs=1 and @underlined">
<xsl:choose>
<xsl:when test="not(@underlined='false')">
<xsl:text>underline</xsl:text>
</xsl:when>
<xsl:otherwise>
<xsl:text>no-underline</xsl:text>
</xsl:otherwise>
</xsl:choose>
</xsl:when>
<xsl:when test="$num-attrs=2 and @bold='true' and @italics='true'">
<xsl:text>italic-bold</xsl:text>
</xsl:when>
<xsl:when test="$num-attrs=2 and @italics='true' and @underline and @underline != 'false'">
<xsl:text>italic-underline</xsl:text>
</xsl:when>
<xsl:when test="$num-attrs=2 and @bold='true' and @underline and @underline != 'false'">
<xsl:text>bold-underline</xsl:text>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="generate-id(.)"/>
</xsl:otherwise>
</xsl:choose>
<c:inline-class/>
</xsl:attribute>
<xsl:apply-templates/>
</xsl:element>
@ -539,4 +436,3 @@ xhtml = '''\
</xsl:template>
</xsl:stylesheet>
'''

View File

@ -2,12 +2,48 @@ from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, glob, re
import os, glob, re, textwrap
from lxml import etree
from calibre.customize.conversion import InputFormatPlugin
class InlineClass(etree.XSLTExtension):
    '''
    XSLT extension element that computes the CSS class list for one inline
    run of text.

    When the stylesheet invokes the element, :meth:`execute` reads the
    formatting attributes of the current input node and stores a
    space-separated list of class names as the text of the output parent.

    Every distinct font size and font colour seen is remembered, in first-seen
    order, in :attr:`font_sizes` and :attr:`colors`. The position of a value
    in those lists is the number used in its ``fsN`` / ``colN`` class name, so
    the caller can later generate one CSS rule per entry.
    '''

    # Boolean formatting attributes looked up on the input node. An attribute
    # counts as enabled only when its value is exactly the string 'true'.
    FMTS = ('italics', 'bold', 'underlined', 'strike-through', 'small-caps')

    def __init__(self, log):
        '''
        :param log: Logger object, stored as ``self.log``.
        '''
        etree.XSLTExtension.__init__(self)
        self.log = log
        # Distinct font-size / font-color attribute values, in first-seen order
        self.font_sizes = []
        self.colors = []

    def execute(self, context, self_node, input_node, output_parent):
        '''
        Set ``output_parent.text`` to the class list for ``input_node``.

        The list always starts with ``none`` (the class that resets every
        inline property) followed by one class per enabled format from
        :attr:`FMTS`, then ``fsN`` and ``colN`` when the node has a
        ``font-size`` / ``font-color`` attribute.

        Disabled formats are deliberately *not* emitted as ``no-<format>``
        classes. Several formats share one CSS property (underlined and
        strike-through both map to ``text-decoration``), so a ``no-*`` class
        for one format can cancel the enabled class of another, depending only
        on the order of the rules in the stylesheet. A single reset class
        followed by the enabled ones has no such conflict.
        '''
        classes = ['none']
        classes.extend(x for x in self.FMTS
                if input_node.get(x, None) == 'true')

        fs = input_node.get('font-size', False)
        if fs:
            if fs not in self.font_sizes:
                self.font_sizes.append(fs)
            classes.append('fs%d'%self.font_sizes.index(fs))

        fc = input_node.get('font-color', False)
        if fc:
            if fc not in self.colors:
                self.colors.append(fc)
            classes.append('col%d'%self.colors.index(fc))

        output_parent.text = ' '.join(classes)
class RTFInput(InputFormatPlugin):
name = 'RTF Input'
@ -21,15 +57,15 @@ class RTFInput(InputFormatPlugin):
parser = ParseRtf(
in_file = stream,
out_file = ofile,
# Convert symbol fonts to unicode equivelents. Default
# Convert symbol fonts to unicode equivalents. Default
# is 1
convert_symbol = 1,
# Convert Zapf fonts to unicode equivelents. Default
# Convert Zapf fonts to unicode equivalents. Default
# is 1.
convert_zapf = 1,
# Convert Wingding fonts to unicode equivelents.
# Convert Wingding fonts to unicode equivalents.
# Default is 1.
convert_wingdings = 1,
@ -94,9 +130,41 @@ class RTFInput(InputFormatPlugin):
#open(name+'.hex', 'wb').write(enc)
return imap
def write_inline_css(self, ic):
    '''
    Append the CSS rules for the inline formatting classes to ``styles.css``
    in the current working directory.

    :param ic: Object with ``font_sizes`` and ``colors`` sequences. The item
               at index ``i`` of each sequence produces the rule for class
               ``fs<i>`` (size in pt) or ``col<i>`` (colour) respectively.
    '''
    font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in
        enumerate(ic.font_sizes)]
    color_classes = ['span.col%d { color: %s }'%(i, x) for i, x in
        enumerate(ic.colors)]
    # Rule order matters: all these selectors have the same specificity, so
    # for a property set by more than one class on the same span the LAST
    # rule wins. The resetting rules (none, no-*) therefore come first and
    # the enabling rules last; otherwise e.g. no-strike-through would cancel
    # underlined, since both set text-decoration.
    css = textwrap.dedent('''
        span.none {
            text-decoration: none; font-weight: normal;
            font-style: normal; font-variant: normal
        }

        span.no-italics { font-style: normal }

        span.no-bold { font-weight: normal }

        span.no-small-caps { font-variant: normal }

        span.no-underlined { text-decoration: none }

        span.no-strike-through { text-decoration: none }

        span.italics { font-style: italic }

        span.bold { font-weight: bold }

        span.small-caps { font-variant: small-caps }

        span.underlined { text-decoration: underline }

        span.strike-through { text-decoration: line-through }

        ''')
    css += '\n'+'\n'.join(font_size_classes)
    css += '\n' +'\n'.join(color_classes)
    # The file is opened in binary mode, so make sure bytes are written even
    # when the stylesheet text is unicode (e.g. unicode attribute values).
    if not isinstance(css, bytes):
        css = css.encode('utf-8')
    with open('styles.css', 'ab') as f:
        f.write(css)
def convert(self, stream, options, file_ext, log,
accelerators):
from calibre.ebooks.rtf.xsl import xhtml
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
@ -124,15 +192,18 @@ class RTFInput(InputFormatPlugin):
if name is not None:
pict.set('num', name)
self.log('Converting XML to HTML...')
styledoc = etree.fromstring(xhtml)
inline_class = InlineClass(self.log)
styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
transform = etree.XSLT(styledoc)
extensions = { ('calibre', 'inline-class') : inline_class }
transform = etree.XSLT(styledoc, extensions=extensions)
result = transform(doc)
html = 'index.xhtml'
with open(html, 'wb') as f:
res = transform.tostring(result)
res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
f.write(res)
self.write_inline_css(inline_class)
stream.seek(0)
mi = get_metadata(stream, 'rtf')
if not mi.title: