RTF: Improve handling of empty paragraphs & clean up the generated HTML file

This commit is contained in:
Sengian 2011-10-16 13:55:54 +02:00
parent 441d5ccfe2
commit b9c6f154c0
2 changed files with 22 additions and 12 deletions

View File

@ -1,7 +1,7 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<xsl:stylesheet version="1.0" <xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:html="http://www.w3.org/1999/xhtml" xmlns="http://www.w3.org/1999/xhtml"
xmlns:rtf="http://rtf2xml.sourceforge.net/" xmlns:rtf="http://rtf2xml.sourceforge.net/"
xmlns:c="calibre" xmlns:c="calibre"
extension-element-prefixes="c" extension-element-prefixes="c"
@ -63,11 +63,16 @@
</xsl:template> </xsl:template>
<xsl:template name = "para"> <xsl:template name = "para">
<xsl:if test = "normalize-space(.) or child::*"> <xsl:element name = "p">
<xsl:element name = "p"> <xsl:choose>
<xsl:call-template name = "para-content"/> <xsl:when test = "normalize-space(.) or child::*">
</xsl:element> <xsl:call-template name = "para-content"/>
</xsl:if> </xsl:when>
<xsl:otherwise>
<xsl:text>&#160;</xsl:text>
</xsl:otherwise>
</xsl:choose>
</xsl:element>
</xsl:template> </xsl:template>
<xsl:template name = "para_off"> <xsl:template name = "para_off">
@ -149,7 +154,7 @@
<xsl:template match="rtf:doc-information" mode="header"> <xsl:template match="rtf:doc-information" mode="header">
<link rel="stylesheet" type="text/css" href="styles.css"/> <link rel="stylesheet" type="text/css" href="styles.css"/>
<xsl:if test="not(rtf:title)"> <xsl:if test="not(rtf:title)">
<title>unamed</title> <title>unnamed</title>
</xsl:if> </xsl:if>
<xsl:apply-templates/> <xsl:apply-templates/>
</xsl:template> </xsl:template>
@ -445,7 +450,10 @@
<xsl:template match = "rtf:field[@type='hyperlink']"> <xsl:template match = "rtf:field[@type='hyperlink']">
<xsl:element name ="a"> <xsl:element name ="a">
<xsl:attribute name = "href"><xsl:if test="not(contains(@link, '/'))">#</xsl:if><xsl:value-of select = "@link"/></xsl:attribute> <xsl:attribute name = "href">
<xsl:if test = "not(contains(@link, '/'))">#</xsl:if>
<xsl:value-of select = "@link"/>
</xsl:attribute>
<xsl:apply-templates/> <xsl:apply-templates/>
</xsl:element> </xsl:element>
</xsl:template> </xsl:template>

View File

@ -305,11 +305,13 @@ class RTFInput(InputFormatPlugin):
html = 'index.xhtml' html = 'index.xhtml'
with open(html, 'wb') as f: with open(html, 'wb') as f:
res = transform.tostring(result) res = transform.tostring(result)
res = res[:100].replace('xmlns:html', 'xmlns') + res[100:] # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
# Collapse runs of consecutive newlines into a single newline
res = re.sub('\n+', '\n', res)
# Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
res = re.sub('\s*<body>', '<body>', res) # res = re.sub('\s*<body>', '<body>', res)
res = re.sub('(?<=\n)\n{2}', # res = re.sub('(?<=\n)\n{2}',
u'<p>\u00a0</p>\n'.encode('utf-8'), res) # u'<p>\u00a0</p>\n'.encode('utf-8'), res)
f.write(res) f.write(res)
self.write_inline_css(inline_class, border_styles) self.write_inline_css(inline_class, border_styles)
stream.seek(0) stream.seek(0)