Fix #863735 (Calibre ignores the space size between paragraphs (RTF -> MOBI))

This commit is contained in:
Kovid Goyal 2011-10-16 18:43:57 +05:30
commit 6bbe3c82d7
4 changed files with 38 additions and 19 deletions

View File

@ -1,7 +1,7 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<xsl:stylesheet version="1.0" <xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:html="http://www.w3.org/1999/xhtml" xmlns="http://www.w3.org/1999/xhtml"
xmlns:rtf="http://rtf2xml.sourceforge.net/" xmlns:rtf="http://rtf2xml.sourceforge.net/"
xmlns:c="calibre" xmlns:c="calibre"
extension-element-prefixes="c" extension-element-prefixes="c"
@ -63,11 +63,16 @@
</xsl:template> </xsl:template>
<xsl:template name = "para"> <xsl:template name = "para">
<xsl:if test = "normalize-space(.) or child::*">
<xsl:element name = "p"> <xsl:element name = "p">
<xsl:choose>
<xsl:when test = "normalize-space(.) or child::*">
<xsl:call-template name = "para-content"/> <xsl:call-template name = "para-content"/>
</xsl:when>
<xsl:otherwise>
<xsl:text>&#160;</xsl:text>
</xsl:otherwise>
</xsl:choose>
</xsl:element> </xsl:element>
</xsl:if>
</xsl:template> </xsl:template>
<xsl:template name = "para_off"> <xsl:template name = "para_off">
@ -149,7 +154,7 @@
<xsl:template match="rtf:doc-information" mode="header"> <xsl:template match="rtf:doc-information" mode="header">
<link rel="stylesheet" type="text/css" href="styles.css"/> <link rel="stylesheet" type="text/css" href="styles.css"/>
<xsl:if test="not(rtf:title)"> <xsl:if test="not(rtf:title)">
<title>unamed</title> <title>unnamed</title>
</xsl:if> </xsl:if>
<xsl:apply-templates/> <xsl:apply-templates/>
</xsl:template> </xsl:template>
@ -445,7 +450,10 @@
<xsl:template match = "rtf:field[@type='hyperlink']"> <xsl:template match = "rtf:field[@type='hyperlink']">
<xsl:element name ="a"> <xsl:element name ="a">
<xsl:attribute name = "href"><xsl:if test="not(contains(@link, '/'))">#</xsl:if><xsl:value-of select = "@link"/></xsl:attribute> <xsl:attribute name = "href">
<xsl:if test = "not(contains(@link, '/'))">#</xsl:if>
<xsl:value-of select = "@link"/>
</xsl:attribute>
<xsl:apply-templates/> <xsl:apply-templates/>
</xsl:element> </xsl:element>
</xsl:template> </xsl:template>

View File

@ -305,11 +305,13 @@ class RTFInput(InputFormatPlugin):
html = 'index.xhtml' html = 'index.xhtml'
with open(html, 'wb') as f: with open(html, 'wb') as f:
res = transform.tostring(result) res = transform.tostring(result)
res = res[:100].replace('xmlns:html', 'xmlns') + res[100:] # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
#clean multiple \n
res = re.sub('\n+', '\n', res)
# Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
res = re.sub('\s*<body>', '<body>', res) # res = re.sub('\s*<body>', '<body>', res)
res = re.sub('(?<=\n)\n{2}', # res = re.sub('(?<=\n)\n{2}',
u'<p>\u00a0</p>\n'.encode('utf-8'), res) # u'<p>\u00a0</p>\n'.encode('utf-8'), res)
f.write(res) f.write(res)
self.write_inline_css(inline_class, border_styles) self.write_inline_css(inline_class, border_styles)
stream.seek(0) stream.seek(0)

View File

@ -376,13 +376,13 @@ class ParseRtf:
msg += 'self.__run_level is "%s"\n' % self.__run_level msg += 'self.__run_level is "%s"\n' % self.__run_level
raise RtfInvalidCodeException, msg raise RtfInvalidCodeException, msg
if self.__run_level > 1: if self.__run_level > 1:
sys.stderr.write(_('File could be older RTF...\n')) sys.stderr.write('File could be older RTF...\n')
if found_destination: if found_destination:
if self.__run_level > 1: if self.__run_level > 1:
sys.stderr.write(_( sys.stderr.write(
'File also has newer RTF.\n' 'File also has newer RTF.\n'
'Will do the best to convert.\n' 'Will do the best to convert.\n'
)) )
add_brackets_obj = add_brackets.AddBrackets( add_brackets_obj = add_brackets.AddBrackets(
in_file = self.__temp_file, in_file = self.__temp_file,
bug_handler = RtfInvalidCodeException, bug_handler = RtfInvalidCodeException,

View File

@ -12,10 +12,10 @@
# # # #
######################################################################### #########################################################################
import sys, os, tempfile import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy, check_brackets from calibre.ebooks.rtf2xml import copy, check_brackets
# note to self. This is the first module in which I use tempfile. A good idea? # note to self. This is the first module in which I use tempfile. A good idea?
"""
"""
class AddBrackets: class AddBrackets:
""" """
Add brackets for old RTF. Add brackets for old RTF.
@ -41,6 +41,7 @@ class AddBrackets:
self.__copy = copy self.__copy = copy
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
self.__run_level = run_level self.__run_level = run_level
def __initiate_values(self): def __initiate_values(self):
""" """
""" """
@ -82,14 +83,16 @@ class AddBrackets:
'cw<ci<subscript_' , 'cw<ci<subscript_' ,
'cw<ci<superscrip', 'cw<ci<superscrip',
'cw<ci<underlined' , 'cw<ci<underlined' ,
'cw<ul<underlined' , # 'cw<ul<underlined' ,
] ]
def __before_body_func(self, line): def __before_body_func(self, line):
""" """
""" """
if self.__token_info == 'mi<mk<body-open_': if self.__token_info == 'mi<mk<body-open_':
self.__state = 'in_body' self.__state = 'in_body'
self.__write_obj.write(line) self.__write_obj.write(line)
def __in_body_func(self, line): def __in_body_func(self, line):
""" """
""" """
@ -108,6 +111,7 @@ class AddBrackets:
self.__state = 'after_control_word' self.__state = 'after_control_word'
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
def __after_control_word_func(self, line): def __after_control_word_func(self, line):
""" """
""" """
@ -122,6 +126,7 @@ class AddBrackets:
self.__ignore_count = self.__ob_count self.__ignore_count = self.__ob_count
else: else:
self.__state = 'in_body' self.__state = 'in_body'
def __write_group(self): def __write_group(self):
""" """
""" """
@ -141,6 +146,7 @@ class AddBrackets:
self.__write_obj.write(inline_string) self.__write_obj.write(inline_string)
self.__open_bracket = 1 self.__open_bracket = 1
self.__temp_group = [] self.__temp_group = []
def __change_permanent_group(self): def __change_permanent_group(self):
""" """
use temp group to change permanent group use temp group to change permanent group
@ -150,6 +156,7 @@ class AddBrackets:
if token_info in self.__accept: if token_info in self.__accept:
att = line[20:-1] att = line[20:-1]
self.__inline[token_info] = att self.__inline[token_info] = att
def __ignore_func(self, line): def __ignore_func(self, line):
""" """
Don't add any brackets while inside of brackets RTF has already Don't add any brackets while inside of brackets RTF has already
@ -159,12 +166,14 @@ class AddBrackets:
if self.__token_info == 'cb<nu<clos-brack'and\ if self.__token_info == 'cb<nu<clos-brack'and\
self.__cb_count == self.__ignore_count: self.__cb_count == self.__ignore_count:
self.__state = 'in_body' self.__state = 'in_body'
def __check_brackets(self, in_file): def __check_brackets(self, in_file):
self.__check_brack_obj = check_brackets.CheckBrackets\ self.__check_brack_obj = check_brackets.CheckBrackets\
(file = in_file) (file = in_file)
good_br = self.__check_brack_obj.check_brackets()[0] good_br = self.__check_brack_obj.check_brackets()[0]
if not good_br: if not good_br:
return 1 return 1
def add_brackets(self): def add_brackets(self):
""" """
""" """