RTF Input: Handle underlined text. Fixes #845328 (Underlined text in RTF not propagated when converted to ePub)

2025-07-09 03:04:10 -04:00 · 2011-09-10 08:45:45 -06:00 · 2011-09-10 08:45:45 -06:00 · f1867f1128
commit f1867f1128
parent cce0b15293 6328279091
18 changed files with 139 additions and 117 deletions
--- a/resources/templates/rtf.xsl
+++ b/resources/templates/rtf.xsl
@ -98,7 +98,7 @@
                    <xsl:apply-templates/>
               </emph>
            </xsl:when>
-            <xsl:when test = "@underlined">
+            <xsl:when test = "@underlined and @underlined != 'false'">
               <emph rend = "paragraph-emph-underlined">
                    <xsl:apply-templates/>
               </emph>
@ -220,7 +220,7 @@
    </xsl:template>
    <xsl:template name="parse-styles-attrs">
-        <!--<xsl:text>position:relative;</xsl:text>-->
+        <!--<xsl:text>position:relative;</xsl:text>
        <xsl:if test="@space-before">
            <xsl:text>padding-top:</xsl:text>
            <xsl:value-of select="@space-before"/>
@ -230,7 +230,7 @@
            <xsl:text>padding-bottom:</xsl:text>
            <xsl:value-of select="@space-after"/>
            <xsl:text>pt;</xsl:text>
-        </xsl:if>
+        </xsl:if>-->
        <xsl:if test="@left-indent">
            <xsl:text>padding-left:</xsl:text>
            <xsl:value-of select="@left-indent"/>
@ -256,15 +256,15 @@
            <xsl:value-of select="'italic'"/>
            <xsl:text>;</xsl:text>
        </xsl:if>
-        <xsl:if test="@underline and @underline != 'false'">
+        <xsl:if test="@underlined and @underlined != 'false'">
            <xsl:text>text-decoration:underline</xsl:text>
            <xsl:text>;</xsl:text>
        </xsl:if>
-        <xsl:if test="@line-spacing">
+        <!--<xsl:if test="@line-spacing">
            <xsl:text>line-height:</xsl:text>
            <xsl:value-of select="@line-spacing"/>
            <xsl:text>pt;</xsl:text>
-        </xsl:if>
+        </xsl:if>-->
        <xsl:if test="(@align = 'just')">
            <xsl:text>text-align: justify;</xsl:text>
        </xsl:if>
@ -314,7 +314,6 @@
                    </xsl:attribute>
                    <xsl:apply-templates/>
                </xsl:element>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>
@ -452,6 +451,15 @@
            <xsl:apply-templates/>
        </xsl:element>
    </xsl:template>
    <xsl:template match = "rtf:field[@type='bookmark-start']">
        <xsl:element name ="a">
            <xsl:attribute name = "id">
               <xsl:value-of select = "@number"/>
            </xsl:attribute>
            <xsl:apply-templates/>
        </xsl:element>
    </xsl:template>
    <xsl:template match = "rtf:field">
        <xsl:apply-templates/>
--- a/src/calibre/ebooks/metadata/rtf.py
+++ b/src/calibre/ebooks/metadata/rtf.py
@ -93,7 +93,7 @@ def get_metadata(stream):
    stream.seek(0)
    cpg = detect_codepage(stream)
    stream.seek(0)
-
+    
    title_match = title_pat.search(block)
    if title_match is not None:
        title = decode(title_match.group(1).strip(), cpg)
@ -162,7 +162,8 @@ def set_metadata(stream, options):
        index = src.rindex('}')
        return src[:index] + r'{\ '[:-1] + name + ' ' + val + '}}'
    src, pos = get_document_info(stream)
-    if not src:
+    print 'I was thre'
    if src is not None:
        create_metadata(stream, options)
    else:
        olen = len(src)
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@ -41,7 +41,7 @@ border_style_map = {
 class InlineClass(etree.XSLTExtension):
-    FMTS = ('italics', 'bold', 'underlined', 'strike-through', 'small-caps')
+    FMTS = ('italics', 'bold', 'strike-through', 'small-caps')
    def __init__(self, log):
        etree.XSLTExtension.__init__(self)
@ -54,6 +54,9 @@ class InlineClass(etree.XSLTExtension):
        for x in self.FMTS:
            if input_node.get(x, None) == 'true':
                classes.append(x)
        #underlined is special
        if input_node.get('underlined', 'false') != 'false':
                classes.append('underlined')
        fs = input_node.get('font-size', False)
        if fs:
            if fs not in self.font_sizes:
@ -78,12 +81,13 @@ class RTFInput(InputFormatPlugin):
    def generate_xml(self, stream):
        from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
        ofile = 'dataxml.xml'
-        run_lev, debug_dir = 1, None
+        run_lev, debug_dir, indent_out = 1, None, 0
        if getattr(self.opts, 'debug_pipeline', None) is not None:
            try:
-                os.mkdir(debug_dir)
+                os.mkdir('rtfdebug')
                debug_dir = 'rtfdebug'
                run_lev = 4
                indent_out = 1
                self.log('Running RTFParser in debug mode')
            except:
                self.log.warn('Impossible to run RTFParser in debug mode')
@ -108,7 +112,7 @@ class RTFInput(InputFormatPlugin):
            # Indent resulting XML.
            # Default is 0 (no indent).
-            indent = 1,
+            indent = indent_out,
            # Form lists from RTF. Default is 1.
            form_lists = 1,
@ -157,7 +161,8 @@ class RTFInput(InputFormatPlugin):
            with open(name, 'wb') as f:
                f.write(data)
            imap[count] = name
-            #open(name+'.hex', 'wb').write(enc)
+            # with open(name+'.hex', 'wb') as f:
                # f.write(enc)
        return self.convert_images(imap)
    def convert_images(self, imap):
@ -319,4 +324,6 @@ class RTFInput(InputFormatPlugin):
        opf.render(open('metadata.opf', 'wb'))
        return os.path.abspath('metadata.opf')
-
+#ebook-convert "bad.rtf" test.epub -v -d "E:\Mes eBooks\Developpement\debug"
 # os.makedirs("E:\\Mes eBooks\\Developpement\\rtfdebug")
 # debug_dir = "E:\\Mes eBooks\\Developpement\\rtfdebug"
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@ -372,17 +372,17 @@ class ParseRtf:
        old_rtf = old_rtf_obj.check_if_old_rtf()
        if old_rtf:
            if self.__run_level > 5:
-                msg = 'older RTF\n'
+                msg = 'Older RTF\n'
                msg += 'self.__run_level is "%s"\n' % self.__run_level
                raise RtfInvalidCodeException, msg
            if self.__run_level > 1:
-                sys.stderr.write('File could be older RTF...\n')
+                sys.stderr.write(_('File could be older RTF...\n'))
            if found_destination:
                if self.__run_level > 1:
-                    sys.stderr.write(
+                    sys.stderr.write(_(
                        'File also has newer RTF.\n'
                        'Will do the best to convert.\n'
-                    )
+                    ))
            add_brackets_obj = add_brackets.AddBrackets(
                    in_file = self.__temp_file,
                    bug_handler = RtfInvalidCodeException,
--- a/src/calibre/ebooks/rtf2xml/check_brackets.py
+++ b/src/calibre/ebooks/rtf2xml/check_brackets.py
@ -53,4 +53,3 @@ class CheckBrackets:
                        'total number of brackets is %s') % self.__bracket_count
            return (False, msg)
        return (True, "Brackets match!")
--- a/src/calibre/ebooks/rtf2xml/configure_txt.py
+++ b/src/calibre/ebooks/rtf2xml/configure_txt.py
@ -25,7 +25,7 @@ class Configure:
        if self.__show_config_file and self.__configuration_file:
            sys.stderr.write('configuration file is "%s"\n' % self.__configuration_file)
        if self.__show_config_file and not self.__configuration_file:
-            sys.stderr.write('No configuraiton file found; using default vaules\n')
+            sys.stderr.write('No configuraiton file found; using default values\n')
        if self.__configuration_file:
            read_obj = open(self.__configuration_file, 'r')
            line_to_read = 1
--- a/src/calibre/ebooks/rtf2xml/delete_info.py
+++ b/src/calibre/ebooks/rtf2xml/delete_info.py
@ -43,6 +43,7 @@ class DeleteInfo:
                            'cw<it<listtable_',
                            'cw<it<revi-table',
                            'cw<ls<list-lev-d',
                            # Field allowed
                            'cw<fd<field-inst',
                            'cw<an<book-mk-st',
                            'cw<an<book-mk-en',
@ -81,7 +82,7 @@ class DeleteInfo:
            self.__ob = line
            return False
        else:
-            # write previous bracket, since didn't fine asterisk
+            # write previous bracket, since didn't find asterisk
            if self.__ob:
                self.__write_obj.write(self.__ob)
                self.__ob = 0
@ -104,7 +105,7 @@ class DeleteInfo:
        If you find that you are in a delete group, and the previous
        token in not an open bracket (self.__ob = 0), that means
        that the delete group is nested inside another acceptable
-        detination group. In this case, you have alrady written
+        detination group. In this case, you have already written
        the open bracket, so you will need to write the closed one
        as well.
        """
--- a/src/calibre/ebooks/rtf2xml/fields_small.py
+++ b/src/calibre/ebooks/rtf2xml/fields_small.py
@ -10,8 +10,10 @@
 #                                                                       #
 #                                                                       #
 #########################################################################
-import sys, os, tempfile,   re
+import sys, os, tempfile, re
 from calibre.ebooks.rtf2xml import field_strings, copy
 class FieldsSmall:
    """
 =================
@ -19,7 +21,7 @@ Purpose
 =================
 Write tags for bookmarks, index and toc entry fields in a tokenized file.
 This module does not handle toc or index tables.  (This module won't be any
-use to use to you unless you use it as part of the other modules.)
+use to you unless you use it as part of the other modules.)
 -----------
 Method
 -----------
@ -50,6 +52,7 @@ file.
        self.__copy = copy
        self.__write_to = tempfile.mktemp()
        self.__run_level = run_level
    def __initiate_values(self):
        """
        Initiate all values.
@ -76,6 +79,7 @@ file.
        tx = 'tx<nu<__________<(.*?)'
        reg_st = ob + bk_st + tx + cb
        self.__book_start = re.compile(r'%s' % reg_st)
    def __before_body_func(self, line):
        """
        Requires:
@ -89,6 +93,7 @@ file.
        if self.__token_info == 'mi<mk<body-open_':
            self.__state = 'body'
        self.__write_obj.write(line)
    def __body_func(self, line):
        """
        Requires:
@ -105,6 +110,7 @@ file.
            action(line, tag)
        else:
            self.__write_obj.write(line)
    def __found_bookmark_func(self, line, tag):
        """
        Requires:
@ -120,6 +126,7 @@ file.
        self.__cb_count = 0
        self.__state = 'bookmark'
        self.__type_of_bookmark = tag
    def __bookmark_func(self, line):
        """
        Requires:
@ -148,6 +155,7 @@ file.
            self.__write_obj.write(line)
        elif line[0:2] == 'tx':
            self.__text_string += line[17:-1]
    def __parse_index_func(self, my_string):
        """
        Requires:
@ -196,6 +204,7 @@ file.
            my_changed_string += '<sub-entry>%s' % sub_entry
        my_changed_string += '\n'
        return my_changed_string
    def __index_see_func(self, my_string):
        in_see = 0
        bracket_count = 0
@ -221,6 +230,7 @@ file.
                    in_see = 1
                changed_string += '%s\n' % line
        return changed_string, see_string
    def __index_bookmark_func(self, my_string):
        """
        Requries:
@ -257,6 +267,7 @@ file.
                    in_bookmark = 1
                index_string += '%s\n' % line
        return index_string, bookmark_string
    def __index__format_func(self, my_string):
        italics = 0
        bold =0
@ -268,6 +279,7 @@ file.
            if token_info == 'cw<in<index-ital':
                italics = 1
        return italics, bold
    def __parse_toc_func(self, my_string):
        """
        Requires:
@ -303,6 +315,7 @@ file.
        my_changed_string += '<main-entry>%s' % main_entry
        my_changed_string += '\n'
        return my_changed_string
    def __parse_bookmark_for_toc(self, my_string):
        """
        Requires:
@ -348,6 +361,7 @@ file.
                    in_bookmark = 1
                toc_string += '%s\n' % line
        return toc_string, book_start_string, book_end_string
    def __parse_bookmark_func(self, my_string, type):
        """
        Requires:
@ -362,6 +376,7 @@ file.
        my_changed_string = ('mi<tg<empty-att_<field<type>%s'
        '<number>%s<update>none\n' % (type, my_string))
        return my_changed_string
    def __found_toc_index_func(self, line, tag):
        """
        Requires:
@ -377,6 +392,7 @@ file.
        self.__cb_count = 0
        self.__state = 'toc_index'
        self.__tag = tag
    def __toc_index_func(self, line):
        """
        Requires:
@ -404,6 +420,7 @@ file.
            self.__write_obj.write(line)
        else:
            self.__text_string += line
    def fix_fields(self):
        """
        Requires:
@ -418,24 +435,19 @@ file.
           bookmark.
        """
        self.__initiate_values()
-        read_obj = open(self.__file)
+        with open(self.__file, 'r') as read_obj:
-        self.__write_obj = open(self.__write_to, 'w')
+            with open(self.__write_to, 'w') as self.__write_obj:
-        line_to_read = '1'
+                for line in read_obj:
-        while line_to_read:
+                    self.__token_info = line[:16]
-            line_to_read = read_obj.readline()
+                    if self.__token_info == 'ob<nu<open-brack':
-            line = line_to_read
+                        self.__ob_count = line[-5:-1]
-            self.__token_info = line[:16]
+                    if self.__token_info == 'cb<nu<clos-brack':
-            if self.__token_info == 'ob<nu<open-brack':
+                        self.__cb_count = line[-5:-1]
-                self.__ob_count = line[-5:-1]
+                    action = self.__state_dict.get(self.__state)
-            if self.__token_info == 'cb<nu<clos-brack':
+                    if action is None:
-                self.__cb_count = line[-5:-1]
+                        sys.stderr.write('No matching state in module fields_small.py\n')
-            action = self.__state_dict.get(self.__state)
+                        sys.stderr.write(self.__state + '\n')
-            if action == None:
+                    action(line)
                sys.stderr.write('no no matching state in module fields_small.py\n')
                sys.stderr.write(self.__state + '\n')
            action(line)
        read_obj.close()
        self.__write_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "fields_small.data")
--- a/src/calibre/ebooks/rtf2xml/get_char_map.py
+++ b/src/calibre/ebooks/rtf2xml/get_char_map.py
@ -25,8 +25,6 @@ class GetCharMap:
            'char_file'--the file with the mappings
        Returns:
            nothing
@ -57,7 +55,6 @@ class GetCharMap:
                fields[1].replace('\\colon', ':')
                map_dict[fields[1]] = fields[3]
        if not found_map:
            msg = 'no map found\nmap is "%s"\n'%(map,)
            raise self.__bug_handler, msg
--- a/src/calibre/ebooks/rtf2xml/hex_2_utf8.py
+++ b/src/calibre/ebooks/rtf2xml/hex_2_utf8.py
@ -11,8 +11,10 @@
 #                                                                       #
 #########################################################################
 import sys, os, tempfile, cStringIO
 from calibre.ebooks.rtf2xml import get_char_map, copy
 from calibre.ebooks.rtf2xml.char_set import char_set
 class Hex2Utf8:
    """
    Convert Microsoft hexidecimal numbers to utf-8
@ -108,7 +110,7 @@ class Hex2Utf8:
            """
        self.__file=file
        self.__copy = copy
-        if area_to_convert != 'preamble' and area_to_convert != 'body':
+        if area_to_convert not in ('preamble', 'body'):
            msg = (
            'in module "hex_2_utf8.py\n'
            '"area_to_convert" must be "body" or "preamble"\n'
@ -136,12 +138,12 @@ class Hex2Utf8:
        Set values, including those for the dictionaries.
        The file that contains the maps is broken down into many different
        sets. For example, for the Symbol font, there is the standard part for
-        hexidecimal numbers, and the the part for Microsoft charcters. Read
+        hexidecimal numbers, and the part for Microsoft characters. Read
        each part in, and then combine them.
        """
        # the default encoding system, the lower map for characters 0 through
        # 128, and the encoding system for Microsoft characters.
-        # New on 2004-05-8: the self.__char_map is not in diretory with other
+        # New on 2004-05-8: the self.__char_map is not in directory with other
        # modules
        self.__char_file = cStringIO.StringIO(char_set)
        char_map_obj =  get_char_map.GetCharMap(
@ -188,7 +190,6 @@ class Hex2Utf8:
            'body'          :       self.__body_func,
            'mi<mk<body-open_'  :   self.__found_body_func,
            'tx<hx<__________'  :   self.__hex_text_func,
            # 'tx<nu<__________'  :   self.__text_func,
            }
        self.__body_state_dict = {
            'preamble'      :       self.__preamble_for_body_func,
@ -228,9 +229,7 @@ class Hex2Utf8:
                font = self.__current_dict_name
                if self.__convert_caps\
                and self.__caps_list[-1] == 'true'\
-                and font != 'Symbol'\
+                and font not in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
                and font != 'Wingdings'\
                and font != 'Zapf Dingbats':
                    converted = self.__utf_token_to_caps_func(converted)
                self.__write_obj.write(
                'tx<ut<__________<%s\n' % converted
@ -240,9 +239,7 @@ class Hex2Utf8:
                font = self.__current_dict_name
                if self.__convert_caps\
                and self.__caps_list[-1] == 'true'\
-                and font != 'Symbol'\
+                and font not in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
                and font != 'Wingdings'\
                and font != 'Zapf Dingbats':
                    converted = converted.upper()
                self.__write_obj.write(
                'tx<nu<__________<%s\n' % converted
@ -282,17 +279,16 @@ class Hex2Utf8:
    def __convert_preamble(self):
        self.__state = 'preamble'
-        self.__write_obj = open(self.__write_to, 'w')
+        with open(self.__write_to, 'w') as self.__write_obj:
-        with open(self.__file, 'r') as read_obj:
+            with open(self.__file, 'r') as read_obj:
-           for line in read_obj:
+               for line in read_obj:
-                self.__token_info = line[:16]
+                    self.__token_info = line[:16]
-                action = self.__preamble_state_dict.get(self.__state)
+                    action = self.__preamble_state_dict.get(self.__state)
-                if action is None:
+                    if action is None:
-                    sys.stderr.write(_('error no state found in hex_2_utf8'),
+                        sys.stderr.write('error no state found in hex_2_utf8',
-                    self.__state
+                        self.__state
-                    )
+                        )
-                action(line)
+                    action(line)
        self.__write_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "preamble_utf_convert.data")
@ -461,9 +457,9 @@ class Hex2Utf8:
        if len(self.__caps_list) > 1:
            self.__caps_list.pop()
        else:
-            sys.stderr.write('Module is hex_2_utf8\n')
+            sys.stderr.write('Module is hex_2_utf8\n'
-            sys.stderr.write('method is __end_caps_func\n')
+            'method is __end_caps_func\n'
-            sys.stderr.write('caps list should be more than one?\n') #self.__in_caps not set
+            'caps list should be more than one?\n') #self.__in_caps not set
    def __text_func(self, line):
        """
@ -486,8 +482,7 @@ class Hex2Utf8:
                hex_num = '\'%s' % hex_num
                converted = self.__current_dict.get(hex_num)
                if converted is None:
-                    sys.stderr.write('module is hex_2_ut8\n')
+                    sys.stderr.write('module is hex_2_ut8\nmethod is __text_func\n')
                    sys.stderr.write('method is __text_func\n')
                    sys.stderr.write('no hex value for "%s"\n' % hex_num)
                else:
                    the_string += converted
@ -543,16 +538,15 @@ class Hex2Utf8:
    def __convert_body(self):
        self.__state = 'body'
        with open(self.__file, 'r') as read_obj:
-            self.__write_obj = open(self.__write_to, 'w')
+            with open(self.__write_to, 'w') as self.__write_obj:
-            for line in read_obj:
+                for line in read_obj:
-                self.__token_info = line[:16]
+                    self.__token_info = line[:16]
-                action = self.__body_state_dict.get(self.__state)
+                    action = self.__body_state_dict.get(self.__state)
-                if action is None:
+                    if action is None:
-                    sys.stderr.write('error no state found in hex_2_utf8',
+                        sys.stderr.write('error no state found in hex_2_utf8',
-                    self.__state
+                        self.__state
-                    )
+                        )
-                action(line)
+                    action(line)
        self.__write_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "body_utf_convert.data")
--- a/src/calibre/ebooks/rtf2xml/info.py
+++ b/src/calibre/ebooks/rtf2xml/info.py
@ -68,7 +68,6 @@ class Info:
        'cw<di<create-tim'  : (self.__found_tag_with_tokens_func, 'creation-time'),
        'cw<di<revis-time'  : (self.__found_tag_with_tokens_func, 'revision-time'),
        'cw<di<edit-time_'  : (self.__found_tag_with_tokens_func, 'editing-time'),
        'cw<di<print-time'  : (self.__found_tag_with_tokens_func, 'printing-time'),
        'cw<di<backuptime'  : (self.__found_tag_with_tokens_func, 'backup-time'),
@ -77,6 +76,7 @@ class Info:
        'cw<di<numofchrws'  : (self.__single_field_func, 'number-of-characters-without-space'),
        'cw<di<num-of-pag'  : (self.__single_field_func, 'number-of-pages'),
        'cw<di<version___'  : (self.__single_field_func, 'version'),
        'cw<di<edit-time_'  : (self.__single_field_func, 'editing-time'),
        'cw<di<intern-ver'  : (self.__single_field_func, 'internal-version-number'),
        'cw<di<internalID'  : (self.__single_field_func, 'internal-id-number'),
        }
--- a/src/calibre/ebooks/rtf2xml/inline.py
+++ b/src/calibre/ebooks/rtf2xml/inline.py
@ -411,11 +411,11 @@ class Inline:
                    self.__set_list_func(line)
                    action = self.__state_dict.get(self.__state)
                    if action is None:
-                        sys.stderr.write('No matching state in module inline_for_lists.py\n')
+                        sys.stderr.write('No matching state in module inline.py\n')
                        sys.stderr.write(self.__state + '\n')
                    action(line)
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "inline.data")
        copy_obj.rename(self.__write_to, self.__file)
-        os.remove(self.__write_to)
+        os.remove(self.__write_to)
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@ -214,7 +214,27 @@ class ProcessTokens:
        'nosupersub'         :	('ci', 'no-su-supe', self.__no_sup_sub_func),
        'up'                 :	('ci', 'font-up___', self.divide_by_2),
        'v'                  :	('ci', 'hidden____', self.default_func),
-        #  table => tb
+        # underline
        # can't see why it isn't a char info: 'ul'=>'ci'
        'ul'                 :	('ci', 'underlined<continous', self.two_part_func),
        'uld'                :	('ci', 'underlined<dotted', self.two_part_func),
        'uldash'             :	('ci', 'underlined<dash', self.two_part_func),
        'uldashd'            :	('ci', 'underlined<dash-dot', self.two_part_func),
        'uldashdd'           :	('ci', 'underlined<dash-dot-dot', self.two_part_func),
        'uldb'               :	('ci', 'underlined<double', self.two_part_func),
        'ulhwave'            :	('ci', 'underlined<heavy-wave', self.two_part_func),
        'ulldash'            :	('ci', 'underlined<long-dash', self.two_part_func),
        'ulth'               :	('ci', 'underlined<thich', self.two_part_func),
        'ulthd'              :	('ci', 'underlined<thick-dotted', self.two_part_func),
        'ulthdash'           :	('ci', 'underlined<thick-dash', self.two_part_func),
        'ulthdashd'          :	('ci', 'underlined<thick-dash-dot', self.two_part_func),
        'ulthdashdd'         :	('ci', 'underlined<thick-dash-dot-dot', self.two_part_func),
        'ulthldash'          :	('ci', 'underlined<thick-long-dash', self.two_part_func),
        'ululdbwave'         :	('ci', 'underlined<double-wave', self.two_part_func),
        'ulw'                :	('ci', 'underlined<word', self.two_part_func),
        'ulwave'             :	('ci', 'underlined<wave', self.two_part_func),
        'ulnone'             :	('ci', 'underlined<false', self.two_part_func),
        # table => tb
        'trowd'              :	('tb', 'row-def___', self.default_func),
        'cell'               :	('tb', 'cell______', self.default_func),
        'row'                :	('tb', 'row_______', self.default_func),
@ -274,25 +294,6 @@ class ProcessTokens:
        'paperh'             :	('pa', 'paper-hght', self.divide_by_20),
        # annotation => an
        'annotation'         :  ('an', 'annotation', self.default_func),
        # underline
        'ul'                 :	('ul', 'underlined<continous', self.two_part_func),
        'uld'                :	('ul', 'underlined<dotted', self.two_part_func),
        'uldash'             :	('ul', 'underlined<dash', self.two_part_func),
        'uldashd'            :	('ul', 'underlined<dash-dot', self.two_part_func),
        'uldashdd'           :	('ul', 'underlined<dash-dot-dot', self.two_part_func),
        'uldb'               :	('ul', 'underlined<double', self.two_part_func),
        'ulhwave'            :	('ul', 'underlined<heavy-wave', self.two_part_func),
        'ulldash'            :	('ul', 'underlined<long-dash', self.two_part_func),
        'ulth'               :	('ul', 'underlined<thich', self.two_part_func),
        'ulthd'              :	('ul', 'underlined<thick-dotted', self.two_part_func),
        'ulthdash'           :	('ul', 'underlined<thick-dash', self.two_part_func),
        'ulthdashd'          :	('ul', 'underlined<thick-dash-dot', self.two_part_func),
        'ulthdashdd'         :	('ul', 'underlined<thick-dash-dot-dot', self.two_part_func),
        'ulthldash'          :	('ul', 'underlined<thick-long-dash', self.two_part_func),
        'ululdbwave'         :	('ul', 'underlined<double-wave', self.two_part_func),
        'ulw'                :	('ul', 'underlined<word', self.two_part_func),
        'ulwave'             :	('ul', 'underlined<wave', self.two_part_func),
        'ulnone'             :	('ul', 'underlined<false', self.two_part_func),
        # border => bd
        'trbrdrh'            :	('bd', 'bor-t-r-hi', self.default_func),
        'trbrdrv'            :	('bd', 'bor-t-r-vi', self.default_func),
@ -757,7 +758,7 @@ class ProcessTokens:
    def process_cw(self, token):
        """Change the value of the control word by determining what dictionary
        it belongs to"""
-        special = [  '*', ':', '}', '{',   '~', '_', '-', ';' ]
+        special = [ '*', ':', '}', '{', '~', '_', '-', ';' ]
        ##if token != "{" or token != "}":
        token = token[1:] # strip off leading \
        token = token.replace(" ", "")
@ -793,7 +794,7 @@ class ProcessTokens:
                            raise self.__exception_handler, msg
                    the_index = token.find('\\ ')
-                    if token is not None and  the_index > -1:
+                    if token is not None and the_index > -1:
                        msg = '\nInvalid RTF: token "\\ " not valid.\nError at line %d'\
                            % line_count
                        raise self.__exception_handler, msg
@ -832,4 +833,4 @@ class ProcessTokens:
            msg = '\nInvalid RTF: document does not have matching brackets.\n'
            raise self.__exception_handler, msg
        else:
-            return self.__return_code
+            return self.__return_code
--- a/src/calibre/ebooks/rtf2xml/sections.py
+++ b/src/calibre/ebooks/rtf2xml/sections.py
@ -496,7 +496,7 @@ Instead, ingore all section information in a field-block.
            self.__token_info = line[:16]
            action = self.__state_dict.get(self.__state)
            if action == None:
-                sys.stderr.write('no no matching state in module sections.py\n')
+                sys.stderr.write('no matching state in module sections.py\n')
                sys.stderr.write(self.__state + '\n')
            action(line)
        read_obj.close()
--- a/src/calibre/ebooks/rtf2xml/styles.py
+++ b/src/calibre/ebooks/rtf2xml/styles.py
@ -103,8 +103,6 @@ class Styles:
        'sect-note_'    :	'endnotes-in-section',
        # list=> ls
        'list-text_'    :	'list-text',
        # this line must be wrong because it duplicates an earlier one
        'list-text_'    :	'list-text',
        'list______'    :	'list',
        'list-lev-d'    :	'list-level-definition',
        'list-cardi'    :	'list-cardinal-numbering',
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@ -114,6 +114,7 @@ class Tokenize:
        # this is for older RTF
        input_file = self.__par_exp.sub('\n\\par \n', input_file)
        input_file = self.__cwdigit_exp.sub("\g<1>\n\g<2>", input_file)
        input_file = self.__cs_ast.sub("\g<1>", input_file)
        input_file = self.__ms_hex_exp.sub("\\mshex0\g<1> ", input_file)
        input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
        #remove \n in bin data
@ -163,6 +164,8 @@ class Tokenize:
        self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
        #this is for old RTF
        self.__par_exp = re.compile(r'(\\\n+|\\ )')
        #handle improper cs char-style with \* before without {
        self.__cs_ast = re.compile(r'\\\*([\n ]*\\cs\d+[\n \\]+)')
        #handle cw using a digit as argument and without space as delimiter
        self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@ -12,6 +12,7 @@ import os, re
 from calibre import prepare_string_for_xml, isbytestring
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.conversion.preprocess import DocAnalysis
 from calibre.utils.cleantext import clean_ascii_chars
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -477,17 +477,17 @@ class BIBTEX(CatalogPlugin): # {{{
            if opts.bibfile_enc in bibfile_enc :
                bibfile_enc = opts.bibfile_enc
            else :
-                log(" WARNING: incorrect --choose-encoding flag, revert to default")
+                log.warn("Incorrect --choose-encoding flag, revert to default")
                bibfile_enc = bibfile_enc[0]
            if opts.bibfile_enctag in bibfile_enctag :
                bibfile_enctag = opts.bibfile_enctag
            else :
-                log(" WARNING: incorrect --choose-encoding-configuration flag, revert to default")
+                log.warn("Incorrect --choose-encoding-configuration flag, revert to default")
                bibfile_enctag = bibfile_enctag[0]
            if opts.bib_entry in bib_entry :
                bib_entry = opts.bib_entry
            else :
-                log(" WARNING: incorrect --entry-type flag, revert to default")
+                log.warn("Incorrect --entry-type flag, revert to default")
                bib_entry = bib_entry[0]
        if opts.verbose:
@ -544,7 +544,7 @@ class BIBTEX(CatalogPlugin): # {{{
            elif opts.impcit == 'True' :
                citation_bibtex= True
            else :
-                log(" WARNING: incorrect --create-citation, revert to default")
+                log.warn("Incorrect --create-citation, revert to default")
                citation_bibtex= True
        else :
            citation_bibtex= opts.impcit
@ -556,7 +556,7 @@ class BIBTEX(CatalogPlugin): # {{{
            elif opts.addfiles == 'True' :
                addfiles_bibtex = True
            else :
-                log(" WARNING: incorrect --add-files-path, revert to default")
+                log.warn("Incorrect --add-files-path, revert to default")
                addfiles_bibtex= True
        else :
            addfiles_bibtex = opts.addfiles
@ -574,7 +574,7 @@ class BIBTEX(CatalogPlugin): # {{{
            if bib_entry == 'book' :
                nb_books = len(filter(check_entry_book_valid, data))
                if nb_books < nb_entries :
-                    log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
+                    log.warn("Only %d entries in %d are book compatible" % (nb_books, nb_entries))
                    nb_entries = nb_books
            # If connected device, add 'On Device' values to data