RTFInput: Cleanup & small improvements

2025-07-09 03:04:10 -04:00 · 2012-05-05 15:34:00 +02:00 · 2012-05-05 15:34:00 +02:00 · 60b53045e4
commit 60b53045e4
parent bd5e6585ff
7 changed files with 298 additions and 264 deletions
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@ -372,8 +372,8 @@ class ParseRtf:
        old_rtf = old_rtf_obj.check_if_old_rtf()
        if old_rtf:
            if self.__run_level > 5:
-                msg = 'Older RTF\n'
-                msg += 'self.__run_level is "%s"\n' % self.__run_level
+                msg = 'Older RTF\n' \
+                'self.__run_level is "%s"\n' % self.__run_level
                raise RtfInvalidCodeException, msg
            if self.__run_level > 1:
                sys.stderr.write('File could be older RTF...\n')
@ -381,7 +381,7 @@ class ParseRtf:
                if self.__run_level > 1:
                    sys.stderr.write(
                        'File also has newer RTF.\n'
-                        'Will do the best to convert.\n'
+                        'Will do the best to convert...\n'
                    )
            add_brackets_obj = add_brackets.AddBrackets(
                    in_file = self.__temp_file,
--- a/src/calibre/ebooks/rtf2xml/add_brackets.py
+++ b/src/calibre/ebooks/rtf2xml/add_brackets.py
@ -20,6 +20,9 @@ class AddBrackets:
    """
    Add brackets for old RTF.
    Logic:
+    When control words without their own brackets are encountered
+    and in the list of allowed words, this will add brackets
+    to facilitate the treatment of the file
    """
    def __init__(self, in_file,
            bug_handler,
@ -41,50 +44,51 @@ class AddBrackets:
        self.__copy = copy
        self.__write_to = better_mktemp()
        self.__run_level = run_level
-
-    def __initiate_values(self):
-        """
-        """
        self.__state_dict = {
            'before_body'           : self.__before_body_func,
            'in_body'               : self.__in_body_func,
            'after_control_word'    : self.__after_control_word_func,
            'in_ignore'             : self.__ignore_func,
        }
+        self.__accept = [
+            'cw<ci<bold______' ,
+            'cw<ci<annotation' ,
+            'cw<ci<blue______' ,
+            # 'cw<ci<bold______' ,
+            'cw<ci<caps______' ,
+            'cw<ci<char-style' ,
+            'cw<ci<dbl-strike' ,
+            'cw<ci<emboss____' ,
+            'cw<ci<engrave___' ,
+            'cw<ci<font-color' ,
+            'cw<ci<font-down_' ,
+            'cw<ci<font-size_' ,
+            'cw<ci<font-style' ,
+            'cw<ci<font-up___' ,
+            'cw<ci<footnot-mk' ,
+            'cw<ci<green_____' ,
+            'cw<ci<hidden____' ,
+            'cw<ci<italics___' ,
+            'cw<ci<outline___' ,
+            'cw<ci<red_______' ,
+            'cw<ci<shadow____' ,
+            'cw<ci<small-caps' ,
+            'cw<ci<strike-thr' ,
+            'cw<ci<subscript_' ,
+            'cw<ci<superscrip' ,
+            'cw<ci<underlined' ,
+            # 'cw<ul<underlined' ,
+        ]
+
+    def __initiate_values(self):
+        """
+        """
        self.__state = 'before_body'
        self.__inline = {}
        self.__temp_group = []
        self.__open_bracket = False
        self.__found_brackets = False
-        self.__accept = [
-        'cw<ci<bold______',
-        'cw<ci<annotation'  ,
-        'cw<ci<blue______' ,
-        'cw<ci<bold______' ,
-        'cw<ci<caps______' ,
-        'cw<ci<char-style' ,
-        'cw<ci<dbl-strike' ,
-        'cw<ci<emboss____'  ,
-        'cw<ci<engrave___' ,
-        'cw<ci<font-color' ,
-        'cw<ci<font-down_' ,
-        'cw<ci<font-size_' ,
-        'cw<ci<font-style' ,
-        'cw<ci<font-up___',
-        'cw<ci<footnot-mk',
-        'cw<ci<green_____' ,
-        'cw<ci<hidden____',
-        'cw<ci<italics___' ,
-        'cw<ci<outline___',
-        'cw<ci<red_______' ,
-        'cw<ci<shadow____',
-        'cw<ci<small-caps' ,
-        'cw<ci<strike-thr',
-        'cw<ci<subscript_' ,
-        'cw<ci<superscrip',
-        'cw<ci<underlined' ,
-        # 'cw<ul<underlined' ,
-        ]
+        

    def __before_body_func(self, line):
        """
--- a/src/calibre/ebooks/rtf2xml/header.py
+++ b/src/calibre/ebooks/rtf2xml/header.py
@ -11,6 +11,7 @@
 #                                                                       #
 #########################################################################
 import sys, os
+
 from calibre.ebooks.rtf2xml import copy
 from calibre.ptempfile import better_mktemp

@ -31,29 +32,29 @@ class Header:
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__write_to = better_mktemp()
-        self.__found_a_header = 0
+        self.__found_a_header = False
+
    def __in_header_func(self, line):
        """
        Handle all tokens that are part of header
        """
        if self.__cb_count == self.__header_bracket_count:
-            self.__in_header = 0
+            self.__in_header = False
            self.__write_obj.write(line)
            self.__write_to_head_obj.write(
-            'mi<mk<head___clo\n')
-            self.__write_to_head_obj.write(
-            'mi<tg<close_____<header-or-footer\n')
-            self.__write_to_head_obj.write(
+            'mi<mk<head___clo\n' \
+            'mi<tg<close_____<header-or-footer\n' \
            'mi<mk<header-clo\n')
        else:
            self.__write_to_head_obj.write(line)
+
    def __found_header(self, line):
        """
        Found a header
        """
        # but this could be header or footer
-        self.__found_a_header = 1
-        self.__in_header = 1
+        self.__found_a_header = True
+        self.__in_header = True
        self.__header_count += 1
        # temporarily set this to zero so I can enter loop
        self.__cb_count = 0
@ -69,18 +70,23 @@ class Header:
                    'mi<tg<open-att__<header-or-footer<type>%s\n' % (type)
                    )
        else:
-            sys.stderr.write('module is header\n')
-            sys.stderr.write('method is __found_header\n')
-            sys.stderr.write('no dict entry\n')
-            sys.stderr.write('line is %s' % line)
+            sys.stderr.write(
+            'module is header\n' \
+            'method is __found_header\n' \
+            'no dict entry\n' \
+            'line is %s' % line)
            self.__write_to_head_obj.write(
                    'mi<tg<open-att__<header-or-footer<type>none\n'
                    )
+
    def __default_sep(self, line):
-        """Handle all tokens that are not header tokens"""
+        """
+        Handle all tokens that are not header tokens
+        """
        if self.__token_info[3:5] == 'hf':
            self.__found_header(line)
        self.__write_obj.write(line)
+
    def __initiate_sep_values(self):
        """
        initiate counters for separate_footnotes method.
@ -89,7 +95,7 @@ class Header:
        self.__ob_count = 0
        self.__cb_count = 0
        self.__header_bracket_count = 0
-        self.__in_header = 0
+        self.__in_header = False
        self.__header_count = 0
        self.__head_dict = {
            'head-left_'        :   ('header-left'),
@ -101,6 +107,7 @@ class Header:
            'header____'        :   ('header' ),
            'footer____'        :   ('footer' ),
        }
+
    def separate_headers(self):
        """
        Separate all the footnotes in an RTF file and put them at the bottom,
@ -110,53 +117,47 @@ class Header:
        bottom of the main file.
        """
        self.__initiate_sep_values()
-        read_obj = open(self.__file)
-        self.__write_obj = open(self.__write_to, 'w')
        self.__header_holder = better_mktemp()
-        self.__write_to_head_obj = open(self.__header_holder, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            # keep track of opening and closing brackets
-            if self.__token_info == 'ob<nu<open-brack':
-                self.__ob_count = line[-5:-1]
-            if self.__token_info == 'cb<nu<clos-brack':
-                self.__cb_count = line[-5:-1]
-            # In the middle of footnote text
-            if self.__in_header:
-                self.__in_header_func(line)
-            # not in the middle of footnote text
-            else:
-                self.__default_sep(line)
-        self.__write_obj.close()
-        read_obj.close()
-        self.__write_to_head_obj.close()
-        read_obj = open(self.__header_holder, 'r')
-        write_obj = open(self.__write_to, 'a')
-        write_obj.write(
-        'mi<mk<header-beg\n')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            write_obj.write(line)
-        write_obj.write(
-        'mi<mk<header-end\n')
-        read_obj.close()
-        write_obj.close()
+        with open(self.__file) as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                with open(self.__header_holder, 'w') as self.__write_to_head_obj:
+                    for line in read_obj:
+                        self.__token_info = line[:16]
+                        # keep track of opening and closing brackets
+                        if self.__token_info == 'ob<nu<open-brack':
+                            self.__ob_count = line[-5:-1]
+                        if self.__token_info == 'cb<nu<clos-brack':
+                            self.__cb_count = line[-5:-1]
+                        # In the middle of footnote text
+                        if self.__in_header:
+                            self.__in_header_func(line)
+                        # not in the middle of footnote text
+                        else:
+                            self.__default_sep(line)
+        
+        with open(self.__header_holder, 'r') as read_obj:
+            with open(self.__write_to, 'a') as write_obj:
+                write_obj.write(
+                'mi<mk<header-beg\n')
+                for line in read_obj:
+                    write_obj.write(line)
+                write_obj.write(
+                'mi<mk<header-end\n')
        os.remove(self.__header_holder)
+
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
-            copy_obj.copy_file(self.__write_to, "header_separate.info")
+            copy_obj.copy_file(self.__write_to, "header_separate.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
+
    def update_info(self, file, copy):
        """
        Unused method
        """
        self.__file = file
        self.__copy = copy
+
    def __get_head_body_func(self, line):
        """
        Process lines in main body and look for beginning of headers.
@ -166,6 +167,7 @@ class Header:
            self.__state = 'head'
        else:
            self.__write_obj.write(line)
+
    def __get_head_head_func(self, line):
        """
        Copy headers and footers from bottom of file to a separate, temporary file.
@ -174,6 +176,7 @@ class Header:
            self.__state = 'body'
        else:
            self.__write_to_head_obj.write(line)
+
    def __get_headers(self):
        """
        Private method to remove footnotes from main file.  Read one line from
@ -182,21 +185,16 @@ class Header:
        These two functions do the work of separating the footnotes form the
        body.
        """
-        read_obj = open(self.__file)
-        self.__write_obj = open(self.__write_to, 'w')
-            # self.__write_to = "footnote_info.data"
-        self.__write_to_head_obj = open(self.__header_holder, 'w')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            self.__token_info = line[:16]
-            if self.__state == 'body':
-                self.__get_head_body_func(line)
-            elif self.__state == 'head':
-                self.__get_head_head_func(line)
-        read_obj.close()
-        self.__write_obj.close()
-        self.__write_to_head_obj.close()
+        with open(self.__file) as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                with open(self.__header_holder, 'w') as self.__write_to_head_obj:
+                    for line in read_obj:
+                        self.__token_info = line[:16]
+                        if self.__state == 'body':
+                            self.__get_head_body_func(line)
+                        elif self.__state == 'head':
+                            self.__get_head_head_func(line)
+
    def __get_head_from_temp(self, num):
        """
        Private method for joining headers and footers to body. This method
@ -205,18 +203,17 @@ class Header:
        returns them as a string.
        """
        look_for = 'mi<mk<header-ope<' + num + '\n'
-        found_head = 0
+        found_head = False
        string_to_return = ''
-        line = 1
-        while line:
-            line = self.__read_from_head_obj.readline()
+        for line in self.__read_from_head_obj:
            if found_head:
                if line == 'mi<mk<header-clo\n':
                    return string_to_return
-                string_to_return = string_to_return + line
+                string_to_return += line
            else:
                if line == look_for:
-                    found_head = 1
+                    found_head = True
+
    def __join_from_temp(self):
        """
        Private method for rejoining footnotes to body.  Read from the
@ -227,15 +224,13 @@ class Header:
        If no footnote marker is found, simply print out the token (line).
        """
        self.__read_from_head_obj = open(self.__header_holder, 'r')
-        read_obj = open(self.__write_to, 'r')
        self.__write_obj = open(self.__write_to2, 'w')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            if line[:16] == 'mi<mk<header-ind':
-                line = self.__get_head_from_temp(line[17:-1])
-            self.__write_obj.write(line)
-        read_obj.close()
+        with open(self.__write_to, 'r') as read_obj:
+            for line in read_obj:
+                if line[:16] == 'mi<mk<header-ind':
+                    line = self.__get_head_from_temp(line[17:-1])
+                self.__write_obj.write(line)
+
    def join_headers(self):
        """
        Join the footnotes from the bottom of the file and put them in their
--- a/src/calibre/ebooks/rtf2xml/old_rtf.py
+++ b/src/calibre/ebooks/rtf2xml/old_rtf.py
@ -11,14 +11,18 @@
 #                                                                       #
 #########################################################################
 import sys
-"""
-"""
+
 class OldRtf:
    """
    Check to see if the RTF is an older version
    Logic:
+    If allowable control word/properties happen in text without being enclosed
+    in brackets the file will be considered old rtf
    """
-    def __init__(self, in_file, bug_handler, run_level ):
+    def __init__(self, in_file,
+                bug_handler,
+                run_level,
+                ):
        """
        Required:
            'file'--file to parse
@ -32,46 +36,46 @@ class OldRtf:
            """
        self.__file = in_file
        self.__bug_handler = bug_handler
-        self.__initiate_values()
-        self.__ob_group = 0
-    def __initiate_values(self):
-        self.__previous_token = ''
-        self.__new_found = 0
+        self.__run_level = run_level
        self.__allowable = [
-        'annotation' ,
-        'blue______'  ,
-        'bold______',
-        'caps______',
-        'char-style' ,
-        'dbl-strike' ,
-        'emboss____',
-        'engrave___' ,
-        'font-color',
-        'font-down_' ,
-        'font-size_',
-        'font-style',
-        'font-up___',
-        'footnot-mk' ,
-        'green_____' ,
-        'hidden____',
-        'italics___',
-        'outline___',
-        'red_______',
-        'shadow____' ,
-        'small-caps',
-        'strike-thr',
-        'subscript_',
-        'superscrip' ,
-        'underlined' ,
+            'annotation' ,
+            'blue______'  ,
+            'bold______',
+            'caps______',
+            'char-style' ,
+            'dbl-strike' ,
+            'emboss____',
+            'engrave___' ,
+            'font-color',
+            'font-down_' ,
+            'font-size_',
+            'font-style',
+            'font-up___',
+            'footnot-mk' ,
+            'green_____' ,
+            'hidden____',
+            'italics___',
+            'outline___',
+            'red_______',
+            'shadow____' ,
+            'small-caps',
+            'strike-thr',
+            'subscript_',
+            'superscrip' ,
+            'underlined' ,
        ]
-        self.__state = 'before_body'
        self.__action_dict = {
            'before_body'   : self.__before_body_func,
            'in_body'       : self.__check_tokens_func,
            'after_pard'    : self.__after_pard_func,
        }
-        self.__is_old = 0
+
+    def __initiate_values(self):
+        self.__previous_token = ''
+        self.__state = 'before_body'
        self.__found_new = 0
+        self.__ob_group = 0
+
    def __check_tokens_func(self, line):
        if self.__inline_info in self.__allowable:
            if self.__ob_group == self.__base_ob_count:
@ -80,48 +84,56 @@ class OldRtf:
                self.__found_new += 1
        elif self.__token_info ==  'cw<pf<par-def___':
            self.__state = 'after_pard'
+
    def __before_body_func(self, line):
        if self.__token_info == 'mi<mk<body-open_':
            self.__state = 'in_body'
            self.__base_ob_count = self.__ob_group
+
    def __after_pard_func(self, line):
        if line[0:2] != 'cw':
            self.__state = 'in_body'
+
    def check_if_old_rtf(self):
        """
        Requires:
            nothing
        Returns:
-            1 if file is older RTf
-            0 if file is newer RTF
+            True if file is older RTf
+            False if file is newer RTF
        """
-
-        read_obj = open(self.__file, 'r')
-        line = 1
+        self.__initiate_values()
        line_num = 0
-        while line:
-            line = read_obj.readline()
-            line_num += 1
-            self.__token_info = line[:16]
-            if self.__token_info == 'mi<mk<body-close':
-                return 0
-                self.__ob_group = 0
-            if self.__token_info == 'ob<nu<open-brack':
-                self.__ob_group += 1
-                self.__ob_count = line[-5:-1]
-            if self.__token_info == 'cb<nu<clos-brack':
-                self.__ob_group -= 1
-                self.__cb_count = line[-5:-1]
-            self.__inline_info = line[6:16]
-            if self.__state == 'after_body':
-                return 0
-            action = self.__action_dict.get(self.__state)
-            if not action:
-                sys.stderr.write('No action for state!\n')
-            result = action(line)
-            if result == 'new_rtf':
-                return 0
-            elif result == 'old_rtf':
-                return 1
-            self.__previous_token = line[6:16]
-        return 0
+        with open(self.__file, 'r') as read_obj:
+            for line in read_obj:
+                line_num += 1
+                self.__token_info = line[:16]
+                if self.__token_info == 'mi<mk<body-close':
+                    return False
+                if self.__token_info == 'ob<nu<open-brack':
+                    self.__ob_group += 1
+                    self.__ob_count = line[-5:-1]
+                if self.__token_info == 'cb<nu<clos-brack':
+                    self.__ob_group -= 1
+                    self.__cb_count = line[-5:-1]
+                self.__inline_info = line[6:16]
+                if self.__state == 'after_body':
+                    return False
+                action = self.__action_dict.get(self.__state)
+                if action is None:
+                    try:
+                        sys.stderr.write('No action for this state!\n')
+                    except:
+                        pass
+                result = action(line)
+                if result == 'new_rtf':
+                    return False
+                elif result == 'old_rtf':
+                    if self.__run_level > 3:
+                        sys.stderr.write(
+                            'Old rtf construction %s (bracket %s, line %s)\n' 
+                                % (self.__inline_info, str(self.__ob_group), line_num)
+                        )
+                    return True
+                self.__previous_token = line[6:16]
+        return False
--- a/src/calibre/ebooks/rtf2xml/paragraphs.py
+++ b/src/calibre/ebooks/rtf2xml/paragraphs.py
@ -11,31 +11,32 @@
 #                                                                       #
 #########################################################################
 import sys, os
+
 from calibre.ebooks.rtf2xml import copy
 from calibre.ptempfile import better_mktemp

 class Paragraphs:
    """
-=================
-Purpose
-=================
-Write paragraph tags for a tokenized file. (This module won't be any use to use
-to you unless you use it as part of the other modules.)
-------------
-Method
-------------
-RTF does not tell you when a paragraph begins. It only tells you when the
-paragraph ends.
-In order to make paragraphs out of this limited info, the parser starts in the
-body of the documents and assumes it is not in a paragraph. It looks for clues
-to begin a paragraph. Text starts a paragraph; so does an inline field or
-list-text. If an end of paragraph marker (\par) is found, then this indicates
-a blank paragraph.
-Once a paragraph is found, the state changes to 'paragraph.' In this state,
-clues are looked to for the end of a paragraph. The end of a paragraph marker
-(\par) marks the end of a paragraph. So does the end of a footnote or heading;
-a paragraph definintion; the end of a field-block; and the beginning of a
-section. (How about the end of a section or the end of a field-block?)
+    =================
+    Purpose
+    =================
+    Write paragraph tags for a tokenized file. (This module won't be any use to use
+    to you unless you use it as part of the other modules.)
+    -------------
+    Method
+    -------------
+    RTF does not tell you when a paragraph begins. It only tells you when the
+    paragraph ends.
+    In order to make paragraphs out of this limited info, the parser starts in the
+    body of the documents and assumes it is not in a paragraph. It looks for clues
+    to begin a paragraph. Text starts a paragraph; so does an inline field or
+    list-text. If an end of paragraph marker (\par) is found, then this indicates
+    a blank paragraph.
+    Once a paragraph is found, the state changes to 'paragraph.' In this state,
+    clues are looked to for the end of a paragraph. The end of a paragraph marker
+    (\par) marks the end of a paragraph. So does the end of a footnote or heading;
+    a paragraph definition; the end of a field-block; and the beginning of a
+    section. (How about the end of a section or the end of a field-block?)
    """
    def __init__(self,
            in_file,
@ -60,6 +61,7 @@ section. (How about the end of a section or the end of a field-block?)
        self.__write_empty_para = write_empty_para
        self.__run_level = run_level
        self.__write_to = better_mktemp()
+
    def __initiate_values(self):
        """
        Initiate all values.
@ -77,7 +79,7 @@ section. (How about the end of a section or the end of a field-block?)
        self.__paragraph_dict = {
        'cw<pf<par-end___'      : self.__close_para_func,   # end of paragraph
        'mi<mk<headi_-end'      : self.__close_para_func,   # end of header or footer
-        ##'cw<pf<par-def___'      : self.__close_para_func,   # paragraph definition
+        ## 'cw<pf<par-def___'      : self.__close_para_func,   # paragraph definition
        # 'mi<mk<fld-bk-end'      : self.__close_para_func,   # end of field-block
        'mi<mk<fldbk-end_'      : self.__close_para_func,   # end of field-block
        'mi<mk<body-close'      : self.__close_para_func,   # end of body
@ -99,6 +101,7 @@ section. (How about the end of a section or the end of a field-block?)
        'mi<mk<pict-start'      : self.__start_para_func,
        'cw<pf<page-break'      : self.__empty_pgbk_func,    # page break
        }
+
    def __before_body_func(self, line):
        """
        Required:
@ -112,6 +115,7 @@ section. (How about the end of a section or the end of a field-block?)
        if self.__token_info == 'mi<mk<body-open_':
            self.__state = 'not_paragraph'
        self.__write_obj.write(line)
+
    def __not_paragraph_func(self, line):
        """
        Required:
@ -127,6 +131,7 @@ section. (How about the end of a section or the end of a field-block?)
        if action:
            action(line)
        self.__write_obj.write(line)
+
    def __paragraph_func(self, line):
        """
        Required:
@ -144,6 +149,7 @@ section. (How about the end of a section or the end of a field-block?)
            action(line)
        else:
            self.__write_obj.write(line)
+
    def __start_para_func(self, line):
        """
        Requires:
@ -160,6 +166,7 @@ section. (How about the end of a section or the end of a field-block?)
        )
        self.__write_obj.write(self.__start2_marker)
        self.__state = 'paragraph'
+
    def __empty_para_func(self, line):
        """
        Requires:
@ -176,6 +183,7 @@ section. (How about the end of a section or the end of a field-block?)
            'mi<tg<empty_____<para\n'
            )
            self.__write_obj.write(self.__end_marker)   # marker for later parsing
+
    def __empty_pgbk_func(self, line):
        """
        Requires:
@ -188,6 +196,7 @@ section. (How about the end of a section or the end of a field-block?)
        self.__write_obj.write(
        'mi<tg<empty_____<page-break\n'
        )
+
    def __close_para_func(self, line):
        """
        Requires:
@ -205,6 +214,7 @@ section. (How about the end of a section or the end of a field-block?)
        self.__write_obj.write(self.__end_marker) # marker for later parser
        self.__write_obj.write(line)
        self.__state = 'not_paragraph'
+
    def __bogus_para__def_func(self, line):
        """
        Requires:
@ -215,6 +225,7 @@ section. (How about the end of a section or the end of a field-block?)
            if a \pard occurs in a paragraph, I want to ignore it. (I believe)
        """
        self.__write_obj.write('mi<mk<bogus-pard\n')
+
    def make_paragraphs(self):
        """
        Requires:
@ -229,20 +240,18 @@ section. (How about the end of a section or the end of a field-block?)
            only other state is 'paragraph'.
        """
        self.__initiate_values()
-        read_obj = open(self.__file, 'r')
-        self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            action = self.__state_dict.get(self.__state)
-            if action == None:
-                sys.stderr.write('no no matching state in module sections.py\n')
-                sys.stderr.write(self.__state + '\n')
-            action(line)
-        read_obj.close()
-        self.__write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                for line in read_obj:
+                    self.__token_info = line[:16]
+                    action = self.__state_dict.get(self.__state)
+                    if action is None:
+                        try:
+                            sys.stderr.write('no matching state in module paragraphs.py\n')
+                            sys.stderr.write(self.__state + '\n')
+                        except:
+                            pass
+                    action(line)
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "paragraphs.data")
--- a/src/calibre/ebooks/rtf2xml/preamble_rest.py
+++ b/src/calibre/ebooks/rtf2xml/preamble_rest.py
@ -11,16 +11,24 @@
 #                                                                       #
 #########################################################################
 import sys,os
+
 from calibre.ebooks.rtf2xml import copy
+
 class Preamble:
    """
    Fix the reamaing parts of the preamble. This module does very little. It
    makes sure that no text gets put in the revision of list table. In the
-    future, when I understand how to interprett he revision table and list
+    future, when I understand how to interpret the revision table and list
    table, I will make these methods more functional.
    """
-    def __init__(self, file, bug_handler,  platform, default_font, code_page,
-    copy=None, temp_dir=None):
+    def __init__(self, file,
+                bug_handler,
+                platform,
+                default_font,
+                code_page,
+                copy=None,
+                temp_dir=None,
+                ):
        """
        Required:
            file--file to parse
@ -44,6 +52,7 @@ class Preamble:
            self.__write_to = os.path.join(temp_dir,"info_table_info.data")
        else:
            self.__write_to = "info_table_info.data"
+
    def __initiate_values(self):
        """
        Initiate all values.
@ -62,12 +71,14 @@ class Preamble:
        'mi<mk<revtbl-beg'      : self.__found_revision_table_func,
        'mi<mk<body-open_'      : self.__found_body_func,
        }
+
    def __default_func(self, line):
        action = self.__default_dict.get(self.__token_info)
        if action:
            action(line)
        else:
            self.__write_obj.write(line)
+
    def __found_rtf_head_func(self, line):
        """
        Requires:
@ -84,8 +95,10 @@ class Preamble:
            '<platform>%s\n' % (self.__default_font, self.__code_page,
            self.__platform)
        )
+
    def __found_list_table_func(self, line):
        self.__state = 'list_table'
+
    def __list_table_func(self, line):
        if self.__token_info == 'mi<mk<listabend_':
            self.__state = 'default'
@ -93,8 +106,10 @@ class Preamble:
            pass
        else:
            self.__write_obj.write(line)
+
    def __found_revision_table_func(self, line):
        self.__state = 'revision'
+
    def __revision_table_func(self, line):
        if self.__token_info == 'mi<mk<revtbl-end':
            self.__state = 'default'
@ -102,11 +117,14 @@ class Preamble:
            pass
        else:
            self.__write_obj.write(line)
+
    def __found_body_func(self, line):
        self.__state = 'body'
        self.__write_obj.write(line)
+
    def __body_func(self, line):
        self.__write_obj.write(line)
+
    def fix_preamble(self):
        """
        Requires:
@ -119,20 +137,15 @@ class Preamble:
            the list table.
        """
        self.__initiate_values()
-        read_obj = open(self.__file, 'r')
-        self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            action = self.__state_dict.get(self.__state)
-            if action == None:
-                sys.stderr.write('no no matching state in module preamble_rest.py\n')
-                sys.stderr.write(self.__state + '\n')
-            action(line)
-        read_obj.close()
-        self.__write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                for line in read_obj:
+                    self.__token_info = line[:16]
+                    action = self.__state_dict.get(self.__state)
+                    if action is None:
+                        sys.stderr.write(
+                        'no matching state in module preamble_rest.py\n' + self.__state + '\n')
+                    action(line)
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "preamble_div.data")
--- a/src/calibre/ebooks/rtf2xml/sections.py
+++ b/src/calibre/ebooks/rtf2xml/sections.py
@ -11,43 +11,44 @@
 #                                                                       #
 #########################################################################
 import sys, os
+
 from calibre.ebooks.rtf2xml import copy
 from calibre.ptempfile import better_mktemp

 class Sections:
    """
-=================
-Purpose
-=================
-Write section tags for a tokenized file. (This module won't be any use to use
-to you unless you use it as part of the other modules.)
---------------
-logic
---------------
-The tags for the first section breaks have already been written.
-RTF stores section breaks with the \sect tag. Each time this tag is
-encountered, add one to the counter.
-When I encounter the \sectd tag, I want to collect all the appropriate tokens
-that describe the section. When I reach a \pard, I know I an stop collecting
-tokens and write the section tags.
-The exception to this method occurs when sections occur in field blocks, such
-as the index. Normally, two section break occur within the index and other
-field-blocks. (If less or more section breaks occurr, this code may not work.)
-I want the sections to occurr outside of the index. That is, the index
-should be nested inside one section tag. After the index is complete, a new
-section should begin.
-In order to write the sections outside of the field blocks, I have to store
-all of the field block as a string. When I ecounter the \sect tag, add one to
-the section counter, but store this number in a list. Likewise, store the
-information describing the section in another list.
-When I reach the end of the field block, choose the first item from the
-numbered list as the section number. Choose the first item in the description
-list as the values and attributes of the section. Enclose the field string
-between the section tags.
-Start a new section outside the field-block strings. Use the second number in
-the list; use the second item in the description list.
-CHANGE (2004-04-26) No longer write sections that occurr in field-blocks.
-Instead, ingore all section information in a field-block.
+    =================
+    Purpose
+    =================
+    Write section tags for a tokenized file. (This module won't be of any use
+    to you unless you use it as part of the other modules.)
+    ---------------
+    logic
+    ---------------
+    The tags for the first section breaks have already been written.
+    RTF stores section breaks with the \sect tag. Each time this tag is
+    encountered, add one to the counter.
+    When I encounter the \sectd tag, I want to collect all the appropriate tokens
+    that describe the section. When I reach a \pard, I know I can stop collecting
+    tokens and write the section tags.
+    The exception to this method occurs when sections occur in field blocks, such
+    as the index. Normally, two section breaks occur within the index and other
+    field-blocks. (If fewer or more section breaks occur, this code may not work.)
+    I want the sections to occur outside of the index. That is, the index
+    should be nested inside one section tag. After the index is complete, a new
+    section should begin.
+    In order to write the sections outside of the field blocks, I have to store
+    all of the field block as a string. When I encounter the \sect tag, add one to
+    the section counter, but store this number in a list. Likewise, store the
+    information describing the section in another list.
+    When I reach the end of the field block, choose the first item from the
+    numbered list as the section number. Choose the first item in the description
+    list as the values and attributes of the section. Enclose the field string
+    between the section tags.
+    Start a new section outside the field-block strings. Use the second number in
+    the list; use the second item in the description list.
+    CHANGE (2004-04-26) No longer write sections that occur in field-blocks.
+    Instead, ignore all section information in a field-block.
    """
    def __init__(self,
            in_file,