From 55616a4e2d8c525463e6c440f7e4112ac0782f5f Mon Sep 17 00:00:00 2001 From: Sengian Date: Sat, 15 Jan 2011 20:51:39 +0100 Subject: [PATCH] Update info handling to rev RTF 1.9.1 TODO: integrate \userprops --- src/calibre/ebooks/rtf2xml/info.py | 69 ++++++++++++++------ src/calibre/ebooks/rtf2xml/process_tokens.py | 16 ++++- 2 files changed, 62 insertions(+), 23 deletions(-) diff --git a/src/calibre/ebooks/rtf2xml/info.py b/src/calibre/ebooks/rtf2xml/info.py index ad0fb8ec06..9f2905f31b 100755 --- a/src/calibre/ebooks/rtf2xml/info.py +++ b/src/calibre/ebooks/rtf2xml/info.py @@ -16,7 +16,9 @@ # # ######################################################################### import sys, os, tempfile + from calibre.ebooks.rtf2xml import copy + class Info: """ Make tags for document-information @@ -42,6 +44,7 @@ class Info: self.__copy = copy self.__run_level = run_level self.__write_to = tempfile.mktemp() + def __initiate_values(self): """ Initiate all values. @@ -58,27 +61,49 @@ class Info: self.__info_table_dict = { 'cw33\n + def __collect_tokens_func(self, line): """ Requires: @@ -194,18 +224,19 @@ class Info: att = line[6:16] value = line[20:-1] att_changed = self.__token_dict.get(att) - if att_changed == None: + if att_changed is None: if self.__run_level > 3: - msg = 'no dictionary match for %s\n' % att + msg = 'No dictionary match for %s\n' % att raise self.__bug_handler, msg else: self.__text_string += '<%s>%s' % (att_changed, value) + def __single_field_func(self, line, tag): value = line[20:-1] self.__write_obj.write( - 'mi%s\n' % (tag, tag, value) + 'mi%s\n' % (tag, tag, value) ) + def __after_info_table_func(self, line): """ Requires: @@ -217,6 +248,7 @@ class Info: the file. """ self.__write_obj.write(line) + def fix_info(self): """ Requires: @@ -234,20 +266,15 @@ class Info: information table, simply write the line to the output file. """ self.__initiate_values() - read_obj = open(self.__file, 'r') - self.__write_obj = open(self.__write_to, 'w') - line_to_read = 1 - while line_to_read: - line_to_read = read_obj.readline() - line = line_to_read - self.__token_info = line[:16] - action = self.__state_dict.get(self.__state) - if action == None: - sys.stderr.write('no no matching state in module styles.py\n') - sys.stderr.write(self.__state + '\n') - action(line) - read_obj.close() - self.__write_obj.close() + with open(self.__file, 'r') as read_obj: + with open(self.__write_to, 'wb') as self.__write_obj: + for line in read_obj: + self.__token_info = line[:16] + action = self.__state_dict.get(self.__state) + if action is None: + sys.stderr.write('No matching state in module styles.py\n') + sys.stderr.write(self.__state + '\n') + action(line) copy_obj = copy.Copy(bug_handler = self.__bug_handler) if self.__copy: copy_obj.copy_file(self.__write_to, "info.data") diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py index 6ff0519dc2..56e61d2b60 100755 --- a/src/calibre/ebooks/rtf2xml/process_tokens.py +++ b/src/calibre/ebooks/rtf2xml/process_tokens.py @@ -230,11 +230,15 @@ class ProcessTokens: 'trhdr' : ('tb', 'row-header', self.default_func), # preamble => pr # document information => di + # TODO integrate \userprops 'info' : ('di', 'doc-info__', self.default_func), + 'title' : ('di', 'title_____', self.default_func), 'author' : ('di', 'author____', self.default_func), 'operator' : ('di', 'operator__', self.default_func), - 'title' : ('di', 'title_____', self.default_func), + 'manager' : ('di', 'manager___', self.default_func), + 'company' : ('di', 'company___', self.default_func), 'keywords' : ('di', 'keywords__', self.default_func), + 'category' : ('di', 'category__', self.default_func), 'doccomm' : ('di', 'doc-notes_', self.default_func), 'comment' : ('di', 'doc-notes_', self.default_func), 'subject' : ('di', 'subject___', self.default_func), @@ -243,11 +247,19 @@ class ProcessTokens: 'mo' : ('di', 'month_____', self.default_func), 'dy' : ('di', 'day_______', self.default_func), 'min' : ('di', 'minute____', self.default_func), + 'sec' : ('di', 'second____', self.default_func), 'revtim' : ('di', 'revis-time', self.default_func), + 'edmins' : ('di', 'edit-time_', self.default_func), + 'printim' : ('di', 'print-time', self.default_func), + 'buptim' : ('di', 'backuptime', self.default_func), 'nofwords' : ('di', 'num-of-wor', self.default_func), 'nofchars' : ('di', 'num-of-chr', self.default_func), + 'nofcharsws' : ('di', 'numofchrws', self.default_func), 'nofpages' : ('di', 'num-of-pag', self.default_func), - 'edmins' : ('di', 'edit-time_', self.default_func), + 'version' : ('di', 'version___', self.default_func), + 'vern' : ('di', 'intern-ver', self.default_func), + 'hlinkbase' : ('di', 'linkbase__', self.default_func), + 'id' : ('di', 'internalID', self.default_func), # headers and footers => hf 'headerf' : ('hf', 'head-first', self.default_func), 'headerl' : ('hf', 'head-left_', self.default_func),