Update info handling to rev RTF 1.9.1 TODO: integrate \userprops

This commit is contained in:
Sengian 2011-01-15 20:51:39 +01:00
parent 93ef1699df
commit 55616a4e2d
2 changed files with 62 additions and 23 deletions

View File

@ -16,7 +16,9 @@
# #
#########################################################################
import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy
class Info:
"""
Make tags for document-information
@ -42,6 +44,7 @@ class Info:
self.__copy = copy
self.__run_level = run_level
self.__write_to = tempfile.mktemp()
def __initiate_values(self):
"""
Initiate all values.
@ -58,27 +61,49 @@ class Info:
self.__info_table_dict = {
'cw<di<title_____' : (self.__found_tag_with_text_func, 'title'),
'cw<di<author____' : (self.__found_tag_with_text_func, 'author'),
'cw<di<operator__' : (self.__found_tag_with_text_func, 'operator'),
'cw<di<manager___' : (self.__found_tag_with_text_func, 'manager'),
'cw<di<company___' : (self.__found_tag_with_text_func, 'company'),
'cw<di<keywords__' : (self.__found_tag_with_text_func, 'keywords'),
'cw<di<category__' : (self.__found_tag_with_text_func, 'category'),
'cw<di<doc-notes_' : (self.__found_tag_with_text_func, 'doc-notes'),
'cw<di<subject___' : (self.__found_tag_with_text_func, 'subject'),
'cw<di<operator__' : (self.__found_tag_with_text_func, 'operator'),
'cw<di<linkbase__' : (self.__found_tag_with_text_func, 'hyperlink-base'),
'cw<di<create-tim' : (self.__found_tag_with_tokens_func, 'creation-time'),
'cw<di<revis-time' : (self.__found_tag_with_tokens_func, 'revision-time'),
'cw<di<edit-time_' : (self.__single_field_func, 'editing-time'),
'cw<di<revis-time' : (self.__found_tag_with_tokens_func, 'revision-time'),
'cw<di<edit-time_' : (self.__found_tag_with_tokens_func, 'editing-time'),
'cw<di<print-time' : (self.__found_tag_with_tokens_func, 'printing-time'),
'cw<di<backuptime' : (self.__found_tag_with_tokens_func, 'backup-time'),
'cw<di<num-of-wor' : (self.__single_field_func, 'number-of-words'),
'cw<di<num-of-chr' : (self.__single_field_func, 'number-of-characters'),
'cw<di<numofchrws' : (self.__single_field_func, 'number-of-characters-without-space'),
'cw<di<num-of-pag' : (self.__single_field_func, 'number-of-pages'),
'cw<di<version___' : (self.__single_field_func, 'version'),
'cw<di<intern-ver' : (self.__single_field_func, 'internal-version-number'),
'cw<di<internalID' : (self.__single_field_func, 'internal-id-number'),
}
self.__token_dict = {
'year______' : 'year',
'month_____' : 'month',
'day_______' : 'day',
'minute____' : 'minute',
'second____' : 'second',
'revis-time' : 'revision-time',
'create-tim' : 'creation-time',
'edit-time_' : 'editing-time',
'print-time' : 'printing-time',
'backuptime' : 'backup-time',
'num-of-wor' : 'number-of-words',
'num-of-chr' : 'number-of-characters',
'numofchrws' : 'number-of-characters-without-space',
'num-of-pag' : 'number-of-pages',
'version___' : 'version',
'intern-ver' : 'internal-version-number',
'internalID' : 'internal-id-number',
}
def __before_info_table_func(self, line):
"""
Required:
@ -92,6 +117,7 @@ class Info:
if self.__token_info == 'mi<mk<doc-in-beg':
self.__state = 'in_info_table'
self.__write_obj.write(line)
def __in_info_table_func(self, line):
"""
Requires:
@ -112,6 +138,7 @@ class Info:
action(line, tag)
else:
self.__write_obj.write(line)
def __found_tag_with_text_func(self, line, tag):
"""
Requires:
@ -126,6 +153,7 @@ class Info:
"""
self.__tag = tag
self.__state = 'collect_text'
def __collect_text_func(self, line):
"""
Requires:
@ -147,6 +175,7 @@ class Info:
self.__text_string = ''
elif line[0:2] == 'tx':
self.__text_string += line[17:-1]
def __found_tag_with_tokens_func(self, line, tag):
"""
Requires:
@ -163,6 +192,7 @@ class Info:
self.__state = 'collect_tokens'
self.__text_string = 'mi<tg<empty-att_<%s' % tag
#mi<tg<empty-att_<page-definition<margin>33\n
def __collect_tokens_func(self, line):
"""
Requires:
@ -194,18 +224,19 @@ class Info:
att = line[6:16]
value = line[20:-1]
att_changed = self.__token_dict.get(att)
if att_changed == None:
if att_changed is None:
if self.__run_level > 3:
msg = 'no dictionary match for %s\n' % att
msg = 'No dictionary match for %s\n' % att
raise self.__bug_handler, msg
else:
self.__text_string += '<%s>%s' % (att_changed, value)
def __single_field_func(self, line, tag):
value = line[20:-1]
self.__write_obj.write(
'mi<tg<empty-att_<%s'
'<%s>%s\n' % (tag, tag, value)
'mi<tg<empty-att_<%s<%s>%s\n' % (tag, tag, value)
)
def __after_info_table_func(self, line):
"""
Requires:
@ -217,6 +248,7 @@ class Info:
the file.
"""
self.__write_obj.write(line)
def fix_info(self):
"""
Requires:
@ -234,20 +266,15 @@ class Info:
information table, simply write the line to the output file.
"""
self.__initiate_values()
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w')
line_to_read = 1
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
action = self.__state_dict.get(self.__state)
if action == None:
sys.stderr.write('no no matching state in module styles.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
read_obj.close()
self.__write_obj.close()
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'wb') as self.__write_obj:
for line in read_obj:
self.__token_info = line[:16]
action = self.__state_dict.get(self.__state)
if action is None:
sys.stderr.write('No matching state in module styles.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "info.data")

View File

@ -230,11 +230,15 @@ class ProcessTokens:
'trhdr' : ('tb', 'row-header', self.default_func),
# preamble => pr
# document information => di
# TODO integrate \userprops
'info' : ('di', 'doc-info__', self.default_func),
'title' : ('di', 'title_____', self.default_func),
'author' : ('di', 'author____', self.default_func),
'operator' : ('di', 'operator__', self.default_func),
'title' : ('di', 'title_____', self.default_func),
'manager' : ('di', 'manager___', self.default_func),
'company' : ('di', 'company___', self.default_func),
'keywords' : ('di', 'keywords__', self.default_func),
'category' : ('di', 'category__', self.default_func),
'doccomm' : ('di', 'doc-notes_', self.default_func),
'comment' : ('di', 'doc-notes_', self.default_func),
'subject' : ('di', 'subject___', self.default_func),
@ -243,11 +247,19 @@ class ProcessTokens:
'mo' : ('di', 'month_____', self.default_func),
'dy' : ('di', 'day_______', self.default_func),
'min' : ('di', 'minute____', self.default_func),
'sec' : ('di', 'second____', self.default_func),
'revtim' : ('di', 'revis-time', self.default_func),
'edmins' : ('di', 'edit-time_', self.default_func),
'printim' : ('di', 'print-time', self.default_func),
'buptim' : ('di', 'backuptime', self.default_func),
'nofwords' : ('di', 'num-of-wor', self.default_func),
'nofchars' : ('di', 'num-of-chr', self.default_func),
'nofcharsws' : ('di', 'numofchrws', self.default_func),
'nofpages' : ('di', 'num-of-pag', self.default_func),
'edmins' : ('di', 'edit-time_', self.default_func),
'version' : ('di', 'version___', self.default_func),
'vern' : ('di', 'intern-ver', self.default_func),
'hlinkbase' : ('di', 'linkbase__', self.default_func),
'id' : ('di', 'internalID', self.default_func),
# headers and footers => hf
'headerf' : ('hf', 'head-first', self.default_func),
'headerl' : ('hf', 'head-left_', self.default_func),