mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-12-11 07:35:14 -05:00
147 lines
5.2 KiB
Python
147 lines
5.2 KiB
Python
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
#########################################################################
|
|
# #
|
|
# #
|
|
# copyright 2002 Paul Henry Tremblay #
|
|
# #
|
|
# This program is distributed in the hope that it will be useful, #
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
|
|
# General Public License for more details. #
|
|
# #
|
|
# #
|
|
#########################################################################
|
|
import sys
|
|
|
|
from polyglot.builtins import unicode_type
|
|
|
|
from . import open_for_read
|
|
|
|
|
|
class OldRtf:
|
|
"""
|
|
Check to see if the RTF is an older version
|
|
Logic:
|
|
If allowable control word/properties happen in text without being enclosed
|
|
in brackets the file will be considered old rtf
|
|
"""
|
|
|
|
def __init__(self, in_file,
|
|
bug_handler,
|
|
run_level,
|
|
):
|
|
"""
|
|
Required:
|
|
'file'--file to parse
|
|
'table_data' -- a dictionary for each table.
|
|
Optional:
|
|
'copy'-- whether to make a copy of result for debugging
|
|
'temp_dir' --where to output temporary results (default is
|
|
directory from which the script is run.)
|
|
Returns:
|
|
nothing
|
|
"""
|
|
self.__file = in_file
|
|
self.__bug_handler = bug_handler
|
|
self.__run_level = run_level
|
|
self.__allowable = [
|
|
'annotation' ,
|
|
'blue______' ,
|
|
'bold______',
|
|
'caps______',
|
|
'char-style' ,
|
|
'dbl-strike' ,
|
|
'emboss____',
|
|
'engrave___' ,
|
|
'font-color',
|
|
'font-down_' ,
|
|
'font-size_',
|
|
'font-style',
|
|
'font-up___',
|
|
'footnot-mk' ,
|
|
'green_____' ,
|
|
'hidden____',
|
|
'italics___',
|
|
'outline___',
|
|
'red_______',
|
|
'shadow____' ,
|
|
'small-caps',
|
|
'strike-thr',
|
|
'subscript_',
|
|
'superscrip' ,
|
|
'underlined' ,
|
|
]
|
|
self.__action_dict = {
|
|
'before_body' : self.__before_body_func,
|
|
'in_body' : self.__check_tokens_func,
|
|
'after_pard' : self.__after_pard_func,
|
|
}
|
|
|
|
def __initiate_values(self):
|
|
self.__previous_token = ''
|
|
self.__state = 'before_body'
|
|
self.__found_new = 0
|
|
self.__ob_group = 0
|
|
|
|
def __check_tokens_func(self, line):
|
|
if self.__inline_info in self.__allowable:
|
|
if self.__ob_group == self.__base_ob_count:
|
|
return 'old_rtf'
|
|
else:
|
|
self.__found_new += 1
|
|
elif self.__token_info == 'cw<pf<par-def___':
|
|
self.__state = 'after_pard'
|
|
|
|
def __before_body_func(self, line):
|
|
if self.__token_info == 'mi<mk<body-open_':
|
|
self.__state = 'in_body'
|
|
self.__base_ob_count = self.__ob_group
|
|
|
|
def __after_pard_func(self, line):
|
|
if line[0:2] != 'cw':
|
|
self.__state = 'in_body'
|
|
|
|
def check_if_old_rtf(self):
|
|
"""
|
|
Requires:
|
|
nothing
|
|
Returns:
|
|
True if file is older RTf
|
|
False if file is newer RTF
|
|
"""
|
|
self.__initiate_values()
|
|
line_num = 0
|
|
with open_for_read(self.__file) as read_obj:
|
|
for line in read_obj:
|
|
line_num += 1
|
|
self.__token_info = line[:16]
|
|
if self.__token_info == 'mi<mk<body-close':
|
|
return False
|
|
if self.__token_info == 'ob<nu<open-brack':
|
|
self.__ob_group += 1
|
|
self.__ob_count = line[-5:-1]
|
|
if self.__token_info == 'cb<nu<clos-brack':
|
|
self.__ob_group -= 1
|
|
self.__cb_count = line[-5:-1]
|
|
self.__inline_info = line[6:16]
|
|
if self.__state == 'after_body':
|
|
return False
|
|
action = self.__action_dict.get(self.__state)
|
|
if action is None:
|
|
try:
|
|
sys.stderr.write('No action for this state!\n')
|
|
except:
|
|
pass
|
|
result = action(line)
|
|
if result == 'new_rtf':
|
|
return False
|
|
elif result == 'old_rtf':
|
|
if self.__run_level > 3:
|
|
sys.stderr.write(
|
|
'Old rtf construction %s (bracket %s, line %s)\n' % (
|
|
self.__inline_info, unicode_type(self.__ob_group), line_num)
|
|
)
|
|
return True
|
|
self.__previous_token = line[6:16]
|
|
return False
|