from __future__ import absolute_import, division, print_function, unicode_literals
#########################################################################
#                                                                       #
#                                                                       #
#   copyright 2002 Paul Henry Tremblay                                  #
#                                                                       #
#   This program is distributed in the hope that it will be useful,     #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of      #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU    #
#   General Public License for more details.                            #
#                                                                       #
#                                                                       #
#########################################################################
import sys

from polyglot.builtins import unicode_type

from . import open_for_read


class OldRtf:
    """
    Check to see if the RTF is an older version

    Logic:
    If allowable control word/properties happen in text without being
    enclosed in brackets the file will be considered old rtf

    Each input line is treated as a token whose first 16 characters
    identify it; characters 6-16 are the short name that is compared with
    the list of allowable inline properties.
    """

    # NOTE(review): the copy of this file that was reviewed had lost the
    # text between the 'cw' token literal in __check_tokens_func and the
    # "> 3" run-level test near the end of the scanning loop (it looks like
    # markup stripping).  __before_body_func, __after_pard_func, the head of
    # check_if_old_rtf and the full token marker strings were rebuilt from
    # the surviving fragments -- confirm them against version control.

    def __init__(self, in_file,
                 bug_handler,
                 run_level,
                 ):
        """
        Store the settings and build the state dispatch table.

        Required:
            'in_file' -- file to parse (opened with open_for_read when the
                check runs).
            'bug_handler' -- stored on the instance; not used further in
                this class.
            'run_level' -- when greater than 3, a diagnostic line is
                written to stderr if an old RTF construction is found.
        Returns:
            nothing
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__run_level = run_level
        # Only ever used for membership tests, so a set is sufficient.
        self.__allowable = frozenset((
            'annotation',
            'blue______',
            'bold______',
            'caps______',
            'char-style',
            'dbl-strike',
            'emboss____',
            'engrave___',
            'font-color',
            'font-down_',
            'font-size_',
            'font-style',
            'font-up___',
            'footnot-mk',
            'green_____',
            'hidden____',
            'italics___',
            'outline___',
            'red_______',
            'shadow____',
            'small-caps',
            'strike-thr',
            'subscript_',
            'superscrip',
            'underlined',
        ))
        # One handler per scanner state.
        self.__action_dict = {
            'before_body': self.__before_body_func,
            'in_body': self.__check_tokens_func,
            'after_pard': self.__after_pard_func,
        }

    def __initiate_values(self):
        """
        Reset the scanner state before a file is read.
        """
        self.__previous_token = ''
        self.__state = 'before_body'
        self.__found_new = 0
        self.__ob_group = 0
        # Give the attributes read by the handlers a defined value so that
        # no handler can hit an AttributeError.
        self.__base_ob_count = 0
        self.__token_info = ''
        self.__inline_info = ''

    def __check_tokens_func(self, line):
        """
        Handle a token while in the body.

        Returns 'old_rtf' when an allowable inline property is found at the
        bracket depth recorded when the body opened (that is, not enclosed
        in its own group); otherwise returns None.
        """
        if self.__inline_info in self.__allowable:
            if self.__ob_group == self.__base_ob_count:
                return 'old_rtf'
            self.__found_new += 1
        # NOTE(review): marker string reconstructed -- confirm.
        elif self.__token_info == 'cw<pf<par-def___':
            # Control words that directly follow a paragraph definition are
            # skipped; see __after_pard_func.
            self.__state = 'after_pard'
        return None

    def __before_body_func(self, line):
        """
        Wait for the body to open and remember the bracket depth there.

        NOTE(review): reconstructed method -- confirm.
        """
        if self.__token_info == 'mi<mk<body-open_':
            self.__state = 'in_body'
            self.__base_ob_count = self.__ob_group
        return None

    def __after_pard_func(self, line):
        """
        Skip control words after a paragraph definition; the first token
        that does not start with 'cw' switches back to the body state.

        NOTE(review): reconstructed method -- confirm.
        """
        if line[0:2] != 'cw':
            self.__state = 'in_body'
        return None

    def check_if_old_rtf(self):
        """
        Scan the file and decide whether it uses old RTF constructions.

        NOTE(review): the method name and the first part of the loop were
        reconstructed; only the diagnostic write and the statements after
        it were still present -- confirm.

        Requires:
            nothing
        Returns:
            True if file is older RTF
            False if file is newer RTF
        """
        self.__initiate_values()
        line_num = 0
        with open_for_read(self.__file) as read_obj:
            for line in read_obj:
                line_num += 1
                self.__token_info = line[:16]
                if self.__token_info == 'mi<mk<body-close':
                    # Reached the end of the body without a hit.
                    return False
                # Track the current group (bracket) depth.
                if self.__token_info == 'ob<nu<open-brack':
                    self.__ob_group += 1
                elif self.__token_info == 'cb<nu<clos-brack':
                    self.__ob_group -= 1
                self.__inline_info = line[6:16]
                # Every state assigned in this class has an entry.
                action = self.__action_dict[self.__state]
                if action(line) == 'old_rtf':
                    if self.__run_level > 3:
                        sys.stderr.write(
                            'Old rtf construction %s (bracket %s, line %s)\n' % (
                                self.__inline_info,
                                unicode_type(self.__ob_group),
                                line_num)
                        )
                    return True
                self.__previous_token = line[6:16]
        return False