From e1899e9f1ccbb074635fb8ffa4bf9cfbb0a273c7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 27 Dec 2007 22:11:26 +0000 Subject: [PATCH] Move rtf2xml into libprs500 tree and clean it up --- src/libprs500/ebooks/lrf/rtf/convert_from.py | 2 +- src/libprs500/ebooks/rtf2xml/ParseRtf.py | 563 +++ src/libprs500/ebooks/rtf2xml/__init__.py | 3 + src/libprs500/ebooks/rtf2xml/add_brackets.py | 205 ++ src/libprs500/ebooks/rtf2xml/body_styles.py | 81 + src/libprs500/ebooks/rtf2xml/border_parse.py | 189 + src/libprs500/ebooks/rtf2xml/char_set.py | 3075 +++++++++++++++++ .../ebooks/rtf2xml/check_brackets.py | 61 + .../ebooks/rtf2xml/check_encoding.py | 29 + src/libprs500/ebooks/rtf2xml/colors.py | 247 ++ .../ebooks/rtf2xml/combine_borders.py | 92 + src/libprs500/ebooks/rtf2xml/configure_txt.py | 151 + .../ebooks/rtf2xml/convert_to_tags.py | 242 ++ src/libprs500/ebooks/rtf2xml/copy.py | 88 + .../ebooks/rtf2xml/correct_unicode.py | 94 + .../ebooks/rtf2xml/default_encoding.py | 61 + src/libprs500/ebooks/rtf2xml/delete_info.py | 219 ++ src/libprs500/ebooks/rtf2xml/field_strings.py | 795 +++++ src/libprs500/ebooks/rtf2xml/fields_large.py | 358 ++ src/libprs500/ebooks/rtf2xml/fields_small.py | 448 +++ src/libprs500/ebooks/rtf2xml/fonts.py | 223 ++ src/libprs500/ebooks/rtf2xml/footnote.py | 268 ++ src/libprs500/ebooks/rtf2xml/get_char_map.py | 67 + src/libprs500/ebooks/rtf2xml/get_options.py | 332 ++ src/libprs500/ebooks/rtf2xml/group_borders.py | 292 ++ src/libprs500/ebooks/rtf2xml/group_styles.py | 241 ++ src/libprs500/ebooks/rtf2xml/header.py | 265 ++ .../ebooks/rtf2xml/headings_to_sections.py | 215 ++ src/libprs500/ebooks/rtf2xml/hex_2_utf8.py | 579 ++++ src/libprs500/ebooks/rtf2xml/info.py | 255 ++ src/libprs500/ebooks/rtf2xml/inline.py | 411 +++ src/libprs500/ebooks/rtf2xml/line_endings.py | 67 + src/libprs500/ebooks/rtf2xml/list_numbers.py | 193 ++ src/libprs500/ebooks/rtf2xml/list_table.py | 431 +++ src/libprs500/ebooks/rtf2xml/make_lists.py | 442 +++ src/libprs500/ebooks/rtf2xml/old_rtf.py | 132 + src/libprs500/ebooks/rtf2xml/options_trem.py | 274 ++ src/libprs500/ebooks/rtf2xml/output.py | 147 + .../ebooks/rtf2xml/override_table.py | 203 ++ src/libprs500/ebooks/rtf2xml/paragraph_def.py | 739 ++++ src/libprs500/ebooks/rtf2xml/paragraphs.py | 253 ++ src/libprs500/ebooks/rtf2xml/pict.py | 186 + src/libprs500/ebooks/rtf2xml/preamble_div.py | 554 +++ src/libprs500/ebooks/rtf2xml/preamble_rest.py | 145 + .../ebooks/rtf2xml/process_tokens.py | 826 +++++ .../ebooks/rtf2xml/replace_illegals.py | 52 + src/libprs500/ebooks/rtf2xml/sections.py | 513 +++ src/libprs500/ebooks/rtf2xml/styles.py | 705 ++++ src/libprs500/ebooks/rtf2xml/table.py | 543 +++ src/libprs500/ebooks/rtf2xml/table_info.py | 85 + src/libprs500/ebooks/rtf2xml/tokenize.py | 116 + src/libprs500/trac/download/download.py | 2 +- 52 files changed, 16757 insertions(+), 2 deletions(-) create mode 100755 src/libprs500/ebooks/rtf2xml/ParseRtf.py create mode 100755 src/libprs500/ebooks/rtf2xml/__init__.py create mode 100755 src/libprs500/ebooks/rtf2xml/add_brackets.py create mode 100755 src/libprs500/ebooks/rtf2xml/body_styles.py create mode 100755 src/libprs500/ebooks/rtf2xml/border_parse.py create mode 100755 src/libprs500/ebooks/rtf2xml/char_set.py create mode 100755 src/libprs500/ebooks/rtf2xml/check_brackets.py create mode 100755 src/libprs500/ebooks/rtf2xml/check_encoding.py create mode 100755 src/libprs500/ebooks/rtf2xml/colors.py create mode 100755 src/libprs500/ebooks/rtf2xml/combine_borders.py create mode 100755 src/libprs500/ebooks/rtf2xml/configure_txt.py create mode 100755 src/libprs500/ebooks/rtf2xml/convert_to_tags.py create mode 100755 src/libprs500/ebooks/rtf2xml/copy.py create mode 100755 src/libprs500/ebooks/rtf2xml/correct_unicode.py create mode 100755 src/libprs500/ebooks/rtf2xml/default_encoding.py create mode 100755 src/libprs500/ebooks/rtf2xml/delete_info.py create mode 100755 src/libprs500/ebooks/rtf2xml/field_strings.py create mode 100755 src/libprs500/ebooks/rtf2xml/fields_large.py create mode 100755 src/libprs500/ebooks/rtf2xml/fields_small.py create mode 100755 src/libprs500/ebooks/rtf2xml/fonts.py create mode 100755 src/libprs500/ebooks/rtf2xml/footnote.py create mode 100755 src/libprs500/ebooks/rtf2xml/get_char_map.py create mode 100755 src/libprs500/ebooks/rtf2xml/get_options.py create mode 100755 src/libprs500/ebooks/rtf2xml/group_borders.py create mode 100755 src/libprs500/ebooks/rtf2xml/group_styles.py create mode 100755 src/libprs500/ebooks/rtf2xml/header.py create mode 100755 src/libprs500/ebooks/rtf2xml/headings_to_sections.py create mode 100755 src/libprs500/ebooks/rtf2xml/hex_2_utf8.py create mode 100755 src/libprs500/ebooks/rtf2xml/info.py create mode 100755 src/libprs500/ebooks/rtf2xml/inline.py create mode 100755 src/libprs500/ebooks/rtf2xml/line_endings.py create mode 100755 src/libprs500/ebooks/rtf2xml/list_numbers.py create mode 100755 src/libprs500/ebooks/rtf2xml/list_table.py create mode 100755 src/libprs500/ebooks/rtf2xml/make_lists.py create mode 100755 src/libprs500/ebooks/rtf2xml/old_rtf.py create mode 100755 src/libprs500/ebooks/rtf2xml/options_trem.py create mode 100755 src/libprs500/ebooks/rtf2xml/output.py create mode 100755 src/libprs500/ebooks/rtf2xml/override_table.py create mode 100755 src/libprs500/ebooks/rtf2xml/paragraph_def.py create mode 100755 src/libprs500/ebooks/rtf2xml/paragraphs.py create mode 100755 src/libprs500/ebooks/rtf2xml/pict.py create mode 100755 src/libprs500/ebooks/rtf2xml/preamble_div.py create mode 100755 src/libprs500/ebooks/rtf2xml/preamble_rest.py create mode 100755 src/libprs500/ebooks/rtf2xml/process_tokens.py create mode 100755 src/libprs500/ebooks/rtf2xml/replace_illegals.py create mode 100755 src/libprs500/ebooks/rtf2xml/sections.py create mode 100755 src/libprs500/ebooks/rtf2xml/styles.py create mode 100755 src/libprs500/ebooks/rtf2xml/table.py create mode 100755 src/libprs500/ebooks/rtf2xml/table_info.py create mode 100755 src/libprs500/ebooks/rtf2xml/tokenize.py diff --git a/src/libprs500/ebooks/lrf/rtf/convert_from.py b/src/libprs500/ebooks/lrf/rtf/convert_from.py index b2414c8460..21b66dc9ec 100644 --- a/src/libprs500/ebooks/lrf/rtf/convert_from.py +++ b/src/libprs500/ebooks/lrf/rtf/convert_from.py @@ -122,7 +122,7 @@ def main(args=sys.argv, logger=None): def generate_xml(rtfpath): - from rtf2xml.ParseRtf import ParseRtf + from libprs500.ebooks.rtf2xml.ParseRtf import ParseRtf tdir = tempfile.mkdtemp(prefix=__appname__+'_') ofile = os.path.join(tdir, 'index.xml') cwd = os.getcwdu() diff --git a/src/libprs500/ebooks/rtf2xml/ParseRtf.py b/src/libprs500/ebooks/rtf2xml/ParseRtf.py new file mode 100755 index 0000000000..5d816e1160 --- /dev/null +++ b/src/libprs500/ebooks/rtf2xml/ParseRtf.py @@ -0,0 +1,563 @@ +######################################################################### +# # +# # +# copyright 2002 Paul Henry Tremblay # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +# General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with this program; if not, write to the Free Software # +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA # +# 02111-1307 USA # +# # +# # +######################################################################### +# $Revision: 1.41 $ +# $Date: 2006/03/24 23:50:07 $ +import sys,os +from libprs500.ebooks.rtf2xml import headings_to_sections, \ + line_endings, footnote, fields_small, default_encoding, \ + make_lists, preamble_div, header, colors, group_borders, \ + check_encoding, add_brackets, table, combine_borders, \ + fields_large, process_tokens, hex_2_utf8, tokenize, \ + delete_info, sections, check_brackets, styles, \ + paragraph_def, convert_to_tags, output, copy, \ + list_numbers, info, pict, table_info, fonts, paragraphs, \ + body_styles, preamble_rest, group_styles, \ + inline, correct_unicode +from libprs500.ebooks.rtf2xml.old_rtf import OldRtf + +""" +Here is an example script using the ParseRTF module directly +#!/usr/bin/env python + +def Handle_Main(): + # Handles options and creates a parse object + parse_obj =ParseRtf.ParseRtf( + in_file = 'in.rtf', + # All values from here on are optional + # determine the output file + out_file = 'out.xml', + # determine the run level. The default is 1. + run_level = 3, + # The name of a debug directory, if you are running at + # run level 3 or higer. + debug = 'debug_dir', + # Convert RTF caps to real caps. + # Default is 1. + convert_caps = 1, + # Indent resulting XML. + # Default is 0 (no indent). + indent = 1, + # Form lists from RTF. Default is 1. + form_lists = 1, + # Convert headings to sections. Default is 0. + headings_to_sections = 1, + # Group paragraphs with the same style name. Default is 1. + group_styles = 1, + # Group borders. Default is 1. + group_borders = 1, + # Write or do not write paragraphs. Default is 0. + empty_paragraphs = 0, + ) + try: + parse_obj.parse_rtf() + except ParseRtf.InvalidRtfException, msg: + sys.stderr.write(msg) + except ParseRtf.RtfInvalidCodeException, msg: + sys.stderr.write(msg) +""" +class InvalidRtfException(Exception): + """ + handle invalid RTF + """ + pass +class RtfInvalidCodeException(Exception): + """ + handle bugs in program + """ + pass + +class ParseRtf: + """ + Main class for controlling the rest of the parsing. + """ + def __init__(self, + in_file, + out_file = '', + out_dir = None, + dtd = '', + debug = 0, + deb_dir=None, + convert_symbol = None, + convert_wingdings = None, + convert_zapf = None, + convert_caps = None, + run_level = 1, + indent = None, + replace_illegals = 1, + form_lists = 1, + headings_to_sections = 1, + group_styles = 1, + group_borders = 1, + empty_paragraphs = 1, + no_dtd = 0, + char_data = '', + ): + """ + Requires: + 'file' --file to parse + 'char_data' --file containing character maps + 'dtd' --path to dtd + Possible parameters, but not necessary: + 'output' --a file to output the parsed file. (Default is standard + output.) + 'temp_dir' --directory for temporary output (If not provided, the + script tries to output to directory where is script is exectued.) + 'deb_dir' --debug directory. If a debug_dir is provided, the script + will copy each run through as a file to examine in the debug_dir + 'perl_script'--use perl to make tokens. This runs just a bit faster. + (I will probably phase this out.) + 'check_brackets' -- make sure the brackets match up after each run + through a file. Only for debugging. + Returns: Nothing + """ + self.__file = in_file + self.__out_file = out_file + self.__out_dir = out_dir + self.__temp_dir = out_dir + self.__dtd_path = dtd + self.__check_file(in_file,"file_to_parse") + self.__char_data = char_data + self.__debug_dir = debug + self.__check_dir(self.__temp_dir) + self.__copy = self.__check_dir(self.__debug_dir) + self.__convert_caps = convert_caps + self.__convert_symbol = convert_symbol + self.__convert_wingdings = convert_wingdings + self.__convert_zapf = convert_zapf + self.__run_level = run_level + self.__exit_level = 0 + self.__indent = indent + self.__replace_illegals = replace_illegals + self.__form_lists = form_lists + self.__headings_to_sections = headings_to_sections + self.__group_styles = group_styles + self.__group_borders = group_borders + self.__empty_paragraphs = empty_paragraphs + self.__no_dtd = no_dtd + + def __check_file(self, the_file, type): + """Check to see if files exist""" + if the_file == None: + if type == "file_to_parse": + message = "You must provide a file for the script to work" + msg = message + raise RtfInvalidCodeException, msg + elif os.path.exists(the_file): + pass # do nothing + else: + message = "The file '%s' cannot be found" % the_file + msg = message + raise RtfInvalidCodeException, msg + def __check_dir(self, the_dir): + """Check to see if directory exists""" + if not the_dir : + return + dir_exists = os.path.isdir(the_dir) + if not dir_exists: + message = "%s is not a directory" % the_dir + msg = message + raise RtfInvalidCodeException, msg + return 1 + def parse_rtf(self): + """ + Parse the file by calling on other classes. + Requires: + Nothing + Returns: + A parsed file in XML, either to standard output or to a file, + depending on the value of 'output' when the instance was created. + """ + self.__temp_file = self.__make_temp_file(self.__file) + # if the self.__deb_dir is true, then create a copy object, + # set the directory to write to, remove files, and copy + # the new temporary file to this directory + if self.__debug_dir: + copy_obj = copy.Copy( + bug_handler = RtfInvalidCodeException, + ) + copy_obj.set_dir(self.__debug_dir) + copy_obj.remove_files() + copy_obj.copy_file(self.__temp_file, "original_file") + # new as of 2005-08-02. Do I want this? + if self.__debug_dir or self.__run_level > 2: + self.__check_brack_obj = check_brackets.CheckBrackets\ + (file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + ) + # convert Macintosh line endings to Unix line endings + line_obj = line_endings.FixLineEndings( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + run_level = self.__run_level, + replace_illegals = self.__replace_illegals, + ) + return_value = line_obj.fix_endings() + self.__return_code(return_value) + tokenize_obj = tokenize.Tokenize( + bug_handler = RtfInvalidCodeException, + in_file = self.__temp_file, + copy = self.__copy, + run_level = self.__run_level,) + tokenize_obj.tokenize() + process_tokens_obj = process_tokens.ProcessTokens( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + run_level = self.__run_level, + exception_handler = InvalidRtfException, + ) + try: + return_value = process_tokens_obj.process_tokens() + except InvalidRtfException, msg: + try: + os.remove(self.__temp_file) + except OSError: + pass + check_encoding_obj = check_encoding.CheckEncoding( + bug_handler = RtfInvalidCodeException, + ) + check_encoding_obj.check_encoding(self.__file) + sys.stderr.write('File "%s" does not appear to be RTF.\n' % self.__file) + raise InvalidRtfException, msg + delete_info_obj = delete_info.DeleteInfo( + in_file = self.__temp_file, + copy = self.__copy, + bug_handler = RtfInvalidCodeException, + run_level = self.__run_level,) + # found destination means {\*\destination + # if found, the RTF should be newer RTF + found_destination = delete_info_obj.delete_info() + self.__bracket_match('delete_data_info') + # put picts in a separate file + pict_obj = pict.Pict( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + orig_file = self.__file, + out_file = self.__out_file, + run_level = self.__run_level, + ) + pict_obj.process_pict() + self.__bracket_match('pict_data_info') + correct_uni_obj = correct_unicode.CorrectUnicode( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + run_level = self.__run_level, + exception_handler = InvalidRtfException, + ) + correct_uni_obj.correct_unicode() + self.__bracket_match('correct_unicode_info') + combine_obj = combine_borders.CombineBorders( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + run_level = self.__run_level,) + combine_obj.combine_borders() + self.__bracket_match('combine_borders_info') + footnote_obj = footnote.Footnote( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + run_level = self.__run_level, + ) + footnote_obj.separate_footnotes() + self.__bracket_match('separate_footnotes_info') + header_obj = header.Header( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + run_level = self.__run_level, + ) + header_obj.separate_headers() + self.__bracket_match('separate_headers_info') + list_numbers_obj = list_numbers.ListNumbers( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + run_level = self.__run_level, + ) + list_numbers_obj.fix_list_numbers() + self.__bracket_match('list_number_info') + preamble_div_obj = preamble_div.PreambleDiv( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + run_level = self.__run_level, + ) + list_of_lists = preamble_div_obj.make_preamble_divisions() + self.__bracket_match('make_preamble_divisions') + encode_obj = default_encoding.DefaultEncoding( + in_file = self.__temp_file, + run_level = self.__run_level, + bug_handler = RtfInvalidCodeException, + ) + platform, code_page, default_font_num = encode_obj.find_default_encoding() + hex2utf_obj = hex_2_utf8.Hex2Utf8( + in_file = self.__temp_file, + copy = self.__copy, + area_to_convert = 'preamble', + char_file = self.__char_data, + default_char_map = code_page, + run_level = self.__run_level, + bug_handler = RtfInvalidCodeException, + invalid_rtf_handler = InvalidRtfException, + ) + hex2utf_obj.convert_hex_2_utf8() + self.__bracket_match('hex_2_utf_preamble') + fonts_obj = fonts.Fonts( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + default_font_num = default_font_num, + run_level = self.__run_level, + ) + special_font_dict = fonts_obj.convert_fonts() + self.__bracket_match('fonts_info') + color_obj = colors.Colors( + in_file = self.__temp_file, + copy = self.__copy, + bug_handler = RtfInvalidCodeException, + run_level = self.__run_level, + ) + color_obj.convert_colors() + self.__bracket_match('colors_info') + style_obj = styles.Styles( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + run_level = self.__run_level, + ) + style_obj.convert_styles() + self.__bracket_match('styles_info') + info_obj = info.Info( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + run_level = self.__run_level, + ) + info_obj.fix_info() + default_font = special_font_dict.get('default-font') + preamble_rest_obj = preamble_rest.Preamble( + file = self.__temp_file, copy = self.__copy, + bug_handler = RtfInvalidCodeException, + platform = platform, default_font = default_font, + code_page = code_page) + preamble_rest_obj.fix_preamble() + self.__bracket_match('preamble_rest_info') + old_rtf_obj = OldRtf( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + run_level = self.__run_level, + ) + # RTF can actually have destination groups and old RTF. + # BAH! + old_rtf = old_rtf_obj.check_if_old_rtf() + if old_rtf: + if self.__run_level > 5: + msg = 'older RTF\n' + msg += 'self.__run_level is "%s"\n' % self.__run_level + raise RtfInvalidCodeException, msg + if self.__run_level > 1: + sys.stderr.write('File could be older RTF...\n') + if found_destination: + if self.__run_level > 1: + sys.stderr.write( + 'File also has newer RTF.\n' + 'Will do the best to convert.\n' + ) + add_brackets_obj = add_brackets.AddBrackets( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + run_level = self.__run_level, + ) + add_brackets_obj.add_brackets() + fields_small_obj = fields_small.FieldsSmall( + in_file = self.__temp_file, + copy = self.__copy, + bug_handler = RtfInvalidCodeException, + run_level = self.__run_level,) + fields_small_obj.fix_fields() + self.__bracket_match('fix_small_fields_info') + fields_large_obj = fields_large.FieldsLarge( + in_file = self.__temp_file, + copy = self.__copy, + bug_handler = RtfInvalidCodeException, + run_level = self.__run_level) + fields_large_obj.fix_fields() + self.__bracket_match('fix_large_fields_info') + sections_obj = sections.Sections( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + run_level = self.__run_level,) + sections_obj.make_sections() + self.__bracket_match('sections_info') + paragraphs_obj = paragraphs.Paragraphs( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + write_empty_para = self.__empty_paragraphs, + run_level = self.__run_level,) + paragraphs_obj.make_paragraphs() + self.__bracket_match('paragraphs_info') + default_font = special_font_dict['default-font'] + paragraph_def_obj = paragraph_def.ParagraphDef( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + default_font = default_font, + run_level = self.__run_level,) + list_of_styles = paragraph_def_obj.make_paragraph_def() + body_styles_obj = body_styles.BodyStyles( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + list_of_styles = list_of_styles, + run_level = self.__run_level,) + body_styles_obj.insert_info() + self.__bracket_match('body_styles_info') + self.__bracket_match('paragraph_def_info') + table_obj = table.Table( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + run_level = self.__run_level,) + table_data = table_obj.make_table() + self.__bracket_match('table_info') + table_info_obj = table_info.TableInfo( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + table_data = table_data, + run_level = self.__run_level,) + table_info_obj.insert_info() + self.__bracket_match('table__data_info') + if self.__form_lists: + make_list_obj = make_lists.MakeLists( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + headings_to_sections = self.__headings_to_sections, + run_level = self.__run_level, + list_of_lists = list_of_lists, + ) + make_list_obj.make_lists() + self.__bracket_match('form_lists_info') + if self.__headings_to_sections: + headings_to_sections_obj = headings_to_sections.HeadingsToSections( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + run_level = self.__run_level,) + headings_to_sections_obj.make_sections() + self.__bracket_match('headings_to_sections_info') + if self.__group_styles: + group_styles_obj = group_styles.GroupStyles( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + wrap = 1, + run_level = self.__run_level,) + group_styles_obj.group_styles() + self.__bracket_match('group_styles_info') + if self.__group_borders: + group_borders_obj = group_borders.GroupBorders( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + wrap = 1, + run_level = self.__run_level,) + group_borders_obj.group_borders() + self.__bracket_match('group_borders_info') + inline_obj = inline.Inline( + in_file = self.__temp_file, + bug_handler = RtfInvalidCodeException, + copy = self.__copy, + run_level = self.__run_level,) + inline_obj.form_tags() + self.__bracket_match('inline_info') + hex2utf_obj.update_values(file = self.__temp_file, + area_to_convert = 'body', + copy = self.__copy, + char_file = self.__char_data, + convert_caps = self.__convert_caps, + convert_symbol = self.__convert_symbol, + convert_wingdings = self.__convert_wingdings, + convert_zapf = self.__convert_zapf, + symbol = 1, + wingdings = 1, + dingbats = 1, + ) + hex2utf_obj.convert_hex_2_utf8() + header_obj.join_headers() + footnote_obj.join_footnotes() + tags_obj = convert_to_tags.ConvertToTags( + in_file = self.__temp_file, + copy = self.__copy, + dtd_path = self.__dtd_path, + indent = self.__indent, + run_level = self.__run_level, + no_dtd = self.__no_dtd, + bug_handler = RtfInvalidCodeException, + ) + tags_obj.convert_to_tags() + output_obj = output.Output( + file = self.__temp_file, + orig_file = self.__file, + output_dir = self.__out_dir, + out_file = self.__out_file, + ) + output_obj.output() + os.remove(self.__temp_file) + return self.__exit_level + def __bracket_match(self, file_name): + if self.__run_level > 2: + good_br, msg = self.__check_brack_obj.check_brackets() + if good_br: + pass + # sys.stderr.write( msg + ' in ' + file_name + "\n") + else: + msg += msg + " in file '" + file_name + "'\n" + raise RtfInvalidCodeException, msg + def __return_code(self, num): + if num == None: + return + if int(num) > self.__exit_level: + self.__exit_level = num + def __make_temp_file(self,file): + """Make a temporary file to parse""" + write_file="rtf_write_file" + read_obj = open(file,'r') + write_obj = open(write_file, 'w') + line = "dummy" + while line: + line = read_obj.read(1000) + write_obj.write(line ) + read_obj.close() + write_obj.close() + return write_file + """ +mi1\n +mi33\n +mi 0: + sys.stderr.write( + 'Sorry, but this files has a mix of old and new RTF.\n' + 'Some characteristics cannot be converted.\n') + os.remove(self.__write_to) diff --git a/src/libprs500/ebooks/rtf2xml/body_styles.py b/src/libprs500/ebooks/rtf2xml/body_styles.py new file mode 100755 index 0000000000..d0c7788109 --- /dev/null +++ b/src/libprs500/ebooks/rtf2xml/body_styles.py @@ -0,0 +1,81 @@ +######################################################################### +# # +# # +# copyright 2002 Paul Henry Tremblay # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +# General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with this program; if not, write to the Free Software # +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA # +# 02111-1307 USA # +# # +# # +######################################################################### +import os, tempfile +from libprs500.ebooks.rtf2xml import copy +""" +Simply write the list of strings after style table +""" +class BodyStyles: + """ + Insert table data for tables. + Logic: + """ + def __init__(self, + in_file, + list_of_styles, + bug_handler, + copy=None, + run_level = 1,): + """ + Required: + 'file'--file to parse + 'table_data' -- a dictionary for each table. + Optional: + 'copy'-- whether to make a copy of result for debugging + 'temp_dir' --where to output temporary results (default is + directory from which the script is run.) + Returns: + nothing + """ + self.__file = in_file + self.__bug_handler = bug_handler + self.__copy = copy + self.__list_of_styles = list_of_styles + self.__run_level = run_level + self.__write_to = tempfile.mktemp() + # self.__write_to = 'table_info.data' + def insert_info(self): + """ + """ + read_obj = open(self.__file, 'r') + self.__write_obj = open(self.__write_to, 'w') + line_to_read = 1 + while line_to_read: + line_to_read = read_obj.readline() + line = line_to_read + if line == 'mi 0: + self.__write_obj.write('mi 3: + msg = 'Not enough data for each table\n' + raise self.__bug_handler, msg + # why was this line even here? + # self.__write_obj.write('mi bt + 'bdr-li-wid' : 'line-width', + 'bdr-sp-wid' : 'padding', + 'bdr-color_' : 'color', + } + self.__border_style_dict = { + 'bdr-single' : 'single', + 'bdr-doubtb' : 'double-thickness-border', + 'bdr-shadow' : 'shadowed-border', + 'bdr-double' : 'double-border', + 'bdr-dotted' : 'dotted-border', + 'bdr-dashed' : 'dashed', + 'bdr-hair__' : 'hairline', + 'bdr-inset_' : 'inset', + 'bdr-das-sm' : 'dash-small', + 'bdr-dot-sm' : 'dot-dash', + 'bdr-dot-do' : 'dot-dot-dash', + 'bdr-outset' : 'outset', + 'bdr-trippl' : 'tripple', + 'bdr-thsm__' : 'thick-thin-small', + 'bdr-htsm__' : 'thin-thick-small', + 'bdr-hthsm_' : 'thin-thick-thin-small', + 'bdr-thm___' : 'thick-thin-medium', + 'bdr-htm___' : 'thin-thick-medium', + 'bdr-hthm__' : 'thin-thick-thin-medium', + 'bdr-thl___' : 'thick-thin-large', + 'bdr-hthl__' : 'thin-thick-thin-large', + 'bdr-wavy__' : 'wavy', + 'bdr-d-wav_' : 'double-wavy', + 'bdr-strip_' : 'striped', + 'bdr-embos_' : 'emboss', + 'bdr-engra_' : 'engrave', + 'bdr-frame_' : 'frame', + } + def parse_border(self, line): + """ + Requires: + line -- line with border definition in it + Returns: + ? + Logic: + """ + border_dict = {} + border_style_dict = {} + border_style_list = [] + border_type = self.__border_dict.get(line[6:16]) + if not border_type: + sys.stderr.write( + 'module is border_parse.py\n' + 'function is parse_border\n' + 'token does not have a dictionary value\n' + 'token is "%s"' % line + ) + return border_dict + att_line = line[20:-1] + atts = att_line.split('|') + # cw +NON-BREAKING HYPEHN:_:8290:‑ +LEFT DOUBLE QUOTATION MARK:ldblquote:8220:“ +RIGHT DOUBLE QUOTATION MARK:rdblquote:8221:” +LEFT SINGLE QUOTATION MARK:lquote:8216:‘ +RIGHT SINGLE QUOTATION MARK:rquote:8217:’ +EN DASH:endash:8211:– +EM DASH:emdash:8212:— +MIDDLE DOT:bullet:183:· +:tab:9: +NO-BREAK SPACE:~:160:  +SOFT-HYPHEN:-:173:­ + + +REGISTERED SIGN:ldblquote:174:® +COPYRIGHT SIGN:rdblquote:169:© +N-ARY PRODUCT:rquote:8719:∏ +TRADE MARK SIGN:lquote:8482:™ +ANGLE:emdash:8736:∠ +WHITE DOWN-POINTING TRIANGLE:endash:9661:▽ +INFINITY:bullet:8734:∞ +:tab:9: +NO-BREAK SPACE:~:160:  +NON-BREAKING HYPEHN:_:8209:‑ + + +DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE:10130:ldblquote:➒ +DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TEN:rdblquote:10131:➓ +HEAVY WIDE-HEADED RIGHTWARDS ARROW:lquote:10132:➔ +RIGHTWARDS ARROW:rquote:8594:→ +DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN:endash:10128:➐ +DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT:emdash:10129:➑ +ROTATED HEAVY BLACK HEART BULLET:bullet:10085:❥ +DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE:~:10122:➊ +WRITING HAND:_:9997:✍ + + +:tab:9: +PROPOSE "HEDERA UPPER RIGHT":ldblquote:none: +PROPOSE "HEDERA LOWER LEFT":rdblquote:none: +PROPOSE "HEDERA LOWER RIGHT":lquote:none: +ERASE TO THE LEFT:rquote:9003:⌫ +AMPERSAND:endash:38:& +PROPOSE "HEDERA UPPER LEFT":emdash:none: +SUN:bullet:9737:☉ +PROPOSE "NOTCHED RIGHTWARDS DOUBLE ARROW WITH TIP DOWNWARDS":~:none: +PROPOSE "MAIL FLAG UP":_:none: + + +NULL (NUL):'00:0:� +START OF HEADING (SOH):'01:1: +START OF TEXT (STX):'02:2: +END OF TEXT (ETX):'03:3: +END OF TRANSMISSION (EOT):'04:4: +ENQUIRY (ENQ):'05:5: +ACKNOWLEDGE (ACK):'06:6: +BELL (BEL):'07:7: +BACKSPACE (BS):'08:8: +LINE TABULATION (VT):'0B:11: +FORM FEED (FF):'0C:12: +SHIFT OUT (SO):'0E:14: +SHIFT IN (SI):'0F:15: +DATALINK ESCAPE (DLE):'10:16: +DEVICE CONTROL ONE (DC1):'11:17: +DEVICE CONTROL TWO (DC2):'12:18: +DEVICE CONTROL THREE (DC3):'13:19: +DEVICE CONTROL FOUR (DC4):'14:20: +NEGATIVE ACKNOWLEDGE (NAK):'15:21: +SYNCHRONOUS IDLE (SYN):'16:22: +END OF TRANSMISSION BLOCK (ETB):'17:23: +CANCEL (CAN):'18:24: +END OF MEDIUM (EM):'19:25: +SUBSTITUTE (SUB):'1A:26: +ESCAPE (ESC):'1B:27: +FILE SEPARATOR (IS4):'1C:28: +GROUP SEPARATOR (IS3):'1D:29: +RECORD SEPARATOR (IS2):'1E:30: +UNIT SEPARATOR (IS1):'1F:31: + + +CHARACTER TABULATION (HT):'09:9: +LINE FEED (LF):'0A:10: +CARRIAGE RETURN (CR):'0D:13: +SPACE:'20:32: +EXCLAMATION MARK:'21:33:! +QUOTATION MARK:'22:34:" +NUMBER SIGN:'23:35:# +DOLLAR SIGN:'24:36:$ +PERCENT SIGN:'25:37:% +AMPERSAND:'26:38:& +APOSTROPHE:'27:39:' +LEFT PARENTHESIS:'28:40:( +RIGHT PARENTHESIS:'29:41:) +ASTERISK:'2A:42:* +PLUS SIGN:'2B:43:+ +COMMA:'2C:44:, +HYPHEN-MINUS:'2D:45:- +FULL STOP:'2E:46:. +SOLIDUS:'2F:47:/ +DIGIT ZERO:'30:48:0 +DIGIT ONE:'31:49:1 +DIGIT TWO:'32:50:2 +DIGIT THREE:'33:51:3 +DIGIT FOUR:'34:52:4 +DIGIT FIVE:'35:53:5 +DIGIT SIX:'36:54:6 +DIGIT SEVEN:'37:55:7 +DIGIT EIGHT:'38:56:8 +DIGIT NINE:'39:57:9 +COLON:'3A:58:\colon +SEMICOLON:'3B:59:; +LESS-THAN SIGN:'3C:60:< +EQUALS SIGN:'3D:61:= +GREATER-THAN SIGN:'3E:62:> +QUESTION MARK:'3F:63:? +COMMERCIAL AT:'40:64:@ +LATIN CAPITAL LETTER A:'41:65:A +LATIN CAPITAL LETTER B:'42:66:B +LATIN CAPITAL LETTER C:'43:67:C +LATIN CAPITAL LETTER D:'44:68:D +LATIN CAPITAL LETTER E:'45:69:E +LATIN CAPITAL LETTER F:'46:70:F +LATIN CAPITAL LETTER G:'47:71:G +LATIN CAPITAL LETTER H:'48:72:H +LATIN CAPITAL LETTER I:'49:73:I +LATIN CAPITAL LETTER J:'4A:74:J +LATIN CAPITAL LETTER K:'4B:75:K +LATIN CAPITAL LETTER L:'4C:76:L +LATIN CAPITAL LETTER M:'4D:77:M +LATIN CAPITAL LETTER N:'4E:78:N +LATIN CAPITAL LETTER O:'4F:79:O +LATIN CAPITAL LETTER P:'50:80:P +LATIN CAPITAL LETTER Q:'51:81:Q +LATIN CAPITAL LETTER R:'52:82:R +LATIN CAPITAL LETTER S:'53:83:S +LATIN CAPITAL LETTER T:'54:84:T +LATIN CAPITAL LETTER U:'55:85:U +LATIN CAPITAL LETTER V:'56:86:V +LATIN CAPITAL LETTER W:'57:87:W +LATIN CAPITAL LETTER X:'58:88:X +LATIN CAPITAL LETTER Y:'59:89:Y +LATIN CAPITAL LETTER Z:'5A:90:Z +LEFT SQUARE BRACKET:'5B:91:[ +REVERSE SOLIDUS:'5C:92:\ +RIGHT SQUARE BRACKET:'5D:93:] +CIRCUMFLEX ACCENT:'5E:94:^ +LOW LINE:'5F:95:_ +GRAVE ACCENT:'60:96:` +LATIN SMALL LETTER A:'61:97:a +LATIN SMALL LETTER B:'62:98:b +LATIN SMALL LETTER C:'63:99:c +LATIN SMALL LETTER D:'64:100:d +LATIN SMALL LETTER E:'65:101:e +LATIN SMALL LETTER F:'66:102:f +LATIN SMALL LETTER G:'67:103:g +LATIN SMALL LETTER H:'68:104:h +LATIN SMALL LETTER I:'69:105:i +LATIN SMALL LETTER J:'6A:106:j +LATIN SMALL LETTER K:'6B:107:k +LATIN SMALL LETTER L:'6C:108:l +LATIN SMALL LETTER M:'6D:109:m +LATIN SMALL LETTER N:'6E:110:n +LATIN SMALL LETTER O:'6F:111:o +LATIN SMALL LETTER P:'70:112:p +LATIN SMALL LETTER Q:'71:113:q +LATIN SMALL LETTER R:'72:114:r +LATIN SMALL LETTER S:'73:115:s +LATIN SMALL LETTER T:'74:116:t +LATIN SMALL LETTER U:'75:117:u +LATIN SMALL LETTER V:'76:118:v +LATIN SMALL LETTER W:'77:119:w +LATIN SMALL LETTER X:'78:120:x +LATIN SMALL LETTER Y:'79:121:y +LATIN SMALL LETTER Z:'7A:122:z +LEFT CURLY BRACKET:'7B:123:{ +VERTICAL LINE:'7C:124:| +RIGHT CURLY BRACKET:'7D:125:} +TILDE:'7E:126:~ +DELETE (DEL):'7F:127: + + +NULL (NUL):'00:0:� +START OF HEADING (SOH):'01:1: +START OF TEXT (STX):'02:2: +END OF TEXT (ETX):'03:3: +END OF TRANSMISSION (EOT):'04:4: +ENQUIRY (ENQ):'05:5: +ACKNOWLEDGE (ACK):'06:6: +BELL (BEL):'07:7: +BACKSPACE (BS):'08:8: +CHARACTER TABULATION (HT):'09:9: +LINE FEED (LF):'0A:10: +LINE TABULATION (VT):'0B:11: +FORM FEED (FF):'0C:12: +CARRIAGE RETURN (CR):'0D:13: +SHIFT OUT (SO):'0E:14: +SHIFT IN (SI):'0F:15: +DATALINK ESCAPE (DLE):'10:16: +DEVICE CONTROL ONE (DC1):'11:17: +DEVICE CONTROL TWO (DC2):'12:18: +DEVICE CONTROL THREE (DC3):'13:19: +DEVICE CONTROL FOUR (DC4):'14:20: +NEGATIVE ACKNOWLEDGE (NAK):'15:21: +SYNCHRONOUS IDLE (SYN):'16:22: +END OF TRANSMISSION BLOCK (ETB):'17:23: +CANCEL (CAN):'18:24: +END OF MEDIUM (EM):'19:25: +SUBSTITUTE (SUB):'1A:26: +ESCAPE (ESC):'1B:27: +FILE SEPARATOR (IS4):'1C:28: +GROUP SEPARATOR (IS3):'1D:29: +RECORD SEPARATOR (IS2):'1E:30: +UNIT SEPARATOR (IS1):'1F:31: +SPACE:'20:32: +EXCLAMATION MARK:'21:33:! +QUOTATION MARK:'22:34:" +NUMBER SIGN:'23:35:# +DOLLAR SIGN:'24:36:$ +PERCENT SIGN:'25:37:% +AMPERSAND:'26:38:& +APOSTROPHE:'27:39:' +LEFT PARENTHESIS:'28:40:( +RIGHT PARENTHESIS:'29:41:) +ASTERISK:'2A:42:* +PLUS SIGN:'2B:43:+ +COMMA:'2C:44:, +HYPHEN-MINUS:'2D:45:- +FULL STOP:'2E:46:. +SOLIDUS:'2F:47:/ +DIGIT ZERO:'30:48:0 +DIGIT ONE:'31:49:1 +DIGIT TWO:'32:50:2 +DIGIT THREE:'33:51:3 +DIGIT FOUR:'34:52:4 +DIGIT FIVE:'35:53:5 +DIGIT SIX:'36:54:6 +DIGIT SEVEN:'37:55:7 +DIGIT EIGHT:'38:56:8 +DIGIT NINE:'39:57:9 +COLON:'3A:58:: +SEMICOLON:'3B:59:; +LESS-THAN SIGN:'3C:60:< +EQUALS SIGN:'3D:61:= +GREATER-THAN SIGN:'3E:62:> +QUESTION MARK:'3F:63:? +COMMERCIAL AT:'40:64:@ +LATIN CAPITAL LETTER A:'41:65:A +LATIN CAPITAL LETTER B:'42:66:B +LATIN CAPITAL LETTER C:'43:67:C +LATIN CAPITAL LETTER D:'44:68:D +LATIN CAPITAL LETTER E:'45:69:E +LATIN CAPITAL LETTER F:'46:70:F +LATIN CAPITAL LETTER G:'47:71:G +LATIN CAPITAL LETTER H:'48:72:H +LATIN CAPITAL LETTER I:'49:73:I +LATIN CAPITAL LETTER J:'4A:74:J +LATIN CAPITAL LETTER K:'4B:75:K +LATIN CAPITAL LETTER L:'4C:76:L +LATIN CAPITAL LETTER M:'4D:77:M +LATIN CAPITAL LETTER N:'4E:78:N +LATIN CAPITAL LETTER O:'4F:79:O +LATIN CAPITAL LETTER P:'50:80:P +LATIN CAPITAL LETTER Q:'51:81:Q +LATIN CAPITAL LETTER R:'52:82:R +LATIN CAPITAL LETTER S:'53:83:S +LATIN CAPITAL LETTER T:'54:84:T +LATIN CAPITAL LETTER U:'55:85:U +LATIN CAPITAL LETTER V:'56:86:V +LATIN CAPITAL LETTER W:'57:87:W +LATIN CAPITAL LETTER X:'58:88:X +LATIN CAPITAL LETTER Y:'59:89:Y +LATIN CAPITAL LETTER Z:'5A:90:Z +LEFT SQUARE BRACKET:'5B:91:[ +REVERSE SOLIDUS:'5C:92:\ +RIGHT SQUARE BRACKET:'5D:93:] +CIRCUMFLEX ACCENT:'5E:94:^ +LOW LINE:'5F:95:_ +GRAVE ACCENT:'60:96:` +LATIN SMALL LETTER A:'61:97:a +LATIN SMALL LETTER B:'62:98:b +LATIN SMALL LETTER C:'63:99:c +LATIN SMALL LETTER D:'64:100:d +LATIN SMALL LETTER E:'65:101:e +LATIN SMALL LETTER F:'66:102:f +LATIN SMALL LETTER G:'67:103:g +LATIN SMALL LETTER H:'68:104:h +LATIN SMALL LETTER I:'69:105:i +LATIN SMALL LETTER J:'6A:106:j +LATIN SMALL LETTER K:'6B:107:k +LATIN SMALL LETTER L:'6C:108:l +LATIN SMALL LETTER M:'6D:109:m +LATIN SMALL LETTER N:'6E:110:n +LATIN SMALL LETTER O:'6F:111:o +LATIN SMALL LETTER P:'70:112:p +LATIN SMALL LETTER Q:'71:113:q +LATIN SMALL LETTER R:'72:114:r +LATIN SMALL LETTER S:'73:115:s +LATIN SMALL LETTER T:'74:116:t +LATIN SMALL LETTER U:'75:117:u +LATIN SMALL LETTER V:'76:118:v +LATIN SMALL LETTER W:'77:119:w +LATIN SMALL LETTER X:'78:120:x +LATIN SMALL LETTER Y:'79:121:y +LATIN SMALL LETTER Z:'7A:122:z + + +SINGLE LOW-9 QUOTATION MARK:'82:8218:‚ +DOUBLE LOW-9 QUOTATION MARK:'84:8222:„ +HORIZONTAL ELLIPSIS:'85:8230:… +SINGLE LOW-9 QUOTATION MARK:'82:8218:‚ +DOUBLE LOW-9 QUOTATION MARK:'84:8222:„ +HORIZONTAL ELLIPSIS:'85:8230:… +DAGGER:'86:8224:† +DOUBLE DAGGER:'87:8225:‡ +PER MILLE SIGN:'89:8240:‰ +LATIN CAPITAL LETTER S WITH CARON:'8A:352:Š +SINGLE LEFT-POINTING ANGLE QUOTATION MARK:'8B:8249:‹ +LATIN CAPITAL LETTER S WITH ACUTE:'8C:346:Ś +LATIN CAPITAL LETTER T WITH CARON:'8D:356:Ť +LATIN CAPITAL LETTER Z WITH CARON:'8E:381:Ž +LATIN CAPITAL LETTER Z WITH ACUTE:'8F:377:Ź +LEFT SINGLE QUOTATION MARK:'91:8216:‘ +RIGHT SINGLE QUOTATION MARK:'92:8217:’ +LEFT DOUBLE QUOTATION MARK:'93:8220:“ +RIGHT DOUBLE QUOTATION MARK:'94:8221:” +BULLET:'95:8226:• +EN DASH:'96:8211:– +EM DASH:'97:8212:— +TRADE MARK SIGN:'99:8482:™ +LATIN SMALL LETTER S WITH CARON:'9A:353:š +SINGLE RIGHT-POINTING ANGLE QUOTATION MARK:'9B:8250:› +LATIN SMALL LETTER S WITH ACUTE:'9C:347:ś +LATIN SMALL LETTER T WITH CARON:'9D:357:ť +LATIN SMALL LETTER Z WITH CARON:'9E:382:ž +LATIN SMALL LETTER Z WITH ACUTE:'9F:378:ź +NO-BREAK SPACE:'A0:160:  +CARON (MANDARIN CHINESE THIRD TONE):'A1:711:ˇ +BREVE:'A2:728:˘ +LATIN CAPITAL LETTER L WITH STROKE:'A3:321:Ł +CURRENCY SIGN:'A4:164:¤ +LATIN CAPITAL LETTER A WITH OGONEK:'A5:260:Ą +BROKEN BAR:'A6:166:¦ +SECTION SIGN:'A7:167:§ +DIAERESIS:'A8:168:¨ +COPYRIGHT SIGN:'A9:169:© +LATIN CAPITAL LETTER S WITH CEDILLA:'AA:350:Ş +LEFT-POINTING DOUBLE ANGLE QUOTATION MARK:'AB:171:« +NOT SIGN:'AC:172:¬ +SOFT HYPHEN:'AD:173:­ +REGISTERED SIGN:'AE:174:® +LATIN CAPITAL LETTER Z WITH DOT ABOVE:'AF:379:Ż +DEGREE SIGN:'B0:176:° +PLUS-MINUS SIGN:'B1:177:± +OGONEK:'B2:731:˛ +LATIN SMALL LETTER L WITH STROKE:'B3:322:ł +ACUTE ACCENT:'B4:180:´ +MICRO SIGN:'B5:181:µ +PILCROW SIGN:'B6:182:¶ +MIDDLE DOT:'B7:183:· +CEDILLA:'B8:184:¸ +LATIN SMALL LETTER A WITH OGONEK:'B9:261:ą +LATIN SMALL LETTER S WITH CEDILLA:'BA:351:ş +RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK:'BB:187:» +LATIN CAPITAL LETTER L WITH CARON:'BC:317:Ľ +DOUBLE ACUTE ACCENT:'BD:733:˝ +LATIN SMALL LETTER L WITH CARON:'BE:318:ľ +LATIN SMALL LETTER Z WITH DOT ABOVE:'BF:380:ż +LATIN CAPITAL LETTER R WITH ACUTE:'C0:340:Ŕ +LATIN CAPITAL LETTER A WITH ACUTE:'C1:193:Á +LATIN CAPITAL LETTER A WITH CIRCUMFLEX:'C2:194: +LATIN CAPITAL LETTER A WITH BREVE:'C3:258:Ă +LATIN CAPITAL LETTER A WITH DIAERESIS:'C4:196:Ä +LATIN CAPITAL LETTER L WITH ACUTE:'C5:313:Ĺ +LATIN CAPITAL LETTER C WITH ACUTE:'C6:262:Ć +LATIN CAPITAL LETTER C WITH CEDILLA:'C7:199:Ç +LATIN CAPITAL LETTER C WITH CARON:'C8:268:Č +LATIN CAPITAL LETTER E WITH ACUTE:'C9:201:É +LATIN CAPITAL LETTER E WITH OGONEK:'CA:280:Ę +LATIN CAPITAL LETTER E WITH DIAERESIS:'CB:203:Ë +LATIN CAPITAL LETTER E WITH CARON:'CC:282:Ě +LATIN CAPITAL LETTER I WITH ACUTE:'CD:205:Í +LATIN CAPITAL LETTER I WITH CIRCUMFLEX:'CE:206:Î +LATIN CAPITAL LETTER D WITH CARON:'CF:270:Ď +LATIN CAPITAL LETTER D WITH STROKE:'D0:272:Đ +LATIN CAPITAL LETTER N WITH ACUTE:'D1:323:Ń +LATIN CAPITAL LETTER N WITH CARON:'D2:327:Ň +LATIN CAPITAL LETTER O WITH ACUTE:'D3:211:Ó +LATIN CAPITAL LETTER O WITH CIRCUMFLEX:'D4:212:Ô +LATIN CAPITAL LETTER O WITH DOUBLE ACUTE:'D5:336:Ő +LATIN CAPITAL LETTER O WITH DIAERESIS:'D6:214:Ö +MULTIPLICATION SIGN:'D7:215:× +LATIN CAPITAL LETTER R WITH CARON:'D8:344:Ř +LATIN CAPITAL LETTER U WITH RING ABOVE:'D9:366:Ů +LATIN CAPITAL LETTER U WITH ACUTE:'DA:218:Ú +LATIN CAPITAL LETTER U WITH DOUBLE ACUTE:'DB:368:Ű +LATIN CAPITAL LETTER U WITH DIAERESIS:'DC:220:Ü +LATIN CAPITAL LETTER Y WITH ACUTE:'DD:221:Ý +LATIN CAPITAL LETTER T WITH CEDILLA:'DE:354:Ţ +LATIN SMALL LETTER SHARP S (GERMAN):'DF:223:ß +LATIN SMALL LETTER R WITH ACUTE:'E0:341:ŕ +LATIN SMALL LETTER A WITH ACUTE:'E1:225:á +LATIN SMALL LETTER A WITH CIRCUMFLEX:'E2:226:â +LATIN SMALL LETTER A WITH BREVE:'E3:259:ă +LATIN SMALL LETTER A WITH DIAERESIS:'E4:228:ä +LATIN SMALL LETTER L WITH ACUTE:'E5:314:ĺ +LATIN SMALL LETTER C WITH ACUTE:'E6:263:ć +LATIN SMALL LETTER C WITH CEDILLA:'E7:231:ç +LATIN SMALL LETTER C WITH CARON:'E8:269:č +LATIN SMALL LETTER E WITH ACUTE:'E9:233:é +LATIN SMALL LETTER E WITH OGONEK:'EA:281:ę +LATIN SMALL LETTER E WITH DIAERESIS:'EB:235:ë +LATIN SMALL LETTER E WITH CARON:'EC:283:ě +LATIN SMALL LETTER I WITH ACUTE:'ED:237:í +LATIN SMALL LETTER I WITH CIRCUMFLEX:'EE:238:î +LATIN SMALL LETTER D WITH CARON:'EF:271:ď +LATIN SMALL LETTER D WITH STROKE:'F0:273:đ +LATIN SMALL LETTER N WITH ACUTE:'F1:324:ń +LATIN SMALL LETTER N WITH CARON:'F2:328:ň +LATIN SMALL LETTER O WITH ACUTE:'F3:243:ó +LATIN SMALL LETTER O WITH CIRCUMFLEX:'F4:244:ô +LATIN SMALL LETTER O WITH DOUBLE ACUTE:'F5:337:ő +LATIN SMALL LETTER O WITH DIAERESIS:'F6:246:ö +DIVISION SIGN:'F7:247:÷ +LATIN SMALL LETTER R WITH CARON:'F8:345:ř +LATIN SMALL LETTER U WITH RING ABOVE:'F9:367:ů +LATIN SMALL LETTER U WITH ACUTE:'FA:250:ú +LATIN SMALL LETTER U WITH DOUBLE ACUTE:'FB:369:ű +LATIN SMALL LETTER U WITH DIAERESIS:'FC:252:ü +LATIN SMALL LETTER Y WITH ACUTE:'FD:253:ý +LATIN SMALL LETTER T WITH CEDILLA:'FE:355:ţ +DOT ABOVE (MANDARIN CHINESE LIGHT TONE):'FF:729:˙ + + +CYRILLIC CAPITAL LETTER DJE (SERBOCROATIAN):'80:1026:Ђ +CYRILLIC CAPITAL LETTER GJE:'81:1027:Ѓ +SINGLE LOW-9 QUOTATION MARK:'82:8218:‚ +CYRILLIC SMALL LETTER GJE:'83:1107:ѓ +DOUBLE LOW-9 QUOTATION MARK:'84:8222:„ +HORIZONTAL ELLIPSIS:'85:8230:… +DAGGER:'86:8224:† +DOUBLE DAGGER:'87:8225:‡ +PER MILLE SIGN:'89:8240:‰ +CYRILLIC CAPITAL LETTER LJE:'8A:1033:Љ +SINGLE LEFT-POINTING ANGLE QUOTATION MARK:'8B:8249:‹ +CYRILLIC CAPITAL LETTER NJE:'8C:1034:Њ +CYRILLIC CAPITAL LETTER KJE:'8D:1036:Ќ +CYRILLIC CAPITAL LETTER TSHE (SERBOCROATIAN):'8E:1035:Ћ +CYRILLIC CAPITAL LETTER DZHE:'8F:1039:Џ +CYRILLIC SMALL LETTER DJE (SERBOCROATIAN):'90:1106:ђ +LEFT SINGLE QUOTATION MARK:'91:8216:‘ +RIGHT SINGLE QUOTATION MARK:'92:8217:’ +LEFT DOUBLE QUOTATION MARK:'93:8220:“ +RIGHT DOUBLE QUOTATION MARK:'94:8221:” +BULLET:'95:8226:• +EN DASH:'96:8211:– +EM DASH:'97:8212:— +TRADE MARK SIGN:'99:8482:™ +CYRILLIC SMALL LETTER LJE:'9A:1113:љ +SINGLE RIGHT-POINTING ANGLE QUOTATION MARK:'9B:8250:› +CYRILLIC SMALL LETTER NJE:'9C:1114:њ +CYRILLIC SMALL LETTER KJE:'9D:1116:ќ +CYRILLIC SMALL LETTER TSHE (SERBOCROATIAN):'9E:1115:ћ +CYRILLIC SMALL LETTER DZHE:'9F:1119:џ +NO-BREAK SPACE:'A0:160:  +CYRILLIC CAPITAL LETTER SHORT U (BYELORUSSIAN):'A1:1038:Ў +CYRILLIC SMALL LETTER SHORT U (BYELORUSSIAN):'A2:1118:ў +CYRILLIC CAPITAL LETTER JE:'A3:1032:Ј +CURRENCY SIGN:'A4:164:¤ +CYRILLIC CAPITAL LETTER GHE WITH UPTURN:'A5:1168:Ґ +BROKEN BAR:'A6:166:¦ +SECTION SIGN:'A7:167:§ +CYRILLIC CAPITAL LETTER IO:'A8:1025:Ё +COPYRIGHT SIGN:'A9:169:© +CYRILLIC CAPITAL LETTER UKRAINIAN IE:'AA:1028:Є +LEFT-POINTING DOUBLE ANGLE QUOTATION MARK:'AB:171:« +NOT SIGN:'AC:172:¬ +SOFT HYPHEN:'AD:173:­ +REGISTERED SIGN:'AE:174:® +CYRILLIC CAPITAL LETTER YI (UKRAINIAN):'AF:1031:Ї +DEGREE SIGN:'B0:176:° +PLUS-MINUS SIGN:'B1:177:± +CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I:'B2:1030:І +CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I:'B3:1110:і +CYRILLIC SMALL LETTER GHE WITH UPTURN:'B4:1169:ґ +MICRO SIGN:'B5:181:µ +PILCROW SIGN:'B6:182:¶ +MIDDLE DOT:'B7:183:· +CYRILLIC SMALL LETTER IO:'B8:1105:ё +NUMERO SIGN:'B9:8470:№ +CYRILLIC SMALL LETTER UKRAINIAN IE:'BA:1108:є +RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK:'BB:187:» +CYRILLIC SMALL LETTER JE:'BC:1112:ј +CYRILLIC CAPITAL LETTER DZE:'BD:1029:Ѕ +CYRILLIC SMALL LETTER DZE:'BE:1109:ѕ +CYRILLIC SMALL LETTER YI (UKRAINIAN):'BF:1111:ї +CYRILLIC CAPITAL LETTER A:'C0:1040:А +CYRILLIC CAPITAL LETTER BE:'C1:1041:Б +CYRILLIC CAPITAL LETTER VE:'C2:1042:В +CYRILLIC CAPITAL LETTER GHE:'C3:1043:Г +CYRILLIC CAPITAL LETTER DE:'C4:1044:Д +CYRILLIC CAPITAL LETTER IE:'C5:1045:Е +CYRILLIC CAPITAL LETTER ZHE:'C6:1046:Ж +CYRILLIC CAPITAL LETTER ZE:'C7:1047:З +CYRILLIC CAPITAL LETTER I:'C8:1048:И +CYRILLIC CAPITAL LETTER SHORT I:'C9:1049:Й +CYRILLIC CAPITAL LETTER KA:'CA:1050:К +CYRILLIC CAPITAL LETTER EL:'CB:1051:Л +CYRILLIC CAPITAL LETTER EM:'CC:1052:М +CYRILLIC CAPITAL LETTER EN:'CD:1053:Н +CYRILLIC CAPITAL LETTER O:'CE:1054:О +CYRILLIC CAPITAL LETTER PE:'CF:1055:П +CYRILLIC CAPITAL LETTER ER:'D0:1056:Р +CYRILLIC CAPITAL LETTER ES:'D1:1057:С +CYRILLIC CAPITAL LETTER TE:'D2:1058:Т +CYRILLIC CAPITAL LETTER U:'D3:1059:У +CYRILLIC CAPITAL LETTER EF:'D4:1060:Ф +CYRILLIC CAPITAL LETTER HA:'D5:1061:Х +CYRILLIC CAPITAL LETTER TSE:'D6:1062:Ц +CYRILLIC CAPITAL LETTER CHE:'D7:1063:Ч +CYRILLIC CAPITAL LETTER SHA:'D8:1064:Ш +CYRILLIC CAPITAL LETTER SHCHA:'D9:1065:Щ +CYRILLIC CAPITAL LETTER HARD SIGN:'DA:1066:Ъ +CYRILLIC CAPITAL LETTER YERU:'DB:1067:Ы +CYRILLIC CAPITAL LETTER SOFT SIGN:'DC:1068:Ь +CYRILLIC CAPITAL LETTER E:'DD:1069:Э +CYRILLIC CAPITAL LETTER YU:'DE:1070:Ю +CYRILLIC CAPITAL LETTER YA:'DF:1071:Я +CYRILLIC SMALL LETTER A:'E0:1072:а +CYRILLIC SMALL LETTER BE:'E1:1073:б +CYRILLIC SMALL LETTER VE:'E2:1074:в +CYRILLIC SMALL LETTER GHE:'E3:1075:г +CYRILLIC SMALL LETTER DE:'E4:1076:д +CYRILLIC SMALL LETTER IE:'E5:1077:е +CYRILLIC SMALL LETTER ZHE:'E6:1078:ж +CYRILLIC SMALL LETTER ZE:'E7:1079:з +CYRILLIC SMALL LETTER I:'E8:1080:и +CYRILLIC SMALL LETTER SHORT I:'E9:1081:й +CYRILLIC SMALL LETTER KA:'EA:1082:к +CYRILLIC SMALL LETTER EL:'EB:1083:л +CYRILLIC SMALL LETTER EM:'EC:1084:м +CYRILLIC SMALL LETTER EN:'ED:1085:н +CYRILLIC SMALL LETTER O:'EE:1086:о +CYRILLIC SMALL LETTER PE:'EF:1087:п +CYRILLIC SMALL LETTER ER:'F0:1088:р +CYRILLIC SMALL LETTER ES:'F1:1089:с +CYRILLIC SMALL LETTER TE:'F2:1090:т +CYRILLIC SMALL LETTER U:'F3:1091:у +CYRILLIC SMALL LETTER EF:'F4:1092:ф +CYRILLIC SMALL LETTER HA:'F5:1093:х +CYRILLIC SMALL LETTER TSE:'F6:1094:ц +CYRILLIC SMALL LETTER CHE:'F7:1095:ч +CYRILLIC SMALL LETTER SHA:'F8:1096:ш +CYRILLIC SMALL LETTER SHCHA:'F9:1097:щ +CYRILLIC SMALL LETTER HARD SIGN:'FA:1098:ъ +CYRILLIC SMALL LETTER YERU:'FB:1099:ы +CYRILLIC SMALL LETTER SOFT SIGN:'FC:1100:ь +CYRILLIC SMALL LETTER E:'FD:1101:э +CYRILLIC SMALL LETTER YU:'FE:1102:ю +CYRILLIC SMALL LETTER YA:'FF:1103:я + + +LATIN SMALL LETTER Y WITH DIAERESIS:'00:00:ÿ +SINGLE LOW-9 QUOTATION MARK:'82:8218:‚ +LATIN SMALL LETTER F WITH HOOK:'83:402:ƒ +DOUBLE LOW-9 QUOTATION MARK:'84:8222:„ +HORIZONTAL ELLIPSIS:'85:8230:… +DAGGER:'86:8224:† +DOUBLE DAGGER:'87:8225:‡ +MODIFIER LETTER CIRCUMFLEX ACCENT:'88:710:ˆ +PER MILLE SIGN:'89:8240:‰ +LATIN CAPITAL LETTER S WITH CARON:'8A:352:Š +SINGLE LEFT-POINTING ANGLE QUOTATION MARK:'8B:8249:‹ +LATIN CAPITAL LIGATURE OE:'8C:338:Œ +LEFT SINGLE QUOTATION MARK:'91:8216:‘ +RIGHT SINGLE QUOTATION MARK:'92:8217:’ +LEFT DOUBLE QUOTATION MARK:'93:8220:“ +RIGHT DOUBLE QUOTATION MARK:'94:8221:” +BULLET:'95:8226:• +EN DASH:'96:8211:– +EM DASH:'97:8212:— +SMALL TILDE:'98:732:˜ +TRADE MARK SIGN:'99:8482:™ +LATIN SMALL LETTER S WITH CARON:'9A:353:š +SINGLE RIGHT-POINTING ANGLE QUOTATION MARK:'9B:8250:› +LATIN SMALL LIGATURE OE:'9C:339:œ +LATIN CAPITAL LETTER Y WITH DIAERESIS:'9F:376:Ÿ +NO-BREAK SPACE:'A0:160:  +INVERTED EXCLAMATION MARK:'A1:161:¡ +CENT SIGN:'A2:162:¢ +POUND SIGN:'A3:163:£ +CURRENCY SIGN:'A4:164:¤ +YEN SIGN:'A5:165:¥ +BROKEN BAR:'A6:166:¦ +SECTION SIGN:'A7:167:§ +DIAERESIS:'A8:168:¨ +COPYRIGHT SIGN:'A9:169:© +FEMININE ORDINAL INDICATOR:'AA:170:ª +LEFT-POINTING DOUBLE ANGLE QUOTATION MARK:'AB:171:« +NOT SIGN:'AC:172:¬ +SOFT HYPHEN:'AD:173:­ +REGISTERED SIGN:'AE:174:® +MACRON:'AF:175:¯ +DEGREE SIGN:'B0:176:° +PLUS-MINUS SIGN:'B1:177:± +SUPERSCRIPT TWO:'B2:178:² +SUPERSCRIPT THREE:'B3:179:³ +ACUTE ACCENT:'B4:180:´ +MICRO SIGN:'B5:181:µ +PILCROW SIGN:'B6:182:¶ +MIDDLE DOT:'B7:183:· +CEDILLA:'B8:184:¸ +SUPERSCRIPT ONE:'B9:185:¹ +MASCULINE ORDINAL INDICATOR:'BA:186:º +RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK:'BB:187:» +VULGAR FRACTION ONE QUARTER:'BC:188:¼ +VULGAR FRACTION ONE HALF:'BD:189:½ +VULGAR FRACTION THREE QUARTERS:'BE:190:¾ +INVERTED QUESTION MARK:'BF:191:¿ +LATIN CAPITAL LETTER A WITH GRAVE:'C0:192:À +LATIN CAPITAL LETTER A WITH ACUTE:'C1:193:Á +LATIN CAPITAL LETTER A WITH CIRCUMFLEX:'C2:194: +LATIN CAPITAL LETTER A WITH TILDE:'C3:195:à +LATIN CAPITAL LETTER A WITH DIAERESIS:'C4:196:Ä +LATIN CAPITAL LETTER A WITH RING ABOVE:'C5:197:Å +LATIN CAPITAL LETTER AE:'C6:198:Æ +LATIN CAPITAL LETTER C WITH CEDILLA:'C7:199:Ç +LATIN CAPITAL LETTER E WITH GRAVE:'C8:200:È +LATIN CAPITAL LETTER E WITH ACUTE:'C9:201:É +LATIN CAPITAL LETTER E WITH CIRCUMFLEX:'CA:202:Ê +LATIN CAPITAL LETTER E WITH DIAERESIS:'CB:203:Ë +LATIN CAPITAL LETTER I WITH GRAVE:'CC:204:Ì +LATIN CAPITAL LETTER I WITH ACUTE:'CD:205:Í +LATIN CAPITAL LETTER I WITH CIRCUMFLEX:'CE:206:Î +LATIN CAPITAL LETTER I WITH DIAERESIS:'CF:207:Ï +LATIN CAPITAL LETTER ETH (ICELANDIC):'D0:208:Ð +LATIN CAPITAL LETTER N WITH TILDE:'D1:209:Ñ +LATIN CAPITAL LETTER O WITH GRAVE:'D2:210:Ò +LATIN CAPITAL LETTER O WITH ACUTE:'D3:211:Ó +LATIN CAPITAL LETTER O WITH CIRCUMFLEX:'D4:212:Ô +LATIN CAPITAL LETTER O WITH TILDE:'D5:213:Õ +LATIN CAPITAL LETTER O WITH DIAERESIS:'D6:214:Ö +MULTIPLICATION SIGN:'D7:215:× +LATIN CAPITAL LETTER O WITH STROKE:'D8:216:Ø +LATIN CAPITAL LETTER U WITH GRAVE:'D9:217:Ù +LATIN CAPITAL LETTER U WITH ACUTE:'DA:218:Ú +LATIN CAPITAL LETTER U WITH CIRCUMFLEX:'DB:219:Û +LATIN CAPITAL LETTER U WITH DIAERESIS:'DC:220:Ü +LATIN CAPITAL LETTER Y WITH ACUTE:'DD:221:Ý +LATIN CAPITAL LETTER THORN (ICELANDIC):'DE:222:Þ +LATIN SMALL LETTER SHARP S (GERMAN):'DF:223:ß +LATIN SMALL LETTER A WITH GRAVE:'E0:224:à +LATIN SMALL LETTER A WITH ACUTE:'E1:225:á +LATIN SMALL LETTER A WITH CIRCUMFLEX:'E2:226:â +LATIN SMALL LETTER A WITH TILDE:'E3:227:ã +LATIN SMALL LETTER A WITH DIAERESIS:'E4:228:ä +LATIN SMALL LETTER A WITH RING ABOVE:'E5:229:å +LATIN SMALL LETTER AE:'E6:230:æ +LATIN SMALL LETTER C WITH CEDILLA:'E7:231:ç +LATIN SMALL LETTER E WITH GRAVE:'E8:232:è +LATIN SMALL LETTER E WITH ACUTE:'E9:233:é +LATIN SMALL LETTER E WITH CIRCUMFLEX:'EA:234:ê +LATIN SMALL LETTER E WITH DIAERESIS:'EB:235:ë +LATIN SMALL LETTER I WITH GRAVE:'EC:236:ì +LATIN SMALL LETTER I WITH ACUTE:'ED:237:í +LATIN SMALL LETTER I WITH CIRCUMFLEX:'EE:238:î +LATIN SMALL LETTER I WITH DIAERESIS:'EF:239:ï +LATIN SMALL LETTER ETH (ICELANDIC):'F0:240:ð +LATIN SMALL LETTER N WITH TILDE:'F1:241:ñ +LATIN SMALL LETTER O WITH GRAVE:'F2:242:ò +LATIN SMALL LETTER O WITH ACUTE:'F3:243:ó +LATIN SMALL LETTER O WITH CIRCUMFLEX:'F4:244:ô +LATIN SMALL LETTER O WITH TILDE:'F5:245:õ +LATIN SMALL LETTER O WITH DIAERESIS:'F6:246:ö +DIVISION SIGN:'F7:247:÷ +LATIN SMALL LETTER O WITH STROKE:'F8:248:ø +LATIN SMALL LETTER U WITH GRAVE:'F9:249:ù +LATIN SMALL LETTER U WITH ACUTE:'FA:250:ú +LATIN SMALL LETTER U WITH CIRCUMFLEX:'FB:251:û +LATIN SMALL LETTER U WITH DIAERESIS:'FC:252:ü +LATIN SMALL LETTER Y WITH ACUTE:'FD:253:ý +LATIN SMALL LETTER THORN (ICELANDIC):'FE:254:þ +LATIN SMALL LETTER Y WITH DIAERESIS:'FF:255:ÿ +MY UNDEFINED SYMBOL:\'8D:141: +MY UNDEFINED SYMBOL:\'8E:142: +MY UNDEFINED SYMBOL:\'8F:143: +MY UNDEFINED SYMBOL:\'90:144: +MY UNDEFINED SYMBOL:\'9D:157: +MY UNDEFINED SYMBOL:\'9E:158: + + +SINGLE LOW-9 QUOTATION MARK:'82:8218:‚ +LATIN SMALL LETTER F WITH HOOK:'83:402:ƒ +DOUBLE LOW-9 QUOTATION MARK:'84:8222:„ +HORIZONTAL ELLIPSIS:'85:8230:… +DAGGER:'86:8224:† +DOUBLE DAGGER:'87:8225:‡ +PER MILLE SIGN:'89:8240:‰ +SINGLE LEFT-POINTING ANGLE QUOTATION MARK:'8B:8249:‹ +LEFT SINGLE QUOTATION MARK:'91:8216:‘ +RIGHT SINGLE QUOTATION MARK:'92:8217:’ +LEFT DOUBLE QUOTATION MARK:'93:8220:“ +RIGHT DOUBLE QUOTATION MARK:'94:8221:” +BULLET:'95:8226:• +EN DASH:'96:8211:– +EM DASH:'97:8212:— +TRADE MARK SIGN:'99:8482:™ +SINGLE RIGHT-POINTING ANGLE QUOTATION MARK:'9B:8250:› +NO-BREAK SPACE:'A0:160:  +GREEK DIALYTIKA TONOS:'A1:901:΅ +GREEK CAPITAL LETTER ALPHA WITH TONOS:'A2:902:Ά +POUND SIGN:'A3:163:£ +CURRENCY SIGN:'A4:164:¤ +YEN SIGN:'A5:165:¥ +BROKEN BAR:'A6:166:¦ +SECTION SIGN:'A7:167:§ +DIAERESIS:'A8:168:¨ +COPYRIGHT SIGN:'A9:169:© +LEFT-POINTING DOUBLE ANGLE QUOTATION MARK:'AB:171:« +NOT SIGN:'AC:172:¬ +SOFT HYPHEN:'AD:173:­ +REGISTERED SIGN:'AE:174:® +HORIZONTAL BAR:'AF:8213:― +DEGREE SIGN:'B0:176:° +PLUS-MINUS SIGN:'B1:177:± +SUPERSCRIPT TWO:'B2:178:² +SUPERSCRIPT THREE:'B3:179:³ +GREEK TONOS:'B4:900:΄ +MICRO SIGN:'B5:181:µ +PILCROW SIGN:'B6:182:¶ +MIDDLE DOT:'B7:183:· +GREEK CAPITAL LETTER EPSILON WITH TONOS:'B8:904:Έ +GREEK CAPITAL LETTER ETA WITH TONOS:'B9:905:Ή +GREEK CAPITAL LETTER IOTA WITH TONOS:'BA:906:Ί +RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK:'BB:187:» +GREEK CAPITAL LETTER OMICRON WITH TONOS:'BC:908:Ό +VULGAR FRACTION ONE HALF:'BD:189:½ +GREEK CAPITAL LETTER UPSILON WITH TONOS:'BE:910:Ύ +GREEK CAPITAL LETTER OMEGA WITH TONOS:'BF:911:Ώ +GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS:'C0:912:ΐ +GREEK CAPITAL LETTER ALPHA:'C1:913:Α +GREEK CAPITAL LETTER BETA:'C2:914:Β +GREEK CAPITAL LETTER GAMMA:'C3:915:Γ +GREEK CAPITAL LETTER DELTA:'C4:916:Δ +GREEK CAPITAL LETTER EPSILON:'C5:917:Ε +GREEK CAPITAL LETTER ZETA:'C6:918:Ζ +GREEK CAPITAL LETTER ETA:'C7:919:Η +GREEK CAPITAL LETTER THETA:'C8:920:Θ +GREEK CAPITAL LETTER IOTA:'C9:921:Ι +GREEK CAPITAL LETTER KAPPA:'CA:922:Κ +GREEK CAPITAL LETTER LAMDA:'CB:923:Λ +GREEK CAPITAL LETTER MU:'CC:924:Μ +GREEK CAPITAL LETTER NU:'CD:925:Ν +GREEK CAPITAL LETTER XI:'CE:926:Ξ +GREEK CAPITAL LETTER OMICRON:'CF:927:Ο +GREEK CAPITAL LETTER PI:'D0:928:Π +GREEK CAPITAL LETTER RHO:'D1:929:Ρ +GREEK CAPITAL LETTER SIGMA:'D3:931:Σ +GREEK CAPITAL LETTER TAU:'D4:932:Τ +GREEK CAPITAL LETTER UPSILON:'D5:933:Υ +GREEK CAPITAL LETTER PHI:'D6:934:Φ +GREEK CAPITAL LETTER CHI:'D7:935:Χ +GREEK CAPITAL LETTER PSI:'D8:936:Ψ +GREEK CAPITAL LETTER OMEGA:'D9:937:Ω +GREEK CAPITAL LETTER IOTA WITH DIALYTIKA:'DA:938:Ϊ +GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA:'DB:939:Ϋ +GREEK SMALL LETTER ALPHA WITH TONOS:'DC:940:ά +GREEK SMALL LETTER EPSILON WITH TONOS:'DD:941:έ +GREEK SMALL LETTER ETA WITH TONOS:'DE:942:ή +GREEK SMALL LETTER IOTA WITH TONOS:'DF:943:ί +GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS:'E0:944:ΰ +GREEK SMALL LETTER ALPHA:'E1:945:α +GREEK SMALL LETTER BETA:'E2:946:β +GREEK SMALL LETTER GAMMA:'E3:947:γ +GREEK SMALL LETTER DELTA:'E4:948:δ +GREEK SMALL LETTER EPSILON:'E5:949:ε +GREEK SMALL LETTER ZETA:'E6:950:ζ +GREEK SMALL LETTER ETA:'E7:951:η +GREEK SMALL LETTER THETA:'E8:952:θ +GREEK SMALL LETTER IOTA:'E9:953:ι +GREEK SMALL LETTER KAPPA:'EA:954:κ +GREEK SMALL LETTER LAMDA:'EB:955:λ +GREEK SMALL LETTER MU:'EC:956:μ +GREEK SMALL LETTER NU:'ED:957:ν +GREEK SMALL LETTER XI:'EE:958:ξ +GREEK SMALL LETTER OMICRON:'EF:959:ο +GREEK SMALL LETTER PI:'F0:960:π +GREEK SMALL LETTER RHO:'F1:961:ρ +GREEK SMALL LETTER FINAL SIGMA:'F2:962:ς +GREEK SMALL LETTER SIGMA:'F3:963:σ +GREEK SMALL LETTER TAU:'F4:964:τ +GREEK SMALL LETTER UPSILON:'F5:965:υ +GREEK SMALL LETTER PHI:'F6:966:φ +GREEK SMALL LETTER CHI:'F7:967:χ +GREEK SMALL LETTER PSI:'F8:968:ψ +GREEK SMALL LETTER OMEGA:'F9:969:ω +GREEK SMALL LETTER IOTA WITH DIALYTIKA:'FA:970:ϊ +GREEK SMALL LETTER UPSILON WITH DIALYTIKA:'FB:971:ϋ +GREEK SMALL LETTER OMICRON WITH TONOS:'FC:972:ό +GREEK SMALL LETTER UPSILON WITH TONOS:'FD:973:ύ +GREEK SMALL LETTER OMEGA WITH TONOS:'FE:974:ώ + + +SINGLE LOW-9 QUOTATION MARK:'82:8218:‚ +LATIN SMALL LETTER F WITH HOOK:'83:402:ƒ +DOUBLE LOW-9 QUOTATION MARK:'84:8222:„ +HORIZONTAL ELLIPSIS:'85:8230:… +DAGGER:'86:8224:† +DOUBLE DAGGER:'87:8225:‡ +MODIFIER LETTER CIRCUMFLEX ACCENT:'88:710:ˆ +PER MILLE SIGN:'89:8240:‰ +LATIN CAPITAL LETTER S WITH CARON:'8A:352:Š +SINGLE LEFT-POINTING ANGLE QUOTATION MARK:'8B:8249:‹ +LATIN CAPITAL LIGATURE OE:'8C:338:Œ +LEFT SINGLE QUOTATION MARK:'91:8216:‘ +RIGHT SINGLE QUOTATION MARK:'92:8217:’ +LEFT DOUBLE QUOTATION MARK:'93:8220:“ +RIGHT DOUBLE QUOTATION MARK:'94:8221:” +BULLET:'95:8226:• +EN DASH:'96:8211:– +EM DASH:'97:8212:— +SMALL TILDE:'98:732:˜ +TRADE MARK SIGN:'99:8482:™ +LATIN SMALL LETTER S WITH CARON:'9A:353:š +SINGLE RIGHT-POINTING ANGLE QUOTATION MARK:'9B:8250:› +LATIN SMALL LIGATURE OE:'9C:339:œ +LATIN CAPITAL LETTER Y WITH DIAERESIS:'9F:376:Ÿ +NO-BREAK SPACE:'A0:160:  +INVERTED EXCLAMATION MARK:'A1:161:¡ +CENT SIGN:'A2:162:¢ +POUND SIGN:'A3:163:£ +CURRENCY SIGN:'A4:164:¤ +YEN SIGN:'A5:165:¥ +BROKEN BAR:'A6:166:¦ +SECTION SIGN:'A7:167:§ +DIAERESIS:'A8:168:¨ +COPYRIGHT SIGN:'A9:169:© +FEMININE ORDINAL INDICATOR:'AA:170:ª +LEFT-POINTING DOUBLE ANGLE QUOTATION MARK:'AB:171:« +NOT SIGN:'AC:172:¬ +SOFT HYPHEN:'AD:173:­ +REGISTERED SIGN:'AE:174:® +MACRON:'AF:175:¯ +DEGREE SIGN:'B0:176:° +PLUS-MINUS SIGN:'B1:177:± +SUPERSCRIPT TWO:'B2:178:² +SUPERSCRIPT THREE:'B3:179:³ +ACUTE ACCENT:'B4:180:´ +MICRO SIGN:'B5:181:µ +PILCROW SIGN:'B6:182:¶ +MIDDLE DOT:'B7:183:· +CEDILLA:'B8:184:¸ +SUPERSCRIPT ONE:'B9:185:¹ +MASCULINE ORDINAL INDICATOR:'BA:186:º +RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK:'BB:187:» +VULGAR FRACTION ONE QUARTER:'BC:188:¼ +VULGAR FRACTION ONE HALF:'BD:189:½ +VULGAR FRACTION THREE QUARTERS:'BE:190:¾ +INVERTED QUESTION MARK:'BF:191:¿ +LATIN CAPITAL LETTER A WITH GRAVE:'C0:192:À +LATIN CAPITAL LETTER A WITH ACUTE:'C1:193:Á +LATIN CAPITAL LETTER A WITH CIRCUMFLEX:'C2:194: +LATIN CAPITAL LETTER A WITH TILDE:'C3:195:à +LATIN CAPITAL LETTER A WITH DIAERESIS:'C4:196:Ä +LATIN CAPITAL LETTER A WITH RING ABOVE:'C5:197:Å +LATIN CAPITAL LETTER AE:'C6:198:Æ +LATIN CAPITAL LETTER C WITH CEDILLA:'C7:199:Ç +LATIN CAPITAL LETTER E WITH GRAVE:'C8:200:È +LATIN CAPITAL LETTER E WITH ACUTE:'C9:201:É +LATIN CAPITAL LETTER E WITH CIRCUMFLEX:'CA:202:Ê +LATIN CAPITAL LETTER E WITH DIAERESIS:'CB:203:Ë +LATIN CAPITAL LETTER I WITH GRAVE:'CC:204:Ì +LATIN CAPITAL LETTER I WITH ACUTE:'CD:205:Í +LATIN CAPITAL LETTER I WITH CIRCUMFLEX:'CE:206:Î +LATIN CAPITAL LETTER I WITH DIAERESIS:'CF:207:Ï +LATIN CAPITAL LETTER G WITH BREVE:'D0:286:Ğ +LATIN CAPITAL LETTER N WITH TILDE:'D1:209:Ñ +LATIN CAPITAL LETTER O WITH GRAVE:'D2:210:Ò +LATIN CAPITAL LETTER O WITH ACUTE:'D3:211:Ó +LATIN CAPITAL LETTER O WITH CIRCUMFLEX:'D4:212:Ô +LATIN CAPITAL LETTER O WITH TILDE:'D5:213:Õ +LATIN CAPITAL LETTER O WITH DIAERESIS:'D6:214:Ö +MULTIPLICATION SIGN:'D7:215:× +LATIN CAPITAL LETTER O WITH STROKE:'D8:216:Ø +LATIN CAPITAL LETTER U WITH GRAVE:'D9:217:Ù +LATIN CAPITAL LETTER U WITH ACUTE:'DA:218:Ú +LATIN CAPITAL LETTER U WITH CIRCUMFLEX:'DB:219:Û +LATIN CAPITAL LETTER U WITH DIAERESIS:'DC:220:Ü +LATIN CAPITAL LETTER I WITH DOT ABOVE:'DD:304:İ +LATIN CAPITAL LETTER S WITH CEDILLA:'DE:350:Ş +LATIN SMALL LETTER SHARP S (GERMAN):'DF:223:ß +LATIN SMALL LETTER A WITH GRAVE:'E0:224:à +LATIN SMALL LETTER A WITH ACUTE:'E1:225:á +LATIN SMALL LETTER A WITH CIRCUMFLEX:'E2:226:â +LATIN SMALL LETTER A WITH TILDE:'E3:227:ã +LATIN SMALL LETTER A WITH DIAERESIS:'E4:228:ä +LATIN SMALL LETTER A WITH RING ABOVE:'E5:229:å +LATIN SMALL LETTER AE:'E6:230:æ +LATIN SMALL LETTER C WITH CEDILLA:'E7:231:ç +LATIN SMALL LETTER E WITH GRAVE:'E8:232:è +LATIN SMALL LETTER E WITH ACUTE:'E9:233:é +LATIN SMALL LETTER E WITH OGONEK:'EA:281:ę +LATIN SMALL LETTER E WITH DIAERESIS:'EB:235:ë +LATIN SMALL LETTER E WITH DOT ABOVE:'EC:279:ė +LATIN SMALL LETTER I WITH ACUTE:'ED:237:í +LATIN SMALL LETTER I WITH CIRCUMFLEX:'EE:238:î +LATIN SMALL LETTER I WITH MACRON:'EF:299:ī +LATIN SMALL LETTER G WITH BREVE:'F0:287:ğ +LATIN SMALL LETTER N WITH TILDE:'F1:241:ñ +LATIN SMALL LETTER O WITH GRAVE:'F2:242:ò +LATIN SMALL LETTER O WITH ACUTE:'F3:243:ó +LATIN SMALL LETTER O WITH CIRCUMFLEX:'F4:244:ô +LATIN SMALL LETTER O WITH TILDE:'F5:245:õ +LATIN SMALL LETTER O WITH DIAERESIS:'F6:246:ö +DIVISION SIGN:'F7:247:÷ +LATIN SMALL LETTER O WITH STROKE:'F8:248:ø +LATIN SMALL LETTER U WITH GRAVE:'F9:249:ù +LATIN SMALL LETTER U WITH ACUTE:'FA:250:ú +LATIN SMALL LETTER U WITH CIRCUMFLEX:'FB:251:û +LATIN SMALL LETTER U WITH DIAERESIS:'FC:252:ü +LATIN SMALL LETTER DOTLESS I:'FD:305:ı +LATIN SMALL LETTER S WITH CEDILLA:'FE:351:ş +LATIN SMALL LETTER Y WITH DIAERESIS:'FF:255:ÿ + + +SINGLE LOW-9 QUOTATION MARK:'82:8218:‚ +LATIN SMALL LETTER F WITH HOOK:'83:402:ƒ +DOUBLE LOW-9 QUOTATION MARK:'84:8222:„ +HORIZONTAL ELLIPSIS:'85:8230:… +DAGGER:'86:8224:† +DOUBLE DAGGER:'87:8225:‡ +PER MILLE SIGN:'89:8240:‰ +SINGLE LEFT-POINTING ANGLE QUOTATION MARK:'8B:8249:‹ +LEFT SINGLE QUOTATION MARK:'91:8216:‘ +RIGHT SINGLE QUOTATION MARK:'92:8217:’ +LEFT DOUBLE QUOTATION MARK:'93:8220:“ +RIGHT DOUBLE QUOTATION MARK:'94:8221:” +BULLET:'95:8226:• +EN DASH:'96:8211:– +EM DASH:'97:8212:— +TRADE MARK SIGN:'99:8482:™ +SINGLE RIGHT-POINTING ANGLE QUOTATION MARK:'9B:8250:› +NO-BREAK SPACE:'A0:160:  +CENT SIGN:'A2:162:¢ +POUND SIGN:'A3:163:£ +CURRENCY SIGN:'A4:164:¤ +YEN SIGN:'A5:165:¥ +BROKEN BAR:'A6:166:¦ +SECTION SIGN:'A7:167:§ +DIAERESIS:'A8:168:¨ +COPYRIGHT SIGN:'A9:169:© +MULTIPLICATION SIGN:'AA:215:× +LEFT-POINTING DOUBLE ANGLE QUOTATION MARK:'AB:171:« +NOT SIGN:'AC:172:¬ +SOFT HYPHEN:'AD:173:­ +REGISTERED SIGN:'AE:174:® +OVERLINE:'AF:8254:‾ +DEGREE SIGN:'B0:176:° +PLUS-MINUS SIGN:'B1:177:± +SUPERSCRIPT TWO:'B2:178:² +SUPERSCRIPT THREE:'B3:179:³ +ACUTE ACCENT:'B4:180:´ +MICRO SIGN:'B5:181:µ +PILCROW SIGN:'B6:182:¶ +MIDDLE DOT:'B7:183:· +CEDILLA:'B8:184:¸ +SUPERSCRIPT ONE:'B9:185:¹ +DIVISION SIGN:'BA:247:÷ +RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK:'BB:187:» +VULGAR FRACTION ONE QUARTER:'BC:188:¼ +VULGAR FRACTION ONE HALF:'BD:189:½ +VULGAR FRACTION THREE QUARTERS:'BE:190:¾ +DOUBLE LOW LINE:'DF:8215:‗ +HEBREW LETTER ALEF:'E0:1488:א +HEBREW LETTER BET:'E1:1489:ב +HEBREW LETTER GIMEL:'E2:1490:ג +HEBREW LETTER DALET:'E3:1491:ד +HEBREW LETTER HE:'E4:1492:ה +HEBREW LETTER VAV:'E5:1493:ו +HEBREW LETTER ZAYIN:'E6:1494:ז +HEBREW LETTER HET:'E7:1495:ח +HEBREW LETTER TET:'E8:1496:ט +HEBREW LETTER YOD:'E9:1497:י +HEBREW LETTER FINAL KAF:'EA:1498:ך +HEBREW LETTER KAF:'EB:1499:כ +HEBREW LETTER LAMED:'EC:1500:ל +HEBREW LETTER FINAL MEM:'ED:1501:ם +HEBREW LETTER MEM:'EE:1502:מ +HEBREW LETTER FINAL NUN:'EF:1503:ן +HEBREW LETTER NUN:'F0:1504:נ +HEBREW LETTER SAMEKH:'F1:1505:ס +HEBREW LETTER AYIN:'F2:1506:ע +HEBREW LETTER FINAL PE:'F3:1507:ף +HEBREW LETTER PE:'F4:1508:פ +HEBREW LETTER FINAL TSADI:'F5:1509:ץ +HEBREW LETTER TSADI:'F6:1510:צ +HEBREW LETTER QOF:'F7:1511:ק +HEBREW LETTER RESH:'F8:1512:ר +HEBREW LETTER SHIN:'F9:1513:ש +HEBREW LETTER TAV:'FA:1514:ת +LEFT-TO-RIGHT MARK:'FD:8206:‎ +RIGHT-TO-LEFT MARK:'FE:8207:‏ +NUL:'00:0:� + + +ARABIC COMMA:'80:1548:، +ARABIC-INDIC DIGIT ZERO:'81:1632:٠ +SINGLE LOW-9 QUOTATION MARK:'82:8218:‚ +ARABIC-INDIC DIGIT ONE:'83:1633:١ +DOUBLE LOW-9 QUOTATION MARK:'84:8222:„ +HORIZONTAL ELLIPSIS:'85:8230:… +DAGGER:'86:8224:† +DOUBLE DAGGER:'87:8225:‡ +ARABIC-INDIC DIGIT TWO:'88:1634:٢ +ARABIC-INDIC DIGIT THREE:'89:1635:٣ +ARABIC-INDIC DIGIT FOUR:'8A:1636:٤ +SINGLE LEFT-POINTING ANGLE QUOTATION MARK:'8B:8249:‹ +ARABIC-INDIC DIGIT FIVE:'8C:1637:٥ +ARABIC-INDIC DIGIT SIX:'8D:1638:٦ +ARABIC-INDIC DIGIT SEVEN:'8E:1639:٧ +ARABIC-INDIC DIGIT EIGHT:'8F:1640:٨ +ARABIC-INDIC DIGIT NINE:'90:1641:٩ +LEFT SINGLE QUOTATION MARK:'91:8216:‘ +RIGHT SINGLE QUOTATION MARK:'92:8217:’ +LEFT DOUBLE QUOTATION MARK:'93:8220:“ +RIGHT DOUBLE QUOTATION MARK:'94:8221:” +BULLET:'95:8226:• +EN DASH:'96:8211:– +EM DASH:'97:8212:— +ARABIC SEMICOLON:'98:1563:؛ +TRADE MARK SIGN:'99:8482:™ +ARABIC QUESTION MARK:'9A:1567:؟ +SINGLE RIGHT-POINTING ANGLE QUOTATION MARK:'9B:8250:› +ARABIC LETTER HAMZA:'9C:1569:ء +ARABIC LETTER ALEF WITH MADDA ABOVE:'9D:1570:آ +ARABIC LETTER ALEF WITH HAMZA ABOVE:'9E:1571:أ +LATIN CAPITAL LETTER Y WITH DIAERESIS:'9F:376:Ÿ +NO-BREAK SPACE:'A0:160:  +ARABIC LETTER WAW WITH HAMZA ABOVE:'A1:1572:ؤ +ARABIC LETTER ALEF WITH HAMZA BELOW:'A2:1573:إ +POUND SIGN:'A3:163:£ +CURRENCY SIGN:'A4:164:¤ +ARABIC LETTER YEH WITH HAMZA ABOVE:'A5:1574:ئ +BROKEN BAR:'A6:166:¦ +SECTION SIGN:'A7:167:§ +ARABIC LETTER ALEF:'A8:1575:ا +COPYRIGHT SIGN:'A9:169:© +ARABIC LETTER BEH:'AA:1576:ب +LEFT-POINTING DOUBLE ANGLE QUOTATION MARK:'AB:171:« +NOT SIGN:'AC:172:¬ +SOFT HYPHEN:'AD:173:­ +REGISTERED SIGN:'AE:174:® +ARABIC LETTER PEH:'AF:1662:پ +DEGREE SIGN:'B0:176:° +PLUS-MINUS SIGN:'B1:177:± +ARABIC LETTER TEH MARBUTA:'B2:1577:ة +ARABIC LETTER TEH:'B3:1578:ت +ARABIC LETTER THEH:'B4:1579:ث +MICRO SIGN:'B5:181:µ +PILCROW SIGN:'B6:182:¶ +MIDDLE DOT:'B7:183:· +ARABIC LETTER JEEM:'B8:1580:ج +ARABIC LETTER TCHEH:'B9:1670:چ +ARABIC LETTER HAH:'BA:1581:ح +RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK:'BB:187:» +ARABIC LETTER KHAH:'BC:1582:خ +ARABIC LETTER DAL:'BD:1583:د +ARABIC LETTER THAL:'BE:1584:ذ +ARABIC LETTER REH:'BF:1585:ر +LATIN CAPITAL LETTER A WITH GRAVE:'C0:192:À +ARABIC LETTER ZAIN:'C1:1586:ز +LATIN CAPITAL LETTER A WITH CIRCUMFLEX:'C2:194: +ARABIC LETTER JEH:'C3:1688:ژ +ARABIC LETTER SEEN:'C4:1587:س +ARABIC LETTER SHEEN:'C5:1588:ش +ARABIC LETTER SAD:'C6:1589:ص +LATIN CAPITAL LETTER C WITH CEDILLA:'C7:199:Ç +LATIN CAPITAL LETTER E WITH GRAVE:'C8:200:È +LATIN CAPITAL LETTER E WITH ACUTE:'C9:201:É +LATIN CAPITAL LETTER E WITH CIRCUMFLEX:'CA:202:Ê +LATIN CAPITAL LETTER E WITH DIAERESIS:'CB:203:Ë +ARABIC LETTER DAD:'CC:1590:ض +ARABIC LETTER TAH:'CD:1591:ط +LATIN CAPITAL LETTER I WITH CIRCUMFLEX:'CE:206:Î +LATIN CAPITAL LETTER I WITH DIAERESIS:'CF:207:Ï +BOPOMOFO LETTER ZH:'D0:12563:ㄓ +ARABIC LETTER AIN:'D1:1593:ع +ARABIC LETTER GHAIN:'D2:1594:غ +ARABIC TATWEEL:'D3:1600:ـ +LATIN CAPITAL LETTER O WITH CIRCUMFLEX:'D4:212:Ô +ARABIC LETTER FEH:'D5:1601:ف +ARABIC LETTER QAF:'D6:1602:ق +MULTIPLICATION SIGN:'D7:215:× +ARABIC LETTER KAF:'D8:1603:ك +LATIN CAPITAL LETTER U WITH GRAVE:'D9:217:Ù +ARABIC LETTER GAF:'DA:1711:گ +LATIN CAPITAL LETTER U WITH CIRCUMFLEX:'DB:219:Û +LATIN CAPITAL LETTER U WITH DIAERESIS:'DC:220:Ü +ARABIC LETTER LAM:'DD:1604:ل +ARABIC LETTER MEEM:'DE:1605:م +ARABIC LETTER NOON:'DF:1606:ن +LATIN SMALL LETTER A WITH GRAVE:'E0:224:à +ARABIC LETTER HEH:'E1:1607:ه +LATIN SMALL LETTER A WITH CIRCUMFLEX:'E2:226:â +ARABIC LETTER HAH WITH HAMZA ABOVE:'E3:1665:ځ +ARABIC LETTER WAW:'E4:1608:و +ARABIC LETTER ALEF MAKSURA:'E5:1609:ى +ARABIC LETTER YEH:'E6:1610:ي +LATIN SMALL LETTER C WITH CEDILLA:'E7:231:ç +LATIN SMALL LETTER E WITH GRAVE:'E8:232:è +LATIN SMALL LETTER E WITH ACUTE:'E9:233:é +LATIN SMALL LETTER E WITH CIRCUMFLEX:'EA:234:ê +LATIN SMALL LETTER E WITH DIAERESIS:'EB:235:ë +ARABIC FATHATAN:'EC:1611:ً +ARABIC DAMMATAN:'ED:1612:ٌ +LATIN SMALL LETTER I WITH CIRCUMFLEX:'EE:238:î +LATIN SMALL LETTER I WITH DIAERESIS:'EF:239:ï +ARABIC KASRATAN:'F0:1613:ٍ +ARABIC FATHA:'F1:1614:َ +ARABIC DAMMA:'F2:1615:ُ +ARABIC KASRA:'F3:1616:ِ +LATIN SMALL LETTER O WITH CIRCUMFLEX:'F4:244:ô +ARABIC SHADDA:'F5:1617:ّ +ARABIC SUKUN:'F6:1618:ْ +DIVISION SIGN:'F7:247:÷ +LATIN SMALL LETTER U WITH GRAVE:'F9:249:ù +LATIN SMALL LETTER U WITH CIRCUMFLEX:'FB:251:û +LATIN SMALL LETTER U WITH DIAERESIS:'FC:252:ü +LEFT-TO-RIGHT MARK:'FD:8206:‎ +RIGHT-TO-LEFT MARK:'FE:8207:‏ +LATIN SMALL LETTER Y WITH DIAERESIS:'FF:255:ÿ + + +SINGLE LOW-9 QUOTATION MARK:'82:8218:‚ +DOUBLE LOW-9 QUOTATION MARK:'84:8222:„ +HORIZONTAL ELLIPSIS:'85:8230:… +DAGGER:'86:8224:† +DOUBLE DAGGER:'87:8225:‡ +PER MILLE SIGN:'89:8240:‰ +SINGLE LEFT-POINTING ANGLE QUOTATION MARK:'8B:8249:‹ +LEFT SINGLE QUOTATION MARK:'91:8216:‘ +RIGHT SINGLE QUOTATION MARK:'92:8217:’ +LEFT DOUBLE QUOTATION MARK:'93:8220:“ +RIGHT DOUBLE QUOTATION MARK:'94:8221:” +BULLET:'95:8226:• +EN DASH:'96:8211:– +EM DASH:'97:8212:— +TRADE MARK SIGN:'99:8482:™ +SINGLE RIGHT-POINTING ANGLE QUOTATION MARK:'9B:8250:› +NO-BREAK SPACE:'A0:160:  +CENT SIGN:'A2:162:¢ +POUND SIGN:'A3:163:£ +CURRENCY SIGN:'A4:164:¤ +BROKEN BAR:'A6:166:¦ +SECTION SIGN:'A7:167:§ +LATIN CAPITAL LETTER O WITH STROKE:'A8:216:Ø +COPYRIGHT SIGN:'A9:169:© +LATIN CAPITAL LETTER R WITH CEDILLA:'AA:342:Ŗ +LEFT-POINTING DOUBLE ANGLE QUOTATION MARK:'AB:171:« +NOT SIGN:'AC:172:¬ +SOFT HYPHEN:'AD:173:­ +REGISTERED SIGN:'AE:174:® +LATIN CAPITAL LETTER AE:'AF:198:Æ +DEGREE SIGN:'B0:176:° +PLUS-MINUS SIGN:'B1:177:± +SUPERSCRIPT TWO:'B2:178:² +SUPERSCRIPT THREE:'B3:179:³ +MICRO SIGN:'B5:181:µ +PILCROW SIGN:'B6:182:¶ +MIDDLE DOT:'B7:183:· +LATIN SMALL LETTER O WITH STROKE:'B8:248:ø +SUPERSCRIPT ONE:'B9:185:¹ +LATIN SMALL LETTER R WITH CEDILLA:'BA:343:ŗ +RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK:'BB:187:» +VULGAR FRACTION ONE QUARTER:'BC:188:¼ +VULGAR FRACTION ONE HALF:'BD:189:½ +VULGAR FRACTION THREE QUARTERS:'BE:190:¾ +LATIN SMALL LETTER AE:'BF:230:æ +LATIN CAPITAL LETTER A WITH OGONEK:'C0:260:Ą +LATIN CAPITAL LETTER I WITH OGONEK:'C1:302:Į +LATIN CAPITAL LETTER A WITH MACRON:'C2:256:Ā +LATIN CAPITAL LETTER C WITH ACUTE:'C3:262:Ć +LATIN CAPITAL LETTER A WITH DIAERESIS:'C4:196:Ä +LATIN CAPITAL LETTER A WITH RING ABOVE:'C5:197:Å +LATIN CAPITAL LETTER E WITH OGONEK:'C6:280:Ę +LATIN CAPITAL LETTER E WITH MACRON:'C7:274:Ē +LATIN CAPITAL LETTER C WITH CARON:'C8:268:Č +LATIN CAPITAL LETTER E WITH ACUTE:'C9:201:É +LATIN CAPITAL LETTER Z WITH ACUTE:'CA:377:Ź +LATIN CAPITAL LETTER E WITH DOT ABOVE:'CB:278:Ė +LATIN CAPITAL LETTER G WITH CEDILLA:'CC:290:Ģ +LATIN CAPITAL LETTER K WITH CEDILLA:'CD:310:Ķ +LATIN CAPITAL LETTER I WITH MACRON:'CE:298:Ī +LATIN CAPITAL LETTER L WITH CEDILLA:'CF:315:Ļ +LATIN CAPITAL LETTER S WITH CARON:'D0:352:Š +LATIN CAPITAL LETTER N WITH ACUTE:'D1:323:Ń +LATIN CAPITAL LETTER N WITH CEDILLA:'D2:325:Ņ +LATIN CAPITAL LETTER O WITH ACUTE:'D3:211:Ó +LATIN CAPITAL LETTER O WITH MACRON:'D4:332:Ō +LATIN CAPITAL LETTER O WITH TILDE:'D5:213:Õ +LATIN CAPITAL LETTER O WITH DIAERESIS:'D6:214:Ö +MULTIPLICATION SIGN:'D7:215:× +LATIN CAPITAL LETTER U WITH OGONEK:'D8:370:Ų +LATIN CAPITAL LETTER L WITH STROKE:'D9:321:Ł +LATIN CAPITAL LETTER S WITH ACUTE:'DA:346:Ś +LATIN CAPITAL LETTER U WITH MACRON:'DB:362:Ū +LATIN CAPITAL LETTER U WITH DIAERESIS:'DC:220:Ü +LATIN CAPITAL LETTER Z WITH DOT ABOVE:'DD:379:Ż +LATIN CAPITAL LETTER Z WITH CARON:'DE:381:Ž +LATIN SMALL LETTER SHARP S (GERMAN):'DF:223:ß +LATIN SMALL LETTER A WITH OGONEK:'E0:261:ą +LATIN SMALL LETTER I WITH OGONEK:'E1:303:į +LATIN SMALL LETTER A WITH MACRON:'E2:257:ā +LATIN SMALL LETTER C WITH ACUTE:'E3:263:ć +LATIN SMALL LETTER A WITH DIAERESIS:'E4:228:ä +LATIN SMALL LETTER A WITH RING ABOVE:'E5:229:å +LATIN SMALL LETTER E WITH OGONEK:'E6:281:ę +LATIN SMALL LETTER E WITH MACRON:'E7:275:ē +LATIN SMALL LETTER C WITH CARON:'E8:269:č +LATIN SMALL LETTER E WITH ACUTE:'E9:233:é +LATIN SMALL LETTER Z WITH ACUTE:'EA:378:ź +LATIN SMALL LETTER E WITH DOT ABOVE:'EB:279:ė +LATIN SMALL LETTER G WITH CEDILLA:'EC:291:ģ +LATIN SMALL LETTER K WITH CEDILLA:'ED:311:ķ +LATIN SMALL LETTER I WITH MACRON:'EE:299:ī +LATIN SMALL LETTER L WITH CEDILLA:'EF:316:ļ +LATIN SMALL LETTER S WITH CARON:'F0:353:š +LATIN SMALL LETTER N WITH ACUTE:'F1:324:ń +LATIN SMALL LETTER N WITH CEDILLA:'F2:326:ņ +LATIN SMALL LETTER O WITH ACUTE:'F3:243:ó +LATIN SMALL LETTER O WITH MACRON:'F4:333:ō +LATIN SMALL LETTER O WITH TILDE:'F5:245:õ +LATIN SMALL LETTER O WITH DIAERESIS:'F6:246:ö +DIVISION SIGN:'F7:247:÷ +LATIN SMALL LETTER U WITH OGONEK:'F8:371:ų +LATIN SMALL LETTER L WITH STROKE:'F9:322:ł +LATIN SMALL LETTER S WITH ACUTE:'FA:347:ś +LATIN SMALL LETTER U WITH MACRON:'FB:363:ū +LATIN SMALL LETTER U WITH DIAERESIS:'FC:252:ü +LATIN SMALL LETTER Z WITH DOT ABOVE:'FD:380:ż +LATIN SMALL LETTER Z WITH CARON:'FE:382:ž + + +LATIN CAPITAL LETTER A WITH DIAERESIS:'80:196:Ä +LATIN CAPITAL LETTER A WITH RING ABOVE:'81:197:Å +LATIN CAPITAL LETTER C WITH CEDILLA:'82:199:Ç +LATIN CAPITAL LETTER E WITH ACUTE:'83:201:É +LATIN CAPITAL LETTER N WITH TILDE:'84:209:Ñ +LATIN CAPITAL LETTER O WITH DIAERESIS:'85:214:Ö +LATIN CAPITAL LETTER U WITH DIAERESIS:'86:220:Ü +LATIN SMALL LETTER A WITH ACUTE:'87:225:á +LATIN SMALL LETTER A WITH GRAVE:'88:224:à +LATIN SMALL LETTER A WITH CIRCUMFLEX:'89:226:â +LATIN SMALL LETTER A WITH DIAERESIS:'8A:228:ä +LATIN SMALL LETTER A WITH TILDE:'8B:227:ã +LATIN SMALL LETTER A WITH RING ABOVE:'8C:229:å +LATIN SMALL LETTER C WITH CEDILLA:'8D:231:ç +LATIN SMALL LETTER E WITH ACUTE:'8E:233:é +LATIN SMALL LETTER E WITH GRAVE:'8F:232:è +LATIN SMALL LETTER E WITH CIRCUMFLEX:'90:234:ê +LATIN SMALL LETTER E WITH DIAERESIS:'91:235:ë +LATIN SMALL LETTER I WITH ACUTE:'92:237:í +LATIN SMALL LETTER I WITH GRAVE:'93:236:ì +LATIN SMALL LETTER I WITH CIRCUMFLEX:'94:238:î +LATIN SMALL LETTER I WITH DIAERESIS:'95:239:ï +LATIN SMALL LETTER N WITH TILDE:'96:241:ñ +LATIN SMALL LETTER O WITH ACUTE:'97:243:ó +LATIN SMALL LETTER O WITH GRAVE:'98:242:ò +LATIN SMALL LETTER O WITH CIRCUMFLEX:'99:244:ô +LATIN SMALL LETTER O WITH DIAERESIS:'9A:246:ö +LATIN SMALL LETTER O WITH TILDE:'9B:245:õ +LATIN SMALL LETTER U WITH ACUTE:'9C:250:ú +LATIN SMALL LETTER U WITH GRAVE:'9D:249:ù +LATIN SMALL LETTER U WITH CIRCUMFLEX:'9E:251:û +LATIN SMALL LETTER U WITH DIAERESIS:'9F:252:ü +DAGGER:'A0:8224:† +DEGREE SIGN:'A1:176:° +CENT SIGN:'A2:162:¢ +POUND SIGN:'A3:163:£ +SECTION SIGN:'A4:167:§ +BULLET:'A5:8226:• +PILCROW SIGN:'A6:182:¶ +LATIN SMALL LETTER SHARP S:'A7:223:ß +REGISTERED SIGN:'A8:174:® +COPYRIGHT SIGN:'A9:169:© +TRADE MARK SIGN:'AA:8482:™ +ACUTE ACCENT:'AB:180:´ +DIAERESIS:'AC:168:¨ +NOT EQUAL TO:'AD:8800:≠ +LATIN CAPITAL LETTER AE:'AE:198:Æ +LATIN CAPITAL LETTER O WITH STROKE:'AF:216:Ø +INFINITY:'B0:8734:∞ +PLUS-MINUS SIGN:'B1:177:± +LESS-THAN OR EQUAL TO:'B2:8804:≤ +GREATER-THAN OR EQUAL TO:'B3:8805:≥ +YEN SIGN:'B4:165:¥ +MICRO SIGN:'B5:181:µ +PARTIAL DIFFERENTIAL:'B6:8706:∂ +BULLET:'B7:8226:• +N-ARY PRODUCT:'B8:8719:∏ +GREEK SMALL LETTER PI:'B9:960:π +INTEGRAL:'BA:8747:∫ +FEMININE ORDINAL INDICATOR:'BB:170:ª +MASCULINE ORDINAL INDICATOR:'BC:186:º +GREEK CAPITAL LETTER OMEGA:'BD:937:Ω +LATIN SMALL LETTER AE:'BE:230:æ +LATIN SMALL LETTER O WITH STROKE:'BF:248:ø +INVERTED QUESTION MARK:'C0:191:¿ +INVERTED EXCLAMATION MARK:'C1:161:¡ +NOT SIGN:'C2:172:¬ +SQUARE ROOT:'C3:8730:√ +LATIN SMALL LETTER F WITH HOOK:'C4:402:ƒ +ALMOST EQUAL TO:'C5:8776:≈ +INCREMENT:'C6:8710:∆ +LEFT-POINTING DOUBLE ANGLE QUOTATION MARK:'C7:171:« +RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK:'C8:187:» +HORIZONTAL ELLIPSIS:'C9:8230:… +NO-BREAK SPACE:'CA:160:  +LATIN CAPITAL LETTER A WITH GRAVE:'CB:192:À +LATIN CAPITAL LETTER A WITH TILDE:'CC:195:à +LATIN CAPITAL LETTER O WITH TILDE:'CD:213:Õ +LATIN CAPITAL LIGATURE OE:'CE:338:Œ +LATIN SMALL LIGATURE OE:'CF:339:œ +EN DASH:'D0:8211:– +EM DASH:'D1:8212:— +LEFT DOUBLE QUOTATION MARK:'D2:8220:“ +RIGHT DOUBLE QUOTATION MARK:'D3:8221:” +LEFT SINGLE QUOTATION MARK:'D4:8216:‘ +RIGHT SINGLE QUOTATION MARK:'D5:8217:’ +DIVISION SIGN:'D6:247:÷ +LOZENGE:'D7:9674:◊ +LATIN SMALL LETTER Y WITH DIAERESIS:'D8:255:ÿ +LATIN CAPITAL LETTER Y WITH DIAERESIS:'D9:376:Ÿ +FRACTION SLASH:'DA:8260:⁄ +EURO SIGN:'DB:8364:€ +SINGLE LEFT-POINTING ANGLE QUOTATION MARK:'DC:8249:‹ +SINGLE RIGHT-POINTING ANGLE QUOTATION MARK:'DD:8250:› +LATIN SMALL LIGATURE FI:'DE:64257:fi +LATIN SMALL LIGATURE FL:'DF:64258:fl +DOUBLE DAGGER:'E0:8225:‡ +MIDDLE DOT:'E1:183:· +SINGLE LOW-9 QUOTATION MARK:'E2:8218:‚ +DOUBLE LOW-9 QUOTATION MARK:'E3:8222:„ +PER MILLE SIGN:'E4:8240:‰ +LATIN CAPITAL LETTER A WITH CIRCUMFLEX:'E5:194: +LATIN CAPITAL LETTER E WITH CIRCUMFLEX:'E6:202:Ê +LATIN CAPITAL LETTER A WITH ACUTE:'E7:193:Á +LATIN CAPITAL LETTER E WITH DIAERESIS:'E8:203:Ë +LATIN CAPITAL LETTER E WITH GRAVE:'E9:200:È +LATIN CAPITAL LETTER I WITH ACUTE:'EA:205:Í +LATIN CAPITAL LETTER I WITH CIRCUMFLEX:'EB:206:Î +LATIN CAPITAL LETTER I WITH DIAERESIS:'EC:207:Ï +LATIN CAPITAL LETTER I WITH GRAVE:'ED:204:Ì +LATIN CAPITAL LETTER O WITH ACUTE:'EE:211:Ó +LATIN CAPITAL LETTER O WITH CIRCUMFLEX:'EF:212:Ô +APPLE LOGO:'F0:63743: +LATIN CAPITAL LETTER O WITH GRAVE:'F1:210:Ò +LATIN CAPITAL LETTER U WITH ACUTE:'F2:218:Ú +LATIN CAPITAL LETTER U WITH CIRCUMFLEX:'F3:219:Û +LATIN CAPITAL LETTER U WITH GRAVE:'F4:217:Ù +LATIN SMALL LETTER DOTLESS I:'F5:305:ı +MODIFIER LETTER CIRCUMFLEX ACCENT:'F6:710:ˆ +SMALL TILDE:'F7:732:˜ +MACRON:'F8:175:¯ +BREVE:'F9:728:˘ +DOT ABOVE:'FA:729:˙ +RING ABOVE:'FB:730:˚ +CEDILLA:'FC:184:¸ +DOUBLE ACUTE ACCENT:'FD:733:˝ +OGONEK:'FE:731:˛ +CARON:'FF:711:ˇ + + +LATIN SMALL LETTER A:'61:97:'41 +LATIN SMALL LETTER B:'62:98:'42 +LATIN SMALL LETTER C:'63:99:'43 +LATIN SMALL LETTER D:'64:100:'44 +LATIN SMALL LETTER E:'65:101:'45 +LATIN SMALL LETTER F:'66:102:'46 +LATIN SMALL LETTER G:'67:103:'47 +LATIN SMALL LETTER H:'68:104:48 +LATIN SMALL LETTER I:'69:105:'49 +LATIN SMALL LETTER J:'6A:106:'4a +LATIN SMALL LETTER K:'6B:107:'4b +LATIN SMALL LETTER L:'6C:108:'4c +LATIN SMALL LETTER M:'6D:109:'4d +LATIN SMALL LETTER N:'6E:110:'4e +LATIN SMALL LETTER O:'6F:111:'4f +LATIN SMALL LETTER P:'70:112:'50 +LATIN SMALL LETTER Q:'71:113:'51 +LATIN SMALL LETTER R:'72:114:'52 +LATIN SMALL LETTER S:'73:115:'53 +LATIN SMALL LETTER T:'74:116:'54 +LATIN SMALL LETTER U:'75:117:'55 +LATIN SMALL LETTER V:'76:118:'56 +LATIN SMALL LETTER W:'77:119:'57 +LATIN SMALL LETTER X:'78:120:'58 +LATIN SMALL LETTER Y:'79:121:'59 +LATIN SMALL LETTER Z:'7A:122:'5a +NO UNICODE VALUE:'87:135:\'E7 +NO UNICODE VALUE:'8E:142:\'83 +NO UNICODE VALUE:'EA:234:\'92 +NO UNICODE VALUE:'97:151:\'EE +NO UNICODE VALUE:'9C:156:\'F2 +NO UNICODE VALUE:'88:136:\'CB +NO UNICODE VALUE:'8F:143:\'E9 +NO UNICODE VALUE:'93:147:\'ED +NO UNICODE VALUE:'98:152:\'F1 +NO UNICODE VALUE:'9D:157:\'F4 +NO UNICODE VALUE:'89:137:\'D5 +NO UNICODE VALUE:'90:144:\'E6 +NO UNICODE VALUE:'94:148:\'EB +NO UNICODE VALUE:'99:153:\'EF +NO UNICODE VALUE:'9E:158:\'F3 +NO UNICODE VALUE:'BF:191:\'AF +NO UNICODE VALUE:'96:150:\'84 +NO UNICODE VALUE:'9B:155:\'CD +NO UNICODE VALUE:'8B:139:\'CC +NO UNICODE VALUE:'8A:138:\'80 +NO UNICODE VALUE:'91:145:\'E8 +NO UNICODE VALUE:'95:149:\'EC +NO UNICODE VALUE:'9A:154:\'85 +NO UNICODE VALUE:'9F:159:\'86 +NO UNICODE VALUE:'8D:141:\'82 +NO UNICODE VALUE:'8C:140:\'81 + + +LATIN SMALL LETTER A:a:97:A +LATIN SMALL LETTER B:b:98:B +LATIN SMALL LETTER C:c:99:C +LATIN SMALL LETTER D:d:100:D +LATIN SMALL LETTER E:e:101:E +LATIN SMALL LETTER F:f:102:F +LATIN SMALL LETTER G:g:103:G +LATIN SMALL LETTER H:h:104:H +LATIN SMALL LETTER I:i:105:I +LATIN SMALL LETTER J:j:106:J +LATIN SMALL LETTER K:k:107:K +LATIN SMALL LETTER L:l:108:L +LATIN SMALL LETTER M:m:109:M +LATIN SMALL LETTER N:n:110:N +LATIN SMALL LETTER O:o:111:O +LATIN SMALL LETTER P:p:112:P +LATIN SMALL LETTER Q:q:113:Q +LATIN SMALL LETTER R:r:114:R +LATIN SMALL LETTER S:s:115:S +LATIN SMALL LETTER T:t:116:T +LATIN SMALL LETTER U:u:117:U +LATIN SMALL LETTER V:v:118:V +LATIN SMALL LETTER W:w:119:W +LATIN SMALL LETTER X:x:120:X +LATIN SMALL LETTER Y:y:121:Y +LATIN SMALL LETTER Z:z:122:Z + + +MY UNDEFINED SYMBOL:'C3:00: +SPACE:'20:32: +EXCLAMATION MARK:'21:33:! +FOR ALL:'22:8704:∀ +NUMBER SIGN:'23:35:# +THERE EXISTS:'24:8707:∃ +PERCENTAGE SIGN:'25:37:% +AMPERSAND:'26:38:& +CONTAINS AS A MEMBER:'27:8715:∋ +LEFT PARENTHESIS:'28:40:( +RIGHT PERENTHESIS:'29:41:) +ASTERISK OPERATOR:'2A:8727:∗ +PLUS SIGN:'2B:43:+ +COMMA:'2C:44:, +MINUS SIGN:'2D:8722:− +FULL STOP:'2E:46:. +DIVISION SLASH:'2F:8725:∕ +DIGIT ZERO:'30:48:0 +DIGIT ONE:'31:49:1 +DIGIT TWO:'32:50:2 +DIGIT THREE:'33:51:3 +DIGIT FOUR:'34:52:4 +DIGIT FIVE:'35:53:5 +DIGIT SIX:'36:54:6 +DIGIT SEVEN:'37:55:7 +DIGIT EIGHT:'38:56:8 +DIGIT NINE:'39:57:9 +RATIO:'3A:8758:∶ +SEMICOLON:'3B:59:; +LESS-THAN SIGN:'3C:60:< +EQUALS SIGN TO:'3D:61:= +GREATER-THAN SIGN:'3E:62:> +QUESTION MARK:'3F:63:? +APPROXTIMATELY EQUAL TO:'40:8773:≅ +GREEK CAPITOL LETTER ALPHA:'41:913:Α +GREEK CAPAITOL LETTER BETA:'42:914:Β +GREEK CAPITOL LETTER CHI:'43:935:Χ +GREEK CAPITOL LETTER DELTA:'44:916:Δ +GREEK CAPITOL LETTER EPSILON:'45:917:Ε +GREEK CAPITOL LETTER PHI:'46:934:Φ +GREEK CAPITOL LETTER GAMMA:'47:915:Γ +GREEK CAPITOL LETTER ETA:'48:919:Η +GREEK CAPITOL LETTER ITOA:'49:913:Α +GREEK THETA SYMBOL:'4A:977:ϑ +GREEK CAPITOL LETTER KAPPA:'4B:922:Κ +GREEK CAPITOL LETTER LAMBDA:'4C:923:Λ +GREEK CAPITOL LETTER MU:'4D:924:Μ +GREEK CAPITOL LETTER NU:'4E:925:Ν +GREEK CAPITOL LETTER OMICRON:'4F:927:Ο +GREEK CAPITAL LETTER PI:'50:928:Π +GREEK CAPITOL LETTER THETA:'51:920:Θ +GREEK CAPITOL LETTER RHO:'52:929:Ρ +GREEK CAPITOL LETTER SIGMA:'53:931:Σ +GREEK CAPITOL LETTER TAU:'54:932:Τ +GREEK CAPITOL LETTER UPSILON:'55:933:Υ +GREEK LETTER STIGMA:'56:986:Ϛ +GREEK CAPITOL LETTER OMEGA:'57:937:Ω +GREEK CAPITOL LETTER XI:'58:926:Ξ +GREEK CAPITOL LETTER PSI:'59:936:Ψ +GREEK CAPITOL LETTER ZETA:'5A:918:Ζ +LEFT SQUARE BRACKET:'5B:91:[ +THEREFORE:'5C:8756:∴ +RIGHT SQUARE BRACKET:'5D:93:] +UP TACK:'5E:8869:⊥ +MODIFIER LETTER LOW MACRON:'5F:717:ˍ +MODIFIER LETTER MACRON:'60:713:ˉ +GREEK SMALL LETTER ALPHA:'61:945:α +GREEK SMALL LETTER BETA:'62:946:β +GREEK SMALL LETTER CHI:'63:967:χ +GREEK SMALL LETTER DELTA:'64:948:δ +GREEK SMALL LETTER EPSILON:'65:949:ε +GREEK PHI SYMBOL:'66:981:ϕ +GREEK MSALL LETTER DELTA:'67:947:γ +GREEK SMALL LETTER ETA:'68:951:η +GREEK SMALL LETTER IOTA:'69:953:ι +GREEK SMALL LETTER PHI:'6A:966:φ +GREEK SMALL LETTER KAPPA:'6B:954:κ +GREEK SMALL LETTER LAMDA:'6C:955:λ +GREEK SMALL LETTER MU:'6D:956:μ +GREEK SMALL LETTER NU:'6E:957:ν +GREEK SMALL LETTER OMICRON:'6F:959:ο +GREEK SMALL LETTER PI:'70:960:π +GREEK SMALL LETTER THETA:'71:952:θ +GREEK SMALL LETTER RHO:'72:961:ρ +GREEK SMALL LETTER SIGMA:'73:963:σ +GREEK SMALL LETTER TAU:'74:964:τ +GREEK SMALL LETTER UPSILON:'75:965:υ +GREEK PI SYMBOL:'76:982:ϖ +GREEK SMALL LETTER OMEGA:'77:969:ω +GREEK SMALL LETTER XI:'78:958:ξ +GREEK SMALL LETTER PHI:'79:966:φ +GREEK SMALL LETTER ZETA:'7A:950:ζ +LEFT CURLY BRACKET:'7B:123:{ +DIVIDES:'7C:8739:∣ +RIGHT CURLY BRACKET:'7D:125:} +TILDE OPERATOR:'7E:8764:∼ +GREEK UPSILON WITH HOOK SYMBOL:'A1:978:ϒ +COMBINING ACUTE TONE MARK:'A2:833:́ +LESS THAN OR EQUAL TO:'A3:8804:≤ +DIVISION SLASH:'A4:8725:∕ +INFINITY:'A5:8734:∞ +LATIN SMALL LETTER F WITH HOOK:'A6:402:ƒ +BLACK CLUB SUIT:'A7:9827:♣ +BLACK DIAMOND SUIT:'A8:9830:♦ +BLACK HEART SUIT:'A9:9829:♥ +BLACK SPADE SUIT:'AA:9824:♠ +LEFT RIGHT ARROW:'AB:8596:↔ +LEFTWARDS ARROW:'AC:8592:← +UPWARDS ARROW:'AD:8593:↑ +RIGHTWARDS ARROW:'AE:8594:→ +DOWNWARDS ARROW:'AF:8595:↓ +DEGREE SIGN:'B0:176:° +PLUS OR MINUS SIGN:'B1:177:± +DOUBLE ACUTE ACCENT:'B2:733:˝ +GREATER THAN OR EQUAL TO:'B3:8805:≥ +MULTIPLICATION SIGN:'B4:215:× +DON'T KNOW:'B5:8733:∝ +PARTIAL DIFFERENTIAL:'B6:8706:∂ +BULLET:'B7:183:· +DIVISION:'B8:247:÷ +NOT EQUAL TO:'B9:8800:≠ +IDENTICAL TO:'BA:8801:≡ +ALMOST EQUAL TO:'BB:8776:≈ +MIDLINE HORIZONTAL ELLIPSES:'BC:8943:⋯ +DIVIDES:'BD:8739:∣ +BOX DRAWINGS LIGHT HORIZONTAL:'BE:9472:─ +DOWNWARDS ARROW WITH TIP LEFTWARDS:'BF:8626:↲ +CIRCLED TIMES:'C4:8855:⊗ +CIRCLED PLUS:'C5:8853:⊕ +EMPTY SET:'C6:8709:∅ +INTERSECTION:'C7:8745:∩ +UNION:'C8:8746:∪ +SUPERSET OF:'C9:8835:⊃ +SUPERSET OF OR EQUAL TO:'CA:8839:⊇ +NIETHER A SUBSET OR EQUAL TO:'CB:8836:⊄ +SUBSET OF:'CC:8834:⊂ +SUBSET OR EQUAL TO:'CD:8838:⊆ +ELEMENT OF:'CE:8712:∈ +NOT AN ELEMENT OF:'CF:8713:∉ +ANGLE:'D0:8736:∠ +WHITE DOWN POINTING TRIANBLE:'D1:9661:▽ +REGISTERED SIGN:'D2:174:® +COPYRIGHT:'D3:169:© +TRADEMARK SYMBOL:'D4:8482:™ +NARY OPERATOR:'D5:8719:∏ +SQUARE ROOT:'D6:8730:√ +BULLET OPERATOR:'D7:8729:∙ +NOT SIGN:'D8:172:¬ +LOGICAL AND:'D9:8743:∧ +LOGICAL OR:'DA:8744:∨ +LEFT RIGHT DOUBLE ARROW:'DB:8660:⇔ +LEFTWARDS DOUBLE ARROW:'DC:8656:⇐ +UPWARDS DOUBLE ARROW:'DD:8657:⇑ +RIGHTWARDS DOUBLE ARROW:'DE:8658:⇒ +DOWNWARDS DOUBLE ARROW:'DF:8659:⇓ +BETWEEN:'E0:8812:≬ +MATHMATICAL LEFT ANGELBRACKET:'E1:10216:⟨ +REGISTERED SIGN:'E2:174:® +COPYRIGHT:'E3:169:© +TRADEMARK SYMBOL:'E4:8482:™ +N-ARY SUMMATION:'E5:8721:∑ +LARGE LEFT PARENTHESIS PART1:'E6:0: +LARGE LEFT PARENTHESIS PART2:'E7:0: +LARGE LEFT PARENTHESIS PART3:'E8:0: +LARGE LEFT SQUARE BRACKET PART1:'E9:0: +LARGE LEFT SQUARE BRACKET PART2:'EA:0: +LARGE LEFT SQUARE BRACKET PART3:'EB:0: +LARGE LEFT BRACKET PART1:'EC:0: +LARGE LEFT BRACKET PART2:'ED:0: +LARGE LEFT BRACKET PART3:'EE:0: +DIVIDES:'EF:8739:∣ +MATHMATICAL RIGHT ANGLE BRACKET:'F1:10217:⟩ +INTEGRAL:'F2:8747:∫ +LARGE INTEGRAL PART 1:'F3:0: +LARGE INTEGRAL PART 2:'F4:0: +LARGE INTEGRAL PART 3:'F5:0: +LARGE RIGHT PARENTHESIS PART1:'F6:0: +LARGE RIGHT PARENTHESIS PART2:'F7:0: +LARGE RIGHT PARENTHESIS PART3:'F8:0: +LARGE RIGHT SQAURE BRACKET PART1:'F9:0: +LARGE RIGHT SQUARE BRACKET PART2:'FA:0: +LARGE RIGHT SQUARE BRACKETPART3:'FB:0: +LARGE RIGHT BRACKET PART1:'FC:0: +LARGE RIGHT BRACKETPART2:'FD:0: +LARGE RIGHT BRACKETPART3:'FE:0: +DOUBLE ACUTE ACCENT:'B2:733:˝ +MY UNDEFINED SYMBOL:'7F:127: +MY UNDEFINED SYMBOL:'80:128: +MY UNDEFINED SYMBOL:'81:129: +MY UNDEFINED SYMBOL:'82:130: +MY UNDEFINED SYMBOL:'83:131: +MY UNDEFINED SYMBOL:'84:132: +MY UNDEFINED SYMBOL:'85:133: +MY UNDEFINED SYMBOL:'86:134: +MY UNDEFINED SYMBOL:'87:135: +MY UNDEFINED SYMBOL:'88:136: +MY UNDEFINED SYMBOL:'89:137: +MY UNDEFINED SYMBOL:'8A:138: +MY UNDEFINED SYMBOL:'8B:139: +MY UNDEFINED SYMBOL:'8C:140: +MY UNDEFINED SYMBOL:'8D:141: +MY UNDEFINED SYMBOL:'8E:142: +MY UNDEFINED SYMBOL:'8F:143: +MY UNDEFINED SYMBOL:'90:144: +MY UNDEFINED SYMBOL:'91:145: +MY UNDEFINED SYMBOL:'92:146: +MY UNDEFINED SYMBOL:'93:147: +MY UNDEFINED SYMBOL:'94:148: +MY UNDEFINED SYMBOL:'95:149: +MY UNDEFINED SYMBOL:'96:150: +MY UNDEFINED SYMBOL:'97:151: +MY UNDEFINED SYMBOL:'98:152: +MY UNDEFINED SYMBOL:'99:153: +MY UNDEFINED SYMBOL:'9A:154: +MY UNDEFINED SYMBOL:'9B:155: +MY UNDEFINED SYMBOL:'9C:156: +MY UNDEFINED SYMBOL:'9D:157: +MY UNDEFINED SYMBOL:'9E:158: +MY UNDEFINED SYMBOL:'9F:159: +MY UNDEFINED SYMBOL:'A0:160: +MY UNDEFINED SYMBOL:'F0:160: + + +SPACE: :32:\'20 +EXCLAMATION MARK:!:33:\'21 +QUOTATION MARK:":34:\'22 +NUMBER SIGN:#:35:\'23 +DOLLAR SIGN:$:36:\'24 +PERCENT SIGN:%:37:\'25 +AMPERSAND:&:38:\'26 +APOSTROPHE:':39:\'27 +LEFT PARENTHESIS:(:40:\'28 +RIGHT PARENTHESIS:):41:\'29 +ASTERISK:*:42:\'2A +PLUS SIGN:+:43:\'2B +COMMA:,:44:\'2C +HYPHEN-MINUS:-:45:\'2D +FULL STOP:.:46:\'2E +SOLIDUS:/:47:\'2F +DIGIT ZERO:0:48:\'30 +DIGIT ONE:1:49:\'31 +DIGIT TWO:2:50:\'32 +DIGIT THREE:3:51:\'33 +DIGIT FOUR:4:52:\'34 +DIGIT FIVE:5:53:\'35 +DIGIT SIX:6:54:\'36 +DIGIT SEVEN:7:55:\'37 +DIGIT EIGHT:8:56:\'38 +DIGIT NINE:9:57:\'39 +COLON:\colon:58:\'3A +SEMICOLON:;:59:\'3B +EQUALS SIGN:=:61:\'3D +QUESTION MARK:?:63:\'3F +LATIN CAPITAL LETTER A:A:65:\'41 +LATIN CAPITAL LETTER B:B:66:\'42 +LATIN CAPITAL LETTER C:C:67:\'43 +LATIN CAPITAL LETTER D:D:68:\'44 +LATIN CAPITAL LETTER E:E:69:\'45 +LATIN CAPITAL LETTER F:F:70:\'46 +LATIN CAPITAL LETTER G:G:71:\'47 +LATIN CAPITAL LETTER H:H:72:\'48 +LATIN CAPITAL LETTER I:I:73:\'49 +LATIN CAPITAL LETTER J:J:74:\'4A +LATIN CAPITAL LETTER K:K:75:\'4B +LATIN CAPITAL LETTER L:L:76:\'4C +LATIN CAPITAL LETTER M:M:77:\'4D +LATIN CAPITAL LETTER N:N:78:\'4E +LATIN CAPITAL LETTER O:O:79:\'4F +LATIN CAPITAL LETTER P:P:80:\'50 +LATIN CAPITAL LETTER Q:Q:81:\'51 +LATIN CAPITAL LETTER R:R:82:\'52 +LATIN CAPITAL LETTER S:S:83:\'53 +LATIN CAPITAL LETTER T:T:84:\'54 +LATIN CAPITAL LETTER U:U:85:\'55 +LATIN CAPITAL LETTER V:V:86:\'56 +LATIN CAPITAL LETTER W:W:87:\'57 +LATIN CAPITAL LETTER X:X:88:\'58 +LATIN CAPITAL LETTER Y:Y:89:\'59 +LATIN CAPITAL LETTER Z:Z:90:\'5A +LEFT SQUARE BRACKET:[:91:\'5B +REVERSE SOLIDUS:\:92:\'5C +RIGHT SQUARE BRACKET:]:93:\'5D +LATIN SMALL LETTER A:a:97:\'61 +LATIN SMALL LETTER B:b:98:\'62 +LATIN SMALL LETTER C:c:99:\'63 +LATIN SMALL LETTER D:d:100:\'64 +LATIN SMALL LETTER E:e:101:\'65 +LATIN SMALL LETTER F:f:102:\'66 +LATIN SMALL LETTER G:g:103:\'67 +LATIN SMALL LETTER H:h:104:\'68 +LATIN SMALL LETTER I:i:105:\'69 +LATIN SMALL LETTER J:j:106:\'6A +LATIN SMALL LETTER K:k:107:\'6B +LATIN SMALL LETTER L:l:108:\'6C +LATIN SMALL LETTER M:m:109:\'6D +LATIN SMALL LETTER N:n:110:\'6E +LATIN SMALL LETTER O:o:111:\'6F +LATIN SMALL LETTER P:p:112:\'70 +LATIN SMALL LETTER Q:q:113:\'71 +LATIN SMALL LETTER R:r:114:\'72 +LATIN SMALL LETTER S:s:115:\'73 +LATIN SMALL LETTER T:t:116:\'74 +LATIN SMALL LETTER U:u:117:\'75 +LATIN SMALL LETTER V:v:118:\'76 +LATIN SMALL LETTER W:w:119:\'77 +LATIN SMALL LETTER X:x:120:\'78 +LATIN SMALL LETTER Y:y:121:\'79 +LATIN SMALL LETTER Z:z:122:\'7A +LEFT CURLY BRACKET:{:123:\'7B +VERTICAL LINE:|:124:\'7C +RIGHT CURLY BRACKET:}:125:\'7D +TILDE:~:126:\'7E + + +SPACE:'20:32: +LOWER RIGHT PENCIL:'21:9998:✎ +BLACK SCISSORS:'22:9986:✂ +UPPER BLADE SCISSORS:'23:9985:✁ +PROPOSE "LOWER LEFT SPECTACLES":'24:none: +PROPOSE "BELL":'25:none: +PROPOSE "OPEN BOOK":'26:none: +PROPOSE "LIGHTED CANDLE":'27:none: +BLACK TELEPHONE:'28:9742:☎ +TELEPHONE LOCATION SIGN:'29:9990:✆ +ENVELOPE:'2A:9993:✉ +ENVELOPE:'2B:9993:✉ +PROPOSE "MAIL FLAG DOWN":'2C:none: +PROPOSE "MAIL FLAG UP":'2D:none: +PROPOSE "MAIL FULL":'2E:none: +PROPOSE "MAIL EMPTY":'2F:none: +PROPOSE "FOLDER CLOSE":'30:none: +PROPOSE "FOLDER OPEN":'31:none: +PROPOSE "DOCUMENT FOLDED":'32:none: +PROPOSE "DOCUMENT":'33:none: +PROPOSE "MULTIPLE DOCUMENTS":'34:none: +PROPOSE "FILE CABINET":'35:none: +HOURGLASS:'36:8987:⌛ +KEYBOARD:'37:9000:⌨ +PROPOSE "MOUSE":'38:none: +PROPOSE "QUICKCAM CAMERA":'39:none: +PROPOSE "COMPUTER":'3A:none: +PROPOSE "HARD DRIVE":'3B:none: +PROPOSE "THREE AND A HALF FLOPPY":'3C:none: +PROPOSE "FIVE AND A QUARTER FLOPPY":'3D:none: +TAPE DRIVE:'3E:9991:✇ +WRITING HAND:'3F:9997:✍ +WRITING HAND:'40:9997:✍ +VICTORY HAND:'41:9996:✌ +PROPOSE "PICKING HAND(OR OMMAT)":'42:none: +PROPOSE "WHITE UP POINTING THUMB":'43:none: +PROPOSE "WHITE DOWN POINTING THUMB":'44:none: +WHITE LEFT POINTING INDEX:'45:9756:☜ +WHITE RIGHT POINTING INDEX:'46:9758:☞ +WHITE UP POINTING INDEX:'47:9757:☝ +WHITE DOWN POINTING INDEX:'48:9759:☟ +PROPOSE "WHITE PALM":'49:none: +WHITE SMILING FACE:'4A:9786:☺ +WHITE SMILING FACE":'4B:9786:☺ +WHITE FROWNING FACE:'4C:9785:☹ +PROPOSE "BLACK BOMB WITH FUSE":'4D:none: +SKULL AND CROSSBONES:'4E:9760:☠ +PROPOSE "WHITE BILLOWING SQUARE FLAG":'4F:none: +PROPOSE "WHITE BILLOWING TRIANGLE FLAG":'50:none: +AIRPLANE:'51:9992:✈ +WHITE SUN WITH RAYS:'52:9788:☼ +PROPOSE "INK BLOT":'53:none: +SNOWFLAKE:'54:10052:❄ +SHADOWED WHITE LATIN CROSS:'55:10014:✞ +SHADOWED WHITE LATIN CROSS:'56:10014:✞ +LATIN CROSS:'57:10013:✝ +MALTESE CROSS:'58:10016:✠ +STAR OF DAVID:'59:10017:✡ +STAR AND CRESCENT:'5A:9770:☪ +YIN YANG:'5B:9775:☯ +DEVANGARI OM CORRECT:'5C:2384:ॐ +WHEEL OF DHARMA:'5D:9784:☸ +ARIES:'5E:9800:♈ +TAURUS:'5F:9801:♉ +GEMINI:'60:9802:♊ +CANCER:'61:9803:♋ +LEO:'62:9804:♌ +VIRGO:'63:9805:♍ +LIBRA:'64:9806:♎ +SCORPIUS:'65:9807:♏ +SAGITTARIUS:'66:9808:♐ +CAPRICORN:'67:9809:♑ +AQUARIUS:'68:9810:♒ +PISCES:'69:9811:♓ +AMPERSAND:'6A:38:& +AMPERSAND:'6B:38:& +BLACK CIRCLE:'6C:9679:● +SHADOWED WHITE CIRCLE:'6D:10061:❍ +BLACK SQUARE:'6E:9632:■ +WHITE SQUARE:'6F:9633:□ +WHITE SQUARE:'70:9633:□ +LOWER RIGHT SHADOWED WHITE SQUARE:'71:10065:❑ +UPPER RIGHT SHADOWED WHITE SQUARE:'72:10066:❒ +LOZENGE:'73:9674:◊ +LOZENGE:'74:9674:◊ +BLACK DIAMOND:'75:9670:◆ +BLACK DIAMOND MINUS WHITE X:'76:10070:❖ +BLACK DIAMOND:'77:9670:◆ +X IN A RECTANGLE BOX:'78:8999:⌧ +APL FUNCTIONAL SYMBOL QUAD UP CARET:'79:9043:⍓ +PLACE OF INTEREST SIGN:'7A:8984:⌘ +WHITE FLORETTE:'7B:10048:❀ +BLACK FLORETTE:'7C:10047:✿ +HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT:'7D:10077:❝ +HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT:'7E:10078:❞ +"UNUSED":'7F:none:udef_symbol/> +CIRCLED DIGIT ZERO:'80:9450:⓪ +CIRCLED DIGIT ONE:'81:9312:① +CIRCLED DIGIT TWO:'82:9313:② +CIRCLED DIGIT THREE:'83:9314:③ +CIRCLED DIGIT FOUR:'84:9315:④ +CIRCLED DIGIT FIVE:'85:9316:⑤ +CIRCLED DIGIT SIX:'86:9317:⑥ +CIRCLED DIGIT SEVEN:'87:9318:⑦ +CIRCLED DIGIT EIGHT:'88:9319:⑧ +CIRCLED DIGIT NINE:'89:9320:⑨ +CIRCLED NUMBER TEN:'8A:9321:⑩ +PROPOSE "DINGBAT NEGATIVE CIRCLED DIGIT ZERO":'8B:none: +DINGBAT NEGATIVE CIRCLED DIGIT ONE:'8C:10102:❶ +DINGBAT NEGATIVE CIRCLED DIGIT TWO:'8D:10103:❷ +DINGBAT NEGATIVE CIRCLED DIGIT THREE:'8E:10104:❸ +DINGBAT NEGATIVE CIRCLED DIGIT FOUR:'8F:10105:❹ +DINGBAT NEGATIVE CIRCLED DIGIT FIVE:'90:10106:❺ +DINGBAT NEGATIVE CIRCLED DIGIT SIX:'91:10107:❻ +DINGBAT NEGATIVE CIRCLED DIGIT SEVEN:'92:10108:❼ +DINGBAT NEGATIVE CIRCLED DIGIT EIGHT:'93:10109:❽ +DINGBAT NEGATIVE CIRCLED DIGIT NINE:'94:10110:❾ +DINGBAT NEGATIVE CIRCLED NUMBER TEN:'95:10111:❿ +ROTATED FLORAL HEART BULLET:'96:10087:❧ +REVERSED ROTATED FLORAL HEART BULLET:'97:9753:☙ +REVERSED ROTATED FLORAL HEART BULLET:'98:9753:☙ +ROTATED FLORAL HEART BULLET:'99:10087:❧ +ROTATED FLORAL HEART BULLET:'9A:10087:❧ +REVERSED ROTATED FLORAL HEART BULLET:'9B:9753:☙ +REVERSED ROTATED FLORAL HEART BULLET:'9C:9753:☙ +ROTATED FLORAL HEART BULLET:'9D:10087:❧ +BULLET:'9E:8226:• +BLACK CIRCLE:'9F:9679:● +DON'T KNOW:'A0:160:  +WHITE CIRCLE:'A1:9675:○ +WHITE CIRCLE:'A2:9675:○ +WHITE CIRCLE:'A3:9675:○ +SUN:'A4:9737:☉ +SUN:'A5:9737:☉ +SHADOWED WHITE CIRCLE:'A6:10061:❍ +BLACK SMALL SQUARE:'A7:9642:▪ +WHITE SQUARE:'A8:9633:□ +PROPOSE "THEE MIGHT BE IN THERE SOMEWHERE":'A9:none: +BLACK FOUR POINTED STAR MAYBE:'AA:10022:✦ +BLACK STAR:'AB:9733:★ +SIX POINTED BLACK STAR:'AC:10038:✶ +EIGHT POINTED RECTILINEAR BLACK STAR:'AD:10039:✷ +TWELVE POINTED BLACK STAR:'AE:10040:✸ +EIGHT POINTED PINWHEEL STAR:'AF:10037:✵ +PROPOSE "CROSSHAIR SQUARE":'B0:none: +PROPOSE "CROSSHAIR CIRCLE":'B1:none: +WHITE FOUR POINTED STAR:'B2:10023:✧ +PROPOSE "THIS HAS TO BE A KNOWN SYMBOL":'B3:none: +REPLACEMENT CHARACTER:'B4:65533:� +CIRCLED WHITE STAR:'B5:10026:✪ +SHADOWED WHITE STAR:'B6:10032:✰ +PROPOSE "1 OCLOCK":'B7:none: +PROPOSE "2 OCLOCK":'B8:none: +PROPOSE "3 OCLOCK":'B9:none: +PROPOSE "4 OCLOCK":'BA:none: +PROPOSE "5 OCLOCK":'BB:none: +PROPOSE "6 OCLOCK":'BC:none: +PROPOSE "7 OCLOCK":'BD:none: +PROPOSE "8 OCLOCK":'BE:none: +PROPOSE "9 OCLOCK":'BF:none: +PROPOSE "10 OCLOCK":'C0:none: +PROPOSE "11 OCLOCK":'C1:none: +PROPOSE "12 OCLOCK":'C2:none: +PROPOSE "NOTCHED DOWNWARDS DOUBLE ARROW WITH TIP LEFTWARDS":'C3:none: +PROPOSE "NOTCHED DOWNWARDS DOUBLE ARROW WITH TIP RIGHTWARDS":'C4:none: +PROPOSE "NOTCHED UPWARDS DOUBLE ARROW WITH TIP LEFTWARDS":'C5:none: +PROPOSE "NOTCHED UPWARDS DOUBLE ARROW WITH TIP RIGHTWARDS":'C6:none: +PROPOSE "NOTCHED LEFTWARDS DOUBLE ARROW WITH TIP UPWARDS":'C7:none: +PROPOSE "NOTCHED RIGHTWARDS DOUBLE ARROW WITH TIP UPWARDS":'C8:none: +PROPOSE "NOTCHED LEFTWARDS DOUBLE ARROW WITH TIP DOWNWARDS":'C9:none: +PROPOSE "NOTCHED RIGHTWARDS DOUBLE ARROW WITH TIP DOWNWARDS":'CA:none: +PROPOSE "NO IDEA":'CB:none: +PROPOSE "REVERSE OF ABOVE":'CC:none: +PROPOSE "HEDERA LOWER LEFT":'CD:none: +PROPOSE "HEDERA UPPER LEFT REVERSED":'CE:none: +PROPOSE "HEDERA LOWER RIGHT REVERSED":'CF:none: +PROPOSE "HEDERA UPPER RIGHT":'D0:none: +PROPOSE "HEDERA UPPER LEFT":'D1:none: +PROPOSE "HEDERA LOWER LEFT REVERSED":'D2:none: +PROPOSE "HEDERA UPPER RIGHT REVERSED":'D3:none: +PROPOSE "HEDERA LOWER RIGHT":'D4:none: +ERASE TO THE LEFT:'D5:9003:⌫ +ERASE TO THE RIGHT:'D6:8998:⌦ +PROPOSE "THREE-D TOP-LIGHTED LEFTWARDS ARROWHEAD":'D7:none: +THREE-D TOP-LIGHTED RIGHTWARDS ARROWHEAD:'D8:10146:➢ +PROPOSE "THREE-D RIGHT-LIGHTED UPWARDS ARROWHEAD":'D9:none: +PROPOSE "THREE-D LEFT-LIGHTED DOWNWARDS ARROWHEAD":'DA:none: +PROPOSE "CIRCLED HEAVY WHITE LEFTWARDS ARROW":'DB:none: +CIRCLED HEAVY WHITE RIGHTWARDS ARROW:'DC:10162:➲ +PROPOSE "CIRCLED HEAVY WHITE UPWARDS ARROW":'DD:none: +PROPOSE "CIRCLED HEAVY WHITE DOWNWARDS ARROW":'DE:none: +PROPOSE "WIDE-HEADED LEFTWARDS ARROW":'DF:none: +PROPOSE "WIDE-HEADED RIGHTWARDS ARROW":'E0:none: +PROPOSE "WIDE-HEADED UPWARDS ARROW":'E1:none: +PROPOSE "WIDE-HEADED DOWNWARDS ARROW":'E2:none: +PROPOSE "WIDE-HEADED NORTHWEST-WARDS ARROW":'E3:none: +PROPOSE "WIDE-HEADED NORTHEAST-WARDS ARROW":'E4:none: +PROPOSE "WIDE-HEADED SOUTHWEST-WARDS ARROW":'E5:none: +PROPOSE "WIDE-HEADED SOUTHEAST-WARDS ARROW":'E6:none: +PROPOSE "HEAVY WIDE-HEADED LEFTWARDS ARROW":'E7:none: +HEAVY WIDE-HEADED RIGHTWARDS ARROW:'E8:10132:➔ +PROPOSE "HEAVY WIDE-HEADED UPWARDS ARROW":'E9:none: +PROPOSE "HEAVY WIDE-HEADED DOWNWARDS ARROW":'EA:none: +PROPOSE "HEAVY WIDE-HEADED NORTHWEST-WARDS ARROW":'EB:none: +PROPOSE "HEAVY WIDE-HEADED NORTHEAST-WARDS ARROW":'EC:none: +PROPOSE "HEAVY WIDE-HEADED SOUTHWEST-WARDS ARROW":'ED:none: +PROPOSE "HEAVY WIDE-HEADED SOUTHEAST-WARDS ARROW":'EE:none: +LEFTWARDS WHITE ARROW:'EF:8678:⇦ +RIGHTWARDS WHITE ARROW:'F0:8680:⇨ +UPWARDS WHITE ARROW:'F1:8679:⇧ +DOWNWARDS WHITE ARROW:'F2:8681:⇩ +LEFT RIGHT DOUBLE ARROW:'F3:8660:⇔ +UP DOWN DOUBLE ARROW:'F4:8661:⇕ +NORTH WEST DOUBLE ARROW:'F5:8662:⇖ +NORTH EAST DOUBLE ARROW:'F6:8663:⇗ +SOUTH WEST DOUBLE ARROW:'F7:8665:⇙ +SOUTH EAST DOUBLE ARROW:'F8:8664:⇘ +"NO IDEA":'F9:none: +"NO IDEA":'FA:none: +BALLOT X:'FB:10007:✗ +CHECK MARK:'FC:10003:✓ +BALLOT BOX WITH X:'FD:9746:☒ +BALLOT BOX WITH CHECK:'FE:9745:☑ +PROPOSE "MICROSOFT WINDOWS LOGO":'FF:none: + + +SPACE:'20:32: +UPPER BLADE SCISSORS:'21:9985:✁ +BLACK SCISSORS:'22:9986:✂ +LOWER BLADE SCISSORS:'23:9987:✃ +WHITE SCISSORS:'24:9988:✄ +BLACK TELEPHONE:'25:9742:☎ +TELEPHONE LOCATION SIGN:'26:9990:✆ +TAPE DRIVE:'27:9991:✇ +AIRPLANE:'28:9992:✈ +ENVELOPE:'29:9993:✉ +BLACK RIGHT POINTING INDEX:'2A:9755:☛ +WHITE RIGHT POINTING INDEX:'2B:9758:☞ +VICTORY HAND:'2C:9996:✌ +WRITING HAND:'2D:9997:✍ +LOWER RIGHT PENCIL:'2E:9998:✎ +PENCIL:'2F:9999:✏ +UPPER RIGHT PENCIL:'30:10000:✐ +WHITE NIB:'31:10001:✑ +BLACK NIB:'32:10002:✒ +CHECKMARK:'33:10003:✓ +HEAVY CHECKMARK:'34:10004:✔ +MULTIPLICATION X:'35:10005:✕ +HEAVY MULTIPLICATION X:'36:10006:✖ +BALLOT X:'37:10007:✗ +HEAVY BALLOT X:'38:10008:✘ +OUTLINED GREEK CROSS:'39:10009:✙ +HEAVY GREK CROSS:'3A:10010:✚ +OPEN CENTRE CROSS:'3B:10011:✛ +HEAVY OPEN CENTRE CROSS:'3C:10011:✛ +LATIN CROSS:'3D:10013:✝ +SHADOWED WHITE LATIN CROSS:'3E:10014:✞ +OUTLINED LATIN CROSS:'3F:10015:✟ +MALTESE CROSS:'40:10016:✠ +STAR OF DAVID:'41:10017:✡ +FOUR TEARDROP-SPOKED ASTERISK:'42:10018:✢ +FOUR BALLOON-SPOKED ASTERISK:'43:10019:✣ +10019:'43:10019:✣ +HEAVY FOUR BALLOON-SPOKED ASTERISK:'44:10020:✤ +FOUR CLUB-SPOKED ASTERISK:'45:10021:✥ +BLACK FOUR POINTED STAR:'46:10022:✦ +WHITE FOUR POINTED STAR:'47:10023:✧ +BLACK STAR:'48:9989:✅ +STRESS OUTLINED WHITE STAR:'49:10025:✩ +CIRCLED WHITE STAR:'4A:10026:✪ +OPEN CENTRE BLACK STAR:'4B:10027:✫ +BLACK CENTRE WHITE STAR:'4C:10028:✬ +OUTLINED BLACK STAR:'4D:10029:✭ +HEAVY OUTLINED BLACK STAR:'4E:10030:✮ +PINWHEEL STAR:'4F:10031:✯ +SHADOWED WHITE STAR:'50:10032:✰ +HEAVY ASTERISK:'51:10033:✱ +OPEN CENTRE ASTERISK:'52:10034:✲ +EIGHT SPOKED ASTERISK:'53:10035:✳ +EIGHT POINTED BLACK STAR:'54:10036:✴ +EIGHT POINTED PINWHEEL STAR:'55:10037:✵ +SIX POINTED BLACK STAR:'56:10038:✶ +EIGHT POINTED RECTILINEAR BLACK STAR:'57:10039:✷ +HEAVY EIGHT POINTED RECTILINEAR BLACK STAR:'58:10040:✸ +TWELVE POINTED BLACK STAR:'59:10041:✹ +SIXTEEN POINTED ASTERISK:'5A:10042:✺ +TEARDROP-SPOKED ASTERISK:'5B:10043:✻ +OPEN CENTRE TEARDROP-SPOKED ASTERISK:'5C:10044:✼ +HEAVY TEARDROP-SPOKED ASTERISK:'5D:10045:✽ +SIX PETALLED BLACK AND WHITE FLORETTE:'5E:10046:✾ +BLACK FLORETTE:'5F:10047:✿ +WHITE FLORETTE:'60:10048:❀ +EIGHT PETALLED OUTLINED BLACK FLORETTE:'61:10049:❁ +CIRCLED OPEN CENTRE EIGHT POINTED STAR:'62:10050:❂ +HEAVY TEARDROP-SPOKED PINWHEEL ASTERISK:'63:10051:❃ +SNOWFLAKE:'64:10052:❄ +TIGHT TRIFOLIATE SNOWFLAKE:'65:10053:❅ +HEAVY CHEVRON SNOWFLAKE:'66:10054:❆ +SPARKLE:'67:10055:❇ +HEAVY SPARKLE:'68:10056:❈ +BALLOON-SPOKED ASTERISK:'69:10057:❉ +TEARDROP-SPOKED ASTERISK:'6A:10043:✻ +HEAVY TEARDROP-SPOKED ASTERISK:'6B:10045:✽ +BLACK CIRCLE:'6C:9679:● +SHADOWED WHITE CIRCLE:'6D:10061:❍ +BLACK SQUARE:'6E:9632:■ +LOWER RIGHT DROP-SHADOWED SQUARE:'6F:10063:❏ +UPPER RIGHT DROP-SHADOWED WHITE SQUARE:'70:10064:❐ +LOWER RIGHT SHADOWED SQUARE:'71:10065:❑ +UPPER RIGHT SHADOWED WHITE SQUARE:'72:10066:❒ +BLACK UP-POINTING TRIANGLE:'73:9660:▲ +BLACK DOWN-POINTING TRIANGLE:'74:9651:▼ +BLACK DIAMOND:'75:9670:◆ +BLACK DIAMOND MINUS WHITE X:'76:10070:❖ +RIGHT HALF BLACK CIRCLE:'77:9479:┇ +LIGHT VERTICAL BAR:'78:10072:❘ +MEDIUM VERTICAL BAR:'79:10073:❙ +HEAVY VERTICAL BAR:'7A:10074:❚ +HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT:'7B:10075:❛ +HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT:'7C:10076:❜ +HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT:'7D:10077:❝ +HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT:'7E:10078:❞ +UNUSED:'7F:none:udef_symbol num="7F"/> +MEDIUM LEFT PARENTHESIS ORNAMENT:'80:10088:❨ +MEDIUM RIGHT PARENTHESIS ORNAMENT:'81:10089:❩ +MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT:'82:10090:❪ +MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT:'83:10091:❫ +MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT:'84:10092:❬ +MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT:'85:10093:❭ +HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT:'86:10094:❮ +HEAVY RIGHT-POITING ANGLE QUOTATION MARK ORNAMENT:'87:10095:❯ +HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT:'88:10096:❰ +HEAVY RIGHT-POTING ANGLE BRACKET ORNAMENT:'89:10097:❱ +LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT:'8A:10098:❲ +LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT:'8B:10099:❳ +MEDIUM LEFT CURLY BRACKET ORNAMENT:'8C:10100:❴ +MEDIUM RIGHT CURLY BRACKET ORNAMENT:'8D:10101:❵ +UNUSED:'8E:none: +UNUSED:'8F:none:udef_symbol num="8F"/> +UNUSED:'90:none:udef_symbol num="90"/> +UNUSED:'91:none:udef_symbol num="91"/> +UNUSED:'92:none:udef_symbol num="92"/> +UNUSED:'93:none:udef_symbol num="93"/> +UNUSED:'94:none:udef_symbol num="94"/> +UNUSED:'95:none:udef_symbol num="95"/> +UNUSED:'96:none:udef_symbol num="96"/> +UNUSED:'97:none:udef_symbol num="97"/> +UNUSED:'98:none:udef_symbol num="98"/> +UNUSED:'99:none:udef_symbol num="99"/> +UNUSED:'9A:none:udef_symbol num="9A"/> +UNUSED:'9B:none:udef_symbol num="9B"/> +UNUSED:'9C:none:udef_symbol num="9C"/> +UNUSED:'9D:none:udef_symbol num="9D"/> +UNUSED:'9E:none:udef_symbol num="9E"/> +UNUSED:'9F:none:udef_symbol num="9F"/> +UNUSED:'A0:none:udef_symbol num="A0"/> +CURVED STEM PARAGRAPH SIGN ORNAMENT:'A1:10081:❡ +HEAVY EXCLAMATION MARK ORNAMENT:'A2:10082:❢ +HEAVY HEART EXCLAMATION MARK ORNAMENT:'A3:10083:❣ +HEAVY BLACK HEART:'A4:10084:❤ +ROTATED HEAVY BLACK HEART BULLET:'A5:10085:❥ +FLORAL HEART:'A6:10086:❦ +ROTATED FLORAL HEART BULLET:'A7:10087:❧ +BLACK CLUB SUIT:'A8:9827:♣ +BLACK DIAMOND SUIT:'A9:9830:♦ +BLACK HEART SUIT:'AA:9829:♥ +BLACK SPADE SUIT:'AB:9824:♠ +DINGBAT CIRCLED SANS SERIF DIGIT ONE:'AC:10112:➀ +DINGBAT CIRCLED SANS SERIF DIGIT TWO:'AD:10113:➁ +DINGBAT CIRCLED SANS SERIF DIGIT THREE:'AE:10114:➂ +DINGBAT CIRCLED SANS SERIF DIGIT FOUR:'AF:10115:➃ +DINGBAT CIRCLED SANS SERIF DIGIT FIVE:'B0:10116:➄ +DINGBAT CIRCLED SANS SERIF DIGIT SIX:'B1:10117:➅ +DINGBAT CIRCLED SANS SERIF DIGIT SEVEN:'B2:10118:➆ +DINGBAT CIRCLED SANS SERIF DIGIT EIGHT:'B3:10119:➇ +DINGBAT CIRCLED SANS SERIF DIGIT NINE:'B4:10120:➈ +DINGBAT CIRCLED SANS SERIF DIGIT TEN:'B5:10121:➉ +DINGBAT NEGATIVE CIRCLED DIGIT ONE:'B6:10102:❶ +DINGBAT NEGATIVE CIRCLED DIGIT TWO:'B7:10103:❷ +DINGBAT NEGATIVE CIRCLED DIGIT THREE:'B8:10104:❸ +DINGBAT NEGATIVE CIRCLED DIGIT FOUR:'B9:10105:❹ +DINGBAT NEGATIVE CIRCLED DIGIT FIVE:'BA:10106:❺ +DINGBAT NEGATIVE CIRCLED DIGIT SIX:'BB:10107:❻ +DINGBAT NEGATIVE CIRCLED DIGIT SEVEN:'BC:10108:❼ +DINGBAT NEGATIVE CIRCLED DIGIT EIGHT:'BD:10109:❽ +DINGBAT NEGATIVE CIRCLED DIGIT:'BE:10110:❾ +DINGBAT NEGATIVE CIRCLED DIGIT:'BF:10111:❿ +DINGBAT CIRCLED SANS-SERIF DIGIT ONE:'C0:10112:➀ +DINGBAT CIRCLED SANS-SERIF DIGIT TWO:'C1:10113:➁ +DINGBAT CIRCLED SANS-SERIF DIGIT THREE:'C2:10114:➂ +DINGBAT CIRCLED SANS-SERIF DIGIT FOUR:'C3:10115:➃ +DINGBAT CIRCLED SANS-SERIF DIGIT FIVE:'C4:10116:➄ +DINGBAT CIRCLED SANS-SERIF DIGIT SIX:'C5:10117:➅ +DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN:'C6:10118:➆ +DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT:'C7:10119:➇ +DINGBAT CIRCLED SANS-SERIF DIGIT NINE:'C8:10120:➈ +DINGBAT CIRCLED SANS-SERIF DIGIT TEN:'C9:10121:➉ +DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE:'CA:10122:➊ +DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO:'CB:10123:➋ +DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE:'CC:10124:➌ +DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR:'CD:10125:➍ +DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE:'CE:10126:➎ +DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX:'CF:10127:➏ +DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN:'D0:10128:➐ +DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT:'D1:10129:➑ +DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE:'D2:10130:➒ +DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TEN:'D3:10131:➓ +HEAVY WIDE-HEADED RIGHTWARDS ARROW:'D4:10132:➔ +RIGHTWARDS ARROW:'D5:8594:→ +LEFT RIGHT ARROW:'D6:8596:↔ +UP DOWN ARROW:'D7:8597:↕ +HEAVY SOUTH EAST ARROW:'D8:10136:➘ +HEAVY RIGHTWARDS ARROW:'D9:10137:➙ +HEAVY NORTHEAST ARROW:'DA:10138:➚ +DRAFTING POINT RIGHTWARDS ARROW:'DB:10139:➛ +HEAVY ROUND-TIPPED RIGHTWARDS ARROW:'DC:10140:➜ +TRIANGLE-HEADED RIGHTWARDS ARROW:'DD:10141:➝ +HEAVY TRIANGLE-HEADED RIGHTWARDS ARROW:'DE:10142:➞ +DASHED TRIANGLE-HEADED RIGHTWARDS ARROW:'DF:10143:➟ +HEAVY DASHED TRIANGLE-HEADED RIGHTWARS ARROW:'E0:10144:➠ +BLACK RIGHTWARDS ARROW:'E1:10145:➡ +THREE-D TOP-LIGHTED RIGHTWARDS ARROWHEAD:'E2:10146:➢ +THREE-D BOTTOM-LIGHTED RIGHTWARDS ARROWHEAD:'E3:10147:➣ +BLACK RIGHTWARDS ARROWHEAD:'E4:10148:➤ +HEAVY BLACK CURVED DOWNWARDS AND RIGHTWARDS ARROW:'E5:10149:➥ +HEAVY BLACK CURVED UPWARDS AND RIGHTWARDS ARROW:'E6:10150:➦ +SQUAT BLACK RIGHTWARDS ARROW:'E7:10151:➧ +HEAVY CONCAVE-POINTED BLACK RIGHTWARDS ARROW:'E8:10152:➨ +RIGHT-SHADED WHITE RIGHTWARDS ARROW:'E9:10153:➩ +LEFT-SHADED WHITE RIGHTWARDS ARROW:'EA:10154:➪ +BACK-TILTED SHADOWED WHITE RIGHTWARDS ARROW:'EB:10155:➫ +FRONT-TILTED SHADOWED WHITE RIGHWARDS ARROW:'EC:10156:➬ +HEAVY LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW:'ED:10157:➭ +HEAVY UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW:'EE:10157:➭ +NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW:'EF:10158:➮ +UNUSED:'F0:none:udef_symbol num="F0"/> +NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW:'F1:10161:➱ +CIRCLED HEAVY WHITE RIGHTWARDS ARROW:'F2:10162:➲ +WHITE-FEATHERED RIGHTWARDS ARROW:'F3:10163:➳ +BLACK-FEATHERED SOUTH EAST ARROW:'F4:10164:➴ +BLACK-FEATHERED RIGHTWARDS ARROW:'F5:10165:➵ +BLACK-FEATHERED NORTH EAST ARROW:'F6:10166:➶ +HEAVY BLACK-FEATHERED SOUTH EAST ARROW:'F7:10167:➷ +HEAVY BLACK-FEATHERED RIGHTWARDS ARROW:'F8:10168:➸ +HEAVY BLACK-FEATHERED NORTH EAST ARROW:'F9:10169:➹ +TEARDROP-BARBED RITGHTWARDS ARROW:'FA:10170:➺ +HEAVY TEARDROP-SHANKED RIGHTWARDS ARROW:'FB:10171:➻ +WEDGE-TAILED RIGHTWARDS ARROW:'FC:10172:➼ +HEAVY WEDGED-TAILED RIGHTWARDS ARROW:'FD:10173:➽ +OPEN-OUTLINED RIGHTWARDS ARROW:'FE:10174:➾ +UNUSED:'FF:none:udef_symbol num="FF"/> + + +LATIN CAPITAL LETTER S WITH CARON:š:352:Š +LATIN CAPITAL LETTER S WITH ACUTE:ś:346:Ś +LATIN CAPITAL LETTER T WITH CARON:ť:356:Ť +LATIN CAPITAL LETTER Z WITH CARON:ž:381:Ž +LATIN CAPITAL LETTER Z WITH ACUTE:ź:377:Ź +LATIN CAPITAL LETTER L WITH STROKE:ł:321:Ł +LATIN CAPITAL LETTER A WITH OGONEK:ą:260:Ą +LATIN CAPITAL LETTER S WITH CEDILLA:ş:350:Ş +LATIN CAPITAL LETTER Z WITH DOT ABOVE:ż:379:Ż +LATIN CAPITAL LETTER L WITH CARON:ľ:317:Ľ +LATIN CAPITAL LETTER R WITH ACUTE:ŕ:340:Ŕ +LATIN CAPITAL LETTER A WITH ACUTE:á:193:Á +LATIN CAPITAL LETTER A WITH CIRCUMFLEX:â:194: +LATIN CAPITAL LETTER A WITH BREVE:ă:258:Ă +LATIN CAPITAL LETTER A WITH DIAERESIS:ä:196:Ä +LATIN CAPITAL LETTER L WITH ACUTE:ĺ:313:Ĺ +LATIN CAPITAL LETTER C WITH ACUTE:ć:262:Ć +LATIN CAPITAL LETTER C WITH CEDILLA:ç:199:Ç +LATIN CAPITAL LETTER C WITH CARON:č:268:Č +LATIN CAPITAL LETTER E WITH ACUTE:é:201:É +LATIN CAPITAL LETTER E WITH OGONEK:ę:280:Ę +LATIN CAPITAL LETTER E WITH DIAERESIS:ë:203:Ë +LATIN CAPITAL LETTER E WITH CARON:ě:282:Ě +LATIN CAPITAL LETTER I WITH ACUTE:í:205:Í +LATIN CAPITAL LETTER I WITH CIRCUMFLEX:î:206:Î +LATIN CAPITAL LETTER D WITH CARON:ď:270:Ď +LATIN CAPITAL LETTER D WITH STROKE:đ:272:Đ +LATIN CAPITAL LETTER N WITH ACUTE:ń:323:Ń +LATIN CAPITAL LETTER N WITH CARON:ň:327:Ň +LATIN CAPITAL LETTER O WITH ACUTE:ó:211:Ó +LATIN CAPITAL LETTER O WITH CIRCUMFLEX:ô:212:Ô +LATIN CAPITAL LETTER O WITH DOUBLE ACUTE:ő:336:Ő +LATIN CAPITAL LETTER O WITH DIAERESIS:ö:214:Ö +LATIN CAPITAL LETTER R WITH CARON:ř:344:Ř +LATIN CAPITAL LETTER U WITH RING ABOVE:ů:366:Ů +LATIN CAPITAL LETTER U WITH ACUTE:ú:218:Ú +LATIN CAPITAL LETTER U WITH DOUBLE ACUTE:ű:368:Ű +LATIN CAPITAL LETTER U WITH DIAERESIS:ü:220:Ü +LATIN CAPITAL LETTER Y WITH ACUTE:ý:221:Ý +LATIN CAPITAL LETTER T WITH CEDILLA:ţ:354:Ţ +CYRILLIC CAPITAL LETTER DJE (SERBOCROATIAN):ђ:1026:Ђ +CYRILLIC CAPITAL LETTER GJE:ѓ:1027:Ѓ +CYRILLIC CAPITAL LETTER LJE:љ:1033:Љ +CYRILLIC CAPITAL LETTER NJE:њ:1034:Њ +CYRILLIC CAPITAL LETTER KJE:ќ:1036:Ќ +CYRILLIC CAPITAL LETTER TSHE (SERBOCROATIAN):ћ:1035:Ћ +CYRILLIC CAPITAL LETTER DZHE:џ:1039:Џ +CYRILLIC CAPITAL LETTER SHORT U (BYELORUSSIAN):ў:1038:Ў +CYRILLIC CAPITAL LETTER JE:ј:1032:Ј +CYRILLIC CAPITAL LETTER GHE WITH UPTURN:ґ:1168:Ґ +CYRILLIC CAPITAL LETTER IO:ё:1025:Ё +CYRILLIC CAPITAL LETTER UKRAINIAN IE:є:1028:Є +CYRILLIC CAPITAL LETTER YI (UKRAINIAN):ї:1031:Ї +CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I:і:1030:І +CYRILLIC CAPITAL LETTER DZE:ѕ:1029:Ѕ +CYRILLIC CAPITAL LETTER A:а:1040:А +CYRILLIC CAPITAL LETTER BE:б:1041:Б +CYRILLIC CAPITAL LETTER VE:в:1042:В +CYRILLIC CAPITAL LETTER GHE:г:1043:Г +CYRILLIC CAPITAL LETTER DE:д:1044:Д +CYRILLIC CAPITAL LETTER IE:е:1045:Е +CYRILLIC CAPITAL LETTER ZHE:ж:1046:Ж +CYRILLIC CAPITAL LETTER ZE:з:1047:З +CYRILLIC CAPITAL LETTER I:и:1048:И +CYRILLIC CAPITAL LETTER SHORT I:й:1049:Й +CYRILLIC CAPITAL LETTER KA:к:1050:К +CYRILLIC CAPITAL LETTER EL:л:1051:Л +CYRILLIC CAPITAL LETTER EM:м:1052:М +CYRILLIC CAPITAL LETTER EN:н:1053:Н +CYRILLIC CAPITAL LETTER O:о:1054:О +CYRILLIC CAPITAL LETTER PE:п:1055:П +CYRILLIC CAPITAL LETTER ER:р:1056:Р +CYRILLIC CAPITAL LETTER ES:с:1057:С +CYRILLIC CAPITAL LETTER TE:т:1058:Т +CYRILLIC CAPITAL LETTER U:у:1059:У +CYRILLIC CAPITAL LETTER EF:ф:1060:Ф +CYRILLIC CAPITAL LETTER HA:х:1061:Х +CYRILLIC CAPITAL LETTER TSE:ц:1062:Ц +CYRILLIC CAPITAL LETTER CHE:ч:1063:Ч +CYRILLIC CAPITAL LETTER SHA:ш:1064:Ш +CYRILLIC CAPITAL LETTER SHCHA:щ:1065:Щ +CYRILLIC CAPITAL LETTER YERU:ы:1067:Ы +CYRILLIC CAPITAL LETTER SOFT SIGN:ь:1068:Ь +CYRILLIC CAPITAL LETTER E:э:1069:Э +CYRILLIC CAPITAL LETTER YU:ю:1070:Ю +CYRILLIC CAPITAL LETTER YA:я:1071:Я +CYRILLIC CAPITAL LETTER HARD SIGN:ъ:1066:Ъ +LATIN CAPITAL LIGATURE OE:œ:338:Œ +LATIN CAPITAL LETTER Y WITH DIAERESIS:ÿ:376:Ÿ +LATIN CAPITAL LETTER A WITH GRAVE:à:192:À +LATIN CAPITAL LETTER A WITH TILDE:ã:195:à +LATIN CAPITAL LETTER A WITH RING ABOVE:å:197:Å +LATIN CAPITAL LETTER AE:æ:198:Æ +LATIN CAPITAL LETTER E WITH GRAVE:è:200:È +LATIN CAPITAL LETTER E WITH CIRCUMFLEX:ê:202:Ê +LATIN CAPITAL LETTER I WITH GRAVE:ì:204:Ì +LATIN CAPITAL LETTER I WITH DIAERESIS:ï:207:Ï +LATIN CAPITAL LETTER ETH (ICELANDIC):ð:208:Ð +LATIN CAPITAL LETTER N WITH TILDE:ñ:209:Ñ +LATIN CAPITAL LETTER O WITH GRAVE:ò:210:Ò +LATIN CAPITAL LETTER O WITH TILDE:õ:213:Õ +LATIN CAPITAL LETTER O WITH STROKE:ø:216:Ø +LATIN CAPITAL LETTER U WITH GRAVE:ù:217:Ù +LATIN CAPITAL LETTER U WITH CIRCUMFLEX:û:219:Û +LATIN CAPITAL LETTER THORN (ICELANDIC):þ:222:Þ +GREEK CAPITAL LETTER EPSILON WITH TONOS:έ:904:Έ +GREEK CAPITAL LETTER ETA WITH TONOS:ή:905:Ή +GREEK CAPITAL LETTER IOTA WITH TONOS:ί:906:Ί +GREEK CAPITAL LETTER OMICRON WITH TONOS:ό:908:Ό +GREEK CAPITAL LETTER UPSILON WITH TONOS:ύ:910:Ύ +GREEK CAPITAL LETTER OMEGA WITH TONOS:ώ:911:Ώ +GREEK CAPITAL LETTER IOTA WITH DIALYTIKA:Ϊ:938:Ϊ +GREEK CAPITAL LETTER ALPHA:α:913:Α +GREEK CAPITAL LETTER BETA:β:914:Β +GREEK CAPITAL LETTER GAMMA:γ:915:Γ +GREEK CAPITAL LETTER DELTA:δ:916:Δ +GREEK CAPITAL LETTER EPSILON:ε:917:Ε +GREEK CAPITAL LETTER ZETA:ζ:918:Ζ +GREEK CAPITAL LETTER ETA:η:919:Η +GREEK CAPITAL LETTER THETA:θ:920:Θ +GREEK CAPITAL LETTER IOTA:ι:921:Ι +GREEK CAPITAL LETTER KAPPA:κ:922:Κ +GREEK CAPITAL LETTER LAMDA:λ:923:Λ +GREEK CAPITAL LETTER MU:μ:924:Μ +GREEK CAPITAL LETTER NU:ν:925:Ν +GREEK CAPITAL LETTER XI:ξ:926:Ξ +GREEK CAPITAL LETTER OMICRON:ο:927:Ο +GREEK CAPITAL LETTER PI:π:928:Π +GREEK CAPITAL LETTER RHO:ρ:929:Ρ +GREEK CAPITAL LETTER SIGMA:σ:931:Σ +GREEK CAPITAL LETTER TAU:τ:932:Τ +GREEK CAPITAL LETTER UPSILON:υ:933:Υ +GREEK CAPITAL LETTER PHI:φ:934:Φ +GREEK CAPITAL LETTER CHI:χ:935:Χ +GREEK CAPITAL LETTER OMEGA:ω:937:Ω +GREEK CAPITAL LETTER ALPHA WITH TONOS:ά:902:Ά +GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA:ϋ:939:Ϋ +GREEK CAPITAL LETTER PSI:ψ:936:Ψ +GREEK CAPITAL LETTER IOTA WITH DIALYTIKA:ϊ:938:Ϊ +LATIN CAPITAL LETTER G WITH BREVE:ğ:286:Ğ +LATIN CAPITAL LETTER E WITH DOT ABOVE:ė:278:Ė +LATIN CAPITAL LETTER R WITH CEDILLA:ŗ:342:Ŗ +LATIN CAPITAL LETTER I WITH OGONEK:ī:302:Į +LATIN CAPITAL LETTER A WITH MACRON:ā:256:Ā +LATIN CAPITAL LETTER E WITH MACRON:ē:274:Ē +LATIN CAPITAL LETTER G WITH CEDILLA:ģ:290:Ģ +LATIN CAPITAL LETTER K WITH CEDILLA:ķ:310:Ķ +LATIN CAPITAL LETTER I WITH MACRON:ī:298:Ī +LATIN CAPITAL LETTER L WITH CEDILLA:ļ:315:Ļ +LATIN CAPITAL LETTER N WITH CEDILLA:ņ:325:Ņ +LATIN CAPITAL LETTER O WITH MACRON:ō:332:Ō +LATIN CAPITAL LETTER U WITH OGONEK:ų:370:Ų +LATIN CAPITAL LETTER U WITH MACRON:ū:362:Ū + +#unused character maps + + +LATIN CAPITAL LETTER A:'41:65:'61 +LATIN CAPITAL LETTER B:'42:66:'62 +LATIN CAPITAL LETTER C:'43:67:'63 +LATIN CAPITAL LETTER D:'44:68:'64 +LATIN CAPITAL LETTER E:'45:69:'65 +LATIN CAPITAL LETTER F:'46:70:'66 +LATIN CAPITAL LETTER G:'47:71:'67 +LATIN CAPITAL LETTER H:'48:72:'68 +LATIN CAPITAL LETTER I:'49:73:'69 +LATIN CAPITAL LETTER J:'4A:74:'6a +LATIN CAPITAL LETTER K:'4B:75:'6b +LATIN CAPITAL LETTER L:'4C:76:'6c +LATIN CAPITAL LETTER M:'4D:77:'6d +LATIN CAPITAL LETTER N:'4E:78:'6e +LATIN CAPITAL LETTER O:'4F:79:'6f +LATIN CAPITAL LETTER P:'50:80:'70 +LATIN CAPITAL LETTER Q:'51:81:'71 +LATIN CAPITAL LETTER R:'52:82:'72 +LATIN CAPITAL LETTER S:'53:83:'73 +LATIN CAPITAL LETTER T:'54:84:'74 +LATIN CAPITAL LETTER U:'55:85:'75 +LATIN CAPITAL LETTER V:'56:86:'76 +LATIN CAPITAL LETTER W:'57:87:'77 +LATIN CAPITAL LETTER X:'58:88:'78 +LATIN CAPITAL LETTER Y:'59:89:'79 +LATIN CAPITAL LETTER Z:'5A:90:'7a +LATIN CAPITAL LETTER A:A:65:a +LATIN CAPITAL LETTER B:B:66:b +LATIN CAPITAL LETTER C:C:67:c +LATIN CAPITAL LETTER D:D:68:d +LATIN CAPITAL LETTER E:E:69:e +LATIN CAPITAL LETTER F:F:70:f +LATIN CAPITAL LETTER G:G:71:g +LATIN CAPITAL LETTER H:H:72:h +LATIN CAPITAL LETTER I:I:73:i +LATIN CAPITAL LETTER J:J:74:j +LATIN CAPITAL LETTER K:K:75:K +LATIN CAPITAL LETTER L:L:76:l +LATIN CAPITAL LETTER M:M:77:m +LATIN CAPITAL LETTER N:N:78:n +LATIN CAPITAL LETTER O:O:79:o +LATIN CAPITAL LETTER P:P:80:p +LATIN CAPITAL LETTER Q:Q:81:q +LATIN CAPITAL LETTER R:R:82:r +LATIN CAPITAL LETTER S:S:83:s +LATIN CAPITAL LETTER T:T:84:t +LATIN CAPITAL LETTER U:U:85:u +LATIN CAPITAL LETTER V:V:86:v +LATIN CAPITAL LETTER W:W:87:x +LATIN CAPITAL LETTER X:X:88:x +LATIN CAPITAL LETTER Y:Y:89:y +LATIN CAPITAL LETTER Z:Z:90:z +NO UNICODE VALUE:'E7:231:\'87 +NO UNICODE VALUE:'83:131:\'8E +NO UNICODE VALUE:'92:146:\'EA +NO UNICODE VALUE:'EE:238:\'97 +NO UNICODE VALUE:'F2:242:\'9C +NO UNICODE VALUE:'CB:203:\'88 +NO UNICODE VALUE:'E9:233:\'8F +NO UNICODE VALUE:'ED:237:\'93 +NO UNICODE VALUE:'F1:241:\'98 +NO UNICODE VALUE:'F4:244:\'9D +NO UNICODE VALUE:'E5:229:\'89 +NO UNICODE VALUE:'E6:230:\'90 +NO UNICODE VALUE:'EB:235:\'94 +NO UNICODE VALUE:'EF:239:\'99 +NO UNICODE VALUE:'F3:243:\'9E +NO UNICODE VALUE:'AF:175:\'BF +NO UNICODE VALUE:'84:132:\'96 +NO UNICODE VALUE:'CD:205:\'9B +NO UNICODE VALUE:'CC:204:\'8B +NO UNICODE VALUE:'80:128:\'8A +NO UNICODE VALUE:'E8:232:\'91 +NO UNICODE VALUE:'EC:236:\'95 +NO UNICODE VALUE:'85:133:\'9A +NO UNICODE VALUE:'86:134:\'9F +NO UNICODE VALUE:'82:130:\'8D +NO UNICODE VALUE:'81:129:\'8C +NO UNICODE VALUE:'E1:129:\'C1 + + +CANCER:a:9803:♋ +LEO:b:9804:♌ +VIRGO:c:9805:♍ +LIBRA:d:9806:♎ +SCORPIOUS:e:9807:♏ +SAGITARRIUS:f:9808:♐ +CAPRICON:g:9809:♑ +AQUARIUS:h:9810:♒ +PISCES:i:9811:♓ +MY LOOPY ET:j:0: +AMPERSAND:k:38:& +BLACK CIRCLE:l:9679:● +SHADOWED WHITE CIRCLE:m:10061:❍ +BLACK SQUARE:n:9632:■ +WHITE SQUARE:o:9633:□ +WHITE SQUARE:p:9633:□ +LOWER RIGHT SHADOWED SQUARE:q:10065:❑ +UPPER RIGHT SHADOWED WHITE SQUARE:r:10066:❒ +BLACK DIAMOND:s:9670:◆ +BLACK DIAMOND:t:9670:◆ +BLACK DIAMOND:u:9670:◆ +BLACK DIAMOND MINUS WHITE X:v:10070:❖ +BLACK DIAMOND MINUS WHITE X:w:10070:❖ +BALLOT BOX WITH X:x:9746:☒ +MY COMPUTER KEY:y:0: +MY APPLE KEY:z:0: +VICTORY HAND:A:9996:✌ +OKAY HAND:B:0: +MY THUMBS UP:C:0: +MY THUMBS DOWN:D:0: +WHITE LEFT POINTING INDEX:E:9756:☜ +WHITE RIGHT POINTING INDEX:F:9758:☞ +WHITE POINTING UP INDEX:G:9757:☝ +WHITE POINTING DOWN INDEX:H:9759:☟ +MY OPEN HAND:I:0: +WHITE SMILING FACE:J:9786:☺ +MY STRAIGHT FACE:K:0: +WHITE FROWNING FACE:L:9785:☹ +MY BOMB:M:0: +SKULL AND CROSSBONES:N:9760:☠ +MY FLAG:O:0: +MY PENNANT:P:0: +AIRPLANE:Q:9992:✈ +CIRCLED OPEN CENTRE EIGHT POINTED STAR:R:9794:♂ +MY TEARDROP:S:0: +SNOWFLAKE:T:10052:❄ +SHADOWED WHITE LATIN CROSS:U:10014:✞ +SHADOWED WHITE LATIN CROSS:V:10014:✞ +MY CELTIC CROSS:W:0: +MALTESE CORSS:X:10016:✠ +STAR OF DAVID:Y:10017:✡ +STAR AND CRESCENT:Z:9770:☪ +MY FOLDER:0:0: +MY OPEN FOLDER:1:0: +MY DOG-EARED DOCUMENT:2:0: +MY DOCUMENT:3:0: +MY PAGES:4:0: +MY FILE CABINETS:5:0: +MY HOUR GLASS:6:0: +MY KEYBOARD:7:0: +MY MOUSE:8:0: +MY BOTTOM OF MOUSE:9:0: +LOWER RIGHT PENCIL:!:9998:✎ +WRITING HAND:@:9996:✌ +UPPER BLADE SCISSORS:#:9985:✁ +MY GLASSES:$:0: +MY BELL:%:0: +ARIES:^:9800:♈ +MY BOOK:&:0: +ENVELOPE:*:9993:✉ +BLACK TELEPHONE:(:9742:☎ +TELEPHONE LOCATION SIGN:):9990:✆ +MY MAILBOX:-:0: +TAURUS:_:9801:♉ +MY BLACK FLOPPY DISK:=:0: +ENVELOPE:+:9993:✉ +HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT:~:10078:❞ +GEMINI:`:9802:♊ +MY WHITE FLOPPY DISK:<:0: +MY TAPE REEL:>:0: +MY OPEN MAILBOX:.:0: +WRITING HAND:?:9996:✌ +EIGHT PETALLED OUTLINED BLACK FLORETTE:|:10049:❁ +MY OPEN MAILBOX:/:0: +MY COMPUETR:\colon:0: +MY DOWNWARD LEAF:" :0: +MY UPWARD LEAF:" :0: + + +EXCLAMATION MARK:33:33:! +FOR ALL:34:8704:∀ +NUMBER SIGN:35:35:# +THERE EXISTS:36:8707:∃ +PERCENTAGE SIGN:37:37:% +AMPERSAND:38:38:& +CONTAINS AS A MEMBER:39:8715:∋ +LEFT PARENTHESIS:40:40:( +RIGHT PERENTHESIS:41:41:) +ASTERISK OPERATOR:42:8727:∗ +PLUS SIGN:43:43:+ +COMMA:44:44:, +MINUS SIGN:45:8722:− +FULL STOP:46:46:. +DIVISION SLASH:47:8725:∕ +DIGIT ZERO:48:48:0 +DIGIT ONE:49:49:1 +DIGIT TWO:50:50:2 +DIGIT THREE:51:51:3 +DIGIT FOUR:52:52:4 +DIGIT FIVE:53:53:5 +DIGIT SIX:54:54:6 +DIGIT SEVEN:55:55:7 +DIGIT EIGHT:56:56:8 +DIGIT NINE:57:57:9 +RATIO:58:8758:∶ +SEMICOLON:59:59:; +LESS-THAN SIGN:60:60:< +EQUALS SIGN TO:61:61:= +GREATER-THAN SIGN:62:62:> +QUESTION MARK:63:63:? +APPROXTIMATELY EQUAL TO:64:8773:≅ +GREEK CAPITOL LETTER ALPHA:65:913:Α +GREEK CAPAITOL LETTER BETA:66:914:Β +GREEK CAPITOL LETTER CHI:67:935:Χ +GREEK CAPITOL LETTER DELTA:68:916:Δ +GREEK CAPITOL LETTER EPSILON:69:917:Ε +GREEK CAPITOL LETTER PHI:70:934:Φ +GREEK CAPITOL LETTER GAMMA:71:915:Γ +GREEK CAPITOL LETTER ETA:72:919:Η +GREEK CAPITOL LETTER ITOA:73:913:Α +GREEK THETA SYMBOL:74:977:ϑ +GREEK CAPITOL LETTER KAPPA:75:922:Κ +GREEK CAPITOL LETTER LAMBDA:76:923:Λ +GREEK CAPITOL LETTER MU:77:924:Μ +GREEK CAPITOL LETTER NU:78:925:Ν +GREEK CAPITOL LETTER OMICRON:79:927:Ο +GREEK CAPITAL LETTER PI:80:928:Π +GREEK CAPITOL LETTER THETA:81:920:Θ +GREEK CAPITOL LETTER RHO:82:929:Ρ +GREEK CAPITOL LETTER SIGMA:83:931:Σ +GREEK CAPITOL LETTER TAU:84:932:Τ +GREEK CAPITOL LETTER UPSILON:85:933:Υ +GREEK LETTER STIGMA:86:986:Ϛ +GREEK CAPITOL LETTER OMEGA:87:937:Ω +GREEK CAPITOL LETTER XI:88:926:Ξ +GREEK CAPITOL LETTER PSI:89:936:Ψ +GREEK CAPITOL LETTER ZETA:90:918:Ζ +LEFT SQUARE BRACKET:91:91:[ +THEREFORE:92:8756:∴ +RIGHT SQUARE BRACKET:93:93:] +UP TACK:94:8869:⊥ +MODIFIER LETTER LOW MACRON:95:717:ˍ +MODIFIER LETTER MACRON:96:713:ˉ +GREEK SMALL LETTER ALPHA:97:945:α +GREEK SMALL LETTER BETA:98:946:β +GREEK SMALL LETTER CHI:99:967:χ +GREEK SMALL LETTER DELTA:100:948:δ +GREEK SMALL LETTER EPSILON:101:949:ε +GREEK PHI SYMBOL:102:981:ϕ +GREEK MSALL LETTER DELTA:103:947:γ +GREEK SMALL LETTER ETA:104:951:η +GREEK SMALL LETTER IOTA:105:953:ι +GREEK SMALL LETTER PHI:106:966:φ +GREEK SMALL LETTER KAPPA:107:954:κ +GREEK SMALL LETTER LAMDA:108:955:λ +GREEK SMALL LETTER MU:109:956:μ +GREEK SMALL LETTER NU:110:957:ν +GREEK SMALL LETTER OMICRON:111:959:ο +GREEK SMALL LETTER PI:112:960:π +GREEK SMALL LETTER THETA:113:952:θ +GREEK SMALL LETTER RHO:114:961:ρ +GREEK SMALL LETTER SIGMA:115:963:σ +GREEK SMALL LETTER TAU:116:964:τ +GREEK SMALL LETTER UPSILON:117:965:υ +GREEK PI SYMBOL:118:982:ϖ +GREEK SMALL LETTER OMEGA:119:969:ω +GREEK SMALL LETTER XI:120:958:ξ +GREEK SMALL LETTER PHI:121:966:φ +GREEK SMALL LETTER ZETA:122:950:ζ +LEFT CURLY BRACKET:123:123:{ +DIVIDES:124:8739:∣ +RIGHT CURLY BRACKET:125:125:} +TILDE OPERATOR:126:8764:∼ +GREEK UPSILON WITH HOOK SYMBOL:161:978:ϒ +COMBINING ACUTE TONE MARK:162:833:́ +LESS THAN OR EQUAL TO:163:8804:≤ +DIVISION SLASH:164:8725:∕ +INFINITY:165:8734:∞ +SMALL LETTER F:166:15:f +BLACK CLUB SUIT:167:9827:♣ +BLACK DIAMOND SUIT:168:9830:♦ +BLACK HEART SUIT:169:9829:♥ +BLACK SPADE SUIT:170:9824:♠ +LEFT RIGHT ARROW:171:8596:↔ +LEFTWARDS ARROW:172:8592:← +UPWARDS ARROW:173:8593:↑ +RIGHTWARDS ARROW:174:8594:→ +DOWNWARDS ARROW:175:8595:↓ +DEGREE SIGN:176:176:° +PLUS OR MINUS SIGN:177:177:± +DOUBLE ACUTE ACCENT:178:733:˝ +GREATER THAN OR EQUAL TO:179:8805:≥ +MULTIPLICATION SIGN:180:215:× +DON'T KNOW:181:8733:∝ +PARTIAL DIFFERENTIAL:182:8706:∂ +BULLET:183:183:· +DIVISION:184:247:÷ +NOT EQUAL TO:185:8800:≠ +IDENTICAL TO:186:8801:≡ +ALMOST EQUAL TO:187:8776:≈ +MIDLINE HORIZONTAL ELLIPSES:188:8943:⋯ +DIVIDES:189:8739:∣ +BOX DRAWINGS LIGHT HORIZONTAL:190:9472:─ +DOWNWARDS ARROW WITH TIP LEFTWARDS:191:8626:↲ +CIRCLED TIMES:196:8855:⊗ +CIRCLED PLUS:197:8853:⊕ +EMPTY SET:198:8709:∅ +INTERSECTION:199:8745:∩ +UNION:200:8746:∪ +SUPERSET OF:201:8835:⊃ +SUPERSET OF OR EQUAL TO:202:8839:⊇ +NIETHER A SUBSET OR EQUAL TO:203:8836:⊄ +SUBSET OF:204:8834:⊂ +SUBSET OR EQUAL TO:205:8838:⊆ +ELEMENT OF:206:8712:∈ +NOT AN ELEMENT OF:207:8713:∉ +ANGLE:208:8736:∠ +WHITE DOWN POINTING TRIANBLE:209:9661:▽ +REGISTERED SIGN:210:174:® +COPYRIGHT:211:169:© +TRADEMARK SYMBOL:212:8482:™ +NARY OPERATOR:213:8719:∏ +SQUARE ROOT:214:8730:√ +BULLET OPERATOR:215:8729:∙ +NOT SIGN:216:172:¬ +LOGICAL AND:217:8743:∧ +LOGICAL OR:218:8744:∨ +LEFT RIGHT DOUBLE ARROW:219:8660:⇔ +LEFTWARDS DOUBLE ARROW:220:8656:⇐ +UPWARDS DOUBLE ARROW:221:8657:⇑ +RIGHTWARDS DOUBLE ARROW:222:8658:⇒ +DOWNWARDS DOUBLE ARROW:223:8659:⇓ +BETWEEN:224:8812:≬ +MATHMATICAL LEFT ANGELBRACKET:225:10216:⟨ +REGISTERED SIGN:226:174:® +COPYRIGHT:227:169:© +TRADEMARK SYMBOL:228:8482:™ +N-ARY SUMMATION:229:8721:∑ +LARGE LEFT PARENTHESIS PART1:230:0: +LARGE LEFT PARENTHESIS PART2:231:0: +LARGE LEFT PARENTHESIS PART3:232:0: +LARGE LEFT SQUARE BRACKET PART1:233:0: +LARGE LEFT SQUARE BRACKET PART2:234:0: +LARGE LEFT SQUARE BRACKET PART3:235:0: +LARGE LEFT BRACKET PART1:236:0: +LARGE LEFT BRACKET PART2:237:0: +LARGE LEFT BRACKET PART3:238:0: +DIVIDES:239:8739:∣ +MATHMATICAL RIGHT ANGLE BRACKET:241:10217:⟩ +INTEGRAL:242:8747:∫ +LARGE INTEGRAL PART 1:243:0: +LARGE INTEGRAL PART 2:244:0: +LARGE INTEGRAL PART 3:245:0: +LARGE RIGHT PARENTHESIS PART1:246:0: +LARGE RIGHT PARENTHESIS PART2:247:0: +LARGE RIGHT PARENTHESIS PART3:248:0: +LARGE RIGHT SQAURE BRACKET PART1:249:0: +LARGE RIGHT SQUARE BRACKET PART2:250:0: +LARGE RIGHT SQUARE BRACKETPART3:251:0: +LARGE RIGHT BRACKET PART1:252:0: +LARGE RIGHT BRACKETPART2:253:0: +LARGE RIGHT BRACKETPART3:254:0: +DOUBLE ACUTE ACCENT:178:733:˝ + + +EXCLMATION POINT:33:unknown:! +FOR ALL:34:8704:∀ +POUND SIGN:35:unknown:# +THERE EXISTS:36:8707:∃ +PERCENTAGE SIGN:37:unknown:% +AMPERSAND:38:38:& +CONTAINS AS A MEMBER:39:unknown:∋ +LEFT PARENTHESIS:40:unknown:( +RIGHT PERENTHESIS:41:unknown:) +ASTERISK OPERATOR:42:8727:∗ +PLUS:43:unknown:+ +COMMA:44:unknown:, +MINUS SIGN:45:8722:− +PERIOD:46:unknown:. +DIVISION SLASH:47:8725:∕ +ZERO:48:0:0 +ONE:49:1:1 +TWO:50:2:2 +THREE:51:3:3 +FOUR:52:4:4 +FIVE:53:5:5 +SIX:54:6:6 +SEVEN:55:7:7 +EIGHT:56:8:8 +NINE:57:9:9 +RATIO:58:8758:∶ +SEMICOLON:59:unknown:; +LESS THAN:60:unknown:< +EQAULS TO:61:unknown:= +GREATER THAN:62:unknown:> +QUESTION MARK:63:unknown:? +APPROXTIMATELY EQUAL TO:64:8773:≅ +GREEK CAPITOL LETTER ALPHA:65:913:Α +GREEK CAPAITOL LETTER BETA:66:914:Β +GREEK CAPITOL LETTER CHI:67:unknown:Χ +GREEK CAPITOL LETTER DELTA:68:916:Δ +GREEK CAPITOL LETTER EPSILON:69:917:Ε +GREEK CAPITOL LETTER PHI:70:unknown:Φ +GREEK CAPITOL LETTER GAMMA:71:915:Γ +GREEK CAPITOL LETTER ETA:72:919:Η +GREEK CAPITOL LETTER ITOA:73:913:Α +GREEK THETA SYMBOL:74:unknown:ϑ +GREEK CAPITOL LETTER KAPPA:75:unknown:Κ +GREEK CAPITOL LETTER LAMBDA:76:unknown:Λ +GREEK CAPITOL LETTER MU:77:unknown:Μ +GREEK CAPITOL LETTER NU:78:unknown:Ν +GREEK CAPITOL LETTER OMICRON:79:unknown:Ο +GREEK CAPITAL LETTER PI:80:unknown:Π +GREEK CAPITOL LETTER THETA:81:920:Θ +GREEK CAPITOL LETTER RHO:82:unknown:Ρ +GREEK CAPITOL LETTER SIGMA:83:unknown:Σ +GREEK CAPITOL LETTER TAU:84:unknown:Τ +GREEK CAPITOL LETTER UPSILON:85:unknown:Υ +GREEK LETTER STIGMA:86:unknown:Ϛ +GREEK CAPITOL LETTEROMEGA:87:unknown:Ω +GREEK CAPITOL LETTER XI:88:unknown:Ξ +GREEK CAPITOL LETTER PSI:89:unknown:Ψ +GREEK CAPITOL LETTER ZETA:90:918:Ζ +LEFT BRACKET:91:unknown:[ +THEREFORE:92:8756:∴ +LEFT BRACKET:93:unknown:[ +UP TACK:94:unknown:⊥ +MODIFIER LETTER LOW MACRON:95:unknown:ˍ +MODIFIER LETTER MACRON:96:unknown:ˉ +GREEK SMALL LETTER ALPHA:97:unknown:α +GREEK SMALL LETTER BETA:98:unknown:β +GREEK SMALL LETTER CHI:99:unknown:χ +GREEK SMALL LETTER DELTA:100:unknown:δ +GREEK SMALL LETTER EPSILON:101:unknown:ε +GREEK PHI SYMBOL:102:unknown:ϕ +GREEK MSALL LETTER DELTA:103:unknown:γ +GREEK SMALL LETTER ETA:104:unknown:η +GREEK SMALL LETTER IOTA:105:unknown:ι +GREEK SMALL LETTER PHI:106:unknown:φ +GREEK SMALL LETTER KAPPA:107:unknown:κ +GREEK SMALL LETTER LAMDA:108:unknown:λ +GREEK SMALL LETTER MU:109:unknown:μ +GREEK SMALL LETTER NU:110:unknown:ν +GREEK SMALL LETTER OMICRON:111:unknown:ο +GREEK SMALL LETTER PI:112:unknown:π +GREEK SMALL LETTER THETA:113:unknown:θ +GREEK SMALL LETTER RHO:114:unknown:ρ +GREEK SMALL LETTER SIGMA:115:unknown:σ +GREEK SMALL LETTER TAU:116:unknown:τ +GREEK SMALL LETTER UPSILON:117:unknown:υ +GREEK PI SYMBOL:118:unknown:ϖ +GREEK SMALL LETTER OMEGA:119:unknown:ω +GREEK SMALL LETTER XI:120:unknown:ξ +GREEK SMALL LETTER PHI:121:unknown:φ +GREEK SMALL LETTER ZETA:122:unknown:ζ +RIGHT BRACKET:123:unknown:{ +DIVIDES:124:8739:∣ +LEFT BRACKET:125:unknown:} +TILDE OPERATOR:126:unknown:∼ +GREEK UPSILON WITH HOOK SYMBOL:161:unknown:ϒ +COMBINING ACUTE TONE MARK:162:833:́ +LESS THAN OR EQUAL TO:163:8804:≤ +DIVISION SLASH:164:8725:∕ +INFINITY:165:unknown:∞ +SMALL LETTER F:166:unknown:f +BLACK CLUB SUIT:167:9827:♣ +BLACK DIAMOND SUIT:168:9830:♦ +BLACK HEART SUIT:169:9829:♥ +BLACK SPADE SUIT:170:9824:♠ +LEFT RIGHT ARROW:171:8596:↔ +LEFTWARDS ARROW:172:8592:← +UPWARDS ARROW:173:8593:↑ +RIGHTWARDS ARROW:174:8594:→ +DOWNWARDS ARROW:175:8595:↓ +DEGREE SIGN:176:unknown:° +PLUS OR MINUS SIGN:177:unknown:± +DOUBLE ACUTE ACCENT:178:unknown:˝ +GREATER THAN OR EQUAL TO:179:8805:≥ +MULTIPLICATION SIGN:180:unknown:× +DON' T KNOW:181:unknown:∝ +PARTIAL DIFFERENTIAL:182:8706:∂ +BULLET?:183:unknown:· +DIVISION:184:unknown:÷ +NOT EQUAL TO:185:8800:≠ +IDENTICAL TO:186:8801:≡ +ALMOST EQUAL TO:187:8776:≈ +MIDLINE HORIZONTAL ELLIPSES:188:unknown:⋯ +DIVIDES:189:8739:∣ +BOX DRAWINGS LIGHT HORIZONTAL:190:9472:─ +DOWNWARDS ARROW WITH TIP LEFTWARDS:191:unknown:↲ +CIRCLED TIMES:196:8855:⊗ +CIRCLED PLUS:197:8853:⊕ +EMPTY SET:198:8709:∅ +INTERSECTION:199:8745:∩ +UNION:200:unknown:∪ +SUPERSET OF:201:8835:⊃ +SUPERSET OF OR EQUAL TO:202:8839:⊇ +NIETHER A SUBSET OR EQUAL TO:203:8836:⊄ +SUBSET OF:204:8834:⊂ +SUBSET OR EQUAL TO:205:8838:⊆ +ELEMENT OF:206:8712:∈ +NOT AN ELEMENT OF:207:8713:∉ +ANGLE:208:8736:∠ +WHITE DOWN POINTING TRIANBLE:209:unknown:▽ +REGISTERED SIGN:210:unknown:® +COPYRIGHT:211:unknown:© +NARY OPERATOR:213:unknown:∏ +SQUARE ROOT:214:unknown:√ +BULLET OPERATOR:215:8729:∙ +NOT SIGN:216:unknown:¬ +LOGICAL AND:217:8743:∧ +LOGICAL OR:218:8744:∨ +LEFT RIGHT DOUBLE ARROW:219:unknown:⇔ +LEFTWARDS DOUBLE ARROW:220:unknown:⇐ +UPWARDS DOUBLE ARROW:221:unknown:⇑ +RIGHTWARDS DOUBLE ARROW:222:unknown:⇒ +DOWNWARDS DOUBLE ARROW:223:unknown:⇓ +BETWEEN:224:unknown:≬ +MATHMATICAL LEFT ANGELBRACKET:225:unknown:⟨ +REGISTERED SIGN:226:unknown:® +COPYRIGHT:227:unknown:© +N-ARY SUMMATION:229:8721:∑ +LARGE LEFT PARENTHESIS PART1:230:unknown: +LARGE LEFT PARENTHESIS PART2:231:unknown: +LARGE LEFT PARENTHESIS PART3:232:unknown: +LARGE LEFT SQUARE BRACKET PART1:233:unknown: +LARGE LEFT SQUARE BRACKET PART2:234:unknown: +LARGE LEFT SQUARE BRACKET PART3:235:unknown: +LARGE LEFT BRACKET PART1:236:unknown: +LARGE LEFT BRACKET PART2:237:unknown: +LARGE LEFT BRACKET PART3:238:unknown: +DIVIDES:239:8739:∣ +MATHMATICAL RIGHT ANGLE BRACKET:241:unknown:27E9 +INTEGRAL:242:unknown:∫ +LARGE INTEGRAL PART 1:243:unknown: +LARGE INTEGRAL PART 2:244:unknown: +LARGE INTEGRAL PART 3:245:unknown: +LARGE RIGHT PARENTHESIS PART1:246:unknown: +LARGE RIGHT PARENTHESIS PART2:247:unknown: +LARGE RIGHT PARENTHESIS PART3:248:unknown: +LARGE RIGHT SQAURE BRACKET PART1:249:unknown: +LARGE RIGHT SQUARE BRACKET PART2:250:unknown: +LARGE RIGHT SQUARE BRACKETPART3:251:unknown: +LARGE RIGHT BRACKET PART1:252:unknown: +LARGE RIGHT BRACKETPART2:253:unknown: +LARGE RIGHT BRACKETPART3:254:unknown: +DOUBLE ACUTE ACCENT:178:unknown:02DD +TRADEMARK SYMBOL:212:unknown:™ +TRADEMARK SYMBOL:228:unknown:™ + + +GREEK CAPITAL LETTER ALPHA:A:65:Α +GREEK CAPITAL LETTER BETA:B:66:Β +GREEK CAPITAL LETTER CHI:C:67:Χ +GREEK CAPITAL LETTER DELTA:D:68:Δ +GREEK CAPITAL LETTER EPSILON:E:69:Ε +GREEK CAPITAL LETTER PHI:F:70:Φ +GREEK CAPITAL LETTER GAMMA:G:71:Γ +GREEK CAPITAL LETTER ETA:H:72:Η +GREEK CAPITAL LETTER IOTA:I:73:Ι +GREEK THETA SYMBOL:J:74:ϑ +GREEK CAPITAL LETTER KAPPA:K:75:Κ +GREEK CAPITAL LETTER LAMDA:L:76:Λ +GREEK CAPITAL LETTER MU:M:77:Μ +GREEK CAPITAL LETTER NU:N:78:Ν +GREEK CAPITAL LETTER OMICRON:O:79:Ο +GREEK CAPITAL LETTER PI:P:80:Π +GREEK CAPITAL LETTER THETA:T:81:Θ +GREEK CAPITAL LETTER RHO:R:82:Ρ +GREEK CAPITAL LETTER SIGMA:S:83:Σ +GREEK CAPITAL LETTER TAU:T:84:Τ +GREEK CAPITAL LETTER UPSILON:U:85:Υ +GREEK SMALL LETTER FINAL SIGMA:V:86:Ϛ +GREEK CAPITAL LETTER OMEGA:W:87:Ω +GREEK CAPITAL LETTER XI:X:88:Ξ +GREEK CAPITAL LETTER PSI:Y:89:Ψ +GREEK CAPITAL LETTER ZETA:Z:90:Ζ +GREEK SMALL LETTER ALPHA:a:97:α +GREEK SMALL LETTER BETA:b:98:β +GREEK SMALL LETTER CHI:c:99:χ +GREEK SMALL LETTER DELTA:d:100:δ +GREEK SMALL LETTER EPSILON:e:101:ε +GREEK SMALL LETTER PHI:f:102:φ +GREEK SMALL LETTER GAMMA:g:103:γ +GREEK SMALL LETTER ETA:h:104:η +GREEK SMALL LETTER IOTA:i:105:ι +GREEK PHI SYMBOL:j:106:φ +GREEK SMALL LETTER KAPPA:k:107:κ +GREEK SMALL LETTER LAMDA:l:108:λ +GREEK SMALL LETTER MU:m:109:μ +GREEK SMALL LETTER NU:n:110:ν +GREEK SMALL LETTER OMICRON:o:111:ο +GREEK SMALL LETTER PI:p:112:π +GREEK SMALL LETTER THETA:q:113:θ +GREEK SMALL LETTER RHO:r:114:ρ +GREEK SMALL LETTER SIGMA:s:115:σ +GREEK SMALL LETTER TAU:t:116:τ +GREEK SMALL LETTER UPSILON:u:117:υ +GREEK PI SYMBOL:v:118:ϖ +GREEK SMALL LETTER OMEGA:w:119:ω +GREEK SMALL LETTER XI:x:120:ξ +GREEK SMALL LETTER PSI:y:121:ψ +GREEK SMALL LETTER ZETA:z:122:ζ +APROXTIMATELY EQUAL TO:@:unknown:≅ +THERE EXISTS:$:unknown:2203 +UP TACK:^:unknown:⊥ + + +STAR OF DAVID:A:10017:✡ +FOUR TEARDROP-SPOKED ASTERISK:B:10018:✢ +FOUR BALLOON-SPOKED ASTERISK:C:10019:✣ +HEAVY FOUR BALLOON-SPOKED ASTERISK:D:10020:✤ +FOUR CLUB-SPOKED ASTERISK:E:10021:✥ +BLACK FOUR POINTED STAR:F:10022:✦ +WHITE FOUR POINTED STAR:G:10023:✧ +BLACK STAR:H:9989:✅ +STRESS OUTLINED WHITE STAR:I:10025:✩ +CIRCLED WHITE STAR:J:10026:✪ +OPEN CENTRE BLACK STAR:K:10027:✫ +BLACK CENTRE WHITE STAR:L:10028:✬ +OUTLINED BLACK STAR:M:10029:✭ +HEAVY OUTLINED BLACK STAR:N:10030:✮ +PINWHEEL STAR:O:10031:✯ +SHADOWED WHITE STAR:P:10032:✰ +HEAVY ASTERISK:Q:10033:✱ +OPEN CENTRE ASTERISK:R:10034:✲ +EIGHT SPOKED ASTERISK:S:10035:✳ +EIGHT POINTED BLACK STAR:T:10036:✴ +EIGHT POINTED PINWHEEL STAR:U:10037:✵ +SIX POINTED BLACK STAR:V:10038:✶ +EIGHT POINTED RECTILINEAR BLACK STAR:W:10039:✷ +HEAVY EIGHT POINTED RECTILINEAR BLACK STAR:X:10040:✸ +TWELVE POINTED BLACK STAR:Y:10041:✹ +SIXTEEN POINTED ASTERISK:Z:10042:✺ +EIGHT PETALLED OUTLINED BLACK FLORETTE:a:10049:❁ +CIRCLED OPEN CENTRE EIGHT POINTED STAR:b:10050:❂ +HEAVY TEARDROP-SPOKED PINWHEEL ASTERISK:c:10051:❃ +SNOWFLAKE:d:10052:❄ +TIGHT TRIFOLIATE SNOWFLAKE:e:10053:❅ +HEAVY CHEVRON SNOWFLAKE:f:10054:❆ +SPARKLE:g:10055:❇ +HEAVY SPARKLE:h:10056:❈ +BALLOON-SPOKED ASTERISK:i:10057:❉ +TEARDROP-SPOKED ASTERISK:j:10043:✻ +HEAVY TEARDROP-SPOKED ASTERISK:k:10045:✽ +BLACK CIRCLE:l:9679:● +SHADOWED WHITE CIRCLE:m:10061:❍ +BLACK SQUARE:n:9632:■ +LOWER RIGHT DROP-SHADOWED SQUARE:o:10063:❏ +UPPER RIGHT DROP-SHADOWED WHITE SQUARE:p:10064:❐ +LOWER RIGHT SHADOWED SQUARE:q:10065:❑ +UPPER RIGHT SHADOWED WHITE SQUARE:r:10066:❒ +BLACK UP-POINTING TRIANGLE:s:9650:▲ +BLACK DOWN-POINTING TRIANGLE:t:9660:▼ +BLACK DIAMOND:u:9670:◆ +BLACK DIAMOND MINUS WHITE X:v:10070:❖ +RIGHT HALF BLACK CIRCLE:w:9479:┇ +LIGHT VERTICAL BAR:x:10072:❘ +MEDIUM VERTICAL BAR:y:10073:❙ +HEAVY VERTICAL BAR:z:10074:❚ +WHITE NIB:1:10001:✑ +BLACK NIB:2:10002:✒ +CHECKMARK:3:10003:✓ +HEAVY CHECKMARK:4:10004:✔ +MULTIPLICATION X:5:10005:✕ +HEAVY MULTIPLICATION X:6:10006:✖ +BALLOT X:7:10007:✗ +HEAVY BALLOT X:8:10008:✘ +OUTLINED GREEK CROSS:9:10009:✙ +UPPER RIGHT PENCIL:0:10000:✐ +UPPER BLADE SCISSORS:!:9985:✁ +MALTESE CROSS:@:10016:✠ +LOWER BLADE SCISSORS:#:9987:✃ +WHITE SCISSORS:$:9988:✄ +BLACK TELEPHONE:%:9742:☎ +SIX PETALLED BLACK AND WHITE FLORETTE:^:10046:✾ +TELEPHONE LOCATION SIGN:&:9990:✆ +BLACK RIGHT POINTING INDEX:*:9755:☛ +AIRPLANE:(:9992:✈ +ENVELOPE:):9993:✉ +HEAVY GREK CROSS:\colon:10010:✚ +OUTLINED LATIN CROSS:?:10015:✟ +PENCIL:/:9999:✏ +OPEN CENTRE TEARDROP-SPOKED ASTERISK:\\:10044:✼ +WHITE RIGHT POINTING INDEX:+:9758:☞ +WRITING HAND:-:9997:✍ +LATIN CROSS:=:10013:✝ +HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT:~:10078:❞ + + +LEFT DOUBLE QUOTATION MARK:LDBLQUOTE :8220:“ +RIGHT DOUBLE QUOTATION MARK:RDBLQUOTE :8221:” +RIGHT SINGLE QUOTATION MARK:RQUOTE :8217:’ +LEFT SINGLE QUOTATION MARK:LQUOTE :8216:‘ +EM DASH:EMDASH :8212:— +EN DASH:ENDASH :8211:– +MIDDLE DOT:BULLET :183:· +NO-BREAK SPACE:~ :167:§ +HORIZONTAL TABULATION:TAB :9: + + +NULL:\xef:0:&#xnull; + + +""" diff --git a/src/libprs500/ebooks/rtf2xml/check_brackets.py b/src/libprs500/ebooks/rtf2xml/check_brackets.py new file mode 100755 index 0000000000..418469467d --- /dev/null +++ b/src/libprs500/ebooks/rtf2xml/check_brackets.py @@ -0,0 +1,61 @@ +######################################################################### +# # +# # +# copyright 2002 Paul Henry Tremblay # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +# General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with this program; if not, write to the Free Software # +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA # +# 02111-1307 USA # +# # +# # +######################################################################### +class CheckBrackets: + """Check that brackets match up""" + def __init__(self, bug_handler = None, file=None): + self.__file=file + self.__bug_handler = bug_handler + self.__bracket_count=0 + self.__ob_count = 0 + self.__cb_count = 0 + self.__open_bracket_num = [] + def open_brack(self, line): + num = line[-5:-1] + self.__open_bracket_num.append(num) + self.__bracket_count += 1 + def close_brack(self, line): + num = line[-5:-1] + ##self.__open_bracket_num.append(num) + try: + last_num = self.__open_bracket_num.pop() + except: + return 0 + if num != last_num: + return 0 + self.__bracket_count -= 1 + return 1 + def check_brackets(self): + read_obj = open(self.__file, 'r') + line = 'dummy' + line_count = 0 + while line: + line_count += 1 + line = read_obj.readline() + self.__token_info = line[:16] + if self.__token_info == 'ob value pair to the color dictionary, with the number + as the key, and the hex number as the value. Write an empty tag + with the hex number and number as attributes. Add one to the color + number. Reset the color string to '#' + """ + hex_num = line[-3:-1] + self.__color_string += hex_num + self.__color_dict[self.__color_num] = self.__color_string + self.__write_obj.write( + 'mi%s%s\n' % (self.__color_num, self.__color_string) + ) + self.__color_num += 1 + self.__color_string = '#' + def __in_color_func(self, line): + """ + Requires: + line + Returns: + nothing + Logic: + Check if the end of the color table has been reached. If so, + change the state to after the color table. + Othewise, get a function by passing the self.__token_info to the + state dictionary. + """ + #mi -1: + line = re.sub(self.__line_color_exp, self.__sub_from_line_color, line) + self.__write_obj.write(line) + """ + if num == 0: + hex_num = 'false' + else: + hex_num = self.__color_dict.get(num) + if hex_num == None: + if self.__run_level > 0: + sys.stderr.write( + 'module is colors.py\n' + 'function is self.__after_color_func\n' + 'no value in self.__color_dict for key %s\n' % num + ) + if self.__run_level > 3: + sys.stderr.write( + 'run level is %s\n' + 'Script will now quit\n' + % self.__run_level) + else: + self.__write_obj.write( + 'cw 3: + msg = 'can\'t make integer from string\n' + raise self.__bug_handler, msg + else: + return 'bdr-color_:no-value' + hex_num = self.__figure_num(num) + return_value = 'bdr-color_:%s' % hex_num + return return_value + def __figure_num(self, num): + if num == 0: + hex_num = 'false' + else: + hex_num = self.__color_dict.get(num) + if hex_num == None: + if self.__run_level > 3: + msg = 'no value in self.__color_dict for key %s\n' % num + raise self.__bug_hanlder, msg + if hex_num == None: + hex_num = '0' + return hex_num + def __do_nothing_func(self, line): + """ + Bad RTF will have text in the color table + """ + pass + def convert_colors(self): + """ + Requires: + nothing + Returns: + nothing (changes the original file) + Logic: + Read one line in at a time. Determine what action to take based on + the state. If the state is before the color table, look for the + beginning of the color table. + If the state is in the color table, create the color dictionary + and print out the tags. + If the state if afer the color table, look for lines with color + info, and substitute the number with the hex number. + """ + self.__initiate_values() + read_obj = open(self.__file, 'r') + self.__write_obj = open(self.__write_to, 'w') + line_to_read = 1 + while line_to_read: + line_to_read = read_obj.readline() + line = line_to_read + self.__token_info = line[:16] + action = self.__state_dict.get(self.__state) + if action == None: + sys.stderr.write('no no matching state in module fonts.py\n') + sys.stderr.write(self.__state + '\n') + action(line) + read_obj.close() + self.__write_obj.close() + copy_obj = copy.Copy(bug_handler = self.__bug_handler) + if self.__copy: + copy_obj.copy_file(self.__write_to, "color.data") + copy_obj.rename(self.__write_to, self.__file) + os.remove(self.__write_to) diff --git a/src/libprs500/ebooks/rtf2xml/combine_borders.py b/src/libprs500/ebooks/rtf2xml/combine_borders.py new file mode 100755 index 0000000000..b7f4d68ed7 --- /dev/null +++ b/src/libprs500/ebooks/rtf2xml/combine_borders.py @@ -0,0 +1,92 @@ +######################################################################### +# # +# # +# copyright 2002 Paul Henry Tremblay # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +# General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with this program; if not, write to the Free Software # +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA # +# 02111-1307 USA # +# # +# # +######################################################################### +import os, tempfile +from libprs500.ebooks.rtf2xml import copy +class CombineBorders: + """Combine borders in RTF tokens to make later processing easier""" + def __init__(self, + in_file , + bug_handler, + copy = None, + run_level = 1, + ): + self.__file = in_file + self.__bug_handler = bug_handler + self.__copy = copy + self.__write_to = tempfile.mktemp() + self.__state = 'default' + self.__bord_pos = 'default' + self.__bord_att = [] + def found_bd(self, line): + #cw' % info) + def __empty_func(self, line): + """ + Print out empty tag and newlines when needed. + """ + info = line[17:-1] + self.__write_obj.write( + '<%s/>' % info) + self.__new_line = 0 + if info in self.__block: + self.__write_new_line() + if info in self.__two_new_line: + self.__write_extra_new_line() + def __open_att_func(self, line): + """ + Process lines for open tags that have attributes. + The important infor is between [17:-1]. Take this info and split it + with the delimeter '<'. The first token in this group is the element + name. The rest are attributes, separated fromt their values by '>'. So + read each token one at a time, and split them by '>'. + """ + #mi + info = line[17:-1] + tokens = info.split("<") + element_name = tokens[0] + tokens = tokens[1:] + self.__write_obj.write('<%s' % element_name) + for token in tokens: + groups = token.split('>') + try: + val = groups[0] + att = groups[1] + att = att.replace('"', '"') + att = att.replace("'", '"') + self.__write_obj.write( + ' %s="%s"' % (val, att) + ) + except: + if self.__run_level > 3: + msg = 'index out of range\n' + raise self.__bug_handler, msg + self.__write_obj.write('>') + self.__new_line = 0 + if element_name in self.__block: + self.__write_new_line() + if element_name in self.__two_new_line: + self.__write_extra_new_line() + def __empty_att_func(self, line): + """ + Same as the __open_att_func, except a '/' is placed at the end of the tag. + """ + #mi + info = line[17:-1] + tokens = info.split("<") + element_name = tokens[0] + tokens = tokens[1:] + self.__write_obj.write('<%s' % element_name) + for token in tokens: + groups = token.split('>') + val = groups[0] + att = groups[1] + att = att.replace('"', '"') + att = att.replace("'", '"') + self.__write_obj.write( + ' %s="%s"' % (val, att)) + self.__write_obj.write('/>') + self.__new_line = 0 + if element_name in self.__block: + self.__write_new_line() + if element_name in self.__two_new_line: + self.__write_extra_new_line() + def __close_func(self, line): + """ + Print out the closed tag and new lines, if appropriate. + """ + #mi' % info) + self.__new_line = 0 + if info in self.__block: + self.__write_new_line() + if info in self.__two_new_line: + self.__write_extra_new_line() + def __text_func(self, line): + """ + Simply print out the information between [17:-1] + """ + #tx') + self.__new_line = 0 + self.__write_new_line() + if self.__no_dtd: + pass + elif self.__dtd_path: + self.__write_obj.write( + '' % self.__dtd_path + ) + elif self.__dtd_path == '': + # don't print dtd if further transformations are going to take + # place + pass + else: + self.__write_obj.write( + '' % public_dtd + ) + self.__new_line = 0 + self.__write_new_line() + def convert_to_tags(self): + """ + Read in the file one line at a time. Get the important info, between + [:16]. Check if this info matches a dictionary entry. If it does, call + the appropriate function. + The functions that are called: + a text function for text + an open funciton for open tags + an open with attribute function for tags with attributes + an empty with attribute function for tags that are empty but have + attribtes. + a closed function for closed tags. + an empty tag function. + """ + self.__initiate_values() + read_obj = open(self.__file, 'r') + self.__write_obj = open(self.__write_to, 'w') + self.__write_dec() + line_to_read = 1 + while line_to_read: + line_to_read = read_obj.readline() + line = line_to_read + self.__token_info = line[:16] + action = self.__state_dict.get(self.__token_info) + if action != None: + action(line) + read_obj.close() + self.__write_obj.close() + copy_obj = copy.Copy(bug_handler = self.__bug_handler) + if self.__copy: + copy_obj.copy_file(self.__write_to, "convert_to_tags.data") + copy_obj.rename(self.__write_to, self.__file) + os.remove(self.__write_to) diff --git a/src/libprs500/ebooks/rtf2xml/copy.py b/src/libprs500/ebooks/rtf2xml/copy.py new file mode 100755 index 0000000000..a4cc3e0bf2 --- /dev/null +++ b/src/libprs500/ebooks/rtf2xml/copy.py @@ -0,0 +1,88 @@ +######################################################################### +# # +# # +# copyright 2002 Paul Henry Tremblay # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +# General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with this program; if not, write to the Free Software # +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA # +# 02111-1307 USA # +# # +# # +######################################################################### +import sys, os +class Copy: + """Copy each changed file to a directory for debugging purposes""" + __dir = "" + def __init__(self, bug_handler, file = None, deb_dir = None, ): + self.__file = file + self.__bug_handler = bug_handler + def set_dir(self, deb_dir): + """Set the temporary directory to write files to""" + if deb_dir is None: + message = "No directory has been provided to write to in the copy.py" + raise self.__bug_handler, message + check = os.path.isdir(deb_dir) + if not check: + message = "%(deb_dir)s is not a directory" % vars() + raise self.__bug_handler , message + Copy.__dir = deb_dir + def remove_files(self ): + """Remove files from directory""" + self.__remove_the_files(Copy.__dir) + """ + list_of_files = os.listdir(Copy.__dir) + list_of_files = os.listdir(the_dir) + for file in list_of_files: + rem_file = os.path.join(Copy.__dir,file) + if os.path.isdir(rem_file): + self.remove_files(rem_file) + else: + os.remove(rem_file) + """ + def __remove_the_files(self, the_dir): + """Remove files from directory""" + list_of_files = os.listdir(the_dir) + for file in list_of_files: + rem_file = os.path.join(Copy.__dir,file) + if os.path.isdir(rem_file): + self.__remove_the_files(rem_file) + else: + try: + os.remove(rem_file) + except OSError: + pass + def copy_file(self, file, new_file): + """ + Copy the file to a new name + If the platform is linux, use the faster linux command + of cp. Otherwise, use a safe python method. + """ + write_file = os.path.join(Copy.__dir,new_file) + platform = sys.platform + if platform[:5] == 'linux': + command = 'cp %(file)s %(write_file)s' % vars() + os.system(command) + else: + read_obj = open(file,'r') + write_obj = open(write_file, 'w') + line = "dummy" + while line: + line = read_obj.read(1000) + write_obj.write(line ) + read_obj.close() + write_obj.close() + def rename(self, source, dest): + read_obj = open(source, 'r') + write_obj = open(dest, 'w') + line = 1 + while line: + line = read_obj.readline() + write_obj.write(line) + read_obj.close() + write_obj.close() diff --git a/src/libprs500/ebooks/rtf2xml/correct_unicode.py b/src/libprs500/ebooks/rtf2xml/correct_unicode.py new file mode 100755 index 0000000000..4fcedd825b --- /dev/null +++ b/src/libprs500/ebooks/rtf2xml/correct_unicode.py @@ -0,0 +1,94 @@ +######################################################################### +# # +# # +# copyright 2002 Paul Henry Tremblay # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +# General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with this program; if not, write to the Free Software # +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA # +# 02111-1307 USA # +# # +# # +######################################################################### +import os, re, tempfile +from libprs500.ebooks.rtf2xml import copy +class CorrectUnicode: + """ + corrects sequences such as \u201c\'F0\'BE + Where \'F0\'BE has to be eliminated. + """ + def __init__(self, + in_file, + exception_handler, + bug_handler, + copy = None, + run_level = 1, + ): + self.__file = in_file + self.__bug_handler = bug_handler + self.__copy = copy + self.__run_level = run_level + self.__write_to = tempfile.mktemp() + self.__exception_handler = exception_handler + self.__bug_handler = bug_handler + self.__state = 'outside' + self.__utf_exp = re.compile(r'&#x(.*?);') + def __process_token(self, line): + if self.__state == 'outside': + if line[:5] == 'tx 57343 and dec_num < 63743: + self.__state = 'outside' + else: + self.__write_obj.write(line) + self.__state = 'after' + else: + self.__write_obj.write(line) + self.__state = 'outside' + def correct_unicode(self): + """ + Requires: + nothing + Returns: + nothing (changes the original file) + Logic: + Read one line in at a time. + """ + read_obj = open(self.__file, 'r') + self.__write_obj = open(self.__write_to, 'w') + line_to_read = 1 + while line_to_read: + line_to_read = read_obj.readline() + line = line_to_read + self.__token_info = line[:16] + self.__process_token(line) + read_obj.close() + self.__write_obj.close() + copy_obj = copy.Copy(bug_handler = self.__bug_handler) + if self.__copy: + copy_obj.copy_file(self.__write_to, "correct_unicode.data") + copy_obj.rename(self.__write_to, self.__file) + os.remove(self.__write_to) diff --git a/src/libprs500/ebooks/rtf2xml/default_encoding.py b/src/libprs500/ebooks/rtf2xml/default_encoding.py new file mode 100755 index 0000000000..b932b465d0 --- /dev/null +++ b/src/libprs500/ebooks/rtf2xml/default_encoding.py @@ -0,0 +1,61 @@ +######################################################################### +# # +# # +# copyright 2002 Paul Henry Tremblay # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +# General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with this program; if not, write to the Free Software # +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA # +# 02111-1307 USA # +# # +# # +######################################################################### +class DefaultEncoding: + """ + Find the default encoding for the doc + """ + def __init__(self, in_file, bug_handler, run_level = 1,): + """ + Required: + 'file' + Returns: + nothing + """ + self.__file = in_file + self.__bug_handler = bug_handler + def find_default_encoding(self): + platform = 'Windows' + default_num = 'not-defined' + code_page = 'ansicpg1252' + read_obj = open(self.__file, 'r') + line_to_read = 1 + while line_to_read: + line_to_read = read_obj.readline() + line = line_to_read + self.__token_info = line[:16] + if self.__token_info == 'mi 3: + msg = 'flag problem\n' + raise self.__bug_handler, msg + return 1 + elif self.__token_info in self.__allowable : + if self.__ob: + self.__write_obj.write(self.__ob) + self.__ob = 0 + self.__state = 'default' + else: + pass + return 1 + elif self.__token_info == 'cw 5: + msg = 'After an asterisk, and found neither an allowable or non-allowble token\n' + msg += 'token is "%s"\n' % self.__token_info + raise self.__bug_handler + if not self.__ob: + self.__write_cb = 1 + self.__ob = 0 + self.__state = 'delete' + self.__cb_count = 0 + return 0 + def __found_list_func(self, line): + """ + print out control words in this group + """ + self.__state = 'list' + def __list_func(self, line): + """ + Check to see if the group has ended. + Return 1 for all control words. + Return 0 otherwise. + """ + if self.__delete_count == self.__cb_count and self.__token_info ==\ + 'cb 3: + msg = 'no key for "%s" "%s"\n' % (field_name, changed_string) + raise self.__bug_handler, msg + the_list = self.__fall_back_func(field_name, line) + return the_list + return the_list + def __default_inst_func(self, field_name, name, line): + """ + Requires: + field_name -- the first word in the string + name -- the changed name according to the dictionary + line -- the string to be parsed + Returns: + The name of the field. + Logic: + I only need the changed name for the field. + """ + return [None, None, name] + def __fall_back_func(self, field_name, line): + """ + Requires: + field_name -- the first word in the string + name -- the changed name according to the dictionary + line -- the string to be parsed + Returns: + The name of the field. + Logic: + Used for fields not found in dict + """ + the_string = field_name + the_string += 'none' + return [None, None, the_string] + def __equation_func(self, field_name, name, line): + """ + Requried: + field_name -- the first word in the string + name --the changed name according to the dictionary + line -- the string to be parse + Retuns: + The name of the field + Logic: + """ + return [None, None, name] + def __no_switch_func(self, field_name, name, line): + """ + Required: + field_name --the first + field_name -- the first word in the string + name --the changed name according to the dictionary + line -- the string to be parse + Retuns: + The name of the field + Logic: + """ + return [None, None, name] + def __num_type_and_format_func(self, field_name, name, line): + """ + Required: + field_name -- the first word in the string + name --the changed name according to the dictionary + line -- the string to be parse + Returns: + list of None, None, and part of a tag + Logic: + parse num_type + parse num_format + """ + the_string = name + num_format = self.__parse_num_format(line) + if num_format: + the_string += '%s' % num_format + num_type = self.__parse_num_type(line) + if num_type: + the_string += '%s' % num_type + # Only QUOTE takes a (mandatory?) argument + if field_name == 'QUOTE': + match_group = re.search(r'QUOTE\s{1,}"(.*?)"', line) + if match_group: + arg = match_group.group(1) + the_string += '%s' % arg + return [None, None, the_string] + def __num_format_func(self, field_name, name, line): + """ + Required: + field_name -- the first word in the string + name --the changed name according to the dictionary + line -- the string to be parse + Returns: + list of None, None, and part of a tag + Logic: + """ + the_string = name + num_format = self.__parse_num_format(line) + if num_format: + the_string += '%s' % num_format + return [None, None, the_string] + def __parse_num_format(self, the_string): + """ + Required: + the_string -- the string to parse + Returns: + a string if the_string contains number formatting information + None, otherwise + Logic: + """ + match_group = re.search(self.__date_exp, the_string) + if match_group: + return match_group(1) + def __parse_num_type(self, the_string): + """ + Required: + the_string -- the string to parse + Returns: + a string if the_string contains number type information + None, otherwise + Logic: + the_string might look like: + USERNAME \\* Arabic \\* MERGEFORMAT + Get the \\* Upper part. Use a dictionary to convert the "Arabic" to + a more-readable word for the value of the key "number-type". + ( + """ + match_group = re.search(self.__num_type_exp, the_string) + if match_group: + name = match_group.group(1) + changed_name = self.__number_dict.get(name) + if changed_name: + return changed_name + else: + sys.stderr.write('module is fields_string\n') + sys.stderr.write('method is __parse_num_type\n') + sys.stderr.write('no dictionary entry for %s\n' % name) + def __date_func(self, field_name, name, line): + """ + Required: + field_name --the fist + field_name -- the first word in the string + name --the changed name according to the dictionary + line -- the string to be parse + Returns: + list of None, None, and part of a tag + Logic: + """ + the_string = name + match_group = re.search(self.__date_exp, line) + if match_group: + the_string += '%s' % match_group.group(1) + return [None, None, the_string] + def __simple_info_func(self, field_name, name, line): + """ + Requried: + field_name -- the first word in the string + name --the changed name according to the dictionary + line -- the string to be parse + Retuns: + The name of the field + Logic: + These fields can only have the following switches: + 1. Upper + 2. Lower + 3. FirstCap + 4. Caps + """ + the_string = name + match_group = re.search(self.__format_text_exp, line) + if match_group: + name = match_group.group(1) + changed_name = self.__text_format_dict.get(name) + if changed_name: + the_string += '%s' % changed_name + else: + sys.stderr.write('module is fields_string\n') + sys.stderr.write('method is __parse_num_type\n') + sys.stderr.write('no dictionary entry for %s\n' % name) + return [None, None, the_string] + def __hyperlink_func(self, field_name, name, line): + """ + Requried: + field_name -- the first word in the string + name --the changed name according to the dictionary + line -- the string to be parse + Retuns: + The name of the field + Logic: + self.__link_switch = re.compile(r'\\l\s{1,}(.*?)\s') + """ + self.__link_switch = re.compile(r'\\l\s{1,}(.*?)\s') + the_string = name + match_group = re.search(self.__link_switch, line) + if match_group: + link = match_group.group(1) + link = link.replace('"', """) + the_string += '%s' % link + # \l "txt" "link" + # want "file name" so must get rid of \c "txt" + line = re.sub(self.__link_switch, '', line) + match_group = re.search(self.__quote_exp, line) + if match_group: + arg = match_group.group(1) + the_string += '%s' % arg + else: + pass + index = line.find('\\m') + if index > -1: + the_string += 'true' + index = line.find('\\n') + if index > -1: + the_string += 'true' + index = line.find('\\h') + if index > -1: + the_string += 'true' + return [None, None, the_string] + def __include_text_func(self, field_name, name, line): + """ + Requried: + field_name -- the first word in the string + name --the changed name according to the dictionary + line -- the string to be parse + Retuns: + The name of the field + Logic: + """ + the_string = name + match_group = re.search(self.__format_text_exp, line) + if match_group: + name = match_group.group(1) + changed_name = self.__text_format_dict.get(name) + if changed_name: + the_string += '%s' % changed_name + else: + sys.stderr.write('module is fields_string\n') + sys.stderr.write('method is __parse_num_type\n') + sys.stderr.write('no dictionary entry for %s\n' % name) + match_group = re.search(self.__filter_switch, line) + if match_group: + arg = match_group.group(1) + the_string += '%s' % arg + # \c "txt" "file name" + # want "file name" so must get rid of \c "txt" + line = re.sub(self.__filter_switch, '', line) + match_group = re.search(self.__quote_exp, line) + if match_group: + arg = match_group.group(1) + arg = arg.replace('"', """) + the_string += '%s' % arg + else: + sys.stderr.write('Module is field_strings\n') + sys.stderr.write('method is include_text_func\n') + sys.stderr.write('no argument for include text\n') + index = line.find('\\!') + if index > -1: + the_string += 'true' + return [None, None, the_string] + def __include_pict_func(self, field_name, name, line): + """ + Requried: + field_name -- the first word in the string + name --the changed name according to the dictionary + line -- the string to be parse + Retuns: + The name of the field + Logic: + """ + the_string = name + match_group = re.search(self.__filter_switch, line) + if match_group: + arg = match_group.group(1) + arg = arg.replace('"', """) + the_string += '%s' % arg + # \c "txt" "file name" + # want "file name" so must get rid of \c "txt" + line = re.sub(self.__filter_switch, '', line) + match_group = re.search(self.__quote_exp, line) + if match_group: + arg = match_group.group(1) + the_string += '%s' % arg + else: + sys.stderr.write('Module is field_strings\n') + sys.stderr.write('method is include_pict_func\n') + sys.stderr.write('no argument for include pict\n') + index = line.find('\\d') + if index > -1: + the_string += 'true' + return [None, None, the_string] + def __ref_func(self, field_name, name, line): + """ + Requires: + field_name -- the first word in the string + name -- the changed name according to the dictionary + line -- the string to be parsed + Returns: + The name of the field. + Logic: + A page reference field looks like this: + PAGEREF _Toc440880424 \\h + I want to extract the second line of info, which is used as an + achor in the resulting XML file. + """ + the_string = name + match_group = re.search(self.__format_text_exp, line) + if match_group: + name = match_group.group(1) + changed_name = self.__text_format_dict.get(name) + if changed_name: + the_string += '%s' % changed_name + else: + sys.stderr.write('module is fields_string\n') + sys.stderr.write('method is __parse_num_type\n') + sys.stderr.write('no dictionary entry for %s\n' % name) + line = re.sub(self.__merge_format_exp, '', line) + words = line.split() + words = words[1:] # get rid of field name + for word in words: + if word[0:1] != '\\': + the_string += '%s' % word + index = line.find('\\f') + if index > -1: + the_string += 'true' + index = line.find('\\h') + if index > -1: + the_string += 'true' + index = line.find('\\n') + if index > -1: + the_string += 'true' + index = line.find('\\r') + if index > -1: + the_string += 'true' + index = line.find('\\p') + if index > -1: + the_string += 'true' + index = line.find('\\t') + if index > -1: + the_string += 'true' + index = line.find('\\w') + if index > -1: + the_string += 'true' + return [None, None, the_string] + def __toc_table_func(self, field_name, name, line): + """ + Requires: + field_name -- the name of the first word in the string + name --the changed name, according to the dictionary. + line --the string to be parsed. + Returns: + A string for a TOC table field. + Logic: + If the string contains Figure, it is a table of figures. + Otherwise, it is a plain old table of contents. + """ + the_string = name + index = line.find('\\c "Figure"') + if index > -1: + the_string = the_string.replace('table-of-contents', 'table-of-figures') + # don't really need the first value in this list, I don't believe + return [name, None, the_string] + def __sequence_func(self, field_name, name, line): + """ + Requires: + field_name --the name of the first word in the string. + name --the changed name according to the dictionary. + line -- the string to parse. + Returns: + A string with a a value for the type and label attributes + Logic: + The type of sequence--whether figure, graph, my-name, or + whatever--is represented by the second word in the string. Extract + and return. + SEQ Figure \\* ARABIC + """ + fields = line.split() + label = fields[1] + my_string = '%s + The simple field in the above example conatins no paragraph or sections breaks. + This line of RTF: + {{\field{\*\fldinst SYMBOL 97 \\f "Symbol" \\s 12}{\fldrslt\f3\fs24}}} + Becomes: + Χ + The RTF in the example above should be represented as UTF-8 rather than a field. + This RTF: + {\field\fldedit{\*\fldinst { TOC \\o "1-3" }}{\fldrslt {\lang1024 + Heading one\tab }{\field{\*\fldinst {\lang1024 PAGEREF _Toc440880424 + \\h }{\lang1024 {\*\datafield + {\lang1024 1}}}{\lang1024 \par }\pard\plain + \s18\li240\widctlpar\tqr\tldot\tx8630\aspalpha\aspnum\faauto\adjustright\rin0\lin240\itap0 + \f4\lang1033\cgrid {\lang1024 Heading 2\tab }{\field{\*\fldinst + {\lang1024 PAGEREF _Toc440880425 \\h }{\lang1024 {\*\datafield + {\lang1024 1}}}{\lang1024 \par }\pard\plain + \widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 + \f4\lang1033\cgrid }}\pard\plain + \widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 + \f4\lang1033\cgrid {\fs28 \u214\'85 \par }{\fs36 {\field{\*\fldinst + SYMBOL 67 \\f "Symbol" \\s 18}{\fldrslt\f3\fs36}}} + Becomes: + + + Heading one 1 + + + Heading 2 1 + + + """ + def __init__(self, + in_file, + bug_handler, + copy = None, + run_level = 1, + ): + """ + Required: + 'file'--file to parse + Optional: + 'copy'-- whether to make a copy of result for debugging + 'temp_dir' --where to output temporary results (default is + directory from which the script is run.) + Returns: + nothing + """ + self.__file = in_file + self.__bug_handler = bug_handler + self.__copy = copy + self.__run_level = run_level + self.__write_to = tempfile.mktemp() + def __initiate_values(self): + """ + Initiate all values. + """ + self.__text_string = '' + self.__field_instruction_string = '' + self.__marker = 'mi, + since this field is really just UTF-8. + If the field contains paragraph or section breaks, it is a + field-block rather than just a field. + Write the paragraph or section markers for later parsing of the + file. + If the filed list contains more strings, add the latest + (processed) string to the last string in the list. Otherwise, + write the string to the output file. + """ + last_bracket = self.__field_count.pop() + instruction = self.__field_instruction.pop() + inner_field_string = self.__field_string.pop() + sec_in_field = self.__sec_in_field.pop() + par_in_field = self.__par_in_field.pop() + # add a closing bracket, since the closing bracket is not included in + # the field string + if self.__symbol: + inner_field_string = '%scb%s\n%s'\ + 'mi%s\n%s'\ + 'mi%s' + '%snone\n' % (type, my_string)) + return my_changed_string + def __found_toc_index_func(self, line, tag): + """ + Requires: + line --the line to parse + Returns: + nothing + Logic: + This function is called when a toc or index entry is found. The opening + bracket count is stored in the beginning bracket count. The state + is changed to 'toc_index.' + """ + self.__beg_bracket_count = self.__ob_count + self.__cb_count = 0 + self.__state = 'toc_index' + self.__tag = tag + def __toc_index_func(self, line): + """ + Requires: + line --the line to parse + Returns: + nothing + Logic: + This function handles all lines within a toc or index entry. It + adds each line to a string until the end of the entry is found. It + processes the string with the fields_string module, and + prints out the result. + """ + if self.__beg_bracket_count == self.__cb_count: + self.__state = 'body' + type = self.__tag + if type == 'index': + my_string = self.__parse_index_func( + self.__text_string) + elif type == 'toc': + my_string = self.__parse_toc_func( + self.__text_string) + self.__write_obj.write(self.__marker) + self.__write_obj.write(my_string) + self.__text_string = '' + self.__write_obj.write(line) + else: + self.__text_string += line + def fix_fields(self): + """ + Requires: + nothing + Returns: + nothing (changes the original file) + Logic: + Read one line in at a time. Determine what action to take based on + the state. If the state is before the body, look for the + beginning of the body. + The other two states are toc_index (for toc and index entries) and + bookmark. + """ + self.__initiate_values() + read_obj = open(self.__file) + self.__write_obj = open(self.__write_to, 'w') + line_to_read = '1' + while line_to_read: + line_to_read = read_obj.readline() + line = line_to_read + self.__token_info = line[:16] + if self.__token_info == 'ob value pair to the font-table + dictionary. Also create an empty tag with the name and number + as attributes. + Preamture end of font table + """ + #cw%s%s\n' % (self.__text_line, self.__font_num) + ) + elif self.__token_info == 'cwTimes0\n' ) + def __after_font_table_func(self, line): + """ + Required: + line + Returns: + nothing + Logic: + Check the self.__token_info. If this matches a token with font + info, then extract the number from the line, and look up the font + name in the font dictionary. If no name exists for that number, + print out an error. Otherwise print out the same line, except with + the name rather than the number. + If the line does not contain font info, simply print it out to the + file. + """ + if self.__token_info == 'cw 3: + msg = 'no value for %s in self.__font_table\n' % font_num + raise self.__bug_handler, msg + else: + # self.__special_font_dict + if font_name in self.__special_font_list: + self.__special_font_dict[font_name] = 1 + self.__write_obj.write( + 'cwendnote%s\n' % self.__footnote_count) + else: + self.__write_to_foot_obj.write( + 'mi%s\n' % self.__footnote_count) + self.__first_line = 0 + def __in_footnote_func(self, line): + """Handle all tokens that are part of footnote""" + if self.__first_line: + self.__first_line_func(line) + if self.__token_info == 'cw 1: + sys.stderr.write( + 'You can only convert one file at a time.\n') + return_options['valid'] = 0 + else: + return_options['in-file'] = arguments[0] + # check for out file + smart_output = return_options.get('smart-output') + if smart_output == 'false': + smart_output = 0 + if smart_output and not return_options['out-file']: + in_file = return_options['in-file'] + the_file_name, ext = os.path.splitext(in_file) + if ext != '.rtf': + sys.stderr.write( + 'Sorry, but this file does not have an "rtf" extension, so \n' + 'the script will not attempt to convert it.\n' + 'If it is in fact an rtf file, use the "-o" option.\n' + ) + return_options['valid'] = 0 + else: + return_options['out-file'] = '%s.xml' % the_file_name + if not smart_output and not return_options['out-file']: + """ + sys.stderr.write( + 'Please provide and file to outut with the -o option.\n' + 'Or set \'\'.\n' + 'in the configuration file.\n' + ) + return_options['valid'] = 0 + """ + pass + if 'indent' in the_keys: + try: + value = int(options['indent']) + return_options['indent'] = value + except ValueError: + sys.stderr.write('--indent must take an integer') + return_options['valid'] = 0 + # check for format and pyxml + """ + the_format = return_options.get('format') + if the_format != 'raw': + no_pyxml = return_options.get('no-pyxml') + if no_pyxml: + sys.stderr.write('You want to convert your file to "%s".\n' + 'Sorry, but you must have pyxml installed\n' + 'in order to convert your document to anything but raw XML.\n' + 'Please do not use the --format option.\n\n' + % the_format + ) + return_options['valid'] = 0 + xslt_proc = return_options.get('xslt-processor') + if xslt_proc == None and not no_pyxml: + sys.stderr.write('You want to convert your file to "%s".\n' + 'Sorry, but you must have an xslt processor set up\n' + 'in order to conevert your document to anything but raw XML.\n' + 'Please use --format raw.\n\n' + % the_format + ) + return_options['valid'] = 0 + """ + return return_options + def __get_config_options(self): + configure_obj = configure_txt.Configure( + bug_handler = self.__bug_handler, + configuration_file = self.__configuration_file) + options_dict = configure_obj.get_configuration(type = 'normal') + if options_dict == 1: + sys.exit(1) + options_dict['valid'] = 1 + convert_caps = options_dict.get('convert-caps') + if convert_caps == 'false': + options_dict['convert-caps'] = 0 + convert_symbol = options_dict.get('convert-symbol') + if convert_symbol == 'false': + options_dict['convert-symbol'] = 0 + convert_wingdings = options_dict.get('convert-wingdings') + if convert_wingdings == 'false': + options_dict['convert-wingdings'] = 0 + convert_zapf = options_dict.get('convert-zapf-dingbats') + if convert_zapf == 'false': + options_dict['convert-zapf'] = 0 + elif convert_zapf == 'true': + options_dict['convert-zapf'] = 1 + headings_to_sections = options_dict.get('headings-to-sections') + if headings_to_sections == 'true': + options_dict['headings-to-sections'] = 1 + elif headings_to_sections == '1': + options_dict['headings-to-sections'] = 1 + elif headings_to_sections == 'false': + options_dict['headings-to-sections'] = 0 + elif headings_to_sections == '0': + options_dict['headings-to-sections'] = 0 + else: + options_dict['headings-to-sections'] = 0 + write_empty_paragraphs = options_dict.get('write-empty-paragraphs') + if write_empty_paragraphs == 'true': + options_dict['empty-paragraphs'] = 1 + elif write_empty_paragraphs == '1': + options_dict['empty-paragraphs'] = 1 + elif write_empty_paragraphs == 'false': + options_dict['empty-paragraphs'] = 0 + elif write_empty_paragraphs == '0': + options_dict['empty-paragraphs'] = 0 + else: + options_dict['empty-paragraphs'] = 1 + form_lists = options_dict.get('lists') + if form_lists == 'true' or form_lists == '1': + options_dict['form-lists'] = 1 + elif form_lists == 'false' or form_lists == '0': + options_dict['form-lists'] = 0 + else: + options_dict['form-lists'] = 0 + group_styles = options_dict.get('group-styles') + if group_styles == 'true' or group_styles == '1': + options_dict['group-styles'] = 1 + elif group_styles == 'false' or group_styles == '0': + options_dict['group-styles'] = 0 + else: + options_dict['group-styles'] = 0 + group_borders = options_dict.get('group-borders') + if group_borders == 'true' or group_borders == '1': + options_dict['group-borders'] = 1 + elif group_borders == 'false' or group_borders == '0': + options_dict['group-borders'] = 0 + else: + options_dict['group-borders'] = 0 + return options_dict diff --git a/src/libprs500/ebooks/rtf2xml/group_borders.py b/src/libprs500/ebooks/rtf2xml/group_borders.py new file mode 100755 index 0000000000..a3df5b8427 --- /dev/null +++ b/src/libprs500/ebooks/rtf2xml/group_borders.py @@ -0,0 +1,292 @@ +######################################################################### +# # +# # +# copyright 2002 Paul Henry Tremblay # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +# General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with this program; if not, write to the Free Software # +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA # +# 02111-1307 USA # +# # +# # +######################################################################### +import sys, os, tempfile, re +from libprs500.ebooks.rtf2xml import copy +class GroupBorders: + """ + Form lists. + Use RTF's own formatting to determine if a paragraph definition is part of a + list. + Use indents to determine items and how lists are nested. + """ + def __init__(self, + in_file, + bug_handler, + copy = None, + run_level = 1, + wrap = 0, + ): + """ + Required: + 'file' + Optional: + 'copy'-- whether to make a copy of result for debugging + 'temp_dir' --where to output temporary results (default is + directory from which the script is run.) + Returns: + nothing + """ + self.__file = in_file + self.__bug_handler = bug_handler + self.__copy = copy + self.__run_level = run_level + self.__write_to = tempfile.mktemp() + self.__wrap = wrap + def __initiate_values(self): + """ + Required: + Nothing + Return: + Nothing + Logic: + The self.__end_list is a list of tokens that will force a list to end. + Likewise, the self.__end_lines is a list of lines that forces a list to end. + """ + self.__state = "default" + self.__left_indent = 0 + self.__border_num = 0 + self.__list_type = 'not-defined' + self.__pard_def = "" + self.__all_lists = [] + self.__list_chunk = '' + self.__state_dict={ + 'default' : self.__default_func, + 'in_pard' : self.__in_pard_func, + 'after_pard' : self.__after_pard_func, + } + # section end + self.__end_list = [ + # section end + 'miNormal< + self.__name_regex = re.compile(r'([^<]+)') + self.__border_regex = re.compile(r'border-paragraph') + self.__found_appt = 0 + self.__line_num = 0 + self.__border_regex = re.compile(r'( 2: + msg = 'wrong flag' + raise self.__bug_handler, msg + elif self.__token_info in self.__end_list: + self.__write_obj.write('mi -1: + return 1 + return 0 + def __parse_pard_with_border(self, line): + border_string = '' + pard_string = '' + tokens = re.split(self.__border_regex, line) + for token in tokens: + if token[0:17] == '') + self.__found_appt = 0 + self.__line_num = 0 + def __in_pard_func(self, line): + """ + Required: + line -- the line of current text. + Return: + Nothing + Logic: + You are in a list, but in the middle of a paragraph definition. + Don't do anything until you find the end of the paragraph definition. + """ + if self.__token_info == 'mi 2: + msg = 'wrong flag' + raise self.__bug_handler, msg + elif self.__token_info in self.__end_list: + self.__write_obj.write('mi%s\n' % name) + self.__write_obj.write('mi%s\n' % (type) + ) + else: + sys.stderr.write('module is header\n') + sys.stderr.write('method is __found_header\n') + sys.stderr.write('no dict entry\n') + sys.stderr.write('line is %s' % line) + self.__write_to_head_obj.write( + 'minone\n' + ) + def __default_sep(self, line): + """Handle all tokens that are not header tokens""" + if self.__token_info[3:5] == 'hf': + self.__found_header(line) + self.__write_obj.write(line) + def __initiate_sep_values(self): + """ + initiate counters for separate_footnotes method. + """ + self.__bracket_count=0 + self.__ob_count = 0 + self.__cb_count = 0 + self.__header_bracket_count = 0 + self.__in_header = 0 + self.__header_count = 0 + self.__head_dict = { + 'head-left_' : ('header-left'), + 'head-right' : ('header-right'), + 'foot-left_' : ('footer-left'), + 'foot-right' : ('footer-right'), + 'head-first' : ('header-first' ), + 'foot-first' : ('footer-first' ), + 'header____' : ('header' ), + 'footer____' : ('footer' ), + } + def separate_headers(self): + """ + Separate all the footnotes in an RTF file and put them at the bottom, + where they are easier to process. Each time a footnote is found, + print all of its contents to a temporary file. Close both the main and + temporary file. Print the footnotes from the temporary file to the + bottom of the main file. + """ + self.__initiate_sep_values() + read_obj = open(self.__file) + self.__write_obj = open(self.__write_to, 'w') + self.__header_holder = tempfile.mktemp() + self.__write_to_head_obj = open(self.__header_holder, 'w') + line_to_read = 1 + while line_to_read: + line_to_read = read_obj.readline() + line = line_to_read + self.__token_info = line[:16] + # keep track of opening and closing brackets + if self.__token_info == 'ob(\d+)') + def __close_lists(self): + """ + Required: + Nothing + Return: + Nothing + Logic: + Reverse the list of dictionaries. Iterate through the list and + get the indent for each list. If the current indent is less than + or equal to the indent in the dictionary, close that level. + Keep track of how many levels you close. Reduce the list by that + many levels. + Reverse the list again. + """ + current_indent = self.__left_indent + self.__all_lists.reverse() + num_levels_closed = 0 + for the_dict in self.__all_lists: + list_indent = the_dict.get('left-indent') + if current_indent <= list_indent: + self.__write_end_item() + self.__write_end_list() + num_levels_closed += 1 + self.__all_lists = self.__all_lists[num_levels_closed:] + self.__all_lists.reverse() + def __close_sections(self, current_level): + self.__all_sections.reverse() + num_levels_closed = 0 + for level in self.__all_sections: + if current_level <= level: + self.__write_end_section() + num_levels_closed += 1 + self.__all_sections = self.__all_sections[num_levels_closed:] + self.__all_sections.reverse() + def __write_start_section(self, current_level, name): + section_num = '' + for the_num in self.__section_num: + section_num += '%s.' % the_num + section_num = section_num[:-1] + num_in_level = len(self.__all_sections) + num_in_level = self.__section_num[num_in_level] + level = len(self.__all_sections) + self.__write_obj.write( + 'mi%s%s%s' + '%s\n' + % (section_num, num_in_level, level, name) + ) + def __write_end_section(self): + self.__write_obj.write('mi 10: + self.__write_obj.write('mi%snot-in-table\n' % + hex_num) + if self.__run_level > 4: + # msg = 'no dictionary entry for %s\n' + # msg += 'the hexidecimal num is "%s"\n' % (hex_num) + # msg += 'dictionary is %s\n' % self.__current_dict_name + msg = 'Character "&#x%s;" does not appear to be valid (or is a control character)\n' % token + raise self.__bug_handler, msg + def __found_body_func(self, line): + self.__state = 'body' + self.__write_obj.write(line) + def __body_func(self, line): + """ + When parsing preamble + """ + self.__write_obj.write(line) + def __preamble_func(self, line): + action = self.__preamble_state_dict.get(self.__token_info) + if action != None: + action(line) + else: + self.__write_obj.write(line) + def __convert_preamble(self): + self.__state = 'preamble' + read_obj = open(self.__file, 'r') + self.__write_obj = open(self.__write_to, 'w') + line_to_read = 1 + while line_to_read: + line_to_read = read_obj.readline() + line = line_to_read + self.__token_info = line[:16] + action = self.__preamble_state_dict.get(self.__state) + if action == None: + sys.stderr.write('error no state found in hex_2_utf8', + self.__state + ) + action(line) + read_obj.close() + self.__write_obj.close() + copy_obj = copy.Copy(bug_handler = self.__bug_handler) + if self.__copy: + copy_obj.copy_file(self.__write_to, "preamble_utf_convert.data") + copy_obj.rename(self.__write_to, self.__file) + os.remove(self.__write_to) + def __preamble_for_body_func(self, line): + """ + Required: + line -- line to parse + Returns: + nothing + Logic: + Used when parsing the body. + """ + if self.__token_info == 'mi 1: + self.__font_list.pop() + else: + sys.stderr.write('module is hex_2_utf8\n') + sys.stderr.write('method is end_font_func\n') + sys.stderr.write('self.__font_list should be greater than one?\n') + face = self.__font_list[-1] + if face == 'Symbol' and self.__convert_symbol: + self.__current_dict_name = 'Symbol' + self.__current_dict = self.__symbol_dict + elif face == 'Wingdings' and self.__convert_wingdings: + self.__current_dict_name = 'Wingdings' + self.__current_dict = self.__wingdings_dict + elif face == 'Zapf Dingbats' and self.__convert_zapf: + self.__current_dict_name = 'Zapf Dingbats' + self.__current_dict = self.__dingbats_dict + else: + self.__current_dict_name = 'default' + self.__current_dict = self.__def_dict + def __start_special_font_func_old(self, line): + """ + Required: + line -- line + Returns; + nothing + Logic: + change the dictionary to use in conversion + """ + # for error checking + if self.__token_info == 'mi 1: + self.__caps_list.pop() + else: + sys.stderr.write('Module is hex_2_utf8\n') + sys.stderr.write('method is __end_caps_func\n') + sys.stderr.write('caps list should be more than one?\n') + def __text_func(self, line): + """ + Required: + line -- line to parse + Returns: + nothing + Logic: + if in caps, convert. Otherwise, print out. + """ + text = line[17:-1] + if self.__current_dict_name == 'Symbol'\ + or self.__current_dict_name == 'Wingdings'\ + or self.__current_dict_name == 'Zapf Dingbats': + the_string = '' + for letter in text: + hex_num = hex(ord(letter)) + hex_num = str(hex_num) + hex_num = hex_num.upper() + hex_num = hex_num[2:] + hex_num = '\'%s' % hex_num + converted = self.__current_dict.get(hex_num) + if converted == None: + sys.stderr.write('module is hex_2_ut8\n') + sys.stderr.write('method is __text_func\n') + sys.stderr.write('no hex value for "%s"\n' % hex_num) + else: + the_string += converted + self.__write_obj.write('txа|б)') +line2 = re.sub(reg_exp, my_sub_func, line) +print line2 +""" diff --git a/src/libprs500/ebooks/rtf2xml/info.py b/src/libprs500/ebooks/rtf2xml/info.py new file mode 100755 index 0000000000..e9bd12d568 --- /dev/null +++ b/src/libprs500/ebooks/rtf2xml/info.py @@ -0,0 +1,255 @@ +######################################################################### +# # +# # +# copyright 2002 Paul Henry Tremblay # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +# General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with this program; if not, write to the Free Software # +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA # +# 02111-1307 USA # +# # +# # +######################################################################### +import sys, os, tempfile +from libprs500.ebooks.rtf2xml import copy +class Info: + """ + Make tags for document-information + """ + def __init__(self, + in_file, + bug_handler, + copy = None, + run_level = 1, + ): + """ + Required: + 'file'--file to parse + Optional: + 'copy'-- whether to make a copy of result for debugging + 'temp_dir' --where to output temporary results (default is + directory from which the script is run.) + Returns: + nothing + """ + self.__file = in_file + self.__bug_handler = bug_handler + self.__copy = copy + self.__run_level = run_level + self.__write_to = tempfile.mktemp() + def __initiate_values(self): + """ + Initiate all values. + """ + self.__text_string = '' + self.__state = 'before_info_table' + self.__state_dict = { + 'before_info_table': self.__before_info_table_func, + 'after_info_table': self.__after_info_table_func, + 'in_info_table' : self.__in_info_table_func, + 'collect_text' : self.__collect_text_func, + 'collect_tokens' : self.__collect_tokens_func, + } + self.__info_table_dict = { + 'cw33\n + def __collect_tokens_func(self, line): + """ + Requires: + line -- line to parse + Returns: + nothing + Logic: + This function collects all the token information and adds it to + the text string until the end of the field is found. + First check of the end of the information field. If found, write + the text string to the file. + If not found, get the relevant information from the text string. + This information cannot be directly added to the text string, + because it exists in abbreviated form. (num-of-wor) + I want to check this information in a dictionary to convert it + to a longer, readable form. If the key does not exist in the + dictionary, print out an error message. Otherise add the value + to the text string. + (num-of-wor => number-of-words) + """ + #cw 3: + msg = 'no dictionary match for %s\n' % att + raise self.__bug_handler, msg + else: + self.__text_string += '<%s>%s' % (att_changed, value) + def __single_field_func(self, line, tag): + value = line[20:-1] + self.__write_obj.write( + 'mi%s\n' % (tag, tag, value) + ) + def __after_info_table_func(self, line): + """ + Requires: + line --line to write to file + Returns: + nothing + Logic: + After the end of the information table, simple write the line to + the file. + """ + self.__write_obj.write(line) + def fix_info(self): + """ + Requires: + nothing + Returns: + nothing (changes the original file) + Logic: + Read one line in at a time. Determine what action to take based on + the state. If the state is before the information table, look for the + beginning of the style table. + If the state is in the information table, use other methods to + parse the information + style table, look for lines with style info, and substitute the + number with the name of the style. If the state if afer the + information table, simply write the line to the output file. + """ + self.__initiate_values() + read_obj = open(self.__file, 'r') + self.__write_obj = open(self.__write_to, 'w') + line_to_read = 1 + while line_to_read: + line_to_read = read_obj.readline() + line = line_to_read + self.__token_info = line[:16] + action = self.__state_dict.get(self.__state) + if action == None: + sys.stderr.write('no no matching state in module styles.py\n') + sys.stderr.write(self.__state + '\n') + action(line) + read_obj.close() + self.__write_obj.close() + copy_obj = copy.Copy(bug_handler = self.__bug_handler) + if self.__copy: + copy_obj.copy_file(self.__write_to, "info.data") + copy_obj.rename(self.__write_to, self.__file) + os.remove(self.__write_to) diff --git a/src/libprs500/ebooks/rtf2xml/inline.py b/src/libprs500/ebooks/rtf2xml/inline.py new file mode 100755 index 0000000000..8ffdbc76bc --- /dev/null +++ b/src/libprs500/ebooks/rtf2xml/inline.py @@ -0,0 +1,411 @@ +import sys, os, tempfile +from libprs500.ebooks.rtf2xml import copy +""" +States. +1. default + 1. an open bracket ends this state. + 2. Text print out text. Print out any groups_in_waiting. + 3. closed bracket. Close groups +2. after an open bracket + 1. The lack of a control word ends this state. + 2. paragraph end -- close out all tags + 3. footnote beg -- close out all tags +""" +class Inline: + """ + Make inline tags within lists. + Logic: + """ + def __init__(self, + in_file, + bug_handler, + copy=None, + run_level = 1,): + """ + Required: + 'file'--file to parse + Optional: + 'copy'-- whether to make a copy of result for debugging + 'temp_dir' --where to output temporary results (default is + directory from which the script is run.) + Returns: + nothing + """ + self.__file = in_file + self.__bug_handler = bug_handler + self.__copy = copy + self.__run_level = run_level + self.__write_to = tempfile.mktemp() + def __initiate_values(self): + """ + Initiate all values. + """ + self.__state_dict = { + 'default': self.__default_func, + 'after_open_bracket': self.__after_open_bracket_func, + } + self.__default_dict = { + 'ob ci + 'annotation' : 'annotation', + 'blue______' : 'blue', + 'bold______' : 'bold', + 'caps______' : 'caps', + 'char-style' : 'character-style', + 'dbl-strike' : 'double-strike-through', + 'emboss____' : 'emboss', + 'engrave___' : 'engrave', + 'font-color' : 'font-color', + 'font-down_' : 'subscript', + 'font-size_' : 'font-size', + 'font-style' : 'font-style', + 'font-up___' : 'superscript', + 'footnot-mk' : 'footnote-marker', + 'green_____' : 'green', + 'hidden____' : 'hidden', + 'italics___' : 'italics', + 'outline___' : 'outline', + 'red_______' : 'red', + 'shadow____' : 'shadow', + 'small-caps' : 'small-caps', + 'strike-thr' : 'strike-through', + 'subscript_' : 'subscript', + 'superscrip' : 'superscript', + 'underlined' : 'underlined', + } + self.__caps_list = ['false'] + def __set_list_func(self, line): + """ + Requires: + line--line of text + Returns: + nothing + Logic: + """ + if self.__place == 'in_list': + if self.__token_info == 'mi 3: + msg = 'self.__inline_list is %s\n' % self.__inline_list + raise self.__bug_handler, msg + self.__write_obj.write('error\n') + self.__groups_in_waiting[0] = 0 + return + for the_dict in inline_list: + if the_dict['contains_inline']: + the_keys = the_dict.keys() + if 'font-style' in the_keys: + face = the_dict['font-style'] + self.__write_obj.write('mi%s' % (the_key, the_dict[the_key])) + self.__write_obj.write('\n') + self.__groups_in_waiting[0] = 0 + def __end_para_func(self, line): + """ + Requires: + line -- line of text + Returns: + nothing + Logic: + Slice from the end the groups in waiting. + Iterate through the list. If the dictionary contaings info, write + a closing tag. + """ + if not self.__in_para: + return + if self.__groups_in_waiting[0] == 0: + inline_list = self.__inline_list + else: + last_index = -1 * self.__groups_in_waiting[0] + inline_list = self.__inline_list[0:last_index] + for the_dict in inline_list: + contains_info = the_dict.get('contains_inline') + if contains_info: + the_keys = the_dict.keys() + if 'font-style' in the_keys: + self.__write_obj.write('mi%s' % (the_key, the_dict[the_key])) + self.__write_obj.write('\n') + self.__groups_in_waiting[0] = 0 + def __found_field_func(self, line): + """ + Just a default function to make sure I don't prematurely exit + default state + """ + pass + def form_tags(self): + """ + Requires: + area--area to parse (list or non-list) + Returns: + nothing + Logic: + Read one line in at a time. Determine what action to take based on + the state. + """ + self.__initiate_values() + read_obj = open(self.__file, 'r') + self.__write_obj = open(self.__write_to, 'w') + line_to_read = 1 + while line_to_read: + line_to_read = read_obj.readline() + line = line_to_read + token = line[0:-1] + self.__token_info = '' + if token == 'tx%s\n' % self.__list_type) + 'mi a list. The first item is a dictionary. + The second item is a list containing a dictionary: + [{list-id:[]}, [{}]] + self.__all_lists[-1][0] => a dictionary of the list attributes + self.__all_lists[-1][-1] => a list with just a dictionary + self.__all_lists[-1][-1][0] => the dictionary of level attributes + """ + self.__state = 'level' + self.__level_ob_count = self.__ob_count + self.__all_lists[-1].append([]) + the_dict = {} + self.__all_lists[-1][-1].append(the_dict) + self.__level_dict + def __level_func(self, line): + """ + Requires: + line -- line to parse + Returns: + nothing + Logic: + Look for the end of the this group. + Change states if an open bracket is found. + Add attributes to all_dicts if an appropriate token is found. + """ + if self.__token_info == 'cb 3: + msg = 'No matching token after open bracket\n' + msg += 'token is "%s\n"' % (line) + raise self.__bug_handler + def __add_to_final_line(self): + """ + Method no longer used. + """ + self.__list_table_final = 'mi value pair. + Remove the first item (the dictionary) form this list. Now iterate + through what is left in the list. Each list will conatin one item, + a dictionary. Get this dictionary and print out key => value pair. + """ + not_allow = ['list-id',] + id = 0 + self.__list_table_final = 'mi(\d+)') + self.__lv_regex = re.compile(r'\(\d+)') + self.__found_appt = 0 + self.__line_num = 0 + def __in_pard_func(self, line): + """ + Required: + line -- the line of current text. + Return: + Nothing + Logic: + You are in a list, but in the middle of a paragraph definition. + Don't do anything until you find the end of the paragraph definition. + """ + if self.__token_info == 'mi last_list_indent: + self.__write_obj.write(self.__list_chunk) + self.__write_start_list(id) + else: + self.__write_end_item() + self.__write_obj.write(self.__list_chunk) + self.__write_start_item() + self.__list_chunk = '' + def __close_lists(self): + """ + Required: + Nothing + Return: + Nothing + Logic: + Reverse the list of dictionaries. Iterate through the list and + get the indent for each list. If the current indent is less than + or equal to the indent in the dictionary, close that level. + Keep track of how many levels you close. Reduce the list by that + many levels. + Reverse the list again. + """ + if self.__line_num < 25 and self.__found_appt: + sys.stderr.write('in closing out lists\n') + sys.stderr.write('current_indent is "%s"\n' % self.__left_indent) + current_indent = self.__left_indent + self.__all_lists.reverse() + num_levels_closed = 0 + for the_dict in self.__all_lists: + list_indent = the_dict.get('left-indent') + if self.__line_num < 25 and self.__found_appt: + sys.stderr.write('last indent is "%s"' % list_indent) + if current_indent <= list_indent: + self.__write_end_item() + self.__write_end_list() + num_levels_closed += 1 + self.__all_lists = self.__all_lists[num_levels_closed:] + self.__all_lists.reverse() + def __write_end_list(self): + """ + Required: + Nothing + Return: + Nothing + Logic: + Write the end of a list. + """ + self.__write_obj.write('mi%s%s' + % (id, lev_num) + ) + list_dict = {} + if self.__list_of_lists: # older RTF won't generate a list_of_lists + index_of_list = self.__get_index_of_list(id) + if index_of_list != None:# found a matching id + list_dict = self.__list_of_lists[index_of_list][0] + level = int(self.__level) + 1 + level_dict = self.__list_of_lists[index_of_list][level][0] + list_type = level_dict.get('numbering-type') + if list_type == 'bullet': + list_type = 'unordered' + else: + list_type = 'ordered' + self.__write_obj.write( + '%s' % (list_type)) + else: # no matching id + self.__write_obj.write( + '%s' % (self.__list_type)) + else:# older RTF + self.__write_obj.write( + '%s' % (self.__list_type)) + # if you want to dump all the info to the list, rather than + # keeping it in the table above, change self.__write_list_info + # to true. + if self.__list_of_lists and self.__write_list_info and list_dict: + not_allow = ['list-id',] + the_keys_list = list_dict.keys() + for the_key in the_keys_list: + if the_key in not_allow: + continue + self.__write_obj.write('<%s>%s' % (the_key, list_dict[the_key])) + the_keys_level = level_dict.keys() + for the_key in the_keys_level: + self.__write_obj.write('<%s>%s' % (the_key, level_dict[the_key])) + self.__write_obj.write('\n') + self.__write_obj.write( + 'mi 0: + sys.stderr.write('Module is make_lists.py\n' + 'Method is __get_index_of_list\n' + 'The main list does not appear to have a matching id for %s \n' + % (id) + ) + # sys.stderr.write(repr(self.__list_of_lists)) +# if self.__run_level > 3: +# msg = 'level is "%s"\n' % self.__run_level +# self.__bug_handler + def __write_start_item(self): + self.__write_obj.write('mi 3: + msg = 'This override does not appear to have a list-id\n' + raise self.__bug_handler, msg + current_table_id = override_dict.get('list-table-id') + if current_table_id == None and self.__run_level > 3: + msg = 'This override does not appear to have a list-table-id\n' + raise self.__bug_handler, msg + counter = 0 + for list in self.__list_of_lists: + info_dict = list[0] + old_table_id = info_dict.get('list-table-id') + if old_table_id == current_table_id: + self.__list_of_lists[counter][0]['list-id'].append(list_id) + break + counter += 1 + def __parse_lines(self, line): + """ + Requires: + line --ine to parse + Returns: + nothing + Logic: + Break the into tokens by splitting it on the newline. + Call on the method according to the state. + """ + lines = line.split('\n') + self.__ob_count = 0 + self.__ob_group = 0 + for line in lines: + self.__token_info = line[:16] + if self.__token_info == 'ob 3: + msg = 'No matching token after open bracket\n' + msg += 'token is "%s\n"' % (line) + raise self.__bug_handler, msg + def __write_final_string(self): + """ + Requires: + line -- line to parse + Returns: + nothing + Logic: + First write out the override-table tag. + Iteratere through the dictionaries in the main override_list. + For each dictionary, write an empty tag "override-list". Add + the attributes and values of the tag from the dictionary. + """ + self.__override_table_final = 'mi%s' % (the_key, the_dict[the_key]) + self.__override_table_final += '\n' + self.__override_table_final += '\n' + self.__override_table_final += \ + 'mi pf + 'par-end___' : 'para', + 'par-def___' : 'paragraph-definition', + 'keep-w-nex' : 'keep-with-next', + 'widow-cntl' : 'widow-control', + 'adjust-rgt' : 'adjust-right', + 'language__' : 'language', + 'right-inde' : 'right-indent', + 'fir-ln-ind' : 'first-line-indent', + 'left-inden' : 'left-indent', + 'space-befo' : 'space-before', + 'space-afte' : 'space-after', + 'line-space' : 'line-spacing', + 'default-ta' : 'default-tab', + 'align_____' : 'align', + 'widow-cntr' : 'widow-control', + # stylesheet = > ss + 'style-shet' : 'stylesheet', + 'based-on__' : 'based-on-style', + 'next-style' : 'next-style', + 'char-style' : 'character-style', + # this is changed to get a nice attribute + 'para-style' : 'name', + # graphics => gr + 'picture___' : 'pict', + 'obj-class_' : 'obj_class', + 'mac-pic___' : 'mac-pict', + # section => sc + 'section___' : 'section-new', + 'sect-defin' : 'section-reset', + 'sect-note_' : 'endnotes-in-section', + # list=> ls + 'list-text_' : 'list-text', + # this line must be wrong because it duplicates an earlier one + 'list-text_' : 'list-text', + 'list______' : 'list', + 'list-lev-d' : 'list-level-definition', + 'list-cardi' : 'list-cardinal-numbering', + 'list-decim' : 'list-decimal-numbering', + 'list-up-al' : 'list-uppercase-alphabetic-numbering', + 'list-up-ro' : 'list-uppercae-roman-numbering', + 'list-ord__' : 'list-ordinal-numbering', + 'list-ordte' : 'list-ordinal-text-numbering', + 'list-bulli' : 'list-bullet', + 'list-simpi' : 'list-simple', + 'list-conti' : 'list-continue', + 'list-hang_' : 'list-hang', + # 'list-tebef' : 'list-text-before', + 'list-level' : 'level', + 'list-id___' : 'list-id', + 'list-start' : 'list-start', + 'nest-level' : 'nest-level', + # duplicate + 'list-level' : 'list-level', + # notes => nt + 'footnote__' : 'footnote', + 'type______' : 'type', + # anchor => an + 'toc_______' : 'anchor-toc', + 'book-mk-st' : 'bookmark-start', + 'book-mk-en' : 'bookmark-end', + 'index-mark' : 'anchor-index', + 'place_____' : 'place', + # field => fd + 'field_____' : 'field', + 'field-inst' : 'field-instruction', + 'field-rslt' : 'field-result', + 'datafield_' : 'data-field', + # info-tables => it + 'font-table' : 'font-table', + 'colr-table' : 'color-table', + 'lovr-table' : 'list-override-table', + 'listtable_' : 'list-table', + 'revi-table' : 'revision-table', + # character info => ci + 'hidden____' : 'hidden', + 'italics___' : 'italics', + 'bold______' : 'bold', + 'strike-thr' : 'strike-through', + 'shadow____' : 'shadow', + 'outline___' : 'outline', + 'small-caps' : 'small-caps', + 'caps______' : 'caps', + 'dbl-strike' : 'double-strike-through', + 'emboss____' : 'emboss', + 'engrave___' : 'engrave', + 'subscript_' : 'subscript', + 'superscrip' : 'superscipt', + 'font-style' : 'font-style', + 'font-color' : 'font-color', + 'font-size_' : 'font-size', + 'font-up___' : 'superscript', + 'font-down_' : 'subscript', + 'red_______' : 'red', + 'blue______' : 'blue', + 'green_____' : 'green', + # table => tb + 'row-def___' : 'row-definition', + 'cell______' : 'cell', + 'row_______' : 'row', + 'in-table__' : 'in-table', + 'columns___' : 'columns', + 'row-pos-le' : 'row-position-left', + 'cell-posit' : 'cell-position', + # preamble => pr + # underline + 'underlined' : 'underlined', + # border => bd + 'bor-t-r-hi' : 'border-table-row-horizontal-inside', + 'bor-t-r-vi' : 'border-table-row-vertical-inside', + 'bor-t-r-to' : 'border-table-row-top', + 'bor-t-r-le' : 'border-table-row-left', + 'bor-t-r-bo' : 'border-table-row-bottom', + 'bor-t-r-ri' : 'border-table-row-right', + 'bor-cel-bo' : 'border-cell-bottom', + 'bor-cel-to' : 'border-cell-top', + 'bor-cel-le' : 'border-cell-left', + 'bor-cel-ri' : 'border-cell-right', + 'bor-par-bo' : 'border-paragraph-bottom', + 'bor-par-to' : 'border-paragraph-top', + 'bor-par-le' : 'border-paragraph-left', + 'bor-par-ri' : 'border-paragraph-right', + 'bor-par-bo' : 'border-paragraph-box', + 'bor-for-ev' : 'border-for-every-paragraph', + 'bor-outsid' : 'border-outisde', + 'bor-none__' : 'border', + # border type => bt + 'bdr-single' : 'single', + 'bdr-doubtb' : 'double-thickness-border', + 'bdr-shadow' : 'shadowed-border', + 'bdr-double' : 'double-border', + 'bdr-dotted' : 'dotted-border', + 'bdr-dashed' : 'dashed', + 'bdr-hair__' : 'hairline', + 'bdr-inset_' : 'inset', + 'bdr-das-sm' : 'dash-small', + 'bdr-dot-sm' : 'dot-dash', + 'bdr-dot-do' : 'dot-dot-dash', + 'bdr-outset' : 'outset', + 'bdr-trippl' : 'tripple', + 'bdr-thsm__' : 'thick-thin-small', + 'bdr-htsm__' : 'thin-thick-small', + 'bdr-hthsm_' : 'thin-thick-thin-small', + 'bdr-thm__' : 'thick-thin-medium', + 'bdr-htm__' : 'thin-thick-medium', + 'bdr-hthm_' : 'thin-thick-thin-medium', + 'bdr-thl__' : 'thick-thin-large', + 'bdr-hthl_' : 'think-thick-think-large', + 'bdr-wavy_' : 'wavy', + 'bdr-d-wav' : 'double-wavy', + 'bdr-strip' : 'striped', + 'bdr-embos' : 'emboss', + 'bdr-engra' : 'engrave', + 'bdr-frame' : 'frame', + 'bdr-li-wid' : 'line-width', + } + self.__tabs_dict = { + 'cw 3: + msg = 'no entry for %s\n' % self.__token_info + raise self.__bug_handler, msg + def __tab_leader_func(self, line): + """ + """ + leader = self.__tab_type_dict.get(self.__token_info) + if leader != None: + type = 'tabs-%s' % self.__tab_type + self.__att_val_dict['tabs'] += '%s^' % leader + else: + if self.__run_level > 3: + msg = 'no entry for %s\n' % self.__token_info + raise self.__bug_handler, msg + def __tab_bar_func(self, line): + """ + """ + # self.__att_val_dict['tabs-bar'] += '%s:' % line[20:-1] + self.__att_val_dict['tabs'] += 'bar:%s;' % (line[20:-1]) + self.__tab_type = 'left' + def __parse_border(self, line): + """ + Requires: + line --line to parse + Returns: + nothing (updates dictionary) + Logic: + Uses the border_parse module to return a dictionary of attribute + value pairs for a border line. + """ + border_dict = self.__border_obj.parse_border(line) + self.__att_val_dict.update(border_dict) + def __para_def_in_para_def_func(self, line): + """ + Requires: + line --line to parse + Returns: + nothing + Logic: + I have found a \pard while I am collecting tokens. I want to reset + the dectionary and do nothing else. + """ + # Change this + self.__state = 'collect_tokens' + self.__reset_dict() + def __end_para_def_func(self, line): + """ + Requires: + Nothing + Returns: + Nothing + Logic: + The previous state was collect tokens, and I have found the start + of a paragraph. I want to outut the defintion tag; output the line + itself (telling me of the beginning of a paragraph);change the + state to 'in_paragraphs'; + """ + self.__write_para_def_beg() + self.__write_obj.write(line) + self.__state = 'in_paragraphs' + def __start_para_after_def_func(self, line): + """ + Requires: + Nothing + Returns: + Nothing + Logic: + The state was is after_para_def. and I have found the start of a + paragraph. I want to outut the defintion tag; output the line + itself (telling me of the beginning of a paragraph);change the + state to 'in_paragraphs'. + (I now realize that this is absolutely identical to the function above!) + """ + self.__write_para_def_beg() + self.__write_obj.write(line) + self.__state = 'in_paragraphs' + def __after_para_def_func(self, line): + """ + Requires: + line -- line to parse + Returns: + nothing + Logic: + Check if the token info is the start of a paragraph. If so, call + on the function found in the value of the dictionary. + """ + action = self.__after_para_def_dict.get(self.__token_info) + if self.__token_info == 'cw%s' % ('tabs', the_value)) + keys = self.__att_val_dict.keys() + keys.sort() + for key in keys: + if key != 'name' and key !='style-num' and key != 'in-table'\ + and key not in tabs_list: + style_string += ('<%s>%s' % (key, self.__att_val_dict[key])) + style_string += '\n' + self.__body_style_strings.append(style_string) + def __write_para_def_beg(self): + """ + Requires: + nothing + Returns: + nothing + Logic: + Print out the beginning of the pargraph definition tag, and the markers + that let me know when I have reached this tag. (These markers are + used for later parsing.) + """ + self.__get_num_of_style() + table = self.__att_val_dict.get('in-table') + if table: + # del self.__att_val_dict['in-table'] + self.__write_obj.write('mi%s' % self.__att_val_dict['name']) + self.__write_obj.write('%s' % self.__att_val_dict['style-num']) + tabs_list = ['tabs-left', 'tabs-right', 'tabs-decimal', 'tabs-center', + 'tabs-bar', 'tabs'] + """ + for tab_item in tabs_list: + if self.__att_val_dict[tab_item] != '': + the_value = self.__att_val_dict[tab_item] + the_value = the_value[:-1] + self.__write_obj.write('<%s>%s' % (tab_item, the_value)) + """ + if self.__att_val_dict['tabs'] != '': + the_value = self.__att_val_dict['tabs'] + # the_value = the_value[:-1] + self.__write_obj.write('<%s>%s' % ('tabs', the_value)) + keys = self.__att_val_dict.keys() + keys.sort() + for key in keys: + if key != 'name' and key !='style-num' and key != 'in-table'\ + and key not in tabs_list: + self.__write_obj.write('<%s>%s' % (key, self.__att_val_dict[key])) + self.__write_obj.write('\n') + self.__write_obj.write(self.__start2_marker) + if 'font-style' in keys: + face = self.__att_val_dict['font-style'] + self.__write_obj.write('mi 1: + sys.stderr.write('Removing files from old pict directory...\n') + all_files = os.listdir(self.__dir_name) + for the_file in all_files: + the_file = os.path.join(self.__dir_name, the_file) + try: + os.remove(the_file) + except OSError: + pass + if self.__run_level > 1: + sys.stderr.write('Files removed.\n') + def __create_pict_file(self): + """Create a file for all the pict data to be written to. + """ + self.__pict_file = os.path.join(self.__dir_name, 'picts.rtf') + write_pic_obj = open(self.__pict_file, 'w') + write_pic_obj.close() + self.__write_pic_obj = open(self.__pict_file, 'a') + def __in_pict_func(self, line): + if self.__cb_count == self.__pict_br_count: + self.__in_pict = 0 + self.__write_pic_obj.write("}\n") + return 1 + else: + action = self.__pict_dict.get(self.__token_info) + if action: + line = action(line) + self.__write_pic_obj.write(line) + return 0 + def __default(self, line, write_obj): + """Determine if each token marks the beginning of pict data. + If it does, create a new file to write data to (if that file + has not already been created.) Set the self.__in_pict flag to true. + If the line does not contain pict data, return 1 + """ + """ + $pict_count++; + $pict_count = sprintf("%03d", $pict_count); + print OUTPUT "dv$pict_count\n"; + """ + if self.__token_info == 'cw%03d\n" % self.__pict_count) + write_obj.write('mi%03d\n' % self.__pict_count) + write_obj.write('mi%s' % (key, self.__page[key]) + ) + self.__write_obj.write('\n') +#mi%s' % (key, self.__section[key]) + ) + self.__write_obj.write('\n') + def __section_func(self, line): + """ + Add info pertaining to section to the self.__section dictionary, to be + printed out later. + """ + info = self.__translate_sec.get(line[6:16]) + if info == None: + sys.stderr.write ('woops!\n') + else: + self.__section[info] = 'true' + def __body_func(self, line): + self.__write_obj.write(line) + def __default_func(self, line): + # either in preamble or in body + pass + def __para_def_func(self, line): + # if self.__ob_group == 1 + # this tells dept of group + if self.__cb_count == '0002': + self.__state = 'body' + self.__write_preamble() + self.__write_obj.write(line) + def __text_func(self, line): + """ + If the cb_count is less than 1, you have hit the body + For older RTF + Newer RTF should never have to use this function + """ + if self.__cb_count == '': + cb_count = '0002' + else: + cb_count = self.__cb_count + # ignore previous lines + # should be + # if self.__ob_group == 1 + # this tells dept of group + if cb_count == '0002': + self.__state = 'body' + self.__write_preamble() + self.__write_obj.write(line) + def __row_def_func(self, line): + # if self.__ob_group == 1 + # this tells dept of group + if self.__cb_count == '0002': + self.__state = 'body' + self.__write_preamble() + self.__write_obj.write(line) + def __new_section_func(self, line): + """ + This is new. The start of a section marks the end of the preamble + """ + if self.__cb_count == '0002': + self.__state = 'body' + self.__write_preamble() + else: + sys.stderr.write('module is preamble_div\n') + sys.stderr.write('method is __new_section_func\n') + sys.stderr.write('bracket count should be 2?\n') + self.__write_obj.write(line) + def __write_preamble(self): + """ + Write all the strings, which represent all the data in the preamble. + Write a body and section beginning. + """ + if self.__no_namespace: + self.__write_obj.write( + 'mihttp://rtf2xml.sourceforge.net/\n') + self.__write_obj.write('mi1\n') + # self.__print_sec_info() + # self.__write_obj.write('mi%s%s' + '%s\n' % (self.__default_font, self.__code_page, + self.__platform) + ) + def __found_list_table_func(self, line): + self.__state = 'list_table' + def __list_table_func(self, line): + if self.__token_info == 'mi" for sub fields + """ + def __init__(self, + in_file, + exception_handler, + bug_handler, + copy = None, + run_level = 1, + ): + self.__file = in_file + self.__bug_handler = bug_handler + self.__copy = copy + self.__run_level = run_level + self.__write_to = tempfile.mktemp() + self.initiate_token_dict() + ##self.initiate_token_actions() + self.compile_expressions() + self.__bracket_count=0 + self.__exception_handler = exception_handler + self.__bug_handler = bug_handler + def compile_expressions(self): + self.__num_exp = re.compile(r"([a-zA-Z]+)(.*)") + self.__utf_exp = re.compile(r'(&.*?;)') + def initiate_token_dict(self): + self.__return_code = 0 + self.dict_token={ + # unicode + 'mshex' : ('nu', '__________', self.__ms_hex_func), + # brackets + '{' : ('nu', '{', self.ob_func), + '}' : ('nu', '}', self.cb_func), + # microsoft characters + 'ldblquote' : ('mc', 'ldblquote', self.ms_sub_func), + 'rdblquote' : ('mc', 'rdblquote', self.ms_sub_func), + 'rquote' : ('mc', 'rquote', self.ms_sub_func), + 'lquote' : ('mc', 'lquote', self.ms_sub_func), + 'emdash' : ('mc', 'emdash', self.ms_sub_func), + 'endash' : ('mc', 'endash', self.ms_sub_func), + 'bullet' : ('mc', 'bullet', self.ms_sub_func), + '~' : ('mc', '~', self.ms_sub_func), + 'tab' : ('mc', 'tab', self.ms_sub_func), + '_' : ('mc', '_', self.ms_sub_func), + ';' : ('mc', ';', self.ms_sub_func), + # this must be wrong + '-' : ('mc', '-', self.ms_sub_func), + # misc => ml + '*' : ('ml', 'asterisk__', self.default_func), + ':' : ('ml', 'colon_____', self.default_func), + # text + 'backslash' : ('nu', '\\', self.text_func), + 'ob' : ('nu', '{', self.text_func), + 'cb' : ('nu', '}', self.text_func), + # paragraph formatting => pf + 'page' : ('pf', 'page-break', self.default_func), + 'par' : ('pf', 'par-end___', self.default_func), + 'pard' : ('pf', 'par-def___', self.default_func), + 'keepn' : ('pf', 'keep-w-nex', self.bool_st_func), + 'widctlpar' : ('pf', 'widow-cntl', self.bool_st_func), + 'adjustright' : ('pf', 'adjust-rgt', self.bool_st_func), + 'lang' : ('pf', 'language__', self.__language_func), + 'ri' : ('pf', 'right-inde', self.divide_by_20), + 'fi' : ('pf', 'fir-ln-ind', self.divide_by_20), + 'li' : ('pf', 'left-inden', self.divide_by_20), + 'sb' : ('pf', 'space-befo', self.divide_by_20), + 'sa' : ('pf', 'space-afte', self.divide_by_20), + 'sl' : ('pf', 'line-space', self.divide_by_20), + 'deftab' : ('pf', 'default-ta', self.divide_by_20), + 'ql' : ('pf', 'align_____ ss + 'stylesheet' : ('ss', 'style-shet', self.default_func), + 'sbasedon' : ('ss', 'based-on__', self.default_func), + 'snext' : ('ss', 'next-style', self.default_func), + 'cs' : ('ss', 'char-style', self.default_func), + 's' : ('ss', 'para-style', self.default_func), + # graphics => gr + 'pict' : ('gr', 'picture___', self.default_func), + 'objclass' : ('gr', 'obj-class_', self.default_func), + 'macpict' : ('gr', 'mac-pic___', self.default_func), + # section => sc + 'sect' : ('sc', 'section___', self.default_func), + 'sectd' : ('sc', 'sect-defin', self.default_func), + 'endhere' : ('sc', 'sect-note_', self.default_func), + # list=> ls + 'pntext' : ('ls', 'list-text_', self.default_func), + # this line must be wrong because it duplicates an earlier one + 'listtext' : ('ls', 'list-text_', self.default_func), + 'pn' : ('ls', 'list______', self.default_func), + 'pnseclvl' : ('ls', 'list-level', self.default_func), + 'pncard' : ('ls', 'list-cardi', self.bool_st_func), + 'pndec' : ('ls', 'list-decim', self.bool_st_func), + 'pnucltr' : ('ls', 'list-up-al', self.bool_st_func), + 'pnucrm' : ('ls', 'list-up-ro', self.bool_st_func), + 'pnord' : ('ls', 'list-ord__', self.bool_st_func), + 'pnordt' : ('ls', 'list-ordte', self.bool_st_func), + 'pnlvlblt' : ('ls', 'list-bulli', self.bool_st_func), + 'pnlvlbody' : ('ls', 'list-simpi', self.bool_st_func), + 'pnlvlcont' : ('ls', 'list-conti', self.bool_st_func), + 'pnhang' : ('ls', 'list-hang_', self.bool_st_func), + 'pntxtb' : ('ls', 'list-tebef', self.bool_st_func), + 'ilvl' : ('ls', 'list-level', self.default_func), + 'ls' : ('ls', 'list-id___', self.default_func), + 'pnstart' : ('ls', 'list-start', self.default_func), + 'itap' : ('ls', 'nest-level', self.default_func), + 'leveltext' : ('ls', 'level-text', self.default_func), + 'levelnumbers' : ('ls', 'level-numb', self.default_func), + 'list' : ('ls', 'list-in-tb', self.default_func), + 'listlevel' : ('ls', 'list-tb-le', self.default_func), + 'listname' : ('ls', 'list-name_', self.default_func), + 'listtemplateid' : ('ls', 'ls-tem-id_', self.default_func), + 'leveltemplateid' : ('ls', 'lv-tem-id_', self.default_func), + 'listhybrid' : ('ls', 'list-hybri', self.default_func), + 'levelstartat' : ('ls', 'level-star', self.default_func), + 'levelspace' : ('ls', 'level-spac', self.divide_by_20), + 'levelindent' : ('ls', 'level-inde', self.default_func), + 'levelnfc' : ('ls', 'level-type', self.__list_type_func), + 'levelnfcn' : ('ls', 'level-type', self.__list_type_func), + 'listid' : ('ls', 'lis-tbl-id', self.default_func), + 'listoverride' : ('ls', 'lis-overid', self.default_func), + # duplicate + 'pnlvl' : ('ls', 'list-level', self.default_func), + # root info => ri + 'rtf' : ('ri', 'rtf_______', self.default_func), + 'deff' : ('ri', 'deflt-font', self.default_func), + 'mac' : ('ri', 'macintosh_', self.default_func), + 'ansi' : ('ri', 'ansi______', self.default_func), + 'ansicpg' : ('ri', 'ansi-codpg', self.default_func), + # notes => nt + 'footnote' : ('nt', 'footnote__', self.default_func), + 'ftnalt' : ('nt', 'type______ an + 'tc' : ('an', 'toc_______', self.default_func), + 'bkmkstt' : ('an', 'book-mk-st', self.default_func), + 'bkmkstart' : ('an', 'book-mk-st', self.default_func), + 'bkmkend' : ('an', 'book-mk-en', self.default_func), + 'xe' : ('an', 'index-mark', self.default_func), + 'rxe' : ('an', 'place_____', self.default_func), + # index => in + 'bxe' : ('in', 'index-bold', self.default_func), + 'ixe' : ('in', 'index-ital', self.default_func), + 'txe' : ('in', 'index-see_', self.default_func), + # table of contents => tc + 'tcl' : ('tc', 'toc-level_', self.default_func), + 'tcn' : ('tc', 'toc-sup-nu', self.default_func), + # field => fd + 'field' : ('fd', 'field_____', self.default_func), + 'fldinst' : ('fd', 'field-inst', self.default_func), + 'fldrslt' : ('fd', 'field-rslt', self.default_func), + 'datafield' : ('fd', 'datafield_', self.default_func), + # info-tables => it + 'fonttbl' : ('it', 'font-table', self.default_func), + 'colortbl' : ('it', 'colr-table', self.default_func), + 'listoverridetable' : ('it', 'lovr-table', self.default_func), + 'listtable' : ('it', 'listtable_', self.default_func), + 'revtbl' : ('it', 'revi-table', self.default_func), + # character info => ci + 'b' : ('ci', 'bold______', self.bool_st_func), + 'blue' : ('ci', 'blue______', self.color_func), + 'caps' : ('ci', 'caps______', self.bool_st_func), + 'cf' : ('ci', 'font-color', self.default_func), + 'chftn' : ('ci', 'footnot-mk', self.bool_st_func), + 'dn' : ('ci', 'font-down_', self.divide_by_2), + 'embo' : ('ci', 'emboss____', self.bool_st_func), + 'f' : ('ci', 'font-style', self.default_func), + 'fs' : ('ci', 'font-size_', self.divide_by_2), + 'green' : ('ci', 'green_____', self.color_func), + 'i' : ('ci', 'italics___', self.bool_st_func), + 'impr' : ('ci', 'engrave___', self.bool_st_func), + 'outl' : ('ci', 'outline___', self.bool_st_func), + 'plain' : ('ci', 'plain_____', self.bool_st_func), + 'red' : ('ci', 'red_______', self.color_func), + 'scaps' : ('ci', 'small-caps', self.bool_st_func), + 'shad' : ('ci', 'shadow____', self.bool_st_func), + 'strike' : ('ci', 'strike-thr', self.bool_st_func), + 'striked' : ('ci', 'dbl-strike', self.bool_st_func), + 'sub' : ('ci', 'subscript_', self.bool_st_func), + 'super' : ('ci', 'superscrip', self.bool_st_func), + 'nosupersub' : ('ci', 'no-su-supe', self.__no_sup_sub_func), + 'up' : ('ci', 'font-up___', self.divide_by_2), + 'v' : ('ci', 'hidden____', self.default_func), + # table => tb + 'trowd' : ('tb', 'row-def___', self.default_func), + 'cell' : ('tb', 'cell______', self.default_func), + 'row' : ('tb', 'row_______', self.default_func), + 'intbl' : ('tb', 'in-table__', self.default_func), + 'cols' : ('tb', 'columns___', self.default_func), + 'trleft' : ('tb', 'row-pos-le', self.divide_by_20), + 'cellx' : ('tb', 'cell-posit', self.divide_by_20), + 'trhdr' : ('tb', 'row-header', self.default_func), + # preamble => pr + # document information => di + 'info' : ('di', 'doc-info__', self.default_func), + 'author' : ('di', 'author____', self.default_func), + 'operator' : ('di', 'operator__', self.default_func), + 'title' : ('di', 'title_____', self.default_func), + 'keywords' : ('di', 'keywords__', self.default_func), + 'doccomm' : ('di', 'doc-notes_', self.default_func), + 'comment' : ('di', 'doc-notes_', self.default_func), + 'subject' : ('di', 'subject___', self.default_func), + 'creatim' : ('di', 'create-tim', self.default_func), + 'yr' : ('di', 'year______', self.default_func), + 'mo' : ('di', 'month_____', self.default_func), + 'dy' : ('di', 'day_______', self.default_func), + 'min' : ('di', 'minute____', self.default_func), + 'revtim' : ('di', 'revis-time', self.default_func), + 'nofwords' : ('di', 'num-of-wor', self.default_func), + 'nofchars' : ('di', 'num-of-chr', self.default_func), + 'nofpages' : ('di', 'num-of-pag', self.default_func), + 'edmins' : ('di', 'edit-time_', self.default_func), + # headers and footers => hf + 'headerf' : ('hf', 'head-first', self.default_func), + 'headerl' : ('hf', 'head-left_', self.default_func), + 'headerr' : ('hf', 'head-right', self.default_func), + 'footerf' : ('hf', 'foot-first', self.default_func), + 'footerl' : ('hf', 'foot-left_', self.default_func), + 'footerr' : ('hf', 'foot-right', self.default_func), + 'header' : ('hf', 'header____', self.default_func), + 'footer' : ('hf', 'footer____', self.default_func), + # page => pa + 'margl' : ('pa', 'margin-lef', self.divide_by_20), + 'margr' : ('pa', 'margin-rig', self.divide_by_20), + 'margb' : ('pa', 'margin-bot', self.divide_by_20), + 'margt' : ('pa', 'margin-top', self.divide_by_20), + 'gutter' : ('pa', 'gutter____', self.divide_by_20), + 'paperw' : ('pa', 'paper-widt', self.divide_by_20), + 'paperh' : ('pa', 'paper-hght', self.divide_by_20), + # annotation => an + 'annotation' : ('an', 'annotation', self.default_func), + # underline + 'ul' : ('ul', 'underlined bd + 'trbrdrh' : ('bd', 'bor-t-r-hi', self.default_func), + 'trbrdrv' : ('bd', 'bor-t-r-vi', self.default_func), + 'trbrdrt' : ('bd', 'bor-t-r-to', self.default_func), + 'trbrdrl' : ('bd', 'bor-t-r-le', self.default_func), + 'trbrdrb' : ('bd', 'bor-t-r-bo', self.default_func), + 'trbrdrr' : ('bd', 'bor-t-r-ri', self.default_func), + 'clbrdrb' : ('bd', 'bor-cel-bo', self.default_func), + 'clbrdrt' : ('bd', 'bor-cel-to', self.default_func), + 'clbrdrl' : ('bd', 'bor-cel-le', self.default_func), + 'clbrdrr' : ('bd', 'bor-cel-ri', self.default_func), + 'brdrb' : ('bd', 'bor-par-bo', self.default_func), + 'brdrt' : ('bd', 'bor-par-to', self.default_func), + 'brdrl' : ('bd', 'bor-par-le', self.default_func), + 'brdrr' : ('bd', 'bor-par-ri', self.default_func), + 'box' : ('bd', 'bor-par-bx', self.default_func), + 'chbrdr' : ('bd', 'bor-par-bo', self.default_func), + 'brdrbtw' : ('bd', 'bor-for-ev', self.default_func), + 'brdrbar' : ('bd', 'bor-outsid', self.default_func), + 'brdrnone' : ('bd', 'bor-none__ bt + 'brdrs' : ('bt', 'bdr-single', self.default_func), + 'brdrth' : ('bt', 'bdr-doubtb', self.default_func), + 'brdrsh' : ('bt', 'bdr-shadow', self.default_func), + 'brdrdb' : ('bt', 'bdr-double', self.default_func), + 'brdrdot' : ('bt', 'bdr-dotted', self.default_func), + 'brdrdash' : ('bt', 'bdr-dashed', self.default_func), + 'brdrhair' : ('bt', 'bdr-hair__', self.default_func), + 'brdrinset' : ('bt', 'bdr-inset_', self.default_func), + 'brdrdashsm' : ('bt', 'bdr-das-sm', self.default_func), + 'brdrdashd' : ('bt', 'bdr-dot-sm', self.default_func), + 'brdrdashdd' : ('bt', 'bdr-dot-do', self.default_func), + 'brdroutset' : ('bt', 'bdr-outset', self.default_func), + 'brdrtriple' : ('bt', 'bdr-trippl', self.default_func), + 'brdrtnthsg' : ('bt', 'bdr-thsm__', self.default_func), + 'brdrthtnsg' : ('bt', 'bdr-htsm__', self.default_func), + 'brdrtnthtnsg' : ('bt', 'bdr-hthsm_', self.default_func), + 'brdrtnthmg' : ('bt', 'bdr-thm___', self.default_func), + 'brdrthtnmg' : ('bt', 'bdr-htm___', self.default_func), + 'brdrtnthtnmg' : ('bt', 'bdr-hthm__', self.default_func), + 'brdrtnthlg' : ('bt', 'bdr-thl___', self.default_func), + 'brdrtnthtnlg' : ('bt', 'bdr-hthl__', self.default_func), + 'brdrwavy' : ('bt', 'bdr-wavy__', self.default_func), + 'brdrwavydb' : ('bt', 'bdr-d-wav_', self.default_func), + 'brdrdashdotstr' : ('bt', 'bdr-strip_', self.default_func), + 'brdremboss' : ('bt', 'bdr-embos_', self.default_func), + 'brdrengrave' : ('bt', 'bdr-engra_', self.default_func), + 'brdrframe' : ('bt', 'bdr-frame_', self.default_func), + 'brdrw' : ('bt', 'bdr-li-wid', self.divide_by_20), + 'brsp' : ('bt', 'bdr-sp-wid', self.divide_by_20), + 'brdrcf' : ('bt', 'bdr-color_', self.default_func), + # comments + # 'comment' : ('cm', 'comment___', self.default_func), + } + self.__number_type_dict = { + 0: 'Arabic', + 1: 'uppercase Roman numeral', + 2: 'lowercase Roman numeral', + 3: 'uppercase letter', + 4: 'lowercase letter', + 5: 'ordinal number', + 6: 'cardianl text number', + 7: 'ordinal text number', + 10: 'Kanji numbering without the digit character', + 11: 'Kanji numbering with the digit character', + 1246: 'phonetic Katakana characters in aiueo order', + 1346: 'phonetic katakana characters in iroha order', + 14: 'double byte character', + 15: 'single byte character', + 16: 'Kanji numbering 3', + 17: 'Kanji numbering 4', + 18: 'Circle numbering' , + 19: 'double-byte Arabic numbering', + 2046: 'phonetic double-byte Katakana characters', + 2146: 'phonetic double-byte katakana characters', + 22: 'Arabic with leading zero', + 23: 'bullet', + 24: 'Korean numbering 2', + 25: 'Korean numbering 1', + 26: 'Chinese numbering 1', + 27: 'Chinese numbering 2', + 28: 'Chinese numbering 3', + 29: 'Chinese numbering 4', + 30: 'Chinese Zodiac numbering 1', + 31: 'Chinese Zodiac numbering 2', + 32: 'Chinese Zodiac numbering 3', + 33: 'Taiwanese double-byte numbering 1', + 34: 'Taiwanese double-byte numbering 2', + 35: 'Taiwanese double-byte numbering 3', + 36: 'Taiwanese double-byte numbering 4', + 37: 'Chinese double-byte numbering 1', + 38: 'Chinese double-byte numbering 2', + 39: 'Chinese double-byte numbering 3', + 40: 'Chinese double-byte numbering 4', + 41: 'Korean double-byte numbering 1', + 42: 'Korean double-byte numbering 2', + 43: 'Korean double-byte numbering 3', + 44: 'Korean double-byte numbering 4', + 45: 'Hebrew non-standard decimal', + 46: 'Arabic Alif Ba Tah', + 47: 'Hebrew Biblical standard', + 48: 'Arabic Abjad style', + 255: 'No number', + } + self.__language_dict = { + 1078 : 'Afrikaans', + 1052 : 'Albanian', + 1025 : 'Arabic', + 5121 : 'Arabic Algeria', + 15361 : 'Arabic Bahrain', + 3073 : 'Arabic Egypt', + 1 : 'Arabic General', + 2049 : 'Arabic Iraq', + 11265 : 'Arabic Jordan', + 13313 : 'Arabic Kuwait', + 12289 : 'Arabic Lebanon', + 4097 : 'Arabic Libya', + 6145 : 'Arabic Morocco', + 8193 : 'Arabic Oman', + 16385 : 'Arabic Qatar', + 10241 : 'Arabic Syria', + 7169 : 'Arabic Tunisia', + 14337 : 'Arabic U.A.E.', + 9217 : 'Arabic Yemen', + 1067 : 'Armenian', + 1101 : 'Assamese', + 2092 : 'Azeri Cyrillic', + 1068 : 'Azeri Latin', + 1069 : 'Basque', + 1093 : 'Bengali', + 4122 : 'Bosnia Herzegovina', + 1026 : 'Bulgarian', + 1109 : 'Burmese', + 1059 : 'Byelorussian', + 1027 : 'Catalan', + 2052 : 'Chinese China', + 4 : 'Chinese General', + 3076 : 'Chinese Hong Kong', + 4100 : 'Chinese Singapore', + 1028 : 'Chinese Taiwan', + 1050 : 'Croatian', + 1029 : 'Czech', + 1030 : 'Danish', + 2067 : 'Dutch Belgium', + 1043 : 'Dutch Standard', + 3081 : 'English Australia', + 10249 : 'English Belize', + 2057 : 'English British', + 4105 : 'English Canada', + 9225 : 'English Caribbean', + 9 : 'English General', + 6153 : 'English Ireland', + 8201 : 'English Jamaica', + 5129 : 'English New Zealand', + 13321 : 'English Philippines', + 7177 : 'English South Africa', + 11273 : 'English Trinidad', + 1033 : 'English United States', + 1061 : 'Estonian', + 1080 : 'Faerose', + 1065 : 'Farsi', + 1035 : 'Finnish', + 1036 : 'French', + 2060 : 'French Belgium', + 11276 : 'French Cameroon', + 3084 : 'French Canada', + 12300 : 'French Cote d\'Ivoire', + 5132 : 'French Luxembourg', + 13324 : 'French Mali', + 6156 : 'French Monaco', + 8204 : 'French Reunion', + 10252 : 'French Senegal', + 4108 : 'French Swiss', + 7180 : 'French West Indies', + 9228 : 'French Democratic Republic of the Congo', + 1122 : 'Frisian', + 1084 : 'Gaelic', + 2108 : 'Gaelic Ireland', + 1110 : 'Galician', + 1079 : 'Georgian', + 1031 : 'German', + 3079 : 'German Austrian', + 5127 : 'German Liechtenstein', + 4103 : 'German Luxembourg', + 2055 : 'German Switzerland', + 1032 : 'Greek', + 1095 : 'Gujarati', + 1037 : 'Hebrew', + 1081 : 'Hindi', + 1038 : 'Hungarian', + 1039 : 'Icelandic', + 1057 : 'Indonesian', + 1040 : 'Italian', + 2064 : 'Italian Switzerland', + 1041 : 'Japanese', + 1099 : 'Kannada', + 1120 : 'Kashmiri', + 2144 : 'Kashmiri India', + 1087 : 'Kazakh', + 1107 : 'Khmer', + 1088 : 'Kirghiz', + 1111 : 'Konkani', + 1042 : 'Korean', + 2066 : 'Korean Johab', + 1108 : 'Lao', + 1062 : 'Latvian', + 1063 : 'Lithuanian', + 2087 : 'Lithuanian Classic', + 1086 : 'Malay', + 2110 : 'Malay Brunei Darussalam', + 1100 : 'Malayalam', + 1082 : 'Maltese', + 1112 : 'Manipuri', + 1102 : 'Marathi', + 1104 : 'Mongolian', + 1121 : 'Nepali', + 2145 : 'Nepali India', + 1044 : 'Norwegian Bokmal', + 2068 : 'Norwegian Nynorsk', + 1096 : 'Oriya', + 1045 : 'Polish', + 1046 : 'Portuguese (Brazil)', + 2070 : 'Portuguese (Portugal)', + 1094 : 'Punjabi', + 1047 : 'Rhaeto-Romanic', + 1048 : 'Romanian', + 2072 : 'Romanian Moldova', + 1049 : 'Russian', + 2073 : 'Russian Moldova', + 1083 : 'Sami Lappish', + 1103 : 'Sanskrit', + 3098 : 'Serbian Cyrillic', + 2074 : 'Serbian Latin', + 1113 : 'Sindhi', + 1051 : 'Slovak', + 1060 : 'Slovenian', + 1070 : 'Sorbian', + 11274 : 'Spanish Argentina', + 16394 : 'Spanish Bolivia', + 13322 : 'Spanish Chile', + 9226 : 'Spanish Colombia', + 5130 : 'Spanish Costa Rica', + 7178 : 'Spanish Dominican Republic', + 12298 : 'Spanish Ecuador', + 17418 : 'Spanish El Salvador', + 4106 : 'Spanish Guatemala', + 18442 : 'Spanish Honduras', + 2058 : 'Spanish Mexico', + 3082 : 'Spanish Modern', + 19466 : 'Spanish Nicaragua', + 6154 : 'Spanish Panama', + 15370 : 'Spanish Paraguay', + 10250 : 'Spanish Peru', + 20490 : 'Spanish Puerto Rico', + 1034 : 'Spanish Traditional', + 14346 : 'Spanish Uruguay', + 8202 : 'Spanish Venezuela', + 1072 : 'Sutu', + 1089 : 'Swahili', + 1053 : 'Swedish', + 2077 : 'Swedish Finland', + 1064 : 'Tajik', + 1097 : 'Tamil', + 1092 : 'Tatar', + 1098 : 'Telugu', + 1054 : 'Thai', + 1105 : 'Tibetan', + 1073 : 'Tsonga', + 1074 : 'Tswana', + 1055 : 'Turkish', + 1090 : 'Turkmen', + 1058 : 'Ukranian', + 1056 : 'Urdu', + 2080 : 'Urdu India', + 2115 : 'Uzbek Cyrillic', + 1091 : 'Uzbek Latin', + 1075 : 'Venda', + 1066 : 'Vietnamese', + 1106 : 'Welsh', + 1076 : 'Xhosa', + 1085 : 'Yiddish', + 1077 : 'Zulu', + 1024 : 'Unkown', + 255 : 'Unkown', + } + """ + # unknown + # These must get passed on because they occure after \* + 'do' : ('un', 'unknown___', self.default_func), + 'company' : ('un', 'company___', self.default_func), + 'shpinst' : ('un', 'unknown___', self.default_func), + 'panose' : ('un', 'unknown___', self.default_func), + 'falt' : ('un', 'unknown___', self.default_func), + 'listoverridetable' : ('un', 'unknown___', self.default_func), + 'category' : ('un', 'unknown___', self.default_func), + 'template' : ('un', 'unknown___', self.default_func), + 'ud' : ('un', 'unknown___', self.default_func), + 'formfield' : ('un', 'unknown___', self.default_func), + 'ts' : ('un', 'unknown___', self.default_func), + 'rsidtbl' : ('un', 'unknown___', self.default_func), + 'generator' : ('un', 'unknown___', self.default_func), + 'ftnsep' : ('un', 'unknown___', self.default_func), + 'aftnsep' : ('un', 'unknown___', self.default_func), + 'aftnsepc' : ('un', 'unknown___', self.default_func), + 'aftncn' : ('un', 'unknown___', self.default_func), + 'objclass' : ('un', 'unknown___', self.default_func), + 'objdata' : ('un', 'unknown___', self.default_func), + 'picprop' : ('un', 'unknown___', self.default_func), + 'blipuid' : ('un', 'unknown___', self.default_func), + """ + def __ms_hex_func(self, pre, token, num): + num = num[1:] # chop off leading 0, which I added + num = num.upper() # the mappings store hex in caps + return 'tx 3: + msg = 'number "%s" cannot be converted to integer\n' % num + raise self.__bug_handler, msg + type = self.__number_type_dict.get(num) + if type == None: + if self.__run_level > 3: + msg = 'No type for "%s" in self.__number_type_dict\n' + raise self.__bug_handler + type = 'Arabic' + return 'cw<%s<%s 3: + msg = 'No entry for number "%s"' % num + raise self.__bug_handler, msg + return 'cw<%s<%snum<%s\n' % (token, num) + def divide_by_2(self, pre, token, num): + num = self.divide_num(num, 2) + return 'cw<%s<%s%s<%s\n' % (token, num, token) + def divide_by_20(self, pre, token, num): + num = self.divide_num(num, 20) + return 'cw<%s<%s%s<%s\n' % (token, num, token) + def text_func(self, pre, token, num=None): + return 'tx%s<%s\n' % (third_field, token, num, token) + def bool_st_func(self, pre, token, num): + if num is None or num == '' or num == '1': + return 'cw<%s<%strue<%s\n' % (token, token) + elif num == '0': + return 'cw<%s<%sfalse<%s\n' % (token, token) + else: + msg = 'boolean should have some value module process tokens\n' + msg += 'token is ' + token + "\n" + msg += "'" + num + "'" + "\n" + raise self.__bug_handler, msg + def __no_sup_sub_func(self, pre, token, num): + the_string = 'cw 3: + msg = 'no number to process?\n' + msg += 'this indicates that the token ' + msg += ' \(\\li\) should have a number and does not\n' + msg += 'numerator is "%s"\n' % numerator + msg += 'denominator is "%s"\n' % denominator + raise self.__bug_handler, msg + if 5 > self.__return_code: + self.__return_code = 5 + return 0 + num = '%0.2f' % round(numerator/denominator, 2) + return num + string_num = str(num) + if string_num[-2:] == ".0": + string_num = string_num[:-2] + return string_num + def split_let_num(self, token): + match_obj = re.search(self.__num_exp,token) + if match_obj != None: + first = match_obj.group(1) + second = match_obj.group(2) + if not second: + if self.__run_level > 3: + msg = "token is '%s' \n" % token + raise self.__bug_handler, msg + return first, 0 + else: + if self.__run_level > 3: + msg = "token is '%s' \n" % token + raise self.__bug_handler + return token, 0 + return first, second + def convert_to_hex(self,number): + """Convert a string to uppercase hexidecimal""" + num = int(number) + try: + hex_num = "%X" % num + return hex_num + except: + raise self.__bug_handler + def process_cw(self, token): + """Change the value of the control word by determining what dictionary + it belongs to""" + special = [ '*', ':', '}', '{', '~', '_', '-', ';' ] + ##if token != "{" or token != "}": + token = token[1:] # strip off leading \ + token = token.replace(" ", "") + ##if not token: return + only_alpha = token.isalpha() + num = None + if not only_alpha and token not in special: + token, num = self.split_let_num(token) + pre, token, action = self.dict_token.get(token, (None, None, None)) + if action: + return action(pre, token, num) + # unused function + def initiate_token_actions(self): + self.action_for_token={ + '{' : self.ob_func, + '}' : self.cb_func, + '\\' : self.process_cw, + } + # unused function + def evaluate_token(self,token): + """Evaluate tokens. Return a value if the token is not a + control word. Otherwise, pass token onto another method + for further evaluation.""" + token, action = self.dict_token.get(token[0:1]) + if action: + line = action(token) + return line + else : + return 'tx -1: + msg ='Invalid RTF: token "\\ " not valid. \n' + raise self.__exception_handler, msg + elif token[0:1] == "\\": + line = self.process_cw(token) + if line != None: + write_obj.write(line) + else: + fields = re.split(self.__utf_exp, token) + for field in fields: + if not field: + continue + if field[0:1] == '&': + write_obj.write('tx 0: + for key in keys: + my_string += '<%s>%s' % (key, self.__section_values[key]) + my_string += '\n' + my_string += self.__mark_end + # # my_string += line + if self.__state == 'body': + self.__write_obj.write(my_string) + elif self.__state == 'sec_in_field': + self.__handle_sec_def(my_string) + elif self.__run_level > 3: + msg = 'missed a flag\n' + raise self.__bug_handler, msg + def __handle_sec_def(self, my_string): + """ + Requires: + my_string -- the string of attributes and values. (Do I need this?) + Returns: + nothing + Logic: + I need to append the dictionary of attributes and values to list + so I can use it later when I reach the end of the field-block. + """ + values_dict = self.__section_values + self.__list_of_sec_values.append(values_dict) + def __body_func(self, line): + """ + Requires: + line --the line to parse + Returns: + nothing + Logic: + Look for the beginning of a section. Otherwise, print the line to + the output file. + """ + action = self.__body_dict.get(self.__token_info) + if action: + action(line) + else: + self.__write_obj.write(line) + def __before_body_func(self, line): + """ + Requires: + line --line to parse + Returns: + nothing + Logic: + Look for the beginning of the body. Always print out the line. + """ + if self.__token_info == 'mi%s' + '%s' + 'rtf-native' + '0\n' + % (str(self.__section_num), str(self.__section_num)) + ) + self.__found_first_sec = 1 + elif self.__token_info == 'tx%s' + '%s' + 'rtf-native' + '0\n' + % (str(self.__section_num), str(self.__section_num)) + ) + self.__write_obj.write( + 'cw 0: + my_string += 'mi%s' % str(num) + ) + if self.__list_of_sec_values: + keys = self.__list_of_sec_values[0].keys() + for key in keys: + self.__write_obj.write( + '<%s>%s\n' % (key, self.__list_of_sec_values[0][key])) + self.__list_of_sec_values = self.__list_of_sec_values[1:] + self.__write_obj.write('0') + self.__write_obj.write('rtf-native') + self.__write_obj.write('%s' % str(self.__section_num)) + self.__write_obj.write('\n') + # Look here + def __found_section_in_field_func(self, line): + """ + Requires: + line --line to parse + Returns: + nothing + Logic: + I have found a section in a field block. Add one to section + counter, and append this number to a list. + """ + self.__section_num += 1 + self.__field_num.append(self.__section_num) + self.__sec_in_field_string += line + def __found_section_def_in_field_func(self, line): + """ + Requires: + line --line to parse + Returns: + nothing + Logic: + I have found a section definition in a filed block. Change the + state and clear the values dictionary. + """ + self.__state = 'section_def' + self.__section_values.clear() + def make_sections(self): + """ + Requires: + nothing + Returns: + nothing (changes the original file) + Logic: + Read one line in at a time. Determine what action to take based on + the state. If the state is before the body, look for the + beginning of the body. + If the state is body, send the line to the body method. + """ + self.__initiate_values() + read_obj = open(self.__file, 'r') + self.__write_obj = open(self.__write_to, 'w') + line_to_read = 1 + while line_to_read: + line_to_read = read_obj.readline() + line = line_to_read + self.__token_info = line[:16] + action = self.__state_dict.get(self.__state) + if action == None: + sys.stderr.write('no no matching state in module sections.py\n') + sys.stderr.write(self.__state + '\n') + action(line) + read_obj.close() + self.__write_obj.close() + copy_obj = copy.Copy(bug_handler = self.__bug_handler) + if self.__copy: + copy_obj.copy_file(self.__write_to, "sections.data") + copy_obj.rename(self.__write_to, self.__file) + os.remove(self.__write_to) diff --git a/src/libprs500/ebooks/rtf2xml/styles.py b/src/libprs500/ebooks/rtf2xml/styles.py new file mode 100755 index 0000000000..c9c74bdf9c --- /dev/null +++ b/src/libprs500/ebooks/rtf2xml/styles.py @@ -0,0 +1,705 @@ +######################################################################### +# # +# # +# copyright 2002 Paul Henry Tremblay # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +# General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with this program; if not, write to the Free Software # +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA # +# 02111-1307 USA # +# # +# # +######################################################################### +import sys, os, tempfile +from libprs500.ebooks.rtf2xml import copy, border_parse +class Styles: + """ + Change lines with style numbers to actual style names. + """ + def __init__(self, + in_file, + bug_handler, + copy = None, + run_level = 1, + ): + """ + Required: + 'file'--file to parse + Optional: + 'copy'-- whether to make a copy of result for debugging + 'temp_dir' --where to output temporary results (default is + directory from which the script is run.) + Returns: + nothing + """ + self.__file = in_file + self.__bug_handler = bug_handler + self.__copy = copy + self.__write_to = tempfile.mktemp() + self.__run_level = run_level + def __initiate_values(self): + """ + Initiate all values. + """ + self.__border_obj = border_parse.BorderParse() + self.__styles_dict = {'par':{}, 'char':{}} + self.__styles_num = '0' + self.__type_of_style = 'par' + self.__text_string = '' + self.__state = 'before_styles_table' + self.__state_dict = { + 'before_styles_table': self.__before_styles_func, + 'in_styles_table' : self.__in_styles_func, + 'in_individual_style' : self.__in_individual_style_func, + 'after_styles_table' : self.__after_styles_func, + 'mi pf + 'par-end___' : 'para', + 'par-def___' : 'paragraph-definition', + 'keep-w-nex' : 'keep-with-next', + 'widow-cntl' : 'widow-control', + 'adjust-rgt' : 'adjust-right', + 'language__' : 'language', + 'right-inde' : 'right-indent', + 'fir-ln-ind' : 'first-line-indent', + 'left-inden' : 'left-indent', + 'space-befo' : 'space-before', + 'space-afte' : 'space-after', + 'line-space' : 'line-spacing', + 'default-ta' : 'default-tab', + 'align_____' : 'align', + 'widow-cntr' : 'widow-control', + # page fomratting mixed in! (Just in older RTF?) + 'margin-lef' : 'left-indent', + 'margin-rig' : 'right-indent', + 'margin-bot' : 'space-after', + 'margin-top' : 'space-before', + # stylesheet = > ss + 'style-shet' : 'stylesheet', + 'based-on__' : 'based-on-style', + 'next-style' : 'next-style', + 'char-style' : 'character-style', + 'para-style' : 'paragraph-style', + # graphics => gr + 'picture___' : 'pict', + 'obj-class_' : 'obj_class', + 'mac-pic___' : 'mac-pict', + # section => sc + 'section___' : 'section-new', + 'sect-defin' : 'section-reset', + 'sect-note_' : 'endnotes-in-section', + # list=> ls + 'list-text_' : 'list-text', + # this line must be wrong because it duplicates an earlier one + 'list-text_' : 'list-text', + 'list______' : 'list', + 'list-lev-d' : 'list-level-definition', + 'list-cardi' : 'list-cardinal-numbering', + 'list-decim' : 'list-decimal-numbering', + 'list-up-al' : 'list-uppercase-alphabetic-numbering', + 'list-up-ro' : 'list-uppercae-roman-numbering', + 'list-ord__' : 'list-ordinal-numbering', + 'list-ordte' : 'list-ordinal-text-numbering', + 'list-bulli' : 'list-bullet', + 'list-simpi' : 'list-simple', + 'list-conti' : 'list-continue', + 'list-hang_' : 'list-hang', + # 'list-tebef' : 'list-text-before', + 'list-level' : 'level', + 'list-id___' : 'list-id', + 'list-start' : 'list-start', + 'nest-level' : 'nest-level', + # duplicate + 'list-level' : 'list-level', + # notes => nt + 'footnote__' : 'footnote', + 'type______' : 'type', + # anchor => an + 'toc_______' : 'anchor-toc', + 'book-mk-st' : 'bookmark-start', + 'book-mk-en' : 'bookmark-end', + 'index-mark' : 'anchor-index', + 'place_____' : 'place', + # field => fd + 'field_____' : 'field', + 'field-inst' : 'field-instruction', + 'field-rslt' : 'field-result', + 'datafield_' : 'data-field', + # info-tables => it + 'font-table' : 'font-table', + 'colr-table' : 'color-table', + 'lovr-table' : 'list-override-table', + 'listtable_' : 'list-table', + 'revi-table' : 'revision-table', + # character info => ci + 'hidden____' : 'hidden', + 'italics___' : 'italics', + 'bold______' : 'bold', + 'strike-thr' : 'strike-through', + 'shadow____' : 'shadow', + 'outline___' : 'outline', + 'small-caps' : 'small-caps', + 'dbl-strike' : 'double-strike-through', + 'emboss____' : 'emboss', + 'engrave___' : 'engrave', + 'subscript_' : 'subscript', + 'superscrip' : 'superscript', + 'plain_____' : 'plain', + 'font-style' : 'font-style', + 'font-color' : 'font-color', + 'font-size_' : 'font-size', + 'font-up___' : 'superscript', + 'font-down_' : 'subscript', + 'red_______' : 'red', + 'blue______' : 'blue', + 'green_____' : 'green', + 'caps______' : 'caps', + # table => tb + 'row-def___' : 'row-definition', + 'cell______' : 'cell', + 'row_______' : 'row', + 'in-table__' : 'in-table', + 'columns___' : 'columns', + 'row-pos-le' : 'row-position-left', + 'cell-posit' : 'cell-position', + # preamble => pr + # underline + 'underlined' : 'underlined', + # border => bd + 'bor-t-r-hi' : 'border-table-row-horizontal-inside', + 'bor-t-r-vi' : 'border-table-row-vertical-inside', + 'bor-t-r-to' : 'border-table-row-top', + 'bor-t-r-le' : 'border-table-row-left', + 'bor-t-r-bo' : 'border-table-row-bottom', + 'bor-t-r-ri' : 'border-table-row-right', + 'bor-cel-bo' : 'border-cell-bottom', + 'bor-cel-to' : 'border-cell-top', + 'bor-cel-le' : 'border-cell-left', + 'bor-cel-ri' : 'border-cell-right', + 'bor-par-bo' : 'border-paragraph-bottom', + 'bor-par-to' : 'border-paragraph-top', + 'bor-par-le' : 'border-paragraph-left', + 'bor-par-ri' : 'border-paragraph-right', + 'bor-par-bo' : 'border-paragraph-box', + 'bor-for-ev' : 'border-for-every-paragraph', + 'bor-outsid' : 'border-outisde', + 'bor-none__' : 'border', + # border type => bt + 'bdr-single' : 'single', + 'bdr-doubtb' : 'double-thickness-border', + 'bdr-shadow' : 'shadowed-border', + 'bdr-double' : 'double-border', + 'bdr-dotted' : 'dotted-border', + 'bdr-dashed' : 'dashed', + 'bdr-hair__' : 'hairline', + 'bdr-inset_' : 'inset', + 'bdr-das-sm' : 'dash-small', + 'bdr-dot-sm' : 'dot-dash', + 'bdr-dot-do' : 'dot-dot-dash', + 'bdr-outset' : 'outset', + 'bdr-trippl' : 'tripple', + 'bdr-thsm__' : 'thick-thin-small', + 'bdr-htsm__' : 'thin-thick-small', + 'bdr-hthsm_' : 'thin-thick-thin-small', + 'bdr-thm__' : 'thick-thin-medium', + 'bdr-htm__' : 'thin-thick-medium', + 'bdr-hthm_' : 'thin-thick-thin-medium', + 'bdr-thl__' : 'thick-thin-large', + 'bdr-hthl_' : 'think-thick-think-large', + 'bdr-wavy_' : 'wavy', + 'bdr-d-wav' : 'double-wavy', + 'bdr-strip' : 'striped', + 'bdr-embos' : 'emboss', + 'bdr-engra' : 'engrave', + 'bdr-frame' : 'frame', + 'bdr-li-wid' : 'line-width', + # tabs + 'tab-center' : 'center', + 'tab-right_' : 'right', + 'tab-dec___' : 'decimal', + 'leader-dot' : 'leader-dot', + 'leader-hyp' : 'leader-hyphen', + 'leader-und' : 'leader-underline', + } + self.__tabs_dict = { + 'cw 3: + msg = 'no value for key %s\n' % info + raise self.__bug_handler, msg + else: + value = line[20:-1] + self.__enter_dict_entry(att, value) + elif line[0:2] == 'tx': + self.__text_string += line[17:-1] + def __tab_stop_func(self, line): + """ + Requires: + line -- line to parse + Returns: + nothing + Logic: + Try to add the number to dictionary entry tabs-left, or tabs-right, etc. + If the dictionary entry doesn't exist, create one. + """ + type = 'tabs-%s' % self.__tab_type + try: + if self.__leader_found: + self.__styles_dict['par'][self.__styles_num]['tabs']\ + += '%s:' % self.__tab_type + self.__styles_dict['par'][self.__styles_num]['tabs']\ + += '%s;' % line[20:-1] + else: + self.__styles_dict['par'][self.__styles_num]['tabs']\ + += '%s:' % self.__tab_type + self.__styles_dict['par'][self.__styles_num]['tabs']\ + += '%s;' % line[20:-1] + except KeyError: + self.__enter_dict_entry('tabs', '') + self.__styles_dict['par'][self.__styles_num]['tabs']\ + += '%s:' % self.__tab_type + self.__styles_dict['par'][self.__styles_num]['tabs'] += '%s;' % line[20:-1] + self.__tab_type = 'left' + self.__leader_found = 0 + def __tab_type_func(self, line): + """ + """ + type = self.__tab_type_dict.get(self.__token_info) + if type != None: + self.__tab_type = type + else: + if self.__run_level > 3: + msg = 'no entry for %s\n' % self.__token_info + raise self.__bug_handler, msg + def __tab_leader_func(self, line): + """ + Requires: + line --line to parse + Returns: + nothing + Logic: + Try to add the string of the tab leader to dictionary entry + tabs-left, or tabs-right, etc. If the dictionary entry doesn't + exist, create one. + """ + self.__leader_found = 1 + leader = self.__tab_type_dict.get(self.__token_info) + if leader != None: + leader += '^' + type = 'tabs-%s' % self.__tab_type + try: + self.__styles_dict['par'][self.__styles_num]['tabs'] += ':%s;' % leader + except KeyError: + self.__enter_dict_entry('tabs', '') + self.__styles_dict['par'][self.__styles_num]['tabs'] += '%s;' % leader + else: + if self.__run_level > 3: + msg = 'no entry for %s\n' % self.__token_info + raise self.__bug_handler, msg + def __tab_bar_func(self, line): + """ + Requires: + line -- line to parse + Returns: + nothing + Logic: + Try to add the string of the tab bar to dictionary entry tabs-bar. + If the dictionary entry doesn't exist, create one. + """ + # self.__add_dict_entry('tabs-bar', line[20:-1]) + try: + self.__styles_dict['par'][self.__styles_num]['tabs']\ + += '%s:' % 'bar' + self.__styles_dict['par'][self.__styles_num]['tabs']\ + += '%s;' % line[20:-1] + except KeyError: + self.__enter_dict_entry('tabs', '') + self.__styles_dict['par'][self.__styles_num]['tabs']\ + += '%s:' % 'bar' + self.__styles_dict['par'][self.__styles_num]['tabs']\ + += '%s;' % line[20:-1] + self.__tab_type = 'left' + def __enter_dict_entry(self, att, value): + """ + Required: + att -- the attribute + value -- the value + Returns: + nothing + Logic: + Try to add the attribute value directly to the styles dictionary. + If a keyerror is found, that means I have to build the "branches" + of the dictionary before I can add the key value pair. + """ + try: + self.__styles_dict[self.__type_of_style][self.__styles_num][att] = value + except KeyError: + self.__add_dict_entry(att, value) + def __add_dict_entry(self, att, value): + """ + Required: + att --the attribute + value --the value + Returns: + nothing + Logic: + I have to build the branches of the dictionary before I can add + the leaves. (I am comparing a dictionary to a tree.) To achieve + this, I first make a temporary dictionary by extracting either the + inside dictionary of the keyword par or char. This temporary + dictionary is called type_dict. + Next, create a second, smaller dictionary with just the attribute and value. + Add the small dictionary to the type dictionary. + Add this type dictionary to the main styles dictionary. + """ + if self.__type_of_style == 'par': + type_dict =self.__styles_dict['par'] + elif self.__type_of_style == 'char': + type_dict = self.__styles_dict['char'] + else: + if self.__run_level > 3: + msg = self.__type_of_style + 'error\n' + raise self.__bug_handler, msg + smallest_dict = {} + smallest_dict[att] = value + type_dict[self.__styles_num] = smallest_dict + self.__styles_dict[self.__type_of_style] = type_dict + def __para_style_func(self, line): + """ + Required: + line + Returns: + nothing + Logic: + Set the type of style to paragraph. + Extract the number for a line such as "cw '15'. I want to change the 15 to the name of the + style. I accomplish this by simply looking up the value of 15 in + the styles table. + Use two loops. First, check all the paragraph styles. Then check + all the characer styles. + The inner loop: first check 'next-style', then check 'based-on-style'. + Make sure values exist for the keys to avoid the nasty keyerror message. + """ + types = ['par', 'char'] + for type in types: + keys = self.__styles_dict[type].keys() + for key in keys: + styles = ['next-style', 'based-on-style'] + for style in styles: + value = self.__styles_dict[type][key].get(style) + if value != None: + temp_dict = self.__styles_dict[type].get(value) + if temp_dict: + changed_value = self.__styles_dict[type][value].get('name') + if changed_value: + self.__styles_dict[type][key][style] = \ + changed_value + else: + if value == 0 or value == '0': + pass + else: + if self.__run_level > 4: + msg = '%s %s is based on %s\n' % (type, key, value) + msg = 'There is no style with %s\n' % value + raise self.__bug_handler, msg + del self.__styles_dict[type][key][style] + def __print_style_table(self): + """ + Required: + nothing + Returns: + nothing + Logic: + This function prints out the style table. + I use three nested for loops. The outer loop prints out the + paragraphs styles, then the character styles. + The next loop iterates through the style numbers. + The most inside loop iterates over the pairs of attributes and + values, and prints them out. + """ + types = ['par', 'char'] + for type in types: + if type == 'par': + prefix = 'paragraph' + else: + prefix = 'character' + self.__write_obj.write( + 'mi%s' % (prefix, num) + ) + attributes = self.__styles_dict[type][num].keys() + for att in attributes: + this_value = self.__styles_dict[type][num][att] + self.__write_obj.write( + '<%s>%s' % (att, this_value) + ) + self.__write_obj.write('\n') + self.__write_obj.write( + 'mi 0: + if self.__state[-1] == 'in_row_def': + self.__state.pop() + # added [{]] at the *end* of each /cell. Get rid of extra one + self.__cell_list.pop() + widths = self.__row_dict.get('widths') + if widths: + width_list = widths.split(',') + num_cells = len (width_list) + self.__row_dict['number-of-cells'] = num_cells + def __in_row_def_func(self, line): + """ + Requires: + line --line to parse + Returns: + nothing + Logic: + In the text that defines a row. If a control word is found, handle the + control word with another method. + Check for states that will end this state. + While in the row definition, certain tokens can end a row or end a table. + If a paragrah definition (pard-start) is found, and the you are already in + a table, start of a row. + """ + if self.__token_info == 'cw 0 and self.__state[-1] == 'in_table': + self.__start_row_func(line) + self.__start_cell_func(line) + self.__write_obj.write(line) + elif self.__token_info == 'mi 0 and self.__state[-1] != 'in_table': + self.__start_table_func(line) + self.__write_obj.write(line) + else: + self.__write_obj.write(line) + def __handle_row_token(self, line): + """ + Requires: + line -- line to parse + Returns: + ? + Logic: + the tokens in the row definition contain the following information: + 1. row borders. + 2. cell borders for all cells in the row. + 3. cell postions for all cells in the row. + Put all information about row borders into a row dictionary. + Put all information about cell borders into into the dictionary in + the last item in the cell list. ([{border:something, width:something}, + {border:something, width:something}]) + cw attributes for key=> value + pop the self.__cell_list. + Otherwise, print out a cell tag. + """ + self.__state.append('in_cell') + # self.__cell_list = [] + if len(self.__cell_list) > 0: + self.__write_obj.write('mi%s' % (key, cell_dict[key])) + self.__write_obj.write('\n') + # self.__cell_list.pop() + self.__cell_list.pop(0) + # self.__cell_list = self.__cell_list[1:] + else: + self.__write_obj.write('mi attributes for key=> value + """ + self.__state.append('in_row') + self.__write_obj.write('mi%s' % (key, self.__row_dict[key])) + self.__write_obj.write('\n') + self.__cells_in_row = 0 + self.__rows_in_table += 1 + def __found_cell_position(self, line): + """ + needs: + line: current line + returns: + nothing + logic: + Calculate the cell width. + If the cell is the first cell, you should add the left cell position to it. + (This value is often negative.) + Next, set the new last_cell_position to the current cell position. + """ + # cw 1: + if self.__state[-1] == 'in_cell': + self.__state.pop() + self.__write_obj.write('mi 1 and self.__state[-1] == 'in_row': + self.__state.pop() + self.__write_obj.write('mi self.__max_number_cells_in_row: + self.__max_number_cells_in_row = self.__cells_in_row + self.__list_of_cells_in_row.append(self.__cells_in_row) + def __empty_cell(self, line): + """ + Required: + line -- line of text + Returns: + nothing + Logic: + Write an empty tag with attributes if there are attributes. + Otherwise, writen an empty tag with cell as element. + """ + if len(self.__cell_list) > 0: + self.__write_obj.write('mi%s' % (key, cell_dict[key])) + self.__write_obj.write('\n') + else: + self.__write_obj.write('mi max: + mode = item + max = num_of_values + return mode + def make_table(self): + """ + Requires: + nothing + Returns: + A dictionary of values for the beginning of the table. + Logic: + Read one line in at a time. Determine what action to take based on + the state. + """ + self.__initiate_values() + read_obj = open(self.__file, 'r') + self.__write_obj = open(self.__write_to, 'w') + line_to_read = 1 + while line_to_read: + line_to_read = read_obj.readline() + line = line_to_read + self.__token_info = line[:16] + action = self.__state_dict.get(self.__state[-1]) + # print self.__state[-1] + if action == None: + sys.stderr.write('No matching state in module table.py\n') + sys.stderr.write(self.__state[-1] + '\n') + action(line) + read_obj.close() + self.__write_obj.close() + copy_obj = copy.Copy(bug_handler = self.__bug_handler) + if self.__copy: + copy_obj.copy_file(self.__write_to, "table.data") + copy_obj.rename(self.__write_to, self.__file) + os.remove(self.__write_to) + return self.__table_data diff --git a/src/libprs500/ebooks/rtf2xml/table_info.py b/src/libprs500/ebooks/rtf2xml/table_info.py new file mode 100755 index 0000000000..403e176ae2 --- /dev/null +++ b/src/libprs500/ebooks/rtf2xml/table_info.py @@ -0,0 +1,85 @@ +######################################################################### +# # +# # +# copyright 2002 Paul Henry Tremblay # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +# General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with this program; if not, write to the Free Software # +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA # +# 02111-1307 USA # +# # +# # +######################################################################### +import os, tempfile +from libprs500.ebooks.rtf2xml import copy +# note to self. This is the first module in which I use tempfile. A good idea? +""" +""" +class TableInfo: + """ + Insert table data for tables. + Logic: + """ + def __init__(self, + in_file, + bug_handler, + table_data, + copy=None, + run_level = 1,): + """ + Required: + 'file'--file to parse + 'table_data' -- a dictionary for each table. + Optional: + 'copy'-- whether to make a copy of result for debugging + 'temp_dir' --where to output temporary results (default is + directory from which the script is run.) + Returns: + nothing + """ + self.__file = in_file + self.__bug_handler = bug_handler + self.__copy = copy + self.__table_data = table_data + self.__run_level = run_level + self.__write_to = tempfile.mktemp() + # self.__write_to = 'table_info.data' + def insert_info(self): + """ + """ + read_obj = open(self.__file, 'r') + self.__write_obj = open(self.__write_to, 'w') + line_to_read = 1 + while line_to_read: + line_to_read = read_obj.readline() + line = line_to_read + if line == 'mi 0: + table_dict = self.__table_data[0] + self.__write_obj.write('mi%s' % (key, table_dict[key])) + self.__write_obj.write('\n') + self.__table_data = self.__table_data[1:] + else: + # this shouldn't happen! + if self.__run_level > 3: + msg = 'Not enough data for each table\n' + raise self.__bug_handler, msg + self.__write_obj.write('mi", ">") + line = line.replace("\\~", "\\~ ") + line = line.replace("\\_", "\\_ ") + line = line.replace("\\:", "\\: ") + line = line.replace("\\-", "\\- ") + # turn into a generic token to eliminate special + # cases and make processing easier + line = line.replace("\\{", "\\ob ") + # turn into a generic token to eliminate special + # cases and make processing easier + line = line.replace("\\}", "\\cb ") + # put a backslash in front of to eliminate special cases and + # make processing easier + line = line.replace("{", "\\{") + # put a backslash in front of to eliminate special cases and + # make processing easier + line = line.replace("}", "\\}") + line = re.sub(self.__utf_exp, self.__from_ms_to_utf8, line) + # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line) + line = re.sub(self.__ms_hex_exp, "\\mshex0\g<1> ", line) + ##line = line.replace("\\backslash", "\\\\") + # this is for older RTF + line = re.sub(self.__par_exp, '\\par ', line) + return line + def __compile_expressions(self): + self.__ms_hex_exp = re.compile(r"\\\'(..)") + self.__utf_exp = re.compile(r"\\u(-?\d{3,6})") + self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\\[^\s\\{}&]+(?:\s)?)") + self.__par_exp = re.compile(r'\\$') + self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)") + ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)") + def __create_tokens(self): + self.__compile_expressions() + read_obj = open(self.__file, 'r') + write_obj = open(self.__write_to, 'w') + line_to_read = "dummy" + while line_to_read: + line_to_read = read_obj.readline() + line = line_to_read + line = line.replace("\n", "") + line = self.__sub_line_reg(line) + tokens = re.split(self.__splitexp, line) + ##print tokens + for token in tokens: + if token != "": + write_obj.write(token + "\n") + """ + match_obj = re.search(self.__mixed_exp, token) + if match_obj != None: + first = match_obj.group(1) + second = match_obj.group(2) + write_obj.write(first + "\n") + write_obj.write(second + "\n") + else: + write_obj.write(token + "\n") + """ + read_obj.close() + write_obj.close() + def tokenize(self): + """Main class for handling other methods. Reads in one line \ + at a time, usues method self.sub_line to make basic substitutions,\ + uses ? to process tokens""" + self.__create_tokens() + copy_obj = copy.Copy(bug_handler = self.__bug_handler) + if self.__copy: + copy_obj.copy_file(self.__write_to, "tokenize.data") + copy_obj.rename(self.__write_to, self.__file) + os.remove(self.__write_to) diff --git a/src/libprs500/trac/download/download.py b/src/libprs500/trac/download/download.py index 0f6f26a4ef..c7c5d0b90d 100644 --- a/src/libprs500/trac/download/download.py +++ b/src/libprs500/trac/download/download.py @@ -93,7 +93,7 @@ class Distribution(object): self.command = cmd.strip() if os == 'debian': self.command += '\n'+prefix + 'cp -R /usr/share/pycentral/fonttools/site-packages/FontTools* /usr/lib/python2.5/site-packages/' - self.command += '\n'+prefix+'easy_install -U TTFQuery libprs500 \n'+prefix+'easy_install -f http://sourceforge.net/project/showfiles.php?group_id=68617 rtf2xml\n'+prefix+'libprs500_postinstall' + self.command += '\n'+prefix+'easy_install -U TTFQuery libprs500 \n'+prefix+'libprs500_postinstall' try: self.manual = Markup(self.MANUAL_MAP[os]) except KeyError: