mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Clean default encoding
This commit is contained in:
parent
5784256e02
commit
bb50018eb3
@ -1,61 +1,118 @@
|
|||||||
#########################################################################
|
#########################################################################
|
||||||
# #
|
# #
|
||||||
# #
|
|
||||||
# copyright 2002 Paul Henry Tremblay #
|
# copyright 2002 Paul Henry Tremblay #
|
||||||
# #
|
# #
|
||||||
# This program is distributed in the hope that it will be useful, #
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
|
|
||||||
# General Public License for more details. #
|
|
||||||
# #
|
|
||||||
# You should have received a copy of the GNU General Public License #
|
|
||||||
# along with this program; if not, write to the Free Software #
|
|
||||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA #
|
|
||||||
# 02111-1307 USA #
|
|
||||||
# #
|
|
||||||
# #
|
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
'''
|
||||||
|
Codepages as to RTF 1.9.1:
|
||||||
|
437 United States IBM
|
||||||
|
708 Arabic (ASMO 708)
|
||||||
|
709 Arabic (ASMO 449+, BCON V4)
|
||||||
|
710 Arabic (transparent Arabic)
|
||||||
|
711 Arabic (Nafitha Enhanced)
|
||||||
|
720 Arabic (transparent ASMO)
|
||||||
|
819 Windows 3.1 (United States and Western Europe)
|
||||||
|
850 IBM multilingual
|
||||||
|
852 Eastern European
|
||||||
|
860 Portuguese
|
||||||
|
862 Hebrew
|
||||||
|
863 French Canadian
|
||||||
|
864 Arabic
|
||||||
|
865 Norwegian
|
||||||
|
866 Soviet Union
|
||||||
|
874 Thai
|
||||||
|
932 Japanese
|
||||||
|
936 Simplified Chinese
|
||||||
|
949 Korean
|
||||||
|
950 Traditional Chinese
|
||||||
|
1250 Eastern European
|
||||||
|
1251 Cyrillic
|
||||||
|
1252 Western European
|
||||||
|
1253 Greek
|
||||||
|
1254 Turkish
|
||||||
|
1255 Hebrew
|
||||||
|
1256 Arabic
|
||||||
|
1257 Baltic
|
||||||
|
1258 Vietnamese
|
||||||
|
1361 Johab
|
||||||
|
10000 MAC Roman
|
||||||
|
10001 MAC Japan
|
||||||
|
10004 MAC Arabic
|
||||||
|
10005 MAC Hebrew
|
||||||
|
10006 MAC Greek
|
||||||
|
10007 MAC Cyrillic
|
||||||
|
10029 MAC Latin2
|
||||||
|
10081 MAC Turkish
|
||||||
|
57002 Devanagari
|
||||||
|
57003 Bengali
|
||||||
|
57004 Tamil
|
||||||
|
57005 Telugu
|
||||||
|
57006 Assamese
|
||||||
|
57007 Oriya
|
||||||
|
57008 Kannada
|
||||||
|
57009 Malayalam
|
||||||
|
57010 Gujarati
|
||||||
|
57011 Punjabi
|
||||||
|
'''
|
||||||
|
|
||||||
class DefaultEncoding:
|
class DefaultEncoding:
|
||||||
"""
|
"""
|
||||||
Find the default encoding for the doc
|
Find the default encoding for the doc
|
||||||
"""
|
"""
|
||||||
def __init__(self, in_file, bug_handler, run_level = 1,):
|
def __init__(self, in_file, bug_handler, run_level = 1,):
|
||||||
"""
|
|
||||||
Required:
|
|
||||||
'file'
|
|
||||||
Returns:
|
|
||||||
nothing
|
|
||||||
"""
|
|
||||||
self.__file = in_file
|
self.__file = in_file
|
||||||
self.__bug_handler = bug_handler
|
self.__bug_handler = bug_handler
|
||||||
|
self.__platform = 'Windows'
|
||||||
|
self.__default_num = 'not-defined'
|
||||||
|
self.__code_page = '1252'
|
||||||
|
self.__datafetched = False
|
||||||
|
|
||||||
def find_default_encoding(self):
|
def find_default_encoding(self):
|
||||||
platform = 'Windows'
|
if not self.__datafetched:
|
||||||
default_num = 'not-defined'
|
self._encoding()
|
||||||
code_page = 'ansicpg1252'
|
self.__datafetched = True
|
||||||
read_obj = open(self.__file, 'r')
|
if self.__platform = 'Macintosh':
|
||||||
line_to_read = 1
|
code_page = self.__code_page
|
||||||
while line_to_read:
|
else
|
||||||
line_to_read = read_obj.readline()
|
code_page = 'ansicpg' + self.__code_page
|
||||||
line = line_to_read
|
return platform, code_page, self.__default_num
|
||||||
self.__token_info = line[:16]
|
|
||||||
if self.__token_info == 'mi<mk<rtfhed-end':
|
def get_codepage(self):
|
||||||
break
|
if not self.__datafetched:
|
||||||
if self.__token_info == 'cw<ri<ansi-codpg':
|
self._encoding()
|
||||||
#cw<ri<ansi-codpg<nu<10000
|
self.__datafetched = True
|
||||||
num = line[20:-1]
|
return self.__code_page
|
||||||
if not num:
|
|
||||||
num = '1252'
|
def get_platform(self):
|
||||||
code_page = 'ansicpg' + num
|
if not self.__datafetched:
|
||||||
if self.__token_info == 'cw<ri<macintosh_':
|
self._encoding()
|
||||||
platform = 'Macintosh'
|
self.__datafetched = True
|
||||||
if self.__token_info == 'cw<ri<deflt-font':
|
return self.__platform
|
||||||
default_num = line[20:-1]
|
|
||||||
#cw<ri<deflt-font<nu<0
|
def _encoding(self):
|
||||||
#action = self.__state_dict.get(self.__state)
|
with open(self.__file, 'r') as read_obj:
|
||||||
#if action == None:
|
for line in read_obj:
|
||||||
#print self.__state
|
self.__token_info = line[:16]
|
||||||
#action(line)
|
if self.__token_info == 'mi<mk<rtfhed-end':
|
||||||
read_obj.close()
|
break
|
||||||
if platform == 'Macintosh':
|
if self.__token_info == 'cw<ri<ansi-codpg':
|
||||||
code_page = 'mac_roman'
|
#cw<ri<ansi-codpg<nu<10000
|
||||||
return platform, code_page, default_num
|
self.__code_page = line[20:-1] if line[20:-1] \
|
||||||
|
else '1252'
|
||||||
|
if self.__token_info == 'cw<ri<macintosh_':
|
||||||
|
self.__platform = 'Macintosh'
|
||||||
|
elif self.__token_info == 'cw<ri<pc________':
|
||||||
|
self.__platform = 'IBMPC'
|
||||||
|
elif self.__token_info == 'cw<ri<pca_______':
|
||||||
|
self.__platform = 'OS/2'
|
||||||
|
if self.__token_info == 'cw<ri<deflt-font':
|
||||||
|
self.__default_num = line[20:-1]
|
||||||
|
#cw<ri<deflt-font<nu<0
|
||||||
|
if self.__platform == 'Macintosh':
|
||||||
|
self.__code_page = 'mac_roman'
|
||||||
|
elif self.__platform = 'IBMPC':
|
||||||
|
self.__code_page = '437'
|
||||||
|
elif self.__platform = 'OS/2':
|
||||||
|
self.__code_page = '850'
|
||||||
|
|
||||||
|
@ -163,15 +163,17 @@ class ProcessTokens:
|
|||||||
'rtf' : ('ri', 'rtf_______', self.default_func),
|
'rtf' : ('ri', 'rtf_______', self.default_func),
|
||||||
'deff' : ('ri', 'deflt-font', self.default_func),
|
'deff' : ('ri', 'deflt-font', self.default_func),
|
||||||
'mac' : ('ri', 'macintosh_', self.default_func),
|
'mac' : ('ri', 'macintosh_', self.default_func),
|
||||||
|
'pc' : ('ri', 'pc________', self.default_func),
|
||||||
|
'pca' : ('ri', 'pca_______', self.default_func),
|
||||||
'ansi' : ('ri', 'ansi______', self.default_func),
|
'ansi' : ('ri', 'ansi______', self.default_func),
|
||||||
'ansicpg' : ('ri', 'ansi-codpg', self.default_func),
|
'ansicpg' : ('ri', 'ansi-codpg', self.default_func),
|
||||||
# notes => nt
|
# notes => nt
|
||||||
'footnote' : ('nt', 'footnote__', self.default_func),
|
'footnote' : ('nt', 'footnote__', self.default_func),
|
||||||
'ftnalt' : ('nt', 'type______<endnote', self.two_part_func),
|
'ftnalt' : ('nt', 'type______<endnote', self.two_part_func),
|
||||||
# anchor => an
|
# anchor => an
|
||||||
'tc' : ('an', 'toc_______', self.default_func),
|
'tc' : ('an', 'toc_______', self.default_func),
|
||||||
'bkmkstt' : ('an', 'book-mk-st', self.default_func),
|
'bkmkstt' : ('an', 'book-mk-st', self.default_func),
|
||||||
'bkmkstart' : ('an', 'book-mk-st', self.default_func),
|
'bkmkstart' : ('an', 'book-mk-st', self.default_func),
|
||||||
'bkmkend' : ('an', 'book-mk-en', self.default_func),
|
'bkmkend' : ('an', 'book-mk-en', self.default_func),
|
||||||
'xe' : ('an', 'index-mark', self.default_func),
|
'xe' : ('an', 'index-mark', self.default_func),
|
||||||
'rxe' : ('an', 'place_____', self.default_func),
|
'rxe' : ('an', 'place_____', self.default_func),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user