mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-11 09:13:57 -04:00
Modify mac-roman encoding, now go to 10000
This commit is contained in:
parent
10c2e603e2
commit
93ef1699df
@ -237,9 +237,7 @@ class ParseRtf:
|
|||||||
check_encoding_obj = check_encoding.CheckEncoding(
|
check_encoding_obj = check_encoding.CheckEncoding(
|
||||||
bug_handler = RtfInvalidCodeException,
|
bug_handler = RtfInvalidCodeException,
|
||||||
)
|
)
|
||||||
enc = encode_obj.get_codepage()
|
enc = 'cp' + encode_obj.get_codepage()
|
||||||
if enc != 'mac_roman':
|
|
||||||
enc = 'cp' + enc
|
|
||||||
msg = 'Exception in token processing'
|
msg = 'Exception in token processing'
|
||||||
if check_encoding_obj.check_encoding(self.__file, enc):
|
if check_encoding_obj.check_encoding(self.__file, enc):
|
||||||
file_name = self.__file if isinstance(self.__file, str) \
|
file_name = self.__file if isinstance(self.__file, str) \
|
||||||
|
@ -74,9 +74,6 @@ class DefaultEncoding:
|
|||||||
if not self.__datafetched:
|
if not self.__datafetched:
|
||||||
self._encoding()
|
self._encoding()
|
||||||
self.__datafetched = True
|
self.__datafetched = True
|
||||||
if self.__platform == 'Macintosh':
|
|
||||||
code_page = self.__code_page
|
|
||||||
else:
|
|
||||||
code_page = 'ansicpg' + self.__code_page
|
code_page = 'ansicpg' + self.__code_page
|
||||||
return self.__platform, code_page, self.__default_num
|
return self.__platform, code_page, self.__default_num
|
||||||
|
|
||||||
@ -94,49 +91,59 @@ class DefaultEncoding:
|
|||||||
|
|
||||||
def _encoding(self):
|
def _encoding(self):
|
||||||
with open(self.__file, 'r') as read_obj:
|
with open(self.__file, 'r') as read_obj:
|
||||||
|
cpfound = False
|
||||||
if not self.__fetchraw:
|
if not self.__fetchraw:
|
||||||
for line in read_obj:
|
for line in read_obj:
|
||||||
self.__token_info = line[:16]
|
self.__token_info = line[:16]
|
||||||
if self.__token_info == 'mi<mk<rtfhed-end':
|
if self.__token_info == 'mi<mk<rtfhed-end':
|
||||||
break
|
break
|
||||||
if self.__token_info == 'cw<ri<ansi-codpg':
|
|
||||||
#cw<ri<ansi-codpg<nu<10000
|
|
||||||
self.__code_page = line[20:-1] if int(line[20:-1]) \
|
|
||||||
else '1252'
|
|
||||||
if self.__token_info == 'cw<ri<macintosh_':
|
if self.__token_info == 'cw<ri<macintosh_':
|
||||||
self.__platform = 'Macintosh'
|
self.__platform = 'Macintosh'
|
||||||
self.__code_page = 'mac_roman'
|
|
||||||
elif self.__token_info == 'cw<ri<pc________':
|
elif self.__token_info == 'cw<ri<pc________':
|
||||||
self.__platform = 'IBMPC'
|
self.__platform = 'IBMPC'
|
||||||
self.__code_page = '437'
|
|
||||||
elif self.__token_info == 'cw<ri<pca_______':
|
elif self.__token_info == 'cw<ri<pca_______':
|
||||||
self.__platform = 'OS/2'
|
self.__platform = 'OS/2'
|
||||||
self.__code_page = '850'
|
if self.__token_info == 'cw<ri<ansi-codpg' \
|
||||||
|
and int(line[20:-1]):
|
||||||
|
self.__code_page = line[20:-1]
|
||||||
if self.__token_info == 'cw<ri<deflt-font':
|
if self.__token_info == 'cw<ri<deflt-font':
|
||||||
self.__default_num = line[20:-1]
|
self.__default_num = line[20:-1]
|
||||||
|
cpfound = True
|
||||||
#cw<ri<deflt-font<nu<0
|
#cw<ri<deflt-font<nu<0
|
||||||
|
if self.__platform != 'Windows' and \
|
||||||
|
not cpfound:
|
||||||
|
if self.__platform == 'Macintosh':
|
||||||
|
self.__code_page = '10000'
|
||||||
|
elif self.__platform == 'IBMPC':
|
||||||
|
self.__code_page = '437'
|
||||||
|
elif self.__platform == 'OS/2':
|
||||||
|
self.__code_page = '850'
|
||||||
else:
|
else:
|
||||||
fenc = re.compile(r'\\(mac|pc|ansi|pca)[\\ \{\}\t\n]+')
|
fenc = re.compile(r'\\(mac|pc|ansi|pca)[\\ \{\}\t\n]+')
|
||||||
fenccp = re.compile(r'\\ansicpg(\d+)[\\ \{\}\t\n]+')
|
fenccp = re.compile(r'\\ansicpg(\d+)[\\ \{\}\t\n]+')
|
||||||
|
|
||||||
for line in read_obj:
|
for line in read_obj:
|
||||||
|
if fenc.search(line):
|
||||||
|
enc = fenc.search(line).group(1)
|
||||||
if fenccp.search(line):
|
if fenccp.search(line):
|
||||||
cp = fenccp.search(line).group(1)
|
cp = fenccp.search(line).group(1)
|
||||||
if not int(cp):
|
if not int(cp):
|
||||||
self.__code_page = cp
|
self.__code_page = cp
|
||||||
|
cpfound = True
|
||||||
break
|
break
|
||||||
if fenc.search(line):
|
if self.__platform != 'Windows' and \
|
||||||
enc = fenc.search(line).group(1)
|
not cpfound:
|
||||||
if enc == 'mac':
|
if enc == 'mac':
|
||||||
self.__code_page = 'mac_roman'
|
self.__code_page = '10000'
|
||||||
elif enc == 'pc':
|
elif enc == 'pc':
|
||||||
self.__code_page = '437'
|
self.__code_page = '437'
|
||||||
elif enc == 'pca':
|
elif enc == 'pca':
|
||||||
self.__code_page = '850'
|
self.__code_page = '850'
|
||||||
|
|
||||||
# if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# encode_obj = DefaultEncoding(
|
encode_obj = DefaultEncoding(
|
||||||
# in_file = sys.argv[1],
|
in_file = sys.argv[1],
|
||||||
# bug_handler = Exception,
|
bug_handler = Exception,
|
||||||
# check_raw = True,
|
check_raw = True,
|
||||||
# )
|
)
|
||||||
# print encode_obj.get_codepage()
|
print encode_obj.get_codepage()
|
||||||
|
@ -78,7 +78,6 @@ class ProcessTokens:
|
|||||||
'backslash' : ('nu', '\\', self.text_func),
|
'backslash' : ('nu', '\\', self.text_func),
|
||||||
'ob' : ('nu', '{', self.text_func),
|
'ob' : ('nu', '{', self.text_func),
|
||||||
'cb' : ('nu', '}', self.text_func),
|
'cb' : ('nu', '}', self.text_func),
|
||||||
#'line' : ('nu', ' ', self.text_func), calibre
|
|
||||||
# paragraph formatting => pf
|
# paragraph formatting => pf
|
||||||
'page' : ('pf', 'page-break', self.default_func),
|
'page' : ('pf', 'page-break', self.default_func),
|
||||||
'par' : ('pf', 'par-end___', self.default_func),
|
'par' : ('pf', 'par-end___', self.default_func),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user