mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Handle non ascii charset in RTF if declared as codepage
This commit is contained in:
parent
b2187360ec
commit
ac07ff853e
@ -326,7 +326,6 @@ class ParseRtf:
|
||||
invalid_rtf_handler = InvalidRtfException,
|
||||
)
|
||||
hex2utf_obj.convert_hex_2_utf8()
|
||||
# raise RtfInvalidCodeException, 'stop'
|
||||
self.__bracket_match('hex_2_utf_preamble')
|
||||
fonts_obj = fonts.Fonts(
|
||||
in_file = self.__temp_file,
|
||||
@ -523,6 +522,7 @@ class ParseRtf:
|
||||
indent = self.__indent,
|
||||
run_level = self.__run_level,
|
||||
no_dtd = self.__no_dtd,
|
||||
encoding = encode_obj.get_codepage(),
|
||||
bug_handler = RtfInvalidCodeException,
|
||||
)
|
||||
tags_obj.convert_to_tags()
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
import sys
|
||||
|
||||
class CheckEncoding:
|
||||
|
||||
def __init__(self, bug_handler):
|
||||
|
@ -1,6 +1,9 @@
|
||||
import os, tempfile
|
||||
from calibre.ebooks.rtf2xml import copy
|
||||
|
||||
from calibre.ebooks.rtf2xml import copy, check_encoding
|
||||
|
||||
public_dtd = 'rtf2xml1.0.dtd'
|
||||
|
||||
class ConvertToTags:
|
||||
"""
|
||||
Convert file to XML
|
||||
@ -10,6 +13,7 @@ class ConvertToTags:
|
||||
bug_handler,
|
||||
dtd_path,
|
||||
no_dtd,
|
||||
encoding,
|
||||
indent = None,
|
||||
copy = None,
|
||||
run_level = 1,
|
||||
@ -29,9 +33,14 @@ class ConvertToTags:
|
||||
self.__copy = copy
|
||||
self.__dtd_path = dtd_path
|
||||
self.__no_dtd = no_dtd
|
||||
if encoding != 'mac_roman':
|
||||
self.__encoding = 'cp' + encoding
|
||||
else:
|
||||
self.__encoding = 'mac_roman'
|
||||
self.__indent = indent
|
||||
self.__run_level = run_level
|
||||
self.__write_to = tempfile.mktemp()
|
||||
|
||||
def __initiate_values(self):
|
||||
"""
|
||||
Set values, including those for the dictionary.
|
||||
@ -61,6 +70,7 @@ class ConvertToTags:
|
||||
'tx<ut<__________' : self.__text_func,
|
||||
'mi<tg<empty_____' : self.__empty_func,
|
||||
}
|
||||
|
||||
def __open_func(self, line):
|
||||
"""
|
||||
Print the opening tag and newlines when needed.
|
||||
@ -73,6 +83,7 @@ class ConvertToTags:
|
||||
if info in self.__two_new_line:
|
||||
self.__write_extra_new_line()
|
||||
self.__write_obj.write('<%s>' % info)
|
||||
|
||||
def __empty_func(self, line):
|
||||
"""
|
||||
Print out empty tag and newlines when needed.
|
||||
@ -85,6 +96,7 @@ class ConvertToTags:
|
||||
self.__write_new_line()
|
||||
if info in self.__two_new_line:
|
||||
self.__write_extra_new_line()
|
||||
|
||||
def __open_att_func(self, line):
|
||||
"""
|
||||
Process lines for open tags that have attributes.
|
||||
@ -119,6 +131,7 @@ class ConvertToTags:
|
||||
self.__write_new_line()
|
||||
if element_name in self.__two_new_line:
|
||||
self.__write_extra_new_line()
|
||||
|
||||
def __empty_att_func(self, line):
|
||||
"""
|
||||
Same as the __open_att_func, except a '/' is placed at the end of the tag.
|
||||
@ -143,6 +156,7 @@ class ConvertToTags:
|
||||
self.__write_new_line()
|
||||
if element_name in self.__two_new_line:
|
||||
self.__write_extra_new_line()
|
||||
|
||||
def __close_func(self, line):
|
||||
"""
|
||||
Print out the closed tag and new lines, if appropriate.
|
||||
@ -156,6 +170,7 @@ class ConvertToTags:
|
||||
self.__write_new_line()
|
||||
if info in self.__two_new_line:
|
||||
self.__write_extra_new_line()
|
||||
|
||||
def __text_func(self, line):
|
||||
"""
|
||||
Simply print out the information between [17:-1]
|
||||
@ -163,6 +178,7 @@ class ConvertToTags:
|
||||
#tx<nu<__________<Normal;
|
||||
# change this!
|
||||
self.__write_obj.write(line[17:-1])
|
||||
|
||||
def __write_extra_new_line(self):
|
||||
"""
|
||||
Print out extra new lines if the new lines have not exceeded two. If
|
||||
@ -172,8 +188,10 @@ class ConvertToTags:
|
||||
return
|
||||
if self.__new_line < 2:
|
||||
self.__write_obj.write('\n')
|
||||
|
||||
def __default_func(self, line):
|
||||
pass
|
||||
|
||||
def __write_new_line(self):
|
||||
"""
|
||||
Print out a new line if a new line has not already been printed out.
|
||||
@ -183,11 +201,22 @@ class ConvertToTags:
|
||||
if not self.__new_line:
|
||||
self.__write_obj.write('\n')
|
||||
self.__new_line += 1
|
||||
|
||||
def __write_dec(self):
|
||||
"""
|
||||
Write the XML declaration at the top of the document.
|
||||
"""
|
||||
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
|
||||
#keep maximum compatibility with previous version
|
||||
check_encoding_obj = check_encoding.CheckEncoding(
|
||||
bug_handler = self.__bug_handler,
|
||||
)
|
||||
if not check_encoding_obj.check_encoding(self.__file):
|
||||
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
|
||||
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
|
||||
self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding)
|
||||
else:
|
||||
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
|
||||
sys.stderr.write(_('Bad RTF encoding, revert to US-ASCII chars and hope for the best'))
|
||||
self.__new_line = 0
|
||||
self.__write_new_line()
|
||||
if self.__no_dtd:
|
||||
@ -207,6 +236,7 @@ class ConvertToTags:
|
||||
)
|
||||
self.__new_line = 0
|
||||
self.__write_new_line()
|
||||
|
||||
def convert_to_tags(self):
|
||||
"""
|
||||
Read in the file one line at a time. Get the important info, between
|
||||
@ -222,18 +252,14 @@ class ConvertToTags:
|
||||
an empty tag function.
|
||||
"""
|
||||
self.__initiate_values()
|
||||
read_obj = open(self.__file, 'r')
|
||||
self.__write_obj = open(self.__write_to, 'w')
|
||||
self.__write_dec()
|
||||
line_to_read = 1
|
||||
while line_to_read:
|
||||
line_to_read = read_obj.readline()
|
||||
line = line_to_read
|
||||
self.__token_info = line[:16]
|
||||
action = self.__state_dict.get(self.__token_info)
|
||||
if action != None:
|
||||
action(line)
|
||||
read_obj.close()
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
for line in read_obj:
|
||||
self.__token_info = line[:16]
|
||||
action = self.__state_dict.get(self.__token_info)
|
||||
if action is not None:
|
||||
action(line)
|
||||
self.__write_obj.close()
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
|
@ -132,8 +132,7 @@ class DefaultEncoding:
|
||||
self.__code_page = '850'
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# from calibre.ebooks.rtf2xml import default_encoding
|
||||
# encode_obj = default_encoding.DefaultEncoding(
|
||||
# encode_obj = DefaultEncoding(
|
||||
# in_file = sys.argv[1],
|
||||
# bug_handler = Exception,
|
||||
# check_raw = True,
|
||||
|
Loading…
x
Reference in New Issue
Block a user