diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py index a3d52a854c..5fc57c98f6 100755 --- a/src/calibre/ebooks/rtf2xml/ParseRtf.py +++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py @@ -25,6 +25,7 @@ from calibre.ebooks.rtf2xml import headings_to_sections, \ body_styles, preamble_rest, group_styles, \ inline from calibre.ebooks.rtf2xml.old_rtf import OldRtf +from . import open_for_read, open_for_write """ Here is an example script using the ParseRTF module directly @@ -562,8 +563,8 @@ class ParseRtf: def __make_temp_file(self,file): """Make a temporary file to parse""" write_file="rtf_write_file" - read_obj = file if hasattr(file, 'read') else open(file,'rb') - with open(write_file, 'wb') as write_obj: + read_obj = file if hasattr(file, 'read') else open_for_read(file) + with open_for_write(write_file) as write_obj: for line in read_obj: write_obj.write(line) return write_file diff --git a/src/calibre/ebooks/rtf2xml/__init__.py b/src/calibre/ebooks/rtf2xml/__init__.py index b3d0c915b5..5200b54694 100755 --- a/src/calibre/ebooks/rtf2xml/__init__.py +++ b/src/calibre/ebooks/rtf2xml/__init__.py @@ -1,3 +1,12 @@ -''' -modules for rtf2xml -''' +from __future__ import unicode_literals + +import io + + +def open_for_read(path): + return io.open(path, encoding='utf-8', errors='replace') + + +def open_for_write(path, append=False): + mode = 'a' if append else 'w' + return io.open(path, mode, encoding='utf-8', errors='replace', newline='') diff --git a/src/calibre/ebooks/rtf2xml/add_brackets.py b/src/calibre/ebooks/rtf2xml/add_brackets.py index 0af94f6d35..71b08b5bb5 100755 --- a/src/calibre/ebooks/rtf2xml/add_brackets.py +++ b/src/calibre/ebooks/rtf2xml/add_brackets.py @@ -16,6 +16,7 @@ import sys, os from calibre.ebooks.rtf2xml import copy, check_brackets from calibre.ptempfile import better_mktemp from polyglot.builtins import iteritems +from . import open_for_read, open_for_write class AddBrackets: @@ -202,8 +203,8 @@ class AddBrackets: """ """ self.__initiate_values() - with open(self.__file, 'r') as read_obj: - with open(self.__write_to, 'w') as self.__write_obj: + with open_for_read(self.__file) as read_obj: + with open_for_write(self.__write_to) as self.__write_obj: for line in read_obj: self.__token_info = line[:16] if self.__token_info == 'ob -1: msg = '\nInvalid RTF: token "\\ " not valid.\nError at line %d'\ % line_count raise self.__exception_handler(msg) - elif token[:1] == b"\\": - try: - token.decode('us-ascii') - except UnicodeError as msg: - msg = '\nInvalid RTF: Tokens not ascii encoded.\n%s\nError at line %d'\ - % (str(msg), line_count) - raise self.__exception_handler(msg) + elif token[:1] == "\\": line = self.process_cw(token) if line is not None: write_obj.write(line) @@ -816,10 +811,10 @@ class ProcessTokens: for field in fields: if not field: continue - if field[0:1] == b'&': - write_obj.write(b'tx 0, tokens)) # write - with open(self.__write_to, 'wb') as write_obj: - write_obj.write('\n'.join(tokens).encode('utf-8')) + with open_for_write(self.__write_to) as write_obj: + write_obj.write('\n'.join(tokens)) # Move and copy copy_obj = copy.Copy(bug_handler=self.__bug_handler) if self.__copy: