diff --git a/resources/templates/html_export_default.tmpl b/resources/templates/html_export_default.tmpl
index c3ed921255..7aac247e59 100644
--- a/resources/templates/html_export_default.tmpl
+++ b/resources/templates/html_export_default.tmpl
@@ -14,16 +14,16 @@ ${head_content}$
 ${for title in meta.titles():}$
 ${if pos1:}$
 \s*(?=[[a-z\d])' % length), lambda match: ''))
             end_rules.append(
                 # Un wrap using punctuation
-                (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?(i|b|u)>)?\s*(\s*\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),  # noqa
+                (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\\\IA\u00DF]|(?(i|b|u)>)?\s*(\s*\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),  # noqa
             )
 
         for rule in self.PREPROCESS + start_rules:
diff --git a/src/calibre/ebooks/mobi/reader/mobi8.py b/src/calibre/ebooks/mobi/reader/mobi8.py
index 6fb58c62e7..452bdb7d63 100644
--- a/src/calibre/ebooks/mobi/reader/mobi8.py
+++ b/src/calibre/ebooks/mobi/reader/mobi8.py
@@ -391,7 +391,7 @@ class Mobi8Reader(object):
             fi = self.get_file_info(pos)
             if fi.filename is None:
                 raise ValueError('Index entry has invalid pos: %d'%pos)
-            idtag = self.get_id_tag(pos).decode(self.header.codec)
+            idtag = self.get_id_tag(pos)
             href = '%s/%s'%(fi.type, fi.filename)
         else:
             try:
@@ -403,7 +403,7 @@ class Mobi8Reader(object):
                 continue
 
             entry['href'] = href
-            entry['idtag'] = idtag
+            entry['idtag'] = idtag.decode(self.header.codec)
 
         for e in remove:
             index_entries.remove(e)
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index d17c73c6d6..98b6ef5c7b 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -605,7 +605,7 @@ class DirContainer(object):
         for root, dirs, files in os.walk(base):
             for fname in files:
                 fname = os.path.join(root, fname)
-                fname = fname.replace('\\', '/')
+                fname = fname.replace(b'\\', b'/')
                 if not isinstance(fname, unicode_type):
                     try:
                         fname = fname.decode(filesystem_encoding)
diff --git a/src/calibre/ebooks/pdb/header.py b/src/calibre/ebooks/pdb/header.py
index 86ae4d3bcc..efd0a1c3aa 100644
--- a/src/calibre/ebooks/pdb/header.py
+++ b/src/calibre/ebooks/pdb/header.py
@@ -24,7 +24,7 @@ class PdbHeaderReader(object):
     def identity(self):
         self.stream.seek(60)
         ident = self.stream.read(8)
-        return ident
+        return ident.decode('utf-8')
 
     def section_count(self):
         self.stream.seek(76)
@@ -67,8 +67,8 @@ class PdbHeaderReader(object):
 class PdbHeaderBuilder(object):
 
     def __init__(self, identity, title):
-        self.identity = identity.ljust(3, '\x00')[:8]
-        self.title = '%s\x00' % re.sub('[^-A-Za-z0-9 ]+', '_', title).ljust(31, '\x00')[:31].encode('ascii', 'replace')
+        self.identity = identity.ljust(3, '\x00')[:8].encode('utf-8')
+        self.title = b'%s\x00' % re.sub('[^-A-Za-z0-9 ]+', '_', title).ljust(31, '\x00')[:31].encode('ascii', 'replace')
 
     def build_header(self, section_lengths, out_stream):
         '''
@@ -85,4 +85,4 @@ class PdbHeaderBuilder(object):
         for id, record in enumerate(section_lengths):
             out_stream.write(struct.pack('>LBBBB', long_type(offset), 0, 0, 0, 0))
             offset += record
-        out_stream.write('\x00\x00')
+        out_stream.write(b'\x00\x00')
diff --git a/src/calibre/ebooks/pdb/palmdoc/writer.py b/src/calibre/ebooks/pdb/palmdoc/writer.py
index 390329b124..13d69b451f 100644
--- a/src/calibre/ebooks/pdb/palmdoc/writer.py
+++ b/src/calibre/ebooks/pdb/palmdoc/writer.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import division
 
 '''
 Writer content to palmdoc pdb file.
@@ -57,13 +58,13 @@ class Writer(FormatWriter):
         txt_length = len(txt)
         txt_records = []
 
-        for i in range(0, (len(txt) / MAX_RECORD_SIZE) + 1):
+        for i in range(0, (len(txt) // MAX_RECORD_SIZE) + 1):
             txt_records.append(txt[i * MAX_RECORD_SIZE: (i * MAX_RECORD_SIZE) + MAX_RECORD_SIZE])
 
         return txt_records, txt_length
 
     def _header_record(self, txt_length, record_count):
-        record = ''
+        record = b''
 
         record += struct.pack('>H', 2) # [0:2], PalmDoc compression. (1 = No compression).
         record += struct.pack('>H', 0) # [2:4], Always 0.
@@ -73,4 +74,3 @@ class Writer(FormatWriter):
         record += struct.pack('>L', 0) # [12-16], Current reading position, as an offset into the uncompressed text.
 
         return record
-
diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index f6f737275e..854bd7fa5c 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -174,8 +174,8 @@ class PMLMLizer(object):
         return text
 
     def prepare_text(self, text):
-        # Replace empty paragraphs with \c pml codes used to denote emtpy lines.
-        text = re.sub(unicode_type(r'(?<=</p>)\s*<p[^>]*>[\xc2\xa0\s]*</p>'), '\\c\n\\c', text)
+        # Replace empty paragraphs with \c pml codes used to denote empty lines.
+        text = re.sub(unicode_type(r'(?<=</p>)\s*<p[^>]*>[\xc2\xa0\s]*</p>'), r'\\c\n\\c', text)
         return text
 
     def clean_text(self, text):
@@ -207,7 +207,7 @@ class PMLMLizer(object):
         text = re.sub('[ ]{2,}', ' ', text)
 
         # Condense excessive \c empty line sequences.
-        text = re.sub('(\\c\\s*\\c\\s*){2,}', '\\c \n\\c\n', text)
+        text = re.sub(r'(\\c\\s*\\c\\s*){2,}', r'\\c \n\\c\n', text)
 
         # Remove excessive newlines.
         text = re.sub('\n[ ]+\n', '\n\n', text)
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 8321f5cccd..a3d52a854c 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -562,7 +562,7 @@ class ParseRtf:
     def __make_temp_file(self,file):
         """Make a temporary file to parse"""
         write_file="rtf_write_file"
-        read_obj = file if hasattr(file, 'read') else open(file,'r')
+        read_obj = file if hasattr(file, 'read') else open(file,'rb')
         with open(write_file, 'wb') as write_obj:
             for line in read_obj:
                 write_obj.write(line)
diff --git a/src/calibre/ebooks/rtf2xml/line_endings.py b/src/calibre/ebooks/rtf2xml/line_endings.py
index 3e2b8156e8..5dbc59a995 100755
--- a/src/calibre/ebooks/rtf2xml/line_endings.py
+++ b/src/calibre/ebooks/rtf2xml/line_endings.py
@@ -36,11 +36,11 @@ class FixLineEndings:
 
     def fix_endings(self):
         # read
-        with open(self.__file, 'r') as read_obj:
+        with open(self.__file, 'rb') as read_obj:
             input_file = read_obj.read()
         # calibre go from win and mac to unix
-        input_file = input_file.replace('\r\n', '\n')
-        input_file = input_file.replace('\r', '\n')
+        input_file = input_file.replace(b'\r\n', b'\n')
+        input_file = input_file.replace(b'\r', b'\n')
         # remove ASCII invalid chars : 0 to 8 and 11-14 to 24-26-27
         if self.__replace_illegals:
             input_file = clean_ascii_chars(input_file)
diff --git a/src/calibre/ebooks/rtf2xml/paragraph_def.py b/src/calibre/ebooks/rtf2xml/paragraph_def.py
index 82962fe9ea..0812e15776 100755
--- a/src/calibre/ebooks/rtf2xml/paragraph_def.py
+++ b/src/calibre/ebooks/rtf2xml/paragraph_def.py
@@ -608,12 +608,10 @@ if another paragraph_def is found, the state changes to collect_tokens.
         # when determining uniqueness for a style, ingorne these values, since
         # they don't tell us if the style is unique
         ignore_values = ['style-num', 'nest-level', 'in-table']
-        keys = self.__att_val_dict.keys()
-        keys.sort()
-        for key in keys:
-            if key in ignore_values:
+        for k, v in self.__att_val_dict.items():
+            if k in ignore_values:
                 continue
-            my_string += '%s:%s' % (key, self.__att_val_dict[key])
+            my_string += '%s:%s' % (k, v)
         if my_string in self.__style_num_strings:
             num = self.__style_num_strings.index(my_string)
             num += 1 # since indexing starts at zero, rather than 1
@@ -637,12 +635,9 @@ if another paragraph_def is found, the state changes to collect_tokens.
             the_value = self.__att_val_dict['tabs']
             # the_value = the_value[:-1]
             style_string += ('<%s>%s' % ('tabs', the_value))
-        keys = self.__att_val_dict.keys()
-        keys.sort()
-        for key in keys:
-            if key != 'name' and key !='style-num' and key != 'in-table'\
-                and key not in tabs_list:
-                style_string += ('<%s>%s' % (key, self.__att_val_dict[key]))
+        for k, v in self.__att_val_dict.items():
+            if k not in ['name', 'style-num', 'in-table'] + tabs_list:
+                style_string += ('<%s>%s' % (k, v))
         style_string += '\n'
         self.__body_style_strings.append(style_string)
 
@@ -690,11 +685,9 @@ if another paragraph_def is found, the state changes to collect_tokens.
             the_value = self.__att_val_dict['tabs']
             # the_value = the_value[:-1]
             self.__write_obj.write('<%s>%s' % ('tabs', the_value))
-        keys = self.__att_val_dict.keys()
-        keys.sort()
+        keys = sorted(self.__att_val_dict.keys())
         for key in keys:
-            if key != 'name' and key !='style-num' and key != 'in-table'\
-                and key not in tabs_list:
+            if key not in ['name', 'style-num', 'in-table'] + tabs_list:
                 self.__write_obj.write('<%s>%s' % (key, self.__att_val_dict[key]))
         self.__write_obj.write('\n')
         self.__write_obj.write(self.__start2_marker)
diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py
index 0f18d5ff9b..30dc0545ee 100755
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@@ -43,8 +43,8 @@ class ProcessTokens:
         self.__bug_handler = bug_handler
 
     def compile_expressions(self):
-        self.__num_exp = re.compile(r"([a-zA-Z]+)(.*)")
-        self.__utf_exp = re.compile(r'(&.*?;)')
+        self.__num_exp = re.compile(br"([a-zA-Z]+)(.*)")
+        self.__utf_exp = re.compile(br'(&.*?;)')
 
     def initiate_token_dict(self):
         self.__return_code = 0
@@ -762,10 +762,10 @@ class ProcessTokens:
     def process_cw(self, token):
         """Change the value of the control word by determining what dictionary
         it belongs to"""
-        special = ['*', ':', '}', '{', '~', '_', '-', ';']
+        special = [b'*', b':', b'}', b'{', b'~', b'_', b'-', b';']
         # if token != "{" or token != "}":
         token = token[1:] # strip off leading \
-        token = token.replace(" ", "")
+        token = token.replace(b" ", b"")
         # if not token: return
         only_alpha = token.isalpha()
         num = None
@@ -784,24 +784,24 @@ class ProcessTokens:
     def process_tokens(self):
         """Main method for handling other methods.
         """
         line_count = 0
-        with open(self.__file, 'r') as read_obj:
+        with open(self.__file, 'rb') as read_obj:
             with open(self.__write_to, 'wb') as write_obj:
                 for line in read_obj:
-                    token = line.replace("\n","")
+                    token = line.replace(b"\n",b"")
                     line_count += 1
-                    if line_count == 1 and token != '\\{':
+                    if line_count == 1 and token != b'\\{':
                         msg = '\nInvalid RTF: document doesn\'t start with {\n'
                         raise self.__exception_handler(msg)
-                    elif line_count == 2 and token[0:4] != '\\rtf':
+                    elif line_count == 2 and token[0:4] != b'\\rtf':
                         msg = '\nInvalid RTF: document doesn\'t start with \\rtf \n'
                         raise self.__exception_handler(msg)
-                    the_index = token.find('\\ ')
+                    the_index = token.find(b'\\ ')
                     if token is not None and the_index > -1:
                         msg = '\nInvalid RTF: token "\\ " not valid.\nError at line %d'\
                             % line_count
                         raise self.__exception_handler(msg)
-                    elif token[:1] == "\\":
+                    elif token[:1] == b"\\":
                         try:
                             token.decode('us-ascii')
                         except UnicodeError as msg:
@@ -816,10 +816,10 @@ class ProcessTokens:
                         for field in fields:
                             if not field:
                                 continue
-                            if field[0:1] == '&':
-                                write_obj.write('tx