diff --git a/resources/templates/html_export_default.tmpl b/resources/templates/html_export_default.tmpl
index c3ed921255..7aac247e59 100644
--- a/resources/templates/html_export_default.tmpl
+++ b/resources/templates/html_export_default.tmpl
@@ -14,16 +14,16 @@ ${head_content}$
 ${for title in meta.titles():}$
 ${if pos1:}$
-    ${print title}$
+    ${print(title)}$
 ${:else:}$
-    ${print title}$
+    ${print(title)}$
 ${:endif}$
 ${pos1=0}$
 ${:endfor}$
-    ${print ', '.join(meta.creators())}$
+    ${print(', '.join(meta.creators()))}$
@@ -33,13 +33,13 @@ ${head_content}$
 ${if prevLink or nextLink:}$
 ${if prevLink:}$
-    ${print _('previous page'),}$
+    ${print(_('previous page'))}$
 ${:else:}$
-    ${print _('previous page'),}$
+    ${print(_('previous page'))}$
 ${:endif}$
 ${if nextLink:}$
-    ${print _('next page'),}$
+    ${print(_('next page'))}$
 ${:endif}$
 ${:endif}$
@@ -49,22 +49,22 @@ ${head_content}$
 ${if has_toc:}$
-    ${print _('Table of contents'),}$
-    ${print toc()}$
+    ${print( _('Table of contents'))}$
+    ${print(toc())}$
 ${:endif}$
 ${if prevLink:}$
-    ${print _('previous page'),}$
+    ${print(_('previous page'))}$
 ${:else:}$
-    ${print _('previous page'),}$
+    ${print(_('previous page'))}$
 ${:endif}$
-    ${print _('start'),}$
+    ${print(_('start'))}$
 ${if nextLink:}$
-    ${print _('next page'),}$
+    ${print(_('next page'))}$
 ${:endif}$
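All of the template hunks above make the same mechanical change: the `${...}$` blocks are evaluated as Python, and Python 3 drops the `print` statement in favor of the `print()` function. A minimal sketch of the two spellings (plain Python, outside the template engine; the strings are stand-ins for real template values):

```python
title = 'A Title'  # stand-in for the template variable `title`

# Python 2 only -- print statement; a trailing comma suppressed the newline:
#     print title
#     print _('previous page'),
# Python 2 and 3 -- print function; end='' suppresses the newline:
print(title)
print('previous page', end='')
```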
diff --git a/resources/templates/html_export_default_index.tmpl b/resources/templates/html_export_default_index.tmpl
index 4a9e8ab6f3..f0665ad275 100644
--- a/resources/templates/html_export_default_index.tmpl
+++ b/resources/templates/html_export_default_index.tmpl
@@ -6,10 +6,10 @@
-${print ', '.join(meta.creators()),}$
-    ${print meta.titles().next(); meta.titles().close()}$
+${print(', '.join(meta.creators()))}$
+    ${print(next(meta.titles())); print(meta.titles().close())}$
 ${for item in meta:}$
-
+
 ${:endfor}$
@@ -22,16 +22,16 @@ ${:endfor}$
 ${for title in meta.titles():}$
 ${if pos1:}$
-    ${print title}$
+    ${print(title)}$
 ${:else:}$
-    ${print title}$
+    ${print(title)}$
 ${:endif}$
 ${pos1=0}$
 ${:endfor}$
-    ${print ', '.join(meta.creators()),}$
+    ${print(', '.join(meta.creators()))}$
@@ -40,19 +40,19 @@ ${:endfor}$
 ${if has_toc:}$
-    ${print _('Table of contents'),}$
+    ${print(_('Table of contents'))}$
 ${toc}$
 ${:else:}$
-    ${print _('No table of contents present'),}$
-    ${print _('begin to read'),}$
+    ${print(_('No table of contents present'))}$
+    ${print(_('begin to read'))}$
 ${:endif}$
 ${if nextLink:}$
-    ${print _('next page'),}$
+    ${print(_('next page'))}$
 ${:endif}$
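The `meta.titles().next()` change in the first hunk is the other recurring pattern here: Python 3 renamed the iterator method to `__next__()`, and the builtin `next()` is the portable spelling. A small sketch, with a generator standing in for `meta.titles()` (hypothetical; the real object comes from calibre's metadata API):

```python
def titles():  # hypothetical stand-in for meta.titles()
    yield 'A Title'

it = titles()
print(next(it))  # Python 2 and 3; replaces it.next(), which is Python 2 only
it.close()       # explicit generator cleanup works the same on both
```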
diff --git a/src/calibre/ebooks/conversion/plugins/fb2_input.py b/src/calibre/ebooks/conversion/plugins/fb2_input.py
index 9992797e3d..d802fe2b2a 100644
--- a/src/calibre/ebooks/conversion/plugins/fb2_input.py
+++ b/src/calibre/ebooks/conversion/plugins/fb2_input.py
@@ -90,7 +90,8 @@ class FB2Input(InputFormatPlugin):
         css = re.sub(r'name\s*=\s*', 'class=', css)
         self.extract_embedded_content(doc)
         log.debug('Converting XML to HTML...')
-        ss = open(P('templates/fb2.xsl'), 'rb').read()
+        with open(P('templates/fb2.xsl'), 'rb') as f:
+            ss = f.read().decode('utf-8')
         ss = ss.replace("__FB_NS__", fb_ns)
         if options.no_inline_fb2_toc:
             log('Disabling generation of inline FB2 TOC')
@@ -124,8 +125,10 @@ class FB2Input(InputFormatPlugin):
             src = img.get('src')
             img.set('src', self.binary_map.get(src, src))
         index = transform.tostring(result)
-        open(u'index.xhtml', 'wb').write(index)
-        open(u'inline-styles.css', 'wb').write(css)
+        with open(u'index.xhtml', 'wb') as f:
+            f.write(index.encode('utf-8'))
+        with open(u'inline-styles.css', 'wb') as f:
+            f.write(css.encode('utf-8'))
         stream.seek(0)
         mi = get_metadata(stream, 'fb2')
         if not mi.title:
diff --git a/src/calibre/ebooks/conversion/plugins/html_output.py b/src/calibre/ebooks/conversion/plugins/html_output.py
index 3caa19ef2f..ba2e922267 100644
--- a/src/calibre/ebooks/conversion/plugins/html_output.py
+++ b/src/calibre/ebooks/conversion/plugins/html_output.py
@@ -79,7 +79,7 @@ class HTMLOutput(OutputFormatPlugin):
         from lxml import etree
         root = self.generate_toc(oeb_book, ref_url, output_dir)
-        return etree.tostring(root, pretty_print=True, encoding='utf-8',
+        return etree.tostring(root, pretty_print=True, encoding='unicode',
                               xml_declaration=False)

     def convert(self, oeb_book, output_path, input_plugin, opts, log):
@@ -161,14 +161,14 @@ class HTMLOutput(OutputFormatPlugin):
             # get & clean HTML <head>-data
             head = root.xpath('//h:head', namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0]
-            head_content = etree.tostring(head, pretty_print=True, encoding='utf-8')
+            head_content = etree.tostring(head, pretty_print=True, encoding='unicode')
             head_content = re.sub(r'\<\/?head.*\>', '', head_content)
             head_content = re.sub(re.compile(r'\<style.*\/style\>', re.M|re.S), '', head_content)
             head_content = re.sub(r'<(title)([^>]*)/>', r'<\1\2></\1>', head_content)

             # get & clean HTML <body>-data
             body = root.xpath('//h:body', namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0]
-            ebook_content = etree.tostring(body, pretty_print=True, encoding='utf-8')
+            ebook_content = etree.tostring(body, pretty_print=True, encoding='unicode')
             ebook_content = re.sub(r'\<\/?body.*\>', '', ebook_content)
             ebook_content = re.sub(r'<(div|a|span)([^>]*)/>', r'<\1\2></\1>', ebook_content)
@@ -202,7 +202,7 @@ class HTMLOutput(OutputFormatPlugin):

             # write html to file
             with open(path, 'wb') as f:
-                f.write(t)
+                f.write(t.encode('utf-8'))
             item.unload_data_from_memory(memory=path)

         zfile = zipfile.ZipFile(output_path, "w")
diff --git a/src/calibre/ebooks/conversion/plugins/pml_input.py b/src/calibre/ebooks/conversion/plugins/pml_input.py
index 3388bdefd3..8bdb773358 100644
--- a/src/calibre/ebooks/conversion/plugins/pml_input.py
+++ b/src/calibre/ebooks/conversion/plugins/pml_input.py
@@ -41,7 +41,9 @@ class PMLInput(InputFormatPlugin):
         else:
             html_stream = html_path

-        ienc = pml_stream.encoding if pml_stream.encoding else 'cp1252'
+        ienc = getattr(pml_stream, 'encoding', None)
+        if ienc is None:
+            ienc = 'cp1252'
         if self.options.input_encoding:
             ienc = self.options.input_encoding
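The `encoding='unicode'` switches above follow lxml's documented convention: `etree.tostring()` returns bytes for byte encodings such as `'utf-8'`, and str for `encoding='unicode'`; a str result must then be encoded before being written to a binary file. A standalone sketch (assumes lxml is installed; `out.html` is a throwaway path):

```python
from lxml import etree

root = etree.fromstring('<p>hi</p>')
etree.tostring(root, encoding='utf-8')    # -> b'<p>hi</p>' (bytes)
etree.tostring(root, encoding='unicode')  # -> '<p>hi</p>' (str)

# A file opened in 'wb' mode accepts only bytes, so the str form must be encoded:
with open('out.html', 'wb') as f:
    f.write(etree.tostring(root, encoding='unicode').encode('utf-8'))
```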
diff --git a/src/calibre/ebooks/conversion/plugins/snb_output.py b/src/calibre/ebooks/conversion/plugins/snb_output.py
index 00d0b0dc34..6f4de34c79 100644
--- a/src/calibre/ebooks/conversion/plugins/snb_output.py
+++ b/src/calibre/ebooks/conversion/plugins/snb_output.py
@@ -142,7 +142,7 @@ class SNBOutput(OutputFormatPlugin):

         for tocitem in oeb_book.toc:
             if tocitem.href.find('#') != -1:
-                item = string.split(tocitem.href, '#')
+                item = tocitem.href.split('#')
                 if len(item) != 2:
                     log.error('Error in TOC item: %s' % tocitem)
                 else:
diff --git a/src/calibre/ebooks/conversion/plugins/txt_input.py b/src/calibre/ebooks/conversion/plugins/txt_input.py
index f5def565d2..e9f6fa54fe 100644
--- a/src/calibre/ebooks/conversion/plugins/txt_input.py
+++ b/src/calibre/ebooks/conversion/plugins/txt_input.py
@@ -138,7 +138,7 @@ class TXTInput(InputFormatPlugin):
             block_to_single_line, separate_hard_scene_breaks)

         self.log = log
-        txt = ''
+        txt = b''
         log.debug('Reading text from file...')
         length = 0
         base_dir = getcwd()
@@ -151,7 +151,7 @@ class TXTInput(InputFormatPlugin):
             for x in walk('.'):
                 if os.path.splitext(x)[1].lower() in ('.txt', '.text'):
                     with open(x, 'rb') as tf:
-                        txt += tf.read() + '\n\n'
+                        txt += tf.read() + b'\n\n'
         else:
             if getattr(stream, 'name', None):
                 base_dir = os.path.dirname(stream.name)
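The `txt = b''` change in txt_input.py matters because the data appended later comes from files opened in binary mode, and Python 3 refuses to concatenate str and bytes. A two-line illustration (`chapter.txt` is a hypothetical input file):

```python
txt = b''  # was: txt = '' -- on Python 3, '' + b'...' raises TypeError
with open('chapter.txt', 'rb') as tf:
    txt += tf.read() + b'\n\n'  # the separator must be bytes too
```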
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index f6230269fa..ccbddb2eaa 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -584,7 +584,7 @@ class HTMLPreProcessor(object):
             end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: ''))
             end_rules.append(
                 # Un wrap using punctuation
-                (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),  # noqa
+                (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),  # noqa
             )

         for rule in self.PREPROCESS + start_rules:
diff --git a/src/calibre/ebooks/mobi/reader/mobi8.py b/src/calibre/ebooks/mobi/reader/mobi8.py
index 6fb58c62e7..452bdb7d63 100644
--- a/src/calibre/ebooks/mobi/reader/mobi8.py
+++ b/src/calibre/ebooks/mobi/reader/mobi8.py
@@ -391,7 +391,7 @@ class Mobi8Reader(object):
                 fi = self.get_file_info(pos)
                 if fi.filename is None:
                     raise ValueError('Index entry has invalid pos: %d'%pos)
-                idtag = self.get_id_tag(pos).decode(self.header.codec)
+                idtag = self.get_id_tag(pos)
                 href = '%s/%s'%(fi.type, fi.filename)
             else:
                 try:
@@ -403,7 +403,7 @@ class Mobi8Reader(object):
                     continue

             entry['href'] = href
-            entry['idtag'] = idtag
+            entry['idtag'] = idtag.decode(self.header.codec)

         for e in remove:
             index_entries.remove(e)
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index d17c73c6d6..98b6ef5c7b 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -605,7 +605,7 @@ class DirContainer(object):
         for root, dirs, files in os.walk(base):
             for fname in files:
                 fname = os.path.join(root, fname)
-                fname = fname.replace('\\', '/')
+                fname = fname.replace(b'\\', b'/')
                 if not isinstance(fname, unicode_type):
                     try:
                         fname = fname.decode(filesystem_encoding)
diff --git a/src/calibre/ebooks/pdb/header.py b/src/calibre/ebooks/pdb/header.py
index 86ae4d3bcc..efd0a1c3aa 100644
--- a/src/calibre/ebooks/pdb/header.py
+++ b/src/calibre/ebooks/pdb/header.py
@@ -24,7 +24,7 @@ class PdbHeaderReader(object):
     def identity(self):
         self.stream.seek(60)
         ident = self.stream.read(8)
-        return ident
+        return ident.decode('utf-8')

     def section_count(self):
         self.stream.seek(76)
@@ -67,8 +67,8 @@ class PdbHeaderReader(object):
 class PdbHeaderBuilder(object):

     def __init__(self, identity, title):
-        self.identity = identity.ljust(3, '\x00')[:8]
-        self.title = '%s\x00' % re.sub('[^-A-Za-z0-9 ]+', '_', title).ljust(31, '\x00')[:31].encode('ascii', 'replace')
+        self.identity = identity.ljust(3, '\x00')[:8].encode('utf-8')
+        self.title = b'%s\x00' % re.sub('[^-A-Za-z0-9 ]+', '_', title).ljust(31, '\x00')[:31].encode('ascii', 'replace')

     def build_header(self, section_lengths, out_stream):
         '''
@@ -85,4 +85,4 @@ class PdbHeaderBuilder(object):
         for id, record in enumerate(section_lengths):
             out_stream.write(struct.pack('>LBBBB', long_type(offset), 0, 0, 0, 0))
             offset += record
-        out_stream.write('\x00\x00')
+        out_stream.write(b'\x00\x00')
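The PDB header changes (and the PalmDoc writer below) are all the same fix: `struct.pack()` returns bytes on Python 3, so every literal spliced into a binary header must be a bytes literal. A minimal sketch of the pattern (the field values are made up, not taken from the PDB spec):

```python
import struct

record = b''  # was: record = ''
record += struct.pack('>H', 2)     # 2-byte big-endian unsigned short
record += struct.pack('>L', 4096)  # 4-byte big-endian unsigned long
record += b'\x00\x00'              # was: '\x00\x00' (TypeError on Python 3)
assert len(record) == 8
```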
diff --git a/src/calibre/ebooks/pdb/palmdoc/writer.py b/src/calibre/ebooks/pdb/palmdoc/writer.py
index 390329b124..13d69b451f 100644
--- a/src/calibre/ebooks/pdb/palmdoc/writer.py
+++ b/src/calibre/ebooks/pdb/palmdoc/writer.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import division

 '''
 Writer content to palmdoc pdb file.
@@ -57,13 +58,13 @@ class Writer(FormatWriter):
         txt_length = len(txt)

         txt_records = []
-        for i in range(0, (len(txt) / MAX_RECORD_SIZE) + 1):
+        for i in range(0, (len(txt) // MAX_RECORD_SIZE) + 1):
             txt_records.append(txt[i * MAX_RECORD_SIZE: (i * MAX_RECORD_SIZE) + MAX_RECORD_SIZE])

         return txt_records, txt_length

     def _header_record(self, txt_length, record_count):
-        record = ''
+        record = b''

         record += struct.pack('>H', 2)  # [0:2], PalmDoc compression. (1 = No compression).
         record += struct.pack('>H', 0)  # [2:4], Always 0.
@@ -73,4 +74,3 @@ class Writer(FormatWriter):
         record += struct.pack('>L', 0)  # [12-16], Current reading position, as an offset into the uncompressed text.

         return record
-
diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index f6f737275e..854bd7fa5c 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -174,8 +174,8 @@ class PMLMLizer(object):
         return text

     def prepare_text(self, text):
-        # Replace empty paragraphs with \c pml codes used to denote emtpy lines.
-        text = re.sub(unicode_type(r'(?<=</p>)\s*<p[^>]*>[\xc2\xa0\s]*</p>'), '\\c\n\\c', text)
+        # Replace empty paragraphs with \c pml codes used to denote empty lines.
+        text = re.sub(unicode_type(r'(?<=</p>)\s*<p[^>]*>[\xc2\xa0\s]*</p>'), r'\\c\n\\c', text)
         return text

     def clean_text(self, text):
@@ -207,7 +207,7 @@ class PMLMLizer(object):
         text = re.sub('[ ]{2,}', ' ', text)

         # Condense excessive \c empty line sequences.
-        text = re.sub('(\\c\\s*\\c\\s*){2,}', '\\c \n\\c\n', text)
+        text = re.sub(r'(\\c\\s*\\c\\s*){2,}', r'\\c \n\\c\n', text)

         # Remove excessive newlines.
         text = re.sub('\n[ ]+\n', '\n\n', text)
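The pmlml.py hunks switch the `re.sub()` replacement strings to raw strings because Python 3.7+ rejects unknown escapes such as `\c` in a replacement template with `re.error: bad escape`. A sketch of the failure and the fix:

```python
import re

text = '<p>one</p><p></p>'
# re.sub(r'<p>\s*</p>', '\c', text)   # re.error on Python 3.7+: bad escape \c
print(re.sub(r'<p>\s*</p>', r'\\c', text))  # -> '<p>one</p>\c'
```

In a replacement, `\\` emits a literal backslash and `\n` a newline, so `r'\\c\n\\c'` produces the two `\c` codes on separate lines that the PML format expects.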
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 8321f5cccd..a3d52a854c 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -562,7 +562,7 @@ class ParseRtf:
     def __make_temp_file(self,file):
         """Make a temporary file to parse"""
         write_file="rtf_write_file"
-        read_obj = file if hasattr(file, 'read') else open(file,'r')
+        read_obj = file if hasattr(file, 'read') else open(file,'rb')
         with open(write_file, 'wb') as write_obj:
             for line in read_obj:
                 write_obj.write(line)
diff --git a/src/calibre/ebooks/rtf2xml/line_endings.py b/src/calibre/ebooks/rtf2xml/line_endings.py
index 3e2b8156e8..5dbc59a995 100755
--- a/src/calibre/ebooks/rtf2xml/line_endings.py
+++ b/src/calibre/ebooks/rtf2xml/line_endings.py
@@ -36,11 +36,11 @@ class FixLineEndings:

     def fix_endings(self):
         # read
-        with open(self.__file, 'r') as read_obj:
+        with open(self.__file, 'rb') as read_obj:
             input_file = read_obj.read()
         # calibre go from win and mac to unix
-        input_file = input_file.replace('\r\n', '\n')
-        input_file = input_file.replace('\r', '\n')
+        input_file = input_file.replace(b'\r\n', b'\n')
+        input_file = input_file.replace(b'\r', b'\n')
         # remove ASCII invalid chars : 0 to 8 and 11-14 to 24-26-27
         if self.__replace_illegals:
             input_file = clean_ascii_chars(input_file)
diff --git a/src/calibre/ebooks/rtf2xml/paragraph_def.py b/src/calibre/ebooks/rtf2xml/paragraph_def.py
index 82962fe9ea..0812e15776 100755
--- a/src/calibre/ebooks/rtf2xml/paragraph_def.py
+++ b/src/calibre/ebooks/rtf2xml/paragraph_def.py
@@ -608,12 +608,10 @@ if another paragraph_def is found, the state changes to collect_tokens.
         # when determining uniqueness for a style, ignore these values, since
         # they don't tell us if the style is unique
         ignore_values = ['style-num', 'nest-level', 'in-table']
-        keys = self.__att_val_dict.keys()
-        keys.sort()
-        for key in keys:
-            if key in ignore_values:
+        for k, v in self.__att_val_dict.items():
+            if k in ignore_values:
                 continue
-            my_string += '%s:%s' % (key, self.__att_val_dict[key])
+            my_string += '%s:%s' % (k, v)
         if my_string in self.__style_num_strings:
             num = self.__style_num_strings.index(my_string)
             num += 1  # since indexing starts at zero, rather than 1
@@ -637,12 +635,9 @@ if another paragraph_def is found, the state changes to collect_tokens.
             the_value = self.__att_val_dict['tabs']
             # the_value = the_value[:-1]
             style_string += ('<%s>%s' % ('tabs', the_value))
-        keys = self.__att_val_dict.keys()
-        keys.sort()
-        for key in keys:
-            if key != 'name' and key !='style-num' and key != 'in-table'\
-                and key not in tabs_list:
-                style_string += ('<%s>%s' % (key, self.__att_val_dict[key]))
+        for k, v in self.__att_val_dict.items():
+            if k not in ['name', 'style-num', 'in-table'] + tabs_list:
+                style_string += ('<%s>%s' % (k, v))
         style_string += '\n'
         self.__body_style_strings.append(style_string)
@@ -690,11 +685,9 @@ if another paragraph_def is found, the state changes to collect_tokens.
             the_value = self.__att_val_dict['tabs']
             # the_value = the_value[:-1]
             self.__write_obj.write('<%s>%s' % ('tabs', the_value))
-        keys = self.__att_val_dict.keys()
-        keys.sort()
+        keys = sorted(self.__att_val_dict.keys())
         for key in keys:
-            if key != 'name' and key !='style-num' and key != 'in-table'\
-                and key not in tabs_list:
+            if key not in ['name', 'style-num', 'in-table'] + tabs_list:
                 self.__write_obj.write('<%s>%s' % (key, self.__att_val_dict[key]))
         self.__write_obj.write('\n')
         self.__write_obj.write(self.__start2_marker)
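The paragraph_def.py rewrite touches the other classic py2→py3 trap: `dict.keys()` now returns a view, which has no `.sort()`. Either wrap it in `sorted()` or, where order is irrelevant, iterate `items()` directly. A sketch (the dict contents are made up):

```python
d = {'style-num': '2', 'name': 'x'}

keys = sorted(d.keys())   # replaces: keys = d.keys(); keys.sort()
for key in keys:
    print(key, d[key])

for k, v in d.items():    # fine when ordering does not matter
    print(k, v)
```

Worth noting in review: the first two hunks replace sorted iteration with plain `items()` order (insertion order on Python 3.7+), which is a small behavior change in how the style-uniqueness string is built; it appears intentional, since the string is only compared against strings built the same way.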
diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py
index 0f18d5ff9b..30dc0545ee 100755
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@@ -43,8 +43,8 @@ class ProcessTokens:
         self.__bug_handler = bug_handler

     def compile_expressions(self):
-        self.__num_exp = re.compile(r"([a-zA-Z]+)(.*)")
-        self.__utf_exp = re.compile(r'(&.*?;)')
+        self.__num_exp = re.compile(br"([a-zA-Z]+)(.*)")
+        self.__utf_exp = re.compile(br'(&.*?;)')

     def initiate_token_dict(self):
         self.__return_code = 0
@@ -762,10 +762,10 @@ class ProcessTokens:
     def process_cw(self, token):
         """Change the value of the control word by determining what dictionary it belongs to"""
-        special = ['*', ':', '}', '{', '~', '_', '-', ';']
+        special = [b'*', b':', b'}', b'{', b'~', b'_', b'-', b';']
         # if token != "{" or token != "}":
         token = token[1:]  # strip off leading \
-        token = token.replace(" ", "")
+        token = token.replace(b" ", b"")
         # if not token: return
         only_alpha = token.isalpha()
         num = None
@@ -784,24 +784,24 @@ class ProcessTokens:
     def process_tokens(self):
         """Main method for handling other methods. """
         line_count = 0
-        with open(self.__file, 'r') as read_obj:
+        with open(self.__file, 'rb') as read_obj:
            with open(self.__write_to, 'wb') as write_obj:
                 for line in read_obj:
-                    token = line.replace("\n","")
+                    token = line.replace(b"\n",b"")
                     line_count += 1
-                    if line_count == 1 and token != '\\{':
+                    if line_count == 1 and token != b'\\{':
                         msg = '\nInvalid RTF: document doesn\'t start with {\n'
                         raise self.__exception_handler(msg)
-                    elif line_count == 2 and token[0:4] != '\\rtf':
+                    elif line_count == 2 and token[0:4] != b'\\rtf':
                         msg = '\nInvalid RTF: document doesn\'t start with \\rtf \n'
                         raise self.__exception_handler(msg)
-                    the_index = token.find('\\ ')
+                    the_index = token.find(b'\\ ')
                     if token is not None and the_index > -1:
                         msg = '\nInvalid RTF: token "\\ " not valid.\nError at line %d'\
                             % line_count
                         raise self.__exception_handler(msg)
-                    elif token[:1] == "\\":
+                    elif token[:1] == b"\\":
                         try:
                             token.decode('us-ascii')
                         except UnicodeError as msg:
@@ -816,10 +816,10 @@ class ProcessTokens:
                         for field in fields:
                             if not field:
                                 continue
-                            if field[0:1] == '&':
-                                write_obj.write('tx<ut>%s</ut>\n' % field)
+                            if field[0:1] == b'&':
+                                write_obj.write(b'tx<ut>%s</ut>\n' % field)
diff --git a/src/calibre/ebooks/rtf2xml/tokenize.py b/src/calibre/ebooks/rtf2xml/tokenize.py
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@@ ... @@ class Tokenize:
-        input_file = self.__par_exp.sub('\n\\par \n', input_file)
-        input_file = self.__cwdigit_exp.sub("\g<1>\n\g<2>", input_file)
+        input_file = self.__par_exp.sub(r'\n\\par \n', input_file)
+        input_file = self.__cwdigit_exp.sub(r"\g<1>\n\g<2>", input_file)
         input_file = self.__cs_ast.sub(r"\g<1>", input_file)
-        input_file = self.__ms_hex_exp.sub("\\mshex0\\g<1> ", input_file)
-        input_file = self.__utf_ud.sub("\\{\\uc0 \\g<1>\\}", input_file)
+        input_file = self.__ms_hex_exp.sub(r"\\mshex0\g<1> ", input_file)
+        input_file = self.__utf_ud.sub(r"\\{\\uc0 \g<1>\\}", input_file)
         # remove \n in bin data
         input_file = self.__bin_exp.sub(lambda x: x.group().replace('\n', '') + '\n', input_file)
@@ -188,7 +188,7 @@ class Tokenize:
         # write
         with open(self.__write_to, 'wb') as write_obj:
-            write_obj.write('\n'.join(tokens))
+            write_obj.write('\n'.join(tokens).encode('utf-8'))
         # Move and copy
         copy_obj = copy.Copy(bug_handler=self.__bug_handler)
         if self.__copy:
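Finally, the rtf2xml tokenizer compiles its expressions from bytes patterns so they can run against the binary RTF stream: on Python 3 a bytes pattern only matches bytes, and mixing the two levels raises TypeError. A last sketch of the pattern (the input bytes are an invented RTF fragment):

```python
import re

utf_exp = re.compile(br'(&.*?;)')  # bytes pattern, as in compile_expressions()
print(re.split(utf_exp, b'before &#8212; after'))
# -> [b'before ', b'&#8212;', b' after']
# re.split(utf_exp, 'text')  # TypeError: cannot use a bytes pattern on a str
```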