diff --git a/resources/templates/html_export_default.tmpl b/resources/templates/html_export_default.tmpl
index c3ed921255..7aac247e59 100644
--- a/resources/templates/html_export_default.tmpl
+++ b/resources/templates/html_export_default.tmpl
@@ -14,16 +14,16 @@ ${head_content}$
 ${for title in meta.titles():}$
 ${if pos1:}$
 \s*(?=[[a-z\d])' % length), lambda match: ''))
             end_rules.append(
                 # Un wrap using punctuation
-                (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?(i|b|u)>)?\s*(\s*\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),  # noqa
+                (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\\\IA\u00DF]|(?(i|b|u)>)?\s*(\s*\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),  # noqa
             )
 
         for rule in self.PREPROCESS + start_rules:
diff --git a/src/calibre/ebooks/mobi/reader/mobi8.py b/src/calibre/ebooks/mobi/reader/mobi8.py
index 6fb58c62e7..452bdb7d63 100644
--- a/src/calibre/ebooks/mobi/reader/mobi8.py
+++ b/src/calibre/ebooks/mobi/reader/mobi8.py
@@ -391,7 +391,7 @@ class Mobi8Reader(object):
             fi = self.get_file_info(pos)
             if fi.filename is None:
                 raise ValueError('Index entry has invalid pos: %d'%pos)
-            idtag = self.get_id_tag(pos).decode(self.header.codec)
+            idtag = self.get_id_tag(pos)
             href = '%s/%s'%(fi.type, fi.filename)
         else:
             try:
@@ -403,7 +403,7 @@ class Mobi8Reader(object):
                 continue
 
             entry['href'] = href
-            entry['idtag'] = idtag
+            entry['idtag'] = idtag.decode(self.header.codec)
 
         for e in remove:
             index_entries.remove(e)
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index d17c73c6d6..98b6ef5c7b 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -605,7 +605,7 @@ class DirContainer(object):
         for root, dirs, files in os.walk(base):
             for fname in files:
                 fname = os.path.join(root, fname)
-                fname = fname.replace('\\', '/')
+                fname = fname.replace(b'\\', b'/')
                 if not isinstance(fname, unicode_type):
                     try:
                         fname = fname.decode(filesystem_encoding)
diff --git a/src/calibre/ebooks/pdb/header.py b/src/calibre/ebooks/pdb/header.py
index 86ae4d3bcc..efd0a1c3aa 100644
--- a/src/calibre/ebooks/pdb/header.py
+++ b/src/calibre/ebooks/pdb/header.py
@@ -24,7 +24,7 @@ class PdbHeaderReader(object):
     def identity(self):
         self.stream.seek(60)
         ident = self.stream.read(8)
-        return ident
+        return ident.decode('utf-8')
 
     def section_count(self):
         self.stream.seek(76)
@@ -67,8 +67,8 @@ class PdbHeaderReader(object):
 class PdbHeaderBuilder(object):
 
     def __init__(self, identity, title):
-        self.identity = identity.ljust(3, '\x00')[:8]
-        self.title = '%s\x00' % re.sub('[^-A-Za-z0-9 ]+', '_', title).ljust(31, '\x00')[:31].encode('ascii', 'replace')
+        self.identity = identity.ljust(3, '\x00')[:8].encode('utf-8')
+        self.title = b'%s\x00' % re.sub('[^-A-Za-z0-9 ]+', '_', title).ljust(31, '\x00')[:31].encode('ascii', 'replace')
 
     def build_header(self, section_lengths, out_stream):
         '''
@@ -85,4 +85,4 @@ class PdbHeaderBuilder(object):
         for id, record in enumerate(section_lengths):
             out_stream.write(struct.pack('>LBBBB', long_type(offset), 0, 0, 0, 0))
             offset += record
-        out_stream.write('\x00\x00')
+        out_stream.write(b'\x00\x00')
diff --git a/src/calibre/ebooks/pdb/palmdoc/writer.py b/src/calibre/ebooks/pdb/palmdoc/writer.py
index 390329b124..13d69b451f 100644
--- a/src/calibre/ebooks/pdb/palmdoc/writer.py
+++ b/src/calibre/ebooks/pdb/palmdoc/writer.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import division
 
 '''
 Writer content to palmdoc pdb file.
@@ -57,13 +58,13 @@ class Writer(FormatWriter):
         txt_length = len(txt)
         txt_records = []
 
-        for i in range(0, (len(txt) / MAX_RECORD_SIZE) + 1):
+        for i in range(0, (len(txt) // MAX_RECORD_SIZE) + 1):
             txt_records.append(txt[i * MAX_RECORD_SIZE: (i * MAX_RECORD_SIZE) + MAX_RECORD_SIZE])
 
         return txt_records, txt_length
 
     def _header_record(self, txt_length, record_count):
-        record = ''
+        record = b''
 
         record += struct.pack('>H', 2) # [0:2], PalmDoc compression. (1 = No compression).
         record += struct.pack('>H', 0) # [2:4], Always 0.
@@ -73,4 +74,3 @@ class Writer(FormatWriter):
         record += struct.pack('>L', 0) # [12-16], Current reading position, as an offset into the uncompressed text.
 
         return record
-
diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index f6f737275e..854bd7fa5c 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -174,8 +174,8 @@ class PMLMLizer(object):
         return text
 
     def prepare_text(self, text):
-        # Replace empty paragraphs with \c pml codes used to denote emtpy lines.
-        text = re.sub(unicode_type(r'(?<=</p>)\s*<p[^>]*>[\xc2\xa0\s]*</p>'), '\\c\n\\c', text)
+        # Replace empty paragraphs with \c pml codes used to denote empty lines.
+        text = re.sub(unicode_type(r'(?<=</p>)\s*<p[^>]*>[\xc2\xa0\s]*</p>'), r'\\c\n\\c', text)
         return text
 
     def clean_text(self, text):
@@ -207,7 +207,7 @@ class PMLMLizer(object):
         text = re.sub('[ ]{2,}', ' ', text)
 
         # Condense excessive \c empty line sequences.
-        text = re.sub('(\\c\\s*\\c\\s*){2,}', '\\c \n\\c\n', text)
+        text = re.sub(r'(\\c\\s*\\c\\s*){2,}', r'\\c \n\\c\n', text)
 
         # Remove excessive newlines.
         text = re.sub('\n[ ]+\n', '\n\n', text)
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 8321f5cccd..a3d52a854c 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -562,7 +562,7 @@ class ParseRtf:
     def __make_temp_file(self,file):
         """Make a temporary file to parse"""
         write_file="rtf_write_file"
-        read_obj = file if hasattr(file, 'read') else open(file,'r')
+        read_obj = file if hasattr(file, 'read') else open(file,'rb')
         with open(write_file, 'wb') as write_obj:
             for line in read_obj:
                 write_obj.write(line)
diff --git a/src/calibre/ebooks/rtf2xml/line_endings.py b/src/calibre/ebooks/rtf2xml/line_endings.py
index 3e2b8156e8..5dbc59a995 100755
--- a/src/calibre/ebooks/rtf2xml/line_endings.py
+++ b/src/calibre/ebooks/rtf2xml/line_endings.py
@@ -36,11 +36,11 @@ class FixLineEndings:
 
     def fix_endings(self):
         # read
-        with open(self.__file, 'r') as read_obj:
+        with open(self.__file, 'rb') as read_obj:
             input_file = read_obj.read()
         # calibre go from win and mac to unix
-        input_file = input_file.replace('\r\n', '\n')
-        input_file = input_file.replace('\r', '\n')
+        input_file = input_file.replace(b'\r\n', b'\n')
+        input_file = input_file.replace(b'\r', b'\n')
         # remove ASCII invalid chars : 0 to 8 and 11-14 to 24-26-27
         if self.__replace_illegals:
             input_file = clean_ascii_chars(input_file)
diff --git a/src/calibre/ebooks/rtf2xml/paragraph_def.py b/src/calibre/ebooks/rtf2xml/paragraph_def.py
index 82962fe9ea..0812e15776 100755
--- a/src/calibre/ebooks/rtf2xml/paragraph_def.py
+++ b/src/calibre/ebooks/rtf2xml/paragraph_def.py
@@ -608,12 +608,10 @@ if another paragraph_def is found, the state changes to collect_tokens.
         # when determining uniqueness for a style, ingorne these values, since
         # they don't tell us if the style is unique
         ignore_values = ['style-num', 'nest-level', 'in-table']
-        keys = self.__att_val_dict.keys()
-        keys.sort()
-        for key in keys:
-            if key in ignore_values:
+        for k, v in self.__att_val_dict.items():
+            if k in ignore_values:
                 continue
-            my_string += '%s:%s' % (key, self.__att_val_dict[key])
+            my_string += '%s:%s' % (k, v)
         if my_string in self.__style_num_strings:
             num = self.__style_num_strings.index(my_string)
             num += 1 # since indexing starts at zero, rather than 1
@@ -637,12 +635,9 @@ if another paragraph_def is found, the state changes to collect_tokens.
             the_value = self.__att_val_dict['tabs']
             # the_value = the_value[:-1]
             style_string += ('<%s>%s' % ('tabs', the_value))
-        keys = self.__att_val_dict.keys()
-        keys.sort()
-        for key in keys:
-            if key != 'name' and key !='style-num' and key != 'in-table'\
-                and key not in tabs_list:
-                style_string += ('<%s>%s' % (key, self.__att_val_dict[key]))
+        for k, v in self.__att_val_dict.items():
+            if k not in ['name', 'style-num', 'in-table'] + tabs_list:
+                style_string += ('<%s>%s' % (k, v))
         style_string += '\n'
         self.__body_style_strings.append(style_string)
 
@@ -690,11 +685,9 @@ if another paragraph_def is found, the state changes to collect_tokens.
             the_value = self.__att_val_dict['tabs']
             # the_value = the_value[:-1]
             self.__write_obj.write('<%s>%s' % ('tabs', the_value))
-        keys = self.__att_val_dict.keys()
-        keys.sort()
+        keys = sorted(self.__att_val_dict.keys())
         for key in keys:
-            if key != 'name' and key !='style-num' and key != 'in-table'\
-                and key not in tabs_list:
+            if key not in ['name', 'style-num', 'in-table'] + tabs_list:
                 self.__write_obj.write('<%s>%s' % (key, self.__att_val_dict[key]))
         self.__write_obj.write('\n')
         self.__write_obj.write(self.__start2_marker)
diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py
index 0f18d5ff9b..30dc0545ee 100755
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@@ -43,8 +43,8 @@ class ProcessTokens:
         self.__bug_handler = bug_handler
 
     def compile_expressions(self):
-        self.__num_exp = re.compile(r"([a-zA-Z]+)(.*)")
-        self.__utf_exp = re.compile(r'(&.*?;)')
+        self.__num_exp = re.compile(br"([a-zA-Z]+)(.*)")
+        self.__utf_exp = re.compile(br'(&.*?;)')
 
     def initiate_token_dict(self):
         self.__return_code = 0
@@ -762,10 +762,10 @@ class ProcessTokens:
     def process_cw(self, token):
         """Change the value of the control word by determining what dictionary
         it belongs to"""
-        special = ['*', ':', '}', '{', '~', '_', '-', ';']
+        special = [b'*', b':', b'}', b'{', b'~', b'_', b'-', b';']
         # if token != "{" or token != "}":
         token = token[1:] # strip off leading \
-        token = token.replace(" ", "")
+        token = token.replace(b" ", b"")
         # if not token: return
         only_alpha = token.isalpha()
         num = None
@@ -784,24 +784,24 @@ class ProcessTokens:
     def process_tokens(self):
         """Main method for handling other methods.
         """
         line_count = 0
-        with open(self.__file, 'r') as read_obj:
+        with open(self.__file, 'rb') as read_obj:
             with open(self.__write_to, 'wb') as write_obj:
                 for line in read_obj:
-                    token = line.replace("\n","")
+                    token = line.replace(b"\n",b"")
                     line_count += 1
-                    if line_count == 1 and token != '\\{':
+                    if line_count == 1 and token != b'\\{':
                         msg = '\nInvalid RTF: document doesn\'t start with {\n'
                         raise self.__exception_handler(msg)
-                    elif line_count == 2 and token[0:4] != '\\rtf':
+                    elif line_count == 2 and token[0:4] != b'\\rtf':
                         msg = '\nInvalid RTF: document doesn\'t start with \\rtf \n'
                         raise self.__exception_handler(msg)
-                    the_index = token.find('\\ ')
+                    the_index = token.find(b'\\ ')
                     if token is not None and the_index > -1:
                         msg = '\nInvalid RTF: token "\\ " not valid.\nError at line %d'\
                             % line_count
                         raise self.__exception_handler(msg)
-                    elif token[:1] == "\\":
+                    elif token[:1] == b"\\":
                         try:
                             token.decode('us-ascii')
                         except UnicodeError as msg:
@@ -816,10 +816,10 @@ class ProcessTokens:
                         for field in fields:
                             if not field:
                                 continue
-                            if field[0:1] == '&':
-                                write_obj.write('tx