Various py3 related fixes exposed by the unicode patch

Kovid Goyal committed 2019-03-13 06:40:38 +05:30
parent 56af613e10
commit 5b76089839
48 changed files with 390 additions and 3210 deletions

View File

@ -670,7 +670,7 @@ class DB(object):
if d['is_multiple']: if d['is_multiple']:
if x is None: if x is None:
return [] return []
if isinstance(x, (str, unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
x = x.split(d['multiple_seps']['ui_to_list']) x = x.split(d['multiple_seps']['ui_to_list'])
x = [y.strip() for y in x if y.strip()] x = [y.strip() for y in x if y.strip()]
x = [y.decode(preferred_encoding, 'replace') if not isinstance(y, x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
@ -681,12 +681,16 @@ class DB(object):
x.decode(preferred_encoding, 'replace') x.decode(preferred_encoding, 'replace')
def adapt_datetime(x, d): def adapt_datetime(x, d):
if isinstance(x, (str, unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
if isinstance(x, bytes):
x = x.decode(preferred_encoding, 'replace')
x = parse_date(x, assume_utc=False, as_utc=False) x = parse_date(x, assume_utc=False, as_utc=False)
return x return x
def adapt_bool(x, d): def adapt_bool(x, d):
if isinstance(x, (str, unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
if isinstance(x, bytes):
x = x.decode(preferred_encoding, 'replace')
x = x.lower() x = x.lower()
if x == 'true': if x == 'true':
x = True x = True
@ -707,7 +711,9 @@ class DB(object):
def adapt_number(x, d): def adapt_number(x, d):
if x is None: if x is None:
return None return None
if isinstance(x, (str, unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
if isinstance(x, bytes):
x = x.decode(preferred_encoding, 'replace')
if x.lower() == 'none': if x.lower() == 'none':
return None return None
if d['datatype'] == 'int': if d['datatype'] == 'int':
@ -1083,7 +1089,7 @@ class DB(object):
def dump_and_restore(self, callback=None, sql=None): def dump_and_restore(self, callback=None, sql=None):
import codecs import codecs
from calibre.utils.apsw_shell import Shell from apsw import Shell
from contextlib import closing from contextlib import closing
if callback is None: if callback is None:
callback = lambda x: x callback = lambda x: x
@ -1096,7 +1102,7 @@ class DB(object):
shell = Shell(db=self.conn, stdout=buf) shell = Shell(db=self.conn, stdout=buf)
shell.process_command('.dump') shell.process_command('.dump')
else: else:
with open(fname, 'wb') as buf: with lopen(fname, 'wb') as buf:
buf.write(sql if isinstance(sql, bytes) else sql.encode('utf-8')) buf.write(sql if isinstance(sql, bytes) else sql.encode('utf-8'))
with TemporaryFile(suffix='_tmpdb.db', dir=os.path.dirname(self.dbpath)) as tmpdb: with TemporaryFile(suffix='_tmpdb.db', dir=os.path.dirname(self.dbpath)) as tmpdb:
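Note on the adapter changes above: the recurring pattern is to decode bytes once, up front, and then operate only on text. A minimal standalone sketch of that idea (not calibre code; here preferred_encoding is derived from the locale as an assumption, calibre uses its own constant):

    # Sketch: normalise bytes to text before doing any string work.
    import locale

    preferred_encoding = locale.getpreferredencoding() or 'utf-8'

    def adapt_bool(x):
        # Accept text or bytes; anything else falls back to bool()
        if isinstance(x, bytes):
            x = x.decode(preferred_encoding, 'replace')
        if isinstance(x, str):
            x = x.lower()
            if x == 'true':
                return True
            if x == 'false':
                return False
            if x == 'none':
                return None
            return bool(int(x))
        return None if x is None else bool(x)

    print(adapt_bool(b'True'), adapt_bool('false'), adapt_bool(1))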

View File

@ -11,6 +11,7 @@ import copy
from functools import partial from functools import partial
from polyglot.builtins import unicode_type, map from polyglot.builtins import unicode_type, map
from calibre.constants import ispy3
from calibre.ebooks.metadata import author_to_author_sort from calibre.ebooks.metadata import author_to_author_sort
from calibre.utils.config_base import tweaks from calibre.utils.config_base import tweaks
from calibre.utils.icu import sort_key, collation_order from calibre.utils.icu import sort_key, collation_order
@ -43,11 +44,19 @@ class Tag(object):
         self.search_expression = search_expression
         self.original_categories = None

-    def __unicode__(self):
+    @property
+    def string_representation(self):
         return u'%s:%s:%s:%s:%s'%(self.name, self.count, self.id, self.state, self.category)

-    def __str__(self):
-        return unicode_type(self).encode('utf-8')
+    if ispy3:
+        def __str__(self):
+            return self.string_representation
+    else:
+        def __str__(self):
+            return self.string_representation.encode('utf-8')
+
+        def __unicode__(self):
+            return self.string_representation

     def __repr__(self):
         return str(self)
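The Tag change above is an instance of a general py2/py3 dunder pattern used throughout this commit: compute the text once, then bind __str__ (and, on Python 2, __unicode__) accordingly. A standalone sketch of that pattern, not the calibre class itself:

    # Sketch: one text-producing property, interpreter-specific dunders.
    import sys

    ispy3 = sys.version_info.major >= 3

    class Tag(object):
        def __init__(self, name):
            self.name = name

        @property
        def string_representation(self):
            return u'tag:%s' % self.name

        if ispy3:
            def __str__(self):
                return self.string_representation
        else:
            def __str__(self):
                return self.string_representation.encode('utf-8')

            def __unicode__(self):
                return self.string_representation

    print(str(Tag('fiction')))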

View File

@ -80,7 +80,7 @@ def _match(query, value, matchkind, use_primary_find_in_search=True, case_sensit
if primary_contains(query, t): if primary_contains(query, t):
return True return True
elif query in t: elif query in t:
return True return True
except re.error: except re.error:
pass pass
return False return False
@ -149,7 +149,9 @@ class DateSearch(object): # {{{
if query == 'false': if query == 'false':
for v, book_ids in field_iter(): for v, book_ids in field_iter():
if isinstance(v, (str, unicode_type)): if isinstance(v, (bytes, unicode_type)):
if isinstance(v, bytes):
v = v.decode(preferred_encoding, 'replace')
v = parse_date(v) v = parse_date(v)
if v is None or v <= UNDEFINED_DATE: if v is None or v <= UNDEFINED_DATE:
matches |= book_ids matches |= book_ids
@ -157,7 +159,9 @@ class DateSearch(object): # {{{
if query == 'true': if query == 'true':
for v, book_ids in field_iter(): for v, book_ids in field_iter():
if isinstance(v, (str, unicode_type)): if isinstance(v, (bytes, unicode_type)):
if isinstance(v, bytes):
v = v.decode(preferred_encoding, 'replace')
v = parse_date(v) v = parse_date(v)
if v is not None and v > UNDEFINED_DATE: if v is not None and v > UNDEFINED_DATE:
matches |= book_ids matches |= book_ids

View File

@ -13,13 +13,15 @@ from polyglot.builtins import map, unicode_type
from threading import Lock from threading import Lock
from calibre import as_unicode, prints from calibre import as_unicode, prints
from calibre.constants import cache_dir, get_windows_number_formats, iswindows from calibre.constants import cache_dir, get_windows_number_formats, iswindows, preferred_encoding
from calibre.utils.localization import canonicalize_lang from calibre.utils.localization import canonicalize_lang
def force_to_bool(val): def force_to_bool(val):
if isinstance(val, (str, unicode_type)): if isinstance(val, (bytes, unicode_type)):
if isinstance(val, bytes):
val = val.decode(preferred_encoding, 'replace')
try: try:
val = icu_lower(val) val = icu_lower(val)
if not val: if not val:

View File

@ -88,6 +88,8 @@ def adapt_number(typ, x):
if x is None: if x is None:
return None return None
if isinstance(x, (unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
if isinstance(x, bytes):
x = x.decode(preferred_encoding, 'replace')
if not x or x.lower() == 'none': if not x or x.lower() == 'none':
return None return None
return typ(x) return typ(x)
@ -95,6 +97,8 @@ def adapt_number(typ, x):
def adapt_bool(x): def adapt_bool(x):
if isinstance(x, (unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
if isinstance(x, bytes):
x = x.decode(preferred_encoding, 'replace')
x = x.lower() x = x.lower()
if x == 'true': if x == 'true':
x = True x = True

View File

@ -171,7 +171,7 @@ class PRST1(USBMS):
with closing(sqlite.connect(dbpath)) as connection: with closing(sqlite.connect(dbpath)) as connection:
# Replace undecodable characters in the db instead of erroring out # Replace undecodable characters in the db instead of erroring out
connection.text_factory = lambda x: unicode_type(x, "utf-8", "replace") connection.text_factory = lambda x: x if isinstance(x, unicode_type) else x.decode('utf-8', 'replace')
cursor = connection.cursor() cursor = connection.cursor()
# Query collections # Query collections
@ -758,7 +758,7 @@ class PRST1(USBMS):
thumbnail_path = THUMBPATH%book.bookId thumbnail_path = THUMBPATH%book.bookId
prefix = self._main_prefix if source_id is 0 else self._card_a_prefix prefix = self._main_prefix if source_id == 0 else self._card_a_prefix
thumbnail_file_path = os.path.join(prefix, *thumbnail_path.split('/')) thumbnail_file_path = os.path.join(prefix, *thumbnail_path.split('/'))
thumbnail_dir_path = os.path.dirname(thumbnail_file_path) thumbnail_dir_path = os.path.dirname(thumbnail_file_path)
if not os.path.exists(thumbnail_dir_path): if not os.path.exists(thumbnail_dir_path):

View File

@ -398,7 +398,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
if isinstance(a, dict): if isinstance(a, dict):
printable = {} printable = {}
for k,v in a.iteritems(): for k,v in a.iteritems():
if isinstance(v, (str, unicode_type)) and len(v) > 50: if isinstance(v, (bytes, unicode_type)) and len(v) > 50:
printable[k] = 'too long' printable[k] = 'too long'
else: else:
printable[k] = v printable[k] = v
@ -666,7 +666,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
if v: if v:
v = json.loads(v, object_hook=from_json) v = json.loads(v, object_hook=from_json)
if print_debug_info and extra_debug: if print_debug_info and extra_debug:
self._debug('receive after decode') # , v) self._debug('receive after decode') # , v)
return (self.reverse_opcodes[v[0]], v[1]) return (self.reverse_opcodes[v[0]], v[1])
self._debug('protocol error -- empty json string') self._debug('protocol error -- empty json string')
except socket.timeout: except socket.timeout:
@ -1155,7 +1155,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
(self.DEFAULT_THUMBNAIL_HEIGHT/3) * 4) (self.DEFAULT_THUMBNAIL_HEIGHT/3) * 4)
self._debug('cover width', self.THUMBNAIL_WIDTH) self._debug('cover width', self.THUMBNAIL_WIDTH)
elif hasattr(self, 'THUMBNAIL_WIDTH'): elif hasattr(self, 'THUMBNAIL_WIDTH'):
delattr(self, 'THUMBNAIL_WIDTH') delattr(self, 'THUMBNAIL_WIDTH')
self.is_read_sync_col = result.get('isReadSyncCol', None) self.is_read_sync_col = result.get('isReadSyncCol', None)
self._debug('Device is_read sync col', self.is_read_sync_col) self._debug('Device is_read sync col', self.is_read_sync_col)

View File

@ -12,7 +12,6 @@ from calibre.customize.conversion import (OutputFormatPlugin,
OptionRecommendation) OptionRecommendation)
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir from calibre import CurrentDir
from calibre.constants import filesystem_encoding
from polyglot.builtins import unicode_type from polyglot.builtins import unicode_type
block_level_tags = ( block_level_tags = (
@ -41,7 +40,7 @@ block_level_tags = (
'pre', 'pre',
'table', 'table',
'ul', 'ul',
) )
class EPUBOutput(OutputFormatPlugin): class EPUBOutput(OutputFormatPlugin):
@ -326,13 +325,11 @@ class EPUBOutput(OutputFormatPlugin):
fonts = [] fonts = []
for uri in list(uris.keys()): for uri in list(uris.keys()):
path = uris[uri] path = uris[uri]
if isinstance(path, unicode_type):
path = path.encode(filesystem_encoding)
if not os.path.exists(path): if not os.path.exists(path):
uris.pop(uri) uris.pop(uri)
continue continue
self.log.debug('Encrypting font:', uri) self.log.debug('Encrypting font:', uri)
with open(path, 'r+b') as f: with lopen(path, 'r+b') as f:
data = f.read(1024) data = f.read(1024)
if len(data) >= 1024: if len(data) >= 1024:
f.seek(0) f.seek(0)

View File

@ -55,11 +55,7 @@ def munge_paths(basepath, url):
if not path: if not path:
path = basepath path = basepath
elif not os.path.isabs(path): elif not os.path.isabs(path):
if isinstance(path, unicode_type):
path = path.encode(sys.getfilesystemencoding())
dn = os.path.dirname(basepath) dn = os.path.dirname(basepath)
if isinstance(dn, unicode_type):
dn = dn.encode(sys.getfilesystemencoding())
path = os.path.join(dn, path) path = os.path.join(dn, path)
return os.path.normpath(path), fragment return os.path.normpath(path), fragment
@ -1480,11 +1476,6 @@ class HTMLConverter(object):
ext = os.path.splitext(path)[1] ext = os.path.splitext(path)[1]
if ext: if ext:
ext = ext[1:].lower() ext = ext[1:].lower()
enc = sys.getfilesystemencoding()
if not enc:
enc = 'utf8'
if isinstance(path, unicode_type):
path = path.encode(enc, 'replace')
if os.access(path, os.R_OK) and os.path.isfile(path): if os.access(path, os.R_OK) and os.path.isfile(path):
if ext in ['png', 'jpg', 'bmp', 'jpeg']: if ext in ['png', 'jpg', 'bmp', 'jpeg']:
self.process_image(path, tag_css) self.process_image(path, tag_css)
@ -1811,8 +1802,6 @@ class HTMLConverter(object):
def process_file(path, options, logger): def process_file(path, options, logger):
if not isinstance(path, unicode_type):
path = path.decode(sys.getfilesystemencoding())
path = os.path.abspath(path) path = os.path.abspath(path)
default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0]) default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
dirpath = os.path.dirname(path) dirpath = os.path.dirname(path)

View File

@ -196,8 +196,8 @@ class xml_field(object):
if not val: if not val:
val = u'' val = u''
if isinstance(val, unicode_type): if not isinstance(val, unicode_type):
val = unicode_type(val, 'utf-8') val = val.decode('utf-8')
elems = document.getElementsByTagName(self.tag_name) elems = document.getElementsByTagName(self.tag_name)
elem = None elem = None

View File

@ -4,6 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import struct, array, zlib, cStringIO, collections, re import struct, array, zlib, cStringIO, collections, re
from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE
from calibre.constants import ispy3
from calibre import entity_to_unicode, prepare_string_for_xml from calibre import entity_to_unicode, prepare_string_for_xml
from calibre.ebooks.lrf.tags import Tag from calibre.ebooks.lrf.tags import Tag
from polyglot.builtins import unicode_type from polyglot.builtins import unicode_type
@ -88,11 +89,8 @@ class LRFObject(object):
for i in range(0): for i in range(0):
yield i yield i
-    def __unicode__(self):
-        return unicode_type(self.__class__.__name__)
-
     def __str__(self):
-        return unicode_type(self).encode('utf-8')
+        return self.__class__.__name__
class LRFContentObject(LRFObject): class LRFContentObject(LRFObject):
@ -204,12 +202,15 @@ class StyleObject(object):
s += u'%s="%s" '%(attr, getattr(self, attr)) s += u'%s="%s" '%(attr, getattr(self, attr))
return s return s
def __unicode__(self): def __str__(self):
s = u'<%s objid="%s" stylelabel="%s" '%(self.__class__.__name__.replace('Attr', 'Style'), self.id, self.id) s = u'<%s objid="%s" stylelabel="%s" '%(self.__class__.__name__.replace('Attr', 'Style'), self.id, self.id)
s += self._tags_to_xml() s += self._tags_to_xml()
s += u'/>\n' s += u'/>\n'
return s return s
if not ispy3:
__unicode__ = __str__
def as_dict(self): def as_dict(self):
d = {} d = {}
for h in self.tag_map.values(): for h in self.tag_map.values():
@ -252,11 +253,11 @@ class Color(object):
def __init__(self, val): def __init__(self, val):
self.a, self.r, self.g, self.b = val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF self.a, self.r, self.g, self.b = val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF
def __unicode__(self): def __str__(self):
return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b) return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b)
def __str__(self): if not ispy3:
return unicode_type(self) __unicode__ = __str__
def __len__(self): def __len__(self):
return 4 return 4
@ -284,10 +285,13 @@ class PageDiv(EmptyPageElement):
self.pain, self.spacesize, self.linewidth = pain, spacesize, linewidth self.pain, self.spacesize, self.linewidth = pain, spacesize, linewidth
self.linecolor = Color(linecolor) self.linecolor = Color(linecolor)
def __unicode__(self): def __str__(self):
return u'\n<PageDiv pain="%s" spacesize="%s" linewidth="%s" linecolor="%s" />\n'%\ return u'\n<PageDiv pain="%s" spacesize="%s" linewidth="%s" linecolor="%s" />\n'%\
(self.pain, self.spacesize, self.linewidth, self.color) (self.pain, self.spacesize, self.linewidth, self.color)
if not ispy3:
__unicode__ = __str__
class RuledLine(EmptyPageElement): class RuledLine(EmptyPageElement):
@ -299,19 +303,25 @@ class RuledLine(EmptyPageElement):
self.linecolor = Color(linecolor) self.linecolor = Color(linecolor)
self.id = -1 self.id = -1
def __unicode__(self): def __str__(self):
return u'\n<RuledLine linelength="%s" linetype="%s" linewidth="%s" linecolor="%s" />\n'%\ return u'\n<RuledLine linelength="%s" linetype="%s" linewidth="%s" linecolor="%s" />\n'%\
(self.linelength, self.linetype, self.linewidth, self.linecolor) (self.linelength, self.linetype, self.linewidth, self.linecolor)
if not ispy3:
__unicode__ = __str__
class Wait(EmptyPageElement): class Wait(EmptyPageElement):
def __init__(self, time): def __init__(self, time):
self.time = time self.time = time
def __unicode__(self): def __str__(self):
return u'\n<Wait time="%d" />\n'%(self.time) return u'\n<Wait time="%d" />\n'%(self.time)
if not ispy3:
__unicode__ = __str__
class Locate(EmptyPageElement): class Locate(EmptyPageElement):
@ -320,19 +330,25 @@ class Locate(EmptyPageElement):
def __init__(self, pos): def __init__(self, pos):
self.pos = self.pos_map[pos] self.pos = self.pos_map[pos]
def __unicode__(self): def __str__(self):
return u'\n<Locate pos="%s" />\n'%(self.pos) return u'\n<Locate pos="%s" />\n'%(self.pos)
if not ispy3:
__unicode__ = __str__
class BlockSpace(EmptyPageElement): class BlockSpace(EmptyPageElement):
def __init__(self, xspace, yspace): def __init__(self, xspace, yspace):
self.xspace, self.yspace = xspace, yspace self.xspace, self.yspace = xspace, yspace
def __unicode__(self): def __str__(self):
return u'\n<BlockSpace xspace="%d" yspace="%d" />\n'%\ return u'\n<BlockSpace xspace="%d" yspace="%d" />\n'%\
(self.xspace, self.yspace) (self.xspace, self.yspace)
if not ispy3:
__unicode__ = __str__
class Page(LRFStream): class Page(LRFStream):
tag_map = { tag_map = {
@ -427,15 +443,15 @@ class Page(LRFStream):
for i in self.content: for i in self.content:
yield i yield i
def __unicode__(self): def __str__(self):
s = u'\n<Page pagestyle="%d" objid="%d">\n'%(self.style_id, self.id) s = u'\n<Page pagestyle="%d" objid="%d">\n'%(self.style_id, self.id)
for i in self: for i in self:
s += unicode_type(i) s += unicode_type(i)
s += '\n</Page>\n' s += '\n</Page>\n'
return s return s
def __str__(self): if not ispy3:
return unicode_type(self) __unicode__ = __str__
def to_html(self): def to_html(self):
s = u'' s = u''
@ -612,7 +628,7 @@ class Block(LRFStream, TextCSS):
if hasattr(self, attr): if hasattr(self, attr):
self.attrs[attr] = getattr(self, attr) self.attrs[attr] = getattr(self, attr)
def __unicode__(self): def __str__(self):
s = u'\n<%s objid="%d" blockstyle="%d" '%(self.name, self.id, self.style_id) s = u'\n<%s objid="%d" blockstyle="%d" '%(self.name, self.id, self.style_id)
if hasattr(self, 'textstyle_id'): if hasattr(self, 'textstyle_id'):
s += 'textstyle="%d" '%(self.textstyle_id,) s += 'textstyle="%d" '%(self.textstyle_id,)
@ -625,6 +641,9 @@ class Block(LRFStream, TextCSS):
return s return s
return s.rstrip() + ' />\n' return s.rstrip() + ' />\n'
if not ispy3:
__unicode__ = __str__
def to_html(self): def to_html(self):
if self.name == 'TextBlock': if self.name == 'TextBlock':
return u'<div class="block%s text%s">%s</div>'%(self.style_id, self.textstyle_id, self.content.to_html()) return u'<div class="block%s text%s">%s</div>'%(self.style_id, self.textstyle_id, self.content.to_html())
@ -697,12 +716,15 @@ class Text(LRFStream):
self.attrs = attrs self.attrs = attrs
self.self_closing = self_closing self.self_closing = self_closing
def __unicode__(self): def __str__(self):
s = u'<%s '%(self.name,) s = u'<%s '%(self.name,)
for name, val in self.attrs.items(): for name, val in self.attrs.items():
s += '%s="%s" '%(name, val) s += '%s="%s" '%(name, val)
return s.rstrip() + (u' />' if self.self_closing else u'>') return s.rstrip() + (u' />' if self.self_closing else u'>')
if not ispy3:
__unicode__ = __str__
def to_html(self): def to_html(self):
s = u'' s = u''
return s return s
@ -878,7 +900,7 @@ class Text(LRFStream):
self.close_containers() self.close_containers()
self.stream = None self.stream = None
def __unicode__(self): def __str__(self):
s = u'' s = u''
open_containers = collections.deque() open_containers = collections.deque()
for c in self.content: for c in self.content:
@ -900,6 +922,9 @@ class Text(LRFStream):
raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],)) raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],))
return s return s
if not ispy3:
__unicode__ = __str__
def to_html(self): def to_html(self):
s = u'' s = u''
open_containers = collections.deque() open_containers = collections.deque()
@ -944,10 +969,13 @@ class Image(LRFObject):
encoding = property(fget=lambda self : self._document.objects[self.refstream].encoding) encoding = property(fget=lambda self : self._document.objects[self.refstream].encoding)
data = property(fget=lambda self : self._document.objects[self.refstream].stream) data = property(fget=lambda self : self._document.objects[self.refstream].stream)
def __unicode__(self): def __str__(self):
return u'<Image objid="%s" x0="%d" y0="%d" x1="%d" y1="%d" xsize="%d" ysize="%d" refstream="%d" />\n'%\ return u'<Image objid="%s" x0="%d" y0="%d" x1="%d" y1="%d" xsize="%d" ysize="%d" refstream="%d" />\n'%\
(self.id, self.x0, self.y0, self.x1, self.y1, self.xsize, self.ysize, self.refstream) (self.id, self.x0, self.y0, self.x1, self.y1, self.xsize, self.ysize, self.refstream)
if not ispy3:
__unicode__ = __str__
class PutObj(EmptyPageElement): class PutObj(EmptyPageElement):
@ -955,9 +983,12 @@ class PutObj(EmptyPageElement):
self.x1, self.y1, self.refobj = x1, y1, refobj self.x1, self.y1, self.refobj = x1, y1, refobj
self.object = objects[refobj] self.object = objects[refobj]
def __unicode__(self): def __str__(self):
return u'<PutObj x1="%d" y1="%d" refobj="%d" />'%(self.x1, self.y1, self.refobj) return u'<PutObj x1="%d" y1="%d" refobj="%d" />'%(self.x1, self.y1, self.refobj)
if not ispy3:
__unicode__ = __str__
class Canvas(LRFStream): class Canvas(LRFStream):
tag_map = { tag_map = {
@ -996,7 +1027,7 @@ class Canvas(LRFStream):
except struct.error: except struct.error:
print('Canvas object has errors, skipping.') print('Canvas object has errors, skipping.')
def __unicode__(self): def __str__(self):
s = '\n<%s objid="%s" '%(self.__class__.__name__, self.id,) s = '\n<%s objid="%s" '%(self.__class__.__name__, self.id,)
for attr in self.attrs: for attr in self.attrs:
s += '%s="%s" '%(attr, self.attrs[attr]) s += '%s="%s" '%(attr, self.attrs[attr])
@ -1006,6 +1037,9 @@ class Canvas(LRFStream):
s += '</%s>\n'%(self.__class__.__name__,) s += '</%s>\n'%(self.__class__.__name__,)
return s return s
if not ispy3:
__unicode__ = __str__
def __iter__(self): def __iter__(self):
for i in self._contents: for i in self._contents:
yield i yield i
@ -1039,10 +1073,13 @@ class ImageStream(LRFStream):
if self._document is not None: if self._document is not None:
self._document.image_map[self.id] = self self._document.image_map[self.id] = self
def __unicode__(self): def __str__(self):
return u'<ImageStream objid="%s" encoding="%s" file="%s" />\n'%\ return u'<ImageStream objid="%s" encoding="%s" file="%s" />\n'%\
(self.id, self.encoding, self.file) (self.id, self.encoding, self.file)
if not ispy3:
__unicode__ = __str__
class Import(LRFStream): class Import(LRFStream):
pass pass
@ -1118,7 +1155,7 @@ class Button(LRFObject):
return i[1:][0] return i[1:][0]
return (None, None) return (None, None)
def __unicode__(self): def __str__(self):
s = u'<Button objid="%s">\n'%(self.id,) s = u'<Button objid="%s">\n'%(self.id,)
if self.button_flags & 0x10 != 0: if self.button_flags & 0x10 != 0:
s += '<PushButton ' s += '<PushButton '
@ -1132,6 +1169,9 @@ class Button(LRFObject):
s += '</Button>\n' s += '</Button>\n'
return s return s
if not ispy3:
__unicode__ = __str__
refpage = property(fget=lambda self : self.jump_action(2)[0]) refpage = property(fget=lambda self : self.jump_action(2)[0])
refobj = property(fget=lambda self : self.jump_action(2)[1]) refobj = property(fget=lambda self : self.jump_action(2)[1])
@ -1192,7 +1232,7 @@ class BookAttr(StyleObject, LRFObject):
def add_font(self, tag, f): def add_font(self, tag, f):
self.font_link_list.append(tag.dword) self.font_link_list.append(tag.dword)
def __unicode__(self): def __str__(self):
s = u'<BookStyle objid="%s" stylelabel="%s">\n'%(self.id, self.id) s = u'<BookStyle objid="%s" stylelabel="%s">\n'%(self.id, self.id)
s += u'<SetDefault %s />\n'%(self._tags_to_xml(),) s += u'<SetDefault %s />\n'%(self._tags_to_xml(),)
doc = self._document doc = self._document
@ -1203,6 +1243,9 @@ class BookAttr(StyleObject, LRFObject):
s += '</BookStyle>\n' s += '</BookStyle>\n'
return s return s
if not ispy3:
__unicode__ = __str__
class SimpleText(Text): class SimpleText(Text):
pass pass
@ -1213,9 +1256,12 @@ class TocLabel(object):
def __init__(self, refpage, refobject, label): def __init__(self, refpage, refobject, label):
self.refpage, self.refobject, self.label = refpage, refobject, label self.refpage, self.refobject, self.label = refpage, refobject, label
def __unicode__(self): def __str__(self):
return u'<TocLabel refpage="%s" refobj="%s">%s</TocLabel>\n'%(self.refpage, self.refobject, self.label) return u'<TocLabel refpage="%s" refobj="%s">%s</TocLabel>\n'%(self.refpage, self.refobject, self.label)
if not ispy3:
__unicode__ = __str__
class TOCObject(LRFStream): class TOCObject(LRFStream):
@ -1237,12 +1283,15 @@ class TOCObject(LRFStream):
for i in self._contents: for i in self._contents:
yield i yield i
def __unicode__(self): def __str__(self):
s = u'<TOC>\n' s = u'<TOC>\n'
for i in self: for i in self:
s += unicode_type(i) s += unicode_type(i)
return s + '</TOC>\n' return s + '</TOC>\n'
if not ispy3:
__unicode__ = __str__
object_map = [ object_map = [
None, # 00 None, # 00

View File

@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
import copy, traceback import copy, traceback
from calibre import prints from calibre import prints
from calibre.constants import DEBUG from calibre.constants import DEBUG, ispy3
from calibre.ebooks.metadata.book import (SC_COPYABLE_FIELDS, from calibre.ebooks.metadata.book import (SC_COPYABLE_FIELDS,
SC_FIELDS_COPY_NOT_NULL, STANDARD_METADATA_FIELDS, SC_FIELDS_COPY_NOT_NULL, STANDARD_METADATA_FIELDS,
TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS) TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS)
@ -709,7 +709,7 @@ class Metadata(object):
return (None, None, None, None) return (None, None, None, None)
def __unicode__(self): def __unicode__representation__(self):
''' '''
A string representation of this object, suitable for printing to A string representation of this object, suitable for printing to
console console
@ -791,11 +791,17 @@ class Metadata(object):
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
return u'<table>%s</table>'%u'\n'.join(ans) return u'<table>%s</table>'%u'\n'.join(ans)
-    def __str__(self):
-        return self.__unicode__().encode('utf-8')
+    if ispy3:
+        __str__ = __unicode__representation__
+    else:
+        __unicode__ = __unicode__representation__
+
+        def __str__(self):
+            return self.__unicode__().encode('utf-8')
def __nonzero__(self): def __nonzero__(self):
return bool(self.title or self.author or self.comments or self.tags) return bool(self.title or self.author or self.comments or self.tags)
__bool__ = __nonzero__
# }}} # }}}
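The __bool__ = __nonzero__ line added above reflects that Python 2 consults __nonzero__ for truthiness while Python 3 consults __bool__, so aliasing one to the other keeps bool(mi) working on both. A minimal sketch with a made-up stand-in class, not calibre's Metadata:

    # Sketch: truthiness on both Python 2 and 3.
    class Metadata(object):
        def __init__(self, title=None, authors=None):
            self.title, self.authors = title, authors

        def __nonzero__(self):
            return bool(self.title or self.authors)
        __bool__ = __nonzero__

    print(bool(Metadata()), bool(Metadata('Dune')))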

View File

@ -115,7 +115,7 @@ def get_metadata(stream):
if book_title: if book_title:
book_title = unicode_type(book_title) book_title = unicode_type(book_title)
else: else:
book_title = force_unicode_type(os.path.splitext( book_title = force_unicode(os.path.splitext(
os.path.basename(getattr(stream, 'name', os.path.basename(getattr(stream, 'name',
_('Unknown'))))[0]) _('Unknown'))))[0])
mi = MetaInformation(book_title, authors) mi = MetaInformation(book_title, authors)

View File

@ -15,7 +15,7 @@ from urlparse import urlparse
from lxml import etree from lxml import etree
from calibre.ebooks import escape_xpath_attr from calibre.ebooks import escape_xpath_attr
from calibre.constants import __appname__, __version__, filesystem_encoding from calibre.constants import __appname__, __version__, filesystem_encoding, ispy3
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata.utils import parse_opf, pretty_print_opf as _pretty_print from calibre.ebooks.metadata.utils import parse_opf, pretty_print_opf as _pretty_print
from calibre.ebooks.metadata import string_to_authors, MetaInformation, check_isbn from calibre.ebooks.metadata import string_to_authors, MetaInformation, check_isbn
@ -73,7 +73,7 @@ class Resource(object): # {{{
path = href_or_path path = href_or_path
if not os.path.isabs(path): if not os.path.isabs(path):
path = os.path.abspath(os.path.join(basedir, path)) path = os.path.abspath(os.path.join(basedir, path))
if isinstance(path, str): if isinstance(path, bytes):
path = path.decode(sys.getfilesystemencoding()) path = path.decode(sys.getfilesystemencoding())
self.path = path self.path = path
else: else:
@ -112,8 +112,8 @@ class Resource(object): # {{{
rpath = os.path.relpath(self.path, basedir) rpath = os.path.relpath(self.path, basedir)
except ValueError: # On windows path and basedir could be on different drives except ValueError: # On windows path and basedir could be on different drives
rpath = self.path rpath = self.path
if isinstance(rpath, unicode_type): if isinstance(rpath, bytes):
rpath = rpath.encode('utf-8') rpath = rpath.decode(filesystem_encoding)
return rpath.replace(os.sep, '/')+frag return rpath.replace(os.sep, '/')+frag
def set_basedir(self, path): def set_basedir(self, path):
@ -203,11 +203,16 @@ class ManifestItem(Resource): # {{{
self.mime_type = val self.mime_type = val
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
def __unicode__(self): def __unicode__representation__(self):
return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type) return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type)
def __str__(self): if ispy3:
return unicode_type(self).encode('utf-8') __str__ = __unicode__representation__
else:
__unicode__ = __unicode__representation__
def __str__(self):
return unicode_type(self).encode('utf-8')
def __repr__(self): def __repr__(self):
return unicode_type(self) return unicode_type(self)

View File

@ -229,7 +229,9 @@ class MobiMLizer(object):
while vspace > 0: while vspace > 0:
wrapper.addprevious(etree.Element(XHTML('br'))) wrapper.addprevious(etree.Element(XHTML('br')))
vspace -= 1 vspace -= 1
if istate.halign != 'auto' and isinstance(istate.halign, (str, unicode_type)): if istate.halign != 'auto' and isinstance(istate.halign, (bytes, unicode_type)):
if isinstance(istate.halign, bytes):
istate.halign = istate.halign.decode('utf-8')
para.attrib['align'] = istate.halign para.attrib['align'] = istate.halign
istate.rendered = True istate.rendered = True
pstate = bstate.istate pstate = bstate.istate
@ -568,17 +570,17 @@ class MobiMLizer(object):
self.opts.mobi_ignore_margins = False self.opts.mobi_ignore_margins = False
if (text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS or ( if (text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS or (
# We have an id but no text and no children, the id should still # We have an id but no text and no children, the id should still
# be added. # be added.
istate.ids and tag in ('a', 'span', 'i', 'b', 'u') and istate.ids and tag in ('a', 'span', 'i', 'b', 'u') and
len(elem)==0)): len(elem)==0)):
if tag == 'li' and len(istates) > 1 and 'value' in elem.attrib: if tag == 'li' and len(istates) > 1 and 'value' in elem.attrib:
try: try:
value = int(elem.attrib['value']) value = int(elem.attrib['value'])
istates[-2].list_num = value - 1 istates[-2].list_num = value - 1
except: except:
pass pass
self.mobimlize_content(tag, text, bstate, istates) self.mobimlize_content(tag, text, bstate, istates)
for child in elem: for child in elem:
self.mobimlize_elem(child, stylizer, bstate, istates) self.mobimlize_elem(child, stylizer, bstate, istates)
tail = None tail = None

View File

@ -283,24 +283,29 @@ class MobiReader(object):
ref.attrib['href'] = os.path.basename(htmlfile) + ref.attrib['href'] ref.attrib['href'] = os.path.basename(htmlfile) + ref.attrib['href']
except AttributeError: except AttributeError:
pass pass
def write_as_utf8(path, data):
if isinstance(data, unicode_type):
data = data.encode('utf-8')
with lopen(path, 'wb') as f:
f.write(data)
parse_cache[htmlfile] = root parse_cache[htmlfile] = root
self.htmlfile = htmlfile self.htmlfile = htmlfile
ncx = cStringIO.StringIO() ncx = cStringIO.StringIO()
opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root) opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
self.created_opf_path = os.path.splitext(htmlfile)[0] + '.opf' self.created_opf_path = os.path.splitext(htmlfile)[0] + '.opf'
opf.render(open(self.created_opf_path, 'wb'), ncx, opf.render(lopen(self.created_opf_path, 'wb'), ncx,
ncx_manifest_entry=ncx_manifest_entry) ncx_manifest_entry=ncx_manifest_entry)
ncx = ncx.getvalue() ncx = ncx.getvalue()
if ncx: if ncx:
ncx_path = os.path.join(os.path.dirname(htmlfile), 'toc.ncx') ncx_path = os.path.join(os.path.dirname(htmlfile), 'toc.ncx')
open(ncx_path, 'wb').write(ncx) write_as_utf8(ncx_path, ncx)
-        with open('styles.css', 'wb') as s:
-            s.write(self.base_css_rules + '\n\n')
-            for cls, rule in self.tag_css_rules.items():
-                if isinstance(rule, unicode_type):
-                    rule = rule.encode('utf-8')
-                s.write('.%s { %s }\n\n' % (cls, rule))
+        css = [self.base_css_rules, '\n\n']
+        for cls, rule in self.tag_css_rules.items():
+            css.append('.%s { %s }\n\n' % (cls, rule))
+        write_as_utf8('styles.css', ''.join(css))
if self.book_header.exth is not None or self.embedded_mi is not None: if self.book_header.exth is not None or self.embedded_mi is not None:
self.log.debug('Creating OPF...') self.log.debug('Creating OPF...')
@ -310,7 +315,7 @@ class MobiReader(object):
ncx_manifest_entry) ncx_manifest_entry)
ncx = ncx.getvalue() ncx = ncx.getvalue()
if ncx: if ncx:
open(os.path.splitext(htmlfile)[0] + '.ncx', 'wb').write(ncx) write_as_utf8(os.path.splitext(htmlfile)[0] + '.ncx', ncx)
def read_embedded_metadata(self, root, elem, guide): def read_embedded_metadata(self, root, elem, guide):
raw = '<?xml version="1.0" encoding="utf-8" ?>\n<package>' + \ raw = '<?xml version="1.0" encoding="utf-8" ?>\n<package>' + \
@ -423,24 +428,25 @@ class MobiReader(object):
styles.append(style) styles.append(style)
if 'height' in attrib: if 'height' in attrib:
height = attrib.pop('height').strip() height = attrib.pop('height').strip()
-            if height and '<' not in height and '>' not in height and \
-                    re.search(r'\d+', height):
+            if (
+                    height and '<' not in height and '>' not in height and
+                    re.search(r'\d+', height)):
                 if tag.tag in ('table', 'td', 'tr'):
                     pass
                 elif tag.tag == 'img':
                     tag.set('height', height)
                 else:
                     if tag.tag == 'div' and not tag.text and \
                             (not tag.tail or not tag.tail.strip()) and \
                             not len(list(tag.iterdescendants())):
                         # Paragraph spacer
                         # Insert nbsp so that the element is never
                         # discarded by a renderer
                         tag.text = u'\u00a0' # nbsp
                         styles.append('height: %s' %
                                 self.ensure_unit(height))
                     else:
                         styles.append('margin-top: %s' % self.ensure_unit(height))
if 'width' in attrib: if 'width' in attrib:
width = attrib.pop('width').strip() width = attrib.pop('width').strip()
if width and re.search(r'\d+', width): if width and re.search(r'\d+', width):
@ -837,11 +843,10 @@ class MobiReader(object):
anchor = '<a id="filepos%d"></a>' anchor = '<a id="filepos%d"></a>'
if r > -1 and (r < l or l == end or l == -1): if r > -1 and (r < l or l == end or l == -1):
p = self.mobi_html.rfind('<', 0, end + 1) p = self.mobi_html.rfind('<', 0, end + 1)
-                if pos < end and p > -1 and \
-                        not end_tag_re.match(self.mobi_html[p:r]) and \
-                        not self.mobi_html[p:r + 1].endswith('/>'):
+                if (pos < end and p > -1 and not end_tag_re.match(self.mobi_html[p:r]) and
+                        not self.mobi_html[p:r + 1].endswith('/>')):
                     anchor = ' filepos-id="filepos%d"'
                     end = r
else: else:
end = r + 1 end = r + 1
processed_html.write(self.mobi_html[pos:end] + (anchor % oend)) processed_html.write(self.mobi_html[pos:end] + (anchor % oend))

View File

@ -1,23 +1,32 @@
 #!/usr/bin/env python2
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-from __future__ import (unicode_literals, division, absolute_import,
-                        print_function)
+from __future__ import absolute_import, division, print_function, unicode_literals

 __license__ = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import re, unicodedata
-from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
-    namespace, prefixname, urlnormalize)
+import re
+import unicodedata
+from collections import defaultdict
+from io import BytesIO
+from urlparse import urldefrag
+
 from calibre.ebooks.mobi.mobiml import MBP_NS
 from calibre.ebooks.mobi.utils import is_guide_ref_start
+from calibre.ebooks.oeb.base import (
+    OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize
+)
 from polyglot.builtins import unicode_type
-from collections import defaultdict
-from urlparse import urldefrag
-from cStringIO import StringIO
+
+
+class Buf(BytesIO):
+
+    def write(self, x):
+        if isinstance(x, unicode_type):
+            x = x.encode('utf-8')
+        BytesIO.write(self, x)
class Serializer(object): class Serializer(object):
@ -116,7 +125,7 @@ class Serializer(object):
''' '''
Return the document serialized as a single UTF-8 encoded bytestring. Return the document serialized as a single UTF-8 encoded bytestring.
''' '''
buf = self.buf = StringIO() buf = self.buf = Buf()
buf.write(b'<html>') buf.write(b'<html>')
self.serialize_head() self.serialize_head()
self.serialize_body() self.serialize_body()
@ -214,22 +223,22 @@ class Serializer(object):
# if href is provided add a link ref to the toc level output (e.g. feed_0/index.html) # if href is provided add a link ref to the toc level output (e.g. feed_0/index.html)
if href is not None: if href is not None:
# resolve the section url in id_offsets # resolve the section url in id_offsets
buf.write('<mbp:pagebreak />') buf.write(b'<mbp:pagebreak />')
self.id_offsets[urlnormalize(href)] = buf.tell() self.id_offsets[urlnormalize(href)] = buf.tell()
if tocref.klass == "periodical": if tocref.klass == "periodical":
buf.write('<div> <div height="1em"></div>') buf.write(b'<div> <div height="1em"></div>')
else: else:
t = tocref.title t = tocref.title
if isinstance(t, unicode_type): if isinstance(t, unicode_type):
t = t.encode('utf-8') t = t.encode('utf-8')
buf.write('<div></div> <div> <h2 height="1em"><font size="+2"><b>' + t + buf.write(b'<div></div> <div> <h2 height="1em"><font size="+2"><b>' + t +
'</b></font></h2> <div height="1em"></div>') b'</b></font></h2> <div height="1em"></div>')
buf.write('<ul>') buf.write(b'<ul>')
for tocitem in tocref.nodes: for tocitem in tocref.nodes:
buf.write('<li><a filepos=') buf.write(b'<li><a filepos=')
itemhref = tocitem.href itemhref = tocitem.href
if tocref.klass == 'periodical': if tocref.klass == 'periodical':
# This is a section node. # This is a section node.
@ -238,15 +247,15 @@ class Serializer(object):
# so we change the href. # so we change the href.
itemhref = re.sub(r'article_\d+/', '', itemhref) itemhref = re.sub(r'article_\d+/', '', itemhref)
self.href_offsets[itemhref].append(buf.tell()) self.href_offsets[itemhref].append(buf.tell())
buf.write('0000000000') buf.write(b'0000000000')
buf.write(' ><font size="+1"><b><u>') buf.write(b' ><font size="+1"><b><u>')
t = tocitem.title t = tocitem.title
if isinstance(t, unicode_type): if isinstance(t, unicode_type):
t = t.encode('utf-8') t = t.encode('utf-8')
buf.write(t) buf.write(t)
buf.write('</u></b></font></a></li>') buf.write(b'</u></b></font></a></li>')
buf.write('</ul><div height="1em"></div></div><mbp:pagebreak />') buf.write(b'</ul><div height="1em"></div></div><mbp:pagebreak />')
self.anchor_offset = buf.tell() self.anchor_offset = buf.tell()
buf.write(b'<body>') buf.write(b'<body>')
@ -350,7 +359,7 @@ class Serializer(object):
if child.tail: if child.tail:
self.anchor_offset = None self.anchor_offset = None
self.serialize_text(child.tail) self.serialize_text(child.tail)
buf.write(b'</%s>' % tag.encode('utf-8')) buf.write(('</%s>' % tag).encode('utf-8'))
def serialize_text(self, text, quot=False): def serialize_text(self, text, quot=False):
text = text.replace('&', '&amp;') text = text.replace('&', '&amp;')
@ -384,4 +393,4 @@ class Serializer(object):
self.start_offset = ioff self.start_offset = ioff
for hoff in hoffs: for hoff in hoffs:
buf.seek(hoff) buf.seek(hoff)
buf.write(b'%010d' % ioff) buf.write(('%010d' % ioff).encode('utf-8'))
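The Buf class introduced above exists so that both text and bytes can be written into one bytes-backed buffer while the serializer is ported. A self-contained sketch of the same idea, using only plain io.BytesIO and no calibre imports:

    # Sketch: a BytesIO that transparently encodes text writes.
    from io import BytesIO

    class Buf(BytesIO):
        def write(self, x):
            if isinstance(x, str):
                x = x.encode('utf-8')
            return BytesIO.write(self, x)

    buf = Buf()
    buf.write(b'<html>')
    buf.write(u'<p>\u00e9</p>')   # text is encoded on the way in
    buf.write(('</%s>' % 'html').encode('utf-8'))
    print(buf.getvalue())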

View File

@ -13,7 +13,7 @@ from urlparse import urldefrag, urlparse, urlunparse, urljoin
from urllib import unquote from urllib import unquote
from lxml import etree, html from lxml import etree, html
from calibre.constants import filesystem_encoding, __version__ from calibre.constants import filesystem_encoding, __version__, ispy3
from calibre.translations.dynamic import translate from calibre.translations.dynamic import translate
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.conversion.preprocess import CSSPreProcessor from calibre.ebooks.conversion.preprocess import CSSPreProcessor
@ -107,13 +107,35 @@ self_closing_bad_tags = {'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b
'span', 'strong', 'sub', 'summary', 'sup', 'textarea', 'time', 'ul', 'var', 'span', 'strong', 'sub', 'summary', 'sup', 'textarea', 'time', 'ul', 'var',
'video', 'title', 'script', 'style'} 'video', 'title', 'script', 'style'}
_self_closing_pat = re.compile(
r'<(?P<tag>%s)(?=[\s/])(?P<arg>[^>]*)/>'%('|'.join(self_closing_bad_tags)), def as_string_type(pat, for_unicode):
re.IGNORECASE) if for_unicode:
if isinstance(pat, bytes):
pat = pat.decode('utf-8')
else:
if isinstance(pat, unicode_type):
pat = pat.encode('utf-8')
return pat
def self_closing_pat(for_unicode):
attr = 'unicode_ans' if for_unicode else 'bytes_ans'
ans = getattr(self_closing_pat, attr, None)
if ans is None:
sub = '|'.join(self_closing_bad_tags)
template = r'<(?P<tag>%s)(?=[\s/])(?P<arg>[^>]*)/>'
pat = template % sub
pat = as_string_type(pat, for_unicode)
ans = re.compile(pat, flags=re.IGNORECASE)
setattr(self_closing_pat, attr, ans)
return ans
def close_self_closing_tags(raw): def close_self_closing_tags(raw):
return _self_closing_pat.sub(r'<\g<tag>\g<arg>></\g<tag>>', raw) for_unicode = isinstance(raw, unicode_type)
repl = as_string_type(r'<\g<tag>\g<arg>></\g<tag>>', for_unicode)
pat = self_closing_pat(for_unicode)
return pat.sub(repl, raw)
def uuid_id(): def uuid_id():
@ -745,11 +767,15 @@ class Metadata(object):
return 'Item(term=%r, value=%r, attrib=%r)' \ return 'Item(term=%r, value=%r, attrib=%r)' \
% (barename(self.term), self.value, self.attrib) % (barename(self.term), self.value, self.attrib)
def __str__(self): if ispy3:
return unicode_type(self.value).encode('ascii', 'xmlcharrefreplace') def __str__(self):
return as_unicode(self.value)
else:
def __str__(self):
return unicode_type(self.value).encode('ascii', 'xmlcharrefreplace')
def __unicode__(self): def __unicode__(self):
return as_unicode(self.value) return as_unicode(self.value)
def to_opf1(self, dcmeta=None, xmeta=None, nsrmap={}): def to_opf1(self, dcmeta=None, xmeta=None, nsrmap={}):
attrib = {} attrib = {}
@ -1075,19 +1101,27 @@ class Manifest(object):
self._loader = loader2 self._loader = loader2
self._data = None self._data = None
-    def __str__(self):
-        return serialize(self.data, self.media_type, pretty_print=self.oeb.pretty_print)
-
-    def __unicode__(self):
+    @property
+    def unicode_representation(self):
         data = self.data
         if isinstance(data, etree._Element):
             return xml2unicode(data, pretty_print=self.oeb.pretty_print)
         if isinstance(data, unicode_type):
             return data
         if hasattr(data, 'cssText'):
-            return data.cssText
+            return unicode_type(data.cssText, 'utf-8', 'replace')
         return unicode_type(data)

+    if ispy3:
+        def __str__(self):
+            return self.unicode_representation
+    else:
+        def __unicode__(self):
+            return self.unicode_representation
+
+        def __str__(self):
+            return serialize(self.data, self.media_type, pretty_print=self.oeb.pretty_print)
def __eq__(self, other): def __eq__(self, other):
return id(self) == id(other) return id(self) == id(other)
@ -1616,11 +1650,15 @@ class TOC(object):
ans.extend(child.get_lines(lvl+1)) ans.extend(child.get_lines(lvl+1))
return ans return ans
def __str__(self): if ispy3:
return b'\n'.join([x.encode('utf-8') for x in self.get_lines()]) def __str__(self):
return u'\n'.join(self.get_lines())
else:
def __unicode__(self):
return u'\n'.join(self.get_lines())
def __unicode__(self): def __str__(self):
return u'\n'.join(self.get_lines()) return b'\n'.join([x.encode('utf-8') for x in self.get_lines()])
def to_opf1(self, tour): def to_opf1(self, tour):
for node in self.nodes: for node in self.nodes:
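The self_closing_pat/as_string_type change above compiles one regex per string type and caches each compiled pattern on the function object, so close_self_closing_tags works whether it is handed str or bytes. A trimmed standalone sketch of that approach, with a hypothetical three-tag list instead of calibre's full set:

    # Sketch: per-type compiled regex cached on the function itself.
    import re

    self_closing_bad_tags = {'a', 'p', 'span'}

    def as_string_type(pat, for_unicode):
        # Coerce pattern/replacement to str or bytes to match the input text
        if for_unicode:
            return pat.decode('utf-8') if isinstance(pat, bytes) else pat
        return pat.encode('utf-8') if isinstance(pat, str) else pat

    def self_closing_pat(for_unicode):
        attr = 'unicode_ans' if for_unicode else 'bytes_ans'
        ans = getattr(self_closing_pat, attr, None)
        if ans is None:
            pat = r'<(?P<tag>%s)(?=[\s/])(?P<arg>[^>]*)/>' % '|'.join(self_closing_bad_tags)
            ans = re.compile(as_string_type(pat, for_unicode), flags=re.IGNORECASE)
            setattr(self_closing_pat, attr, ans)
        return ans

    def close_self_closing_tags(raw):
        for_unicode = isinstance(raw, str)
        repl = as_string_type(r'<\g<tag>\g<arg>></\g<tag>>', for_unicode)
        return self_closing_pat(for_unicode).sub(repl, raw)

    print(close_self_closing_tags('<p class="x"/>'))
    print(close_self_closing_tags(b'<span id="y"/>'))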

View File

@ -53,7 +53,7 @@ class SpineItem(unicode_type):
if not os.path.exists(path) and os.path.exists(ppath): if not os.path.exists(path) and os.path.exists(ppath):
path = ppath path = ppath
obj = super(SpineItem, cls).__new__(cls, path) obj = super(SpineItem, cls).__new__(cls, path)
with open(path, 'rb') as f: with lopen(path, 'rb') as f:
raw = f.read() raw = f.read()
if from_epub: if from_epub:
# According to the spec, HTML in EPUB must be encoded in utf-8 or # According to the spec, HTML in EPUB must be encoded in utf-8 or

View File

@ -99,7 +99,7 @@ def html5_parse(data, max_nesting_depth=100):
# Check that the asinine HTML 5 algorithm did not result in a tree with # Check that the asinine HTML 5 algorithm did not result in a tree with
# insane nesting depths # insane nesting depths
for x in data.iterdescendants(): for x in data.iterdescendants():
if isinstance(x.tag, basestring) and len(x) is 0: # Leaf node if isinstance(x.tag, basestring) and not len(x): # Leaf node
depth = node_depth(x) depth = node_depth(x)
if depth > max_nesting_depth: if depth > max_nesting_depth:
raise ValueError('HTML 5 parsing resulted in a tree with nesting' raise ValueError('HTML 5 parsing resulted in a tree with nesting'
@ -259,7 +259,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
nroot = etree.fromstring('<html></html>') nroot = etree.fromstring('<html></html>')
has_body = False has_body = False
for child in list(data): for child in list(data):
if isinstance(child.tag, (unicode_type, str)) and barename(child.tag) == 'body': if isinstance(child.tag, (unicode_type, bytes)) and barename(child.tag) == 'body':
has_body = True has_body = True
break break
parent = nroot parent = nroot

View File

@ -607,12 +607,12 @@ class Style(object):
result = base result = base
else: else:
result = self._unit_convert(width, base=base) result = self._unit_convert(width, base=base)
if isinstance(result, (unicode_type, str, bytes)): if isinstance(result, (unicode_type, bytes)):
result = self._profile.width result = self._profile.width
self._width = result self._width = result
if 'max-width' in self._style: if 'max-width' in self._style:
result = self._unit_convert(self._style['max-width'], base=base) result = self._unit_convert(self._style['max-width'], base=base)
if isinstance(result, (unicode_type, str, bytes)): if isinstance(result, (unicode_type, bytes)):
result = self._width result = self._width
if result < self._width: if result < self._width:
self._width = result self._width = result
@ -644,12 +644,12 @@ class Style(object):
result = base result = base
else: else:
result = self._unit_convert(height, base=base) result = self._unit_convert(height, base=base)
if isinstance(result, (unicode_type, str, bytes)): if isinstance(result, (unicode_type, bytes)):
result = self._profile.height result = self._profile.height
self._height = result self._height = result
if 'max-height' in self._style: if 'max-height' in self._style:
result = self._unit_convert(self._style['max-height'], base=base) result = self._unit_convert(self._style['max-height'], base=base)
if isinstance(result, (unicode_type, str, bytes)): if isinstance(result, (unicode_type, bytes)):
result = self._height result = self._height
if result < self._height: if result < self._height:
self._height = result self._height = result

View File

@ -15,7 +15,7 @@ from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.conversion.preprocess import DocAnalysis from calibre.ebooks.conversion.preprocess import DocAnalysis
from calibre.utils.cleantext import clean_ascii_chars from calibre.utils.cleantext import clean_ascii_chars
from polyglot.builtins import unicode_type from polyglot.builtins import unicode_type, map, range
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>' HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
@ -55,7 +55,7 @@ def split_txt(txt, epub_split_size_kb=0):
result in the entire document being one giant result in the entire document being one giant
paragraph. In this case the EPUB parser will not paragraph. In this case the EPUB parser will not
be able to determine where to split the file be able to determine where to split the file
to accomidate the EPUB file size limitation to accommodate the EPUB file size limitation
and will fail. and will fail.
''' '''
# Takes care if there is no point to split # Takes care if there is no point to split
@ -66,9 +66,12 @@ def split_txt(txt, epub_split_size_kb=0):
# Calculating the average chunk value for easy splitting as EPUB (+2 as a safe margin) # Calculating the average chunk value for easy splitting as EPUB (+2 as a safe margin)
chunk_size = long(length_byte / (int(length_byte / (epub_split_size_kb * 1024)) + 2)) chunk_size = long(length_byte / (int(length_byte / (epub_split_size_kb * 1024)) + 2))
# if there are chunks with a superior size then go and break # if there are chunks with a superior size then go and break
-    if (len(filter(lambda x: len(x) > chunk_size, txt.split('\n\n')))) :
-        txt = '\n\n'.join([split_string_separator(line, chunk_size)
-            for line in txt.split('\n\n')])
+    parts = txt.split(b'\n\n')
+    lengths = tuple(map(len, parts))
+    if lengths and max(lengths) > chunk_size:
+        txt = b'\n\n'.join([
+            split_string_separator(line, chunk_size) for line in parts
+        ])
if isbytestring(txt): if isbytestring(txt):
txt = txt.decode('utf-8') txt = txt.decode('utf-8')
@ -227,7 +230,7 @@ def opf_writer(path, opf_name, manifest, spine, mi):
opf = OPFCreator(path, mi) opf = OPFCreator(path, mi)
opf.create_manifest(manifest) opf.create_manifest(manifest)
opf.create_spine(spine) opf.create_spine(spine)
with open(os.path.join(path, opf_name), 'wb') as opffile: with lopen(os.path.join(path, opf_name), 'wb') as opffile:
opf.render(opffile) opf.render(opffile)
@ -236,9 +239,16 @@ def split_string_separator(txt, size):
Splits the text by putting \n\n at the point size. Splits the text by putting \n\n at the point size.
''' '''
     if len(txt) > size:
-        txt = ''.join([re.sub(type(u'')(r'\.(?P<ends>[^.]*)$'), r'.\n\n\g<ends>',
-            txt[i:i+size], 1) for i in
-            xrange(0, len(txt), size)])
+        size -= 2
+        txt = []
+        for part in (txt[i * size: (i + 1) * size] for i in range(0, len(txt), size)):
+            idx = part.rfind('.')
+            if idx == -1:
+                part += b'\n\n'
+            else:
+                part = part[:idx + 1] + b'\n\n' + part[idx:]
+            txt.append(part)
+        txt = b''.join(txt)
     return txt

View File

@ -19,8 +19,6 @@ Tranliterate the string from unicode characters to ASCII in Chinese and others.
''' '''
import unicodedata import unicodedata
from calibre.constants import ispy3
class Unihandecoder(object): class Unihandecoder(object):
preferred_encoding = None preferred_encoding = None
@ -43,15 +41,11 @@ class Unihandecoder(object):
self.decoder = Unidecoder() self.decoder = Unidecoder()
def decode(self, text): def decode(self, text):
-        if not ispy3:
-            if not isinstance(text, unicode):
-                try:
-                    text = unicode(text)
-                except:
-                    try:
-                        text = text.decode(self.preferred_encoding)
-                    except:
-                        text = text.decode('utf-8', 'replace')
+        if isinstance(text, bytes):
+            try:
+                text = text.decode(self.preferred_encoding)
+            except Exception:
+                text = text.decode('utf-8', 'replace')
# at first unicode normalize it. (see Unicode standards) # at first unicode normalize it. (see Unicode standards)
ntext = unicodedata.normalize('NFKC', text) ntext = unicodedata.normalize('NFKC', text)
return self.decoder.decode(ntext) return self.decoder.decode(ntext)

View File

@ -4,7 +4,7 @@
# Copyright 2011 Hiroshi Miura <miurahr@linux.com> # Copyright 2011 Hiroshi Miura <miurahr@linux.com>
from zlib import decompress from zlib import decompress
from calibre.constants import ispy3 from polyglot.builtins import unicode_type
class jisyo (object): class jisyo (object):
@ -34,8 +34,8 @@ class jisyo (object):
P('localization/pykakasi/kanadict2.calibre_msgpack', data=True)) P('localization/pykakasi/kanadict2.calibre_msgpack', data=True))
def load_jisyo(self, char): def load_jisyo(self, char):
if not ispy3: if not isinstance(char, unicode_type):
char = unicode(char) char = unicode_type(char, 'utf-8')
key = "%04x"%ord(char) key = "%04x"%ord(char)
try: # already exist? try: # already exist?

View File

@ -60,9 +60,9 @@ it under the same terms as Perl itself.
''' '''
import re import re
from calibre.constants import ispy3
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
from calibre.ebooks.unihandecode.zhcodepoints import CODEPOINTS as HANCODES from calibre.ebooks.unihandecode.zhcodepoints import CODEPOINTS as HANCODES
from polyglot.builtins import unicode_type
class Unidecoder(object): class Unidecoder(object):
@ -95,8 +95,8 @@ class Unidecoder(object):
Find what group character is a part of. Find what group character is a part of.
''' '''
# Code groups withing CODEPOINTS take the form 'xAB' # Code groups withing CODEPOINTS take the form 'xAB'
if not ispy3: if not isinstance(character, unicode_type):
character = unicode(character) character = unicode_type(character, "utf-8")
return 'x%02x' % (ord(character) >> 8) return 'x%02x' % (ord(character) >> 8)
def grouped_point(self, character): def grouped_point(self, character):
@ -104,6 +104,6 @@ class Unidecoder(object):
Return the location the replacement character is in the list for a Return the location the replacement character is in the list for a
the group character is a part of. the group character is a part of.
''' '''
if not ispy3: if not isinstance(character, unicode_type):
character = unicode(character) character = unicode_type(character, "utf-8")
return ord(character) & 255 return ord(character) & 255

View File

@ -347,7 +347,7 @@ class EditorWidget(QWebView, LineEditECM): # {{{
return unicode_type(self.page().mainFrame().toHtml()) return unicode_type(self.page().mainFrame().toHtml())
check = unicode_type(self.page().mainFrame().toPlainText()).strip() check = unicode_type(self.page().mainFrame().toPlainText()).strip()
raw = unicode_type(self.page().mainFrame().toHtml()) raw = unicode_type(self.page().mainFrame().toHtml())
raw = xml_to_unicode_type(raw, strip_encoding_pats=True, raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0] resolve_entities=True)[0]
raw = self.comments_pat.sub('', raw) raw = self.comments_pat.sub('', raw)
if not check and '<img' not in raw.lower(): if not check and '<img' not in raw.lower():

View File

@ -201,7 +201,7 @@ class MenuExampleWindow(Gtk.ApplicationWindow):
def convert(v): def convert(v):
if isinstance(v, basestring): if isinstance(v, (unicode_type, bytes)):
return unicode_type(v) return unicode_type(v)
if isinstance(v, dbus.Struct): if isinstance(v, dbus.Struct):
return tuple(convert(val) for val in v) return tuple(convert(val) for val in v)
@ -309,6 +309,7 @@ class MyApplication(Gtk.Application):
def do_startup(self): def do_startup(self):
Gtk.Application.do_startup(self) Gtk.Application.do_startup(self)
app = MyApplication(application_id='com.calibre-ebook.test-gtk') app = MyApplication(application_id='com.calibre-ebook.test-gtk')
signal.signal(signal.SIGINT, signal.SIG_DFL) signal.signal(signal.SIGINT, signal.SIG_DFL)
sys.exit(app.run(sys.argv)) sys.exit(app.run(sys.argv))

View File

@ -43,7 +43,7 @@ class TableItem(QTableWidgetItem):
# self is not None and other is None therefore self >= other # self is not None and other is None therefore self >= other
return True return True
if isinstance(self.sort, (str, unicode_type)): if isinstance(self.sort, (bytes, unicode_type)):
l = sort_key(self.sort) l = sort_key(self.sort)
r = sort_key(other.sort) r = sort_key(other.sort)
else: else:
@ -66,7 +66,7 @@ class TableItem(QTableWidgetItem):
# self is not None therefore self > other # self is not None therefore self > other
return False return False
if isinstance(self.sort, (str, unicode_type)): if isinstance(self.sort, (bytes, unicode_type)):
l = sort_key(self.sort) l = sort_key(self.sort)
r = sort_key(other.sort) r = sort_key(other.sort)
else: else:
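
The same comparison pattern outside Qt, as a hedged sketch: calibre's sort_key comes from calibre.utils.icu, so a casefold-based stand-in is used here.

def sort_key(x):  # stand-in for the ICU collation key
    if isinstance(x, bytes):
        x = x.decode('utf-8', 'replace')
    return x.casefold()

def ge(a, b):  # mirrors the __ge__ logic: None sorts lowest, strings collate
    if a is None:
        return b is None
    if b is None:
        return True
    if isinstance(a, (bytes, str)):
        return sort_key(a) >= sort_key(b)
    return a >= b

print(ge('Éclair', 'apple'), ge(None, 'x'), ge(3, None))  # True False True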

View File

@ -13,7 +13,7 @@ from PyQt5.Qt import (Qt, QApplication, QStackedWidget, QMenu, QTimer,
from calibre.utils.config import prefs from calibre.utils.config import prefs
from calibre.utils.icu import sort_key from calibre.utils.icu import sort_key
from calibre.constants import (isosx, __appname__, preferred_encoding, from calibre.constants import (__appname__, preferred_encoding,
get_version) get_version)
from calibre.gui2 import config, is_widescreen, gprefs, error_dialog, open_url from calibre.gui2 import config, is_widescreen, gprefs, error_dialog, open_url
from calibre.gui2.library.views import BooksView, DeviceBooksView from calibre.gui2.library.views import BooksView, DeviceBooksView
@ -323,11 +323,6 @@ class StatusBar(QStatusBar): # {{{
def show_message(self, msg, timeout=0, show_notification=True): def show_message(self, msg, timeout=0, show_notification=True):
self.showMessage(msg, timeout) self.showMessage(msg, timeout)
if self.notifier is not None and not config['disable_tray_notification'] and show_notification: if self.notifier is not None and not config['disable_tray_notification'] and show_notification:
if isosx and isinstance(msg, unicode_type):
try:
msg = msg.encode(preferred_encoding)
except UnicodeEncodeError:
msg = msg.encode('utf-8')
self.notifier(msg) self.notifier(msg)
def clear_message(self): def clear_message(self):

View File

@ -129,7 +129,7 @@ class LocationManager(QObject): # {{{
had_device = self.has_device had_device = self.has_device
if cp is None: if cp is None:
cp = (None, None) cp = (None, None)
if isinstance(cp, (str, unicode_type)): if isinstance(cp, (bytes, unicode_type)):
cp = (cp, None) cp = (cp, None)
if len(fs) < 3: if len(fs) < 3:
fs = list(fs) + [0] fs = list(fs) + [0]

View File

@ -6,7 +6,7 @@ import sys, logging, os, traceback, time
from PyQt5.Qt import ( from PyQt5.Qt import (
QKeySequence, QPainter, QDialog, QSpinBox, QSlider, QIcon, Qt, QCoreApplication, QThread, QScrollBar) QKeySequence, QPainter, QDialog, QSpinBox, QSlider, QIcon, Qt, QCoreApplication, QThread, QScrollBar)
from calibre import __appname__, setup_cli_handlers, islinux, isbsd from calibre import __appname__, setup_cli_handlers, islinux, isbsd, as_unicode
from calibre.ebooks.lrf.lrfparser import LRFDocument from calibre.ebooks.lrf.lrfparser import LRFDocument
from calibre.gui2 import error_dialog, \ from calibre.gui2 import error_dialog, \
@ -17,7 +17,6 @@ from calibre.gui2.lrf_renderer.config_ui import Ui_ViewerConfig
from calibre.gui2.main_window import MainWindow from calibre.gui2.main_window import MainWindow
from calibre.gui2.lrf_renderer.document import Document from calibre.gui2.lrf_renderer.document import Document
from calibre.gui2.search_box import SearchBox2 from calibre.gui2.search_box import SearchBox2
from polyglot.builtins import unicode_type
class RenderWorker(QThread): class RenderWorker(QThread):
@ -201,7 +200,7 @@ class Main(MainWindow, Ui_MainWindow):
print('Error rendering document', file=sys.stderr) print('Error rendering document', file=sys.stderr)
print(exception, file=sys.stderr) print(exception, file=sys.stderr)
print(self.renderer.formatted_traceback, file=sys.stderr) print(self.renderer.formatted_traceback, file=sys.stderr)
msg = u'<p><b>%s</b>: '%(exception.__class__.__name__,) + unicode_type(str(exception), 'utf8', 'replace') + u'</p>' msg = u'<p><b>%s</b>: '%(exception.__class__.__name__,) + as_unicode(exception) + u'</p>'
msg += u'<p>Failed to render document</p>' msg += u'<p>Failed to render document</p>'
msg += u'<p>Detailed <b>traceback</b>:<pre>' msg += u'<p>Detailed <b>traceback</b>:<pre>'
msg += self.renderer.formatted_traceback + '</pre>' msg += self.renderer.formatted_traceback + '</pre>'

View File

@ -132,7 +132,7 @@ def get_default_library_path():
fname = 'Calibre Library' fname = 'Calibre Library'
if isinstance(fname, unicode_type): if isinstance(fname, unicode_type):
try: try:
fname = fname.encode(filesystem_encoding) fname.encode(filesystem_encoding)
except: except:
fname = 'Calibre Library' fname = 'Calibre Library'
x = os.path.expanduser('~'+os.sep+fname) x = os.path.expanduser('~'+os.sep+fname)
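
A condensed sketch of the corrected check: the name is only probed for encodability and stays text either way (the old code replaced it with bytes). The function name is hypothetical and sys.getfilesystemencoding() stands in for calibre's filesystem_encoding.

import sys

def default_library_name(fname=u'Calibre Library'):
    try:
        fname.encode(sys.getfilesystemencoding())
    except UnicodeEncodeError:
        fname = u'Calibre Library'  # fall back to an always-encodable name
    return fname

print(default_library_name(u'Bibliothèque'))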

View File

@ -5,14 +5,14 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import StringIO, traceback, sys, gc, weakref import traceback, sys, gc, weakref
from io import BytesIO
from PyQt5.Qt import (QMainWindow, QTimer, QAction, QMenu, QMenuBar, QIcon, from PyQt5.Qt import (QMainWindow, QTimer, QAction, QMenu, QMenuBar, QIcon,
QObject) QObject)
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.gui2 import error_dialog from calibre.gui2 import error_dialog
from calibre import prints from calibre import prints, force_unicode
from polyglot.builtins import unicode_type
def option_parser(usage='''\ def option_parser(usage='''\
@ -134,7 +134,7 @@ class MainWindow(QMainWindow):
if type is KeyboardInterrupt: if type is KeyboardInterrupt:
return return
try: try:
sio = StringIO.StringIO() sio = BytesIO()
try: try:
from calibre.debug import print_basic_debug_info from calibre.debug import print_basic_debug_info
print_basic_debug_info(out=sio) print_basic_debug_info(out=sio)
@ -145,7 +145,8 @@ class MainWindow(QMainWindow):
prints(value.locking_debug_msg, file=sio) prints(value.locking_debug_msg, file=sio)
fe = sio.getvalue() fe = sio.getvalue()
prints(fe, file=sys.stderr) prints(fe, file=sys.stderr)
msg = '<b>%s</b>:'%type.__name__ + unicode_type(str(value), 'utf8', 'replace') fe = force_unicode(fe)
msg = '<b>%s</b>:'%type.__name__ + force_unicode(value)
error_dialog(self, _('Unhandled exception'), msg, det_msg=fe, error_dialog(self, _('Unhandled exception'), msg, det_msg=fe,
show=True) show=True)
except BaseException: except BaseException:
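
A standalone sketch of the pattern in this hunk: collect the debug report in a bytes buffer, then force everything to text before handing it to the UI. force_unicode here is a minimal stand-in for calibre's helper of the same name.

import sys, traceback
from io import BytesIO

def force_unicode(x, enc='utf-8'):  # stand-in for calibre.force_unicode
    if isinstance(x, bytes):
        return x.decode(enc, 'replace')
    return x if isinstance(x, str) else str(x)

def unhandled_exception(exc_type, value, tb):
    if exc_type is KeyboardInterrupt:
        return
    sio = BytesIO()
    sio.write(''.join(traceback.format_exception(exc_type, value, tb)).encode('utf-8'))
    fe = force_unicode(sio.getvalue())
    msg = '<b>%s</b>: ' % exc_type.__name__ + force_unicode(value)
    print(msg, file=sys.stderr)
    print(fe, file=sys.stderr)

sys.excepthook = unhandled_exception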

View File

@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
import time import time
from calibre import prints from calibre import prints
from calibre.constants import islinux, isosx, get_osx_version, DEBUG from calibre.constants import islinux, isosx, get_osx_version, DEBUG, ispy3
from polyglot.builtins import unicode_type from polyglot.builtins import unicode_type
@ -145,8 +145,12 @@ class AppleNotifier(Notifier):
def notify(self, body, summary): def notify(self, body, summary):
def encode(x): def encode(x):
if isinstance(x, unicode_type): if ispy3:
x = x.encode('utf-8') if isinstance(x, bytes):
x = x.decode('utf-8')
else:
if isinstance(x, unicode_type):
x = x.encode('utf-8')
return x return x
cmd = [self.exe, '-activate', cmd = [self.exe, '-activate',
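
The encode() helper above in isolation, sketched with a plain version check instead of calibre's ispy3 constant: command-line arguments for the notifier must be text on Python 3 and encoded bytes on Python 2.

import sys

def encode_arg(x, enc='utf-8'):  # hypothetical name for the helper
    if sys.version_info.major >= 3:
        if isinstance(x, bytes):
            x = x.decode(enc)
    else:
        if not isinstance(x, bytes):
            x = x.encode(enc)
    return x

cmd = ['notifier', '-title', encode_arg(u'calibre'), '-message', encode_arg(b'Job done')]
print(cmd)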

View File

@ -67,7 +67,7 @@ class SearchDialog(QDialog, Ui_Dialog):
self.setup_store_checks() self.setup_store_checks()
# Set the search query # Set the search query
if isinstance(query, (str, unicode_type)): if isinstance(query, (bytes, unicode_type)):
self.search_edit.setText(query) self.search_edit.setText(query)
elif isinstance(query, dict): elif isinstance(query, dict):
if 'author' in query: if 'author' in query:
@ -233,7 +233,7 @@ class SearchDialog(QDialog, Ui_Dialog):
query = query.replace('<', '') query = query.replace('<', '')
# Remove the prefix. # Remove the prefix.
for loc in ('all', 'author', 'author2', 'authors', 'title', 'title2'): for loc in ('all', 'author', 'author2', 'authors', 'title', 'title2'):
query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, '\g<a>', query) query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, r'\g<a>', query)
query = query.replace('%s:' % loc, '') query = query.replace('%s:' % loc, '')
# Remove the prefix and search text. # Remove the prefix and search text.
for loc in ('cover', 'download', 'downloads', 'drm', 'format', 'formats', 'price', 'store'): for loc in ('cover', 'download', 'downloads', 'drm', 'format', 'formats', 'price', 'store'):
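
Why the replacement template above became a raw string: '\g<a>' inside a normal literal is an invalid escape sequence on Python 3 (a DeprecationWarning now, an error eventually), while r'\g<a>' keeps the backreference intact. A quick check:

import re

query = 'author:"tolstoy" war and peace'
print(re.sub(r'author:"(?P<a>[^\s"]+)"', r'\g<a>', query))  # tolstoy war and peace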

View File

@ -30,8 +30,8 @@ user_functions = JSONConfig('editor-search-replace-functions')
def compile_code(src, name='<string>'): def compile_code(src, name='<string>'):
if not isinstance(src, unicode_type): if not isinstance(src, unicode_type):
match = re.search(r'coding[:=]\s*([-\w.]+)', src[:200]) match = re.search(br'coding[:=]\s*([-\w.]+)', src[:200])
enc = match.group(1) if match else 'utf-8' enc = match.group(1).decode('utf-8') if match else 'utf-8'
src = src.decode(enc) src = src.decode(enc)
if not src or not src.strip(): if not src or not src.strip():
src = EMPTY_FUNC src = EMPTY_FUNC
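
A condensed, runnable sketch of the decode-before-compile pattern this hunk fixes: while the source is still bytes, the PEP 263 coding declaration has to be searched for with a bytes pattern, used to decode, and then stripped so compile() accepts the text (the real function also handles the EMPTY_FUNC default and the execution namespace).

import re

def compile_code(src, name='<string>'):
    if not isinstance(src, str):
        match = re.search(br'coding[:=]\s*([-\w.]+)', src[:200])
        enc = match.group(1).decode('utf-8') if match else 'utf-8'
        src = src.decode(enc)
    # compile() rejects text that still carries an encoding declaration
    src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src, flags=re.MULTILINE)
    return compile(src, name, 'exec')

ns = {}
exec(compile_code(b"# -*- coding: utf-8 -*-\nx = u'caf\xc3\xa9'\n"), ns)
print(ns['x'])  # café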

View File

@ -9,7 +9,6 @@ import os
from hashlib import sha1 from hashlib import sha1
from calibre.ebooks import BOOK_EXTENSIONS from calibre.ebooks import BOOK_EXTENSIONS
from polyglot.builtins import unicode_type
def find_folders_under(root, db, add_root=True, # {{{ def find_folders_under(root, db, add_root=True, # {{{
@ -106,11 +105,9 @@ class FormatCollection(object): # {{{
def books_in_folder(folder, one_per_folder, # {{{ def books_in_folder(folder, one_per_folder, # {{{
cancel_callback=lambda : False): cancel_callback=lambda : False):
assert not isinstance(folder, unicode_type)
dirpath = os.path.abspath(folder) dirpath = os.path.abspath(folder)
if one_per_folder: if one_per_folder:
formats = set([]) formats = set()
for path in os.listdir(dirpath): for path in os.listdir(dirpath):
if cancel_callback(): if cancel_callback():
return [] return []

View File

@ -19,7 +19,7 @@ from calibre.utils.localization import (canonicalize_lang, lang_map, get_udc)
from calibre.db.search import CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH, _match from calibre.db.search import CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH, _match
from calibre.ebooks.metadata import title_sort, author_to_author_sort from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre import prints from calibre import prints, force_unicode
from polyglot.builtins import unicode_type from polyglot.builtins import unicode_type
@ -137,7 +137,9 @@ del y, c, n, u
def force_to_bool(val): def force_to_bool(val):
if isinstance(val, (str, unicode_type)): if isinstance(val, (bytes, unicode_type)):
if isinstance(val, bytes):
val = force_unicode(val)
try: try:
val = icu_lower(val) val = icu_lower(val)
if not val: if not val:
@ -348,7 +350,7 @@ class ResultCache(SearchQueryParser): # {{{
if item is None: if item is None:
continue continue
v = item[loc] v = item[loc]
if isinstance(v, (str, unicode_type)): if isinstance(v, (bytes, unicode_type)):
v = parse_date(v) v = parse_date(v)
if v is None or v <= UNDEFINED_DATE: if v is None or v <= UNDEFINED_DATE:
matches.add(item[0]) matches.add(item[0])
@ -359,7 +361,7 @@ class ResultCache(SearchQueryParser): # {{{
if item is None: if item is None:
continue continue
v = item[loc] v = item[loc]
if isinstance(v, (str, unicode_type)): if isinstance(v, (bytes, unicode_type)):
v = parse_date(v) v = parse_date(v)
if v is not None and v > UNDEFINED_DATE: if v is not None and v > UNDEFINED_DATE:
matches.add(item[0]) matches.add(item[0])
@ -371,7 +373,7 @@ class ResultCache(SearchQueryParser): # {{{
(p, relop) = self.date_search_relops[k] (p, relop) = self.date_search_relops[k]
query = query[p:] query = query[p:]
if relop is None: if relop is None:
(p, relop) = self.date_search_relops['='] (p, relop) = self.date_search_relops['=']
if query in self.local_today: if query in self.local_today:
qd = now() qd = now()
@ -403,7 +405,7 @@ class ResultCache(SearchQueryParser): # {{{
if item is None or item[loc] is None: if item is None or item[loc] is None:
continue continue
v = item[loc] v = item[loc]
if isinstance(v, (str, unicode_type)): if isinstance(v, (bytes, unicode_type)):
v = parse_date(v) v = parse_date(v)
if relop(v, qd, field_count): if relop(v, qd, field_count):
matches.add(item[0]) matches.add(item[0])
@ -448,7 +450,7 @@ class ResultCache(SearchQueryParser): # {{{
(p, relop) = self.numeric_search_relops[k] (p, relop) = self.numeric_search_relops[k]
query = query[p:] query = query[p:]
if relop is None: if relop is None:
(p, relop) = self.numeric_search_relops['='] (p, relop) = self.numeric_search_relops['=']
if dt == 'int': if dt == 'int':
cast = lambda x: int(x) cast = lambda x: int(x)
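
For reference, a toy version of the relop handling visible in these hunks (the table below is illustrative, not calibre's exact one): the operator prefix maps to its length and comparison function, with '=' as the fallback when no prefix matched.

import operator

numeric_search_relops = {
    '>=': (2, operator.ge), '<=': (2, operator.le),
    '>':  (1, operator.gt), '<':  (1, operator.lt),
    '=':  (1, operator.eq),
}

def parse_numeric_query(query):
    relop = None
    for k in ('>=', '<=', '>', '<', '='):
        if query.startswith(k):
            p, relop = numeric_search_relops[k]
            query = query[p:]
            break
    if relop is None:
        p, relop = numeric_search_relops['=']
    return relop, float(query)

relop, val = parse_numeric_query('>=4.5')
print(relop(5, val), relop(4, val))  # True False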

View File

@ -154,7 +154,7 @@ class CSV_XML(CatalogPlugin):
item = u'%.2g' % (item / 2.0) item = u'%.2g' % (item / 2.0)
# Convert HTML to markdown text # Convert HTML to markdown text
if type(item) is unicode_type: if isinstance(item, unicode_type):
opening_tag = re.search('<(\\w+)(\x20|>)', item) opening_tag = re.search('<(\\w+)(\x20|>)', item)
if opening_tag: if opening_tag:
closing_tag = re.search('<\\/%s>$' % opening_tag.group(1), item) closing_tag = re.search('<\\/%s>$' % opening_tag.group(1), item)
@ -177,7 +177,7 @@ class CSV_XML(CatalogPlugin):
for field in fields: for field in fields:
if field.startswith('#'): if field.startswith('#'):
val = db.get_field(r['id'], field, index_is_id=True) val = db.get_field(r['id'], field, index_is_id=True)
if not isinstance(val, (str, unicode_type)): if not isinstance(val, unicode_type):
val = unicode_type(val) val = unicode_type(val)
item = getattr(E, field.replace('#', '_'))(val) item = getattr(E, field.replace('#', '_'))(val)
record.append(item) record.append(item)
@ -188,7 +188,7 @@ class CSV_XML(CatalogPlugin):
val = r[field] val = r[field]
if not val: if not val:
continue continue
if not isinstance(val, (str, unicode_type)): if not isinstance(val, (bytes, unicode_type)):
if (fm.get(field, {}).get('datatype', None) == if (fm.get(field, {}).get('datatype', None) ==
'rating' and val): 'rating' and val):
val = u'%.2g' % (val / 2.0) val = u'%.2g' % (val / 2.0)

View File

@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
import json, re import json, re
from functools import partial from functools import partial
from calibre import prints from calibre import prints, force_unicode
from calibre.constants import preferred_encoding from calibre.constants import preferred_encoding
from calibre.library.field_metadata import FieldMetadata from calibre.library.field_metadata import FieldMetadata
from calibre.utils.date import parse_date from calibre.utils.date import parse_date
@ -131,7 +131,7 @@ class CustomColumns(object):
if d['is_multiple']: if d['is_multiple']:
if x is None: if x is None:
return [] return []
if isinstance(x, (str, unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
x = x.split(d['multiple_seps']['ui_to_list']) x = x.split(d['multiple_seps']['ui_to_list'])
x = [y.strip() for y in x if y.strip()] x = [y.strip() for y in x if y.strip()]
x = [y.decode(preferred_encoding, 'replace') if not isinstance(y, x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
@ -142,12 +142,14 @@ class CustomColumns(object):
x.decode(preferred_encoding, 'replace') x.decode(preferred_encoding, 'replace')
def adapt_datetime(x, d): def adapt_datetime(x, d):
if isinstance(x, (str, unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
x = parse_date(x, assume_utc=False, as_utc=False) x = parse_date(x, assume_utc=False, as_utc=False)
return x return x
def adapt_bool(x, d): def adapt_bool(x, d):
if isinstance(x, (str, unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
if isinstance(x, bytes):
x = force_unicode(x)
x = x.lower() x = x.lower()
if x == 'true': if x == 'true':
x = True x = True
@ -168,7 +170,9 @@ class CustomColumns(object):
def adapt_number(x, d): def adapt_number(x, d):
if x is None: if x is None:
return None return None
if isinstance(x, (str, unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
if isinstance(x, bytes):
x = force_unicode(x)
if x.lower() == 'none': if x.lower() == 'none':
return None return None
if d['datatype'] == 'int': if d['datatype'] == 'int':
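
The adapter pattern from this hunk as a self-contained sketch; force_unicode is a local stand-in, and the branches after 'true' are filled in for illustration since the hunk cuts off there.

def force_unicode(x, enc='utf-8'):  # stand-in for calibre's helper
    return x.decode(enc, 'replace') if isinstance(x, bytes) else x

def adapt_bool(x):
    if isinstance(x, (str, bytes)):
        x = force_unicode(x).lower()
        if x == 'true':
            x = True
        elif x == 'false':
            x = False
        elif x in ('none', ''):
            x = None
        else:
            x = bool(int(x))
    return x

print(adapt_bool(b'True'), adapt_bool('0'), adapt_bool(None))  # True False None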

View File

@ -24,7 +24,7 @@ from calibre.library.custom_columns import CustomColumns
from calibre.library.sqlite import connect, IntegrityError from calibre.library.sqlite import connect, IntegrityError
from calibre.library.prefs import DBPrefs from calibre.library.prefs import DBPrefs
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.constants import preferred_encoding, iswindows, filesystem_encoding from calibre.constants import preferred_encoding, iswindows, filesystem_encoding, ispy3
from calibre.ptempfile import (PersistentTemporaryFile, from calibre.ptempfile import (PersistentTemporaryFile,
base_dir, SpooledTemporaryFile) base_dir, SpooledTemporaryFile)
from calibre.customize.ui import (run_plugins_on_import, from calibre.customize.ui import (run_plugins_on_import,
@ -1754,12 +1754,14 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.rc = rc self.rc = rc
self.id = id self.id = id
def __str__(self): def __unicode_representation__(self):
return unicode_type(self) return u'n=%s s=%s c=%d rt=%d rc=%d id=%s' % (
self.n, self.s, self.c, self.rt, self.rc, self.id)
def __unicode__(self): if ispy3:
return 'n=%s s=%s c=%d rt=%d rc=%d id=%s'%\ __str__ = __unicode_representation__
(self.n, self.s, self.c, self.rt, self.rc, self.id) else:
__str__ = __unicode__ = __unicode_representation__
def clean_user_categories(self): def clean_user_categories(self):
user_cats = self.prefs.get('user_categories', {}) user_cats = self.prefs.get('user_categories', {})
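
The __str__/__unicode__ handling above as a minimal sketch (class and field names are hypothetical; only the aliasing pattern matters): one method returns text, and the interpreter-appropriate dunder names are bound to it inside the class body.

import sys
ispy3 = sys.version_info.major >= 3

class TagStats(object):
    def __init__(self, n, s):
        self.n, self.s = n, s

    def __unicode_representation__(self):
        return u'n=%s s=%s' % (self.n, self.s)

    if ispy3:
        __str__ = __unicode_representation__
    else:
        __str__ = __unicode__ = __unicode_representation__

print(TagStats(u'fiction', 3))  # n=fiction s=3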

View File

@ -202,7 +202,7 @@ class Route(object):
raise RouteError('The variable(s) %s are not part of the route: %s' % (','.join(unknown), self.endpoint.route)) raise RouteError('The variable(s) %s are not part of the route: %s' % (','.join(unknown), self.endpoint.route))
def quoted(x): def quoted(x):
if not isinstance(x, unicode_type) and not isinstance(x, bytes): if not isinstance(x, (unicode_type, bytes)):
x = unicode_type(x) x = unicode_type(x)
if isinstance(x, unicode_type): if isinstance(x, unicode_type):
x = x.encode('utf-8') x = x.encode('utf-8')
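
A sketch of where quoted() is heading, assuming Python 3 and urllib.parse.quote as the percent-encoder (the real code goes through calibre's own wrappers): any value is normalised to UTF-8 bytes before being quoted into a URL path.

from urllib.parse import quote

def quoted(x):
    if not isinstance(x, (str, bytes)):
        x = str(x)
    if isinstance(x, str):
        x = x.encode('utf-8')
    return quote(x)

print(quoted(42), quoted(u'caf\xe9'))  # 42 caf%C3%A9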

File diff suppressed because it is too large

View File

@ -11,7 +11,7 @@ from datetime import datetime, time as dtime, timedelta, MINYEAR, MAXYEAR
from functools import partial from functools import partial
from calibre import strftime from calibre import strftime
from calibre.constants import iswindows, isosx, plugins from calibre.constants import iswindows, isosx, plugins, preferred_encoding
from calibre.utils.iso8601 import utc_tz, local_tz, UNDEFINED_DATE from calibre.utils.iso8601 import utc_tz, local_tz, UNDEFINED_DATE
from calibre.utils.localization import lcdata from calibre.utils.localization import lcdata
from polyglot.builtins import unicode_type from polyglot.builtins import unicode_type
@ -101,6 +101,8 @@ def parse_date(date_string, assume_utc=False, as_utc=True, default=None):
from dateutil.parser import parse from dateutil.parser import parse
if not date_string: if not date_string:
return UNDEFINED_DATE return UNDEFINED_DATE
if isinstance(date_string, bytes):
date_string = date_string.decode(preferred_encoding, 'replace')
if default is None: if default is None:
func = datetime.utcnow if assume_utc else datetime.now func = datetime.utcnow if assume_utc else datetime.now
default = func().replace(day=15, hour=0, minute=0, second=0, microsecond=0, default = func().replace(day=15, hour=0, minute=0, second=0, microsecond=0,
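
The added guard in isolation, as a condensed sketch that leans on dateutil (imported in the hunk above) and skips the default and assume_utc handling: byte strings, for example values read back from an old database, are decoded before parsing.

from dateutil.parser import parse

def parse_date(date_string, encoding='utf-8'):
    if isinstance(date_string, bytes):
        date_string = date_string.decode(encoding, 'replace')
    return parse(date_string)

print(parse_date(b'2019-03-13 06:40:38'))
print(parse_date(u'13 March 2019'))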

View File

@ -132,7 +132,7 @@ class FormatterFunction(object):
def eval_(self, formatter, kwargs, mi, locals, *args): def eval_(self, formatter, kwargs, mi, locals, *args):
ret = self.evaluate(formatter, kwargs, mi, locals, *args) ret = self.evaluate(formatter, kwargs, mi, locals, *args)
if isinstance(ret, (str, unicode_type)): if isinstance(ret, (bytes, unicode_type)):
return ret return ret
if isinstance(ret, list): if isinstance(ret, list):
return ','.join(ret) return ','.join(ret)

View File

@ -253,8 +253,8 @@ def offload_worker(env={}, priority='normal', cwd=None):
def compile_code(src): def compile_code(src):
import re, io import re, io
if not isinstance(src, unicode_type): if not isinstance(src, unicode_type):
match = re.search(r'coding[:=]\s*([-\w.]+)', src[:200]) match = re.search(br'coding[:=]\s*([-\w.]+)', src[:200])
enc = match.group(1) if match else 'utf-8' enc = match.group(1).decode('utf-8') if match else 'utf-8'
src = src.decode(enc) src = src.decode(enc)
# Python complains if there is a coding declaration in a unicode string # Python complains if there is a coding declaration in a unicode string
src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src, flags=re.MULTILINE) src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src, flags=re.MULTILINE)

View File

@ -32,8 +32,8 @@ def compile_recipe(src):
:return: Recipe class or None, if no such class was found in src :return: Recipe class or None, if no such class was found in src
''' '''
if not isinstance(src, unicode_type): if not isinstance(src, unicode_type):
match = re.search(r'coding[:=]\s*([-\w.]+)', src[:200]) match = re.search(br'coding[:=]\s*([-\w.]+)', src[:200])
enc = match.group(1) if match else 'utf-8' enc = match.group(1).decode('utf-8') if match else 'utf-8'
src = src.decode(enc) src = src.decode(enc)
# Python complains if there is a coding declaration in a unicode string # Python complains if there is a coding declaration in a unicode string
src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src.lstrip(u'\ufeff'), flags=re.MULTILINE) src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src.lstrip(u'\ufeff'), flags=re.MULTILINE)

View File

@ -25,6 +25,7 @@ if is_py3:
zip = builtins.__dict__['zip'] zip = builtins.__dict__['zip']
map = builtins.__dict__['map'] map = builtins.__dict__['map']
filter = builtins.__dict__['filter'] filter = builtins.__dict__['filter']
range = builtins.__dict__['range']
codepoint_to_chr = chr codepoint_to_chr = chr
unicode_type = str unicode_type = str
@ -47,6 +48,7 @@ else:
""") """)
from future_builtins import zip, map, filter # noqa from future_builtins import zip, map, filter # noqa
range = xrange
import __builtin__ as builtins import __builtin__ as builtins
codepoint_to_chr = unichr codepoint_to_chr = unichr
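
A standalone sketch of the aliasing pattern that polyglot.builtins extends here with range: both interpreters end up exposing the lazy iterator under one name, so callers can simply write "from polyglot.builtins import range".

from __future__ import print_function
import sys

if sys.version_info.major >= 3:
    import builtins
    range = builtins.__dict__['range']
    codepoint_to_chr = chr
else:
    import __builtin__ as builtins
    range = builtins.__dict__['xrange']
    codepoint_to_chr = builtins.__dict__['unichr']

print(list(range(3)), codepoint_to_chr(0x3042))  # [0, 1, 2] あ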