Some more fixes for the unicode type
The unicode type is now replaced in all dynamically loaded code: recipes, metadata download sources, etc. In the case of recipes, since they are compiled by calibre, we simply make the unicode/unichr names available in the compilation namespace; no changes to the actual recipes themselves are needed.
This commit is contained in:
parent 6ad22b392b
commit 2d21a8efa2
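
For context, the pattern applied throughout this diff is the py2/py3 text-type idiom: type(u'') evaluates to unicode on Python 2 and to str on Python 3, so code can name the text type without referring to the unicode builtin that Python 3 removed. The sketch below is illustrative only; unicode_type and codepoint_to_chr mirror the names in calibre's polyglot.builtins, but these standalone definitions are not the actual module.

# Minimal sketch of the compatibility idiom used in this commit.
# On Python 2, type(u'') is unicode; on Python 3, it is str.
unicode_type = type(u'')

try:
    codepoint_to_chr = unichr  # Python 2: unichr() builds a one-character unicode string
except NameError:
    codepoint_to_chr = chr  # Python 3: chr() already returns str

# Recipes are exec()d by calibre, so exposing the old names in the execution
# namespace keeps unmodified recipes working on both Pythons (see the
# compile_recipe hunk below).
recipe_namespace = {
    'unicode': unicode_type,
    'unichr': codepoint_to_chr,
}

assert isinstance(u'abc', unicode_type)
assert codepoint_to_chr(0x2603) == u'\u2603'  # U+2603 SNOWMAN
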
@@ -36,7 +36,7 @@ def merge():
     for child in svg.iterchildren('*'):
         clone_node(child, symbol)
     ans.append(symbol)
-    ans = etree.tostring(ans, encoding=unicode, pretty_print=True, with_tail=False)
+    ans = etree.tostring(ans, encoding='unicode', pretty_print=True, with_tail=False)
     ans = re.sub('<svg[^>]+>', '<svg style="display:none">', ans, count=1)
     return ans
 
@@ -168,7 +168,7 @@ def sort_languages(x):
         lc, name = x
         if lc == language:
             return ''
-        return sort_key(unicode(name))
+        return sort_key(type(u'')(name))
 
 
     html_context['other_languages'].sort(key=sort_languages)
@@ -198,7 +198,7 @@ def generate_ebook_convert_help(preamble, app):
 
 
 def update_cli_doc(name, raw, app):
-    if isinstance(raw, unicode):
+    if isinstance(raw, type(u'')):
        raw = raw.encode('utf-8')
    path = 'generated/%s/%s.rst' % (app.config.language, name)
    old_raw = open(path, 'rb').read() if os.path.exists(path) else ''
@@ -21,6 +21,7 @@ prefs = JSONConfig('plugins/interface_demo')
 # Set defaults
 prefs.defaults['hello_world_msg'] = 'Hello, World!'
 
+
 class ConfigWidget(QWidget):
 
     def __init__(self):
@@ -37,5 +38,4 @@ class ConfigWidget(QWidget):
         self.label.setBuddy(self.msg)
 
     def save_settings(self):
-        prefs['hello_world_msg'] = unicode(self.msg.text())
-
+        prefs['hello_world_msg'] = self.msg.text()
@@ -139,7 +139,7 @@ else:
         enc = preferred_encoding
         safe_encode = kwargs.get('safe_encode', False)
         for i, arg in enumerate(args):
-            if isinstance(arg, unicode):
+            if isinstance(arg, type(u'')):
                 try:
                     arg = arg.encode(enc)
                 except UnicodeEncodeError:
@@ -150,8 +150,8 @@ else:
                 try:
                     arg = str(arg)
                 except ValueError:
-                    arg = unicode(arg)
-                if isinstance(arg, unicode):
+                    arg = type(u'')(arg)
+                if isinstance(arg, type(u'')):
                     try:
                         arg = arg.encode(enc)
                     except UnicodeEncodeError:
@@ -1795,41 +1795,41 @@ class UnicodeDammit:
         elif xml_data[:4] == '\x00\x3c\x00\x3f':
             # UTF-16BE
             sniffed_xml_encoding = 'utf-16be'
-            #xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
+            #xml_data = type(u'')(xml_data, 'utf-16be').encode('utf-8')
         elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
                 and (xml_data[2:4] != '\x00\x00'):
             # UTF-16BE with BOM
             sniffed_xml_encoding = 'utf-16be'
-            #xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
+            #xml_data = type(u'')(xml_data[2:], 'utf-16be').encode('utf-8')
         elif xml_data[:4] == '\x3c\x00\x3f\x00':
             # UTF-16LE
             sniffed_xml_encoding = 'utf-16le'
-            #xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
+            #xml_data = type(u'')(xml_data, 'utf-16le').encode('utf-8')
         elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
                 (xml_data[2:4] != '\x00\x00'):
             # UTF-16LE with BOM
             sniffed_xml_encoding = 'utf-16le'
-            #xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
+            #xml_data = type(u'')(xml_data[2:], 'utf-16le').encode('utf-8')
         elif xml_data[:4] == '\x00\x00\x00\x3c':
             # UTF-32BE
             sniffed_xml_encoding = 'utf-32be'
-            #xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
+            #xml_data = type(u'')(xml_data, 'utf-32be').encode('utf-8')
         elif xml_data[:4] == '\x3c\x00\x00\x00':
             # UTF-32LE
             sniffed_xml_encoding = 'utf-32le'
-            #xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
+            #xml_data = type(u'')(xml_data, 'utf-32le').encode('utf-8')
         elif xml_data[:4] == '\x00\x00\xfe\xff':
             # UTF-32BE with BOM
             sniffed_xml_encoding = 'utf-32be'
-            #xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
+            #xml_data = type(u'')(xml_data[4:], 'utf-32be').encode('utf-8')
         elif xml_data[:4] == '\xff\xfe\x00\x00':
             # UTF-32LE with BOM
             sniffed_xml_encoding = 'utf-32le'
-            #xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
+            #xml_data = type(u'')(xml_data[4:], 'utf-32le').encode('utf-8')
         elif xml_data[:3] == '\xef\xbb\xbf':
             # UTF-8 with BOM
             sniffed_xml_encoding = 'utf-8'
-            #xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
+            #xml_data = type(u'')(xml_data[3:], 'utf-8').encode('utf-8')
         else:
             sniffed_xml_encoding = 'ascii'
             pass
@@ -93,7 +93,7 @@ def parse_details_page(url, log, timeout, browser, domain):
     errmsg = root.xpath('//*[@id="errorMessage"]')
     if errmsg:
         msg = 'Failed to parse amazon details page: %r' % url
-        msg += tostring(errmsg, method='text', encoding=unicode).strip()
+        msg += tostring(errmsg, method='text', encoding='unicode').strip()
         log.error(msg)
         return
 
@@ -466,7 +466,7 @@ class Worker(Thread): # Get details {{{
         self.result_queue.put(mi)
 
     def totext(self, elem):
-        return self.tostring(elem, encoding=unicode, method='text').strip()
+        return self.tostring(elem, encoding='unicode', method='text').strip()
 
     def parse_title(self, root):
         h1 = root.xpath('//h1[@id="title"]')
@@ -478,10 +478,10 @@ class Worker(Thread): # Get details {{{
             tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0]
             actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]')
             if actual_title:
-                title = self.tostring(actual_title[0], encoding=unicode,
+                title = self.tostring(actual_title[0], encoding='unicode',
                                       method='text').strip()
             else:
-                title = self.tostring(tdiv, encoding=unicode,
+                title = self.tostring(tdiv, encoding='unicode',
                                       method='text').strip()
         ans = re.sub(r'[(\[].*[)\]]', '', title).strip()
         if not ans:
@@ -508,7 +508,7 @@ class Worker(Thread): # Get details {{{
             ''')
         for x in aname:
             x.tail = ''
-        authors = [self.tostring(x, encoding=unicode, method='text').strip() for x
+        authors = [self.tostring(x, encoding='unicode', method='text').strip() for x
                    in aname]
        authors = [a for a in authors if a]
        return authors
@@ -559,7 +559,7 @@ class Worker(Thread): # Get details {{{
             for a in desc.xpath('descendant::a[@href]'):
                 del a.attrib['href']
                 a.tag = 'span'
-            desc = self.tostring(desc, method='html', encoding=unicode).strip()
+            desc = self.tostring(desc, method='html', encoding='unicode').strip()
 
             # Encoding bug in Amazon data U+fffd (replacement char)
             # in some examples it is present in place of '
@@ -626,14 +626,14 @@ class Worker(Thread): # Get details {{{
             spans = series.xpath('./span')
             if spans:
                 raw = self.tostring(
-                    spans[0], encoding=unicode, method='text', with_tail=False).strip()
+                    spans[0], encoding='unicode', method='text', with_tail=False).strip()
                 m = re.search(r'\s+([0-9.]+)$', raw.strip())
                 if m is not None:
                     series_index = float(m.group(1))
                     s = series.xpath('./a[@id="series-page-link"]')
                     if s:
                         series = self.tostring(
-                            s[0], encoding=unicode, method='text', with_tail=False).strip()
+                            s[0], encoding='unicode', method='text', with_tail=False).strip()
                         if series:
                             ans = (series, series_index)
         # This is found on Kindle edition pages on amazon.com
@@ -646,7 +646,7 @@ class Worker(Thread): # Get details {{{
                 a = span.xpath('./a[@href]')
                 if a:
                     series = self.tostring(
-                        a[0], encoding=unicode, method='text', with_tail=False).strip()
+                        a[0], encoding='unicode', method='text', with_tail=False).strip()
                     if series:
                         ans = (series, series_index)
         # This is found on newer Kindle edition pages on amazon.com
@@ -659,14 +659,14 @@ class Worker(Thread): # Get details {{{
                 a = b.getparent().xpath('./a[@href]')
                 if a:
                     series = self.tostring(
-                        a[0], encoding=unicode, method='text', with_tail=False).partition('(')[0].strip()
+                        a[0], encoding='unicode', method='text', with_tail=False).partition('(')[0].strip()
                     if series:
                         ans = series, series_index
 
         if ans == (None, None):
             desc = root.xpath('//div[@id="ps-content"]/div[@class="buying"]')
             if desc:
-                raw = self.tostring(desc[0], method='text', encoding=unicode)
+                raw = self.tostring(desc[0], method='text', encoding='unicode')
                 raw = re.sub(r'\s+', ' ', raw)
                 match = self.series_pat.search(raw)
                 if match is not None:
@@ -1161,7 +1161,7 @@ class Amazon(Source):
             if not result_links:
                 result_links = root.xpath(r'//li[starts-with(@id, "result_")]//a[@href and contains(@class, "s-access-detail-page")]')
             for a in result_links:
-                title = tostring(a, method='text', encoding=unicode)
+                title = tostring(a, method='text', encoding='unicode')
                 if title_ok(title):
                     url = a.get('href')
                     if url.startswith('/'):
@@ -1177,7 +1177,7 @@ class Amazon(Source):
                 # New amazon markup
                 links = div.xpath('descendant::h3/a[@href]')
                 for a in links:
-                    title = tostring(a, method='text', encoding=unicode)
+                    title = tostring(a, method='text', encoding='unicode')
                     if title_ok(title):
                         url = a.get('href')
                         if url.startswith('/'):
@@ -1192,7 +1192,7 @@ class Amazon(Source):
             for td in root.xpath(
                     r'//div[@id="Results"]/descendant::td[starts-with(@id, "search:Td:")]'):
                 for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'):
-                    title = tostring(a, method='text', encoding=unicode)
+                    title = tostring(a, method='text', encoding='unicode')
                     if title_ok(title):
                         url = a.get('href')
                         if url.startswith('/'):
@@ -99,7 +99,7 @@ def main(args=sys.argv):
         log = buf.getvalue()
 
         result = (metadata_to_opf(result) if opts.opf else
-                  unicode(result).encode('utf-8'))
+                  type(u'')(result).encode('utf-8'))
 
         if opts.verbose:
             print (log, file=sys.stderr)
@@ -203,7 +203,7 @@ class Douban(Source):
                  build_term('author', author_tokens))
        t = 'search'
        q = q.strip()
-        if isinstance(q, unicode):
+        if isinstance(q, type(u'')):
            q = q.encode('utf-8')
        if not q:
            return None
@@ -31,7 +31,7 @@ def parse_html(raw):
 
 def astext(node):
     from lxml import etree
-    return etree.tostring(node, method='text', encoding=unicode,
+    return etree.tostring(node, method='text', encoding='unicode',
                           with_tail=False).strip()
 
 
@@ -110,7 +110,7 @@ class Worker(Thread): # {{{
             for a in desc.xpath('descendant::a[@href]'):
                 del a.attrib['href']
                 a.tag = 'span'
-            desc = etree.tostring(desc, method='html', encoding=unicode).strip()
+            desc = etree.tostring(desc, method='html', encoding='unicode').strip()
 
             # remove all attributes from tags
             desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
@@ -160,7 +160,7 @@ def get_basic_data(browser, log, *skus):
         tags = []
         rating = 0
         for bar in row.xpath('descendant::*[contains(@class, "bgdColorCommunity")]/@style'):
-            m = re.search('width: (\d+)px;.*max-width: (\d+)px', bar)
+            m = re.search(r'width: (\d+)px;.*max-width: (\d+)px', bar)
             if m is not None:
                 rating = float(m.group(1)) / float(m.group(2))
                 break
@@ -283,7 +283,7 @@ class Edelweiss(Source):
         except Exception as e:
             log.exception('Failed to make identify query: %r'%query)
             return as_unicode(e)
-        items = re.search('window[.]items\s*=\s*(.+?);', raw)
+        items = re.search(r'window[.]items\s*=\s*(.+?);', raw)
         if items is None:
             log.error('Failed to get list of matching items')
             log.debug('Response text:')
@@ -214,7 +214,7 @@ class GoogleBooks(Source):
         if author_tokens:
             q += ('+' if q else '') + build_term('author', author_tokens)
 
-        if isinstance(q, unicode):
+        if isinstance(q, type(u'')):
             q = q.encode('utf-8')
         if not q:
             return None
@@ -471,7 +471,7 @@ def identify(log, abort,  # {{{
         for r in presults:
             log('\n\n---')
             try:
-                log(unicode(r))
+                log(type(u'')(r))
             except TypeError:
                 log(repr(r))
         if plog:
@@ -233,7 +233,7 @@ class OverDrive(Source):
         xreq.add_header('Referer', q_init_search)
         xreq.add_header('Accept', 'application/json, text/javascript, */*')
         raw = br.open_novisit(xreq).read()
-        for m in re.finditer(unicode(r'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)'), raw):
+        for m in re.finditer(type(u'')(r'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)'), raw):
             if int(m.group('totalrecords')) == 0:
                 return ''
             elif int(m.group('displayrecords')) >= 1:
@@ -450,7 +450,7 @@ class OverDrive(Source):
 
         if desc:
             desc = desc[0]
-            desc = html.tostring(desc, method='html', encoding=unicode).strip()
+            desc = html.tostring(desc, method='html', encoding='unicode').strip()
             # remove all attributes from tags
             desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
             # Remove comments
@@ -100,7 +100,7 @@ class Ozon(Source):
         qItems.discard('')
         searchText = u' '.join(qItems).strip()
 
-        if isinstance(searchText, unicode):
+        if isinstance(searchText, type(u'')):
             searchText = searchText.encode('utf-8')
         if not searchText:
             return None
@@ -148,7 +148,7 @@ class Ozon(Source):
         else:
             # Redirect page: trying to extract ozon_id from javascript data
             h = HTMLParser()
-            entry_string = (h.unescape(etree.tostring(doc, pretty_print=True, encoding=unicode)))
+            entry_string = (h.unescape(etree.tostring(doc, pretty_print=True, encoding='unicode')))
             json_pat = re.compile(r'dataLayer\s*=\s*(.+)?;')
             json_info = re.search(json_pat, entry_string)
             jsondata = json_info.group(1) if json_info else None
@@ -198,16 +198,16 @@ class Ozon(Source):
 
         reRemoveFromTitle = re.compile(r'[?!:.,;+-/&%"\'=]')
 
-        title = unicode(title).upper() if title else ''
+        title = type(u'')(title).upper() if title else ''
         if reRemoveFromTitle:
             title = reRemoveFromTitle.sub('', title)
         authors = map(_normalizeAuthorNameWithInitials,
-                      map(unicode.upper, map(unicode, authors))) if authors else None
+                      map(type(u'').upper, map(type(u''), authors))) if authors else None
 
         ozon_id = identifiers.get('ozon', None)
         # log.debug(u'ozonid: ', ozon_id)
 
-        unk = unicode(_('Unknown')).upper()
+        unk = type(u'')(_('Unknown')).upper()
 
         if title == unk:
             title = None
@@ -226,7 +226,7 @@ class Ozon(Source):
         def calc_source_relevance(mi): # {{{
             relevance = 0
             if title:
-                mititle = unicode(mi.title).upper() if mi.title else ''
+                mititle = type(u'')(mi.title).upper() if mi.title else ''
 
                 if reRemoveFromTitle:
                     mititle = reRemoveFromTitle.sub('', mititle)
@@ -240,7 +240,7 @@ class Ozon(Source):
                 relevance += 1
 
             if authors:
-                miauthors = map(unicode.upper, map(unicode, mi.authors)) if mi.authors else []
+                miauthors = map(type(u'').upper, map(type(u''), mi.authors)) if mi.authors else []
                 # log.debug('Authors %s vs miauthors %s'%(','.join(authors), ','.join(miauthors)))
 
                 if (in_authors(authors, miauthors)):
@@ -320,13 +320,13 @@ class Ozon(Source):
     # }}}
 
     def to_metadata(self, log, entry): # {{{
-        title = unicode(entry.xpath(u'normalize-space(.//div[@itemprop="name"][1]/text())'))
+        title = type(u'')(entry.xpath(u'normalize-space(.//div[@itemprop="name"][1]/text())'))
         # log.debug(u'Title: -----> %s' % title)
 
-        author = unicode(entry.xpath(u'normalize-space(.//div[contains(@class, "mPerson")])'))
+        author = type(u'')(entry.xpath(u'normalize-space(.//div[contains(@class, "mPerson")])'))
         # log.debug(u'Author: -----> %s' % author)
 
-        norm_authors = map(_normalizeAuthorNameWithInitials, map(unicode.strip, unicode(author).split(u',')))
+        norm_authors = map(_normalizeAuthorNameWithInitials, map(type(u'').strip, type(u'')(author).split(u',')))
         mi = Metadata(title, norm_authors)
 
         ozon_id = entry.get('data-href').split('/')[-2]
@@ -524,7 +524,7 @@ class Ozon(Source):
         # comments, from Javascript data
         beginning = fullString.find(u'FirstBlock')
         end = fullString.find(u'}', beginning)
-        comments = unicode(fullString[beginning + 75:end - 1]).decode("unicode-escape")
+        comments = type(u'')(fullString[beginning + 75:end - 1]).decode("unicode-escape")
         metadata.comments = replace_entities(comments, 'utf-8')
     # }}}
 
@@ -603,7 +603,7 @@ def _format_isbn(log, isbn): # {{{
 
 
 def _translageLanguageToCode(displayLang): # {{{
-    displayLang = unicode(displayLang).strip() if displayLang else None
+    displayLang = type(u'')(displayLang).strip() if displayLang else None
    langTbl = {None: 'ru',
               u'Русский': 'ru',
               u'Немецкий': 'de',
@@ -627,9 +627,9 @@ def _normalizeAuthorNameWithInitials(name): # {{{
     if name:
         re1 = r'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
         re2 = r'^(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?\s+(?P<lname>\S+)$'
-        matcher = re.match(re1, unicode(name), re.UNICODE)
+        matcher = re.match(re1, type(u'')(name), re.UNICODE)
         if not matcher:
-            matcher = re.match(re2, unicode(name), re.UNICODE)
+            matcher = re.match(re2, type(u'')(name), re.UNICODE)
 
         if matcher:
             d = matcher.groupdict()
@@ -653,7 +653,7 @@ def toPubdate(log, yearAsString): # {{{
 # }}}
 
 def _listToUnicodePrintStr(lst): # {{{
-    return u'[' + u', '.join(unicode(x) for x in lst) + u']'
+    return u'[' + u', '.join(type(u'')(x) for x in lst) + u']'
 
 
 # }}}
@@ -26,7 +26,7 @@ Result = namedtuple('Result', 'url title cached_url')
 
 
 def tostring(elem):
-    return etree.tostring(elem, encoding=unicode, method='text', with_tail=False)
+    return etree.tostring(elem, encoding='unicode', method='text', with_tail=False)
 
 
 def browser():
@@ -128,11 +128,11 @@ class Textile(object):
 
     pnct = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]'
     # urlch = r'[\w"$\-_.+!*\'(),";/?:@=&%#{}|\\^~\[\]`]'
-    urlch = '[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]'
+    urlch = r'[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]'
 
     url_schemes = ('http', 'https', 'ftp', 'mailto')
 
-    btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p')
+    btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', r'fn\d+', 'p')
     btag_lite = ('bq', 'bc', 'p')
 
     macro_defaults = [
@@ -292,7 +292,7 @@ class Textile(object):
         """
         self.html_type = html_type
 
-        # text = unicode(text)
+        # text = type(u'')(text)
         text = _normalize_newlines(text)
 
         if self.restricted:
@@ -21,7 +21,9 @@ class BiblioStore(BasicStoreConfig, OpenSearchOPDSStore):
 
     def search(self, query, max_results=10, timeout=60):
         # check for cyrillic symbols before performing search
-        uquery = unicode(query.strip(), 'utf-8')
+        if isinstance(query, bytes):
+            query = query.decode('utf-8')
+        uquery = query.strip()
         reObj = re.search(u'^[а-яА-Я\\d\\s]{3,}$', uquery)
         if not reObj:
             return
@@ -43,7 +43,9 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
 
     def search(self, query, max_results=10, timeout=60):
         # check for cyrillic symbols before performing search
-        uquery = unicode(query.strip(), 'utf-8')
+        if isinstance(query, bytes):
+            query = query.decode('utf-8')
+        uquery = query.strip()
         reObj = re.search(u'^[а-яА-Я\\d\\s]{3,}$', uquery)
         if not reObj:
             return
@@ -56,7 +58,7 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
         br = browser()
         try:
             with closing(br.open(url, timeout=timeout)) as f:
-                f = unicode(f.read(), 'utf-8')
+                f = f.read().decode('utf-8')
                 doc = html.fromstring(f)
 
                 for data in doc.xpath('//ul[@class="superlist booklist"]/li'):
@@ -98,7 +100,7 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
                 with closing(br2.open(base_url + author_url, timeout=timeout)) as f:
                     if counter <= 0:
                         break
-                    f = unicode(f.read(), 'utf-8')
+                    f = f.read().decode('utf-8')
                     doc2 = html.fromstring(f)
 
                     # search for book title
@@ -22,10 +22,9 @@ class EbooksGratuitsStore(BasicStoreConfig, OpenSearchOPDSStore):
         return ascii_text(s)
 
     def search(self, query, max_results=10, timeout=60):
-        query = self.strip_accents(unicode(query))
+        query = self.strip_accents(type(u'')(query))
         for s in OpenSearchOPDSStore.search(self, query, max_results, timeout):
             if s.downloads:
                 s.drm = SearchResult.DRM_UNLOCKED
                 s.price = '$0.00'
                 yield s
-
@@ -49,7 +49,7 @@ class eKnigiStore(BasicStoreConfig, StorePlugin):
 
     def search(self, query, max_results=10, timeout=60):
         # check for cyrillic symbols before performing search
-        uquery = unicode(query.strip(), 'utf-8')
+        uquery = type(u'')(query.strip(), 'utf-8')
         reObj = re.search(u'^[а-яА-Я\\d\\s]{2,}$', uquery)
         if not reObj:
             return
@@ -46,7 +46,7 @@ def search_kobo(query, max_results=10, timeout=60, write_html_to=None):
         cover_url = None
 
         for p in select('p.title', item):
-            title = etree.tostring(p, method='text', encoding=unicode).strip()
+            title = etree.tostring(p, method='text', encoding='unicode').strip()
             for a in select('a[href]', p):
                 url = a.get('href')
                 break
@@ -58,11 +58,11 @@ def search_kobo(query, max_results=10, timeout=60, write_html_to=None):
 
         authors = []
         for a in select('p.contributor-list a.contributor-name', item):
-            authors.append(etree.tostring(a, method='text', encoding=unicode).strip())
+            authors.append(etree.tostring(a, method='text', encoding='unicode').strip())
         authors = authors_to_string(authors)
 
         for p in select('p.price', item):
-            price = etree.tostring(p, method='text', encoding=unicode).strip()
+            price = etree.tostring(p, method='text', encoding='unicode').strip()
             break
         else:
             price = None
@@ -88,7 +88,7 @@ class LitResStore(BasicStoreConfig, StorePlugin):
         authors = data.xpath('.//title-info/author/first-name/text()|'
                              './/title-info/author/middle-name/text()|'
                              './/title-info/author/last-name/text()')
-        sRes.author = u' '.join(map(unicode, authors))
+        sRes.author = u' '.join(map(type(u''), authors))
         sRes.price = data.xpath(xp_template.format('price'))
         # cover vs cover_preview
         sRes.cover_url = data.xpath(xp_template.format('cover_preview'))
@@ -107,7 +107,7 @@ def format_price_in_RUR(price):
     @return: formatted price if possible otherwise original value
     @rtype: unicode
     '''
-    if price and re.match("^\d*?\.\d*?$", price):
+    if price and re.match(r"^\d*?\.\d*?$", price):
        try:
            price = u'{:,.2F} руб.'.format(float(price))
            price = price.replace(',', ' ').replace('.', ',', 1)
@@ -67,7 +67,7 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
             self.mc = '='
         else:
             self.mc = '~'
-        all, any, phrase, none = map(lambda x: unicode(x.text()),
+        all, any, phrase, none = map(lambda x: type(u'')(x.text()),
                 (self.all, self.any, self.phrase, self.none))
         all, any, none = map(self.tokens, (all, any, none))
         phrase = phrase.strip()
@@ -86,11 +86,11 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
         return ans
 
     def token(self):
-        txt = unicode(self.text.text()).strip()
+        txt = type(u'')(self.text.text()).strip()
         if txt:
             if self.negate.isChecked():
                 txt = '!'+txt
-            tok = self.FIELDS[unicode(self.field.currentText())]+txt
+            tok = self.FIELDS[type(u'')(self.field.currentText())]+txt
             if re.search(r'\s', tok):
                 tok = '"%s"'%tok
             return tok
@@ -106,13 +106,13 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
 
         ans = []
         self.box_last_values = {}
-        title = unicode(self.title_box.text()).strip()
+        title = type(u'')(self.title_box.text()).strip()
         if title:
             ans.append('title:"' + self.mc + title + '"')
-        author = unicode(self.author_box.text()).strip()
+        author = type(u'')(self.author_box.text()).strip()
         if author:
             ans.append('author:"' + self.mc + author + '"')
-        format = unicode(self.format_box.text()).strip()
+        format = type(u'')(self.format_box.text()).strip()
         if format:
             ans.append('format:"' + self.mc + format + '"')
         if ans:
@@ -22,7 +22,7 @@ class CacheUpdateThread(Thread, QObject):
 
     total_changed = pyqtSignal(int)
     update_progress = pyqtSignal(int)
-    update_details = pyqtSignal(unicode)
+    update_details = pyqtSignal(type(u''))
 
     def __init__(self, config, seralize_books_function, timeout):
         Thread.__init__(self)
@@ -105,7 +105,7 @@ class BooksModel(QAbstractItemModel):
             return
         descending = order == Qt.DescendingOrder
         self.books.sort(None,
-            lambda x: sort_key(unicode(self.data_as_text(x, col))),
+            lambda x: sort_key(type(u'')(self.data_as_text(x, col))),
             descending)
         if reset:
             self.beginResetModel(), self.endResetModel()
@@ -40,7 +40,7 @@ class MobileReadStoreDialog(QDialog, Ui_Dialog):
         self.restore_state()
 
     def do_search(self):
-        self.results_view.model().search(unicode(self.search_query.text()))
+        self.results_view.model().search(type(u'')(self.search_query.text()))
 
     def open_store(self, index):
         result = self.results_view.model().get_book(index)
@@ -6,10 +6,14 @@ from __future__ import absolute_import, division, print_function, unicode_literals
 
 import sys
 
-from calibre.constants import iswindows, preferred_encoding
+from calibre.constants import iswindows, preferred_encoding, ispy3
 
 
-def getpass(prompt):
+if ispy3:
+    from getpass import getpass
+    getpass
+else:
+    def getpass(prompt):
         if iswindows:
             # getpass is broken on windows with python 2.x and unicode, the
             # below implementation is from the python 3 source code
@@ -9,7 +9,7 @@ from calibre.web.feeds.news import (BasicNewsRecipe, CustomIndexRecipe,
                                     AutomaticNewsRecipe, CalibrePeriodical)
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.utils.config import JSONConfig
-from polyglot.builtins import unicode_type
+from polyglot.builtins import unicode_type, codepoint_to_chr
 
 basic_recipes = (BasicNewsRecipe, AutomaticNewsRecipe, CustomIndexRecipe,
                  CalibrePeriodical)
@@ -44,7 +44,9 @@ def compile_recipe(src):
         'BasicNewsRecipe':BasicNewsRecipe,
         'AutomaticNewsRecipe':AutomaticNewsRecipe,
         'time':time, 're':re,
-        'BeautifulSoup':BeautifulSoup
+        'BeautifulSoup':BeautifulSoup,
+        'unicode': unicode_type,
+        'unichr': codepoint_to_chr,
     }
     exec(src, namespace)
 
@@ -15,20 +15,17 @@ import operator
 import string
 
 from css_selectors.errors import SelectorSyntaxError, ExpressionError
+from polyglot.builtins import unicode_type, codepoint_to_chr
 
-if sys.version_info[0] < 3:
-    _unicode = unicode
-    _unichr = unichr
-else:
-    _unicode = str
-    _unichr = chr
 
 tab = string.maketrans(string.ascii_uppercase, string.ascii_lowercase)
 utab = {c:c+32 for c in range(ord('A'), ord('Z')+1)}
 
 
 def ascii_lower(string):
     """Lower-case, but only in the ASCII range."""
-    return string.translate(utab if isinstance(string, _unicode) else tab)
+    return string.translate(utab if isinstance(string, unicode_type) else tab)
 
 
 def urepr(x):
     if isinstance(x, list):
@@ -38,6 +35,7 @@ def urepr(x):
         ans = ans[1:]
     return ans
 
+
 # Parsed objects
 
 class Selector(object):
@@ -385,6 +383,7 @@ def parse_selector_group(stream):
         else:
             break
 
+
 def parse_selector(stream):
     result, pseudo_element = parse_simple_selector(stream)
     while 1:
@@ -461,7 +460,7 @@ def parse_simple_selector(stream, inside_negation=False):
                     'before', 'after'):
                 # Special case: CSS 2.1 pseudo-elements can have a single ':'
                 # Any new pseudo-element must have two.
-                pseudo_element = _unicode(ident)
+                pseudo_element = unicode_type(ident)
                 continue
             if stream.peek() != ('DELIM', '('):
                 result = Pseudo(result, ident)
@@ -626,11 +625,13 @@ class TokenMacros:
     nmchar = '[_a-z0-9-]|%s|%s' % (escape, nonascii)
     nmstart = '[_a-z]|%s|%s' % (escape, nonascii)
 
+
 def _compile(pattern):
     return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match
 
+
 _match_whitespace = _compile(r'[ \t\r\n\f]+')
-_match_number = _compile('[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
+_match_number = _compile(r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
 _match_hash = _compile('#(?:%(nmchar)s)+')
 _match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*')
 _match_string_by_quote = {
@@ -650,11 +651,12 @@ else:
     def _replace_simple(match):
         return match.group(1)
 
+
     def _replace_unicode(match):
         codepoint = int(match.group(1), 16)
         if codepoint > sys.maxunicode:
             codepoint = 0xFFFD
-        return _unichr(codepoint)
+        return codepoint_to_chr(codepoint)
 
 
 def unescape_ident(value):
@@ -29,7 +29,7 @@ def make_NCName(arg):
     return arg
 
 def cnv_anyURI(attribute, arg, element):
-    return unicode(arg)
+    return type(u'')(arg)
 
 def cnv_boolean(attribute, arg, element):
     if arg.lower() in ("false","no"):
@@ -85,13 +85,13 @@ def cnv_family(attribute, arg, element):
 def __save_prefix(attribute, arg, element):
     prefix = arg.split(':',1)[0]
     if prefix == arg:
-        return unicode(arg)
+        return type(u'')(arg)
     namespace = element.get_knownns(prefix)
     if namespace is None:
         #raise ValueError, "'%s' is an unknown prefix" % str(prefix)
-        return unicode(arg)
+        return type(u'')(arg)
     p = element.get_nsprefix(namespace)
-    return unicode(arg)
+    return type(u'')(arg)
 
 def cnv_formula(attribute, arg, element):
     """ A string containing a formula. Formulas do not have a predefined syntax, but the string should
@@ -218,7 +218,7 @@ def cnv_positiveInteger(attribute, arg, element):
     return str(arg)
 
 def cnv_string(attribute, arg, element):
-    return unicode(arg)
+    return type(u'')(arg)
 
 def cnv_textnoteclass(attribute, arg, element):
     if str(arg) not in ("footnote", "endnote"):
@@ -1480,5 +1480,4 @@ class AttrConverters:
         conversion = attrconverters.get((attribute, None), None)
         if conversion is not None:
             return conversion(attribute, value, element)
-        return unicode(value)
-
+        return type(u'')(value)
@@ -182,7 +182,7 @@ class Node(xml.dom.Node):
     def __unicode__(self):
         val = []
         for c in self.childNodes:
-            val.append(unicode(c))
+            val.append(type(u'')(c))
         return u''.join(val)
 
 defproperty(Node, "firstChild", doc="First child node, or None.")
@@ -253,7 +253,7 @@ class Text(Childless, Node):
     def toXml(self,level,f):
         """ Write XML in UTF-8 """
         if self.data:
-            f.write(_escape(unicode(self.data).encode('utf-8')))
+            f.write(_escape(type(u'')(self.data).encode('utf-8')))
 
 class CDATASection(Childless, Text):
     nodeType = Node.CDATA_SECTION_NODE
@@ -469,7 +469,7 @@ class Element(Node):
                 f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
         for qname in self.attributes.keys():
             prefix = self.get_nsprefix(qname[0])
-            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
+            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(type(u'')(self.attributes[qname]).encode('utf-8')))
         f.write('>')
 
     def write_close_tag(self, level, f):
@@ -483,7 +483,7 @@ class Element(Node):
                 f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
         for qname in self.attributes.keys():
             prefix = self.get_nsprefix(qname[0])
-            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
+            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(type(u'')(self.attributes[qname]).encode('utf-8')))
         if self.childNodes:
             f.write('>')
             for element in self.childNodes:
@@ -509,5 +509,3 @@ class Element(Node):
         """ This is a check to see if the object is an instance of a type """
         obj = element(check_grammar=False)
         return self.qname == obj.qname
-
-
@@ -55,6 +55,7 @@ if False:  # Added by Kovid
 # character etc. styles Since CSS2 has no scope we use a prefix. (Not elegant)
 # In ODF a style can have a parent, these parents can be chained.
 
+
 class StyleToCSS:
 
     """ The purpose of the StyleToCSS class is to contain the rules to convert
@@ -317,6 +318,7 @@ class TagStack:
         if attr in attrs:
             return attrs[attr]
         return None
+
     def count_tags(self, tag):
         c = 0
         for ttag, tattrs in self.stack:
@@ -324,6 +326,7 @@ class TagStack:
                 c = c + 1
         return c
 
+
 special_styles = {
     'S-Emphasis':'em',
     'S-Citation':'cite',
@@ -352,6 +355,8 @@ special_styles = {
 # ODFCONTENTHANDLER
 #
 # -----------------------------------------------------------------------------
+
+
 class ODF2XHTML(handler.ContentHandler):
 
     """ The ODF2XHTML parses an ODF file and produces XHTML"""
@@ -625,9 +630,6 @@ class ODF2XHTML(handler.ContentHandler):
             self.anchors[name] = "anchor%d" % (len(self.anchors) + 1)
         return self.anchors.get(name)
 
-
-    # --------------------------------------------------
-
     def purgedata(self):
         self.data = []
 
@@ -1457,7 +1459,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
         # self.writeout( escape(mark) )
         # Since HTML only knows about endnotes, there is too much risk that the
         # marker is reused in the source. Therefore we force numeric markers
-        self.writeout(unicode(self.currentnote))
+        self.writeout(type(u'')(self.currentnote))
         self.closetag('a')
         self.closetag('sup')
 
@@ -1566,12 +1568,11 @@ dl.notes dd:last-of-type { page-break-after: avoid }
         self.writedata()
         self.purgedata()
 
-
-# -----------------------------------------------------------------------------
-#
-# Reading the file
-#
-# -----------------------------------------------------------------------------
+    # -----------------------------------------------------------------------------
+    #
+    # Reading the file
+    #
+    # -----------------------------------------------------------------------------
 
     def load(self, odffile):
         """ Loads a document into the parser and parses it.
@@ -1593,7 +1594,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
                 self._walknode(c)
                 self.endElementNS(node.qname, node.tagName)
             if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE:
-                self.characters(unicode(node))
+                self.characters(type(u'')(node))
 
     def odf2xhtml(self, odffile):
         """ Load a file and return the XHTML
@@ -8,10 +8,6 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
 
 import unittest
 
-try:
-    unicode
-except NameError:
-    unicode = str
 
 def jsonify(tokens):
     """Turn tokens into "JSON-compatible" data structures."""
@@ -24,6 +20,7 @@ def jsonify(tokens):
     else:
         yield token.type, token.value
 
+
 class BaseTest(unittest.TestCase):
 
     longMessage = True
@@ -34,10 +31,8 @@ class BaseTest(unittest.TestCase):
         """Test not complete error messages but only substrings."""
         self.ae(len(errors), len(expected_errors))
         for error, expected in zip(errors, expected_errors):
-            self.assertIn(expected, unicode(error))
+            self.assertIn(expected, type(u'')(error))
 
     def jsonify_declarations(self, rule):
         return [(decl.name, list(jsonify(decl.value)))
                 for decl in rule.declarations]
-
-
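
The other change that recurs above is mechanical: etree.tostring(elem, encoding=unicode) worked only where the py2 unicode builtin existed, while the documented, version-independent spelling is the string 'unicode'. A small standalone illustration, independent of calibre:

from lxml import etree

root = etree.fromstring('<a><b>text</b></a>')
# encoding='unicode' makes tostring return native text rather than bytes,
# identically on Python 2 and Python 3.
ans = etree.tostring(root, method='text', encoding='unicode', with_tail=False)
assert ans == 'text'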