From fc2409cdd8ea9c46cb842fbbdd80699c213d88eb Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 15 May 2019 21:19:22 +0530
Subject: [PATCH] Port calibre.__init__ to use unicode_literals

---
 src/calibre/__init__.py                 | 74 +++++++++----------------
 src/calibre/ebooks/metadata/__init__.py | 24 +++++++-
 2 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index 6aa14b8473..2312f89528 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -1,11 +1,11 @@
+from __future__ import unicode_literals, print_function
 ''' E-book management software'''
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
 import sys, os, re, time, random, warnings
-from polyglot.builtins import (codepoint_to_chr, iteritems,
-        itervalues, unicode_type, range, filter, hasenv)
+from polyglot.builtins import codepoint_to_chr, unicode_type, range, hasenv
 from math import floor
 from functools import partial
 
@@ -105,11 +105,11 @@ def confirm_config_name(name):
     return name + '_again'
 
 
-_filename_sanitize_unicode = frozenset((u'\\', u'|', u'?', u'*', u'<',
-    u'"', u':', u'>', u'+', u'/') + tuple(map(codepoint_to_chr, range(32))))
+_filename_sanitize_unicode = frozenset(('\\', '|', '?', '*', '<',        # no2to3
+    '"', ':', '>', '+', '/') + tuple(map(codepoint_to_chr, range(32))))  # no2to3
 
 
-def sanitize_file_name(name, substitute=u'_'):
+def sanitize_file_name(name, substitute='_'):
     '''
     Sanitize the filename `name`. All invalid characters are replaced by `substitute`.
     The set of invalid characters is the union of the invalid characters in Windows,
@@ -122,11 +122,11 @@ def sanitize_file_name(name, substitute=u'_'):
     if isbytestring(substitute):
         substitute = substitute.decode(filesystem_encoding, 'replace')
     chars = (substitute if c in _filename_sanitize_unicode else c for c in name)
-    one = u''.join(chars)
-    one = re.sub(r'\s', u' ', one).strip()
+    one = ''.join(chars)
+    one = re.sub(r'\s', ' ', one).strip()
     bname, ext = os.path.splitext(one)
-    one = re.sub(r'^\.+$', u'_', bname)
-    one = one.replace(u'..', substitute)
+    one = re.sub(r'^\.+$', '_', bname)
+    one = one.replace('..', substitute)
     one += ext
     # Windows doesn't like path components that end with a period or space
     if one and one[-1] in ('.', ' '):
@@ -151,7 +151,7 @@ def prints(*args, **kwargs):
     '''
     file = kwargs.get('file', sys.stdout)
     file = getattr(file, 'buffer', file)
-    enc = 'utf-8' if 'CALIBRE_WORKER' in os.environ else preferred_encoding
+    enc = 'utf-8' if hasenv('CALIBRE_WORKER') else preferred_encoding
     sep  = kwargs.get('sep', ' ')
     if not isinstance(sep, bytes):
         sep = sep.encode(enc)
@@ -219,7 +219,7 @@ class CommandLineError(Exception):
 
 def setup_cli_handlers(logger, level):
     import logging
-    if os.environ.get('CALIBRE_WORKER', None) is not None and logger.handlers:
+    if hasenv('CALIBRE_WORKER') and logger.handlers:
         return
     logger.setLevel(level)
     if level == logging.WARNING:
@@ -347,16 +347,16 @@ def get_proxy_info(proxy_scheme, proxy_string):
     '''
     from polyglot.urllib import urlparse
     try:
-        proxy_url = u'%s://%s'%(proxy_scheme, proxy_string)
+        proxy_url = '%s://%s'%(proxy_scheme, proxy_string)
         urlinfo = urlparse(proxy_url)
         ans = {
-            u'scheme': urlinfo.scheme,
-            u'hostname': urlinfo.hostname,
-            u'port': urlinfo.port,
-            u'username': urlinfo.username,
-            u'password': urlinfo.password,
+            'scheme': urlinfo.scheme,
+            'hostname': urlinfo.hostname,
+            'port': urlinfo.port,
+            'username': urlinfo.username,
+            'password': urlinfo.password,
         }
-    except:
+    except Exception:
         return None
     return ans
 
@@ -373,9 +373,9 @@ def is_mobile_ua(ua):
 def random_user_agent(choose=None, allow_ie=True):
     from calibre.utils.random_ua import common_user_agents
     ua_list = common_user_agents()
-    ua_list = list(filter(lambda x: not is_mobile_ua(x), ua_list))
+    ua_list = [x for x in ua_list if not is_mobile_ua(x)]
     if not allow_ie:
-        ua_list = list(filter(lambda x: 'Trident/' not in x and 'Edge/' not in x, ua_list))
+        ua_list = [x for x in ua_list if 'Trident/' not in x and 'Edge/' not in x]
     return random.choice(ua_list) if choose is None else ua_list[choose]
 
 
@@ -420,13 +420,13 @@ def fit_image(width, height, pwidth, pheight):
     '''
     scaled = height > pheight or width > pwidth
     if height > pheight:
-        corrf = pheight/float(height)
+        corrf = pheight / float(height)
         width, height = floor(corrf*width), pheight
     if width > pwidth:
-        corrf = pwidth/float(width)
+        corrf = pwidth / float(width)
         width, height = pwidth, floor(corrf*height)
     if height > pheight:
-        corrf = pheight/float(height)
+        corrf = pheight / float(height)
         width, height = floor(corrf*width), pheight
 
     return scaled, int(width), int(height)
@@ -474,7 +474,6 @@ def detect_ncpus():
 
 
 relpath = os.path.relpath
-_spat = re.compile(r'^the\s+|^a\s+|^an\s+', re.IGNORECASE)
 
 
 def walk(dir):
@@ -488,7 +487,7 @@ def strftime(fmt, t=None):
     ''' A version of strftime that returns unicode strings and tries to handle dates
     before 1900 '''
     if not fmt:
-        return u''
+        return ''
     if t is None:
         t = time.localtime()
     if hasattr(t, 'timetuple'):
@@ -504,14 +503,14 @@ def strftime(fmt, t=None):
     if iswindows:
         if isinstance(fmt, bytes):
             fmt = fmt.decode('mbcs', 'replace')
-        fmt = fmt.replace(u'%e', u'%#d')
+        fmt = fmt.replace('%e', '%#d')
         ans = plugins['winutil'][0].strftime(fmt, t)
     else:
         ans = time.strftime(fmt, t)
         if isinstance(ans, bytes):
             ans = ans.decode(preferred_encoding, 'replace')
     if early_year:
-        ans = ans.replace(u'_early year hack##', unicode_type(orig_year))
+        ans = ans.replace('_early year hack##', unicode_type(orig_year))
     return ans
 
 
@@ -519,7 +518,7 @@ def my_unichr(num):
     try:
         return safe_chr(num)
     except (ValueError, OverflowError):
-        return u'?'
+        return '?'
 
 
 def entity_to_unicode(match, exceptions=[], encoding='cp1252',
@@ -654,25 +653,6 @@ def human_readable(size, sep=' '):
     return size + sep + suffix
 
 
-def remove_bracketed_text(src,
-        brackets={u'(':u')', u'[':u']', u'{':u'}'}):
-    from collections import Counter
-    counts = Counter()
-    buf = []
-    src = force_unicode(src)
-    rmap = dict([(v, k) for k, v in iteritems(brackets)])
-    for char in src:
-        if char in brackets:
-            counts[char] += 1
-        elif char in rmap:
-            idx = rmap[char]
-            if counts[idx] > 0:
-                counts[idx] -= 1
-        elif sum(itervalues(counts)) < 1:
-            buf.append(char)
-    return u''.join(buf)
-
-
 def ipython(user_ns=None):
     from calibre.utils.ipython import ipython
     ipython(user_ns=user_ns)
diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py
index 149733dd7e..1dd23171d1 100644
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -9,9 +9,9 @@ Provides abstraction for metadata reading.writing from a variety of ebook format
 """
 import os, sys, re
 
-from calibre import relpath, guess_type, remove_bracketed_text, prints, force_unicode
+from calibre import relpath, guess_type, prints, force_unicode
 from calibre.utils.config_base import tweaks
-from polyglot.builtins import codepoint_to_chr, unicode_type, range, map, zip, getcwd
+from polyglot.builtins import codepoint_to_chr, unicode_type, range, map, zip, getcwd, iteritems, itervalues
 from polyglot.urllib import quote, unquote, urlparse
 
 
@@ -39,6 +39,26 @@ def authors_to_string(authors):
         return ''
 
 
+def remove_bracketed_text(src, brackets=None):  # return src minus brackets and the text inside them
+    if brackets is None:  # build the default per call instead of sharing one mutable dict
+        brackets = {u'(': u')', u'[': u']', u'{': u'}'}  # maps opening bracket -> closing bracket
+    from collections import Counter  # function-local import
+    counts = Counter()  # opening bracket -> how many of that kind are currently open
+    buf = []  # characters that survive, in order
+    src = force_unicode(src)  # presumably coerces bytes input to text -- confirm in calibre.force_unicode
+    rmap = {v: k for k, v in iteritems(brackets)}  # reverse map: closing bracket -> opening bracket
+    for char in src:
+        if char in brackets:  # opening bracket: never emitted
+            counts[char] += 1  # go one level deeper for this bracket kind
+        elif char in rmap:  # closing bracket: never emitted
+            idx = rmap[char]  # the opening bracket this closer pairs with
+            if counts[idx] > 0:  # a closer with no open partner leaves the counts untouched
+                counts[idx] -= 1  # leave one nesting level
+        elif sum(itervalues(counts)) < 1:  # ordinary character and no bracket of any kind is open
+            buf.append(char)  # keep it; text inside any open bracket is skipped
+    return u''.join(buf)  # joined with a unicode literal
+
+
 def author_to_author_sort(author, method=None):
     if not author:
         return u''