mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Port calibre.__init__ to use unicode_literals
This commit is contained in:
parent
7a4f44b6aa
commit
fc2409cdd8
@ -1,11 +1,11 @@
|
|||||||
|
from __future__ import unicode_literals, print_function
|
||||||
''' E-book management software'''
|
''' E-book management software'''
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import sys, os, re, time, random, warnings
|
import sys, os, re, time, random, warnings
|
||||||
from polyglot.builtins import (codepoint_to_chr, iteritems,
|
from polyglot.builtins import codepoint_to_chr, unicode_type, range, hasenv
|
||||||
itervalues, unicode_type, range, filter, hasenv)
|
|
||||||
from math import floor
|
from math import floor
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
@ -105,11 +105,11 @@ def confirm_config_name(name):
|
|||||||
return name + '_again'
|
return name + '_again'
|
||||||
|
|
||||||
|
|
||||||
_filename_sanitize_unicode = frozenset((u'\\', u'|', u'?', u'*', u'<',
|
_filename_sanitize_unicode = frozenset(('\\', '|', '?', '*', '<', # no2to3
|
||||||
u'"', u':', u'>', u'+', u'/') + tuple(map(codepoint_to_chr, range(32))))
|
'"', ':', '>', '+', '/') + tuple(map(codepoint_to_chr, range(32)))) # no2to3
|
||||||
|
|
||||||
|
|
||||||
def sanitize_file_name(name, substitute=u'_'):
|
def sanitize_file_name(name, substitute='_'):
|
||||||
'''
|
'''
|
||||||
Sanitize the filename `name`. All invalid characters are replaced by `substitute`.
|
Sanitize the filename `name`. All invalid characters are replaced by `substitute`.
|
||||||
The set of invalid characters is the union of the invalid characters in Windows,
|
The set of invalid characters is the union of the invalid characters in Windows,
|
||||||
@ -122,11 +122,11 @@ def sanitize_file_name(name, substitute=u'_'):
|
|||||||
if isbytestring(substitute):
|
if isbytestring(substitute):
|
||||||
substitute = substitute.decode(filesystem_encoding, 'replace')
|
substitute = substitute.decode(filesystem_encoding, 'replace')
|
||||||
chars = (substitute if c in _filename_sanitize_unicode else c for c in name)
|
chars = (substitute if c in _filename_sanitize_unicode else c for c in name)
|
||||||
one = u''.join(chars)
|
one = ''.join(chars)
|
||||||
one = re.sub(r'\s', u' ', one).strip()
|
one = re.sub(r'\s', ' ', one).strip()
|
||||||
bname, ext = os.path.splitext(one)
|
bname, ext = os.path.splitext(one)
|
||||||
one = re.sub(r'^\.+$', u'_', bname)
|
one = re.sub(r'^\.+$', '_', bname)
|
||||||
one = one.replace(u'..', substitute)
|
one = one.replace('..', substitute)
|
||||||
one += ext
|
one += ext
|
||||||
# Windows doesn't like path components that end with a period or space
|
# Windows doesn't like path components that end with a period or space
|
||||||
if one and one[-1] in ('.', ' '):
|
if one and one[-1] in ('.', ' '):
|
||||||
@ -151,7 +151,7 @@ def prints(*args, **kwargs):
|
|||||||
'''
|
'''
|
||||||
file = kwargs.get('file', sys.stdout)
|
file = kwargs.get('file', sys.stdout)
|
||||||
file = getattr(file, 'buffer', file)
|
file = getattr(file, 'buffer', file)
|
||||||
enc = 'utf-8' if 'CALIBRE_WORKER' in os.environ else preferred_encoding
|
enc = 'utf-8' if hasenv('CALIBRE_WORKER') else preferred_encoding
|
||||||
sep = kwargs.get('sep', ' ')
|
sep = kwargs.get('sep', ' ')
|
||||||
if not isinstance(sep, bytes):
|
if not isinstance(sep, bytes):
|
||||||
sep = sep.encode(enc)
|
sep = sep.encode(enc)
|
||||||
@ -219,7 +219,7 @@ class CommandLineError(Exception):
|
|||||||
|
|
||||||
def setup_cli_handlers(logger, level):
|
def setup_cli_handlers(logger, level):
|
||||||
import logging
|
import logging
|
||||||
if os.environ.get('CALIBRE_WORKER', None) is not None and logger.handlers:
|
if hasenv('CALIBRE_WORKER') and logger.handlers:
|
||||||
return
|
return
|
||||||
logger.setLevel(level)
|
logger.setLevel(level)
|
||||||
if level == logging.WARNING:
|
if level == logging.WARNING:
|
||||||
@ -347,16 +347,16 @@ def get_proxy_info(proxy_scheme, proxy_string):
|
|||||||
'''
|
'''
|
||||||
from polyglot.urllib import urlparse
|
from polyglot.urllib import urlparse
|
||||||
try:
|
try:
|
||||||
proxy_url = u'%s://%s'%(proxy_scheme, proxy_string)
|
proxy_url = '%s://%s'%(proxy_scheme, proxy_string)
|
||||||
urlinfo = urlparse(proxy_url)
|
urlinfo = urlparse(proxy_url)
|
||||||
ans = {
|
ans = {
|
||||||
u'scheme': urlinfo.scheme,
|
'scheme': urlinfo.scheme,
|
||||||
u'hostname': urlinfo.hostname,
|
'hostname': urlinfo.hostname,
|
||||||
u'port': urlinfo.port,
|
'port': urlinfo.port,
|
||||||
u'username': urlinfo.username,
|
'username': urlinfo.username,
|
||||||
u'password': urlinfo.password,
|
'password': urlinfo.password,
|
||||||
}
|
}
|
||||||
except:
|
except Exception:
|
||||||
return None
|
return None
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
@ -373,9 +373,9 @@ def is_mobile_ua(ua):
|
|||||||
def random_user_agent(choose=None, allow_ie=True):
|
def random_user_agent(choose=None, allow_ie=True):
|
||||||
from calibre.utils.random_ua import common_user_agents
|
from calibre.utils.random_ua import common_user_agents
|
||||||
ua_list = common_user_agents()
|
ua_list = common_user_agents()
|
||||||
ua_list = list(filter(lambda x: not is_mobile_ua(x), ua_list))
|
ua_list = [x for x in ua_list if not is_mobile_ua(x)]
|
||||||
if not allow_ie:
|
if not allow_ie:
|
||||||
ua_list = list(filter(lambda x: 'Trident/' not in x and 'Edge/' not in x, ua_list))
|
ua_list = [x for x in ua_list if 'Trident/' not in x and 'Edge/' not in x]
|
||||||
return random.choice(ua_list) if choose is None else ua_list[choose]
|
return random.choice(ua_list) if choose is None else ua_list[choose]
|
||||||
|
|
||||||
|
|
||||||
@ -474,7 +474,6 @@ def detect_ncpus():
|
|||||||
|
|
||||||
|
|
||||||
relpath = os.path.relpath
|
relpath = os.path.relpath
|
||||||
_spat = re.compile(r'^the\s+|^a\s+|^an\s+', re.IGNORECASE)
|
|
||||||
|
|
||||||
|
|
||||||
def walk(dir):
|
def walk(dir):
|
||||||
@ -488,7 +487,7 @@ def strftime(fmt, t=None):
|
|||||||
''' A version of strftime that returns unicode strings and tries to handle dates
|
''' A version of strftime that returns unicode strings and tries to handle dates
|
||||||
before 1900 '''
|
before 1900 '''
|
||||||
if not fmt:
|
if not fmt:
|
||||||
return u''
|
return ''
|
||||||
if t is None:
|
if t is None:
|
||||||
t = time.localtime()
|
t = time.localtime()
|
||||||
if hasattr(t, 'timetuple'):
|
if hasattr(t, 'timetuple'):
|
||||||
@ -504,14 +503,14 @@ def strftime(fmt, t=None):
|
|||||||
if iswindows:
|
if iswindows:
|
||||||
if isinstance(fmt, bytes):
|
if isinstance(fmt, bytes):
|
||||||
fmt = fmt.decode('mbcs', 'replace')
|
fmt = fmt.decode('mbcs', 'replace')
|
||||||
fmt = fmt.replace(u'%e', u'%#d')
|
fmt = fmt.replace('%e', '%#d')
|
||||||
ans = plugins['winutil'][0].strftime(fmt, t)
|
ans = plugins['winutil'][0].strftime(fmt, t)
|
||||||
else:
|
else:
|
||||||
ans = time.strftime(fmt, t)
|
ans = time.strftime(fmt, t)
|
||||||
if isinstance(ans, bytes):
|
if isinstance(ans, bytes):
|
||||||
ans = ans.decode(preferred_encoding, 'replace')
|
ans = ans.decode(preferred_encoding, 'replace')
|
||||||
if early_year:
|
if early_year:
|
||||||
ans = ans.replace(u'_early year hack##', unicode_type(orig_year))
|
ans = ans.replace('_early year hack##', unicode_type(orig_year))
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
@ -519,7 +518,7 @@ def my_unichr(num):
|
|||||||
try:
|
try:
|
||||||
return safe_chr(num)
|
return safe_chr(num)
|
||||||
except (ValueError, OverflowError):
|
except (ValueError, OverflowError):
|
||||||
return u'?'
|
return '?'
|
||||||
|
|
||||||
|
|
||||||
def entity_to_unicode(match, exceptions=[], encoding='cp1252',
|
def entity_to_unicode(match, exceptions=[], encoding='cp1252',
|
||||||
@ -654,25 +653,6 @@ def human_readable(size, sep=' '):
|
|||||||
return size + sep + suffix
|
return size + sep + suffix
|
||||||
|
|
||||||
|
|
||||||
def remove_bracketed_text(src,
|
|
||||||
brackets={u'(':u')', u'[':u']', u'{':u'}'}):
|
|
||||||
from collections import Counter
|
|
||||||
counts = Counter()
|
|
||||||
buf = []
|
|
||||||
src = force_unicode(src)
|
|
||||||
rmap = dict([(v, k) for k, v in iteritems(brackets)])
|
|
||||||
for char in src:
|
|
||||||
if char in brackets:
|
|
||||||
counts[char] += 1
|
|
||||||
elif char in rmap:
|
|
||||||
idx = rmap[char]
|
|
||||||
if counts[idx] > 0:
|
|
||||||
counts[idx] -= 1
|
|
||||||
elif sum(itervalues(counts)) < 1:
|
|
||||||
buf.append(char)
|
|
||||||
return u''.join(buf)
|
|
||||||
|
|
||||||
|
|
||||||
def ipython(user_ns=None):
|
def ipython(user_ns=None):
|
||||||
from calibre.utils.ipython import ipython
|
from calibre.utils.ipython import ipython
|
||||||
ipython(user_ns=user_ns)
|
ipython(user_ns=user_ns)
|
||||||
|
@ -9,9 +9,9 @@ Provides abstraction for metadata reading.writing from a variety of ebook format
|
|||||||
"""
|
"""
|
||||||
import os, sys, re
|
import os, sys, re
|
||||||
|
|
||||||
from calibre import relpath, guess_type, remove_bracketed_text, prints, force_unicode
|
from calibre import relpath, guess_type, prints, force_unicode
|
||||||
from calibre.utils.config_base import tweaks
|
from calibre.utils.config_base import tweaks
|
||||||
from polyglot.builtins import codepoint_to_chr, unicode_type, range, map, zip, getcwd
|
from polyglot.builtins import codepoint_to_chr, unicode_type, range, map, zip, getcwd, iteritems, itervalues
|
||||||
from polyglot.urllib import quote, unquote, urlparse
|
from polyglot.urllib import quote, unquote, urlparse
|
||||||
|
|
||||||
|
|
||||||
@ -39,6 +39,26 @@ def authors_to_string(authors):
|
|||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|
||||||
|
def remove_bracketed_text(src, brackets=None):
|
||||||
|
if brackets is None:
|
||||||
|
brackets = {u'(': u')', u'[': u']', u'{': u'}'}
|
||||||
|
from collections import Counter
|
||||||
|
counts = Counter()
|
||||||
|
buf = []
|
||||||
|
src = force_unicode(src)
|
||||||
|
rmap = {v: k for k, v in iteritems(brackets)}
|
||||||
|
for char in src:
|
||||||
|
if char in brackets:
|
||||||
|
counts[char] += 1
|
||||||
|
elif char in rmap:
|
||||||
|
idx = rmap[char]
|
||||||
|
if counts[idx] > 0:
|
||||||
|
counts[idx] -= 1
|
||||||
|
elif sum(itervalues(counts)) < 1:
|
||||||
|
buf.append(char)
|
||||||
|
return u''.join(buf)
|
||||||
|
|
||||||
|
|
||||||
def author_to_author_sort(author, method=None):
|
def author_to_author_sort(author, method=None):
|
||||||
if not author:
|
if not author:
|
||||||
return u''
|
return u''
|
||||||
|
Loading…
x
Reference in New Issue
Block a user