python3: add unicode/unichr wrappers to polyglot

Authored by Eli Schwartz on 2019-03-10 13:49:54 -04:00; committed by Kovid Goyal
parent 77728a15ef
commit cbc42bec23
GPG Key ID: 06BC317B515ACE7C (no known key found for this signature in database)
386 changed files with 2012 additions and 1743 deletions
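
The change replaces direct uses of the Python 2 builtins unicode() and unichr() with names imported from polyglot.builtins (unicode_type and codepoint_to_chr), so the same code runs on both Python 2 and Python 3. A minimal sketch of what such wrappers look like; the real polyglot.builtins module in calibre may differ in detail:

    import sys

    if sys.version_info.major >= 3:
        # On Python 3 text is str and chr() already returns text
        unicode_type = str
        codepoint_to_chr = chr
    else:
        # On Python 2 fall back to the original builtins
        unicode_type = unicode        # noqa: F821 (Python 2 only)
        codepoint_to_chr = unichr     # noqa: F821 (Python 2 only)

Call sites then read unicode_type(err) or codepoint_to_chr(92) regardless of interpreter version, which is exactly the substitution the hunks below perform.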

View File

@@ -12,6 +12,7 @@ from functools import partial
 from contextlib import closing
 from setup import iswindows
+from polyglot.builtins import unicode_type
 if iswindows:
     from ctypes import windll, Structure, POINTER, c_size_t
@@ -52,7 +53,7 @@ def run_worker(job, decorate=True):
     try:
         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     except Exception as err:
-        return False, human_text, unicode(err)
+        return False, human_text, unicode_type(err)
     stdout, stderr = p.communicate()
     if stdout:
         stdout = stdout.decode('utf-8')

View File

@@ -12,7 +12,7 @@ from itertools import chain
 is_ci = os.environ.get('CI', '').lower() == 'true'
 from setup import Command, basenames, __appname__, download_securely
-from polyglot.builtins import itervalues, iteritems
+from polyglot.builtins import codepoint_to_chr, itervalues, iteritems
 def get_opts_from_parser(parser):
@@ -173,7 +173,7 @@ class Kakasi(Command): # {{{
                 continue
             if re.match(r"^$",line):
                 continue
-            pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
+            pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:codepoint_to_chr(int(x.group(1),16)), line)
             dic[pair[0]] = pair[1]
         from calibre.utils.serialize import msgpack_dumps
         with open(dst, 'wb') as f:

View File

@@ -13,7 +13,7 @@ from functools import partial
 from setup import Command, __appname__, __version__, require_git_master, build_cache_dir, edit_file
 from setup.parallel_build import parallel_check_output
-from polyglot.builtins import iteritems
+from polyglot.builtins import codepoint_to_chr, iteritems
 is_ci = os.environ.get('CI', '').lower() == 'true'
@@ -82,7 +82,7 @@ class POT(Command): # {{{
         ans = []
         for lineno, msg in msgs:
             ans.append('#: %s:%d'%(path, lineno))
-            slash = unichr(92)
+            slash = codepoint_to_chr(92)
             msg = msg.replace(slash, slash*2).replace('"', r'\"').replace('\n',
                     r'\n').replace('\r', r'\r').replace('\t', r'\t')
             ans.append('msgid "%s"'%msg)

View File

@@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import sys, os, re, time, random, warnings
-from polyglot.builtins import builtins
+from polyglot.builtins import builtins, codepoint_to_chr, unicode_type
 builtins.__dict__['dynamic_property'] = lambda func: func(None)
 from math import floor
 from functools import partial
@@ -77,7 +77,7 @@ def get_types_map():
 def to_unicode(raw, encoding='utf-8', errors='strict'):
-    if isinstance(raw, unicode):
+    if isinstance(raw, unicode_type):
         return raw
     return raw.decode(encoding, errors)
@@ -113,7 +113,7 @@ def confirm_config_name(name):
 _filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+/]')
 _filename_sanitize_unicode = frozenset([u'\\', u'|', u'?', u'*', u'<',
-                            u'"', u':', u'>', u'+', u'/'] + list(map(unichr, xrange(32))))
+                            u'"', u':', u'>', u'+', u'/'] + list(map(codepoint_to_chr, xrange(32))))
 def sanitize_file_name(name, substitute='_', as_unicode=False):
@@ -126,7 +126,7 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
     *NOTE:* This function always returns byte strings, not unicode objects. The byte strings
     are encoded in the filesystem encoding of the platform, or UTF-8.
     '''
-    if isinstance(name, unicode):
+    if isinstance(name, unicode_type):
         name = name.encode(filesystem_encoding, 'ignore')
     one = _filename_sanitize.sub(substitute, name)
     one = re.sub(r'\s', ' ', one).strip()
@@ -198,7 +198,7 @@ def prints(*args, **kwargs):
     safe_encode = kwargs.get('safe_encode', False)
     count = 0
     for i, arg in enumerate(args):
-        if isinstance(arg, unicode):
+        if isinstance(arg, unicode_type):
             if iswindows:
                 from calibre.utils.terminal import Detect
                 cs = Detect(file)
@@ -222,8 +222,8 @@ def prints(*args, **kwargs):
             try:
                 arg = str(arg)
             except ValueError:
-                arg = unicode(arg)
-            if isinstance(arg, unicode):
+                arg = unicode_type(arg)
+            if isinstance(arg, unicode_type):
                 try:
                     arg = arg.encode(enc)
                 except UnicodeEncodeError:
@@ -288,7 +288,7 @@ def load_library(name, cdll):
 def filename_to_utf8(name):
     '''Return C{name} encoded in utf8. Unhandled characters are replaced. '''
-    if isinstance(name, unicode):
+    if isinstance(name, unicode_type):
         return name.encode('utf8')
     codec = 'cp1252' if iswindows else 'utf8'
     return name.decode(codec, 'replace').encode('utf8')
@@ -557,7 +557,7 @@ def strftime(fmt, t=None):
     else:
         ans = time.strftime(fmt, t).decode(preferred_encoding, 'replace')
     if early_year:
-        ans = ans.replace(u'_early year hack##', unicode(orig_year))
+        ans = ans.replace(u'_early year hack##', unicode_type(orig_year))
     return ans
@@ -669,7 +669,7 @@ def force_unicode(obj, enc=preferred_encoding):
 def as_unicode(obj, enc=preferred_encoding):
     if not isbytestring(obj):
         try:
-            obj = unicode(obj)
+            obj = unicode_type(obj)
         except:
             try:
                 obj = str(obj)

View File

@@ -2,12 +2,12 @@
 # vim:fileencoding=utf-8
 # License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
 from __future__ import print_function
-from polyglot.builtins import map
+from polyglot.builtins import map, unicode_type
 import sys, locale, codecs, os, importlib, collections
 __appname__ = u'calibre'
 numeric_version = (3, 40, 1)
-__version__ = u'.'.join(map(unicode, numeric_version))
+__version__ = u'.'.join(map(unicode_type, numeric_version))
 __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
 '''
@@ -300,7 +300,7 @@ def get_portable_base():
 def get_unicode_windows_env_var(name):
     getenv = plugins['winutil'][0].getenv
-    return getenv(unicode(name))
+    return getenv(unicode_type(name))
 def get_windows_username():
def get_windows_username(): def get_windows_username():

View File

@@ -7,6 +7,7 @@ import os, sys, zipfile, importlib
 from calibre.constants import numeric_version, iswindows, isosx
 from calibre.ptempfile import PersistentTemporaryFile
+from polyglot.builtins import unicode_type
 platform = 'linux'
 if iswindows:
@@ -195,7 +196,7 @@ class Plugin(object): # {{{
             config_dialog.exec_()
             if config_dialog.result() == QDialog.Accepted:
-                sc = unicode(sc.text()).strip()
+                sc = unicode_type(sc.text()).strip()
                 customize_plugin(self, sc)
             geom = bytearray(config_dialog.saveGeometry())

View File

@@ -10,6 +10,7 @@ from calibre.customize import (FileTypePlugin, MetadataReaderPlugin,
 from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract, get_comic_metadata
 from calibre.ebooks.html.to_zip import HTML2ZIP
+from polyglot.builtins import unicode_type
 plugins = []
@@ -64,23 +65,23 @@ class TXT2TXTZ(FileTypePlugin):
         images = []
         # Textile
-        for m in re.finditer(unicode(r'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))'), txt):
+        for m in re.finditer(unicode_type(r'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))'), txt):
             path = m.group('path')
             if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
                 images.append(path)
         # Markdown inline
-        for m in re.finditer(unicode(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)'), txt): # noqa
+        for m in re.finditer(unicode_type(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)'), txt): # noqa
             path = m.group('path')
             if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
                 images.append(path)
         # Markdown reference
         refs = {}
-        for m in re.finditer(unicode(r'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$'), txt):
+        for m in re.finditer(unicode_type(r'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$'), txt):
             if m.group('id') and m.group('path'):
                 refs[m.group('id')] = m.group('path')
-        for m in re.finditer(unicode(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]'), txt): # noqa
+        for m in re.finditer(unicode_type(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]'), txt): # noqa
             path = refs.get(m.group('id'), None)
             if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
                 images.append(path)

View File

@@ -6,6 +6,7 @@ import re, os, shutil
 from calibre import CurrentDir
 from calibre.customize import Plugin
+from polyglot.builtins import unicode_type
 class ConversionOption(object):
@@ -79,7 +80,7 @@ class OptionRecommendation(object):
                 self.option.choices:
             raise ValueError('OpRec: %s: Recommended value not in choices'%
                     self.option.name)
-        if not (isinstance(self.recommended_value, (int, float, str, unicode)) or self.recommended_value is None):
+        if not (isinstance(self.recommended_value, (int, float, str, unicode_type)) or self.recommended_value is None):
             raise ValueError('OpRec: %s:'%self.option.name + repr(
                 self.recommended_value) + ' is not a string or a number')
@@ -340,7 +341,7 @@ class OutputFormatPlugin(Plugin):
     @property
     def is_periodical(self):
         return self.oeb.metadata.publication_type and \
-            unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:')
+            unicode_type(self.oeb.metadata.publication_type[0]).startswith('periodical:')
def specialize_options(self, log, opts, input_fmt): def specialize_options(self, log, opts, input_fmt):
''' '''

View File

@@ -2,7 +2,7 @@
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                         print_function)
-from polyglot.builtins import map
+from polyglot.builtins import map, unicode_type
 __license__ = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
@@ -216,7 +216,7 @@ class PluginLoader(object):
             if ans.minimum_calibre_version > numeric_version:
                 raise InvalidPlugin(
                     'The plugin at %s needs a version of calibre >= %s' %
-                    (as_unicode(path_to_zip_file), '.'.join(map(unicode,
+                    (as_unicode(path_to_zip_file), '.'.join(map(unicode_type,
                         ans.minimum_calibre_version))))
             if platform not in ans.supported_platforms:
@@ -231,7 +231,7 @@ class PluginLoader(object):
             raise
     def _locate_code(self, zf, path_to_zip_file):
-        names = [x if isinstance(x, unicode) else x.decode('utf-8') for x in
+        names = [x if isinstance(x, unicode_type) else x.decode('utf-8') for x in
                 zf.namelist()]
         names = [x[1:] if x[0] == '/' else x for x in names]

View File

@@ -8,7 +8,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 import os, time, re
 from collections import defaultdict
-from polyglot.builtins import map
+from polyglot.builtins import map, unicode_type
 from contextlib import contextmanager
 from functools import partial
@@ -69,7 +69,7 @@ def metadata_extensions():
     # but not actually added)
     global _metadata_extensions
     if _metadata_extensions is None:
-        _metadata_extensions = frozenset(map(unicode, BOOK_EXTENSIONS)) | {'opf'}
+        _metadata_extensions = frozenset(map(unicode_type, BOOK_EXTENSIONS)) | {'opf'}
     return _metadata_extensions
@@ -143,7 +143,7 @@ def find_books_in_directory(dirpath, single_book_per_directory, compiled_rules=(
         for path in listdir_impl(dirpath, sort_by_mtime=True):
             key, ext = splitext(path)
             if allow_path(path, ext, compiled_rules):
-                books[icu_lower(key) if isinstance(key, unicode) else key.lower()][ext] = path
+                books[icu_lower(key) if isinstance(key, unicode_type) else key.lower()][ext] = path
         for formats in books.itervalues():
             if formats_ok(formats):

View File

@@ -12,7 +12,7 @@ import os, shutil, uuid, json, glob, time, hashlib, errno, sys
 from functools import partial
 import apsw
-from polyglot.builtins import reraise
+from polyglot.builtins import unicode_type, reraise
 from calibre import isbytestring, force_unicode, prints, as_unicode
 from calibre.constants import (iswindows, filesystem_encoding,
@@ -93,7 +93,7 @@ class DBPrefs(dict): # {{{
         dict.__setitem__(self, key, val)
     def raw_to_object(self, raw):
-        if not isinstance(raw, unicode):
+        if not isinstance(raw, unicode_type):
             raw = raw.decode(preferred_encoding)
         return json.loads(raw, object_hook=from_json)
@@ -561,10 +561,10 @@ class DB(object):
                 prints('found user category case overlap', catmap[uc])
                 cat = catmap[uc][0]
                 suffix = 1
-                while icu_lower((cat + unicode(suffix))) in catmap:
+                while icu_lower((cat + unicode_type(suffix))) in catmap:
                     suffix += 1
-                prints('Renaming user category %s to %s'%(cat, cat+unicode(suffix)))
-                user_cats[cat + unicode(suffix)] = user_cats[cat]
+                prints('Renaming user category %s to %s'%(cat, cat+unicode_type(suffix)))
+                user_cats[cat + unicode_type(suffix)] = user_cats[cat]
                 del user_cats[cat]
                 cats_changed = True
         if cats_changed:
@@ -670,23 +670,23 @@ class DB(object):
            if d['is_multiple']:
                if x is None:
                    return []
-               if isinstance(x, (str, unicode, bytes)):
+               if isinstance(x, (str, unicode_type, bytes)):
                    x = x.split(d['multiple_seps']['ui_to_list'])
                x = [y.strip() for y in x if y.strip()]
                x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
-                   unicode) else y for y in x]
+                   unicode_type) else y for y in x]
                return [u' '.join(y.split()) for y in x]
            else:
-               return x if x is None or isinstance(x, unicode) else \
+               return x if x is None or isinstance(x, unicode_type) else \
                    x.decode(preferred_encoding, 'replace')
        def adapt_datetime(x, d):
-           if isinstance(x, (str, unicode, bytes)):
+           if isinstance(x, (str, unicode_type, bytes)):
               x = parse_date(x, assume_utc=False, as_utc=False)
           return x
       def adapt_bool(x, d):
-           if isinstance(x, (str, unicode, bytes)):
+           if isinstance(x, (str, unicode_type, bytes)):
              x = x.lower()
              if x == 'true':
                  x = True
@@ -707,7 +707,7 @@ class DB(object):
     def adapt_number(x, d):
         if x is None:
             return None
-        if isinstance(x, (str, unicode, bytes)):
+        if isinstance(x, (str, unicode_type, bytes)):
             if x.lower() == 'none':
                 return None
         if d['datatype'] == 'int':
@@ -1239,7 +1239,7 @@ class DB(object):
            return self._library_id_
        def fset(self, val):
-           self._library_id_ = unicode(val)
+           self._library_id_ = unicode_type(val)
            self.execute('''
                DELETE FROM library_id;
                INSERT INTO library_id (uuid) VALUES (?);
@@ -1715,7 +1715,7 @@ class DB(object):
                [(book_id, fmt.upper()) for book_id in book_ids])
    def set_conversion_options(self, options, fmt):
-       options = [(book_id, fmt.upper(), buffer(pickle_binary_string(data.encode('utf-8') if isinstance(data, unicode) else data)))
+       options = [(book_id, fmt.upper(), buffer(pickle_binary_string(data.encode('utf-8') if isinstance(data, unicode_type) else data)))
                   for book_id, data in options.iteritems()]
        self.executemany('INSERT OR REPLACE INTO conversion_options(book,format,data) VALUES (?,?,?)', options)
@@ -1754,7 +1754,7 @@ class DB(object):
                copyfile_using_links(src, dest, dest_is_dir=False)
                old_files.add(src)
            x = path_map[x]
-           if not isinstance(x, unicode):
+           if not isinstance(x, unicode_type):
                x = x.decode(filesystem_encoding, 'replace')
            progress(x, i+1, total)

View File

@@ -11,7 +11,7 @@ import os, traceback, random, shutil, operator
 from io import BytesIO
 from collections import defaultdict, Set, MutableSet
 from functools import wraps, partial
-from polyglot.builtins import zip
+from polyglot.builtins import unicode_type, zip
 from time import time
 from calibre import isbytestring, as_unicode
@@ -528,14 +528,14 @@ class Cache(object):
     @read_api
     def get_item_id(self, field, item_name):
         ' Return the item id for item_name (case-insensitive) '
-        rmap = {icu_lower(v) if isinstance(v, unicode) else v:k for k, v in self.fields[field].table.id_map.iteritems()}
-        return rmap.get(icu_lower(item_name) if isinstance(item_name, unicode) else item_name, None)
+        rmap = {icu_lower(v) if isinstance(v, unicode_type) else v:k for k, v in self.fields[field].table.id_map.iteritems()}
+        return rmap.get(icu_lower(item_name) if isinstance(item_name, unicode_type) else item_name, None)
     @read_api
     def get_item_ids(self, field, item_names):
         ' Return the item id for item_name (case-insensitive) '
-        rmap = {icu_lower(v) if isinstance(v, unicode) else v:k for k, v in self.fields[field].table.id_map.iteritems()}
-        return {name:rmap.get(icu_lower(name) if isinstance(name, unicode) else name, None) for name in item_names}
+        rmap = {icu_lower(v) if isinstance(v, unicode_type) else v:k for k, v in self.fields[field].table.id_map.iteritems()}
+        return {name:rmap.get(icu_lower(name) if isinstance(name, unicode_type) else name, None) for name in item_names}
     @read_api
     def author_data(self, author_ids=None):

View File

@@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
 import copy
 from functools import partial
-from polyglot.builtins import map
+from polyglot.builtins import unicode_type, map
 from calibre.ebooks.metadata import author_to_author_sort
 from calibre.utils.config_base import tweaks
@@ -47,7 +47,7 @@ class Tag(object):
         return u'%s:%s:%s:%s:%s'%(self.name, self.count, self.id, self.state, self.category)
     def __str__(self):
-        return unicode(self).encode('utf-8')
+        return unicode_type(self).encode('utf-8')
     def __repr__(self):
         return str(self)
@@ -101,8 +101,8 @@ def clean_user_categories(dbcache):
         if len(comps) == 0:
             i = 1
             while True:
-                if unicode(i) not in user_cats:
-                    new_cats[unicode(i)] = user_cats[k]
+                if unicode_type(i) not in user_cats:
+                    new_cats[unicode_type(i)] = user_cats[k]
                     break
                 i += 1
         else:

View File

@@ -10,6 +10,7 @@ from textwrap import TextWrapper
 from io import BytesIO
 from calibre import prints
+from polyglot.builtins import unicode_type
 readonly = True
 version = 0 # change this if you change signature of implementation()
@@ -79,7 +80,7 @@ def do_list(fields, data, opts):
     widths = list(map(lambda x: 0, fields))
     for i in data:
         for j, field in enumerate(fields):
-            widths[j] = max(widths[j], max(len(field), len(unicode(i[field]))))
+            widths[j] = max(widths[j], max(len(field), len(unicode_type(i[field]))))
     screen_width = geometry()[0]
     if not screen_width:
@@ -110,7 +111,7 @@ def do_list(fields, data, opts):
     for record in data:
         text = [
-            wrappers[i].wrap(unicode(record[field]))
+            wrappers[i].wrap(unicode_type(record[field]))
            for i, field in enumerate(fields)
        ]
        lines = max(map(len, text))
@@ -129,7 +130,7 @@ def do_csv(fields, data, opts):
     for d in data:
         row = [d[f] for f in fields]
         csv_print.writerow([
-            x if isinstance(x, bytes) else unicode(x).encode('utf-8') for x in row
+            x if isinstance(x, bytes) else unicode_type(x).encode('utf-8') for x in row
         ])
     print(buf.getvalue())
@@ -164,11 +165,11 @@ def main(opts, args, dbctx):
             is_rating = category_metadata(category)['datatype'] == 'rating'
             for tag in category_data[category]:
                 if is_rating:
-                    tag.name = unicode(len(tag.name))
+                    tag.name = unicode_type(len(tag.name))
                 data.append({
                     'category': category,
                     'tag_name': tag.name,
-                    'count': unicode(tag.count),
+                    'count': unicode_type(tag.count),
                     'rating': fmtr(tag.avg_rating),
                 })
         else:
@@ -176,7 +177,7 @@ def main(opts, args, dbctx):
             data.append({
                 'category': category,
                 'tag_name': _('CATEGORY ITEMS'),
-                'count': unicode(len(category_data[category])),
+                'count': unicode_type(len(category_data[category])),
                 'rating': ''
             })

View File

@@ -11,6 +11,7 @@ from calibre.ebooks.metadata.book.base import field_from_string
 from calibre.ebooks.metadata.book.serialize import read_cover
 from calibre.ebooks.metadata.opf import get_metadata
 from calibre.srv.changes import metadata
+from polyglot.builtins import unicode_type
 readonly = False
 version = 0 # change this if you change signature of implementation()
@@ -181,5 +182,5 @@ def main(opts, args, dbctx):
     if not final_mi:
         raise SystemExit(_('No book with id: %s in the database') % book_id)
-    prints(unicode(final_mi))
+    prints(unicode_type(final_mi))
     return 0

View File

@@ -9,6 +9,7 @@ import sys
 from calibre import prints
 from calibre.ebooks.metadata.opf2 import OPFCreator
+from polyglot.builtins import unicode_type
 readonly = True
 version = 0 # change this if you change signature of implementation()
@@ -52,6 +53,6 @@ def main(opts, args, dbctx):
         mi = OPFCreator(os.getcwdu(), mi)
         mi.render(sys.stdout)
     else:
-        prints(unicode(mi))
+        prints(unicode_type(mi))
     return 0

View File

@@ -15,6 +15,7 @@ from copy import deepcopy
 from calibre.ebooks.metadata.book.base import Metadata, SIMPLE_GET, TOP_LEVEL_IDENTIFIERS, NULL_VALUES, ALL_METADATA_FIELDS
 from calibre.ebooks.metadata.book.formatter import SafeFormat
 from calibre.utils.date import utcnow
+from polyglot.builtins import unicode_type
 # Lazy format metadata retrieval {{{
 '''
@@ -46,7 +47,7 @@ class MutableBase(object):
     @resolved
     def __unicode__(self):
-        return unicode(self._values)
+        return unicode_type(self._values)
     @resolved
     def __len__(self):

View File

@@ -11,6 +11,7 @@ import os
 from calibre import prints
 from calibre.utils.date import isoformat, DEFAULT_DATE
+from polyglot.builtins import unicode_type
 class SchemaUpgrade(object):
@@ -601,7 +602,7 @@ class SchemaUpgrade(object):
             id_ = str(id_)
             fname = custom_recipe_filename(id_, title)
             custom_recipes[id_] = (title, fname)
-            if isinstance(script, unicode):
+            if isinstance(script, unicode_type):
                 script = script.encode('utf-8')
             with open(os.path.join(bdir, fname), 'wb') as f:
                 f.write(script)

View File

@@ -19,6 +19,7 @@ from calibre.utils.date import parse_date, UNDEFINED_DATE, now, dt_as_local
 from calibre.utils.icu import primary_contains, sort_key
 from calibre.utils.localization import lang_map, canonicalize_lang
 from calibre.utils.search_query_parser import SearchQueryParser, ParseException
+from polyglot.builtins import unicode_type
 CONTAINS_MATCH = 0
 EQUALS_MATCH = 1
@@ -148,7 +149,7 @@ class DateSearch(object): # {{{
         if query == 'false':
             for v, book_ids in field_iter():
-                if isinstance(v, (str, unicode)):
+                if isinstance(v, (str, unicode_type)):
                     v = parse_date(v)
                 if v is None or v <= UNDEFINED_DATE:
                     matches |= book_ids
@@ -156,7 +157,7 @@ class DateSearch(object): # {{{
         if query == 'true':
             for v, book_ids in field_iter():
-                if isinstance(v, (str, unicode)):
+                if isinstance(v, (str, unicode_type)):
                     v = parse_date(v)
                 if v is not None and v > UNDEFINED_DATE:
                     matches |= book_ids
@@ -198,7 +199,7 @@ class DateSearch(object): # {{{
         field_count = query.count('/') + 1
         for v, book_ids in field_iter():
-            if isinstance(v, (str, unicode)):
+            if isinstance(v, (str, unicode_type)):
                 v = parse_date(v)
             if v is not None and relop(dt_as_local(v), qd, field_count):
                 matches |= book_ids
@@ -407,7 +408,7 @@ class SavedSearchQueries(object): # {{{
         return self._db()
     def force_unicode(self, x):
-        if not isinstance(x, unicode):
+        if not isinstance(x, unicode_type):
             x = x.decode(preferred_encoding, 'replace')
         return x

View File

@@ -9,7 +9,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 import os, errno, cPickle, sys, re
 from locale import localeconv
 from collections import OrderedDict, namedtuple
-from polyglot.builtins import map
+from polyglot.builtins import map, unicode_type
 from threading import Lock
 from calibre import as_unicode, prints
@@ -19,7 +19,7 @@ from calibre.utils.localization import canonicalize_lang
 def force_to_bool(val):
-    if isinstance(val, (str, unicode)):
+    if isinstance(val, (str, unicode_type)):
         try:
             val = icu_lower(val)
             if not val:

View File

@@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
 import weakref, operator
 from functools import partial
 from itertools import izip, imap
-from polyglot.builtins import map
+from polyglot.builtins import map, unicode_type
 from calibre.ebooks.metadata import title_sort
 from calibre.utils.config_base import tweaks, prefs
@@ -374,7 +374,7 @@ class View(object):
             self.marked_ids = dict.fromkeys(id_dict, u'true')
         else:
             # Ensure that all the items in the dict are text
-            self.marked_ids = dict(izip(id_dict.iterkeys(), imap(unicode,
+            self.marked_ids = dict(izip(id_dict.iterkeys(), imap(unicode_type,
                 id_dict.itervalues())))
         # This invalidates all searches in the cache even though the cache may
         # be shared by multiple views. This is not ideal, but...
@@ -432,4 +432,3 @@ class View(object):
         self._map_filtered = ids + self._map_filtered
         if prefs['mark_new_books']:
             self.toggle_marked_ids(ids)
-

View File

@@ -10,18 +10,15 @@ __docformat__ = 'restructuredtext en'
 import re
 from functools import partial
 from datetime import datetime
-from polyglot.builtins import zip
+from polyglot.builtins import unicode_type, zip
-from calibre.constants import preferred_encoding, ispy3
+from calibre.constants import preferred_encoding
 from calibre.ebooks.metadata import author_to_author_sort, title_sort
 from calibre.utils.date import (
     parse_only_date, parse_date, UNDEFINED_DATE, isoformat, is_date_undefined)
 from calibre.utils.localization import canonicalize_lang
 from calibre.utils.icu import strcmp
-if ispy3:
-    unicode = str
 # Convert data into values suitable for the db {{{
@@ -32,7 +29,7 @@ def sqlite_datetime(x):
 def single_text(x):
     if x is None:
         return x
-    if not isinstance(x, unicode):
+    if not isinstance(x, unicode_type):
         x = x.decode(preferred_encoding, 'replace')
     x = x.strip()
     return x if x else None
@@ -60,7 +57,7 @@ def multiple_text(sep, ui_sep, x):
         return ()
     if isinstance(x, bytes):
         x = x.decode(preferred_encoding, 'replace')
-    if isinstance(x, unicode):
+    if isinstance(x, unicode_type):
         x = x.split(sep)
     else:
         x = (y.decode(preferred_encoding, 'replace') if isinstance(y, bytes)
@@ -72,7 +69,7 @@ def multiple_text(sep, ui_sep, x):
 def adapt_datetime(x):
-    if isinstance(x, (unicode, bytes)):
+    if isinstance(x, (unicode_type, bytes)):
         x = parse_date(x, assume_utc=False, as_utc=False)
     if x and is_date_undefined(x):
         x = UNDEFINED_DATE
@@ -80,7 +77,7 @@ def adapt_datetime(x):
 def adapt_date(x):
-    if isinstance(x, (unicode, bytes)):
+    if isinstance(x, (unicode_type, bytes)):
         x = parse_only_date(x)
     if x is None or is_date_undefined(x):
         x = UNDEFINED_DATE
@@ -90,14 +87,14 @@ def adapt_date(x):
 def adapt_number(typ, x):
     if x is None:
         return None
-    if isinstance(x, (unicode, bytes)):
+    if isinstance(x, (unicode_type, bytes)):
         if not x or x.lower() == 'none':
             return None
     return typ(x)
 def adapt_bool(x):
-    if isinstance(x, (unicode, bytes)):
+    if isinstance(x, (unicode_type, bytes)):
         x = x.lower()
         if x == 'true':
             x = True

View File

@@ -14,6 +14,7 @@ import sys
 from calibre.devices.usbms.driver import USBMS
 from calibre.ebooks.metadata import string_to_authors
+from polyglot.builtins import unicode_type
 class JETBOOK(USBMS):
@@ -64,7 +65,7 @@ class JETBOOK(USBMS):
         def check_unicode(txt):
             txt = txt.replace('_', ' ')
-            if not isinstance(txt, unicode):
+            if not isinstance(txt, unicode_type):
                 return txt.decode(sys.getfilesystemencoding(), 'replace')
             return txt

View File

@@ -15,6 +15,7 @@ from calibre.constants import DEBUG
 from calibre.devices.kindle.bookmark import Bookmark
 from calibre.devices.usbms.driver import USBMS
 from calibre import strftime, fsync, prints
+from polyglot.builtins import unicode_type
 '''
 Notes on collections:
@@ -113,7 +114,7 @@ class KINDLE(USBMS):
         match = cls.WIRELESS_FILE_NAME_PATTERN.match(os.path.basename(path))
         if match is not None:
             mi.title = match.group('title')
-            if not isinstance(mi.title, unicode):
+            if not isinstance(mi.title, unicode_type):
                 mi.title = mi.title.decode(sys.getfilesystemencoding(),
                         'replace')
         return mi
@@ -291,9 +292,9 @@ class KINDLE(USBMS):
                 hrTag['class'] = 'annotations_divider'
                 user_notes_soup.insert(0, hrTag)
-                mi.comments += unicode(user_notes_soup.prettify())
+                mi.comments += unicode_type(user_notes_soup.prettify())
             else:
-                mi.comments = unicode(user_notes_soup.prettify())
+                mi.comments = unicode_type(user_notes_soup.prettify())
             # Update library comments
             db.set_comment(db_id, mi.comments)
@@ -547,7 +548,7 @@ class KINDLE2(KINDLE):
         cust_col_name = opts.extra_customization[self.OPT_APNX_METHOD_COL]
         if cust_col_name:
             try:
-                temp = unicode(metadata.get(cust_col_name)).lower()
+                temp = unicode_type(metadata.get(cust_col_name)).lower()
                 if temp in self.EXTRA_CUSTOMIZATION_CHOICES[self.OPT_APNX_METHOD]:
                     method = temp
                 else:

View File

@@ -14,6 +14,7 @@ from calibre.devices.usbms.books import CollectionsBookList
 from calibre.utils.config_base import prefs
 from calibre.devices.usbms.driver import debug_print
 from calibre.ebooks.metadata import author_to_author_sort
+from polyglot.builtins import unicode_type
 class Book(Book_):
@@ -95,7 +96,7 @@ class Book(Book_):
         ans = [u"Kobo metadata:"]
         def fmt(x, y):
-            ans.append(u'%-20s: %s'%(unicode(x), unicode(y)))
+            ans.append(u'%-20s: %s'%(unicode_type(x), unicode_type(y)))
         if self.contentID:
             fmt('Content ID', self.contentID)

View File

@@ -32,6 +32,7 @@ from calibre import prints, fsync
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.constants import DEBUG
 from calibre.utils.config_base import prefs
+from polyglot.builtins import unicode_type
 EPUB_EXT = '.epub'
 KEPUB_EXT = '.kepub'
@@ -43,7 +44,7 @@ def qhash(inputstr):
     instr = b""
     if isinstance(inputstr, bytes):
         instr = inputstr
-    elif isinstance(inputstr, unicode):
+    elif isinstance(inputstr, unicode_type):
         instr = inputstr.encode("utf8")
     else:
         return -1
@@ -1323,9 +1324,9 @@ class KOBO(USBMS):
                 hrTag['class'] = 'annotations_divider'
                 user_notes_soup.insert(0, hrTag)
-                mi.comments += unicode(user_notes_soup.prettify())
+                mi.comments += unicode_type(user_notes_soup.prettify())
             else:
-                mi.comments = unicode(user_notes_soup.prettify())
+                mi.comments = unicode_type(user_notes_soup.prettify())
             # Update library comments
             db.set_comment(db_id, mi.comments)
@@ -1824,7 +1825,7 @@ class KOBOTOUCH(KOBO):
                 bookshelves.append(row['ShelfName'])
             cursor.close()
-            # debug_print("KoboTouch:get_bookshelvesforbook - count bookshelves=" + unicode(count_bookshelves))
+            # debug_print("KoboTouch:get_bookshelvesforbook - count bookshelves=" + unicode_type(count_bookshelves))
             return bookshelves
         self.debug_index = 0
@@ -2394,7 +2395,7 @@ class KOBOTOUCH(KOBO):
         if self.manage_collections:
             if collections:
-                # debug_print("KoboTouch:update_device_database_collections - length collections=" + unicode(len(collections)))
+                # debug_print("KoboTouch:update_device_database_collections - length collections=" + unicode_type(len(collections)))
                 # Need to reset the collections outside the particular loops
                 # otherwise the last item will not be removed
@@ -2834,7 +2835,7 @@ class KOBOTOUCH(KOBO):
             # count_bookshelves = i + 1
             cursor.close()
-            # debug_print("KoboTouch:get_bookshelflist - count bookshelves=" + unicode(count_bookshelves))
+            # debug_print("KoboTouch:get_bookshelflist - count bookshelves=" + unicode_type(count_bookshelves))
             return bookshelves
@@ -2918,7 +2919,7 @@ class KOBOTOUCH(KOBO):
                     cursor.execute(addquery, add_values)
                 elif result['_IsDeleted'] == 'true':
                     debug_print("KoboTouch:check_for_bookshelf - Shelf '%s' is deleted - undeleting. result['_IsDeleted']='%s'" % (
-                        bookshelf_name, unicode(result['_IsDeleted'])))
+                        bookshelf_name, unicode_type(result['_IsDeleted'])))
                     cursor.execute(updatequery, test_values)
             cursor.close()

View File

@@ -16,6 +16,7 @@ from calibre.gui2.device_drivers.tabbed_device_config import TabbedDeviceConfig,
 from calibre.devices.usbms.driver import debug_print
 from calibre.gui2 import error_dialog
 from calibre.gui2.dialogs.template_dialog import TemplateDialog
+from polyglot.builtins import unicode_type
 def wrap_msg(msg):
@@ -122,7 +123,7 @@ class KOBOTOUCHConfig(TabbedDeviceConfig):
         p['support_newer_firmware'] = self.support_newer_firmware
         p['debugging_title'] = self.debugging_title
-        p['driver_version'] = '.'.join([unicode(i) for i in self.device.version])
+        p['driver_version'] = '.'.join([unicode_type(i) for i in self.device.version])
         return p
@@ -397,7 +398,7 @@ class AdvancedGroupBox(DeviceOptionsGroupBox):
                 'to perform full read-write functionality - Here be Dragons!! '
                 'Enable only if you are comfortable with restoring your kobo '
                 'to factory defaults and testing software. '
-                'This driver supports firmware V2.x.x and DBVersion up to ') + unicode(
+                'This driver supports firmware V2.x.x and DBVersion up to ') + unicode_type(
                 device.supported_dbversion), device.get_pref('support_newer_firmware')
             )
@@ -555,7 +556,7 @@ class TemplateConfig(QWidget): # {{{
     @property
     def template(self):
-        return unicode(self.t.text()).strip()
+        return unicode_type(self.t.text()).strip()
     @template.setter
     def template(self, template):
@@ -577,7 +578,7 @@ class TemplateConfig(QWidget): # {{{
         except Exception as err:
             error_dialog(self, _('Invalid template'),
                     '<p>'+_('The template "%s" is invalid:')%tmpl +
-                    '<br>'+unicode(err), show=True)
+                    '<br>'+unicode_type(err), show=True)
             return False
 # }}}

View File

@@ -18,6 +18,7 @@ from calibre.devices.mtp.base import debug
 from calibre.devices.mtp.defaults import DeviceDefaults
 from calibre.ptempfile import SpooledTemporaryFile, PersistentTemporaryDirectory
 from calibre.utils.filenames import shorten_components_to
+from polyglot.builtins import unicode_type
 BASE = importlib.import_module('calibre.devices.mtp.%s.driver'%(
     'windows' if iswindows else 'unix')).MTP_DEVICE
@@ -75,7 +76,7 @@ class MTP_DEVICE(BASE):
     def is_folder_ignored(self, storage_or_storage_id, path,
                           ignored_folders=None):
-        storage_id = unicode(getattr(storage_or_storage_id, 'object_id',
+        storage_id = unicode_type(getattr(storage_or_storage_id, 'object_id',
             storage_or_storage_id))
         lpath = tuple(icu_lower(name) for name in path)
         if ignored_folders is None:
@@ -166,14 +167,14 @@ class MTP_DEVICE(BASE):
             traceback.print_exc()
             dinfo = {}
         if dinfo.get('device_store_uuid', None) is None:
-            dinfo['device_store_uuid'] = unicode(uuid.uuid4())
+            dinfo['device_store_uuid'] = unicode_type(uuid.uuid4())
         if dinfo.get('device_name', None) is None:
             dinfo['device_name'] = self.current_friendly_name
         if name is not None:
             dinfo['device_name'] = name
         dinfo['location_code'] = location_code
         dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None)
-        dinfo['calibre_version'] = '.'.join([unicode(i) for i in numeric_version])
+        dinfo['calibre_version'] = '.'.join([unicode_type(i) for i in numeric_version])
         dinfo['date_last_connected'] = isoformat(now())
         dinfo['mtp_prefix'] = storage.storage_prefix
         raw = json.dumps(dinfo, default=to_json)

View File

@@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
 import weakref, sys, json
 from collections import deque
 from operator import attrgetter
-from polyglot.builtins import map
+from polyglot.builtins import map, unicode_type
 from datetime import datetime
 from calibre import human_readable, prints, force_unicode
@@ -74,7 +74,7 @@ class FileOrFolder(object):
     def __repr__(self):
         name = 'Folder' if self.is_folder else 'File'
         try:
-            path = unicode(self.full_path)
+            path = unicode_type(self.full_path)
         except:
             path = ''
         datum = 'size=%s'%(self.size)
@@ -250,5 +250,3 @@ class FilesystemCache(object):
             return self.id_map[object_id]
         except KeyError:
             raise ValueError('No object found with MTP path: %s'%path)
-
-

View File

@@ -17,6 +17,7 @@ from calibre.constants import plugins, islinux, isosx, ispy3
 from calibre.ptempfile import SpooledTemporaryFile
 from calibre.devices.errors import OpenFailed, DeviceError, BlacklistedDevice, OpenActionNeeded
 from calibre.devices.mtp.base import MTPDeviceBase, synchronous, debug
+from polyglot.builtins import unicode_type
 MTPDevice = namedtuple('MTPDevice', 'busnum devnum vendor_id product_id '
         'bcd serial manufacturer product')
@@ -321,7 +322,7 @@ class MTP_DEVICE(MTPDeviceBase):
             storage.append({'id':sid, 'size':capacity,
                 'is_folder':True, 'name':name, 'can_delete':False,
                 'is_system':True})
-            self._currently_getting_sid = unicode(sid)
+            self._currently_getting_sid = unicode_type(sid)
             items, errs = self.dev.get_filesystem(sid,
                     partial(self._filesystem_callback, {}))
             all_items.extend(items), all_errs.extend(errs)
@@ -373,7 +374,7 @@ class MTP_DEVICE(MTPDeviceBase):
         e = parent.folder_named(name)
         if e is not None:
             return e
-        ename = name.encode('utf-8') if isinstance(name, unicode) else name
+        ename = name.encode('utf-8') if isinstance(name, unicode_type) else name
        sid, pid = parent.storage_id, parent.object_id
        if pid == sid:
            pid = 0
@@ -396,7 +397,7 @@ class MTP_DEVICE(MTPDeviceBase):
             raise ValueError('Cannot upload file %s, it already exists'%(
                 e.full_path,))
             self.delete_file_or_folder(e)
-        ename = name.encode('utf-8') if isinstance(name, unicode) else name
+        ename = name.encode('utf-8') if isinstance(name, unicode_type) else name
        sid, pid = parent.storage_id, parent.object_id
        if pid == sid:
            pid = 0xFFFFFFFF

View File

@@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
 import time, threading, traceback
 from functools import wraps, partial
-from polyglot.builtins import zip
+from polyglot.builtins import unicode_type, zip
 from itertools import chain
 from calibre import as_unicode, prints, force_unicode
@@ -264,7 +264,7 @@ class MTP_DEVICE(MTPDeviceBase):
                 break
             storage = {'id':storage_id, 'size':capacity, 'name':name,
                     'is_folder':True, 'can_delete':False, 'is_system':True}
-            self._currently_getting_sid = unicode(storage_id)
+            self._currently_getting_sid = unicode_type(storage_id)
             id_map = self.dev.get_filesystem(storage_id, partial(
                 self._filesystem_callback, {}))
             for x in id_map.itervalues():
@@ -441,5 +441,3 @@ class MTP_DEVICE(MTPDeviceBase):
         ans = self.dev.put_file(pid, name, stream, size, callback)
         ans['storage_id'] = sid
         return parent.add_child(ans)
-
-

View File

@@ -24,6 +24,7 @@ from calibre.devices.usbms.books import CollectionsBookList
 from calibre.devices.usbms.books import BookList
 from calibre.ebooks.metadata import authors_to_sort_string, authors_to_string
 from calibre.constants import islinux
+from polyglot.builtins import unicode_type
 DBPATH = 'Sony_Reader/database/books.db'
 THUMBPATH = 'Sony_Reader/database/cache/books/%s/thumbnail/main_thumbnail.jpg'
@@ -170,7 +171,7 @@ class PRST1(USBMS):
         with closing(sqlite.connect(dbpath)) as connection:
             # Replace undecodable characters in the db instead of erroring out
-            connection.text_factory = lambda x: unicode(x, "utf-8", "replace")
+            connection.text_factory = lambda x: unicode_type(x, "utf-8", "replace")
             cursor = connection.cursor()
             # Query collections

View File

@ -38,6 +38,7 @@ from calibre.utils.filenames import ascii_filename as sanitize, shorten_componen
from calibre.utils.mdns import (publish as publish_zeroconf, unpublish as from calibre.utils.mdns import (publish as publish_zeroconf, unpublish as
unpublish_zeroconf, get_all_ips) unpublish_zeroconf, get_all_ips)
from calibre.utils.socket_inheritance import set_socket_inherit from calibre.utils.socket_inheritance import set_socket_inherit
from polyglot.builtins import unicode_type
def synchronous(tlockname): def synchronous(tlockname):
@ -397,7 +398,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
if isinstance(a, dict): if isinstance(a, dict):
printable = {} printable = {}
for k,v in a.iteritems(): for k,v in a.iteritems():
if isinstance(v, (str, unicode)) and len(v) > 50: if isinstance(v, (str, unicode_type)) and len(v) > 50:
printable[k] = 'too long' printable[k] = 'too long'
else: else:
printable[k] = v printable[k] = v
@ -418,14 +419,14 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
if not isinstance(dinfo, dict): if not isinstance(dinfo, dict):
dinfo = {} dinfo = {}
if dinfo.get('device_store_uuid', None) is None: if dinfo.get('device_store_uuid', None) is None:
dinfo['device_store_uuid'] = unicode(uuid.uuid4()) dinfo['device_store_uuid'] = unicode_type(uuid.uuid4())
if dinfo.get('device_name') is None: if dinfo.get('device_name') is None:
dinfo['device_name'] = self.get_gui_name() dinfo['device_name'] = self.get_gui_name()
if name is not None: if name is not None:
dinfo['device_name'] = name dinfo['device_name'] = name
dinfo['location_code'] = location_code dinfo['location_code'] = location_code
dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None) dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None)
dinfo['calibre_version'] = '.'.join([unicode(i) for i in numeric_version]) dinfo['calibre_version'] = '.'.join([unicode_type(i) for i in numeric_version])
dinfo['date_last_connected'] = isoformat(now()) dinfo['date_last_connected'] = isoformat(now())
dinfo['prefix'] = self.PREFIX dinfo['prefix'] = self.PREFIX
return dinfo return dinfo
@ -478,7 +479,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
from calibre.library.save_to_disk import get_components from calibre.library.save_to_disk import get_components
from calibre.library.save_to_disk import config from calibre.library.save_to_disk import config
opts = config().parse() opts = config().parse()
if not isinstance(template, unicode): if not isinstance(template, unicode_type):
template = template.decode('utf-8') template = template.decode('utf-8')
app_id = str(getattr(mdata, 'application_id', '')) app_id = str(getattr(mdata, 'application_id', ''))
id_ = mdata.get('id', fname) id_ = mdata.get('id', fname)
@ -726,7 +727,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
from calibre.utils.date import now, parse_date from calibre.utils.date import now, parse_date
try: try:
key = self._make_metadata_cache_key(uuid, ext_or_lpath) key = self._make_metadata_cache_key(uuid, ext_or_lpath)
if isinstance(lastmod, unicode): if isinstance(lastmod, unicode_type):
if lastmod == 'None': if lastmod == 'None':
return None return None
lastmod = parse_date(lastmod) lastmod = parse_date(lastmod)
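
The device-info hunk above only changes how values are stringified: UUIDs and version components are converted with unicode_type so the resulting dict holds text on both Python versions. A minimal, self-contained version of the same pattern; the version tuple here is a placeholder, not calibre's numeric_version:

    import uuid

    unicode_type = str  # assumption: the text type on Python 3

    numeric_version = (3, 48, 0)  # placeholder
    dinfo = {
        'device_store_uuid': unicode_type(uuid.uuid4()),
        'calibre_version': '.'.join(unicode_type(i) for i in numeric_version),
    }
    print(dinfo['calibre_version'])  # -> '3.48.0'
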

View File

@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
import os, re import os, re
from polyglot.builtins import unicode_type
def node_mountpoint(node): def node_mountpoint(node):
@ -48,7 +49,7 @@ class UDisks(object):
def mount(self, device_node_path): def mount(self, device_node_path):
d = self.device(device_node_path) d = self.device(device_node_path)
try: try:
return unicode(d.FilesystemMount('', return unicode_type(d.FilesystemMount('',
['auth_no_user_interaction', 'rw', 'noexec', 'nosuid', ['auth_no_user_interaction', 'rw', 'noexec', 'nosuid',
'nodev', 'uid=%d'%os.geteuid(), 'gid=%d'%os.getegid()])) 'nodev', 'uid=%d'%os.geteuid(), 'gid=%d'%os.getegid()]))
except: except:
@ -131,7 +132,7 @@ class UDisks2(object):
mount_options = ['rw', 'noexec', 'nosuid', mount_options = ['rw', 'noexec', 'nosuid',
'nodev', 'uid=%d'%os.geteuid(), 'gid=%d'%os.getegid()] 'nodev', 'uid=%d'%os.geteuid(), 'gid=%d'%os.getegid()]
try: try:
return unicode(d.Mount( return unicode_type(d.Mount(
{ {
'auth.no_user_interaction':True, 'auth.no_user_interaction':True,
'options':','.join(mount_options) 'options':','.join(mount_options)

View File

@ -5,6 +5,7 @@ __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from calibre.utils.config_base import Config, ConfigProxy from calibre.utils.config_base import Config, ConfigProxy
from polyglot.builtins import unicode_type
class DeviceConfig(object): class DeviceConfig(object):
@ -107,15 +108,15 @@ class DeviceConfig(object):
if hasattr(config_widget.opt_extra_customization[i], 'isChecked'): if hasattr(config_widget.opt_extra_customization[i], 'isChecked'):
ec.append(config_widget.opt_extra_customization[i].isChecked()) ec.append(config_widget.opt_extra_customization[i].isChecked())
elif hasattr(config_widget.opt_extra_customization[i], 'currentText'): elif hasattr(config_widget.opt_extra_customization[i], 'currentText'):
ec.append(unicode(config_widget.opt_extra_customization[i].currentText()).strip()) ec.append(unicode_type(config_widget.opt_extra_customization[i].currentText()).strip())
else: else:
ec.append(unicode(config_widget.opt_extra_customization[i].text()).strip()) ec.append(unicode_type(config_widget.opt_extra_customization[i].text()).strip())
else: else:
ec = unicode(config_widget.opt_extra_customization.text()).strip() ec = unicode_type(config_widget.opt_extra_customization.text()).strip()
if not ec: if not ec:
ec = None ec = None
proxy['extra_customization'] = ec proxy['extra_customization'] = ec
st = unicode(config_widget.opt_save_template.text()) st = unicode_type(config_widget.opt_save_template.text())
proxy['save_template'] = st proxy['save_template'] = st
@classmethod @classmethod

View File

@ -20,6 +20,7 @@ from calibre.devices.usbms.cli import CLI
from calibre.devices.usbms.device import Device from calibre.devices.usbms.device import Device
from calibre.devices.usbms.books import BookList, Book from calibre.devices.usbms.books import BookList, Book
from calibre.ebooks.metadata.book.json_codec import JsonCodec from calibre.ebooks.metadata.book.json_codec import JsonCodec
from polyglot.builtins import unicode_type
BASE_TIME = None BASE_TIME = None
@ -105,14 +106,14 @@ class USBMS(CLI, Device):
if not isinstance(dinfo, dict): if not isinstance(dinfo, dict):
dinfo = {} dinfo = {}
if dinfo.get('device_store_uuid', None) is None: if dinfo.get('device_store_uuid', None) is None:
dinfo['device_store_uuid'] = unicode(uuid.uuid4()) dinfo['device_store_uuid'] = unicode_type(uuid.uuid4())
if dinfo.get('device_name', None) is None: if dinfo.get('device_name', None) is None:
dinfo['device_name'] = self.get_gui_name() dinfo['device_name'] = self.get_gui_name()
if name is not None: if name is not None:
dinfo['device_name'] = name dinfo['device_name'] = name
dinfo['location_code'] = location_code dinfo['location_code'] = location_code
dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None) dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None)
dinfo['calibre_version'] = '.'.join([unicode(i) for i in numeric_version]) dinfo['calibre_version'] = '.'.join([unicode_type(i) for i in numeric_version])
dinfo['date_last_connected'] = isoformat(now()) dinfo['date_last_connected'] = isoformat(now())
dinfo['prefix'] = prefix.replace('\\', '/') dinfo['prefix'] = prefix.replace('\\', '/')
return dinfo return dinfo

View File

@ -11,6 +11,7 @@ import os, time, re
from functools import partial from functools import partial
from calibre.devices.errors import DeviceError, WrongDestinationError, FreeSpaceError from calibre.devices.errors import DeviceError, WrongDestinationError, FreeSpaceError
from polyglot.builtins import unicode_type
def sanity_check(on_card, files, card_prefixes, free_space): def sanity_check(on_card, files, card_prefixes, free_space):
@ -97,7 +98,7 @@ def create_upload_path(mdata, fname, template, sanitize,
ext = path_type.splitext(fname)[1] ext = path_type.splitext(fname)[1]
opts = config().parse() opts = config().parse()
if not isinstance(template, unicode): if not isinstance(template, unicode_type):
template = template.decode('utf-8') template = template.decode('utf-8')
app_id = str(getattr(mdata, 'application_id', '')) app_id = str(getattr(mdata, 'application_id', ''))
id_ = mdata.get('id', fname) id_ = mdata.get('id', fname)

View File

@ -9,6 +9,7 @@ from various formats.
import traceback, os, re import traceback, os, re
from calibre import CurrentDir, prints from calibre import CurrentDir, prints
from polyglot.builtins import unicode_type
class ConversionError(Exception): class ConversionError(Exception):
@ -113,7 +114,7 @@ def extract_calibre_cover(raw, base, log):
if matches is None: if matches is None:
body = soup.find('body') body = soup.find('body')
if body is not None: if body is not None:
text = u''.join(map(unicode, body.findAll(text=True))) text = u''.join(map(unicode_type, body.findAll(text=True)))
if text.strip(): if text.strip():
# Body has text, abort # Body has text, abort
return return
@ -210,7 +211,7 @@ def check_ebook_format(stream, current_guess):
def normalize(x): def normalize(x):
if isinstance(x, unicode): if isinstance(x, unicode_type):
import unicodedata import unicodedata
x = unicodedata.normalize('NFC', x) x = unicodedata.normalize('NFC', x)
return x return x

View File

@ -8,6 +8,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re, codecs import re, codecs
from polyglot.builtins import unicode_type
ENCODING_PATS = [ ENCODING_PATS = [
# XML declaration # XML declaration
@ -92,7 +93,7 @@ def force_encoding(raw, verbose, assume_utf8=False):
def detect_xml_encoding(raw, verbose=False, assume_utf8=False): def detect_xml_encoding(raw, verbose=False, assume_utf8=False):
if not raw or isinstance(raw, unicode): if not raw or isinstance(raw, unicode_type):
return raw, None return raw, None
for x in ('utf8', 'utf-16-le', 'utf-16-be'): for x in ('utf8', 'utf-16-le', 'utf-16-be'):
bom = getattr(codecs, 'BOM_'+x.upper().replace('-16', '16').replace( bom = getattr(codecs, 'BOM_'+x.upper().replace('-16', '16').replace(
@ -135,7 +136,7 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
return '', None return '', None
raw, encoding = detect_xml_encoding(raw, verbose=verbose, raw, encoding = detect_xml_encoding(raw, verbose=verbose,
assume_utf8=assume_utf8) assume_utf8=assume_utf8)
if not isinstance(raw, unicode): if not isinstance(raw, unicode_type):
raw = raw.decode(encoding, 'replace') raw = raw.decode(encoding, 'replace')
if strip_encoding_pats: if strip_encoding_pats:
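
The isinstance(raw, unicode_type) check followed by decode(..., 'replace') is the most common idiom in this commit: leave text alone, decode bytes. A hedged sketch of that idiom as a helper; the function name is invented for illustration and does not exist in calibre:

    unicode_type = str  # assumption: the text type on Python 3

    def ensure_text(raw, encoding='utf-8'):
        # Text passes through untouched; bytes are decoded with replacement.
        if isinstance(raw, unicode_type):
            return raw
        return raw.decode(encoding, 'replace')

    print(ensure_text(b'caf\xc3\xa9'))   # -> 'café'
    print(ensure_text(u'caf\xe9'))       # -> 'café'
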

View File

@ -14,6 +14,7 @@ from calibre.utils.chm.chm import CHMFile
from calibre.constants import plugins from calibre.constants import plugins
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from polyglot.builtins import unicode_type
chmlib, chmlib_err = plugins['chmlib'] chmlib, chmlib_err = plugins['chmlib']
@ -48,7 +49,7 @@ class CHMReader(CHMFile):
def __init__(self, input, log, input_encoding=None): def __init__(self, input, log, input_encoding=None):
CHMFile.__init__(self) CHMFile.__init__(self)
if isinstance(input, unicode): if isinstance(input, unicode_type):
input = input.encode(filesystem_encoding) input = input.encode(filesystem_encoding)
if not self.LoadCHM(input): if not self.LoadCHM(input):
raise CHMError("Unable to open CHM file '%s'"%(input,)) raise CHMError("Unable to open CHM file '%s'"%(input,))
@ -113,7 +114,7 @@ class CHMReader(CHMFile):
enc = 'cp1252' enc = 'cp1252'
for path in self.Contents(): for path in self.Contents():
fpath = path fpath = path
if not isinstance(path, unicode): if not isinstance(path, unicode_type):
fpath = path.decode(enc) fpath = path.decode(enc)
lpath = os.path.join(output_dir, fpath) lpath = os.path.join(output_dir, fpath)
self._ensure_dir(lpath) self._ensure_dir(lpath)
@ -146,7 +147,7 @@ class CHMReader(CHMFile):
with open(lpath, 'r+b') as f: with open(lpath, 'r+b') as f:
data = f.read() data = f.read()
data = self._reformat(data, lpath) data = self._reformat(data, lpath)
if isinstance(data, unicode): if isinstance(data, unicode_type):
data = data.encode('utf-8') data = data.encode('utf-8')
f.seek(0) f.seek(0)
f.truncate() f.truncate()

View File

@ -16,6 +16,7 @@ from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.icu import numeric_sort_key from calibre.utils.icu import numeric_sort_key
from calibre.utils.ipc.server import Server from calibre.utils.ipc.server import Server
from calibre.utils.ipc.job import ParallelJob from calibre.utils.ipc.job import ParallelJob
from polyglot.builtins import unicode_type
# If the specified screen has either dimension larger than this value, no image # If the specified screen has either dimension larger than this value, no image
# rescaling is done (we assume that it is a tablet output profile) # rescaling is done (we assume that it is a tablet output profile)
@ -27,7 +28,7 @@ def extract_comic(path_to_comic_file):
Un-archive the comic file. Un-archive the comic file.
''' '''
tdir = PersistentTemporaryDirectory(suffix='_comic_extract') tdir = PersistentTemporaryDirectory(suffix='_comic_extract')
if not isinstance(tdir, unicode): if not isinstance(tdir, unicode_type):
# Needed in case the zip file has wrongly encoded unicode file/dir # Needed in case the zip file has wrongly encoded unicode file/dir
# names # names
tdir = tdir.decode(filesystem_encoding) tdir = tdir.decode(filesystem_encoding)
@ -273,6 +274,3 @@ def process_pages(pages, opts, update, tdir):
ans += pages ans += pages
failures += failures_ failures += failures_
return ans, failures return ans, failures

View File

@ -13,6 +13,7 @@ from calibre.utils.lock import ExclusiveFile
from calibre import sanitize_file_name from calibre import sanitize_file_name
from calibre.customize.conversion import OptionRecommendation from calibre.customize.conversion import OptionRecommendation
from calibre.customize.ui import available_output_formats from calibre.customize.ui import available_output_formats
from polyglot.builtins import unicode_type
config_dir = os.path.join(config_dir, 'conversion') config_dir = os.path.join(config_dir, 'conversion')
@ -85,7 +86,7 @@ class GuiRecommendations(dict):
def serialize(self): def serialize(self):
ans = json.dumps(self, indent=2, ensure_ascii=False) ans = json.dumps(self, indent=2, ensure_ascii=False)
if isinstance(ans, unicode): if isinstance(ans, unicode_type):
ans = ans.encode('utf-8') ans = ans.encode('utf-8')
return b'json:' + ans return b'json:' + ans
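
The serialize() change works because json.dumps() yields text, which must be encoded before the b'json:' prefix is attached. A standalone sketch of the same flow with a made-up payload:

    import json

    unicode_type = str  # assumption: the text type on Python 3

    ans = json.dumps({'output_format': 'epub'}, indent=2, ensure_ascii=False)
    if isinstance(ans, unicode_type):
        ans = ans.encode('utf-8')
    print((b'json:' + ans)[:5])  # -> b'json:'
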

View File

@ -8,6 +8,7 @@ import os
from calibre.customize.conversion import InputFormatPlugin from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre.constants import filesystem_encoding from calibre.constants import filesystem_encoding
from polyglot.builtins import unicode_type
class CHMInput(InputFormatPlugin): class CHMInput(InputFormatPlugin):
@ -34,7 +35,7 @@ class CHMInput(InputFormatPlugin):
log.debug('Processing CHM...') log.debug('Processing CHM...')
with TemporaryDirectory('_chm2oeb') as tdir: with TemporaryDirectory('_chm2oeb') as tdir:
if not isinstance(tdir, unicode): if not isinstance(tdir, unicode_type):
tdir = tdir.decode(filesystem_encoding) tdir = tdir.decode(filesystem_encoding)
html_input = plugin_for_input_format('html') html_input = plugin_for_input_format('html')
for opt in html_input.options: for opt in html_input.options:
@ -125,7 +126,7 @@ class CHMInput(InputFormatPlugin):
base = os.path.dirname(os.path.abspath(htmlpath)) base = os.path.dirname(os.path.abspath(htmlpath))
def unquote(x): def unquote(x):
if isinstance(x, unicode): if isinstance(x, unicode_type):
x = x.encode('utf-8') x = x.encode('utf-8')
return _unquote(x).decode('utf-8') return _unquote(x).decode('utf-8')

View File

@ -7,6 +7,7 @@ import os, re, posixpath
from itertools import cycle from itertools import cycle
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from polyglot.builtins import unicode_type
ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC' ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC'
IDPF_OBFUSCATION = 'http://www.idpf.org/2008/embedding' IDPF_OBFUSCATION = 'http://www.idpf.org/2008/embedding'
@ -367,7 +368,7 @@ class EPUBInput(InputFormatPlugin):
def add_from_li(li, parent): def add_from_li(li, parent):
href = text = None href = text = None
for x in li.iterchildren(XHTML('a'), XHTML('span')): for x in li.iterchildren(XHTML('a'), XHTML('span')):
text = etree.tostring(x, method='text', encoding=unicode, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip() text = etree.tostring(x, method='text', encoding=unicode_type, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
href = x.get('href') href = x.get('href')
if href: if href:
if href.startswith('#'): if href.startswith('#'):
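
Passing unicode_type as the encoding argument to lxml's tostring() is what keeps this call returning text instead of UTF-8 bytes: lxml treats the text type itself as a request for a string result. A small standalone demo, assuming lxml is available:

    from lxml import etree

    unicode_type = str  # assumption: the text type on Python 3

    root = etree.fromstring('<li><a href="#x">Chapter <span>One</span></a></li>')
    text = etree.tostring(root, method='text', encoding=unicode_type)
    print(repr(text))                  # 'Chapter One'
    print(type(etree.tostring(root)))  # <class 'bytes'> by default
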

View File

@ -13,6 +13,7 @@ from calibre.customize.conversion import (OutputFormatPlugin,
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir from calibre import CurrentDir
from calibre.constants import filesystem_encoding from calibre.constants import filesystem_encoding
from polyglot.builtins import unicode_type
block_level_tags = ( block_level_tags = (
'address', 'address',
@ -225,8 +226,8 @@ class EPUBOutput(OutputFormatPlugin):
identifiers = oeb.metadata['identifier'] identifiers = oeb.metadata['identifier']
uuid = None uuid = None
for x in identifiers: for x in identifiers:
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'): if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode_type(x).startswith('urn:uuid:'):
uuid = unicode(x).split(':')[-1] uuid = unicode_type(x).split(':')[-1]
break break
encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', []) encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
@ -241,7 +242,7 @@ class EPUBOutput(OutputFormatPlugin):
# for some absurd reason, or it will throw a hissy fit and refuse # for some absurd reason, or it will throw a hissy fit and refuse
# to use the obfuscated fonts. # to use the obfuscated fonts.
for x in identifiers: for x in identifiers:
if unicode(x) == uuid: if unicode_type(x) == uuid:
x.content = 'urn:uuid:'+uuid x.content = 'urn:uuid:'+uuid
with TemporaryDirectory(u'_epub_output') as tdir: with TemporaryDirectory(u'_epub_output') as tdir:
@ -325,7 +326,7 @@ class EPUBOutput(OutputFormatPlugin):
fonts = [] fonts = []
for uri in list(uris.keys()): for uri in list(uris.keys()):
path = uris[uri] path = uris[uri]
if isinstance(path, unicode): if isinstance(path, unicode_type):
path = path.encode(filesystem_encoding) path = path.encode(filesystem_encoding)
if not os.path.exists(path): if not os.path.exists(path):
uris.pop(uri) uris.pop(uri)
@ -339,7 +340,7 @@ class EPUBOutput(OutputFormatPlugin):
f.write(chr(ord(data[i]) ^ key[i%16])) f.write(chr(ord(data[i]) ^ key[i%16]))
else: else:
self.log.warn('Font', path, 'is invalid, ignoring') self.log.warn('Font', path, 'is invalid, ignoring')
if not isinstance(uri, unicode): if not isinstance(uri, unicode_type):
uri = uri.decode('utf-8') uri = uri.decode('utf-8')
fonts.append(u''' fonts.append(u'''
<enc:EncryptedData> <enc:EncryptedData>
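
The hunk above only touches the isinstance() checks around the font obfuscation code; the chr/ord XOR loop a few lines earlier is untouched. Purely as an illustration of what that step does (not calibre's implementation), the same per-byte XOR can be written in a version-neutral way:

    def xor_obfuscate(data, key):
        # data: leading bytes of the font file, key: 16-byte obfuscation key
        out = bytearray(data)
        for i in range(len(out)):
            out[i] ^= key[i % len(key)]
        return bytes(out)

    print(xor_obfuscate(b'ABCD', bytearray(range(16))))  # -> b'ACAG'
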

View File

@ -8,6 +8,7 @@ import os, re
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre import guess_type from calibre import guess_type
from polyglot.builtins import unicode_type
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0' FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
FB21NS = 'http://www.gribuser.ru/xml/fictionbook/2.1' FB21NS = 'http://www.gribuser.ru/xml/fictionbook/2.1'
@ -70,7 +71,7 @@ class FB2Input(InputFormatPlugin):
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]') stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
css = '' css = ''
for s in stylesheets: for s in stylesheets:
css += etree.tostring(s, encoding=unicode, method='text', css += etree.tostring(s, encoding=unicode_type, method='text',
with_tail=False) + '\n\n' with_tail=False) + '\n\n'
if css: if css:
import css_parser, logging import css_parser, logging
@ -82,7 +83,7 @@ class FB2Input(InputFormatPlugin):
log.debug('Parsing stylesheet...') log.debug('Parsing stylesheet...')
stylesheet = parser.parseString(text) stylesheet = parser.parseString(text)
stylesheet.namespaces['h'] = XHTML_NS stylesheet.namespaces['h'] = XHTML_NS
css = unicode(stylesheet.cssText).replace('h|style', 'h|span') css = unicode_type(stylesheet.cssText).replace('h|style', 'h|span')
css = re.sub(r'name\s*=\s*', 'class=', css) css = re.sub(r'name\s*=\s*', 'class=', css)
self.extract_embedded_content(doc) self.extract_embedded_content(doc)
log.debug('Converting XML to HTML...') log.debug('Converting XML to HTML...')

View File

@ -17,6 +17,7 @@ from calibre.customize.conversion import (InputFormatPlugin,
from calibre.utils.localization import get_lang from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename from calibre.utils.filenames import ascii_filename
from calibre.utils.imghdr import what from calibre.utils.imghdr import what
from polyglot.builtins import unicode_type
def sanitize_file_name(x): def sanitize_file_name(x):
@ -225,7 +226,7 @@ class HTMLInput(InputFormatPlugin):
def link_to_local_path(self, link_, base=None): def link_to_local_path(self, link_, base=None):
from calibre.ebooks.html.input import Link from calibre.ebooks.html.input import Link
if not isinstance(link_, unicode): if not isinstance(link_, unicode_type):
try: try:
link_ = link_.decode('utf-8', 'error') link_ = link_.decode('utf-8', 'error')
except: except:
@ -289,7 +290,7 @@ class HTMLInput(InputFormatPlugin):
# bhref refers to an already existing file. The read() method of # bhref refers to an already existing file. The read() method of
# DirContainer will call unquote on it before trying to read the # DirContainer will call unquote on it before trying to read the
# file, therefore we quote it here. # file, therefore we quote it here.
if isinstance(bhref, unicode): if isinstance(bhref, unicode_type):
bhref = bhref.encode('utf-8') bhref = bhref.encode('utf-8')
item.html_input_href = quote(bhref).decode('utf-8') item.html_input_href = quote(bhref).decode('utf-8')
if guessed in self.OEB_STYLES: if guessed in self.OEB_STYLES:

View File

@ -9,6 +9,7 @@ from os.path import dirname, abspath, relpath as _relpath, exists, basename
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre import CurrentDir from calibre import CurrentDir
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import PersistentTemporaryDirectory
from polyglot.builtins import unicode_type
def relpath(*args): def relpath(*args):
@ -135,7 +136,7 @@ class HTMLOutput(OutputFormatPlugin):
toc=html_toc, meta=meta, nextLink=nextLink, toc=html_toc, meta=meta, nextLink=nextLink,
tocUrl=tocUrl, cssLink=cssLink, tocUrl=tocUrl, cssLink=cssLink,
firstContentPageLink=nextLink) firstContentPageLink=nextLink)
if isinstance(t, unicode): if isinstance(t, unicode_type):
t = t.encode('utf-8') t = t.encode('utf-8')
f.write(t) f.write(t)

View File

@ -13,6 +13,7 @@ from cStringIO import StringIO
from calibre.customize.conversion import OutputFormatPlugin, \ from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation OptionRecommendation
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from polyglot.builtins import unicode_type
class HTMLZOutput(OutputFormatPlugin): class HTMLZOutput(OutputFormatPlugin):
@ -81,9 +82,9 @@ class HTMLZOutput(OutputFormatPlugin):
fname = u'index' fname = u'index'
if opts.htmlz_title_filename: if opts.htmlz_title_filename:
from calibre.utils.filenames import shorten_components_to from calibre.utils.filenames import shorten_components_to
fname = shorten_components_to(100, (ascii_filename(unicode(oeb_book.metadata.title[0])),))[0] fname = shorten_components_to(100, (ascii_filename(unicode_type(oeb_book.metadata.title[0])),))[0]
with open(os.path.join(tdir, fname+u'.html'), 'wb') as tf: with open(os.path.join(tdir, fname+u'.html'), 'wb') as tf:
if isinstance(html, unicode): if isinstance(html, unicode_type):
html = html.encode('utf-8') html = html.encode('utf-8')
tf.write(html) tf.write(html)
@ -100,7 +101,7 @@ class HTMLZOutput(OutputFormatPlugin):
for item in oeb_book.manifest: for item in oeb_book.manifest:
if item.media_type in OEB_IMAGES and item.href in images: if item.media_type in OEB_IMAGES and item.href in images:
if item.media_type == SVG_MIME: if item.media_type == SVG_MIME:
data = unicode(etree.tostring(item.data, encoding=unicode)) data = unicode_type(etree.tostring(item.data, encoding=unicode_type))
else: else:
data = item.data data = item.data
fname = os.path.join(tdir, u'images', images[item.href]) fname = os.path.join(tdir, u'images', images[item.href])

View File

@ -10,6 +10,7 @@ import sys, os
from calibre.customize.conversion import OutputFormatPlugin from calibre.customize.conversion import OutputFormatPlugin
from calibre.customize.conversion import OptionRecommendation from calibre.customize.conversion import OptionRecommendation
from polyglot.builtins import unicode_type
class LRFOptions(object): class LRFOptions(object):
@ -17,7 +18,7 @@ class LRFOptions(object):
def __init__(self, output, opts, oeb): def __init__(self, output, opts, oeb):
def f2s(f): def f2s(f):
try: try:
return unicode(f[0]) return unicode_type(f[0])
except: except:
return '' return ''
m = oeb.metadata m = oeb.metadata
@ -31,13 +32,13 @@ class LRFOptions(object):
self.title_sort = self.author_sort = '' self.title_sort = self.author_sort = ''
for x in m.creator: for x in m.creator:
if x.role == 'aut': if x.role == 'aut':
self.author = unicode(x) self.author = unicode_type(x)
fa = unicode(getattr(x, 'file_as', '')) fa = unicode_type(getattr(x, 'file_as', ''))
if fa: if fa:
self.author_sort = fa self.author_sort = fa
for x in m.title: for x in m.title:
if unicode(x.file_as): if unicode_type(x.file_as):
self.title_sort = unicode(x.file_as) self.title_sort = unicode_type(x.file_as)
self.freetext = f2s(m.description) self.freetext = f2s(m.description)
self.category = f2s(m.subject) self.category = f2s(m.subject)
self.cover = None self.cover = None

View File

@ -6,6 +6,7 @@ __docformat__ = 'restructuredtext en'
import os import os
from calibre.customize.conversion import InputFormatPlugin from calibre.customize.conversion import InputFormatPlugin
from polyglot.builtins import unicode_type
class MOBIInput(InputFormatPlugin): class MOBIInput(InputFormatPlugin):
@ -49,7 +50,7 @@ class MOBIInput(InputFormatPlugin):
raw = parse_cache.pop('calibre_raw_mobi_markup', False) raw = parse_cache.pop('calibre_raw_mobi_markup', False)
if raw: if raw:
if isinstance(raw, unicode): if isinstance(raw, unicode_type):
raw = raw.encode('utf-8') raw = raw.encode('utf-8')
open(u'debug-raw.html', 'wb').write(raw) open(u'debug-raw.html', 'wb').write(raw)
from calibre.ebooks.oeb.base import close_self_closing_tags from calibre.ebooks.oeb.base import close_self_closing_tags

View File

@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
from calibre.customize.conversion import (OutputFormatPlugin, from calibre.customize.conversion import (OutputFormatPlugin,
OptionRecommendation) OptionRecommendation)
from polyglot.builtins import unicode_type
def remove_html_cover(oeb, log): def remove_html_cover(oeb, log):
@ -121,7 +122,7 @@ class MOBIOutput(OutputFormatPlugin):
if not found: if not found:
from calibre.ebooks import generate_masthead from calibre.ebooks import generate_masthead
self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...') self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...')
raw = generate_masthead(unicode(self.oeb.metadata['title'][0])) raw = generate_masthead(unicode_type(self.oeb.metadata['title'][0]))
id, href = self.oeb.manifest.generate('masthead', 'masthead') id, href = self.oeb.manifest.generate('masthead', 'masthead')
self.oeb.manifest.add(id, href, 'image/gif', data=raw) self.oeb.manifest.add(id, href, 'image/gif', data=raw)
self.oeb.guide.add('masthead', 'Masthead Image', href) self.oeb.guide.add('masthead', 'Masthead Image', href)
@ -165,7 +166,7 @@ class MOBIOutput(OutputFormatPlugin):
sec.nodes.remove(a) sec.nodes.remove(a)
root = TOC(klass='periodical', href=self.oeb.spine[0].href, root = TOC(klass='periodical', href=self.oeb.spine[0].href,
title=unicode(self.oeb.metadata.title[0])) title=unicode_type(self.oeb.metadata.title[0]))
for s in sections: for s in sections:
if articles[id(s)]: if articles[id(s)]:

View File

@ -14,6 +14,7 @@ from calibre.constants import iswindows
from calibre.customize.conversion import (OutputFormatPlugin, from calibre.customize.conversion import (OutputFormatPlugin,
OptionRecommendation) OptionRecommendation)
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from polyglot.builtins import unicode_type
UNITS = ['millimeter', 'centimeter', 'point', 'inch' , 'pica' , 'didot', UNITS = ['millimeter', 'centimeter', 'point', 'inch' , 'pica' , 'didot',
'cicero', 'devicepixel'] 'cicero', 'devicepixel']
@ -202,8 +203,8 @@ class PDFOutput(OutputFormatPlugin):
def get_cover_data(self): def get_cover_data(self):
oeb = self.oeb oeb = self.oeb
if (oeb.metadata.cover and unicode(oeb.metadata.cover[0]) in oeb.manifest.ids): if (oeb.metadata.cover and unicode_type(oeb.metadata.cover[0]) in oeb.manifest.ids):
cover_id = unicode(oeb.metadata.cover[0]) cover_id = unicode_type(oeb.metadata.cover[0])
item = oeb.manifest.ids[cover_id] item = oeb.manifest.ids[cover_id]
self.cover_data = item.data self.cover_data = item.data

View File

@ -9,6 +9,7 @@ import os, cStringIO
from calibre.customize.conversion import (OutputFormatPlugin, from calibre.customize.conversion import (OutputFormatPlugin,
OptionRecommendation) OptionRecommendation)
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from polyglot.builtins import unicode_type
class PMLOutput(OutputFormatPlugin): class PMLOutput(OutputFormatPlugin):
@ -40,7 +41,7 @@ class PMLOutput(OutputFormatPlugin):
with TemporaryDirectory('_pmlz_output') as tdir: with TemporaryDirectory('_pmlz_output') as tdir:
pmlmlizer = PMLMLizer(log) pmlmlizer = PMLMLizer(log)
pml = unicode(pmlmlizer.extract_content(oeb_book, opts)) pml = unicode_type(pmlmlizer.extract_content(oeb_book, opts))
with open(os.path.join(tdir, 'index.pml'), 'wb') as out: with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
out.write(pml.encode(opts.pml_output_encoding, 'replace')) out.write(pml.encode(opts.pml_output_encoding, 'replace'))

View File

@ -11,6 +11,7 @@ import os
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.constants import numeric_version from calibre.constants import numeric_version
from calibre import walk from calibre import walk
from polyglot.builtins import unicode_type
class RecipeDisabled(Exception): class RecipeDisabled(Exception):
@ -161,6 +162,6 @@ class RecipeInput(InputFormatPlugin):
def save_download(self, zf): def save_download(self, zf):
raw = self.recipe_source raw = self.recipe_source
if isinstance(raw, unicode): if isinstance(raw, unicode_type):
raw = raw.encode('utf-8') raw = raw.encode('utf-8')
zf.writestr('download.recipe', raw) zf.writestr('download.recipe', raw)

View File

@ -9,6 +9,7 @@ import os, string
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre.constants import __appname__, __version__ from calibre.constants import __appname__, __version__
from polyglot.builtins import unicode_type
class SNBOutput(OutputFormatPlugin): class SNBOutput(OutputFormatPlugin):
@ -73,20 +74,20 @@ class SNBOutput(OutputFormatPlugin):
# Process Meta data # Process Meta data
meta = oeb_book.metadata meta = oeb_book.metadata
if meta.title: if meta.title:
title = unicode(meta.title[0]) title = unicode_type(meta.title[0])
else: else:
title = '' title = ''
authors = [unicode(x) for x in meta.creator if x.role == 'aut'] authors = [unicode_type(x) for x in meta.creator if x.role == 'aut']
if meta.publisher: if meta.publisher:
publishers = unicode(meta.publisher[0]) publishers = unicode_type(meta.publisher[0])
else: else:
publishers = '' publishers = ''
if meta.language: if meta.language:
lang = unicode(meta.language[0]).upper() lang = unicode_type(meta.language[0]).upper()
else: else:
lang = '' lang = ''
if meta.description: if meta.description:
abstract = unicode(meta.description[0]) abstract = unicode_type(meta.description[0])
else: else:
abstract = '' abstract = ''

View File

@ -18,6 +18,7 @@ from calibre.utils.zipfile import ZipFile
from calibre import (extract, walk, isbytestring, filesystem_encoding, from calibre import (extract, walk, isbytestring, filesystem_encoding,
get_types_map) get_types_map)
from calibre.constants import __version__ from calibre.constants import __version__
from polyglot.builtins import unicode_type
DEBUG_README=u''' DEBUG_README=u'''
This debug directory contains snapshots of the e-book as it passes through the This debug directory contains snapshots of the e-book as it passes through the
@ -794,7 +795,7 @@ OptionRecommendation(name='search_replace',
def unarchive(self, path, tdir): def unarchive(self, path, tdir):
extract(path, tdir) extract(path, tdir)
files = list(walk(tdir)) files = list(walk(tdir))
files = [f if isinstance(f, unicode) else f.decode(filesystem_encoding) files = [f if isinstance(f, unicode_type) else f.decode(filesystem_encoding)
for f in files] for f in files]
from calibre.customize.ui import available_input_formats from calibre.customize.ui import available_input_formats
fmts = set(available_input_formats()) fmts = set(available_input_formats())
@ -915,7 +916,7 @@ OptionRecommendation(name='search_replace',
try: try:
val = parse_date(val, assume_utc=x=='timestamp') val = parse_date(val, assume_utc=x=='timestamp')
except: except:
self.log.exception(_('Failed to parse date/time') + ' ' + unicode(val)) self.log.exception(_('Failed to parse date/time') + ' ' + unicode_type(val))
continue continue
setattr(mi, x, val) setattr(mi, x, val)

View File

@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
import functools, re, json import functools, re, json
from calibre import entity_to_unicode, as_unicode from calibre import entity_to_unicode, as_unicode
from polyglot.builtins import unicode_type
XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>') XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>')
SVG_NS = 'http://www.w3.org/2000/svg' SVG_NS = 'http://www.w3.org/2000/svg'
@ -218,8 +219,8 @@ class Dehyphenator(object):
wraptags = match.group('wraptags') wraptags = match.group('wraptags')
except: except:
wraptags = '' wraptags = ''
hyphenated = unicode(firsthalf) + "-" + unicode(secondhalf) hyphenated = unicode_type(firsthalf) + "-" + unicode_type(secondhalf)
dehyphenated = unicode(firsthalf) + unicode(secondhalf) dehyphenated = unicode_type(firsthalf) + unicode_type(secondhalf)
if self.suffixes.match(secondhalf) is None: if self.suffixes.match(secondhalf) is None:
lookupword = self.removesuffixes.sub('', dehyphenated) lookupword = self.removesuffixes.sub('', dehyphenated)
else: else:
@ -315,7 +316,7 @@ class CSSPreProcessor(object):
# are commented lines before the first @import or @charset rule. Since # are commented lines before the first @import or @charset rule. Since
# the conversion will remove all stylesheets anyway, we don't lose # the conversion will remove all stylesheets anyway, we don't lose
# anything # anything
data = re.sub(unicode(r'/\*.*?\*/'), u'', data, flags=re.DOTALL) data = re.sub(unicode_type(r'/\*.*?\*/'), u'', data, flags=re.DOTALL)
ans, namespaced = [], False ans, namespaced = [], False
for line in data.splitlines(): for line in data.splitlines():

View File

@ -10,6 +10,7 @@ from math import ceil
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
from calibre.utils.wordcount import get_wordcount_obj from calibre.utils.wordcount import get_wordcount_obj
from polyglot.builtins import unicode_type
class HeuristicProcessor(object): class HeuristicProcessor(object):
@ -50,8 +51,8 @@ class HeuristicProcessor(object):
title = match.group('title') title = match.group('title')
if not title: if not title:
self.html_preprocess_sections = self.html_preprocess_sections + 1 self.html_preprocess_sections = self.html_preprocess_sections + 1
self.log.debug("marked " + unicode(self.html_preprocess_sections) + self.log.debug("marked " + unicode_type(self.html_preprocess_sections) +
" chapters. - " + unicode(chap)) " chapters. - " + unicode_type(chap))
return '<h2>'+chap+'</h2>\n' return '<h2>'+chap+'</h2>\n'
else: else:
delete_whitespace = re.compile('^\\s*(?P<c>.*?)\\s*$') delete_whitespace = re.compile('^\\s*(?P<c>.*?)\\s*$')
@ -59,16 +60,16 @@ class HeuristicProcessor(object):
txt_chap = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(chap))) txt_chap = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(chap)))
txt_title = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(title))) txt_title = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(title)))
self.html_preprocess_sections = self.html_preprocess_sections + 1 self.html_preprocess_sections = self.html_preprocess_sections + 1
self.log.debug("marked " + unicode(self.html_preprocess_sections) + self.log.debug("marked " + unicode_type(self.html_preprocess_sections) +
" chapters & titles. - " + unicode(chap) + ", " + unicode(title)) " chapters & titles. - " + unicode_type(chap) + ", " + unicode_type(title))
return '<h2 title="'+txt_chap+', '+txt_title+'">'+chap+'</h2>\n<h3 class="sigilNotInTOC">'+title+'</h3>\n' return '<h2 title="'+txt_chap+', '+txt_title+'">'+chap+'</h2>\n<h3 class="sigilNotInTOC">'+title+'</h3>\n'
def chapter_break(self, match): def chapter_break(self, match):
chap = match.group('section') chap = match.group('section')
styles = match.group('styles') styles = match.group('styles')
self.html_preprocess_sections = self.html_preprocess_sections + 1 self.html_preprocess_sections = self.html_preprocess_sections + 1
self.log.debug("marked " + unicode(self.html_preprocess_sections) + self.log.debug("marked " + unicode_type(self.html_preprocess_sections) +
" section markers based on punctuation. - " + unicode(chap)) " section markers based on punctuation. - " + unicode_type(chap))
return '<'+styles+' style="page-break-before:always">'+chap return '<'+styles+' style="page-break-before:always">'+chap
def analyze_title_matches(self, match): def analyze_title_matches(self, match):
@ -111,8 +112,8 @@ class HeuristicProcessor(object):
line_end = line_end_ere.findall(raw) line_end = line_end_ere.findall(raw)
tot_htm_ends = len(htm_end) tot_htm_ends = len(htm_end)
tot_ln_fds = len(line_end) tot_ln_fds = len(line_end)
# self.log.debug("There are " + unicode(tot_ln_fds) + " total Line feeds, and " + # self.log.debug("There are " + unicode_type(tot_ln_fds) + " total Line feeds, and " +
# unicode(tot_htm_ends) + " marked up endings") # unicode_type(tot_htm_ends) + " marked up endings")
if percent > 1: if percent > 1:
percent = 1 percent = 1
@ -120,7 +121,7 @@ class HeuristicProcessor(object):
percent = 0 percent = 0
min_lns = tot_ln_fds * percent min_lns = tot_ln_fds * percent
# self.log.debug("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup") # self.log.debug("There must be fewer than " + unicode_type(min_lns) + " unmarked lines to add markup")
return min_lns > tot_htm_ends return min_lns > tot_htm_ends
def dump(self, raw, where): def dump(self, raw, where):
@ -157,17 +158,17 @@ class HeuristicProcessor(object):
] ]
ITALICIZE_STYLE_PATS = [ ITALICIZE_STYLE_PATS = [
unicode(r'(?msu)(?<=[\s>"\'])_\*/(?P<words>[^\*_]+)/\*_'), unicode_type(r'(?msu)(?<=[\s>"\'])_\*/(?P<words>[^\*_]+)/\*_'),
unicode(r'(?msu)(?<=[\s>"\'])~~(?P<words>[^~]+)~~'), unicode_type(r'(?msu)(?<=[\s>"\'])~~(?P<words>[^~]+)~~'),
unicode(r'(?msu)(?<=[\s>"\'])_/(?P<words>[^/_]+)/_'), unicode_type(r'(?msu)(?<=[\s>"\'])_/(?P<words>[^/_]+)/_'),
unicode(r'(?msu)(?<=[\s>"\'])_\*(?P<words>[^\*_]+)\*_'), unicode_type(r'(?msu)(?<=[\s>"\'])_\*(?P<words>[^\*_]+)\*_'),
unicode(r'(?msu)(?<=[\s>"\'])\*/(?P<words>[^/\*]+)/\*'), unicode_type(r'(?msu)(?<=[\s>"\'])\*/(?P<words>[^/\*]+)/\*'),
unicode(r'(?msu)(?<=[\s>"\'])/:(?P<words>[^:/]+):/'), unicode_type(r'(?msu)(?<=[\s>"\'])/:(?P<words>[^:/]+):/'),
unicode(r'(?msu)(?<=[\s>"\'])\|:(?P<words>[^:\|]+):\|'), unicode_type(r'(?msu)(?<=[\s>"\'])\|:(?P<words>[^:\|]+):\|'),
unicode(r'(?msu)(?<=[\s>"\'])\*(?P<words>[^\*]+)\*'), unicode_type(r'(?msu)(?<=[\s>"\'])\*(?P<words>[^\*]+)\*'),
unicode(r'(?msu)(?<=[\s>"\'])~(?P<words>[^~]+)~'), unicode_type(r'(?msu)(?<=[\s>"\'])~(?P<words>[^~]+)~'),
unicode(r'(?msu)(?<=[\s>"\'])/(?P<words>[^/\*><]+)/'), unicode_type(r'(?msu)(?<=[\s>"\'])/(?P<words>[^/\*><]+)/'),
unicode(r'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_'), unicode_type(r'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_'),
] ]
for word in ITALICIZE_WORDS: for word in ITALICIZE_WORDS:
@ -177,10 +178,10 @@ class HeuristicProcessor(object):
search_text = re.sub(r'<[^>]*>', '', search_text) search_text = re.sub(r'<[^>]*>', '', search_text)
for pat in ITALICIZE_STYLE_PATS: for pat in ITALICIZE_STYLE_PATS:
for match in re.finditer(pat, search_text): for match in re.finditer(pat, search_text):
ital_string = unicode(match.group('words')) ital_string = unicode_type(match.group('words'))
# self.log.debug("italicising "+unicode(match.group(0))+" with <i>"+ital_string+"</i>") # self.log.debug("italicising "+unicode_type(match.group(0))+" with <i>"+ital_string+"</i>")
try: try:
html = re.sub(re.escape(unicode(match.group(0))), '<i>%s</i>' % ital_string, html) html = re.sub(re.escape(unicode_type(match.group(0))), '<i>%s</i>' % ital_string, html)
except OverflowError: except OverflowError:
# match.group(0) was too large to be compiled into a regex # match.group(0) was too large to be compiled into a regex
continue continue
@ -205,10 +206,10 @@ class HeuristicProcessor(object):
if wordcount > 200000: if wordcount > 200000:
typical_chapters = 15000. typical_chapters = 15000.
self.min_chapters = int(ceil(wordcount / typical_chapters)) self.min_chapters = int(ceil(wordcount / typical_chapters))
self.log.debug("minimum chapters required are: "+unicode(self.min_chapters)) self.log.debug("minimum chapters required are: "+unicode_type(self.min_chapters))
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE) heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
self.html_preprocess_sections = len(heading.findall(html)) self.html_preprocess_sections = len(heading.findall(html))
self.log.debug("found " + unicode(self.html_preprocess_sections) + " pre-existing headings") self.log.debug("found " + unicode_type(self.html_preprocess_sections) + " pre-existing headings")
# Build the Regular Expressions in pieces # Build the Regular Expressions in pieces
init_lookahead = "(?=<(p|div))" init_lookahead = "(?=<(p|div))"
@ -295,7 +296,7 @@ class HeuristicProcessor(object):
if n_lookahead_req: if n_lookahead_req:
n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line) n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line)
if not analyze: if not analyze:
self.log.debug("Marked " + unicode(self.html_preprocess_sections) + " headings, " + log_message) self.log.debug("Marked " + unicode_type(self.html_preprocess_sections) + " headings, " + log_message)
chapter_marker = arg_ignorecase+init_lookahead+full_chapter_line+blank_lines+lp_n_lookahead_open+n_lookahead+lp_n_lookahead_close+ \ chapter_marker = arg_ignorecase+init_lookahead+full_chapter_line+blank_lines+lp_n_lookahead_open+n_lookahead+lp_n_lookahead_close+ \
lp_opt_title_open+title_line_open+title_header_open+lp_title+title_header_close+title_line_close+lp_opt_title_close lp_opt_title_open+title_line_open+title_header_open+lp_title+title_header_close+title_line_close+lp_opt_title_close
@ -308,9 +309,9 @@ class HeuristicProcessor(object):
if float(self.chapters_with_title) / float(hits) > .5: if float(self.chapters_with_title) / float(hits) > .5:
title_req = True title_req = True
strict_title = False strict_title = False
self.log.debug(unicode(type_name)+" had "+unicode(hits)+" hits - "+unicode(self.chapters_no_title)+" chapters with no title, "+ self.log.debug(unicode_type(type_name)+" had "+unicode_type(hits)+" hits - "+unicode_type(self.chapters_no_title)+" chapters with no title, "+
unicode(self.chapters_with_title)+" chapters with titles, "+ unicode_type(self.chapters_with_title)+" chapters with titles, "+
unicode(float(self.chapters_with_title) / float(hits))+" percent. ") unicode_type(float(self.chapters_with_title) / float(hits))+" percent. ")
if type_name == 'common': if type_name == 'common':
analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name]) analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name])
elif self.min_chapters <= hits < max_chapters or self.min_chapters < 3 > hits: elif self.min_chapters <= hits < max_chapters or self.min_chapters < 3 > hits:
@ -327,8 +328,8 @@ class HeuristicProcessor(object):
words_per_chptr = wordcount words_per_chptr = wordcount
if words_per_chptr > 0 and self.html_preprocess_sections > 0: if words_per_chptr > 0 and self.html_preprocess_sections > 0:
words_per_chptr = wordcount / self.html_preprocess_sections words_per_chptr = wordcount / self.html_preprocess_sections
self.log.debug("Total wordcount is: "+ unicode(wordcount)+", Average words per section is: "+ self.log.debug("Total wordcount is: "+ unicode_type(wordcount)+", Average words per section is: "+
unicode(words_per_chptr)+", Marked up "+unicode(self.html_preprocess_sections)+" chapters") unicode_type(words_per_chptr)+", Marked up "+unicode_type(self.html_preprocess_sections)+" chapters")
return html return html
def punctuation_unwrap(self, length, content, format): def punctuation_unwrap(self, length, content, format):
@ -358,8 +359,8 @@ class HeuristicProcessor(object):
# define the pieces of the regex # define the pieces of the regex
# (?<!\&\w{4});) is a semicolon not part of an entity # (?<!\&\w{4});) is a semicolon not part of an entity
lookahead = "(?<=.{"+unicode(length)+u"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IA\u00DF]|(?<!\\&\\w{4});))" lookahead = "(?<=.{"+unicode_type(length)+u"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IA\u00DF]|(?<!\\&\\w{4});))"
em_en_lookahead = "(?<=.{"+unicode(length)+u"}[\u2013\u2014])" em_en_lookahead = "(?<=.{"+unicode_type(length)+u"}[\u2013\u2014])"
soft_hyphen = u"\xad" soft_hyphen = u"\xad"
line_ending = "\\s*(?P<style_close></(span|[iub])>)?\\s*(</(p|div)>)?" line_ending = "\\s*(?P<style_close></(span|[iub])>)?\\s*(</(p|div)>)?"
blanklines = "\\s*(?P<up2threeblanks><(p|span|div)[^>]*>\\s*(<(p|span|div)[^>]*>\\s*</(span|p|div)>\\s*)</(span|p|div)>\\s*){0,3}\\s*" blanklines = "\\s*(?P<up2threeblanks><(p|span|div)[^>]*>\\s*(<(p|span|div)[^>]*>\\s*</(span|p|div)>\\s*)</(span|p|div)>\\s*){0,3}\\s*"
@ -419,18 +420,18 @@ class HeuristicProcessor(object):
return html return html
def fix_nbsp_indents(self, html): def fix_nbsp_indents(self, html):
txtindent = re.compile(unicode(r'<(?P<tagtype>p|div)(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}'), re.IGNORECASE) txtindent = re.compile(unicode_type(r'<(?P<tagtype>p|div)(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}'), re.IGNORECASE)
html = txtindent.sub(self.insert_indent, html) html = txtindent.sub(self.insert_indent, html)
if self.found_indents > 1: if self.found_indents > 1:
self.log.debug("replaced "+unicode(self.found_indents)+ " nbsp indents with inline styles") self.log.debug("replaced "+unicode_type(self.found_indents)+ " nbsp indents with inline styles")
return html return html
def cleanup_markup(self, html): def cleanup_markup(self, html):
# remove remaining non-breaking spaces # remove remaining non-breaking spaces
html = re.sub(unicode(r'\u00a0'), ' ', html) html = re.sub(unicode_type(r'\u00a0'), ' ', html)
# Get rid of various common microsoft specific tags which can cause issues later # Get rid of various common microsoft specific tags which can cause issues later
# Get rid of empty <o:p> tags to simplify other processing # Get rid of empty <o:p> tags to simplify other processing
html = re.sub(unicode(r'\s*<o:p>\s*</o:p>'), ' ', html) html = re.sub(unicode_type(r'\s*<o:p>\s*</o:p>'), ' ', html)
# Delete microsoft 'smart' tags # Delete microsoft 'smart' tags
html = re.sub('(?i)</?st1:\\w+>', '', html) html = re.sub('(?i)</?st1:\\w+>', '', html)
# Re-open self closing paragraph tags # Re-open self closing paragraph tags
@ -470,8 +471,8 @@ class HeuristicProcessor(object):
blanklines = self.blankreg.findall(html) blanklines = self.blankreg.findall(html)
lines = self.linereg.findall(html) lines = self.linereg.findall(html)
if len(lines) > 1: if len(lines) > 1:
self.log.debug("There are " + unicode(len(blanklines)) + " blank lines. " + self.log.debug("There are " + unicode_type(len(blanklines)) + " blank lines. " +
unicode(float(len(blanklines)) / float(len(lines))) + " percent blank") unicode_type(float(len(blanklines)) / float(len(lines))) + " percent blank")
if float(len(blanklines)) / float(len(lines)) > 0.40: if float(len(blanklines)) / float(len(lines)) > 0.40:
return True return True
@ -493,11 +494,11 @@ class HeuristicProcessor(object):
lines = float(len(self.single_blank.findall(to_merge))) - 1. lines = float(len(self.single_blank.findall(to_merge))) - 1.
em = base_em + (em_per_line * lines) em = base_em + (em_per_line * lines)
if to_merge.find('whitespace'): if to_merge.find('whitespace'):
newline = self.any_multi_blank.sub('\n<p class="whitespace'+unicode(int(em * 10))+ newline = self.any_multi_blank.sub('\n<p class="whitespace'+unicode_type(int(em * 10))+
'" style="text-align:center; margin-top:'+unicode(em)+'em"> </p>', match.group(0)) '" style="text-align:center; margin-top:'+unicode_type(em)+'em"> </p>', match.group(0))
else: else:
newline = self.any_multi_blank.sub('\n<p class="softbreak'+unicode(int(em * 10))+ newline = self.any_multi_blank.sub('\n<p class="softbreak'+unicode_type(int(em * 10))+
'" style="text-align:center; margin-top:'+unicode(em)+'em"> </p>', match.group(0)) '" style="text-align:center; margin-top:'+unicode_type(em)+'em"> </p>', match.group(0))
return newline return newline
html = self.any_multi_blank.sub(merge_matches, html) html = self.any_multi_blank.sub(merge_matches, html)
@ -518,9 +519,9 @@ class HeuristicProcessor(object):
top_margin = '' top_margin = ''
bottom_margin = '' bottom_margin = ''
if initblanks is not None: if initblanks is not None:
top_margin = 'margin-top:'+unicode(len(self.single_blank.findall(initblanks)))+'em;' top_margin = 'margin-top:'+unicode_type(len(self.single_blank.findall(initblanks)))+'em;'
if endblanks is not None: if endblanks is not None:
bottom_margin = 'margin-bottom:'+unicode(len(self.single_blank.findall(endblanks)))+'em;' bottom_margin = 'margin-bottom:'+unicode_type(len(self.single_blank.findall(endblanks)))+'em;'
if initblanks is None and endblanks is None: if initblanks is None and endblanks is None:
return content return content
@ -597,7 +598,7 @@ class HeuristicProcessor(object):
else: else:
replacement_break = re.sub('(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '', replacement_break) replacement_break = re.sub('(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '', replacement_break)
divpercent = (100 - width) / 2 divpercent = (100 - width) / 2
hr_open = re.sub('45', unicode(divpercent), hr_open) hr_open = re.sub('45', unicode_type(divpercent), hr_open)
scene_break = hr_open+replacement_break+'</div>' scene_break = hr_open+replacement_break+'</div>'
else: else:
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>' scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
@ -657,12 +658,12 @@ class HeuristicProcessor(object):
else: else:
styles = match.group('styles').split(';') styles = match.group('styles').split(';')
is_paragraph = self.check_paragraph(content) is_paragraph = self.check_paragraph(content)
# print "styles for this line are: "+unicode(styles) # print "styles for this line are: "+unicode_type(styles)
split_styles = [] split_styles = []
for style in styles: for style in styles:
# print "style is: "+unicode(style) # print "style is: "+unicode_type(style)
newstyle = style.split(':') newstyle = style.split(':')
# print "newstyle is: "+unicode(newstyle) # print "newstyle is: "+unicode_type(newstyle)
split_styles.append(newstyle) split_styles.append(newstyle)
styles = split_styles styles = split_styles
for style, setting in styles: for style, setting in styles:
@ -673,7 +674,7 @@ class HeuristicProcessor(object):
if 9 < setting < 14: if 9 < setting < 14:
text_indent = indented_text text_indent = indented_text
else: else:
text_indent = style+':'+unicode(setting)+'pt;' text_indent = style+':'+unicode_type(setting)+'pt;'
if style == 'padding': if style == 'padding':
setting = re.sub('pt', '', setting).split(' ') setting = re.sub('pt', '', setting).split(' ')
if int(setting[1]) < 16 and int(setting[3]) < 16: if int(setting[1]) < 16 and int(setting[3]) < 16:
@ -694,23 +695,23 @@ class HeuristicProcessor(object):
blockquote_open_loop = blockquote_open blockquote_open_loop = blockquote_open
if debugabby: if debugabby:
self.log.debug('\n\n******\n') self.log.debug('\n\n******\n')
self.log.debug('padding top is: '+unicode(setting[0])) self.log.debug('padding top is: '+unicode_type(setting[0]))
self.log.debug('padding right is:' +unicode(setting[1])) self.log.debug('padding right is:' +unicode_type(setting[1]))
self.log.debug('padding bottom is: ' + unicode(setting[2])) self.log.debug('padding bottom is: ' + unicode_type(setting[2]))
self.log.debug('padding left is: ' +unicode(setting[3])) self.log.debug('padding left is: ' +unicode_type(setting[3]))
# print "text-align is: "+unicode(text_align) # print "text-align is: "+unicode_type(text_align)
# print "\n***\nline is:\n "+unicode(match.group(0))+'\n' # print "\n***\nline is:\n "+unicode_type(match.group(0))+'\n'
if debugabby: if debugabby:
# print "this line is a paragraph = "+unicode(is_paragraph)+", previous line was "+unicode(self.previous_was_paragraph) # print "this line is a paragraph = "+unicode_type(is_paragraph)+", previous line was "+unicode_type(self.previous_was_paragraph)
self.log.debug("styles for this line were:", styles) self.log.debug("styles for this line were:", styles)
self.log.debug('newline is:') self.log.debug('newline is:')
self.log.debug(blockquote_open_loop+blockquote_close_loop+ self.log.debug(blockquote_open_loop+blockquote_close_loop+
paragraph_before+'<p style="'+text_indent+text_align+ paragraph_before+'<p style="'+text_indent+text_align+
'">'+content+'</p>'+paragraph_after+'\n\n\n\n\n') '">'+content+'</p>'+paragraph_after+'\n\n\n\n\n')
# print "is_paragraph is "+unicode(is_paragraph)+", previous_was_paragraph is "+unicode(self.previous_was_paragraph) # print "is_paragraph is "+unicode_type(is_paragraph)+", previous_was_paragraph is "+unicode_type(self.previous_was_paragraph)
self.previous_was_paragraph = is_paragraph self.previous_was_paragraph = is_paragraph
# print "previous_was_paragraph is now set to "+unicode(self.previous_was_paragraph)+"\n\n\n" # print "previous_was_paragraph is now set to "+unicode_type(self.previous_was_paragraph)+"\n\n\n"
return blockquote_open_loop+blockquote_close_loop+paragraph_before+'<p style="'+text_indent+text_align+'">'+content+'</p>'+paragraph_after return blockquote_open_loop+blockquote_close_loop+paragraph_before+'<p style="'+text_indent+text_align+'">'+content+'</p>'+paragraph_after
html = abbyy_line.sub(convert_styles, html) html = abbyy_line.sub(convert_styles, html)
@ -793,12 +794,12 @@ class HeuristicProcessor(object):
# more of the lines break in the same region of the document then unwrapping is required # more of the lines break in the same region of the document then unwrapping is required
docanalysis = DocAnalysis(format, html) docanalysis = DocAnalysis(format, html)
hardbreaks = docanalysis.line_histogram(.50) hardbreaks = docanalysis.line_histogram(.50)
self.log.debug("Hard line breaks check returned "+unicode(hardbreaks)) self.log.debug("Hard line breaks check returned "+unicode_type(hardbreaks))
# Calculate Length # Calculate Length
unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4) unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
length = docanalysis.line_length(unwrap_factor) length = docanalysis.line_length(unwrap_factor)
self.log.debug("Median line length is " + unicode(length) + ", calculated with " + format + " format") self.log.debug("Median line length is " + unicode_type(length) + ", calculated with " + format + " format")
# ##### Unwrap lines ###### # ##### Unwrap lines ######
if getattr(self.extra_opts, 'unwrap_lines', False): if getattr(self.extra_opts, 'unwrap_lines', False):
@ -820,7 +821,7 @@ class HeuristicProcessor(object):
# If still no sections after unwrapping mark split points on lines with no punctuation # If still no sections after unwrapping mark split points on lines with no punctuation
if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False): if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False):
self.log.debug("Looking for more split points based on punctuation," self.log.debug("Looking for more split points based on punctuation,"
" currently have " + unicode(self.html_preprocess_sections)) " currently have " + unicode_type(self.html_preprocess_sections))
chapdetect3 = re.compile( chapdetect3 = re.compile(
r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([\W]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) # noqa r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([\W]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) # noqa
html = chapdetect3.sub(self.chapter_break, html) html = chapdetect3.sub(self.chapter_break, html)
View File
@ -20,6 +20,7 @@ from calibre.utils.localization import canonicalize_lang
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
from polyglot.builtins import unicode_type
def fromstring(raw, parser=RECOVER_PARSER): def fromstring(raw, parser=RECOVER_PARSER):
@ -56,7 +57,7 @@ def read_doc_props(raw, mi, XPath):
desc = XPath('//dc:description')(root) desc = XPath('//dc:description')(root)
if desc: if desc:
raw = etree.tostring(desc[0], method='text', encoding=unicode) raw = etree.tostring(desc[0], method='text', encoding=unicode_type)
raw = raw.replace('_x000d_', '') # Word 2007 mangles newlines in the summary raw = raw.replace('_x000d_', '') # Word 2007 mangles newlines in the summary
mi.comments = raw.strip() mi.comments = raw.strip()
View File
@ -14,6 +14,7 @@ from calibre.utils.filenames import ascii_filename
from calibre.utils.fonts.scanner import font_scanner, NoFonts from calibre.utils.fonts.scanner import font_scanner, NoFonts
from calibre.utils.fonts.utils import panose_to_css_generic_family, is_truetype_font from calibre.utils.fonts.utils import panose_to_css_generic_family, is_truetype_font
from calibre.utils.icu import ord_string from calibre.utils.icu import ord_string
from polyglot.builtins import codepoint_to_chr
Embed = namedtuple('Embed', 'name key subsetted') Embed = namedtuple('Embed', 'name key subsetted')
@ -124,7 +125,7 @@ def do_map(m, points):
if base < p < limit: if base < p < limit:
yield m[p - base] yield m[p - base]
else: else:
yield unichr(p) yield codepoint_to_chr(p)
def map_symbol_text(text, font): def map_symbol_text(text, font):
View File
@ -11,6 +11,7 @@ from operator import itemgetter
from lxml import etree from lxml import etree
from calibre.utils.icu import partition_by_first_letter, sort_key from calibre.utils.icu import partition_by_first_letter, sort_key
from polyglot.builtins import unicode_type
def get_applicable_xe_fields(index, xe_fields, XPath, expand): def get_applicable_xe_fields(index, xe_fields, XPath, expand):
@ -246,7 +247,7 @@ def polish_index_markup(index, blocks):
a = block.xpath('descendant::a[1]') a = block.xpath('descendant::a[1]')
text = '' text = ''
if a: if a:
text = etree.tostring(a[0], method='text', with_tail=False, encoding=unicode).strip() text = etree.tostring(a[0], method='text', with_tail=False, encoding=unicode_type).strip()
if ':' in text: if ':' in text:
path_map[block] = parts = filter(None, (x.strip() for x in text.split(':'))) path_map[block] = parts = filter(None, (x.strip() for x in text.split(':')))
if len(parts) > 1: if len(parts) > 1:
View File
@ -504,8 +504,6 @@ class Table(object):
def resolve_cell_style(self, tc, overrides, row, col, rows, cols_in_row): def resolve_cell_style(self, tc, overrides, row, col, rows, cols_in_row):
cs = CellStyle(self.namespace) cs = CellStyle(self.namespace)
# from lxml.etree import tostring
# txt = tostring(tc, method='text', encoding=unicode)
for o in overrides: for o in overrides:
if o in self.overrides: if o in self.overrides:
ovr = self.overrides[o] ovr = self.overrides[o]
@ -699,4 +697,3 @@ class Tables(object):
table = self.para_map.get(p, None) table = self.para_map.get(p, None)
if table is not None: if table is not None:
return table.style_map.get(p, (None, None))[1] return table.style_map.get(p, (None, None))[1]
View File
@ -13,6 +13,7 @@ from lxml.etree import tostring
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.oeb.polish.toc import elem_to_toc_text from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
from polyglot.builtins import unicode_type
def from_headings(body, log, namespace): def from_headings(body, log, namespace):
@ -93,7 +94,7 @@ def link_to_txt(a, styles, object_map):
if rs.css.get('display', None) == 'none': if rs.css.get('display', None) == 'none':
a.remove(child) a.remove(child)
return tostring(a, method='text', with_tail=False, encoding=unicode).strip() return tostring(a, method='text', with_tail=False, encoding=unicode_type).strip()
def from_toc(docx, link_map, styles, object_map, log, namespace): def from_toc(docx, link_map, styles, object_map, log, namespace):
View File
@ -19,6 +19,7 @@ from calibre.ebooks.docx.writer.lists import ListsManager
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
from calibre.ebooks.oeb.base import XPath, barename from calibre.ebooks.oeb.base import XPath, barename
from calibre.utils.localization import lang_as_iso639_1 from calibre.utils.localization import lang_as_iso639_1
from polyglot.builtins import unicode_type
def lang_for_tag(tag): def lang_for_tag(tag):
@ -439,8 +440,8 @@ class Convert(object):
if self.add_toc: if self.add_toc:
self.links_manager.process_toc_links(self.oeb) self.links_manager.process_toc_links(self.oeb)
if self.add_cover and self.oeb.metadata.cover and unicode(self.oeb.metadata.cover[0]) in self.oeb.manifest.ids: if self.add_cover and self.oeb.metadata.cover and unicode_type(self.oeb.metadata.cover[0]) in self.oeb.manifest.ids:
cover_id = unicode(self.oeb.metadata.cover[0]) cover_id = unicode_type(self.oeb.metadata.cover[0])
item = self.oeb.manifest.ids[cover_id] item = self.oeb.manifest.ids[cover_id]
self.cover_img = self.images_manager.read_image(item.href) self.cover_img = self.images_manager.read_image(item.href)
View File
@ -14,6 +14,7 @@ from lxml import etree
from calibre.ebooks import parse_css_length from calibre.ebooks import parse_css_length
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
from calibre.utils.localization import lang_as_iso639_1 from calibre.utils.localization import lang_as_iso639_1
from polyglot.builtins import unicode_type
from tinycss.css21 import CSS21Parser from tinycss.css21 import CSS21Parser
css_parser = CSS21Parser() css_parser = CSS21Parser()
@ -45,7 +46,7 @@ def bmap(x):
def is_dropcaps(html_tag, tag_style): def is_dropcaps(html_tag, tag_style):
return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding=unicode, with_tail=False)) < 5 and tag_style['float'] == 'left' return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding=unicode_type, with_tail=False)) < 5 and tag_style['float'] == 'left'
class CombinedStyle(object): class CombinedStyle(object):
View File
@ -10,6 +10,7 @@ import unittest
from polyglot.builtins import map from polyglot.builtins import map
from calibre.ebooks.epub.cfi.parse import parser, cfi_sort_key, decode_cfi from calibre.ebooks.epub.cfi.parse import parser, cfi_sort_key, decode_cfi
from polyglot.builtins import unicode_type
class Tests(unittest.TestCase): class Tests(unittest.TestCase):
@ -60,7 +61,7 @@ class Tests(unittest.TestCase):
if after is not None: if after is not None:
ta['after'] = after ta['after'] = after
if params: if params:
ta['params'] = {unicode(k):(v,) if isinstance(v, unicode) else v for k, v in params.iteritems()} ta['params'] = {unicode_type(k):(v,) if isinstance(v, unicode_type) else v for k, v in params.iteritems()}
if ta: if ta:
step['text_assertion'] = ta step['text_assertion'] = ta
return ans return ans
View File
@ -11,6 +11,7 @@ import time
from calibre.constants import __appname__, __version__ from calibre.constants import __appname__, __version__
from calibre import strftime, prepare_string_for_xml as xml from calibre import strftime, prepare_string_for_xml as xml
from calibre.utils.date import parse_date from calibre.utils.date import parse_date
from polyglot.builtins import unicode_type
SONY_METADATA = u'''\ SONY_METADATA = u'''\
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
@ -81,21 +82,21 @@ SONY_ATOM_ENTRY = u'''\
def sony_metadata(oeb): def sony_metadata(oeb):
m = oeb.metadata m = oeb.metadata
title = short_title = unicode(m.title[0]) title = short_title = unicode_type(m.title[0])
publisher = __appname__ + ' ' + __version__ publisher = __appname__ + ' ' + __version__
try: try:
pt = unicode(oeb.metadata.publication_type[0]) pt = unicode_type(oeb.metadata.publication_type[0])
short_title = u':'.join(pt.split(':')[2:]) short_title = u':'.join(pt.split(':')[2:])
except: except:
pass pass
try: try:
date = parse_date(unicode(m.date[0]), date = parse_date(unicode_type(m.date[0]),
as_utc=False).strftime('%Y-%m-%d') as_utc=False).strftime('%Y-%m-%d')
except: except:
date = strftime('%Y-%m-%d') date = strftime('%Y-%m-%d')
try: try:
language = unicode(m.language[0]).replace('_', '-') language = unicode_type(m.language[0]).replace('_', '-')
except: except:
language = 'en' language = 'en'
short_title = xml(short_title, True) short_title = xml(short_title, True)
@ -113,7 +114,7 @@ def sony_metadata(oeb):
return True return True
try: try:
base_id = unicode(list(filter(cal_id, m.identifier))[0]) base_id = unicode_type(list(filter(cal_id, m.identifier))[0])
except: except:
base_id = str(uuid4()) base_id = str(uuid4())
@ -128,7 +129,7 @@ def sony_metadata(oeb):
for x in toc: for x in toc:
section.nodes.append(x) section.nodes.append(x)
toc = TOC(klass='periodical', href=oeb.spine[2].href, toc = TOC(klass='periodical', href=oeb.spine[2].href,
title=unicode(oeb.metadata.title[0])) title=unicode_type(oeb.metadata.title[0]))
toc.nodes.append(section) toc.nodes.append(section)
entries = [] entries = []
@ -188,4 +189,3 @@ def sony_metadata(oeb):
id=xml(base_id)).encode('utf-8') id=xml(base_id)).encode('utf-8')
return metadata, atom return metadata, atom
View File
@ -19,6 +19,7 @@ from calibre.constants import __appname__, __version__
from calibre.utils.localization import lang_as_iso639_1 from calibre.utils.localization import lang_as_iso639_1
from calibre.utils.img import save_cover_data_to from calibre.utils.img import save_cover_data_to
from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.base import urlnormalize
from polyglot.builtins import unicode_type
class FB2MLizer(object): class FB2MLizer(object):
@ -64,7 +65,7 @@ class FB2MLizer(object):
output = self.clean_text(u''.join(output)) output = self.clean_text(u''.join(output))
if self.opts.pretty_print: if self.opts.pretty_print:
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True) return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode_type, pretty_print=True)
else: else:
return u'<?xml version="1.0" encoding="UTF-8"?>' + output return u'<?xml version="1.0" encoding="UTF-8"?>' + output
@ -140,7 +141,7 @@ class FB2MLizer(object):
metadata['author'] = u'<author><first-name></first-name><last-name></last-name></author>' metadata['author'] = u'<author><first-name></first-name><last-name></last-name></author>'
metadata['keywords'] = u'' metadata['keywords'] = u''
tags = list(map(unicode, self.oeb_book.metadata.subject)) tags = list(map(unicode_type, self.oeb_book.metadata.subject))
if tags: if tags:
tags = ', '.join(prepare_string_for_xml(x) for x in tags) tags = ', '.join(prepare_string_for_xml(x) for x in tags)
metadata['keywords'] = '<keywords>%s</keywords>'%tags metadata['keywords'] = '<keywords>%s</keywords>'%tags
@ -155,8 +156,8 @@ class FB2MLizer(object):
year = publisher = isbn = u'' year = publisher = isbn = u''
identifiers = self.oeb_book.metadata['identifier'] identifiers = self.oeb_book.metadata['identifier']
for x in identifiers: for x in identifiers:
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'): if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode_type(x).startswith('urn:uuid:'):
metadata['id'] = unicode(x).split(':')[-1] metadata['id'] = unicode_type(x).split(':')[-1]
break break
if metadata['id'] is None: if metadata['id'] is None:
self.log.warn('No UUID identifier found') self.log.warn('No UUID identifier found')
@ -229,8 +230,8 @@ class FB2MLizer(object):
cover_href = None cover_href = None
# Get the raster cover if it's available. # Get the raster cover if it's available.
if self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids: if self.oeb_book.metadata.cover and unicode_type(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
id = unicode(self.oeb_book.metadata.cover[0]) id = unicode_type(self.oeb_book.metadata.cover[0])
cover_item = self.oeb_book.manifest.ids[id] cover_item = self.oeb_book.manifest.ids[id]
if cover_item.media_type in OEB_RASTER_IMAGES: if cover_item.media_type in OEB_RASTER_IMAGES:
cover_href = cover_item.href cover_href = cover_item.href
View File
@ -19,6 +19,7 @@ from calibre.ebooks.oeb.base import urlunquote
from calibre.ebooks.chardet import detect_xml_encoding from calibre.ebooks.chardet import detect_xml_encoding
from calibre.constants import iswindows from calibre.constants import iswindows
from calibre import unicode_path, as_unicode, replace_entities from calibre import unicode_path, as_unicode, replace_entities
from polyglot.builtins import unicode_type
class Link(object): class Link(object):
@ -46,7 +47,7 @@ class Link(object):
:param base: The base directory that relative URLs are with respect to. :param base: The base directory that relative URLs are with respect to.
Must be a unicode string. Must be a unicode string.
''' '''
assert isinstance(url, unicode) and isinstance(base, unicode) assert isinstance(url, unicode_type) and isinstance(base, unicode_type)
self.url = url self.url = url
self.parsed_url = urlparse(self.url) self.parsed_url = urlparse(self.url)
self.is_local = self.parsed_url.scheme in ('', 'file') self.is_local = self.parsed_url.scheme in ('', 'file')
@ -248,6 +249,3 @@ def get_filelist(htmlfile, dir, opts, log):
for f in filelist: for f in filelist:
log.debug('\t\t', f) log.debug('\t\t', f)
return filelist return filelist
View File
@ -11,6 +11,7 @@ import textwrap, os, glob
from calibre.customize import FileTypePlugin from calibre.customize import FileTypePlugin
from calibre.constants import numeric_version from calibre.constants import numeric_version
from polyglot.builtins import unicode_type
class HTML2ZIP(FileTypePlugin): class HTML2ZIP(FileTypePlugin):
@ -114,10 +115,9 @@ every time you add an HTML file to the library.\
config_dialog.exec_() config_dialog.exec_()
if config_dialog.result() == QDialog.Accepted: if config_dialog.result() == QDialog.Accepted:
sc = unicode(sc.text()).strip() sc = unicode_type(sc.text()).strip()
if bf.isChecked(): if bf.isChecked():
sc += '|bf' sc += '|bf'
customize_plugin(self, sc) customize_plugin(self, sc)
return config_dialog.result() return config_dialog.result()
View File
@ -22,6 +22,7 @@ from calibre.ebooks.oeb.base import (
XHTML, XHTML_NS, barename, namespace, OEB_IMAGES, XLINK, rewrite_links, urlnormalize) XHTML, XHTML_NS, barename, namespace, OEB_IMAGES, XLINK, rewrite_links, urlnormalize)
from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
from polyglot.builtins import unicode_type
SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img', 'link', 'meta'} SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img', 'link', 'meta'}
@ -46,7 +47,7 @@ class OEB2HTML(object):
self.log.info('Converting OEB book to HTML...') self.log.info('Converting OEB book to HTML...')
self.opts = opts self.opts = opts
try: try:
self.book_title = unicode(oeb_book.metadata.title[0]) self.book_title = unicode_type(oeb_book.metadata.title[0])
except Exception: except Exception:
self.book_title = _('Unknown') self.book_title = _('Unknown')
self.links = {} self.links = {}
View File
@ -22,6 +22,7 @@ from calibre.ebooks.oeb.base import urlnormalize, xpath
from calibre.ebooks.oeb.reader import OEBReader from calibre.ebooks.oeb.reader import OEBReader
from calibre.ebooks import DRMError from calibre.ebooks import DRMError
from calibre import plugins from calibre import plugins
from polyglot.builtins import codepoint_to_chr, unicode_type
lzx, lxzerror = plugins['lzx'] lzx, lxzerror = plugins['lzx']
msdes, msdeserror = plugins['msdes'] msdes, msdeserror = plugins['msdes']
@ -110,7 +111,7 @@ def read_utf8_char(bytes, pos):
raise LitError( raise LitError(
'Invalid UTF8 character: %s' % repr(bytes[pos:pos+i])) 'Invalid UTF8 character: %s' % repr(bytes[pos:pos+i]))
c = (c << 6) | (b & 0x3F) c = (c << 6) | (b & 0x3F)
return unichr(c), pos+elsize return codepoint_to_chr(c), pos+elsize
def consume_sized_utf8_string(bytes, zpad=False): def consume_sized_utf8_string(bytes, zpad=False):
@ -125,7 +126,7 @@ def consume_sized_utf8_string(bytes, zpad=False):
def encode(string): def encode(string):
return unicode(string).encode('ascii', 'xmlcharrefreplace') return unicode_type(string).encode('ascii', 'xmlcharrefreplace')
class UnBinary(object): class UnBinary(object):
@ -243,9 +244,9 @@ class UnBinary(object):
else: else:
dynamic_tag += 1 dynamic_tag += 1
errors += 1 errors += 1
tag_name = '?'+unichr(tag)+'?' tag_name = '?'+codepoint_to_chr(tag)+'?'
current_map = self.tag_to_attr_map[tag] current_map = self.tag_to_attr_map[tag]
print('WARNING: tag %s unknown' % unichr(tag)) print('WARNING: tag %s unknown' % codepoint_to_chr(tag))
buf.write(encode(tag_name)) buf.write(encode(tag_name))
elif flags & FLAG_CLOSING: elif flags & FLAG_CLOSING:
if depth == 0: if depth == 0:
@ -947,4 +948,3 @@ class LitReader(OEBReader):
item.media_type = 'application/xhtml+xml' item.media_type = 'application/xhtml+xml'
item.data = item._parse_xhtml(etree.tostring(item.data)) item.data = item._parse_xhtml(etree.tostring(item.data))
super(LitReader, self)._spine_from_opf(opf) super(LitReader, self)._spine_from_opf(opf)
View File
@ -31,6 +31,7 @@ import calibre
from calibre import plugins from calibre import plugins
msdes, msdeserror = plugins['msdes'] msdes, msdeserror = plugins['msdes']
import calibre.ebooks.lit.mssha1 as mssha1 import calibre.ebooks.lit.mssha1 as mssha1
from polyglot.builtins import codepoint_to_chr, unicode_type
__all__ = ['LitWriter'] __all__ = ['LitWriter']
@ -163,9 +164,9 @@ class ReBinary(object):
for value in values: for value in values:
if isinstance(value, (int, long)): if isinstance(value, (int, long)):
try: try:
value = unichr(value) value = codepoint_to_chr(value)
except OverflowError: except OverflowError:
self.logger.warn('Unicode overflow for integer:', value) self.logger.warn('Unicode overflow for integer:', value)
value = u'?' value = u'?'
self.buf.write(value.encode('utf-8')) self.buf.write(value.encode('utf-8'))
@ -216,9 +217,9 @@ class ReBinary(object):
path, frag = urldefrag(value) path, frag = urldefrag(value)
if self.item: if self.item:
path = self.item.abshref(path) path = self.item.abshref(path)
prefix = unichr(3) prefix = codepoint_to_chr(3)
if path in self.manifest.hrefs: if path in self.manifest.hrefs:
prefix = unichr(2) prefix = codepoint_to_chr(2)
value = self.manifest.hrefs[path].id value = self.manifest.hrefs[path].id
if frag: if frag:
value = '#'.join((value, frag)) value = '#'.join((value, frag))
@ -281,9 +282,9 @@ class ReBinary(object):
self.logger.warn("More than six anchors in file %r. " self.logger.warn("More than six anchors in file %r. "
"Some links may not work properly." % self.item.href) "Some links may not work properly." % self.item.href)
data = StringIO() data = StringIO()
data.write(unichr(len(self.anchors)).encode('utf-8')) data.write(codepoint_to_chr(len(self.anchors)).encode('utf-8'))
for anchor, offset in self.anchors: for anchor, offset in self.anchors:
data.write(unichr(len(anchor)).encode('utf-8')) data.write(codepoint_to_chr(len(anchor)).encode('utf-8'))
data.write(anchor) data.write(anchor)
data.write(pack('<I', offset)) data.write(pack('<I', offset))
return data.getvalue() return data.getvalue()
@ -313,7 +314,7 @@ class LitWriter(object):
oeb.metadata.add('calibre-version', calibre.__version__) oeb.metadata.add('calibre-version', calibre.__version__)
cover = None cover = None
if oeb.metadata.cover: if oeb.metadata.cover:
id = unicode(oeb.metadata.cover[0]) id = unicode_type(oeb.metadata.cover[0])
cover = oeb.manifest.ids[id] cover = oeb.manifest.ids[id]
for type, title in ALL_MS_COVER_TYPES: for type, title in ALL_MS_COVER_TYPES:
if type not in oeb.guide: if type not in oeb.guide:
@ -485,7 +486,7 @@ class LitWriter(object):
data = rebin.content data = rebin.content
name = name + '/content' name = name + '/content'
secnum = 1 secnum = 1
elif isinstance(data, unicode): elif isinstance(data, unicode_type):
data = data.encode('utf-8') data = data.encode('utf-8')
elif hasattr(data, 'cssText'): elif hasattr(data, 'cssText'):
data = str(item) data = str(item)
@ -521,9 +522,9 @@ class LitWriter(object):
item.offset = offset \ item.offset = offset \
if state in ('linear', 'nonlinear') else 0 if state in ('linear', 'nonlinear') else 0
data.write(pack('<I', item.offset)) data.write(pack('<I', item.offset))
entry = [unichr(len(id)), unicode(id), entry = [codepoint_to_chr(len(id)), unicode_type(id),
unichr(len(href)), unicode(href), codepoint_to_chr(len(href)), unicode_type(href),
unichr(len(media_type)), unicode(media_type)] codepoint_to_chr(len(media_type)), unicode_type(media_type)]
for value in entry: for value in entry:
data.write(value.encode('utf-8')) data.write(value.encode('utf-8'))
data.write('\0') data.write('\0')
View File
@ -36,6 +36,7 @@ from calibre.ptempfile import PersistentTemporaryFile
from calibre.devices.interface import DevicePlugin as Device from calibre.devices.interface import DevicePlugin as Device
from calibre.ebooks.lrf.html.color_map import lrs_color from calibre.ebooks.lrf.html.color_map import lrs_color
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from polyglot.builtins import unicode_type
def update_css(ncss, ocss): def update_css(ncss, ocss):
@ -54,10 +55,10 @@ def munge_paths(basepath, url):
if not path: if not path:
path = basepath path = basepath
elif not os.path.isabs(path): elif not os.path.isabs(path):
if isinstance(path, unicode): if isinstance(path, unicode_type):
path = path.encode(sys.getfilesystemencoding()) path = path.encode(sys.getfilesystemencoding())
dn = os.path.dirname(basepath) dn = os.path.dirname(basepath)
if isinstance(dn, unicode): if isinstance(dn, unicode_type):
dn = dn.encode(sys.getfilesystemencoding()) dn = dn.encode(sys.getfilesystemencoding())
path = os.path.join(dn, path) path = os.path.join(dn, path)
return os.path.normpath(path), fragment return os.path.normpath(path), fragment
@ -272,7 +273,7 @@ class HTMLConverter(object):
update_css(npcss, self.override_pcss) update_css(npcss, self.override_pcss)
paths = [os.path.abspath(path) for path in paths] paths = [os.path.abspath(path) for path in paths]
paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, unicode) else path for path in paths] paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, unicode_type) else path for path in paths]
while len(paths) > 0 and self.link_level <= self.link_levels: while len(paths) > 0 and self.link_level <= self.link_levels:
for path in paths: for path in paths:
@ -336,7 +337,7 @@ class HTMLConverter(object):
markupMassage=nmassage) markupMassage=nmassage)
except ConversionError as err: except ConversionError as err:
if 'Failed to coerce to unicode' in str(err): if 'Failed to coerce to unicode' in str(err):
raw = unicode(raw, 'utf8', 'replace') raw = unicode_type(raw, 'utf8', 'replace')
soup = BeautifulSoup(raw, soup = BeautifulSoup(raw,
convertEntities=BeautifulSoup.XHTML_ENTITIES, convertEntities=BeautifulSoup.XHTML_ENTITIES,
markupMassage=nmassage) markupMassage=nmassage)
@ -359,7 +360,7 @@ class HTMLConverter(object):
os.makedirs(tdir) os.makedirs(tdir)
try: try:
dump = open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb') dump = open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb')
dump.write(unicode(soup).encode('utf-8')) dump.write(unicode_type(soup).encode('utf-8'))
self.log.info(_('Written preprocessed HTML to ')+dump.name) self.log.info(_('Written preprocessed HTML to ')+dump.name)
dump.close() dump.close()
except: except:
@ -394,7 +395,7 @@ class HTMLConverter(object):
self.log.info(_('\tConverting to BBeB...')) self.log.info(_('\tConverting to BBeB...'))
self.current_style = {} self.current_style = {}
self.page_break_found = False self.page_break_found = False
if not isinstance(path, unicode): if not isinstance(path, unicode_type):
path = path.decode(sys.getfilesystemencoding()) path = path.decode(sys.getfilesystemencoding())
self.target_prefix = path self.target_prefix = path
self.previous_text = '\n' self.previous_text = '\n'
@ -589,7 +590,7 @@ class HTMLConverter(object):
if isinstance(c, HTMLConverter.IGNORED_TAGS): if isinstance(c, HTMLConverter.IGNORED_TAGS):
continue continue
if isinstance(c, NavigableString): if isinstance(c, NavigableString):
text += unicode(c) text += unicode_type(c)
elif isinstance(c, Tag): elif isinstance(c, Tag):
if c.name.lower() == 'img' and c.has_key('alt'): # noqa if c.name.lower() == 'img' and c.has_key('alt'): # noqa
alt_text += c['alt'] alt_text += c['alt']
@ -644,7 +645,7 @@ class HTMLConverter(object):
para, text, path, fragment = link['para'], link['text'], link['path'], link['fragment'] para, text, path, fragment = link['para'], link['text'], link['path'], link['fragment']
ascii_text = text ascii_text = text
if not isinstance(path, unicode): if not isinstance(path, unicode_type):
path = path.decode(sys.getfilesystemencoding()) path = path.decode(sys.getfilesystemencoding())
if path in self.processed_files: if path in self.processed_files:
if path+fragment in self.targets.keys(): if path+fragment in self.targets.keys():
@ -1323,7 +1324,7 @@ class HTMLConverter(object):
bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px' bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px'
if 'em' in tag_css['text-indent']: if 'em' in tag_css['text-indent']:
bl = '10pt' bl = '10pt'
indent = self.unit_convert(unicode(tag_css['text-indent']), pts=True, base_length=bl) indent = self.unit_convert(unicode_type(tag_css['text-indent']), pts=True, base_length=bl)
if not indent: if not indent:
indent = 0 indent = 0
if indent > 0 and indent < 10 * self.minimum_indent: if indent > 0 and indent < 10 * self.minimum_indent:
@ -1482,7 +1483,7 @@ class HTMLConverter(object):
enc = sys.getfilesystemencoding() enc = sys.getfilesystemencoding()
if not enc: if not enc:
enc = 'utf8' enc = 'utf8'
if isinstance(path, unicode): if isinstance(path, unicode_type):
path = path.encode(enc, 'replace') path = path.encode(enc, 'replace')
if os.access(path, os.R_OK) and os.path.isfile(path): if os.access(path, os.R_OK) and os.path.isfile(path):
if ext in ['png', 'jpg', 'bmp', 'jpeg']: if ext in ['png', 'jpg', 'bmp', 'jpeg']:
@ -1526,7 +1527,7 @@ class HTMLConverter(object):
elif tagname in ['style', 'link']: elif tagname in ['style', 'link']:
ncss, npcss = {}, {} ncss, npcss = {}, {}
if tagname == 'style': if tagname == 'style':
text = ''.join([unicode(i) for i in tag.findAll(text=True)]) text = ''.join([unicode_type(i) for i in tag.findAll(text=True)])
css, pcss = self.parse_css(text) css, pcss = self.parse_css(text)
ncss.update(css) ncss.update(css)
npcss.update(pcss) npcss.update(pcss)
@ -1559,7 +1560,7 @@ class HTMLConverter(object):
if tag.contents: if tag.contents:
c = tag.contents[0] c = tag.contents[0]
if isinstance(c, NavigableString): if isinstance(c, NavigableString):
c = unicode(c).replace('\r\n', '\n').replace('\r', '\n') c = unicode_type(c).replace('\r\n', '\n').replace('\r', '\n')
if c.startswith('\n'): if c.startswith('\n'):
c = c[1:] c = c[1:]
tag.contents[0] = NavigableString(c) tag.contents[0] = NavigableString(c)
@ -1759,7 +1760,7 @@ class HTMLConverter(object):
except Exception as err: except Exception as err:
self.log.warning(_('An error occurred while processing a table: %s. Ignoring table markup.')%repr(err)) self.log.warning(_('An error occurred while processing a table: %s. Ignoring table markup.')%repr(err))
self.log.exception('') self.log.exception('')
self.log.debug(_('Bad table:\n%s')%unicode(tag)[:300]) self.log.debug(_('Bad table:\n%s')%unicode_type(tag)[:300])
self.in_table = False self.in_table = False
self.process_children(tag, tag_css, tag_pseudo_css) self.process_children(tag, tag_css, tag_pseudo_css)
finally: finally:
@ -1810,7 +1811,7 @@ class HTMLConverter(object):
def process_file(path, options, logger): def process_file(path, options, logger):
if not isinstance(path, unicode): if not isinstance(path, unicode_type):
path = path.decode(sys.getfilesystemencoding()) path = path.decode(sys.getfilesystemencoding())
path = os.path.abspath(path) path = os.path.abspath(path)
default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0]) default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
@ -1857,9 +1858,9 @@ def process_file(path, options, logger):
for prop in ('author', 'author_sort', 'title', 'title_sort', 'publisher', 'freetext'): for prop in ('author', 'author_sort', 'title', 'title_sort', 'publisher', 'freetext'):
val = getattr(options, prop, None) val = getattr(options, prop, None)
if val and not isinstance(val, unicode): if val and not isinstance(val, unicode_type):
soup = BeautifulSoup(val) soup = BeautifulSoup(val)
setattr(options, prop, unicode(soup)) setattr(options, prop, unicode_type(soup))
title = (options.title, options.title_sort) title = (options.title, options.title_sort)
author = (options.author, options.author_sort) author = (options.author, options.author_sort)
@ -1903,7 +1904,7 @@ def process_file(path, options, logger):
options.force_page_break = fpb options.force_page_break = fpb
options.link_exclude = le options.link_exclude = le
options.page_break = pb options.page_break = pb
if not isinstance(options.chapter_regex, unicode): if not isinstance(options.chapter_regex, unicode_type):
options.chapter_regex = options.chapter_regex.decode(preferred_encoding) options.chapter_regex = options.chapter_regex.decode(preferred_encoding)
options.chapter_regex = re.compile(options.chapter_regex, re.IGNORECASE) options.chapter_regex = re.compile(options.chapter_regex, re.IGNORECASE)
fpba = options.force_page_break_attr.split(',') fpba = options.force_page_break_attr.split(',')
View File
@ -11,6 +11,8 @@ from PyQt5.Qt import QUrl, QApplication, QSize, QEventLoop, \
QPainter, QImage, QObject, Qt QPainter, QImage, QObject, Qt
from PyQt5.QtWebKitWidgets import QWebPage from PyQt5.QtWebKitWidgets import QWebPage
from polyglot.builtins import unicode_type
class HTMLTableRenderer(QObject): class HTMLTableRenderer(QObject):
@ -67,7 +69,7 @@ class HTMLTableRenderer(QObject):
def render_table(soup, table, css, base_dir, width, height, dpi, factor=1.0): def render_table(soup, table, css, base_dir, width, height, dpi, factor=1.0):
head = '' head = ''
for e in soup.findAll(['link', 'style']): for e in soup.findAll(['link', 'style']):
head += unicode(e)+'\n\n' head += unicode_type(e)+'\n\n'
style = '' style = ''
for key, val in css.items(): for key, val in css.items():
style += key + ':%s;'%val style += key + ':%s;'%val
@ -83,7 +85,7 @@ def render_table(soup, table, css, base_dir, width, height, dpi, factor=1.0):
%s %s
</body> </body>
</html> </html>
'''%(head, width-10, style, unicode(table)) '''%(head, width-10, style, unicode_type(table))
images, tdir = do_render(html, base_dir, width, height, dpi, factor) images, tdir = do_render(html, base_dir, width, height, dpi, factor)
atexit.register(shutil.rmtree, tdir) atexit.register(shutil.rmtree, tdir)
return images return images
View File
@ -10,6 +10,7 @@ from calibre.utils.filenames import ascii_filename
from calibre.ebooks.lrf.meta import LRFMetaFile from calibre.ebooks.lrf.meta import LRFMetaFile
from calibre.ebooks.lrf.objects import get_object, PageTree, StyleObject, \ from calibre.ebooks.lrf.objects import get_object, PageTree, StyleObject, \
Font, Text, TOCObject, BookAttr, ruby_tags Font, Text, TOCObject, BookAttr, ruby_tags
from polyglot.builtins import unicode_type
class LRFDocument(LRFMetaFile): class LRFDocument(LRFMetaFile):
@ -112,7 +113,7 @@ class LRFDocument(LRFMetaFile):
pages += u'<PageTree objid="%d">\n'%(page_tree.id,) pages += u'<PageTree objid="%d">\n'%(page_tree.id,)
close = u'</PageTree>\n' close = u'</PageTree>\n'
for page in page_tree: for page in page_tree:
pages += unicode(page) pages += unicode_type(page)
pages += close pages += close
traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"', pages)] + [pt_id] traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"', pages)] + [pt_id]
@ -125,9 +126,9 @@ class LRFDocument(LRFMetaFile):
if isinstance(obj, (Font, Text, TOCObject)): if isinstance(obj, (Font, Text, TOCObject)):
continue continue
if isinstance(obj, StyleObject): if isinstance(obj, StyleObject):
styles += unicode(obj) styles += unicode_type(obj)
else: else:
objects += unicode(obj) objects += unicode_type(obj)
styles += '</Style>\n' styles += '</Style>\n'
objects += '</Objects>\n' objects += '</Objects>\n'
if write_files: if write_files:
View File
@ -20,6 +20,7 @@ import xml.dom.minidom as dom
from functools import wraps from functools import wraps
from calibre.ebooks.metadata import MetaInformation, string_to_authors from calibre.ebooks.metadata import MetaInformation, string_to_authors
from polyglot.builtins import unicode_type
BYTE = "<B" #: Unsigned char little endian encoded in 1 byte BYTE = "<B" #: Unsigned char little endian encoded in 1 byte
WORD = "<H" #: Unsigned short little endian encoded in 2 bytes WORD = "<H" #: Unsigned short little endian encoded in 2 bytes
@ -195,8 +196,8 @@ class xml_field(object):
if not val: if not val:
val = u'' val = u''
if type(val).__name__ != 'unicode': if not isinstance(val, unicode_type):
val = unicode(val, 'utf-8') val = unicode_type(val, 'utf-8')
elems = document.getElementsByTagName(self.tag_name) elems = document.getElementsByTagName(self.tag_name)
elem = None elem = None
View File
@ -6,6 +6,7 @@ import struct, array, zlib, cStringIO, collections, re
from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE
from calibre import entity_to_unicode, prepare_string_for_xml from calibre import entity_to_unicode, prepare_string_for_xml
from calibre.ebooks.lrf.tags import Tag from calibre.ebooks.lrf.tags import Tag
from polyglot.builtins import unicode_type
ruby_tags = { ruby_tags = {
0xF575: ['rubyAlignAndAdjust', 'W'], 0xF575: ['rubyAlignAndAdjust', 'W'],
@ -88,10 +89,10 @@ class LRFObject(object):
yield i yield i
def __unicode__(self): def __unicode__(self):
return unicode(self.__class__.__name__) return unicode_type(self.__class__.__name__)
def __str__(self): def __str__(self):
return unicode(self).encode('utf-8') return unicode_type(self).encode('utf-8')
class LRFContentObject(LRFObject): class LRFContentObject(LRFObject):
@ -255,7 +256,7 @@ class Color(object):
return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b) return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b)
def __str__(self): def __str__(self):
return unicode(self) return unicode_type(self)
def __len__(self): def __len__(self):
return 4 return 4
@ -274,7 +275,7 @@ class EmptyPageElement(object):
yield i yield i
def __str__(self): def __str__(self):
return unicode(self) return unicode_type(self)
class PageDiv(EmptyPageElement): class PageDiv(EmptyPageElement):
@ -429,12 +430,12 @@ class Page(LRFStream):
def __unicode__(self): def __unicode__(self):
s = u'\n<Page pagestyle="%d" objid="%d">\n'%(self.style_id, self.id) s = u'\n<Page pagestyle="%d" objid="%d">\n'%(self.style_id, self.id)
for i in self: for i in self:
s += unicode(i) s += unicode_type(i)
s += '\n</Page>\n' s += '\n</Page>\n'
return s return s
def __str__(self): def __str__(self):
return unicode(self) return unicode_type(self)
def to_html(self): def to_html(self):
s = u'' s = u''
@ -619,7 +620,7 @@ class Block(LRFStream, TextCSS):
s += '%s="%s" '%(attr, self.attrs[attr]) s += '%s="%s" '%(attr, self.attrs[attr])
if self.name != 'ImageBlock': if self.name != 'ImageBlock':
s = s.rstrip()+'>\n' s = s.rstrip()+'>\n'
s += unicode(self.content) s += unicode_type(self.content)
s += '</%s>\n'%(self.name,) s += '</%s>\n'%(self.name,)
return s return s
return s.rstrip() + ' />\n' return s.rstrip() + ' />\n'
@ -717,7 +718,7 @@ class Text(LRFStream):
lineposition_map = {1:'before', 2:'after'} lineposition_map = {1:'before', 2:'after'}
def add_text(self, text): def add_text(self, text):
s = unicode(text, "utf-16-le") s = unicode_type(text, "utf-16-le")
if s: if s:
s = s.translate(self.text_map) s = s.translate(self.text_map)
self.content.append(self.entity_pattern.sub(entity_to_unicode, s)) self.content.append(self.entity_pattern.sub(entity_to_unicode, s))
@ -888,7 +889,7 @@ class Text(LRFStream):
p = open_containers.pop() p = open_containers.pop()
s += u'</%s>'%(p.name,) s += u'</%s>'%(p.name,)
else: else:
s += unicode(c) s += unicode_type(c)
if not c.self_closing: if not c.self_closing:
open_containers.append(c) open_containers.append(c)
@ -1001,7 +1002,7 @@ class Canvas(LRFStream):
s += '%s="%s" '%(attr, self.attrs[attr]) s += '%s="%s" '%(attr, self.attrs[attr])
s = s.rstrip() + '>\n' s = s.rstrip() + '>\n'
for po in self: for po in self:
s += unicode(po) + '\n' s += unicode_type(po) + '\n'
s += '</%s>\n'%(self.__class__.__name__,) s += '</%s>\n'%(self.__class__.__name__,)
return s return s
@ -1198,7 +1199,7 @@ class BookAttr(StyleObject, LRFObject):
s += u'<BookSetting bindingdirection="%s" dpi="%s" screenwidth="%s" screenheight="%s" colordepth="%s" />\n'%\ s += u'<BookSetting bindingdirection="%s" dpi="%s" screenwidth="%s" screenheight="%s" colordepth="%s" />\n'%\
(self.binding_map[doc.binding], doc.dpi, doc.width, doc.height, doc.color_depth) (self.binding_map[doc.binding], doc.dpi, doc.width, doc.height, doc.color_depth)
for font in self._document.font_map.values(): for font in self._document.font_map.values():
s += unicode(font) s += unicode_type(font)
s += '</BookStyle>\n' s += '</BookStyle>\n'
return s return s
@ -1239,7 +1240,7 @@ class TOCObject(LRFStream):
def __unicode__(self): def __unicode__(self):
s = u'<TOC>\n' s = u'<TOC>\n'
for i in self: for i in self:
s += unicode(i) s += unicode_type(i)
return s + '</TOC>\n' return s + '</TOC>\n'
@ -1288,5 +1289,3 @@ def get_object(document, stream, id, offset, size, scramble_key):
return object_map[obj_type](document, stream, obj_id, scramble_key, offset+size-Tag.tags[0][0]) return object_map[obj_type](document, stream, obj_id, scramble_key, offset+size-Tag.tags[0][0])
raise LRFParseError("Unknown object type: %02X!" % obj_type) raise LRFParseError("Unknown object type: %02X!" % obj_type)
View File
@ -1,5 +1,7 @@
""" elements.py -- replacements and helpers for ElementTree """ """ elements.py -- replacements and helpers for ElementTree """
from polyglot.builtins import unicode_type
class ElementWriter(object): class ElementWriter(object):
@ -21,9 +23,9 @@ class ElementWriter(object):
return text return text
def _writeAttribute(self, f, name, value): def _writeAttribute(self, f, name, value):
f.write(u' %s="' % unicode(name)) f.write(u' %s="' % unicode_type(name))
if not isinstance(value, basestring): if not isinstance(value, basestring):
value = unicode(value) value = unicode_type(value)
value = self._encodeCdata(value) value = self._encodeCdata(value)
value = value.replace('"', '&quot;') value = value.replace('"', '&quot;')
f.write(value) f.write(value)
@ -34,7 +36,7 @@ class ElementWriter(object):
f.write(text) f.write(text)
def _write(self, f, e): def _write(self, f, e):
f.write(u'<' + unicode(e.tag)) f.write(u'<' + unicode_type(e.tag))
attributes = e.items() attributes = e.items()
attributes.sort() attributes.sort()
@ -72,6 +74,3 @@ class ElementWriter(object):
f.write(u'<?xml version="1.0" encoding="%s"?>\n' % self.outputEncodingName) f.write(u'<?xml version="1.0" encoding="%s"?>\n' % self.outputEncodingName)
self._write(f, self.e) self._write(f, self.e)
View File
@ -5,6 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import struct import struct
from calibre.ebooks.lrf import LRFParseError from calibre.ebooks.lrf import LRFParseError
from polyglot.builtins import unicode_type
class Tag(object): class Tag(object):
@ -246,7 +247,7 @@ class Tag(object):
@classmethod @classmethod
def string_parser(self, stream): def string_parser(self, stream):
size = struct.unpack("<H", stream.read(2))[0] size = struct.unpack("<H", stream.read(2))[0]
return unicode(stream.read(size), "utf_16") return unicode_type(stream.read(size), "utf_16")
def type_one_parser(self, stream): def type_one_parser(self, stream):
cnt = struct.unpack("<H", stream.read(2))[0] cnt = struct.unpack("<H", stream.read(2))[0]
View File
@ -15,6 +15,8 @@ from calibre import relpath, guess_type, remove_bracketed_text, prints, force_un
from calibre.utils.config_base import tweaks from calibre.utils.config_base import tweaks
from polyglot.builtins import codepoint_to_chr, unicode_type
try: try:
_author_pat = re.compile(tweaks['authors_split_regex']) _author_pat = re.compile(tweaks['authors_split_regex'])
except: except:
@ -134,7 +136,7 @@ def get_title_sort_pat(lang=None):
return ans return ans
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in _ignore_starts = u'\'"'+u''.join(codepoint_to_chr(x) for x in
range(0x2018, 0x201e)+[0x2032, 0x2033]) range(0x2018, 0x201e)+[0x2032, 0x2033])
@ -227,7 +229,7 @@ class Resource(object):
self._href = href_or_path self._href = href_or_path
else: else:
pc = url[2] pc = url[2]
if isinstance(pc, unicode): if isinstance(pc, unicode_type):
pc = pc.encode('utf-8') pc = pc.encode('utf-8')
pc = unquote(pc).decode('utf-8') pc = unquote(pc).decode('utf-8')
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep))) self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
@ -249,7 +251,7 @@ class Resource(object):
basedir = os.getcwdu() basedir = os.getcwdu()
if self.path is None: if self.path is None:
return self._href return self._href
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode) else self.fragment f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment
frag = '#'+quote(f) if self.fragment else '' frag = '#'+quote(f) if self.fragment else ''
if self.path == basedir: if self.path == basedir:
return ''+frag return ''+frag
@ -257,7 +259,7 @@ class Resource(object):
rpath = relpath(self.path, basedir) rpath = relpath(self.path, basedir)
except OSError: # On windows path and basedir could be on different drives except OSError: # On windows path and basedir could be on different drives
rpath = self.path rpath = self.path
if isinstance(rpath, unicode): if isinstance(rpath, unicode_type):
rpath = rpath.encode('utf-8') rpath = rpath.encode('utf-8')
return quote(rpath.replace(os.sep, '/'))+frag return quote(rpath.replace(os.sep, '/'))+frag
View File
@ -14,6 +14,7 @@ from calibre.ebooks.metadata.book import (SC_COPYABLE_FIELDS,
TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS) TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS)
from calibre.library.field_metadata import FieldMetadata from calibre.library.field_metadata import FieldMetadata
from calibre.utils.icu import sort_key from calibre.utils.icu import sort_key
from polyglot.builtins import unicode_type
# Special sets used to optimize the performance of getting and setting # Special sets used to optimize the performance of getting and setting
# attributes on Metadata objects # attributes on Metadata objects
@ -606,14 +607,14 @@ class Metadata(object):
return authors_to_string(self.authors) return authors_to_string(self.authors)
def format_tags(self): def format_tags(self):
return u', '.join([unicode(t) for t in sorted(self.tags, key=sort_key)]) return u', '.join([unicode_type(t) for t in sorted(self.tags, key=sort_key)])
def format_rating(self, v=None, divide_by=1.0): def format_rating(self, v=None, divide_by=1.0):
if v is None: if v is None:
if self.rating is not None: if self.rating is not None:
return unicode(self.rating/divide_by) return unicode_type(self.rating/divide_by)
return u'None' return u'None'
return unicode(v/divide_by) return unicode_type(v/divide_by)
def format_field(self, key, series_with_index=True): def format_field(self, key, series_with_index=True):
''' '''
@ -637,15 +638,15 @@ class Metadata(object):
if cmeta and cmeta['datatype'] == 'series': if cmeta and cmeta['datatype'] == 'series':
if self.get(tkey): if self.get(tkey):
res = self.get_extra(tkey) res = self.get_extra(tkey)
return (unicode(cmeta['name']+'_index'), return (unicode_type(cmeta['name']+'_index'),
self.format_series_index(res), res, cmeta) self.format_series_index(res), res, cmeta)
else: else:
return (unicode(cmeta['name']+'_index'), '', '', cmeta) return (unicode_type(cmeta['name']+'_index'), '', '', cmeta)
if key in self.custom_field_keys(): if key in self.custom_field_keys():
res = self.get(key, None) # get evaluates all necessary composites res = self.get(key, None) # get evaluates all necessary composites
cmeta = self.get_user_metadata(key, make_copy=False) cmeta = self.get_user_metadata(key, make_copy=False)
name = unicode(cmeta['name']) name = unicode_type(cmeta['name'])
if res is None or res == '': # can't check "not res" because of numeric fields if res is None or res == '': # can't check "not res" because of numeric fields
return (name, res, None, None) return (name, res, None, None)
orig_res = res orig_res = res
@ -668,7 +669,7 @@ class Metadata(object):
res = fmt.format(res) res = fmt.format(res)
except: except:
pass pass
return (name, unicode(res), orig_res, cmeta) return (name, unicode_type(res), orig_res, cmeta)
# convert top-level ids into their value # convert top-level ids into their value
if key in TOP_LEVEL_IDENTIFIERS: if key in TOP_LEVEL_IDENTIFIERS:
@ -682,11 +683,11 @@ class Metadata(object):
if fmkey in field_metadata and field_metadata[fmkey]['kind'] == 'field': if fmkey in field_metadata and field_metadata[fmkey]['kind'] == 'field':
res = self.get(key, None) res = self.get(key, None)
fmeta = field_metadata[fmkey] fmeta = field_metadata[fmkey]
name = unicode(fmeta['name']) name = unicode_type(fmeta['name'])
if res is None or res == '': if res is None or res == '':
return (name, res, None, None) return (name, res, None, None)
orig_res = res orig_res = res
name = unicode(fmeta['name']) name = unicode_type(fmeta['name'])
datatype = fmeta['datatype'] datatype = fmeta['datatype']
if key == 'authors': if key == 'authors':
res = authors_to_string(res) res = authors_to_string(res)
@ -704,7 +705,7 @@ class Metadata(object):
res = u'%.2g'%(res/2.0) res = u'%.2g'%(res/2.0)
elif key == 'size': elif key == 'size':
res = human_readable(res) res = human_readable(res)
return (name, unicode(res), orig_res, fmeta) return (name, unicode_type(res), orig_res, fmeta)
return (None, None, None, None) return (None, None, None, None)
@ -718,7 +719,7 @@ class Metadata(object):
ans = [] ans = []
def fmt(x, y): def fmt(x, y):
ans.append(u'%-20s: %s'%(unicode(x), unicode(y))) ans.append(u'%-20s: %s'%(unicode_type(x), unicode_type(y)))
fmt('Title', self.title) fmt('Title', self.title)
if self.title_sort: if self.title_sort:
@ -732,7 +733,7 @@ class Metadata(object):
if getattr(self, 'book_producer', False): if getattr(self, 'book_producer', False):
fmt('Book Producer', self.book_producer) fmt('Book Producer', self.book_producer)
if self.tags: if self.tags:
fmt('Tags', u', '.join([unicode(t) for t in self.tags])) fmt('Tags', u', '.join([unicode_type(t) for t in self.tags]))
if self.series: if self.series:
fmt('Series', self.series + ' #%s'%self.format_series_index()) fmt('Series', self.series + ' #%s'%self.format_series_index())
if not self.is_null('languages'): if not self.is_null('languages'):
@ -745,7 +746,7 @@ class Metadata(object):
if self.pubdate is not None: if self.pubdate is not None:
fmt('Published', isoformat(self.pubdate)) fmt('Published', isoformat(self.pubdate))
if self.rights is not None: if self.rights is not None:
fmt('Rights', unicode(self.rights)) fmt('Rights', unicode_type(self.rights))
if self.identifiers: if self.identifiers:
fmt('Identifiers', u', '.join(['%s:%s'%(k, v) for k, v in fmt('Identifiers', u', '.join(['%s:%s'%(k, v) for k, v in
self.identifiers.iteritems()])) self.identifiers.iteritems()]))
@ -756,7 +757,7 @@ class Metadata(object):
val = self.get(key, None) val = self.get(key, None)
if val: if val:
(name, val) = self.format_field(key) (name, val) = self.format_field(key)
fmt(name, unicode(val)) fmt(name, unicode_type(val))
return u'\n'.join(ans) return u'\n'.join(ans)
def to_html(self): def to_html(self):
@ -765,22 +766,22 @@ class Metadata(object):
''' '''
from calibre.ebooks.metadata import authors_to_string from calibre.ebooks.metadata import authors_to_string
from calibre.utils.date import isoformat from calibre.utils.date import isoformat
ans = [(_('Title'), unicode(self.title))] ans = [(_('Title'), unicode_type(self.title))]
ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))] ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
ans += [(_('Publisher'), unicode(self.publisher))] ans += [(_('Publisher'), unicode_type(self.publisher))]
ans += [(_('Producer'), unicode(self.book_producer))] ans += [(_('Producer'), unicode_type(self.book_producer))]
ans += [(_('Comments'), unicode(self.comments))] ans += [(_('Comments'), unicode_type(self.comments))]
ans += [('ISBN', unicode(self.isbn))] ans += [('ISBN', unicode_type(self.isbn))]
ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))] ans += [(_('Tags'), u', '.join([unicode_type(t) for t in self.tags]))]
if self.series: if self.series:
ans += [(_('Series'), unicode(self.series) + ' #%s'%self.format_series_index())] ans += [(_('Series'), unicode_type(self.series) + ' #%s'%self.format_series_index())]
ans += [(_('Languages'), u', '.join(self.languages))] ans += [(_('Languages'), u', '.join(self.languages))]
if self.timestamp is not None: if self.timestamp is not None:
ans += [(_('Timestamp'), unicode(isoformat(self.timestamp, as_utc=False, sep=' ')))] ans += [(_('Timestamp'), unicode_type(isoformat(self.timestamp, as_utc=False, sep=' ')))]
if self.pubdate is not None: if self.pubdate is not None:
ans += [(_('Published'), unicode(isoformat(self.pubdate, as_utc=False, sep=' ')))] ans += [(_('Published'), unicode_type(isoformat(self.pubdate, as_utc=False, sep=' ')))]
if self.rights is not None: if self.rights is not None:
ans += [(_('Rights'), unicode(self.rights))] ans += [(_('Rights'), unicode_type(self.rights))]
for key in self.custom_field_keys(): for key in self.custom_field_keys():
val = self.get(key, None) val = self.get(key, None)
if val: if val:
View File
@ -20,6 +20,7 @@ from calibre.utils.icu import sort_key
from calibre.utils.formatter import EvalFormatter from calibre.utils.formatter import EvalFormatter
from calibre.utils.date import is_date_undefined from calibre.utils.date import is_date_undefined
from calibre.utils.localization import calibre_langcode_to_name from calibre.utils.localization import calibre_langcode_to_name
from polyglot.builtins import unicode_type
default_sort = ('title', 'title_sort', 'authors', 'author_sort', 'series', 'rating', 'pubdate', 'tags', 'publisher', 'identifiers') default_sort = ('title', 'title_sort', 'authors', 'author_sort', 'series', 'rating', 'pubdate', 'tags', 'publisher', 'identifiers')
@ -163,7 +164,7 @@ def mi_to_html(mi, field_list=None, default_author_link=None, use_roman_numbers=
path = force_unicode(mi.path, filesystem_encoding) path = force_unicode(mi.path, filesystem_encoding)
scheme = u'devpath' if isdevice else u'path' scheme = u'devpath' if isdevice else u'path'
url = prepare_string_for_xml(path if isdevice else url = prepare_string_for_xml(path if isdevice else
unicode(book_id), True) unicode_type(book_id), True)
pathstr = _('Click to open') pathstr = _('Click to open')
extra = '' extra = ''
if isdevice: if isdevice:
View File
@ -10,10 +10,11 @@ from calibre.constants import preferred_encoding
from calibre.ebooks.metadata.book import SERIALIZABLE_FIELDS from calibre.ebooks.metadata.book import SERIALIZABLE_FIELDS
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.imghdr import what from calibre.utils.imghdr import what
from polyglot.builtins import unicode_type
def ensure_unicode(obj, enc=preferred_encoding): def ensure_unicode(obj, enc=preferred_encoding):
if isinstance(obj, unicode): if isinstance(obj, unicode_type):
return obj return obj
if isinstance(obj, bytes): if isinstance(obj, bytes):
return obj.decode(enc, 'replace') return obj.decode(enc, 'replace')
View File
@ -16,6 +16,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
from calibre.ebooks.lrf.meta import LRFMetaFile from calibre.ebooks.lrf.meta import LRFMetaFile
from calibre import prints from calibre import prints
from calibre.utils.date import parse_date from calibre.utils.date import parse_date
from polyglot.builtins import unicode_type
USAGE=_('%prog ebook_file [options]\n') + \ USAGE=_('%prog ebook_file [options]\n') + \
_(''' _('''
@ -181,7 +182,7 @@ def main(args=sys.argv):
mi = get_metadata(stream, stream_type, force_read_metadata=True) mi = get_metadata(stream, stream_type, force_read_metadata=True)
if trying_to_set: if trying_to_set:
prints(_('Original metadata')+'::') prints(_('Original metadata')+'::')
metadata = unicode(mi) metadata = unicode_type(mi)
if trying_to_set: if trying_to_set:
metadata = '\t'+'\n\t'.join(metadata.split('\n')) metadata = '\t'+'\n\t'.join(metadata.split('\n'))
prints(metadata, safe_encode=True) prints(metadata, safe_encode=True)
@ -198,7 +199,7 @@ def main(args=sys.argv):
lrf.book_id = opts.lrf_bookid lrf.book_id = opts.lrf_bookid
mi = get_metadata(stream, stream_type, force_read_metadata=True) mi = get_metadata(stream, stream_type, force_read_metadata=True)
prints('\n' + _('Changed metadata') + '::') prints('\n' + _('Changed metadata') + '::')
metadata = unicode(mi) metadata = unicode_type(mi)
metadata = '\t'+'\n\t'.join(metadata.split('\n')) metadata = '\t'+'\n\t'.join(metadata.split('\n'))
prints(metadata, safe_encode=True) prints(metadata, safe_encode=True)
if lrf is not None: if lrf is not None:

View File

@ -18,6 +18,7 @@ from calibre.utils.imghdr import identify
from calibre import guess_type, guess_all_extensions, prints, force_unicode from calibre import guess_type, guess_all_extensions, prints, force_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn from calibre.ebooks.metadata import MetaInformation, check_isbn
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from polyglot.builtins import unicode_type
NAMESPACES = { NAMESPACES = {
@ -26,7 +27,7 @@ NAMESPACES = {
'xlink' : 'http://www.w3.org/1999/xlink' 'xlink' : 'http://www.w3.org/1999/xlink'
} }
tostring = partial(etree.tostring, method='text', encoding=unicode) tostring = partial(etree.tostring, method='text', encoding=unicode_type)
def XLINK(tag): def XLINK(tag):
@ -112,9 +113,9 @@ def get_metadata(stream):
# fallback for book_title # fallback for book_title
if book_title: if book_title:
book_title = unicode(book_title) book_title = unicode_type(book_title)
else: else:
book_title = force_unicode(os.path.splitext( book_title = force_unicode(os.path.splitext(
os.path.basename(getattr(stream, 'name', os.path.basename(getattr(stream, 'name',
_('Unknown'))))[0]) _('Unknown'))))[0])
mi = MetaInformation(book_title, authors) mi = MetaInformation(book_title, authors)
@ -249,7 +250,7 @@ def _parse_tags(root, mi, ctx):
# -- i18n Translations-- ? # -- i18n Translations-- ?
tags = ctx.XPath('//fb:%s/fb:genre/text()' % genre_sec)(root) tags = ctx.XPath('//fb:%s/fb:genre/text()' % genre_sec)(root)
if tags: if tags:
mi.tags = list(map(unicode, tags)) mi.tags = list(map(unicode_type, tags))
break break
@ -447,7 +448,7 @@ def ensure_namespace(doc):
break break
if bare_tags: if bare_tags:
import re import re
raw = etree.tostring(doc, encoding=unicode) raw = etree.tostring(doc, encoding=unicode_type)
raw = re.sub(r'''<(description|body)\s+xmlns=['"]['"]>''', r'<\1>', raw) raw = re.sub(r'''<(description|body)\s+xmlns=['"]['"]>''', r'<\1>', raw)
doc = etree.fromstring(raw) doc = etree.fromstring(raw)
return doc return doc

View File

@ -6,6 +6,7 @@ __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
import sys import sys
from calibre.ebooks.metadata import MetaInformation, string_to_authors from calibre.ebooks.metadata import MetaInformation, string_to_authors
from polyglot.builtins import unicode_type
MAGIC = ['\x00\x01BOOKDOUG', '\x00\x02BOOKDOUG'] MAGIC = ['\x00\x01BOOKDOUG', '\x00\x02BOOKDOUG']
@ -43,6 +44,6 @@ def get_metadata(stream):
if category: if category:
mi.category = category mi.category = category
except Exception as err: except Exception as err:
msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode(err)) msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode_type(err))
print(msg.encode('utf8'), file=sys.stderr) print(msg.encode('utf8'), file=sys.stderr)
return mi return mi

View File

@ -14,11 +14,12 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre import browser from calibre import browser
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from polyglot.builtins import codepoint_to_chr, unicode_type
URL = \ URL = \
"http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping=" "http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping="
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033]) _ignore_starts = u'\'"'+u''.join(codepoint_to_chr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
def get_series(title, authors, timeout=60): def get_series(title, authors, timeout=60):
@ -28,7 +29,7 @@ def get_series(title, authors, timeout=60):
title = re.sub(r'^(A|The|An)\s+', '', title).strip() title = re.sub(r'^(A|The|An)\s+', '', title).strip()
if not title: if not title:
return mi return mi
if isinstance(title, unicode): if isinstance(title, unicode_type):
title = title.encode('utf-8') title = title.encode('utf-8')
title = urllib.quote_plus(title) title = urllib.quote_plus(title)
@ -73,7 +74,7 @@ def get_series(title, authors, timeout=60):
mi.series = series mi.series = series
ns = ss.nextSibling ns = ss.nextSibling
if ns.contents: if ns.contents:
raw = unicode(ns.contents[0]) raw = unicode_type(ns.contents[0])
raw = raw.partition('.')[0].strip() raw = raw.partition('.')[0].strip()
try: try:
mi.series_index = int(raw) mi.series_index = int(raw)
@ -85,4 +86,3 @@ def get_series(title, authors, timeout=60):
if __name__ == '__main__': if __name__ == '__main__':
import sys import sys
print(get_series(sys.argv[-2], [sys.argv[-1]])) print(get_series(sys.argv[-2], [sys.argv[-1]]))
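
For reference, the _ignore_starts string built above with codepoint_to_chr covers the curly quotation marks U+2018 through U+201D plus the prime characters U+2032 and U+2033. Note that the range(...) + [...] concatenation is Python 2 only; a Python 3 sketch of the same construction needs an explicit list():

    codepoint_to_chr = chr  # assumed Python 3 resolution of the polyglot wrapper
    ignore_starts = '\'"' + ''.join(
        codepoint_to_chr(x) for x in list(range(0x2018, 0x201e)) + [0x2032, 0x2033])
    print(ignore_starts)  # '"‘’‚‛“”′″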

View File

@ -18,6 +18,7 @@ from calibre.utils.config_base import tweaks
from calibre.utils.date import parse_only_date from calibre.utils.date import parse_only_date
from calibre.utils.localization import canonicalize_lang from calibre.utils.localization import canonicalize_lang
from calibre.utils.imghdr import identify from calibre.utils.imghdr import identify
from polyglot.builtins import unicode_type
class InvalidKFX(ValueError): class InvalidKFX(ValueError):
@ -356,4 +357,4 @@ if __name__ == '__main__':
from calibre import prints from calibre import prints
with open(sys.argv[-1], 'rb') as f: with open(sys.argv[-1], 'rb') as f:
mi = read_metadata_kfx(f) mi = read_metadata_kfx(f)
prints(unicode(mi)) prints(unicode_type(mi))

View File

@ -21,6 +21,7 @@ from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.utils.date import now as nowf from calibre.utils.date import now as nowf
from calibre.utils.imghdr import what from calibre.utils.imghdr import what
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1 from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
from polyglot.builtins import unicode_type
def is_image(ss): def is_image(ss):
@ -223,7 +224,7 @@ class MetadataUpdater(object):
def create_exth(self, new_title=None, exth=None): def create_exth(self, new_title=None, exth=None):
# Add an EXTH block to record 0, rewrite the stream # Add an EXTH block to record 0, rewrite the stream
if isinstance(new_title, unicode): if isinstance(new_title, unicode_type):
new_title = new_title.encode(self.codec, 'replace') new_title = new_title.encode(self.codec, 'replace')
# Fetch the existing title # Fetch the existing title

View File

@ -25,6 +25,7 @@ from calibre.utils.localization import get_lang, canonicalize_lang
from calibre import prints, guess_type from calibre import prints, guess_type
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
from calibre.utils.config import tweaks from calibre.utils.config import tweaks
from polyglot.builtins import unicode_type
pretty_print_opf = False pretty_print_opf = False
@ -82,7 +83,7 @@ class Resource(object): # {{{
self._href = href_or_path self._href = href_or_path
else: else:
pc = url[2] pc = url[2]
if isinstance(pc, unicode): if isinstance(pc, unicode_type):
pc = pc.encode('utf-8') pc = pc.encode('utf-8')
pc = pc.decode('utf-8') pc = pc.decode('utf-8')
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep))) self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
@ -103,7 +104,7 @@ class Resource(object): # {{{
basedir = os.getcwdu() basedir = os.getcwdu()
if self.path is None: if self.path is None:
return self._href return self._href
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode) else self.fragment f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment
frag = '#'+f if self.fragment else '' frag = '#'+f if self.fragment else ''
if self.path == basedir: if self.path == basedir:
return ''+frag return ''+frag
@ -111,7 +112,7 @@ class Resource(object): # {{{
rpath = os.path.relpath(self.path, basedir) rpath = os.path.relpath(self.path, basedir)
except ValueError: # On windows path and basedir could be on different drives except ValueError: # On windows path and basedir could be on different drives
rpath = self.path rpath = self.path
if isinstance(rpath, unicode): if isinstance(rpath, unicode_type):
rpath = rpath.encode('utf-8') rpath = rpath.encode('utf-8')
return rpath.replace(os.sep, '/')+frag return rpath.replace(os.sep, '/')+frag
@ -206,10 +207,10 @@ class ManifestItem(Resource): # {{{
return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type) return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type)
def __str__(self): def __str__(self):
return unicode(self).encode('utf-8') return unicode_type(self).encode('utf-8')
def __repr__(self): def __repr__(self):
return unicode(self) return unicode_type(self)
def __getitem__(self, index): def __getitem__(self, index):
if index == 0: if index == 0:
@ -410,7 +411,7 @@ class Guide(ResourceCollection): # {{{
class MetadataField(object): class MetadataField(object):
def __init__(self, name, is_dc=True, formatter=None, none_is=None, def __init__(self, name, is_dc=True, formatter=None, none_is=None,
renderer=lambda x: unicode(x)): renderer=lambda x: unicode_type(x)):
self.name = name self.name = name
self.is_dc = is_dc self.is_dc = is_dc
self.formatter = formatter self.formatter = formatter
@ -791,7 +792,7 @@ class OPF(object): # {{{
def unquote_urls(self): def unquote_urls(self):
def get_href(item): def get_href(item):
raw = unquote(item.get('href', '')) raw = unquote(item.get('href', ''))
if not isinstance(raw, unicode): if not isinstance(raw, unicode_type):
raw = raw.decode('utf-8') raw = raw.decode('utf-8')
return raw return raw
for item in self.itermanifest(): for item in self.itermanifest():
@ -820,7 +821,7 @@ class OPF(object): # {{{
titles = () titles = ()
if val: if val:
title = titles[0] if titles else self.create_metadata_element('title') title = titles[0] if titles else self.create_metadata_element('title')
title.text = re.sub(r'\s+', ' ', unicode(val)) title.text = re.sub(r'\s+', ' ', unicode_type(val))
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@ -869,7 +870,7 @@ class OPF(object): # {{{
for key in matches[0].attrib: for key in matches[0].attrib:
if key.endswith('file-as'): if key.endswith('file-as'):
matches[0].attrib.pop(key) matches[0].attrib.pop(key)
matches[0].set('{%s}file-as'%self.NAMESPACES['opf'], unicode(val)) matches[0].set('{%s}file-as'%self.NAMESPACES['opf'], unicode_type(val))
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@ -889,7 +890,7 @@ class OPF(object): # {{{
tag.getparent().remove(tag) tag.getparent().remove(tag)
for tag in val: for tag in val:
elem = self.create_metadata_element('subject') elem = self.create_metadata_element('subject')
self.set_text(elem, unicode(tag)) self.set_text(elem, unicode_type(tag))
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@ -900,7 +901,7 @@ class OPF(object): # {{{
ans = None ans = None
for match in self.pubdate_path(self.metadata): for match in self.pubdate_path(self.metadata):
try: try:
val = parse_date(etree.tostring(match, encoding=unicode, val = parse_date(etree.tostring(match, encoding=unicode_type,
method='text', with_tail=False).strip()) method='text', with_tail=False).strip())
except: except:
continue continue
@ -912,7 +913,7 @@ class OPF(object): # {{{
least_val = least_elem = None least_val = least_elem = None
for match in self.pubdate_path(self.metadata): for match in self.pubdate_path(self.metadata):
try: try:
cval = parse_date(etree.tostring(match, encoding=unicode, cval = parse_date(etree.tostring(match, encoding=unicode_type,
method='text', with_tail=False).strip()) method='text', with_tail=False).strip())
except: except:
match.getparent().remove(match) match.getparent().remove(match)
@ -962,7 +963,7 @@ class OPF(object): # {{{
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'ISBN'} attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'ISBN'}
matches = [self.create_metadata_element('identifier', matches = [self.create_metadata_element('identifier',
attrib=attrib)] attrib=attrib)]
self.set_text(matches[0], unicode(val)) self.set_text(matches[0], unicode_type(val))
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@ -975,7 +976,7 @@ class OPF(object): # {{{
for attr, val in x.attrib.iteritems(): for attr, val in x.attrib.iteritems():
if attr.endswith('scheme'): if attr.endswith('scheme'):
typ = icu_lower(val) typ = icu_lower(val)
val = etree.tostring(x, with_tail=False, encoding=unicode, val = etree.tostring(x, with_tail=False, encoding=unicode_type,
method='text').strip() method='text').strip()
if val and typ not in ('calibre', 'uuid'): if val and typ not in ('calibre', 'uuid'):
if typ == 'isbn' and val.lower().startswith('urn:isbn:'): if typ == 'isbn' and val.lower().startswith('urn:isbn:'):
@ -984,7 +985,7 @@ class OPF(object): # {{{
found_scheme = True found_scheme = True
break break
if not found_scheme: if not found_scheme:
val = etree.tostring(x, with_tail=False, encoding=unicode, val = etree.tostring(x, with_tail=False, encoding=unicode_type,
method='text').strip() method='text').strip()
if val.lower().startswith('urn:isbn:'): if val.lower().startswith('urn:isbn:'):
val = check_isbn(val.split(':')[-1]) val = check_isbn(val.split(':')[-1])
@ -1017,7 +1018,7 @@ class OPF(object): # {{{
for typ, val in identifiers.iteritems(): for typ, val in identifiers.iteritems():
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: typ.upper()} attrib = {'{%s}scheme'%self.NAMESPACES['opf']: typ.upper()}
self.set_text(self.create_metadata_element( self.set_text(self.create_metadata_element(
'identifier', attrib=attrib), unicode(val)) 'identifier', attrib=attrib), unicode_type(val))
@dynamic_property @dynamic_property
def application_id(self): def application_id(self):
@ -1041,7 +1042,7 @@ class OPF(object): # {{{
if uuid_id and uuid_id in removed_ids: if uuid_id and uuid_id in removed_ids:
attrib['id'] = uuid_id attrib['id'] = uuid_id
self.set_text(self.create_metadata_element( self.set_text(self.create_metadata_element(
'identifier', attrib=attrib), unicode(val)) 'identifier', attrib=attrib), unicode_type(val))
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@ -1058,7 +1059,7 @@ class OPF(object): # {{{
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'uuid'} attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'uuid'}
matches = [self.create_metadata_element('identifier', matches = [self.create_metadata_element('identifier',
attrib=attrib)] attrib=attrib)]
self.set_text(matches[0], unicode(val)) self.set_text(matches[0], unicode_type(val))
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@ -1095,7 +1096,7 @@ class OPF(object): # {{{
for lang in val: for lang in val:
l = self.create_metadata_element('language') l = self.create_metadata_element('language')
self.set_text(l, unicode(lang)) self.set_text(l, unicode_type(lang))
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@ -1118,7 +1119,7 @@ class OPF(object): # {{{
if not matches: if not matches:
matches = [self.create_metadata_element('contributor')] matches = [self.create_metadata_element('contributor')]
matches[0].set('{%s}role'%self.NAMESPACES['opf'], 'bkp') matches[0].set('{%s}role'%self.NAMESPACES['opf'], 'bkp')
self.set_text(matches[0], unicode(val)) self.set_text(matches[0], unicode_type(val))
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
def identifier_iter(self): def identifier_iter(self):
@ -1701,7 +1702,7 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
metadata[-1].tail = '\n' +(' '*4) metadata[-1].tail = '\n' +(' '*4)
if mi.cover: if mi.cover:
if not isinstance(mi.cover, unicode): if not isinstance(mi.cover, unicode_type):
mi.cover = mi.cover.decode(filesystem_encoding) mi.cover = mi.cover.decode(filesystem_encoding)
guide.text = '\n'+(' '*8) guide.text = '\n'+(' '*8)
r = guide.makeelement(OPF('reference'), r = guide.makeelement(OPF('reference'),
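
Several of the call sites above pass the text type itself as encoding= to etree.tostring; lxml then returns a text string instead of encoded bytes, which is why unicode can be replaced by unicode_type directly. A small illustration with a made-up element (assumes lxml is installed):

    from lxml import etree

    unicode_type = str  # assumed Python 3 resolution of the polyglot wrapper
    el = etree.fromstring('<date xmlns="http://purl.org/dc/elements/1.1/">2009-01-01</date>')
    print(etree.tostring(el, encoding=unicode_type, method='text', with_tail=False))  # 2009-01-01
    print(type(etree.tostring(el)))  # <class 'bytes'> when no text encoding is requested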

View File

@ -12,6 +12,7 @@ from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.metadata import ( from calibre.ebooks.metadata import (
MetaInformation, string_to_authors, check_isbn, check_doi) MetaInformation, string_to_authors, check_isbn, check_doi)
from calibre.utils.ipc.simple_worker import fork_job, WorkerError from calibre.utils.ipc.simple_worker import fork_job, WorkerError
from polyglot.builtins import unicode_type
def get_tools(): def get_tools():
@ -88,8 +89,8 @@ def page_images(pdfpath, outputdir, first=1, last=1):
import win32process as w import win32process as w
args['creationflags'] = w.HIGH_PRIORITY_CLASS | w.CREATE_NO_WINDOW args['creationflags'] = w.HIGH_PRIORITY_CLASS | w.CREATE_NO_WINDOW
try: try:
subprocess.check_call([pdftoppm, '-cropbox', '-jpeg', '-f', unicode(first), subprocess.check_call([pdftoppm, '-cropbox', '-jpeg', '-f', unicode_type(first),
'-l', unicode(last), pdfpath, '-l', unicode_type(last), pdfpath,
os.path.join(outputdir, 'page-images')], **args) os.path.join(outputdir, 'page-images')], **args)
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode) raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode)
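
The unicode_type(first) and unicode_type(last) conversions above exist because subprocess argument lists must contain strings while the page numbers arrive as integers. A hedged sketch of the resulting command line (the pdftoppm name and the file paths are placeholders; the real code resolves the binary via get_tools() and writes into a temporary directory):

    unicode_type = str  # assumed Python 3 resolution of the polyglot wrapper
    first, last = 1, 1
    cmd = ['pdftoppm', '-cropbox', '-jpeg', '-f', unicode_type(first), '-l', unicode_type(last),
           '/tmp/book.pdf', '/tmp/outdir/page-images']
    print(cmd)  # every element is text, as subprocess.check_call expects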

View File

@ -6,6 +6,7 @@ __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
import sys, struct import sys, struct
from calibre.ebooks.metadata import MetaInformation, string_to_authors from calibre.ebooks.metadata import MetaInformation, string_to_authors
from polyglot.builtins import unicode_type
MAGIC = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00' MAGIC = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00'
@ -47,9 +48,7 @@ def get_metadata(stream):
mi.author = value mi.author = value
mi.authors = string_to_authors(value) mi.authors = string_to_authors(value)
except Exception as err: except Exception as err:
msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode(err)) msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode_type(err))
print(msg.encode('utf8'), file=sys.stderr) print(msg.encode('utf8'), file=sys.stderr)
raise raise
return mi return mi

View File

@ -8,6 +8,7 @@ import re, cStringIO, codecs
from calibre import force_unicode from calibre import force_unicode
from calibre.ebooks.metadata import MetaInformation, string_to_authors from calibre.ebooks.metadata import MetaInformation, string_to_authors
from polyglot.builtins import codepoint_to_chr, unicode_type
title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL) title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL) author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
@ -75,7 +76,7 @@ def detect_codepage(stream):
def encode(unistr): def encode(unistr):
if not isinstance(unistr, unicode): if not isinstance(unistr, unicode_type):
unistr = force_unicode(unistr) unistr = force_unicode(unistr)
return ''.join([str(c) if ord(c) < 128 else '\\u' + str(ord(c)) + '?' for c in unistr]) return ''.join([str(c) if ord(c) < 128 else '\\u' + str(ord(c)) + '?' for c in unistr])
@ -88,7 +89,7 @@ def decode(raw, codec):
raw = raw.decode(codec) raw = raw.decode(codec)
def uni(match): def uni(match):
return unichr(int(match.group(1))) return codepoint_to_chr(int(match.group(1)))
raw = re.sub(r'\\u([0-9]{3,4}).', uni, raw) raw = re.sub(r'\\u([0-9]{3,4}).', uni, raw)
return raw return raw
@ -232,4 +233,3 @@ def set_metadata(stream, options):
stream.truncate() stream.truncate()
stream.write(src) stream.write(src)
stream.write(after) stream.write(after)
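
The uni() substitution above turns RTF \uNNNN escapes back into characters; RTF encodes each non-ASCII character as a decimal code point followed by a one-character fallback, which the trailing '.' in the pattern consumes. A quick worked example on a made-up string:

    import re

    codepoint_to_chr = chr  # assumed Python 3 resolution of the polyglot wrapper

    def uni(match):
        return codepoint_to_chr(int(match.group(1)))

    raw = r'caf\u233? de Par\u237?s'
    print(re.sub(r'\\u([0-9]{3,4}).', uni, raw))  # café de París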

View File

@ -15,6 +15,7 @@ from calibre.constants import __appname__, __version__
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.cleantext import clean_xml_chars from calibre.utils.cleantext import clean_xml_chars
from polyglot.builtins import unicode_type
NCX_NS = "http://www.daisy.org/z3986/2005/ncx/" NCX_NS = "http://www.daisy.org/z3986/2005/ncx/"
CALIBRE_NS = "http://calibre.kovidgoyal.net/2009/metadata" CALIBRE_NS = "http://calibre.kovidgoyal.net/2009/metadata"
@ -194,7 +195,7 @@ class TOC(list):
text = u'' text = u''
for txt in txt_path(nl): for txt in txt_path(nl):
text += etree.tostring(txt, method='text', text += etree.tostring(txt, method='text',
encoding=unicode, with_tail=False) encoding=unicode_type, with_tail=False)
content = content_path(np) content = content_path(np)
if content and text: if content and text:
content = content[0] content = content[0]
@ -229,7 +230,7 @@ class TOC(list):
fragment = fragment.strip() fragment = fragment.strip()
href = href.strip() href = href.strip()
txt = ''.join([unicode(s).strip() for s in a.findAll(text=True)]) txt = ''.join([unicode_type(s).strip() for s in a.findAll(text=True)])
add = True add = True
for i in self.flat(): for i in self.flat():
if i.href == href and i.fragment == fragment: if i.href == href and i.fragment == fragment:
@ -264,7 +265,7 @@ class TOC(list):
text = clean_xml_chars(text) text = clean_xml_chars(text)
elem = E.navPoint( elem = E.navPoint(
E.navLabel(E.text(re.sub(r'\s+', ' ', text))), E.navLabel(E.text(re.sub(r'\s+', ' ', text))),
E.content(src=unicode(np.href)+(('#' + unicode(np.fragment)) E.content(src=unicode_type(np.href)+(('#' + unicode_type(np.fragment))
if np.fragment else '')), if np.fragment else '')),
id=item_id, id=item_id,
playOrder=str(np.play_order) playOrder=str(np.play_order)

View File

@ -20,6 +20,7 @@ from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
from calibre.utils.imghdr import what from calibre.utils.imghdr import what
from calibre.ebooks.mobi.debug import format_bytes from calibre.ebooks.mobi.debug import format_bytes
from calibre.ebooks.mobi.debug.headers import TextRecord from calibre.ebooks.mobi.debug.headers import TextRecord
from polyglot.builtins import unicode_type
class TagX(object): # {{{ class TagX(object): # {{{
@ -564,7 +565,7 @@ class TBSIndexing(object): # {{{
def get_index(self, idx): def get_index(self, idx):
for i in self.indices: for i in self.indices:
if i.index in {idx, unicode(idx)}: if i.index in {idx, unicode_type(idx)}:
return i return i
raise IndexError('Index %d not found'%idx) raise IndexError('Index %d not found'%idx)
@ -844,5 +845,3 @@ def inspect_mobi(mobi_file, ddir):
# }}} # }}}

View File

@ -16,6 +16,7 @@ from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.transforms.flatcss import KeyMapper from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
from calibre.ebooks.mobi.utils import convert_color_for_font_tag from calibre.ebooks.mobi.utils import convert_color_for_font_tag
from calibre.utils.imghdr import identify from calibre.utils.imghdr import identify
from polyglot.builtins import unicode_type
MBP_NS = 'http://mobipocket.com/ns/mbp' MBP_NS = 'http://mobipocket.com/ns/mbp'
@ -151,7 +152,7 @@ class MobiMLizer(object):
return "%dem" % int(round(ptsize / embase)) return "%dem" % int(round(ptsize / embase))
def preize_text(self, text, pre_wrap=False): def preize_text(self, text, pre_wrap=False):
text = unicode(text) text = unicode_type(text)
if pre_wrap: if pre_wrap:
# Replace n consecutive spaces with n-1 NBSP + space # Replace n consecutive spaces with n-1 NBSP + space
text = re.sub(r' {2,}', lambda m:(u'\xa0'*(len(m.group())-1) + u' '), text) text = re.sub(r' {2,}', lambda m:(u'\xa0'*(len(m.group())-1) + u' '), text)
@ -228,7 +229,7 @@ class MobiMLizer(object):
while vspace > 0: while vspace > 0:
wrapper.addprevious(etree.Element(XHTML('br'))) wrapper.addprevious(etree.Element(XHTML('br')))
vspace -= 1 vspace -= 1
if istate.halign != 'auto' and isinstance(istate.halign, (str, unicode)): if istate.halign != 'auto' and isinstance(istate.halign, (str, unicode_type)):
para.attrib['align'] = istate.halign para.attrib['align'] = istate.halign
istate.rendered = True istate.rendered = True
pstate = bstate.istate pstate = bstate.istate
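
The pre_wrap branch of preize_text above keeps runs of spaces visible in MOBI output by turning n consecutive spaces into n-1 non-breaking spaces followed by one ordinary space. A small standalone check of that substitution (the helper name here is made up):

    import re

    def squash_spaces(text):
        # n consecutive spaces -> (n-1) NBSPs plus one ordinary space, as in the hunk above
        return re.sub(r' {2,}', lambda m: '\xa0' * (len(m.group()) - 1) + ' ', text)

    print(repr(squash_spaces('a    b')))  # 'a\xa0\xa0\xa0 b'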

View File

@ -16,6 +16,7 @@ from calibre.ebooks.mobi.langcodes import main_language, sub_language, mobi2iana
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
from calibre.utils.localization import canonicalize_lang from calibre.utils.localization import canonicalize_lang
from calibre.utils.config_base import tweaks from calibre.utils.config_base import tweaks
from polyglot.builtins import unicode_type
NULL_INDEX = 0xffffffff NULL_INDEX = 0xffffffff
@ -239,7 +240,7 @@ class BookHeader(object):
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84]) self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
self.exth = None self.exth = None
if not isinstance(self.title, unicode): if not isinstance(self.title, unicode_type):
self.title = self.title.decode(self.codec, 'replace') self.title = self.title.decode(self.codec, 'replace')
if self.exth_flag & 0x40: if self.exth_flag & 0x40:
try: try:

View File

@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
import re, os import re, os
from calibre.ebooks.chardet import strip_encoding_declarations from calibre.ebooks.chardet import strip_encoding_declarations
from polyglot.builtins import unicode_type
def update_internal_links(mobi8_reader, log): def update_internal_links(mobi8_reader, log):
@ -130,7 +131,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
flows.append(flow) flows.append(flow)
continue continue
if not isinstance(flow, unicode): if not isinstance(flow, unicode_type):
try: try:
flow = flow.decode(mr.header.codec) flow = flow.decode(mr.header.codec)
except UnicodeDecodeError: except UnicodeDecodeError:

Some files were not shown because too many files have changed in this diff