python3: add unicode/unichr wrappers to polyglot

Eli Schwartz, 2019-03-10 13:49:54 -04:00 (committed by Kovid Goyal)
parent 77728a15ef
commit cbc42bec23
386 changed files with 2012 additions and 1743 deletions
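
Every hunk below follows the same pattern: direct uses of the Python 2-only builtins unicode and unichr are replaced by unicode_type and codepoint_to_chr imported from polyglot.builtins, so the call sites work unchanged on Python 2 and Python 3. As a rough sketch of what such wrappers amount to (the names match the imports in the hunks; the exact contents of polyglot.builtins are an assumption here, not shown by this commit):

# Assumed sketch of the compatibility shims that the hunks below import.
import sys

if sys.version_info.major >= 3:
    unicode_type = str        # Python 3: the text type is str
    codepoint_to_chr = chr    # Python 3: chr() accepts any code point
else:
    unicode_type = unicode       # noqa: F821 -- Python 2 only
    codepoint_to_chr = unichr    # noqa: F821 -- Python 2 only

# Call sites then become version-agnostic, for example:
#   isinstance(value, unicode_type)
#   codepoint_to_chr(int('00e9', 16))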


@ -12,6 +12,7 @@ from functools import partial
from contextlib import closing
from setup import iswindows
from polyglot.builtins import unicode_type
if iswindows:
from ctypes import windll, Structure, POINTER, c_size_t
@ -52,7 +53,7 @@ def run_worker(job, decorate=True):
try:
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
except Exception as err:
return False, human_text, unicode(err)
return False, human_text, unicode_type(err)
stdout, stderr = p.communicate()
if stdout:
stdout = stdout.decode('utf-8')


@ -12,7 +12,7 @@ from itertools import chain
is_ci = os.environ.get('CI', '').lower() == 'true'
from setup import Command, basenames, __appname__, download_securely
from polyglot.builtins import itervalues, iteritems
from polyglot.builtins import codepoint_to_chr, itervalues, iteritems
def get_opts_from_parser(parser):
@ -173,7 +173,7 @@ class Kakasi(Command): # {{{
continue
if re.match(r"^$",line):
continue
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:codepoint_to_chr(int(x.group(1),16)), line)
dic[pair[0]] = pair[1]
from calibre.utils.serialize import msgpack_dumps
with open(dst, 'wb') as f:


@ -13,7 +13,7 @@ from functools import partial
from setup import Command, __appname__, __version__, require_git_master, build_cache_dir, edit_file
from setup.parallel_build import parallel_check_output
from polyglot.builtins import iteritems
from polyglot.builtins import codepoint_to_chr, iteritems
is_ci = os.environ.get('CI', '').lower() == 'true'
@ -82,7 +82,7 @@ class POT(Command): # {{{
ans = []
for lineno, msg in msgs:
ans.append('#: %s:%d'%(path, lineno))
slash = unichr(92)
slash = codepoint_to_chr(92)
msg = msg.replace(slash, slash*2).replace('"', r'\"').replace('\n',
r'\n').replace('\r', r'\r').replace('\t', r'\t')
ans.append('msgid "%s"'%msg)


@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, os, re, time, random, warnings
from polyglot.builtins import builtins
from polyglot.builtins import builtins, codepoint_to_chr, unicode_type
builtins.__dict__['dynamic_property'] = lambda func: func(None)
from math import floor
from functools import partial
@ -77,7 +77,7 @@ def get_types_map():
def to_unicode(raw, encoding='utf-8', errors='strict'):
if isinstance(raw, unicode):
if isinstance(raw, unicode_type):
return raw
return raw.decode(encoding, errors)
@ -113,7 +113,7 @@ def confirm_config_name(name):
_filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+/]')
_filename_sanitize_unicode = frozenset([u'\\', u'|', u'?', u'*', u'<',
u'"', u':', u'>', u'+', u'/'] + list(map(unichr, xrange(32))))
u'"', u':', u'>', u'+', u'/'] + list(map(codepoint_to_chr, xrange(32))))
def sanitize_file_name(name, substitute='_', as_unicode=False):
@ -126,7 +126,7 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
*NOTE:* This function always returns byte strings, not unicode objects. The byte strings
are encoded in the filesystem encoding of the platform, or UTF-8.
'''
if isinstance(name, unicode):
if isinstance(name, unicode_type):
name = name.encode(filesystem_encoding, 'ignore')
one = _filename_sanitize.sub(substitute, name)
one = re.sub(r'\s', ' ', one).strip()
@ -198,7 +198,7 @@ def prints(*args, **kwargs):
safe_encode = kwargs.get('safe_encode', False)
count = 0
for i, arg in enumerate(args):
if isinstance(arg, unicode):
if isinstance(arg, unicode_type):
if iswindows:
from calibre.utils.terminal import Detect
cs = Detect(file)
@ -222,8 +222,8 @@ def prints(*args, **kwargs):
try:
arg = str(arg)
except ValueError:
arg = unicode(arg)
if isinstance(arg, unicode):
arg = unicode_type(arg)
if isinstance(arg, unicode_type):
try:
arg = arg.encode(enc)
except UnicodeEncodeError:
@ -288,7 +288,7 @@ def load_library(name, cdll):
def filename_to_utf8(name):
'''Return C{name} encoded in utf8. Unhandled characters are replaced. '''
if isinstance(name, unicode):
if isinstance(name, unicode_type):
return name.encode('utf8')
codec = 'cp1252' if iswindows else 'utf8'
return name.decode(codec, 'replace').encode('utf8')
@ -557,7 +557,7 @@ def strftime(fmt, t=None):
else:
ans = time.strftime(fmt, t).decode(preferred_encoding, 'replace')
if early_year:
ans = ans.replace(u'_early year hack##', unicode(orig_year))
ans = ans.replace(u'_early year hack##', unicode_type(orig_year))
return ans
@ -669,7 +669,7 @@ def force_unicode(obj, enc=preferred_encoding):
def as_unicode(obj, enc=preferred_encoding):
if not isbytestring(obj):
try:
obj = unicode(obj)
obj = unicode_type(obj)
except:
try:
obj = str(obj)


@ -2,12 +2,12 @@
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import print_function
from polyglot.builtins import map
from polyglot.builtins import map, unicode_type
import sys, locale, codecs, os, importlib, collections
__appname__ = u'calibre'
numeric_version = (3, 40, 1)
__version__ = u'.'.join(map(unicode, numeric_version))
__version__ = u'.'.join(map(unicode_type, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
'''
@ -300,7 +300,7 @@ def get_portable_base():
def get_unicode_windows_env_var(name):
getenv = plugins['winutil'][0].getenv
return getenv(unicode(name))
return getenv(unicode_type(name))
def get_windows_username():


@ -7,6 +7,7 @@ import os, sys, zipfile, importlib
from calibre.constants import numeric_version, iswindows, isosx
from calibre.ptempfile import PersistentTemporaryFile
from polyglot.builtins import unicode_type
platform = 'linux'
if iswindows:
@ -195,7 +196,7 @@ class Plugin(object): # {{{
config_dialog.exec_()
if config_dialog.result() == QDialog.Accepted:
sc = unicode(sc.text()).strip()
sc = unicode_type(sc.text()).strip()
customize_plugin(self, sc)
geom = bytearray(config_dialog.saveGeometry())


@ -10,6 +10,7 @@ from calibre.customize import (FileTypePlugin, MetadataReaderPlugin,
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_comic_metadata
from calibre.ebooks.html.to_zip import HTML2ZIP
from polyglot.builtins import unicode_type
plugins = []
@ -64,23 +65,23 @@ class TXT2TXTZ(FileTypePlugin):
images = []
# Textile
for m in re.finditer(unicode(r'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))'), txt):
for m in re.finditer(unicode_type(r'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))'), txt):
path = m.group('path')
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
images.append(path)
# Markdown inline
for m in re.finditer(unicode(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)'), txt): # noqa
for m in re.finditer(unicode_type(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)'), txt): # noqa
path = m.group('path')
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
images.append(path)
# Markdown reference
refs = {}
for m in re.finditer(unicode(r'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$'), txt):
for m in re.finditer(unicode_type(r'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$'), txt):
if m.group('id') and m.group('path'):
refs[m.group('id')] = m.group('path')
for m in re.finditer(unicode(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]'), txt): # noqa
for m in re.finditer(unicode_type(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]'), txt): # noqa
path = refs.get(m.group('id'), None)
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
images.append(path)


@ -6,6 +6,7 @@ import re, os, shutil
from calibre import CurrentDir
from calibre.customize import Plugin
from polyglot.builtins import unicode_type
class ConversionOption(object):
@ -79,7 +80,7 @@ class OptionRecommendation(object):
self.option.choices:
raise ValueError('OpRec: %s: Recommended value not in choices'%
self.option.name)
if not (isinstance(self.recommended_value, (int, float, str, unicode)) or self.recommended_value is None):
if not (isinstance(self.recommended_value, (int, float, str, unicode_type)) or self.recommended_value is None):
raise ValueError('OpRec: %s:'%self.option.name + repr(
self.recommended_value) + ' is not a string or a number')
@ -340,7 +341,7 @@ class OutputFormatPlugin(Plugin):
@property
def is_periodical(self):
return self.oeb.metadata.publication_type and \
unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:')
unicode_type(self.oeb.metadata.publication_type[0]).startswith('periodical:')
def specialize_options(self, log, opts, input_fmt):
'''


@ -2,7 +2,7 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
from polyglot.builtins import map
from polyglot.builtins import map, unicode_type
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
@ -216,7 +216,7 @@ class PluginLoader(object):
if ans.minimum_calibre_version > numeric_version:
raise InvalidPlugin(
'The plugin at %s needs a version of calibre >= %s' %
(as_unicode(path_to_zip_file), '.'.join(map(unicode,
(as_unicode(path_to_zip_file), '.'.join(map(unicode_type,
ans.minimum_calibre_version))))
if platform not in ans.supported_platforms:
@ -231,7 +231,7 @@ class PluginLoader(object):
raise
def _locate_code(self, zf, path_to_zip_file):
names = [x if isinstance(x, unicode) else x.decode('utf-8') for x in
names = [x if isinstance(x, unicode_type) else x.decode('utf-8') for x in
zf.namelist()]
names = [x[1:] if x[0] == '/' else x for x in names]


@ -8,7 +8,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os, time, re
from collections import defaultdict
from polyglot.builtins import map
from polyglot.builtins import map, unicode_type
from contextlib import contextmanager
from functools import partial
@ -69,7 +69,7 @@ def metadata_extensions():
# but not actually added)
global _metadata_extensions
if _metadata_extensions is None:
_metadata_extensions = frozenset(map(unicode, BOOK_EXTENSIONS)) | {'opf'}
_metadata_extensions = frozenset(map(unicode_type, BOOK_EXTENSIONS)) | {'opf'}
return _metadata_extensions
@ -143,7 +143,7 @@ def find_books_in_directory(dirpath, single_book_per_directory, compiled_rules=(
for path in listdir_impl(dirpath, sort_by_mtime=True):
key, ext = splitext(path)
if allow_path(path, ext, compiled_rules):
books[icu_lower(key) if isinstance(key, unicode) else key.lower()][ext] = path
books[icu_lower(key) if isinstance(key, unicode_type) else key.lower()][ext] = path
for formats in books.itervalues():
if formats_ok(formats):


@ -12,7 +12,7 @@ import os, shutil, uuid, json, glob, time, hashlib, errno, sys
from functools import partial
import apsw
from polyglot.builtins import reraise
from polyglot.builtins import unicode_type, reraise
from calibre import isbytestring, force_unicode, prints, as_unicode
from calibre.constants import (iswindows, filesystem_encoding,
@ -93,7 +93,7 @@ class DBPrefs(dict): # {{{
dict.__setitem__(self, key, val)
def raw_to_object(self, raw):
if not isinstance(raw, unicode):
if not isinstance(raw, unicode_type):
raw = raw.decode(preferred_encoding)
return json.loads(raw, object_hook=from_json)
@ -561,10 +561,10 @@ class DB(object):
prints('found user category case overlap', catmap[uc])
cat = catmap[uc][0]
suffix = 1
while icu_lower((cat + unicode(suffix))) in catmap:
while icu_lower((cat + unicode_type(suffix))) in catmap:
suffix += 1
prints('Renaming user category %s to %s'%(cat, cat+unicode(suffix)))
user_cats[cat + unicode(suffix)] = user_cats[cat]
prints('Renaming user category %s to %s'%(cat, cat+unicode_type(suffix)))
user_cats[cat + unicode_type(suffix)] = user_cats[cat]
del user_cats[cat]
cats_changed = True
if cats_changed:
@ -670,23 +670,23 @@ class DB(object):
if d['is_multiple']:
if x is None:
return []
if isinstance(x, (str, unicode, bytes)):
if isinstance(x, (str, unicode_type, bytes)):
x = x.split(d['multiple_seps']['ui_to_list'])
x = [y.strip() for y in x if y.strip()]
x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
unicode) else y for y in x]
unicode_type) else y for y in x]
return [u' '.join(y.split()) for y in x]
else:
return x if x is None or isinstance(x, unicode) else \
return x if x is None or isinstance(x, unicode_type) else \
x.decode(preferred_encoding, 'replace')
def adapt_datetime(x, d):
if isinstance(x, (str, unicode, bytes)):
if isinstance(x, (str, unicode_type, bytes)):
x = parse_date(x, assume_utc=False, as_utc=False)
return x
def adapt_bool(x, d):
if isinstance(x, (str, unicode, bytes)):
if isinstance(x, (str, unicode_type, bytes)):
x = x.lower()
if x == 'true':
x = True
@ -707,7 +707,7 @@ class DB(object):
def adapt_number(x, d):
if x is None:
return None
if isinstance(x, (str, unicode, bytes)):
if isinstance(x, (str, unicode_type, bytes)):
if x.lower() == 'none':
return None
if d['datatype'] == 'int':
@ -1239,7 +1239,7 @@ class DB(object):
return self._library_id_
def fset(self, val):
self._library_id_ = unicode(val)
self._library_id_ = unicode_type(val)
self.execute('''
DELETE FROM library_id;
INSERT INTO library_id (uuid) VALUES (?);
@ -1715,7 +1715,7 @@ class DB(object):
[(book_id, fmt.upper()) for book_id in book_ids])
def set_conversion_options(self, options, fmt):
options = [(book_id, fmt.upper(), buffer(pickle_binary_string(data.encode('utf-8') if isinstance(data, unicode) else data)))
options = [(book_id, fmt.upper(), buffer(pickle_binary_string(data.encode('utf-8') if isinstance(data, unicode_type) else data)))
for book_id, data in options.iteritems()]
self.executemany('INSERT OR REPLACE INTO conversion_options(book,format,data) VALUES (?,?,?)', options)
@ -1754,7 +1754,7 @@ class DB(object):
copyfile_using_links(src, dest, dest_is_dir=False)
old_files.add(src)
x = path_map[x]
if not isinstance(x, unicode):
if not isinstance(x, unicode_type):
x = x.decode(filesystem_encoding, 'replace')
progress(x, i+1, total)


@ -11,7 +11,7 @@ import os, traceback, random, shutil, operator
from io import BytesIO
from collections import defaultdict, Set, MutableSet
from functools import wraps, partial
from polyglot.builtins import zip
from polyglot.builtins import unicode_type, zip
from time import time
from calibre import isbytestring, as_unicode
@ -528,14 +528,14 @@ class Cache(object):
@read_api
def get_item_id(self, field, item_name):
' Return the item id for item_name (case-insensitive) '
rmap = {icu_lower(v) if isinstance(v, unicode) else v:k for k, v in self.fields[field].table.id_map.iteritems()}
return rmap.get(icu_lower(item_name) if isinstance(item_name, unicode) else item_name, None)
rmap = {icu_lower(v) if isinstance(v, unicode_type) else v:k for k, v in self.fields[field].table.id_map.iteritems()}
return rmap.get(icu_lower(item_name) if isinstance(item_name, unicode_type) else item_name, None)
@read_api
def get_item_ids(self, field, item_names):
' Return the item id for item_name (case-insensitive) '
rmap = {icu_lower(v) if isinstance(v, unicode) else v:k for k, v in self.fields[field].table.id_map.iteritems()}
return {name:rmap.get(icu_lower(name) if isinstance(name, unicode) else name, None) for name in item_names}
rmap = {icu_lower(v) if isinstance(v, unicode_type) else v:k for k, v in self.fields[field].table.id_map.iteritems()}
return {name:rmap.get(icu_lower(name) if isinstance(name, unicode_type) else name, None) for name in item_names}
@read_api
def author_data(self, author_ids=None):


@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
import copy
from functools import partial
from polyglot.builtins import map
from polyglot.builtins import unicode_type, map
from calibre.ebooks.metadata import author_to_author_sort
from calibre.utils.config_base import tweaks
@ -47,7 +47,7 @@ class Tag(object):
return u'%s:%s:%s:%s:%s'%(self.name, self.count, self.id, self.state, self.category)
def __str__(self):
return unicode(self).encode('utf-8')
return unicode_type(self).encode('utf-8')
def __repr__(self):
return str(self)
@ -101,8 +101,8 @@ def clean_user_categories(dbcache):
if len(comps) == 0:
i = 1
while True:
if unicode(i) not in user_cats:
new_cats[unicode(i)] = user_cats[k]
if unicode_type(i) not in user_cats:
new_cats[unicode_type(i)] = user_cats[k]
break
i += 1
else:


@ -10,6 +10,7 @@ from textwrap import TextWrapper
from io import BytesIO
from calibre import prints
from polyglot.builtins import unicode_type
readonly = True
version = 0 # change this if you change signature of implementation()
@ -79,7 +80,7 @@ def do_list(fields, data, opts):
widths = list(map(lambda x: 0, fields))
for i in data:
for j, field in enumerate(fields):
widths[j] = max(widths[j], max(len(field), len(unicode(i[field]))))
widths[j] = max(widths[j], max(len(field), len(unicode_type(i[field]))))
screen_width = geometry()[0]
if not screen_width:
@ -110,7 +111,7 @@ def do_list(fields, data, opts):
for record in data:
text = [
wrappers[i].wrap(unicode(record[field]))
wrappers[i].wrap(unicode_type(record[field]))
for i, field in enumerate(fields)
]
lines = max(map(len, text))
@ -129,7 +130,7 @@ def do_csv(fields, data, opts):
for d in data:
row = [d[f] for f in fields]
csv_print.writerow([
x if isinstance(x, bytes) else unicode(x).encode('utf-8') for x in row
x if isinstance(x, bytes) else unicode_type(x).encode('utf-8') for x in row
])
print(buf.getvalue())
@ -164,11 +165,11 @@ def main(opts, args, dbctx):
is_rating = category_metadata(category)['datatype'] == 'rating'
for tag in category_data[category]:
if is_rating:
tag.name = unicode(len(tag.name))
tag.name = unicode_type(len(tag.name))
data.append({
'category': category,
'tag_name': tag.name,
'count': unicode(tag.count),
'count': unicode_type(tag.count),
'rating': fmtr(tag.avg_rating),
})
else:
@ -176,7 +177,7 @@ def main(opts, args, dbctx):
data.append({
'category': category,
'tag_name': _('CATEGORY ITEMS'),
'count': unicode(len(category_data[category])),
'count': unicode_type(len(category_data[category])),
'rating': ''
})


@ -11,6 +11,7 @@ from calibre.ebooks.metadata.book.base import field_from_string
from calibre.ebooks.metadata.book.serialize import read_cover
from calibre.ebooks.metadata.opf import get_metadata
from calibre.srv.changes import metadata
from polyglot.builtins import unicode_type
readonly = False
version = 0 # change this if you change signature of implementation()
@ -181,5 +182,5 @@ def main(opts, args, dbctx):
if not final_mi:
raise SystemExit(_('No book with id: %s in the database') % book_id)
prints(unicode(final_mi))
prints(unicode_type(final_mi))
return 0


@ -9,6 +9,7 @@ import sys
from calibre import prints
from calibre.ebooks.metadata.opf2 import OPFCreator
from polyglot.builtins import unicode_type
readonly = True
version = 0 # change this if you change signature of implementation()
@ -52,6 +53,6 @@ def main(opts, args, dbctx):
mi = OPFCreator(os.getcwdu(), mi)
mi.render(sys.stdout)
else:
prints(unicode(mi))
prints(unicode_type(mi))
return 0


@ -15,6 +15,7 @@ from copy import deepcopy
from calibre.ebooks.metadata.book.base import Metadata, SIMPLE_GET, TOP_LEVEL_IDENTIFIERS, NULL_VALUES, ALL_METADATA_FIELDS
from calibre.ebooks.metadata.book.formatter import SafeFormat
from calibre.utils.date import utcnow
from polyglot.builtins import unicode_type
# Lazy format metadata retrieval {{{
'''
@ -46,7 +47,7 @@ class MutableBase(object):
@resolved
def __unicode__(self):
return unicode(self._values)
return unicode_type(self._values)
@resolved
def __len__(self):


@ -11,6 +11,7 @@ import os
from calibre import prints
from calibre.utils.date import isoformat, DEFAULT_DATE
from polyglot.builtins import unicode_type
class SchemaUpgrade(object):
@ -601,7 +602,7 @@ class SchemaUpgrade(object):
id_ = str(id_)
fname = custom_recipe_filename(id_, title)
custom_recipes[id_] = (title, fname)
if isinstance(script, unicode):
if isinstance(script, unicode_type):
script = script.encode('utf-8')
with open(os.path.join(bdir, fname), 'wb') as f:
f.write(script)


@ -19,6 +19,7 @@ from calibre.utils.date import parse_date, UNDEFINED_DATE, now, dt_as_local
from calibre.utils.icu import primary_contains, sort_key
from calibre.utils.localization import lang_map, canonicalize_lang
from calibre.utils.search_query_parser import SearchQueryParser, ParseException
from polyglot.builtins import unicode_type
CONTAINS_MATCH = 0
EQUALS_MATCH = 1
@ -148,7 +149,7 @@ class DateSearch(object): # {{{
if query == 'false':
for v, book_ids in field_iter():
if isinstance(v, (str, unicode)):
if isinstance(v, (str, unicode_type)):
v = parse_date(v)
if v is None or v <= UNDEFINED_DATE:
matches |= book_ids
@ -156,7 +157,7 @@ class DateSearch(object): # {{{
if query == 'true':
for v, book_ids in field_iter():
if isinstance(v, (str, unicode)):
if isinstance(v, (str, unicode_type)):
v = parse_date(v)
if v is not None and v > UNDEFINED_DATE:
matches |= book_ids
@ -198,7 +199,7 @@ class DateSearch(object): # {{{
field_count = query.count('/') + 1
for v, book_ids in field_iter():
if isinstance(v, (str, unicode)):
if isinstance(v, (str, unicode_type)):
v = parse_date(v)
if v is not None and relop(dt_as_local(v), qd, field_count):
matches |= book_ids
@ -407,7 +408,7 @@ class SavedSearchQueries(object): # {{{
return self._db()
def force_unicode(self, x):
if not isinstance(x, unicode):
if not isinstance(x, unicode_type):
x = x.decode(preferred_encoding, 'replace')
return x


@ -9,7 +9,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os, errno, cPickle, sys, re
from locale import localeconv
from collections import OrderedDict, namedtuple
from polyglot.builtins import map
from polyglot.builtins import map, unicode_type
from threading import Lock
from calibre import as_unicode, prints
@ -19,7 +19,7 @@ from calibre.utils.localization import canonicalize_lang
def force_to_bool(val):
if isinstance(val, (str, unicode)):
if isinstance(val, (str, unicode_type)):
try:
val = icu_lower(val)
if not val:


@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
import weakref, operator
from functools import partial
from itertools import izip, imap
from polyglot.builtins import map
from polyglot.builtins import map, unicode_type
from calibre.ebooks.metadata import title_sort
from calibre.utils.config_base import tweaks, prefs
@ -374,7 +374,7 @@ class View(object):
self.marked_ids = dict.fromkeys(id_dict, u'true')
else:
# Ensure that all the items in the dict are text
self.marked_ids = dict(izip(id_dict.iterkeys(), imap(unicode,
self.marked_ids = dict(izip(id_dict.iterkeys(), imap(unicode_type,
id_dict.itervalues())))
# This invalidates all searches in the cache even though the cache may
# be shared by multiple views. This is not ideal, but...
@ -432,4 +432,3 @@ class View(object):
self._map_filtered = ids + self._map_filtered
if prefs['mark_new_books']:
self.toggle_marked_ids(ids)


@ -10,18 +10,15 @@ __docformat__ = 'restructuredtext en'
import re
from functools import partial
from datetime import datetime
from polyglot.builtins import zip
from polyglot.builtins import unicode_type, zip
from calibre.constants import preferred_encoding, ispy3
from calibre.constants import preferred_encoding
from calibre.ebooks.metadata import author_to_author_sort, title_sort
from calibre.utils.date import (
parse_only_date, parse_date, UNDEFINED_DATE, isoformat, is_date_undefined)
from calibre.utils.localization import canonicalize_lang
from calibre.utils.icu import strcmp
if ispy3:
unicode = str
# Convert data into values suitable for the db {{{
@ -32,7 +29,7 @@ def sqlite_datetime(x):
def single_text(x):
if x is None:
return x
if not isinstance(x, unicode):
if not isinstance(x, unicode_type):
x = x.decode(preferred_encoding, 'replace')
x = x.strip()
return x if x else None
@ -60,7 +57,7 @@ def multiple_text(sep, ui_sep, x):
return ()
if isinstance(x, bytes):
x = x.decode(preferred_encoding, 'replace')
if isinstance(x, unicode):
if isinstance(x, unicode_type):
x = x.split(sep)
else:
x = (y.decode(preferred_encoding, 'replace') if isinstance(y, bytes)
@ -72,7 +69,7 @@ def multiple_text(sep, ui_sep, x):
def adapt_datetime(x):
if isinstance(x, (unicode, bytes)):
if isinstance(x, (unicode_type, bytes)):
x = parse_date(x, assume_utc=False, as_utc=False)
if x and is_date_undefined(x):
x = UNDEFINED_DATE
@ -80,7 +77,7 @@ def adapt_datetime(x):
def adapt_date(x):
if isinstance(x, (unicode, bytes)):
if isinstance(x, (unicode_type, bytes)):
x = parse_only_date(x)
if x is None or is_date_undefined(x):
x = UNDEFINED_DATE
@ -90,14 +87,14 @@ def adapt_date(x):
def adapt_number(typ, x):
if x is None:
return None
if isinstance(x, (unicode, bytes)):
if isinstance(x, (unicode_type, bytes)):
if not x or x.lower() == 'none':
return None
return typ(x)
def adapt_bool(x):
if isinstance(x, (unicode, bytes)):
if isinstance(x, (unicode_type, bytes)):
x = x.lower()
if x == 'true':
x = True


@ -14,6 +14,7 @@ import sys
from calibre.devices.usbms.driver import USBMS
from calibre.ebooks.metadata import string_to_authors
from polyglot.builtins import unicode_type
class JETBOOK(USBMS):
@ -64,7 +65,7 @@ class JETBOOK(USBMS):
def check_unicode(txt):
txt = txt.replace('_', ' ')
if not isinstance(txt, unicode):
if not isinstance(txt, unicode_type):
return txt.decode(sys.getfilesystemencoding(), 'replace')
return txt


@ -15,6 +15,7 @@ from calibre.constants import DEBUG
from calibre.devices.kindle.bookmark import Bookmark
from calibre.devices.usbms.driver import USBMS
from calibre import strftime, fsync, prints
from polyglot.builtins import unicode_type
'''
Notes on collections:
@ -113,7 +114,7 @@ class KINDLE(USBMS):
match = cls.WIRELESS_FILE_NAME_PATTERN.match(os.path.basename(path))
if match is not None:
mi.title = match.group('title')
if not isinstance(mi.title, unicode):
if not isinstance(mi.title, unicode_type):
mi.title = mi.title.decode(sys.getfilesystemencoding(),
'replace')
return mi
@ -291,9 +292,9 @@ class KINDLE(USBMS):
hrTag['class'] = 'annotations_divider'
user_notes_soup.insert(0, hrTag)
mi.comments += unicode(user_notes_soup.prettify())
mi.comments += unicode_type(user_notes_soup.prettify())
else:
mi.comments = unicode(user_notes_soup.prettify())
mi.comments = unicode_type(user_notes_soup.prettify())
# Update library comments
db.set_comment(db_id, mi.comments)
@ -547,7 +548,7 @@ class KINDLE2(KINDLE):
cust_col_name = opts.extra_customization[self.OPT_APNX_METHOD_COL]
if cust_col_name:
try:
temp = unicode(metadata.get(cust_col_name)).lower()
temp = unicode_type(metadata.get(cust_col_name)).lower()
if temp in self.EXTRA_CUSTOMIZATION_CHOICES[self.OPT_APNX_METHOD]:
method = temp
else:


@ -14,6 +14,7 @@ from calibre.devices.usbms.books import CollectionsBookList
from calibre.utils.config_base import prefs
from calibre.devices.usbms.driver import debug_print
from calibre.ebooks.metadata import author_to_author_sort
from polyglot.builtins import unicode_type
class Book(Book_):
@ -95,7 +96,7 @@ class Book(Book_):
ans = [u"Kobo metadata:"]
def fmt(x, y):
ans.append(u'%-20s: %s'%(unicode(x), unicode(y)))
ans.append(u'%-20s: %s'%(unicode_type(x), unicode_type(y)))
if self.contentID:
fmt('Content ID', self.contentID)


@ -32,6 +32,7 @@ from calibre import prints, fsync
from calibre.ptempfile import PersistentTemporaryFile
from calibre.constants import DEBUG
from calibre.utils.config_base import prefs
from polyglot.builtins import unicode_type
EPUB_EXT = '.epub'
KEPUB_EXT = '.kepub'
@ -43,7 +44,7 @@ def qhash(inputstr):
instr = b""
if isinstance(inputstr, bytes):
instr = inputstr
elif isinstance(inputstr, unicode):
elif isinstance(inputstr, unicode_type):
instr = inputstr.encode("utf8")
else:
return -1
@ -1323,9 +1324,9 @@ class KOBO(USBMS):
hrTag['class'] = 'annotations_divider'
user_notes_soup.insert(0, hrTag)
mi.comments += unicode(user_notes_soup.prettify())
mi.comments += unicode_type(user_notes_soup.prettify())
else:
mi.comments = unicode(user_notes_soup.prettify())
mi.comments = unicode_type(user_notes_soup.prettify())
# Update library comments
db.set_comment(db_id, mi.comments)
@ -1824,7 +1825,7 @@ class KOBOTOUCH(KOBO):
bookshelves.append(row['ShelfName'])
cursor.close()
# debug_print("KoboTouch:get_bookshelvesforbook - count bookshelves=" + unicode(count_bookshelves))
# debug_print("KoboTouch:get_bookshelvesforbook - count bookshelves=" + unicode_type(count_bookshelves))
return bookshelves
self.debug_index = 0
@ -2394,7 +2395,7 @@ class KOBOTOUCH(KOBO):
if self.manage_collections:
if collections:
# debug_print("KoboTouch:update_device_database_collections - length collections=" + unicode(len(collections)))
# debug_print("KoboTouch:update_device_database_collections - length collections=" + unicode_type(len(collections)))
# Need to reset the collections outside the particular loops
# otherwise the last item will not be removed
@ -2834,7 +2835,7 @@ class KOBOTOUCH(KOBO):
# count_bookshelves = i + 1
cursor.close()
# debug_print("KoboTouch:get_bookshelflist - count bookshelves=" + unicode(count_bookshelves))
# debug_print("KoboTouch:get_bookshelflist - count bookshelves=" + unicode_type(count_bookshelves))
return bookshelves
@ -2918,7 +2919,7 @@ class KOBOTOUCH(KOBO):
cursor.execute(addquery, add_values)
elif result['_IsDeleted'] == 'true':
debug_print("KoboTouch:check_for_bookshelf - Shelf '%s' is deleted - undeleting. result['_IsDeleted']='%s'" % (
bookshelf_name, unicode(result['_IsDeleted'])))
bookshelf_name, unicode_type(result['_IsDeleted'])))
cursor.execute(updatequery, test_values)
cursor.close()


@ -16,6 +16,7 @@ from calibre.gui2.device_drivers.tabbed_device_config import TabbedDeviceConfig,
from calibre.devices.usbms.driver import debug_print
from calibre.gui2 import error_dialog
from calibre.gui2.dialogs.template_dialog import TemplateDialog
from polyglot.builtins import unicode_type
def wrap_msg(msg):
@ -122,7 +123,7 @@ class KOBOTOUCHConfig(TabbedDeviceConfig):
p['support_newer_firmware'] = self.support_newer_firmware
p['debugging_title'] = self.debugging_title
p['driver_version'] = '.'.join([unicode(i) for i in self.device.version])
p['driver_version'] = '.'.join([unicode_type(i) for i in self.device.version])
return p
@ -397,7 +398,7 @@ class AdvancedGroupBox(DeviceOptionsGroupBox):
'to perform full read-write functionality - Here be Dragons!! '
'Enable only if you are comfortable with restoring your kobo '
'to factory defaults and testing software. '
'This driver supports firmware V2.x.x and DBVersion up to ') + unicode(
'This driver supports firmware V2.x.x and DBVersion up to ') + unicode_type(
device.supported_dbversion), device.get_pref('support_newer_firmware')
)
@ -555,7 +556,7 @@ class TemplateConfig(QWidget): # {{{
@property
def template(self):
return unicode(self.t.text()).strip()
return unicode_type(self.t.text()).strip()
@template.setter
def template(self, template):
@ -577,7 +578,7 @@ class TemplateConfig(QWidget): # {{{
except Exception as err:
error_dialog(self, _('Invalid template'),
'<p>'+_('The template "%s" is invalid:')%tmpl +
'<br>'+unicode(err), show=True)
'<br>'+unicode_type(err), show=True)
return False
# }}}


@ -18,6 +18,7 @@ from calibre.devices.mtp.base import debug
from calibre.devices.mtp.defaults import DeviceDefaults
from calibre.ptempfile import SpooledTemporaryFile, PersistentTemporaryDirectory
from calibre.utils.filenames import shorten_components_to
from polyglot.builtins import unicode_type
BASE = importlib.import_module('calibre.devices.mtp.%s.driver'%(
'windows' if iswindows else 'unix')).MTP_DEVICE
@ -75,7 +76,7 @@ class MTP_DEVICE(BASE):
def is_folder_ignored(self, storage_or_storage_id, path,
ignored_folders=None):
storage_id = unicode(getattr(storage_or_storage_id, 'object_id',
storage_id = unicode_type(getattr(storage_or_storage_id, 'object_id',
storage_or_storage_id))
lpath = tuple(icu_lower(name) for name in path)
if ignored_folders is None:
@ -166,14 +167,14 @@ class MTP_DEVICE(BASE):
traceback.print_exc()
dinfo = {}
if dinfo.get('device_store_uuid', None) is None:
dinfo['device_store_uuid'] = unicode(uuid.uuid4())
dinfo['device_store_uuid'] = unicode_type(uuid.uuid4())
if dinfo.get('device_name', None) is None:
dinfo['device_name'] = self.current_friendly_name
if name is not None:
dinfo['device_name'] = name
dinfo['location_code'] = location_code
dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None)
dinfo['calibre_version'] = '.'.join([unicode(i) for i in numeric_version])
dinfo['calibre_version'] = '.'.join([unicode_type(i) for i in numeric_version])
dinfo['date_last_connected'] = isoformat(now())
dinfo['mtp_prefix'] = storage.storage_prefix
raw = json.dumps(dinfo, default=to_json)


@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
import weakref, sys, json
from collections import deque
from operator import attrgetter
from polyglot.builtins import map
from polyglot.builtins import map, unicode_type
from datetime import datetime
from calibre import human_readable, prints, force_unicode
@ -74,7 +74,7 @@ class FileOrFolder(object):
def __repr__(self):
name = 'Folder' if self.is_folder else 'File'
try:
path = unicode(self.full_path)
path = unicode_type(self.full_path)
except:
path = ''
datum = 'size=%s'%(self.size)
@ -250,5 +250,3 @@ class FilesystemCache(object):
return self.id_map[object_id]
except KeyError:
raise ValueError('No object found with MTP path: %s'%path)


@ -17,6 +17,7 @@ from calibre.constants import plugins, islinux, isosx, ispy3
from calibre.ptempfile import SpooledTemporaryFile
from calibre.devices.errors import OpenFailed, DeviceError, BlacklistedDevice, OpenActionNeeded
from calibre.devices.mtp.base import MTPDeviceBase, synchronous, debug
from polyglot.builtins import unicode_type
MTPDevice = namedtuple('MTPDevice', 'busnum devnum vendor_id product_id '
'bcd serial manufacturer product')
@ -321,7 +322,7 @@ class MTP_DEVICE(MTPDeviceBase):
storage.append({'id':sid, 'size':capacity,
'is_folder':True, 'name':name, 'can_delete':False,
'is_system':True})
self._currently_getting_sid = unicode(sid)
self._currently_getting_sid = unicode_type(sid)
items, errs = self.dev.get_filesystem(sid,
partial(self._filesystem_callback, {}))
all_items.extend(items), all_errs.extend(errs)
@ -373,7 +374,7 @@ class MTP_DEVICE(MTPDeviceBase):
e = parent.folder_named(name)
if e is not None:
return e
ename = name.encode('utf-8') if isinstance(name, unicode) else name
ename = name.encode('utf-8') if isinstance(name, unicode_type) else name
sid, pid = parent.storage_id, parent.object_id
if pid == sid:
pid = 0
@ -396,7 +397,7 @@ class MTP_DEVICE(MTPDeviceBase):
raise ValueError('Cannot upload file %s, it already exists'%(
e.full_path,))
self.delete_file_or_folder(e)
ename = name.encode('utf-8') if isinstance(name, unicode) else name
ename = name.encode('utf-8') if isinstance(name, unicode_type) else name
sid, pid = parent.storage_id, parent.object_id
if pid == sid:
pid = 0xFFFFFFFF


@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
import time, threading, traceback
from functools import wraps, partial
from polyglot.builtins import zip
from polyglot.builtins import unicode_type, zip
from itertools import chain
from calibre import as_unicode, prints, force_unicode
@ -264,7 +264,7 @@ class MTP_DEVICE(MTPDeviceBase):
break
storage = {'id':storage_id, 'size':capacity, 'name':name,
'is_folder':True, 'can_delete':False, 'is_system':True}
self._currently_getting_sid = unicode(storage_id)
self._currently_getting_sid = unicode_type(storage_id)
id_map = self.dev.get_filesystem(storage_id, partial(
self._filesystem_callback, {}))
for x in id_map.itervalues():
@ -441,5 +441,3 @@ class MTP_DEVICE(MTPDeviceBase):
ans = self.dev.put_file(pid, name, stream, size, callback)
ans['storage_id'] = sid
return parent.add_child(ans)


@ -24,6 +24,7 @@ from calibre.devices.usbms.books import CollectionsBookList
from calibre.devices.usbms.books import BookList
from calibre.ebooks.metadata import authors_to_sort_string, authors_to_string
from calibre.constants import islinux
from polyglot.builtins import unicode_type
DBPATH = 'Sony_Reader/database/books.db'
THUMBPATH = 'Sony_Reader/database/cache/books/%s/thumbnail/main_thumbnail.jpg'
@ -170,7 +171,7 @@ class PRST1(USBMS):
with closing(sqlite.connect(dbpath)) as connection:
# Replace undecodable characters in the db instead of erroring out
connection.text_factory = lambda x: unicode(x, "utf-8", "replace")
connection.text_factory = lambda x: unicode_type(x, "utf-8", "replace")
cursor = connection.cursor()
# Query collections


@ -38,6 +38,7 @@ from calibre.utils.filenames import ascii_filename as sanitize, shorten_componen
from calibre.utils.mdns import (publish as publish_zeroconf, unpublish as
unpublish_zeroconf, get_all_ips)
from calibre.utils.socket_inheritance import set_socket_inherit
from polyglot.builtins import unicode_type
def synchronous(tlockname):
@ -397,7 +398,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
if isinstance(a, dict):
printable = {}
for k,v in a.iteritems():
if isinstance(v, (str, unicode)) and len(v) > 50:
if isinstance(v, (str, unicode_type)) and len(v) > 50:
printable[k] = 'too long'
else:
printable[k] = v
@ -418,14 +419,14 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
if not isinstance(dinfo, dict):
dinfo = {}
if dinfo.get('device_store_uuid', None) is None:
dinfo['device_store_uuid'] = unicode(uuid.uuid4())
dinfo['device_store_uuid'] = unicode_type(uuid.uuid4())
if dinfo.get('device_name') is None:
dinfo['device_name'] = self.get_gui_name()
if name is not None:
dinfo['device_name'] = name
dinfo['location_code'] = location_code
dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None)
dinfo['calibre_version'] = '.'.join([unicode(i) for i in numeric_version])
dinfo['calibre_version'] = '.'.join([unicode_type(i) for i in numeric_version])
dinfo['date_last_connected'] = isoformat(now())
dinfo['prefix'] = self.PREFIX
return dinfo
@ -478,7 +479,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
from calibre.library.save_to_disk import get_components
from calibre.library.save_to_disk import config
opts = config().parse()
if not isinstance(template, unicode):
if not isinstance(template, unicode_type):
template = template.decode('utf-8')
app_id = str(getattr(mdata, 'application_id', ''))
id_ = mdata.get('id', fname)
@ -726,7 +727,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
from calibre.utils.date import now, parse_date
try:
key = self._make_metadata_cache_key(uuid, ext_or_lpath)
if isinstance(lastmod, unicode):
if isinstance(lastmod, unicode_type):
if lastmod == 'None':
return None
lastmod = parse_date(lastmod)


@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
import os, re
from polyglot.builtins import unicode_type
def node_mountpoint(node):
@ -48,7 +49,7 @@ class UDisks(object):
def mount(self, device_node_path):
d = self.device(device_node_path)
try:
return unicode(d.FilesystemMount('',
return unicode_type(d.FilesystemMount('',
['auth_no_user_interaction', 'rw', 'noexec', 'nosuid',
'nodev', 'uid=%d'%os.geteuid(), 'gid=%d'%os.getegid()]))
except:
@ -131,7 +132,7 @@ class UDisks2(object):
mount_options = ['rw', 'noexec', 'nosuid',
'nodev', 'uid=%d'%os.geteuid(), 'gid=%d'%os.getegid()]
try:
return unicode(d.Mount(
return unicode_type(d.Mount(
{
'auth.no_user_interaction':True,
'options':','.join(mount_options)


@ -5,6 +5,7 @@ __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from calibre.utils.config_base import Config, ConfigProxy
from polyglot.builtins import unicode_type
class DeviceConfig(object):
@ -107,15 +108,15 @@ class DeviceConfig(object):
if hasattr(config_widget.opt_extra_customization[i], 'isChecked'):
ec.append(config_widget.opt_extra_customization[i].isChecked())
elif hasattr(config_widget.opt_extra_customization[i], 'currentText'):
ec.append(unicode(config_widget.opt_extra_customization[i].currentText()).strip())
ec.append(unicode_type(config_widget.opt_extra_customization[i].currentText()).strip())
else:
ec.append(unicode(config_widget.opt_extra_customization[i].text()).strip())
ec.append(unicode_type(config_widget.opt_extra_customization[i].text()).strip())
else:
ec = unicode(config_widget.opt_extra_customization.text()).strip()
ec = unicode_type(config_widget.opt_extra_customization.text()).strip()
if not ec:
ec = None
proxy['extra_customization'] = ec
st = unicode(config_widget.opt_save_template.text())
st = unicode_type(config_widget.opt_save_template.text())
proxy['save_template'] = st
@classmethod


@ -20,6 +20,7 @@ from calibre.devices.usbms.cli import CLI
from calibre.devices.usbms.device import Device
from calibre.devices.usbms.books import BookList, Book
from calibre.ebooks.metadata.book.json_codec import JsonCodec
from polyglot.builtins import unicode_type
BASE_TIME = None
@ -105,14 +106,14 @@ class USBMS(CLI, Device):
if not isinstance(dinfo, dict):
dinfo = {}
if dinfo.get('device_store_uuid', None) is None:
dinfo['device_store_uuid'] = unicode(uuid.uuid4())
dinfo['device_store_uuid'] = unicode_type(uuid.uuid4())
if dinfo.get('device_name', None) is None:
dinfo['device_name'] = self.get_gui_name()
if name is not None:
dinfo['device_name'] = name
dinfo['location_code'] = location_code
dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None)
dinfo['calibre_version'] = '.'.join([unicode(i) for i in numeric_version])
dinfo['calibre_version'] = '.'.join([unicode_type(i) for i in numeric_version])
dinfo['date_last_connected'] = isoformat(now())
dinfo['prefix'] = prefix.replace('\\', '/')
return dinfo


@ -11,6 +11,7 @@ import os, time, re
from functools import partial
from calibre.devices.errors import DeviceError, WrongDestinationError, FreeSpaceError
from polyglot.builtins import unicode_type
def sanity_check(on_card, files, card_prefixes, free_space):
@ -97,7 +98,7 @@ def create_upload_path(mdata, fname, template, sanitize,
ext = path_type.splitext(fname)[1]
opts = config().parse()
if not isinstance(template, unicode):
if not isinstance(template, unicode_type):
template = template.decode('utf-8')
app_id = str(getattr(mdata, 'application_id', ''))
id_ = mdata.get('id', fname)


@ -9,6 +9,7 @@ from various formats.
import traceback, os, re
from calibre import CurrentDir, prints
from polyglot.builtins import unicode_type
class ConversionError(Exception):
@ -113,7 +114,7 @@ def extract_calibre_cover(raw, base, log):
if matches is None:
body = soup.find('body')
if body is not None:
text = u''.join(map(unicode, body.findAll(text=True)))
text = u''.join(map(unicode_type, body.findAll(text=True)))
if text.strip():
# Body has text, abort
return
@ -210,7 +211,7 @@ def check_ebook_format(stream, current_guess):
def normalize(x):
if isinstance(x, unicode):
if isinstance(x, unicode_type):
import unicodedata
x = unicodedata.normalize('NFC', x)
return x


@ -8,6 +8,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, codecs
from polyglot.builtins import unicode_type
ENCODING_PATS = [
# XML declaration
@ -92,7 +93,7 @@ def force_encoding(raw, verbose, assume_utf8=False):
def detect_xml_encoding(raw, verbose=False, assume_utf8=False):
if not raw or isinstance(raw, unicode):
if not raw or isinstance(raw, unicode_type):
return raw, None
for x in ('utf8', 'utf-16-le', 'utf-16-be'):
bom = getattr(codecs, 'BOM_'+x.upper().replace('-16', '16').replace(
@ -135,7 +136,7 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
return '', None
raw, encoding = detect_xml_encoding(raw, verbose=verbose,
assume_utf8=assume_utf8)
if not isinstance(raw, unicode):
if not isinstance(raw, unicode_type):
raw = raw.decode(encoding, 'replace')
if strip_encoding_pats:


@ -14,6 +14,7 @@ from calibre.utils.chm.chm import CHMFile
from calibre.constants import plugins
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.chardet import xml_to_unicode
from polyglot.builtins import unicode_type
chmlib, chmlib_err = plugins['chmlib']
@ -48,7 +49,7 @@ class CHMReader(CHMFile):
def __init__(self, input, log, input_encoding=None):
CHMFile.__init__(self)
if isinstance(input, unicode):
if isinstance(input, unicode_type):
input = input.encode(filesystem_encoding)
if not self.LoadCHM(input):
raise CHMError("Unable to open CHM file '%s'"%(input,))
@ -113,7 +114,7 @@ class CHMReader(CHMFile):
enc = 'cp1252'
for path in self.Contents():
fpath = path
if not isinstance(path, unicode):
if not isinstance(path, unicode_type):
fpath = path.decode(enc)
lpath = os.path.join(output_dir, fpath)
self._ensure_dir(lpath)
@ -146,7 +147,7 @@ class CHMReader(CHMFile):
with open(lpath, 'r+b') as f:
data = f.read()
data = self._reformat(data, lpath)
if isinstance(data, unicode):
if isinstance(data, unicode_type):
data = data.encode('utf-8')
f.seek(0)
f.truncate()


@ -16,6 +16,7 @@ from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.icu import numeric_sort_key
from calibre.utils.ipc.server import Server
from calibre.utils.ipc.job import ParallelJob
from polyglot.builtins import unicode_type
# If the specified screen has either dimension larger than this value, no image
# rescaling is done (we assume that it is a tablet output profile)
@ -27,7 +28,7 @@ def extract_comic(path_to_comic_file):
Un-archive the comic file.
'''
tdir = PersistentTemporaryDirectory(suffix='_comic_extract')
if not isinstance(tdir, unicode):
if not isinstance(tdir, unicode_type):
# Needed in case the zip file has wrongly encoded unicode file/dir
# names
tdir = tdir.decode(filesystem_encoding)
@ -273,6 +274,3 @@ def process_pages(pages, opts, update, tdir):
ans += pages
failures += failures_
return ans, failures


@ -13,6 +13,7 @@ from calibre.utils.lock import ExclusiveFile
from calibre import sanitize_file_name
from calibre.customize.conversion import OptionRecommendation
from calibre.customize.ui import available_output_formats
from polyglot.builtins import unicode_type
config_dir = os.path.join(config_dir, 'conversion')
@ -85,7 +86,7 @@ class GuiRecommendations(dict):
def serialize(self):
ans = json.dumps(self, indent=2, ensure_ascii=False)
if isinstance(ans, unicode):
if isinstance(ans, unicode_type):
ans = ans.encode('utf-8')
return b'json:' + ans


@ -8,6 +8,7 @@ import os
from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre.constants import filesystem_encoding
from polyglot.builtins import unicode_type
class CHMInput(InputFormatPlugin):
@ -34,7 +35,7 @@ class CHMInput(InputFormatPlugin):
log.debug('Processing CHM...')
with TemporaryDirectory('_chm2oeb') as tdir:
if not isinstance(tdir, unicode):
if not isinstance(tdir, unicode_type):
tdir = tdir.decode(filesystem_encoding)
html_input = plugin_for_input_format('html')
for opt in html_input.options:
@ -125,7 +126,7 @@ class CHMInput(InputFormatPlugin):
base = os.path.dirname(os.path.abspath(htmlpath))
def unquote(x):
if isinstance(x, unicode):
if isinstance(x, unicode_type):
x = x.encode('utf-8')
return _unquote(x).decode('utf-8')


@ -7,6 +7,7 @@ import os, re, posixpath
from itertools import cycle
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from polyglot.builtins import unicode_type
ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC'
IDPF_OBFUSCATION = 'http://www.idpf.org/2008/embedding'
@ -367,7 +368,7 @@ class EPUBInput(InputFormatPlugin):
def add_from_li(li, parent):
href = text = None
for x in li.iterchildren(XHTML('a'), XHTML('span')):
text = etree.tostring(x, method='text', encoding=unicode, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
text = etree.tostring(x, method='text', encoding=unicode_type, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
href = x.get('href')
if href:
if href.startswith('#'):


@ -13,6 +13,7 @@ from calibre.customize.conversion import (OutputFormatPlugin,
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
from calibre.constants import filesystem_encoding
from polyglot.builtins import unicode_type
block_level_tags = (
'address',
@ -225,8 +226,8 @@ class EPUBOutput(OutputFormatPlugin):
identifiers = oeb.metadata['identifier']
uuid = None
for x in identifiers:
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
uuid = unicode(x).split(':')[-1]
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode_type(x).startswith('urn:uuid:'):
uuid = unicode_type(x).split(':')[-1]
break
encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
@ -241,7 +242,7 @@ class EPUBOutput(OutputFormatPlugin):
# for some absurd reason, or it will throw a hissy fit and refuse
# to use the obfuscated fonts.
for x in identifiers:
if unicode(x) == uuid:
if unicode_type(x) == uuid:
x.content = 'urn:uuid:'+uuid
with TemporaryDirectory(u'_epub_output') as tdir:
@ -325,7 +326,7 @@ class EPUBOutput(OutputFormatPlugin):
fonts = []
for uri in list(uris.keys()):
path = uris[uri]
if isinstance(path, unicode):
if isinstance(path, unicode_type):
path = path.encode(filesystem_encoding)
if not os.path.exists(path):
uris.pop(uri)
@ -339,7 +340,7 @@ class EPUBOutput(OutputFormatPlugin):
f.write(chr(ord(data[i]) ^ key[i%16]))
else:
self.log.warn('Font', path, 'is invalid, ignoring')
if not isinstance(uri, unicode):
if not isinstance(uri, unicode_type):
uri = uri.decode('utf-8')
fonts.append(u'''
<enc:EncryptedData>


@ -8,6 +8,7 @@ import os, re
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre import guess_type
from polyglot.builtins import unicode_type
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
FB21NS = 'http://www.gribuser.ru/xml/fictionbook/2.1'
@ -70,7 +71,7 @@ class FB2Input(InputFormatPlugin):
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
css = ''
for s in stylesheets:
css += etree.tostring(s, encoding=unicode, method='text',
css += etree.tostring(s, encoding=unicode_type, method='text',
with_tail=False) + '\n\n'
if css:
import css_parser, logging
@ -82,7 +83,7 @@ class FB2Input(InputFormatPlugin):
log.debug('Parsing stylesheet...')
stylesheet = parser.parseString(text)
stylesheet.namespaces['h'] = XHTML_NS
css = unicode(stylesheet.cssText).replace('h|style', 'h|span')
css = unicode_type(stylesheet.cssText).replace('h|style', 'h|span')
css = re.sub(r'name\s*=\s*', 'class=', css)
self.extract_embedded_content(doc)
log.debug('Converting XML to HTML...')


@ -17,6 +17,7 @@ from calibre.customize.conversion import (InputFormatPlugin,
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
from calibre.utils.imghdr import what
from polyglot.builtins import unicode_type
def sanitize_file_name(x):
@ -225,7 +226,7 @@ class HTMLInput(InputFormatPlugin):
def link_to_local_path(self, link_, base=None):
from calibre.ebooks.html.input import Link
if not isinstance(link_, unicode):
if not isinstance(link_, unicode_type):
try:
link_ = link_.decode('utf-8', 'error')
except:
@ -289,7 +290,7 @@ class HTMLInput(InputFormatPlugin):
# bhref refers to an already existing file. The read() method of
# DirContainer will call unquote on it before trying to read the
# file, therefore we quote it here.
if isinstance(bhref, unicode):
if isinstance(bhref, unicode_type):
bhref = bhref.encode('utf-8')
item.html_input_href = quote(bhref).decode('utf-8')
if guessed in self.OEB_STYLES:


@ -9,6 +9,7 @@ from os.path import dirname, abspath, relpath as _relpath, exists, basename
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre import CurrentDir
from calibre.ptempfile import PersistentTemporaryDirectory
from polyglot.builtins import unicode_type
def relpath(*args):
@ -135,7 +136,7 @@ class HTMLOutput(OutputFormatPlugin):
toc=html_toc, meta=meta, nextLink=nextLink,
tocUrl=tocUrl, cssLink=cssLink,
firstContentPageLink=nextLink)
if isinstance(t, unicode):
if isinstance(t, unicode_type):
t = t.encode('utf-8')
f.write(t)


@ -13,6 +13,7 @@ from cStringIO import StringIO
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ptempfile import TemporaryDirectory
from polyglot.builtins import unicode_type
class HTMLZOutput(OutputFormatPlugin):
@ -81,9 +82,9 @@ class HTMLZOutput(OutputFormatPlugin):
fname = u'index'
if opts.htmlz_title_filename:
from calibre.utils.filenames import shorten_components_to
fname = shorten_components_to(100, (ascii_filename(unicode(oeb_book.metadata.title[0])),))[0]
fname = shorten_components_to(100, (ascii_filename(unicode_type(oeb_book.metadata.title[0])),))[0]
with open(os.path.join(tdir, fname+u'.html'), 'wb') as tf:
if isinstance(html, unicode):
if isinstance(html, unicode_type):
html = html.encode('utf-8')
tf.write(html)
@ -100,7 +101,7 @@ class HTMLZOutput(OutputFormatPlugin):
for item in oeb_book.manifest:
if item.media_type in OEB_IMAGES and item.href in images:
if item.media_type == SVG_MIME:
data = unicode(etree.tostring(item.data, encoding=unicode))
data = unicode_type(etree.tostring(item.data, encoding=unicode_type))
else:
data = item.data
fname = os.path.join(tdir, u'images', images[item.href])


@ -10,6 +10,7 @@ import sys, os
from calibre.customize.conversion import OutputFormatPlugin
from calibre.customize.conversion import OptionRecommendation
from polyglot.builtins import unicode_type
class LRFOptions(object):
@ -17,7 +18,7 @@ class LRFOptions(object):
def __init__(self, output, opts, oeb):
def f2s(f):
try:
return unicode(f[0])
return unicode_type(f[0])
except:
return ''
m = oeb.metadata
@ -31,13 +32,13 @@ class LRFOptions(object):
self.title_sort = self.author_sort = ''
for x in m.creator:
if x.role == 'aut':
self.author = unicode(x)
fa = unicode(getattr(x, 'file_as', ''))
self.author = unicode_type(x)
fa = unicode_type(getattr(x, 'file_as', ''))
if fa:
self.author_sort = fa
for x in m.title:
if unicode(x.file_as):
self.title_sort = unicode(x.file_as)
if unicode_type(x.file_as):
self.title_sort = unicode_type(x.file_as)
self.freetext = f2s(m.description)
self.category = f2s(m.subject)
self.cover = None


@ -6,6 +6,7 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin
from polyglot.builtins import unicode_type
class MOBIInput(InputFormatPlugin):
@ -49,7 +50,7 @@ class MOBIInput(InputFormatPlugin):
raw = parse_cache.pop('calibre_raw_mobi_markup', False)
if raw:
if isinstance(raw, unicode):
if isinstance(raw, unicode_type):
raw = raw.encode('utf-8')
open(u'debug-raw.html', 'wb').write(raw)
from calibre.ebooks.oeb.base import close_self_closing_tags


@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
from calibre.customize.conversion import (OutputFormatPlugin,
OptionRecommendation)
from polyglot.builtins import unicode_type
def remove_html_cover(oeb, log):
@ -121,7 +122,7 @@ class MOBIOutput(OutputFormatPlugin):
if not found:
from calibre.ebooks import generate_masthead
self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...')
raw = generate_masthead(unicode(self.oeb.metadata['title'][0]))
raw = generate_masthead(unicode_type(self.oeb.metadata['title'][0]))
id, href = self.oeb.manifest.generate('masthead', 'masthead')
self.oeb.manifest.add(id, href, 'image/gif', data=raw)
self.oeb.guide.add('masthead', 'Masthead Image', href)
@ -165,7 +166,7 @@ class MOBIOutput(OutputFormatPlugin):
sec.nodes.remove(a)
root = TOC(klass='periodical', href=self.oeb.spine[0].href,
title=unicode(self.oeb.metadata.title[0]))
title=unicode_type(self.oeb.metadata.title[0]))
for s in sections:
if articles[id(s)]:

View File

@ -14,6 +14,7 @@ from calibre.constants import iswindows
from calibre.customize.conversion import (OutputFormatPlugin,
OptionRecommendation)
from calibre.ptempfile import TemporaryDirectory
from polyglot.builtins import unicode_type
UNITS = ['millimeter', 'centimeter', 'point', 'inch' , 'pica' , 'didot',
'cicero', 'devicepixel']
@ -202,8 +203,8 @@ class PDFOutput(OutputFormatPlugin):
def get_cover_data(self):
oeb = self.oeb
if (oeb.metadata.cover and unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
cover_id = unicode(oeb.metadata.cover[0])
if (oeb.metadata.cover and unicode_type(oeb.metadata.cover[0]) in oeb.manifest.ids):
cover_id = unicode_type(oeb.metadata.cover[0])
item = oeb.manifest.ids[cover_id]
self.cover_data = item.data

View File

@ -9,6 +9,7 @@ import os, cStringIO
from calibre.customize.conversion import (OutputFormatPlugin,
OptionRecommendation)
from calibre.ptempfile import TemporaryDirectory
from polyglot.builtins import unicode_type
class PMLOutput(OutputFormatPlugin):
@ -40,7 +41,7 @@ class PMLOutput(OutputFormatPlugin):
with TemporaryDirectory('_pmlz_output') as tdir:
pmlmlizer = PMLMLizer(log)
pml = unicode(pmlmlizer.extract_content(oeb_book, opts))
pml = unicode_type(pmlmlizer.extract_content(oeb_book, opts))
with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
out.write(pml.encode(opts.pml_output_encoding, 'replace'))

View File

@ -11,6 +11,7 @@ import os
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.constants import numeric_version
from calibre import walk
from polyglot.builtins import unicode_type
class RecipeDisabled(Exception):
@ -161,6 +162,6 @@ class RecipeInput(InputFormatPlugin):
def save_download(self, zf):
raw = self.recipe_source
if isinstance(raw, unicode):
if isinstance(raw, unicode_type):
raw = raw.encode('utf-8')
zf.writestr('download.recipe', raw)
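The isinstance guard above is the same "ensure bytes before writing" pattern used in several of these hunks; a self-contained sketch (the helper name is made up for illustration):

from polyglot.builtins import unicode_type

def ensure_bytes(raw):
    # file and zipfile APIs want bytes; text is encoded once, bytes pass through
    if isinstance(raw, unicode_type):
        raw = raw.encode('utf-8')
    return raw

assert ensure_bytes(u'caf\xe9') == b'caf\xc3\xa9'
assert ensure_bytes(b'caf\xc3\xa9') == b'caf\xc3\xa9'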

View File

@ -9,6 +9,7 @@ import os, string
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre.ptempfile import TemporaryDirectory
from calibre.constants import __appname__, __version__
from polyglot.builtins import unicode_type
class SNBOutput(OutputFormatPlugin):
@ -73,20 +74,20 @@ class SNBOutput(OutputFormatPlugin):
# Process Meta data
meta = oeb_book.metadata
if meta.title:
title = unicode(meta.title[0])
title = unicode_type(meta.title[0])
else:
title = ''
authors = [unicode(x) for x in meta.creator if x.role == 'aut']
authors = [unicode_type(x) for x in meta.creator if x.role == 'aut']
if meta.publisher:
publishers = unicode(meta.publisher[0])
publishers = unicode_type(meta.publisher[0])
else:
publishers = ''
if meta.language:
lang = unicode(meta.language[0]).upper()
lang = unicode_type(meta.language[0]).upper()
else:
lang = ''
if meta.description:
abstract = unicode(meta.description[0])
abstract = unicode_type(meta.description[0])
else:
abstract = ''

View File

@ -18,6 +18,7 @@ from calibre.utils.zipfile import ZipFile
from calibre import (extract, walk, isbytestring, filesystem_encoding,
get_types_map)
from calibre.constants import __version__
from polyglot.builtins import unicode_type
DEBUG_README=u'''
This debug directory contains snapshots of the e-book as it passes through the
@ -794,7 +795,7 @@ OptionRecommendation(name='search_replace',
def unarchive(self, path, tdir):
extract(path, tdir)
files = list(walk(tdir))
files = [f if isinstance(f, unicode) else f.decode(filesystem_encoding)
files = [f if isinstance(f, unicode_type) else f.decode(filesystem_encoding)
for f in files]
from calibre.customize.ui import available_input_formats
fmts = set(available_input_formats())
@ -915,7 +916,7 @@ OptionRecommendation(name='search_replace',
try:
val = parse_date(val, assume_utc=x=='timestamp')
except:
self.log.exception(_('Failed to parse date/time') + ' ' + unicode(val))
self.log.exception(_('Failed to parse date/time') + ' ' + unicode_type(val))
continue
setattr(mi, x, val)

View File

@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
import functools, re, json
from calibre import entity_to_unicode, as_unicode
from polyglot.builtins import unicode_type
XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>')
SVG_NS = 'http://www.w3.org/2000/svg'
@ -218,8 +219,8 @@ class Dehyphenator(object):
wraptags = match.group('wraptags')
except:
wraptags = ''
hyphenated = unicode(firsthalf) + "-" + unicode(secondhalf)
dehyphenated = unicode(firsthalf) + unicode(secondhalf)
hyphenated = unicode_type(firsthalf) + "-" + unicode_type(secondhalf)
dehyphenated = unicode_type(firsthalf) + unicode_type(secondhalf)
if self.suffixes.match(secondhalf) is None:
lookupword = self.removesuffixes.sub('', dehyphenated)
else:
@ -315,7 +316,7 @@ class CSSPreProcessor(object):
# are commented lines before the first @import or @charset rule. Since
# the conversion will remove all stylesheets anyway, we don't lose
# anything
data = re.sub(unicode(r'/\*.*?\*/'), u'', data, flags=re.DOTALL)
data = re.sub(unicode_type(r'/\*.*?\*/'), u'', data, flags=re.DOTALL)
ans, namespaced = [], False
for line in data.splitlines():

View File

@ -10,6 +10,7 @@ from math import ceil
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from calibre.utils.logging import default_log
from calibre.utils.wordcount import get_wordcount_obj
from polyglot.builtins import unicode_type
class HeuristicProcessor(object):
@ -50,8 +51,8 @@ class HeuristicProcessor(object):
title = match.group('title')
if not title:
self.html_preprocess_sections = self.html_preprocess_sections + 1
self.log.debug("marked " + unicode(self.html_preprocess_sections) +
" chapters. - " + unicode(chap))
self.log.debug("marked " + unicode_type(self.html_preprocess_sections) +
" chapters. - " + unicode_type(chap))
return '<h2>'+chap+'</h2>\n'
else:
delete_whitespace = re.compile('^\\s*(?P<c>.*?)\\s*$')
@ -59,16 +60,16 @@ class HeuristicProcessor(object):
txt_chap = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(chap)))
txt_title = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(title)))
self.html_preprocess_sections = self.html_preprocess_sections + 1
self.log.debug("marked " + unicode(self.html_preprocess_sections) +
" chapters & titles. - " + unicode(chap) + ", " + unicode(title))
self.log.debug("marked " + unicode_type(self.html_preprocess_sections) +
" chapters & titles. - " + unicode_type(chap) + ", " + unicode_type(title))
return '<h2 title="'+txt_chap+', '+txt_title+'">'+chap+'</h2>\n<h3 class="sigilNotInTOC">'+title+'</h3>\n'
def chapter_break(self, match):
chap = match.group('section')
styles = match.group('styles')
self.html_preprocess_sections = self.html_preprocess_sections + 1
self.log.debug("marked " + unicode(self.html_preprocess_sections) +
" section markers based on punctuation. - " + unicode(chap))
self.log.debug("marked " + unicode_type(self.html_preprocess_sections) +
" section markers based on punctuation. - " + unicode_type(chap))
return '<'+styles+' style="page-break-before:always">'+chap
def analyze_title_matches(self, match):
@ -111,8 +112,8 @@ class HeuristicProcessor(object):
line_end = line_end_ere.findall(raw)
tot_htm_ends = len(htm_end)
tot_ln_fds = len(line_end)
# self.log.debug("There are " + unicode(tot_ln_fds) + " total Line feeds, and " +
# unicode(tot_htm_ends) + " marked up endings")
# self.log.debug("There are " + unicode_type(tot_ln_fds) + " total Line feeds, and " +
# unicode_type(tot_htm_ends) + " marked up endings")
if percent > 1:
percent = 1
@ -120,7 +121,7 @@ class HeuristicProcessor(object):
percent = 0
min_lns = tot_ln_fds * percent
# self.log.debug("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup")
# self.log.debug("There must be fewer than " + unicode_type(min_lns) + " unmarked lines to add markup")
return min_lns > tot_htm_ends
def dump(self, raw, where):
@ -157,17 +158,17 @@ class HeuristicProcessor(object):
]
ITALICIZE_STYLE_PATS = [
unicode(r'(?msu)(?<=[\s>"\'])_\*/(?P<words>[^\*_]+)/\*_'),
unicode(r'(?msu)(?<=[\s>"\'])~~(?P<words>[^~]+)~~'),
unicode(r'(?msu)(?<=[\s>"\'])_/(?P<words>[^/_]+)/_'),
unicode(r'(?msu)(?<=[\s>"\'])_\*(?P<words>[^\*_]+)\*_'),
unicode(r'(?msu)(?<=[\s>"\'])\*/(?P<words>[^/\*]+)/\*'),
unicode(r'(?msu)(?<=[\s>"\'])/:(?P<words>[^:/]+):/'),
unicode(r'(?msu)(?<=[\s>"\'])\|:(?P<words>[^:\|]+):\|'),
unicode(r'(?msu)(?<=[\s>"\'])\*(?P<words>[^\*]+)\*'),
unicode(r'(?msu)(?<=[\s>"\'])~(?P<words>[^~]+)~'),
unicode(r'(?msu)(?<=[\s>"\'])/(?P<words>[^/\*><]+)/'),
unicode(r'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_'),
unicode_type(r'(?msu)(?<=[\s>"\'])_\*/(?P<words>[^\*_]+)/\*_'),
unicode_type(r'(?msu)(?<=[\s>"\'])~~(?P<words>[^~]+)~~'),
unicode_type(r'(?msu)(?<=[\s>"\'])_/(?P<words>[^/_]+)/_'),
unicode_type(r'(?msu)(?<=[\s>"\'])_\*(?P<words>[^\*_]+)\*_'),
unicode_type(r'(?msu)(?<=[\s>"\'])\*/(?P<words>[^/\*]+)/\*'),
unicode_type(r'(?msu)(?<=[\s>"\'])/:(?P<words>[^:/]+):/'),
unicode_type(r'(?msu)(?<=[\s>"\'])\|:(?P<words>[^:\|]+):\|'),
unicode_type(r'(?msu)(?<=[\s>"\'])\*(?P<words>[^\*]+)\*'),
unicode_type(r'(?msu)(?<=[\s>"\'])~(?P<words>[^~]+)~'),
unicode_type(r'(?msu)(?<=[\s>"\'])/(?P<words>[^/\*><]+)/'),
unicode_type(r'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_'),
]
for word in ITALICIZE_WORDS:
@ -177,10 +178,10 @@ class HeuristicProcessor(object):
search_text = re.sub(r'<[^>]*>', '', search_text)
for pat in ITALICIZE_STYLE_PATS:
for match in re.finditer(pat, search_text):
ital_string = unicode(match.group('words'))
# self.log.debug("italicising "+unicode(match.group(0))+" with <i>"+ital_string+"</i>")
ital_string = unicode_type(match.group('words'))
# self.log.debug("italicising "+unicode_type(match.group(0))+" with <i>"+ital_string+"</i>")
try:
html = re.sub(re.escape(unicode(match.group(0))), '<i>%s</i>' % ital_string, html)
html = re.sub(re.escape(unicode_type(match.group(0))), '<i>%s</i>' % ital_string, html)
except OverflowError:
# match.group(0) was too large to be compiled into a regex
continue
@ -205,10 +206,10 @@ class HeuristicProcessor(object):
if wordcount > 200000:
typical_chapters = 15000.
self.min_chapters = int(ceil(wordcount / typical_chapters))
self.log.debug("minimum chapters required are: "+unicode(self.min_chapters))
self.log.debug("minimum chapters required are: "+unicode_type(self.min_chapters))
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
self.html_preprocess_sections = len(heading.findall(html))
self.log.debug("found " + unicode(self.html_preprocess_sections) + " pre-existing headings")
self.log.debug("found " + unicode_type(self.html_preprocess_sections) + " pre-existing headings")
# Build the Regular Expressions in pieces
init_lookahead = "(?=<(p|div))"
@ -295,7 +296,7 @@ class HeuristicProcessor(object):
if n_lookahead_req:
n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line)
if not analyze:
self.log.debug("Marked " + unicode(self.html_preprocess_sections) + " headings, " + log_message)
self.log.debug("Marked " + unicode_type(self.html_preprocess_sections) + " headings, " + log_message)
chapter_marker = arg_ignorecase+init_lookahead+full_chapter_line+blank_lines+lp_n_lookahead_open+n_lookahead+lp_n_lookahead_close+ \
lp_opt_title_open+title_line_open+title_header_open+lp_title+title_header_close+title_line_close+lp_opt_title_close
@ -308,9 +309,9 @@ class HeuristicProcessor(object):
if float(self.chapters_with_title) / float(hits) > .5:
title_req = True
strict_title = False
self.log.debug(unicode(type_name)+" had "+unicode(hits)+" hits - "+unicode(self.chapters_no_title)+" chapters with no title, "+
unicode(self.chapters_with_title)+" chapters with titles, "+
unicode(float(self.chapters_with_title) / float(hits))+" percent. ")
self.log.debug(unicode_type(type_name)+" had "+unicode_type(hits)+" hits - "+unicode_type(self.chapters_no_title)+" chapters with no title, "+
unicode_type(self.chapters_with_title)+" chapters with titles, "+
unicode_type(float(self.chapters_with_title) / float(hits))+" percent. ")
if type_name == 'common':
analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name])
elif self.min_chapters <= hits < max_chapters or self.min_chapters < 3 > hits:
@ -327,8 +328,8 @@ class HeuristicProcessor(object):
words_per_chptr = wordcount
if words_per_chptr > 0 and self.html_preprocess_sections > 0:
words_per_chptr = wordcount / self.html_preprocess_sections
self.log.debug("Total wordcount is: "+ unicode(wordcount)+", Average words per section is: "+
unicode(words_per_chptr)+", Marked up "+unicode(self.html_preprocess_sections)+" chapters")
self.log.debug("Total wordcount is: "+ unicode_type(wordcount)+", Average words per section is: "+
unicode_type(words_per_chptr)+", Marked up "+unicode_type(self.html_preprocess_sections)+" chapters")
return html
def punctuation_unwrap(self, length, content, format):
@ -358,8 +359,8 @@ class HeuristicProcessor(object):
# define the pieces of the regex
# (?<!\&\w{4});) is a semicolon not part of an entity
lookahead = "(?<=.{"+unicode(length)+u"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IA\u00DF]|(?<!\\&\\w{4});))"
em_en_lookahead = "(?<=.{"+unicode(length)+u"}[\u2013\u2014])"
lookahead = "(?<=.{"+unicode_type(length)+u"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IA\u00DF]|(?<!\\&\\w{4});))"
em_en_lookahead = "(?<=.{"+unicode_type(length)+u"}[\u2013\u2014])"
soft_hyphen = u"\xad"
line_ending = "\\s*(?P<style_close></(span|[iub])>)?\\s*(</(p|div)>)?"
blanklines = "\\s*(?P<up2threeblanks><(p|span|div)[^>]*>\\s*(<(p|span|div)[^>]*>\\s*</(span|p|div)>\\s*)</(span|p|div)>\\s*){0,3}\\s*"
@ -419,18 +420,18 @@ class HeuristicProcessor(object):
return html
def fix_nbsp_indents(self, html):
txtindent = re.compile(unicode(r'<(?P<tagtype>p|div)(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}'), re.IGNORECASE)
txtindent = re.compile(unicode_type(r'<(?P<tagtype>p|div)(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}'), re.IGNORECASE)
html = txtindent.sub(self.insert_indent, html)
if self.found_indents > 1:
self.log.debug("replaced "+unicode(self.found_indents)+ " nbsp indents with inline styles")
self.log.debug("replaced "+unicode_type(self.found_indents)+ " nbsp indents with inline styles")
return html
def cleanup_markup(self, html):
# remove remaining non-breaking spaces
html = re.sub(unicode(r'\u00a0'), ' ', html)
html = re.sub(unicode_type(r'\u00a0'), ' ', html)
# Get rid of various common microsoft specific tags which can cause issues later
# Get rid of empty <o:p> tags to simplify other processing
html = re.sub(unicode(r'\s*<o:p>\s*</o:p>'), ' ', html)
html = re.sub(unicode_type(r'\s*<o:p>\s*</o:p>'), ' ', html)
# Delete microsoft 'smart' tags
html = re.sub('(?i)</?st1:\\w+>', '', html)
# Re-open self closing paragraph tags
@ -470,8 +471,8 @@ class HeuristicProcessor(object):
blanklines = self.blankreg.findall(html)
lines = self.linereg.findall(html)
if len(lines) > 1:
self.log.debug("There are " + unicode(len(blanklines)) + " blank lines. " +
unicode(float(len(blanklines)) / float(len(lines))) + " percent blank")
self.log.debug("There are " + unicode_type(len(blanklines)) + " blank lines. " +
unicode_type(float(len(blanklines)) / float(len(lines))) + " percent blank")
if float(len(blanklines)) / float(len(lines)) > 0.40:
return True
@ -493,11 +494,11 @@ class HeuristicProcessor(object):
lines = float(len(self.single_blank.findall(to_merge))) - 1.
em = base_em + (em_per_line * lines)
if to_merge.find('whitespace'):
newline = self.any_multi_blank.sub('\n<p class="whitespace'+unicode(int(em * 10))+
'" style="text-align:center; margin-top:'+unicode(em)+'em"> </p>', match.group(0))
newline = self.any_multi_blank.sub('\n<p class="whitespace'+unicode_type(int(em * 10))+
'" style="text-align:center; margin-top:'+unicode_type(em)+'em"> </p>', match.group(0))
else:
newline = self.any_multi_blank.sub('\n<p class="softbreak'+unicode(int(em * 10))+
'" style="text-align:center; margin-top:'+unicode(em)+'em"> </p>', match.group(0))
newline = self.any_multi_blank.sub('\n<p class="softbreak'+unicode_type(int(em * 10))+
'" style="text-align:center; margin-top:'+unicode_type(em)+'em"> </p>', match.group(0))
return newline
html = self.any_multi_blank.sub(merge_matches, html)
@ -518,9 +519,9 @@ class HeuristicProcessor(object):
top_margin = ''
bottom_margin = ''
if initblanks is not None:
top_margin = 'margin-top:'+unicode(len(self.single_blank.findall(initblanks)))+'em;'
top_margin = 'margin-top:'+unicode_type(len(self.single_blank.findall(initblanks)))+'em;'
if endblanks is not None:
bottom_margin = 'margin-bottom:'+unicode(len(self.single_blank.findall(endblanks)))+'em;'
bottom_margin = 'margin-bottom:'+unicode_type(len(self.single_blank.findall(endblanks)))+'em;'
if initblanks is None and endblanks is None:
return content
@ -597,7 +598,7 @@ class HeuristicProcessor(object):
else:
replacement_break = re.sub('(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '', replacement_break)
divpercent = (100 - width) / 2
hr_open = re.sub('45', unicode(divpercent), hr_open)
hr_open = re.sub('45', unicode_type(divpercent), hr_open)
scene_break = hr_open+replacement_break+'</div>'
else:
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
@ -657,12 +658,12 @@ class HeuristicProcessor(object):
else:
styles = match.group('styles').split(';')
is_paragraph = self.check_paragraph(content)
# print "styles for this line are: "+unicode(styles)
# print "styles for this line are: "+unicode_type(styles)
split_styles = []
for style in styles:
# print "style is: "+unicode(style)
# print "style is: "+unicode_type(style)
newstyle = style.split(':')
# print "newstyle is: "+unicode(newstyle)
# print "newstyle is: "+unicode_type(newstyle)
split_styles.append(newstyle)
styles = split_styles
for style, setting in styles:
@ -673,7 +674,7 @@ class HeuristicProcessor(object):
if 9 < setting < 14:
text_indent = indented_text
else:
text_indent = style+':'+unicode(setting)+'pt;'
text_indent = style+':'+unicode_type(setting)+'pt;'
if style == 'padding':
setting = re.sub('pt', '', setting).split(' ')
if int(setting[1]) < 16 and int(setting[3]) < 16:
@ -694,23 +695,23 @@ class HeuristicProcessor(object):
blockquote_open_loop = blockquote_open
if debugabby:
self.log.debug('\n\n******\n')
self.log.debug('padding top is: '+unicode(setting[0]))
self.log.debug('padding right is:' +unicode(setting[1]))
self.log.debug('padding bottom is: ' + unicode(setting[2]))
self.log.debug('padding left is: ' +unicode(setting[3]))
self.log.debug('padding top is: '+unicode_type(setting[0]))
self.log.debug('padding right is:' +unicode_type(setting[1]))
self.log.debug('padding bottom is: ' + unicode_type(setting[2]))
self.log.debug('padding left is: ' +unicode_type(setting[3]))
# print "text-align is: "+unicode(text_align)
# print "\n***\nline is:\n "+unicode(match.group(0))+'\n'
# print "text-align is: "+unicode_type(text_align)
# print "\n***\nline is:\n "+unicode_type(match.group(0))+'\n'
if debugabby:
# print "this line is a paragraph = "+unicode(is_paragraph)+", previous line was "+unicode(self.previous_was_paragraph)
# print "this line is a paragraph = "+unicode_type(is_paragraph)+", previous line was "+unicode_type(self.previous_was_paragraph)
self.log.debug("styles for this line were:", styles)
self.log.debug('newline is:')
self.log.debug(blockquote_open_loop+blockquote_close_loop+
paragraph_before+'<p style="'+text_indent+text_align+
'">'+content+'</p>'+paragraph_after+'\n\n\n\n\n')
# print "is_paragraph is "+unicode(is_paragraph)+", previous_was_paragraph is "+unicode(self.previous_was_paragraph)
# print "is_paragraph is "+unicode_type(is_paragraph)+", previous_was_paragraph is "+unicode_type(self.previous_was_paragraph)
self.previous_was_paragraph = is_paragraph
# print "previous_was_paragraph is now set to "+unicode(self.previous_was_paragraph)+"\n\n\n"
# print "previous_was_paragraph is now set to "+unicode_type(self.previous_was_paragraph)+"\n\n\n"
return blockquote_open_loop+blockquote_close_loop+paragraph_before+'<p style="'+text_indent+text_align+'">'+content+'</p>'+paragraph_after
html = abbyy_line.sub(convert_styles, html)
@ -793,12 +794,12 @@ class HeuristicProcessor(object):
# more of the lines break in the same region of the document then unwrapping is required
docanalysis = DocAnalysis(format, html)
hardbreaks = docanalysis.line_histogram(.50)
self.log.debug("Hard line breaks check returned "+unicode(hardbreaks))
self.log.debug("Hard line breaks check returned "+unicode_type(hardbreaks))
# Calculate Length
unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
length = docanalysis.line_length(unwrap_factor)
self.log.debug("Median line length is " + unicode(length) + ", calculated with " + format + " format")
self.log.debug("Median line length is " + unicode_type(length) + ", calculated with " + format + " format")
# ##### Unwrap lines ######
if getattr(self.extra_opts, 'unwrap_lines', False):
@ -820,7 +821,7 @@ class HeuristicProcessor(object):
# If still no sections after unwrapping mark split points on lines with no punctuation
if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False):
self.log.debug("Looking for more split points based on punctuation,"
" currently have " + unicode(self.html_preprocess_sections))
" currently have " + unicode_type(self.html_preprocess_sections))
chapdetect3 = re.compile(
r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([\W]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) # noqa
html = chapdetect3.sub(self.chapter_break, html)

View File

@ -20,6 +20,7 @@ from calibre.utils.localization import canonicalize_lang
from calibre.utils.logging import default_log
from calibre.utils.zipfile import ZipFile
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
from polyglot.builtins import unicode_type
def fromstring(raw, parser=RECOVER_PARSER):
@ -56,7 +57,7 @@ def read_doc_props(raw, mi, XPath):
desc = XPath('//dc:description')(root)
if desc:
raw = etree.tostring(desc[0], method='text', encoding=unicode)
raw = etree.tostring(desc[0], method='text', encoding=unicode_type)
raw = raw.replace('_x000d_', '') # Word 2007 mangles newlines in the summary
mi.comments = raw.strip()

View File

@ -14,6 +14,7 @@ from calibre.utils.filenames import ascii_filename
from calibre.utils.fonts.scanner import font_scanner, NoFonts
from calibre.utils.fonts.utils import panose_to_css_generic_family, is_truetype_font
from calibre.utils.icu import ord_string
from polyglot.builtins import codepoint_to_chr
Embed = namedtuple('Embed', 'name key subsetted')
@ -124,7 +125,7 @@ def do_map(m, points):
if base < p < limit:
yield m[p - base]
else:
yield unichr(p)
yield codepoint_to_chr(p)
def map_symbol_text(text, font):

View File

@ -11,6 +11,7 @@ from operator import itemgetter
from lxml import etree
from calibre.utils.icu import partition_by_first_letter, sort_key
from polyglot.builtins import unicode_type
def get_applicable_xe_fields(index, xe_fields, XPath, expand):
@ -246,7 +247,7 @@ def polish_index_markup(index, blocks):
a = block.xpath('descendant::a[1]')
text = ''
if a:
text = etree.tostring(a[0], method='text', with_tail=False, encoding=unicode).strip()
text = etree.tostring(a[0], method='text', with_tail=False, encoding=unicode_type).strip()
if ':' in text:
path_map[block] = parts = filter(None, (x.strip() for x in text.split(':')))
if len(parts) > 1:

View File

@ -504,8 +504,6 @@ class Table(object):
def resolve_cell_style(self, tc, overrides, row, col, rows, cols_in_row):
cs = CellStyle(self.namespace)
# from lxml.etree import tostring
# txt = tostring(tc, method='text', encoding=unicode)
for o in overrides:
if o in self.overrides:
ovr = self.overrides[o]
@ -699,4 +697,3 @@ class Tables(object):
table = self.para_map.get(p, None)
if table is not None:
return table.style_map.get(p, (None, None))[1]

View File

@ -13,6 +13,7 @@ from lxml.etree import tostring
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
from polyglot.builtins import unicode_type
def from_headings(body, log, namespace):
@ -93,7 +94,7 @@ def link_to_txt(a, styles, object_map):
if rs.css.get('display', None) == 'none':
a.remove(child)
return tostring(a, method='text', with_tail=False, encoding=unicode).strip()
return tostring(a, method='text', with_tail=False, encoding=unicode_type).strip()
def from_toc(docx, link_map, styles, object_map, log, namespace):

View File

@ -19,6 +19,7 @@ from calibre.ebooks.docx.writer.lists import ListsManager
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
from calibre.ebooks.oeb.base import XPath, barename
from calibre.utils.localization import lang_as_iso639_1
from polyglot.builtins import unicode_type
def lang_for_tag(tag):
@ -439,8 +440,8 @@ class Convert(object):
if self.add_toc:
self.links_manager.process_toc_links(self.oeb)
if self.add_cover and self.oeb.metadata.cover and unicode(self.oeb.metadata.cover[0]) in self.oeb.manifest.ids:
cover_id = unicode(self.oeb.metadata.cover[0])
if self.add_cover and self.oeb.metadata.cover and unicode_type(self.oeb.metadata.cover[0]) in self.oeb.manifest.ids:
cover_id = unicode_type(self.oeb.metadata.cover[0])
item = self.oeb.manifest.ids[cover_id]
self.cover_img = self.images_manager.read_image(item.href)

View File

@ -14,6 +14,7 @@ from lxml import etree
from calibre.ebooks import parse_css_length
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
from calibre.utils.localization import lang_as_iso639_1
from polyglot.builtins import unicode_type
from tinycss.css21 import CSS21Parser
css_parser = CSS21Parser()
@ -45,7 +46,7 @@ def bmap(x):
def is_dropcaps(html_tag, tag_style):
return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding=unicode, with_tail=False)) < 5 and tag_style['float'] == 'left'
return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding=unicode_type, with_tail=False)) < 5 and tag_style['float'] == 'left'
class CombinedStyle(object):

View File

@ -10,6 +10,7 @@ import unittest
from polyglot.builtins import map
from calibre.ebooks.epub.cfi.parse import parser, cfi_sort_key, decode_cfi
from polyglot.builtins import unicode_type
class Tests(unittest.TestCase):
@ -60,7 +61,7 @@ class Tests(unittest.TestCase):
if after is not None:
ta['after'] = after
if params:
ta['params'] = {unicode(k):(v,) if isinstance(v, unicode) else v for k, v in params.iteritems()}
ta['params'] = {unicode_type(k):(v,) if isinstance(v, unicode_type) else v for k, v in params.iteritems()}
if ta:
step['text_assertion'] = ta
return ans

View File

@ -11,6 +11,7 @@ import time
from calibre.constants import __appname__, __version__
from calibre import strftime, prepare_string_for_xml as xml
from calibre.utils.date import parse_date
from polyglot.builtins import unicode_type
SONY_METADATA = u'''\
<?xml version="1.0" encoding="utf-8"?>
@ -81,21 +82,21 @@ SONY_ATOM_ENTRY = u'''\
def sony_metadata(oeb):
m = oeb.metadata
title = short_title = unicode(m.title[0])
title = short_title = unicode_type(m.title[0])
publisher = __appname__ + ' ' + __version__
try:
pt = unicode(oeb.metadata.publication_type[0])
pt = unicode_type(oeb.metadata.publication_type[0])
short_title = u':'.join(pt.split(':')[2:])
except:
pass
try:
date = parse_date(unicode(m.date[0]),
date = parse_date(unicode_type(m.date[0]),
as_utc=False).strftime('%Y-%m-%d')
except:
date = strftime('%Y-%m-%d')
try:
language = unicode(m.language[0]).replace('_', '-')
language = unicode_type(m.language[0]).replace('_', '-')
except:
language = 'en'
short_title = xml(short_title, True)
@ -113,7 +114,7 @@ def sony_metadata(oeb):
return True
try:
base_id = unicode(list(filter(cal_id, m.identifier))[0])
base_id = unicode_type(list(filter(cal_id, m.identifier))[0])
except:
base_id = str(uuid4())
@ -128,7 +129,7 @@ def sony_metadata(oeb):
for x in toc:
section.nodes.append(x)
toc = TOC(klass='periodical', href=oeb.spine[2].href,
title=unicode(oeb.metadata.title[0]))
title=unicode_type(oeb.metadata.title[0]))
toc.nodes.append(section)
entries = []
@ -188,4 +189,3 @@ def sony_metadata(oeb):
id=xml(base_id)).encode('utf-8')
return metadata, atom

View File

@ -19,6 +19,7 @@ from calibre.constants import __appname__, __version__
from calibre.utils.localization import lang_as_iso639_1
from calibre.utils.img import save_cover_data_to
from calibre.ebooks.oeb.base import urlnormalize
from polyglot.builtins import unicode_type
class FB2MLizer(object):
@ -64,7 +65,7 @@ class FB2MLizer(object):
output = self.clean_text(u''.join(output))
if self.opts.pretty_print:
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode_type, pretty_print=True)
else:
return u'<?xml version="1.0" encoding="UTF-8"?>' + output
@ -140,7 +141,7 @@ class FB2MLizer(object):
metadata['author'] = u'<author><first-name></first-name><last-name></last-name></author>'
metadata['keywords'] = u''
tags = list(map(unicode, self.oeb_book.metadata.subject))
tags = list(map(unicode_type, self.oeb_book.metadata.subject))
if tags:
tags = ', '.join(prepare_string_for_xml(x) for x in tags)
metadata['keywords'] = '<keywords>%s</keywords>'%tags
@ -155,8 +156,8 @@ class FB2MLizer(object):
year = publisher = isbn = u''
identifiers = self.oeb_book.metadata['identifier']
for x in identifiers:
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
metadata['id'] = unicode(x).split(':')[-1]
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode_type(x).startswith('urn:uuid:'):
metadata['id'] = unicode_type(x).split(':')[-1]
break
if metadata['id'] is None:
self.log.warn('No UUID identifier found')
@ -229,8 +230,8 @@ class FB2MLizer(object):
cover_href = None
# Get the raster cover if it's available.
if self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
id = unicode(self.oeb_book.metadata.cover[0])
if self.oeb_book.metadata.cover and unicode_type(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
id = unicode_type(self.oeb_book.metadata.cover[0])
cover_item = self.oeb_book.manifest.ids[id]
if cover_item.media_type in OEB_RASTER_IMAGES:
cover_href = cover_item.href

View File

@ -19,6 +19,7 @@ from calibre.ebooks.oeb.base import urlunquote
from calibre.ebooks.chardet import detect_xml_encoding
from calibre.constants import iswindows
from calibre import unicode_path, as_unicode, replace_entities
from polyglot.builtins import unicode_type
class Link(object):
@ -46,7 +47,7 @@ class Link(object):
:param base: The base directory that relative URLs are with respect to.
Must be a unicode string.
'''
assert isinstance(url, unicode) and isinstance(base, unicode)
assert isinstance(url, unicode_type) and isinstance(base, unicode_type)
self.url = url
self.parsed_url = urlparse(self.url)
self.is_local = self.parsed_url.scheme in ('', 'file')
@ -248,6 +249,3 @@ def get_filelist(htmlfile, dir, opts, log):
for f in filelist:
log.debug('\t\t', f)
return filelist

View File

@ -11,6 +11,7 @@ import textwrap, os, glob
from calibre.customize import FileTypePlugin
from calibre.constants import numeric_version
from polyglot.builtins import unicode_type
class HTML2ZIP(FileTypePlugin):
@ -114,10 +115,9 @@ every time you add an HTML file to the library.\
config_dialog.exec_()
if config_dialog.result() == QDialog.Accepted:
sc = unicode(sc.text()).strip()
sc = unicode_type(sc.text()).strip()
if bf.isChecked():
sc += '|bf'
customize_plugin(self, sc)
return config_dialog.result()

View File

@ -22,6 +22,7 @@ from calibre.ebooks.oeb.base import (
XHTML, XHTML_NS, barename, namespace, OEB_IMAGES, XLINK, rewrite_links, urlnormalize)
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.utils.logging import default_log
from polyglot.builtins import unicode_type
SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img', 'link', 'meta'}
@ -46,7 +47,7 @@ class OEB2HTML(object):
self.log.info('Converting OEB book to HTML...')
self.opts = opts
try:
self.book_title = unicode(oeb_book.metadata.title[0])
self.book_title = unicode_type(oeb_book.metadata.title[0])
except Exception:
self.book_title = _('Unknown')
self.links = {}

View File

@ -22,6 +22,7 @@ from calibre.ebooks.oeb.base import urlnormalize, xpath
from calibre.ebooks.oeb.reader import OEBReader
from calibre.ebooks import DRMError
from calibre import plugins
from polyglot.builtins import codepoint_to_chr, unicode_type
lzx, lxzerror = plugins['lzx']
msdes, msdeserror = plugins['msdes']
@ -110,7 +111,7 @@ def read_utf8_char(bytes, pos):
raise LitError(
'Invalid UTF8 character: %s' % repr(bytes[pos:pos+i]))
c = (c << 6) | (b & 0x3F)
return unichr(c), pos+elsize
return codepoint_to_chr(c), pos+elsize
def consume_sized_utf8_string(bytes, zpad=False):
@ -125,7 +126,7 @@ def consume_sized_utf8_string(bytes, zpad=False):
def encode(string):
return unicode(string).encode('ascii', 'xmlcharrefreplace')
return unicode_type(string).encode('ascii', 'xmlcharrefreplace')
class UnBinary(object):
@ -243,9 +244,9 @@ class UnBinary(object):
else:
dynamic_tag += 1
errors += 1
tag_name = '?'+unichr(tag)+'?'
tag_name = '?'+codepoint_to_chr(tag)+'?'
current_map = self.tag_to_attr_map[tag]
print('WARNING: tag %s unknown' % unichr(tag))
print('WARNING: tag %s unknown' % codepoint_to_chr(tag))
buf.write(encode(tag_name))
elif flags & FLAG_CLOSING:
if depth == 0:
@ -947,4 +948,3 @@ class LitReader(OEBReader):
item.media_type = 'application/xhtml+xml'
item.data = item._parse_xhtml(etree.tostring(item.data))
super(LitReader, self)._spine_from_opf(opf)

View File

@ -31,6 +31,7 @@ import calibre
from calibre import plugins
msdes, msdeserror = plugins['msdes']
import calibre.ebooks.lit.mssha1 as mssha1
from polyglot.builtins import codepoint_to_chr, unicode_type
__all__ = ['LitWriter']
@ -163,9 +164,9 @@ class ReBinary(object):
for value in values:
if isinstance(value, (int, long)):
try:
value = unichr(value)
value = codepoint_to_chr(value)
except OverflowError:
self.logger.warn('Unicode overflow for integer:', value)
self.logger.warn('Unicode overflow for integer:', value)
value = u'?'
self.buf.write(value.encode('utf-8'))
@ -216,9 +217,9 @@ class ReBinary(object):
path, frag = urldefrag(value)
if self.item:
path = self.item.abshref(path)
prefix = unichr(3)
prefix = codepoint_to_chr(3)
if path in self.manifest.hrefs:
prefix = unichr(2)
prefix = codepoint_to_chr(2)
value = self.manifest.hrefs[path].id
if frag:
value = '#'.join((value, frag))
@ -281,9 +282,9 @@ class ReBinary(object):
self.logger.warn("More than six anchors in file %r. "
"Some links may not work properly." % self.item.href)
data = StringIO()
data.write(unichr(len(self.anchors)).encode('utf-8'))
data.write(codepoint_to_chr(len(self.anchors)).encode('utf-8'))
for anchor, offset in self.anchors:
data.write(unichr(len(anchor)).encode('utf-8'))
data.write(codepoint_to_chr(len(anchor)).encode('utf-8'))
data.write(anchor)
data.write(pack('<I', offset))
return data.getvalue()
@ -313,7 +314,7 @@ class LitWriter(object):
oeb.metadata.add('calibre-version', calibre.__version__)
cover = None
if oeb.metadata.cover:
id = unicode(oeb.metadata.cover[0])
id = unicode_type(oeb.metadata.cover[0])
cover = oeb.manifest.ids[id]
for type, title in ALL_MS_COVER_TYPES:
if type not in oeb.guide:
@ -485,7 +486,7 @@ class LitWriter(object):
data = rebin.content
name = name + '/content'
secnum = 1
elif isinstance(data, unicode):
elif isinstance(data, unicode_type):
data = data.encode('utf-8')
elif hasattr(data, 'cssText'):
data = str(item)
@ -521,9 +522,9 @@ class LitWriter(object):
item.offset = offset \
if state in ('linear', 'nonlinear') else 0
data.write(pack('<I', item.offset))
entry = [unichr(len(id)), unicode(id),
unichr(len(href)), unicode(href),
unichr(len(media_type)), unicode(media_type)]
entry = [codepoint_to_chr(len(id)), unicode_type(id),
codepoint_to_chr(len(href)), unicode_type(href),
codepoint_to_chr(len(media_type)), unicode_type(media_type)]
for value in entry:
data.write(value.encode('utf-8'))
data.write('\0')
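The manifest entries above are serialised as length-prefixed UTF-8 fields, with codepoint_to_chr(len(...)) producing the one-character length prefix. A compact sketch of the same pattern (the function name is illustrative, not from the source):

from polyglot.builtins import codepoint_to_chr, unicode_type

def encode_entry(id_, href, media_type):
    # each field: one char whose codepoint is the field length, then the field itself
    parts = [codepoint_to_chr(len(id_)), unicode_type(id_),
             codepoint_to_chr(len(href)), unicode_type(href),
             codepoint_to_chr(len(media_type)), unicode_type(media_type)]
    return b''.join(p.encode('utf-8') for p in parts)

encode_entry(u'item1', u'index.html', u'application/xhtml+xml')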

View File

@ -36,6 +36,7 @@ from calibre.ptempfile import PersistentTemporaryFile
from calibre.devices.interface import DevicePlugin as Device
from calibre.ebooks.lrf.html.color_map import lrs_color
from calibre.ebooks.chardet import xml_to_unicode
from polyglot.builtins import unicode_type
def update_css(ncss, ocss):
@ -54,10 +55,10 @@ def munge_paths(basepath, url):
if not path:
path = basepath
elif not os.path.isabs(path):
if isinstance(path, unicode):
if isinstance(path, unicode_type):
path = path.encode(sys.getfilesystemencoding())
dn = os.path.dirname(basepath)
if isinstance(dn, unicode):
if isinstance(dn, unicode_type):
dn = dn.encode(sys.getfilesystemencoding())
path = os.path.join(dn, path)
return os.path.normpath(path), fragment
@ -272,7 +273,7 @@ class HTMLConverter(object):
update_css(npcss, self.override_pcss)
paths = [os.path.abspath(path) for path in paths]
paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, unicode) else path for path in paths]
paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, unicode_type) else path for path in paths]
while len(paths) > 0 and self.link_level <= self.link_levels:
for path in paths:
@ -336,7 +337,7 @@ class HTMLConverter(object):
markupMassage=nmassage)
except ConversionError as err:
if 'Failed to coerce to unicode' in str(err):
raw = unicode(raw, 'utf8', 'replace')
raw = unicode_type(raw, 'utf8', 'replace')
soup = BeautifulSoup(raw,
convertEntities=BeautifulSoup.XHTML_ENTITIES,
markupMassage=nmassage)
@ -359,7 +360,7 @@ class HTMLConverter(object):
os.makedirs(tdir)
try:
dump = open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb')
dump.write(unicode(soup).encode('utf-8'))
dump.write(unicode_type(soup).encode('utf-8'))
self.log.info(_('Written preprocessed HTML to ')+dump.name)
dump.close()
except:
@ -394,7 +395,7 @@ class HTMLConverter(object):
self.log.info(_('\tConverting to BBeB...'))
self.current_style = {}
self.page_break_found = False
if not isinstance(path, unicode):
if not isinstance(path, unicode_type):
path = path.decode(sys.getfilesystemencoding())
self.target_prefix = path
self.previous_text = '\n'
@ -589,7 +590,7 @@ class HTMLConverter(object):
if isinstance(c, HTMLConverter.IGNORED_TAGS):
continue
if isinstance(c, NavigableString):
text += unicode(c)
text += unicode_type(c)
elif isinstance(c, Tag):
if c.name.lower() == 'img' and c.has_key('alt'): # noqa
alt_text += c['alt']
@ -644,7 +645,7 @@ class HTMLConverter(object):
para, text, path, fragment = link['para'], link['text'], link['path'], link['fragment']
ascii_text = text
if not isinstance(path, unicode):
if not isinstance(path, unicode_type):
path = path.decode(sys.getfilesystemencoding())
if path in self.processed_files:
if path+fragment in self.targets.keys():
@ -1323,7 +1324,7 @@ class HTMLConverter(object):
bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px'
if 'em' in tag_css['text-indent']:
bl = '10pt'
indent = self.unit_convert(unicode(tag_css['text-indent']), pts=True, base_length=bl)
indent = self.unit_convert(unicode_type(tag_css['text-indent']), pts=True, base_length=bl)
if not indent:
indent = 0
if indent > 0 and indent < 10 * self.minimum_indent:
@ -1482,7 +1483,7 @@ class HTMLConverter(object):
enc = sys.getfilesystemencoding()
if not enc:
enc = 'utf8'
if isinstance(path, unicode):
if isinstance(path, unicode_type):
path = path.encode(enc, 'replace')
if os.access(path, os.R_OK) and os.path.isfile(path):
if ext in ['png', 'jpg', 'bmp', 'jpeg']:
@ -1526,7 +1527,7 @@ class HTMLConverter(object):
elif tagname in ['style', 'link']:
ncss, npcss = {}, {}
if tagname == 'style':
text = ''.join([unicode(i) for i in tag.findAll(text=True)])
text = ''.join([unicode_type(i) for i in tag.findAll(text=True)])
css, pcss = self.parse_css(text)
ncss.update(css)
npcss.update(pcss)
@ -1559,7 +1560,7 @@ class HTMLConverter(object):
if tag.contents:
c = tag.contents[0]
if isinstance(c, NavigableString):
c = unicode(c).replace('\r\n', '\n').replace('\r', '\n')
c = unicode_type(c).replace('\r\n', '\n').replace('\r', '\n')
if c.startswith('\n'):
c = c[1:]
tag.contents[0] = NavigableString(c)
@ -1759,7 +1760,7 @@ class HTMLConverter(object):
except Exception as err:
self.log.warning(_('An error occurred while processing a table: %s. Ignoring table markup.')%repr(err))
self.log.exception('')
self.log.debug(_('Bad table:\n%s')%unicode(tag)[:300])
self.log.debug(_('Bad table:\n%s')%unicode_type(tag)[:300])
self.in_table = False
self.process_children(tag, tag_css, tag_pseudo_css)
finally:
@ -1810,7 +1811,7 @@ class HTMLConverter(object):
def process_file(path, options, logger):
if not isinstance(path, unicode):
if not isinstance(path, unicode_type):
path = path.decode(sys.getfilesystemencoding())
path = os.path.abspath(path)
default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
@ -1857,9 +1858,9 @@ def process_file(path, options, logger):
for prop in ('author', 'author_sort', 'title', 'title_sort', 'publisher', 'freetext'):
val = getattr(options, prop, None)
if val and not isinstance(val, unicode):
if val and not isinstance(val, unicode_type):
soup = BeautifulSoup(val)
setattr(options, prop, unicode(soup))
setattr(options, prop, unicode_type(soup))
title = (options.title, options.title_sort)
author = (options.author, options.author_sort)
@ -1903,7 +1904,7 @@ def process_file(path, options, logger):
options.force_page_break = fpb
options.link_exclude = le
options.page_break = pb
if not isinstance(options.chapter_regex, unicode):
if not isinstance(options.chapter_regex, unicode_type):
options.chapter_regex = options.chapter_regex.decode(preferred_encoding)
options.chapter_regex = re.compile(options.chapter_regex, re.IGNORECASE)
fpba = options.force_page_break_attr.split(',')

View File

@ -11,6 +11,8 @@ from PyQt5.Qt import QUrl, QApplication, QSize, QEventLoop, \
QPainter, QImage, QObject, Qt
from PyQt5.QtWebKitWidgets import QWebPage
from polyglot.builtins import unicode_type
class HTMLTableRenderer(QObject):
@ -67,7 +69,7 @@ class HTMLTableRenderer(QObject):
def render_table(soup, table, css, base_dir, width, height, dpi, factor=1.0):
head = ''
for e in soup.findAll(['link', 'style']):
head += unicode(e)+'\n\n'
head += unicode_type(e)+'\n\n'
style = ''
for key, val in css.items():
style += key + ':%s;'%val
@ -83,7 +85,7 @@ def render_table(soup, table, css, base_dir, width, height, dpi, factor=1.0):
%s
</body>
</html>
'''%(head, width-10, style, unicode(table))
'''%(head, width-10, style, unicode_type(table))
images, tdir = do_render(html, base_dir, width, height, dpi, factor)
atexit.register(shutil.rmtree, tdir)
return images

View File

@ -10,6 +10,7 @@ from calibre.utils.filenames import ascii_filename
from calibre.ebooks.lrf.meta import LRFMetaFile
from calibre.ebooks.lrf.objects import get_object, PageTree, StyleObject, \
Font, Text, TOCObject, BookAttr, ruby_tags
from polyglot.builtins import unicode_type
class LRFDocument(LRFMetaFile):
@ -112,7 +113,7 @@ class LRFDocument(LRFMetaFile):
pages += u'<PageTree objid="%d">\n'%(page_tree.id,)
close = u'</PageTree>\n'
for page in page_tree:
pages += unicode(page)
pages += unicode_type(page)
pages += close
traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"', pages)] + [pt_id]
@ -125,9 +126,9 @@ class LRFDocument(LRFMetaFile):
if isinstance(obj, (Font, Text, TOCObject)):
continue
if isinstance(obj, StyleObject):
styles += unicode(obj)
styles += unicode_type(obj)
else:
objects += unicode(obj)
objects += unicode_type(obj)
styles += '</Style>\n'
objects += '</Objects>\n'
if write_files:

View File

@ -20,6 +20,7 @@ import xml.dom.minidom as dom
from functools import wraps
from calibre.ebooks.metadata import MetaInformation, string_to_authors
from polyglot.builtins import unicode_type
BYTE = "<B" #: Unsigned char little endian encoded in 1 byte
WORD = "<H" #: Unsigned short little endian encoded in 2 bytes
@ -195,8 +196,8 @@ class xml_field(object):
if not val:
val = u''
if type(val).__name__ != 'unicode':
val = unicode(val, 'utf-8')
if not isinstance(val, unicode_type):
    val = unicode_type(val, 'utf-8')
elems = document.getElementsByTagName(self.tag_name)
elem = None

View File

@ -6,6 +6,7 @@ import struct, array, zlib, cStringIO, collections, re
from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE
from calibre import entity_to_unicode, prepare_string_for_xml
from calibre.ebooks.lrf.tags import Tag
from polyglot.builtins import unicode_type
ruby_tags = {
0xF575: ['rubyAlignAndAdjust', 'W'],
@ -88,10 +89,10 @@ class LRFObject(object):
yield i
def __unicode__(self):
return unicode(self.__class__.__name__)
return unicode_type(self.__class__.__name__)
def __str__(self):
return unicode(self).encode('utf-8')
return unicode_type(self).encode('utf-8')
class LRFContentObject(LRFObject):
@ -255,7 +256,7 @@ class Color(object):
return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b)
def __str__(self):
return unicode(self)
return unicode_type(self)
def __len__(self):
return 4
@ -274,7 +275,7 @@ class EmptyPageElement(object):
yield i
def __str__(self):
return unicode(self)
return unicode_type(self)
class PageDiv(EmptyPageElement):
@ -429,12 +430,12 @@ class Page(LRFStream):
def __unicode__(self):
s = u'\n<Page pagestyle="%d" objid="%d">\n'%(self.style_id, self.id)
for i in self:
s += unicode(i)
s += unicode_type(i)
s += '\n</Page>\n'
return s
def __str__(self):
return unicode(self)
return unicode_type(self)
def to_html(self):
s = u''
@ -619,7 +620,7 @@ class Block(LRFStream, TextCSS):
s += '%s="%s" '%(attr, self.attrs[attr])
if self.name != 'ImageBlock':
s = s.rstrip()+'>\n'
s += unicode(self.content)
s += unicode_type(self.content)
s += '</%s>\n'%(self.name,)
return s
return s.rstrip() + ' />\n'
@ -717,7 +718,7 @@ class Text(LRFStream):
lineposition_map = {1:'before', 2:'after'}
def add_text(self, text):
s = unicode(text, "utf-16-le")
s = unicode_type(text, "utf-16-le")
if s:
s = s.translate(self.text_map)
self.content.append(self.entity_pattern.sub(entity_to_unicode, s))
@ -888,7 +889,7 @@ class Text(LRFStream):
p = open_containers.pop()
s += u'</%s>'%(p.name,)
else:
s += unicode(c)
s += unicode_type(c)
if not c.self_closing:
open_containers.append(c)
@ -1001,7 +1002,7 @@ class Canvas(LRFStream):
s += '%s="%s" '%(attr, self.attrs[attr])
s = s.rstrip() + '>\n'
for po in self:
s += unicode(po) + '\n'
s += unicode_type(po) + '\n'
s += '</%s>\n'%(self.__class__.__name__,)
return s
@ -1198,7 +1199,7 @@ class BookAttr(StyleObject, LRFObject):
s += u'<BookSetting bindingdirection="%s" dpi="%s" screenwidth="%s" screenheight="%s" colordepth="%s" />\n'%\
(self.binding_map[doc.binding], doc.dpi, doc.width, doc.height, doc.color_depth)
for font in self._document.font_map.values():
s += unicode(font)
s += unicode_type(font)
s += '</BookStyle>\n'
return s
@ -1239,7 +1240,7 @@ class TOCObject(LRFStream):
def __unicode__(self):
s = u'<TOC>\n'
for i in self:
s += unicode(i)
s += unicode_type(i)
return s + '</TOC>\n'
@ -1288,5 +1289,3 @@ def get_object(document, stream, id, offset, size, scramble_key):
return object_map[obj_type](document, stream, obj_id, scramble_key, offset+size-Tag.tags[0][0])
raise LRFParseError("Unknown object type: %02X!" % obj_type)

View File

@ -1,5 +1,7 @@
""" elements.py -- replacements and helpers for ElementTree """
from polyglot.builtins import unicode_type
class ElementWriter(object):
@ -21,9 +23,9 @@ class ElementWriter(object):
return text
def _writeAttribute(self, f, name, value):
f.write(u' %s="' % unicode(name))
f.write(u' %s="' % unicode_type(name))
if not isinstance(value, basestring):
value = unicode(value)
value = unicode_type(value)
value = self._encodeCdata(value)
value = value.replace('"', '&quot;')
f.write(value)
@ -34,7 +36,7 @@ class ElementWriter(object):
f.write(text)
def _write(self, f, e):
f.write(u'<' + unicode(e.tag))
f.write(u'<' + unicode_type(e.tag))
attributes = e.items()
attributes.sort()
@ -72,6 +74,3 @@ class ElementWriter(object):
f.write(u'<?xml version="1.0" encoding="%s"?>\n' % self.outputEncodingName)
self._write(f, self.e)

View File

@ -5,6 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import struct
from calibre.ebooks.lrf import LRFParseError
from polyglot.builtins import unicode_type
class Tag(object):
@ -246,7 +247,7 @@ class Tag(object):
@classmethod
def string_parser(self, stream):
size = struct.unpack("<H", stream.read(2))[0]
return unicode(stream.read(size), "utf_16")
return unicode_type(stream.read(size), "utf_16")
def type_one_parser(self, stream):
cnt = struct.unpack("<H", stream.read(2))[0]
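string_parser above relies on unicode_type(bytes, encoding) decoding exactly like bytes.decode() on both Python 2 and 3; a quick round-trip sketch:

import io
import struct

from polyglot.builtins import unicode_type

payload = u'LRF'.encode('utf_16')
stream = io.BytesIO(struct.pack('<H', len(payload)) + payload)
size = struct.unpack('<H', stream.read(2))[0]
assert unicode_type(stream.read(size), 'utf_16') == u'LRF'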

View File

@ -15,6 +15,8 @@ from calibre import relpath, guess_type, remove_bracketed_text, prints, force_un
from calibre.utils.config_base import tweaks
from polyglot.builtins import codepoint_to_chr, unicode_type
try:
_author_pat = re.compile(tweaks['authors_split_regex'])
except:
@ -134,7 +136,7 @@ def get_title_sort_pat(lang=None):
return ans
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in
_ignore_starts = u'\'"'+u''.join(codepoint_to_chr(x) for x in
range(0x2018, 0x201e)+[0x2032, 0x2033])
@ -227,7 +229,7 @@ class Resource(object):
self._href = href_or_path
else:
pc = url[2]
if isinstance(pc, unicode):
if isinstance(pc, unicode_type):
pc = pc.encode('utf-8')
pc = unquote(pc).decode('utf-8')
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
@ -249,7 +251,7 @@ class Resource(object):
basedir = os.getcwdu()
if self.path is None:
return self._href
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode) else self.fragment
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment
frag = '#'+quote(f) if self.fragment else ''
if self.path == basedir:
return ''+frag
@ -257,7 +259,7 @@ class Resource(object):
rpath = relpath(self.path, basedir)
except OSError: # On windows path and basedir could be on different drives
rpath = self.path
if isinstance(rpath, unicode):
if isinstance(rpath, unicode_type):
rpath = rpath.encode('utf-8')
return quote(rpath.replace(os.sep, '/'))+frag

View File

@ -14,6 +14,7 @@ from calibre.ebooks.metadata.book import (SC_COPYABLE_FIELDS,
TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS)
from calibre.library.field_metadata import FieldMetadata
from calibre.utils.icu import sort_key
from polyglot.builtins import unicode_type
# Special sets used to optimize the performance of getting and setting
# attributes on Metadata objects
@ -606,14 +607,14 @@ class Metadata(object):
return authors_to_string(self.authors)
def format_tags(self):
return u', '.join([unicode(t) for t in sorted(self.tags, key=sort_key)])
return u', '.join([unicode_type(t) for t in sorted(self.tags, key=sort_key)])
def format_rating(self, v=None, divide_by=1.0):
if v is None:
if self.rating is not None:
return unicode(self.rating/divide_by)
return unicode_type(self.rating/divide_by)
return u'None'
return unicode(v/divide_by)
return unicode_type(v/divide_by)
def format_field(self, key, series_with_index=True):
'''
@ -637,15 +638,15 @@ class Metadata(object):
if cmeta and cmeta['datatype'] == 'series':
if self.get(tkey):
res = self.get_extra(tkey)
return (unicode(cmeta['name']+'_index'),
return (unicode_type(cmeta['name']+'_index'),
self.format_series_index(res), res, cmeta)
else:
return (unicode(cmeta['name']+'_index'), '', '', cmeta)
return (unicode_type(cmeta['name']+'_index'), '', '', cmeta)
if key in self.custom_field_keys():
res = self.get(key, None) # get evaluates all necessary composites
cmeta = self.get_user_metadata(key, make_copy=False)
name = unicode(cmeta['name'])
name = unicode_type(cmeta['name'])
if res is None or res == '': # can't check "not res" because of numeric fields
return (name, res, None, None)
orig_res = res
@ -668,7 +669,7 @@ class Metadata(object):
res = fmt.format(res)
except:
pass
return (name, unicode(res), orig_res, cmeta)
return (name, unicode_type(res), orig_res, cmeta)
# convert top-level ids into their value
if key in TOP_LEVEL_IDENTIFIERS:
@ -682,11 +683,11 @@ class Metadata(object):
if fmkey in field_metadata and field_metadata[fmkey]['kind'] == 'field':
res = self.get(key, None)
fmeta = field_metadata[fmkey]
name = unicode(fmeta['name'])
name = unicode_type(fmeta['name'])
if res is None or res == '':
return (name, res, None, None)
orig_res = res
name = unicode(fmeta['name'])
name = unicode_type(fmeta['name'])
datatype = fmeta['datatype']
if key == 'authors':
res = authors_to_string(res)
@ -704,7 +705,7 @@ class Metadata(object):
res = u'%.2g'%(res/2.0)
elif key == 'size':
res = human_readable(res)
return (name, unicode(res), orig_res, fmeta)
return (name, unicode_type(res), orig_res, fmeta)
return (None, None, None, None)
@ -718,7 +719,7 @@ class Metadata(object):
ans = []
def fmt(x, y):
ans.append(u'%-20s: %s'%(unicode(x), unicode(y)))
ans.append(u'%-20s: %s'%(unicode_type(x), unicode_type(y)))
fmt('Title', self.title)
if self.title_sort:
@ -732,7 +733,7 @@ class Metadata(object):
if getattr(self, 'book_producer', False):
fmt('Book Producer', self.book_producer)
if self.tags:
fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
fmt('Tags', u', '.join([unicode_type(t) for t in self.tags]))
if self.series:
fmt('Series', self.series + ' #%s'%self.format_series_index())
if not self.is_null('languages'):
@ -745,7 +746,7 @@ class Metadata(object):
if self.pubdate is not None:
fmt('Published', isoformat(self.pubdate))
if self.rights is not None:
fmt('Rights', unicode(self.rights))
fmt('Rights', unicode_type(self.rights))
if self.identifiers:
fmt('Identifiers', u', '.join(['%s:%s'%(k, v) for k, v in
self.identifiers.iteritems()]))
@ -756,7 +757,7 @@ class Metadata(object):
val = self.get(key, None)
if val:
(name, val) = self.format_field(key)
fmt(name, unicode(val))
fmt(name, unicode_type(val))
return u'\n'.join(ans)
def to_html(self):
@ -765,22 +766,22 @@ class Metadata(object):
'''
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.date import isoformat
ans = [(_('Title'), unicode(self.title))]
ans = [(_('Title'), unicode_type(self.title))]
ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
ans += [(_('Publisher'), unicode(self.publisher))]
ans += [(_('Producer'), unicode(self.book_producer))]
ans += [(_('Comments'), unicode(self.comments))]
ans += [('ISBN', unicode(self.isbn))]
ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))]
ans += [(_('Publisher'), unicode_type(self.publisher))]
ans += [(_('Producer'), unicode_type(self.book_producer))]
ans += [(_('Comments'), unicode_type(self.comments))]
ans += [('ISBN', unicode_type(self.isbn))]
ans += [(_('Tags'), u', '.join([unicode_type(t) for t in self.tags]))]
if self.series:
ans += [(_('Series'), unicode(self.series) + ' #%s'%self.format_series_index())]
ans += [(_('Series'), unicode_type(self.series) + ' #%s'%self.format_series_index())]
ans += [(_('Languages'), u', '.join(self.languages))]
if self.timestamp is not None:
ans += [(_('Timestamp'), unicode(isoformat(self.timestamp, as_utc=False, sep=' ')))]
ans += [(_('Timestamp'), unicode_type(isoformat(self.timestamp, as_utc=False, sep=' ')))]
if self.pubdate is not None:
ans += [(_('Published'), unicode(isoformat(self.pubdate, as_utc=False, sep=' ')))]
ans += [(_('Published'), unicode_type(isoformat(self.pubdate, as_utc=False, sep=' ')))]
if self.rights is not None:
ans += [(_('Rights'), unicode(self.rights))]
ans += [(_('Rights'), unicode_type(self.rights))]
for key in self.custom_field_keys():
val = self.get(key, None)
if val:

View File

@ -20,6 +20,7 @@ from calibre.utils.icu import sort_key
from calibre.utils.formatter import EvalFormatter
from calibre.utils.date import is_date_undefined
from calibre.utils.localization import calibre_langcode_to_name
from polyglot.builtins import unicode_type
default_sort = ('title', 'title_sort', 'authors', 'author_sort', 'series', 'rating', 'pubdate', 'tags', 'publisher', 'identifiers')
@ -163,7 +164,7 @@ def mi_to_html(mi, field_list=None, default_author_link=None, use_roman_numbers=
path = force_unicode(mi.path, filesystem_encoding)
scheme = u'devpath' if isdevice else u'path'
url = prepare_string_for_xml(path if isdevice else
unicode(book_id), True)
unicode_type(book_id), True)
pathstr = _('Click to open')
extra = ''
if isdevice:

View File

@ -10,10 +10,11 @@ from calibre.constants import preferred_encoding
from calibre.ebooks.metadata.book import SERIALIZABLE_FIELDS
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.imghdr import what
from polyglot.builtins import unicode_type
def ensure_unicode(obj, enc=preferred_encoding):
if isinstance(obj, unicode):
if isinstance(obj, unicode_type):
return obj
if isinstance(obj, bytes):
return obj.decode(enc, 'replace')
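
ensure_unicode() is the text-coercion helper this hunk ports; a self-contained Python 3 version of the two branches shown, with str standing in for unicode_type (the trailing pass-through return is an assumption, the rest of the function is not shown here):

def ensure_unicode(obj, enc='utf-8'):
    # Text passes through untouched; bytes are decoded with a lossy fallback.
    if isinstance(obj, str):              # unicode_type in the calibre sources
        return obj
    if isinstance(obj, bytes):
        return obj.decode(enc, 'replace')
    return obj                            # assumed fallback for other types

print(ensure_unicode(b'caf\xc3\xa9'))     # café
print(ensure_unicode('already text'))     # already text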

View File

@ -16,6 +16,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
from calibre.ebooks.lrf.meta import LRFMetaFile
from calibre import prints
from calibre.utils.date import parse_date
from polyglot.builtins import unicode_type
USAGE=_('%prog ebook_file [options]\n') + \
_('''
@ -181,7 +182,7 @@ def main(args=sys.argv):
mi = get_metadata(stream, stream_type, force_read_metadata=True)
if trying_to_set:
prints(_('Original metadata')+'::')
metadata = unicode(mi)
metadata = unicode_type(mi)
if trying_to_set:
metadata = '\t'+'\n\t'.join(metadata.split('\n'))
prints(metadata, safe_encode=True)
@ -198,7 +199,7 @@ def main(args=sys.argv):
lrf.book_id = opts.lrf_bookid
mi = get_metadata(stream, stream_type, force_read_metadata=True)
prints('\n' + _('Changed metadata') + '::')
metadata = unicode(mi)
metadata = unicode_type(mi)
metadata = '\t'+'\n\t'.join(metadata.split('\n'))
prints(metadata, safe_encode=True)
if lrf is not None:

View File

@ -18,6 +18,7 @@ from calibre.utils.imghdr import identify
from calibre import guess_type, guess_all_extensions, prints, force_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn
from calibre.ebooks.chardet import xml_to_unicode
from polyglot.builtins import unicode_type
NAMESPACES = {
@ -26,7 +27,7 @@ NAMESPACES = {
'xlink' : 'http://www.w3.org/1999/xlink'
}
tostring = partial(etree.tostring, method='text', encoding=unicode)
tostring = partial(etree.tostring, method='text', encoding=unicode_type)
def XLINK(tag):
@ -112,9 +113,9 @@ def get_metadata(stream):
# fallback for book_title
if book_title:
book_title = unicode(book_title)
book_title = unicode_type(book_title)
else:
book_title = force_unicode(os.path.splitext(
book_title = force_unicode(os.path.splitext(
os.path.basename(getattr(stream, 'name',
_('Unknown'))))[0])
mi = MetaInformation(book_title, authors)
@ -249,7 +250,7 @@ def _parse_tags(root, mi, ctx):
# -- i18n Translations-- ?
tags = ctx.XPath('//fb:%s/fb:genre/text()' % genre_sec)(root)
if tags:
mi.tags = list(map(unicode, tags))
mi.tags = list(map(unicode_type, tags))
break
@ -447,7 +448,7 @@ def ensure_namespace(doc):
break
if bare_tags:
import re
raw = etree.tostring(doc, encoding=unicode)
raw = etree.tostring(doc, encoding=unicode_type)
raw = re.sub(r'''<(description|body)\s+xmlns=['"]['"]>''', r'<\1>', raw)
doc = etree.fromstring(raw)
return doc
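
Several of the changes in this FB2 reader hinge on lxml's convention that passing the text type as encoding= makes tostring() return a string rather than bytes, which is why the bare name mattered here. A standalone illustration (not calibre code):

from lxml import etree

root = etree.fromstring('<p>caf\xe9</p>')
# Default serialisation yields bytes; encoding=str (unicode_type) yields text.
assert isinstance(etree.tostring(root), bytes)
assert isinstance(etree.tostring(root, encoding=str, method='text'), str)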

View File

@ -6,6 +6,7 @@ __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
import sys
from calibre.ebooks.metadata import MetaInformation, string_to_authors
from polyglot.builtins import unicode_type
MAGIC = ['\x00\x01BOOKDOUG', '\x00\x02BOOKDOUG']
@ -43,6 +44,6 @@ def get_metadata(stream):
if category:
mi.category = category
except Exception as err:
msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode(err))
msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode_type(err))
print(msg.encode('utf8'), file=sys.stderr)
return mi
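
unicode_type(err) in the message above is the py2/py3-safe idiom for turning an exception into text; in isolation (the title string is a placeholder):

try:
    int('not a number')
except Exception as err:
    # str() stands in for unicode_type(); 'Some Title' is hypothetical.
    msg = u"Couldn't read metadata from imp: %s with error %s" % ('Some Title', str(err))
    print(msg)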

View File

@ -14,11 +14,12 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre import browser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import xml_to_unicode
from polyglot.builtins import codepoint_to_chr, unicode_type
URL = \
"http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping="
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
_ignore_starts = u'\'"'+u''.join(codepoint_to_chr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
def get_series(title, authors, timeout=60):
@ -28,7 +29,7 @@ def get_series(title, authors, timeout=60):
title = re.sub(r'^(A|The|An)\s+', '', title).strip()
if not title:
return mi
if isinstance(title, unicode):
if isinstance(title, unicode_type):
title = title.encode('utf-8')
title = urllib.quote_plus(title)
@ -73,7 +74,7 @@ def get_series(title, authors, timeout=60):
mi.series = series
ns = ss.nextSibling
if ns.contents:
raw = unicode(ns.contents[0])
raw = unicode_type(ns.contents[0])
raw = raw.partition('.')[0].strip()
try:
mi.series_index = int(raw)
@ -85,4 +86,3 @@ def get_series(title, authors, timeout=60):
if __name__ == '__main__':
import sys
print(get_series(sys.argv[-2], [sys.argv[-1]]))
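
For reference, _ignore_starts above collects the straight and typographic quote characters (plus prime marks) stripped from the front of a title before the series lookup. On Python 3 the same expression reads as follows, with chr standing in for the wrapper and range() wrapped in list():

# U+2018..U+201D covers the curly and low-9 quotes; U+2032/U+2033 are primes.
_ignore_starts = u'\'"' + u''.join(
    chr(x) for x in list(range(0x2018, 0x201e)) + [0x2032, 0x2033])
print(_ignore_starts)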

View File

@ -18,6 +18,7 @@ from calibre.utils.config_base import tweaks
from calibre.utils.date import parse_only_date
from calibre.utils.localization import canonicalize_lang
from calibre.utils.imghdr import identify
from polyglot.builtins import unicode_type
class InvalidKFX(ValueError):
@ -356,4 +357,4 @@ if __name__ == '__main__':
from calibre import prints
with open(sys.argv[-1], 'rb') as f:
mi = read_metadata_kfx(f)
prints(unicode(mi))
prints(unicode_type(mi))

View File

@ -21,6 +21,7 @@ from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.utils.date import now as nowf
from calibre.utils.imghdr import what
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
from polyglot.builtins import unicode_type
def is_image(ss):
@ -223,7 +224,7 @@ class MetadataUpdater(object):
def create_exth(self, new_title=None, exth=None):
# Add an EXTH block to record 0, rewrite the stream
if isinstance(new_title, unicode):
if isinstance(new_title, unicode_type):
new_title = new_title.encode(self.codec, 'replace')
# Fetch the existing title
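
The isinstance check in create_exth() is the recurring encode-before-write pattern of this commit: only text gets encoded, bytes pass through unchanged. A generic sketch (helper name hypothetical):

def encode_if_text(value, codec='utf-8'):
    # isinstance(value, unicode_type) in the calibre sources.
    if isinstance(value, str):
        return value.encode(codec, 'replace')
    return value

print(encode_if_text(u'D\xf4me'))        # b'D\xc3\xb4me'
print(encode_if_text(b'already bytes'))  # b'already bytes'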

View File

@ -25,6 +25,7 @@ from calibre.utils.localization import get_lang, canonicalize_lang
from calibre import prints, guess_type
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
from calibre.utils.config import tweaks
from polyglot.builtins import unicode_type
pretty_print_opf = False
@ -82,7 +83,7 @@ class Resource(object): # {{{
self._href = href_or_path
else:
pc = url[2]
if isinstance(pc, unicode):
if isinstance(pc, unicode_type):
pc = pc.encode('utf-8')
pc = pc.decode('utf-8')
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
@ -103,7 +104,7 @@ class Resource(object): # {{{
basedir = os.getcwdu()
if self.path is None:
return self._href
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode) else self.fragment
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment
frag = '#'+f if self.fragment else ''
if self.path == basedir:
return ''+frag
@ -111,7 +112,7 @@ class Resource(object): # {{{
rpath = os.path.relpath(self.path, basedir)
except ValueError: # On windows path and basedir could be on different drives
rpath = self.path
if isinstance(rpath, unicode):
if isinstance(rpath, unicode_type):
rpath = rpath.encode('utf-8')
return rpath.replace(os.sep, '/')+frag
@ -206,10 +207,10 @@ class ManifestItem(Resource): # {{{
return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type)
def __str__(self):
return unicode(self).encode('utf-8')
return unicode_type(self).encode('utf-8')
def __repr__(self):
return unicode(self)
return unicode_type(self)
def __getitem__(self, index):
if index == 0:
@ -410,7 +411,7 @@ class Guide(ResourceCollection): # {{{
class MetadataField(object):
def __init__(self, name, is_dc=True, formatter=None, none_is=None,
renderer=lambda x: unicode(x)):
renderer=lambda x: unicode_type(x)):
self.name = name
self.is_dc = is_dc
self.formatter = formatter
@ -791,7 +792,7 @@ class OPF(object): # {{{
def unquote_urls(self):
def get_href(item):
raw = unquote(item.get('href', ''))
if not isinstance(raw, unicode):
if not isinstance(raw, unicode_type):
raw = raw.decode('utf-8')
return raw
for item in self.itermanifest():
@ -820,7 +821,7 @@ class OPF(object): # {{{
titles = ()
if val:
title = titles[0] if titles else self.create_metadata_element('title')
title.text = re.sub(r'\s+', ' ', unicode(val))
title.text = re.sub(r'\s+', ' ', unicode_type(val))
return property(fget=fget, fset=fset)
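
The unicode_type(val) calls feeding set_text() in the properties below make sure whatever is stored (int, float, or other object) becomes text before it lands in the OPF tree, since lxml rejects non-string .text assignments. A minimal sketch with set_text simplified:

from lxml import etree

def set_text(elem, value):
    # unicode_type(val) in the calibre sources; lxml raises TypeError for ints.
    elem.text = str(value)

meta = etree.Element('meta')
set_text(meta, 4.5)
print(etree.tostring(meta))   # b'<meta>4.5</meta>'
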
@ -869,7 +870,7 @@ class OPF(object): # {{{
for key in matches[0].attrib:
if key.endswith('file-as'):
matches[0].attrib.pop(key)
matches[0].set('{%s}file-as'%self.NAMESPACES['opf'], unicode(val))
matches[0].set('{%s}file-as'%self.NAMESPACES['opf'], unicode_type(val))
return property(fget=fget, fset=fset)
@ -889,7 +890,7 @@ class OPF(object): # {{{
tag.getparent().remove(tag)
for tag in val:
elem = self.create_metadata_element('subject')
self.set_text(elem, unicode(tag))
self.set_text(elem, unicode_type(tag))
return property(fget=fget, fset=fset)
@ -900,7 +901,7 @@ class OPF(object): # {{{
ans = None
for match in self.pubdate_path(self.metadata):
try:
val = parse_date(etree.tostring(match, encoding=unicode,
val = parse_date(etree.tostring(match, encoding=unicode_type,
method='text', with_tail=False).strip())
except:
continue
@ -912,7 +913,7 @@ class OPF(object): # {{{
least_val = least_elem = None
for match in self.pubdate_path(self.metadata):
try:
cval = parse_date(etree.tostring(match, encoding=unicode,
cval = parse_date(etree.tostring(match, encoding=unicode_type,
method='text', with_tail=False).strip())
except:
match.getparent().remove(match)
@ -962,7 +963,7 @@ class OPF(object): # {{{
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'ISBN'}
matches = [self.create_metadata_element('identifier',
attrib=attrib)]
self.set_text(matches[0], unicode(val))
self.set_text(matches[0], unicode_type(val))
return property(fget=fget, fset=fset)
@ -975,7 +976,7 @@ class OPF(object): # {{{
for attr, val in x.attrib.iteritems():
if attr.endswith('scheme'):
typ = icu_lower(val)
val = etree.tostring(x, with_tail=False, encoding=unicode,
val = etree.tostring(x, with_tail=False, encoding=unicode_type,
method='text').strip()
if val and typ not in ('calibre', 'uuid'):
if typ == 'isbn' and val.lower().startswith('urn:isbn:'):
@ -984,7 +985,7 @@ class OPF(object): # {{{
found_scheme = True
break
if not found_scheme:
val = etree.tostring(x, with_tail=False, encoding=unicode,
val = etree.tostring(x, with_tail=False, encoding=unicode_type,
method='text').strip()
if val.lower().startswith('urn:isbn:'):
val = check_isbn(val.split(':')[-1])
@ -1017,7 +1018,7 @@ class OPF(object): # {{{
for typ, val in identifiers.iteritems():
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: typ.upper()}
self.set_text(self.create_metadata_element(
'identifier', attrib=attrib), unicode(val))
'identifier', attrib=attrib), unicode_type(val))
@dynamic_property
def application_id(self):
@ -1041,7 +1042,7 @@ class OPF(object): # {{{
if uuid_id and uuid_id in removed_ids:
attrib['id'] = uuid_id
self.set_text(self.create_metadata_element(
'identifier', attrib=attrib), unicode(val))
'identifier', attrib=attrib), unicode_type(val))
return property(fget=fget, fset=fset)
@ -1058,7 +1059,7 @@ class OPF(object): # {{{
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'uuid'}
matches = [self.create_metadata_element('identifier',
attrib=attrib)]
self.set_text(matches[0], unicode(val))
self.set_text(matches[0], unicode_type(val))
return property(fget=fget, fset=fset)
@ -1095,7 +1096,7 @@ class OPF(object): # {{{
for lang in val:
l = self.create_metadata_element('language')
self.set_text(l, unicode(lang))
self.set_text(l, unicode_type(lang))
return property(fget=fget, fset=fset)
@ -1118,7 +1119,7 @@ class OPF(object): # {{{
if not matches:
matches = [self.create_metadata_element('contributor')]
matches[0].set('{%s}role'%self.NAMESPACES['opf'], 'bkp')
self.set_text(matches[0], unicode(val))
self.set_text(matches[0], unicode_type(val))
return property(fget=fget, fset=fset)
def identifier_iter(self):
@ -1701,7 +1702,7 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
metadata[-1].tail = '\n' +(' '*4)
if mi.cover:
if not isinstance(mi.cover, unicode):
if not isinstance(mi.cover, unicode_type):
mi.cover = mi.cover.decode(filesystem_encoding)
guide.text = '\n'+(' '*8)
r = guide.makeelement(OPF('reference'),

View File

@ -12,6 +12,7 @@ from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.metadata import (
MetaInformation, string_to_authors, check_isbn, check_doi)
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
from polyglot.builtins import unicode_type
def get_tools():
@ -88,8 +89,8 @@ def page_images(pdfpath, outputdir, first=1, last=1):
import win32process as w
args['creationflags'] = w.HIGH_PRIORITY_CLASS | w.CREATE_NO_WINDOW
try:
subprocess.check_call([pdftoppm, '-cropbox', '-jpeg', '-f', unicode(first),
'-l', unicode(last), pdfpath,
subprocess.check_call([pdftoppm, '-cropbox', '-jpeg', '-f', unicode_type(first),
'-l', unicode_type(last), pdfpath,
os.path.join(outputdir, 'page-images')], **args)
except subprocess.CalledProcessError as e:
raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode)
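
pdftoppm only accepts string arguments, so the first/last page numbers are converted with unicode_type() before the subprocess call; the command construction in isolation (paths hypothetical):

first, last = 1, 1
# str() stands in for unicode_type(); passing ints to subprocess raises TypeError.
cmd = ['pdftoppm', '-cropbox', '-jpeg',
       '-f', str(first), '-l', str(last),
       '/path/to/book.pdf', '/tmp/page-images']
print(' '.join(cmd))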

View File

@ -6,6 +6,7 @@ __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
import sys, struct
from calibre.ebooks.metadata import MetaInformation, string_to_authors
from polyglot.builtins import unicode_type
MAGIC = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00'
@ -47,9 +48,7 @@ def get_metadata(stream):
mi.author = value
mi.authors = string_to_authors(value)
except Exception as err:
msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode(err))
msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode_type(err))
print(msg.encode('utf8'), file=sys.stderr)
raise
return mi

View File

@ -8,6 +8,7 @@ import re, cStringIO, codecs
from calibre import force_unicode
from calibre.ebooks.metadata import MetaInformation, string_to_authors
from polyglot.builtins import codepoint_to_chr, unicode_type
title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
@ -75,7 +76,7 @@ def detect_codepage(stream):
def encode(unistr):
if not isinstance(unistr, unicode):
if not isinstance(unistr, unicode_type):
unistr = force_unicode(unistr)
return ''.join([str(c) if ord(c) < 128 else '\\u' + str(ord(c)) + '?' for c in unistr])
@ -88,7 +89,7 @@ def decode(raw, codec):
raw = raw.decode(codec)
def uni(match):
return unichr(int(match.group(1)))
return codepoint_to_chr(int(match.group(1)))
raw = re.sub(r'\\u([0-9]{3,4}).', uni, raw)
return raw
@ -232,4 +233,3 @@ def set_metadata(stream, options):
stream.truncate()
stream.write(src)
stream.write(after)
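
The decode() helper ported above turns RTF \uNNNN escapes (each followed by a one-character fallback) back into real characters; a self-contained version of the substitution, with chr standing in for codepoint_to_chr:

import re

def decode_rtf_escapes(raw):
    # \uNNNN plus its fallback character collapses to the code point itself.
    return re.sub(r'\\u([0-9]{3,4}).', lambda m: chr(int(m.group(1))), raw)

print(decode_rtf_escapes('caf\\u233?'))   # café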

View File

@ -15,6 +15,7 @@ from calibre.constants import __appname__, __version__
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.cleantext import clean_xml_chars
from polyglot.builtins import unicode_type
NCX_NS = "http://www.daisy.org/z3986/2005/ncx/"
CALIBRE_NS = "http://calibre.kovidgoyal.net/2009/metadata"
@ -194,7 +195,7 @@ class TOC(list):
text = u''
for txt in txt_path(nl):
text += etree.tostring(txt, method='text',
encoding=unicode, with_tail=False)
encoding=unicode_type, with_tail=False)
content = content_path(np)
if content and text:
content = content[0]
@ -229,7 +230,7 @@ class TOC(list):
fragment = fragment.strip()
href = href.strip()
txt = ''.join([unicode(s).strip() for s in a.findAll(text=True)])
txt = ''.join([unicode_type(s).strip() for s in a.findAll(text=True)])
add = True
for i in self.flat():
if i.href == href and i.fragment == fragment:
@ -264,7 +265,7 @@ class TOC(list):
text = clean_xml_chars(text)
elem = E.navPoint(
E.navLabel(E.text(re.sub(r'\s+', ' ', text))),
E.content(src=unicode(np.href)+(('#' + unicode(np.fragment))
E.content(src=unicode_type(np.href)+(('#' + unicode_type(np.fragment))
if np.fragment else '')),
id=item_id,
playOrder=str(np.play_order)

View File

@ -20,6 +20,7 @@ from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
from calibre.utils.imghdr import what
from calibre.ebooks.mobi.debug import format_bytes
from calibre.ebooks.mobi.debug.headers import TextRecord
from polyglot.builtins import unicode_type
class TagX(object): # {{{
@ -564,7 +565,7 @@ class TBSIndexing(object): # {{{
def get_index(self, idx):
for i in self.indices:
if i.index in {idx, unicode(idx)}:
if i.index in {idx, unicode_type(idx)}:
return i
raise IndexError('Index %d not found'%idx)
@ -844,5 +845,3 @@ def inspect_mobi(mobi_file, ddir):
# }}}
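
The membership test in get_index() accepts an index whether the record stores it as an int or as its text form; in isolation, with str standing in for unicode_type:

def index_matches(stored, idx):
    # {idx, unicode_type(idx)} in the calibre sources.
    return stored in {idx, str(idx)}

print(index_matches('12', 12))   # True
print(index_matches(12, 12))     # True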

View File

@ -16,6 +16,7 @@ from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
from calibre.ebooks.mobi.utils import convert_color_for_font_tag
from calibre.utils.imghdr import identify
from polyglot.builtins import unicode_type
MBP_NS = 'http://mobipocket.com/ns/mbp'
@ -151,7 +152,7 @@ class MobiMLizer(object):
return "%dem" % int(round(ptsize / embase))
def preize_text(self, text, pre_wrap=False):
text = unicode(text)
text = unicode_type(text)
if pre_wrap:
# Replace n consecutive spaces with n-1 NBSP + space
text = re.sub(r' {2,}', lambda m:(u'\xa0'*(len(m.group())-1) + u' '), text)
@ -228,7 +229,7 @@ class MobiMLizer(object):
while vspace > 0:
wrapper.addprevious(etree.Element(XHTML('br')))
vspace -= 1
if istate.halign != 'auto' and isinstance(istate.halign, (str, unicode)):
if istate.halign != 'auto' and isinstance(istate.halign, (str, unicode_type)):
para.attrib['align'] = istate.halign
istate.rendered = True
pstate = bstate.istate
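
preize_text() keeps runs of spaces visible in Mobipocket markup by replacing n consecutive spaces with n-1 non-breaking spaces plus one ordinary space; the substitution on its own:

import re

def preserve_spaces(text):
    # n consecutive spaces -> (n-1) NBSPs followed by a single space.
    return re.sub(r' {2,}', lambda m: u'\xa0' * (len(m.group()) - 1) + u' ', text)

print(repr(preserve_spaces('a    b')))   # 'a\xa0\xa0\xa0 b'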

View File

@ -16,6 +16,7 @@ from calibre.ebooks.mobi.langcodes import main_language, sub_language, mobi2iana
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
from calibre.utils.localization import canonicalize_lang
from calibre.utils.config_base import tweaks
from polyglot.builtins import unicode_type
NULL_INDEX = 0xffffffff
@ -239,7 +240,7 @@ class BookHeader(object):
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
self.exth = None
if not isinstance(self.title, unicode):
if not isinstance(self.title, unicode_type):
self.title = self.title.decode(self.codec, 'replace')
if self.exth_flag & 0x40:
try:

View File

@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
import re, os
from calibre.ebooks.chardet import strip_encoding_declarations
from polyglot.builtins import unicode_type
def update_internal_links(mobi8_reader, log):
@ -130,7 +131,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
flows.append(flow)
continue
if not isinstance(flow, unicode):
if not isinstance(flow, unicode_type):
try:
flow = flow.decode(mr.header.codec)
except UnicodeDecodeError:

Some files were not shown because too many files have changed in this diff.