mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
python3: add unicode/unichr wrappers to polyglot
This commit is contained in:
parent
77728a15ef
commit
cbc42bec23
@ -12,6 +12,7 @@ from functools import partial
|
||||
from contextlib import closing
|
||||
|
||||
from setup import iswindows
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
if iswindows:
|
||||
from ctypes import windll, Structure, POINTER, c_size_t
|
||||
@ -52,7 +53,7 @@ def run_worker(job, decorate=True):
|
||||
try:
|
||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
except Exception as err:
|
||||
return False, human_text, unicode(err)
|
||||
return False, human_text, unicode_type(err)
|
||||
stdout, stderr = p.communicate()
|
||||
if stdout:
|
||||
stdout = stdout.decode('utf-8')
|
||||
|
@ -12,7 +12,7 @@ from itertools import chain
|
||||
is_ci = os.environ.get('CI', '').lower() == 'true'
|
||||
|
||||
from setup import Command, basenames, __appname__, download_securely
|
||||
from polyglot.builtins import itervalues, iteritems
|
||||
from polyglot.builtins import codepoint_to_chr, itervalues, iteritems
|
||||
|
||||
|
||||
def get_opts_from_parser(parser):
|
||||
@ -173,7 +173,7 @@ class Kakasi(Command): # {{{
|
||||
continue
|
||||
if re.match(r"^$",line):
|
||||
continue
|
||||
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
|
||||
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:codepoint_to_chr(int(x.group(1),16)), line)
|
||||
dic[pair[0]] = pair[1]
|
||||
from calibre.utils.serialize import msgpack_dumps
|
||||
with open(dst, 'wb') as f:
|
||||
|
@ -13,7 +13,7 @@ from functools import partial
|
||||
|
||||
from setup import Command, __appname__, __version__, require_git_master, build_cache_dir, edit_file
|
||||
from setup.parallel_build import parallel_check_output
|
||||
from polyglot.builtins import iteritems
|
||||
from polyglot.builtins import codepoint_to_chr, iteritems
|
||||
is_ci = os.environ.get('CI', '').lower() == 'true'
|
||||
|
||||
|
||||
@ -82,7 +82,7 @@ class POT(Command): # {{{
|
||||
ans = []
|
||||
for lineno, msg in msgs:
|
||||
ans.append('#: %s:%d'%(path, lineno))
|
||||
slash = unichr(92)
|
||||
slash = codepoint_to_chr(92)
|
||||
msg = msg.replace(slash, slash*2).replace('"', r'\"').replace('\n',
|
||||
r'\n').replace('\r', r'\r').replace('\t', r'\t')
|
||||
ans.append('msgid "%s"'%msg)
|
||||
|
@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys, os, re, time, random, warnings
|
||||
from polyglot.builtins import builtins
|
||||
from polyglot.builtins import builtins, codepoint_to_chr, unicode_type
|
||||
builtins.__dict__['dynamic_property'] = lambda func: func(None)
|
||||
from math import floor
|
||||
from functools import partial
|
||||
@ -77,7 +77,7 @@ def get_types_map():
|
||||
|
||||
|
||||
def to_unicode(raw, encoding='utf-8', errors='strict'):
|
||||
if isinstance(raw, unicode):
|
||||
if isinstance(raw, unicode_type):
|
||||
return raw
|
||||
return raw.decode(encoding, errors)
|
||||
|
||||
@ -113,7 +113,7 @@ def confirm_config_name(name):
|
||||
|
||||
_filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+/]')
|
||||
_filename_sanitize_unicode = frozenset([u'\\', u'|', u'?', u'*', u'<',
|
||||
u'"', u':', u'>', u'+', u'/'] + list(map(unichr, xrange(32))))
|
||||
u'"', u':', u'>', u'+', u'/'] + list(map(codepoint_to_chr, xrange(32))))
|
||||
|
||||
|
||||
def sanitize_file_name(name, substitute='_', as_unicode=False):
|
||||
@ -126,7 +126,7 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
|
||||
*NOTE:* This function always returns byte strings, not unicode objects. The byte strings
|
||||
are encoded in the filesystem encoding of the platform, or UTF-8.
|
||||
'''
|
||||
if isinstance(name, unicode):
|
||||
if isinstance(name, unicode_type):
|
||||
name = name.encode(filesystem_encoding, 'ignore')
|
||||
one = _filename_sanitize.sub(substitute, name)
|
||||
one = re.sub(r'\s', ' ', one).strip()
|
||||
@ -198,7 +198,7 @@ def prints(*args, **kwargs):
|
||||
safe_encode = kwargs.get('safe_encode', False)
|
||||
count = 0
|
||||
for i, arg in enumerate(args):
|
||||
if isinstance(arg, unicode):
|
||||
if isinstance(arg, unicode_type):
|
||||
if iswindows:
|
||||
from calibre.utils.terminal import Detect
|
||||
cs = Detect(file)
|
||||
@ -222,8 +222,8 @@ def prints(*args, **kwargs):
|
||||
try:
|
||||
arg = str(arg)
|
||||
except ValueError:
|
||||
arg = unicode(arg)
|
||||
if isinstance(arg, unicode):
|
||||
arg = unicode_type(arg)
|
||||
if isinstance(arg, unicode_type):
|
||||
try:
|
||||
arg = arg.encode(enc)
|
||||
except UnicodeEncodeError:
|
||||
@ -288,7 +288,7 @@ def load_library(name, cdll):
|
||||
|
||||
def filename_to_utf8(name):
|
||||
'''Return C{name} encoded in utf8. Unhandled characters are replaced. '''
|
||||
if isinstance(name, unicode):
|
||||
if isinstance(name, unicode_type):
|
||||
return name.encode('utf8')
|
||||
codec = 'cp1252' if iswindows else 'utf8'
|
||||
return name.decode(codec, 'replace').encode('utf8')
|
||||
@ -557,7 +557,7 @@ def strftime(fmt, t=None):
|
||||
else:
|
||||
ans = time.strftime(fmt, t).decode(preferred_encoding, 'replace')
|
||||
if early_year:
|
||||
ans = ans.replace(u'_early year hack##', unicode(orig_year))
|
||||
ans = ans.replace(u'_early year hack##', unicode_type(orig_year))
|
||||
return ans
|
||||
|
||||
|
||||
@ -669,7 +669,7 @@ def force_unicode(obj, enc=preferred_encoding):
|
||||
def as_unicode(obj, enc=preferred_encoding):
|
||||
if not isbytestring(obj):
|
||||
try:
|
||||
obj = unicode(obj)
|
||||
obj = unicode_type(obj)
|
||||
except:
|
||||
try:
|
||||
obj = str(obj)
|
||||
|
@ -2,12 +2,12 @@
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
from __future__ import print_function
|
||||
from polyglot.builtins import map
|
||||
from polyglot.builtins import map, unicode_type
|
||||
import sys, locale, codecs, os, importlib, collections
|
||||
|
||||
__appname__ = u'calibre'
|
||||
numeric_version = (3, 40, 1)
|
||||
__version__ = u'.'.join(map(unicode, numeric_version))
|
||||
__version__ = u'.'.join(map(unicode_type, numeric_version))
|
||||
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
|
||||
'''
|
||||
@ -300,7 +300,7 @@ def get_portable_base():
|
||||
|
||||
def get_unicode_windows_env_var(name):
|
||||
getenv = plugins['winutil'][0].getenv
|
||||
return getenv(unicode(name))
|
||||
return getenv(unicode_type(name))
|
||||
|
||||
|
||||
def get_windows_username():
|
||||
|
@ -7,6 +7,7 @@ import os, sys, zipfile, importlib
|
||||
|
||||
from calibre.constants import numeric_version, iswindows, isosx
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
platform = 'linux'
|
||||
if iswindows:
|
||||
@ -195,7 +196,7 @@ class Plugin(object): # {{{
|
||||
config_dialog.exec_()
|
||||
|
||||
if config_dialog.result() == QDialog.Accepted:
|
||||
sc = unicode(sc.text()).strip()
|
||||
sc = unicode_type(sc.text()).strip()
|
||||
customize_plugin(self, sc)
|
||||
|
||||
geom = bytearray(config_dialog.saveGeometry())
|
||||
|
@ -10,6 +10,7 @@ from calibre.customize import (FileTypePlugin, MetadataReaderPlugin,
|
||||
from calibre.constants import numeric_version
|
||||
from calibre.ebooks.metadata.archive import ArchiveExtract, get_comic_metadata
|
||||
from calibre.ebooks.html.to_zip import HTML2ZIP
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
plugins = []
|
||||
|
||||
@ -64,23 +65,23 @@ class TXT2TXTZ(FileTypePlugin):
|
||||
images = []
|
||||
|
||||
# Textile
|
||||
for m in re.finditer(unicode(r'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))'), txt):
|
||||
for m in re.finditer(unicode_type(r'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))'), txt):
|
||||
path = m.group('path')
|
||||
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
|
||||
images.append(path)
|
||||
|
||||
# Markdown inline
|
||||
for m in re.finditer(unicode(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)'), txt): # noqa
|
||||
for m in re.finditer(unicode_type(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)'), txt): # noqa
|
||||
path = m.group('path')
|
||||
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
|
||||
images.append(path)
|
||||
|
||||
# Markdown reference
|
||||
refs = {}
|
||||
for m in re.finditer(unicode(r'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$'), txt):
|
||||
for m in re.finditer(unicode_type(r'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$'), txt):
|
||||
if m.group('id') and m.group('path'):
|
||||
refs[m.group('id')] = m.group('path')
|
||||
for m in re.finditer(unicode(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]'), txt): # noqa
|
||||
for m in re.finditer(unicode_type(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]'), txt): # noqa
|
||||
path = refs.get(m.group('id'), None)
|
||||
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
|
||||
images.append(path)
|
||||
|
@ -6,6 +6,7 @@ import re, os, shutil
|
||||
|
||||
from calibre import CurrentDir
|
||||
from calibre.customize import Plugin
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class ConversionOption(object):
|
||||
@ -79,7 +80,7 @@ class OptionRecommendation(object):
|
||||
self.option.choices:
|
||||
raise ValueError('OpRec: %s: Recommended value not in choices'%
|
||||
self.option.name)
|
||||
if not (isinstance(self.recommended_value, (int, float, str, unicode)) or self.recommended_value is None):
|
||||
if not (isinstance(self.recommended_value, (int, float, str, unicode_type)) or self.recommended_value is None):
|
||||
raise ValueError('OpRec: %s:'%self.option.name + repr(
|
||||
self.recommended_value) + ' is not a string or a number')
|
||||
|
||||
@ -340,7 +341,7 @@ class OutputFormatPlugin(Plugin):
|
||||
@property
|
||||
def is_periodical(self):
|
||||
return self.oeb.metadata.publication_type and \
|
||||
unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:')
|
||||
unicode_type(self.oeb.metadata.publication_type[0]).startswith('periodical:')
|
||||
|
||||
def specialize_options(self, log, opts, input_fmt):
|
||||
'''
|
||||
|
@ -2,7 +2,7 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
from polyglot.builtins import map
|
||||
from polyglot.builtins import map, unicode_type
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
@ -216,7 +216,7 @@ class PluginLoader(object):
|
||||
if ans.minimum_calibre_version > numeric_version:
|
||||
raise InvalidPlugin(
|
||||
'The plugin at %s needs a version of calibre >= %s' %
|
||||
(as_unicode(path_to_zip_file), '.'.join(map(unicode,
|
||||
(as_unicode(path_to_zip_file), '.'.join(map(unicode_type,
|
||||
ans.minimum_calibre_version))))
|
||||
|
||||
if platform not in ans.supported_platforms:
|
||||
@ -231,7 +231,7 @@ class PluginLoader(object):
|
||||
raise
|
||||
|
||||
def _locate_code(self, zf, path_to_zip_file):
|
||||
names = [x if isinstance(x, unicode) else x.decode('utf-8') for x in
|
||||
names = [x if isinstance(x, unicode_type) else x.decode('utf-8') for x in
|
||||
zf.namelist()]
|
||||
names = [x[1:] if x[0] == '/' else x for x in names]
|
||||
|
||||
|
@ -8,7 +8,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os, time, re
|
||||
from collections import defaultdict
|
||||
from polyglot.builtins import map
|
||||
from polyglot.builtins import map, unicode_type
|
||||
from contextlib import contextmanager
|
||||
from functools import partial
|
||||
|
||||
@ -69,7 +69,7 @@ def metadata_extensions():
|
||||
# but not actually added)
|
||||
global _metadata_extensions
|
||||
if _metadata_extensions is None:
|
||||
_metadata_extensions = frozenset(map(unicode, BOOK_EXTENSIONS)) | {'opf'}
|
||||
_metadata_extensions = frozenset(map(unicode_type, BOOK_EXTENSIONS)) | {'opf'}
|
||||
return _metadata_extensions
|
||||
|
||||
|
||||
@ -143,7 +143,7 @@ def find_books_in_directory(dirpath, single_book_per_directory, compiled_rules=(
|
||||
for path in listdir_impl(dirpath, sort_by_mtime=True):
|
||||
key, ext = splitext(path)
|
||||
if allow_path(path, ext, compiled_rules):
|
||||
books[icu_lower(key) if isinstance(key, unicode) else key.lower()][ext] = path
|
||||
books[icu_lower(key) if isinstance(key, unicode_type) else key.lower()][ext] = path
|
||||
|
||||
for formats in books.itervalues():
|
||||
if formats_ok(formats):
|
||||
|
@ -12,7 +12,7 @@ import os, shutil, uuid, json, glob, time, hashlib, errno, sys
|
||||
from functools import partial
|
||||
|
||||
import apsw
|
||||
from polyglot.builtins import reraise
|
||||
from polyglot.builtins import unicode_type, reraise
|
||||
|
||||
from calibre import isbytestring, force_unicode, prints, as_unicode
|
||||
from calibre.constants import (iswindows, filesystem_encoding,
|
||||
@ -93,7 +93,7 @@ class DBPrefs(dict): # {{{
|
||||
dict.__setitem__(self, key, val)
|
||||
|
||||
def raw_to_object(self, raw):
|
||||
if not isinstance(raw, unicode):
|
||||
if not isinstance(raw, unicode_type):
|
||||
raw = raw.decode(preferred_encoding)
|
||||
return json.loads(raw, object_hook=from_json)
|
||||
|
||||
@ -561,10 +561,10 @@ class DB(object):
|
||||
prints('found user category case overlap', catmap[uc])
|
||||
cat = catmap[uc][0]
|
||||
suffix = 1
|
||||
while icu_lower((cat + unicode(suffix))) in catmap:
|
||||
while icu_lower((cat + unicode_type(suffix))) in catmap:
|
||||
suffix += 1
|
||||
prints('Renaming user category %s to %s'%(cat, cat+unicode(suffix)))
|
||||
user_cats[cat + unicode(suffix)] = user_cats[cat]
|
||||
prints('Renaming user category %s to %s'%(cat, cat+unicode_type(suffix)))
|
||||
user_cats[cat + unicode_type(suffix)] = user_cats[cat]
|
||||
del user_cats[cat]
|
||||
cats_changed = True
|
||||
if cats_changed:
|
||||
@ -670,23 +670,23 @@ class DB(object):
|
||||
if d['is_multiple']:
|
||||
if x is None:
|
||||
return []
|
||||
if isinstance(x, (str, unicode, bytes)):
|
||||
if isinstance(x, (str, unicode_type, bytes)):
|
||||
x = x.split(d['multiple_seps']['ui_to_list'])
|
||||
x = [y.strip() for y in x if y.strip()]
|
||||
x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
|
||||
unicode) else y for y in x]
|
||||
unicode_type) else y for y in x]
|
||||
return [u' '.join(y.split()) for y in x]
|
||||
else:
|
||||
return x if x is None or isinstance(x, unicode) else \
|
||||
return x if x is None or isinstance(x, unicode_type) else \
|
||||
x.decode(preferred_encoding, 'replace')
|
||||
|
||||
def adapt_datetime(x, d):
|
||||
if isinstance(x, (str, unicode, bytes)):
|
||||
if isinstance(x, (str, unicode_type, bytes)):
|
||||
x = parse_date(x, assume_utc=False, as_utc=False)
|
||||
return x
|
||||
|
||||
def adapt_bool(x, d):
|
||||
if isinstance(x, (str, unicode, bytes)):
|
||||
if isinstance(x, (str, unicode_type, bytes)):
|
||||
x = x.lower()
|
||||
if x == 'true':
|
||||
x = True
|
||||
@ -707,7 +707,7 @@ class DB(object):
|
||||
def adapt_number(x, d):
|
||||
if x is None:
|
||||
return None
|
||||
if isinstance(x, (str, unicode, bytes)):
|
||||
if isinstance(x, (str, unicode_type, bytes)):
|
||||
if x.lower() == 'none':
|
||||
return None
|
||||
if d['datatype'] == 'int':
|
||||
@ -1239,7 +1239,7 @@ class DB(object):
|
||||
return self._library_id_
|
||||
|
||||
def fset(self, val):
|
||||
self._library_id_ = unicode(val)
|
||||
self._library_id_ = unicode_type(val)
|
||||
self.execute('''
|
||||
DELETE FROM library_id;
|
||||
INSERT INTO library_id (uuid) VALUES (?);
|
||||
@ -1715,7 +1715,7 @@ class DB(object):
|
||||
[(book_id, fmt.upper()) for book_id in book_ids])
|
||||
|
||||
def set_conversion_options(self, options, fmt):
|
||||
options = [(book_id, fmt.upper(), buffer(pickle_binary_string(data.encode('utf-8') if isinstance(data, unicode) else data)))
|
||||
options = [(book_id, fmt.upper(), buffer(pickle_binary_string(data.encode('utf-8') if isinstance(data, unicode_type) else data)))
|
||||
for book_id, data in options.iteritems()]
|
||||
self.executemany('INSERT OR REPLACE INTO conversion_options(book,format,data) VALUES (?,?,?)', options)
|
||||
|
||||
@ -1754,7 +1754,7 @@ class DB(object):
|
||||
copyfile_using_links(src, dest, dest_is_dir=False)
|
||||
old_files.add(src)
|
||||
x = path_map[x]
|
||||
if not isinstance(x, unicode):
|
||||
if not isinstance(x, unicode_type):
|
||||
x = x.decode(filesystem_encoding, 'replace')
|
||||
progress(x, i+1, total)
|
||||
|
||||
|
@ -11,7 +11,7 @@ import os, traceback, random, shutil, operator
|
||||
from io import BytesIO
|
||||
from collections import defaultdict, Set, MutableSet
|
||||
from functools import wraps, partial
|
||||
from polyglot.builtins import zip
|
||||
from polyglot.builtins import unicode_type, zip
|
||||
from time import time
|
||||
|
||||
from calibre import isbytestring, as_unicode
|
||||
@ -528,14 +528,14 @@ class Cache(object):
|
||||
@read_api
|
||||
def get_item_id(self, field, item_name):
|
||||
' Return the item id for item_name (case-insensitive) '
|
||||
rmap = {icu_lower(v) if isinstance(v, unicode) else v:k for k, v in self.fields[field].table.id_map.iteritems()}
|
||||
return rmap.get(icu_lower(item_name) if isinstance(item_name, unicode) else item_name, None)
|
||||
rmap = {icu_lower(v) if isinstance(v, unicode_type) else v:k for k, v in self.fields[field].table.id_map.iteritems()}
|
||||
return rmap.get(icu_lower(item_name) if isinstance(item_name, unicode_type) else item_name, None)
|
||||
|
||||
@read_api
|
||||
def get_item_ids(self, field, item_names):
|
||||
' Return the item id for item_name (case-insensitive) '
|
||||
rmap = {icu_lower(v) if isinstance(v, unicode) else v:k for k, v in self.fields[field].table.id_map.iteritems()}
|
||||
return {name:rmap.get(icu_lower(name) if isinstance(name, unicode) else name, None) for name in item_names}
|
||||
rmap = {icu_lower(v) if isinstance(v, unicode_type) else v:k for k, v in self.fields[field].table.id_map.iteritems()}
|
||||
return {name:rmap.get(icu_lower(name) if isinstance(name, unicode_type) else name, None) for name in item_names}
|
||||
|
||||
@read_api
|
||||
def author_data(self, author_ids=None):
|
||||
|
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import copy
|
||||
from functools import partial
|
||||
from polyglot.builtins import map
|
||||
from polyglot.builtins import unicode_type, map
|
||||
|
||||
from calibre.ebooks.metadata import author_to_author_sort
|
||||
from calibre.utils.config_base import tweaks
|
||||
@ -47,7 +47,7 @@ class Tag(object):
|
||||
return u'%s:%s:%s:%s:%s'%(self.name, self.count, self.id, self.state, self.category)
|
||||
|
||||
def __str__(self):
|
||||
return unicode(self).encode('utf-8')
|
||||
return unicode_type(self).encode('utf-8')
|
||||
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
@ -101,8 +101,8 @@ def clean_user_categories(dbcache):
|
||||
if len(comps) == 0:
|
||||
i = 1
|
||||
while True:
|
||||
if unicode(i) not in user_cats:
|
||||
new_cats[unicode(i)] = user_cats[k]
|
||||
if unicode_type(i) not in user_cats:
|
||||
new_cats[unicode_type(i)] = user_cats[k]
|
||||
break
|
||||
i += 1
|
||||
else:
|
||||
|
@ -10,6 +10,7 @@ from textwrap import TextWrapper
|
||||
from io import BytesIO
|
||||
|
||||
from calibre import prints
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
readonly = True
|
||||
version = 0 # change this if you change signature of implementation()
|
||||
@ -79,7 +80,7 @@ def do_list(fields, data, opts):
|
||||
widths = list(map(lambda x: 0, fields))
|
||||
for i in data:
|
||||
for j, field in enumerate(fields):
|
||||
widths[j] = max(widths[j], max(len(field), len(unicode(i[field]))))
|
||||
widths[j] = max(widths[j], max(len(field), len(unicode_type(i[field]))))
|
||||
|
||||
screen_width = geometry()[0]
|
||||
if not screen_width:
|
||||
@ -110,7 +111,7 @@ def do_list(fields, data, opts):
|
||||
|
||||
for record in data:
|
||||
text = [
|
||||
wrappers[i].wrap(unicode(record[field]))
|
||||
wrappers[i].wrap(unicode_type(record[field]))
|
||||
for i, field in enumerate(fields)
|
||||
]
|
||||
lines = max(map(len, text))
|
||||
@ -129,7 +130,7 @@ def do_csv(fields, data, opts):
|
||||
for d in data:
|
||||
row = [d[f] for f in fields]
|
||||
csv_print.writerow([
|
||||
x if isinstance(x, bytes) else unicode(x).encode('utf-8') for x in row
|
||||
x if isinstance(x, bytes) else unicode_type(x).encode('utf-8') for x in row
|
||||
])
|
||||
print(buf.getvalue())
|
||||
|
||||
@ -164,11 +165,11 @@ def main(opts, args, dbctx):
|
||||
is_rating = category_metadata(category)['datatype'] == 'rating'
|
||||
for tag in category_data[category]:
|
||||
if is_rating:
|
||||
tag.name = unicode(len(tag.name))
|
||||
tag.name = unicode_type(len(tag.name))
|
||||
data.append({
|
||||
'category': category,
|
||||
'tag_name': tag.name,
|
||||
'count': unicode(tag.count),
|
||||
'count': unicode_type(tag.count),
|
||||
'rating': fmtr(tag.avg_rating),
|
||||
})
|
||||
else:
|
||||
@ -176,7 +177,7 @@ def main(opts, args, dbctx):
|
||||
data.append({
|
||||
'category': category,
|
||||
'tag_name': _('CATEGORY ITEMS'),
|
||||
'count': unicode(len(category_data[category])),
|
||||
'count': unicode_type(len(category_data[category])),
|
||||
'rating': ''
|
||||
})
|
||||
|
||||
|
@ -11,6 +11,7 @@ from calibre.ebooks.metadata.book.base import field_from_string
|
||||
from calibre.ebooks.metadata.book.serialize import read_cover
|
||||
from calibre.ebooks.metadata.opf import get_metadata
|
||||
from calibre.srv.changes import metadata
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
readonly = False
|
||||
version = 0 # change this if you change signature of implementation()
|
||||
@ -181,5 +182,5 @@ def main(opts, args, dbctx):
|
||||
if not final_mi:
|
||||
raise SystemExit(_('No book with id: %s in the database') % book_id)
|
||||
|
||||
prints(unicode(final_mi))
|
||||
prints(unicode_type(final_mi))
|
||||
return 0
|
||||
|
@ -9,6 +9,7 @@ import sys
|
||||
|
||||
from calibre import prints
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
readonly = True
|
||||
version = 0 # change this if you change signature of implementation()
|
||||
@ -52,6 +53,6 @@ def main(opts, args, dbctx):
|
||||
mi = OPFCreator(os.getcwdu(), mi)
|
||||
mi.render(sys.stdout)
|
||||
else:
|
||||
prints(unicode(mi))
|
||||
prints(unicode_type(mi))
|
||||
|
||||
return 0
|
||||
|
@ -15,6 +15,7 @@ from copy import deepcopy
|
||||
from calibre.ebooks.metadata.book.base import Metadata, SIMPLE_GET, TOP_LEVEL_IDENTIFIERS, NULL_VALUES, ALL_METADATA_FIELDS
|
||||
from calibre.ebooks.metadata.book.formatter import SafeFormat
|
||||
from calibre.utils.date import utcnow
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
# Lazy format metadata retrieval {{{
|
||||
'''
|
||||
@ -46,7 +47,7 @@ class MutableBase(object):
|
||||
|
||||
@resolved
|
||||
def __unicode__(self):
|
||||
return unicode(self._values)
|
||||
return unicode_type(self._values)
|
||||
|
||||
@resolved
|
||||
def __len__(self):
|
||||
|
@ -11,6 +11,7 @@ import os
|
||||
|
||||
from calibre import prints
|
||||
from calibre.utils.date import isoformat, DEFAULT_DATE
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class SchemaUpgrade(object):
|
||||
@ -601,7 +602,7 @@ class SchemaUpgrade(object):
|
||||
id_ = str(id_)
|
||||
fname = custom_recipe_filename(id_, title)
|
||||
custom_recipes[id_] = (title, fname)
|
||||
if isinstance(script, unicode):
|
||||
if isinstance(script, unicode_type):
|
||||
script = script.encode('utf-8')
|
||||
with open(os.path.join(bdir, fname), 'wb') as f:
|
||||
f.write(script)
|
||||
|
@ -19,6 +19,7 @@ from calibre.utils.date import parse_date, UNDEFINED_DATE, now, dt_as_local
|
||||
from calibre.utils.icu import primary_contains, sort_key
|
||||
from calibre.utils.localization import lang_map, canonicalize_lang
|
||||
from calibre.utils.search_query_parser import SearchQueryParser, ParseException
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
CONTAINS_MATCH = 0
|
||||
EQUALS_MATCH = 1
|
||||
@ -148,7 +149,7 @@ class DateSearch(object): # {{{
|
||||
|
||||
if query == 'false':
|
||||
for v, book_ids in field_iter():
|
||||
if isinstance(v, (str, unicode)):
|
||||
if isinstance(v, (str, unicode_type)):
|
||||
v = parse_date(v)
|
||||
if v is None or v <= UNDEFINED_DATE:
|
||||
matches |= book_ids
|
||||
@ -156,7 +157,7 @@ class DateSearch(object): # {{{
|
||||
|
||||
if query == 'true':
|
||||
for v, book_ids in field_iter():
|
||||
if isinstance(v, (str, unicode)):
|
||||
if isinstance(v, (str, unicode_type)):
|
||||
v = parse_date(v)
|
||||
if v is not None and v > UNDEFINED_DATE:
|
||||
matches |= book_ids
|
||||
@ -198,7 +199,7 @@ class DateSearch(object): # {{{
|
||||
field_count = query.count('/') + 1
|
||||
|
||||
for v, book_ids in field_iter():
|
||||
if isinstance(v, (str, unicode)):
|
||||
if isinstance(v, (str, unicode_type)):
|
||||
v = parse_date(v)
|
||||
if v is not None and relop(dt_as_local(v), qd, field_count):
|
||||
matches |= book_ids
|
||||
@ -407,7 +408,7 @@ class SavedSearchQueries(object): # {{{
|
||||
return self._db()
|
||||
|
||||
def force_unicode(self, x):
|
||||
if not isinstance(x, unicode):
|
||||
if not isinstance(x, unicode_type):
|
||||
x = x.decode(preferred_encoding, 'replace')
|
||||
return x
|
||||
|
||||
|
@ -9,7 +9,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
import os, errno, cPickle, sys, re
|
||||
from locale import localeconv
|
||||
from collections import OrderedDict, namedtuple
|
||||
from polyglot.builtins import map
|
||||
from polyglot.builtins import map, unicode_type
|
||||
from threading import Lock
|
||||
|
||||
from calibre import as_unicode, prints
|
||||
@ -19,7 +19,7 @@ from calibre.utils.localization import canonicalize_lang
|
||||
|
||||
|
||||
def force_to_bool(val):
|
||||
if isinstance(val, (str, unicode)):
|
||||
if isinstance(val, (str, unicode_type)):
|
||||
try:
|
||||
val = icu_lower(val)
|
||||
if not val:
|
||||
|
@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
|
||||
import weakref, operator
|
||||
from functools import partial
|
||||
from itertools import izip, imap
|
||||
from polyglot.builtins import map
|
||||
from polyglot.builtins import map, unicode_type
|
||||
|
||||
from calibre.ebooks.metadata import title_sort
|
||||
from calibre.utils.config_base import tweaks, prefs
|
||||
@ -374,7 +374,7 @@ class View(object):
|
||||
self.marked_ids = dict.fromkeys(id_dict, u'true')
|
||||
else:
|
||||
# Ensure that all the items in the dict are text
|
||||
self.marked_ids = dict(izip(id_dict.iterkeys(), imap(unicode,
|
||||
self.marked_ids = dict(izip(id_dict.iterkeys(), imap(unicode_type,
|
||||
id_dict.itervalues())))
|
||||
# This invalidates all searches in the cache even though the cache may
|
||||
# be shared by multiple views. This is not ideal, but...
|
||||
@ -432,4 +432,3 @@ class View(object):
|
||||
self._map_filtered = ids + self._map_filtered
|
||||
if prefs['mark_new_books']:
|
||||
self.toggle_marked_ids(ids)
|
||||
|
||||
|
@ -10,18 +10,15 @@ __docformat__ = 'restructuredtext en'
|
||||
import re
|
||||
from functools import partial
|
||||
from datetime import datetime
|
||||
from polyglot.builtins import zip
|
||||
from polyglot.builtins import unicode_type, zip
|
||||
|
||||
from calibre.constants import preferred_encoding, ispy3
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.ebooks.metadata import author_to_author_sort, title_sort
|
||||
from calibre.utils.date import (
|
||||
parse_only_date, parse_date, UNDEFINED_DATE, isoformat, is_date_undefined)
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
from calibre.utils.icu import strcmp
|
||||
|
||||
if ispy3:
|
||||
unicode = str
|
||||
|
||||
# Convert data into values suitable for the db {{{
|
||||
|
||||
|
||||
@ -32,7 +29,7 @@ def sqlite_datetime(x):
|
||||
def single_text(x):
|
||||
if x is None:
|
||||
return x
|
||||
if not isinstance(x, unicode):
|
||||
if not isinstance(x, unicode_type):
|
||||
x = x.decode(preferred_encoding, 'replace')
|
||||
x = x.strip()
|
||||
return x if x else None
|
||||
@ -60,7 +57,7 @@ def multiple_text(sep, ui_sep, x):
|
||||
return ()
|
||||
if isinstance(x, bytes):
|
||||
x = x.decode(preferred_encoding, 'replace')
|
||||
if isinstance(x, unicode):
|
||||
if isinstance(x, unicode_type):
|
||||
x = x.split(sep)
|
||||
else:
|
||||
x = (y.decode(preferred_encoding, 'replace') if isinstance(y, bytes)
|
||||
@ -72,7 +69,7 @@ def multiple_text(sep, ui_sep, x):
|
||||
|
||||
|
||||
def adapt_datetime(x):
|
||||
if isinstance(x, (unicode, bytes)):
|
||||
if isinstance(x, (unicode_type, bytes)):
|
||||
x = parse_date(x, assume_utc=False, as_utc=False)
|
||||
if x and is_date_undefined(x):
|
||||
x = UNDEFINED_DATE
|
||||
@ -80,7 +77,7 @@ def adapt_datetime(x):
|
||||
|
||||
|
||||
def adapt_date(x):
|
||||
if isinstance(x, (unicode, bytes)):
|
||||
if isinstance(x, (unicode_type, bytes)):
|
||||
x = parse_only_date(x)
|
||||
if x is None or is_date_undefined(x):
|
||||
x = UNDEFINED_DATE
|
||||
@ -90,14 +87,14 @@ def adapt_date(x):
|
||||
def adapt_number(typ, x):
|
||||
if x is None:
|
||||
return None
|
||||
if isinstance(x, (unicode, bytes)):
|
||||
if isinstance(x, (unicode_type, bytes)):
|
||||
if not x or x.lower() == 'none':
|
||||
return None
|
||||
return typ(x)
|
||||
|
||||
|
||||
def adapt_bool(x):
|
||||
if isinstance(x, (unicode, bytes)):
|
||||
if isinstance(x, (unicode_type, bytes)):
|
||||
x = x.lower()
|
||||
if x == 'true':
|
||||
x = True
|
||||
|
@ -14,6 +14,7 @@ import sys
|
||||
|
||||
from calibre.devices.usbms.driver import USBMS
|
||||
from calibre.ebooks.metadata import string_to_authors
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class JETBOOK(USBMS):
|
||||
@ -64,7 +65,7 @@ class JETBOOK(USBMS):
|
||||
|
||||
def check_unicode(txt):
|
||||
txt = txt.replace('_', ' ')
|
||||
if not isinstance(txt, unicode):
|
||||
if not isinstance(txt, unicode_type):
|
||||
return txt.decode(sys.getfilesystemencoding(), 'replace')
|
||||
|
||||
return txt
|
||||
|
@ -15,6 +15,7 @@ from calibre.constants import DEBUG
|
||||
from calibre.devices.kindle.bookmark import Bookmark
|
||||
from calibre.devices.usbms.driver import USBMS
|
||||
from calibre import strftime, fsync, prints
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
'''
|
||||
Notes on collections:
|
||||
@ -113,7 +114,7 @@ class KINDLE(USBMS):
|
||||
match = cls.WIRELESS_FILE_NAME_PATTERN.match(os.path.basename(path))
|
||||
if match is not None:
|
||||
mi.title = match.group('title')
|
||||
if not isinstance(mi.title, unicode):
|
||||
if not isinstance(mi.title, unicode_type):
|
||||
mi.title = mi.title.decode(sys.getfilesystemencoding(),
|
||||
'replace')
|
||||
return mi
|
||||
@ -291,9 +292,9 @@ class KINDLE(USBMS):
|
||||
hrTag['class'] = 'annotations_divider'
|
||||
user_notes_soup.insert(0, hrTag)
|
||||
|
||||
mi.comments += unicode(user_notes_soup.prettify())
|
||||
mi.comments += unicode_type(user_notes_soup.prettify())
|
||||
else:
|
||||
mi.comments = unicode(user_notes_soup.prettify())
|
||||
mi.comments = unicode_type(user_notes_soup.prettify())
|
||||
# Update library comments
|
||||
db.set_comment(db_id, mi.comments)
|
||||
|
||||
@ -547,7 +548,7 @@ class KINDLE2(KINDLE):
|
||||
cust_col_name = opts.extra_customization[self.OPT_APNX_METHOD_COL]
|
||||
if cust_col_name:
|
||||
try:
|
||||
temp = unicode(metadata.get(cust_col_name)).lower()
|
||||
temp = unicode_type(metadata.get(cust_col_name)).lower()
|
||||
if temp in self.EXTRA_CUSTOMIZATION_CHOICES[self.OPT_APNX_METHOD]:
|
||||
method = temp
|
||||
else:
|
||||
|
@ -14,6 +14,7 @@ from calibre.devices.usbms.books import CollectionsBookList
|
||||
from calibre.utils.config_base import prefs
|
||||
from calibre.devices.usbms.driver import debug_print
|
||||
from calibre.ebooks.metadata import author_to_author_sort
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class Book(Book_):
|
||||
@ -95,7 +96,7 @@ class Book(Book_):
|
||||
ans = [u"Kobo metadata:"]
|
||||
|
||||
def fmt(x, y):
|
||||
ans.append(u'%-20s: %s'%(unicode(x), unicode(y)))
|
||||
ans.append(u'%-20s: %s'%(unicode_type(x), unicode_type(y)))
|
||||
|
||||
if self.contentID:
|
||||
fmt('Content ID', self.contentID)
|
||||
|
@ -32,6 +32,7 @@ from calibre import prints, fsync
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.constants import DEBUG
|
||||
from calibre.utils.config_base import prefs
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
EPUB_EXT = '.epub'
|
||||
KEPUB_EXT = '.kepub'
|
||||
@ -43,7 +44,7 @@ def qhash(inputstr):
|
||||
instr = b""
|
||||
if isinstance(inputstr, bytes):
|
||||
instr = inputstr
|
||||
elif isinstance(inputstr, unicode):
|
||||
elif isinstance(inputstr, unicode_type):
|
||||
instr = inputstr.encode("utf8")
|
||||
else:
|
||||
return -1
|
||||
@ -1323,9 +1324,9 @@ class KOBO(USBMS):
|
||||
hrTag['class'] = 'annotations_divider'
|
||||
user_notes_soup.insert(0, hrTag)
|
||||
|
||||
mi.comments += unicode(user_notes_soup.prettify())
|
||||
mi.comments += unicode_type(user_notes_soup.prettify())
|
||||
else:
|
||||
mi.comments = unicode(user_notes_soup.prettify())
|
||||
mi.comments = unicode_type(user_notes_soup.prettify())
|
||||
# Update library comments
|
||||
db.set_comment(db_id, mi.comments)
|
||||
|
||||
@ -1824,7 +1825,7 @@ class KOBOTOUCH(KOBO):
|
||||
bookshelves.append(row['ShelfName'])
|
||||
|
||||
cursor.close()
|
||||
# debug_print("KoboTouch:get_bookshelvesforbook - count bookshelves=" + unicode(count_bookshelves))
|
||||
# debug_print("KoboTouch:get_bookshelvesforbook - count bookshelves=" + unicode_type(count_bookshelves))
|
||||
return bookshelves
|
||||
|
||||
self.debug_index = 0
|
||||
@ -2394,7 +2395,7 @@ class KOBOTOUCH(KOBO):
|
||||
|
||||
if self.manage_collections:
|
||||
if collections:
|
||||
# debug_print("KoboTouch:update_device_database_collections - length collections=" + unicode(len(collections)))
|
||||
# debug_print("KoboTouch:update_device_database_collections - length collections=" + unicode_type(len(collections)))
|
||||
|
||||
# Need to reset the collections outside the particular loops
|
||||
# otherwise the last item will not be removed
|
||||
@ -2834,7 +2835,7 @@ class KOBOTOUCH(KOBO):
|
||||
# count_bookshelves = i + 1
|
||||
|
||||
cursor.close()
|
||||
# debug_print("KoboTouch:get_bookshelflist - count bookshelves=" + unicode(count_bookshelves))
|
||||
# debug_print("KoboTouch:get_bookshelflist - count bookshelves=" + unicode_type(count_bookshelves))
|
||||
|
||||
return bookshelves
|
||||
|
||||
@ -2918,7 +2919,7 @@ class KOBOTOUCH(KOBO):
|
||||
cursor.execute(addquery, add_values)
|
||||
elif result['_IsDeleted'] == 'true':
|
||||
debug_print("KoboTouch:check_for_bookshelf - Shelf '%s' is deleted - undeleting. result['_IsDeleted']='%s'" % (
|
||||
bookshelf_name, unicode(result['_IsDeleted'])))
|
||||
bookshelf_name, unicode_type(result['_IsDeleted'])))
|
||||
cursor.execute(updatequery, test_values)
|
||||
|
||||
cursor.close()
|
||||
|
@ -16,6 +16,7 @@ from calibre.gui2.device_drivers.tabbed_device_config import TabbedDeviceConfig,
|
||||
from calibre.devices.usbms.driver import debug_print
|
||||
from calibre.gui2 import error_dialog
|
||||
from calibre.gui2.dialogs.template_dialog import TemplateDialog
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def wrap_msg(msg):
|
||||
@ -122,7 +123,7 @@ class KOBOTOUCHConfig(TabbedDeviceConfig):
|
||||
|
||||
p['support_newer_firmware'] = self.support_newer_firmware
|
||||
p['debugging_title'] = self.debugging_title
|
||||
p['driver_version'] = '.'.join([unicode(i) for i in self.device.version])
|
||||
p['driver_version'] = '.'.join([unicode_type(i) for i in self.device.version])
|
||||
|
||||
return p
|
||||
|
||||
@ -397,7 +398,7 @@ class AdvancedGroupBox(DeviceOptionsGroupBox):
|
||||
'to perform full read-write functionality - Here be Dragons!! '
|
||||
'Enable only if you are comfortable with restoring your kobo '
|
||||
'to factory defaults and testing software. '
|
||||
'This driver supports firmware V2.x.x and DBVersion up to ') + unicode(
|
||||
'This driver supports firmware V2.x.x and DBVersion up to ') + unicode_type(
|
||||
device.supported_dbversion), device.get_pref('support_newer_firmware')
|
||||
)
|
||||
|
||||
@ -555,7 +556,7 @@ class TemplateConfig(QWidget): # {{{
|
||||
|
||||
@property
|
||||
def template(self):
|
||||
return unicode(self.t.text()).strip()
|
||||
return unicode_type(self.t.text()).strip()
|
||||
|
||||
@template.setter
|
||||
def template(self, template):
|
||||
@ -577,7 +578,7 @@ class TemplateConfig(QWidget): # {{{
|
||||
except Exception as err:
|
||||
error_dialog(self, _('Invalid template'),
|
||||
'<p>'+_('The template "%s" is invalid:')%tmpl +
|
||||
'<br>'+unicode(err), show=True)
|
||||
'<br>'+unicode_type(err), show=True)
|
||||
|
||||
return False
|
||||
# }}}
|
||||
|
@ -18,6 +18,7 @@ from calibre.devices.mtp.base import debug
|
||||
from calibre.devices.mtp.defaults import DeviceDefaults
|
||||
from calibre.ptempfile import SpooledTemporaryFile, PersistentTemporaryDirectory
|
||||
from calibre.utils.filenames import shorten_components_to
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
BASE = importlib.import_module('calibre.devices.mtp.%s.driver'%(
|
||||
'windows' if iswindows else 'unix')).MTP_DEVICE
|
||||
@ -75,7 +76,7 @@ class MTP_DEVICE(BASE):
|
||||
|
||||
def is_folder_ignored(self, storage_or_storage_id, path,
|
||||
ignored_folders=None):
|
||||
storage_id = unicode(getattr(storage_or_storage_id, 'object_id',
|
||||
storage_id = unicode_type(getattr(storage_or_storage_id, 'object_id',
|
||||
storage_or_storage_id))
|
||||
lpath = tuple(icu_lower(name) for name in path)
|
||||
if ignored_folders is None:
|
||||
@ -166,14 +167,14 @@ class MTP_DEVICE(BASE):
|
||||
traceback.print_exc()
|
||||
dinfo = {}
|
||||
if dinfo.get('device_store_uuid', None) is None:
|
||||
dinfo['device_store_uuid'] = unicode(uuid.uuid4())
|
||||
dinfo['device_store_uuid'] = unicode_type(uuid.uuid4())
|
||||
if dinfo.get('device_name', None) is None:
|
||||
dinfo['device_name'] = self.current_friendly_name
|
||||
if name is not None:
|
||||
dinfo['device_name'] = name
|
||||
dinfo['location_code'] = location_code
|
||||
dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None)
|
||||
dinfo['calibre_version'] = '.'.join([unicode(i) for i in numeric_version])
|
||||
dinfo['calibre_version'] = '.'.join([unicode_type(i) for i in numeric_version])
|
||||
dinfo['date_last_connected'] = isoformat(now())
|
||||
dinfo['mtp_prefix'] = storage.storage_prefix
|
||||
raw = json.dumps(dinfo, default=to_json)
|
||||
|
@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
|
||||
import weakref, sys, json
|
||||
from collections import deque
|
||||
from operator import attrgetter
|
||||
from polyglot.builtins import map
|
||||
from polyglot.builtins import map, unicode_type
|
||||
from datetime import datetime
|
||||
|
||||
from calibre import human_readable, prints, force_unicode
|
||||
@ -74,7 +74,7 @@ class FileOrFolder(object):
|
||||
def __repr__(self):
|
||||
name = 'Folder' if self.is_folder else 'File'
|
||||
try:
|
||||
path = unicode(self.full_path)
|
||||
path = unicode_type(self.full_path)
|
||||
except:
|
||||
path = ''
|
||||
datum = 'size=%s'%(self.size)
|
||||
@ -250,5 +250,3 @@ class FilesystemCache(object):
|
||||
return self.id_map[object_id]
|
||||
except KeyError:
|
||||
raise ValueError('No object found with MTP path: %s'%path)
|
||||
|
||||
|
||||
|
@ -17,6 +17,7 @@ from calibre.constants import plugins, islinux, isosx, ispy3
|
||||
from calibre.ptempfile import SpooledTemporaryFile
|
||||
from calibre.devices.errors import OpenFailed, DeviceError, BlacklistedDevice, OpenActionNeeded
|
||||
from calibre.devices.mtp.base import MTPDeviceBase, synchronous, debug
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
MTPDevice = namedtuple('MTPDevice', 'busnum devnum vendor_id product_id '
|
||||
'bcd serial manufacturer product')
|
||||
@ -321,7 +322,7 @@ class MTP_DEVICE(MTPDeviceBase):
|
||||
storage.append({'id':sid, 'size':capacity,
|
||||
'is_folder':True, 'name':name, 'can_delete':False,
|
||||
'is_system':True})
|
||||
self._currently_getting_sid = unicode(sid)
|
||||
self._currently_getting_sid = unicode_type(sid)
|
||||
items, errs = self.dev.get_filesystem(sid,
|
||||
partial(self._filesystem_callback, {}))
|
||||
all_items.extend(items), all_errs.extend(errs)
|
||||
@ -373,7 +374,7 @@ class MTP_DEVICE(MTPDeviceBase):
|
||||
e = parent.folder_named(name)
|
||||
if e is not None:
|
||||
return e
|
||||
ename = name.encode('utf-8') if isinstance(name, unicode) else name
|
||||
ename = name.encode('utf-8') if isinstance(name, unicode_type) else name
|
||||
sid, pid = parent.storage_id, parent.object_id
|
||||
if pid == sid:
|
||||
pid = 0
|
||||
@ -396,7 +397,7 @@ class MTP_DEVICE(MTPDeviceBase):
|
||||
raise ValueError('Cannot upload file %s, it already exists'%(
|
||||
e.full_path,))
|
||||
self.delete_file_or_folder(e)
|
||||
ename = name.encode('utf-8') if isinstance(name, unicode) else name
|
||||
ename = name.encode('utf-8') if isinstance(name, unicode_type) else name
|
||||
sid, pid = parent.storage_id, parent.object_id
|
||||
if pid == sid:
|
||||
pid = 0xFFFFFFFF
|
||||
|
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import time, threading, traceback
|
||||
from functools import wraps, partial
|
||||
from polyglot.builtins import zip
|
||||
from polyglot.builtins import unicode_type, zip
|
||||
from itertools import chain
|
||||
|
||||
from calibre import as_unicode, prints, force_unicode
|
||||
@ -264,7 +264,7 @@ class MTP_DEVICE(MTPDeviceBase):
|
||||
break
|
||||
storage = {'id':storage_id, 'size':capacity, 'name':name,
|
||||
'is_folder':True, 'can_delete':False, 'is_system':True}
|
||||
self._currently_getting_sid = unicode(storage_id)
|
||||
self._currently_getting_sid = unicode_type(storage_id)
|
||||
id_map = self.dev.get_filesystem(storage_id, partial(
|
||||
self._filesystem_callback, {}))
|
||||
for x in id_map.itervalues():
|
||||
@ -441,5 +441,3 @@ class MTP_DEVICE(MTPDeviceBase):
|
||||
ans = self.dev.put_file(pid, name, stream, size, callback)
|
||||
ans['storage_id'] = sid
|
||||
return parent.add_child(ans)
|
||||
|
||||
|
||||
|
@ -24,6 +24,7 @@ from calibre.devices.usbms.books import CollectionsBookList
|
||||
from calibre.devices.usbms.books import BookList
|
||||
from calibre.ebooks.metadata import authors_to_sort_string, authors_to_string
|
||||
from calibre.constants import islinux
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
DBPATH = 'Sony_Reader/database/books.db'
|
||||
THUMBPATH = 'Sony_Reader/database/cache/books/%s/thumbnail/main_thumbnail.jpg'
|
||||
@ -170,7 +171,7 @@ class PRST1(USBMS):
|
||||
|
||||
with closing(sqlite.connect(dbpath)) as connection:
|
||||
# Replace undecodable characters in the db instead of erroring out
|
||||
connection.text_factory = lambda x: unicode(x, "utf-8", "replace")
|
||||
connection.text_factory = lambda x: unicode_type(x, "utf-8", "replace")
|
||||
|
||||
cursor = connection.cursor()
|
||||
# Query collections
|
||||
|
@ -38,6 +38,7 @@ from calibre.utils.filenames import ascii_filename as sanitize, shorten_componen
|
||||
from calibre.utils.mdns import (publish as publish_zeroconf, unpublish as
|
||||
unpublish_zeroconf, get_all_ips)
|
||||
from calibre.utils.socket_inheritance import set_socket_inherit
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def synchronous(tlockname):
|
||||
@ -397,7 +398,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
|
||||
if isinstance(a, dict):
|
||||
printable = {}
|
||||
for k,v in a.iteritems():
|
||||
if isinstance(v, (str, unicode)) and len(v) > 50:
|
||||
if isinstance(v, (str, unicode_type)) and len(v) > 50:
|
||||
printable[k] = 'too long'
|
||||
else:
|
||||
printable[k] = v
|
||||
@ -418,14 +419,14 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
|
||||
if not isinstance(dinfo, dict):
|
||||
dinfo = {}
|
||||
if dinfo.get('device_store_uuid', None) is None:
|
||||
dinfo['device_store_uuid'] = unicode(uuid.uuid4())
|
||||
dinfo['device_store_uuid'] = unicode_type(uuid.uuid4())
|
||||
if dinfo.get('device_name') is None:
|
||||
dinfo['device_name'] = self.get_gui_name()
|
||||
if name is not None:
|
||||
dinfo['device_name'] = name
|
||||
dinfo['location_code'] = location_code
|
||||
dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None)
|
||||
dinfo['calibre_version'] = '.'.join([unicode(i) for i in numeric_version])
|
||||
dinfo['calibre_version'] = '.'.join([unicode_type(i) for i in numeric_version])
|
||||
dinfo['date_last_connected'] = isoformat(now())
|
||||
dinfo['prefix'] = self.PREFIX
|
||||
return dinfo
|
||||
@ -478,7 +479,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
|
||||
from calibre.library.save_to_disk import get_components
|
||||
from calibre.library.save_to_disk import config
|
||||
opts = config().parse()
|
||||
if not isinstance(template, unicode):
|
||||
if not isinstance(template, unicode_type):
|
||||
template = template.decode('utf-8')
|
||||
app_id = str(getattr(mdata, 'application_id', ''))
|
||||
id_ = mdata.get('id', fname)
|
||||
@ -726,7 +727,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
|
||||
from calibre.utils.date import now, parse_date
|
||||
try:
|
||||
key = self._make_metadata_cache_key(uuid, ext_or_lpath)
|
||||
if isinstance(lastmod, unicode):
|
||||
if isinstance(lastmod, unicode_type):
|
||||
if lastmod == 'None':
|
||||
return None
|
||||
lastmod = parse_date(lastmod)
|
||||
|
@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, re
|
||||
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
def node_mountpoint(node):
|
||||
|
||||
@ -48,7 +49,7 @@ class UDisks(object):
|
||||
def mount(self, device_node_path):
|
||||
d = self.device(device_node_path)
|
||||
try:
|
||||
return unicode(d.FilesystemMount('',
|
||||
return unicode_type(d.FilesystemMount('',
|
||||
['auth_no_user_interaction', 'rw', 'noexec', 'nosuid',
|
||||
'nodev', 'uid=%d'%os.geteuid(), 'gid=%d'%os.getegid()]))
|
||||
except:
|
||||
@ -131,7 +132,7 @@ class UDisks2(object):
|
||||
mount_options = ['rw', 'noexec', 'nosuid',
|
||||
'nodev', 'uid=%d'%os.geteuid(), 'gid=%d'%os.getegid()]
|
||||
try:
|
||||
return unicode(d.Mount(
|
||||
return unicode_type(d.Mount(
|
||||
{
|
||||
'auth.no_user_interaction':True,
|
||||
'options':','.join(mount_options)
|
||||
|
@ -5,6 +5,7 @@ __copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.utils.config_base import Config, ConfigProxy
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class DeviceConfig(object):
|
||||
@ -107,15 +108,15 @@ class DeviceConfig(object):
|
||||
if hasattr(config_widget.opt_extra_customization[i], 'isChecked'):
|
||||
ec.append(config_widget.opt_extra_customization[i].isChecked())
|
||||
elif hasattr(config_widget.opt_extra_customization[i], 'currentText'):
|
||||
ec.append(unicode(config_widget.opt_extra_customization[i].currentText()).strip())
|
||||
ec.append(unicode_type(config_widget.opt_extra_customization[i].currentText()).strip())
|
||||
else:
|
||||
ec.append(unicode(config_widget.opt_extra_customization[i].text()).strip())
|
||||
ec.append(unicode_type(config_widget.opt_extra_customization[i].text()).strip())
|
||||
else:
|
||||
ec = unicode(config_widget.opt_extra_customization.text()).strip()
|
||||
ec = unicode_type(config_widget.opt_extra_customization.text()).strip()
|
||||
if not ec:
|
||||
ec = None
|
||||
proxy['extra_customization'] = ec
|
||||
st = unicode(config_widget.opt_save_template.text())
|
||||
st = unicode_type(config_widget.opt_save_template.text())
|
||||
proxy['save_template'] = st
|
||||
|
||||
@classmethod
|
||||
|
@ -20,6 +20,7 @@ from calibre.devices.usbms.cli import CLI
|
||||
from calibre.devices.usbms.device import Device
|
||||
from calibre.devices.usbms.books import BookList, Book
|
||||
from calibre.ebooks.metadata.book.json_codec import JsonCodec
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
BASE_TIME = None
|
||||
|
||||
@ -105,14 +106,14 @@ class USBMS(CLI, Device):
|
||||
if not isinstance(dinfo, dict):
|
||||
dinfo = {}
|
||||
if dinfo.get('device_store_uuid', None) is None:
|
||||
dinfo['device_store_uuid'] = unicode(uuid.uuid4())
|
||||
dinfo['device_store_uuid'] = unicode_type(uuid.uuid4())
|
||||
if dinfo.get('device_name', None) is None:
|
||||
dinfo['device_name'] = self.get_gui_name()
|
||||
if name is not None:
|
||||
dinfo['device_name'] = name
|
||||
dinfo['location_code'] = location_code
|
||||
dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None)
|
||||
dinfo['calibre_version'] = '.'.join([unicode(i) for i in numeric_version])
|
||||
dinfo['calibre_version'] = '.'.join([unicode_type(i) for i in numeric_version])
|
||||
dinfo['date_last_connected'] = isoformat(now())
|
||||
dinfo['prefix'] = prefix.replace('\\', '/')
|
||||
return dinfo
|
||||
|
@ -11,6 +11,7 @@ import os, time, re
|
||||
from functools import partial
|
||||
|
||||
from calibre.devices.errors import DeviceError, WrongDestinationError, FreeSpaceError
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def sanity_check(on_card, files, card_prefixes, free_space):
|
||||
@ -97,7 +98,7 @@ def create_upload_path(mdata, fname, template, sanitize,
|
||||
ext = path_type.splitext(fname)[1]
|
||||
|
||||
opts = config().parse()
|
||||
if not isinstance(template, unicode):
|
||||
if not isinstance(template, unicode_type):
|
||||
template = template.decode('utf-8')
|
||||
app_id = str(getattr(mdata, 'application_id', ''))
|
||||
id_ = mdata.get('id', fname)
|
||||
|
@ -9,6 +9,7 @@ from various formats.
|
||||
|
||||
import traceback, os, re
|
||||
from calibre import CurrentDir, prints
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class ConversionError(Exception):
|
||||
@ -113,7 +114,7 @@ def extract_calibre_cover(raw, base, log):
|
||||
if matches is None:
|
||||
body = soup.find('body')
|
||||
if body is not None:
|
||||
text = u''.join(map(unicode, body.findAll(text=True)))
|
||||
text = u''.join(map(unicode_type, body.findAll(text=True)))
|
||||
if text.strip():
|
||||
# Body has text, abort
|
||||
return
|
||||
@ -210,7 +211,7 @@ def check_ebook_format(stream, current_guess):
|
||||
|
||||
|
||||
def normalize(x):
|
||||
if isinstance(x, unicode):
|
||||
if isinstance(x, unicode_type):
|
||||
import unicodedata
|
||||
x = unicodedata.normalize('NFC', x)
|
||||
return x
|
||||
|
@ -8,6 +8,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re, codecs
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
ENCODING_PATS = [
|
||||
# XML declaration
|
||||
@ -92,7 +93,7 @@ def force_encoding(raw, verbose, assume_utf8=False):
|
||||
|
||||
|
||||
def detect_xml_encoding(raw, verbose=False, assume_utf8=False):
|
||||
if not raw or isinstance(raw, unicode):
|
||||
if not raw or isinstance(raw, unicode_type):
|
||||
return raw, None
|
||||
for x in ('utf8', 'utf-16-le', 'utf-16-be'):
|
||||
bom = getattr(codecs, 'BOM_'+x.upper().replace('-16', '16').replace(
|
||||
@ -135,7 +136,7 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
|
||||
return '', None
|
||||
raw, encoding = detect_xml_encoding(raw, verbose=verbose,
|
||||
assume_utf8=assume_utf8)
|
||||
if not isinstance(raw, unicode):
|
||||
if not isinstance(raw, unicode_type):
|
||||
raw = raw.decode(encoding, 'replace')
|
||||
|
||||
if strip_encoding_pats:
|
||||
|
@ -14,6 +14,7 @@ from calibre.utils.chm.chm import CHMFile
|
||||
from calibre.constants import plugins
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
chmlib, chmlib_err = plugins['chmlib']
|
||||
@ -48,7 +49,7 @@ class CHMReader(CHMFile):
|
||||
|
||||
def __init__(self, input, log, input_encoding=None):
|
||||
CHMFile.__init__(self)
|
||||
if isinstance(input, unicode):
|
||||
if isinstance(input, unicode_type):
|
||||
input = input.encode(filesystem_encoding)
|
||||
if not self.LoadCHM(input):
|
||||
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
||||
@ -113,7 +114,7 @@ class CHMReader(CHMFile):
|
||||
enc = 'cp1252'
|
||||
for path in self.Contents():
|
||||
fpath = path
|
||||
if not isinstance(path, unicode):
|
||||
if not isinstance(path, unicode_type):
|
||||
fpath = path.decode(enc)
|
||||
lpath = os.path.join(output_dir, fpath)
|
||||
self._ensure_dir(lpath)
|
||||
@ -146,7 +147,7 @@ class CHMReader(CHMFile):
|
||||
with open(lpath, 'r+b') as f:
|
||||
data = f.read()
|
||||
data = self._reformat(data, lpath)
|
||||
if isinstance(data, unicode):
|
||||
if isinstance(data, unicode_type):
|
||||
data = data.encode('utf-8')
|
||||
f.seek(0)
|
||||
f.truncate()
|
||||
|
@ -16,6 +16,7 @@ from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre.utils.icu import numeric_sort_key
|
||||
from calibre.utils.ipc.server import Server
|
||||
from calibre.utils.ipc.job import ParallelJob
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
# If the specified screen has either dimension larger than this value, no image
|
||||
# rescaling is done (we assume that it is a tablet output profile)
|
||||
@ -27,7 +28,7 @@ def extract_comic(path_to_comic_file):
|
||||
Un-archive the comic file.
|
||||
'''
|
||||
tdir = PersistentTemporaryDirectory(suffix='_comic_extract')
|
||||
if not isinstance(tdir, unicode):
|
||||
if not isinstance(tdir, unicode_type):
|
||||
# Needed in case the zip file has wrongly encoded unicode file/dir
|
||||
# names
|
||||
tdir = tdir.decode(filesystem_encoding)
|
||||
@ -273,6 +274,3 @@ def process_pages(pages, opts, update, tdir):
|
||||
ans += pages
|
||||
failures += failures_
|
||||
return ans, failures
|
||||
|
||||
|
||||
|
||||
|
@ -13,6 +13,7 @@ from calibre.utils.lock import ExclusiveFile
|
||||
from calibre import sanitize_file_name
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
from calibre.customize.ui import available_output_formats
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
config_dir = os.path.join(config_dir, 'conversion')
|
||||
@ -85,7 +86,7 @@ class GuiRecommendations(dict):
|
||||
|
||||
def serialize(self):
|
||||
ans = json.dumps(self, indent=2, ensure_ascii=False)
|
||||
if isinstance(ans, unicode):
|
||||
if isinstance(ans, unicode_type):
|
||||
ans = ans.encode('utf-8')
|
||||
return b'json:' + ans
|
||||
|
||||
|
@ -8,6 +8,7 @@ import os
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.constants import filesystem_encoding
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class CHMInput(InputFormatPlugin):
|
||||
@ -34,7 +35,7 @@ class CHMInput(InputFormatPlugin):
|
||||
|
||||
log.debug('Processing CHM...')
|
||||
with TemporaryDirectory('_chm2oeb') as tdir:
|
||||
if not isinstance(tdir, unicode):
|
||||
if not isinstance(tdir, unicode_type):
|
||||
tdir = tdir.decode(filesystem_encoding)
|
||||
html_input = plugin_for_input_format('html')
|
||||
for opt in html_input.options:
|
||||
@ -125,7 +126,7 @@ class CHMInput(InputFormatPlugin):
|
||||
base = os.path.dirname(os.path.abspath(htmlpath))
|
||||
|
||||
def unquote(x):
|
||||
if isinstance(x, unicode):
|
||||
if isinstance(x, unicode_type):
|
||||
x = x.encode('utf-8')
|
||||
return _unquote(x).decode('utf-8')
|
||||
|
||||
|
@ -7,6 +7,7 @@ import os, re, posixpath
|
||||
from itertools import cycle
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC'
|
||||
IDPF_OBFUSCATION = 'http://www.idpf.org/2008/embedding'
|
||||
@ -367,7 +368,7 @@ class EPUBInput(InputFormatPlugin):
|
||||
def add_from_li(li, parent):
|
||||
href = text = None
|
||||
for x in li.iterchildren(XHTML('a'), XHTML('span')):
|
||||
text = etree.tostring(x, method='text', encoding=unicode, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
|
||||
text = etree.tostring(x, method='text', encoding=unicode_type, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
|
||||
href = x.get('href')
|
||||
if href:
|
||||
if href.startswith('#'):
|
||||
|
@ -13,6 +13,7 @@ from calibre.customize.conversion import (OutputFormatPlugin,
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre import CurrentDir
|
||||
from calibre.constants import filesystem_encoding
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
block_level_tags = (
|
||||
'address',
|
||||
@ -225,8 +226,8 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
identifiers = oeb.metadata['identifier']
|
||||
uuid = None
|
||||
for x in identifiers:
|
||||
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
|
||||
uuid = unicode(x).split(':')[-1]
|
||||
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode_type(x).startswith('urn:uuid:'):
|
||||
uuid = unicode_type(x).split(':')[-1]
|
||||
break
|
||||
encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
|
||||
|
||||
@ -241,7 +242,7 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
# for some absurd reason, or it will throw a hissy fit and refuse
|
||||
# to use the obfuscated fonts.
|
||||
for x in identifiers:
|
||||
if unicode(x) == uuid:
|
||||
if unicode_type(x) == uuid:
|
||||
x.content = 'urn:uuid:'+uuid
|
||||
|
||||
with TemporaryDirectory(u'_epub_output') as tdir:
|
||||
@ -325,7 +326,7 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
fonts = []
|
||||
for uri in list(uris.keys()):
|
||||
path = uris[uri]
|
||||
if isinstance(path, unicode):
|
||||
if isinstance(path, unicode_type):
|
||||
path = path.encode(filesystem_encoding)
|
||||
if not os.path.exists(path):
|
||||
uris.pop(uri)
|
||||
@ -339,7 +340,7 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
f.write(chr(ord(data[i]) ^ key[i%16]))
|
||||
else:
|
||||
self.log.warn('Font', path, 'is invalid, ignoring')
|
||||
if not isinstance(uri, unicode):
|
||||
if not isinstance(uri, unicode_type):
|
||||
uri = uri.decode('utf-8')
|
||||
fonts.append(u'''
|
||||
<enc:EncryptedData>
|
||||
|
@ -8,6 +8,7 @@ import os, re
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre import guess_type
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
|
||||
FB21NS = 'http://www.gribuser.ru/xml/fictionbook/2.1'
|
||||
@ -70,7 +71,7 @@ class FB2Input(InputFormatPlugin):
|
||||
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
|
||||
css = ''
|
||||
for s in stylesheets:
|
||||
css += etree.tostring(s, encoding=unicode, method='text',
|
||||
css += etree.tostring(s, encoding=unicode_type, method='text',
|
||||
with_tail=False) + '\n\n'
|
||||
if css:
|
||||
import css_parser, logging
|
||||
@ -82,7 +83,7 @@ class FB2Input(InputFormatPlugin):
|
||||
log.debug('Parsing stylesheet...')
|
||||
stylesheet = parser.parseString(text)
|
||||
stylesheet.namespaces['h'] = XHTML_NS
|
||||
css = unicode(stylesheet.cssText).replace('h|style', 'h|span')
|
||||
css = unicode_type(stylesheet.cssText).replace('h|style', 'h|span')
|
||||
css = re.sub(r'name\s*=\s*', 'class=', css)
|
||||
self.extract_embedded_content(doc)
|
||||
log.debug('Converting XML to HTML...')
|
||||
|
@ -17,6 +17,7 @@ from calibre.customize.conversion import (InputFormatPlugin,
|
||||
from calibre.utils.localization import get_lang
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.utils.imghdr import what
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def sanitize_file_name(x):
|
||||
@ -225,7 +226,7 @@ class HTMLInput(InputFormatPlugin):
|
||||
|
||||
def link_to_local_path(self, link_, base=None):
|
||||
from calibre.ebooks.html.input import Link
|
||||
if not isinstance(link_, unicode):
|
||||
if not isinstance(link_, unicode_type):
|
||||
try:
|
||||
link_ = link_.decode('utf-8', 'error')
|
||||
except:
|
||||
@ -289,7 +290,7 @@ class HTMLInput(InputFormatPlugin):
|
||||
# bhref refers to an already existing file. The read() method of
|
||||
# DirContainer will call unquote on it before trying to read the
|
||||
# file, therefore we quote it here.
|
||||
if isinstance(bhref, unicode):
|
||||
if isinstance(bhref, unicode_type):
|
||||
bhref = bhref.encode('utf-8')
|
||||
item.html_input_href = quote(bhref).decode('utf-8')
|
||||
if guessed in self.OEB_STYLES:
|
||||
|
@ -9,6 +9,7 @@ from os.path import dirname, abspath, relpath as _relpath, exists, basename
|
||||
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
||||
from calibre import CurrentDir
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def relpath(*args):
|
||||
@ -135,7 +136,7 @@ class HTMLOutput(OutputFormatPlugin):
|
||||
toc=html_toc, meta=meta, nextLink=nextLink,
|
||||
tocUrl=tocUrl, cssLink=cssLink,
|
||||
firstContentPageLink=nextLink)
|
||||
if isinstance(t, unicode):
|
||||
if isinstance(t, unicode_type):
|
||||
t = t.encode('utf-8')
|
||||
f.write(t)
|
||||
|
||||
|
@ -13,6 +13,7 @@ from cStringIO import StringIO
|
||||
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||
OptionRecommendation
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class HTMLZOutput(OutputFormatPlugin):
|
||||
@ -81,9 +82,9 @@ class HTMLZOutput(OutputFormatPlugin):
|
||||
fname = u'index'
|
||||
if opts.htmlz_title_filename:
|
||||
from calibre.utils.filenames import shorten_components_to
|
||||
fname = shorten_components_to(100, (ascii_filename(unicode(oeb_book.metadata.title[0])),))[0]
|
||||
fname = shorten_components_to(100, (ascii_filename(unicode_type(oeb_book.metadata.title[0])),))[0]
|
||||
with open(os.path.join(tdir, fname+u'.html'), 'wb') as tf:
|
||||
if isinstance(html, unicode):
|
||||
if isinstance(html, unicode_type):
|
||||
html = html.encode('utf-8')
|
||||
tf.write(html)
|
||||
|
||||
@ -100,7 +101,7 @@ class HTMLZOutput(OutputFormatPlugin):
|
||||
for item in oeb_book.manifest:
|
||||
if item.media_type in OEB_IMAGES and item.href in images:
|
||||
if item.media_type == SVG_MIME:
|
||||
data = unicode(etree.tostring(item.data, encoding=unicode))
|
||||
data = unicode_type(etree.tostring(item.data, encoding=unicode_type))
|
||||
else:
|
||||
data = item.data
|
||||
fname = os.path.join(tdir, u'images', images[item.href])
|
||||
|
@ -10,6 +10,7 @@ import sys, os
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class LRFOptions(object):
|
||||
@ -17,7 +18,7 @@ class LRFOptions(object):
|
||||
def __init__(self, output, opts, oeb):
|
||||
def f2s(f):
|
||||
try:
|
||||
return unicode(f[0])
|
||||
return unicode_type(f[0])
|
||||
except:
|
||||
return ''
|
||||
m = oeb.metadata
|
||||
@ -31,13 +32,13 @@ class LRFOptions(object):
|
||||
self.title_sort = self.author_sort = ''
|
||||
for x in m.creator:
|
||||
if x.role == 'aut':
|
||||
self.author = unicode(x)
|
||||
fa = unicode(getattr(x, 'file_as', ''))
|
||||
self.author = unicode_type(x)
|
||||
fa = unicode_type(getattr(x, 'file_as', ''))
|
||||
if fa:
|
||||
self.author_sort = fa
|
||||
for x in m.title:
|
||||
if unicode(x.file_as):
|
||||
self.title_sort = unicode(x.file_as)
|
||||
if unicode_type(x.file_as):
|
||||
self.title_sort = unicode_type(x.file_as)
|
||||
self.freetext = f2s(m.description)
|
||||
self.category = f2s(m.subject)
|
||||
self.cover = None
|
||||
|
@ -6,6 +6,7 @@ __docformat__ = 'restructuredtext en'
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class MOBIInput(InputFormatPlugin):
|
||||
@ -49,7 +50,7 @@ class MOBIInput(InputFormatPlugin):
|
||||
|
||||
raw = parse_cache.pop('calibre_raw_mobi_markup', False)
|
||||
if raw:
|
||||
if isinstance(raw, unicode):
|
||||
if isinstance(raw, unicode_type):
|
||||
raw = raw.encode('utf-8')
|
||||
open(u'debug-raw.html', 'wb').write(raw)
|
||||
from calibre.ebooks.oeb.base import close_self_closing_tags
|
||||
|
@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.customize.conversion import (OutputFormatPlugin,
|
||||
OptionRecommendation)
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def remove_html_cover(oeb, log):
|
||||
@ -121,7 +122,7 @@ class MOBIOutput(OutputFormatPlugin):
|
||||
if not found:
|
||||
from calibre.ebooks import generate_masthead
|
||||
self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...')
|
||||
raw = generate_masthead(unicode(self.oeb.metadata['title'][0]))
|
||||
raw = generate_masthead(unicode_type(self.oeb.metadata['title'][0]))
|
||||
id, href = self.oeb.manifest.generate('masthead', 'masthead')
|
||||
self.oeb.manifest.add(id, href, 'image/gif', data=raw)
|
||||
self.oeb.guide.add('masthead', 'Masthead Image', href)
|
||||
@ -165,7 +166,7 @@ class MOBIOutput(OutputFormatPlugin):
|
||||
sec.nodes.remove(a)
|
||||
|
||||
root = TOC(klass='periodical', href=self.oeb.spine[0].href,
|
||||
title=unicode(self.oeb.metadata.title[0]))
|
||||
title=unicode_type(self.oeb.metadata.title[0]))
|
||||
|
||||
for s in sections:
|
||||
if articles[id(s)]:
|
||||
|
@ -14,6 +14,7 @@ from calibre.constants import iswindows
|
||||
from calibre.customize.conversion import (OutputFormatPlugin,
|
||||
OptionRecommendation)
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
UNITS = ['millimeter', 'centimeter', 'point', 'inch' , 'pica' , 'didot',
|
||||
'cicero', 'devicepixel']
|
||||
@ -202,8 +203,8 @@ class PDFOutput(OutputFormatPlugin):
|
||||
|
||||
def get_cover_data(self):
|
||||
oeb = self.oeb
|
||||
if (oeb.metadata.cover and unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
|
||||
cover_id = unicode(oeb.metadata.cover[0])
|
||||
if (oeb.metadata.cover and unicode_type(oeb.metadata.cover[0]) in oeb.manifest.ids):
|
||||
cover_id = unicode_type(oeb.metadata.cover[0])
|
||||
item = oeb.manifest.ids[cover_id]
|
||||
self.cover_data = item.data
|
||||
|
||||
|
@ -9,6 +9,7 @@ import os, cStringIO
|
||||
from calibre.customize.conversion import (OutputFormatPlugin,
|
||||
OptionRecommendation)
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class PMLOutput(OutputFormatPlugin):
|
||||
@ -40,7 +41,7 @@ class PMLOutput(OutputFormatPlugin):
|
||||
|
||||
with TemporaryDirectory('_pmlz_output') as tdir:
|
||||
pmlmlizer = PMLMLizer(log)
|
||||
pml = unicode(pmlmlizer.extract_content(oeb_book, opts))
|
||||
pml = unicode_type(pmlmlizer.extract_content(oeb_book, opts))
|
||||
with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
|
||||
out.write(pml.encode(opts.pml_output_encoding, 'replace'))
|
||||
|
||||
|
@ -11,6 +11,7 @@ import os
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre.constants import numeric_version
|
||||
from calibre import walk
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class RecipeDisabled(Exception):
|
||||
@ -161,6 +162,6 @@ class RecipeInput(InputFormatPlugin):
|
||||
|
||||
def save_download(self, zf):
|
||||
raw = self.recipe_source
|
||||
if isinstance(raw, unicode):
|
||||
if isinstance(raw, unicode_type):
|
||||
raw = raw.encode('utf-8')
|
||||
zf.writestr('download.recipe', raw)
|
||||
|
@ -9,6 +9,7 @@ import os, string
|
||||
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.constants import __appname__, __version__
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class SNBOutput(OutputFormatPlugin):
|
||||
@ -73,20 +74,20 @@ class SNBOutput(OutputFormatPlugin):
|
||||
# Process Meta data
|
||||
meta = oeb_book.metadata
|
||||
if meta.title:
|
||||
title = unicode(meta.title[0])
|
||||
title = unicode_type(meta.title[0])
|
||||
else:
|
||||
title = ''
|
||||
authors = [unicode(x) for x in meta.creator if x.role == 'aut']
|
||||
authors = [unicode_type(x) for x in meta.creator if x.role == 'aut']
|
||||
if meta.publisher:
|
||||
publishers = unicode(meta.publisher[0])
|
||||
publishers = unicode_type(meta.publisher[0])
|
||||
else:
|
||||
publishers = ''
|
||||
if meta.language:
|
||||
lang = unicode(meta.language[0]).upper()
|
||||
lang = unicode_type(meta.language[0]).upper()
|
||||
else:
|
||||
lang = ''
|
||||
if meta.description:
|
||||
abstract = unicode(meta.description[0])
|
||||
abstract = unicode_type(meta.description[0])
|
||||
else:
|
||||
abstract = ''
|
||||
|
||||
|
@ -18,6 +18,7 @@ from calibre.utils.zipfile import ZipFile
|
||||
from calibre import (extract, walk, isbytestring, filesystem_encoding,
|
||||
get_types_map)
|
||||
from calibre.constants import __version__
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
DEBUG_README=u'''
|
||||
This debug directory contains snapshots of the e-book as it passes through the
|
||||
@ -794,7 +795,7 @@ OptionRecommendation(name='search_replace',
|
||||
def unarchive(self, path, tdir):
|
||||
extract(path, tdir)
|
||||
files = list(walk(tdir))
|
||||
files = [f if isinstance(f, unicode) else f.decode(filesystem_encoding)
|
||||
files = [f if isinstance(f, unicode_type) else f.decode(filesystem_encoding)
|
||||
for f in files]
|
||||
from calibre.customize.ui import available_input_formats
|
||||
fmts = set(available_input_formats())
|
||||
@ -915,7 +916,7 @@ OptionRecommendation(name='search_replace',
|
||||
try:
|
||||
val = parse_date(val, assume_utc=x=='timestamp')
|
||||
except:
|
||||
self.log.exception(_('Failed to parse date/time') + ' ' + unicode(val))
|
||||
self.log.exception(_('Failed to parse date/time') + ' ' + unicode_type(val))
|
||||
continue
|
||||
setattr(mi, x, val)
|
||||
|
||||
|
@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
|
||||
import functools, re, json
|
||||
|
||||
from calibre import entity_to_unicode, as_unicode
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>')
|
||||
SVG_NS = 'http://www.w3.org/2000/svg'
|
||||
@ -218,8 +219,8 @@ class Dehyphenator(object):
|
||||
wraptags = match.group('wraptags')
|
||||
except:
|
||||
wraptags = ''
|
||||
hyphenated = unicode(firsthalf) + "-" + unicode(secondhalf)
|
||||
dehyphenated = unicode(firsthalf) + unicode(secondhalf)
|
||||
hyphenated = unicode_type(firsthalf) + "-" + unicode_type(secondhalf)
|
||||
dehyphenated = unicode_type(firsthalf) + unicode_type(secondhalf)
|
||||
if self.suffixes.match(secondhalf) is None:
|
||||
lookupword = self.removesuffixes.sub('', dehyphenated)
|
||||
else:
|
||||
@ -315,7 +316,7 @@ class CSSPreProcessor(object):
|
||||
# are commented lines before the first @import or @charset rule. Since
|
||||
# the conversion will remove all stylesheets anyway, we don't lose
|
||||
# anything
|
||||
data = re.sub(unicode(r'/\*.*?\*/'), u'', data, flags=re.DOTALL)
|
||||
data = re.sub(unicode_type(r'/\*.*?\*/'), u'', data, flags=re.DOTALL)
|
||||
|
||||
ans, namespaced = [], False
|
||||
for line in data.splitlines():
|
||||
|
@ -10,6 +10,7 @@ from math import ceil
|
||||
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
|
||||
from calibre.utils.logging import default_log
|
||||
from calibre.utils.wordcount import get_wordcount_obj
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class HeuristicProcessor(object):
|
||||
@ -50,8 +51,8 @@ class HeuristicProcessor(object):
|
||||
title = match.group('title')
|
||||
if not title:
|
||||
self.html_preprocess_sections = self.html_preprocess_sections + 1
|
||||
self.log.debug("marked " + unicode(self.html_preprocess_sections) +
|
||||
" chapters. - " + unicode(chap))
|
||||
self.log.debug("marked " + unicode_type(self.html_preprocess_sections) +
|
||||
" chapters. - " + unicode_type(chap))
|
||||
return '<h2>'+chap+'</h2>\n'
|
||||
else:
|
||||
delete_whitespace = re.compile('^\\s*(?P<c>.*?)\\s*$')
|
||||
@ -59,16 +60,16 @@ class HeuristicProcessor(object):
|
||||
txt_chap = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(chap)))
|
||||
txt_title = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(title)))
|
||||
self.html_preprocess_sections = self.html_preprocess_sections + 1
|
||||
self.log.debug("marked " + unicode(self.html_preprocess_sections) +
|
||||
" chapters & titles. - " + unicode(chap) + ", " + unicode(title))
|
||||
self.log.debug("marked " + unicode_type(self.html_preprocess_sections) +
|
||||
" chapters & titles. - " + unicode_type(chap) + ", " + unicode_type(title))
|
||||
return '<h2 title="'+txt_chap+', '+txt_title+'">'+chap+'</h2>\n<h3 class="sigilNotInTOC">'+title+'</h3>\n'
|
||||
|
||||
def chapter_break(self, match):
|
||||
chap = match.group('section')
|
||||
styles = match.group('styles')
|
||||
self.html_preprocess_sections = self.html_preprocess_sections + 1
|
||||
self.log.debug("marked " + unicode(self.html_preprocess_sections) +
|
||||
" section markers based on punctuation. - " + unicode(chap))
|
||||
self.log.debug("marked " + unicode_type(self.html_preprocess_sections) +
|
||||
" section markers based on punctuation. - " + unicode_type(chap))
|
||||
return '<'+styles+' style="page-break-before:always">'+chap
|
||||
|
||||
def analyze_title_matches(self, match):
|
||||
@ -111,8 +112,8 @@ class HeuristicProcessor(object):
|
||||
line_end = line_end_ere.findall(raw)
|
||||
tot_htm_ends = len(htm_end)
|
||||
tot_ln_fds = len(line_end)
|
||||
# self.log.debug("There are " + unicode(tot_ln_fds) + " total Line feeds, and " +
|
||||
# unicode(tot_htm_ends) + " marked up endings")
|
||||
# self.log.debug("There are " + unicode_type(tot_ln_fds) + " total Line feeds, and " +
|
||||
# unicode_type(tot_htm_ends) + " marked up endings")
|
||||
|
||||
if percent > 1:
|
||||
percent = 1
|
||||
@ -120,7 +121,7 @@ class HeuristicProcessor(object):
|
||||
percent = 0
|
||||
|
||||
min_lns = tot_ln_fds * percent
|
||||
# self.log.debug("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup")
|
||||
# self.log.debug("There must be fewer than " + unicode_type(min_lns) + " unmarked lines to add markup")
|
||||
return min_lns > tot_htm_ends
|
||||
|
||||
def dump(self, raw, where):
|
||||
@ -157,17 +158,17 @@ class HeuristicProcessor(object):
|
||||
]
|
||||
|
||||
ITALICIZE_STYLE_PATS = [
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])_\*/(?P<words>[^\*_]+)/\*_'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])~~(?P<words>[^~]+)~~'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])_/(?P<words>[^/_]+)/_'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])_\*(?P<words>[^\*_]+)\*_'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])\*/(?P<words>[^/\*]+)/\*'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])/:(?P<words>[^:/]+):/'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])\|:(?P<words>[^:\|]+):\|'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])\*(?P<words>[^\*]+)\*'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])~(?P<words>[^~]+)~'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*><]+)/'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_'),
|
||||
unicode_type(r'(?msu)(?<=[\s>"“\'‘])_\*/(?P<words>[^\*_]+)/\*_'),
|
||||
unicode_type(r'(?msu)(?<=[\s>"“\'‘])~~(?P<words>[^~]+)~~'),
|
||||
unicode_type(r'(?msu)(?<=[\s>"“\'‘])_/(?P<words>[^/_]+)/_'),
|
||||
unicode_type(r'(?msu)(?<=[\s>"“\'‘])_\*(?P<words>[^\*_]+)\*_'),
|
||||
unicode_type(r'(?msu)(?<=[\s>"“\'‘])\*/(?P<words>[^/\*]+)/\*'),
|
||||
unicode_type(r'(?msu)(?<=[\s>"“\'‘])/:(?P<words>[^:/]+):/'),
|
||||
unicode_type(r'(?msu)(?<=[\s>"“\'‘])\|:(?P<words>[^:\|]+):\|'),
|
||||
unicode_type(r'(?msu)(?<=[\s>"“\'‘])\*(?P<words>[^\*]+)\*'),
|
||||
unicode_type(r'(?msu)(?<=[\s>"“\'‘])~(?P<words>[^~]+)~'),
|
||||
unicode_type(r'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*><]+)/'),
|
||||
unicode_type(r'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_'),
|
||||
]
|
||||
|
||||
for word in ITALICIZE_WORDS:
|
||||
@ -177,10 +178,10 @@ class HeuristicProcessor(object):
|
||||
search_text = re.sub(r'<[^>]*>', '', search_text)
|
||||
for pat in ITALICIZE_STYLE_PATS:
|
||||
for match in re.finditer(pat, search_text):
|
||||
ital_string = unicode(match.group('words'))
|
||||
# self.log.debug("italicising "+unicode(match.group(0))+" with <i>"+ital_string+"</i>")
|
||||
ital_string = unicode_type(match.group('words'))
|
||||
# self.log.debug("italicising "+unicode_type(match.group(0))+" with <i>"+ital_string+"</i>")
|
||||
try:
|
||||
html = re.sub(re.escape(unicode(match.group(0))), '<i>%s</i>' % ital_string, html)
|
||||
html = re.sub(re.escape(unicode_type(match.group(0))), '<i>%s</i>' % ital_string, html)
|
||||
except OverflowError:
|
||||
# match.group(0) was too large to be compiled into a regex
|
||||
continue
|
||||
@ -205,10 +206,10 @@ class HeuristicProcessor(object):
|
||||
if wordcount > 200000:
|
||||
typical_chapters = 15000.
|
||||
self.min_chapters = int(ceil(wordcount / typical_chapters))
|
||||
self.log.debug("minimum chapters required are: "+unicode(self.min_chapters))
|
||||
self.log.debug("minimum chapters required are: "+unicode_type(self.min_chapters))
|
||||
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
|
||||
self.html_preprocess_sections = len(heading.findall(html))
|
||||
self.log.debug("found " + unicode(self.html_preprocess_sections) + " pre-existing headings")
|
||||
self.log.debug("found " + unicode_type(self.html_preprocess_sections) + " pre-existing headings")
|
||||
|
||||
# Build the Regular Expressions in pieces
|
||||
init_lookahead = "(?=<(p|div))"
|
||||
@ -295,7 +296,7 @@ class HeuristicProcessor(object):
|
||||
if n_lookahead_req:
|
||||
n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line)
|
||||
if not analyze:
|
||||
self.log.debug("Marked " + unicode(self.html_preprocess_sections) + " headings, " + log_message)
|
||||
self.log.debug("Marked " + unicode_type(self.html_preprocess_sections) + " headings, " + log_message)
|
||||
|
||||
chapter_marker = arg_ignorecase+init_lookahead+full_chapter_line+blank_lines+lp_n_lookahead_open+n_lookahead+lp_n_lookahead_close+ \
|
||||
lp_opt_title_open+title_line_open+title_header_open+lp_title+title_header_close+title_line_close+lp_opt_title_close
|
||||
@ -308,9 +309,9 @@ class HeuristicProcessor(object):
|
||||
if float(self.chapters_with_title) / float(hits) > .5:
|
||||
title_req = True
|
||||
strict_title = False
|
||||
self.log.debug(unicode(type_name)+" had "+unicode(hits)+" hits - "+unicode(self.chapters_no_title)+" chapters with no title, "+
|
||||
unicode(self.chapters_with_title)+" chapters with titles, "+
|
||||
unicode(float(self.chapters_with_title) / float(hits))+" percent. ")
|
||||
self.log.debug(unicode_type(type_name)+" had "+unicode_type(hits)+" hits - "+unicode_type(self.chapters_no_title)+" chapters with no title, "+
|
||||
unicode_type(self.chapters_with_title)+" chapters with titles, "+
|
||||
unicode_type(float(self.chapters_with_title) / float(hits))+" percent. ")
|
||||
if type_name == 'common':
|
||||
analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name])
|
||||
elif self.min_chapters <= hits < max_chapters or self.min_chapters < 3 > hits:
|
||||
@ -327,8 +328,8 @@ class HeuristicProcessor(object):
|
||||
words_per_chptr = wordcount
|
||||
if words_per_chptr > 0 and self.html_preprocess_sections > 0:
|
||||
words_per_chptr = wordcount / self.html_preprocess_sections
|
||||
self.log.debug("Total wordcount is: "+ unicode(wordcount)+", Average words per section is: "+
|
||||
unicode(words_per_chptr)+", Marked up "+unicode(self.html_preprocess_sections)+" chapters")
|
||||
self.log.debug("Total wordcount is: "+ unicode_type(wordcount)+", Average words per section is: "+
|
||||
unicode_type(words_per_chptr)+", Marked up "+unicode_type(self.html_preprocess_sections)+" chapters")
|
||||
return html
|
||||
|
||||
def punctuation_unwrap(self, length, content, format):
|
||||
@ -358,8 +359,8 @@ class HeuristicProcessor(object):
|
||||
|
||||
# define the pieces of the regex
|
||||
# (?<!\&\w{4});) is a semicolon not part of an entity
|
||||
lookahead = "(?<=.{"+unicode(length)+u"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IA\u00DF]|(?<!\\&\\w{4});))"
|
||||
em_en_lookahead = "(?<=.{"+unicode(length)+u"}[\u2013\u2014])"
|
||||
lookahead = "(?<=.{"+unicode_type(length)+u"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IA\u00DF]|(?<!\\&\\w{4});))"
|
||||
em_en_lookahead = "(?<=.{"+unicode_type(length)+u"}[\u2013\u2014])"
|
||||
soft_hyphen = u"\xad"
|
||||
line_ending = "\\s*(?P<style_close></(span|[iub])>)?\\s*(</(p|div)>)?"
|
||||
blanklines = "\\s*(?P<up2threeblanks><(p|span|div)[^>]*>\\s*(<(p|span|div)[^>]*>\\s*</(span|p|div)>\\s*)</(span|p|div)>\\s*){0,3}\\s*"
|
||||
@ -419,18 +420,18 @@ class HeuristicProcessor(object):
|
||||
return html
|
||||
|
||||
def fix_nbsp_indents(self, html):
|
||||
txtindent = re.compile(unicode(r'<(?P<tagtype>p|div)(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}'), re.IGNORECASE)
|
||||
txtindent = re.compile(unicode_type(r'<(?P<tagtype>p|div)(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}'), re.IGNORECASE)
|
||||
html = txtindent.sub(self.insert_indent, html)
|
||||
if self.found_indents > 1:
|
||||
self.log.debug("replaced "+unicode(self.found_indents)+ " nbsp indents with inline styles")
|
||||
self.log.debug("replaced "+unicode_type(self.found_indents)+ " nbsp indents with inline styles")
|
||||
return html
|
||||
|
||||
def cleanup_markup(self, html):
|
||||
# remove remaining non-breaking spaces
|
||||
html = re.sub(unicode(r'\u00a0'), ' ', html)
|
||||
html = re.sub(unicode_type(r'\u00a0'), ' ', html)
|
||||
# Get rid of various common microsoft specific tags which can cause issues later
|
||||
# Get rid of empty <o:p> tags to simplify other processing
|
||||
html = re.sub(unicode(r'\s*<o:p>\s*</o:p>'), ' ', html)
|
||||
html = re.sub(unicode_type(r'\s*<o:p>\s*</o:p>'), ' ', html)
|
||||
# Delete microsoft 'smart' tags
|
||||
html = re.sub('(?i)</?st1:\\w+>', '', html)
|
||||
# Re-open self closing paragraph tags
|
||||
@ -470,8 +471,8 @@ class HeuristicProcessor(object):
|
||||
blanklines = self.blankreg.findall(html)
|
||||
lines = self.linereg.findall(html)
|
||||
if len(lines) > 1:
|
||||
self.log.debug("There are " + unicode(len(blanklines)) + " blank lines. " +
|
||||
unicode(float(len(blanklines)) / float(len(lines))) + " percent blank")
|
||||
self.log.debug("There are " + unicode_type(len(blanklines)) + " blank lines. " +
|
||||
unicode_type(float(len(blanklines)) / float(len(lines))) + " percent blank")
|
||||
|
||||
if float(len(blanklines)) / float(len(lines)) > 0.40:
|
||||
return True
|
||||
@ -493,11 +494,11 @@ class HeuristicProcessor(object):
|
||||
lines = float(len(self.single_blank.findall(to_merge))) - 1.
|
||||
em = base_em + (em_per_line * lines)
|
||||
if to_merge.find('whitespace'):
|
||||
newline = self.any_multi_blank.sub('\n<p class="whitespace'+unicode(int(em * 10))+
|
||||
'" style="text-align:center; margin-top:'+unicode(em)+'em"> </p>', match.group(0))
|
||||
newline = self.any_multi_blank.sub('\n<p class="whitespace'+unicode_type(int(em * 10))+
|
||||
'" style="text-align:center; margin-top:'+unicode_type(em)+'em"> </p>', match.group(0))
|
||||
else:
|
||||
newline = self.any_multi_blank.sub('\n<p class="softbreak'+unicode(int(em * 10))+
|
||||
'" style="text-align:center; margin-top:'+unicode(em)+'em"> </p>', match.group(0))
|
||||
newline = self.any_multi_blank.sub('\n<p class="softbreak'+unicode_type(int(em * 10))+
|
||||
'" style="text-align:center; margin-top:'+unicode_type(em)+'em"> </p>', match.group(0))
|
||||
return newline
|
||||
|
||||
html = self.any_multi_blank.sub(merge_matches, html)
|
||||
@ -518,9 +519,9 @@ class HeuristicProcessor(object):
|
||||
top_margin = ''
|
||||
bottom_margin = ''
|
||||
if initblanks is not None:
|
||||
top_margin = 'margin-top:'+unicode(len(self.single_blank.findall(initblanks)))+'em;'
|
||||
top_margin = 'margin-top:'+unicode_type(len(self.single_blank.findall(initblanks)))+'em;'
|
||||
if endblanks is not None:
|
||||
bottom_margin = 'margin-bottom:'+unicode(len(self.single_blank.findall(endblanks)))+'em;'
|
||||
bottom_margin = 'margin-bottom:'+unicode_type(len(self.single_blank.findall(endblanks)))+'em;'
|
||||
|
||||
if initblanks is None and endblanks is None:
|
||||
return content
|
||||
@ -597,7 +598,7 @@ class HeuristicProcessor(object):
|
||||
else:
|
||||
replacement_break = re.sub('(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '', replacement_break)
|
||||
divpercent = (100 - width) / 2
|
||||
hr_open = re.sub('45', unicode(divpercent), hr_open)
|
||||
hr_open = re.sub('45', unicode_type(divpercent), hr_open)
|
||||
scene_break = hr_open+replacement_break+'</div>'
|
||||
else:
|
||||
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
|
||||
@ -657,12 +658,12 @@ class HeuristicProcessor(object):
|
||||
else:
|
||||
styles = match.group('styles').split(';')
|
||||
is_paragraph = self.check_paragraph(content)
|
||||
# print "styles for this line are: "+unicode(styles)
|
||||
# print "styles for this line are: "+unicode_type(styles)
|
||||
split_styles = []
|
||||
for style in styles:
|
||||
# print "style is: "+unicode(style)
|
||||
# print "style is: "+unicode_type(style)
|
||||
newstyle = style.split(':')
|
||||
# print "newstyle is: "+unicode(newstyle)
|
||||
# print "newstyle is: "+unicode_type(newstyle)
|
||||
split_styles.append(newstyle)
|
||||
styles = split_styles
|
||||
for style, setting in styles:
|
||||
@ -673,7 +674,7 @@ class HeuristicProcessor(object):
|
||||
if 9 < setting < 14:
|
||||
text_indent = indented_text
|
||||
else:
|
||||
text_indent = style+':'+unicode(setting)+'pt;'
|
||||
text_indent = style+':'+unicode_type(setting)+'pt;'
|
||||
if style == 'padding':
|
||||
setting = re.sub('pt', '', setting).split(' ')
|
||||
if int(setting[1]) < 16 and int(setting[3]) < 16:
|
||||
@ -694,23 +695,23 @@ class HeuristicProcessor(object):
|
||||
blockquote_open_loop = blockquote_open
|
||||
if debugabby:
|
||||
self.log.debug('\n\n******\n')
|
||||
self.log.debug('padding top is: '+unicode(setting[0]))
|
||||
self.log.debug('padding right is:' +unicode(setting[1]))
|
||||
self.log.debug('padding bottom is: ' + unicode(setting[2]))
|
||||
self.log.debug('padding left is: ' +unicode(setting[3]))
|
||||
self.log.debug('padding top is: '+unicode_type(setting[0]))
|
||||
self.log.debug('padding right is:' +unicode_type(setting[1]))
|
||||
self.log.debug('padding bottom is: ' + unicode_type(setting[2]))
|
||||
self.log.debug('padding left is: ' +unicode_type(setting[3]))
|
||||
|
||||
# print "text-align is: "+unicode(text_align)
|
||||
# print "\n***\nline is:\n "+unicode(match.group(0))+'\n'
|
||||
# print "text-align is: "+unicode_type(text_align)
|
||||
# print "\n***\nline is:\n "+unicode_type(match.group(0))+'\n'
|
||||
if debugabby:
|
||||
# print "this line is a paragraph = "+unicode(is_paragraph)+", previous line was "+unicode(self.previous_was_paragraph)
|
||||
# print "this line is a paragraph = "+unicode_type(is_paragraph)+", previous line was "+unicode_type(self.previous_was_paragraph)
|
||||
self.log.debug("styles for this line were:", styles)
|
||||
self.log.debug('newline is:')
|
||||
self.log.debug(blockquote_open_loop+blockquote_close_loop+
|
||||
paragraph_before+'<p style="'+text_indent+text_align+
|
||||
'">'+content+'</p>'+paragraph_after+'\n\n\n\n\n')
|
||||
# print "is_paragraph is "+unicode(is_paragraph)+", previous_was_paragraph is "+unicode(self.previous_was_paragraph)
|
||||
# print "is_paragraph is "+unicode_type(is_paragraph)+", previous_was_paragraph is "+unicode_type(self.previous_was_paragraph)
|
||||
self.previous_was_paragraph = is_paragraph
|
||||
# print "previous_was_paragraph is now set to "+unicode(self.previous_was_paragraph)+"\n\n\n"
|
||||
# print "previous_was_paragraph is now set to "+unicode_type(self.previous_was_paragraph)+"\n\n\n"
|
||||
return blockquote_open_loop+blockquote_close_loop+paragraph_before+'<p style="'+text_indent+text_align+'">'+content+'</p>'+paragraph_after
|
||||
|
||||
html = abbyy_line.sub(convert_styles, html)
|
||||
@ -793,12 +794,12 @@ class HeuristicProcessor(object):
|
||||
# more of the lines break in the same region of the document then unwrapping is required
|
||||
docanalysis = DocAnalysis(format, html)
|
||||
hardbreaks = docanalysis.line_histogram(.50)
|
||||
self.log.debug("Hard line breaks check returned "+unicode(hardbreaks))
|
||||
self.log.debug("Hard line breaks check returned "+unicode_type(hardbreaks))
|
||||
|
||||
# Calculate Length
|
||||
unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
|
||||
length = docanalysis.line_length(unwrap_factor)
|
||||
self.log.debug("Median line length is " + unicode(length) + ", calculated with " + format + " format")
|
||||
self.log.debug("Median line length is " + unicode_type(length) + ", calculated with " + format + " format")
|
||||
|
||||
# ##### Unwrap lines ######
|
||||
if getattr(self.extra_opts, 'unwrap_lines', False):
|
||||
@ -820,7 +821,7 @@ class HeuristicProcessor(object):
|
||||
# If still no sections after unwrapping mark split points on lines with no punctuation
|
||||
if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False):
|
||||
self.log.debug("Looking for more split points based on punctuation,"
|
||||
" currently have " + unicode(self.html_preprocess_sections))
|
||||
" currently have " + unicode_type(self.html_preprocess_sections))
|
||||
chapdetect3 = re.compile(
|
||||
r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([\W]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) # noqa
|
||||
html = chapdetect3.sub(self.chapter_break, html)
|
||||
|
@ -20,6 +20,7 @@ from calibre.utils.localization import canonicalize_lang
|
||||
from calibre.utils.logging import default_log
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def fromstring(raw, parser=RECOVER_PARSER):
|
||||
@ -56,7 +57,7 @@ def read_doc_props(raw, mi, XPath):
|
||||
|
||||
desc = XPath('//dc:description')(root)
|
||||
if desc:
|
||||
raw = etree.tostring(desc[0], method='text', encoding=unicode)
|
||||
raw = etree.tostring(desc[0], method='text', encoding=unicode_type)
|
||||
raw = raw.replace('_x000d_', '') # Word 2007 mangles newlines in the summary
|
||||
mi.comments = raw.strip()
|
||||
|
||||
|
@ -14,6 +14,7 @@ from calibre.utils.filenames import ascii_filename
|
||||
from calibre.utils.fonts.scanner import font_scanner, NoFonts
|
||||
from calibre.utils.fonts.utils import panose_to_css_generic_family, is_truetype_font
|
||||
from calibre.utils.icu import ord_string
|
||||
from polyglot.builtins import codepoint_to_chr
|
||||
|
||||
Embed = namedtuple('Embed', 'name key subsetted')
|
||||
|
||||
@ -124,7 +125,7 @@ def do_map(m, points):
|
||||
if base < p < limit:
|
||||
yield m[p - base]
|
||||
else:
|
||||
yield unichr(p)
|
||||
yield codepoint_to_chr(p)
|
||||
|
||||
|
||||
def map_symbol_text(text, font):
|
||||
|
@ -11,6 +11,7 @@ from operator import itemgetter
|
||||
from lxml import etree
|
||||
|
||||
from calibre.utils.icu import partition_by_first_letter, sort_key
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def get_applicable_xe_fields(index, xe_fields, XPath, expand):
|
||||
@ -246,7 +247,7 @@ def polish_index_markup(index, blocks):
|
||||
a = block.xpath('descendant::a[1]')
|
||||
text = ''
|
||||
if a:
|
||||
text = etree.tostring(a[0], method='text', with_tail=False, encoding=unicode).strip()
|
||||
text = etree.tostring(a[0], method='text', with_tail=False, encoding=unicode_type).strip()
|
||||
if ':' in text:
|
||||
path_map[block] = parts = filter(None, (x.strip() for x in text.split(':')))
|
||||
if len(parts) > 1:
|
||||
|
@ -504,8 +504,6 @@ class Table(object):
|
||||
|
||||
def resolve_cell_style(self, tc, overrides, row, col, rows, cols_in_row):
|
||||
cs = CellStyle(self.namespace)
|
||||
# from lxml.etree import tostring
|
||||
# txt = tostring(tc, method='text', encoding=unicode)
|
||||
for o in overrides:
|
||||
if o in self.overrides:
|
||||
ovr = self.overrides[o]
|
||||
@ -699,4 +697,3 @@ class Tables(object):
|
||||
table = self.para_map.get(p, None)
|
||||
if table is not None:
|
||||
return table.style_map.get(p, (None, None))[1]
|
||||
|
||||
|
@ -13,6 +13,7 @@ from lxml.etree import tostring
|
||||
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def from_headings(body, log, namespace):
|
||||
@ -93,7 +94,7 @@ def link_to_txt(a, styles, object_map):
|
||||
if rs.css.get('display', None) == 'none':
|
||||
a.remove(child)
|
||||
|
||||
return tostring(a, method='text', with_tail=False, encoding=unicode).strip()
|
||||
return tostring(a, method='text', with_tail=False, encoding=unicode_type).strip()
|
||||
|
||||
|
||||
def from_toc(docx, link_map, styles, object_map, log, namespace):
|
||||
|
@ -19,6 +19,7 @@ from calibre.ebooks.docx.writer.lists import ListsManager
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
|
||||
from calibre.ebooks.oeb.base import XPath, barename
|
||||
from calibre.utils.localization import lang_as_iso639_1
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def lang_for_tag(tag):
|
||||
@ -439,8 +440,8 @@ class Convert(object):
|
||||
if self.add_toc:
|
||||
self.links_manager.process_toc_links(self.oeb)
|
||||
|
||||
if self.add_cover and self.oeb.metadata.cover and unicode(self.oeb.metadata.cover[0]) in self.oeb.manifest.ids:
|
||||
cover_id = unicode(self.oeb.metadata.cover[0])
|
||||
if self.add_cover and self.oeb.metadata.cover and unicode_type(self.oeb.metadata.cover[0]) in self.oeb.manifest.ids:
|
||||
cover_id = unicode_type(self.oeb.metadata.cover[0])
|
||||
item = self.oeb.manifest.ids[cover_id]
|
||||
self.cover_img = self.images_manager.read_image(item.href)
|
||||
|
||||
|
@ -14,6 +14,7 @@ from lxml import etree
|
||||
from calibre.ebooks import parse_css_length
|
||||
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
|
||||
from calibre.utils.localization import lang_as_iso639_1
|
||||
from polyglot.builtins import unicode_type
|
||||
from tinycss.css21 import CSS21Parser
|
||||
|
||||
css_parser = CSS21Parser()
|
||||
@ -45,7 +46,7 @@ def bmap(x):
|
||||
|
||||
|
||||
def is_dropcaps(html_tag, tag_style):
|
||||
return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding=unicode, with_tail=False)) < 5 and tag_style['float'] == 'left'
|
||||
return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding=unicode_type, with_tail=False)) < 5 and tag_style['float'] == 'left'
|
||||
|
||||
|
||||
class CombinedStyle(object):
|
||||
|
@ -10,6 +10,7 @@ import unittest
|
||||
from polyglot.builtins import map
|
||||
|
||||
from calibre.ebooks.epub.cfi.parse import parser, cfi_sort_key, decode_cfi
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class Tests(unittest.TestCase):
|
||||
@ -60,7 +61,7 @@ class Tests(unittest.TestCase):
|
||||
if after is not None:
|
||||
ta['after'] = after
|
||||
if params:
|
||||
ta['params'] = {unicode(k):(v,) if isinstance(v, unicode) else v for k, v in params.iteritems()}
|
||||
ta['params'] = {unicode_type(k):(v,) if isinstance(v, unicode_type) else v for k, v in params.iteritems()}
|
||||
if ta:
|
||||
step['text_assertion'] = ta
|
||||
return ans
|
||||
|
@ -11,6 +11,7 @@ import time
|
||||
from calibre.constants import __appname__, __version__
|
||||
from calibre import strftime, prepare_string_for_xml as xml
|
||||
from calibre.utils.date import parse_date
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
SONY_METADATA = u'''\
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
@ -81,21 +82,21 @@ SONY_ATOM_ENTRY = u'''\
|
||||
|
||||
def sony_metadata(oeb):
|
||||
m = oeb.metadata
|
||||
title = short_title = unicode(m.title[0])
|
||||
title = short_title = unicode_type(m.title[0])
|
||||
publisher = __appname__ + ' ' + __version__
|
||||
try:
|
||||
pt = unicode(oeb.metadata.publication_type[0])
|
||||
pt = unicode_type(oeb.metadata.publication_type[0])
|
||||
short_title = u':'.join(pt.split(':')[2:])
|
||||
except:
|
||||
pass
|
||||
|
||||
try:
|
||||
date = parse_date(unicode(m.date[0]),
|
||||
date = parse_date(unicode_type(m.date[0]),
|
||||
as_utc=False).strftime('%Y-%m-%d')
|
||||
except:
|
||||
date = strftime('%Y-%m-%d')
|
||||
try:
|
||||
language = unicode(m.language[0]).replace('_', '-')
|
||||
language = unicode_type(m.language[0]).replace('_', '-')
|
||||
except:
|
||||
language = 'en'
|
||||
short_title = xml(short_title, True)
|
||||
@ -113,7 +114,7 @@ def sony_metadata(oeb):
|
||||
return True
|
||||
|
||||
try:
|
||||
base_id = unicode(list(filter(cal_id, m.identifier))[0])
|
||||
base_id = unicode_type(list(filter(cal_id, m.identifier))[0])
|
||||
except:
|
||||
base_id = str(uuid4())
|
||||
|
||||
@ -128,7 +129,7 @@ def sony_metadata(oeb):
|
||||
for x in toc:
|
||||
section.nodes.append(x)
|
||||
toc = TOC(klass='periodical', href=oeb.spine[2].href,
|
||||
title=unicode(oeb.metadata.title[0]))
|
||||
title=unicode_type(oeb.metadata.title[0]))
|
||||
toc.nodes.append(section)
|
||||
|
||||
entries = []
|
||||
@ -188,4 +189,3 @@ def sony_metadata(oeb):
|
||||
id=xml(base_id)).encode('utf-8')
|
||||
|
||||
return metadata, atom
|
||||
|
||||
|
@ -19,6 +19,7 @@ from calibre.constants import __appname__, __version__
|
||||
from calibre.utils.localization import lang_as_iso639_1
|
||||
from calibre.utils.img import save_cover_data_to
|
||||
from calibre.ebooks.oeb.base import urlnormalize
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class FB2MLizer(object):
|
||||
@ -64,7 +65,7 @@ class FB2MLizer(object):
|
||||
output = self.clean_text(u''.join(output))
|
||||
|
||||
if self.opts.pretty_print:
|
||||
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
|
||||
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode_type, pretty_print=True)
|
||||
else:
|
||||
return u'<?xml version="1.0" encoding="UTF-8"?>' + output
|
||||
|
||||
@ -140,7 +141,7 @@ class FB2MLizer(object):
|
||||
metadata['author'] = u'<author><first-name></first-name><last-name></last-name></author>'
|
||||
|
||||
metadata['keywords'] = u''
|
||||
tags = list(map(unicode, self.oeb_book.metadata.subject))
|
||||
tags = list(map(unicode_type, self.oeb_book.metadata.subject))
|
||||
if tags:
|
||||
tags = ', '.join(prepare_string_for_xml(x) for x in tags)
|
||||
metadata['keywords'] = '<keywords>%s</keywords>'%tags
|
||||
@ -155,8 +156,8 @@ class FB2MLizer(object):
|
||||
year = publisher = isbn = u''
|
||||
identifiers = self.oeb_book.metadata['identifier']
|
||||
for x in identifiers:
|
||||
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
|
||||
metadata['id'] = unicode(x).split(':')[-1]
|
||||
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode_type(x).startswith('urn:uuid:'):
|
||||
metadata['id'] = unicode_type(x).split(':')[-1]
|
||||
break
|
||||
if metadata['id'] is None:
|
||||
self.log.warn('No UUID identifier found')
|
||||
@ -229,8 +230,8 @@ class FB2MLizer(object):
|
||||
cover_href = None
|
||||
|
||||
# Get the raster cover if it's available.
|
||||
if self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
|
||||
id = unicode(self.oeb_book.metadata.cover[0])
|
||||
if self.oeb_book.metadata.cover and unicode_type(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
|
||||
id = unicode_type(self.oeb_book.metadata.cover[0])
|
||||
cover_item = self.oeb_book.manifest.ids[id]
|
||||
if cover_item.media_type in OEB_RASTER_IMAGES:
|
||||
cover_href = cover_item.href
|
||||
|
@ -19,6 +19,7 @@ from calibre.ebooks.oeb.base import urlunquote
|
||||
from calibre.ebooks.chardet import detect_xml_encoding
|
||||
from calibre.constants import iswindows
|
||||
from calibre import unicode_path, as_unicode, replace_entities
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class Link(object):
|
||||
@ -46,7 +47,7 @@ class Link(object):
|
||||
:param base: The base directory that relative URLs are with respect to.
|
||||
Must be a unicode string.
|
||||
'''
|
||||
assert isinstance(url, unicode) and isinstance(base, unicode)
|
||||
assert isinstance(url, unicode_type) and isinstance(base, unicode_type)
|
||||
self.url = url
|
||||
self.parsed_url = urlparse(self.url)
|
||||
self.is_local = self.parsed_url.scheme in ('', 'file')
|
||||
@ -248,6 +249,3 @@ def get_filelist(htmlfile, dir, opts, log):
|
||||
for f in filelist:
|
||||
log.debug('\t\t', f)
|
||||
return filelist
|
||||
|
||||
|
||||
|
||||
|
@ -11,6 +11,7 @@ import textwrap, os, glob
|
||||
|
||||
from calibre.customize import FileTypePlugin
|
||||
from calibre.constants import numeric_version
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class HTML2ZIP(FileTypePlugin):
|
||||
@ -114,10 +115,9 @@ every time you add an HTML file to the library.\
|
||||
config_dialog.exec_()
|
||||
|
||||
if config_dialog.result() == QDialog.Accepted:
|
||||
sc = unicode(sc.text()).strip()
|
||||
sc = unicode_type(sc.text()).strip()
|
||||
if bf.isChecked():
|
||||
sc += '|bf'
|
||||
customize_plugin(self, sc)
|
||||
|
||||
return config_dialog.result()
|
||||
|
||||
|
@ -22,6 +22,7 @@ from calibre.ebooks.oeb.base import (
|
||||
XHTML, XHTML_NS, barename, namespace, OEB_IMAGES, XLINK, rewrite_links, urlnormalize)
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
from calibre.utils.logging import default_log
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img', 'link', 'meta'}
|
||||
|
||||
@ -46,7 +47,7 @@ class OEB2HTML(object):
|
||||
self.log.info('Converting OEB book to HTML...')
|
||||
self.opts = opts
|
||||
try:
|
||||
self.book_title = unicode(oeb_book.metadata.title[0])
|
||||
self.book_title = unicode_type(oeb_book.metadata.title[0])
|
||||
except Exception:
|
||||
self.book_title = _('Unknown')
|
||||
self.links = {}
|
||||
|
@ -22,6 +22,7 @@ from calibre.ebooks.oeb.base import urlnormalize, xpath
|
||||
from calibre.ebooks.oeb.reader import OEBReader
|
||||
from calibre.ebooks import DRMError
|
||||
from calibre import plugins
|
||||
from polyglot.builtins import codepoint_to_chr, unicode_type
|
||||
|
||||
lzx, lxzerror = plugins['lzx']
|
||||
msdes, msdeserror = plugins['msdes']
|
||||
@ -110,7 +111,7 @@ def read_utf8_char(bytes, pos):
|
||||
raise LitError(
|
||||
'Invalid UTF8 character: %s' % repr(bytes[pos:pos+i]))
|
||||
c = (c << 6) | (b & 0x3F)
|
||||
return unichr(c), pos+elsize
|
||||
return codepoint_to_chr(c), pos+elsize
|
||||
|
||||
|
||||
def consume_sized_utf8_string(bytes, zpad=False):
|
||||
@ -125,7 +126,7 @@ def consume_sized_utf8_string(bytes, zpad=False):
|
||||
|
||||
|
||||
def encode(string):
|
||||
return unicode(string).encode('ascii', 'xmlcharrefreplace')
|
||||
return unicode_type(string).encode('ascii', 'xmlcharrefreplace')
|
||||
|
||||
|
||||
class UnBinary(object):
|
||||
@ -243,9 +244,9 @@ class UnBinary(object):
|
||||
else:
|
||||
dynamic_tag += 1
|
||||
errors += 1
|
||||
tag_name = '?'+unichr(tag)+'?'
|
||||
tag_name = '?'+codepoint_to_chr(tag)+'?'
|
||||
current_map = self.tag_to_attr_map[tag]
|
||||
print('WARNING: tag %s unknown' % unichr(tag))
|
||||
print('WARNING: tag %s unknown' % codepoint_to_chr(tag))
|
||||
buf.write(encode(tag_name))
|
||||
elif flags & FLAG_CLOSING:
|
||||
if depth == 0:
|
||||
@ -947,4 +948,3 @@ class LitReader(OEBReader):
|
||||
item.media_type = 'application/xhtml+xml'
|
||||
item.data = item._parse_xhtml(etree.tostring(item.data))
|
||||
super(LitReader, self)._spine_from_opf(opf)
|
||||
|
||||
|
@ -31,6 +31,7 @@ import calibre
|
||||
from calibre import plugins
|
||||
msdes, msdeserror = plugins['msdes']
|
||||
import calibre.ebooks.lit.mssha1 as mssha1
|
||||
from polyglot.builtins import codepoint_to_chr, unicode_type
|
||||
|
||||
__all__ = ['LitWriter']
|
||||
|
||||
@ -163,9 +164,9 @@ class ReBinary(object):
|
||||
for value in values:
|
||||
if isinstance(value, (int, long)):
|
||||
try:
|
||||
value = unichr(value)
|
||||
value = codepoint_to_chr(value)
|
||||
except OverflowError:
|
||||
self.logger.warn('Unicode overflow for integer:', value)
|
||||
self.logger.warn('Unicode overflow for integer:', value)
|
||||
value = u'?'
|
||||
self.buf.write(value.encode('utf-8'))
|
||||
|
||||
@ -216,9 +217,9 @@ class ReBinary(object):
|
||||
path, frag = urldefrag(value)
|
||||
if self.item:
|
||||
path = self.item.abshref(path)
|
||||
prefix = unichr(3)
|
||||
prefix = codepoint_to_chr(3)
|
||||
if path in self.manifest.hrefs:
|
||||
prefix = unichr(2)
|
||||
prefix = codepoint_to_chr(2)
|
||||
value = self.manifest.hrefs[path].id
|
||||
if frag:
|
||||
value = '#'.join((value, frag))
|
||||
@ -281,9 +282,9 @@ class ReBinary(object):
|
||||
self.logger.warn("More than six anchors in file %r. "
|
||||
"Some links may not work properly." % self.item.href)
|
||||
data = StringIO()
|
||||
data.write(unichr(len(self.anchors)).encode('utf-8'))
|
||||
data.write(codepoint_to_chr(len(self.anchors)).encode('utf-8'))
|
||||
for anchor, offset in self.anchors:
|
||||
data.write(unichr(len(anchor)).encode('utf-8'))
|
||||
data.write(codepoint_to_chr(len(anchor)).encode('utf-8'))
|
||||
data.write(anchor)
|
||||
data.write(pack('<I', offset))
|
||||
return data.getvalue()
|
||||
@ -313,7 +314,7 @@ class LitWriter(object):
|
||||
oeb.metadata.add('calibre-version', calibre.__version__)
|
||||
cover = None
|
||||
if oeb.metadata.cover:
|
||||
id = unicode(oeb.metadata.cover[0])
|
||||
id = unicode_type(oeb.metadata.cover[0])
|
||||
cover = oeb.manifest.ids[id]
|
||||
for type, title in ALL_MS_COVER_TYPES:
|
||||
if type not in oeb.guide:
|
||||
@ -485,7 +486,7 @@ class LitWriter(object):
|
||||
data = rebin.content
|
||||
name = name + '/content'
|
||||
secnum = 1
|
||||
elif isinstance(data, unicode):
|
||||
elif isinstance(data, unicode_type):
|
||||
data = data.encode('utf-8')
|
||||
elif hasattr(data, 'cssText'):
|
||||
data = str(item)
|
||||
@ -521,9 +522,9 @@ class LitWriter(object):
|
||||
item.offset = offset \
|
||||
if state in ('linear', 'nonlinear') else 0
|
||||
data.write(pack('<I', item.offset))
|
||||
entry = [unichr(len(id)), unicode(id),
|
||||
unichr(len(href)), unicode(href),
|
||||
unichr(len(media_type)), unicode(media_type)]
|
||||
entry = [codepoint_to_chr(len(id)), unicode_type(id),
|
||||
codepoint_to_chr(len(href)), unicode_type(href),
|
||||
codepoint_to_chr(len(media_type)), unicode_type(media_type)]
|
||||
for value in entry:
|
||||
data.write(value.encode('utf-8'))
|
||||
data.write('\0')
|
||||
|
@ -36,6 +36,7 @@ from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.devices.interface import DevicePlugin as Device
|
||||
from calibre.ebooks.lrf.html.color_map import lrs_color
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def update_css(ncss, ocss):
|
||||
@ -54,10 +55,10 @@ def munge_paths(basepath, url):
|
||||
if not path:
|
||||
path = basepath
|
||||
elif not os.path.isabs(path):
|
||||
if isinstance(path, unicode):
|
||||
if isinstance(path, unicode_type):
|
||||
path = path.encode(sys.getfilesystemencoding())
|
||||
dn = os.path.dirname(basepath)
|
||||
if isinstance(dn, unicode):
|
||||
if isinstance(dn, unicode_type):
|
||||
dn = dn.encode(sys.getfilesystemencoding())
|
||||
path = os.path.join(dn, path)
|
||||
return os.path.normpath(path), fragment
|
||||
@ -272,7 +273,7 @@ class HTMLConverter(object):
|
||||
update_css(npcss, self.override_pcss)
|
||||
|
||||
paths = [os.path.abspath(path) for path in paths]
|
||||
paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, unicode) else path for path in paths]
|
||||
paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, unicode_type) else path for path in paths]
|
||||
|
||||
while len(paths) > 0 and self.link_level <= self.link_levels:
|
||||
for path in paths:
|
||||
@ -336,7 +337,7 @@ class HTMLConverter(object):
|
||||
markupMassage=nmassage)
|
||||
except ConversionError as err:
|
||||
if 'Failed to coerce to unicode' in str(err):
|
||||
raw = unicode(raw, 'utf8', 'replace')
|
||||
raw = unicode_type(raw, 'utf8', 'replace')
|
||||
soup = BeautifulSoup(raw,
|
||||
convertEntities=BeautifulSoup.XHTML_ENTITIES,
|
||||
markupMassage=nmassage)
|
||||
@ -359,7 +360,7 @@ class HTMLConverter(object):
|
||||
os.makedirs(tdir)
|
||||
try:
|
||||
dump = open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb')
|
||||
dump.write(unicode(soup).encode('utf-8'))
|
||||
dump.write(unicode_type(soup).encode('utf-8'))
|
||||
self.log.info(_('Written preprocessed HTML to ')+dump.name)
|
||||
dump.close()
|
||||
except:
|
||||
@ -394,7 +395,7 @@ class HTMLConverter(object):
|
||||
self.log.info(_('\tConverting to BBeB...'))
|
||||
self.current_style = {}
|
||||
self.page_break_found = False
|
||||
if not isinstance(path, unicode):
|
||||
if not isinstance(path, unicode_type):
|
||||
path = path.decode(sys.getfilesystemencoding())
|
||||
self.target_prefix = path
|
||||
self.previous_text = '\n'
|
||||
@ -589,7 +590,7 @@ class HTMLConverter(object):
|
||||
if isinstance(c, HTMLConverter.IGNORED_TAGS):
|
||||
continue
|
||||
if isinstance(c, NavigableString):
|
||||
text += unicode(c)
|
||||
text += unicode_type(c)
|
||||
elif isinstance(c, Tag):
|
||||
if c.name.lower() == 'img' and c.has_key('alt'): # noqa
|
||||
alt_text += c['alt']
|
||||
@ -644,7 +645,7 @@ class HTMLConverter(object):
|
||||
para, text, path, fragment = link['para'], link['text'], link['path'], link['fragment']
|
||||
ascii_text = text
|
||||
|
||||
if not isinstance(path, unicode):
|
||||
if not isinstance(path, unicode_type):
|
||||
path = path.decode(sys.getfilesystemencoding())
|
||||
if path in self.processed_files:
|
||||
if path+fragment in self.targets.keys():
|
||||
@ -1323,7 +1324,7 @@ class HTMLConverter(object):
|
||||
bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px'
|
||||
if 'em' in tag_css['text-indent']:
|
||||
bl = '10pt'
|
||||
indent = self.unit_convert(unicode(tag_css['text-indent']), pts=True, base_length=bl)
|
||||
indent = self.unit_convert(unicode_type(tag_css['text-indent']), pts=True, base_length=bl)
|
||||
if not indent:
|
||||
indent = 0
|
||||
if indent > 0 and indent < 10 * self.minimum_indent:
|
||||
@ -1482,7 +1483,7 @@ class HTMLConverter(object):
|
||||
enc = sys.getfilesystemencoding()
|
||||
if not enc:
|
||||
enc = 'utf8'
|
||||
if isinstance(path, unicode):
|
||||
if isinstance(path, unicode_type):
|
||||
path = path.encode(enc, 'replace')
|
||||
if os.access(path, os.R_OK) and os.path.isfile(path):
|
||||
if ext in ['png', 'jpg', 'bmp', 'jpeg']:
|
||||
@ -1526,7 +1527,7 @@ class HTMLConverter(object):
|
||||
elif tagname in ['style', 'link']:
|
||||
ncss, npcss = {}, {}
|
||||
if tagname == 'style':
|
||||
text = ''.join([unicode(i) for i in tag.findAll(text=True)])
|
||||
text = ''.join([unicode_type(i) for i in tag.findAll(text=True)])
|
||||
css, pcss = self.parse_css(text)
|
||||
ncss.update(css)
|
||||
npcss.update(pcss)
|
||||
@ -1559,7 +1560,7 @@ class HTMLConverter(object):
|
||||
if tag.contents:
|
||||
c = tag.contents[0]
|
||||
if isinstance(c, NavigableString):
|
||||
c = unicode(c).replace('\r\n', '\n').replace('\r', '\n')
|
||||
c = unicode_type(c).replace('\r\n', '\n').replace('\r', '\n')
|
||||
if c.startswith('\n'):
|
||||
c = c[1:]
|
||||
tag.contents[0] = NavigableString(c)
|
||||
@ -1759,7 +1760,7 @@ class HTMLConverter(object):
|
||||
except Exception as err:
|
||||
self.log.warning(_('An error occurred while processing a table: %s. Ignoring table markup.')%repr(err))
|
||||
self.log.exception('')
|
||||
self.log.debug(_('Bad table:\n%s')%unicode(tag)[:300])
|
||||
self.log.debug(_('Bad table:\n%s')%unicode_type(tag)[:300])
|
||||
self.in_table = False
|
||||
self.process_children(tag, tag_css, tag_pseudo_css)
|
||||
finally:
|
||||
@ -1810,7 +1811,7 @@ class HTMLConverter(object):
|
||||
|
||||
|
||||
def process_file(path, options, logger):
|
||||
if not isinstance(path, unicode):
|
||||
if not isinstance(path, unicode_type):
|
||||
path = path.decode(sys.getfilesystemencoding())
|
||||
path = os.path.abspath(path)
|
||||
default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
|
||||
@ -1857,9 +1858,9 @@ def process_file(path, options, logger):
|
||||
|
||||
for prop in ('author', 'author_sort', 'title', 'title_sort', 'publisher', 'freetext'):
|
||||
val = getattr(options, prop, None)
|
||||
if val and not isinstance(val, unicode):
|
||||
if val and not isinstance(val, unicode_type):
|
||||
soup = BeautifulSoup(val)
|
||||
setattr(options, prop, unicode(soup))
|
||||
setattr(options, prop, unicode_type(soup))
|
||||
|
||||
title = (options.title, options.title_sort)
|
||||
author = (options.author, options.author_sort)
|
||||
@ -1903,7 +1904,7 @@ def process_file(path, options, logger):
|
||||
options.force_page_break = fpb
|
||||
options.link_exclude = le
|
||||
options.page_break = pb
|
||||
if not isinstance(options.chapter_regex, unicode):
|
||||
if not isinstance(options.chapter_regex, unicode_type):
|
||||
options.chapter_regex = options.chapter_regex.decode(preferred_encoding)
|
||||
options.chapter_regex = re.compile(options.chapter_regex, re.IGNORECASE)
|
||||
fpba = options.force_page_break_attr.split(',')
|
||||
|
@ -11,6 +11,8 @@ from PyQt5.Qt import QUrl, QApplication, QSize, QEventLoop, \
|
||||
QPainter, QImage, QObject, Qt
|
||||
from PyQt5.QtWebKitWidgets import QWebPage
|
||||
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class HTMLTableRenderer(QObject):
|
||||
|
||||
@ -67,7 +69,7 @@ class HTMLTableRenderer(QObject):
|
||||
def render_table(soup, table, css, base_dir, width, height, dpi, factor=1.0):
|
||||
head = ''
|
||||
for e in soup.findAll(['link', 'style']):
|
||||
head += unicode(e)+'\n\n'
|
||||
head += unicode_type(e)+'\n\n'
|
||||
style = ''
|
||||
for key, val in css.items():
|
||||
style += key + ':%s;'%val
|
||||
@ -83,7 +85,7 @@ def render_table(soup, table, css, base_dir, width, height, dpi, factor=1.0):
|
||||
%s
|
||||
</body>
|
||||
</html>
|
||||
'''%(head, width-10, style, unicode(table))
|
||||
'''%(head, width-10, style, unicode_type(table))
|
||||
images, tdir = do_render(html, base_dir, width, height, dpi, factor)
|
||||
atexit.register(shutil.rmtree, tdir)
|
||||
return images
|
||||
|
@ -10,6 +10,7 @@ from calibre.utils.filenames import ascii_filename
|
||||
from calibre.ebooks.lrf.meta import LRFMetaFile
|
||||
from calibre.ebooks.lrf.objects import get_object, PageTree, StyleObject, \
|
||||
Font, Text, TOCObject, BookAttr, ruby_tags
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class LRFDocument(LRFMetaFile):
|
||||
@ -112,7 +113,7 @@ class LRFDocument(LRFMetaFile):
|
||||
pages += u'<PageTree objid="%d">\n'%(page_tree.id,)
|
||||
close = u'</PageTree>\n'
|
||||
for page in page_tree:
|
||||
pages += unicode(page)
|
||||
pages += unicode_type(page)
|
||||
pages += close
|
||||
traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"', pages)] + [pt_id]
|
||||
|
||||
@ -125,9 +126,9 @@ class LRFDocument(LRFMetaFile):
|
||||
if isinstance(obj, (Font, Text, TOCObject)):
|
||||
continue
|
||||
if isinstance(obj, StyleObject):
|
||||
styles += unicode(obj)
|
||||
styles += unicode_type(obj)
|
||||
else:
|
||||
objects += unicode(obj)
|
||||
objects += unicode_type(obj)
|
||||
styles += '</Style>\n'
|
||||
objects += '</Objects>\n'
|
||||
if write_files:
|
||||
|
@ -20,6 +20,7 @@ import xml.dom.minidom as dom
|
||||
from functools import wraps
|
||||
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
BYTE = "<B" #: Unsigned char little endian encoded in 1 byte
|
||||
WORD = "<H" #: Unsigned short little endian encoded in 2 bytes
|
||||
@ -195,8 +196,8 @@ class xml_field(object):
|
||||
|
||||
if not val:
|
||||
val = u''
|
||||
if type(val).__name__ != 'unicode':
|
||||
val = unicode(val, 'utf-8')
|
||||
if isinstance(val, unicode_type):
|
||||
val = unicode_type(val, 'utf-8')
|
||||
|
||||
elems = document.getElementsByTagName(self.tag_name)
|
||||
elem = None
|
||||
|
@ -6,6 +6,7 @@ import struct, array, zlib, cStringIO, collections, re
|
||||
from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE
|
||||
from calibre import entity_to_unicode, prepare_string_for_xml
|
||||
from calibre.ebooks.lrf.tags import Tag
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
ruby_tags = {
|
||||
0xF575: ['rubyAlignAndAdjust', 'W'],
|
||||
@ -88,10 +89,10 @@ class LRFObject(object):
|
||||
yield i
|
||||
|
||||
def __unicode__(self):
|
||||
return unicode(self.__class__.__name__)
|
||||
return unicode_type(self.__class__.__name__)
|
||||
|
||||
def __str__(self):
|
||||
return unicode(self).encode('utf-8')
|
||||
return unicode_type(self).encode('utf-8')
|
||||
|
||||
|
||||
class LRFContentObject(LRFObject):
|
||||
@ -255,7 +256,7 @@ class Color(object):
|
||||
return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b)
|
||||
|
||||
def __str__(self):
|
||||
return unicode(self)
|
||||
return unicode_type(self)
|
||||
|
||||
def __len__(self):
|
||||
return 4
|
||||
@ -274,7 +275,7 @@ class EmptyPageElement(object):
|
||||
yield i
|
||||
|
||||
def __str__(self):
|
||||
return unicode(self)
|
||||
return unicode_type(self)
|
||||
|
||||
|
||||
class PageDiv(EmptyPageElement):
|
||||
@ -429,12 +430,12 @@ class Page(LRFStream):
|
||||
def __unicode__(self):
|
||||
s = u'\n<Page pagestyle="%d" objid="%d">\n'%(self.style_id, self.id)
|
||||
for i in self:
|
||||
s += unicode(i)
|
||||
s += unicode_type(i)
|
||||
s += '\n</Page>\n'
|
||||
return s
|
||||
|
||||
def __str__(self):
|
||||
return unicode(self)
|
||||
return unicode_type(self)
|
||||
|
||||
def to_html(self):
|
||||
s = u''
|
||||
@ -619,7 +620,7 @@ class Block(LRFStream, TextCSS):
|
||||
s += '%s="%s" '%(attr, self.attrs[attr])
|
||||
if self.name != 'ImageBlock':
|
||||
s = s.rstrip()+'>\n'
|
||||
s += unicode(self.content)
|
||||
s += unicode_type(self.content)
|
||||
s += '</%s>\n'%(self.name,)
|
||||
return s
|
||||
return s.rstrip() + ' />\n'
|
||||
@ -717,7 +718,7 @@ class Text(LRFStream):
|
||||
lineposition_map = {1:'before', 2:'after'}
|
||||
|
||||
def add_text(self, text):
|
||||
s = unicode(text, "utf-16-le")
|
||||
s = unicode_type(text, "utf-16-le")
|
||||
if s:
|
||||
s = s.translate(self.text_map)
|
||||
self.content.append(self.entity_pattern.sub(entity_to_unicode, s))
|
||||
@ -888,7 +889,7 @@ class Text(LRFStream):
|
||||
p = open_containers.pop()
|
||||
s += u'</%s>'%(p.name,)
|
||||
else:
|
||||
s += unicode(c)
|
||||
s += unicode_type(c)
|
||||
if not c.self_closing:
|
||||
open_containers.append(c)
|
||||
|
||||
@ -1001,7 +1002,7 @@ class Canvas(LRFStream):
|
||||
s += '%s="%s" '%(attr, self.attrs[attr])
|
||||
s = s.rstrip() + '>\n'
|
||||
for po in self:
|
||||
s += unicode(po) + '\n'
|
||||
s += unicode_type(po) + '\n'
|
||||
s += '</%s>\n'%(self.__class__.__name__,)
|
||||
return s
|
||||
|
||||
@ -1198,7 +1199,7 @@ class BookAttr(StyleObject, LRFObject):
|
||||
s += u'<BookSetting bindingdirection="%s" dpi="%s" screenwidth="%s" screenheight="%s" colordepth="%s" />\n'%\
|
||||
(self.binding_map[doc.binding], doc.dpi, doc.width, doc.height, doc.color_depth)
|
||||
for font in self._document.font_map.values():
|
||||
s += unicode(font)
|
||||
s += unicode_type(font)
|
||||
s += '</BookStyle>\n'
|
||||
return s
|
||||
|
||||
@ -1239,7 +1240,7 @@ class TOCObject(LRFStream):
|
||||
def __unicode__(self):
|
||||
s = u'<TOC>\n'
|
||||
for i in self:
|
||||
s += unicode(i)
|
||||
s += unicode_type(i)
|
||||
return s + '</TOC>\n'
|
||||
|
||||
|
||||
@ -1288,5 +1289,3 @@ def get_object(document, stream, id, offset, size, scramble_key):
|
||||
return object_map[obj_type](document, stream, obj_id, scramble_key, offset+size-Tag.tags[0][0])
|
||||
|
||||
raise LRFParseError("Unknown object type: %02X!" % obj_type)
|
||||
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
""" elements.py -- replacements and helpers for ElementTree """
|
||||
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class ElementWriter(object):
|
||||
|
||||
@ -21,9 +23,9 @@ class ElementWriter(object):
|
||||
return text
|
||||
|
||||
def _writeAttribute(self, f, name, value):
|
||||
f.write(u' %s="' % unicode(name))
|
||||
f.write(u' %s="' % unicode_type(name))
|
||||
if not isinstance(value, basestring):
|
||||
value = unicode(value)
|
||||
value = unicode_type(value)
|
||||
value = self._encodeCdata(value)
|
||||
value = value.replace('"', '"')
|
||||
f.write(value)
|
||||
@ -34,7 +36,7 @@ class ElementWriter(object):
|
||||
f.write(text)
|
||||
|
||||
def _write(self, f, e):
|
||||
f.write(u'<' + unicode(e.tag))
|
||||
f.write(u'<' + unicode_type(e.tag))
|
||||
|
||||
attributes = e.items()
|
||||
attributes.sort()
|
||||
@ -72,6 +74,3 @@ class ElementWriter(object):
|
||||
f.write(u'<?xml version="1.0" encoding="%s"?>\n' % self.outputEncodingName)
|
||||
|
||||
self._write(f, self.e)
|
||||
|
||||
|
||||
|
||||
|
@ -5,6 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
import struct
|
||||
|
||||
from calibre.ebooks.lrf import LRFParseError
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class Tag(object):
|
||||
@ -246,7 +247,7 @@ class Tag(object):
|
||||
@classmethod
|
||||
def string_parser(self, stream):
|
||||
size = struct.unpack("<H", stream.read(2))[0]
|
||||
return unicode(stream.read(size), "utf_16")
|
||||
return unicode_type(stream.read(size), "utf_16")
|
||||
|
||||
def type_one_parser(self, stream):
|
||||
cnt = struct.unpack("<H", stream.read(2))[0]
|
||||
|
@ -15,6 +15,8 @@ from calibre import relpath, guess_type, remove_bracketed_text, prints, force_un
|
||||
|
||||
from calibre.utils.config_base import tweaks
|
||||
|
||||
from polyglot.builtins import codepoint_to_chr, unicode_type
|
||||
|
||||
try:
|
||||
_author_pat = re.compile(tweaks['authors_split_regex'])
|
||||
except:
|
||||
@ -134,7 +136,7 @@ def get_title_sort_pat(lang=None):
|
||||
return ans
|
||||
|
||||
|
||||
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in
|
||||
_ignore_starts = u'\'"'+u''.join(codepoint_to_chr(x) for x in
|
||||
range(0x2018, 0x201e)+[0x2032, 0x2033])
|
||||
|
||||
|
||||
@ -227,7 +229,7 @@ class Resource(object):
|
||||
self._href = href_or_path
|
||||
else:
|
||||
pc = url[2]
|
||||
if isinstance(pc, unicode):
|
||||
if isinstance(pc, unicode_type):
|
||||
pc = pc.encode('utf-8')
|
||||
pc = unquote(pc).decode('utf-8')
|
||||
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
|
||||
@ -249,7 +251,7 @@ class Resource(object):
|
||||
basedir = os.getcwdu()
|
||||
if self.path is None:
|
||||
return self._href
|
||||
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode) else self.fragment
|
||||
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment
|
||||
frag = '#'+quote(f) if self.fragment else ''
|
||||
if self.path == basedir:
|
||||
return ''+frag
|
||||
@ -257,7 +259,7 @@ class Resource(object):
|
||||
rpath = relpath(self.path, basedir)
|
||||
except OSError: # On windows path and basedir could be on different drives
|
||||
rpath = self.path
|
||||
if isinstance(rpath, unicode):
|
||||
if isinstance(rpath, unicode_type):
|
||||
rpath = rpath.encode('utf-8')
|
||||
return quote(rpath.replace(os.sep, '/'))+frag
|
||||
|
||||
|
@ -14,6 +14,7 @@ from calibre.ebooks.metadata.book import (SC_COPYABLE_FIELDS,
|
||||
TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS)
|
||||
from calibre.library.field_metadata import FieldMetadata
|
||||
from calibre.utils.icu import sort_key
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
# Special sets used to optimize the performance of getting and setting
|
||||
# attributes on Metadata objects
|
||||
@ -606,14 +607,14 @@ class Metadata(object):
|
||||
return authors_to_string(self.authors)
|
||||
|
||||
def format_tags(self):
|
||||
return u', '.join([unicode(t) for t in sorted(self.tags, key=sort_key)])
|
||||
return u', '.join([unicode_type(t) for t in sorted(self.tags, key=sort_key)])
|
||||
|
||||
def format_rating(self, v=None, divide_by=1.0):
|
||||
if v is None:
|
||||
if self.rating is not None:
|
||||
return unicode(self.rating/divide_by)
|
||||
return unicode_type(self.rating/divide_by)
|
||||
return u'None'
|
||||
return unicode(v/divide_by)
|
||||
return unicode_type(v/divide_by)
|
||||
|
||||
def format_field(self, key, series_with_index=True):
|
||||
'''
|
||||
@ -637,15 +638,15 @@ class Metadata(object):
|
||||
if cmeta and cmeta['datatype'] == 'series':
|
||||
if self.get(tkey):
|
||||
res = self.get_extra(tkey)
|
||||
return (unicode(cmeta['name']+'_index'),
|
||||
return (unicode_type(cmeta['name']+'_index'),
|
||||
self.format_series_index(res), res, cmeta)
|
||||
else:
|
||||
return (unicode(cmeta['name']+'_index'), '', '', cmeta)
|
||||
return (unicode_type(cmeta['name']+'_index'), '', '', cmeta)
|
||||
|
||||
if key in self.custom_field_keys():
|
||||
res = self.get(key, None) # get evaluates all necessary composites
|
||||
cmeta = self.get_user_metadata(key, make_copy=False)
|
||||
name = unicode(cmeta['name'])
|
||||
name = unicode_type(cmeta['name'])
|
||||
if res is None or res == '': # can't check "not res" because of numeric fields
|
||||
return (name, res, None, None)
|
||||
orig_res = res
|
||||
@ -668,7 +669,7 @@ class Metadata(object):
|
||||
res = fmt.format(res)
|
||||
except:
|
||||
pass
|
||||
return (name, unicode(res), orig_res, cmeta)
|
||||
return (name, unicode_type(res), orig_res, cmeta)
|
||||
|
||||
# convert top-level ids into their value
|
||||
if key in TOP_LEVEL_IDENTIFIERS:
|
||||
@ -682,11 +683,11 @@ class Metadata(object):
|
||||
if fmkey in field_metadata and field_metadata[fmkey]['kind'] == 'field':
|
||||
res = self.get(key, None)
|
||||
fmeta = field_metadata[fmkey]
|
||||
name = unicode(fmeta['name'])
|
||||
name = unicode_type(fmeta['name'])
|
||||
if res is None or res == '':
|
||||
return (name, res, None, None)
|
||||
orig_res = res
|
||||
name = unicode(fmeta['name'])
|
||||
name = unicode_type(fmeta['name'])
|
||||
datatype = fmeta['datatype']
|
||||
if key == 'authors':
|
||||
res = authors_to_string(res)
|
||||
@ -704,7 +705,7 @@ class Metadata(object):
|
||||
res = u'%.2g'%(res/2.0)
|
||||
elif key == 'size':
|
||||
res = human_readable(res)
|
||||
return (name, unicode(res), orig_res, fmeta)
|
||||
return (name, unicode_type(res), orig_res, fmeta)
|
||||
|
||||
return (None, None, None, None)
|
||||
|
||||
@ -718,7 +719,7 @@ class Metadata(object):
|
||||
ans = []
|
||||
|
||||
def fmt(x, y):
|
||||
ans.append(u'%-20s: %s'%(unicode(x), unicode(y)))
|
||||
ans.append(u'%-20s: %s'%(unicode_type(x), unicode_type(y)))
|
||||
|
||||
fmt('Title', self.title)
|
||||
if self.title_sort:
|
||||
@ -732,7 +733,7 @@ class Metadata(object):
|
||||
if getattr(self, 'book_producer', False):
|
||||
fmt('Book Producer', self.book_producer)
|
||||
if self.tags:
|
||||
fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
|
||||
fmt('Tags', u', '.join([unicode_type(t) for t in self.tags]))
|
||||
if self.series:
|
||||
fmt('Series', self.series + ' #%s'%self.format_series_index())
|
||||
if not self.is_null('languages'):
|
||||
@ -745,7 +746,7 @@ class Metadata(object):
|
||||
if self.pubdate is not None:
|
||||
fmt('Published', isoformat(self.pubdate))
|
||||
if self.rights is not None:
|
||||
fmt('Rights', unicode(self.rights))
|
||||
fmt('Rights', unicode_type(self.rights))
|
||||
if self.identifiers:
|
||||
fmt('Identifiers', u', '.join(['%s:%s'%(k, v) for k, v in
|
||||
self.identifiers.iteritems()]))
|
||||
@ -756,7 +757,7 @@ class Metadata(object):
|
||||
val = self.get(key, None)
|
||||
if val:
|
||||
(name, val) = self.format_field(key)
|
||||
fmt(name, unicode(val))
|
||||
fmt(name, unicode_type(val))
|
||||
return u'\n'.join(ans)
|
||||
|
||||
def to_html(self):
|
||||
@ -765,22 +766,22 @@ class Metadata(object):
|
||||
'''
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.utils.date import isoformat
|
||||
ans = [(_('Title'), unicode(self.title))]
|
||||
ans = [(_('Title'), unicode_type(self.title))]
|
||||
ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
|
||||
ans += [(_('Publisher'), unicode(self.publisher))]
|
||||
ans += [(_('Producer'), unicode(self.book_producer))]
|
||||
ans += [(_('Comments'), unicode(self.comments))]
|
||||
ans += [('ISBN', unicode(self.isbn))]
|
||||
ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))]
|
||||
ans += [(_('Publisher'), unicode_type(self.publisher))]
|
||||
ans += [(_('Producer'), unicode_type(self.book_producer))]
|
||||
ans += [(_('Comments'), unicode_type(self.comments))]
|
||||
ans += [('ISBN', unicode_type(self.isbn))]
|
||||
ans += [(_('Tags'), u', '.join([unicode_type(t) for t in self.tags]))]
|
||||
if self.series:
|
||||
ans += [(_('Series'), unicode(self.series) + ' #%s'%self.format_series_index())]
|
||||
ans += [(_('Series'), unicode_type(self.series) + ' #%s'%self.format_series_index())]
|
||||
ans += [(_('Languages'), u', '.join(self.languages))]
|
||||
if self.timestamp is not None:
|
||||
ans += [(_('Timestamp'), unicode(isoformat(self.timestamp, as_utc=False, sep=' ')))]
|
||||
ans += [(_('Timestamp'), unicode_type(isoformat(self.timestamp, as_utc=False, sep=' ')))]
|
||||
if self.pubdate is not None:
|
||||
ans += [(_('Published'), unicode(isoformat(self.pubdate, as_utc=False, sep=' ')))]
|
||||
ans += [(_('Published'), unicode_type(isoformat(self.pubdate, as_utc=False, sep=' ')))]
|
||||
if self.rights is not None:
|
||||
ans += [(_('Rights'), unicode(self.rights))]
|
||||
ans += [(_('Rights'), unicode_type(self.rights))]
|
||||
for key in self.custom_field_keys():
|
||||
val = self.get(key, None)
|
||||
if val:
|
||||
|
@ -20,6 +20,7 @@ from calibre.utils.icu import sort_key
|
||||
from calibre.utils.formatter import EvalFormatter
|
||||
from calibre.utils.date import is_date_undefined
|
||||
from calibre.utils.localization import calibre_langcode_to_name
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
default_sort = ('title', 'title_sort', 'authors', 'author_sort', 'series', 'rating', 'pubdate', 'tags', 'publisher', 'identifiers')
|
||||
|
||||
@ -163,7 +164,7 @@ def mi_to_html(mi, field_list=None, default_author_link=None, use_roman_numbers=
|
||||
path = force_unicode(mi.path, filesystem_encoding)
|
||||
scheme = u'devpath' if isdevice else u'path'
|
||||
url = prepare_string_for_xml(path if isdevice else
|
||||
unicode(book_id), True)
|
||||
unicode_type(book_id), True)
|
||||
pathstr = _('Click to open')
|
||||
extra = ''
|
||||
if isdevice:
|
||||
|
@ -10,10 +10,11 @@ from calibre.constants import preferred_encoding
|
||||
from calibre.ebooks.metadata.book import SERIALIZABLE_FIELDS
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.utils.imghdr import what
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def ensure_unicode(obj, enc=preferred_encoding):
|
||||
if isinstance(obj, unicode):
|
||||
if isinstance(obj, unicode_type):
|
||||
return obj
|
||||
if isinstance(obj, bytes):
|
||||
return obj.decode(enc, 'replace')
|
||||
|
@ -16,6 +16,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
|
||||
from calibre.ebooks.lrf.meta import LRFMetaFile
|
||||
from calibre import prints
|
||||
from calibre.utils.date import parse_date
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
USAGE=_('%prog ebook_file [options]\n') + \
|
||||
_('''
|
||||
@ -181,7 +182,7 @@ def main(args=sys.argv):
|
||||
mi = get_metadata(stream, stream_type, force_read_metadata=True)
|
||||
if trying_to_set:
|
||||
prints(_('Original metadata')+'::')
|
||||
metadata = unicode(mi)
|
||||
metadata = unicode_type(mi)
|
||||
if trying_to_set:
|
||||
metadata = '\t'+'\n\t'.join(metadata.split('\n'))
|
||||
prints(metadata, safe_encode=True)
|
||||
@ -198,7 +199,7 @@ def main(args=sys.argv):
|
||||
lrf.book_id = opts.lrf_bookid
|
||||
mi = get_metadata(stream, stream_type, force_read_metadata=True)
|
||||
prints('\n' + _('Changed metadata') + '::')
|
||||
metadata = unicode(mi)
|
||||
metadata = unicode_type(mi)
|
||||
metadata = '\t'+'\n\t'.join(metadata.split('\n'))
|
||||
prints(metadata, safe_encode=True)
|
||||
if lrf is not None:
|
||||
|
@ -18,6 +18,7 @@ from calibre.utils.imghdr import identify
|
||||
from calibre import guess_type, guess_all_extensions, prints, force_unicode
|
||||
from calibre.ebooks.metadata import MetaInformation, check_isbn
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
NAMESPACES = {
|
||||
@ -26,7 +27,7 @@ NAMESPACES = {
|
||||
'xlink' : 'http://www.w3.org/1999/xlink'
|
||||
}
|
||||
|
||||
tostring = partial(etree.tostring, method='text', encoding=unicode)
|
||||
tostring = partial(etree.tostring, method='text', encoding=unicode_type)
|
||||
|
||||
|
||||
def XLINK(tag):
|
||||
@ -112,9 +113,9 @@ def get_metadata(stream):
|
||||
|
||||
# fallback for book_title
|
||||
if book_title:
|
||||
book_title = unicode(book_title)
|
||||
book_title = unicode_type(book_title)
|
||||
else:
|
||||
book_title = force_unicode(os.path.splitext(
|
||||
book_title = force_unicode(os.path.splitext(
|
||||
os.path.basename(getattr(stream, 'name',
|
||||
_('Unknown'))))[0])
|
||||
mi = MetaInformation(book_title, authors)
|
||||
@ -249,7 +250,7 @@ def _parse_tags(root, mi, ctx):
|
||||
# -- i18n Translations-- ?
|
||||
tags = ctx.XPath('//fb:%s/fb:genre/text()' % genre_sec)(root)
|
||||
if tags:
|
||||
mi.tags = list(map(unicode, tags))
|
||||
mi.tags = list(map(unicode_type, tags))
|
||||
break
|
||||
|
||||
|
||||
@ -447,7 +448,7 @@ def ensure_namespace(doc):
|
||||
break
|
||||
if bare_tags:
|
||||
import re
|
||||
raw = etree.tostring(doc, encoding=unicode)
|
||||
raw = etree.tostring(doc, encoding=unicode_type)
|
||||
raw = re.sub(r'''<(description|body)\s+xmlns=['"]['"]>''', r'<\1>', raw)
|
||||
doc = etree.fromstring(raw)
|
||||
return doc
|
||||
|
@ -6,6 +6,7 @@ __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
|
||||
import sys
|
||||
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
MAGIC = ['\x00\x01BOOKDOUG', '\x00\x02BOOKDOUG']
|
||||
|
||||
@ -43,6 +44,6 @@ def get_metadata(stream):
|
||||
if category:
|
||||
mi.category = category
|
||||
except Exception as err:
|
||||
msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode(err))
|
||||
msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode_type(err))
|
||||
print(msg.encode('utf8'), file=sys.stderr)
|
||||
return mi
|
||||
|
@ -14,11 +14,12 @@ from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre import browser
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from polyglot.builtins import codepoint_to_chr, unicode_type
|
||||
|
||||
URL = \
|
||||
"http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping="
|
||||
|
||||
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
|
||||
_ignore_starts = u'\'"'+u''.join(codepoint_to_chr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
|
||||
|
||||
|
||||
def get_series(title, authors, timeout=60):
|
||||
@ -28,7 +29,7 @@ def get_series(title, authors, timeout=60):
|
||||
title = re.sub(r'^(A|The|An)\s+', '', title).strip()
|
||||
if not title:
|
||||
return mi
|
||||
if isinstance(title, unicode):
|
||||
if isinstance(title, unicode_type):
|
||||
title = title.encode('utf-8')
|
||||
|
||||
title = urllib.quote_plus(title)
|
||||
@ -73,7 +74,7 @@ def get_series(title, authors, timeout=60):
|
||||
mi.series = series
|
||||
ns = ss.nextSibling
|
||||
if ns.contents:
|
||||
raw = unicode(ns.contents[0])
|
||||
raw = unicode_type(ns.contents[0])
|
||||
raw = raw.partition('.')[0].strip()
|
||||
try:
|
||||
mi.series_index = int(raw)
|
||||
@ -85,4 +86,3 @@ def get_series(title, authors, timeout=60):
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
print(get_series(sys.argv[-2], [sys.argv[-1]]))
|
||||
|
||||
|
@ -18,6 +18,7 @@ from calibre.utils.config_base import tweaks
|
||||
from calibre.utils.date import parse_only_date
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
from calibre.utils.imghdr import identify
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class InvalidKFX(ValueError):
|
||||
@ -356,4 +357,4 @@ if __name__ == '__main__':
|
||||
from calibre import prints
|
||||
with open(sys.argv[-1], 'rb') as f:
|
||||
mi = read_metadata_kfx(f)
|
||||
prints(unicode(mi))
|
||||
prints(unicode_type(mi))
|
||||
|
@ -21,6 +21,7 @@ from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||
from calibre.utils.date import now as nowf
|
||||
from calibre.utils.imghdr import what
|
||||
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def is_image(ss):
|
||||
@ -223,7 +224,7 @@ class MetadataUpdater(object):
|
||||
|
||||
def create_exth(self, new_title=None, exth=None):
|
||||
# Add an EXTH block to record 0, rewrite the stream
|
||||
if isinstance(new_title, unicode):
|
||||
if isinstance(new_title, unicode_type):
|
||||
new_title = new_title.encode(self.codec, 'replace')
|
||||
|
||||
# Fetch the existing title
|
||||
|
@ -25,6 +25,7 @@ from calibre.utils.localization import get_lang, canonicalize_lang
|
||||
from calibre import prints, guess_type
|
||||
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
|
||||
from calibre.utils.config import tweaks
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
pretty_print_opf = False
|
||||
|
||||
@ -82,7 +83,7 @@ class Resource(object): # {{{
|
||||
self._href = href_or_path
|
||||
else:
|
||||
pc = url[2]
|
||||
if isinstance(pc, unicode):
|
||||
if isinstance(pc, unicode_type):
|
||||
pc = pc.encode('utf-8')
|
||||
pc = pc.decode('utf-8')
|
||||
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
|
||||
@ -103,7 +104,7 @@ class Resource(object): # {{{
|
||||
basedir = os.getcwdu()
|
||||
if self.path is None:
|
||||
return self._href
|
||||
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode) else self.fragment
|
||||
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment
|
||||
frag = '#'+f if self.fragment else ''
|
||||
if self.path == basedir:
|
||||
return ''+frag
|
||||
@ -111,7 +112,7 @@ class Resource(object): # {{{
|
||||
rpath = os.path.relpath(self.path, basedir)
|
||||
except ValueError: # On windows path and basedir could be on different drives
|
||||
rpath = self.path
|
||||
if isinstance(rpath, unicode):
|
||||
if isinstance(rpath, unicode_type):
|
||||
rpath = rpath.encode('utf-8')
|
||||
return rpath.replace(os.sep, '/')+frag
|
||||
|
||||
@ -206,10 +207,10 @@ class ManifestItem(Resource): # {{{
|
||||
return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type)
|
||||
|
||||
def __str__(self):
|
||||
return unicode(self).encode('utf-8')
|
||||
return unicode_type(self).encode('utf-8')
|
||||
|
||||
def __repr__(self):
|
||||
return unicode(self)
|
||||
return unicode_type(self)
|
||||
|
||||
def __getitem__(self, index):
|
||||
if index == 0:
|
||||
@ -410,7 +411,7 @@ class Guide(ResourceCollection): # {{{
|
||||
class MetadataField(object):
|
||||
|
||||
def __init__(self, name, is_dc=True, formatter=None, none_is=None,
|
||||
renderer=lambda x: unicode(x)):
|
||||
renderer=lambda x: unicode_type(x)):
|
||||
self.name = name
|
||||
self.is_dc = is_dc
|
||||
self.formatter = formatter
|
||||
@ -791,7 +792,7 @@ class OPF(object): # {{{
|
||||
def unquote_urls(self):
|
||||
def get_href(item):
|
||||
raw = unquote(item.get('href', ''))
|
||||
if not isinstance(raw, unicode):
|
||||
if not isinstance(raw, unicode_type):
|
||||
raw = raw.decode('utf-8')
|
||||
return raw
|
||||
for item in self.itermanifest():
|
||||
@ -820,7 +821,7 @@ class OPF(object): # {{{
|
||||
titles = ()
|
||||
if val:
|
||||
title = titles[0] if titles else self.create_metadata_element('title')
|
||||
title.text = re.sub(r'\s+', ' ', unicode(val))
|
||||
title.text = re.sub(r'\s+', ' ', unicode_type(val))
|
||||
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
@ -869,7 +870,7 @@ class OPF(object): # {{{
|
||||
for key in matches[0].attrib:
|
||||
if key.endswith('file-as'):
|
||||
matches[0].attrib.pop(key)
|
||||
matches[0].set('{%s}file-as'%self.NAMESPACES['opf'], unicode(val))
|
||||
matches[0].set('{%s}file-as'%self.NAMESPACES['opf'], unicode_type(val))
|
||||
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
@ -889,7 +890,7 @@ class OPF(object): # {{{
|
||||
tag.getparent().remove(tag)
|
||||
for tag in val:
|
||||
elem = self.create_metadata_element('subject')
|
||||
self.set_text(elem, unicode(tag))
|
||||
self.set_text(elem, unicode_type(tag))
|
||||
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
@ -900,7 +901,7 @@ class OPF(object): # {{{
|
||||
ans = None
|
||||
for match in self.pubdate_path(self.metadata):
|
||||
try:
|
||||
val = parse_date(etree.tostring(match, encoding=unicode,
|
||||
val = parse_date(etree.tostring(match, encoding=unicode_type,
|
||||
method='text', with_tail=False).strip())
|
||||
except:
|
||||
continue
|
||||
@ -912,7 +913,7 @@ class OPF(object): # {{{
|
||||
least_val = least_elem = None
|
||||
for match in self.pubdate_path(self.metadata):
|
||||
try:
|
||||
cval = parse_date(etree.tostring(match, encoding=unicode,
|
||||
cval = parse_date(etree.tostring(match, encoding=unicode_type,
|
||||
method='text', with_tail=False).strip())
|
||||
except:
|
||||
match.getparent().remove(match)
|
||||
@ -962,7 +963,7 @@ class OPF(object): # {{{
|
||||
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'ISBN'}
|
||||
matches = [self.create_metadata_element('identifier',
|
||||
attrib=attrib)]
|
||||
self.set_text(matches[0], unicode(val))
|
||||
self.set_text(matches[0], unicode_type(val))
|
||||
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
@ -975,7 +976,7 @@ class OPF(object): # {{{
|
||||
for attr, val in x.attrib.iteritems():
|
||||
if attr.endswith('scheme'):
|
||||
typ = icu_lower(val)
|
||||
val = etree.tostring(x, with_tail=False, encoding=unicode,
|
||||
val = etree.tostring(x, with_tail=False, encoding=unicode_type,
|
||||
method='text').strip()
|
||||
if val and typ not in ('calibre', 'uuid'):
|
||||
if typ == 'isbn' and val.lower().startswith('urn:isbn:'):
|
||||
@ -984,7 +985,7 @@ class OPF(object): # {{{
|
||||
found_scheme = True
|
||||
break
|
||||
if not found_scheme:
|
||||
val = etree.tostring(x, with_tail=False, encoding=unicode,
|
||||
val = etree.tostring(x, with_tail=False, encoding=unicode_type,
|
||||
method='text').strip()
|
||||
if val.lower().startswith('urn:isbn:'):
|
||||
val = check_isbn(val.split(':')[-1])
|
||||
@ -1017,7 +1018,7 @@ class OPF(object): # {{{
|
||||
for typ, val in identifiers.iteritems():
|
||||
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: typ.upper()}
|
||||
self.set_text(self.create_metadata_element(
|
||||
'identifier', attrib=attrib), unicode(val))
|
||||
'identifier', attrib=attrib), unicode_type(val))
|
||||
|
||||
@dynamic_property
|
||||
def application_id(self):
|
||||
@ -1041,7 +1042,7 @@ class OPF(object): # {{{
|
||||
if uuid_id and uuid_id in removed_ids:
|
||||
attrib['id'] = uuid_id
|
||||
self.set_text(self.create_metadata_element(
|
||||
'identifier', attrib=attrib), unicode(val))
|
||||
'identifier', attrib=attrib), unicode_type(val))
|
||||
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
@ -1058,7 +1059,7 @@ class OPF(object): # {{{
|
||||
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'uuid'}
|
||||
matches = [self.create_metadata_element('identifier',
|
||||
attrib=attrib)]
|
||||
self.set_text(matches[0], unicode(val))
|
||||
self.set_text(matches[0], unicode_type(val))
|
||||
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
@ -1095,7 +1096,7 @@ class OPF(object): # {{{
|
||||
|
||||
for lang in val:
|
||||
l = self.create_metadata_element('language')
|
||||
self.set_text(l, unicode(lang))
|
||||
self.set_text(l, unicode_type(lang))
|
||||
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
@ -1118,7 +1119,7 @@ class OPF(object): # {{{
|
||||
if not matches:
|
||||
matches = [self.create_metadata_element('contributor')]
|
||||
matches[0].set('{%s}role'%self.NAMESPACES['opf'], 'bkp')
|
||||
self.set_text(matches[0], unicode(val))
|
||||
self.set_text(matches[0], unicode_type(val))
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
def identifier_iter(self):
|
||||
@ -1701,7 +1702,7 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
|
||||
metadata[-1].tail = '\n' +(' '*4)
|
||||
|
||||
if mi.cover:
|
||||
if not isinstance(mi.cover, unicode):
|
||||
if not isinstance(mi.cover, unicode_type):
|
||||
mi.cover = mi.cover.decode(filesystem_encoding)
|
||||
guide.text = '\n'+(' '*8)
|
||||
r = guide.makeelement(OPF('reference'),
|
||||
|
@ -12,6 +12,7 @@ from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.ebooks.metadata import (
|
||||
MetaInformation, string_to_authors, check_isbn, check_doi)
|
||||
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def get_tools():
|
||||
@ -88,8 +89,8 @@ def page_images(pdfpath, outputdir, first=1, last=1):
|
||||
import win32process as w
|
||||
args['creationflags'] = w.HIGH_PRIORITY_CLASS | w.CREATE_NO_WINDOW
|
||||
try:
|
||||
subprocess.check_call([pdftoppm, '-cropbox', '-jpeg', '-f', unicode(first),
|
||||
'-l', unicode(last), pdfpath,
|
||||
subprocess.check_call([pdftoppm, '-cropbox', '-jpeg', '-f', unicode_type(first),
|
||||
'-l', unicode_type(last), pdfpath,
|
||||
os.path.join(outputdir, 'page-images')], **args)
|
||||
except subprocess.CalledProcessError as e:
|
||||
raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode)
|
||||
|
@ -6,6 +6,7 @@ __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
|
||||
import sys, struct
|
||||
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
MAGIC = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00'
|
||||
|
||||
@ -47,9 +48,7 @@ def get_metadata(stream):
|
||||
mi.author = value
|
||||
mi.authors = string_to_authors(value)
|
||||
except Exception as err:
|
||||
msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode(err))
|
||||
msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode_type(err))
|
||||
print(msg.encode('utf8'), file=sys.stderr)
|
||||
raise
|
||||
return mi
|
||||
|
||||
|
||||
|
@ -8,6 +8,7 @@ import re, cStringIO, codecs
|
||||
|
||||
from calibre import force_unicode
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||
from polyglot.builtins import codepoint_to_chr, unicode_type
|
||||
|
||||
title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
|
||||
author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
|
||||
@ -75,7 +76,7 @@ def detect_codepage(stream):
|
||||
|
||||
|
||||
def encode(unistr):
|
||||
if not isinstance(unistr, unicode):
|
||||
if not isinstance(unistr, unicode_type):
|
||||
unistr = force_unicode(unistr)
|
||||
return ''.join([str(c) if ord(c) < 128 else '\\u' + str(ord(c)) + '?' for c in unistr])
|
||||
|
||||
@ -88,7 +89,7 @@ def decode(raw, codec):
|
||||
raw = raw.decode(codec)
|
||||
|
||||
def uni(match):
|
||||
return unichr(int(match.group(1)))
|
||||
return codepoint_to_chr(int(match.group(1)))
|
||||
raw = re.sub(r'\\u([0-9]{3,4}).', uni, raw)
|
||||
return raw
|
||||
|
||||
@ -232,4 +233,3 @@ def set_metadata(stream, options):
|
||||
stream.truncate()
|
||||
stream.write(src)
|
||||
stream.write(after)
|
||||
|
||||
|
@ -15,6 +15,7 @@ from calibre.constants import __appname__, __version__
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.utils.cleantext import clean_xml_chars
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
NCX_NS = "http://www.daisy.org/z3986/2005/ncx/"
|
||||
CALIBRE_NS = "http://calibre.kovidgoyal.net/2009/metadata"
|
||||
@ -194,7 +195,7 @@ class TOC(list):
|
||||
text = u''
|
||||
for txt in txt_path(nl):
|
||||
text += etree.tostring(txt, method='text',
|
||||
encoding=unicode, with_tail=False)
|
||||
encoding=unicode_type, with_tail=False)
|
||||
content = content_path(np)
|
||||
if content and text:
|
||||
content = content[0]
|
||||
@ -229,7 +230,7 @@ class TOC(list):
|
||||
fragment = fragment.strip()
|
||||
href = href.strip()
|
||||
|
||||
txt = ''.join([unicode(s).strip() for s in a.findAll(text=True)])
|
||||
txt = ''.join([unicode_type(s).strip() for s in a.findAll(text=True)])
|
||||
add = True
|
||||
for i in self.flat():
|
||||
if i.href == href and i.fragment == fragment:
|
||||
@ -264,7 +265,7 @@ class TOC(list):
|
||||
text = clean_xml_chars(text)
|
||||
elem = E.navPoint(
|
||||
E.navLabel(E.text(re.sub(r'\s+', ' ', text))),
|
||||
E.content(src=unicode(np.href)+(('#' + unicode(np.fragment))
|
||||
E.content(src=unicode_type(np.href)+(('#' + unicode_type(np.fragment))
|
||||
if np.fragment else '')),
|
||||
id=item_id,
|
||||
playOrder=str(np.play_order)
|
||||
|
@ -20,6 +20,7 @@ from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
|
||||
from calibre.utils.imghdr import what
|
||||
from calibre.ebooks.mobi.debug import format_bytes
|
||||
from calibre.ebooks.mobi.debug.headers import TextRecord
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class TagX(object): # {{{
|
||||
@ -564,7 +565,7 @@ class TBSIndexing(object): # {{{
|
||||
|
||||
def get_index(self, idx):
|
||||
for i in self.indices:
|
||||
if i.index in {idx, unicode(idx)}:
|
||||
if i.index in {idx, unicode_type(idx)}:
|
||||
return i
|
||||
raise IndexError('Index %d not found'%idx)
|
||||
|
||||
@ -844,5 +845,3 @@ def inspect_mobi(mobi_file, ddir):
|
||||
|
||||
|
||||
# }}}
|
||||
|
||||
|
||||
|
@ -16,6 +16,7 @@ from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
|
||||
from calibre.ebooks.mobi.utils import convert_color_for_font_tag
|
||||
from calibre.utils.imghdr import identify
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
MBP_NS = 'http://mobipocket.com/ns/mbp'
|
||||
|
||||
@ -151,7 +152,7 @@ class MobiMLizer(object):
|
||||
return "%dem" % int(round(ptsize / embase))
|
||||
|
||||
def preize_text(self, text, pre_wrap=False):
|
||||
text = unicode(text)
|
||||
text = unicode_type(text)
|
||||
if pre_wrap:
|
||||
# Replace n consecutive spaces with n-1 NBSP + space
|
||||
text = re.sub(r' {2,}', lambda m:(u'\xa0'*(len(m.group())-1) + u' '), text)
|
||||
@ -228,7 +229,7 @@ class MobiMLizer(object):
|
||||
while vspace > 0:
|
||||
wrapper.addprevious(etree.Element(XHTML('br')))
|
||||
vspace -= 1
|
||||
if istate.halign != 'auto' and isinstance(istate.halign, (str, unicode)):
|
||||
if istate.halign != 'auto' and isinstance(istate.halign, (str, unicode_type)):
|
||||
para.attrib['align'] = istate.halign
|
||||
istate.rendered = True
|
||||
pstate = bstate.istate
|
||||
|
@ -16,6 +16,7 @@ from calibre.ebooks.mobi.langcodes import main_language, sub_language, mobi2iana
|
||||
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
from calibre.utils.config_base import tweaks
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
NULL_INDEX = 0xffffffff
|
||||
|
||||
@ -239,7 +240,7 @@ class BookHeader(object):
|
||||
|
||||
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
|
||||
self.exth = None
|
||||
if not isinstance(self.title, unicode):
|
||||
if not isinstance(self.title, unicode_type):
|
||||
self.title = self.title.decode(self.codec, 'replace')
|
||||
if self.exth_flag & 0x40:
|
||||
try:
|
||||
|
@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
|
||||
import re, os
|
||||
|
||||
from calibre.ebooks.chardet import strip_encoding_declarations
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def update_internal_links(mobi8_reader, log):
|
||||
@ -130,7 +131,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
|
||||
flows.append(flow)
|
||||
continue
|
||||
|
||||
if not isinstance(flow, unicode):
|
||||
if not isinstance(flow, unicode_type):
|
||||
try:
|
||||
flow = flow.decode(mr.header.codec)
|
||||
except UnicodeDecodeError:
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user