Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
python3: add unicode/unichr wrappers to polyglot
This commit is contained in:
parent 77728a15ef
commit cbc42bec23
@ -12,6 +12,7 @@ from functools import partial
|
|||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
|
||||||
from setup import iswindows
|
from setup import iswindows
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
if iswindows:
|
if iswindows:
|
||||||
from ctypes import windll, Structure, POINTER, c_size_t
|
from ctypes import windll, Structure, POINTER, c_size_t
|
||||||
@ -52,7 +53,7 @@ def run_worker(job, decorate=True):
|
|||||||
try:
|
try:
|
||||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
return False, human_text, unicode(err)
|
return False, human_text, unicode_type(err)
|
||||||
stdout, stderr = p.communicate()
|
stdout, stderr = p.communicate()
|
||||||
if stdout:
|
if stdout:
|
||||||
stdout = stdout.decode('utf-8')
|
stdout = stdout.decode('utf-8')
|
||||||
|
@ -12,7 +12,7 @@ from itertools import chain
|
|||||||
is_ci = os.environ.get('CI', '').lower() == 'true'
|
is_ci = os.environ.get('CI', '').lower() == 'true'
|
||||||
|
|
||||||
from setup import Command, basenames, __appname__, download_securely
|
from setup import Command, basenames, __appname__, download_securely
|
||||||
from polyglot.builtins import itervalues, iteritems
|
from polyglot.builtins import codepoint_to_chr, itervalues, iteritems
|
||||||
|
|
||||||
|
|
||||||
def get_opts_from_parser(parser):
|
def get_opts_from_parser(parser):
|
||||||
@ -173,7 +173,7 @@ class Kakasi(Command): # {{{
|
|||||||
continue
|
continue
|
||||||
if re.match(r"^$",line):
|
if re.match(r"^$",line):
|
||||||
continue
|
continue
|
||||||
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
|
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:codepoint_to_chr(int(x.group(1),16)), line)
|
||||||
dic[pair[0]] = pair[1]
|
dic[pair[0]] = pair[1]
|
||||||
from calibre.utils.serialize import msgpack_dumps
|
from calibre.utils.serialize import msgpack_dumps
|
||||||
with open(dst, 'wb') as f:
|
with open(dst, 'wb') as f:
|
||||||
|
@ -13,7 +13,7 @@ from functools import partial
|
|||||||
|
|
||||||
from setup import Command, __appname__, __version__, require_git_master, build_cache_dir, edit_file
|
from setup import Command, __appname__, __version__, require_git_master, build_cache_dir, edit_file
|
||||||
from setup.parallel_build import parallel_check_output
|
from setup.parallel_build import parallel_check_output
|
||||||
from polyglot.builtins import iteritems
|
from polyglot.builtins import codepoint_to_chr, iteritems
|
||||||
is_ci = os.environ.get('CI', '').lower() == 'true'
|
is_ci = os.environ.get('CI', '').lower() == 'true'
|
||||||
|
|
||||||
|
|
||||||
@ -82,7 +82,7 @@ class POT(Command): # {{{
|
|||||||
ans = []
|
ans = []
|
||||||
for lineno, msg in msgs:
|
for lineno, msg in msgs:
|
||||||
ans.append('#: %s:%d'%(path, lineno))
|
ans.append('#: %s:%d'%(path, lineno))
|
||||||
slash = unichr(92)
|
slash = codepoint_to_chr(92)
|
||||||
msg = msg.replace(slash, slash*2).replace('"', r'\"').replace('\n',
|
msg = msg.replace(slash, slash*2).replace('"', r'\"').replace('\n',
|
||||||
r'\n').replace('\r', r'\r').replace('\t', r'\t')
|
r'\n').replace('\r', r'\r').replace('\t', r'\t')
|
||||||
ans.append('msgid "%s"'%msg)
|
ans.append('msgid "%s"'%msg)
|
||||||
|
@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import sys, os, re, time, random, warnings
|
import sys, os, re, time, random, warnings
|
||||||
from polyglot.builtins import builtins
|
from polyglot.builtins import builtins, codepoint_to_chr, unicode_type
|
||||||
builtins.__dict__['dynamic_property'] = lambda func: func(None)
|
builtins.__dict__['dynamic_property'] = lambda func: func(None)
|
||||||
from math import floor
|
from math import floor
|
||||||
from functools import partial
|
from functools import partial
|
||||||
@ -77,7 +77,7 @@ def get_types_map():
|
|||||||
|
|
||||||
|
|
||||||
def to_unicode(raw, encoding='utf-8', errors='strict'):
|
def to_unicode(raw, encoding='utf-8', errors='strict'):
|
||||||
if isinstance(raw, unicode):
|
if isinstance(raw, unicode_type):
|
||||||
return raw
|
return raw
|
||||||
return raw.decode(encoding, errors)
|
return raw.decode(encoding, errors)
|
||||||
|
|
||||||
@ -113,7 +113,7 @@ def confirm_config_name(name):
|
|||||||
|
|
||||||
_filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+/]')
|
_filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+/]')
|
||||||
_filename_sanitize_unicode = frozenset([u'\\', u'|', u'?', u'*', u'<',
|
_filename_sanitize_unicode = frozenset([u'\\', u'|', u'?', u'*', u'<',
|
||||||
u'"', u':', u'>', u'+', u'/'] + list(map(unichr, xrange(32))))
|
u'"', u':', u'>', u'+', u'/'] + list(map(codepoint_to_chr, xrange(32))))
|
||||||
|
|
||||||
|
|
||||||
def sanitize_file_name(name, substitute='_', as_unicode=False):
|
def sanitize_file_name(name, substitute='_', as_unicode=False):
|
||||||
@ -126,7 +126,7 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
|
|||||||
*NOTE:* This function always returns byte strings, not unicode objects. The byte strings
|
*NOTE:* This function always returns byte strings, not unicode objects. The byte strings
|
||||||
are encoded in the filesystem encoding of the platform, or UTF-8.
|
are encoded in the filesystem encoding of the platform, or UTF-8.
|
||||||
'''
|
'''
|
||||||
if isinstance(name, unicode):
|
if isinstance(name, unicode_type):
|
||||||
name = name.encode(filesystem_encoding, 'ignore')
|
name = name.encode(filesystem_encoding, 'ignore')
|
||||||
one = _filename_sanitize.sub(substitute, name)
|
one = _filename_sanitize.sub(substitute, name)
|
||||||
one = re.sub(r'\s', ' ', one).strip()
|
one = re.sub(r'\s', ' ', one).strip()
|
||||||
@ -198,7 +198,7 @@ def prints(*args, **kwargs):
|
|||||||
safe_encode = kwargs.get('safe_encode', False)
|
safe_encode = kwargs.get('safe_encode', False)
|
||||||
count = 0
|
count = 0
|
||||||
for i, arg in enumerate(args):
|
for i, arg in enumerate(args):
|
||||||
if isinstance(arg, unicode):
|
if isinstance(arg, unicode_type):
|
||||||
if iswindows:
|
if iswindows:
|
||||||
from calibre.utils.terminal import Detect
|
from calibre.utils.terminal import Detect
|
||||||
cs = Detect(file)
|
cs = Detect(file)
|
||||||
@ -222,8 +222,8 @@ def prints(*args, **kwargs):
|
|||||||
try:
|
try:
|
||||||
arg = str(arg)
|
arg = str(arg)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
arg = unicode(arg)
|
arg = unicode_type(arg)
|
||||||
if isinstance(arg, unicode):
|
if isinstance(arg, unicode_type):
|
||||||
try:
|
try:
|
||||||
arg = arg.encode(enc)
|
arg = arg.encode(enc)
|
||||||
except UnicodeEncodeError:
|
except UnicodeEncodeError:
|
||||||
@ -288,7 +288,7 @@ def load_library(name, cdll):
|
|||||||
|
|
||||||
def filename_to_utf8(name):
|
def filename_to_utf8(name):
|
||||||
'''Return C{name} encoded in utf8. Unhandled characters are replaced. '''
|
'''Return C{name} encoded in utf8. Unhandled characters are replaced. '''
|
||||||
if isinstance(name, unicode):
|
if isinstance(name, unicode_type):
|
||||||
return name.encode('utf8')
|
return name.encode('utf8')
|
||||||
codec = 'cp1252' if iswindows else 'utf8'
|
codec = 'cp1252' if iswindows else 'utf8'
|
||||||
return name.decode(codec, 'replace').encode('utf8')
|
return name.decode(codec, 'replace').encode('utf8')
|
||||||
@ -557,7 +557,7 @@ def strftime(fmt, t=None):
|
|||||||
else:
|
else:
|
||||||
ans = time.strftime(fmt, t).decode(preferred_encoding, 'replace')
|
ans = time.strftime(fmt, t).decode(preferred_encoding, 'replace')
|
||||||
if early_year:
|
if early_year:
|
||||||
ans = ans.replace(u'_early year hack##', unicode(orig_year))
|
ans = ans.replace(u'_early year hack##', unicode_type(orig_year))
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
@ -669,7 +669,7 @@ def force_unicode(obj, enc=preferred_encoding):
|
|||||||
def as_unicode(obj, enc=preferred_encoding):
|
def as_unicode(obj, enc=preferred_encoding):
|
||||||
if not isbytestring(obj):
|
if not isbytestring(obj):
|
||||||
try:
|
try:
|
||||||
obj = unicode(obj)
|
obj = unicode_type(obj)
|
||||||
except:
|
except:
|
||||||
try:
|
try:
|
||||||
obj = str(obj)
|
obj = str(obj)
|
||||||
|
@ -2,12 +2,12 @@
|
|||||||
# vim:fileencoding=utf-8
|
# vim:fileencoding=utf-8
|
||||||
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
from polyglot.builtins import map
|
from polyglot.builtins import map, unicode_type
|
||||||
import sys, locale, codecs, os, importlib, collections
|
import sys, locale, codecs, os, importlib, collections
|
||||||
|
|
||||||
__appname__ = u'calibre'
|
__appname__ = u'calibre'
|
||||||
numeric_version = (3, 40, 1)
|
numeric_version = (3, 40, 1)
|
||||||
__version__ = u'.'.join(map(unicode, numeric_version))
|
__version__ = u'.'.join(map(unicode_type, numeric_version))
|
||||||
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
|
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@ -300,7 +300,7 @@ def get_portable_base():
|
|||||||
|
|
||||||
def get_unicode_windows_env_var(name):
|
def get_unicode_windows_env_var(name):
|
||||||
getenv = plugins['winutil'][0].getenv
|
getenv = plugins['winutil'][0].getenv
|
||||||
return getenv(unicode(name))
|
return getenv(unicode_type(name))
|
||||||
|
|
||||||
|
|
||||||
def get_windows_username():
|
def get_windows_username():
|
||||||
|
@ -7,6 +7,7 @@ import os, sys, zipfile, importlib
|
|||||||
|
|
||||||
from calibre.constants import numeric_version, iswindows, isosx
|
from calibre.constants import numeric_version, iswindows, isosx
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
platform = 'linux'
|
platform = 'linux'
|
||||||
if iswindows:
|
if iswindows:
|
||||||
@ -195,7 +196,7 @@ class Plugin(object): # {{{
|
|||||||
config_dialog.exec_()
|
config_dialog.exec_()
|
||||||
|
|
||||||
if config_dialog.result() == QDialog.Accepted:
|
if config_dialog.result() == QDialog.Accepted:
|
||||||
sc = unicode(sc.text()).strip()
|
sc = unicode_type(sc.text()).strip()
|
||||||
customize_plugin(self, sc)
|
customize_plugin(self, sc)
|
||||||
|
|
||||||
geom = bytearray(config_dialog.saveGeometry())
|
geom = bytearray(config_dialog.saveGeometry())
|
||||||
|
@ -10,6 +10,7 @@ from calibre.customize import (FileTypePlugin, MetadataReaderPlugin,
|
|||||||
from calibre.constants import numeric_version
|
from calibre.constants import numeric_version
|
||||||
from calibre.ebooks.metadata.archive import ArchiveExtract, get_comic_metadata
|
from calibre.ebooks.metadata.archive import ArchiveExtract, get_comic_metadata
|
||||||
from calibre.ebooks.html.to_zip import HTML2ZIP
|
from calibre.ebooks.html.to_zip import HTML2ZIP
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
plugins = []
|
plugins = []
|
||||||
|
|
||||||
@ -64,23 +65,23 @@ class TXT2TXTZ(FileTypePlugin):
|
|||||||
images = []
|
images = []
|
||||||
|
|
||||||
# Textile
|
# Textile
|
||||||
for m in re.finditer(unicode(r'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))'), txt):
|
for m in re.finditer(unicode_type(r'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))'), txt):
|
||||||
path = m.group('path')
|
path = m.group('path')
|
||||||
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
|
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
|
||||||
images.append(path)
|
images.append(path)
|
||||||
|
|
||||||
# Markdown inline
|
# Markdown inline
|
||||||
for m in re.finditer(unicode(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)'), txt): # noqa
|
for m in re.finditer(unicode_type(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)'), txt): # noqa
|
||||||
path = m.group('path')
|
path = m.group('path')
|
||||||
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
|
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
|
||||||
images.append(path)
|
images.append(path)
|
||||||
|
|
||||||
# Markdown reference
|
# Markdown reference
|
||||||
refs = {}
|
refs = {}
|
||||||
for m in re.finditer(unicode(r'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$'), txt):
|
for m in re.finditer(unicode_type(r'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$'), txt):
|
||||||
if m.group('id') and m.group('path'):
|
if m.group('id') and m.group('path'):
|
||||||
refs[m.group('id')] = m.group('path')
|
refs[m.group('id')] = m.group('path')
|
||||||
for m in re.finditer(unicode(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]'), txt): # noqa
|
for m in re.finditer(unicode_type(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]'), txt): # noqa
|
||||||
path = refs.get(m.group('id'), None)
|
path = refs.get(m.group('id'), None)
|
||||||
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
|
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
|
||||||
images.append(path)
|
images.append(path)
|
||||||
|
@ -6,6 +6,7 @@ import re, os, shutil
|
|||||||
|
|
||||||
from calibre import CurrentDir
|
from calibre import CurrentDir
|
||||||
from calibre.customize import Plugin
|
from calibre.customize import Plugin
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class ConversionOption(object):
|
class ConversionOption(object):
|
||||||
@ -79,7 +80,7 @@ class OptionRecommendation(object):
|
|||||||
self.option.choices:
|
self.option.choices:
|
||||||
raise ValueError('OpRec: %s: Recommended value not in choices'%
|
raise ValueError('OpRec: %s: Recommended value not in choices'%
|
||||||
self.option.name)
|
self.option.name)
|
||||||
if not (isinstance(self.recommended_value, (int, float, str, unicode)) or self.recommended_value is None):
|
if not (isinstance(self.recommended_value, (int, float, str, unicode_type)) or self.recommended_value is None):
|
||||||
raise ValueError('OpRec: %s:'%self.option.name + repr(
|
raise ValueError('OpRec: %s:'%self.option.name + repr(
|
||||||
self.recommended_value) + ' is not a string or a number')
|
self.recommended_value) + ' is not a string or a number')
|
||||||
|
|
||||||
@ -340,7 +341,7 @@ class OutputFormatPlugin(Plugin):
|
|||||||
@property
|
@property
|
||||||
def is_periodical(self):
|
def is_periodical(self):
|
||||||
return self.oeb.metadata.publication_type and \
|
return self.oeb.metadata.publication_type and \
|
||||||
unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:')
|
unicode_type(self.oeb.metadata.publication_type[0]).startswith('periodical:')
|
||||||
|
|
||||||
def specialize_options(self, log, opts, input_fmt):
|
def specialize_options(self, log, opts, input_fmt):
|
||||||
'''
|
'''
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
from __future__ import (unicode_literals, division, absolute_import,
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
print_function)
|
print_function)
|
||||||
from polyglot.builtins import map
|
from polyglot.builtins import map, unicode_type
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
@ -216,7 +216,7 @@ class PluginLoader(object):
|
|||||||
if ans.minimum_calibre_version > numeric_version:
|
if ans.minimum_calibre_version > numeric_version:
|
||||||
raise InvalidPlugin(
|
raise InvalidPlugin(
|
||||||
'The plugin at %s needs a version of calibre >= %s' %
|
'The plugin at %s needs a version of calibre >= %s' %
|
||||||
(as_unicode(path_to_zip_file), '.'.join(map(unicode,
|
(as_unicode(path_to_zip_file), '.'.join(map(unicode_type,
|
||||||
ans.minimum_calibre_version))))
|
ans.minimum_calibre_version))))
|
||||||
|
|
||||||
if platform not in ans.supported_platforms:
|
if platform not in ans.supported_platforms:
|
||||||
@ -231,7 +231,7 @@ class PluginLoader(object):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
def _locate_code(self, zf, path_to_zip_file):
|
def _locate_code(self, zf, path_to_zip_file):
|
||||||
names = [x if isinstance(x, unicode) else x.decode('utf-8') for x in
|
names = [x if isinstance(x, unicode_type) else x.decode('utf-8') for x in
|
||||||
zf.namelist()]
|
zf.namelist()]
|
||||||
names = [x[1:] if x[0] == '/' else x for x in names]
|
names = [x[1:] if x[0] == '/' else x for x in names]
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
|
|
||||||
import os, time, re
|
import os, time, re
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from polyglot.builtins import map
|
from polyglot.builtins import map, unicode_type
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
@ -69,7 +69,7 @@ def metadata_extensions():
|
|||||||
# but not actually added)
|
# but not actually added)
|
||||||
global _metadata_extensions
|
global _metadata_extensions
|
||||||
if _metadata_extensions is None:
|
if _metadata_extensions is None:
|
||||||
_metadata_extensions = frozenset(map(unicode, BOOK_EXTENSIONS)) | {'opf'}
|
_metadata_extensions = frozenset(map(unicode_type, BOOK_EXTENSIONS)) | {'opf'}
|
||||||
return _metadata_extensions
|
return _metadata_extensions
|
||||||
|
|
||||||
|
|
||||||
@ -143,7 +143,7 @@ def find_books_in_directory(dirpath, single_book_per_directory, compiled_rules=(
|
|||||||
for path in listdir_impl(dirpath, sort_by_mtime=True):
|
for path in listdir_impl(dirpath, sort_by_mtime=True):
|
||||||
key, ext = splitext(path)
|
key, ext = splitext(path)
|
||||||
if allow_path(path, ext, compiled_rules):
|
if allow_path(path, ext, compiled_rules):
|
||||||
books[icu_lower(key) if isinstance(key, unicode) else key.lower()][ext] = path
|
books[icu_lower(key) if isinstance(key, unicode_type) else key.lower()][ext] = path
|
||||||
|
|
||||||
for formats in books.itervalues():
|
for formats in books.itervalues():
|
||||||
if formats_ok(formats):
|
if formats_ok(formats):
|
||||||
|
@ -12,7 +12,7 @@ import os, shutil, uuid, json, glob, time, hashlib, errno, sys
|
|||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
import apsw
|
import apsw
|
||||||
from polyglot.builtins import reraise
|
from polyglot.builtins import unicode_type, reraise
|
||||||
|
|
||||||
from calibre import isbytestring, force_unicode, prints, as_unicode
|
from calibre import isbytestring, force_unicode, prints, as_unicode
|
||||||
from calibre.constants import (iswindows, filesystem_encoding,
|
from calibre.constants import (iswindows, filesystem_encoding,
|
||||||
@ -93,7 +93,7 @@ class DBPrefs(dict): # {{{
|
|||||||
dict.__setitem__(self, key, val)
|
dict.__setitem__(self, key, val)
|
||||||
|
|
||||||
def raw_to_object(self, raw):
|
def raw_to_object(self, raw):
|
||||||
if not isinstance(raw, unicode):
|
if not isinstance(raw, unicode_type):
|
||||||
raw = raw.decode(preferred_encoding)
|
raw = raw.decode(preferred_encoding)
|
||||||
return json.loads(raw, object_hook=from_json)
|
return json.loads(raw, object_hook=from_json)
|
||||||
|
|
||||||
@ -561,10 +561,10 @@ class DB(object):
|
|||||||
prints('found user category case overlap', catmap[uc])
|
prints('found user category case overlap', catmap[uc])
|
||||||
cat = catmap[uc][0]
|
cat = catmap[uc][0]
|
||||||
suffix = 1
|
suffix = 1
|
||||||
while icu_lower((cat + unicode(suffix))) in catmap:
|
while icu_lower((cat + unicode_type(suffix))) in catmap:
|
||||||
suffix += 1
|
suffix += 1
|
||||||
prints('Renaming user category %s to %s'%(cat, cat+unicode(suffix)))
|
prints('Renaming user category %s to %s'%(cat, cat+unicode_type(suffix)))
|
||||||
user_cats[cat + unicode(suffix)] = user_cats[cat]
|
user_cats[cat + unicode_type(suffix)] = user_cats[cat]
|
||||||
del user_cats[cat]
|
del user_cats[cat]
|
||||||
cats_changed = True
|
cats_changed = True
|
||||||
if cats_changed:
|
if cats_changed:
|
||||||
@ -670,23 +670,23 @@ class DB(object):
|
|||||||
if d['is_multiple']:
|
if d['is_multiple']:
|
||||||
if x is None:
|
if x is None:
|
||||||
return []
|
return []
|
||||||
if isinstance(x, (str, unicode, bytes)):
|
if isinstance(x, (str, unicode_type, bytes)):
|
||||||
x = x.split(d['multiple_seps']['ui_to_list'])
|
x = x.split(d['multiple_seps']['ui_to_list'])
|
||||||
x = [y.strip() for y in x if y.strip()]
|
x = [y.strip() for y in x if y.strip()]
|
||||||
x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
|
x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
|
||||||
unicode) else y for y in x]
|
unicode_type) else y for y in x]
|
||||||
return [u' '.join(y.split()) for y in x]
|
return [u' '.join(y.split()) for y in x]
|
||||||
else:
|
else:
|
||||||
return x if x is None or isinstance(x, unicode) else \
|
return x if x is None or isinstance(x, unicode_type) else \
|
||||||
x.decode(preferred_encoding, 'replace')
|
x.decode(preferred_encoding, 'replace')
|
||||||
|
|
||||||
def adapt_datetime(x, d):
|
def adapt_datetime(x, d):
|
||||||
if isinstance(x, (str, unicode, bytes)):
|
if isinstance(x, (str, unicode_type, bytes)):
|
||||||
x = parse_date(x, assume_utc=False, as_utc=False)
|
x = parse_date(x, assume_utc=False, as_utc=False)
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def adapt_bool(x, d):
|
def adapt_bool(x, d):
|
||||||
if isinstance(x, (str, unicode, bytes)):
|
if isinstance(x, (str, unicode_type, bytes)):
|
||||||
x = x.lower()
|
x = x.lower()
|
||||||
if x == 'true':
|
if x == 'true':
|
||||||
x = True
|
x = True
|
||||||
@ -707,7 +707,7 @@ class DB(object):
|
|||||||
def adapt_number(x, d):
|
def adapt_number(x, d):
|
||||||
if x is None:
|
if x is None:
|
||||||
return None
|
return None
|
||||||
if isinstance(x, (str, unicode, bytes)):
|
if isinstance(x, (str, unicode_type, bytes)):
|
||||||
if x.lower() == 'none':
|
if x.lower() == 'none':
|
||||||
return None
|
return None
|
||||||
if d['datatype'] == 'int':
|
if d['datatype'] == 'int':
|
||||||
@ -1239,7 +1239,7 @@ class DB(object):
|
|||||||
return self._library_id_
|
return self._library_id_
|
||||||
|
|
||||||
def fset(self, val):
|
def fset(self, val):
|
||||||
self._library_id_ = unicode(val)
|
self._library_id_ = unicode_type(val)
|
||||||
self.execute('''
|
self.execute('''
|
||||||
DELETE FROM library_id;
|
DELETE FROM library_id;
|
||||||
INSERT INTO library_id (uuid) VALUES (?);
|
INSERT INTO library_id (uuid) VALUES (?);
|
||||||
@ -1715,7 +1715,7 @@ class DB(object):
|
|||||||
[(book_id, fmt.upper()) for book_id in book_ids])
|
[(book_id, fmt.upper()) for book_id in book_ids])
|
||||||
|
|
||||||
def set_conversion_options(self, options, fmt):
|
def set_conversion_options(self, options, fmt):
|
||||||
options = [(book_id, fmt.upper(), buffer(pickle_binary_string(data.encode('utf-8') if isinstance(data, unicode) else data)))
|
options = [(book_id, fmt.upper(), buffer(pickle_binary_string(data.encode('utf-8') if isinstance(data, unicode_type) else data)))
|
||||||
for book_id, data in options.iteritems()]
|
for book_id, data in options.iteritems()]
|
||||||
self.executemany('INSERT OR REPLACE INTO conversion_options(book,format,data) VALUES (?,?,?)', options)
|
self.executemany('INSERT OR REPLACE INTO conversion_options(book,format,data) VALUES (?,?,?)', options)
|
||||||
|
|
||||||
@ -1754,7 +1754,7 @@ class DB(object):
|
|||||||
copyfile_using_links(src, dest, dest_is_dir=False)
|
copyfile_using_links(src, dest, dest_is_dir=False)
|
||||||
old_files.add(src)
|
old_files.add(src)
|
||||||
x = path_map[x]
|
x = path_map[x]
|
||||||
if not isinstance(x, unicode):
|
if not isinstance(x, unicode_type):
|
||||||
x = x.decode(filesystem_encoding, 'replace')
|
x = x.decode(filesystem_encoding, 'replace')
|
||||||
progress(x, i+1, total)
|
progress(x, i+1, total)
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ import os, traceback, random, shutil, operator
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from collections import defaultdict, Set, MutableSet
|
from collections import defaultdict, Set, MutableSet
|
||||||
from functools import wraps, partial
|
from functools import wraps, partial
|
||||||
from polyglot.builtins import zip
|
from polyglot.builtins import unicode_type, zip
|
||||||
from time import time
|
from time import time
|
||||||
|
|
||||||
from calibre import isbytestring, as_unicode
|
from calibre import isbytestring, as_unicode
|
||||||
@ -528,14 +528,14 @@ class Cache(object):
|
|||||||
@read_api
|
@read_api
|
||||||
def get_item_id(self, field, item_name):
|
def get_item_id(self, field, item_name):
|
||||||
' Return the item id for item_name (case-insensitive) '
|
' Return the item id for item_name (case-insensitive) '
|
||||||
rmap = {icu_lower(v) if isinstance(v, unicode) else v:k for k, v in self.fields[field].table.id_map.iteritems()}
|
rmap = {icu_lower(v) if isinstance(v, unicode_type) else v:k for k, v in self.fields[field].table.id_map.iteritems()}
|
||||||
return rmap.get(icu_lower(item_name) if isinstance(item_name, unicode) else item_name, None)
|
return rmap.get(icu_lower(item_name) if isinstance(item_name, unicode_type) else item_name, None)
|
||||||
|
|
||||||
@read_api
|
@read_api
|
||||||
def get_item_ids(self, field, item_names):
|
def get_item_ids(self, field, item_names):
|
||||||
' Return the item id for item_name (case-insensitive) '
|
' Return the item id for item_name (case-insensitive) '
|
||||||
rmap = {icu_lower(v) if isinstance(v, unicode) else v:k for k, v in self.fields[field].table.id_map.iteritems()}
|
rmap = {icu_lower(v) if isinstance(v, unicode_type) else v:k for k, v in self.fields[field].table.id_map.iteritems()}
|
||||||
return {name:rmap.get(icu_lower(name) if isinstance(name, unicode) else name, None) for name in item_names}
|
return {name:rmap.get(icu_lower(name) if isinstance(name, unicode_type) else name, None) for name in item_names}
|
||||||
|
|
||||||
@read_api
|
@read_api
|
||||||
def author_data(self, author_ids=None):
|
def author_data(self, author_ids=None):
|
||||||
|
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
import copy
|
import copy
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from polyglot.builtins import map
|
from polyglot.builtins import unicode_type, map
|
||||||
|
|
||||||
from calibre.ebooks.metadata import author_to_author_sort
|
from calibre.ebooks.metadata import author_to_author_sort
|
||||||
from calibre.utils.config_base import tweaks
|
from calibre.utils.config_base import tweaks
|
||||||
@ -47,7 +47,7 @@ class Tag(object):
|
|||||||
return u'%s:%s:%s:%s:%s'%(self.name, self.count, self.id, self.state, self.category)
|
return u'%s:%s:%s:%s:%s'%(self.name, self.count, self.id, self.state, self.category)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return unicode(self).encode('utf-8')
|
return unicode_type(self).encode('utf-8')
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return str(self)
|
return str(self)
|
||||||
@ -101,8 +101,8 @@ def clean_user_categories(dbcache):
|
|||||||
if len(comps) == 0:
|
if len(comps) == 0:
|
||||||
i = 1
|
i = 1
|
||||||
while True:
|
while True:
|
||||||
if unicode(i) not in user_cats:
|
if unicode_type(i) not in user_cats:
|
||||||
new_cats[unicode(i)] = user_cats[k]
|
new_cats[unicode_type(i)] = user_cats[k]
|
||||||
break
|
break
|
||||||
i += 1
|
i += 1
|
||||||
else:
|
else:
|
||||||
|
@ -10,6 +10,7 @@ from textwrap import TextWrapper
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
from calibre import prints
|
from calibre import prints
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
readonly = True
|
readonly = True
|
||||||
version = 0 # change this if you change signature of implementation()
|
version = 0 # change this if you change signature of implementation()
|
||||||
@ -79,7 +80,7 @@ def do_list(fields, data, opts):
|
|||||||
widths = list(map(lambda x: 0, fields))
|
widths = list(map(lambda x: 0, fields))
|
||||||
for i in data:
|
for i in data:
|
||||||
for j, field in enumerate(fields):
|
for j, field in enumerate(fields):
|
||||||
widths[j] = max(widths[j], max(len(field), len(unicode(i[field]))))
|
widths[j] = max(widths[j], max(len(field), len(unicode_type(i[field]))))
|
||||||
|
|
||||||
screen_width = geometry()[0]
|
screen_width = geometry()[0]
|
||||||
if not screen_width:
|
if not screen_width:
|
||||||
@ -110,7 +111,7 @@ def do_list(fields, data, opts):
|
|||||||
|
|
||||||
for record in data:
|
for record in data:
|
||||||
text = [
|
text = [
|
||||||
wrappers[i].wrap(unicode(record[field]))
|
wrappers[i].wrap(unicode_type(record[field]))
|
||||||
for i, field in enumerate(fields)
|
for i, field in enumerate(fields)
|
||||||
]
|
]
|
||||||
lines = max(map(len, text))
|
lines = max(map(len, text))
|
||||||
@ -129,7 +130,7 @@ def do_csv(fields, data, opts):
|
|||||||
for d in data:
|
for d in data:
|
||||||
row = [d[f] for f in fields]
|
row = [d[f] for f in fields]
|
||||||
csv_print.writerow([
|
csv_print.writerow([
|
||||||
x if isinstance(x, bytes) else unicode(x).encode('utf-8') for x in row
|
x if isinstance(x, bytes) else unicode_type(x).encode('utf-8') for x in row
|
||||||
])
|
])
|
||||||
print(buf.getvalue())
|
print(buf.getvalue())
|
||||||
|
|
||||||
@ -164,11 +165,11 @@ def main(opts, args, dbctx):
|
|||||||
is_rating = category_metadata(category)['datatype'] == 'rating'
|
is_rating = category_metadata(category)['datatype'] == 'rating'
|
||||||
for tag in category_data[category]:
|
for tag in category_data[category]:
|
||||||
if is_rating:
|
if is_rating:
|
||||||
tag.name = unicode(len(tag.name))
|
tag.name = unicode_type(len(tag.name))
|
||||||
data.append({
|
data.append({
|
||||||
'category': category,
|
'category': category,
|
||||||
'tag_name': tag.name,
|
'tag_name': tag.name,
|
||||||
'count': unicode(tag.count),
|
'count': unicode_type(tag.count),
|
||||||
'rating': fmtr(tag.avg_rating),
|
'rating': fmtr(tag.avg_rating),
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
@ -176,7 +177,7 @@ def main(opts, args, dbctx):
|
|||||||
data.append({
|
data.append({
|
||||||
'category': category,
|
'category': category,
|
||||||
'tag_name': _('CATEGORY ITEMS'),
|
'tag_name': _('CATEGORY ITEMS'),
|
||||||
'count': unicode(len(category_data[category])),
|
'count': unicode_type(len(category_data[category])),
|
||||||
'rating': ''
|
'rating': ''
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -11,6 +11,7 @@ from calibre.ebooks.metadata.book.base import field_from_string
|
|||||||
from calibre.ebooks.metadata.book.serialize import read_cover
|
from calibre.ebooks.metadata.book.serialize import read_cover
|
||||||
from calibre.ebooks.metadata.opf import get_metadata
|
from calibre.ebooks.metadata.opf import get_metadata
|
||||||
from calibre.srv.changes import metadata
|
from calibre.srv.changes import metadata
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
readonly = False
|
readonly = False
|
||||||
version = 0 # change this if you change signature of implementation()
|
version = 0 # change this if you change signature of implementation()
|
||||||
@ -181,5 +182,5 @@ def main(opts, args, dbctx):
|
|||||||
if not final_mi:
|
if not final_mi:
|
||||||
raise SystemExit(_('No book with id: %s in the database') % book_id)
|
raise SystemExit(_('No book with id: %s in the database') % book_id)
|
||||||
|
|
||||||
prints(unicode(final_mi))
|
prints(unicode_type(final_mi))
|
||||||
return 0
|
return 0
|
||||||
|
@ -9,6 +9,7 @@ import sys
|
|||||||
|
|
||||||
from calibre import prints
|
from calibre import prints
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
readonly = True
|
readonly = True
|
||||||
version = 0 # change this if you change signature of implementation()
|
version = 0 # change this if you change signature of implementation()
|
||||||
@ -52,6 +53,6 @@ def main(opts, args, dbctx):
|
|||||||
mi = OPFCreator(os.getcwdu(), mi)
|
mi = OPFCreator(os.getcwdu(), mi)
|
||||||
mi.render(sys.stdout)
|
mi.render(sys.stdout)
|
||||||
else:
|
else:
|
||||||
prints(unicode(mi))
|
prints(unicode_type(mi))
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
@ -15,6 +15,7 @@ from copy import deepcopy
|
|||||||
from calibre.ebooks.metadata.book.base import Metadata, SIMPLE_GET, TOP_LEVEL_IDENTIFIERS, NULL_VALUES, ALL_METADATA_FIELDS
|
from calibre.ebooks.metadata.book.base import Metadata, SIMPLE_GET, TOP_LEVEL_IDENTIFIERS, NULL_VALUES, ALL_METADATA_FIELDS
|
||||||
from calibre.ebooks.metadata.book.formatter import SafeFormat
|
from calibre.ebooks.metadata.book.formatter import SafeFormat
|
||||||
from calibre.utils.date import utcnow
|
from calibre.utils.date import utcnow
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
# Lazy format metadata retrieval {{{
|
# Lazy format metadata retrieval {{{
|
||||||
'''
|
'''
|
||||||
@ -46,7 +47,7 @@ class MutableBase(object):
|
|||||||
|
|
||||||
@resolved
|
@resolved
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return unicode(self._values)
|
return unicode_type(self._values)
|
||||||
|
|
||||||
@resolved
|
@resolved
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
|
@ -11,6 +11,7 @@ import os
|
|||||||
|
|
||||||
from calibre import prints
|
from calibre import prints
|
||||||
from calibre.utils.date import isoformat, DEFAULT_DATE
|
from calibre.utils.date import isoformat, DEFAULT_DATE
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class SchemaUpgrade(object):
|
class SchemaUpgrade(object):
|
||||||
@ -601,7 +602,7 @@ class SchemaUpgrade(object):
|
|||||||
id_ = str(id_)
|
id_ = str(id_)
|
||||||
fname = custom_recipe_filename(id_, title)
|
fname = custom_recipe_filename(id_, title)
|
||||||
custom_recipes[id_] = (title, fname)
|
custom_recipes[id_] = (title, fname)
|
||||||
if isinstance(script, unicode):
|
if isinstance(script, unicode_type):
|
||||||
script = script.encode('utf-8')
|
script = script.encode('utf-8')
|
||||||
with open(os.path.join(bdir, fname), 'wb') as f:
|
with open(os.path.join(bdir, fname), 'wb') as f:
|
||||||
f.write(script)
|
f.write(script)
|
||||||
|
@ -19,6 +19,7 @@ from calibre.utils.date import parse_date, UNDEFINED_DATE, now, dt_as_local
|
|||||||
from calibre.utils.icu import primary_contains, sort_key
|
from calibre.utils.icu import primary_contains, sort_key
|
||||||
from calibre.utils.localization import lang_map, canonicalize_lang
|
from calibre.utils.localization import lang_map, canonicalize_lang
|
||||||
from calibre.utils.search_query_parser import SearchQueryParser, ParseException
|
from calibre.utils.search_query_parser import SearchQueryParser, ParseException
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
CONTAINS_MATCH = 0
|
CONTAINS_MATCH = 0
|
||||||
EQUALS_MATCH = 1
|
EQUALS_MATCH = 1
|
||||||
@ -148,7 +149,7 @@ class DateSearch(object): # {{{
|
|||||||
|
|
||||||
if query == 'false':
|
if query == 'false':
|
||||||
for v, book_ids in field_iter():
|
for v, book_ids in field_iter():
|
||||||
if isinstance(v, (str, unicode)):
|
if isinstance(v, (str, unicode_type)):
|
||||||
v = parse_date(v)
|
v = parse_date(v)
|
||||||
if v is None or v <= UNDEFINED_DATE:
|
if v is None or v <= UNDEFINED_DATE:
|
||||||
matches |= book_ids
|
matches |= book_ids
|
||||||
@ -156,7 +157,7 @@ class DateSearch(object): # {{{
|
|||||||
|
|
||||||
if query == 'true':
|
if query == 'true':
|
||||||
for v, book_ids in field_iter():
|
for v, book_ids in field_iter():
|
||||||
if isinstance(v, (str, unicode)):
|
if isinstance(v, (str, unicode_type)):
|
||||||
v = parse_date(v)
|
v = parse_date(v)
|
||||||
if v is not None and v > UNDEFINED_DATE:
|
if v is not None and v > UNDEFINED_DATE:
|
||||||
matches |= book_ids
|
matches |= book_ids
|
||||||
@ -198,7 +199,7 @@ class DateSearch(object): # {{{
|
|||||||
field_count = query.count('/') + 1
|
field_count = query.count('/') + 1
|
||||||
|
|
||||||
for v, book_ids in field_iter():
|
for v, book_ids in field_iter():
|
||||||
if isinstance(v, (str, unicode)):
|
if isinstance(v, (str, unicode_type)):
|
||||||
v = parse_date(v)
|
v = parse_date(v)
|
||||||
if v is not None and relop(dt_as_local(v), qd, field_count):
|
if v is not None and relop(dt_as_local(v), qd, field_count):
|
||||||
matches |= book_ids
|
matches |= book_ids
|
||||||
@ -407,7 +408,7 @@ class SavedSearchQueries(object): # {{{
|
|||||||
return self._db()
|
return self._db()
|
||||||
|
|
||||||
def force_unicode(self, x):
|
def force_unicode(self, x):
|
||||||
if not isinstance(x, unicode):
|
if not isinstance(x, unicode_type):
|
||||||
x = x.decode(preferred_encoding, 'replace')
|
x = x.decode(preferred_encoding, 'replace')
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
import os, errno, cPickle, sys, re
|
import os, errno, cPickle, sys, re
|
||||||
from locale import localeconv
|
from locale import localeconv
|
||||||
from collections import OrderedDict, namedtuple
|
from collections import OrderedDict, namedtuple
|
||||||
from polyglot.builtins import map
|
from polyglot.builtins import map, unicode_type
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
|
|
||||||
from calibre import as_unicode, prints
|
from calibre import as_unicode, prints
|
||||||
@ -19,7 +19,7 @@ from calibre.utils.localization import canonicalize_lang
|
|||||||
|
|
||||||
|
|
||||||
def force_to_bool(val):
|
def force_to_bool(val):
|
||||||
if isinstance(val, (str, unicode)):
|
if isinstance(val, (str, unicode_type)):
|
||||||
try:
|
try:
|
||||||
val = icu_lower(val)
|
val = icu_lower(val)
|
||||||
if not val:
|
if not val:
|
||||||
|
@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import weakref, operator
|
import weakref, operator
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from itertools import izip, imap
|
from itertools import izip, imap
|
||||||
from polyglot.builtins import map
|
from polyglot.builtins import map, unicode_type
|
||||||
|
|
||||||
from calibre.ebooks.metadata import title_sort
|
from calibre.ebooks.metadata import title_sort
|
||||||
from calibre.utils.config_base import tweaks, prefs
|
from calibre.utils.config_base import tweaks, prefs
|
||||||
@ -374,7 +374,7 @@ class View(object):
|
|||||||
self.marked_ids = dict.fromkeys(id_dict, u'true')
|
self.marked_ids = dict.fromkeys(id_dict, u'true')
|
||||||
else:
|
else:
|
||||||
# Ensure that all the items in the dict are text
|
# Ensure that all the items in the dict are text
|
||||||
self.marked_ids = dict(izip(id_dict.iterkeys(), imap(unicode,
|
self.marked_ids = dict(izip(id_dict.iterkeys(), imap(unicode_type,
|
||||||
id_dict.itervalues())))
|
id_dict.itervalues())))
|
||||||
# This invalidates all searches in the cache even though the cache may
|
# This invalidates all searches in the cache even though the cache may
|
||||||
# be shared by multiple views. This is not ideal, but...
|
# be shared by multiple views. This is not ideal, but...
|
||||||
@ -432,4 +432,3 @@ class View(object):
|
|||||||
self._map_filtered = ids + self._map_filtered
|
self._map_filtered = ids + self._map_filtered
|
||||||
if prefs['mark_new_books']:
|
if prefs['mark_new_books']:
|
||||||
self.toggle_marked_ids(ids)
|
self.toggle_marked_ids(ids)
|
||||||
|
|
||||||
|
@ -10,18 +10,15 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import re
|
import re
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from polyglot.builtins import zip
|
from polyglot.builtins import unicode_type, zip
|
||||||
|
|
||||||
from calibre.constants import preferred_encoding, ispy3
|
from calibre.constants import preferred_encoding
|
||||||
from calibre.ebooks.metadata import author_to_author_sort, title_sort
|
from calibre.ebooks.metadata import author_to_author_sort, title_sort
|
||||||
from calibre.utils.date import (
|
from calibre.utils.date import (
|
||||||
parse_only_date, parse_date, UNDEFINED_DATE, isoformat, is_date_undefined)
|
parse_only_date, parse_date, UNDEFINED_DATE, isoformat, is_date_undefined)
|
||||||
from calibre.utils.localization import canonicalize_lang
|
from calibre.utils.localization import canonicalize_lang
|
||||||
from calibre.utils.icu import strcmp
|
from calibre.utils.icu import strcmp
|
||||||
|
|
||||||
if ispy3:
|
|
||||||
unicode = str
|
|
||||||
|
|
||||||
# Convert data into values suitable for the db {{{
|
# Convert data into values suitable for the db {{{
|
||||||
|
|
||||||
|
|
||||||
@ -32,7 +29,7 @@ def sqlite_datetime(x):
|
|||||||
def single_text(x):
|
def single_text(x):
|
||||||
if x is None:
|
if x is None:
|
||||||
return x
|
return x
|
||||||
if not isinstance(x, unicode):
|
if not isinstance(x, unicode_type):
|
||||||
x = x.decode(preferred_encoding, 'replace')
|
x = x.decode(preferred_encoding, 'replace')
|
||||||
x = x.strip()
|
x = x.strip()
|
||||||
return x if x else None
|
return x if x else None
|
||||||
@ -60,7 +57,7 @@ def multiple_text(sep, ui_sep, x):
|
|||||||
return ()
|
return ()
|
||||||
if isinstance(x, bytes):
|
if isinstance(x, bytes):
|
||||||
x = x.decode(preferred_encoding, 'replace')
|
x = x.decode(preferred_encoding, 'replace')
|
||||||
if isinstance(x, unicode):
|
if isinstance(x, unicode_type):
|
||||||
x = x.split(sep)
|
x = x.split(sep)
|
||||||
else:
|
else:
|
||||||
x = (y.decode(preferred_encoding, 'replace') if isinstance(y, bytes)
|
x = (y.decode(preferred_encoding, 'replace') if isinstance(y, bytes)
|
||||||
@ -72,7 +69,7 @@ def multiple_text(sep, ui_sep, x):
|
|||||||
|
|
||||||
|
|
||||||
def adapt_datetime(x):
|
def adapt_datetime(x):
|
||||||
if isinstance(x, (unicode, bytes)):
|
if isinstance(x, (unicode_type, bytes)):
|
||||||
x = parse_date(x, assume_utc=False, as_utc=False)
|
x = parse_date(x, assume_utc=False, as_utc=False)
|
||||||
if x and is_date_undefined(x):
|
if x and is_date_undefined(x):
|
||||||
x = UNDEFINED_DATE
|
x = UNDEFINED_DATE
|
||||||
@ -80,7 +77,7 @@ def adapt_datetime(x):
|
|||||||
|
|
||||||
|
|
||||||
def adapt_date(x):
|
def adapt_date(x):
|
||||||
if isinstance(x, (unicode, bytes)):
|
if isinstance(x, (unicode_type, bytes)):
|
||||||
x = parse_only_date(x)
|
x = parse_only_date(x)
|
||||||
if x is None or is_date_undefined(x):
|
if x is None or is_date_undefined(x):
|
||||||
x = UNDEFINED_DATE
|
x = UNDEFINED_DATE
|
||||||
@ -90,14 +87,14 @@ def adapt_date(x):
|
|||||||
def adapt_number(typ, x):
|
def adapt_number(typ, x):
|
||||||
if x is None:
|
if x is None:
|
||||||
return None
|
return None
|
||||||
if isinstance(x, (unicode, bytes)):
|
if isinstance(x, (unicode_type, bytes)):
|
||||||
if not x or x.lower() == 'none':
|
if not x or x.lower() == 'none':
|
||||||
return None
|
return None
|
||||||
return typ(x)
|
return typ(x)
|
||||||
|
|
||||||
|
|
||||||
def adapt_bool(x):
|
def adapt_bool(x):
|
||||||
if isinstance(x, (unicode, bytes)):
|
if isinstance(x, (unicode_type, bytes)):
|
||||||
x = x.lower()
|
x = x.lower()
|
||||||
if x == 'true':
|
if x == 'true':
|
||||||
x = True
|
x = True
|
||||||
|
@ -14,6 +14,7 @@ import sys
|
|||||||
|
|
||||||
from calibre.devices.usbms.driver import USBMS
|
from calibre.devices.usbms.driver import USBMS
|
||||||
from calibre.ebooks.metadata import string_to_authors
|
from calibre.ebooks.metadata import string_to_authors
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class JETBOOK(USBMS):
|
class JETBOOK(USBMS):
|
||||||
@ -64,7 +65,7 @@ class JETBOOK(USBMS):
|
|||||||
|
|
||||||
def check_unicode(txt):
|
def check_unicode(txt):
|
||||||
txt = txt.replace('_', ' ')
|
txt = txt.replace('_', ' ')
|
||||||
if not isinstance(txt, unicode):
|
if not isinstance(txt, unicode_type):
|
||||||
return txt.decode(sys.getfilesystemencoding(), 'replace')
|
return txt.decode(sys.getfilesystemencoding(), 'replace')
|
||||||
|
|
||||||
return txt
|
return txt
|
||||||
|
@ -15,6 +15,7 @@ from calibre.constants import DEBUG
|
|||||||
from calibre.devices.kindle.bookmark import Bookmark
|
from calibre.devices.kindle.bookmark import Bookmark
|
||||||
from calibre.devices.usbms.driver import USBMS
|
from calibre.devices.usbms.driver import USBMS
|
||||||
from calibre import strftime, fsync, prints
|
from calibre import strftime, fsync, prints
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Notes on collections:
|
Notes on collections:
|
||||||
@ -113,7 +114,7 @@ class KINDLE(USBMS):
|
|||||||
match = cls.WIRELESS_FILE_NAME_PATTERN.match(os.path.basename(path))
|
match = cls.WIRELESS_FILE_NAME_PATTERN.match(os.path.basename(path))
|
||||||
if match is not None:
|
if match is not None:
|
||||||
mi.title = match.group('title')
|
mi.title = match.group('title')
|
||||||
if not isinstance(mi.title, unicode):
|
if not isinstance(mi.title, unicode_type):
|
||||||
mi.title = mi.title.decode(sys.getfilesystemencoding(),
|
mi.title = mi.title.decode(sys.getfilesystemencoding(),
|
||||||
'replace')
|
'replace')
|
||||||
return mi
|
return mi
|
||||||
@ -291,9 +292,9 @@ class KINDLE(USBMS):
|
|||||||
hrTag['class'] = 'annotations_divider'
|
hrTag['class'] = 'annotations_divider'
|
||||||
user_notes_soup.insert(0, hrTag)
|
user_notes_soup.insert(0, hrTag)
|
||||||
|
|
||||||
mi.comments += unicode(user_notes_soup.prettify())
|
mi.comments += unicode_type(user_notes_soup.prettify())
|
||||||
else:
|
else:
|
||||||
mi.comments = unicode(user_notes_soup.prettify())
|
mi.comments = unicode_type(user_notes_soup.prettify())
|
||||||
# Update library comments
|
# Update library comments
|
||||||
db.set_comment(db_id, mi.comments)
|
db.set_comment(db_id, mi.comments)
|
||||||
|
|
||||||
@ -547,7 +548,7 @@ class KINDLE2(KINDLE):
|
|||||||
cust_col_name = opts.extra_customization[self.OPT_APNX_METHOD_COL]
|
cust_col_name = opts.extra_customization[self.OPT_APNX_METHOD_COL]
|
||||||
if cust_col_name:
|
if cust_col_name:
|
||||||
try:
|
try:
|
||||||
temp = unicode(metadata.get(cust_col_name)).lower()
|
temp = unicode_type(metadata.get(cust_col_name)).lower()
|
||||||
if temp in self.EXTRA_CUSTOMIZATION_CHOICES[self.OPT_APNX_METHOD]:
|
if temp in self.EXTRA_CUSTOMIZATION_CHOICES[self.OPT_APNX_METHOD]:
|
||||||
method = temp
|
method = temp
|
||||||
else:
|
else:
|
||||||
|
@ -14,6 +14,7 @@ from calibre.devices.usbms.books import CollectionsBookList
|
|||||||
from calibre.utils.config_base import prefs
|
from calibre.utils.config_base import prefs
|
||||||
from calibre.devices.usbms.driver import debug_print
|
from calibre.devices.usbms.driver import debug_print
|
||||||
from calibre.ebooks.metadata import author_to_author_sort
|
from calibre.ebooks.metadata import author_to_author_sort
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class Book(Book_):
|
class Book(Book_):
|
||||||
@ -95,7 +96,7 @@ class Book(Book_):
|
|||||||
ans = [u"Kobo metadata:"]
|
ans = [u"Kobo metadata:"]
|
||||||
|
|
||||||
def fmt(x, y):
|
def fmt(x, y):
|
||||||
ans.append(u'%-20s: %s'%(unicode(x), unicode(y)))
|
ans.append(u'%-20s: %s'%(unicode_type(x), unicode_type(y)))
|
||||||
|
|
||||||
if self.contentID:
|
if self.contentID:
|
||||||
fmt('Content ID', self.contentID)
|
fmt('Content ID', self.contentID)
|
||||||
|
@ -32,6 +32,7 @@ from calibre import prints, fsync
|
|||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
from calibre.constants import DEBUG
|
from calibre.constants import DEBUG
|
||||||
from calibre.utils.config_base import prefs
|
from calibre.utils.config_base import prefs
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
EPUB_EXT = '.epub'
|
EPUB_EXT = '.epub'
|
||||||
KEPUB_EXT = '.kepub'
|
KEPUB_EXT = '.kepub'
|
||||||
@ -43,7 +44,7 @@ def qhash(inputstr):
|
|||||||
instr = b""
|
instr = b""
|
||||||
if isinstance(inputstr, bytes):
|
if isinstance(inputstr, bytes):
|
||||||
instr = inputstr
|
instr = inputstr
|
||||||
elif isinstance(inputstr, unicode):
|
elif isinstance(inputstr, unicode_type):
|
||||||
instr = inputstr.encode("utf8")
|
instr = inputstr.encode("utf8")
|
||||||
else:
|
else:
|
||||||
return -1
|
return -1
|
||||||
@ -1323,9 +1324,9 @@ class KOBO(USBMS):
|
|||||||
hrTag['class'] = 'annotations_divider'
|
hrTag['class'] = 'annotations_divider'
|
||||||
user_notes_soup.insert(0, hrTag)
|
user_notes_soup.insert(0, hrTag)
|
||||||
|
|
||||||
mi.comments += unicode(user_notes_soup.prettify())
|
mi.comments += unicode_type(user_notes_soup.prettify())
|
||||||
else:
|
else:
|
||||||
mi.comments = unicode(user_notes_soup.prettify())
|
mi.comments = unicode_type(user_notes_soup.prettify())
|
||||||
# Update library comments
|
# Update library comments
|
||||||
db.set_comment(db_id, mi.comments)
|
db.set_comment(db_id, mi.comments)
|
||||||
|
|
||||||
@ -1824,7 +1825,7 @@ class KOBOTOUCH(KOBO):
|
|||||||
bookshelves.append(row['ShelfName'])
|
bookshelves.append(row['ShelfName'])
|
||||||
|
|
||||||
cursor.close()
|
cursor.close()
|
||||||
# debug_print("KoboTouch:get_bookshelvesforbook - count bookshelves=" + unicode(count_bookshelves))
|
# debug_print("KoboTouch:get_bookshelvesforbook - count bookshelves=" + unicode_type(count_bookshelves))
|
||||||
return bookshelves
|
return bookshelves
|
||||||
|
|
||||||
self.debug_index = 0
|
self.debug_index = 0
|
||||||
@ -2394,7 +2395,7 @@ class KOBOTOUCH(KOBO):
|
|||||||
|
|
||||||
if self.manage_collections:
|
if self.manage_collections:
|
||||||
if collections:
|
if collections:
|
||||||
# debug_print("KoboTouch:update_device_database_collections - length collections=" + unicode(len(collections)))
|
# debug_print("KoboTouch:update_device_database_collections - length collections=" + unicode_type(len(collections)))
|
||||||
|
|
||||||
# Need to reset the collections outside the particular loops
|
# Need to reset the collections outside the particular loops
|
||||||
# otherwise the last item will not be removed
|
# otherwise the last item will not be removed
|
||||||
@ -2834,7 +2835,7 @@ class KOBOTOUCH(KOBO):
|
|||||||
# count_bookshelves = i + 1
|
# count_bookshelves = i + 1
|
||||||
|
|
||||||
cursor.close()
|
cursor.close()
|
||||||
# debug_print("KoboTouch:get_bookshelflist - count bookshelves=" + unicode(count_bookshelves))
|
# debug_print("KoboTouch:get_bookshelflist - count bookshelves=" + unicode_type(count_bookshelves))
|
||||||
|
|
||||||
return bookshelves
|
return bookshelves
|
||||||
|
|
||||||
@ -2918,7 +2919,7 @@ class KOBOTOUCH(KOBO):
|
|||||||
cursor.execute(addquery, add_values)
|
cursor.execute(addquery, add_values)
|
||||||
elif result['_IsDeleted'] == 'true':
|
elif result['_IsDeleted'] == 'true':
|
||||||
debug_print("KoboTouch:check_for_bookshelf - Shelf '%s' is deleted - undeleting. result['_IsDeleted']='%s'" % (
|
debug_print("KoboTouch:check_for_bookshelf - Shelf '%s' is deleted - undeleting. result['_IsDeleted']='%s'" % (
|
||||||
bookshelf_name, unicode(result['_IsDeleted'])))
|
bookshelf_name, unicode_type(result['_IsDeleted'])))
|
||||||
cursor.execute(updatequery, test_values)
|
cursor.execute(updatequery, test_values)
|
||||||
|
|
||||||
cursor.close()
|
cursor.close()
|
||||||
|
@ -16,6 +16,7 @@ from calibre.gui2.device_drivers.tabbed_device_config import TabbedDeviceConfig,
|
|||||||
from calibre.devices.usbms.driver import debug_print
|
from calibre.devices.usbms.driver import debug_print
|
||||||
from calibre.gui2 import error_dialog
|
from calibre.gui2 import error_dialog
|
||||||
from calibre.gui2.dialogs.template_dialog import TemplateDialog
|
from calibre.gui2.dialogs.template_dialog import TemplateDialog
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
def wrap_msg(msg):
|
def wrap_msg(msg):
|
||||||
@ -122,7 +123,7 @@ class KOBOTOUCHConfig(TabbedDeviceConfig):
|
|||||||
|
|
||||||
p['support_newer_firmware'] = self.support_newer_firmware
|
p['support_newer_firmware'] = self.support_newer_firmware
|
||||||
p['debugging_title'] = self.debugging_title
|
p['debugging_title'] = self.debugging_title
|
||||||
p['driver_version'] = '.'.join([unicode(i) for i in self.device.version])
|
p['driver_version'] = '.'.join([unicode_type(i) for i in self.device.version])
|
||||||
|
|
||||||
return p
|
return p
|
||||||
|
|
||||||
@ -397,7 +398,7 @@ class AdvancedGroupBox(DeviceOptionsGroupBox):
|
|||||||
'to perform full read-write functionality - Here be Dragons!! '
|
'to perform full read-write functionality - Here be Dragons!! '
|
||||||
'Enable only if you are comfortable with restoring your kobo '
|
'Enable only if you are comfortable with restoring your kobo '
|
||||||
'to factory defaults and testing software. '
|
'to factory defaults and testing software. '
|
||||||
'This driver supports firmware V2.x.x and DBVersion up to ') + unicode(
|
'This driver supports firmware V2.x.x and DBVersion up to ') + unicode_type(
|
||||||
device.supported_dbversion), device.get_pref('support_newer_firmware')
|
device.supported_dbversion), device.get_pref('support_newer_firmware')
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -555,7 +556,7 @@ class TemplateConfig(QWidget): # {{{
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def template(self):
|
def template(self):
|
||||||
return unicode(self.t.text()).strip()
|
return unicode_type(self.t.text()).strip()
|
||||||
|
|
||||||
@template.setter
|
@template.setter
|
||||||
def template(self, template):
|
def template(self, template):
|
||||||
@ -577,7 +578,7 @@ class TemplateConfig(QWidget): # {{{
|
|||||||
except Exception as err:
|
except Exception as err:
|
||||||
error_dialog(self, _('Invalid template'),
|
error_dialog(self, _('Invalid template'),
|
||||||
'<p>'+_('The template "%s" is invalid:')%tmpl +
|
'<p>'+_('The template "%s" is invalid:')%tmpl +
|
||||||
'<br>'+unicode(err), show=True)
|
'<br>'+unicode_type(err), show=True)
|
||||||
|
|
||||||
return False
|
return False
|
||||||
# }}}
|
# }}}
|
||||||
|
@ -18,6 +18,7 @@ from calibre.devices.mtp.base import debug
|
|||||||
from calibre.devices.mtp.defaults import DeviceDefaults
|
from calibre.devices.mtp.defaults import DeviceDefaults
|
||||||
from calibre.ptempfile import SpooledTemporaryFile, PersistentTemporaryDirectory
|
from calibre.ptempfile import SpooledTemporaryFile, PersistentTemporaryDirectory
|
||||||
from calibre.utils.filenames import shorten_components_to
|
from calibre.utils.filenames import shorten_components_to
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
BASE = importlib.import_module('calibre.devices.mtp.%s.driver'%(
|
BASE = importlib.import_module('calibre.devices.mtp.%s.driver'%(
|
||||||
'windows' if iswindows else 'unix')).MTP_DEVICE
|
'windows' if iswindows else 'unix')).MTP_DEVICE
|
||||||
@ -75,7 +76,7 @@ class MTP_DEVICE(BASE):
|
|||||||
|
|
||||||
def is_folder_ignored(self, storage_or_storage_id, path,
|
def is_folder_ignored(self, storage_or_storage_id, path,
|
||||||
ignored_folders=None):
|
ignored_folders=None):
|
||||||
storage_id = unicode(getattr(storage_or_storage_id, 'object_id',
|
storage_id = unicode_type(getattr(storage_or_storage_id, 'object_id',
|
||||||
storage_or_storage_id))
|
storage_or_storage_id))
|
||||||
lpath = tuple(icu_lower(name) for name in path)
|
lpath = tuple(icu_lower(name) for name in path)
|
||||||
if ignored_folders is None:
|
if ignored_folders is None:
|
||||||
@ -166,14 +167,14 @@ class MTP_DEVICE(BASE):
|
|||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
dinfo = {}
|
dinfo = {}
|
||||||
if dinfo.get('device_store_uuid', None) is None:
|
if dinfo.get('device_store_uuid', None) is None:
|
||||||
dinfo['device_store_uuid'] = unicode(uuid.uuid4())
|
dinfo['device_store_uuid'] = unicode_type(uuid.uuid4())
|
||||||
if dinfo.get('device_name', None) is None:
|
if dinfo.get('device_name', None) is None:
|
||||||
dinfo['device_name'] = self.current_friendly_name
|
dinfo['device_name'] = self.current_friendly_name
|
||||||
if name is not None:
|
if name is not None:
|
||||||
dinfo['device_name'] = name
|
dinfo['device_name'] = name
|
||||||
dinfo['location_code'] = location_code
|
dinfo['location_code'] = location_code
|
||||||
dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None)
|
dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None)
|
||||||
dinfo['calibre_version'] = '.'.join([unicode(i) for i in numeric_version])
|
dinfo['calibre_version'] = '.'.join([unicode_type(i) for i in numeric_version])
|
||||||
dinfo['date_last_connected'] = isoformat(now())
|
dinfo['date_last_connected'] = isoformat(now())
|
||||||
dinfo['mtp_prefix'] = storage.storage_prefix
|
dinfo['mtp_prefix'] = storage.storage_prefix
|
||||||
raw = json.dumps(dinfo, default=to_json)
|
raw = json.dumps(dinfo, default=to_json)
|
||||||
|
@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import weakref, sys, json
|
import weakref, sys, json
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from operator import attrgetter
|
from operator import attrgetter
|
||||||
from polyglot.builtins import map
|
from polyglot.builtins import map, unicode_type
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from calibre import human_readable, prints, force_unicode
|
from calibre import human_readable, prints, force_unicode
|
||||||
@ -74,7 +74,7 @@ class FileOrFolder(object):
|
|||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
name = 'Folder' if self.is_folder else 'File'
|
name = 'Folder' if self.is_folder else 'File'
|
||||||
try:
|
try:
|
||||||
path = unicode(self.full_path)
|
path = unicode_type(self.full_path)
|
||||||
except:
|
except:
|
||||||
path = ''
|
path = ''
|
||||||
datum = 'size=%s'%(self.size)
|
datum = 'size=%s'%(self.size)
|
||||||
@ -250,5 +250,3 @@ class FilesystemCache(object):
|
|||||||
return self.id_map[object_id]
|
return self.id_map[object_id]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise ValueError('No object found with MTP path: %s'%path)
|
raise ValueError('No object found with MTP path: %s'%path)
|
||||||
|
|
||||||
|
|
||||||
|
@ -17,6 +17,7 @@ from calibre.constants import plugins, islinux, isosx, ispy3
|
|||||||
from calibre.ptempfile import SpooledTemporaryFile
|
from calibre.ptempfile import SpooledTemporaryFile
|
||||||
from calibre.devices.errors import OpenFailed, DeviceError, BlacklistedDevice, OpenActionNeeded
|
from calibre.devices.errors import OpenFailed, DeviceError, BlacklistedDevice, OpenActionNeeded
|
||||||
from calibre.devices.mtp.base import MTPDeviceBase, synchronous, debug
|
from calibre.devices.mtp.base import MTPDeviceBase, synchronous, debug
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
MTPDevice = namedtuple('MTPDevice', 'busnum devnum vendor_id product_id '
|
MTPDevice = namedtuple('MTPDevice', 'busnum devnum vendor_id product_id '
|
||||||
'bcd serial manufacturer product')
|
'bcd serial manufacturer product')
|
||||||
@ -321,7 +322,7 @@ class MTP_DEVICE(MTPDeviceBase):
|
|||||||
storage.append({'id':sid, 'size':capacity,
|
storage.append({'id':sid, 'size':capacity,
|
||||||
'is_folder':True, 'name':name, 'can_delete':False,
|
'is_folder':True, 'name':name, 'can_delete':False,
|
||||||
'is_system':True})
|
'is_system':True})
|
||||||
self._currently_getting_sid = unicode(sid)
|
self._currently_getting_sid = unicode_type(sid)
|
||||||
items, errs = self.dev.get_filesystem(sid,
|
items, errs = self.dev.get_filesystem(sid,
|
||||||
partial(self._filesystem_callback, {}))
|
partial(self._filesystem_callback, {}))
|
||||||
all_items.extend(items), all_errs.extend(errs)
|
all_items.extend(items), all_errs.extend(errs)
|
||||||
@ -373,7 +374,7 @@ class MTP_DEVICE(MTPDeviceBase):
|
|||||||
e = parent.folder_named(name)
|
e = parent.folder_named(name)
|
||||||
if e is not None:
|
if e is not None:
|
||||||
return e
|
return e
|
||||||
ename = name.encode('utf-8') if isinstance(name, unicode) else name
|
ename = name.encode('utf-8') if isinstance(name, unicode_type) else name
|
||||||
sid, pid = parent.storage_id, parent.object_id
|
sid, pid = parent.storage_id, parent.object_id
|
||||||
if pid == sid:
|
if pid == sid:
|
||||||
pid = 0
|
pid = 0
|
||||||
@ -396,7 +397,7 @@ class MTP_DEVICE(MTPDeviceBase):
|
|||||||
raise ValueError('Cannot upload file %s, it already exists'%(
|
raise ValueError('Cannot upload file %s, it already exists'%(
|
||||||
e.full_path,))
|
e.full_path,))
|
||||||
self.delete_file_or_folder(e)
|
self.delete_file_or_folder(e)
|
||||||
ename = name.encode('utf-8') if isinstance(name, unicode) else name
|
ename = name.encode('utf-8') if isinstance(name, unicode_type) else name
|
||||||
sid, pid = parent.storage_id, parent.object_id
|
sid, pid = parent.storage_id, parent.object_id
|
||||||
if pid == sid:
|
if pid == sid:
|
||||||
pid = 0xFFFFFFFF
|
pid = 0xFFFFFFFF
|
||||||
|
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
import time, threading, traceback
|
import time, threading, traceback
|
||||||
from functools import wraps, partial
|
from functools import wraps, partial
|
||||||
from polyglot.builtins import zip
|
from polyglot.builtins import unicode_type, zip
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
|
|
||||||
from calibre import as_unicode, prints, force_unicode
|
from calibre import as_unicode, prints, force_unicode
|
||||||
@ -264,7 +264,7 @@ class MTP_DEVICE(MTPDeviceBase):
|
|||||||
break
|
break
|
||||||
storage = {'id':storage_id, 'size':capacity, 'name':name,
|
storage = {'id':storage_id, 'size':capacity, 'name':name,
|
||||||
'is_folder':True, 'can_delete':False, 'is_system':True}
|
'is_folder':True, 'can_delete':False, 'is_system':True}
|
||||||
self._currently_getting_sid = unicode(storage_id)
|
self._currently_getting_sid = unicode_type(storage_id)
|
||||||
id_map = self.dev.get_filesystem(storage_id, partial(
|
id_map = self.dev.get_filesystem(storage_id, partial(
|
||||||
self._filesystem_callback, {}))
|
self._filesystem_callback, {}))
|
||||||
for x in id_map.itervalues():
|
for x in id_map.itervalues():
|
||||||
@ -441,5 +441,3 @@ class MTP_DEVICE(MTPDeviceBase):
|
|||||||
ans = self.dev.put_file(pid, name, stream, size, callback)
|
ans = self.dev.put_file(pid, name, stream, size, callback)
|
||||||
ans['storage_id'] = sid
|
ans['storage_id'] = sid
|
||||||
return parent.add_child(ans)
|
return parent.add_child(ans)
|
||||||
|
|
||||||
|
|
||||||
|
@ -24,6 +24,7 @@ from calibre.devices.usbms.books import CollectionsBookList
|
|||||||
from calibre.devices.usbms.books import BookList
|
from calibre.devices.usbms.books import BookList
|
||||||
from calibre.ebooks.metadata import authors_to_sort_string, authors_to_string
|
from calibre.ebooks.metadata import authors_to_sort_string, authors_to_string
|
||||||
from calibre.constants import islinux
|
from calibre.constants import islinux
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
DBPATH = 'Sony_Reader/database/books.db'
|
DBPATH = 'Sony_Reader/database/books.db'
|
||||||
THUMBPATH = 'Sony_Reader/database/cache/books/%s/thumbnail/main_thumbnail.jpg'
|
THUMBPATH = 'Sony_Reader/database/cache/books/%s/thumbnail/main_thumbnail.jpg'
|
||||||
@ -170,7 +171,7 @@ class PRST1(USBMS):
|
|||||||
|
|
||||||
with closing(sqlite.connect(dbpath)) as connection:
|
with closing(sqlite.connect(dbpath)) as connection:
|
||||||
# Replace undecodable characters in the db instead of erroring out
|
# Replace undecodable characters in the db instead of erroring out
|
||||||
connection.text_factory = lambda x: unicode(x, "utf-8", "replace")
|
connection.text_factory = lambda x: unicode_type(x, "utf-8", "replace")
|
||||||
|
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
# Query collections
|
# Query collections
|
||||||
|
@ -38,6 +38,7 @@ from calibre.utils.filenames import ascii_filename as sanitize, shorten_componen
|
|||||||
from calibre.utils.mdns import (publish as publish_zeroconf, unpublish as
|
from calibre.utils.mdns import (publish as publish_zeroconf, unpublish as
|
||||||
unpublish_zeroconf, get_all_ips)
|
unpublish_zeroconf, get_all_ips)
|
||||||
from calibre.utils.socket_inheritance import set_socket_inherit
|
from calibre.utils.socket_inheritance import set_socket_inherit
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
def synchronous(tlockname):
|
def synchronous(tlockname):
|
||||||
@ -397,7 +398,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
|
|||||||
if isinstance(a, dict):
|
if isinstance(a, dict):
|
||||||
printable = {}
|
printable = {}
|
||||||
for k,v in a.iteritems():
|
for k,v in a.iteritems():
|
||||||
if isinstance(v, (str, unicode)) and len(v) > 50:
|
if isinstance(v, (str, unicode_type)) and len(v) > 50:
|
||||||
printable[k] = 'too long'
|
printable[k] = 'too long'
|
||||||
else:
|
else:
|
||||||
printable[k] = v
|
printable[k] = v
|
||||||
@ -418,14 +419,14 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
|
|||||||
if not isinstance(dinfo, dict):
|
if not isinstance(dinfo, dict):
|
||||||
dinfo = {}
|
dinfo = {}
|
||||||
if dinfo.get('device_store_uuid', None) is None:
|
if dinfo.get('device_store_uuid', None) is None:
|
||||||
dinfo['device_store_uuid'] = unicode(uuid.uuid4())
|
dinfo['device_store_uuid'] = unicode_type(uuid.uuid4())
|
||||||
if dinfo.get('device_name') is None:
|
if dinfo.get('device_name') is None:
|
||||||
dinfo['device_name'] = self.get_gui_name()
|
dinfo['device_name'] = self.get_gui_name()
|
||||||
if name is not None:
|
if name is not None:
|
||||||
dinfo['device_name'] = name
|
dinfo['device_name'] = name
|
||||||
dinfo['location_code'] = location_code
|
dinfo['location_code'] = location_code
|
||||||
dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None)
|
dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None)
|
||||||
dinfo['calibre_version'] = '.'.join([unicode(i) for i in numeric_version])
|
dinfo['calibre_version'] = '.'.join([unicode_type(i) for i in numeric_version])
|
||||||
dinfo['date_last_connected'] = isoformat(now())
|
dinfo['date_last_connected'] = isoformat(now())
|
||||||
dinfo['prefix'] = self.PREFIX
|
dinfo['prefix'] = self.PREFIX
|
||||||
return dinfo
|
return dinfo
|
||||||
@ -478,7 +479,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
|
|||||||
from calibre.library.save_to_disk import get_components
|
from calibre.library.save_to_disk import get_components
|
||||||
from calibre.library.save_to_disk import config
|
from calibre.library.save_to_disk import config
|
||||||
opts = config().parse()
|
opts = config().parse()
|
||||||
if not isinstance(template, unicode):
|
if not isinstance(template, unicode_type):
|
||||||
template = template.decode('utf-8')
|
template = template.decode('utf-8')
|
||||||
app_id = str(getattr(mdata, 'application_id', ''))
|
app_id = str(getattr(mdata, 'application_id', ''))
|
||||||
id_ = mdata.get('id', fname)
|
id_ = mdata.get('id', fname)
|
||||||
@ -726,7 +727,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
|
|||||||
from calibre.utils.date import now, parse_date
|
from calibre.utils.date import now, parse_date
|
||||||
try:
|
try:
|
||||||
key = self._make_metadata_cache_key(uuid, ext_or_lpath)
|
key = self._make_metadata_cache_key(uuid, ext_or_lpath)
|
||||||
if isinstance(lastmod, unicode):
|
if isinstance(lastmod, unicode_type):
|
||||||
if lastmod == 'None':
|
if lastmod == 'None':
|
||||||
return None
|
return None
|
||||||
lastmod = parse_date(lastmod)
|
lastmod = parse_date(lastmod)
|
||||||
|
@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
import os, re
|
import os, re
|
||||||
|
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
def node_mountpoint(node):
|
def node_mountpoint(node):
|
||||||
|
|
||||||
@ -48,7 +49,7 @@ class UDisks(object):
|
|||||||
def mount(self, device_node_path):
|
def mount(self, device_node_path):
|
||||||
d = self.device(device_node_path)
|
d = self.device(device_node_path)
|
||||||
try:
|
try:
|
||||||
return unicode(d.FilesystemMount('',
|
return unicode_type(d.FilesystemMount('',
|
||||||
['auth_no_user_interaction', 'rw', 'noexec', 'nosuid',
|
['auth_no_user_interaction', 'rw', 'noexec', 'nosuid',
|
||||||
'nodev', 'uid=%d'%os.geteuid(), 'gid=%d'%os.getegid()]))
|
'nodev', 'uid=%d'%os.geteuid(), 'gid=%d'%os.getegid()]))
|
||||||
except:
|
except:
|
||||||
@ -131,7 +132,7 @@ class UDisks2(object):
|
|||||||
mount_options = ['rw', 'noexec', 'nosuid',
|
mount_options = ['rw', 'noexec', 'nosuid',
|
||||||
'nodev', 'uid=%d'%os.geteuid(), 'gid=%d'%os.getegid()]
|
'nodev', 'uid=%d'%os.geteuid(), 'gid=%d'%os.getegid()]
|
||||||
try:
|
try:
|
||||||
return unicode(d.Mount(
|
return unicode_type(d.Mount(
|
||||||
{
|
{
|
||||||
'auth.no_user_interaction':True,
|
'auth.no_user_interaction':True,
|
||||||
'options':','.join(mount_options)
|
'options':','.join(mount_options)
|
||||||
|
@ -5,6 +5,7 @@ __copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
from calibre.utils.config_base import Config, ConfigProxy
|
from calibre.utils.config_base import Config, ConfigProxy
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class DeviceConfig(object):
|
class DeviceConfig(object):
|
||||||
@ -107,15 +108,15 @@ class DeviceConfig(object):
|
|||||||
if hasattr(config_widget.opt_extra_customization[i], 'isChecked'):
|
if hasattr(config_widget.opt_extra_customization[i], 'isChecked'):
|
||||||
ec.append(config_widget.opt_extra_customization[i].isChecked())
|
ec.append(config_widget.opt_extra_customization[i].isChecked())
|
||||||
elif hasattr(config_widget.opt_extra_customization[i], 'currentText'):
|
elif hasattr(config_widget.opt_extra_customization[i], 'currentText'):
|
||||||
ec.append(unicode(config_widget.opt_extra_customization[i].currentText()).strip())
|
ec.append(unicode_type(config_widget.opt_extra_customization[i].currentText()).strip())
|
||||||
else:
|
else:
|
||||||
ec.append(unicode(config_widget.opt_extra_customization[i].text()).strip())
|
ec.append(unicode_type(config_widget.opt_extra_customization[i].text()).strip())
|
||||||
else:
|
else:
|
||||||
ec = unicode(config_widget.opt_extra_customization.text()).strip()
|
ec = unicode_type(config_widget.opt_extra_customization.text()).strip()
|
||||||
if not ec:
|
if not ec:
|
||||||
ec = None
|
ec = None
|
||||||
proxy['extra_customization'] = ec
|
proxy['extra_customization'] = ec
|
||||||
st = unicode(config_widget.opt_save_template.text())
|
st = unicode_type(config_widget.opt_save_template.text())
|
||||||
proxy['save_template'] = st
|
proxy['save_template'] = st
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -20,6 +20,7 @@ from calibre.devices.usbms.cli import CLI
|
|||||||
from calibre.devices.usbms.device import Device
|
from calibre.devices.usbms.device import Device
|
||||||
from calibre.devices.usbms.books import BookList, Book
|
from calibre.devices.usbms.books import BookList, Book
|
||||||
from calibre.ebooks.metadata.book.json_codec import JsonCodec
|
from calibre.ebooks.metadata.book.json_codec import JsonCodec
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
BASE_TIME = None
|
BASE_TIME = None
|
||||||
|
|
||||||
@ -105,14 +106,14 @@ class USBMS(CLI, Device):
|
|||||||
if not isinstance(dinfo, dict):
|
if not isinstance(dinfo, dict):
|
||||||
dinfo = {}
|
dinfo = {}
|
||||||
if dinfo.get('device_store_uuid', None) is None:
|
if dinfo.get('device_store_uuid', None) is None:
|
||||||
dinfo['device_store_uuid'] = unicode(uuid.uuid4())
|
dinfo['device_store_uuid'] = unicode_type(uuid.uuid4())
|
||||||
if dinfo.get('device_name', None) is None:
|
if dinfo.get('device_name', None) is None:
|
||||||
dinfo['device_name'] = self.get_gui_name()
|
dinfo['device_name'] = self.get_gui_name()
|
||||||
if name is not None:
|
if name is not None:
|
||||||
dinfo['device_name'] = name
|
dinfo['device_name'] = name
|
||||||
dinfo['location_code'] = location_code
|
dinfo['location_code'] = location_code
|
||||||
dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None)
|
dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None)
|
||||||
dinfo['calibre_version'] = '.'.join([unicode(i) for i in numeric_version])
|
dinfo['calibre_version'] = '.'.join([unicode_type(i) for i in numeric_version])
|
||||||
dinfo['date_last_connected'] = isoformat(now())
|
dinfo['date_last_connected'] = isoformat(now())
|
||||||
dinfo['prefix'] = prefix.replace('\\', '/')
|
dinfo['prefix'] = prefix.replace('\\', '/')
|
||||||
return dinfo
|
return dinfo
|
||||||
|
@ -11,6 +11,7 @@ import os, time, re
|
|||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
from calibre.devices.errors import DeviceError, WrongDestinationError, FreeSpaceError
|
from calibre.devices.errors import DeviceError, WrongDestinationError, FreeSpaceError
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
def sanity_check(on_card, files, card_prefixes, free_space):
|
def sanity_check(on_card, files, card_prefixes, free_space):
|
||||||
@ -97,7 +98,7 @@ def create_upload_path(mdata, fname, template, sanitize,
|
|||||||
ext = path_type.splitext(fname)[1]
|
ext = path_type.splitext(fname)[1]
|
||||||
|
|
||||||
opts = config().parse()
|
opts = config().parse()
|
||||||
if not isinstance(template, unicode):
|
if not isinstance(template, unicode_type):
|
||||||
template = template.decode('utf-8')
|
template = template.decode('utf-8')
|
||||||
app_id = str(getattr(mdata, 'application_id', ''))
|
app_id = str(getattr(mdata, 'application_id', ''))
|
||||||
id_ = mdata.get('id', fname)
|
id_ = mdata.get('id', fname)
|
||||||
|
@ -9,6 +9,7 @@ from various formats.
|
|||||||
|
|
||||||
import traceback, os, re
|
import traceback, os, re
|
||||||
from calibre import CurrentDir, prints
|
from calibre import CurrentDir, prints
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class ConversionError(Exception):
|
class ConversionError(Exception):
|
||||||
@ -113,7 +114,7 @@ def extract_calibre_cover(raw, base, log):
|
|||||||
if matches is None:
|
if matches is None:
|
||||||
body = soup.find('body')
|
body = soup.find('body')
|
||||||
if body is not None:
|
if body is not None:
|
||||||
text = u''.join(map(unicode, body.findAll(text=True)))
|
text = u''.join(map(unicode_type, body.findAll(text=True)))
|
||||||
if text.strip():
|
if text.strip():
|
||||||
# Body has text, abort
|
# Body has text, abort
|
||||||
return
|
return
|
||||||
@ -210,7 +211,7 @@ def check_ebook_format(stream, current_guess):
|
|||||||
|
|
||||||
|
|
||||||
def normalize(x):
|
def normalize(x):
|
||||||
if isinstance(x, unicode):
|
if isinstance(x, unicode_type):
|
||||||
import unicodedata
|
import unicodedata
|
||||||
x = unicodedata.normalize('NFC', x)
|
x = unicodedata.normalize('NFC', x)
|
||||||
return x
|
return x
|
||||||
|
@ -8,6 +8,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re, codecs
|
import re, codecs
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
ENCODING_PATS = [
|
ENCODING_PATS = [
|
||||||
# XML declaration
|
# XML declaration
|
||||||
@ -92,7 +93,7 @@ def force_encoding(raw, verbose, assume_utf8=False):
|
|||||||
|
|
||||||
|
|
||||||
def detect_xml_encoding(raw, verbose=False, assume_utf8=False):
|
def detect_xml_encoding(raw, verbose=False, assume_utf8=False):
|
||||||
if not raw or isinstance(raw, unicode):
|
if not raw or isinstance(raw, unicode_type):
|
||||||
return raw, None
|
return raw, None
|
||||||
for x in ('utf8', 'utf-16-le', 'utf-16-be'):
|
for x in ('utf8', 'utf-16-le', 'utf-16-be'):
|
||||||
bom = getattr(codecs, 'BOM_'+x.upper().replace('-16', '16').replace(
|
bom = getattr(codecs, 'BOM_'+x.upper().replace('-16', '16').replace(
|
||||||
@ -135,7 +136,7 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
|
|||||||
return '', None
|
return '', None
|
||||||
raw, encoding = detect_xml_encoding(raw, verbose=verbose,
|
raw, encoding = detect_xml_encoding(raw, verbose=verbose,
|
||||||
assume_utf8=assume_utf8)
|
assume_utf8=assume_utf8)
|
||||||
if not isinstance(raw, unicode):
|
if not isinstance(raw, unicode_type):
|
||||||
raw = raw.decode(encoding, 'replace')
|
raw = raw.decode(encoding, 'replace')
|
||||||
|
|
||||||
if strip_encoding_pats:
|
if strip_encoding_pats:
|
||||||
|
@ -14,6 +14,7 @@ from calibre.utils.chm.chm import CHMFile
|
|||||||
from calibre.constants import plugins
|
from calibre.constants import plugins
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
chmlib, chmlib_err = plugins['chmlib']
|
chmlib, chmlib_err = plugins['chmlib']
|
||||||
@ -48,7 +49,7 @@ class CHMReader(CHMFile):
|
|||||||
|
|
||||||
def __init__(self, input, log, input_encoding=None):
|
def __init__(self, input, log, input_encoding=None):
|
||||||
CHMFile.__init__(self)
|
CHMFile.__init__(self)
|
||||||
if isinstance(input, unicode):
|
if isinstance(input, unicode_type):
|
||||||
input = input.encode(filesystem_encoding)
|
input = input.encode(filesystem_encoding)
|
||||||
if not self.LoadCHM(input):
|
if not self.LoadCHM(input):
|
||||||
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
||||||
@ -113,7 +114,7 @@ class CHMReader(CHMFile):
|
|||||||
enc = 'cp1252'
|
enc = 'cp1252'
|
||||||
for path in self.Contents():
|
for path in self.Contents():
|
||||||
fpath = path
|
fpath = path
|
||||||
if not isinstance(path, unicode):
|
if not isinstance(path, unicode_type):
|
||||||
fpath = path.decode(enc)
|
fpath = path.decode(enc)
|
||||||
lpath = os.path.join(output_dir, fpath)
|
lpath = os.path.join(output_dir, fpath)
|
||||||
self._ensure_dir(lpath)
|
self._ensure_dir(lpath)
|
||||||
@ -146,7 +147,7 @@ class CHMReader(CHMFile):
|
|||||||
with open(lpath, 'r+b') as f:
|
with open(lpath, 'r+b') as f:
|
||||||
data = f.read()
|
data = f.read()
|
||||||
data = self._reformat(data, lpath)
|
data = self._reformat(data, lpath)
|
||||||
if isinstance(data, unicode):
|
if isinstance(data, unicode_type):
|
||||||
data = data.encode('utf-8')
|
data = data.encode('utf-8')
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
f.truncate()
|
f.truncate()
|
||||||
|
@ -16,6 +16,7 @@ from calibre.ptempfile import PersistentTemporaryDirectory
|
|||||||
from calibre.utils.icu import numeric_sort_key
|
from calibre.utils.icu import numeric_sort_key
|
||||||
from calibre.utils.ipc.server import Server
|
from calibre.utils.ipc.server import Server
|
||||||
from calibre.utils.ipc.job import ParallelJob
|
from calibre.utils.ipc.job import ParallelJob
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
# If the specified screen has either dimension larger than this value, no image
|
# If the specified screen has either dimension larger than this value, no image
|
||||||
# rescaling is done (we assume that it is a tablet output profile)
|
# rescaling is done (we assume that it is a tablet output profile)
|
||||||
@ -27,7 +28,7 @@ def extract_comic(path_to_comic_file):
|
|||||||
Un-archive the comic file.
|
Un-archive the comic file.
|
||||||
'''
|
'''
|
||||||
tdir = PersistentTemporaryDirectory(suffix='_comic_extract')
|
tdir = PersistentTemporaryDirectory(suffix='_comic_extract')
|
||||||
if not isinstance(tdir, unicode):
|
if not isinstance(tdir, unicode_type):
|
||||||
# Needed in case the zip file has wrongly encoded unicode file/dir
|
# Needed in case the zip file has wrongly encoded unicode file/dir
|
||||||
# names
|
# names
|
||||||
tdir = tdir.decode(filesystem_encoding)
|
tdir = tdir.decode(filesystem_encoding)
|
||||||
@ -273,6 +274,3 @@ def process_pages(pages, opts, update, tdir):
|
|||||||
ans += pages
|
ans += pages
|
||||||
failures += failures_
|
failures += failures_
|
||||||
return ans, failures
|
return ans, failures
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,6 +13,7 @@ from calibre.utils.lock import ExclusiveFile
|
|||||||
from calibre import sanitize_file_name
|
from calibre import sanitize_file_name
|
||||||
from calibre.customize.conversion import OptionRecommendation
|
from calibre.customize.conversion import OptionRecommendation
|
||||||
from calibre.customize.ui import available_output_formats
|
from calibre.customize.ui import available_output_formats
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
config_dir = os.path.join(config_dir, 'conversion')
|
config_dir = os.path.join(config_dir, 'conversion')
|
||||||
@ -85,7 +86,7 @@ class GuiRecommendations(dict):
|
|||||||
|
|
||||||
def serialize(self):
|
def serialize(self):
|
||||||
ans = json.dumps(self, indent=2, ensure_ascii=False)
|
ans = json.dumps(self, indent=2, ensure_ascii=False)
|
||||||
if isinstance(ans, unicode):
|
if isinstance(ans, unicode_type):
|
||||||
ans = ans.encode('utf-8')
|
ans = ans.encode('utf-8')
|
||||||
return b'json:' + ans
|
return b'json:' + ans
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ import os
|
|||||||
from calibre.customize.conversion import InputFormatPlugin
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.constants import filesystem_encoding
|
from calibre.constants import filesystem_encoding
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class CHMInput(InputFormatPlugin):
|
class CHMInput(InputFormatPlugin):
|
||||||
@ -34,7 +35,7 @@ class CHMInput(InputFormatPlugin):
|
|||||||
|
|
||||||
log.debug('Processing CHM...')
|
log.debug('Processing CHM...')
|
||||||
with TemporaryDirectory('_chm2oeb') as tdir:
|
with TemporaryDirectory('_chm2oeb') as tdir:
|
||||||
if not isinstance(tdir, unicode):
|
if not isinstance(tdir, unicode_type):
|
||||||
tdir = tdir.decode(filesystem_encoding)
|
tdir = tdir.decode(filesystem_encoding)
|
||||||
html_input = plugin_for_input_format('html')
|
html_input = plugin_for_input_format('html')
|
||||||
for opt in html_input.options:
|
for opt in html_input.options:
|
||||||
@ -125,7 +126,7 @@ class CHMInput(InputFormatPlugin):
|
|||||||
base = os.path.dirname(os.path.abspath(htmlpath))
|
base = os.path.dirname(os.path.abspath(htmlpath))
|
||||||
|
|
||||||
def unquote(x):
|
def unquote(x):
|
||||||
if isinstance(x, unicode):
|
if isinstance(x, unicode_type):
|
||||||
x = x.encode('utf-8')
|
x = x.encode('utf-8')
|
||||||
return _unquote(x).decode('utf-8')
|
return _unquote(x).decode('utf-8')
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ import os, re, posixpath
|
|||||||
from itertools import cycle
|
from itertools import cycle
|
||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC'
|
ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC'
|
||||||
IDPF_OBFUSCATION = 'http://www.idpf.org/2008/embedding'
|
IDPF_OBFUSCATION = 'http://www.idpf.org/2008/embedding'
|
||||||
@ -367,7 +368,7 @@ class EPUBInput(InputFormatPlugin):
|
|||||||
def add_from_li(li, parent):
|
def add_from_li(li, parent):
|
||||||
href = text = None
|
href = text = None
|
||||||
for x in li.iterchildren(XHTML('a'), XHTML('span')):
|
for x in li.iterchildren(XHTML('a'), XHTML('span')):
|
||||||
text = etree.tostring(x, method='text', encoding=unicode, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
|
text = etree.tostring(x, method='text', encoding=unicode_type, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
|
||||||
href = x.get('href')
|
href = x.get('href')
|
||||||
if href:
|
if href:
|
||||||
if href.startswith('#'):
|
if href.startswith('#'):
|
||||||
|
@ -13,6 +13,7 @@ from calibre.customize.conversion import (OutputFormatPlugin,
|
|||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre import CurrentDir
|
from calibre import CurrentDir
|
||||||
from calibre.constants import filesystem_encoding
|
from calibre.constants import filesystem_encoding
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
block_level_tags = (
|
block_level_tags = (
|
||||||
'address',
|
'address',
|
||||||
@ -225,8 +226,8 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
identifiers = oeb.metadata['identifier']
|
identifiers = oeb.metadata['identifier']
|
||||||
uuid = None
|
uuid = None
|
||||||
for x in identifiers:
|
for x in identifiers:
|
||||||
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
|
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode_type(x).startswith('urn:uuid:'):
|
||||||
uuid = unicode(x).split(':')[-1]
|
uuid = unicode_type(x).split(':')[-1]
|
||||||
break
|
break
|
||||||
encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
|
encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
|
||||||
|
|
||||||
@ -241,7 +242,7 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
# for some absurd reason, or it will throw a hissy fit and refuse
|
# for some absurd reason, or it will throw a hissy fit and refuse
|
||||||
# to use the obfuscated fonts.
|
# to use the obfuscated fonts.
|
||||||
for x in identifiers:
|
for x in identifiers:
|
||||||
if unicode(x) == uuid:
|
if unicode_type(x) == uuid:
|
||||||
x.content = 'urn:uuid:'+uuid
|
x.content = 'urn:uuid:'+uuid
|
||||||
|
|
||||||
with TemporaryDirectory(u'_epub_output') as tdir:
|
with TemporaryDirectory(u'_epub_output') as tdir:
|
||||||
@ -325,7 +326,7 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
fonts = []
|
fonts = []
|
||||||
for uri in list(uris.keys()):
|
for uri in list(uris.keys()):
|
||||||
path = uris[uri]
|
path = uris[uri]
|
||||||
if isinstance(path, unicode):
|
if isinstance(path, unicode_type):
|
||||||
path = path.encode(filesystem_encoding)
|
path = path.encode(filesystem_encoding)
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path):
|
||||||
uris.pop(uri)
|
uris.pop(uri)
|
||||||
@ -339,7 +340,7 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
f.write(chr(ord(data[i]) ^ key[i%16]))
|
f.write(chr(ord(data[i]) ^ key[i%16]))
|
||||||
else:
|
else:
|
||||||
self.log.warn('Font', path, 'is invalid, ignoring')
|
self.log.warn('Font', path, 'is invalid, ignoring')
|
||||||
if not isinstance(uri, unicode):
|
if not isinstance(uri, unicode_type):
|
||||||
uri = uri.decode('utf-8')
|
uri = uri.decode('utf-8')
|
||||||
fonts.append(u'''
|
fonts.append(u'''
|
||||||
<enc:EncryptedData>
|
<enc:EncryptedData>
|
||||||
|
@ -8,6 +8,7 @@ import os, re
|
|||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||||
from calibre import guess_type
|
from calibre import guess_type
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
|
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
|
||||||
FB21NS = 'http://www.gribuser.ru/xml/fictionbook/2.1'
|
FB21NS = 'http://www.gribuser.ru/xml/fictionbook/2.1'
|
||||||
@ -70,7 +71,7 @@ class FB2Input(InputFormatPlugin):
|
|||||||
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
|
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
|
||||||
css = ''
|
css = ''
|
||||||
for s in stylesheets:
|
for s in stylesheets:
|
||||||
css += etree.tostring(s, encoding=unicode, method='text',
|
css += etree.tostring(s, encoding=unicode_type, method='text',
|
||||||
with_tail=False) + '\n\n'
|
with_tail=False) + '\n\n'
|
||||||
if css:
|
if css:
|
||||||
import css_parser, logging
|
import css_parser, logging
|
||||||
@ -82,7 +83,7 @@ class FB2Input(InputFormatPlugin):
|
|||||||
log.debug('Parsing stylesheet...')
|
log.debug('Parsing stylesheet...')
|
||||||
stylesheet = parser.parseString(text)
|
stylesheet = parser.parseString(text)
|
||||||
stylesheet.namespaces['h'] = XHTML_NS
|
stylesheet.namespaces['h'] = XHTML_NS
|
||||||
css = unicode(stylesheet.cssText).replace('h|style', 'h|span')
|
css = unicode_type(stylesheet.cssText).replace('h|style', 'h|span')
|
||||||
css = re.sub(r'name\s*=\s*', 'class=', css)
|
css = re.sub(r'name\s*=\s*', 'class=', css)
|
||||||
self.extract_embedded_content(doc)
|
self.extract_embedded_content(doc)
|
||||||
log.debug('Converting XML to HTML...')
|
log.debug('Converting XML to HTML...')
|
||||||
|
@ -17,6 +17,7 @@ from calibre.customize.conversion import (InputFormatPlugin,
|
|||||||
from calibre.utils.localization import get_lang
|
from calibre.utils.localization import get_lang
|
||||||
from calibre.utils.filenames import ascii_filename
|
from calibre.utils.filenames import ascii_filename
|
||||||
from calibre.utils.imghdr import what
|
from calibre.utils.imghdr import what
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
def sanitize_file_name(x):
|
def sanitize_file_name(x):
|
||||||
@ -225,7 +226,7 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
|
|
||||||
def link_to_local_path(self, link_, base=None):
|
def link_to_local_path(self, link_, base=None):
|
||||||
from calibre.ebooks.html.input import Link
|
from calibre.ebooks.html.input import Link
|
||||||
if not isinstance(link_, unicode):
|
if not isinstance(link_, unicode_type):
|
||||||
try:
|
try:
|
||||||
link_ = link_.decode('utf-8', 'error')
|
link_ = link_.decode('utf-8', 'error')
|
||||||
except:
|
except:
|
||||||
@ -289,7 +290,7 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
# bhref refers to an already existing file. The read() method of
|
# bhref refers to an already existing file. The read() method of
|
||||||
# DirContainer will call unquote on it before trying to read the
|
# DirContainer will call unquote on it before trying to read the
|
||||||
# file, therefore we quote it here.
|
# file, therefore we quote it here.
|
||||||
if isinstance(bhref, unicode):
|
if isinstance(bhref, unicode_type):
|
||||||
bhref = bhref.encode('utf-8')
|
bhref = bhref.encode('utf-8')
|
||||||
item.html_input_href = quote(bhref).decode('utf-8')
|
item.html_input_href = quote(bhref).decode('utf-8')
|
||||||
if guessed in self.OEB_STYLES:
|
if guessed in self.OEB_STYLES:
|
||||||
|
@ -9,6 +9,7 @@ from os.path import dirname, abspath, relpath as _relpath, exists, basename
|
|||||||
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
||||||
from calibre import CurrentDir
|
from calibre import CurrentDir
|
||||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
def relpath(*args):
|
def relpath(*args):
|
||||||
@ -135,7 +136,7 @@ class HTMLOutput(OutputFormatPlugin):
|
|||||||
toc=html_toc, meta=meta, nextLink=nextLink,
|
toc=html_toc, meta=meta, nextLink=nextLink,
|
||||||
tocUrl=tocUrl, cssLink=cssLink,
|
tocUrl=tocUrl, cssLink=cssLink,
|
||||||
firstContentPageLink=nextLink)
|
firstContentPageLink=nextLink)
|
||||||
if isinstance(t, unicode):
|
if isinstance(t, unicode_type):
|
||||||
t = t.encode('utf-8')
|
t = t.encode('utf-8')
|
||||||
f.write(t)
|
f.write(t)
|
||||||
|
|
||||||
|
@ -13,6 +13,7 @@ from cStringIO import StringIO
|
|||||||
from calibre.customize.conversion import OutputFormatPlugin, \
|
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||||
OptionRecommendation
|
OptionRecommendation
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class HTMLZOutput(OutputFormatPlugin):
|
class HTMLZOutput(OutputFormatPlugin):
|
||||||
@ -81,9 +82,9 @@ class HTMLZOutput(OutputFormatPlugin):
|
|||||||
fname = u'index'
|
fname = u'index'
|
||||||
if opts.htmlz_title_filename:
|
if opts.htmlz_title_filename:
|
||||||
from calibre.utils.filenames import shorten_components_to
|
from calibre.utils.filenames import shorten_components_to
|
||||||
fname = shorten_components_to(100, (ascii_filename(unicode(oeb_book.metadata.title[0])),))[0]
|
fname = shorten_components_to(100, (ascii_filename(unicode_type(oeb_book.metadata.title[0])),))[0]
|
||||||
with open(os.path.join(tdir, fname+u'.html'), 'wb') as tf:
|
with open(os.path.join(tdir, fname+u'.html'), 'wb') as tf:
|
||||||
if isinstance(html, unicode):
|
if isinstance(html, unicode_type):
|
||||||
html = html.encode('utf-8')
|
html = html.encode('utf-8')
|
||||||
tf.write(html)
|
tf.write(html)
|
||||||
|
|
||||||
@ -100,7 +101,7 @@ class HTMLZOutput(OutputFormatPlugin):
|
|||||||
for item in oeb_book.manifest:
|
for item in oeb_book.manifest:
|
||||||
if item.media_type in OEB_IMAGES and item.href in images:
|
if item.media_type in OEB_IMAGES and item.href in images:
|
||||||
if item.media_type == SVG_MIME:
|
if item.media_type == SVG_MIME:
|
||||||
data = unicode(etree.tostring(item.data, encoding=unicode))
|
data = unicode_type(etree.tostring(item.data, encoding=unicode_type))
|
||||||
else:
|
else:
|
||||||
data = item.data
|
data = item.data
|
||||||
fname = os.path.join(tdir, u'images', images[item.href])
|
fname = os.path.join(tdir, u'images', images[item.href])
|
||||||
|
@ -10,6 +10,7 @@ import sys, os
|
|||||||
|
|
||||||
from calibre.customize.conversion import OutputFormatPlugin
|
from calibre.customize.conversion import OutputFormatPlugin
|
||||||
from calibre.customize.conversion import OptionRecommendation
|
from calibre.customize.conversion import OptionRecommendation
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class LRFOptions(object):
|
class LRFOptions(object):
|
||||||
@ -17,7 +18,7 @@ class LRFOptions(object):
|
|||||||
def __init__(self, output, opts, oeb):
|
def __init__(self, output, opts, oeb):
|
||||||
def f2s(f):
|
def f2s(f):
|
||||||
try:
|
try:
|
||||||
return unicode(f[0])
|
return unicode_type(f[0])
|
||||||
except:
|
except:
|
||||||
return ''
|
return ''
|
||||||
m = oeb.metadata
|
m = oeb.metadata
|
||||||
@ -31,13 +32,13 @@ class LRFOptions(object):
|
|||||||
self.title_sort = self.author_sort = ''
|
self.title_sort = self.author_sort = ''
|
||||||
for x in m.creator:
|
for x in m.creator:
|
||||||
if x.role == 'aut':
|
if x.role == 'aut':
|
||||||
self.author = unicode(x)
|
self.author = unicode_type(x)
|
||||||
fa = unicode(getattr(x, 'file_as', ''))
|
fa = unicode_type(getattr(x, 'file_as', ''))
|
||||||
if fa:
|
if fa:
|
||||||
self.author_sort = fa
|
self.author_sort = fa
|
||||||
for x in m.title:
|
for x in m.title:
|
||||||
if unicode(x.file_as):
|
if unicode_type(x.file_as):
|
||||||
self.title_sort = unicode(x.file_as)
|
self.title_sort = unicode_type(x.file_as)
|
||||||
self.freetext = f2s(m.description)
|
self.freetext = f2s(m.description)
|
||||||
self.category = f2s(m.subject)
|
self.category = f2s(m.subject)
|
||||||
self.cover = None
|
self.cover = None
|
||||||
|
@ -6,6 +6,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class MOBIInput(InputFormatPlugin):
|
class MOBIInput(InputFormatPlugin):
|
||||||
@ -49,7 +50,7 @@ class MOBIInput(InputFormatPlugin):
|
|||||||
|
|
||||||
raw = parse_cache.pop('calibre_raw_mobi_markup', False)
|
raw = parse_cache.pop('calibre_raw_mobi_markup', False)
|
||||||
if raw:
|
if raw:
|
||||||
if isinstance(raw, unicode):
|
if isinstance(raw, unicode_type):
|
||||||
raw = raw.encode('utf-8')
|
raw = raw.encode('utf-8')
|
||||||
open(u'debug-raw.html', 'wb').write(raw)
|
open(u'debug-raw.html', 'wb').write(raw)
|
||||||
from calibre.ebooks.oeb.base import close_self_closing_tags
|
from calibre.ebooks.oeb.base import close_self_closing_tags
|
||||||
|
@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
from calibre.customize.conversion import (OutputFormatPlugin,
|
from calibre.customize.conversion import (OutputFormatPlugin,
|
||||||
OptionRecommendation)
|
OptionRecommendation)
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
def remove_html_cover(oeb, log):
|
def remove_html_cover(oeb, log):
|
||||||
@ -121,7 +122,7 @@ class MOBIOutput(OutputFormatPlugin):
|
|||||||
if not found:
|
if not found:
|
||||||
from calibre.ebooks import generate_masthead
|
from calibre.ebooks import generate_masthead
|
||||||
self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...')
|
self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...')
|
||||||
raw = generate_masthead(unicode(self.oeb.metadata['title'][0]))
|
raw = generate_masthead(unicode_type(self.oeb.metadata['title'][0]))
|
||||||
id, href = self.oeb.manifest.generate('masthead', 'masthead')
|
id, href = self.oeb.manifest.generate('masthead', 'masthead')
|
||||||
self.oeb.manifest.add(id, href, 'image/gif', data=raw)
|
self.oeb.manifest.add(id, href, 'image/gif', data=raw)
|
||||||
self.oeb.guide.add('masthead', 'Masthead Image', href)
|
self.oeb.guide.add('masthead', 'Masthead Image', href)
|
||||||
@ -165,7 +166,7 @@ class MOBIOutput(OutputFormatPlugin):
|
|||||||
sec.nodes.remove(a)
|
sec.nodes.remove(a)
|
||||||
|
|
||||||
root = TOC(klass='periodical', href=self.oeb.spine[0].href,
|
root = TOC(klass='periodical', href=self.oeb.spine[0].href,
|
||||||
title=unicode(self.oeb.metadata.title[0]))
|
title=unicode_type(self.oeb.metadata.title[0]))
|
||||||
|
|
||||||
for s in sections:
|
for s in sections:
|
||||||
if articles[id(s)]:
|
if articles[id(s)]:
|
||||||
|
@ -14,6 +14,7 @@ from calibre.constants import iswindows
|
|||||||
from calibre.customize.conversion import (OutputFormatPlugin,
|
from calibre.customize.conversion import (OutputFormatPlugin,
|
||||||
OptionRecommendation)
|
OptionRecommendation)
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
UNITS = ['millimeter', 'centimeter', 'point', 'inch' , 'pica' , 'didot',
|
UNITS = ['millimeter', 'centimeter', 'point', 'inch' , 'pica' , 'didot',
|
||||||
'cicero', 'devicepixel']
|
'cicero', 'devicepixel']
|
||||||
@ -202,8 +203,8 @@ class PDFOutput(OutputFormatPlugin):
|
|||||||
|
|
||||||
def get_cover_data(self):
|
def get_cover_data(self):
|
||||||
oeb = self.oeb
|
oeb = self.oeb
|
||||||
if (oeb.metadata.cover and unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
|
if (oeb.metadata.cover and unicode_type(oeb.metadata.cover[0]) in oeb.manifest.ids):
|
||||||
cover_id = unicode(oeb.metadata.cover[0])
|
cover_id = unicode_type(oeb.metadata.cover[0])
|
||||||
item = oeb.manifest.ids[cover_id]
|
item = oeb.manifest.ids[cover_id]
|
||||||
self.cover_data = item.data
|
self.cover_data = item.data
|
||||||
|
|
||||||
|
@ -9,6 +9,7 @@ import os, cStringIO
|
|||||||
from calibre.customize.conversion import (OutputFormatPlugin,
|
from calibre.customize.conversion import (OutputFormatPlugin,
|
||||||
OptionRecommendation)
|
OptionRecommendation)
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class PMLOutput(OutputFormatPlugin):
|
class PMLOutput(OutputFormatPlugin):
|
||||||
@ -40,7 +41,7 @@ class PMLOutput(OutputFormatPlugin):
|
|||||||
|
|
||||||
with TemporaryDirectory('_pmlz_output') as tdir:
|
with TemporaryDirectory('_pmlz_output') as tdir:
|
||||||
pmlmlizer = PMLMLizer(log)
|
pmlmlizer = PMLMLizer(log)
|
||||||
pml = unicode(pmlmlizer.extract_content(oeb_book, opts))
|
pml = unicode_type(pmlmlizer.extract_content(oeb_book, opts))
|
||||||
with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
|
with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
|
||||||
out.write(pml.encode(opts.pml_output_encoding, 'replace'))
|
out.write(pml.encode(opts.pml_output_encoding, 'replace'))
|
||||||
|
|
||||||
|
@ -11,6 +11,7 @@ import os
|
|||||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||||
from calibre.constants import numeric_version
|
from calibre.constants import numeric_version
|
||||||
from calibre import walk
|
from calibre import walk
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class RecipeDisabled(Exception):
|
class RecipeDisabled(Exception):
|
||||||
@ -161,6 +162,6 @@ class RecipeInput(InputFormatPlugin):
|
|||||||
|
|
||||||
def save_download(self, zf):
|
def save_download(self, zf):
|
||||||
raw = self.recipe_source
|
raw = self.recipe_source
|
||||||
if isinstance(raw, unicode):
|
if isinstance(raw, unicode_type):
|
||||||
raw = raw.encode('utf-8')
|
raw = raw.encode('utf-8')
|
||||||
zf.writestr('download.recipe', raw)
|
zf.writestr('download.recipe', raw)
|
||||||
|
@ -9,6 +9,7 @@ import os, string
|
|||||||
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.constants import __appname__, __version__
|
from calibre.constants import __appname__, __version__
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class SNBOutput(OutputFormatPlugin):
|
class SNBOutput(OutputFormatPlugin):
|
||||||
@ -73,20 +74,20 @@ class SNBOutput(OutputFormatPlugin):
|
|||||||
# Process Meta data
|
# Process Meta data
|
||||||
meta = oeb_book.metadata
|
meta = oeb_book.metadata
|
||||||
if meta.title:
|
if meta.title:
|
||||||
title = unicode(meta.title[0])
|
title = unicode_type(meta.title[0])
|
||||||
else:
|
else:
|
||||||
title = ''
|
title = ''
|
||||||
authors = [unicode(x) for x in meta.creator if x.role == 'aut']
|
authors = [unicode_type(x) for x in meta.creator if x.role == 'aut']
|
||||||
if meta.publisher:
|
if meta.publisher:
|
||||||
publishers = unicode(meta.publisher[0])
|
publishers = unicode_type(meta.publisher[0])
|
||||||
else:
|
else:
|
||||||
publishers = ''
|
publishers = ''
|
||||||
if meta.language:
|
if meta.language:
|
||||||
lang = unicode(meta.language[0]).upper()
|
lang = unicode_type(meta.language[0]).upper()
|
||||||
else:
|
else:
|
||||||
lang = ''
|
lang = ''
|
||||||
if meta.description:
|
if meta.description:
|
||||||
abstract = unicode(meta.description[0])
|
abstract = unicode_type(meta.description[0])
|
||||||
else:
|
else:
|
||||||
abstract = ''
|
abstract = ''
|
||||||
|
|
||||||
|
@ -18,6 +18,7 @@ from calibre.utils.zipfile import ZipFile
|
|||||||
from calibre import (extract, walk, isbytestring, filesystem_encoding,
|
from calibre import (extract, walk, isbytestring, filesystem_encoding,
|
||||||
get_types_map)
|
get_types_map)
|
||||||
from calibre.constants import __version__
|
from calibre.constants import __version__
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
DEBUG_README=u'''
|
DEBUG_README=u'''
|
||||||
This debug directory contains snapshots of the e-book as it passes through the
|
This debug directory contains snapshots of the e-book as it passes through the
|
||||||
@ -794,7 +795,7 @@ OptionRecommendation(name='search_replace',
|
|||||||
def unarchive(self, path, tdir):
|
def unarchive(self, path, tdir):
|
||||||
extract(path, tdir)
|
extract(path, tdir)
|
||||||
files = list(walk(tdir))
|
files = list(walk(tdir))
|
||||||
files = [f if isinstance(f, unicode) else f.decode(filesystem_encoding)
|
files = [f if isinstance(f, unicode_type) else f.decode(filesystem_encoding)
|
||||||
for f in files]
|
for f in files]
|
||||||
from calibre.customize.ui import available_input_formats
|
from calibre.customize.ui import available_input_formats
|
||||||
fmts = set(available_input_formats())
|
fmts = set(available_input_formats())
|
||||||
@ -915,7 +916,7 @@ OptionRecommendation(name='search_replace',
|
|||||||
try:
|
try:
|
||||||
val = parse_date(val, assume_utc=x=='timestamp')
|
val = parse_date(val, assume_utc=x=='timestamp')
|
||||||
except:
|
except:
|
||||||
self.log.exception(_('Failed to parse date/time') + ' ' + unicode(val))
|
self.log.exception(_('Failed to parse date/time') + ' ' + unicode_type(val))
|
||||||
continue
|
continue
|
||||||
setattr(mi, x, val)
|
setattr(mi, x, val)
|
||||||
|
|
||||||
|
@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import functools, re, json
|
import functools, re, json
|
||||||
|
|
||||||
from calibre import entity_to_unicode, as_unicode
|
from calibre import entity_to_unicode, as_unicode
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>')
|
XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>')
|
||||||
SVG_NS = 'http://www.w3.org/2000/svg'
|
SVG_NS = 'http://www.w3.org/2000/svg'
|
||||||
@ -218,8 +219,8 @@ class Dehyphenator(object):
|
|||||||
wraptags = match.group('wraptags')
|
wraptags = match.group('wraptags')
|
||||||
except:
|
except:
|
||||||
wraptags = ''
|
wraptags = ''
|
||||||
hyphenated = unicode(firsthalf) + "-" + unicode(secondhalf)
|
hyphenated = unicode_type(firsthalf) + "-" + unicode_type(secondhalf)
|
||||||
dehyphenated = unicode(firsthalf) + unicode(secondhalf)
|
dehyphenated = unicode_type(firsthalf) + unicode_type(secondhalf)
|
||||||
if self.suffixes.match(secondhalf) is None:
|
if self.suffixes.match(secondhalf) is None:
|
||||||
lookupword = self.removesuffixes.sub('', dehyphenated)
|
lookupword = self.removesuffixes.sub('', dehyphenated)
|
||||||
else:
|
else:
|
||||||
@ -315,7 +316,7 @@ class CSSPreProcessor(object):
|
|||||||
# are commented lines before the first @import or @charset rule. Since
|
# are commented lines before the first @import or @charset rule. Since
|
||||||
# the conversion will remove all stylesheets anyway, we don't lose
|
# the conversion will remove all stylesheets anyway, we don't lose
|
||||||
# anything
|
# anything
|
||||||
data = re.sub(unicode(r'/\*.*?\*/'), u'', data, flags=re.DOTALL)
|
data = re.sub(unicode_type(r'/\*.*?\*/'), u'', data, flags=re.DOTALL)
|
||||||
|
|
||||||
ans, namespaced = [], False
|
ans, namespaced = [], False
|
||||||
for line in data.splitlines():
|
for line in data.splitlines():
|
||||||
|
@ -10,6 +10,7 @@ from math import ceil
|
|||||||
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
|
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
|
||||||
from calibre.utils.logging import default_log
|
from calibre.utils.logging import default_log
|
||||||
from calibre.utils.wordcount import get_wordcount_obj
|
from calibre.utils.wordcount import get_wordcount_obj
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class HeuristicProcessor(object):
|
class HeuristicProcessor(object):
|
||||||
@ -50,8 +51,8 @@ class HeuristicProcessor(object):
|
|||||||
title = match.group('title')
|
title = match.group('title')
|
||||||
if not title:
|
if not title:
|
||||||
self.html_preprocess_sections = self.html_preprocess_sections + 1
|
self.html_preprocess_sections = self.html_preprocess_sections + 1
|
||||||
self.log.debug("marked " + unicode(self.html_preprocess_sections) +
|
self.log.debug("marked " + unicode_type(self.html_preprocess_sections) +
|
||||||
" chapters. - " + unicode(chap))
|
" chapters. - " + unicode_type(chap))
|
||||||
return '<h2>'+chap+'</h2>\n'
|
return '<h2>'+chap+'</h2>\n'
|
||||||
else:
|
else:
|
||||||
delete_whitespace = re.compile('^\\s*(?P<c>.*?)\\s*$')
|
delete_whitespace = re.compile('^\\s*(?P<c>.*?)\\s*$')
|
||||||
@ -59,16 +60,16 @@ class HeuristicProcessor(object):
|
|||||||
txt_chap = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(chap)))
|
txt_chap = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(chap)))
|
||||||
txt_title = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(title)))
|
txt_title = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(title)))
|
||||||
self.html_preprocess_sections = self.html_preprocess_sections + 1
|
self.html_preprocess_sections = self.html_preprocess_sections + 1
|
||||||
self.log.debug("marked " + unicode(self.html_preprocess_sections) +
|
self.log.debug("marked " + unicode_type(self.html_preprocess_sections) +
|
||||||
" chapters & titles. - " + unicode(chap) + ", " + unicode(title))
|
" chapters & titles. - " + unicode_type(chap) + ", " + unicode_type(title))
|
||||||
return '<h2 title="'+txt_chap+', '+txt_title+'">'+chap+'</h2>\n<h3 class="sigilNotInTOC">'+title+'</h3>\n'
|
return '<h2 title="'+txt_chap+', '+txt_title+'">'+chap+'</h2>\n<h3 class="sigilNotInTOC">'+title+'</h3>\n'
|
||||||
|
|
||||||
def chapter_break(self, match):
|
def chapter_break(self, match):
|
||||||
chap = match.group('section')
|
chap = match.group('section')
|
||||||
styles = match.group('styles')
|
styles = match.group('styles')
|
||||||
self.html_preprocess_sections = self.html_preprocess_sections + 1
|
self.html_preprocess_sections = self.html_preprocess_sections + 1
|
||||||
self.log.debug("marked " + unicode(self.html_preprocess_sections) +
|
self.log.debug("marked " + unicode_type(self.html_preprocess_sections) +
|
||||||
" section markers based on punctuation. - " + unicode(chap))
|
" section markers based on punctuation. - " + unicode_type(chap))
|
||||||
return '<'+styles+' style="page-break-before:always">'+chap
|
return '<'+styles+' style="page-break-before:always">'+chap
|
||||||
|
|
||||||
def analyze_title_matches(self, match):
|
def analyze_title_matches(self, match):
|
||||||
@ -111,8 +112,8 @@ class HeuristicProcessor(object):
|
|||||||
line_end = line_end_ere.findall(raw)
|
line_end = line_end_ere.findall(raw)
|
||||||
tot_htm_ends = len(htm_end)
|
tot_htm_ends = len(htm_end)
|
||||||
tot_ln_fds = len(line_end)
|
tot_ln_fds = len(line_end)
|
||||||
# self.log.debug("There are " + unicode(tot_ln_fds) + " total Line feeds, and " +
|
# self.log.debug("There are " + unicode_type(tot_ln_fds) + " total Line feeds, and " +
|
||||||
# unicode(tot_htm_ends) + " marked up endings")
|
# unicode_type(tot_htm_ends) + " marked up endings")
|
||||||
|
|
||||||
if percent > 1:
|
if percent > 1:
|
||||||
percent = 1
|
percent = 1
|
||||||
@ -120,7 +121,7 @@ class HeuristicProcessor(object):
|
|||||||
percent = 0
|
percent = 0
|
||||||
|
|
||||||
min_lns = tot_ln_fds * percent
|
min_lns = tot_ln_fds * percent
|
||||||
# self.log.debug("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup")
|
# self.log.debug("There must be fewer than " + unicode_type(min_lns) + " unmarked lines to add markup")
|
||||||
return min_lns > tot_htm_ends
|
return min_lns > tot_htm_ends
|
||||||
|
|
||||||
def dump(self, raw, where):
|
def dump(self, raw, where):
|
||||||
@ -157,17 +158,17 @@ class HeuristicProcessor(object):
|
|||||||
]
|
]
|
||||||
|
|
||||||
ITALICIZE_STYLE_PATS = [
|
ITALICIZE_STYLE_PATS = [
|
||||||
unicode(r'(?msu)(?<=[\s>"“\'‘])_\*/(?P<words>[^\*_]+)/\*_'),
|
unicode_type(r'(?msu)(?<=[\s>"“\'‘])_\*/(?P<words>[^\*_]+)/\*_'),
|
||||||
unicode(r'(?msu)(?<=[\s>"“\'‘])~~(?P<words>[^~]+)~~'),
|
unicode_type(r'(?msu)(?<=[\s>"“\'‘])~~(?P<words>[^~]+)~~'),
|
||||||
unicode(r'(?msu)(?<=[\s>"“\'‘])_/(?P<words>[^/_]+)/_'),
|
unicode_type(r'(?msu)(?<=[\s>"“\'‘])_/(?P<words>[^/_]+)/_'),
|
||||||
unicode(r'(?msu)(?<=[\s>"“\'‘])_\*(?P<words>[^\*_]+)\*_'),
|
unicode_type(r'(?msu)(?<=[\s>"“\'‘])_\*(?P<words>[^\*_]+)\*_'),
|
||||||
unicode(r'(?msu)(?<=[\s>"“\'‘])\*/(?P<words>[^/\*]+)/\*'),
|
unicode_type(r'(?msu)(?<=[\s>"“\'‘])\*/(?P<words>[^/\*]+)/\*'),
|
||||||
unicode(r'(?msu)(?<=[\s>"“\'‘])/:(?P<words>[^:/]+):/'),
|
unicode_type(r'(?msu)(?<=[\s>"“\'‘])/:(?P<words>[^:/]+):/'),
|
||||||
unicode(r'(?msu)(?<=[\s>"“\'‘])\|:(?P<words>[^:\|]+):\|'),
|
unicode_type(r'(?msu)(?<=[\s>"“\'‘])\|:(?P<words>[^:\|]+):\|'),
|
||||||
unicode(r'(?msu)(?<=[\s>"“\'‘])\*(?P<words>[^\*]+)\*'),
|
unicode_type(r'(?msu)(?<=[\s>"“\'‘])\*(?P<words>[^\*]+)\*'),
|
||||||
unicode(r'(?msu)(?<=[\s>"“\'‘])~(?P<words>[^~]+)~'),
|
unicode_type(r'(?msu)(?<=[\s>"“\'‘])~(?P<words>[^~]+)~'),
|
||||||
unicode(r'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*><]+)/'),
|
unicode_type(r'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*><]+)/'),
|
||||||
unicode(r'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_'),
|
unicode_type(r'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_'),
|
||||||
]
|
]
|
||||||
|
|
||||||
for word in ITALICIZE_WORDS:
|
for word in ITALICIZE_WORDS:
|
||||||
@ -177,10 +178,10 @@ class HeuristicProcessor(object):
|
|||||||
search_text = re.sub(r'<[^>]*>', '', search_text)
|
search_text = re.sub(r'<[^>]*>', '', search_text)
|
||||||
for pat in ITALICIZE_STYLE_PATS:
|
for pat in ITALICIZE_STYLE_PATS:
|
||||||
for match in re.finditer(pat, search_text):
|
for match in re.finditer(pat, search_text):
|
||||||
ital_string = unicode(match.group('words'))
|
ital_string = unicode_type(match.group('words'))
|
||||||
# self.log.debug("italicising "+unicode(match.group(0))+" with <i>"+ital_string+"</i>")
|
# self.log.debug("italicising "+unicode_type(match.group(0))+" with <i>"+ital_string+"</i>")
|
||||||
try:
|
try:
|
||||||
html = re.sub(re.escape(unicode(match.group(0))), '<i>%s</i>' % ital_string, html)
|
html = re.sub(re.escape(unicode_type(match.group(0))), '<i>%s</i>' % ital_string, html)
|
||||||
except OverflowError:
|
except OverflowError:
|
||||||
# match.group(0) was too large to be compiled into a regex
|
# match.group(0) was too large to be compiled into a regex
|
||||||
continue
|
continue
|
||||||
@ -205,10 +206,10 @@ class HeuristicProcessor(object):
|
|||||||
if wordcount > 200000:
|
if wordcount > 200000:
|
||||||
typical_chapters = 15000.
|
typical_chapters = 15000.
|
||||||
self.min_chapters = int(ceil(wordcount / typical_chapters))
|
self.min_chapters = int(ceil(wordcount / typical_chapters))
|
||||||
self.log.debug("minimum chapters required are: "+unicode(self.min_chapters))
|
self.log.debug("minimum chapters required are: "+unicode_type(self.min_chapters))
|
||||||
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
|
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
|
||||||
self.html_preprocess_sections = len(heading.findall(html))
|
self.html_preprocess_sections = len(heading.findall(html))
|
||||||
self.log.debug("found " + unicode(self.html_preprocess_sections) + " pre-existing headings")
|
self.log.debug("found " + unicode_type(self.html_preprocess_sections) + " pre-existing headings")
|
||||||
|
|
||||||
# Build the Regular Expressions in pieces
|
# Build the Regular Expressions in pieces
|
||||||
init_lookahead = "(?=<(p|div))"
|
init_lookahead = "(?=<(p|div))"
|
||||||
@ -295,7 +296,7 @@ class HeuristicProcessor(object):
|
|||||||
if n_lookahead_req:
|
if n_lookahead_req:
|
||||||
n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line)
|
n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line)
|
||||||
if not analyze:
|
if not analyze:
|
||||||
self.log.debug("Marked " + unicode(self.html_preprocess_sections) + " headings, " + log_message)
|
self.log.debug("Marked " + unicode_type(self.html_preprocess_sections) + " headings, " + log_message)
|
||||||
|
|
||||||
chapter_marker = arg_ignorecase+init_lookahead+full_chapter_line+blank_lines+lp_n_lookahead_open+n_lookahead+lp_n_lookahead_close+ \
|
chapter_marker = arg_ignorecase+init_lookahead+full_chapter_line+blank_lines+lp_n_lookahead_open+n_lookahead+lp_n_lookahead_close+ \
|
||||||
lp_opt_title_open+title_line_open+title_header_open+lp_title+title_header_close+title_line_close+lp_opt_title_close
|
lp_opt_title_open+title_line_open+title_header_open+lp_title+title_header_close+title_line_close+lp_opt_title_close
|
||||||
@ -308,9 +309,9 @@ class HeuristicProcessor(object):
|
|||||||
if float(self.chapters_with_title) / float(hits) > .5:
|
if float(self.chapters_with_title) / float(hits) > .5:
|
||||||
title_req = True
|
title_req = True
|
||||||
strict_title = False
|
strict_title = False
|
||||||
self.log.debug(unicode(type_name)+" had "+unicode(hits)+" hits - "+unicode(self.chapters_no_title)+" chapters with no title, "+
|
self.log.debug(unicode_type(type_name)+" had "+unicode_type(hits)+" hits - "+unicode_type(self.chapters_no_title)+" chapters with no title, "+
|
||||||
unicode(self.chapters_with_title)+" chapters with titles, "+
|
unicode_type(self.chapters_with_title)+" chapters with titles, "+
|
||||||
unicode(float(self.chapters_with_title) / float(hits))+" percent. ")
|
unicode_type(float(self.chapters_with_title) / float(hits))+" percent. ")
|
||||||
if type_name == 'common':
|
if type_name == 'common':
|
||||||
analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name])
|
analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name])
|
||||||
elif self.min_chapters <= hits < max_chapters or self.min_chapters < 3 > hits:
|
elif self.min_chapters <= hits < max_chapters or self.min_chapters < 3 > hits:
|
||||||
@ -327,8 +328,8 @@ class HeuristicProcessor(object):
|
|||||||
words_per_chptr = wordcount
|
words_per_chptr = wordcount
|
||||||
if words_per_chptr > 0 and self.html_preprocess_sections > 0:
|
if words_per_chptr > 0 and self.html_preprocess_sections > 0:
|
||||||
words_per_chptr = wordcount / self.html_preprocess_sections
|
words_per_chptr = wordcount / self.html_preprocess_sections
|
||||||
self.log.debug("Total wordcount is: "+ unicode(wordcount)+", Average words per section is: "+
|
self.log.debug("Total wordcount is: "+ unicode_type(wordcount)+", Average words per section is: "+
|
||||||
unicode(words_per_chptr)+", Marked up "+unicode(self.html_preprocess_sections)+" chapters")
|
unicode_type(words_per_chptr)+", Marked up "+unicode_type(self.html_preprocess_sections)+" chapters")
|
||||||
return html
|
return html
|
||||||
|
|
||||||
def punctuation_unwrap(self, length, content, format):
|
def punctuation_unwrap(self, length, content, format):
|
||||||
@ -358,8 +359,8 @@ class HeuristicProcessor(object):
|
|||||||
|
|
||||||
# define the pieces of the regex
|
# define the pieces of the regex
|
||||||
# (?<!\&\w{4});) is a semicolon not part of an entity
|
# (?<!\&\w{4});) is a semicolon not part of an entity
|
||||||
lookahead = "(?<=.{"+unicode(length)+u"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IA\u00DF]|(?<!\\&\\w{4});))"
|
lookahead = "(?<=.{"+unicode_type(length)+u"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IA\u00DF]|(?<!\\&\\w{4});))"
|
||||||
em_en_lookahead = "(?<=.{"+unicode(length)+u"}[\u2013\u2014])"
|
em_en_lookahead = "(?<=.{"+unicode_type(length)+u"}[\u2013\u2014])"
|
||||||
soft_hyphen = u"\xad"
|
soft_hyphen = u"\xad"
|
||||||
line_ending = "\\s*(?P<style_close></(span|[iub])>)?\\s*(</(p|div)>)?"
|
line_ending = "\\s*(?P<style_close></(span|[iub])>)?\\s*(</(p|div)>)?"
|
||||||
blanklines = "\\s*(?P<up2threeblanks><(p|span|div)[^>]*>\\s*(<(p|span|div)[^>]*>\\s*</(span|p|div)>\\s*)</(span|p|div)>\\s*){0,3}\\s*"
|
blanklines = "\\s*(?P<up2threeblanks><(p|span|div)[^>]*>\\s*(<(p|span|div)[^>]*>\\s*</(span|p|div)>\\s*)</(span|p|div)>\\s*){0,3}\\s*"
|
||||||
@ -419,18 +420,18 @@ class HeuristicProcessor(object):
|
|||||||
return html
|
return html
|
||||||
|
|
||||||
def fix_nbsp_indents(self, html):
|
def fix_nbsp_indents(self, html):
|
||||||
txtindent = re.compile(unicode(r'<(?P<tagtype>p|div)(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}'), re.IGNORECASE)
|
txtindent = re.compile(unicode_type(r'<(?P<tagtype>p|div)(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}'), re.IGNORECASE)
|
||||||
html = txtindent.sub(self.insert_indent, html)
|
html = txtindent.sub(self.insert_indent, html)
|
||||||
if self.found_indents > 1:
|
if self.found_indents > 1:
|
||||||
self.log.debug("replaced "+unicode(self.found_indents)+ " nbsp indents with inline styles")
|
self.log.debug("replaced "+unicode_type(self.found_indents)+ " nbsp indents with inline styles")
|
||||||
return html
|
return html
|
||||||
|
|
||||||
def cleanup_markup(self, html):
|
def cleanup_markup(self, html):
|
||||||
# remove remaining non-breaking spaces
|
# remove remaining non-breaking spaces
|
||||||
html = re.sub(unicode(r'\u00a0'), ' ', html)
|
html = re.sub(unicode_type(r'\u00a0'), ' ', html)
|
||||||
# Get rid of various common microsoft specific tags which can cause issues later
|
# Get rid of various common microsoft specific tags which can cause issues later
|
||||||
# Get rid of empty <o:p> tags to simplify other processing
|
# Get rid of empty <o:p> tags to simplify other processing
|
||||||
html = re.sub(unicode(r'\s*<o:p>\s*</o:p>'), ' ', html)
|
html = re.sub(unicode_type(r'\s*<o:p>\s*</o:p>'), ' ', html)
|
||||||
# Delete microsoft 'smart' tags
|
# Delete microsoft 'smart' tags
|
||||||
html = re.sub('(?i)</?st1:\\w+>', '', html)
|
html = re.sub('(?i)</?st1:\\w+>', '', html)
|
||||||
# Re-open self closing paragraph tags
|
# Re-open self closing paragraph tags
|
||||||
@ -470,8 +471,8 @@ class HeuristicProcessor(object):
|
|||||||
blanklines = self.blankreg.findall(html)
|
blanklines = self.blankreg.findall(html)
|
||||||
lines = self.linereg.findall(html)
|
lines = self.linereg.findall(html)
|
||||||
if len(lines) > 1:
|
if len(lines) > 1:
|
||||||
self.log.debug("There are " + unicode(len(blanklines)) + " blank lines. " +
|
self.log.debug("There are " + unicode_type(len(blanklines)) + " blank lines. " +
|
||||||
unicode(float(len(blanklines)) / float(len(lines))) + " percent blank")
|
unicode_type(float(len(blanklines)) / float(len(lines))) + " percent blank")
|
||||||
|
|
||||||
if float(len(blanklines)) / float(len(lines)) > 0.40:
|
if float(len(blanklines)) / float(len(lines)) > 0.40:
|
||||||
return True
|
return True
|
||||||
@ -493,11 +494,11 @@ class HeuristicProcessor(object):
|
|||||||
lines = float(len(self.single_blank.findall(to_merge))) - 1.
|
lines = float(len(self.single_blank.findall(to_merge))) - 1.
|
||||||
em = base_em + (em_per_line * lines)
|
em = base_em + (em_per_line * lines)
|
||||||
if to_merge.find('whitespace'):
|
if to_merge.find('whitespace'):
|
||||||
newline = self.any_multi_blank.sub('\n<p class="whitespace'+unicode(int(em * 10))+
|
newline = self.any_multi_blank.sub('\n<p class="whitespace'+unicode_type(int(em * 10))+
|
||||||
'" style="text-align:center; margin-top:'+unicode(em)+'em"> </p>', match.group(0))
|
'" style="text-align:center; margin-top:'+unicode_type(em)+'em"> </p>', match.group(0))
|
||||||
else:
|
else:
|
||||||
newline = self.any_multi_blank.sub('\n<p class="softbreak'+unicode(int(em * 10))+
|
newline = self.any_multi_blank.sub('\n<p class="softbreak'+unicode_type(int(em * 10))+
|
||||||
'" style="text-align:center; margin-top:'+unicode(em)+'em"> </p>', match.group(0))
|
'" style="text-align:center; margin-top:'+unicode_type(em)+'em"> </p>', match.group(0))
|
||||||
return newline
|
return newline
|
||||||
|
|
||||||
html = self.any_multi_blank.sub(merge_matches, html)
|
html = self.any_multi_blank.sub(merge_matches, html)
|
||||||
@ -518,9 +519,9 @@ class HeuristicProcessor(object):
|
|||||||
top_margin = ''
|
top_margin = ''
|
||||||
bottom_margin = ''
|
bottom_margin = ''
|
||||||
if initblanks is not None:
|
if initblanks is not None:
|
||||||
top_margin = 'margin-top:'+unicode(len(self.single_blank.findall(initblanks)))+'em;'
|
top_margin = 'margin-top:'+unicode_type(len(self.single_blank.findall(initblanks)))+'em;'
|
||||||
if endblanks is not None:
|
if endblanks is not None:
|
||||||
bottom_margin = 'margin-bottom:'+unicode(len(self.single_blank.findall(endblanks)))+'em;'
|
bottom_margin = 'margin-bottom:'+unicode_type(len(self.single_blank.findall(endblanks)))+'em;'
|
||||||
|
|
||||||
if initblanks is None and endblanks is None:
|
if initblanks is None and endblanks is None:
|
||||||
return content
|
return content
|
||||||
@ -597,7 +598,7 @@ class HeuristicProcessor(object):
|
|||||||
else:
|
else:
|
||||||
replacement_break = re.sub('(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '', replacement_break)
|
replacement_break = re.sub('(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '', replacement_break)
|
||||||
divpercent = (100 - width) / 2
|
divpercent = (100 - width) / 2
|
||||||
hr_open = re.sub('45', unicode(divpercent), hr_open)
|
hr_open = re.sub('45', unicode_type(divpercent), hr_open)
|
||||||
scene_break = hr_open+replacement_break+'</div>'
|
scene_break = hr_open+replacement_break+'</div>'
|
||||||
else:
|
else:
|
||||||
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
|
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
|
||||||
@ -657,12 +658,12 @@ class HeuristicProcessor(object):
|
|||||||
else:
|
else:
|
||||||
styles = match.group('styles').split(';')
|
styles = match.group('styles').split(';')
|
||||||
is_paragraph = self.check_paragraph(content)
|
is_paragraph = self.check_paragraph(content)
|
||||||
# print "styles for this line are: "+unicode(styles)
|
# print "styles for this line are: "+unicode_type(styles)
|
||||||
split_styles = []
|
split_styles = []
|
||||||
for style in styles:
|
for style in styles:
|
||||||
# print "style is: "+unicode(style)
|
# print "style is: "+unicode_type(style)
|
||||||
newstyle = style.split(':')
|
newstyle = style.split(':')
|
||||||
# print "newstyle is: "+unicode(newstyle)
|
# print "newstyle is: "+unicode_type(newstyle)
|
||||||
split_styles.append(newstyle)
|
split_styles.append(newstyle)
|
||||||
styles = split_styles
|
styles = split_styles
|
||||||
for style, setting in styles:
|
for style, setting in styles:
|
||||||
@ -673,7 +674,7 @@ class HeuristicProcessor(object):
|
|||||||
if 9 < setting < 14:
|
if 9 < setting < 14:
|
||||||
text_indent = indented_text
|
text_indent = indented_text
|
||||||
else:
|
else:
|
||||||
text_indent = style+':'+unicode(setting)+'pt;'
|
text_indent = style+':'+unicode_type(setting)+'pt;'
|
||||||
if style == 'padding':
|
if style == 'padding':
|
||||||
setting = re.sub('pt', '', setting).split(' ')
|
setting = re.sub('pt', '', setting).split(' ')
|
||||||
if int(setting[1]) < 16 and int(setting[3]) < 16:
|
if int(setting[1]) < 16 and int(setting[3]) < 16:
|
||||||
@ -694,23 +695,23 @@ class HeuristicProcessor(object):
|
|||||||
blockquote_open_loop = blockquote_open
|
blockquote_open_loop = blockquote_open
|
||||||
if debugabby:
|
if debugabby:
|
||||||
self.log.debug('\n\n******\n')
|
self.log.debug('\n\n******\n')
|
||||||
self.log.debug('padding top is: '+unicode(setting[0]))
|
self.log.debug('padding top is: '+unicode_type(setting[0]))
|
||||||
self.log.debug('padding right is:' +unicode(setting[1]))
|
self.log.debug('padding right is:' +unicode_type(setting[1]))
|
||||||
self.log.debug('padding bottom is: ' + unicode(setting[2]))
|
self.log.debug('padding bottom is: ' + unicode_type(setting[2]))
|
||||||
self.log.debug('padding left is: ' +unicode(setting[3]))
|
self.log.debug('padding left is: ' +unicode_type(setting[3]))
|
||||||
|
|
||||||
# print "text-align is: "+unicode(text_align)
|
# print "text-align is: "+unicode_type(text_align)
|
||||||
# print "\n***\nline is:\n "+unicode(match.group(0))+'\n'
|
# print "\n***\nline is:\n "+unicode_type(match.group(0))+'\n'
|
||||||
if debugabby:
|
if debugabby:
|
||||||
# print "this line is a paragraph = "+unicode(is_paragraph)+", previous line was "+unicode(self.previous_was_paragraph)
|
# print "this line is a paragraph = "+unicode_type(is_paragraph)+", previous line was "+unicode_type(self.previous_was_paragraph)
|
||||||
self.log.debug("styles for this line were:", styles)
|
self.log.debug("styles for this line were:", styles)
|
||||||
self.log.debug('newline is:')
|
self.log.debug('newline is:')
|
||||||
self.log.debug(blockquote_open_loop+blockquote_close_loop+
|
self.log.debug(blockquote_open_loop+blockquote_close_loop+
|
||||||
paragraph_before+'<p style="'+text_indent+text_align+
|
paragraph_before+'<p style="'+text_indent+text_align+
|
||||||
'">'+content+'</p>'+paragraph_after+'\n\n\n\n\n')
|
'">'+content+'</p>'+paragraph_after+'\n\n\n\n\n')
|
||||||
# print "is_paragraph is "+unicode(is_paragraph)+", previous_was_paragraph is "+unicode(self.previous_was_paragraph)
|
# print "is_paragraph is "+unicode_type(is_paragraph)+", previous_was_paragraph is "+unicode_type(self.previous_was_paragraph)
|
||||||
self.previous_was_paragraph = is_paragraph
|
self.previous_was_paragraph = is_paragraph
|
||||||
# print "previous_was_paragraph is now set to "+unicode(self.previous_was_paragraph)+"\n\n\n"
|
# print "previous_was_paragraph is now set to "+unicode_type(self.previous_was_paragraph)+"\n\n\n"
|
||||||
return blockquote_open_loop+blockquote_close_loop+paragraph_before+'<p style="'+text_indent+text_align+'">'+content+'</p>'+paragraph_after
|
return blockquote_open_loop+blockquote_close_loop+paragraph_before+'<p style="'+text_indent+text_align+'">'+content+'</p>'+paragraph_after
|
||||||
|
|
||||||
html = abbyy_line.sub(convert_styles, html)
|
html = abbyy_line.sub(convert_styles, html)
|
||||||
@ -793,12 +794,12 @@ class HeuristicProcessor(object):
|
|||||||
# more of the lines break in the same region of the document then unwrapping is required
|
# more of the lines break in the same region of the document then unwrapping is required
|
||||||
docanalysis = DocAnalysis(format, html)
|
docanalysis = DocAnalysis(format, html)
|
||||||
hardbreaks = docanalysis.line_histogram(.50)
|
hardbreaks = docanalysis.line_histogram(.50)
|
||||||
self.log.debug("Hard line breaks check returned "+unicode(hardbreaks))
|
self.log.debug("Hard line breaks check returned "+unicode_type(hardbreaks))
|
||||||
|
|
||||||
# Calculate Length
|
# Calculate Length
|
||||||
unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
|
unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
|
||||||
length = docanalysis.line_length(unwrap_factor)
|
length = docanalysis.line_length(unwrap_factor)
|
||||||
self.log.debug("Median line length is " + unicode(length) + ", calculated with " + format + " format")
|
self.log.debug("Median line length is " + unicode_type(length) + ", calculated with " + format + " format")
|
||||||
|
|
||||||
# ##### Unwrap lines ######
|
# ##### Unwrap lines ######
|
||||||
if getattr(self.extra_opts, 'unwrap_lines', False):
|
if getattr(self.extra_opts, 'unwrap_lines', False):
|
||||||
@ -820,7 +821,7 @@ class HeuristicProcessor(object):
|
|||||||
# If still no sections after unwrapping mark split points on lines with no punctuation
|
# If still no sections after unwrapping mark split points on lines with no punctuation
|
||||||
if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False):
|
if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False):
|
||||||
self.log.debug("Looking for more split points based on punctuation,"
|
self.log.debug("Looking for more split points based on punctuation,"
|
||||||
" currently have " + unicode(self.html_preprocess_sections))
|
" currently have " + unicode_type(self.html_preprocess_sections))
|
||||||
chapdetect3 = re.compile(
|
chapdetect3 = re.compile(
|
||||||
r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([\W]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) # noqa
|
r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([\W]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) # noqa
|
||||||
html = chapdetect3.sub(self.chapter_break, html)
|
html = chapdetect3.sub(self.chapter_break, html)
|
||||||
|
@ -20,6 +20,7 @@ from calibre.utils.localization import canonicalize_lang
|
|||||||
from calibre.utils.logging import default_log
|
from calibre.utils.logging import default_log
|
||||||
from calibre.utils.zipfile import ZipFile
|
from calibre.utils.zipfile import ZipFile
|
||||||
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
|
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
def fromstring(raw, parser=RECOVER_PARSER):
|
def fromstring(raw, parser=RECOVER_PARSER):
|
||||||
@ -56,7 +57,7 @@ def read_doc_props(raw, mi, XPath):
|
|||||||
|
|
||||||
desc = XPath('//dc:description')(root)
|
desc = XPath('//dc:description')(root)
|
||||||
if desc:
|
if desc:
|
||||||
raw = etree.tostring(desc[0], method='text', encoding=unicode)
|
raw = etree.tostring(desc[0], method='text', encoding=unicode_type)
|
||||||
raw = raw.replace('_x000d_', '') # Word 2007 mangles newlines in the summary
|
raw = raw.replace('_x000d_', '') # Word 2007 mangles newlines in the summary
|
||||||
mi.comments = raw.strip()
|
mi.comments = raw.strip()
|
||||||
|
|
||||||
|
@ -14,6 +14,7 @@ from calibre.utils.filenames import ascii_filename
|
|||||||
from calibre.utils.fonts.scanner import font_scanner, NoFonts
|
from calibre.utils.fonts.scanner import font_scanner, NoFonts
|
||||||
from calibre.utils.fonts.utils import panose_to_css_generic_family, is_truetype_font
|
from calibre.utils.fonts.utils import panose_to_css_generic_family, is_truetype_font
|
||||||
from calibre.utils.icu import ord_string
|
from calibre.utils.icu import ord_string
|
||||||
|
from polyglot.builtins import codepoint_to_chr
|
||||||
|
|
||||||
Embed = namedtuple('Embed', 'name key subsetted')
|
Embed = namedtuple('Embed', 'name key subsetted')
|
||||||
|
|
||||||
@ -124,7 +125,7 @@ def do_map(m, points):
|
|||||||
if base < p < limit:
|
if base < p < limit:
|
||||||
yield m[p - base]
|
yield m[p - base]
|
||||||
else:
|
else:
|
||||||
yield unichr(p)
|
yield codepoint_to_chr(p)
|
||||||
|
|
||||||
|
|
||||||
def map_symbol_text(text, font):
|
def map_symbol_text(text, font):
|
||||||
|
@ -11,6 +11,7 @@ from operator import itemgetter
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from calibre.utils.icu import partition_by_first_letter, sort_key
|
from calibre.utils.icu import partition_by_first_letter, sort_key
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
def get_applicable_xe_fields(index, xe_fields, XPath, expand):
|
def get_applicable_xe_fields(index, xe_fields, XPath, expand):
|
||||||
@ -246,7 +247,7 @@ def polish_index_markup(index, blocks):
|
|||||||
a = block.xpath('descendant::a[1]')
|
a = block.xpath('descendant::a[1]')
|
||||||
text = ''
|
text = ''
|
||||||
if a:
|
if a:
|
||||||
text = etree.tostring(a[0], method='text', with_tail=False, encoding=unicode).strip()
|
text = etree.tostring(a[0], method='text', with_tail=False, encoding=unicode_type).strip()
|
||||||
if ':' in text:
|
if ':' in text:
|
||||||
path_map[block] = parts = filter(None, (x.strip() for x in text.split(':')))
|
path_map[block] = parts = filter(None, (x.strip() for x in text.split(':')))
|
||||||
if len(parts) > 1:
|
if len(parts) > 1:
|
||||||
|
@ -504,8 +504,6 @@ class Table(object):
|
|||||||
|
|
||||||
def resolve_cell_style(self, tc, overrides, row, col, rows, cols_in_row):
|
def resolve_cell_style(self, tc, overrides, row, col, rows, cols_in_row):
|
||||||
cs = CellStyle(self.namespace)
|
cs = CellStyle(self.namespace)
|
||||||
# from lxml.etree import tostring
|
|
||||||
# txt = tostring(tc, method='text', encoding=unicode)
|
|
||||||
for o in overrides:
|
for o in overrides:
|
||||||
if o in self.overrides:
|
if o in self.overrides:
|
||||||
ovr = self.overrides[o]
|
ovr = self.overrides[o]
|
||||||
@ -699,4 +697,3 @@ class Tables(object):
|
|||||||
table = self.para_map.get(p, None)
|
table = self.para_map.get(p, None)
|
||||||
if table is not None:
|
if table is not None:
|
||||||
return table.style_map.get(p, (None, None))[1]
|
return table.style_map.get(p, (None, None))[1]
|
||||||
|
|
||||||
|
@ -13,6 +13,7 @@ from lxml.etree import tostring
|
|||||||
|
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
|
from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
def from_headings(body, log, namespace):
|
def from_headings(body, log, namespace):
|
||||||
@ -93,7 +94,7 @@ def link_to_txt(a, styles, object_map):
|
|||||||
if rs.css.get('display', None) == 'none':
|
if rs.css.get('display', None) == 'none':
|
||||||
a.remove(child)
|
a.remove(child)
|
||||||
|
|
||||||
return tostring(a, method='text', with_tail=False, encoding=unicode).strip()
|
return tostring(a, method='text', with_tail=False, encoding=unicode_type).strip()
|
||||||
|
|
||||||
|
|
||||||
def from_toc(docx, link_map, styles, object_map, log, namespace):
|
def from_toc(docx, link_map, styles, object_map, log, namespace):
|
||||||
|
@ -19,6 +19,7 @@ from calibre.ebooks.docx.writer.lists import ListsManager
|
|||||||
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
|
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
|
||||||
from calibre.ebooks.oeb.base import XPath, barename
|
from calibre.ebooks.oeb.base import XPath, barename
|
||||||
from calibre.utils.localization import lang_as_iso639_1
|
from calibre.utils.localization import lang_as_iso639_1
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
def lang_for_tag(tag):
|
def lang_for_tag(tag):
|
||||||
@ -439,8 +440,8 @@ class Convert(object):
|
|||||||
if self.add_toc:
|
if self.add_toc:
|
||||||
self.links_manager.process_toc_links(self.oeb)
|
self.links_manager.process_toc_links(self.oeb)
|
||||||
|
|
||||||
if self.add_cover and self.oeb.metadata.cover and unicode(self.oeb.metadata.cover[0]) in self.oeb.manifest.ids:
|
if self.add_cover and self.oeb.metadata.cover and unicode_type(self.oeb.metadata.cover[0]) in self.oeb.manifest.ids:
|
||||||
cover_id = unicode(self.oeb.metadata.cover[0])
|
cover_id = unicode_type(self.oeb.metadata.cover[0])
|
||||||
item = self.oeb.manifest.ids[cover_id]
|
item = self.oeb.manifest.ids[cover_id]
|
||||||
self.cover_img = self.images_manager.read_image(item.href)
|
self.cover_img = self.images_manager.read_image(item.href)
|
||||||
|
|
||||||
|
@ -14,6 +14,7 @@ from lxml import etree
|
|||||||
from calibre.ebooks import parse_css_length
|
from calibre.ebooks import parse_css_length
|
||||||
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
|
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
|
||||||
from calibre.utils.localization import lang_as_iso639_1
|
from calibre.utils.localization import lang_as_iso639_1
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
from tinycss.css21 import CSS21Parser
|
from tinycss.css21 import CSS21Parser
|
||||||
|
|
||||||
css_parser = CSS21Parser()
|
css_parser = CSS21Parser()
|
||||||
@ -45,7 +46,7 @@ def bmap(x):
|
|||||||
|
|
||||||
|
|
||||||
def is_dropcaps(html_tag, tag_style):
|
def is_dropcaps(html_tag, tag_style):
|
||||||
return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding=unicode, with_tail=False)) < 5 and tag_style['float'] == 'left'
|
return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding=unicode_type, with_tail=False)) < 5 and tag_style['float'] == 'left'
|
||||||
|
|
||||||
|
|
||||||
class CombinedStyle(object):
|
class CombinedStyle(object):
|
||||||
|
@ -10,6 +10,7 @@ import unittest
|
|||||||
from polyglot.builtins import map
|
from polyglot.builtins import map
|
||||||
|
|
||||||
from calibre.ebooks.epub.cfi.parse import parser, cfi_sort_key, decode_cfi
|
from calibre.ebooks.epub.cfi.parse import parser, cfi_sort_key, decode_cfi
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class Tests(unittest.TestCase):
|
class Tests(unittest.TestCase):
|
||||||
@ -60,7 +61,7 @@ class Tests(unittest.TestCase):
|
|||||||
if after is not None:
|
if after is not None:
|
||||||
ta['after'] = after
|
ta['after'] = after
|
||||||
if params:
|
if params:
|
||||||
ta['params'] = {unicode(k):(v,) if isinstance(v, unicode) else v for k, v in params.iteritems()}
|
ta['params'] = {unicode_type(k):(v,) if isinstance(v, unicode_type) else v for k, v in params.iteritems()}
|
||||||
if ta:
|
if ta:
|
||||||
step['text_assertion'] = ta
|
step['text_assertion'] = ta
|
||||||
return ans
|
return ans
|
||||||
|
@ -11,6 +11,7 @@ import time
|
|||||||
from calibre.constants import __appname__, __version__
|
from calibre.constants import __appname__, __version__
|
||||||
from calibre import strftime, prepare_string_for_xml as xml
|
from calibre import strftime, prepare_string_for_xml as xml
|
||||||
from calibre.utils.date import parse_date
|
from calibre.utils.date import parse_date
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
SONY_METADATA = u'''\
|
SONY_METADATA = u'''\
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
@ -81,21 +82,21 @@ SONY_ATOM_ENTRY = u'''\
|
|||||||
|
|
||||||
def sony_metadata(oeb):
|
def sony_metadata(oeb):
|
||||||
m = oeb.metadata
|
m = oeb.metadata
|
||||||
title = short_title = unicode(m.title[0])
|
title = short_title = unicode_type(m.title[0])
|
||||||
publisher = __appname__ + ' ' + __version__
|
publisher = __appname__ + ' ' + __version__
|
||||||
try:
|
try:
|
||||||
pt = unicode(oeb.metadata.publication_type[0])
|
pt = unicode_type(oeb.metadata.publication_type[0])
|
||||||
short_title = u':'.join(pt.split(':')[2:])
|
short_title = u':'.join(pt.split(':')[2:])
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
date = parse_date(unicode(m.date[0]),
|
date = parse_date(unicode_type(m.date[0]),
|
||||||
as_utc=False).strftime('%Y-%m-%d')
|
as_utc=False).strftime('%Y-%m-%d')
|
||||||
except:
|
except:
|
||||||
date = strftime('%Y-%m-%d')
|
date = strftime('%Y-%m-%d')
|
||||||
try:
|
try:
|
||||||
language = unicode(m.language[0]).replace('_', '-')
|
language = unicode_type(m.language[0]).replace('_', '-')
|
||||||
except:
|
except:
|
||||||
language = 'en'
|
language = 'en'
|
||||||
short_title = xml(short_title, True)
|
short_title = xml(short_title, True)
|
||||||
@ -113,7 +114,7 @@ def sony_metadata(oeb):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
base_id = unicode(list(filter(cal_id, m.identifier))[0])
|
base_id = unicode_type(list(filter(cal_id, m.identifier))[0])
|
||||||
except:
|
except:
|
||||||
base_id = str(uuid4())
|
base_id = str(uuid4())
|
||||||
|
|
||||||
@ -128,7 +129,7 @@ def sony_metadata(oeb):
|
|||||||
for x in toc:
|
for x in toc:
|
||||||
section.nodes.append(x)
|
section.nodes.append(x)
|
||||||
toc = TOC(klass='periodical', href=oeb.spine[2].href,
|
toc = TOC(klass='periodical', href=oeb.spine[2].href,
|
||||||
title=unicode(oeb.metadata.title[0]))
|
title=unicode_type(oeb.metadata.title[0]))
|
||||||
toc.nodes.append(section)
|
toc.nodes.append(section)
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
@ -188,4 +189,3 @@ def sony_metadata(oeb):
|
|||||||
id=xml(base_id)).encode('utf-8')
|
id=xml(base_id)).encode('utf-8')
|
||||||
|
|
||||||
return metadata, atom
|
return metadata, atom
|
||||||
|
|
||||||
|
@ -19,6 +19,7 @@ from calibre.constants import __appname__, __version__
|
|||||||
from calibre.utils.localization import lang_as_iso639_1
|
from calibre.utils.localization import lang_as_iso639_1
|
||||||
from calibre.utils.img import save_cover_data_to
|
from calibre.utils.img import save_cover_data_to
|
||||||
from calibre.ebooks.oeb.base import urlnormalize
|
from calibre.ebooks.oeb.base import urlnormalize
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class FB2MLizer(object):
|
class FB2MLizer(object):
|
||||||
@ -64,7 +65,7 @@ class FB2MLizer(object):
|
|||||||
output = self.clean_text(u''.join(output))
|
output = self.clean_text(u''.join(output))
|
||||||
|
|
||||||
if self.opts.pretty_print:
|
if self.opts.pretty_print:
|
||||||
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
|
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode_type, pretty_print=True)
|
||||||
else:
|
else:
|
||||||
return u'<?xml version="1.0" encoding="UTF-8"?>' + output
|
return u'<?xml version="1.0" encoding="UTF-8"?>' + output
|
||||||
|
|
||||||
@ -140,7 +141,7 @@ class FB2MLizer(object):
|
|||||||
metadata['author'] = u'<author><first-name></first-name><last-name></last-name></author>'
|
metadata['author'] = u'<author><first-name></first-name><last-name></last-name></author>'
|
||||||
|
|
||||||
metadata['keywords'] = u''
|
metadata['keywords'] = u''
|
||||||
tags = list(map(unicode, self.oeb_book.metadata.subject))
|
tags = list(map(unicode_type, self.oeb_book.metadata.subject))
|
||||||
if tags:
|
if tags:
|
||||||
tags = ', '.join(prepare_string_for_xml(x) for x in tags)
|
tags = ', '.join(prepare_string_for_xml(x) for x in tags)
|
||||||
metadata['keywords'] = '<keywords>%s</keywords>'%tags
|
metadata['keywords'] = '<keywords>%s</keywords>'%tags
|
||||||
@ -155,8 +156,8 @@ class FB2MLizer(object):
|
|||||||
year = publisher = isbn = u''
|
year = publisher = isbn = u''
|
||||||
identifiers = self.oeb_book.metadata['identifier']
|
identifiers = self.oeb_book.metadata['identifier']
|
||||||
for x in identifiers:
|
for x in identifiers:
|
||||||
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
|
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode_type(x).startswith('urn:uuid:'):
|
||||||
metadata['id'] = unicode(x).split(':')[-1]
|
metadata['id'] = unicode_type(x).split(':')[-1]
|
||||||
break
|
break
|
||||||
if metadata['id'] is None:
|
if metadata['id'] is None:
|
||||||
self.log.warn('No UUID identifier found')
|
self.log.warn('No UUID identifier found')
|
||||||
@ -229,8 +230,8 @@ class FB2MLizer(object):
|
|||||||
cover_href = None
|
cover_href = None
|
||||||
|
|
||||||
# Get the raster cover if it's available.
|
# Get the raster cover if it's available.
|
||||||
if self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
|
if self.oeb_book.metadata.cover and unicode_type(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
|
||||||
id = unicode(self.oeb_book.metadata.cover[0])
|
id = unicode_type(self.oeb_book.metadata.cover[0])
|
||||||
cover_item = self.oeb_book.manifest.ids[id]
|
cover_item = self.oeb_book.manifest.ids[id]
|
||||||
if cover_item.media_type in OEB_RASTER_IMAGES:
|
if cover_item.media_type in OEB_RASTER_IMAGES:
|
||||||
cover_href = cover_item.href
|
cover_href = cover_item.href
|
||||||
|
@ -19,6 +19,7 @@ from calibre.ebooks.oeb.base import urlunquote
|
|||||||
from calibre.ebooks.chardet import detect_xml_encoding
|
from calibre.ebooks.chardet import detect_xml_encoding
|
||||||
from calibre.constants import iswindows
|
from calibre.constants import iswindows
|
||||||
from calibre import unicode_path, as_unicode, replace_entities
|
from calibre import unicode_path, as_unicode, replace_entities
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class Link(object):
|
class Link(object):
|
||||||
@ -46,7 +47,7 @@ class Link(object):
|
|||||||
:param base: The base directory that relative URLs are with respect to.
|
:param base: The base directory that relative URLs are with respect to.
|
||||||
Must be a unicode string.
|
Must be a unicode string.
|
||||||
'''
|
'''
|
||||||
assert isinstance(url, unicode) and isinstance(base, unicode)
|
assert isinstance(url, unicode_type) and isinstance(base, unicode_type)
|
||||||
self.url = url
|
self.url = url
|
||||||
self.parsed_url = urlparse(self.url)
|
self.parsed_url = urlparse(self.url)
|
||||||
self.is_local = self.parsed_url.scheme in ('', 'file')
|
self.is_local = self.parsed_url.scheme in ('', 'file')
|
||||||
@ -248,6 +249,3 @@ def get_filelist(htmlfile, dir, opts, log):
|
|||||||
for f in filelist:
|
for f in filelist:
|
||||||
log.debug('\t\t', f)
|
log.debug('\t\t', f)
|
||||||
return filelist
|
return filelist
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,6 +11,7 @@ import textwrap, os, glob
|
|||||||
|
|
||||||
from calibre.customize import FileTypePlugin
|
from calibre.customize import FileTypePlugin
|
||||||
from calibre.constants import numeric_version
|
from calibre.constants import numeric_version
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class HTML2ZIP(FileTypePlugin):
|
class HTML2ZIP(FileTypePlugin):
|
||||||
@ -114,10 +115,9 @@ every time you add an HTML file to the library.\
|
|||||||
config_dialog.exec_()
|
config_dialog.exec_()
|
||||||
|
|
||||||
if config_dialog.result() == QDialog.Accepted:
|
if config_dialog.result() == QDialog.Accepted:
|
||||||
sc = unicode(sc.text()).strip()
|
sc = unicode_type(sc.text()).strip()
|
||||||
if bf.isChecked():
|
if bf.isChecked():
|
||||||
sc += '|bf'
|
sc += '|bf'
|
||||||
customize_plugin(self, sc)
|
customize_plugin(self, sc)
|
||||||
|
|
||||||
return config_dialog.result()
|
return config_dialog.result()
|
||||||
|
|
||||||
|
@ -22,6 +22,7 @@ from calibre.ebooks.oeb.base import (
|
|||||||
XHTML, XHTML_NS, barename, namespace, OEB_IMAGES, XLINK, rewrite_links, urlnormalize)
|
XHTML, XHTML_NS, barename, namespace, OEB_IMAGES, XLINK, rewrite_links, urlnormalize)
|
||||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
from calibre.utils.logging import default_log
|
from calibre.utils.logging import default_log
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img', 'link', 'meta'}
|
SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img', 'link', 'meta'}
|
||||||
|
|
||||||
@ -46,7 +47,7 @@ class OEB2HTML(object):
|
|||||||
self.log.info('Converting OEB book to HTML...')
|
self.log.info('Converting OEB book to HTML...')
|
||||||
self.opts = opts
|
self.opts = opts
|
||||||
try:
|
try:
|
||||||
self.book_title = unicode(oeb_book.metadata.title[0])
|
self.book_title = unicode_type(oeb_book.metadata.title[0])
|
||||||
except Exception:
|
except Exception:
|
||||||
self.book_title = _('Unknown')
|
self.book_title = _('Unknown')
|
||||||
self.links = {}
|
self.links = {}
|
||||||
|
@ -22,6 +22,7 @@ from calibre.ebooks.oeb.base import urlnormalize, xpath
|
|||||||
from calibre.ebooks.oeb.reader import OEBReader
|
from calibre.ebooks.oeb.reader import OEBReader
|
||||||
from calibre.ebooks import DRMError
|
from calibre.ebooks import DRMError
|
||||||
from calibre import plugins
|
from calibre import plugins
|
||||||
|
from polyglot.builtins import codepoint_to_chr, unicode_type
|
||||||
|
|
||||||
lzx, lxzerror = plugins['lzx']
|
lzx, lxzerror = plugins['lzx']
|
||||||
msdes, msdeserror = plugins['msdes']
|
msdes, msdeserror = plugins['msdes']
|
||||||
@ -110,7 +111,7 @@ def read_utf8_char(bytes, pos):
|
|||||||
raise LitError(
|
raise LitError(
|
||||||
'Invalid UTF8 character: %s' % repr(bytes[pos:pos+i]))
|
'Invalid UTF8 character: %s' % repr(bytes[pos:pos+i]))
|
||||||
c = (c << 6) | (b & 0x3F)
|
c = (c << 6) | (b & 0x3F)
|
||||||
return unichr(c), pos+elsize
|
return codepoint_to_chr(c), pos+elsize
|
||||||
|
|
||||||
|
|
||||||
def consume_sized_utf8_string(bytes, zpad=False):
|
def consume_sized_utf8_string(bytes, zpad=False):
|
||||||
@ -125,7 +126,7 @@ def consume_sized_utf8_string(bytes, zpad=False):
|
|||||||
|
|
||||||
|
|
||||||
def encode(string):
|
def encode(string):
|
||||||
return unicode(string).encode('ascii', 'xmlcharrefreplace')
|
return unicode_type(string).encode('ascii', 'xmlcharrefreplace')
|
||||||
|
|
||||||
|
|
||||||
class UnBinary(object):
|
class UnBinary(object):
|
||||||
@ -243,9 +244,9 @@ class UnBinary(object):
|
|||||||
else:
|
else:
|
||||||
dynamic_tag += 1
|
dynamic_tag += 1
|
||||||
errors += 1
|
errors += 1
|
||||||
tag_name = '?'+unichr(tag)+'?'
|
tag_name = '?'+codepoint_to_chr(tag)+'?'
|
||||||
current_map = self.tag_to_attr_map[tag]
|
current_map = self.tag_to_attr_map[tag]
|
||||||
print('WARNING: tag %s unknown' % unichr(tag))
|
print('WARNING: tag %s unknown' % codepoint_to_chr(tag))
|
||||||
buf.write(encode(tag_name))
|
buf.write(encode(tag_name))
|
||||||
elif flags & FLAG_CLOSING:
|
elif flags & FLAG_CLOSING:
|
||||||
if depth == 0:
|
if depth == 0:
|
||||||
@ -947,4 +948,3 @@ class LitReader(OEBReader):
|
|||||||
item.media_type = 'application/xhtml+xml'
|
item.media_type = 'application/xhtml+xml'
|
||||||
item.data = item._parse_xhtml(etree.tostring(item.data))
|
item.data = item._parse_xhtml(etree.tostring(item.data))
|
||||||
super(LitReader, self)._spine_from_opf(opf)
|
super(LitReader, self)._spine_from_opf(opf)
|
||||||
|
|
||||||
|
@ -31,6 +31,7 @@ import calibre
|
|||||||
from calibre import plugins
|
from calibre import plugins
|
||||||
msdes, msdeserror = plugins['msdes']
|
msdes, msdeserror = plugins['msdes']
|
||||||
import calibre.ebooks.lit.mssha1 as mssha1
|
import calibre.ebooks.lit.mssha1 as mssha1
|
||||||
|
from polyglot.builtins import codepoint_to_chr, unicode_type
|
||||||
|
|
||||||
__all__ = ['LitWriter']
|
__all__ = ['LitWriter']
|
||||||
|
|
||||||
@ -163,9 +164,9 @@ class ReBinary(object):
|
|||||||
for value in values:
|
for value in values:
|
||||||
if isinstance(value, (int, long)):
|
if isinstance(value, (int, long)):
|
||||||
try:
|
try:
|
||||||
value = unichr(value)
|
value = codepoint_to_chr(value)
|
||||||
except OverflowError:
|
except OverflowError:
|
||||||
self.logger.warn('Unicode overflow for integer:', value)
|
self.logger.warn('unicode_type overflow for integer:', value)
|
||||||
value = u'?'
|
value = u'?'
|
||||||
self.buf.write(value.encode('utf-8'))
|
self.buf.write(value.encode('utf-8'))
|
||||||
|
|
||||||
@ -216,9 +217,9 @@ class ReBinary(object):
|
|||||||
path, frag = urldefrag(value)
|
path, frag = urldefrag(value)
|
||||||
if self.item:
|
if self.item:
|
||||||
path = self.item.abshref(path)
|
path = self.item.abshref(path)
|
||||||
prefix = unichr(3)
|
prefix = codepoint_to_chr(3)
|
||||||
if path in self.manifest.hrefs:
|
if path in self.manifest.hrefs:
|
||||||
prefix = unichr(2)
|
prefix = codepoint_to_chr(2)
|
||||||
value = self.manifest.hrefs[path].id
|
value = self.manifest.hrefs[path].id
|
||||||
if frag:
|
if frag:
|
||||||
value = '#'.join((value, frag))
|
value = '#'.join((value, frag))
|
||||||
@ -281,9 +282,9 @@ class ReBinary(object):
|
|||||||
self.logger.warn("More than six anchors in file %r. "
|
self.logger.warn("More than six anchors in file %r. "
|
||||||
"Some links may not work properly." % self.item.href)
|
"Some links may not work properly." % self.item.href)
|
||||||
data = StringIO()
|
data = StringIO()
|
||||||
data.write(unichr(len(self.anchors)).encode('utf-8'))
|
data.write(codepoint_to_chr(len(self.anchors)).encode('utf-8'))
|
||||||
for anchor, offset in self.anchors:
|
for anchor, offset in self.anchors:
|
||||||
data.write(unichr(len(anchor)).encode('utf-8'))
|
data.write(codepoint_to_chr(len(anchor)).encode('utf-8'))
|
||||||
data.write(anchor)
|
data.write(anchor)
|
||||||
data.write(pack('<I', offset))
|
data.write(pack('<I', offset))
|
||||||
return data.getvalue()
|
return data.getvalue()
|
||||||
@ -313,7 +314,7 @@ class LitWriter(object):
|
|||||||
oeb.metadata.add('calibre-version', calibre.__version__)
|
oeb.metadata.add('calibre-version', calibre.__version__)
|
||||||
cover = None
|
cover = None
|
||||||
if oeb.metadata.cover:
|
if oeb.metadata.cover:
|
||||||
id = unicode(oeb.metadata.cover[0])
|
id = unicode_type(oeb.metadata.cover[0])
|
||||||
cover = oeb.manifest.ids[id]
|
cover = oeb.manifest.ids[id]
|
||||||
for type, title in ALL_MS_COVER_TYPES:
|
for type, title in ALL_MS_COVER_TYPES:
|
||||||
if type not in oeb.guide:
|
if type not in oeb.guide:
|
||||||
@ -485,7 +486,7 @@ class LitWriter(object):
|
|||||||
data = rebin.content
|
data = rebin.content
|
||||||
name = name + '/content'
|
name = name + '/content'
|
||||||
secnum = 1
|
secnum = 1
|
||||||
elif isinstance(data, unicode):
|
elif isinstance(data, unicode_type):
|
||||||
data = data.encode('utf-8')
|
data = data.encode('utf-8')
|
||||||
elif hasattr(data, 'cssText'):
|
elif hasattr(data, 'cssText'):
|
||||||
data = str(item)
|
data = str(item)
|
||||||
@ -521,9 +522,9 @@ class LitWriter(object):
|
|||||||
item.offset = offset \
|
item.offset = offset \
|
||||||
if state in ('linear', 'nonlinear') else 0
|
if state in ('linear', 'nonlinear') else 0
|
||||||
data.write(pack('<I', item.offset))
|
data.write(pack('<I', item.offset))
|
||||||
entry = [unichr(len(id)), unicode(id),
|
entry = [codepoint_to_chr(len(id)), unicode_type(id),
|
||||||
unichr(len(href)), unicode(href),
|
codepoint_to_chr(len(href)), unicode_type(href),
|
||||||
unichr(len(media_type)), unicode(media_type)]
|
codepoint_to_chr(len(media_type)), unicode_type(media_type)]
|
||||||
for value in entry:
|
for value in entry:
|
||||||
data.write(value.encode('utf-8'))
|
data.write(value.encode('utf-8'))
|
||||||
data.write('\0')
|
data.write('\0')
|
||||||
|
@ -36,6 +36,7 @@ from calibre.ptempfile import PersistentTemporaryFile
|
|||||||
from calibre.devices.interface import DevicePlugin as Device
|
from calibre.devices.interface import DevicePlugin as Device
|
||||||
from calibre.ebooks.lrf.html.color_map import lrs_color
|
from calibre.ebooks.lrf.html.color_map import lrs_color
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
def update_css(ncss, ocss):
|
def update_css(ncss, ocss):
|
||||||
@ -54,10 +55,10 @@ def munge_paths(basepath, url):
|
|||||||
if not path:
|
if not path:
|
||||||
path = basepath
|
path = basepath
|
||||||
elif not os.path.isabs(path):
|
elif not os.path.isabs(path):
|
||||||
if isinstance(path, unicode):
|
if isinstance(path, unicode_type):
|
||||||
path = path.encode(sys.getfilesystemencoding())
|
path = path.encode(sys.getfilesystemencoding())
|
||||||
dn = os.path.dirname(basepath)
|
dn = os.path.dirname(basepath)
|
||||||
if isinstance(dn, unicode):
|
if isinstance(dn, unicode_type):
|
||||||
dn = dn.encode(sys.getfilesystemencoding())
|
dn = dn.encode(sys.getfilesystemencoding())
|
||||||
path = os.path.join(dn, path)
|
path = os.path.join(dn, path)
|
||||||
return os.path.normpath(path), fragment
|
return os.path.normpath(path), fragment
|
||||||
@ -272,7 +273,7 @@ class HTMLConverter(object):
|
|||||||
update_css(npcss, self.override_pcss)
|
update_css(npcss, self.override_pcss)
|
||||||
|
|
||||||
paths = [os.path.abspath(path) for path in paths]
|
paths = [os.path.abspath(path) for path in paths]
|
||||||
paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, unicode) else path for path in paths]
|
paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, unicode_type) else path for path in paths]
|
||||||
|
|
||||||
while len(paths) > 0 and self.link_level <= self.link_levels:
|
while len(paths) > 0 and self.link_level <= self.link_levels:
|
||||||
for path in paths:
|
for path in paths:
|
||||||
@ -336,7 +337,7 @@ class HTMLConverter(object):
|
|||||||
markupMassage=nmassage)
|
markupMassage=nmassage)
|
||||||
except ConversionError as err:
|
except ConversionError as err:
|
||||||
if 'Failed to coerce to unicode' in str(err):
|
if 'Failed to coerce to unicode' in str(err):
|
||||||
raw = unicode(raw, 'utf8', 'replace')
|
raw = unicode_type(raw, 'utf8', 'replace')
|
||||||
soup = BeautifulSoup(raw,
|
soup = BeautifulSoup(raw,
|
||||||
convertEntities=BeautifulSoup.XHTML_ENTITIES,
|
convertEntities=BeautifulSoup.XHTML_ENTITIES,
|
||||||
markupMassage=nmassage)
|
markupMassage=nmassage)
|
||||||
@ -359,7 +360,7 @@ class HTMLConverter(object):
|
|||||||
os.makedirs(tdir)
|
os.makedirs(tdir)
|
||||||
try:
|
try:
|
||||||
dump = open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb')
|
dump = open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb')
|
||||||
dump.write(unicode(soup).encode('utf-8'))
|
dump.write(unicode_type(soup).encode('utf-8'))
|
||||||
self.log.info(_('Written preprocessed HTML to ')+dump.name)
|
self.log.info(_('Written preprocessed HTML to ')+dump.name)
|
||||||
dump.close()
|
dump.close()
|
||||||
except:
|
except:
|
||||||
@ -394,7 +395,7 @@ class HTMLConverter(object):
|
|||||||
self.log.info(_('\tConverting to BBeB...'))
|
self.log.info(_('\tConverting to BBeB...'))
|
||||||
self.current_style = {}
|
self.current_style = {}
|
||||||
self.page_break_found = False
|
self.page_break_found = False
|
||||||
if not isinstance(path, unicode):
|
if not isinstance(path, unicode_type):
|
||||||
path = path.decode(sys.getfilesystemencoding())
|
path = path.decode(sys.getfilesystemencoding())
|
||||||
self.target_prefix = path
|
self.target_prefix = path
|
||||||
self.previous_text = '\n'
|
self.previous_text = '\n'
|
||||||
@ -589,7 +590,7 @@ class HTMLConverter(object):
|
|||||||
if isinstance(c, HTMLConverter.IGNORED_TAGS):
|
if isinstance(c, HTMLConverter.IGNORED_TAGS):
|
||||||
continue
|
continue
|
||||||
if isinstance(c, NavigableString):
|
if isinstance(c, NavigableString):
|
||||||
text += unicode(c)
|
text += unicode_type(c)
|
||||||
elif isinstance(c, Tag):
|
elif isinstance(c, Tag):
|
||||||
if c.name.lower() == 'img' and c.has_key('alt'): # noqa
|
if c.name.lower() == 'img' and c.has_key('alt'): # noqa
|
||||||
alt_text += c['alt']
|
alt_text += c['alt']
|
||||||
@ -644,7 +645,7 @@ class HTMLConverter(object):
|
|||||||
para, text, path, fragment = link['para'], link['text'], link['path'], link['fragment']
|
para, text, path, fragment = link['para'], link['text'], link['path'], link['fragment']
|
||||||
ascii_text = text
|
ascii_text = text
|
||||||
|
|
||||||
if not isinstance(path, unicode):
|
if not isinstance(path, unicode_type):
|
||||||
path = path.decode(sys.getfilesystemencoding())
|
path = path.decode(sys.getfilesystemencoding())
|
||||||
if path in self.processed_files:
|
if path in self.processed_files:
|
||||||
if path+fragment in self.targets.keys():
|
if path+fragment in self.targets.keys():
|
||||||
@ -1323,7 +1324,7 @@ class HTMLConverter(object):
|
|||||||
bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px'
|
bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px'
|
||||||
if 'em' in tag_css['text-indent']:
|
if 'em' in tag_css['text-indent']:
|
||||||
bl = '10pt'
|
bl = '10pt'
|
||||||
indent = self.unit_convert(unicode(tag_css['text-indent']), pts=True, base_length=bl)
|
indent = self.unit_convert(unicode_type(tag_css['text-indent']), pts=True, base_length=bl)
|
||||||
if not indent:
|
if not indent:
|
||||||
indent = 0
|
indent = 0
|
||||||
if indent > 0 and indent < 10 * self.minimum_indent:
|
if indent > 0 and indent < 10 * self.minimum_indent:
|
||||||
@ -1482,7 +1483,7 @@ class HTMLConverter(object):
|
|||||||
enc = sys.getfilesystemencoding()
|
enc = sys.getfilesystemencoding()
|
||||||
if not enc:
|
if not enc:
|
||||||
enc = 'utf8'
|
enc = 'utf8'
|
||||||
if isinstance(path, unicode):
|
if isinstance(path, unicode_type):
|
||||||
path = path.encode(enc, 'replace')
|
path = path.encode(enc, 'replace')
|
||||||
if os.access(path, os.R_OK) and os.path.isfile(path):
|
if os.access(path, os.R_OK) and os.path.isfile(path):
|
||||||
if ext in ['png', 'jpg', 'bmp', 'jpeg']:
|
if ext in ['png', 'jpg', 'bmp', 'jpeg']:
|
||||||
@ -1526,7 +1527,7 @@ class HTMLConverter(object):
|
|||||||
elif tagname in ['style', 'link']:
|
elif tagname in ['style', 'link']:
|
||||||
ncss, npcss = {}, {}
|
ncss, npcss = {}, {}
|
||||||
if tagname == 'style':
|
if tagname == 'style':
|
||||||
text = ''.join([unicode(i) for i in tag.findAll(text=True)])
|
text = ''.join([unicode_type(i) for i in tag.findAll(text=True)])
|
||||||
css, pcss = self.parse_css(text)
|
css, pcss = self.parse_css(text)
|
||||||
ncss.update(css)
|
ncss.update(css)
|
||||||
npcss.update(pcss)
|
npcss.update(pcss)
|
||||||
@ -1559,7 +1560,7 @@ class HTMLConverter(object):
|
|||||||
if tag.contents:
|
if tag.contents:
|
||||||
c = tag.contents[0]
|
c = tag.contents[0]
|
||||||
if isinstance(c, NavigableString):
|
if isinstance(c, NavigableString):
|
||||||
c = unicode(c).replace('\r\n', '\n').replace('\r', '\n')
|
c = unicode_type(c).replace('\r\n', '\n').replace('\r', '\n')
|
||||||
if c.startswith('\n'):
|
if c.startswith('\n'):
|
||||||
c = c[1:]
|
c = c[1:]
|
||||||
tag.contents[0] = NavigableString(c)
|
tag.contents[0] = NavigableString(c)
|
||||||
@ -1759,7 +1760,7 @@ class HTMLConverter(object):
|
|||||||
except Exception as err:
|
except Exception as err:
|
||||||
self.log.warning(_('An error occurred while processing a table: %s. Ignoring table markup.')%repr(err))
|
self.log.warning(_('An error occurred while processing a table: %s. Ignoring table markup.')%repr(err))
|
||||||
self.log.exception('')
|
self.log.exception('')
|
||||||
self.log.debug(_('Bad table:\n%s')%unicode(tag)[:300])
|
self.log.debug(_('Bad table:\n%s')%unicode_type(tag)[:300])
|
||||||
self.in_table = False
|
self.in_table = False
|
||||||
self.process_children(tag, tag_css, tag_pseudo_css)
|
self.process_children(tag, tag_css, tag_pseudo_css)
|
||||||
finally:
|
finally:
|
||||||
@ -1810,7 +1811,7 @@ class HTMLConverter(object):
|
|||||||
|
|
||||||
|
|
||||||
def process_file(path, options, logger):
|
def process_file(path, options, logger):
|
||||||
if not isinstance(path, unicode):
|
if not isinstance(path, unicode_type):
|
||||||
path = path.decode(sys.getfilesystemencoding())
|
path = path.decode(sys.getfilesystemencoding())
|
||||||
path = os.path.abspath(path)
|
path = os.path.abspath(path)
|
||||||
default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
|
default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
|
||||||
@ -1857,9 +1858,9 @@ def process_file(path, options, logger):
|
|||||||
|
|
||||||
for prop in ('author', 'author_sort', 'title', 'title_sort', 'publisher', 'freetext'):
|
for prop in ('author', 'author_sort', 'title', 'title_sort', 'publisher', 'freetext'):
|
||||||
val = getattr(options, prop, None)
|
val = getattr(options, prop, None)
|
||||||
if val and not isinstance(val, unicode):
|
if val and not isinstance(val, unicode_type):
|
||||||
soup = BeautifulSoup(val)
|
soup = BeautifulSoup(val)
|
||||||
setattr(options, prop, unicode(soup))
|
setattr(options, prop, unicode_type(soup))
|
||||||
|
|
||||||
title = (options.title, options.title_sort)
|
title = (options.title, options.title_sort)
|
||||||
author = (options.author, options.author_sort)
|
author = (options.author, options.author_sort)
|
||||||
@ -1903,7 +1904,7 @@ def process_file(path, options, logger):
|
|||||||
options.force_page_break = fpb
|
options.force_page_break = fpb
|
||||||
options.link_exclude = le
|
options.link_exclude = le
|
||||||
options.page_break = pb
|
options.page_break = pb
|
||||||
if not isinstance(options.chapter_regex, unicode):
|
if not isinstance(options.chapter_regex, unicode_type):
|
||||||
options.chapter_regex = options.chapter_regex.decode(preferred_encoding)
|
options.chapter_regex = options.chapter_regex.decode(preferred_encoding)
|
||||||
options.chapter_regex = re.compile(options.chapter_regex, re.IGNORECASE)
|
options.chapter_regex = re.compile(options.chapter_regex, re.IGNORECASE)
|
||||||
fpba = options.force_page_break_attr.split(',')
|
fpba = options.force_page_break_attr.split(',')
|
||||||
|
@ -11,6 +11,8 @@ from PyQt5.Qt import QUrl, QApplication, QSize, QEventLoop, \
|
|||||||
QPainter, QImage, QObject, Qt
|
QPainter, QImage, QObject, Qt
|
||||||
from PyQt5.QtWebKitWidgets import QWebPage
|
from PyQt5.QtWebKitWidgets import QWebPage
|
||||||
|
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class HTMLTableRenderer(QObject):
|
class HTMLTableRenderer(QObject):
|
||||||
|
|
||||||
@ -67,7 +69,7 @@ class HTMLTableRenderer(QObject):
|
|||||||
def render_table(soup, table, css, base_dir, width, height, dpi, factor=1.0):
|
def render_table(soup, table, css, base_dir, width, height, dpi, factor=1.0):
|
||||||
head = ''
|
head = ''
|
||||||
for e in soup.findAll(['link', 'style']):
|
for e in soup.findAll(['link', 'style']):
|
||||||
head += unicode(e)+'\n\n'
|
head += unicode_type(e)+'\n\n'
|
||||||
style = ''
|
style = ''
|
||||||
for key, val in css.items():
|
for key, val in css.items():
|
||||||
style += key + ':%s;'%val
|
style += key + ':%s;'%val
|
||||||
@ -83,7 +85,7 @@ def render_table(soup, table, css, base_dir, width, height, dpi, factor=1.0):
|
|||||||
%s
|
%s
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
'''%(head, width-10, style, unicode(table))
|
'''%(head, width-10, style, unicode_type(table))
|
||||||
images, tdir = do_render(html, base_dir, width, height, dpi, factor)
|
images, tdir = do_render(html, base_dir, width, height, dpi, factor)
|
||||||
atexit.register(shutil.rmtree, tdir)
|
atexit.register(shutil.rmtree, tdir)
|
||||||
return images
|
return images
|
||||||
|
@ -10,6 +10,7 @@ from calibre.utils.filenames import ascii_filename
|
|||||||
from calibre.ebooks.lrf.meta import LRFMetaFile
|
from calibre.ebooks.lrf.meta import LRFMetaFile
|
||||||
from calibre.ebooks.lrf.objects import get_object, PageTree, StyleObject, \
|
from calibre.ebooks.lrf.objects import get_object, PageTree, StyleObject, \
|
||||||
Font, Text, TOCObject, BookAttr, ruby_tags
|
Font, Text, TOCObject, BookAttr, ruby_tags
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class LRFDocument(LRFMetaFile):
|
class LRFDocument(LRFMetaFile):
|
||||||
@ -112,7 +113,7 @@ class LRFDocument(LRFMetaFile):
|
|||||||
pages += u'<PageTree objid="%d">\n'%(page_tree.id,)
|
pages += u'<PageTree objid="%d">\n'%(page_tree.id,)
|
||||||
close = u'</PageTree>\n'
|
close = u'</PageTree>\n'
|
||||||
for page in page_tree:
|
for page in page_tree:
|
||||||
pages += unicode(page)
|
pages += unicode_type(page)
|
||||||
pages += close
|
pages += close
|
||||||
traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"', pages)] + [pt_id]
|
traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"', pages)] + [pt_id]
|
||||||
|
|
||||||
@ -125,9 +126,9 @@ class LRFDocument(LRFMetaFile):
|
|||||||
if isinstance(obj, (Font, Text, TOCObject)):
|
if isinstance(obj, (Font, Text, TOCObject)):
|
||||||
continue
|
continue
|
||||||
if isinstance(obj, StyleObject):
|
if isinstance(obj, StyleObject):
|
||||||
styles += unicode(obj)
|
styles += unicode_type(obj)
|
||||||
else:
|
else:
|
||||||
objects += unicode(obj)
|
objects += unicode_type(obj)
|
||||||
styles += '</Style>\n'
|
styles += '</Style>\n'
|
||||||
objects += '</Objects>\n'
|
objects += '</Objects>\n'
|
||||||
if write_files:
|
if write_files:
|
||||||
|
@ -20,6 +20,7 @@ import xml.dom.minidom as dom
|
|||||||
from functools import wraps
|
from functools import wraps
|
||||||
|
|
||||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
BYTE = "<B" #: Unsigned char little endian encoded in 1 byte
|
BYTE = "<B" #: Unsigned char little endian encoded in 1 byte
|
||||||
WORD = "<H" #: Unsigned short little endian encoded in 2 bytes
|
WORD = "<H" #: Unsigned short little endian encoded in 2 bytes
|
||||||
@ -195,8 +196,8 @@ class xml_field(object):
|
|||||||
|
|
||||||
if not val:
|
if not val:
|
||||||
val = u''
|
val = u''
|
||||||
if type(val).__name__ != 'unicode':
|
if isinstance(val, unicode_type):
|
||||||
val = unicode(val, 'utf-8')
|
val = unicode_type(val, 'utf-8')
|
||||||
|
|
||||||
elems = document.getElementsByTagName(self.tag_name)
|
elems = document.getElementsByTagName(self.tag_name)
|
||||||
elem = None
|
elem = None
|
||||||
|
@ -6,6 +6,7 @@ import struct, array, zlib, cStringIO, collections, re
|
|||||||
from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE
|
from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE
|
||||||
from calibre import entity_to_unicode, prepare_string_for_xml
|
from calibre import entity_to_unicode, prepare_string_for_xml
|
||||||
from calibre.ebooks.lrf.tags import Tag
|
from calibre.ebooks.lrf.tags import Tag
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
ruby_tags = {
|
ruby_tags = {
|
||||||
0xF575: ['rubyAlignAndAdjust', 'W'],
|
0xF575: ['rubyAlignAndAdjust', 'W'],
|
||||||
@ -88,10 +89,10 @@ class LRFObject(object):
|
|||||||
yield i
|
yield i
|
||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return unicode(self.__class__.__name__)
|
return unicode_type(self.__class__.__name__)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return unicode(self).encode('utf-8')
|
return unicode_type(self).encode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
class LRFContentObject(LRFObject):
|
class LRFContentObject(LRFObject):
|
||||||
@ -255,7 +256,7 @@ class Color(object):
|
|||||||
return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b)
|
return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return unicode(self)
|
return unicode_type(self)
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return 4
|
return 4
|
||||||
@ -274,7 +275,7 @@ class EmptyPageElement(object):
|
|||||||
yield i
|
yield i
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return unicode(self)
|
return unicode_type(self)
|
||||||
|
|
||||||
|
|
||||||
class PageDiv(EmptyPageElement):
|
class PageDiv(EmptyPageElement):
|
||||||
@ -429,12 +430,12 @@ class Page(LRFStream):
|
|||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
s = u'\n<Page pagestyle="%d" objid="%d">\n'%(self.style_id, self.id)
|
s = u'\n<Page pagestyle="%d" objid="%d">\n'%(self.style_id, self.id)
|
||||||
for i in self:
|
for i in self:
|
||||||
s += unicode(i)
|
s += unicode_type(i)
|
||||||
s += '\n</Page>\n'
|
s += '\n</Page>\n'
|
||||||
return s
|
return s
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return unicode(self)
|
return unicode_type(self)
|
||||||
|
|
||||||
def to_html(self):
|
def to_html(self):
|
||||||
s = u''
|
s = u''
|
||||||
@ -619,7 +620,7 @@ class Block(LRFStream, TextCSS):
|
|||||||
s += '%s="%s" '%(attr, self.attrs[attr])
|
s += '%s="%s" '%(attr, self.attrs[attr])
|
||||||
if self.name != 'ImageBlock':
|
if self.name != 'ImageBlock':
|
||||||
s = s.rstrip()+'>\n'
|
s = s.rstrip()+'>\n'
|
||||||
s += unicode(self.content)
|
s += unicode_type(self.content)
|
||||||
s += '</%s>\n'%(self.name,)
|
s += '</%s>\n'%(self.name,)
|
||||||
return s
|
return s
|
||||||
return s.rstrip() + ' />\n'
|
return s.rstrip() + ' />\n'
|
||||||
@ -717,7 +718,7 @@ class Text(LRFStream):
|
|||||||
lineposition_map = {1:'before', 2:'after'}
|
lineposition_map = {1:'before', 2:'after'}
|
||||||
|
|
||||||
def add_text(self, text):
|
def add_text(self, text):
|
||||||
s = unicode(text, "utf-16-le")
|
s = unicode_type(text, "utf-16-le")
|
||||||
if s:
|
if s:
|
||||||
s = s.translate(self.text_map)
|
s = s.translate(self.text_map)
|
||||||
self.content.append(self.entity_pattern.sub(entity_to_unicode, s))
|
self.content.append(self.entity_pattern.sub(entity_to_unicode, s))
|
||||||
@ -888,7 +889,7 @@ class Text(LRFStream):
|
|||||||
p = open_containers.pop()
|
p = open_containers.pop()
|
||||||
s += u'</%s>'%(p.name,)
|
s += u'</%s>'%(p.name,)
|
||||||
else:
|
else:
|
||||||
s += unicode(c)
|
s += unicode_type(c)
|
||||||
if not c.self_closing:
|
if not c.self_closing:
|
||||||
open_containers.append(c)
|
open_containers.append(c)
|
||||||
|
|
||||||
@ -1001,7 +1002,7 @@ class Canvas(LRFStream):
|
|||||||
s += '%s="%s" '%(attr, self.attrs[attr])
|
s += '%s="%s" '%(attr, self.attrs[attr])
|
||||||
s = s.rstrip() + '>\n'
|
s = s.rstrip() + '>\n'
|
||||||
for po in self:
|
for po in self:
|
||||||
s += unicode(po) + '\n'
|
s += unicode_type(po) + '\n'
|
||||||
s += '</%s>\n'%(self.__class__.__name__,)
|
s += '</%s>\n'%(self.__class__.__name__,)
|
||||||
return s
|
return s
|
||||||
|
|
||||||
@ -1198,7 +1199,7 @@ class BookAttr(StyleObject, LRFObject):
|
|||||||
s += u'<BookSetting bindingdirection="%s" dpi="%s" screenwidth="%s" screenheight="%s" colordepth="%s" />\n'%\
|
s += u'<BookSetting bindingdirection="%s" dpi="%s" screenwidth="%s" screenheight="%s" colordepth="%s" />\n'%\
|
||||||
(self.binding_map[doc.binding], doc.dpi, doc.width, doc.height, doc.color_depth)
|
(self.binding_map[doc.binding], doc.dpi, doc.width, doc.height, doc.color_depth)
|
||||||
for font in self._document.font_map.values():
|
for font in self._document.font_map.values():
|
||||||
s += unicode(font)
|
s += unicode_type(font)
|
||||||
s += '</BookStyle>\n'
|
s += '</BookStyle>\n'
|
||||||
return s
|
return s
|
||||||
|
|
||||||
@ -1239,7 +1240,7 @@ class TOCObject(LRFStream):
|
|||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
s = u'<TOC>\n'
|
s = u'<TOC>\n'
|
||||||
for i in self:
|
for i in self:
|
||||||
s += unicode(i)
|
s += unicode_type(i)
|
||||||
return s + '</TOC>\n'
|
return s + '</TOC>\n'
|
||||||
|
|
||||||
|
|
||||||
@ -1288,5 +1289,3 @@ def get_object(document, stream, id, offset, size, scramble_key):
|
|||||||
return object_map[obj_type](document, stream, obj_id, scramble_key, offset+size-Tag.tags[0][0])
|
return object_map[obj_type](document, stream, obj_id, scramble_key, offset+size-Tag.tags[0][0])
|
||||||
|
|
||||||
raise LRFParseError("Unknown object type: %02X!" % obj_type)
|
raise LRFParseError("Unknown object type: %02X!" % obj_type)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
""" elements.py -- replacements and helpers for ElementTree """
|
""" elements.py -- replacements and helpers for ElementTree """
|
||||||
|
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class ElementWriter(object):
|
class ElementWriter(object):
|
||||||
|
|
||||||
@ -21,9 +23,9 @@ class ElementWriter(object):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
def _writeAttribute(self, f, name, value):
|
def _writeAttribute(self, f, name, value):
|
||||||
f.write(u' %s="' % unicode(name))
|
f.write(u' %s="' % unicode_type(name))
|
||||||
if not isinstance(value, basestring):
|
if not isinstance(value, basestring):
|
||||||
value = unicode(value)
|
value = unicode_type(value)
|
||||||
value = self._encodeCdata(value)
|
value = self._encodeCdata(value)
|
||||||
value = value.replace('"', '"')
|
value = value.replace('"', '"')
|
||||||
f.write(value)
|
f.write(value)
|
||||||
@ -34,7 +36,7 @@ class ElementWriter(object):
|
|||||||
f.write(text)
|
f.write(text)
|
||||||
|
|
||||||
def _write(self, f, e):
|
def _write(self, f, e):
|
||||||
f.write(u'<' + unicode(e.tag))
|
f.write(u'<' + unicode_type(e.tag))
|
||||||
|
|
||||||
attributes = e.items()
|
attributes = e.items()
|
||||||
attributes.sort()
|
attributes.sort()
|
||||||
@ -72,6 +74,3 @@ class ElementWriter(object):
|
|||||||
f.write(u'<?xml version="1.0" encoding="%s"?>\n' % self.outputEncodingName)
|
f.write(u'<?xml version="1.0" encoding="%s"?>\n' % self.outputEncodingName)
|
||||||
|
|
||||||
self._write(f, self.e)
|
self._write(f, self.e)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
import struct
|
import struct
|
||||||
|
|
||||||
from calibre.ebooks.lrf import LRFParseError
|
from calibre.ebooks.lrf import LRFParseError
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class Tag(object):
|
class Tag(object):
|
||||||
@ -246,7 +247,7 @@ class Tag(object):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def string_parser(self, stream):
|
def string_parser(self, stream):
|
||||||
size = struct.unpack("<H", stream.read(2))[0]
|
size = struct.unpack("<H", stream.read(2))[0]
|
||||||
return unicode(stream.read(size), "utf_16")
|
return unicode_type(stream.read(size), "utf_16")
|
||||||
|
|
||||||
def type_one_parser(self, stream):
|
def type_one_parser(self, stream):
|
||||||
cnt = struct.unpack("<H", stream.read(2))[0]
|
cnt = struct.unpack("<H", stream.read(2))[0]
|
||||||
|
@ -15,6 +15,8 @@ from calibre import relpath, guess_type, remove_bracketed_text, prints, force_un
|
|||||||
|
|
||||||
from calibre.utils.config_base import tweaks
|
from calibre.utils.config_base import tweaks
|
||||||
|
|
||||||
|
from polyglot.builtins import codepoint_to_chr, unicode_type
|
||||||
|
|
||||||
try:
|
try:
|
||||||
_author_pat = re.compile(tweaks['authors_split_regex'])
|
_author_pat = re.compile(tweaks['authors_split_regex'])
|
||||||
except:
|
except:
|
||||||
@ -134,7 +136,7 @@ def get_title_sort_pat(lang=None):
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in
|
_ignore_starts = u'\'"'+u''.join(codepoint_to_chr(x) for x in
|
||||||
range(0x2018, 0x201e)+[0x2032, 0x2033])
|
range(0x2018, 0x201e)+[0x2032, 0x2033])
|
||||||
|
|
||||||
|
|
||||||
@ -227,7 +229,7 @@ class Resource(object):
|
|||||||
self._href = href_or_path
|
self._href = href_or_path
|
||||||
else:
|
else:
|
||||||
pc = url[2]
|
pc = url[2]
|
||||||
if isinstance(pc, unicode):
|
if isinstance(pc, unicode_type):
|
||||||
pc = pc.encode('utf-8')
|
pc = pc.encode('utf-8')
|
||||||
pc = unquote(pc).decode('utf-8')
|
pc = unquote(pc).decode('utf-8')
|
||||||
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
|
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
|
||||||
@ -249,7 +251,7 @@ class Resource(object):
|
|||||||
basedir = os.getcwdu()
|
basedir = os.getcwdu()
|
||||||
if self.path is None:
|
if self.path is None:
|
||||||
return self._href
|
return self._href
|
||||||
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode) else self.fragment
|
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment
|
||||||
frag = '#'+quote(f) if self.fragment else ''
|
frag = '#'+quote(f) if self.fragment else ''
|
||||||
if self.path == basedir:
|
if self.path == basedir:
|
||||||
return ''+frag
|
return ''+frag
|
||||||
@ -257,7 +259,7 @@ class Resource(object):
|
|||||||
rpath = relpath(self.path, basedir)
|
rpath = relpath(self.path, basedir)
|
||||||
except OSError: # On windows path and basedir could be on different drives
|
except OSError: # On windows path and basedir could be on different drives
|
||||||
rpath = self.path
|
rpath = self.path
|
||||||
if isinstance(rpath, unicode):
|
if isinstance(rpath, unicode_type):
|
||||||
rpath = rpath.encode('utf-8')
|
rpath = rpath.encode('utf-8')
|
||||||
return quote(rpath.replace(os.sep, '/'))+frag
|
return quote(rpath.replace(os.sep, '/'))+frag
|
||||||
|
|
||||||
|
@ -14,6 +14,7 @@ from calibre.ebooks.metadata.book import (SC_COPYABLE_FIELDS,
|
|||||||
TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS)
|
TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS)
|
||||||
from calibre.library.field_metadata import FieldMetadata
|
from calibre.library.field_metadata import FieldMetadata
|
||||||
from calibre.utils.icu import sort_key
|
from calibre.utils.icu import sort_key
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
# Special sets used to optimize the performance of getting and setting
|
# Special sets used to optimize the performance of getting and setting
|
||||||
# attributes on Metadata objects
|
# attributes on Metadata objects
|
||||||
@ -606,14 +607,14 @@ class Metadata(object):
|
|||||||
return authors_to_string(self.authors)
|
return authors_to_string(self.authors)
|
||||||
|
|
||||||
def format_tags(self):
|
def format_tags(self):
|
||||||
return u', '.join([unicode(t) for t in sorted(self.tags, key=sort_key)])
|
return u', '.join([unicode_type(t) for t in sorted(self.tags, key=sort_key)])
|
||||||
|
|
||||||
def format_rating(self, v=None, divide_by=1.0):
|
def format_rating(self, v=None, divide_by=1.0):
|
||||||
if v is None:
|
if v is None:
|
||||||
if self.rating is not None:
|
if self.rating is not None:
|
||||||
return unicode(self.rating/divide_by)
|
return unicode_type(self.rating/divide_by)
|
||||||
return u'None'
|
return u'None'
|
||||||
return unicode(v/divide_by)
|
return unicode_type(v/divide_by)
|
||||||
|
|
||||||
def format_field(self, key, series_with_index=True):
|
def format_field(self, key, series_with_index=True):
|
||||||
'''
|
'''
|
||||||
@ -637,15 +638,15 @@ class Metadata(object):
|
|||||||
if cmeta and cmeta['datatype'] == 'series':
|
if cmeta and cmeta['datatype'] == 'series':
|
||||||
if self.get(tkey):
|
if self.get(tkey):
|
||||||
res = self.get_extra(tkey)
|
res = self.get_extra(tkey)
|
||||||
return (unicode(cmeta['name']+'_index'),
|
return (unicode_type(cmeta['name']+'_index'),
|
||||||
self.format_series_index(res), res, cmeta)
|
self.format_series_index(res), res, cmeta)
|
||||||
else:
|
else:
|
||||||
return (unicode(cmeta['name']+'_index'), '', '', cmeta)
|
return (unicode_type(cmeta['name']+'_index'), '', '', cmeta)
|
||||||
|
|
||||||
if key in self.custom_field_keys():
|
if key in self.custom_field_keys():
|
||||||
res = self.get(key, None) # get evaluates all necessary composites
|
res = self.get(key, None) # get evaluates all necessary composites
|
||||||
cmeta = self.get_user_metadata(key, make_copy=False)
|
cmeta = self.get_user_metadata(key, make_copy=False)
|
||||||
name = unicode(cmeta['name'])
|
name = unicode_type(cmeta['name'])
|
||||||
if res is None or res == '': # can't check "not res" because of numeric fields
|
if res is None or res == '': # can't check "not res" because of numeric fields
|
||||||
return (name, res, None, None)
|
return (name, res, None, None)
|
||||||
orig_res = res
|
orig_res = res
|
||||||
@ -668,7 +669,7 @@ class Metadata(object):
|
|||||||
res = fmt.format(res)
|
res = fmt.format(res)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
return (name, unicode(res), orig_res, cmeta)
|
return (name, unicode_type(res), orig_res, cmeta)
|
||||||
|
|
||||||
# convert top-level ids into their value
|
# convert top-level ids into their value
|
||||||
if key in TOP_LEVEL_IDENTIFIERS:
|
if key in TOP_LEVEL_IDENTIFIERS:
|
||||||
@ -682,11 +683,11 @@ class Metadata(object):
|
|||||||
if fmkey in field_metadata and field_metadata[fmkey]['kind'] == 'field':
|
if fmkey in field_metadata and field_metadata[fmkey]['kind'] == 'field':
|
||||||
res = self.get(key, None)
|
res = self.get(key, None)
|
||||||
fmeta = field_metadata[fmkey]
|
fmeta = field_metadata[fmkey]
|
||||||
name = unicode(fmeta['name'])
|
name = unicode_type(fmeta['name'])
|
||||||
if res is None or res == '':
|
if res is None or res == '':
|
||||||
return (name, res, None, None)
|
return (name, res, None, None)
|
||||||
orig_res = res
|
orig_res = res
|
||||||
name = unicode(fmeta['name'])
|
name = unicode_type(fmeta['name'])
|
||||||
datatype = fmeta['datatype']
|
datatype = fmeta['datatype']
|
||||||
if key == 'authors':
|
if key == 'authors':
|
||||||
res = authors_to_string(res)
|
res = authors_to_string(res)
|
||||||
@ -704,7 +705,7 @@ class Metadata(object):
|
|||||||
res = u'%.2g'%(res/2.0)
|
res = u'%.2g'%(res/2.0)
|
||||||
elif key == 'size':
|
elif key == 'size':
|
||||||
res = human_readable(res)
|
res = human_readable(res)
|
||||||
return (name, unicode(res), orig_res, fmeta)
|
return (name, unicode_type(res), orig_res, fmeta)
|
||||||
|
|
||||||
return (None, None, None, None)
|
return (None, None, None, None)
|
||||||
|
|
||||||
@ -718,7 +719,7 @@ class Metadata(object):
|
|||||||
ans = []
|
ans = []
|
||||||
|
|
||||||
def fmt(x, y):
|
def fmt(x, y):
|
||||||
ans.append(u'%-20s: %s'%(unicode(x), unicode(y)))
|
ans.append(u'%-20s: %s'%(unicode_type(x), unicode_type(y)))
|
||||||
|
|
||||||
fmt('Title', self.title)
|
fmt('Title', self.title)
|
||||||
if self.title_sort:
|
if self.title_sort:
|
||||||
@ -732,7 +733,7 @@ class Metadata(object):
|
|||||||
if getattr(self, 'book_producer', False):
|
if getattr(self, 'book_producer', False):
|
||||||
fmt('Book Producer', self.book_producer)
|
fmt('Book Producer', self.book_producer)
|
||||||
if self.tags:
|
if self.tags:
|
||||||
fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
|
fmt('Tags', u', '.join([unicode_type(t) for t in self.tags]))
|
||||||
if self.series:
|
if self.series:
|
||||||
fmt('Series', self.series + ' #%s'%self.format_series_index())
|
fmt('Series', self.series + ' #%s'%self.format_series_index())
|
||||||
if not self.is_null('languages'):
|
if not self.is_null('languages'):
|
||||||
@ -745,7 +746,7 @@ class Metadata(object):
|
|||||||
if self.pubdate is not None:
|
if self.pubdate is not None:
|
||||||
fmt('Published', isoformat(self.pubdate))
|
fmt('Published', isoformat(self.pubdate))
|
||||||
if self.rights is not None:
|
if self.rights is not None:
|
||||||
fmt('Rights', unicode(self.rights))
|
fmt('Rights', unicode_type(self.rights))
|
||||||
if self.identifiers:
|
if self.identifiers:
|
||||||
fmt('Identifiers', u', '.join(['%s:%s'%(k, v) for k, v in
|
fmt('Identifiers', u', '.join(['%s:%s'%(k, v) for k, v in
|
||||||
self.identifiers.iteritems()]))
|
self.identifiers.iteritems()]))
|
||||||
@ -756,7 +757,7 @@ class Metadata(object):
|
|||||||
val = self.get(key, None)
|
val = self.get(key, None)
|
||||||
if val:
|
if val:
|
||||||
(name, val) = self.format_field(key)
|
(name, val) = self.format_field(key)
|
||||||
fmt(name, unicode(val))
|
fmt(name, unicode_type(val))
|
||||||
return u'\n'.join(ans)
|
return u'\n'.join(ans)
|
||||||
|
|
||||||
def to_html(self):
|
def to_html(self):
|
||||||
@ -765,22 +766,22 @@ class Metadata(object):
|
|||||||
'''
|
'''
|
||||||
from calibre.ebooks.metadata import authors_to_string
|
from calibre.ebooks.metadata import authors_to_string
|
||||||
from calibre.utils.date import isoformat
|
from calibre.utils.date import isoformat
|
||||||
ans = [(_('Title'), unicode(self.title))]
|
ans = [(_('Title'), unicode_type(self.title))]
|
||||||
ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
|
ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
|
||||||
ans += [(_('Publisher'), unicode(self.publisher))]
|
ans += [(_('Publisher'), unicode_type(self.publisher))]
|
||||||
ans += [(_('Producer'), unicode(self.book_producer))]
|
ans += [(_('Producer'), unicode_type(self.book_producer))]
|
||||||
ans += [(_('Comments'), unicode(self.comments))]
|
ans += [(_('Comments'), unicode_type(self.comments))]
|
||||||
ans += [('ISBN', unicode(self.isbn))]
|
ans += [('ISBN', unicode_type(self.isbn))]
|
||||||
ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))]
|
ans += [(_('Tags'), u', '.join([unicode_type(t) for t in self.tags]))]
|
||||||
if self.series:
|
if self.series:
|
||||||
ans += [(_('Series'), unicode(self.series) + ' #%s'%self.format_series_index())]
|
ans += [(_('Series'), unicode_type(self.series) + ' #%s'%self.format_series_index())]
|
||||||
ans += [(_('Languages'), u', '.join(self.languages))]
|
ans += [(_('Languages'), u', '.join(self.languages))]
|
||||||
if self.timestamp is not None:
|
if self.timestamp is not None:
|
||||||
ans += [(_('Timestamp'), unicode(isoformat(self.timestamp, as_utc=False, sep=' ')))]
|
ans += [(_('Timestamp'), unicode_type(isoformat(self.timestamp, as_utc=False, sep=' ')))]
|
||||||
if self.pubdate is not None:
|
if self.pubdate is not None:
|
||||||
ans += [(_('Published'), unicode(isoformat(self.pubdate, as_utc=False, sep=' ')))]
|
ans += [(_('Published'), unicode_type(isoformat(self.pubdate, as_utc=False, sep=' ')))]
|
||||||
if self.rights is not None:
|
if self.rights is not None:
|
||||||
ans += [(_('Rights'), unicode(self.rights))]
|
ans += [(_('Rights'), unicode_type(self.rights))]
|
||||||
for key in self.custom_field_keys():
|
for key in self.custom_field_keys():
|
||||||
val = self.get(key, None)
|
val = self.get(key, None)
|
||||||
if val:
|
if val:
|
||||||
|
@ -20,6 +20,7 @@ from calibre.utils.icu import sort_key
|
|||||||
from calibre.utils.formatter import EvalFormatter
|
from calibre.utils.formatter import EvalFormatter
|
||||||
from calibre.utils.date import is_date_undefined
|
from calibre.utils.date import is_date_undefined
|
||||||
from calibre.utils.localization import calibre_langcode_to_name
|
from calibre.utils.localization import calibre_langcode_to_name
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
default_sort = ('title', 'title_sort', 'authors', 'author_sort', 'series', 'rating', 'pubdate', 'tags', 'publisher', 'identifiers')
|
default_sort = ('title', 'title_sort', 'authors', 'author_sort', 'series', 'rating', 'pubdate', 'tags', 'publisher', 'identifiers')
|
||||||
|
|
||||||
@ -163,7 +164,7 @@ def mi_to_html(mi, field_list=None, default_author_link=None, use_roman_numbers=
|
|||||||
path = force_unicode(mi.path, filesystem_encoding)
|
path = force_unicode(mi.path, filesystem_encoding)
|
||||||
scheme = u'devpath' if isdevice else u'path'
|
scheme = u'devpath' if isdevice else u'path'
|
||||||
url = prepare_string_for_xml(path if isdevice else
|
url = prepare_string_for_xml(path if isdevice else
|
||||||
unicode(book_id), True)
|
unicode_type(book_id), True)
|
||||||
pathstr = _('Click to open')
|
pathstr = _('Click to open')
|
||||||
extra = ''
|
extra = ''
|
||||||
if isdevice:
|
if isdevice:
|
||||||
|
@ -10,10 +10,11 @@ from calibre.constants import preferred_encoding
|
|||||||
from calibre.ebooks.metadata.book import SERIALIZABLE_FIELDS
|
from calibre.ebooks.metadata.book import SERIALIZABLE_FIELDS
|
||||||
from calibre.ebooks.metadata.book.base import Metadata
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
from calibre.utils.imghdr import what
|
from calibre.utils.imghdr import what
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
def ensure_unicode(obj, enc=preferred_encoding):
|
def ensure_unicode(obj, enc=preferred_encoding):
|
||||||
if isinstance(obj, unicode):
|
if isinstance(obj, unicode_type):
|
||||||
return obj
|
return obj
|
||||||
if isinstance(obj, bytes):
|
if isinstance(obj, bytes):
|
||||||
return obj.decode(enc, 'replace')
|
return obj.decode(enc, 'replace')
|
||||||
|
@ -16,6 +16,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
|
|||||||
from calibre.ebooks.lrf.meta import LRFMetaFile
|
from calibre.ebooks.lrf.meta import LRFMetaFile
|
||||||
from calibre import prints
|
from calibre import prints
|
||||||
from calibre.utils.date import parse_date
|
from calibre.utils.date import parse_date
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
USAGE=_('%prog ebook_file [options]\n') + \
|
USAGE=_('%prog ebook_file [options]\n') + \
|
||||||
_('''
|
_('''
|
||||||
@ -181,7 +182,7 @@ def main(args=sys.argv):
|
|||||||
mi = get_metadata(stream, stream_type, force_read_metadata=True)
|
mi = get_metadata(stream, stream_type, force_read_metadata=True)
|
||||||
if trying_to_set:
|
if trying_to_set:
|
||||||
prints(_('Original metadata')+'::')
|
prints(_('Original metadata')+'::')
|
||||||
metadata = unicode(mi)
|
metadata = unicode_type(mi)
|
||||||
if trying_to_set:
|
if trying_to_set:
|
||||||
metadata = '\t'+'\n\t'.join(metadata.split('\n'))
|
metadata = '\t'+'\n\t'.join(metadata.split('\n'))
|
||||||
prints(metadata, safe_encode=True)
|
prints(metadata, safe_encode=True)
|
||||||
@ -198,7 +199,7 @@ def main(args=sys.argv):
|
|||||||
lrf.book_id = opts.lrf_bookid
|
lrf.book_id = opts.lrf_bookid
|
||||||
mi = get_metadata(stream, stream_type, force_read_metadata=True)
|
mi = get_metadata(stream, stream_type, force_read_metadata=True)
|
||||||
prints('\n' + _('Changed metadata') + '::')
|
prints('\n' + _('Changed metadata') + '::')
|
||||||
metadata = unicode(mi)
|
metadata = unicode_type(mi)
|
||||||
metadata = '\t'+'\n\t'.join(metadata.split('\n'))
|
metadata = '\t'+'\n\t'.join(metadata.split('\n'))
|
||||||
prints(metadata, safe_encode=True)
|
prints(metadata, safe_encode=True)
|
||||||
if lrf is not None:
|
if lrf is not None:
|
||||||
|
@ -18,6 +18,7 @@ from calibre.utils.imghdr import identify
|
|||||||
from calibre import guess_type, guess_all_extensions, prints, force_unicode
|
from calibre import guess_type, guess_all_extensions, prints, force_unicode
|
||||||
from calibre.ebooks.metadata import MetaInformation, check_isbn
|
from calibre.ebooks.metadata import MetaInformation, check_isbn
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
NAMESPACES = {
|
NAMESPACES = {
|
||||||
@ -26,7 +27,7 @@ NAMESPACES = {
|
|||||||
'xlink' : 'http://www.w3.org/1999/xlink'
|
'xlink' : 'http://www.w3.org/1999/xlink'
|
||||||
}
|
}
|
||||||
|
|
||||||
tostring = partial(etree.tostring, method='text', encoding=unicode)
|
tostring = partial(etree.tostring, method='text', encoding=unicode_type)
|
||||||
|
|
||||||
|
|
||||||
def XLINK(tag):
|
def XLINK(tag):
|
||||||
@ -112,9 +113,9 @@ def get_metadata(stream):
|
|||||||
|
|
||||||
# fallback for book_title
|
# fallback for book_title
|
||||||
if book_title:
|
if book_title:
|
||||||
book_title = unicode(book_title)
|
book_title = unicode_type(book_title)
|
||||||
else:
|
else:
|
||||||
book_title = force_unicode(os.path.splitext(
|
book_title = force_unicode_type(os.path.splitext(
|
||||||
os.path.basename(getattr(stream, 'name',
|
os.path.basename(getattr(stream, 'name',
|
||||||
_('Unknown'))))[0])
|
_('Unknown'))))[0])
|
||||||
mi = MetaInformation(book_title, authors)
|
mi = MetaInformation(book_title, authors)
|
||||||
@ -249,7 +250,7 @@ def _parse_tags(root, mi, ctx):
|
|||||||
# -- i18n Translations-- ?
|
# -- i18n Translations-- ?
|
||||||
tags = ctx.XPath('//fb:%s/fb:genre/text()' % genre_sec)(root)
|
tags = ctx.XPath('//fb:%s/fb:genre/text()' % genre_sec)(root)
|
||||||
if tags:
|
if tags:
|
||||||
mi.tags = list(map(unicode, tags))
|
mi.tags = list(map(unicode_type, tags))
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
@ -447,7 +448,7 @@ def ensure_namespace(doc):
|
|||||||
break
|
break
|
||||||
if bare_tags:
|
if bare_tags:
|
||||||
import re
|
import re
|
||||||
raw = etree.tostring(doc, encoding=unicode)
|
raw = etree.tostring(doc, encoding=unicode_type)
|
||||||
raw = re.sub(r'''<(description|body)\s+xmlns=['"]['"]>''', r'<\1>', raw)
|
raw = re.sub(r'''<(description|body)\s+xmlns=['"]['"]>''', r'<\1>', raw)
|
||||||
doc = etree.fromstring(raw)
|
doc = etree.fromstring(raw)
|
||||||
return doc
|
return doc
|
||||||
|
@ -6,6 +6,7 @@ __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
MAGIC = ['\x00\x01BOOKDOUG', '\x00\x02BOOKDOUG']
|
MAGIC = ['\x00\x01BOOKDOUG', '\x00\x02BOOKDOUG']
|
||||||
|
|
||||||
@ -43,6 +44,6 @@ def get_metadata(stream):
|
|||||||
if category:
|
if category:
|
||||||
mi.category = category
|
mi.category = category
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode(err))
|
msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode_type(err))
|
||||||
print(msg.encode('utf8'), file=sys.stderr)
|
print(msg.encode('utf8'), file=sys.stderr)
|
||||||
return mi
|
return mi
|
||||||
|
@ -14,11 +14,12 @@ from calibre.ebooks.metadata.book.base import Metadata
|
|||||||
from calibre import browser
|
from calibre import browser
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
|
from polyglot.builtins import codepoint_to_chr, unicode_type
|
||||||
|
|
||||||
URL = \
|
URL = \
|
||||||
"http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping="
|
"http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping="
|
||||||
|
|
||||||
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
|
_ignore_starts = u'\'"'+u''.join(codepoint_to_chr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
|
||||||
|
|
||||||
|
|
||||||
def get_series(title, authors, timeout=60):
|
def get_series(title, authors, timeout=60):
|
||||||
@ -28,7 +29,7 @@ def get_series(title, authors, timeout=60):
|
|||||||
title = re.sub(r'^(A|The|An)\s+', '', title).strip()
|
title = re.sub(r'^(A|The|An)\s+', '', title).strip()
|
||||||
if not title:
|
if not title:
|
||||||
return mi
|
return mi
|
||||||
if isinstance(title, unicode):
|
if isinstance(title, unicode_type):
|
||||||
title = title.encode('utf-8')
|
title = title.encode('utf-8')
|
||||||
|
|
||||||
title = urllib.quote_plus(title)
|
title = urllib.quote_plus(title)
|
||||||
@ -73,7 +74,7 @@ def get_series(title, authors, timeout=60):
|
|||||||
mi.series = series
|
mi.series = series
|
||||||
ns = ss.nextSibling
|
ns = ss.nextSibling
|
||||||
if ns.contents:
|
if ns.contents:
|
||||||
raw = unicode(ns.contents[0])
|
raw = unicode_type(ns.contents[0])
|
||||||
raw = raw.partition('.')[0].strip()
|
raw = raw.partition('.')[0].strip()
|
||||||
try:
|
try:
|
||||||
mi.series_index = int(raw)
|
mi.series_index = int(raw)
|
||||||
@ -85,4 +86,3 @@ def get_series(title, authors, timeout=60):
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import sys
|
import sys
|
||||||
print(get_series(sys.argv[-2], [sys.argv[-1]]))
|
print(get_series(sys.argv[-2], [sys.argv[-1]]))
|
||||||
|
|
||||||
|
@ -18,6 +18,7 @@ from calibre.utils.config_base import tweaks
|
|||||||
from calibre.utils.date import parse_only_date
|
from calibre.utils.date import parse_only_date
|
||||||
from calibre.utils.localization import canonicalize_lang
|
from calibre.utils.localization import canonicalize_lang
|
||||||
from calibre.utils.imghdr import identify
|
from calibre.utils.imghdr import identify
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class InvalidKFX(ValueError):
|
class InvalidKFX(ValueError):
|
||||||
@ -356,4 +357,4 @@ if __name__ == '__main__':
|
|||||||
from calibre import prints
|
from calibre import prints
|
||||||
with open(sys.argv[-1], 'rb') as f:
|
with open(sys.argv[-1], 'rb') as f:
|
||||||
mi = read_metadata_kfx(f)
|
mi = read_metadata_kfx(f)
|
||||||
prints(unicode(mi))
|
prints(unicode_type(mi))
|
||||||
|
@ -21,6 +21,7 @@ from calibre.ebooks.mobi.langcodes import iana2mobi
|
|||||||
from calibre.utils.date import now as nowf
|
from calibre.utils.date import now as nowf
|
||||||
from calibre.utils.imghdr import what
|
from calibre.utils.imghdr import what
|
||||||
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
def is_image(ss):
|
def is_image(ss):
|
||||||
@ -223,7 +224,7 @@ class MetadataUpdater(object):
|
|||||||
|
|
||||||
def create_exth(self, new_title=None, exth=None):
|
def create_exth(self, new_title=None, exth=None):
|
||||||
# Add an EXTH block to record 0, rewrite the stream
|
# Add an EXTH block to record 0, rewrite the stream
|
||||||
if isinstance(new_title, unicode):
|
if isinstance(new_title, unicode_type):
|
||||||
new_title = new_title.encode(self.codec, 'replace')
|
new_title = new_title.encode(self.codec, 'replace')
|
||||||
|
|
||||||
# Fetch the existing title
|
# Fetch the existing title
|
||||||
|
@ -25,6 +25,7 @@ from calibre.utils.localization import get_lang, canonicalize_lang
|
|||||||
from calibre import prints, guess_type
|
from calibre import prints, guess_type
|
||||||
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
|
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
|
||||||
from calibre.utils.config import tweaks
|
from calibre.utils.config import tweaks
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
pretty_print_opf = False
|
pretty_print_opf = False
|
||||||
|
|
||||||
@ -82,7 +83,7 @@ class Resource(object): # {{{
|
|||||||
self._href = href_or_path
|
self._href = href_or_path
|
||||||
else:
|
else:
|
||||||
pc = url[2]
|
pc = url[2]
|
||||||
if isinstance(pc, unicode):
|
if isinstance(pc, unicode_type):
|
||||||
pc = pc.encode('utf-8')
|
pc = pc.encode('utf-8')
|
||||||
pc = pc.decode('utf-8')
|
pc = pc.decode('utf-8')
|
||||||
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
|
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
|
||||||
@ -103,7 +104,7 @@ class Resource(object): # {{{
|
|||||||
basedir = os.getcwdu()
|
basedir = os.getcwdu()
|
||||||
if self.path is None:
|
if self.path is None:
|
||||||
return self._href
|
return self._href
|
||||||
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode) else self.fragment
|
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment
|
||||||
frag = '#'+f if self.fragment else ''
|
frag = '#'+f if self.fragment else ''
|
||||||
if self.path == basedir:
|
if self.path == basedir:
|
||||||
return ''+frag
|
return ''+frag
|
||||||
@ -111,7 +112,7 @@ class Resource(object): # {{{
|
|||||||
rpath = os.path.relpath(self.path, basedir)
|
rpath = os.path.relpath(self.path, basedir)
|
||||||
except ValueError: # On windows path and basedir could be on different drives
|
except ValueError: # On windows path and basedir could be on different drives
|
||||||
rpath = self.path
|
rpath = self.path
|
||||||
if isinstance(rpath, unicode):
|
if isinstance(rpath, unicode_type):
|
||||||
rpath = rpath.encode('utf-8')
|
rpath = rpath.encode('utf-8')
|
||||||
return rpath.replace(os.sep, '/')+frag
|
return rpath.replace(os.sep, '/')+frag
|
||||||
|
|
||||||
@ -206,10 +207,10 @@ class ManifestItem(Resource): # {{{
|
|||||||
return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type)
|
return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return unicode(self).encode('utf-8')
|
return unicode_type(self).encode('utf-8')
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return unicode(self)
|
return unicode_type(self)
|
||||||
|
|
||||||
def __getitem__(self, index):
|
def __getitem__(self, index):
|
||||||
if index == 0:
|
if index == 0:
|
||||||
@ -410,7 +411,7 @@ class Guide(ResourceCollection): # {{{
|
|||||||
class MetadataField(object):
|
class MetadataField(object):
|
||||||
|
|
||||||
def __init__(self, name, is_dc=True, formatter=None, none_is=None,
|
def __init__(self, name, is_dc=True, formatter=None, none_is=None,
|
||||||
renderer=lambda x: unicode(x)):
|
renderer=lambda x: unicode_type(x)):
|
||||||
self.name = name
|
self.name = name
|
||||||
self.is_dc = is_dc
|
self.is_dc = is_dc
|
||||||
self.formatter = formatter
|
self.formatter = formatter
|
||||||
@ -791,7 +792,7 @@ class OPF(object): # {{{
|
|||||||
def unquote_urls(self):
|
def unquote_urls(self):
|
||||||
def get_href(item):
|
def get_href(item):
|
||||||
raw = unquote(item.get('href', ''))
|
raw = unquote(item.get('href', ''))
|
||||||
if not isinstance(raw, unicode):
|
if not isinstance(raw, unicode_type):
|
||||||
raw = raw.decode('utf-8')
|
raw = raw.decode('utf-8')
|
||||||
return raw
|
return raw
|
||||||
for item in self.itermanifest():
|
for item in self.itermanifest():
|
||||||
@ -820,7 +821,7 @@ class OPF(object): # {{{
|
|||||||
titles = ()
|
titles = ()
|
||||||
if val:
|
if val:
|
||||||
title = titles[0] if titles else self.create_metadata_element('title')
|
title = titles[0] if titles else self.create_metadata_element('title')
|
||||||
title.text = re.sub(r'\s+', ' ', unicode(val))
|
title.text = re.sub(r'\s+', ' ', unicode_type(val))
|
||||||
|
|
||||||
return property(fget=fget, fset=fset)
|
return property(fget=fget, fset=fset)
|
||||||
|
|
||||||
@ -869,7 +870,7 @@ class OPF(object): # {{{
|
|||||||
for key in matches[0].attrib:
|
for key in matches[0].attrib:
|
||||||
if key.endswith('file-as'):
|
if key.endswith('file-as'):
|
||||||
matches[0].attrib.pop(key)
|
matches[0].attrib.pop(key)
|
||||||
matches[0].set('{%s}file-as'%self.NAMESPACES['opf'], unicode(val))
|
matches[0].set('{%s}file-as'%self.NAMESPACES['opf'], unicode_type(val))
|
||||||
|
|
||||||
return property(fget=fget, fset=fset)
|
return property(fget=fget, fset=fset)
|
||||||
|
|
||||||
@ -889,7 +890,7 @@ class OPF(object): # {{{
|
|||||||
tag.getparent().remove(tag)
|
tag.getparent().remove(tag)
|
||||||
for tag in val:
|
for tag in val:
|
||||||
elem = self.create_metadata_element('subject')
|
elem = self.create_metadata_element('subject')
|
||||||
self.set_text(elem, unicode(tag))
|
self.set_text(elem, unicode_type(tag))
|
||||||
|
|
||||||
return property(fget=fget, fset=fset)
|
return property(fget=fget, fset=fset)
|
||||||
|
|
||||||
@ -900,7 +901,7 @@ class OPF(object): # {{{
|
|||||||
ans = None
|
ans = None
|
||||||
for match in self.pubdate_path(self.metadata):
|
for match in self.pubdate_path(self.metadata):
|
||||||
try:
|
try:
|
||||||
val = parse_date(etree.tostring(match, encoding=unicode,
|
val = parse_date(etree.tostring(match, encoding=unicode_type,
|
||||||
method='text', with_tail=False).strip())
|
method='text', with_tail=False).strip())
|
||||||
except:
|
except:
|
||||||
continue
|
continue
|
||||||
@ -912,7 +913,7 @@ class OPF(object): # {{{
|
|||||||
least_val = least_elem = None
|
least_val = least_elem = None
|
||||||
for match in self.pubdate_path(self.metadata):
|
for match in self.pubdate_path(self.metadata):
|
||||||
try:
|
try:
|
||||||
cval = parse_date(etree.tostring(match, encoding=unicode,
|
cval = parse_date(etree.tostring(match, encoding=unicode_type,
|
||||||
method='text', with_tail=False).strip())
|
method='text', with_tail=False).strip())
|
||||||
except:
|
except:
|
||||||
match.getparent().remove(match)
|
match.getparent().remove(match)
|
||||||
@ -962,7 +963,7 @@ class OPF(object): # {{{
|
|||||||
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'ISBN'}
|
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'ISBN'}
|
||||||
matches = [self.create_metadata_element('identifier',
|
matches = [self.create_metadata_element('identifier',
|
||||||
attrib=attrib)]
|
attrib=attrib)]
|
||||||
self.set_text(matches[0], unicode(val))
|
self.set_text(matches[0], unicode_type(val))
|
||||||
|
|
||||||
return property(fget=fget, fset=fset)
|
return property(fget=fget, fset=fset)
|
||||||
|
|
||||||
@ -975,7 +976,7 @@ class OPF(object): # {{{
|
|||||||
for attr, val in x.attrib.iteritems():
|
for attr, val in x.attrib.iteritems():
|
||||||
if attr.endswith('scheme'):
|
if attr.endswith('scheme'):
|
||||||
typ = icu_lower(val)
|
typ = icu_lower(val)
|
||||||
val = etree.tostring(x, with_tail=False, encoding=unicode,
|
val = etree.tostring(x, with_tail=False, encoding=unicode_type,
|
||||||
method='text').strip()
|
method='text').strip()
|
||||||
if val and typ not in ('calibre', 'uuid'):
|
if val and typ not in ('calibre', 'uuid'):
|
||||||
if typ == 'isbn' and val.lower().startswith('urn:isbn:'):
|
if typ == 'isbn' and val.lower().startswith('urn:isbn:'):
|
||||||
@ -984,7 +985,7 @@ class OPF(object): # {{{
|
|||||||
found_scheme = True
|
found_scheme = True
|
||||||
break
|
break
|
||||||
if not found_scheme:
|
if not found_scheme:
|
||||||
val = etree.tostring(x, with_tail=False, encoding=unicode,
|
val = etree.tostring(x, with_tail=False, encoding=unicode_type,
|
||||||
method='text').strip()
|
method='text').strip()
|
||||||
if val.lower().startswith('urn:isbn:'):
|
if val.lower().startswith('urn:isbn:'):
|
||||||
val = check_isbn(val.split(':')[-1])
|
val = check_isbn(val.split(':')[-1])
|
||||||
@ -1017,7 +1018,7 @@ class OPF(object): # {{{
|
|||||||
for typ, val in identifiers.iteritems():
|
for typ, val in identifiers.iteritems():
|
||||||
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: typ.upper()}
|
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: typ.upper()}
|
||||||
self.set_text(self.create_metadata_element(
|
self.set_text(self.create_metadata_element(
|
||||||
'identifier', attrib=attrib), unicode(val))
|
'identifier', attrib=attrib), unicode_type(val))
|
||||||
|
|
||||||
@dynamic_property
|
@dynamic_property
|
||||||
def application_id(self):
|
def application_id(self):
|
||||||
@ -1041,7 +1042,7 @@ class OPF(object): # {{{
|
|||||||
if uuid_id and uuid_id in removed_ids:
|
if uuid_id and uuid_id in removed_ids:
|
||||||
attrib['id'] = uuid_id
|
attrib['id'] = uuid_id
|
||||||
self.set_text(self.create_metadata_element(
|
self.set_text(self.create_metadata_element(
|
||||||
'identifier', attrib=attrib), unicode(val))
|
'identifier', attrib=attrib), unicode_type(val))
|
||||||
|
|
||||||
return property(fget=fget, fset=fset)
|
return property(fget=fget, fset=fset)
|
||||||
|
|
||||||
@ -1058,7 +1059,7 @@ class OPF(object): # {{{
|
|||||||
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'uuid'}
|
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'uuid'}
|
||||||
matches = [self.create_metadata_element('identifier',
|
matches = [self.create_metadata_element('identifier',
|
||||||
attrib=attrib)]
|
attrib=attrib)]
|
||||||
self.set_text(matches[0], unicode(val))
|
self.set_text(matches[0], unicode_type(val))
|
||||||
|
|
||||||
return property(fget=fget, fset=fset)
|
return property(fget=fget, fset=fset)
|
||||||
|
|
||||||
@ -1095,7 +1096,7 @@ class OPF(object): # {{{
|
|||||||
|
|
||||||
for lang in val:
|
for lang in val:
|
||||||
l = self.create_metadata_element('language')
|
l = self.create_metadata_element('language')
|
||||||
self.set_text(l, unicode(lang))
|
self.set_text(l, unicode_type(lang))
|
||||||
|
|
||||||
return property(fget=fget, fset=fset)
|
return property(fget=fget, fset=fset)
|
||||||
|
|
||||||
@ -1118,7 +1119,7 @@ class OPF(object): # {{{
|
|||||||
if not matches:
|
if not matches:
|
||||||
matches = [self.create_metadata_element('contributor')]
|
matches = [self.create_metadata_element('contributor')]
|
||||||
matches[0].set('{%s}role'%self.NAMESPACES['opf'], 'bkp')
|
matches[0].set('{%s}role'%self.NAMESPACES['opf'], 'bkp')
|
||||||
self.set_text(matches[0], unicode(val))
|
self.set_text(matches[0], unicode_type(val))
|
||||||
return property(fget=fget, fset=fset)
|
return property(fget=fget, fset=fset)
|
||||||
|
|
||||||
def identifier_iter(self):
|
def identifier_iter(self):
|
||||||
@ -1701,7 +1702,7 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
|
|||||||
metadata[-1].tail = '\n' +(' '*4)
|
metadata[-1].tail = '\n' +(' '*4)
|
||||||
|
|
||||||
if mi.cover:
|
if mi.cover:
|
||||||
if not isinstance(mi.cover, unicode):
|
if not isinstance(mi.cover, unicode_type):
|
||||||
mi.cover = mi.cover.decode(filesystem_encoding)
|
mi.cover = mi.cover.decode(filesystem_encoding)
|
||||||
guide.text = '\n'+(' '*8)
|
guide.text = '\n'+(' '*8)
|
||||||
r = guide.makeelement(OPF('reference'),
|
r = guide.makeelement(OPF('reference'),
|
||||||
|
@ -12,6 +12,7 @@ from calibre.ptempfile import TemporaryDirectory
|
|||||||
from calibre.ebooks.metadata import (
|
from calibre.ebooks.metadata import (
|
||||||
MetaInformation, string_to_authors, check_isbn, check_doi)
|
MetaInformation, string_to_authors, check_isbn, check_doi)
|
||||||
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
|
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
def get_tools():
|
def get_tools():
|
||||||
@ -88,8 +89,8 @@ def page_images(pdfpath, outputdir, first=1, last=1):
|
|||||||
import win32process as w
|
import win32process as w
|
||||||
args['creationflags'] = w.HIGH_PRIORITY_CLASS | w.CREATE_NO_WINDOW
|
args['creationflags'] = w.HIGH_PRIORITY_CLASS | w.CREATE_NO_WINDOW
|
||||||
try:
|
try:
|
||||||
subprocess.check_call([pdftoppm, '-cropbox', '-jpeg', '-f', unicode(first),
|
subprocess.check_call([pdftoppm, '-cropbox', '-jpeg', '-f', unicode_type(first),
|
||||||
'-l', unicode(last), pdfpath,
|
'-l', unicode_type(last), pdfpath,
|
||||||
os.path.join(outputdir, 'page-images')], **args)
|
os.path.join(outputdir, 'page-images')], **args)
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode)
|
raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode)
|
||||||
|
@ -6,6 +6,7 @@ __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
|
|||||||
import sys, struct
|
import sys, struct
|
||||||
|
|
||||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
MAGIC = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00'
|
MAGIC = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00'
|
||||||
|
|
||||||
@ -47,9 +48,7 @@ def get_metadata(stream):
|
|||||||
mi.author = value
|
mi.author = value
|
||||||
mi.authors = string_to_authors(value)
|
mi.authors = string_to_authors(value)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode(err))
|
msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode_type(err))
|
||||||
print(msg.encode('utf8'), file=sys.stderr)
|
print(msg.encode('utf8'), file=sys.stderr)
|
||||||
raise
|
raise
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ import re, cStringIO, codecs
|
|||||||
|
|
||||||
from calibre import force_unicode
|
from calibre import force_unicode
|
||||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||||
|
from polyglot.builtins import codepoint_to_chr, unicode_type
|
||||||
|
|
||||||
title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
|
title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
|
||||||
author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
|
author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
|
||||||
@ -75,7 +76,7 @@ def detect_codepage(stream):
|
|||||||
|
|
||||||
|
|
||||||
def encode(unistr):
|
def encode(unistr):
|
||||||
if not isinstance(unistr, unicode):
|
if not isinstance(unistr, unicode_type):
|
||||||
unistr = force_unicode(unistr)
|
unistr = force_unicode(unistr)
|
||||||
return ''.join([str(c) if ord(c) < 128 else '\\u' + str(ord(c)) + '?' for c in unistr])
|
return ''.join([str(c) if ord(c) < 128 else '\\u' + str(ord(c)) + '?' for c in unistr])
|
||||||
|
|
||||||
@ -88,7 +89,7 @@ def decode(raw, codec):
|
|||||||
raw = raw.decode(codec)
|
raw = raw.decode(codec)
|
||||||
|
|
||||||
def uni(match):
|
def uni(match):
|
||||||
return unichr(int(match.group(1)))
|
return codepoint_to_chr(int(match.group(1)))
|
||||||
raw = re.sub(r'\\u([0-9]{3,4}).', uni, raw)
|
raw = re.sub(r'\\u([0-9]{3,4}).', uni, raw)
|
||||||
return raw
|
return raw
|
||||||
|
|
||||||
@ -232,4 +233,3 @@ def set_metadata(stream, options):
|
|||||||
stream.truncate()
|
stream.truncate()
|
||||||
stream.write(src)
|
stream.write(src)
|
||||||
stream.write(after)
|
stream.write(after)
|
||||||
|
|
||||||
|
@ -15,6 +15,7 @@ from calibre.constants import __appname__, __version__
|
|||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.utils.cleantext import clean_xml_chars
|
from calibre.utils.cleantext import clean_xml_chars
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
NCX_NS = "http://www.daisy.org/z3986/2005/ncx/"
|
NCX_NS = "http://www.daisy.org/z3986/2005/ncx/"
|
||||||
CALIBRE_NS = "http://calibre.kovidgoyal.net/2009/metadata"
|
CALIBRE_NS = "http://calibre.kovidgoyal.net/2009/metadata"
|
||||||
@ -194,7 +195,7 @@ class TOC(list):
|
|||||||
text = u''
|
text = u''
|
||||||
for txt in txt_path(nl):
|
for txt in txt_path(nl):
|
||||||
text += etree.tostring(txt, method='text',
|
text += etree.tostring(txt, method='text',
|
||||||
encoding=unicode, with_tail=False)
|
encoding=unicode_type, with_tail=False)
|
||||||
content = content_path(np)
|
content = content_path(np)
|
||||||
if content and text:
|
if content and text:
|
||||||
content = content[0]
|
content = content[0]
|
||||||
@ -229,7 +230,7 @@ class TOC(list):
|
|||||||
fragment = fragment.strip()
|
fragment = fragment.strip()
|
||||||
href = href.strip()
|
href = href.strip()
|
||||||
|
|
||||||
txt = ''.join([unicode(s).strip() for s in a.findAll(text=True)])
|
txt = ''.join([unicode_type(s).strip() for s in a.findAll(text=True)])
|
||||||
add = True
|
add = True
|
||||||
for i in self.flat():
|
for i in self.flat():
|
||||||
if i.href == href and i.fragment == fragment:
|
if i.href == href and i.fragment == fragment:
|
||||||
@ -264,7 +265,7 @@ class TOC(list):
|
|||||||
text = clean_xml_chars(text)
|
text = clean_xml_chars(text)
|
||||||
elem = E.navPoint(
|
elem = E.navPoint(
|
||||||
E.navLabel(E.text(re.sub(r'\s+', ' ', text))),
|
E.navLabel(E.text(re.sub(r'\s+', ' ', text))),
|
||||||
E.content(src=unicode(np.href)+(('#' + unicode(np.fragment))
|
E.content(src=unicode_type(np.href)+(('#' + unicode_type(np.fragment))
|
||||||
if np.fragment else '')),
|
if np.fragment else '')),
|
||||||
id=item_id,
|
id=item_id,
|
||||||
playOrder=str(np.play_order)
|
playOrder=str(np.play_order)
|
||||||
|
@ -20,6 +20,7 @@ from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
|
|||||||
from calibre.utils.imghdr import what
|
from calibre.utils.imghdr import what
|
||||||
from calibre.ebooks.mobi.debug import format_bytes
|
from calibre.ebooks.mobi.debug import format_bytes
|
||||||
from calibre.ebooks.mobi.debug.headers import TextRecord
|
from calibre.ebooks.mobi.debug.headers import TextRecord
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class TagX(object): # {{{
|
class TagX(object): # {{{
|
||||||
@ -564,7 +565,7 @@ class TBSIndexing(object): # {{{
|
|||||||
|
|
||||||
def get_index(self, idx):
|
def get_index(self, idx):
|
||||||
for i in self.indices:
|
for i in self.indices:
|
||||||
if i.index in {idx, unicode(idx)}:
|
if i.index in {idx, unicode_type(idx)}:
|
||||||
return i
|
return i
|
||||||
raise IndexError('Index %d not found'%idx)
|
raise IndexError('Index %d not found'%idx)
|
||||||
|
|
||||||
@ -844,5 +845,3 @@ def inspect_mobi(mobi_file, ddir):
|
|||||||
|
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
|
@ -16,6 +16,7 @@ from calibre.ebooks.oeb.stylizer import Stylizer
|
|||||||
from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
|
from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
|
||||||
from calibre.ebooks.mobi.utils import convert_color_for_font_tag
|
from calibre.ebooks.mobi.utils import convert_color_for_font_tag
|
||||||
from calibre.utils.imghdr import identify
|
from calibre.utils.imghdr import identify
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
MBP_NS = 'http://mobipocket.com/ns/mbp'
|
MBP_NS = 'http://mobipocket.com/ns/mbp'
|
||||||
|
|
||||||
@ -151,7 +152,7 @@ class MobiMLizer(object):
|
|||||||
return "%dem" % int(round(ptsize / embase))
|
return "%dem" % int(round(ptsize / embase))
|
||||||
|
|
||||||
def preize_text(self, text, pre_wrap=False):
|
def preize_text(self, text, pre_wrap=False):
|
||||||
text = unicode(text)
|
text = unicode_type(text)
|
||||||
if pre_wrap:
|
if pre_wrap:
|
||||||
# Replace n consecutive spaces with n-1 NBSP + space
|
# Replace n consecutive spaces with n-1 NBSP + space
|
||||||
text = re.sub(r' {2,}', lambda m:(u'\xa0'*(len(m.group())-1) + u' '), text)
|
text = re.sub(r' {2,}', lambda m:(u'\xa0'*(len(m.group())-1) + u' '), text)
|
||||||
@ -228,7 +229,7 @@ class MobiMLizer(object):
|
|||||||
while vspace > 0:
|
while vspace > 0:
|
||||||
wrapper.addprevious(etree.Element(XHTML('br')))
|
wrapper.addprevious(etree.Element(XHTML('br')))
|
||||||
vspace -= 1
|
vspace -= 1
|
||||||
if istate.halign != 'auto' and isinstance(istate.halign, (str, unicode)):
|
if istate.halign != 'auto' and isinstance(istate.halign, (str, unicode_type)):
|
||||||
para.attrib['align'] = istate.halign
|
para.attrib['align'] = istate.halign
|
||||||
istate.rendered = True
|
istate.rendered = True
|
||||||
pstate = bstate.istate
|
pstate = bstate.istate
|
||||||
|
@ -16,6 +16,7 @@ from calibre.ebooks.mobi.langcodes import main_language, sub_language, mobi2iana
|
|||||||
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
|
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
|
||||||
from calibre.utils.localization import canonicalize_lang
|
from calibre.utils.localization import canonicalize_lang
|
||||||
from calibre.utils.config_base import tweaks
|
from calibre.utils.config_base import tweaks
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
NULL_INDEX = 0xffffffff
|
NULL_INDEX = 0xffffffff
|
||||||
|
|
||||||
@ -239,7 +240,7 @@ class BookHeader(object):
|
|||||||
|
|
||||||
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
|
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
|
||||||
self.exth = None
|
self.exth = None
|
||||||
if not isinstance(self.title, unicode):
|
if not isinstance(self.title, unicode_type):
|
||||||
self.title = self.title.decode(self.codec, 'replace')
|
self.title = self.title.decode(self.codec, 'replace')
|
||||||
if self.exth_flag & 0x40:
|
if self.exth_flag & 0x40:
|
||||||
try:
|
try:
|
||||||
|
@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import re, os
|
import re, os
|
||||||
|
|
||||||
from calibre.ebooks.chardet import strip_encoding_declarations
|
from calibre.ebooks.chardet import strip_encoding_declarations
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
def update_internal_links(mobi8_reader, log):
|
def update_internal_links(mobi8_reader, log):
|
||||||
@ -130,7 +131,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
|
|||||||
flows.append(flow)
|
flows.append(flow)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not isinstance(flow, unicode):
|
if not isinstance(flow, unicode_type):
|
||||||
try:
|
try:
|
||||||
flow = flow.decode(mr.header.codec)
|
flow = flow.decode(mr.header.codec)
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user