Various py3 related fixes exposed by the unicode patch

Kovid Goyal committed 2019-03-13 06:40:38 +05:30
parent 56af613e10
commit 5b76089839
48 changed files with 390 additions and 3210 deletions

View File

@ -670,7 +670,7 @@ class DB(object):
if d['is_multiple']: if d['is_multiple']:
if x is None: if x is None:
return [] return []
if isinstance(x, (str, unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
x = x.split(d['multiple_seps']['ui_to_list']) x = x.split(d['multiple_seps']['ui_to_list'])
x = [y.strip() for y in x if y.strip()] x = [y.strip() for y in x if y.strip()]
x = [y.decode(preferred_encoding, 'replace') if not isinstance(y, x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
@ -681,12 +681,16 @@ class DB(object):
x.decode(preferred_encoding, 'replace') x.decode(preferred_encoding, 'replace')
def adapt_datetime(x, d): def adapt_datetime(x, d):
if isinstance(x, (str, unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
if isinstance(x, bytes):
x = x.decode(preferred_encoding, 'replace')
x = parse_date(x, assume_utc=False, as_utc=False) x = parse_date(x, assume_utc=False, as_utc=False)
return x return x
def adapt_bool(x, d): def adapt_bool(x, d):
if isinstance(x, (str, unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
if isinstance(x, bytes):
x = x.decode(preferred_encoding, 'replace')
x = x.lower() x = x.lower()
if x == 'true': if x == 'true':
x = True x = True
@ -707,7 +711,9 @@ class DB(object):
def adapt_number(x, d): def adapt_number(x, d):
if x is None: if x is None:
return None return None
if isinstance(x, (str, unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
if isinstance(x, bytes):
x = x.decode(preferred_encoding, 'replace')
if x.lower() == 'none': if x.lower() == 'none':
return None return None
if d['datatype'] == 'int': if d['datatype'] == 'int':
@ -1083,7 +1089,7 @@ class DB(object):
def dump_and_restore(self, callback=None, sql=None): def dump_and_restore(self, callback=None, sql=None):
import codecs import codecs
from calibre.utils.apsw_shell import Shell from apsw import Shell
from contextlib import closing from contextlib import closing
if callback is None: if callback is None:
callback = lambda x: x callback = lambda x: x
@ -1096,7 +1102,7 @@ class DB(object):
shell = Shell(db=self.conn, stdout=buf) shell = Shell(db=self.conn, stdout=buf)
shell.process_command('.dump') shell.process_command('.dump')
else: else:
with open(fname, 'wb') as buf: with lopen(fname, 'wb') as buf:
buf.write(sql if isinstance(sql, bytes) else sql.encode('utf-8')) buf.write(sql if isinstance(sql, bytes) else sql.encode('utf-8'))
with TemporaryFile(suffix='_tmpdb.db', dir=os.path.dirname(self.dbpath)) as tmpdb: with TemporaryFile(suffix='_tmpdb.db', dir=os.path.dirname(self.dbpath)) as tmpdb:
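Note on the adapter changes above: the recurring pattern is to decode bytes once, up front, and then operate only on text. A minimal standalone sketch of that idea (not calibre code; here preferred_encoding is derived from the locale as an assumption, calibre uses its own constant):

    # Sketch: normalise bytes to text before doing any string work.
    import locale

    preferred_encoding = locale.getpreferredencoding() or 'utf-8'

    def adapt_bool(x):
        # Accept text or bytes; anything else falls back to bool()
        if isinstance(x, bytes):
            x = x.decode(preferred_encoding, 'replace')
        if isinstance(x, str):
            x = x.lower()
            if x == 'true':
                return True
            if x == 'false':
                return False
            if x == 'none':
                return None
            return bool(int(x))
        return None if x is None else bool(x)

    print(adapt_bool(b'True'), adapt_bool('false'), adapt_bool(1))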

View File

@ -11,6 +11,7 @@ import copy
from functools import partial from functools import partial
from polyglot.builtins import unicode_type, map from polyglot.builtins import unicode_type, map
from calibre.constants import ispy3
from calibre.ebooks.metadata import author_to_author_sort from calibre.ebooks.metadata import author_to_author_sort
from calibre.utils.config_base import tweaks from calibre.utils.config_base import tweaks
from calibre.utils.icu import sort_key, collation_order from calibre.utils.icu import sort_key, collation_order
@ -43,11 +44,19 @@ class Tag(object):
         self.search_expression = search_expression
         self.original_categories = None

-    def __unicode__(self):
+    @property
+    def string_representation(self):
         return u'%s:%s:%s:%s:%s'%(self.name, self.count, self.id, self.state, self.category)

-    def __str__(self):
-        return unicode_type(self).encode('utf-8')
+    if ispy3:
+        def __str__(self):
+            return self.string_representation
+    else:
+        def __str__(self):
+            return self.string_representation.encode('utf-8')
+
+        def __unicode__(self):
+            return self.string_representation

     def __repr__(self):
         return str(self)
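The Tag change above is an instance of a general py2/py3 dunder pattern used throughout this commit: compute the text once, then bind __str__ (and, on Python 2, __unicode__) accordingly. A standalone sketch of that pattern, not the calibre class itself:

    # Sketch: one text-producing property, interpreter-specific dunders.
    import sys

    ispy3 = sys.version_info.major >= 3

    class Tag(object):
        def __init__(self, name):
            self.name = name

        @property
        def string_representation(self):
            return u'tag:%s' % self.name

        if ispy3:
            def __str__(self):
                return self.string_representation
        else:
            def __str__(self):
                return self.string_representation.encode('utf-8')

            def __unicode__(self):
                return self.string_representation

    print(str(Tag('fiction')))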

View File

@ -80,7 +80,7 @@ def _match(query, value, matchkind, use_primary_find_in_search=True, case_sensit
if primary_contains(query, t): if primary_contains(query, t):
return True return True
elif query in t: elif query in t:
return True return True
except re.error: except re.error:
pass pass
return False return False
@ -149,7 +149,9 @@ class DateSearch(object): # {{{
if query == 'false': if query == 'false':
for v, book_ids in field_iter(): for v, book_ids in field_iter():
if isinstance(v, (str, unicode_type)): if isinstance(v, (bytes, unicode_type)):
if isinstance(v, bytes):
v = v.decode(preferred_encoding, 'replace')
v = parse_date(v) v = parse_date(v)
if v is None or v <= UNDEFINED_DATE: if v is None or v <= UNDEFINED_DATE:
matches |= book_ids matches |= book_ids
@ -157,7 +159,9 @@ class DateSearch(object): # {{{
if query == 'true': if query == 'true':
for v, book_ids in field_iter(): for v, book_ids in field_iter():
if isinstance(v, (str, unicode_type)): if isinstance(v, (bytes, unicode_type)):
if isinstance(v, bytes):
v = v.decode(preferred_encoding, 'replace')
v = parse_date(v) v = parse_date(v)
if v is not None and v > UNDEFINED_DATE: if v is not None and v > UNDEFINED_DATE:
matches |= book_ids matches |= book_ids

View File

@ -13,13 +13,15 @@ from polyglot.builtins import map, unicode_type
from threading import Lock from threading import Lock
from calibre import as_unicode, prints from calibre import as_unicode, prints
from calibre.constants import cache_dir, get_windows_number_formats, iswindows from calibre.constants import cache_dir, get_windows_number_formats, iswindows, preferred_encoding
from calibre.utils.localization import canonicalize_lang from calibre.utils.localization import canonicalize_lang
def force_to_bool(val): def force_to_bool(val):
if isinstance(val, (str, unicode_type)): if isinstance(val, (bytes, unicode_type)):
if isinstance(val, bytes):
val = val.decode(preferred_encoding, 'replace')
try: try:
val = icu_lower(val) val = icu_lower(val)
if not val: if not val:

View File

@ -88,6 +88,8 @@ def adapt_number(typ, x):
if x is None: if x is None:
return None return None
if isinstance(x, (unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
if isinstance(x, bytes):
x = x.decode(preferred_encoding, 'replace')
if not x or x.lower() == 'none': if not x or x.lower() == 'none':
return None return None
return typ(x) return typ(x)
@ -95,6 +97,8 @@ def adapt_number(typ, x):
def adapt_bool(x): def adapt_bool(x):
if isinstance(x, (unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
if isinstance(x, bytes):
x = x.decode(preferred_encoding, 'replace')
x = x.lower() x = x.lower()
if x == 'true': if x == 'true':
x = True x = True

View File

@ -171,7 +171,7 @@ class PRST1(USBMS):
with closing(sqlite.connect(dbpath)) as connection: with closing(sqlite.connect(dbpath)) as connection:
# Replace undecodable characters in the db instead of erroring out # Replace undecodable characters in the db instead of erroring out
connection.text_factory = lambda x: unicode_type(x, "utf-8", "replace") connection.text_factory = lambda x: x if isinstance(x, unicode_type) else x.decode('utf-8', 'replace')
cursor = connection.cursor() cursor = connection.cursor()
# Query collections # Query collections
@ -758,7 +758,7 @@ class PRST1(USBMS):
thumbnail_path = THUMBPATH%book.bookId thumbnail_path = THUMBPATH%book.bookId
prefix = self._main_prefix if source_id is 0 else self._card_a_prefix prefix = self._main_prefix if source_id == 0 else self._card_a_prefix
thumbnail_file_path = os.path.join(prefix, *thumbnail_path.split('/')) thumbnail_file_path = os.path.join(prefix, *thumbnail_path.split('/'))
thumbnail_dir_path = os.path.dirname(thumbnail_file_path) thumbnail_dir_path = os.path.dirname(thumbnail_file_path)
if not os.path.exists(thumbnail_dir_path): if not os.path.exists(thumbnail_dir_path):

View File

@ -398,7 +398,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
if isinstance(a, dict): if isinstance(a, dict):
printable = {} printable = {}
for k,v in a.iteritems(): for k,v in a.iteritems():
if isinstance(v, (str, unicode_type)) and len(v) > 50: if isinstance(v, (bytes, unicode_type)) and len(v) > 50:
printable[k] = 'too long' printable[k] = 'too long'
else: else:
printable[k] = v printable[k] = v
@ -666,7 +666,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
if v: if v:
v = json.loads(v, object_hook=from_json) v = json.loads(v, object_hook=from_json)
if print_debug_info and extra_debug: if print_debug_info and extra_debug:
self._debug('receive after decode') # , v) self._debug('receive after decode') # , v)
return (self.reverse_opcodes[v[0]], v[1]) return (self.reverse_opcodes[v[0]], v[1])
self._debug('protocol error -- empty json string') self._debug('protocol error -- empty json string')
except socket.timeout: except socket.timeout:
@ -1155,7 +1155,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
(self.DEFAULT_THUMBNAIL_HEIGHT/3) * 4) (self.DEFAULT_THUMBNAIL_HEIGHT/3) * 4)
self._debug('cover width', self.THUMBNAIL_WIDTH) self._debug('cover width', self.THUMBNAIL_WIDTH)
elif hasattr(self, 'THUMBNAIL_WIDTH'): elif hasattr(self, 'THUMBNAIL_WIDTH'):
delattr(self, 'THUMBNAIL_WIDTH') delattr(self, 'THUMBNAIL_WIDTH')
self.is_read_sync_col = result.get('isReadSyncCol', None) self.is_read_sync_col = result.get('isReadSyncCol', None)
self._debug('Device is_read sync col', self.is_read_sync_col) self._debug('Device is_read sync col', self.is_read_sync_col)

View File

@ -12,7 +12,6 @@ from calibre.customize.conversion import (OutputFormatPlugin,
OptionRecommendation) OptionRecommendation)
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir from calibre import CurrentDir
from calibre.constants import filesystem_encoding
from polyglot.builtins import unicode_type from polyglot.builtins import unicode_type
block_level_tags = ( block_level_tags = (
@ -41,7 +40,7 @@ block_level_tags = (
'pre', 'pre',
'table', 'table',
'ul', 'ul',
) )
class EPUBOutput(OutputFormatPlugin): class EPUBOutput(OutputFormatPlugin):
@ -326,13 +325,11 @@ class EPUBOutput(OutputFormatPlugin):
fonts = [] fonts = []
for uri in list(uris.keys()): for uri in list(uris.keys()):
path = uris[uri] path = uris[uri]
if isinstance(path, unicode_type):
path = path.encode(filesystem_encoding)
if not os.path.exists(path): if not os.path.exists(path):
uris.pop(uri) uris.pop(uri)
continue continue
self.log.debug('Encrypting font:', uri) self.log.debug('Encrypting font:', uri)
with open(path, 'r+b') as f: with lopen(path, 'r+b') as f:
data = f.read(1024) data = f.read(1024)
if len(data) >= 1024: if len(data) >= 1024:
f.seek(0) f.seek(0)

View File

@ -55,11 +55,7 @@ def munge_paths(basepath, url):
if not path: if not path:
path = basepath path = basepath
elif not os.path.isabs(path): elif not os.path.isabs(path):
if isinstance(path, unicode_type):
path = path.encode(sys.getfilesystemencoding())
dn = os.path.dirname(basepath) dn = os.path.dirname(basepath)
if isinstance(dn, unicode_type):
dn = dn.encode(sys.getfilesystemencoding())
path = os.path.join(dn, path) path = os.path.join(dn, path)
return os.path.normpath(path), fragment return os.path.normpath(path), fragment
@ -1480,11 +1476,6 @@ class HTMLConverter(object):
ext = os.path.splitext(path)[1] ext = os.path.splitext(path)[1]
if ext: if ext:
ext = ext[1:].lower() ext = ext[1:].lower()
enc = sys.getfilesystemencoding()
if not enc:
enc = 'utf8'
if isinstance(path, unicode_type):
path = path.encode(enc, 'replace')
if os.access(path, os.R_OK) and os.path.isfile(path): if os.access(path, os.R_OK) and os.path.isfile(path):
if ext in ['png', 'jpg', 'bmp', 'jpeg']: if ext in ['png', 'jpg', 'bmp', 'jpeg']:
self.process_image(path, tag_css) self.process_image(path, tag_css)
@ -1811,8 +1802,6 @@ class HTMLConverter(object):
def process_file(path, options, logger): def process_file(path, options, logger):
if not isinstance(path, unicode_type):
path = path.decode(sys.getfilesystemencoding())
path = os.path.abspath(path) path = os.path.abspath(path)
default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0]) default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
dirpath = os.path.dirname(path) dirpath = os.path.dirname(path)

View File

@ -196,8 +196,8 @@ class xml_field(object):
if not val: if not val:
val = u'' val = u''
if isinstance(val, unicode_type): if not isinstance(val, unicode_type):
val = unicode_type(val, 'utf-8') val = val.decode('utf-8')
elems = document.getElementsByTagName(self.tag_name) elems = document.getElementsByTagName(self.tag_name)
elem = None elem = None

View File

@ -4,6 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import struct, array, zlib, cStringIO, collections, re import struct, array, zlib, cStringIO, collections, re
from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE
from calibre.constants import ispy3
from calibre import entity_to_unicode, prepare_string_for_xml from calibre import entity_to_unicode, prepare_string_for_xml
from calibre.ebooks.lrf.tags import Tag from calibre.ebooks.lrf.tags import Tag
from polyglot.builtins import unicode_type from polyglot.builtins import unicode_type
@ -88,11 +89,8 @@ class LRFObject(object):
for i in range(0): for i in range(0):
yield i yield i
-    def __unicode__(self):
-        return unicode_type(self.__class__.__name__)
-
     def __str__(self):
-        return unicode_type(self).encode('utf-8')
+        return self.__class__.__name__
class LRFContentObject(LRFObject): class LRFContentObject(LRFObject):
@ -204,12 +202,15 @@ class StyleObject(object):
s += u'%s="%s" '%(attr, getattr(self, attr)) s += u'%s="%s" '%(attr, getattr(self, attr))
return s return s
def __unicode__(self): def __str__(self):
s = u'<%s objid="%s" stylelabel="%s" '%(self.__class__.__name__.replace('Attr', 'Style'), self.id, self.id) s = u'<%s objid="%s" stylelabel="%s" '%(self.__class__.__name__.replace('Attr', 'Style'), self.id, self.id)
s += self._tags_to_xml() s += self._tags_to_xml()
s += u'/>\n' s += u'/>\n'
return s return s
if not ispy3:
__unicode__ = __str__
def as_dict(self): def as_dict(self):
d = {} d = {}
for h in self.tag_map.values(): for h in self.tag_map.values():
@ -252,11 +253,11 @@ class Color(object):
def __init__(self, val): def __init__(self, val):
self.a, self.r, self.g, self.b = val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF self.a, self.r, self.g, self.b = val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF
def __unicode__(self): def __str__(self):
return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b) return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b)
def __str__(self): if not ispy3:
return unicode_type(self) __unicode__ = __str__
def __len__(self): def __len__(self):
return 4 return 4
@ -284,10 +285,13 @@ class PageDiv(EmptyPageElement):
self.pain, self.spacesize, self.linewidth = pain, spacesize, linewidth self.pain, self.spacesize, self.linewidth = pain, spacesize, linewidth
self.linecolor = Color(linecolor) self.linecolor = Color(linecolor)
def __unicode__(self): def __str__(self):
return u'\n<PageDiv pain="%s" spacesize="%s" linewidth="%s" linecolor="%s" />\n'%\ return u'\n<PageDiv pain="%s" spacesize="%s" linewidth="%s" linecolor="%s" />\n'%\
(self.pain, self.spacesize, self.linewidth, self.color) (self.pain, self.spacesize, self.linewidth, self.color)
if not ispy3:
__unicode__ = __str__
class RuledLine(EmptyPageElement): class RuledLine(EmptyPageElement):
@ -299,19 +303,25 @@ class RuledLine(EmptyPageElement):
self.linecolor = Color(linecolor) self.linecolor = Color(linecolor)
self.id = -1 self.id = -1
def __unicode__(self): def __str__(self):
return u'\n<RuledLine linelength="%s" linetype="%s" linewidth="%s" linecolor="%s" />\n'%\ return u'\n<RuledLine linelength="%s" linetype="%s" linewidth="%s" linecolor="%s" />\n'%\
(self.linelength, self.linetype, self.linewidth, self.linecolor) (self.linelength, self.linetype, self.linewidth, self.linecolor)
if not ispy3:
__unicode__ = __str__
class Wait(EmptyPageElement): class Wait(EmptyPageElement):
def __init__(self, time): def __init__(self, time):
self.time = time self.time = time
def __unicode__(self): def __str__(self):
return u'\n<Wait time="%d" />\n'%(self.time) return u'\n<Wait time="%d" />\n'%(self.time)
if not ispy3:
__unicode__ = __str__
class Locate(EmptyPageElement): class Locate(EmptyPageElement):
@ -320,19 +330,25 @@ class Locate(EmptyPageElement):
def __init__(self, pos): def __init__(self, pos):
self.pos = self.pos_map[pos] self.pos = self.pos_map[pos]
def __unicode__(self): def __str__(self):
return u'\n<Locate pos="%s" />\n'%(self.pos) return u'\n<Locate pos="%s" />\n'%(self.pos)
if not ispy3:
__unicode__ = __str__
class BlockSpace(EmptyPageElement): class BlockSpace(EmptyPageElement):
def __init__(self, xspace, yspace): def __init__(self, xspace, yspace):
self.xspace, self.yspace = xspace, yspace self.xspace, self.yspace = xspace, yspace
def __unicode__(self): def __str__(self):
return u'\n<BlockSpace xspace="%d" yspace="%d" />\n'%\ return u'\n<BlockSpace xspace="%d" yspace="%d" />\n'%\
(self.xspace, self.yspace) (self.xspace, self.yspace)
if not ispy3:
__unicode__ = __str__
class Page(LRFStream): class Page(LRFStream):
tag_map = { tag_map = {
@ -427,15 +443,15 @@ class Page(LRFStream):
for i in self.content: for i in self.content:
yield i yield i
def __unicode__(self): def __str__(self):
s = u'\n<Page pagestyle="%d" objid="%d">\n'%(self.style_id, self.id) s = u'\n<Page pagestyle="%d" objid="%d">\n'%(self.style_id, self.id)
for i in self: for i in self:
s += unicode_type(i) s += unicode_type(i)
s += '\n</Page>\n' s += '\n</Page>\n'
return s return s
def __str__(self): if not ispy3:
return unicode_type(self) __unicode__ = __str__
def to_html(self): def to_html(self):
s = u'' s = u''
@ -612,7 +628,7 @@ class Block(LRFStream, TextCSS):
if hasattr(self, attr): if hasattr(self, attr):
self.attrs[attr] = getattr(self, attr) self.attrs[attr] = getattr(self, attr)
def __unicode__(self): def __str__(self):
s = u'\n<%s objid="%d" blockstyle="%d" '%(self.name, self.id, self.style_id) s = u'\n<%s objid="%d" blockstyle="%d" '%(self.name, self.id, self.style_id)
if hasattr(self, 'textstyle_id'): if hasattr(self, 'textstyle_id'):
s += 'textstyle="%d" '%(self.textstyle_id,) s += 'textstyle="%d" '%(self.textstyle_id,)
@ -625,6 +641,9 @@ class Block(LRFStream, TextCSS):
return s return s
return s.rstrip() + ' />\n' return s.rstrip() + ' />\n'
if not ispy3:
__unicode__ = __str__
def to_html(self): def to_html(self):
if self.name == 'TextBlock': if self.name == 'TextBlock':
return u'<div class="block%s text%s">%s</div>'%(self.style_id, self.textstyle_id, self.content.to_html()) return u'<div class="block%s text%s">%s</div>'%(self.style_id, self.textstyle_id, self.content.to_html())
@ -697,12 +716,15 @@ class Text(LRFStream):
self.attrs = attrs self.attrs = attrs
self.self_closing = self_closing self.self_closing = self_closing
def __unicode__(self): def __str__(self):
s = u'<%s '%(self.name,) s = u'<%s '%(self.name,)
for name, val in self.attrs.items(): for name, val in self.attrs.items():
s += '%s="%s" '%(name, val) s += '%s="%s" '%(name, val)
return s.rstrip() + (u' />' if self.self_closing else u'>') return s.rstrip() + (u' />' if self.self_closing else u'>')
if not ispy3:
__unicode__ = __str__
def to_html(self): def to_html(self):
s = u'' s = u''
return s return s
@ -878,7 +900,7 @@ class Text(LRFStream):
self.close_containers() self.close_containers()
self.stream = None self.stream = None
def __unicode__(self): def __str__(self):
s = u'' s = u''
open_containers = collections.deque() open_containers = collections.deque()
for c in self.content: for c in self.content:
@ -900,6 +922,9 @@ class Text(LRFStream):
raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],)) raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],))
return s return s
if not ispy3:
__unicode__ = __str__
def to_html(self): def to_html(self):
s = u'' s = u''
open_containers = collections.deque() open_containers = collections.deque()
@ -944,10 +969,13 @@ class Image(LRFObject):
encoding = property(fget=lambda self : self._document.objects[self.refstream].encoding) encoding = property(fget=lambda self : self._document.objects[self.refstream].encoding)
data = property(fget=lambda self : self._document.objects[self.refstream].stream) data = property(fget=lambda self : self._document.objects[self.refstream].stream)
def __unicode__(self): def __str__(self):
return u'<Image objid="%s" x0="%d" y0="%d" x1="%d" y1="%d" xsize="%d" ysize="%d" refstream="%d" />\n'%\ return u'<Image objid="%s" x0="%d" y0="%d" x1="%d" y1="%d" xsize="%d" ysize="%d" refstream="%d" />\n'%\
(self.id, self.x0, self.y0, self.x1, self.y1, self.xsize, self.ysize, self.refstream) (self.id, self.x0, self.y0, self.x1, self.y1, self.xsize, self.ysize, self.refstream)
if not ispy3:
__unicode__ = __str__
class PutObj(EmptyPageElement): class PutObj(EmptyPageElement):
@ -955,9 +983,12 @@ class PutObj(EmptyPageElement):
self.x1, self.y1, self.refobj = x1, y1, refobj self.x1, self.y1, self.refobj = x1, y1, refobj
self.object = objects[refobj] self.object = objects[refobj]
def __unicode__(self): def __str__(self):
return u'<PutObj x1="%d" y1="%d" refobj="%d" />'%(self.x1, self.y1, self.refobj) return u'<PutObj x1="%d" y1="%d" refobj="%d" />'%(self.x1, self.y1, self.refobj)
if not ispy3:
__unicode__ = __str__
class Canvas(LRFStream): class Canvas(LRFStream):
tag_map = { tag_map = {
@ -996,7 +1027,7 @@ class Canvas(LRFStream):
except struct.error: except struct.error:
print('Canvas object has errors, skipping.') print('Canvas object has errors, skipping.')
def __unicode__(self): def __str__(self):
s = '\n<%s objid="%s" '%(self.__class__.__name__, self.id,) s = '\n<%s objid="%s" '%(self.__class__.__name__, self.id,)
for attr in self.attrs: for attr in self.attrs:
s += '%s="%s" '%(attr, self.attrs[attr]) s += '%s="%s" '%(attr, self.attrs[attr])
@ -1006,6 +1037,9 @@ class Canvas(LRFStream):
s += '</%s>\n'%(self.__class__.__name__,) s += '</%s>\n'%(self.__class__.__name__,)
return s return s
if not ispy3:
__unicode__ = __str__
def __iter__(self): def __iter__(self):
for i in self._contents: for i in self._contents:
yield i yield i
@ -1039,10 +1073,13 @@ class ImageStream(LRFStream):
if self._document is not None: if self._document is not None:
self._document.image_map[self.id] = self self._document.image_map[self.id] = self
def __unicode__(self): def __str__(self):
return u'<ImageStream objid="%s" encoding="%s" file="%s" />\n'%\ return u'<ImageStream objid="%s" encoding="%s" file="%s" />\n'%\
(self.id, self.encoding, self.file) (self.id, self.encoding, self.file)
if not ispy3:
__unicode__ = __str__
class Import(LRFStream): class Import(LRFStream):
pass pass
@ -1118,7 +1155,7 @@ class Button(LRFObject):
return i[1:][0] return i[1:][0]
return (None, None) return (None, None)
def __unicode__(self): def __str__(self):
s = u'<Button objid="%s">\n'%(self.id,) s = u'<Button objid="%s">\n'%(self.id,)
if self.button_flags & 0x10 != 0: if self.button_flags & 0x10 != 0:
s += '<PushButton ' s += '<PushButton '
@ -1132,6 +1169,9 @@ class Button(LRFObject):
s += '</Button>\n' s += '</Button>\n'
return s return s
if not ispy3:
__unicode__ = __str__
refpage = property(fget=lambda self : self.jump_action(2)[0]) refpage = property(fget=lambda self : self.jump_action(2)[0])
refobj = property(fget=lambda self : self.jump_action(2)[1]) refobj = property(fget=lambda self : self.jump_action(2)[1])
@ -1192,7 +1232,7 @@ class BookAttr(StyleObject, LRFObject):
def add_font(self, tag, f): def add_font(self, tag, f):
self.font_link_list.append(tag.dword) self.font_link_list.append(tag.dword)
def __unicode__(self): def __str__(self):
s = u'<BookStyle objid="%s" stylelabel="%s">\n'%(self.id, self.id) s = u'<BookStyle objid="%s" stylelabel="%s">\n'%(self.id, self.id)
s += u'<SetDefault %s />\n'%(self._tags_to_xml(),) s += u'<SetDefault %s />\n'%(self._tags_to_xml(),)
doc = self._document doc = self._document
@ -1203,6 +1243,9 @@ class BookAttr(StyleObject, LRFObject):
s += '</BookStyle>\n' s += '</BookStyle>\n'
return s return s
if not ispy3:
__unicode__ = __str__
class SimpleText(Text): class SimpleText(Text):
pass pass
@ -1213,9 +1256,12 @@ class TocLabel(object):
def __init__(self, refpage, refobject, label): def __init__(self, refpage, refobject, label):
self.refpage, self.refobject, self.label = refpage, refobject, label self.refpage, self.refobject, self.label = refpage, refobject, label
def __unicode__(self): def __str__(self):
return u'<TocLabel refpage="%s" refobj="%s">%s</TocLabel>\n'%(self.refpage, self.refobject, self.label) return u'<TocLabel refpage="%s" refobj="%s">%s</TocLabel>\n'%(self.refpage, self.refobject, self.label)
if not ispy3:
__unicode__ = __str__
class TOCObject(LRFStream): class TOCObject(LRFStream):
@ -1237,12 +1283,15 @@ class TOCObject(LRFStream):
for i in self._contents: for i in self._contents:
yield i yield i
def __unicode__(self): def __str__(self):
s = u'<TOC>\n' s = u'<TOC>\n'
for i in self: for i in self:
s += unicode_type(i) s += unicode_type(i)
return s + '</TOC>\n' return s + '</TOC>\n'
if not ispy3:
__unicode__ = __str__
object_map = [ object_map = [
None, # 00 None, # 00

View File

@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
import copy, traceback import copy, traceback
from calibre import prints from calibre import prints
from calibre.constants import DEBUG from calibre.constants import DEBUG, ispy3
from calibre.ebooks.metadata.book import (SC_COPYABLE_FIELDS, from calibre.ebooks.metadata.book import (SC_COPYABLE_FIELDS,
SC_FIELDS_COPY_NOT_NULL, STANDARD_METADATA_FIELDS, SC_FIELDS_COPY_NOT_NULL, STANDARD_METADATA_FIELDS,
TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS) TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS)
@ -709,7 +709,7 @@ class Metadata(object):
return (None, None, None, None) return (None, None, None, None)
def __unicode__(self): def __unicode__representation__(self):
''' '''
A string representation of this object, suitable for printing to A string representation of this object, suitable for printing to
console console
@ -791,11 +791,17 @@ class Metadata(object):
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
return u'<table>%s</table>'%u'\n'.join(ans) return u'<table>%s</table>'%u'\n'.join(ans)
-    def __str__(self):
-        return self.__unicode__().encode('utf-8')
+    if ispy3:
+        __str__ = __unicode__representation__
+    else:
+        __unicode__ = __unicode__representation__
+
+        def __str__(self):
+            return self.__unicode__().encode('utf-8')
def __nonzero__(self): def __nonzero__(self):
return bool(self.title or self.author or self.comments or self.tags) return bool(self.title or self.author or self.comments or self.tags)
__bool__ = __nonzero__
# }}} # }}}
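The __bool__ = __nonzero__ line added above reflects that Python 2 consults __nonzero__ for truthiness while Python 3 consults __bool__, so aliasing one to the other keeps bool(mi) working on both. A minimal sketch with a made-up stand-in class, not calibre's Metadata:

    # Sketch: truthiness on both Python 2 and 3.
    class Metadata(object):
        def __init__(self, title=None, authors=None):
            self.title, self.authors = title, authors

        def __nonzero__(self):
            return bool(self.title or self.authors)
        __bool__ = __nonzero__

    print(bool(Metadata()), bool(Metadata('Dune')))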

View File

@ -115,7 +115,7 @@ def get_metadata(stream):
if book_title: if book_title:
book_title = unicode_type(book_title) book_title = unicode_type(book_title)
else: else:
book_title = force_unicode_type(os.path.splitext( book_title = force_unicode(os.path.splitext(
os.path.basename(getattr(stream, 'name', os.path.basename(getattr(stream, 'name',
_('Unknown'))))[0]) _('Unknown'))))[0])
mi = MetaInformation(book_title, authors) mi = MetaInformation(book_title, authors)

View File

@ -15,7 +15,7 @@ from urlparse import urlparse
from lxml import etree from lxml import etree
from calibre.ebooks import escape_xpath_attr from calibre.ebooks import escape_xpath_attr
from calibre.constants import __appname__, __version__, filesystem_encoding from calibre.constants import __appname__, __version__, filesystem_encoding, ispy3
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata.utils import parse_opf, pretty_print_opf as _pretty_print from calibre.ebooks.metadata.utils import parse_opf, pretty_print_opf as _pretty_print
from calibre.ebooks.metadata import string_to_authors, MetaInformation, check_isbn from calibre.ebooks.metadata import string_to_authors, MetaInformation, check_isbn
@ -73,7 +73,7 @@ class Resource(object): # {{{
path = href_or_path path = href_or_path
if not os.path.isabs(path): if not os.path.isabs(path):
path = os.path.abspath(os.path.join(basedir, path)) path = os.path.abspath(os.path.join(basedir, path))
if isinstance(path, str): if isinstance(path, bytes):
path = path.decode(sys.getfilesystemencoding()) path = path.decode(sys.getfilesystemencoding())
self.path = path self.path = path
else: else:
@ -112,8 +112,8 @@ class Resource(object): # {{{
rpath = os.path.relpath(self.path, basedir) rpath = os.path.relpath(self.path, basedir)
except ValueError: # On windows path and basedir could be on different drives except ValueError: # On windows path and basedir could be on different drives
rpath = self.path rpath = self.path
if isinstance(rpath, unicode_type): if isinstance(rpath, bytes):
rpath = rpath.encode('utf-8') rpath = rpath.decode(filesystem_encoding)
return rpath.replace(os.sep, '/')+frag return rpath.replace(os.sep, '/')+frag
def set_basedir(self, path): def set_basedir(self, path):
@ -203,11 +203,16 @@ class ManifestItem(Resource): # {{{
self.mime_type = val self.mime_type = val
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
def __unicode__(self): def __unicode__representation__(self):
return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type) return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type)
def __str__(self): if ispy3:
return unicode_type(self).encode('utf-8') __str__ = __unicode__representation__
else:
__unicode__ = __unicode__representation__
def __str__(self):
return unicode_type(self).encode('utf-8')
def __repr__(self): def __repr__(self):
return unicode_type(self) return unicode_type(self)

View File

@ -229,7 +229,9 @@ class MobiMLizer(object):
while vspace > 0: while vspace > 0:
wrapper.addprevious(etree.Element(XHTML('br'))) wrapper.addprevious(etree.Element(XHTML('br')))
vspace -= 1 vspace -= 1
if istate.halign != 'auto' and isinstance(istate.halign, (str, unicode_type)): if istate.halign != 'auto' and isinstance(istate.halign, (bytes, unicode_type)):
if isinstance(istate.halign, bytes):
istate.halign = istate.halign.decode('utf-8')
para.attrib['align'] = istate.halign para.attrib['align'] = istate.halign
istate.rendered = True istate.rendered = True
pstate = bstate.istate pstate = bstate.istate
@ -568,17 +570,17 @@ class MobiMLizer(object):
self.opts.mobi_ignore_margins = False self.opts.mobi_ignore_margins = False
if (text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS or ( if (text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS or (
# We have an id but no text and no children, the id should still # We have an id but no text and no children, the id should still
# be added. # be added.
istate.ids and tag in ('a', 'span', 'i', 'b', 'u') and istate.ids and tag in ('a', 'span', 'i', 'b', 'u') and
len(elem)==0)): len(elem)==0)):
if tag == 'li' and len(istates) > 1 and 'value' in elem.attrib: if tag == 'li' and len(istates) > 1 and 'value' in elem.attrib:
try: try:
value = int(elem.attrib['value']) value = int(elem.attrib['value'])
istates[-2].list_num = value - 1 istates[-2].list_num = value - 1
except: except:
pass pass
self.mobimlize_content(tag, text, bstate, istates) self.mobimlize_content(tag, text, bstate, istates)
for child in elem: for child in elem:
self.mobimlize_elem(child, stylizer, bstate, istates) self.mobimlize_elem(child, stylizer, bstate, istates)
tail = None tail = None

View File

@ -283,24 +283,29 @@ class MobiReader(object):
ref.attrib['href'] = os.path.basename(htmlfile) + ref.attrib['href'] ref.attrib['href'] = os.path.basename(htmlfile) + ref.attrib['href']
except AttributeError: except AttributeError:
pass pass
def write_as_utf8(path, data):
if isinstance(data, unicode_type):
data = data.encode('utf-8')
with lopen(path, 'wb') as f:
f.write(data)
parse_cache[htmlfile] = root parse_cache[htmlfile] = root
self.htmlfile = htmlfile self.htmlfile = htmlfile
ncx = cStringIO.StringIO() ncx = cStringIO.StringIO()
opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root) opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
self.created_opf_path = os.path.splitext(htmlfile)[0] + '.opf' self.created_opf_path = os.path.splitext(htmlfile)[0] + '.opf'
opf.render(open(self.created_opf_path, 'wb'), ncx, opf.render(lopen(self.created_opf_path, 'wb'), ncx,
ncx_manifest_entry=ncx_manifest_entry) ncx_manifest_entry=ncx_manifest_entry)
ncx = ncx.getvalue() ncx = ncx.getvalue()
if ncx: if ncx:
ncx_path = os.path.join(os.path.dirname(htmlfile), 'toc.ncx') ncx_path = os.path.join(os.path.dirname(htmlfile), 'toc.ncx')
open(ncx_path, 'wb').write(ncx) write_as_utf8(ncx_path, ncx)
-        with open('styles.css', 'wb') as s:
-            s.write(self.base_css_rules + '\n\n')
-            for cls, rule in self.tag_css_rules.items():
-                if isinstance(rule, unicode_type):
-                    rule = rule.encode('utf-8')
-                s.write('.%s { %s }\n\n' % (cls, rule))
+        css = [self.base_css_rules, '\n\n']
+        for cls, rule in self.tag_css_rules.items():
+            css.append('.%s { %s }\n\n' % (cls, rule))
+        write_as_utf8('styles.css', ''.join(css))
if self.book_header.exth is not None or self.embedded_mi is not None: if self.book_header.exth is not None or self.embedded_mi is not None:
self.log.debug('Creating OPF...') self.log.debug('Creating OPF...')
@ -310,7 +315,7 @@ class MobiReader(object):
ncx_manifest_entry) ncx_manifest_entry)
ncx = ncx.getvalue() ncx = ncx.getvalue()
if ncx: if ncx:
open(os.path.splitext(htmlfile)[0] + '.ncx', 'wb').write(ncx) write_as_utf8(os.path.splitext(htmlfile)[0] + '.ncx', ncx)
def read_embedded_metadata(self, root, elem, guide): def read_embedded_metadata(self, root, elem, guide):
raw = '<?xml version="1.0" encoding="utf-8" ?>\n<package>' + \ raw = '<?xml version="1.0" encoding="utf-8" ?>\n<package>' + \
@ -423,24 +428,25 @@ class MobiReader(object):
styles.append(style) styles.append(style)
if 'height' in attrib: if 'height' in attrib:
height = attrib.pop('height').strip() height = attrib.pop('height').strip()
-            if height and '<' not in height and '>' not in height and \
-                    re.search(r'\d+', height):
+            if (
+                    height and '<' not in height and '>' not in height and
+                    re.search(r'\d+', height)):
                 if tag.tag in ('table', 'td', 'tr'):
                     pass
                 elif tag.tag == 'img':
                     tag.set('height', height)
                 else:
                     if tag.tag == 'div' and not tag.text and \
                             (not tag.tail or not tag.tail.strip()) and \
                             not len(list(tag.iterdescendants())):
                         # Paragraph spacer
                         # Insert nbsp so that the element is never
                         # discarded by a renderer
                         tag.text = u'\u00a0' # nbsp
                         styles.append('height: %s' %
                                 self.ensure_unit(height))
                     else:
                         styles.append('margin-top: %s' % self.ensure_unit(height))
if 'width' in attrib: if 'width' in attrib:
width = attrib.pop('width').strip() width = attrib.pop('width').strip()
if width and re.search(r'\d+', width): if width and re.search(r'\d+', width):
@ -837,11 +843,10 @@ class MobiReader(object):
anchor = '<a id="filepos%d"></a>' anchor = '<a id="filepos%d"></a>'
if r > -1 and (r < l or l == end or l == -1): if r > -1 and (r < l or l == end or l == -1):
p = self.mobi_html.rfind('<', 0, end + 1) p = self.mobi_html.rfind('<', 0, end + 1)
-                if pos < end and p > -1 and \
-                        not end_tag_re.match(self.mobi_html[p:r]) and \
-                        not self.mobi_html[p:r + 1].endswith('/>'):
+                if (pos < end and p > -1 and not end_tag_re.match(self.mobi_html[p:r]) and
+                        not self.mobi_html[p:r + 1].endswith('/>')):
                     anchor = ' filepos-id="filepos%d"'
                     end = r
else: else:
end = r + 1 end = r + 1
processed_html.write(self.mobi_html[pos:end] + (anchor % oend)) processed_html.write(self.mobi_html[pos:end] + (anchor % oend))

View File

@ -1,23 +1,32 @@
 #!/usr/bin/env python2
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-from __future__ import (unicode_literals, division, absolute_import,
-                        print_function)
+from __future__ import absolute_import, division, print_function, unicode_literals

 __license__ = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import re, unicodedata
-from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
-    namespace, prefixname, urlnormalize)
+import re
+import unicodedata
+from collections import defaultdict
+from io import BytesIO
+from urlparse import urldefrag
+
 from calibre.ebooks.mobi.mobiml import MBP_NS
 from calibre.ebooks.mobi.utils import is_guide_ref_start
+from calibre.ebooks.oeb.base import (
+    OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize
+)
 from polyglot.builtins import unicode_type
-from collections import defaultdict
-from urlparse import urldefrag
-from cStringIO import StringIO
+
+
+class Buf(BytesIO):
+
+    def write(self, x):
+        if isinstance(x, unicode_type):
+            x = x.encode('utf-8')
+        BytesIO.write(self, x)
class Serializer(object): class Serializer(object):
@ -116,7 +125,7 @@ class Serializer(object):
''' '''
Return the document serialized as a single UTF-8 encoded bytestring. Return the document serialized as a single UTF-8 encoded bytestring.
''' '''
buf = self.buf = StringIO() buf = self.buf = Buf()
buf.write(b'<html>') buf.write(b'<html>')
self.serialize_head() self.serialize_head()
self.serialize_body() self.serialize_body()
@ -214,22 +223,22 @@ class Serializer(object):
# if href is provided add a link ref to the toc level output (e.g. feed_0/index.html) # if href is provided add a link ref to the toc level output (e.g. feed_0/index.html)
if href is not None: if href is not None:
# resolve the section url in id_offsets # resolve the section url in id_offsets
buf.write('<mbp:pagebreak />') buf.write(b'<mbp:pagebreak />')
self.id_offsets[urlnormalize(href)] = buf.tell() self.id_offsets[urlnormalize(href)] = buf.tell()
if tocref.klass == "periodical": if tocref.klass == "periodical":
buf.write('<div> <div height="1em"></div>') buf.write(b'<div> <div height="1em"></div>')
else: else:
t = tocref.title t = tocref.title
if isinstance(t, unicode_type): if isinstance(t, unicode_type):
t = t.encode('utf-8') t = t.encode('utf-8')
buf.write('<div></div> <div> <h2 height="1em"><font size="+2"><b>' + t + buf.write(b'<div></div> <div> <h2 height="1em"><font size="+2"><b>' + t +
'</b></font></h2> <div height="1em"></div>') b'</b></font></h2> <div height="1em"></div>')
buf.write('<ul>') buf.write(b'<ul>')
for tocitem in tocref.nodes: for tocitem in tocref.nodes:
buf.write('<li><a filepos=') buf.write(b'<li><a filepos=')
itemhref = tocitem.href itemhref = tocitem.href
if tocref.klass == 'periodical': if tocref.klass == 'periodical':
# This is a section node. # This is a section node.
@ -238,15 +247,15 @@ class Serializer(object):
# so we change the href. # so we change the href.
itemhref = re.sub(r'article_\d+/', '', itemhref) itemhref = re.sub(r'article_\d+/', '', itemhref)
self.href_offsets[itemhref].append(buf.tell()) self.href_offsets[itemhref].append(buf.tell())
buf.write('0000000000') buf.write(b'0000000000')
buf.write(' ><font size="+1"><b><u>') buf.write(b' ><font size="+1"><b><u>')
t = tocitem.title t = tocitem.title
if isinstance(t, unicode_type): if isinstance(t, unicode_type):
t = t.encode('utf-8') t = t.encode('utf-8')
buf.write(t) buf.write(t)
buf.write('</u></b></font></a></li>') buf.write(b'</u></b></font></a></li>')
buf.write('</ul><div height="1em"></div></div><mbp:pagebreak />') buf.write(b'</ul><div height="1em"></div></div><mbp:pagebreak />')
self.anchor_offset = buf.tell() self.anchor_offset = buf.tell()
buf.write(b'<body>') buf.write(b'<body>')
@ -350,7 +359,7 @@ class Serializer(object):
if child.tail: if child.tail:
self.anchor_offset = None self.anchor_offset = None
self.serialize_text(child.tail) self.serialize_text(child.tail)
buf.write(b'</%s>' % tag.encode('utf-8')) buf.write(('</%s>' % tag).encode('utf-8'))
def serialize_text(self, text, quot=False): def serialize_text(self, text, quot=False):
text = text.replace('&', '&amp;') text = text.replace('&', '&amp;')
@ -384,4 +393,4 @@ class Serializer(object):
self.start_offset = ioff self.start_offset = ioff
for hoff in hoffs: for hoff in hoffs:
buf.seek(hoff) buf.seek(hoff)
buf.write(b'%010d' % ioff) buf.write(('%010d' % ioff).encode('utf-8'))
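The Buf class introduced above exists so that both text and bytes can be written into one bytes-backed buffer while the serializer is ported. A self-contained sketch of the same idea, using only plain io.BytesIO and no calibre imports:

    # Sketch: a BytesIO that transparently encodes text writes.
    from io import BytesIO

    class Buf(BytesIO):
        def write(self, x):
            if isinstance(x, str):
                x = x.encode('utf-8')
            return BytesIO.write(self, x)

    buf = Buf()
    buf.write(b'<html>')
    buf.write(u'<p>\u00e9</p>')   # text is encoded on the way in
    buf.write(('</%s>' % 'html').encode('utf-8'))
    print(buf.getvalue())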

View File

@ -13,7 +13,7 @@ from urlparse import urldefrag, urlparse, urlunparse, urljoin
from urllib import unquote from urllib import unquote
from lxml import etree, html from lxml import etree, html
from calibre.constants import filesystem_encoding, __version__ from calibre.constants import filesystem_encoding, __version__, ispy3
from calibre.translations.dynamic import translate from calibre.translations.dynamic import translate
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.conversion.preprocess import CSSPreProcessor from calibre.ebooks.conversion.preprocess import CSSPreProcessor
@ -107,13 +107,35 @@ self_closing_bad_tags = {'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b
'span', 'strong', 'sub', 'summary', 'sup', 'textarea', 'time', 'ul', 'var', 'span', 'strong', 'sub', 'summary', 'sup', 'textarea', 'time', 'ul', 'var',
'video', 'title', 'script', 'style'} 'video', 'title', 'script', 'style'}
_self_closing_pat = re.compile(
r'<(?P<tag>%s)(?=[\s/])(?P<arg>[^>]*)/>'%('|'.join(self_closing_bad_tags)), def as_string_type(pat, for_unicode):
re.IGNORECASE) if for_unicode:
if isinstance(pat, bytes):
pat = pat.decode('utf-8')
else:
if isinstance(pat, unicode_type):
pat = pat.encode('utf-8')
return pat
def self_closing_pat(for_unicode):
attr = 'unicode_ans' if for_unicode else 'bytes_ans'
ans = getattr(self_closing_pat, attr, None)
if ans is None:
sub = '|'.join(self_closing_bad_tags)
template = r'<(?P<tag>%s)(?=[\s/])(?P<arg>[^>]*)/>'
pat = template % sub
pat = as_string_type(pat, for_unicode)
ans = re.compile(pat, flags=re.IGNORECASE)
setattr(self_closing_pat, attr, ans)
return ans
def close_self_closing_tags(raw): def close_self_closing_tags(raw):
return _self_closing_pat.sub(r'<\g<tag>\g<arg>></\g<tag>>', raw) for_unicode = isinstance(raw, unicode_type)
repl = as_string_type(r'<\g<tag>\g<arg>></\g<tag>>', for_unicode)
pat = self_closing_pat(for_unicode)
return pat.sub(repl, raw)
def uuid_id(): def uuid_id():
@ -745,11 +767,15 @@ class Metadata(object):
return 'Item(term=%r, value=%r, attrib=%r)' \ return 'Item(term=%r, value=%r, attrib=%r)' \
% (barename(self.term), self.value, self.attrib) % (barename(self.term), self.value, self.attrib)
def __str__(self): if ispy3:
return unicode_type(self.value).encode('ascii', 'xmlcharrefreplace') def __str__(self):
return as_unicode(self.value)
else:
def __str__(self):
return unicode_type(self.value).encode('ascii', 'xmlcharrefreplace')
def __unicode__(self): def __unicode__(self):
return as_unicode(self.value) return as_unicode(self.value)
def to_opf1(self, dcmeta=None, xmeta=None, nsrmap={}): def to_opf1(self, dcmeta=None, xmeta=None, nsrmap={}):
attrib = {} attrib = {}
@ -1075,19 +1101,27 @@ class Manifest(object):
self._loader = loader2 self._loader = loader2
self._data = None self._data = None
-    def __str__(self):
-        return serialize(self.data, self.media_type, pretty_print=self.oeb.pretty_print)
-
-    def __unicode__(self):
+    @property
+    def unicode_representation(self):
         data = self.data
         if isinstance(data, etree._Element):
             return xml2unicode(data, pretty_print=self.oeb.pretty_print)
         if isinstance(data, unicode_type):
             return data
         if hasattr(data, 'cssText'):
-            return data.cssText
+            return unicode_type(data.cssText, 'utf-8', 'replace')
         return unicode_type(data)

+    if ispy3:
+        def __str__(self):
+            return self.unicode_representation
+    else:
+        def __unicode__(self):
+            return self.unicode_representation
+
+        def __str__(self):
+            return serialize(self.data, self.media_type, pretty_print=self.oeb.pretty_print)
def __eq__(self, other): def __eq__(self, other):
return id(self) == id(other) return id(self) == id(other)
@ -1616,11 +1650,15 @@ class TOC(object):
ans.extend(child.get_lines(lvl+1)) ans.extend(child.get_lines(lvl+1))
return ans return ans
def __str__(self): if ispy3:
return b'\n'.join([x.encode('utf-8') for x in self.get_lines()]) def __str__(self):
return u'\n'.join(self.get_lines())
else:
def __unicode__(self):
return u'\n'.join(self.get_lines())
def __unicode__(self): def __str__(self):
return u'\n'.join(self.get_lines()) return b'\n'.join([x.encode('utf-8') for x in self.get_lines()])
def to_opf1(self, tour): def to_opf1(self, tour):
for node in self.nodes: for node in self.nodes:
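The self_closing_pat/as_string_type change above compiles one regex per string type and caches each compiled pattern on the function object, so close_self_closing_tags works whether it is handed str or bytes. A trimmed standalone sketch of that approach, with a hypothetical three-tag list instead of calibre's full set:

    # Sketch: per-type compiled regex cached on the function itself.
    import re

    self_closing_bad_tags = {'a', 'p', 'span'}

    def as_string_type(pat, for_unicode):
        # Coerce pattern/replacement to str or bytes to match the input text
        if for_unicode:
            return pat.decode('utf-8') if isinstance(pat, bytes) else pat
        return pat.encode('utf-8') if isinstance(pat, str) else pat

    def self_closing_pat(for_unicode):
        attr = 'unicode_ans' if for_unicode else 'bytes_ans'
        ans = getattr(self_closing_pat, attr, None)
        if ans is None:
            pat = r'<(?P<tag>%s)(?=[\s/])(?P<arg>[^>]*)/>' % '|'.join(self_closing_bad_tags)
            ans = re.compile(as_string_type(pat, for_unicode), flags=re.IGNORECASE)
            setattr(self_closing_pat, attr, ans)
        return ans

    def close_self_closing_tags(raw):
        for_unicode = isinstance(raw, str)
        repl = as_string_type(r'<\g<tag>\g<arg>></\g<tag>>', for_unicode)
        return self_closing_pat(for_unicode).sub(repl, raw)

    print(close_self_closing_tags('<p class="x"/>'))
    print(close_self_closing_tags(b'<span id="y"/>'))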

View File

@ -53,7 +53,7 @@ class SpineItem(unicode_type):
if not os.path.exists(path) and os.path.exists(ppath): if not os.path.exists(path) and os.path.exists(ppath):
path = ppath path = ppath
obj = super(SpineItem, cls).__new__(cls, path) obj = super(SpineItem, cls).__new__(cls, path)
with open(path, 'rb') as f: with lopen(path, 'rb') as f:
raw = f.read() raw = f.read()
if from_epub: if from_epub:
# According to the spec, HTML in EPUB must be encoded in utf-8 or # According to the spec, HTML in EPUB must be encoded in utf-8 or

View File

@ -99,7 +99,7 @@ def html5_parse(data, max_nesting_depth=100):
# Check that the asinine HTML 5 algorithm did not result in a tree with # Check that the asinine HTML 5 algorithm did not result in a tree with
# insane nesting depths # insane nesting depths
for x in data.iterdescendants(): for x in data.iterdescendants():
if isinstance(x.tag, basestring) and len(x) is 0: # Leaf node if isinstance(x.tag, basestring) and not len(x): # Leaf node
depth = node_depth(x) depth = node_depth(x)
if depth > max_nesting_depth: if depth > max_nesting_depth:
raise ValueError('HTML 5 parsing resulted in a tree with nesting' raise ValueError('HTML 5 parsing resulted in a tree with nesting'
@ -259,7 +259,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
nroot = etree.fromstring('<html></html>') nroot = etree.fromstring('<html></html>')
has_body = False has_body = False
for child in list(data): for child in list(data):
if isinstance(child.tag, (unicode_type, str)) and barename(child.tag) == 'body': if isinstance(child.tag, (unicode_type, bytes)) and barename(child.tag) == 'body':
has_body = True has_body = True
break break
parent = nroot parent = nroot

View File

@ -607,12 +607,12 @@ class Style(object):
result = base result = base
else: else:
result = self._unit_convert(width, base=base) result = self._unit_convert(width, base=base)
if isinstance(result, (unicode_type, str, bytes)): if isinstance(result, (unicode_type, bytes)):
result = self._profile.width result = self._profile.width
self._width = result self._width = result
if 'max-width' in self._style: if 'max-width' in self._style:
result = self._unit_convert(self._style['max-width'], base=base) result = self._unit_convert(self._style['max-width'], base=base)
if isinstance(result, (unicode_type, str, bytes)): if isinstance(result, (unicode_type, bytes)):
result = self._width result = self._width
if result < self._width: if result < self._width:
self._width = result self._width = result
@ -644,12 +644,12 @@ class Style(object):
result = base result = base
else: else:
result = self._unit_convert(height, base=base) result = self._unit_convert(height, base=base)
if isinstance(result, (unicode_type, str, bytes)): if isinstance(result, (unicode_type, bytes)):
result = self._profile.height result = self._profile.height
self._height = result self._height = result
if 'max-height' in self._style: if 'max-height' in self._style:
result = self._unit_convert(self._style['max-height'], base=base) result = self._unit_convert(self._style['max-height'], base=base)
if isinstance(result, (unicode_type, str, bytes)): if isinstance(result, (unicode_type, bytes)):
result = self._height result = self._height
if result < self._height: if result < self._height:
self._height = result self._height = result

View File

@ -15,7 +15,7 @@ from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.conversion.preprocess import DocAnalysis from calibre.ebooks.conversion.preprocess import DocAnalysis
from calibre.utils.cleantext import clean_ascii_chars from calibre.utils.cleantext import clean_ascii_chars
from polyglot.builtins import unicode_type from polyglot.builtins import unicode_type, map, range
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>' HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
@ -55,7 +55,7 @@ def split_txt(txt, epub_split_size_kb=0):
result in the entire document being one giant result in the entire document being one giant
paragraph. In this case the EPUB parser will not paragraph. In this case the EPUB parser will not
be able to determine where to split the file be able to determine where to split the file
to accomidate the EPUB file size limitation to accommodate the EPUB file size limitation
and will fail. and will fail.
''' '''
# Takes care if there is no point to split # Takes care if there is no point to split
@ -66,9 +66,12 @@ def split_txt(txt, epub_split_size_kb=0):
# Calculating the average chunk value for easy splitting as EPUB (+2 as a safe margin) # Calculating the average chunk value for easy splitting as EPUB (+2 as a safe margin)
chunk_size = long(length_byte / (int(length_byte / (epub_split_size_kb * 1024)) + 2)) chunk_size = long(length_byte / (int(length_byte / (epub_split_size_kb * 1024)) + 2))
# if there are chunks with a superior size then go and break # if there are chunks with a superior size then go and break
-    if (len(filter(lambda x: len(x) > chunk_size, txt.split('\n\n')))) :
-        txt = '\n\n'.join([split_string_separator(line, chunk_size)
-            for line in txt.split('\n\n')])
+    parts = txt.split(b'\n\n')
+    lengths = tuple(map(len, parts))
+    if lengths and max(lengths) > chunk_size:
+        txt = b'\n\n'.join([
+            split_string_separator(line, chunk_size) for line in parts
+        ])
if isbytestring(txt): if isbytestring(txt):
txt = txt.decode('utf-8') txt = txt.decode('utf-8')
@ -227,7 +230,7 @@ def opf_writer(path, opf_name, manifest, spine, mi):
opf = OPFCreator(path, mi) opf = OPFCreator(path, mi)
opf.create_manifest(manifest) opf.create_manifest(manifest)
opf.create_spine(spine) opf.create_spine(spine)
with open(os.path.join(path, opf_name), 'wb') as opffile: with lopen(os.path.join(path, opf_name), 'wb') as opffile:
opf.render(opffile) opf.render(opffile)
@ -236,9 +239,16 @@ def split_string_separator(txt, size):
Splits the text by putting \n\n at the point size. Splits the text by putting \n\n at the point size.
''' '''
     if len(txt) > size:
-        txt = ''.join([re.sub(type(u'')(r'\.(?P<ends>[^.]*)$'), r'.\n\n\g<ends>',
-            txt[i:i+size], 1) for i in
-            xrange(0, len(txt), size)])
+        size -= 2
+        txt = []
+        for part in (txt[i * size: (i + 1) * size] for i in range(0, len(txt), size)):
+            idx = part.rfind('.')
+            if idx == -1:
+                part += b'\n\n'
+            else:
+                part = part[:idx + 1] + b'\n\n' + part[idx:]
+            txt.append(part)
+        txt = b''.join(txt)
     return txt

View File

@ -19,8 +19,6 @@ Tranliterate the string from unicode characters to ASCII in Chinese and others.
''' '''
import unicodedata import unicodedata
from calibre.constants import ispy3
class Unihandecoder(object): class Unihandecoder(object):
preferred_encoding = None preferred_encoding = None
@ -43,15 +41,11 @@ class Unihandecoder(object):
self.decoder = Unidecoder() self.decoder = Unidecoder()
def decode(self, text): def decode(self, text):
-        if not ispy3:
-            if not isinstance(text, unicode):
-                try:
-                    text = unicode(text)
-                except:
-                    try:
-                        text = text.decode(self.preferred_encoding)
-                    except:
-                        text = text.decode('utf-8', 'replace')
+        if isinstance(text, bytes):
+            try:
+                text = text.decode(self.preferred_encoding)
+            except Exception:
+                text = text.decode('utf-8', 'replace')
# at first unicode normalize it. (see Unicode standards) # at first unicode normalize it. (see Unicode standards)
ntext = unicodedata.normalize('NFKC', text) ntext = unicodedata.normalize('NFKC', text)
return self.decoder.decode(ntext) return self.decoder.decode(ntext)

View File

@ -4,7 +4,7 @@
# Copyright 2011 Hiroshi Miura <miurahr@linux.com> # Copyright 2011 Hiroshi Miura <miurahr@linux.com>
from zlib import decompress from zlib import decompress
from calibre.constants import ispy3 from polyglot.builtins import unicode_type
class jisyo (object): class jisyo (object):
@ -34,8 +34,8 @@ class jisyo (object):
P('localization/pykakasi/kanadict2.calibre_msgpack', data=True)) P('localization/pykakasi/kanadict2.calibre_msgpack', data=True))
def load_jisyo(self, char): def load_jisyo(self, char):
if not ispy3: if not isinstance(char, unicode_type):
char = unicode(char) char = unicode_type(char, 'utf-8')
key = "%04x"%ord(char) key = "%04x"%ord(char)
try: # already exist? try: # already exist?

View File

@ -60,9 +60,9 @@ it under the same terms as Perl itself.
''' '''
import re import re
from calibre.constants import ispy3
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
from calibre.ebooks.unihandecode.zhcodepoints import CODEPOINTS as HANCODES from calibre.ebooks.unihandecode.zhcodepoints import CODEPOINTS as HANCODES
from polyglot.builtins import unicode_type
class Unidecoder(object): class Unidecoder(object):
@ -95,8 +95,8 @@ class Unidecoder(object):
Find what group character is a part of. Find what group character is a part of.
''' '''
# Code groups withing CODEPOINTS take the form 'xAB' # Code groups withing CODEPOINTS take the form 'xAB'
if not ispy3: if not isinstance(character, unicode_type):
character = unicode(character) character = unicode_type(character, "utf-8")
return 'x%02x' % (ord(character) >> 8) return 'x%02x' % (ord(character) >> 8)
def grouped_point(self, character): def grouped_point(self, character):
@ -104,6 +104,6 @@ class Unidecoder(object):
Return the location the replacement character is in the list for a Return the location the replacement character is in the list for a
the group character is a part of. the group character is a part of.
''' '''
if not ispy3: if not isinstance(character, unicode_type):
character = unicode(character) character = unicode_type(character, "utf-8")
return ord(character) & 255 return ord(character) & 255

View File

@ -347,7 +347,7 @@ class EditorWidget(QWebView, LineEditECM): # {{{
return unicode_type(self.page().mainFrame().toHtml()) return unicode_type(self.page().mainFrame().toHtml())
check = unicode_type(self.page().mainFrame().toPlainText()).strip() check = unicode_type(self.page().mainFrame().toPlainText()).strip()
raw = unicode_type(self.page().mainFrame().toHtml()) raw = unicode_type(self.page().mainFrame().toHtml())
raw = xml_to_unicode_type(raw, strip_encoding_pats=True, raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0] resolve_entities=True)[0]
raw = self.comments_pat.sub('', raw) raw = self.comments_pat.sub('', raw)
if not check and '<img' not in raw.lower(): if not check and '<img' not in raw.lower():

View File

@ -201,7 +201,7 @@ class MenuExampleWindow(Gtk.ApplicationWindow):
def convert(v): def convert(v):
if isinstance(v, basestring): if isinstance(v, (unicode_type, bytes)):
return unicode_type(v) return unicode_type(v)
if isinstance(v, dbus.Struct): if isinstance(v, dbus.Struct):
return tuple(convert(val) for val in v) return tuple(convert(val) for val in v)
@ -309,6 +309,7 @@ class MyApplication(Gtk.Application):
def do_startup(self): def do_startup(self):
Gtk.Application.do_startup(self) Gtk.Application.do_startup(self)
app = MyApplication(application_id='com.calibre-ebook.test-gtk') app = MyApplication(application_id='com.calibre-ebook.test-gtk')
signal.signal(signal.SIGINT, signal.SIG_DFL) signal.signal(signal.SIGINT, signal.SIG_DFL)
sys.exit(app.run(sys.argv)) sys.exit(app.run(sys.argv))

View File

@ -43,7 +43,7 @@ class TableItem(QTableWidgetItem):
# self is not None and other is None therefore self >= other # self is not None and other is None therefore self >= other
return True return True
if isinstance(self.sort, (str, unicode_type)): if isinstance(self.sort, (bytes, unicode_type)):
l = sort_key(self.sort) l = sort_key(self.sort)
r = sort_key(other.sort) r = sort_key(other.sort)
else: else:
@ -66,7 +66,7 @@ class TableItem(QTableWidgetItem):
# self is not None therefore self > other # self is not None therefore self > other
return False return False
if isinstance(self.sort, (str, unicode_type)): if isinstance(self.sort, (bytes, unicode_type)):
l = sort_key(self.sort) l = sort_key(self.sort)
r = sort_key(other.sort) r = sort_key(other.sort)
else: else:
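
The same comparison pattern outside Qt, as a hedged sketch: calibre's sort_key comes from calibre.utils.icu, so a casefold-based stand-in is used here.

def sort_key(x):  # stand-in for the ICU collation key
    if isinstance(x, bytes):
        x = x.decode('utf-8', 'replace')
    return x.casefold()

def ge(a, b):  # mirrors the __ge__ logic: None sorts lowest, strings collate
    if a is None:
        return b is None
    if b is None:
        return True
    if isinstance(a, (bytes, str)):
        return sort_key(a) >= sort_key(b)
    return a >= b

print(ge('Éclair', 'apple'), ge(None, 'x'), ge(3, None))  # True False True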

View File

@ -13,7 +13,7 @@ from PyQt5.Qt import (Qt, QApplication, QStackedWidget, QMenu, QTimer,
from calibre.utils.config import prefs from calibre.utils.config import prefs
from calibre.utils.icu import sort_key from calibre.utils.icu import sort_key
from calibre.constants import (isosx, __appname__, preferred_encoding, from calibre.constants import (__appname__, preferred_encoding,
get_version) get_version)
from calibre.gui2 import config, is_widescreen, gprefs, error_dialog, open_url from calibre.gui2 import config, is_widescreen, gprefs, error_dialog, open_url
from calibre.gui2.library.views import BooksView, DeviceBooksView from calibre.gui2.library.views import BooksView, DeviceBooksView
@ -323,11 +323,6 @@ class StatusBar(QStatusBar): # {{{
def show_message(self, msg, timeout=0, show_notification=True): def show_message(self, msg, timeout=0, show_notification=True):
self.showMessage(msg, timeout) self.showMessage(msg, timeout)
if self.notifier is not None and not config['disable_tray_notification'] and show_notification: if self.notifier is not None and not config['disable_tray_notification'] and show_notification:
if isosx and isinstance(msg, unicode_type):
try:
msg = msg.encode(preferred_encoding)
except UnicodeEncodeError:
msg = msg.encode('utf-8')
self.notifier(msg) self.notifier(msg)
def clear_message(self): def clear_message(self):

View File

@ -129,7 +129,7 @@ class LocationManager(QObject): # {{{
had_device = self.has_device had_device = self.has_device
if cp is None: if cp is None:
cp = (None, None) cp = (None, None)
if isinstance(cp, (str, unicode_type)): if isinstance(cp, (bytes, unicode_type)):
cp = (cp, None) cp = (cp, None)
if len(fs) < 3: if len(fs) < 3:
fs = list(fs) + [0] fs = list(fs) + [0]

View File

@ -6,7 +6,7 @@ import sys, logging, os, traceback, time
from PyQt5.Qt import ( from PyQt5.Qt import (
QKeySequence, QPainter, QDialog, QSpinBox, QSlider, QIcon, Qt, QCoreApplication, QThread, QScrollBar) QKeySequence, QPainter, QDialog, QSpinBox, QSlider, QIcon, Qt, QCoreApplication, QThread, QScrollBar)
from calibre import __appname__, setup_cli_handlers, islinux, isbsd from calibre import __appname__, setup_cli_handlers, islinux, isbsd, as_unicode
from calibre.ebooks.lrf.lrfparser import LRFDocument from calibre.ebooks.lrf.lrfparser import LRFDocument
from calibre.gui2 import error_dialog, \ from calibre.gui2 import error_dialog, \
@ -17,7 +17,6 @@ from calibre.gui2.lrf_renderer.config_ui import Ui_ViewerConfig
from calibre.gui2.main_window import MainWindow from calibre.gui2.main_window import MainWindow
from calibre.gui2.lrf_renderer.document import Document from calibre.gui2.lrf_renderer.document import Document
from calibre.gui2.search_box import SearchBox2 from calibre.gui2.search_box import SearchBox2
from polyglot.builtins import unicode_type
class RenderWorker(QThread): class RenderWorker(QThread):
@ -201,7 +200,7 @@ class Main(MainWindow, Ui_MainWindow):
print('Error rendering document', file=sys.stderr) print('Error rendering document', file=sys.stderr)
print(exception, file=sys.stderr) print(exception, file=sys.stderr)
print(self.renderer.formatted_traceback, file=sys.stderr) print(self.renderer.formatted_traceback, file=sys.stderr)
msg = u'<p><b>%s</b>: '%(exception.__class__.__name__,) + unicode_type(str(exception), 'utf8', 'replace') + u'</p>' msg = u'<p><b>%s</b>: '%(exception.__class__.__name__,) + as_unicode(exception) + u'</p>'
msg += u'<p>Failed to render document</p>' msg += u'<p>Failed to render document</p>'
msg += u'<p>Detailed <b>traceback</b>:<pre>' msg += u'<p>Detailed <b>traceback</b>:<pre>'
msg += self.renderer.formatted_traceback + '</pre>' msg += self.renderer.formatted_traceback + '</pre>'

View File

@ -132,7 +132,7 @@ def get_default_library_path():
fname = 'Calibre Library' fname = 'Calibre Library'
if isinstance(fname, unicode_type): if isinstance(fname, unicode_type):
try: try:
fname = fname.encode(filesystem_encoding) fname.encode(filesystem_encoding)
except: except:
fname = 'Calibre Library' fname = 'Calibre Library'
x = os.path.expanduser('~'+os.sep+fname) x = os.path.expanduser('~'+os.sep+fname)
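
A condensed sketch of the corrected check: the name is only probed for encodability and stays text either way (the old code replaced it with bytes). The function name is hypothetical and sys.getfilesystemencoding() stands in for calibre's filesystem_encoding.

import sys

def default_library_name(fname=u'Calibre Library'):
    try:
        fname.encode(sys.getfilesystemencoding())
    except UnicodeEncodeError:
        fname = u'Calibre Library'  # fall back to an always-encodable name
    return fname

print(default_library_name(u'Bibliothèque'))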

View File

@ -5,14 +5,14 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import StringIO, traceback, sys, gc, weakref import traceback, sys, gc, weakref
from io import BytesIO
from PyQt5.Qt import (QMainWindow, QTimer, QAction, QMenu, QMenuBar, QIcon, from PyQt5.Qt import (QMainWindow, QTimer, QAction, QMenu, QMenuBar, QIcon,
QObject) QObject)
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.gui2 import error_dialog from calibre.gui2 import error_dialog
from calibre import prints from calibre import prints, force_unicode
from polyglot.builtins import unicode_type
def option_parser(usage='''\ def option_parser(usage='''\
@ -134,7 +134,7 @@ class MainWindow(QMainWindow):
if type is KeyboardInterrupt: if type is KeyboardInterrupt:
return return
try: try:
sio = StringIO.StringIO() sio = BytesIO()
try: try:
from calibre.debug import print_basic_debug_info from calibre.debug import print_basic_debug_info
print_basic_debug_info(out=sio) print_basic_debug_info(out=sio)
@ -145,7 +145,8 @@ class MainWindow(QMainWindow):
prints(value.locking_debug_msg, file=sio) prints(value.locking_debug_msg, file=sio)
fe = sio.getvalue() fe = sio.getvalue()
prints(fe, file=sys.stderr) prints(fe, file=sys.stderr)
msg = '<b>%s</b>:'%type.__name__ + unicode_type(str(value), 'utf8', 'replace') fe = force_unicode(fe)
msg = '<b>%s</b>:'%type.__name__ + force_unicode(value)
error_dialog(self, _('Unhandled exception'), msg, det_msg=fe, error_dialog(self, _('Unhandled exception'), msg, det_msg=fe,
show=True) show=True)
except BaseException: except BaseException:
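
A standalone sketch of the pattern in this hunk: collect the debug report in a bytes buffer, then force everything to text before handing it to the UI. force_unicode here is a minimal stand-in for calibre's helper of the same name.

import sys, traceback
from io import BytesIO

def force_unicode(x, enc='utf-8'):  # stand-in for calibre.force_unicode
    if isinstance(x, bytes):
        return x.decode(enc, 'replace')
    return x if isinstance(x, str) else str(x)

def unhandled_exception(exc_type, value, tb):
    if exc_type is KeyboardInterrupt:
        return
    sio = BytesIO()
    sio.write(''.join(traceback.format_exception(exc_type, value, tb)).encode('utf-8'))
    fe = force_unicode(sio.getvalue())
    msg = '<b>%s</b>: ' % exc_type.__name__ + force_unicode(value)
    print(msg, file=sys.stderr)
    print(fe, file=sys.stderr)

sys.excepthook = unhandled_exception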

View File

@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
import time import time
from calibre import prints from calibre import prints
from calibre.constants import islinux, isosx, get_osx_version, DEBUG from calibre.constants import islinux, isosx, get_osx_version, DEBUG, ispy3
from polyglot.builtins import unicode_type from polyglot.builtins import unicode_type
@ -145,8 +145,12 @@ class AppleNotifier(Notifier):
def notify(self, body, summary): def notify(self, body, summary):
def encode(x): def encode(x):
if isinstance(x, unicode_type): if ispy3:
x = x.encode('utf-8') if isinstance(x, bytes):
x = x.decode('utf-8')
else:
if isinstance(x, unicode_type):
x = x.encode('utf-8')
return x return x
cmd = [self.exe, '-activate', cmd = [self.exe, '-activate',
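
The encode() helper above in isolation, sketched with a plain version check instead of calibre's ispy3 constant: command-line arguments for the notifier must be text on Python 3 and encoded bytes on Python 2.

import sys

def encode_arg(x, enc='utf-8'):  # hypothetical name for the helper
    if sys.version_info.major >= 3:
        if isinstance(x, bytes):
            x = x.decode(enc)
    else:
        if not isinstance(x, bytes):
            x = x.encode(enc)
    return x

cmd = ['notifier', '-title', encode_arg(u'calibre'), '-message', encode_arg(b'Job done')]
print(cmd)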

View File

@ -67,7 +67,7 @@ class SearchDialog(QDialog, Ui_Dialog):
self.setup_store_checks() self.setup_store_checks()
# Set the search query # Set the search query
if isinstance(query, (str, unicode_type)): if isinstance(query, (bytes, unicode_type)):
self.search_edit.setText(query) self.search_edit.setText(query)
elif isinstance(query, dict): elif isinstance(query, dict):
if 'author' in query: if 'author' in query:
@ -233,7 +233,7 @@ class SearchDialog(QDialog, Ui_Dialog):
query = query.replace('<', '') query = query.replace('<', '')
# Remove the prefix. # Remove the prefix.
for loc in ('all', 'author', 'author2', 'authors', 'title', 'title2'): for loc in ('all', 'author', 'author2', 'authors', 'title', 'title2'):
query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, '\g<a>', query) query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, r'\g<a>', query)
query = query.replace('%s:' % loc, '') query = query.replace('%s:' % loc, '')
# Remove the prefix and search text. # Remove the prefix and search text.
for loc in ('cover', 'download', 'downloads', 'drm', 'format', 'formats', 'price', 'store'): for loc in ('cover', 'download', 'downloads', 'drm', 'format', 'formats', 'price', 'store'):
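
Why the replacement template above became a raw string: '\g<a>' inside a normal literal is an invalid escape sequence on Python 3 (a DeprecationWarning now, an error eventually), while r'\g<a>' keeps the backreference intact. A quick check:

import re

query = 'author:"tolstoy" war and peace'
print(re.sub(r'author:"(?P<a>[^\s"]+)"', r'\g<a>', query))  # tolstoy war and peace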

View File

@ -30,8 +30,8 @@ user_functions = JSONConfig('editor-search-replace-functions')
def compile_code(src, name='<string>'): def compile_code(src, name='<string>'):
if not isinstance(src, unicode_type): if not isinstance(src, unicode_type):
match = re.search(r'coding[:=]\s*([-\w.]+)', src[:200]) match = re.search(br'coding[:=]\s*([-\w.]+)', src[:200])
enc = match.group(1) if match else 'utf-8' enc = match.group(1).decode('utf-8') if match else 'utf-8'
src = src.decode(enc) src = src.decode(enc)
if not src or not src.strip(): if not src or not src.strip():
src = EMPTY_FUNC src = EMPTY_FUNC
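
A condensed, runnable sketch of the decode-before-compile pattern this hunk fixes: while the source is still bytes, the PEP 263 coding declaration has to be searched for with a bytes pattern, used to decode, and then stripped so compile() accepts the text (the real function also handles the EMPTY_FUNC default and the execution namespace).

import re

def compile_code(src, name='<string>'):
    if not isinstance(src, str):
        match = re.search(br'coding[:=]\s*([-\w.]+)', src[:200])
        enc = match.group(1).decode('utf-8') if match else 'utf-8'
        src = src.decode(enc)
    # compile() rejects text that still carries an encoding declaration
    src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src, flags=re.MULTILINE)
    return compile(src, name, 'exec')

ns = {}
exec(compile_code(b"# -*- coding: utf-8 -*-\nx = u'caf\xc3\xa9'\n"), ns)
print(ns['x'])  # café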

View File

@ -9,7 +9,6 @@ import os
from hashlib import sha1 from hashlib import sha1
from calibre.ebooks import BOOK_EXTENSIONS from calibre.ebooks import BOOK_EXTENSIONS
from polyglot.builtins import unicode_type
def find_folders_under(root, db, add_root=True, # {{{ def find_folders_under(root, db, add_root=True, # {{{
@ -106,11 +105,9 @@ class FormatCollection(object): # {{{
def books_in_folder(folder, one_per_folder, # {{{ def books_in_folder(folder, one_per_folder, # {{{
cancel_callback=lambda : False): cancel_callback=lambda : False):
assert not isinstance(folder, unicode_type)
dirpath = os.path.abspath(folder) dirpath = os.path.abspath(folder)
if one_per_folder: if one_per_folder:
formats = set([]) formats = set()
for path in os.listdir(dirpath): for path in os.listdir(dirpath):
if cancel_callback(): if cancel_callback():
return [] return []

View File

@ -19,7 +19,7 @@ from calibre.utils.localization import (canonicalize_lang, lang_map, get_udc)
from calibre.db.search import CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH, _match from calibre.db.search import CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH, _match
from calibre.ebooks.metadata import title_sort, author_to_author_sort from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre import prints from calibre import prints, force_unicode
from polyglot.builtins import unicode_type from polyglot.builtins import unicode_type
@ -137,7 +137,9 @@ del y, c, n, u
def force_to_bool(val): def force_to_bool(val):
if isinstance(val, (str, unicode_type)): if isinstance(val, (bytes, unicode_type)):
if isinstance(val, bytes):
val = force_unicode(val)
try: try:
val = icu_lower(val) val = icu_lower(val)
if not val: if not val:
@ -348,7 +350,7 @@ class ResultCache(SearchQueryParser): # {{{
if item is None: if item is None:
continue continue
v = item[loc] v = item[loc]
if isinstance(v, (str, unicode_type)): if isinstance(v, (bytes, unicode_type)):
v = parse_date(v) v = parse_date(v)
if v is None or v <= UNDEFINED_DATE: if v is None or v <= UNDEFINED_DATE:
matches.add(item[0]) matches.add(item[0])
@ -359,7 +361,7 @@ class ResultCache(SearchQueryParser): # {{{
if item is None: if item is None:
continue continue
v = item[loc] v = item[loc]
if isinstance(v, (str, unicode_type)): if isinstance(v, (bytes, unicode_type)):
v = parse_date(v) v = parse_date(v)
if v is not None and v > UNDEFINED_DATE: if v is not None and v > UNDEFINED_DATE:
matches.add(item[0]) matches.add(item[0])
@ -371,7 +373,7 @@ class ResultCache(SearchQueryParser): # {{{
(p, relop) = self.date_search_relops[k] (p, relop) = self.date_search_relops[k]
query = query[p:] query = query[p:]
if relop is None: if relop is None:
(p, relop) = self.date_search_relops['='] (p, relop) = self.date_search_relops['=']
if query in self.local_today: if query in self.local_today:
qd = now() qd = now()
@ -403,7 +405,7 @@ class ResultCache(SearchQueryParser): # {{{
if item is None or item[loc] is None: if item is None or item[loc] is None:
continue continue
v = item[loc] v = item[loc]
if isinstance(v, (str, unicode_type)): if isinstance(v, (bytes, unicode_type)):
v = parse_date(v) v = parse_date(v)
if relop(v, qd, field_count): if relop(v, qd, field_count):
matches.add(item[0]) matches.add(item[0])
@ -448,7 +450,7 @@ class ResultCache(SearchQueryParser): # {{{
(p, relop) = self.numeric_search_relops[k] (p, relop) = self.numeric_search_relops[k]
query = query[p:] query = query[p:]
if relop is None: if relop is None:
(p, relop) = self.numeric_search_relops['='] (p, relop) = self.numeric_search_relops['=']
if dt == 'int': if dt == 'int':
cast = lambda x: int(x) cast = lambda x: int(x)
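
For reference, a toy version of the relop handling visible in these hunks (the table below is illustrative, not calibre's exact one): the operator prefix maps to its length and comparison function, with '=' as the fallback when no prefix matched.

import operator

numeric_search_relops = {
    '>=': (2, operator.ge), '<=': (2, operator.le),
    '>':  (1, operator.gt), '<':  (1, operator.lt),
    '=':  (1, operator.eq),
}

def parse_numeric_query(query):
    relop = None
    for k in ('>=', '<=', '>', '<', '='):
        if query.startswith(k):
            p, relop = numeric_search_relops[k]
            query = query[p:]
            break
    if relop is None:
        p, relop = numeric_search_relops['=']
    return relop, float(query)

relop, val = parse_numeric_query('>=4.5')
print(relop(5, val), relop(4, val))  # True False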

View File

@ -154,7 +154,7 @@ class CSV_XML(CatalogPlugin):
item = u'%.2g' % (item / 2.0) item = u'%.2g' % (item / 2.0)
# Convert HTML to markdown text # Convert HTML to markdown text
if type(item) is unicode_type: if isinstance(item, unicode_type):
opening_tag = re.search('<(\\w+)(\x20|>)', item) opening_tag = re.search('<(\\w+)(\x20|>)', item)
if opening_tag: if opening_tag:
closing_tag = re.search('<\\/%s>$' % opening_tag.group(1), item) closing_tag = re.search('<\\/%s>$' % opening_tag.group(1), item)
@ -177,7 +177,7 @@ class CSV_XML(CatalogPlugin):
for field in fields: for field in fields:
if field.startswith('#'): if field.startswith('#'):
val = db.get_field(r['id'], field, index_is_id=True) val = db.get_field(r['id'], field, index_is_id=True)
if not isinstance(val, (str, unicode_type)): if not isinstance(val, unicode_type):
val = unicode_type(val) val = unicode_type(val)
item = getattr(E, field.replace('#', '_'))(val) item = getattr(E, field.replace('#', '_'))(val)
record.append(item) record.append(item)
@ -188,7 +188,7 @@ class CSV_XML(CatalogPlugin):
val = r[field] val = r[field]
if not val: if not val:
continue continue
if not isinstance(val, (str, unicode_type)): if not isinstance(val, (bytes, unicode_type)):
if (fm.get(field, {}).get('datatype', None) == if (fm.get(field, {}).get('datatype', None) ==
'rating' and val): 'rating' and val):
val = u'%.2g' % (val / 2.0) val = u'%.2g' % (val / 2.0)

View File

@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
import json, re import json, re
from functools import partial from functools import partial
from calibre import prints from calibre import prints, force_unicode
from calibre.constants import preferred_encoding from calibre.constants import preferred_encoding
from calibre.library.field_metadata import FieldMetadata from calibre.library.field_metadata import FieldMetadata
from calibre.utils.date import parse_date from calibre.utils.date import parse_date
@ -131,7 +131,7 @@ class CustomColumns(object):
if d['is_multiple']: if d['is_multiple']:
if x is None: if x is None:
return [] return []
if isinstance(x, (str, unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
x = x.split(d['multiple_seps']['ui_to_list']) x = x.split(d['multiple_seps']['ui_to_list'])
x = [y.strip() for y in x if y.strip()] x = [y.strip() for y in x if y.strip()]
x = [y.decode(preferred_encoding, 'replace') if not isinstance(y, x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
@ -142,12 +142,14 @@ class CustomColumns(object):
x.decode(preferred_encoding, 'replace') x.decode(preferred_encoding, 'replace')
def adapt_datetime(x, d): def adapt_datetime(x, d):
if isinstance(x, (str, unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
x = parse_date(x, assume_utc=False, as_utc=False) x = parse_date(x, assume_utc=False, as_utc=False)
return x return x
def adapt_bool(x, d): def adapt_bool(x, d):
if isinstance(x, (str, unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
if isinstance(x, bytes):
x = force_unicode(x)
x = x.lower() x = x.lower()
if x == 'true': if x == 'true':
x = True x = True
@ -168,7 +170,9 @@ class CustomColumns(object):
def adapt_number(x, d): def adapt_number(x, d):
if x is None: if x is None:
return None return None
if isinstance(x, (str, unicode_type, bytes)): if isinstance(x, (unicode_type, bytes)):
if isinstance(x, bytes):
x = force_unicode(x)
if x.lower() == 'none': if x.lower() == 'none':
return None return None
if d['datatype'] == 'int': if d['datatype'] == 'int':
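
The adapter pattern from this hunk as a self-contained sketch; force_unicode is a local stand-in, and the branches after 'true' are filled in for illustration since the hunk cuts off there.

def force_unicode(x, enc='utf-8'):  # stand-in for calibre's helper
    return x.decode(enc, 'replace') if isinstance(x, bytes) else x

def adapt_bool(x):
    if isinstance(x, (str, bytes)):
        x = force_unicode(x).lower()
        if x == 'true':
            x = True
        elif x == 'false':
            x = False
        elif x in ('none', ''):
            x = None
        else:
            x = bool(int(x))
    return x

print(adapt_bool(b'True'), adapt_bool('0'), adapt_bool(None))  # True False None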

View File

@ -24,7 +24,7 @@ from calibre.library.custom_columns import CustomColumns
from calibre.library.sqlite import connect, IntegrityError from calibre.library.sqlite import connect, IntegrityError
from calibre.library.prefs import DBPrefs from calibre.library.prefs import DBPrefs
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.constants import preferred_encoding, iswindows, filesystem_encoding from calibre.constants import preferred_encoding, iswindows, filesystem_encoding, ispy3
from calibre.ptempfile import (PersistentTemporaryFile, from calibre.ptempfile import (PersistentTemporaryFile,
base_dir, SpooledTemporaryFile) base_dir, SpooledTemporaryFile)
from calibre.customize.ui import (run_plugins_on_import, from calibre.customize.ui import (run_plugins_on_import,
@ -1754,12 +1754,14 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.rc = rc self.rc = rc
self.id = id self.id = id
def __str__(self): def __unicode_representation__(self):
return unicode_type(self) return u'n=%s s=%s c=%d rt=%d rc=%d id=%s' % (
self.n, self.s, self.c, self.rt, self.rc, self.id)
def __unicode__(self): if ispy3:
return 'n=%s s=%s c=%d rt=%d rc=%d id=%s'%\ __str__ = __unicode_representation__
(self.n, self.s, self.c, self.rt, self.rc, self.id) else:
__str__ = __unicode__ = __unicode_representation__
def clean_user_categories(self): def clean_user_categories(self):
user_cats = self.prefs.get('user_categories', {}) user_cats = self.prefs.get('user_categories', {})
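
The __str__/__unicode__ handling above as a minimal sketch (class and field names are hypothetical; only the aliasing pattern matters): one method returns text, and the interpreter-appropriate dunder names are bound to it inside the class body.

import sys
ispy3 = sys.version_info.major >= 3

class TagStats(object):
    def __init__(self, n, s):
        self.n, self.s = n, s

    def __unicode_representation__(self):
        return u'n=%s s=%s' % (self.n, self.s)

    if ispy3:
        __str__ = __unicode_representation__
    else:
        __str__ = __unicode__ = __unicode_representation__

print(TagStats(u'fiction', 3))  # n=fiction s=3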

View File

@ -202,7 +202,7 @@ class Route(object):
raise RouteError('The variable(s) %s are not part of the route: %s' % (','.join(unknown), self.endpoint.route)) raise RouteError('The variable(s) %s are not part of the route: %s' % (','.join(unknown), self.endpoint.route))
def quoted(x): def quoted(x):
if not isinstance(x, unicode_type) and not isinstance(x, bytes): if not isinstance(x, (unicode_type, bytes)):
x = unicode_type(x) x = unicode_type(x)
if isinstance(x, unicode_type): if isinstance(x, unicode_type):
x = x.encode('utf-8') x = x.encode('utf-8')
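
A sketch of where quoted() is heading, assuming Python 3 and urllib.parse.quote as the percent-encoder (the real code goes through calibre's own wrappers): any value is normalised to UTF-8 bytes before being quoted into a URL path.

from urllib.parse import quote

def quoted(x):
    if not isinstance(x, (str, bytes)):
        x = str(x)
    if isinstance(x, str):
        x = x.encode('utf-8')
    return quote(x)

print(quoted(42), quoted(u'caf\xe9'))  # 42 caf%C3%A9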

File diff suppressed because it is too large

View File

@ -11,7 +11,7 @@ from datetime import datetime, time as dtime, timedelta, MINYEAR, MAXYEAR
from functools import partial from functools import partial
from calibre import strftime from calibre import strftime
from calibre.constants import iswindows, isosx, plugins from calibre.constants import iswindows, isosx, plugins, preferred_encoding
from calibre.utils.iso8601 import utc_tz, local_tz, UNDEFINED_DATE from calibre.utils.iso8601 import utc_tz, local_tz, UNDEFINED_DATE
from calibre.utils.localization import lcdata from calibre.utils.localization import lcdata
from polyglot.builtins import unicode_type from polyglot.builtins import unicode_type
@ -101,6 +101,8 @@ def parse_date(date_string, assume_utc=False, as_utc=True, default=None):
from dateutil.parser import parse from dateutil.parser import parse
if not date_string: if not date_string:
return UNDEFINED_DATE return UNDEFINED_DATE
if isinstance(date_string, bytes):
date_string = date_string.decode(preferred_encoding, 'replace')
if default is None: if default is None:
func = datetime.utcnow if assume_utc else datetime.now func = datetime.utcnow if assume_utc else datetime.now
default = func().replace(day=15, hour=0, minute=0, second=0, microsecond=0, default = func().replace(day=15, hour=0, minute=0, second=0, microsecond=0,
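
The added guard in isolation, as a condensed sketch that leans on dateutil (imported in the hunk above) and skips the default and assume_utc handling: byte strings, for example values read back from an old database, are decoded before parsing.

from dateutil.parser import parse

def parse_date(date_string, encoding='utf-8'):
    if isinstance(date_string, bytes):
        date_string = date_string.decode(encoding, 'replace')
    return parse(date_string)

print(parse_date(b'2019-03-13 06:40:38'))
print(parse_date(u'13 March 2019'))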

View File

@ -132,7 +132,7 @@ class FormatterFunction(object):
def eval_(self, formatter, kwargs, mi, locals, *args): def eval_(self, formatter, kwargs, mi, locals, *args):
ret = self.evaluate(formatter, kwargs, mi, locals, *args) ret = self.evaluate(formatter, kwargs, mi, locals, *args)
if isinstance(ret, (str, unicode_type)): if isinstance(ret, (bytes, unicode_type)):
return ret return ret
if isinstance(ret, list): if isinstance(ret, list):
return ','.join(ret) return ','.join(ret)

View File

@ -253,8 +253,8 @@ def offload_worker(env={}, priority='normal', cwd=None):
def compile_code(src): def compile_code(src):
import re, io import re, io
if not isinstance(src, unicode_type): if not isinstance(src, unicode_type):
match = re.search(r'coding[:=]\s*([-\w.]+)', src[:200]) match = re.search(br'coding[:=]\s*([-\w.]+)', src[:200])
enc = match.group(1) if match else 'utf-8' enc = match.group(1).decode('utf-8') if match else 'utf-8'
src = src.decode(enc) src = src.decode(enc)
# Python complains if there is a coding declaration in a unicode string # Python complains if there is a coding declaration in a unicode string
src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src, flags=re.MULTILINE) src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src, flags=re.MULTILINE)

View File

@ -32,8 +32,8 @@ def compile_recipe(src):
:return: Recipe class or None, if no such class was found in src :return: Recipe class or None, if no such class was found in src
''' '''
if not isinstance(src, unicode_type): if not isinstance(src, unicode_type):
match = re.search(r'coding[:=]\s*([-\w.]+)', src[:200]) match = re.search(br'coding[:=]\s*([-\w.]+)', src[:200])
enc = match.group(1) if match else 'utf-8' enc = match.group(1).decode('utf-8') if match else 'utf-8'
src = src.decode(enc) src = src.decode(enc)
# Python complains if there is a coding declaration in a unicode string # Python complains if there is a coding declaration in a unicode string
src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src.lstrip(u'\ufeff'), flags=re.MULTILINE) src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src.lstrip(u'\ufeff'), flags=re.MULTILINE)

View File

@ -25,6 +25,7 @@ if is_py3:
zip = builtins.__dict__['zip'] zip = builtins.__dict__['zip']
map = builtins.__dict__['map'] map = builtins.__dict__['map']
filter = builtins.__dict__['filter'] filter = builtins.__dict__['filter']
range = builtins.__dict__['range']
codepoint_to_chr = chr codepoint_to_chr = chr
unicode_type = str unicode_type = str
@ -47,6 +48,7 @@ else:
""") """)
from future_builtins import zip, map, filter # noqa from future_builtins import zip, map, filter # noqa
range = xrange
import __builtin__ as builtins import __builtin__ as builtins
codepoint_to_chr = unichr codepoint_to_chr = unichr
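
A standalone sketch of the aliasing pattern that polyglot.builtins extends here with range: both interpreters end up exposing the lazy iterator under one name, so callers can simply write "from polyglot.builtins import range".

from __future__ import print_function
import sys

if sys.version_info.major >= 3:
    import builtins
    range = builtins.__dict__['range']
    codepoint_to_chr = chr
else:
    import __builtin__ as builtins
    range = builtins.__dict__['xrange']
    codepoint_to_chr = builtins.__dict__['unichr']

print(list(range(3)), codepoint_to_chr(0x3042))  # [0, 1, 2] あ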