From 5b7608983987f1fc782325f392de8a4338d8fe8c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 13 Mar 2019 06:40:38 +0530 Subject: [PATCH] Various py3 related fixes exposed by the unicode patch --- src/calibre/db/backend.py | 18 +- src/calibre/db/categories.py | 15 +- src/calibre/db/search.py | 10 +- src/calibre/db/utils.py | 6 +- src/calibre/db/write.py | 4 + src/calibre/devices/prst1/driver.py | 4 +- .../devices/smart_device_app/driver.py | 6 +- .../ebooks/conversion/plugins/epub_output.py | 7 +- src/calibre/ebooks/lrf/html/convert_from.py | 11 - src/calibre/ebooks/lrf/meta.py | 4 +- src/calibre/ebooks/lrf/objects.py | 103 +- src/calibre/ebooks/metadata/book/base.py | 14 +- src/calibre/ebooks/metadata/fb2.py | 2 +- src/calibre/ebooks/metadata/opf2.py | 19 +- src/calibre/ebooks/mobi/mobiml.py | 26 +- src/calibre/ebooks/mobi/reader/mobi6.py | 67 +- src/calibre/ebooks/mobi/writer2/serializer.py | 51 +- src/calibre/ebooks/oeb/base.py | 74 +- src/calibre/ebooks/oeb/iterator/spine.py | 2 +- src/calibre/ebooks/oeb/parse_utils.py | 4 +- src/calibre/ebooks/oeb/stylizer.py | 8 +- src/calibre/ebooks/txt/processor.py | 28 +- src/calibre/ebooks/unihandecode/__init__.py | 16 +- .../ebooks/unihandecode/pykakasi/jisyo.py | 6 +- src/calibre/ebooks/unihandecode/unidecoder.py | 10 +- src/calibre/gui2/comments_editor.py | 2 +- src/calibre/gui2/dbus_export/gtk.py | 3 +- src/calibre/gui2/dialogs/quickview.py | 4 +- src/calibre/gui2/init.py | 7 +- src/calibre/gui2/layout.py | 2 +- src/calibre/gui2/lrf_renderer/main.py | 5 +- src/calibre/gui2/main.py | 2 +- src/calibre/gui2/main_window.py | 11 +- src/calibre/gui2/notify.py | 10 +- src/calibre/gui2/store/search/search.py | 4 +- .../gui2/tweak_book/function_replace.py | 4 +- src/calibre/library/add_to_library.py | 5 +- src/calibre/library/caches.py | 16 +- src/calibre/library/catalogs/csv_xml.py | 6 +- src/calibre/library/custom_columns.py | 14 +- src/calibre/library/database2.py | 14 +- src/calibre/srv/routes.py | 2 +- src/calibre/utils/apsw_shell.py | 2958 ----------------- src/calibre/utils/date.py | 4 +- src/calibre/utils/formatter_functions.py | 2 +- src/calibre/utils/ipc/simple_worker.py | 4 +- src/calibre/web/feeds/recipes/__init__.py | 4 +- src/polyglot/builtins.py | 2 + 48 files changed, 390 insertions(+), 3210 deletions(-) delete mode 100644 src/calibre/utils/apsw_shell.py diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py index c1d7e6b9b1..146bb8eba4 100644 --- a/src/calibre/db/backend.py +++ b/src/calibre/db/backend.py @@ -670,7 +670,7 @@ class DB(object): if d['is_multiple']: if x is None: return [] - if isinstance(x, (str, unicode_type, bytes)): + if isinstance(x, (unicode_type, bytes)): x = x.split(d['multiple_seps']['ui_to_list']) x = [y.strip() for y in x if y.strip()] x = [y.decode(preferred_encoding, 'replace') if not isinstance(y, @@ -681,12 +681,16 @@ class DB(object): x.decode(preferred_encoding, 'replace') def adapt_datetime(x, d): - if isinstance(x, (str, unicode_type, bytes)): + if isinstance(x, (unicode_type, bytes)): + if isinstance(x, bytes): + x = x.decode(preferred_encoding, 'replace') x = parse_date(x, assume_utc=False, as_utc=False) return x def adapt_bool(x, d): - if isinstance(x, (str, unicode_type, bytes)): + if isinstance(x, (unicode_type, bytes)): + if isinstance(x, bytes): + x = x.decode(preferred_encoding, 'replace') x = x.lower() if x == 'true': x = True @@ -707,7 +711,9 @@ class DB(object): def adapt_number(x, d): if x is None: return None - if isinstance(x, (str, unicode_type, bytes)): + if isinstance(x, (unicode_type, bytes)): + if isinstance(x, bytes): + x = x.decode(preferred_encoding, 'replace') if x.lower() == 'none': return None if d['datatype'] == 'int': @@ -1083,7 +1089,7 @@ class DB(object): def dump_and_restore(self, callback=None, sql=None): import codecs - from calibre.utils.apsw_shell import Shell + from apsw import Shell from contextlib import closing if callback is None: callback = lambda x: x @@ -1096,7 +1102,7 @@ class DB(object): shell = Shell(db=self.conn, stdout=buf) shell.process_command('.dump') else: - with open(fname, 'wb') as buf: + with lopen(fname, 'wb') as buf: buf.write(sql if isinstance(sql, bytes) else sql.encode('utf-8')) with TemporaryFile(suffix='_tmpdb.db', dir=os.path.dirname(self.dbpath)) as tmpdb: diff --git a/src/calibre/db/categories.py b/src/calibre/db/categories.py index 377eab54ba..0ae1cee97f 100644 --- a/src/calibre/db/categories.py +++ b/src/calibre/db/categories.py @@ -11,6 +11,7 @@ import copy from functools import partial from polyglot.builtins import unicode_type, map +from calibre.constants import ispy3 from calibre.ebooks.metadata import author_to_author_sort from calibre.utils.config_base import tweaks from calibre.utils.icu import sort_key, collation_order @@ -43,11 +44,19 @@ class Tag(object): self.search_expression = search_expression self.original_categories = None - def __unicode__(self): + @property + def string_representation(self): return u'%s:%s:%s:%s:%s'%(self.name, self.count, self.id, self.state, self.category) - def __str__(self): - return unicode_type(self).encode('utf-8') + if ispy3: + def __str__(self): + return self.string_representation + else: + def __str__(self): + return self.string_representation.encode('utf-8') + + def __unicode__(self): + return self.string_representation def __repr__(self): return str(self) diff --git a/src/calibre/db/search.py b/src/calibre/db/search.py index 175cd36d6e..da021d0e36 100644 --- a/src/calibre/db/search.py +++ b/src/calibre/db/search.py @@ -80,7 +80,7 @@ def _match(query, value, matchkind, use_primary_find_in_search=True, case_sensit if primary_contains(query, t): return True elif query in t: - return True + return True except re.error: pass return False @@ -149,7 +149,9 @@ class DateSearch(object): # {{{ if query == 'false': for v, book_ids in field_iter(): - if isinstance(v, (str, unicode_type)): + if isinstance(v, (bytes, unicode_type)): + if isinstance(v, bytes): + v = v.decode(preferred_encoding, 'replace') v = parse_date(v) if v is None or v <= UNDEFINED_DATE: matches |= book_ids @@ -157,7 +159,9 @@ class DateSearch(object): # {{{ if query == 'true': for v, book_ids in field_iter(): - if isinstance(v, (str, unicode_type)): + if isinstance(v, (bytes, unicode_type)): + if isinstance(v, bytes): + v = v.decode(preferred_encoding, 'replace') v = parse_date(v) if v is not None and v > UNDEFINED_DATE: matches |= book_ids diff --git a/src/calibre/db/utils.py b/src/calibre/db/utils.py index 99f281ad59..65d3e678d2 100644 --- a/src/calibre/db/utils.py +++ b/src/calibre/db/utils.py @@ -13,13 +13,15 @@ from polyglot.builtins import map, unicode_type from threading import Lock from calibre import as_unicode, prints -from calibre.constants import cache_dir, get_windows_number_formats, iswindows +from calibre.constants import cache_dir, get_windows_number_formats, iswindows, preferred_encoding from calibre.utils.localization import canonicalize_lang def force_to_bool(val): - if isinstance(val, (str, unicode_type)): + if isinstance(val, (bytes, unicode_type)): + if isinstance(val, bytes): + val = val.decode(preferred_encoding, 'replace') try: val = icu_lower(val) if not val: diff --git a/src/calibre/db/write.py b/src/calibre/db/write.py index dc8caace86..5670fff3f5 100644 --- a/src/calibre/db/write.py +++ b/src/calibre/db/write.py @@ -88,6 +88,8 @@ def adapt_number(typ, x): if x is None: return None if isinstance(x, (unicode_type, bytes)): + if isinstance(x, bytes): + x = x.decode(preferred_encoding, 'replace') if not x or x.lower() == 'none': return None return typ(x) @@ -95,6 +97,8 @@ def adapt_number(typ, x): def adapt_bool(x): if isinstance(x, (unicode_type, bytes)): + if isinstance(x, bytes): + x = x.decode(preferred_encoding, 'replace') x = x.lower() if x == 'true': x = True diff --git a/src/calibre/devices/prst1/driver.py b/src/calibre/devices/prst1/driver.py index 12120249d3..3dbb353713 100644 --- a/src/calibre/devices/prst1/driver.py +++ b/src/calibre/devices/prst1/driver.py @@ -171,7 +171,7 @@ class PRST1(USBMS): with closing(sqlite.connect(dbpath)) as connection: # Replace undecodable characters in the db instead of erroring out - connection.text_factory = lambda x: unicode_type(x, "utf-8", "replace") + connection.text_factory = lambda x: x if isinstance(x, unicode_type) else x.decode('utf-8', 'replace') cursor = connection.cursor() # Query collections @@ -758,7 +758,7 @@ class PRST1(USBMS): thumbnail_path = THUMBPATH%book.bookId - prefix = self._main_prefix if source_id is 0 else self._card_a_prefix + prefix = self._main_prefix if source_id == 0 else self._card_a_prefix thumbnail_file_path = os.path.join(prefix, *thumbnail_path.split('/')) thumbnail_dir_path = os.path.dirname(thumbnail_file_path) if not os.path.exists(thumbnail_dir_path): diff --git a/src/calibre/devices/smart_device_app/driver.py b/src/calibre/devices/smart_device_app/driver.py index 9619a82f04..7d60bf2916 100644 --- a/src/calibre/devices/smart_device_app/driver.py +++ b/src/calibre/devices/smart_device_app/driver.py @@ -398,7 +398,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin): if isinstance(a, dict): printable = {} for k,v in a.iteritems(): - if isinstance(v, (str, unicode_type)) and len(v) > 50: + if isinstance(v, (bytes, unicode_type)) and len(v) > 50: printable[k] = 'too long' else: printable[k] = v @@ -666,7 +666,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin): if v: v = json.loads(v, object_hook=from_json) if print_debug_info and extra_debug: - self._debug('receive after decode') # , v) + self._debug('receive after decode') # , v) return (self.reverse_opcodes[v[0]], v[1]) self._debug('protocol error -- empty json string') except socket.timeout: @@ -1155,7 +1155,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin): (self.DEFAULT_THUMBNAIL_HEIGHT/3) * 4) self._debug('cover width', self.THUMBNAIL_WIDTH) elif hasattr(self, 'THUMBNAIL_WIDTH'): - delattr(self, 'THUMBNAIL_WIDTH') + delattr(self, 'THUMBNAIL_WIDTH') self.is_read_sync_col = result.get('isReadSyncCol', None) self._debug('Device is_read sync col', self.is_read_sync_col) diff --git a/src/calibre/ebooks/conversion/plugins/epub_output.py b/src/calibre/ebooks/conversion/plugins/epub_output.py index a6967a8645..25f6c3b2aa 100644 --- a/src/calibre/ebooks/conversion/plugins/epub_output.py +++ b/src/calibre/ebooks/conversion/plugins/epub_output.py @@ -12,7 +12,6 @@ from calibre.customize.conversion import (OutputFormatPlugin, OptionRecommendation) from calibre.ptempfile import TemporaryDirectory from calibre import CurrentDir -from calibre.constants import filesystem_encoding from polyglot.builtins import unicode_type block_level_tags = ( @@ -41,7 +40,7 @@ block_level_tags = ( 'pre', 'table', 'ul', - ) +) class EPUBOutput(OutputFormatPlugin): @@ -326,13 +325,11 @@ class EPUBOutput(OutputFormatPlugin): fonts = [] for uri in list(uris.keys()): path = uris[uri] - if isinstance(path, unicode_type): - path = path.encode(filesystem_encoding) if not os.path.exists(path): uris.pop(uri) continue self.log.debug('Encrypting font:', uri) - with open(path, 'r+b') as f: + with lopen(path, 'r+b') as f: data = f.read(1024) if len(data) >= 1024: f.seek(0) diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index 9ccfe8f2a5..9244ebb686 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -55,11 +55,7 @@ def munge_paths(basepath, url): if not path: path = basepath elif not os.path.isabs(path): - if isinstance(path, unicode_type): - path = path.encode(sys.getfilesystemencoding()) dn = os.path.dirname(basepath) - if isinstance(dn, unicode_type): - dn = dn.encode(sys.getfilesystemencoding()) path = os.path.join(dn, path) return os.path.normpath(path), fragment @@ -1480,11 +1476,6 @@ class HTMLConverter(object): ext = os.path.splitext(path)[1] if ext: ext = ext[1:].lower() - enc = sys.getfilesystemencoding() - if not enc: - enc = 'utf8' - if isinstance(path, unicode_type): - path = path.encode(enc, 'replace') if os.access(path, os.R_OK) and os.path.isfile(path): if ext in ['png', 'jpg', 'bmp', 'jpeg']: self.process_image(path, tag_css) @@ -1811,8 +1802,6 @@ class HTMLConverter(object): def process_file(path, options, logger): - if not isinstance(path, unicode_type): - path = path.decode(sys.getfilesystemencoding()) path = os.path.abspath(path) default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0]) dirpath = os.path.dirname(path) diff --git a/src/calibre/ebooks/lrf/meta.py b/src/calibre/ebooks/lrf/meta.py index 49e944a70d..882762a78c 100644 --- a/src/calibre/ebooks/lrf/meta.py +++ b/src/calibre/ebooks/lrf/meta.py @@ -196,8 +196,8 @@ class xml_field(object): if not val: val = u'' - if isinstance(val, unicode_type): - val = unicode_type(val, 'utf-8') + if not isinstance(val, unicode_type): + val = val.decode('utf-8') elems = document.getElementsByTagName(self.tag_name) elem = None diff --git a/src/calibre/ebooks/lrf/objects.py b/src/calibre/ebooks/lrf/objects.py index c23b1136ac..83895b8860 100644 --- a/src/calibre/ebooks/lrf/objects.py +++ b/src/calibre/ebooks/lrf/objects.py @@ -4,6 +4,7 @@ __copyright__ = '2008, Kovid Goyal ' import struct, array, zlib, cStringIO, collections, re from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE +from calibre.constants import ispy3 from calibre import entity_to_unicode, prepare_string_for_xml from calibre.ebooks.lrf.tags import Tag from polyglot.builtins import unicode_type @@ -88,11 +89,8 @@ class LRFObject(object): for i in range(0): yield i - def __unicode__(self): - return unicode_type(self.__class__.__name__) - def __str__(self): - return unicode_type(self).encode('utf-8') + return self.__class__.__name__ class LRFContentObject(LRFObject): @@ -204,12 +202,15 @@ class StyleObject(object): s += u'%s="%s" '%(attr, getattr(self, attr)) return s - def __unicode__(self): + def __str__(self): s = u'<%s objid="%s" stylelabel="%s" '%(self.__class__.__name__.replace('Attr', 'Style'), self.id, self.id) s += self._tags_to_xml() s += u'/>\n' return s + if not ispy3: + __unicode__ = __str__ + def as_dict(self): d = {} for h in self.tag_map.values(): @@ -252,11 +253,11 @@ class Color(object): def __init__(self, val): self.a, self.r, self.g, self.b = val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF - def __unicode__(self): + def __str__(self): return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b) - def __str__(self): - return unicode_type(self) + if not ispy3: + __unicode__ = __str__ def __len__(self): return 4 @@ -284,10 +285,13 @@ class PageDiv(EmptyPageElement): self.pain, self.spacesize, self.linewidth = pain, spacesize, linewidth self.linecolor = Color(linecolor) - def __unicode__(self): + def __str__(self): return u'\n\n'%\ (self.pain, self.spacesize, self.linewidth, self.color) + if not ispy3: + __unicode__ = __str__ + class RuledLine(EmptyPageElement): @@ -299,19 +303,25 @@ class RuledLine(EmptyPageElement): self.linecolor = Color(linecolor) self.id = -1 - def __unicode__(self): + def __str__(self): return u'\n\n'%\ (self.linelength, self.linetype, self.linewidth, self.linecolor) + if not ispy3: + __unicode__ = __str__ + class Wait(EmptyPageElement): def __init__(self, time): self.time = time - def __unicode__(self): + def __str__(self): return u'\n\n'%(self.time) + if not ispy3: + __unicode__ = __str__ + class Locate(EmptyPageElement): @@ -320,19 +330,25 @@ class Locate(EmptyPageElement): def __init__(self, pos): self.pos = self.pos_map[pos] - def __unicode__(self): + def __str__(self): return u'\n\n'%(self.pos) + if not ispy3: + __unicode__ = __str__ + class BlockSpace(EmptyPageElement): def __init__(self, xspace, yspace): self.xspace, self.yspace = xspace, yspace - def __unicode__(self): + def __str__(self): return u'\n\n'%\ (self.xspace, self.yspace) + if not ispy3: + __unicode__ = __str__ + class Page(LRFStream): tag_map = { @@ -427,15 +443,15 @@ class Page(LRFStream): for i in self.content: yield i - def __unicode__(self): + def __str__(self): s = u'\n\n'%(self.style_id, self.id) for i in self: s += unicode_type(i) s += '\n\n' return s - def __str__(self): - return unicode_type(self) + if not ispy3: + __unicode__ = __str__ def to_html(self): s = u'' @@ -612,7 +628,7 @@ class Block(LRFStream, TextCSS): if hasattr(self, attr): self.attrs[attr] = getattr(self, attr) - def __unicode__(self): + def __str__(self): s = u'\n<%s objid="%d" blockstyle="%d" '%(self.name, self.id, self.style_id) if hasattr(self, 'textstyle_id'): s += 'textstyle="%d" '%(self.textstyle_id,) @@ -625,6 +641,9 @@ class Block(LRFStream, TextCSS): return s return s.rstrip() + ' />\n' + if not ispy3: + __unicode__ = __str__ + def to_html(self): if self.name == 'TextBlock': return u'
%s
'%(self.style_id, self.textstyle_id, self.content.to_html()) @@ -697,12 +716,15 @@ class Text(LRFStream): self.attrs = attrs self.self_closing = self_closing - def __unicode__(self): + def __str__(self): s = u'<%s '%(self.name,) for name, val in self.attrs.items(): s += '%s="%s" '%(name, val) return s.rstrip() + (u' />' if self.self_closing else u'>') + if not ispy3: + __unicode__ = __str__ + def to_html(self): s = u'' return s @@ -878,7 +900,7 @@ class Text(LRFStream): self.close_containers() self.stream = None - def __unicode__(self): + def __str__(self): s = u'' open_containers = collections.deque() for c in self.content: @@ -900,6 +922,9 @@ class Text(LRFStream): raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],)) return s + if not ispy3: + __unicode__ = __str__ + def to_html(self): s = u'' open_containers = collections.deque() @@ -944,10 +969,13 @@ class Image(LRFObject): encoding = property(fget=lambda self : self._document.objects[self.refstream].encoding) data = property(fget=lambda self : self._document.objects[self.refstream].stream) - def __unicode__(self): + def __str__(self): return u'\n'%\ (self.id, self.x0, self.y0, self.x1, self.y1, self.xsize, self.ysize, self.refstream) + if not ispy3: + __unicode__ = __str__ + class PutObj(EmptyPageElement): @@ -955,9 +983,12 @@ class PutObj(EmptyPageElement): self.x1, self.y1, self.refobj = x1, y1, refobj self.object = objects[refobj] - def __unicode__(self): + def __str__(self): return u''%(self.x1, self.y1, self.refobj) + if not ispy3: + __unicode__ = __str__ + class Canvas(LRFStream): tag_map = { @@ -996,7 +1027,7 @@ class Canvas(LRFStream): except struct.error: print('Canvas object has errors, skipping.') - def __unicode__(self): + def __str__(self): s = '\n<%s objid="%s" '%(self.__class__.__name__, self.id,) for attr in self.attrs: s += '%s="%s" '%(attr, self.attrs[attr]) @@ -1006,6 +1037,9 @@ class Canvas(LRFStream): s += '\n'%(self.__class__.__name__,) return s + if not ispy3: + __unicode__ = __str__ + def __iter__(self): for i in self._contents: yield i @@ -1039,10 +1073,13 @@ class ImageStream(LRFStream): if self._document is not None: self._document.image_map[self.id] = self - def __unicode__(self): + def __str__(self): return u'\n'%\ (self.id, self.encoding, self.file) + if not ispy3: + __unicode__ = __str__ + class Import(LRFStream): pass @@ -1118,7 +1155,7 @@ class Button(LRFObject): return i[1:][0] return (None, None) - def __unicode__(self): + def __str__(self): s = u'