Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
Various py3 related fixes exposed by the unicode patch
parent 56af613e10
commit 5b76089839
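Note on the recurring patterns: the diff below applies the same two idioms throughout. String-producing dunder methods are written once and then aliased according to the interpreter (calibre's ispy3 flag), so that str() yields text on Python 3 and UTF-8 bytes on Python 2; and values that may arrive as bytes are decoded before being treated as text. A minimal, self-contained sketch of both idioms follows; the Example class, the to_text() helper and the local ispy3 flag are illustrative stand-ins, not code from this commit.

import sys

ispy3 = sys.version_info[0] >= 3  # stands in for calibre.constants.ispy3


class Example(object):

    def __init__(self, name):
        self.name = name

    # The textual representation is written once.
    def __str__(self):
        return u'<Example name="%s" />' % self.name

    if not ispy3:
        # On Python 2, unicode() returns the text form and str() its UTF-8 bytes.
        __unicode__ = __str__

        def __str__(self):
            return self.__unicode__().encode('utf-8')


def to_text(x, encoding='utf-8'):
    # The other recurring fix: decode possibly-bytes input before using it as text.
    if isinstance(x, bytes):
        x = x.decode(encoding, 'replace')
    return x


if __name__ == '__main__':
    print(to_text(str(Example('demo'))))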
@ -670,7 +670,7 @@ class DB(object):
|
|||||||
if d['is_multiple']:
|
if d['is_multiple']:
|
||||||
if x is None:
|
if x is None:
|
||||||
return []
|
return []
|
||||||
if isinstance(x, (str, unicode_type, bytes)):
|
if isinstance(x, (unicode_type, bytes)):
|
||||||
x = x.split(d['multiple_seps']['ui_to_list'])
|
x = x.split(d['multiple_seps']['ui_to_list'])
|
||||||
x = [y.strip() for y in x if y.strip()]
|
x = [y.strip() for y in x if y.strip()]
|
||||||
x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
|
x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
|
||||||
@ -681,12 +681,16 @@ class DB(object):
|
|||||||
x.decode(preferred_encoding, 'replace')
|
x.decode(preferred_encoding, 'replace')
|
||||||
|
|
||||||
def adapt_datetime(x, d):
|
def adapt_datetime(x, d):
|
||||||
if isinstance(x, (str, unicode_type, bytes)):
|
if isinstance(x, (unicode_type, bytes)):
|
||||||
|
if isinstance(x, bytes):
|
||||||
|
x = x.decode(preferred_encoding, 'replace')
|
||||||
x = parse_date(x, assume_utc=False, as_utc=False)
|
x = parse_date(x, assume_utc=False, as_utc=False)
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def adapt_bool(x, d):
|
def adapt_bool(x, d):
|
||||||
if isinstance(x, (str, unicode_type, bytes)):
|
if isinstance(x, (unicode_type, bytes)):
|
||||||
|
if isinstance(x, bytes):
|
||||||
|
x = x.decode(preferred_encoding, 'replace')
|
||||||
x = x.lower()
|
x = x.lower()
|
||||||
if x == 'true':
|
if x == 'true':
|
||||||
x = True
|
x = True
|
||||||
@ -707,7 +711,9 @@ class DB(object):
|
|||||||
def adapt_number(x, d):
|
def adapt_number(x, d):
|
||||||
if x is None:
|
if x is None:
|
||||||
return None
|
return None
|
||||||
if isinstance(x, (str, unicode_type, bytes)):
|
if isinstance(x, (unicode_type, bytes)):
|
||||||
|
if isinstance(x, bytes):
|
||||||
|
x = x.decode(preferred_encoding, 'replace')
|
||||||
if x.lower() == 'none':
|
if x.lower() == 'none':
|
||||||
return None
|
return None
|
||||||
if d['datatype'] == 'int':
|
if d['datatype'] == 'int':
|
||||||
@ -1083,7 +1089,7 @@ class DB(object):
|
|||||||
|
|
||||||
def dump_and_restore(self, callback=None, sql=None):
|
def dump_and_restore(self, callback=None, sql=None):
|
||||||
import codecs
|
import codecs
|
||||||
from calibre.utils.apsw_shell import Shell
|
from apsw import Shell
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
if callback is None:
|
if callback is None:
|
||||||
callback = lambda x: x
|
callback = lambda x: x
|
||||||
@ -1096,7 +1102,7 @@ class DB(object):
|
|||||||
shell = Shell(db=self.conn, stdout=buf)
|
shell = Shell(db=self.conn, stdout=buf)
|
||||||
shell.process_command('.dump')
|
shell.process_command('.dump')
|
||||||
else:
|
else:
|
||||||
with open(fname, 'wb') as buf:
|
with lopen(fname, 'wb') as buf:
|
||||||
buf.write(sql if isinstance(sql, bytes) else sql.encode('utf-8'))
|
buf.write(sql if isinstance(sql, bytes) else sql.encode('utf-8'))
|
||||||
|
|
||||||
with TemporaryFile(suffix='_tmpdb.db', dir=os.path.dirname(self.dbpath)) as tmpdb:
|
with TemporaryFile(suffix='_tmpdb.db', dir=os.path.dirname(self.dbpath)) as tmpdb:
|
||||||
|
@ -11,6 +11,7 @@ import copy
|
|||||||
from functools import partial
|
from functools import partial
|
||||||
from polyglot.builtins import unicode_type, map
|
from polyglot.builtins import unicode_type, map
|
||||||
|
|
||||||
|
from calibre.constants import ispy3
|
||||||
from calibre.ebooks.metadata import author_to_author_sort
|
from calibre.ebooks.metadata import author_to_author_sort
|
||||||
from calibre.utils.config_base import tweaks
|
from calibre.utils.config_base import tweaks
|
||||||
from calibre.utils.icu import sort_key, collation_order
|
from calibre.utils.icu import sort_key, collation_order
|
||||||
@ -43,11 +44,19 @@ class Tag(object):
|
|||||||
self.search_expression = search_expression
|
self.search_expression = search_expression
|
||||||
self.original_categories = None
|
self.original_categories = None
|
||||||
|
|
||||||
def __unicode__(self):
|
@property
|
||||||
|
def string_representation(self):
|
||||||
return u'%s:%s:%s:%s:%s'%(self.name, self.count, self.id, self.state, self.category)
|
return u'%s:%s:%s:%s:%s'%(self.name, self.count, self.id, self.state, self.category)
|
||||||
|
|
||||||
|
if ispy3:
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return unicode_type(self).encode('utf-8')
|
return self.string_representation
|
||||||
|
else:
|
||||||
|
def __str__(self):
|
||||||
|
return self.string_representation.encode('utf-8')
|
||||||
|
|
||||||
|
def __unicode__(self):
|
||||||
|
return self.string_representation
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return str(self)
|
return str(self)
|
||||||
|
@ -149,7 +149,9 @@ class DateSearch(object): # {{{
|
|||||||
|
|
||||||
if query == 'false':
|
if query == 'false':
|
||||||
for v, book_ids in field_iter():
|
for v, book_ids in field_iter():
|
||||||
if isinstance(v, (str, unicode_type)):
|
if isinstance(v, (bytes, unicode_type)):
|
||||||
|
if isinstance(v, bytes):
|
||||||
|
v = v.decode(preferred_encoding, 'replace')
|
||||||
v = parse_date(v)
|
v = parse_date(v)
|
||||||
if v is None or v <= UNDEFINED_DATE:
|
if v is None or v <= UNDEFINED_DATE:
|
||||||
matches |= book_ids
|
matches |= book_ids
|
||||||
@ -157,7 +159,9 @@ class DateSearch(object): # {{{
|
|||||||
|
|
||||||
if query == 'true':
|
if query == 'true':
|
||||||
for v, book_ids in field_iter():
|
for v, book_ids in field_iter():
|
||||||
if isinstance(v, (str, unicode_type)):
|
if isinstance(v, (bytes, unicode_type)):
|
||||||
|
if isinstance(v, bytes):
|
||||||
|
v = v.decode(preferred_encoding, 'replace')
|
||||||
v = parse_date(v)
|
v = parse_date(v)
|
||||||
if v is not None and v > UNDEFINED_DATE:
|
if v is not None and v > UNDEFINED_DATE:
|
||||||
matches |= book_ids
|
matches |= book_ids
|
||||||
|
@ -13,13 +13,15 @@ from polyglot.builtins import map, unicode_type
|
|||||||
from threading import Lock
|
from threading import Lock
|
||||||
|
|
||||||
from calibre import as_unicode, prints
|
from calibre import as_unicode, prints
|
||||||
from calibre.constants import cache_dir, get_windows_number_formats, iswindows
|
from calibre.constants import cache_dir, get_windows_number_formats, iswindows, preferred_encoding
|
||||||
|
|
||||||
from calibre.utils.localization import canonicalize_lang
|
from calibre.utils.localization import canonicalize_lang
|
||||||
|
|
||||||
|
|
||||||
def force_to_bool(val):
|
def force_to_bool(val):
|
||||||
if isinstance(val, (str, unicode_type)):
|
if isinstance(val, (bytes, unicode_type)):
|
||||||
|
if isinstance(val, bytes):
|
||||||
|
val = val.decode(preferred_encoding, 'replace')
|
||||||
try:
|
try:
|
||||||
val = icu_lower(val)
|
val = icu_lower(val)
|
||||||
if not val:
|
if not val:
|
||||||
|
@ -88,6 +88,8 @@ def adapt_number(typ, x):
|
|||||||
if x is None:
|
if x is None:
|
||||||
return None
|
return None
|
||||||
if isinstance(x, (unicode_type, bytes)):
|
if isinstance(x, (unicode_type, bytes)):
|
||||||
|
if isinstance(x, bytes):
|
||||||
|
x = x.decode(preferred_encoding, 'replace')
|
||||||
if not x or x.lower() == 'none':
|
if not x or x.lower() == 'none':
|
||||||
return None
|
return None
|
||||||
return typ(x)
|
return typ(x)
|
||||||
@ -95,6 +97,8 @@ def adapt_number(typ, x):
|
|||||||
|
|
||||||
def adapt_bool(x):
|
def adapt_bool(x):
|
||||||
if isinstance(x, (unicode_type, bytes)):
|
if isinstance(x, (unicode_type, bytes)):
|
||||||
|
if isinstance(x, bytes):
|
||||||
|
x = x.decode(preferred_encoding, 'replace')
|
||||||
x = x.lower()
|
x = x.lower()
|
||||||
if x == 'true':
|
if x == 'true':
|
||||||
x = True
|
x = True
|
||||||
|
@ -171,7 +171,7 @@ class PRST1(USBMS):
|
|||||||
|
|
||||||
with closing(sqlite.connect(dbpath)) as connection:
|
with closing(sqlite.connect(dbpath)) as connection:
|
||||||
# Replace undecodable characters in the db instead of erroring out
|
# Replace undecodable characters in the db instead of erroring out
|
||||||
connection.text_factory = lambda x: unicode_type(x, "utf-8", "replace")
|
connection.text_factory = lambda x: x if isinstance(x, unicode_type) else x.decode('utf-8', 'replace')
|
||||||
|
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
# Query collections
|
# Query collections
|
||||||
@ -758,7 +758,7 @@ class PRST1(USBMS):
|
|||||||
|
|
||||||
thumbnail_path = THUMBPATH%book.bookId
|
thumbnail_path = THUMBPATH%book.bookId
|
||||||
|
|
||||||
prefix = self._main_prefix if source_id is 0 else self._card_a_prefix
|
prefix = self._main_prefix if source_id == 0 else self._card_a_prefix
|
||||||
thumbnail_file_path = os.path.join(prefix, *thumbnail_path.split('/'))
|
thumbnail_file_path = os.path.join(prefix, *thumbnail_path.split('/'))
|
||||||
thumbnail_dir_path = os.path.dirname(thumbnail_file_path)
|
thumbnail_dir_path = os.path.dirname(thumbnail_file_path)
|
||||||
if not os.path.exists(thumbnail_dir_path):
|
if not os.path.exists(thumbnail_dir_path):
|
||||||
|
@ -398,7 +398,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
|
|||||||
if isinstance(a, dict):
|
if isinstance(a, dict):
|
||||||
printable = {}
|
printable = {}
|
||||||
for k,v in a.iteritems():
|
for k,v in a.iteritems():
|
||||||
if isinstance(v, (str, unicode_type)) and len(v) > 50:
|
if isinstance(v, (bytes, unicode_type)) and len(v) > 50:
|
||||||
printable[k] = 'too long'
|
printable[k] = 'too long'
|
||||||
else:
|
else:
|
||||||
printable[k] = v
|
printable[k] = v
|
||||||
|
@ -12,7 +12,6 @@ from calibre.customize.conversion import (OutputFormatPlugin,
|
|||||||
OptionRecommendation)
|
OptionRecommendation)
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre import CurrentDir
|
from calibre import CurrentDir
|
||||||
from calibre.constants import filesystem_encoding
|
|
||||||
from polyglot.builtins import unicode_type
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
block_level_tags = (
|
block_level_tags = (
|
||||||
@ -326,13 +325,11 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
fonts = []
|
fonts = []
|
||||||
for uri in list(uris.keys()):
|
for uri in list(uris.keys()):
|
||||||
path = uris[uri]
|
path = uris[uri]
|
||||||
if isinstance(path, unicode_type):
|
|
||||||
path = path.encode(filesystem_encoding)
|
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path):
|
||||||
uris.pop(uri)
|
uris.pop(uri)
|
||||||
continue
|
continue
|
||||||
self.log.debug('Encrypting font:', uri)
|
self.log.debug('Encrypting font:', uri)
|
||||||
with open(path, 'r+b') as f:
|
with lopen(path, 'r+b') as f:
|
||||||
data = f.read(1024)
|
data = f.read(1024)
|
||||||
if len(data) >= 1024:
|
if len(data) >= 1024:
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
|
@ -55,11 +55,7 @@ def munge_paths(basepath, url):
|
|||||||
if not path:
|
if not path:
|
||||||
path = basepath
|
path = basepath
|
||||||
elif not os.path.isabs(path):
|
elif not os.path.isabs(path):
|
||||||
if isinstance(path, unicode_type):
|
|
||||||
path = path.encode(sys.getfilesystemencoding())
|
|
||||||
dn = os.path.dirname(basepath)
|
dn = os.path.dirname(basepath)
|
||||||
if isinstance(dn, unicode_type):
|
|
||||||
dn = dn.encode(sys.getfilesystemencoding())
|
|
||||||
path = os.path.join(dn, path)
|
path = os.path.join(dn, path)
|
||||||
return os.path.normpath(path), fragment
|
return os.path.normpath(path), fragment
|
||||||
|
|
||||||
@ -1480,11 +1476,6 @@ class HTMLConverter(object):
|
|||||||
ext = os.path.splitext(path)[1]
|
ext = os.path.splitext(path)[1]
|
||||||
if ext:
|
if ext:
|
||||||
ext = ext[1:].lower()
|
ext = ext[1:].lower()
|
||||||
enc = sys.getfilesystemencoding()
|
|
||||||
if not enc:
|
|
||||||
enc = 'utf8'
|
|
||||||
if isinstance(path, unicode_type):
|
|
||||||
path = path.encode(enc, 'replace')
|
|
||||||
if os.access(path, os.R_OK) and os.path.isfile(path):
|
if os.access(path, os.R_OK) and os.path.isfile(path):
|
||||||
if ext in ['png', 'jpg', 'bmp', 'jpeg']:
|
if ext in ['png', 'jpg', 'bmp', 'jpeg']:
|
||||||
self.process_image(path, tag_css)
|
self.process_image(path, tag_css)
|
||||||
@ -1811,8 +1802,6 @@ class HTMLConverter(object):
|
|||||||
|
|
||||||
|
|
||||||
def process_file(path, options, logger):
|
def process_file(path, options, logger):
|
||||||
if not isinstance(path, unicode_type):
|
|
||||||
path = path.decode(sys.getfilesystemencoding())
|
|
||||||
path = os.path.abspath(path)
|
path = os.path.abspath(path)
|
||||||
default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
|
default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
|
||||||
dirpath = os.path.dirname(path)
|
dirpath = os.path.dirname(path)
|
||||||
|
@ -196,8 +196,8 @@ class xml_field(object):
|
|||||||
|
|
||||||
if not val:
|
if not val:
|
||||||
val = u''
|
val = u''
|
||||||
if isinstance(val, unicode_type):
|
if not isinstance(val, unicode_type):
|
||||||
val = unicode_type(val, 'utf-8')
|
val = val.decode('utf-8')
|
||||||
|
|
||||||
elems = document.getElementsByTagName(self.tag_name)
|
elems = document.getElementsByTagName(self.tag_name)
|
||||||
elem = None
|
elem = None
|
||||||
|
@ -4,6 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
import struct, array, zlib, cStringIO, collections, re
|
import struct, array, zlib, cStringIO, collections, re
|
||||||
|
|
||||||
from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE
|
from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE
|
||||||
|
from calibre.constants import ispy3
|
||||||
from calibre import entity_to_unicode, prepare_string_for_xml
|
from calibre import entity_to_unicode, prepare_string_for_xml
|
||||||
from calibre.ebooks.lrf.tags import Tag
|
from calibre.ebooks.lrf.tags import Tag
|
||||||
from polyglot.builtins import unicode_type
|
from polyglot.builtins import unicode_type
|
||||||
@ -88,11 +89,8 @@ class LRFObject(object):
|
|||||||
for i in range(0):
|
for i in range(0):
|
||||||
yield i
|
yield i
|
||||||
|
|
||||||
def __unicode__(self):
|
|
||||||
return unicode_type(self.__class__.__name__)
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return unicode_type(self).encode('utf-8')
|
return self.__class__.__name__
|
||||||
|
|
||||||
|
|
||||||
class LRFContentObject(LRFObject):
|
class LRFContentObject(LRFObject):
|
||||||
@ -204,12 +202,15 @@ class StyleObject(object):
|
|||||||
s += u'%s="%s" '%(attr, getattr(self, attr))
|
s += u'%s="%s" '%(attr, getattr(self, attr))
|
||||||
return s
|
return s
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
s = u'<%s objid="%s" stylelabel="%s" '%(self.__class__.__name__.replace('Attr', 'Style'), self.id, self.id)
|
s = u'<%s objid="%s" stylelabel="%s" '%(self.__class__.__name__.replace('Attr', 'Style'), self.id, self.id)
|
||||||
s += self._tags_to_xml()
|
s += self._tags_to_xml()
|
||||||
s += u'/>\n'
|
s += u'/>\n'
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
def as_dict(self):
|
def as_dict(self):
|
||||||
d = {}
|
d = {}
|
||||||
for h in self.tag_map.values():
|
for h in self.tag_map.values():
|
||||||
@ -252,11 +253,11 @@ class Color(object):
|
|||||||
def __init__(self, val):
|
def __init__(self, val):
|
||||||
self.a, self.r, self.g, self.b = val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF
|
self.a, self.r, self.g, self.b = val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b)
|
return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b)
|
||||||
|
|
||||||
def __str__(self):
|
if not ispy3:
|
||||||
return unicode_type(self)
|
__unicode__ = __str__
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return 4
|
return 4
|
||||||
@ -284,10 +285,13 @@ class PageDiv(EmptyPageElement):
|
|||||||
self.pain, self.spacesize, self.linewidth = pain, spacesize, linewidth
|
self.pain, self.spacesize, self.linewidth = pain, spacesize, linewidth
|
||||||
self.linecolor = Color(linecolor)
|
self.linecolor = Color(linecolor)
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
return u'\n<PageDiv pain="%s" spacesize="%s" linewidth="%s" linecolor="%s" />\n'%\
|
return u'\n<PageDiv pain="%s" spacesize="%s" linewidth="%s" linecolor="%s" />\n'%\
|
||||||
(self.pain, self.spacesize, self.linewidth, self.color)
|
(self.pain, self.spacesize, self.linewidth, self.color)
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
|
|
||||||
class RuledLine(EmptyPageElement):
|
class RuledLine(EmptyPageElement):
|
||||||
|
|
||||||
@ -299,19 +303,25 @@ class RuledLine(EmptyPageElement):
|
|||||||
self.linecolor = Color(linecolor)
|
self.linecolor = Color(linecolor)
|
||||||
self.id = -1
|
self.id = -1
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
return u'\n<RuledLine linelength="%s" linetype="%s" linewidth="%s" linecolor="%s" />\n'%\
|
return u'\n<RuledLine linelength="%s" linetype="%s" linewidth="%s" linecolor="%s" />\n'%\
|
||||||
(self.linelength, self.linetype, self.linewidth, self.linecolor)
|
(self.linelength, self.linetype, self.linewidth, self.linecolor)
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
|
|
||||||
class Wait(EmptyPageElement):
|
class Wait(EmptyPageElement):
|
||||||
|
|
||||||
def __init__(self, time):
|
def __init__(self, time):
|
||||||
self.time = time
|
self.time = time
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
return u'\n<Wait time="%d" />\n'%(self.time)
|
return u'\n<Wait time="%d" />\n'%(self.time)
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
|
|
||||||
class Locate(EmptyPageElement):
|
class Locate(EmptyPageElement):
|
||||||
|
|
||||||
@ -320,19 +330,25 @@ class Locate(EmptyPageElement):
|
|||||||
def __init__(self, pos):
|
def __init__(self, pos):
|
||||||
self.pos = self.pos_map[pos]
|
self.pos = self.pos_map[pos]
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
return u'\n<Locate pos="%s" />\n'%(self.pos)
|
return u'\n<Locate pos="%s" />\n'%(self.pos)
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
|
|
||||||
class BlockSpace(EmptyPageElement):
|
class BlockSpace(EmptyPageElement):
|
||||||
|
|
||||||
def __init__(self, xspace, yspace):
|
def __init__(self, xspace, yspace):
|
||||||
self.xspace, self.yspace = xspace, yspace
|
self.xspace, self.yspace = xspace, yspace
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
return u'\n<BlockSpace xspace="%d" yspace="%d" />\n'%\
|
return u'\n<BlockSpace xspace="%d" yspace="%d" />\n'%\
|
||||||
(self.xspace, self.yspace)
|
(self.xspace, self.yspace)
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
|
|
||||||
class Page(LRFStream):
|
class Page(LRFStream):
|
||||||
tag_map = {
|
tag_map = {
|
||||||
@ -427,15 +443,15 @@ class Page(LRFStream):
|
|||||||
for i in self.content:
|
for i in self.content:
|
||||||
yield i
|
yield i
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
s = u'\n<Page pagestyle="%d" objid="%d">\n'%(self.style_id, self.id)
|
s = u'\n<Page pagestyle="%d" objid="%d">\n'%(self.style_id, self.id)
|
||||||
for i in self:
|
for i in self:
|
||||||
s += unicode_type(i)
|
s += unicode_type(i)
|
||||||
s += '\n</Page>\n'
|
s += '\n</Page>\n'
|
||||||
return s
|
return s
|
||||||
|
|
||||||
def __str__(self):
|
if not ispy3:
|
||||||
return unicode_type(self)
|
__unicode__ = __str__
|
||||||
|
|
||||||
def to_html(self):
|
def to_html(self):
|
||||||
s = u''
|
s = u''
|
||||||
@ -612,7 +628,7 @@ class Block(LRFStream, TextCSS):
|
|||||||
if hasattr(self, attr):
|
if hasattr(self, attr):
|
||||||
self.attrs[attr] = getattr(self, attr)
|
self.attrs[attr] = getattr(self, attr)
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
s = u'\n<%s objid="%d" blockstyle="%d" '%(self.name, self.id, self.style_id)
|
s = u'\n<%s objid="%d" blockstyle="%d" '%(self.name, self.id, self.style_id)
|
||||||
if hasattr(self, 'textstyle_id'):
|
if hasattr(self, 'textstyle_id'):
|
||||||
s += 'textstyle="%d" '%(self.textstyle_id,)
|
s += 'textstyle="%d" '%(self.textstyle_id,)
|
||||||
@ -625,6 +641,9 @@ class Block(LRFStream, TextCSS):
|
|||||||
return s
|
return s
|
||||||
return s.rstrip() + ' />\n'
|
return s.rstrip() + ' />\n'
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
def to_html(self):
|
def to_html(self):
|
||||||
if self.name == 'TextBlock':
|
if self.name == 'TextBlock':
|
||||||
return u'<div class="block%s text%s">%s</div>'%(self.style_id, self.textstyle_id, self.content.to_html())
|
return u'<div class="block%s text%s">%s</div>'%(self.style_id, self.textstyle_id, self.content.to_html())
|
||||||
@ -697,12 +716,15 @@ class Text(LRFStream):
|
|||||||
self.attrs = attrs
|
self.attrs = attrs
|
||||||
self.self_closing = self_closing
|
self.self_closing = self_closing
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
s = u'<%s '%(self.name,)
|
s = u'<%s '%(self.name,)
|
||||||
for name, val in self.attrs.items():
|
for name, val in self.attrs.items():
|
||||||
s += '%s="%s" '%(name, val)
|
s += '%s="%s" '%(name, val)
|
||||||
return s.rstrip() + (u' />' if self.self_closing else u'>')
|
return s.rstrip() + (u' />' if self.self_closing else u'>')
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
def to_html(self):
|
def to_html(self):
|
||||||
s = u''
|
s = u''
|
||||||
return s
|
return s
|
||||||
@ -878,7 +900,7 @@ class Text(LRFStream):
|
|||||||
self.close_containers()
|
self.close_containers()
|
||||||
self.stream = None
|
self.stream = None
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
s = u''
|
s = u''
|
||||||
open_containers = collections.deque()
|
open_containers = collections.deque()
|
||||||
for c in self.content:
|
for c in self.content:
|
||||||
@ -900,6 +922,9 @@ class Text(LRFStream):
|
|||||||
raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],))
|
raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],))
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
def to_html(self):
|
def to_html(self):
|
||||||
s = u''
|
s = u''
|
||||||
open_containers = collections.deque()
|
open_containers = collections.deque()
|
||||||
@ -944,10 +969,13 @@ class Image(LRFObject):
|
|||||||
encoding = property(fget=lambda self : self._document.objects[self.refstream].encoding)
|
encoding = property(fget=lambda self : self._document.objects[self.refstream].encoding)
|
||||||
data = property(fget=lambda self : self._document.objects[self.refstream].stream)
|
data = property(fget=lambda self : self._document.objects[self.refstream].stream)
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
return u'<Image objid="%s" x0="%d" y0="%d" x1="%d" y1="%d" xsize="%d" ysize="%d" refstream="%d" />\n'%\
|
return u'<Image objid="%s" x0="%d" y0="%d" x1="%d" y1="%d" xsize="%d" ysize="%d" refstream="%d" />\n'%\
|
||||||
(self.id, self.x0, self.y0, self.x1, self.y1, self.xsize, self.ysize, self.refstream)
|
(self.id, self.x0, self.y0, self.x1, self.y1, self.xsize, self.ysize, self.refstream)
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
|
|
||||||
class PutObj(EmptyPageElement):
|
class PutObj(EmptyPageElement):
|
||||||
|
|
||||||
@ -955,9 +983,12 @@ class PutObj(EmptyPageElement):
|
|||||||
self.x1, self.y1, self.refobj = x1, y1, refobj
|
self.x1, self.y1, self.refobj = x1, y1, refobj
|
||||||
self.object = objects[refobj]
|
self.object = objects[refobj]
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
return u'<PutObj x1="%d" y1="%d" refobj="%d" />'%(self.x1, self.y1, self.refobj)
|
return u'<PutObj x1="%d" y1="%d" refobj="%d" />'%(self.x1, self.y1, self.refobj)
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
|
|
||||||
class Canvas(LRFStream):
|
class Canvas(LRFStream):
|
||||||
tag_map = {
|
tag_map = {
|
||||||
@ -996,7 +1027,7 @@ class Canvas(LRFStream):
|
|||||||
except struct.error:
|
except struct.error:
|
||||||
print('Canvas object has errors, skipping.')
|
print('Canvas object has errors, skipping.')
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
s = '\n<%s objid="%s" '%(self.__class__.__name__, self.id,)
|
s = '\n<%s objid="%s" '%(self.__class__.__name__, self.id,)
|
||||||
for attr in self.attrs:
|
for attr in self.attrs:
|
||||||
s += '%s="%s" '%(attr, self.attrs[attr])
|
s += '%s="%s" '%(attr, self.attrs[attr])
|
||||||
@ -1006,6 +1037,9 @@ class Canvas(LRFStream):
|
|||||||
s += '</%s>\n'%(self.__class__.__name__,)
|
s += '</%s>\n'%(self.__class__.__name__,)
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
for i in self._contents:
|
for i in self._contents:
|
||||||
yield i
|
yield i
|
||||||
@ -1039,10 +1073,13 @@ class ImageStream(LRFStream):
|
|||||||
if self._document is not None:
|
if self._document is not None:
|
||||||
self._document.image_map[self.id] = self
|
self._document.image_map[self.id] = self
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
return u'<ImageStream objid="%s" encoding="%s" file="%s" />\n'%\
|
return u'<ImageStream objid="%s" encoding="%s" file="%s" />\n'%\
|
||||||
(self.id, self.encoding, self.file)
|
(self.id, self.encoding, self.file)
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
|
|
||||||
class Import(LRFStream):
|
class Import(LRFStream):
|
||||||
pass
|
pass
|
||||||
@ -1118,7 +1155,7 @@ class Button(LRFObject):
|
|||||||
return i[1:][0]
|
return i[1:][0]
|
||||||
return (None, None)
|
return (None, None)
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
s = u'<Button objid="%s">\n'%(self.id,)
|
s = u'<Button objid="%s">\n'%(self.id,)
|
||||||
if self.button_flags & 0x10 != 0:
|
if self.button_flags & 0x10 != 0:
|
||||||
s += '<PushButton '
|
s += '<PushButton '
|
||||||
@ -1132,6 +1169,9 @@ class Button(LRFObject):
|
|||||||
s += '</Button>\n'
|
s += '</Button>\n'
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
refpage = property(fget=lambda self : self.jump_action(2)[0])
|
refpage = property(fget=lambda self : self.jump_action(2)[0])
|
||||||
refobj = property(fget=lambda self : self.jump_action(2)[1])
|
refobj = property(fget=lambda self : self.jump_action(2)[1])
|
||||||
|
|
||||||
@ -1192,7 +1232,7 @@ class BookAttr(StyleObject, LRFObject):
|
|||||||
def add_font(self, tag, f):
|
def add_font(self, tag, f):
|
||||||
self.font_link_list.append(tag.dword)
|
self.font_link_list.append(tag.dword)
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
s = u'<BookStyle objid="%s" stylelabel="%s">\n'%(self.id, self.id)
|
s = u'<BookStyle objid="%s" stylelabel="%s">\n'%(self.id, self.id)
|
||||||
s += u'<SetDefault %s />\n'%(self._tags_to_xml(),)
|
s += u'<SetDefault %s />\n'%(self._tags_to_xml(),)
|
||||||
doc = self._document
|
doc = self._document
|
||||||
@ -1203,6 +1243,9 @@ class BookAttr(StyleObject, LRFObject):
|
|||||||
s += '</BookStyle>\n'
|
s += '</BookStyle>\n'
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
|
|
||||||
class SimpleText(Text):
|
class SimpleText(Text):
|
||||||
pass
|
pass
|
||||||
@ -1213,9 +1256,12 @@ class TocLabel(object):
|
|||||||
def __init__(self, refpage, refobject, label):
|
def __init__(self, refpage, refobject, label):
|
||||||
self.refpage, self.refobject, self.label = refpage, refobject, label
|
self.refpage, self.refobject, self.label = refpage, refobject, label
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
return u'<TocLabel refpage="%s" refobj="%s">%s</TocLabel>\n'%(self.refpage, self.refobject, self.label)
|
return u'<TocLabel refpage="%s" refobj="%s">%s</TocLabel>\n'%(self.refpage, self.refobject, self.label)
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
|
|
||||||
class TOCObject(LRFStream):
|
class TOCObject(LRFStream):
|
||||||
|
|
||||||
@ -1237,12 +1283,15 @@ class TOCObject(LRFStream):
|
|||||||
for i in self._contents:
|
for i in self._contents:
|
||||||
yield i
|
yield i
|
||||||
|
|
||||||
def __unicode__(self):
|
def __str__(self):
|
||||||
s = u'<TOC>\n'
|
s = u'<TOC>\n'
|
||||||
for i in self:
|
for i in self:
|
||||||
s += unicode_type(i)
|
s += unicode_type(i)
|
||||||
return s + '</TOC>\n'
|
return s + '</TOC>\n'
|
||||||
|
|
||||||
|
if not ispy3:
|
||||||
|
__unicode__ = __str__
|
||||||
|
|
||||||
|
|
||||||
object_map = [
|
object_map = [
|
||||||
None, # 00
|
None, # 00
|
||||||
|
@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import copy, traceback
|
import copy, traceback
|
||||||
|
|
||||||
from calibre import prints
|
from calibre import prints
|
||||||
from calibre.constants import DEBUG
|
from calibre.constants import DEBUG, ispy3
|
||||||
from calibre.ebooks.metadata.book import (SC_COPYABLE_FIELDS,
|
from calibre.ebooks.metadata.book import (SC_COPYABLE_FIELDS,
|
||||||
SC_FIELDS_COPY_NOT_NULL, STANDARD_METADATA_FIELDS,
|
SC_FIELDS_COPY_NOT_NULL, STANDARD_METADATA_FIELDS,
|
||||||
TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS)
|
TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS)
|
||||||
@ -709,7 +709,7 @@ class Metadata(object):
|
|||||||
|
|
||||||
return (None, None, None, None)
|
return (None, None, None, None)
|
||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__representation__(self):
|
||||||
'''
|
'''
|
||||||
A string representation of this object, suitable for printing to
|
A string representation of this object, suitable for printing to
|
||||||
console
|
console
|
||||||
@ -791,11 +791,17 @@ class Metadata(object):
|
|||||||
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
|
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
|
||||||
return u'<table>%s</table>'%u'\n'.join(ans)
|
return u'<table>%s</table>'%u'\n'.join(ans)
|
||||||
|
|
||||||
|
if ispy3:
|
||||||
|
__str__ = __unicode__representation__
|
||||||
|
else:
|
||||||
|
__unicode__ = __unicode__representation__
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.__unicode__().encode('utf-8')
|
return self.__unicode__().encode('utf-8')
|
||||||
|
|
||||||
def __nonzero__(self):
|
def __nonzero__(self):
|
||||||
return bool(self.title or self.author or self.comments or self.tags)
|
return bool(self.title or self.author or self.comments or self.tags)
|
||||||
|
__bool__ = __nonzero__
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
@ -115,7 +115,7 @@ def get_metadata(stream):
|
|||||||
if book_title:
|
if book_title:
|
||||||
book_title = unicode_type(book_title)
|
book_title = unicode_type(book_title)
|
||||||
else:
|
else:
|
||||||
book_title = force_unicode_type(os.path.splitext(
|
book_title = force_unicode(os.path.splitext(
|
||||||
os.path.basename(getattr(stream, 'name',
|
os.path.basename(getattr(stream, 'name',
|
||||||
_('Unknown'))))[0])
|
_('Unknown'))))[0])
|
||||||
mi = MetaInformation(book_title, authors)
|
mi = MetaInformation(book_title, authors)
|
||||||
|
@ -15,7 +15,7 @@ from urlparse import urlparse
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from calibre.ebooks import escape_xpath_attr
|
from calibre.ebooks import escape_xpath_attr
|
||||||
from calibre.constants import __appname__, __version__, filesystem_encoding
|
from calibre.constants import __appname__, __version__, filesystem_encoding, ispy3
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
from calibre.ebooks.metadata.utils import parse_opf, pretty_print_opf as _pretty_print
|
from calibre.ebooks.metadata.utils import parse_opf, pretty_print_opf as _pretty_print
|
||||||
from calibre.ebooks.metadata import string_to_authors, MetaInformation, check_isbn
|
from calibre.ebooks.metadata import string_to_authors, MetaInformation, check_isbn
|
||||||
@ -73,7 +73,7 @@ class Resource(object): # {{{
|
|||||||
path = href_or_path
|
path = href_or_path
|
||||||
if not os.path.isabs(path):
|
if not os.path.isabs(path):
|
||||||
path = os.path.abspath(os.path.join(basedir, path))
|
path = os.path.abspath(os.path.join(basedir, path))
|
||||||
if isinstance(path, str):
|
if isinstance(path, bytes):
|
||||||
path = path.decode(sys.getfilesystemencoding())
|
path = path.decode(sys.getfilesystemencoding())
|
||||||
self.path = path
|
self.path = path
|
||||||
else:
|
else:
|
||||||
@ -112,8 +112,8 @@ class Resource(object): # {{{
|
|||||||
rpath = os.path.relpath(self.path, basedir)
|
rpath = os.path.relpath(self.path, basedir)
|
||||||
except ValueError: # On windows path and basedir could be on different drives
|
except ValueError: # On windows path and basedir could be on different drives
|
||||||
rpath = self.path
|
rpath = self.path
|
||||||
if isinstance(rpath, unicode_type):
|
if isinstance(rpath, bytes):
|
||||||
rpath = rpath.encode('utf-8')
|
rpath = rpath.decode(filesystem_encoding)
|
||||||
return rpath.replace(os.sep, '/')+frag
|
return rpath.replace(os.sep, '/')+frag
|
||||||
|
|
||||||
def set_basedir(self, path):
|
def set_basedir(self, path):
|
||||||
@ -203,9 +203,14 @@ class ManifestItem(Resource): # {{{
|
|||||||
self.mime_type = val
|
self.mime_type = val
|
||||||
return property(fget=fget, fset=fset)
|
return property(fget=fget, fset=fset)
|
||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__representation__(self):
|
||||||
return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type)
|
return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type)
|
||||||
|
|
||||||
|
if ispy3:
|
||||||
|
__str__ = __unicode__representation__
|
||||||
|
else:
|
||||||
|
__unicode__ = __unicode__representation__
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return unicode_type(self).encode('utf-8')
|
return unicode_type(self).encode('utf-8')
|
||||||
|
|
||||||
|
@ -229,7 +229,9 @@ class MobiMLizer(object):
|
|||||||
while vspace > 0:
|
while vspace > 0:
|
||||||
wrapper.addprevious(etree.Element(XHTML('br')))
|
wrapper.addprevious(etree.Element(XHTML('br')))
|
||||||
vspace -= 1
|
vspace -= 1
|
||||||
if istate.halign != 'auto' and isinstance(istate.halign, (str, unicode_type)):
|
if istate.halign != 'auto' and isinstance(istate.halign, (bytes, unicode_type)):
|
||||||
|
if isinstance(istate.halign, bytes):
|
||||||
|
istate.halign = istate.halign.decode('utf-8')
|
||||||
para.attrib['align'] = istate.halign
|
para.attrib['align'] = istate.halign
|
||||||
istate.rendered = True
|
istate.rendered = True
|
||||||
pstate = bstate.istate
|
pstate = bstate.istate
|
||||||
|
@ -283,24 +283,29 @@ class MobiReader(object):
|
|||||||
ref.attrib['href'] = os.path.basename(htmlfile) + ref.attrib['href']
|
ref.attrib['href'] = os.path.basename(htmlfile) + ref.attrib['href']
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def write_as_utf8(path, data):
|
||||||
|
if isinstance(data, unicode_type):
|
||||||
|
data = data.encode('utf-8')
|
||||||
|
with lopen(path, 'wb') as f:
|
||||||
|
f.write(data)
|
||||||
|
|
||||||
parse_cache[htmlfile] = root
|
parse_cache[htmlfile] = root
|
||||||
self.htmlfile = htmlfile
|
self.htmlfile = htmlfile
|
||||||
ncx = cStringIO.StringIO()
|
ncx = cStringIO.StringIO()
|
||||||
opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
|
opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
|
||||||
self.created_opf_path = os.path.splitext(htmlfile)[0] + '.opf'
|
self.created_opf_path = os.path.splitext(htmlfile)[0] + '.opf'
|
||||||
opf.render(open(self.created_opf_path, 'wb'), ncx,
|
opf.render(lopen(self.created_opf_path, 'wb'), ncx,
|
||||||
ncx_manifest_entry=ncx_manifest_entry)
|
ncx_manifest_entry=ncx_manifest_entry)
|
||||||
ncx = ncx.getvalue()
|
ncx = ncx.getvalue()
|
||||||
if ncx:
|
if ncx:
|
||||||
ncx_path = os.path.join(os.path.dirname(htmlfile), 'toc.ncx')
|
ncx_path = os.path.join(os.path.dirname(htmlfile), 'toc.ncx')
|
||||||
open(ncx_path, 'wb').write(ncx)
|
write_as_utf8(ncx_path, ncx)
|
||||||
|
|
||||||
with open('styles.css', 'wb') as s:
|
css = [self.base_css_rules, '\n\n']
|
||||||
s.write(self.base_css_rules + '\n\n')
|
|
||||||
for cls, rule in self.tag_css_rules.items():
|
for cls, rule in self.tag_css_rules.items():
|
||||||
if isinstance(rule, unicode_type):
|
css.append('.%s { %s }\n\n' % (cls, rule))
|
||||||
rule = rule.encode('utf-8')
|
write_as_utf8('styles.css', ''.join(css))
|
||||||
s.write('.%s { %s }\n\n' % (cls, rule))
|
|
||||||
|
|
||||||
if self.book_header.exth is not None or self.embedded_mi is not None:
|
if self.book_header.exth is not None or self.embedded_mi is not None:
|
||||||
self.log.debug('Creating OPF...')
|
self.log.debug('Creating OPF...')
|
||||||
@ -310,7 +315,7 @@ class MobiReader(object):
|
|||||||
ncx_manifest_entry)
|
ncx_manifest_entry)
|
||||||
ncx = ncx.getvalue()
|
ncx = ncx.getvalue()
|
||||||
if ncx:
|
if ncx:
|
||||||
open(os.path.splitext(htmlfile)[0] + '.ncx', 'wb').write(ncx)
|
write_as_utf8(os.path.splitext(htmlfile)[0] + '.ncx', ncx)
|
||||||
|
|
||||||
def read_embedded_metadata(self, root, elem, guide):
|
def read_embedded_metadata(self, root, elem, guide):
|
||||||
raw = '<?xml version="1.0" encoding="utf-8" ?>\n<package>' + \
|
raw = '<?xml version="1.0" encoding="utf-8" ?>\n<package>' + \
|
||||||
@ -423,8 +428,9 @@ class MobiReader(object):
|
|||||||
styles.append(style)
|
styles.append(style)
|
||||||
if 'height' in attrib:
|
if 'height' in attrib:
|
||||||
height = attrib.pop('height').strip()
|
height = attrib.pop('height').strip()
|
||||||
if height and '<' not in height and '>' not in height and \
|
if (
|
||||||
re.search(r'\d+', height):
|
height and '<' not in height and '>' not in height and
|
||||||
|
re.search(r'\d+', height)):
|
||||||
if tag.tag in ('table', 'td', 'tr'):
|
if tag.tag in ('table', 'td', 'tr'):
|
||||||
pass
|
pass
|
||||||
elif tag.tag == 'img':
|
elif tag.tag == 'img':
|
||||||
@ -837,9 +843,8 @@ class MobiReader(object):
|
|||||||
anchor = '<a id="filepos%d"></a>'
|
anchor = '<a id="filepos%d"></a>'
|
||||||
if r > -1 and (r < l or l == end or l == -1):
|
if r > -1 and (r < l or l == end or l == -1):
|
||||||
p = self.mobi_html.rfind('<', 0, end + 1)
|
p = self.mobi_html.rfind('<', 0, end + 1)
|
||||||
if pos < end and p > -1 and \
|
if (pos < end and p > -1 and not end_tag_re.match(self.mobi_html[p:r]) and
|
||||||
not end_tag_re.match(self.mobi_html[p:r]) and \
|
not self.mobi_html[p:r + 1].endswith('/>')):
|
||||||
not self.mobi_html[p:r + 1].endswith('/>'):
|
|
||||||
anchor = ' filepos-id="filepos%d"'
|
anchor = ' filepos-id="filepos%d"'
|
||||||
end = r
|
end = r
|
||||||
else:
|
else:
|
||||||
|
@ -1,23 +1,32 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
from __future__ import (unicode_literals, division, absolute_import,
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
print_function)
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re, unicodedata
|
|
||||||
|
|
||||||
from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
|
import re
|
||||||
namespace, prefixname, urlnormalize)
|
import unicodedata
|
||||||
|
from collections import defaultdict
|
||||||
|
from io import BytesIO
|
||||||
|
from urlparse import urldefrag
|
||||||
|
|
||||||
from calibre.ebooks.mobi.mobiml import MBP_NS
|
from calibre.ebooks.mobi.mobiml import MBP_NS
|
||||||
from calibre.ebooks.mobi.utils import is_guide_ref_start
|
from calibre.ebooks.mobi.utils import is_guide_ref_start
|
||||||
|
from calibre.ebooks.oeb.base import (
|
||||||
|
OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize
|
||||||
|
)
|
||||||
from polyglot.builtins import unicode_type
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
from collections import defaultdict
|
|
||||||
from urlparse import urldefrag
|
class Buf(BytesIO):
|
||||||
from cStringIO import StringIO
|
|
||||||
|
def write(self, x):
|
||||||
|
if isinstance(x, unicode_type):
|
||||||
|
x = x.encode('utf-8')
|
||||||
|
BytesIO.write(self, x)
|
||||||
|
|
||||||
|
|
||||||
class Serializer(object):
|
class Serializer(object):
|
||||||
@ -116,7 +125,7 @@ class Serializer(object):
|
|||||||
'''
|
'''
|
||||||
Return the document serialized as a single UTF-8 encoded bytestring.
|
Return the document serialized as a single UTF-8 encoded bytestring.
|
||||||
'''
|
'''
|
||||||
buf = self.buf = StringIO()
|
buf = self.buf = Buf()
|
||||||
buf.write(b'<html>')
|
buf.write(b'<html>')
|
||||||
self.serialize_head()
|
self.serialize_head()
|
||||||
self.serialize_body()
|
self.serialize_body()
|
||||||
@ -214,22 +223,22 @@ class Serializer(object):
|
|||||||
# if href is provided add a link ref to the toc level output (e.g. feed_0/index.html)
|
# if href is provided add a link ref to the toc level output (e.g. feed_0/index.html)
|
||||||
if href is not None:
|
if href is not None:
|
||||||
# resolve the section url in id_offsets
|
# resolve the section url in id_offsets
|
||||||
buf.write('<mbp:pagebreak />')
|
buf.write(b'<mbp:pagebreak />')
|
||||||
self.id_offsets[urlnormalize(href)] = buf.tell()
|
self.id_offsets[urlnormalize(href)] = buf.tell()
|
||||||
|
|
||||||
if tocref.klass == "periodical":
|
if tocref.klass == "periodical":
|
||||||
buf.write('<div> <div height="1em"></div>')
|
buf.write(b'<div> <div height="1em"></div>')
|
||||||
else:
|
else:
|
||||||
t = tocref.title
|
t = tocref.title
|
||||||
if isinstance(t, unicode_type):
|
if isinstance(t, unicode_type):
|
||||||
t = t.encode('utf-8')
|
t = t.encode('utf-8')
|
||||||
buf.write('<div></div> <div> <h2 height="1em"><font size="+2"><b>' + t +
|
buf.write(b'<div></div> <div> <h2 height="1em"><font size="+2"><b>' + t +
|
||||||
'</b></font></h2> <div height="1em"></div>')
|
b'</b></font></h2> <div height="1em"></div>')
|
||||||
|
|
||||||
buf.write('<ul>')
|
buf.write(b'<ul>')
|
||||||
|
|
||||||
for tocitem in tocref.nodes:
|
for tocitem in tocref.nodes:
|
||||||
buf.write('<li><a filepos=')
|
buf.write(b'<li><a filepos=')
|
||||||
itemhref = tocitem.href
|
itemhref = tocitem.href
|
||||||
if tocref.klass == 'periodical':
|
if tocref.klass == 'periodical':
|
||||||
# This is a section node.
|
# This is a section node.
|
||||||
@ -238,15 +247,15 @@ class Serializer(object):
|
|||||||
# so we change the href.
|
# so we change the href.
|
||||||
itemhref = re.sub(r'article_\d+/', '', itemhref)
|
itemhref = re.sub(r'article_\d+/', '', itemhref)
|
||||||
self.href_offsets[itemhref].append(buf.tell())
|
self.href_offsets[itemhref].append(buf.tell())
|
||||||
buf.write('0000000000')
|
buf.write(b'0000000000')
|
||||||
buf.write(' ><font size="+1"><b><u>')
|
buf.write(b' ><font size="+1"><b><u>')
|
||||||
t = tocitem.title
|
t = tocitem.title
|
||||||
if isinstance(t, unicode_type):
|
if isinstance(t, unicode_type):
|
||||||
t = t.encode('utf-8')
|
t = t.encode('utf-8')
|
||||||
buf.write(t)
|
buf.write(t)
|
||||||
buf.write('</u></b></font></a></li>')
|
buf.write(b'</u></b></font></a></li>')
|
||||||
|
|
||||||
buf.write('</ul><div height="1em"></div></div><mbp:pagebreak />')
|
buf.write(b'</ul><div height="1em"></div></div><mbp:pagebreak />')
|
||||||
|
|
||||||
self.anchor_offset = buf.tell()
|
self.anchor_offset = buf.tell()
|
||||||
buf.write(b'<body>')
|
buf.write(b'<body>')
|
||||||
@ -350,7 +359,7 @@ class Serializer(object):
|
|||||||
if child.tail:
|
if child.tail:
|
||||||
self.anchor_offset = None
|
self.anchor_offset = None
|
||||||
self.serialize_text(child.tail)
|
self.serialize_text(child.tail)
|
||||||
buf.write(b'</%s>' % tag.encode('utf-8'))
|
buf.write(('</%s>' % tag).encode('utf-8'))
|
||||||
|
|
||||||
def serialize_text(self, text, quot=False):
|
def serialize_text(self, text, quot=False):
|
||||||
text = text.replace('&', '&')
|
text = text.replace('&', '&')
|
||||||
@ -384,4 +393,4 @@ class Serializer(object):
|
|||||||
self.start_offset = ioff
|
self.start_offset = ioff
|
||||||
for hoff in hoffs:
|
for hoff in hoffs:
|
||||||
buf.seek(hoff)
|
buf.seek(hoff)
|
||||||
buf.write(b'%010d' % ioff)
|
buf.write(('%010d' % ioff).encode('utf-8'))
|
||||||
|
@ -13,7 +13,7 @@ from urlparse import urldefrag, urlparse, urlunparse, urljoin
|
|||||||
from urllib import unquote
|
from urllib import unquote
|
||||||
|
|
||||||
from lxml import etree, html
|
from lxml import etree, html
|
||||||
from calibre.constants import filesystem_encoding, __version__
|
from calibre.constants import filesystem_encoding, __version__, ispy3
|
||||||
from calibre.translations.dynamic import translate
|
from calibre.translations.dynamic import translate
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.ebooks.conversion.preprocess import CSSPreProcessor
|
from calibre.ebooks.conversion.preprocess import CSSPreProcessor
|
||||||
@ -107,13 +107,35 @@ self_closing_bad_tags = {'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b
|
|||||||
'span', 'strong', 'sub', 'summary', 'sup', 'textarea', 'time', 'ul', 'var',
|
'span', 'strong', 'sub', 'summary', 'sup', 'textarea', 'time', 'ul', 'var',
|
||||||
'video', 'title', 'script', 'style'}
|
'video', 'title', 'script', 'style'}
|
||||||
|
|
||||||
_self_closing_pat = re.compile(
|
|
||||||
r'<(?P<tag>%s)(?=[\s/])(?P<arg>[^>]*)/>'%('|'.join(self_closing_bad_tags)),
|
def as_string_type(pat, for_unicode):
|
||||||
re.IGNORECASE)
|
if for_unicode:
|
||||||
|
if isinstance(pat, bytes):
|
||||||
|
pat = pat.decode('utf-8')
|
||||||
|
else:
|
||||||
|
if isinstance(pat, unicode_type):
|
||||||
|
pat = pat.encode('utf-8')
|
||||||
|
return pat
|
||||||
|
|
||||||
|
|
||||||
|
def self_closing_pat(for_unicode):
|
||||||
|
attr = 'unicode_ans' if for_unicode else 'bytes_ans'
|
||||||
|
ans = getattr(self_closing_pat, attr, None)
|
||||||
|
if ans is None:
|
||||||
|
sub = '|'.join(self_closing_bad_tags)
|
||||||
|
template = r'<(?P<tag>%s)(?=[\s/])(?P<arg>[^>]*)/>'
|
||||||
|
pat = template % sub
|
||||||
|
pat = as_string_type(pat, for_unicode)
|
||||||
|
ans = re.compile(pat, flags=re.IGNORECASE)
|
||||||
|
setattr(self_closing_pat, attr, ans)
|
||||||
|
return ans
|
||||||
|
|
||||||
|
|
||||||
def close_self_closing_tags(raw):
|
def close_self_closing_tags(raw):
|
||||||
return _self_closing_pat.sub(r'<\g<tag>\g<arg>></\g<tag>>', raw)
|
for_unicode = isinstance(raw, unicode_type)
|
||||||
|
repl = as_string_type(r'<\g<tag>\g<arg>></\g<tag>>', for_unicode)
|
||||||
|
pat = self_closing_pat(for_unicode)
|
||||||
|
return pat.sub(repl, raw)
|
||||||
|
|
||||||
|
|
||||||
def uuid_id():
|
def uuid_id():
|
||||||
@ -745,6 +767,10 @@ class Metadata(object):
|
|||||||
return 'Item(term=%r, value=%r, attrib=%r)' \
|
return 'Item(term=%r, value=%r, attrib=%r)' \
|
||||||
% (barename(self.term), self.value, self.attrib)
|
% (barename(self.term), self.value, self.attrib)
|
||||||
|
|
||||||
|
if ispy3:
|
||||||
|
def __str__(self):
|
||||||
|
return as_unicode(self.value)
|
||||||
|
else:
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return unicode_type(self.value).encode('ascii', 'xmlcharrefreplace')
|
return unicode_type(self.value).encode('ascii', 'xmlcharrefreplace')
|
||||||
|
|
||||||
@ -1075,19 +1101,27 @@ class Manifest(object):
|
|||||||
self._loader = loader2
|
self._loader = loader2
|
||||||
self._data = None
|
self._data = None
|
||||||
|
|
||||||
def __str__(self):
|
@property
|
||||||
return serialize(self.data, self.media_type, pretty_print=self.oeb.pretty_print)
|
def unicode_representation(self):
|
||||||
|
|
||||||
def __unicode__(self):
|
|
||||||
data = self.data
|
data = self.data
|
||||||
if isinstance(data, etree._Element):
|
if isinstance(data, etree._Element):
|
||||||
return xml2unicode(data, pretty_print=self.oeb.pretty_print)
|
return xml2unicode(data, pretty_print=self.oeb.pretty_print)
|
||||||
if isinstance(data, unicode_type):
|
if isinstance(data, unicode_type):
|
||||||
return data
|
return data
|
||||||
if hasattr(data, 'cssText'):
|
if hasattr(data, 'cssText'):
|
||||||
return data.cssText
|
return unicode_type(data.cssText, 'utf-8', 'replace')
|
||||||
return unicode_type(data)
|
return unicode_type(data)
|
||||||
|
|
||||||
|
if ispy3:
|
||||||
|
def __str__(self):
|
||||||
|
return self.unicode_representation
|
||||||
|
else:
|
||||||
|
def __unicode__(self):
|
||||||
|
return self.unicode_representation
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return serialize(self.data, self.media_type, pretty_print=self.oeb.pretty_print)
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
return id(self) == id(other)
|
return id(self) == id(other)
|
||||||
|
|
||||||
@ -1616,12 +1650,16 @@ class TOC(object):
|
|||||||
ans.extend(child.get_lines(lvl+1))
|
ans.extend(child.get_lines(lvl+1))
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
if ispy3:
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return b'\n'.join([x.encode('utf-8') for x in self.get_lines()])
|
return u'\n'.join(self.get_lines())
|
||||||
|
else:
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return u'\n'.join(self.get_lines())
|
return u'\n'.join(self.get_lines())
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return b'\n'.join([x.encode('utf-8') for x in self.get_lines()])
|
||||||
|
|
||||||
def to_opf1(self, tour):
|
def to_opf1(self, tour):
|
||||||
for node in self.nodes:
|
for node in self.nodes:
|
||||||
element(tour, 'site', attrib={
|
element(tour, 'site', attrib={
|
||||||
|
@ -53,7 +53,7 @@ class SpineItem(unicode_type):
|
|||||||
if not os.path.exists(path) and os.path.exists(ppath):
|
if not os.path.exists(path) and os.path.exists(ppath):
|
||||||
path = ppath
|
path = ppath
|
||||||
obj = super(SpineItem, cls).__new__(cls, path)
|
obj = super(SpineItem, cls).__new__(cls, path)
|
||||||
with open(path, 'rb') as f:
|
with lopen(path, 'rb') as f:
|
||||||
raw = f.read()
|
raw = f.read()
|
||||||
if from_epub:
|
if from_epub:
|
||||||
# According to the spec, HTML in EPUB must be encoded in utf-8 or
|
# According to the spec, HTML in EPUB must be encoded in utf-8 or
|
||||||
|
@ -99,7 +99,7 @@ def html5_parse(data, max_nesting_depth=100):
|
|||||||
# Check that the asinine HTML 5 algorithm did not result in a tree with
|
# Check that the asinine HTML 5 algorithm did not result in a tree with
|
||||||
# insane nesting depths
|
# insane nesting depths
|
||||||
for x in data.iterdescendants():
|
for x in data.iterdescendants():
|
||||||
if isinstance(x.tag, basestring) and len(x) is 0: # Leaf node
|
if isinstance(x.tag, basestring) and not len(x): # Leaf node
|
||||||
depth = node_depth(x)
|
depth = node_depth(x)
|
||||||
if depth > max_nesting_depth:
|
if depth > max_nesting_depth:
|
||||||
raise ValueError('HTML 5 parsing resulted in a tree with nesting'
|
raise ValueError('HTML 5 parsing resulted in a tree with nesting'
|
||||||
@ -259,7 +259,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
|
|||||||
nroot = etree.fromstring('<html></html>')
|
nroot = etree.fromstring('<html></html>')
|
||||||
has_body = False
|
has_body = False
|
||||||
for child in list(data):
|
for child in list(data):
|
||||||
if isinstance(child.tag, (unicode_type, str)) and barename(child.tag) == 'body':
|
if isinstance(child.tag, (unicode_type, bytes)) and barename(child.tag) == 'body':
|
||||||
has_body = True
|
has_body = True
|
||||||
break
|
break
|
||||||
parent = nroot
|
parent = nroot
|
||||||
|
@ -607,12 +607,12 @@ class Style(object):
|
|||||||
result = base
|
result = base
|
||||||
else:
|
else:
|
||||||
result = self._unit_convert(width, base=base)
|
result = self._unit_convert(width, base=base)
|
||||||
if isinstance(result, (unicode_type, str, bytes)):
|
if isinstance(result, (unicode_type, bytes)):
|
||||||
result = self._profile.width
|
result = self._profile.width
|
||||||
self._width = result
|
self._width = result
|
||||||
if 'max-width' in self._style:
|
if 'max-width' in self._style:
|
||||||
result = self._unit_convert(self._style['max-width'], base=base)
|
result = self._unit_convert(self._style['max-width'], base=base)
|
||||||
if isinstance(result, (unicode_type, str, bytes)):
|
if isinstance(result, (unicode_type, bytes)):
|
||||||
result = self._width
|
result = self._width
|
||||||
if result < self._width:
|
if result < self._width:
|
||||||
self._width = result
|
self._width = result
|
||||||
@ -644,12 +644,12 @@ class Style(object):
|
|||||||
result = base
|
result = base
|
||||||
else:
|
else:
|
||||||
result = self._unit_convert(height, base=base)
|
result = self._unit_convert(height, base=base)
|
||||||
if isinstance(result, (unicode_type, str, bytes)):
|
if isinstance(result, (unicode_type, bytes)):
|
||||||
result = self._profile.height
|
result = self._profile.height
|
||||||
self._height = result
|
self._height = result
|
||||||
if 'max-height' in self._style:
|
if 'max-height' in self._style:
|
||||||
result = self._unit_convert(self._style['max-height'], base=base)
|
result = self._unit_convert(self._style['max-height'], base=base)
|
||||||
if isinstance(result, (unicode_type, str, bytes)):
|
if isinstance(result, (unicode_type, bytes)):
|
||||||
result = self._height
|
result = self._height
|
||||||
if result < self._height:
|
if result < self._height:
|
||||||
self._height = result
|
self._height = result
|
||||||
|
@ -15,7 +15,7 @@ from calibre.ebooks.metadata.opf2 import OPFCreator
|
|||||||
|
|
||||||
from calibre.ebooks.conversion.preprocess import DocAnalysis
|
from calibre.ebooks.conversion.preprocess import DocAnalysis
|
||||||
from calibre.utils.cleantext import clean_ascii_chars
|
from calibre.utils.cleantext import clean_ascii_chars
|
||||||
from polyglot.builtins import unicode_type
|
from polyglot.builtins import unicode_type, map, range
|
||||||
|
|
||||||
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
|
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
|
||||||
|
|
||||||
@ -55,7 +55,7 @@ def split_txt(txt, epub_split_size_kb=0):
|
|||||||
result in the entire document being one giant
|
result in the entire document being one giant
|
||||||
paragraph. In this case the EPUB parser will not
|
paragraph. In this case the EPUB parser will not
|
||||||
be able to determine where to split the file
|
be able to determine where to split the file
|
||||||
to accomidate the EPUB file size limitation
|
to accommodate the EPUB file size limitation
|
||||||
and will fail.
|
and will fail.
|
||||||
'''
|
'''
|
||||||
# Takes care if there is no point to split
|
# Takes care if there is no point to split
|
||||||
@ -66,9 +66,12 @@ def split_txt(txt, epub_split_size_kb=0):
|
|||||||
# Calculating the average chunk value for easy splitting as EPUB (+2 as a safe margin)
|
# Calculating the average chunk value for easy splitting as EPUB (+2 as a safe margin)
|
||||||
chunk_size = long(length_byte / (int(length_byte / (epub_split_size_kb * 1024)) + 2))
|
chunk_size = long(length_byte / (int(length_byte / (epub_split_size_kb * 1024)) + 2))
|
||||||
# if there are chunks with a superior size then go and break
|
# if there are chunks with a superior size then go and break
|
||||||
if (len(filter(lambda x: len(x) > chunk_size, txt.split('\n\n')))) :
|
parts = txt.split(b'\n\n')
|
||||||
txt = '\n\n'.join([split_string_separator(line, chunk_size)
|
lengths = tuple(map(len, parts))
|
||||||
for line in txt.split('\n\n')])
|
if lengths and max(lengths) > chunk_size:
|
||||||
|
txt = b'\n\n'.join([
|
||||||
|
split_string_separator(line, chunk_size) for line in parts
|
||||||
|
])
|
||||||
if isbytestring(txt):
|
if isbytestring(txt):
|
||||||
txt = txt.decode('utf-8')
|
txt = txt.decode('utf-8')
|
||||||
|
|
||||||
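A self-contained sketch of the check the rewritten block performs, kept bytes-oriented the way the new code is; length_byte, epub_split_size_kb and the b'\n\n' separator are the names used in the hunk, while needs_split itself is only illustrative:

def needs_split(txt, epub_split_size_kb):
    # txt is a bytestring; compute the average chunk size with the same
    # +2 safety margin as above, then see if any paragraph exceeds it.
    length_byte = len(txt)
    chunk_size = length_byte // (length_byte // (epub_split_size_kb * 1024) + 2)
    lengths = tuple(map(len, txt.split(b'\n\n')))
    return chunk_size, bool(lengths) and max(lengths) > chunk_size

chunk_size, too_big = needs_split(b'short para\n\n' + b'x' * 5000, 1)
assert too_big and chunk_size > 0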
@ -227,7 +230,7 @@ def opf_writer(path, opf_name, manifest, spine, mi):
|
|||||||
opf = OPFCreator(path, mi)
|
opf = OPFCreator(path, mi)
|
||||||
opf.create_manifest(manifest)
|
opf.create_manifest(manifest)
|
||||||
opf.create_spine(spine)
|
opf.create_spine(spine)
|
||||||
with open(os.path.join(path, opf_name), 'wb') as opffile:
|
with lopen(os.path.join(path, opf_name), 'wb') as opffile:
|
||||||
opf.render(opffile)
|
opf.render(opffile)
|
||||||
|
|
||||||
|
|
||||||
@ -236,9 +239,16 @@ def split_string_separator(txt, size):
|
|||||||
Splits the text by putting \n\n at the point size.
|
Splits the text by putting \n\n at the point size.
|
||||||
'''
|
'''
|
||||||
if len(txt) > size:
|
if len(txt) > size:
|
||||||
txt = ''.join([re.sub(type(u'')(r'\.(?P<ends>[^.]*)$'), r'.\n\n\g<ends>',
|
size -= 2
|
||||||
txt[i:i+size], 1) for i in
|
txt = []
|
||||||
xrange(0, len(txt), size)])
|
for part in (txt[i * size: (i + 1) * size] for i in range(0, len(txt), size)):
|
||||||
|
idx = part.rfind('.')
|
||||||
|
if idx == -1:
|
||||||
|
part += b'\n\n'
|
||||||
|
else:
|
||||||
|
part = part[:idx + 1] + b'\n\n' + part[idx:]
|
||||||
|
txt.append(part)
|
||||||
|
txt = b''.join(txt)
|
||||||
return txt
|
return txt
|
||||||
|
|
||||||
|
|
||||||
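For reference, a standalone sketch of the technique the rewritten split_string_separator uses: walk the bytestring in size-byte slices and add a paragraph break just after the last full stop of each slice. The accumulator name, slice arithmetic and the b'.' literal below are illustrative, not necessarily the commit's exact spelling:

def split_at_size(txt, size):
    # txt is bytes; insert b'\n\n' roughly every `size` bytes, preferably
    # right after the last period in the slice so sentences stay intact.
    if len(txt) <= size:
        return txt
    size -= 2  # leave room for the two inserted newline bytes
    pieces = []
    for i in range(0, len(txt), size):
        part = txt[i:i + size]
        idx = part.rfind(b'.')
        if idx == -1:
            part += b'\n\n'
        else:
            part = part[:idx + 1] + b'\n\n' + part[idx + 1:]
        pieces.append(part)
    return b''.join(pieces)

assert b'\n\n' in split_at_size(b'one. two. three.', 8)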
|
@ -19,8 +19,6 @@ Tranliterate the string from unicode characters to ASCII in Chinese and others.
|
|||||||
'''
|
'''
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
from calibre.constants import ispy3
|
|
||||||
|
|
||||||
|
|
||||||
class Unihandecoder(object):
|
class Unihandecoder(object):
|
||||||
preferred_encoding = None
|
preferred_encoding = None
|
||||||
@ -43,14 +41,10 @@ class Unihandecoder(object):
|
|||||||
self.decoder = Unidecoder()
|
self.decoder = Unidecoder()
|
||||||
|
|
||||||
def decode(self, text):
|
def decode(self, text):
|
||||||
if not ispy3:
|
if isinstance(text, bytes):
|
||||||
if not isinstance(text, unicode):
|
|
||||||
try:
|
|
||||||
text = unicode(text)
|
|
||||||
except:
|
|
||||||
try:
|
try:
|
||||||
text = text.decode(self.preferred_encoding)
|
text = text.decode(self.preferred_encoding)
|
||||||
except:
|
except Exception:
|
||||||
text = text.decode('utf-8', 'replace')
|
text = text.decode('utf-8', 'replace')
|
||||||
# at first unicode normalize it. (see Unicode standards)
|
# at first unicode normalize it. (see Unicode standards)
|
||||||
ntext = unicodedata.normalize('NFKC', text)
|
ntext = unicodedata.normalize('NFKC', text)
|
||||||
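The decode() change above reduces to one py2/py3-neutral idiom: only bytes input needs decoding, first with the caller's preferred encoding and then with a UTF-8-with-replacement fallback. A minimal sketch, with the encoding hard-coded for illustration:

import unicodedata

def to_text(text, preferred_encoding='utf-8'):
    # Bytes in, text out; a bad preferred encoding never propagates.
    if isinstance(text, bytes):
        try:
            text = text.decode(preferred_encoding)
        except Exception:
            text = text.decode('utf-8', 'replace')
    # Normalize before transliteration, as the surrounding method does.
    return unicodedata.normalize('NFKC', text)

assert to_text(b'caf\xc3\xa9') == u'caf\xe9'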
|
@ -4,7 +4,7 @@
|
|||||||
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
|
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
|
||||||
from zlib import decompress
|
from zlib import decompress
|
||||||
|
|
||||||
from calibre.constants import ispy3
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class jisyo (object):
|
class jisyo (object):
|
||||||
@ -34,8 +34,8 @@ class jisyo (object):
|
|||||||
P('localization/pykakasi/kanadict2.calibre_msgpack', data=True))
|
P('localization/pykakasi/kanadict2.calibre_msgpack', data=True))
|
||||||
|
|
||||||
def load_jisyo(self, char):
|
def load_jisyo(self, char):
|
||||||
if not ispy3:
|
if not isinstance(char, unicode_type):
|
||||||
char = unicode(char)
|
char = unicode_type(char, 'utf-8')
|
||||||
key = "%04x"%ord(char)
|
key = "%04x"%ord(char)
|
||||||
|
|
||||||
try: # already exist?
|
try: # already exist?
|
||||||
|
@ -60,9 +60,9 @@ it under the same terms as Perl itself.
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.constants import ispy3
|
|
||||||
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
|
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
|
||||||
from calibre.ebooks.unihandecode.zhcodepoints import CODEPOINTS as HANCODES
|
from calibre.ebooks.unihandecode.zhcodepoints import CODEPOINTS as HANCODES
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
class Unidecoder(object):
|
class Unidecoder(object):
|
||||||
@ -95,8 +95,8 @@ class Unidecoder(object):
|
|||||||
Find what group character is a part of.
|
Find what group character is a part of.
|
||||||
'''
|
'''
|
||||||
# Code groups withing CODEPOINTS take the form 'xAB'
|
# Code groups withing CODEPOINTS take the form 'xAB'
|
||||||
if not ispy3:
|
if not isinstance(character, unicode_type):
|
||||||
character = unicode(character)
|
character = unicode_type(character, "utf-8")
|
||||||
return 'x%02x' % (ord(character) >> 8)
|
return 'x%02x' % (ord(character) >> 8)
|
||||||
|
|
||||||
def grouped_point(self, character):
|
def grouped_point(self, character):
|
||||||
@ -104,6 +104,6 @@ class Unidecoder(object):
|
|||||||
Return the location the replacement character is in the list for a
|
Return the location the replacement character is in the list for a
|
||||||
the group character is a part of.
|
the group character is a part of.
|
||||||
'''
|
'''
|
||||||
if not ispy3:
|
if not isinstance(character, unicode_type):
|
||||||
character = unicode(character)
|
character = unicode_type(character, "utf-8")
|
||||||
return ord(character) & 255
|
return ord(character) & 255
|
||||||
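Both Unidecoder helpers above now coerce bytes to text themselves instead of branching on ispy3; a standalone sketch of the code-point arithmetic they rely on, where a plain .decode call stands in for unicode_type(character, "utf-8"):

unicode_type = type(u'')

def code_group(character):
    # Groups in CODEPOINTS are keyed like 'x4e': the high byte of the
    # character's code point.
    if not isinstance(character, unicode_type):
        character = character.decode('utf-8')
    return 'x%02x' % (ord(character) >> 8)

def grouped_point(character):
    # Position inside that group is the low byte of the code point.
    if not isinstance(character, unicode_type):
        character = character.decode('utf-8')
    return ord(character) & 255

assert code_group(u'\u4e2d') == 'x4e' and grouped_point(u'\u4e2d') == 0x2d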
|
@ -347,7 +347,7 @@ class EditorWidget(QWebView, LineEditECM): # {{{
|
|||||||
return unicode_type(self.page().mainFrame().toHtml())
|
return unicode_type(self.page().mainFrame().toHtml())
|
||||||
check = unicode_type(self.page().mainFrame().toPlainText()).strip()
|
check = unicode_type(self.page().mainFrame().toPlainText()).strip()
|
||||||
raw = unicode_type(self.page().mainFrame().toHtml())
|
raw = unicode_type(self.page().mainFrame().toHtml())
|
||||||
raw = xml_to_unicode_type(raw, strip_encoding_pats=True,
|
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||||
resolve_entities=True)[0]
|
resolve_entities=True)[0]
|
||||||
raw = self.comments_pat.sub('', raw)
|
raw = self.comments_pat.sub('', raw)
|
||||||
if not check and '<img' not in raw.lower():
|
if not check and '<img' not in raw.lower():
|
||||||
|
@ -201,7 +201,7 @@ class MenuExampleWindow(Gtk.ApplicationWindow):
|
|||||||
|
|
||||||
|
|
||||||
def convert(v):
|
def convert(v):
|
||||||
if isinstance(v, basestring):
|
if isinstance(v, (unicode_type, bytes)):
|
||||||
return unicode_type(v)
|
return unicode_type(v)
|
||||||
if isinstance(v, dbus.Struct):
|
if isinstance(v, dbus.Struct):
|
||||||
return tuple(convert(val) for val in v)
|
return tuple(convert(val) for val in v)
|
||||||
@ -309,6 +309,7 @@ class MyApplication(Gtk.Application):
|
|||||||
def do_startup(self):
|
def do_startup(self):
|
||||||
Gtk.Application.do_startup(self)
|
Gtk.Application.do_startup(self)
|
||||||
|
|
||||||
|
|
||||||
app = MyApplication(application_id='com.calibre-ebook.test-gtk')
|
app = MyApplication(application_id='com.calibre-ebook.test-gtk')
|
||||||
signal.signal(signal.SIGINT, signal.SIG_DFL)
|
signal.signal(signal.SIGINT, signal.SIG_DFL)
|
||||||
sys.exit(app.run(sys.argv))
|
sys.exit(app.run(sys.argv))
|
||||||
|
@ -43,7 +43,7 @@ class TableItem(QTableWidgetItem):
|
|||||||
# self is not None and other is None therefore self >= other
|
# self is not None and other is None therefore self >= other
|
||||||
return True
|
return True
|
||||||
|
|
||||||
if isinstance(self.sort, (str, unicode_type)):
|
if isinstance(self.sort, (bytes, unicode_type)):
|
||||||
l = sort_key(self.sort)
|
l = sort_key(self.sort)
|
||||||
r = sort_key(other.sort)
|
r = sort_key(other.sort)
|
||||||
else:
|
else:
|
||||||
@ -66,7 +66,7 @@ class TableItem(QTableWidgetItem):
|
|||||||
# self is not None therefore self > other
|
# self is not None therefore self > other
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if isinstance(self.sort, (str, unicode_type)):
|
if isinstance(self.sort, (bytes, unicode_type)):
|
||||||
l = sort_key(self.sort)
|
l = sort_key(self.sort)
|
||||||
r = sort_key(other.sort)
|
r = sort_key(other.sort)
|
||||||
else:
|
else:
|
||||||
|
@ -13,7 +13,7 @@ from PyQt5.Qt import (Qt, QApplication, QStackedWidget, QMenu, QTimer,
|
|||||||
|
|
||||||
from calibre.utils.config import prefs
|
from calibre.utils.config import prefs
|
||||||
from calibre.utils.icu import sort_key
|
from calibre.utils.icu import sort_key
|
||||||
from calibre.constants import (isosx, __appname__, preferred_encoding,
|
from calibre.constants import (__appname__, preferred_encoding,
|
||||||
get_version)
|
get_version)
|
||||||
from calibre.gui2 import config, is_widescreen, gprefs, error_dialog, open_url
|
from calibre.gui2 import config, is_widescreen, gprefs, error_dialog, open_url
|
||||||
from calibre.gui2.library.views import BooksView, DeviceBooksView
|
from calibre.gui2.library.views import BooksView, DeviceBooksView
|
||||||
@ -323,11 +323,6 @@ class StatusBar(QStatusBar): # {{{
|
|||||||
def show_message(self, msg, timeout=0, show_notification=True):
|
def show_message(self, msg, timeout=0, show_notification=True):
|
||||||
self.showMessage(msg, timeout)
|
self.showMessage(msg, timeout)
|
||||||
if self.notifier is not None and not config['disable_tray_notification'] and show_notification:
|
if self.notifier is not None and not config['disable_tray_notification'] and show_notification:
|
||||||
if isosx and isinstance(msg, unicode_type):
|
|
||||||
try:
|
|
||||||
msg = msg.encode(preferred_encoding)
|
|
||||||
except UnicodeEncodeError:
|
|
||||||
msg = msg.encode('utf-8')
|
|
||||||
self.notifier(msg)
|
self.notifier(msg)
|
||||||
|
|
||||||
def clear_message(self):
|
def clear_message(self):
|
||||||
|
@ -129,7 +129,7 @@ class LocationManager(QObject): # {{{
|
|||||||
had_device = self.has_device
|
had_device = self.has_device
|
||||||
if cp is None:
|
if cp is None:
|
||||||
cp = (None, None)
|
cp = (None, None)
|
||||||
if isinstance(cp, (str, unicode_type)):
|
if isinstance(cp, (bytes, unicode_type)):
|
||||||
cp = (cp, None)
|
cp = (cp, None)
|
||||||
if len(fs) < 3:
|
if len(fs) < 3:
|
||||||
fs = list(fs) + [0]
|
fs = list(fs) + [0]
|
||||||
|
@ -6,7 +6,7 @@ import sys, logging, os, traceback, time
|
|||||||
from PyQt5.Qt import (
|
from PyQt5.Qt import (
|
||||||
QKeySequence, QPainter, QDialog, QSpinBox, QSlider, QIcon, Qt, QCoreApplication, QThread, QScrollBar)
|
QKeySequence, QPainter, QDialog, QSpinBox, QSlider, QIcon, Qt, QCoreApplication, QThread, QScrollBar)
|
||||||
|
|
||||||
from calibre import __appname__, setup_cli_handlers, islinux, isbsd
|
from calibre import __appname__, setup_cli_handlers, islinux, isbsd, as_unicode
|
||||||
from calibre.ebooks.lrf.lrfparser import LRFDocument
|
from calibre.ebooks.lrf.lrfparser import LRFDocument
|
||||||
|
|
||||||
from calibre.gui2 import error_dialog, \
|
from calibre.gui2 import error_dialog, \
|
||||||
@ -17,7 +17,6 @@ from calibre.gui2.lrf_renderer.config_ui import Ui_ViewerConfig
|
|||||||
from calibre.gui2.main_window import MainWindow
|
from calibre.gui2.main_window import MainWindow
|
||||||
from calibre.gui2.lrf_renderer.document import Document
|
from calibre.gui2.lrf_renderer.document import Document
|
||||||
from calibre.gui2.search_box import SearchBox2
|
from calibre.gui2.search_box import SearchBox2
|
||||||
from polyglot.builtins import unicode_type
|
|
||||||
|
|
||||||
|
|
||||||
class RenderWorker(QThread):
|
class RenderWorker(QThread):
|
||||||
@ -201,7 +200,7 @@ class Main(MainWindow, Ui_MainWindow):
|
|||||||
print('Error rendering document', file=sys.stderr)
|
print('Error rendering document', file=sys.stderr)
|
||||||
print(exception, file=sys.stderr)
|
print(exception, file=sys.stderr)
|
||||||
print(self.renderer.formatted_traceback, file=sys.stderr)
|
print(self.renderer.formatted_traceback, file=sys.stderr)
|
||||||
msg = u'<p><b>%s</b>: '%(exception.__class__.__name__,) + unicode_type(str(exception), 'utf8', 'replace') + u'</p>'
|
msg = u'<p><b>%s</b>: '%(exception.__class__.__name__,) + as_unicode(exception) + u'</p>'
|
||||||
msg += u'<p>Failed to render document</p>'
|
msg += u'<p>Failed to render document</p>'
|
||||||
msg += u'<p>Detailed <b>traceback</b>:<pre>'
|
msg += u'<p>Detailed <b>traceback</b>:<pre>'
|
||||||
msg += self.renderer.formatted_traceback + '</pre>'
|
msg += self.renderer.formatted_traceback + '</pre>'
|
||||||
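The message-building line above now goes through as_unicode() instead of unicode_type(str(exception), 'utf8', 'replace'); a rough standalone equivalent of what that buys (as_unicode_ below is an illustrative stand-in, not calibre's implementation):

def as_unicode_(obj, enc='utf-8'):
    # Render an arbitrary object, usually an exception, as text without
    # assuming str(obj) round-trips through a byte encoding.
    if isinstance(obj, bytes):
        return obj.decode(enc, 'replace')
    try:
        return u'%s' % obj
    except Exception:
        return repr(obj)

exc = ValueError(u'bad value: caf\xe9')
msg = u'<p><b>%s</b>: ' % exc.__class__.__name__ + as_unicode_(exc) + u'</p>'
assert u'caf\xe9' in msg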
|
@ -132,7 +132,7 @@ def get_default_library_path():
|
|||||||
fname = 'Calibre Library'
|
fname = 'Calibre Library'
|
||||||
if isinstance(fname, unicode_type):
|
if isinstance(fname, unicode_type):
|
||||||
try:
|
try:
|
||||||
fname = fname.encode(filesystem_encoding)
|
fname.encode(filesystem_encoding)
|
||||||
except:
|
except:
|
||||||
fname = 'Calibre Library'
|
fname = 'Calibre Library'
|
||||||
x = os.path.expanduser('~'+os.sep+fname)
|
x = os.path.expanduser('~'+os.sep+fname)
|
||||||
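The subtle change above is that fname is no longer rebound to the bytes result of .encode(); the encode is now only a representability check, so the path join below keeps working with text on py3. A sketch with an illustrative translated default name and encoding:

import os

def default_library_path(fname=u'Biblioth\xe8que calibre',
                         filesystem_encoding='ascii'):
    try:
        # Check only; keeping fname as text avoids mixing bytes and str
        # in the expanduser() call below on Python 3.
        fname.encode(filesystem_encoding)
    except Exception:
        fname = 'Calibre Library'
    return os.path.expanduser('~' + os.sep + fname)

assert default_library_path().endswith('Calibre Library')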
|
@ -5,14 +5,14 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
|
||||||
import StringIO, traceback, sys, gc, weakref
|
import traceback, sys, gc, weakref
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
from PyQt5.Qt import (QMainWindow, QTimer, QAction, QMenu, QMenuBar, QIcon,
|
from PyQt5.Qt import (QMainWindow, QTimer, QAction, QMenu, QMenuBar, QIcon,
|
||||||
QObject)
|
QObject)
|
||||||
from calibre.utils.config import OptionParser
|
from calibre.utils.config import OptionParser
|
||||||
from calibre.gui2 import error_dialog
|
from calibre.gui2 import error_dialog
|
||||||
from calibre import prints
|
from calibre import prints, force_unicode
|
||||||
from polyglot.builtins import unicode_type
|
|
||||||
|
|
||||||
|
|
||||||
def option_parser(usage='''\
|
def option_parser(usage='''\
|
||||||
@ -134,7 +134,7 @@ class MainWindow(QMainWindow):
|
|||||||
if type is KeyboardInterrupt:
|
if type is KeyboardInterrupt:
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
sio = StringIO.StringIO()
|
sio = BytesIO()
|
||||||
try:
|
try:
|
||||||
from calibre.debug import print_basic_debug_info
|
from calibre.debug import print_basic_debug_info
|
||||||
print_basic_debug_info(out=sio)
|
print_basic_debug_info(out=sio)
|
||||||
@ -145,7 +145,8 @@ class MainWindow(QMainWindow):
|
|||||||
prints(value.locking_debug_msg, file=sio)
|
prints(value.locking_debug_msg, file=sio)
|
||||||
fe = sio.getvalue()
|
fe = sio.getvalue()
|
||||||
prints(fe, file=sys.stderr)
|
prints(fe, file=sys.stderr)
|
||||||
msg = '<b>%s</b>:'%type.__name__ + unicode_type(str(value), 'utf8', 'replace')
|
fe = force_unicode(fe)
|
||||||
|
msg = '<b>%s</b>:'%type.__name__ + force_unicode(value)
|
||||||
error_dialog(self, _('Unhandled exception'), msg, det_msg=fe,
|
error_dialog(self, _('Unhandled exception'), msg, det_msg=fe,
|
||||||
show=True)
|
show=True)
|
||||||
except BaseException:
|
except BaseException:
|
||||||
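The excepthook above switches from StringIO.StringIO to io.BytesIO and decodes the collected report once before display; a dependency-free sketch of that flow, with an explicit decode standing in for force_unicode:

import traceback
from io import BytesIO

def format_unhandled_exception():
    buf = BytesIO()
    buf.write(traceback.format_exc().encode('utf-8'))
    # Decode once, with replacement, so a stray byte in a frame cannot
    # break the error dialog itself.
    return buf.getvalue().decode('utf-8', 'replace')

try:
    raise ValueError(u'caf\xe9')
except ValueError:
    details = format_unhandled_exception()
assert u'ValueError' in details and u'caf\xe9' in details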
|
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
import time
|
import time
|
||||||
from calibre import prints
|
from calibre import prints
|
||||||
from calibre.constants import islinux, isosx, get_osx_version, DEBUG
|
from calibre.constants import islinux, isosx, get_osx_version, DEBUG, ispy3
|
||||||
from polyglot.builtins import unicode_type
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
@ -145,6 +145,10 @@ class AppleNotifier(Notifier):
|
|||||||
|
|
||||||
def notify(self, body, summary):
|
def notify(self, body, summary):
|
||||||
def encode(x):
|
def encode(x):
|
||||||
|
if ispy3:
|
||||||
|
if isinstance(x, bytes):
|
||||||
|
x = x.decode('utf-8')
|
||||||
|
else:
|
||||||
if isinstance(x, unicode_type):
|
if isinstance(x, unicode_type):
|
||||||
x = x.encode('utf-8')
|
x = x.encode('utf-8')
|
||||||
return x
|
return x
|
||||||
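The added branch keeps the notification text in the form each Python expects; a standalone sketch of that encode() helper (ispy3 is computed locally here rather than imported from calibre.constants):

import sys

ispy3 = sys.version_info.major >= 3

def encode_for_notifier(x):
    if ispy3:
        # The py3 path wants text, so decode stray bytes.
        if isinstance(x, bytes):
            x = x.decode('utf-8')
    else:
        # The old py2 path passed UTF-8 encoded bytes through.
        if not isinstance(x, bytes):
            x = x.encode('utf-8')
    return x

assert encode_for_notifier(b'Conversion finished') == u'Conversion finished'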
|
@ -67,7 +67,7 @@ class SearchDialog(QDialog, Ui_Dialog):
|
|||||||
self.setup_store_checks()
|
self.setup_store_checks()
|
||||||
|
|
||||||
# Set the search query
|
# Set the search query
|
||||||
if isinstance(query, (str, unicode_type)):
|
if isinstance(query, (bytes, unicode_type)):
|
||||||
self.search_edit.setText(query)
|
self.search_edit.setText(query)
|
||||||
elif isinstance(query, dict):
|
elif isinstance(query, dict):
|
||||||
if 'author' in query:
|
if 'author' in query:
|
||||||
@ -233,7 +233,7 @@ class SearchDialog(QDialog, Ui_Dialog):
|
|||||||
query = query.replace('<', '')
|
query = query.replace('<', '')
|
||||||
# Remove the prefix.
|
# Remove the prefix.
|
||||||
for loc in ('all', 'author', 'author2', 'authors', 'title', 'title2'):
|
for loc in ('all', 'author', 'author2', 'authors', 'title', 'title2'):
|
||||||
query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, '\g<a>', query)
|
query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, r'\g<a>', query)
|
||||||
query = query.replace('%s:' % loc, '')
|
query = query.replace('%s:' % loc, '')
|
||||||
# Remove the prefix and search text.
|
# Remove the prefix and search text.
|
||||||
for loc in ('cover', 'download', 'downloads', 'drm', 'format', 'formats', 'price', 'store'):
|
for loc in ('cover', 'download', 'downloads', 'drm', 'format', 'formats', 'price', 'store'):
|
||||||
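The one-character fix above, '\g<a>' to r'\g<a>', matters because \g is not a recognised string escape; newer Pythons warn about it, and a raw string states the intent of the re.sub group reference plainly. A quick standalone check of the same substitution with a literal prefix:

import re

query = u'author:"tolstoy" war and peace'
cleaned = re.sub(r'author:"(?P<a>[^\s"]+)"', r'\g<a>', query)
assert cleaned == u'tolstoy war and peace'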
|
@ -30,8 +30,8 @@ user_functions = JSONConfig('editor-search-replace-functions')
|
|||||||
|
|
||||||
def compile_code(src, name='<string>'):
|
def compile_code(src, name='<string>'):
|
||||||
if not isinstance(src, unicode_type):
|
if not isinstance(src, unicode_type):
|
||||||
match = re.search(r'coding[:=]\s*([-\w.]+)', src[:200])
|
match = re.search(br'coding[:=]\s*([-\w.]+)', src[:200])
|
||||||
enc = match.group(1) if match else 'utf-8'
|
enc = match.group(1).decode('utf-8') if match else 'utf-8'
|
||||||
src = src.decode(enc)
|
src = src.decode(enc)
|
||||||
if not src or not src.strip():
|
if not src or not src.strip():
|
||||||
src = EMPTY_FUNC
|
src = EMPTY_FUNC
|
||||||
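When src is still bytes, the PEP 263 coding-line search has to use a bytes pattern, and the captured encoding name has to be decoded before it can be passed to bytes.decode(); the same fix appears again later in this commit for the worker-side compile_code and for compile_recipe. A minimal sketch with unicode_type shimmed locally:

import re

unicode_type = type(u'')

def decode_source(src):
    if not isinstance(src, unicode_type):
        match = re.search(br'coding[:=]\s*([-\w.]+)', src[:200])
        enc = match.group(1).decode('utf-8') if match else 'utf-8'
        src = src.decode(enc)
    return src

assert decode_source(b'# -*- coding: latin-1 -*-\nx = 1\n').splitlines()[0].endswith('-*-')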
|
@ -9,7 +9,6 @@ import os
|
|||||||
from hashlib import sha1
|
from hashlib import sha1
|
||||||
|
|
||||||
from calibre.ebooks import BOOK_EXTENSIONS
|
from calibre.ebooks import BOOK_EXTENSIONS
|
||||||
from polyglot.builtins import unicode_type
|
|
||||||
|
|
||||||
|
|
||||||
def find_folders_under(root, db, add_root=True, # {{{
|
def find_folders_under(root, db, add_root=True, # {{{
|
||||||
@ -106,11 +105,9 @@ class FormatCollection(object): # {{{
|
|||||||
|
|
||||||
def books_in_folder(folder, one_per_folder, # {{{
|
def books_in_folder(folder, one_per_folder, # {{{
|
||||||
cancel_callback=lambda : False):
|
cancel_callback=lambda : False):
|
||||||
assert not isinstance(folder, unicode_type)
|
|
||||||
|
|
||||||
dirpath = os.path.abspath(folder)
|
dirpath = os.path.abspath(folder)
|
||||||
if one_per_folder:
|
if one_per_folder:
|
||||||
formats = set([])
|
formats = set()
|
||||||
for path in os.listdir(dirpath):
|
for path in os.listdir(dirpath):
|
||||||
if cancel_callback():
|
if cancel_callback():
|
||||||
return []
|
return []
|
||||||
|
@ -19,7 +19,7 @@ from calibre.utils.localization import (canonicalize_lang, lang_map, get_udc)
|
|||||||
from calibre.db.search import CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH, _match
|
from calibre.db.search import CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH, _match
|
||||||
from calibre.ebooks.metadata import title_sort, author_to_author_sort
|
from calibre.ebooks.metadata import title_sort, author_to_author_sort
|
||||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||||
from calibre import prints
|
from calibre import prints, force_unicode
|
||||||
from polyglot.builtins import unicode_type
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
|
|
||||||
@ -137,7 +137,9 @@ del y, c, n, u
|
|||||||
|
|
||||||
|
|
||||||
def force_to_bool(val):
|
def force_to_bool(val):
|
||||||
if isinstance(val, (str, unicode_type)):
|
if isinstance(val, (bytes, unicode_type)):
|
||||||
|
if isinstance(val, bytes):
|
||||||
|
val = force_unicode(val)
|
||||||
try:
|
try:
|
||||||
val = icu_lower(val)
|
val = icu_lower(val)
|
||||||
if not val:
|
if not val:
|
||||||
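The extra branch above decodes bytes before icu_lower() ever sees the value; a simplified standalone version of that normalisation step, where a plain .decode stands in for force_unicode and only the lower-casing is shown:

unicode_type = type(u'')

def to_lower_text(val):
    if isinstance(val, (bytes, unicode_type)):
        if isinstance(val, bytes):
            val = val.decode('utf-8', 'replace')
        return val.lower()
    return val

assert to_lower_text(b'TRUE') == u'true' == to_lower_text(u'True')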
@ -348,7 +350,7 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
if item is None:
|
if item is None:
|
||||||
continue
|
continue
|
||||||
v = item[loc]
|
v = item[loc]
|
||||||
if isinstance(v, (str, unicode_type)):
|
if isinstance(v, (bytes, unicode_type)):
|
||||||
v = parse_date(v)
|
v = parse_date(v)
|
||||||
if v is None or v <= UNDEFINED_DATE:
|
if v is None or v <= UNDEFINED_DATE:
|
||||||
matches.add(item[0])
|
matches.add(item[0])
|
||||||
@ -359,7 +361,7 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
if item is None:
|
if item is None:
|
||||||
continue
|
continue
|
||||||
v = item[loc]
|
v = item[loc]
|
||||||
if isinstance(v, (str, unicode_type)):
|
if isinstance(v, (bytes, unicode_type)):
|
||||||
v = parse_date(v)
|
v = parse_date(v)
|
||||||
if v is not None and v > UNDEFINED_DATE:
|
if v is not None and v > UNDEFINED_DATE:
|
||||||
matches.add(item[0])
|
matches.add(item[0])
|
||||||
@ -403,7 +405,7 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
if item is None or item[loc] is None:
|
if item is None or item[loc] is None:
|
||||||
continue
|
continue
|
||||||
v = item[loc]
|
v = item[loc]
|
||||||
if isinstance(v, (str, unicode_type)):
|
if isinstance(v, (bytes, unicode_type)):
|
||||||
v = parse_date(v)
|
v = parse_date(v)
|
||||||
if relop(v, qd, field_count):
|
if relop(v, qd, field_count):
|
||||||
matches.add(item[0])
|
matches.add(item[0])
|
||||||
|
@ -154,7 +154,7 @@ class CSV_XML(CatalogPlugin):
|
|||||||
item = u'%.2g' % (item / 2.0)
|
item = u'%.2g' % (item / 2.0)
|
||||||
|
|
||||||
# Convert HTML to markdown text
|
# Convert HTML to markdown text
|
||||||
if type(item) is unicode_type:
|
if isinstance(item, unicode_type):
|
||||||
opening_tag = re.search('<(\\w+)(\x20|>)', item)
|
opening_tag = re.search('<(\\w+)(\x20|>)', item)
|
||||||
if opening_tag:
|
if opening_tag:
|
||||||
closing_tag = re.search('<\\/%s>$' % opening_tag.group(1), item)
|
closing_tag = re.search('<\\/%s>$' % opening_tag.group(1), item)
|
||||||
@ -177,7 +177,7 @@ class CSV_XML(CatalogPlugin):
|
|||||||
for field in fields:
|
for field in fields:
|
||||||
if field.startswith('#'):
|
if field.startswith('#'):
|
||||||
val = db.get_field(r['id'], field, index_is_id=True)
|
val = db.get_field(r['id'], field, index_is_id=True)
|
||||||
if not isinstance(val, (str, unicode_type)):
|
if not isinstance(val, unicode_type):
|
||||||
val = unicode_type(val)
|
val = unicode_type(val)
|
||||||
item = getattr(E, field.replace('#', '_'))(val)
|
item = getattr(E, field.replace('#', '_'))(val)
|
||||||
record.append(item)
|
record.append(item)
|
||||||
@ -188,7 +188,7 @@ class CSV_XML(CatalogPlugin):
|
|||||||
val = r[field]
|
val = r[field]
|
||||||
if not val:
|
if not val:
|
||||||
continue
|
continue
|
||||||
if not isinstance(val, (str, unicode_type)):
|
if not isinstance(val, (bytes, unicode_type)):
|
||||||
if (fm.get(field, {}).get('datatype', None) ==
|
if (fm.get(field, {}).get('datatype', None) ==
|
||||||
'rating' and val):
|
'rating' and val):
|
||||||
val = u'%.2g' % (val / 2.0)
|
val = u'%.2g' % (val / 2.0)
|
||||||
|
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import json, re
|
import json, re
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
from calibre import prints
|
from calibre import prints, force_unicode
|
||||||
from calibre.constants import preferred_encoding
|
from calibre.constants import preferred_encoding
|
||||||
from calibre.library.field_metadata import FieldMetadata
|
from calibre.library.field_metadata import FieldMetadata
|
||||||
from calibre.utils.date import parse_date
|
from calibre.utils.date import parse_date
|
||||||
@ -131,7 +131,7 @@ class CustomColumns(object):
|
|||||||
if d['is_multiple']:
|
if d['is_multiple']:
|
||||||
if x is None:
|
if x is None:
|
||||||
return []
|
return []
|
||||||
if isinstance(x, (str, unicode_type, bytes)):
|
if isinstance(x, (unicode_type, bytes)):
|
||||||
x = x.split(d['multiple_seps']['ui_to_list'])
|
x = x.split(d['multiple_seps']['ui_to_list'])
|
||||||
x = [y.strip() for y in x if y.strip()]
|
x = [y.strip() for y in x if y.strip()]
|
||||||
x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
|
x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
|
||||||
@ -142,12 +142,14 @@ class CustomColumns(object):
|
|||||||
x.decode(preferred_encoding, 'replace')
|
x.decode(preferred_encoding, 'replace')
|
||||||
|
|
||||||
def adapt_datetime(x, d):
|
def adapt_datetime(x, d):
|
||||||
if isinstance(x, (str, unicode_type, bytes)):
|
if isinstance(x, (unicode_type, bytes)):
|
||||||
x = parse_date(x, assume_utc=False, as_utc=False)
|
x = parse_date(x, assume_utc=False, as_utc=False)
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def adapt_bool(x, d):
|
def adapt_bool(x, d):
|
||||||
if isinstance(x, (str, unicode_type, bytes)):
|
if isinstance(x, (unicode_type, bytes)):
|
||||||
|
if isinstance(x, bytes):
|
||||||
|
x = force_unicode(x)
|
||||||
x = x.lower()
|
x = x.lower()
|
||||||
if x == 'true':
|
if x == 'true':
|
||||||
x = True
|
x = True
|
||||||
@ -168,7 +170,9 @@ class CustomColumns(object):
|
|||||||
def adapt_number(x, d):
|
def adapt_number(x, d):
|
||||||
if x is None:
|
if x is None:
|
||||||
return None
|
return None
|
||||||
if isinstance(x, (str, unicode_type, bytes)):
|
if isinstance(x, (unicode_type, bytes)):
|
||||||
|
if isinstance(x, bytes):
|
||||||
|
x = force_unicode(x)
|
||||||
if x.lower() == 'none':
|
if x.lower() == 'none':
|
||||||
return None
|
return None
|
||||||
if d['datatype'] == 'int':
|
if d['datatype'] == 'int':
|
||||||
|
@ -24,7 +24,7 @@ from calibre.library.custom_columns import CustomColumns
|
|||||||
from calibre.library.sqlite import connect, IntegrityError
|
from calibre.library.sqlite import connect, IntegrityError
|
||||||
from calibre.library.prefs import DBPrefs
|
from calibre.library.prefs import DBPrefs
|
||||||
from calibre.ebooks.metadata.book.base import Metadata
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
from calibre.constants import preferred_encoding, iswindows, filesystem_encoding
|
from calibre.constants import preferred_encoding, iswindows, filesystem_encoding, ispy3
|
||||||
from calibre.ptempfile import (PersistentTemporaryFile,
|
from calibre.ptempfile import (PersistentTemporaryFile,
|
||||||
base_dir, SpooledTemporaryFile)
|
base_dir, SpooledTemporaryFile)
|
||||||
from calibre.customize.ui import (run_plugins_on_import,
|
from calibre.customize.ui import (run_plugins_on_import,
|
||||||
@ -1754,12 +1754,14 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
self.rc = rc
|
self.rc = rc
|
||||||
self.id = id
|
self.id = id
|
||||||
|
|
||||||
def __str__(self):
|
def __unicode_representation__(self):
|
||||||
return unicode_type(self)
|
return u'n=%s s=%s c=%d rt=%d rc=%d id=%s' % (
|
||||||
|
self.n, self.s, self.c, self.rt, self.rc, self.id)
|
||||||
|
|
||||||
def __unicode__(self):
|
if ispy3:
|
||||||
return 'n=%s s=%s c=%d rt=%d rc=%d id=%s'%\
|
__str__ = __unicode_representation__
|
||||||
(self.n, self.s, self.c, self.rt, self.rc, self.id)
|
else:
|
||||||
|
__str__ = __unicode__ = __unicode_representation__
|
||||||
|
|
||||||
def clean_user_categories(self):
|
def clean_user_categories(self):
|
||||||
user_cats = self.prefs.get('user_categories', {})
|
user_cats = self.prefs.get('user_categories', {})
|
||||||
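The __str__/__unicode__ refactor above is the usual dual-version pattern: implement the text form once and alias it to whichever dunder each Python consults. A self-contained sketch with an illustrative class (field names echo the hunk, the class itself is not calibre's):

import sys

ispy3 = sys.version_info.major >= 3

class TagStat(object):
    def __init__(self, n, c):
        self.n, self.c = n, c

    def __unicode_representation__(self):
        return u'n=%s c=%d' % (self.n, self.c)

    if ispy3:
        __str__ = __unicode_representation__
    else:
        __str__ = __unicode__ = __unicode_representation__

assert str(TagStat(u'fiction', 3)) == 'n=fiction c=3'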
|
@ -202,7 +202,7 @@ class Route(object):
|
|||||||
raise RouteError('The variable(s) %s are not part of the route: %s' % (','.join(unknown), self.endpoint.route))
|
raise RouteError('The variable(s) %s are not part of the route: %s' % (','.join(unknown), self.endpoint.route))
|
||||||
|
|
||||||
def quoted(x):
|
def quoted(x):
|
||||||
if not isinstance(x, unicode_type) and not isinstance(x, bytes):
|
if not isinstance(x, (unicode_type, bytes)):
|
||||||
x = unicode_type(x)
|
x = unicode_type(x)
|
||||||
if isinstance(x, unicode_type):
|
if isinstance(x, unicode_type):
|
||||||
x = x.encode('utf-8')
|
x = x.encode('utf-8')
|
||||||
|
File diff suppressed because it is too large
@ -11,7 +11,7 @@ from datetime import datetime, time as dtime, timedelta, MINYEAR, MAXYEAR
|
|||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.constants import iswindows, isosx, plugins
|
from calibre.constants import iswindows, isosx, plugins, preferred_encoding
|
||||||
from calibre.utils.iso8601 import utc_tz, local_tz, UNDEFINED_DATE
|
from calibre.utils.iso8601 import utc_tz, local_tz, UNDEFINED_DATE
|
||||||
from calibre.utils.localization import lcdata
|
from calibre.utils.localization import lcdata
|
||||||
from polyglot.builtins import unicode_type
|
from polyglot.builtins import unicode_type
|
||||||
@ -101,6 +101,8 @@ def parse_date(date_string, assume_utc=False, as_utc=True, default=None):
|
|||||||
from dateutil.parser import parse
|
from dateutil.parser import parse
|
||||||
if not date_string:
|
if not date_string:
|
||||||
return UNDEFINED_DATE
|
return UNDEFINED_DATE
|
||||||
|
if isinstance(date_string, bytes):
|
||||||
|
date_string = date_string.decode(preferred_encoding, 'replace')
|
||||||
if default is None:
|
if default is None:
|
||||||
func = datetime.utcnow if assume_utc else datetime.now
|
func = datetime.utcnow if assume_utc else datetime.now
|
||||||
default = func().replace(day=15, hour=0, minute=0, second=0, microsecond=0,
|
default = func().replace(day=15, hour=0, minute=0, second=0, microsecond=0,
|
||||||
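The two lines added to parse_date() simply decode bytes input before the date parser sees it; a dependency-free sketch of the same guard, with strptime standing in for the dateutil parser that the real function calls:

from datetime import datetime

def parse_date_text(date_string, preferred_encoding='utf-8'):
    if isinstance(date_string, bytes):
        # Older callers hand in raw bytes read from the database or an OPF file.
        date_string = date_string.decode(preferred_encoding, 'replace')
    return datetime.strptime(date_string, '%Y-%m-%d')

assert parse_date_text(b'2019-04-01').year == 2019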
|
@ -132,7 +132,7 @@ class FormatterFunction(object):
|
|||||||
|
|
||||||
def eval_(self, formatter, kwargs, mi, locals, *args):
|
def eval_(self, formatter, kwargs, mi, locals, *args):
|
||||||
ret = self.evaluate(formatter, kwargs, mi, locals, *args)
|
ret = self.evaluate(formatter, kwargs, mi, locals, *args)
|
||||||
if isinstance(ret, (str, unicode_type)):
|
if isinstance(ret, (bytes, unicode_type)):
|
||||||
return ret
|
return ret
|
||||||
if isinstance(ret, list):
|
if isinstance(ret, list):
|
||||||
return ','.join(ret)
|
return ','.join(ret)
|
||||||
|
@ -253,8 +253,8 @@ def offload_worker(env={}, priority='normal', cwd=None):
|
|||||||
def compile_code(src):
|
def compile_code(src):
|
||||||
import re, io
|
import re, io
|
||||||
if not isinstance(src, unicode_type):
|
if not isinstance(src, unicode_type):
|
||||||
match = re.search(r'coding[:=]\s*([-\w.]+)', src[:200])
|
match = re.search(br'coding[:=]\s*([-\w.]+)', src[:200])
|
||||||
enc = match.group(1) if match else 'utf-8'
|
enc = match.group(1).decode('utf-8') if match else 'utf-8'
|
||||||
src = src.decode(enc)
|
src = src.decode(enc)
|
||||||
# Python complains if there is a coding declaration in a unicode string
|
# Python complains if there is a coding declaration in a unicode string
|
||||||
src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src, flags=re.MULTILINE)
|
src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src, flags=re.MULTILINE)
|
||||||
|
@ -32,8 +32,8 @@ def compile_recipe(src):
|
|||||||
:return: Recipe class or None, if no such class was found in src
|
:return: Recipe class or None, if no such class was found in src
|
||||||
'''
|
'''
|
||||||
if not isinstance(src, unicode_type):
|
if not isinstance(src, unicode_type):
|
||||||
match = re.search(r'coding[:=]\s*([-\w.]+)', src[:200])
|
match = re.search(br'coding[:=]\s*([-\w.]+)', src[:200])
|
||||||
enc = match.group(1) if match else 'utf-8'
|
enc = match.group(1).decode('utf-8') if match else 'utf-8'
|
||||||
src = src.decode(enc)
|
src = src.decode(enc)
|
||||||
# Python complains if there is a coding declaration in a unicode string
|
# Python complains if there is a coding declaration in a unicode string
|
||||||
src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src.lstrip(u'\ufeff'), flags=re.MULTILINE)
|
src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src.lstrip(u'\ufeff'), flags=re.MULTILINE)
|
||||||
|
@ -25,6 +25,7 @@ if is_py3:
|
|||||||
zip = builtins.__dict__['zip']
|
zip = builtins.__dict__['zip']
|
||||||
map = builtins.__dict__['map']
|
map = builtins.__dict__['map']
|
||||||
filter = builtins.__dict__['filter']
|
filter = builtins.__dict__['filter']
|
||||||
|
range = builtins.__dict__['range']
|
||||||
|
|
||||||
codepoint_to_chr = chr
|
codepoint_to_chr = chr
|
||||||
unicode_type = str
|
unicode_type = str
|
||||||
@ -47,6 +48,7 @@ else:
|
|||||||
""")
|
""")
|
||||||
|
|
||||||
from future_builtins import zip, map, filter # noqa
|
from future_builtins import zip, map, filter # noqa
|
||||||
|
range = xrange
|
||||||
import __builtin__ as builtins
|
import __builtin__ as builtins
|
||||||
|
|
||||||
codepoint_to_chr = unichr
|
codepoint_to_chr = unichr
|
||||||
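The last hunk exports range from polyglot.builtins alongside zip, map and filter: the lazy builtin on py3, xrange on py2. A minimal standalone equivalent of that shim (underscored names are used here only to avoid shadowing the builtins):

import sys

if sys.version_info.major >= 3:
    range_, map_, zip_, filter_ = range, map, zip, filter
else:
    from future_builtins import map as map_, zip as zip_, filter as filter_  # noqa
    range_ = xrange  # noqa

assert list(range_(3)) == [0, 1, 2] and list(map_(len, ['ab', 'c'])) == [2, 1]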