Some more fixes for the unicode type
The unicode type is now replaced in all dynamically loaded code: recipes, metadata download sources, etc. In the case of recipes, since they are compiled by calibre, we simply make the unicode/unichr names available in the compilation namespace; no changes to the actual recipes themselves are needed.
This commit is contained in:
parent 6ad22b392b
commit 2d21a8efa2
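
For context, the pattern applied throughout this diff is the py2/py3 text-type idiom: type(u'') evaluates to unicode on Python 2 and to str on Python 3, so code can name the text type without referring to the unicode builtin that Python 3 removed. The sketch below is illustrative only; unicode_type and codepoint_to_chr mirror the names in calibre's polyglot.builtins, but these standalone definitions are not the actual module.

# Minimal sketch of the compatibility idiom used in this commit.
# On Python 2, type(u'') is unicode; on Python 3, it is str.
unicode_type = type(u'')

try:
    codepoint_to_chr = unichr  # Python 2: unichr() builds a one-character unicode string
except NameError:
    codepoint_to_chr = chr  # Python 3: chr() already returns str

# Recipes are exec()d by calibre, so exposing the old names in the execution
# namespace keeps unmodified recipes working on both Pythons (see the
# compile_recipe hunk below).
recipe_namespace = {
    'unicode': unicode_type,
    'unichr': codepoint_to_chr,
}

assert isinstance(u'abc', unicode_type)
assert codepoint_to_chr(0x2603) == u'\u2603'  # U+2603 SNOWMAN
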
@@ -36,7 +36,7 @@ def merge():
     for child in svg.iterchildren('*'):
         clone_node(child, symbol)
     ans.append(symbol)
-    ans = etree.tostring(ans, encoding=unicode, pretty_print=True, with_tail=False)
+    ans = etree.tostring(ans, encoding='unicode', pretty_print=True, with_tail=False)
     ans = re.sub('<svg[^>]+>', '<svg style="display:none">', ans, count=1)
     return ans
 
@@ -168,7 +168,7 @@ def sort_languages(x):
         lc, name = x
         if lc == language:
             return ''
-        return sort_key(unicode(name))
+        return sort_key(type(u'')(name))
 
 
     html_context['other_languages'].sort(key=sort_languages)
@@ -198,7 +198,7 @@ def generate_ebook_convert_help(preamble, app):
 
 
 def update_cli_doc(name, raw, app):
-    if isinstance(raw, unicode):
+    if isinstance(raw, type(u'')):
        raw = raw.encode('utf-8')
    path = 'generated/%s/%s.rst' % (app.config.language, name)
    old_raw = open(path, 'rb').read() if os.path.exists(path) else ''
@@ -21,6 +21,7 @@ prefs = JSONConfig('plugins/interface_demo')
 # Set defaults
 prefs.defaults['hello_world_msg'] = 'Hello, World!'
 
+
 class ConfigWidget(QWidget):
 
     def __init__(self):
@@ -37,5 +38,4 @@ class ConfigWidget(QWidget):
         self.label.setBuddy(self.msg)
 
     def save_settings(self):
-        prefs['hello_world_msg'] = unicode(self.msg.text())
-
+        prefs['hello_world_msg'] = self.msg.text()
@@ -139,7 +139,7 @@ else:
         enc = preferred_encoding
         safe_encode = kwargs.get('safe_encode', False)
         for i, arg in enumerate(args):
-            if isinstance(arg, unicode):
+            if isinstance(arg, type(u'')):
                 try:
                     arg = arg.encode(enc)
                 except UnicodeEncodeError:
@@ -150,8 +150,8 @@ else:
                 try:
                     arg = str(arg)
                 except ValueError:
-                    arg = unicode(arg)
-                if isinstance(arg, unicode):
+                    arg = type(u'')(arg)
+                if isinstance(arg, type(u'')):
                     try:
                         arg = arg.encode(enc)
                     except UnicodeEncodeError:
@@ -1795,41 +1795,41 @@ class UnicodeDammit:
         elif xml_data[:4] == '\x00\x3c\x00\x3f':
             # UTF-16BE
             sniffed_xml_encoding = 'utf-16be'
-            #xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
+            #xml_data = type(u'')(xml_data, 'utf-16be').encode('utf-8')
         elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
                 and (xml_data[2:4] != '\x00\x00'):
             # UTF-16BE with BOM
             sniffed_xml_encoding = 'utf-16be'
-            #xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
+            #xml_data = type(u'')(xml_data[2:], 'utf-16be').encode('utf-8')
         elif xml_data[:4] == '\x3c\x00\x3f\x00':
             # UTF-16LE
             sniffed_xml_encoding = 'utf-16le'
-            #xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
+            #xml_data = type(u'')(xml_data, 'utf-16le').encode('utf-8')
         elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
                 (xml_data[2:4] != '\x00\x00'):
             # UTF-16LE with BOM
             sniffed_xml_encoding = 'utf-16le'
-            #xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
+            #xml_data = type(u'')(xml_data[2:], 'utf-16le').encode('utf-8')
         elif xml_data[:4] == '\x00\x00\x00\x3c':
             # UTF-32BE
             sniffed_xml_encoding = 'utf-32be'
-            #xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
+            #xml_data = type(u'')(xml_data, 'utf-32be').encode('utf-8')
         elif xml_data[:4] == '\x3c\x00\x00\x00':
             # UTF-32LE
             sniffed_xml_encoding = 'utf-32le'
-            #xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
+            #xml_data = type(u'')(xml_data, 'utf-32le').encode('utf-8')
         elif xml_data[:4] == '\x00\x00\xfe\xff':
             # UTF-32BE with BOM
             sniffed_xml_encoding = 'utf-32be'
-            #xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
+            #xml_data = type(u'')(xml_data[4:], 'utf-32be').encode('utf-8')
         elif xml_data[:4] == '\xff\xfe\x00\x00':
             # UTF-32LE with BOM
             sniffed_xml_encoding = 'utf-32le'
-            #xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
+            #xml_data = type(u'')(xml_data[4:], 'utf-32le').encode('utf-8')
         elif xml_data[:3] == '\xef\xbb\xbf':
             # UTF-8 with BOM
             sniffed_xml_encoding = 'utf-8'
-            #xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
+            #xml_data = type(u'')(xml_data[3:], 'utf-8').encode('utf-8')
         else:
             sniffed_xml_encoding = 'ascii'
             pass
@@ -93,7 +93,7 @@ def parse_details_page(url, log, timeout, browser, domain):
     errmsg = root.xpath('//*[@id="errorMessage"]')
     if errmsg:
         msg = 'Failed to parse amazon details page: %r' % url
-        msg += tostring(errmsg, method='text', encoding=unicode).strip()
+        msg += tostring(errmsg, method='text', encoding='unicode').strip()
         log.error(msg)
         return
 
@@ -466,7 +466,7 @@ class Worker(Thread): # Get details {{{
         self.result_queue.put(mi)
 
     def totext(self, elem):
-        return self.tostring(elem, encoding=unicode, method='text').strip()
+        return self.tostring(elem, encoding='unicode', method='text').strip()
 
     def parse_title(self, root):
         h1 = root.xpath('//h1[@id="title"]')
@@ -478,10 +478,10 @@ class Worker(Thread): # Get details {{{
             tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0]
             actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]')
             if actual_title:
-                title = self.tostring(actual_title[0], encoding=unicode,
+                title = self.tostring(actual_title[0], encoding='unicode',
                                       method='text').strip()
             else:
-                title = self.tostring(tdiv, encoding=unicode,
+                title = self.tostring(tdiv, encoding='unicode',
                                       method='text').strip()
         ans = re.sub(r'[(\[].*[)\]]', '', title).strip()
         if not ans:
@@ -508,7 +508,7 @@ class Worker(Thread): # Get details {{{
             ''')
         for x in aname:
             x.tail = ''
-        authors = [self.tostring(x, encoding=unicode, method='text').strip() for x
+        authors = [self.tostring(x, encoding='unicode', method='text').strip() for x
                    in aname]
        authors = [a for a in authors if a]
        return authors
@@ -559,7 +559,7 @@ class Worker(Thread): # Get details {{{
             for a in desc.xpath('descendant::a[@href]'):
                 del a.attrib['href']
                 a.tag = 'span'
-            desc = self.tostring(desc, method='html', encoding=unicode).strip()
+            desc = self.tostring(desc, method='html', encoding='unicode').strip()
 
             # Encoding bug in Amazon data U+fffd (replacement char)
             # in some examples it is present in place of '
@@ -626,14 +626,14 @@ class Worker(Thread): # Get details {{{
             spans = series.xpath('./span')
             if spans:
                 raw = self.tostring(
-                    spans[0], encoding=unicode, method='text', with_tail=False).strip()
+                    spans[0], encoding='unicode', method='text', with_tail=False).strip()
                 m = re.search(r'\s+([0-9.]+)$', raw.strip())
                 if m is not None:
                     series_index = float(m.group(1))
                     s = series.xpath('./a[@id="series-page-link"]')
                     if s:
                         series = self.tostring(
-                            s[0], encoding=unicode, method='text', with_tail=False).strip()
+                            s[0], encoding='unicode', method='text', with_tail=False).strip()
                         if series:
                             ans = (series, series_index)
         # This is found on Kindle edition pages on amazon.com
@@ -646,7 +646,7 @@ class Worker(Thread): # Get details {{{
                 a = span.xpath('./a[@href]')
                 if a:
                     series = self.tostring(
-                        a[0], encoding=unicode, method='text', with_tail=False).strip()
+                        a[0], encoding='unicode', method='text', with_tail=False).strip()
                     if series:
                         ans = (series, series_index)
         # This is found on newer Kindle edition pages on amazon.com
@@ -659,14 +659,14 @@ class Worker(Thread): # Get details {{{
                 a = b.getparent().xpath('./a[@href]')
                 if a:
                     series = self.tostring(
-                        a[0], encoding=unicode, method='text', with_tail=False).partition('(')[0].strip()
+                        a[0], encoding='unicode', method='text', with_tail=False).partition('(')[0].strip()
                     if series:
                         ans = series, series_index
 
         if ans == (None, None):
             desc = root.xpath('//div[@id="ps-content"]/div[@class="buying"]')
             if desc:
-                raw = self.tostring(desc[0], method='text', encoding=unicode)
+                raw = self.tostring(desc[0], method='text', encoding='unicode')
                 raw = re.sub(r'\s+', ' ', raw)
                 match = self.series_pat.search(raw)
                 if match is not None:
@@ -1161,7 +1161,7 @@ class Amazon(Source):
             if not result_links:
                 result_links = root.xpath(r'//li[starts-with(@id, "result_")]//a[@href and contains(@class, "s-access-detail-page")]')
             for a in result_links:
-                title = tostring(a, method='text', encoding=unicode)
+                title = tostring(a, method='text', encoding='unicode')
                 if title_ok(title):
                     url = a.get('href')
                     if url.startswith('/'):
@@ -1177,7 +1177,7 @@ class Amazon(Source):
                 # New amazon markup
                 links = div.xpath('descendant::h3/a[@href]')
                 for a in links:
-                    title = tostring(a, method='text', encoding=unicode)
+                    title = tostring(a, method='text', encoding='unicode')
                     if title_ok(title):
                         url = a.get('href')
                         if url.startswith('/'):
@@ -1192,7 +1192,7 @@ class Amazon(Source):
             for td in root.xpath(
                     r'//div[@id="Results"]/descendant::td[starts-with(@id, "search:Td:")]'):
                 for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'):
-                    title = tostring(a, method='text', encoding=unicode)
+                    title = tostring(a, method='text', encoding='unicode')
                     if title_ok(title):
                         url = a.get('href')
                         if url.startswith('/'):
@@ -99,7 +99,7 @@ def main(args=sys.argv):
         log = buf.getvalue()
 
         result = (metadata_to_opf(result) if opts.opf else
-                  unicode(result).encode('utf-8'))
+                  type(u'')(result).encode('utf-8'))
 
         if opts.verbose:
             print (log, file=sys.stderr)
@@ -203,7 +203,7 @@ class Douban(Source):
                  build_term('author', author_tokens))
        t = 'search'
        q = q.strip()
-        if isinstance(q, unicode):
+        if isinstance(q, type(u'')):
            q = q.encode('utf-8')
        if not q:
            return None
@@ -31,7 +31,7 @@ def parse_html(raw):
 
 def astext(node):
     from lxml import etree
-    return etree.tostring(node, method='text', encoding=unicode,
+    return etree.tostring(node, method='text', encoding='unicode',
                           with_tail=False).strip()
 
 
@@ -110,7 +110,7 @@ class Worker(Thread): # {{{
             for a in desc.xpath('descendant::a[@href]'):
                 del a.attrib['href']
                 a.tag = 'span'
-            desc = etree.tostring(desc, method='html', encoding=unicode).strip()
+            desc = etree.tostring(desc, method='html', encoding='unicode').strip()
 
             # remove all attributes from tags
             desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
@@ -160,7 +160,7 @@ def get_basic_data(browser, log, *skus):
         tags = []
         rating = 0
         for bar in row.xpath('descendant::*[contains(@class, "bgdColorCommunity")]/@style'):
-            m = re.search('width: (\d+)px;.*max-width: (\d+)px', bar)
+            m = re.search(r'width: (\d+)px;.*max-width: (\d+)px', bar)
             if m is not None:
                 rating = float(m.group(1)) / float(m.group(2))
                 break
@@ -283,7 +283,7 @@ class Edelweiss(Source):
         except Exception as e:
             log.exception('Failed to make identify query: %r'%query)
             return as_unicode(e)
-        items = re.search('window[.]items\s*=\s*(.+?);', raw)
+        items = re.search(r'window[.]items\s*=\s*(.+?);', raw)
         if items is None:
             log.error('Failed to get list of matching items')
             log.debug('Response text:')
@@ -214,7 +214,7 @@ class GoogleBooks(Source):
         if author_tokens:
             q += ('+' if q else '') + build_term('author', author_tokens)
 
-        if isinstance(q, unicode):
+        if isinstance(q, type(u'')):
             q = q.encode('utf-8')
         if not q:
             return None
@@ -471,7 +471,7 @@ def identify(log, abort,  # {{{
         for r in presults:
             log('\n\n---')
             try:
-                log(unicode(r))
+                log(type(u'')(r))
             except TypeError:
                 log(repr(r))
         if plog:
@@ -233,7 +233,7 @@ class OverDrive(Source):
         xreq.add_header('Referer', q_init_search)
         xreq.add_header('Accept', 'application/json, text/javascript, */*')
         raw = br.open_novisit(xreq).read()
-        for m in re.finditer(unicode(r'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)'), raw):
+        for m in re.finditer(type(u'')(r'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)'), raw):
             if int(m.group('totalrecords')) == 0:
                 return ''
             elif int(m.group('displayrecords')) >= 1:
@@ -450,7 +450,7 @@ class OverDrive(Source):
 
         if desc:
             desc = desc[0]
-            desc = html.tostring(desc, method='html', encoding=unicode).strip()
+            desc = html.tostring(desc, method='html', encoding='unicode').strip()
             # remove all attributes from tags
             desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
             # Remove comments
@@ -100,7 +100,7 @@ class Ozon(Source):
         qItems.discard('')
         searchText = u' '.join(qItems).strip()
 
-        if isinstance(searchText, unicode):
+        if isinstance(searchText, type(u'')):
             searchText = searchText.encode('utf-8')
         if not searchText:
             return None
@@ -148,7 +148,7 @@ class Ozon(Source):
         else:
             # Redirect page: trying to extract ozon_id from javascript data
             h = HTMLParser()
-            entry_string = (h.unescape(etree.tostring(doc, pretty_print=True, encoding=unicode)))
+            entry_string = (h.unescape(etree.tostring(doc, pretty_print=True, encoding='unicode')))
             json_pat = re.compile(r'dataLayer\s*=\s*(.+)?;')
             json_info = re.search(json_pat, entry_string)
             jsondata = json_info.group(1) if json_info else None
@@ -198,16 +198,16 @@ class Ozon(Source):
 
         reRemoveFromTitle = re.compile(r'[?!:.,;+-/&%"\'=]')
 
-        title = unicode(title).upper() if title else ''
+        title = type(u'')(title).upper() if title else ''
         if reRemoveFromTitle:
             title = reRemoveFromTitle.sub('', title)
         authors = map(_normalizeAuthorNameWithInitials,
-                      map(unicode.upper, map(unicode, authors))) if authors else None
+                      map(type(u'').upper, map(type(u''), authors))) if authors else None
 
         ozon_id = identifiers.get('ozon', None)
         # log.debug(u'ozonid: ', ozon_id)
 
-        unk = unicode(_('Unknown')).upper()
+        unk = type(u'')(_('Unknown')).upper()
 
         if title == unk:
             title = None
@@ -226,7 +226,7 @@ class Ozon(Source):
         def calc_source_relevance(mi): # {{{
             relevance = 0
             if title:
-                mititle = unicode(mi.title).upper() if mi.title else ''
+                mititle = type(u'')(mi.title).upper() if mi.title else ''
 
                 if reRemoveFromTitle:
                     mititle = reRemoveFromTitle.sub('', mititle)
@@ -240,7 +240,7 @@ class Ozon(Source):
                 relevance += 1
 
             if authors:
-                miauthors = map(unicode.upper, map(unicode, mi.authors)) if mi.authors else []
+                miauthors = map(type(u'').upper, map(type(u''), mi.authors)) if mi.authors else []
                 # log.debug('Authors %s vs miauthors %s'%(','.join(authors), ','.join(miauthors)))
 
                 if (in_authors(authors, miauthors)):
@@ -320,13 +320,13 @@ class Ozon(Source):
     # }}}
 
     def to_metadata(self, log, entry): # {{{
-        title = unicode(entry.xpath(u'normalize-space(.//div[@itemprop="name"][1]/text())'))
+        title = type(u'')(entry.xpath(u'normalize-space(.//div[@itemprop="name"][1]/text())'))
         # log.debug(u'Title: -----> %s' % title)
 
-        author = unicode(entry.xpath(u'normalize-space(.//div[contains(@class, "mPerson")])'))
+        author = type(u'')(entry.xpath(u'normalize-space(.//div[contains(@class, "mPerson")])'))
         # log.debug(u'Author: -----> %s' % author)
 
-        norm_authors = map(_normalizeAuthorNameWithInitials, map(unicode.strip, unicode(author).split(u',')))
+        norm_authors = map(_normalizeAuthorNameWithInitials, map(type(u'').strip, type(u'')(author).split(u',')))
         mi = Metadata(title, norm_authors)
 
         ozon_id = entry.get('data-href').split('/')[-2]
@@ -524,7 +524,7 @@ class Ozon(Source):
         # comments, from Javascript data
         beginning = fullString.find(u'FirstBlock')
         end = fullString.find(u'}', beginning)
-        comments = unicode(fullString[beginning + 75:end - 1]).decode("unicode-escape")
+        comments = type(u'')(fullString[beginning + 75:end - 1]).decode("unicode-escape")
         metadata.comments = replace_entities(comments, 'utf-8')
     # }}}
 
@@ -603,7 +603,7 @@ def _format_isbn(log, isbn): # {{{
 
 
 def _translageLanguageToCode(displayLang): # {{{
-    displayLang = unicode(displayLang).strip() if displayLang else None
+    displayLang = type(u'')(displayLang).strip() if displayLang else None
    langTbl = {None: 'ru',
               u'Русский': 'ru',
               u'Немецкий': 'de',
@@ -627,9 +627,9 @@ def _normalizeAuthorNameWithInitials(name): # {{{
     if name:
         re1 = r'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
         re2 = r'^(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?\s+(?P<lname>\S+)$'
-        matcher = re.match(re1, unicode(name), re.UNICODE)
+        matcher = re.match(re1, type(u'')(name), re.UNICODE)
         if not matcher:
-            matcher = re.match(re2, unicode(name), re.UNICODE)
+            matcher = re.match(re2, type(u'')(name), re.UNICODE)
 
         if matcher:
             d = matcher.groupdict()
@@ -653,7 +653,7 @@ def toPubdate(log, yearAsString): # {{{
 # }}}
 
 def _listToUnicodePrintStr(lst): # {{{
-    return u'[' + u', '.join(unicode(x) for x in lst) + u']'
+    return u'[' + u', '.join(type(u'')(x) for x in lst) + u']'
 
 
 # }}}
@@ -26,7 +26,7 @@ Result = namedtuple('Result', 'url title cached_url')
 
 
 def tostring(elem):
-    return etree.tostring(elem, encoding=unicode, method='text', with_tail=False)
+    return etree.tostring(elem, encoding='unicode', method='text', with_tail=False)
 
 
 def browser():
@@ -128,11 +128,11 @@ class Textile(object):
 
     pnct = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]'
     # urlch = r'[\w"$\-_.+!*\'(),";/?:@=&%#{}|\\^~\[\]`]'
-    urlch = '[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]'
+    urlch = r'[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]'
 
     url_schemes = ('http', 'https', 'ftp', 'mailto')
 
-    btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p')
+    btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', r'fn\d+', 'p')
     btag_lite = ('bq', 'bc', 'p')
 
     macro_defaults = [
@@ -292,7 +292,7 @@ class Textile(object):
         """
         self.html_type = html_type
 
-        # text = unicode(text)
+        # text = type(u'')(text)
         text = _normalize_newlines(text)
 
         if self.restricted:
@@ -21,7 +21,9 @@ class BiblioStore(BasicStoreConfig, OpenSearchOPDSStore):
 
     def search(self, query, max_results=10, timeout=60):
         # check for cyrillic symbols before performing search
-        uquery = unicode(query.strip(), 'utf-8')
+        if isinstance(query, bytes):
+            query = query.decode('utf-8')
+        uquery = query.strip()
         reObj = re.search(u'^[а-яА-Я\\d\\s]{3,}$', uquery)
         if not reObj:
             return
@@ -43,7 +43,9 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
 
     def search(self, query, max_results=10, timeout=60):
         # check for cyrillic symbols before performing search
-        uquery = unicode(query.strip(), 'utf-8')
+        if isinstance(query, bytes):
+            query = query.decode('utf-8')
+        uquery = query.strip()
         reObj = re.search(u'^[а-яА-Я\\d\\s]{3,}$', uquery)
         if not reObj:
             return
@@ -56,7 +58,7 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
         br = browser()
         try:
             with closing(br.open(url, timeout=timeout)) as f:
-                f = unicode(f.read(), 'utf-8')
+                f = f.read().decode('utf-8')
                 doc = html.fromstring(f)
 
                 for data in doc.xpath('//ul[@class="superlist booklist"]/li'):
@@ -98,7 +100,7 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
                 with closing(br2.open(base_url + author_url, timeout=timeout)) as f:
                     if counter <= 0:
                         break
-                    f = unicode(f.read(), 'utf-8')
+                    f = f.read().decode('utf-8')
                     doc2 = html.fromstring(f)
 
                     # search for book title
@@ -22,10 +22,9 @@ class EbooksGratuitsStore(BasicStoreConfig, OpenSearchOPDSStore):
         return ascii_text(s)
 
     def search(self, query, max_results=10, timeout=60):
-        query = self.strip_accents(unicode(query))
+        query = self.strip_accents(type(u'')(query))
         for s in OpenSearchOPDSStore.search(self, query, max_results, timeout):
             if s.downloads:
                 s.drm = SearchResult.DRM_UNLOCKED
                 s.price = '$0.00'
                 yield s
-
@@ -49,7 +49,7 @@ class eKnigiStore(BasicStoreConfig, StorePlugin):
 
     def search(self, query, max_results=10, timeout=60):
         # check for cyrillic symbols before performing search
-        uquery = unicode(query.strip(), 'utf-8')
+        uquery = type(u'')(query.strip(), 'utf-8')
         reObj = re.search(u'^[а-яА-Я\\d\\s]{2,}$', uquery)
         if not reObj:
             return
@@ -46,7 +46,7 @@ def search_kobo(query, max_results=10, timeout=60, write_html_to=None):
         cover_url = None
 
         for p in select('p.title', item):
-            title = etree.tostring(p, method='text', encoding=unicode).strip()
+            title = etree.tostring(p, method='text', encoding='unicode').strip()
             for a in select('a[href]', p):
                 url = a.get('href')
                 break
@@ -58,11 +58,11 @@ def search_kobo(query, max_results=10, timeout=60, write_html_to=None):
 
         authors = []
         for a in select('p.contributor-list a.contributor-name', item):
-            authors.append(etree.tostring(a, method='text', encoding=unicode).strip())
+            authors.append(etree.tostring(a, method='text', encoding='unicode').strip())
         authors = authors_to_string(authors)
 
         for p in select('p.price', item):
-            price = etree.tostring(p, method='text', encoding=unicode).strip()
+            price = etree.tostring(p, method='text', encoding='unicode').strip()
             break
         else:
             price = None
@@ -88,7 +88,7 @@ class LitResStore(BasicStoreConfig, StorePlugin):
         authors = data.xpath('.//title-info/author/first-name/text()|'
                              './/title-info/author/middle-name/text()|'
                              './/title-info/author/last-name/text()')
-        sRes.author = u' '.join(map(unicode, authors))
+        sRes.author = u' '.join(map(type(u''), authors))
         sRes.price = data.xpath(xp_template.format('price'))
         # cover vs cover_preview
         sRes.cover_url = data.xpath(xp_template.format('cover_preview'))
@@ -107,7 +107,7 @@ def format_price_in_RUR(price):
     @return: formatted price if possible otherwise original value
     @rtype: unicode
     '''
-    if price and re.match("^\d*?\.\d*?$", price):
+    if price and re.match(r"^\d*?\.\d*?$", price):
        try:
            price = u'{:,.2F} руб.'.format(float(price))
            price = price.replace(',', ' ').replace('.', ',', 1)
@@ -67,7 +67,7 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
             self.mc = '='
         else:
             self.mc = '~'
-        all, any, phrase, none = map(lambda x: unicode(x.text()),
+        all, any, phrase, none = map(lambda x: type(u'')(x.text()),
                 (self.all, self.any, self.phrase, self.none))
         all, any, none = map(self.tokens, (all, any, none))
         phrase = phrase.strip()
@@ -86,11 +86,11 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
         return ans
 
     def token(self):
-        txt = unicode(self.text.text()).strip()
+        txt = type(u'')(self.text.text()).strip()
         if txt:
             if self.negate.isChecked():
                 txt = '!'+txt
-            tok = self.FIELDS[unicode(self.field.currentText())]+txt
+            tok = self.FIELDS[type(u'')(self.field.currentText())]+txt
             if re.search(r'\s', tok):
                 tok = '"%s"'%tok
             return tok
@@ -106,13 +106,13 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
 
         ans = []
         self.box_last_values = {}
-        title = unicode(self.title_box.text()).strip()
+        title = type(u'')(self.title_box.text()).strip()
         if title:
             ans.append('title:"' + self.mc + title + '"')
-        author = unicode(self.author_box.text()).strip()
+        author = type(u'')(self.author_box.text()).strip()
         if author:
             ans.append('author:"' + self.mc + author + '"')
-        format = unicode(self.format_box.text()).strip()
+        format = type(u'')(self.format_box.text()).strip()
         if format:
             ans.append('format:"' + self.mc + format + '"')
         if ans:
@@ -22,7 +22,7 @@ class CacheUpdateThread(Thread, QObject):
 
     total_changed = pyqtSignal(int)
     update_progress = pyqtSignal(int)
-    update_details = pyqtSignal(unicode)
+    update_details = pyqtSignal(type(u''))
 
     def __init__(self, config, seralize_books_function, timeout):
         Thread.__init__(self)
@@ -105,7 +105,7 @@ class BooksModel(QAbstractItemModel):
             return
         descending = order == Qt.DescendingOrder
         self.books.sort(None,
-            lambda x: sort_key(unicode(self.data_as_text(x, col))),
+            lambda x: sort_key(type(u'')(self.data_as_text(x, col))),
             descending)
         if reset:
             self.beginResetModel(), self.endResetModel()
@@ -40,7 +40,7 @@ class MobileReadStoreDialog(QDialog, Ui_Dialog):
         self.restore_state()
 
     def do_search(self):
-        self.results_view.model().search(unicode(self.search_query.text()))
+        self.results_view.model().search(type(u'')(self.search_query.text()))
 
     def open_store(self, index):
         result = self.results_view.model().get_book(index)
@@ -6,10 +6,14 @@ from __future__ import absolute_import, division, print_function, unicode_literals
 
 import sys
 
-from calibre.constants import iswindows, preferred_encoding
+from calibre.constants import iswindows, preferred_encoding, ispy3
 
 
-def getpass(prompt):
+if ispy3:
+    from getpass import getpass
+    getpass
+else:
+    def getpass(prompt):
         if iswindows:
             # getpass is broken on windows with python 2.x and unicode, the
             # below implementation is from the python 3 source code
@@ -9,7 +9,7 @@ from calibre.web.feeds.news import (BasicNewsRecipe, CustomIndexRecipe,
                                     AutomaticNewsRecipe, CalibrePeriodical)
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.utils.config import JSONConfig
-from polyglot.builtins import unicode_type
+from polyglot.builtins import unicode_type, codepoint_to_chr
 
 basic_recipes = (BasicNewsRecipe, AutomaticNewsRecipe, CustomIndexRecipe,
                  CalibrePeriodical)
@@ -44,7 +44,9 @@ def compile_recipe(src):
         'BasicNewsRecipe':BasicNewsRecipe,
         'AutomaticNewsRecipe':AutomaticNewsRecipe,
         'time':time, 're':re,
-        'BeautifulSoup':BeautifulSoup
+        'BeautifulSoup':BeautifulSoup,
+        'unicode': unicode_type,
+        'unichr': codepoint_to_chr,
     }
     exec(src, namespace)
 
@@ -15,20 +15,17 @@ import operator
 import string
 
 from css_selectors.errors import SelectorSyntaxError, ExpressionError
+from polyglot.builtins import unicode_type, codepoint_to_chr
 
-if sys.version_info[0] < 3:
-    _unicode = unicode
-    _unichr = unichr
-else:
-    _unicode = str
-    _unichr = chr
 
 tab = string.maketrans(string.ascii_uppercase, string.ascii_lowercase)
 utab = {c:c+32 for c in range(ord('A'), ord('Z')+1)}
 
 
 def ascii_lower(string):
     """Lower-case, but only in the ASCII range."""
-    return string.translate(utab if isinstance(string, _unicode) else tab)
+    return string.translate(utab if isinstance(string, unicode_type) else tab)
 
 
 def urepr(x):
     if isinstance(x, list):
@@ -38,6 +35,7 @@ def urepr(x):
         ans = ans[1:]
     return ans
 
+
 # Parsed objects
 
 class Selector(object):
@@ -385,6 +383,7 @@ def parse_selector_group(stream):
         else:
             break
 
+
 def parse_selector(stream):
     result, pseudo_element = parse_simple_selector(stream)
     while 1:
@@ -461,7 +460,7 @@ def parse_simple_selector(stream, inside_negation=False):
                     'before', 'after'):
                 # Special case: CSS 2.1 pseudo-elements can have a single ':'
                 # Any new pseudo-element must have two.
-                pseudo_element = _unicode(ident)
+                pseudo_element = unicode_type(ident)
                 continue
             if stream.peek() != ('DELIM', '('):
                 result = Pseudo(result, ident)
@@ -626,11 +625,13 @@ class TokenMacros:
     nmchar = '[_a-z0-9-]|%s|%s' % (escape, nonascii)
     nmstart = '[_a-z]|%s|%s' % (escape, nonascii)
 
+
 def _compile(pattern):
     return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match
 
+
 _match_whitespace = _compile(r'[ \t\r\n\f]+')
-_match_number = _compile('[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
+_match_number = _compile(r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
 _match_hash = _compile('#(?:%(nmchar)s)+')
 _match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*')
 _match_string_by_quote = {
@@ -650,11 +651,12 @@ else:
     def _replace_simple(match):
         return match.group(1)
 
+
     def _replace_unicode(match):
         codepoint = int(match.group(1), 16)
         if codepoint > sys.maxunicode:
             codepoint = 0xFFFD
-        return _unichr(codepoint)
+        return codepoint_to_chr(codepoint)
 
 
 def unescape_ident(value):
@@ -29,7 +29,7 @@ def make_NCName(arg):
     return arg
 
 def cnv_anyURI(attribute, arg, element):
-    return unicode(arg)
+    return type(u'')(arg)
 
 def cnv_boolean(attribute, arg, element):
     if arg.lower() in ("false","no"):
@@ -85,13 +85,13 @@ def cnv_family(attribute, arg, element):
 def __save_prefix(attribute, arg, element):
     prefix = arg.split(':',1)[0]
     if prefix == arg:
-        return unicode(arg)
+        return type(u'')(arg)
     namespace = element.get_knownns(prefix)
     if namespace is None:
         #raise ValueError, "'%s' is an unknown prefix" % str(prefix)
-        return unicode(arg)
+        return type(u'')(arg)
     p = element.get_nsprefix(namespace)
-    return unicode(arg)
+    return type(u'')(arg)
 
 def cnv_formula(attribute, arg, element):
     """ A string containing a formula. Formulas do not have a predefined syntax, but the string should
@@ -218,7 +218,7 @@ def cnv_positiveInteger(attribute, arg, element):
     return str(arg)
 
 def cnv_string(attribute, arg, element):
-    return unicode(arg)
+    return type(u'')(arg)
 
 def cnv_textnoteclass(attribute, arg, element):
     if str(arg) not in ("footnote", "endnote"):
@@ -1480,5 +1480,4 @@ class AttrConverters:
         conversion = attrconverters.get((attribute, None), None)
         if conversion is not None:
             return conversion(attribute, value, element)
-        return unicode(value)
-
+        return type(u'')(value)
@@ -182,7 +182,7 @@ class Node(xml.dom.Node):
     def __unicode__(self):
         val = []
         for c in self.childNodes:
-            val.append(unicode(c))
+            val.append(type(u'')(c))
         return u''.join(val)
 
 defproperty(Node, "firstChild", doc="First child node, or None.")
@@ -253,7 +253,7 @@ class Text(Childless, Node):
     def toXml(self,level,f):
         """ Write XML in UTF-8 """
         if self.data:
-            f.write(_escape(unicode(self.data).encode('utf-8')))
+            f.write(_escape(type(u'')(self.data).encode('utf-8')))
 
 class CDATASection(Childless, Text):
     nodeType = Node.CDATA_SECTION_NODE
@@ -469,7 +469,7 @@ class Element(Node):
                 f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
         for qname in self.attributes.keys():
             prefix = self.get_nsprefix(qname[0])
-            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
+            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(type(u'')(self.attributes[qname]).encode('utf-8')))
         f.write('>')
 
     def write_close_tag(self, level, f):
@@ -483,7 +483,7 @@ class Element(Node):
                 f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
         for qname in self.attributes.keys():
             prefix = self.get_nsprefix(qname[0])
-            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
+            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(type(u'')(self.attributes[qname]).encode('utf-8')))
         if self.childNodes:
             f.write('>')
             for element in self.childNodes:
@@ -509,5 +509,3 @@ class Element(Node):
         """ This is a check to see if the object is an instance of a type """
         obj = element(check_grammar=False)
         return self.qname == obj.qname
-
-
@@ -55,6 +55,7 @@ if False:  # Added by Kovid
 # character etc. styles Since CSS2 has no scope we use a prefix. (Not elegant)
 # In ODF a style can have a parent, these parents can be chained.
 
+
 class StyleToCSS:
 
     """ The purpose of the StyleToCSS class is to contain the rules to convert
@@ -317,6 +318,7 @@ class TagStack:
         if attr in attrs:
             return attrs[attr]
         return None
+
     def count_tags(self, tag):
         c = 0
         for ttag, tattrs in self.stack:
@@ -324,6 +326,7 @@ class TagStack:
                 c = c + 1
         return c
 
+
 special_styles = {
     'S-Emphasis':'em',
     'S-Citation':'cite',
@@ -352,6 +355,8 @@ special_styles = {
 # ODFCONTENTHANDLER
 #
 # -----------------------------------------------------------------------------
+
+
 class ODF2XHTML(handler.ContentHandler):
 
     """ The ODF2XHTML parses an ODF file and produces XHTML"""
@@ -625,9 +630,6 @@ class ODF2XHTML(handler.ContentHandler):
             self.anchors[name] = "anchor%d" % (len(self.anchors) + 1)
         return self.anchors.get(name)
 
-
-    # --------------------------------------------------
-
     def purgedata(self):
         self.data = []
 
@@ -1457,7 +1459,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
         # self.writeout( escape(mark) )
         # Since HTML only knows about endnotes, there is too much risk that the
         # marker is reused in the source. Therefore we force numeric markers
-        self.writeout(unicode(self.currentnote))
+        self.writeout(type(u'')(self.currentnote))
         self.closetag('a')
         self.closetag('sup')
 
@@ -1566,12 +1568,11 @@ dl.notes dd:last-of-type { page-break-after: avoid }
         self.writedata()
         self.purgedata()
 
-
-# -----------------------------------------------------------------------------
-#
-# Reading the file
-#
-# -----------------------------------------------------------------------------
+    # -----------------------------------------------------------------------------
+    #
+    # Reading the file
+    #
+    # -----------------------------------------------------------------------------
 
     def load(self, odffile):
         """ Loads a document into the parser and parses it.
@@ -1593,7 +1594,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
                 self._walknode(c)
                 self.endElementNS(node.qname, node.tagName)
             if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE:
-                self.characters(unicode(node))
+                self.characters(type(u'')(node))
 
     def odf2xhtml(self, odffile):
         """ Load a file and return the XHTML
@@ -8,10 +8,6 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
 
 import unittest
 
-try:
-    unicode
-except NameError:
-    unicode = str
 
 def jsonify(tokens):
     """Turn tokens into "JSON-compatible" data structures."""
@@ -24,6 +20,7 @@ def jsonify(tokens):
     else:
         yield token.type, token.value
 
+
 class BaseTest(unittest.TestCase):
 
     longMessage = True
@@ -34,10 +31,8 @@ class BaseTest(unittest.TestCase):
         """Test not complete error messages but only substrings."""
         self.ae(len(errors), len(expected_errors))
         for error, expected in zip(errors, expected_errors):
-            self.assertIn(expected, unicode(error))
+            self.assertIn(expected, type(u'')(error))
 
     def jsonify_declarations(self, rule):
         return [(decl.name, list(jsonify(decl.value)))
                 for decl in rule.declarations]
-
-
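
The other change that recurs above is mechanical: etree.tostring(elem, encoding=unicode) worked only where the py2 unicode builtin existed, while the documented, version-independent spelling is the string 'unicode'. A small standalone illustration, independent of calibre:

from lxml import etree

root = etree.fromstring('<a><b>text</b></a>')
# encoding='unicode' makes tostring return native text rather than bytes,
# identically on Python 2 and Python 3.
ans = etree.tostring(root, method='text', encoding='unicode', with_tail=False)
assert ans == 'text'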